diff --git a/config.json b/config.json index 5acc1f58477a5f4c354ee4748c920d2ede9983fc..61c812f50a5dd286ecbd18ea85231d5c25f73568 100644 --- a/config.json +++ b/config.json @@ -13,7 +13,7 @@ "num_hidden_layers": 28, "rescale_every": 6, "tie_word_embeddings": false, - "torch_dtype": "float32", + "torch_dtype": "bfloat16", "transformers_version": "4.30.0.dev0", "use_cache": true, "vocab_size": 52000 diff --git a/model-00001-of-00015.safetensors b/model-00001-of-00015.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50719fc00bfd4439910ce4e6315c4a17ecf083e4 --- /dev/null +++ b/model-00001-of-00015.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f31d9b9d24c2be5e317302e6196c0ecb64f4011d82eea868135127b72ea0e2 +size 996609896 diff --git a/model-00001-of-00030.safetensors b/model-00001-of-00030.safetensors deleted file mode 100644 index c607481df9d2e529071d745e90a75e2bb94eb245..0000000000000000000000000000000000000000 --- a/model-00001-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5b1564b3facea1f85525e6d529dff4dc1f1f2abcfbff410917a25aaa4f292bb6 -size 986367416 diff --git a/model-00002-of-00015.safetensors b/model-00002-of-00015.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3755b998a588d2c92a6e2a744b4bb7ca1f5cf1b --- /dev/null +++ b/model-00002-of-00015.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e440c42a7999400a16d88af2f1c8e96374d9fbd3b89d91c095e151a40acd1e53 +size 872599456 diff --git a/model-00002-of-00030.safetensors b/model-00002-of-00030.safetensors deleted file mode 100644 index 7949e9f4556e2034408fbeea3fd1f74288592c11..0000000000000000000000000000000000000000 --- a/model-00002-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6155ecb531752ee842ef45e36d599208722fd431890c0d5fb7ddfb2594e442ac -size 939706432 diff --git a/model-00003-of-00015.safetensors b/model-00003-of-00015.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f512e9a9691cddf0490472e8bbf52faa510ad05 --- /dev/null +++ b/model-00003-of-00015.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4df6d25bb68e5e98a02f052714931ae443dd5ef19c13fcc0f9730bd889abd6df +size 872599456 diff --git a/model-00003-of-00030.safetensors b/model-00003-of-00030.safetensors deleted file mode 100644 index d5efb0426eaf578791a8bb636835402037513c24..0000000000000000000000000000000000000000 --- a/model-00003-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6c49e16df29d1a2f527317b1e5e73f17d207ec25983f8b85904193a72f818490 -size 939739432 diff --git a/model-00004-of-00015.safetensors b/model-00004-of-00015.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f33a1673604e9753eec490898d365d7b6d0b25df --- /dev/null +++ b/model-00004-of-00015.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec574f691e99a889c0cd5fc2d017c6687bb412c71ae94d68f7c1bdb6efb7ebd4 +size 872599456 diff --git a/model-00004-of-00030.safetensors b/model-00004-of-00030.safetensors deleted file mode 100644 index f53d54cec964e5ed9be260637afcf8f447f92418..0000000000000000000000000000000000000000 --- a/model-00004-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c0c84a18564a9010ff75b385f08a82ba640a13f6695b4c824396e2c1bb8004f9 -size 872597456 diff --git a/model-00005-of-00015.safetensors b/model-00005-of-00015.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5a7c2a757d972fb23ba818faa263a5fd9640b38 --- /dev/null +++ b/model-00005-of-00015.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed7683f09b9fb7afe925b2ac87bc1cff0538408ffcf5c4035730cd87fbb64a7b +size 872599456 diff --git a/model-00005-of-00030.safetensors b/model-00005-of-00030.safetensors deleted file mode 100644 index ab1e77c06db8cc3bdafd0244fc36d0cd168038ee..0000000000000000000000000000000000000000 --- a/model-00005-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a06704ea148302d6cc5738321bf0dce23270583c9dc2404b8915950240e9c4b7 -size 872597456 diff --git a/model-00006-of-00015.safetensors b/model-00006-of-00015.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e946fd668bd525f4b9d9b7a0f67689ec834c6d14 --- /dev/null +++ b/model-00006-of-00015.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8a9bd8deaf303af48ff55fb5a7eb3ea061426897ae7ca0fef92b82819941737 +size 872599488 diff --git a/model-00006-of-00030.safetensors b/model-00006-of-00030.safetensors deleted file mode 100644 index 5f1fa22c793f56bd117f1c71e031d1ed197a0087..0000000000000000000000000000000000000000 --- a/model-00006-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:14e624a8fdd7d6e8e7ebb45d491ab6eeff6e2f27e568d43c9da9e81ad6a43a69 -size 872597456 diff --git a/model-00007-of-00015.safetensors b/model-00007-of-00015.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a816f1040cbffc6ab707bf2613a3ae21f4e597e0 --- /dev/null +++ b/model-00007-of-00015.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:146c55d89a29e099678dccf5e15f9c5e222e03972ef86caaeb2fa79da0259237 +size 872599496 diff --git a/model-00007-of-00030.safetensors b/model-00007-of-00030.safetensors deleted file mode 100644 index 09718fe128d15d897c764acdf096b9d062e14fc5..0000000000000000000000000000000000000000 --- a/model-00007-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f2a032699d64069522567bcd45219701d550eabbc75fa8962718bd3a5d5901bf -size 872597456 diff --git a/model-00008-of-00015.safetensors b/model-00008-of-00015.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e446cb6568f90ba64a66bc58d7ec34768bd3d0ed --- /dev/null +++ b/model-00008-of-00015.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f5cc091ed177e06ad46fb2d5f6cecbc5938a9cf455e544e39f827c5d74fff6d +size 872599496 diff --git a/model-00008-of-00030.safetensors b/model-00008-of-00030.safetensors deleted file mode 100644 index 50bf1bcafe6a789841ed9e07c7728d7ee48a0e99..0000000000000000000000000000000000000000 --- a/model-00008-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:30118129d50be5f416ec34afaf4173f4b6b68ab3823fed9f7b4fb34a0f7eb514 -size 872597456 diff --git a/model-00009-of-00015.safetensors b/model-00009-of-00015.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3e35f4a97aa6c3df2b68fc7c9bad48dc5258ddb --- /dev/null +++ b/model-00009-of-00015.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:223beaab951ec6f8f155dcca2582c860b724919d5b0a943e6f8a02ea8be37b2c +size 872599496 diff --git a/model-00009-of-00030.safetensors b/model-00009-of-00030.safetensors deleted file mode 100644 index 4d6acd25ba232f1314b842f7a8288c89864e54b2..0000000000000000000000000000000000000000 --- a/model-00009-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e7730f1946117965a8d853e66cd24acbfaae265135490dae4100135b345639a -size 872597456 diff --git a/model-00010-of-00015.safetensors b/model-00010-of-00015.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0cfde43ffe280396b91f008ffd57ec32fcbb5621 --- /dev/null +++ b/model-00010-of-00015.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe59b2c942a110a9240d5996c80d8a2ddabfe3359ae98acb943ada62f687e776 +size 872599496 diff --git a/model-00010-of-00030.safetensors b/model-00010-of-00030.safetensors deleted file mode 100644 index 5d519580471f841ebe2afaf77eab145f3a509830..0000000000000000000000000000000000000000 --- a/model-00010-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:48f9fce176af20011050225b6a9557a83086454239f89cd33e3d16c62b73d47a -size 872597456 diff --git a/model-00011-of-00015.safetensors b/model-00011-of-00015.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9cfa34667b08997687422d8456eda9ac1a30ae52 --- /dev/null +++ b/model-00011-of-00015.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eede5189a289344a837319003f798c73e471752a9abfaa04e42d583eaaafb309 +size 872599496 diff --git a/model-00011-of-00030.safetensors b/model-00011-of-00030.safetensors deleted file mode 100644 index a3bb6b43cdcbeb6e2c7917d1d1f9b58669adb69c..0000000000000000000000000000000000000000 --- a/model-00011-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eaf356d5f273c35ee7c1195223b6cb096437b32354da760ae04723765d549fcc -size 872597472 diff --git a/model-00012-of-00015.safetensors b/model-00012-of-00015.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69d42625315f4795784b08f3361d273a4fa50e92 --- /dev/null +++ b/model-00012-of-00015.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43716490d178cebf11e022fc3fd9c585c50b6b2631a730774de4ac08e18f7dc7 +size 872599496 diff --git a/model-00012-of-00030.safetensors b/model-00012-of-00030.safetensors deleted file mode 100644 index 65e57665863a3471f08e1b77b06c87d04616832d..0000000000000000000000000000000000000000 --- a/model-00012-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4d700c362e3b7390f521f4fc4fda44c8bc922b31bde249eb61b3b67ee6b73d50 -size 872597472 diff --git a/model-00013-of-00015.safetensors b/model-00013-of-00015.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10c8436cc646a27bd94d8d0f67accd7bc57ee5cb --- /dev/null +++ b/model-00013-of-00015.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72ecede78c405fc5720f72ea4f3afa6a2d310611f193778c518f6a0bde2cc4da +size 872599496 diff --git a/model-00013-of-00030.safetensors b/model-00013-of-00030.safetensors deleted file mode 100644 index 014d81e92add72de96fae98b04aa278799644a7e..0000000000000000000000000000000000000000 --- a/model-00013-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:05eba864dfddd8726699f4bd8a9690f567f5085fa40975b6a6a2d399f76d1353 -size 872597472 diff --git a/model-00014-of-00015.safetensors b/model-00014-of-00015.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12addedac4f3701ddbf75fc15bbb8204d7e1ccfd --- /dev/null +++ b/model-00014-of-00015.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ca00fc3659b05010b7ff59c778e8555d2b840f1f75bc66c93c20e736caa0b85 +size 872599496 diff --git a/model-00014-of-00030.safetensors b/model-00014-of-00030.safetensors deleted file mode 100644 index 0f1e1dfe3cbb26318688f13759b210f7267f060f..0000000000000000000000000000000000000000 --- a/model-00014-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:79fe77c0bab7ea6e126cf5a54aaeb1d57b06d09dcdb73587328053774caedacb -size 872597472 diff --git a/model-00015-of-00015.safetensors b/model-00015-of-00015.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6012ac32dbc635c0f4f3f9163ca566d92224e24f --- /dev/null +++ b/model-00015-of-00015.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:432c75b3c4faafae4a001864d0cd7b6c2d94a5504b0a0982f438502505b50800 +size 727990928 diff --git a/model-00015-of-00030.safetensors b/model-00015-of-00030.safetensors deleted file mode 100644 index cfc231f0ce1f4ea2e34dc392fbb57d4ae4ed31f9..0000000000000000000000000000000000000000 --- a/model-00015-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e557259c097317c3df8e2d170ca863a4cebe4608d1e9bf5d77e39ae05550ed17 -size 872597472 diff --git a/model-00016-of-00030.safetensors b/model-00016-of-00030.safetensors deleted file mode 100644 index e1950b5df39009e97fd9ef3a96af197911cf007c..0000000000000000000000000000000000000000 --- a/model-00016-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fe8243e8c0e9cd54c34c8752b5cdfb61c73a831e57d34ee721c0468d8e54e50f -size 872597472 diff --git a/model-00017-of-00030.safetensors b/model-00017-of-00030.safetensors deleted file mode 100644 index b8da59f85652e13827e29fdf4a9498b0346d200a..0000000000000000000000000000000000000000 --- a/model-00017-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dd1959f2b85ba27ba4a66951ccebcc90926613b9564c29d9311ccb2fae8713ba -size 872597472 diff --git a/model-00018-of-00030.safetensors b/model-00018-of-00030.safetensors deleted file mode 100644 index 120f2e93cb73b63de5b4a8bf01329bd95d7805fe..0000000000000000000000000000000000000000 --- a/model-00018-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:11475e1d7be89ba9394cb31bd10e898cf49b5d190f8beb35ecdb21d5f00288dd -size 872597472 diff --git a/model-00019-of-00030.safetensors b/model-00019-of-00030.safetensors deleted file mode 100644 index c58c34c6ee6d5aacb3a03c9a8459d07ef80a2bc2..0000000000000000000000000000000000000000 --- a/model-00019-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:13d656f43ce3d628f50c0ac23b722a9e0904f0ff997f1c01e2cc97a8fd6d8896 -size 872597472 diff --git a/model-00020-of-00030.safetensors b/model-00020-of-00030.safetensors deleted file mode 100644 index 2a073d492845b3837ad67ef5bb1c3dc8f6c6c2f1..0000000000000000000000000000000000000000 --- a/model-00020-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3d6d3f1a4630f5a43601c7e8df51ebf7c3936a8e705308b40c7f9180478d094a -size 872597472 diff --git a/model-00021-of-00030.safetensors b/model-00021-of-00030.safetensors deleted file mode 100644 index 152e814d08e9f1cec55d88cc412f46b5fc4260c4..0000000000000000000000000000000000000000 --- a/model-00021-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:201e35623e89bdc06a7b08190d05b1deb2bba3c707cee88d03659936bcd264e4 -size 872597472 diff --git a/model-00022-of-00030.safetensors b/model-00022-of-00030.safetensors deleted file mode 100644 index a8a12a9ea3135e6c1ab2ad4c5a4b0559b543cb68..0000000000000000000000000000000000000000 --- a/model-00022-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:46896ff2aad8a8b3113b72044ad159137e68c5bbbb1ce1f7de597499279dd96a -size 872597472 diff --git a/model-00023-of-00030.safetensors b/model-00023-of-00030.safetensors deleted file mode 100644 index edd5a1a113497cbbc8b50bfc72c176064da09438..0000000000000000000000000000000000000000 --- a/model-00023-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:720df898df45852aaa64f3d8fbb1d1566b925d7cc938234d4af5e98cb24f8325 -size 872597472 diff --git a/model-00024-of-00030.safetensors b/model-00024-of-00030.safetensors deleted file mode 100644 index d08c4c2288f8a8deea772996b519002f6a2fae9d..0000000000000000000000000000000000000000 --- a/model-00024-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:abfce58890845f482f09e76de44fe740d870df82c727fd9d80c3afa873c41df3 -size 872597472 diff --git a/model-00025-of-00030.safetensors b/model-00025-of-00030.safetensors deleted file mode 100644 index 1f4c7c36db384b534a5eacd9420cb609e1a395b2..0000000000000000000000000000000000000000 --- a/model-00025-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0ca2c54bcdfe4ff6bce52f80e457e788572c72af6d95a0f5905b7130b980e226 -size 872597472 diff --git a/model-00026-of-00030.safetensors b/model-00026-of-00030.safetensors deleted file mode 100644 index 109dcc453e21d9511d2e92d2edebe1bd3d009605..0000000000000000000000000000000000000000 --- a/model-00026-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:315cac0ffa49b8d741b516d187a64864fa8ad9ace54011c158e4518cb41635e4 -size 872597472 diff --git a/model-00027-of-00030.safetensors b/model-00027-of-00030.safetensors deleted file mode 100644 index ebbb6b5e6e1389c8880d42f68e1cea06be452909..0000000000000000000000000000000000000000 --- a/model-00027-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b811d7df854a0d07a02680757159760d19bd14b369998afdb154fef53c3a399a -size 872597472 diff --git a/model-00028-of-00030.safetensors b/model-00028-of-00030.safetensors deleted file mode 100644 index 0c4228d62314568583e2a51e20c8a34af2c729c8..0000000000000000000000000000000000000000 --- a/model-00028-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3a4db3e11ce563562639e71362a0186e286c10e1230789e69ea9f5489c6b3011 -size 872597472 diff --git a/model-00029-of-00030.safetensors b/model-00029-of-00030.safetensors deleted file mode 100644 index fa545de67fa97fd41b5bdda52c14a51152dfbeee..0000000000000000000000000000000000000000 --- a/model-00029-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f4db61c915178f05d0ca21b15ee0143accf89d998c208a44c73d3b5f43707c77 -size 604013112 diff --git a/model-00030-of-00030.safetensors b/model-00030-of-00030.safetensors deleted file mode 100644 index 605b4aa181ae07fe8554519e9cb39b4329534407..0000000000000000000000000000000000000000 --- a/model-00030-of-00030.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0f03ebe151ec769f9fec912368cf772b6b595084f0fa44acf3bf5563e136bb9d -size 851968120 diff --git a/model.safetensors.index.json b/model.safetensors.index.json index 921abf23914cb3deec86aa79ac97f9f9236886c2..2940e0157a048495badecb3e374082218f43864e 100644 --- a/model.safetensors.index.json +++ b/model.safetensors.index.json @@ -1,517 +1,517 @@ { "metadata": { - "total_size": 26136674304 + "total_size": 13068337152 }, "weight_map": { - "head.weight": "model-00030-of-00030.safetensors", - "rwkv.blocks.0.attention.key.weight": "model-00001-of-00030.safetensors", - "rwkv.blocks.0.attention.output.weight": "model-00002-of-00030.safetensors", - "rwkv.blocks.0.attention.receptance.weight": "model-00002-of-00030.safetensors", - "rwkv.blocks.0.attention.time_decay": "model-00001-of-00030.safetensors", - "rwkv.blocks.0.attention.time_first": "model-00001-of-00030.safetensors", - "rwkv.blocks.0.attention.time_mix_key": "model-00001-of-00030.safetensors", - "rwkv.blocks.0.attention.time_mix_receptance": "model-00001-of-00030.safetensors", - "rwkv.blocks.0.attention.time_mix_value": "model-00001-of-00030.safetensors", - "rwkv.blocks.0.attention.value.weight": "model-00001-of-00030.safetensors", - "rwkv.blocks.0.feed_forward.key.weight": "model-00002-of-00030.safetensors", - "rwkv.blocks.0.feed_forward.receptance.weight": "model-00002-of-00030.safetensors", - "rwkv.blocks.0.feed_forward.time_mix_key": "model-00002-of-00030.safetensors", - "rwkv.blocks.0.feed_forward.time_mix_receptance": "model-00002-of-00030.safetensors", - "rwkv.blocks.0.feed_forward.value.weight": "model-00002-of-00030.safetensors", - "rwkv.blocks.0.ln1.bias": "model-00001-of-00030.safetensors", - "rwkv.blocks.0.ln1.weight": "model-00001-of-00030.safetensors", - "rwkv.blocks.0.ln2.bias": "model-00001-of-00030.safetensors", - "rwkv.blocks.0.ln2.weight": "model-00001-of-00030.safetensors", - "rwkv.blocks.0.pre_ln.bias": "model-00001-of-00030.safetensors", - "rwkv.blocks.0.pre_ln.weight": "model-00001-of-00030.safetensors", - "rwkv.blocks.1.attention.key.weight": "model-00002-of-00030.safetensors", - "rwkv.blocks.1.attention.output.weight": "model-00003-of-00030.safetensors", - "rwkv.blocks.1.attention.receptance.weight": "model-00002-of-00030.safetensors", - "rwkv.blocks.1.attention.time_decay": "model-00002-of-00030.safetensors", - "rwkv.blocks.1.attention.time_first": "model-00002-of-00030.safetensors", - "rwkv.blocks.1.attention.time_mix_key": "model-00002-of-00030.safetensors", - "rwkv.blocks.1.attention.time_mix_receptance": "model-00002-of-00030.safetensors", - "rwkv.blocks.1.attention.time_mix_value": "model-00002-of-00030.safetensors", - "rwkv.blocks.1.attention.value.weight": "model-00002-of-00030.safetensors", - "rwkv.blocks.1.feed_forward.key.weight": "model-00003-of-00030.safetensors", - "rwkv.blocks.1.feed_forward.receptance.weight": "model-00003-of-00030.safetensors", - "rwkv.blocks.1.feed_forward.time_mix_key": "model-00003-of-00030.safetensors", - "rwkv.blocks.1.feed_forward.time_mix_receptance": "model-00003-of-00030.safetensors", - "rwkv.blocks.1.feed_forward.value.weight": "model-00003-of-00030.safetensors", - "rwkv.blocks.1.ln1.bias": "model-00002-of-00030.safetensors", - "rwkv.blocks.1.ln1.weight": "model-00002-of-00030.safetensors", - "rwkv.blocks.1.ln2.bias": "model-00002-of-00030.safetensors", - "rwkv.blocks.1.ln2.weight": "model-00002-of-00030.safetensors", - "rwkv.blocks.10.attention.key.weight": "model-00011-of-00030.safetensors", - "rwkv.blocks.10.attention.output.weight": "model-00011-of-00030.safetensors", - "rwkv.blocks.10.attention.receptance.weight": "model-00011-of-00030.safetensors", - "rwkv.blocks.10.attention.time_decay": "model-00011-of-00030.safetensors", - "rwkv.blocks.10.attention.time_first": "model-00011-of-00030.safetensors", - "rwkv.blocks.10.attention.time_mix_key": "model-00011-of-00030.safetensors", - "rwkv.blocks.10.attention.time_mix_receptance": "model-00011-of-00030.safetensors", - "rwkv.blocks.10.attention.time_mix_value": "model-00011-of-00030.safetensors", - "rwkv.blocks.10.attention.value.weight": "model-00011-of-00030.safetensors", - "rwkv.blocks.10.feed_forward.key.weight": "model-00012-of-00030.safetensors", - "rwkv.blocks.10.feed_forward.receptance.weight": "model-00012-of-00030.safetensors", - "rwkv.blocks.10.feed_forward.time_mix_key": "model-00011-of-00030.safetensors", - "rwkv.blocks.10.feed_forward.time_mix_receptance": "model-00011-of-00030.safetensors", - "rwkv.blocks.10.feed_forward.value.weight": "model-00012-of-00030.safetensors", - "rwkv.blocks.10.ln1.bias": "model-00011-of-00030.safetensors", - "rwkv.blocks.10.ln1.weight": "model-00011-of-00030.safetensors", - "rwkv.blocks.10.ln2.bias": "model-00011-of-00030.safetensors", - "rwkv.blocks.10.ln2.weight": "model-00011-of-00030.safetensors", - "rwkv.blocks.11.attention.key.weight": "model-00012-of-00030.safetensors", - "rwkv.blocks.11.attention.output.weight": "model-00012-of-00030.safetensors", - "rwkv.blocks.11.attention.receptance.weight": "model-00012-of-00030.safetensors", - "rwkv.blocks.11.attention.time_decay": "model-00012-of-00030.safetensors", - "rwkv.blocks.11.attention.time_first": "model-00012-of-00030.safetensors", - "rwkv.blocks.11.attention.time_mix_key": "model-00012-of-00030.safetensors", - "rwkv.blocks.11.attention.time_mix_receptance": "model-00012-of-00030.safetensors", - "rwkv.blocks.11.attention.time_mix_value": "model-00012-of-00030.safetensors", - "rwkv.blocks.11.attention.value.weight": "model-00012-of-00030.safetensors", - "rwkv.blocks.11.feed_forward.key.weight": "model-00013-of-00030.safetensors", - "rwkv.blocks.11.feed_forward.receptance.weight": "model-00013-of-00030.safetensors", - "rwkv.blocks.11.feed_forward.time_mix_key": "model-00012-of-00030.safetensors", - "rwkv.blocks.11.feed_forward.time_mix_receptance": "model-00012-of-00030.safetensors", - "rwkv.blocks.11.feed_forward.value.weight": "model-00013-of-00030.safetensors", - "rwkv.blocks.11.ln1.bias": "model-00012-of-00030.safetensors", - "rwkv.blocks.11.ln1.weight": "model-00012-of-00030.safetensors", - "rwkv.blocks.11.ln2.bias": "model-00012-of-00030.safetensors", - "rwkv.blocks.11.ln2.weight": "model-00012-of-00030.safetensors", - "rwkv.blocks.12.attention.key.weight": "model-00013-of-00030.safetensors", - "rwkv.blocks.12.attention.output.weight": "model-00013-of-00030.safetensors", - "rwkv.blocks.12.attention.receptance.weight": "model-00013-of-00030.safetensors", - "rwkv.blocks.12.attention.time_decay": "model-00013-of-00030.safetensors", - "rwkv.blocks.12.attention.time_first": "model-00013-of-00030.safetensors", - "rwkv.blocks.12.attention.time_mix_key": "model-00013-of-00030.safetensors", - "rwkv.blocks.12.attention.time_mix_receptance": "model-00013-of-00030.safetensors", - "rwkv.blocks.12.attention.time_mix_value": "model-00013-of-00030.safetensors", - "rwkv.blocks.12.attention.value.weight": "model-00013-of-00030.safetensors", - "rwkv.blocks.12.feed_forward.key.weight": "model-00014-of-00030.safetensors", - "rwkv.blocks.12.feed_forward.receptance.weight": "model-00014-of-00030.safetensors", - "rwkv.blocks.12.feed_forward.time_mix_key": "model-00013-of-00030.safetensors", - "rwkv.blocks.12.feed_forward.time_mix_receptance": "model-00013-of-00030.safetensors", - "rwkv.blocks.12.feed_forward.value.weight": "model-00014-of-00030.safetensors", - "rwkv.blocks.12.ln1.bias": "model-00013-of-00030.safetensors", - "rwkv.blocks.12.ln1.weight": "model-00013-of-00030.safetensors", - "rwkv.blocks.12.ln2.bias": "model-00013-of-00030.safetensors", - "rwkv.blocks.12.ln2.weight": "model-00013-of-00030.safetensors", - "rwkv.blocks.13.attention.key.weight": "model-00014-of-00030.safetensors", - "rwkv.blocks.13.attention.output.weight": "model-00014-of-00030.safetensors", - "rwkv.blocks.13.attention.receptance.weight": "model-00014-of-00030.safetensors", - "rwkv.blocks.13.attention.time_decay": "model-00014-of-00030.safetensors", - "rwkv.blocks.13.attention.time_first": "model-00014-of-00030.safetensors", - "rwkv.blocks.13.attention.time_mix_key": "model-00014-of-00030.safetensors", - "rwkv.blocks.13.attention.time_mix_receptance": "model-00014-of-00030.safetensors", - "rwkv.blocks.13.attention.time_mix_value": "model-00014-of-00030.safetensors", - "rwkv.blocks.13.attention.value.weight": "model-00014-of-00030.safetensors", - "rwkv.blocks.13.feed_forward.key.weight": "model-00015-of-00030.safetensors", - "rwkv.blocks.13.feed_forward.receptance.weight": "model-00015-of-00030.safetensors", - "rwkv.blocks.13.feed_forward.time_mix_key": "model-00014-of-00030.safetensors", - "rwkv.blocks.13.feed_forward.time_mix_receptance": "model-00014-of-00030.safetensors", - "rwkv.blocks.13.feed_forward.value.weight": "model-00015-of-00030.safetensors", - "rwkv.blocks.13.ln1.bias": "model-00014-of-00030.safetensors", - "rwkv.blocks.13.ln1.weight": "model-00014-of-00030.safetensors", - "rwkv.blocks.13.ln2.bias": "model-00014-of-00030.safetensors", - "rwkv.blocks.13.ln2.weight": "model-00014-of-00030.safetensors", - "rwkv.blocks.14.attention.key.weight": "model-00015-of-00030.safetensors", - "rwkv.blocks.14.attention.output.weight": "model-00015-of-00030.safetensors", - "rwkv.blocks.14.attention.receptance.weight": "model-00015-of-00030.safetensors", - "rwkv.blocks.14.attention.time_decay": "model-00015-of-00030.safetensors", - "rwkv.blocks.14.attention.time_first": "model-00015-of-00030.safetensors", - "rwkv.blocks.14.attention.time_mix_key": "model-00015-of-00030.safetensors", - "rwkv.blocks.14.attention.time_mix_receptance": "model-00015-of-00030.safetensors", - "rwkv.blocks.14.attention.time_mix_value": "model-00015-of-00030.safetensors", - "rwkv.blocks.14.attention.value.weight": "model-00015-of-00030.safetensors", - "rwkv.blocks.14.feed_forward.key.weight": "model-00016-of-00030.safetensors", - "rwkv.blocks.14.feed_forward.receptance.weight": "model-00016-of-00030.safetensors", - "rwkv.blocks.14.feed_forward.time_mix_key": "model-00015-of-00030.safetensors", - "rwkv.blocks.14.feed_forward.time_mix_receptance": "model-00015-of-00030.safetensors", - "rwkv.blocks.14.feed_forward.value.weight": "model-00016-of-00030.safetensors", - "rwkv.blocks.14.ln1.bias": "model-00015-of-00030.safetensors", - "rwkv.blocks.14.ln1.weight": "model-00015-of-00030.safetensors", - "rwkv.blocks.14.ln2.bias": "model-00015-of-00030.safetensors", - "rwkv.blocks.14.ln2.weight": "model-00015-of-00030.safetensors", - "rwkv.blocks.15.attention.key.weight": "model-00016-of-00030.safetensors", - "rwkv.blocks.15.attention.output.weight": "model-00016-of-00030.safetensors", - "rwkv.blocks.15.attention.receptance.weight": "model-00016-of-00030.safetensors", - "rwkv.blocks.15.attention.time_decay": "model-00016-of-00030.safetensors", - "rwkv.blocks.15.attention.time_first": "model-00016-of-00030.safetensors", - "rwkv.blocks.15.attention.time_mix_key": "model-00016-of-00030.safetensors", - "rwkv.blocks.15.attention.time_mix_receptance": "model-00016-of-00030.safetensors", - "rwkv.blocks.15.attention.time_mix_value": "model-00016-of-00030.safetensors", - "rwkv.blocks.15.attention.value.weight": "model-00016-of-00030.safetensors", - "rwkv.blocks.15.feed_forward.key.weight": "model-00017-of-00030.safetensors", - "rwkv.blocks.15.feed_forward.receptance.weight": "model-00017-of-00030.safetensors", - "rwkv.blocks.15.feed_forward.time_mix_key": "model-00016-of-00030.safetensors", - "rwkv.blocks.15.feed_forward.time_mix_receptance": "model-00016-of-00030.safetensors", - "rwkv.blocks.15.feed_forward.value.weight": "model-00017-of-00030.safetensors", - "rwkv.blocks.15.ln1.bias": "model-00016-of-00030.safetensors", - "rwkv.blocks.15.ln1.weight": "model-00016-of-00030.safetensors", - "rwkv.blocks.15.ln2.bias": "model-00016-of-00030.safetensors", - "rwkv.blocks.15.ln2.weight": "model-00016-of-00030.safetensors", - "rwkv.blocks.16.attention.key.weight": "model-00017-of-00030.safetensors", - "rwkv.blocks.16.attention.output.weight": "model-00017-of-00030.safetensors", - "rwkv.blocks.16.attention.receptance.weight": "model-00017-of-00030.safetensors", - "rwkv.blocks.16.attention.time_decay": "model-00017-of-00030.safetensors", - "rwkv.blocks.16.attention.time_first": "model-00017-of-00030.safetensors", - "rwkv.blocks.16.attention.time_mix_key": "model-00017-of-00030.safetensors", - "rwkv.blocks.16.attention.time_mix_receptance": "model-00017-of-00030.safetensors", - "rwkv.blocks.16.attention.time_mix_value": "model-00017-of-00030.safetensors", - "rwkv.blocks.16.attention.value.weight": "model-00017-of-00030.safetensors", - "rwkv.blocks.16.feed_forward.key.weight": "model-00018-of-00030.safetensors", - "rwkv.blocks.16.feed_forward.receptance.weight": "model-00018-of-00030.safetensors", - "rwkv.blocks.16.feed_forward.time_mix_key": "model-00017-of-00030.safetensors", - "rwkv.blocks.16.feed_forward.time_mix_receptance": "model-00017-of-00030.safetensors", - "rwkv.blocks.16.feed_forward.value.weight": "model-00018-of-00030.safetensors", - "rwkv.blocks.16.ln1.bias": "model-00017-of-00030.safetensors", - "rwkv.blocks.16.ln1.weight": "model-00017-of-00030.safetensors", - "rwkv.blocks.16.ln2.bias": "model-00017-of-00030.safetensors", - "rwkv.blocks.16.ln2.weight": "model-00017-of-00030.safetensors", - "rwkv.blocks.17.attention.key.weight": "model-00018-of-00030.safetensors", - "rwkv.blocks.17.attention.output.weight": "model-00018-of-00030.safetensors", - "rwkv.blocks.17.attention.receptance.weight": "model-00018-of-00030.safetensors", - "rwkv.blocks.17.attention.time_decay": "model-00018-of-00030.safetensors", - "rwkv.blocks.17.attention.time_first": "model-00018-of-00030.safetensors", - "rwkv.blocks.17.attention.time_mix_key": "model-00018-of-00030.safetensors", - "rwkv.blocks.17.attention.time_mix_receptance": "model-00018-of-00030.safetensors", - "rwkv.blocks.17.attention.time_mix_value": "model-00018-of-00030.safetensors", - "rwkv.blocks.17.attention.value.weight": "model-00018-of-00030.safetensors", - "rwkv.blocks.17.feed_forward.key.weight": "model-00019-of-00030.safetensors", - "rwkv.blocks.17.feed_forward.receptance.weight": "model-00019-of-00030.safetensors", - "rwkv.blocks.17.feed_forward.time_mix_key": "model-00018-of-00030.safetensors", - "rwkv.blocks.17.feed_forward.time_mix_receptance": "model-00018-of-00030.safetensors", - "rwkv.blocks.17.feed_forward.value.weight": "model-00019-of-00030.safetensors", - "rwkv.blocks.17.ln1.bias": "model-00018-of-00030.safetensors", - "rwkv.blocks.17.ln1.weight": "model-00018-of-00030.safetensors", - "rwkv.blocks.17.ln2.bias": "model-00018-of-00030.safetensors", - "rwkv.blocks.17.ln2.weight": "model-00018-of-00030.safetensors", - "rwkv.blocks.18.attention.key.weight": "model-00019-of-00030.safetensors", - "rwkv.blocks.18.attention.output.weight": "model-00019-of-00030.safetensors", - "rwkv.blocks.18.attention.receptance.weight": "model-00019-of-00030.safetensors", - "rwkv.blocks.18.attention.time_decay": "model-00019-of-00030.safetensors", - "rwkv.blocks.18.attention.time_first": "model-00019-of-00030.safetensors", - "rwkv.blocks.18.attention.time_mix_key": "model-00019-of-00030.safetensors", - "rwkv.blocks.18.attention.time_mix_receptance": "model-00019-of-00030.safetensors", - "rwkv.blocks.18.attention.time_mix_value": "model-00019-of-00030.safetensors", - "rwkv.blocks.18.attention.value.weight": "model-00019-of-00030.safetensors", - "rwkv.blocks.18.feed_forward.key.weight": "model-00020-of-00030.safetensors", - "rwkv.blocks.18.feed_forward.receptance.weight": "model-00020-of-00030.safetensors", - "rwkv.blocks.18.feed_forward.time_mix_key": "model-00019-of-00030.safetensors", - "rwkv.blocks.18.feed_forward.time_mix_receptance": "model-00019-of-00030.safetensors", - "rwkv.blocks.18.feed_forward.value.weight": "model-00020-of-00030.safetensors", - "rwkv.blocks.18.ln1.bias": "model-00019-of-00030.safetensors", - "rwkv.blocks.18.ln1.weight": "model-00019-of-00030.safetensors", - "rwkv.blocks.18.ln2.bias": "model-00019-of-00030.safetensors", - "rwkv.blocks.18.ln2.weight": "model-00019-of-00030.safetensors", - "rwkv.blocks.19.attention.key.weight": "model-00020-of-00030.safetensors", - "rwkv.blocks.19.attention.output.weight": "model-00020-of-00030.safetensors", - "rwkv.blocks.19.attention.receptance.weight": "model-00020-of-00030.safetensors", - "rwkv.blocks.19.attention.time_decay": "model-00020-of-00030.safetensors", - "rwkv.blocks.19.attention.time_first": "model-00020-of-00030.safetensors", - "rwkv.blocks.19.attention.time_mix_key": "model-00020-of-00030.safetensors", - "rwkv.blocks.19.attention.time_mix_receptance": "model-00020-of-00030.safetensors", - "rwkv.blocks.19.attention.time_mix_value": "model-00020-of-00030.safetensors", - "rwkv.blocks.19.attention.value.weight": "model-00020-of-00030.safetensors", - "rwkv.blocks.19.feed_forward.key.weight": "model-00021-of-00030.safetensors", - "rwkv.blocks.19.feed_forward.receptance.weight": "model-00021-of-00030.safetensors", - "rwkv.blocks.19.feed_forward.time_mix_key": "model-00020-of-00030.safetensors", - "rwkv.blocks.19.feed_forward.time_mix_receptance": "model-00020-of-00030.safetensors", - "rwkv.blocks.19.feed_forward.value.weight": "model-00021-of-00030.safetensors", - "rwkv.blocks.19.ln1.bias": "model-00020-of-00030.safetensors", - "rwkv.blocks.19.ln1.weight": "model-00020-of-00030.safetensors", - "rwkv.blocks.19.ln2.bias": "model-00020-of-00030.safetensors", - "rwkv.blocks.19.ln2.weight": "model-00020-of-00030.safetensors", - "rwkv.blocks.2.attention.key.weight": "model-00003-of-00030.safetensors", - "rwkv.blocks.2.attention.output.weight": "model-00003-of-00030.safetensors", - "rwkv.blocks.2.attention.receptance.weight": "model-00003-of-00030.safetensors", - "rwkv.blocks.2.attention.time_decay": "model-00003-of-00030.safetensors", - "rwkv.blocks.2.attention.time_first": "model-00003-of-00030.safetensors", - "rwkv.blocks.2.attention.time_mix_key": "model-00003-of-00030.safetensors", - "rwkv.blocks.2.attention.time_mix_receptance": "model-00003-of-00030.safetensors", - "rwkv.blocks.2.attention.time_mix_value": "model-00003-of-00030.safetensors", - "rwkv.blocks.2.attention.value.weight": "model-00003-of-00030.safetensors", - "rwkv.blocks.2.feed_forward.key.weight": "model-00004-of-00030.safetensors", - "rwkv.blocks.2.feed_forward.receptance.weight": "model-00004-of-00030.safetensors", - "rwkv.blocks.2.feed_forward.time_mix_key": "model-00003-of-00030.safetensors", - "rwkv.blocks.2.feed_forward.time_mix_receptance": "model-00003-of-00030.safetensors", - "rwkv.blocks.2.feed_forward.value.weight": "model-00004-of-00030.safetensors", - "rwkv.blocks.2.ln1.bias": "model-00003-of-00030.safetensors", - "rwkv.blocks.2.ln1.weight": "model-00003-of-00030.safetensors", - "rwkv.blocks.2.ln2.bias": "model-00003-of-00030.safetensors", - "rwkv.blocks.2.ln2.weight": "model-00003-of-00030.safetensors", - "rwkv.blocks.20.attention.key.weight": "model-00021-of-00030.safetensors", - "rwkv.blocks.20.attention.output.weight": "model-00021-of-00030.safetensors", - "rwkv.blocks.20.attention.receptance.weight": "model-00021-of-00030.safetensors", - "rwkv.blocks.20.attention.time_decay": "model-00021-of-00030.safetensors", - "rwkv.blocks.20.attention.time_first": "model-00021-of-00030.safetensors", - "rwkv.blocks.20.attention.time_mix_key": "model-00021-of-00030.safetensors", - "rwkv.blocks.20.attention.time_mix_receptance": "model-00021-of-00030.safetensors", - "rwkv.blocks.20.attention.time_mix_value": "model-00021-of-00030.safetensors", - "rwkv.blocks.20.attention.value.weight": "model-00021-of-00030.safetensors", - "rwkv.blocks.20.feed_forward.key.weight": "model-00022-of-00030.safetensors", - "rwkv.blocks.20.feed_forward.receptance.weight": "model-00022-of-00030.safetensors", - "rwkv.blocks.20.feed_forward.time_mix_key": "model-00021-of-00030.safetensors", - "rwkv.blocks.20.feed_forward.time_mix_receptance": "model-00021-of-00030.safetensors", - "rwkv.blocks.20.feed_forward.value.weight": "model-00022-of-00030.safetensors", - "rwkv.blocks.20.ln1.bias": "model-00021-of-00030.safetensors", - "rwkv.blocks.20.ln1.weight": "model-00021-of-00030.safetensors", - "rwkv.blocks.20.ln2.bias": "model-00021-of-00030.safetensors", - "rwkv.blocks.20.ln2.weight": "model-00021-of-00030.safetensors", - "rwkv.blocks.21.attention.key.weight": "model-00022-of-00030.safetensors", - "rwkv.blocks.21.attention.output.weight": "model-00022-of-00030.safetensors", - "rwkv.blocks.21.attention.receptance.weight": "model-00022-of-00030.safetensors", - "rwkv.blocks.21.attention.time_decay": "model-00022-of-00030.safetensors", - "rwkv.blocks.21.attention.time_first": "model-00022-of-00030.safetensors", - "rwkv.blocks.21.attention.time_mix_key": "model-00022-of-00030.safetensors", - "rwkv.blocks.21.attention.time_mix_receptance": "model-00022-of-00030.safetensors", - "rwkv.blocks.21.attention.time_mix_value": "model-00022-of-00030.safetensors", - "rwkv.blocks.21.attention.value.weight": "model-00022-of-00030.safetensors", - "rwkv.blocks.21.feed_forward.key.weight": "model-00023-of-00030.safetensors", - "rwkv.blocks.21.feed_forward.receptance.weight": "model-00023-of-00030.safetensors", - "rwkv.blocks.21.feed_forward.time_mix_key": "model-00022-of-00030.safetensors", - "rwkv.blocks.21.feed_forward.time_mix_receptance": "model-00022-of-00030.safetensors", - "rwkv.blocks.21.feed_forward.value.weight": "model-00023-of-00030.safetensors", - "rwkv.blocks.21.ln1.bias": "model-00022-of-00030.safetensors", - "rwkv.blocks.21.ln1.weight": "model-00022-of-00030.safetensors", - "rwkv.blocks.21.ln2.bias": "model-00022-of-00030.safetensors", - "rwkv.blocks.21.ln2.weight": "model-00022-of-00030.safetensors", - "rwkv.blocks.22.attention.key.weight": "model-00023-of-00030.safetensors", - "rwkv.blocks.22.attention.output.weight": "model-00023-of-00030.safetensors", - "rwkv.blocks.22.attention.receptance.weight": "model-00023-of-00030.safetensors", - "rwkv.blocks.22.attention.time_decay": "model-00023-of-00030.safetensors", - "rwkv.blocks.22.attention.time_first": "model-00023-of-00030.safetensors", - "rwkv.blocks.22.attention.time_mix_key": "model-00023-of-00030.safetensors", - "rwkv.blocks.22.attention.time_mix_receptance": "model-00023-of-00030.safetensors", - "rwkv.blocks.22.attention.time_mix_value": "model-00023-of-00030.safetensors", - "rwkv.blocks.22.attention.value.weight": "model-00023-of-00030.safetensors", - "rwkv.blocks.22.feed_forward.key.weight": "model-00024-of-00030.safetensors", - "rwkv.blocks.22.feed_forward.receptance.weight": "model-00024-of-00030.safetensors", - "rwkv.blocks.22.feed_forward.time_mix_key": "model-00023-of-00030.safetensors", - "rwkv.blocks.22.feed_forward.time_mix_receptance": "model-00023-of-00030.safetensors", - "rwkv.blocks.22.feed_forward.value.weight": "model-00024-of-00030.safetensors", - "rwkv.blocks.22.ln1.bias": "model-00023-of-00030.safetensors", - "rwkv.blocks.22.ln1.weight": "model-00023-of-00030.safetensors", - "rwkv.blocks.22.ln2.bias": "model-00023-of-00030.safetensors", - "rwkv.blocks.22.ln2.weight": "model-00023-of-00030.safetensors", - "rwkv.blocks.23.attention.key.weight": "model-00024-of-00030.safetensors", - "rwkv.blocks.23.attention.output.weight": "model-00024-of-00030.safetensors", - "rwkv.blocks.23.attention.receptance.weight": "model-00024-of-00030.safetensors", - "rwkv.blocks.23.attention.time_decay": "model-00024-of-00030.safetensors", - "rwkv.blocks.23.attention.time_first": "model-00024-of-00030.safetensors", - "rwkv.blocks.23.attention.time_mix_key": "model-00024-of-00030.safetensors", - "rwkv.blocks.23.attention.time_mix_receptance": "model-00024-of-00030.safetensors", - "rwkv.blocks.23.attention.time_mix_value": "model-00024-of-00030.safetensors", - "rwkv.blocks.23.attention.value.weight": "model-00024-of-00030.safetensors", - "rwkv.blocks.23.feed_forward.key.weight": "model-00025-of-00030.safetensors", - "rwkv.blocks.23.feed_forward.receptance.weight": "model-00025-of-00030.safetensors", - "rwkv.blocks.23.feed_forward.time_mix_key": "model-00024-of-00030.safetensors", - "rwkv.blocks.23.feed_forward.time_mix_receptance": "model-00024-of-00030.safetensors", - "rwkv.blocks.23.feed_forward.value.weight": "model-00025-of-00030.safetensors", - "rwkv.blocks.23.ln1.bias": "model-00024-of-00030.safetensors", - "rwkv.blocks.23.ln1.weight": "model-00024-of-00030.safetensors", - "rwkv.blocks.23.ln2.bias": "model-00024-of-00030.safetensors", - "rwkv.blocks.23.ln2.weight": "model-00024-of-00030.safetensors", - "rwkv.blocks.24.attention.key.weight": "model-00025-of-00030.safetensors", - "rwkv.blocks.24.attention.output.weight": "model-00025-of-00030.safetensors", - "rwkv.blocks.24.attention.receptance.weight": "model-00025-of-00030.safetensors", - "rwkv.blocks.24.attention.time_decay": "model-00025-of-00030.safetensors", - "rwkv.blocks.24.attention.time_first": "model-00025-of-00030.safetensors", - "rwkv.blocks.24.attention.time_mix_key": "model-00025-of-00030.safetensors", - "rwkv.blocks.24.attention.time_mix_receptance": "model-00025-of-00030.safetensors", - "rwkv.blocks.24.attention.time_mix_value": "model-00025-of-00030.safetensors", - "rwkv.blocks.24.attention.value.weight": "model-00025-of-00030.safetensors", - "rwkv.blocks.24.feed_forward.key.weight": "model-00026-of-00030.safetensors", - "rwkv.blocks.24.feed_forward.receptance.weight": "model-00026-of-00030.safetensors", - "rwkv.blocks.24.feed_forward.time_mix_key": "model-00025-of-00030.safetensors", - "rwkv.blocks.24.feed_forward.time_mix_receptance": "model-00025-of-00030.safetensors", - "rwkv.blocks.24.feed_forward.value.weight": "model-00026-of-00030.safetensors", - "rwkv.blocks.24.ln1.bias": "model-00025-of-00030.safetensors", - "rwkv.blocks.24.ln1.weight": "model-00025-of-00030.safetensors", - "rwkv.blocks.24.ln2.bias": "model-00025-of-00030.safetensors", - "rwkv.blocks.24.ln2.weight": "model-00025-of-00030.safetensors", - "rwkv.blocks.25.attention.key.weight": "model-00026-of-00030.safetensors", - "rwkv.blocks.25.attention.output.weight": "model-00026-of-00030.safetensors", - "rwkv.blocks.25.attention.receptance.weight": "model-00026-of-00030.safetensors", - "rwkv.blocks.25.attention.time_decay": "model-00026-of-00030.safetensors", - "rwkv.blocks.25.attention.time_first": "model-00026-of-00030.safetensors", - "rwkv.blocks.25.attention.time_mix_key": "model-00026-of-00030.safetensors", - "rwkv.blocks.25.attention.time_mix_receptance": "model-00026-of-00030.safetensors", - "rwkv.blocks.25.attention.time_mix_value": "model-00026-of-00030.safetensors", - "rwkv.blocks.25.attention.value.weight": "model-00026-of-00030.safetensors", - "rwkv.blocks.25.feed_forward.key.weight": "model-00027-of-00030.safetensors", - "rwkv.blocks.25.feed_forward.receptance.weight": "model-00027-of-00030.safetensors", - "rwkv.blocks.25.feed_forward.time_mix_key": "model-00026-of-00030.safetensors", - "rwkv.blocks.25.feed_forward.time_mix_receptance": "model-00026-of-00030.safetensors", - "rwkv.blocks.25.feed_forward.value.weight": "model-00027-of-00030.safetensors", - "rwkv.blocks.25.ln1.bias": "model-00026-of-00030.safetensors", - "rwkv.blocks.25.ln1.weight": "model-00026-of-00030.safetensors", - "rwkv.blocks.25.ln2.bias": "model-00026-of-00030.safetensors", - "rwkv.blocks.25.ln2.weight": "model-00026-of-00030.safetensors", - "rwkv.blocks.26.attention.key.weight": "model-00027-of-00030.safetensors", - "rwkv.blocks.26.attention.output.weight": "model-00027-of-00030.safetensors", - "rwkv.blocks.26.attention.receptance.weight": "model-00027-of-00030.safetensors", - "rwkv.blocks.26.attention.time_decay": "model-00027-of-00030.safetensors", - "rwkv.blocks.26.attention.time_first": "model-00027-of-00030.safetensors", - "rwkv.blocks.26.attention.time_mix_key": "model-00027-of-00030.safetensors", - "rwkv.blocks.26.attention.time_mix_receptance": "model-00027-of-00030.safetensors", - "rwkv.blocks.26.attention.time_mix_value": "model-00027-of-00030.safetensors", - "rwkv.blocks.26.attention.value.weight": "model-00027-of-00030.safetensors", - "rwkv.blocks.26.feed_forward.key.weight": "model-00028-of-00030.safetensors", - "rwkv.blocks.26.feed_forward.receptance.weight": "model-00028-of-00030.safetensors", - "rwkv.blocks.26.feed_forward.time_mix_key": "model-00027-of-00030.safetensors", - "rwkv.blocks.26.feed_forward.time_mix_receptance": "model-00027-of-00030.safetensors", - "rwkv.blocks.26.feed_forward.value.weight": "model-00028-of-00030.safetensors", - "rwkv.blocks.26.ln1.bias": "model-00027-of-00030.safetensors", - "rwkv.blocks.26.ln1.weight": "model-00027-of-00030.safetensors", - "rwkv.blocks.26.ln2.bias": "model-00027-of-00030.safetensors", - "rwkv.blocks.26.ln2.weight": "model-00027-of-00030.safetensors", - "rwkv.blocks.27.attention.key.weight": "model-00028-of-00030.safetensors", - "rwkv.blocks.27.attention.output.weight": "model-00028-of-00030.safetensors", - "rwkv.blocks.27.attention.receptance.weight": "model-00028-of-00030.safetensors", - "rwkv.blocks.27.attention.time_decay": "model-00028-of-00030.safetensors", - "rwkv.blocks.27.attention.time_first": "model-00028-of-00030.safetensors", - "rwkv.blocks.27.attention.time_mix_key": "model-00028-of-00030.safetensors", - "rwkv.blocks.27.attention.time_mix_receptance": "model-00028-of-00030.safetensors", - "rwkv.blocks.27.attention.time_mix_value": "model-00028-of-00030.safetensors", - "rwkv.blocks.27.attention.value.weight": "model-00028-of-00030.safetensors", - "rwkv.blocks.27.feed_forward.key.weight": "model-00029-of-00030.safetensors", - "rwkv.blocks.27.feed_forward.receptance.weight": "model-00029-of-00030.safetensors", - "rwkv.blocks.27.feed_forward.time_mix_key": "model-00028-of-00030.safetensors", - "rwkv.blocks.27.feed_forward.time_mix_receptance": "model-00028-of-00030.safetensors", - "rwkv.blocks.27.feed_forward.value.weight": "model-00029-of-00030.safetensors", - "rwkv.blocks.27.ln1.bias": "model-00028-of-00030.safetensors", - "rwkv.blocks.27.ln1.weight": "model-00028-of-00030.safetensors", - "rwkv.blocks.27.ln2.bias": "model-00028-of-00030.safetensors", - "rwkv.blocks.27.ln2.weight": "model-00028-of-00030.safetensors", - "rwkv.blocks.3.attention.key.weight": "model-00004-of-00030.safetensors", - "rwkv.blocks.3.attention.output.weight": "model-00004-of-00030.safetensors", - "rwkv.blocks.3.attention.receptance.weight": "model-00004-of-00030.safetensors", - "rwkv.blocks.3.attention.time_decay": "model-00004-of-00030.safetensors", - "rwkv.blocks.3.attention.time_first": "model-00004-of-00030.safetensors", - "rwkv.blocks.3.attention.time_mix_key": "model-00004-of-00030.safetensors", - "rwkv.blocks.3.attention.time_mix_receptance": "model-00004-of-00030.safetensors", - "rwkv.blocks.3.attention.time_mix_value": "model-00004-of-00030.safetensors", - "rwkv.blocks.3.attention.value.weight": "model-00004-of-00030.safetensors", - "rwkv.blocks.3.feed_forward.key.weight": "model-00005-of-00030.safetensors", - "rwkv.blocks.3.feed_forward.receptance.weight": "model-00005-of-00030.safetensors", - "rwkv.blocks.3.feed_forward.time_mix_key": "model-00004-of-00030.safetensors", - "rwkv.blocks.3.feed_forward.time_mix_receptance": "model-00004-of-00030.safetensors", - "rwkv.blocks.3.feed_forward.value.weight": "model-00005-of-00030.safetensors", - "rwkv.blocks.3.ln1.bias": "model-00004-of-00030.safetensors", - "rwkv.blocks.3.ln1.weight": "model-00004-of-00030.safetensors", - "rwkv.blocks.3.ln2.bias": "model-00004-of-00030.safetensors", - "rwkv.blocks.3.ln2.weight": "model-00004-of-00030.safetensors", - "rwkv.blocks.4.attention.key.weight": "model-00005-of-00030.safetensors", - "rwkv.blocks.4.attention.output.weight": "model-00005-of-00030.safetensors", - "rwkv.blocks.4.attention.receptance.weight": "model-00005-of-00030.safetensors", - "rwkv.blocks.4.attention.time_decay": "model-00005-of-00030.safetensors", - "rwkv.blocks.4.attention.time_first": "model-00005-of-00030.safetensors", - "rwkv.blocks.4.attention.time_mix_key": "model-00005-of-00030.safetensors", - "rwkv.blocks.4.attention.time_mix_receptance": "model-00005-of-00030.safetensors", - "rwkv.blocks.4.attention.time_mix_value": "model-00005-of-00030.safetensors", - "rwkv.blocks.4.attention.value.weight": "model-00005-of-00030.safetensors", - "rwkv.blocks.4.feed_forward.key.weight": "model-00006-of-00030.safetensors", - "rwkv.blocks.4.feed_forward.receptance.weight": "model-00006-of-00030.safetensors", - "rwkv.blocks.4.feed_forward.time_mix_key": "model-00005-of-00030.safetensors", - "rwkv.blocks.4.feed_forward.time_mix_receptance": "model-00005-of-00030.safetensors", - "rwkv.blocks.4.feed_forward.value.weight": "model-00006-of-00030.safetensors", - "rwkv.blocks.4.ln1.bias": "model-00005-of-00030.safetensors", - "rwkv.blocks.4.ln1.weight": "model-00005-of-00030.safetensors", - "rwkv.blocks.4.ln2.bias": "model-00005-of-00030.safetensors", - "rwkv.blocks.4.ln2.weight": "model-00005-of-00030.safetensors", - "rwkv.blocks.5.attention.key.weight": "model-00006-of-00030.safetensors", - "rwkv.blocks.5.attention.output.weight": "model-00006-of-00030.safetensors", - "rwkv.blocks.5.attention.receptance.weight": "model-00006-of-00030.safetensors", - "rwkv.blocks.5.attention.time_decay": "model-00006-of-00030.safetensors", - "rwkv.blocks.5.attention.time_first": "model-00006-of-00030.safetensors", - "rwkv.blocks.5.attention.time_mix_key": "model-00006-of-00030.safetensors", - "rwkv.blocks.5.attention.time_mix_receptance": "model-00006-of-00030.safetensors", - "rwkv.blocks.5.attention.time_mix_value": "model-00006-of-00030.safetensors", - "rwkv.blocks.5.attention.value.weight": "model-00006-of-00030.safetensors", - "rwkv.blocks.5.feed_forward.key.weight": "model-00007-of-00030.safetensors", - "rwkv.blocks.5.feed_forward.receptance.weight": "model-00007-of-00030.safetensors", - "rwkv.blocks.5.feed_forward.time_mix_key": "model-00006-of-00030.safetensors", - "rwkv.blocks.5.feed_forward.time_mix_receptance": "model-00006-of-00030.safetensors", - "rwkv.blocks.5.feed_forward.value.weight": "model-00007-of-00030.safetensors", - "rwkv.blocks.5.ln1.bias": "model-00006-of-00030.safetensors", - "rwkv.blocks.5.ln1.weight": "model-00006-of-00030.safetensors", - "rwkv.blocks.5.ln2.bias": "model-00006-of-00030.safetensors", - "rwkv.blocks.5.ln2.weight": "model-00006-of-00030.safetensors", - "rwkv.blocks.6.attention.key.weight": "model-00007-of-00030.safetensors", - "rwkv.blocks.6.attention.output.weight": "model-00007-of-00030.safetensors", - "rwkv.blocks.6.attention.receptance.weight": "model-00007-of-00030.safetensors", - "rwkv.blocks.6.attention.time_decay": "model-00007-of-00030.safetensors", - "rwkv.blocks.6.attention.time_first": "model-00007-of-00030.safetensors", - "rwkv.blocks.6.attention.time_mix_key": "model-00007-of-00030.safetensors", - "rwkv.blocks.6.attention.time_mix_receptance": "model-00007-of-00030.safetensors", - "rwkv.blocks.6.attention.time_mix_value": "model-00007-of-00030.safetensors", - "rwkv.blocks.6.attention.value.weight": "model-00007-of-00030.safetensors", - "rwkv.blocks.6.feed_forward.key.weight": "model-00008-of-00030.safetensors", - "rwkv.blocks.6.feed_forward.receptance.weight": "model-00008-of-00030.safetensors", - "rwkv.blocks.6.feed_forward.time_mix_key": "model-00007-of-00030.safetensors", - "rwkv.blocks.6.feed_forward.time_mix_receptance": "model-00007-of-00030.safetensors", - "rwkv.blocks.6.feed_forward.value.weight": "model-00008-of-00030.safetensors", - "rwkv.blocks.6.ln1.bias": "model-00007-of-00030.safetensors", - "rwkv.blocks.6.ln1.weight": "model-00007-of-00030.safetensors", - "rwkv.blocks.6.ln2.bias": "model-00007-of-00030.safetensors", - "rwkv.blocks.6.ln2.weight": "model-00007-of-00030.safetensors", - "rwkv.blocks.7.attention.key.weight": "model-00008-of-00030.safetensors", - "rwkv.blocks.7.attention.output.weight": "model-00008-of-00030.safetensors", - "rwkv.blocks.7.attention.receptance.weight": "model-00008-of-00030.safetensors", - "rwkv.blocks.7.attention.time_decay": "model-00008-of-00030.safetensors", - "rwkv.blocks.7.attention.time_first": "model-00008-of-00030.safetensors", - "rwkv.blocks.7.attention.time_mix_key": "model-00008-of-00030.safetensors", - "rwkv.blocks.7.attention.time_mix_receptance": "model-00008-of-00030.safetensors", - "rwkv.blocks.7.attention.time_mix_value": "model-00008-of-00030.safetensors", - "rwkv.blocks.7.attention.value.weight": "model-00008-of-00030.safetensors", - "rwkv.blocks.7.feed_forward.key.weight": "model-00009-of-00030.safetensors", - "rwkv.blocks.7.feed_forward.receptance.weight": "model-00009-of-00030.safetensors", - "rwkv.blocks.7.feed_forward.time_mix_key": "model-00008-of-00030.safetensors", - "rwkv.blocks.7.feed_forward.time_mix_receptance": "model-00008-of-00030.safetensors", - "rwkv.blocks.7.feed_forward.value.weight": "model-00009-of-00030.safetensors", - "rwkv.blocks.7.ln1.bias": "model-00008-of-00030.safetensors", - "rwkv.blocks.7.ln1.weight": "model-00008-of-00030.safetensors", - "rwkv.blocks.7.ln2.bias": "model-00008-of-00030.safetensors", - "rwkv.blocks.7.ln2.weight": "model-00008-of-00030.safetensors", - "rwkv.blocks.8.attention.key.weight": "model-00009-of-00030.safetensors", - "rwkv.blocks.8.attention.output.weight": "model-00009-of-00030.safetensors", - "rwkv.blocks.8.attention.receptance.weight": "model-00009-of-00030.safetensors", - "rwkv.blocks.8.attention.time_decay": "model-00009-of-00030.safetensors", - "rwkv.blocks.8.attention.time_first": "model-00009-of-00030.safetensors", - "rwkv.blocks.8.attention.time_mix_key": "model-00009-of-00030.safetensors", - "rwkv.blocks.8.attention.time_mix_receptance": "model-00009-of-00030.safetensors", - "rwkv.blocks.8.attention.time_mix_value": "model-00009-of-00030.safetensors", - "rwkv.blocks.8.attention.value.weight": "model-00009-of-00030.safetensors", - "rwkv.blocks.8.feed_forward.key.weight": "model-00010-of-00030.safetensors", - "rwkv.blocks.8.feed_forward.receptance.weight": "model-00010-of-00030.safetensors", - "rwkv.blocks.8.feed_forward.time_mix_key": "model-00009-of-00030.safetensors", - "rwkv.blocks.8.feed_forward.time_mix_receptance": "model-00009-of-00030.safetensors", - "rwkv.blocks.8.feed_forward.value.weight": "model-00010-of-00030.safetensors", - "rwkv.blocks.8.ln1.bias": "model-00009-of-00030.safetensors", - "rwkv.blocks.8.ln1.weight": "model-00009-of-00030.safetensors", - "rwkv.blocks.8.ln2.bias": "model-00009-of-00030.safetensors", - "rwkv.blocks.8.ln2.weight": "model-00009-of-00030.safetensors", - "rwkv.blocks.9.attention.key.weight": "model-00010-of-00030.safetensors", - "rwkv.blocks.9.attention.output.weight": "model-00010-of-00030.safetensors", - "rwkv.blocks.9.attention.receptance.weight": "model-00010-of-00030.safetensors", - "rwkv.blocks.9.attention.time_decay": "model-00010-of-00030.safetensors", - "rwkv.blocks.9.attention.time_first": "model-00010-of-00030.safetensors", - "rwkv.blocks.9.attention.time_mix_key": "model-00010-of-00030.safetensors", - "rwkv.blocks.9.attention.time_mix_receptance": "model-00010-of-00030.safetensors", - "rwkv.blocks.9.attention.time_mix_value": "model-00010-of-00030.safetensors", - "rwkv.blocks.9.attention.value.weight": "model-00010-of-00030.safetensors", - "rwkv.blocks.9.feed_forward.key.weight": "model-00011-of-00030.safetensors", - "rwkv.blocks.9.feed_forward.receptance.weight": "model-00011-of-00030.safetensors", - "rwkv.blocks.9.feed_forward.time_mix_key": "model-00010-of-00030.safetensors", - "rwkv.blocks.9.feed_forward.time_mix_receptance": "model-00010-of-00030.safetensors", - "rwkv.blocks.9.feed_forward.value.weight": "model-00011-of-00030.safetensors", - "rwkv.blocks.9.ln1.bias": "model-00010-of-00030.safetensors", - "rwkv.blocks.9.ln1.weight": "model-00010-of-00030.safetensors", - "rwkv.blocks.9.ln2.bias": "model-00010-of-00030.safetensors", - "rwkv.blocks.9.ln2.weight": "model-00010-of-00030.safetensors", - "rwkv.embeddings.weight": "model-00001-of-00030.safetensors", - "rwkv.ln_out.bias": "model-00029-of-00030.safetensors", - "rwkv.ln_out.weight": "model-00029-of-00030.safetensors" + "head.weight": "model-00015-of-00015.safetensors", + "rwkv.blocks.0.attention.key.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.attention.output.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.attention.receptance.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.attention.time_decay": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.attention.time_first": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.attention.time_mix_key": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.attention.time_mix_receptance": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.attention.time_mix_value": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.attention.value.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.feed_forward.key.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.feed_forward.receptance.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.feed_forward.time_mix_key": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.feed_forward.time_mix_receptance": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.feed_forward.value.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.ln1.bias": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.ln1.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.ln2.bias": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.ln2.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.pre_ln.bias": "model-00001-of-00015.safetensors", + "rwkv.blocks.0.pre_ln.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.1.attention.key.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.1.attention.output.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.1.attention.receptance.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.1.attention.time_decay": "model-00001-of-00015.safetensors", + "rwkv.blocks.1.attention.time_first": "model-00001-of-00015.safetensors", + "rwkv.blocks.1.attention.time_mix_key": "model-00001-of-00015.safetensors", + "rwkv.blocks.1.attention.time_mix_receptance": "model-00001-of-00015.safetensors", + "rwkv.blocks.1.attention.time_mix_value": "model-00001-of-00015.safetensors", + "rwkv.blocks.1.attention.value.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.1.feed_forward.key.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.1.feed_forward.receptance.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.1.feed_forward.time_mix_key": "model-00001-of-00015.safetensors", + "rwkv.blocks.1.feed_forward.time_mix_receptance": "model-00001-of-00015.safetensors", + "rwkv.blocks.1.feed_forward.value.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.1.ln1.bias": "model-00001-of-00015.safetensors", + "rwkv.blocks.1.ln1.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.1.ln2.bias": "model-00001-of-00015.safetensors", + "rwkv.blocks.1.ln2.weight": "model-00001-of-00015.safetensors", + "rwkv.blocks.10.attention.key.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.attention.output.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.attention.receptance.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.attention.time_decay": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.attention.time_first": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.attention.time_mix_key": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.attention.time_mix_receptance": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.attention.time_mix_value": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.attention.value.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.feed_forward.key.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.feed_forward.receptance.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.feed_forward.time_mix_key": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.feed_forward.time_mix_receptance": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.feed_forward.value.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.ln1.bias": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.ln1.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.ln2.bias": "model-00006-of-00015.safetensors", + "rwkv.blocks.10.ln2.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.11.attention.key.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.11.attention.output.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.11.attention.receptance.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.11.attention.time_decay": "model-00006-of-00015.safetensors", + "rwkv.blocks.11.attention.time_first": "model-00006-of-00015.safetensors", + "rwkv.blocks.11.attention.time_mix_key": "model-00006-of-00015.safetensors", + "rwkv.blocks.11.attention.time_mix_receptance": "model-00006-of-00015.safetensors", + "rwkv.blocks.11.attention.time_mix_value": "model-00006-of-00015.safetensors", + "rwkv.blocks.11.attention.value.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.11.feed_forward.key.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.11.feed_forward.receptance.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.11.feed_forward.time_mix_key": "model-00006-of-00015.safetensors", + "rwkv.blocks.11.feed_forward.time_mix_receptance": "model-00006-of-00015.safetensors", + "rwkv.blocks.11.feed_forward.value.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.11.ln1.bias": "model-00006-of-00015.safetensors", + "rwkv.blocks.11.ln1.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.11.ln2.bias": "model-00006-of-00015.safetensors", + "rwkv.blocks.11.ln2.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.12.attention.key.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.attention.output.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.attention.receptance.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.attention.time_decay": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.attention.time_first": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.attention.time_mix_key": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.attention.time_mix_receptance": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.attention.time_mix_value": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.attention.value.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.feed_forward.key.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.feed_forward.receptance.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.feed_forward.time_mix_key": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.feed_forward.time_mix_receptance": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.feed_forward.value.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.ln1.bias": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.ln1.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.ln2.bias": "model-00007-of-00015.safetensors", + "rwkv.blocks.12.ln2.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.13.attention.key.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.13.attention.output.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.13.attention.receptance.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.13.attention.time_decay": "model-00007-of-00015.safetensors", + "rwkv.blocks.13.attention.time_first": "model-00007-of-00015.safetensors", + "rwkv.blocks.13.attention.time_mix_key": "model-00007-of-00015.safetensors", + "rwkv.blocks.13.attention.time_mix_receptance": "model-00007-of-00015.safetensors", + "rwkv.blocks.13.attention.time_mix_value": "model-00007-of-00015.safetensors", + "rwkv.blocks.13.attention.value.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.13.feed_forward.key.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.13.feed_forward.receptance.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.13.feed_forward.time_mix_key": "model-00007-of-00015.safetensors", + "rwkv.blocks.13.feed_forward.time_mix_receptance": "model-00007-of-00015.safetensors", + "rwkv.blocks.13.feed_forward.value.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.13.ln1.bias": "model-00007-of-00015.safetensors", + "rwkv.blocks.13.ln1.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.13.ln2.bias": "model-00007-of-00015.safetensors", + "rwkv.blocks.13.ln2.weight": "model-00007-of-00015.safetensors", + "rwkv.blocks.14.attention.key.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.attention.output.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.attention.receptance.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.attention.time_decay": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.attention.time_first": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.attention.time_mix_key": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.attention.time_mix_receptance": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.attention.time_mix_value": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.attention.value.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.feed_forward.key.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.feed_forward.receptance.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.feed_forward.time_mix_key": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.feed_forward.time_mix_receptance": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.feed_forward.value.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.ln1.bias": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.ln1.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.ln2.bias": "model-00008-of-00015.safetensors", + "rwkv.blocks.14.ln2.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.15.attention.key.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.15.attention.output.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.15.attention.receptance.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.15.attention.time_decay": "model-00008-of-00015.safetensors", + "rwkv.blocks.15.attention.time_first": "model-00008-of-00015.safetensors", + "rwkv.blocks.15.attention.time_mix_key": "model-00008-of-00015.safetensors", + "rwkv.blocks.15.attention.time_mix_receptance": "model-00008-of-00015.safetensors", + "rwkv.blocks.15.attention.time_mix_value": "model-00008-of-00015.safetensors", + "rwkv.blocks.15.attention.value.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.15.feed_forward.key.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.15.feed_forward.receptance.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.15.feed_forward.time_mix_key": "model-00008-of-00015.safetensors", + "rwkv.blocks.15.feed_forward.time_mix_receptance": "model-00008-of-00015.safetensors", + "rwkv.blocks.15.feed_forward.value.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.15.ln1.bias": "model-00008-of-00015.safetensors", + "rwkv.blocks.15.ln1.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.15.ln2.bias": "model-00008-of-00015.safetensors", + "rwkv.blocks.15.ln2.weight": "model-00008-of-00015.safetensors", + "rwkv.blocks.16.attention.key.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.attention.output.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.attention.receptance.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.attention.time_decay": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.attention.time_first": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.attention.time_mix_key": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.attention.time_mix_receptance": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.attention.time_mix_value": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.attention.value.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.feed_forward.key.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.feed_forward.receptance.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.feed_forward.time_mix_key": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.feed_forward.time_mix_receptance": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.feed_forward.value.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.ln1.bias": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.ln1.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.ln2.bias": "model-00009-of-00015.safetensors", + "rwkv.blocks.16.ln2.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.17.attention.key.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.17.attention.output.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.17.attention.receptance.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.17.attention.time_decay": "model-00009-of-00015.safetensors", + "rwkv.blocks.17.attention.time_first": "model-00009-of-00015.safetensors", + "rwkv.blocks.17.attention.time_mix_key": "model-00009-of-00015.safetensors", + "rwkv.blocks.17.attention.time_mix_receptance": "model-00009-of-00015.safetensors", + "rwkv.blocks.17.attention.time_mix_value": "model-00009-of-00015.safetensors", + "rwkv.blocks.17.attention.value.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.17.feed_forward.key.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.17.feed_forward.receptance.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.17.feed_forward.time_mix_key": "model-00009-of-00015.safetensors", + "rwkv.blocks.17.feed_forward.time_mix_receptance": "model-00009-of-00015.safetensors", + "rwkv.blocks.17.feed_forward.value.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.17.ln1.bias": "model-00009-of-00015.safetensors", + "rwkv.blocks.17.ln1.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.17.ln2.bias": "model-00009-of-00015.safetensors", + "rwkv.blocks.17.ln2.weight": "model-00009-of-00015.safetensors", + "rwkv.blocks.18.attention.key.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.attention.output.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.attention.receptance.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.attention.time_decay": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.attention.time_first": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.attention.time_mix_key": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.attention.time_mix_receptance": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.attention.time_mix_value": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.attention.value.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.feed_forward.key.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.feed_forward.receptance.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.feed_forward.time_mix_key": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.feed_forward.time_mix_receptance": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.feed_forward.value.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.ln1.bias": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.ln1.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.ln2.bias": "model-00010-of-00015.safetensors", + "rwkv.blocks.18.ln2.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.19.attention.key.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.19.attention.output.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.19.attention.receptance.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.19.attention.time_decay": "model-00010-of-00015.safetensors", + "rwkv.blocks.19.attention.time_first": "model-00010-of-00015.safetensors", + "rwkv.blocks.19.attention.time_mix_key": "model-00010-of-00015.safetensors", + "rwkv.blocks.19.attention.time_mix_receptance": "model-00010-of-00015.safetensors", + "rwkv.blocks.19.attention.time_mix_value": "model-00010-of-00015.safetensors", + "rwkv.blocks.19.attention.value.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.19.feed_forward.key.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.19.feed_forward.receptance.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.19.feed_forward.time_mix_key": "model-00010-of-00015.safetensors", + "rwkv.blocks.19.feed_forward.time_mix_receptance": "model-00010-of-00015.safetensors", + "rwkv.blocks.19.feed_forward.value.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.19.ln1.bias": "model-00010-of-00015.safetensors", + "rwkv.blocks.19.ln1.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.19.ln2.bias": "model-00010-of-00015.safetensors", + "rwkv.blocks.19.ln2.weight": "model-00010-of-00015.safetensors", + "rwkv.blocks.2.attention.key.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.attention.output.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.attention.receptance.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.attention.time_decay": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.attention.time_first": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.attention.time_mix_key": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.attention.time_mix_receptance": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.attention.time_mix_value": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.attention.value.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.feed_forward.key.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.feed_forward.receptance.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.feed_forward.time_mix_key": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.feed_forward.time_mix_receptance": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.feed_forward.value.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.ln1.bias": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.ln1.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.ln2.bias": "model-00002-of-00015.safetensors", + "rwkv.blocks.2.ln2.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.20.attention.key.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.attention.output.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.attention.receptance.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.attention.time_decay": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.attention.time_first": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.attention.time_mix_key": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.attention.time_mix_receptance": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.attention.time_mix_value": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.attention.value.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.feed_forward.key.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.feed_forward.receptance.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.feed_forward.time_mix_key": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.feed_forward.time_mix_receptance": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.feed_forward.value.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.ln1.bias": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.ln1.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.ln2.bias": "model-00011-of-00015.safetensors", + "rwkv.blocks.20.ln2.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.21.attention.key.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.21.attention.output.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.21.attention.receptance.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.21.attention.time_decay": "model-00011-of-00015.safetensors", + "rwkv.blocks.21.attention.time_first": "model-00011-of-00015.safetensors", + "rwkv.blocks.21.attention.time_mix_key": "model-00011-of-00015.safetensors", + "rwkv.blocks.21.attention.time_mix_receptance": "model-00011-of-00015.safetensors", + "rwkv.blocks.21.attention.time_mix_value": "model-00011-of-00015.safetensors", + "rwkv.blocks.21.attention.value.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.21.feed_forward.key.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.21.feed_forward.receptance.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.21.feed_forward.time_mix_key": "model-00011-of-00015.safetensors", + "rwkv.blocks.21.feed_forward.time_mix_receptance": "model-00011-of-00015.safetensors", + "rwkv.blocks.21.feed_forward.value.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.21.ln1.bias": "model-00011-of-00015.safetensors", + "rwkv.blocks.21.ln1.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.21.ln2.bias": "model-00011-of-00015.safetensors", + "rwkv.blocks.21.ln2.weight": "model-00011-of-00015.safetensors", + "rwkv.blocks.22.attention.key.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.attention.output.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.attention.receptance.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.attention.time_decay": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.attention.time_first": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.attention.time_mix_key": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.attention.time_mix_receptance": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.attention.time_mix_value": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.attention.value.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.feed_forward.key.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.feed_forward.receptance.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.feed_forward.time_mix_key": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.feed_forward.time_mix_receptance": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.feed_forward.value.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.ln1.bias": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.ln1.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.ln2.bias": "model-00012-of-00015.safetensors", + "rwkv.blocks.22.ln2.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.23.attention.key.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.23.attention.output.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.23.attention.receptance.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.23.attention.time_decay": "model-00012-of-00015.safetensors", + "rwkv.blocks.23.attention.time_first": "model-00012-of-00015.safetensors", + "rwkv.blocks.23.attention.time_mix_key": "model-00012-of-00015.safetensors", + "rwkv.blocks.23.attention.time_mix_receptance": "model-00012-of-00015.safetensors", + "rwkv.blocks.23.attention.time_mix_value": "model-00012-of-00015.safetensors", + "rwkv.blocks.23.attention.value.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.23.feed_forward.key.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.23.feed_forward.receptance.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.23.feed_forward.time_mix_key": "model-00012-of-00015.safetensors", + "rwkv.blocks.23.feed_forward.time_mix_receptance": "model-00012-of-00015.safetensors", + "rwkv.blocks.23.feed_forward.value.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.23.ln1.bias": "model-00012-of-00015.safetensors", + "rwkv.blocks.23.ln1.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.23.ln2.bias": "model-00012-of-00015.safetensors", + "rwkv.blocks.23.ln2.weight": "model-00012-of-00015.safetensors", + "rwkv.blocks.24.attention.key.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.attention.output.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.attention.receptance.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.attention.time_decay": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.attention.time_first": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.attention.time_mix_key": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.attention.time_mix_receptance": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.attention.time_mix_value": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.attention.value.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.feed_forward.key.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.feed_forward.receptance.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.feed_forward.time_mix_key": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.feed_forward.time_mix_receptance": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.feed_forward.value.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.ln1.bias": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.ln1.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.ln2.bias": "model-00013-of-00015.safetensors", + "rwkv.blocks.24.ln2.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.25.attention.key.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.25.attention.output.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.25.attention.receptance.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.25.attention.time_decay": "model-00013-of-00015.safetensors", + "rwkv.blocks.25.attention.time_first": "model-00013-of-00015.safetensors", + "rwkv.blocks.25.attention.time_mix_key": "model-00013-of-00015.safetensors", + "rwkv.blocks.25.attention.time_mix_receptance": "model-00013-of-00015.safetensors", + "rwkv.blocks.25.attention.time_mix_value": "model-00013-of-00015.safetensors", + "rwkv.blocks.25.attention.value.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.25.feed_forward.key.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.25.feed_forward.receptance.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.25.feed_forward.time_mix_key": "model-00013-of-00015.safetensors", + "rwkv.blocks.25.feed_forward.time_mix_receptance": "model-00013-of-00015.safetensors", + "rwkv.blocks.25.feed_forward.value.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.25.ln1.bias": "model-00013-of-00015.safetensors", + "rwkv.blocks.25.ln1.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.25.ln2.bias": "model-00013-of-00015.safetensors", + "rwkv.blocks.25.ln2.weight": "model-00013-of-00015.safetensors", + "rwkv.blocks.26.attention.key.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.attention.output.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.attention.receptance.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.attention.time_decay": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.attention.time_first": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.attention.time_mix_key": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.attention.time_mix_receptance": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.attention.time_mix_value": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.attention.value.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.feed_forward.key.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.feed_forward.receptance.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.feed_forward.time_mix_key": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.feed_forward.time_mix_receptance": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.feed_forward.value.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.ln1.bias": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.ln1.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.ln2.bias": "model-00014-of-00015.safetensors", + "rwkv.blocks.26.ln2.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.27.attention.key.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.27.attention.output.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.27.attention.receptance.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.27.attention.time_decay": "model-00014-of-00015.safetensors", + "rwkv.blocks.27.attention.time_first": "model-00014-of-00015.safetensors", + "rwkv.blocks.27.attention.time_mix_key": "model-00014-of-00015.safetensors", + "rwkv.blocks.27.attention.time_mix_receptance": "model-00014-of-00015.safetensors", + "rwkv.blocks.27.attention.time_mix_value": "model-00014-of-00015.safetensors", + "rwkv.blocks.27.attention.value.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.27.feed_forward.key.weight": "model-00015-of-00015.safetensors", + "rwkv.blocks.27.feed_forward.receptance.weight": "model-00015-of-00015.safetensors", + "rwkv.blocks.27.feed_forward.time_mix_key": "model-00014-of-00015.safetensors", + "rwkv.blocks.27.feed_forward.time_mix_receptance": "model-00014-of-00015.safetensors", + "rwkv.blocks.27.feed_forward.value.weight": "model-00015-of-00015.safetensors", + "rwkv.blocks.27.ln1.bias": "model-00014-of-00015.safetensors", + "rwkv.blocks.27.ln1.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.27.ln2.bias": "model-00014-of-00015.safetensors", + "rwkv.blocks.27.ln2.weight": "model-00014-of-00015.safetensors", + "rwkv.blocks.3.attention.key.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.3.attention.output.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.3.attention.receptance.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.3.attention.time_decay": "model-00002-of-00015.safetensors", + "rwkv.blocks.3.attention.time_first": "model-00002-of-00015.safetensors", + "rwkv.blocks.3.attention.time_mix_key": "model-00002-of-00015.safetensors", + "rwkv.blocks.3.attention.time_mix_receptance": "model-00002-of-00015.safetensors", + "rwkv.blocks.3.attention.time_mix_value": "model-00002-of-00015.safetensors", + "rwkv.blocks.3.attention.value.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.3.feed_forward.key.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.3.feed_forward.receptance.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.3.feed_forward.time_mix_key": "model-00002-of-00015.safetensors", + "rwkv.blocks.3.feed_forward.time_mix_receptance": "model-00002-of-00015.safetensors", + "rwkv.blocks.3.feed_forward.value.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.3.ln1.bias": "model-00002-of-00015.safetensors", + "rwkv.blocks.3.ln1.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.3.ln2.bias": "model-00002-of-00015.safetensors", + "rwkv.blocks.3.ln2.weight": "model-00002-of-00015.safetensors", + "rwkv.blocks.4.attention.key.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.attention.output.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.attention.receptance.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.attention.time_decay": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.attention.time_first": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.attention.time_mix_key": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.attention.time_mix_receptance": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.attention.time_mix_value": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.attention.value.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.feed_forward.key.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.feed_forward.receptance.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.feed_forward.time_mix_key": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.feed_forward.time_mix_receptance": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.feed_forward.value.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.ln1.bias": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.ln1.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.ln2.bias": "model-00003-of-00015.safetensors", + "rwkv.blocks.4.ln2.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.5.attention.key.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.5.attention.output.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.5.attention.receptance.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.5.attention.time_decay": "model-00003-of-00015.safetensors", + "rwkv.blocks.5.attention.time_first": "model-00003-of-00015.safetensors", + "rwkv.blocks.5.attention.time_mix_key": "model-00003-of-00015.safetensors", + "rwkv.blocks.5.attention.time_mix_receptance": "model-00003-of-00015.safetensors", + "rwkv.blocks.5.attention.time_mix_value": "model-00003-of-00015.safetensors", + "rwkv.blocks.5.attention.value.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.5.feed_forward.key.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.5.feed_forward.receptance.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.5.feed_forward.time_mix_key": "model-00003-of-00015.safetensors", + "rwkv.blocks.5.feed_forward.time_mix_receptance": "model-00003-of-00015.safetensors", + "rwkv.blocks.5.feed_forward.value.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.5.ln1.bias": "model-00003-of-00015.safetensors", + "rwkv.blocks.5.ln1.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.5.ln2.bias": "model-00003-of-00015.safetensors", + "rwkv.blocks.5.ln2.weight": "model-00003-of-00015.safetensors", + "rwkv.blocks.6.attention.key.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.attention.output.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.attention.receptance.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.attention.time_decay": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.attention.time_first": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.attention.time_mix_key": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.attention.time_mix_receptance": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.attention.time_mix_value": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.attention.value.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.feed_forward.key.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.feed_forward.receptance.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.feed_forward.time_mix_key": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.feed_forward.time_mix_receptance": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.feed_forward.value.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.ln1.bias": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.ln1.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.ln2.bias": "model-00004-of-00015.safetensors", + "rwkv.blocks.6.ln2.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.7.attention.key.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.7.attention.output.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.7.attention.receptance.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.7.attention.time_decay": "model-00004-of-00015.safetensors", + "rwkv.blocks.7.attention.time_first": "model-00004-of-00015.safetensors", + "rwkv.blocks.7.attention.time_mix_key": "model-00004-of-00015.safetensors", + "rwkv.blocks.7.attention.time_mix_receptance": "model-00004-of-00015.safetensors", + "rwkv.blocks.7.attention.time_mix_value": "model-00004-of-00015.safetensors", + "rwkv.blocks.7.attention.value.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.7.feed_forward.key.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.7.feed_forward.receptance.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.7.feed_forward.time_mix_key": "model-00004-of-00015.safetensors", + "rwkv.blocks.7.feed_forward.time_mix_receptance": "model-00004-of-00015.safetensors", + "rwkv.blocks.7.feed_forward.value.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.7.ln1.bias": "model-00004-of-00015.safetensors", + "rwkv.blocks.7.ln1.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.7.ln2.bias": "model-00004-of-00015.safetensors", + "rwkv.blocks.7.ln2.weight": "model-00004-of-00015.safetensors", + "rwkv.blocks.8.attention.key.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.attention.output.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.attention.receptance.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.attention.time_decay": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.attention.time_first": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.attention.time_mix_key": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.attention.time_mix_receptance": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.attention.time_mix_value": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.attention.value.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.feed_forward.key.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.feed_forward.receptance.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.feed_forward.time_mix_key": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.feed_forward.time_mix_receptance": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.feed_forward.value.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.ln1.bias": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.ln1.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.ln2.bias": "model-00005-of-00015.safetensors", + "rwkv.blocks.8.ln2.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.9.attention.key.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.9.attention.output.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.9.attention.receptance.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.9.attention.time_decay": "model-00005-of-00015.safetensors", + "rwkv.blocks.9.attention.time_first": "model-00005-of-00015.safetensors", + "rwkv.blocks.9.attention.time_mix_key": "model-00005-of-00015.safetensors", + "rwkv.blocks.9.attention.time_mix_receptance": "model-00005-of-00015.safetensors", + "rwkv.blocks.9.attention.time_mix_value": "model-00005-of-00015.safetensors", + "rwkv.blocks.9.attention.value.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.9.feed_forward.key.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.9.feed_forward.receptance.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.9.feed_forward.time_mix_key": "model-00005-of-00015.safetensors", + "rwkv.blocks.9.feed_forward.time_mix_receptance": "model-00005-of-00015.safetensors", + "rwkv.blocks.9.feed_forward.value.weight": "model-00006-of-00015.safetensors", + "rwkv.blocks.9.ln1.bias": "model-00005-of-00015.safetensors", + "rwkv.blocks.9.ln1.weight": "model-00005-of-00015.safetensors", + "rwkv.blocks.9.ln2.bias": "model-00005-of-00015.safetensors", + "rwkv.blocks.9.ln2.weight": "model-00005-of-00015.safetensors", + "rwkv.embeddings.weight": "model-00001-of-00015.safetensors", + "rwkv.ln_out.bias": "model-00015-of-00015.safetensors", + "rwkv.ln_out.weight": "model-00015-of-00015.safetensors" } } diff --git a/pytorch_model-00001-of-00015.bin b/pytorch_model-00001-of-00015.bin new file mode 100644 index 0000000000000000000000000000000000000000..de71db41099af515d7203e16a0fe794ab04898c1 --- /dev/null +++ b/pytorch_model-00001-of-00015.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1dde9f1b7de8e05d30655366e0cd56e273ed15a6021558c6f180d473d4f843e +size 996617751 diff --git a/pytorch_model-00002-of-00015.bin b/pytorch_model-00002-of-00015.bin new file mode 100644 index 0000000000000000000000000000000000000000..af06c2641796cbfd9d9ef872f068279b99d028b7 --- /dev/null +++ b/pytorch_model-00002-of-00015.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee0832da23d20cb04f20b339c55e6c8dd692f66e53315743e39dee06cc4c66e8 +size 872607319 diff --git a/pytorch_model-00003-of-00015.bin b/pytorch_model-00003-of-00015.bin new file mode 100644 index 0000000000000000000000000000000000000000..531a09e8f44f81e39e9be67f4345ae3a48e90c97 --- /dev/null +++ b/pytorch_model-00003-of-00015.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d4e223ebf87c21e58e21f6cdab00592771e1c8d73c3a16b6d2e36c79724f2c9 +size 872607319 diff --git a/pytorch_model-00004-of-00015.bin b/pytorch_model-00004-of-00015.bin new file mode 100644 index 0000000000000000000000000000000000000000..d6837b846ad71bbdfa32331d88e01c70606973fe --- /dev/null +++ b/pytorch_model-00004-of-00015.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1e5d5b83ec9a662a2e1b53ac8aaf2df5617150740eacd8abd64dbcaef91c1e0 +size 872607319 diff --git a/pytorch_model-00005-of-00015.bin b/pytorch_model-00005-of-00015.bin new file mode 100644 index 0000000000000000000000000000000000000000..777c3bbce9f20b064fce1ca184dcfba5388b0f18 --- /dev/null +++ b/pytorch_model-00005-of-00015.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ac092e51936c5ee3dd836368b5bf13024948ba75684ce3355d00032477aa53 +size 872607319 diff --git a/pytorch_model-00006-of-00015.bin b/pytorch_model-00006-of-00015.bin new file mode 100644 index 0000000000000000000000000000000000000000..7973cfb9555b3cef146479b88431a247121d01ce --- /dev/null +++ b/pytorch_model-00006-of-00015.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f19b4c1078b2692aca4dda5c2e7d90b890f456a5234ed9dc015408e1f79fede3 +size 872607383 diff --git a/pytorch_model-00007-of-00015.bin b/pytorch_model-00007-of-00015.bin new file mode 100644 index 0000000000000000000000000000000000000000..541a2e0eb30f9c0e30558a0209bd954eda44ca1d --- /dev/null +++ b/pytorch_model-00007-of-00015.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70bfe750348156eb394979c82c8e4ff5d09756271d34e10edac48ae5430f3f15 +size 872607383 diff --git a/pytorch_model-00008-of-00015.bin b/pytorch_model-00008-of-00015.bin new file mode 100644 index 0000000000000000000000000000000000000000..543703f30398cba0fdc1fada5cffae42496b4dac --- /dev/null +++ b/pytorch_model-00008-of-00015.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f42de90ae8bdb36395f73a43456136a355dc3fd2384223912fd07da8fbc779d +size 872607383 diff --git a/pytorch_model-00009-of-00015.bin b/pytorch_model-00009-of-00015.bin new file mode 100644 index 0000000000000000000000000000000000000000..68a8c8b66b2e29c647acb2147f1e4689da970bae --- /dev/null +++ b/pytorch_model-00009-of-00015.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf8d55359d5337596d6b8e4c3151c2acb48db0dc1ecd873755429c95cd1f1fee +size 872607383 diff --git a/pytorch_model-00010-of-00015.bin b/pytorch_model-00010-of-00015.bin new file mode 100644 index 0000000000000000000000000000000000000000..8093d94d9273580833ecbeb879887c9c65e3661e --- /dev/null +++ b/pytorch_model-00010-of-00015.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab4395935db9d136f1c62bf0dc5aeca0053a9450768faeb38496241e3953196f +size 872607383 diff --git a/pytorch_model-00011-of-00015.bin b/pytorch_model-00011-of-00015.bin new file mode 100644 index 0000000000000000000000000000000000000000..c636d4ccc516c03b25838a6ff1f48d800b2fb47b --- /dev/null +++ b/pytorch_model-00011-of-00015.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed5cea9e10902903433421b353153938d7072028e397f8732ee710152690dd33 +size 872607383 diff --git a/pytorch_model-00012-of-00015.bin b/pytorch_model-00012-of-00015.bin new file mode 100644 index 0000000000000000000000000000000000000000..e1c3ecf2ee94304584768a64f1f04b47bc32125a --- /dev/null +++ b/pytorch_model-00012-of-00015.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84db663c6073553cab108506667eaa51bb3e041f40cf1c15a274f109ed657807 +size 872607383 diff --git a/pytorch_model-00013-of-00015.bin b/pytorch_model-00013-of-00015.bin new file mode 100644 index 0000000000000000000000000000000000000000..30f197eb9f1d9df7cb5681e71a4741a698ee044f --- /dev/null +++ b/pytorch_model-00013-of-00015.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4dad660d3af240fe52157b09773b7efb2abe4f5d9347f0a098bcb32042f8e22 +size 872607383 diff --git a/pytorch_model-00014-of-00015.bin b/pytorch_model-00014-of-00015.bin new file mode 100644 index 0000000000000000000000000000000000000000..5303175cbc3f079aa3bbbcc0f12e724b89177f3f --- /dev/null +++ b/pytorch_model-00014-of-00015.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3877f6962d20e19b403bf438cb8fb67c1af92f7bf93c18d776f35e7abb70b2d0 +size 872607383 diff --git a/pytorch_model-00015-of-00015.bin b/pytorch_model-00015-of-00015.bin new file mode 100644 index 0000000000000000000000000000000000000000..13205b4ef17098dce1eb7b07eb57a7a7d47e3453 --- /dev/null +++ b/pytorch_model-00015-of-00015.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b89bd112719b9b7481d66b675ef5f3e3a1e4c16260f4e7a02a4f7d66599295 +size 727992703 diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json new file mode 100644 index 0000000000000000000000000000000000000000..2ceebba0c25106300c7b4a4cb57dfe3fee63f806 --- /dev/null +++ b/pytorch_model.bin.index.json @@ -0,0 +1,517 @@ +{ + "metadata": { + "total_size": 13068337152 + }, + "weight_map": { + "head.weight": "pytorch_model-00015-of-00015.bin", + "rwkv.blocks.0.attention.key.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.attention.output.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.attention.receptance.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.attention.time_decay": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.attention.time_first": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.attention.time_mix_key": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.attention.time_mix_receptance": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.attention.time_mix_value": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.attention.value.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.feed_forward.key.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.feed_forward.receptance.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.feed_forward.time_mix_key": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.feed_forward.time_mix_receptance": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.feed_forward.value.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.ln1.bias": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.ln1.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.ln2.bias": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.ln2.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.pre_ln.bias": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.0.pre_ln.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.1.attention.key.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.1.attention.output.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.1.attention.receptance.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.1.attention.time_decay": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.1.attention.time_first": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.1.attention.time_mix_key": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.1.attention.time_mix_receptance": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.1.attention.time_mix_value": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.1.attention.value.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.1.feed_forward.key.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.1.feed_forward.receptance.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.1.feed_forward.time_mix_key": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.1.feed_forward.time_mix_receptance": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.1.feed_forward.value.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.1.ln1.bias": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.1.ln1.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.1.ln2.bias": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.1.ln2.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.blocks.10.attention.key.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.attention.output.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.attention.receptance.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.attention.time_decay": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.attention.time_first": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.attention.time_mix_key": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.attention.time_mix_receptance": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.attention.time_mix_value": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.attention.value.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.feed_forward.key.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.feed_forward.receptance.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.feed_forward.time_mix_key": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.feed_forward.time_mix_receptance": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.feed_forward.value.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.ln1.bias": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.ln1.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.ln2.bias": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.10.ln2.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.11.attention.key.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.11.attention.output.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.11.attention.receptance.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.11.attention.time_decay": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.11.attention.time_first": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.11.attention.time_mix_key": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.11.attention.time_mix_receptance": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.11.attention.time_mix_value": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.11.attention.value.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.11.feed_forward.key.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.11.feed_forward.receptance.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.11.feed_forward.time_mix_key": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.11.feed_forward.time_mix_receptance": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.11.feed_forward.value.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.11.ln1.bias": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.11.ln1.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.11.ln2.bias": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.11.ln2.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.12.attention.key.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.attention.output.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.attention.receptance.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.attention.time_decay": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.attention.time_first": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.attention.time_mix_key": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.attention.time_mix_receptance": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.attention.time_mix_value": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.attention.value.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.feed_forward.key.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.feed_forward.receptance.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.feed_forward.time_mix_key": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.feed_forward.time_mix_receptance": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.feed_forward.value.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.ln1.bias": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.ln1.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.ln2.bias": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.12.ln2.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.13.attention.key.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.13.attention.output.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.13.attention.receptance.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.13.attention.time_decay": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.13.attention.time_first": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.13.attention.time_mix_key": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.13.attention.time_mix_receptance": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.13.attention.time_mix_value": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.13.attention.value.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.13.feed_forward.key.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.13.feed_forward.receptance.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.13.feed_forward.time_mix_key": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.13.feed_forward.time_mix_receptance": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.13.feed_forward.value.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.13.ln1.bias": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.13.ln1.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.13.ln2.bias": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.13.ln2.weight": "pytorch_model-00007-of-00015.bin", + "rwkv.blocks.14.attention.key.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.attention.output.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.attention.receptance.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.attention.time_decay": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.attention.time_first": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.attention.time_mix_key": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.attention.time_mix_receptance": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.attention.time_mix_value": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.attention.value.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.feed_forward.key.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.feed_forward.receptance.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.feed_forward.time_mix_key": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.feed_forward.time_mix_receptance": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.feed_forward.value.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.ln1.bias": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.ln1.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.ln2.bias": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.14.ln2.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.15.attention.key.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.15.attention.output.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.15.attention.receptance.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.15.attention.time_decay": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.15.attention.time_first": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.15.attention.time_mix_key": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.15.attention.time_mix_receptance": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.15.attention.time_mix_value": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.15.attention.value.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.15.feed_forward.key.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.15.feed_forward.receptance.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.15.feed_forward.time_mix_key": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.15.feed_forward.time_mix_receptance": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.15.feed_forward.value.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.15.ln1.bias": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.15.ln1.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.15.ln2.bias": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.15.ln2.weight": "pytorch_model-00008-of-00015.bin", + "rwkv.blocks.16.attention.key.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.attention.output.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.attention.receptance.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.attention.time_decay": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.attention.time_first": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.attention.time_mix_key": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.attention.time_mix_receptance": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.attention.time_mix_value": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.attention.value.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.feed_forward.key.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.feed_forward.receptance.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.feed_forward.time_mix_key": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.feed_forward.time_mix_receptance": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.feed_forward.value.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.ln1.bias": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.ln1.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.ln2.bias": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.16.ln2.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.17.attention.key.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.17.attention.output.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.17.attention.receptance.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.17.attention.time_decay": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.17.attention.time_first": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.17.attention.time_mix_key": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.17.attention.time_mix_receptance": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.17.attention.time_mix_value": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.17.attention.value.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.17.feed_forward.key.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.17.feed_forward.receptance.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.17.feed_forward.time_mix_key": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.17.feed_forward.time_mix_receptance": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.17.feed_forward.value.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.17.ln1.bias": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.17.ln1.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.17.ln2.bias": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.17.ln2.weight": "pytorch_model-00009-of-00015.bin", + "rwkv.blocks.18.attention.key.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.attention.output.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.attention.receptance.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.attention.time_decay": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.attention.time_first": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.attention.time_mix_key": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.attention.time_mix_receptance": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.attention.time_mix_value": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.attention.value.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.feed_forward.key.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.feed_forward.receptance.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.feed_forward.time_mix_key": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.feed_forward.time_mix_receptance": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.feed_forward.value.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.ln1.bias": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.ln1.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.ln2.bias": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.18.ln2.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.19.attention.key.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.19.attention.output.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.19.attention.receptance.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.19.attention.time_decay": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.19.attention.time_first": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.19.attention.time_mix_key": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.19.attention.time_mix_receptance": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.19.attention.time_mix_value": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.19.attention.value.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.19.feed_forward.key.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.19.feed_forward.receptance.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.19.feed_forward.time_mix_key": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.19.feed_forward.time_mix_receptance": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.19.feed_forward.value.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.19.ln1.bias": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.19.ln1.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.19.ln2.bias": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.19.ln2.weight": "pytorch_model-00010-of-00015.bin", + "rwkv.blocks.2.attention.key.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.attention.output.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.attention.receptance.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.attention.time_decay": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.attention.time_first": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.attention.time_mix_key": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.attention.time_mix_receptance": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.attention.time_mix_value": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.attention.value.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.feed_forward.key.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.feed_forward.receptance.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.feed_forward.time_mix_key": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.feed_forward.time_mix_receptance": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.feed_forward.value.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.ln1.bias": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.ln1.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.ln2.bias": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.2.ln2.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.20.attention.key.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.attention.output.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.attention.receptance.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.attention.time_decay": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.attention.time_first": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.attention.time_mix_key": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.attention.time_mix_receptance": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.attention.time_mix_value": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.attention.value.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.feed_forward.key.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.feed_forward.receptance.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.feed_forward.time_mix_key": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.feed_forward.time_mix_receptance": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.feed_forward.value.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.ln1.bias": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.ln1.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.ln2.bias": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.20.ln2.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.21.attention.key.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.21.attention.output.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.21.attention.receptance.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.21.attention.time_decay": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.21.attention.time_first": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.21.attention.time_mix_key": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.21.attention.time_mix_receptance": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.21.attention.time_mix_value": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.21.attention.value.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.21.feed_forward.key.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.21.feed_forward.receptance.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.21.feed_forward.time_mix_key": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.21.feed_forward.time_mix_receptance": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.21.feed_forward.value.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.21.ln1.bias": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.21.ln1.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.21.ln2.bias": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.21.ln2.weight": "pytorch_model-00011-of-00015.bin", + "rwkv.blocks.22.attention.key.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.attention.output.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.attention.receptance.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.attention.time_decay": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.attention.time_first": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.attention.time_mix_key": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.attention.time_mix_receptance": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.attention.time_mix_value": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.attention.value.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.feed_forward.key.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.feed_forward.receptance.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.feed_forward.time_mix_key": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.feed_forward.time_mix_receptance": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.feed_forward.value.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.ln1.bias": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.ln1.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.ln2.bias": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.22.ln2.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.23.attention.key.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.23.attention.output.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.23.attention.receptance.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.23.attention.time_decay": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.23.attention.time_first": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.23.attention.time_mix_key": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.23.attention.time_mix_receptance": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.23.attention.time_mix_value": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.23.attention.value.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.23.feed_forward.key.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.23.feed_forward.receptance.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.23.feed_forward.time_mix_key": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.23.feed_forward.time_mix_receptance": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.23.feed_forward.value.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.23.ln1.bias": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.23.ln1.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.23.ln2.bias": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.23.ln2.weight": "pytorch_model-00012-of-00015.bin", + "rwkv.blocks.24.attention.key.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.attention.output.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.attention.receptance.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.attention.time_decay": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.attention.time_first": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.attention.time_mix_key": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.attention.time_mix_receptance": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.attention.time_mix_value": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.attention.value.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.feed_forward.key.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.feed_forward.receptance.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.feed_forward.time_mix_key": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.feed_forward.time_mix_receptance": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.feed_forward.value.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.ln1.bias": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.ln1.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.ln2.bias": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.24.ln2.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.25.attention.key.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.25.attention.output.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.25.attention.receptance.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.25.attention.time_decay": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.25.attention.time_first": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.25.attention.time_mix_key": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.25.attention.time_mix_receptance": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.25.attention.time_mix_value": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.25.attention.value.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.25.feed_forward.key.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.25.feed_forward.receptance.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.25.feed_forward.time_mix_key": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.25.feed_forward.time_mix_receptance": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.25.feed_forward.value.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.25.ln1.bias": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.25.ln1.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.25.ln2.bias": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.25.ln2.weight": "pytorch_model-00013-of-00015.bin", + "rwkv.blocks.26.attention.key.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.attention.output.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.attention.receptance.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.attention.time_decay": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.attention.time_first": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.attention.time_mix_key": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.attention.time_mix_receptance": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.attention.time_mix_value": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.attention.value.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.feed_forward.key.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.feed_forward.receptance.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.feed_forward.time_mix_key": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.feed_forward.time_mix_receptance": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.feed_forward.value.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.ln1.bias": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.ln1.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.ln2.bias": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.26.ln2.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.27.attention.key.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.27.attention.output.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.27.attention.receptance.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.27.attention.time_decay": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.27.attention.time_first": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.27.attention.time_mix_key": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.27.attention.time_mix_receptance": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.27.attention.time_mix_value": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.27.attention.value.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.27.feed_forward.key.weight": "pytorch_model-00015-of-00015.bin", + "rwkv.blocks.27.feed_forward.receptance.weight": "pytorch_model-00015-of-00015.bin", + "rwkv.blocks.27.feed_forward.time_mix_key": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.27.feed_forward.time_mix_receptance": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.27.feed_forward.value.weight": "pytorch_model-00015-of-00015.bin", + "rwkv.blocks.27.ln1.bias": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.27.ln1.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.27.ln2.bias": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.27.ln2.weight": "pytorch_model-00014-of-00015.bin", + "rwkv.blocks.3.attention.key.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.3.attention.output.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.3.attention.receptance.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.3.attention.time_decay": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.3.attention.time_first": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.3.attention.time_mix_key": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.3.attention.time_mix_receptance": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.3.attention.time_mix_value": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.3.attention.value.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.3.feed_forward.key.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.3.feed_forward.receptance.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.3.feed_forward.time_mix_key": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.3.feed_forward.time_mix_receptance": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.3.feed_forward.value.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.3.ln1.bias": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.3.ln1.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.3.ln2.bias": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.3.ln2.weight": "pytorch_model-00002-of-00015.bin", + "rwkv.blocks.4.attention.key.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.attention.output.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.attention.receptance.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.attention.time_decay": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.attention.time_first": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.attention.time_mix_key": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.attention.time_mix_receptance": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.attention.time_mix_value": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.attention.value.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.feed_forward.key.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.feed_forward.receptance.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.feed_forward.time_mix_key": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.feed_forward.time_mix_receptance": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.feed_forward.value.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.ln1.bias": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.ln1.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.ln2.bias": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.4.ln2.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.5.attention.key.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.5.attention.output.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.5.attention.receptance.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.5.attention.time_decay": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.5.attention.time_first": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.5.attention.time_mix_key": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.5.attention.time_mix_receptance": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.5.attention.time_mix_value": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.5.attention.value.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.5.feed_forward.key.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.5.feed_forward.receptance.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.5.feed_forward.time_mix_key": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.5.feed_forward.time_mix_receptance": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.5.feed_forward.value.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.5.ln1.bias": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.5.ln1.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.5.ln2.bias": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.5.ln2.weight": "pytorch_model-00003-of-00015.bin", + "rwkv.blocks.6.attention.key.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.attention.output.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.attention.receptance.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.attention.time_decay": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.attention.time_first": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.attention.time_mix_key": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.attention.time_mix_receptance": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.attention.time_mix_value": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.attention.value.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.feed_forward.key.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.feed_forward.receptance.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.feed_forward.time_mix_key": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.feed_forward.time_mix_receptance": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.feed_forward.value.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.ln1.bias": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.ln1.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.ln2.bias": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.6.ln2.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.7.attention.key.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.7.attention.output.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.7.attention.receptance.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.7.attention.time_decay": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.7.attention.time_first": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.7.attention.time_mix_key": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.7.attention.time_mix_receptance": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.7.attention.time_mix_value": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.7.attention.value.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.7.feed_forward.key.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.7.feed_forward.receptance.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.7.feed_forward.time_mix_key": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.7.feed_forward.time_mix_receptance": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.7.feed_forward.value.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.7.ln1.bias": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.7.ln1.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.7.ln2.bias": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.7.ln2.weight": "pytorch_model-00004-of-00015.bin", + "rwkv.blocks.8.attention.key.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.attention.output.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.attention.receptance.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.attention.time_decay": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.attention.time_first": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.attention.time_mix_key": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.attention.time_mix_receptance": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.attention.time_mix_value": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.attention.value.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.feed_forward.key.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.feed_forward.receptance.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.feed_forward.time_mix_key": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.feed_forward.time_mix_receptance": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.feed_forward.value.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.ln1.bias": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.ln1.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.ln2.bias": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.8.ln2.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.9.attention.key.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.9.attention.output.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.9.attention.receptance.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.9.attention.time_decay": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.9.attention.time_first": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.9.attention.time_mix_key": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.9.attention.time_mix_receptance": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.9.attention.time_mix_value": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.9.attention.value.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.9.feed_forward.key.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.9.feed_forward.receptance.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.9.feed_forward.time_mix_key": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.9.feed_forward.time_mix_receptance": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.9.feed_forward.value.weight": "pytorch_model-00006-of-00015.bin", + "rwkv.blocks.9.ln1.bias": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.9.ln1.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.9.ln2.bias": "pytorch_model-00005-of-00015.bin", + "rwkv.blocks.9.ln2.weight": "pytorch_model-00005-of-00015.bin", + "rwkv.embeddings.weight": "pytorch_model-00001-of-00015.bin", + "rwkv.ln_out.bias": "pytorch_model-00015-of-00015.bin", + "rwkv.ln_out.weight": "pytorch_model-00015-of-00015.bin" + } +}