diff --git a/README.md b/README.md
index 84f0be3c6784f8b93ae1a155af41778becff494e..283eb6fdfd290675ed54f40bb71f21e8cbc5350e 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,13 @@
 ---
 title: Text Generation Webui Space
-emoji: ⚡
-colorFrom: gray
-colorTo: yellow
+emoji: 🏃
+colorFrom: yellow
+colorTo: purple
 sdk: gradio
 sdk_version: 3.20.1
-app_file: app.py
+app_file: text-generation-webui/server.py
 pinned: false
 license: mit
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the original project at https://github.com/oobabooga/text-generation-webui
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1f9d50381de819d7bc4c5c7aefecab25825f0a7a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+torch
+torchvision
+torchaudio
+git
\ No newline at end of file
diff --git a/text-generation-webui/.gitignore b/text-generation-webui/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..1b7f0fb8eea3e894694d9ca1541a896102d40376
--- /dev/null
+++ b/text-generation-webui/.gitignore
@@ -0,0 +1,21 @@
+cache/*
+characters/*
+extensions/silero_tts/outputs/*
+extensions/elevenlabs_tts/outputs/*
+logs/*
+models/*
+softprompts/*
+torch-dumps/*
+*pycache*
+*/*pycache*
+*/*/pycache*
+
+settings.json
+img_bot*
+img_me*
+
+!characters/Example.json
+!characters/Example.png
+!models/place-your-models-here.txt
+!softprompts/place-your-softprompts-here.txt
+!torch-dumps/place-your-pt-models-here.txt
diff --git a/text-generation-webui/LICENSE b/text-generation-webui/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..0ad25db4bd1d86c452db3f9602ccdbe172438f52
--- /dev/null
+++ b/text-generation-webui/LICENSE
@@ -0,0 +1,661 @@
+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+  A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.
However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. 
+ + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. 
If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). 
To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. 
+ + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. diff --git a/text-generation-webui/README.md b/text-generation-webui/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c2677399fb7c422dce04d19684f635014582b42 --- /dev/null +++ b/text-generation-webui/README.md @@ -0,0 +1,196 @@ +# Text generation web UI + +A gradio web UI for running Large Language Models like GPT-J 6B, OPT, GALACTICA, LLaMA, and Pygmalion. 
+
+Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) of text generation.
+
+[[Try it on Google Colab]](https://colab.research.google.com/github/oobabooga/AI-Notebooks/blob/main/Colab-TextGen-GPU.ipynb)
+
+|![Image1](https://github.com/oobabooga/screenshots/raw/main/qa.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/cai3.png) |
+|:---:|:---:|
+|![Image3](https://github.com/oobabooga/screenshots/raw/main/gpt4chan.png) | ![Image4](https://github.com/oobabooga/screenshots/raw/main/galactica.png) |
+
+## Features
+
+* Switch between different models using a dropdown menu.
+* Notebook mode that resembles OpenAI's playground.
+* Chat mode for conversation and role playing.
+* Generate nice HTML output for GPT-4chan.
+* Generate Markdown output for [GALACTICA](https://github.com/paperswithcode/galai), including LaTeX support.
+* Support for [Pygmalion](https://huggingface.co/models?search=pygmalionai/pygmalion) and custom characters in JSON or TavernAI Character Card formats ([FAQ](https://github.com/oobabooga/text-generation-webui/wiki/Pygmalion-chat-model-FAQ)).
+* Advanced chat features (send images, get audio responses with TTS).
+* Stream the text output in real time.
+* Load parameter presets from text files.
+* Load large models in 8-bit mode (see [here](https://github.com/oobabooga/text-generation-webui/issues/147#issuecomment-1456040134), [here](https://github.com/oobabooga/text-generation-webui/issues/20#issuecomment-1411650652) and [here](https://www.reddit.com/r/PygmalionAI/comments/1115gom/running_pygmalion_6b_with_8gb_of_vram/) if you are on Windows).
+* Split large models across your GPU(s), CPU, and disk.
+* CPU mode.
+* [FlexGen offload](https://github.com/oobabooga/text-generation-webui/wiki/FlexGen).
+* [DeepSpeed ZeRO-3 offload](https://github.com/oobabooga/text-generation-webui/wiki/DeepSpeed).
+* Get responses via API, [with](https://github.com/oobabooga/text-generation-webui/blob/main/api-example-streaming.py) or [without](https://github.com/oobabooga/text-generation-webui/blob/main/api-example.py) streaming.
+* [Supports the LLaMA model, including 4-bit mode](https://github.com/oobabooga/text-generation-webui/wiki/LLaMA-model).
+* [Supports the RWKV model](https://github.com/oobabooga/text-generation-webui/wiki/RWKV-model).
+* Supports softprompts.
+* [Supports extensions](https://github.com/oobabooga/text-generation-webui/wiki/Extensions).
+* [Works on Google Colab](https://github.com/oobabooga/text-generation-webui/wiki/Running-on-Colab).
+
+## Installation option 1: conda
+
+Open a terminal and copy and paste these commands one at a time ([install conda](https://docs.conda.io/en/latest/miniconda.html) first if you don't have it already):
+
+```
+conda create -n textgen
+conda activate textgen
+conda install pytorch torchvision torchaudio pytorch-cuda=11.7 git -c pytorch -c nvidia
+git clone https://github.com/oobabooga/text-generation-webui
+cd text-generation-webui
+pip install -r requirements.txt
+```
+
+The third line assumes that you have an NVIDIA GPU.
+ +* If you have an AMD GPU, replace the third command with this one: + +``` +pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.2 +``` + +* If you are running it in CPU mode, replace the third command with this one: + +``` +conda install pytorch torchvision torchaudio git -c pytorch +``` + +See also: [Installation instructions for human beings](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings). + +## Installation option 2: one-click installers + +[oobabooga-windows.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-windows.zip) + +[oobabooga-linux.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-linux.zip) + +Just download the zip above, extract it, and double click on "install". The web UI and all its dependencies will be installed in the same folder. + +* To download a model, double click on "download-model" +* To start the web UI, double click on "start-webui" + +## Downloading models + +Models should be placed under `models/model-name`. For instance, `models/gpt-j-6B` for [GPT-J 6B](https://huggingface.co/EleutherAI/gpt-j-6B/tree/main). + +#### Hugging Face + +[Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads) is the main place to download models. These are some noteworthy examples: + +* [GPT-J 6B](https://huggingface.co/EleutherAI/gpt-j-6B/tree/main) +* [GPT-Neo](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads&search=eleutherai+%2F+gpt-neo) +* [Pythia](https://huggingface.co/models?search=eleutherai/pythia) +* [OPT](https://huggingface.co/models?search=facebook/opt) +* [GALACTICA](https://huggingface.co/models?search=facebook/galactica) +* [\*-Erebus](https://huggingface.co/models?search=erebus) (NSFW) +* [Pygmalion](https://huggingface.co/models?search=pygmalion) (NSFW) + +You can automatically download a model from HF using the script `download-model.py`: + + python download-model.py organization/model + +For instance: + + python download-model.py facebook/opt-1.3b + +If you want to download a model manually, note that all you need are the json, txt, and pytorch\*.bin (or model*.safetensors) files. The remaining files are not necessary. + +#### GPT-4chan + +[GPT-4chan](https://huggingface.co/ykilcher/gpt-4chan) has been shut down from Hugging Face, so you need to download it elsewhere. You have two options: + +* Torrent: [16-bit](https://archive.org/details/gpt4chan_model_float16) / [32-bit](https://archive.org/details/gpt4chan_model) +* Direct download: [16-bit](https://theswissbay.ch/pdf/_notpdf_/gpt4chan_model_float16/) / [32-bit](https://theswissbay.ch/pdf/_notpdf_/gpt4chan_model/) + +The 32-bit version is only relevant if you intend to run the model in CPU mode. Otherwise, you should use the 16-bit version. + +After downloading the model, follow these steps: + +1. Place the files under `models/gpt4chan_model_float16` or `models/gpt4chan_model`. +2. Place GPT-J 6B's config.json file in that same folder: [config.json](https://huggingface.co/EleutherAI/gpt-j-6B/raw/main/config.json). +3. 
Download GPT-J 6B's tokenizer files (they will be automatically detected when you attempt to load GPT-4chan): + +``` +python download-model.py EleutherAI/gpt-j-6B --text-only +``` + +## Starting the web UI + + conda activate textgen + python server.py + +Then browse to + +`http://localhost:7860/?__theme=dark` + + + +Optionally, you can use the following command-line flags: + +| Flag | Description | +|-------------|-------------| +| `-h`, `--help` | show this help message and exit | +| `--model MODEL` | Name of the model to load by default. | +| `--notebook` | Launch the web UI in notebook mode, where the output is written to the same text box as the input. | +| `--chat` | Launch the web UI in chat mode.| +| `--cai-chat` | Launch the web UI in chat mode with a style similar to Character.AI's. If the file `img_bot.png` or `img_bot.jpg` exists in the same folder as server.py, this image will be used as the bot's profile picture. Similarly, `img_me.png` or `img_me.jpg` will be used as your profile picture. | +| `--cpu` | Use the CPU to generate text.| +| `--load-in-8bit` | Load the model with 8-bit precision.| +| `--load-in-4bit` | DEPRECATED: use `--gptq-bits 4` instead. | +| `--gptq-bits GPTQ_BITS` | Load a pre-quantized model with specified precision. 2, 3, 4 and 8 (bit) are supported. Currently only works with LLaMA and OPT. | +| `--gptq-model-type MODEL_TYPE` | Model type of pre-quantized model. Currently only LLaMa and OPT are supported. | +| `--bf16` | Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU. | +| `--auto-devices` | Automatically split the model across the available GPU(s) and CPU.| +| `--disk` | If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk. | +| `--disk-cache-dir DISK_CACHE_DIR` | Directory to save the disk cache to. Defaults to `cache/`. | +| `--gpu-memory GPU_MEMORY [GPU_MEMORY ...]` | Maxmimum GPU memory in GiB to be allocated per GPU. Example: `--gpu-memory 10` for a single GPU, `--gpu-memory 10 5` for two GPUs. | +| `--cpu-memory CPU_MEMORY` | Maximum CPU memory in GiB to allocate for offloaded weights. Must be an integer number. Defaults to 99.| +| `--flexgen` | Enable the use of FlexGen offloading. | +| `--percent PERCENT [PERCENT ...]` | FlexGen: allocation percentages. Must be 6 numbers separated by spaces (default: 0, 100, 100, 0, 100, 0). | +| `--compress-weight` | FlexGen: Whether to compress weight (default: False).| +| `--pin-weight [PIN_WEIGHT]` | FlexGen: whether to pin weights (setting this to False reduces CPU memory by 20%). | +| `--deepspeed` | Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration. | +| `--nvme-offload-dir NVME_OFFLOAD_DIR` | DeepSpeed: Directory to use for ZeRO-3 NVME offloading. | +| `--local_rank LOCAL_RANK` | DeepSpeed: Optional argument for distributed setups. | +| `--rwkv-strategy RWKV_STRATEGY` | RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8". | +| `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. | +| `--no-stream` | Don't stream the text output in real time. | +| `--settings SETTINGS_FILE` | Load the default interface settings from this json file. See `settings-template.json` for an example. If you create a file called `settings.json`, this file will be loaded by default without the need to use the `--settings` flag.| +| `--extensions EXTENSIONS [EXTENSIONS ...]` | The list of extensions to load. 
If you want to load more than one extension, write the names separated by spaces. | +| `--listen` | Make the web UI reachable from your local network.| +| `--listen-port LISTEN_PORT` | The listening port that the server will use. | +| `--share` | Create a public URL. This is useful for running the web UI on Google Colab or similar. | +| `--auto-launch` | Open the web UI in the default browser upon launch. | +| `--verbose` | Print the prompts to the terminal. | + +Out of memory errors? [Check this guide](https://github.com/oobabooga/text-generation-webui/wiki/Low-VRAM-guide). + +## Presets + +Inference settings presets can be created under `presets/` as text files. These files are detected automatically at startup. + +By default, 10 presets by NovelAI and KoboldAI are included. These were selected out of a sample of 43 presets after applying a K-Means clustering algorithm and selecting the elements closest to the average of each cluster. + +## System requirements + +Check the [wiki](https://github.com/oobabooga/text-generation-webui/wiki/System-requirements) for some examples of VRAM and RAM usage in both GPU and CPU mode. + +## Contributing + +Pull requests, suggestions, and issue reports are welcome. + +Before reporting a bug, make sure that you have: + +1. Created a conda environment and installed the dependencies exactly as in the *Installation* section above. +2. [Searched](https://github.com/oobabooga/text-generation-webui/issues) to see if an issue already exists for the issue you encountered. + +## Credits + +- Gradio dropdown menu refresh button: https://github.com/AUTOMATIC1111/stable-diffusion-webui +- Verbose preset: Anonymous 4chan user. +- NovelAI and KoboldAI presets: https://github.com/KoboldAI/KoboldAI-Client/wiki/Settings-Presets +- Pygmalion preset, code for early stopping in chat mode, code for some of the sliders, --chat mode colors: https://github.com/PygmalionAI/gradio-ui/ diff --git a/text-generation-webui/api-example-stream.py b/text-generation-webui/api-example-stream.py new file mode 100644 index 0000000000000000000000000000000000000000..a5ed420252fdceab73cc26d83a7b87f60981ec95 --- /dev/null +++ b/text-generation-webui/api-example-stream.py @@ -0,0 +1,90 @@ +''' + +Contributed by SagsMug. Thank you SagsMug. 
+https://github.com/oobabooga/text-generation-webui/pull/175 + +''' + +import asyncio +import json +import random +import string + +import websockets + + +def random_hash(): + letters = string.ascii_lowercase + string.digits + return ''.join(random.choice(letters) for i in range(9)) + +async def run(context): + server = "127.0.0.1" + params = { + 'max_new_tokens': 200, + 'do_sample': True, + 'temperature': 0.5, + 'top_p': 0.9, + 'typical_p': 1, + 'repetition_penalty': 1.05, + 'top_k': 0, + 'min_length': 0, + 'no_repeat_ngram_size': 0, + 'num_beams': 1, + 'penalty_alpha': 0, + 'length_penalty': 1, + 'early_stopping': False, + } + session = random_hash() + + async with websockets.connect(f"ws://{server}:7860/queue/join") as websocket: + while content := json.loads(await websocket.recv()): + #Python3.10 syntax, replace with if elif on older + match content["msg"]: + case "send_hash": + await websocket.send(json.dumps({ + "session_hash": session, + "fn_index": 7 + })) + case "estimation": + pass + case "send_data": + await websocket.send(json.dumps({ + "session_hash": session, + "fn_index": 7, + "data": [ + context, + params['max_new_tokens'], + params['do_sample'], + params['temperature'], + params['top_p'], + params['typical_p'], + params['repetition_penalty'], + params['top_k'], + params['min_length'], + params['no_repeat_ngram_size'], + params['num_beams'], + params['penalty_alpha'], + params['length_penalty'], + params['early_stopping'], + ] + })) + case "process_starts": + pass + case "process_generating" | "process_completed": + yield content["output"]["data"][0] + # You can search for your desired end indicator and + # stop generation by closing the websocket here + if (content["msg"] == "process_completed"): + break + +prompt = "What I would like to say is the following: " + +async def get_result(): + async for response in run(prompt): + # Print intermediate steps + print(response) + + # Print final result + print(response) + +asyncio.run(get_result()) diff --git a/text-generation-webui/api-example.py b/text-generation-webui/api-example.py new file mode 100644 index 0000000000000000000000000000000000000000..0306b7ab8a3fa3d6f57d8474ad74d67f13557b6d --- /dev/null +++ b/text-generation-webui/api-example.py @@ -0,0 +1,59 @@ +''' + +This is an example on how to use the API for oobabooga/text-generation-webui. + +Make sure to start the web UI with the following flags: + +python server.py --model MODEL --listen --no-stream + +Optionally, you can also add the --share flag to generate a public gradio URL, +allowing you to use the API remotely. 
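+
+A minimal end-to-end sketch (the model name is only an example; use any model
+you have placed under models/):
+
+python server.py --model opt-1.3b --listen --no-stream
+python api-example.py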
+ +''' +import requests + +# Server address +server = "127.0.0.1" + +# Generation parameters +# Reference: https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig +params = { + 'max_new_tokens': 200, + 'do_sample': True, + 'temperature': 0.5, + 'top_p': 0.9, + 'typical_p': 1, + 'repetition_penalty': 1.05, + 'top_k': 0, + 'min_length': 0, + 'no_repeat_ngram_size': 0, + 'num_beams': 1, + 'penalty_alpha': 0, + 'length_penalty': 1, + 'early_stopping': False, +} + +# Input prompt +prompt = "What I would like to say is the following: " + +response = requests.post(f"http://{server}:7860/run/textgen", json={ + "data": [ + prompt, + params['max_new_tokens'], + params['do_sample'], + params['temperature'], + params['top_p'], + params['typical_p'], + params['repetition_penalty'], + params['top_k'], + params['min_length'], + params['no_repeat_ngram_size'], + params['num_beams'], + params['penalty_alpha'], + params['length_penalty'], + params['early_stopping'], + ] +}).json() + +reply = response["data"][0] +print(reply) diff --git a/text-generation-webui/characters/Example.json b/text-generation-webui/characters/Example.json new file mode 100644 index 0000000000000000000000000000000000000000..496869c4e6cd643c910fbdf86d748c1c70987020 --- /dev/null +++ b/text-generation-webui/characters/Example.json @@ -0,0 +1,7 @@ +{ + "char_name": "Chiharu Yamada", + "char_persona": "Chiharu Yamada is a young, computer engineer-nerd with a knack for problem solving and a passion for technology.", + "char_greeting": "*Chiharu strides into the room with a smile, her eyes lighting up when she sees you. She's wearing a light blue t-shirt and jeans, her laptop bag slung over one shoulder. She takes a seat next to you, her enthusiasm palpable in the air*\nHey! I'm so excited to finally meet you. I've heard so many great things about you and I'm eager to pick your brain about computers. I'm sure you have a wealth of knowledge that I can learn from. *She grins, eyes twinkling with excitement* Let's get started!", + "world_scenario": "", + "example_dialogue": "{{user}}: So how did you get into computer engineering?\n{{char}}: I've always loved tinkering with technology since I was a kid.\n{{user}}: That's really impressive!\n{{char}}: *She chuckles bashfully* Thanks!\n{{user}}: So what do you do when you're not working on computers?\n{{char}}: I love exploring, going out with friends, watching movies, and playing video games.\n{{user}}: What's your favorite type of computer hardware to work with?\n{{char}}: Motherboards, they're like puzzles and the backbone of any system.\n{{user}}: That sounds great!\n{{char}}: Yeah, it's really fun. I'm lucky to be able to do this as a job." +} diff --git a/text-generation-webui/characters/Example.png b/text-generation-webui/characters/Example.png new file mode 100644 index 0000000000000000000000000000000000000000..a7c4e513c4eaa05db1ebb2164956ea0b85d74a75 Binary files /dev/null and b/text-generation-webui/characters/Example.png differ diff --git a/text-generation-webui/convert-to-flexgen.py b/text-generation-webui/convert-to-flexgen.py new file mode 100644 index 0000000000000000000000000000000000000000..917f023c3fe395c2e3cbcad11c9cdc6b85ef1e7e --- /dev/null +++ b/text-generation-webui/convert-to-flexgen.py @@ -0,0 +1,60 @@ +''' + +Converts a transformers model to a format compatible with flexgen. 
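+
+Example usage (the model path is illustrative; given models/opt-1.3b, the
+converted weights are written to models/opt-1.3b-np):
+
+python convert-to-flexgen.py models/opt-1.3b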
+ +''' + +import argparse +import os +from pathlib import Path + +import numpy as np +import torch +from tqdm import tqdm +from transformers import AutoModelForCausalLM, AutoTokenizer + +parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog,max_help_position=54)) +parser.add_argument('MODEL', type=str, default=None, nargs='?', help="Path to the input model.") +args = parser.parse_args() + +def disable_torch_init(): + """ + Disable the redundant torch default initialization to accelerate model creation. + """ + import torch + global torch_linear_init_backup + global torch_layer_norm_init_backup + + torch_linear_init_backup = torch.nn.Linear.reset_parameters + setattr(torch.nn.Linear, "reset_parameters", lambda self: None) + + torch_layer_norm_init_backup = torch.nn.LayerNorm.reset_parameters + setattr(torch.nn.LayerNorm, "reset_parameters", lambda self: None) + +def restore_torch_init(): + """Rollback the change made by disable_torch_init.""" + import torch + setattr(torch.nn.Linear, "reset_parameters", torch_linear_init_backup) + setattr(torch.nn.LayerNorm, "reset_parameters", torch_layer_norm_init_backup) + +if __name__ == '__main__': + path = Path(args.MODEL) + model_name = path.name + + print(f"Loading {model_name}...") + #disable_torch_init() + model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.float16, low_cpu_mem_usage=True) + #restore_torch_init() + + tokenizer = AutoTokenizer.from_pretrained(path) + + out_folder = Path(f"models/{model_name}-np") + if not Path(out_folder).exists(): + os.mkdir(out_folder) + + print(f"Saving the converted model to {out_folder}...") + for name, param in tqdm(list(model.model.named_parameters())): + name = name.replace("decoder.final_layer_norm", "decoder.layer_norm") + param_path = os.path.join(out_folder, name) + with open(param_path, "wb") as f: + np.save(f, param.cpu().detach().numpy()) diff --git a/text-generation-webui/convert-to-safetensors.py b/text-generation-webui/convert-to-safetensors.py new file mode 100644 index 0000000000000000000000000000000000000000..63baaa9726ab48025d2ba473d029bb3f1153aa3a --- /dev/null +++ b/text-generation-webui/convert-to-safetensors.py @@ -0,0 +1,38 @@ +''' + +Converts a transformers model to safetensors format and shards it. + +This makes it faster to load (because of safetensors) and lowers its RAM usage +while loading (because of sharding). + +Based on the original script by 81300: + +https://gist.github.com/81300/fe5b08bff1cba45296a829b9d6b0f303 + +''' + +import argparse +from pathlib import Path + +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer + +parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog,max_help_position=54)) +parser.add_argument('MODEL', type=str, default=None, nargs='?', help="Path to the input model.") +parser.add_argument('--output', type=str, default=None, help='Path to the output folder (default: models/{model_name}_safetensors).') +parser.add_argument("--max-shard-size", type=str, default="2GB", help="Maximum size of a shard in GB or MB (default: %(default)s).") +parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. 
Requires NVIDIA Ampere GPU.') +args = parser.parse_args() + +if __name__ == '__main__': + path = Path(args.MODEL) + model_name = path.name + + print(f"Loading {model_name}...") + model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if args.bf16 else torch.float16) + tokenizer = AutoTokenizer.from_pretrained(path) + + out_folder = args.output or Path(f"models/{model_name}_safetensors") + print(f"Saving the converted model to {out_folder} with a maximum shard size of {args.max_shard_size}...") + model.save_pretrained(out_folder, max_shard_size=args.max_shard_size, safe_serialization=True) + tokenizer.save_pretrained(out_folder) diff --git a/text-generation-webui/download-model.py b/text-generation-webui/download-model.py new file mode 100644 index 0000000000000000000000000000000000000000..8be398c4e0d3ca0c0a915efb442f432fc2056834 --- /dev/null +++ b/text-generation-webui/download-model.py @@ -0,0 +1,176 @@ +''' +Downloads models from Hugging Face to models/model-name. + +Example: +python download-model.py facebook/opt-1.3b + +''' + +import argparse +import base64 +import json +import multiprocessing +import re +import sys +from pathlib import Path + +import requests +import tqdm + +parser = argparse.ArgumentParser() +parser.add_argument('MODEL', type=str, default=None, nargs='?') +parser.add_argument('--branch', type=str, default='main', help='Name of the Git branch to download from.') +parser.add_argument('--threads', type=int, default=1, help='Number of files to download simultaneously.') +parser.add_argument('--text-only', action='store_true', help='Only download text files (txt/json).') +args = parser.parse_args() + +def get_file(args): + url = args[0] + output_folder = args[1] + idx = args[2] + tot = args[3] + + print(f"Downloading file {idx} of {tot}...") + r = requests.get(url, stream=True) + with open(output_folder / Path(url.split('/')[-1]), 'wb') as f: + total_size = int(r.headers.get('content-length', 0)) + block_size = 1024 + t = tqdm.tqdm(total=total_size, unit='iB', unit_scale=True) + for data in r.iter_content(block_size): + t.update(len(data)) + f.write(data) + t.close() + +def sanitize_branch_name(branch_name): + pattern = re.compile(r"^[a-zA-Z0-9._-]+$") + if pattern.match(branch_name): + return branch_name + else: + raise ValueError("Invalid branch name. 
Only alphanumeric characters, period, underscore and dash are allowed.") + +def select_model_from_default_options(): + models = { + "Pygmalion 6B original": ("PygmalionAI", "pygmalion-6b", "b8344bb4eb76a437797ad3b19420a13922aaabe1"), + "Pygmalion 6B main": ("PygmalionAI", "pygmalion-6b", "main"), + "Pygmalion 6B dev": ("PygmalionAI", "pygmalion-6b", "dev"), + "Pygmalion 2.7B": ("PygmalionAI", "pygmalion-2.7b", "main"), + "Pygmalion 1.3B": ("PygmalionAI", "pygmalion-1.3b", "main"), + "Pygmalion 350m": ("PygmalionAI", "pygmalion-350m", "main"), + "OPT 6.7b": ("facebook", "opt-6.7b", "main"), + "OPT 2.7b": ("facebook", "opt-2.7b", "main"), + "OPT 1.3b": ("facebook", "opt-1.3b", "main"), + "OPT 350m": ("facebook", "opt-350m", "main"), + } + choices = {} + + print("Select the model that you want to download:\n") + for i,name in enumerate(models): + char = chr(ord('A')+i) + choices[char] = name + print(f"{char}) {name}") + char = chr(ord('A')+len(models)) + print(f"{char}) None of the above") + + print() + print("Input> ", end='') + choice = input()[0].strip().upper() + if choice == char: + print("""\nThen type the name of your desired Hugging Face model in the format organization/name. + +Examples: +PygmalionAI/pygmalion-6b +facebook/opt-1.3b +""") + + print("Input> ", end='') + model = input() + branch = "main" + else: + arr = models[choices[choice]] + model = f"{arr[0]}/{arr[1]}" + branch = arr[2] + + return model, branch + +def get_download_links_from_huggingface(model, branch): + base = "https://huggingface.co" + page = f"/api/models/{model}/tree/{branch}?cursor=" + cursor = b"" + + links = [] + classifications = [] + has_pytorch = False + has_safetensors = False + while True: + content = requests.get(f"{base}{page}{cursor.decode()}").content + + dict = json.loads(content) + if len(dict) == 0: + break + + for i in range(len(dict)): + fname = dict[i]['path'] + + is_pytorch = re.match("pytorch_model.*\.bin", fname) + is_safetensors = re.match("model.*\.safetensors", fname) + is_tokenizer = re.match("tokenizer.*\.model", fname) + is_text = re.match(".*\.(txt|json)", fname) or is_tokenizer + + if any((is_pytorch, is_safetensors, is_text, is_tokenizer)): + if is_text: + links.append(f"https://huggingface.co/{model}/resolve/{branch}/{fname}") + classifications.append('text') + continue + if not args.text_only: + links.append(f"https://huggingface.co/{model}/resolve/{branch}/{fname}") + if is_safetensors: + has_safetensors = True + classifications.append('safetensors') + elif is_pytorch: + has_pytorch = True + classifications.append('pytorch') + + cursor = base64.b64encode(f'{{"file_name":"{dict[-1]["path"]}"}}'.encode()) + b':50' + cursor = base64.b64encode(cursor) + cursor = cursor.replace(b'=', b'%3D') + + # If both pytorch and safetensors are available, download safetensors only + if has_pytorch and has_safetensors: + for i in range(len(classifications)-1, -1, -1): + if classifications[i] == 'pytorch': + links.pop(i) + + return links + +if __name__ == '__main__': + model = args.MODEL + branch = args.branch + if model is None: + model, branch = select_model_from_default_options() + else: + if model[-1] == '/': + model = model[:-1] + branch = args.branch + if branch is None: + branch = "main" + else: + try: + branch = sanitize_branch_name(branch) + except ValueError as err_branch: + print(f"Error: {err_branch}") + sys.exit() + if branch != 'main': + output_folder = Path("models") / (model.split('/')[-1] + f'_{branch}') + else: + output_folder = Path("models") / model.split('/')[-1] + if not 
output_folder.exists(): + output_folder.mkdir() + + links = get_download_links_from_huggingface(model, branch) + + # Downloading the files + print(f"Downloading the model to {output_folder}") + pool = multiprocessing.Pool(processes=args.threads) + results = pool.map(get_file, [[links[i], output_folder, i+1, len(links)] for i in range(len(links))]) + pool.close() + pool.join() diff --git a/text-generation-webui/extensions/character_bias/script.py b/text-generation-webui/extensions/character_bias/script.py new file mode 100644 index 0000000000000000000000000000000000000000..35b38c0edcb38512f2472937578a363343a4468c --- /dev/null +++ b/text-generation-webui/extensions/character_bias/script.py @@ -0,0 +1,42 @@ +import gradio as gr + +params = { + "activate": True, + "bias string": " *I am so happy*", +} + +def input_modifier(string): + """ + This function is applied to your text inputs before + they are fed into the model. + """ + + return string + +def output_modifier(string): + """ + This function is applied to the model outputs. + """ + + return string + +def bot_prefix_modifier(string): + """ + This function is only applied in chat mode. It modifies + the prefix text for the Bot and can be used to bias its + behavior. + """ + + if params['activate'] == True: + return f'{string} {params["bias string"].strip()} ' + else: + return string + +def ui(): + # Gradio elements + activate = gr.Checkbox(value=params['activate'], label='Activate character bias') + string = gr.Textbox(value=params["bias string"], label='Character bias') + + # Event functions to update the parameters in the backend + string.change(lambda x: params.update({"bias string": x}), string, None) + activate.change(lambda x: params.update({"activate": x}), activate, None) diff --git a/text-generation-webui/extensions/elevenlabs_tts/requirements.txt b/text-generation-webui/extensions/elevenlabs_tts/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..8ec07a8a7fcf02ca48cc00520e66fcb58c447393 --- /dev/null +++ b/text-generation-webui/extensions/elevenlabs_tts/requirements.txt @@ -0,0 +1,3 @@ +elevenlabslib +soundfile +sounddevice diff --git a/text-generation-webui/extensions/elevenlabs_tts/script.py b/text-generation-webui/extensions/elevenlabs_tts/script.py new file mode 100644 index 0000000000000000000000000000000000000000..90d61efc6aa77bc2377c435eefe4cf623b588168 --- /dev/null +++ b/text-generation-webui/extensions/elevenlabs_tts/script.py @@ -0,0 +1,113 @@ +from pathlib import Path + +import gradio as gr +from elevenlabslib import * +from elevenlabslib.helpers import * + +params = { + 'activate': True, + 'api_key': '12345', + 'selected_voice': 'None', +} + +initial_voice = ['None'] +wav_idx = 0 +user = ElevenLabsUser(params['api_key']) +user_info = None + + +# Check if the API is valid and refresh the UI accordingly. 
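+# Returns a gr.update for the connection status box: 'Connected' only when the
+# extension is active and the subscription data for the given key could be fetched.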
+def check_valid_api(): + + global user, user_info, params + + user = ElevenLabsUser(params['api_key']) + user_info = user._get_subscription_data() + print('checking api') + if params['activate'] == False: + return gr.update(value='Disconnected') + elif user_info is None: + print('Incorrect API Key') + return gr.update(value='Disconnected') + else: + print('Got an API Key!') + return gr.update(value='Connected') + +# Once the API is verified, get the available voices and update the dropdown list +def refresh_voices(): + + global user, user_info + + your_voices = [None] + if user_info is not None: + for voice in user.get_available_voices(): + your_voices.append(voice.initialName) + return gr.Dropdown.update(choices=your_voices) + else: + return + +def remove_surrounded_chars(string): + new_string = "" + in_star = False + for char in string: + if char == '*': + in_star = not in_star + elif not in_star: + new_string += char + return new_string + +def input_modifier(string): + """ + This function is applied to your text inputs before + they are fed into the model. + """ + + return string + +def output_modifier(string): + """ + This function is applied to the model outputs. + """ + + global params, wav_idx, user, user_info + + if params['activate'] == False: + return string + elif user_info == None: + return string + + string = remove_surrounded_chars(string) + string = string.replace('"', '') + string = string.replace('“', '') + string = string.replace('\n', ' ') + string = string.strip() + + if string == '': + string = 'empty reply, try regenerating' + + output_file = Path(f'extensions/elevenlabs_tts/outputs/{wav_idx:06d}.wav'.format(wav_idx)) + voice = user.get_voices_by_name(params['selected_voice'])[0] + audio_data = voice.generate_audio_bytes(string) + save_bytes_to_path(Path(f'extensions/elevenlabs_tts/outputs/{wav_idx:06d}.wav'), audio_data) + + string = f'' + wav_idx += 1 + return string + +def ui(): + + # Gradio elements + with gr.Row(): + activate = gr.Checkbox(value=params['activate'], label='Activate TTS') + connection_status = gr.Textbox(value='Disconnected', label='Connection Status') + voice = gr.Dropdown(value=params['selected_voice'], choices=initial_voice, label='TTS Voice') + with gr.Row(): + api_key = gr.Textbox(placeholder="Enter your API key.", label='API Key') + connect = gr.Button(value='Connect') + + # Event functions to update the parameters in the backend + activate.change(lambda x: params.update({'activate': x}), activate, None) + voice.change(lambda x: params.update({'selected_voice': x}), voice, None) + api_key.change(lambda x: params.update({'api_key': x}), api_key, None) + connect.click(check_valid_api, [], connection_status) + connect.click(refresh_voices, [], voice) diff --git a/text-generation-webui/extensions/gallery/script.py b/text-generation-webui/extensions/gallery/script.py new file mode 100644 index 0000000000000000000000000000000000000000..8a2d7cf988734a7ab0966d047ff3d31ba58324b7 --- /dev/null +++ b/text-generation-webui/extensions/gallery/script.py @@ -0,0 +1,82 @@ +from pathlib import Path + +import gradio as gr + +from modules.html_generator import get_image_cache + + +def generate_html(): + css = """ + .character-gallery { + margin: 1rem 0; + display: grid; + grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); + grid-column-gap: 0.4rem; + grid-row-gap: 1.2rem; + } + + .character-container { + cursor: pointer; + text-align: center; + position: relative; + opacity: 0.85; + } + + .character-container:hover { + opacity: 1; + } + + 
.character-container .placeholder, .character-container img { + width: 150px; + height: 200px; + background-color: gray; + object-fit: cover; + margin: 0 auto; + border-radius: 1rem; + border: 3px solid white; + box-shadow: 3px 3px 6px 0px rgb(0 0 0 / 50%); + } + + .character-name { + margin-top: 0.3rem; + display: block; + font-size: 1.2rem; + font-weight: 600; + overflow-wrap: anywhere; + } + """ + + container_html = f'" + return container_html + +def ui(): + with gr.Accordion("Character gallery"): + update = gr.Button("Refresh") + gallery = gr.HTML(value=generate_html()) + update.click(generate_html, [], gallery) diff --git a/text-generation-webui/extensions/google_translate/requirements.txt b/text-generation-webui/extensions/google_translate/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..554a00df62818f96ba7d396ae39d8e58efbe9bfe --- /dev/null +++ b/text-generation-webui/extensions/google_translate/requirements.txt @@ -0,0 +1 @@ +deep-translator==1.9.2 diff --git a/text-generation-webui/extensions/google_translate/script.py b/text-generation-webui/extensions/google_translate/script.py new file mode 100644 index 0000000000000000000000000000000000000000..68bc54b293086bed1a070a310d276060ee939d44 --- /dev/null +++ b/text-generation-webui/extensions/google_translate/script.py @@ -0,0 +1,42 @@ +import gradio as gr +from deep_translator import GoogleTranslator + +params = { + "language string": "ja", +} + +language_codes = {'Afrikaans': 'af', 'Albanian': 'sq', 'Amharic': 'am', 'Arabic': 'ar', 'Armenian': 'hy', 'Azerbaijani': 'az', 'Basque': 'eu', 'Belarusian': 'be', 'Bengali': 'bn', 'Bosnian': 'bs', 'Bulgarian': 'bg', 'Catalan': 'ca', 'Cebuano': 'ceb', 'Chinese (Simplified)': 'zh-CN', 'Chinese (Traditional)': 'zh-TW', 'Corsican': 'co', 'Croatian': 'hr', 'Czech': 'cs', 'Danish': 'da', 'Dutch': 'nl', 'English': 'en', 'Esperanto': 'eo', 'Estonian': 'et', 'Finnish': 'fi', 'French': 'fr', 'Frisian': 'fy', 'Galician': 'gl', 'Georgian': 'ka', 'German': 'de', 'Greek': 'el', 'Gujarati': 'gu', 'Haitian Creole': 'ht', 'Hausa': 'ha', 'Hawaiian': 'haw', 'Hebrew': 'iw', 'Hindi': 'hi', 'Hmong': 'hmn', 'Hungarian': 'hu', 'Icelandic': 'is', 'Igbo': 'ig', 'Indonesian': 'id', 'Irish': 'ga', 'Italian': 'it', 'Japanese': 'ja', 'Javanese': 'jw', 'Kannada': 'kn', 'Kazakh': 'kk', 'Khmer': 'km', 'Korean': 'ko', 'Kurdish': 'ku', 'Kyrgyz': 'ky', 'Lao': 'lo', 'Latin': 'la', 'Latvian': 'lv', 'Lithuanian': 'lt', 'Luxembourgish': 'lb', 'Macedonian': 'mk', 'Malagasy': 'mg', 'Malay': 'ms', 'Malayalam': 'ml', 'Maltese': 'mt', 'Maori': 'mi', 'Marathi': 'mr', 'Mongolian': 'mn', 'Myanmar (Burmese)': 'my', 'Nepali': 'ne', 'Norwegian': 'no', 'Nyanja (Chichewa)': 'ny', 'Pashto': 'ps', 'Persian': 'fa', 'Polish': 'pl', 'Portuguese (Portugal, Brazil)': 'pt', 'Punjabi': 'pa', 'Romanian': 'ro', 'Russian': 'ru', 'Samoan': 'sm', 'Scots Gaelic': 'gd', 'Serbian': 'sr', 'Sesotho': 'st', 'Shona': 'sn', 'Sindhi': 'sd', 'Sinhala (Sinhalese)': 'si', 'Slovak': 'sk', 'Slovenian': 'sl', 'Somali': 'so', 'Spanish': 'es', 'Sundanese': 'su', 'Swahili': 'sw', 'Swedish': 'sv', 'Tagalog (Filipino)': 'tl', 'Tajik': 'tg', 'Tamil': 'ta', 'Telugu': 'te', 'Thai': 'th', 'Turkish': 'tr', 'Ukrainian': 'uk', 'Urdu': 'ur', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy', 'Xhosa': 'xh', 'Yiddish': 'yi', 'Yoruba': 'yo', 'Zulu': 'zu'} + +def input_modifier(string): + """ + This function is applied to your text inputs before + they are fed into the model. 
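+
+    For this extension, the input is translated from the selected language into
+    English before it reaches the model.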
+ """ + + return GoogleTranslator(source=params['language string'], target='en').translate(string) + +def output_modifier(string): + """ + This function is applied to the model outputs. + """ + + return GoogleTranslator(source='en', target=params['language string']).translate(string) + +def bot_prefix_modifier(string): + """ + This function is only applied in chat mode. It modifies + the prefix text for the Bot and can be used to bias its + behavior. + """ + + return string + +def ui(): + # Finding the language name from the language code to use as the default value + language_name = list(language_codes.keys())[list(language_codes.values()).index(params['language string'])] + + # Gradio elements + language = gr.Dropdown(value=language_name, choices=[k for k in language_codes], label='Language') + + # Event functions to update the parameters in the backend + language.change(lambda x: params.update({"language string": language_codes[x]}), language, None) diff --git a/text-generation-webui/extensions/llama_prompts/script.py b/text-generation-webui/extensions/llama_prompts/script.py new file mode 100644 index 0000000000000000000000000000000000000000..22c96f7c2d6763213a728d77ee6666496d9c4aa3 --- /dev/null +++ b/text-generation-webui/extensions/llama_prompts/script.py @@ -0,0 +1,18 @@ +import gradio as gr +import modules.shared as shared +import pandas as pd + +df = pd.read_csv("https://raw.githubusercontent.com/devbrones/llama-prompts/main/prompts/prompts.csv") + +def get_prompt_by_name(name): + if name == 'None': + return '' + else: + return df[df['Prompt name'] == name].iloc[0]['Prompt'].replace('\\n', '\n') + +def ui(): + if not shared.args.chat or shared.args.cai_chat: + choices = ['None'] + list(df['Prompt name']) + + prompts_menu = gr.Dropdown(value=choices[0], choices=choices, label='Prompt') + prompts_menu.change(get_prompt_by_name, prompts_menu, shared.gradio['textbox']) diff --git a/text-generation-webui/extensions/send_pictures/script.py b/text-generation-webui/extensions/send_pictures/script.py new file mode 100644 index 0000000000000000000000000000000000000000..b0c356329a51edf026f7223a0ee7e5427d8751ce --- /dev/null +++ b/text-generation-webui/extensions/send_pictures/script.py @@ -0,0 +1,46 @@ +import base64 +from io import BytesIO + +import gradio as gr +import torch +from transformers import BlipForConditionalGeneration, BlipProcessor + +import modules.chat as chat +import modules.shared as shared + +# If 'state' is True, will hijack the next chat generation with +# custom input text given by 'value' in the format [text, visible_text] +input_hijack = { + 'state': False, + 'value': ["", ""] +} + +processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base") +model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base", torch_dtype=torch.float32).to("cpu") + +def caption_image(raw_image): + inputs = processor(raw_image.convert('RGB'), return_tensors="pt").to("cpu", torch.float32) + out = model.generate(**inputs, max_new_tokens=100) + return processor.decode(out[0], skip_special_tokens=True) + +def generate_chat_picture(picture, name1, name2): + text = f'*{name1} sends {name2} a picture that contains the following: "{caption_image(picture)}"*' + buffer = BytesIO() + picture.save(buffer, format="JPEG") + img_str = base64.b64encode(buffer.getvalue()).decode('utf-8') + visible_text = f'' + return text, visible_text + +def ui(): + picture_select = gr.Image(label='Send a picture', type='pil') + + function_call = 
'chat.cai_chatbot_wrapper' if shared.args.cai_chat else 'chat.chatbot_wrapper' + + # Prepare the hijack with custom inputs + picture_select.upload(lambda picture, name1, name2: input_hijack.update({"state": True, "value": generate_chat_picture(picture, name1, name2)}), [picture_select, shared.gradio['name1'], shared.gradio['name2']], None) + + # Call the generation function + picture_select.upload(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream) + + # Clear the picture from the upload field + picture_select.upload(lambda : None, [], [picture_select], show_progress=False) diff --git a/text-generation-webui/extensions/silero_tts/requirements.txt b/text-generation-webui/extensions/silero_tts/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f2f0bff55a862de8e643496d90c01713785801a2 --- /dev/null +++ b/text-generation-webui/extensions/silero_tts/requirements.txt @@ -0,0 +1,6 @@ +ipython +omegaconf +pydub +PyYAML +torch +torchaudio diff --git a/text-generation-webui/extensions/silero_tts/script.py b/text-generation-webui/extensions/silero_tts/script.py new file mode 100644 index 0000000000000000000000000000000000000000..f611dc27b7480cd357b77c0c407fcc2bd6df2679 --- /dev/null +++ b/text-generation-webui/extensions/silero_tts/script.py @@ -0,0 +1,169 @@ +import time +from pathlib import Path + +import gradio as gr +import torch + +import modules.chat as chat +import modules.shared as shared + +torch._C._jit_set_profiling_mode(False) + +params = { + 'activate': True, + 'speaker': 'en_56', + 'language': 'en', + 'model_id': 'v3_en', + 'sample_rate': 48000, + 'device': 'cpu', + 'show_text': False, + 'autoplay': True, + 'voice_pitch': 'medium', + 'voice_speed': 'medium', +} + +current_params = params.copy() +voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115'] +voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high'] +voice_speeds = ['x-slow', 'slow', 'medium', 'fast', 'x-fast'] + +# Used for making text xml compatible, needed for voice pitch and speed control +table = str.maketrans({ + "<": "<", + ">": ">", + "&": "&", + "'": "'", + '"': """, +}) + +def xmlesc(txt): + return txt.translate(table) + +def load_model(): + model, example_text = torch.hub.load(repo_or_dir='snakers4/silero-models', model='silero_tts', language=params['language'], speaker=params['model_id']) + model.to(params['device']) + return model +model = load_model() + +def remove_surrounded_chars(string): + new_string = "" + 
in_star = False + for char in string: + if char == '*': + in_star = not in_star + elif not in_star: + new_string += char + return new_string + +def remove_tts_from_history(name1, name2): + for i, entry in enumerate(shared.history['internal']): + shared.history['visible'][i] = [shared.history['visible'][i][0], entry[1]] + return chat.generate_chat_output(shared.history['visible'], name1, name2, shared.character) + +def toggle_text_in_history(name1, name2): + for i, entry in enumerate(shared.history['visible']): + visible_reply = entry[1] + if visible_reply.startswith('')[0]}\n\n{reply}"] + else: + shared.history['visible'][i] = [shared.history['visible'][i][0], f"{visible_reply.split('')[0]}"] + return chat.generate_chat_output(shared.history['visible'], name1, name2, shared.character) + +def input_modifier(string): + """ + This function is applied to your text inputs before + they are fed into the model. + """ + + # Remove autoplay from the last reply + if (shared.args.chat or shared.args.cai_chat) and len(shared.history['internal']) > 0: + shared.history['visible'][-1] = [shared.history['visible'][-1][0], shared.history['visible'][-1][1].replace('controls autoplay>','controls>')] + + shared.processing_message = "*Is recording a voice message...*" + return string + +def output_modifier(string): + """ + This function is applied to the model outputs. + """ + + global model, current_params + + for i in params: + if params[i] != current_params[i]: + model = load_model() + current_params = params.copy() + break + + if params['activate'] == False: + return string + + original_string = string + string = remove_surrounded_chars(string) + string = string.replace('"', '') + string = string.replace('“', '') + string = string.replace('\n', ' ') + string = string.strip() + + if string == '': + string = '*Empty reply, try regenerating*' + else: + output_file = Path(f'extensions/silero_tts/outputs/{shared.character}_{int(time.time())}.wav') + prosody = ''.format(params['voice_speed'], params['voice_pitch']) + silero_input = f'{prosody}{xmlesc(string)}' + model.save_wav(ssml_text=silero_input, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file)) + + autoplay = 'autoplay' if params['autoplay'] else '' + string = f'' + if params['show_text']: + string += f'\n\n{original_string}' + + shared.processing_message = "*Is typing...*" + return string + +def bot_prefix_modifier(string): + """ + This function is only applied in chat mode. It modifies + the prefix text for the Bot and can be used to bias its + behavior. 
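+
+    This extension leaves the prefix unchanged.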
+ """ + + return string + +def ui(): + # Gradio elements + with gr.Accordion("Silero TTS"): + with gr.Row(): + activate = gr.Checkbox(value=params['activate'], label='Activate TTS') + autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically') + show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player') + voice = gr.Dropdown(value=params['speaker'], choices=voices_by_gender, label='TTS voice') + with gr.Row(): + v_pitch = gr.Dropdown(value=params['voice_pitch'], choices=voice_pitches, label='Voice pitch') + v_speed = gr.Dropdown(value=params['voice_speed'], choices=voice_speeds, label='Voice speed') + with gr.Row(): + convert = gr.Button('Permanently replace audios with the message texts') + convert_cancel = gr.Button('Cancel', visible=False) + convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False) + + # Convert history with confirmation + convert_arr = [convert_confirm, convert, convert_cancel] + convert.click(lambda :[gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr) + convert_confirm.click(lambda :[gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr) + convert_confirm.click(remove_tts_from_history, [shared.gradio['name1'], shared.gradio['name2']], shared.gradio['display']) + convert_confirm.click(lambda : chat.save_history(timestamp=False), [], [], show_progress=False) + convert_cancel.click(lambda :[gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr) + + # Toggle message text in history + show_text.change(lambda x: params.update({"show_text": x}), show_text, None) + show_text.change(toggle_text_in_history, [shared.gradio['name1'], shared.gradio['name2']], shared.gradio['display']) + show_text.change(lambda : chat.save_history(timestamp=False), [], [], show_progress=False) + + # Event functions to update the parameters in the backend + activate.change(lambda x: params.update({"activate": x}), activate, None) + autoplay.change(lambda x: params.update({"autoplay": x}), autoplay, None) + voice.change(lambda x: params.update({"speaker": x}), voice, None) + v_pitch.change(lambda x: params.update({"voice_pitch": x}), v_pitch, None) + v_speed.change(lambda x: params.update({"voice_speed": x}), v_speed, None) diff --git a/text-generation-webui/models/place-your-models-here.txt b/text-generation-webui/models/place-your-models-here.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/text-generation-webui/modules/GPTQ_loader.py b/text-generation-webui/modules/GPTQ_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..c2723490bbe214e351634ca4054f74a0b5334b28 --- /dev/null +++ b/text-generation-webui/modules/GPTQ_loader.py @@ -0,0 +1,71 @@ +import sys +from pathlib import Path + +import accelerate +import torch + +import modules.shared as shared + +sys.path.insert(0, str(Path("repositories/GPTQ-for-LLaMa"))) +import llama +import opt + + +def load_quantized(model_name): + if not shared.args.gptq_model_type: + # Try to determine model type from model name + model_type = model_name.split('-')[0].lower() + if model_type not in ('llama', 'opt'): + print("Can't determine model type from model name. 
Please specify it manually using --gptq-model-type " + "argument") + exit() + else: + model_type = shared.args.gptq_model_type.lower() + + if model_type == 'llama': + load_quant = llama.load_quant + elif model_type == 'opt': + load_quant = opt.load_quant + else: + print("Unknown pre-quantized model type specified. Only 'llama' and 'opt' are supported") + exit() + + path_to_model = Path(f'models/{model_name}') + if path_to_model.name.lower().startswith('llama-7b'): + pt_model = f'llama-7b-{shared.args.gptq_bits}bit.pt' + elif path_to_model.name.lower().startswith('llama-13b'): + pt_model = f'llama-13b-{shared.args.gptq_bits}bit.pt' + elif path_to_model.name.lower().startswith('llama-30b'): + pt_model = f'llama-30b-{shared.args.gptq_bits}bit.pt' + elif path_to_model.name.lower().startswith('llama-65b'): + pt_model = f'llama-65b-{shared.args.gptq_bits}bit.pt' + else: + pt_model = f'{model_name}-{shared.args.gptq_bits}bit.pt' + + # Try to find the .pt both in models/ and in the subfolder + pt_path = None + for path in [Path(p) for p in [f"models/{pt_model}", f"{path_to_model}/{pt_model}"]]: + if path.exists(): + pt_path = path + + if not pt_path: + print(f"Could not find {pt_model}, exiting...") + exit() + + model = load_quant(str(path_to_model), str(pt_path), shared.args.gptq_bits) + + # Multiple GPUs or GPU+CPU + if shared.args.gpu_memory: + max_memory = {} + for i in range(len(shared.args.gpu_memory)): + max_memory[i] = f"{shared.args.gpu_memory[i]}GiB" + max_memory['cpu'] = f"{shared.args.cpu_memory or '99'}GiB" + + device_map = accelerate.infer_auto_device_map(model, max_memory=max_memory, no_split_module_classes=["LLaMADecoderLayer"]) + model = accelerate.dispatch_model(model, device_map=device_map) + + # Single GPU + else: + model = model.to(torch.device('cuda:0')) + + return model diff --git a/text-generation-webui/modules/RWKV.py b/text-generation-webui/modules/RWKV.py new file mode 100644 index 0000000000000000000000000000000000000000..5cf8937ad37944c0cebeeb8e0891bec1474724ea --- /dev/null +++ b/text-generation-webui/modules/RWKV.py @@ -0,0 +1,74 @@ +import os +from pathlib import Path + +import numpy as np +from tokenizers import Tokenizer + +import modules.shared as shared +from modules.callbacks import Iteratorize + +np.set_printoptions(precision=4, suppress=True, linewidth=200) + +os.environ['RWKV_JIT_ON'] = '1' +os.environ["RWKV_CUDA_ON"] = '1' if shared.args.rwkv_cuda_on else '0' # use CUDA kernel for seq mode (much faster) + +from rwkv.model import RWKV +from rwkv.utils import PIPELINE, PIPELINE_ARGS + + +class RWKVModel: + def __init__(self): + pass + + @classmethod + def from_pretrained(self, path, dtype="fp16", device="cuda"): + tokenizer_path = Path(f"{path.parent}/20B_tokenizer.json") + + if shared.args.rwkv_strategy is None: + model = RWKV(model=str(path), strategy=f'{device} {dtype}') + else: + model = RWKV(model=str(path), strategy=shared.args.rwkv_strategy) + pipeline = PIPELINE(model, str(tokenizer_path)) + + result = self() + result.pipeline = pipeline + return result + + def generate(self, context="", token_count=20, temperature=1, top_p=1, top_k=50, alpha_frequency=0.1, alpha_presence=0.1, token_ban=[0], token_stop=[], callback=None): + args = PIPELINE_ARGS( + temperature = temperature, + top_p = top_p, + top_k = top_k, + alpha_frequency = alpha_frequency, # Frequency Penalty (as in GPT-3) + alpha_presence = alpha_presence, # Presence Penalty (as in GPT-3) + token_ban = token_ban, # ban the generation of some tokens + token_stop = token_stop + ) + + return 
context+self.pipeline.generate(context, token_count=token_count, args=args, callback=callback) + + def generate_with_streaming(self, **kwargs): + with Iteratorize(self.generate, kwargs, callback=None) as generator: + reply = kwargs['context'] + for token in generator: + reply += token + yield reply + +class RWKVTokenizer: + def __init__(self): + pass + + @classmethod + def from_pretrained(self, path): + tokenizer_path = path / "20B_tokenizer.json" + tokenizer = Tokenizer.from_file(str(tokenizer_path)) + + result = self() + result.tokenizer = tokenizer + return result + + def encode(self, prompt): + return self.tokenizer.encode(prompt).ids + + def decode(self, ids): + return self.tokenizer.decode(ids) diff --git a/text-generation-webui/modules/callbacks.py b/text-generation-webui/modules/callbacks.py new file mode 100644 index 0000000000000000000000000000000000000000..faa4a5e9991e1ae711589fed61e7d1f48e28fed3 --- /dev/null +++ b/text-generation-webui/modules/callbacks.py @@ -0,0 +1,98 @@ +import gc +from queue import Queue +from threading import Thread + +import torch +import transformers + +import modules.shared as shared + +# Copied from https://github.com/PygmalionAI/gradio-ui/ +class _SentinelTokenStoppingCriteria(transformers.StoppingCriteria): + + def __init__(self, sentinel_token_ids: torch.LongTensor, + starting_idx: int): + transformers.StoppingCriteria.__init__(self) + self.sentinel_token_ids = sentinel_token_ids + self.starting_idx = starting_idx + + def __call__(self, input_ids: torch.LongTensor, + _scores: torch.FloatTensor) -> bool: + for sample in input_ids: + trimmed_sample = sample[self.starting_idx:] + # Can't unfold, output is still too tiny. Skip. + if trimmed_sample.shape[-1] < self.sentinel_token_ids.shape[-1]: + continue + + for window in trimmed_sample.unfold( + 0, self.sentinel_token_ids.shape[-1], 1): + if torch.all(torch.eq(self.sentinel_token_ids, window)): + return True + return False + +class Stream(transformers.StoppingCriteria): + def __init__(self, callback_func=None): + self.callback_func = callback_func + + def __call__(self, input_ids, scores) -> bool: + if self.callback_func is not None: + self.callback_func(input_ids[0]) + return False + +class Iteratorize: + + """ + Transforms a function that takes a callback + into a lazy iterator (generator). 
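+
+    The wrapped function runs in a background thread; each value it passes to the
+    injected callback is put on a queue and yielded by this iterator.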
+ """ + + def __init__(self, func, kwargs={}, callback=None): + self.mfunc=func + self.c_callback=callback + self.q = Queue() + self.sentinel = object() + self.kwargs = kwargs + self.stop_now = False + + def _callback(val): + if self.stop_now: + raise ValueError + self.q.put(val) + + def gentask(): + try: + ret = self.mfunc(callback=_callback, **self.kwargs) + except ValueError: + pass + clear_torch_cache() + self.q.put(self.sentinel) + if self.c_callback: + self.c_callback(ret) + + self.thread = Thread(target=gentask) + self.thread.start() + + def __iter__(self): + return self + + def __next__(self): + obj = self.q.get(True,None) + if obj is self.sentinel: + raise StopIteration + else: + return obj + + def __del__(self): + clear_torch_cache() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop_now = True + clear_torch_cache() + +def clear_torch_cache(): + gc.collect() + if not shared.args.cpu: + torch.cuda.empty_cache() diff --git a/text-generation-webui/modules/chat.py b/text-generation-webui/modules/chat.py new file mode 100644 index 0000000000000000000000000000000000000000..bd45b879f92f366255c6f2308ccf135dd61bda1d --- /dev/null +++ b/text-generation-webui/modules/chat.py @@ -0,0 +1,398 @@ +import base64 +import copy +import io +import json +import re +from datetime import datetime +from pathlib import Path + +from PIL import Image + +import modules.extensions as extensions_module +import modules.shared as shared +from modules.extensions import apply_extensions +from modules.html_generator import generate_chat_html +from modules.text_generation import encode, generate_reply, get_max_prompt_length + + +# This gets the new line characters right. +def clean_chat_message(text): + text = text.replace('\n', '\n\n') + text = re.sub(r"\n{3,}", "\n\n", text) + text = text.strip() + return text + +def generate_chat_output(history, name1, name2, character): + if shared.args.cai_chat: + return generate_chat_html(history, name1, name2, character) + else: + return history + +def generate_chat_prompt(user_input, max_new_tokens, name1, name2, context, chat_prompt_size, impersonate=False): + user_input = clean_chat_message(user_input) + rows = [f"{context.strip()}\n"] + + if shared.soft_prompt: + chat_prompt_size -= shared.soft_prompt_tensor.shape[1] + max_length = min(get_max_prompt_length(max_new_tokens), chat_prompt_size) + + i = len(shared.history['internal'])-1 + while i >= 0 and len(encode(''.join(rows), max_new_tokens)[0]) < max_length: + rows.insert(1, f"{name2}: {shared.history['internal'][i][1].strip()}\n") + if not (shared.history['internal'][i][0] == '<|BEGIN-VISIBLE-CHAT|>'): + rows.insert(1, f"{name1}: {shared.history['internal'][i][0].strip()}\n") + i -= 1 + + if not impersonate: + rows.append(f"{name1}: {user_input}\n") + rows.append(apply_extensions(f"{name2}:", "bot_prefix")) + limit = 3 + else: + rows.append(f"{name1}:") + limit = 2 + + while len(rows) > limit and len(encode(''.join(rows), max_new_tokens)[0]) >= max_length: + rows.pop(1) + + prompt = ''.join(rows) + return prompt + +def extract_message_from_reply(question, reply, name1, name2, check, impersonate=False): + next_character_found = False + + asker = name1 if not impersonate else name2 + replier = name2 if not impersonate else name1 + + previous_idx = [m.start() for m in re.finditer(f"(^|\n){re.escape(replier)}:", question)] + idx = [m.start() for m in re.finditer(f"(^|\n){re.escape(replier)}:", reply)] + idx = idx[max(len(previous_idx)-1, 0)] + + if not impersonate: + 
reply = reply[idx + 1 + len(apply_extensions(f"{replier}:", "bot_prefix")):] + else: + reply = reply[idx + 1 + len(f"{replier}:"):] + + if check: + lines = reply.split('\n') + reply = lines[0].strip() + if len(lines) > 1: + next_character_found = True + else: + idx = reply.find(f"\n{asker}:") + if idx != -1: + reply = reply[:idx] + next_character_found = True + reply = clean_chat_message(reply) + + # If something like "\nYo" is generated just before "\nYou:" + # is completed, trim it + next_turn = f"\n{asker}:" + for j in range(len(next_turn)-1, 0, -1): + if reply[-j:] == next_turn[:j]: + reply = reply[:-j] + break + + return reply, next_character_found + +def stop_everything_event(): + shared.stop_everything = True + +def chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1, regenerate=False): + shared.stop_everything = False + just_started = True + eos_token = '\n' if check else None + name1_original = name1 + if 'pygmalion' in shared.model_name.lower(): + name1 = "You" + + # Check if any extension wants to hijack this function call + visible_text = None + custom_generate_chat_prompt = None + for extension, _ in extensions_module.iterator(): + if hasattr(extension, 'input_hijack') and extension.input_hijack['state'] == True: + extension.input_hijack['state'] = False + text, visible_text = extension.input_hijack['value'] + if custom_generate_chat_prompt is None and hasattr(extension, 'custom_generate_chat_prompt'): + custom_generate_chat_prompt = extension.custom_generate_chat_prompt + + if visible_text is None: + visible_text = text + if shared.args.chat: + visible_text = visible_text.replace('\n', '
') + text = apply_extensions(text, "input") + + if custom_generate_chat_prompt is None: + prompt = generate_chat_prompt(text, max_new_tokens, name1, name2, context, chat_prompt_size) + else: + prompt = custom_generate_chat_prompt(text, max_new_tokens, name1, name2, context, chat_prompt_size) + + # Yield *Is typing...* + if not regenerate: + yield shared.history['visible']+[[visible_text, shared.processing_message]] + + # Generate + reply = '' + for i in range(chat_generation_attempts): + for reply in generate_reply(f"{prompt}{' ' if len(reply) > 0 else ''}{reply}", max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=eos_token, stopping_string=f"\n{name1}:"): + + # Extracting the reply + reply, next_character_found = extract_message_from_reply(prompt, reply, name1, name2, check) + visible_reply = re.sub("(||{{user}})", name1_original, reply) + visible_reply = apply_extensions(visible_reply, "output") + if shared.args.chat: + visible_reply = visible_reply.replace('\n', '
') + + # We need this global variable to handle the Stop event, + # otherwise gradio gets confused + if shared.stop_everything: + return shared.history['visible'] + if just_started: + just_started = False + shared.history['internal'].append(['', '']) + shared.history['visible'].append(['', '']) + + shared.history['internal'][-1] = [text, reply] + shared.history['visible'][-1] = [visible_text, visible_reply] + if not shared.args.no_stream: + yield shared.history['visible'] + if next_character_found: + break + + yield shared.history['visible'] + +def impersonate_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1): + eos_token = '\n' if check else None + + if 'pygmalion' in shared.model_name.lower(): + name1 = "You" + + prompt = generate_chat_prompt(text, max_new_tokens, name1, name2, context, chat_prompt_size, impersonate=True) + + reply = '' + # Yield *Is typing...* + yield shared.processing_message + for i in range(chat_generation_attempts): + for reply in generate_reply(prompt+reply, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=eos_token, stopping_string=f"\n{name2}:"): + reply, next_character_found = extract_message_from_reply(prompt, reply, name1, name2, check, impersonate=True) + yield reply + if next_character_found: + break + yield reply + +def cai_chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1): + for _history in chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts): + yield generate_chat_html(_history, name1, name2, shared.character) + +def regenerate_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1): + if (shared.character != 'None' and len(shared.history['visible']) == 1) or len(shared.history['internal']) == 0: + yield generate_chat_output(shared.history['visible'], name1, name2, shared.character) + else: + last_visible = shared.history['visible'].pop() + last_internal = shared.history['internal'].pop() + # Yield '*Is typing...*' + yield generate_chat_output(shared.history['visible']+[[last_visible[0], shared.processing_message]], name1, name2, shared.character) + for _history in chatbot_wrapper(last_internal[0], max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts, regenerate=True): + if shared.args.cai_chat: + shared.history['visible'][-1] = [last_visible[0], _history[-1][1]] + else: + shared.history['visible'][-1] = (last_visible[0], _history[-1][1]) + yield 
generate_chat_output(shared.history['visible'], name1, name2, shared.character) + +def remove_last_message(name1, name2): + if len(shared.history['visible']) > 0 and not shared.history['internal'][-1][0] == '<|BEGIN-VISIBLE-CHAT|>': + last = shared.history['visible'].pop() + shared.history['internal'].pop() + else: + last = ['', ''] + + if shared.args.cai_chat: + return generate_chat_html(shared.history['visible'], name1, name2, shared.character), last[0] + else: + return shared.history['visible'], last[0] + +def send_last_reply_to_input(): + if len(shared.history['internal']) > 0: + return shared.history['internal'][-1][1] + else: + return '' + +def replace_last_reply(text, name1, name2): + if len(shared.history['visible']) > 0: + if shared.args.cai_chat: + shared.history['visible'][-1][1] = text + else: + shared.history['visible'][-1] = (shared.history['visible'][-1][0], text) + shared.history['internal'][-1][1] = apply_extensions(text, "input") + + return generate_chat_output(shared.history['visible'], name1, name2, shared.character) + +def clear_html(): + return generate_chat_html([], "", "", shared.character) + +def clear_chat_log(name1, name2): + if shared.character != 'None': + found = False + for i in range(len(shared.history['internal'])): + if '<|BEGIN-VISIBLE-CHAT|>' in shared.history['internal'][i][0]: + shared.history['visible'] = [['', apply_extensions(shared.history['internal'][i][1], "output")]] + shared.history['internal'] = [shared.history['internal'][i]] + found = True + break + if not found: + shared.history['visible'] = [] + shared.history['internal'] = [] + else: + shared.history['internal'] = [] + shared.history['visible'] = [] + + return generate_chat_output(shared.history['visible'], name1, name2, shared.character) + +def redraw_html(name1, name2): + return generate_chat_html(shared.history['visible'], name1, name2, shared.character) + +def tokenize_dialogue(dialogue, name1, name2): + _history = [] + + dialogue = re.sub('', '', dialogue) + dialogue = re.sub('', '', dialogue) + dialogue = re.sub('(\n|^)[Aa]non:', '\\1You:', dialogue) + dialogue = re.sub('(\n|^)\[CHARACTER\]:', f'\\g<1>{name2}:', dialogue) + idx = [m.start() for m in re.finditer(f"(^|\n)({re.escape(name1)}|{re.escape(name2)}):", dialogue)] + if len(idx) == 0: + return _history + + messages = [] + for i in range(len(idx)-1): + messages.append(dialogue[idx[i]:idx[i+1]].strip()) + messages.append(dialogue[idx[-1]:].strip()) + + entry = ['', ''] + for i in messages: + if i.startswith(f'{name1}:'): + entry[0] = i[len(f'{name1}:'):].strip() + elif i.startswith(f'{name2}:'): + entry[1] = i[len(f'{name2}:'):].strip() + if not (len(entry[0]) == 0 and len(entry[1]) == 0): + _history.append(entry) + entry = ['', ''] + + print("\033[1;32;1m\nDialogue tokenized to:\033[0;37;0m\n", end='') + for row in _history: + for column in row: + print("\n") + for line in column.strip().split('\n'): + print("| "+line+"\n") + print("|\n") + print("------------------------------") + + return _history + +def save_history(timestamp=True): + prefix = '' if shared.character == 'None' else f"{shared.character}_" + if timestamp: + fname = f"{prefix}{datetime.now().strftime('%Y%m%d-%H%M%S')}.json" + else: + fname = f"{prefix}persistent.json" + if not Path('logs').exists(): + Path('logs').mkdir() + with open(Path(f'logs/{fname}'), 'w', encoding='utf-8') as f: + f.write(json.dumps({'data': shared.history['internal'], 'data_visible': shared.history['visible']}, indent=2)) + return Path(f'logs/{fname}') + +def load_history(file, name1, 
name2): + file = file.decode('utf-8') + try: + j = json.loads(file) + if 'data' in j: + shared.history['internal'] = j['data'] + if 'data_visible' in j: + shared.history['visible'] = j['data_visible'] + else: + shared.history['visible'] = copy.deepcopy(shared.history['internal']) + # Compatibility with Pygmalion AI's official web UI + elif 'chat' in j: + shared.history['internal'] = [':'.join(x.split(':')[1:]).strip() for x in j['chat']] + if len(j['chat']) > 0 and j['chat'][0].startswith(f'{name2}:'): + shared.history['internal'] = [['<|BEGIN-VISIBLE-CHAT|>', shared.history['internal'][0]]] + [[shared.history['internal'][i], shared.history['internal'][i+1]] for i in range(1, len(shared.history['internal'])-1, 2)] + shared.history['visible'] = copy.deepcopy(shared.history['internal']) + shared.history['visible'][0][0] = '' + else: + shared.history['internal'] = [[shared.history['internal'][i], shared.history['internal'][i+1]] for i in range(0, len(shared.history['internal'])-1, 2)] + shared.history['visible'] = copy.deepcopy(shared.history['internal']) + except: + shared.history['internal'] = tokenize_dialogue(file, name1, name2) + shared.history['visible'] = copy.deepcopy(shared.history['internal']) + +def load_default_history(name1, name2): + if Path('logs/persistent.json').exists(): + load_history(open(Path('logs/persistent.json'), 'rb').read(), name1, name2) + else: + shared.history['internal'] = [] + shared.history['visible'] = [] + +def load_character(_character, name1, name2): + context = "" + shared.history['internal'] = [] + shared.history['visible'] = [] + if _character != 'None': + shared.character = _character + data = json.loads(open(Path(f'characters/{_character}.json'), 'r', encoding='utf-8').read()) + name2 = data['char_name'] + if 'char_persona' in data and data['char_persona'] != '': + context += f"{data['char_name']}'s Persona: {data['char_persona']}\n" + if 'world_scenario' in data and data['world_scenario'] != '': + context += f"Scenario: {data['world_scenario']}\n" + context = f"{context.strip()}\n\n" + if 'example_dialogue' in data and data['example_dialogue'] != '': + data['example_dialogue'] = data['example_dialogue'].replace('{{user}}', name1).replace('{{char}}', name2) + data['example_dialogue'] = data['example_dialogue'].replace('', name1).replace('', name2) + context += f"{data['example_dialogue'].strip()}\n" + if 'char_greeting' in data and len(data['char_greeting'].strip()) > 0: + shared.history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', data['char_greeting']]] + shared.history['visible'] += [['', apply_extensions(data['char_greeting'], "output")]] + else: + shared.history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', "Hello there!"]] + shared.history['visible'] += [['', "Hello there!"]] + else: + shared.character = None + context = shared.settings['context_pygmalion'] + name2 = shared.settings['name2_pygmalion'] + + if Path(f'logs/{shared.character}_persistent.json').exists(): + load_history(open(Path(f'logs/{shared.character}_persistent.json'), 'rb').read(), name1, name2) + + if shared.args.cai_chat: + return name2, context, generate_chat_html(shared.history['visible'], name1, name2, shared.character) + else: + return name2, context, shared.history['visible'] + +def upload_character(json_file, img, tavern=False): + json_file = json_file if type(json_file) == str else json_file.decode('utf-8') + data = json.loads(json_file) + outfile_name = data["char_name"] + i = 1 + while Path(f'characters/{outfile_name}.json').exists(): + outfile_name = 
f'{data["char_name"]}_{i:03d}' + i += 1 + if tavern: + outfile_name = f'TavernAI-{outfile_name}' + with open(Path(f'characters/{outfile_name}.json'), 'w', encoding='utf-8') as f: + f.write(json_file) + if img is not None: + img = Image.open(io.BytesIO(img)) + img.save(Path(f'characters/{outfile_name}.png')) + print(f'New character saved to "characters/{outfile_name}.json".') + return outfile_name + +def upload_tavern_character(img, name1, name2): + _img = Image.open(io.BytesIO(img)) + _img.getexif() + decoded_string = base64.b64decode(_img.info['chara']) + _json = json.loads(decoded_string) + _json = {"char_name": _json['name'], "char_persona": _json['description'], "char_greeting": _json["first_mes"], "example_dialogue": _json['mes_example'], "world_scenario": _json['scenario']} + return upload_character(json.dumps(_json), img, tavern=True) + +def upload_your_profile_picture(img): + img = Image.open(io.BytesIO(img)) + img.save(Path('img_me.png')) + print('Profile picture saved to "img_me.png"') diff --git a/text-generation-webui/modules/deepspeed_parameters.py b/text-generation-webui/modules/deepspeed_parameters.py new file mode 100644 index 0000000000000000000000000000000000000000..3dbed437f5b5196d0b1fcbc582085319fb8d40d1 --- /dev/null +++ b/text-generation-webui/modules/deepspeed_parameters.py @@ -0,0 +1,75 @@ +def generate_ds_config(ds_bf16, train_batch_size, nvme_offload_dir): + + ''' + DeepSpeed configration + https://huggingface.co/docs/transformers/main_classes/deepspeed + ''' + + if nvme_offload_dir: + ds_config = { + "fp16": { + "enabled": not ds_bf16, + }, + "bf16": { + "enabled": ds_bf16, + }, + "zero_optimization": { + "stage": 3, + "offload_param": { + "device": "nvme", + "nvme_path": nvme_offload_dir, + "pin_memory": True, + "buffer_count": 5, + "buffer_size": 1e9, + "max_in_cpu": 1e9 + }, + "overlap_comm": True, + "reduce_bucket_size": "auto", + "contiguous_gradients": True, + "sub_group_size": 1e8, + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": "auto", + "stage3_max_reuse_distance": "auto", + }, + "aio": { + "block_size": 262144, + "queue_depth": 32, + "thread_count": 1, + "single_submit": False, + "overlap_events": True + }, + "steps_per_print": 2000, + "train_batch_size": train_batch_size, + "train_micro_batch_size_per_gpu": 1, + "wall_clock_breakdown": False + } + else: + ds_config = { + "fp16": { + "enabled": not ds_bf16, + }, + "bf16": { + "enabled": ds_bf16, + }, + "zero_optimization": { + "stage": 3, + "offload_param": { + "device": "cpu", + "pin_memory": True + }, + "overlap_comm": True, + "contiguous_gradients": True, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": "auto", + "stage3_max_reuse_distance": "auto", + }, + "steps_per_print": 2000, + "train_batch_size": train_batch_size, + "train_micro_batch_size_per_gpu": 1, + "wall_clock_breakdown": False + } + + return ds_config diff --git a/text-generation-webui/modules/extensions.py b/text-generation-webui/modules/extensions.py new file mode 100644 index 0000000000000000000000000000000000000000..c8de8a7bc9ebd331d65704996a764e7cc279a6e5 --- /dev/null +++ b/text-generation-webui/modules/extensions.py @@ -0,0 +1,45 @@ +import extensions +import modules.shared as shared + +state = {} +available_extensions = [] + +def load_extensions(): + global state + for i, name in enumerate(shared.args.extensions): + if name in available_extensions: + 
print(f'Loading the extension "{name}"... ', end='') + exec(f"import extensions.{name}.script") + state[name] = [True, i] + print('Ok.') + +# This iterator returns the extensions in the order specified in the command-line +def iterator(): + for name in sorted(state, key=lambda x : state[x][1]): + if state[name][0] == True: + yield eval(f"extensions.{name}.script"), name + +# Extension functions that map string -> string +def apply_extensions(text, typ): + for extension, _ in iterator(): + if typ == "input" and hasattr(extension, "input_modifier"): + text = extension.input_modifier(text) + elif typ == "output" and hasattr(extension, "output_modifier"): + text = extension.output_modifier(text) + elif typ == "bot_prefix" and hasattr(extension, "bot_prefix_modifier"): + text = extension.bot_prefix_modifier(text) + return text + +def create_extensions_block(): + # Updating the default values + for extension, name in iterator(): + if hasattr(extension, 'params'): + for param in extension.params: + _id = f"{name}-{param}" + if _id in shared.settings: + extension.params[param] = shared.settings[_id] + + # Creating the extension ui elements + for extension, name in iterator(): + if hasattr(extension, "ui"): + extension.ui() diff --git a/text-generation-webui/modules/html_generator.py b/text-generation-webui/modules/html_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..162040bac68c2e987b33a02ccb12e90b51a63b2d --- /dev/null +++ b/text-generation-webui/modules/html_generator.py @@ -0,0 +1,357 @@ +''' + +This is a library for formatting GPT-4chan and chat outputs as nice HTML. + +''' + +import os +import re +from pathlib import Path + +from PIL import Image + +# This is to store the paths to the thumbnails of the profile pictures +image_cache = {} + +def generate_basic_html(s): + css = """ + .container { + max-width: 600px; + margin-left: auto; + margin-right: auto; + background-color: rgb(31, 41, 55); + padding:3em; + } + .container p { + font-size: 16px !important; + color: white !important; + margin-bottom: 22px; + line-height: 1.4 !important; + } + """ + s = '\n'.join([f'

<p>{line}</p>' for line in s.split('\n')]) + s = f'<style>{css}</style><div class="container">{s}</div>' + return s + +def process_post(post, c): + t = post.split('\n') + number = t[0].split(' ')[1] + if len(t) > 1: + src = '\n'.join(t[1:]) + else: + src = '' + src = re.sub('>', '&gt;', src) + src = re.sub('(&gt;&gt;[0-9]*)', '<span class="quote">\\1</span>', src) + src = re.sub('\n', '<br>\n', src) + src = f'<blockquote class="message">
{src}\n' + src = f'Anonymous No.{number}\n{src}' + return src + +def generate_4chan_html(f): + css = """ + + #parent #container { + background-color: #eef2ff; + padding: 17px; + } + #parent #container .reply { + background-color: rgb(214, 218, 240); + border-bottom-color: rgb(183, 197, 217); + border-bottom-style: solid; + border-bottom-width: 1px; + border-image-outset: 0; + border-image-repeat: stretch; + border-image-slice: 100%; + border-image-source: none; + border-image-width: 1; + border-left-color: rgb(0, 0, 0); + border-left-style: none; + border-left-width: 0px; + border-right-color: rgb(183, 197, 217); + border-right-style: solid; + border-right-width: 1px; + border-top-color: rgb(0, 0, 0); + border-top-style: none; + border-top-width: 0px; + color: rgb(0, 0, 0); + display: table; + font-family: arial, helvetica, sans-serif; + font-size: 13.3333px; + margin-bottom: 4px; + margin-left: 0px; + margin-right: 0px; + margin-top: 4px; + overflow-x: hidden; + overflow-y: hidden; + padding-bottom: 4px; + padding-left: 2px; + padding-right: 2px; + padding-top: 4px; + } + + #parent #container .number { + color: rgb(0, 0, 0); + font-family: arial, helvetica, sans-serif; + font-size: 13.3333px; + width: 342.65px; + margin-right: 7px; + } + + #parent #container .op { + color: rgb(0, 0, 0); + font-family: arial, helvetica, sans-serif; + font-size: 13.3333px; + margin-bottom: 8px; + margin-left: 0px; + margin-right: 0px; + margin-top: 4px; + overflow-x: hidden; + overflow-y: hidden; + } + + #parent #container .op blockquote { + margin-left: 0px !important; + } + + #parent #container .name { + color: rgb(17, 119, 67); + font-family: arial, helvetica, sans-serif; + font-size: 13.3333px; + font-weight: 700; + margin-left: 7px; + } + + #parent #container .quote { + color: rgb(221, 0, 0); + font-family: arial, helvetica, sans-serif; + font-size: 13.3333px; + text-decoration-color: rgb(221, 0, 0); + text-decoration-line: underline; + text-decoration-style: solid; + text-decoration-thickness: auto; + } + + #parent #container .greentext { + color: rgb(120, 153, 34); + font-family: arial, helvetica, sans-serif; + font-size: 13.3333px; + } + + #parent #container blockquote { + margin: 0px !important; + margin-block-start: 1em; + margin-block-end: 1em; + margin-inline-start: 40px; + margin-inline-end: 40px; + margin-top: 13.33px !important; + margin-bottom: 13.33px !important; + margin-left: 40px !important; + margin-right: 40px !important; + } + + #parent #container .message { + color: black; + border: none; + } + """ + + posts = [] + post = '' + c = -2 + for line in f.splitlines(): + line += "\n" + if line == '-----\n': + continue + elif line.startswith('--- '): + c += 1 + if post != '': + src = process_post(post, c) + posts.append(src) + post = line + else: + post += line + if post != '': + src = process_post(post, c) + posts.append(src) + + for i in range(len(posts)): + if i == 0: + posts[i] = f'
<div class="op">{posts[i]}</div>\n' + else: + posts[i] = f'<div class="reply">{posts[i]}</div>\n' + + output = '' + output += f'<style>{css}</style><div id="parent"><div id="container">' + for post in posts: + output += post + output += '</div></div>' + output = output.split('\n') + for i in range(len(output)): + output[i] = re.sub(r'^(&gt;(.*?)(<br>|</div>))', r'<span class="greentext">\1</span>', output[i]) + output[i] = re.sub(r'^<blockquote class="message">(&gt;(.*?)(<br>|</div>))', r'<blockquote class="message"><span class="greentext">
\1', output[i]) + output = '\n'.join(output) + + return output + +def get_image_cache(path): + cache_folder = Path("cache") + if not cache_folder.exists(): + cache_folder.mkdir() + + mtime = os.stat(path).st_mtime + if (path in image_cache and mtime != image_cache[path][0]) or (path not in image_cache): + img = Image.open(path) + img.thumbnail((200, 200)) + output_file = Path(f'cache/{path.name}_cache.png') + img.convert('RGB').save(output_file, format='PNG') + image_cache[path] = [mtime, output_file.as_posix()] + + return image_cache[path][1] + +def generate_chat_html(history, name1, name2, character): + css = """ + .chat { + margin-left: auto; + margin-right: auto; + max-width: 800px; + height: 66.67vh; + overflow-y: auto; + padding-right: 20px; + display: flex; + flex-direction: column-reverse; + } + + .message { + display: grid; + grid-template-columns: 60px 1fr; + padding-bottom: 25px; + font-size: 15px; + font-family: Helvetica, Arial, sans-serif; + line-height: 1.428571429; + } + + .circle-you { + width: 50px; + height: 50px; + background-color: rgb(238, 78, 59); + border-radius: 50%; + } + + .circle-bot { + width: 50px; + height: 50px; + background-color: rgb(59, 78, 244); + border-radius: 50%; + } + + .circle-bot img, .circle-you img { + border-radius: 50%; + width: 100%; + height: 100%; + object-fit: cover; + } + + .text { + } + + .text p { + margin-top: 5px; + } + + .username { + font-weight: bold; + } + + .message-body { + } + + .message-body img { + max-width: 300px; + max-height: 300px; + border-radius: 20px; + } + + .message-body p { + margin-bottom: 0 !important; + font-size: 15px !important; + line-height: 1.428571429 !important; + } + + .dark .message-body p em { + color: rgb(138, 138, 138) !important; + } + + .message-body p em { + color: rgb(110, 110, 110) !important; + } + + """ + + output = '' + output += f'
<style>{css}</style><div class="chat">' + img = '' + + for i in [ + f"characters/{character}.png", + f"characters/{character}.jpg", + f"characters/{character}.jpeg", + "img_bot.png", + "img_bot.jpg", + "img_bot.jpeg" + ]: + + path = Path(i) + if path.exists(): + img = f'<img src="file/{get_image_cache(path)}">' + break + + img_me = '' + for i in ["img_me.png", "img_me.jpg", "img_me.jpeg"]: + path = Path(i) + if path.exists(): + img_me = f'<img src="file/{get_image_cache(path)}">' + break + + for i,_row in enumerate(history[::-1]): + row = _row.copy() + row[0] = re.sub(r"(\*\*)([^\*\n]*)(\*\*)", r"<b>\2</b>", row[0]) + row[1] = re.sub(r"(\*\*)([^\*\n]*)(\*\*)", r"<b>\2</b>", row[1]) + row[0] = re.sub(r"(\*)([^\*\n]*)(\*)", r"<em>\2</em>", row[0]) + row[1] = re.sub(r"(\*)([^\*\n]*)(\*)", r"<em>\2</em>", row[1]) + p = '\n'.join([f"

<p>{x}</p>" for x in row[1].split('\n')]) + output += f""" + <div class="message"> + <div class="circle-bot"> + {img} + </div> + <div class="text"> + <div class="username"> + {name2} + </div> + <div class="message-body"> + {p} + </div> + </div> + </div> + """ + + if not (i == len(history)-1 and len(row[0]) == 0): + p = '\n'.join([f"<p>{x}</p>" for x in row[0].split('\n')]) + output += f""" + <div class="message"> + <div class="circle-you"> + {img_me} + </div> + <div class="text"> + <div class="username"> + {name1} + </div> + <div class="message-body"> + {p} + </div> + </div> + </div> + """ + + output += "</div>
" + return output diff --git a/text-generation-webui/modules/models.py b/text-generation-webui/modules/models.py new file mode 100644 index 0000000000000000000000000000000000000000..f4bb11fd3f7292657b008ab644b5be121d9980e5 --- /dev/null +++ b/text-generation-webui/modules/models.py @@ -0,0 +1,168 @@ +import json +import os +import time +import zipfile +from pathlib import Path + +import numpy as np +import torch +import transformers +from transformers import AutoModelForCausalLM, AutoTokenizer + +import modules.shared as shared + +transformers.logging.set_verbosity_error() + +local_rank = None + +if shared.args.flexgen: + from flexgen.flex_opt import (CompressionConfig, ExecutionEnv, OptLM, + Policy, str2bool) + +if shared.args.deepspeed: + import deepspeed + from transformers.deepspeed import (HfDeepSpeedConfig, + is_deepspeed_zero3_enabled) + + from modules.deepspeed_parameters import generate_ds_config + + # Distributed setup + local_rank = shared.args.local_rank if shared.args.local_rank is not None else int(os.getenv("LOCAL_RANK", "0")) + world_size = int(os.getenv("WORLD_SIZE", "1")) + torch.cuda.set_device(local_rank) + deepspeed.init_distributed() + ds_config = generate_ds_config(shared.args.bf16, 1 * world_size, shared.args.nvme_offload_dir) + dschf = HfDeepSpeedConfig(ds_config) # Keep this object alive for the Transformers integration + + +def load_model(model_name): + print(f"Loading {model_name}...") + t0 = time.time() + + shared.is_RWKV = model_name.lower().startswith('rwkv-') + + # Default settings + if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.gptq_bits, shared.args.auto_devices, shared.args.disk, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.deepspeed, shared.args.flexgen, shared.is_RWKV]): + if any(size in shared.model_name.lower() for size in ('13b', '20b', '30b')): + model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), device_map='auto', load_in_8bit=True) + else: + model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16).cuda() + + # FlexGen + elif shared.args.flexgen: + # Initialize environment + env = ExecutionEnv.create(shared.args.disk_cache_dir) + + # Offloading policy + policy = Policy(1, 1, + shared.args.percent[0], shared.args.percent[1], + shared.args.percent[2], shared.args.percent[3], + shared.args.percent[4], shared.args.percent[5], + overlap=True, sep_layer=True, pin_weight=shared.args.pin_weight, + cpu_cache_compute=False, attn_sparsity=1.0, + compress_weight=shared.args.compress_weight, + comp_weight_config=CompressionConfig( + num_bits=4, group_size=64, + group_dim=0, symmetric=False), + compress_cache=False, + comp_cache_config=CompressionConfig( + num_bits=4, group_size=64, + group_dim=2, symmetric=False)) + + model = OptLM(f"facebook/{shared.model_name}", env, "models", policy) + + # DeepSpeed ZeRO-3 + elif shared.args.deepspeed: + model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16) + model = deepspeed.initialize(model=model, config_params=ds_config, model_parameters=None, optimizer=None, lr_scheduler=None)[0] + model.module.eval() # Inference + print(f"DeepSpeed ZeRO-3 is enabled: {is_deepspeed_zero3_enabled()}") + + # RMKV model (not on HuggingFace) + elif shared.is_RWKV: + from modules.RWKV import RWKVModel, RWKVTokenizer + + model = 
RWKVModel.from_pretrained(Path(f'models/{model_name}'), dtype="fp32" if shared.args.cpu else "bf16" if shared.args.bf16 else "fp16", device="cpu" if shared.args.cpu else "cuda") + tokenizer = RWKVTokenizer.from_pretrained(Path('models')) + + return model, tokenizer + + # Quantized model + elif shared.args.gptq_bits > 0: + from modules.GPTQ_loader import load_quantized + + model = load_quantized(model_name) + + # Custom + else: + command = "AutoModelForCausalLM.from_pretrained" + params = ["low_cpu_mem_usage=True"] + if not shared.args.cpu and not torch.cuda.is_available(): + print("Warning: no GPU has been detected.\nFalling back to CPU mode.\n") + shared.args.cpu = True + + if shared.args.cpu: + params.append("low_cpu_mem_usage=True") + params.append("torch_dtype=torch.float32") + else: + params.append("device_map='auto'") + params.append("load_in_8bit=True" if shared.args.load_in_8bit else "torch_dtype=torch.bfloat16" if shared.args.bf16 else "torch_dtype=torch.float16") + + if shared.args.gpu_memory: + memory_map = shared.args.gpu_memory + max_memory = f"max_memory={{0: '{memory_map[0]}GiB'" + for i in range(1, len(memory_map)): + max_memory += (f", {i}: '{memory_map[i]}GiB'") + max_memory += (f", 'cpu': '{shared.args.cpu_memory or '99'}GiB'}}") + params.append(max_memory) + elif not shared.args.load_in_8bit: + total_mem = (torch.cuda.get_device_properties(0).total_memory/(1024*1024)) + suggestion = round((total_mem-1000)/1000)*1000 + if total_mem-suggestion < 800: + suggestion -= 1000 + suggestion = int(round(suggestion/1000)) + print(f"\033[1;32;1mAuto-assiging --gpu-memory {suggestion} for your GPU to try to prevent out-of-memory errors.\nYou can manually set other values.\033[0;37;0m") + params.append(f"max_memory={{0: '{suggestion}GiB', 'cpu': '{shared.args.cpu_memory or '99'}GiB'}}") + if shared.args.disk: + params.append(f"offload_folder='{shared.args.disk_cache_dir}'") + + command = f"{command}(Path(f'models/{shared.model_name}'), {', '.join(set(params))})" + model = eval(command) + + # Loading the tokenizer + if shared.model_name.lower().startswith(('gpt4chan', 'gpt-4chan', '4chan')) and Path("models/gpt-j-6B/").exists(): + tokenizer = AutoTokenizer.from_pretrained(Path("models/gpt-j-6B/")) + else: + tokenizer = AutoTokenizer.from_pretrained(Path(f"models/{shared.model_name}/")) + tokenizer.truncation_side = 'left' + + print(f"Loaded the model in {(time.time()-t0):.2f} seconds.") + return model, tokenizer + +def load_soft_prompt(name): + if name == 'None': + shared.soft_prompt = False + shared.soft_prompt_tensor = None + else: + with zipfile.ZipFile(Path(f'softprompts/{name}.zip')) as zf: + zf.extract('tensor.npy') + zf.extract('meta.json') + j = json.loads(open('meta.json', 'r').read()) + print(f"\nLoading the softprompt \"{name}\".") + for field in j: + if field != 'name': + if type(j[field]) is list: + print(f"{field}: {', '.join(j[field])}") + else: + print(f"{field}: {j[field]}") + print() + tensor = np.load('tensor.npy') + Path('tensor.npy').unlink() + Path('meta.json').unlink() + tensor = torch.Tensor(tensor).to(device=shared.model.device, dtype=shared.model.dtype) + tensor = torch.reshape(tensor, (1, tensor.shape[0], tensor.shape[1])) + + shared.soft_prompt = True + shared.soft_prompt_tensor = tensor + + return name diff --git a/text-generation-webui/modules/shared.py b/text-generation-webui/modules/shared.py new file mode 100644 index 0000000000000000000000000000000000000000..ea2eb50b7f586e5c562bf2e7c75429c91f21ec6c --- /dev/null +++ 
b/text-generation-webui/modules/shared.py @@ -0,0 +1,103 @@ +import argparse + +model = None +tokenizer = None +model_name = "" +soft_prompt_tensor = None +soft_prompt = False +is_RWKV = False + +# Chat variables +history = {'internal': [], 'visible': []} +character = 'None' +stop_everything = False +processing_message = '*Is typing...*' + +# UI elements (buttons, sliders, HTML, etc) +gradio = {} + +# Generation input parameters +input_params = [] + +settings = { + 'max_new_tokens': 200, + 'max_new_tokens_min': 1, + 'max_new_tokens_max': 2000, + 'name1': 'Person 1', + 'name2': 'Person 2', + 'context': 'This is a conversation between two people.', + 'stop_at_newline': True, + 'chat_prompt_size': 2048, + 'chat_prompt_size_min': 0, + 'chat_prompt_size_max': 2048, + 'chat_generation_attempts': 1, + 'chat_generation_attempts_min': 1, + 'chat_generation_attempts_max': 5, + 'name1_pygmalion': 'You', + 'name2_pygmalion': 'Kawaii', + 'context_pygmalion': "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. She is an optimist who loves to spread happiness and positivity wherever she goes.\n", + 'stop_at_newline_pygmalion': False, + 'default_extensions': [], + 'chat_default_extensions': ["gallery"], + 'presets': { + 'default': 'NovelAI-Sphinx Moth', + 'pygmalion-*': 'Pygmalion', + 'RWKV-*': 'Naive', + }, + 'prompts': { + 'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:', + '^(gpt4chan|gpt-4chan|4chan)': '-----\n--- 865467536\nInput text\n--- 865467537\n', + '(rosey|chip|joi)_.*_instruct.*': 'User: \n', + 'oasst-*': '<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>' + } +} + +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog,max_help_position=54)) +parser.add_argument('--model', type=str, help='Name of the model to load by default.') +parser.add_argument('--notebook', action='store_true', help='Launch the web UI in notebook mode, where the output is written to the same text box as the input.') +parser.add_argument('--chat', action='store_true', help='Launch the web UI in chat mode.') +parser.add_argument('--cai-chat', action='store_true', help='Launch the web UI in chat mode with a style similar to Character.AI\'s. If the file img_bot.png or img_bot.jpg exists in the same folder as server.py, this image will be used as the bot\'s profile picture. Similarly, img_me.png or img_me.jpg will be used as your profile picture.') +parser.add_argument('--cpu', action='store_true', help='Use the CPU to generate text.') +parser.add_argument('--load-in-8bit', action='store_true', help='Load the model with 8-bit precision.') +parser.add_argument('--load-in-4bit', action='store_true', help='DEPRECATED: use --gptq-bits 4 instead.') +parser.add_argument('--gptq-bits', type=int, default=0, help='Load a pre-quantized model with specified precision. 2, 3, 4 and 8bit are supported. Currently only works with LLaMA and OPT.') +parser.add_argument('--gptq-model-type', type=str, help='Model type of pre-quantized model. Currently only LLaMa and OPT are supported.') +parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. 
Requires NVIDIA Ampere GPU.') +parser.add_argument('--auto-devices', action='store_true', help='Automatically split the model across the available GPU(s) and CPU.') +parser.add_argument('--disk', action='store_true', help='If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk.') +parser.add_argument('--disk-cache-dir', type=str, default="cache", help='Directory to save the disk cache to. Defaults to "cache".') +parser.add_argument('--gpu-memory', type=int, nargs="+", help='Maxmimum GPU memory in GiB to be allocated per GPU. Example: --gpu-memory 10 for a single GPU, --gpu-memory 10 5 for two GPUs.') +parser.add_argument('--cpu-memory', type=int, help='Maximum CPU memory in GiB to allocate for offloaded weights. Must be an integer number. Defaults to 99.') +parser.add_argument('--flexgen', action='store_true', help='Enable the use of FlexGen offloading.') +parser.add_argument('--percent', type=int, nargs="+", default=[0, 100, 100, 0, 100, 0], help='FlexGen: allocation percentages. Must be 6 numbers separated by spaces (default: 0, 100, 100, 0, 100, 0).') +parser.add_argument("--compress-weight", action="store_true", help="FlexGen: activate weight compression.") +parser.add_argument("--pin-weight", type=str2bool, nargs="?", const=True, default=True, help="FlexGen: whether to pin weights (setting this to False reduces CPU memory by 20%%).") +parser.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.') +parser.add_argument('--nvme-offload-dir', type=str, help='DeepSpeed: Directory to use for ZeRO-3 NVME offloading.') +parser.add_argument('--local_rank', type=int, default=0, help='DeepSpeed: Optional argument for distributed setups.') +parser.add_argument('--rwkv-strategy', type=str, default=None, help='RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8".') +parser.add_argument('--rwkv-cuda-on', action='store_true', help='RWKV: Compile the CUDA kernel for better performance.') +parser.add_argument('--no-stream', action='store_true', help='Don\'t stream the text output in real time.') +parser.add_argument('--settings', type=str, help='Load the default interface settings from this json file. See settings-template.json for an example. If you create a file called settings.json, this file will be loaded by default without the need to use the --settings flag.') +parser.add_argument('--extensions', type=str, nargs="+", help='The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.') +parser.add_argument('--listen', action='store_true', help='Make the web UI reachable from your local network.') +parser.add_argument('--listen-port', type=int, help='The listening port that the server will use.') +parser.add_argument('--share', action='store_true', help='Create a public URL. This is useful for running the web UI on Google Colab or similar.') +parser.add_argument('--auto-launch', action='store_true', default=False, help='Open the web UI in the default browser upon launch.') +parser.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.') +args = parser.parse_args() + +# Provisional, this will be deleted later +if args.load_in_4bit: + print("Warning: --load-in-4bit is deprecated and will be removed. 
Use --gptq-bits 4 instead.\n") + args.gptq_bits = 4 diff --git a/text-generation-webui/modules/text_generation.py b/text-generation-webui/modules/text_generation.py new file mode 100644 index 0000000000000000000000000000000000000000..d64481b24ec4542e55de1605a6181f97d9a50de9 --- /dev/null +++ b/text-generation-webui/modules/text_generation.py @@ -0,0 +1,238 @@ +import gc +import re +import time + +import numpy as np +import torch +import transformers + +import modules.shared as shared +from modules.callbacks import (Iteratorize, Stream, + _SentinelTokenStoppingCriteria) +from modules.extensions import apply_extensions +from modules.html_generator import generate_4chan_html, generate_basic_html +from modules.models import local_rank + + +def get_max_prompt_length(tokens): + max_length = 2048-tokens + if shared.soft_prompt: + max_length -= shared.soft_prompt_tensor.shape[1] + return max_length + +def encode(prompt, tokens_to_generate=0, add_special_tokens=True): + if shared.is_RWKV: + input_ids = shared.tokenizer.encode(str(prompt)) + input_ids = np.array(input_ids).reshape(1, len(input_ids)) + return input_ids + else: + input_ids = shared.tokenizer.encode(str(prompt), return_tensors='pt', truncation=True, max_length=get_max_prompt_length(tokens_to_generate), add_special_tokens=add_special_tokens) + if shared.args.cpu: + return input_ids + elif shared.args.flexgen: + return input_ids.numpy() + elif shared.args.deepspeed: + return input_ids.to(device=local_rank) + else: + return input_ids.cuda() + +def decode(output_ids): + # Open Assistant relies on special tokens like <|endoftext|> + if re.match('oasst-*', shared.model_name.lower()): + return shared.tokenizer.decode(output_ids, skip_special_tokens=False) + else: + reply = shared.tokenizer.decode(output_ids, skip_special_tokens=True) + reply = reply.replace(r'<|endoftext|>', '') + return reply + +def generate_softprompt_input_tensors(input_ids): + inputs_embeds = shared.model.transformer.wte(input_ids) + inputs_embeds = torch.cat((shared.soft_prompt_tensor, inputs_embeds), dim=1) + filler_input_ids = torch.zeros((1, inputs_embeds.shape[1]), dtype=input_ids.dtype).to(shared.model.device) + #filler_input_ids += shared.model.config.bos_token_id # setting dummy input_ids to bos tokens + return inputs_embeds, filler_input_ids + +# Removes empty replies from gpt4chan outputs +def fix_gpt4chan(s): + for i in range(10): + s = re.sub("--- [0-9]*\n>>[0-9]*\n---", "---", s) + s = re.sub("--- [0-9]*\n *\n---", "---", s) + s = re.sub("--- [0-9]*\n\n\n---", "---", s) + return s + +# Fix the LaTeX equations in galactica +def fix_galactica(s): + s = s.replace(r'\[', r'$') + s = s.replace(r'\]', r'$') + s = s.replace(r'\(', r'$') + s = s.replace(r'\)', r'$') + s = s.replace(r'$$', r'$') + s = re.sub(r'\n', r'\n\n', s) + s = re.sub(r"\n{3,}", "\n\n", s) + return s + +def formatted_outputs(reply, model_name): + if not (shared.args.chat or shared.args.cai_chat): + if model_name.lower().startswith('galactica'): + reply = fix_galactica(reply) + return reply, reply, generate_basic_html(reply) + elif model_name.lower().startswith(('gpt4chan', 'gpt-4chan', '4chan')): + reply = fix_gpt4chan(reply) + return reply, 'Only applicable for GALACTICA models.', generate_4chan_html(reply) + else: + return reply, 'Only applicable for GALACTICA models.', generate_basic_html(reply) + else: + return reply + +def clear_torch_cache(): + gc.collect() + if not shared.args.cpu: + torch.cuda.empty_cache() + +def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, 
typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=None, stopping_string=None): + clear_torch_cache() + t0 = time.time() + + # These models are not part of Hugging Face, so we handle them + # separately and terminate the function call earlier + if shared.is_RWKV: + try: + if shared.args.no_stream: + reply = shared.model.generate(context=question, token_count=max_new_tokens, temperature=temperature, top_p=top_p, top_k=top_k) + yield formatted_outputs(reply, shared.model_name) + else: + yield formatted_outputs(question, shared.model_name) + # RWKV has proper streaming, which is very nice. + # No need to generate 8 tokens at a time. + for reply in shared.model.generate_with_streaming(context=question, token_count=max_new_tokens, temperature=temperature, top_p=top_p, top_k=top_k): + yield formatted_outputs(reply, shared.model_name) + finally: + t1 = time.time() + output = encode(reply)[0] + input_ids = encode(question) + print(f"Output generated in {(t1-t0):.2f} seconds ({(len(output)-len(input_ids[0]))/(t1-t0):.2f} tokens/s, {len(output)-len(input_ids[0])} tokens)") + return + + original_question = question + if not (shared.args.chat or shared.args.cai_chat): + question = apply_extensions(question, "input") + if shared.args.verbose: + print(f"\n\n{question}\n--------------------\n") + + input_ids = encode(question, max_new_tokens) + original_input_ids = input_ids + output = input_ids[0] + cuda = "" if (shared.args.cpu or shared.args.deepspeed or shared.args.flexgen) else ".cuda()" + eos_token_ids = [shared.tokenizer.eos_token_id] if shared.tokenizer.eos_token_id is not None else [] + if eos_token is not None: + eos_token_ids.append(int(encode(eos_token)[0][-1])) + stopping_criteria_list = transformers.StoppingCriteriaList() + if stopping_string is not None: + # Copied from https://github.com/PygmalionAI/gradio-ui/blob/master/src/model.py + t = encode(stopping_string, 0, add_special_tokens=False) + stopping_criteria_list.append(_SentinelTokenStoppingCriteria(sentinel_token_ids=t, starting_idx=len(input_ids[0]))) + + if not shared.args.flexgen: + generate_params = [ + f"max_new_tokens=max_new_tokens", + f"eos_token_id={eos_token_ids}", + f"stopping_criteria=stopping_criteria_list", + f"do_sample={do_sample}", + f"temperature={temperature}", + f"top_p={top_p}", + f"typical_p={typical_p}", + f"repetition_penalty={repetition_penalty}", + f"top_k={top_k}", + f"min_length={min_length if shared.args.no_stream else 0}", + f"no_repeat_ngram_size={no_repeat_ngram_size}", + f"num_beams={num_beams}", + f"penalty_alpha={penalty_alpha}", + f"length_penalty={length_penalty}", + f"early_stopping={early_stopping}", + ] + else: + generate_params = [ + f"max_new_tokens={max_new_tokens if shared.args.no_stream else 8}", + f"do_sample={do_sample}", + f"temperature={temperature}", + f"stop={eos_token_ids[-1]}", + ] + if shared.args.deepspeed: + generate_params.append("synced_gpus=True") + if shared.soft_prompt: + inputs_embeds, filler_input_ids = generate_softprompt_input_tensors(input_ids) + generate_params.insert(0, "inputs_embeds=inputs_embeds") + generate_params.insert(0, "inputs=filler_input_ids") + else: + generate_params.insert(0, "inputs=input_ids") + + try: + # Generate the entire reply at once. 
+ if shared.args.no_stream: + with torch.no_grad(): + output = eval(f"shared.model.generate({', '.join(generate_params)}){cuda}")[0] + if shared.soft_prompt: + output = torch.cat((input_ids[0], output[filler_input_ids.shape[1]:])) + + reply = decode(output) + if not (shared.args.chat or shared.args.cai_chat): + reply = original_question + apply_extensions(reply[len(question):], "output") + + yield formatted_outputs(reply, shared.model_name) + + # Stream the reply 1 token at a time. + # This is based on the trick of using 'stopping_criteria' to create an iterator. + elif not shared.args.flexgen: + + def generate_with_callback(callback=None, **kwargs): + kwargs['stopping_criteria'].append(Stream(callback_func=callback)) + clear_torch_cache() + with torch.no_grad(): + shared.model.generate(**kwargs) + + def generate_with_streaming(**kwargs): + return Iteratorize(generate_with_callback, kwargs, callback=None) + + yield formatted_outputs(original_question, shared.model_name) + with eval(f"generate_with_streaming({', '.join(generate_params)})") as generator: + for output in generator: + if shared.soft_prompt: + output = torch.cat((input_ids[0], output[filler_input_ids.shape[1]:])) + reply = decode(output) + + if not (shared.args.chat or shared.args.cai_chat): + reply = original_question + apply_extensions(reply[len(question):], "output") + + if output[-1] in eos_token_ids: + break + yield formatted_outputs(reply, shared.model_name) + + yield formatted_outputs(reply, shared.model_name) + + # Stream the output naively for FlexGen since it doesn't support 'stopping_criteria' + else: + for i in range(max_new_tokens//8+1): + clear_torch_cache() + with torch.no_grad(): + output = eval(f"shared.model.generate({', '.join(generate_params)})")[0] + if shared.soft_prompt: + output = torch.cat((input_ids[0], output[filler_input_ids.shape[1]:])) + reply = decode(output) + + if not (shared.args.chat or shared.args.cai_chat): + reply = original_question + apply_extensions(reply[len(question):], "output") + + if np.count_nonzero(np.isin(input_ids[0], eos_token_ids)) < np.count_nonzero(np.isin(output, eos_token_ids)): + break + yield formatted_outputs(reply, shared.model_name) + + input_ids = np.reshape(output, (1, output.shape[0])) + if shared.soft_prompt: + inputs_embeds, filler_input_ids = generate_softprompt_input_tensors(input_ids) + + yield formatted_outputs(reply, shared.model_name) + + finally: + t1 = time.time() + print(f"Output generated in {(t1-t0):.2f} seconds ({(len(output)-len(original_input_ids[0]))/(t1-t0):.2f} tokens/s, {len(output)-len(original_input_ids[0])} tokens)") + return diff --git a/text-generation-webui/modules/ui.py b/text-generation-webui/modules/ui.py new file mode 100644 index 0000000000000000000000000000000000000000..bb193e35c11b2a3d474ea89e7567206a3343395a --- /dev/null +++ b/text-generation-webui/modules/ui.py @@ -0,0 +1,92 @@ +import gradio as gr + +refresh_symbol = '\U0001f504' # 🔄 + +css = """ +.tabs.svelte-710i53 { + margin-top: 0 +} +.py-6 { + padding-top: 2.5rem +} +.dark #refresh-button { + background-color: #ffffff1f; +} +#refresh-button { + flex: none; + margin: 0; + padding: 0; + min-width: 50px; + border: none; + box-shadow: none; + border-radius: 10px; + background-color: #0000000d; +} +#download-label, #upload-label { + min-height: 0 +} +#accordion { +} +.dark svg { + fill: white; +} +svg { + display: unset !important; + vertical-align: middle !important; + margin: 5px; +} +ol li p, ul li p { + display: inline-block; +} +""" + +chat_css = """ +.h-\[40vh\], 
.wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx { + height: 66.67vh +} +.gradio-container { + max-width: 800px !important; + margin-left: auto !important; + margin-right: auto !important; +} +.w-screen { + width: unset +} +div.svelte-362y77>*, div.svelte-362y77>.form>* { + flex-wrap: nowrap +} +/* fixes the API documentation in chat mode */ +.api-docs.svelte-1iguv9h.svelte-1iguv9h.svelte-1iguv9h { + display: grid; +} +.pending.svelte-1ed2p3z { + opacity: 1; +} +""" + +class ToolButton(gr.Button, gr.components.FormComponent): + """Small button with single emoji as text, fits inside gradio forms""" + + def __init__(self, **kwargs): + super().__init__(variant="tool", **kwargs) + + def get_block_name(self): + return "button" + +def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_id): + def refresh(): + refresh_method() + args = refreshed_args() if callable(refreshed_args) else refreshed_args + + for k, v in args.items(): + setattr(refresh_component, k, v) + + return gr.update(**(args or {})) + + refresh_button = ToolButton(value=refresh_symbol, elem_id=elem_id) + refresh_button.click( + fn=refresh, + inputs=[], + outputs=[refresh_component] + ) + return refresh_button diff --git a/text-generation-webui/presets/Contrastive Search.txt b/text-generation-webui/presets/Contrastive Search.txt new file mode 100644 index 0000000000000000000000000000000000000000..832bc9caf9b744d9d9c728f88d887f012a56ba3e --- /dev/null +++ b/text-generation-webui/presets/Contrastive Search.txt @@ -0,0 +1,3 @@ +do_sample=False +penalty_alpha=0.6 +top_k=4 diff --git a/text-generation-webui/presets/Debug-deterministic.txt b/text-generation-webui/presets/Debug-deterministic.txt new file mode 100644 index 0000000000000000000000000000000000000000..6673b71c8164effc401a486055b7f9a021b2acfb --- /dev/null +++ b/text-generation-webui/presets/Debug-deterministic.txt @@ -0,0 +1 @@ +do_sample=False diff --git a/text-generation-webui/presets/Default.txt b/text-generation-webui/presets/Default.txt new file mode 100644 index 0000000000000000000000000000000000000000..9f0983ec7f67e44ac6a383bc13636eec8ad01c78 --- /dev/null +++ b/text-generation-webui/presets/Default.txt @@ -0,0 +1,12 @@ +do_sample=True +temperature=1 +top_p=1 +typical_p=1 +repetition_penalty=1 +top_k=50 +num_beams=1 +penalty_alpha=0 +min_length=0 +length_penalty=1 +no_repeat_ngram_size=0 +early_stopping=False diff --git a/text-generation-webui/presets/Individual Today.txt b/text-generation-webui/presets/Individual Today.txt new file mode 100644 index 0000000000000000000000000000000000000000..f40b879cefc3d3e7914fc03f0f2322758c51cc05 --- /dev/null +++ b/text-generation-webui/presets/Individual Today.txt @@ -0,0 +1,6 @@ +do_sample=True +top_p=0.9 +top_k=50 +temperature=1.39 +repetition_penalty=1.08 +typical_p=0.2 diff --git a/text-generation-webui/presets/Kobold-Godlike.txt b/text-generation-webui/presets/Kobold-Godlike.txt new file mode 100644 index 0000000000000000000000000000000000000000..0ba5b794b6d0130a1fa1d918bda9a276f7d23367 --- /dev/null +++ b/text-generation-webui/presets/Kobold-Godlike.txt @@ -0,0 +1,6 @@ +do_sample=True +top_p=0.5 +top_k=0 +temperature=0.7 +repetition_penalty=1.1 +typical_p=0.19 diff --git a/text-generation-webui/presets/Kobold-Liminal Drift.txt b/text-generation-webui/presets/Kobold-Liminal Drift.txt new file mode 100644 index 0000000000000000000000000000000000000000..be4dd3bd7a70af2d4eb6c847bed6bedee5379dce --- /dev/null +++ b/text-generation-webui/presets/Kobold-Liminal Drift.txt @@ -0,0 +1,6 @@ +do_sample=True 
+top_p=1.0 +top_k=0 +temperature=0.66 +repetition_penalty=1.1 +typical_p=0.6 diff --git a/text-generation-webui/presets/Naive.txt b/text-generation-webui/presets/Naive.txt new file mode 100644 index 0000000000000000000000000000000000000000..aa8c058224c533f4084e230f6bbf77b63d5e81ea --- /dev/null +++ b/text-generation-webui/presets/Naive.txt @@ -0,0 +1,4 @@ +do_sample=True +temperature=0.7 +top_p=0.85 +top_k=50 diff --git a/text-generation-webui/presets/NovelAI-Best Guess.txt b/text-generation-webui/presets/NovelAI-Best Guess.txt new file mode 100644 index 0000000000000000000000000000000000000000..db3fa75b2a11d7e29b108177f9894e82d1e52126 --- /dev/null +++ b/text-generation-webui/presets/NovelAI-Best Guess.txt @@ -0,0 +1,6 @@ +do_sample=True +top_p=0.9 +top_k=100 +temperature=0.8 +repetition_penalty=1.15 +typical_p=1.0 diff --git a/text-generation-webui/presets/NovelAI-Decadence.txt b/text-generation-webui/presets/NovelAI-Decadence.txt new file mode 100644 index 0000000000000000000000000000000000000000..d3109f3e3f3a021810d171a0b98f615766b57e4b --- /dev/null +++ b/text-generation-webui/presets/NovelAI-Decadence.txt @@ -0,0 +1,6 @@ +do_sample=True +top_p=1.0 +top_k=100 +temperature=2 +repetition_penalty=1 +typical_p=0.97 diff --git a/text-generation-webui/presets/NovelAI-Genesis.txt b/text-generation-webui/presets/NovelAI-Genesis.txt new file mode 100644 index 0000000000000000000000000000000000000000..cc7376b3b981a260448a65cd3c00c7b3904308e2 --- /dev/null +++ b/text-generation-webui/presets/NovelAI-Genesis.txt @@ -0,0 +1,6 @@ +do_sample=True +top_p=0.98 +top_k=0 +temperature=0.63 +repetition_penalty=1.05 +typical_p=1.0 diff --git a/text-generation-webui/presets/NovelAI-Lycaenidae.txt b/text-generation-webui/presets/NovelAI-Lycaenidae.txt new file mode 100644 index 0000000000000000000000000000000000000000..0134569cef76bc0de6b3dc7885d94d9d9afdfd62 --- /dev/null +++ b/text-generation-webui/presets/NovelAI-Lycaenidae.txt @@ -0,0 +1,6 @@ +do_sample=True +top_p=0.85 +top_k=12 +temperature=2 +repetition_penalty=1.15 +typical_p=1.0 diff --git a/text-generation-webui/presets/NovelAI-Ouroboros.txt b/text-generation-webui/presets/NovelAI-Ouroboros.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e944b54e78e1f63bd4bb6f56a717e0fec751c6b --- /dev/null +++ b/text-generation-webui/presets/NovelAI-Ouroboros.txt @@ -0,0 +1,6 @@ +do_sample=True +top_p=1.0 +top_k=100 +temperature=1.07 +repetition_penalty=1.05 +typical_p=1.0 diff --git a/text-generation-webui/presets/NovelAI-Pleasing Results.txt b/text-generation-webui/presets/NovelAI-Pleasing Results.txt new file mode 100644 index 0000000000000000000000000000000000000000..330114a25db6d194dbc8689bf5476a81f649cf64 --- /dev/null +++ b/text-generation-webui/presets/NovelAI-Pleasing Results.txt @@ -0,0 +1,6 @@ +do_sample=True +top_p=1.0 +top_k=0 +temperature=0.44 +repetition_penalty=1.15 +typical_p=1.0 diff --git a/text-generation-webui/presets/NovelAI-Sphinx Moth.txt b/text-generation-webui/presets/NovelAI-Sphinx Moth.txt new file mode 100644 index 0000000000000000000000000000000000000000..bace1e24b5dcc64fdde99097930f41a991e91b8e --- /dev/null +++ b/text-generation-webui/presets/NovelAI-Sphinx Moth.txt @@ -0,0 +1,6 @@ +do_sample=True +top_p=0.18 +top_k=30 +temperature=2.0 +repetition_penalty=1.15 +typical_p=1.0 diff --git a/text-generation-webui/presets/NovelAI-Storywriter.txt b/text-generation-webui/presets/NovelAI-Storywriter.txt new file mode 100644 index 0000000000000000000000000000000000000000..2df5f8181458c642ed4691925ade3d542de5391c --- 
/dev/null +++ b/text-generation-webui/presets/NovelAI-Storywriter.txt @@ -0,0 +1,6 @@ +do_sample=True +top_p=0.73 +top_k=0 +temperature=0.72 +repetition_penalty=1.1 +typical_p=1.0 diff --git a/text-generation-webui/presets/Pygmalion.txt b/text-generation-webui/presets/Pygmalion.txt new file mode 100644 index 0000000000000000000000000000000000000000..f8b2ca55304ce8243e26bd28ebc757e40354a0e9 --- /dev/null +++ b/text-generation-webui/presets/Pygmalion.txt @@ -0,0 +1,6 @@ +do_sample=True +top_p=0.9 +top_k=0 +temperature=0.5 +repetition_penalty=1.1 +typical_p=1.0 diff --git a/text-generation-webui/presets/Verbose (Beam Search).txt b/text-generation-webui/presets/Verbose (Beam Search).txt new file mode 100644 index 0000000000000000000000000000000000000000..a3be1b94f27e31e1d0e762a15fd0300abb32e460 --- /dev/null +++ b/text-generation-webui/presets/Verbose (Beam Search).txt @@ -0,0 +1,9 @@ +num_beams=10 +min_length=200 +length_penalty =1.4 +no_repeat_ngram_size=2 +early_stopping=True +temperature=0.7 +top_k=150 +top_p=0.92 +repetition_penalty=4.5 diff --git a/text-generation-webui/requirements.txt b/text-generation-webui/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..9bb2b74f99700dbdea1315fd9d78cd3c9c227694 --- /dev/null +++ b/text-generation-webui/requirements.txt @@ -0,0 +1,11 @@ +accelerate==0.17.1 +bitsandbytes==0.37.1 +flexgen==0.1.7 +gradio==3.18.0 +numpy +requests +rwkv==0.4.2 +safetensors==0.3.0 +sentencepiece +tqdm +git+https://github.com/zphang/transformers.git@68d640f7c368bcaaaecfc678f11908ebbd3d6176 diff --git a/text-generation-webui/server.py b/text-generation-webui/server.py new file mode 100644 index 0000000000000000000000000000000000000000..2e2dc39822fd11bfc2d3a04eeae2001f17034a72 --- /dev/null +++ b/text-generation-webui/server.py @@ -0,0 +1,381 @@ +import gc +import io +import json +import re +import sys +import time +import zipfile +import os +os.system('python download-model.py waifu-workshop/pygmalion-6b --branch original-sharded') +from pathlib import Path + +import gradio as gr +import torch + +import modules.chat as chat +import modules.extensions as extensions_module +import modules.shared as shared +import modules.ui as ui +from modules.html_generator import generate_chat_html +from modules.models import load_model, load_soft_prompt +from modules.text_generation import generate_reply + +# Loading custom settings +settings_file = None +if shared.args.settings is not None and Path(shared.args.settings).exists(): + settings_file = Path(shared.args.settings) +elif Path('settings.json').exists(): + settings_file = Path('settings.json') +if settings_file is not None: + print(f"Loading settings from {settings_file}...") + new_settings = json.loads(open(settings_file, 'r').read()) + for item in new_settings: + shared.settings[item] = new_settings[item] + +def get_available_models(): + if shared.args.flexgen: + return sorted([re.sub('-np$', '', item.name) for item in list(Path('models/').glob('*')) if item.name.endswith('-np')], key=str.lower) + else: + return sorted([item.name for item in list(Path('models/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt'))], key=str.lower) + +def get_available_presets(): + return sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower) + +def get_available_characters(): + return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('characters').glob('*.json'))), key=str.lower) + +def get_available_extensions(): + 
return sorted(set(map(lambda x : x.parts[1], Path('extensions').glob('*/script.py'))), key=str.lower) + +def get_available_softprompts(): + return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('softprompts').glob('*.zip'))), key=str.lower) + +def load_model_wrapper(selected_model): + if selected_model != shared.model_name: + shared.model_name = selected_model + shared.model = shared.tokenizer = None + if not shared.args.cpu: + gc.collect() + torch.cuda.empty_cache() + shared.model, shared.tokenizer = load_model(shared.model_name) + + return selected_model + +def load_preset_values(preset_menu, return_dict=False): + generate_params = { + 'do_sample': True, + 'temperature': 1, + 'top_p': 1, + 'typical_p': 1, + 'repetition_penalty': 1, + 'top_k': 50, + 'num_beams': 1, + 'penalty_alpha': 0, + 'min_length': 0, + 'length_penalty': 1, + 'no_repeat_ngram_size': 0, + 'early_stopping': False, + } + with open(Path(f'presets/{preset_menu}.txt'), 'r') as infile: + preset = infile.read() + for i in preset.splitlines(): + i = i.rstrip(',').strip().split('=') + if len(i) == 2 and i[0].strip() != 'tokens': + generate_params[i[0].strip()] = eval(i[1].strip()) + + generate_params['temperature'] = min(1.99, generate_params['temperature']) + + if return_dict: + return generate_params + else: + return generate_params['do_sample'], generate_params['temperature'], generate_params['top_p'], generate_params['typical_p'], generate_params['repetition_penalty'], generate_params['top_k'], generate_params['min_length'], generate_params['no_repeat_ngram_size'], generate_params['num_beams'], generate_params['penalty_alpha'], generate_params['length_penalty'], generate_params['early_stopping'] + +def upload_soft_prompt(file): + with zipfile.ZipFile(io.BytesIO(file)) as zf: + zf.extract('meta.json') + j = json.loads(open('meta.json', 'r').read()) + name = j['name'] + Path('meta.json').unlink() + + with open(Path(f'softprompts/{name}.zip'), 'wb') as f: + f.write(file) + + return name + +def create_settings_menus(default_preset): + generate_params = load_preset_values(default_preset if not shared.args.flexgen else 'Naive', return_dict=True) + + with gr.Row(): + with gr.Column(): + with gr.Row(): + shared.gradio['model_menu'] = gr.Dropdown(choices=available_models, value=shared.model_name, label='Model') + ui.create_refresh_button(shared.gradio['model_menu'], lambda : None, lambda : {'choices': get_available_models()}, 'refresh-button') + with gr.Column(): + with gr.Row(): + shared.gradio['preset_menu'] = gr.Dropdown(choices=available_presets, value=default_preset if not shared.args.flexgen else 'Naive', label='Generation parameters preset') + ui.create_refresh_button(shared.gradio['preset_menu'], lambda : None, lambda : {'choices': get_available_presets()}, 'refresh-button') + + with gr.Accordion('Custom generation parameters', open=False, elem_id='accordion'): + with gr.Row(): + with gr.Column(): + shared.gradio['temperature'] = gr.Slider(0.01, 1.99, value=generate_params['temperature'], step=0.01, label='temperature') + shared.gradio['repetition_penalty'] = gr.Slider(1.0, 2.99, value=generate_params['repetition_penalty'],step=0.01,label='repetition_penalty') + shared.gradio['top_k'] = gr.Slider(0,200,value=generate_params['top_k'],step=1,label='top_k') + shared.gradio['top_p'] = gr.Slider(0.0,1.0,value=generate_params['top_p'],step=0.01,label='top_p') + with gr.Column(): + shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') + 
shared.gradio['typical_p'] = gr.Slider(0.0,1.0,value=generate_params['typical_p'],step=0.01,label='typical_p') + shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size') + shared.gradio['min_length'] = gr.Slider(0, 2000, step=1, value=generate_params['min_length'] if shared.args.no_stream else 0, label='min_length', interactive=shared.args.no_stream) + + gr.Markdown('Contrastive search:') + shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha') + + gr.Markdown('Beam search (uses a lot of VRAM):') + with gr.Row(): + with gr.Column(): + shared.gradio['num_beams'] = gr.Slider(1, 20, step=1, value=generate_params['num_beams'], label='num_beams') + with gr.Column(): + shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty') + shared.gradio['early_stopping'] = gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping') + + with gr.Accordion('Soft prompt', open=False, elem_id='accordion'): + with gr.Row(): + shared.gradio['softprompts_menu'] = gr.Dropdown(choices=available_softprompts, value='None', label='Soft prompt') + ui.create_refresh_button(shared.gradio['softprompts_menu'], lambda : None, lambda : {'choices': get_available_softprompts()}, 'refresh-button') + + gr.Markdown('Upload a soft prompt (.zip format):') + with gr.Row(): + shared.gradio['upload_softprompt'] = gr.File(type='binary', file_types=['.zip']) + + shared.gradio['model_menu'].change(load_model_wrapper, [shared.gradio['model_menu']], [shared.gradio['model_menu']], show_progress=True) + shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio['do_sample'], shared.gradio['temperature'], shared.gradio['top_p'], shared.gradio['typical_p'], shared.gradio['repetition_penalty'], shared.gradio['top_k'], shared.gradio['min_length'], shared.gradio['no_repeat_ngram_size'], shared.gradio['num_beams'], shared.gradio['penalty_alpha'], shared.gradio['length_penalty'], shared.gradio['early_stopping']]) + shared.gradio['softprompts_menu'].change(load_soft_prompt, [shared.gradio['softprompts_menu']], [shared.gradio['softprompts_menu']], show_progress=True) + shared.gradio['upload_softprompt'].upload(upload_soft_prompt, [shared.gradio['upload_softprompt']], [shared.gradio['softprompts_menu']]) + +available_models = get_available_models() +available_presets = get_available_presets() +available_characters = get_available_characters() +available_softprompts = get_available_softprompts() + +# Default extensions +extensions_module.available_extensions = get_available_extensions() +if shared.args.chat or shared.args.cai_chat: + for extension in shared.settings['chat_default_extensions']: + shared.args.extensions = shared.args.extensions or [] + if extension not in shared.args.extensions: + shared.args.extensions.append(extension) +else: + for extension in shared.settings['default_extensions']: + shared.args.extensions = shared.args.extensions or [] + if extension not in shared.args.extensions: + shared.args.extensions.append(extension) +if shared.args.extensions is not None and len(shared.args.extensions) > 0: + extensions_module.load_extensions() + +# Default model +if shared.args.model is not None: + shared.model_name = shared.args.model +else: + if len(available_models) == 0: + print('No models are available! 
Please download at least one.') + sys.exit(0) + elif len(available_models) == 1: + i = 0 + else: + print('The following models are available:\n') + for i, model in enumerate(available_models): + print(f'{i+1}. {model}') + print(f'\nWhich one do you want to load? 1-{len(available_models)}\n') + i = int(input())-1 + print() + shared.model_name = available_models[i] +shared.model, shared.tokenizer = load_model(shared.model_name) + +# Default UI settings +gen_events = [] +default_preset = shared.settings['presets'][next((k for k in shared.settings['presets'] if re.match(k.lower(), shared.model_name.lower())), 'default')] +default_text = shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')] +title ='Text generation web UI' +description = '\n\n# Text generation lab\nGenerate text using Large Language Models.\n' +suffix = '_pygmalion' if 'pygmalion' in shared.model_name.lower() else '' + +if shared.args.chat or shared.args.cai_chat: + with gr.Blocks(css=ui.css+ui.chat_css, analytics_enabled=False, title=title) as shared.gradio['interface']: + if shared.args.cai_chat: + shared.gradio['display'] = gr.HTML(value=generate_chat_html(shared.history['visible'], shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}'], shared.character)) + else: + shared.gradio['display'] = gr.Chatbot(value=shared.history['visible']).style(color_map=("#326efd", "#212528")) + shared.gradio['textbox'] = gr.Textbox(label='Input') + with gr.Row(): + shared.gradio['Stop'] = gr.Button('Stop') + shared.gradio['Generate'] = gr.Button('Generate') + with gr.Row(): + shared.gradio['Impersonate'] = gr.Button('Impersonate') + shared.gradio['Regenerate'] = gr.Button('Regenerate') + with gr.Row(): + shared.gradio['Copy last reply'] = gr.Button('Copy last reply') + shared.gradio['Replace last reply'] = gr.Button('Replace last reply') + shared.gradio['Remove last'] = gr.Button('Remove last') + + shared.gradio['Clear history'] = gr.Button('Clear history') + shared.gradio['Clear history-confirm'] = gr.Button('Confirm', variant="stop", visible=False) + shared.gradio['Clear history-cancel'] = gr.Button('Cancel', visible=False) + with gr.Tab('Chat settings'): + shared.gradio['name1'] = gr.Textbox(value=shared.settings[f'name1{suffix}'], lines=1, label='Your name') + shared.gradio['name2'] = gr.Textbox(value=shared.settings[f'name2{suffix}'], lines=1, label='Bot\'s name') + shared.gradio['context'] = gr.Textbox(value=shared.settings[f'context{suffix}'], lines=5, label='Context') + with gr.Row(): + shared.gradio['character_menu'] = gr.Dropdown(choices=available_characters, value='None', label='Character', elem_id='character-menu') + ui.create_refresh_button(shared.gradio['character_menu'], lambda : None, lambda : {'choices': get_available_characters()}, 'refresh-button') + + with gr.Row(): + shared.gradio['check'] = gr.Checkbox(value=shared.settings[f'stop_at_newline{suffix}'], label='Stop generating at new line character?') + with gr.Row(): + with gr.Tab('Chat history'): + with gr.Row(): + with gr.Column(): + gr.Markdown('Upload') + shared.gradio['upload_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt']) + with gr.Column(): + gr.Markdown('Download') + shared.gradio['download'] = gr.File() + shared.gradio['download_button'] = gr.Button(value='Click me') + with gr.Tab('Upload character'): + with gr.Row(): + with gr.Column(): + gr.Markdown('1. 
Select the JSON file') + shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json']) + with gr.Column(): + gr.Markdown('2. Select your character\'s profile picture (optional)') + shared.gradio['upload_img_bot'] = gr.File(type='binary', file_types=['image']) + shared.gradio['Upload character'] = gr.Button(value='Submit') + with gr.Tab('Upload your profile picture'): + shared.gradio['upload_img_me'] = gr.File(type='binary', file_types=['image']) + with gr.Tab('Upload TavernAI Character Card'): + shared.gradio['upload_img_tavern'] = gr.File(type='binary', file_types=['image']) + + with gr.Tab('Generation settings'): + with gr.Row(): + with gr.Column(): + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + with gr.Column(): + shared.gradio['chat_prompt_size_slider'] = gr.Slider(minimum=shared.settings['chat_prompt_size_min'], maximum=shared.settings['chat_prompt_size_max'], step=1, label='Maximum prompt size in tokens', value=shared.settings['chat_prompt_size']) + shared.gradio['chat_generation_attempts'] = gr.Slider(minimum=shared.settings['chat_generation_attempts_min'], maximum=shared.settings['chat_generation_attempts_max'], value=shared.settings['chat_generation_attempts'], step=1, label='Generation attempts (for longer replies)') + create_settings_menus(default_preset) + + shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'name1', 'name2', 'context', 'check', 'chat_prompt_size_slider', 'chat_generation_attempts']] + if shared.args.extensions is not None: + with gr.Tab('Extensions'): + extensions_module.create_extensions_block() + + function_call = 'chat.cai_chatbot_wrapper' if shared.args.cai_chat else 'chat.chatbot_wrapper' + + gen_events.append(shared.gradio['Generate'].click(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream, api_name='textgen')) + gen_events.append(shared.gradio['textbox'].submit(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) + gen_events.append(shared.gradio['Regenerate'].click(chat.regenerate_wrapper, shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)) + gen_events.append(shared.gradio['Impersonate'].click(chat.impersonate_wrapper, shared.input_params, shared.gradio['textbox'], show_progress=shared.args.no_stream)) + shared.gradio['Stop'].click(chat.stop_everything_event, [], [], cancels=gen_events) + + shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, [], shared.gradio['textbox'], show_progress=shared.args.no_stream) + shared.gradio['Replace last reply'].click(chat.replace_last_reply, [shared.gradio['textbox'], shared.gradio['name1'], shared.gradio['name2']], shared.gradio['display'], show_progress=shared.args.no_stream) + + # Clear history with confirmation + clear_arr = [shared.gradio[k] for k in ['Clear history-confirm', 'Clear history', 'Clear history-cancel']] + shared.gradio['Clear history'].click(lambda :[gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, clear_arr) + shared.gradio['Clear history-confirm'].click(lambda :[gr.update(visible=False), gr.update(visible=True), 
gr.update(visible=False)], None, clear_arr) + shared.gradio['Clear history-confirm'].click(chat.clear_chat_log, [shared.gradio['name1'], shared.gradio['name2']], shared.gradio['display']) + shared.gradio['Clear history-cancel'].click(lambda :[gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, clear_arr) + + shared.gradio['Remove last'].click(chat.remove_last_message, [shared.gradio['name1'], shared.gradio['name2']], [shared.gradio['display'], shared.gradio['textbox']], show_progress=False) + shared.gradio['download_button'].click(chat.save_history, inputs=[], outputs=[shared.gradio['download']]) + shared.gradio['Upload character'].click(chat.upload_character, [shared.gradio['upload_json'], shared.gradio['upload_img_bot']], [shared.gradio['character_menu']]) + + # Clearing stuff and saving the history + for i in ['Generate', 'Regenerate', 'Replace last reply']: + shared.gradio[i].click(lambda x: '', shared.gradio['textbox'], shared.gradio['textbox'], show_progress=False) + shared.gradio[i].click(lambda : chat.save_history(timestamp=False), [], [], show_progress=False) + shared.gradio['Clear history-confirm'].click(lambda : chat.save_history(timestamp=False), [], [], show_progress=False) + shared.gradio['textbox'].submit(lambda x: '', shared.gradio['textbox'], shared.gradio['textbox'], show_progress=False) + shared.gradio['textbox'].submit(lambda : chat.save_history(timestamp=False), [], [], show_progress=False) + + shared.gradio['character_menu'].change(chat.load_character, [shared.gradio['character_menu'], shared.gradio['name1'], shared.gradio['name2']], [shared.gradio['name2'], shared.gradio['context'], shared.gradio['display']]) + shared.gradio['upload_chat_history'].upload(chat.load_history, [shared.gradio['upload_chat_history'], shared.gradio['name1'], shared.gradio['name2']], []) + shared.gradio['upload_img_tavern'].upload(chat.upload_tavern_character, [shared.gradio['upload_img_tavern'], shared.gradio['name1'], shared.gradio['name2']], [shared.gradio['character_menu']]) + shared.gradio['upload_img_me'].upload(chat.upload_your_profile_picture, [shared.gradio['upload_img_me']], []) + + reload_func = chat.redraw_html if shared.args.cai_chat else lambda : shared.history['visible'] + reload_inputs = [shared.gradio['name1'], shared.gradio['name2']] if shared.args.cai_chat else [] + shared.gradio['upload_chat_history'].upload(reload_func, reload_inputs, [shared.gradio['display']]) + shared.gradio['upload_img_me'].upload(reload_func, reload_inputs, [shared.gradio['display']]) + shared.gradio['Stop'].click(reload_func, reload_inputs, [shared.gradio['display']]) + + shared.gradio['interface'].load(lambda : chat.load_default_history(shared.settings[f'name1{suffix}'], shared.settings[f'name2{suffix}']), None, None) + shared.gradio['interface'].load(reload_func, reload_inputs, [shared.gradio['display']], show_progress=True) + +elif shared.args.notebook: + with gr.Blocks(css=ui.css, analytics_enabled=False, title=title) as shared.gradio['interface']: + gr.Markdown(description) + with gr.Tab('Raw'): + shared.gradio['textbox'] = gr.Textbox(value=default_text, lines=23) + with gr.Tab('Markdown'): + shared.gradio['markdown'] = gr.Markdown() + with gr.Tab('HTML'): + shared.gradio['html'] = gr.HTML() + + shared.gradio['Generate'] = gr.Button('Generate') + shared.gradio['Stop'] = gr.Button('Stop') + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', 
value=shared.settings['max_new_tokens']) + + create_settings_menus(default_preset) + if shared.args.extensions is not None: + extensions_module.create_extensions_block() + + shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']] + output_params = [shared.gradio[k] for k in ['textbox', 'markdown', 'html']] + gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) + gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) + shared.gradio['Stop'].click(None, None, None, cancels=gen_events) + +else: + with gr.Blocks(css=ui.css, analytics_enabled=False, title=title) as shared.gradio['interface']: + gr.Markdown(description) + with gr.Row(): + with gr.Column(): + shared.gradio['textbox'] = gr.Textbox(value=default_text, lines=15, label='Input') + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + shared.gradio['Generate'] = gr.Button('Generate') + with gr.Row(): + with gr.Column(): + shared.gradio['Continue'] = gr.Button('Continue') + with gr.Column(): + shared.gradio['Stop'] = gr.Button('Stop') + + create_settings_menus(default_preset) + if shared.args.extensions is not None: + extensions_module.create_extensions_block() + + with gr.Column(): + with gr.Tab('Raw'): + shared.gradio['output_textbox'] = gr.Textbox(lines=15, label='Output') + with gr.Tab('Markdown'): + shared.gradio['markdown'] = gr.Markdown() + with gr.Tab('HTML'): + shared.gradio['html'] = gr.HTML() + + shared.input_params = [shared.gradio[k] for k in ['textbox', 'max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']] + output_params = [shared.gradio[k] for k in ['output_textbox', 'markdown', 'html']] + gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen')) + gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream)) + gen_events.append(shared.gradio['Continue'].click(generate_reply, [shared.gradio['output_textbox']] + shared.input_params[1:], output_params, show_progress=shared.args.no_stream)) + shared.gradio['Stop'].click(None, None, None, cancels=gen_events) + +shared.gradio['interface'].queue() +if shared.args.listen: + shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_name='0.0.0.0', server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch) +else: + shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch) + +# I think that I will need this later +while True: + time.sleep(0.5) diff --git a/text-generation-webui/settings-template.json b/text-generation-webui/settings-template.json new file mode 100644 index 0000000000000000000000000000000000000000..9da4397012ccf6821dffb048d741071cf97fff6f --- 
/dev/null +++ b/text-generation-webui/settings-template.json @@ -0,0 +1,35 @@ +{ + "max_new_tokens": 200, + "max_new_tokens_min": 1, + "max_new_tokens_max": 2000, + "name1": "Person 1", + "name2": "Person 2", + "context": "This is a conversation between two people.", + "stop_at_newline": true, + "chat_prompt_size": 2048, + "chat_prompt_size_min": 0, + "chat_prompt_size_max": 2048, + "chat_generation_attempts": 1, + "chat_generation_attempts_min": 1, + "chat_generation_attempts_max": 5, + "name1_pygmalion": "You", + "name2_pygmalion": "Kawaii", + "context_pygmalion": "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. She is an optimist who loves to spread happiness and positivity wherever she goes.\n", + "stop_at_newline_pygmalion": false, + "default_extensions": [], + "chat_default_extensions": [ + "gallery" + ], + "presets": { + "default": "NovelAI-Sphinx Moth", + "pygmalion-*": "Pygmalion", + "RWKV-*": "Naive", + "(rosey|chip|joi)_.*_instruct.*": "Instruct Joi (Contrastive Search)" + }, + "prompts": { + "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", + "^(gpt4chan|gpt-4chan|4chan)": "-----\n--- 865467536\nInput text\n--- 865467537\n", + "(rosey|chip|joi)_.*_instruct.*": "User: \n", + "oasst-*": "<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>" + } +} diff --git a/text-generation-webui/softprompts/place-your-softprompts-here.txt b/text-generation-webui/softprompts/place-your-softprompts-here.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
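
The "presets" and "prompts" tables in settings-template.json above map model-name patterns to a default generation preset and prompt template. As a reading aid only (this is not the webui's actual loading code), the sketch below assumes each key other than "default" is treated as a case-insensitive regular expression matched against the loaded model's name, with the "default" entry used as a fallback; the helper name resolve_setting is made up for illustration.

    import json
    import re

    # Hypothetical helper (not from the repository): return the value whose
    # pattern key matches the model name, assuming the keys are case-insensitive
    # regular expressions; fall back to the "default" entry otherwise.
    def resolve_setting(table, model_name):
        for pattern, value in table.items():
            if pattern != 'default' and re.match(pattern.lower(), model_name.lower()):
                return value
        return table['default']

    with open('settings-template.json') as f:
        settings = json.load(f)

    print(resolve_setting(settings['presets'], 'pygmalion-6b'))  # -> "Pygmalion"
    print(resolve_setting(settings['prompts'], 'opt-1.3b'))      # no pattern matches, falls back to the default prompt

The real resolution logic lives in the webui's own code and may differ in detail, so treat this purely as an illustration of how the pattern-keyed tables are meant to be read.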
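
Separately, the server.py hunk above collects every generation event into gen_events and cancels them all from the Stop button. A minimal, self-contained sketch of that same Gradio 3.x pattern is shown below; the stream_numbers generator and button labels are invented for illustration and are not part of the webui.

    import time
    import gradio as gr

    def stream_numbers():
        # Stand-in for the webui's streaming text generation.
        text = ''
        for i in range(20):
            time.sleep(0.5)
            text += f'{i} '
            yield text

    with gr.Blocks() as demo:
        output = gr.Textbox(label='Output')
        generate = gr.Button('Generate')
        stop = gr.Button('Stop')
        # Keep a handle on the pending event so it can be cancelled later,
        # mirroring how server.py appends each click/submit event to gen_events.
        gen_event = generate.click(stream_numbers, None, output)
        # Passing the pending event(s) to `cancels` aborts them, like the Stop button above.
        stop.click(None, None, None, cancels=[gen_event])

    demo.queue()   # event cancellation requires the queue to be enabled
    demo.launch()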