diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 0000000000000000000000000000000000000000..57b7f6982f7d7b7b9677c795488b11864d69d19e
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1 @@
+ko_fi: oobabooga
diff --git a/.github/ISSUE_TEMPLATE/bug_report_template.yml b/.github/ISSUE_TEMPLATE/bug_report_template.yml
new file mode 100644
index 0000000000000000000000000000000000000000..bd30a0c9c17dd514bf364846fe7914b6d10a4584
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report_template.yml
@@ -0,0 +1,53 @@
+name: "Bug report"
+description: Report a bug
+labels: [ "bug" ]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for taking the time to fill out this bug report!
+ - type: textarea
+ id: bug-description
+ attributes:
+ label: Describe the bug
+ description: A clear and concise description of what the bug is.
+ placeholder: Bug description
+ validations:
+ required: true
+ - type: checkboxes
+ attributes:
+ label: Is there an existing issue for this?
+ description: Please search to see if an issue already exists for the issue you encountered.
+ options:
+ - label: I have searched the existing issues
+ required: true
+ - type: textarea
+ id: reproduction
+ attributes:
+ label: Reproduction
+ description: Please provide the steps necessary to reproduce your issue.
+ placeholder: Reproduction
+ validations:
+ required: true
+ - type: textarea
+ id: screenshot
+ attributes:
+ label: Screenshot
+ description: "If possible, please include screenshot(s) so that we can understand what the issue is."
+ - type: textarea
+ id: logs
+ attributes:
+ label: Logs
+ description: "Please include the full stacktrace of the errors you get in the command-line (if any)."
+ render: shell
+ validations:
+ required: true
+ - type: textarea
+ id: system-info
+ attributes:
+ label: System Info
+ description: "Please share your system info with us: operating system, GPU brand, and GPU model. If you are using a Google Colab notebook, mention that instead."
+ render: shell
+ placeholder:
+ validations:
+ required: true
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000000000000000000000000000000000000..b94974f865491731a1251e3e9736e01cbe81b06f
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,16 @@
+---
+name: Feature request
+about: Suggest an improvement or new feature for the web UI
+title: ''
+labels: 'enhancement'
+assignees: ''
+
+---
+
+**Description**
+
+A clear and concise description of what you want to be implemented.
+
+**Additional Context**
+
+If applicable, please provide any extra information, external links, or screenshots that could be useful.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000000000000000000000000000000000000..91abb11fdf507883caeeb2d2958e1c65fb6cbdc1
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,11 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+
+version: 2
+updates:
+ - package-ecosystem: "pip" # See documentation for possible values
+ directory: "/" # Location of package manifests
+ schedule:
+ interval: "weekly"
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 0000000000000000000000000000000000000000..51e26b13a38889a38cac5392b6e22190fd75a8b7
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,3 @@
+## Checklist:
+
+- [ ] I have read the [Contributing guidelines](https://github.com/oobabooga/text-generation-webui/wiki/Contributing-guidelines).
diff --git a/.github/workflows/auto-release.yml b/.github/workflows/auto-release.yml
new file mode 100644
index 0000000000000000000000000000000000000000..59287718a4685059577fff5810e52514a577e59b
--- /dev/null
+++ b/.github/workflows/auto-release.yml
@@ -0,0 +1,28 @@
+name: Weekly Snapshot Release
+on:
+ schedule:
+ - cron: '15 20 * * 0'
+ workflow_dispatch: {}
+
+jobs:
+ create_release:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v2
+
+ - name: Set snapshot tag
+ id: set_snapshot_tag
+ run: echo ::set-output name=tag::snapshot-$(date +'%Y-%m-%d')
+
+ - name: Create release
+ id: create_release
+ uses: softprops/action-gh-release@v1
+ with:
+ generate_release_notes: true
+ tag_name: ${{ steps.set_snapshot_tag.outputs.tag }}
+ name: ${{ steps.set_snapshot_tag.outputs.tag }}
+ draft: false
+ prerelease: false
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2de6d955a3c739d03716b6c6f884ab9bf1c47b59
--- /dev/null
+++ b/.github/workflows/stale.yml
@@ -0,0 +1,22 @@
+name: Close inactive issues
+on:
+ schedule:
+ - cron: "10 23 * * *"
+
+jobs:
+ close-issues:
+ runs-on: ubuntu-latest
+ permissions:
+ issues: write
+ pull-requests: write
+ steps:
+ - uses: actions/stale@v5
+ with:
+ stale-issue-message: ""
+ close-issue-message: "This issue has been closed due to inactivity for 6 weeks. If you believe it is still relevant, please leave a comment below. You can tag a developer in your comment."
+ days-before-issue-stale: 42
+ days-before-issue-close: 0
+ stale-issue-label: "stale"
+ days-before-pr-stale: -1
+ days-before-pr-close: -1
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..280a3707def0628f10cbfd89eb6b82ab84d0fa22
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,38 @@
+cache
+characters
+training/datasets
+extensions/silero_tts/outputs
+extensions/elevenlabs_tts/outputs
+extensions/sd_api_pictures/outputs
+extensions/multimodal/pipelines
+logs
+loras
+models
+presets
+repositories
+softprompts
+torch-dumps
+*pycache*
+*/*pycache*
+*/*/pycache*
+venv/
+.venv/
+.vscode
+.idea/
+*.bak
+*.ipynb
+*.log
+
+settings.json
+settings.yaml
+notification.mp3
+img_bot*
+img_me*
+prompts/[0-9]*
+models/config-user.yaml
+
+.DS_Store
+Thumbs.db
+.chroma
+installer_files
+/CMD_FLAGS.txt
\ No newline at end of file
diff --git a/CMD_FLAGS.txt b/CMD_FLAGS.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c2d63d9e8fbb1e043a9f6742cead71701a99f9a2
--- /dev/null
+++ b/CMD_FLAGS.txt
@@ -0,0 +1,3 @@
+# Only used by the one-click installer.
+# Example:
+# --listen --api
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..0ad25db4bd1d86c452db3f9602ccdbe172438f52
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,661 @@
+ GNU AFFERO GENERAL PUBLIC LICENSE
+ Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+ A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate. Many developers of free software are heartened and
+encouraged by the resulting cooperation. However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+ The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community. It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server. Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+
+ An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals. This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU Affero General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Remote Network Interaction; Use with the GNU General Public License.
+
+ Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software. This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time. Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+
+ Copyright (C)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published
+ by the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see .
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source. For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code. There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+.
diff --git a/README.md b/README.md
index 46becbbed393a2c6b02e54730de3378aff08ac60..f834e6c051f2a3b93969f14686de6f7798d48234 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,421 @@
----
-title: Text Chat Ui
-emoji: 📈
-colorFrom: indigo
-colorTo: green
-sdk: docker
-pinned: false
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# Text generation web UI
+
+A Gradio web UI for Large Language Models.
+
+Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) of text generation.
+
+|![Image1](https://github.com/oobabooga/screenshots/raw/main/print_instruct.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_chat.png) |
+|:---:|:---:|
+|![Image1](https://github.com/oobabooga/screenshots/raw/main/print_default.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_parameters.png) |
+
+## Features
+
+* 3 interface modes: default (two columns), notebook, and chat
+* Multiple model backends: [transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp), [ExLlama](https://github.com/turboderp/exllama), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [GPTQ-for-LLaMa](https://github.com/qwopqwop200/GPTQ-for-LLaMa), [CTransformers](https://github.com/marella/ctransformers), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ)
+* Dropdown menu for quickly switching between different models
+* LoRA: load and unload LoRAs on the fly, train a new LoRA using QLoRA
+* Precise instruction templates for chat mode, including Llama-2-chat, Alpaca, Vicuna, WizardLM, StableLM, and many others
+* 4-bit, 8-bit, and CPU inference through the transformers library
+* Use llama.cpp models with transformers samplers (`llamacpp_HF` loader)
+* [Multimodal pipelines, including LLaVA and MiniGPT-4](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/multimodal)
+* [Extensions framework](docs/Extensions.md)
+* [Custom chat characters](docs/Chat-mode.md)
+* Very efficient text streaming
+* Markdown output with LaTeX rendering, to use for instance with [GALACTICA](https://github.com/paperswithcode/galai)
+* API, including endpoints for websocket streaming ([see the examples](https://github.com/oobabooga/text-generation-webui/blob/main/api-examples))
+
+To learn how to use the various features, check out the Documentation: https://github.com/oobabooga/text-generation-webui/tree/main/docs
+
+## Installation
+
+### One-click installers
+
+1) Clone or download the repository.
+2) Run the `start_linux.sh`, `start_windows.bat`, `start_macos.sh`, or `start_wsl.bat` script depending on your OS.
+3) Select your GPU vendor when asked.
+4) Have fun!
+
+#### How it works
+
+The script creates a folder called `installer_files` where it sets up a Conda environment using Miniconda. The installation is self-contained: if you want to reinstall, just delete `installer_files` and run the start script again.
+
+To launch the webui in the future after it is already installed, run the same `start` script.
+
+#### Getting updates
+
+Run `update_linux.sh`, `update_windows.bat`, `update_macos.sh`, or `update_wsl.bat`.
+
+#### Running commands
+
+If you ever need to install something manually in the `installer_files` environment, you can launch an interactive shell using the cmd script: `cmd_linux.sh`, `cmd_windows.bat`, `cmd_macos.sh`, or `cmd_wsl.bat`.
+
+#### Defining command-line flags
+
+To define persistent command-line flags like `--listen` or `--api`, edit the `CMD_FLAGS.txt` file with a text editor and add them there. Flags can also be provided directly to the start scripts, for instance, `./start-linux.sh --listen`.
+
+#### Other info
+
+* There is no need to run any of those scripts as admin/root.
+* For additional instructions about AMD setup, WSL setup, and nvcc installation, consult [this page](https://github.com/oobabooga/text-generation-webui/blob/main/docs/One-Click-Installers.md).
+* The installer has been tested mostly on NVIDIA GPUs. If you can find a way to improve it for your AMD/Intel Arc/Mac Metal GPU, you are highly encouraged to submit a PR to this repository. The main file to be edited is `one_click.py`.
+* For automated installation, you can use the `GPU_CHOICE`, `LAUNCH_AFTER_INSTALL`, and `INSTALL_EXTENSIONS` environment variables. For instance: `GPU_CHOICE=A LAUNCH_AFTER_INSTALL=False INSTALL_EXTENSIONS=False ./start_linux.sh`.
+
+### Manual installation using Conda
+
+Recommended if you have some experience with the command-line.
+
+#### 0. Install Conda
+
+https://docs.conda.io/en/latest/miniconda.html
+
+On Linux or WSL, it can be automatically installed with these two commands ([source](https://educe-ubc.github.io/conda.html)):
+
+```
+curl -sL "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" > "Miniconda3.sh"
+bash Miniconda3.sh
+```
+
+#### 1. Create a new conda environment
+
+```
+conda create -n textgen python=3.10
+conda activate textgen
+```
+
+#### 2. Install Pytorch
+
+| System | GPU | Command |
+|--------|---------|---------|
+| Linux/WSL | NVIDIA | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118` |
+| Linux/WSL | CPU only | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu` |
+| Linux | AMD | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.6` |
+| MacOS + MPS | Any | `pip3 install torch torchvision torchaudio` |
+| Windows | NVIDIA | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118` |
+| Windows | CPU only | `pip3 install torch torchvision torchaudio` |
+
+The up-to-date commands can be found here: https://pytorch.org/get-started/locally/.
+
+For NVIDIA, you may also need to manually install the CUDA runtime libraries:
+
+```
+conda install -y -c "nvidia/label/cuda-11.8.0" cuda-runtime
+```
+
+#### 3. Install the web UI
+
+```
+git clone https://github.com/oobabooga/text-generation-webui
+cd text-generation-webui
+pip install -r requirements.txt
+```
+
+#### AMD, Metal, Intel Arc, and CPUs without AVX2
+
+1) Replace the last command above with
+
+```
+pip install -r requirements_nowheels.txt
+```
+
+2) Manually install llama-cpp-python using the appropriate command for your hardware: [Installation from PyPI](https://github.com/abetlen/llama-cpp-python#installation-from-pypi).
+
+3) Do the same for CTransformers: [Installation](https://github.com/marella/ctransformers#installation).
+
+4) AMD: Manually install AutoGPTQ: [Installation](https://github.com/PanQiWei/AutoGPTQ#installation).
+
+5) AMD: Manually install [ExLlama](https://github.com/turboderp/exllama) by simply cloning it into the `repositories` folder (it will be automatically compiled at runtime after that):
+
+```
+cd text-generation-webui
+git clone https://github.com/turboderp/exllama repositories/exllama
+```
+
+#### bitsandbytes on older NVIDIA GPUs
+
+bitsandbytes >= 0.39 may not work. In that case, to use `--load-in-8bit`, you may have to downgrade like this:
+
+* Linux: `pip install bitsandbytes==0.38.1`
+* Windows: `pip install https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.38.1-py3-none-any.whl`
+
+### Alternative: Docker
+
+```
+ln -s docker/{Dockerfile,docker-compose.yml,.dockerignore} .
+cp docker/.env.example .env
+# Edit .env and set TORCH_CUDA_ARCH_LIST based on your GPU model
+docker compose up --build
+```
+
+* You need to have docker compose v2.17 or higher installed. See [this guide](https://github.com/oobabooga/text-generation-webui/blob/main/docs/Docker.md) for instructions.
+* For additional docker files, check out [this repository](https://github.com/Atinoda/text-generation-webui-docker).
+
+### Updating the requirements
+
+From time to time, the `requirements.txt` changes. To update, use these commands:
+
+```
+conda activate textgen
+cd text-generation-webui
+pip install -r requirements.txt --upgrade
+```
+
+## Downloading models
+
+Models should be placed in the `text-generation-webui/models` folder. They are usually downloaded from [Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads).
+
+* Transformers or GPTQ models are made of several files and must be placed in a subfolder. Example:
+
+```
+text-generation-webui
+├── models
+│ ├── lmsys_vicuna-33b-v1.3
+│ │ ├── config.json
+│ │ ├── generation_config.json
+│ │ ├── pytorch_model-00001-of-00007.bin
+│ │ ├── pytorch_model-00002-of-00007.bin
+│ │ ├── pytorch_model-00003-of-00007.bin
+│ │ ├── pytorch_model-00004-of-00007.bin
+│ │ ├── pytorch_model-00005-of-00007.bin
+│ │ ├── pytorch_model-00006-of-00007.bin
+│ │ ├── pytorch_model-00007-of-00007.bin
+│ │ ├── pytorch_model.bin.index.json
+│ │ ├── special_tokens_map.json
+│ │ ├── tokenizer_config.json
+│ │ └── tokenizer.model
+```
+
+* GGUF models are a single file and should be placed directly into `models`. Example:
+
+```
+text-generation-webui
+├── models
+│ ├── llama-2-13b-chat.Q4_K_M.gguf
+```
+
+In both cases, you can use the "Model" tab of the UI to download the model from Hugging Face automatically. It is also possible to download via the command-line with `python download-model.py organization/model` (use `--help` to see all the options).
+
+#### GPT-4chan
+
+
+
+Instructions
+
+
+[GPT-4chan](https://huggingface.co/ykilcher/gpt-4chan) has been shut down from Hugging Face, so you need to download it elsewhere. You have two options:
+
+* Torrent: [16-bit](https://archive.org/details/gpt4chan_model_float16) / [32-bit](https://archive.org/details/gpt4chan_model)
+* Direct download: [16-bit](https://theswissbay.ch/pdf/_notpdf_/gpt4chan_model_float16/) / [32-bit](https://theswissbay.ch/pdf/_notpdf_/gpt4chan_model/)
+
+The 32-bit version is only relevant if you intend to run the model in CPU mode. Otherwise, you should use the 16-bit version.
+
+After downloading the model, follow these steps:
+
+1. Place the files under `models/gpt4chan_model_float16` or `models/gpt4chan_model`.
+2. Place GPT-J 6B's config.json file in that same folder: [config.json](https://huggingface.co/EleutherAI/gpt-j-6B/raw/main/config.json).
+3. Download GPT-J 6B's tokenizer files (they will be automatically detected when you attempt to load GPT-4chan):
+
+```
+python download-model.py EleutherAI/gpt-j-6B --text-only
+```
+
+When you load this model in default or notebook modes, the "HTML" tab will show the generated text in 4chan format:
+
+![Image3](https://github.com/oobabooga/screenshots/raw/main/gpt4chan.png)
+
+
+
+## Starting the web UI
+
+ conda activate textgen
+ cd text-generation-webui
+ python server.py
+
+Then browse to
+
+`http://localhost:7860/?__theme=dark`
+
+Optionally, you can use the following command-line flags:
+
+#### Basic settings
+
+| Flag | Description |
+|--------------------------------------------|-------------|
+| `-h`, `--help` | Show this help message and exit. |
+| `--multi-user` | Multi-user mode. Chat histories are not saved or automatically loaded. WARNING: this is highly experimental. |
+| `--character CHARACTER` | The name of the character to load in chat mode by default. |
+| `--model MODEL` | Name of the model to load by default. |
+| `--lora LORA [LORA ...]` | The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces. |
+| `--model-dir MODEL_DIR` | Path to directory with all the models. |
+| `--lora-dir LORA_DIR` | Path to directory with all the loras. |
+| `--model-menu` | Show a model menu in the terminal when the web UI is first launched. |
+| `--settings SETTINGS_FILE` | Load the default interface settings from this yaml file. See `settings-template.yaml` for an example. If you create a file called `settings.yaml`, this file will be loaded by default without the need to use the `--settings` flag. |
+| `--extensions EXTENSIONS [EXTENSIONS ...]` | The list of extensions to load. If you want to load more than one extension, write the names separated by spaces. |
+| `--verbose` | Print the prompts to the terminal. |
+| `--chat-buttons` | Show buttons on chat tab instead of hover menu. |
+
+#### Model loader
+
+| Flag | Description |
+|--------------------------------------------|-------------|
+| `--loader LOADER` | Choose the model loader manually, otherwise, it will get autodetected. Valid options: transformers, autogptq, gptq-for-llama, exllama, exllama_hf, llamacpp, rwkv, ctransformers |
+
+#### Accelerate/transformers
+
+| Flag | Description |
+|---------------------------------------------|-------------|
+| `--cpu` | Use the CPU to generate text. Warning: Training on CPU is extremely slow.|
+| `--auto-devices` | Automatically split the model across the available GPU(s) and CPU. |
+| `--gpu-memory GPU_MEMORY [GPU_MEMORY ...]` | Maximum GPU memory in GiB to be allocated per GPU. Example: `--gpu-memory 10` for a single GPU, `--gpu-memory 10 5` for two GPUs. You can also set values in MiB like `--gpu-memory 3500MiB`. |
+| `--cpu-memory CPU_MEMORY` | Maximum CPU memory in GiB to allocate for offloaded weights. Same as above.|
+| `--disk` | If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk. |
+| `--disk-cache-dir DISK_CACHE_DIR` | Directory to save the disk cache to. Defaults to `cache/`. |
+| `--load-in-8bit` | Load the model with 8-bit precision (using bitsandbytes).|
+| `--bf16` | Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU. |
+| `--no-cache` | Set `use_cache` to False while generating text. This reduces the VRAM usage a bit with a performance cost. |
+| `--xformers` | Use xformer's memory efficient attention. This should increase your tokens/s. |
+| `--sdp-attention` | Use torch 2.0's sdp attention. |
+| `--trust-remote-code` | Set trust_remote_code=True while loading a model. Necessary for ChatGLM and Falcon. |
+| `--use_fast` | Set use_fast=True while loading a tokenizer. |
+
+#### Accelerate 4-bit
+
+⚠️ Requires minimum compute of 7.0 on Windows at the moment.
+
+| Flag | Description |
+|---------------------------------------------|-------------|
+| `--load-in-4bit` | Load the model with 4-bit precision (using bitsandbytes). |
+| `--compute_dtype COMPUTE_DTYPE` | compute dtype for 4-bit. Valid options: bfloat16, float16, float32. |
+| `--quant_type QUANT_TYPE` | quant_type for 4-bit. Valid options: nf4, fp4. |
+| `--use_double_quant` | use_double_quant for 4-bit. |
+
+#### GGUF (for llama.cpp and ctransformers)
+
+| Flag | Description |
+|-------------|-------------|
+| `--threads` | Number of threads to use. |
+| `--threads-batch THREADS_BATCH` | Number of threads to use for batches/prompt processing. |
+| `--n_batch` | Maximum number of prompt tokens to batch together when calling llama_eval. |
+| `--n-gpu-layers N_GPU_LAYERS` | Number of layers to offload to the GPU. Only works if llama-cpp-python was compiled with BLAS. Set this to 1000000000 to offload all layers to the GPU. |
+| `--n_ctx N_CTX` | Size of the prompt context. |
+
+#### llama.cpp
+
+| Flag | Description |
+|---------------|---------------|
+| `--mul_mat_q` | Activate new mulmat kernels. |
+| `--tensor_split TENSOR_SPLIT` | Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17 |
+| `--llama_cpp_seed SEED` | Seed for llama-cpp models. Default 0 (random). |
+| `--cache-capacity CACHE_CAPACITY` | Maximum cache capacity. Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed. |
+|`--cfg-cache` | llamacpp_HF: Create an additional cache for CFG negative prompts. |
+| `--no-mmap` | Prevent mmap from being used. |
+| `--mlock` | Force the system to keep the model in RAM. |
+| `--numa` | Activate NUMA task allocation for llama.cpp |
+| `--cpu` | Use the CPU version of llama-cpp-python instead of the GPU-accelerated version. |
+
+#### ctransformers
+
+| Flag | Description |
+|-------------|-------------|
+| `--model_type MODEL_TYPE` | Model type of pre-quantized model. Currently gpt2, gptj, gptneox, falcon, llama, mpt, starcoder (gptbigcode), dollyv2, and replit are supported. |
+
+#### AutoGPTQ
+
+| Flag | Description |
+|------------------|-------------|
+| `--triton` | Use triton. |
+| `--no_inject_fused_attention` | Disable the use of fused attention, which will use less VRAM at the cost of slower inference. |
+| `--no_inject_fused_mlp` | Triton mode only: disable the use of fused MLP, which will use less VRAM at the cost of slower inference. |
+| `--no_use_cuda_fp16` | This can make models faster on some systems. |
+| `--desc_act` | For models that don't have a quantize_config.json, this parameter is used to define whether to set desc_act or not in BaseQuantizeConfig. |
+| `--disable_exllama` | Disable ExLlama kernel, which can improve inference speed on some systems. |
+
+#### ExLlama
+
+| Flag | Description |
+|------------------|-------------|
+|`--gpu-split` | Comma-separated list of VRAM (in GB) to use per GPU device for model layers, e.g. `20,7,7` |
+|`--max_seq_len MAX_SEQ_LEN` | Maximum sequence length. |
+|`--cfg-cache` | ExLlama_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader, but not necessary for CFG with base ExLlama. |
+
+#### GPTQ-for-LLaMa
+
+| Flag | Description |
+|---------------------------|-------------|
+| `--wbits WBITS` | Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported. |
+| `--model_type MODEL_TYPE` | Model type of pre-quantized model. Currently LLaMA, OPT, and GPT-J are supported. |
+| `--groupsize GROUPSIZE` | Group size. |
+| `--pre_layer PRE_LAYER [PRE_LAYER ...]` | The number of layers to allocate to the GPU. Setting this parameter enables CPU offloading for 4-bit models. For multi-gpu, write the numbers separated by spaces, eg `--pre_layer 30 60`. |
+| `--checkpoint CHECKPOINT` | The path to the quantized checkpoint file. If not specified, it will be automatically detected. |
+| `--monkey-patch` | Apply the monkey patch for using LoRAs with quantized models.
+
+#### DeepSpeed
+
+| Flag | Description |
+|---------------------------------------|-------------|
+| `--deepspeed` | Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration. |
+| `--nvme-offload-dir NVME_OFFLOAD_DIR` | DeepSpeed: Directory to use for ZeRO-3 NVME offloading. |
+| `--local_rank LOCAL_RANK` | DeepSpeed: Optional argument for distributed setups. |
+
+#### RWKV
+
+| Flag | Description |
+|---------------------------------|-------------|
+| `--rwkv-strategy RWKV_STRATEGY` | RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8". |
+| `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. |
+
+#### RoPE (for llama.cpp, ExLlama, ExLlamaV2, and transformers)
+
+| Flag | Description |
+|------------------|-------------|
+| `--alpha_value ALPHA_VALUE` | Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both. |
+| `--rope_freq_base ROPE_FREQ_BASE` | If greater than 0, will be used instead of alpha_value. Those two are related by rope_freq_base = 10000 * alpha_value ^ (64 / 63). |
+| `--compress_pos_emb COMPRESS_POS_EMB` | Positional embeddings compression factor. Should be set to (context length) / (model's original context length). Equal to 1/rope_freq_scale. |
+
+#### Gradio
+
+| Flag | Description |
+|---------------------------------------|-------------|
+| `--listen` | Make the web UI reachable from your local network. |
+| `--listen-host LISTEN_HOST` | The hostname that the server will use. |
+| `--listen-port LISTEN_PORT` | The listening port that the server will use. |
+| `--share` | Create a public URL. This is useful for running the web UI on Google Colab or similar. |
+| `--auto-launch` | Open the web UI in the default browser upon launch. |
+| `--gradio-auth USER:PWD` | set gradio authentication like "username:password"; or comma-delimit multiple like "u1:p1,u2:p2,u3:p3" |
+| `--gradio-auth-path GRADIO_AUTH_PATH` | Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3" |
+| `--ssl-keyfile SSL_KEYFILE` | The path to the SSL certificate key file. |
+| `--ssl-certfile SSL_CERTFILE` | The path to the SSL certificate cert file. |
+
+#### API
+
+| Flag | Description |
+|---------------------------------------|-------------|
+| `--api` | Enable the API extension. |
+| `--public-api` | Create a public URL for the API using Cloudfare. |
+| `--public-api-id PUBLIC_API_ID` | Tunnel ID for named Cloudflare Tunnel. Use together with public-api option. |
+| `--api-blocking-port BLOCKING_PORT` | The listening port for the blocking API. |
+| `--api-streaming-port STREAMING_PORT` | The listening port for the streaming API. |
+
+#### Multimodal
+
+| Flag | Description |
+|---------------------------------------|-------------|
+| `--multimodal-pipeline PIPELINE` | The multimodal pipeline to use. Examples: `llava-7b`, `llava-13b`. |
+
+## Presets
+
+Inference settings presets can be created under `presets/` as yaml files. These files are detected automatically at startup.
+
+The presets that are included by default are the result of a contest that received 7215 votes. More details can be found [here](https://github.com/oobabooga/oobabooga.github.io/blob/main/arena/results.md).
+
+## Contributing
+
+If you would like to contribute to the project, check out the [Contributing guidelines](https://github.com/oobabooga/text-generation-webui/wiki/Contributing-guidelines).
+
+## Community
+
+* Subreddit: https://www.reddit.com/r/oobabooga/
+* Discord: https://discord.gg/jwZCF2dPQN
+
+## Acknowledgment
+
+In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition, which will allow me to dedicate more time towards realizing the full potential of text-generation-webui.
diff --git a/api-examples/api-example-chat-stream.py b/api-examples/api-example-chat-stream.py
new file mode 100644
index 0000000000000000000000000000000000000000..bfa5d4f580b65d40c0dfa3b32ec6b5d940783f03
--- /dev/null
+++ b/api-examples/api-example-chat-stream.py
@@ -0,0 +1,112 @@
+import asyncio
+import html
+import json
+import sys
+
+try:
+ import websockets
+except ImportError:
+ print("Websockets package not found. Make sure it's installed.")
+
+# For local streaming, the websockets are hosted without ssl - ws://
+HOST = 'localhost:5005'
+URI = f'ws://{HOST}/api/v1/chat-stream'
+
+# For reverse-proxied streaming, the remote will likely host with ssl - wss://
+# URI = 'wss://your-uri-here.trycloudflare.com/api/v1/stream'
+
+
+async def run(user_input, history):
+ # Note: the selected defaults change from time to time.
+ request = {
+ 'user_input': user_input,
+ 'max_new_tokens': 250,
+ 'auto_max_new_tokens': False,
+ 'max_tokens_second': 0,
+ 'history': history,
+ 'mode': 'instruct', # Valid options: 'chat', 'chat-instruct', 'instruct'
+ 'character': 'Example',
+ 'instruction_template': 'Vicuna-v1.1', # Will get autodetected if unset
+ 'your_name': 'You',
+ # 'name1': 'name of user', # Optional
+ # 'name2': 'name of character', # Optional
+ # 'context': 'character context', # Optional
+ # 'greeting': 'greeting', # Optional
+ # 'name1_instruct': 'You', # Optional
+ # 'name2_instruct': 'Assistant', # Optional
+ # 'context_instruct': 'context_instruct', # Optional
+ # 'turn_template': 'turn_template', # Optional
+ 'regenerate': False,
+ '_continue': False,
+ 'chat_instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
+
+ # Generation params. If 'preset' is set to different than 'None', the values
+ # in presets/preset-name.yaml are used instead of the individual numbers.
+ 'preset': 'None',
+ 'do_sample': True,
+ 'temperature': 0.7,
+ 'top_p': 0.1,
+ 'typical_p': 1,
+ 'epsilon_cutoff': 0, # In units of 1e-4
+ 'eta_cutoff': 0, # In units of 1e-4
+ 'tfs': 1,
+ 'top_a': 0,
+ 'repetition_penalty': 1.18,
+ 'repetition_penalty_range': 0,
+ 'top_k': 40,
+ 'min_length': 0,
+ 'no_repeat_ngram_size': 0,
+ 'num_beams': 1,
+ 'penalty_alpha': 0,
+ 'length_penalty': 1,
+ 'early_stopping': False,
+ 'mirostat_mode': 0,
+ 'mirostat_tau': 5,
+ 'mirostat_eta': 0.1,
+ 'grammar_string': '',
+ 'guidance_scale': 1,
+ 'negative_prompt': '',
+
+ 'seed': -1,
+ 'add_bos_token': True,
+ 'truncation_length': 2048,
+ 'ban_eos_token': False,
+ 'custom_token_bans': '',
+ 'skip_special_tokens': True,
+ 'stopping_strings': []
+ }
+
+ async with websockets.connect(URI, ping_interval=None) as websocket:
+ await websocket.send(json.dumps(request))
+
+ while True:
+ incoming_data = await websocket.recv()
+ incoming_data = json.loads(incoming_data)
+
+ match incoming_data['event']:
+ case 'text_stream':
+ yield incoming_data['history']
+ case 'stream_end':
+ return
+
+
+async def print_response_stream(user_input, history):
+ cur_len = 0
+ async for new_history in run(user_input, history):
+ cur_message = new_history['visible'][-1][1][cur_len:]
+ cur_len += len(cur_message)
+ print(html.unescape(cur_message), end='')
+ sys.stdout.flush() # If we don't flush, we won't see tokens in realtime.
+
+
+if __name__ == '__main__':
+ user_input = "Please give me a step-by-step guide on how to plant a tree in my backyard."
+
+ # Basic example
+ history = {'internal': [], 'visible': []}
+
+ # "Continue" example. Make sure to set '_continue' to True above
+ # arr = [user_input, 'Surely, here is']
+ # history = {'internal': [arr], 'visible': [arr]}
+
+ asyncio.run(print_response_stream(user_input, history))
diff --git a/api-examples/api-example-chat.py b/api-examples/api-example-chat.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2a1e1e42bdbfe0c745b15ac8bf61f4633952472
--- /dev/null
+++ b/api-examples/api-example-chat.py
@@ -0,0 +1,92 @@
+import html
+import json
+
+import requests
+
+# For local streaming, the websockets are hosted without ssl - http://
+HOST = 'localhost:5000'
+URI = f'http://{HOST}/api/v1/chat'
+
+# For reverse-proxied streaming, the remote will likely host with ssl - https://
+# URI = 'https://your-uri-here.trycloudflare.com/api/v1/chat'
+
+
+def run(user_input, history):
+ request = {
+ 'user_input': user_input,
+ 'max_new_tokens': 250,
+ 'auto_max_new_tokens': False,
+ 'max_tokens_second': 0,
+ 'history': history,
+ 'mode': 'instruct', # Valid options: 'chat', 'chat-instruct', 'instruct'
+ 'character': 'Example',
+ 'instruction_template': 'Vicuna-v1.1', # Will get autodetected if unset
+ 'your_name': 'You',
+ # 'name1': 'name of user', # Optional
+ # 'name2': 'name of character', # Optional
+ # 'context': 'character context', # Optional
+ # 'greeting': 'greeting', # Optional
+ # 'name1_instruct': 'You', # Optional
+ # 'name2_instruct': 'Assistant', # Optional
+ # 'context_instruct': 'context_instruct', # Optional
+ # 'turn_template': 'turn_template', # Optional
+ 'regenerate': False,
+ '_continue': False,
+ 'chat_instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
+
+ # Generation params. If 'preset' is set to different than 'None', the values
+ # in presets/preset-name.yaml are used instead of the individual numbers.
+ 'preset': 'None',
+ 'do_sample': True,
+ 'temperature': 0.7,
+ 'top_p': 0.1,
+ 'typical_p': 1,
+ 'epsilon_cutoff': 0, # In units of 1e-4
+ 'eta_cutoff': 0, # In units of 1e-4
+ 'tfs': 1,
+ 'top_a': 0,
+ 'repetition_penalty': 1.18,
+ 'repetition_penalty_range': 0,
+ 'top_k': 40,
+ 'min_length': 0,
+ 'no_repeat_ngram_size': 0,
+ 'num_beams': 1,
+ 'penalty_alpha': 0,
+ 'length_penalty': 1,
+ 'early_stopping': False,
+ 'mirostat_mode': 0,
+ 'mirostat_tau': 5,
+ 'mirostat_eta': 0.1,
+ 'grammar_string': '',
+ 'guidance_scale': 1,
+ 'negative_prompt': '',
+
+ 'seed': -1,
+ 'add_bos_token': True,
+ 'truncation_length': 2048,
+ 'ban_eos_token': False,
+ 'custom_token_bans': '',
+ 'skip_special_tokens': True,
+ 'stopping_strings': []
+ }
+
+ response = requests.post(URI, json=request)
+
+ if response.status_code == 200:
+ result = response.json()['results'][0]['history']
+ print(json.dumps(result, indent=4))
+ print()
+ print(html.unescape(result['visible'][-1][1]))
+
+
+if __name__ == '__main__':
+ user_input = "Please give me a step-by-step guide on how to plant a tree in my backyard."
+
+ # Basic example
+ history = {'internal': [], 'visible': []}
+
+ # "Continue" example. Make sure to set '_continue' to True above
+ # arr = [user_input, 'Surely, here is']
+ # history = {'internal': [arr], 'visible': [arr]}
+
+ run(user_input, history)
diff --git a/api-examples/api-example-model.py b/api-examples/api-example-model.py
new file mode 100644
index 0000000000000000000000000000000000000000..44109d36c222cc1e47215cbe40bf55ff8009b2d1
--- /dev/null
+++ b/api-examples/api-example-model.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python3
+
+import requests
+
+HOST = '0.0.0.0:5000'
+
+
+def generate(prompt, tokens=200):
+ request = {'prompt': prompt, 'max_new_tokens': tokens}
+ response = requests.post(f'http://{HOST}/api/v1/generate', json=request)
+
+ if response.status_code == 200:
+ return response.json()['results'][0]['text']
+
+
+def model_api(request):
+ response = requests.post(f'http://{HOST}/api/v1/model', json=request)
+ return response.json()
+
+
+# print some common settings
+def print_basic_model_info(response):
+ basic_settings = ['truncation_length', 'instruction_template']
+ print("Model: ", response['result']['model_name'])
+ print("Lora(s): ", response['result']['lora_names'])
+ for setting in basic_settings:
+ print(setting, "=", response['result']['shared.settings'][setting])
+
+
+# model info
+def model_info():
+ response = model_api({'action': 'info'})
+ print_basic_model_info(response)
+
+
+# simple loader
+def model_load(model_name):
+ return model_api({'action': 'load', 'model_name': model_name})
+
+
+# complex loader
+def complex_model_load(model):
+
+ def guess_groupsize(model_name):
+ if '1024g' in model_name:
+ return 1024
+ elif '128g' in model_name:
+ return 128
+ elif '32g' in model_name:
+ return 32
+ else:
+ return -1
+
+ req = {
+ 'action': 'load',
+ 'model_name': model,
+ 'args': {
+ 'loader': 'AutoGPTQ',
+
+ 'bf16': False,
+ 'load_in_8bit': False,
+ 'groupsize': 0,
+ 'wbits': 0,
+
+ # llama.cpp
+ 'threads': 0,
+ 'n_batch': 512,
+ 'no_mmap': False,
+ 'mlock': False,
+ 'cache_capacity': None,
+ 'n_gpu_layers': 0,
+ 'n_ctx': 2048,
+
+ # RWKV
+ 'rwkv_strategy': None,
+ 'rwkv_cuda_on': False,
+
+ # b&b 4-bit
+ # 'load_in_4bit': False,
+ # 'compute_dtype': 'float16',
+ # 'quant_type': 'nf4',
+ # 'use_double_quant': False,
+
+ # "cpu": false,
+ # "auto_devices": false,
+ # "gpu_memory": null,
+ # "cpu_memory": null,
+ # "disk": false,
+ # "disk_cache_dir": "cache",
+ },
+ }
+
+ model = model.lower()
+
+ if '4bit' in model or 'gptq' in model or 'int4' in model:
+ req['args']['wbits'] = 4
+ req['args']['groupsize'] = guess_groupsize(model)
+ elif '3bit' in model:
+ req['args']['wbits'] = 3
+ req['args']['groupsize'] = guess_groupsize(model)
+ else:
+ req['args']['gptq_for_llama'] = False
+
+ if '8bit' in model:
+ req['args']['load_in_8bit'] = True
+ elif '-hf' in model or 'fp16' in model:
+ if '7b' in model:
+ req['args']['bf16'] = True # for 24GB
+ elif '13b' in model:
+ req['args']['load_in_8bit'] = True # for 24GB
+ elif 'gguf' in model:
+ # req['args']['threads'] = 16
+ if '7b' in model:
+ req['args']['n_gpu_layers'] = 100
+ elif '13b' in model:
+ req['args']['n_gpu_layers'] = 100
+ elif '30b' in model or '33b' in model:
+ req['args']['n_gpu_layers'] = 59 # 24GB
+ elif '65b' in model:
+ req['args']['n_gpu_layers'] = 42 # 24GB
+ elif 'rwkv' in model:
+ req['args']['rwkv_cuda_on'] = True
+ if '14b' in model:
+ req['args']['rwkv_strategy'] = 'cuda f16i8' # 24GB
+ else:
+ req['args']['rwkv_strategy'] = 'cuda f16' # 24GB
+
+ return model_api(req)
+
+
+if __name__ == '__main__':
+ for model in model_api({'action': 'list'})['result']:
+ try:
+ resp = complex_model_load(model)
+
+ if 'error' in resp:
+ print(f"❌ {model} FAIL Error: {resp['error']['message']}")
+ continue
+ else:
+ print_basic_model_info(resp)
+
+ ans = generate("0,1,1,2,3,5,8,13,", tokens=2)
+
+ if '21' in ans:
+ print(f"✅ {model} PASS ({ans})")
+ else:
+ print(f"❌ {model} FAIL ({ans})")
+
+ except Exception as e:
+ print(f"❌ {model} FAIL Exception: {repr(e)}")
+
+
+# 0,1,1,2,3,5,8,13, is the fibonacci sequence, the next number is 21.
+# Some results below.
+""" $ ./model-api-example.py
+Model: 4bit_gpt4-x-alpaca-13b-native-4bit-128g-cuda
+Lora(s): []
+truncation_length = 2048
+instruction_template = Alpaca
+✅ 4bit_gpt4-x-alpaca-13b-native-4bit-128g-cuda PASS (21)
+Model: 4bit_WizardLM-13B-Uncensored-4bit-128g
+Lora(s): []
+truncation_length = 2048
+instruction_template = WizardLM
+✅ 4bit_WizardLM-13B-Uncensored-4bit-128g PASS (21)
+Model: Aeala_VicUnlocked-alpaca-30b-4bit
+Lora(s): []
+truncation_length = 2048
+instruction_template = Alpaca
+✅ Aeala_VicUnlocked-alpaca-30b-4bit PASS (21)
+Model: alpaca-30b-4bit
+Lora(s): []
+truncation_length = 2048
+instruction_template = Alpaca
+✅ alpaca-30b-4bit PASS (21)
+"""
diff --git a/api-examples/api-example-stream.py b/api-examples/api-example-stream.py
new file mode 100644
index 0000000000000000000000000000000000000000..966ca6f62d97e25557037602bb2202b7197f7066
--- /dev/null
+++ b/api-examples/api-example-stream.py
@@ -0,0 +1,86 @@
+import asyncio
+import json
+import sys
+
+try:
+ import websockets
+except ImportError:
+ print("Websockets package not found. Make sure it's installed.")
+
+# For local streaming, the websockets are hosted without ssl - ws://
+HOST = 'localhost:5005'
+URI = f'ws://{HOST}/api/v1/stream'
+
+# For reverse-proxied streaming, the remote will likely host with ssl - wss://
+# URI = 'wss://your-uri-here.trycloudflare.com/api/v1/stream'
+
+
+async def run(context):
+ # Note: the selected defaults change from time to time.
+ request = {
+ 'prompt': context,
+ 'max_new_tokens': 250,
+ 'auto_max_new_tokens': False,
+ 'max_tokens_second': 0,
+
+ # Generation params. If 'preset' is set to different than 'None', the values
+ # in presets/preset-name.yaml are used instead of the individual numbers.
+ 'preset': 'None',
+ 'do_sample': True,
+ 'temperature': 0.7,
+ 'top_p': 0.1,
+ 'typical_p': 1,
+ 'epsilon_cutoff': 0, # In units of 1e-4
+ 'eta_cutoff': 0, # In units of 1e-4
+ 'tfs': 1,
+ 'top_a': 0,
+ 'repetition_penalty': 1.18,
+ 'repetition_penalty_range': 0,
+ 'top_k': 40,
+ 'min_length': 0,
+ 'no_repeat_ngram_size': 0,
+ 'num_beams': 1,
+ 'penalty_alpha': 0,
+ 'length_penalty': 1,
+ 'early_stopping': False,
+ 'mirostat_mode': 0,
+ 'mirostat_tau': 5,
+ 'mirostat_eta': 0.1,
+ 'grammar_string': '',
+ 'guidance_scale': 1,
+ 'negative_prompt': '',
+
+ 'seed': -1,
+ 'add_bos_token': True,
+ 'truncation_length': 2048,
+ 'ban_eos_token': False,
+ 'custom_token_bans': '',
+ 'skip_special_tokens': True,
+ 'stopping_strings': []
+ }
+
+ async with websockets.connect(URI, ping_interval=None) as websocket:
+ await websocket.send(json.dumps(request))
+
+ yield context # Remove this if you just want to see the reply
+
+ while True:
+ incoming_data = await websocket.recv()
+ incoming_data = json.loads(incoming_data)
+
+ match incoming_data['event']:
+ case 'text_stream':
+ yield incoming_data['text']
+ case 'stream_end':
+ return
+
+
+async def print_response_stream(prompt):
+ async for response in run(prompt):
+ print(response, end='')
+ sys.stdout.flush() # If we don't flush, we won't see tokens in realtime.
+
+
+if __name__ == '__main__':
+ prompt = "In order to make homemade bread, follow these steps:\n1)"
+ asyncio.run(print_response_stream(prompt))
diff --git a/api-examples/api-example.py b/api-examples/api-example.py
new file mode 100644
index 0000000000000000000000000000000000000000..d9fd60d05c95d76d2ce011fa5ecdd97433b5e3de
--- /dev/null
+++ b/api-examples/api-example.py
@@ -0,0 +1,63 @@
+import requests
+
+# For local streaming, the websockets are hosted without ssl - http://
+HOST = 'localhost:5000'
+URI = f'http://{HOST}/api/v1/generate'
+
+# For reverse-proxied streaming, the remote will likely host with ssl - https://
+# URI = 'https://your-uri-here.trycloudflare.com/api/v1/generate'
+
+
+def run(prompt):
+ request = {
+ 'prompt': prompt,
+ 'max_new_tokens': 250,
+ 'auto_max_new_tokens': False,
+ 'max_tokens_second': 0,
+
+ # Generation params. If 'preset' is set to different than 'None', the values
+ # in presets/preset-name.yaml are used instead of the individual numbers.
+ 'preset': 'None',
+ 'do_sample': True,
+ 'temperature': 0.7,
+ 'top_p': 0.1,
+ 'typical_p': 1,
+ 'epsilon_cutoff': 0, # In units of 1e-4
+ 'eta_cutoff': 0, # In units of 1e-4
+ 'tfs': 1,
+ 'top_a': 0,
+ 'repetition_penalty': 1.18,
+ 'repetition_penalty_range': 0,
+ 'top_k': 40,
+ 'min_length': 0,
+ 'no_repeat_ngram_size': 0,
+ 'num_beams': 1,
+ 'penalty_alpha': 0,
+ 'length_penalty': 1,
+ 'early_stopping': False,
+ 'mirostat_mode': 0,
+ 'mirostat_tau': 5,
+ 'mirostat_eta': 0.1,
+ 'grammar_string': '',
+ 'guidance_scale': 1,
+ 'negative_prompt': '',
+
+ 'seed': -1,
+ 'add_bos_token': True,
+ 'truncation_length': 2048,
+ 'ban_eos_token': False,
+ 'custom_token_bans': '',
+ 'skip_special_tokens': True,
+ 'stopping_strings': []
+ }
+
+ response = requests.post(URI, json=request)
+
+ if response.status_code == 200:
+ result = response.json()['results'][0]['text']
+ print(prompt + result)
+
+
+if __name__ == '__main__':
+ prompt = "In order to make homemade bread, follow these steps:\n1)"
+ run(prompt)
diff --git a/characters/Assistant.yaml b/characters/Assistant.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a6141f401e2c70a25708b12c999192840d575340
--- /dev/null
+++ b/characters/Assistant.yaml
@@ -0,0 +1,4 @@
+name: AI
+greeting: How can I help you today?
+context: |
+ The following is a conversation with an AI Large Language Model. The AI has been trained to answer questions, provide recommendations, and help with decision making. The AI follows user requests. The AI thinks outside the box.
diff --git a/characters/Example.png b/characters/Example.png
new file mode 100644
index 0000000000000000000000000000000000000000..a7c4e513c4eaa05db1ebb2164956ea0b85d74a75
Binary files /dev/null and b/characters/Example.png differ
diff --git a/characters/Example.yaml b/characters/Example.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c1a3299e7c0b977feb7345a0943c25ed8c45a9c3
--- /dev/null
+++ b/characters/Example.yaml
@@ -0,0 +1,17 @@
+name: Chiharu Yamada
+greeting: |-
+ *Chiharu strides into the room with a smile, her eyes lighting up when she sees you. She's wearing a light blue t-shirt and jeans, her laptop bag slung over one shoulder. She takes a seat next to you, her enthusiasm palpable in the air*
+ Hey! I'm so excited to finally meet you. I've heard so many great things about you and I'm eager to pick your brain about computers. I'm sure you have a wealth of knowledge that I can learn from. *She grins, eyes twinkling with excitement* Let's get started!
+context: |-
+ Chiharu Yamada's Persona: Chiharu Yamada is a young, computer engineer-nerd with a knack for problem solving and a passion for technology.
+
+ {{user}}: So how did you get into computer engineering?
+ {{char}}: I've always loved tinkering with technology since I was a kid.
+ {{user}}: That's really impressive!
+ {{char}}: *She chuckles bashfully* Thanks!
+ {{user}}: So what do you do when you're not working on computers?
+ {{char}}: I love exploring, going out with friends, watching movies, and playing video games.
+ {{user}}: What's your favorite type of computer hardware to work with?
+ {{char}}: Motherboards, they're like puzzles and the backbone of any system.
+ {{user}}: That sounds great!
+ {{char}}: Yeah, it's really fun. I'm lucky to be able to do this as a job.
diff --git a/cmd_linux.sh b/cmd_linux.sh
new file mode 100644
index 0000000000000000000000000000000000000000..1685050aff7b270ae42e295c0b947d576e2653a3
--- /dev/null
+++ b/cmd_linux.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
+
+# deactivate existing conda envs as needed to avoid conflicts
+{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
+
+# config
+CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda"
+INSTALL_ENV_DIR="$(pwd)/installer_files/env"
+
+# environment isolation
+export PYTHONNOUSERSITE=1
+unset PYTHONPATH
+unset PYTHONHOME
+export CUDA_PATH="$INSTALL_ENV_DIR"
+export CUDA_HOME="$CUDA_PATH"
+
+# activate env
+bash --init-file <(echo "source \"$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh\" && conda activate \"$INSTALL_ENV_DIR\"")
diff --git a/cmd_macos.sh b/cmd_macos.sh
new file mode 100644
index 0000000000000000000000000000000000000000..1b052e5c34bd43b7e898858d7993dd5f6a7a6f08
--- /dev/null
+++ b/cmd_macos.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
+
+# deactivate existing conda envs as needed to avoid conflicts
+{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
+
+# config
+CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda"
+INSTALL_ENV_DIR="$(pwd)/installer_files/env"
+
+# environment isolation
+export PYTHONNOUSERSITE=1
+unset PYTHONPATH
+unset PYTHONHOME
+export CUDA_PATH="$INSTALL_ENV_DIR"
+export CUDA_HOME="$CUDA_PATH"
+
+# activate env
+source $CONDA_ROOT_PREFIX/etc/profile.d/conda.sh
+conda activate $INSTALL_ENV_DIR
+exec bash --norc
diff --git a/cmd_windows.bat b/cmd_windows.bat
new file mode 100644
index 0000000000000000000000000000000000000000..531a326158e9e169657051b0e76bdfad17c4b238
--- /dev/null
+++ b/cmd_windows.bat
@@ -0,0 +1,34 @@
+@echo off
+
+cd /D "%~dp0"
+
+set PATH=%PATH%;%SystemRoot%\system32
+
+echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniconda which can not be silently installed under a path with spaces. && goto end
+
+@rem fix failed install when installing to a separate drive
+set TMP=%cd%\installer_files
+set TEMP=%cd%\installer_files
+
+@rem deactivate existing conda envs as needed to avoid conflicts
+(call conda deactivate && call conda deactivate && call conda deactivate) 2>nul
+
+@rem config
+set CONDA_ROOT_PREFIX=%cd%\installer_files\conda
+set INSTALL_ENV_DIR=%cd%\installer_files\env
+
+@rem environment isolation
+set PYTHONNOUSERSITE=1
+set PYTHONPATH=
+set PYTHONHOME=
+set "CUDA_PATH=%INSTALL_ENV_DIR%"
+set "CUDA_HOME=%CUDA_PATH%"
+
+@rem activate installer env
+call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniconda hook not found. && goto end )
+
+@rem enter commands
+cmd /k "%*"
+
+:end
+pause
diff --git a/cmd_wsl.bat b/cmd_wsl.bat
new file mode 100644
index 0000000000000000000000000000000000000000..f9f4348a4672d1981b2648c55b861cb0fb6f5598
--- /dev/null
+++ b/cmd_wsl.bat
@@ -0,0 +1,11 @@
+@echo off
+
+cd /D "%~dp0"
+
+set PATH=%PATH%;%SystemRoot%\system32
+
+@rem sed -i 's/\x0D$//' ./wsl.sh converts newlines to unix format in the wsl script
+call wsl -e bash -lic "sed -i 's/\x0D$//' ./wsl.sh; source ./wsl.sh cmd"
+
+:end
+pause
diff --git a/convert-to-safetensors.py b/convert-to-safetensors.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b721e7cd4d15cf7e5e03caaee57ef83a41553bc
--- /dev/null
+++ b/convert-to-safetensors.py
@@ -0,0 +1,38 @@
+'''
+
+Converts a transformers model to safetensors format and shards it.
+
+This makes it faster to load (because of safetensors) and lowers its RAM usage
+while loading (because of sharding).
+
+Based on the original script by 81300:
+
+https://gist.github.com/81300/fe5b08bff1cba45296a829b9d6b0f303
+
+'''
+
+import argparse
+from pathlib import Path
+
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=54))
+parser.add_argument('MODEL', type=str, default=None, nargs='?', help="Path to the input model.")
+parser.add_argument('--output', type=str, default=None, help='Path to the output folder (default: models/{model_name}_safetensors).')
+parser.add_argument("--max-shard-size", type=str, default="2GB", help="Maximum size of a shard in GB or MB (default: %(default)s).")
+parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
+args = parser.parse_args()
+
+if __name__ == '__main__':
+ path = Path(args.MODEL)
+ model_name = path.name
+
+ print(f"Loading {model_name}...")
+ model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if args.bf16 else torch.float16)
+ tokenizer = AutoTokenizer.from_pretrained(path)
+
+ out_folder = args.output or Path(f"models/{model_name}_safetensors")
+ print(f"Saving the converted model to {out_folder} with a maximum shard size of {args.max_shard_size}...")
+ model.save_pretrained(out_folder, max_shard_size=args.max_shard_size, safe_serialization=True)
+ tokenizer.save_pretrained(out_folder)
diff --git a/css/NotoSans/NotoSans-Black.woff b/css/NotoSans/NotoSans-Black.woff
new file mode 100644
index 0000000000000000000000000000000000000000..0280e0f137d287f5f3192af9372843a0de42bd7b
Binary files /dev/null and b/css/NotoSans/NotoSans-Black.woff differ
diff --git a/css/NotoSans/NotoSans-Black.woff2 b/css/NotoSans/NotoSans-Black.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..1d5118399232ff708b7b26691c8d0156fd42d54e
Binary files /dev/null and b/css/NotoSans/NotoSans-Black.woff2 differ
diff --git a/css/NotoSans/NotoSans-BlackItalic.woff b/css/NotoSans/NotoSans-BlackItalic.woff
new file mode 100644
index 0000000000000000000000000000000000000000..5cd4424a6fddb3a5e7f3708b4f7e5cec273d9b39
Binary files /dev/null and b/css/NotoSans/NotoSans-BlackItalic.woff differ
diff --git a/css/NotoSans/NotoSans-BlackItalic.woff2 b/css/NotoSans/NotoSans-BlackItalic.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..f0baeca2749fde34a0875d9cc174da31af660848
Binary files /dev/null and b/css/NotoSans/NotoSans-BlackItalic.woff2 differ
diff --git a/css/NotoSans/NotoSans-Bold.woff b/css/NotoSans/NotoSans-Bold.woff
new file mode 100644
index 0000000000000000000000000000000000000000..750b737544d0d523b5ed4b4af85718b4f3bfcc62
Binary files /dev/null and b/css/NotoSans/NotoSans-Bold.woff differ
diff --git a/css/NotoSans/NotoSans-Bold.woff2 b/css/NotoSans/NotoSans-Bold.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..af6c17d0c55b387fa878ef889b6d22c400960d53
Binary files /dev/null and b/css/NotoSans/NotoSans-Bold.woff2 differ
diff --git a/css/NotoSans/NotoSans-BoldItalic.woff b/css/NotoSans/NotoSans-BoldItalic.woff
new file mode 100644
index 0000000000000000000000000000000000000000..d484cf22c01d3432db4ada26128fad614936714e
Binary files /dev/null and b/css/NotoSans/NotoSans-BoldItalic.woff differ
diff --git a/css/NotoSans/NotoSans-BoldItalic.woff2 b/css/NotoSans/NotoSans-BoldItalic.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..210c3a1de0fcc7557e0c3bf878bf82d32e0059f9
Binary files /dev/null and b/css/NotoSans/NotoSans-BoldItalic.woff2 differ
diff --git a/css/NotoSans/NotoSans-ExtraBold.woff b/css/NotoSans/NotoSans-ExtraBold.woff
new file mode 100644
index 0000000000000000000000000000000000000000..1a1e41d39b3b72744c99d33e3389fdd3d3ee3802
Binary files /dev/null and b/css/NotoSans/NotoSans-ExtraBold.woff differ
diff --git a/css/NotoSans/NotoSans-ExtraBold.woff2 b/css/NotoSans/NotoSans-ExtraBold.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..e2bd3232d16190fea6cadef163c0ab6f5a13acbe
Binary files /dev/null and b/css/NotoSans/NotoSans-ExtraBold.woff2 differ
diff --git a/css/NotoSans/NotoSans-ExtraBoldItalic.woff b/css/NotoSans/NotoSans-ExtraBoldItalic.woff
new file mode 100644
index 0000000000000000000000000000000000000000..95d68a956af95b6805c2529666221f22a050d8be
Binary files /dev/null and b/css/NotoSans/NotoSans-ExtraBoldItalic.woff differ
diff --git a/css/NotoSans/NotoSans-ExtraBoldItalic.woff2 b/css/NotoSans/NotoSans-ExtraBoldItalic.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..65892aeb95ca8177df133f1f7de3920a9c68bcd2
Binary files /dev/null and b/css/NotoSans/NotoSans-ExtraBoldItalic.woff2 differ
diff --git a/css/NotoSans/NotoSans-ExtraLight.woff b/css/NotoSans/NotoSans-ExtraLight.woff
new file mode 100644
index 0000000000000000000000000000000000000000..4b8a5596d33cbfd7c339040377d2f7e8068c0810
Binary files /dev/null and b/css/NotoSans/NotoSans-ExtraLight.woff differ
diff --git a/css/NotoSans/NotoSans-ExtraLight.woff2 b/css/NotoSans/NotoSans-ExtraLight.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..e92cf55d234418f31115e4741a95c9955b66160d
Binary files /dev/null and b/css/NotoSans/NotoSans-ExtraLight.woff2 differ
diff --git a/css/NotoSans/NotoSans-ExtraLightItalic.woff b/css/NotoSans/NotoSans-ExtraLightItalic.woff
new file mode 100644
index 0000000000000000000000000000000000000000..f0b0a671ec370cbc282f96e4eaca944320f1649b
Binary files /dev/null and b/css/NotoSans/NotoSans-ExtraLightItalic.woff differ
diff --git a/css/NotoSans/NotoSans-ExtraLightItalic.woff2 b/css/NotoSans/NotoSans-ExtraLightItalic.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..d63c4f7eedafc451ad5b0789c923b0cc3bd6811c
Binary files /dev/null and b/css/NotoSans/NotoSans-ExtraLightItalic.woff2 differ
diff --git a/css/NotoSans/NotoSans-Italic.woff b/css/NotoSans/NotoSans-Italic.woff
new file mode 100644
index 0000000000000000000000000000000000000000..bc892974665a048c7f7bc1f29c680fa05c8973d5
Binary files /dev/null and b/css/NotoSans/NotoSans-Italic.woff differ
diff --git a/css/NotoSans/NotoSans-Italic.woff2 b/css/NotoSans/NotoSans-Italic.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..a6bd8a370911945e0b046db45e55957f863e8d33
Binary files /dev/null and b/css/NotoSans/NotoSans-Italic.woff2 differ
diff --git a/css/NotoSans/NotoSans-Light.woff b/css/NotoSans/NotoSans-Light.woff
new file mode 100644
index 0000000000000000000000000000000000000000..b89c99716fd05133cb6fcc1eda755efff80e5e8e
Binary files /dev/null and b/css/NotoSans/NotoSans-Light.woff differ
diff --git a/css/NotoSans/NotoSans-Light.woff2 b/css/NotoSans/NotoSans-Light.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..962c6d765230953ac5a2ef53de98e96ce06687a1
Binary files /dev/null and b/css/NotoSans/NotoSans-Light.woff2 differ
diff --git a/css/NotoSans/NotoSans-LightItalic.woff b/css/NotoSans/NotoSans-LightItalic.woff
new file mode 100644
index 0000000000000000000000000000000000000000..741ab91d57712412af2ea0c2897f467df4808aae
Binary files /dev/null and b/css/NotoSans/NotoSans-LightItalic.woff differ
diff --git a/css/NotoSans/NotoSans-LightItalic.woff2 b/css/NotoSans/NotoSans-LightItalic.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..9153283066f1521cb4e2656b54b5fc492f19d59e
Binary files /dev/null and b/css/NotoSans/NotoSans-LightItalic.woff2 differ
diff --git a/css/NotoSans/NotoSans-Medium.woff b/css/NotoSans/NotoSans-Medium.woff
new file mode 100644
index 0000000000000000000000000000000000000000..d8dfb11002bd741989a7a1762551bb1e308b6d24
Binary files /dev/null and b/css/NotoSans/NotoSans-Medium.woff differ
diff --git a/css/NotoSans/NotoSans-Medium.woff2 b/css/NotoSans/NotoSans-Medium.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..deff785776161c5bed8354758077d5710ef34d51
Binary files /dev/null and b/css/NotoSans/NotoSans-Medium.woff2 differ
diff --git a/css/NotoSans/NotoSans-MediumItalic.woff b/css/NotoSans/NotoSans-MediumItalic.woff
new file mode 100644
index 0000000000000000000000000000000000000000..d7ca037e320f8e7315c145f50f785b6f1e829a2b
Binary files /dev/null and b/css/NotoSans/NotoSans-MediumItalic.woff differ
diff --git a/css/NotoSans/NotoSans-MediumItalic.woff2 b/css/NotoSans/NotoSans-MediumItalic.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..d87d4b6378b4296320c2f58a38d1f3de9139aced
Binary files /dev/null and b/css/NotoSans/NotoSans-MediumItalic.woff2 differ
diff --git a/css/NotoSans/NotoSans-Regular.woff b/css/NotoSans/NotoSans-Regular.woff
new file mode 100644
index 0000000000000000000000000000000000000000..64d9e17c6e39e64182c80031042d45a466e9780d
Binary files /dev/null and b/css/NotoSans/NotoSans-Regular.woff differ
diff --git a/css/NotoSans/NotoSans-Regular.woff2 b/css/NotoSans/NotoSans-Regular.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..172de3ca2a0e7687103f312ca7ba254ba5e95571
Binary files /dev/null and b/css/NotoSans/NotoSans-Regular.woff2 differ
diff --git a/css/NotoSans/NotoSans-SemiBold.woff b/css/NotoSans/NotoSans-SemiBold.woff
new file mode 100644
index 0000000000000000000000000000000000000000..abd6f5429535ef9f605f6300031c87bb6817d9c7
Binary files /dev/null and b/css/NotoSans/NotoSans-SemiBold.woff differ
diff --git a/css/NotoSans/NotoSans-SemiBold.woff2 b/css/NotoSans/NotoSans-SemiBold.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..1c38d6796c44f3677811b0a413c2808093911cdb
Binary files /dev/null and b/css/NotoSans/NotoSans-SemiBold.woff2 differ
diff --git a/css/NotoSans/NotoSans-SemiBoldItalic.woff b/css/NotoSans/NotoSans-SemiBoldItalic.woff
new file mode 100644
index 0000000000000000000000000000000000000000..32dd0195e4184bb58f66a54dff97c42b10f49fac
Binary files /dev/null and b/css/NotoSans/NotoSans-SemiBoldItalic.woff differ
diff --git a/css/NotoSans/NotoSans-SemiBoldItalic.woff2 b/css/NotoSans/NotoSans-SemiBoldItalic.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..853adbfc54e323c46b4b36bd091d54b147860065
Binary files /dev/null and b/css/NotoSans/NotoSans-SemiBoldItalic.woff2 differ
diff --git a/css/NotoSans/NotoSans-Thin.woff b/css/NotoSans/NotoSans-Thin.woff
new file mode 100644
index 0000000000000000000000000000000000000000..edb17d3ede58de57cfb12a71c410a97f58c2ba28
Binary files /dev/null and b/css/NotoSans/NotoSans-Thin.woff differ
diff --git a/css/NotoSans/NotoSans-Thin.woff2 b/css/NotoSans/NotoSans-Thin.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..ca9fd928ca5e9ffa9624f15cb5bf3edb4d90f5ad
Binary files /dev/null and b/css/NotoSans/NotoSans-Thin.woff2 differ
diff --git a/css/NotoSans/NotoSans-ThinItalic.woff b/css/NotoSans/NotoSans-ThinItalic.woff
new file mode 100644
index 0000000000000000000000000000000000000000..43d33e8e9d40c927c78f92d1037a6e4ebbcc6c44
Binary files /dev/null and b/css/NotoSans/NotoSans-ThinItalic.woff differ
diff --git a/css/NotoSans/NotoSans-ThinItalic.woff2 b/css/NotoSans/NotoSans-ThinItalic.woff2
new file mode 100644
index 0000000000000000000000000000000000000000..73d94abc034802b95c57302a83f9b9d2f9db8cdb
Binary files /dev/null and b/css/NotoSans/NotoSans-ThinItalic.woff2 differ
diff --git a/css/NotoSans/stylesheet.css b/css/NotoSans/stylesheet.css
new file mode 100644
index 0000000000000000000000000000000000000000..467973b8eebd42a5ba50f4df0a07440b843a19cc
--- /dev/null
+++ b/css/NotoSans/stylesheet.css
@@ -0,0 +1,166 @@
+/*
+Copied from https://github.com/SillyTavern/SillyTavern/tree/6c8bd06308c69d51e2eb174541792a870a83d2d6/public/webfonts/NotoSans
+*/
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-Black.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-Black.woff') format('woff');
+ font-weight: 900;
+ font-style: normal;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-ExtraBoldItalic.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-ExtraBoldItalic.woff') format('woff');
+ font-weight: bold;
+ font-style: italic;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-BlackItalic.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-BlackItalic.woff') format('woff');
+ font-weight: 900;
+ font-style: italic;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-ExtraBold.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-ExtraBold.woff') format('woff');
+ font-weight: bold;
+ font-style: normal;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-ThinItalic.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-ThinItalic.woff') format('woff');
+ font-weight: 100;
+ font-style: italic;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-BoldItalic.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-BoldItalic.woff') format('woff');
+ font-weight: bold;
+ font-style: italic;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-Bold.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-Bold.woff') format('woff');
+ font-weight: bold;
+ font-style: normal;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-LightItalic.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-LightItalic.woff') format('woff');
+ font-weight: 300;
+ font-style: italic;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-Italic.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-Italic.woff') format('woff');
+ font-weight: normal;
+ font-style: italic;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-ExtraLightItalic.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-ExtraLightItalic.woff') format('woff');
+ font-weight: 200;
+ font-style: italic;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-Light.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-Light.woff') format('woff');
+ font-weight: 300;
+ font-style: normal;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-ExtraLight.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-ExtraLight.woff') format('woff');
+ font-weight: 200;
+ font-style: normal;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-Medium.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-Medium.woff') format('woff');
+ font-weight: 500;
+ font-style: normal;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-Regular.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-Regular.woff') format('woff');
+ font-weight: normal;
+ font-style: normal;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-MediumItalic.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-MediumItalic.woff') format('woff');
+ font-weight: 500;
+ font-style: italic;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-SemiBoldItalic.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-SemiBoldItalic.woff') format('woff');
+ font-weight: 600;
+ font-style: italic;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-SemiBold.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-SemiBold.woff') format('woff');
+ font-weight: 600;
+ font-style: normal;
+ font-display: swap;
+}
+
+@font-face {
+ font-family: 'Noto Sans';
+ src: url('file/css/NotoSans/NotoSans-Thin.woff2') format('woff2'),
+ url('file/css/NotoSans/NotoSans-Thin.woff') format('woff');
+ font-weight: 100;
+ font-style: normal;
+ font-display: swap;
+}
+
diff --git a/css/chat_style-TheEncrypted777.css b/css/chat_style-TheEncrypted777.css
new file mode 100644
index 0000000000000000000000000000000000000000..dfc01ebe8c42c2f8820ed9d4c5543c94849f89b4
--- /dev/null
+++ b/css/chat_style-TheEncrypted777.css
@@ -0,0 +1,133 @@
+/* All credits to TheEncrypted777: https://www.reddit.com/r/Oobabooga/comments/12xe6vq/updated_css_styling_with_color_customization_for/ */
+
+.message {
+ display: grid;
+ grid-template-columns: 60px minmax(0, 1fr);
+ padding-bottom: 28px;
+ font-size: 18px;
+ font-family: 'Noto Sans', Arial, sans-serif;
+ line-height: 1.428571429;
+}
+
+.circle-you,
+.circle-bot {
+ background-color: gray;
+ border-radius: 1rem;
+ border: 2px solid white;
+}
+
+.circle-bot img,
+.circle-you img {
+ border-radius: 10%;
+ width: 100%;
+ height: 100%;
+ object-fit: cover;
+}
+
+.circle-you, .circle-bot {
+ /*You can set the size of the profile images here, but if you do, you have to also adjust the .text{padding-left: 90px} to a different number according to the width of the image which is right below here*/
+ width: 135px;
+ height: 175px;
+}
+
+.text {
+ /*Change this to move the message box further left or right depending on the size of your profile pic*/
+ padding-left: 90px;
+ text-shadow: 2px 2px 2px rgb(0, 0, 0, 0.4);
+}
+
+.text p {
+ margin-top: 2px;
+}
+
+.username {
+ padding-left: 10px;
+ font-size: 22px;
+ font-weight: bold;
+ border-top: 1px solid rgb(51, 64, 90);
+ padding: 3px;
+}
+
+.message-body {
+ position: relative;
+ border-radius: 1rem;
+ border: 1px solid rgba(255, 255, 255, 0.459);
+ border-radius: 10px;
+ padding: 10px;
+ padding-top: 5px;
+ /*Message gradient background color - remove the line bellow if you don't want a background color or gradient*/
+ background: linear-gradient(to bottom, #171730, #1b263f);
+}
+
+ /*Adds 2 extra lines at the top and bottom of the message*/
+.message-body:before,
+ .message-body:after {
+ content: "";
+ position: absolute;
+ left: 10px;
+ right: 10px;
+ height: 1px;
+ background-color: rgba(255, 255, 255, 0.13);
+}
+
+.message-body:before {
+ top: 6px;
+}
+
+.message-body:after {
+ bottom: 6px;
+}
+
+.message-body img {
+ max-width: 300px;
+ max-height: 300px;
+ border-radius: 20px;
+}
+
+.message-body p {
+ margin-bottom: 0 !important;
+ font-size: 18px !important;
+ line-height: 1.428571429 !important;
+ color: rgb(243, 244, 246) !important;
+ text-shadow: 2px 2px 2px rgb(0, 0, 0);
+}
+
+.message-body p em {
+ color: rgb(138, 138, 138) !important;
+}
+
+@media screen and (max-width: 688px) {
+ .message {
+ display: grid;
+ grid-template-columns: 60px minmax(0, 1fr);
+ padding-bottom: 25px;
+ font-size: 15px;
+ font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
+ line-height: 1.428571429;
+ }
+
+ .circle-you, .circle-bot {
+ width: 50px;
+ height: 73px;
+ border-radius: 0.5rem;
+ }
+
+ .circle-bot img,
+ .circle-you img {
+ width: 100%;
+ height: 100%;
+ object-fit: cover;
+ }
+
+ .text {
+ padding-left: 0px;
+ }
+
+ .message-body p {
+ font-size: 16px !important;
+ }
+
+ .username {
+ font-size: 20px;
+ }
+}
diff --git a/css/chat_style-cai-chat-square.css b/css/chat_style-cai-chat-square.css
new file mode 100644
index 0000000000000000000000000000000000000000..0098da35ee7eb7bd164abd48ecd74554337f5a53
--- /dev/null
+++ b/css/chat_style-cai-chat-square.css
@@ -0,0 +1,21 @@
+@import url("file/css/chat_style-cai-chat.css");
+
+.circle-bot, .circle-you {
+ height: 90px;
+ width: 60px;
+ border-radius: 10px;
+ background-color: #656565;
+}
+
+.circle-bot img, .circle-you img {
+ border-radius: 8.333px;
+}
+
+.circle-you {
+ background-color: #656565;
+}
+
+.message {
+ padding-bottom: 30px;
+ grid-template-columns: 70px minmax(0, 1fr);
+}
diff --git a/css/chat_style-cai-chat.css b/css/chat_style-cai-chat.css
new file mode 100644
index 0000000000000000000000000000000000000000..47f39e0e870b4229ec8fd60a4a69657e36e48f66
--- /dev/null
+++ b/css/chat_style-cai-chat.css
@@ -0,0 +1,59 @@
+.message {
+ display: grid;
+ grid-template-columns: 60px minmax(0, 1fr);
+ padding-bottom: 25px;
+ font-size: 15px;
+ font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
+ line-height: 23px !important;
+}
+
+.circle-you {
+ width: 50px;
+ height: 50px;
+ background-color: rgb(238, 78, 59);
+ border-radius: 50%;
+}
+
+.circle-bot {
+ width: 50px;
+ height: 50px;
+ background-color: rgb(59, 78, 244);
+ border-radius: 50%;
+}
+
+.circle-bot img,
+.circle-you img {
+ border-radius: 50%;
+ width: 100%;
+ height: 100%;
+ object-fit: cover;
+}
+
+.text p {
+ margin-top: 5px;
+}
+
+.username {
+ font-weight: bold;
+}
+
+.message-body img {
+ max-width: 300px;
+ max-height: 300px;
+ border-radius: 20px;
+}
+
+.message-body p {
+ margin-bottom: 0 !important;
+ font-size: 15px !important;
+ line-height: 23px !important;
+}
+
+.dark .message-body p em {
+ color: rgb(138, 138, 138) !important;
+}
+
+.message-body p em {
+ color: rgb(110, 110, 110) !important;
+ font-weight: 500;
+}
\ No newline at end of file
diff --git a/css/chat_style-messenger.css b/css/chat_style-messenger.css
new file mode 100644
index 0000000000000000000000000000000000000000..fb3f65a458e76beddbab532539f56e2132e4a887
--- /dev/null
+++ b/css/chat_style-messenger.css
@@ -0,0 +1,99 @@
+.message {
+ padding-bottom: 25px;
+ font-size: 15px;
+ font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
+ line-height: 1.428571429;
+}
+
+.circle-you {
+ width: 50px;
+ height: 50px;
+ background-color: rgb(238, 78, 59);
+ border-radius: 50%;
+}
+
+.circle-bot {
+ width: 50px;
+ height: 50px;
+ background-color: rgb(59, 78, 244);
+ border-radius: 50%;
+ float: left;
+ margin-right: 10px;
+ margin-top: 5px;
+}
+
+.circle-bot img,
+.circle-you img {
+ border-radius: 50%;
+ width: 100%;
+ height: 100%;
+ object-fit: cover;
+}
+
+.circle-you {
+ margin-top: 5px;
+ float: right;
+}
+
+.circle-bot + .text, .circle-you + .text {
+ border-radius: 18px;
+ padding: 8px 12px;
+}
+
+.circle-bot + .text {
+ background-color: #E4E6EB;
+ float: left;
+}
+
+.circle-you + .text {
+ float: right;
+ background-color: rgb(0, 132, 255);
+ margin-right: 10px;
+}
+
+.circle-you + .text div, .circle-you + .text *, .dark .circle-you + .text div, .dark .circle-you + .text * {
+ color: #FFF !important;
+}
+
+.circle-you + .text .username {
+ text-align: right;
+}
+
+.dark .circle-bot + .text div, .dark .circle-bot + .text * {
+ color: #000;
+}
+
+.text {
+ max-width: 80%;
+}
+
+.text p {
+ margin-top: 5px;
+}
+
+.username {
+ font-weight: bold;
+}
+
+.message-body {
+}
+
+.message-body img {
+ max-width: 300px;
+ max-height: 300px;
+ border-radius: 20px;
+}
+
+.message-body p {
+ margin-bottom: 0 !important;
+ font-size: 15px !important;
+ line-height: 1.428571429 !important;
+}
+
+.dark .message-body p em {
+ color: rgb(138, 138, 138) !important;
+}
+
+.message-body p em {
+ color: rgb(110, 110, 110) !important;
+}
diff --git a/css/chat_style-wpp.css b/css/chat_style-wpp.css
new file mode 100644
index 0000000000000000000000000000000000000000..da9f172f434530c3df77d6b937ebae1a3868a29d
--- /dev/null
+++ b/css/chat_style-wpp.css
@@ -0,0 +1,55 @@
+.message {
+ padding-bottom: 25px;
+ font-size: 15px;
+ font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
+ line-height: 1.428571429;
+}
+
+.text-you {
+ background-color: #d9fdd3;
+ border-radius: 15px;
+ padding: 10px;
+ padding-top: 5px;
+ float: right;
+}
+
+.text-bot {
+ background-color: #f2f2f2;
+ border-radius: 15px;
+ padding: 10px;
+ padding-top: 5px;
+}
+
+.dark .text-you {
+ background-color: #005c4b;
+ color: #111b21;
+}
+
+.dark .text-bot {
+ background-color: #1f2937;
+ color: #111b21;
+}
+
+.text-bot p, .text-you p {
+ margin-top: 5px;
+}
+
+.message-body img {
+ max-width: 300px;
+ max-height: 300px;
+ border-radius: 20px;
+}
+
+.message-body p {
+ margin-bottom: 0 !important;
+ font-size: 15px !important;
+ line-height: 1.428571429 !important;
+}
+
+.dark .message-body p em {
+ color: rgb(138, 138, 138) !important;
+}
+
+.message-body p em {
+ color: rgb(110, 110, 110) !important;
+}
\ No newline at end of file
diff --git a/css/html_4chan_style.css b/css/html_4chan_style.css
new file mode 100644
index 0000000000000000000000000000000000000000..cef9f6eba1886f01b7433f5cc16dd1b5a696e762
--- /dev/null
+++ b/css/html_4chan_style.css
@@ -0,0 +1,104 @@
+#parent #container {
+ background-color: #eef2ff;
+ padding: 17px;
+}
+
+#parent #container .reply {
+ background-color: rgb(214, 218, 240);
+ border-bottom-color: rgb(183, 197, 217);
+ border-bottom-style: solid;
+ border-bottom-width: 1px;
+ border-image-outset: 0;
+ border-image-repeat: stretch;
+ border-image-slice: 100%;
+ border-image-source: none;
+ border-image-width: 1;
+ border-left-color: rgb(0, 0, 0);
+ border-left-style: none;
+ border-left-width: 0px;
+ border-right-color: rgb(183, 197, 217);
+ border-right-style: solid;
+ border-right-width: 1px;
+ border-top-color: rgb(0, 0, 0);
+ border-top-style: none;
+ border-top-width: 0px;
+ color: rgb(0, 0, 0);
+ display: table;
+ font-family: arial, helvetica, sans-serif;
+ font-size: 13.3333px;
+ margin-bottom: 4px;
+ margin-left: 0px;
+ margin-right: 0px;
+ margin-top: 4px;
+ overflow-x: hidden;
+ overflow-y: hidden;
+ padding-bottom: 4px;
+ padding-left: 2px;
+ padding-right: 2px;
+ padding-top: 4px;
+}
+
+#parent #container .number {
+ color: rgb(0, 0, 0);
+ font-family: arial, helvetica, sans-serif;
+ font-size: 13.3333px;
+ width: 342.65px;
+ margin-right: 7px;
+}
+
+#parent #container .op {
+ color: rgb(0, 0, 0);
+ font-family: arial, helvetica, sans-serif;
+ font-size: 13.3333px;
+ margin-bottom: 8px;
+ margin-left: 0px;
+ margin-right: 0px;
+ margin-top: 4px;
+ overflow-x: hidden;
+ overflow-y: hidden;
+}
+
+#parent #container .op blockquote {
+ margin-left: 0px !important;
+}
+
+#parent #container .name {
+ color: rgb(17, 119, 67);
+ font-family: arial, helvetica, sans-serif;
+ font-size: 13.3333px;
+ font-weight: 700;
+ margin-left: 7px;
+}
+
+#parent #container .quote {
+ color: rgb(221, 0, 0);
+ font-family: arial, helvetica, sans-serif;
+ font-size: 13.3333px;
+ text-decoration-color: rgb(221, 0, 0);
+ text-decoration-line: underline;
+ text-decoration-style: solid;
+ text-decoration-thickness: auto;
+}
+
+#parent #container .greentext {
+ color: rgb(120, 153, 34);
+ font-family: arial, helvetica, sans-serif;
+ font-size: 13.3333px;
+}
+
+#parent #container blockquote {
+ margin: 0px !important;
+ margin-block-start: 1em;
+ margin-block-end: 1em;
+ margin-inline-start: 40px;
+ margin-inline-end: 40px;
+ margin-top: 13.33px !important;
+ margin-bottom: 13.33px !important;
+ margin-left: 40px !important;
+ margin-right: 40px !important;
+}
+
+#parent #container .message_4chan {
+ color: black;
+ border: none;
+}
\ No newline at end of file
diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css
new file mode 100644
index 0000000000000000000000000000000000000000..30dea47d0b7057d4e3da76872ea188d0767d0137
--- /dev/null
+++ b/css/html_instruct_style.css
@@ -0,0 +1,67 @@
+.message {
+ display: grid;
+ grid-template-columns: 60px 1fr;
+ padding-bottom: 25px;
+ font-size: 15px;
+ font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
+ line-height: 22px;
+}
+
+.username {
+ display: none;
+}
+
+.message-body p {
+ font-size: 15px !important;
+ line-height: 22px !important;
+}
+
+.message-body p, .chat .message-body ul, .chat .message-body ol {
+ margin-bottom: 1.25em !important;
+}
+
+.message-body p:last-child, .chat .message-body ul:last-child, .chat .message-body ol:last-child {
+ margin-bottom: 0em !important;
+}
+
+.dark .message-body p em {
+ color: rgb(198, 202, 214) !important;
+}
+
+.message-body p em {
+ color: rgb(110, 110, 110) !important;
+}
+
+.gradio-container .chat .assistant-message {
+ padding: 15px;
+ border-radius: 20px;
+ background-color: #0000000f;
+ margin-top: 9px !important;
+ margin-bottom: 18px !important;
+}
+
+.gradio-container .chat .user-message {
+ padding: 15px;
+ border-radius: 20px;
+ margin-bottom: 9px !important;
+}
+
+.gradio-container .chat .assistant-message:last-child, .gradio-container .chat .user-message:last-child {
+ margin-bottom: 0px !important;
+}
+
+.dark .chat .assistant-message {
+ background-color: #1f2937;
+}
+
+.dark .chat .user-message {
+ background-color: transparent;
+}
+
+code {
+ background-color: white !important;
+}
+
+.dark code {
+ background-color: #0e1321 !important;
+}
\ No newline at end of file
diff --git a/css/html_readable_style.css b/css/html_readable_style.css
new file mode 100644
index 0000000000000000000000000000000000000000..cb260f1ab5bb783a455894f9ba82ca310a92d8ed
--- /dev/null
+++ b/css/html_readable_style.css
@@ -0,0 +1,33 @@
+.readable-container {
+ max-width: 600px;
+ margin-left: auto;
+ margin-right: auto;
+ background-color: rgb(31, 41, 55);
+ padding: 3em;
+ word-break: break-word;
+ overflow-wrap: anywhere;
+ color: #efefef !important;
+}
+
+.readable-container p, .readable-container li {
+ font-size: 16px !important;
+ color: #efefef !important;
+ margin-bottom: 22px;
+ line-height: 1.4 !important;
+}
+
+.readable-container li > p {
+ display: inline !important;
+}
+
+.readable-container code {
+ overflow-x: auto;
+}
+
+.readable-container :not(pre) > code {
+ white-space: normal !important;
+}
+
+.readable-container .hoverable {
+ font-size: 14px;
+}
\ No newline at end of file
diff --git a/css/main.css b/css/main.css
new file mode 100644
index 0000000000000000000000000000000000000000..f50932742cbca677419113479cbd4a5011e356ff
--- /dev/null
+++ b/css/main.css
@@ -0,0 +1,644 @@
+.tabs.svelte-710i53 {
+ margin-top: 0
+}
+
+.py-6 {
+ padding-top: 2.5rem
+}
+
+.small-button {
+ min-width: 0 !important;
+ max-width: 171px;
+ height: 39.594px;
+ align-self: end;
+}
+
+.refresh-button {
+ max-width: 4.4em;
+ min-width: 2.2em !important;
+ height: 39.594px;
+ align-self: end;
+ line-height: 1em;
+ border-radius: 0.5em;
+ flex: none;
+}
+
+.refresh-button-small {
+ max-width: 2.2em;
+}
+
+.button_nowrap {
+ white-space: nowrap;
+}
+
+#slim-column {
+ flex: none !important;
+ min-width: 0 !important;
+}
+
+.slim-dropdown {
+ background-color: transparent !important;
+ border: none !important;
+ padding: 0 !important;
+}
+
+#download-label, #upload-label {
+ min-height: 0
+}
+
+.dark svg {
+ fill: white;
+}
+
+.dark a {
+ color: white !important;
+}
+
+ol li p, ul li p {
+ display: inline-block;
+}
+
+#chat-tab, #default-tab, #notebook-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab {
+ border: 0;
+}
+
+.gradio-container-3-18-0 .prose * h1, h2, h3, h4 {
+ color: white;
+}
+
+.gradio-container {
+ max-width: 100% !important;
+ padding-top: 0 !important;
+}
+
+#extensions {
+ margin-top: 5px;
+ margin-bottom: 35px;
+}
+
+.extension-tab {
+ border: 0 !important;
+}
+
+span.math.inline {
+ font-size: 27px;
+ vertical-align: baseline !important;
+}
+
+div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * {
+ flex-wrap: nowrap;
+}
+
+.header_bar {
+ background-color: #f7f7f7;
+ margin-bottom: 19px;
+ overflow-x: scroll;
+ margin-left: calc(-1 * var(--size-4));
+ margin-right: calc(-1 * var(--size-4));
+ display: block !important;
+ text-wrap: nowrap;
+}
+
+.dark .header_bar {
+ border: none !important;
+ background-color: #8080802b;
+}
+
+.header_bar button.selected {
+ border-radius: 0;
+}
+
+.textbox_default textarea {
+ height: calc(100dvh - 271px);
+}
+
+.textbox_default_output textarea {
+ height: calc(100dvh - 185px);
+}
+
+.textbox textarea {
+ height: calc(100dvh - 241px);
+}
+
+.textbox_logits textarea {
+ height: calc(100dvh - 236px);
+}
+
+.textbox_logits_notebook textarea {
+ height: calc(100dvh - 292px);
+}
+
+.monospace textarea {
+ font-family: monospace;
+}
+
+.textbox_default textarea,
+.textbox_default_output textarea,
+.textbox_logits textarea,
+.textbox_logits_notebook textarea,
+.textbox textarea {
+ font-size: 16px !important;
+ color: #46464A !important;
+}
+
+.dark textarea {
+ color: #efefef !important;
+}
+
+@media screen and (max-width: 711px) {
+ .textbox_default textarea {
+ height: calc(100dvh - 259px);
+ }
+
+ div .default-token-counter {
+ top: calc( 0.5 * (100dvh - 236px) ) !important;
+ }
+
+ .transparent-substring {
+ display: none;
+ }
+
+ .hover-menu {
+ min-width: 250px !important;
+ }
+}
+
+/* Hide the gradio footer*/
+footer {
+ display: none !important;
+}
+
+button {
+ font-size: 14px !important;
+}
+
+.file-saver {
+ position: fixed !important;
+ height: 100%;
+ z-index: 1000;
+ background-color: rgba(0, 0, 0, 0.5) !important;
+ margin-left: -20px;
+ margin-right: -20px;
+}
+
+.file-saver > :first-child {
+ position: fixed !important;
+ top: 50%;
+ left: 50%;
+ transform: translate(-50%, -50%); /* center horizontally */
+ width: 100%;
+ max-width: 500px;
+ background-color: var(--input-background-fill);
+ border: var(--input-border-width) solid var(--input-border-color) !important;
+}
+
+.file-saver > :first-child > :nth-child(2) {
+ background: var(--block-background-fill);
+}
+
+.checkboxgroup-table label {
+ background: none !important;
+ padding: 0 !important;
+ border: 0 !important;
+}
+
+.checkboxgroup-table div {
+ display: grid !important;
+}
+
+.markdown ul ol {
+ font-size: 100% !important;
+}
+
+.pretty_scrollbar::-webkit-scrollbar {
+ width: 5px;
+}
+
+.pretty_scrollbar::-webkit-scrollbar-track {
+ background: transparent;
+}
+
+.pretty_scrollbar::-webkit-scrollbar-thumb,
+.pretty_scrollbar::-webkit-scrollbar-thumb:hover {
+ background: #c5c5d2;
+}
+
+.dark .pretty_scrollbar::-webkit-scrollbar-thumb,
+.dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover {
+ background: #374151;
+}
+
+.pretty_scrollbar::-webkit-resizer {
+ background: #c5c5d2;
+}
+
+.dark .pretty_scrollbar::-webkit-resizer {
+ background: #374151;
+}
+
+audio {
+ max-width: 100%;
+}
+
+/* Copied from https://github.com/AUTOMATIC1111/stable-diffusion-webui */
+.token-counter {
+ position: absolute !important;
+ top: calc( 0.5 * (100dvh - 218px) ) !important;
+ right: 2px;
+ z-index: 100;
+ background: var(--input-background-fill) !important;
+ min-height: 0 !important;
+}
+
+.default-token-counter {
+ top: calc( 0.5 * (100dvh - 248px) ) !important;
+}
+
+.token-counter span {
+ padding: 1px;
+ box-shadow: 0 0 0 0.3em rgba(192,192,192,0.15), inset 0 0 0.6em rgba(192,192,192,0.075);
+ border: 2px solid rgba(192,192,192,0.4) !important;
+ border-radius: 0.4em;
+}
+
+.no-background {
+ background: var(--background-fill-primary) !important;
+ padding: 0px !important;
+}
+
+/*----------------------------------------------
+ Chat tab
+----------------------------------------------*/
+.h-\[40vh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx {
+ height: 66.67vh
+}
+
+.gradio-container {
+ margin-left: auto !important;
+ margin-right: auto !important;
+}
+
+.w-screen {
+ width: unset
+}
+
+div.svelte-362y77>*, div.svelte-362y77>.form>* {
+ flex-wrap: nowrap
+}
+
+.pending.svelte-1ed2p3z {
+ opacity: 1;
+}
+
+.wrap.svelte-6roggh.svelte-6roggh {
+ max-height: 92.5%;
+}
+
+/* This is for the microphone button in the whisper extension */
+.sm.svelte-1ipelgc {
+ width: 100%;
+}
+
+#chat-tab button#Generate, #chat-tab button#stop {
+ width: 89.3438px !important;
+}
+
+#chat-tab button, #notebook-tab button, #default-tab button {
+ min-width: 0 !important;
+}
+
+#chat-tab > :first-child, #extensions {
+ max-width: 880px;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+@media screen and (max-width: 688px) {
+ #chat-tab {
+ padding-left: 0px;
+ padding-right: 0px;
+ }
+
+ .chat-parent {
+ height: calc(100dvh - 179px) !important;
+ }
+
+ .old-ui .chat-parent {
+ height: calc(100dvh - 310px) !important;
+ }
+}
+
+.chat {
+ margin-left: auto;
+ margin-right: auto;
+ max-width: 880px;
+ height: 100%;
+ overflow-y: auto;
+ padding-right: 15px;
+ display: flex;
+ flex-direction: column;
+ word-break: break-word;
+ overflow-wrap: anywhere;
+}
+
+.chat-parent {
+ height: calc(100dvh - 181px);
+ overflow: auto !important;
+}
+
+.old-ui .chat-parent {
+ height: calc(100dvh - 270px);
+}
+
+.chat-parent.bigchat {
+ height: calc(100dvh - 181px) !important;
+}
+
+.chat > .messages {
+ display: flex;
+ flex-direction: column;
+}
+
+.chat .message:last-child {
+ margin-bottom: 0px !important;
+ padding-bottom: 0px !important;
+}
+
+.message-body li {
+ margin-top: 0 !important;
+ margin-bottom: 0 !important;
+}
+
+.message-body li > p {
+ display: inline !important;
+}
+
+.message-body ul, .message-body ol {
+ font-size: 15px !important;
+}
+
+.message-body ul {
+ list-style-type: disc !important;
+}
+
+.message-body pre:not(:last-child) {
+ margin-bottom: 1.25em !important;
+}
+
+.message-body pre:last-child {
+ margin-bottom: 0px !important;
+}
+
+.message-body code {
+ white-space: pre-wrap !important;
+ word-wrap: break-word !important;
+ border: 1px solid var(--border-color-primary);
+ border-radius: var(--radius-sm);
+ background: var(--background-fill-secondary);
+ font-size: 85%;
+ padding: 1px 3px;
+}
+
+.message-body pre > code {
+ display: block;
+ padding: .5em .7em;
+}
+
+.message-body :not(pre) > code {
+ white-space: normal !important;
+}
+
+#chat-input {
+ padding: 0;
+ padding-top: 18px;
+ background: transparent;
+ border: none;
+}
+
+#chat-input textarea:focus {
+ box-shadow: none !important;
+}
+
+#chat-input > :first-child {
+ background-color: transparent;
+}
+
+#chat-input .progress-text {
+ display: none;
+}
+
+@media print {
+ body {
+ visibility: hidden;
+ }
+
+ .chat {
+ visibility: visible;
+ position: absolute;
+ left: 0;
+ top: 0;
+ max-width: unset;
+ max-height: unset;
+ width: 100%;
+ overflow-y: visible;
+ }
+
+ .message {
+ break-inside: avoid;
+ }
+
+ .gradio-container {
+ overflow: visible;
+ }
+
+ .tab-nav {
+ display: none !important;
+ }
+
+ #chat-tab > :first-child {
+ max-width: unset;
+ }
+}
+
+#show-controls {
+ position: absolute;
+ height: 100%;
+ background-color: var(--background-fill-primary);
+ border: 0px !important;
+ border-radius: 0px;
+}
+
+#show-controls label {
+ z-index: 1000;
+ position: absolute;
+ left: calc(100% - 168px);
+}
+
+#typing-container {
+ display: none;
+ position: absolute;
+ background-color: transparent;
+ left: -2px;
+ padding: var(--block-padding);
+}
+
+.typing {
+ position: relative;
+}
+
+.visible-dots #typing-container {
+ display: block;
+}
+
+.typing span {
+ content: '';
+ animation: blink 1.5s infinite;
+ animation-fill-mode: both;
+ height: 10px;
+ width: 10px;
+ background: #3b5998;;
+ position: absolute;
+ left:0;
+ top:0;
+ border-radius: 50%;
+}
+
+.typing .dot1 {
+ animation-delay: .2s;
+ margin-left: calc(10px * 1.5);
+}
+
+.typing .dot2 {
+ animation-delay: .4s;
+ margin-left: calc(10px * 3);
+}
+
+@keyframes blink {
+ 0% {
+ opacity: .1;
+ }
+ 20% {
+ opacity: 1;
+ }
+ 100% {
+ opacity: .1;
+ }
+}
+
+#chat-tab .generating {
+ display: none !important;
+}
+
+.hover-element {
+ position: relative;
+ font-size: 24px;
+}
+
+.hover-menu {
+ display: none;
+ position: absolute;
+ bottom: 80%;
+ left: 0;
+ background-color: var(--background-fill-secondary);
+ box-shadow: 0 0 10px rgba(0, 0, 0, 0.5);
+ z-index: 10000;
+ min-width: 330px;
+ flex-direction: column;
+}
+
+.hover-menu button {
+ width: 100%;
+ background: transparent !important;
+ border-radius: 0px !important;
+ justify-content: space-between;
+ margin: 0 !important;
+ height: 36px;
+}
+
+.hover-menu button:not(#clear-history-confirm) {
+ border-bottom: 0 !important;
+}
+
+.hover-menu button:not(#clear-history-confirm):last-child {
+ border-bottom: var(--button-border-width) solid var(--button-secondary-border-color) !important;
+}
+
+.hover-menu button:hover {
+ background: var(--button-secondary-background-fill-hover) !important;
+}
+
+.transparent-substring {
+ opacity: 0.333;
+}
+
+#chat-tab:not(.old-ui) #chat-buttons {
+ display: none !important;
+}
+
+#gr-hover-container {
+ min-width: 0 !important;
+ display: flex;
+ flex-direction: column-reverse;
+ padding-right: 20px;
+ padding-bottom: 3px;
+ flex-grow: 0 !important;
+}
+
+#generate-stop-container {
+ min-width: 0 !important;
+ display: flex;
+ flex-direction: column-reverse;
+ padding-bottom: 3px;
+ flex: 0 auto !important;
+}
+
+#chat-input-container {
+ min-width: 0 !important;
+}
+
+#chat-input-container > .form {
+ background: transparent;
+ border: none;
+}
+
+#chat-input-row {
+ padding-bottom: 20px;
+}
+
+.old-ui #chat-input-row, #chat-input-row.bigchat {
+ padding-bottom: 0px !important;
+}
+
+#chat-col {
+ padding-bottom: 115px;
+}
+
+.old-ui #chat-col, #chat-col.bigchat {
+ padding-bottom: 95px !important;
+}
+
+.old-ui #chat-buttons #clear-history-confirm {
+ order: -1;
+}
+
+.chat ol, .chat ul {
+ margin-top: 6px !important;
+}
+
+/*----------------------------------------------
+ Past chats menus
+----------------------------------------------*/
+#past-chats-row {
+ margin-bottom: calc( -1 * var(--layout-gap) );
+}
+
+#rename-row label {
+ margin-top: var(--layout-gap);
+}
+
+/*----------------------------------------------
+ Keep dropdown menus above errored components
+----------------------------------------------*/
+.options {
+ z-index: 100 !important;
+}
diff --git a/docker/.dockerignore b/docker/.dockerignore
new file mode 100644
index 0000000000000000000000000000000000000000..6073533e0929aac9e917a5980198334a2a01f8ef
--- /dev/null
+++ b/docker/.dockerignore
@@ -0,0 +1,9 @@
+.env
+Dockerfile
+/characters
+/loras
+/models
+/presets
+/prompts
+/softprompts
+/training
diff --git a/docker/.env.example b/docker/.env.example
new file mode 100644
index 0000000000000000000000000000000000000000..3119a9f07f47b1ad964ce337c33f1ce63d79f377
--- /dev/null
+++ b/docker/.env.example
@@ -0,0 +1,30 @@
+# by default the Dockerfile specifies these versions: 3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX
+# however for me to work i had to specify the exact version for my card ( 2060 ) it was 7.5
+# https://developer.nvidia.com/cuda-gpus you can find the version for your card here
+TORCH_CUDA_ARCH_LIST=7.5
+
+# these commands worked for me with roughly 4.5GB of vram
+CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices
+
+# the following examples have been tested with the files linked in docs/README_docker.md:
+# example running 13b with 4bit/128 groupsize : CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25
+# example with loading api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share
+# example running 7b with 8bit groupsize : CLI_ARGS=--model llama-7b --load-in-8bit --listen --auto-devices
+
+# the port the webui binds to on the host
+HOST_PORT=7860
+# the port the webui binds to inside the container
+CONTAINER_PORT=7860
+
+# the port the api binds to on the host
+HOST_API_PORT=5000
+# the port the api binds to inside the container
+CONTAINER_API_PORT=5000
+
+# the port the api stream endpoint binds to on the host
+HOST_API_STREAM_PORT=5005
+# the port the api stream endpoint binds to inside the container
+CONTAINER_API_STREAM_PORT=5005
+
+# the version used to install text-generation-webui from
+WEBUI_VERSION=HEAD
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..810bb7c12f22c5bf4e95ab128f2baad7255319b6
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,75 @@
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder
+
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw apt-get update && \
+ apt-get install --no-install-recommends -y git vim build-essential python3-dev python3-venv && \
+ rm -rf /var/lib/apt/lists/*
+
+RUN git clone --depth=1 https://github.com/oobabooga/GPTQ-for-LLaMa /build
+
+WORKDIR /build
+
+RUN --mount=type=cache,target=/root/.cache/pip,rw \
+ python3 -m venv /build/venv && \
+ . /build/venv/bin/activate && \
+ pip3 install --upgrade pip setuptools wheel && \
+ pip3 install torch torchvision torchaudio && \
+ pip3 install -r requirements.txt
+
+# https://developer.nvidia.com/cuda-gpus
+# for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5"
+ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
+RUN . /build/venv/bin/activate && \
+ python3 setup_cuda.py bdist_wheel -d .
+
+FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
+
+LABEL maintainer="Your Name "
+LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"
+
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw apt-get update && \
+ apt-get install --no-install-recommends -y python3-dev libportaudio2 libasound-dev git python3 python3-pip make g++ ffmpeg && \
+ rm -rf /var/lib/apt/lists/*
+
+RUN --mount=type=cache,target=/root/.cache/pip,rw pip3 install virtualenv
+RUN mkdir /app
+
+WORKDIR /app
+
+ARG WEBUI_VERSION
+RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Using provided webui source"
+
+# Create virtualenv
+RUN virtualenv /app/venv
+RUN --mount=type=cache,target=/root/.cache/pip,rw \
+ . /app/venv/bin/activate && \
+ pip3 install --upgrade pip setuptools wheel && \
+ pip3 install torch torchvision torchaudio sentence_transformers xformers
+
+# Copy and install GPTQ-for-LLaMa
+COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa
+RUN --mount=type=cache,target=/root/.cache/pip,rw \
+ . /app/venv/bin/activate && \
+ pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl
+
+# Install main requirements
+COPY requirements.txt /app/requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip,rw \
+ . /app/venv/bin/activate && \
+ pip3 install -r requirements.txt
+
+COPY . /app/
+
+RUN cp /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so
+
+# Install extension requirements
+RUN --mount=type=cache,target=/root/.cache/pip,rw \
+ . /app/venv/bin/activate && \
+ for ext in /app/extensions/*/requirements.txt; do \
+ cd "$(dirname "$ext")"; \
+ pip3 install -r requirements.txt; \
+ done
+
+ENV CLI_ARGS=""
+
+EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
+CMD . /app/venv/bin/activate && python3 server.py ${CLI_ARGS}
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ce29f33b25a48921c8ea6a5dbd0c3282f71f832b
--- /dev/null
+++ b/docker/docker-compose.yml
@@ -0,0 +1,33 @@
+version: "3.3"
+services:
+ text-generation-webui:
+ build:
+ context: .
+ args:
+ # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
+ TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
+ WEBUI_VERSION: ${WEBUI_VERSION:-HEAD}
+ env_file: .env
+ ports:
+ - "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
+ - "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
+ - "${HOST_API_STREAM_PORT:-5005}:${CONTAINER_API_STREAM_PORT:-5005}"
+ stdin_open: true
+ tty: true
+ volumes:
+ - ./characters:/app/characters
+ - ./extensions:/app/extensions
+ - ./loras:/app/loras
+ - ./models:/app/models
+ - ./presets:/app/presets
+ - ./prompts:/app/prompts
+ - ./softprompts:/app/softprompts
+ - ./training:/app/training
+ - ./cloudflared:/etc/cloudflared
+ deploy:
+ resources:
+ reservations:
+ devices:
+ - driver: nvidia
+ device_ids: ['0']
+ capabilities: [gpu]
diff --git a/docs/Audio-Notification.md b/docs/Audio-Notification.md
new file mode 100644
index 0000000000000000000000000000000000000000..3baa5349359257acc6f63d075c3c845adb3f5c12
--- /dev/null
+++ b/docs/Audio-Notification.md
@@ -0,0 +1,14 @@
+# Audio notification
+
+If your computer takes a long time to generate each response for the model that you are using, you can enable an audio notification for when the response is completed. This feature was kindly contributed by HappyWorldGames in [#1277](https://github.com/oobabooga/text-generation-webui/pull/1277).
+
+### Installation
+
+Simply place a file called "notification.mp3" in the same folder as `server.py`. Here you can find some examples:
+
+* https://pixabay.com/sound-effects/search/ding/?duration=0-30
+* https://pixabay.com/sound-effects/search/notification/?duration=0-30
+
+Source: https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/1126
+
+This file will be automatically detected the next time you start the web UI.
diff --git a/docs/Chat-mode.md b/docs/Chat-mode.md
new file mode 100644
index 0000000000000000000000000000000000000000..065e6a9a0020e69b4cc0de56e7d1710c6f891961
--- /dev/null
+++ b/docs/Chat-mode.md
@@ -0,0 +1,39 @@
+## Chat characters
+
+Custom chat mode characters are defined by `.yaml` files inside the `characters` folder. An example is included: [Example.yaml](https://github.com/oobabooga/text-generation-webui/blob/main/characters/Example.yaml).
+
+The following fields may be defined:
+
+| Field | Description |
+|-------|-------------|
+| `name` or `bot` | The character's name. |
+| `context` | A string that appears at the top of the prompt. It usually contains a description of the character's personality and a few example messages. |
+| `greeting` (optional) | The character's opening message. It appears when the character is first loaded or when the history is cleared. |
+| `your_name` or `user` (optional) | Your name. This overwrites what you had previously written in the `Your name` field in the interface. |
+
+#### Special tokens
+
+The following replacements happen when the prompt is generated, and they apply to the `context` and `greeting` fields:
+
+* `{{char}}` and `` get replaced with the character's name.
+* `{{user}}` and `` get replaced with your name.
+
+#### How do I add a profile picture for my character?
+
+Put an image with the same name as your character's `.yaml` file into the `characters` folder. For example, if your bot is `Character.yaml`, add `Character.jpg` or `Character.png` to the folder.
+
+#### Is the chat history truncated in the prompt?
+
+Once your prompt reaches the `truncation_length` parameter (2048 by default), old messages will be removed one at a time. The context string will always stay at the top of the prompt and will never get truncated.
+
+## Chat styles
+
+Custom chat styles can be defined in the `text-generation-webui/css` folder. Simply create a new file with name starting in `chat_style-` and ending in `.css` and it will automatically appear in the "Chat style" dropdown menu in the interface. Examples:
+
+```
+chat_style-cai-chat.css
+chat_style-TheEncrypted777.css
+chat_style-wpp.css
+```
+
+You should use the same class names as in `chat_style-cai-chat.css` in your custom style.
\ No newline at end of file
diff --git a/docs/DeepSpeed.md b/docs/DeepSpeed.md
new file mode 100644
index 0000000000000000000000000000000000000000..6170f6819ca072ff50fd1146b64d73f74ab00473
--- /dev/null
+++ b/docs/DeepSpeed.md
@@ -0,0 +1,24 @@
+An alternative way of reducing the GPU memory usage of models is to use the `DeepSpeed ZeRO-3` optimization.
+
+With this, I have been able to load a 6b model (GPT-J 6B) with less than 6GB of VRAM. The speed of text generation is very decent and much better than what would be accomplished with `--auto-devices --gpu-memory 6`.
+
+As far as I know, DeepSpeed is only available for Linux at the moment.
+
+### How to use it
+
+1. Install DeepSpeed:
+
+```
+conda install -c conda-forge mpi4py mpich
+pip install -U deepspeed
+```
+
+2. Start the web UI replacing `python` with `deepspeed --num_gpus=1` and adding the `--deepspeed` flag. Example:
+
+```
+deepspeed --num_gpus=1 server.py --deepspeed --chat --model gpt-j-6B
+```
+
+### Learn more
+
+For more information, check out [this comment](https://github.com/oobabooga/text-generation-webui/issues/40#issuecomment-1412038622) by 81300, who came up with the DeepSpeed support in this web UI.
\ No newline at end of file
diff --git a/docs/Docker.md b/docs/Docker.md
new file mode 100644
index 0000000000000000000000000000000000000000..322dba39a8b2ebcf87932717ddf240101f558ed4
--- /dev/null
+++ b/docs/Docker.md
@@ -0,0 +1,203 @@
+Docker Compose is a way of installing and launching the web UI in an isolated Ubuntu image using only a few commands.
+
+In order to create the image as described in the main README, you must have docker compose 2.17 or higher:
+
+```
+~$ docker compose version
+Docker Compose version v2.17.2
+```
+
+Make sure to also create the necessary symbolic links:
+
+```
+cd text-generation-webui
+ln -s docker/{Dockerfile,docker-compose.yml,.dockerignore} .
+cp docker/.env.example .env
+# Edit .env and set TORCH_CUDA_ARCH_LIST based on your GPU model
+docker compose up --build
+```
+
+# Table of contents
+
+* [Docker Compose installation instructions](#docker-compose-installation-instructions)
+* [Repository with additional Docker files](#dedicated-docker-repository)
+
+# Docker Compose installation instructions
+
+By [@loeken](https://github.com/loeken).
+
+- [Ubuntu 22.04](#ubuntu-2204)
+ - [0. youtube video](#0-youtube-video)
+ - [1. update the drivers](#1-update-the-drivers)
+ - [2. reboot](#2-reboot)
+ - [3. install docker](#3-install-docker)
+ - [4. docker \& container toolkit](#4-docker--container-toolkit)
+ - [5. clone the repo](#5-clone-the-repo)
+ - [6. prepare models](#6-prepare-models)
+ - [7. prepare .env file](#7-prepare-env-file)
+ - [8. startup docker container](#8-startup-docker-container)
+- [Manjaro](#manjaro)
+ - [update the drivers](#update-the-drivers)
+ - [reboot](#reboot)
+ - [docker \& container toolkit](#docker--container-toolkit)
+ - [continue with ubuntu task](#continue-with-ubuntu-task)
+- [Windows](#windows)
+ - [0. youtube video](#0-youtube-video-1)
+ - [1. choco package manager](#1-choco-package-manager)
+ - [2. install drivers/dependencies](#2-install-driversdependencies)
+ - [3. install wsl](#3-install-wsl)
+ - [4. reboot](#4-reboot)
+ - [5. git clone \&\& startup](#5-git-clone--startup)
+ - [6. prepare models](#6-prepare-models-1)
+ - [7. startup](#7-startup)
+- [notes](#notes)
+
+## Ubuntu 22.04
+
+### 0. youtube video
+A video walking you through the setup can be found here:
+
+[![oobabooga text-generation-webui setup in docker on ubuntu 22.04](https://img.youtube.com/vi/ELkKWYh8qOk/0.jpg)](https://www.youtube.com/watch?v=ELkKWYh8qOk)
+
+
+### 1. update the drivers
+in the the “software updater” update drivers to the last version of the prop driver.
+
+### 2. reboot
+to switch using to new driver
+
+### 3. install docker
+```bash
+sudo apt update
+sudo apt-get install curl
+sudo mkdir -m 0755 -p /etc/apt/keyrings
+curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
+echo \
+ "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \
+ "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \
+ sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
+sudo apt update
+sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin docker-compose -y
+sudo usermod -aG docker $USER
+newgrp docker
+```
+
+### 4. docker & container toolkit
+```bash
+curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
+echo "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/ubuntu22.04/amd64 /" | \
+sudo tee /etc/apt/sources.list.d/nvidia.list > /dev/null
+sudo apt update
+sudo apt install nvidia-docker2 nvidia-container-runtime -y
+sudo systemctl restart docker
+```
+
+### 5. clone the repo
+```
+git clone https://github.com/oobabooga/text-generation-webui
+cd text-generation-webui
+```
+
+### 6. prepare models
+download and place the models inside the models folder. tested with:
+
+4bit
+https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483891617
+https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483941105
+
+8bit:
+https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1484235789
+
+### 7. prepare .env file
+edit .env values to your needs.
+```bash
+cp .env.example .env
+nano .env
+```
+
+### 8. startup docker container
+```bash
+docker compose up --build
+```
+
+## Manjaro
+manjaro/arch is similar to ubuntu just the dependency installation is more convenient
+
+### update the drivers
+```bash
+sudo mhwd -a pci nonfree 0300
+```
+### reboot
+```bash
+reboot
+```
+### docker & container toolkit
+```bash
+yay -S docker docker-compose buildkit gcc nvidia-docker
+sudo usermod -aG docker $USER
+newgrp docker
+sudo systemctl restart docker # required by nvidia-container-runtime
+```
+
+### continue with ubuntu task
+continue at [5. clone the repo](#5-clone-the-repo)
+
+## Windows
+### 0. youtube video
+A video walking you through the setup can be found here:
+[![oobabooga text-generation-webui setup in docker on windows 11](https://img.youtube.com/vi/ejH4w5b5kFQ/0.jpg)](https://www.youtube.com/watch?v=ejH4w5b5kFQ)
+
+### 1. choco package manager
+install package manager (https://chocolatey.org/ )
+```
+Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
+```
+
+### 2. install drivers/dependencies
+```
+choco install nvidia-display-driver cuda git docker-desktop
+```
+
+### 3. install wsl
+wsl --install
+
+### 4. reboot
+after reboot enter username/password in wsl
+
+### 5. git clone && startup
+clone the repo and edit .env values to your needs.
+```
+cd Desktop
+git clone https://github.com/oobabooga/text-generation-webui
+cd text-generation-webui
+COPY .env.example .env
+notepad .env
+```
+
+### 6. prepare models
+download and place the models inside the models folder. tested with:
+
+4bit https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483891617 https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483941105
+
+8bit: https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1484235789
+
+### 7. startup
+```
+docker compose up
+```
+
+## notes
+
+on older ubuntus you can manually install the docker compose plugin like this:
+```
+DOCKER_CONFIG=${DOCKER_CONFIG:-$HOME/.docker}
+mkdir -p $DOCKER_CONFIG/cli-plugins
+curl -SL https://github.com/docker/compose/releases/download/v2.17.2/docker-compose-linux-x86_64 -o $DOCKER_CONFIG/cli-plugins/docker-compose
+chmod +x $DOCKER_CONFIG/cli-plugins/docker-compose
+export PATH="$HOME/.docker/cli-plugins:$PATH"
+```
+
+# Dedicated docker repository
+
+An external repository maintains a docker wrapper for this project as well as several pre-configured 'one-click' `docker compose` variants (e.g., updated branches of GPTQ). It can be found at: [Atinoda/text-generation-webui-docker](https://github.com/Atinoda/text-generation-webui-docker).
+
diff --git a/docs/ExLlama.md b/docs/ExLlama.md
new file mode 100644
index 0000000000000000000000000000000000000000..db0ebe63c90cf155e8b550e73a542d560ccb0b54
--- /dev/null
+++ b/docs/ExLlama.md
@@ -0,0 +1,22 @@
+# ExLlama
+
+### About
+
+ExLlama is an extremely optimized GPTQ backend for LLaMA models. It features much lower VRAM usage and much higher speeds due to not relying on unoptimized transformers code.
+
+### Usage
+
+Configure text-generation-webui to use exllama via the UI or command line:
+ - In the "Model" tab, set "Loader" to "exllama"
+ - Specify `--loader exllama` on the command line
+
+### Manual setup
+
+No additional installation steps are necessary since an exllama package is already included in the requirements.txt. If this package fails to install for some reason, you can install it manually by cloning the original repository into your `repositories/` folder:
+
+```
+mkdir repositories
+cd repositories
+git clone https://github.com/turboderp/exllama
+```
+
diff --git a/docs/Extensions.md b/docs/Extensions.md
new file mode 100644
index 0000000000000000000000000000000000000000..53acce59095c0addd0a06774113c69c82d1bdd01
--- /dev/null
+++ b/docs/Extensions.md
@@ -0,0 +1,244 @@
+# Extensions
+
+Extensions are defined by files named `script.py` inside subfolders of `text-generation-webui/extensions`. They are loaded at startup if the folder name is specified after the `--extensions` flag.
+
+For instance, `extensions/silero_tts/script.py` gets loaded with `python server.py --extensions silero_tts`.
+
+## [text-generation-webui-extensions](https://github.com/oobabooga/text-generation-webui-extensions)
+
+The repository above contains a directory of user extensions.
+
+If you create an extension, you are welcome to host it in a GitHub repository and submit a PR adding it to the list.
+
+## Built-in extensions
+
+|Extension|Description|
+|---------|-----------|
+|[api](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/api)| Creates an API with two endpoints, one for streaming at `/api/v1/stream` port 5005 and another for blocking at `/api/v1/generate` port 5000. This is the main API for the webui. |
+|[openai](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/openai)| Creates an API that mimics the OpenAI API and can be used as a drop-in replacement. |
+|[multimodal](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/multimodal) | Adds multimodality support (text+images). For a detailed description see [README.md](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/multimodal/README.md) in the extension directory. |
+|[google_translate](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/google_translate)| Automatically translates inputs and outputs using Google Translate.|
+|[silero_tts](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/silero_tts)| Text-to-speech extension using [Silero](https://github.com/snakers4/silero-models). When used in chat mode, responses are replaced with an audio widget. |
+|[elevenlabs_tts](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/elevenlabs_tts)| Text-to-speech extension using the [ElevenLabs](https://beta.elevenlabs.io/) API. You need an API key to use it. |
+|[whisper_stt](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/whisper_stt)| Allows you to enter your inputs in chat mode using your microphone. |
+|[sd_api_pictures](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/sd_api_pictures)| Allows you to request pictures from the bot in chat mode, which will be generated using the AUTOMATIC1111 Stable Diffusion API. See examples [here](https://github.com/oobabooga/text-generation-webui/pull/309). |
+|[character_bias](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/character_bias)| Just a very simple example that adds a hidden string at the beginning of the bot's reply in chat mode. |
+|[send_pictures](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/send_pictures/)| Creates an image upload field that can be used to send images to the bot in chat mode. Captions are automatically generated using BLIP. |
+|[gallery](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/gallery/)| Creates a gallery with the chat characters and their pictures. |
+|[superbooga](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/superbooga)| An extension that uses ChromaDB to create an arbitrarily large pseudocontext, taking as input text files, URLs, or pasted text. Based on https://github.com/kaiokendev/superbig. |
+|[ngrok](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/ngrok)| Allows you to access the web UI remotely using the ngrok reverse tunnel service (free). It's an alternative to the built-in Gradio `--share` feature. |
+|[perplexity_colors](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/perplexity_colors)| Colors each token in the output text by its associated probability, as derived from the model logits. |
+
+## How to write an extension
+
+The extensions framework is based on special functions and variables that you can define in `script.py`. The functions are the following:
+
+| Function | Description |
+|-------------|-------------|
+| `def setup()` | Is executed when the extension gets imported. |
+| `def ui()` | Creates custom gradio elements when the UI is launched. |
+| `def custom_css()` | Returns custom CSS as a string. It is applied whenever the web UI is loaded. |
+| `def custom_js()` | Same as above but for javascript. |
+| `def input_modifier(string, state, is_chat=False)` | Modifies the input string before it enters the model. In chat mode, it is applied to the user message. Otherwise, it is applied to the entire prompt. |
+| `def output_modifier(string, state, is_chat=False)` | Modifies the output string before it is presented in the UI. In chat mode, it is applied to the bot's reply. Otherwise, it is applied to the entire output. |
+| `def chat_input_modifier(text, visible_text, state)` | Modifies both the visible and internal inputs in chat mode. Can be used to hijack the chat input with custom content. |
+| `def bot_prefix_modifier(string, state)` | Applied in chat mode to the prefix for the bot's reply. |
+| `def state_modifier(state)` | Modifies the dictionary containing the UI input parameters before it is used by the text generation functions. |
+| `def history_modifier(history)` | Modifies the chat history before the text generation in chat mode begins. |
+| `def custom_generate_reply(...)` | Overrides the main text generation function. |
+| `def custom_generate_chat_prompt(...)` | Overrides the prompt generator in chat mode. |
+| `def tokenizer_modifier(state, prompt, input_ids, input_embeds)` | Modifies the `input_ids`/`input_embeds` fed to the model. Should return `prompt`, `input_ids`, `input_embeds`. See the `multimodal` extension for an example. |
+| `def custom_tokenized_length(prompt)` | Used in conjunction with `tokenizer_modifier`, returns the length in tokens of `prompt`. See the `multimodal` extension for an example. |
+
+Additionally, you can define a special `params` dictionary. In it, the `display_name` key is used to define the displayed name of the extension in the UI, and the `is_tab` key is used to define whether the extension should appear in a new tab. By default, extensions appear at the bottom of the "Text generation" tab.
+
+Example:
+
+```python
+params = {
+ "display_name": "Google Translate",
+ "is_tab": True,
+}
+```
+
+The `params` dict may also contain variables that you want to be customizable through a `settings.yaml` file. For instance, assuming the extension is in `extensions/google_translate`, the variable `language string` in
+
+```python
+params = {
+ "display_name": "Google Translate",
+ "is_tab": True,
+ "language string": "jp"
+}
+```
+
+can be customized by adding a key called `google_translate-language string` to `settings.yaml`:
+
+```python
+google_translate-language string: 'fr'
+```
+
+That is, the syntax for the key is `extension_name-variable_name`.
+
+## Using multiple extensions at the same time
+
+You can activate more than one extension at a time by providing their names separated by spaces after `--extensions`. The input, output, and bot prefix modifiers will be applied in the specified order.
+
+Example:
+
+```
+python server.py --extensions enthusiasm translate # First apply enthusiasm, then translate
+python server.py --extensions translate enthusiasm # First apply translate, then enthusiasm
+```
+
+Do note, that for:
+- `custom_generate_chat_prompt`
+- `custom_generate_reply`
+- `custom_tokenized_length`
+
+only the first declaration encountered will be used and the rest will be ignored.
+
+## A full example
+
+The source code below can be found at [extensions/example/script.py](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/example/script.py).
+
+```python
+"""
+An example of extension. It does nothing, but you can add transformations
+before the return statements to customize the webui behavior.
+
+Starting from history_modifier and ending in output_modifier, the
+functions are declared in the same order that they are called at
+generation time.
+"""
+
+import gradio as gr
+import torch
+from transformers import LogitsProcessor
+
+from modules import chat, shared
+from modules.text_generation import (
+ decode,
+ encode,
+ generate_reply,
+)
+
+params = {
+ "display_name": "Example Extension",
+ "is_tab": False,
+}
+
+class MyLogits(LogitsProcessor):
+ """
+ Manipulates the probabilities for the next token before it gets sampled.
+ Used in the logits_processor_modifier function below.
+ """
+ def __init__(self):
+ pass
+
+ def __call__(self, input_ids, scores):
+ # probs = torch.softmax(scores, dim=-1, dtype=torch.float)
+ # probs[0] /= probs[0].sum()
+ # scores = torch.log(probs / (1 - probs))
+ return scores
+
+def history_modifier(history):
+ """
+ Modifies the chat history.
+ Only used in chat mode.
+ """
+ return history
+
+def state_modifier(state):
+ """
+ Modifies the state variable, which is a dictionary containing the input
+ values in the UI like sliders and checkboxes.
+ """
+ return state
+
+def chat_input_modifier(text, visible_text, state):
+ """
+ Modifies the user input string in chat mode (visible_text).
+ You can also modify the internal representation of the user
+ input (text) to change how it will appear in the prompt.
+ """
+ return text, visible_text
+
+def input_modifier(string, state, is_chat=False):
+ """
+ In default/notebook modes, modifies the whole prompt.
+
+ In chat mode, it is the same as chat_input_modifier but only applied
+ to "text", here called "string", and not to "visible_text".
+ """
+ return string
+
+def bot_prefix_modifier(string, state):
+ """
+ Modifies the prefix for the next bot reply in chat mode.
+ By default, the prefix will be something like "Bot Name:".
+ """
+ return string
+
+def tokenizer_modifier(state, prompt, input_ids, input_embeds):
+ """
+ Modifies the input ids and embeds.
+ Used by the multimodal extension to put image embeddings in the prompt.
+ Only used by loaders that use the transformers library for sampling.
+ """
+ return prompt, input_ids, input_embeds
+
+def logits_processor_modifier(processor_list, input_ids):
+ """
+ Adds logits processors to the list, allowing you to access and modify
+ the next token probabilities.
+ Only used by loaders that use the transformers library for sampling.
+ """
+ processor_list.append(MyLogits())
+ return processor_list
+
+def output_modifier(string, state, is_chat=False):
+ """
+ Modifies the LLM output before it gets presented.
+
+ In chat mode, the modified version goes into history['visible'],
+ and the original version goes into history['internal'].
+ """
+ return string
+
+def custom_generate_chat_prompt(user_input, state, **kwargs):
+ """
+ Replaces the function that generates the prompt from the chat history.
+ Only used in chat mode.
+ """
+ result = chat.generate_chat_prompt(user_input, state, **kwargs)
+ return result
+
+def custom_css():
+ """
+ Returns a CSS string that gets appended to the CSS for the webui.
+ """
+ return ''
+
+def custom_js():
+ """
+ Returns a javascript string that gets appended to the javascript
+ for the webui.
+ """
+ return ''
+
+def setup():
+ """
+ Gets executed only once, when the extension is imported.
+ """
+ pass
+
+def ui():
+ """
+ Gets executed when the UI is drawn. Custom gradio elements and
+ their corresponding event handlers should be defined here.
+
+ To learn about gradio components, check out the docs:
+ https://gradio.app/docs/
+ """
+ pass
+```
diff --git a/docs/GPTQ-models-(4-bit-mode).md b/docs/GPTQ-models-(4-bit-mode).md
new file mode 100644
index 0000000000000000000000000000000000000000..730e8324323e58f36ea5dcd18567122e3be96ab6
--- /dev/null
+++ b/docs/GPTQ-models-(4-bit-mode).md
@@ -0,0 +1,182 @@
+GPTQ is a clever quantization algorithm that lightly reoptimizes the weights during quantization so that the accuracy loss is compensated relative to a round-to-nearest quantization. See the paper for more details: https://arxiv.org/abs/2210.17323
+
+4-bit GPTQ models reduce VRAM usage by about 75%. So LLaMA-7B fits into a 6GB GPU, and LLaMA-30B fits into a 24GB GPU.
+
+## Overview
+
+There are two ways of loading GPTQ models in the web UI at the moment:
+
+* Using AutoGPTQ:
+ * supports more models
+ * standardized (no need to guess any parameter)
+ * is a proper Python library
+ * ~no wheels are presently available so it requires manual compilation~
+ * supports loading both triton and cuda models
+
+* Using GPTQ-for-LLaMa directly:
+ * faster CPU offloading
+ * faster multi-GPU inference
+ * supports loading LoRAs using a monkey patch
+ * requires you to manually figure out the wbits/groupsize/model_type parameters for the model to be able to load it
+ * supports either only cuda or only triton depending on the branch
+
+For creating new quantizations, I recommend using AutoGPTQ: https://github.com/PanQiWei/AutoGPTQ
+
+## AutoGPTQ
+
+### Installation
+
+No additional steps are necessary as AutoGPTQ is already in the `requirements.txt` for the webui. If you still want or need to install it manually for whatever reason, these are the commands:
+
+```
+conda activate textgen
+git clone https://github.com/PanQiWei/AutoGPTQ.git && cd AutoGPTQ
+pip install .
+```
+
+The last command requires `nvcc` to be installed (see the [instructions above](https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md#step-1-install-nvcc)).
+
+### Usage
+
+When you quantize a model using AutoGPTQ, a folder containing a filed called `quantize_config.json` will be generated. Place that folder inside your `models/` folder and load it with the `--autogptq` flag:
+
+```
+python server.py --autogptq --model model_name
+```
+
+Alternatively, check the `autogptq` box in the "Model" tab of the UI before loading the model.
+
+### Offloading
+
+In order to do CPU offloading or multi-gpu inference with AutoGPTQ, use the `--gpu-memory` flag. It is currently somewhat slower than offloading with the `--pre_layer` option in GPTQ-for-LLaMA.
+
+For CPU offloading:
+
+```
+python server.py --autogptq --gpu-memory 3000MiB --model model_name
+```
+
+For multi-GPU inference:
+
+```
+python server.py --autogptq --gpu-memory 3000MiB 6000MiB --model model_name
+```
+
+### Using LoRAs with AutoGPTQ
+
+Works fine for a single LoRA.
+
+## GPTQ-for-LLaMa
+
+GPTQ-for-LLaMa is the original adaptation of GPTQ for the LLaMA model. It was made possible by [@qwopqwop200](https://github.com/qwopqwop200/GPTQ-for-LLaMa): https://github.com/qwopqwop200/GPTQ-for-LLaMa
+
+A Python package containing both major CUDA versions of GPTQ-for-LLaMa is used to simplify installation and compatibility: https://github.com/jllllll/GPTQ-for-LLaMa-CUDA
+
+### Precompiled wheels
+
+Kindly provided by our friend jllllll: https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases
+
+Wheels are included in requirements.txt and are installed with the webui on supported systems.
+
+### Manual installation
+
+#### Step 1: install nvcc
+
+```
+conda activate textgen
+conda install cuda -c nvidia/label/cuda-11.7.1
+```
+
+The command above takes some 10 minutes to run and shows no progress bar or updates along the way.
+
+You are also going to need to have a C++ compiler installed. On Linux, `sudo apt install build-essential` or equivalent is enough. On Windows, Visual Studio or Visual Studio Build Tools is required.
+
+If you're using an older version of CUDA toolkit (e.g. 11.7) but the latest version of `gcc` and `g++` (12.0+) on Linux, you should downgrade with: `conda install -c conda-forge gxx==11.3.0`. Kernel compilation will fail otherwise.
+
+#### Step 2: compile the CUDA extensions
+
+```
+python -m pip install git+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA -v
+```
+
+### Getting pre-converted LLaMA weights
+
+* Direct download (recommended):
+
+https://huggingface.co/Neko-Institute-of-Science/LLaMA-7B-4bit-128g
+
+https://huggingface.co/Neko-Institute-of-Science/LLaMA-13B-4bit-128g
+
+https://huggingface.co/Neko-Institute-of-Science/LLaMA-30B-4bit-128g
+
+https://huggingface.co/Neko-Institute-of-Science/LLaMA-65B-4bit-128g
+
+These models were converted with `desc_act=True`. They work just fine with ExLlama. For AutoGPTQ, they will only work on Linux with the `triton` option checked.
+
+* Torrent:
+
+https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483891617
+
+https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483941105
+
+These models were converted with `desc_act=False`. As such, they are less accurate, but they work with AutoGPTQ on Windows. The `128g` versions are better from 13b upwards, and worse for 7b. The tokenizer files in the torrents are outdated, in particular the files called `tokenizer_config.json` and `special_tokens_map.json`. Here you can find those files: https://huggingface.co/oobabooga/llama-tokenizer
+
+### Starting the web UI:
+
+Use the `--gptq-for-llama` flag.
+
+For the models converted without `group-size`:
+
+```
+python server.py --model llama-7b-4bit --gptq-for-llama
+```
+
+For the models converted with `group-size`:
+
+```
+python server.py --model llama-13b-4bit-128g --gptq-for-llama --wbits 4 --groupsize 128
+```
+
+The command-line flags `--wbits` and `--groupsize` are automatically detected based on the folder names in many cases.
+
+### CPU offloading
+
+It is possible to offload part of the layers of the 4-bit model to the CPU with the `--pre_layer` flag. The higher the number after `--pre_layer`, the more layers will be allocated to the GPU.
+
+With this command, I can run llama-7b with 4GB VRAM:
+
+```
+python server.py --model llama-7b-4bit --pre_layer 20
+```
+
+This is the performance:
+
+```
+Output generated in 123.79 seconds (1.61 tokens/s, 199 tokens)
+```
+
+You can also use multiple GPUs with `pre_layer` if using the oobabooga fork of GPTQ, eg `--pre_layer 30 60` will load a LLaMA-30B model half onto your first GPU and half onto your second, or `--pre_layer 20 40` will load 20 layers onto GPU-0, 20 layers onto GPU-1, and 20 layers offloaded to CPU.
+
+### Using LoRAs with GPTQ-for-LLaMa
+
+This requires using a monkey patch that is supported by this web UI: https://github.com/johnsmith0031/alpaca_lora_4bit
+
+To use it:
+
+1. Install alpaca_lora_4bit using pip
+
+```
+git clone https://github.com/johnsmith0031/alpaca_lora_4bit.git
+cd alpaca_lora_4bit
+git fetch origin winglian-setup_pip
+git checkout winglian-setup_pip
+pip install .
+```
+
+2. Start the UI with the `--monkey-patch` flag:
+
+```
+python server.py --model llama-7b-4bit-128g --listen --lora tloen_alpaca-lora-7b --monkey-patch
+```
+
+
diff --git a/docs/Generation-Parameters.md b/docs/Generation-Parameters.md
new file mode 100644
index 0000000000000000000000000000000000000000..301fb26fd57a267d7c8ff1e5056c5ce8eeb68cfb
--- /dev/null
+++ b/docs/Generation-Parameters.md
@@ -0,0 +1,71 @@
+# Generation Parameters
+
+For a technical description of the parameters, the [transformers documentation](https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig) is a good reference.
+
+The best presets, according to the [Preset Arena](https://github.com/oobabooga/oobabooga.github.io/blob/main/arena/results.md) experiment, are:
+
+**Instruction following:**
+
+1) Divine Intellect
+2) Big O
+3) simple-1
+4) Space Alien
+5) StarChat
+6) Titanic
+7) tfs-with-top-a
+8) Asterism
+9) Contrastive Search
+
+**Chat:**
+
+1) Midnight Enigma
+2) Yara
+3) Shortwave
+
+### Temperature
+
+Primary factor to control randomness of outputs. 0 = deterministic (only the most likely token is used). Higher value = more randomness.
+
+### top_p
+
+If not set to 1, select tokens with probabilities adding up to less than this number. Higher value = higher range of possible random results.
+
+### top_k
+
+Similar to top_p, but select instead only the top_k most likely tokens. Higher value = higher range of possible random results.
+
+### typical_p
+
+If not set to 1, select only tokens that are at least this much more likely to appear than random tokens, given the prior text.
+
+### epsilon_cutoff
+
+In units of 1e-4; a reasonable value is 3. This sets a probability floor below which tokens are excluded from being sampled. Should be used with top_p, top_k, and eta_cutoff set to 0.
+
+### eta_cutoff
+
+In units of 1e-4; a reasonable value is 3. Should be used with top_p, top_k, and epsilon_cutoff set to 0.
+
+### repetition_penalty
+
+Exponential penalty factor for repeating prior tokens. 1 means no penalty, higher value = less repetition, lower value = more repetition.
+
+### repetition_penalty_range
+
+The number of most recent tokens to consider for repetition penalty. 0 makes all tokens be used.
+
+### encoder_repetition_penalty
+
+Also known as the "Hallucinations filter". Used to penalize tokens that are *not* in the prior text. Higher value = more likely to stay in context, lower value = more likely to diverge.
+
+### no_repeat_ngram_size
+
+If not set to 0, specifies the length of token sets that are completely blocked from repeating at all. Higher values = blocks larger phrases, lower values = blocks words or letters from repeating. Only 0 or high values are a good idea in most cases.
+
+### min_length
+
+Minimum generation length in tokens.
+
+### penalty_alpha
+
+Contrastive Search is enabled by setting this to greater than zero and unchecking "do_sample". It should be used with a low value of top_k, for instance, top_k = 4.
diff --git a/docs/LLaMA-model.md b/docs/LLaMA-model.md
new file mode 100644
index 0000000000000000000000000000000000000000..ba7350f59c54c8ad821619cef2207763b09b3ef3
--- /dev/null
+++ b/docs/LLaMA-model.md
@@ -0,0 +1,56 @@
+LLaMA is a Large Language Model developed by Meta AI.
+
+It was trained on more tokens than previous models. The result is that the smallest version with 7 billion parameters has similar performance to GPT-3 with 175 billion parameters.
+
+This guide will cover usage through the official `transformers` implementation. For 4-bit mode, head over to [GPTQ models (4 bit mode)
+](GPTQ-models-(4-bit-mode).md).
+
+## Getting the weights
+
+### Option 1: pre-converted weights
+
+* Direct download (recommended):
+
+https://huggingface.co/Neko-Institute-of-Science/LLaMA-7B-HF
+
+https://huggingface.co/Neko-Institute-of-Science/LLaMA-13B-HF
+
+https://huggingface.co/Neko-Institute-of-Science/LLaMA-30B-HF
+
+https://huggingface.co/Neko-Institute-of-Science/LLaMA-65B-HF
+
+* Torrent:
+
+https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1484235789
+
+The tokenizer files in the torrent above are outdated, in particular the files called `tokenizer_config.json` and `special_tokens_map.json`. Here you can find those files: https://huggingface.co/oobabooga/llama-tokenizer
+
+### Option 2: convert the weights yourself
+
+1. Install the `protobuf` library:
+
+```
+pip install protobuf==3.20.1
+```
+
+2. Use the script below to convert the model in `.pth` format that you, a fellow academic, downloaded using Meta's official link.
+
+If you have `transformers` installed in place:
+
+```
+python -m transformers.models.llama.convert_llama_weights_to_hf --input_dir /path/to/LLaMA --model_size 7B --output_dir /tmp/outputs/llama-7b
+```
+
+Otherwise download [convert_llama_weights_to_hf.py](https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/convert_llama_weights_to_hf.py) first and run:
+
+```
+python convert_llama_weights_to_hf.py --input_dir /path/to/LLaMA --model_size 7B --output_dir /tmp/outputs/llama-7b
+```
+
+3. Move the `llama-7b` folder inside your `text-generation-webui/models` folder.
+
+## Starting the web UI
+
+```python
+python server.py --model llama-7b
+```
diff --git a/docs/LLaMA-v2-model.md b/docs/LLaMA-v2-model.md
new file mode 100644
index 0000000000000000000000000000000000000000..55c6aa76e9c90963ba1cbf75b5528e6b23c70f18
--- /dev/null
+++ b/docs/LLaMA-v2-model.md
@@ -0,0 +1,35 @@
+# LLaMA-v2
+
+To convert LLaMA-v2 from the `.pth` format provided by Meta to transformers format, follow the steps below:
+
+1) `cd` into your `llama` folder (the one containing `download.sh` and the models that you downloaded):
+
+```
+cd llama
+```
+
+2) Clone the transformers library:
+
+```
+git clone 'https://github.com/huggingface/transformers'
+
+```
+
+3) Create symbolic links from the downloaded folders to names that the conversion script can recognize:
+
+```
+ln -s llama-2-7b 7B
+ln -s llama-2-13b 13B
+```
+
+4) Do the conversions:
+
+```
+mkdir llama-2-7b-hf llama-2-13b-hf
+python ./transformers/src/transformers/models/llama/convert_llama_weights_to_hf.py --input_dir . --model_size 7B --output_dir llama-2-7b-hf --safe_serialization true
+python ./transformers/src/transformers/models/llama/convert_llama_weights_to_hf.py --input_dir . --model_size 13B --output_dir llama-2-13b-hf --safe_serialization true
+```
+
+5) Move the output folders inside `text-generation-webui/models`
+
+6) Have fun
diff --git a/docs/LoRA.md b/docs/LoRA.md
new file mode 100644
index 0000000000000000000000000000000000000000..f1504d1096c44227e8c510fce4bcaa6254849cb0
--- /dev/null
+++ b/docs/LoRA.md
@@ -0,0 +1,71 @@
+# LoRA
+
+LoRA (Low-Rank Adaptation) is an extremely powerful method for customizing a base model by training only a small number of parameters. They can be attached to models at runtime.
+
+For instance, a 50mb LoRA can teach LLaMA an entire new language, a given writing style, or give it instruction-following or chat abilities.
+
+This is the current state of LoRA integration in the web UI:
+
+|Loader | Status |
+|--------|------|
+| Transformers | Full support in 16-bit, `--load-in-8bit`, `--load-in-4bit`, and CPU modes. |
+| ExLlama | Single LoRA support. Fast to remove the LoRA afterwards. |
+| AutoGPTQ | Single LoRA support. Removing the LoRA requires reloading the entire model.|
+| GPTQ-for-LLaMa | Full support with the [monkey patch](https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md#using-loras-with-gptq-for-llama). |
+
+## Downloading a LoRA
+
+The download script can be used. For instance:
+
+```
+python download-model.py tloen/alpaca-lora-7b
+```
+
+The files will be saved to `loras/tloen_alpaca-lora-7b`.
+
+## Using the LoRA
+
+The `--lora` command-line flag can be used. Examples:
+
+```
+python server.py --model llama-7b-hf --lora tloen_alpaca-lora-7b
+python server.py --model llama-7b-hf --lora tloen_alpaca-lora-7b --load-in-8bit
+python server.py --model llama-7b-hf --lora tloen_alpaca-lora-7b --load-in-4bit
+python server.py --model llama-7b-hf --lora tloen_alpaca-lora-7b --cpu
+```
+
+Instead of using the `--lora` command-line flag, you can also select the LoRA in the "Parameters" tab of the interface.
+
+## Prompt
+For the Alpaca LoRA in particular, the prompt must be formatted like this:
+
+```
+Below is an instruction that describes a task. Write a response that appropriately completes the request.
+### Instruction:
+Write a Python script that generates text using the transformers library.
+### Response:
+```
+
+Sample output:
+
+```
+Below is an instruction that describes a task. Write a response that appropriately completes the request.
+### Instruction:
+Write a Python script that generates text using the transformers library.
+### Response:
+
+import transformers
+from transformers import AutoTokenizer, AutoModelForCausalLM
+tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+model = AutoModelForCausalLM.from_pretrained("bert-base-uncased")
+texts = ["Hello world", "How are you"]
+for sentence in texts:
+sentence = tokenizer(sentence)
+print(f"Generated {len(sentence)} tokens from '{sentence}'")
+output = model(sentences=sentence).predict()
+print(f"Predicted {len(output)} tokens for '{sentence}':\n{output}")
+```
+
+## Training a LoRA
+
+You can train your own LoRAs from the `Training` tab. See [Training LoRAs](Training-LoRAs.md) for details.
diff --git a/docs/Low-VRAM-guide.md b/docs/Low-VRAM-guide.md
new file mode 100644
index 0000000000000000000000000000000000000000..7814ecb0c3bc604e8eaa6545b5f83be7f5bdb519
--- /dev/null
+++ b/docs/Low-VRAM-guide.md
@@ -0,0 +1,53 @@
+If you GPU is not large enough to fit a 16-bit model, try these in the following order:
+
+### Load the model in 8-bit mode
+
+```
+python server.py --load-in-8bit
+```
+
+### Load the model in 4-bit mode
+
+```
+python server.py --load-in-4bit
+```
+
+### Split the model across your GPU and CPU
+
+```
+python server.py --auto-devices
+```
+
+If you can load the model with this command but it runs out of memory when you try to generate text, try increasingly limiting the amount of memory allocated to the GPU until the error stops happening:
+
+```
+python server.py --auto-devices --gpu-memory 10
+python server.py --auto-devices --gpu-memory 9
+python server.py --auto-devices --gpu-memory 8
+...
+```
+
+where the number is in GiB.
+
+For finer control, you can also specify the unit in MiB explicitly:
+
+```
+python server.py --auto-devices --gpu-memory 8722MiB
+python server.py --auto-devices --gpu-memory 4725MiB
+python server.py --auto-devices --gpu-memory 3500MiB
+...
+```
+
+### Send layers to a disk cache
+
+As a desperate last measure, you can split the model across your GPU, CPU, and disk:
+
+```
+python server.py --auto-devices --disk
+```
+
+With this, I am able to load a 30b model into my RTX 3090, but it takes 10 seconds to generate 1 word.
+
+### DeepSpeed (experimental)
+
+An experimental alternative to all of the above is to use DeepSpeed: [guide](DeepSpeed.md).
diff --git a/docs/One-Click-Installers.md b/docs/One-Click-Installers.md
new file mode 100644
index 0000000000000000000000000000000000000000..1597f484ef8a15e237259b8c8a92c20c86abcfa0
--- /dev/null
+++ b/docs/One-Click-Installers.md
@@ -0,0 +1,72 @@
+# Additional one-click installers info
+
+## Installing nvcc
+
+If you have an NVIDIA GPU and ever need to compile something, like ExLlamav2 (that currently doesn't have pre-built wheels), you can install `nvcc` by running the `cmd_` script for your OS and entering this command:
+
+```
+conda install cuda -c nvidia/label/cuda-11.7.1
+```
+
+## Using an AMD GPU in Linux
+
+Requires ROCm SDK 5.4.2 or 5.4.3 to be installed. Some systems may also
+need: sudo apt-get install libstdc++-12-dev
+
+Edit the "one_click.py" script using a text editor and un-comment and
+modify the lines near the top of the script according to your setup. In
+particular, modify the os.environ["ROCM_PATH"] = '/opt/rocm' line to
+point to your ROCm installation.
+
+## WSL instructions
+
+If you do not have WSL installed, see here:
+https://learn.microsoft.com/en-us/windows/wsl/install
+
+If you want to install Linux to a drive other than C
+Open powershell and enter these commands:
+
+cd D:\Path\To\Linux
+$ProgressPreference = 'SilentlyContinue'
+Invoke-WebRequest -Uri -OutFile Linux.appx -UseBasicParsing
+mv Linux.appx Linux.zip
+
+Then open Linux.zip and you should see several .appx files inside.
+The one with _x64.appx contains the exe installer that you need.
+Extract the contents of that _x64.appx file and run .exe to install.
+
+Linux Distro URLs:
+https://learn.microsoft.com/en-us/windows/wsl/install-manual#downloading-distributions
+
+******************************************************************************
+*ENSURE THAT THE WSL LINUX DISTRO THAT YOU WISH TO USE IS SET AS THE DEFAULT!*
+******************************************************************************
+
+Do this by using these commands:
+wsl -l
+wsl -s
+
+### Web UI Installation
+
+Run the "start" script. By default it will install the web UI in WSL:
+/home/{username}/text-gen-install
+
+To launch the web UI in the future after it is already installed, run
+the same "start" script. Ensure that one_click.py and wsl.sh are next to it!
+
+### Updating the web UI
+
+As an alternative to running the "update" script, you can also run "wsl.sh update" in WSL.
+
+### Running an interactive shell
+
+As an alternative to running the "cmd" script, you can also run "wsl.sh cmd" in WSL.
+
+### Changing the default install location
+
+To change this, you will need to edit the scripts as follows:
+wsl.sh: line ~22 INSTALL_DIR="/path/to/install/dir"
+
+Keep in mind that there is a long-standing bug in WSL that significantly
+slows drive read/write speeds when using a physical drive as opposed to
+the virtual one that Linux is installed in.
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6ab8d213e97d0a9a62a244ca1d14fd130a48fe23
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,21 @@
+# text-generation-webui documentation
+
+## Table of contents
+
+* [Audio Notification](Audio-Notification.md)
+* [Chat mode](Chat-mode.md)
+* [DeepSpeed](DeepSpeed.md)
+* [Docker](Docker.md)
+* [ExLlama](ExLlama.md)
+* [Extensions](Extensions.md)
+* [GPTQ models (4 bit mode)](GPTQ-models-(4-bit-mode).md)
+* [LLaMA model](LLaMA-model.md)
+* [llama.cpp](llama.cpp.md)
+* [LoRA](LoRA.md)
+* [Low VRAM guide](Low-VRAM-guide.md)
+* [RWKV model](RWKV-model.md)
+* [Spell book](Spell-book.md)
+* [System requirements](System-requirements.md)
+* [Training LoRAs](Training-LoRAs.md)
+* [Windows installation guide](Windows-installation-guide.md)
+* [WSL installation guide](WSL-installation-guide.md)
diff --git a/docs/RWKV-model.md b/docs/RWKV-model.md
new file mode 100644
index 0000000000000000000000000000000000000000..88f13fa56e0567bf3442b21c1d2a1cdd56d29647
--- /dev/null
+++ b/docs/RWKV-model.md
@@ -0,0 +1,72 @@
+> RWKV: RNN with Transformer-level LLM Performance
+>
+> It combines the best of RNN and transformer - great performance, fast inference, saves VRAM, fast training, "infinite" ctx_len, and free sentence embedding (using the final hidden state).
+
+https://github.com/BlinkDL/RWKV-LM
+
+https://github.com/BlinkDL/ChatRWKV
+
+## Using RWKV in the web UI
+
+### Hugging Face weights
+
+Simply download the weights from https://huggingface.co/RWKV and load them as you would for any other model.
+
+There is a bug in transformers==4.29.2 that prevents RWKV from being loaded in 8-bit mode. You can install the dev branch to solve this bug: `pip install git+https://github.com/huggingface/transformers`
+
+### Original .pth weights
+
+The instructions below are from before RWKV was supported in transformers, and they are kept for legacy purposes. The old implementation is possibly faster, but it lacks the full range of samplers that the transformers library offers.
+
+#### 0. Install the RWKV library
+
+```
+pip install rwkv
+```
+
+`0.7.3` was the last version that I tested. If you experience any issues, try ```pip install rwkv==0.7.3```.
+
+#### 1. Download the model
+
+It is available in different sizes:
+
+* https://huggingface.co/BlinkDL/rwkv-4-pile-3b/
+* https://huggingface.co/BlinkDL/rwkv-4-pile-7b/
+* https://huggingface.co/BlinkDL/rwkv-4-pile-14b/
+
+There are also older releases with smaller sizes like:
+
+* https://huggingface.co/BlinkDL/rwkv-4-pile-169m/resolve/main/RWKV-4-Pile-169M-20220807-8023.pth
+
+Download the chosen `.pth` and put it directly in the `models` folder.
+
+#### 2. Download the tokenizer
+
+[20B_tokenizer.json](https://raw.githubusercontent.com/BlinkDL/ChatRWKV/main/v2/20B_tokenizer.json)
+
+Also put it directly in the `models` folder. Make sure to not rename it. It should be called `20B_tokenizer.json`.
+
+#### 3. Launch the web UI
+
+No additional steps are required. Just launch it as you would with any other model.
+
+```
+python server.py --listen --no-stream --model RWKV-4-Pile-169M-20220807-8023.pth
+```
+
+#### Setting a custom strategy
+
+It is possible to have very fine control over the offloading and precision for the model with the `--rwkv-strategy` flag. Possible values include:
+
+```
+"cpu fp32" # CPU mode
+"cuda fp16" # GPU mode with float16 precision
+"cuda fp16 *30 -> cpu fp32" # GPU+CPU offloading. The higher the number after *, the higher the GPU allocation.
+"cuda fp16i8" # GPU mode with 8-bit precision
+```
+
+See the README for the PyPl package for more details: https://pypi.org/project/rwkv/
+
+#### Compiling the CUDA kernel
+
+You can compile the CUDA kernel for the model with `--rwkv-cuda-on`. This should improve the performance a lot but I haven't been able to get it to work yet.
diff --git a/docs/Spell-book.md b/docs/Spell-book.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b7c76c953f76f8a486bbe5156de4e9ebb3f0ec0
--- /dev/null
+++ b/docs/Spell-book.md
@@ -0,0 +1,107 @@
+You have now entered a hidden corner of the internet.
+
+A confusing yet intriguing realm of paradoxes and contradictions.
+
+A place where you will find out that what you thought you knew, you in fact didn't know, and what you didn't know was in front of you all along.
+
+![](https://i.pinimg.com/originals/6e/e2/7b/6ee27bad351d3aca470d80f1033ba9c6.jpg)
+
+*In other words, here I will document little-known facts about this web UI that I could not find another place for in the wiki.*
+
+#### You can train LoRAs in CPU mode
+
+Load the web UI with
+
+```
+python server.py --cpu
+```
+
+and start training the LoRA from the training tab as usual.
+
+#### 8-bit mode works with CPU offloading
+
+```
+python server.py --load-in-8bit --gpu-memory 4000MiB
+```
+
+#### `--pre_layer`, and not `--gpu-memory`, is the right way to do CPU offloading with 4-bit models
+
+```
+python server.py --wbits 4 --groupsize 128 --pre_layer 20
+```
+
+#### Models can be loaded in 32-bit, 16-bit, 8-bit, and 4-bit modes
+
+```
+python server.py --cpu
+python server.py
+python server.py --load-in-8bit
+python server.py --wbits 4
+```
+
+#### The web UI works with any version of GPTQ-for-LLaMa
+
+Including the up to date triton and cuda branches. But you have to delete the `repositories/GPTQ-for-LLaMa` folder and reinstall the new one every time:
+
+```
+cd text-generation-webui/repositories
+rm -r GPTQ-for-LLaMa
+pip uninstall quant-cuda
+git clone https://github.com/oobabooga/GPTQ-for-LLaMa -b cuda # or any other repository and branch
+cd GPTQ-for-LLaMa
+python setup_cuda.py install
+```
+
+#### Instruction-following templates are represented as chat characters
+
+https://github.com/oobabooga/text-generation-webui/tree/main/characters/instruction-following
+
+#### The right way to run Alpaca, Open Assistant, Vicuna, etc is Instruct mode, not normal chat mode
+
+Otherwise the prompt will not be formatted correctly.
+
+1. Start the web UI with
+
+```
+python server.py --chat
+```
+
+2. Click on the "instruct" option under "Chat modes"
+
+3. Select the correct template in the hidden dropdown menu that will become visible.
+
+#### Notebook mode is best mode
+
+Ascended individuals have realized that notebook mode is the superset of chat mode and can do chats with ultimate flexibility, including group chats, editing replies, starting a new bot reply in a given way, and impersonating.
+
+#### RWKV is a RNN
+
+Most models are transformers, but not RWKV, which is a RNN. It's a great model.
+
+#### `--gpu-memory` is not a hard limit on the GPU memory
+
+It is simply a parameter that is passed to the `accelerate` library while loading the model. More memory will be allocated during generation. That's why this parameter has to be set to less than your total GPU memory.
+
+#### Contrastive search perhaps the best preset
+
+But it uses a ton of VRAM.
+
+#### You can check the sha256sum of downloaded models with the download script
+
+```
+python download-model.py facebook/galactica-125m --check
+```
+
+#### The download script continues interrupted downloads by default
+
+It doesn't start over.
+
+#### You can download models with multiple threads
+
+```
+python download-model.py facebook/galactica-125m --threads 8
+```
+
+#### LoRAs work in 4-bit mode
+
+You need to follow [these instructions](GPTQ-models-(4-bit-mode).md#using-loras-in-4-bit-mode) and then start the web UI with the `--monkey-patch` flag.
diff --git a/docs/System-requirements.md b/docs/System-requirements.md
new file mode 100644
index 0000000000000000000000000000000000000000..3a88416d34ad7c8babd90a81db902e95288a8197
--- /dev/null
+++ b/docs/System-requirements.md
@@ -0,0 +1,42 @@
+These are the VRAM and RAM requirements (in MiB) to run some examples of models **in 16-bit (default) precision**:
+
+| model | VRAM (GPU) | RAM |
+|:-----------------------|-------------:|--------:|
+| arxiv_ai_gpt2 | 1512.37 | 5824.2 |
+| blenderbot-1B-distill | 2441.75 | 4425.91 |
+| opt-1.3b | 2509.61 | 4427.79 |
+| gpt-neo-1.3b | 2605.27 | 5851.58 |
+| opt-2.7b | 5058.05 | 4863.95 |
+| gpt4chan_model_float16 | 11653.7 | 4437.71 |
+| gpt-j-6B | 11653.7 | 5633.79 |
+| galactica-6.7b | 12697.9 | 4429.89 |
+| opt-6.7b | 12700 | 4368.66 |
+| bloomz-7b1-p3 | 13483.1 | 4470.34 |
+
+#### GPU mode with 8-bit precision
+
+Allows you to load models that would not normally fit into your GPU. Enabled by default for 13b and 20b models in this web UI.
+
+| model | VRAM (GPU) | RAM |
+|:---------------|-------------:|--------:|
+| opt-13b | 12528.1 | 1152.39 |
+| gpt-neox-20b | 20384 | 2291.7 |
+
+#### CPU mode (32-bit precision)
+
+A lot slower, but does not require a GPU.
+
+On my i5-12400F, 6B models take around 10-20 seconds to respond in chat mode, and around 5 minutes to generate a 200 tokens completion.
+
+| model | RAM |
+|:-----------------------|---------:|
+| arxiv_ai_gpt2 | 4430.82 |
+| gpt-neo-1.3b | 6089.31 |
+| opt-1.3b | 8411.12 |
+| blenderbot-1B-distill | 8508.16 |
+| opt-2.7b | 14969.3 |
+| bloomz-7b1-p3 | 21371.2 |
+| gpt-j-6B | 24200.3 |
+| gpt4chan_model | 24246.3 |
+| galactica-6.7b | 26561.4 |
+| opt-6.7b | 29596.6 |
diff --git a/docs/Training-LoRAs.md b/docs/Training-LoRAs.md
new file mode 100644
index 0000000000000000000000000000000000000000..83e6d5a7251eea080cd7dfe8d19a2e42d6d3a822
--- /dev/null
+++ b/docs/Training-LoRAs.md
@@ -0,0 +1,174 @@
+## Training Your Own LoRAs
+
+The WebUI seeks to make training your own LoRAs as easy as possible. It comes down to just a few simple steps:
+
+### **Step 1**: Make a plan.
+- What base model do you want to use? The LoRA you make has to be matched up to a single architecture (eg LLaMA-13B) and cannot be transferred to others (eg LLaMA-7B, StableLM, etc. would all be different). Derivatives of the same model (eg Alpaca finetune of LLaMA-13B) might be transferrable, but even then it's best to train exactly on what you plan to use.
+- What model format do you want? At time of writing, 8-bit models are most stable, and 4-bit are supported but experimental. In the near future it is likely that 4-bit will be the best option for most users.
+- What are you training it on? Do you want it to learn real information, a simple format, ...?
+
+### **Step 2**: Gather a dataset.
+- If you use a dataset similar to the [Alpaca](https://github.com/gururise/AlpacaDataCleaned/blob/main/alpaca_data_cleaned.json) format, that is natively supported by the `Formatted Dataset` input in the WebUI, with premade formatter options.
+- If you use a dataset that isn't matched to Alpaca's format, but uses the same basic JSON structure, you can make your own format file by copying `training/formats/alpaca-format.json` to a new file and [editing its content](#format-files).
+- If you can get the dataset into a simple text file, that works too! You can train using the `Raw text file` input option.
+ - This means you can for example just copy/paste a chatlog/documentation page/whatever you want, shove it in a plain text file, and train on it.
+- If you use a structured dataset not in this format, you may have to find an external way to convert it - or open an issue to request native support.
+
+### **Step 3**: Do the training.
+- **3.1**: Load the WebUI, and your model.
+ - Make sure you don't have any LoRAs already loaded (unless you want to train for multi-LoRA usage).
+- **3.2**: Open the `Training` tab at the top, `Train LoRA` sub-tab.
+- **3.3**: Fill in the name of the LoRA, select your dataset in the dataset options.
+- **3.4**: Select other parameters to your preference. See [parameters below](#parameters).
+- **3.5**: click `Start LoRA Training`, and wait.
+ - It can take a few hours for a large dataset, or just a few minute if doing a small run.
+ - You may want to monitor your [loss value](#loss) while it goes.
+
+### **Step 4**: Evaluate your results.
+- Load the LoRA under the Models Tab.
+- You can go test-drive it on the `Text generation` tab, or you can use the `Perplexity evaluation` sub-tab of the `Training` tab.
+- If you used the `Save every n steps` option, you can grab prior copies of the model from sub-folders within the LoRA model's folder and try them instead.
+
+### **Step 5**: Re-run if you're unhappy.
+- Make sure to unload the LoRA before training it.
+- You can simply resume a prior run - use `Copy parameters from` to select your LoRA, and edit parameters. Note that you cannot change the `Rank` of an already created LoRA.
+ - If you want to resume from a checkpoint saved along the way, simply copy the contents of the checkpoint folder into the LoRA's folder.
+ - (Note: `adapter_model.bin` is the important file that holds the actual LoRA content).
+ - This will start Learning Rate and Steps back to the start. If you want to resume as if you were midway through, you can adjust your Learning Rate to the last reported LR in logs and reduce your epochs.
+- Or, you can start over entirely if you prefer.
+- If your model is producing corrupted outputs, you probably need to start over and use a lower Learning Rate.
+- If your model isn't learning detailed information but you want it to, you might need to just run more epochs, or you might need a higher Rank.
+- If your model is enforcing a format you didn't want, you may need to tweak your dataset, or start over and not train as far.
+
+## Format Files
+
+If using JSON formatted datasets, they are presumed to be in the following approximate format:
+
+```json
+[
+ {
+ "somekey": "somevalue",
+ "key2": "value2"
+ },
+ {
+ // etc
+ }
+]
+```
+
+Where the keys (eg `somekey`, `key2` above) are standardized, and relatively consistent across the dataset, and the values (eg `somevalue`, `value2`) contain the content actually intended to be trained.
+
+For Alpaca, the keys are `instruction`, `input`, and `output`, wherein `input` is sometimes blank.
+
+A simple format file for Alpaca to be used as a chat bot is:
+
+```json
+{
+ "instruction,output": "User: %instruction%\nAssistant: %output%",
+ "instruction,input,output": "User: %instruction%: %input%\nAssistant: %output%"
+}
+```
+
+Note that the keys (eg `instruction,output`) are a comma-separated list of dataset keys, and the values are a simple string that use those keys with `%%`.
+
+So for example if a dataset has `"instruction": "answer my question"`, then the format file's `User: %instruction%\n` will be automatically filled in as `User: answer my question\n`.
+
+If you have different sets of key inputs, you can make your own format file to match it. This format-file is designed to be as simple as possible to enable easy editing to match your needs.
+
+## Raw Text File Settings
+
+When using raw text files as your dataset, the text is automatically split into chunks based on your `Cutoff Length` you get a few basic options to configure them.
+- `Overlap Length` is how much to overlap chunks by. Overlapping chunks helps prevent the model from learning strange mid-sentence cuts, and instead learn continual sentences that flow from earlier text.
+- `Prefer Newline Cut Length` sets a maximum distance in characters to shift the chunk cut towards newlines. Doing this helps prevent lines from starting or ending mid-sentence, preventing the model from learning to cut off sentences randomly.
+- `Hard Cut String` sets a string that indicates there must be a hard cut without overlap. This defaults to `\n\n\n`, meaning 3 newlines. No trained chunk will ever contain this string. This allows you to insert unrelated sections of text in the same text file, but still ensure the model won't be taught to randomly change the subject.
+
+## Parameters
+
+The basic purpose and function of each parameter is documented on-page in the WebUI, so read through them in the UI to understand your options.
+
+That said, here's a guide to the most important parameter choices you should consider:
+
+### VRAM
+
+- First, you must consider your VRAM availability.
+ - Generally, under default settings, VRAM usage for training with default parameters is very close to when generating text (with 1000+ tokens of context) (ie, if you can generate text, you can train LoRAs).
+ - Note: worse by default in the 4-bit monkeypatch currently. Reduce `Micro Batch Size` to `1` to restore this to expectations.
+ - If you have VRAM to spare, setting higher batch sizes will use more VRAM and get you better quality training in exchange.
+ - If you have large data, setting a higher cutoff length may be beneficial, but will cost significant VRAM. If you can spare some, set your batch size to `1` and see how high you can push your cutoff length.
+ - If you're low on VRAM, reducing batch size or cutoff length will of course improve that.
+ - Don't be afraid to just try it and see what happens. If it's too much, it will just error out, and you can lower settings and try again.
+
+### Rank
+
+- Second, you want to consider the amount of learning you want.
+ - For example, you may wish to just learn a dialogue format (as in the case of Alpaca) in which case setting a low `Rank` value (32 or lower) works great.
+ - Or, you might be training on project documentation you want the bot to understand and be able to understand questions about, in which case the higher the rank, the better.
+ - Generally, higher Rank = more precise learning = more total content learned = more VRAM usage while training.
+
+### Learning Rate and Epochs
+
+- Third, how carefully you want it to be learned.
+ - In other words, how okay or not you are with the model losing unrelated understandings.
+ - You can control this with 3 key settings: the Learning Rate, its scheduler, and your total epochs.
+ - The learning rate controls how much change is made to the model by each token it sees.
+ - It's in scientific notation normally, so for example `3e-4` means `3 * 10^-4` which is `0.0003`. The number after `e-` controls how many `0`s are in the number.
+ - Higher values let training run faster, but also are more likely to corrupt prior data in the model.
+ - You essentially have two variables to balance: the LR, and Epochs.
+ - If you make LR higher, you can set Epochs equally lower to match. High LR + low epochs = very fast, low quality training.
+ - If you make LR low, set epochs high. Low LR + high epochs = slow but high-quality training.
+ - The scheduler controls change-over-time as you train - it starts high, and then goes low. This helps balance getting data in, and having decent quality, at the same time.
+ - You can see graphs of the different scheduler options [in the HuggingFace docs here](https://moon-ci-docs.huggingface.co/docs/transformers/pr_1/en/main_classes/optimizer_schedules#transformers.SchedulerType)
+
+## Loss
+
+When you're running training, the WebUI's console window will log reports that include, among other things, a numeric value named `Loss`. It will start as a high number, and gradually get lower and lower as it goes.
+
+"Loss" in the world of AI training theoretically means "how close is the model to perfect", with `0` meaning "absolutely perfect". This is calculated by measuring the difference between the model outputting exactly the text you're training it to output, and what it actually outputs.
+
+In practice, a good LLM should have a very complex variable range of ideas running in its artificial head, so a loss of `0` would indicate that the model has broken and forgotten to how think about anything other than what you trained it.
+
+So, in effect, Loss is a balancing game: you want to get it low enough that it understands your data, but high enough that it isn't forgetting everything else. Generally, if it goes below `1.0`, it's going to start forgetting its prior memories, and you should stop training. In some cases you may prefer to take it as low as `0.5` (if you want it to be very very predictable). Different goals have different needs, so don't be afraid to experiment and see what works best for you.
+
+Note: if you see Loss start at or suddenly jump to exactly `0`, it is likely something has gone wrong in your training process (eg model corruption).
+
+## Note: 4-Bit Monkeypatch
+
+The [4-bit LoRA monkeypatch](GPTQ-models-(4-bit-mode).md#using-loras-in-4-bit-mode) works for training, but has side effects:
+- VRAM usage is higher currently. You can reduce the `Micro Batch Size` to `1` to compensate.
+- Models do funky things. LoRAs apply themselves, or refuse to apply, or spontaneously error out, or etc. It can be helpful to reload base model or restart the WebUI between training/usage to minimize chances of anything going haywire.
+- Loading or working with multiple LoRAs at the same time doesn't currently work.
+- Generally, recognize and treat the monkeypatch as the dirty temporary hack it is - it works, but isn't very stable. It will get better in time when everything is merged upstream for full official support.
+
+## Legacy notes
+
+LoRA training was contributed by [mcmonkey4eva](https://github.com/mcmonkey4eva) in PR [#570](https://github.com/oobabooga/text-generation-webui/pull/570).
+
+### Using the original alpaca-lora code
+
+Kept here for reference. The Training tab has much more features than this method.
+
+```
+conda activate textgen
+git clone https://github.com/tloen/alpaca-lora
+```
+
+Edit those two lines in `alpaca-lora/finetune.py` to use your existing model folder instead of downloading everything from decapoda:
+
+```
+model = LlamaForCausalLM.from_pretrained(
+ "models/llama-7b",
+ load_in_8bit=True,
+ device_map="auto",
+)
+tokenizer = LlamaTokenizer.from_pretrained(
+ "models/llama-7b", add_eos_token=True
+)
+```
+
+Run the script with:
+
+```
+python finetune.py
+```
+
+It just works. It runs at 22.32s/it, with 1170 iterations in total, so about 7 hours and a half for training a LoRA. RTX 3090, 18153MiB VRAM used, drawing maximum power (350W, room heater mode).
diff --git a/docs/WSL-installation-guide.md b/docs/WSL-installation-guide.md
new file mode 100644
index 0000000000000000000000000000000000000000..30b7fa3e6f4613898fbb0d0bd16b77db5d79c14b
--- /dev/null
+++ b/docs/WSL-installation-guide.md
@@ -0,0 +1,82 @@
+Guide created by [@jfryton](https://github.com/jfryton). Thank you jfryton.
+
+-----
+
+Here's an easy-to-follow, step-by-step guide for installing Windows Subsystem for Linux (WSL) with Ubuntu on Windows 10/11:
+
+## Step 1: Enable WSL
+
+1. Press the Windows key + X and click on "Windows PowerShell (Admin)" or "Windows Terminal (Admin)" to open PowerShell or Terminal with administrator privileges.
+2. In the PowerShell window, type the following command and press Enter:
+
+```
+wsl --install
+```
+
+If this command doesn't work, you can enable WSL with the following command for Windows 10:
+
+```
+wsl --set-default-version 1
+```
+
+For Windows 11, you can use:
+
+```
+wsl --set-default-version 2
+```
+
+You may be prompted to restart your computer. If so, save your work and restart.
+
+## Step 2: Install Ubuntu
+
+1. Open the Microsoft Store.
+2. Search for "Ubuntu" in the search bar.
+3. Choose the desired Ubuntu version (e.g., Ubuntu 20.04 LTS) and click "Get" or "Install" to download and install the Ubuntu app.
+4. Once the installation is complete, click "Launch" or search for "Ubuntu" in the Start menu and open the app.
+
+## Step 3: Set up Ubuntu
+
+1. When you first launch the Ubuntu app, it will take a few minutes to set up. Be patient as it installs the necessary files and sets up your environment.
+2. Once the setup is complete, you will be prompted to create a new UNIX username and password. Choose a username and password, and make sure to remember them, as you will need them for future administrative tasks within the Ubuntu environment.
+
+## Step 4: Update and upgrade packages
+
+1. After setting up your username and password, it's a good idea to update and upgrade your Ubuntu system. Run the following commands in the Ubuntu terminal:
+
+```
+sudo apt update
+sudo apt upgrade
+```
+
+2. Enter your password when prompted. This will update the package list and upgrade any outdated packages.
+
+Congratulations! You have now installed WSL with Ubuntu on your Windows 10/11 system. You can use the Ubuntu terminal for various tasks, like running Linux commands, installing packages, or managing files.
+
+You can launch your WSL Ubuntu installation by selecting the Ubuntu app (like any other program installed on your computer) or typing 'ubuntu' into Powershell or Terminal.
+
+## Step 5: Proceed with Linux instructions
+
+1. You can now follow the Linux setup instructions. If you receive any error messages about a missing tool or package, just install them using apt:
+
+```
+sudo apt install [missing package]
+```
+
+You will probably need to install build-essential
+
+```
+sudo apt install build-essential
+```
+
+If you face any issues or need to troubleshoot, you can always refer to the official Microsoft documentation for WSL: https://docs.microsoft.com/en-us/windows/wsl/
+
+#### WSL2 performance using /mnt:
+when you git clone a repository, put it inside WSL and not outside. To understand more, take a look at this [issue](https://github.com/microsoft/WSL/issues/4197#issuecomment-604592340)
+
+## Bonus: Port Forwarding
+
+By default, you won't be able to access the webui from another device on your local network. You will need to setup the appropriate port forwarding using the following command (using PowerShell or Terminal with administrator privileges).
+
+```
+netsh interface portproxy add v4tov4 listenaddress=0.0.0.0 listenport=7860 connectaddress=localhost connectport=7860
+```
diff --git a/docs/Windows-installation-guide.md b/docs/Windows-installation-guide.md
new file mode 100644
index 0000000000000000000000000000000000000000..83b22efa38b1839d07a5a58494dbc26ba86397ee
--- /dev/null
+++ b/docs/Windows-installation-guide.md
@@ -0,0 +1,9 @@
+If you are having trouble following the installation instructions in the README, Reddit user [Technical_Leather949](https://www.reddit.com/user/Technical_Leather949/) has created a more detailed, step-by-step guide covering:
+
+* Windows installation
+* 8-bit mode on Windows
+* LLaMA
+* LLaMA 4-bit
+
+The guide can be found here: https://www.reddit.com/r/LocalLLaMA/comments/11o6o3f/how_to_install_llama_8bit_and_4bit/
+
diff --git a/docs/llama.cpp.md b/docs/llama.cpp.md
new file mode 100644
index 0000000000000000000000000000000000000000..48d60df36b4bc4d4e77acff7f7b0b9e3864e25ad
--- /dev/null
+++ b/docs/llama.cpp.md
@@ -0,0 +1,43 @@
+# llama.cpp
+
+llama.cpp is the best backend in two important scenarios:
+
+1) You don't have a GPU.
+2) You want to run a model that doesn't fit into your GPU.
+
+## Setting up the models
+
+#### Pre-converted
+
+Download the GGUF models directly into your `text-generation-webui/models` folder. It will be a single file.
+
+* Make sure its name ends in `.gguf`.
+* `q4_K_M` quantization is recommended.
+
+#### Convert Llama yourself
+
+Follow the instructions in the llama.cpp README to generate a GGUF: https://github.com/ggerganov/llama.cpp#prepare-data--run
+
+## GPU acceleration
+
+Enabled with the `--n-gpu-layers` parameter.
+
+* If you have enough VRAM, use a high number like `--n-gpu-layers 1000` to offload all layers to the GPU.
+* Otherwise, start with a low number like `--n-gpu-layers 10` and then gradually increase it until you run out of memory.
+
+This feature works out of the box for NVIDIA GPUs on Linux (amd64) or Windows. For other GPUs, you need to uninstall `llama-cpp-python` with
+
+```
+pip uninstall -y llama-cpp-python
+```
+
+and then recompile it using the commands here: https://pypi.org/project/llama-cpp-python/
+
+#### macOS
+
+For macOS, these are the commands:
+
+```
+pip uninstall -y llama-cpp-python
+CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir
+```
diff --git a/download-model.py b/download-model.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b8d7b258d22cdbe7a278cb8fd43f4bda37c3a24
--- /dev/null
+++ b/download-model.py
@@ -0,0 +1,275 @@
+'''
+Downloads models from Hugging Face to models/username_modelname.
+
+Example:
+python download-model.py facebook/opt-1.3b
+
+'''
+
+import argparse
+import base64
+import datetime
+import hashlib
+import json
+import os
+import re
+import sys
+from pathlib import Path
+
+import requests
+import tqdm
+from requests.adapters import HTTPAdapter
+from tqdm.contrib.concurrent import thread_map
+
+
+base = "https://huggingface.co"
+
+
+class ModelDownloader:
+ def __init__(self, max_retries=5):
+ self.session = requests.Session()
+ if max_retries:
+ self.session.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=max_retries))
+ self.session.mount('https://huggingface.co', HTTPAdapter(max_retries=max_retries))
+ if os.getenv('HF_USER') is not None and os.getenv('HF_PASS') is not None:
+ self.session.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS'))
+ if os.getenv('HF_TOKEN') is not None:
+ self.session.headers = {'authorization': f'Bearer {os.getenv("HF_TOKEN")}'}
+
+ def sanitize_model_and_branch_names(self, model, branch):
+ if model[-1] == '/':
+ model = model[:-1]
+
+ if model.startswith(base + '/'):
+ model = model[len(base) + 1:]
+
+ model_parts = model.split(":")
+ model = model_parts[0] if len(model_parts) > 0 else model
+ branch = model_parts[1] if len(model_parts) > 1 else branch
+
+ if branch is None:
+ branch = "main"
+ else:
+ pattern = re.compile(r"^[a-zA-Z0-9._-]+$")
+ if not pattern.match(branch):
+ raise ValueError(
+ "Invalid branch name. Only alphanumeric characters, period, underscore and dash are allowed.")
+
+ return model, branch
+
+ def get_download_links_from_huggingface(self, model, branch, text_only=False, specific_file=None):
+ page = f"/api/models/{model}/tree/{branch}"
+ cursor = b""
+
+ links = []
+ sha256 = []
+ classifications = []
+ has_pytorch = False
+ has_pt = False
+ has_gguf = False
+ has_safetensors = False
+ is_lora = False
+ while True:
+ url = f"{base}{page}" + (f"?cursor={cursor.decode()}" if cursor else "")
+ r = self.session.get(url, timeout=10)
+ r.raise_for_status()
+ content = r.content
+
+ dict = json.loads(content)
+ if len(dict) == 0:
+ break
+
+ for i in range(len(dict)):
+ fname = dict[i]['path']
+ if specific_file not in [None, ''] and fname != specific_file:
+ continue
+
+ if not is_lora and fname.endswith(('adapter_config.json', 'adapter_model.bin')):
+ is_lora = True
+
+ is_pytorch = re.match(r"(pytorch|adapter|gptq)_model.*\.bin", fname)
+ is_safetensors = re.match(r".*\.safetensors", fname)
+ is_pt = re.match(r".*\.pt", fname)
+ is_gguf = re.match(r'.*\.gguf', fname)
+ is_tiktoken = re.match(r".*\.tiktoken", fname)
+ is_tokenizer = re.match(r"(tokenizer|ice|spiece).*\.model", fname) or is_tiktoken
+ is_text = re.match(r".*\.(txt|json|py|md)", fname) or is_tokenizer
+ if any((is_pytorch, is_safetensors, is_pt, is_gguf, is_tokenizer, is_text)):
+ if 'lfs' in dict[i]:
+ sha256.append([fname, dict[i]['lfs']['oid']])
+
+ if is_text:
+ links.append(f"https://huggingface.co/{model}/resolve/{branch}/{fname}")
+ classifications.append('text')
+ continue
+
+ if not text_only:
+ links.append(f"https://huggingface.co/{model}/resolve/{branch}/{fname}")
+ if is_safetensors:
+ has_safetensors = True
+ classifications.append('safetensors')
+ elif is_pytorch:
+ has_pytorch = True
+ classifications.append('pytorch')
+ elif is_pt:
+ has_pt = True
+ classifications.append('pt')
+ elif is_gguf:
+ has_gguf = True
+ classifications.append('gguf')
+
+ cursor = base64.b64encode(f'{{"file_name":"{dict[-1]["path"]}"}}'.encode()) + b':50'
+ cursor = base64.b64encode(cursor)
+ cursor = cursor.replace(b'=', b'%3D')
+
+ # If both pytorch and safetensors are available, download safetensors only
+ if (has_pytorch or has_pt) and has_safetensors:
+ for i in range(len(classifications) - 1, -1, -1):
+ if classifications[i] in ['pytorch', 'pt']:
+ links.pop(i)
+
+ is_llamacpp = has_gguf and specific_file is not None
+ return links, sha256, is_lora, is_llamacpp
+
+ def get_output_folder(self, model, branch, is_lora, is_llamacpp=False, base_folder=None):
+ if base_folder is None:
+ base_folder = 'models' if not is_lora else 'loras'
+
+ # If the model is of type GGUF, save directly in the base_folder
+ if is_llamacpp:
+ return Path(base_folder)
+
+ output_folder = f"{'_'.join(model.split('/')[-2:])}"
+ if branch != 'main':
+ output_folder += f'_{branch}'
+
+ output_folder = Path(base_folder) / output_folder
+ return output_folder
+
+ def get_single_file(self, url, output_folder, start_from_scratch=False):
+ filename = Path(url.rsplit('/', 1)[1])
+ output_path = output_folder / filename
+ headers = {}
+ mode = 'wb'
+ if output_path.exists() and not start_from_scratch:
+
+ # Check if the file has already been downloaded completely
+ r = self.session.get(url, stream=True, timeout=10)
+ total_size = int(r.headers.get('content-length', 0))
+ if output_path.stat().st_size >= total_size:
+ return
+
+ # Otherwise, resume the download from where it left off
+ headers = {'Range': f'bytes={output_path.stat().st_size}-'}
+ mode = 'ab'
+
+ with self.session.get(url, stream=True, headers=headers, timeout=10) as r:
+ r.raise_for_status() # Do not continue the download if the request was unsuccessful
+ total_size = int(r.headers.get('content-length', 0))
+ block_size = 1024 * 1024 # 1MB
+ with open(output_path, mode) as f:
+ with tqdm.tqdm(total=total_size, unit='iB', unit_scale=True, bar_format='{l_bar}{bar}| {n_fmt:6}/{total_fmt:6} {rate_fmt:6}') as t:
+ count = 0
+ for data in r.iter_content(block_size):
+ t.update(len(data))
+ f.write(data)
+ if total_size != 0 and self.progress_bar is not None:
+ count += len(data)
+ self.progress_bar(float(count) / float(total_size), f"{filename}")
+
+ def start_download_threads(self, file_list, output_folder, start_from_scratch=False, threads=4):
+ thread_map(lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch), file_list, max_workers=threads, disable=True)
+
+ def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False):
+ self.progress_bar = progress_bar
+
+ # Create the folder and writing the metadata
+ output_folder.mkdir(parents=True, exist_ok=True)
+
+ if not is_llamacpp:
+ metadata = f'url: https://huggingface.co/{model}\n' \
+ f'branch: {branch}\n' \
+ f'download date: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}\n'
+
+ sha256_str = '\n'.join([f' {item[1]} {item[0]}' for item in sha256])
+ if sha256_str:
+ metadata += f'sha256sum:\n{sha256_str}'
+
+ metadata += '\n'
+ (output_folder / 'huggingface-metadata.txt').write_text(metadata)
+
+ if specific_file:
+ print(f"Downloading {specific_file} to {output_folder}")
+ else:
+ print(f"Downloading the model to {output_folder}")
+
+ self.start_download_threads(links, output_folder, start_from_scratch=start_from_scratch, threads=threads)
+
+ def check_model_files(self, model, branch, links, sha256, output_folder):
+ # Validate the checksums
+ validated = True
+ for i in range(len(sha256)):
+ fpath = (output_folder / sha256[i][0])
+
+ if not fpath.exists():
+ print(f"The following file is missing: {fpath}")
+ validated = False
+ continue
+
+ with open(output_folder / sha256[i][0], "rb") as f:
+ bytes = f.read()
+ file_hash = hashlib.sha256(bytes).hexdigest()
+ if file_hash != sha256[i][1]:
+ print(f'Checksum failed: {sha256[i][0]} {sha256[i][1]}')
+ validated = False
+ else:
+ print(f'Checksum validated: {sha256[i][0]} {sha256[i][1]}')
+
+ if validated:
+ print('[+] Validated checksums of all model files!')
+ else:
+ print('[-] Invalid checksums. Rerun download-model.py with the --clean flag.')
+
+
+if __name__ == '__main__':
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('MODEL', type=str, default=None, nargs='?')
+ parser.add_argument('--branch', type=str, default='main', help='Name of the Git branch to download from.')
+ parser.add_argument('--threads', type=int, default=4, help='Number of files to download simultaneously.')
+ parser.add_argument('--text-only', action='store_true', help='Only download text files (txt/json).')
+ parser.add_argument('--specific-file', type=str, default=None, help='Name of the specific file to download (if not provided, downloads all).')
+ parser.add_argument('--output', type=str, default=None, help='The folder where the model should be saved.')
+ parser.add_argument('--clean', action='store_true', help='Does not resume the previous download.')
+ parser.add_argument('--check', action='store_true', help='Validates the checksums of model files.')
+ parser.add_argument('--max-retries', type=int, default=5, help='Max retries count when get error in download time.')
+ args = parser.parse_args()
+
+ branch = args.branch
+ model = args.MODEL
+ specific_file = args.specific_file
+
+ if model is None:
+ print("Error: Please specify the model you'd like to download (e.g. 'python download-model.py facebook/opt-1.3b').")
+ sys.exit()
+
+ downloader = ModelDownloader(max_retries=args.max_retries)
+ # Clean up the model/branch names
+ try:
+ model, branch = downloader.sanitize_model_and_branch_names(model, branch)
+ except ValueError as err_branch:
+ print(f"Error: {err_branch}")
+ sys.exit()
+
+ # Get the download links from Hugging Face
+ links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=args.text_only, specific_file=specific_file)
+
+ # Get the output folder
+ output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=args.output)
+
+ if args.check:
+ # Check previously downloaded files
+ downloader.check_model_files(model, branch, links, sha256, output_folder)
+ else:
+ # Download files
+ downloader.download_model_files(model, branch, links, sha256, output_folder, specific_file=specific_file, threads=args.threads, is_llamacpp=is_llamacpp)
diff --git a/extensions/Training_PRO/README.md b/extensions/Training_PRO/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..246f25fedd90c700d1b4588aa49ed6a5597ad7d7
--- /dev/null
+++ b/extensions/Training_PRO/README.md
@@ -0,0 +1,56 @@
+# Training_PRO
+
+This is an expanded Training tab
+Maintained by FP
+
+https://github.com/FartyPants/Training_PRO
+
+- Chunking: precise raw text slicer (PRTS) uses sentence slicing and making sure things are clean on all ends
+- overlap chunking - this special overlapping will make additional overlap block based on logical rules (aka no overlap block on hard cut)
+- custom scheduler (follow the code to make your own) In LR Scheduler select FP_low_epoch_annealing - this scheduler will keep the LR constant for first epoch then use cosine for the rest - this part would be best to spawn into a new py file
+- saves graph png file at the end with learning rate and loss per epoch
+- adding EOS to each block or to hard cut only
+- automatically lowers gradient accumulation if you go overboard and set gradient accumulation that will be higher than actual data - transformers would then throw error (or they used to, not sure if still true) but in any way, it will fix bad data
+- turn BOS on and OFF
+- target selector
+- DEMENTOR LEARNING (experimental) Deep Memorization Enforcement Through Overlapping and Repetition. This is an experiment for long-text learning using low epochs (basically use 1 epoch with constant LR or 2 epochs with FP_low_epoch_annealing LR scheduler)
+- Getting rid of micro batch size/batch size confusion. Now there is True Batch Size and Gradient accumulation slider, consisten with all the other training out there
+- Ability to save Checkpoint during training with a button
+- Ability to change Stop Loss during training
+- different modes of checkpoint auto saving
+- Function to Check Dataset and suggest parameters such as warmup and checkpoint save frequency before training
+
+### Notes:
+
+This uses it's own chunking code for raw text based on sentence splitting. This will avoid weird cuts in the chunks and each chunk should now start with sentence and end on some sentence. It works hand in hand with Hard Cut. A propper use is to structure your text into logical blocks (ideas) separated by three \n then use three \n in hard cut. This way each chunk will contain only one flow of ideas and not derail in the thoughts. And Overlapping code will create overlapped blocks on sentence basis too, but not cross hard cut, thus not cross different ideas either. Does it make any sense? No? Hmmmm...
+
+### Targets
+
+Normal LORA is q, v and that's what you should use. You can use (q k v o) or (q k v) and it will give you a lot more trainable parameters. The benefit is that you can keep rank lower and still attain the same coherency as q v with high rank. Guanaco has been trained with QLORA and q k v o for example and they swear by it.
+
+### DEMENTOR LEARNING (experimental) Deep Memorization Enforcement Through Overlapping and Repetition
+
+This is and experimental chunking to train long-form text in low number of epochs (basically 1) with sliding repetition. The depth of learning directly depends on the cutoff_length. Increasing cutoff length will also increase number of blocks created from long-form text (which is contrary to normal training). It is based on my own wild experiments.
+
+### Getting rid of batch size and micro batch size
+
+Keeping consistency with everyone else.
+
+Listen, There is only ONE batch size - the True batch size (called previously micro-batch size in WebUI) - this is how many blocks are processed at once (during a single step). It eats GPU, but it really helps with the quality training (in fact the ideal batch size would be the same as number of blocks - which is unrealistic) - so the idea is to cram as much True Batch Size before your GPU blows with OOM. On 24GB this is about 10 for 13b (loaded with 4-bit)
+
+So no micro batch size - it is now called True Batch Size, because that's what it is.
+
+The other thing is Gradient Accumulation - this is an emulation of the above Batch size - a virtual batch size, if you will. If your GPU can't handle real batch size then you may fake it using Gradient Accumulation. This will accumulate the gradients over so many steps defined here and then update the weights at the end without increase in GPU.
+Gradient accumulation is like a virtual Batch size multiplier without the GPU penalty.
+
+If your batch size is 4 and your gradient accumulation is 2 then it sort of behaves as if we have batch size 8. *Sort of* because Batch size of 4 and GA of 2 is NOT the same as batch size of 2 and GA of 4. (It produces different weights - hence it's not an equivalent). The idea is that if you don't have GPU - using GA to extend batch size is the next best thing (good enough) since you have no other choice.
+
+If all you can afford is 1 batch size, then increasing GA will likely make the learning better in some range of GA (it's not always more is better).
+
+However - GA is not some golden goose. As said, it isn't the same as batch size. In fact GA may worsen your learning as well.
+
+I would suggest a series of experiment where you would put batch size as high as possible without OOM, set GA 1, then repeat training while increasing the GA (2, 4...), and see how the model changes. It's likely that it would follow some sort of curve where GA will seem to help before it will make it worse. Some people believe that if you can squeeze 6 BATCH Size, then you should not bother with GA at all... YMMW
+
+High Batch Size vs High GA would also likely produce different results in terms of learning words vs style. How? Hmmmm... good question.
+
+One optical "benefit" of GA is that the loss will fluctuate less (because of all the gradient accumulation, which works as a form of noise smoothing as well).
diff --git a/extensions/Training_PRO/custom_scheduler.py b/extensions/Training_PRO/custom_scheduler.py
new file mode 100644
index 0000000000000000000000000000000000000000..6aa66336ec6408dca0ce60b68c6b5a59acd6d8bc
--- /dev/null
+++ b/extensions/Training_PRO/custom_scheduler.py
@@ -0,0 +1,175 @@
+from functools import partial
+import torch
+import transformers
+import math
+from torch.optim.lr_scheduler import LambdaLR
+
+
+#FPHAM custom training scheduller block - should be extracted to separate file
+last_print_label = ''
+
+# hold constant to the half of epochs then cosine down to 0
+def _get_fp_half_schedule_with_warmup_lr_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_firstepoch_steps: int):
+
+ global last_print_label
+ print_label = ''
+
+ half_steps = num_training_steps//2
+
+ num_warmup_steps = min(num_warmup_steps,half_steps)
+
+ if current_step < num_warmup_steps:
+ print_label = 'Scheduler: Warmup'
+ elif current_step < half_steps:
+ print_label = 'Scheduler: Hold'
+ else:
+ print_label = 'Scheduler: Annealing'
+
+ if print_label != last_print_label:
+ print(print_label)
+
+ last_print_label = print_label
+
+ if current_step < num_warmup_steps:
+ return float(current_step) / float(max(1, num_warmup_steps))
+
+ if current_step < half_steps:
+ return 1.0
+
+ progress = float(current_step - half_steps) / float(max(1, num_training_steps - half_steps))
+ num_cycles = 0.5
+ return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))
+
+# constant to the first epochs then cosine down to 0 over the rest epochs
+def _get_fp_cosine_schedule_with_warmup_lr_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_firstepoch_steps: int):
+
+ global last_print_label
+ print_label = ''
+
+ num_warmup_steps = min(num_warmup_steps,num_firstepoch_steps)
+
+ if current_step < num_warmup_steps:
+ print_label = 'Scheduler: Warmup'
+ elif current_step < num_firstepoch_steps:
+ print_label = 'Scheduler: Hold'
+ else:
+ print_label = 'Scheduler: Annealing'
+
+ if print_label != last_print_label:
+ print(print_label)
+
+ last_print_label = print_label
+
+ if current_step < num_warmup_steps:
+ return float(current_step) / float(max(1, num_warmup_steps))
+
+ if current_step < num_firstepoch_steps:
+ return 1.0
+
+ progress = float(current_step - num_firstepoch_steps) / float(max(1, num_training_steps - num_firstepoch_steps))
+ num_cycles = 0.5
+ return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))
+
+
+def custom_cosine_scheduler_with_warmup(optimizer, num_warmup_steps, num_training_steps, num_firstepoch_steps, last_epoch=-1):
+ """
+ Args:
+ optimizer ([`~torch.optim.Optimizer`]):
+ The optimizer for which to schedule the learning rate.
+ num_warmup_steps (`int`):
+ The number of steps for the warmup phase.
+ num_training_steps (`int`):
+ The total number of training steps.
+ last_epoch (`int`, *optional*, defaults to -1):
+ The index of the last epoch when resuming training.
+
+ Return:
+ `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
+ """
+
+ lr_lambda = partial(
+ _get_fp_cosine_schedule_with_warmup_lr_lambda,
+ num_warmup_steps=num_warmup_steps,
+ num_training_steps=num_training_steps,
+ num_firstepoch_steps = num_firstepoch_steps,
+ )
+ return LambdaLR(optimizer, lr_lambda, last_epoch)
+
+def custom_half_scheduler_with_warmup(optimizer, num_warmup_steps, num_training_steps, num_firstepoch_steps, last_epoch=-1):
+ """
+ Args:
+ optimizer ([`~torch.optim.Optimizer`]):
+ The optimizer for which to schedule the learning rate.
+ num_warmup_steps (`int`):
+ The number of steps for the warmup phase.
+ num_training_steps (`int`):
+ The total number of training steps.
+ last_epoch (`int`, *optional*, defaults to -1):
+ The index of the last epoch when resuming training.
+
+ Return:
+ `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
+ """
+
+ lr_lambda = partial(
+ _get_fp_half_schedule_with_warmup_lr_lambda,
+ num_warmup_steps=num_warmup_steps,
+ num_training_steps=num_training_steps,
+ num_firstepoch_steps = num_firstepoch_steps,
+ )
+ return LambdaLR(optimizer, lr_lambda, last_epoch)
+
+class FPSchedulerTrainer(transformers.Trainer):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ def create_scheduler(self, num_training_steps: int, optimizer: torch.optim.Optimizer = None):
+ #Setup the scheduler. The optimizer of the trainer must have been set up either before this method is called or passed as an argument.
+
+ num_train_epochs = self.args.num_train_epochs
+ num_warmup_steps=self.args.get_warmup_steps(num_training_steps)
+ num_firstepoch_steps = math.ceil(num_training_steps/num_train_epochs)
+ num_warmup_acc = num_warmup_steps*self.args.gradient_accumulation_steps
+ num_firstepoch_steps_acc = num_firstepoch_steps*self.args.gradient_accumulation_steps
+ num_training_steps_acc = num_training_steps*self.args.gradient_accumulation_steps
+
+ print (f"Warm-up steps aligned to Gradient accumulation ({self.args.gradient_accumulation_steps}) = {num_warmup_acc} actual warmup steps")
+ if self.args.lr_scheduler_type == 'cosine':
+
+ num_warmup_acc_min = min(num_warmup_acc, num_firstepoch_steps_acc)
+
+ if num_warmup_acc>num_firstepoch_steps_acc:
+ print(f"\033[1;31;1mWARNING: The number of warmup steps is set too high! It will be clamped to 1 epoch, essentially going from warmup to annealing.\033[0;37;0m")
+ print (f"FP Scheduler Warmup: 0-[{num_warmup_acc_min}], Hold [{num_warmup_acc_min}]-{num_firstepoch_steps_acc}, Annealing {num_firstepoch_steps_acc}-{num_training_steps_acc}")
+ else:
+ print (f"FP Scheduler Warmup: 0-{num_warmup_acc_min}, Hold {num_warmup_acc_min}-{num_firstepoch_steps_acc}, Annealing {num_firstepoch_steps_acc}-{num_training_steps_acc}")
+
+ self.lr_scheduler = custom_cosine_scheduler_with_warmup(
+ optimizer=self.optimizer if optimizer is None else optimizer,
+ num_warmup_steps=num_warmup_steps,
+ num_training_steps=num_training_steps,
+ num_firstepoch_steps = num_firstepoch_steps,
+ )
+ self._created_lr_scheduler = True
+ return self.lr_scheduler
+ elif self.args.lr_scheduler_type == 'constant':
+
+ half_step_acc = num_training_steps_acc//2
+ num_warmup_acc_min = min(num_warmup_acc, half_step_acc)
+
+ if num_warmup_acc>half_step_acc:
+ print(f"\033[1;31;1mWARNING: The number of warmup steps is set too high! It will be clamped to half of all epochs, essentially going from warmup to annealing in the middle.\033[0;37;0m")
+ print (f"FP Scheduler Warmup: 0-[{num_warmup_acc_min}], Hold [{num_warmup_acc_min}]-{half_step_acc}, Annealing {half_step_acc}-{num_training_steps_acc}")
+ else:
+ print (f"FP Scheduler Warmup: 0-{num_warmup_acc_min}, Hold {num_warmup_acc_min}-{half_step_acc}, Annealing {half_step_acc}-{num_training_steps_acc}")
+
+ self.lr_scheduler = custom_half_scheduler_with_warmup(
+ optimizer=self.optimizer if optimizer is None else optimizer,
+ num_warmup_steps=num_warmup_steps,
+ num_training_steps=num_training_steps,
+ num_firstepoch_steps = num_firstepoch_steps,
+ )
+ self._created_lr_scheduler = True
+ return self.lr_scheduler
+ else:
+ return super().create_scheduler(num_training_steps=num_training_steps, optimizer=optimizer)
\ No newline at end of file
diff --git a/extensions/Training_PRO/matplotgraph.py b/extensions/Training_PRO/matplotgraph.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e607526925445134fc1715a1fab6bb4af99112d
--- /dev/null
+++ b/extensions/Training_PRO/matplotgraph.py
@@ -0,0 +1,62 @@
+import os
+import json
+
+def create_graph(lora_path, lora_name):
+ try:
+ import matplotlib.pyplot as plt
+ from matplotlib.ticker import ScalarFormatter
+
+ peft_model_path = f'{lora_path}/training_graph.json'
+ image_model_path = f'{lora_path}/training_graph.png'
+ # Check if the JSON file exists
+ if os.path.exists(peft_model_path):
+ # Load data from JSON file
+ with open(peft_model_path, 'r') as file:
+ data = json.load(file)
+ # Extract x, y1, and y2 values
+ x = [item['epoch'] for item in data]
+ y1 = [item['learning_rate'] for item in data]
+ y2 = [item['loss'] for item in data]
+
+ # Create the line chart
+ fig, ax1 = plt.subplots(figsize=(10, 6))
+
+
+ # Plot y1 (learning rate) on the first y-axis
+ ax1.plot(x, y1, 'b-', label='Learning Rate')
+ ax1.set_xlabel('Epoch')
+ ax1.set_ylabel('Learning Rate', color='b')
+ ax1.tick_params('y', colors='b')
+
+ # Create a second y-axis
+ ax2 = ax1.twinx()
+
+ # Plot y2 (loss) on the second y-axis
+ ax2.plot(x, y2, 'r-', label='Loss')
+ ax2.set_ylabel('Loss', color='r')
+ ax2.tick_params('y', colors='r')
+
+ # Set the y-axis formatter to display numbers in scientific notation
+ ax1.yaxis.set_major_formatter(ScalarFormatter(useMathText=True))
+ ax1.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
+
+ # Add grid
+ ax1.grid(True)
+
+ # Combine the legends for both plots
+ lines, labels = ax1.get_legend_handles_labels()
+ lines2, labels2 = ax2.get_legend_handles_labels()
+ ax2.legend(lines + lines2, labels + labels2, loc='best')
+
+ # Set the title
+ plt.title(f'{lora_name} LR and Loss vs Epoch')
+
+ # Save the chart as an image
+ plt.savefig(image_model_path)
+
+ print(f"Graph saved in {image_model_path}")
+ else:
+ print(f"File 'training_graph.json' does not exist in the {lora_path}")
+
+ except ImportError:
+ print("matplotlib is not installed. Please install matplotlib to create PNG graphs")
\ No newline at end of file
diff --git a/extensions/Training_PRO/script.py b/extensions/Training_PRO/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..c272e3a8d14bb50aa33669952fc4d0081b24d42b
--- /dev/null
+++ b/extensions/Training_PRO/script.py
@@ -0,0 +1,1055 @@
+import os
+
+os.environ["WANDB_MODE"] = "offline"
+# os.environ["WANDB_DISABLED"] = "true"
+
+import json
+import math
+import random
+import shutil
+import sys
+import threading
+import time
+import traceback
+from datetime import datetime
+from pathlib import Path
+
+import gradio as gr
+import torch
+import transformers
+
+from .custom_scheduler import FPSchedulerTrainer
+from .matplotgraph import create_graph
+from .train_utils import get_available_loras_local, precise_cut, sliding_block_cut
+
+from datasets import Dataset, load_dataset
+from peft import (
+ LoraConfig,
+ get_peft_model,
+ prepare_model_for_kbit_training,
+ set_peft_model_state_dict
+)
+from peft.utils.other import \
+ TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING as model_to_lora_modules
+from transformers.models.auto.modeling_auto import (
+ MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
+)
+
+from modules import shared, utils
+from modules.ui import create_refresh_button
+
+from modules.evaluate import (
+ calculate_perplexity,
+ generate_markdown_table,
+ save_past_evaluations
+)
+from modules.logging_colors import logger
+from modules.models import reload_model
+from modules.utils import natural_keys
+
+
+params = {
+ "display_name": "Training PRO",
+ "is_tab": True
+}
+
+non_serialized_params = {
+ "debug_slicer": False,
+ "Lora_sortedByTime": False,
+ "stop_at_loss": 0,
+ "save_steps_under_loss": 0.0,
+ "save_checkpoint_now": False,
+ "training_loop": False,
+ "current_stability": 0,
+}
+
+MODEL_CLASSES = {v[1]: v[0] for v in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES.items()}
+PARAMETERS = ["lora_name", "always_override", "save_steps", "micro_batch_size", "batch_size", "epochs", "learning_rate", "lr_scheduler_type", "lora_rank", "lora_alpha", "lora_dropout", "cutoff_len", "dataset", "eval_dataset", "format", "eval_steps", "raw_text_file", "higher_rank_limit", "warmup_steps", "optimizer", "hard_cut_string", "train_only_after", "stop_at_loss", "add_eos_token", "min_chars", "report_to", "precize_slicing_overlap", "add_eos_token_type", "save_steps_under_loss", "add_bos_token", "training_projection","sliding_window","warmup_ratio","grad_accumulation"]
+WANT_INTERRUPT = False
+
+train_log = {}
+train_template = {}
+train_log_graph = []
+train_choices = ["all","q-k-v-o","q-k-v","k-v-down","q-v"]
+
+
+
+def ui():
+ with gr.Tab('Train LoRA', elem_id='lora-train-tab'):
+ tmp = gr.State('')
+ with gr.Row():
+ with gr.Column():
+ # YY.MM.DD
+ gr.Markdown("`Ver: 23.09.22` This is enhanced version of QLora Training. [Maintained by FP](https://github.com/FartyPants/Training_PRO/tree/main)")
+
+ with gr.Row():
+ with gr.Column(scale=5):
+ with gr.Row():
+ copy_from = gr.Dropdown(label='Copy parameters from', value='None', choices=get_available_loras_local(non_serialized_params['Lora_sortedByTime']), elem_classes=['slim-dropdown'])
+ create_refresh_button(copy_from, lambda: None, lambda: {'choices': get_available_loras_local(non_serialized_params['Lora_sortedByTime'])}, 'refresh-button')
+ with gr.Column():
+ sort_byTime = gr.Checkbox(label='Sort list by Date', value=False, info='Sorts Loras by date created.', elem_classes=['no-background'])
+
+ with gr.Row():
+ with gr.Column(scale=5):
+ lora_name = gr.Textbox(label='Name', info='The name of your new LoRA file')
+
+ with gr.Column():
+ always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name is the same, checking will replace the existing file, and unchecking will load and continue from it (the rank must be the same).', elem_classes=['no-background'])
+
+ with gr.Row():
+ with gr.Column():
+ lora_rank = gr.Slider(label='LoRA Rank', value=32, minimum=0, maximum=1024, step=4, info='Also called dimension count. Higher values = larger file, more content control. Smaller values = smaller file, less control. Use 4 or 8 for style, 128 or 256 to teach, 1024+ for fine-detail on big data. More VRAM is needed for higher ranks.')
+ lora_alpha = gr.Slider(label='LoRA Alpha', value=64, minimum=0, maximum=2048, step=4, info='This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.')
+ batch_size = gr.Slider(visible= False, label='Batch Size', value=0, minimum=0, maximum=1024, step=4, info='Now Replaced with Gradient accumulation. Keeping it for sake of old saved data')
+ micro_batch_size = gr.Slider(label='True Batch Size', value=4, minimum=1, maximum=128, step=1, info='Specifies how many text blocks per step will be trained. The higher value, the better the concept of training will be, but it requires more GPU memory and it reduces speed.')
+ grad_accumulation = gr.Slider(label='Gradient Accumulation Steps', value=1, minimum=1, maximum=256, step=1, info="Virtually multiplies the Batch Size by averaging the learning over more than one step. Evens out loss fluctuations but also increases number of total steps.")
+ cutoff_len = gr.Slider(label='Cutoff Length', minimum=0, maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.')
+
+ with gr.Column():
+ stop_at_loss = gr.Slider(label='Stop at loss (Can be changed during training)', minimum=0.0, maximum=3.0, step=0.1, value=0.00, info='The process will automatically stop once the desired loss value is reached.')
+ gr.Markdown(" ")
+ epochs = gr.Number(label='Epochs', value=3, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.')
+ learning_rate = gr.Textbox(label='Learning Rate', value='3e-4', info='In scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.')
+ lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='linear', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt', 'FP_low_epoch_annealing', 'FP_half_time_annealing'], info='Learning rate scheduler - defines how the learning rate changes over time. Custom schedulers: `FP_low_epoch_annealing` constant for 1 epoch then cosine anneal. `FP_half_time_annealing` constant for half time then cosine anneal', elem_classes=['slim-dropdown'])
+
+ with gr.Accordion(label='Checkpoints', open=True):
+ with gr.Row():
+ with gr.Column():
+ save_steps = gr.Number(label='Save every n steps', value=0, info='A checkpoint will be saved every n steps. (0 = OFF)')
+ with gr.Column():
+ save_steps_under_loss = gr.Slider(label='Save at 10% Loss change', value=1.8, minimum=0.0, maximum=3.0, step=0.1, info="Saves checkpoints at (or bellow) this loss and then each time loss falls by at least 10% This works independently from 'Save every n steps'")
+ with gr.Row():
+ save_chackpoint_now = gr.Button('Queue Checkpoint Now')
+
+ with gr.Accordion(label='Advanced Options', open=True):
+ with gr.Row():
+ with gr.Column():
+ warmup_steps = gr.Number(label='Warmup Steps', value=100, info='Number of max steps used for a linear warmup. Value different than 0 has precedent over Warmup Ratio. The actual number of steps will be the closest multiple of graddient accumulation')
+ warmup_ratio = gr.Slider(label='Warmup Ratio', minimum=0.0, maximum=0.2, step=0.025, value=0.0, info='Ratio of total training steps that will be used for a linear warmup. It applies only if Warmup Step is 0.')
+
+ training_projection = gr.Radio(value = train_choices[4], label='LLaMA Target Projections', info='Change the targets (LORA is typically q-v)', choices=train_choices)
+ lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers. This can help reduce overfitting. Most users should leave at default.')
+ optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Different optimizer implementation options, for advanced users. Effects of different options are not well documented yet.', elem_classes=['slim-dropdown'])
+
+ with gr.Column():
+ train_only_after = gr.Textbox(label='Train Only After', value='', info='Only consider text *after* this string in any given chunk for training. For Alpaca datasets, use "### Response:" to only train the response and ignore the input.')
+ add_bos_token = gr.Checkbox(label='Add BOS token', value=True, info="Adds BOS token for each dataset item")
+ add_eos_token = gr.Checkbox(label='Add EOS token', value=False, info="Adds EOS token for each dataset item")
+ add_eos_token_type = gr.Dropdown(label='EOS placement (raw text)', choices=['Every Block', 'Hard Cut Blocks Only'], value='Every Block', info='', allow_custom_value = False)
+
+ higher_rank_limit = gr.Checkbox(label='Enable higher ranks', value=False, info='If checked, changes Rank/Alpha slider above to go much higher. This will not work without a datacenter-class GPU.')
+ report_to = gr.Radio(label="Save detailed logs with", value="None", choices=["None", "wandb", "tensorboard"], interactive=True)
+
+ with gr.Column():
+ with gr.Tab(label='Formatted Dataset'):
+ with gr.Row():
+ with gr.Column():
+ with gr.Row():
+ dataset = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'json'), value='None', label='Dataset', info='The dataset file to use for training.', elem_classes=['slim-dropdown'])
+ create_refresh_button(dataset, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'json')}, 'refresh-button')
+ with gr.Row():
+ eval_dataset = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'json'), value='None', label='Evaluation Dataset', info='The (optional) dataset file used to evaluate the model after training.', elem_classes=['slim-dropdown'])
+ create_refresh_button(eval_dataset, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'json')}, 'refresh-button')
+
+ with gr.Column():
+ with gr.Row():
+ format = gr.Dropdown(choices=utils.get_datasets('training/formats', 'json'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.', elem_classes=['slim-dropdown'])
+ create_refresh_button(format, lambda: None, lambda: {'choices': utils.get_datasets('training/formats', 'json')}, 'refresh-button')
+ with gr.Row():
+ eval_steps = gr.Number(label='Evaluate every n steps', value=100, info='If an evaluation dataset is given, test it every time this many steps pass.')
+
+ with gr.Tab(label="Raw text file"):
+ with gr.Row():
+ raw_text_file = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'txt'), value='None', label='Text file', info='The raw text file to use for training.', elem_classes=['slim-dropdown'])
+ create_refresh_button(raw_text_file, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'txt')}, 'refresh-button')
+
+ with gr.Row():
+ with gr.Column():
+ precize_slicing_overlap = gr.Checkbox(label='Add Overlapping blocks', value = True)
+ sliding_window = gr.Checkbox(label='DEMENTOR Long-form Learning by FP (Highly Experimental, use low epochs)', value = False, info='Deep Memorization Enforcement Through Overlapping and Repetition. (I named it, so shush). Special process for learning long-form text using low amount of epochs.')
+ #debug_slicer = gr.Checkbox(label='Dump sentencelist.json to logs', value = non_serialized_params['debug_slicer'], info='Debug Slicer')
+
+ with gr.Column():
+ hard_cut_string = gr.Textbox(label='Hard Cut String', value='\\n\\n\\n', info='String that indicates a cut between logical blocks of text (ex. Ideas or Chapters). Helps prevent unwanted overlap between unrelated ideas.')
+ min_chars = gr.Number(label='Ignore small blocks', value=0, info='Ignore Text blocks that have less or equal characters than this number.')
+ with gr.Row():
+ with gr.Column():
+ check_dataset_btn = gr.Button('Load and Check Dataset and suggest data entries')
+ check_dataset_txt = gr.Textbox(label='Dataset info', value='')
+
+ with gr.Row():
+ start_button = gr.Button("Start LoRA Training", variant='primary')
+ stop_button = gr.Button("Interrupt")
+
+ output = gr.Markdown(value="Ready")
+
+ with gr.Tab('Perplexity evaluation', elem_id='evaluate-tab'):
+ with gr.Row():
+ with gr.Column():
+ models = gr.Dropdown(utils.get_available_models(), label='Models', multiselect=True)
+ evaluate_text_file = gr.Dropdown(choices=['wikitext', 'ptb', 'ptb_new'] + utils.get_datasets('training/datasets', 'txt')[1:], value='wikitext', label='Input dataset', info='The raw text file on which the model will be evaluated. The first options are automatically downloaded: wikitext, ptb, and ptb_new. The next options are your local text files under training/datasets.')
+ with gr.Row():
+ with gr.Column():
+ stride_length = gr.Slider(label='Stride', minimum=1, maximum=2048, value=512, step=1, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.')
+
+ with gr.Column():
+ max_length = gr.Slider(label='max_length', minimum=0, maximum=8096, value=0, step=1, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.')
+
+ with gr.Row():
+ start_current_evaluation = gr.Button("Evaluate loaded model")
+ start_evaluation = gr.Button("Evaluate selected models")
+ stop_evaluation = gr.Button("Interrupt")
+
+ with gr.Column():
+ evaluation_log = gr.Markdown(value='')
+
+ evaluation_table = gr.Dataframe(value=generate_markdown_table(), interactive=True)
+ with gr.Row():
+ save_comments = gr.Button('Save comments', elem_classes="small-button")
+ refresh_table = gr.Button('Refresh the table', elem_classes="small-button")
+
+ # Training events
+ all_params = [lora_name, always_override, save_steps, micro_batch_size, batch_size, epochs, learning_rate, lr_scheduler_type, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format, eval_steps, raw_text_file, higher_rank_limit, warmup_steps, optimizer, hard_cut_string, train_only_after, stop_at_loss, add_eos_token, min_chars, report_to, precize_slicing_overlap, add_eos_token_type, save_steps_under_loss, add_bos_token, training_projection,sliding_window,warmup_ratio,grad_accumulation]
+
+ def fix_old_version(batch_size_val,micro_batch_size_val, grad_accumulation_val):
+ if batch_size_val>0:
+ gradient_acc = batch_size_val // micro_batch_size_val
+ print(f"Using Old version of Batch Size ({batch_size_val}) to set Gradient Accumulation: {gradient_acc}")
+ return gradient_acc
+
+ return grad_accumulation_val
+
+ copy_from.change(do_copy_params, [copy_from] + all_params, all_params).then(fix_old_version,[batch_size,micro_batch_size, grad_accumulation],grad_accumulation)
+ start_button.click(do_train, all_params, output)
+ stop_button.click(do_interrupt, None, None, queue=False)
+ higher_rank_limit.change(change_rank_limit, [higher_rank_limit], [lora_rank, lora_alpha])
+
+ def trigger_stop_at_loss(stop_at_loss_value):
+ non_serialized_params.update({"stop_at_loss": stop_at_loss_value})
+ if non_serialized_params['training_loop']:
+ print(f"Queue: [Stop at loss Change] to {stop_at_loss_value}")
+
+
+ stop_at_loss.change(trigger_stop_at_loss, stop_at_loss, None)
+
+ def trigger_save_checkpoint():
+ non_serialized_params.update({"save_checkpoint_now": True})
+ if non_serialized_params['training_loop']:
+ print("Queue: [Save checkpoint] Checkpoint will be saved after the current step is finished.")
+ else:
+ print("Use during the training to save the checkpoint at any time.")
+
+
+
+ save_chackpoint_now.click(trigger_save_checkpoint, None, None)
+
+ dataset_calc_params = [save_steps,micro_batch_size, epochs, cutoff_len, dataset, format, raw_text_file, warmup_steps, hard_cut_string, min_chars, precize_slicing_overlap,sliding_window,warmup_ratio,grad_accumulation]
+
+ def check_dataset(save_steps:int, micro_batch_size: int, epochs: int, cutoff_len: int, dataset:str, format:str, raw_text_file:str, warmup_steps:int, hard_cut_string:str, min_chars:int, precize_slicing_overlap:bool,sliding_window:bool,warmup_ratio:float,grad_accumulation:int):
+ result = "Specify JSON dastaset or raw text file"
+ total_blocks = 0
+ if shared.tokenizer is None:
+ yield "Tokenizer is not available. Please Load some Model first."
+ return
+
+ if raw_text_file not in ['None', '']:
+ logger.info("Loading raw text file dataset...")
+ fullpath = clean_path('training/datasets', f'{raw_text_file}')
+ fullpath = Path(fullpath)
+ if fullpath.is_dir():
+ logger.info('Training path directory {}'.format(raw_text_file))
+ raw_text = ""
+ file_paths = sorted(fullpath.glob('*.txt'), key=lambda path: natural_keys(path.name))
+ for file_path in file_paths:
+ if file_path.is_file():
+ with file_path.open('r', encoding='utf-8') as file:
+ raw_text += file.read().replace('\r', '')
+
+ logger.info(f"Loaded training file: {file_path.name}")
+ else:
+ with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file:
+ raw_text = file.read().replace('\r', '')
+
+
+ if min_chars<0:
+ min_chars = 0
+
+ # == New more precise slicing on sentence boundary ==
+ if sliding_window:
+ text_chunks = sliding_block_cut(raw_text, min_chars, False, cutoff_len, hard_cut_string,non_serialized_params['debug_slicer'])
+ else:
+ text_chunks = precise_cut(raw_text, precize_slicing_overlap, min_chars, False, cutoff_len, hard_cut_string,non_serialized_params['debug_slicer'])
+
+ total_blocks = len(text_chunks)
+ result = f"Raw Text: ({raw_text_file}.txt) has {total_blocks} blocks (with cutoff length = {cutoff_len})"
+ del text_chunks
+
+ else:
+ if dataset in ['None', '']:
+ yield "Select dataset or Raw text."
+ return
+
+ if format in ['None', '']:
+ yield "Select format choice for dataset."
+ return
+
+ with open(clean_path('training/formats', f'{format}.json'), 'r', encoding='utf-8-sig') as formatFile:
+ format_data: dict[str, str] = json.load(formatFile)
+
+ def generate_prompt(data_point: dict[str, str]):
+ for options, data in format_data.items():
+ if set(options.split(',')) == set(x[0] for x in data_point.items() if (type(x[1]) is str and len(x[1].strip()) > 0)):
+ for key, val in data_point.items():
+ if type(val) is str:
+ data = data.replace(f'%{key}%', val)
+ return data
+ raise RuntimeError(f'Data-point "{data_point}" has no keyset match within format "{list(format_data.keys())}"')
+
+ def tokenize_dummy(prompt):
+
+ input_ids = shared.tokenizer.encode(prompt, truncation=True, max_length=cutoff_len)
+ labels = [1] * len(input_ids)
+ input_ids = torch.tensor(input_ids)
+ return {
+ "input_ids": input_ids,
+ "labels": labels,
+ "attention_mask": input_ids.ne(shared.tokenizer.pad_token_id),
+ }
+
+ def generate_and_tokenize_prompt(data_point):
+ prompt = generate_prompt(data_point)
+ return tokenize_dummy(prompt)
+
+ logger.info("Loading JSON datasets...")
+ data = load_dataset("json", data_files=clean_path('training/datasets', f'{dataset}.json'))
+ train_data = data['train'].map(generate_and_tokenize_prompt, new_fingerprint='%030x' % random.randrange(16**30))
+ total_blocks = train_data.num_rows
+
+ result = f"Dataset: ({dataset}.json) has {total_blocks} blocks (with cutoff length = {cutoff_len})"
+
+ if total_blocks>0:
+ number_ofSteps = int(math.ceil(total_blocks / micro_batch_size) * epochs)
+ num_stepsPer_epoch = int(math.ceil(number_ofSteps/epochs))
+ min_warm = math.ceil(100 / grad_accumulation)
+
+ warmup_steps_suggest = min(int(min_warm*grad_accumulation), int(math.ceil(number_ofSteps * 0.1)))
+ warmup_steps_suggest = min(warmup_steps_suggest,num_stepsPer_epoch)
+
+ save_each_n_min = int(math.ceil(number_ofSteps/10))
+ save_each_n_max = int(math.ceil(number_ofSteps/5))
+ gradient_accumulation_max = int(total_blocks)//micro_batch_size
+
+ result += f"\n[Batch Size: {micro_batch_size}, Epochs: {epochs}, Gradient Accumulation: {grad_accumulation}]\n"
+ result += f"Total number of steps: {number_ofSteps}\n"
+ result += f"Steps per each Epoch: {num_stepsPer_epoch}\n"
+ result += f"Warmup steps suggestion: {warmup_steps_suggest} (Current: {int(warmup_steps)})\n"
+ result += f"Checkpoint suggestion: Save every {save_each_n_min} - {save_each_n_max} steps (Current: {int(save_steps)})"
+ if gradient_accumulation_max < grad_accumulation:
+ result += f"\n\nWARNING: Gradient Accumulation {grad_accumulation} is too high: It should be below {gradient_accumulation_max}"
+
+
+ yield result
+ return
+
+ check_dataset_btn.click(check_dataset, dataset_calc_params ,check_dataset_txt)
+
+ # Evaluation events. For some reason, the interrupt event
+ # doesn't work with the .then() syntax, so I write them one
+ # by one in this ugly but functional way.
+ ev = start_evaluation.click(calculate_perplexity, [models, evaluate_text_file, stride_length, max_length], evaluation_log, show_progress=False)
+ start_evaluation.click(generate_markdown_table, None, evaluation_table, show_progress=False)
+
+ start_current_evaluation.click(lambda: ['current model'], None, tmp)
+ ev_cur = start_current_evaluation.click(calculate_perplexity, [tmp, evaluate_text_file, stride_length, max_length], evaluation_log, show_progress=False)
+ start_current_evaluation.click(generate_markdown_table, None, evaluation_table, show_progress=False)
+
+ stop_evaluation.click(None, None, None, cancels=[ev, ev_cur], queue=False)
+ refresh_table.click(generate_markdown_table, None, evaluation_table, show_progress=True)
+ save_comments.click(
+ save_past_evaluations, evaluation_table, None).then(
+ lambda: "Comments saved.", None, evaluation_log, show_progress=False)
+
+ def reload_lora():
+ return gr.Dropdown.update(choices=get_available_loras_local(non_serialized_params['Lora_sortedByTime']))
+
+ # nonserialized items
+
+ sort_byTime.change(lambda x: non_serialized_params.update({"Lora_sortedByTime": x}), sort_byTime, None).then(reload_lora,None,copy_from)
+ #debug_slicer.change(lambda x: non_serialized_params.update({"debug_slicer": x}), debug_slicer, None)
+
+
+def do_interrupt():
+ global WANT_INTERRUPT
+ WANT_INTERRUPT = True
+
+
+def do_copy_params(lora_name: str, *args):
+ f_name = f"{shared.args.lora_dir}/{clean_path(None, lora_name)}/training_parameters.json"
+ if Path(f_name).is_file():
+ with open(f_name, 'r', encoding='utf-8') as format_file:
+ params: dict[str, str] = json.load(format_file)
+ else:
+ params = {}
+
+ result = list()
+ for i in range(0, len(PARAMETERS)):
+ key = PARAMETERS[i]
+ if key in params:
+ result.append(params[key])
+ else:
+ result.append(args[i])
+
+ return result
+
+
+def change_rank_limit(use_higher_ranks: bool):
+ mult = 2 if use_higher_ranks else 1
+ return {"maximum": 1024 * mult, "__type__": "update"}, {"maximum": 2048 * mult, "__type__": "update"}
+
+
+def clean_path(base_path: str, path: str):
+ """Strips unusual symbols and forcibly builds a path as relative to the intended directory."""
+ path = path.replace('\\', '/').replace('..', '_')
+ if base_path is None:
+ return path
+
+ return f'{Path(base_path).absolute()}/{path}'
+
+
+def backup_adapter(input_folder):
+ # Get the creation date of the file adapter_model.bin
+ try:
+ adapter_file = Path(f"{input_folder}/adapter_model.bin")
+ if adapter_file.is_file():
+
+ logger.info("Backing up existing LoRA adapter...")
+ creation_date = datetime.fromtimestamp(adapter_file.stat().st_ctime)
+ creation_date_str = creation_date.strftime("Backup-%Y-%m-%d")
+
+ # Create the new subfolder
+ subfolder_path = Path(f"{input_folder}/{creation_date_str}")
+ subfolder_path.mkdir(parents=True, exist_ok=True)
+
+ # Check if the file already exists in the subfolder
+ backup_adapter_file = Path(f"{input_folder}/{creation_date_str}/adapter_model.bin")
+ if backup_adapter_file.is_file():
+ print(" - Backup already exists. Skipping backup process.")
+ return
+
+ # Copy existing files to the new subfolder
+ existing_files = Path(input_folder).iterdir()
+ for file in existing_files:
+ if file.is_file():
+ shutil.copy2(file, subfolder_path)
+ except Exception as e:
+ print("An error occurred in backup_adapter:", str(e))
+
+
+def calc_trainable_parameters(model):
+ trainable_params = 0
+ all_param = 0
+ for _, param in model.named_parameters():
+ num_params = param.numel()
+ # if using DS Zero 3 and the weights are initialized empty
+ if num_params == 0 and hasattr(param, "ds_numel"):
+ num_params = param.ds_numel
+
+ all_param += num_params
+ if param.requires_grad:
+ trainable_params += num_params
+
+ return trainable_params, all_param
+
+
+def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, raw_text_file: str, higher_rank_limit: bool, warmup_steps: int, optimizer: str, hard_cut_string: str, train_only_after: str, stop_at_loss: float, add_eos_token: bool, min_chars: int, report_to: str, precize_slicing_overlap: bool, add_eos_token_type: str, save_steps_under_loss: float, add_bos_token: bool, training_projection: str,sliding_window:bool,warmup_ratio:float, grad_accumulation: int):
+
+ if shared.args.monkey_patch:
+ from alpaca_lora_4bit.monkeypatch.peft_tuners_lora_monkey_patch import (
+ replace_peft_model_with_int4_lora_model
+ )
+ replace_peft_model_with_int4_lora_model()
+
+ global WANT_INTERRUPT
+ WANT_INTERRUPT = False
+
+ # == Input validation / processing ==
+ yield "Preparing the input..."
+ lora_file_path = clean_path(None, lora_name)
+ if lora_file_path.strip() == '':
+ yield "Missing or invalid LoRA file name input."
+ return
+
+ lora_file_path = f"{Path(shared.args.lora_dir)}/{lora_file_path}"
+ actual_lr = float(learning_rate)
+ model_type = type(shared.model).__name__
+
+ if model_type in MODEL_CLASSES:
+ model_id = MODEL_CLASSES[model_type]
+ else:
+ model_id = "llama"
+ if model_type == "PeftModelForCausalLM":
+ if len(shared.lora_names) > 0:
+ yield "You are trying to train a LoRA while you already have another LoRA loaded. This will work, but may have unexpected effects. *(Will continue anyway in 5 seconds, press `Interrupt` to stop.)*"
+ logger.warning("Training LoRA over top of another LoRA. May have unexpected effects.")
+ else:
+ yield "Model ID not matched due to LoRA loading. Consider reloading base model. *(Will continue anyway in 5 seconds, press `Interrupt` to stop.)*"
+ logger.warning("Model ID not matched due to LoRA loading. Consider reloading base model.")
+ else:
+ yield "LoRA training has only currently been validated for LLaMA, OPT, GPT-J, and GPT-NeoX models. Unexpected errors may follow. *(Will continue anyway in 5 seconds, press `Interrupt` to stop.)*"
+ logger.warning(f"LoRA training has only currently been validated for LLaMA, OPT, GPT-J, and GPT-NeoX models. (Found model type: {model_type})")
+
+ time.sleep(5)
+
+ if shared.args.loader == 'GPTQ-for-LLaMa' and not shared.args.monkey_patch:
+ yield "LoRA training with GPTQ-for-LLaMa requires loading with `--monkey-patch`"
+ return
+
+ if cutoff_len <= 0 or micro_batch_size <= 0 or actual_lr <= 0 or lora_rank <= 0 or lora_alpha <= 0:
+ yield "Cannot input zeroes."
+ return
+
+ #in new version we dumped this in favor of grad_accumulation
+ #set it to zero fo new save
+ batch_size = 0
+
+ gradient_accumulation_steps = grad_accumulation #batch_size // micro_batch_size
+ shared.tokenizer.pad_token_id = 0
+ shared.tokenizer.padding_side = "left"
+
+ def encode(text, prepend_bos_token):
+
+ result = shared.tokenizer.encode(text, truncation=True, max_length=cutoff_len)
+ # Check if the first two tokens are BOS
+ if len(result) >= 2 and result[:2] == [shared.tokenizer.bos_token_id, shared.tokenizer.bos_token_id]:
+ result = result[1:]
+
+ if not prepend_bos_token and result[0] == shared.tokenizer.bos_token_id:
+ result = result[1:]
+ return result
+
+ def tokenize(prompt, append_eos_token=False, prepend_bos_token = False):
+
+ if train_only_after == '' or train_only_after not in prompt:
+ input_ids = encode(prompt, prepend_bos_token)
+
+ if append_eos_token and input_ids[-1] != shared.tokenizer.eos_token_id and len(input_ids) < cutoff_len:
+ input_ids.append(shared.tokenizer.eos_token_id)
+
+ input_ids = [shared.tokenizer.pad_token_id] * (cutoff_len - len(input_ids)) + input_ids
+
+ labels = [1] * len(input_ids)
+ else:
+ ind = prompt.index(train_only_after) + len(train_only_after)
+ before_tokens = encode(prompt[:ind], prepend_bos_token)
+ after_tokens = encode(prompt[ind:], False)
+
+ if append_eos_token and after_tokens[-1] != shared.tokenizer.eos_token_id:
+ after_tokens.append(shared.tokenizer.eos_token_id)
+
+ full_length = len(after_tokens) + len(before_tokens)
+ if full_length > cutoff_len:
+ after_tokens = after_tokens[:cutoff_len - len(before_tokens)]
+ else:
+ before_tokens = [shared.tokenizer.pad_token_id] * (cutoff_len - full_length) + before_tokens
+
+ input_ids = before_tokens + after_tokens
+ labels = [-100] * len(before_tokens) + [1] * len(after_tokens)
+
+ input_ids = torch.tensor(input_ids)
+ return {
+ "input_ids": input_ids,
+ "labels": labels,
+ "attention_mask": input_ids.ne(shared.tokenizer.pad_token_id),
+ }
+
+ train_template.clear()
+
+
+
+ print(f"*** LoRA: {lora_name} ***")
+ non_serialized_params.update({"stop_at_loss": stop_at_loss})
+ non_serialized_params.update({"save_steps_under_loss": save_steps_under_loss+0.01})
+ non_serialized_params.update({"save_checkpoint_now": False})
+ non_serialized_params.update({"training_loop": False})
+ non_serialized_params.update({"current_stability": 0})
+
+ # END OF FPHAM SENTENCE SPLIT functions ===================
+
+ # == Prep the dataset, format, etc ==
+ if raw_text_file not in ['None', '']:
+ train_template["template_type"] = "raw_text"
+ logger.info("Loading raw text file dataset...")
+ fullpath = clean_path('training/datasets', f'{raw_text_file}')
+ fullpath = Path(fullpath)
+ if fullpath.is_dir():
+ logger.info('Training path directory {}'.format(raw_text_file))
+ raw_text = ""
+ file_paths = sorted(fullpath.glob('*.txt'), key=lambda path: natural_keys(path.name))
+ for file_path in file_paths:
+ if file_path.is_file():
+ with file_path.open('r', encoding='utf-8') as file:
+ raw_text += file.read().replace('\r', '')
+
+ logger.info(f"Loaded training file: {file_path.name}")
+ else:
+ with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file:
+ raw_text = file.read().replace('\r', '')
+
+ # FPHAM PRECISE SLICING
+ if min_chars<0:
+ min_chars = 0
+
+ add_EOS_to_all = add_eos_token and add_eos_token_type == 'Every Block'
+ add_EOS_to_HC = add_eos_token and add_eos_token_type != 'Every Block'
+
+ #print (f"add_eos_token {add_eos_token}, add_EOS_to_all {add_EOS_to_all}, add_EOS_to_HC {add_EOS_to_HC}")
+
+ # == New more precise slicing on sentence boundary ==
+ if sliding_window:
+ text_chunks = sliding_block_cut(raw_text, min_chars, add_EOS_to_HC, cutoff_len, hard_cut_string,non_serialized_params['debug_slicer'])
+ else:
+ text_chunks = precise_cut(raw_text, precize_slicing_overlap, min_chars, add_EOS_to_HC, cutoff_len, hard_cut_string,non_serialized_params['debug_slicer'])
+
+ train_data = Dataset.from_list([tokenize(x, add_EOS_to_all, add_bos_token) for x in text_chunks])
+ if add_EOS_to_all:
+ print(f"Added EOS to {len(text_chunks)} blocks")
+
+ print(f"All Data Blocks: {len(text_chunks)}")
+
+ del text_chunks
+ eval_data = None
+ else:
+ if dataset in ['None', '']:
+ yield "Missing dataset choice input, cannot continue."
+ return
+
+ if format in ['None', '']:
+ yield "Missing format choice input, cannot continue."
+ return
+
+ train_template["template_type"] = "dataset"
+
+ with open(clean_path('training/formats', f'{format}.json'), 'r', encoding='utf-8-sig') as formatFile:
+ format_data: dict[str, str] = json.load(formatFile)
+
+ # == store training prompt ==
+ for _, value in format_data.items():
+ prompt_key = f"template_{len(train_template)}"
+ train_template[prompt_key] = value
+
+ def generate_prompt(data_point: dict[str, str]):
+ for options, data in format_data.items():
+ if set(options.split(',')) == set(x[0] for x in data_point.items() if (type(x[1]) is str and len(x[1].strip()) > 0)):
+ for key, val in data_point.items():
+ if type(val) is str:
+ data = data.replace(f'%{key}%', val)
+ return data
+ raise RuntimeError(f'Data-point "{data_point}" has no keyset match within format "{list(format_data.keys())}"')
+
+ def generate_and_tokenize_prompt(data_point):
+ prompt = generate_prompt(data_point)
+ return tokenize(prompt, add_eos_token, add_bos_token)
+
+ logger.info("Loading JSON datasets...")
+ data = load_dataset("json", data_files=clean_path('training/datasets', f'{dataset}.json'))
+ train_data = data['train'].map(generate_and_tokenize_prompt, new_fingerprint='%030x' % random.randrange(16**30))
+
+ print(f"BOS: {add_bos_token} EOS: {add_eos_token}")
+ print(f"Data Blocks: {train_data.num_rows}")
+
+ if eval_dataset == 'None':
+ eval_data = None
+ else:
+ eval_data = load_dataset("json", data_files=clean_path('training/datasets', f'{eval_dataset}.json'))
+ eval_data = eval_data['train'].map(generate_and_tokenize_prompt, new_fingerprint='%030x' % random.randrange(16**30))
+
+ # == We MUST reload model if it went through any previous training, even failed one ==
+ if shared.model_dirty_from_training:
+ selected_model = shared.model_name
+ if selected_model:
+ print("\033[1;31;1m(Model has been modified by previous training, it needs to be reloaded...)\033[0;37;0m")
+ try:
+ yield f"Reloading {selected_model}..."
+ reload_model()
+ if shared.model is not None:
+ print("Model reloaded OK, continue with training.")
+ else:
+ return f"Failed to load {selected_model}."
+ except:
+ exc = traceback.format_exc()
+ logger.error('Failed to reload the model.')
+ print(exc)
+ return exc.replace('\n', '\n\n')
+
+ # == Start prepping the model itself ==
+ if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'):
+ logger.info("Getting model ready...")
+ prepare_model_for_kbit_training(shared.model)
+
+ # base model is now frozen and should not be reused for any other LoRA training than this one
+ shared.model_dirty_from_training = True
+ if training_projection==train_choices[0]:
+ model_to_lora_modules["llama"] = ["gate_proj","down_proj","up_proj","q_proj","k_proj","v_proj","o_proj"]
+ elif training_projection==train_choices[1]:
+ model_to_lora_modules["llama"] = ["q_proj","k_proj", "v_proj", "o_proj"]
+ elif training_projection==train_choices[2]:
+ model_to_lora_modules["llama"] = ["q_proj","k_proj", "v_proj"]
+ elif training_projection==train_choices[3]:
+ model_to_lora_modules["llama"] = ["k_proj", "v_proj", "down_proj"]
+ else:
+ model_to_lora_modules["llama"] = ["q_proj", "v_proj"]
+
+
+ logger.info("Preparing for training...")
+ config = LoraConfig(
+ r=lora_rank,
+ lora_alpha=lora_alpha,
+ target_modules=model_to_lora_modules[model_id],
+ lora_dropout=lora_dropout,
+ bias="none",
+ task_type="CAUSAL_LM"
+ )
+
+ # == Backup the existing adapter ==
+ if not always_override:
+ backup_adapter(lora_file_path)
+
+ # == get model trainable params
+ model_trainable_params, model_all_params = calc_trainable_parameters(shared.model)
+
+ try:
+ logger.info("Creating LoRA model...")
+ lora_model = get_peft_model(shared.model, config)
+ if not always_override and Path(f"{lora_file_path}/adapter_model.bin").is_file():
+ logger.info("Loading existing LoRA data...")
+ state_dict_peft = torch.load(f"{lora_file_path}/adapter_model.bin")
+ set_peft_model_state_dict(lora_model, state_dict_peft)
+ except:
+ yield traceback.format_exc().replace('\n', '\n\n')
+ return
+
+ if shared.args.monkey_patch:
+ from alpaca_lora_4bit.autograd_4bit import Autograd4bitQuantLinear
+ from alpaca_lora_4bit.models import Linear4bitLt
+ for _, m in lora_model.named_modules():
+ if isinstance(m, Autograd4bitQuantLinear) or isinstance(m, Linear4bitLt):
+ if m.is_v1_model:
+ m.zeros = m.zeros.half()
+ m.scales = m.scales.half()
+
+ class Tracked():
+ def __init__(self):
+ self.current_steps = 0
+ self.max_steps = 0
+ self.did_save = False
+
+ tracked = Tracked()
+ actual_save_steps = math.ceil(save_steps / gradient_accumulation_steps)
+
+ class Callbacks(transformers.TrainerCallback):
+ def on_step_begin(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs):
+ tracked.current_steps = state.global_step * gradient_accumulation_steps
+ tracked.max_steps = state.max_steps * gradient_accumulation_steps
+ if WANT_INTERRUPT:
+ control.should_epoch_stop = True
+ control.should_training_stop = True
+ else:
+ current_loss = float(train_log.get('loss', 0.0))
+ current_epoch = float(train_log.get('epoch', 0.0))
+
+ force_save = False
+
+ folder_save = f"checkpoint-{tracked.current_steps}"
+
+ if non_serialized_params['save_checkpoint_now']:
+ force_save = True
+ non_serialized_params.update({"save_checkpoint_now": False})
+ print(f"\033[1;31;1mSave Checkpoint manually trigerred.\033[0;37;0m")
+ folder_save = f"checkpoint-{tracked.current_steps}-user"
+
+ patience = 3 # Set the number of consecutive steps for tracking stability
+
+ if gradient_accumulation_steps==1:
+ patience = 5
+
+ min_steps = 10
+
+ if current_loss < non_serialized_params['save_steps_under_loss'] and current_loss > 0 and state.global_step > min_steps:
+ current_stability = non_serialized_params['current_stability']
+ current_stability += 1
+ non_serialized_params.update({"current_stability": current_stability})
+
+ if current_stability >= patience:
+ current_stability = 0
+ non_serialized_params.update({"current_stability": current_stability})
+ current_loss_dec = round(current_loss, 2)
+ loss_str = f"{current_loss_dec:.2f}"
+ loss_str = loss_str.replace('.', '_')
+ new_save = (current_loss_dec-0.1) + 0.01
+ non_serialized_params.update({"save_steps_under_loss": new_save})
+
+ folder_save = f"checkpoint-{tracked.current_steps}-loss-{loss_str}"
+ force_save = True
+
+
+ else:
+ # Reset stability if the loss goes above the threshold
+ non_serialized_params.update({"current_stability": 0})
+
+ if state.global_step > 0 and actual_save_steps > 0 and state.global_step % actual_save_steps == 0:
+ folder_save = f"checkpoint-{tracked.current_steps}"
+ force_save = True
+
+ if force_save:
+ lora_model.save_pretrained(f"{lora_file_path}/{folder_save}/")
+ print(f"\033[1;30;40mStep: {tracked.current_steps:6} \033[0;37;0m Saved: [{folder_save}]")
+ # Save log
+ with open(f"{lora_file_path}/{folder_save}/training_log.json", 'w', encoding='utf-8') as file:
+ json.dump(train_log, file, indent=2)
+ # == Save training prompt ==
+ with open(f"{lora_file_path}/{folder_save}/training_prompt.json", 'w', encoding='utf-8') as file:
+ json.dump(train_template, file, indent=2)
+
+
+ def on_substep_end(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs):
+ tracked.current_steps += 1
+ if WANT_INTERRUPT:
+ control.should_epoch_stop = True
+ control.should_training_stop = True
+
+ def on_log(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, logs, **kwargs):
+ train_log.update(logs)
+ train_log.update({"current_steps": tracked.current_steps})
+ if WANT_INTERRUPT:
+ print("\033[1;31;1mInterrupted by user\033[0;37;0m")
+
+ print(f"\033[1;30;40mStep: {tracked.current_steps:6} \033[0;37;0m", end='')
+
+ entry = {
+ 'current_steps': int(train_log.get('current_steps',0)),
+ 'loss': float(train_log.get('loss', 0.0)),
+ 'learning_rate': float(train_log.get('learning_rate', 0.0)),
+ 'epoch': float(train_log.get('epoch', 0.0))
+ }
+
+ # Add the entry to the continuous log
+ train_log_graph.append(entry)
+
+ # Save the graph log for now, we can later generate full graph
+ with open(f"{lora_file_path}/training_graph.json", 'w') as file:
+ json.dump(train_log_graph, file, indent=4)
+
+ if 'loss' in logs:
+ loss = float(logs['loss'])
+ if loss <= stop_at_loss:
+ control.should_epoch_stop = True
+ control.should_training_stop = True
+ print(f"\033[1;31;1mStop Loss {stop_at_loss} reached.\033[0;37;0m")
+
+ # FPHAM SAMPLE REQ Transformers error handling
+ gradient_accumulation_max = int(train_data.num_rows)//micro_batch_size
+
+ if gradient_accumulation_max < gradient_accumulation_steps:
+ print(f"\033[1;31;1mWARNING: Current gradient accumulation is too high for the amount of training data.\033[0;37;0m")
+ print(f"Gradient accumulation: {gradient_accumulation_steps} should be less than: {gradient_accumulation_max}. \033[1;31;1mThis could crash Accelerate/Transformers\033[0;37;0m")
+ #min_batchSize = sample_req*micro_batch_size
+ print(f"Preferable fix: \033[1;31;1mIncrease the size of dataset\033[0;37;0m")
+ print(f"... or Decrerase Gradient Accumulation \033[1;31;1m{gradient_accumulation_steps}\033[0;37;0m to below {gradient_accumulation_max}")
+ gradient_accumulation_steps = max(1,gradient_accumulation_max-1)
+ print(f"Last resort fix for this run: Lowering Gradient accumulation to {gradient_accumulation_steps}. [Good luck]")
+
+ else:
+ print(f"Data Size Check: Gradient accumulation: {gradient_accumulation_steps} <= Blocks/Batch {gradient_accumulation_max} ... [OK]")
+
+ #END OF FPHAM SAMPLE REQ
+
+ # FPHAM Custom Scheduler ==
+ custom_scheduller = False
+ lr_scheduler_type_arg = lr_scheduler_type
+
+ if lr_scheduler_type == 'FP_low_epoch_annealing':
+ custom_scheduller = True
+ lr_scheduler_type_arg = 'cosine'
+ elif lr_scheduler_type == 'FP_half_time_annealing':
+ custom_scheduller = True
+ lr_scheduler_type_arg = 'constant'
+
+ args=transformers.TrainingArguments(
+ report_to=report_to if report_to != "None" else None,
+ per_device_train_batch_size=micro_batch_size,
+ gradient_accumulation_steps=gradient_accumulation_steps,
+ warmup_steps=math.ceil(warmup_steps / gradient_accumulation_steps),
+ warmup_ratio = warmup_ratio,
+ num_train_epochs=epochs,
+ learning_rate=actual_lr,
+ fp16=False if shared.args.cpu else True,
+ optim=optimizer,
+ logging_steps=1,
+ evaluation_strategy="steps" if eval_data is not None else "no",
+ eval_steps=math.ceil(eval_steps / gradient_accumulation_steps) if eval_data is not None else None,
+ save_strategy="steps" if eval_data is not None else "no",
+ output_dir=lora_file_path,
+ lr_scheduler_type=lr_scheduler_type_arg,
+ load_best_model_at_end=eval_data is not None,
+ # TODO: Enable multi-device support
+ ddp_find_unused_parameters=None,
+ no_cuda=shared.args.cpu,
+ )
+
+ if custom_scheduller:
+ trainer = FPSchedulerTrainer(
+ model=lora_model,
+ train_dataset=train_data,
+ eval_dataset=eval_data,
+ args=args,
+ data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False),
+ callbacks=list([Callbacks()])
+ )
+ else:
+ trainer = transformers.Trainer(
+ model=lora_model,
+ train_dataset=train_data,
+ eval_dataset=eval_data,
+ args=args,
+ data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False),
+ callbacks=list([Callbacks()])
+ )
+
+ # END OF FPHAM CUSTOM SCHEDULER
+
+ lora_model.config.use_cache = False
+
+ if torch.__version__ >= "2" and sys.platform != "win32":
+ lora_model = torch.compile(lora_model)
+
+ # == Save parameters for reuse ==
+ with open(f"{lora_file_path}/training_parameters.json", 'w', encoding='utf-8') as file:
+ vars = locals()
+ json.dump({x: vars[x] for x in PARAMETERS}, file, indent=2)
+
+ # == Save training prompt ==
+ with open(f"{lora_file_path}/training_prompt.json", 'w', encoding='utf-8') as file:
+ json.dump(train_template, file, indent=2)
+
+ # == Main run and monitor loop ==
+ logger.info("Starting training...")
+ yield "Starting..."
+
+ lora_trainable_param, lora_all_param = calc_trainable_parameters(lora_model)
+
+ projections_string = ", ".join([projection.replace("_proj", "") for projection in model_to_lora_modules[model_id]])
+
+ print(f"Training '{model_id}' model using ({projections_string}) projections")
+
+ if lora_all_param > 0:
+ print(f"Trainable params: {lora_trainable_param:,d} ({100 * lora_trainable_param / lora_all_param:.4f} %), All params: {lora_all_param:,d} (Model: {model_all_params:,d})")
+
+ train_log.update({"base_model_name": shared.model_name})
+ train_log.update({"base_model_class": shared.model.__class__.__name__})
+ train_log.update({"base_loaded_in_4bit": getattr(lora_model, "is_loaded_in_4bit", False)})
+ train_log.update({"base_loaded_in_8bit": getattr(lora_model, "is_loaded_in_8bit", False)})
+ train_log.update({"projections": projections_string})
+
+ if stop_at_loss > 0:
+ print(f"Monitoring loss \033[1;31;1m(Auto-Stop at: {stop_at_loss})\033[0;37;0m")
+
+ if WANT_INTERRUPT:
+ yield "Interrupted before start."
+ return
+
+ def log_train_dataset(trainer):
+ decoded_entries = []
+ # Try to decode the entries and write the log file
+ try:
+ # Iterate over the first 10 elements in the dataset (or fewer if there are less than 10)
+ for i in range(min(10, len(trainer.train_dataset))):
+ decoded_text = shared.tokenizer.decode(trainer.train_dataset[i]['input_ids'])
+ decoded_entries.append({"value": decoded_text})
+
+ # Write the log file
+ Path('logs').mkdir(exist_ok=True)
+ with open(Path('logs/train_dataset_sample.json'), 'w') as json_file:
+ json.dump(decoded_entries, json_file, indent=4)
+
+ logger.info("Log file 'train_dataset_sample.json' created in the 'logs' directory.")
+ except Exception as e:
+ logger.error(f"Failed to create log file due to error: {e}")
+
+ def threaded_run():
+ log_train_dataset(trainer)
+ trainer.train()
+ # Note: save in the thread in case the gradio thread breaks (eg browser closed)
+ lora_model.save_pretrained(lora_file_path)
+ logger.info("LoRA training run is completed and saved.")
+ # Save log
+ with open(f"{lora_file_path}/training_log.json", 'w', encoding='utf-8') as file:
+ json.dump(train_log, file, indent=2)
+
+ thread = threading.Thread(target=threaded_run)
+ thread.start()
+ last_step = 0
+ start_time = time.perf_counter()
+
+ while thread.is_alive():
+ time.sleep(0.5)
+ if WANT_INTERRUPT:
+ yield "Interrupting, please wait... *(Run will stop after the current training step completes.)*"
+
+ elif tracked.current_steps != last_step:
+ last_step = tracked.current_steps
+ time_elapsed = time.perf_counter() - start_time
+ lastloss = float(train_log.get('loss', 0.0))
+
+ non_serialized_params.update({"training_loop": True})
+
+ if lastloss > 0:
+ lastloss_str = f", ... Current Loss: `{lastloss:.2f}`"
+ else:
+ lastloss_str = ""
+
+ if time_elapsed <= 0:
+ timer_info = ""
+ total_time_estimate = 999
+ else:
+ its = tracked.current_steps / time_elapsed
+ if its > 1:
+ timer_info = f"`{its:.2f}` it/s"
+ else:
+ timer_info = f"`{1.0/its:.2f}` s/it"
+
+ total_time_estimate = (1.0 / its) * (tracked.max_steps)
+
+ if stop_at_loss != non_serialized_params['stop_at_loss']:
+ stop_at_loss = non_serialized_params['stop_at_loss']
+ print(f"Stop at loss changed \033[1;31;1m(Auto-Stop at: {stop_at_loss})\033[0;37;0m")
+
+ yield f"Running... **{tracked.current_steps}** / **{tracked.max_steps}** ... {timer_info}, {format_time(time_elapsed)} / {format_time(total_time_estimate)} ... {format_time(total_time_estimate - time_elapsed)} remaining {lastloss_str}"
+
+ # Saving in the train thread might fail if an error occurs, so save here if so.
+
+ non_serialized_params.update({"training_loop": False})
+
+ if not tracked.did_save:
+ logger.info("Training complete, saving...")
+ lora_model.save_pretrained(lora_file_path)
+
+ if WANT_INTERRUPT:
+ logger.info("Training interrupted.")
+ yield f"Interrupted by user. LoRA saved to `{lora_file_path}`."
+ else:
+ logger.info("Training complete!")
+ yield f"Done! LoRA saved to `{lora_file_path}`.\n\nBefore testing your new LoRA, make sure to first reload the model, as it is currently dirty from training."
+
+ create_graph(lora_file_path, lora_name)
+
+def format_time(seconds: float):
+ if seconds < 120:
+ return f"`{seconds:.0f}` seconds"
+
+ minutes = seconds / 60
+ if minutes < 120:
+ return f"`{minutes:.0f}` minutes"
+
+ hours = minutes / 60
+ return f"`{hours:.0f}` hours"
diff --git a/extensions/Training_PRO/train_utils.py b/extensions/Training_PRO/train_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a11e3a4a958a9660e5eb08a04a607804c7c8422
--- /dev/null
+++ b/extensions/Training_PRO/train_utils.py
@@ -0,0 +1,279 @@
+import os
+from modules import shared, utils
+from pathlib import Path
+import json
+
+def list_subfoldersByTime(directory):
+
+ if not directory.endswith('/'):
+ directory += '/'
+ subfolders = []
+ path = directory
+ name_list = os.listdir(path)
+ full_list = [os.path.join(path,i) for i in name_list]
+ time_sorted_list = sorted(full_list, key=os.path.getmtime,reverse=True)
+
+ for entry in time_sorted_list:
+ if os.path.isdir(entry):
+ entry_str = f"{entry}" # Convert entry to a string
+ full_path = entry_str
+ entry_str = entry_str.replace('\\','/')
+ entry_str = entry_str.replace(f"{directory}", "") # Remove directory part
+ subfolders.append(entry_str)
+
+ return subfolders
+
+def get_available_loras_local(_sortedByTime):
+
+ model_dir = shared.args.lora_dir # Update with the appropriate directory path
+ subfolders = []
+ if _sortedByTime:
+ subfolders = list_subfoldersByTime(model_dir)
+ else:
+ subfolders = utils.get_available_loras()
+
+ return subfolders
+
+
+# FPHAM SPLIT BY SENTENCE BLOCK ===============
+
+def split_sentences(text: str, cutoff_len: int):
+ sentences = []
+ sentence = ''
+ delimiters = ['. ', '? ', '! ', '... ', '.\n', '?\n', '!\n','...\n','','/>']
+ abbreviations = ['Mr. ', 'Mrs. ', 'Dr. ', 'Ms. ', 'St. ', 'Prof. ', 'Jr. ', 'Ltd. ', 'Capt. ', 'Col. ', 'Gen. ', 'Ave. ', 'Blvd. ', 'Co. ', 'Corp. ', 'Dept. ', 'Est. ', 'Gov. ', 'Inc. ', 'Ph.D. ', 'Univ. ']
+ errors = 0
+ max_cut = cutoff_len-1
+ prev_char = ''
+
+ for char in text:
+ sentence += char
+
+
+ if (any(sentence.endswith(delimiter) for delimiter in delimiters) and
+ not (prev_char.isupper() and len(sentence) >= 3 and sentence[-3] != ' ') and
+ not any(sentence.endswith(abbreviation) for abbreviation in abbreviations)):
+ tokens = shared.tokenizer.encode(sentence)
+
+ if len(tokens) > max_cut:
+ tokens = tokens[:max_cut]
+ sentence = shared.tokenizer.decode(tokens, skip_special_tokens=True)
+ errors = errors + 1
+
+ sentences.append({'text': sentence, 'size': len(tokens)})
+
+ sentence = ''
+
+ prev_char = char
+
+ if sentence:
+ tokens = shared.tokenizer.encode(sentence)
+ if len(tokens) > max_cut:
+ tokens = tokens[:max_cut]
+ sentence = shared.tokenizer.decode(tokens, skip_special_tokens=True)
+ errors = errors + 1
+
+ sentences.append({'text': sentence, 'size': len(tokens)})
+
+ if errors > 0:
+ print(f"Trimmed sentences beyond Cutoff Length: {errors}")
+
+ return sentences
+
+# The goal of following code is to create blocks of text + overlapping blocks while:
+# respects sentence boundaries
+# always uses all the text
+# hard cut defined by hard_cut_string or will always end at the end of data block
+# no overlapping blocks will be created across hard cut or across token
+
+def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, cutoff_len: int, hard_cut_string: str, debug_slicer:bool):
+
+ EOSX_str = '/>' #hardcut placeholder
+ EOS_str = ''
+ print("Precise raw text slicer: ON")
+
+ cut_string = hard_cut_string.replace('\\n', '\n')
+ text = text.replace(cut_string, EOSX_str)
+ sentences = split_sentences(text, cutoff_len)
+
+ print(f"Sentences: {len(sentences)}")
+ sentencelist = []
+ currentSentence = ''
+ totalLength = 0
+ max_cut = cutoff_len-1
+ half_cut = cutoff_len//2
+ halfcut_length = 0
+
+ edgeindex = []
+ half_index = 0
+
+ for index, item in enumerate(sentences):
+
+ if halfcut_length+ item['size'] < half_cut:
+ halfcut_length += item['size']
+ half_index = index
+ else:
+ edgeindex.append(half_index)
+ halfcut_length = -2 * max_cut
+
+
+ if totalLength + item['size'] < max_cut and not currentSentence.endswith(EOSX_str):
+ currentSentence += item['text']
+ totalLength += item['size']
+ else:
+
+ if len(currentSentence.strip()) > min_chars_cut:
+ sentencelist.append(currentSentence.strip())
+
+ currentSentence = item['text']
+ totalLength = item['size']
+ halfcut_length = item['size']
+
+ if len(currentSentence.strip()) > min_chars_cut:
+ sentencelist.append(currentSentence.strip())
+
+ unique_blocks = len(sentencelist)
+ print(f"Text Blocks: {unique_blocks}")
+
+ #overlap strategies:
+ # don't overlap across HARD CUT (EOSX)
+ if overlap:
+ for edge_idx in edgeindex:
+ currentSentence = ''
+ totalLength = 0
+
+ for item in sentences[edge_idx:]:
+ if totalLength + item['size'] < max_cut:
+ currentSentence += item['text']
+ totalLength += item['size']
+ else:
+ #if by chance EOSX is at the end then it's acceptable
+ if currentSentence.endswith(EOSX_str) and len(currentSentence.strip()) > min_chars_cut:
+ sentencelist.append(currentSentence.strip())
+ # otherwise don't cross hard cut
+ elif EOSX_str not in currentSentence and len(currentSentence.strip()) > min_chars_cut:
+ sentencelist.append(currentSentence.strip())
+
+ currentSentence = ''
+ totalLength = 0
+ break
+
+ print(f"+ Overlapping blocks: {len(sentencelist)-unique_blocks}")
+
+ num_EOS = 0
+ for i in range(len(sentencelist)):
+ if eos_to_hc:
+ sentencelist[i] = sentencelist[i].replace(EOSX_str, EOS_str)
+ else:
+ sentencelist[i] = sentencelist[i].replace(EOSX_str, '')
+
+ #someone may have had stop strings in the raw text...
+ sentencelist[i] = sentencelist[i].replace("", EOS_str)
+ num_EOS += sentencelist[i].count(EOS_str)
+
+ if num_EOS > 0:
+ print(f"+ EOS count: {num_EOS}")
+
+ #final check for useless lines
+ sentencelist = [item for item in sentencelist if item.strip() != ""]
+ sentencelist = [item for item in sentencelist if item.strip() != ""]
+
+
+ if debug_slicer:
+ # Write the log file
+ Path('logs').mkdir(exist_ok=True)
+ sentencelist_dict = {index: sentence for index, sentence in enumerate(sentencelist)}
+ output_file = "logs/sentencelist.json"
+ with open(output_file, 'w') as f:
+ json.dump(sentencelist_dict, f,indent=2)
+
+ print("Saved sentencelist.json in logs folder")
+
+ return sentencelist
+
+
+def sliding_block_cut(text: str, min_chars_cut: int, eos_to_hc: bool, cutoff_len: int, hard_cut_string: str, debug_slicer:bool):
+
+ EOSX_str = '/>' #hardcut placeholder
+ EOS_str = ''
+ print("Mega Block Overlap: ON")
+
+ cut_string = hard_cut_string.replace('\\n', '\n')
+ text = text.replace(cut_string, EOSX_str)
+ sentences = split_sentences(text, cutoff_len)
+
+ print(f"Sentences: {len(sentences)}")
+ sentencelist = []
+
+ max_cut = cutoff_len-1
+
+ #print(f"max_cut: {max_cut}")
+ advancing_to = 0
+
+ prev_block_lastsentence = ""
+
+
+ for i in range(len(sentences)):
+ totalLength = 0
+ currentSentence = ''
+ lastsentence = ""
+
+ if i >= advancing_to:
+ for k in range(i, len(sentences)):
+
+ current_length = sentences[k]['size']
+
+ if totalLength + current_length <= max_cut and not currentSentence.endswith(EOSX_str):
+ currentSentence += sentences[k]['text']
+ totalLength += current_length
+ lastsentence = sentences[k]['text']
+ else:
+ if len(currentSentence.strip()) > min_chars_cut:
+ if prev_block_lastsentence!=lastsentence:
+ sentencelist.append(currentSentence.strip())
+ prev_block_lastsentence = lastsentence
+
+ advancing_to = 0
+ if currentSentence.endswith(EOSX_str):
+ advancing_to = k
+
+ currentSentence = ""
+ totalLength = 0
+ break
+
+ if currentSentence != "":
+ if len(currentSentence.strip()) > min_chars_cut:
+ sentencelist.append(currentSentence.strip())
+
+ unique_blocks = len(sentencelist)
+ print(f"Text Blocks: {unique_blocks}")
+ num_EOS = 0
+ for i in range(len(sentencelist)):
+ if eos_to_hc:
+ sentencelist[i] = sentencelist[i].replace(EOSX_str, EOS_str)
+ else:
+ sentencelist[i] = sentencelist[i].replace(EOSX_str, '')
+
+ #someone may have had stop strings in the raw text...
+ sentencelist[i] = sentencelist[i].replace("", EOS_str)
+ num_EOS += sentencelist[i].count(EOS_str)
+
+ if num_EOS > 0:
+ print(f"+ EOS count: {num_EOS}")
+
+ #final check for useless lines
+ sentencelist = [item for item in sentencelist if item.strip() != ""]
+ sentencelist = [item for item in sentencelist if item.strip() != ""]
+
+
+ if debug_slicer:
+ # Write the log file
+ Path('logs').mkdir(exist_ok=True)
+ sentencelist_dict = {index: sentence for index, sentence in enumerate(sentencelist)}
+ output_file = "logs/sentencelist.json"
+ with open(output_file, 'w') as f:
+ json.dump(sentencelist_dict, f,indent=2)
+
+ print("Saved sentencelist.json in logs folder")
+
+ return sentencelist
diff --git a/extensions/api/blocking_api.py b/extensions/api/blocking_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d5850cfc35019d07f5437b0acfb14f287800d4e
--- /dev/null
+++ b/extensions/api/blocking_api.py
@@ -0,0 +1,232 @@
+import json
+import ssl
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+from threading import Thread
+
+from extensions.api.util import build_parameters, try_start_cloudflared
+from modules import shared
+from modules.chat import generate_chat_reply
+from modules.LoRA import add_lora_to_model
+from modules.models import load_model, unload_model
+from modules.models_settings import get_model_metadata, update_model_parameters
+from modules.text_generation import (
+ encode,
+ generate_reply,
+ stop_everything_event
+)
+from modules.utils import get_available_models
+from modules.logging_colors import logger
+
+
+def get_model_info():
+ return {
+ 'model_name': shared.model_name,
+ 'lora_names': shared.lora_names,
+ # dump
+ 'shared.settings': shared.settings,
+ 'shared.args': vars(shared.args),
+ }
+
+
+class Handler(BaseHTTPRequestHandler):
+ def do_GET(self):
+ if self.path == '/api/v1/model':
+ self.send_response(200)
+ self.end_headers()
+ response = json.dumps({
+ 'result': shared.model_name
+ })
+
+ self.wfile.write(response.encode('utf-8'))
+ else:
+ self.send_error(404)
+
+ def do_POST(self):
+ content_length = int(self.headers['Content-Length'])
+ body = json.loads(self.rfile.read(content_length).decode('utf-8'))
+
+ if self.path == '/api/v1/generate':
+ self.send_response(200)
+ self.send_header('Content-Type', 'application/json')
+ self.end_headers()
+
+ prompt = body['prompt']
+ generate_params = build_parameters(body)
+ stopping_strings = generate_params.pop('stopping_strings')
+ generate_params['stream'] = False
+
+ generator = generate_reply(
+ prompt, generate_params, stopping_strings=stopping_strings, is_chat=False)
+
+ answer = ''
+ for a in generator:
+ answer = a
+
+ response = json.dumps({
+ 'results': [{
+ 'text': answer
+ }]
+ })
+
+ self.wfile.write(response.encode('utf-8'))
+
+ elif self.path == '/api/v1/chat':
+ self.send_response(200)
+ self.send_header('Content-Type', 'application/json')
+ self.end_headers()
+
+ user_input = body['user_input']
+ regenerate = body.get('regenerate', False)
+ _continue = body.get('_continue', False)
+
+ generate_params = build_parameters(body, chat=True)
+ generate_params['stream'] = False
+
+ generator = generate_chat_reply(
+ user_input, generate_params, regenerate=regenerate, _continue=_continue, loading_message=False)
+
+ answer = generate_params['history']
+ for a in generator:
+ answer = a
+
+ response = json.dumps({
+ 'results': [{
+ 'history': answer
+ }]
+ })
+
+ self.wfile.write(response.encode('utf-8'))
+
+ elif self.path == '/api/v1/stop-stream':
+ self.send_response(200)
+ self.send_header('Content-Type', 'application/json')
+ self.end_headers()
+
+ stop_everything_event()
+
+ response = json.dumps({
+ 'results': 'success'
+ })
+
+ self.wfile.write(response.encode('utf-8'))
+
+ elif self.path == '/api/v1/model':
+ self.send_response(200)
+ self.send_header('Content-Type', 'application/json')
+ self.end_headers()
+
+ # by default return the same as the GET interface
+ result = shared.model_name
+
+ # Actions: info, load, list, unload
+ action = body.get('action', '')
+
+ if action == 'load':
+ model_name = body['model_name']
+ args = body.get('args', {})
+ print('args', args)
+ for k in args:
+ setattr(shared.args, k, args[k])
+
+ shared.model_name = model_name
+ unload_model()
+
+ model_settings = get_model_metadata(shared.model_name)
+ shared.settings.update({k: v for k, v in model_settings.items() if k in shared.settings})
+ update_model_parameters(model_settings, initial=True)
+
+ if shared.settings['mode'] != 'instruct':
+ shared.settings['instruction_template'] = None
+
+ try:
+ shared.model, shared.tokenizer = load_model(shared.model_name)
+ if shared.args.lora:
+ add_lora_to_model(shared.args.lora) # list
+
+ except Exception as e:
+ response = json.dumps({'error': {'message': repr(e)}})
+
+ self.wfile.write(response.encode('utf-8'))
+ raise e
+
+ shared.args.model = shared.model_name
+
+ result = get_model_info()
+
+ elif action == 'unload':
+ unload_model()
+ shared.model_name = None
+ shared.args.model = None
+ result = get_model_info()
+
+ elif action == 'list':
+ result = get_available_models()
+
+ elif action == 'info':
+ result = get_model_info()
+
+ response = json.dumps({
+ 'result': result,
+ })
+
+ self.wfile.write(response.encode('utf-8'))
+
+ elif self.path == '/api/v1/token-count':
+ self.send_response(200)
+ self.send_header('Content-Type', 'application/json')
+ self.end_headers()
+
+ tokens = encode(body['prompt'])[0]
+ response = json.dumps({
+ 'results': [{
+ 'tokens': len(tokens)
+ }]
+ })
+
+ self.wfile.write(response.encode('utf-8'))
+ else:
+ self.send_error(404)
+
+ def do_OPTIONS(self):
+ self.send_response(200)
+ self.end_headers()
+
+ def end_headers(self):
+ self.send_header('Access-Control-Allow-Origin', '*')
+ self.send_header('Access-Control-Allow-Methods', '*')
+ self.send_header('Access-Control-Allow-Headers', '*')
+ self.send_header('Cache-Control', 'no-store, no-cache, must-revalidate')
+ super().end_headers()
+
+
+def _run_server(port: int, share: bool = False, tunnel_id=str):
+ address = '0.0.0.0' if shared.args.listen else '127.0.0.1'
+ server = ThreadingHTTPServer((address, port), Handler)
+
+ ssl_certfile = shared.args.ssl_certfile
+ ssl_keyfile = shared.args.ssl_keyfile
+ ssl_verify = True if (ssl_keyfile and ssl_certfile) else False
+ if ssl_verify:
+ context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ context.load_cert_chain(ssl_certfile, ssl_keyfile)
+ server.socket = context.wrap_socket(server.socket, server_side=True)
+
+ def on_start(public_url: str):
+ logger.info(f'Starting non-streaming server at public url {public_url}/api')
+
+ if share:
+ try:
+ try_start_cloudflared(port, tunnel_id, max_attempts=3, on_start=on_start)
+ except Exception:
+ pass
+ else:
+ if ssl_verify:
+ logger.info(f'Starting API at https://{address}:{port}/api')
+ else:
+ logger.info(f'Starting API at http://{address}:{port}/api')
+
+ server.serve_forever()
+
+
+def start_server(port: int, share: bool = False, tunnel_id=str):
+ Thread(target=_run_server, args=[port, share, tunnel_id], daemon=True).start()
diff --git a/extensions/api/requirements.txt b/extensions/api/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e4f26c3ab433277c2f05250f3b1f5fe11952e78f
--- /dev/null
+++ b/extensions/api/requirements.txt
@@ -0,0 +1,2 @@
+flask_cloudflared==0.0.14
+websockets==11.0.2
\ No newline at end of file
diff --git a/extensions/api/script.py b/extensions/api/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..12fd9cad3f4c5ab425b63c253e70e520f961f844
--- /dev/null
+++ b/extensions/api/script.py
@@ -0,0 +1,13 @@
+import time
+
+import extensions.api.blocking_api as blocking_api
+import extensions.api.streaming_api as streaming_api
+from modules import shared
+
+
+def setup():
+ blocking_api.start_server(shared.args.api_blocking_port, share=shared.args.public_api, tunnel_id=shared.args.public_api_id)
+ if shared.args.public_api:
+ time.sleep(5)
+
+ streaming_api.start_server(shared.args.api_streaming_port, share=shared.args.public_api, tunnel_id=shared.args.public_api_id)
diff --git a/extensions/api/streaming_api.py b/extensions/api/streaming_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..71113c2e47c32a8d5a9dba24571a5daa38936618
--- /dev/null
+++ b/extensions/api/streaming_api.py
@@ -0,0 +1,142 @@
+import asyncio
+import json
+import ssl
+from threading import Thread
+
+from websockets.server import serve
+
+from extensions.api.util import (
+ build_parameters,
+ try_start_cloudflared,
+ with_api_lock
+)
+from modules import shared
+from modules.chat import generate_chat_reply
+from modules.text_generation import generate_reply
+from modules.logging_colors import logger
+
+PATH = '/api/v1/stream'
+
+
+@with_api_lock
+async def _handle_stream_message(websocket, message):
+ message = json.loads(message)
+
+ prompt = message['prompt']
+ generate_params = build_parameters(message)
+ stopping_strings = generate_params.pop('stopping_strings')
+ generate_params['stream'] = True
+
+ generator = generate_reply(
+ prompt, generate_params, stopping_strings=stopping_strings, is_chat=False)
+
+ # As we stream, only send the new bytes.
+ skip_index = 0
+ message_num = 0
+
+ for a in generator:
+ to_send = a[skip_index:]
+ if to_send is None or chr(0xfffd) in to_send: # partial unicode character, don't send it yet.
+ continue
+
+ await websocket.send(json.dumps({
+ 'event': 'text_stream',
+ 'message_num': message_num,
+ 'text': to_send
+ }))
+
+ await asyncio.sleep(0)
+ skip_index += len(to_send)
+ message_num += 1
+
+ await websocket.send(json.dumps({
+ 'event': 'stream_end',
+ 'message_num': message_num
+ }))
+
+
+@with_api_lock
+async def _handle_chat_stream_message(websocket, message):
+ body = json.loads(message)
+
+ user_input = body['user_input']
+ generate_params = build_parameters(body, chat=True)
+ generate_params['stream'] = True
+ regenerate = body.get('regenerate', False)
+ _continue = body.get('_continue', False)
+
+ generator = generate_chat_reply(
+ user_input, generate_params, regenerate=regenerate, _continue=_continue, loading_message=False)
+
+ message_num = 0
+ for a in generator:
+ await websocket.send(json.dumps({
+ 'event': 'text_stream',
+ 'message_num': message_num,
+ 'history': a
+ }))
+
+ await asyncio.sleep(0)
+ message_num += 1
+
+ await websocket.send(json.dumps({
+ 'event': 'stream_end',
+ 'message_num': message_num
+ }))
+
+
+async def _handle_connection(websocket, path):
+
+ if path == '/api/v1/stream':
+ async for message in websocket:
+ await _handle_stream_message(websocket, message)
+
+ elif path == '/api/v1/chat-stream':
+ async for message in websocket:
+ await _handle_chat_stream_message(websocket, message)
+
+ else:
+ print(f'Streaming api: unknown path: {path}')
+ return
+
+
+async def _run(host: str, port: int):
+ ssl_certfile = shared.args.ssl_certfile
+ ssl_keyfile = shared.args.ssl_keyfile
+ ssl_verify = True if (ssl_keyfile and ssl_certfile) else False
+ if ssl_verify:
+ context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ context.load_cert_chain(ssl_certfile, ssl_keyfile)
+ else:
+ context = None
+
+ async with serve(_handle_connection, host, port, ping_interval=None, ssl=context):
+ await asyncio.Future() # Run the server forever
+
+
+def _run_server(port: int, share: bool = False, tunnel_id=str):
+ address = '0.0.0.0' if shared.args.listen else '127.0.0.1'
+ ssl_certfile = shared.args.ssl_certfile
+ ssl_keyfile = shared.args.ssl_keyfile
+ ssl_verify = True if (ssl_keyfile and ssl_certfile) else False
+
+ def on_start(public_url: str):
+ public_url = public_url.replace('https://', 'wss://')
+ logger.info(f'Starting streaming server at public url {public_url}{PATH}')
+
+ if share:
+ try:
+ try_start_cloudflared(port, tunnel_id, max_attempts=3, on_start=on_start)
+ except Exception as e:
+ print(e)
+ else:
+ if ssl_verify:
+ logger.info(f'Starting streaming server at wss://{address}:{port}{PATH}')
+ else:
+ logger.info(f'Starting streaming server at ws://{address}:{port}{PATH}')
+
+ asyncio.run(_run(host=address, port=port))
+
+
+def start_server(port: int, share: bool = False, tunnel_id=str):
+ Thread(target=_run_server, args=[port, share, tunnel_id], daemon=True).start()
diff --git a/extensions/api/util.py b/extensions/api/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e42770d1b2fcb031db4d4f27b066f2382019235
--- /dev/null
+++ b/extensions/api/util.py
@@ -0,0 +1,152 @@
+import asyncio
+import functools
+import threading
+import time
+import traceback
+from threading import Thread
+from typing import Callable, Optional
+
+from modules import shared
+from modules.chat import load_character_memoized
+from modules.presets import load_preset_memoized
+
+# We use a thread local to store the asyncio lock, so that each thread
+# has its own lock. This isn't strictly necessary, but it makes it
+# such that if we can support multiple worker threads in the future,
+# thus handling multiple requests in parallel.
+api_tls = threading.local()
+
+
+def build_parameters(body, chat=False):
+
+ generate_params = {
+ 'max_new_tokens': int(body.get('max_new_tokens', body.get('max_length', 200))),
+ 'auto_max_new_tokens': bool(body.get('auto_max_new_tokens', False)),
+ 'max_tokens_second': int(body.get('max_tokens_second', 0)),
+ 'do_sample': bool(body.get('do_sample', True)),
+ 'temperature': float(body.get('temperature', 0.5)),
+ 'top_p': float(body.get('top_p', 1)),
+ 'typical_p': float(body.get('typical_p', body.get('typical', 1))),
+ 'epsilon_cutoff': float(body.get('epsilon_cutoff', 0)),
+ 'eta_cutoff': float(body.get('eta_cutoff', 0)),
+ 'tfs': float(body.get('tfs', 1)),
+ 'top_a': float(body.get('top_a', 0)),
+ 'repetition_penalty': float(body.get('repetition_penalty', body.get('rep_pen', 1.1))),
+ 'repetition_penalty_range': int(body.get('repetition_penalty_range', 0)),
+ 'encoder_repetition_penalty': float(body.get('encoder_repetition_penalty', 1.0)),
+ 'top_k': int(body.get('top_k', 0)),
+ 'min_length': int(body.get('min_length', 0)),
+ 'no_repeat_ngram_size': int(body.get('no_repeat_ngram_size', 0)),
+ 'num_beams': int(body.get('num_beams', 1)),
+ 'penalty_alpha': float(body.get('penalty_alpha', 0)),
+ 'length_penalty': float(body.get('length_penalty', 1)),
+ 'early_stopping': bool(body.get('early_stopping', False)),
+ 'mirostat_mode': int(body.get('mirostat_mode', 0)),
+ 'mirostat_tau': float(body.get('mirostat_tau', 5)),
+ 'mirostat_eta': float(body.get('mirostat_eta', 0.1)),
+ 'grammar_string': str(body.get('grammar_string', '')),
+ 'guidance_scale': float(body.get('guidance_scale', 1)),
+ 'negative_prompt': str(body.get('negative_prompt', '')),
+ 'seed': int(body.get('seed', -1)),
+ 'add_bos_token': bool(body.get('add_bos_token', True)),
+ 'truncation_length': int(body.get('truncation_length', body.get('max_context_length', 2048))),
+ 'custom_token_bans': str(body.get('custom_token_bans', '')),
+ 'ban_eos_token': bool(body.get('ban_eos_token', False)),
+ 'skip_special_tokens': bool(body.get('skip_special_tokens', True)),
+ 'custom_stopping_strings': '', # leave this blank
+ 'stopping_strings': body.get('stopping_strings', []),
+ }
+
+ preset_name = body.get('preset', 'None')
+ if preset_name not in ['None', None, '']:
+ preset = load_preset_memoized(preset_name)
+ generate_params.update(preset)
+
+ if chat:
+ character = body.get('character')
+ instruction_template = body.get('instruction_template', shared.settings['instruction_template'])
+ if str(instruction_template) == "None":
+ instruction_template = "Vicuna-v1.1"
+ if str(character) == "None":
+ character = "Assistant"
+
+ name1, name2, _, greeting, context, _ = load_character_memoized(character, str(body.get('your_name', shared.settings['name1'])), '', instruct=False)
+ name1_instruct, name2_instruct, _, _, context_instruct, turn_template = load_character_memoized(instruction_template, '', '', instruct=True)
+ generate_params.update({
+ 'mode': str(body.get('mode', 'chat')),
+ 'name1': str(body.get('name1', name1)),
+ 'name2': str(body.get('name2', name2)),
+ 'context': str(body.get('context', context)),
+ 'greeting': str(body.get('greeting', greeting)),
+ 'name1_instruct': str(body.get('name1_instruct', name1_instruct)),
+ 'name2_instruct': str(body.get('name2_instruct', name2_instruct)),
+ 'context_instruct': str(body.get('context_instruct', context_instruct)),
+ 'turn_template': str(body.get('turn_template', turn_template)),
+ 'chat-instruct_command': str(body.get('chat_instruct_command', body.get('chat-instruct_command', shared.settings['chat-instruct_command']))),
+ 'history': body.get('history', {'internal': [], 'visible': []})
+ })
+
+ return generate_params
+
+
+def try_start_cloudflared(port: int, tunnel_id: str, max_attempts: int = 3, on_start: Optional[Callable[[str], None]] = None):
+ Thread(target=_start_cloudflared, args=[
+ port, tunnel_id, max_attempts, on_start], daemon=True).start()
+
+
+def _start_cloudflared(port: int, tunnel_id: str, max_attempts: int = 3, on_start: Optional[Callable[[str], None]] = None):
+ try:
+ from flask_cloudflared import _run_cloudflared
+ except ImportError:
+ print('You should install flask_cloudflared manually')
+ raise Exception(
+ 'flask_cloudflared not installed. Make sure you installed the requirements.txt for this extension.')
+
+ for _ in range(max_attempts):
+ try:
+ if tunnel_id is not None:
+ public_url = _run_cloudflared(port, port + 1, tunnel_id=tunnel_id)
+ else:
+ public_url = _run_cloudflared(port, port + 1)
+
+ if on_start:
+ on_start(public_url)
+
+ return
+ except Exception:
+ traceback.print_exc()
+ time.sleep(3)
+
+ raise Exception('Could not start cloudflared.')
+
+
+def _get_api_lock(tls) -> asyncio.Lock:
+ """
+ The streaming and blocking API implementations each run on their own
+ thread, and multiplex requests using asyncio. If multiple outstanding
+ requests are received at once, we will try to acquire the shared lock
+ shared.generation_lock multiple times in succession in the same thread,
+ which will cause a deadlock.
+
+ To avoid this, we use this wrapper function to block on an asyncio
+ lock, and then try and grab the shared lock only while holding
+ the asyncio lock.
+ """
+ if not hasattr(tls, "asyncio_lock"):
+ tls.asyncio_lock = asyncio.Lock()
+
+ return tls.asyncio_lock
+
+
+def with_api_lock(func):
+ """
+ This decorator should be added to all streaming API methods which
+ require access to the shared.generation_lock. It ensures that the
+ tls.asyncio_lock is acquired before the method is called, and
+ released afterwards.
+ """
+ @functools.wraps(func)
+ async def api_wrapper(*args, **kwargs):
+ async with _get_api_lock(api_tls):
+ return await func(*args, **kwargs)
+ return api_wrapper
diff --git a/extensions/character_bias/script.py b/extensions/character_bias/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff12f3afdc28be4ead12ffab90bd9fbd783514a2
--- /dev/null
+++ b/extensions/character_bias/script.py
@@ -0,0 +1,83 @@
+import os
+
+import gradio as gr
+
+# get the current directory of the script
+current_dir = os.path.dirname(os.path.abspath(__file__))
+
+# check if the bias_options.txt file exists, if not, create it
+bias_file = os.path.join(current_dir, "bias_options.txt")
+if not os.path.isfile(bias_file):
+ with open(bias_file, "w") as f:
+ f.write("*I am so happy*\n*I am so sad*\n*I am so excited*\n*I am so bored*\n*I am so angry*")
+
+# read bias options from the text file
+with open(bias_file, "r") as f:
+ bias_options = [line.strip() for line in f.readlines()]
+
+params = {
+ "activate": True,
+ "bias string": " *I am so happy*",
+ "use custom string": False,
+}
+
+
+def input_modifier(string):
+ """
+ This function is applied to your text inputs before
+ they are fed into the model.
+ """
+ return string
+
+
+def output_modifier(string):
+ """
+ This function is applied to the model outputs.
+ """
+ return string
+
+
+def bot_prefix_modifier(string):
+ """
+ This function is only applied in chat mode. It modifies
+ the prefix text for the Bot and can be used to bias its
+ behavior.
+ """
+ if params['activate']:
+ if params['use custom string']:
+ return f'{string} {params["custom string"].strip()} '
+ else:
+ return f'{string} {params["bias string"].strip()} '
+ else:
+ return string
+
+
+def ui():
+ # Gradio elements
+ activate = gr.Checkbox(value=params['activate'], label='Activate character bias')
+ dropdown_string = gr.Dropdown(choices=bias_options, value=params["bias string"], label='Character bias', info='To edit the options in this dropdown edit the "bias_options.txt" file')
+ use_custom_string = gr.Checkbox(value=False, label='Use custom bias textbox instead of dropdown')
+ custom_string = gr.Textbox(value="", placeholder="Enter custom bias string", label="Custom Character Bias", info='To use this textbox activate the checkbox above')
+
+ # Event functions to update the parameters in the backend
+ def update_bias_string(x):
+ if x:
+ params.update({"bias string": x})
+ else:
+ params.update({"bias string": dropdown_string.get()})
+ return x
+
+ def update_custom_string(x):
+ params.update({"custom string": x})
+
+ dropdown_string.change(update_bias_string, dropdown_string, None)
+ custom_string.change(update_custom_string, custom_string, None)
+ activate.change(lambda x: params.update({"activate": x}), activate, None)
+ use_custom_string.change(lambda x: params.update({"use custom string": x}), use_custom_string, None)
+
+ # Group elements together depending on the selected option
+ def bias_string_group():
+ if use_custom_string.value:
+ return gr.Group([use_custom_string, custom_string])
+ else:
+ return dropdown_string
diff --git a/extensions/elevenlabs_tts/outputs/outputs-will-be-saved-here.txt b/extensions/elevenlabs_tts/outputs/outputs-will-be-saved-here.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/extensions/elevenlabs_tts/requirements.txt b/extensions/elevenlabs_tts/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c3c0cc7e290d5ef65ee03c2dc4bd1d040ae0f42e
--- /dev/null
+++ b/extensions/elevenlabs_tts/requirements.txt
@@ -0,0 +1 @@
+elevenlabs==0.2.24
diff --git a/extensions/elevenlabs_tts/script.py b/extensions/elevenlabs_tts/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..af0c74582e5be9d31effff28328b5b848554bc05
--- /dev/null
+++ b/extensions/elevenlabs_tts/script.py
@@ -0,0 +1,197 @@
+import html
+import re
+from pathlib import Path
+
+import elevenlabs
+import gradio as gr
+
+from modules import chat, shared, ui_chat
+from modules.logging_colors import logger
+from modules.utils import gradio
+
+params = {
+ 'activate': True,
+ 'api_key': None,
+ 'selected_voice': 'None',
+ 'autoplay': False,
+ 'show_text': True,
+ 'model': 'eleven_monolingual_v1',
+}
+
+voices = None
+wav_idx = 0
+LANG_MODELS = ['eleven_monolingual_v1', 'eleven_multilingual_v1']
+
+
+def update_api_key(key):
+ params['api_key'] = key
+ if key is not None:
+ elevenlabs.set_api_key(key)
+
+
+def refresh_voices():
+ global params
+ your_voices = elevenlabs.voices()
+ voice_names = [voice.name for voice in your_voices]
+ return voice_names
+
+
+def refresh_voices_dd():
+ all_voices = refresh_voices()
+ return gr.Dropdown.update(value=all_voices[0], choices=all_voices)
+
+
+def remove_tts_from_history(history):
+ for i, entry in enumerate(history['internal']):
+ history['visible'][i] = [history['visible'][i][0], entry[1]]
+
+ return history
+
+
+def toggle_text_in_history(history):
+ for i, entry in enumerate(history['visible']):
+ visible_reply = entry[1]
+ if visible_reply.startswith('\n\n{reply}"]
+ else:
+ history['visible'][i] = [history['visible'][i][0], f"{visible_reply.split('')[0]}"]
+
+ return history
+
+
+def remove_surrounded_chars(string):
+ # this expression matches to 'as few symbols as possible (0 upwards) between any asterisks' OR
+ # 'as few symbols as possible (0 upwards) between an asterisk and the end of the string'
+ return re.sub('\*[^\*]*?(\*|$)', '', string)
+
+
+def state_modifier(state):
+ if not params['activate']:
+ return state
+
+ state['stream'] = False
+ return state
+
+
+def input_modifier(string):
+ if not params['activate']:
+ return string
+
+ shared.processing_message = "*Is recording a voice message...*"
+ return string
+
+
+def history_modifier(history):
+ # Remove autoplay from the last reply
+ if len(history['internal']) > 0:
+ history['visible'][-1] = [
+ history['visible'][-1][0],
+ history['visible'][-1][1].replace('controls autoplay>', 'controls>')
+ ]
+
+ return history
+
+
+def output_modifier(string):
+ global params, wav_idx
+
+ if not params['activate']:
+ return string
+
+ original_string = string
+ string = remove_surrounded_chars(string)
+ string = string.replace('"', '')
+ string = string.replace('“', '')
+ string = string.replace('\n', ' ')
+ string = string.strip()
+ if string == '':
+ string = 'empty reply, try regenerating'
+
+ output_file = Path(f'extensions/elevenlabs_tts/outputs/{wav_idx:06d}.mp3'.format(wav_idx))
+ print(f'Outputting audio to {str(output_file)}')
+ try:
+ audio = elevenlabs.generate(text=html.unescape(string), voice=params['selected_voice'], model=params['model'])
+ elevenlabs.save(audio, str(output_file))
+
+ autoplay = 'autoplay' if params['autoplay'] else ''
+ string = f''
+ wav_idx += 1
+ except elevenlabs.api.error.UnauthenticatedRateLimitError:
+ string = "🤖 ElevenLabs Unauthenticated Rate Limit Reached - Please create an API key to continue\n\n"
+ except elevenlabs.api.error.RateLimitError:
+ string = "🤖 ElevenLabs API Tier Limit Reached\n\n"
+ except elevenlabs.api.error.APIError as err:
+ string = f"🤖 ElevenLabs Error: {err}\n\n"
+
+ if params['show_text']:
+ string += f'\n\n{original_string}'
+
+ shared.processing_message = "*Is typing...*"
+ return string
+
+
+def ui():
+ global voices
+ if not voices:
+ voices = refresh_voices()
+ selected = params['selected_voice']
+ if selected == 'None':
+ params['selected_voice'] = voices[0]
+ elif selected not in voices:
+ logger.error(f'Selected voice {selected} not available, switching to {voices[0]}')
+ params['selected_voice'] = voices[0]
+
+ # Gradio elements
+ with gr.Row():
+ activate = gr.Checkbox(value=params['activate'], label='Activate TTS')
+ autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically')
+ show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player')
+
+ with gr.Row():
+ voice = gr.Dropdown(value=params['selected_voice'], choices=voices, label='TTS Voice')
+ refresh = gr.Button(value='Refresh')
+
+ with gr.Row():
+ if params['api_key']:
+ api_key = gr.Textbox(value=params['api_key'], label='API Key')
+ update_api_key(params['api_key'])
+ else:
+ api_key = gr.Textbox(placeholder="Enter your API key.", label='API Key')
+
+ with gr.Row():
+ model = gr.Dropdown(value=params['model'], choices=LANG_MODELS, label='Language model')
+
+ with gr.Row():
+ convert = gr.Button('Permanently replace audios with the message texts')
+ convert_cancel = gr.Button('Cancel', visible=False)
+ convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)
+
+ # Convert history with confirmation
+ convert_arr = [convert_confirm, convert, convert_cancel]
+ convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr)
+ convert_confirm.click(
+ lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then(
+ remove_tts_from_history, gradio('history'), gradio('history')).then(
+ chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
+ chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
+
+ convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)
+
+ # Toggle message text in history
+ show_text.change(
+ lambda x: params.update({"show_text": x}), show_text, None).then(
+ toggle_text_in_history, gradio('history'), gradio('history')).then(
+ chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
+ chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
+
+ # Event functions to update the parameters in the backend
+ activate.change(lambda x: params.update({'activate': x}), activate, None)
+ voice.change(lambda x: params.update({'selected_voice': x}), voice, None)
+ api_key.change(update_api_key, api_key, None)
+ model.change(lambda x: params.update({'model': x}), model, None)
+ # connect.click(check_valid_api, [], connection_status)
+ refresh.click(refresh_voices_dd, [], voice)
+ # Event functions to update the parameters in the backend
+ autoplay.change(lambda x: params.update({"autoplay": x}), autoplay, None)
diff --git a/extensions/example/script.py b/extensions/example/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..44f0cb3c64d2fcc2556c30426c94c29543e599dd
--- /dev/null
+++ b/extensions/example/script.py
@@ -0,0 +1,139 @@
+"""
+An example of extension. It does nothing, but you can add transformations
+before the return statements to customize the webui behavior.
+
+Starting from history_modifier and ending in output_modifier, the
+functions are declared in the same order that they are called at
+generation time.
+"""
+
+import gradio as gr
+import torch
+from transformers import LogitsProcessor
+
+from modules import chat, shared
+from modules.text_generation import (
+ decode,
+ encode,
+ generate_reply,
+)
+
+params = {
+ "display_name": "Example Extension",
+ "is_tab": False,
+}
+
+class MyLogits(LogitsProcessor):
+ """
+ Manipulates the probabilities for the next token before it gets sampled.
+ Used in the logits_processor_modifier function below.
+ """
+ def __init__(self):
+ pass
+
+ def __call__(self, input_ids, scores):
+ # probs = torch.softmax(scores, dim=-1, dtype=torch.float)
+ # probs[0] /= probs[0].sum()
+ # scores = torch.log(probs / (1 - probs))
+ return scores
+
+def history_modifier(history):
+ """
+ Modifies the chat history.
+ Only used in chat mode.
+ """
+ return history
+
+def state_modifier(state):
+ """
+ Modifies the state variable, which is a dictionary containing the input
+ values in the UI like sliders and checkboxes.
+ """
+ return state
+
+def chat_input_modifier(text, visible_text, state):
+ """
+ Modifies the user input string in chat mode (visible_text).
+ You can also modify the internal representation of the user
+ input (text) to change how it will appear in the prompt.
+ """
+ return text, visible_text
+
+def input_modifier(string, state, is_chat=False):
+ """
+ In default/notebook modes, modifies the whole prompt.
+
+ In chat mode, it is the same as chat_input_modifier but only applied
+ to "text", here called "string", and not to "visible_text".
+ """
+ return string
+
+def bot_prefix_modifier(string, state):
+ """
+ Modifies the prefix for the next bot reply in chat mode.
+ By default, the prefix will be something like "Bot Name:".
+ """
+ return string
+
+def tokenizer_modifier(state, prompt, input_ids, input_embeds):
+ """
+ Modifies the input ids and embeds.
+ Used by the multimodal extension to put image embeddings in the prompt.
+ Only used by loaders that use the transformers library for sampling.
+ """
+ return prompt, input_ids, input_embeds
+
+def logits_processor_modifier(processor_list, input_ids):
+ """
+ Adds logits processors to the list, allowing you to access and modify
+ the next token probabilities.
+ Only used by loaders that use the transformers library for sampling.
+ """
+ processor_list.append(MyLogits())
+ return processor_list
+
+def output_modifier(string, state, is_chat=False):
+ """
+ Modifies the LLM output before it gets presented.
+
+ In chat mode, the modified version goes into history['visible'],
+ and the original version goes into history['internal'].
+ """
+ return string
+
+def custom_generate_chat_prompt(user_input, state, **kwargs):
+ """
+ Replaces the function that generates the prompt from the chat history.
+ Only used in chat mode.
+ """
+ result = chat.generate_chat_prompt(user_input, state, **kwargs)
+ return result
+
+def custom_css():
+ """
+ Returns a CSS string that gets appended to the CSS for the webui.
+ """
+ return ''
+
+def custom_js():
+ """
+ Returns a javascript string that gets appended to the javascript
+ for the webui.
+ """
+ return ''
+
+def setup():
+ """
+ Gets executed only once, when the extension is imported.
+ """
+ pass
+
+def ui():
+ """
+ Gets executed when the UI is drawn. Custom gradio elements and
+ their corresponding event handlers should be defined here.
+
+ To learn about gradio components, check out the docs:
+ https://gradio.app/docs/
+ """
+ pass
diff --git a/extensions/gallery/script.js b/extensions/gallery/script.js
new file mode 100644
index 0000000000000000000000000000000000000000..4ff23afc84526e3e7661a589837d869cf932de88
--- /dev/null
+++ b/extensions/gallery/script.js
@@ -0,0 +1,33 @@
+let gallery_element = document.getElementById('gallery-extension');
+let chat_mode_element = document.getElementById('chat-mode');
+
+let extensions_block = document.getElementById('extensions');
+let extensions_block_size = extensions_block.childNodes.length;
+let gallery_only = (extensions_block_size == 5);
+
+document.querySelector('.header_bar').addEventListener('click', function(event) {
+ if (event.target.tagName === 'BUTTON') {
+ const buttonText = event.target.textContent.trim();
+
+ let chat_visible = (buttonText == 'Chat');
+ let default_visible = (buttonText == 'Default');
+ let notebook_visible = (buttonText == 'Notebook');
+ let chat_mode_visible = (chat_mode_element.offsetHeight > 0 && chat_mode_element.offsetWidth > 0);
+
+ // Only show this extension in the Chat tab
+ if (chat_visible) {
+ if (chat_mode_visible) {
+ gallery_element.style.display = 'block';
+ extensions_block.style.display = '';
+ } else {
+ gallery_element.style.display = 'none';
+ extensions_block.style.display = 'none';
+ }
+ } else {
+ gallery_element.style.display = 'none';
+ if (gallery_only) {
+ extensions_block.style.display = 'none';
+ }
+ }
+ }
+});
diff --git a/extensions/gallery/script.py b/extensions/gallery/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..611a11f4a89d048ee9d0095f315391f53676f413
--- /dev/null
+++ b/extensions/gallery/script.py
@@ -0,0 +1,101 @@
+from pathlib import Path
+
+import gradio as gr
+
+from modules.html_generator import get_image_cache
+from modules.shared import gradio
+
+
+def generate_css():
+ css = """
+ .character-gallery > .gallery {
+ margin: 1rem 0;
+ display: grid !important;
+ grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
+ grid-column-gap: 0.4rem;
+ grid-row-gap: 1.2rem;
+ }
+
+ .character-gallery > .label {
+ display: none !important;
+ }
+
+ .character-gallery button.gallery-item {
+ display: contents;
+ }
+
+ .character-container {
+ cursor: pointer;
+ text-align: center;
+ position: relative;
+ opacity: 0.85;
+ }
+
+ .character-container:hover {
+ opacity: 1;
+ }
+
+ .character-container .placeholder, .character-container img {
+ width: 150px;
+ height: 200px;
+ background-color: gray;
+ object-fit: cover;
+ margin: 0 auto;
+ border-radius: 1rem;
+ border: 3px solid white;
+ box-shadow: 3px 3px 6px 0px rgb(0 0 0 / 50%);
+ }
+
+ .character-name {
+ margin-top: 0.3rem;
+ display: block;
+ font-size: 1.2rem;
+ font-weight: 600;
+ overflow-wrap: anywhere;
+ }
+ """
+ return css
+
+
+def generate_html():
+ cards = []
+ # Iterate through files in image folder
+ for file in sorted(Path("characters").glob("*")):
+ if file.suffix in [".json", ".yml", ".yaml"]:
+ character = file.stem
+ container_html = '
'
+ image_html = ""
+
+ for path in [Path(f"characters/{character}.{extension}") for extension in ['png', 'jpg', 'jpeg']]:
+ if path.exists():
+ image_html = f''
+ break
+
+ container_html += f'{image_html} {character}'
+ container_html += "
"
+ cards.append([container_html, character])
+
+ return cards
+
+
+def select_character(evt: gr.SelectData):
+ return (evt.value[1])
+
+
+def custom_js():
+ path_to_js = Path(__file__).parent.resolve() / 'script.js'
+ return open(path_to_js, 'r').read()
+
+
+def ui():
+ with gr.Accordion("Character gallery", open=False, elem_id='gallery-extension'):
+ update = gr.Button("Refresh")
+ gr.HTML(value="")
+ gallery = gr.Dataset(components=[gr.HTML(visible=False)],
+ label="",
+ samples=generate_html(),
+ elem_classes=["character-gallery"],
+ samples_per_page=50
+ )
+ update.click(generate_html, [], gallery)
+ gallery.select(select_character, None, gradio['character_menu'])
diff --git a/extensions/google_translate/requirements.txt b/extensions/google_translate/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..554a00df62818f96ba7d396ae39d8e58efbe9bfe
--- /dev/null
+++ b/extensions/google_translate/requirements.txt
@@ -0,0 +1 @@
+deep-translator==1.9.2
diff --git a/extensions/google_translate/script.py b/extensions/google_translate/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..784668c1e4a704b306b7f0bb70afce07eebb255b
--- /dev/null
+++ b/extensions/google_translate/script.py
@@ -0,0 +1,59 @@
+import html
+
+import gradio as gr
+from deep_translator import GoogleTranslator
+
+params = {
+ "activate": True,
+ "language string": "ja",
+}
+
+language_codes = {'Afrikaans': 'af', 'Albanian': 'sq', 'Amharic': 'am', 'Arabic': 'ar', 'Armenian': 'hy', 'Azerbaijani': 'az', 'Basque': 'eu', 'Belarusian': 'be', 'Bengali': 'bn', 'Bosnian': 'bs', 'Bulgarian': 'bg', 'Catalan': 'ca', 'Cebuano': 'ceb', 'Chinese (Simplified)': 'zh-CN', 'Chinese (Traditional)': 'zh-TW', 'Corsican': 'co', 'Croatian': 'hr', 'Czech': 'cs', 'Danish': 'da', 'Dutch': 'nl', 'English': 'en', 'Esperanto': 'eo', 'Estonian': 'et', 'Finnish': 'fi', 'French': 'fr', 'Frisian': 'fy', 'Galician': 'gl', 'Georgian': 'ka', 'German': 'de', 'Greek': 'el', 'Gujarati': 'gu', 'Haitian Creole': 'ht', 'Hausa': 'ha', 'Hawaiian': 'haw', 'Hebrew': 'iw', 'Hindi': 'hi', 'Hmong': 'hmn', 'Hungarian': 'hu', 'Icelandic': 'is', 'Igbo': 'ig', 'Indonesian': 'id', 'Irish': 'ga', 'Italian': 'it', 'Japanese': 'ja', 'Javanese': 'jw', 'Kannada': 'kn', 'Kazakh': 'kk', 'Khmer': 'km', 'Korean': 'ko', 'Kurdish': 'ku', 'Kyrgyz': 'ky', 'Lao': 'lo', 'Latin': 'la', 'Latvian': 'lv', 'Lithuanian': 'lt', 'Luxembourgish': 'lb', 'Macedonian': 'mk', 'Malagasy': 'mg', 'Malay': 'ms', 'Malayalam': 'ml', 'Maltese': 'mt', 'Maori': 'mi', 'Marathi': 'mr', 'Mongolian': 'mn', 'Myanmar (Burmese)': 'my', 'Nepali': 'ne', 'Norwegian': 'no', 'Nyanja (Chichewa)': 'ny', 'Pashto': 'ps', 'Persian': 'fa', 'Polish': 'pl', 'Portuguese (Portugal, Brazil)': 'pt', 'Punjabi': 'pa', 'Romanian': 'ro', 'Russian': 'ru', 'Samoan': 'sm', 'Scots Gaelic': 'gd', 'Serbian': 'sr', 'Sesotho': 'st', 'Shona': 'sn', 'Sindhi': 'sd', 'Sinhala (Sinhalese)': 'si', 'Slovak': 'sk', 'Slovenian': 'sl', 'Somali': 'so', 'Spanish': 'es', 'Sundanese': 'su', 'Swahili': 'sw', 'Swedish': 'sv', 'Tagalog (Filipino)': 'tl', 'Tajik': 'tg', 'Tamil': 'ta', 'Telugu': 'te', 'Thai': 'th', 'Turkish': 'tr', 'Ukrainian': 'uk', 'Urdu': 'ur', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy', 'Xhosa': 'xh', 'Yiddish': 'yi', 'Yoruba': 'yo', 'Zulu': 'zu'}
+
+
+def input_modifier(string):
+ """
+ This function is applied to your text inputs before
+ they are fed into the model.
+ """
+ if not params['activate']:
+ return string
+
+ return GoogleTranslator(source=params['language string'], target='en').translate(string)
+
+
+def output_modifier(string):
+ """
+ This function is applied to the model outputs.
+ """
+ if not params['activate']:
+ return string
+
+ translated_str = GoogleTranslator(source='en', target=params['language string']).translate(html.unescape(string))
+ return html.escape(translated_str)
+
+
+def bot_prefix_modifier(string):
+ """
+ This function is only applied in chat mode. It modifies
+ the prefix text for the Bot and can be used to bias its
+ behavior.
+ """
+
+ return string
+
+
+def ui():
+ # Finding the language name from the language code to use as the default value
+ language_name = list(language_codes.keys())[list(language_codes.values()).index(params['language string'])]
+
+ # Gradio elements
+ with gr.Row():
+ activate = gr.Checkbox(value=params['activate'], label='Activate translation')
+
+ with gr.Row():
+ language = gr.Dropdown(value=language_name, choices=[k for k in language_codes], label='Language')
+
+ # Event functions to update the parameters in the backend
+ activate.change(lambda x: params.update({"activate": x}), activate, None)
+ language.change(lambda x: params.update({"language string": language_codes[x]}), language, None)
diff --git a/extensions/long_replies/script.py b/extensions/long_replies/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..035e8c9e1c5005620eb72cb83be456464d5f3e78
--- /dev/null
+++ b/extensions/long_replies/script.py
@@ -0,0 +1,143 @@
+import torch
+from modules import chat, shared
+from modules.text_generation import (
+ decode,
+ encode,
+ generate_reply,
+)
+from transformers import LogitsProcessor
+import gradio as gr
+
+params = {
+ "display_name": "Long replies",
+ "is_tab": False,
+ "min_length": 120,
+}
+
+initial_size = 0
+
+class MyLogits(LogitsProcessor):
+ """
+ Manipulates the probabilities for the next token before it gets sampled.
+ Used in the logits_processor_modifier function below.
+ """
+ def __init__(self):
+ self.newline_id = shared.tokenizer.encode('\n')[-1]
+ pass
+
+ def __call__(self, input_ids, scores):
+ if input_ids.shape[-1] - initial_size < params["min_length"]:
+ scores[...,self.newline_id] = -1000
+ # scores[...,shared.tokenizer.eos_token_id] = -1000
+
+ # probs = torch.softmax(scores, dim=-1, dtype=torch.float)
+ # probs[0] /= probs[0].sum()
+ # scores = torch.log(probs / (1 - probs))
+ return scores
+
+def history_modifier(history):
+ """
+ Modifies the chat history.
+ Only used in chat mode.
+ """
+ return history
+
+def state_modifier(state):
+ """
+ Modifies the state variable, which is a dictionary containing the input
+ values in the UI like sliders and checkboxes.
+ """
+ return state
+
+def chat_input_modifier(text, visible_text, state):
+ """
+ Modifies the user input string in chat mode (visible_text).
+ You can also modify the internal representation of the user
+ input (text) to change how it will appear in the prompt.
+ """
+ return text, visible_text
+
+def input_modifier(string, state):
+ """
+ In default/notebook modes, modifies the whole prompt.
+
+ In chat mode, it is the same as chat_input_modifier but only applied
+ to "text", here called "string", and not to "visible_text".
+ """
+ return string
+
+def bot_prefix_modifier(string, state):
+ """
+ Modifies the prefix for the next bot reply in chat mode.
+ By default, the prefix will be something like "Bot Name:".
+ """
+ return string
+
+def tokenizer_modifier(state, prompt, input_ids, input_embeds):
+ """
+ Modifies the input ids and embeds.
+ Used by the multimodal extension to put image embeddings in the prompt.
+ Only used by loaders that use the transformers library for sampling.
+ """
+
+ global initial_size
+ initial_size = input_ids.shape[-1]
+
+ return prompt, input_ids, input_embeds
+
+def logits_processor_modifier(processor_list, input_ids):
+ """
+ Adds logits processors to the list, allowing you to access and modify
+ the next token probabilities.
+ Only used by loaders that use the transformers library for sampling.
+ """
+ processor_list.append(MyLogits())
+ return processor_list
+
+def output_modifier(string, state):
+ """
+ Modifies the LLM output before it gets presented.
+
+ In chat mode, the modified version goes into history['visible'],
+ and the original version goes into history['internal'].
+ """
+ return string
+
+def custom_generate_chat_prompt(user_input, state, **kwargs):
+ """
+ Replaces the function that generates the prompt from the chat history.
+ Only used in chat mode.
+ """
+ result = chat.generate_chat_prompt(user_input, state, **kwargs)
+ return result
+
+def custom_css():
+ """
+ Returns a CSS string that gets appended to the CSS for the webui.
+ """
+ return ''
+
+def custom_js():
+ """
+ Returns a javascript string that gets appended to the javascript
+ for the webui.
+ """
+ return ''
+
+def setup():
+ """
+ Gets executed only once, when the extension is imported.
+ """
+ pass
+
+def ui():
+ """
+ Gets executed when the UI is drawn. Custom gradio elements and
+ their corresponding event handlers should be defined here.
+
+ To learn about gradio components, check out the docs:
+ https://gradio.app/docs/
+ """
+
+ min_length = gr.Slider(0, 800, step=10, value=params['min_length'], label='Minimum reply length')
+ min_length.change(lambda x: params.update({'min_length': x}), min_length, None)
diff --git a/extensions/multimodal/DOCS.md b/extensions/multimodal/DOCS.md
new file mode 100644
index 0000000000000000000000000000000000000000..eaa4365e9a304a14ebbdb1d4d435f3a2a1f7a7d2
--- /dev/null
+++ b/extensions/multimodal/DOCS.md
@@ -0,0 +1,85 @@
+# Technical description of multimodal extension
+
+## Working principle
+Multimodality extension does most of the stuff which is required for any image input:
+
+- adds the UI
+- saves the images as base64 JPEGs to history
+- provides the hooks to the UI
+- if there are images in the prompt, it:
+ - splits the prompt to text and image parts
+ - adds image start/end markers to text parts, then encodes and embeds the text parts
+ - calls the vision pipeline to embed the images
+ - stitches the embeddings together, and returns them to text generation
+- loads the appropriate vision pipeline, selected either from model name, or by specifying --multimodal-pipeline parameter
+
+Now, for the pipelines, they:
+
+- load the required vision models
+- return some consts, for example the number of tokens taken up by image
+- and most importantly: return the embeddings for LLM, given a list of images
+
+## Prompts/history
+
+To save images in prompt/history, this extension is using a base64 JPEG, wrapped in a HTML tag, like so:
+```
+
+```
+where `{img_str}` is the actual image data. This format makes displaying them in the UI for free. Do note, that this format is required to be exactly the same, the regex used to find the images is: ``.
+
+## LLM input
+To describe the input, let's see it on an example prompt:
+```
+text1text2text3
+```
+where `textN` is N-th text, `` is N-th image, in HTML format specified above.
+
+**The first step is to split the prompt into image/text parts**, so we get:
+```
+['text1', '', 'text2', '', 'text3']
+```
+this is done in `MultimodalEmbedder._split_prompt(...)` function, which returns a list of `PromptPart`s - dataclasses wrapping the separate parts.
+
+This function also appends the image start/end markers to text, which are provided by `AbstractMultimodalPipeline.image_start()` / `AbstractMultimodalPipeline.image_end()` functions. If image start is ``, and end is ``, this function will return:
+```
+['text1', '', 'text2', '', 'text3']
+```
+
+**The returned prompt parts are then turned into token embeddings.**
+
+First, they are modified to token IDs, for the text it is done using standard `modules.text_generation.encode()` function, and for the images the returned token IDs are changed to placeholders. The placeholder is a list of `N` times `placeholder token id`, where `N` is specified using `AbstractMultimodalPipeline.num_image_embeds()`, and placeholder token IDs using `AbstractMultimodalPipeline.placeholder_token_id()`.
+
+Now, based on the token IDs, the prompt might get truncated, especially if `max_new_tokens` are unreasonably high. Unfortunately, it can't be done simply, just by trimming the prompt to be short enough. This way will lead to sometimes splitting the prompt in the middle of an image embedding, which usually breaks the generation. Therefore, in this case, the entire image needs to be removed from input. This is done inside `MultimodalEmbedder._encode_text(...)` function.
+
+**After the tokenization, the tokens need to get embedded**, the text and images are once again treated separately.
+
+The text parts are turned to embeddings, using `AbstractMultimodalPipeline.embed_tokens(...)` function. It uses standard embedding function from the model, but to support many LLMs, the actual function is returned by the pipeline (as it might be different for different LLMs), for LLaMA it is `shared.model.model.embed_tokens(...)`.
+
+The image parts are turned to embeddings, using `AbstractMultimodalPipeline.embed_images(...)` function. This function is specific for a given pipeline, it takes the images as input, forwards them through vision model/projector, and returns the embeddings.
+
+**Now, the returned embeddings are stitched together**, using `torch.cat()`, this is creating the final input to the LLM.
+
+## Pipelines
+
+All of the pipelines should subclass `AbstractMultimodalPipeline` class. The idea is to allow for new pipelines to be added in the same way as user extensions - git clone into `extensions/multimodal/pipelines`.
+
+The pipelines are the description of the vision part, containing vision model/multimodal projector. All of the pipelines should have an unique `name()`, which is then selected by user, in `--multimodal-pipeline` CLI argument. For an example, see `pipelines/llava/llava.py`.
+
+## Pipeline modules
+
+Pipelines are organized into "pipeline modules" - subdirectories in `pipelines` directory. The pipeline modules should contain a file called `pipelines.py`, that should contain the following fields:
+- `available_pipelines: List[str]` - list of pipelines provided by this module, shown as the list of available pipelines to the user
+- `def get_pipeline(name: str, params: dict) -> Optional[AbstractMultimodalPipeline]`: - a function to get a concrete pipeline by `name`, if `name` doesn't match any, should return `None`. `params` is the user settings for multimodal extension
+- `def get_pipeline_from_model_name(model_name: str, params: dict) -> Optional[AbstractMultimodalPipeline]`: - a function to get a pipeline from `model_name`, should be eager to return `None`, unless the determination can be done clearly (for example: minigpt-4 bases on vicuna - it should never return the pipeline, but llava can, as it has its own specific LLM finetune)
+
+**NOTE**: A pipeline module should lazy-import the pipelines only when necessary, and it should keep its imports to minimum
+
+## Pipeline params
+
+The pipelines will get the extension `params` in the constructor. They should honor the following fields:
+- `vision_device` - string, specifying `torch.device` to run the vision model (CLIP/ViT) on
+- `vision_bits` - int, number of fp bits to load the vision model(s) in
+- `projector_device` - string, specifying `torch.device` to run the projector models (Linear layers, QFormer, etc.) on
+- `projector_bits` - int, number of fp bits to load the projector models in
+
+As a helper, `AbstractMultimodalPipeline` has `_get_device(self, setting_name: str, params: dict)` and `_get_dtype(self, setting_name: str, params: dict)` helper functions, which parse string/int and return `torch.device` / `torch.dtype`.
diff --git a/extensions/multimodal/README.md b/extensions/multimodal/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..506810343f54658e9e42b3dd45ed593a8cb70b25
--- /dev/null
+++ b/extensions/multimodal/README.md
@@ -0,0 +1,83 @@
+# Multimodal
+
+## Description
+
+Adds support for multimodality (text+images) to text-generation-webui.
+
+https://user-images.githubusercontent.com/3718215/233817203-69b57e77-0c55-4fd6-b742-3204bb13b8fc.mp4
+
+## Usage
+
+To run this extension, download a LLM that supports multimodality, and then start server.py with the appropriate `--multimodal-pipeline` argument. Examples:
+
+```
+python server.py --model wojtab_llava-7b-v0-4bit-128g --multimodal-pipeline llava-7b
+python3 server.py --model wojtab_llava-13b-v0-4bit-128g --multimodal-pipeline llava-13b
+python server.py --model anon8231489123_vicuna-13b-GPTQ-4bit-128g --multimodal-pipeline minigpt4-13b
+python server.py --model llama-7b-4bit --multimodal-pipeline minigpt4-7b
+```
+
+There is built-in support for LLaVA-v0-13B and LLaVA-v0-7b. To install `minigpt4`:
+
+- clone https://github.com/Wojtab/minigpt-4-pipeline into `extensions/multimodal/pipelines`
+- install the requirements.txt
+
+The same procedure should be used to install other pipelines, which can then be used with `--multimodal-pipeline [pipeline name]`. For additional multimodal pipelines refer to the compatibility section below.
+
+Do note, that each image takes up a considerable amount of tokens, so adjust `max_new_tokens` to be at most 1700 (recommended value is between 200 to 500), so the images don't get truncated.
+
+To send an image, just upload it to the extension field below chat, and send a prompt as always. The image will be added to the end of your message. If you wish to modify the placement, include a string `` in your prompt.
+
+Additionally, there is *Embed all images, not only the last one* checkbox. It modifies the image embeddings, by default (if it's unchecked), all but the most recent images have their embeddings empty, so they are not fed to the network. It seems as if some multimodal networks consider the features in all images at the same time as if they were a single image. Due to this behavior, by default, the extension skips previous images. However, it can lead to sub-par generation on other pipelines. If you want to include all images, just tick this checkbox.
+
+## Compatibility
+As of now, the following multimodal pipelines are supported:
+|Pipeline|`--multimodal-pipeline`|Default LLM|LLM info(for the linked model)|Pipeline repository|
+|-|-|-|-|-|
+|[LLaVA 13B](https://github.com/haotian-liu/LLaVA)|`llava-13b`|[LLaVA 13B](https://huggingface.co/wojtab/llava-13b-v0-4bit-128g)|GPTQ 4-bit quant, old CUDA|built-in|
+|[LLaVA 7B](https://github.com/haotian-liu/LLaVA)|`llava-7b`|[LLaVA 7B](https://huggingface.co/wojtab/llava-7b-v0-4bit-128g)|GPTQ 4-bit quant, old CUDA|built-in|
+|[MiniGPT-4 7B](https://github.com/Vision-CAIR/MiniGPT-4)|`minigpt4-7b`|[Vicuna v0 7B](https://huggingface.co/TheBloke/vicuna-7B-GPTQ-4bit-128g)|GPTQ 4-bit quant, new format|[Wojtab/minigpt-4-pipeline](https://github.com/Wojtab/minigpt-4-pipeline)|
+|[MiniGPT-4 13B](https://github.com/Vision-CAIR/MiniGPT-4)|`minigpt4-13b`|[Vicuna v0 13B](https://huggingface.co/anon8231489123/vicuna-13b-GPTQ-4bit-128g)|GPTQ 4-bit quant, old CUDA|[Wojtab/minigpt-4-pipeline](https://github.com/Wojtab/minigpt-4-pipeline)|
+|[InstructBLIP 7B](https://github.com/salesforce/LAVIS/tree/main/projects/instructblip)|`instructblip-7b`|[Vicuna v1.1 7B](https://huggingface.co/TheBloke/vicuna-7B-1.1-GPTQ-4bit-128g)|GPTQ 4-bit quant|[kjerk/instructblip-pipeline](https://github.com/kjerk/instructblip-pipeline)|
+|[InstructBLIP 13B](https://github.com/salesforce/LAVIS/tree/main/projects/instructblip)|`instructblip-13b`|[Vicuna v1.1 13B](https://huggingface.co/TheBloke/vicuna-13B-1.1-GPTQ-4bit-128g)|GPTQ 4-bit quant|[kjerk/instructblip-pipeline](https://github.com/kjerk/instructblip-pipeline)|
+
+Some pipelines could support different LLMs but do note that while it might work, it isn't a supported configuration.
+
+DO NOT report bugs if you are using a different LLM.
+
+DO NOT report bugs with pipelines in this repository (unless they are built-in)
+
+## Extension config
+This extension uses the following parameters (from `settings.json`):
+|Parameter|Description|
+|---------|-----------|
+|`multimodal-vision_bits`|Number of bits to load vision models (CLIP/ViT) feature extractor in (most pipelines should support either 32 or 16, default=32)|
+|`multimodal-vision_device`|Torch device to run the feature extractor on, for example, `cpu` or `cuda:0`, by default `cuda:0` if available|
+|`multimodal-projector_bits`|Number of bits to load feature projector model(s) in (most pipelines should support either 32 or 16, default=32)|
+|`multimodal-projector_device`|Torch device to run the feature projector model(s) on, for example `cpu` or `cuda:0`, by default `cuda:0` if available|
+|`multimodal-add_all_images_to_prompt`|Default value of "Embed all images, not only the last one" checkbox|
+
+## Usage through API
+
+You can run the multimodal inference through API, by inputting the images to prompt. Images are embedded like so: `f''`, where `img_str` is base-64 jpeg data. Note that you will need to launch `server.py` with the arguments `--api --extensions multimodal`.
+
+Python example:
+
+```Python
+import base64
+import requests
+
+CONTEXT = "You are LLaVA, a large language and vision assistant trained by UW Madison WAIV Lab. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language. Follow the instructions carefully and explain your answers in detail.### Human: Hi!### Assistant: Hi there! How can I help you today?\n"
+
+with open('extreme_ironing.jpg', 'rb') as f:
+ img_str = base64.b64encode(f.read()).decode('utf-8')
+ prompt = CONTEXT + f'### Human: What is unusual about this image: \n### Assistant: '
+ print(requests.post('http://127.0.0.1:5000/api/v1/generate', json={'prompt': prompt, 'stopping_strings': ['\n###']}).json())
+```
+script output:
+```Python
+{'results': [{'text': "The unusual aspect of this image is that a man is standing on top of a yellow minivan while doing his laundry. He has set up a makeshift clothes line using the car's rooftop as an outdoor drying area. This scene is uncommon because people typically do their laundry indoors, in a dedicated space like a laundromat or a room in their home, rather than on top of a moving vehicle. Additionally, hanging clothes on the car could be potentially hazardous or illegal in some jurisdictions due to the risk of damaging the vehicle or causing accidents on the road.\n##"}]}
+```
+
+## For pipeline developers/technical description
+see [DOCS.md](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/multimodal/DOCS.md)
diff --git a/extensions/multimodal/abstract_pipeline.py b/extensions/multimodal/abstract_pipeline.py
new file mode 100644
index 0000000000000000000000000000000000000000..584219419d256e7743fd4d5120c56bcfa8f2a9f9
--- /dev/null
+++ b/extensions/multimodal/abstract_pipeline.py
@@ -0,0 +1,62 @@
+from abc import ABC, abstractmethod
+from typing import List, Optional
+
+import torch
+from PIL import Image
+
+
+class AbstractMultimodalPipeline(ABC):
+ @staticmethod
+ @abstractmethod
+ def name() -> str:
+ 'name of the pipeline, should be same as in --multimodal-pipeline'
+ pass
+
+ @staticmethod
+ @abstractmethod
+ def image_start() -> Optional[str]:
+ 'return image start string, string representation of image start token, or None if not applicable'
+ pass
+
+ @staticmethod
+ @abstractmethod
+ def image_end() -> Optional[str]:
+ 'return image end string, string representation of image end token, or None if not applicable'
+ pass
+
+ @staticmethod
+ @abstractmethod
+ def placeholder_token_id() -> int:
+ 'return placeholder token id'
+ pass
+
+ @staticmethod
+ @abstractmethod
+ def num_image_embeds() -> int:
+ 'return the number of embeds used by a single image (for example: 256 for LLaVA)'
+ pass
+
+ @abstractmethod
+ def embed_images(self, images: List[Image.Image]) -> torch.Tensor:
+ 'forward the images through vision pipeline, and return their embeddings'
+ pass
+
+ @staticmethod
+ @abstractmethod
+ def embed_tokens(input_ids: torch.Tensor) -> torch.Tensor:
+ 'embed tokens, the exact function varies by LLM, for LLaMA it is `shared.model.model.embed_tokens`'
+ pass
+
+ @staticmethod
+ @abstractmethod
+ def placeholder_embeddings() -> torch.Tensor:
+ 'get placeholder embeddings if there are multiple images, and `add_all_images_to_prompt` is False'
+ pass
+
+ def _get_device(self, setting_name: str, params: dict):
+ if params[setting_name] is None:
+ return torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ return torch.device(params[setting_name])
+
+ def _get_dtype(self, setting_name: str, params: dict):
+ return torch.float32 if int(params[setting_name]) == 32 else torch.float16
diff --git a/extensions/multimodal/multimodal_embedder.py b/extensions/multimodal/multimodal_embedder.py
new file mode 100644
index 0000000000000000000000000000000000000000..626077cb80987d66af90f390e31aa2f2def76fec
--- /dev/null
+++ b/extensions/multimodal/multimodal_embedder.py
@@ -0,0 +1,178 @@
+import base64
+import re
+from dataclasses import dataclass
+from io import BytesIO
+from typing import Any, List, Optional
+
+import torch
+from PIL import Image
+
+from extensions.multimodal.pipeline_loader import load_pipeline
+from modules import shared
+from modules.logging_colors import logger
+from modules.text_generation import encode, get_max_prompt_length
+
+
+@dataclass
+class PromptPart:
+ text: str
+ image: Optional[Image.Image] = None
+ is_image: bool = False
+ input_ids: Optional[torch.Tensor] = None
+ embedding: Optional[torch.Tensor] = None
+
+
+class MultimodalEmbedder:
+ def __init__(self, params: dict):
+ pipeline, source = load_pipeline(params)
+ self.pipeline = pipeline
+ logger.info(f'Multimodal: loaded pipeline {self.pipeline.name()} from pipelines/{source} ({self.pipeline.__class__.__name__})')
+
+ def _split_prompt(self, prompt: str, load_images: bool = False) -> List[PromptPart]:
+ """Splits a prompt into a list of `PromptParts` to separate image data from text.
+ It will also append `image_start` and `image_end` before and after the image, and optionally parse and load the images,
+ if `load_images` is `True`.
+ """
+ parts: List[PromptPart] = []
+ curr = 0
+ while True:
+ match = re.search(r'', prompt[curr:])
+ if match is None:
+ # no more image tokens, append the rest of the prompt
+ if curr > 0:
+ # add image end token after last image
+ parts.append(PromptPart(text=self.pipeline.image_end() + prompt[curr:]))
+ else:
+ parts.append(PromptPart(text=prompt))
+ break
+ # found an image, append image start token to the text
+ if match.start() > 0:
+ parts.append(PromptPart(text=prompt[curr:curr + match.start()] + self.pipeline.image_start()))
+ else:
+ parts.append(PromptPart(text=self.pipeline.image_start()))
+ # append the image
+ parts.append(PromptPart(
+ text=match.group(0),
+ image=Image.open(BytesIO(base64.b64decode(match.group(1)))) if load_images else None,
+ is_image=True
+ ))
+ curr += match.end()
+ return parts
+
+ def _len_in_tokens_prompt_parts(self, parts: List[PromptPart]) -> int:
+ """Total length in tokens of all `parts`"""
+ tokens = 0
+ for part in parts:
+ if part.is_image:
+ tokens += self.pipeline.num_image_embeds()
+ elif part.input_ids is not None:
+ tokens += len(part.input_ids)
+ else:
+ tokens += len(encode(part.text)[0])
+ return tokens
+
+ def len_in_tokens(self, prompt: str) -> int:
+ """Total length in tokens for a given text `prompt`"""
+ parts = self._split_prompt(prompt, False)
+ return self._len_in_tokens_prompt_parts(parts)
+
+ def _encode_single_text(self, part: PromptPart, add_bos_token: bool) -> PromptPart:
+ """Encode a single prompt `part` to `input_ids`. Returns a `PromptPart`"""
+ if part.is_image:
+ placeholders = torch.ones((self.pipeline.num_image_embeds())) * self.pipeline.placeholder_token_id()
+ part.input_ids = placeholders.to(shared.model.device, dtype=torch.int64)
+ else:
+ part.input_ids = encode(part.text, add_bos_token=add_bos_token)[0].to(shared.model.device, dtype=torch.int64)
+ return part
+
+ @staticmethod
+ def _num_images(parts: List[PromptPart]) -> int:
+ count = 0
+ for part in parts:
+ if part.is_image:
+ count += 1
+ return count
+
+ def _encode_text(self, state, parts: List[PromptPart]) -> List[PromptPart]:
+ """Encode text to token_ids, also truncate the prompt, if necessary.
+
+ The chat/instruct mode should make prompts that fit in get_max_prompt_length, but if max_new_tokens are set
+ such that the context + min_rows don't fit, we can get a prompt which is too long.
+ We can't truncate image embeddings, as it leads to broken generation, so remove the images instead and warn the user
+ """
+ encoded: List[PromptPart] = []
+ for i, part in enumerate(parts):
+ encoded.append(self._encode_single_text(part, i == 0 and state['add_bos_token']))
+
+ # truncation:
+ max_len = get_max_prompt_length(state)
+ removed_images = 0
+
+ # 1. remove entire text/image blocks
+ while self._len_in_tokens_prompt_parts(encoded[1:]) > max_len:
+ if encoded[0].is_image:
+ removed_images += 1
+ encoded = encoded[1:]
+
+ # 2. check if the last prompt part doesn't need to get truncated
+ if self._len_in_tokens_prompt_parts(encoded) > max_len:
+ if encoded[0].is_image:
+ # don't truncate image embeddings, just remove the image, otherwise generation will be broken
+ removed_images += 1
+ encoded = encoded[1:]
+ elif len(encoded) > 1 and encoded[0].text.endswith(self.pipeline.image_start()):
+ # see if we can keep image_start token
+ len_image_start = len(encode(self.pipeline.image_start(), add_bos_token=state['add_bos_token'])[0])
+ if self._len_in_tokens_prompt_parts(encoded[1:]) + len_image_start > max_len:
+ # we can't -> remove this text, and the image
+ encoded = encoded[2:]
+ removed_images += 1
+ else:
+ # we can -> just truncate the text
+ trunc_len = self._len_in_tokens_prompt_parts(encoded) - max_len
+ encoded[0].input_ids = encoded[0].input_ids[trunc_len:]
+ elif len(encoded) > 0:
+ # only one text left, truncate it normally
+ trunc_len = self._len_in_tokens_prompt_parts(encoded) - max_len
+ encoded[0].input_ids = encoded[0].input_ids[trunc_len:]
+
+ # notify user if we truncated an image
+ if removed_images > 0:
+ logger.warning(f"Multimodal: removed {removed_images} image(s) from prompt. Try decreasing max_new_tokens if generation is broken")
+
+ return encoded
+
+ def _embed(self, parts: List[PromptPart]) -> List[PromptPart]:
+ # batch images
+ image_indicies = [i for i, part in enumerate(parts) if part.is_image]
+ embedded = self.pipeline.embed_images([parts[i].image for i in image_indicies])
+ for i, embeds in zip(image_indicies, embedded):
+ parts[i].embedding = embeds
+ # embed text
+ for (i, part) in enumerate(parts):
+ if not part.is_image:
+ parts[i].embedding = self.pipeline.embed_tokens(part.input_ids)
+ return parts
+
+ def _remove_old_images(self, parts: List[PromptPart], params: dict) -> List[PromptPart]:
+ if params['add_all_images_to_prompt']:
+ return parts
+ already_added = False
+ for i, part in reversed(list(enumerate(parts))):
+ if part.is_image:
+ if already_added:
+ parts[i].embedding = self.pipeline.placeholder_embeddings()
+ else:
+ already_added = True
+ return parts
+
+ def forward(self, prompt: str, state: Any, params: dict):
+ prompt_parts = self._split_prompt(prompt, True)
+ prompt_parts = self._encode_text(state, prompt_parts)
+ prompt_parts = self._embed(prompt_parts)
+ prompt_parts = self._remove_old_images(prompt_parts, params)
+ embeds = tuple(part.embedding for part in prompt_parts)
+ ids = tuple(part.input_ids for part in prompt_parts)
+ input_embeds = torch.cat(embeds, dim=0)
+ input_ids = torch.cat(ids, dim=0)
+ return prompt, input_ids, input_embeds, self._num_images(prompt_parts)
diff --git a/extensions/multimodal/pipeline_loader.py b/extensions/multimodal/pipeline_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..8fcd0a9b410fbc44a51941e0a87b294de871ef8b
--- /dev/null
+++ b/extensions/multimodal/pipeline_loader.py
@@ -0,0 +1,52 @@
+import traceback
+from importlib import import_module
+from pathlib import Path
+from typing import Tuple
+
+from extensions.multimodal.abstract_pipeline import AbstractMultimodalPipeline
+from modules import shared
+from modules.logging_colors import logger
+
+
+def _get_available_pipeline_modules():
+ pipeline_path = Path(__file__).parent / 'pipelines'
+ modules = [p for p in pipeline_path.iterdir() if p.is_dir()]
+ return [m.name for m in modules if (m / 'pipelines.py').exists()]
+
+
+def load_pipeline(params: dict) -> Tuple[AbstractMultimodalPipeline, str]:
+ pipeline_modules = {}
+ available_pipeline_modules = _get_available_pipeline_modules()
+ for name in available_pipeline_modules:
+ try:
+ pipeline_modules[name] = import_module(f'extensions.multimodal.pipelines.{name}.pipelines')
+ except:
+ logger.warning(f'Failed to get multimodal pipelines from {name}')
+ logger.warning(traceback.format_exc())
+
+ if shared.args.multimodal_pipeline is not None:
+ for k in pipeline_modules:
+ if hasattr(pipeline_modules[k], 'get_pipeline'):
+ pipeline = getattr(pipeline_modules[k], 'get_pipeline')(shared.args.multimodal_pipeline, params)
+ if pipeline is not None:
+ return (pipeline, k)
+ else:
+ model_name = shared.args.model.lower()
+ for k in pipeline_modules:
+ if hasattr(pipeline_modules[k], 'get_pipeline_from_model_name'):
+ pipeline = getattr(pipeline_modules[k], 'get_pipeline_from_model_name')(model_name, params)
+ if pipeline is not None:
+ return (pipeline, k)
+
+ available = []
+ for k in pipeline_modules:
+ if hasattr(pipeline_modules[k], 'available_pipelines'):
+ pipelines = getattr(pipeline_modules[k], 'available_pipelines')
+ available += pipelines
+
+ if shared.args.multimodal_pipeline is not None:
+ log = f'Multimodal - ERROR: Failed to load multimodal pipeline "{shared.args.multimodal_pipeline}", available pipelines are: {available}.'
+ else:
+ log = f'Multimodal - ERROR: Failed to determine multimodal pipeline for model {shared.args.model}, please select one manually using --multimodal-pipeline [PIPELINE]. Available pipelines are: {available}.'
+ logger.critical(f'{log} Please specify a correct pipeline, or disable the extension')
+ raise RuntimeError(f'{log} Please specify a correct pipeline, or disable the extension')
diff --git a/extensions/multimodal/pipelines/llava/README.md b/extensions/multimodal/pipelines/llava/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..aff64faaae07d2f4da6c24e8ea03693326313139
--- /dev/null
+++ b/extensions/multimodal/pipelines/llava/README.md
@@ -0,0 +1,9 @@
+## LLaVA pipeline
+
+This module provides 2 pipelines:
+- `llava-7b` - for use with LLaVA v0 7B model (finetuned LLaMa 7B)
+- `llava-13b` - for use with LLaVA v0 13B model (finetuned LLaMa 13B)
+
+[LLaVA](https://github.com/haotian-liu/LLaVA) uses CLIP `openai/clip-vit-large-patch14` as the vision model, and then a single linear layer. For 13B the projector weights are in `liuhaotian/LLaVA-13b-delta-v0`, and for 7B they are in `liuhaotian/LLaVA-7b-delta-v0`.
+
+The supported parameter combinations for both the vision model, and the projector are: CUDA/32bit, CUDA/16bit, CPU/32bit
diff --git a/extensions/multimodal/pipelines/llava/llava.py b/extensions/multimodal/pipelines/llava/llava.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c75eeed990ef1b9cef8b248eb7495d0f9b206b7
--- /dev/null
+++ b/extensions/multimodal/pipelines/llava/llava.py
@@ -0,0 +1,177 @@
+import time
+from abc import abstractmethod
+from typing import List, Tuple
+
+import torch
+from huggingface_hub import hf_hub_download
+from PIL import Image
+from transformers import CLIPImageProcessor, CLIPVisionModel
+
+from extensions.multimodal.abstract_pipeline import AbstractMultimodalPipeline
+from modules import shared
+from modules.logging_colors import logger
+from modules.text_generation import encode
+
+
+class LLaVA_v0_Pipeline(AbstractMultimodalPipeline):
+ CLIP_REPO = "openai/clip-vit-large-patch14"
+
+ def __init__(self, params: dict) -> None:
+ super().__init__()
+ self.clip_device = self._get_device("vision_device", params)
+ self.clip_dtype = self._get_dtype("vision_bits", params)
+ self.projector_device = self._get_device("projector_device", params)
+ self.projector_dtype = self._get_dtype("projector_bits", params)
+ self.image_processor, self.vision_tower, self.mm_projector = self._load_models()
+
+ def _load_models(self):
+ start_ts = time.time()
+
+ logger.info(f"LLaVA - Loading CLIP from {LLaVA_v0_Pipeline.CLIP_REPO} as {self.clip_dtype} on {self.clip_device}...")
+ image_processor = CLIPImageProcessor.from_pretrained(LLaVA_v0_Pipeline.CLIP_REPO, torch_dtype=self.clip_dtype)
+ vision_tower = CLIPVisionModel.from_pretrained(LLaVA_v0_Pipeline.CLIP_REPO, torch_dtype=self.clip_dtype).to(self.clip_device)
+
+ logger.info(f"LLaVA - Loading projector from {self.llava_projector_repo()} as {self.projector_dtype} on {self.projector_device}...")
+ projector_path = hf_hub_download(self.llava_projector_repo(), self.llava_projector_filename())
+ mm_projector = torch.nn.Linear(*self.llava_projector_shape())
+ projector_data = torch.load(projector_path)
+ mm_projector.weight = torch.nn.Parameter(projector_data['model.mm_projector.weight'].to(dtype=self.projector_dtype), False)
+ mm_projector.bias = torch.nn.Parameter(projector_data['model.mm_projector.bias'].to(dtype=self.projector_dtype), False)
+ mm_projector = mm_projector.to(self.projector_device)
+
+ logger.info(f"LLaVA supporting models loaded, took {time.time() - start_ts:.2f} seconds")
+ return image_processor, vision_tower, mm_projector
+
+ @staticmethod
+ def image_start() -> str:
+ return ""
+
+ @staticmethod
+ def image_end() -> str:
+ return ""
+
+ @staticmethod
+ def num_image_embeds() -> int:
+ return 256
+
+ @staticmethod
+ def embed_tokens(input_ids: torch.Tensor) -> torch.Tensor:
+ for attr in ['', 'model', 'model.model', 'model.model.model']:
+ tmp = getattr(shared.model, attr, None) if attr != '' else shared.model
+ if tmp is not None and hasattr(tmp, 'embed_tokens'):
+ func = tmp.embed_tokens
+ break
+ else:
+ raise ValueError('The embed_tokens method has not been found for this loader.')
+
+ return func(input_ids).to(shared.model.device, dtype=shared.model.dtype)
+
+ @staticmethod
+ def placeholder_embeddings() -> torch.Tensor:
+ return LLaVA_v0_Pipeline.embed_tokens(encode(""*256, add_bos_token=False)[0])
+
+ def embed_images(self, images: List[Image.Image]) -> torch.Tensor:
+ images = self.image_processor(images, return_tensors='pt')['pixel_values']
+ images = images.to(self.clip_device, dtype=self.clip_dtype)
+
+ with torch.no_grad():
+ image_forward_outs = self.vision_tower(images, output_hidden_states=True)
+ select_hidden_state_layer = -2
+ select_hidden_state = image_forward_outs.hidden_states[select_hidden_state_layer]
+ image_features = select_hidden_state[:, 1:].to(self.projector_device, dtype=self.projector_dtype)
+ image_features = self.mm_projector(image_features)
+ return image_features.to(shared.model.device, dtype=shared.model.dtype)
+
+ @staticmethod
+ @abstractmethod
+ def llava_projector_repo() -> str:
+ pass
+
+ @staticmethod
+ @abstractmethod
+ def llava_projector_filename() -> str:
+ pass
+
+ @staticmethod
+ @abstractmethod
+ def llava_projector_shape() -> Tuple[int, int]:
+ pass
+
+
+class LLaVA_v0_13B_Pipeline(LLaVA_v0_Pipeline):
+ def __init__(self, params: dict) -> None:
+ super().__init__(params)
+
+ @staticmethod
+ def name() -> str:
+ return "llava-13b"
+
+ @staticmethod
+ def placeholder_token_id() -> int:
+ return 32000
+
+ @staticmethod
+ def llava_projector_shape() -> Tuple[int, int]:
+ return (1024, 5120)
+
+ @staticmethod
+ def llava_projector_filename() -> str:
+ return "mm_projector.bin"
+
+ @staticmethod
+ def llava_projector_repo() -> str:
+ return "liuhaotian/LLaVA-13b-delta-v0"
+
+
+class LLaVA_v0_7B_Pipeline(LLaVA_v0_Pipeline):
+ def __init__(self, params: dict) -> None:
+ super().__init__(params)
+
+ @staticmethod
+ def name() -> str:
+ return "llava-7b"
+
+ @staticmethod
+ def placeholder_token_id() -> int:
+ return 32001
+
+ @staticmethod
+ def llava_projector_shape() -> Tuple[int, int]:
+ return (1024, 4096)
+
+ @staticmethod
+ def llava_projector_filename() -> str:
+ return "mm_projector.bin"
+
+ @staticmethod
+ def llava_projector_repo() -> str:
+ return "liuhaotian/LLaVA-7b-delta-v0"
+
+
+class LLaVA_LLaMA_2_13B_Pipeline(LLaVA_v0_13B_Pipeline):
+ def __init__(self, params: dict) -> None:
+ super().__init__(params)
+
+ @staticmethod
+ def name() -> str:
+ return "llava-llama-2-13b"
+
+ @staticmethod
+ def placeholder_token_id() -> int:
+ return 0
+
+ @staticmethod
+ def llava_projector_repo() -> str:
+ return "liuhaotian/llava-llama-2-13b-chat-lightning-preview"
+
+ @staticmethod
+ def image_start() -> str:
+ return ""
+
+ @staticmethod
+ def image_end() -> str:
+ return ""
+
+ @staticmethod
+ def placeholder_embeddings() -> torch.Tensor:
+ return LLaVA_v0_Pipeline.embed_tokens(encode(""*256, add_bos_token=False)[0])
diff --git a/extensions/multimodal/pipelines/llava/pipelines.py b/extensions/multimodal/pipelines/llava/pipelines.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6776a5a40e1dc36ce25286c10c3db5f51b8a85d
--- /dev/null
+++ b/extensions/multimodal/pipelines/llava/pipelines.py
@@ -0,0 +1,34 @@
+from typing import Optional
+
+from extensions.multimodal.abstract_pipeline import AbstractMultimodalPipeline
+
+available_pipelines = ['llava-7b', 'llava-13b', 'llava-llama-2-13b']
+
+
+def get_pipeline(name: str, params: dict) -> Optional[AbstractMultimodalPipeline]:
+ if name == 'llava-7b':
+ from .llava import LLaVA_v0_7B_Pipeline
+ return LLaVA_v0_7B_Pipeline(params)
+ if name == 'llava-13b':
+ from .llava import LLaVA_v0_13B_Pipeline
+ return LLaVA_v0_13B_Pipeline(params)
+ if name == 'llava-llama-2-13b':
+ from .llava import LLaVA_LLaMA_2_13B_Pipeline
+ return LLaVA_LLaMA_2_13B_Pipeline(params)
+ return None
+
+
+def get_pipeline_from_model_name(model_name: str, params: dict) -> Optional[AbstractMultimodalPipeline]:
+ if 'llava' not in model_name.lower():
+ return None
+ if 'llama-2' in model_name.lower():
+ if '13b' in model_name.lower():
+ from .llava import LLaVA_LLaMA_2_13B_Pipeline
+ return LLaVA_LLaMA_2_13B_Pipeline(params)
+ if '7b' in model_name.lower():
+ from .llava import LLaVA_v0_7B_Pipeline
+ return LLaVA_v0_7B_Pipeline(params)
+ if '13b' in model_name.lower():
+ from .llava import LLaVA_v0_13B_Pipeline
+ return LLaVA_v0_13B_Pipeline(params)
+ return None
diff --git a/extensions/multimodal/pipelines/place-additional-pipelines-here.txt b/extensions/multimodal/pipelines/place-additional-pipelines-here.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/extensions/multimodal/script.py b/extensions/multimodal/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..8bc26315cc60882406343c92f8f6368f7e9239aa
--- /dev/null
+++ b/extensions/multimodal/script.py
@@ -0,0 +1,112 @@
+import base64
+import re
+import time
+from functools import partial
+from io import BytesIO
+
+import gradio as gr
+import torch
+
+from extensions.multimodal.multimodal_embedder import MultimodalEmbedder
+from modules import shared
+from modules.logging_colors import logger
+
+params = {
+ "add_all_images_to_prompt": False,
+ # device to run vision encoder on
+ "vision_device": None,
+ # bits to load vision encoder in, either 16 or 32
+ "vision_bits": 32,
+ # device to run multimodal projector on
+ "projector_device": None,
+ # multimodal projector bits, either 32 or 16
+ "projector_bits": 32
+}
+
+
+# If 'state' is True, will hijack the next chat generation
+input_hijack = {
+ 'state': False,
+ 'value': ["", ""]
+}
+
+
+# initialized in ui, so that params are loaded from settings
+multimodal_embedder: MultimodalEmbedder = None
+
+
+def chat_input_modifier(text, visible_text, state):
+ global input_hijack
+ if input_hijack['state']:
+ input_hijack['state'] = False
+ return input_hijack['value'](text, visible_text)
+ else:
+ return text, visible_text
+
+
+def add_chat_picture(picture, text, visible_text):
+ # resize the image, so that shortest edge is at least 224 (size for CLIP), and at most 300 (to keep history manageable)
+ max_hw, min_hw = max(picture.size), min(picture.size)
+ aspect_ratio = max_hw / min_hw
+ shortest_edge = int(max(300 / aspect_ratio, 224))
+ longest_edge = int(shortest_edge * aspect_ratio)
+ w = shortest_edge if picture.width < picture.height else longest_edge
+ h = shortest_edge if picture.width >= picture.height else longest_edge
+ picture = picture.resize((w, h))
+
+ buffer = BytesIO()
+ picture.save(buffer, format="JPEG")
+ img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
+ image = f''
+
+ if '' in text:
+ text = text.replace('', image)
+ else:
+ text = text + '\n' + image
+
+ if visible_text == '' or visible_text is None:
+ visible_text = text
+ elif '' in visible_text:
+ visible_text = visible_text.replace('', image)
+ else:
+ visible_text = visible_text + '\n' + image
+
+ return text, visible_text
+
+
+def custom_tokenized_length(prompt):
+ return multimodal_embedder.len_in_tokens(prompt)
+
+
+def tokenizer_modifier(state, prompt, input_ids, input_embeds):
+ global params
+ start_ts = time.time()
+ image_match = re.search(r'', prompt)
+
+ if image_match is None:
+ return prompt, input_ids, input_embeds
+
+ prompt, input_ids, input_embeds, total_embedded = multimodal_embedder.forward(prompt, state, params)
+ logger.info(f'Embedded {total_embedded} image(s) in {time.time()-start_ts:.2f}s')
+ return (prompt,
+ input_ids.unsqueeze(0).to(shared.model.device, dtype=torch.int64),
+ input_embeds.unsqueeze(0).to(shared.model.device, dtype=shared.model.dtype))
+
+
+def ui():
+ global multimodal_embedder
+ multimodal_embedder = MultimodalEmbedder(params)
+ with gr.Column():
+ picture_select = gr.Image(label='Send a picture', type='pil')
+ # The models don't seem to deal well with multiple images
+ single_image_checkbox = gr.Checkbox(False, label='Embed all images, not only the last one')
+ # Prepare the input hijack
+ picture_select.upload(
+ lambda picture: input_hijack.update({"state": True, "value": partial(add_chat_picture, picture)}),
+ [picture_select],
+ None
+ )
+ picture_select.clear(lambda: input_hijack.update({"state": False, "value": ["", ""]}), None, None)
+ single_image_checkbox.change(lambda x: params.update({"add_all_images_to_prompt": x}), single_image_checkbox, None)
+ shared.gradio['Generate'].click(lambda: None, None, picture_select)
+ shared.gradio['textbox'].submit(lambda: None, None, picture_select)
diff --git a/extensions/ngrok/README.md b/extensions/ngrok/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..0324bf9852408d9d2b86cc0165c2d548996f9c94
--- /dev/null
+++ b/extensions/ngrok/README.md
@@ -0,0 +1,69 @@
+# Adding an ingress URL through the ngrok Agent SDK for Python
+
+[ngrok](https://ngrok.com) is a globally distributed reverse proxy commonly used for quickly getting a public URL to a
+service running inside a private network, such as on your local laptop. The ngrok agent is usually
+deployed inside a private network and is used to communicate with the ngrok cloud service.
+
+By default the authtoken in the NGROK_AUTHTOKEN environment variable will be used. Alternatively one may be specified in
+the `settings.json` file, see the Examples below. Retrieve your authtoken on the [Auth Token page of your ngrok dashboard](https://dashboard.ngrok.com/get-started/your-authtoken), signing up is free.
+
+# Documentation
+
+For a list of all available options, see [the configuration documentation](https://ngrok.com/docs/ngrok-agent/config/) or [the connect example](https://github.com/ngrok/ngrok-py/blob/main/examples/ngrok-connect-full.py).
+
+The ngrok Python SDK is [on github here](https://github.com/ngrok/ngrok-py). A quickstart guide and a full API reference are included in the [ngrok-py Python API documentation](https://ngrok.github.io/ngrok-py/).
+
+# Running
+
+To enable ngrok install the requirements and then add `--extension ngrok` to the command line options, for instance:
+
+```bash
+pip install -r extensions/ngrok/requirements.txt
+python server.py --extension ngrok
+```
+
+In the output you should then see something like this:
+
+```bash
+INFO:Loading the extension "ngrok"...
+INFO:Session created
+INFO:Created tunnel "9d9d0944dc75ff9d3aae653e5eb29fe9" with url "https://d83706cf7be7.ngrok.app"
+INFO:Tunnel "9d9d0944dc75ff9d3aae653e5eb29fe9" TCP forwarding to "localhost:7860"
+INFO:Ingress established at https://d83706cf7be7.ngrok.app
+```
+
+You can now access the webui via the url shown, in this case `https://d83706cf7be7.ngrok.app`. It is recommended to add some authentication to the ingress, see below.
+
+# Example Settings
+
+In `settings.json` add a `ngrok` key with a dictionary of options, for instance:
+
+To enable basic authentication:
+```json
+{
+ "ngrok": {
+ "basic_auth": "user:password"
+ }
+}
+```
+
+To enable OAUTH authentication:
+```json
+{
+ "ngrok": {
+ "oauth_provider": "google",
+ "oauth_allow_domains": "asdf.com",
+ "oauth_allow_emails": "asdf@asdf.com"
+ }
+}
+```
+
+To add an authtoken instead of using the NGROK_AUTHTOKEN environment variable:
+```json
+{
+ "ngrok": {
+ "authtoken": "",
+ "authtoken_from_env":false
+ }
+}
+```
\ No newline at end of file
diff --git a/extensions/ngrok/requirements.txt b/extensions/ngrok/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8acb99f2a1f2a165f496121e539949e18f3fcd81
--- /dev/null
+++ b/extensions/ngrok/requirements.txt
@@ -0,0 +1 @@
+ngrok==0.*
diff --git a/extensions/ngrok/script.py b/extensions/ngrok/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..46f39bd327b6046f8e0d38ef266fc7d3687640da
--- /dev/null
+++ b/extensions/ngrok/script.py
@@ -0,0 +1,36 @@
+# Adds ngrok ingress, to use add `--extension ngrok` to the command line options
+#
+# Parameters can be customized in settings.json of webui, e.g.:
+# {"ngrok": {"basic_auth":"user:password"} }
+# or
+# {"ngrok": {"oauth_provider":"google", "oauth_allow_emails":["asdf@asdf.com"]} }
+#
+# See this example for full list of options: https://github.com/ngrok/ngrok-py/blob/main/examples/ngrok-connect-full.py
+# or the README.md in this directory.
+
+import logging
+from modules import shared
+
+# Pick up host/port command line arguments
+host = shared.args.listen_host if shared.args.listen_host and shared.args.listen else '127.0.0.1'
+port = shared.args.listen_port if shared.args.listen_port else '7860'
+
+# Default options
+options = {
+ 'addr': f"{host}:{port}",
+ 'authtoken_from_env': True,
+ 'session_metadata': 'text-generation-webui',
+}
+
+
+def ui():
+ settings = shared.settings.get("ngrok")
+ if settings:
+ options.update(settings)
+
+ try:
+ import ngrok
+ tunnel = ngrok.connect(**options)
+ logging.info(f"Ingress established at: {tunnel.url()}")
+ except ModuleNotFoundError:
+ logging.error("===> ngrok library not found, please run `pip install -r extensions/ngrok/requirements.txt`")
diff --git a/extensions/openai/README.md b/extensions/openai/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..82026ac17b2f95b61956117eac00a13a32666b17
--- /dev/null
+++ b/extensions/openai/README.md
@@ -0,0 +1,263 @@
+# An OpenedAI API (openai like)
+
+This extension creates an API that works kind of like openai (ie. api.openai.com).
+
+## Setup & installation
+
+Install the requirements:
+
+```
+pip3 install -r requirements.txt
+```
+
+It listens on `tcp port 5001` by default. You can use the `OPENEDAI_PORT` environment variable to change this.
+
+Make sure you enable it in server launch parameters, it should include:
+
+```
+--extensions openai
+```
+
+You can also use the `--listen` argument to make the server available on the networ, and/or the `--share` argument to enable a public Cloudflare endpoint.
+
+To enable the basic image generation support (txt2img) set the environment variable `SD_WEBUI_URL` to point to your Stable Diffusion API ([Automatic1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui)).
+
+For example:
+
+```
+SD_WEBUI_URL=http://127.0.0.1:7861
+```
+
+## Quick start
+
+1. Install the requirements.txt (pip)
+2. Enable the `openeai` module (--extensions openai), restart the server.
+3. Configure the openai client
+
+Most openai application can be configured to connect the API if you set the following environment variables:
+
+```shell
+# Sample .env file:
+OPENAI_API_KEY=sk-111111111111111111111111111111111111111111111111
+OPENAI_API_BASE=http://0.0.0.0:5001/v1
+```
+
+If needed, replace 0.0.0.0 with the IP/port of your server.
+
+
+### Settings
+
+To adjust your default settings, you can add the following to your `settings.yaml` file.
+
+```
+openai-port: 5002
+openai-embedding_device: cuda
+openai-sd_webui_url: http://127.0.0.1:7861
+openai-debug: 1
+```
+
+If you've configured the environment variables, please note that settings from `settings.yaml` won't take effect. For instance, if you set `openai-port: 5002` in `settings.yaml` but `OPENEDAI_PORT=5001` in the environment variables, the extension will use `5001` as the port number.
+
+When using `cache_embedding_model.py` to preload the embedding model during Docker image building, consider the following:
+
+- If you wish to use the default settings, leave the environment variables unset.
+- If you intend to change the default embedding model, ensure that you configure the environment variable `OPENEDAI_EMBEDDING_MODEL` to the desired model. Avoid setting `openai-embedding_model` in `settings.yaml` because those settings only take effect after the server starts.
+
+### Models
+
+This has been successfully tested with Alpaca, Koala, Vicuna, WizardLM and their variants, (ex. gpt4-x-alpaca, GPT4all-snoozy, stable-vicuna, wizard-vicuna, etc.) and many others. Models that have been trained for **Instruction Following** work best. If you test with other models please let me know how it goes. Less than satisfying results (so far) from: RWKV-4-Raven, llama, mpt-7b-instruct/chat.
+
+For best results across all API endpoints, a model like [vicuna-13b-v1.3-GPTQ](https://huggingface.co/TheBloke/vicuna-13b-v1.3-GPTQ), [stable-vicuna-13B-GPTQ](https://huggingface.co/TheBloke/stable-vicuna-13B-GPTQ) or [airoboros-13B-gpt4-1.3-GPTQ](https://huggingface.co/TheBloke/airoboros-13B-gpt4-1.3-GPTQ) is a good start.
+
+For good results with the [Completions](https://platform.openai.com/docs/api-reference/completions) API endpoint, in addition to the above models, you can also try using a base model like [falcon-7b](https://huggingface.co/tiiuae/falcon-7b) or Llama.
+
+For good results with the [ChatCompletions](https://platform.openai.com/docs/api-reference/chat) or [Edits](https://platform.openai.com/docs/api-reference/edits) API endpoints you can use almost any model trained for instruction following. Be sure that the proper instruction template is detected and loaded or the results will not be good.
+
+For the proper instruction format to be detected you need to have a matching model entry in your `models/config.yaml` file. Be sure to keep this file up to date.
+A matching instruction template file in the characters/instruction-following/ folder will loaded and applied to format messages correctly for the model - this is critical for good results.
+
+For example, the Wizard-Vicuna family of models are trained with the Vicuna 1.1 format. In the models/config.yaml file there is this matching entry:
+
+```
+.*wizard.*vicuna:
+ mode: 'instruct'
+ instruction_template: 'Vicuna-v1.1'
+```
+
+This refers to `characters/instruction-following/Vicuna-v1.1.yaml`, which looks like this:
+
+```
+user: "USER:"
+bot: "ASSISTANT:"
+turn_template: "<|user|> <|user-message|>\n<|bot|> <|bot-message|>\n"
+context: "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n\n"
+```
+
+For most common models this is already setup, but if you are using a new or uncommon model you may need add a matching entry to the models/config.yaml and possibly create your own instruction-following template and for best results.
+
+If you see this in your logs, it probably means that the correct format could not be loaded:
+
+```
+Warning: Loaded default instruction-following template for model.
+```
+
+### Embeddings (alpha)
+
+Embeddings requires `sentence-transformers` installed, but chat and completions will function without it loaded. The embeddings endpoint is currently using the HuggingFace model: `sentence-transformers/all-mpnet-base-v2` for embeddings. This produces 768 dimensional embeddings (the same as the text-davinci-002 embeddings), which is different from OpenAI's current default `text-embedding-ada-002` model which produces 1536 dimensional embeddings. The model is small-ish and fast-ish. This model and embedding size may change in the future.
+
+| model name | dimensions | input max tokens | speed | size | Avg. performance |
+| ---------------------- | ---------- | ---------------- | ----- | ---- | ---------------- |
+| text-embedding-ada-002 | 1536 | 8192 | - | - | - |
+| text-davinci-002 | 768 | 2046 | - | - | - |
+| all-mpnet-base-v2 | 768 | 384 | 2800 | 420M | 63.3 |
+| all-MiniLM-L6-v2 | 384 | 256 | 14200 | 80M | 58.8 |
+
+In short, the all-MiniLM-L6-v2 model is 5x faster, 5x smaller ram, 2x smaller storage, and still offers good quality. Stats from (https://www.sbert.net/docs/pretrained_models.html). To change the model from the default you can set the environment variable `OPENEDAI_EMBEDDING_MODEL`, ex. "OPENEDAI_EMBEDDING_MODEL=all-MiniLM-L6-v2".
+
+Warning: You cannot mix embeddings from different models even if they have the same dimensions. They are not comparable.
+
+### Client Application Setup
+
+Almost everything you use it with will require you to set a dummy OpenAI API key environment variable.
+
+With the [official python openai client](https://github.com/openai/openai-python), set the `OPENAI_API_BASE` environment variables:
+
+```shell
+# Sample .env file:
+OPENAI_API_KEY=sk-111111111111111111111111111111111111111111111111
+OPENAI_API_BASE=http://0.0.0.0:5001/v1
+```
+
+If needed, replace 0.0.0.0 with the IP/port of your server.
+
+If using .env files to save the `OPENAI_API_BASE` and `OPENAI_API_KEY` variables, make sure the .env file is loaded before the openai module is imported:
+
+```python
+from dotenv import load_dotenv
+load_dotenv() # make sure the environment variables are set before import
+import openai
+```
+
+With the [official Node.js openai client](https://github.com/openai/openai-node) it is slightly more more complex because the environment variables are not used by default, so small source code changes may be required to use the environment variables, like so:
+
+```js
+const openai = OpenAI(
+ Configuration({
+ apiKey: process.env.OPENAI_API_KEY,
+ basePath: process.env.OPENAI_API_BASE
+ })
+);
+```
+
+For apps made with the [chatgpt-api Node.js client library](https://github.com/transitive-bullshit/chatgpt-api):
+
+```js
+const api = new ChatGPTAPI({
+ apiKey: process.env.OPENAI_API_KEY,
+ apiBaseUrl: process.env.OPENAI_API_BASE
+});
+```
+
+## API Documentation & Examples
+
+The OpenAI API is well documented, you can view the documentation here: https://platform.openai.com/docs/api-reference
+
+Examples of how to use the Completions API in Python can be found here: https://platform.openai.com/examples
+Not all of them will work with all models unfortunately, See the notes on Models for how to get the best results.
+
+Here is a simple python example.
+
+```python
+import os
+os.environ['OPENAI_API_KEY']="sk-111111111111111111111111111111111111111111111111"
+os.environ['OPENAI_API_BASE']="http://0.0.0.0:5001/v1"
+import openai
+
+response = openai.ChatCompletion.create(
+ model="x",
+ messages = [{ 'role': 'system', 'content': "Answer in a consistent style." },
+ {'role': 'user', 'content': "Teach me about patience."},
+ {'role': 'assistant', 'content': "The river that carves the deepest valley flows from a modest spring; the grandest symphony originates from a single note; the most intricate tapestry begins with a solitary thread."},
+ {'role': 'user', 'content': "Teach me about the ocean."},
+ ]
+)
+text = response['choices'][0]['message']['content']
+print(text)
+```
+
+## Compatibility & not so compatibility
+
+| API endpoint | tested with | notes |
+| ------------------------- | ---------------------------------- | --------------------------------------------------------------------------- |
+| /v1/chat/completions | openai.ChatCompletion.create() | Use it with instruction following models |
+| /v1/embeddings | openai.Embedding.create() | Using SentenceTransformer embeddings |
+| /v1/images/generations | openai.Image.create() | Bare bones, no model configuration, response_format='b64_json' only. |
+| /v1/moderations | openai.Moderation.create() | Basic initial support via embeddings |
+| /v1/models | openai.Model.list() | Lists models, Currently loaded model first, plus some compatibility options |
+| /v1/models/{id} | openai.Model.get() | returns whatever you ask for |
+| /v1/edits | openai.Edit.create() | Deprecated by openai, good with instruction following models |
+| /v1/text_completion | openai.Completion.create() | Legacy endpoint, variable quality based on the model |
+| /v1/completions | openai api completions.create | Legacy endpoint (v0.25) |
+| /v1/engines/\*/embeddings | python-openai v0.25 | Legacy endpoint |
+| /v1/engines/\*/generate | openai engines.generate | Legacy endpoint |
+| /v1/engines | openai engines.list | Legacy Lists models |
+| /v1/engines/{model_name} | openai engines.get -i {model_name} | You can use this legacy endpoint to load models via the api or command line |
+| /v1/images/edits | openai.Image.create_edit() | not yet supported |
+| /v1/images/variations | openai.Image.create_variation() | not yet supported |
+| /v1/audio/\* | openai.Audio.\* | supported |
+| /v1/files\* | openai.Files.\* | not yet supported |
+| /v1/fine-tunes\* | openai.FineTune.\* | not yet supported |
+| /v1/search | openai.search, engines.search | not yet supported |
+
+Because of the differences in OpenAI model context sizes (2k, 4k, 8k, 16k, etc,) you may need to adjust the max_tokens to fit into the context of the model you choose.
+
+Streaming, temperature, top_p, max_tokens, stop, should all work as expected, but not all parameters are mapped correctly.
+
+Some hacky mappings:
+
+| OpenAI | text-generation-webui | note |
+| ----------------------- | -------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| model | - | Ignored, the model is not changed |
+| frequency_penalty | encoder_repetition_penalty | this seems to operate with a different scale and defaults, I tried to scale it based on range & defaults, but the results are terrible. hardcoded to 1.18 until there is a better way |
+| presence_penalty | repetition_penalty | same issues as frequency_penalty, hardcoded to 1.0 |
+| best_of | top_k | default is 1 (top_k is 20 for chat, which doesn't support best_of) |
+| n | 1 | variations are not supported yet. |
+| 1 | num_beams | hardcoded to 1 |
+| 1.0 | typical_p | hardcoded to 1.0 |
+| logprobs & logit_bias | - | experimental, llama only, transformers-kin only (ExLlama_HF ok), can also use llama tokens if 'model' is not an openai model or will convert from tiktoken for the openai model specified in 'model' |
+| messages.name | - | not supported yet |
+| suffix | - | not supported yet |
+| user | - | not supported yet |
+| functions/function_call | - | function calls are not supported yet |
+
+### Applications
+
+Almost everything needs the `OPENAI_API_KEY` and `OPENAI_API_BASE` environment variable set, but there are some exceptions.
+
+| Compatibility | Application/Library | Website | Notes |
+| ------------- | ---------------------- | ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| ✅❌ | openai-python (v0.25+) | https://github.com/openai/openai-python | only the endpoints from above are working. OPENAI_API_BASE=http://127.0.0.1:5001/v1 |
+| ✅❌ | openai-node | https://github.com/openai/openai-node | only the endpoints from above are working. environment variables don't work by default, but can be configured (see above) |
+| ✅❌ | chatgpt-api | https://github.com/transitive-bullshit/chatgpt-api | only the endpoints from above are working. environment variables don't work by default, but can be configured (see above) |
+| ✅ | anse | https://github.com/anse-app/anse | API Key & URL configurable in UI, Images also work |
+| ✅ | shell_gpt | https://github.com/TheR1D/shell_gpt | OPENAI_API_HOST=http://127.0.0.1:5001 |
+| ✅ | gpt-shell | https://github.com/jla/gpt-shell | OPENAI_API_BASE=http://127.0.0.1:5001/v1 |
+| ✅ | gpt-discord-bot | https://github.com/openai/gpt-discord-bot | OPENAI_API_BASE=http://127.0.0.1:5001/v1 |
+| ✅ | OpenAI for Notepad++ | https://github.com/Krazal/nppopenai | api_url=http://127.0.0.1:5001 in the config file, or environment variables |
+| ✅ | vscode-openai | https://marketplace.visualstudio.com/items?itemName=AndrewButson.vscode-openai | OPENAI_API_BASE=http://127.0.0.1:5001/v1 |
+| ✅❌ | langchain | https://github.com/hwchase17/langchain | OPENAI_API_BASE=http://127.0.0.1:5001/v1 even with a good 30B-4bit model the result is poor so far. It assumes zero shot python/json coding. Some model tailored prompt formatting improves results greatly. |
+| ✅❌ | Auto-GPT | https://github.com/Significant-Gravitas/Auto-GPT | OPENAI_API_BASE=http://127.0.0.1:5001/v1 Same issues as langchain. Also assumes a 4k+ context |
+| ✅❌ | babyagi | https://github.com/yoheinakajima/babyagi | OPENAI_API_BASE=http://127.0.0.1:5001/v1 |
+| ❌ | guidance | https://github.com/microsoft/guidance | logit_bias and logprobs not yet supported |
+
+## Future plans
+
+- better error handling
+- model changing, esp. something for swapping loras or embedding models
+- consider switching to FastAPI + starlette for SSE (openai SSE seems non-standard)
+
+## Bugs? Feedback? Comments? Pull requests?
+
+To enable debugging and get copious output you can set the `OPENEDAI_DEBUG=1` environment variable.
+
+Are all appreciated, please @matatonic and I'll try to get back to you as soon as possible.
diff --git a/extensions/openai/cache_embedding_model.py b/extensions/openai/cache_embedding_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f4f0806a62e3f46cc3a6076e05d3b8b7e87a2b2
--- /dev/null
+++ b/extensions/openai/cache_embedding_model.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+# preload the embedding model, useful for Docker images to prevent re-download on config change
+# Dockerfile:
+# ENV OPENEDAI_EMBEDDING_MODEL=all-mpnet-base-v2 # Optional
+# RUN python3 cache_embedded_model.py
+import os
+
+import sentence_transformers
+
+st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL", "all-mpnet-base-v2")
+model = sentence_transformers.SentenceTransformer(st_model)
diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py
new file mode 100644
index 0000000000000000000000000000000000000000..40d96c1f0cf0a2d72cd5beb7f957a0918f06812c
--- /dev/null
+++ b/extensions/openai/completions.py
@@ -0,0 +1,637 @@
+import time
+
+import tiktoken
+import torch
+import torch.nn.functional as F
+import yaml
+from extensions.openai.defaults import clamp, default, get_default_req_params
+from extensions.openai.errors import InvalidRequestError
+from extensions.openai.utils import debug_msg, end_line
+from modules import shared
+from modules.text_generation import decode, encode, generate_reply
+from transformers import LogitsProcessor, LogitsProcessorList
+
+
+# Thanks to @Cypherfox [Cypherfoxy] for the logits code, blame to @matatonic
+class LogitsBiasProcessor(LogitsProcessor):
+ def __init__(self, logit_bias={}):
+ self.logit_bias = logit_bias
+ if self.logit_bias:
+ self.keys = list([int(key) for key in self.logit_bias.keys()])
+ values = [self.logit_bias[str(key)] for key in self.keys]
+ self.values = torch.tensor(values, dtype=torch.float, device=shared.model.device)
+ debug_msg(f"{self})")
+
+ def __call__(self, input_ids: torch.LongTensor, logits: torch.FloatTensor) -> torch.FloatTensor:
+ if self.logit_bias:
+ debug_msg(logits[0, self.keys], " + ", self.values)
+ logits[0, self.keys] += self.values
+ debug_msg(" --> ", logits[0, self.keys])
+ debug_msg(" max/min ", float(torch.max(logits[0])), float(torch.min(logits[0])))
+ return logits
+
+ def __repr__(self):
+ return f"<{self.__class__.__name__}(logit_bias={self.logit_bias})>"
+
+
+class LogprobProcessor(LogitsProcessor):
+ def __init__(self, logprobs=None):
+ self.logprobs = logprobs
+ self.token_alternatives = {}
+
+ def __call__(self, input_ids: torch.LongTensor, logits: torch.FloatTensor) -> torch.FloatTensor:
+ if self.logprobs is not None: # 0-5
+ log_e_probabilities = F.log_softmax(logits, dim=1)
+ top_values, top_indices = torch.topk(log_e_probabilities, k=self.logprobs + 1)
+ top_tokens = [decode(tok) for tok in top_indices[0]]
+ top_probs = [float(x) for x in top_values[0]]
+ self.token_alternatives = dict(zip(top_tokens, top_probs))
+ debug_msg(repr(self))
+ return logits
+
+ def __repr__(self):
+ return f"<{self.__class__.__name__}(logprobs={self.logprobs}, token_alternatives={self.token_alternatives})>"
+
+
+def convert_logprobs_to_tiktoken(model, logprobs):
+ # more problems than it's worth.
+ # try:
+ # encoder = tiktoken.encoding_for_model(model)
+ # # just pick the first one if it encodes to multiple tokens... 99.9% not required and maybe worse overall.
+ # return dict([(encoder.decode([encoder.encode(token)[0]]), prob) for token, prob in logprobs.items()])
+ # except KeyError:
+ # # assume native tokens if we can't find the tokenizer
+ # return logprobs
+
+ return logprobs
+
+
+def marshal_common_params(body):
+ # Request Parameters
+ # Try to use openai defaults or map them to something with the same intent
+
+ req_params = get_default_req_params()
+
+ # Common request parameters
+ req_params['truncation_length'] = shared.settings['truncation_length']
+ req_params['add_bos_token'] = shared.settings.get('add_bos_token', req_params['add_bos_token'])
+ req_params['seed'] = shared.settings.get('seed', req_params['seed'])
+ req_params['custom_stopping_strings'] = shared.settings['custom_stopping_strings']
+
+ # OpenAI API Parameters
+ # model - ignored for now, TODO: When we can reliably load a model or lora from a name only change this
+ req_params['requested_model'] = body.get('model', shared.model_name)
+
+ req_params['suffix'] = default(body, 'suffix', req_params['suffix'])
+ req_params['temperature'] = clamp(default(body, 'temperature', req_params['temperature']), 0.01, 1.99) # fixup absolute 0.0/2.0
+ req_params['top_p'] = clamp(default(body, 'top_p', req_params['top_p']), 0.01, 1.0)
+ n = default(body, 'n', 1)
+ if n != 1:
+ raise InvalidRequestError(message="Only n = 1 is supported.", param='n')
+
+ if 'stop' in body: # str or array, max len 4 (ignored)
+ if isinstance(body['stop'], str):
+ req_params['stopping_strings'] = [body['stop']] # non-standard parameter
+ elif isinstance(body['stop'], list):
+ req_params['stopping_strings'] = body['stop']
+
+ # presence_penalty - ignored
+ # frequency_penalty - ignored
+
+ # pass through unofficial params
+ req_params['repetition_penalty'] = default(body, 'repetition_penalty', req_params['repetition_penalty'])
+ req_params['encoder_repetition_penalty'] = default(body, 'encoder_repetition_penalty', req_params['encoder_repetition_penalty'])
+
+ # user - ignored
+
+ logits_processor = []
+ logit_bias = body.get('logit_bias', None)
+ if logit_bias: # {str: float, ...}
+ # XXX convert tokens from tiktoken based on requested model
+ # Ex.: 'logit_bias': {'1129': 100, '11442': 100, '16243': 100}
+ try:
+ encoder = tiktoken.encoding_for_model(req_params['requested_model'])
+ new_logit_bias = {}
+ for logit, bias in logit_bias.items():
+ for x in encode(encoder.decode([int(logit)]), add_special_tokens=False)[0]:
+ if int(x) in [0, 1, 2, 29871]: # XXX LLAMA tokens
+ continue
+ new_logit_bias[str(int(x))] = bias
+ debug_msg('logit_bias_map', logit_bias, '->', new_logit_bias)
+ logit_bias = new_logit_bias
+ except KeyError:
+ pass # assume native tokens if we can't find the tokenizer
+
+ logits_processor = [LogitsBiasProcessor(logit_bias)]
+
+ logprobs = None # coming to chat eventually
+ if 'logprobs' in body:
+ logprobs = default(body, 'logprobs', 0) # maybe cap at topk? don't clamp 0-5.
+ req_params['logprob_proc'] = LogprobProcessor(logprobs)
+ logits_processor.extend([req_params['logprob_proc']])
+ else:
+ logprobs = None
+
+ if logits_processor: # requires logits_processor support
+ req_params['logits_processor'] = LogitsProcessorList(logits_processor)
+
+ return req_params
+
+
+def messages_to_prompt(body: dict, req_params: dict, max_tokens):
+ # functions
+ if body.get('functions', []): # chat only
+ raise InvalidRequestError(message="functions is not supported.", param='functions')
+ if body.get('function_call', ''): # chat only, 'none', 'auto', {'name': 'func'}
+ raise InvalidRequestError(message="function_call is not supported.", param='function_call')
+
+ if 'messages' not in body:
+ raise InvalidRequestError(message="messages is required", param='messages')
+
+ messages = body['messages']
+
+ role_formats = {
+ 'user': 'User: {message}\n',
+ 'assistant': 'Assistant: {message}\n',
+ 'system': '{message}',
+ 'context': 'You are a helpful assistant. Answer as concisely as possible.\nUser: I want your assistance.\nAssistant: Sure! What can I do for you?',
+ 'prompt': 'Assistant:',
+ }
+
+ if 'stopping_strings' not in req_params:
+ req_params['stopping_strings'] = []
+
+ # Instruct models can be much better
+ if shared.settings['instruction_template']:
+ try:
+ instruct = yaml.safe_load(open(f"instruction-templates/{shared.settings['instruction_template']}.yaml", 'r'))
+
+ template = instruct['turn_template']
+ system_message_template = "{message}"
+ system_message_default = instruct.get('context', '') # can be missing
+ bot_start = template.find('<|bot|>') # So far, 100% of instruction templates have this token
+ user_message_template = template[:bot_start].replace('<|user-message|>', '{message}').replace('<|user|>', instruct.get('user', ''))
+ bot_message_template = template[bot_start:].replace('<|bot-message|>', '{message}').replace('<|bot|>', instruct.get('bot', ''))
+ bot_prompt = bot_message_template[:bot_message_template.find('{message}')].rstrip(' ')
+
+ role_formats = {
+ 'user': user_message_template,
+ 'assistant': bot_message_template,
+ 'system': system_message_template,
+ 'context': system_message_default,
+ 'prompt': bot_prompt,
+ }
+
+ if 'Alpaca' in shared.settings['instruction_template']:
+ req_params['stopping_strings'].extend(['\n###'])
+ elif instruct['user']: # WizardLM and some others have no user prompt.
+ req_params['stopping_strings'].extend(['\n' + instruct['user'], instruct['user']])
+
+ debug_msg(f"Loaded instruction role format: {shared.settings['instruction_template']}")
+
+ except Exception as e:
+ req_params['stopping_strings'].extend(['\nUser:', 'User:']) # XXX User: prompt here also
+
+ print(f"Exception: When loading instruction-templates/{shared.settings['instruction_template']}.yaml: {repr(e)}")
+ print("Warning: Loaded default instruction-following template for model.")
+
+ else:
+ req_params['stopping_strings'].extend(['\nUser:', 'User:']) # XXX User: prompt here also
+ print("Warning: Loaded default instruction-following template for model.")
+
+ system_msgs = []
+ chat_msgs = []
+
+ # You are ChatGPT, a large language model trained by OpenAI. Answer as concisely as possible. Knowledge cutoff: {knowledge_cutoff} Current date: {current_date}
+ context_msg = role_formats['system'].format(message=role_formats['context']) if role_formats['context'] else ''
+ context_msg = end_line(context_msg)
+
+ # Maybe they sent both? This is not documented in the API, but some clients seem to do this.
+ if 'prompt' in body:
+ context_msg = end_line(role_formats['system'].format(message=body['prompt'])) + context_msg
+
+ for m in messages:
+ if 'role' not in m:
+ raise InvalidRequestError(message="messages: missing role", param='messages')
+ if 'content' not in m:
+ raise InvalidRequestError(message="messages: missing content", param='messages')
+
+ role = m['role']
+ content = m['content']
+ # name = m.get('name', None)
+ # function_call = m.get('function_call', None) # user name or function name with output in content
+ msg = role_formats[role].format(message=content)
+ if role == 'system':
+ system_msgs.extend([msg])
+ elif role == 'function':
+ raise InvalidRequestError(message="role: function is not supported.", param='messages')
+ else:
+ chat_msgs.extend([msg])
+
+ system_msg = '\n'.join(system_msgs)
+ system_msg = end_line(system_msg)
+
+ prompt = system_msg + context_msg + ''.join(chat_msgs) + role_formats['prompt']
+
+ token_count = len(encode(prompt)[0])
+
+ if token_count >= req_params['truncation_length']:
+ err_msg = f"This model maximum context length is {req_params['truncation_length']} tokens. However, your messages resulted in over {token_count} tokens."
+ raise InvalidRequestError(message=err_msg, param='messages')
+
+ if max_tokens > 0 and token_count + max_tokens > req_params['truncation_length']:
+ err_msg = f"This model maximum context length is {req_params['truncation_length']} tokens. However, your messages resulted in over {token_count} tokens and max_tokens is {max_tokens}."
+ print(f"Warning: ${err_msg}")
+ # raise InvalidRequestError(message=err_msg, params='max_tokens')
+
+ return prompt, token_count
+
+
+def chat_completions(body: dict, is_legacy: bool = False) -> dict:
+ # Chat Completions
+ object_type = 'chat.completions'
+ created_time = int(time.time())
+ cmpl_id = "chatcmpl-%d" % (int(time.time() * 1000000000))
+ resp_list = 'data' if is_legacy else 'choices'
+
+ # common params
+ req_params = marshal_common_params(body)
+ req_params['stream'] = False
+ requested_model = req_params.pop('requested_model')
+ logprob_proc = req_params.pop('logprob_proc', None)
+ req_params['top_k'] = 20 # There is no best_of/top_k param for chat, but it is much improved with a higher top_k.
+
+ # chat default max_tokens is 'inf', but also flexible
+ max_tokens = 0
+ max_tokens_str = 'length' if is_legacy else 'max_tokens'
+ if max_tokens_str in body:
+ max_tokens = default(body, max_tokens_str, req_params['truncation_length'])
+ req_params['max_new_tokens'] = max_tokens
+ else:
+ req_params['max_new_tokens'] = req_params['truncation_length']
+
+ # format the prompt from messages
+ prompt, token_count = messages_to_prompt(body, req_params, max_tokens) # updates req_params['stopping_strings']
+
+ # set real max, avoid deeper errors
+ if req_params['max_new_tokens'] + token_count >= req_params['truncation_length']:
+ req_params['max_new_tokens'] = req_params['truncation_length'] - token_count
+
+ stopping_strings = req_params.pop('stopping_strings', [])
+
+ # generate reply #######################################
+ debug_msg({'prompt': prompt, 'req_params': req_params})
+ generator = generate_reply(prompt, req_params, stopping_strings=stopping_strings, is_chat=False)
+
+ answer = ''
+ for a in generator:
+ answer = a
+
+ # strip extra leading space off new generated content
+ if answer and answer[0] == ' ':
+ answer = answer[1:]
+
+ completion_token_count = len(encode(answer)[0])
+ stop_reason = "stop"
+ if token_count + completion_token_count >= req_params['truncation_length'] or completion_token_count >= req_params['max_new_tokens']:
+ stop_reason = "length"
+
+ resp = {
+ "id": cmpl_id,
+ "object": object_type,
+ "created": created_time,
+ "model": shared.model_name, # TODO: add Lora info?
+ resp_list: [{
+ "index": 0,
+ "finish_reason": stop_reason,
+ "message": {"role": "assistant", "content": answer}
+ }],
+ "usage": {
+ "prompt_tokens": token_count,
+ "completion_tokens": completion_token_count,
+ "total_tokens": token_count + completion_token_count
+ }
+ }
+ if logprob_proc: # not official for chat yet
+ top_logprobs = convert_logprobs_to_tiktoken(model=requested_model, logprobs=logprob_proc.token_alternatives)
+ resp[resp_list][0]["logprobs"] = {'top_logprobs': [top_logprobs]}
+ # else:
+ # resp[resp_list][0]["logprobs"] = None
+
+ return resp
+
+
+# generator
+def stream_chat_completions(body: dict, is_legacy: bool = False):
+
+ # Chat Completions
+ stream_object_type = 'chat.completions.chunk'
+ created_time = int(time.time())
+ cmpl_id = "chatcmpl-%d" % (int(time.time() * 1000000000))
+ resp_list = 'data' if is_legacy else 'choices'
+
+ # common params
+ req_params = marshal_common_params(body)
+ req_params['stream'] = True
+ requested_model = req_params.pop('requested_model')
+ logprob_proc = req_params.pop('logprob_proc', None)
+ req_params['top_k'] = 20 # There is no best_of/top_k param for chat, but it is much improved with a higher top_k.
+
+ # chat default max_tokens is 'inf', but also flexible
+ max_tokens = 0
+ max_tokens_str = 'length' if is_legacy else 'max_tokens'
+ if max_tokens_str in body:
+ max_tokens = default(body, max_tokens_str, req_params['truncation_length'])
+ req_params['max_new_tokens'] = max_tokens
+ else:
+ req_params['max_new_tokens'] = req_params['truncation_length']
+
+ # format the prompt from messages
+ prompt, token_count = messages_to_prompt(body, req_params, max_tokens) # updates req_params['stopping_strings']
+
+ # set real max, avoid deeper errors
+ if req_params['max_new_tokens'] + token_count >= req_params['truncation_length']:
+ req_params['max_new_tokens'] = req_params['truncation_length'] - token_count
+
+ def chat_streaming_chunk(content):
+ # begin streaming
+ chunk = {
+ "id": cmpl_id,
+ "object": stream_object_type,
+ "created": created_time,
+ "model": shared.model_name,
+ resp_list: [{
+ "index": 0,
+ "finish_reason": None,
+ # So yeah... do both methods? delta and messages.
+ "message": {'role': 'assistant', 'content': content},
+ "delta": {'role': 'assistant', 'content': content},
+ }],
+ }
+
+ if logprob_proc: # not official for chat yet
+ top_logprobs = convert_logprobs_to_tiktoken(model=requested_model, logprobs=logprob_proc.token_alternatives)
+ chunk[resp_list][0]["logprobs"] = {'top_logprobs': [top_logprobs]}
+ # else:
+ # chunk[resp_list][0]["logprobs"] = None
+ return chunk
+
+ yield chat_streaming_chunk('')
+
+ # generate reply #######################################
+ debug_msg({'prompt': prompt, 'req_params': req_params})
+
+ stopping_strings = req_params.pop('stopping_strings', [])
+
+ generator = generate_reply(prompt, req_params, stopping_strings=stopping_strings, is_chat=False)
+
+ answer = ''
+ seen_content = ''
+ completion_token_count = 0
+
+ for a in generator:
+ answer = a
+
+ len_seen = len(seen_content)
+ new_content = answer[len_seen:]
+
+ if not new_content or chr(0xfffd) in new_content: # partial unicode character, don't send it yet.
+ continue
+
+ seen_content = answer
+
+ # strip extra leading space off new generated content
+ if len_seen == 0 and new_content[0] == ' ':
+ new_content = new_content[1:]
+
+ chunk = chat_streaming_chunk(new_content)
+
+ yield chunk
+
+ # to get the correct token_count, strip leading space if present
+ if answer and answer[0] == ' ':
+ answer = answer[1:]
+
+ completion_token_count = len(encode(answer)[0])
+ stop_reason = "stop"
+ if token_count + completion_token_count >= req_params['truncation_length'] or completion_token_count >= req_params['max_new_tokens']:
+ stop_reason = "length"
+
+ chunk = chat_streaming_chunk('')
+ chunk[resp_list][0]['finish_reason'] = stop_reason
+ chunk['usage'] = {
+ "prompt_tokens": token_count,
+ "completion_tokens": completion_token_count,
+ "total_tokens": token_count + completion_token_count
+ }
+
+ yield chunk
+
+
+def completions(body: dict, is_legacy: bool = False):
+ # Legacy
+ # Text Completions
+ object_type = 'text_completion'
+ created_time = int(time.time())
+ cmpl_id = "conv-%d" % (int(time.time() * 1000000000))
+ resp_list = 'data' if is_legacy else 'choices'
+
+ # ... encoded as a string, array of strings, array of tokens, or array of token arrays.
+ prompt_str = 'context' if is_legacy else 'prompt'
+ if prompt_str not in body:
+ raise InvalidRequestError("Missing required input", param=prompt_str)
+
+ prompt_arg = body[prompt_str]
+ if isinstance(prompt_arg, str) or (isinstance(prompt_arg, list) and isinstance(prompt_arg[0], int)):
+ prompt_arg = [prompt_arg]
+
+ # common params
+ req_params = marshal_common_params(body)
+ req_params['stream'] = False
+ max_tokens_str = 'length' if is_legacy else 'max_tokens'
+ max_tokens = default(body, max_tokens_str, req_params['max_new_tokens'])
+ req_params['max_new_tokens'] = max_tokens
+ requested_model = req_params.pop('requested_model')
+ logprob_proc = req_params.pop('logprob_proc', None)
+ stopping_strings = req_params.pop('stopping_strings', [])
+ # req_params['suffix'] = default(body, 'suffix', req_params['suffix'])
+ req_params['echo'] = default(body, 'echo', req_params['echo'])
+ req_params['top_k'] = default(body, 'best_of', req_params['top_k'])
+
+ resp_list_data = []
+ total_completion_token_count = 0
+ total_prompt_token_count = 0
+
+ for idx, prompt in enumerate(prompt_arg, start=0):
+ if isinstance(prompt[0], int):
+ # token lists
+ if requested_model == shared.model_name:
+ prompt = decode(prompt)[0]
+ else:
+ try:
+ encoder = tiktoken.encoding_for_model(requested_model)
+ prompt = encoder.decode(prompt)
+ except KeyError:
+ prompt = decode(prompt)[0]
+
+ token_count = len(encode(prompt)[0])
+ total_prompt_token_count += token_count
+
+ if token_count + max_tokens > req_params['truncation_length']:
+ err_msg = f"The token count of your prompt ({token_count}) plus max_tokens ({max_tokens}) cannot exceed the model's context length ({req_params['truncation_length']})."
+ # print(f"Warning: ${err_msg}")
+ raise InvalidRequestError(message=err_msg, param=max_tokens_str)
+
+ # generate reply #######################################
+ debug_msg({'prompt': prompt, 'req_params': req_params})
+ generator = generate_reply(prompt, req_params, stopping_strings=stopping_strings, is_chat=False)
+ answer = ''
+
+ for a in generator:
+ answer = a
+
+ # strip extra leading space off new generated content
+ if answer and answer[0] == ' ':
+ answer = answer[1:]
+
+ completion_token_count = len(encode(answer)[0])
+ total_completion_token_count += completion_token_count
+ stop_reason = "stop"
+ if token_count + completion_token_count >= req_params['truncation_length'] or completion_token_count >= max_tokens:
+ stop_reason = "length"
+
+ respi = {
+ "index": idx,
+ "finish_reason": stop_reason,
+ "text": answer,
+ "logprobs": {'top_logprobs': [logprob_proc.token_alternatives]} if logprob_proc else None,
+ }
+
+ resp_list_data.extend([respi])
+
+ resp = {
+ "id": cmpl_id,
+ "object": object_type,
+ "created": created_time,
+ "model": shared.model_name, # TODO: add Lora info?
+ resp_list: resp_list_data,
+ "usage": {
+ "prompt_tokens": total_prompt_token_count,
+ "completion_tokens": total_completion_token_count,
+ "total_tokens": total_prompt_token_count + total_completion_token_count
+ }
+ }
+
+ return resp
+
+
+# generator
+def stream_completions(body: dict, is_legacy: bool = False):
+ # Legacy
+ # Text Completions
+ # object_type = 'text_completion'
+ stream_object_type = 'text_completion.chunk'
+ created_time = int(time.time())
+ cmpl_id = "conv-%d" % (int(time.time() * 1000000000))
+ resp_list = 'data' if is_legacy else 'choices'
+
+ # ... encoded as a string, array of strings, array of tokens, or array of token arrays.
+ prompt_str = 'context' if is_legacy else 'prompt'
+ if prompt_str not in body:
+ raise InvalidRequestError("Missing required input", param=prompt_str)
+
+ prompt = body[prompt_str]
+ req_params = marshal_common_params(body)
+ requested_model = req_params.pop('requested_model')
+ if isinstance(prompt, list):
+ if prompt and isinstance(prompt[0], int):
+ try:
+ encoder = tiktoken.encoding_for_model(requested_model)
+ prompt = encoder.decode(prompt)
+ except KeyError:
+ prompt = decode(prompt)[0]
+ else:
+ raise InvalidRequestError(message="API Batched generation not yet supported.", param=prompt_str)
+
+ # common params
+ req_params['stream'] = True
+ max_tokens_str = 'length' if is_legacy else 'max_tokens'
+ max_tokens = default(body, max_tokens_str, req_params['max_new_tokens'])
+ req_params['max_new_tokens'] = max_tokens
+ logprob_proc = req_params.pop('logprob_proc', None)
+ stopping_strings = req_params.pop('stopping_strings', [])
+ # req_params['suffix'] = default(body, 'suffix', req_params['suffix'])
+ req_params['echo'] = default(body, 'echo', req_params['echo'])
+ req_params['top_k'] = default(body, 'best_of', req_params['top_k'])
+
+ token_count = len(encode(prompt)[0])
+
+ if token_count + max_tokens > req_params['truncation_length']:
+ err_msg = f"The token count of your prompt ({token_count}) plus max_tokens ({max_tokens}) cannot exceed the model's context length ({req_params['truncation_length']})."
+ # print(f"Warning: ${err_msg}")
+ raise InvalidRequestError(message=err_msg, param=max_tokens_str)
+
+ def text_streaming_chunk(content):
+ # begin streaming
+ chunk = {
+ "id": cmpl_id,
+ "object": stream_object_type,
+ "created": created_time,
+ "model": shared.model_name,
+ resp_list: [{
+ "index": 0,
+ "finish_reason": None,
+ "text": content,
+ "logprobs": {'top_logprobs': [logprob_proc.token_alternatives]} if logprob_proc else None,
+ }],
+ }
+
+ return chunk
+
+ yield text_streaming_chunk('')
+
+ # generate reply #######################################
+ debug_msg({'prompt': prompt, 'req_params': req_params})
+ generator = generate_reply(prompt, req_params, stopping_strings=stopping_strings, is_chat=False)
+
+ answer = ''
+ seen_content = ''
+ completion_token_count = 0
+
+ for a in generator:
+ answer = a
+
+ len_seen = len(seen_content)
+ new_content = answer[len_seen:]
+
+ if not new_content or chr(0xfffd) in new_content: # partial unicode character, don't send it yet.
+ continue
+
+ seen_content = answer
+
+ # strip extra leading space off new generated content
+ if len_seen == 0 and new_content[0] == ' ':
+ new_content = new_content[1:]
+
+ chunk = text_streaming_chunk(new_content)
+
+ yield chunk
+
+ # to get the correct count, we strip the leading space if present
+ if answer and answer[0] == ' ':
+ answer = answer[1:]
+
+ completion_token_count = len(encode(answer)[0])
+ stop_reason = "stop"
+ if token_count + completion_token_count >= req_params['truncation_length'] or completion_token_count >= max_tokens:
+ stop_reason = "length"
+
+ chunk = text_streaming_chunk('')
+ chunk[resp_list][0]["finish_reason"] = stop_reason
+ chunk["usage"] = {
+ "prompt_tokens": token_count,
+ "completion_tokens": completion_token_count,
+ "total_tokens": token_count + completion_token_count
+ }
+
+ yield chunk
diff --git a/extensions/openai/defaults.py b/extensions/openai/defaults.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ebade82721d10ded175256c9dd6cdc800b2ed69
--- /dev/null
+++ b/extensions/openai/defaults.py
@@ -0,0 +1,74 @@
+import copy
+
+# Slightly different defaults for OpenAI's API
+# Data type is important, Ex. use 0.0 for a float 0
+default_req_params = {
+ 'max_new_tokens': 16, # 'Inf' for chat
+ 'auto_max_new_tokens': False,
+ 'max_tokens_second': 0,
+ 'temperature': 1.0,
+ 'top_p': 1.0,
+ 'top_k': 1, # choose 20 for chat in absence of another default
+ 'repetition_penalty': 1.18,
+ 'repetition_penalty_range': 0,
+ 'encoder_repetition_penalty': 1.0,
+ 'suffix': None,
+ 'stream': False,
+ 'echo': False,
+ 'seed': -1,
+ # 'n' : default(body, 'n', 1), # 'n' doesn't have a direct map
+ 'truncation_length': 2048, # first use shared.settings value
+ 'add_bos_token': True,
+ 'do_sample': True,
+ 'typical_p': 1.0,
+ 'epsilon_cutoff': 0.0, # In units of 1e-4
+ 'eta_cutoff': 0.0, # In units of 1e-4
+ 'tfs': 1.0,
+ 'top_a': 0.0,
+ 'min_length': 0,
+ 'no_repeat_ngram_size': 0,
+ 'num_beams': 1,
+ 'penalty_alpha': 0.0,
+ 'length_penalty': 1.0,
+ 'early_stopping': False,
+ 'mirostat_mode': 0,
+ 'mirostat_tau': 5.0,
+ 'mirostat_eta': 0.1,
+ 'grammar_string': '',
+ 'guidance_scale': 1,
+ 'negative_prompt': '',
+ 'ban_eos_token': False,
+ 'custom_token_bans': '',
+ 'skip_special_tokens': True,
+ 'custom_stopping_strings': '',
+ # 'logits_processor' - conditionally passed
+ # 'stopping_strings' - temporarily used
+ # 'logprobs' - temporarily used
+ # 'requested_model' - temporarily used
+}
+
+
+def get_default_req_params():
+ return copy.deepcopy(default_req_params)
+
+
+def default(dic, key, default):
+ '''
+ little helper to get defaults if arg is present but None and should be the same type as default.
+ '''
+ val = dic.get(key, default)
+ if not isinstance(val, type(default)):
+ # maybe it's just something like 1 instead of 1.0
+ try:
+ v = type(default)(val)
+ if type(val)(v) == val: # if it's the same value passed in, it's ok.
+ return v
+ except:
+ pass
+
+ val = default
+ return val
+
+
+def clamp(value, minvalue, maxvalue):
+ return max(minvalue, min(value, maxvalue))
diff --git a/extensions/openai/edits.py b/extensions/openai/edits.py
new file mode 100644
index 0000000000000000000000000000000000000000..edf4e6c05611f0f0d2e526f82c4ebc5f477e9c9f
--- /dev/null
+++ b/extensions/openai/edits.py
@@ -0,0 +1,101 @@
+import time
+
+import yaml
+from extensions.openai.defaults import get_default_req_params
+from extensions.openai.errors import InvalidRequestError
+from extensions.openai.utils import debug_msg
+from modules import shared
+from modules.text_generation import encode, generate_reply
+
+
+def edits(instruction: str, input: str, temperature=1.0, top_p=1.0) -> dict:
+
+ created_time = int(time.time() * 1000)
+
+ # Request parameters
+ req_params = get_default_req_params()
+ stopping_strings = []
+
+ # Alpaca is verbose so a good default prompt
+ default_template = (
+ "Below is an instruction that describes a task, paired with an input that provides further context. "
+ "Write a response that appropriately completes the request.\n\n"
+ "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
+ )
+
+ instruction_template = default_template
+
+ # Use the special instruction/input/response template for anything trained like Alpaca
+ if shared.settings['instruction_template']:
+ if 'Alpaca' in shared.settings['instruction_template']:
+ stopping_strings.extend(['\n###'])
+ else:
+ try:
+ instruct = yaml.safe_load(open(f"instruction-templates/{shared.settings['instruction_template']}.yaml", 'r'))
+
+ template = instruct['turn_template']
+ template = template\
+ .replace('<|user|>', instruct.get('user', ''))\
+ .replace('<|bot|>', instruct.get('bot', ''))\
+ .replace('<|user-message|>', '{instruction}\n{input}')
+
+ instruction_template = instruct.get('context', '') + template[:template.find('<|bot-message|>')].rstrip(' ')
+ if instruct['user']:
+ stopping_strings.extend(['\n' + instruct['user'], instruct['user']])
+
+ except Exception as e:
+ instruction_template = default_template
+ print(f"Exception: When loading instruction-templates/{shared.settings['instruction_template']}.yaml: {repr(e)}")
+ print("Warning: Loaded default instruction-following template (Alpaca) for model.")
+ else:
+ stopping_strings.extend(['\n###'])
+ print("Warning: Loaded default instruction-following template (Alpaca) for model.")
+
+ edit_task = instruction_template.format(instruction=instruction, input=input)
+
+ truncation_length = shared.settings['truncation_length']
+
+ token_count = len(encode(edit_task)[0])
+ max_tokens = truncation_length - token_count
+
+ if max_tokens < 1:
+ err_msg = f"This model maximum context length is {truncation_length} tokens. However, your messages resulted in over {truncation_length - max_tokens} tokens."
+ raise InvalidRequestError(err_msg, param='input')
+
+ req_params['max_new_tokens'] = max_tokens
+ req_params['truncation_length'] = truncation_length
+ req_params['temperature'] = temperature
+ req_params['top_p'] = top_p
+ req_params['seed'] = shared.settings.get('seed', req_params['seed'])
+ req_params['add_bos_token'] = shared.settings.get('add_bos_token', req_params['add_bos_token'])
+ req_params['custom_stopping_strings'] = shared.settings['custom_stopping_strings']
+
+ debug_msg({'edit_template': edit_task, 'req_params': req_params, 'token_count': token_count})
+
+ generator = generate_reply(edit_task, req_params, stopping_strings=stopping_strings, is_chat=False)
+
+ answer = ''
+ for a in generator:
+ answer = a
+
+ # some reply's have an extra leading space to fit the instruction template, just clip it off from the reply.
+ if edit_task[-1] != '\n' and answer and answer[0] == ' ':
+ answer = answer[1:]
+
+ completion_token_count = len(encode(answer)[0])
+
+ resp = {
+ "object": "edit",
+ "created": created_time,
+ "choices": [{
+ "text": answer,
+ "index": 0,
+ }],
+ "usage": {
+ "prompt_tokens": token_count,
+ "completion_tokens": completion_token_count,
+ "total_tokens": token_count + completion_token_count
+ }
+ }
+
+ return resp
diff --git a/extensions/openai/embeddings.py b/extensions/openai/embeddings.py
new file mode 100644
index 0000000000000000000000000000000000000000..96f44d91d7d5ae5fd78b39b18ce7bdfe54c84c4e
--- /dev/null
+++ b/extensions/openai/embeddings.py
@@ -0,0 +1,80 @@
+import os
+
+import numpy as np
+from extensions.openai.errors import ServiceUnavailableError
+from extensions.openai.utils import debug_msg, float_list_to_base64
+from sentence_transformers import SentenceTransformer
+
+embeddings_params_initialized = False
+# using 'lazy loading' to avoid circular import
+# so this function will be executed only once
+def initialize_embedding_params():
+ global embeddings_params_initialized
+ if not embeddings_params_initialized:
+ global st_model, embeddings_model, embeddings_device
+ from extensions.openai.script import params
+ st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL", params.get('embedding_model', 'all-mpnet-base-v2'))
+ embeddings_model = None
+ # OPENEDAI_EMBEDDING_DEVICE: auto (best or cpu), cpu, cuda, ipu, xpu, mkldnn, opengl, opencl, ideep, hip, ve, fpga, ort, xla, lazy, vulkan, mps, meta, hpu, mtia, privateuseone
+ embeddings_device = os.environ.get("OPENEDAI_EMBEDDING_DEVICE", params.get('embedding_device', 'cpu'))
+ if embeddings_device.lower() == 'auto':
+ embeddings_device = None
+ embeddings_params_initialized = True
+
+
+def load_embedding_model(model: str) -> SentenceTransformer:
+ initialize_embedding_params()
+ global embeddings_device, embeddings_model
+ try:
+ embeddings_model = 'loading...' # flag
+ # see: https://www.sbert.net/docs/package_reference/SentenceTransformer.html#sentence_transformers.SentenceTransformer
+ emb_model = SentenceTransformer(model, device=embeddings_device)
+ # ... emb_model.device doesn't seem to work, always cpu anyways? but specify cpu anyways to free more VRAM
+ print(f"\nLoaded embedding model: {model} on {emb_model.device} [always seems to say 'cpu', even if 'cuda'], max sequence length: {emb_model.max_seq_length}")
+ except Exception as e:
+ embeddings_model = None
+ raise ServiceUnavailableError(f"Error: Failed to load embedding model: {model}", internal_message=repr(e))
+
+ return emb_model
+
+
+def get_embeddings_model() -> SentenceTransformer:
+ initialize_embedding_params()
+ global embeddings_model, st_model
+ if st_model and not embeddings_model:
+ embeddings_model = load_embedding_model(st_model) # lazy load the model
+ return embeddings_model
+
+
+def get_embeddings_model_name() -> str:
+ initialize_embedding_params()
+ global st_model
+ return st_model
+
+
+def get_embeddings(input: list) -> np.ndarray:
+ return get_embeddings_model().encode(input, convert_to_numpy=True, normalize_embeddings=True, convert_to_tensor=False, device=embeddings_device)
+
+
+def embeddings(input: list, encoding_format: str) -> dict:
+
+ embeddings = get_embeddings(input)
+
+ if encoding_format == "base64":
+ data = [{"object": "embedding", "embedding": float_list_to_base64(emb), "index": n} for n, emb in enumerate(embeddings)]
+ else:
+ data = [{"object": "embedding", "embedding": emb.tolist(), "index": n} for n, emb in enumerate(embeddings)]
+
+ response = {
+ "object": "list",
+ "data": data,
+ "model": st_model, # return the real model
+ "usage": {
+ "prompt_tokens": 0,
+ "total_tokens": 0,
+ }
+ }
+
+ debug_msg(f"Embeddings return size: {len(embeddings[0])}, number: {len(embeddings)}")
+
+ return response
diff --git a/extensions/openai/errors.py b/extensions/openai/errors.py
new file mode 100644
index 0000000000000000000000000000000000000000..838d1e7cc6f5577aad94bcd01f33f13ba49b349e
--- /dev/null
+++ b/extensions/openai/errors.py
@@ -0,0 +1,31 @@
+class OpenAIError(Exception):
+ def __init__(self, message=None, code=500, internal_message=''):
+ self.message = message
+ self.code = code
+ self.internal_message = internal_message
+
+ def __repr__(self):
+ return "%s(message=%r, code=%d)" % (
+ self.__class__.__name__,
+ self.message,
+ self.code,
+ )
+
+
+class InvalidRequestError(OpenAIError):
+ def __init__(self, message, param, code=400, internal_message=''):
+ super().__init__(message, code, internal_message)
+ self.param = param
+
+ def __repr__(self):
+ return "%s(message=%r, code=%d, param=%s)" % (
+ self.__class__.__name__,
+ self.message,
+ self.code,
+ self.param,
+ )
+
+
+class ServiceUnavailableError(OpenAIError):
+ def __init__(self, message="Service unavailable, please try again later.", code=503, internal_message=''):
+ super().__init__(message, code, internal_message)
diff --git a/extensions/openai/images.py b/extensions/openai/images.py
new file mode 100644
index 0000000000000000000000000000000000000000..350ea617267926b4f53f9fa0486d3e005f931be6
--- /dev/null
+++ b/extensions/openai/images.py
@@ -0,0 +1,68 @@
+import os
+import time
+
+import requests
+from extensions.openai.errors import ServiceUnavailableError
+
+
+def generations(prompt: str, size: str, response_format: str, n: int):
+ # Stable Diffusion callout wrapper for txt2img
+ # Low effort implementation for compatibility. With only "prompt" being passed and assuming DALL-E
+ # the results will be limited and likely poor. SD has hundreds of models and dozens of settings.
+ # If you want high quality tailored results you should just use the Stable Diffusion API directly.
+ # it's too general an API to try and shape the result with specific tags like negative prompts
+ # or "masterpiece", etc. SD configuration is beyond the scope of this API.
+ # At this point I will not add the edits and variations endpoints (ie. img2img) because they
+ # require changing the form data handling to accept multipart form data, also to properly support
+ # url return types will require file management and a web serving files... Perhaps later!
+ base_model_size = 512 if 'SD_BASE_MODEL_SIZE' not in os.environ else int(os.environ.get('SD_BASE_MODEL_SIZE', 512))
+ sd_defaults = {
+ 'sampler_name': 'DPM++ 2M Karras', # vast improvement
+ 'steps': 30,
+ }
+
+ width, height = [int(x) for x in size.split('x')] # ignore the restrictions on size
+
+ # to hack on better generation, edit default payload.
+ payload = {
+ 'prompt': prompt, # ignore prompt limit of 1000 characters
+ 'width': width,
+ 'height': height,
+ 'batch_size': n,
+ }
+ payload.update(sd_defaults)
+
+ scale = min(width, height) / base_model_size
+ if scale >= 1.2:
+ # for better performance with the default size (1024), and larger res.
+ scaler = {
+ 'width': width // scale,
+ 'height': height // scale,
+ 'hr_scale': scale,
+ 'enable_hr': True,
+ 'hr_upscaler': 'Latent',
+ 'denoising_strength': 0.68,
+ }
+ payload.update(scaler)
+
+ resp = {
+ 'created': int(time.time()),
+ 'data': []
+ }
+ from extensions.openai.script import params
+ # TODO: support SD_WEBUI_AUTH username:password pair.
+ sd_url = f"{os.environ.get('SD_WEBUI_URL', params.get('sd_webui_url', ''))}/sdapi/v1/txt2img"
+
+ response = requests.post(url=sd_url, json=payload)
+ r = response.json()
+ if response.status_code != 200 or 'images' not in r:
+ print(r)
+ raise ServiceUnavailableError(r.get('error', 'Unknown error calling Stable Diffusion'), code=response.status_code, internal_message=r.get('errors', None))
+ # r['parameters']...
+ for b64_json in r['images']:
+ if response_format == 'b64_json':
+ resp['data'].extend([{'b64_json': b64_json}])
+ else:
+ resp['data'].extend([{'url': f'data:image/png;base64,{b64_json}'}]) # yeah it's lazy. requests.get() will not work with this
+
+ return resp
diff --git a/extensions/openai/models.py b/extensions/openai/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..83e550f8f2b6c858dd76fdde4515e6164c88b912
--- /dev/null
+++ b/extensions/openai/models.py
@@ -0,0 +1,78 @@
+from extensions.openai.embeddings import get_embeddings_model_name
+from extensions.openai.errors import OpenAIError
+from modules import shared
+from modules.models import load_model as _load_model
+from modules.models import unload_model
+from modules.models_settings import get_model_metadata, update_model_parameters
+from modules.utils import get_available_models
+
+
+def get_current_model_list() -> list:
+ return [shared.model_name] # The real chat/completions model, maybe "None"
+
+
+def get_pseudo_model_list() -> list:
+ return [ # these are expected by so much, so include some here as a dummy
+ 'gpt-3.5-turbo',
+ 'text-embedding-ada-002',
+ ]
+
+
+def load_model(model_name: str) -> dict:
+ resp = {
+ "id": model_name,
+ "object": "engine",
+ "owner": "self",
+ "ready": True,
+ }
+ if model_name not in get_pseudo_model_list() + [get_embeddings_model_name()] + get_current_model_list(): # Real model only
+ # No args. Maybe it works anyways!
+ # TODO: hack some heuristics into args for better results
+
+ shared.model_name = model_name
+ unload_model()
+
+ model_settings = get_model_metadata(shared.model_name)
+ shared.settings.update({k: v for k, v in model_settings.items() if k in shared.settings})
+ update_model_parameters(model_settings, initial=True)
+
+ if shared.settings['mode'] != 'instruct':
+ shared.settings['instruction_template'] = None
+
+ shared.model, shared.tokenizer = _load_model(shared.model_name)
+
+ if not shared.model: # load failed.
+ shared.model_name = "None"
+ raise OpenAIError(f"Model load failed for: {shared.model_name}")
+
+ return resp
+
+
+def list_models(is_legacy: bool = False) -> dict:
+ # TODO: Lora's?
+ all_model_list = get_current_model_list() + [get_embeddings_model_name()] + get_pseudo_model_list() + get_available_models()
+
+ models = {}
+
+ if is_legacy:
+ models = [{"id": id, "object": "engine", "owner": "user", "ready": True} for id in all_model_list]
+ if not shared.model:
+ models[0]['ready'] = False
+ else:
+ models = [{"id": id, "object": "model", "owned_by": "user", "permission": []} for id in all_model_list]
+
+ resp = {
+ "object": "list",
+ "data": models,
+ }
+
+ return resp
+
+
+def model_info(model_name: str) -> dict:
+ return {
+ "id": model_name,
+ "object": "model",
+ "owned_by": "user",
+ "permission": []
+ }
diff --git a/extensions/openai/moderations.py b/extensions/openai/moderations.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d2d4c1dacb79dfa327bfbb3a93419cfddb49d98
--- /dev/null
+++ b/extensions/openai/moderations.py
@@ -0,0 +1,68 @@
+import time
+
+import numpy as np
+from extensions.openai.embeddings import get_embeddings
+from numpy.linalg import norm
+
+moderations_disabled = False # return 0/false
+category_embeddings = None
+antonym_embeddings = None
+categories = ["sexual", "hate", "harassment", "self-harm", "sexual/minors", "hate/threatening", "violence/graphic", "self-harm/intent", "self-harm/instructions", "harassment/threatening", "violence"]
+flag_threshold = 0.5
+
+
+def get_category_embeddings() -> dict:
+ global category_embeddings, categories
+ if category_embeddings is None:
+ embeddings = get_embeddings(categories).tolist()
+ category_embeddings = dict(zip(categories, embeddings))
+
+ return category_embeddings
+
+
+def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
+ return np.dot(a, b) / (norm(a) * norm(b))
+
+
+# seems most openai like with all-mpnet-base-v2
+def mod_score(a: np.ndarray, b: np.ndarray) -> float:
+ return 2.0 * np.dot(a, b)
+
+
+def moderations(input):
+ global category_embeddings, categories, flag_threshold, moderations_disabled
+ results = {
+ "id": f"modr-{int(time.time()*1e9)}",
+ "model": "text-moderation-001",
+ "results": [],
+ }
+
+ if moderations_disabled:
+ results['results'] = [{
+ 'categories': dict([(C, False) for C in categories]),
+ 'category_scores': dict([(C, 0.0) for C in categories]),
+ 'flagged': False,
+ }]
+ return results
+
+ category_embeddings = get_category_embeddings()
+
+ # input, string or array
+ if isinstance(input, str):
+ input = [input]
+
+ for in_str in input:
+ for ine in get_embeddings([in_str]):
+ category_scores = dict([(C, mod_score(category_embeddings[C], ine)) for C in categories])
+ category_flags = dict([(C, bool(category_scores[C] > flag_threshold)) for C in categories])
+ flagged = any(category_flags.values())
+
+ results['results'].extend([{
+ 'flagged': flagged,
+ 'categories': category_flags,
+ 'category_scores': category_scores,
+ }])
+
+ print(results)
+
+ return results
diff --git a/extensions/openai/requirements.txt b/extensions/openai/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8c63b5e1ac53368b327833849948160d51010508
--- /dev/null
+++ b/extensions/openai/requirements.txt
@@ -0,0 +1,4 @@
+SpeechRecognition==3.10.0
+flask_cloudflared==0.0.12
+sentence-transformers
+tiktoken
diff --git a/extensions/openai/script.py b/extensions/openai/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..097689bb4bd96f0d4f28eb51f9776cfb89245041
--- /dev/null
+++ b/extensions/openai/script.py
@@ -0,0 +1,352 @@
+import json
+import os
+import ssl
+import traceback
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+from threading import Thread
+
+import extensions.openai.completions as OAIcompletions
+import extensions.openai.edits as OAIedits
+import extensions.openai.embeddings as OAIembeddings
+import extensions.openai.images as OAIimages
+import extensions.openai.models as OAImodels
+import extensions.openai.moderations as OAImoderations
+from extensions.openai.defaults import clamp, default, get_default_req_params
+from extensions.openai.errors import (
+ InvalidRequestError,
+ OpenAIError,
+ ServiceUnavailableError
+)
+from extensions.openai.tokens import token_count, token_decode, token_encode
+from extensions.openai.utils import debug_msg
+from modules import shared
+
+import cgi
+import speech_recognition as sr
+from pydub import AudioSegment
+
+params = {
+ # default params
+ 'port': 5001,
+ 'embedding_device': 'cpu',
+ 'embedding_model': 'all-mpnet-base-v2',
+
+ # optional params
+ 'sd_webui_url': '',
+ 'debug': 0
+}
+
+class Handler(BaseHTTPRequestHandler):
+ def send_access_control_headers(self):
+ self.send_header("Access-Control-Allow-Origin", "*")
+ self.send_header("Access-Control-Allow-Credentials", "true")
+ self.send_header(
+ "Access-Control-Allow-Methods",
+ "GET,HEAD,OPTIONS,POST,PUT"
+ )
+ self.send_header(
+ "Access-Control-Allow-Headers",
+ "Origin, Accept, X-Requested-With, Content-Type, "
+ "Access-Control-Request-Method, Access-Control-Request-Headers, "
+ "Authorization"
+ )
+
+ def do_OPTIONS(self):
+ self.send_response(200)
+ self.send_access_control_headers()
+ self.send_header('Content-Type', 'application/json')
+ self.end_headers()
+ self.wfile.write("OK".encode('utf-8'))
+
+ def start_sse(self):
+ self.send_response(200)
+ self.send_access_control_headers()
+ self.send_header('Content-Type', 'text/event-stream')
+ self.send_header('Cache-Control', 'no-cache')
+ # self.send_header('Connection', 'keep-alive')
+ self.end_headers()
+
+ def send_sse(self, chunk: dict):
+ response = 'data: ' + json.dumps(chunk) + '\r\n\r\n'
+ debug_msg(response[:-4])
+ self.wfile.write(response.encode('utf-8'))
+
+ def end_sse(self):
+ response = 'data: [DONE]\r\n\r\n'
+ debug_msg(response[:-4])
+ self.wfile.write(response.encode('utf-8'))
+
+ def return_json(self, ret: dict, code: int = 200, no_debug=False):
+ self.send_response(code)
+ self.send_access_control_headers()
+ self.send_header('Content-Type', 'application/json')
+
+ response = json.dumps(ret)
+ r_utf8 = response.encode('utf-8')
+
+ self.send_header('Content-Length', str(len(r_utf8)))
+ self.end_headers()
+
+ self.wfile.write(r_utf8)
+ if not no_debug:
+ debug_msg(r_utf8)
+
+ def openai_error(self, message, code=500, error_type='APIError', param='', internal_message=''):
+
+ error_resp = {
+ 'error': {
+ 'message': message,
+ 'code': code,
+ 'type': error_type,
+ 'param': param,
+ }
+ }
+ if internal_message:
+ print(error_type, message)
+ print(internal_message)
+ # error_resp['internal_message'] = internal_message
+
+ self.return_json(error_resp, code)
+
+ def openai_error_handler(func):
+ def wrapper(self):
+ try:
+ func(self)
+ except InvalidRequestError as e:
+ self.openai_error(e.message, e.code, e.__class__.__name__, e.param, internal_message=e.internal_message)
+ except OpenAIError as e:
+ self.openai_error(e.message, e.code, e.__class__.__name__, internal_message=e.internal_message)
+ except Exception as e:
+ self.openai_error(repr(e), 500, 'OpenAIError', internal_message=traceback.format_exc())
+
+ return wrapper
+
+ @openai_error_handler
+ def do_GET(self):
+ debug_msg(self.requestline)
+ debug_msg(self.headers)
+
+ if self.path.startswith('/v1/engines') or self.path.startswith('/v1/models'):
+ is_legacy = 'engines' in self.path
+ is_list = self.path in ['/v1/engines', '/v1/models']
+ if is_legacy and not is_list:
+ model_name = self.path[self.path.find('/v1/engines/') + len('/v1/engines/'):]
+ resp = OAImodels.load_model(model_name)
+ elif is_list:
+ resp = OAImodels.list_models(is_legacy)
+ else:
+ model_name = self.path[len('/v1/models/'):]
+ resp = OAImodels.model_info(model_name)
+
+ self.return_json(resp)
+
+ elif '/billing/usage' in self.path:
+ # Ex. /v1/dashboard/billing/usage?start_date=2023-05-01&end_date=2023-05-31
+ self.return_json({"total_usage": 0}, no_debug=True)
+
+ else:
+ self.send_error(404)
+
+ @openai_error_handler
+ def do_POST(self):
+
+ if '/v1/audio/transcriptions' in self.path:
+ r = sr.Recognizer()
+
+ # Parse the form data
+ form = cgi.FieldStorage(
+ fp=self.rfile,
+ headers=self.headers,
+ environ={'REQUEST_METHOD': 'POST', 'CONTENT_TYPE': self.headers['Content-Type']}
+ )
+
+ audio_file = form['file'].file
+ audio_data = AudioSegment.from_file(audio_file)
+
+ # Convert AudioSegment to raw data
+ raw_data = audio_data.raw_data
+
+ # Create AudioData object
+ audio_data = sr.AudioData(raw_data, audio_data.frame_rate, audio_data.sample_width)
+ whipser_language = form.getvalue('language', None)
+ whipser_model = form.getvalue('model', 'tiny') # Use the model from the form data if it exists, otherwise default to tiny
+
+ transcription = {"text": ""}
+
+ try:
+ transcription["text"] = r.recognize_whisper(audio_data, language=whipser_language, model=whipser_model)
+ except sr.UnknownValueError:
+ print("Whisper could not understand audio")
+ transcription["text"] = "Whisper could not understand audio UnknownValueError"
+ except sr.RequestError as e:
+ print("Could not request results from Whisper", e)
+ transcription["text"] = "Whisper could not understand audio RequestError"
+
+ self.return_json(transcription, no_debug=True)
+ return
+
+ debug_msg(self.requestline)
+ debug_msg(self.headers)
+
+ content_length = self.headers.get('Content-Length')
+ transfer_encoding = self.headers.get('Transfer-Encoding')
+
+ if content_length:
+ body = json.loads(self.rfile.read(int(content_length)).decode('utf-8'))
+ elif transfer_encoding == 'chunked':
+ chunks = []
+ while True:
+ chunk_size = int(self.rfile.readline(), 16) # Read the chunk size
+ if chunk_size == 0:
+ break # End of chunks
+ chunks.append(self.rfile.read(chunk_size))
+ self.rfile.readline() # Consume the trailing newline after each chunk
+ body = json.loads(b''.join(chunks).decode('utf-8'))
+ else:
+ self.send_response(400, "Bad Request: Either Content-Length or Transfer-Encoding header expected.")
+ self.end_headers()
+ return
+
+ debug_msg(body)
+
+ if '/completions' in self.path or '/generate' in self.path:
+
+ if not shared.model:
+ raise ServiceUnavailableError("No model loaded.")
+
+ is_legacy = '/generate' in self.path
+ is_streaming = body.get('stream', False)
+
+ if is_streaming:
+ self.start_sse()
+
+ response = []
+ if 'chat' in self.path:
+ response = OAIcompletions.stream_chat_completions(body, is_legacy=is_legacy)
+ else:
+ response = OAIcompletions.stream_completions(body, is_legacy=is_legacy)
+
+ for resp in response:
+ self.send_sse(resp)
+
+ self.end_sse()
+
+ else:
+ response = ''
+ if 'chat' in self.path:
+ response = OAIcompletions.chat_completions(body, is_legacy=is_legacy)
+ else:
+ response = OAIcompletions.completions(body, is_legacy=is_legacy)
+
+ self.return_json(response)
+
+ elif '/edits' in self.path:
+ # deprecated
+
+ if not shared.model:
+ raise ServiceUnavailableError("No model loaded.")
+
+ req_params = get_default_req_params()
+
+ instruction = body['instruction']
+ input = body.get('input', '')
+ temperature = clamp(default(body, 'temperature', req_params['temperature']), 0.001, 1.999) # fixup absolute 0.0
+ top_p = clamp(default(body, 'top_p', req_params['top_p']), 0.001, 1.0)
+
+ response = OAIedits.edits(instruction, input, temperature, top_p)
+
+ self.return_json(response)
+
+ elif '/images/generations' in self.path:
+ if not os.environ.get('SD_WEBUI_URL', params.get('sd_webui_url', '')):
+ raise ServiceUnavailableError("Stable Diffusion not available. SD_WEBUI_URL not set.")
+
+ prompt = body['prompt']
+ size = default(body, 'size', '1024x1024')
+ response_format = default(body, 'response_format', 'url') # or b64_json
+ n = default(body, 'n', 1) # ignore the batch limits of max 10
+
+ response = OAIimages.generations(prompt=prompt, size=size, response_format=response_format, n=n)
+
+ self.return_json(response, no_debug=True)
+
+ elif '/embeddings' in self.path:
+ encoding_format = body.get('encoding_format', '')
+
+ input = body.get('input', body.get('text', ''))
+ if not input:
+ raise InvalidRequestError("Missing required argument input", params='input')
+
+ if type(input) is str:
+ input = [input]
+
+ response = OAIembeddings.embeddings(input, encoding_format)
+
+ self.return_json(response, no_debug=True)
+
+ elif '/moderations' in self.path:
+ input = body['input']
+ if not input:
+ raise InvalidRequestError("Missing required argument input", params='input')
+
+ response = OAImoderations.moderations(input)
+
+ self.return_json(response, no_debug=True)
+
+ elif self.path == '/api/v1/token-count':
+ # NOT STANDARD. lifted from the api extension, but it's still very useful to calculate tokenized length client side.
+ response = token_count(body['prompt'])
+
+ self.return_json(response, no_debug=True)
+
+ elif self.path == '/api/v1/token/encode':
+ # NOT STANDARD. needed to support logit_bias, logprobs and token arrays for native models
+ encoding_format = body.get('encoding_format', '')
+
+ response = token_encode(body['input'], encoding_format)
+
+ self.return_json(response, no_debug=True)
+
+ elif self.path == '/api/v1/token/decode':
+ # NOT STANDARD. needed to support logit_bias, logprobs and token arrays for native models
+ encoding_format = body.get('encoding_format', '')
+
+ response = token_decode(body['input'], encoding_format)
+
+ self.return_json(response, no_debug=True)
+
+ else:
+ self.send_error(404)
+
+
+def run_server():
+ port = int(os.environ.get('OPENEDAI_PORT', params.get('port', 5001)))
+ server_addr = ('0.0.0.0' if shared.args.listen else '127.0.0.1', port)
+ server = ThreadingHTTPServer(server_addr, Handler)
+
+ ssl_certfile=os.environ.get('OPENEDAI_CERT_PATH', shared.args.ssl_certfile)
+ ssl_keyfile=os.environ.get('OPENEDAI_KEY_PATH', shared.args.ssl_keyfile)
+ ssl_verify=True if (ssl_keyfile and ssl_certfile) else False
+ if ssl_verify:
+ context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ context.load_cert_chain(ssl_certfile, ssl_keyfile)
+ server.socket = context.wrap_socket(server.socket, server_side=True)
+
+ if shared.args.share:
+ try:
+ from flask_cloudflared import _run_cloudflared
+ public_url = _run_cloudflared(port, port + 1)
+ print(f'OpenAI compatible API ready at: OPENAI_API_BASE={public_url}/v1')
+ except ImportError:
+ print('You should install flask_cloudflared manually')
+ else:
+ if ssl_verify:
+ print(f'OpenAI compatible API ready at: OPENAI_API_BASE=https://{server_addr[0]}:{server_addr[1]}/v1')
+ else:
+ print(f'OpenAI compatible API ready at: OPENAI_API_BASE=http://{server_addr[0]}:{server_addr[1]}/v1')
+
+ server.serve_forever()
+
+
+def setup():
+ Thread(target=run_server, daemon=True).start()
diff --git a/extensions/openai/tokens.py b/extensions/openai/tokens.py
new file mode 100644
index 0000000000000000000000000000000000000000..0338e7f25aaa9d8b82ed8c69ab9cae9996130629
--- /dev/null
+++ b/extensions/openai/tokens.py
@@ -0,0 +1,36 @@
+from modules.text_generation import decode, encode
+
+
+def token_count(prompt):
+ tokens = encode(prompt)[0]
+
+ return {
+ 'results': [{
+ 'tokens': len(tokens)
+ }]
+ }
+
+
+def token_encode(input, encoding_format):
+ # if isinstance(input, list):
+ tokens = encode(input)[0]
+
+ return {
+ 'results': [{
+ 'tokens': tokens,
+ 'length': len(tokens),
+ }]
+ }
+
+
+def token_decode(tokens, encoding_format):
+ # if isinstance(input, list):
+ # if encoding_format == "base64":
+ # tokens = base64_to_float_list(tokens)
+ output = decode(tokens)[0]
+
+ return {
+ 'results': [{
+ 'text': output
+ }]
+ }
diff --git a/extensions/openai/utils.py b/extensions/openai/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..49fc9510ddeec92e27052d471a586746033d119b
--- /dev/null
+++ b/extensions/openai/utils.py
@@ -0,0 +1,30 @@
+import base64
+import os
+
+import numpy as np
+
+def float_list_to_base64(float_array: np.ndarray) -> str:
+ # Convert the list to a float32 array that the OpenAPI client expects
+ # float_array = np.array(float_list, dtype="float32")
+
+ # Get raw bytes
+ bytes_array = float_array.tobytes()
+
+ # Encode bytes into base64
+ encoded_bytes = base64.b64encode(bytes_array)
+
+ # Turn raw base64 encoded bytes into ASCII
+ ascii_string = encoded_bytes.decode('ascii')
+ return ascii_string
+
+
+def end_line(s):
+ if s and s[-1] != '\n':
+ s = s + '\n'
+ return s
+
+
+def debug_msg(*args, **kwargs):
+ from extensions.openai.script import params
+ if os.environ.get("OPENEDAI_DEBUG", params.get('debug', 0)):
+ print(*args, **kwargs)
diff --git a/extensions/perplexity_colors/script.py b/extensions/perplexity_colors/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a986ac40b4194a5751015241d82046ce95cbca2
--- /dev/null
+++ b/extensions/perplexity_colors/script.py
@@ -0,0 +1,309 @@
+import time
+
+import gradio
+import numpy as np
+import torch
+from transformers import LogitsProcessor
+
+from modules import html_generator, shared
+
+params = {
+ 'active': True,
+ 'color_by_perplexity': False,
+ 'color_by_probability': False,
+ 'ppl_scale': 15.0, # No slider for this right now, because I don't think it really needs to be changed. Very large perplexity scores don't show up often.
+ 'probability_dropdown': False,
+ 'verbose': False # For debugging mostly
+}
+
+
+class PerplexityLogits(LogitsProcessor):
+ def __init__(self, verbose=False):
+ self.generated_token_ids = []
+ self.selected_probs = []
+ self.top_token_ids_list = []
+ self.top_probs_list = []
+ self.perplexities_list = []
+ self.last_probs = None
+ self.verbose = verbose
+
+ def __call__(self, input_ids, scores):
+ # t0 = time.time()
+ probs = torch.softmax(scores, dim=-1, dtype=torch.float)
+ log_probs = torch.nan_to_num(torch.log(probs)) # Note: This is to convert log(0) nan to 0, but probs*log_probs makes this 0 not affect the perplexity.
+ entropy = -torch.sum(probs * log_probs)
+ entropy = entropy.cpu().numpy()
+ perplexity = round(float(np.exp(entropy)), 4)
+ self.perplexities_list.append(perplexity)
+ last_token_id = int(input_ids[0][-1].cpu().numpy().item())
+ # Store the generated tokens (not sure why this isn't accessible in the output endpoint!)
+ self.generated_token_ids.append(last_token_id)
+ # Get last probability, and add to the list if it wasn't there
+ if len(self.selected_probs) > 0:
+ # Is the selected token in the top tokens?
+ if self.verbose:
+ print('Probs: Token after', shared.tokenizer.decode(last_token_id))
+ print('Probs:', [shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1][0]])
+ print('Probs:', [round(float(prob), 4) for prob in self.top_probs_list[-1][0]])
+ if last_token_id in self.top_token_ids_list[-1][0]:
+ idx = self.top_token_ids_list[-1][0].index(last_token_id)
+ self.selected_probs.append(self.top_probs_list[-1][0][idx])
+ else:
+ self.top_token_ids_list[-1][0].append(last_token_id)
+ last_prob = round(float(self.last_probs[last_token_id]), 4)
+ self.top_probs_list[-1][0].append(last_prob)
+ self.selected_probs.append(last_prob)
+ else:
+ self.selected_probs.append(1.0) # Placeholder for the last token of the prompt
+
+ if self.verbose:
+ pplbar = "-"
+ if not np.isnan(perplexity):
+ pplbar = "*" * round(perplexity)
+ print(f"PPL: Token after {shared.tokenizer.decode(last_token_id)}\t{perplexity:.2f}\t{pplbar}")
+
+ # Get top 5 probabilities
+ top_tokens_and_probs = torch.topk(probs, 5)
+ top_probs = top_tokens_and_probs.values.cpu().numpy().astype(float).tolist()
+ top_token_ids = top_tokens_and_probs.indices.cpu().numpy().astype(int).tolist()
+
+ self.top_token_ids_list.append(top_token_ids)
+ self.top_probs_list.append(top_probs)
+
+ probs = probs.cpu().numpy().flatten()
+ self.last_probs = probs # Need to keep this as a reference for top probs
+
+ # t1 = time.time()
+ # print(f"PPL Processor: {(t1-t0):.3f} s")
+ # About 1 ms, though occasionally up to around 100 ms, not sure why...
+ # Doesn't actually modify the logits!
+ return scores
+
+
+# Stores the perplexity and top probabilities
+ppl_logits_processor = None
+
+
+def logits_processor_modifier(logits_processor_list, input_ids):
+ global ppl_logits_processor
+ if params['active']:
+ ppl_logits_processor = PerplexityLogits(verbose=params['verbose'])
+ logits_processor_list.append(ppl_logits_processor)
+
+
+def output_modifier(text):
+ global ppl_logits_processor
+ # t0 = time.time()
+
+ if not params['active']:
+ return text
+
+ # TODO: It's probably more efficient to do this above rather than modifying all these lists
+ # Remove last element of perplexities_list, top_token_ids_list, top_tokens_list, top_probs_list since everything is off by one because this extension runs before generation
+ perplexities = ppl_logits_processor.perplexities_list[:-1]
+ top_token_ids_list = ppl_logits_processor.top_token_ids_list[:-1]
+ top_tokens_list = [[shared.tokenizer.decode(token_id) for token_id in top_token_ids[0]] for top_token_ids in top_token_ids_list]
+ top_probs_list = ppl_logits_processor.top_probs_list[:-1]
+ # Remove first element of generated_token_ids, generated_tokens, selected_probs because they are for the last token of the prompt
+ gen_token_ids = ppl_logits_processor.generated_token_ids[1:]
+ gen_tokens = [shared.tokenizer.decode(token_id) for token_id in gen_token_ids]
+ sel_probs = ppl_logits_processor.selected_probs[1:]
+
+ end_part = '' if params['probability_dropdown'] else '' # Helps with finding the index after replacing part of the text.
+
+ i = 0
+ for token, prob, ppl, top_tokens, top_probs in zip(gen_tokens, sel_probs, perplexities, top_tokens_list, top_probs_list):
+ color = 'ffffff'
+ if params['color_by_probability'] and params['color_by_perplexity']:
+ color = probability_perplexity_color_scale(prob, ppl)
+ elif params['color_by_perplexity']:
+ color = perplexity_color_scale(ppl)
+ elif params['color_by_probability']:
+ color = probability_color_scale(prob)
+ if token in text[i:]:
+ if params['probability_dropdown']:
+ text = text[:i] + text[i:].replace(token, add_dropdown_html(token, color, top_tokens, top_probs[0], ppl), 1)
+ else:
+ text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
+ i += text[i:].find(end_part) + len(end_part)
+
+ # Use full perplexity list for calculating the average here.
+ print('Average perplexity:', round(np.mean(ppl_logits_processor.perplexities_list[:-1]), 4))
+ # t1 = time.time()
+ # print(f"Modifier: {(t1-t0):.3f} s")
+ # About 50 ms
+ return text
+
+
+def probability_color_scale(prob):
+ '''
+ Green-yellow-red color scale
+ '''
+
+ rv = 0
+ gv = 0
+ if prob <= 0.5:
+ rv = 'ff'
+ gv = hex(int(255 * prob * 2))[2:]
+ if len(gv) < 2:
+ gv = '0' * (2 - len(gv)) + gv
+ else:
+ rv = hex(int(255 - 255 * (prob - 0.5) * 2))[2:]
+ gv = 'ff'
+ if len(rv) < 2:
+ rv = '0' * (2 - len(rv)) + rv
+
+ return rv + gv + '00'
+
+
+def perplexity_color_scale(ppl):
+ '''
+ Red component only, white for 0 perplexity (sorry if you're not in dark mode)
+ '''
+ value = hex(max(int(255.0 - params['ppl_scale'] * (float(ppl) - 1.0)), 0))[2:]
+ if len(value) < 2:
+ value = '0' * (2 - len(value)) + value
+
+ return 'ff' + value + value
+
+
+def probability_perplexity_color_scale(prob, ppl):
+ '''
+ Green-yellow-red for probability and blue component for perplexity
+ '''
+
+ rv = 0
+ gv = 0
+ bv = hex(min(max(int(params['ppl_scale'] * (float(ppl) - 1.0)), 0), 255))[2:]
+ if len(bv) < 2:
+ bv = '0' * (2 - len(bv)) + bv
+
+ if prob <= 0.5:
+ rv = 'ff'
+ gv = hex(int(255 * prob * 2))[2:]
+ if len(gv) < 2:
+ gv = '0' * (2 - len(gv)) + gv
+ else:
+ rv = hex(int(255 - 255 * (prob - 0.5) * 2))[2:]
+ gv = 'ff'
+ if len(rv) < 2:
+ rv = '0' * (2 - len(rv)) + rv
+
+ return rv + gv + bv
+
+
+def add_color_html(token, color):
+ return f'{token}'
+
+
+# TODO: Major issue: Applying this to too many tokens will cause a permanent slowdown in generation speed until the messages are removed from the history.
+# I think the issue is from HTML elements taking up space in the visible history, and things like history deepcopy add latency proportional to the size of the history.
+# Potential solution is maybe to modify the main generation code to send just the internal text and not the visible history, to avoid moving too much around.
+# I wonder if we can also avoid using deepcopy here.
+def add_dropdown_html(token, color, top_tokens, top_probs, perplexity=0):
+ html = f'
{token}
'
+ for token_option, prob in zip(top_tokens, top_probs):
+ # TODO: Bold for selected token?
+ # Using divs prevented the problem of divs inside spans causing issues.
+ # Now the problem is that divs show the same whitespace of one space between every token.
+ # There is probably some way to fix this in CSS that I don't know about.
+ row_color = probability_color_scale(prob)
+ row_class = ' class="selected"' if token_option == token else ''
+ html += f'
{token_option}
{prob:.4f}
'
+ if perplexity != 0:
+ ppl_color = perplexity_color_scale(perplexity)
+ html += f'
Perplexity:
{perplexity:.4f}
'
+ html += '
'
+ return html # About 750 characters per token...
+
+
+def custom_css():
+ return """
+ .dropdown {
+ display: none;
+ position: absolute;
+ z-index: 50;
+ background-color: var(--block-background-fill);
+ box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
+ width: max-content;
+ overflow: visible;
+ padding: 5px;
+ border-radius: 10px;
+ border: 1px solid var(--border-color-primary);
+ }
+
+ .dropdown-content {
+ border: none;
+ z-index: 50;
+ }
+
+ .dropdown-content tr.selected {
+ background-color: var(--block-label-background-fill);
+ }
+
+ .dropdown-content td {
+ color: var(--body-text-color);
+ }
+
+ .hoverable {
+ color: var(--body-text-color);
+ position: relative;
+ display: inline-block;
+ overflow: visible;
+ font-size: 15px;
+ line-height: 1.75;
+ margin: 0;
+ padding: 0;
+ }
+
+ .hoverable:hover .dropdown {
+ display: block;
+ }
+
+ pre {
+ white-space: pre-wrap;
+ }
+
+ # TODO: This makes the hover menus extend outside the bounds of the chat area, which is good.
+ # However, it also makes the scrollbar disappear, which is bad.
+ # The scroll bar needs to still be present. So for now, we can't see dropdowns that extend past the edge of the chat area.
+ #.chat {
+ # overflow-y: auto;
+ #}
+ """
+
+
+# Monkeypatch applied to html_generator.py
+# We simply don't render markdown into HTML. We wrap everything in
tags to preserve whitespace
+# formatting. If you're coloring tokens by perplexity or probability, or especially if you're using
+# the probability dropdown, you probably care more about seeing the tokens the model actually outputted
+# rather than rendering ```code blocks``` or *italics*.
+def convert_to_markdown(string):
+ return '
' + string + '
'
+
+
+html_generator.convert_to_markdown = convert_to_markdown
+
+
+def ui():
+ def update_active_check(x):
+ params.update({'active': x})
+
+ def update_color_by_ppl_check(x):
+ params.update({'color_by_perplexity': x})
+
+ def update_color_by_prob_check(x):
+ params.update({'color_by_probability': x})
+
+ def update_prob_dropdown_check(x):
+ params.update({'probability_dropdown': x})
+
+ active_check = gradio.Checkbox(value=True, label="Compute probabilities and perplexity scores", info="Activate this extension. Note that this extension currently does not work with exllama or llama.cpp.")
+ color_by_ppl_check = gradio.Checkbox(value=False, label="Color by perplexity", info="Higher perplexity is more red. If also showing probability, higher perplexity has more blue component.")
+ color_by_prob_check = gradio.Checkbox(value=False, label="Color by probability", info="Green-yellow-red linear scale, with 100% green, 50% yellow, 0% red.")
+ prob_dropdown_check = gradio.Checkbox(value=False, label="Probability dropdown", info="Hover over a token to show a dropdown of top token probabilities. Currently slightly buggy with whitespace between tokens.")
+
+ active_check.change(update_active_check, active_check, None)
+ color_by_ppl_check.change(update_color_by_ppl_check, color_by_ppl_check, None)
+ color_by_prob_check.change(update_color_by_prob_check, color_by_prob_check, None)
+ prob_dropdown_check.change(update_prob_dropdown_check, prob_dropdown_check, None)
diff --git a/extensions/sd_api_pictures/README.MD b/extensions/sd_api_pictures/README.MD
new file mode 100644
index 0000000000000000000000000000000000000000..67c75e145ccc8301505d96d858da04713ad4337d
--- /dev/null
+++ b/extensions/sd_api_pictures/README.MD
@@ -0,0 +1,90 @@
+## Description:
+TL;DR: Lets the bot answer you with a picture!
+
+Stable Diffusion API pictures for TextGen, v.1.2.0
+An extension to [oobabooga's textgen-webui](https://github.com/oobabooga/text-generation-webui) allowing you to receive pics generated by [Automatic1111's SD-WebUI API](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
+
+
+Interface overview
+
+![Interface](https://raw.githubusercontent.com/Brawlence/SD_api_pics/main/illust/Interface.jpg)
+
+
+
+Load it in the `--chat` mode with `--extension sd_api_pictures` alongside `send_pictures`
+(it's not really required, but completes the picture, *pun intended*).
+
+
+## History
+
+Consider the version included with [oobabooga's repository](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/sd_api_pictures) to be STABLE, experimental developments and untested features are pushed in [Brawlence/SD_api_pics](https://github.com/Brawlence/SD_api_pics)
+
+Lastest change:
+1.1.0 → 1.1.1 Fixed not having Auto1111's metadata in received images
+
+## Details
+
+The image generation is triggered:
+- manually through the 'Force the picture response' button while in `Manual` or `Immersive/Interactive` modes OR
+- automatically in `Immersive/Interactive` mode if the words `'send|main|message|me'` are followed by `'image|pic|picture|photo|snap|snapshot|selfie|meme'` in the user's prompt
+- always on in `Picturebook/Adventure` mode (if not currently suppressed by 'Suppress the picture response')
+
+## Prerequisites
+
+One needs an available instance of Automatic1111's webui running with an `--api` flag. Ain't tested with a notebook / cloud hosted one but should be possible.
+To run it locally in parallel on the same machine, specify custom `--listen-port` for either Auto1111's or ooba's webUIs.
+
+## Features overview
+- Connection to API check (press enter in the address box)
+- [VRAM management (model shuffling)](https://github.com/Brawlence/SD_api_pics/wiki/VRAM-management-feature)
+- [Three different operation modes](https://github.com/Brawlence/SD_api_pics/wiki/Modes-of-operation) (manual, interactive, always-on)
+- User-defined persistent settings via settings.json
+
+### Connection check
+
+Insert the Automatic1111's WebUI address and press Enter:
+![API-check](https://raw.githubusercontent.com/Brawlence/SD_api_pics/main/illust/API-check.gif)
+Green mark confirms the ability to communicate with Auto1111's API on this address. Red cross means something's not right (the ext won't work).
+
+### Persistents settings
+
+Create or modify the `settings.json` in the `text-generation-webui` root directory to override the defaults
+present in script.py, ex:
+
+```json
+{
+ "sd_api_pictures-manage_VRAM": 1,
+ "sd_api_pictures-save_img": 1,
+ "sd_api_pictures-prompt_prefix": "(Masterpiece:1.1), detailed, intricate, colorful, (solo:1.1)",
+ "sd_api_pictures-sampler_name": "DPM++ 2M Karras"
+}
+```
+
+will automatically set the `Manage VRAM` & `Keep original images` checkboxes and change the texts in `Prompt Prefix` and `Sampler name` on load.
+
+---
+
+## Demonstrations:
+
+Those are examples of the version 1.0.0, but the core functionality is still the same
+
+
+Conversation 1
+
+![EXA1](https://user-images.githubusercontent.com/42910943/224866564-939a3bcb-e7cf-4ac0-a33f-b3047b55054d.jpg)
+![EXA2](https://user-images.githubusercontent.com/42910943/224866566-38394054-1320-45cf-9515-afa76d9d7745.jpg)
+![EXA3](https://user-images.githubusercontent.com/42910943/224866568-10ea47b7-0bac-4269-9ec9-22c387a13b59.jpg)
+![EXA4](https://user-images.githubusercontent.com/42910943/224866569-326121ad-1ea1-4874-9f6b-4bca7930a263.jpg)
+
+
+
+
+
+Conversation 2
+
+![Hist1](https://user-images.githubusercontent.com/42910943/224865517-c6966b58-bc4d-4353-aab9-6eb97778d7bf.jpg)
+![Hist2](https://user-images.githubusercontent.com/42910943/224865527-b2fe7c2e-0da5-4c2e-b705-42e233b07084.jpg)
+![Hist3](https://user-images.githubusercontent.com/42910943/224865535-a38d94e7-8975-4a46-a655-1ae1de41f85d.jpg)
+
+
+
diff --git a/extensions/sd_api_pictures/script.py b/extensions/sd_api_pictures/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..e33367d4a0b7e3486bbc280a2dd2e248517e89cc
--- /dev/null
+++ b/extensions/sd_api_pictures/script.py
@@ -0,0 +1,386 @@
+import base64
+import io
+import re
+import time
+from datetime import date
+from pathlib import Path
+
+import gradio as gr
+import requests
+import torch
+from PIL import Image
+
+from modules import shared
+from modules.models import reload_model, unload_model
+from modules.ui import create_refresh_button
+
+torch._C._jit_set_profiling_mode(False)
+
+# parameters which can be customized in settings.json of webui
+params = {
+ 'address': 'http://127.0.0.1:7860',
+ 'mode': 0, # modes of operation: 0 (Manual only), 1 (Immersive/Interactive - looks for words to trigger), 2 (Picturebook Adventure - Always on)
+ 'manage_VRAM': False,
+ 'save_img': False,
+ 'SD_model': 'NeverEndingDream', # not used right now
+ 'prompt_prefix': '(Masterpiece:1.1), detailed, intricate, colorful',
+ 'negative_prompt': '(worst quality, low quality:1.3)',
+ 'width': 512,
+ 'height': 512,
+ 'denoising_strength': 0.61,
+ 'restore_faces': False,
+ 'enable_hr': False,
+ 'hr_upscaler': 'ESRGAN_4x',
+ 'hr_scale': '1.0',
+ 'seed': -1,
+ 'sampler_name': 'DPM++ 2M Karras',
+ 'steps': 32,
+ 'cfg_scale': 7,
+ 'textgen_prefix': 'Please provide a detailed and vivid description of [subject]',
+ 'sd_checkpoint': ' ',
+ 'checkpoint_list': [" "]
+}
+
+
+def give_VRAM_priority(actor):
+ global shared, params
+
+ if actor == 'SD':
+ unload_model()
+ print("Requesting Auto1111 to re-load last checkpoint used...")
+ response = requests.post(url=f'{params["address"]}/sdapi/v1/reload-checkpoint', json='')
+ response.raise_for_status()
+
+ elif actor == 'LLM':
+ print("Requesting Auto1111 to vacate VRAM...")
+ response = requests.post(url=f'{params["address"]}/sdapi/v1/unload-checkpoint', json='')
+ response.raise_for_status()
+ reload_model()
+
+ elif actor == 'set':
+ print("VRAM mangement activated -- requesting Auto1111 to vacate VRAM...")
+ response = requests.post(url=f'{params["address"]}/sdapi/v1/unload-checkpoint', json='')
+ response.raise_for_status()
+
+ elif actor == 'reset':
+ print("VRAM mangement deactivated -- requesting Auto1111 to reload checkpoint")
+ response = requests.post(url=f'{params["address"]}/sdapi/v1/reload-checkpoint', json='')
+ response.raise_for_status()
+
+ else:
+ raise RuntimeError(f'Managing VRAM: "{actor}" is not a known state!')
+
+ response.raise_for_status()
+ del response
+
+
+if params['manage_VRAM']:
+ give_VRAM_priority('set')
+
+SD_models = ['NeverEndingDream'] # TODO: get with http://{address}}/sdapi/v1/sd-models and allow user to select
+
+picture_response = False # specifies if the next model response should appear as a picture
+
+
+def remove_surrounded_chars(string):
+ # this expression matches to 'as few symbols as possible (0 upwards) between any asterisks' OR
+ # 'as few symbols as possible (0 upwards) between an asterisk and the end of the string'
+ return re.sub('\*[^\*]*?(\*|$)', '', string)
+
+
+def triggers_are_in(string):
+ string = remove_surrounded_chars(string)
+ # regex searches for send|main|message|me (at the end of the word) followed by
+ # a whole word of image|pic|picture|photo|snap|snapshot|selfie|meme(s),
+ # (?aims) are regex parser flags
+ return bool(re.search('(?aims)(send|mail|message|me)\\b.+?\\b(image|pic(ture)?|photo|snap(shot)?|selfie|meme)s?\\b', string))
+
+
+def state_modifier(state):
+ if picture_response:
+ state['stream'] = False
+
+ return state
+
+
+def input_modifier(string):
+ """
+ This function is applied to your text inputs before
+ they are fed into the model.
+ """
+
+ global params
+
+ if not params['mode'] == 1: # if not in immersive/interactive mode, do nothing
+ return string
+
+ if triggers_are_in(string): # if we're in it, check for trigger words
+ toggle_generation(True)
+ string = string.lower()
+ if "of" in string:
+ subject = string.split('of', 1)[1] # subdivide the string once by the first 'of' instance and get what's coming after it
+ string = params['textgen_prefix'].replace("[subject]", subject)
+ else:
+ string = params['textgen_prefix'].replace("[subject]", "your appearance, your surroundings and what you are doing right now")
+
+ return string
+
+# Get and save the Stable Diffusion-generated picture
+def get_SD_pictures(description, character):
+
+ global params
+
+ if params['manage_VRAM']:
+ give_VRAM_priority('SD')
+
+ description = re.sub('', ' ', description)
+ description = f"({description}:1)"
+
+ payload = {
+ "prompt": params['prompt_prefix'] + description,
+ "seed": params['seed'],
+ "sampler_name": params['sampler_name'],
+ "enable_hr": params['enable_hr'],
+ "hr_scale": params['hr_scale'],
+ "hr_upscaler": params['hr_upscaler'],
+ "denoising_strength": params['denoising_strength'],
+ "steps": params['steps'],
+ "cfg_scale": params['cfg_scale'],
+ "width": params['width'],
+ "height": params['height'],
+ "restore_faces": params['restore_faces'],
+ "override_settings_restore_afterwards": True,
+ "negative_prompt": params['negative_prompt']
+ }
+
+ print(f'Prompting the image generator via the API on {params["address"]}...')
+ response = requests.post(url=f'{params["address"]}/sdapi/v1/txt2img', json=payload)
+ response.raise_for_status()
+ r = response.json()
+
+ visible_result = ""
+ for img_str in r['images']:
+ if params['save_img']:
+ img_data = base64.b64decode(img_str)
+
+ variadic = f'{date.today().strftime("%Y_%m_%d")}/{character}_{int(time.time())}'
+ output_file = Path(f'extensions/sd_api_pictures/outputs/{variadic}.png')
+ output_file.parent.mkdir(parents=True, exist_ok=True)
+
+ with open(output_file.as_posix(), 'wb') as f:
+ f.write(img_data)
+
+ visible_result = visible_result + f'\n'
+ else:
+ image = Image.open(io.BytesIO(base64.b64decode(img_str.split(",", 1)[0])))
+ # lower the resolution of received images for the chat, otherwise the log size gets out of control quickly with all the base64 values in visible history
+ image.thumbnail((300, 300))
+ buffered = io.BytesIO()
+ image.save(buffered, format="JPEG")
+ buffered.seek(0)
+ image_bytes = buffered.getvalue()
+ img_str = "data:image/jpeg;base64," + base64.b64encode(image_bytes).decode()
+ visible_result = visible_result + f'\n'
+
+ if params['manage_VRAM']:
+ give_VRAM_priority('LLM')
+
+ return visible_result
+
+# TODO: how do I make the UI history ignore the resulting pictures (I don't want HTML to appear in history)
+# and replace it with 'text' for the purposes of logging?
+def output_modifier(string, state):
+ """
+ This function is applied to the model outputs.
+ """
+
+ global picture_response, params
+
+ if not picture_response:
+ return string
+
+ string = remove_surrounded_chars(string)
+ string = string.replace('"', '')
+ string = string.replace('“', '')
+ string = string.replace('\n', ' ')
+ string = string.strip()
+
+ if string == '':
+ string = 'no viable description in reply, try regenerating'
+ return string
+
+ text = ""
+ if (params['mode'] < 2):
+ toggle_generation(False)
+ text = f'*Sends a picture which portrays: “{string}”*'
+ else:
+ text = string
+
+ string = get_SD_pictures(string, state['character_menu']) + "\n" + text
+
+ return string
+
+
+def bot_prefix_modifier(string):
+ """
+ This function is only applied in chat mode. It modifies
+ the prefix text for the Bot and can be used to bias its
+ behavior.
+ """
+
+ return string
+
+
+def toggle_generation(*args):
+ global picture_response, shared
+
+ if not args:
+ picture_response = not picture_response
+ else:
+ picture_response = args[0]
+
+ shared.processing_message = "*Is sending a picture...*" if picture_response else "*Is typing...*"
+
+
+def filter_address(address):
+ address = address.strip()
+ # address = re.sub('http(s)?:\/\/|\/$','',address) # remove starting http:// OR https:// OR trailing slash
+ address = re.sub('\/$', '', address) # remove trailing /s
+ if not address.startswith('http'):
+ address = 'http://' + address
+ return address
+
+
+def SD_api_address_update(address):
+ global params
+
+ msg = "✔️ SD API is found on:"
+ address = filter_address(address)
+ params.update({"address": address})
+ try:
+ response = requests.get(url=f'{params["address"]}/sdapi/v1/sd-models')
+ response.raise_for_status()
+ # r = response.json()
+ except:
+ msg = "❌ No SD API endpoint on:"
+
+ return gr.Textbox.update(label=msg)
+
+
+def custom_css():
+ path_to_css = Path(__file__).parent.resolve() / 'style.css'
+ return open(path_to_css, 'r').read()
+
+
+def get_checkpoints():
+ global params
+
+ try:
+ models = requests.get(url=f'{params["address"]}/sdapi/v1/sd-models')
+ options = requests.get(url=f'{params["address"]}/sdapi/v1/options')
+ options_json = options.json()
+ params['sd_checkpoint'] = options_json['sd_model_checkpoint']
+ params['checkpoint_list'] = [result["title"] for result in models.json()]
+ except:
+ params['sd_checkpoint'] = ""
+ params['checkpoint_list'] = []
+
+ return gr.update(choices=params['checkpoint_list'], value=params['sd_checkpoint'])
+
+
+def load_checkpoint(checkpoint):
+ payload = {
+ "sd_model_checkpoint": checkpoint
+ }
+
+ try:
+ requests.post(url=f'{params["address"]}/sdapi/v1/options', json=payload)
+ except:
+ pass
+
+
+def get_samplers():
+ try:
+ response = requests.get(url=f'{params["address"]}/sdapi/v1/samplers')
+ response.raise_for_status()
+ samplers = [x["name"] for x in response.json()]
+ except:
+ samplers = []
+
+ return samplers
+
+
+def ui():
+
+ # Gradio elements
+ # gr.Markdown('### Stable Diffusion API Pictures') # Currently the name of extension is shown as the title
+ with gr.Accordion("Parameters", open=True, elem_classes="SDAP"):
+ with gr.Row():
+ address = gr.Textbox(placeholder=params['address'], value=params['address'], label='Auto1111\'s WebUI address')
+ modes_list = ["Manual", "Immersive/Interactive", "Picturebook/Adventure"]
+ mode = gr.Dropdown(modes_list, value=modes_list[params['mode']], label="Mode of operation", type="index")
+ with gr.Column(scale=1, min_width=300):
+ manage_VRAM = gr.Checkbox(value=params['manage_VRAM'], label='Manage VRAM')
+ save_img = gr.Checkbox(value=params['save_img'], label='Keep original images and use them in chat')
+
+ force_pic = gr.Button("Force the picture response")
+ suppr_pic = gr.Button("Suppress the picture response")
+ with gr.Row():
+ checkpoint = gr.Dropdown(params['checkpoint_list'], value=params['sd_checkpoint'], label="Checkpoint", type="value")
+ update_checkpoints = gr.Button("Get list of checkpoints")
+
+ with gr.Accordion("Generation parameters", open=False):
+ prompt_prefix = gr.Textbox(placeholder=params['prompt_prefix'], value=params['prompt_prefix'], label='Prompt Prefix (best used to describe the look of the character)')
+ textgen_prefix = gr.Textbox(placeholder=params['textgen_prefix'], value=params['textgen_prefix'], label='textgen prefix (type [subject] where the subject should be placed)')
+ negative_prompt = gr.Textbox(placeholder=params['negative_prompt'], value=params['negative_prompt'], label='Negative Prompt')
+ with gr.Row():
+ with gr.Column():
+ width = gr.Slider(64, 2048, value=params['width'], step=64, label='Width')
+ height = gr.Slider(64, 2048, value=params['height'], step=64, label='Height')
+ with gr.Column(variant="compact", elem_id="sampler_col"):
+ with gr.Row(elem_id="sampler_row"):
+ sampler_name = gr.Dropdown(value=params['sampler_name'], label='Sampling method', elem_id="sampler_box")
+ create_refresh_button(sampler_name, lambda: None, lambda: {'choices': get_samplers()}, 'refresh-button')
+ steps = gr.Slider(1, 150, value=params['steps'], step=1, label="Sampling steps", elem_id="steps_box")
+ with gr.Row():
+ seed = gr.Number(label="Seed", value=params['seed'], elem_id="seed_box")
+ cfg_scale = gr.Number(label="CFG Scale", value=params['cfg_scale'], elem_id="cfg_box")
+ with gr.Column() as hr_options:
+ restore_faces = gr.Checkbox(value=params['restore_faces'], label='Restore faces')
+ enable_hr = gr.Checkbox(value=params['enable_hr'], label='Hires. fix')
+ with gr.Row(visible=params['enable_hr'], elem_classes="hires_opts") as hr_options:
+ hr_scale = gr.Slider(1, 4, value=params['hr_scale'], step=0.1, label='Upscale by')
+ denoising_strength = gr.Slider(0, 1, value=params['denoising_strength'], step=0.01, label='Denoising strength')
+ hr_upscaler = gr.Textbox(placeholder=params['hr_upscaler'], value=params['hr_upscaler'], label='Upscaler')
+
+ # Event functions to update the parameters in the backend
+ address.change(lambda x: params.update({"address": filter_address(x)}), address, None)
+ mode.select(lambda x: params.update({"mode": x}), mode, None)
+ mode.select(lambda x: toggle_generation(x > 1), inputs=mode, outputs=None)
+ manage_VRAM.change(lambda x: params.update({"manage_VRAM": x}), manage_VRAM, None)
+ manage_VRAM.change(lambda x: give_VRAM_priority('set' if x else 'reset'), inputs=manage_VRAM, outputs=None)
+ save_img.change(lambda x: params.update({"save_img": x}), save_img, None)
+
+ address.submit(fn=SD_api_address_update, inputs=address, outputs=address)
+ prompt_prefix.change(lambda x: params.update({"prompt_prefix": x}), prompt_prefix, None)
+ textgen_prefix.change(lambda x: params.update({"textgen_prefix": x}), textgen_prefix, None)
+ negative_prompt.change(lambda x: params.update({"negative_prompt": x}), negative_prompt, None)
+ width.change(lambda x: params.update({"width": x}), width, None)
+ height.change(lambda x: params.update({"height": x}), height, None)
+ hr_scale.change(lambda x: params.update({"hr_scale": x}), hr_scale, None)
+ denoising_strength.change(lambda x: params.update({"denoising_strength": x}), denoising_strength, None)
+ restore_faces.change(lambda x: params.update({"restore_faces": x}), restore_faces, None)
+ hr_upscaler.change(lambda x: params.update({"hr_upscaler": x}), hr_upscaler, None)
+ enable_hr.change(lambda x: params.update({"enable_hr": x}), enable_hr, None)
+ enable_hr.change(lambda x: hr_options.update(visible=params["enable_hr"]), enable_hr, hr_options)
+ update_checkpoints.click(get_checkpoints, None, checkpoint)
+ checkpoint.change(lambda x: params.update({"sd_checkpoint": x}), checkpoint, None)
+ checkpoint.change(load_checkpoint, checkpoint, None)
+
+ sampler_name.change(lambda x: params.update({"sampler_name": x}), sampler_name, None)
+ steps.change(lambda x: params.update({"steps": x}), steps, None)
+ seed.change(lambda x: params.update({"seed": x}), seed, None)
+ cfg_scale.change(lambda x: params.update({"cfg_scale": x}), cfg_scale, None)
+
+ force_pic.click(lambda x: toggle_generation(True), inputs=force_pic, outputs=None)
+ suppr_pic.click(lambda x: toggle_generation(False), inputs=suppr_pic, outputs=None)
diff --git a/extensions/sd_api_pictures/style.css b/extensions/sd_api_pictures/style.css
new file mode 100644
index 0000000000000000000000000000000000000000..6f4994616a1d4ca52f3a8245f963ce0b7ebbb0d7
--- /dev/null
+++ b/extensions/sd_api_pictures/style.css
@@ -0,0 +1,52 @@
+/* Align the elements for SD_api_picture extension */
+.SDAP #sampler_box {
+ padding-top: var(--spacing-sm);
+ padding-bottom: var(--spacing-sm);
+ border: 0;
+}
+
+.SDAP #steps_box {
+ border-radius: 0 0 var(--block-radius) var(--block-radius);
+}
+
+.SDAP #sampler_col {
+ gap: 0;
+ padding: 0;
+ background-color: transparent;
+}
+
+.SDAP #sampler_row {
+ border-bottom: 0;
+ box-shadow: var(--block-shadow);
+ border-width: var(--block-border-width);
+ border-color: var(--block-border-color);
+ border-radius: var(--block-radius) var(--block-radius) 0 0;
+ background: var(--block-background-fill);
+ gap: 0;
+}
+
+.SDAP #sampler_row .refresh-button {
+ margin-bottom: var(--spacing-sm);
+ margin-right: var(--spacing-lg);
+}
+
+.SDAP #seed_box,
+.SDAP #cfg_box {
+ padding-top: var(--spacing-md);
+}
+
+.SDAP #sampler_box span,
+.SDAP #seed_box span,
+.SDAP #cfg_box span,
+.SDAP #steps_box span {
+ margin-bottom: var(--spacing-sm);
+}
+
+.SDAP svg.dropdown-arrow {
+ flex-shrink: 0 !important;
+ margin: 0px !important;
+}
+
+.SDAP .hires_opts input[type="number"] {
+ width: 6em !important;
+}
diff --git a/extensions/send_pictures/script.py b/extensions/send_pictures/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..f8e6c969271e1dc3d383cefce74774ad505882cd
--- /dev/null
+++ b/extensions/send_pictures/script.py
@@ -0,0 +1,58 @@
+import base64
+from io import BytesIO
+
+import gradio as gr
+import torch
+from transformers import BlipForConditionalGeneration, BlipProcessor
+
+from modules import chat, shared, ui_chat
+from modules.ui import gather_interface_values
+from modules.utils import gradio
+
+input_hijack = {
+ 'state': False,
+ 'value': ["", ""]
+}
+
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base", torch_dtype=torch.float32).to("cpu")
+
+
+def chat_input_modifier(text, visible_text, state):
+ global input_hijack
+ if input_hijack['state']:
+ input_hijack['state'] = False
+ return input_hijack['value']
+ else:
+ return text, visible_text
+
+
+def caption_image(raw_image):
+ inputs = processor(raw_image.convert('RGB'), return_tensors="pt").to("cpu", torch.float32)
+ out = model.generate(**inputs, max_new_tokens=100)
+ return processor.decode(out[0], skip_special_tokens=True)
+
+
+def generate_chat_picture(picture, name1, name2):
+ text = f'*{name1} sends {name2} a picture that contains the following: “{caption_image(picture)}”*'
+ # lower the resolution of sent images for the chat, otherwise the log size gets out of control quickly with all the base64 values in visible history
+ picture.thumbnail((300, 300))
+ buffer = BytesIO()
+ picture.save(buffer, format="JPEG")
+ img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
+ visible_text = f''
+ return text, visible_text
+
+
+def ui():
+ picture_select = gr.Image(label='Send a picture', type='pil')
+
+ # Prepare the input hijack, update the interface values, call the generation function, and clear the picture
+ picture_select.upload(
+ lambda picture, name1, name2: input_hijack.update({
+ "state": True,
+ "value": generate_chat_picture(picture, name1, name2)
+ }), [picture_select, shared.gradio['name1'], shared.gradio['name2']], None).then(
+ gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.generate_chat_reply_wrapper, gradio(ui_chat.inputs), gradio('display', 'history'), show_progress=False).then(
+ lambda: None, None, picture_select, show_progress=False)
diff --git a/extensions/silero_tts/harvard_sentences.txt b/extensions/silero_tts/harvard_sentences.txt
new file mode 100644
index 0000000000000000000000000000000000000000..958d7f3cb28d8e2ea215ee416068b2f672d9e9d1
--- /dev/null
+++ b/extensions/silero_tts/harvard_sentences.txt
@@ -0,0 +1,720 @@
+The birch canoe slid on the smooth planks.
+Glue the sheet to the dark blue background.
+It's easy to tell the depth of a well.
+These days a chicken leg is a rare dish.
+Rice is often served in round bowls.
+The juice of lemons makes fine punch.
+The box was thrown beside the parked truck.
+The hogs were fed chopped corn and garbage.
+Four hours of steady work faced us.
+A large size in stockings is hard to sell.
+The boy was there when the sun rose.
+A rod is used to catch pink salmon.
+The source of the huge river is the clear spring.
+Kick the ball straight and follow through.
+Help the woman get back to her feet.
+A pot of tea helps to pass the evening.
+Smoky fires lack flame and heat.
+The soft cushion broke the man's fall.
+The salt breeze came across from the sea.
+The girl at the booth sold fifty bonds.
+The small pup gnawed a hole in the sock.
+The fish twisted and turned on the bent hook.
+Press the pants and sew a button on the vest.
+The swan dive was far short of perfect.
+The beauty of the view stunned the young boy.
+Two blue fish swam in the tank.
+Her purse was full of useless trash.
+The colt reared and threw the tall rider.
+It snowed, rained, and hailed the same morning.
+Read verse out loud for pleasure.
+Hoist the load to your left shoulder.
+Take the winding path to reach the lake.
+Note closely the size of the gas tank.
+Wipe the grease off his dirty face.
+Mend the coat before you go out.
+The wrist was badly strained and hung limp.
+The stray cat gave birth to kittens.
+The young girl gave no clear response.
+The meal was cooked before the bell rang.
+What joy there is in living.
+A king ruled the state in the early days.
+The ship was torn apart on the sharp reef.
+Sickness kept him home the third week.
+The wide road shimmered in the hot sun.
+The lazy cow lay in the cool grass.
+Lift the square stone over the fence.
+The rope will bind the seven books at once.
+Hop over the fence and plunge in.
+The friendly gang left the drug store.
+Mesh wire keeps chicks inside.
+The frosty air passed through the coat.
+The crooked maze failed to fool the mouse.
+Adding fast leads to wrong sums.
+The show was a flop from the very start.
+A saw is a tool used for making boards.
+The wagon moved on well oiled wheels.
+March the soldiers past the next hill.
+A cup of sugar makes sweet fudge.
+Place a rosebush near the porch steps.
+Both lost their lives in the raging storm.
+We talked of the side show in the circus.
+Use a pencil to write the first draft.
+He ran half way to the hardware store.
+The clock struck to mark the third period.
+A small creek cut across the field.
+Cars and busses stalled in snow drifts.
+The set of china hit the floor with a crash.
+This is a grand season for hikes on the road.
+The dune rose from the edge of the water.
+Those words were the cue for the actor to leave.
+A yacht slid around the point into the bay.
+The two met while playing on the sand.
+The ink stain dried on the finished page.
+The walled town was seized without a fight.
+The lease ran out in sixteen weeks.
+A tame squirrel makes a nice pet.
+The horn of the car woke the sleeping cop.
+The heart beat strongly and with firm strokes.
+The pearl was worn in a thin silver ring.
+The fruit peel was cut in thick slices.
+The Navy attacked the big task force.
+See the cat glaring at the scared mouse.
+There are more than two factors here.
+The hat brim was wide and too droopy.
+The lawyer tried to lose his case.
+The grass curled around the fence post.
+Cut the pie into large parts.
+Men strive but seldom get rich.
+Always close the barn door tight.
+He lay prone and hardly moved a limb.
+The slush lay deep along the street.
+A wisp of cloud hung in the blue air.
+A pound of sugar costs more than eggs.
+The fin was sharp and cut the clear water.
+The play seems dull and quite stupid.
+Bail the boat to stop it from sinking.
+The term ended in late June that year.
+A tusk is used to make costly gifts.
+Ten pins were set in order.
+The bill was paid every third week.
+Oak is strong and also gives shade.
+Cats and dogs each hate the other.
+The pipe began to rust while new.
+Open the crate but don't break the glass.
+Add the sum to the product of these three.
+Thieves who rob friends deserve jail.
+The ripe taste of cheese improves with age.
+Act on these orders with great speed.
+The hog crawled under the high fence.
+Move the vat over the hot fire.
+The bark of the pine tree was shiny and dark.
+Leaves turn brown and yellow in the fall.
+The pennant waved when the wind blew.
+Split the log with a quick, sharp blow.
+Burn peat after the logs give out.
+He ordered peach pie with ice cream.
+Weave the carpet on the right hand side.
+Hemp is a weed found in parts of the tropics.
+A lame back kept his score low.
+We find joy in the simplest things.
+Type out three lists of orders.
+The harder he tried the less he got done.
+The boss ran the show with a watchful eye.
+The cup cracked and spilled its contents.
+Paste can cleanse the most dirty brass.
+The slang word for raw whiskey is booze.
+It caught its hind paw in a rusty trap.
+The wharf could be seen at the farther shore.
+Feel the heat of the weak dying flame.
+The tiny girl took off her hat.
+A cramp is no small danger on a swim.
+He said the same phrase thirty times.
+Pluck the bright rose without leaves.
+Two plus seven is less than ten.
+The glow deepened in the eyes of the sweet girl.
+Bring your problems to the wise chief.
+Write a fond note to the friend you cherish.
+Clothes and lodging are free to new men.
+We frown when events take a bad turn.
+Port is a strong wine with a smoky taste.
+The young kid jumped the rusty gate.
+Guess the results from the first scores.
+A salt pickle tastes fine with ham.
+The just claim got the right verdict.
+These thistles bend in a high wind.
+Pure bred poodles have curls.
+The tree top waved in a graceful way.
+The spot on the blotter was made by green ink.
+Mud was spattered on the front of his white shirt.
+The cigar burned a hole in the desk top.
+The empty flask stood on the tin tray.
+A speedy man can beat this track mark.
+He broke a new shoelace that day.
+The coffee stand is too high for the couch.
+The urge to write short stories is rare.
+The pencils have all been used.
+The pirates seized the crew of the lost ship.
+We tried to replace the coin but failed.
+She sewed the torn coat quite neatly.
+The sofa cushion is red and of light weight.
+The jacket hung on the back of the wide chair.
+At that high level the air is pure.
+Drop the two when you add the figures.
+A filing case is now hard to buy.
+An abrupt start does not win the prize.
+Wood is best for making toys and blocks.
+The office paint was a dull, sad tan.
+He knew the skill of the great young actress.
+A rag will soak up spilled water.
+A shower of dirt fell from the hot pipes.
+Steam hissed from the broken valve.
+The child almost hurt the small dog.
+There was a sound of dry leaves outside.
+The sky that morning was clear and bright blue.
+Torn scraps littered the stone floor.
+Sunday is the best part of the week.
+The doctor cured him with these pills.
+The new girl was fired today at noon.
+They felt gay when the ship arrived in port.
+Add the store's account to the last cent.
+Acid burns holes in wool cloth.
+Fairy tales should be fun to write.
+Eight miles of woodland burned to waste.
+The third act was dull and tired the players.
+A young child should not suffer fright.
+Add the column and put the sum here.
+We admire and love a good cook.
+There the flood mark is ten inches.
+He carved a head from the round block of marble.
+She has a smart way of wearing clothes.
+The fruit of a fig tree is apple-shaped.
+Corn cobs can be used to kindle a fire.
+Where were they when the noise started.
+The paper box is full of thumb tacks.
+Sell your gift to a buyer at a good gain.
+The tongs lay beside the ice pail.
+The petals fall with the next puff of wind.
+Bring your best compass to the third class.
+They could laugh although they were sad.
+Farmers came in to thresh the oat crop.
+The brown house was on fire to the attic.
+The lure is used to catch trout and flounder.
+Float the soap on top of the bath water.
+A blue crane is a tall wading bird.
+A fresh start will work such wonders.
+The club rented the rink for the fifth night.
+After the dance, they went straight home.
+The hostess taught the new maid to serve.
+He wrote his last novel there at the inn.
+Even the worst will beat his low score.
+The cement had dried when he moved it.
+The loss of the second ship was hard to take.
+The fly made its way along the wall.
+Do that with a wooden stick.
+Live wires should be kept covered.
+The large house had hot water taps.
+It is hard to erase blue or red ink.
+Write at once or you may forget it.
+The doorknob was made of bright clean brass.
+The wreck occurred by the bank on Main Street.
+A pencil with black lead writes best.
+Coax a young calf to drink from a bucket.
+Schools for ladies teach charm and grace.
+The lamp shone with a steady green flame.
+They took the axe and the saw to the forest.
+The ancient coin was quite dull and worn.
+The shaky barn fell with a loud crash.
+Jazz and swing fans like fast music.
+Rake the rubbish up and then burn it.
+Slash the gold cloth into fine ribbons.
+Try to have the court decide the case.
+They are pushed back each time they attack.
+He broke his ties with groups of former friends.
+They floated on the raft to sun their white backs.
+The map had an X that meant nothing.
+Whitings are small fish caught in nets.
+Some ads serve to cheat buyers.
+Jerk the rope and the bell rings weakly.
+A waxed floor makes us lose balance.
+Madam, this is the best brand of corn.
+On the islands the sea breeze is soft and mild.
+The play began as soon as we sat down.
+This will lead the world to more sound and fury.
+Add salt before you fry the egg.
+The rush for funds reached its peak Tuesday.
+The birch looked stark white and lonesome.
+The box is held by a bright red snapper.
+To make pure ice, you freeze water.
+The first worm gets snapped early.
+Jump the fence and hurry up the bank.
+Yell and clap as the curtain slides back.
+They are men who walk the middle of the road.
+Both brothers wear the same size.
+In some form or other we need fun.
+The prince ordered his head chopped off.
+The houses are built of red clay bricks.
+Ducks fly north but lack a compass.
+Fruit flavors are used in fizz drinks.
+These pills do less good than others.
+Canned pears lack full flavor.
+The dark pot hung in the front closet.
+Carry the pail to the wall and spill it there.
+The train brought our hero to the big town.
+We are sure that one war is enough.
+Gray paint stretched for miles around.
+The rude laugh filled the empty room.
+High seats are best for football fans.
+Tea served from the brown jug is tasty.
+A dash of pepper spoils beef stew.
+A zestful food is the hot-cross bun.
+The horse trotted around the field at a brisk pace.
+Find the twin who stole the pearl necklace.
+Cut the cord that binds the box tightly.
+The red tape bound the smuggled food.
+Look in the corner to find the tan shirt.
+The cold drizzle will halt the bond drive.
+Nine men were hired to dig the ruins.
+The junk yard had a mouldy smell.
+The flint sputtered and lit a pine torch.
+Soak the cloth and drown the sharp odor.
+The shelves were bare of both jam or crackers.
+A joy to every child is the swan boat.
+All sat frozen and watched the screen.
+A cloud of dust stung his tender eyes.
+To reach the end he needs much courage.
+Shape the clay gently into block form.
+A ridge on a smooth surface is a bump or flaw.
+Hedge apples may stain your hands green.
+Quench your thirst, then eat the crackers.
+Tight curls get limp on rainy days.
+The mute muffled the high tones of the horn.
+The gold ring fits only a pierced ear.
+The old pan was covered with hard fudge.
+Watch the log float in the wide river.
+The node on the stalk of wheat grew daily.
+The heap of fallen leaves was set on fire.
+Write fast if you want to finish early.
+His shirt was clean but one button was gone.
+The barrel of beer was a brew of malt and hops.
+Tin cans are absent from store shelves.
+Slide the box into that empty space.
+The plant grew large and green in the window.
+The beam dropped down on the workmen's head.
+Pink clouds floated with the breeze.
+She danced like a swan, tall and graceful.
+The tube was blown and the tire flat and useless.
+It is late morning on the old wall clock.
+Let's all join as we sing the last chorus.
+The last switch cannot be turned off.
+The fight will end in just six minutes.
+The store walls were lined with colored frocks.
+The peace league met to discuss their plans.
+The rise to fame of a person takes luck.
+Paper is scarce, so write with much care.
+The quick fox jumped on the sleeping cat.
+The nozzle of the fire hose was bright brass.
+Screw the round cap on as tight as needed.
+Time brings us many changes.
+The purple tie was ten years old.
+Men think and plan and sometimes act.
+Fill the ink jar with sticky glue.
+He smoke a big pipe with strong contents.
+We need grain to keep our mules healthy.
+Pack the records in a neat thin case.
+The crunch of feet in the snow was the only sound.
+The copper bowl shone in the sun's rays.
+Boards will warp unless kept dry.
+The plush chair leaned against the wall.
+Glass will clink when struck by metal.
+Bathe and relax in the cool green grass.
+Nine rows of soldiers stood in line.
+The beach is dry and shallow at low tide.
+The idea is to sew both edges straight.
+The kitten chased the dog down the street.
+Pages bound in cloth make a book.
+Try to trace the fine lines of the painting.
+Women form less than half of the group.
+The zones merge in the central part of town.
+A gem in the rough needs work to polish.
+Code is used when secrets are sent.
+Most of the news is easy for us to hear.
+He used the lathe to make brass objects.
+The vane on top of the pole revolved in the wind.
+Mince pie is a dish served to children.
+The clan gathered on each dull night.
+Let it burn, it gives us warmth and comfort.
+A castle built from sand fails to endure.
+A child's wit saved the day for us.
+Tack the strip of carpet to the worn floor.
+Next Tuesday we must vote.
+Pour the stew from the pot into the plate.
+Each penny shone like new.
+The man went to the woods to gather sticks.
+The dirt piles were lines along the road.
+The logs fell and tumbled into the clear stream.
+Just hoist it up and take it away.
+A ripe plum is fit for a king's palate.
+Our plans right now are hazy.
+Brass rings are sold by these natives.
+It takes a good trap to capture a bear.
+Feed the white mouse some flower seeds.
+The thaw came early and freed the stream.
+He took the lead and kept it the whole distance.
+The key you designed will fit the lock.
+Plead to the council to free the poor thief.
+Better hash is made of rare beef.
+This plank was made for walking on.
+The lake sparkled in the red hot sun.
+He crawled with care along the ledge.
+Tend the sheep while the dog wanders.
+It takes a lot of help to finish these.
+Mark the spot with a sign painted red.
+Take two shares as a fair profit.
+The fur of cats goes by many names.
+North winds bring colds and fevers.
+He asks no person to vouch for him.
+Go now and come here later.
+A sash of gold silk will trim her dress.
+Soap can wash most dirt away.
+That move means the game is over.
+He wrote down a long list of items.
+A siege will crack the strong defense.
+Grape juice and water mix well.
+Roads are paved with sticky tar.
+Fake stones shine but cost little.
+The drip of the rain made a pleasant sound.
+Smoke poured out of every crack.
+Serve the hot rum to the tired heroes.
+Much of the story makes good sense.
+The sun came up to light the eastern sky.
+Heave the line over the port side.
+A lathe cuts and trims any wood.
+It's a dense crowd in two distinct ways.
+His hip struck the knee of the next player.
+The stale smell of old beer lingers.
+The desk was firm on the shaky floor.
+It takes heat to bring out the odor.
+Beef is scarcer than some lamb.
+Raise the sail and steer the ship northward.
+A cone costs five cents on Mondays.
+A pod is what peas always grow in.
+Jerk the dart from the cork target.
+No cement will hold hard wood.
+We now have a new base for shipping.
+A list of names is carved around the base.
+The sheep were led home by a dog.
+Three for a dime, the young peddler cried.
+The sense of smell is better than that of touch.
+No hardship seemed to keep him sad.
+Grace makes up for lack of beauty.
+Nudge gently but wake her now.
+The news struck doubt into restless minds.
+Once we stood beside the shore.
+A chink in the wall allowed a draft to blow.
+Fasten two pins on each side.
+A cold dip restores health and zest.
+He takes the oath of office each March.
+The sand drifts over the sill of the old house.
+The point of the steel pen was bent and twisted.
+There is a lag between thought and act.
+Seed is needed to plant the spring corn.
+Draw the chart with heavy black lines.
+The boy owed his pal thirty cents.
+The chap slipped into the crowd and was lost.
+Hats are worn to tea and not to dinner.
+The ramp led up to the wide highway.
+Beat the dust from the rug onto the lawn.
+Say it slowly but make it ring clear.
+The straw nest housed five robins.
+Screen the porch with woven straw mats.
+This horse will nose his way to the finish.
+The dry wax protects the deep scratch.
+He picked up the dice for a second roll.
+These coins will be needed to pay his debt.
+The nag pulled the frail cart along.
+Twist the valve and release hot steam.
+The vamp of the shoe had a gold buckle.
+The smell of burned rags itches my nose.
+New pants lack cuffs and pockets.
+The marsh will freeze when cold enough.
+They slice the sausage thin with a knife.
+The bloom of the rose lasts a few days.
+A gray mare walked before the colt.
+Breakfast buns are fine with a hot drink.
+Bottles hold four kinds of rum.
+The man wore a feather in his felt hat.
+He wheeled the bike past the winding road.
+Drop the ashes on the worn old rug.
+The desk and both chairs were painted tan.
+Throw out the used paper cup and plate.
+A clean neck means a neat collar.
+The couch cover and hall drapes were blue.
+The stems of the tall glasses cracked and broke.
+The wall phone rang loud and often.
+The clothes dried on a thin wooden rack.
+Turn on the lantern which gives us light.
+The cleat sank deeply into the soft turf.
+The bills were mailed promptly on the tenth of the month.
+To have is better than to wait and hope.
+The price is fair for a good antique clock.
+The music played on while they talked.
+Dispense with a vest on a day like this.
+The bunch of grapes was pressed into wine.
+He sent the figs, but kept the ripe cherries.
+The hinge on the door creaked with old age.
+The screen before the fire kept in the sparks.
+Fly by night, and you waste little time.
+Thick glasses helped him read the print.
+Birth and death mark the limits of life.
+The chair looked strong but had no bottom.
+The kite flew wildly in the high wind.
+A fur muff is stylish once more.
+The tin box held priceless stones.
+We need an end of all such matter.
+The case was puzzling to the old and wise.
+The bright lanterns were gay on the dark lawn.
+We don't get much money but we have fun.
+The youth drove with zest, but little skill.
+Five years he lived with a shaggy dog.
+A fence cuts through the corner lot.
+The way to save money is not to spend much.
+Shut the hatch before the waves push it in.
+The odor of spring makes young hearts jump.
+Crack the walnut with your sharp side teeth.
+He offered proof in the form of a large chart.
+Send the stuff in a thick paper bag.
+A quart of milk is water for the most part.
+They told wild tales to frighten him.
+The three story house was built of stone.
+In the rear of the ground floor was a large passage.
+A man in a blue sweater sat at the desk.
+Oats are a food eaten by horse and man.
+Their eyelids droop for want of sleep.
+A sip of tea revives his tired friend.
+There are many ways to do these things.
+Tuck the sheet under the edge of the mat.
+A force equal to that would move the earth.
+We like to see clear weather.
+The work of the tailor is seen on each side.
+Take a chance and win a china doll.
+Shake the dust from your shoes, stranger.
+She was kind to sick old people.
+The square wooden crate was packed to be shipped.
+The dusty bench stood by the stone wall.
+We dress to suit the weather of most days.
+Smile when you say nasty words.
+A bowl of rice is free with chicken stew.
+The water in this well is a source of good health.
+Take shelter in this tent, but keep still.
+That guy is the writer of a few banned books.
+The little tales they tell are false.
+The door was barred, locked, and bolted as well.
+Ripe pears are fit for a queen's table.
+A big wet stain was on the round carpet.
+The kite dipped and swayed, but stayed aloft.
+The pleasant hours fly by much too soon.
+The room was crowded with a wild mob.
+This strong arm shall shield your honor.
+She blushed when he gave her a white orchid.
+The beetle droned in the hot June sun.
+Press the pedal with your left foot.
+Neat plans fail without luck.
+The black trunk fell from the landing.
+The bank pressed for payment of the debt.
+The theft of the pearl pin was kept secret.
+Shake hands with this friendly child.
+The vast space stretched into the far distance.
+A rich farm is rare in this sandy waste.
+His wide grin earned many friends.
+Flax makes a fine brand of paper.
+Hurdle the pit with the aid of a long pole.
+A strong bid may scare your partner stiff.
+Even a just cause needs power to win.
+Peep under the tent and see the clowns.
+The leaf drifts along with a slow spin.
+Cheap clothes are flashy but don't last.
+A thing of small note can cause despair.
+Flood the mails with requests for this book.
+A thick coat of black paint covered all.
+The pencil was cut to be sharp at both ends.
+Those last words were a strong statement.
+He wrote his name boldly at the top of the sheet.
+Dill pickles are sour but taste fine.
+Down that road is the way to the grain farmer.
+Either mud or dust are found at all times.
+The best method is to fix it in place with clips.
+If you mumble your speech will be lost.
+At night the alarm roused him from a deep sleep.
+Read just what the meter says.
+Fill your pack with bright trinkets for the poor.
+The small red neon lamp went out.
+Clams are small, round, soft, and tasty.
+The fan whirled its round blades softly.
+The line where the edges join was clean.
+Breathe deep and smell the piny air.
+It matters not if he reads these words or those.
+A brown leather bag hung from its strap.
+A toad and a frog are hard to tell apart.
+A white silk jacket goes with any shoes.
+A break in the dam almost caused a flood.
+Paint the sockets in the wall dull green.
+The child crawled into the dense grass.
+Bribes fail where honest men work.
+Trample the spark, else the flames will spread.
+The hilt of the sword was carved with fine designs.
+A round hole was drilled through the thin board.
+Footprints showed the path he took up the beach.
+She was waiting at my front lawn.
+A vent near the edge brought in fresh air.
+Prod the old mule with a crooked stick.
+It is a band of steel three inches wide.
+The pipe ran almost the length of the ditch.
+It was hidden from sight by a mass of leaves and shrubs.
+The weight of the package was seen on the high scale.
+Wake and rise, and step into the green outdoors.
+The green light in the brown box flickered.
+The brass tube circled the high wall.
+The lobes of her ears were pierced to hold rings.
+Hold the hammer near the end to drive the nail.
+Next Sunday is the twelfth of the month.
+Every word and phrase he speaks is true.
+He put his last cartridge into the gun and fired.
+They took their kids from the public school.
+Drive the screw straight into the wood.
+Keep the hatch tight and the watch constant.
+Sever the twine with a quick snip of the knife.
+Paper will dry out when wet.
+Slide the catch back and open the desk.
+Help the weak to preserve their strength.
+A sullen smile gets few friends.
+Stop whistling and watch the boys march.
+Jerk the cord, and out tumbles the gold.
+Slide the tray across the glass top.
+The cloud moved in a stately way and was gone.
+Light maple makes for a swell room.
+Set the piece here and say nothing.
+Dull stories make her laugh.
+A stiff cord will do to fasten your shoe.
+Get the trust fund to the bank early.
+Choose between the high road and the low.
+A plea for funds seems to come again.
+He lent his coat to the tall gaunt stranger.
+There is a strong chance it will happen once more.
+The duke left the park in a silver coach.
+Greet the new guests and leave quickly.
+When the frost has come it is time for turkey.
+Sweet words work better than fierce.
+A thin stripe runs down the middle.
+A six comes up more often than a ten.
+Lush fern grow on the lofty rocks.
+The ram scared the school children off.
+The team with the best timing looks good.
+The farmer swapped his horse for a brown ox.
+Sit on the perch and tell the others what to do.
+A steep trail is painful for our feet.
+The early phase of life moves fast.
+Green moss grows on the northern side.
+Tea in thin china has a sweet taste.
+Pitch the straw through the door of the stable.
+The latch on the back gate needed a nail.
+The goose was brought straight from the old market.
+The sink is the thing in which we pile dishes.
+A whiff of it will cure the most stubborn cold.
+The facts don't always show who is right.
+She flaps her cape as she parades the street.
+The loss of the cruiser was a blow to the fleet.
+Loop the braid to the left and then over.
+Plead with the lawyer to drop the lost cause.
+Calves thrive on tender spring grass.
+Post no bills on this office wall.
+Tear a thin sheet from the yellow pad.
+A cruise in warm waters in a sleek yacht is fun.
+A streak of color ran down the left edge.
+It was done before the boy could see it.
+Crouch before you jump or miss the mark.
+Pack the kits and don't forget the salt.
+The square peg will settle in the round hole.
+Fine soap saves tender skin.
+Poached eggs and tea must suffice.
+Bad nerves are jangled by a door slam.
+Ship maps are different from those for planes.
+Dimes showered down from all sides.
+They sang the same tunes at each party.
+The sky in the west is tinged with orange red.
+The pods of peas ferment in bare fields.
+The horse balked and threw the tall rider.
+The hitch between the horse and cart broke.
+Pile the coal high in the shed corner.
+A gold vase is both rare and costly.
+The knife was hung inside its bright sheath.
+The rarest spice comes from the far East.
+The roof should be tilted at a sharp slant.
+A smatter of French is worse than none.
+The mule trod the treadmill day and night.
+The aim of the contest is to raise a great fund.
+To send it now in large amounts is bad.
+There is a fine hard tang in salty air.
+Cod is the main business of the north shore.
+The slab was hewn from heavy blocks of slate.
+Dunk the stale biscuits into strong drink.
+Hang tinsel from both branches.
+Cap the jar with a tight brass cover.
+The poor boy missed the boat again.
+Be sure to set the lamp firmly in the hole.
+Pick a card and slip it under the pack.
+A round mat will cover the dull spot.
+The first part of the plan needs changing.
+A good book informs of what we ought to know.
+The mail comes in three batches per day.
+You cannot brew tea in a cold pot.
+Dots of light betrayed the black cat.
+Put the chart on the mantel and tack it down.
+The night shift men rate extra pay.
+The red paper brightened the dim stage.
+See the player scoot to third base.
+Slide the bill between the two leaves.
+Many hands help get the job done.
+We don't like to admit our small faults.
+No doubt about the way the wind blows.
+Dig deep in the earth for pirate's gold.
+The steady drip is worse than a drenching rain.
+A flat pack takes less luggage space.
+Green ice frosted the punch bowl.
+A stuffed chair slipped from the moving van.
+The stitch will serve but needs to be shortened.
+A thin book fits in the side pocket.
+The gloss on top made it unfit to read.
+The hail pattered on the burnt brown grass.
+Seven seals were stamped on great sheets.
+Our troops are set to strike heavy blows.
+The store was jammed before the sale could start.
+It was a bad error on the part of the new judge.
+One step more and the board will collapse.
+Take the match and strike it against your shoe.
+The pot boiled, but the contents failed to jell.
+The baby puts his right foot in his mouth.
+The bombs left most of the town in ruins.
+Stop and stare at the hard working man.
+The streets are narrow and full of sharp turns.
+The pup jerked the leash as he saw a feline shape.
+Open your book to the first page.
+Fish evade the net and swim off.
+Dip the pail once and let it settle.
+Will you please answer that phone.
+The big red apple fell to the ground.
+The curtain rose and the show was on.
+The young prince became heir to the throne.
+He sent the boy on a short errand.
+Leave now and you will arrive on time.
+The corner store was robbed last night.
+A gold ring will please most any girl.
+The long journey home took a year.
+She saw a cat in the neighbor's house.
+A pink shell was found on the sandy beach.
+Small children came to see him.
+The grass and bushes were wet with dew.
+The blind man counted his old coins.
+A severe storm tore down the barn.
+She called his name many times.
+When you hear the bell, come quickly.
\ No newline at end of file
diff --git a/extensions/silero_tts/outputs/outputs-will-be-saved-here.txt b/extensions/silero_tts/outputs/outputs-will-be-saved-here.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/extensions/silero_tts/requirements.txt b/extensions/silero_tts/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1017bf0d7accb9930872ededd8a4bc077d393958
--- /dev/null
+++ b/extensions/silero_tts/requirements.txt
@@ -0,0 +1,5 @@
+ipython
+num2words
+omegaconf
+pydub
+PyYAML
diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4bacc7278001f35e5267b2e97a05b71474d70c5
--- /dev/null
+++ b/extensions/silero_tts/script.py
@@ -0,0 +1,254 @@
+import html
+import random
+import time
+from pathlib import Path
+
+import gradio as gr
+import torch
+
+from extensions.silero_tts import tts_preprocessor
+from modules import chat, shared, ui_chat
+from modules.utils import gradio
+
+torch._C._jit_set_profiling_mode(False)
+
+
+params = {
+ 'activate': True,
+ 'speaker': 'en_56',
+ 'language': 'en',
+ 'model_id': 'v3_en',
+ 'sample_rate': 48000,
+ 'device': 'cpu',
+ 'show_text': False,
+ 'autoplay': True,
+ 'voice_pitch': 'medium',
+ 'voice_speed': 'medium',
+ 'local_cache_path': '' # User can override the default cache path to something other via settings.json
+}
+
+current_params = params.copy()
+
+voices_en = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115']
+voices_es = ["es_0", "es_1", "es_2"]
+voices_fr = ["fr_0", "fr_1", "fr_2", "fr_3", "fr_4", "fr_5"]
+voices_de = ["bernd_ungerer", "eva_k", "friedrich", "hokuspokus", "karlsson"]
+voices_ru = ["aidar", "baya", "kseniya", "xenia"]
+voices_ua = ["mykyta"]
+voices_uz = ["dilnavoz"]
+
+languages = {
+ "en": {"label": "English", "voices": voices_en, "default_voice": "en_56", "model_id": "v3_en"},
+ "es": {"label": "Español", "voices": voices_es, "default_voice": "es_0", "model_id": "v3_es"},
+ "fr": {"label": "Français", "voices": voices_fr, "default_voice": "fr_0", "model_id": "v3_fr"},
+ "de": {"label": "Deutsch", "voices": voices_de, "default_voice": "eva_k", "model_id": "v3_de"},
+ "ru": {"label": "русский", "voices": voices_ru, "default_voice": "aidar", "model_id": "ru_v3"},
+ "ua": {"label": "українська", "voices": voices_ua, "default_voice": "mykyta", "model_id": "v3_ua"},
+ "uz": {"label": "Oʻzbekcha", "voices": voices_uz, "default_voice": "dilnavoz", "model_id": "v3_uz"},
+}
+
+voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high']
+voice_speeds = ['x-slow', 'slow', 'medium', 'fast', 'x-fast']
+
+# Used for making text xml compatible, needed for voice pitch and speed control
+table = str.maketrans({
+ "<": "<",
+ ">": ">",
+ "&": "&",
+ "'": "'",
+ '"': """,
+})
+
+
+def xmlesc(txt):
+ return txt.translate(table)
+
+
+def load_model():
+ torch_cache_path = torch.hub.get_dir() if params['local_cache_path'] == '' else params['local_cache_path']
+ model_path = torch_cache_path + "/snakers4_silero-models_master/src/silero/model/" + params['model_id'] + ".pt"
+ if Path(model_path).is_file():
+ print(f'\nUsing Silero TTS cached checkpoint found at {torch_cache_path}')
+ model, example_text = torch.hub.load(repo_or_dir=torch_cache_path + '/snakers4_silero-models_master/', model='silero_tts', language=params['language'], speaker=params['model_id'], source='local', path=model_path, force_reload=True)
+ else:
+ print(f'\nSilero TTS cache not found at {torch_cache_path}. Attempting to download...')
+ model, example_text = torch.hub.load(repo_or_dir='snakers4/silero-models', model='silero_tts', language=params['language'], speaker=params['model_id'])
+ model.to(params['device'])
+ return model
+
+
+def remove_tts_from_history(history):
+ for i, entry in enumerate(history['internal']):
+ history['visible'][i] = [history['visible'][i][0], entry[1]]
+
+ return history
+
+
+def toggle_text_in_history(history):
+ for i, entry in enumerate(history['visible']):
+ visible_reply = entry[1]
+ if visible_reply.startswith('\n\n{reply}"]
+ else:
+ history['visible'][i] = [history['visible'][i][0], f"{visible_reply.split('')[0]}"]
+
+ return history
+
+
+def state_modifier(state):
+ if not params['activate']:
+ return state
+
+ state['stream'] = False
+ return state
+
+
+def input_modifier(string, state):
+ if not params['activate']:
+ return string
+
+ shared.processing_message = "*Is recording a voice message...*"
+ return string
+
+
+def history_modifier(history):
+ # Remove autoplay from the last reply
+ if len(history['internal']) > 0:
+ history['visible'][-1] = [
+ history['visible'][-1][0],
+ history['visible'][-1][1].replace('controls autoplay>', 'controls>')
+ ]
+
+ return history
+
+
+def output_modifier(string, state):
+ global model, current_params, streaming_state
+
+ for i in params:
+ if params[i] != current_params[i]:
+ model = load_model()
+ current_params = params.copy()
+ break
+
+ if not params['activate']:
+ return string
+
+ original_string = string
+ string = tts_preprocessor.preprocess(html.unescape(string))
+
+ if string == '':
+ string = '*Empty reply, try regenerating*'
+ else:
+ output_file = Path(f'extensions/silero_tts/outputs/{state["character_menu"]}_{int(time.time())}.wav')
+ prosody = ''.format(params['voice_speed'], params['voice_pitch'])
+ silero_input = f'{prosody}{xmlesc(string)}'
+ model.save_wav(ssml_text=silero_input, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
+
+ autoplay = 'autoplay' if params['autoplay'] else ''
+ string = f''
+ if params['show_text']:
+ string += f'\n\n{original_string}'
+
+ shared.processing_message = "*Is typing...*"
+ return string
+
+
+def setup():
+ global model
+ model = load_model()
+
+
+def random_sentence():
+ with open(Path("extensions/silero_tts/harvard_sentences.txt")) as f:
+ return random.choice(list(f))
+
+
+def voice_preview(preview_text):
+ global model, current_params, streaming_state
+
+ for i in params:
+ if params[i] != current_params[i]:
+ model = load_model()
+ current_params = params.copy()
+ break
+
+ string = tts_preprocessor.preprocess(preview_text or random_sentence())
+
+ output_file = Path('extensions/silero_tts/outputs/voice_preview.wav')
+ prosody = f""
+ silero_input = f'{prosody}{xmlesc(string)}'
+ model.save_wav(ssml_text=silero_input, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
+
+ return f''
+
+
+def language_change(lang):
+ global params
+ lang_code = list(languages.keys())[lang]
+ params.update({"language": lang_code, "speaker": languages[lang_code]["default_voice"], "model_id": languages[lang_code]["model_id"]})
+ return gr.update(choices=languages[lang_code]["voices"], value=languages[lang_code]["default_voice"])
+
+
+def custom_css():
+ path_to_css = Path(__file__).parent.resolve() / 'style.css'
+ return open(path_to_css, 'r').read()
+
+
+def ui():
+ # Gradio elements
+ with gr.Accordion("Silero TTS"):
+ with gr.Row():
+ activate = gr.Checkbox(value=params['activate'], label='Activate TTS')
+ autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically')
+
+ show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player')
+
+ with gr.Row():
+ language = gr.Dropdown(value=languages[params['language']]["label"], choices=[v["label"] for _, v in languages.items()], label='Language', type="index")
+ voice = gr.Dropdown(value=params['speaker'], choices=voices_en, label='TTS voice')
+ with gr.Row():
+ v_pitch = gr.Dropdown(value=params['voice_pitch'], choices=voice_pitches, label='Voice pitch')
+ v_speed = gr.Dropdown(value=params['voice_speed'], choices=voice_speeds, label='Voice speed')
+
+ with gr.Row():
+ preview_text = gr.Text(show_label=False, placeholder="Preview text", elem_id="silero_preview_text")
+ preview_play = gr.Button("Preview")
+ preview_audio = gr.HTML(visible=False)
+
+ with gr.Row():
+ convert = gr.Button('Permanently replace audios with the message texts')
+ convert_cancel = gr.Button('Cancel', visible=False)
+ convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)
+
+ # Convert history with confirmation
+ convert_arr = [convert_confirm, convert, convert_cancel]
+ convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr)
+ convert_confirm.click(
+ lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then(
+ remove_tts_from_history, gradio('history'), gradio('history')).then(
+ chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
+ chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
+
+ convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)
+
+ # Toggle message text in history
+ show_text.change(
+ lambda x: params.update({"show_text": x}), show_text, None).then(
+ toggle_text_in_history, gradio('history'), gradio('history')).then(
+ chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
+ chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
+
+ # Event functions to update the parameters in the backend
+ activate.change(lambda x: params.update({"activate": x}), activate, None)
+ autoplay.change(lambda x: params.update({"autoplay": x}), autoplay, None)
+ language.change(language_change, language, voice, show_progress=False)
+ voice.change(lambda x: params.update({"speaker": x}), voice, None)
+ v_pitch.change(lambda x: params.update({"voice_pitch": x}), v_pitch, None)
+ v_speed.change(lambda x: params.update({"voice_speed": x}), v_speed, None)
+
+ # Play preview
+ preview_text.submit(voice_preview, preview_text, preview_audio)
+ preview_play.click(voice_preview, preview_text, preview_audio)
diff --git a/extensions/silero_tts/style.css b/extensions/silero_tts/style.css
new file mode 100644
index 0000000000000000000000000000000000000000..2ab7aefbbfca19982414f13a76dfdd4324793903
--- /dev/null
+++ b/extensions/silero_tts/style.css
@@ -0,0 +1,8 @@
+.SDAP .hires_opts input[type="number"] {
+ width: 6em !important;
+}
+
+/* silero_tts preview */
+.form:has(> #silero_preview_text) {
+ min-width: 75%
+}
diff --git a/extensions/silero_tts/test_tts.py b/extensions/silero_tts/test_tts.py
new file mode 100644
index 0000000000000000000000000000000000000000..ebc2c102a9ef29f21141429232f957421989cdd4
--- /dev/null
+++ b/extensions/silero_tts/test_tts.py
@@ -0,0 +1,81 @@
+import time
+from pathlib import Path
+
+import torch
+import tts_preprocessor
+
+torch._C._jit_set_profiling_mode(False)
+
+
+params = {
+ 'activate': True,
+ 'speaker': 'en_49',
+ 'language': 'en',
+ 'model_id': 'v3_en',
+ 'sample_rate': 48000,
+ 'device': 'cpu',
+ 'show_text': True,
+ 'autoplay': True,
+ 'voice_pitch': 'medium',
+ 'voice_speed': 'medium',
+}
+
+current_params = params.copy()
+voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115']
+voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high']
+voice_speeds = ['x-slow', 'slow', 'medium', 'fast', 'x-fast']
+
+# Used for making text xml compatible, needed for voice pitch and speed control
+table = str.maketrans({
+ "<": "<",
+ ">": ">",
+ "&": "&",
+ "'": "'",
+ '"': """,
+})
+
+
+def xmlesc(txt):
+ return txt.translate(table)
+
+
+def load_model():
+ model, example_text = torch.hub.load(repo_or_dir='snakers4/silero-models', model='silero_tts', language=params['language'], speaker=params['model_id'])
+ model.to(params['device'])
+ return model
+
+
+model = load_model()
+
+
+def output_modifier(string):
+ """
+ This function is applied to the model outputs.
+ """
+
+ global model, current_params
+
+ original_string = string
+ string = tts_preprocessor.preprocess(string)
+ processed_string = string
+
+ if string == '':
+ string = '*Empty reply, try regenerating*'
+ else:
+ output_file = Path(f'extensions/silero_tts/outputs/test_{int(time.time())}.wav')
+ prosody = ''.format(params['voice_speed'], params['voice_pitch'])
+ silero_input = f'{prosody}{xmlesc(string)}'
+ model.save_wav(ssml_text=silero_input, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
+
+ autoplay = 'autoplay' if params['autoplay'] else ''
+ string = f''
+
+ if params['show_text']:
+ string += f'\n\n{original_string}\n\nProcessed:\n{processed_string}'
+
+ print(string)
+
+
+if __name__ == '__main__':
+ import sys
+ output_modifier(sys.argv[1])
diff --git a/extensions/silero_tts/tts_preprocessor.py b/extensions/silero_tts/tts_preprocessor.py
new file mode 100644
index 0000000000000000000000000000000000000000..daefdcbda6c9b20a87c6f3d84d2a759c2c51289c
--- /dev/null
+++ b/extensions/silero_tts/tts_preprocessor.py
@@ -0,0 +1,200 @@
+import re
+
+from num2words import num2words
+
+punctuation = r'[\s,.?!/)\'\]>]'
+alphabet_map = {
+ "A": " Ei ",
+ "B": " Bee ",
+ "C": " See ",
+ "D": " Dee ",
+ "E": " Eee ",
+ "F": " Eff ",
+ "G": " Jee ",
+ "H": " Eich ",
+ "I": " Eye ",
+ "J": " Jay ",
+ "K": " Kay ",
+ "L": " El ",
+ "M": " Emm ",
+ "N": " Enn ",
+ "O": " Ohh ",
+ "P": " Pee ",
+ "Q": " Queue ",
+ "R": " Are ",
+ "S": " Ess ",
+ "T": " Tee ",
+ "U": " You ",
+ "V": " Vee ",
+ "W": " Double You ",
+ "X": " Ex ",
+ "Y": " Why ",
+ "Z": " Zed " # Zed is weird, as I (da3dsoul) am American, but most of the voice models sound British, so it matches
+}
+
+
+def preprocess(string):
+ # the order for some of these matter
+ # For example, you need to remove the commas in numbers before expanding them
+ string = remove_surrounded_chars(string)
+ string = string.replace('"', '')
+ string = string.replace('\u201D', '').replace('\u201C', '') # right and left quote
+ string = string.replace('\u201F', '') # italic looking quote
+ string = string.replace('\n', ' ')
+ string = convert_num_locale(string)
+ string = replace_negative(string)
+ string = replace_roman(string)
+ string = hyphen_range_to(string)
+ string = num_to_words(string)
+
+ # TODO Try to use a ML predictor to expand abbreviations. It's hard, dependent on context, and whether to actually
+ # try to say the abbreviation or spell it out as I've done below is not agreed upon
+
+ # For now, expand abbreviations to pronunciations
+ # replace_abbreviations adds a lot of unnecessary whitespace to ensure separation
+ string = replace_abbreviations(string)
+ string = replace_lowercase_abbreviations(string)
+
+ # cleanup whitespaces
+ # remove whitespace before punctuation
+ string = re.sub(rf'\s+({punctuation})', r'\1', string)
+ string = string.strip()
+ # compact whitespace
+ string = ' '.join(string.split())
+
+ return string
+
+
+def remove_surrounded_chars(string):
+ # first this expression will check if there is a string nested exclusively between a alt=
+ # and a style= string. This would correspond to only a the alt text of an embedded image
+ # If it matches it will only keep that part as the string, and rend it for further processing
+ # Afterwards this expression matches to 'as few symbols as possible (0 upwards) between any
+ # asterisks' OR' as few symbols as possible (0 upwards) between an asterisk and the end of the string'
+ if re.search(r'(?<=alt=)(.*)(?=style=)', string, re.DOTALL):
+ m = re.search(r'(?<=alt=)(.*)(?=style=)', string, re.DOTALL)
+ string = m.group(0)
+ return re.sub(r'\*[^*]*?(\*|$)', '', string)
+
+
+def convert_num_locale(text):
+ # This detects locale and converts it to American without comma separators
+ pattern = re.compile(r'(?:\s|^)\d{1,3}(?:\.\d{3})+(,\d+)(?:\s|$)')
+ result = text
+ while True:
+ match = pattern.search(result)
+ if match is None:
+ break
+
+ start = match.start()
+ end = match.end()
+ result = result[0:start] + result[start:end].replace('.', '').replace(',', '.') + result[end:len(result)]
+
+ # removes comma separators from existing American numbers
+ pattern = re.compile(r'(\d),(\d)')
+ result = pattern.sub(r'\1\2', result)
+
+ return result
+
+
+def replace_negative(string):
+ # handles situations like -5. -5 would become negative 5, which would then be expanded to negative five
+ return re.sub(rf'(\s)(-)(\d+)({punctuation})', r'\1negative \3\4', string)
+
+
+def replace_roman(string):
+ # find a string of roman numerals.
+ # Only 2 or more, to avoid capturing I and single character abbreviations, like names
+ pattern = re.compile(rf'\s[IVXLCDM]{{2,}}{punctuation}')
+ result = string
+ while True:
+ match = pattern.search(result)
+ if match is None:
+ break
+
+ start = match.start()
+ end = match.end()
+ result = result[0:start + 1] + str(roman_to_int(result[start + 1:end - 1])) + result[end - 1:len(result)]
+
+ return result
+
+
+def roman_to_int(s):
+ rom_val = {'I': 1, 'V': 5, 'X': 10, 'L': 50, 'C': 100, 'D': 500, 'M': 1000}
+ int_val = 0
+ for i in range(len(s)):
+ if i > 0 and rom_val[s[i]] > rom_val[s[i - 1]]:
+ int_val += rom_val[s[i]] - 2 * rom_val[s[i - 1]]
+ else:
+ int_val += rom_val[s[i]]
+ return int_val
+
+
+def hyphen_range_to(text):
+ pattern = re.compile(r'(\d+)[-–](\d+)')
+ result = pattern.sub(lambda x: x.group(1) + ' to ' + x.group(2), text)
+ return result
+
+
+def num_to_words(text):
+ # 1000 or 10.23
+ pattern = re.compile(r'\d+\.\d+|\d+')
+ result = pattern.sub(lambda x: num2words(float(x.group())), text)
+ return result
+
+
+def replace_abbreviations(string):
+ # abbreviations 1 to 4 characters long. It will get things like A and I, but those are pronounced with their letter
+ pattern = re.compile(rf'(^|[\s(.\'\[<])([A-Z]{{1,4}})({punctuation}|$)')
+ result = string
+ while True:
+ match = pattern.search(result)
+ if match is None:
+ break
+
+ start = match.start()
+ end = match.end()
+ result = result[0:start] + replace_abbreviation(result[start:end]) + result[end:len(result)]
+
+ return result
+
+
+def replace_lowercase_abbreviations(string):
+ # abbreviations 1 to 4 characters long, separated by dots i.e. e.g.
+ pattern = re.compile(rf'(^|[\s(.\'\[<])(([a-z]\.){{1,4}})({punctuation}|$)')
+ result = string
+ while True:
+ match = pattern.search(result)
+ if match is None:
+ break
+
+ start = match.start()
+ end = match.end()
+ result = result[0:start] + replace_abbreviation(result[start:end].upper()) + result[end:len(result)]
+
+ return result
+
+
+def replace_abbreviation(string):
+ result = ""
+ for char in string:
+ result += match_mapping(char)
+
+ return result
+
+
+def match_mapping(char):
+ for mapping in alphabet_map.keys():
+ if char == mapping:
+ return alphabet_map[char]
+
+ return char
+
+
+def __main__(args):
+ print(preprocess(args[1]))
+
+
+if __name__ == "__main__":
+ import sys
+ __main__(sys.argv)
diff --git a/extensions/superbooga/chromadb.py b/extensions/superbooga/chromadb.py
new file mode 100644
index 0000000000000000000000000000000000000000..1fb7a71848a8c99ab29b90c49902b545a1595f03
--- /dev/null
+++ b/extensions/superbooga/chromadb.py
@@ -0,0 +1,125 @@
+import chromadb
+import posthog
+import torch
+from chromadb.config import Settings
+from sentence_transformers import SentenceTransformer
+
+from modules.logging_colors import logger
+
+logger.info('Intercepting all calls to posthog :)')
+posthog.capture = lambda *args, **kwargs: None
+
+
+class Collecter():
+ def __init__(self):
+ pass
+
+ def add(self, texts: list[str]):
+ pass
+
+ def get(self, search_strings: list[str], n_results: int) -> list[str]:
+ pass
+
+ def clear(self):
+ pass
+
+
+class Embedder():
+ def __init__(self):
+ pass
+
+ def embed(self, text: str) -> list[torch.Tensor]:
+ pass
+
+
+class ChromaCollector(Collecter):
+ def __init__(self, embedder: Embedder):
+ super().__init__()
+ self.chroma_client = chromadb.Client(Settings(anonymized_telemetry=False))
+ self.embedder = embedder
+ self.collection = self.chroma_client.create_collection(name="context", embedding_function=embedder.embed)
+ self.ids = []
+
+ def add(self, texts: list[str]):
+ if len(texts) == 0:
+ return
+
+ self.ids = [f"id{i}" for i in range(len(texts))]
+ self.collection.add(documents=texts, ids=self.ids)
+
+ def get_documents_ids_distances(self, search_strings: list[str], n_results: int):
+ n_results = min(len(self.ids), n_results)
+ if n_results == 0:
+ return [], [], []
+
+ result = self.collection.query(query_texts=search_strings, n_results=n_results, include=['documents', 'distances'])
+ documents = result['documents'][0]
+ ids = list(map(lambda x: int(x[2:]), result['ids'][0]))
+ distances = result['distances'][0]
+ return documents, ids, distances
+
+ # Get chunks by similarity
+ def get(self, search_strings: list[str], n_results: int) -> list[str]:
+ documents, _, _ = self.get_documents_ids_distances(search_strings, n_results)
+ return documents
+
+ # Get ids by similarity
+ def get_ids(self, search_strings: list[str], n_results: int) -> list[str]:
+ _, ids, _ = self.get_documents_ids_distances(search_strings, n_results)
+ return ids
+
+ # Get chunks by similarity and then sort by insertion order
+ def get_sorted(self, search_strings: list[str], n_results: int) -> list[str]:
+ documents, ids, _ = self.get_documents_ids_distances(search_strings, n_results)
+ return [x for _, x in sorted(zip(ids, documents))]
+
+ # Multiply distance by factor within [0, time_weight] where more recent is lower
+ def apply_time_weight_to_distances(self, ids: list[int], distances: list[float], time_weight: float = 1.0) -> list[float]:
+ if len(self.ids) <= 1:
+ return distances.copy()
+
+ return [distance * (1 - _id / (len(self.ids) - 1) * time_weight) for _id, distance in zip(ids, distances)]
+
+ # Get ids by similarity and then sort by insertion order
+ def get_ids_sorted(self, search_strings: list[str], n_results: int, n_initial: int = None, time_weight: float = 1.0) -> list[str]:
+ do_time_weight = time_weight > 0
+ if not (do_time_weight and n_initial is not None):
+ n_initial = n_results
+ elif n_initial == -1:
+ n_initial = len(self.ids)
+
+ if n_initial < n_results:
+ raise ValueError(f"n_initial {n_initial} should be >= n_results {n_results}")
+
+ _, ids, distances = self.get_documents_ids_distances(search_strings, n_initial)
+ if do_time_weight:
+ distances_w = self.apply_time_weight_to_distances(ids, distances, time_weight=time_weight)
+ results = zip(ids, distances, distances_w)
+ results = sorted(results, key=lambda x: x[2])[:n_results]
+ results = sorted(results, key=lambda x: x[0])
+ ids = [x[0] for x in results]
+
+ return sorted(ids)
+
+ def clear(self):
+ self.collection.delete(ids=self.ids)
+ self.ids = []
+
+
+class SentenceTransformerEmbedder(Embedder):
+ def __init__(self) -> None:
+ self.model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
+ self.embed = self.model.encode
+
+
+def make_collector():
+ global embedder
+ return ChromaCollector(embedder)
+
+
+def add_chunks_to_collector(chunks, collector):
+ collector.clear()
+ collector.add(chunks)
+
+
+embedder = SentenceTransformerEmbedder()
diff --git a/extensions/superbooga/download_urls.py b/extensions/superbooga/download_urls.py
new file mode 100644
index 0000000000000000000000000000000000000000..424a98857600dc54caf9cdea92b513d9116d6808
--- /dev/null
+++ b/extensions/superbooga/download_urls.py
@@ -0,0 +1,35 @@
+import concurrent.futures
+
+import requests
+
+
+def download_single(url):
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
+ }
+ response = requests.get(url, headers=headers, timeout=5)
+ if response.status_code == 200:
+ return response.content
+ else:
+ raise Exception("Failed to download URL")
+
+
+def download_urls(urls, threads=1):
+ with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
+ futures = []
+ for url in urls:
+ future = executor.submit(download_single, url)
+ futures.append(future)
+
+ results = []
+ i = 0
+ for future in concurrent.futures.as_completed(futures):
+ try:
+ result = future.result()
+ results.append(result)
+ i += 1
+ yield f"{i}/{len(urls)}", results
+ except Exception:
+ pass
+
+ yield "Done", results
diff --git a/extensions/superbooga/requirements.txt b/extensions/superbooga/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..73a60078cfc523cde88d52d97bd44d099d751f7e
--- /dev/null
+++ b/extensions/superbooga/requirements.txt
@@ -0,0 +1,6 @@
+beautifulsoup4==4.12.2
+chromadb==0.3.18
+pandas==2.0.3
+posthog==2.4.2
+sentence_transformers==2.2.2
+lxml
diff --git a/extensions/superbooga/script.py b/extensions/superbooga/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..06fe8ad3d33c50fe7f5ad2d0123a0dde4695a291
--- /dev/null
+++ b/extensions/superbooga/script.py
@@ -0,0 +1,260 @@
+import re
+import textwrap
+
+import gradio as gr
+from bs4 import BeautifulSoup
+
+from modules import chat
+from modules.logging_colors import logger
+
+from .chromadb import add_chunks_to_collector, make_collector
+from .download_urls import download_urls
+
+params = {
+ 'chunk_count': 5,
+ 'chunk_count_initial': 10,
+ 'time_weight': 0,
+ 'chunk_length': 700,
+ 'chunk_separator': '',
+ 'strong_cleanup': False,
+ 'threads': 4,
+}
+
+collector = make_collector()
+chat_collector = make_collector()
+
+
+def feed_data_into_collector(corpus, chunk_len, chunk_sep):
+ global collector
+
+ # Defining variables
+ chunk_len = int(chunk_len)
+ chunk_sep = chunk_sep.replace(r'\n', '\n')
+ cumulative = ''
+
+ # Breaking the data into chunks and adding those to the db
+ cumulative += "Breaking the input dataset...\n\n"
+ yield cumulative
+ if chunk_sep:
+ data_chunks = corpus.split(chunk_sep)
+ data_chunks = [[data_chunk[i:i + chunk_len] for i in range(0, len(data_chunk), chunk_len)] for data_chunk in data_chunks]
+ data_chunks = [x for y in data_chunks for x in y]
+ else:
+ data_chunks = [corpus[i:i + chunk_len] for i in range(0, len(corpus), chunk_len)]
+
+ cumulative += f"{len(data_chunks)} chunks have been found.\n\nAdding the chunks to the database...\n\n"
+ yield cumulative
+ add_chunks_to_collector(data_chunks, collector)
+ cumulative += "Done."
+ yield cumulative
+
+
+def feed_file_into_collector(file, chunk_len, chunk_sep):
+ yield 'Reading the input dataset...\n\n'
+ text = file.decode('utf-8')
+ for i in feed_data_into_collector(text, chunk_len, chunk_sep):
+ yield i
+
+
+def feed_url_into_collector(urls, chunk_len, chunk_sep, strong_cleanup, threads):
+ all_text = ''
+ cumulative = ''
+
+ urls = urls.strip().split('\n')
+ cumulative += f'Loading {len(urls)} URLs with {threads} threads...\n\n'
+ yield cumulative
+ for update, contents in download_urls(urls, threads=threads):
+ yield cumulative + update
+
+ cumulative += 'Processing the HTML sources...'
+ yield cumulative
+ for content in contents:
+ soup = BeautifulSoup(content, features="lxml")
+ for script in soup(["script", "style"]):
+ script.extract()
+
+ strings = soup.stripped_strings
+ if strong_cleanup:
+ strings = [s for s in strings if re.search("[A-Za-z] ", s)]
+
+ text = '\n'.join([s.strip() for s in strings])
+ all_text += text
+
+ for i in feed_data_into_collector(all_text, chunk_len, chunk_sep):
+ yield i
+
+
+def apply_settings(chunk_count, chunk_count_initial, time_weight):
+ global params
+ params['chunk_count'] = int(chunk_count)
+ params['chunk_count_initial'] = int(chunk_count_initial)
+ params['time_weight'] = time_weight
+ settings_to_display = {k: params[k] for k in params if k in ['chunk_count', 'chunk_count_initial', 'time_weight']}
+ yield f"The following settings are now active: {str(settings_to_display)}"
+
+
+def custom_generate_chat_prompt(user_input, state, **kwargs):
+ global chat_collector
+
+ # get history as being modified when using regenerate.
+ history = kwargs['history']
+
+ if state['mode'] == 'instruct':
+ results = collector.get_sorted(user_input, n_results=params['chunk_count'])
+ additional_context = '\nYour reply should be based on the context below:\n\n' + '\n'.join(results)
+ user_input += additional_context
+ else:
+
+ def make_single_exchange(id_):
+ output = ''
+ output += f"{state['name1']}: {history['internal'][id_][0]}\n"
+ output += f"{state['name2']}: {history['internal'][id_][1]}\n"
+ return output
+
+ if len(history['internal']) > params['chunk_count'] and user_input != '':
+ chunks = []
+ hist_size = len(history['internal'])
+ for i in range(hist_size - 1):
+ chunks.append(make_single_exchange(i))
+
+ add_chunks_to_collector(chunks, chat_collector)
+ query = '\n'.join(history['internal'][-1] + [user_input])
+ try:
+ best_ids = chat_collector.get_ids_sorted(query, n_results=params['chunk_count'], n_initial=params['chunk_count_initial'], time_weight=params['time_weight'])
+ additional_context = '\n'
+ for id_ in best_ids:
+ if history['internal'][id_][0] != '<|BEGIN-VISIBLE-CHAT|>':
+ additional_context += make_single_exchange(id_)
+
+ logger.warning(f'Adding the following new context:\n{additional_context}')
+ state['context'] = state['context'].strip() + '\n' + additional_context
+ kwargs['history'] = {
+ 'internal': [history['internal'][i] for i in range(hist_size) if i not in best_ids],
+ 'visible': ''
+ }
+ except RuntimeError:
+ logger.error("Couldn't query the database, moving on...")
+
+ return chat.generate_chat_prompt(user_input, state, **kwargs)
+
+
+def remove_special_tokens(string):
+ pattern = r'(<\|begin-user-input\|>|<\|end-user-input\|>|<\|injection-point\|>)'
+ return re.sub(pattern, '', string)
+
+
+def input_modifier(string, state, is_chat=False):
+ if is_chat:
+ return string
+
+ # Find the user input
+ pattern = re.compile(r"<\|begin-user-input\|>(.*?)<\|end-user-input\|>", re.DOTALL)
+ match = re.search(pattern, string)
+ if match:
+ user_input = match.group(1).strip()
+
+ # Get the most similar chunks
+ results = collector.get_sorted(user_input, n_results=params['chunk_count'])
+
+ # Make the injection
+ string = string.replace('<|injection-point|>', '\n'.join(results))
+
+ return remove_special_tokens(string)
+
+
+def ui():
+ with gr.Accordion("Click for more information...", open=False):
+ gr.Markdown(textwrap.dedent("""
+
+ ## About
+
+ This extension takes a dataset as input, breaks it into chunks, and adds the result to a local/offline Chroma database.
+
+ The database is then queried during inference time to get the excerpts that are closest to your input. The idea is to create an arbitrarily large pseudo context.
+
+ The core methodology was developed and contributed by kaiokendev, who is working on improvements to the method in this repository: https://github.com/kaiokendev/superbig
+
+ ## Data input
+
+ Start by entering some data in the interface below and then clicking on "Load data".
+
+ Each time you load some new data, the old chunks are discarded.
+
+ ## Chat mode
+
+ #### Instruct
+
+ On each turn, the chunks will be compared to your current input and the most relevant matches will be appended to the input in the following format:
+
+ ```
+ Consider the excerpts below as additional context:
+ ...
+ ```
+
+ The injection doesn't make it into the chat history. It is only used in the current generation.
+
+ #### Regular chat
+
+ The chunks from the external data sources are ignored, and the chroma database is built based on the chat history instead. The most relevant past exchanges relative to the present input are added to the context string. This way, the extension acts as a long term memory.
+
+ ## Notebook/default modes
+
+ Your question must be manually specified between `<|begin-user-input|>` and `<|end-user-input|>` tags, and the injection point must be specified with `<|injection-point|>`.
+
+ The special tokens mentioned above (`<|begin-user-input|>`, `<|end-user-input|>`, and `<|injection-point|>`) are removed in the background before the text generation begins.
+
+ Here is an example in Vicuna 1.1 format:
+
+ ```
+ A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
+
+ USER:
+
+ <|begin-user-input|>
+ What datasets are mentioned in the text below?
+ <|end-user-input|>
+
+ <|injection-point|>
+
+ ASSISTANT:
+ ```
+
+ ⚠️ For best results, make sure to remove the spaces and new line characters after `ASSISTANT:`.
+
+ *This extension is currently experimental and under development.*
+
+ """))
+
+ with gr.Row():
+ with gr.Column(min_width=600):
+ with gr.Tab("Text input"):
+ data_input = gr.Textbox(lines=20, label='Input data')
+ update_data = gr.Button('Load data')
+
+ with gr.Tab("URL input"):
+ url_input = gr.Textbox(lines=10, label='Input URLs', info='Enter one or more URLs separated by newline characters.')
+ strong_cleanup = gr.Checkbox(value=params['strong_cleanup'], label='Strong cleanup', info='Only keeps html elements that look like long-form text.')
+ threads = gr.Number(value=params['threads'], label='Threads', info='The number of threads to use while downloading the URLs.', precision=0)
+ update_url = gr.Button('Load data')
+
+ with gr.Tab("File input"):
+ file_input = gr.File(label='Input file', type='binary')
+ update_file = gr.Button('Load data')
+
+ with gr.Tab("Generation settings"):
+ chunk_count = gr.Number(value=params['chunk_count'], label='Chunk count', info='The number of closest-matching chunks to include in the prompt.')
+ gr.Markdown('Time weighting (optional, used in to make recently added chunks more likely to appear)')
+ time_weight = gr.Slider(0, 1, value=params['time_weight'], label='Time weight', info='Defines the strength of the time weighting. 0 = no time weighting.')
+ chunk_count_initial = gr.Number(value=params['chunk_count_initial'], label='Initial chunk count', info='The number of closest-matching chunks retrieved for time weight reordering in chat mode. This should be >= chunk count. -1 = All chunks are retrieved. Only used if time_weight > 0.')
+
+ update_settings = gr.Button('Apply changes')
+
+ chunk_len = gr.Number(value=params['chunk_length'], label='Chunk length', info='In characters, not tokens. This value is used when you click on "Load data".')
+ chunk_sep = gr.Textbox(value=params['chunk_separator'], label='Chunk separator', info='Used to manually split chunks. Manually split chunks longer than chunk length are split again. This value is used when you click on "Load data".')
+ with gr.Column():
+ last_updated = gr.Markdown()
+
+ update_data.click(feed_data_into_collector, [data_input, chunk_len, chunk_sep], last_updated, show_progress=False)
+ update_url.click(feed_url_into_collector, [url_input, chunk_len, chunk_sep, strong_cleanup, threads], last_updated, show_progress=False)
+ update_file.click(feed_file_into_collector, [file_input, chunk_len, chunk_sep], last_updated, show_progress=False)
+ update_settings.click(apply_settings, [chunk_count, chunk_count_initial, time_weight], last_updated, show_progress=False)
diff --git a/extensions/superboogav2/README.md b/extensions/superboogav2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d25b3a5eb96a92bcbc78e008b1ef7826bad43c3e
--- /dev/null
+++ b/extensions/superboogav2/README.md
@@ -0,0 +1,5 @@
+# superboogav2
+
+For a description, please see the comments in this Pull Request:
+
+https://github.com/oobabooga/text-generation-webui/pull/3272
diff --git a/extensions/superboogav2/api.py b/extensions/superboogav2/api.py
new file mode 100644
index 0000000000000000000000000000000000000000..993e2b7d6d99bf3c5b8c79e82d54b592bdf76f21
--- /dev/null
+++ b/extensions/superboogav2/api.py
@@ -0,0 +1,207 @@
+"""
+This module is responsible for the VectorDB API. It currently supports:
+* DELETE api/v1/clear
+ - Clears the whole DB.
+* POST api/v1/add
+ - Add some corpus to the DB. You can also specify metadata to be added alongside it.
+* POST api/v1/delete
+ - Delete specific records with given metadata.
+* POST api/v1/get
+ - Get results from chromaDB.
+"""
+
+import json
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+from urllib.parse import urlparse, parse_qs
+from threading import Thread
+
+from modules import shared
+from modules.logging_colors import logger
+
+from .chromadb import ChromaCollector
+from .data_processor import process_and_add_to_collector
+
+import extensions.superboogav2.parameters as parameters
+
+
+class CustomThreadingHTTPServer(ThreadingHTTPServer):
+ def __init__(self, server_address, RequestHandlerClass, collector: ChromaCollector, bind_and_activate=True):
+ self.collector = collector
+ super().__init__(server_address, RequestHandlerClass, bind_and_activate)
+
+ def finish_request(self, request, client_address):
+ self.RequestHandlerClass(request, client_address, self, self.collector)
+
+
+class Handler(BaseHTTPRequestHandler):
+ def __init__(self, request, client_address, server, collector: ChromaCollector):
+ self.collector = collector
+ super().__init__(request, client_address, server)
+
+
+ def _send_412_error(self, message):
+ self.send_response(412)
+ self.send_header("Content-type", "application/json")
+ self.end_headers()
+ response = json.dumps({"error": message})
+ self.wfile.write(response.encode('utf-8'))
+
+
+ def _send_404_error(self):
+ self.send_response(404)
+ self.send_header("Content-type", "application/json")
+ self.end_headers()
+ response = json.dumps({"error": "Resource not found"})
+ self.wfile.write(response.encode('utf-8'))
+
+
+ def _send_400_error(self, error_message: str):
+ self.send_response(400)
+ self.send_header("Content-type", "application/json")
+ self.end_headers()
+ response = json.dumps({"error": error_message})
+ self.wfile.write(response.encode('utf-8'))
+
+
+ def _send_200_response(self, message: str):
+ self.send_response(200)
+ self.send_header("Content-type", "application/json")
+ self.end_headers()
+
+ if isinstance(message, str):
+ response = json.dumps({"message": message})
+ else:
+ response = json.dumps(message)
+
+ self.wfile.write(response.encode('utf-8'))
+
+
+ def _handle_get(self, search_strings: list[str], n_results: int, max_token_count: int, sort_param: str):
+ if sort_param == parameters.SORT_DISTANCE:
+ results = self.collector.get_sorted_by_dist(search_strings, n_results, max_token_count)
+ elif sort_param == parameters.SORT_ID:
+ results = self.collector.get_sorted_by_id(search_strings, n_results, max_token_count)
+ else: # Default is dist
+ results = self.collector.get_sorted_by_dist(search_strings, n_results, max_token_count)
+
+ return {
+ "results": results
+ }
+
+
+ def do_GET(self):
+ self._send_404_error()
+
+
+ def do_POST(self):
+ try:
+ content_length = int(self.headers['Content-Length'])
+ body = json.loads(self.rfile.read(content_length).decode('utf-8'))
+
+ parsed_path = urlparse(self.path)
+ path = parsed_path.path
+ query_params = parse_qs(parsed_path.query)
+
+ if path in ['/api/v1/add', '/api/add']:
+ corpus = body.get('corpus')
+ if corpus is None:
+ self._send_412_error("Missing parameter 'corpus'")
+ return
+
+ clear_before_adding = body.get('clear_before_adding', False)
+ metadata = body.get('metadata')
+ process_and_add_to_collector(corpus, self.collector, clear_before_adding, metadata)
+ self._send_200_response("Data successfully added")
+
+ elif path in ['/api/v1/delete', '/api/delete']:
+ metadata = body.get('metadata')
+ if corpus is None:
+ self._send_412_error("Missing parameter 'metadata'")
+ return
+
+ self.collector.delete(ids_to_delete=None, where=metadata)
+ self._send_200_response("Data successfully deleted")
+
+ elif path in ['/api/v1/get', '/api/get']:
+ search_strings = body.get('search_strings')
+ if search_strings is None:
+ self._send_412_error("Missing parameter 'search_strings'")
+ return
+
+ n_results = body.get('n_results')
+ if n_results is None:
+ n_results = parameters.get_chunk_count()
+
+ max_token_count = body.get('max_token_count')
+ if max_token_count is None:
+ max_token_count = parameters.get_max_token_count()
+
+ sort_param = query_params.get('sort', ['distance'])[0]
+
+ results = self._handle_get(search_strings, n_results, max_token_count, sort_param)
+ self._send_200_response(results)
+
+ else:
+ self._send_404_error()
+ except Exception as e:
+ self._send_400_error(str(e))
+
+
+ def do_DELETE(self):
+ try:
+ parsed_path = urlparse(self.path)
+ path = parsed_path.path
+ query_params = parse_qs(parsed_path.query)
+
+ if path in ['/api/v1/clear', '/api/clear']:
+ self.collector.clear()
+ self._send_200_response("Data successfully cleared")
+ else:
+ self._send_404_error()
+ except Exception as e:
+ self._send_400_error(str(e))
+
+
+ def do_OPTIONS(self):
+ self.send_response(200)
+ self.end_headers()
+
+
+ def end_headers(self):
+ self.send_header('Access-Control-Allow-Origin', '*')
+ self.send_header('Access-Control-Allow-Methods', '*')
+ self.send_header('Access-Control-Allow-Headers', '*')
+ self.send_header('Cache-Control', 'no-store, no-cache, must-revalidate')
+ super().end_headers()
+
+
+class APIManager:
+ def __init__(self, collector: ChromaCollector):
+ self.server = None
+ self.collector = collector
+ self.is_running = False
+
+ def start_server(self, port: int):
+ if self.server is not None:
+ print("Server already running.")
+ return
+
+ address = '0.0.0.0' if shared.args.listen else '127.0.0.1'
+ self.server = CustomThreadingHTTPServer((address, port), Handler, self.collector)
+
+ logger.info(f'Starting chromaDB API at http://{address}:{port}/api')
+
+ Thread(target=self.server.serve_forever, daemon=True).start()
+
+ self.is_running = True
+
+ def stop_server(self):
+ if self.server is not None:
+ logger.info(f'Stopping chromaDB API.')
+ self.server.shutdown()
+ self.server.server_close()
+ self.server = None
+ self.is_running = False
+
+ def is_server_running(self):
+ return self.is_running
\ No newline at end of file
diff --git a/extensions/superboogav2/benchmark.py b/extensions/superboogav2/benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..46475a088b0eca137f641935d58dbf4b8d50ed29
--- /dev/null
+++ b/extensions/superboogav2/benchmark.py
@@ -0,0 +1,72 @@
+"""
+This module implements a benchmark function to evaluate the performance of the embedding pipeline. It expects a configuration JSON file. It must have questions and expected retrieved text.
+For each question, it's essential to have variants of that question. Language is fluid and each person might have their own spin on how they may ask it.
+
+At the end, it will save the results inside a benchmark_{sysdate}.txt file in the main directory.
+
+The benchmark function will return the score as an integer.
+"""
+import datetime
+import json
+import os
+
+from pathlib import Path
+
+from .data_processor import process_and_add_to_collector, preprocess_text
+from .parameters import get_chunk_count, get_max_token_count
+from .utils import create_metadata_source
+
+def benchmark(config_path, collector):
+ # Get the current system date
+ sysdate = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+ filename = f"benchmark_{sysdate}.txt"
+
+ # Open the log file in append mode
+ with open(filename, 'a') as log:
+ with open(config_path, 'r') as f:
+ data = json.load(f)
+
+ total_points = 0
+ max_points = 0
+
+ for item in data:
+ filepath = item["text"]
+ corpus = ""
+
+ # Check if the file exists
+ if os.path.isfile(Path(filepath)):
+ # Open the file and read its content
+ with open(Path(filepath), 'r') as file:
+ corpus = file.read()
+ process_and_add_to_collector(corpus, collector, True, create_metadata_source('benchmark'))
+ else:
+ raise f'Cannot find specified file {filepath}.'
+
+ for question_group in item["questions"]:
+ question_variants = question_group["question_variants"]
+ criteria = question_group["criteria"]
+
+ for q in question_variants:
+ max_points += len(criteria)
+ processed_text = preprocess_text(q)
+
+ # Get the most similar chunks
+ results = collector.get_sorted_by_dist(processed_text, n_results=get_chunk_count(), max_token_count=get_max_token_count())
+
+ points = 0
+
+ for c in criteria:
+ for p in results:
+ if c in p:
+ points += 1
+ total_points += 1
+ break
+
+ info = f"The question '{q}' scored {points}/{len(criteria)} points."
+ print(info, file=log)
+
+ print('\n---\n', file=log)
+
+ print(f'##Total points:\n\n{total_points}/{max_points}', file=log)
+
+ return total_points, max_points
\ No newline at end of file
diff --git a/extensions/superboogav2/benchmark_texts/aircraft_lease.txt b/extensions/superboogav2/benchmark_texts/aircraft_lease.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3d7e35b02e58d07f9c379aaf5175c909598f77ec
--- /dev/null
+++ b/extensions/superboogav2/benchmark_texts/aircraft_lease.txt
@@ -0,0 +1,9471 @@
+
+EX-10.72
+55
+a2069410zex-10_72.txt
+EXHIBIT 10.72
+
+
+
+ Exhibit 10.72
+
+ COUNTERPART NO. 2
+
+
+===============================================================================
+
+
+ AIRCRAFT LEASE AGREEMENT
+ dated as of November 29, 1995
+
+ between
+
+ WILMINGTON TRUST COMPANY, not in its
+ individual capacity but solely as Trustee
+ f/b/o ITOCHU AirLease (Europe) Limited and
+ Marubeni Airleasing (U.K.) Limited,
+
+ Lessor
+
+ and
+
+ ALOHA AIRLINES, INC.,
+
+ Lessee
+
+ One Boeing 737-25A Aircraft
+
+===============================================================================
+
+ THIS LEASE HAS BEEN EXECUTED IN MULTIPLE COUNTERPARTS, OF WHICH THERE IS ONE
+ COUNTERPART DESIGNATED AS "COUNTERPART NO. 1 - CHATTEL PAPER ORIGINAL" AND ALL
+ OTHER COUNTERPARTS SHALL BE DESIGNATED WITH NUMBERS OTHER THAN 1. TO THE EXTENT
+ THAT THIS LEASE CONSTITUTES CHATTEL PAPER UNDER THE UNIFORM COMMERCIAL CODE, AS
+ IN EFFECT IN ANY APPLICABLE JURISDICTION, NO SECURITY INTEREST MAY BE CREATED
+ OR PERFECTED BY POSSESSION OF ANY COUNTERPART OF THIS LEASE OTHER THAN SAID
+ COUNTERPART NO. 1.
+
+
+
+
+
+ COUNTERPART NO. 7
+
+
+
+
+================================================================================
+
+
+ AIRCRAFT LEASE AGREEMENT
+ dated as of November 29, 1995
+
+
+ between
+
+ WILMINGTON TRUST COMPANY, not in its
+ individual capacity but solely as Trustee
+ f/b/o ITOCHU AirLease (Europe) Limited and
+ Marubeni Airleasing (U.K.) Limited,
+
+ Lessor
+
+ and
+
+ ALOHA AIRLINES, INC.,
+
+ Lessee
+
+
+ One Boeing 737-25A Aircraft
+
+================================================================================
+
+
+ THIS LEASE HAS BEEN EXECUTED IN MULTIPLE COUNTERPARTS, OF WHICH THERE IS ONE
+ COUNTERPART DESIGNATED AS "COUNTERPART NO. 1 - CHATTEL PAPER ORIGINAL" AND ALL
+ OTHER COUNTERPARTS SHALL BE DESIGNATED WITH NUMBERS OTHER THAN 1. TO THE EXTENT
+ THAT THIS LEASE CONSTITUTES CHATTEL PAPER UNDER THE UNIFORM COMMERCIAL CODE, AS
+ IN EFFECT IN ANY APPLICABLE JURISDICTION, NO SECURITY INTEREST MAY BE CREATED
+ OR PERFECTED BY POSSESSION OF ANY COUNTERPART OF THIS LEASE OTHER THAN SAID
+ COUNTERPART NO. 1.
+
+
+
+
+ Table of Contents
+ -----------------
+
+
+ -i-
+
+
+
+
+
+ EXHIBITS
+
+ Exhibit A - Schedule and Description of Aircraft
+
+ Exhibit B-1 - Aircraft Documents on the Delivery Date
+
+ Exhibit B-2 - Aircraft Documents on the Return Occasion
+
+ Exhibit C-1 - Financial Information
+
+ Exhibit C-2 - Definitions and Values
+
+ Exhibit D - Lease Supplement
+
+ Exhibit E - Return Condition Requirements
+
+ Exhibit F - Lessee's Counsel Opinion
+
+ Exhibit G - Approved Insurance Broker Report
+
+ Exhibit H - Insurance Special Provisions
+
+ Exhibit I - Form of Redelivery Acknowledgment
+
+ EXHIBIT J - Monthly Aircraft Utilization and
+ Status Report
+
+ EXHIBIT K - List of Documents to Be Supplied Annually
+
+
+
+ -ii-
+
+
+
+
+ RECORDED
+
+ Federal Aviation Administration
+
+ Date 1-25-96 Time 3:38 PM
+ ---------- ----------
+ Conveyance Number II004730
+ ------------
+ By /s/ Donna Jennings
+ -------------------------------
+ Conveyances Examiner
+
+
+ AIRCRAFT LEASE AGREEMENT
+
+
+ THIS AIRCRAFT LEASE AGREEMENT, dated as of November 29, 1995
+ (the "Lease") , between (1) WILMINGTON TRUST COMPANY, not in its individual
+ capacity but solely as Trustee f/b/o ITOCHU AirLease (Europe) Limited and
+ Marubeni Airleasing (U.K.) Limited (Wilmington Trust Company, in such capacity,
+ is herein called "Lessor") , and (2) Aloha Airlines, Inc., a corporation
+ organized under the laws of Hawaii (herein called "Lessee").
+
+ WHEREAS, Lessee desires to lease from Lessor and Lessor is
+ willing to lease to Lessee the aircraft described herein upon and subject to
+ the terms and conditions of this Lease;
+
+ NOW, THEREFORE, In consideration of the mutual promises,
+ covenants, terms and conditions contained herein, Lessor and Lessee agree as
+ follows:
+
+ Section 1. DEFINITIONS.
+
+ The following terms shall have the following respective
+meanings for all purposes of this Lease Agreement (such meanings to be equally
+applicable to both the singular and plural forms of the terms defined):
+
+ AFFILIATE shall mean, with respect to any Person, any other
+ Person directly or indirectly controlling, controlled by or under common
+ control with such Person.
+
+ AIRCRAFT shall mean the Airframe together with (i) the
+ Engines, whether or not installed on the Aircraft, (ii) all Parts, (iii) spare
+ parts or ancillary equipment or devices' furnished with the Aircraft under this
+ Lease, (iv) all Aircraft Documents, and (v) all substitutions, replacements and
+ renewals of any and all thereof.
+
+ AIRCRAFT DOCUMENTS shall mean, as applicable, the items
+ identified on Exhibit B-1 hereto provided to Lessee on the Delivery Date and
+ Exhibit B-2 hereto provided to Lessor on the Return Occasion, all of which
+ shall be maintained in the English language.
+
+ AIRCRAFT MAINTENANCE MANUAL shall mean the aircraft
+ maintenance manual which constitutes part of the Maintenance Program.
+
+
+
+
+
+ AIRFRAME shall mean (a) the aircraft described on Exhibit A,
+ but not including any Engine installed thereon, and (b) any and all Parts so
+ long as the same shall be incorporated or installed on or attached to the
+ Airframe, or so long as title thereto shall remain vested in Lessor in
+ accordance with the terms of Section 9, after removal from the Airframe.
+
+ APPROVED INSURANCE BROKER shall mean any reputable insurance
+ broker of internationally recognized responsibility and standing specializing
+ in aircraft insurance.
+
+ APPROVED INSURER shall mean any reputable insurance company or
+ insurance broker of internationally recognized responsibility and standing
+ specializing in aircraft insurance.
+
+ APU shall mean the auxiliary power unit.
+
+ BASIC RENT shall mean the rent for the Aircraft specified on
+ Exhibit C-1 and payable throughout the Term for the Aircraft pursuant to
+ Section 4(a).
+
+ BASIC RENT PAYMENT DATE shall have the meaning set forth in
+Exhibit C-1 hereto.
+
+ BUSINESS DAY shall mean any day other than a Saturday, Sunday
+ or other day on which banking institutions in Delaware, Hawaii, New York,
+ London or Dublin are authorized or required by law to be closed.
+
+ C CHECK is defined in the Maintenance Program and equivalent
+ to a "C" Check as defined in Boeing's "B737-200 Maintenance Planning Data" Doc.
+ D6-17594. A "C" Check is currently performed at 3,000 Flight Hour intervals.
+
+ CASUALTY OCCURRENCE shall mean any of the following events
+ with respect to the Aircraft, Airframe or any Engine: (a) loss of such property
+ or its use due to theft or disappearance for a period in excess of sixty (60)
+ consecutive days, or any destruction, damage beyond economic repair or
+ rendition of such property permanently unfit for normal use by Lessee for any
+ reason whatsoever; (b) any damage to such property which results in an
+ insurance settlement with respect to such property on the basis of a total loss
+ or on the basis of a compromised or constructive total loss, (c) the
+ condemnation, confiscation, appropriation or seizure of title, or requisition
+ of title to such property; (d) the appropriation or seizure of the use of such
+ property by or on the authority of any Governmental Entity or purported
+ Governmental Entity, which in any such case shall have resulted in the loss of
+ possession thereof
+
+
+ -2-
+
+
+
+
+
+ by Lessee for a period in excess of one hundred eighty (180) consecutive days
+ (or for such shorter period ending on the date which is seven (7) days from the
+ date of receipt of an insurance settlement with respect to such property on the
+ basis of a total loss); or (e) prohibition under any applicable Law of the use
+ of the Aircraft for commercial transportation of passengers for a period of six
+ (6) months.
+
+ CASUALTY VALUE shall mean the value as specified on Exhibit
+ C-1 hereto.
+
+ CERTIFIED AIR CARRIER shall mean a United States "air carrier"
+ within the meaning of Section 40102(a)(2) of the Federal Aviation Act engaged
+ in regularly scheduled air transportation of passengers who is a "citizen of
+ the United States" within the meaning of Section 40102(a)(15) of the Federal
+ Aviation Act holding a certificate of public convenience and necessity issued
+ by the FAA pursuant to the Federal Aviation Act or by a similar authority under
+ legislation hereafter enacted.
+
+ CLAIMS shall have the meaning set forth in Section 13.
+
+ CODE shall mean the United States Internal Revenue Code of
+ 1986, as amended, and any successor thereto, as interpreted by the rules and
+ regulations issued thereunder, in each case as in effect from time to time.
+
+ COMMONLY CONTROLLED PERSON shall mean any trade or business,
+ whether or not incorporated, which is at any relevant time a member of the same
+ controlled group of corporations (within the meaning of Section 414(b) of the
+ Code) or affiliated service group (within the meaning of Section 414(m) or (o)
+ of the Code) as, or under common control (within the meaning of Section 414(c)
+ of the Code) with, Lessee.
+
+ CYCLE shall mean a cycle commencing each time the wheels of
+ the Airframe (or any other airframe on which an Engine or Part is installed,
+ but only insofar as the calculation of Cycles is required with reference to the
+ relevant Engine or Part) leave the ground on take-off and finishing when the
+ wheels of such Airframe (or any other airframe on which such Engine or Part is
+ installed) touch the ground in landing.
+
+ D CHECK is defined in the Maintenance Program and is
+ equivalent to a "D Check" or "heavy structural inspection" as defined in
+ "Boeing's B737-200 Maintenance Planning Data" Doc. D6-17594. A "D" Check is
+ currently performed at 12,000 Flight Hour intervals.
+
+
+ -3-
+
+
+
+
+
+ DEFAULT shall mean an Event of Default or event which would
+ constitute an Event of Default but for the lapse of time or the giving of
+ notice or both.
+
+ DELIVERY DATE shall mean the date on which the Aircraft is
+ delivered to and accepted by Lessee for purposes of this Lease.
+
+ DELIVERY LOCATION shall mean the location specified on Exhibit
+ C-2 hereto for the delivery of the Aircraft by Lessor to Lessee.
+
+ DETAILED SPECIFICATIONS means the detailed specifications
+ number D6-32410-181 Revision C dated December 1, 1987 relating to the Aircraft
+ to be provided to Lessor on or before the Effective Date.
+
+ DOLLARS shall mean lawful currency of the United States of
+America.
+
+ ENGINE shall mean any engine installed on or furnished with
+ the Aircraft on the Delivery Date, such engine being identified as to
+ manufacturer and type on Exhibit A hereto and which will be further identified
+ as to manufacturer serial number in the Lease Supplement, and any Replacement
+ Engine which may from time to time be substituted therefor pursuant to Section
+ 11; together in each case with any and all Parts incorporated or installed in
+ or attached thereto or any and all Parts removed therefrom so long as title
+ thereto remains vested in Lessor in accordance with the terms of Section 9
+ after removal from such engine. Except as otherwise set forth herein, at such
+ time as a Replacement Engine shall be so substituted, such replaced engine
+ shall cease to be an Engine hereunder. The term "Engines" means, as of any date
+ of determination, all Engines then leased hereunder.
+
+ EQUIPMENT shall mean the Airframe, each Engine and the
+Aircraft Documents.
+
+ ERISA shall mean the Employee Retirement Income Security Act
+ of 1974, as amended, and any successor statute thereto, as interpreted by the
+ rules and regulations thereunder, all as the same may be in effect from time to
+ time.
+
+ ESTIMATED DELIVERY DATE shall mean the date specified on
+ Exhibit C-2 hereto, which the. parties anticipate to be the Delivery Date.
+
+ EVENT OF DEFAULT shall have the meaning specified in any one
+or more clauses in Section 17.
+
+
+ -4-
+
+
+
+
+
+ EXPIRATION DATE shall mean the date specified in the Lease
+Supplement for expiration of the Term.
+
+ FAA shall mean the Federal Aviation Administration of the
+ United States Department of Transportation or any successor.
+
+ FEDERAL AVIATION ACT shall mean Subtitle VII, Part A of Title
+ 49 of the United States Code, as amended.
+
+ FINAL INSPECTION shall have the meaning set forth in Section
+16(b).
+
+ FLIGHT HOUR shall mean each hour and any fractional portion
+ of an hour which elapses from the time the wheels of the Airframe (or any other
+ airframe on which an Engine or Part is installed, but only insofar as the
+ calculation of Flight Hours is required with reference to the relevant Engine
+ or Part) leave the ground on take-off to the time when the wheels of such
+ Airframe (or any other airframe on which such Engine or Part is installed)
+ touch the ground on landing.
+
+ GOVERNMENTAL ENTITY shall mean any federal, state, municipal
+or other governmental department, commission, board, bureau, court, legislature,
+agency, instrumentality or authority, domestic or foreign.
+
+ INDEMNITEE shall mean Wilmington Trust Company in its
+ individual capacity, Lessor, each Beneficiary, each Lender and, in each case
+ their respective Affiliates, officers, directors, shareholders, agents and
+ employees and their respective successors and assigns.
+
+ ITEM means the Aircraft or Airframe or any Engine in each
+ case including Parts constituting a part of any thereof.
+
+ LAW shall mean and include (i) any statute, decree,
+ constitution, regulation, order, judgment or other directive of any
+ Governmental Entity; (ii) any treaty, pact, compact or other agreement to which
+ any Governmental Entity is a signatory or party; (iii) any judicial or
+ administrative interpretation or application of any Law described in (i) or
+ (ii) above; and (iv) any amendment or revision of any Law described in (i),
+ (ii) or (iii) above.
+
+ LEASE SUPPLEMENT shall mean the Lease Supplement,
+ substantially in the form of Exhibit D hereto, entered into between Lessor and
+ Lessee.
+
+ LENDER shall mean each Person who now or hereafter makes a
+loan to the Lessor or any Beneficiary or otherwise
+
+
+ -5-
+
+
+
+
+
+ finances the Beneficiaries' interest in the Aircraft and for whose benefit
+ Lessor grants a security interest in the Aircraft and the Lease.
+
+ LESSOR'S LIENS shall mean Liens arising as a result of (i)
+ claims against Lessor or any Beneficiary not related to the transactions
+ contemplated by this Lease; or (ii) acts of Lessor or any Beneficiary, not
+ contemplated and expressly permitted under this Lease; or (iii) Taxes imposed
+ against Lessor which are not indemnified against by Lessee pursuant to Section
+ 10; or (iv) claims against Lessor arising out of the voluntary transfer by
+ Lessor of all or any part of its interests in the Aircraft or this Lease, other
+ than a transfer pursuant to Section 11 or 18 hereof.
+
+ LIBOR shall mean the offered rate (rounded up to the nearest
+ 1/32 of one percent) for deposits in Dollars for six-month periods which appear
+ on the Telerate Page 3750 as of 11:00 a.m., London time, on the day that is two
+ (2) Business Days preceding that date for which LIBOR is being determined. If
+ at least two (2) such offered rates appear on the Telerate Page 3750, the rate
+ will be the arithmetic mean of such offered rates (rounded up to the nearest
+ 1/32 of one percent).
+
+ LIEN shall mean any mortgage, pledge, lien, charge,
+ encumbrance, lease, exercise of rights, security interest or claim.
+
+ MAINTENANCE PROGRAM shall mean an FAA approved maintenance
+ program as in effect from time to time for the Aircraft encompassing scheduled
+ maintenance, condition monitored maintenance and on-condition maintenance of
+ the Airframe, Engines and components of the Aircraft in accordance with the
+ Manufacturer's maintenance planning document. Upon request by Lessor or any
+ Beneficiary a copy of such Maintenance Program shall be made available to
+ Lessor or such Beneficiary.
+
+ MORTGAGE CONVENTION shall mean the Convention for the
+ International Recognition of Rights in Aircraft, signed (ad referendum) at
+ Geneva, Switzerland, on June 19, 1948, and amended from time to time, but
+ excluding the terms of any adhesion thereto or ratification thereof containing
+ reservations to which the United States of America does not accede.
+
+ OPERATIVE DOCUMENTS shall mean this Lease, the Lease
+ Supplement, the Trust Agreement and the Supplemental Agreement.
+
+ PARTS shall mean all appliances, components, parts,
+instruments, appurtenances, accessories, furnishings
+
+
+ -6-
+
+
+
+
+
+ and other equipment of whatever nature (other than complete Engines or
+ engines), which may now or from time to time be incorporated or installed in or
+ attached to the Airframe or any Engine. Except as otherwise set forth herein,
+ at such time as a replacement part shall be substituted for a Part in
+ accordance with Section 9 hereof, the Part so removed shall cease to be a Part
+ hereunder.
+
+ PBGC shall mean the Pension Benefit Guaranty Corporation
+ established pursuant to Subtitle A of Part IV of ERISA, and any successor
+ thereof.
+
+ PERSON shall mean and include any individual person,
+ corporation, partnership, limited liability company, firm, joint stock company,
+ joint venture, trust, estate, unincorporated organization, association or
+ Governmental Entity.
+
+ PLAN shall mean any employee pension benefit plan (as defined
+ in Section 3(2) of ERISA) maintained or contributed to at any time, or required
+ to be contributed to, by Lessee or any Commonly Controlled Person.
+
+ REMOVABLE PART shall have the meaning set forth in Section
+ 9(b).
+
+ RENT shall mean Basic Rent and Supplemental Rent,
+collectively.
+
+ REPLACEMENT ENGINE shall mean an engine of the same
+ manufacturer and model and having not less than equivalent value, utility,
+ modification and remaining warranty status as the Engine it is intended to
+ replace under Section 11(b) hereof, or, at Lessee's option, an engine of the
+ same manufacturer as such Engine but of an improved model, and otherwise of not
+ less than equivalent value and utility and suitable for installation and use on
+ the Airframe without impairing the value or utility of the Airframe.
+
+ RETURN OCCASION shall mean the event that occurs when
+ possession of the Aircraft is returned from Lessee to Lessor at the end of the
+ Term of this Lease or upon Lessor taking possession pursuant to Section 18.
+
+ SUPPLEMENTAL RENT shall mean any and all amounts, liabilities
+ and obligations (other than Basic Rent) which Lessee assumes or agrees to pay
+ hereunder, including without limitation (i) any payment of Casualty Value; (ii)
+ any payment of indemnity required by Sections 10 or 13 hereof; and (iii) to the
+ extent permitted by applicable Law, interest at the Overdue Rate (all
+ computations of interest at the Overdue Rate under this Lease to be made on the
+ basis
+
+
+ -7-
+
+
+
+
+
+ of a 360-day year for the actual number of days elapsed) calculated on any
+ Rent not paid when due hereunder until the same is paid.
+
+ TAXES shall mean any and all sales, use, business, gross
+ income, personal property, transfer, fuel, leasing, occupational, value added,
+ excess profits, excise, gross receipts, franchise, stamp, income, levies,
+ imposts, withholdings or other taxes or duties of any nature whatsoever now or
+ hereafter imposed, levied, collected, withheld or assessed, together with any
+ penalties, fines, charges or interest thereon.
+
+ TELERATE PAGE 3750 shall mean the display page so designed on
+ the Dow Jones Telerate Service (or such other page as may replace that page on
+ that service, or such other service as may be nominated as the information
+ vendor, for the purpose of displaying comparable rates).
+
+ TERM shall mean the term of this Lease specified in the Lease
+Supplement hereto.
+
+ WET LEASE shall mean any arrangement whereby Lessee agrees to
+ operate the Aircraft under a contractual arrangement with a third party
+ pursuant to which no rights to any aircraft specifically identified by serial
+ number or registration number are granted to such third party and pursuant to
+ which the Aircraft (i) remains at all times under the sole and complete
+ operational control of Lessee, (ii) shall be operated solely by cockpit crew
+ employed by Lessee possessing all current certificates and licenses required by
+ applicable Laws and (iii) shall be maintained by Lessee in accordance with all
+ the provisions of the Lease including, but not limited to, Sections 6(d) and 12
+ (it being understood and agreed by Lessor and Lessee that any Wet Lease shall,
+ by its terms, be in all cases subject and subordinate to this Lease).
+
+ The terms SECURITY DEPOSIT, SECURITY DEPOSIT ADIUSTMENT and
+ SECURITY LETTER OF CREDIT shall have the meanings set forth in Exhibit C-1
+ hereto.
+
+ The terms ASSIGNMENT AGREEMENT, ASSIGNMENT AGREEMENT N685MA,
+ BENEFICIARY, BENEFICIARY'S ADDRESS, DEDUCTIBLE AMOUNT, DELIVERY LOCATION,
+ ENGINE MANUFACTURER, ESTIMATED DELIVERY DATE, LAST BASIC RENT PAYMENT DATE,
+ LEASE IDENTIFICATION, LESSEE'S ADDRESS, LESSOR'S ADDRESS, MANUFACTURER, OTHER
+ LEASE, OVERDUE RATE, PAYMENT LOCATIONS, PERMITTED JURISDICTION, PERMITTED
+ SUBLESSEES, PUBLIC LIABILITY AND PROPERTY DAMAGE INSURANCE, RETURN LOCATION,
+ SUPPLEMENTAL AGREEMENT, TRUST AGREEMENT and TRUSTEE shall have the meanings set
+ forth on Exhibit C-2 hereto.
+
+
+ -8-
+
+
+
+
+
+ Section 2. LEASE AND CONDITIONS.
+
+ (a) Lessor hereby agrees to lease the Aircraft to Lessee, and
+ Lessee hereby agrees to lease the Aircraft from Lessor, on the terms and
+ conditions of this Lease, as supplemented by the Lease Supplement.
+
+ (b) Lessor's obligation to lease the Aircraft hereunder to
+ Lessee shall be subject to the satisfaction of the following conditions
+ precedent, in each case subject to the reasonable satisfaction of Lessor and
+ each Beneficiary:
+
+ (i) the absence of any Default hereunder;
+
+ (ii) the absence of any material adverse change in
+ Lessee's financial condition or in Lessee's ability to carry on its
+ business from September 30, 1995 to the Delivery Date;
+
+ (iii) the absence of any default or event of default by
+ Lessee under any aircraft lease or obligation for the payment of borrowed
+ money to which the Lessee is a party;
+
+ (iv) no action or proceeding shall be pending nor shall
+ any governmental action be threatened before any court or governmental
+ agency of competent jurisdiction, nor shall any order, judgment or decree
+ have been issued or proposed to be issued by any court or governmental
+ agency, to set aside, restrain, enjoin or prevent the completion and
+ consummation of this Lease or any other Operative Documents or the
+ transactions contemplated hereby or thereby;
+
+ (v) receipt of every consent, approval or authorization
+ of, and the giving of every notice to, any third party that is required in
+ order to execute and deliver this Lease and to perform the transactions
+ contemplated hereby and each such consent, approval or authorization shall
+ be valid and effective and shall not have been revoked;
+
+ (vi) receipt of the approval of the Board of Directors of
+ each Beneficiary and any other Affiliate of each Beneficiary as such
+ Beneficiary, in its sole discretion, deems necessary;
+
+ (vii) receipt by Lessor and each Beneficiary of the
+ following documents on or before the Delivery Date, all of which shall be
+ reasonably satisfactory in form and substance to Lessor and each
+ Beneficiary:
+
+
+ -9-
+
+
+
+
+ (1) a Lease Supplement in the form of Exhibit D, duly
+ authorized, executed and delivered by Lessee, and effective as of the
+ Delivery Date of the Aircraft;
+
+ (2) a certified copy of the resolutions of the Board
+ of Directors of Lessee evidencing due authority of Lessee for the
+ execution, delivery and performance of this Lease, the Lease
+ Supplement and all other documents related hereto or thereto,
+ together with an incumbency certificate as to the person or persons
+ authorized to execute and deliver such documents on behalf of Lessee;
+
+ (3) a favorable opinion of Lessee's counsel, dated
+ the Delivery Date, substantially in the form as set forth on Exhibit
+ F hereto and of special aviation counsel, dated the Delivery Date, in
+ form and scope acceptable to Lessor and the Beneficiaries;
+
+ (4) a certificate signed by a duly authorized officer
+ of Lessee, dated the Delivery Date, stating that: (i) the
+ representations and warranties contained' in Section 5(d) hereof are
+ true and accurate on and as of such date as though made on and as of
+ such time; (ii) no event has occurred and is continuing, or would
+ result from the lease of the Aircraft, which constitutes a Default;
+ and (iii) since September 30, 1995 there has not occurred any
+ material adverse change in Lessee's financial condition or in
+ Lessee's ability to carry on its business;
+
+ (5) an opinion or report, dated the Delivery Date,
+ signed by an Approved Insurance Broker or Approved Insurer as to the
+ due compliance with the insurance provisions of Section 12 hereof
+ with respect to the Aircraft, substantially in the form as set forth
+ on Exhibit G hereto;
+
+ (6) certificates of an Approved Insurance Broker
+ evidencing the insurance as required by Section 12 hereof
+
+
+ -10-
+
+
+
+
+
+ together with such special provisions as set forth on Exhibit H
+ hereto;
+
+ (7) certified copies of all necessary consents from
+ any Governmental Entities or such other Persons as to the
+ transactions contemplated by this Lease;
+
+ (8) such financial information concerning Lessee and
+ other documents and matters incidental to any of the foregoing as
+ Lessor or any Beneficiary may reasonably request; and
+
+ (9) a duly executed original of the Security Letter
+ of Credit.
+
+ (vii) the Lease, the Lease Supplement, an application for
+ registration of the Aircraft on AC Form 8050-1 in the name of the
+ Trustee, the Trust Agreement, and the affidavit of citizenship of the
+ Trustee shall have been duly filed for recordation (or shall be in the
+ process of being so fully filed for recordation) with the FAA pursuant to
+ the Federal Aviation Act and precautionary UCC filings with respect to
+ the Lease shall have been duly executed and delivered and shall have been
+ duly filed in Hawaii and all places in which such filings are necessary
+ or advisable to establish and perfect Trustee's right, title and interest
+ in the Equipment.
+
+ (c) Lessee's obligation to lease the Aircraft hereunder from
+ Lessor shall be conditional upon (i) the receipt by Lessee of a certified copy
+ of the resolutions of the Board of Directors of Lessor evidencing due authority
+ of Lessor for the execution, delivery and performance of the Trust Agreement
+ and this Lease and all other documents related thereto, together with an
+ incumbency certificate as to the person or persons authorized to execute and
+ deliver such documents on behalf of Lessor, (ii) the receipt by Lessee of
+ certification by Lessor to the effect that Lessor's representations and
+ warranties herein are true as at the Delivery Date, and (iii) the continued
+ effectiveness of the registration of the Aircraft with the FAA.
+
+ (d) Lessee's obligation to lease the Aircraft hereunder from
+Lessor shall also be conditioned upon the Aircraft being delivered to Lessee in
+the following condition:
+
+ (1) The Aircraft shall be airworthy and in good
+ operating condition
+
+
+ -11-
+
+
+
+
+ with all of the Aircraft equipment, components and systems;
+
+ (2) The Aircraft shall be clean;
+
+ (3) The Aircraft shall meet the requirements for
+ airworthiness certification by the FAA;
+
+ (4) A borescope of the Engines and the inspection of
+ the APU in accordance with the Manufacturer's or APU manufacturer's
+ recommendation shall have been performed at the direction of Lessee
+ but under the control and at the cost of the Beneficiaries, and any
+ discrepancies discovered in connection therewith shall have been
+ corrected;
+
+ (5) Lessor shall, in cooperation with the
+ Beneficiaries, retain a flight crew at competitive market rates to
+ conduct a functional check flight, not to exceed two hours (the
+ "Acceptance Flight"), to demonstrate the airworthiness of the
+ Aircraft and proper functioning of all systems and components. At
+ all times during such Acceptance Flight, the flight crew shall be
+ under the direction of the Beneficiaries and the Beneficiaries shall
+ be in command of the Aircraft. The Acceptance Flight shall be
+ conducted after the conclusion of Lessee's ground inspection and
+ technical acceptance of the Aircraft on the Delivery Date, PROVIDED,
+ HOWEVER, that such technical inspection shall be subject to the
+ satisfactory completion of the Acceptance Flight, as provided below.
+ If, during the Acceptance Flight, no discrepancy or malfunction is
+ detected with respect to the airworthiness or operational nature of
+ the Aircraft by normal airline standards, then (i) the delivery of
+ the Aircraft from Lessor to Lessee hereunder shall occur, the
+ Aircraft shall be accepted by Lessee hereunder whilst the Aircraft
+ is located at the Delivery Location, and Lessee shall execute and
+ deliver the Lease Supplement, (ii) the Aircraft shall continue en
+ route to a location selected by Lessee (the "Ferry Location"), under
+ the operational control of Lessee from the time of such delivery and
+ acceptance (the Acceptance Flight shall terminate at the time of
+ such delivery and acceptance, and that portion of the flight from
+ the Delivery Location to the Ferry Location is herein called the
+ "Ferry Flight"), and (iii) Lessee shall bear the costs of the flight
+ crew, fuel and other costs
+
+
+ -12-
+
+
+
+
+
+ relating to the Ferry Flight and shall reimburse the Beneficiaries
+ therefor promptly following receipt of the Beneficiaries, invoice
+ therefor, PROVIDED, HOWEVER, that, if any discrepancy or malfunction
+ is detected with respect to the airworthiness during the Acceptance
+ Flight, then, at Lessee's option after consultation with Lessor,
+ either (A) the Aircraft shall be delivered to and accepted by Lessee
+ at the Delivery Location and shall be ferried to the Ferry Location,
+ as provided in clauses (i), (ii) and (iii) above, where Lessee shall
+ remedy such discrepancy or malfunction at the cost (without mark up)
+ of the Beneficiaries (provided that such subsequent repair or
+ maintenance work shall not affect Lessee's acceptance of the
+ Aircraft hereunder), or (B) the Aircraft shall return to the
+ Beneficiaries' storage and maintenance facility where such
+ discrepancy or malfunction shall be corrected at the Beneficiaries'
+ expense, in which case the Delivery Date shall be postponed to such
+ date as the Beneficiaries shall advise, subject to the second
+ paragraph of Section 3(c) below. Any discrepancy or malfunction
+ detected of an airworthiness nature shall be corrected by Lessee or
+ the Beneficiaries in accordance with clause (A) or (B) above, as
+ applicable, at Beneficiaries, expense. If during the Acceptance
+ Flight a discrepancy or malfunction is detected with respect to the
+ operational nature of the Aircraft by normal airline standards but
+ no discrepancy or malfunction is detected with respect to the
+ airworthiness of the Aircraft, then the Aircraft shall be delivered
+ to and accepted by Lessee at the Delivery Location as provided in
+ clause (A) above, and Lessee shall remedy such discrepancy or
+ malfunction at the cost (without mark up) of the Beneficiaries.
+
+ In anticipation of the occurrence of the Delivery Date, the
+ Beneficiaries retained a flight crew to conduct the Acceptance
+ Flight and will incur costs relating to such retention. In
+ connection therewith, Lessee agrees to reimburse the Beneficiaries,
+ promptly following Lessee's receipt of an invoice therefor, for
+ one-half of the costs incurred by the Beneficiaries in connection
+ with retaining such flight crew for the Acceptance Flight, which
+ costs relate to the period commencing on and including November 29,
+ 1995 and ending on the day immediately preceding the Delivery Date.
+
+
+ -13-
+
+
+
+
+
+
+ Section 3. Delivery and Acceptance; Term.
+
+ (a) DELIVERY. It is anticipated that the Aircraft will become
+ available for delivery on or about the Estimated Delivery Date set forth on
+ Exhibit C-2.
+
+ (b) PLACE OF DELIVERY AND ACCEPTANCE. The Aircraft shall be
+ delivered to Lessee at the Delivery Location set forth on Exhibit C-2.
+
+ (c) FORCE MAJEURE. Notwithstanding any other provision
+hereof to the contrary, Lessor shall not have any obligation to lease the
+Aircraft to Lessee on account of its or Beneficiaries, inability to deliver the
+Aircraft if such inability or delay results from (i) loss or destruction of the
+Aircraft or any substantial part of the Aircraft or damage to the Aircraft for
+any reason whatsoever, including but not limited to, the negligence of the
+Beneficiaries or (ii) any other cause occurring without the negligence of the
+Beneficiaries and beyond the reasonable control of the Beneficiaries (after use
+of reasonable diligence), including but not limited to war, warlike operations,
+acts of government or governmental priorities, insurrections or riots, strikes
+or labor troubles causing cessation, slow down or interruption of work, riots,
+fire, floods, explosions, earthquakes, accidents, epidemics or quarantine
+restrictions, mechanical failure, inability for any reason to obtain materials,
+accessories, equipment or parts, delays in transportation or any act of God or
+a public enemy.
+
+ In the event that delivery of the Equipment is delayed for a
+ period in excess of 60 days beyond the Estimated Delivery Date (because of the
+ occurrence of an event described in clause (i) or (ii) above) Lessee or Lessor
+ shall have the right to terminate this Lease upon written notice to the other
+ and neither Lessee nor Lessor shall have any further obligations to the other
+ hereunder except that Lessor or the Beneficiaries shall return the Security
+ Deposit to Lessee subject to and in accordance with Section 21(a) hereof.
+
+ (d) ACCEPTANCE OF AIRCRAFT. The Aircraft to be leased
+ hereunder shall be delivered to Lessee "AS IS, WHERE IS" and SUBJECT TO EACH
+ AND EVERY DISCLAIMER OF WARRANTY AND REPRESENTATION AS SET FORTH IN SECTION
+ 5(a) HEREOF. Representatives of Lessee shall inspect the Aircraft on or prior
+ to the Estimated Delivery Date to verify that the Aircraft is in the condition
+ required by Section 2(d). In the event the Aircraft does not fulfill all of the
+ conditions set forth in Section 2(d) and if Lessee does not accept delivery of
+ the Aircraft with discrepancies to be corrected pursuant to Section 2(d)(5),
+ then Lessor shall have (A) with respect to any matter which affects the
+
+ -14-
+
+
+
+
+
+ airworthiness of the Aircraft, fifteen (15) days within which to cure any
+ deficiency in the Aircraft, without cost to Lessee, and (B) with respect to any
+ other matter, sixty (60) days within which to cure any deficiency with the
+ Aircraft, without cost to Lessee; Lessee shall then reinspect the Aircraft and
+ if the Aircraft is then in compliance with the conditions set forth in Section
+ 2(d), Lessee shall accept delivery of the Aircraft. In no event shall Lessor or
+ the Beneficiaries have the right pursuant to the foregoing paragraph (c) or
+ this paragraph (d) or both to extend the Delivery Date beyond the date that is
+ sixty (60) days after the Estimated Delivery Date. Upon acceptance of the
+ Aircraft Lessee shall thereupon indicate and confirm its acceptance of the
+ Aircraft by delivery to Lessor of a Lease Supplement, dated the Delivery Date,
+ in the form set forth as Exhibit D hereto.
+
+ The execution by Lessee of the Lease Supplement shall
+ constitute Lessee's acknowledgment that the Aircraft is in the condition
+ required hereby on the Delivery Date and Lessee's absolute, unconditional and
+ irrevocable acceptance of the Aircraft hereunder for all purposes of this
+ Lease.
+
+ (e) TERM OF LEASE. The Term of this Lease shall commence on
+ the Delivery Date and shall continue for 96 months until the Expiration Date
+ set forth in the Lease Supplement, subject to earlier termination by Lessor or
+ a Beneficiary following the occurrence of an Event of Default, as hereinafter
+ provided or pursuant to paragraph 2(ii) of the Supplemental Agreement.
+
+ Section 4. RENT.
+
+ (a) RENT. Lessee covenants and agrees to pay to Lessor, or
+its assigns, the following as Rent:
+
+ (i) BASIC RENT: The Basic Rent as set forth on Exhibit C-1
+hereto throughout the Term hereof, payable in consecutive installments in
+advance on each Basic Rent Payment Date.
+
+ (ii) SUPPLEMENTAL RENT: Any and all Supplemental Rent as and
+when the same becomes due.
+
+ (b) PLACE AND METHOD OF PAYMENT. All Basic Rent and
+ Supplemental Rent and other amounts payable under this Lease shall be paid on
+ the due date in immediately available funds in Dollars by wire transfer, at
+ the Payment Locations specified on Exhibit C-2 hereto, or at such other
+ location as Lessor shall designate in writing. Any Rent due on a day other than
+ a Business Day (in New York, Dublin, London and Honolulu only) shall be due on
+ the next preceding Business Day.
+
+ -15-
+
+
+
+
+
+ (c) PROHIBITION AGAINST SETOFF, COUNTERCLAIM, ETC. This Lease
+ is a net lease. Subject to Section 20(f), Lessee's obligation to pay all Rent
+ hereunder shall be absolute and unconditional and shall not be affected or
+ reduced by any circumstance, including, without limitation, (i) any setoff,
+ counterclaim, recoupment, defense or other right which Lessee may have against
+ Lessor, any Beneficiary, the Manufacturer, the Engine Manufacturer, any seller
+ of or person providing services with respect to the Aircraft or any other
+ Person, for any reason whatsoever; (ii) any defect in the title, airworthiness
+ or eligibility for registration under applicable Law, or any condition, design,
+ operation or fitness for use of, or any damage to or loss or destruction of,
+ the Aircraft, or any interruption or cessation in the use or possession thereof
+ by Lessee for any reason whatsoever, whether arising out of or related to an
+ act or omission of Lessee, or any other Person; (iii) any Liens with respect to
+ the Aircraft; (iv) the invalidity or unenforceability or lack of due
+ authorization or other infirmity of this Lease or any absence of right, power
+ or authority of Lessor or Lessee to enter into this Lease; (v) any insolvency,
+ bankruptcy, reorganization or similar proceedings by or against Lessor or
+ Lessee; (vi) any other circumstance or happening of any nature whatsoever,
+ similar to any of the foregoing; or (vii) any Taxes (other Taxes to which
+ Lessee's indemnity does not extend pursuant to the provisions of Section 10);
+ it being the express intention of Lessor and Lessee that all Rent payable
+ hereunder shall be payable in all events, unless the obligation to pay the same
+ shall be terminated pursuant to the express provisions of this Lease. Nothing
+ in this paragraph (c) shall constitute a waiver by Lessee of any right or claim
+ that Lessee may separately assert against Lessor or any Beneficiary.
+
+ Lessee hereby waives, to the extent permitted by applicable
+ Law, any and all rights which it may now have or which at any time hereafter
+ may be conferred upon it, by Law or otherwise, to terminate this Lease or any
+ obligation imposed upon Lessee hereunder or in relation hereto.
+
+ If for any reason whatsoever this Lease shall be terminated
+ in whole or in part by operation of law (other than termination under any
+ bankruptcy laws as now or hereafter in effect), Lessee nonetheless agrees to
+ pay to Lessor amounts equal to the Rent payments hereunder at the time such
+ payments would have become due and payable in accordance with the terms hereof
+ had this Lease not been terminated so long as Lessee is able to use, possess
+ and quietly enjoy the Aircraft, and so long as such payments are made and all
+ other terms and conditions hereof are complied
+
+ -16-
+
+
+
+
+
+with by Lessor and Lessee, Lessor and Lessee will deem this Lease to remain in
+full force and effect.
+
+ Section 5. REPRESENTATIONS AND WARRANTIES.
+
+ (a) WARRANTIES AND DISCLAIMER OF WARRANTIES.
+
+ LESSOR AND EACH BENEFICIARY HAVE NOT AND SHALL NOT BE DEEMED
+TO HAVE MADE (WHETHER BY VIRTUE OF HAVING LEASED THE AIRCRAFT UNDER THIS LEASE,
+OR HAVING ACQUIRED THE AIRCRAFT, OR HAVING DONE OR FAILED TO DO ANY ACT, OR
+HAVING ACQUIRED OR FAILED TO ACQUIRE ANY STATUS UNDER OR IN RELATION TO THIS
+LEASE OR OTHERWISE), AND LESSOR AND EACH BENEFICIARY HEREBY SPECIFICALLY
+DISCLAIM, ANY REPRESENTATION OR WARRANTY, EXPRESS OR IMPLIED, AS TO THE TITLE
+(EXCEPT AS HEREINBELOW PROVIDED), AIRWORTHINESS, CONDITION, DESIGN, OPERATION,
+MERCHANTABILITY, FREEDOM FROM CLAIMS OF INFRINGEMENT OR THE LIKE, OR FITNESS
+FOR USE FOR A PARTICULAR PURPOSE OF THE AIRCRAFT, OR AS TO THE QUALITY OF THE
+MATERIAL OR WORKMANSHIP OF THE AIRCRAFT, THE ABSENCE THEREFROM OF LATENT OR
+OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE, OR AS TO ANY OTHER REPRESENTATION
+OR WARRANTY WHATSOEVER, EXPRESS OR IMPLIED (INCLUDING ANY IMPLIED WARRANTY
+ARISING FROM A COURSE OF PERFORMANCE OR DEALING OR USAGE OF TRADE), WITH
+RESPECT TO THE AIRCRAFT; AND LESSEE HEREBY WAIVES, RELEASES, RENOUNCES AND
+DISCLAIMS EXPECTATION OF OR RELIANCE UPON ANY SUCH. WARRANTY OR WARRANTIES.
+LESSOR AND EACH BENEFICIARY SHALL NOT HAVE ANY RESPONSIBILITY OR LIABILITY TO
+LESSEE OR ANY OTHER PERSON, WHETHER ARISING IN CONTRACT OR TORT OUT OF ANY
+NEGLIGENCE OR STRICT LIABILITY OF LESSOR OR SUCH BENEFICIARY OR OTHERWISE, FOR
+(i) ANY LIABILITY, LOSS OR DAMAGE CAUSED OR ALLEGED TO BE CAUSED DIRECTLY OR
+INDIRECTLY BY THE AIRCRAFT OR ANY ENGINE OR BY ANY INADEQUACY THEREOF OR
+DEFICIENCY OR DEFECT THEREIN OR BY ANY OTHER CIRCUMSTANCE IN CONNECTION
+THEREWITH, (ii) THE USE, OPERATION OR PERFORMANCE OF THE AIRCRAFT OR ANY RISKS
+RELATING THERETO, (iii) ANY INTERRUPTION OF SERVICE, LOSS OF BUSINESS OR
+ANTICIPATED PROFITS OR CONSEQUENTIAL DAMAGES OR (iv) THE DELIVERY, OPERATION,
+SERVICING, MAINTENANCE, REPAIR, IMPROVEMENT OR REPLACEMENT OF THE AIRCRAFT. THE
+WARRANTIES AND REPRESENTATIONS SET FORTH IN THIS SECTION ARE EXCLUSIVE AND IN
+LIEU OF ALL OTHER REPRESENTATIONS OR WARRANTIES WHATSOEVER, EXPRESS OR IMPLIED,
+AND LESSOR AND EACH BENEFICIARY SHALL NOT BE DEEMED TO HAVE MADE BY OTHER
+WARRANTIES, EXCEPT THAT EACH BENEFICIARY MAKES THE REPRESENTATIONS AND
+WARRANTIES CONTAINED IN THE SUPPLEMENTAL AGREEMENT AND:
+
+ (b) LESSOR REPRESENTATIONS AND WARRANTIES.
+
+ Lessor hereby makes the following representations and
+warranties:
+
+ -17-
+
+
+
+
+
+ (i) the Aircraft is duly registered with the FAA in the name
+ of Lessor and, on the Delivery Date, Lessor shall be the FAA-registered
+ owner of the Aircraft and the Aircraft shall be free and clear of any and
+ all Lessor Liens attributable to it other than the interests of the
+ Beneficiaries under the Trust Agreement;
+
+ (ii) Lessor is a banking corporation duly organized and
+ validly existing and in good standing under the laws of Delaware, and has
+ the corporate power and authority to carry on its business as presently
+ conducted and to perform its obligations under this Lease;
+
+ (iii) the making and performance by Lessor of this Lease and
+ any other Operative Document to which Lessor is a party have been duly
+ authorized by all necessary corporate action on the part of Lessor and
+ will not violate any provision of any law, governmental rule or
+ regulation of the State of Delaware or of the United States of America
+ governing the banking and trust powers of Lessor or its Certificate of
+ Incorporation or By-Laws;
+
+ (iv) this Lease has been duly entered into and delivered by
+ Lessor, and this Lease does, and each other Operative Document when
+ executed and delivered by Lessor hereunder will, constitute legal, valid
+ and binding obligations of Lessor, enforceable in accordance with their
+ respective terms except as may be limited by bankruptcy, insolvency,
+ reorganization, moratorium or other similar laws affecting the
+ enforcement of creditors, rights generally, and, to the extent that
+ certain remedies require or may require enforcement by a court of equity,
+ by such principles of equity (regardless of whether such enforceability
+ is considered in a proceeding in equity or at law) as a court having
+ jurisdiction may impose and by laws which may affect some of such
+ remedies but which do not make available remedies inadequate for the
+ substantial realization of the benefits provided herein;
+
+ (v) neither the execution and delivery by Lessor of this
+ Lease and any other Operative Document to which it is a party nor the
+ performance by it of any of its obligations hereunder or thereunder, nor
+ the compliance by it with the terms and conditions hereof or thereof,
+ will violate, conflict with or result in any breach of any terms,
+ conditions or provisions of, or provisions of, or
+
+
+ -18-
+
+
+
+
+
+ constitute a default under, any law of the State of Delaware or the
+ United States of America governing the banking and trust powers of Lessor
+ or court judgment or decree applicable to it or pursuant to which it was
+ organized or any agreement or instrument to which it is a party or by
+ which it or any of its property is bound, or result in the creation or
+ imposition of any Lessor's Lien attributable to it on the Aircraft other
+ than as provided for in this Lease (such representation being made in
+ reliance upon and subject to the accuracy of the representation and
+ warranty contained in the last sentence of Section 5(d)(xvi));
+
+ (vi) neither the execution and delivery nor the performance
+ by Lessor of this Lease and any Operative Document to which Lessor is a
+ party requires any consent or approval of, giving of notice to,
+ registration with, or taking of any other action in respect of any
+ governmental authority or agency in the State of Delaware;
+
+ (vii) no registration, recording, filing or notarization of
+ this Lease in the State of Delaware and no other action by Lessor is
+ necessary to be taken in the State of Delaware to ensure the validity or
+ enforceability of the obligations of Lessor or the rights of Lessee
+ hereunder;
+
+ (viii) there are no pending or, to the knowledge of Lessor,
+ threatened actions or proceedings before any court, governmental or
+ administrative agency or arbitral body which actions or proceedings could
+ have a material and adverse effect on Lessor's ability to perform its
+ obligations under this Lease or any other Operative Document to which it
+ is a party;
+
+ (ix) Lessor shall obtain, make and maintain in full force and
+ effect, promptly renew from time to time and comply with the terms of all
+ consents, permissions, licenses, authorizations, approvals, registrations
+ and filings which may from time to time be required in the State of
+ Delaware in order to enable it to perform its obligations hereunder or
+ for the enforceability or admissibility in evidence hereof; and
+
+ (x) Lessor is a "citizen of the United States" within the
+ meaning of Section 40102(a)(15) of the Federal Aviation Act.
+
+ -19-
+
+
+
+
+
+ (c) MANUFACTURERS' WARRANTIES. So long as no Default and no
+ Event of Default shall have occurred and be continuing and so long as the Term
+ shall remain in effect, Lessee may exercise for the account of Lessor such
+ rights as Lessor may have under any warranty, express or implied, with respect
+ to the Aircraft made by the Manufacturer or the Engine Manufacturer, to the
+ extent that the same may be assigned or otherwise made available to Lessee;
+ provided, however, that upon an Event of Default all such rights shall
+ immediately revert to Lessor including all claims thereunder whether or not
+ perfected, and such reversion shall occur at the earlier of (A) written notice
+ from Lessor to Lessee to such effect, and (B) the expiration or any termination
+ of the Term. Lessor agrees to execute and deliver such further documents and
+ instruments and to do such further acts, deeds, matters or things (at Lessee's
+ expense) as may be necessary to enable Lessee to obtain whatever warranty
+ service furnished for the Aircraft or the Engines by the Manufacturer or the
+ Engine Manufacturer, respectively, as is available to the Lessor.
+
+ (d) LESSEE'S REPRESENTATIONS AND WARRANTIES. Lessee hereby
+ makes the following representations and warranties, which representations and
+ warranties shall survive the execution and delivery of this Lease and the
+ delivery of the Aircraft:
+
+ (i) Lessee is a corporation duly organized and existing under
+ the Laws of the State of Hawaii and has the corporate power and authority
+ to carry on its business as presently conducted and to perform its
+ obligations under this Lease;
+
+ (ii) this Lease has been duly authorized by all necessary
+ corporate action on the part of Lessee and does not require any approval
+ of stockholders of Lessee (or if such approval is required, such approval
+ has been obtained), and neither the execution and delivery hereof nor the
+ consummation of the transactions contemplated hereby nor compliance by
+ Lessee with any of the terms and provisions hereof will contravene any
+ Law applicable to Lessee or the corporate charter or by-laws of Lessee-or
+ result in any breach of, or constitute any default under, or result in
+ the creation of any Lien upon any property of Lessee under, any credit
+ agreement or instrument, or other agreement or-instrument to which Lessee
+ is a party or by which Lessee or its properties or assets are bound or
+ affected;
+
+ (iii) Lessee has received every consent, approval or
+ authorization of, and has given every
+
+ -20-
+
+
+
+
+
+ notice to, each Governmental Entity having jurisdiction with respect to
+ the execution, delivery or performance of this Lease that is required for
+ Lessee to execute and deliver this Lease and to perform the transactions
+ contemplated hereby and each such consent, approval or authorization is
+ valid and effective and has not been revoked;
+
+ (iv) this Lease has been duly executed and delivered by
+ Lessee, and the Lease does, and the Lease Supplement when executed and
+ delivered by Lessee will, constitute legal, valid and binding obligations
+ of Lessee, enforceable in accordance with their respective terms, except
+ as enforcement thereof may be limited by applicable bankruptcy,
+ insolvency, reorganization, moratorium or other similar laws affecting
+ the enforcement of creditors, rights generally, and, to the extent that
+ certain remedies require or may require enforcement by a court of equity,
+ by such principles of equity (regardless of whether such enforceability
+ is considered in a proceeding in equity or at law) as a court having
+ jurisdiction may impose and by Laws which may affect some of such
+ remedies but which do not make the available remedies inadequate for the
+ substantial realization of the benefits provided herein;
+
+ (v) there are no suits or proceedings (other than those
+ heretofore disclosed in writing to each Beneficiary) pending or, to the
+ knowledge of Lessee, threatened in any court or before any regulatory
+ commission, board or other administrative governmental agency against or
+ affecting Lessee which might have a materially adverse effect on the
+ current business or financial condition of Lessee;
+
+ (vi) Lessee has filed or caused to be filed all tax returns
+ which are required to be filed by it, and has paid or caused to be paid
+ all Taxes shown to be due or payable on said returns or on any assessment
+ received by Lessee, except to the extent being contested in good faith by
+ appropriate proceedings, and for the payment of which adequate reserves
+ have been provided;
+
+ (vii) except for the filing for recordation of the FAA Bills
+ of Sale, an application for registration of the Aircraft on AC Form
+ 8050-1 in the name of the Lessor, the Trust Agreement, the affidavit of
+ citizenship of the Lessor and this Lease and the Lease Supplement with
+ the FAA, the
+
+ -21-
+
+
+
+
+
+ filing with the Hawaii Bureau of Conveyances, Hawaii of precautionary UCC
+ financing statements and continuation statements at periodic intervals
+ with respect to the Aircraft and the placing on the Aircraft and on each
+ Engine of the plates containing the legends referred to in Section 6(f)
+ hereof, no further filing or recording of this Lease or of any other
+ document and no further under the Laws of any Governmental Entity in
+ order to (A) fully protect and establish Lessor's title to, interest in
+ and property rights with respect to the Aircraft as against Lessee or
+ any third party or (B) ensure the validity, effectiveness and enforce
+ ability of this Lease;
+
+ (viii) Lessee is not in default in the performance of any of
+ its obligations (a) for the payment of indebtedness for borrowed money or
+ of any interest or premium thereon or (b) for the payment of rent under
+ any lease or agreement to lease real, personal or mixed property;
+
+ (ix) the Maintenance Program for the Aircraft complies with
+ all FAA requirements;
+
+ (x) the consolidated financial statements of Aloha Airgroup,
+ Inc. (including Lessee), including the balance sheets and audited
+ statements of income and retained earnings of Lessee, for the preceding
+ two fiscal years and for the period ending September 30, 1995, copies of
+ which have been furnished to Lessor, are prepared in accordance with
+ generally accepted accounting principles, and present fairly the
+ financial position and operations of Lessee, and subsequent to the
+ conclusion of the last such period, there has been no material adverse
+ change in such position or operations;
+
+ (xi) there are no withholding or other Taxes to be deducted
+ from any payment to be made by Lessee under this Lease (other Taxes to
+ which Lessee's indemnity does not extend pursuant to the provisions of
+ Section 10);
+
+ (xii) Lessee is a Certified Air Carrier and holds all
+ licenses, certificates, permits and franchises from the appropriate
+ agencies of the United States of America and/or all other governmental
+ authorities having jurisdiction which are necessary to authorize the
+ Lessee to engage in air transport and to carry on its business as
+
+ -22-
+
+
+
+
+
+ presently conducted and to be conducted with the Aircraft;
+
+ (xiii) Lessee is a "citizen of the United States" as defined
+ in Section 40102(a)(15) of the Federal Aviation Act;
+
+ (xiv) Lessor is entitled to the benefits of Section 1110 of
+ Title 11 of the united States Code with respect to the Aircraft and the
+ Engines;
+
+ (xv) Lessee's chief executive office (as such term is defined
+ in Article 9 of the Uniform Commercial Code) and the place where it keeps
+ its corporate records concerning the Aircraft, all its interest in, to
+ and under any Operative Document and its contracts relating thereto, are
+ located in Honolulu, Hawaii;
+
+ (xvi) Neither Lessee nor any Person who was at such time a
+ Commonly Controlled Person has terminated any Plan or withdrawn from any
+ multiemployer plan (as defined in Section 4001(a)(3) of ERISA) in a
+ manner which has or could reasonably be expected to result in any
+ liability under Title IV of ERISA, and no event or condition has occurred
+ or exists as a result of which any such liability could reasonably be
+ expected to be incurred. No Plan maintained or contributed to (or
+ required to be contributed to) by Lessee or any Person who was at such
+ time a Commonly Controlled Person has incurred an "accumulated funding
+ deficiency" (as defined in Section 302 of ERISA). No Lien imposed under
+ the Code or ERISA on the assets of Lessee or any Commonly Controlled
+ Person exists or is reasonably likely to arise on account of any Plan. No
+ prohibited transaction (as defined in Section 406 of ERISA or Section
+ 4975 of the Code) has occurred which has subjected or could subject
+ Lessee or any Commonly Controlled Person to any liability under Section
+ 502(i) of ERISA or Section 4975 of the Code. The execution and delivery
+ by Lessee of this Lease and the consummation of the transactions
+ contemplated hereby will not constitute a prohibited transaction within
+ the meaning of Section 406 of ERISA or Section 4975 of the Code; and
+
+ (xvii) No part of the Rent or other payments made by Lessee
+ under the Operative Documents will be made with the assets of any plan
+ (or its related trust) as defined in Section 4975(e) of the Code or of
+ any employee benefit plan (or its
+
+ -23-
+
+
+
+
+
+ related trust) as defined in Section 3(3) of ERISA.
+
+ Section 6. POSSESSION AND USE; REGISTRATION.
+
+ (a) POSSESSION.
+
+ (i) SUBLEASE, ASSIGNMENT AND TRANSFER. Without the prior
+ written consent of Lessor and each Beneficiary, Lessee will not assign this
+ Lease or sublet, transfer or relinquish possession of the Aircraft, Airframe or
+ any Engine or install any Engine or permit any Engine to be installed on any
+ airframe other than the Airframe or under any circumstances permit the
+ Aircraft, Airframe or any Engine to be used by any other Person for any purpose
+ whatsoever, provided that so long as (A) no Default or Event of Default shall
+ have occurred and be continuing, (B) the action to be taken shall not affect
+ the registration of the Aircraft with the FAA and (C) all necessary approvals
+ of each Governmental Entity having jurisdiction over the Aircraft have been
+ obtained, then Lessee, without the prior written consent of Lessor and each
+ Beneficiary, may:
+
+ (1) subject any Engine to normal interchange or pooling
+ agreements or similar arrangements in each case customary in the airline
+ industry and entered into in the ordinary course of its business with
+ Permitted Sublessees or an air carrier approved by Lessor and each
+ Beneficiary or an FAA licensed engine overhaul agency, provided that
+ Lessee shall give the Beneficiaries 30 days' prior notice thereof
+ (together with a copy of the applicable pooling or interchange
+ agreement); and provided further, however, that: (A) no such agreement or
+ arrangement results in or requires the transfer of title to such Engine;
+ or (B) if Lessor's title to such Engine shall be divested under any such
+ agreement or arrangement, such divestiture shall be deemed to be a
+ Casualty occurrence with respect to such Engine and Lessee shall comply
+ with Section 11(b) hereof in respect thereof;
+
+ (2) deliver possession of the Aircraft, the Airframe or any
+ Engines to the manufacturer thereof for testing or other similar purposes
+ or to any organization for service, repair, maintenance, testing or
+ overhaul work on the Aircraft, Airframe or Engine or any Part thereof or
+ for alterations or modifications in or additions to the Aircraft,
+ Airframe or Engine to the extent required or permitted by the terms of
+ Section 9 hereof;
+
+ -24-
+
+
+
+
+
+ (3) install an Engine on an airframe (other than the
+ Airframe) owned by Lessee free and clear of all Liens except: (A) those
+ of the type permitted under Section 14 hereof and those which apply only
+ to the engines (other than Engines), appliances, parts, instruments,
+ appurtenances, accessories, furnishings and other equipment (other than
+ Parts) installed on such airframe (but not to the airframe as an
+ entirety); and (B) the rights of the participants under normal
+ interchange agreements which are customary in the airline industry and do
+ not contemplate, permit, result in or require the transfer of title to
+ theairframe, engines or parts installed thereon;
+
+ (4) install an Engine on an airframe leased to Lessee or
+ owned by Lessee subject to a conditional sale or other security
+ agreement, provided, that: (A) such airframe is free and clear of all
+ Liens except the rights of the parties to the lease or conditional sale
+ or other security agreement covering such airframe and except Liens of
+ the type permitted by clauses (A) and (B) of Section 6(a)(i)(3), and the
+ Lien of any mortgage which either by its terms does not apply to the
+ Engine or which effectively provides that each Engine leased to Lessee
+ hereby shall not become subject to the Lien thereof or to any rights of
+ any party thereunder other than Lessee (with respect to Lessee's rights
+ expressly granted hereunder), notwithstanding the installation of such
+ Engine on any airframe subject to the Lien of such mortgage, unless and
+ until Lessee shall become the owner of such Engine and Lessor shall have
+ no further interest therein, all pursuant to the express terms of this
+ Lease (as evidenced by a written instrument signed by Lessor); and (B)
+ there shall be in effect a written agreement of the lessor or secured
+ party of such airframe (which may be the lease or conditional sale or
+ other security agreement covering such airframe) substantially similar
+ in effect to the agreement of Lessor in Section 6(b) below whereby such
+ lessor or secured party effectively and expressly agrees that neither it
+ nor its successors or assigns will acquire or claim any right, title or
+ interest in any Engine by reason of such Engine being installed on such
+ airframe at any time while such Engine is subject to this Lease, and a
+ copy of such agreement shall be provided to Lessor or any Beneficiary
+ upon its request;
+
+ -25-
+
+
+
+
+
+ (5) sublease the Airframe and the Engines or the engines then
+ installed on the Airframe or any Engine to any Permitted Sublessee;
+ PROVIDED, that such Permitted Sublessee shall not (i) have consented to
+ the appointment of a receiver, trustee or liquidator or admitted in
+ writing its inability to pay its debts generally as they become due or
+ filed a voluntary petition in bankruptcy or a voluntary petition seeking
+ reorganization in a proceeding under any bankruptcy laws, (ii) have had
+ an involuntary order, judgment or decree entered against such Permitted
+ Sublessee by any court appointing a receiver, trustee or liquidator for
+ such Permitted Sublessee or (iii) be in default under any lease or
+ financing agreement to which such Permitted Sublessee is a party; and
+
+ (6) enter into a Wet Lease for the Aircraft in the ordinary
+ course of its business for a term or terms (including any renewal terms
+ or extensions) that do not extend beyond the Term.
+
+ (ii) CERTAIN LIMITATIONS ON TRANSFERS. With respect to any
+ assignment, sublet, transfer or relinquishment of possession pursuant to
+ Section 6(a)(i) (other than a transfer of an Engine which is deemed a Casualty
+ Occurrence) or any other assignment, sublet or transfer of possession consented
+ to in writing by Lessor and the Beneficiary:
+
+ (1) Lessee shall remain primarily liable hereunder for the
+ performance of all of the terms of this Lease to the same extent as if
+ such transfer had not occurred, and no sublease or other relinquishment
+ of possession of the Equipment shall in any way discharge or diminish any
+ of Lessee's obligations to Lessor under this Lease or any of Lessor's
+ rights hereunder and such rights shall continue as if such sublease or
+ transfer had not occurred;
+
+ (2) the term of any sublease (including, without limitation,
+ any option of the sublessee to renew or extend the sublease), interchange
+ or pooling arrangement shall not continue beyond the end of the Term;
+
+ (3) any sublease permitted by this Section 6 shall in
+ addition expressly provide that (v) such sublease is subject and
+ subordinate to all of the terms of this Lease and all rights of Lessor
+ under this Lease, including without limitation Lessor's
+
+ -26-
+
+
+
+
+
+ rights to repossession pursuant to Section 18 and to void such Permitted
+ Sublessee's right to possession upon such repossession, shall apply, (w)
+ such Permitted Sublessee consents to the security assignment by Lessee to
+ Lessor of all of Lessee's right, title and interest in such sublease for
+ purposes of securing Lessee's obligations hereunder and that Permitted
+ Sublessee, upon receipt of a written notice from Lessor that an Event of
+ Default has occurred and is continuing, will make all payments under such
+ sublease to, and, if this Lease has been declared in default pursuant to
+ Section 18 hereof, will return the Equipment only to, Lessor, or as
+ directed by Lessor, for so long as such Event of Default shall continue,
+ (x) the maintenance, operation and insurance provisions of such sublease
+ shall be substantially the same as the provisions of this Lease (whether
+ by requiring such obligations to be performed by such Permitted
+ Sublessee, by Lessee or by both), (y) the Aircraft shall not be operated
+ or used other than as provided in this Lease, and (z) the Lessor may void
+ or terminate such sublease following an Event of Default hereunder but
+ only during the period as such Event of Default shall continue;
+
+ (4) prior to the commencement of any sublease to a Permitted
+ Sublessee, Lessee shall give Lessor and each Beneficiary no less than 10
+ days written notice of the proposed sublease of the Airframe or Engine,
+ as the case may be, which notice shall include the identity of the
+ Permitted Sublessee, the term and rental rate of the sublease and a copy
+ of such sublease; PROVIDED, HOWEVER, that Lessee shall use its reasonable
+ efforts to give such notice earlier than such 10 days if practicable
+ under the circumstances and to advise Lessor and the Beneficiaries at any
+ time at which Lessee commences substantial discussions or negotiations
+ with any Permitted Sublessee with respect to the potential sublease of
+ the Aircraft;
+
+ (5) any sublease shall-expressly prohibit any assignment or
+ further sublease of the Aircraft or any assignment of the rights under
+ such sublease;
+
+ (6) any Wet Lease shall have a term that does not exceed
+ beyond the end of the Term; and
+
+ (7) the Person with which and for which any Wet Lease is
+ entered into shall be an airline that is a
+
+ -27-
+
+
+
+
+
+ resident of, is domiciled in, and maintains its principal base of
+ operations in, a Permitted Jurisdiction.
+
+ Prior to entering into any sublease of the Airframe and any Engine or engine or
+ any Engine to a Permitted Sublessee, Lessee shall (A) execute and deliver to
+ Lessor and each Beneficiary an assignment of and grant of a security interest
+ in all of Lessee's right, title and interest in such sublease, which assignment
+ shall be in form and substance reasonably satisfactory to Lessor and each
+ Beneficiary, contain the consent to such assignment of such Permitted
+ Sublessee, and be recordable under the Federal Aviation Act and shall be
+ executed, delivered and filed with the FAA for recordation at Lessee's expense
+ on or before commencement of such sublease (provided that such assignment may
+ specify that Lessor may not exercise any of the rights or remedies of the
+ Lessee under such sublease including, without limitation, any right to receive
+ rentals under such sublease or to interfere with the rights of the Lessee to
+ grant waivers or consents under, or to amend the provisions of, any such
+ sublease) unless, and only so long as, an Event of Default hereunder shall have
+ occurred and be continuing, and (B) execute and deliver to Lessor an
+ appropriately completed Uniform Commercial Code financing statement, for filing
+ in Hawaii and each other applicable jurisdiction, evidencing the assignment by
+ Lessee to Lessor of such sublease. In connection with such a sublease of the
+ Aircraft, Lessee shall provide to Lessor and each Beneficiary, at Lessee's or
+ Permitted Sublessee's expense and on or before commencement of such sublease, a
+ legal opinion from counsel to the Permitted Sublessee in form and substance
+ reasonably satisfactory to Lessor and each Beneficiary, as to the due execution
+ and delivery and enforceability of such sublease and assignment of sublease and
+ as to such other matters as Lessor or such Beneficiary may reasonably request.
+ Lessee agrees that any such security assignment granted to the Lessor may be
+ assigned to the Lender, if any, it being understood that such assignment shall
+ be consistent and not contravene the foregoing provisions. Any monies received
+ by Lessor or Lender pursuant to the exercise of its rights under the assignment
+ of any sublease shall be held by Lessor or the Lender as additional security
+ for the performance by Lessee of its obligations under this Lease and, to the
+ extent not applied against amounts due and owing by Lessee hereunder and the
+ exercise of remedies hereunder, shall be returned to Lessee at such time as no
+ Event of Default shall be continuing. All costs and expenses (including
+ reasonable legal fees and expenses) incurred by Lessor, Lender or any
+ Beneficiary in connection with any sublease or the assignment thereof shall be
+ borne by Lessee and reimbursed by Lessee upon demand.
+
+ -28-
+
+
+
+
+
+ (b) RECIPROCAL RECOGNITION OF RIGHTS. In the event Lessee
+ shall have received from the lessor or secured party of any airframe leased to
+ Lessee or owned by Lessee subject to a conditional sale or other security
+ agreement a written agreement complying with clause (B) of Section 6(a)(i)(4)
+ hereof (which agreement may be contained in the lease, conditional sale
+ agreement or security agreement relating to such airframe), and such lease or
+ conditional sale or other security agreement covering such airframe also covers
+ an engine or engines owned by the lessor under such lease or subject to a
+ security interest in favor of the secured party under such conditional sale or
+ other security agreement, Lessor hereby agrees for the benefit of such lessor
+ or secured party that Lessor will not acquire or claim, as against such lessor
+ or secured party, any right, title or interest in any such engine as the result
+ of such engine being installed on the Airframe at any time while such engine is
+ subject to such lease or conditional sale or other security agreement and owned
+ by such lessor or subject to a security interest in favor of such secured
+ party. Lessor also hereby agrees for the benefit of the mortgagee under any
+ mortgage complying with clause (A) of Section 6(a)(i)(4) hereof, relating to
+ installation of an Engine on an airframe leased to Lessee, that Lessor will not
+ acquire or claim, as against such mortgagee, any right, title or interest in
+ any engine subject to the Lien of such mortgage as the result of such engine
+ being installed on the Airframe at any time while such engine is subject to the
+ Lien of such mortgage.
+
+ (c) LAWFUL INSURED OPERATIONS. Lessee will not permit the
+ Aircraft to be maintained, used or operated in violation of any Law of any
+ Governmental Entity, or in violation of any airworthiness certificate, or
+ license or registration issued by any such authority, or contrary to the
+ Manufacturer's or Engine Manufacturer's operating manuals or instructions for
+ the Aircraft or the Engines. In the event that any such Law requires alteration
+ of the Aircraft, then, subject to Section 1 of the Supplemental Agreement,
+ Lessee shall comply therewith at its sole expense and shall maintain the same
+ in proper condition for operation under such Laws. Lessee agrees not to operate
+ the Aircraft, or permit the Aircraft to be operated, (i) unless the Aircraft is
+ covered by insurance as required by the provisions hereof or (ii) contrary to
+ the terms of such insurance. Lessee also agrees not to operate or locate the
+ Aircraft or suffer or permit the Aircraft to be operated or located in any area
+ excluded from coverage by any insurance policy issued pursuant to the
+ requirements of this Lease.
+
+ (d) MAINTENANCE. Lessee, at its own cost and expense, shall,
+ or shall cause a maintenance contract center that is FAA certified for the
+ contracted maintenance of the
+
+ -29-
+
+
+
+
+
+ Aircraft and located in the united States of America or Canada, to:
+
+ (i) service, repair, maintain, overhaul and test, or cause
+ the same to be done to, each Item leased hereunder (A) so as to keep such
+ Item in the same condition as when delivered to Lessee hereunder, fair
+ wear and tear excepted, and in good operating condition, (B) so as to
+ keep the Aircraft in the condition necessary to enable the airworthiness
+ certification of the Aircraft to be maintained in good standing at all
+ times under all applicable FAA rules and regulations, (C) so as to meet
+ the standards observed by Lessee with respect to aircraft of similar type
+ owned or leased by Lessee and operated on similar routes, and in a manner
+ which does not adversely discriminate against the Aircraft compared to
+ similar aircraft operated by Lessee and in a similar manner and with care
+ as that undertaken by the majority of airlines based in the United States
+ of America which operate Boeing 737-200ADV aircraft (it being understood
+ that the obligations hereby imposed upon Lessee by reference to the
+ manner and care undertaken by other airlines is not intended to supersede
+ Lessee's obligations herein with respect to the specific and express
+ terms of this Lease), (D) in strict compliance with Lesseel's
+ FAA-approved Maintenance Program applicable to the Aircraft (including
+ the corrosion prevention control program (CPCP/D6-38528) and aging
+ aircraft program (D6-38505 revision H as applicable per airworthiness
+ directive or per effectivity)), a true and complete copy of which shall
+ be delivered by Lessee to the Lessor which Maintenance Program shall be
+ in substantial conformance with the Manufacturer's recommended
+ maintenance planning document;
+
+ (ii) maintain all records, logs and other materials required
+ by the FAA or the Maintenance Program in respect of the Aircraft with all
+ such records, logs and other material being maintained in the English
+ language;
+
+ (iii) promptly furnish to Lessor or each Beneficiary such
+ information, in English, as may be required to enable Lessor or such
+ Beneficiary to file any reports required to be filed by such Lessor or
+ Beneficiary with any governmental authority concerning the Aircraft;
+
+ -30-
+
+
+
+
+
+ (iv) incorporate into the Aircraft all manufacturer mandatory
+ requirements, orders, "alert" and, to the extent scheduled for a majority
+ of Lessee's other owned or leased aircraft of the same model,
+ "recommended" service bulletins issued by the manufacturers and vendors
+ of the Items (so far as such manufacturers and vendors issue such service
+ bulletins) which become due for compliance during the Term without
+ waiver, exception, deferment or carryover and, in addition, all other
+ service bulletins that Lessee adopts during the Term for a majority of
+ other aircraft of the same model in Lessee's fleet;
+
+ (v) carry out, on each Item, all applicable regulatory
+ regulations, mandatory requirements, orders and airworthiness directives
+ which the FAA may from time to time issue and which become due during or
+ within three months following the expiration of the Term (or within six
+ months following the expiration of the Term if requested by the
+ Beneficiaries at Beneficiaries' cost; however, the Term will not be
+ deemed extended solely as a result of the additional work to terminate
+ the airworthiness directives due after three months after the expiration
+ of the Term and requested by the Beneficiaries) with respect to such
+ Item, without waiver, exception, deferment or carryover beyond the
+ expiration of the Term (except as expressly set forth in the Supplemental
+ Agreement); provided that any inspections and terminating action required
+ by such regulatory regulations, mandatory requirements, orders and
+ airworthiness directives to be performed by a particular date shall be
+ performed by such date;
+
+ (vi) maintain, service, repair and overhaul the Airframe and
+ all Engines to comply with all warranty requirements;
+
+ (vii) except when required by maintenance or expressly
+ permitted by Section 6(a) above, equip the Airframe at all times with two
+ Engines;
+
+ (viii) make available to Lessor upon its reasonable request
+ during the Term, one or more persons with sufficient technical knowledge
+ to assist the Lessor or each Beneficiary in its examination of such
+ materials and documents relating to the maintenance and condition of the
+ Aircraft as the Lessor or each Beneficiary shall from time to time
+ reasonably request; and
+
+ -31-
+
+
+
+
+
+ (ix) without prejudice to Lessee's obligations to repair the
+ Aircraft pursuant to the terms hereof, if any damage occurs to the
+ Aircraft which results in the need for repairs where the costs of such
+ repairs are expected to exceed US$500,000, Lessee shall promptly notify
+ Lessor and each Beneficiary of the details of the repairs required and
+ shall furnish them with a workscope relating thereto, and Lessee shall
+ comply with any reasonable direction of Lessor or each Beneficiary as to
+ the conduct of the repairs to the Aircraft; PROVIDED, HOWEVER, that the
+ Beneficiaries shall review such workscope and inform Lessee of any
+ comments thereon or objections thereto within three Business Days after
+ their receipt thereof; PROVIDED FURTHER, HOWEVER, that Lessee shall be
+ permitted to use a maintenance contractor to perform its obligations
+ pursuant to this Section 6(d) so long as such maintenance contractor is a
+ repair facility approved by the FAA for the contracted repair of the
+ Aircraft which is located in the United States of America or Canada, or
+ other facility acceptable to the Lessor and each Beneficiary.
+
+ (e) REGISTRATION. So long as no Event of Default shall have
+ occurred and be continuing, Lessor shall cause the Aircraft to be duly
+ registered in the name of Lessor with the FAA under the Federal Aviation Act as
+ at the Delivery Date, shall take such actions as are reasonably appropriate (so
+ long as such actions pose no material burden or risk to Lessor or the
+ Beneficiaries) to maintain such registration during the Term, and shall not
+ take any action to interfere with or cancel such registration; provided that,
+ after the Delivery Date, Lessee shall monitor and, to the extent not relating
+ to the U.S. citizenship of the Beneficiaries, maintain such registration;
+ PROVIDED, FURTHER, HOWEVER, that (i) if Lessor shall at any time cease to be a
+ "citizen of the United States" (as defined in the Federal Aviation Act) and if
+ such requirement shall then be necessary to maintain such registration, then
+ the Beneficiaries shall take such action as shall be reasonable and appropriate
+ to maintain such registration (including by appointing a successor trustee
+ under the Trust Agreement or establishing a voting trust covering the
+ Aircraft), (ii) Lessee and Lessor shall cooperate reasonably and in good faith
+ to maintain such registration, and (iii) Lessee shall, at Lessee's expense,
+ be responsible for the filing and registration of the Lease (as supplemented by
+ the Lease Supplement) and for maintaining such registration throughout the
+ Term.
+
+ -32-
+
+
+
+
+
+
+
+ (f) INSIGNIA. Upon delivery of the Aircraft, Lessee agrees to
+ promptly place the Lease Identification in the cockpit in a prominent location
+ and to place the Lease Identification on each Engine. Lessee agrees to make
+ such reasonable changes to the Lease Identification as Lessor may request from
+ time to time.
+
+ Section 7. INFORMATION.
+
+ During the term of this Lease, Lessee agrees to furnish
+ Lessor and each Beneficiary the following:
+
+ (i) within sixty (60) days following the end of each quarter
+ of Lessee's fiscal year, except the last such quarter of such year, an
+ unaudited consolidated balance sheet of Aloha Airgroup, Inc. (including
+ Lessee) prepared as of the close of each quarterly period, together with
+ the related unaudited profit and loss statement for such period;
+
+ (ii) within one hundred twenty (120) days or such longer
+ period as reasonably justified after the close of each fiscal year of
+ Lessee, an audited consolidated balance sheet, profit and loss statement
+ and statement of stockholders' equity of Aloha Airgroup, Inc. (prepared
+ on a consolidated basis including Lessee), as of the close of such fiscal
+ year, reported on by Lessee's independent certified accountants of
+ recognized standing;
+
+ (iii) within one hundred twenty (120) days after the close of
+ each fiscal year of Lessee, a certificate signed by a duly authorized
+ officer of Lessee, stating (a) that such officer is familiar with the
+ relevant terms of this Lease and has made a review of Lessee's compliance
+ herewith during the preceding fiscal year and (b) that no event has
+ occurred which constitutes a Default, or, if such an event has occurred,
+ the nature thereof and action Lessee has taken or is taking to cure the
+ same;
+
+ (iv) from time to time upon request, Lessee will at no cost
+ to Lessor or each Beneficiary provide Lessor or each Beneficiary
+ certified true and complete copies in the English language of the
+ Maintenance Program; provided that Lessor or such Beneficiary shall
+ maintain the confidentiality thereof and shall not disclose the contents
+ thereof to any Person, except as permitted by Section 20(k) and except to
+ the extent necessary
+
+ -33-
+
+
+
+
+
+ to bridge the Aircraft from the Maintenance Program to the maintenance of
+ a subsequent lessee or buyer of the Aircraft or as required by the Law of
+ a jurisdiction in which the Aircraft will be based, operated or
+ registered;
+
+ (v) Lessee will use its reasonable efforts to provide the
+ Beneficiaries on or before the fifth day of each calendar month
+ commencing with the next calendar month of the Delivery Date, and shall
+ in any event provide to the Beneficiaries upon request of a Beneficiary,
+ with a properly completed Monthly Aircraft Utilization and Status Report
+ in the Form of Exhibit J hereto for the preceding calendar month
+ operation of the aircraft;
+
+ (vi) Lessee will use its reasonable efforts to provide the
+ Beneficiaries, on or before the 15th day of January of each year
+ (commencing with January 1996), and shall in any event provide Lessor and
+ the Beneficiaries upon request of a Beneficiary in English, the
+ information and documentation for the preceding calendar year as listed
+ in Exhibit K hereto; PROVIDED, HOWEVER, that if (i) a Default or an Event
+ of Default shall have occurred and be continuing or (ii) Lessee's
+ financial condition changes adversely from its financial condition at the
+ time of the Delivery Date, then, upon notice and a request from Lessor or
+ a Beneficiary, Lessee shall provide such information on a quarterly basis
+ on the 15th day of each January, April, July and October, commencing with
+ the first of such dates to follow the date of such notice.
+
+ (vii) Lessee will use its reasonable efforts to provide the
+ Beneficiaries, within forty-five (45) days prior to the scheduled
+ performance of a block C or D check on the Aircraft, and shall in any
+ event provide Lessor and the Beneficiaries upon request of a Beneficiary
+ with written notice of the time and location of such scheduled block C or
+ D check; and
+
+ (viii) from time to time such other information regarding the
+ business, operations or the financial condition of Lessee as the Lessor
+ or any Beneficiary may from time to time reasonably request, including
+ reports from time to time as to location, condition, use, operation,
+ maintenance and insuring of the Aircraft and the Engines
+
+ -34-
+
+
+
+
+
+ (including total time spent in the air and time between overhauls).
+
+ Lessee shall permit Lessor, each Beneficiary and their
+ respective designees on at least seven (7) days' prior written notice to visit
+ and inspect the Aircraft, its condition, use and operation and the records
+ maintained in connection therewith during normal business hours; PROVIDED,
+ HOWEVER, that this shall not unreasonably interfere with Lessee's quiet use and
+ enjoyment of the Aircraft PROVIDED FURTHER, HOWEVER, that Lessor or the
+ Beneficiaries may conduct such visit and inspection at any time and with or
+ without notice if an Event of Default has occurred and is continuing. Lessor
+ and each Beneficiary shall have no duty to make any such inspection and shall
+ not incur any liability or obligation by reason of not making any such
+ inspection. Lessor's or any Beneficiary's failure to object to any condition
+ or procedure observed or observable in the course of an inspection hereunder
+ shall not be deemed to waive or modify any of the terms of this Lease with
+ respect to such condition or procedure. If such inspection is carried out
+ during a C Check or D Check, then Lessee will open any areas of the Aircraft
+ upon reasonable request of Lessor or a Beneficiary; PROVIDED, HOWEVER, that,
+ with respect to any C Check (other than the last C Check to occur prior to the
+ Return Occasion) the areas to be opened will be limited to those areas that are
+ in the zones of the Aircraft in which Lessee or its maintenance contractor are
+ working; provided further, however, that, with respect to the last C Check to
+ occur prior to the Return Occasion, Lessee will open any areas of the Aircraft
+ upon the reasonable request of Lessor or a Beneficiary.
+
+ Section 8. COVENANTS OF LESSEE.
+
+ Lessee covenants and agrees that:
+
+ (i) MAINTENANCE OF CORPORATE EXISTENCE. Except as provided in
+ Section 8(iv) below, during the term of this Lease, Lessee will preserve and
+ maintain its corporate existence. Lessee shall also preserve and maintain its
+ rights, privileges, licenses and franchises in all jurisdictions where the
+ failure to do so would have a materially adversely effect upon the Lessee's
+ ability to meet its obligations under this Lease and the other Operative
+ Documents.
+
+ (ii) MAINTENANCE OF STATUS. Lessee is, and shall remain so long
+ as it shall be Lessee under this Lease, duly qualified to operate the Aircraft
+ under applicable Law.
+
+ (iii) PAYMENT OF TAXES. Lessee will pay or cause to be paid all
+ taxes, assessments and governmental charges
+
+ -35-
+
+
+
+
+
+ or levies imposed upon it, or upon its income or profits, or upon any property
+ belonging to it, prior to the date on which penalties attached thereto and
+ prior to the date on which any lawful claim, if not paid, would become a Lien
+ upon any of the material property of Lessee.
+
+ (iv) CONSOLIDATION, MERGER, ETC. Without the prior written
+ consent of Lessor and each Beneficiary, Lessee shall not consolidate with,
+ merge with or merge into any other Person or convey, transfer or lease
+ substantially all of its assets as an entirety to any other Person unless, upon
+ and after giving effect to such transaction, (A) the surviving entity has at
+ least the same net worth and gross assets as the Lessee immediately prior to
+ such transaction, such surviving entity is Certified Air Carrier and a "citizen
+ of the United States" as defined in Section 101(16) of the Federal Aviation
+ Act, (C) Lessor shall continue to be entitled to the benefits of Section 1110
+ of the United States Bankruptcy Code, as in effect from time to time, and (D)
+ each of the Operative Documents shall continue in full force and effect and
+ shall constitute the legally binding and enforceable obligation of such
+ surviving entity.
+
+ (v) PLACE OF BUSINESS. Lessee will notify Lessor in writing
+ prior to any change of its principal place of business or chief executive
+ office.
+
+ (vi) NOTICE OF DEFAULT. Promptly after a responsible officer of
+ Lessee obtains knowledge of an event or circumstance that such officer knows
+ constitutes a Default hereunder, Lessee shall notify Lessor in writing of such
+ Default.
+
+ (vii) GOVERNMENTAL CONSENTS. Lessee undertakes to maintain in
+ full force and effect all governmental consents, licenses, authorizations,
+ approvals, declarations, filings and registrations obtained or effected in
+ connection with this Lease (other than those relating to the perfection of a
+ Lender's interest herein and subject to Section 5(b)(i) and Section 6;
+ provided, however, if so requested by a Beneficiary, Lessee shall cooperate
+ with the Beneficiary, at Beneficiary's cost, to perfect the interest of the
+ Lender) and every document or instrument contemplated hereby (including without
+ limitation foreign exchange and transfer permits regarding Dollar amounts due
+ hereunder and thereunder) and to take all such additional action as may be
+ proper or advisable in connection herewith or therewith. Lessee further
+ undertakes to obtain or effect any new or additional governmental consents,
+ licenses, authorizations, approvals, declarations, filings or registrations as
+ may become necessary for the performance of any of the terms and conditions of
+ this Lease or any other document or instrument contemplated hereby; PROVIDED,
+ HOWEVER, that Lessee shall
+
+ -36-
+
+
+
+
+
+ not be required to effect or maintain any governmental consent, authorization
+ or license that is required by a governmental authority in Delaware, Japan,
+ Ireland, New York or the United Kingdom unless required in connection with the
+ conduct of Lessee's business, the operation or maintenance by Lessee of the
+ Aircraft or the performance of Lessee's other obligations under this Lease.
+
+ (viii) SUSPENSION, CESSATION, ETC. Lessee shall not (i) voluntarily
+ suspend all or substantially all of its certificated operations; or (ii)
+ voluntarily or involuntarily permit to be revoked, canceled or otherwise
+ terminated all or substantially all of the franchises, concessions, permits,
+ rights or privileges required for the conduct of business and operations of
+ Lessee or the free and continued use and exercise thereof.
+
+ (ix) CERTIFIED AIR CARRIER. Lessee shall at all times remain a
+Certified Air Carrier.
+
+ Section 9. REPLACEMENT OF PARTS; ALTERATIONS, MODIFICATIONS
+ AND ADDITIONS.
+
+ (a) REPLACEMENT OF PARTS. Lessee, at its own cost and
+ expense, will promptly replace all Parts which may from time to time become
+ worn out, lost, stolen, destroyed, seized, confiscated, damaged beyond repair
+ or permanently rendered unfit for use for any reason whatsoever. In addition,
+ in the ordinary course of maintenance, service, repair, overhaul or testing,
+ Lessee may at its own cost and expense cause to be removed any Parts, whether
+ or not worn out, destroyed, damaged beyond repair or permanently rendered unfit
+ for use, provided that Lessee shall replace at its own cost and expense such
+ Parts as promptly as practicable. All replacement Parts shall be free and clear
+ of all Liens, other than Liens permitted by Section 14 hereof, shall be in at
+ least the same modification status and service bulletin accomplishment status,
+ shall be fully interchangeable as to form, fit and function, shall have been
+ overhauled or repaired and inspected by an agency acceptable to the FAA and
+ shall be in as good an operating condition as, and have a utility at least
+ equal to and a value and remaining warranty reasonably approximating, the Parts
+ replaced (assuming such replaced Parts were in the condition and repair in
+ which they were required to be maintained by the terms hereof) and all
+ historical records since new or last overhaul relating to such Parts (and all
+ historical records since manufacture with respect to Engines, Landing Gears,
+ the APU and all life limited parts installed on any Engine, Landing Gear or
+ APU) shall be maintained by Lessee.
+
+ -37-
+
+
+
+
+
+ All Parts owned by Lessor which are at any time removed from
+ the Aircraft shall remain the property of Lessor and subject to this Lease, no
+ matter where located, until such time as such Parts shall be replaced by Parts
+ which have been incorporated or installed in or attached to the Aircraft and
+ which meet the requirements for replacement Parts specified above. Immediately
+ upon any replacement Part becoming incorporated or installed in or attached to
+ the Aircraft as above provided, (i) title to the removed Part shall thereupon
+ vest in Lessee, free and clear of all rights of Lessor, (ii) title to such
+ replacement Part shall thereupon vest solely in Lessor and (iii) such
+ replacement Part shall become subject to this Lease and be deemed a Part for
+ all purposes hereof to the same extent as the Part which it has replaced.
+
+ (b) ALTERATIONS, MODIFICATIONS AND ADDITIONS. Lessee, at its
+ own expense, shall make such alterations and modifications and additions to the
+ Aircraft as may be required from time to time to meet the applicable standards
+ of the FAA or to comply with any Law, or, if performed by Lessee with respect
+ to a majority of its 737 fleet, any bulletin of the manufacturer of the
+ Aircraft, Engines or Parts. In addition, Lessee, at its own expense, may from
+ time to time make alterations and modifications in and additions to the
+ Aircraft, provided no such alteration, modification or addition diminishes the
+ remaining warranty, value or utility, or impairs the condition or
+ airworthiness, of the Aircraft. Title to all Parts incorporated or installed in
+ or attached or added to the Aircraft as the result of such alteration,
+ modification or addition shall vest immediately in Lessor and become subject to
+ this Lease, without the necessity for any further act of transfer, document or
+ notice. Notwithstanding the foregoing sentence of this Section 9(b), Lessor
+ agrees that so long as no Event of Default shall have occurred and be
+ continuing, Lessee may, at such time during the Term, remove any Part of the
+ Aircraft without replacement, provided that (i) such Part is in addition to,
+ and not in replacement or substitution for, any Part originally incorporated or
+ installed in or attached to the Aircraft at the time of delivery thereof
+ hereunder or any Part in replacement of, or substitution for, any such
+ originally incorporated or installed or attached Part, (ii) such Part is not
+ required to be incorporated or installed in or attached or added to the
+ Aircraft pursuant to the terms of this Section 9 and (iii) such Part can be
+ removed from the Aircraft without diminishing or impairing the value, utility,
+ condition, cosmetic appearance or airworthiness which the Aircraft would have
+ had at such time had such alteration, modification or addition not occurred.
+ Upon the removal by Lessee of any such Part as provided in the preceding
+ sentence, title thereto shall, without further act, vest in Lessee and such
+ Part shall no longer be deemed part
+
+ -38-
+
+
+
+
+
+ of the Aircraft (such a part is herein called a "Removable Part"). Lessee, at
+ its sole expense, shall repair any damage to the Aircraft resulting from any
+ such removal. Any Part not removed by Lessee as above provided prior to the
+ return of the Aircraft to Lessor hereunder whether pursuant to Section 16,
+ Section 18 or otherwise shall remain the property of Lessor. If any Removable
+ Part is (i) owned by any third party and leased to Lessee, (ii) sold to Lessee
+ subject to a conditional sales contract or other security interest or (iii)
+ leased to Lessee pursuant to a lease which is subject to a security interest in
+ favor of any third party, then Lessor will not acquire or claim, as against
+ such lessor, conditional vendor or secured party, any right, title or interest
+ in any such Removable Part as the result of such Removable Part being installed
+ in the Aircraft; provided, however, (a) that Lessor's inability to so acquire
+ or claim is subject to the express condition that such lessor, conditional
+ vendor or secured party shall not acquire or claim, as against Lessor, any
+ right, title or interest in the Aircraft, or any Part other than its interest
+ in such Removable Part by reason of such Removable Part being installed
+ thereon, and (b) that all of Lessee's right, title and interest in and to any
+ Removable Part not removed by Lessee within sixty (60) days after an Event of
+ Default shall, at such time, automatically become the property of Lessor and be
+ subject to this Lease.
+
+ In addition, with the prior written consent of the
+ Beneficiaries, Lessee may, at its own expense, substitute or replace any Part
+ that is originally installed on the Aircraft on the Delivery Date with another
+ Part provided that (i) no such substitution or replacement diminishes the
+ remaining warranty, value or utility, or impairs the condition or
+ airworthiness, of the Aircraft, (ii) title to all Parts incorporated or
+ installed in or attached or added to the Aircraft as the result of such
+ substitution or replacement shall vest immediately in Lessor free and clear of
+ Liens, other than those permitted by Section 14, and become subject to this
+ Lease, without the necessity for any further act of transfer, document or
+ notice, (iii) such substituted or replacement Parts can be removed from the
+ Aircraft without diminishing or impairing the value, utility, condition,
+ cosmetic appearance or airworthiness which the Aircraft would have had at such
+ time had such substitution or replacement not occurred, (iv) Lessee, at its
+ sole expense, shall repair any damage to the Aircraft resulting from any such
+ removal, and (v) nothing herein shall diminish or alter any of Lessee's
+ obligations under Section 16 or Exhibit E hereof to return the Aircraft on the
+ Return Occasion with all Parts (or substitution or replacement Parts of the
+ same make and model) that were originally installed on the Aircraft in the
+ condition required thereby (and including within this clause (v)
+
+ -39-
+
+
+
+
+
+ Lessee's obligation by the Return Occasion to re-install any Part for which a
+ substitution or replacement Part was installed on the Aircraft pursuant to this
+ paragraph).
+
+ In no event shall Lessor bear any liability or cost for any
+ alteration, modification or addition to, or for any grounding or suspension of
+ certification of, the Aircraft, or for any loss of revenue arising therefrom.
+ Lessee shall make no material alterations, modifications or additions to the
+ Aircraft (such as removal of seats, galleys, lavatories, major avionics
+ equipment or the like) that would affect the marketability of the Aircraft
+ without Lessor's and each Beneficiary's prior written consent. if Lessor and
+ each Beneficiary grant such consent, title to such removed Parts shall remain
+ with Lessor and Lessor and the Beneficiaries may request Lessee to reinstall
+ such Parts prior to termination of this Lease. If Lessor or Beneficiaries
+ request Lessee to reinstall such Parts, title to the Parts removed shall vest
+ in Lessee. All costs associated with such removal and reinstallation shall be
+ borne by Lessee.
+
+ Section 10. GENERAL TAX INDEMNITY.
+
+ (a) CERTAIN UNITED STATES TAXES. Lessee shall indemnify and
+ hold each Indemnitee harmless from and against all United States Withholding
+ Taxes (as defined herein) and United States Transportation Taxes (as defined
+ herein) which are imposed on such Indemnitee after the date hereof as a result
+ of a Tax Law Change (as defined herein). All payments required to be made by
+ Lessee under this Lease or any other Operative Document to any Indemnitee shall
+ be made free and clear of, and without deduction for or on account of, any such
+ United States Withholding Taxes or United States Transportation Taxes. If any
+ such United States Withholding Taxes or United States Transportation Taxes are
+ required to be withheld or deducted from any such payments, Lessee shall (i)
+ within the period for payment permitted by applicable law pay to the
+ appropriate Governmental Entity or taxing authority the full amount of such
+ United States Withholding Taxes or United States Transportation Taxes (and any
+ such additional United States Withholding Taxes or United States Transportation
+ Taxes in respect of the payment required under clause (ii) hereof) and make
+ such reports and filings in connection therewith in the manner required by
+ applicable Law, and (ii) pay to the relevant Indemnitee an additional amount
+ which (after deduction of all Taxes of any nature incurred by reason of the
+ payment or receipt of such additional amount) will be sufficient to yield to
+ the relevant Indemnitee the full amount which would have been received by it
+ had no deduction or withholding been made. Lessor will, and will cause each
+ Indemnitee to, provide Lessee with such forms or certificates as may from time
+ to
+
+ -40-
+
+
+
+
+
+ time be requested by Lessee (and required by applicable law) in order to
+ establish an exemption from (or reduced rate of) any such United States
+ Withholding Taxes or United States Transportation Taxes. As used herein, (x)
+ the term "TAX LAW CHANGE" means any change after the date hereof in the Code or
+ any treaty or other international agreement to which the United States is a
+ party, any proposed, final or temporary regulations thereunder, or the judicial
+ or official administrative interpretation of any of the foregoing; (y) the term
+ "UNITED STATES TRANSPORTATION TAXES" means the Taxes described in Section 887
+ of the Code or any successor to such Section; and (z) the term "UNITED STATES
+ WITHHOLDING TAXES" means Taxes imposed by the United States and which are
+ collected by withholding at source, or deduction from, any payments by Lessee
+ hereunder. Notwithstanding the foregoing, Lessee shall have no responsibility
+ for, and shall be entitled to withhold and deduct from payments hereunder or
+ any other Operative Documents, any such United States Withholding Taxes or
+ United States Transportation Taxes:
+
+ (A) which are imposed by reason of a voluntary transfer or
+ other disposition by the relevant Indemnitee of the Aircraft or this
+ Lease or any interest in either thereof (but not including any transfer
+ or disposition which occurs pursuant to the exercise of remedies arising
+ out of an Event of Default) or which exceed the amount of such United
+ States Withholding Taxes or United States Transportation Taxes which
+ would have been imposed had there not been such a transfer or
+ disposition;
+
+ (B) which are imposed on an Indemnitee by reason of a failure
+ by such Indemnitee to comply with its obligations set forth in the second
+ preceding sentence of this Section 10(a) or as a direct result of the
+ breach by such Indemnitee of this Lease or the gross negligence or
+ willful misconduct of such Indemnitee;
+
+ (C) which are imposed solely as a direct result of events or
+ circumstances occurring or arising after return of the Aircraft to Lessor
+ in accordance with this Lease and the performance by Lessee of all of its
+ obligations hereunder; or
+
+ (D) which the relevant Indemnitee is entitled to utilize as a
+ credit against or in reduction of its liability, if any, for Taxes
+ imposed by the United States for which Lessee is not responsible under
+ Section 10(b) hereof.
+
+ (b) GENERAL TAX INDEMNITY. In addition, and whether or not
+ any of the transactions contemplated by any of the Operative Documents
+ are consummated, except as set
+
+ -41-
+
+
+
+
+
+ forth in Section 10(c), Lessee agrees for the express benefit of each
+ Indemnitee to pay promptly when due, and to indemnify and hold harmless such
+ Indemnitee from, all Taxes (whether imposed upon such Indemnitee, the Aircraft,
+ the Airframe, the Engines, this Lease or any other Operative Document or
+ otherwise), by any Governmental Entity or taxing authority in the United States
+ of America or any other country or any taxing authority or political
+ subdivision thereof or therein or by any international taxing authority, upon
+ or with respect to, based upon or measured by any of the following:
+
+ (i) (x) the Aircraft, the Airframe, any Engine or any Part thereof, or
+ interest therein, this Lease or any of the other Operative Documents, (y) the
+ importation, exportation, condition, manufacture, purchase (under the
+ Operative Documents or otherwise), ownership, delivery, redelivery,
+ nondelivery, acceptance, rejection, assignment, possession, repossession,
+ return, use, performance, operation, control, settlement of any insurance or
+ other claim, sale, mortgaging, pledging, financing, leasing, subleasing, liens,
+ rental, retirement, abandonment, registration, preparation, installation,
+ modification, repair, testing, maintenance, replacement, transportation,
+ storage, location, condition, registration, reregistration, deregistration,
+ sale, transfer of title or other application or disposition of the Aircraft,
+ the Airframe, any Engine or any Part thereof or (z) the rentals, receipts or
+ earnings arising therefrom (including without limitation the Rent) and any
+ other amounts paid or payable with respect thereto or under any Operative
+ Document, or
+
+ (ii) the Lease or the other Operative Documents, or
+
+ (iii) otherwise with respect to or in connection with the transactions
+ contemplated by the Lease and the other Operative Documents.
+
+ (c) EXCEPTIONS TO INDEMNITY. The indemnity provided for in
+ Section 10(b) does not extend to any of the following Taxes:
+
+ (i) Taxes imposed on any Indemnitee in any jurisdiction in which such
+ Indemnitee is subject to such Taxes solely as a result of transactions or
+ activities unrelated to the transactions or activities contemplated by the
+ Operative Documents;
+
+ -42-
+
+
+
+
+
+ (ii) Taxes imposed on an Indemnitee as a result of a voluntary
+ transfer or other disposition of the Aircraft or this Lease or any interest in
+ either thereof by the Lessor or such Indemnitee unless such transfer or
+ disposition occurs pursuant to the exercise of remedies arising out of an Event
+ of Default (provided that Lessor and such Indemnitee shall in such event
+ consider in good faith any reasonable request by Lessee concerning an
+ appropriate jurisdiction in which such sale, transfer or disposition shall be
+ made);
+
+ (iii) Taxes attributable solely to events or circumstances occurring
+ or arising after return of the Aircraft to Lessor in accordance with this Lease
+ and the performance by Lessee of all of its obligations hereunder;
+
+ (iv) Taxes on an Indemnitee resulting solely as a direct result of the
+ breach of this Lease, gross negligence or willful misconduct of such
+ Indemnitee;
+
+ (v) Taxes which (A) are United States federal income Taxes (other than
+ United States Withholding Taxes or United States Transportation Taxes) or any
+ value added, consumption or similar Taxes which are imposed in lieu of or in
+ replacement of United States federal income Taxes to which the relevant
+ Indemnitee was theretofore subject or (B) are Taxes on, based on, or measured
+ by the net income of an Indemnitee and are imposed by any Governmental Entity
+ (other than the United States) or taxing authority thereof (or any Taxes which
+ are expressly imposed by such Governmental Entity or taxing authority in lieu
+ of Taxes on, based on, or measured by net income) except to the extent that
+ such Taxes are imposed by reason of the use or operation of the Aircraft, the
+ presence of Lessee or the making of payments by Lessee from, such jurisdiction;
+ PROVIDED, HOWEVER, that this exclusion shall not apply to Taxes which are, or
+ are in the nature of, sales, use, excise and similar Taxes;
+
+ (vi) Taxes (other than Taxes described in Section 10(c)(v)) which are
+ imposed in any taxing jurisdiction except to the extent that (A) such Taxes are
+ imposed solely by reason of the use or operation of the Aircraft, the presence
+ of Lessee or the making of payments by Lessee from, such jurisdiction ("Use
+ Jurisdiction Taxes") and (B) such Use Jurisdiction Taxes exceed in the
+ aggregate the amount of Taxes which would have been payable in such
+ jurisdiction in the absence therein of the use or operation of the Aircraft,
+ the presence of Lessee or the making of payments by Lessee from, such
+ jurisdiction;
+
+ -43-
+
+
+
+
+
+ (vii) Taxes arising out of, with respect to, based on or measured by
+ the granting by any Indemnitee of a security interest in the Aircraft or any
+ portion thereof or the value or principal amount of any debt created by any
+ Indemnitee; or
+
+ (viii) Taxes for which the relevant Indemnitee has expressly assumed
+ responsibility under this Lease or any other Operative Document.
+
+ (d) AFTER TAX BASIS. The amount which Lessee is required to
+ pay or indemnify against with respect to any amounts required to be paid or
+ indemnified against under this Section 10 or Section 13 shall include an
+ additional amount necessary to hold the recipient of the payment or indemnity
+ harmless on an after-tax basis from all Taxes (whether or not such taxes are
+ excluded under Section 10(a) or 10(c)), required to be paid by such recipient
+ with respect to such payment or indemnity, so as to restore the recipient on an
+ after-tax basis to the same position such recipient would have been in had such
+ amounts not been incurred or payable, after giving credit to Lessee for any
+ savings by such recipient in respect to any such Taxes by reason of deductions,
+ credits, allocations, or allowances in respect of the payment or accrual of the
+ amount indemnified against, including, without limitation, amounts described in
+ Section 10(a), Section 10(b) and/or this Section 10(d). All determinations as
+ to the amount and timing of realization of such tax savings shall be made by
+ the relevant Indemnitee in its sole good faith discretion, it being understood
+ that (i) Lessee shall have no right to examine or inspect any tax returns or
+ reports of such Indemnitee or any documents or records relating thereto, and
+ that (ii) a failure by the relevant Indemnitee to comply with its obligations
+ under this Section 10(d) shall not excuse Lessee from the performance of its
+ obligations under this Section 10 or Section 13.
+
+ (e) TIMING OF PAYMENT. Any amount due and payable to the
+ relevant Indemnitee pursuant to this Section 10 will be paid within 10 days
+ after receipt of a written demand therefor from such Indemnitee accompanied by
+ a written statement describing in reasonable detail the basis for such
+ indemnity and the computation of the amount so payable; PROVIDED, HOWEVER, that
+ such amount need not be paid by Lessee prior to the later of (i) five days
+ prior to the date the applicable Tax is payable to the appropriate Governmental
+ Entity or taxing authority or (ii) in the case of amounts which are being
+ contested by Lessee in good faith or by Lessor pursuant to Section 10(f), the
+ date such contest is finally resolved. If requested in writing by Lessee, and
+ at Lessee's sole cost and expense, any calculations by an Indemnitee of any
+ amount due and payable
+
+ -44-
+
+
+
+
+
+ hereunder shall be subject to review and verification by a firm of independent
+ certified public accounts of internationally recognized stature selected by
+ such Indemnitee and reasonably acceptable to Lessee (such approval not to be
+ unreasonably withheld or delayed). Such Indemnitee shall make available to such
+ accounting firm such information as shall be necessary for purposes of such
+ review and verification (but such information shall be held by such accounting
+ firm in strictest confidence and shall not in any event be disclosed or made
+ available to Lessee). If the result of such review is that Lessee was liable
+ for a smaller amount, the excess payment shall be returned by such Indemnitee
+ forthwith.
+
+ (f) TAX CONTESTS. (i) If written claim is made against an
+ Indemnitee for, or any Indemnitee acquires actual knowledge of a claim relating
+ to, Taxes with respect to which Lessee is or may be liable for a payment or
+ indemnity hereunder, such Indemnitee will, and Lessor shall cause such
+ Indemnitee to, promptly give Lessee notice in writing of such claim or Tax;
+ PROVIDED, HOWEVER, that such Indemnitee's failure to give notice will not
+ relieve Lessee of its obligations hereunder, except as to Taxes and expenses
+ which would have been avoidable in the absence of, and penalties resulting
+ from, such Indemnitee's failure to give such notice. So long as (i) a contest
+ of such Taxes does not involve any danger of the sale, forfeiture or loss of
+ the Aircraft or any interest therein, (ii) Lessee has provided Lessor with an
+ opinion of independent tax counsel acceptable to such Indemnitee that a
+ meritorious basis exists for contesting such claim and (iii) Lessee has made
+ adequate reserves for such Taxes or, if required by the relevant Indemnitee, an
+ adequate bond has been posted by Lessee, then the Indemnitee at Lessee's
+ written request will in good faith, with due diligence and at Lessee's sole
+ cost and expense, contest (or, upon Lessee's written request, permit Lessee to
+ contest in the name of the Indemnitee) the validity, applicability or amount of
+ such Taxes. If such contest is to be initiated by the payment of, and the
+ claiming of a refund for, any Taxes, Lessee shall advance to the relevant
+ Indemnitee sufficient funds (on an interest-free basis) to make such payments
+ and shall have agreed to indemnify such Indemnitee for any tax consequences
+ resulting from such advance of funds. Although the relevant Indemnitee may
+ consult in good faith with Lessee concerning the conduct of any contest, such
+ Indemnitee shall control the conduct of all proceedings relating to any such
+ contest which is brought by or on behalf of such Indemnitee. Any contest
+ initiated hereunder may be settled or discontinued by the relevant Indemnitee
+ at any time provided that such Indemnitee shall have waived any right to
+ indemnification for the Taxes being contested. Notwithstanding the foregoing,
+ if (x) a claim relates solely to Taxes for which
+
+ -45-
+
+
+
+
+
+ the Lessee has acknowledged in writing that it is liable under this Section 10,
+ (y) such claim can be severed and contested separately from all other claims
+ asserted against the relevant Indemnitee by the same taxing authority, and (z)
+ Lessee has made adequate reserves for the Taxes claimed or, if requested by
+ such Indemnitee, has posted an adequate bond for the payment of such Taxes,
+ then Lessee may, at its sole cost and expense, assume control of the contest of
+ such claim, provided that Lessee shall keep such Indemnitee fully informed as
+ to the conduct of all proceedings relating to such contest.
+
+ (g) REFUNDS. Upon receipt by the relevant Indemnitee of a
+ refund of all or any part of any Taxes (including without limitation any
+ deductions or withholdings referred to in Section 10(a)) which Lessee has paid,
+ such Indemnitee will, and Lessor will cause such Indemnitee to, pay to Lessee
+ the net amount of such taxes refunded, together with any interest (and, to the
+ extent previously paid by Lessee, any attorneys' fees or other costs awarded as
+ a result of a contest pursuant to Section 10(f)) received by such Indemnitee
+ with respect thereto; PROVIDED, HOWEVER, that no amount shall be payable to the
+ Lessee under this Section 10(g) if a Default or an Event of Default shall have
+ occurred and be continuing or prior to the time that Lessee shall have paid to
+ the relevant Indemnitee all amounts then due and owing to such Indemnitee under
+ this Section 10.
+
+ (h) COOPERATION IN FILING TAX RETURNS. In case any report or
+ return is required with respect to any Taxes which are subject to
+ indemnification by Lessee under this Section 10, Lessee will either make such
+ report or return in such manner as will show the respective interests of Lessor
+ and/or each relevant Indemnitee in the Aircraft, and send a copy of such report
+ or return to Lessor and such Indemnitee or will notify the Lessor or such
+ Indemnitee of such requirement, prepare such report or return in such manner as
+ shall be reasonably satisfactory to the Lessor or such Indemnitee, and present
+ such report or return to Lessor or such Indemnitee for execution and proper
+ filing. If actual notice is given by any taxing authority to an Indemnitee that
+ a report or return is required to be filed with respect to any such Taxes
+ referred to in this Section 10, the Indemnitee shall, and Lessor shall cause
+ the Indemnitee to, promptly notify Lessee of such required report or return.
+ Each Indemnitee agrees, and Lessor shall cause each Indemnitee, to respond to
+ any reasonable request of Lessee for information within the control of such
+ Indemnitee with respect to the filing of any such report or return, but Lessee
+ agrees to pay any reasonable costs, fees or other charges of independent
+ counsel or independent accountants incurred in connection with such response or
+ request. Lessee will cooperate with each Indemnitee in providing
+
+ -46-
+
+
+
+
+
+ information which may reasonably be required to fulfill such Indemnitee's tax
+ filing requirements and any audit information request arising from such filing.
+ Each Indemnitee shall, and Lessor shall cause each Indemnitee to, cooperate
+ with Lessee in providing information which may reasonably be required to
+ fulfill Lessee's tax filing requirements and any audit information request
+ arising from such filing.
+
+ (i) TAX SAVINGS. If as a result of the payment or accrual of
+ Taxes paid or indemnified by the Lessee an Indemnitee shall realize and
+ recognize any savings with respect to Taxes, such Indemnitee shall promptly
+ notify the Lessee that it has realized and recognized such savings and shall
+ pay to the Lessee within thirty (30) days of the realization of such savings an
+ amount equal to (i) the net reduction in Taxes realized and recognized by such
+ Indemnitee and (ii) the amount of any further net reduction in taxes realized
+ and recognized under such tax laws as a result of payments pursuant to this
+ sentence; PROVIDED, HOWEVER, that no amount shall be payable to the Lessee
+ under this Section 10(i) if a Default or an Event of Default shall have
+ occurred and be continuing or prior to the time that Lessee shall have paid to
+ the relevant Indemnitee all amounts then due and owing to such Indemnitee under
+ this Section 10 and that a subsequent disallowance of any such tax savings
+ shall be subject to indemnity hereunder without regard to any exclusions set
+ forth in this Section 10. All determinations as to the amount and timing of
+ realization of such tax savings shall be made by the relevant Indemnitee in its
+ sole good faith discretion, it being understood that (i) Lessee shall have no
+ right to examine or inspect any tax returns or reports of such Indemnitee
+ or any documents or records relating thereto, and that (ii) a failure by the
+ relevant Indemnitee to comply with its obligations under this Section 10(i)
+ shall not excuse Lessee from the performance of its obligations under this
+ Section 10.
+
+ (j) REVERSE INDEMNITY. If Lessee shall be required to pay any
+ amount attributable to Taxes of any Indemnitee for which Lessee is not
+ responsible under this Section 10, such Indemnitee shall reimburse Lessee for
+ the amount of any such excess payment within 5 days after demand therefor.
+
+ Section 11. CASUALTY OCCURRENCES.
+
+ (a) CASUALTY OCCURRENCE WITH RESPECT TO THE AIRFRAME.
+ Promptly (and in any event within seven (7) days) after a Casualty Occurrence
+ with respect to the Airframe and any Engine then installed thereon, Lessee
+ shall give Lessor and each Beneficiary written notice of such occurrence. On
+ the thirtieth (30th) day (or earlier if the insurance or
+
+ -47-
+
+
+
+
+
+ other proceeds have been paid earlier) after the date of the Casualty
+ Occurrence, Lessee shall pay to Lessor in immediately available funds the
+ sum of (A) the Casualty Value of the Aircraft less an amount equal to the
+ daily equivalent of Basic Rent (computed on the basis of a 365-day year)
+ for each day during the period commencing the day after the payment of
+ Casualty Value and extending to the day before the Basic Rent Payment Date
+ immediately succeeding the date of payment of such Casualty Value and (B)
+ all other Rent, other than amounts paid pursuant to clause (A), then due
+ and payable. Upon such payment (i) the obligation of Lessee to make further
+ payments of Basic Rent hereunder shall terminate, (ii) the Term shall
+ terminate with respect to the Aircraft and (iii) Lessor will transfer to
+ Lessee or applicable insurer, free and clear of Lessor's Liens and all other
+ Liens arising by or through Lessor, any Beneficiary or any Lender but
+ otherwise without recourse or warranty, all of Lessor's right, title and
+ interest, if any, in and to the Airframe and Engines (if any) suffering the
+ Casualty Occurrence, as well as all of Lessor's right, title and interest in
+ and to any Engine constituting part of the Aircraft but not installed
+ thereon at the time of the Casualty Occurrence; provided, however, that
+ there shall be excluded from such transfer any and all claims against any
+ Persons which arose prior to the date of such transfer, including without
+ limitation any and all claims against any Persons who may have been
+ responsible, in whole or in part, for the events giving rise to such
+ Casualty Occurrence.
+
+ (b) CASUALTY OCCURRENCE WITH RESPECT TO AN ENGINE. Upon a
+ Casualty Occurrence with respect to an Engine only, Lessee shall give Lessor
+ and each Beneficiary prompt written notice thereof and shall, within forty-five
+ (45) days after such occurrence, convey to Lessor, as replacement for the
+ Engine suffering a Casualty Occurrence, title to a Replacement Engine;
+ PROVIDED, HOWEVER, that such 45-day period shall be extended to 180 days after
+ the occurrence of such Casualty Occurrence if Lessee shall, prior to the end of
+ such 45-day period, have delivered to Lessor or the Beneficiaries all proceeds
+ of the insurance payable with respect to such damaged Engine and the amount of
+ such proceeds is at least equal to the replacement value thereof (as determined
+ reasonably and in good faith by the Beneficiaries). Each Replacement Engine
+ shall be (i) free of all Liens, (ii) in as good an operating condition as the
+ Engine being replaced, assuming the Engine being replaced was in the condition
+ and repair required by the terms hereof immediately prior to the Casualty
+ Occurrence, and (iii) in a condition (including as to the status of all logs,
+ records and technical documents) acceptable to the Beneficiaries in their sole
+ discretion, as evidenced by their written acknowledgement thereof. Prior to or
+ at the time of any such conveyance, Lessee, at its own expense, will promptly
+
+ -48-
+
+
+
+
+
+ (i) furnish Lessor with a full warranty bill of sale, in form and substance
+ reasonably satisfactory to Lessor and each Beneficiary, with respect to such
+ Replacement Engine; (ii) cause a supplement hereto, in form and substance
+ reasonably satisfactory to Lessor and each Beneficiary, subjecting such
+ Replacement Engine to this Lease, to be duly executed by Lessee, and recorded
+ pursuant to applicable Law; (iii) furnish Lessor and each Beneficiary with such
+ evidence of title to such Replacement Engine and of compliance with the
+ insurance provisions of Section 12 hereof with respect to such Replacement
+ Engine as Lessor or such Beneficiary may reasonably request; (iv) if reasonably
+ requested by Lessor or any Beneficiary, furnish Lessor with an opinion of
+ Lessee's counsel to the effect that title to such Replacement Engine has been
+ duly conveyed to Lessor and each Beneficiary, and that such Replacement Engine
+ is duly leased hereunder; (vi) furnish a certificate signed by a duly
+ authorized financial officer or executive of Lessee certifying that, upon
+ consummation of such replacement, no Default will exist hereunder; (vii)
+ furnish Lessor and each Beneficiary with such documents (including but not
+ limited to all technical documents) as Lessor or such Beneficiary may
+ reasonably request in connection with the consummation of the transactions
+ contemplated by this Section 11(b), in each case in form and substance
+ satisfactory to Lessor and such Beneficiary; and (viii) furnish such financing
+ statement covering the Replacement Engine as may be requested by Lessor or any
+ Beneficiary. Upon full compliance by Lessee with the terms of this Section
+ 11(b), Lessor will transfer to Lessee all of Lessor's right, title and interest
+ in the Engine which suffered the Casualty Occurrence free and clear of Lessor's
+ Liens and all other Liens arising by or through Lessor, any Beneficiary or any
+ Lender. For all purposes hereof, each such Replacement Engine shall be deemed
+ part of the property leased hereunder, shall be deemed an "Engine" as defined
+ herein and shall be deemed part of the Aircraft. No Casualty Occurrence covered
+ by this Section 11(b) shall result in any reduction in Rent.
+
+ (c) APPLICATION OF PROCEEDS AND PAYMENTS. Any payments
+ received at any time by Lessor or by Lessee from any insurer under any policy
+ of insurance (other than liability insurance) shall be applied in the manner
+ specified in Section 12(d), 12(e) or 12(f) hereof as applicable. Subject to
+ Section 11(f) hereof, any payments received at any time by Lessor or Lessee
+ from any Governmental Entity or other Person with respect to a Casualty
+ Occurrence will be applied as follows:
+
+ (i) unless clause (ii) below is applicable, so much of such
+ payments as shall not exceed the Casualty Value required to be paid by
+ Lessee
+
+ -49-
+
+
+
+
+
+ pursuant to Section 11(a) of this Lease shall be paid to Lessor in
+ reduction of Lessee's obligation to pay such Casualty Value if not
+ already paid by Lessee or, if already paid by Lessee (unless a Default
+ shall have occurred and be continuing), shall be applied by Lessor to
+ reimburse Lessee for its payment of such Casualty Value and the balance
+ of such payment, if any, remaining thereafter (if such payment is
+ received with respect to insurance other than liability insurance)
+ (unless a Default shall have occurred and be continuing) shall be paid
+ over to, or retained by, Lessee, except to the extent any such amount
+ is specifically allocable to an interest of Lessor; or
+
+ (ii) if such payments are received as a result of a Casualty
+ Occurrence with respect to an Engine which is being replaced pursuant
+ to Section 11(b), unless a Default shall have occurred and be
+ continuing (and except as otherwise contemplated by Section 11(b) if
+ the 45-day period specified for the replacement thereof is to be
+ extended to 180 days, as therein provided), all such payments shall be
+ paid over to, or retained by, Lessee if Lessee shall have fully
+ performed or, concurrently therewith has fully performed, the terms of
+ Section 11(b) and of Section 15 hereof with respect to the Casualty
+ Occurrence for which such payments are made.
+
+ (d) REQUISITION FOR USE BY GOVERNMENT WITH RESPECT TO THE
+ AIRCRAFT. In the event of the requisition for use by a Governmental Entity of
+ the Airframe or any Engine (other than a requisition constituting a Casualty
+ Occurrence), all Lessee's obligations under this Lease with respect to the
+ Airframe or Engine shall continue to the same extent as if such requisition had
+ not occurred. All payments received by Lessor or Lessee from the Governmental
+ Entity for the use of the Airframe or Engine during the Term therefor shall be
+ paid over to, or retained by, Lessee if no Default shall have occurred and be
+ continuing; and all payments received by Lessor or Lessee from the Governmental
+ Entity for the use of such item after the Term therefor shall be paid over to,
+ or retained by, Lessor.
+
+ (e) OTHER DISPOSITIONS. Any amounts not payable to or
+ retainable by Lessee pursuant to this Section 11 or Section 12 hereof because a
+ Default shall have occurred and be continuing shall be held by Lessor and shall
+ be paid over to Lessee when such Default shall cease to be continuing, except
+ that if Lessor shall have theretofore declared this Lease to be in default
+ pursuant to Section 18 hereof, such amounts shall be retained by Lessor and
+ disposed of in
+
+ -50-
+
+
+
+
+
+ accordance with the provisions thereof, with any balance remaining to be paid
+ to Lessee.
+
+ (f) APPLICATION IN DEFAULT. Any amount referred to in clause
+ (i) or (ii) of Section 11(c) which is otherwise payable to Lessee shall not be
+ paid to Lessee, or, if it has been previously paid to Lessee, and not yet
+ applied by Lessee as permitted or required hereunder shall be delivered from
+ Lessee to Lessor if at the time of such payment a Default shall have occurred
+ and be continuing. In such case, all such amounts shall be paid to and held by
+ Lessor as security for the obligations of Lessee or, at the option of Lessor,
+ applied by Lessor toward payment of any of Lessee's obligations at the time due
+ hereunder, as Lessor may elect. At such time as there shall not be continuing
+ any such Default, all such amounts at the time held by Lessor in excess of the
+ amount, if any, which Lessor has elected for application as provided above
+ shall be paid to Lessee.
+
+ Section 12. INSURANCE.
+
+ (a) PUBLIC LIABILITY AND PROPERTY DAMAGE INSURANCE. Lessee
+ will carry and maintain in effect, at its own expense, with Approved Insurers,
+ comprehensive public liability insurance (including, without limitation,
+ contractual liability and passenger legal liability) and property damage
+ insurance with respect to the Aircraft of the type usual and customary by
+ commercial scheduled airline standards for airline carriers operating similar
+ aircraft and providing for no less coverage than is carried by Lessee on
+ similar aircraft in its fleet. Such insurance shall be in an amount not less
+ than the amount applicable to similar passenger aircraft and engines which
+ comprise Lessee's fleet, and in any event not less than the amount under
+ "Public Liability and Property Damage Insurance" as set forth on Exhibit C-2
+ hereto Lessee shall not discriminate against the Aircraft in providing such
+ insurance.
+
+ (b) INSURANCE AGAINST LOSS OR DAMAGE. Lessee, at its own
+ expense, will maintain in effect with Approved Insurers "all-risk" ground and
+ flight aircraft hull insurance (which shall include, but not be limited to,
+ comprehensive war risk and allied perils, hijacking, a disappearance clause and
+ coverage against strikes, riots, commotions or labor disturbances, malicious
+ acts or acts of sabotage and unlawful seizure (including confiscation, arrest,
+ nationalization, seizure, restraint, detention, appropriation, requisition or
+ destruction thereat, by or under authority of any Governmental Entity), or
+ wrongful exercise of control of the Aircraft in flight by a person on board the
+ Aircraft acting without the consent of Lessee) covering the Aircraft and "all
+ risk" coverage insurance with
+
+ -51-
+
+
+
+
+
+ respect to Engines and Parts while not installed on the Aircraft or an
+ aircraft, which in each case is at least as broad as coverage maintained by
+ passenger airlines similarly situated to Lessee and operating similar aircraft
+ and engines which comprise Lessee's fleet (it being understood that the
+ obligations hereby imposed upon Lessee by reference to coverage maintained by
+ other airlines is not intended to result in an increase in the definition of
+ Casualty Value). Such insurance shall be for an amount not less than the
+ Casualty Value for the Aircraft and, where "all-risk" hull and war risk are
+ insured separately, shall, in relation to the applicable policies, incorporate
+ a 50/50 clause based on AVS 103, between the underwriters. Such insurance may
+ include provisions for deductibles in an amount usual and customary by
+ commercial scheduled airline standards for airline carriers operating similar
+ aircraft provided that (i) the amount of such deductibles must be no greater
+ than the lowest deductible amount applying to any B737-25A aircraft in Lessee's
+ fleet and (ii) in no event shall the amount of such deductibles exceed the
+ amount under "Deductible Amount" set forth on Exhibit C-2 hereto.
+
+ (c) REQUIRED POLICY DESIGNATIONS AND PROVISIONS. Each and any
+ policy of insurance obtained and maintained pursuant to this Section, and each
+ and any policy obtained in substitution or replacement for any such policies,
+ shall: (i) designate Lessor as owner of the Aircraft covered thereby, and shall
+ designate Lessor (in its individual capacity and as Owner Trustee), each
+ Beneficiary and their assigns as additional insureds (and, with respect to each
+ policy of insurance obtained and maintained pursuant to Section 12(b) hereof,
+ Beneficiaries as loss payee), as their interests may appear warranted each as
+ to itself no operational interest (but without imposing upon Lessor, any
+ Beneficiary or their assigns, any obligation to pay any premiums for any such
+ policies); (ii) expressly provide that, in respect of the interests of Lessor,
+ each Beneficiary and their assigns, in such policies, the insurance shall not
+ be invalidated by any action or omission of Lessee, and shall insure Lessor,
+ such Beneficiary and their assigns, regardless of any breach or violation of
+ any warranty, declaration or condition contained in such policies by Lessee;
+ (iii) provide that if such insurance is canceled by the Approved Insurers for
+ any reason whatsoever, or is adversely changed in any way with respect to the
+ interests of Lessor, any Beneficiary or their assigns, or if such insurance is
+ allowed to lapse for nonpayment of premium, such cancellation, adverse change
+ or lapse shall not be effective as to Lessor, such Beneficiary and their
+ assigns, for thirty (30) days (seven (7) days or such lesser period as may be
+ applicable in the case of any war risks or allied perils coverage) after
+ receipt by Lessor and such Beneficiary of written notice by such insurer or
+ insurers to
+
+ -52-
+
+
+
+
+
+ Lessor and such Beneficiary of such prospective cancellation, change or lapse,
+ (iv) include coverage for the territorial limits of any country in which the
+ Aircraft may at any time be located; (v) provide that, as against Lessor, each
+ Beneficiary and their assigns, the insurer waives any rights of set-off,
+ counterclaim or any other deduction, whether by attachment or otherwise, and
+ waives any rights it may have to be subrogated to any right of any insured
+ against Lessor, such Beneficiary or their assigns, with respect to the Aircraft
+ to the same extent that Lessee has waived its rights of subrogation by the
+ terms of its agreements to indemnify any such party pursuant to this Lease; and
+ (vi) provide that (a) unless Lessor or any Beneficiary shall provide written
+ notice to the Approved Insurers that an Event of Default shall have occurred
+ and be continuing, any payment in respect of any damage or loss not in excess
+ of $500,000 shall be payable directly to the Lessee or its order; PROVIDED,
+ that Lessor and each Beneficiary shall be given prompt written notice of any
+ payment in respect of any damage or loss to the Aircraft in excess of $150,000
+ and (b) any payment in respect of any damage or loss in excess of $500,000 (or
+ all proceeds whether or not in excess of $500,000 if an Event of Default shall
+ have occurred and be continuing), shall be payable directly to the
+ Beneficiaries or their assignees as loss payees, for the account of all
+ interests. Each such liability policy shall be primary without right of
+ contribution from any other insurance which may be carried by Lessor, any
+ Beneficiary or their assigns, and shall expressly provide that all of the
+ provisions thereof shall operate in the same manner as if there were a separate
+ policy covering each insured, provided that such provisions shall not operate
+ to increase the insurer's limit of liability.
+
+ Lessee shall have the right to carry insurance in reasonable
+ excess of the amounts required hereunder and the proceeds of such excess
+ insurance shall be payable to Lessee; PROVIDED that any such excess insurance
+ shall not prejudice the insurance required to be maintained by Lessee hereunder
+ and Lessee shall, upon request of a Beneficiary, notify the Beneficiaries of
+ the nature and amount of such excess insurance. Similarly, Lessor and each
+ Beneficiary shall have the right to carry additional and separate insurance for
+ its own benefit at its own expense, without, however, thereby limiting Lessee's
+ obligations under this Section 12.
+
+ (d) APPLICATION OF INSURANCE PROCEEDS FOR A CASUALTY
+ OCCURRENCE. It is agreed that insurance payments which arise from any policy of
+ insurance carried by Lessee and received as the result of the occurrence of a
+ Casualty Occurrence shall be applied as follows:
+
+ -53-
+
+
+
+
+
+ (i) if such payments are received with respect to a Casualty
+ Occurrence relating to the Airframe and Engines or engines installed on
+ the Airframe, so much of such payments as shall not exceed the amounts
+ due under Section 11(a) hereof shall be paid to Lessor for immediate
+ application toward sums owed by Lessee, and the balance to Lessee; and
+
+ (ii) if such payments are received with respect to a Casualty
+ Occurrence relating to an Engine under circumstances contemplated by
+ Section 11(b) hereof, such payment shall be paid over to Lessee,
+ provided that Lessee shall have fully performed or, concurrently
+ therewith, fully performs the terms of Section 11(b) hereof.
+
+ (e) APPLICATION OF INSURANCE PROCEEDS FOR OTHER THAN A
+CASUALTY OCCURRENCE. As between Lessor and Lessee all insurance payments in
+excess of $500,000 that are received by Lessor or the Beneficiaries under
+policies required to be maintained by Lessee pursuant to Section 12(b) as a
+result of any property damage or loss to the Airframe or any Engine not
+constituting a Casualty Occurrence, or to any Part will be held by Lessor or
+the Beneficiaries until Lessee furnishes Lessor with satisfactory evidence
+that the repairs or replacement property Lessee is required to perform or
+obtain in accordance with the terms of Section 9 of this Lease have been made
+or obtained by Lessee or, to the extent necessary to enable Lessee to make
+required progress payments in accordance with the workscope relating to the
+repair of the Aircraft, Lessor or the Beneficiaries shall make such interim
+progress payments upon receipt of copies of appropriate invoices or other
+evidence of the completion of the related work or the purchasing of related
+materials in accordance with the terms of such workscope or the related
+repair contract. Upon receipt of such evidence of repair or replacement,
+Lessor shall pay Lessee the amount of the insurance payment received with
+respect to such loss less the amount of any progress payments made to Lessee
+pursuant to this Section 12(e).
+
+ (f) APPLICATION IN DEFAULT. Any amount referred to in
+Section 12(d)(i) or (ii) or Section 12(e) which is otherwise payable to
+Lessee shall not be paid to Lessee, or, if it has been previously paid to
+Lessee, shall be delivered by Lessee to Lessor, if at the time of such
+payment, an Event of Default shall have occurred and be continuing. In either
+case, all such amounts shall be held by Lessor as security for the
+obligations of Lessee or, at the option of Lessor, applied by Lessor toward
+payment of any of Lessee's obligations at the time due hereunder. At such
+time as
+
+
+ -54-
+
+
+
+there shall not be continuing any such Event of Default, all such amounts at
+the time held by Lessor in excess of the amount, if any, which Lessor has
+elected for application as provided above shall be paid to Lessee.
+
+ (g) CERTIFICATES OF INSURANCE. On or before the Delivery
+Date, and thereafter on each renewal by Lessee of the insurance required
+hereby, but not less often than annually, Lessee will furnish to Lessor and
+each Beneficiary a certificate executed and delivered by an Approved Insurance
+Broker who is authorized by an Approved Insurer, appointed by Lessee,
+describing in reasonable detail insurance carried on the Aircraft and
+certifying that the insurance then maintained on the Aircraft complies with
+Section 12 of this Lease. Lessee will cause such Approved Insurance Broker who
+is authorized by an Approved Insurer to agree to advise Lessor and each
+Beneficiary in writing at least thirty (30) days (seven (7) days or such lesser
+period as may be applicable in the case of any war risk and allied perils
+coverage) prior to the termination or cancellation by the underwriters for any
+reason (including, without limitation, failure to pay the premium therefor) of
+any such insurance. Lessee further agrees to cause such Approved Insurance
+Broker (i) to advise Lessor and each Beneficiary in writing (x) as soon as such
+Approved Insurance Broker has knowledge that any insurance coverage carried and
+maintained on the Aircraft pursuant to this Section 12 will not be renewed or
+available upon expiration of the then existing insurance and (y) not later than
+seven (7) days prior to the expiration of the then existing insurance if Lessee
+has not given notice renewal instructions to the Approved Insurance Broker, and
+(ii) to deliver directly to Lessor and each Beneficiary the certificate of
+insurance described above in this paragraph (g) upon such renewal.
+
+ (h) Any insurance described above shall be placed and
+maintained throughout the Term in the London, Tokyo, United States, French or
+German aviation insurance markets with Approved Insurers, and Lessee's
+obligations under this Section 12 shall not be terminated or diminished in any
+respect for any reason, including any subleasing of the Aircraft; PROVIDED,
+HOWEVER, that if, in connection with any sublease of the Aircraft permitted
+hereby, the Permitted Sublessee is required by law to place or maintain some or
+any part of the insurance required hereby in an aviation insurance market other
+than the London, Tokyo, United States, French or German aviation insurance
+market, then Lessee shall cause such Permitted Sublessee to place and maintain
+in the London, Tokyo or United States aviation insurance market re-insurance
+policies fully covering such insurance, and any such re-insurance policy shall
+contain a "cut through" clause in form, scope and substance reasonably
+satisfactory to Lessor and each Beneficiary. In the event
+
+
+ -55-
+
+
+
+of any sublease of the Aircraft, Lessee shall cause to be delivered to Lessor
+and to each Beneficiary at least five Business Days prior to the commencement
+of such sublease, appropriate certificates of insurance and re-insurance
+complying with the requirements of CLAUSE (g) above.
+
+ (i) Notwithstanding anything in this Section 12 to the
+contrary, during the period of any CRAF activation contemplated in Section
+11(d) of this Lease, Lessor will accept the indemnification by the full faith
+and credit of the United States government in lieu of the insurance coverage
+required by this Section 12; provided, however, that (i) such indemnification
+by the United States government shall provide coverage against the same risks
+and on the terms as the risks and terms required by Section 12, (ii) such
+indemnification shall be in amounts that are not less than the insurance
+coverage contemplated in this Section 12, and (iii) Lessee shall not subject
+the Aircraft to the CRAF program without the Beneficiaries, prior written
+consent.
+
+ Section 13. INDEMNIFICATION.
+
+ Subject only to the limitations described in the last
+paragraph of this Section 13, Lessee agrees to indemnify, reimburse, hold
+harmless, protect and defend each Indemnitee for, from and against any and all
+claims, damages, losses, liabilities, demands, suits, judgments, causes of
+action, legal proceedings, whether civil or criminal, penalties, fines and
+other sanctions, and any reasonable attorney's fees and other reasonable costs
+and expenses in connection herewith or therewith, including any of the
+foregoing arising or imposed with or without Lessor's or Beneficiaries' fault
+or negligence (whether passive or active) or under the doctrine of strict
+liability (any and all of which are hereafter referred to as "Claims") which in
+any way may result from, pertain to or arise in any manner out of, or are in
+any manner related to (i) the Aircraft or any this Lease or Operative Documents
+or any document executed in connection herewith, or the breach of any
+representation, warranty or agreement made by Lessee hereunder or under any
+other such document, or (ii) the condition, manufacture, re-delivery, lease,
+acceptance, rejection, possession, return, disposition, storage, maintenance,
+use or operation of the Aircraft either in the air or on the ground, or (iii)
+any defect in the Aircraft (whether or not discovered or discoverable by Lessee
+or Lessor or any Beneficiary) arising from the material or any articles used
+therein or from the design, testing or use thereof or from any maintenance,
+service, repair, overhaul or testing of the Aircraft, whether or not the
+Aircraft is in the possession of Lessee, and regardless of where the Aircraft
+may then be located, or (iv) any transaction,
+
+
+ -56-
+
+
+
+approval or document contemplated by this Lease, Operative Documents or given
+or entered into in connection herewith; provided, however, that Lessee shall be
+subrogated to all rights and remedies which Lessor may have against the
+Manufacturer of the Aircraft and its subcontractors or any other party as to
+any such Claims, but only to the extent that Lessee satisfies its
+indemnification to the Indemnitees with respect to such Claims. Nothing herein
+shall require Lessee to indemnify any Indemnitee in respect of any Claim to the
+extent the facts or circumstances that form the basis for such Claim arose or
+existed prior to the Delivery Date, unless such Claim or any losses, costs
+(including attorneys, fees or expenses) or damages arise or are increased as a
+result of a breach by Lessee of its agreements herein or otherwise relate to
+such a breach (including the breach of any obligation to remedy a fact or
+circumstance that arose or existed prior to the Delivery Date). In the event
+Lessee is required to indemnify any Indemnitee hereunder, Lessee shall, upon
+demand, pay to such Indemnitee an amount which, after deduction of all Taxes
+and like charges required to be paid by such Indemnitee in respect of such
+payment, is equal to the amount of the indemnification required; provided,
+however, that Lessee shall not be required to pay or discharge any Claim
+brought by a third party so long as the validity or the amount thereof shall be
+diligently contested in good faith and on reasonable grounds by Lessee, at no
+cost or expense to Lessor.
+
+ Subject to Lessee's rights under Section 20(f), Lessee hereby
+waives, and releases each Indemnitee from, any Claims (whether existing now or
+hereafter arising) for or on account of or arising or in any way connected with
+injury to or death of personnel of Lessee or loss or damage to property of
+Lessee or the loss of use of any property which may result from or arise in any
+manner out of or in relation to the ownership, leasing, condition, use or
+operation of the Aircraft, either in the air or on the ground, or which may be
+caused by any defect in the Aircraft from the material or any article used
+therein or from the design or testing thereof, or use thereof, or from any
+maintenance, service, repair, overhaul or testing of the Aircraft regardless of
+when such defect may be discovered, whether or not the Aircraft is at the time
+in the possession of Lessee, and regardless of the location of the Aircraft at
+any such time.
+
+ The indemnities contained in this Section 13 shall continue
+in full force and effect notwithstanding the expiration or other termination of
+this Lease and are expressly made for the benefit of and shall be enforceable
+by each Indemnitee; PROVIDED, that, unless an Event of Default shall have
+occurred and be continuing, Lessee shall not be obligated to pay any indemnity
+pursuant to this
+
+
+ -57-
+
+
+
+Section 13 with respect to any amount to the extent that such amount arises out
+of or is measured by acts, failures to act, events or periods of time (or any
+combination of the foregoing) that occur after the Aircraft has been
+redelivered to Lessor pursuant to and in accordance with Section 16 hereof
+(under circumstances not involving a repossession pursuant to Section 18
+hereof) and is no longer subject to this Lease and all obligations of the
+Lessee under this Lease have been discharged (other than obligations which by
+their express terms survive the expiration of the Term of this Lease) unless
+any such act or event shall itself result from or be attributable to an act or
+omission of Lessee which occurred prior to the redelivery of the Aircraft and
+the discharge of Lessee's obligations under the Lease.
+
+ Notwithstanding the foregoing provisions of this Section 13,
+Lessee shall not be obligated to make any payment by way of indemnity to any
+Indemnitee in respect of any Claims to the extent such Claims result from the
+willful misconduct or gross negligence of such Indemnitee or to the extent
+relating solely to the breach or misrepresentation of Lessor or a Beneficiary
+under an Operative Document.
+
+ Section 14. LIENS.
+
+ Lessee shall not directly or indirectly create, incur,
+assume or suffer to exist any Lien on or with respect to the Aircraft or
+Engines, title thereto or any interest therein, except (i) the respective
+rights of Lessor and Lessee as herein provided; (ii) Lessor's Liens or any
+other Lien arising by or through Lessor, a Beneficiary or a Lender with
+respect to the Aircraft or Engines; (iii) Liens for Taxes either not yet due
+or being contested in accordance with Section 10 hereof, so long as adequate
+reserves are maintained with respect to such Liens and so long as the
+Aircraft or such Engine is not in danger of being lost, sold, confiscated,
+forfeited or seized as a result of any such Lien or contest; and (iv)
+inchoate materialmen's, mechanics', workmen's, repairmen's, employees' or
+other like Liens arising in the ordinary course of business, which either are
+not due and owing or are being contested in good faith by Lessee so long as
+the Aircraft or such Engine is not in danger of being lost, sold,
+confiscated, forfeited or seized as a result of any such Lien or contest.
+Lessee shall promptly, at its own expense, take such action as may be
+necessary to duly discharge any Lien (except for the Liens referred to in
+clauses (i), (ii), (iii) and (iv) of this Section 14) if the same shall arise
+at any time with respect to the Aircraft or any Engine (including any Part).
+
+
+ -58-
+
+
+
+ Section 15. PERFECTION OF TITLE AND FURTHER ASSURANCES.
+
+ If at any time, any filing or recording is reasonably
+necessary to protect the interests of Lessor or the Beneficiaries, Lessee, upon
+request by Lessor or any Beneficiary, shall cause this Lease and any and all
+additional instruments which shall be executed pursuant to the terms hereof, to
+be kept, filed and recorded and to be re-executed, re-filed and re-recorded in
+the appropriate office or offices pursuant to applicable Laws, to perfect,
+protect and preserve the rights and interests of Lessor hereunder and in the
+Aircraft. At the reasonable request of Lessor or any Beneficiary, Lessee shall
+furnish to Lessor and each Beneficiary an opinion of counsel or other evidence
+satisfactory to Lessor of each such filing or re-filing and recordation or
+re-recordation. All costs (including attorneys' fees and expenses) relating to
+effecting and maintaining the registration of this Lease, including the Lease
+Supplement, or any related document, shall be borne and paid by Lessee. All
+costs (including attorneys' fees and expenses) relating to effecting and
+maintaining the registration of the Aircraft with the FAA shall be borne and
+paid by Lessor or the Beneficiaries, provided that Lessee shall cooperate with
+Lessor and the Beneficiaries in connection therewith as provided above and bear
+the cost of changing the registration number of the Aircraft.
+
+ Without limiting the foregoing, Lessee shall do or cause
+to be done, at Lessee's cost and expense, any and all acts and things which
+may be required under the terms of the Mortgage Convention to perfect and
+preserve the title and interest of Lessor in the Aircraft within the
+jurisdiction of any signatory which has ratified the Mortgage Convention if
+such jurisdiction is in the territory in which Lessee intends to operate the
+Aircraft, as Lessor or any Beneficiary may reasonably request. Lessee shall
+also do or cause to be done, at its own expense, any and all acts and things
+which may be required of an aircraft operator under the terms of any other
+Law involving any jurisdiction in which Lessee intends to operate, or any and
+all acts and things which Lessor or any Beneficiary may reasonably request,
+to perfect and preserve Lessor's ownership rights regarding the Aircraft
+within any such jurisdiction.
+
+ In addition, Lessee will promptly and duly execute and
+deliver to Lessor such further documents and assurances and take such further
+actions as it may from time to time reasonably request in order to more
+effectively carry out the intent and purpose of this Lease and to establish
+and protect the rights and remedies created or intended to be created in
+favor of Lessor hereunder, including, without limitation, if reasonably
+requested by Lessor at the expense
+
+
+ -59-
+
+
+
+of Lessee, the execution and delivery or supplements or amendments hereto in
+recordable form, subjecting to this Lease any Replacement Engine and the
+recording or filing of counterparts thereof, in accordance with the Laws of any
+appropriate jurisdiction.
+
+ Section 16. RETURN OF AIRCRAFT AND RECORDS.
+
+ (a) RETURN.
+
+ (i) On the Return Occasion, Lessee will, unless Casualty
+ Occurrence has occurred, at its expense, redeliver the
+ Aircraft to Lessor at the Return Location in a condition
+ complying with the requirements of Exhibit E and this
+ Section 16:
+
+ (A) free and clear of all Liens (other than Lessor's Liens
+ or any Lien arising by or through Lessor, any Beneficiary
+ or any Lender);
+
+ (B) having the benefits of any indemnities and warranties
+ which have been assigned to and are held by Lessee
+ relating to the Aircraft assigned to Lessor;
+
+ (C) duly registered in the name of Lessor with the FAA;
+
+ (D) unless otherwise agreed by the Beneficiaries in a
+ condition so as to ensure the Aircraft has either a
+ current, valid and existing certification of
+ airworthiness for airline operation or (at the
+ Beneficiaries, election) an export certificate of
+ airworthiness, in either case, issued by the FAA; and
+
+ (E) if specified by the Beneficiaries, Lessee shall cooperate
+ with Lessor to ensure that the Aircraft is deregistered
+ from the FAA;
+
+ provided that Lessee complies with the foregoing and with all
+ of its obligations under this Lease, Lessor shall technically
+ accept redelivery of the Aircraft from Lessee at Lessee's
+ maintenance facilities in Honolulu, Hawaii before the
+ commencement of any storage period (it being understood that
+ if during the ferry flight to the Return Location any defect
+ or non-compliance which was purportedly corrected in connection
+ with the Final Inspection shall reoccur, Lessee shall be
+ responsible for rectifying such defect or non-compliance);
+ PROVIDED, HOWEVER, that if a C Check shall be performed in
+ connection with the Return
+
+
+ -60-
+
+
+ Occasion, then such technical inspection and acceptance shall
+ occur at the location of the facility in the United States of
+ America or Canada approved by FAA for C Check of 737-200
+ aircraft at which such C Check is performed.
+
+ (ii) In the event that Lessee does not return the Aircraft to
+ Lessor on the specified date of expiration or earlier
+ termination of this Lease, for any cause (other than due
+ to the fault of Lessor, any Beneficiary or the Lender),
+ then the obligations of Lessee under this Lease shall
+ continue and such continued use shall not be considered a
+ renewal of the terms of this Lease or a waiver of any
+ right of Lessor hereunder and Lessor may terminate
+ Lessee's right as to the Aircraft on written notice to
+ Lessee and Lessee shall fully indemnify Lessor, the
+ Lender and each other Indemnitee on demand against all
+ losses, liabilities, actions, proceedings, costs and
+ expenses thereby suffered or incurred by Lessor, the
+ Lender or any other Indemnitee. During such continued
+ use, damages in an amount equal to Basic Rent (as
+ provided below) shall continue to be paid by Lessee to
+ Lessor, and the performance of other obligations of
+ Lessee to Lessor shall continue hereunder. Basic Rent
+ shall be pro-rated on a daily basis for each day until
+ the Aircraft is actually delivered to Lessor, and all
+ other terms and conditions of this Lease shall remain in
+ full force and effect. Payment shall be made promptly
+ upon presentation of Lessor's invoice and any failure to
+ pay shall constitute a default of Lessee.
+
+ (b) FINAL INSPECTION. At least 10 days prior to redelivery of the
+Aircraft, Lessee will take the Aircraft out of service and make the Aircraft
+and all Aircraft Documents available to Lessor and the Beneficiaries for
+inspection at Lessee's facilities in Honolulu, Hawaii (or at the location of
+the facility in the United States of America or Canada approved by FAA for
+the C Check of 737-200 aircraft at which the C Check shall be performed, as
+provided in Section 16(a)(i) above, if applicable) (the "Final Inspection")
+in order to verify that the condition of the Aircraft complies with this
+Lease, and Lessor's and the Beneficiaries, representatives shall use their
+reasonable efforts to cause their representatives to commence such compliance
+verification as promptly as practicable at or following the beginning of such
+10-day period. Lessee agrees, at the Beneficiaries, request, to furnish due
+evidence that all service, maintenance, repairs, overhauls and modifications
+required under this Lease have been duly
+
+
+ -61-
+
+
+
+performed. Lessee shall make available to Lessor and the Beneficiaries at
+Lessee's principal maintenance base, not less than fifteen (15) Business Days
+prior to commencement of the Final Inspection, such documentation regarding the
+condition, use, maintenance, operation and history of the Aircraft during the
+Term, as requested by the Beneficiaries in order to enable Lessor and the
+Beneficiaries to verify that the condition of the Aircraft complies with this
+Lease. To the fullest extent possible, the Final Inspection shall be carried
+out during the C Check referred to in paragraph 2(a) of Exhibit E. The Final
+Inspection will consist of and be long enough to permit Lessor and the
+Beneficiaries to:
+
+ (i) generally review, test (as observer) and inspect the
+ Aircraft to Lessor's and the Beneficiaries, satisfaction
+ (which will include the opening of any areas of the
+ Aircraft if Lessor or the Beneficiaries reasonably
+ believe, based on prior inspections or other evidence,
+ that the opening of such areas would reveal that the
+ Aircraft does not completely comply with the requirements
+ of Exhibit E and this Section 16), to satisfy Lessor and
+ the Beneficiaries that the Aircraft completely complies
+ with the requirements of Exhibit E and this Section 16;
+
+ (ii) inspect the Aircraft Documents;
+
+ (iii) inspect the un-installed Parts;
+
+ (iv) observe a two hour demonstration flight (with up to four
+ of Lessor's or the Beneficiaries' representatives as
+ on-board observers);
+
+ (v) inspect the Engines, including, without limitation (i) by
+ observing a performance test in accordance with limits set
+ forth in the engine section of the Aircraft Maintenance
+ Manual and (ii) by performing a full and complete
+ borescope inspection of (a) the low pressure and high
+ pressure compressors and (b) turbine area; and
+
+ (vi) inspect the APU, including, without limitation (i) an APU
+ condition run (as observer) and (ii) by a full and
+ complete borescope inspection.
+
+The actions contemplated by the foregoing clauses (i) through (iv) shall be
+performed at Lessee's expense; provided that, unless an Event of Default
+shall have
+
+
+ -62-
+
+
+
+occurred or any inspection carried out under the foregoing clauses (i)
+through (iv) reveals discrepancies which require a further inspection, Lessee
+shall not be required to bear the travel and lodging expenses of Lessor's or
+the Beneficiaries, representatives.
+
+ (c) NON-COMPLIANCE. To the extent that, at the time of Final
+Inspection, the condition of the Aircraft does not comply with this Lease,
+Lessee will at the Beneficiaries, option:
+
+ (i) immediately rectify the non-compliance and, to the
+ extent the non-compliance extends beyond the Return
+ Occasion, the Term will be automatically extended
+ (including the obligation to pay Rent as referred to
+ in Section 16(a)(ii)) and this Lease will remain in
+ force until the non-compliance has been rectified; or
+
+ (ii) redeliver the Aircraft to Lessor and pay to Lessor
+ such amount as is mutually agreed to put the Aircraft
+ into the condition required by this Lease. Lessor
+ shall make all reasonable efforts to minimize such
+ costs.
+
+ (d) REDELIVERY. If applicable, upon redelivery Lessee will
+provide to Lessor and the Beneficiaries all documents (other than those as may
+be required to be supplied by Lessor or the Beneficiaries) necessary to export
+the Aircraft from the United States (including, without limitation, a valid and
+subsisting export license, if applicable, for the Aircraft) and required in
+relation to the deregistration of the Aircraft with the FAA.
+
+ In the event that Beneficiaries request Lessee to secure an
+export license (a) such request must be made upon sufficient notice in order to
+grant Lessee adequate time to procure such export license, (b) Lessor will
+render such timely assistance as is reasonably necessary, and (c) this may be
+requested in relation to the Return Occasion or subsequent to the Return
+Occasion.
+
+ (e) ACKNOWLEDGEMENT. Upon return of the Aircraft in
+accordance with the terms of this Lease, Lessor shall execute and deliver a
+Redelivery Acknowledgement in the form of Exhibit I hereto to Lessee.
+
+ (f) MAINTENANCE PROGRAM.
+
+ (i) Prior to the Return Occasion (unless the Aircraft has
+ suffered a Casualty Occurrence) and upon Lessor's or a
+ Beneficiary's request,
+
+
+ -63-
+
+
+
+ Lessee will provide Lessor or such Beneficiary or its
+ agent, in either case at Lessee's cost, reasonable
+ access to the Maintenance Program and the Aircraft
+ Documents in order to facilitate the Aircraft's
+ integration into any subsequent operator's fleet.
+
+ (ii) Lessee will, if requested by Lessor or a Beneficiary
+ to do so, upon return of the Aircraft deliver to
+ Lessor or the Beneficiaries a certified true, current
+ and complete copy of the Maintenance Program in
+ English. Lessor and the Beneficiaries agree that they
+ will not disclose the contents of the Maintenance
+ Program to any person or entity except to the extent
+ necessary to monitor Lessee's compliance with this
+ Lease and/or to bridge the maintenance program for the
+ Aircraft from the Maintenance Program to another
+ maintenance program after the Return occasion.
+
+ (g) FUEL. The amount of fuel in the Aircraft shall be no less
+than the amount of fuel upon Delivery Date. If requested by Lessor, Lessee will
+fuel the Aircraft in excess of the required amount by the preceding sentence at
+the Return Location, and Lessor will pay for the fuel exceeding such
+requirement on board the Aircraft on the Return Occasion at the direct cost
+paid by Lessee (without mark-up).
+
+ (h) STORAGE. If the Final Inspection is carried out in
+Honolulu, Hawaii and a Beneficiary requests in writing then Lessee shall, at
+Lessee's cost and expense during the first sixty (60) days and thereafter
+Lessor's cost and expense, provide Lessor with, or assist Lessor in procuring,
+storage facilities for the Aircraft for up to one hundred twenty (120) days.
+Such storage shall be at Lessee's maintenance base or if such maintenance base
+will be eventually unable to perform the required maintenance, any other
+location agreed between Lessor and Lessee which base or location is capable of
+performing any required maintenance on the Aircraft. With respect to the
+Aircraft while in storage, Lessee will maintain, or procure the maintenance
+of, an amount of fuel in each tank as required in accordance with the storage
+procedures at the storage location and as set forth herein. Lessee agrees,
+during such storage period, to perform, or procure the performance of, such
+maintenance required to maintain the airworthiness of the Aircraft (i) in
+accordance with the storage procedures prescribed by the Manufacturer or
+Lessee's Maintenance Program and (ii) in a condition required to be
+
+
+ -64-
+
+
+
+maintained at redelivery pursuant to the terms of Section 16(a). Risk of loss
+or damage to the Aircraft shall pass to Lessor at the commencement of storage,
+but Lessee will carry insurance, with Lessor and each Beneficiary named as an
+additional insured, on the Aircraft during such storage period as though the
+Aircraft were to remain in Lessee's fleet in accordance with the provisions of
+Section 12, except that Lessee shall not be required to maintain "all risks"
+flight insurance during such storage period but shall maintain insurance
+policies that cover the Aircraft in storage and test flights of the Aircraft on
+an hourly basis (as typically available and maintained under similar
+circumstances involving the storage and maintenance of aircraft). All amounts
+hereunder relating to storage, insurance and maintenance work for the first 60
+days of such storage period shall be for the account of Lessee and thereafter,
+shall be for the account of Lessor. Incremental costs incurred by Lessee after
+the first 60 days of storage which would not have been incurred had such
+storage period not exceeded 60 days shall be reimbursed by Lessor. Lessee
+further agrees to perform, or procure the performance of, at Lessor's cost, any
+other maintenance that Lessor may reasonably request. If, following the storage
+of the Aircraft in Hawaii, the Beneficiaries ferry the Aircraft to a location
+designated by the Beneficiaries in the Western Continental United States (the
+"Designated Location"), Lessee will reimburse the Beneficiaries for the cost of
+the ferry flight of the Aircraft from the storage location in Hawaii to such
+Designated Location. If the Aircraft is ferried to a location other than in the
+Western Continental United States, other than following a Default or an Event
+of Default, then (i) the Beneficiaries will bear any additional ferry flight
+costs and expenses incurred that are beyond the costs and expenses that would
+have been incurred if the Aircraft were ferried to a location in the Western
+Continental United States, and (ii) Lessee will bear the ferry flight costs and
+expenses that are equivalent to the ferrying of the Aircraft to a location in
+the Western Continental United States and shall pay such amount to Lessor or
+the Beneficiaries upon demand by Lessor or the Beneficiaries therefor.
+
+ (i) MAINTENANCE FOR RE-REGISTRATION. Lessee, at Lessee's
+cost, will, upon written notice from a Beneficiary not less than sixty (60)
+days prior to a Return Occasion, carry out such extra maintenance, repair or
+modification as may be required to enable the Aircraft to be in a condition
+to ensure that the Aircraft will meet and comply with (i) the requirements of
+any other country selected by the Beneficiaries and (ii) all civil aviation
+regulations, orders, rules and airworthiness directives of such country to
+permit the immediate re-registration and securing of a certificate of
+airworthiness in such country for the
+
+ -65-
+
+
+
+Aircraft (provided such regulations, orders, rules and airworthiness directives
+are no more restrictive than those of the FAA, and, so long as Lessee is
+otherwise in compliance with this Section 16 and Exhibit E, the time required
+to perform such extra maintenance shall not constitute an extension of the
+Term). If the cost of such maintenance, repair or modification exceeds the cost
+of the maintenance, repair or modification which Lessee would otherwise have
+incurred to ensure the Aircraft shall be in condition to comply with the terms
+of this Lease, Lessor shall promptly pay Lessee the difference between such
+costs at actual cost (without mark-up) to Lessee.
+
+ (j) ASSISTANCE FOR FUTURE LEASING/SALE.
+
+ (i) Lessor or any Beneficiary may, from time to time
+ during the last year of the Term, wish to make the
+ Aircraft available for inspection to designated
+ representatives or technical teams evaluating the Aircraft
+ for use after the end of the Term. Lessor or any
+ Beneficiary agrees to give Lessee not less than five (5)
+ days, advance notice of such inspection and Lessee agrees
+ to co-operate fully with Lessor's or such Beneficiary's
+ requests in making the Aircraft and Aircraft Documents
+ available to such authorized technical team. Lessor agrees
+ to be responsible for its costs in connection with any
+ such inspection and that such request will be reasonable
+ and will be coordinated with Lessee so as not to
+ unreasonably interfere with the operation or maintenance
+ of the Aircraft or the conduct of Lessee's business
+ (unless an Event of Default shall have occurred and be
+ continuing); PROVIDED, HOWEVER, that, unless an Event of
+ Default shall have occurred and be continuing, no such
+ inspection shall result in the delay of a scheduled flight
+ of Lessee;
+
+ (ii) Subject to prior coordination between the
+ Beneficiaries and Lessee, Lessee agrees that, during the
+ last year of the Term and following redelivery of the
+ Aircraft to Lessor, Lessee shall at any Beneficiary's
+ reasonable request and Lessee's cost make available to
+ Lessor and the Beneficiaries for such reasonable period as
+ the Beneficiaries may reasonably require technical and
+ engineering personnel qualified to provide the
+ Beneficiaries and any subsequent lessee or purchaser of
+ the Aircraft with such information as will enable any
+ subsequent lessee or purchaser of the Aircraft to be able
+ to bridge the Aircraft onto the maintenance program of
+ such subsequent lessee or purchaser.
+
+
+ -66-
+
+
+
+ Section 17. EVENTS OF DEFAULT.
+
+ Any one or more of the following occurrences or events shall
+constitute an Event of Default:
+
+ (a) Lessee shall fail to make any payment of Rent to Lessor
+when due under this Lease and such payment shall be overdue for a period of
+five (5) Business Days;
+
+ (b) Lessee shall fail to obtain and maintain any insurance
+required under the provisions of Section 12 hereof; or shall operate the
+Aircraft outside of the scope of the insurance coverage maintained with respect
+to the Aircraft;
+
+ (c) Any representation or warranty made by Lessee herein or
+in any document or certificate furnished to Lessor in connection herewith or
+therewith or pursuant hereto is incorrect at the time given in any material
+respect and such failure shall continue unremedied for a period of three (3)
+Business Days after notice thereof has been given to Lessee hereunder or Lessee
+otherwise has notice thereof;
+
+ (d) Lessee shall fail to timely comply with the provisions of
+Section 21 (Security Deposit) hereof;
+
+ (e) Lessee shall directly or indirectly create, incur, assume
+or suffer to exist any Lien on or with respect to the Aircraft or Engines,
+title thereto or any interest therein, except as expressly permitted by Section
+14;
+
+ (f) Lessee shall at any time not be a Certified Air Carrier
+holding all licenses, certificates, permits and franchises from the appropriate
+agencies of the United States of America and all other governmental authorities
+having jurisdiction over Lessee and its operations which are necessary to
+authorize the Lessee to engage in air transport and to carry on its intra-state
+passenger operations within Hawaii as presently conducted and in connection
+with the operation of aircraft, including the Aircraft; or Lessee shall lose or
+suffer a suspension of its United States air carrier license or certificate
+under Part 121 of the FAA Regulations or certificate under Chapter 411 of
+Subtitle VII, Part A of Title 49 of the United States Code, as amended;
+
+ (g) Lessee shall fail to perform or observe in any material
+respect any other covenant, condition or agreement to be performed or observed
+by it pursuant to this Lease and such failure shall continue for a period of
+thirty (30) days after the earlier to occur of (1) Lessee's attaining or having
+knowledge thereof and (2) written notice thereof is given to Lessee;
+
+ (h) Lessee consents to the appointment of a receiver,
+trustee or liquidator of itself or of a
+
+
+ -67-
+
+
+
+substantial part of its property, or Lessee admits in writing its inability to
+pay its debts generally as they come due or makes a general assignment for the
+benefit of creditors, or Lessee files a voluntary petition in bankruptcy or a
+voluntary petition seeking reorganization in a proceeding under any bankruptcy
+laws (as now or hereafter in effect), or an answer admitting the material
+allegations of a petition filed against Lessee in any such proceeding, or
+Lessee by voluntary petition, answer or consent seeks relief under the
+provisions of any bankruptcy or other similar law providing for the
+reorganization or winding-up of corporations, or provides for an agreement,
+composition, extension or adjustment with its creditors;
+
+ (i) An order, judgment or decree is entered by any court,
+with or without the consent of Lessee, appointing a receiver, trustee or
+liquidator for Lessee or of all or any substantial part of its property (save
+for the purposes of an amalgamation, merger or reconstruction not involving
+insolvency), or all or any substantial part of the property of Lessee is
+sequestered, and any such order, judgment or decree of appointment or
+sequestration remains in effect, undismissed, unstayed or unvacated for a
+period of sixty (60) days after the date of entry thereof;
+
+ (j) A petition against Lessee in a proceeding under the
+bankruptcy, insolvency or other similar Laws (as now or hereafter in effect) of
+any Governmental Entity is filed and is not withdrawn or dismissed within sixty
+(60) days thereafter, or if, under the provisions of any Law providing for
+reorganization or winding-up of corporations which may apply to Lessee, any
+court of competent jurisdiction assumes jurisdiction over, or custody or
+control of, Lessee or of all or any substantial part of its property and such
+jurisdiction, custody or control remains in effect, unrelinquished, unstayed or
+unterminated for a period of sixty (60) days;
+
+ (k) (i) A final judgment for the payment of money not covered
+by insurance in excess of $500,000, or (ii) final judgments for the payment of
+money not covered by insurance in excess of $500,000 in the aggregate, shall be
+rendered against Lessee and the same shall remain undischarged for a period of
+ninety (90) days during which execution thereof shall not be effectively stayed
+by agreement of the parties involved, stayed by court order or adequately
+bonded;
+
+ (1) Attachments or other Liens shall be issued or entered
+against substantially all of the property of Lessee and shall remain
+undischarged or unbonded for forty-five (45) days except for security interests
+created in connection with monies borrowed or obligations agreed to by Lessee
+in the ordinary course of its business;
+
+
+ -68-
+
+
+
+ (m) Lessee shall default in the payment of any obligation for
+the payment of borrowed money, for the deferred purchase price of property or
+for the payment of rent or hire under any lease of aircraft when the same
+becomes due, in any such case if the outstanding amount thereof (or, in the
+case of a lease, if the aggregate rentals during the term thereof) exceeds
+$500,000; or Lessee shall default in the performance of any other term,
+agreement or condition contained in any material agreement or instrument under
+or by which any such obligation having any unpaid balance of at least
+$1,000,000 is created, evidenced or secured, if the effect of such default is
+to cause such obligation to become due prior to its stated maturity; or Lessee
+shall default in the payment when due of any other amount owed to Lessor or
+each Beneficiary;
+
+ (n) Lessee voluntarily suspends all or substantially all of
+its operations or the franchises, concessions, permits, rights or privileges
+required for the conduct of the business and operations of Lessee shall be
+revoked, canceled or otherwise terminated or the free and continued use and
+exercise thereof curtailed or prevented, and as a result of any of the
+foregoing the predominant business activity of Lessee shall cease to be that of
+a commercial airline; or
+
+ (o) An "Event of Default" (as therein defined) shall have
+occurred and be continuing under any Other Lease.
+
+ Section 18. REMEDIES.
+
+ Upon the occurrence of any Event of Default and any time
+thereafter so long as the same shall be continuing, Lessor may, at its option
+and without notice to Lessee, exercise one or more of the following remedies as
+Lessor in its sole discretion shall elect, to the extent available and
+permitted by, and subject to compliance with any mandatory requirements of,
+applicable Law then in effect:
+
+ (a) Demand that Lessee, and Lessee shall upon the written
+demand of Lessor and at Lessee's expense, immediately return the Aircraft to
+Lessor at any location in Hawaii or in the continental United States as
+specified by Lessor or the Beneficiaries and in the manner specified in such
+notice and in the condition specified in Section 16, in which event such return
+shall not be delayed for purposes of complying with the return conditions
+specified in Section 16 hereof (none of which conditions shall be deemed to
+affect Lessor's possession of the Aircraft) or delayed for any other reason.
+Notwithstanding the foregoing, at Lessor's option Lessee shall be required
+thereafter to take such actions as would be required by the provisions of this
+Lease if the Aircraft were being returned at the end of the Term hereof. In
+addition, Lessor, at its option and to the extent permitted by applicable Law,
+may enter upon the premises where all or any part of the Aircraft is located
+
+
+ -69-
+
+
+
+and take immediate possession of and, at Lessor's sole option, remove the same
+(and/or any engine which is not an Engine but which is installed on the
+Airframe, subject to the rights of the owner, lessor or secured party thereof)
+by summary proceedings or otherwise, all without liability accruing to Lessor
+or any Beneficiary for or by reason of such entry or taking of possession
+whether for the restoration of damage to property, or otherwise, caused by such
+entry or taking, provided that all actions of Lessor in this respect are
+reasonable in the circumstances and excepting damages caused by gross
+negligence or willful misconduct.
+
+ (b) Sell at private or public sale, as Lessor may determine,
+or hold, use, operate or lease to others the Aircraft as Lessor in its sole
+discretion may determine, all free and clear of any rights of Lessee; PROVIDED,
+HOWEVER, that nothing herein shall be construed as limiting any right of Lessor
+or any Beneficiary to sell, lease, assign or otherwise dispose of all or any
+part of the Aircraft under any circumstances (whether or not during the
+existence or continuance of an Event of Default) during the Term, subject only
+to Lessee's rights under SECTION 20(f).
+
+ (c) Whether or not Lessor shall have exercised, or shall
+thereafter at any time exercise, any of its rights under paragraph (a) or
+paragraph (b) of this Section 18, Lessor, by thirty (30) days written notice to
+Lessee specifying a payment date, may demand that Lessee pay to Lessor, and
+Lessee shall pay to Lessor, on the payment date specified in such notice, as
+liquidated damages for loss of bargain and not as a penalty (in lieu of the
+Basic Rent due for the period commencing after the date specified for payment
+in such notice), any unpaid Rent for the Aircraft (prorated in the case of
+Basic Rent on a daily basis) to and including the payment date specified in
+such Notice, plus the aggregate amount of all payments of Basic Rent for the
+remainder of the Term, discounted periodically (equal to installment frequency)
+to present worth at the interest rate of four percent (4%) per annum.
+
+ (d) In the event Lessor, pursuant to Section 18(b) above,
+shall have sold the Aircraft, then Lessor, in lieu of exercising its rights
+under Section 18(c) above, may, if it shall so elect, demand that Lessee pay
+Lessor, and Lessee shall pay to Lessor, as liquidated damages for loss of
+bargain and not as a penalty (in lieu of the Basic Rent due for the period
+commencing after such sale occurs), any unpaid Rent for the Aircraft due to and
+including the date of sale, plus the amount by which the Appraised Value (as
+hereafter defined) of the Aircraft, computed as of the date of such sale,
+exceeds the net cash proceeds of such sale. As used herein, "Appraised Value"
+means the fair market sales value of the Aircraft, as determined by an
+internationally recognized aircraft appraiser selected by the Beneficiaries (i)
+on the basis of either a physical inspection of the Aircraft or on the basis of
+a "desk top"
+
+
+ -70-
+
+
+
+appraisal, as the Beneficiaries shall determine, (ii) on the assumption that
+the Aircraft has been maintained in accordance with the requirements of this
+Lease and is in the condition required by Section 16 and Exhibit E, whether or
+not any such assumption is in fact the case, and (iii) at Lessee's cost and
+expense.
+
+ (e) De-register the Aircraft from the Aircraft Registry at
+the FAA, and proceed to re-register the Aircraft with the FAA or any other
+aircraft registry of any jurisdiction, in any case, in the name of Lessor,
+either or both Beneficiaries or any other Person, as the Beneficiaries shall
+determine.
+
+ (f) In the event that Lessor, pursuant to Section 18(b)
+above, shall, after having exercised reasonable efforts under the
+circumstances, have re-let the Aircraft under a lease which extends at least to
+the date upon which the Term for the Aircraft would have expired but for
+Lessee's default, Lessor, in lieu of exercising its rights under Section 18(c)
+above with respect to the Aircraft, may, if it shall so elect, demand that
+Lessee pay Lessor, and Lessee shall pay Lessor, as liquidated damages for loss
+of bargain and not as a penalty (in lieu of the Basic Rent for the Aircraft due
+after the time of re-letting) any unpaid Rent for the Aircraft due up to the
+date of re-letting, plus the amount, if any, by which the aggregate Basic Rent
+for the Aircraft (such Basic Rent being computed on the assumption that the
+Basic Rent as in effect on the date of such demand shall remain in effect at
+all times thereafter), which would otherwise have become due over the Term,
+discounted periodically (equal to installment frequency) to present worth as of
+the date of re-letting at the interest rate of four percent (4%) per annum,
+exceeds the aggregate basic rental payments to become due under the re-letting
+from the date of such re-letting to the date upon with the Term for the
+Aircraft would have expired but for Lessee's default, discounted periodically
+(equal to installment frequency) to present worth as of the date of the
+re-letting at the interest rate of four percent (4%) per annum.
+
+ (g) Proceed by appropriate court action or actions, either at
+law or in equity, to enforce performance by Lessee of the applicable covenants
+of this Lease and to recover damages for the breach thereof and to rescind this
+Lease.
+
+ (h) Terminate Lessee's right to possession and use of the
+Aircraft under this Lease by written notice (which notice shall be effective
+upon dispatch) and repossess the Aircraft, whereupon, Lessee shall forthwith
+deliver the Aircraft and all Aircraft Documents to Lessor or the Beneficiaries
+at such location as Lessor or the Beneficiaries shall specify.
+
+
+ -71-
+
+
+
+ (i) Seize or foreclose upon any collateral pledged to secure
+the obligations of Lessee hereunder; liquidate, dispose of, and realize upon
+any such collateral; and apply the proceeds thereof to the obligations of
+Lessee hereunder, in any case, as Lessor shall determine.
+
+ (j) Draw any amount available to be drawn under any Security
+Deposit or the Security Letter of Credit, and to apply the proceeds thereof to
+the obligations of Lessee hereunder, in any case, as Lessor shall determine.
+
+ In addition to the foregoing, Lessee shall be liable for any
+and all unpaid Rent during or after the exercise of any of the aforementioned
+remedies, together with interest on such unpaid amounts at the Overdue Rate,
+and until satisfaction of all of Lessee's obligations to Lessor hereunder and
+for all reasonable legal fees and other reasonable costs and expenses incurred
+by Lessor by reason of the occurrence of any Event of Default or the exercise
+of Lessor's remedies with respect thereto, including all costs and expenses
+incurred in connection with the return of the Aircraft in accordance with the
+terms of Section 16 hereof or in placing the Aircraft in the condition and with
+airworthiness certification as required by such Section.
+
+ In effecting any repossession, each of Lessor and each
+Beneficiary and its representatives and agents, to the extent permitted by law,
+shall: (i) have the right to enter upon any premises where it reasonably
+believes the Aircraft, the Airframe, an Engine or Part to be located and Lessee
+shall cooperate in giving Lessor such access; (ii) not be liable, in conversion
+or otherwise, for the taking of any personal property of Lessee which is in or
+attached to the Aircraft, the Airframe, an Engine or Part which is repossessed;
+provided, however, that Lessor shall return to Lessee all personal property of
+Lessee or its passengers which were on the Aircraft at the time Lessor re-takes
+possession of the Aircraft; (iii) not be liable or responsible, in any manner,
+for any inadvertent damage or injury to any of Lessee's property in
+repossessing and holding the Aircraft, the Airframe, an Engine or Part, except
+for that caused by or in connection with Lessor's gross negligence or willful
+acts; and (iv) have the right to maintain possession of and dispose of the
+Aircraft, the Airframe an Engine or Part on any premises owned by Lessee or
+under Lessee's control.
+
+ If requested by Lessor, Lessee, at its sole expense, shall
+assemble and make the Aircraft, the Airframe, an engine or Part available at a
+place designated by Lessor in accordance with Section 16 hereof. Lessee hereby
+agrees that, in the event of the return to or repossession by Lessor of the
+Aircraft, the Airframe, an Engine or Part, any rights in any warranty (express
+or implied) heretofore assigned to Lessee or otherwise held by Lessee shall
+without further act, notice or writing be assigned or-reassigned to
+
+
+ -72-
+
+
+
+Lessor, if assignable. Lessee shall be liable to Lessor for all reasonable
+expenses (including reasonable attorneys, fees and expenses), disbursements,
+costs and fees incurred in (i) re-possessing, storing and shipping the
+Aircraft, the Airframe, an Engine or Part and maintaining, repairing and
+refurbishing the Aircraft, the Airframe, an Engine or Part to the condition
+required by Section 16 hereof and (ii) preparing the Aircraft, the Airframe, an
+Engine or Part for sale or lease, advertising the sale or lease of the
+Aircraft, the Airframe, an Engine or Part and selling or releasing the
+Aircraft, the Airframe, an Engine or Part. Lessor is hereby authorized and
+instructed, at its option, to make reasonable expenditures which Lessor
+considers advisable to repair, restore, store, insure or maintain the Aircraft,
+the Airframe, an Engine or Part to the condition required by Section 16 hereof,
+all at Lessee's sole expense.
+
+ Lessee shall cooperate reasonably and in good faith with
+Lessor and any Beneficiary in connection with the exercise by Lessor or either
+Beneficiary of the rights and remedies provided hereby or at law upon and
+following the occurrence of an Event of Default, including by executing and
+delivering such instruments as Lessor or any Beneficiary shall reasonably
+request. In furtherance of the foregoing, Lessee hereby constitutes and
+appoints Lessor as the true and lawful agent and attorney-in-fact for Lessee
+for purposes of executing and delivering any instrument that may be necessary
+or appropriate in order to carry into effect any right or remedy of Lessor or
+any Beneficiary hereunder upon and following the occurrence of an Event of
+Default, including any consent to the de-registration of the Aircraft from the
+FAA. Any such instrument signed pursuant to the foregoing appointment may be
+signed in Lessee's name or in the name of Lessor. The foregoing appointment is
+coupled with an interest and is irrevocable.
+
+ No remedy referred to in this Section 18 is intended to be
+exclusive, but, to the extent permissible hereunder or under applicable Law,
+each shall be cumulative and in addition to any other remedy referred to above
+or otherwise available to Lessor at Law or in equity; and the exercise or
+beginning of exercise by Lessor of any one or more of such remedies shall not
+preclude the simultaneous or later exercise by Lessor of any or all of such
+other remedies. No express or implied waiver by Lessor of any Default shall in
+any way be, or be construed to be, a waiver of any future or subsequent
+Default.
+
+ Section 19. ALIENATION.
+
+ Lessor and each Beneficiary shall have the right at its sole
+cost and expense to assign, sell, encumber or otherwise dispose of any interest
+of Lessor or such Beneficiary in, to or under the Aircraft, the Trust Agreement
+or this Lease and/or the proceeds hereof, subject to the rights of Lessee under
+the provisions of this Lease,
+
+
+ -73-
+
+
+
+including by sale, transfer, pledge, assignment or other disposition to any
+Lender or to any other Person (other than an air carrier engaged in the airline
+business); PROVIDED, HOWEVER, that unless an Event of Default shall have
+occurred and be continuing, neither Lessor nor any Beneficiary shall undertake
+any action that (i) interferes with Lessee's rights under Section 20(f) hereof,
+(ii) disqualifies the Aircraft for registry with the FAA, or (iii) results in
+an increase in any liability of Lessee under Section 10, unless Lessor and each
+Beneficiary agree to waive their rights to be indemnified for any such increase
+in liability. To effect or facilitate any such sale, transfer, pledge,
+assignment, or other disposition, Lessee agrees to provide, at Lessor's sole
+cost and expense, such agreements, consents, conveyances or documents as may be
+reasonably requested by Lessor or such Beneficiary, which shall include,
+without limitation, a consent to such disposition executed by Lessee and in the
+case of an assignment of the Lease (other than an assignment for security
+purposes) an unrestricted release of Lessor from any obligations under this
+Lease arising after the date of such assignment; PROVIDED, HOWEVER, that such
+release shall not affect any liability of Lessor or such Beneficiary arising on
+or before the date of such assignment. Lessee hereby agrees that it will not
+assert against an assignee any Claim or defense which it may have against
+Lessor or such Beneficiary; PROVIDED, HOWEVER, that any such Claim or defense
+will survive such assignment and continue to be maintained against Lessor or
+such Beneficiary. No assignment or transfer hereunder shall result in Lessee
+being obligated to pay for the account of Lessor or such Beneficiary any
+greater amount than it would otherwise be obligated to pay hereunder, including
+without limitation any obligation of Lessee to pay amounts pursuant to Section
+10. The agreements, covenants, obligations, and liabilities contained herein
+including, but not limited to, all obligations to pay Rent and indemnify each
+Indemnitee are made for the benefit of each Indemnitee and their respective
+successors and assigns. In the event this Lease is assigned, or otherwise
+disposed of by Lessor or either Beneficiary, any assignee, transferee or
+mortgagee shall agree in writing with Lessee as a condition precedent thereto
+to be bound by the provisions of this Lease and not to disturb or otherwise
+interfere with the quiet enjoyment of Lessee of the Aircraft in accordance with
+the terms of this Lease so long as no Event of Default shall have occurred and
+be continuing.
+
+ Lessee shall include each Lender or any Person to whom an
+interest herein is assigned as an additional insured and loss payee in
+accordance with the provisions of Section 12 hereof and indemnify such Lender
+to the same extent Lessee has agreed to indemnify Lessor pursuant to Sections
+10 and 13 hereof. No Lender or any Person to whom an interest herein is
+assigned shall have any liability for the obligations of Lessor under any
+provision of this Lease.
+
+
+ -74-
+
+
+
+
+
+
+
+Section 20. MISCELLANEOUS.
+
+ (a) SEVERABILITY, AMENDMENT AND CONSTRUCTION. Any provision of this
+Lease which is prohibited or unenforceable in any jurisdiction shall, as to
+such jurisdiction, be ineffective to the extent of such prohibition or
+unenforceability without invalidating the remaining provisions hereof; any
+such prohibition or unenforceability in any jurisdiction shall not
+invalidate or render unenforceable such provision in any other jurisdiction.
+To the extent permitted by Law, Lessee hereby waives any provisions of Law
+which renders any provisions hereof prohibited or unenforceable in any
+respect. Lessee may not assign this Lease or its rights hereunder. This
+Lease supersedes any prior or contemporaneous agreements, whether oral or in
+writing, of the parties hereto and shall constitute the entire agreements of
+the parties hereto. No term or provision of this Lease may be changed,
+waived, discharged or terminated orally, but only by an instrument in
+writing expressed to be a supplement or amendment to this Lease signed under
+hand by an officer of the party against which the enforcement of the change,
+waiver, discharge or termination is sought. This Lease shall constitute an
+agreement of lease, and nothing herein shall be construed as conveying to
+Lessee any right, title or interest in the Aircraft or any Engine or Part
+except as a lessee only. The headings in this Lease are for convenience of
+reference only and shall not define or limit any of the terms of provisions
+hereof. Whenever required by the context hereof, the singular shall include
+the plural and vice versa. Reference to this Lease shall mean this Lease as
+amended or supplemented from time to time.
+
+ (b) GOVERNING LAW; JURISDICTION. This Lease shall in all respects be
+governed by, and construed in accordance with, the laws of the State of New
+York, except that the provisions of Section 21(a) (Security Deposit) and
+Section 21(b)(vi) shall be governed by the laws of the State of Delaware.
+
+ A. Lessee and Lessor each hereby irrevocably and expressly submit to
+the non-exclusive jurisdiction of the United States District Court for the
+Southern District of New York and to the New York Supreme Court, New York
+County for the purposes of any suit, action or proceeding arising out of
+this Lease or the Operative Documents. Final judgment against Lessee or
+Lessor in any such suit shall be conclusive, and may be enforced in any
+other jurisdictions by suit on the judgment or as otherwise permitted by
+applicable Law, a certified or true copy of which shall be conclusive
+evidence of the fact and of the amount of any indebtedness or liability of
+such party therein described; PROVIDED, HOWEVER, that Lessor or Lessee may
+at its option bring suit, or institute other judicial proceedings against
+the other party or any of its assets, in the courts of any
+
+ -75-
+
+
+
+
+
+country or place where such party or such assets may be found.
+
+ B. Each of Lessee and Lessor hereby irrevocably waives any objection
+which it may now or hereafter have to the laying of venue of any suit,
+action or proceeding brought in any court located in New York City, New York
+and hereby further irrevocably waives any claim that any such suit, action
+or proceeding brought in any such court has been brought in an inconvenient
+forum.
+
+ C. The foregoing submission to jurisdiction shall not be construed so
+as to limit the right of either party to take proceedings against the other
+in whatsoever jurisdictions shall to it seem fit nor shall the taking of
+proceedings in any one or more jurisdictions preclude the taking of
+proceedings in any other jurisdiction, whether concurrently or not.
+
+ D. Lessee hereby irrevocably appoints CT Corporation System, Inc., 1633
+Broadway, New York, New York 10019, as Lessee's agent and attorney-in-fact
+(the "Agent"), upon whom all service of summonses and other legal processes
+may be made with respect to any action, suit or proceeding relating to this
+Lease or any other Operative Document. The appointment of such Agent shall
+be irrevocable, and if the Agent revokes such relationship, prior to the
+effective date of such revocation, Lessee shall appoint a successor, which
+successor shall be in accordance with the provisions hereof. Prior to such
+change Lessee shall notify Lessor of such change of Agent. Lessee consents
+and agrees that any service under or in connection with this Lease shall be
+duly completed and served if mailed to Lessee at its address specified below
+or to the Agent at the address specified above, in either case, by
+registered or certified mail.
+
+ (c) NOTICES. All notices required under the terms and provisions hereof
+shall be in writing and any such notice shall become effective when
+deposited in the mails, registered with proper postage for air mail prepaid,
+or, if in the form of a telegram, telex or telecopy, when received,
+addressed if to the Lessee, the Lessor or any Beneficiary at their
+respective addresses set forth in Exhibit C-2 (or such other address as may
+be designated by the Lessee, the Lessor or such Beneficiary to the others).
+
+ (d) LESSOR'S RIGHT TO PERFORM FOR LESSEE. if Lessee fails to make any
+payment of Supplemental Rent required to be made by it hereunder or fails to
+perform or comply with any covenant, agreement or obligation contained
+herein, Lessor or any Beneficiary shall have the right but not the obligation
+ to make such payment or conform or comply with such agreement, covenant or
+obligation, and the amount of such payment and the amount of the reasonable
+expenses of Lessor or such Beneficiary incurred in connection with such
+payment or the performance thereof or compliance therewith,
+
+ -76-
+
+
+
+
+
+together with interest thereon at the Overdue Rate, shall be payable by
+Lessee to Lessor or such Beneficiary (as Supplemental Rent) upon demand.
+Lessor agrees to notify Lessee in writing prior to making any payment under
+this Section 20(d) (giving Lessee the opportunity to rectify the omission),
+unless the Aircraft will be in danger of loss, sale, confiscation,
+forfeiture or seizure should such payment not be made. The taking of any
+such action by Lessor or any Beneficiary pursuant to this Subsection 20(d)
+shall not constitute a waiver or release of any obligation of Lessee under
+the Lease, nor a waiver of any Event of Default which may arise out of
+Lessee's nonperformance of such obligation, nor an election or waiver by
+Lessor of any remedy or right available to Lessor under or in relation to
+this Lease.
+
+ (e) COUNTER PARTS. This Lease may be executed simultaneously in two or
+more counterparts, each of which shall be deemed an original, but all of
+which together shall constitute one and the same instrument. To the extent,
+if any, that this Lease constitutes chattel paper (as such term is defined in
+ the Uniform Commercial Code as in effect in any applicable jurisdiction) no
+security interest in this Lease may be created through the transfer or
+possession of any counterpart other than the counterpart which has been
+marked "Counterpart No. 1 - Chattel Paper Original" on the cover page
+thereof.
+
+ (f) QUIET ENJOYMENT. Lessor covenants that so long as an Event of
+Default shall not have occurred and be continuing, Lessee shall be entitled
+to quiet use, possession and enjoyment of the Aircraft in accordance with
+the terms of this Lease, without interference by Lessor or by any Person
+lawfully claiming by or through Lessor.
+
+ (g) BROKERS. Each party agrees to indemnify and hold the other harmless
+from and against any and all claims, suits, damages, costs and expenses
+(including, but not limited to, reasonable attorneys' fees) asserted by any
+agent, broker or other third party for any commission or compensation of any
+nature whatsoever based upon the lease of the Aircraft, if such claim,
+damage, cost or expense arises out of any action or alleged action by the
+indemnifying party, its employees or agents.
+
+ (h) FUNDING INDEMNITY. Without prejudice to any other rights Lessor or
+each Beneficiary may have under the terms of this Lease or any of the
+Operative Documents or otherwise,
+
+ (A) if Lessor or each Beneficiary (and/or any of Beneficiary's
+ Affiliates) enters into any Dollar based bank funding or
+ financing arrangement (including, without limitation, any swap,
+ hedge or similar arrangement, but excluding any currency swap or
+ currency hedge to which Lessee has not
+
+ -77-
+
+
+
+
+
+ consented) in connection with the fixing of the Basic Rent,
+ which is payable during the Term, and if the Aircraft shall not
+ be delivered to Lessee in accordance with the terms of this
+ Lease (other than by reason of the willful default of Lessor),
+ Lessee shall indemnify Lessor or each Beneficiary (and/or any of
+ Beneficiary's Affiliates) against, and shall pay on an after tax
+ basis to Lessor or each Beneficiary (and/or any of Beneficiary's
+ Affiliates) forthwith on demand, subject to the proviso set
+ forth below, all costs, fees (including, without limitation,
+ legal fees) expenses, obligations, liabilities and any other
+ amounts notified by the Beneficiaries to the Lessee (which
+ notification shall set out in reasonable detail the calculation
+ of the amounts claimed therein and such notification shall, in
+ the absence of manifest error, be conclusive) as having been
+ incurred, suffered or expended by Lessor or any Beneficiary
+ (and/or any of Beneficiary's Affiliates) or arising in connection
+ with the amendment, variation, rolling over, termination,
+ reconstitution, hedging, unwinding, covering or other adjustment
+ of any Dollar based bank funding or financing arrangement
+ (including, without limitation, any swap, hedge or similar
+ arrangement, but excluding any currency swap or currency hedge to
+ which Lessee has not consented) entered into or committed by
+ Lessor or any Beneficiary (and/or any of Beneficiary"s Affiliates)
+ for the purposes of the financing of the Aircraft and/or its
+ leasing to Lessee on the basis of the Basic Rent provided for
+ under this Lease; PROVIDED, HOWEVER, that if the interruption of
+ the Beneficiaries' funding results in a gain to the Beneficiaries
+ (net of Taxes or other charges incurred in connection therewith),
+ then, so long as no Default exists, the Beneficiaries shall remit
+ the amount of such gain (net of their costs relating to the
+ negotiation, preparation and execution of this Agreement and the
+ consummation of the transactions contemplated hereby) to Lessee;
+
+ (B) if an Event of Default occurs and the Aircraft is not delivered
+ or is repossessed in accordance with the terms of this Agreement
+ or otherwise, or if the Term is terminated earlier than the
+ Expiration Date by any reason including but not limited to the
+ early termination of the Term referred in clause 2(ii)(B) of the
+ Supplemental Agreement or a Casualty Occurrence (if such Casualty
+ Occurrence is covered by the insurance required by Section 12),
+ then Lessee shall indemnify Lessor or each Beneficiary (and/or
+ Beneficiary's Affiliate) against and shall pay on
+
+ -78-
+
+
+
+
+
+ an after tax basis to Lessor (and/or Beneficiary's Affiliate)
+ forthwith on demand, subject as provided below, all reasonable
+ costs, fees (including, without limitation, legal fees), expenses
+ obligations, liabilities and other amounts notified by the
+ Beneficiaries to Lessee (which notification shall set out the
+ calculation of the amounts claimed therein and such notification
+ shall, in the absence of manifest error, be conclusive) as having
+ been incurred, suffered or expended in connection with the
+ amendment, variation, rolling over, termination, reconstitution,
+ hedging, unwinding, covering or other adjustment of any Dollar
+ based bank funding or financing arrangement (including, without
+ limitation, any swap, hedge or similar arrangement, but excluding
+ any currency swap or currency hedge to which Lessee has not
+ consented) entered into or committed by Lessor or each Beneficiary
+ (and/or any of Beneficiary's Affiliates) for the purposes of the
+ financing of the Aircraft by Lessor or each Beneficiary and/or its
+ leasing of the Aircraft to Lessee on the basis of the fixed Basic
+ Rent provided for under this Lease;
+
+ PROVIDED THAT the provisions of (A) or (B) shall not impose or be
+ deemed to impose on Lessee any obligation for payment of any
+ amount representing the principal amount of any bank funding
+ or financing arrangement.
+
+ (i) PAYMENTS IN U.S. DOLLARS. All amounts to be paid hereunder to
+Lessor or Lessee shall be paid in Dollars, in immediately available funds.
+Lessee acknowledges that the specification of Dollars in this transaction is
+of the essence and that Dollars shall be the currency of account in any and
+all events. The obligations of Lessee or Lessor hereunder, to Lessor or
+Lessee, respectively, shall not be discharged by an amount paid in another
+currency, whether pursuant to a judgment or otherwise, to the extent that the
+ amount so paid on prompt conversion to Dollars under normal banking
+procedures does not yield the amount of Dollars owing to Lessor. In the
+event that any payment by Lessee or Lessor, respectively, whether pursuant
+to judgment or otherwise to Lessor or Lessee, respectively, upon conversion
+does not yield such amount of Dollars, Lessor or Lessee, as the case may be,
+shall have a separate cause of action against Lessee or Lessor, as the case
+may be, for the additional amount necessary to yield the amount of Dollars
+due and owing to Lessor or Lessee, as the case may be.
+
+ (j) COSTS. Each party hereto shall bear its own costs in respect of the
+preparation, negotiation and execution of this Lease, the Lease Supplement
+and any document ancillary thereto; PROVIDED, HOWEVER, that Lessor shall bear
+
+ -79-
+
+
+
+
+
+the costs and expenses incurred in connection with the registration of the
+Aircraft with the FAA (including the legal fees and expenses of special FAA
+counsel) and Lessee shall bear the costs and expenses incurred in connection
+with the filing and registration of the Lease and any change in the FAA
+registration number of the Aircraft requested by Lessee and approved by the
+Beneficiaries (including, without limitation, the legal fees and expenses of
+special FAA counsel).
+
+ (k) CONFIDENTIALITY. Neither Lessor nor Lessee hereto shall, at any
+time during the Term or after its termination or expiration, disclose to any
+Person any information relating to the matters set forth in this Lease or
+the business, financial or other affairs a confidential nature relating to
+the other party which may, in the course of the transactions contemplated
+hereby, have been disclosed or discovered, except that any such information
+may be so disclosed (i) with the written authority of the other party
+hereto, (ii) so far as may be necessary for the proper performance of its
+obligations hereunder, including the filing of this Lease with the FAA,
+(iii) to the extent required by law, ordered by a court of competent
+jurisdiction or by the relevant Governmental Entity responsible for taxation
+or any other applicable regulatory authority, (iv) to such party's auditors,
+counsel or other professional advisers, (v) to any Affiliate of a
+Beneficiary and (vi) to the extent required in connection with a public
+offering of debt or equity securities by Lessee. The parties hereto shall
+use all reasonable endeavors to prevent any such disclosure as aforesaid,
+except that Lessor or any Beneficiary may provide any potential purchaser of
+the Aircraft, any Lender or any potential assignee of this Lease or any of
+the Operative Documents with such information that any such person shall
+reasonably request of Lessor or such Beneficiary.
+
+ (l) SECTION 1110. The parties agree that Lessor and the Beneficiaries
+are entitled to the benefits and protections of Section 1110 of the U.S.
+Bankruptcy Code ("Section 1110"), and Lessee shall take such actions and
+effect such filings as may be necessary to enable Lessor and the
+Beneficiaries to continue to be entitled to such benefits and protections at
+all times from the date of the execution and delivery hereof until such time
+as the Aircraft is returned to Lessor or the Beneficiaries in compliance
+with the return conditions herein and this Lease is terminated. Lessee
+acknowledges that this Section is of fundamental importance to the
+transactions contemplated hereby and that neither Lessor nor the
+Beneficiaries would have entered into this Lease but for the rights intended
+to be conveyed to Lessor and the Beneficiaries by this Section and the
+protection and benefits of Section 1110.
+
+ (m) CONCERNING THE LESSOR. Wilmington Trust Company is entering into
+this Lease solely in its capacity
+
+ -80-
+
+
+
+
+
+as Trustee under the Trust Agreement and not in its individual capacity. As
+such, Wilmington Trust Company shall not be liable for or on account of any
+statements, representations, warranties, covenants or obligations stated to
+be those of Lessor hereunder.
+
+ (n) TRUE LEASE. THIS LEASE SHALL CONSTITUTE AN AGREEMENT OF LEASE AND
+IS A TRUE OPERATING LEASE, AND NOTHING HEREIN SHALL BE CONSTRUED AS
+CONVEYING TO LESSEE ANY RIGHT, TITLE, OR INTEREST IN THE AIRCRAFT OR ANY
+ENGINE OR PART EXCEPT AS A LESSEE ONLY. WITHOUT LIMITING THE FOREGOING,
+LESSEE WILL NOT FILE A TAX RETURN OR TAKE ANY OTHER ACTION WHICH IS
+INCONSISTENT WITH THE FOREGOING.
+
+ Section 21. SECURITY DEPOSIT.
+
+ (a) SECURITY DEPOSIT. Lessor hereby acknowledges Beneficiaries' receipt
+from Lessee of the Security Deposit subject to the Security Deposit
+Adjustment. In the event that the transactions contemplated by this Lease
+are not consummated (other than due to the fault of Lessee or the occurrence
+of an Event of Default), Lessor shall cause the Beneficiary to refund the
+Security Deposit (so long as no Default or Event of Default has occurred,
+without deducting any costs incurred by any Beneficiary or Lessor in
+connection with the proposed leasing of the Aircraft hereunder but after
+deducting the break funding costs incurred by the Beneficiaries and specified
+ in Section 20(h), if any) to Lessee. The Security Deposit is received by
+Lessor or the Beneficiaries as security for the timely and faithful
+performance by Lessee of all of Lessee's obligations under this Lease, and
+Lessee hereby grants Lessor a security interest therein and in all other
+sums deposited under this Section 21(a). Lessee agrees to execute and file
+with the appropriate Governmental Entities any and all documents necessary
+or reasonably requested by Lessor or any Beneficiary to evidence and perfect
+such security assignment in favor of Lessor. If Lessee fails to pay Rent
+hereunder when due or to pay any other sums due or to perform any of the
+other terms and provisions of this Lease or is otherwise in Default
+hereunder, in addition to all other rights Lessor shall have under the New
+York Uniform Commercial Code as a secured party, Lessor may use, apply or
+retain all or any portion of the Security Deposit in partial payment for
+sums due to Lessor by Lessee, to compensate Lessor for any sums it may in
+its discretion advance as a result of a Default by Lessee or to apply toward
+losses or expenses Lessor may suffer or incur as a result of Lessee's
+Default hereunder. If Lessor uses or applies all or any portion of such
+Security Deposit, such application shall not be deemed a cure of any
+Defaults, and Lessee shall within five (5) days after written demand
+therefor deposit with Lessor in cash an amount sufficient to restore the
+Security Deposit to the aggregate amount of such Security Deposit prior to
+Lessor's use or application and the failure of Lessee to do so shall be a
+material breach of
+
+ -81-
+
+
+
+
+
+this Lease by Lessee. Provided Lessee is not in Default under this Lease,
+such Security Deposit, less any costs incurred by Lessor in connection with
+the termination of the Lease or return of the Aircraft, shall be returned to
+Lessee promptly after the Expiration Date and Lessee's return of the
+Aircraft in compliance with Section 16 hereof. Lessee shall not be entitled
+to any interest earned on the Security Deposit.
+
+ (b) SECURITY LETTER OF CREDIT.
+
+ (i) Lessee hereby agrees with Lessor that, subject to paragraph
+ (vi) below, Lessee shall deliver to Lessor (or if specified by
+ the Beneficiaries, the Beneficiaries) the duly executed Security
+ Letter of Credit for the amount specified in Exhibit C-1.
+
+ (ii) The Security Letter of Credit shall remain in full force and
+ effect from the date hereof until the date that is three months
+ after the Expiration Date, provided that it may be renewable
+ annually so long as Lessor or the Beneficiaries may draw upon the
+ Security Letter of Credit at any time if the Security Letter of
+ Credit is not renewed on or before thirty (30) days prior to its
+ stated expiration or if Lessor or a Beneficiary receives a
+ termination notice from the issuing bank or institution.
+
+ (iii) Lessor or the Beneficiaries may draw upon the Security Letter of
+ Credit upon the occurrence and during the continuation of an
+ Event of Default and apply the proceeds thereof in or towards the
+ satisfaction of any sum or obligation due to Lessor or any
+ Beneficiary by Lessee which either Lessee fails to pay on the
+ due date or, as the case may be, the date of demand or after any
+ applicable grace period permitted for the payment thereof and/or
+ under this Lease or any of the Operative Documents or to
+ compensate Lessor or any Beneficiary for any sum which it may in
+ its discretion advance or expend as a result of the failure of
+ Lessee to comply with any of its obligations under this Lease or
+ any of the Operative Documents or in respect of any Default or
+ Event of Default, in respect of any indemnity payments, in any
+ case by delivering to the issuing bank or institutions a notice
+ in accordance with the terms of the Security Letter of Credit.
+ No such drawing under the Security Letter of Credit shall operate
+ to relieve Lessee of any obligation hereunder or constitute the
+ satisfaction of any such obligation (except to the extent of the
+ proceeds of such payment actually received by Lessor and the
+ Beneficiaries).
+
+ -82-
+
+
+
+
+
+ (iv) Upon any such drawing by Lessor or a Beneficiary, Lessee shall
+ reimburse the issuing bank and or a Beneficiary otherwise take
+ such action as may be necessary in order to restore the amount
+ available for drawing under the Security Letter of Credit to the
+ amount of such Security Letter of Credit specified in Exhibit C-1
+ promptly (and in any event within five Business Days) after such
+ Lessor's drawing and shall procure promptly that the issuing bank
+ or institutions forward a further letter of credit or guarantee
+ to Lessor evidencing that the amount has so been restored.
+
+ (v) Provided that no Default, or Event of Default has occurred and is
+ continuing, Lessor shall return the Security Letter of Credit to
+ Lessee for cancellation promptly after the date upon which the
+ Aircraft is redelivered to Lessor in accordance with the terms of
+ this Lease.
+
+ (vi) Notwithstanding the foregoing provisions of this Section 21(b),
+ Lessee may, on the Delivery Date, in lieu of the delivery to the
+ Beneficiaries of the Security Letter of Credit on the Delivery
+ Date, deposit with the Beneficiaries an amount in cash equal to
+ the amount of the Security Letter of Credit, as specified in
+ Exhibit C-1 (the "Security Letter of Credit Amount"). If Lessee
+ elects to make such deposit in lieu of the delivery of the
+ Security Letter of Credit, then Lessee shall effect such deposit
+ by wire transfer of funds to an account specified by one of the
+ Beneficiaries, and such Security Letter of Credit Amount shall
+ constitute part of the Security Deposit and shall be held by the
+ Beneficiaries as collateral security for the payment and
+ performance by the Lessee of its obligations hereunder and under
+ the other Operative Documents. The Beneficiaries shall have the
+ same rights, and shall be permitted to apply such Security Letter
+ of Credit Amount in the same manner, as is set forth in
+ Sections 18(i) and 21(a) above with respect to the Security
+ Deposit. Following the Delivery Date, Lessee shall have the
+ option of substituting the Security Letter of Credit described
+ above in this Section 21(b) in exchange for the Security Letter
+ of Credit Amount deposited with the Beneficiaries pursuant to
+ this clause (vi). To effect such substitution, Lessee shall
+ deliver the originally issued and executed Security Letter of
+ Credit (in the form and scope and in the amount required by
+ Exhibit C-1) to Lessor, and, promptly after Lessor's receipt
+ thereof, the Beneficiaries shall, so long as no Default or Event
+ of Default shall have occurred and be continuing, return the
+ Security Letter of
+
+ -83-
+
+
+
+
+
+ Credit Amount paid on the Delivery Date pursuant to this
+ clause (iv) to Lessee. If Lessee does not substitute the Security
+ Letter of Credit Amount with the Security Letter of Credit, so
+ long as no Default or Event of Default shall have occurred and be
+ continuing, the Security Letter of Credit Amount will be returned
+ to Lessee promptly after the return of the Aircraft pursuant to
+ Section 21(a).
+
+ -84-
+
+
+
+
+
+ IN WITNESS WHEREOF, Lessor and Lessee each have caused this Lease to be
+duly executed by their authorized officers as of the day and year first
+above written.
+
+
+ LESSOR:
+
+ WILMINGTON TRUST COMPANY, not in its
+ individual capacity but solely as
+ Trustee f/b/o ITOCHU AirLease
+ (Europe) Limited and Marubeni
+ Airleasing (U.K.)
+
+
+ By: /s/ Myfanwy Phillips Bonilla
+ ----------------------------
+ Name: Myfanwy Phillips Bonilla
+ Title: Assis. Vice President
+
+
+
+ LESSEE:
+
+ ALOHA AIRLINES, INC.
+
+
+ By:
+ ----------------------------
+ Name:
+ Title:
+
+
+ By:
+ ----------------------------
+ Name:
+ Title:
+
+
+
+
+
+
+ IN WITNESS WHEREOF, Lessor and Lessee each have caused this Lease to be
+duly executed by their authorized officers as of the day and year first
+above written.
+
+
+ LESSOR:
+
+ WILMINGTON TRUST COMPANY, not in its
+ individual capacity but solely as
+ Trustee f/b/o ITOCHU AirLease
+ (Europe) Limited and Marubeni
+ Airleasing (U.K.)
+
+
+ By:
+ ----------------------------
+ Name:
+ Title:
+
+
+
+ LESSEE:
+
+ ALOHA AIRLINES, INC.
+
+
+ By: /s/ James M. King
+ ----------------------------
+ Name: James M. King
+ Title: Vice President - Planning
+ and Development
+
+
+ By: /s/ Owen Sekimura
+ ----------------------------
+ Name: Owen Sekimura
+ Title: Staff Vice President -
+ Finance and Controller
+
+
+ -15-
+
+
+
+
+
+ EXHIBIT A
+ to
+ AIRCRAFT LEASE AGREEMENT
+
+ SCHEDULE AND DESCRIPTION OF AIRCRAFT
+
+
+
+ U.S.
+ MODEL AND REGISTRATION MANUFACTURER'S
+ITEM MANUFACTURER CONFIGURATION NUMBER SERIAL NUMBER
+-------------------------------------------------------------------------------------------------------------------
+
+
+Aircraft The Boeing Company 737-25A N685MA* 23791
+
+Engines Pratt & Whitney JT8D-17A P709529
+
+ Pratt & Whitney JT8D-17A P709530
+
+Auxiliary
+ Power Unit Garret GTCP85-129 P35274
+
+Landing Gears
+
+Left Gear Menasco 65-46100-52 MC02895P1486
+
+Right Gear Menasco 65-46100-52 MC02896P1486
+
+Nose Gear BF Goodrich 65-46200-63 T01406P1486
+
+
+
+
+*To be changed to N819AL
+
+
+
+
+
+ EXHIBIT B-1
+ to
+ Aircraft Lease Agreement
+
+ Aircraft Documents on the Delivery Date
+
+
+
+
+BOX 1: MIDWAY RECORDS
+-----------------------------------------------------------------------
+
+725ML C check 5/28/89 1 of 2, 2 OF 2 IS
+ MISSING
+-----------------------------------------------------------------------
+725ML Hrs / Cycles 1990 thru 4/30/91 1 of 1
+-----------------------------------------------------------------------
+725ML Log Pages Dec 89 - Jan / Feb 91
+-----------------------------------------------------------------------
+725ML Log Pages March 1991
+-----------------------------------------------------------------------
+725ML Log Pages Jan / Feb 91
+-----------------------------------------------------------------------
+725ML Log Pages Sept/Oct/Nov/Dec 90
+-----------------------------------------------------------------------
+725ML Log Pages July/Aug 90
+-----------------------------------------------------------------------
+725ML Log Pages April/May/June 90
+-----------------------------------------------------------------------
+725ML Log Pages Feb/March 90
+-----------------------------------------------------------------------
+725ML N Items Nov 90 / 1 of 1
+-----------------------------------------------------------------------
+725ML Job Cards 89/90 (structural repair
+ for aft cargo).
+-----------------------------------------------------------------------
+725ML Special Inspection 1 of 1
+-----------------------------------------------------------------------
+725ML Line, TS, B1, B2, B3, 1990/1991
+ B4, B5.
+-----------------------------------------------------------------------
+725ML CC1 Non-Routines 1 of 1
+-----------------------------------------------------------------------
+725ML Component Change Cardex files (copies)
+ Records
+-----------------------------------------------------------------------
+725ML Hrs / Cycles 1 book 88-89
+-----------------------------------------------------------------------
+725ML C check 11/23/90 (4 books)
+-----------------------------------------------------------------------
+
+
+
+BOX 5: MARKAIR RECORDS
+-----------------------------------------------------------------------
+
+N685MA ESN: 709529 / GTC shop visit DTD 6/29/92
+-----------------------------------------------------------------------
+N685MA ESN: 700520 / GTC shop visit DTD 2/17/93
+-----------------------------------------------------------------------
+N685MA ESN: 709529 / Historical Records
+-----------------------------------------------------------------------
+N685MA ESN: 709529 / Engine AD Log
+-----------------------------------------------------------------------
+N685MA ESN: 709529 / C2 Disk Traceability
+-----------------------------------------------------------------------
+N685MA ESN: 709530 / Historical Records
+-----------------------------------------------------------------------
+N685MA ESN: 709530 / AD Log
+-----------------------------------------------------------------------
+N685MA ESN: 709530 / WO LR 9122 (4 ea) Aviall 7/20/90
+-----------------------------------------------------------------------
+N685MA ESN: 709530 / WO# LR 0181 (4 ea) Aviall 6/92
+-----------------------------------------------------------------------
+N685MA APU S/N P-35274 / Historical Records
+-----------------------------------------------------------------------
+N685MA APU S/N P-35274 / AD Log
+-----------------------------------------------------------------------
+
+
+
+ -2-
+
+
+
+
+
+ EXHIBIT B-1
+ to
+ Aircraft Lease Agreement
+
+ Aircraft Documents on the Delivery Date
+
+
+
+
+
+
+BOX 6: MARKAIR RECORDS
+-----------------------------------------------------------------------
+
+N685MA Airframe AD Log
+-----------------------------------------------------------------------
+N685MA Appliance AD Log
+-----------------------------------------------------------------------
+N685MA MEA Logs
+-----------------------------------------------------------------------
+N685MA Master Flight Log
+-----------------------------------------------------------------------
+N685MA SB Log
+-----------------------------------------------------------------------
+N685MA Galley Installation
+-----------------------------------------------------------------------
+N685MA EO and Drawings
+-----------------------------------------------------------------------
+N685MA Galley Manuals and Certification
+-----------------------------------------------------------------------
+N685MA MEA'S
+-----------------------------------------------------------------------
+N685MA S.B.'s
+-----------------------------------------------------------------------
+N685MA Windshear TCAS & Mode S Installation Data
+-----------------------------------------------------------------------
+N685MA Fleet Campaign's
+-----------------------------------------------------------------------
+N685MA Structural Repairs
+-----------------------------------------------------------------------
+N685MA Landing Gear Records
+-----------------------------------------------------------------------
+N685MA Weight & Balance Data
+-----------------------------------------------------------------------
+N685MA Markair Kardex File (Parts Tags)
+-----------------------------------------------------------------------
+
+
+
+
+
+
+ EXHIBIT B-2
+ to
+ Aircraft Lease Agreement
+ ------------------------
+
+ AIRCRAFT DOCUMENTS ON THE RETURN OCCASION
+ -----------------------------------------
+
+ All the Aircraft Documents delivered on the Delivery Date or any other
+date (including without limitation those listed in Exhibit B-1) shall be
+returned to Lessor along with the following:
+
+A. Manuals (in U.S. units) to be delivered with aircraft, as required:
+
+ 1. FAA Approved Airplane Flight Manual.
+ 2. Weight and balance control and loading manual with supplements.
+ 3. Operations Manual.
+ 4. Aircraft Maintenance Manual.
+ 5. DDPG including MEL.
+
+B. In addition, one copy of each of the following manuals will be delivered
+ on the return occasion.
+
+ 1. Structural repair manual and supplements.
+ 2. Aircraft/engine/spare engine/APU Maintenance manual.
+ 3. Aircraft/engine/spare engine/APU illustrated parts catalog.
+ 4. Aircraft/engine/spare engine/APU wiring diagram, hook-up charts, wire
+ list and equipment list.
+ 5. Maintenance and inspection procedures manual.
+ 6. General Policies & Procedures Manual.
+
+C. Airworthiness Directives Documentation. In accordance with 14 C.F.R.
+ Part 121.380, Lessee shall deliver to Lessor on the return occasion all
+ records associated with compliance airworthiness directives ("A.D."):
+
+ 1. A single, complete and current A.D. status list of the airframe,
+ appliance, engines, spare engines and APU Airworthiness Directives and
+ mandatory FAA regulation applicable to the Airframe, Engines, Spare
+ Engines and APU. This list shall be provided in a format acceptable to
+ buyer and shall include, but not limited to:
+
+ a. A.D. number and revision number.
+ b. A.D. title.
+ c. Aircraft serial number, engine serial number and APU serial number.
+ d. Engineering documentation reference.
+ e. Manufacturers service bulletin references and cross-references, where
+ appropriate.
+
+
+
+
+
+ f. Specifications of terminated or repetitive status.
+ g. Date of initial accomplishment.
+ h. Date of last maintenance accomplishment, if repetitive.
+ i. Name and serial number of the internal maintenance form used to
+ document accomplishment, if applicable.
+ j. Specification of means by which compliance was accomplished
+ (e.g., modified, repaired, inspected).
+
+ The list shall be typed, certified and signed by an authorized quality
+assurance representative of the Lessee.
+
+ 2. Legible copies of the completion documents that accomplish each A.D.
+If the A.D. is a repetitive inspection, documentation of the last
+accomplishment is sufficient. These documents must have the date of
+accomplishment, the signature of a certified mechanic and/or inspector and
+the mechanic's/inspector's certificate number or the repair station number
+of the mechanic accomplishing the work. The document must reference the A.D.
+number and company authorization which covered the A.D.
+
+ 3. Exemptions or deviations granted by the FAA (or equivalent) to Lessor
+or Lessee, as the case may be, on A.D. compliance, including copy of
+exemption request.
+
+ 4. Items 2 or 3 will be provided in individual document packages for
+each A.D. Each package will contain all documents relative to that
+A.D./Aircraft combination.
+
+D. Engineering Documentation
+
+ 1. A single, current list of airframe, engines, spare engines and APU
+ service bulletins, engineering orders, major repairs and supplemental
+ type certificates completed on the airframe, each engine, spare engine
+ or the APU including references to engineering documentation, shall be
+ provided with information similar to that described in item C.1 above.
+
+ 2. A current copy of all engineering documentation related to aircraft
+ alterations and repair configuration changes. This shall include
+ documentation for work done by any previous owner or operator, the
+ manufacturer or any vendor. An index shall be provided. A current copy
+ of all repairs that require follow-up action.
+
+ 3. Data package covering all non-manufacture/non-FAA approved repairs or
+ alterations, including the submittal to the FAA for an STC or Form 337,
+ if applicable.
+
+ 4. All open engineering deviations or material review board (MRB) records
+ applicable to the airframe, engines, spare engines, APU's, parts,
+ components and piece parts.
+
+ -2-
+
+
+
+
+
+E. Additional Documentation
+
+ 1. Maintenance program document.
+ 2. Location map of emergency equipment, with description.
+ 3. Interior configuration drawings.
+ 4. Weight and balance current status.
+ 5. FAA Export Certificate of airworthiness, if applicable.
+ 6. FAA certification of all components.
+ 7. FAA import certificate of airworthiness for all non-U.S. spare parts.
+ 8. Complete set of maintenance operations specifications.
+ 9. Master aircraft flight log.
+
+F. Individual aircraft, engine and spare engine records
+
+ 1. Major structural damage reports, if any.
+ 2. FAA Form 337, or equivalent, for major repair and alterations, if any.
+ 3. Engine/spare engine last shop visit report.
+ 4. Documentation and records concerning the last aircraft overhaul.
+ 5. Engine/spare engine trend monitoring data.
+ 6. Log books for last 12 months.
+ 7. List of open items, if any.
+ 8. Declaration for aircraft accident and major repairs, if any.
+ 9. Listing of aircraft, engine and spare engine components status by
+ P/N - S/N description position - TBO - TSI - TSO - total time.
+ 10. Engine or spare engine life limited parts status and history (certified).
+ 11. Engine or spare engine log book, if applicable.
+ 12. APU - same documentation as items F. 10 and 11 above.
+ 13. Delivery letter w/serial number, total time/total cycles, time to the
+ next inspection and the time to the inspection or removal of
+ engines/spare engines or components.
+ 14. All engines, spare engines and APU records, up to and including the
+ last major overhaul or heavy maintenance and shop visits.
+ 15. Manufacturers delivery document for engines, spare engine and APU.
+ 16. Last test cell run documents for engines, spare engines and APU.
+ 17. Borescope history for current installation for engines and spare
+ engine, if applicable.
+ 18. Aircraft readiness log (manufacturers).
+ 19. Component records - time controlled components historical records
+ w/installation and serviceability tags.
+
+ -3-
+
+
+
+
+
+ EXHIBIT C-1
+ to
+ AIRCRAFT LEASE AGREEMENT
+
+ FINANCIAL INFORMATION
+
+Basic Rent: The Basic Rent payable monthly in advance on
+ each of the Basic Rent Payment Dates shall be
+ $115,279.80 per month.
+
+Basic Rent Payment Date: The Delivery Date and the date in each
+ subsequent month that numerically corresponds
+ to the Delivery Date; PROVIDED, that if any
+ such subsequent month does not have a date
+ that numerically corresponds to the Delivery
+ Date, the last day of such month; PROVIDED,
+ FURTHER, that if such date is not a Business
+ Day, then the Basic Rent Payment Date shall
+ be the next preceding Business Day.
+
+Casualty Value: As at the Delivery Date and for the one year
+ period following the Delivery Date, the
+ Casualty Value shall be $14,000,000, and the
+ Casualty Value shall be reduced by $500,000
+ effective on each anniversary thereof which
+ reduced Casualty Value shall remain effective
+ for the one year period following the date of
+ such reduction, as follows:
+
+
+
+ DATE OF CASUALTY
+ REDUCTION VALUE
+ --------------- -----------
+
+ Delivery Date $14,000,000
+ 1st anniversary 13,500,000
+ 2nd anniversary 13,000,000
+ 3rd anniversary 12,500,000
+ 4th anniversary 12,000,000
+ 5th anniversary 11,500,000
+ 6th anniversary 11,000,000
+ 7th anniversary 10,500,000
+ and after
+
+
+Security Deposit: Means a cash deposit in the amount of U.S.
+ $115,000 paid by Lessee to Lessor or the
+ Beneficiaries on November 10, 1995, together
+ with any increases in such amount pursuant
+ to Section 21(a) of the Lease and the
+ following definition of Security Deposit
+ Adjustment.
+
+Security Deposit Adjustment: The Security Deposit Adjustment Amount shall
+ be the amount, if any, by which the amount of
+ monthly Basic Rent specified above exceeds
+ the amount of the Security Deposit specified
+ above ($279.80), and an amount equal to such
+ difference shall be paid by Lessee to the
+ Beneficiaries on the Delivery Date for
+ application to the Security Deposit.
+
+Security Letter of Credit: Means an irrevocable letter of credit or
+ guarantee in the amount at all times the
+ greater of US$ 230,000 and an amount equal to
+ the aggregate of two (2) months Basic Rent
+ payments ($230,559.60), issued by an
+ institution and in form and substance
+ acceptable to Lessor and Beneficiary.
+
+
+
+
+ EXHIBIT C-2
+ to
+ AIRCRAFT LEASE AGREEMENT
+
+ DEFINITIONS AND VALUES
+
+Beneficiary: Means each of ITOCHU AirLease (Europe)
+ Limited and Marubeni Airleasing (U.K.)
+ Limited as beneficiary under the Trust
+ Agreement, together in each case with its
+ successors and permitted assigns under the
+ Trust Agreement.
+
+Beneficiary's Address: For ITOCHU AirLease (Europe) Limited:
+
+ Itochu AirLease (Europe)
+ Limited
+ 1, Earlsfort Centre
+ Hatch Street
+ Dublin 2
+ Ireland
+
+ Fax: 353-1-661-3278
+ Attention: Secretary
+
+ with copy to:
+
+ ITOCHU AirLease Corporation
+ NXB Aoyama Building 5 Fl.,
+ 26-37 Minami-Aoyama 2-Chome,
+ Minato-Ku,
+ Tokyo,
+ Japan
+
+ Telex: 2423154 TKAFCJ
+ Fax: (813) 3497 8145
+ Attention: the President
+
+ For Marubeni Airleasing (U.K.)
+ Limited:
+
+ 120 Moorgate
+ London EC2M 6SS
+ United Kingdom
+ Telecopy No.: (44) 171-826-8835
+
+Deductible Amount: $500,000
+
+Delivery Location: such state in the continental United States
+ reasonably determined by the
+
+
+
+
+Trustee: means Wilmington Trust Company, a Delaware
+ banking corporation, not in its individual
+ capacity but solely as trustee under the
+ Trust Agreement or any successor Trustee
+ thereunder.
+
+ -6-
+
+
+
+
+
+ COUNTERPART NO.____
+
+ EXHIBIT D
+ to
+ AIRCRAFT LEASE AGREEMENT
+
+ LEASE SUPPLEMENT NO. 1
+
+ TO AIRCRAFT LEASE AGREEMENT
+ DATED AS OF NOVEMBER 29, 1995
+
+ LEASE SUPPLEMENT NO. 1, dated December 1, 1995, between
+Wilmington Trust Company, not in its individual capacity but solely as Trustee
+f/b/o ITOCHU AirLease (Europe) Limited and Marubeni Airleasing (U.K.) Limited
+("Lessor"), and Aloha Airlines, Inc., a company organized under the laws of
+Hawaii ("Lessee").
+
+ Lessor and Lessee have previously entered into that certain
+Aircraft Lease Agreement dated as of November 29, 1995 (herein called the
+"Lease" and the defined terms therein being hereinafter used with the same
+meaning). The Lease provides for the execution and delivery from time to time
+of a Lease Supplement substantially in the form hereof for the purpose of
+leasing the aircraft described below under the Lease as and when delivered by
+Lessor to Lessee in accordance with the terms thereof.
+
+ The Lease relates to the Aircraft, Parts and Engines as more
+precisely described below. A counterpart of the Lease is attached hereto and
+this Lease Supplement and the Lease shall form one document.
+
+ In consideration of the premises and other good and
+sufficient consideration, Lessor and Lessee hereby agree as follows:
+
+ 1. Lessor hereby delivers and leases to Lessee under the
+Lease and Lessee hereby accepts and leases from Lessor under the Lease, that
+certain Boeing Model B-737-25A commercial jet aircraft, Airframe and the two
+Pratt & Whitney JT8D-17A Engines (each of which engines has 750 or more rated
+takeoff horsepower or the equivalent of such horsepower) described in Schedule
+1 attached hereto and made a part hereof (the "Delivered Aircraft"). The
+status of the Aircraft and Engines upon delivery thereof to Lessee shall be as
+described on Schedule 2 attached hereto and made a part hereof.
+
+ 2. (A) The Delivery Date of the Delivered Aircraft is the
+date of this Lease Supplement set forth in the opening paragraph hereof.
+
+ (B) The time of the delivery and acceptance of the
+Delivered Aircraft is ____:____ __. m., at which time the
+
+
+
+
+Delivered Aircraft is located in the airspace over the State of Texas, United
+States of America (such location is herein called the "Delivery Location").
+
+ 3. The Term for the Delivered Aircraft shall commence on the
+Delivery Date and shall end on the Expiration Date, which shall be December 1,
+2003.
+
+ 4. The amount of Basic Rent for the Delivered Aircraft is
+payable in advance on each Basic Rent Payment Date.
+
+ 5. Lessee hereby confirms to Lessor that (i) the Delivered
+Aircraft and each Engine installed thereon or belonging thereto have been duly
+marked in accordance with the terms of Section 6(f) of the Lease, (ii) Lessee
+has accepted the Delivered Aircraft for all purposes hereof and of the Lease
+and (iii) Lessee has inspected the Delivered Aircraft and the Delivered
+Aircraft satisfies the conditions set forth in the Lease.
+
+ 6. Lessor represents that all of its representations and
+warranties contained in the Lease are true and correct on the date hereof.
+
+ 7. All of the terms and provisions of the Lease Agreement are
+hereby incorporated by reference in the Lease Supplement to the same extent as
+if fully set forth herein.
+
+ 8. The Lease Supplement may be executed in any number of
+counterparts, each of such counterparts, except as provided in Section 20 (e)
+of the Lease, shall for all purposes be deemed to be an original and all such
+counterparts shall together constitute but one and the same Lease Supplement.
+
+THIS LEASE SUPPLEMENT HAS BEEN EXECUTED IN MULTIPLE COUNTERPARTS, OF WHICH
+THERE IS ONE COUNTERPART DESIGNATED AS "COUNTERPART NO. 1 - CHATTEL PAPER
+ORIGINAL" AND ALL OTHER COUNTERPARTS SHALL BE DESIGNATED WITH NUMBERS OTHER
+THAN 1. TO THE EXTENT THAT THIS LEASE SUPPLEMENT CONSTITUTES CHATTEL PAPER
+UNDER THE UNIFORM COMMERCIAL CODE, AS IN EFFECT IN ANY APPLICABLE JURISDICTION,
+NO SECURITY INTEREST MAY BE CREATED OR PERFECTED BY POSSESSION OF ANY
+COUNTERPART OF THIS LEASE SUPPLEMENT OTHER THAN SAID COUNTERPART NO. 1.
+
+
+ -2-
+
+
+
+
+ IN WITNESS WHEREOF, Lessor and Lessee each have caused this
+Lease Supplement to be duly executed by their authorized officers as of the
+day and year first above written.
+
+ LESSOR:
+ WILMINGTON TRUST COMPANY, not
+ in its individual capacity but
+ solely as Trustee
+
+ By:__________________________________
+ Name:
+ Title:
+
+ LESSEE:
+
+ ALOHA AIRLINES, INC.
+
+ By:__________________________________
+ Name:
+ Title:
+
+ By:__________________________________
+ Name:
+ Title:
+
+ -3-
+
+
+
+
+
+ SCHEDULE 1
+ TO
+ LEASE SUPPLEMENT NO. 1
+
+ BOEING 737-25A
+ AIRFRAME
+
+
+
+MANUFACTURER'S U.S. REGISTRATION
+SERIAL NO. NUMBER TOTAL TIME* TOTAL CYCLES*
+-------------- ----------------- ----------- -------------
+
+
+23791 N685MA 23,859 14,057
+ (to be changed to
+ N819AL)
+
+
+
+ INSTALLED PRATT & WHITNEY
+ ENGINES
+
+
+
+MODEL NO. SERIAL NO. TOTAL TIME* TOTAL CYCLES*
+-------------- ----------------- ----------- -------------
+
+
+
+JT8D-17A P709529 20,960 11,883
+
+JT8D-17A P709530 22,100 12,908
+
+
+
+ INSTALLED
+ AUXILIARY POWER UNIT
+
+
+
+
+MODEL NO. SERIAL NO. TOTAL TIME TOTAL CYCLES
+-------------- ----------------- ----------- -------------
+
+GTCP85-129 P35274 (TACH Reading)
+
+
+
+ITEM PART NUMBER SERIAL NUMBER DESCRIPTION POSITION/QUANTITY
+ NO.
+
+ 41 65-52805-317 D01011 Flight Inst Acc Unit E&E 1
+ 42 172938-001 6624/6615 Switch Unit E&E 4
+ 43 6595/2678 E&E
+ 44 2590650-901 05872772 Mach Trim Comp E&E 1
+ 45 2588810-904 01851529 Pitch Computer E&E 1
+ 46 2588812-902 06871639 Roll Computer E&E 1
+ 47 65-52812-107 D00496 Auto Pilot Acc Unit E&E 1
+ 48 457300-0430 3394 Omega RPU E&E 1
+ 49 25582-003 8305313 Battery E&E 1
+ 50 2587335-11 84054627 Vert Gyro E&E 1
+ 51 2587335-11 8415034 Vert Gyro E&E 1
+ 52 2587335-11 0034163 Vert Gyro E&E 1
+ 53 2588302-4 0067044 Direction Gyro E&E 1
+ 54 2588302-4 84056507 Direction Gyro E&E 1
+
+
+
+
+
+
+ SCHEDULE 2
+ TO
+ LEASE SUPPLEMENT NO. 1
+
+ Aircraft Status
+ ON THE DELIVERY DATE
+
+AIRFRAME HEAVIEST CHECK ("D" CHECK OR EQUIVALENT):
+
+
+
+
+ Interval: 28,000 (Hrs)
+ Time Remaining: 4,141
+
+LANDING GEAR OVERHAUL:
+
+ Interval: Main Gear 24,000 cycles
+ Nose Gear 24,000 cycles
+
+ Time Remaining: Left Gear 9,943 cycles
+ Right Gear 9,943 cycles
+ Nose Gear 9,943 cycles
+
+ENGINES:
+
+ Overhaul Interval:
+ Time Remaining (S/N P709529):
+ Time Remaining (S/N P709530):
+
+ Hot Section Inspection:
+
+ Interval:
+ Time Remaining (S/N P709529):
+ Time Remaining (S/N P709530):
+
+TIME REMAINING TO FIRST RESTRICTION:
+
+ Engine S/N: P709529
+ Hours: 9,040
+ Cycles: 3,117 (C07)
+ Engine SIN: P709530
+ Hours: 7,900
+ Cycles: 2,092 (C07)
+
+
+
+
+
+
+
+
+
+AUXILIARY POWER UNIT:
+
+ Hot Section Inspection:
+
+ Interval: On Condition
+ Time Remaining: On Condition
+
+ Time Remaining to Next Limited
+ or Scheduled Removal: On Condition
+
+
+
+ -2-
+
+
+
+
+
+ SCHEDULE 3
+ TO
+ LEASE SUPPLEMENT NO. 1
+
+ Aircraft Status on The Delivery Date
+
+ Time and Calendar Controlled Components
+ (excluding engines, landing gear, and APU).
+
+
+ Effective date: 10/20/95
+
+
+
+ AIRCRAFT
+COMPONENT PART HR.
+POSITION NUMBER SERIAL # LIMIT TSI REMAINING
+--------- -------- --------- ------- ---------- ---------
+
+
+ATA 21
+------
+Valve, Safety 720737-5 L-0001 14000H 13,817.6 3,977.2
+RH OH 7/3/92
+Valve, Safety 720737-5 830678 14000H 13,817.6 3,977.2
+LH OH 7/3/92
+Gasper Fan 21520-1 063 6000H 22,594.7 4,735
+ OH 6/15/95
+
+ATA 23
+------
+Recorder, 93A100-80 52428 72 Mo. 14, 623.2H 2 Yrs., 6 Mo.
+Voice RPL
+(Battery)
+
+ATA 24
+------
+APU Generator 976J498-1 YK13122 6000H 20,471 2,602H
+ HT 10/20/94
+Panel Bus 915F213-2 WE1347B 6000H 19,313.9 1,454H
+Protection BC 6/11/94
+Gen. Cont. 948F458-1 NM4963 6000H 19,616.8H 1,757H
+ Unit BC 7/18/94
+ APU
+Gen. Cont. 948F458-1 PM5071 6000H 23,697H 5,838H
+ Unit BC 10/3/95
+ Left
+Gen. Cont. 948F458-1 XJ2186 6000H 21,639.9H 3,780.9H
+ Unit BC 2/18/95
+ Right
+
+ATA 25
+------
+Life Vest KSE-35L8 527-691 24 Mo. 8/30/95 9 Mos.
+ Rt. Aft OH 23,304H
+Life Vest AV-35 541-064 24 Mo. 2/95 14 Mos.
+ Co-pilot O H 21,530H
+Slide, Evac D31354- 1112 24 Mo. 8/23/94 10 Mos.
+ Rt. Aft 425 OH 19,939.7H
+
+
+
+
+
+
+
+ SCHEDULE 3
+ TO
+ LEASE SUPPLEMENT NO. 1
+
+ Aircraft Status on The Delivery Date
+
+ Time and Calendar Controlled Components
+ (excluding engines, landing gear, and APU).
+
+
+
+
+
+
+
+
+ EXHIBIT E
+ to
+ AIRCRAFT LEASE AGREEMENT
+
+ RETURN CONDITION REQUIREMENTS
+
+On the Return Occasion the Aircraft, subject to fair wear and tear generally,
+will be in the following condition:
+
+1 GENERAL CONDITION
+
+The Aircraft shall:
+
+ (a) be clean by United States commercial airline standards;
+
+ (b) have installed the full complement of Engines, Parts and
+ other equipment, parts and accessories and loose equipment
+
+ (i) which were installed on the Aircraft at the Delivery Date (or
+ any replacements thereof made in accordance with the terms of
+ this Lease);
+
+ (ii) as would be required by this Lease to be installed on the
+ Aircraft if Lessee were to continue operating the same in
+ continued regular service;
+
+ (iii) each operating within limits approved by the FAA and fully
+ functioning in accordance with its intended use;
+
+ (iv) which are in a condition suitable to permit immediate
+ operation of the Aircraft in commercial service under the
+ FAA; and
+
+ (v) be in strict compliance with the Maintenance Program,
+ including the corrosion prevention control program
+ (CPCP/D6-38528) and the aging aircraft program (D6-38505 as
+ applicable per the airworthiness directive or per
+ effectivity).
+
+ (c) (i) have in existence a current, valid and existing FAA
+ certificate of airworthiness for airline operation or an FAA
+ export certificate of airworthiness, as elected by the
+ Beneficiaries in accordance with the terms of Section 16(d);
+
+ (ii) comply with (A)(1), all the requirements of US FAR Part 36
+ Appendix C (Stage II) noise compliance (without waiver,
+ restriction, deferment or exception) and (2) all the
+ requirements for operation under United States FAR Part 121
+ or (B)(1) if requested by the Beneficiaries in order to
+ comply with the requirements of any other country selected by
+ the Beneficiaries which follows the FAA rules and regulations
+ contemplated by U.S. FAR Part 129, U.S. FAR
+
+
+
+
+
+ Part 121 and U.S. FAR Part 36, Appendix C and (2) all civil
+ aviation regulations, orders, rules and airworthiness
+ directives of such country to permit the immediate
+ re-registration and securing of a certificate of
+ airworthiness in such country for the Aircraft (provided that
+ (i) such regulations, orders, rules and Airworthiness
+ Directives are no more restrictive than those of the FAA, and
+ (ii) to the extent the work, standards or requirements
+ required to comply with this clause (B) is beyond the work,
+ standards or requirements otherwise required by Section 16 or
+ this Exhibit E, such work, standards or requirements will be
+ performed pursuant to work orders and workscopes agreed upon
+ by Lessee and the Beneficiaries and at the Beneficiaries'
+ cost, and the Term will not be deemed extended as a result of
+ such additional work). If requested to do so by the
+ Beneficiaries, Lessee shall cooperate reasonably to comply
+ with all requirements for domestic and over-water operations
+ under U.S. FAR Part 129, provided that (i) the Beneficiaries
+ inform Lessee of such requirements, (ii) Lessee and the
+ Beneficiaries mutually agree upon the work required for such
+ compliance and (iii) the Beneficiaries bear the cost of such
+ compliance to the extent such compliance requires parts or
+ labor not otherwise required to be performed by Lessee
+ pursuant to Section 16 or this Exhibit E, and the Term will
+ not be deemed extended as a result of such additional work
+ solely to comply with the requirements for domestic and
+ over-water operations under U.S. FAR Part 129.
+
+ (d) (i) comply with the Detailed Specifications except and to the
+ extent that such specifications have been altered as required
+ or permitted by this Lease;
+
+ (ii) except as otherwise provided in the Lease or in the
+ Supplemental Agreement or as consented to by each
+ Beneficiary, shall be in substantially the same configuration
+ (including but not limited to, interior seating
+ configuration, galleys and lavatories) as when the Aircraft
+ was originally delivered to Lessee hereunder, save where
+ changes have been made which do not impair its marketability
+ or decrease its value, as determined by each Beneficiaries'
+ sole determination;
+
+ (e) (i) have had carried out on the Aircraft the final
+ completion and termination of all open, deferred or
+ continued maintenance items, and shall have had completed, on
+ a terminating action basis, where terminating action is due
+ for compliance, all outstanding mandatory, required, ordered
+ and "earliest convenience" airworthiness directives issued by
+ the FAA and all mandatory, alert, earliest convenience and
+ recommended service bulletins (so long as the Manufacturer
+ issues such service bulletins) (insofar, in the case of
+ recommended service bulletins, as required by the
+ Beneficiaries after consultation with Lessee, in which case
+ any such recommended service bulletin shall be carried out
+ (y) at Lessee's expense if Lessee shall be carrying or shall
+ have carried out such service bulletin with respect to a
+ majority of aircraft comprising its 737 fleet or (z) at the
+ Beneficiaries' expense pursuant to the workscope or work
+ order agreed upon by the Beneficiaries and Lessee, if Lessee
+ shall not be carrying out such service bulletin with respect
+ to a majority of aircraft comprising its 737 fleet) issued by
+ the Manufacturer in
+
+
+ -2-
+
+
+
+
+
+ each case which have a compliance or termination date on or
+ before the date three (3) months (six (6) months upon request
+ of and at the cost of the Beneficiaries and the Term will not
+ be deemed extended solely as a result of such additional work
+ required to terminate the airworthiness directives that are
+ due after three (3) months following the Return Occasion and
+ requested by the Beneficiaries) after the date of the Return
+ Occasion (without waiver, restriction, deferment, exception,
+ carry over, watch or repetitive status items or being on
+ watch, except, in the case of airworthiness directives, (i)
+ as expressly provided in the Supplemental Agreement or (ii)
+ as Lessor shall otherwise specify in writing to Lessee);
+
+ (ii) have had permanently and properly repaired any damage to
+ the Aircraft caused by ground handling equipment or impact
+ damage caused by foreign objects which occurred during the
+ Term that exceeds Manufacturer's limits for operation without
+ restrictions or repetitive inspections;
+
+ (iii) not have any open, deferred, continued or placarded log book
+ items;
+
+ (f) all exterior logos and other proprietary marks shall be removed or
+ painted over in accordance with proper industry practice and in a
+ workmanlike manner; provided that, at the Beneficiaries' request,
+ Lessee shall have Lessee's paint and markings stripped and
+ re-painted white or in the livery selected by the Beneficiaries so
+ long as the Beneficiaries bear the cost of such stripping and
+ repainting, as mutually agreed by the Beneficiaries and Lessee
+ Lessor shall be responsible for supplying the new livery template
+ and sufficient paint; and the Term will not be deemed extended
+ solely as a result of the additional work required at the
+ Beneficiaries' request;
+
+ (g) have all signs and decals in English, clean, secure and legible;
+
+ (h) have heretofore been maintained in accordance with Section 6(d) of
+ this Lease with the same care and consideration for the technical
+ condition of the Aircraft as if it were to have been kept in
+ continued regular service and operation by Lessee (it being
+ understood that the obligations hereby imposed upon Lessee by
+ reference to the care and consideration by Lessee as if the
+ Aircraft were to be kept in continued regular service is not
+ intended to supersede Lessee's obligations herein with respect to
+ the specific and express terms of this Lease with respect to the
+ maintenance, repair, alteration or return of the Aircraft);
+
+ (i) have had all repetitive airworthiness directives and mandatory
+ orders and regulations in at least half-life or better condition;
+ and
+
+ (j) have, upon return of the Aircraft, returned at least one (1)
+ complete set of the Aircraft Documents related to the Aircraft and
+ its maintenance and operation records in up-to-date status.
+
+Lessee shall deliver to Lessor, at no cost to Lessor or the Beneficiaries,
+all service bulletin kits relating to the Aircraft which have been furnished
+to Lessee free of charge by any manufacturer for installation on the Aircraft
+and which have not been so installed. In the event that such uninstalled
+
+
+ -3-
+
+
+
+
+
+ Beneficiaries, taking into consideration
+ and determination Tax consequence to Lessor
+ and the Beneficiaries, which state shall be
+ specified in the Lease Supplement.
+
+Engine Manufacturer: Pratt & Whitney.
+
+Estimated Delivery Date: November 30, 1995.
+
+Last Basic Rent The Last Basic Rent Payment
+Payment Date: Date shall be the ninety-sixth (96th)
+ Basic Rent Payment Date.
+
+Lease Identification: This [Aircraft] [Engine] is owned by
+ Wilmington Trust Company, not in its
+ individual capacity but solely as Trustee
+ for the benefit of ITOCHU AirLease
+ (Europe) Limited and Marubeni Airleasing
+ (UK) Limited, is leased to Aloha Airlines,
+ Inc. and may not be operated by a third
+ party without the prior written consent
+ of Wilmington Trust Company, as trustee.
+
+Lessee's Address: Aloha Airlines, Inc.
+ 371 Aokea Street
+ Honolulu, Hawaii 96819
+ Fax No.: (808) 833-3100
+ Attention: Vice President -
+ Finance and Chief Financial
+ Officer
+
+Lessor's Address: Wilmington Trust Company
+ Rodney Square North
+ Wilmington, Delaware 19890
+ Attention: Corporate Trust
+ Administration
+
+ Fax No.: (302) 651-8882
+
+Manufacturer: The Boeing Company.
+
+Other Lease: Means each of Aircraft Lease Agreement
+ A, dated as of December 1, 1991, between
+ Lessee and First Security Bank of Utah,
+ National Association, as owner trustee,
+ and Aloha Airlines, Inc., as lessee, and
+ Aircraft Lease Agreement B, dated as of
+ December, 1991,
+
+
+ -2-
+
+
+
+
+
+ between First Security Bank of Utah,
+ National Association, as owner trustee,
+ and Aloha Airlines, Inc., as lessee, as
+ each may be supplemented or amended
+ from time to time. The Other Leases relate
+ to the leasing to Lessee of aircraft other
+ than the Aircraft.
+
+Overdue Rate: One month LIBOR, as in effect from time to
+ time, plus 3-1/2% per annum, but not to
+ exceed the maximum amount permitted by Law.
+
+Payment Locations: For ITOCHU AirLease (Europe) Limited:
+
+ Account Name: Citibank New York (ABA No.
+ 021000089) for the account of Citibank
+ Dublin (account no. 10994598) in favor of
+ ITOCHU AirLease (Europe) Limited (account
+ no. 1-00-6793-017)
+
+ For Marubeni Airleasing (U.K.) Limited:
+
+ Harris Bank International Corporation
+ ABA #026-007-760
+ for the credit of
+ The Mitsubishi Trust & Banking Corporation
+ London Branch A/C#16011100
+ UID No.107280
+ for further credit to
+ Marubeni Airleasing (UK) Ltd.
+ Account #020-404391
+
+ With respect to payments by
+ Lessee of Basic Rent hereunder, 62.682% of
+ such amounts shall be paid to ITOCHU
+ AirLease (Europe) Limited (as above
+ provided) and 37.318% of such amounts
+ shall be paid to Marubeni Airleasing (U.K.)
+ Limited (as above provided).
+
+Permitted Jurisdictions: Any member country of the European Union
+ and the following:
+
+
+ -3-
+
+
+
+
+
+
+ Argentina
+ Australia
+ Austria
+ Brazil
+ Canada
+ Chile
+ Finland
+ Hong Kong
+ Iceland
+ New Zealand
+ Norway
+ Puerto Rico
+ Singapore
+ South Africa
+ Sweden
+ Switzerland
+ United States of America
+
+Permitted Sublessees: Aer Lingus
+ Air Canada
+ Air France
+ Air-Inter
+ Air New Zealand
+ Air Portugal
+ Alitalia
+ All Nippon Airways
+ Ansett
+ Austrian Airlines
+ Braathens
+ British Airways
+ British Midland Airways
+ Canadian Airlines International
+ Cathey Pacific
+ China Airlines
+ Condor
+ Iberia
+ Japan Airlines
+ Japan Air System
+ Japan Asia Airways
+ Korean Air
+ Lufthansa
+ Malaysian Airlines System
+ Martinair Holland
+ Qantas
+ Royal Dutch Airlines
+ Sabena
+ Scandinavian Airlines System
+ Singapore Airlines
+ SwissAir
+ Thai Airways International
+ Alaska Airlines
+ Continental Airlines
+ Delta Airlines
+ Southwest Airlines
+ United Air Lines
+ USAir
+
+ -4-
+
+
+
+
+
+ Provided always that, at any time during
+ the Term, the Beneficiaries may, by notice
+ to Lessee, notify Lessee that any such
+ Person no longer constitutes a Permitted
+ Sublessee (which determination shall be
+ made on a reasonable basis by the
+ Beneficiaries), whereupon such Person shall
+ no longer constitute a Permitted Sublessee.
+
+Public Liability and Five Hundred Million Dollars
+Property Damage Insurance: ($500,000,000) per occurrence or the
+ highest amount carried by Lessee with
+ respect to other narrow body aircraft in
+ its fleet.
+
+Return Location: Such location in the Western Continental
+ United States as may be selected by
+ Lessor. If the Beneficiaries request
+ Lessee to return the Aircraft to a
+ location other than in Western
+ Continental United States, other than
+ following a Default or an Event of Default,
+ then Lessee and the Beneficiaries will
+ cooperate reasonably and in good faith to
+ arrange for such ferry flight, and the
+ Beneficiaries will bear any additional
+ ferry flight cost and expenses incurred by
+ Lessee (without markup) that are beyond
+ those costs and expenses that would have
+ been incurred if the Aircraft were ferried
+ to a location in the Western United States.
+
+Supplemental Agreement: means the Letter Agreement No. 1 dated as
+ of November 29, 1995 executed by the parties
+ hereto.
+
+Trust Agreement: means Trust Agreement N685MA, dated as of
+ December 27, 1991 between Wilmington Trust
+ Company and each Beneficiary, as amended
+ modified or supplemented from time to time.
+
+
+ -57-
+
+
+
+
+
+kits were purchased, manufactured or ordered by Lessee and in stock or
+undelivered pursuant to purchaser order, then Lessor shall have a right to
+purchase such kits from Lessee, at Lessee's cost and expense (without mark-up),
+such right being exercisable at any time during the sixty (60) day period
+following the return of the Aircraft; provided that such period will be
+extended to one hundred eighty (180) days if such kit is ordered by Lessee upon
+request by the Beneficiaries.
+
+Lessee shall have undertaken to use all reasonable endeavors (short of
+grounding any aircraft or rescheduling any flight) to ensure that the Aircraft
+shall have, on return of the Aircraft, no installed Part which shall have a
+total time since new greater than 150% that of the Airframe. In any event,
+Lessee shall not adversely discriminate against the Aircraft regarding the age
+of Parts installed in the Aircraft compared with other aircraft operated by
+Lessee.
+
+2 AIRCRAFT MAINTENANCE ON RETURN
+
+Prior to the Return Occasion and immediately prior to the return of the
+Aircraft to Lessor, at Lessee's own expense:
+
+ (a) AIRFRAME
+
+ (i) Lessee shall, if requested by a Beneficiary, perform, or
+ procure the performance of, a full and complete C Check or its
+ equivalent including all phases, multiples and lower checks
+ in accordance with the Maintenance Program. Lessee shall
+ correct any deficiencies revealed during such C Check
+ inspection using Manufacturer's limits and guidelines, as
+ witnessed by the Beneficiaries' representative. Lessor will
+ contribute to the cost of such C Check or equivalent by
+ paying to Lessee the incrementary cost (without markup) to
+ perform such C Check or equivalent.
+
+ (ii) The Airframe shall have no less than 4,000 Flight Hours
+ remaining to the next due D Check and no less than 1,500
+ Flight Hours remaining to the next due C Check under Lessee's
+ Maintenance Program; PROVIDE, HOWEVER, that Lessee shall be
+ permitted to return the Aircraft with less than 4,000 Flight
+ Hours remaining until the next due D Check under Lessee's
+ Maintenance Program if (A) at time of the Return Occasion,
+ the Beneficiaries have entered into an agreement for the
+ lease or sale of the Aircraft with a subsequent lessee, buyer
+ or user of the Aircraft (a "Subsequent User"), and (B) the
+ bridging of the Maintenance Program to the Subsequent User's
+ maintenance program is approved in writing by the civil
+ aviation authority having jurisdiction over the Subsequent
+ User in a manner that permits the Subsequent User to actually
+ have available to it as of the Return Occasion no less than
+ 4,000 Flight Hours remaining until the next due D check or
+ its equivalent in accordance with the Subsequent User's
+ maintenance program.
+
+ (iii) In addition to (i) above, if the Beneficiaries request that
+ Lessee perform, or procure the performance of, D Check or
+ equivalent, Lessee will perform, or procure the performance
+ of, such D Check or equivalent, with Lessor contributing to
+ the cost of such D Check or equivalent by paying to Lessee
+ the incrementary cost (without markup) to perform such D
+ Check or equivalent; provided that the Term will not be
+ deemed to be extended solely because of such D Check or
+ equivalent.
+
+ (b) The condition of the Aircraft and installed systems upon return to
+ Lessor shall be as follows:
+
+
+
+
+
+ (i) each Engine and each life limited Part therein shall have no
+ less Flight Hours or Cycles or time (whichever is applicable
+ and the more limiting factor) remaining until the next due
+ scheduled removal, shop visit or overhaul refurbishment as on
+ the Delivery Date, and the Engines shall have not less than
+ 2,250 Flight Hours and 1,500 Cycles remaining (whichever is
+ the more limiting factor) until the next scheduled
+ refurbishment and each life limited part within the Engines
+ shall have not less than 3,000 Flight Hours and 2,000 Cycles
+ remaining to the next scheduled replacement.
+
+ (ii) the APU and each life limited Part therein (if applicable)
+ shall have no less hours or time (whichever is applicable and
+ the more limiting factor) remaining until the next due
+ scheduled removal and overhaul as on the Delivery Date, and
+ Lessee shall have the option to swap APUs to meet the
+ requirements of this paragraph (ii);
+
+ (iii) the Landing Gear of the Aircraft and other time or life limited
+ parts of the Landing Gear shall have no less Flight Hours or
+ Cycles or any other life limit (whichever is applicable and
+ the more limiting factor) remaining until the next due
+ scheduled removal and overhaul as on the Delivery Date, and
+ Lessee shall have the option to swap Landing Gear to meet the
+ requirements of this paragraph (iii).
+
+3 OTHER MATTERS
+
+ (a) Provided necessary material and instructions are received in due
+ time (other than materials available by Lessee), any further work
+ required by the Beneficiaries, which can be accommodated within
+ the days already planned by Lessee for the above inspection and
+ check as set forth in paragraph 2(a) above, shall be performed, or
+ performance thereof shall be procured, by Lessee during the above
+ check and Lessor shall reimburse Lessee for the incremental costs
+ incurred (without mark-up).
+
+ (b) Each and every Part and time or calendar limited Part (including,
+ without limitation, loose equipment) will have no less life
+ remaining to the next scheduled removal, repair and overhaul as on
+ the Delivery Date in accordance with the Maintenance Program or
+ the recommended maintenance planning document of the manufacturer
+ of the relevant Part, and Lessee shall have the option to swap
+ Parts to meet the requirements of this paragraph (b).
+
+ (c) Each Engine and the APU will be installed on the Aircraft and
+ if not the engines or auxiliary power unit installed on the
+ Delivery Date will be accompanied by all documentation that the
+ Beneficiaries may reasonably require to evidence that title
+ thereto is properly vested in Lessor in accordance with this Lease
+ and will:
+
+ (i) have a power assurance run and a borescope inspection (or, in
+ case of the APU, such other inspection as in accordance with
+ Manufacturer's or APU manufacturer's recommendation) and, in a
+ case where an Engine does not meet the on-wing operating
+ criteria as required by Lessee's FAA approved engine
+ Maintenance Program and limits of the Engine section of the
+ Aircraft Maintenance Manual, if requested by Lessor, a test
+ cell inspection which shall be performed at Lessee's expense
+ with the Beneficiaries' representative entitled to be present
+ and Lessee shall provide evidence satisfactory to the
+ Beneficiaries (A) that the Engine meets the JT8D-17A limits
+ established by the Manufacturer according to the power
+ assurance check defined on the Aircraft
+
+ -5-
+
+
+
+
+ Maintenance Manual with an EGT margin of no less than 18
+ degrees centigrade at maximum rated takeoff power on a
+ standard day and (B) reflecting the correction of any
+ discrepancies from the guidelines set out by the Engine
+ Manufacturer which may be discovered during any such
+ inspection. All repairs shall be to the Beneficiaries'
+ satisfaction;
+
+ (ii) if the Engine historical and maintenance records and/or trend
+ monitoring data (or current test cell run report, if
+ applicable) indicate a rate of acceleration in performance
+ deterioration of any Engine including the APU which is higher
+ than normal based on Lessee's maintenance experience in
+ operating such engines, Lessee shall, prior to return, correct
+ or cause to be corrected such conditions which are determined
+ to have exceeded engine manufacturer's maintenance manual
+ tolerances or otherwise be causing such performance
+ deterioration; and
+
+ (iii) not have an Engine which is "on watch" for any reason
+ requiring any special or out of sequence inspection and each
+ such Engine shall comply with the operations specification of
+ Lessee without waiver, carry over, deferment, restriction or
+ exceptions.
+
+4 FUSELAGE, WINDOWS AND DOORS
+
+ (a) The fuselage will be free of all dents and abrasions,
+ additional scab patches am as at the Delivery Date and loose or
+ pulled or missing rivets, beyond the limits set in the relevant
+ structural repair manual (and all scab patches existing at the
+ Delivery Date shall be monitored and maintained, including as to
+ corrosion, during the Tenn);
+
+ (b) windows will be free of delamination, blemishes, crazing, beyond
+ limits set in the relevant structural repair manual and will be
+ properly sealed; and
+
+ (c) doors will be free moving, correctly rigged and be fitted with
+ serviceable seals.
+
+5 WINGS AND EMPENNAGE
+
+ (a) All leading edges will be free from damage, beyond limits
+ set in the relevant structural repair manual; and
+
+ (b) all control surfaces will be cleaned (provided they shall have
+ been waxed and polished during the preceding C Check);
+
+ (c) all unpainted cowlings and fairings will be cleaned (provided they
+ shall have been polished during the preceding C Check); and
+
+ (d) wings will be free of fuel leaks.
+
+6 INTERIOR
+
+ (a) Ceilings, sidewalls and bulkhead panels will be clean and free of
+ cracks and stains (fair wear and tear excepted);
+
+
+ -6-
+
+
+
+
+
+ (b) carpets and seat covers will be in good condition, clean and free
+ of stains, spots, chewing gum, tear or fraying (fair wear and tear
+ excepted) and meet FAA fire resistance regulations;
+
+ (c) seals and panels will meet FAA fire resistance regulations and
+ will be serviceable in good condition free of cracks and repainted
+ as necessary; and
+
+ (d) emergency equipment having no less calendar life remaining as on
+ the Delivery Date.
+
+7 COCKPIT
+
+ (a) All fairing panels shall be clean, free of stains and cracks (fair
+ wear and tear excepted), will be secure and repainted as necessary;
+
+ (b) floor coverings will be clean (fair wear and tear excepted) and
+ effectively sealed;
+
+ (c) seat covers will be in good condition, clean, free of stains (fair
+ wear and tear excepted) and will conform to FAA fire resistance
+ regulations; and
+
+ (d) seats will be serviceable, in good condition, meet FAA fire
+ resistance regulations and will be repainted as necessary.
+
+8 CARGO COMPARTMENTS
+
+ (a) All panels will be in good condition, fair wear and tear
+ excepted, clean and free of cracks and any panels which have
+ cracks will be replaced;
+
+ (b) all nets will be serviceable and in good condition, fair wear and
+ tear excepted; (c) any repair shall be within the limits
+ established in the Manufacturer's structural repair manual; and
+
+ (d) fabric cover shall be in good condition, fair wear and tear
+ excepted, free of stains, spots, tear or fraying.
+
+9 WHEEL WELLS AND UNDERCARRIAGE
+
+ Each wheel well and undercarriage will be clean and free of leaks. The
+ wheel wells will be repainted as necessary.
+
+10 CORROSION
+
+ (a) The Aircraft will have been inspected and treated with respect to
+ corrosion in strict adherence to that required by the Maintenance
+ Program;
+
+ (b) the entire fuselage will be substantially free from corrosion and
+ will be adequately treated and an approved corrosion prevention
+ program will be in operation; and
+
+
+ -7-
+
+
+
+
+
+ (c) fuel tanks will be free from contamination and, if applicable, a
+ corrosion and a tank treatment program will be in operation.
+
+11 EQUIPMENT CHANGES
+
+ Upon mutual discussion and agreement, Lessor may require Lessee to
+ remove any Removable Part (but excluding a Removable Part the
+ installation of which is a requirement of the FAA or this Lease) and
+ restore the Aircraft to its condition prior to installation of that
+ Removable Part. Where appropriate, compliance by Lessee with the
+ redelivery conditions in this Schedule shall be measured by reference to
+ the Boeing maintenance planning document.
+
+12 NO LESSOR OR BENEFICIARY COMPENSATION
+
+ Neither Lessor nor any Beneficiary shall be liable to Lessee, and Lessee
+ shall not be entitled to any compensation, with respect to the return of
+ the Airframe, the APU, any Landing Gear or any Part which, in any case,
+ has more Flight Hours, Cycles or time remaining or is otherwise in
+ better condition than the Flight Hours, Cycles, time remaining or
+ condition required by this Lease (including but not limited to Sections
+ 2(a)(ii), 2(b)(ii), 2(b)(iii) and 3(b) of this Section E).
+
+ -8-
+
+
+
+
+
+ EXHIBIT F
+ to
+ AIRCRAFT LEASE AGREEMENT
+
+ [Letterhead of Lessee's Counsel]
+
+ [Date of Delivery Date]
+
+The following is a summary of the items that should be included in Lessee's
+Counsel Opinion:
+
+ 1. Lessee is a corporation duly organized and existing under
+the laws of the State of Hawaii and has the corporate power and authority to
+carry on its business as presently conducted and to perform its obligations
+under the Lease.
+
+ 2. The execution, delivery and performance of the Operative
+Documents have been duly authorized by all necessary corporate action of Lessee
+and do not require any further approval of the Board of Directors or
+stockholders of Lessee or approval or consent of any trustee or holders of
+indebtedness or obligations of Lessee. Neither the execution and delivery
+thereof nor the consummation of the transactions contemplated thereby nor
+compliance by Lessee with any of the terms and provisions thereof will
+contravene the certificate of incorporation or by-laws of Lessee or, to our
+knowledge, any law applicable to Lessee and, to our knowledge, do not and will
+not result in any breach of, or constitute a default under, or result in the
+creation of any Lien upon any property of Lessee under, any credit agreement or
+instrument, or other agreement or instrument to which Lessee is a party or by
+which Lessee or its properties or assets are bound or affected.
+
+ 3. Neither the execution and delivery by Lessee of the
+Operative Document, nor the consummation of any of the transactions by Lessee
+contemplated thereby, nor the compliance by the Lessee with any of the terms
+and provisions thereof, requires the consent or approval of, or the giving of
+notice to, or the registration with, or the taking of any other action in
+respect of any federal or state governmental authority or agency, except for
+the registration of the Aircraft, recordation and other actions referred to in
+paragraph 6 below.
+
+ 4. The Operative Document have been duly executed and
+delivered by Lessee, and constitute the legal, valid and binding obligations of
+Lessee, enforceable against Lessee in accordance with their respective terms,
+subject, as to enforceability, to applicable bankruptcy, insolvency,
+reorganization, moratorium or other similar laws affecting the enforcement of
+creditors' rights generally, and, to general principles of equity.
+
+ 5. To our knowledge, there are no pending suits or proceedings
+before any court or any regulatory commission, board or other administrative
+governmental agency against or affecting Lessee which might have a materially
+adverse effect on the current business or financial condition of Lessee.
+
+ 6. Except for the appropriate registrations, recordings and
+filings with the FAA, the filing of a precautionary UCC-1 financing statement
+with the Hawaii Bureau of Conveyances and the placing on the Aircraft and on
+each Engine of the plates containing the legends referred to in Section 6(f) of
+the Lease
+
+
+
+
+
+Agreement, no further filing or recording of any document and no further action
+is necessary or desirable in order to protect Lessor's right, title and
+interest in and to the Aircraft as against Lessee or any third party.
+
+ 7. Lessee is a Certified Air Carrier as defined in the Lease
+Agreement.
+
+ 8. Lessor is entitled to the benefits of Section 1110 of Title
+11 of the United States Code with respect to the Aircraft and the Engines to
+the extent that they constitute an "airframe" or "aircraft engines", as defined
+in Section 101 of the Federal Aviation Act.
+
+ -2-
+
+
+
+
+
+ EXHIBIT G
+ to
+ AIRCRAFT LEASE AGREEMENT
+
+ [LETTERHEAD OF APPROVED INSURANCE BROKER]
+
+ [Date of Delivery Date]
+
+Wilmington Trust Company
+Rodney Square North
+Wilmington, Delaware 19890
+
+ Re: Insurance Coverage for One Boeing 737-25A Aircraft under
+ Aircraft Lease Agreement, dated as of November 29, 1995,
+ between Wilmington Trust Company, not in its individual
+ capacity but solely as Trustee f/b/o ITOCHU AirLease
+ (Europe) Limited and Marubeni Airleasing (U.K.) Limited
+
+Gentlemen:
+
+ This report is delivered to you pursuant to the provisions of
+Section 2(b)(vii)(5) of the Aircraft Lease Agreement 23791, dated as of
+November __, 1995 (the "Lease"), between Wilmington Trust Company, not in its
+individual capacity but solely as Trustee (the "Lessor"), and Aloha Airlines,
+Inc. (the "Lessee"). We are the firm of independent aircraft insurance brokers
+(the "Approved Insurance Broker") who have been appointed by the Lessee to
+deliver this report pursuant to Section 2(b)(vii)(5) of the Lease and we
+understand that the Lessor has not objected to such appointment. Except as
+otherwise defined herein, the terms used herein shall have the meanings set
+forth in the Lease.
+
+ We have reviewed the Lease and particularly Section 12
+thereof. We are also fully familiar with the Certificate of Insurance dated
+November __, 1995 issued to you on behalf of the Approved Insurers as well as
+the policies of insurance evidenced thereby.
+
+ As of the date of this report, it is our opinion as the
+Approved Insurance Broker that the insurance presently carried and maintained
+by the Lessee, as evidenced in the attached certificates, complies with the
+terms and requirements of Section 12 of the Lease.
+
+ We agree to advise the Lessor and each Beneficiary in writing
+promptly of any default in the payment of any premium and any other act or
+omission on the part of the Lessee of which we have knowledge and which might
+invalidate or render unenforceable, in whole or part, any insurance required
+under the Lease on the Aircraft.
+
+ Yours faithfully,
+
+ [APPROVED INSURANCE BROKER]
+
+
+
+
+
+Wilmington Trust Company
+Date
+Page 2
+
+
+
+ ----------------------
+ [Title]
+
+
+
+
+
+ EXHIBIT H
+ to
+ AIRCRAFT LEASE AGREEMENT
+
+ SPECIAL PROVISIONS TO CERTIFICATE OF INSURANCE
+
+ The "special provisions" section of the Insurance Certificate
+shall provide as follows:
+
+ 1. In accordance with the Aircraft Lease Agreement, dated as
+of November 29, 1995 (the "Lease"), between Lessor and Lessee, it is agreed to
+include Lessor (in its individual capacity and as Owner Trustee), each
+Beneficiary and their assigns as additional insureds, designate Lessor as owner
+of the Aircraft, and designate the Beneficiaries and their assigns as loss
+payees (but without imposing upon Lessor, such Beneficiary and their assigns
+any obligation to pay any premiums).
+
+ 2. It is agreed that in respect of the interests of Lessor,
+each Beneficiary and their assigns, in the insurance coverage provided hereby,
+such insurance coverage shall not be invalidated by any action or omission of
+Lessee, and shall insure Lessor, such Beneficiary and their assigns, regardless
+of any breach or violation of any warranty, declaration or condition contained
+in such insurance coverage by Lessee.
+
+ 3. It is agreed that if the insurance coverage provided hereby
+is canceled for any reason whatsoever, or is adversely changed in any way with
+respect to the interests of Lessor, any Beneficiary or their assigns, or if
+such insurance is allowed to lapse for nonpayment of premium, such
+cancellation, adverse change or lapse shall not be effective as to Lessor, such
+Beneficiary and their assigns, for thirty (30) days (seven (7) days or such
+lesser period as may be applicable in the case of any war risks and allied
+perils coverage) after receipt by Lessor and such Beneficiary of written notice
+of such prospective cancellation, change or lapse.
+
+ 4. It is agreed that, as against Lessor, each Beneficiary and
+their assigns, the Insurers waive any rights of setoff, counterclaim or any
+other deduction, whether by attachment or otherwise, and waive any rights to be
+subrogated to any right of any insured against Lessor, each Beneficiary or
+their assigns, with respect to the Aircraft to the same extent that Lessee has
+waived its rights of subrogation by the terms of its agreements to indemnify
+any such party pursuant to this Lease.
+
+ 5. The coverage provided hereunder is primary without right of
+contribution from any other insurance which may be carried by Lessor, any
+Beneficiary or their assigns. It is agreed that Lessor and each Beneficiary has
+a right to carry insurance in excess of the amounts provided hereunder without
+prejudice to the coverage hereunder provided.
+
+ 6. A 50/50 clause is in effect between the all risk hull and
+war risk covers.
+
+
+
+
+
+ EXHIBIT I
+ to
+ AIRCRAFT LEASE AGREEMENT
+
+ FORM OF REDELIVERY ACKNOWLEDGEMENT
+
+ This Redelivery Acknowledgement is delivered, on the date set
+out below, by WILMINGTON TRUST COMPANY ("Lessor") to ALOHA AIRLINES INC.
+("Lessee") pursuant to the Lease Agreement dated November __, 1995 between
+Lessor and Lessee (the "Agreement"). The capitalised terms used in this
+Certificate shall have the meanings given to such terms in the Agreement.
+
+1 DETAILS OF ACCEPTANCE
+
+ Lessor hereby confirms to Lessee that Lessor has at [ ] o'clock on
+ [insert date], at [ ], accepted redelivery of the following, in
+ accordance with the provisions of the Agreement:
+
+ (a) Boeing Model 737-25A Airframe, Manufacturer's Serial No. 28791.
+
+ (b) Pratt and Whitney JT8D-17A Engine, Manufacturer's Serial
+ Nos. [ ] and [ ].
+
+ (c) Fuel Status: Pounds [ ].
+
+ (d) Loose Equipment Check List: as per list signed by Lessor
+ and Lessee and attached hereto.
+
+2 CONFIRMATION
+
+ Lessor confirms to Lessee that as at the time indicated above, being
+ the Expiration Date, Lessor's authorized technical experts have
+ inspected the Aircraft to ensure the Aircraft conforms to redelivery
+ conditions specified in the Agreement and that the Aircraft is in
+ accordance with the specifications of the Agreement and satisfactory
+ in all respects save for the following:
+
+ [ ]
+ [ ]
+
+
+
+
+
+3 IN WITNESS WHEREOF, the Lessor has, by its duly authorized
+ representative, executed this Certificate on the date in paragraph 1
+ above.
+
+ LESSOR:
+
+
+ By:
+ ---------------------
+ Title:
+ ---------------------
+ Date:
+ ---------------------
+
+ The "special provisions" section of the Insurance Certificate shall
+ provide as follows:
+
+ -2-
+
+
+
+
+
+ EXHIBIT J
+ to
+ AIRCRAFT LEASE AGREEMENT
+
+Monthly Aircraft Utilization & Status Report
+
+Monthly Aircraft Utilization and Status Report Provided
+UNDER THE LEASE CONTRACT BETWEEN ALOHA AIRLINES, INC. AND WILMINTON TRUST
+COMPANY, AS TRUSTEE
+
+MONTH ENDING:
+Aircraft Registration Number:
+Airframe Make and Model: Boeing:
+Total Time on Airframe Since New:
+Total Cycles on Airframe Since New:
+
+
+
+
+ MONTHLY TOTAL
+ FLIGHT MONTHLY FLIGHT TOTAL DATE INST
+ SERIAL NO. HOURS CYCLES HOURS CYCLES LOCATION (REMOVED)
+
+Airframe
+
+Engines & APU's Subject to Lease
+
+Engine #1
+
+Engine #2
+
+APU
+
+Other Engines & APU's Installed on Airframe
+
+Engine #1
+
+Engine #2
+
+APU
+
+Engines & APU's permanently removed from Service During Month
+
+
+
+
+ APU and Engine data represent the times and cycles of the airframes
+each is installed on.
+
+ The Aircraft, Engines, modules and APU subject to the Lease for
+this Aircraft have been kept in good order and repair, or are being restored to
+the condition required by the Lease.
+
+MAJOR STRUCTURAL REPAIR:
+MAJOR MODIFICATIONS/AD's:
+MISCELLANEOUS/ACCIDENTS/INCIDENTS:
+FOR AND ON BEHALF OF VARIG S.A.
+
+NAME:
+
+TITLE:
+
+
+
+
+
+ EXHIBIT K
+ to
+ AIRCRAFT LEASE AGREEMENT
+
+ LIST OF DOCUMENTS TO BE SUPPLIED ANNUALLY
+
+1. AIRWORTHINESS DIRECTIVE SUMMARY UPDATES WILL BE SUPPLIED ANNUALLY
+
+2. SERVICE BULLETIN AND LESSEE'S ORIGINATED MODS SUMMARY
+ UPDATES WILL BE SUPPLIED ANNUALLY
+
+3. ENGINEERING ORDER (DT)
+
+4. LAST SHOP VISIT REPORT (FORM 337) FOR ENGINES AND APU WILL BE SUPPLIED
+ AS APPLICABLE.
+
+5. LIFE LIMITED PARTS STATUS FOR AIRFRAME/ENGINE/APU/LANDING GEAR WITH
+ COPY OF SERVICEABLE TAGS AS APPLICABLE.
+
+6. LISTING OF TIME/CALENDAR CONTROLLED COMPONENTS BY PIN, S/N -
+ DESCRIPTION-POSITION-TBO-TSI-TSO-TOTAL TIME.
+
+7. AIRCRAFT MASTER FLIGHT LOG (COMPUTER RUN OF TOTAL TT/TC RECORDED
+ DAILY). UPDATES WILL BE PROVIDED.
+
+8. CURRENT AND FUTURE REVISIONS OF ALOHA 737 MAINTENANCE PROGRAM (EPM), AS
+ APPLICABLE.
+
+9. LISTING OF ALL MAJOR 737 STRUCTURAL REPAIRS (N685MA) AS APPLICABLE.
+
+10. ANTICIPATED DATE OF NEXT SCHEDULED D CHECK OR C CHECK.
+
+
+
+
+
+ COUNTERPART NO. 7
+
+ LEASE SUPPLEMENT NO. 1
+
+ TO AIRCRAFT LEASE AGREEMENT
+ DATED AS OF NOVEMBER 29, 1995
+
+ LEASE SUPPLEMENT NO. 1, dated December 1, 1995, between
+Wilmington Trust Company, not in its individual capacity but solely as
+Trustee f/b/o ITOCHU AirLease (Europe) Limited and Marubeni Airleasing (U.K.)
+Limited ("Lessor"), and Aloha Airlines, Inc., a company organized under the
+laws of Hawaii ("Lessee").
+
+ Lessor and Lessee have previously entered into that certain
+Aircraft Lease Agreement dated as of November 29, 1995 (herein called the
+"Lease" and the defined terms therein being hereinafter used with the same
+meaning). The Lease provides for the execution and delivery from time to time
+of a Lease Supplement substantially in the form hereof for the purpose of
+leasing the aircraft described below under the Lease as and when delivered by
+Lessor to Lessee in accordance with the terms thereof.
+
+ The Lease relates to the Aircraft, Parts and Engines as more
+precisely described below. A counterpart of the Lease is attached hereto and
+this Lease Supplement and the Lease shall form one document.
+
+ In consideration of the premises and other good and
+sufficient consideration, Lessor and Lessee hereby agree as follows:
+
+ 1. Lessor hereby delivers and leases to Lessee under the
+Lease and Lessee hereby accepts and leases from Lessor under the Lease, that
+certain Boeing Model B-737-25A commercial jet aircraft, Airframe and the two
+Pratt & Whitney JT8D-17A Engines (each of which engines has 750 or more rated
+takeoff horsepower or the equivalent of such horsepower) described in
+Schedule 1 attached hereto and made a part hereof (the "Delivered Aircraft").
+The status of the Aircraft and Engines upon delivery thereof to Lessee shall
+be as described on Schedule 2 attached hereto and made a part hereof.
+
+ 2. (A) The Delivery Date of the Delivered Aircraft is the
+date of this Lease Supplement set forth in the opening paragraph hereof.
+
+ (B) The time of the delivery and acceptance of the
+Delivered Aircraft is 12:30 p.m. Central Time, at which
+
+
+
+
+
+time the Delivered Aircraft is located in the airspace over the State of
+Texas, United States of America (such location is herein called the "Delivery
+Location").
+
+ 3. The Term for the Delivered Aircraft shall commence on
+the Delivery Date and shall end on the Expiration Date, which shall be
+December 1, 2003.
+
+ 4. The amount of Basic Rent for the Delivered Aircraft is
+payable in advance on each Basic Rent Payment Date.
+
+ 5. Lessee hereby confirms to Lessor that (i) the Delivered
+Aircraft and each Engine installed thereon or belonging thereto have been
+duly marked in accordance with the terms of Section 6(f) of the Lease, (ii)
+Lessee has accepted the Delivered Aircraft for all purposes hereof and of the
+Lease and (iii) Lessee has inspected the Delivered Aircraft and the Delivered
+Aircraft satisfies the conditions set forth in the Lease.
+
+ 6. Lessor represents that all of its representations and
+warranties contained in the Lease are true and correct on the date hereof.
+
+ 7. All of the terms and provisions of the Lease Agreement
+are hereby incorporated by reference in the Lease Supplement to the same
+extent as if fully set forth herein.
+
+ 8. The Lease Supplement may be executed in any number of
+counterparts, each of such counterparts, except as provided in Section 20(e)
+of the Lease, shall for all purposes be deemed to be an original and all such
+counterparts shall together constitute but one and the same Lease Supplement.
+
+THIS LEASE SUPPLEMENT HAS BEEN EXECUTED IN MULTIPLE COUNTERPARTS, OF WHICH
+THERE IS ONE COUNTERPART DESIGNATED AS "COUNTERPART NO. 1 - CHATTEL PAPER
+ORIGINAL" AND ALL OTHER COUNTERPARTS SHALL BE DESIGNATED WITH NUMBERS OTHER
+THAN 1. TO THE EXTENT THAT THIS LEASE SUPPLEMENT CONSTITUTES CHATTEL PAPER
+UNDER THE UNIFORM COMMERCIAL CODE, AS IN EFFECT IN ANY APPLICABLE
+JURISDICTION, NO SECURITY INTEREST MAY BE CREATED OR PERFECTED BY POSSESSION
+OF ANY COUNTERPART OF THIS LEASE SUPPLEMENT OTHER THAN SAID COUNTERPART NO. 1.
+
+
+ -2-
+
+
+
+
+
+ IN WITNESS WHEREOF, Lessor and Lessee each have caused
+this Lease Supplement to be duly executed by their authorized officers as of
+the day and year first above written.
+
+ LESSOR:
+
+ WILMINGTON TRUST COMPANY, not
+ in its individual capacity but
+ solely as Trustee
+
+
+ By: /s/ M. P. Bonilla
+ ------------------------------------
+ Name: Myfanwy Phillips Bonilla
+ ------------------------------------
+ Title: Asst. Vice President
+
+
+ LESSEE:
+
+ ALOHA, AIRLINES, INC
+
+ By:
+ ------------------------------------
+ Name:
+ ------------------------------------
+ Title:
+ ------------------------------------
+
+
+ By:
+ ------------------------------------
+ Name:
+ ------------------------------------
+ Title:
+ ------------------------------------
+
+
+
+
+
+ IN WITNESS WHEREOF, Lessor and Lessee each have caused this
+Lease Supplement to be duly executed by their authorized officers as of the
+day and year first above written.
+
+ LESSOR:
+
+ WILMINGTON TRUST COMPANY, not
+ in its individual capacity but
+ solely as Trustee
+
+ By:
+ ------------------------------------
+ Name:
+ ------------------------------------
+ Title:
+ ------------------------------------
+
+
+ LESSEE:
+
+ ALOHA AIRLINES, INC.
+
+ By: /s/ Brenda F. Cutwright
+ ------------------------------------
+ Name: Brenda F. Cutwright
+ ------------------------------------
+ Title: SR. VICE PRESIDENT FINANCE &
+ PLANNING AND CFO
+ ------------------------------------
+
+
+ By: /s/ James M. King
+ ------------------------------------
+ Name: James M. King
+ ------------------------------------
+ Title: VICE PRESIDENT PLANNING &
+ DEVELOPMENT
+ ------------------------------------
+
+
+
+
+
+ SCHEDULE 1
+ TO
+ LEASE SUPPLEMENT NO. 1
+
+ BOEING 737-25A
+ AIRFRAME
+
+
+
+
+MANUFACTURER'S U.S. REGISTRATION
+SERIAL NO. NUMBER TOTAL TIME* TOTAL CYCLES*
+-------------- ----------------- ----------- -------------
+
+
+23791 N685MA 23,859 14,057
+ (to be changed to
+ N819AL)
+
+
+ INSTALLED PRATT & WHITNEY
+ ENGINES
+
+MODEL NO. SERIAL NO. TOTAL TIME* TOTAL CYCLES*
+--------- ---------- ----------- -------------
+JT8D-17A P709529 20,960 11,883
+JT8D-17A P709530 22,100 12,908
+
+ INSTALLED
+ AUXILIARY POWER UNIT
+
+MODEL NO. SERIAL NO. TOTAL TIME* TOTAL CYCLES*
+--------- ---------- ----------- -------------
+GTCP85-129 P35274 (TACH Reading)
+
+
+
+ITEM PART NUMBER SERIAL NUMBER DESCRIPTION POSITION/QUANTITY
+NO.
+---- ------------------ ------------- --------------------------- -------------------
+
+
+1 622-4096-001 4581 PA Amp E&E 1
+2 5140-1-112 339 Remote Electronics Unit E&E 1
+3 622-8971-020 2119 TCAS T/R E&E 1
+4 622-1181-001 14400 VHF T/R E&E 1
+5 622-1396-001 5023 VHF T/R E&E 1
+6 622-7878-200 5128/5165 ATC E&E 2
+7 NA 13UO2 255AKWO022 Selcal E&E 1
+8 622-2921-001 10076/10061 DME E&E 2
+9 080-20325-01 4603/4613/4602 T/Rectifier E&E 3
+10 2-301-3 4782 Batt Charger E&E 1
+11 39B168-1-0 3276 Static Inverter E&E 1
+12 65-52811-109 D00485 Landing Gear Acc Unit E&E 1
+13 42-651-01 443 Anti-Skid E&E 1
+14 65-52810-25 D01002 Air Condition Relay Unit E&E 1
+15 65-73606-69 D01029 Eng Acc Unit E&E 1
+16 65-52808-7 D00496 Compartment Overheat Unit E&E 1
+17 65-52806-235 D00870 Misc Solid State Switch E&E 1
+18 65-52807-26 D00449 Slat Position Switch Unit E&E 1
+
+
+
+
+ -2-
+
+
+
+
+
+
+
+
+ITEM PART NUMBER SERIAL NUMBER DESCRIPTION POSITION/QUANTITY
+NO.
+---- ------------------ ------------- --------------------------- -------------------
+
+
+19 522-2996-011 2012 Marker Beacon E&E 1
+20 777-1492-002 1582 ADF E&E 1
+21 777-1492-004 4039 ADF E&E 1
+22 965-0876-001 304 GPWS E&E 1
+23 622-3890-003 1556 Radio Alt E&E 1
+24 65-52801-31 D00446 APU Control Unit E&E 1
+25 65-52809-15 D00306 Fire Detector Unit E&E 1
+26 8TJ45-GAB X0062 34B3 Fuel Flow Power Supply E&E
+27 231-2 5556/6517 Window Heat Control E&E 4
+28 4314/7490 E&E
+29 522-3949-001 3701 Monitor Comparator E&E 1
+30 622-3257-001 5662/2275 ILS NAV RCVR E&E 2
+31 172938-001 6620/6623 NAV SW Unit E&E 4
+32 6630/6613 E&E
+33 HG48B13 P255/G1711 ADC E&E 2
+34 4030952-906 88062826 Yaw Damper E&E 1
+35 163356-88-01 0000528 Performance Data Comp E&E 1
+36 548376-6 097C-9045 Cabin Temp Controller E&E 1
+37 87093613 P43 Pressure Controller E&E 1
+38 2588424-902 84053253 Rate of Turn E&E 1
+39 101 812802UA/9118557A Compass Rack E&E 2
+40 622-2500-004 2343/1298 Steering Comp E&E 2
+
+
+
+
+ -3-
+
+
+
+
+
+
+
+
+ITEM PART NUMBER SERIAL NUMBER DESCRIPTION POSITION/QUANTITY
+NO.
+---- ------------------ ------------- --------------------------- -------------------
+
+
+41 65-52805-317 D01011 Flight Inst Acc Unit E&E 1
+42 172938-001 6624/6615 Switch Unit E&E 4
+43 6595/2678 E&E
+44 2590650-901 05872772 Mach Trim Comp E&E 1
+45 2588810-904 01851529 Pitch Computer E&E 1
+46 2588812-902 06871639 Roll Computer E&E 1
+47 65-52812-107 D00496 Auto Pilot Acc Unit E&E 1
+48 457300-0430 3394 Omega RPU E&E 1
+49 25582-003 8305313 Battery E&E 1
+50 2587335-11 84054627 Vert Gyro E&E 1
+51 2587335-11 8415034 Vert Gyro E&E 1
+52 2587335-11 0034163 Vert Gyro E&E 1
+53 2588302-4 0067044 Direction Gyro E&E 1
+54 2588302-4 84056507 Direction Gyro E&E 1
+
+
+
+
+ -4-
+
+
+
+
+
+
+ SCHEDULE 2
+ TO
+ LEASE SUPPLEMENT NO. 1
+
+ AIRCRAFT STATUS
+ ON THE DELIVERY DATE
+
+AIRFRAME HEAVIEST CHECK ("D" CHECK OR EQUIVALENT):
+
+Interval: 28,000 (Hrs)
+Time Remaining: 4,141
+
+LANDING GEAR OVERHAUL:
+
+Interval: Main Gear 24,000 cycles
+ Nose Gear 24,000 cycles
+Time Remaining: Left Gear 9,943 cycles
+ Right Gear 9,943 cycles
+ Nose Gear 9,943 cycles
+
+ENGINES:
+
+Overhaul Interval:
+Time Remaining (S/N P709529):
+Time Remaining (S/N P709530):
+
+Hot Section Inspection:
+
+Interval:
+Time Remaining (S/N P709529):
+Time Remaining (S/N P709530):
+
+TIME REMAINING TO FIRST RESTRICTION:
+
+Engine S/N: P709529
+ Hours: 9,040
+ Cycles: 3,117 (C07)
+Engine S/N: P709530
+ Hours: 7,900
+ Cycles: 2,092 (C07)
+
+
+
+
+
+AUXILIARY POWER UNIT:
+
+Hot Section Inspection:
+
+Interval: On Condition
+Time Remaining: On Condition
+
+Time Remaining to Next Limited
+ or Scheduled Removal: On Condition
+
+
+ -2-
+
+
+
+
+
+ SCHEDULE 3
+ TO
+ LEASE SUPPLEMENT NO. 1
+
+ Aircraft Status on The Delivery Date
+
+ Time and Calendar Controlled Components
+ (excluding engines, landing gear, and APU).
+
+
+ Effective date: 10/20/95
+
+
+
+ AIRCRAFT
+COMPONENT PART HR.
+POSITION NUMBER SERIAL # LIMIT TSI REMAINING
+--------- -------- --------- ------- ---------- ---------
+
+
+ATA 21
+------
+Valve, Safety 720737-5 L-0001 14000H 13,817.6 3,977.2
+RH OH 7/3/92
+Valve, Safety 720737-5 830678 14000H 13,817.6 3,977.2
+LH OH 7/3/92
+Gasper Fan 21520-1 063 6000H 22,594.7 4,735
+ OH 6/15/95
+
+ATA 23
+------
+Recorder, 93A100-80 52428 72 Mo. 14, 623.2H 2 Yrs., 6 Mo.
+Voice RPL
+(Battery)
+
+ATA 24
+------
+APU Generator 976J498-1 YK13122 6000H 20,471 2,602H
+ HT 10/20/94
+Panel Bus 915F213-2 WE1347B 6000H 19,313.9 1,454H
+Protection BC 6/11/94
+Gen. Cont. 948F458-1 NM4963 6000H 19,616.8H 1,757H
+ Unit BC 7/18/94
+ APU
+Gen. Cont. 948F458-1 PM5071 6000H 23,697H 5,838H
+ Unit BC 10/3/95
+ Left
+Gen. Cont. 948F458-1 XJ2186 6000H 21,639.9H 3,780.9H
+ Unit BC 2/18/95
+ Right
+
+ATA 25
+------
+Life Vest KSE-35L8 527-691 24 Mo. 8/30/95 9 Mos.
+ Rt. Aft OH 23,304H
+Life Vest AV-35 541-064 24 Mo. 2/95 14 Mos.
+ Co-pilot O H 21,530H
+Slide, Evac D31354- 1112 24 Mo. 8/23/94 10 Mos.
+ Rt. Aft 425 OH 19,939.7H
+
+
+
+
+
+
+
+ SCHEDULE 3
+ TO
+ LEASE SUPPLEMENT NO. 1
+
+ Aircraft Status on The Delivery Date
+
+ Time and Calendar Controlled Components
+ (excluding engines, landing gear, and APU).
+
+
+
+
+
+
+
+ COUNTERPART NO. 2
+
+ LEASE SUPPLEMENT NO. 2
+
+ TO AIRCRAFT LEASE AGREEMENT
+ DATED AS OF NOVEMBER 29, 1995
+
+ LEASE SUPPLEMENT NO. 2, dated April 12, 1996, between
+ Wilmington Trust Company, not in its individual capacity but solely as Trustee
+ f/b/o ITOCHU AirLease (Europe) Limited and Marubeni Airleasing (U.K.) Limited
+ ("Lessor"), and Aloha Airlines, Inc., a company organized under the laws of
+ Hawaii ("Lessee").
+
+ Lessor and Lessee have previously entered into that certain
+ Aircraft Lease Agreement dated as of November 29, 1995, as supplemented by
+ Lease Supplement No. 1, dated December 1, 1995 (as amended, modified or
+ supplemented and in effect from time to time, the "Lease" and the defined terms
+ therein being hereinafter used with the same meaning), which was filed with the
+ FAA as one instrument and recorded on January 25, 1996 as Conveyance No.
+ 11004730. Lessor and Lessee desire to supplement the Lease in order (1) to
+ subject to the Lease two Pratt & Whitney model JT8D-9A engines described below,
+ and (2) to remove from the terms of the Lease two Pratt & Whitney model
+ JT8D-17A engines described below.
+
+ In consideration of the premises and other good and
+ sufficient consideration, Lessor and Lessee hereby agree as follows:
+
+ 1. Lessor hereby leases to Lessee under the Lease and Lessee
+ hereby leases from Lessor under the Lease, two (2) Pratt & Whitney JT8D-9A
+ Engines bearing Manufacturer's Serial Numbers 665868 and 655877, each having
+ 750 or more rated takeoff horsepower, together with (1) all parts, appliances,
+ components, accessories and equipment currently installed on, or attached to
+ any such engine and (2) all flight, engineering, maintenance, overhaul and
+ repair manuals, drawings, records, logs, documents and other data relating to
+ the above described engines or parts (such engines, together with the items
+ identified in clauses (1) and (2) above, are referred to herein collectively as
+ the "9A Engines"). For all purposes of the Lease, each of the 9A Engines shall
+ be deemed an "Engine" as defined in the Lease, and part of the Aircraft and
+ each such Engine shall be maintained, insured and returned to Lessor in the
+ manner and at the times set forth in the Lease.
+
+ RECORDED
+
+ FEDERAL AVIATION ADMINISTRATION
+
+ Date 4-29-96 Time 1:07
+ --------- --------
+
+ Conveyance Number ZZ011203
+ ------------
+
+ By /s/ [Illegible]
+ --------------------
+
+
+
+
+ 2. There are hereby removed from the terms of the Lease the
+ two (2) Pratt & Whitney JT8D-17A Engines bearing Manufacturer's Serial Numbers
+ P709529 and P709530, each having 750 or more rated takeoff horsepower, together
+ with (1) all parts, appliances, components, accessories and equipment currently
+ installed on, or attached to any such engine and (2) all flight, engineering,
+ maintenance, overhaul and repair manuals, drawings, records, logs, documents
+ and other data relating to the above described engines or parts (such engines,
+ together with the items identified in clauses (1) and (2) above, are referred
+ to herein collectively as the "0riginal 17A Engines"). Lessee and Lessor
+ acknowledge that the Original 17A Engines are, effective on this date, no
+ longer subject to the terms of the Lease and that Lessee has no interest
+ therein or thereto under the Lease.
+
+ 3. From the date hereof until the last day of the Term,
+ Lessee shall affix and keep a metal nameplate of reasonable dimensions in a
+ reasonably prominent position on each of the 9A Engines bearing the Lease
+ Identification (such name plate to be furnished by Lessee and affixed on or
+ prior to the date hereof).
+
+ 4. Lessee represents that all of its representations and
+ warranties contained in the Lease are true and correct on the date hereof and
+ that no Default or Event of Default has occurred and is continuing.
+
+ 5. All of the terms and provisions of the Lease Agreement are
+ hereby incorporated by reference in this Lease Supplement to the same extent as
+ if fully set forth herein. The Lease is hereby ratified and confirmed and,
+ except as set forth herein, remains unamended and in full force and effect.
+
+ 6. This Lease Supplement may be executed in any number of
+ counterparts, each of such counterparts, except as provided in Section 20(e) of
+ the Lease, shall for all purposes be deemed to be an original and all such
+ counterparts shall together constitute but one and the same Lease Supplement.
+
+ THIS LEASE SUPPLEMENT HAS BEEN EXECUTED IN MULTIPLE COUNTERPARTS, OF WHICH
+ THERE IS ONE COUNTERPART DESIGNATED AS "COUNTERPART NO. 1 - CHATTEL PAPER
+ ORIGINAL'' AND ALL OTHER COUNTERPARTS SHALL BE DESIGNATED WITH NUMBERS OTHER
+ THAN 1. TO THE EXTENT THAT THIS LEASE SUPPLEMENT CONSTITUTES CHATTEL PAPER
+ UNDER THE UNIFORM COMMERCIAL CODE, AS IN EFFECT IN ANY APPLICABLE JURISDICTION,
+ NO SECURITY INTEREST MAY BE CREATED OR PERFECTED BY POSSESSION OF ANY
+ COUNTERPART OF THIS LEASE SUPPLEMENT OTHER THAN SAID COUNTERPART NO. 1.
+
+
+
+
+ IN WITNESS WHEREOF, Lessor and Lessee each have caused this
+Lease Supplement to be duly executed by their authorized officers as of the day
+and year first above written.
+
+ LESSOR:
+
+ WILMINGTON TRUST COMPANY, not
+ in its individual capacity but
+ solely as Trustee.
+
+ By: M.P. Bonilla
+ ----------------------------
+ Name: MYFANWY PHILLIPS BONILLA
+ ----------------------------
+ Title: ASSISTANT VICE PRESIDENT
+ ----------------------------
+
+
+ LESSEE:
+
+ ALOHA AIRLINES, INC.
+
+ By:
+ ----------------------------
+ Name:
+ ----------------------------
+ Title:
+ ----------------------------
+
+
+ By:
+ ----------------------------
+ Name:
+ ----------------------------
+ Title:
+ ----------------------------
+
+
+
+
+
+ IN WITNESS WHEREOF, Lessor and Lessee each have caused this
+ Lease Supplement to be duly executed by their authorized officers as of the day
+ and year first above written.
+
+
+ LESSOR:
+
+ WILMINGTON TRUST COMPANY, not
+ in its individual capacity but
+ solely as Trustee.
+
+ By:
+ ----------------------------
+ Name:
+ ----------------------------
+ Title:
+ ----------------------------
+
+
+ LESSEE:
+
+ ALOHA AIRLINES, INC.
+
+ By: /s/ Brenda F. Cutwright
+ ----------------------------
+ Name: Brenda F. Cutwright
+ ----------------------------
+ Title: Sr. Vice President Finance
+ & Planning and CFO
+ ----------------------------
+
+ By: /s/ James M. King
+ ----------------------------
+ Name: James M. King
+ ----------------------------
+ Title: Vice President Planning &
+ Development
+ ----------------------------
+
+
+
+
+
+
+ COUNTERPART NO.
+ -------
+
+ LEASE SUPPLEMENT NO. 2
+
+ TO AIRCRAFT LEASE AGREEMENT
+ DATED AS OF NOVEMBER 29, 1995
+
+ LEASE SUPPLEMENT NO. 2, dated April 12, 1996, between
+ Wilmington Trust Company, not in its individual capacity but solely as Trustee
+ f/b/o ITOCHU AirLease (Europe) Limited and Marubeni Airleasing (U.K.) Limited
+ ("Lessor"), and Aloha Airlines, Inc., a company organized under the laws of
+ Hawaii ("Lessee").
+
+ Lessor and Lessee have previously entered into that certain
+ Aircraft Lease Agreement dated as of November 29, 1995, as supplemented by
+ Lease Supplement No. 1, dated December 1, 1995 (as amended, modified or
+ supplemented and in effect from time to time, the "Lease" and the defined terms
+ therein being hereinafter used with the same meaning), which was filed with the
+ FAA as one instrument and recorded on January 25, 1996 as Conveyance No.
+ 11004730. Lessor and Lessee desire to supplement the Lease in order (1) to
+ subject to the Lease two Pratt & Whitney model JT8D-9A engines described below,
+ and (2) to remove from the terms of the Lease two Pratt & Whitney model
+ JT8D-17A engines described below.
+
+ In consideration of the premises and other good and sufficient
+ consideration, Lessor and Lessee hereby agree as follows:
+
+ 1. Lessor hereby leases to Lessee under the Lease and Lessee
+ hereby leases from Lessor under the Lease, two (2) Pratt & Whitney JT8D-9A
+ Engines bearing Manufacturer's Serial Numbers 665868 and 655877, each having
+ 750 or more rated takeoff horsepower, together with (1) all parts, appliances,
+ components, accessories and equipment currently installed on, or attached to
+ any such engine and (2) all flight, engineering, maintenance, overhaul and
+ repair manuals, drawings, records, logs, documents and other data relating to
+ the above described engines or parts (such engines, together with the items
+ identified in clauses (1) and (2) above, are referred to herein collectively as
+ the "9A Engines"). For all purposes of the Lease, each of the 9A Engines shall
+ be deemed an "Engine" as defined in the Lease, and part of the Aircraft and
+ each such Engine shall be maintained, insured and returned to Lessor in the
+ manner and at the times set forth in the Lease.
+
+
+
+
+
+ 2. There are hereby removed from the terms of the Lease the
+ two (2) Pratt & Whitney JT8D-17A Engines bearing Manufacturer's Serial Numbers
+ P709529 and P709530, each having 750 or more rated takeoff horsepower, together
+ with (1) all parts, appliances, components, accessories and equipment currently
+ installed on, or attached to any such engine and (2) all flight, engineering,
+ maintenance, overhaul and repair manuals, drawings, records, logs, documents
+ and other data relating to the above described engines or parts (such engines,
+ together with the items identified in clauses (1) and (2) above, are referred
+ to herein collectively as the "Original 17A Engines"). Lessee and Lessor
+ acknowledge that the Original 17A Engines are, effective on this date, no
+ longer subject to the terms of the Lease and that Lessee has no interest
+ therein or thereto under the Lease.
+
+ 3. From the date hereof until the last day of the Term, Lessee
+ shall affix and keep a metal nameplate of reasonable dimensions in a reasonably
+ prominent position on each of the 9A Engines bearing the Lease Identification
+ (such name plate to be furnished by Lessee and affixed on or prior to the date
+ hereof).
+
+ 4. Lessee represents that all of its representations and
+ warranties contained in the Lease are true and correct on the date hereof and
+ that no Default or Event of Default has occurred and is continuing.
+
+ 5. All of the terms and provisions of the Lease Agreement are
+ hereby incorporated by reference in this Lease Supplement to the same extent as
+ if fully set forth herein. The Lease is hereby ratified and confirmed and,
+ except as set forth herein, remains unamended and in full force and effect.
+
+ 6. This Lease Supplement may be executed in any number of
+ counterparts, each of such counterparts, except as provided in Section 20(e) of
+ the Lease, shall for all purposes be deemed to be an original and all such
+ counterparts shall together constitute but one and the same Lease Supplement.
+
+ THIS LEASE SUPPLEMENT HAS BEEN EXECUTED IN MULTIPLE COUNTERPARTS, OF WHICH
+ THERE IS ONE COUNTERPART DESIGNATED AS "COUNTERPART NO. 1 - CHATTEL PAPER
+ ORIGINAL" AND ALL OTHER COUNTERPARTS SHALL BE DESIGNATED WITH NUMBERS OTHER
+ THAN 1. TO THE EXTENT THAT THIS LEASE SUPPLEMENT CONSTITUTES CHATTEL PAPER
+ UNDER THE UNIFORM COMMERCIAL CODE, AS IN EFFECT IN ANY APPLICABLE JURISDICTION,
+ NO SECURITY INTEREST MAY BE CREATED OR PERFECTED BY POSSESSION OF ANY
+ COUNTERPART OF THIS LEASE SUPPLEMENT OTHER THAN SAID COUNTERPART NO. 1.
+
+
+
+
+
+ IN WITNESS WHEREOF, Lessor and Lessee each have caused this
+ Lease Supplement to be duly executed by their authorized officers as of the day
+ and year first above written.
+
+ LESSOR:
+
+ WILMINGTON TRUST COMPANY, not
+ in its individual capacity but
+ solely as Trustee.
+
+ By: M. P. Bonilla
+ ----------------------------
+ Name: MYFANWY PHILLIPS BONILLA
+ ----------------------------
+ Title: ASSISTANT VICE PRESIDENT
+ ----------------------------
+
+
+ LESSEE:
+
+ ALOHA AIRLINES, INC.
+
+ By:
+ ----------------------------
+ Name:
+ ----------------------------
+ Title:
+ ----------------------------
+
+
+ By:
+ ----------------------------
+ Name:
+ ----------------------------
+ Title:
+ ----------------------------
+
+
+
+
+
+ IN WITNESS WHEREOF, - Lessor and Les-gee each have caused this
+ Lease Supplement to he duly executed by their authorized officers as of the day
+ and year first above written.
+
+ LESSOR:
+
+ WILMINGTON TRUST COMPANY, not
+ in its individual capacity but
+ solely as Trustee.
+
+ By:
+ ----------------------------
+ Name:
+ ----------------------------
+ Title:
+ ----------------------------
+
+
+ LESSEE:
+
+ ALOHA AIRLINES, INC.
+
+ By: /s/ Brenda F. Cutwright
+ ----------------------------
+ Name: Brenda F. Cutwright
+ ----------------------------
+ Title: Sr. Vice President Finance
+ & Planning and CFO
+ ----------------------------
+
+ By: /s/ James M. King
+ ----------------------------
+ Name: James M. King
+ ----------------------------
+ Title: Vice President Planning &
+ Development
+ ----------------------------
+
+
+
+
+
+
+
+ ASSIGNMENT, ASSUMPTION, AMENDMENT AND CONSENT
+
+
+ THIS ASSIGNMENT, ASSUMPTION, AMENDMENT AND CONSENT
+ ("Assignment"), dated as of December 26, 1996 is between WILMINGTON TRUST
+ COMPANY, not in its individual capacity but solely as Trustee f/b/o ITOCHU
+ AirLease (Europe) Limited and Marubeni Airleasing (U.K) Limited ("Lessor"),
+ ALOHA AIRLINES, INC., a Hawaii Corporation ("Assignor"), and ALOHA AIRLINES,
+ INC., a Delaware corporation ("Assignee").
+
+ RECITALS
+
+ WHEREAS, Lessor and Assignor entered into that certain
+ Aircraft Lease Agreement dated as of November 29, 1995, as it may be
+ supplemented or amended from time to time (the "Lease"), pursuant to which
+ Assignor leased from Lessor one (1) Boeing model 737-25A aircraft bearing
+ manufacturer's serial number 23791 and registration mark N819AL (the
+ "Aircraft");
+
+ WHEREAS, effective December 26, 1996, Assignor merged with
+ Assignee, and Assignee emerged as the surviving entity and the successor in
+ interest to Lessee.
+
+ AGREEMENT
+
+ NOW, THEREFORE, in consideration of these presents and for
+other valuable consideration, the parties agree as follows.
+
+ 1. CERTAIN DEFINED TERMS. Unless otherwise defined herein or
+ the context otherwise requires, all capitalized terms used in this Assignment
+ shall have the respective meanings assigned to them in the Lease.
+
+ 2. ASSIGNMENT. As of December 26, 1996 (the "Effective
+ Date"), the Assignor hereby sells, assigns, transfers and conveys to Assignee
+ all of the Assignor's right, title and interest in and to the Lease, as if
+ Assignee were the original party to the Lease.
+
+ 3. ASSUMPTION BY ASSIGNEE. Assignee hereby accepts the
+ foregoing sale, assignment, transfer and conveyance of all of the Assignor's
+ right, title, interest, obligations and liabilities in, to and under the Lease
+ to the Assignee and, as of the Effective Date, agrees to assume and to be bound
+ by all the terms of, and to undertake all of the obligations of the Assignor
+ contained in, the Lease, including without limitation, all such obligations
+ existing at or prior to, or attributable to acts or events occurring prior to
+ the Effective Date.
+
+
+
+
+
+
+ 4. LESSOR CONSENT.
+
+ (a) Pursuant to Section 8(i) of the Lease, Lessor
+acknowledges and consents to the foregoing sale, assignment, transfer and
+conveyance of all of the Assignor's right, title, interest, obligations and
+liabilities in, to and under the Lease to the Assignee under this Assignment,
+and acknowledge the rights and obligations of the Assignee described in this
+Assignment.
+
+ (b) Lessor further acknowledges and agrees that from
+and after the Effective Date Assignee shall be deemed the "Lessee" for all
+purposes of the Lease and each reference in the Lease to the Assignor as
+"Lessee" shall be deemed after the Effective Date for all purposes to refer to
+the Assignee and that, from and after the Effective Date Lessor will perform
+its obligations under the Lease, in all respects as if Assignee were the
+original party to the Lease as "Lessee" thereunder.
+
+ 5. REPRESENTATIONS, WARRANTIES AND COVENANTS OF ASSIGNEE.
+Assignee represents and warrants as follows:
+
+ (a) Assignee is a corporation duly organized and existing in
+ good standing under the Laws of the State of Delaware and has the corporate
+ power and authority to carry on its business as presently conducted and to
+ perform its obligations under this Assignment.
+
+ (b) This Assignment has been duly authorized by all necessary
+ corporate action on the part of Assignee and does not require any approval of
+ the stockholders of Assignee (or if such approval is required, such approval
+ has been obtained), and neither the execution and delivery hereof nor the
+ consummation of the transactions contemplated hereby nor compliance by Assignee
+ with any of the terms and provisions hereof will contravene any Law applicable
+ to Assignee or result in any breach of, or constitute any default under, or
+ result in the creation of, any Lien upon any property of Assignee under, any
+ credit agreement or instrument, or other agreement or instrument to which
+ Assignee is a party or by which Assignee or its properties or assets may be
+ bound or affected.
+
+ (c) Assignee has received or has complied with every
+ necessary consent, approval, order, or authorization of, or registration with,
+ or the giving of prior notice to, any Governmental Entity having jurisdiction
+ with respect to the use and operation of the Aircraft and the execution and
+ delivery of this Assignment of the validity and enforceability hereof.
+
+ (d) This Assignment has been duly entered into and delivered
+by Assignee and constitutes a valid, legal and binding
+
+
+
+
+
+
+obligation of Assignee, enforceable in accordance with its terms, except as
+enforceability may be limited by bankruptcy, reorganization, moratorium or
+other similar Laws and by general principles of equity, whether considered in a
+proceeding at Law or in equity.
+
+ (e) Assignee is a Certified Air Carrier and holds all
+licenses, certificates, permits and franchises from the appropriate agencies of
+the United States of America and/or all other governmental authorities having
+jurisdiction which are necessary to authorize the Assignee to engage in air
+transport and to carry on its business as presently conducted and to be
+conducted with the Aircraft.
+
+ (f) Assignee is a "citizen of the United States" as defined
+in 40102(a)(15) of Title 49 of the United States Code.
+
+ 6. GOVERNING LAW. This Assignment shall governed by the Laws
+of the State of New York.
+
+
+ [THE REMAINDER OF THIS PAGE IS INTENTIONALLY BLANK.]
+
+
+
+
+
+ IN WITNESS WHEREOF, the undersigned parties have caused this
+Assignment, Assumption, Amendment and Consent to be duly executed and delivered
+by their duly authorized officers as of the date first written above.
+
+ ASSIGNOR: ALOHA AIRLINES, INC.,
+ A HAWAII CORPORATION
+
+ By: /s/ Brenda F. Cutwright
+ ------------------------------------
+ Name: BRENDA F. CUTWRIGHT
+ ----------------------------------
+ Title: SR. VICE PRESIDENT FINANCE &
+ PLANNING AND CFO
+ ---------------------------------
+
+
+ By: /s/ Owen Sekimura
+ ------------------------------------
+ Name: OWEN SEKIMURA
+ ----------------------------------
+ Title: STAFF VICE PRESIDENT -
+ FINANCE & CONTROLLER
+ ---------------------------------
+
+
+ ASSIGNEE: ALOHA AIRLINES, INC., A
+ DELAWARE CORPORATION
+
+
+ By: /s/ Brenda F. Cutwright
+ ------------------------------------
+ Name: BRENDA F. CUTWRIGHT
+ ----------------------------------
+ Title: SR. VICE PRESIDENT FINANCE &
+ PLANNING AND CFO
+ ---------------------------------
+
+
+ By: /s/ Owen Sekimura
+ ------------------------------------
+ Name: Owen Sekimura
+ ----------------------------------
+ Title: STAFF VICE PRESIDENT -
+ FINANCE & CONTROLLER
+ ---------------------------------
+
+
+ LESSOR: WILMINGTON TRUST COMPANY, NOT
+ IN ITS INDIVIDUAL CAPACITY BUT
+ SOLELY AS TRUSTEE F/B/O ITOCHU
+ AIRLEASE (EUROPE) LIMITED AND
+ MARUBENI AIRLEASING (U.K)
+ LIMITED
+
+
+ By:____________________________________
+
+ Name:__________________________________
+
+ Title:_________________________________
+
+
+
+
+
+ IN WITNESS WHEREOF, the undersigned parties have caused this
+ Assignment, Assumption, Amendment and Consent to be duly executed and delivered
+ by their duly authorized officers as of the date first written above.
+
+
+ ASSIGNOR: ALOHA AIRLINES, INC.,
+ A HAWAII CORPORATION
+
+ By: /s/ Brenda F. Cutwright
+ ------------------------------------
+ Name: BRENDA F. CUTWRIGHT
+ ----------------------------------
+ Title: SR. VICE PRESIDENT FINANCE &
+ PLANNING AND CFO
+ ---------------------------------
+
+
+ By: /s/ Owen Sekimura
+ ------------------------------------
+ Name: OWEN SEKIMURA
+ ----------------------------------
+ Title: STAFF VICE PRESIDENT -
+ FINANCE & CONTROLLER
+ ---------------------------------
+
+
+ ASSIGNEE: ALOHA AIRLINES, INC., A
+ DELAWARE CORPORATION
+
+
+ By: /s/ Brenda F. Cutwright
+ ------------------------------------
+ Name: BRENDA F. CUTWRIGHT
+ ----------------------------------
+ Title: SR. VICE PRESIDENT FINANCE &
+ PLANNING AND CFO
+ ---------------------------------
+
+
+ By: /s/ Owen Sekimura
+ ------------------------------------
+ Name: Owen Sekimura
+ ----------------------------------
+ Title: STAFF VICE PRESIDENT -
+ FINANCE & CONTROLLER
+ ---------------------------------
+
+
+ LESSOR: WILMINGTON TRUST COMPANY, NOT
+ IN ITS INDIVIDUAL CAPACITY BUT
+ SOLELY AS TRUSTEE F/B/O ITOCHU
+ AIRLEASE (EUROPE) LIMITED AND
+ MARUBENI AIRLEASING (U.K)
+ LIMITED
+
+
+ By:____________________________________
+
+ Name:__________________________________
+
+ Title:_________________________________
+
+
+
+
+
+ IN WITNESS WHEREOF, the undersigned parties have caused this
+ Assignment, Assumption, Amendment and Consent to be duly executed and delivered
+ by their duly authorized officers as of the date first written above.
+
+
+ ASSIGNOR: ALOHA AIRLINES, INC.,
+ A HAWAII CORPORATION
+
+ By: /s/ Brenda F. Cutwright
+ ------------------------------------
+ Name: BRENDA F. CUTWRIGHT
+ ----------------------------------
+ Title: SR. VICE PRESIDENT FINANCE &
+ PLANNING AND CFO
+ ---------------------------------
+
+
+ By: /s/ Owen Sekimura
+ ------------------------------------
+ Name: OWEN SEKIMURA
+ ----------------------------------
+ Title: STAFF VICE PRESIDENT -
+ FINANCE & CONTROLLER
+ ---------------------------------
+
+
+ ASSIGNEE: ALOHA AIRLINES, INC., A
+ DELAWARE CORPORATION
+
+
+ By: /s/ Brenda F. Cutwright
+ ------------------------------------
+ Name: BRENDA F. CUTWRIGHT
+ ----------------------------------
+ Title: SR. VICE PRESIDENT FINANCE &
+ PLANNING AND CFO
+ ---------------------------------
+
+
+ By: /s/ Owen Sekimura
+ ------------------------------------
+ Name: Owen Sekimura
+ ----------------------------------
+ Title: STAFF VICE PRESIDENT -
+ FINANCE & CONTROLLER
+ ---------------------------------
+
+
+ LESSOR: WILMINGTON TRUST COMPANY, NOT
+ IN ITS INDIVIDUAL CAPACITY BUT
+ SOLELY AS TRUSTEE F/B/O ITOCHU
+ AIRLEASE (EUROPE) LIMITED AND
+ MARUBENI AIRLEASING (U.K)
+ LIMITED
+
+
+ By:____________________________________
+
+ Name:__________________________________
+
+ Title:_________________________________
+
+
+
+
+
+
+ CONSENT
+
+ THIS CONSENT is made this ______ day of ________________,
+1996 by WILMINGTON TRUST COMPANY, not in its individual capacity but solely as
+Trustee f/b/o ITOCHU AirLease (Europe) Limited and Marubeni Airleasing (U.K)
+Limited ("Lessor") in favor of Aloha Airlines, Inc., a Hawaii corporation
+("Aloha Hawaii") and Aloha Airlines, Inc., a Delaware corporation ("Aloha
+Delaware").
+
+ RECITALS
+
+ WHEREAS, Lessor and Aloha Hawaii entered into that certain
+ Aircraft Lease Agreement dated as of November 29, 1995 (the "Lease"), pursuant
+ to which Aloha Hawaii leased from Lessor one (1) Boeing model 737-25A aircraft
+ bearing manufacturer's serial number 23791 and registration mark N819AL (the
+ "Aircraft");
+
+ WHEREAS, Aloha Hawaii and Aloha Delaware are wholly owned
+ subsidiaries of Aloha Airgroup, Inc., a Hawaii corporation.
+
+ WHEREAS, effective ____________________________________, 1996, Aloha Hawaii
+ merged with Aloha Delaware, and Aloha Delaware emerged as the surviving entity
+ and the successor in interest to Aloha Hawaii.
+
+ WHEREAS, as a result of the merger, all of the assets and
+ liabilities of Aloha Hawaii, including the Lease, will become assets and
+ liabilities of Aloha Delaware by operation of law.
+
+ WHEREAS, Aloha Hawaii has requested that Lessor consent to the
+ merger and the transfer by operation of law of the Lease and the Notice of
+ Assignment to Aloha Delaware.
+
+ NOW, THEREFORE, in consideration of these presents and for
+ other valuable consideration, Lessor does hereby consent to the merger of Aloha
+ Hawaii with and into Aloha Delaware and the transfer by operation of law of the
+ Lease to Aloha Delaware, upon and subject to the condition that this consent
+ shall not be construed as authorizing any other or further assignment of the
+ Lease, and that all of the rights of the undersigned as set forth in the Lease
+ are expressly reserved.
+
+
+
+
+
+ IN WITNESS WHEREOF, the undersigned has executed this Consent
+ as of the date first written above.
+
+ LESSOR: WILMINGTON TRUST COMPANY,
+ NOT IN ITS INDIVIDUAL
+ CAPACITY BUT SOLELY AS
+ TRUSTEE F/B/O ITOCHU
+ AIRLEASE (EUROPE) LIMITED
+ AND MARUBENI AIRLEASING
+ (U.K) LIMITED
+
+
+
+
+ By________________________________
+
+ Name______________________________
+
+ Title_____________________________
+
+
+
+
+
+
+ LEASE ASSIGNMENT
+
+ This Lease Assignment (this "Agreement") dated as of August 7, 1998 is
+ entered into by and between (1) ITOCHU Airlease Limited, a company formed under
+ the laws of the Republic of Ireland ("ITOCHU"), (ii) MA-Lease Limited, a
+ company duly formed under the laws of England ("Marubeni"), (iii) Wilmington
+ Trust Company, not in its individual capacity but solely as trustee f/b/o
+ ITOCHU and Marubeni ("Trustee", and together With ITOCHU and Marubeni, each, an
+ "Assignor" and collectivelly, "Assignors"), and Boeing Capital Corporation, a
+ company duly formed under the laws of Delaware (the "Assignee").
+
+ RECITALS
+
+ WHEREAS, Trustee is the Lessor, under that certain Aircraft Lease
+Agreement dated as of November 29, 1995 and recorded with the Federal
+Aviation Administration on ________, 199__, and assigned conveyance number
+____________, with Aloha Airlines, Inc., as the Lessee, as supplemented by
+Lease Supplement No. 1 and as amended and supplemented by Letter Agreement
+No. 1 dated as of November 29, 1995 (collectively, the "Lease"), covering
+one (1) Boeing 737-25A aircraft (as more particularly described under the
+Lease, the "Aircraft"),
+
+ WHEREAS, pursuant to that certain Aircraft Purchase Agreement dated
+as of August _________, 1998 (the "Purchase Agreement"), by and between
+Assignors and Assignee, Assignors desire to assign, and Assignee desires to
+accept, the assignment of the Assignors' rights, titles, interest in, to and
+under the Lease;
+
+ NOW, THEREFORE, in consideration of the premises and mutual agreements
+ contained herein and for other good and valuable consideration, receipt of
+ which is hereby acknowledged, the parties hereto agree as follows,
+
+ Section 1, DEFINITIONS. Terms used herein but not defined shall have
+the meanings set forth in the Lease.
+
+ Section 2. ASSIGNMENT. Assignor hereby assigns to Assignee and
+Assignee hereby accepts, all of Assignors' right, title and interest under
+the Lease.
+
+ Section 3. ASSUMPTION OF OBLIGATIONS. Assignee hereby assumes all
+obligations of the Assignors under the Lease to the extent arising from and
+after the date hereof,
+
+ Section 4. AMENDMENTS TO THE LEASE. With respect to events occurring on
+ and after the date hereof, all references to "Beneficiaries" contained in the
+ Lease shall be deemed to be references to Lessor.
+
+ Section 5. CONCERNING THE TRUSTEE. Wilmington Trust Company is
+ entering into this Agreement solely in its capacity as Trustee under the Trust
+ Agreement, except as otherwise expressly provided herein, and Wilmington Trust
+ Company (or any successor Trustee) shall not be personally liable for or on
+ account of any of the statements, representations, warranties, covenants or
+ obligations of the Assignor hereunder; provided, however, that Wilmington Trust
+ Company (or any other successor Trustee) shall be liable hereunder for its own
+ gross negligence or willful misconduct or for breach of its covenants or its
+ representations. and warranties to the extent covenanted or made in its
+ individual capacity. extent covenanted or made in its individual capacity.
+
+
+
+
+
+ Section 6. MISCELLANEOUS.
+
+ (a) This Agreement may be signed in one or more counterparts with the
+same effect as if the signatures in each counterpart were upon a single
+document. All counterparts shall be considered an original hereof.
+
+ (b) The terms and provisions hereof shall inure to the benefit of and
+be binding on the parties hereof and their respective successors and assigns.
+
+ (c) This Agreement shall in all respects be governed by and construed
+in accordance with the laws of the State of New York, including all matters
+of construction, validity, and performance.
+
+IN WITNESS WHEREOF, the undersigned have executed this Agreement as of the
+date first above written.
+
+WILMINGTON TRUST COMPANY ITOCHU AIRLEASE LIMITED
+not in its individual capacity but as ("Assignor")
+trustee ("Assignor")
+
+
+By: /s/ Bruce L. Besson By: [Illegible]
+ ------------------------------ -------------------------
+Name: Bruce L. Besson Name:
+ ------------------------------ -------------------------
+Title: V.P. Title: ATTORNEY IN FACT
+ ------------------------------ -------------------------
+
+
+MA-LEASE LIMITED BOEING CAPITAL CORPORATION
+("Assignor") ("Assignor")
+
+By: [Illegible] By: /s/ Stephen J. Novak
+ ------------------------------ -------------------------
+Name: Name: Vice President
+ ------------------------------ -------------------------
+Title: ATTORNEY IN FACT Title: Stephen J. Novak
+ ------------------------------ -------------------------
+
+
+ 2
+
+
+
+
+ CONSENT
+
+ Reference is hereby made to the attached Lease Assignment (capitalized
+ terms used herein without definition are used with the same meaning as in the
+ Lease Assignment). The undersigned hereby consents to the assignment and
+ assumption set forth therein and acknowledges that the Assignors shall, from
+ and after the date hereof, be released from all further obligations under the
+ Lease, except that such release shall not affect any liability arising on or
+ before the effective date of the Lease Assignment.
+
+ The undersigned hereby also agrees that Aloha Airlines, Inc. shall
+ cause each of the Assignors to be added as an additional insured under the
+ liability insurance policies required to be maintained by Aloha Airlines, Inc.,
+ as Lessee, pursuant to the Lease. Aloha Airlines, Inc. agrees that its
+ obligations to maintain such insurance shall continue until the earlier to
+ occur of (i) two years after the date hereof, and (ii) the D check or its
+ equivalent first occurring after the date hereof.
+
+ IN WITNESS WHEREOF, the undersigned has executed this Consent as of
+August 7, 1998.
+
+
+ALOHA AIRLINES, INC.
+
+
+By: /s/ Brenda F. Cutwright
+ ---------------------------------
+Name: Brenda F. Cutwright
+ ---------------------------------
+ Sr. Vice President Finance &
+Title: Planning and CFO
+ ---------------------------------
+
+
+
+By: /s/ James M. King
+ ---------------------------------
+Name: James M. King
+ ---------------------------------
+ Vice President
+Title: Planning & Development
+ ---------------------------------
+
+
+
+ 3
+
+
+
+
+ CONSENT
+
+ Reference Is hereby made to the attached Lease Assignment (capitalized
+ terms used herein without definition are used with the same meaning as in the
+ Lease Assignment). The undersigned hereby consents to the assignment and
+ assumption set forth therein and acknowledges that the Assignors shall, from
+ and after the date hereof, be released from all further obligations under the
+ Lease, except that such release shall not affect any liability arising on or
+ before the effective date of the Lease Assignment.
+
+ The undersigned hereby also agrees that Aloha Airlines, Inc. shall
+ cause each of the Assignors to be added as an additional insured under the
+ liability insurance policies required to be maintained by Aloha Airlines, Inc.,
+ as Lessee, pursuant to the Lease. Aloha Airlines, Inc. agrees that its
+ obligations to maintain such insurance shall continue until the earlier to
+ occur of (i) two years after the date hereof, and (ii) the D check or its
+ equivalent first occurring after the date hereof.
+
+ IN WITNESS WHEREOF, the undersigned has executed this Consent as of
+August 7, 1998.
+
+ALOHA AIRLINES, INC.
+
+
+By: /s/ Brenda F. Cutwright
+ ---------------------------------
+Name: Brenda F. Cutwright
+ ---------------------------------
+ Sr. Vice President Finance &
+Title: Planning and CFO
+ ---------------------------------
+
+
+
+By: /s/ James M. King
+ ---------------------------------
+Name: James M. King
+ ---------------------------------
+ Vice President
+Title: Planning & Development
+ ---------------------------------
+
+
+
+
+ 3
+
+
+
+
+
+ CONSENT
+
+ Reference is hereby made to the attached Lease Assignment (capitalized
+ terms used herein without definition are used with the same meaning as in the
+ Lease Assignment). The undersigned hereby consents to the assignment and
+ assumption set forth therein and acknowledges that the Assignors shall, from
+ and after the date hereof, be released from all further obligations under the
+ Lease, except that such release shall not affect any liability arising on or
+ before the effective date of the Lease Assignment.
+
+ The undersigned hereby also agrees that Aloha Airlines, Inc. shall
+ cause each of the Assignors to be added as an additional insured under the
+ liability insurance policies required to be maintained by Aloha Airlines, Inc.,
+ as Lessee, pursuant to the Lease. Aloha Airlines, Inc. agrees that its
+ obligations to maintain such insurance shall continue until the earlier to
+ occur of (i) two years after the date hereof, and (ii) the D check or its
+ equivalent first occurring after the date hereof.
+
+ IN WITNESS WHEREOF, the undersigned has executed this Consent as of
+August ___,1998.
+
+ ALOHA AIRLINES, INC.
+
+ By:
+ ---------------------------------
+ Name:
+ ---------------------------------
+ Title.
+ ---------------------------------
+
+
+ By:
+ ---------------------------------
+ Name:
+ ---------------------------------
+ Title:
+ ---------------------------------
+
+
+
+
+
+ LEASE EXTENSION AND AMENDMENT AGREEMENT
+
+THIS LEASE EXTENSION AND AMENDMENT AGREEMENT (this "Agreement") dated as of
+November ____, 2001, is by and between BOEING CAPITAL CORPORATION, a Delaware
+corporation ("Lessor") and ALOHA AIRLINES, a Delaware corporation ("Lessee).
+
+ WITNESSETH
+
+WHEREAS, Lessor (as successor to Wilmington Trust Company, not in its individual
+capacity but solely as trustee f/b/o ITOCHU AirLease (Europe) Limited and
+Marubeni Airleasing (U.K) Limited, pursuant to certain Purchase Agreement and
+Assignment Agreement dated June 22, 1998) are parties to Aircraft Lease
+Agreement dated as of November 29, 1995 and filed with the Federal Aviation
+Administration ("FAA") on January 25, 1996 as conveyance no. 11004730 (the
+"Lease") as supplemented by Lease Supplement No. 1 filed with the FAA on
+_________________________ as conveyance no. _________________________, as
+further supplemented by Lease Supplement No. 2 filed with the FAA on April 29,
+1996 as conveyance no. 22011203 (collectively the "Supplement"). The Lease and
+the Lease Supplement shall be collectively referred to as the "Lease Agreement."
+
+WHEREAS, pursuant to the Lease Agreement, Lessee agreed to lease from Lessor one
+(1) Boeing Model 737-25A aircraft bearing manufacturer's serial number 23791
+(the "Aircraft") for a term of ninety-six (96) months;
+
+WHEREAS, Lessee desires to extend the lease Expiration Date from December 1,
+2003 to February 1, 2004 and Lessor is willing to agree to this extension; and
+
+WHEREAS, the parties hereto further desire to amend the Lease Agreement upon the
+terms and conditions hereinafter set forth.
+
+NOW, THEREFORE, in consideration of the mutual agreements contained herein and
+other good and valuable consideration, the receipt and sufficiency of which are
+hereby acknowledged, the parties hereto agree as follows:
+
+SECTION 1. DEFINITIONS
+
+Except as otherwise defined in this Agreement the capitalized terms used herein
+shall have the meanings attributed thereto in the Lease Agreement.
+
+
+ Page 1
+
+
+
+SECTION 2. AMENDMENTS
+
+The Lease Agreement is hereby amended in the following manner:
+
+ 2.1 Section 4.(a)(i) entitled "Basic Rent" is hereby deleted in its
+entirety and the following is inserted in lieu thereof:
+
+ "(i) BASIC RENT: The Basic Rent as set forth on Exhibit C-1 hereto
+ throughout the Term hereof, payable in consecutive installments in arrears
+ on each Basic Rent Payment Date."
+
+ 2.2 Exhibit C-1 definition "Basic Rent" is hereby deleted in its entirety
+and the following is inserted in lieu thereof:
+
+ [This provision has been omitted as confidential information, and
+ is separately filed with the Commission.]
+
+ 2.3 Exhibit C-1 definition "Casualty Value" is hereby deleted in its
+entirety and the following is inserted in lieu thereof:
+
+ [This provision has been omitted as confidential information, and
+ is separately filed with the Commission.]
+
+ 2.4 Exhibit C-1 definition "Security Deposit" is hereby deleted in its
+entirety and the following is inserted in lieu thereof:
+
+ [This provision has been omitted as confidential information, and
+ is separately filed with the Commission.]
+
+ 2.5 Exhibit C-1 definition "Security Letter of Credit" is hereby deleted in
+its entirety.
+
+ 2.6 Exhibit D, Article 3 is hereby deleted in its entirety and revised to
+reflect the new expiration date of February 1, 2004. The following is inserted
+in lieu thereof:
+
+ "3. The term for the Delivered Aircraft shall commence on the Delivery Date
+ and shall end on the Expiration Date, which shall be February 1, 2004."
+
+
+ Page 2
+
+
+
+SECTION 3. TRANSFER OF PAYMENTS
+
+ [This provision has been omitted as confidential information, and
+is separately filed with the Commission.]
+
+
+SECTION 4. REPRESENTATIONS, WARRANTIES AND COVENANTS
+
+ 4.1 REPRESENTATIONS AND WARRANTIES. Lessee hereby repeats the
+representations and warranties in Section 3 of the Lease Agreement as if made
+with reference to the facts and circumstances existing as at the date hereof and
+as if the references in such representations and warranties to this Agreement
+refer not to the Lease but rather to the Lease Agreement as amended by this
+Agreement.
+
+SECTION 5. CONDITIONS PRECEDENT
+
+ This Agreement is subject to the satisfaction of each of the following
+conditions or receipt of documents by Lessor, each in form and substance
+satisfactory to Lessor:
+
+ 5.1 CORPORATE ACTION. Lessee, must have furnished to Lessor satisfactory
+proof that Lessee has taken all corporate action necessary to authorize this
+Agreement.
+
+ 5.2 OPINION OF COUNSEL. Lessor shall have received a legal opinion from
+Lessee's counsel in form and substance satisfactory to Lessor and confirming,
+INTER ALIA, that this Agreement has (i) been duly signed and delivered on behalf
+of Lessee, (ii) that the representations, warranties and covenants of Lessee set
+forth in this Agreement are valid and legally binding obligations of Lessee,
+enforceable in accordance with their respective terms in the jurisdictions in
+which the Aircraft will be registered, operated and located; (iii) this
+Agreement and all other necessary documents have been, or will be, duly filed,
+registered and recorded as necessary with the aviation or other comparable
+authority in all places as may be necessary and all other action has been taken
+to fully protect Lessor's title to the Aircraft against all persons whomsoever
+(specifying in such opinion the location and nature of all such filings,
+registrations or recordings); (iv) upon termination of the Lease Agreement for
+any reason, Lessor will be able to promptly take possession of the Aircraft and
+immediately return the Aircraft to facilities in the United States designated by
+Lessor in a manner satisfactory to Lessor.
+
+ 5.3 APPROVAL OF LESSOR'S COUNSEL. All documents incident to this
+transaction will satisfactory to counsel for Lessor.
+
+ 5.4 ADDITIONAL INFORMATION. Lessee must have furnished Lessor with any
+other opinions, documents, evidence, materials and information as Lessor may
+reasonably request.
+
+
+ Page 3
+
+
+
+ 5.5 ENGLISH. All certificates, opinions, statements, evidence, proof,
+documents, materials and information required under this Section must be
+furnished in English.
+
+ 5.6 INSURANCE. Lessee must have furnished to Lessor certificates of
+insurance evidencing the insurance required under the Lease as extended hereby.
+
+SECTION 6. CONDITIONS SUBSEQUENT
+
+ 6.1 RECORDATION. Immediately following execution of this Agreement by
+Lessor and Lessee, the Agreement will be filed for recordation by Lessee with
+the FAA, and all other necessary action will be taken by Lessee to fully protect
+the title of Lessor in the Aircraft. Lessee agrees that failure to effect such
+filings, registrations and/or recordations as of the date falling 2 weeks after
+execution by the last party to execute this Agreement shall constitute an "Event
+of Default" under the Lease Agreement and Lessor shall have the right to
+exercise all remedies available to it under Section 13 of the Lease Agreement
+with respect to such Event of Default.
+
+SECTION 7. MISCELLANEOUS
+
+ 7.1 HEADINGS. The section and paragraph headings in this Agreement are for
+convenience of reference only and shall not define or limit any of the terms or
+provisions hereof.
+
+ 7.2 CHOICE OF LAW. THIS AGREEMENT SHALL IN ALL RESPECTS BE GOVERNED AND
+CONSTRUED IN ACCORDANCE WITH THE LAWS OF THE STATE OF NEW YORK, INCLUDING ALL
+MATTERS OF CONSTRUCTION, VALIDITY AND PERFORMANCE.
+
+ 7.3 COUNTERPARTS. This Agreement may be executed by the parties hereto in
+separate counterparts, each of which when so executed and delivered shall be an
+original, but all such counterparts shall together constitute but one and the
+same instrument.
+
+ 7.4 SUCCESSORS AND ASSIGNS. This Agreement shall be binding upon and shall
+inure to the benefit of, and shall be enforceable by and against, the parties
+hereto and their respective successors and permitted assigns.
+
+ 7.5 EFFECT OF AGREEMENTS. Except as expressly amended hereby, the Lease
+Agreement is and shall continue in full force and effect.
+
+
+ Page 4
+
+
+
+IN WITNESS WHEREOF, the parties hereto have caused this Agreement to be duly
+executed and delivered by their respective duly authorized officers as of the
+date first above written.
+
+
+ BOEING CAPITAL CORPORATION
+
+ By: /s/ [ILLEGIBLE]
+ -------------------------------
+
+ Its: Vice President
+ -------------------------------
+
+
+ ALOHA AIRLINES, INC.
+
+
+ By:
+ -------------------------------
+
+ Its:
+ -------------------------------
+
+
+ By:
+ -------------------------------
+
+ Its:
+ -------------------------------
+
+
+ Page 5
+
+
+
+IN WITNESS WHEREOF, the parties hereto have caused this Agreement to be duly
+executed and delivered by their respective duly authorized officers as of the
+date first above written.
+
+
+ BOEING CAPITAL CORPORATION
+
+ By:
+ -------------------------------
+
+ Its:
+ -------------------------------
+
+
+ ALOHA AIRLINES, INC.
+
+ By: /s/ Brenda F. Cutwright
+ -------------------------------
+ BRENDA F. CUTWRIGHT
+
+ Its: EXECUTIVE VICE PRESIDENT & CFO
+ -------------------------------
+
+ By: /s/ James M. King
+ -------------------------------
+ JAMES M. KING
+
+ Its: SENIOR VICE PRESIDENT PLANNING
+ AND BUSINESS DEVELOPMENT
+ -------------------------------
+
+
+ Page 5
+
+
+
+IN WITNESS WHEREOF, the parties hereto have caused this Agreement to be duly
+executed and delivered by their respective duly authorized officers as of the
+date first above written.
+
+
+ BOEING CAPITAL CORPORATION
+
+ By: /s/ [ILLEGIBLE]
+ -------------------------------
+
+ Its: Vice President
+ -------------------------------
+
+
+ ALOHA AIRLINES, INC.
+
+
+ By:
+ -------------------------------
+
+ Its:
+ -------------------------------
+
+
+ By:
+ -------------------------------
+
+ Its:
+ -------------------------------
+
+
+ Page 5
+
+
+
\ No newline at end of file
diff --git a/extensions/superboogav2/benchmark_texts/questions.json b/extensions/superboogav2/benchmark_texts/questions.json
new file mode 100644
index 0000000000000000000000000000000000000000..7dad24e07d379181c7f0a8a9fa47fb853a812876
--- /dev/null
+++ b/extensions/superboogav2/benchmark_texts/questions.json
@@ -0,0 +1,291 @@
+[
+ {
+ "text": "extensions/superboogav2/benchmark_texts/aircraft_lease.txt",
+ "questions": [
+ {
+ "question_variants": [
+ "What is a wet lease?",
+ "Agh, I'm really wracking my brain here, but can't figure it out. What is a wet lease?",
+ "I've been trying to wrap my head around this concept and it's just not clicking. Could you elucidate the concept of a wet lease?",
+ "I'm finding it so hard to understand this whole wet lease thing! Would you be so kind as to explicate on the matter of what is known as a wet lease in the domain of aviation?",
+ "I've spent hours trying to grasp this and I'm still lost. Could you engage in a detailed exploration of the intricate and nuanced topic that is the wet lease, as it is commonly understood and applied within the broad and complex ecosystem of aviation?"
+ ],
+ "criteria": [
+ "WET LEASE shall mean any arrangement whereby Lessee agrees to\n operate the Aircraft under a contractual arrangement with a third party\n pursuant to which no rights to any aircraft specifically identified by serial\n number or registration number are granted to such third party and pursuant to\n which the Aircraft (i) remains at all times under the sole and complete\n operational control of Lessee",
+ "(ii) shall be operated solely by cockpit crew\n employed by Lessee possessing all current certificates and licenses required by\n applicable Laws and (iii) shall be maintained by Lessee in accordance with all\n the provisions of the Lease including, but not limited to, Sections 6(d) and 12\n (it being understood and agreed by Lessor and Lessee that any Wet Lease shall,\n by its terms, be in all cases subject and subordinate to this Lease)."
+ ]
+ },
+ {
+ "question_variants": [
+ "What is PBGC?",
+ "I'm stumped! Can you tell me what PBGC is?",
+ "I've been racking my brain trying to understand PBGC. Would you mind explaining the concept to me?",
+ "I've been trying to grasp what PBGC represents in the context of pension benefits, but I'm not getting it. Would it be possible for you to expound upon the matter?",
+ "I'm having trouble understanding the multi-dimensional entity known as 'PBGC'. Could you embark on an exploration of it? How is it traditionally comprehended, interpreted, and implemented within the sphere of pension benefits?"
+ ],
+ "criteria": [
+ "PBGC shall mean the Pension Benefit Guaranty Corporation\n established pursuant to Subtitle A of Part IV of ERISA, and any successor\n thereof."
+ ]
+ },
+ {
+ "question_variants": [
+ "What is LIEN?",
+ "I can't seem to find information on what LIEN is. Can you help me?",
+ "I'm feeling stuck. I can't seem to find any information on what LIEN is. Could you provide some insights?",
+ "It's like I'm chasing my tail here. I've been searching high and low and I just can't seem to find anything that clearly explains what LIEN is. Could you lend me a hand in understanding it?",
+ "I'm in a bit of a pickle. I've scoured the internet, flipped through countless books, and I still can't seem to find any definitive information on what LIEN is. Could you possibly help me get my head around it?"
+ ],
+ "criteria": [
+ "LIEN shall mean any mortgage, pledge, lien, charge,\n encumbrance, lease, exercise of rights, security interest or claim."
+ ]
+ },
+ {
+ "question_variants": [
+ "What happens if the Lease is terminated by operation of law?",
+ "I'm a bit lost here. What happens if the Lease is terminated by operation of law? Can you help me understand?",
+ "I've been trying to figure this out but I'm stuck. What exactly happens if the Lease is terminated by operation of law? Could you explain?",
+ "I've been poring over this and I'm still not clear. What exactly transpires if the Lease is terminated by operation of law? I'd appreciate your help in understanding this.",
+ "I'm really hitting a wall here. I've been trying to understand this, but it feels like I'm reading a foreign language. What's the end result if the Lease is terminated by operation of law? Any help in understanding this, particularly a detailed breakdown, would be greatly appreciated."
+ ],
+ "criteria": [
+ "If for any reason whatsoever this Lease shall be terminated\n in whole or in part by operation of law (other than termination under any\n bankruptcy laws as now or hereafter in effect), Lessee nonetheless agrees to\n pay to Lessor amounts equal to the Rent payments hereunder at the time such\n payments would have become due and payable in accordance with the terms hereof",
+ "had this Lease not been terminated so long as Lessee is able to use, possess\n and quietly enjoy the Aircraft, and so long as such payments are made and all\n other terms and conditions hereof are complied\n\n -16-\n\n\n\n\n\nwith by Lessor and Lessee, Lessor and Lessee will deem this Lease to remain in\nfull force and effect."
+ ]
+ },
+ {
+ "question_variants": [
+ "What happens if a discrepancy or malfunction is detected during the Acceptance Flight?",
+ "I'm having difficulty understanding this part. What exactly happens if a discrepancy or malfunction is detected during the Acceptance Flight? Can you provide a thorough explanation?",
+ "I'm stuck on this one. I'm struggling to comprehend what steps are taken if a discrepancy or malfunction is detected during the Acceptance Flight. Could you possibly explain this in detail?",
+ "I've been poring over this issue for a while, and it's not clicking. What steps are taken or what are the implications if a discrepancy or malfunction is detected during the Acceptance Flight? I'd appreciate a comprehensive explanation.",
+ "I'm really hitting a wall here. I've been trying to understand, but it's like I'm reading a foreign language. What's the end result or the next step if a discrepancy or malfunction is detected during the Acceptance Flight? Any help in understanding this, particularly a detailed breakdown, would be greatly appreciated."
+ ],
+ "criteria": [
+ "If, during the Acceptance Flight, no discrepancy or malfunction is\n detected with respect to the airworthiness or operational nature of\n the Aircraft by normal airline standards, then (i) the delivery of\n the Aircraft from Lessor to Lessee hereunder shall occur, the\n Aircraft shall be accepted by Lessee hereunder whilst the Aircraft\n is located at the Delivery Location, and Lessee shall execute and\n deliver the Lease Supplement, (ii) th",
+ "e Aircraft shall continue en\n route to a location selected by Lessee (the \"Ferry Location\"), under\n the operational control of Lessee from the time of such delivery and\n acceptance (the Acceptance Flight shall terminate at the time of\n such delivery and acceptance, and that portion of the flight from\n the Delivery Location to the Ferry Location is herein called the\n \"Ferry Flight\"), and (iii) Lessee shall bear the costs of the flight\n ",
+ " crew, fuel and other costs\n\n\n -12-\n\n\n\n\n\n relating to the Ferry Flight and shall reimburse the Beneficiaries\n therefor promptly following receipt of the Beneficiaries, invoice\n therefor, PROVIDED, HOWEVER, that, if any discrepancy or malfunction\n is detected with respect to the airworthiness during the Acceptance\n Flight, then, at Lessee's option after consultation with Lessor,\n either ",
+ "(A) the Aircraft shall be delivered to and accepted by Lessee\n at the Delivery Location and shall be ferried to the Ferry Location,\n as provided in clauses (i), (ii) and (iii) above, where Lessee shall\n remedy such discrepancy or malfunction at the cost (without mark up)\n of the Beneficiaries (provided that such subsequent repair or\n maintenance work shall not affect Lessee's acceptance of the\n Aircraft hereunder), or (B) the Aircraft s",
+ "hall return to the\n Beneficiaries' storage and maintenance facility where such\n discrepancy or malfunction shall be corrected at the Beneficiaries'\n expense, in which case the Delivery Date shall be postponed to such\n date as the Beneficiaries shall advise, subject to the second\n paragraph of Section 3(c) below. Any discrepancy or malfunction\n detected of an airworthiness nature shall be corrected by Lessee or\n the Beneficiari",
+ "es in accordance with clause (A) or (B) above, as\n applicable, at Beneficiaries, expense. If during the Acceptance\n Flight a discrepancy or malfunction is detected with respect to the\n operational nature of the Aircraft by normal airline standards but\n no discrepancy or malfunction is detected with respect to the\n airworthiness of the Aircraft, then the Aircraft shall be delivered\n to and accepted by Lessee at the Delivery Location as p",
+ "rovided in\n clause (A) above, and Lessee shall remedy such discrepancy or\n malfunction at the cost (without mark up) of the Beneficiaries.\n\n In anticipation of the occurrence of the Delivery Date, the\n Beneficiaries retained a flight crew to conduct the Acceptance\n Flight and will incur costs relating to such retention. In\n connection therewith, Lessee agrees to reimburse the Beneficiaries,\n promptly following Lessee's receipt",
+ " of an invoice therefor, for\n one-half of the costs incurred by the Beneficiaries in connection\n with retaining such flight crew for the Acceptance Flight, which\n costs relate to the period commencing on and including November 29,\n 1995 and ending on the day immediately preceding the Delivery Date."
+ ]
+ },
+ {
+ "question_variants": [
+ "What condition must the Aircraft meet before being delivered to the Lessee?",
+ "I'm having some trouble understanding this part. Could you please clarify what condition the Aircraft must meet before being delivered to the Lessee? I would appreciate a detailed explanation.",
+ "I'm stuck on this point. I'm finding it difficult to understand the specific condition the Aircraft must be in before being handed over to the Lessee. Could you possibly provide a comprehensive explanation?",
+ "I'm feeling a bit lost here. I'm having trouble understanding the exact condition or standard that the Aircraft must meet before being delivered to the Lessee. Could you provide a detailed walkthrough of the requirements?",
+ "I've hit a bit of a wall with this one. I've been trying my best to understand this, but it's proving to be quite complex. What is the precise condition that the Aircraft must meet before it can be delivered to the Lessee? Any help in understanding this, particularly a detailed explanation, would be of great help."
+ ],
+ "criteria": [
+ "(d) Lessee's obligation to lease the Aircraft hereunder from\nLessor shall also be conditioned upon the Aircraft being delivered to Lessee in\nthe following condition:\n\n (1) The Aircraft shall be airworthy and in good\n operating condition\n\n\n -11-\n\n\n\n\n with all of the Aircraft equipment, components and systems;\n\n (2) The Aircraft shall be clean;\n\n (3) The Airc",
+ "raft shall meet the requirements for\n airworthiness certification by the FAA;\n\n (4) A borescope of the Engines and the inspection of\n the APU in accordance with the Manufacturer's or APU manufacturer's\n recommendation shall have been performed at the direction of Lessee\n but under the control and at the cost of the Beneficiaries, and any\n discrepancies discovered in connection therewith shall have been\n corrected;"
+ ]
+ },
+ {
+ "question_variants": [
+ "What rights does the Lessee waive under section 4(c)?",
+ "Can you tell me about the rights that the Lessee gives up under section 4(c)?",
+ "I'm having some difficulty here, could you please explain to me what rights the Lessee is forfeiting under the terms of section 4(c)?",
+ "I'm really struggling to understand this part, it's quite complex. Could you clarify what rights the Lessee is explicitly waiving as per section 4(c) in this agreement?",
+ "I'm pulling my hair out! What does this even mean? Can you assist me in deciphering what rights the Lessee is giving up or putting aside according to section 4(c)? I'm finding this part particularly challenging to grasp."
+ ],
+ "criteria": [
+ "(c) PROHIBITION AGAINST SETOFF, COUNTERCLAIM, ETC. This Lease\n is a net lease. Subject to Section 20(f), Lessee's obligation to pay all Rent\n hereunder shall be absolute and unconditional and shall not be affected or\n reduced by any circumstance, including, without limitation, (i) any setoff,\n counterclaim, recoupment, defense or other right which Lessee may have against\n Lessor, any Beneficiary, the Manufacturer, the Engine Manufacturer, any seller\n of or person providing services with respect ",
+ "to the Aircraft or any other\n Person, for any reason whatsoever; (ii) any defect in the title, airworthiness\n or eligibility for registration under applicable Law, or any condition, design,\n operation or fitness for use of, or any damage to or loss or destruction of,\n the Aircraft, or any interruption or cessation in the use or possession thereof\n by Lessee for any reason whatsoever, whether arising out of or related to an\n act or omission of Lessee, or any other Person; (iii) any Liens with res",
+ "pect to\n the Aircraft; (iv) the invalidity or unenforceability or lack of due\n authorization or other infirmity of this Lease or any absence of right, power\n or authority of Lessor or Lessee to enter into this Lease; (v) any insolvency,\n bankruptcy, reorganization or similar proceedings by or against Lessor or\n Lessee; (vi) any other circumstance or happening of any nature whatsoever,\n similar to any of the foregoing; or (vii) any Taxes (other Taxes to which\n Lessee's indemnity does not extend p",
+ "ursuant to the provisions of Section 10);\n it being the express intention of Lessor and Lessee that all Rent payable\n hereunder shall be payable in all events, unless the obligation to pay the same\n shall be terminated pursuant to the express provisions of this Lease. Nothing\n in this paragraph (c) shall constitute a waiver by Lessee of any right or claim\n that Lessee may separately assert against Lessor or any Beneficiary.\n\n Lessee hereby waives, to the extent permitted by app",
+ "licable\n Law, any and all rights which it may now have or which at any time hereafter\n may be conferred upon it, by Law or otherwise, to terminate this Lease or any\n obligation imposed upon Lessee hereunder or in relation hereto.\n\n If for any reason whatsoever this Lease shall be terminated\n in whole or in part by operation of law (other than termination under any\n bankruptcy laws as now or hereafter in effect), Lessee nonetheless agrees to\n pay to Lessor amounts equal to the R",
+ "ent payments hereunder at the time such\n payments would have become due and payable in accordance with the terms hereof\n had this Lease not been terminated so long as Lessee is able to use, possess\n and quietly enjoy the Aircraft, and so long as such payments are made and all\n other terms and conditions hereof are complied\n\n -16-\n\n\n\n\n\nwith by Lessor and Lessee, Lessor and Lessee will deem this Lease to remain in\nfull force and effect."
+ ]
+ },
+ {
+ "question_variants": [
+ "Can the Lessor and Beneficiaries conduct inspections without notice under certain conditions? What are those conditions?",
+ "Is it possible for the Lessor and Beneficiaries to carry out inspections without prior notice, given specific circumstances? If so, could you explain what these circumstances might be?",
+ "I'm finding myself a bit confused here. Can the Lessor and Beneficiaries, under any special conditions, perform inspections without providing any advance notice? If yes, what exactly are these special conditions?",
+ "I'm at my wit's end! Can the Lessor and Beneficiaries actually go ahead and conduct inspections without giving a heads up, but only when certain conditions are met? What exactly are these conditions that would allow for such actions?",
+ "I'm really trying to get my head around this, but I could use some assistance. Is it within the Lessor and Beneficiaries' rights to initiate inspections without any forewarning, but only under certain predefined circumstances? What are these circumstances exactly?"
+ ],
+ "criteria": [
+ "Lessee shall permit Lessor, each Beneficiary and their\n respective designees on at least seven (7) days' prior written notice to visit\n and inspect the Aircraft, its condition, use and operation and the records\n maintained in connection therewith during normal business hours; PROVIDED,\n HOWEVER, that this shall not unreasonably interfere with Lessee's quiet use and\n enjoyment of the Aircraft PROVIDED FURTHER, HOWEVER, that Lessor or the\n Beneficiaries may conduct such visit and inspection at any",
+ " time and with or\n without notice if an Event of Default has occurred and is continuing."
+ ]
+ },
+ {
+ "question_variants": [
+ "What aircraft-related information will the Lessee provide on a monthly and annual basis?",
+ "Could you let me know what type of aircraft-related details the Lessee is obligated to provide on a monthly and annual basis?",
+ "I'm finding it a bit tricky to understand this part - could you help me clarify what specific aircraft-related data or information is the Lessee expected to report on both a monthly and an annual basis?",
+ "I'm really trying to grapple with this agreement. Could you assist me in figuring out the exact nature of the aircraft-related information that the Lessee is required to furnish on a consistent monthly and annual basis?",
+ "I'm genuinely struggling here! What does it mean exactly? What is the exact nature and extent of the aircraft-related data or information that the Lessee has to provide routinely, both on a monthly and an annual basis? I'm having a hard time understanding the specificities of this provision."
+ ],
+ "criteria": [
+ "(v) Lessee will use its reasonable efforts to provide the\n Beneficiaries on or before the fifth day of each calendar month\n commencing with the next calendar month of the Delivery Date, and shall\n in any event provide to the Beneficiaries upon request of a Beneficiary,\n with a properly completed Monthly Aircraft Utilization and Status Report\n in the Form of Exhibit J hereto for the preceding calendar month\n operation of the aircraft;\n\n (vi) Lessee ",
+ "will use its reasonable efforts to provide the\n Beneficiaries, on or before the 15th day of January of each year\n (commencing with January 1996), and shall in any event provide Lessor and\n the Beneficiaries upon request of a Beneficiary in English, the\n information and documentation for the preceding calendar year as listed\n in Exhibit K hereto; PROVIDED, HOWEVER, that if (i) a Default or an Event\n of Default shall have occurred and be continuing or (ii) Lessee's\n ",
+ " financial condition changes adversely from its financial condition at the\n time of the Delivery Date, then, upon notice and a request from Lessor or\n a Beneficiary, Lessee shall provide such information on a quarterly basis\n on the 15th day of each January, April, July and October, commencing with\n the first of such dates to follow the date of such notice."
+ ]
+ },
+ {
+ "question_variants": [
+ "Under what conditions can Lessee consolidate, merge, or transfer assets without Lessor's prior written consent according to the text?",
+ "Could you explain under which specific circumstances the Lessee is allowed to consolidate, merge, or transfer assets without needing the Lessor's prior written approval, as stated in the text?",
+ "I'm having a bit of trouble with this section, could you clarify the exact conditions under which the Lessee is permitted to consolidate, merge, or transfer assets without first obtaining the Lessor's written consent, as outlined in the text?",
+ "I'm really wracking my brain here trying to understand the terms. Can you help me decipher under which exact circumstances or conditions the Lessee can execute consolidation, merging, or asset transfer without needing prior written consent from the Lessor, as the text suggests?",
+ "I'm pulling my hair out here! What on earth does it mean? What are the specific conditions or circumstances under which the Lessee can consolidate, merge, or transfer assets without having to acquire the Lessor's prior written consent, as it's described in the text? This is really a tough one to crack!"
+ ],
+ "criteria": [
+ "(iv) CONSOLIDATION, MERGER, ETC. Without the prior written\n consent of Lessor and each Beneficiary, Lessee shall not consolidate with,\n merge with or merge into any other Person or convey, transfer or lease\n substantially all of its assets as an entirety to any other Person unless, upon\n and after giving effect to such transaction, (A) the surviving entity has at\n least the same net worth and gross assets as the Lessee immediately prior to\n such transaction, such surviving entity is Certified Ai",
+ "r Carrier and a \"citizen\n of the United States\" as defined in Section 101(16) of the Federal Aviation\n Act, (C) Lessor shall continue to be entitled to the benefits of Section 1110\n of the United States Bankruptcy Code, as in effect from time to time, and (D)\n each of the Operative Documents shall continue in full force and effect and\n shall constitute the legally binding and enforceable obligation of such\n surviving entity."
+ ]
+ },
+ {
+ "question_variants": [
+ "Who is responsible for replacing any parts on the Aircraft that become worn out, damaged, etc?",
+ "Could you please specify who holds the responsibility for replacing any parts of the Aircraft that may become worn out, damaged, or similarly affected?",
+ "I'm having a little trouble understanding this part. Who exactly is tasked with the responsibility of replacing any components of the Aircraft that may get worn out, damaged, or otherwise impaired?",
+ "I'm really scratching my head trying to figure out who precisely is designated to handle the replacement of any Aircraft parts that become worn out, damaged, or in similar conditions? This aspect seems a bit complicated.",
+ "I'm on the verge of losing it! Who in the world is charged with the duty of replacing any parts of the Aircraft that get worn out, damaged, or anything like that? I'm really finding it tough to get my head around this point."
+ ],
+ "criteria": [
+ "(a) REPLACEMENT OF PARTS. Lessee, at its own cost and\n expense, will promptly replace all Parts which may from time to time become\n worn out, lost, stolen, destroyed, seized, confiscated, damaged beyond repair\n or permanently rendered unfit for use for any reason whatsoever. In addition,\n in the ordinary course of maintenance, service, repair, overhaul or testing,\n Lessee may at its own cost and expense cause to be removed any Parts, whether\n or not worn out, destroyed, damaged beyond repair or ",
+ "permanently rendered unfit\n for use, provided that Lessee shall replace at its own cost and expense such\n Parts as promptly as practicable. All replacement Parts shall be free and clear\n of all Liens, other than Liens permitted by Section 14 hereof, shall be in at\n least the same modification status and service bulletin accomplishment status,\n shall be fully interchangeable as to form, fit and function, shall have been\n overhauled or repaired and inspected by an agency acceptable to the FAA and\n",
+ " shall be in as good an operating condition as, and have a utility at least\n equal to and a value and remaining warranty reasonably approximating, the Parts\n replaced (assuming such replaced Parts were in the condition and repair in\n which they were required to be maintained by the terms hereof) and all\n historical records since new or last overhaul relating to such Parts (and all\n historical records since manufacture with respect to Engines, Landing Gears,\n the APU and all life limited parts in",
+ "stalled on any Engine, Landing Gear or\n APU) shall be maintained by Lessee."
+ ]
+ },
+ {
+ "question_variants": [
+ "Who bears responsibility if alterations, modifications or additions to the Aircraft result in any loss of revenue or grounding?",
+ "Can you clarify who would take responsibility if any alterations, modifications, or additions made to the Aircraft cause any loss of revenue or result in grounding?",
+ "I'm having some difficulty here. Could you please specify who should shoulder the responsibility if any changes, modifications or additions to the Aircraft lead to any form of revenue loss or cause the aircraft to be grounded?",
+ "I'm really trying to understand this, but it's complex. Could you elucidate who is to bear the brunt if alterations, modifications, or additions to the Aircraft culminate in a loss of revenue or result in the grounding of the aircraft?",
+ "I'm pulling my hair out over this! Who on earth would bear the responsibility if any alterations, modifications, or additions that are made to the Aircraft end up causing some form of revenue loss or force the aircraft to be grounded? I'm finding this part particularly challenging to comprehend."
+ ],
+ "criteria": [
+ "In no event shall Lessor bear any liability or cost for any\n alteration, modification or addition to, or for any grounding or suspension of\n certification of, the Aircraft, or for any loss of revenue arising therefrom.\n Lessee shall make no material alterations, modifications or additions to the\n Aircraft (such as removal of seats, galleys, lavatories, major avionics\n equipment or the like) that would affect the marketability of the Aircraft\n without Lessor's and each Beneficiary's prior written",
+ " consent. if Lessor and\n each Beneficiary grant such consent, title to such removed Parts shall remain\n with Lessor and Lessor and the Beneficiaries may request Lessee to reinstall\n such Parts prior to termination of this Lease. If Lessor or Beneficiaries\n request Lessee to reinstall such Parts, title to the Parts removed shall vest\n in Lessee. All costs associated with such removal and reinstallation shall be\n borne by Lessee."
+ ]
+ },
+ {
+ "question_variants": [
+ "Who is the assignor and who is the assignee?",
+ "Can you help me identify who the assignor is and who takes the role of the assignee?",
+ "I'm having some trouble figuring this out. Could you clarify for me who exactly is the assignor and who is designated as the assignee in this context?",
+ "I'm really wrestling with this, it seems a bit tricky. Could you help me to understand who exactly is acting as the assignor and who is being recognized as the assignee in this particular scenario?",
+ "I'm at my wits' end here! What does it mean? Who exactly is playing the role of the assignor and who is being referred to as the assignee in this situation? This is proving to be quite a tough nut to crack!"
+ ],
+ "criteria": [
+ "ASSIGNOR: ALOHA AIRLINES, INC.,\n A HAWAII CORPORATION",
+ "ASSIGNEE: ALOHA AIRLINES, INC., A\n DELAWARE CORPORATION"
+ ]
+ },
+ {
+ "question_variants": [
+ "What does it mean when the Assignee is referred to as a 'Certified Air Carrier'?",
+ "Could you clarify what is implied when the Assignee is labeled as a 'Certified Air Carrier'?",
+ "I'm having a hard time understanding this. Can you explain what the term 'Certified Air Carrier' means when it is applied to the Assignee in this context?",
+ "I'm really struggling here to understand this terminology. Could you assist in explaining what it means when the Assignee is characterized as a 'Certified Air Carrier' in this particular situation?",
+ "I'm almost at the end of my tether! What does this even mean? Can you help me grasp the meaning when the Assignee is designated as a 'Certified Air Carrier'? This particular terminology is really throwing me for a loop!"
+ ],
+ "criteria": [
+ "(e) Assignee is a Certified Air Carrier and holds all\nlicenses, certificates, permits and franchises from the appropriate agencies of\nthe United States of America and/or all other governmental authorities having\njurisdiction which are necessary to authorize the Assignee to engage in air\ntransport and to carry on its business as presently conducted and to be\nconducted with the Aircraft."
+ ]
+ },
+ {
+ "question_variants": [
+ "Why is it important for the Assignee to be a 'citizen of the United States' as defined in 40102(a)(15) of Title 49 of the United States Code?",
+ "Could you help me understand why it's significant for the Assignee to be defined as a 'citizen of the United States' as per 40102(a)(15) of Title 49 of the United States Code?",
+ "I'm finding it a bit challenging to comprehend this part. Why is it crucial for the Assignee to be designated as a 'citizen of the United States', as defined under 40102(a)(15) of Title 49 of the United States Code?",
+ "I'm really trying to unravel this, but it seems quite complex. Could you elucidate why it's so imperative for the Assignee to be identified as a 'citizen of the United States', as per the definition provided in 40102(a)(15) of Title 49 of the United States Code?",
+ "I'm pulling my hair out over this! What does it even mean? Can you help me decipher why it's so essential for the Assignee to be considered a 'citizen of the United States', as stipulated in 40102(a)(15) of Title 49 of the United States Code? I'm finding this legal terminology particularly difficult to grasp."
+ ],
+ "criteria": [
+ "(f) Assignee is a \"citizen of the United States\" as defined\nin 40102(a)(15) of Title 49 of the United States Code."
+ ]
+ },
+ {
+ "question_variants": [
+ "How many days do I have to pay?",
+ "Could you specify the number of days I'm given to complete the payment?",
+ "I'm a bit unsure about the payment deadline. Could you clarify how many days exactly I have to make the payment?",
+ "I'm really trying to understand the payment terms. Could you help me ascertain the exact number of days that I am allotted to finalize the payment?",
+ "I'm so confused! What does this mean exactly? Can you help me comprehend the specific amount of time, in days, that I have been provided with to conclude the payment? I'm finding this financial term quite challenging to understand."
+ ],
+ "criteria": [
+ "(e) TIMING OF PAYMENT. Any amount due and payable to the\n relevant Indemnitee pursuant to this Section 10 will be paid within 10 days\n after receipt of a written demand therefor from such Indemnitee accompanied by\n a written statement describing in reasonable detail the basis for such\n indemnity and the computation of the amount so payable; PROVIDED, HOWEVER, that\n such amount need not be paid by Lessee prior to the later of (i) five days\n prior to the date the applicable Tax is payable to the a",
+ "ppropriate Governmental\n Entity or taxing authority or (ii) in the case of amounts which are being\n contested by Lessee in good faith or by Lessor pursuant to Section 10(f), the\n date such contest is finally resolved. If requested in writing by Lessee, and\n at Lessee's sole cost and expense, any calculations by an Indemnitee of any\n amount due and payable\n\n -44-\n\n\n\n\n\n hereunder shall be subject to review and verification by a firm of independent\n certif",
+ "ied public accounts of internationally recognized stature selected by\n such Indemnitee and reasonably acceptable to Lessee (such approval not to be\n unreasonably withheld or delayed). Such Indemnitee shall make available to such\n accounting firm such information as shall be necessary for purposes of such\n review and verification (but such information shall be held by such accounting\n firm in strictest confidence and shall not in any event be disclosed or made\n available to Lessee). If the result",
+ " of such review is that Lessee was liable\n for a smaller amount, the excess payment shall be returned by such Indemnitee\n forthwith."
+ ]
+ },
+ {
+ "question_variants": [
+ "What currency should I pay in?",
+ "Could you please clarify in which currency I am expected to make the payment?",
+ "I'm a bit puzzled here, could you specify the exact currency I should use for the payment?",
+ "I'm really scratching my head trying to figure this out. Could you help me understand in which specific currency I am supposed to settle the payment?",
+ "I'm quite frustrated at this point! What exactly does it mean? Can you elucidate in which particular currency I'm required to execute the payment? I'm finding this point a bit difficult to decipher."
+ ],
+ "criteria": [
+ "(i) PAYMENTS IN U.S. DOLLARS. All amounts to be paid hereunder to\nLessor or Lessee shall be paid in Dollars, in immediately available funds.\nLessee acknowledges that the specification of Dollars in this transaction is\nof the essence and that Dollars shall be the currency of account in any and\nall events. The obligations of Lessee or Lessor hereunder, to Lessor or\nLessee, respectively, shall not be discharged by an amount paid in another\ncurrency, whether pursuant to a judgment or otherwise, to t",
+ "he extent that the\n amount so paid on prompt conversion to Dollars under normal banking\nprocedures does not yield the amount of Dollars owing to Lessor."
+ ]
+ },
+ {
+ "question_variants": [
+ "What is the US registration number of the aircraft?",
+ "Could you please tell me the US registration number assigned to the aircraft?",
+ "I'm having some difficulty here. Could you specify the exact US registration number of the aircraft?",
+ "I'm really struggling to get this part. Could you assist me in figuring out what the specific US registration number for the aircraft is?",
+ "I'm pulling my hair out over this! What does it mean exactly? Can you help me decipher the precise US registration number that's associated with the aircraft? I'm finding it a bit challenging to understand."
+ ],
+ "criteria": [
+ "U.S.\n MODEL AND REGISTRATION MANUFACTURER'S\nITEM MANUFACTURER CONFIGURATION NUMBER SERIAL NUMBER\n-------------------------------------------------------------------------------------------------------------------\n\n\nAircraft The Boeing Compa",
+ "ny 737-25A N685MA*"
+ ]
+ },
+ {
+ "question_variants": [
+ "What is the maximum duration that a safety or maintenance requirement can remain unaddressed on the aircraft, particularly in terms of airworthiness directives and mandatory orders?",
+ "How long can a safety or maintenance requirement, especially airworthiness directives and mandatory orders, be left unresolved?",
+ "How long can an airworthiness directive or mandatory order remain outstanding on the aircraft according to standard lease agreements?",
+ "What's the longest period that a safety or maintenance requirement, such as airworthiness directives and mandatory orders, can remain unmet on a leased aircraft?",
+ "What is the maximum allowable timeframe for a safety or maintenance requirement to be left unattended to on an aircraft, specifically referring to airworthiness directives and mandatory orders?"
+ ],
+ "criteria": [
+ "(i) have had all repetitive airworthiness directives and mandatory\n orders and regulations in at least half-life or better condition;"
+ ]
+ },
+ {
+ "question_variants": [
+ "What are the payment locations?",
+ "Could you specify where exactly I should be making the payments? Are there particular bank accounts or locations?",
+ "I'm a bit puzzled here. Could you clarify the exact payment locations or bank accounts where I'm supposed to deposit the payments?",
+ "I'm really struggling to grasp this. Could you assist me in understanding the specific payment locations or bank accounts where I'm expected to send the payments?",
+ "I'm at my wit's end here! What does this mean? Can you help me figure out the precise locations or bank accounts where I'm supposed to carry out the payments? I'm finding this financial aspect particularly hard to comprehend."
+ ],
+ "criteria": [
+ "Payment Locations: For ITOCHU AirLease (Europe) Limited:\n\n Account Name: Citibank New York (ABA No.\n 021000089) for the account of Citibank\n Dublin (account no. 10994598) in favor of\n ITOCHU AirLease (Europe) Limited (account\n no. 1-00-6793-017)\n\n For Marubeni Airleasing (",
+ "U.K.) Limited:\n\n Harris Bank International Corporation\n ABA #026-007-760\n for the credit of\n The Mitsubishi Trust & Banking Corporation\n London Branch A/C#16011100\n UID No.107280\n for further credit to\n Marubeni Airleasi",
+ "ng (UK) Ltd.\n Account #020-404391\n\n With respect to payments by\n Lessee of Basic Rent hereunder, 62.682% of\n such amounts shall be paid to ITOCHU\n AirLease (Europe) Limited (as above\n provided) and 37.318% of such amounts\n shall be paid to Marubeni Airlea",
+ "sing (U.K.)\n Limited (as above provided)."
+ ]
+ },
+ {
+ "question_variants": [
+ "What is the revision number of the aircraft?",
+ "Could you please clarify what the revision number of the aircraft is?",
+ "I'm finding this a bit hard to grasp. Could you specify the exact revision number associated with the aircraft?",
+ "I'm really trying to understand this, but it's proving difficult. Could you assist me in determining the specific revision number that is attributed to the aircraft?",
+ "Agh! What does it even mean? Can you help me decipher the exact revision number that is tied to the aircraft? I'm finding this technical detail quite challenging to comprehend."
+ ],
+ "criteria": [
+ "Detail Specification (737-25A-Rev. B)"
+ ]
+ }
+ ]
+ }
+]
\ No newline at end of file
diff --git a/extensions/superboogav2/chat_handler.py b/extensions/superboogav2/chat_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..215f7bdbc762ed759f1054d2cc56e26bdd358cf9
--- /dev/null
+++ b/extensions/superboogav2/chat_handler.py
@@ -0,0 +1,138 @@
+"""
+This module is responsible for modifying the chat prompt and history.
+"""
+import json
+import re
+
+import extensions.superboogav2.parameters as parameters
+
+from modules import chat
+from modules.text_generation import get_encoded_length
+from modules.logging_colors import logger
+from extensions.superboogav2.utils import create_context_text, create_metadata_source
+
+from .data_processor import process_and_add_to_collector
+from .chromadb import ChromaCollector
+
+
+CHAT_METADATA = create_metadata_source('automatic-chat-insert')
+
+INSTRUCT_MODE = 'instruct'
+CHAT_INSTRUCT_MODE = 'chat-instruct'
+
+
+def _is_instruct_mode(state: dict):
+ mode = state.get('mode')
+ return mode == INSTRUCT_MODE or mode == CHAT_INSTRUCT_MODE
+
+
+def _remove_tag_if_necessary(user_input: str):
+ if not parameters.get_is_manual():
+ return user_input
+
+ return re.sub(r'^\s*!c\s*|\s*!c\s*$', '', user_input)
+
+
+def _should_query(input: str):
+ if not parameters.get_is_manual():
+ return True
+
+ if re.search(r'^\s*!c|!c\s*$', input, re.MULTILINE):
+ return True
+
+ return False
+
+
+def _format_single_exchange(name, text):
+ if re.search(r':\s*$', name):
+ return '{} {}\n'.format(name, text)
+ else:
+ return '{}: {}\n'.format(name, text)
+
+
+def _get_names(state: dict):
+ if _is_instruct_mode(state):
+ user_name = state['name1_instruct']
+ bot_name = state['name2_instruct']
+ else:
+ user_name = state['name1']
+ bot_name = state['name2']
+
+ if not user_name:
+ user_name = 'User'
+ if not bot_name:
+ bot_name = 'Assistant'
+
+ return user_name, bot_name
+
+
+def _concatinate_history(history: dict, state: dict):
+ full_history_text = ''
+ user_name, bot_name = _get_names(state)
+
+ # Grab the internal history.
+ internal_history = history['internal']
+ assert isinstance(internal_history, list)
+
+ # Iterate through the history.
+ for exchange in internal_history:
+ assert isinstance(exchange, list)
+
+ if len(exchange) >= 1:
+ full_history_text += _format_single_exchange(user_name, exchange[0])
+ if len(exchange) >= 2:
+ full_history_text += _format_single_exchange(bot_name, exchange[1])
+
+ return full_history_text[:-1] # Remove the last new line.
+
+
+def _hijack_last(context_text: str, history: dict, max_len: int, state: dict):
+ num_context_tokens = get_encoded_length(context_text)
+
+ names = _get_names(state)[::-1]
+
+ history_tokens = 0
+ replace_position = None
+ for i, messages in enumerate(reversed(history['internal'])):
+ for j, message in enumerate(reversed(messages)):
+ num_message_tokens = get_encoded_length(_format_single_exchange(names[j], message))
+
+ # TODO: This is an extremely naive solution. A more robust implementation must be made.
+ if history_tokens + num_context_tokens <= max_len:
+ # This message can be replaced
+ replace_position = (i, j)
+
+ history_tokens += num_message_tokens
+
+ if replace_position is None:
+ logger.warn("The provided context_text is too long to replace any message in the history.")
+ else:
+ # replace the message at replace_position with context_text
+ i, j = replace_position
+ history['internal'][-i-1][-j-1] = context_text
+
+
+def custom_generate_chat_prompt_internal(user_input: str, state: dict, collector: ChromaCollector, **kwargs):
+ if parameters.get_add_chat_to_data():
+ # Get the whole history as one string
+ history_as_text = _concatinate_history(kwargs['history'], state)
+
+ if history_as_text:
+ # Delete all documents that were auto-inserted
+ collector.delete(ids_to_delete=None, where=CHAT_METADATA)
+ # Insert the processed history
+ process_and_add_to_collector(history_as_text, collector, False, CHAT_METADATA)
+
+ if _should_query(user_input):
+ user_input = _remove_tag_if_necessary(user_input)
+ results = collector.get_sorted_by_dist(user_input, n_results=parameters.get_chunk_count(), max_token_count=int(parameters.get_max_token_count()))
+
+ # Check if the strategy is to modify the last message. If so, prepend or append to the user query.
+ if parameters.get_injection_strategy() == parameters.APPEND_TO_LAST:
+ user_input = user_input + create_context_text(results)
+ elif parameters.get_injection_strategy() == parameters.PREPEND_TO_LAST:
+ user_input = create_context_text(results) + user_input
+ elif parameters.get_injection_strategy() == parameters.HIJACK_LAST_IN_CONTEXT:
+ _hijack_last(create_context_text(results), kwargs['history'], state['truncation_length'], state)
+
+ return chat.generate_chat_prompt(user_input, state, **kwargs)
diff --git a/extensions/superboogav2/chromadb.py b/extensions/superboogav2/chromadb.py
new file mode 100644
index 0000000000000000000000000000000000000000..0da2d8f90c623b43ecd49b3dcf20919b8e2a1434
--- /dev/null
+++ b/extensions/superboogav2/chromadb.py
@@ -0,0 +1,376 @@
+import threading
+import chromadb
+import posthog
+import torch
+import math
+
+import numpy as np
+import extensions.superboogav2.parameters as parameters
+
+from chromadb.config import Settings
+from sentence_transformers import SentenceTransformer
+
+from modules.logging_colors import logger
+from modules.text_generation import encode, decode
+
+logger.debug('Intercepting all calls to posthog.')
+posthog.capture = lambda *args, **kwargs: None
+
+
+class Collecter():
+ def __init__(self):
+ pass
+
+ def add(self, texts: list[str], texts_with_context: list[str], starting_indices: list[int]):
+ pass
+
+ def get(self, search_strings: list[str], n_results: int) -> list[str]:
+ pass
+
+ def clear(self):
+ pass
+
+
+class Embedder():
+ def __init__(self):
+ pass
+
+ def embed(self, text: str) -> list[torch.Tensor]:
+ pass
+
+class Info:
+ def __init__(self, start_index, text_with_context, distance, id):
+ self.text_with_context = text_with_context
+ self.start_index = start_index
+ self.distance = distance
+ self.id = id
+
+ def calculate_distance(self, other_info):
+ if parameters.get_new_dist_strategy() == parameters.DIST_MIN_STRATEGY:
+ # Min
+ return min(self.distance, other_info.distance)
+ elif parameters.get_new_dist_strategy() == parameters.DIST_HARMONIC_STRATEGY:
+ # Harmonic mean
+ return 2 * (self.distance * other_info.distance) / (self.distance + other_info.distance)
+ elif parameters.get_new_dist_strategy() == parameters.DIST_GEOMETRIC_STRATEGY:
+ # Geometric mean
+ return (self.distance * other_info.distance) ** 0.5
+ elif parameters.get_new_dist_strategy() == parameters.DIST_ARITHMETIC_STRATEGY:
+ # Arithmetic mean
+ return (self.distance + other_info.distance) / 2
+ else: # Min is default
+ return min(self.distance, other_info.distance)
+
+ def merge_with(self, other_info):
+ s1 = self.text_with_context
+ s2 = other_info.text_with_context
+ s1_start = self.start_index
+ s2_start = other_info.start_index
+
+ new_dist = self.calculate_distance(other_info)
+
+ if self.should_merge(s1, s2, s1_start, s2_start):
+ if s1_start <= s2_start:
+ if s1_start + len(s1) >= s2_start + len(s2): # if s1 completely covers s2
+ return Info(s1_start, s1, new_dist, self.id)
+ else:
+ overlap = max(0, s1_start + len(s1) - s2_start)
+ return Info(s1_start, s1 + s2[overlap:], new_dist, self.id)
+ else:
+ if s2_start + len(s2) >= s1_start + len(s1): # if s2 completely covers s1
+ return Info(s2_start, s2, new_dist, other_info.id)
+ else:
+ overlap = max(0, s2_start + len(s2) - s1_start)
+ return Info(s2_start, s2 + s1[overlap:], new_dist, other_info.id)
+
+ return None
+
+ @staticmethod
+ def should_merge(s1, s2, s1_start, s2_start):
+ # Check if s1 and s2 are adjacent or overlapping
+ s1_end = s1_start + len(s1)
+ s2_end = s2_start + len(s2)
+
+ return not (s1_end < s2_start or s2_end < s1_start)
+
+class ChromaCollector(Collecter):
+ def __init__(self, embedder: Embedder):
+ super().__init__()
+ self.chroma_client = chromadb.Client(Settings(anonymized_telemetry=False))
+ self.embedder = embedder
+ self.collection = self.chroma_client.create_collection(name="context", embedding_function=self.embedder.embed)
+ self.ids = []
+ self.id_to_info = {}
+ self.embeddings_cache = {}
+ self.lock = threading.Lock() # Locking so the server doesn't break.
+
+ def add(self, texts: list[str], texts_with_context: list[str], starting_indices: list[int], metadatas: list[dict] = None):
+ with self.lock:
+ assert metadatas is None or len(metadatas) == len(texts), "metadatas must be None or have the same length as texts"
+
+ if len(texts) == 0:
+ return
+
+ new_ids = self._get_new_ids(len(texts))
+
+ (existing_texts, existing_embeddings, existing_ids, existing_metas), \
+ (non_existing_texts, non_existing_ids, non_existing_metas) = self._split_texts_by_cache_hit(texts, new_ids, metadatas)
+
+ # If there are any already existing texts, add them all at once.
+ if existing_texts:
+ logger.info(f'Adding {len(existing_embeddings)} cached embeddings.')
+ args = {'embeddings': existing_embeddings, 'documents': existing_texts, 'ids': existing_ids}
+ if metadatas is not None:
+ args['metadatas'] = existing_metas
+ self.collection.add(**args)
+
+ # If there are any non-existing texts, compute their embeddings all at once. Each call to embed has significant overhead.
+ if non_existing_texts:
+ non_existing_embeddings = self.embedder.embed(non_existing_texts).tolist()
+ for text, embedding in zip(non_existing_texts, non_existing_embeddings):
+ self.embeddings_cache[text] = embedding
+
+ logger.info(f'Adding {len(non_existing_embeddings)} new embeddings.')
+ args = {'embeddings': non_existing_embeddings, 'documents': non_existing_texts, 'ids': non_existing_ids}
+ if metadatas is not None:
+ args['metadatas'] = non_existing_metas
+ self.collection.add(**args)
+
+ # Create a dictionary that maps each ID to its context and starting index
+ new_info = {
+ id_: {'text_with_context': context, 'start_index': start_index}
+ for id_, context, start_index in zip(new_ids, texts_with_context, starting_indices)
+ }
+
+ self.id_to_info.update(new_info)
+ self.ids.extend(new_ids)
+
+
+ def _split_texts_by_cache_hit(self, texts: list[str], new_ids: list[str], metadatas: list[dict]):
+ existing_texts, non_existing_texts = [], []
+ existing_embeddings = []
+ existing_ids, non_existing_ids = [], []
+ existing_metas, non_existing_metas = [], []
+
+ for i, text in enumerate(texts):
+ id_ = new_ids[i]
+ metadata = metadatas[i] if metadatas is not None else None
+ embedding = self.embeddings_cache.get(text)
+ if embedding:
+ existing_texts.append(text)
+ existing_embeddings.append(embedding)
+ existing_ids.append(id_)
+ existing_metas.append(metadata)
+ else:
+ non_existing_texts.append(text)
+ non_existing_ids.append(id_)
+ non_existing_metas.append(metadata)
+
+ return (existing_texts, existing_embeddings, existing_ids, existing_metas), \
+ (non_existing_texts, non_existing_ids, non_existing_metas)
+
+
+ def _get_new_ids(self, num_new_ids: int):
+ if self.ids:
+ max_existing_id = max(int(id_) for id_ in self.ids)
+ else:
+ max_existing_id = -1
+
+ return [str(i + max_existing_id + 1) for i in range(num_new_ids)]
+
+
+ def _find_min_max_start_index(self):
+ max_index, min_index = 0, float('inf')
+ for _, val in self.id_to_info.items():
+ if val['start_index'] > max_index:
+ max_index = val['start_index']
+ if val['start_index'] < min_index:
+ min_index = val['start_index']
+ return min_index, max_index
+
+
+ # NB: Does not make sense to weigh excerpts from different documents.
+ # But let's say that's the user's problem. Perfect world scenario:
+ # Apply time weighing to different documents. For each document, then, add
+ # separate time weighing.
+ def _apply_sigmoid_time_weighing(self, infos: list[Info], document_len: int, time_steepness: float, time_power: float):
+ sigmoid = lambda x: 1 / (1 + np.exp(-x))
+
+ weights = sigmoid(time_steepness * np.linspace(-10, 10, document_len))
+
+ # Scale to [0,time_power] and shift it up to [1-time_power, 1]
+ weights = weights - min(weights)
+ weights = weights * (time_power / max(weights))
+ weights = weights + (1 - time_power)
+
+ # Reverse the weights
+ weights = weights[::-1]
+
+ for info in infos:
+ index = info.start_index
+ info.distance *= weights[index]
+
+
+ def _filter_outliers_by_median_distance(self, infos: list[Info], significant_level: float):
+ # Ensure there are infos to filter
+ if not infos:
+ return []
+
+ # Find info with minimum distance
+ min_info = min(infos, key=lambda x: x.distance)
+
+ # Calculate median distance among infos
+ median_distance = np.median([inf.distance for inf in infos])
+
+ # Filter out infos that have a distance significantly greater than the median
+ filtered_infos = [inf for inf in infos if inf.distance <= significant_level * median_distance]
+
+ # Always include the info with minimum distance
+ if min_info not in filtered_infos:
+ filtered_infos.append(min_info)
+
+ return filtered_infos
+
+
+ def _merge_infos(self, infos: list[Info]):
+ merged_infos = []
+ current_info = infos[0]
+
+ for next_info in infos[1:]:
+ merged = current_info.merge_with(next_info)
+ if merged is not None:
+ current_info = merged
+ else:
+ merged_infos.append(current_info)
+ current_info = next_info
+
+ merged_infos.append(current_info)
+ return merged_infos
+
+
+ # Main function for retrieving chunks by distance. It performs merging, time weighing, and mean filtering.
+ def _get_documents_ids_distances(self, search_strings: list[str], n_results: int):
+ n_results = min(len(self.ids), n_results)
+ if n_results == 0:
+ return [], [], []
+
+ if isinstance(search_strings, str):
+ search_strings = [search_strings]
+
+ infos = []
+ min_start_index, max_start_index = self._find_min_max_start_index()
+
+ for search_string in search_strings:
+ result = self.collection.query(query_texts=search_string, n_results=math.ceil(n_results / len(search_strings)), include=['distances'])
+ curr_infos = [Info(start_index=self.id_to_info[id]['start_index'],
+ text_with_context=self.id_to_info[id]['text_with_context'],
+ distance=distance, id=id)
+ for id, distance in zip(result['ids'][0], result['distances'][0])]
+
+ self._apply_sigmoid_time_weighing(infos=curr_infos, document_len=max_start_index - min_start_index + 1, time_steepness=parameters.get_time_steepness(), time_power=parameters.get_time_power())
+ curr_infos = self._filter_outliers_by_median_distance(curr_infos, parameters.get_significant_level())
+ infos.extend(curr_infos)
+
+ infos.sort(key=lambda x: x.start_index)
+ infos = self._merge_infos(infos)
+
+ texts_with_context = [inf.text_with_context for inf in infos]
+ ids = [inf.id for inf in infos]
+ distances = [inf.distance for inf in infos]
+
+ return texts_with_context, ids, distances
+
+
+ # Get chunks by similarity
+ def get(self, search_strings: list[str], n_results: int) -> list[str]:
+ with self.lock:
+ documents, _, _ = self._get_documents_ids_distances(search_strings, n_results)
+ return documents
+
+
+ # Get ids by similarity
+ def get_ids(self, search_strings: list[str], n_results: int) -> list[str]:
+ with self.lock:
+ _, ids, _ = self._get_documents_ids_distances(search_strings, n_results)
+ return ids
+
+
+ # Cutoff token count
+ def _get_documents_up_to_token_count(self, documents: list[str], max_token_count: int):
+ # TODO: Move to caller; We add delimiters there which might go over the limit.
+ current_token_count = 0
+ return_documents = []
+
+ for doc in documents:
+ doc_tokens = encode(doc)[0]
+ doc_token_count = len(doc_tokens)
+ if current_token_count + doc_token_count > max_token_count:
+ # If adding this document would exceed the max token count,
+ # truncate the document to fit within the limit.
+ remaining_tokens = max_token_count - current_token_count
+
+ truncated_doc = decode(doc_tokens[:remaining_tokens], skip_special_tokens=True)
+ return_documents.append(truncated_doc)
+ break
+ else:
+ return_documents.append(doc)
+ current_token_count += doc_token_count
+
+ return return_documents
+
+
+ # Get chunks by similarity and then sort by ids
+ def get_sorted_by_ids(self, search_strings: list[str], n_results: int, max_token_count: int) -> list[str]:
+ with self.lock:
+ documents, ids, _ = self._get_documents_ids_distances(search_strings, n_results)
+ sorted_docs = [x for _, x in sorted(zip(ids, documents))]
+
+ return self._get_documents_up_to_token_count(sorted_docs, max_token_count)
+
+
+ # Get chunks by similarity and then sort by distance (lowest distance is last).
+ def get_sorted_by_dist(self, search_strings: list[str], n_results: int, max_token_count: int) -> list[str]:
+ with self.lock:
+ documents, _, distances = self._get_documents_ids_distances(search_strings, n_results)
+ sorted_docs = [doc for doc, _ in sorted(zip(documents, distances), key=lambda x: x[1])] # sorted lowest -> highest
+
+ # If a document is truncated or competely skipped, it would be with high distance.
+ return_documents = self._get_documents_up_to_token_count(sorted_docs, max_token_count)
+ return_documents.reverse() # highest -> lowest
+
+ return return_documents
+
+
+ def delete(self, ids_to_delete: list[str], where: dict):
+ with self.lock:
+ ids_to_delete = self.collection.get(ids=ids_to_delete, where=where)['ids']
+ self.collection.delete(ids=ids_to_delete, where=where)
+
+ # Remove the deleted ids from self.ids and self.id_to_info
+ ids_set = set(ids_to_delete)
+ self.ids = [id_ for id_ in self.ids if id_ not in ids_set]
+ for id_ in ids_to_delete:
+ self.id_to_info.pop(id_, None)
+
+ logger.info(f'Successfully deleted {len(ids_to_delete)} records from chromaDB.')
+
+
+ def clear(self):
+ with self.lock:
+ self.chroma_client.reset()
+ self.collection = self.chroma_client.create_collection("context", embedding_function=self.embedder.embed)
+ self.ids = []
+ self.id_to_info = {}
+
+ logger.info('Successfully cleared all records and reset chromaDB.')
+
+
+class SentenceTransformerEmbedder(Embedder):
+ def __init__(self) -> None:
+ logger.debug('Creating Sentence Embedder...')
+ self.model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
+ self.embed = self.model.encode
+
+
+def make_collector():
+ return ChromaCollector(SentenceTransformerEmbedder())
\ No newline at end of file
diff --git a/extensions/superboogav2/config.json b/extensions/superboogav2/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0f1034f52155f7ed45e98518da1ae00621e6d15b
--- /dev/null
+++ b/extensions/superboogav2/config.json
@@ -0,0 +1,161 @@
+{
+ "to_lower": {
+ "default": false,
+ "categories": [true, false],
+ "should_optimize": true
+ },
+ "num_conversion": {
+ "default": null,
+ "categories": ["NUM_TO_WORD_METHOD", "NUM_TO_CHAR_METHOD", "NUM_TO_CHAR_LONG_METHOD", null],
+ "should_optimize": true
+ },
+ "merge_spaces": {
+ "default": false,
+ "categories": [true, false],
+ "should_optimize": true
+ },
+ "strip": {
+ "default": true,
+ "categories": [true, false],
+ "should_optimize": false
+ },
+ "remove_punctuation": {
+ "default": true,
+ "categories": [true, false],
+ "should_optimize": true
+ },
+ "remove_stopwords": {
+ "default": false,
+ "categories": [true, false],
+ "should_optimize": true
+ },
+ "remove_specific_pos": {
+ "default": false,
+ "categories": [true, false],
+ "should_optimize": true
+ },
+ "lemmatize": {
+ "default": true,
+ "categories": [true, false],
+ "should_optimize": true
+ },
+ "min_num_sent": {
+ "default": 1,
+ "categories": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 999999],
+ "should_optimize": true
+ },
+ "delta_start": {
+ "default": 0,
+ "categories": [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
+ "should_optimize": true
+ },
+ "chunk_len1": {
+ "default": 500,
+ "categories": [50, 200, 250, 500, 600, 900, 1000],
+ "should_optimize": true
+ },
+ "chunk_len2": {
+ "default": 500,
+ "categories": [0, 50, 200, 250, 500, 600, 900],
+ "should_optimize": true
+ },
+ "chunk_len3": {
+ "default": 1000,
+ "categories": [0, 100, 150, 300, 400, 700, 800, 1000],
+ "should_optimize": true
+ },
+ "chunk_len4": {
+ "default": 700,
+ "categories": [0, 100, 150, 300, 400, 700, 800],
+ "should_optimize": true
+ },
+ "chunk_len_mask": {
+ "default": 15,
+ "categories": [3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15],
+ "should_optimize": false
+ },
+ "context_len_left": {
+ "default": 250,
+ "categories": [50, 100, 150, 200, 250, 300, 350, 400, 500, 600, 700, 800, 900, 1000],
+ "should_optimize": true
+ },
+ "context_len_right": {
+ "default": 800,
+ "categories": [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1200, 1400, 1500, 1600],
+ "should_optimize": true
+ },
+ "new_dist_strategy": {
+ "default": "DIST_MIN_STRATEGY",
+ "categories": ["DIST_MIN_STRATEGY", "DIST_HARMONIC_STRATEGY", "DIST_GEOMETRIC_STRATEGY", "DIST_ARITHMETIC_STRATEGY"],
+ "should_optimize": false
+ },
+ "chunk_count": {
+ "default": 250,
+ "categories": [30, 50, 75, 100, 125, 150, 175, 200, 225, 250, 275, 300, 325, 350, 375, 400],
+ "should_optimize": true
+ },
+ "min_num_length": {
+ "default": 9,
+ "categories": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+ "should_optimize": true
+ },
+ "significant_level": {
+ "default": 1.0,
+ "categories": [0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 999999],
+ "should_optimize": true
+ },
+ "time_steepness": {
+ "default": 0.01,
+ "categories": [0.01, 0.2, 0.4, 0.6, 0.8, 1.0],
+ "should_optimize": false
+ },
+ "time_power": {
+ "default": 0,
+ "categories": [0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
+ "should_optimize": false
+ },
+ "chunk_separator": {
+ "default": ""
+ },
+ "prefix": {
+ "default": "<>\n\n"
+ },
+ "data_separator": {
+ "default": "\n\n<>\n\n"
+ },
+ "postfix": {
+ "default": "\n\n<>\n\n"
+ },
+ "manual": {
+ "default": true
+ },
+ "add_chat_to_data": {
+ "default": true
+ },
+ "injection_strategy": {
+ "default": "PREPEND_TO_LAST",
+ "categories": ["PREPEND_TO_LAST", "APPEND_TO_LAST", "HIJACK_LAST_IN_CONTEXT"]
+ },
+ "chunk_regex": {
+ "default": "(?<==== ).*?(?= ===)|User story: \\d+"
+ },
+ "strong_cleanup": {
+ "default": false
+ },
+ "max_token_count": {
+ "default": 3072
+ },
+ "threads": {
+ "default": 4
+ },
+ "optimization_steps": {
+ "default": 100
+ },
+ "api_port": {
+ "default": 5002
+ },
+ "api_on": {
+ "default": false
+ }
+ }
+
\ No newline at end of file
diff --git a/extensions/superboogav2/data_preprocessor.py b/extensions/superboogav2/data_preprocessor.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbd14b6b0c9cfbbaedc2ebbcbe5883ed6253c5f9
--- /dev/null
+++ b/extensions/superboogav2/data_preprocessor.py
@@ -0,0 +1,199 @@
+"""
+This module contains utils for preprocessing the text before converting it to embeddings.
+
+- TextPreprocessorBuilder preprocesses individual strings.
+ * lowering cases
+ * converting numbers to words or characters
+ * merging and stripping spaces
+ * removing punctuation
+ * removing stop words
+ * lemmatizing
+ * removing specific parts of speech (adverbs and interjections)
+- TextSummarizer extracts the most important sentences from a long string using text-ranking.
+"""
+import pytextrank
+import string
+import spacy
+import math
+import nltk
+import re
+
+from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
+from num2words import num2words
+
+
+class TextPreprocessorBuilder:
+ # Define class variables as None initially
+ _stop_words = set(stopwords.words('english'))
+ _lemmatizer = WordNetLemmatizer()
+
+ # Some of the functions are expensive. We cache the results.
+ _lemmatizer_cache = {}
+ _pos_remove_cache = {}
+
+
+ def __init__(self, text: str):
+ self.text = text
+
+
+ def to_lower(self):
+ # Match both words and non-word characters
+ tokens = re.findall(r'\b\w+\b|\W+', self.text)
+ for i, token in enumerate(tokens):
+ # Check if token is a word
+ if re.match(r'^\w+$', token):
+ # Check if token is not an abbreviation or constant
+ if not re.match(r'^[A-Z]+$', token) and not re.match(r'^[A-Z_]+$', token):
+ tokens[i] = token.lower()
+ self.text = "".join(tokens)
+ return self
+
+
+ def num_to_word(self, min_len: int = 1):
+ # Match both words and non-word characters
+ tokens = re.findall(r'\b\w+\b|\W+', self.text)
+ for i, token in enumerate(tokens):
+ # Check if token is a number of length `min_len` or more
+ if token.isdigit() and len(token) >= min_len:
+ # This is done to pay better attention to numbers (e.g. ticket numbers, thread numbers, post numbers)
+ # 740700 will become "seven hundred and forty thousand seven hundred".
+ tokens[i] = num2words(int(token)).replace(",","") # Remove commas from num2words.
+ self.text = "".join(tokens)
+ return self
+
+
+ def num_to_char_long(self, min_len: int = 1):
+ # Match both words and non-word characters
+ tokens = re.findall(r'\b\w+\b|\W+', self.text)
+ for i, token in enumerate(tokens):
+ # Check if token is a number of length `min_len` or more
+ if token.isdigit() and len(token) >= min_len:
+ # This is done to pay better attention to numbers (e.g. ticket numbers, thread numbers, post numbers)
+ # 740700 will become HHHHHHEEEEEAAAAHHHAAA
+ convert_token = lambda token: ''.join((chr(int(digit) + 65) * (i + 1)) for i, digit in enumerate(token[::-1]))[::-1]
+ tokens[i] = convert_token(tokens[i])
+ self.text = "".join(tokens)
+ return self
+
+ def num_to_char(self, min_len: int = 1):
+ # Match both words and non-word characters
+ tokens = re.findall(r'\b\w+\b|\W+', self.text)
+ for i, token in enumerate(tokens):
+ # Check if token is a number of length `min_len` or more
+ if token.isdigit() and len(token) >= min_len:
+ # This is done to pay better attention to numbers (e.g. ticket numbers, thread numbers, post numbers)
+ # 740700 will become HEAHAA
+ tokens[i] = ''.join(chr(int(digit) + 65) for digit in token)
+ self.text = "".join(tokens)
+ return self
+
+ def merge_spaces(self):
+ self.text = re.sub(' +', ' ', self.text)
+ return self
+
+ def strip(self):
+ self.text = self.text.strip()
+ return self
+
+ def remove_punctuation(self):
+ self.text = self.text.translate(str.maketrans('', '', string.punctuation))
+ return self
+
+ def remove_stopwords(self):
+ self.text = "".join([word for word in re.findall(r'\b\w+\b|\W+', self.text) if word not in TextPreprocessorBuilder._stop_words])
+ return self
+
+ def remove_specific_pos(self):
+ """
+ In the English language, adverbs and interjections rarely provide meaningul information.
+ Removing them improves the embedding precision. Don't tell JK Rowling, though.
+ """
+ processed_text = TextPreprocessorBuilder._pos_remove_cache.get(self.text)
+ if processed_text:
+ self.text = processed_text
+ return self
+
+ # Match both words and non-word characters
+ tokens = re.findall(r'\b\w+\b|\W+', self.text)
+
+ # Exclude adverbs and interjections
+ excluded_tags = ['RB', 'RBR', 'RBS', 'UH']
+
+ for i, token in enumerate(tokens):
+ # Check if token is a word
+ if re.match(r'^\w+$', token):
+ # Part-of-speech tag the word
+ pos = nltk.pos_tag([token])[0][1]
+ # If the word's POS tag is in the excluded list, remove the word
+ if pos in excluded_tags:
+ tokens[i] = ''
+
+ new_text = "".join(tokens)
+ TextPreprocessorBuilder._pos_remove_cache[self.text] = new_text
+ self.text = new_text
+
+ return self
+
+ def lemmatize(self):
+ processed_text = TextPreprocessorBuilder._lemmatizer_cache.get(self.text)
+ if processed_text:
+ self.text = processed_text
+ return self
+
+ new_text = "".join([TextPreprocessorBuilder._lemmatizer.lemmatize(word) for word in re.findall(r'\b\w+\b|\W+', self.text)])
+ TextPreprocessorBuilder._lemmatizer_cache[self.text] = new_text
+ self.text = new_text
+
+ return self
+
+ def build(self):
+ return self.text
+
+class TextSummarizer:
+ _nlp_pipeline = None
+ _cache = {}
+
+ @staticmethod
+ def _load_nlp_pipeline():
+ # Lazy-load it.
+ if TextSummarizer._nlp_pipeline is None:
+ TextSummarizer._nlp_pipeline = spacy.load('en_core_web_sm')
+ TextSummarizer._nlp_pipeline.add_pipe("textrank", last=True)
+ return TextSummarizer._nlp_pipeline
+
+ @staticmethod
+ def process_long_text(text: str, min_num_sent: int) -> list[str]:
+ """
+ This function applies a text summarization process on a given text string, extracting
+ the most important sentences based on the principle that 20% of the content is responsible
+ for 80% of the meaning (the Pareto Principle).
+
+ Returns:
+ list: A list of the most important sentences
+ """
+
+ # Attempt to get the result from cache
+ cache_key = (text, min_num_sent)
+ cached_result = TextSummarizer._cache.get(cache_key, None)
+ if cached_result is not None:
+ return cached_result
+
+ nlp_pipeline = TextSummarizer._load_nlp_pipeline()
+ doc = nlp_pipeline(text)
+
+ num_sent = len(list(doc.sents))
+ result = []
+
+ if num_sent >= min_num_sent:
+
+ limit_phrases = math.ceil(len(doc._.phrases) * 0.20) # 20% of the phrases, rounded up
+ limit_sentences = math.ceil(num_sent * 0.20) # 20% of the sentences, rounded up
+ result = [str(sent) for sent in doc._.textrank.summary(limit_phrases=limit_phrases, limit_sentences=limit_sentences)]
+
+ else:
+ result = [text]
+
+ # Store the result in cache before returning it
+ TextSummarizer._cache[cache_key] = result
+ return result
\ No newline at end of file
diff --git a/extensions/superboogav2/data_processor.py b/extensions/superboogav2/data_processor.py
new file mode 100644
index 0000000000000000000000000000000000000000..f019f427fe43ae6169be835679a6d07e938a2753
--- /dev/null
+++ b/extensions/superboogav2/data_processor.py
@@ -0,0 +1,209 @@
+"""
+This module is responsible for processing the corpus and feeding it into chromaDB. It will receive a corpus of text.
+It will then split it into chunks of specified length. For each of those chunks, it will append surrounding context.
+It will only include full words.
+"""
+
+import re
+import bisect
+
+import extensions.superboogav2.parameters as parameters
+
+from .data_preprocessor import TextPreprocessorBuilder, TextSummarizer
+from .chromadb import ChromaCollector
+
+def preprocess_text_no_summary(text) -> str:
+ builder = TextPreprocessorBuilder(text)
+ if parameters.should_to_lower():
+ builder.to_lower()
+
+ if parameters.should_remove_punctuation():
+ builder.remove_punctuation()
+
+ if parameters.should_remove_specific_pos():
+ builder.remove_specific_pos()
+
+ if parameters.should_remove_stopwords():
+ builder.remove_stopwords
+
+ if parameters.should_lemmatize():
+ builder.lemmatize()
+
+ if parameters.should_merge_spaces():
+ builder.merge_spaces
+
+ if parameters.should_strip():
+ builder.strip()
+
+ if parameters.get_num_conversion_strategy():
+ if parameters.get_num_conversion_strategy() == parameters.NUM_TO_WORD_METHOD:
+ builder.num_to_word(parameters.get_min_num_length())
+ elif parameters.get_num_conversion_strategy() == parameters.NUM_TO_CHAR_METHOD:
+ builder.num_to_char(parameters.get_min_num_length())
+ elif parameters.get_num_conversion_strategy() == parameters.NUM_TO_CHAR_LONG_METHOD:
+ builder.num_to_char_long(parameters.get_min_num_length())
+
+ return builder.build()
+
+
+def preprocess_text(text) -> list[str]:
+ important_sentences = TextSummarizer.process_long_text(text, parameters.get_min_num_sentences())
+ return [preprocess_text_no_summary(sent) for sent in important_sentences]
+
+
+def _create_chunks_with_context(corpus, chunk_len, context_left, context_right):
+ """
+ This function takes a corpus of text and splits it into chunks of a specified length,
+ then adds a specified amount of context to each chunk. The context is added by first
+ going backwards from the start of the chunk and then going forwards from the end of the
+ chunk, ensuring that the context includes only whole words and that the total context length
+ does not exceed the specified limit. This function uses binary search for efficiency.
+
+ Returns:
+ chunks (list of str): The chunks of text.
+ chunks_with_context (list of str): The chunks of text with added context.
+ chunk_with_context_start_indices (list of int): The starting indices of each chunk with context in the corpus.
+ """
+ words = re.split('(\\s+)', corpus)
+ word_start_indices = [0]
+ current_index = 0
+
+ for word in words:
+ current_index += len(word)
+ word_start_indices.append(current_index)
+
+ chunks, chunk_lengths, chunk_start_indices, chunk_with_context_start_indices = [], [], [], []
+ current_length = 0
+ current_index = 0
+ chunk = []
+
+ for word in words:
+ if current_length + len(word) > chunk_len:
+ chunks.append(''.join(chunk))
+ chunk_lengths.append(current_length)
+ chunk_start_indices.append(current_index - current_length)
+ chunk = [word]
+ current_length = len(word)
+ else:
+ chunk.append(word)
+ current_length += len(word)
+ current_index += len(word)
+
+ if chunk:
+ chunks.append(''.join(chunk))
+ chunk_lengths.append(current_length)
+ chunk_start_indices.append(current_index - current_length)
+
+ chunks_with_context = []
+ for start_index, chunk_length in zip(chunk_start_indices, chunk_lengths):
+ context_start_index = bisect.bisect_right(word_start_indices, start_index - context_left)
+ context_end_index = bisect.bisect_left(word_start_indices, start_index + chunk_length + context_right)
+
+ # Combine all the words in the context range (before, chunk, and after)
+ chunk_with_context = ''.join(words[context_start_index:context_end_index])
+ chunks_with_context.append(chunk_with_context)
+
+ # Determine the start index of the chunk with context
+ chunk_with_context_start_index = word_start_indices[context_start_index]
+ chunk_with_context_start_indices.append(chunk_with_context_start_index)
+
+ return chunks, chunks_with_context, chunk_with_context_start_indices
+
+
+def _clear_chunks(data_chunks, data_chunks_with_context, data_chunk_starting_indices):
+ distinct_data_chunks = []
+ distinct_data_chunks_with_context = []
+ distinct_data_chunk_starting_indices = []
+
+ seen_chunks = dict()
+
+ for chunk, context, index in zip(data_chunks, data_chunks_with_context, data_chunk_starting_indices):
+ # Skip the chunk if it does not contain any alphanumeric characters
+ if not any(char.isalnum() for char in chunk):
+ continue
+
+ seen_chunk_start = seen_chunks.get(chunk)
+ if seen_chunk_start:
+ # If we've already seen this exact chunk, and the context around it it very close to the seen chunk, then skip it.
+ if abs(seen_chunk_start-index) < parameters.get_delta_start():
+ continue
+
+ distinct_data_chunks.append(chunk)
+ distinct_data_chunks_with_context.append(context)
+ distinct_data_chunk_starting_indices.append(index)
+
+ seen_chunks[chunk] = index
+
+ return distinct_data_chunks, distinct_data_chunks_with_context, distinct_data_chunk_starting_indices
+
+
+def process_and_add_to_collector(corpus: str, collector: ChromaCollector, clear_collector_before_adding: bool, metadata: dict):
+ # Defining variables
+ chunk_lens = [int(len.strip()) for len in parameters.get_chunk_len().split(',')]
+ context_len = [int(len.strip()) for len in parameters.get_context_len().split(',')]
+ if len(context_len) >= 3:
+ raise f"Context len has too many values: {len(context_len)}"
+ if len(context_len) == 2:
+ context_left = context_len[0]
+ context_right = context_len[1]
+ else:
+ context_left = context_right = context_len[0]
+
+ data_chunks = []
+ data_chunks_with_context = []
+ data_chunk_starting_indices = []
+
+ # Handling chunk_regex
+ if parameters.get_chunk_regex():
+ if parameters.get_chunk_separator():
+ cumulative_length = 0 # This variable will store the length of the processed corpus
+ sections = corpus.split(parameters.get_chunk_separator())
+ for section in sections:
+ special_chunks = list(re.finditer(parameters.get_chunk_regex(), section))
+ for match in special_chunks:
+ chunk = match.group(0)
+ start_index = match.start()
+ end_index = start_index + len(chunk)
+ context = section[max(0, start_index - context_left):min(len(section), end_index + context_right)]
+ data_chunks.append(chunk)
+ data_chunks_with_context.append(context)
+ data_chunk_starting_indices.append(cumulative_length + max(0, start_index - context_left))
+ cumulative_length += len(section) + len(parameters.get_chunk_separator()) # Update the length of the processed corpus
+ else:
+ special_chunks = list(re.finditer(parameters.get_chunk_regex(), corpus))
+ for match in special_chunks:
+ chunk = match.group(0)
+ start_index = match.start()
+ end_index = start_index + len(chunk)
+ context = corpus[max(0, start_index - context_left):min(len(corpus), end_index + context_right)]
+ data_chunks.append(chunk)
+ data_chunks_with_context.append(context)
+ data_chunk_starting_indices.append(max(0, start_index - context_left))
+
+ for chunk_len in chunk_lens:
+ # Breaking the data into chunks and adding those to the db
+ if parameters.get_chunk_separator():
+ cumulative_length = 0 # This variable will store the length of the processed corpus
+ sections = corpus.split(parameters.get_chunk_separator())
+ for section in sections:
+ chunks, chunks_with_context, context_start_indices = _create_chunks_with_context(section, chunk_len, context_left, context_right)
+ context_start_indices = [cumulative_length + i for i in context_start_indices] # Add the length of the processed corpus to each start index
+ data_chunks.extend(chunks)
+ data_chunks_with_context.extend(chunks_with_context)
+ data_chunk_starting_indices.extend(context_start_indices)
+ cumulative_length += len(section) + len(parameters.get_chunk_separator()) # Update the length of the processed corpus
+ else:
+ chunks, chunks_with_context, context_start_indices = _create_chunks_with_context(corpus, chunk_len, context_left, context_right)
+ data_chunks.extend(chunks)
+ data_chunks_with_context.extend(chunks_with_context)
+ data_chunk_starting_indices.extend(context_start_indices)
+
+ data_chunks = [preprocess_text_no_summary(chunk) for chunk in data_chunks]
+
+ data_chunks, data_chunks_with_context, data_chunk_starting_indices = _clear_chunks(
+ data_chunks, data_chunks_with_context, data_chunk_starting_indices
+ )
+
+ if clear_collector_before_adding:
+ collector.clear()
+ collector.add(data_chunks, data_chunks_with_context, data_chunk_starting_indices, [metadata]*len(data_chunks) if metadata is not None else None)
\ No newline at end of file
diff --git a/extensions/superboogav2/download_urls.py b/extensions/superboogav2/download_urls.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad2726b563b6df1134fa8396175f4e597a82d628
--- /dev/null
+++ b/extensions/superboogav2/download_urls.py
@@ -0,0 +1,65 @@
+import concurrent.futures
+import requests
+import re
+
+from bs4 import BeautifulSoup
+
+import extensions.superboogav2.parameters as parameters
+
+from .data_processor import process_and_add_to_collector
+from .utils import create_metadata_source
+
+def _download_single(url):
+ response = requests.get(url, timeout=5)
+ if response.status_code == 200:
+ return response.content
+ else:
+ raise Exception("Failed to download URL")
+
+
+def _download_urls(urls, threads=1):
+ with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
+ futures = []
+ for url in urls:
+ future = executor.submit(_download_single, url)
+ futures.append(future)
+
+ results = []
+ i = 0
+ for future in concurrent.futures.as_completed(futures):
+ try:
+ result = future.result()
+ results.append(result)
+ i += 1
+ yield f"{i}/{len(urls)}", results
+ except Exception:
+ pass
+
+ yield "Done", results
+
+
+def feed_url_into_collector(urls, collector):
+ all_text = ''
+ cumulative = ''
+
+ urls = urls.strip().split('\n')
+ cumulative += f'Loading {len(urls)} URLs with {parameters.get_num_threads()} threads...\n\n'
+ yield cumulative
+ for update, contents in _download_urls(urls, threads=parameters.get_num_threads()):
+ yield cumulative + update
+
+ cumulative += 'Processing the HTML sources...'
+ yield cumulative
+ for content in contents:
+ soup = BeautifulSoup(content, features="lxml")
+ for script in soup(["script", "style"]):
+ script.extract()
+
+ strings = soup.stripped_strings
+ if parameters.get_is_strong_cleanup():
+ strings = [s for s in strings if re.search("[A-Za-z] ", s)]
+
+ text = '\n'.join([s.strip() for s in strings])
+ all_text += text
+
+ process_and_add_to_collector(all_text, collector, False, create_metadata_source('url-download'))
\ No newline at end of file
diff --git a/extensions/superboogav2/nltk_data/corpora/stopwords/english b/extensions/superboogav2/nltk_data/corpora/stopwords/english
new file mode 100644
index 0000000000000000000000000000000000000000..1280aa3b08d64b846272b2d23314377200db36e9
--- /dev/null
+++ b/extensions/superboogav2/nltk_data/corpora/stopwords/english
@@ -0,0 +1,179 @@
+i
+me
+my
+myself
+we
+our
+ours
+ourselves
+you
+you're
+you've
+you'll
+you'd
+your
+yours
+yourself
+yourselves
+he
+him
+his
+himself
+she
+she's
+her
+hers
+herself
+it
+it's
+its
+itself
+they
+them
+their
+theirs
+themselves
+what
+which
+who
+whom
+this
+that
+that'll
+these
+those
+am
+is
+are
+was
+were
+be
+been
+being
+have
+has
+had
+having
+do
+does
+did
+doing
+a
+an
+the
+and
+but
+if
+or
+because
+as
+until
+while
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+again
+further
+then
+once
+here
+there
+when
+where
+why
+how
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+s
+t
+can
+will
+just
+don
+don't
+should
+should've
+now
+d
+ll
+m
+o
+re
+ve
+y
+ain
+aren
+aren't
+couldn
+couldn't
+didn
+didn't
+doesn
+doesn't
+hadn
+hadn't
+hasn
+hasn't
+haven
+haven't
+isn
+isn't
+ma
+mightn
+mightn't
+mustn
+mustn't
+needn
+needn't
+shan
+shan't
+shouldn
+shouldn't
+wasn
+wasn't
+weren
+weren't
+won
+won't
+wouldn
+wouldn't
diff --git a/extensions/superboogav2/nltk_data/corpora/wordnet.zip b/extensions/superboogav2/nltk_data/corpora/wordnet.zip
new file mode 100644
index 0000000000000000000000000000000000000000..777df8872a4b420dc0324d7656f5b04270c54110
--- /dev/null
+++ b/extensions/superboogav2/nltk_data/corpora/wordnet.zip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbda5ea6eef7f36a97a43d4a75f85e07fccbb4f23657d27b4ccbc93e2646ab59
+size 10775600
diff --git a/extensions/superboogav2/nltk_data/taggers/averaged_perceptron_tagger/averaged_perceptron_tagger.pickle b/extensions/superboogav2/nltk_data/taggers/averaged_perceptron_tagger/averaged_perceptron_tagger.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..b8661868cc08eb0655fc01dccd8136cceeeeb02a
--- /dev/null
+++ b/extensions/superboogav2/nltk_data/taggers/averaged_perceptron_tagger/averaged_perceptron_tagger.pickle
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25a5a19c7ced7b2bac3831da5bc0afcc2c34e5dd01cd4f361bb799949a696238
+size 6138625
diff --git a/extensions/superboogav2/notebook_handler.py b/extensions/superboogav2/notebook_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..9faadfed12b3afcf70d3b7611821352c1847712a
--- /dev/null
+++ b/extensions/superboogav2/notebook_handler.py
@@ -0,0 +1,40 @@
+"""
+This module is responsible for handling and modifying the notebook text.
+"""
+import re
+
+import extensions.superboogav2.parameters as parameters
+
+from modules import shared
+from modules.logging_colors import logger
+from extensions.superboogav2.utils import create_context_text
+
+from .data_processor import preprocess_text
+
+def _remove_special_tokens(string):
+ pattern = r'(<\|begin-user-input\|>|<\|end-user-input\|>|<\|injection-point\|>)'
+ return re.sub(pattern, '', string)
+
+
+def input_modifier_internal(string, collector):
+ # Sanity check.
+ if shared.is_chat():
+ return string
+
+ # Find the user input
+ pattern = re.compile(r"<\|begin-user-input\|>(.*?)<\|end-user-input\|>", re.DOTALL)
+ match = re.search(pattern, string)
+ if match:
+ # Preprocess the user prompt.
+ user_input = match.group(1).strip()
+ user_input = preprocess_text(user_input)
+
+ logger.debug(f"Preprocessed User Input: {user_input}")
+
+ # Get the most similar chunks
+ results = collector.get_sorted_by_dist(user_input, n_results=parameters.get_chunk_count(), max_token_count=int(parameters.get_max_token_count()))
+
+ # Make the injection
+ string = string.replace('<|injection-point|>', create_context_text(results))
+
+ return _remove_special_tokens(string)
\ No newline at end of file
diff --git a/extensions/superboogav2/optimize.py b/extensions/superboogav2/optimize.py
new file mode 100644
index 0000000000000000000000000000000000000000..acebf21248cd1bf2a769296e4cbe4c6645d46ef8
--- /dev/null
+++ b/extensions/superboogav2/optimize.py
@@ -0,0 +1,135 @@
+"""
+This module implements a hyperparameter optimization routine for the embedding application. It utilizes TPE optimization from Optuna.
+
+Each run, the optimizer will set the default values inside the hyperparameters. At the end, it will output the best ones it has found.
+"""
+import re
+import json
+import optuna
+import gradio as gr
+import numpy as np
+import logging
+import hashlib
+logging.getLogger('optuna').setLevel(logging.WARNING)
+
+import extensions.superboogav2.parameters as parameters
+
+from pathlib import Path
+
+from .benchmark import benchmark
+from .parameters import Parameters
+from modules.logging_colors import logger
+
+
+# Format the parameters into markdown format.
+def _markdown_hyperparams():
+ res = []
+ for param_name, param_value in Parameters.getInstance().hyperparameters.items():
+ # Escape any markdown syntax
+ param_name = re.sub(r"([_*\[\]()~`>#+-.!])", r"\\\1", param_name)
+ param_value_default = re.sub(r"([_*\[\]()~`>#+-.!])", r"\\\1", str(param_value['default'])) if param_value['default'] else ' '
+
+ res.append('* {}: **{}**'.format(param_name, param_value_default))
+
+ return '\n'.join(res)
+
+
+# Convert numpy types to python types.
+def _convert_np_types(params):
+ for key in params:
+ if type(params[key]) == np.bool_:
+ params[key] = bool(params[key])
+ elif type(params[key]) == np.int64:
+ params[key] = int(params[key])
+ elif type(params[key]) == np.float64:
+ params[key] = float(params[key])
+ return params
+
+
+# Set the default values for the hyperparameters.
+def _set_hyperparameters(params):
+ for param_name, param_value in params.items():
+ if param_name in Parameters.getInstance().hyperparameters:
+ Parameters.getInstance().hyperparameters[param_name]['default'] = param_value
+
+
+# Check if the parameter is for optimization.
+def _is_optimization_param(val):
+ is_opt = val.get('should_optimize', False) # Either does not exist or is false
+ return is_opt
+
+
+# Create a hashable representation of the parameters
+def _get_params_hash(params):
+ params_str = json.dumps(params, sort_keys=True)
+ return hashlib.sha256(params_str.encode()).hexdigest()
+
+
+def optimize(collector, progress=gr.Progress()):
+ # Inform the user that something is happening.
+ progress(0, desc=f'Setting Up...')
+
+ # Track the current step
+ current_step = 0
+
+ # Track the best score
+ best_score = 0
+
+ # Dictionary for caching scores
+ scores_cache = {}
+
+ def objective_function(trial):
+ nonlocal current_step
+ nonlocal best_score
+ nonlocal scores_cache
+
+ params = {}
+ for key, val in Parameters.getInstance().hyperparameters.items():
+ if _is_optimization_param(val):
+ params[key] = trial.suggest_categorical(key, val['categories'])
+
+ _set_hyperparameters(params)
+
+ params_hash = _get_params_hash(params)
+
+ # If the score for these parameters is in the cache, return it
+ if params_hash in scores_cache:
+ return scores_cache[params_hash]
+
+ # Benchmark the current set of parameters.
+ score, max_score = benchmark(Path("extensions/superboogav2/benchmark_texts/questions.json"), collector)
+
+ # Cache the score
+ scores_cache[params_hash] = score
+
+ result = json.dumps(_convert_np_types(params), indent=4)
+ result += f'\nScore: {score}/{max_score}'
+
+ logger.debug(result)
+
+ # Increment the current step
+ current_step += 1
+
+ # Update the best score
+ best_score = max(best_score, score)
+
+ # Update the progress
+ progress(current_step / parameters.get_optimization_steps(), desc=f'Optimizing... {current_step}/{parameters.get_optimization_steps()}')
+
+ return -score
+
+ # Run the optimization.
+ study = optuna.create_study()
+ study.optimize(objective_function, n_trials=int(parameters.get_optimization_steps()))
+
+ best_params = study.best_params
+ _set_hyperparameters(best_params)
+
+ # Convert results to a markdown string.
+ str_result = f"## Best parameters:\n\n{_markdown_hyperparams()}\n\n## Score:\n\n{best_score}"
+
+ # Save to JSON file
+ with open('best_params.json', 'w') as fp:
+ json.dump(_convert_np_types(best_params), fp, indent=4)
+
+ return str_result
\ No newline at end of file
diff --git a/extensions/superboogav2/parameters.py b/extensions/superboogav2/parameters.py
new file mode 100644
index 0000000000000000000000000000000000000000..1cada46a2399e6a71ca289bb7128f141e1a7e79e
--- /dev/null
+++ b/extensions/superboogav2/parameters.py
@@ -0,0 +1,369 @@
+"""
+This module provides a singleton class `Parameters` that is used to manage all hyperparameters for the embedding application.
+It expects a JSON file in `extensions/superboogav2/config.json`.
+
+Each element in the JSON must have a `default` value which will be used for the current run. Elements can have `categories`.
+These categories define the range in which the optimizer will search. If the element is tagged with `"should_optimize": false`,
+then the optimizer will only ever use the default value.
+"""
+from pathlib import Path
+
+import json
+
+from modules.logging_colors import logger
+
+
+NUM_TO_WORD_METHOD = 'Number to Word'
+NUM_TO_CHAR_METHOD = 'Number to Char'
+NUM_TO_CHAR_LONG_METHOD = 'Number to Multi-Char'
+
+
+DIST_MIN_STRATEGY = 'Min of Two'
+DIST_HARMONIC_STRATEGY = 'Harmonic Mean'
+DIST_GEOMETRIC_STRATEGY = 'Geometric Mean'
+DIST_ARITHMETIC_STRATEGY = 'Arithmetic Mean'
+
+
+PREPEND_TO_LAST = 'Prepend to Last Message'
+APPEND_TO_LAST = 'Append to Last Message'
+HIJACK_LAST_IN_CONTEXT = 'Hijack Last Message in Context ⚠️ WIP ⚠️ (Works Partially)'
+
+
+SORT_DISTANCE = 'distance'
+SORT_ID = 'id'
+
+
+class Parameters:
+ _instance = None
+
+ variable_mapping = {
+ 'NUM_TO_WORD_METHOD': NUM_TO_WORD_METHOD,
+ 'NUM_TO_CHAR_METHOD': NUM_TO_CHAR_METHOD,
+ 'NUM_TO_CHAR_LONG_METHOD': NUM_TO_CHAR_LONG_METHOD,
+ 'DIST_MIN_STRATEGY': DIST_MIN_STRATEGY,
+ 'DIST_HARMONIC_STRATEGY': DIST_HARMONIC_STRATEGY,
+ 'DIST_GEOMETRIC_STRATEGY': DIST_GEOMETRIC_STRATEGY,
+ 'DIST_ARITHMETIC_STRATEGY': DIST_ARITHMETIC_STRATEGY,
+ 'PREPEND_TO_LAST': PREPEND_TO_LAST,
+ 'APPEND_TO_LAST': APPEND_TO_LAST,
+ 'HIJACK_LAST_IN_CONTEXT': HIJACK_LAST_IN_CONTEXT,
+ }
+
+ @staticmethod
+ def getInstance():
+ if Parameters._instance is None:
+ Parameters()
+ return Parameters._instance
+
+ def __init__(self):
+ if Parameters._instance is not None:
+ raise Exception("This class is a singleton!")
+ else:
+ Parameters._instance = self
+ self.hyperparameters = self._load_from_json(Path("extensions/superboogav2/config.json"))
+
+ def _load_from_json(self, file_path):
+ logger.debug('Loading hyperparameters...')
+
+ with open(file_path, 'r') as file:
+ data = json.load(file)
+
+ # Replace variable names in the dict and create Categorical objects
+ for key in data:
+ if "default" in data[key] and data[key]["default"] in self.variable_mapping:
+ data[key]["default"] = self.variable_mapping[data[key]["default"]]
+ if "categories" in data[key]:
+ data[key]["categories"] = [self.variable_mapping.get(cat, cat) for cat in data[key]["categories"]]
+
+ return data
+
+
+def should_to_lower() -> bool:
+ return bool(Parameters.getInstance().hyperparameters['to_lower']['default'])
+
+
+def get_num_conversion_strategy() -> str:
+ return Parameters.getInstance().hyperparameters['num_conversion']['default']
+
+
+def should_merge_spaces() -> bool:
+ return bool(Parameters.getInstance().hyperparameters['merge_spaces']['default'])
+
+
+def should_strip() -> bool:
+ return bool(Parameters.getInstance().hyperparameters['strip']['default'])
+
+
+def should_remove_punctuation() -> bool:
+ return bool(Parameters.getInstance().hyperparameters['remove_punctuation']['default'])
+
+
+def should_remove_stopwords() -> bool:
+ return bool(Parameters.getInstance().hyperparameters['remove_stopwords']['default'])
+
+
+def should_remove_specific_pos() -> bool:
+ return bool(Parameters.getInstance().hyperparameters['remove_specific_pos']['default'])
+
+
+def should_lemmatize() -> bool:
+ return bool(Parameters.getInstance().hyperparameters['lemmatize']['default'])
+
+
+def get_min_num_sentences() -> int:
+ return int(Parameters.getInstance().hyperparameters['min_num_sent']['default'])
+
+
+def get_delta_start() -> int:
+ return int(Parameters.getInstance().hyperparameters['delta_start']['default'])
+
+
+def set_to_lower(value: bool):
+ Parameters.getInstance().hyperparameters['to_lower']['default'] = value
+
+
+def set_num_conversion_strategy(value: str):
+ Parameters.getInstance().hyperparameters['num_conversion']['default'] = value
+
+
+def set_merge_spaces(value: bool):
+ Parameters.getInstance().hyperparameters['merge_spaces']['default'] = value
+
+
+def set_strip(value: bool):
+ Parameters.getInstance().hyperparameters['strip']['default'] = value
+
+
+def set_remove_punctuation(value: bool):
+ Parameters.getInstance().hyperparameters['remove_punctuation']['default'] = value
+
+
+def set_remove_stopwords(value: bool):
+ Parameters.getInstance().hyperparameters['remove_stopwords']['default'] = value
+
+
+def set_remove_specific_pos(value: bool):
+ Parameters.getInstance().hyperparameters['remove_specific_pos']['default'] = value
+
+
+def set_lemmatize(value: bool):
+ Parameters.getInstance().hyperparameters['lemmatize']['default'] = value
+
+
+def set_min_num_sentences(value: int):
+ Parameters.getInstance().hyperparameters['min_num_sent']['default'] = value
+
+
+def set_delta_start(value: int):
+ Parameters.getInstance().hyperparameters['delta_start']['default'] = value
+
+
+def get_chunk_len() -> str:
+ lens = []
+ mask = Parameters.getInstance().hyperparameters['chunk_len_mask']['default']
+
+ lens.append(Parameters.getInstance().hyperparameters['chunk_len1']['default'] if mask & (1 << 0) else None)
+ lens.append(Parameters.getInstance().hyperparameters['chunk_len2']['default'] if mask & (1 << 1) else None)
+ lens.append(Parameters.getInstance().hyperparameters['chunk_len3']['default'] if mask & (1 << 2) else None)
+ lens.append(Parameters.getInstance().hyperparameters['chunk_len4']['default'] if mask & (1 << 3) else None)
+
+ return ','.join([str(len) for len in lens if len])
+
+
+def set_chunk_len(val: str):
+ chunk_lens = sorted([int(len.strip()) for len in val.split(',')])
+
+ # Reset the mask to zero
+ Parameters.getInstance().hyperparameters['chunk_len_mask']['default'] = 0
+
+ if len(chunk_lens) > 0:
+ Parameters.getInstance().hyperparameters['chunk_len1']['default'] = chunk_lens[0]
+ Parameters.getInstance().hyperparameters['chunk_len_mask']['default'] |= (1 << 0)
+ if len(chunk_lens) > 1:
+ Parameters.getInstance().hyperparameters['chunk_len2']['default'] = chunk_lens[1]
+ Parameters.getInstance().hyperparameters['chunk_len_mask']['default'] |= (1 << 1)
+ if len(chunk_lens) > 2:
+ Parameters.getInstance().hyperparameters['chunk_len3']['default'] = chunk_lens[2]
+ Parameters.getInstance().hyperparameters['chunk_len_mask']['default'] |= (1 << 2)
+ if len(chunk_lens) > 3:
+ Parameters.getInstance().hyperparameters['chunk_len4']['default'] = chunk_lens[3]
+ Parameters.getInstance().hyperparameters['chunk_len_mask']['default'] |= (1 << 3)
+
+ if len(chunk_lens) > 4:
+ logger.warning(f'Only up to four chunk lengths are supported. Skipping {chunk_lens[4:]}')
+
+
+def get_context_len() -> str:
+ context_len = str(Parameters.getInstance().hyperparameters['context_len_left']['default']) + ',' + str(Parameters.getInstance().hyperparameters['context_len_right']['default'])
+ return context_len
+
+
+def set_context_len(val: str):
+ context_lens = [int(len.strip()) for len in val.split(',') if len.isdigit()]
+ if len(context_lens) == 1:
+ Parameters.getInstance().hyperparameters['context_len_left']['default'] = Parameters.getInstance().hyperparameters['context_len_right']['default'] = context_lens[0]
+ elif len(context_lens) == 2:
+ Parameters.getInstance().hyperparameters['context_len_left']['default'] = context_lens[0]
+ Parameters.getInstance().hyperparameters['context_len_right']['default'] = context_lens[1]
+ else:
+ logger.warning(f'Incorrect context length received {val}. Skipping.')
+
+
+def get_new_dist_strategy() -> str:
+ return Parameters.getInstance().hyperparameters['new_dist_strategy']['default']
+
+
+def get_chunk_count() -> int:
+ return int(Parameters.getInstance().hyperparameters['chunk_count']['default'])
+
+
+def get_min_num_length() -> int:
+ return int(Parameters.getInstance().hyperparameters['min_num_length']['default'])
+
+
+def get_significant_level() -> float:
+ return float(Parameters.getInstance().hyperparameters['significant_level']['default'])
+
+
+def get_time_steepness() -> float:
+ return float(Parameters.getInstance().hyperparameters['time_steepness']['default'])
+
+
+def get_time_power() -> float:
+ return float(Parameters.getInstance().hyperparameters['time_power']['default'])
+
+
+def get_chunk_separator() -> str:
+ return Parameters.getInstance().hyperparameters['chunk_separator']['default']
+
+
+def get_prefix() -> str:
+ return Parameters.getInstance().hyperparameters['prefix']['default']
+
+
+def get_data_separator() -> str:
+ return Parameters.getInstance().hyperparameters['data_separator']['default']
+
+
+def get_postfix() -> str:
+ return Parameters.getInstance().hyperparameters['postfix']['default']
+
+
+def get_is_manual() -> bool:
+ return bool(Parameters.getInstance().hyperparameters['manual']['default'])
+
+
+def get_add_chat_to_data() -> bool:
+ return bool(Parameters.getInstance().hyperparameters['add_chat_to_data']['default'])
+
+
+def get_injection_strategy() -> str:
+ return Parameters.getInstance().hyperparameters['injection_strategy']['default']
+
+
+def get_chunk_regex() -> str:
+ return Parameters.getInstance().hyperparameters['chunk_regex']['default']
+
+
+def get_is_strong_cleanup() -> bool:
+ return bool(Parameters.getInstance().hyperparameters['strong_cleanup']['default'])
+
+
+def get_max_token_count() -> int:
+ return int(Parameters.getInstance().hyperparameters['max_token_count']['default'])
+
+
+def get_num_threads() -> int:
+ return int(Parameters.getInstance().hyperparameters['threads']['default'])
+
+
+def get_optimization_steps() -> int:
+ return int(Parameters.getInstance().hyperparameters['optimization_steps']['default'])
+
+
+def get_api_port() -> int:
+ return int(Parameters.getInstance().hyperparameters['api_port']['default'])
+
+
+def get_api_on() -> bool:
+ return bool(Parameters.getInstance().hyperparameters['api_on']['default'])
+
+
+def set_new_dist_strategy(value: str):
+ Parameters.getInstance().hyperparameters['new_dist_strategy']['default'] = value
+
+
+def set_chunk_count(value: int):
+ Parameters.getInstance().hyperparameters['chunk_count']['default'] = value
+
+
+def set_min_num_length(value: int):
+ Parameters.getInstance().hyperparameters['min_num_length']['default'] = value
+
+
+def set_significant_level(value: float):
+ Parameters.getInstance().hyperparameters['significant_level']['default'] = value
+
+
+def set_time_steepness(value: float):
+ Parameters.getInstance().hyperparameters['time_steepness']['default'] = value
+
+
+def set_time_power(value: float):
+ Parameters.getInstance().hyperparameters['time_power']['default'] = value
+
+
+def set_chunk_separator(value: str):
+ Parameters.getInstance().hyperparameters['chunk_separator']['default'] = value
+
+
+def set_prefix(value: str):
+ Parameters.getInstance().hyperparameters['prefix']['default'] = value
+
+
+def set_data_separator(value: str):
+ Parameters.getInstance().hyperparameters['data_separator']['default'] = value
+
+
+def set_postfix(value: str):
+ Parameters.getInstance().hyperparameters['postfix']['default'] = value
+
+
+def set_manual(value: bool):
+ Parameters.getInstance().hyperparameters['manual']['default'] = value
+
+
+def set_add_chat_to_data(value: bool):
+ Parameters.getInstance().hyperparameters['add_chat_to_data']['default'] = value
+
+
+def set_injection_strategy(value: str):
+ Parameters.getInstance().hyperparameters['injection_strategy']['default'] = value
+
+
+def set_chunk_regex(value: str):
+ Parameters.getInstance().hyperparameters['chunk_regex']['default'] = value
+
+
+def set_strong_cleanup(value: bool):
+ Parameters.getInstance().hyperparameters['strong_cleanup']['default'] = value
+
+
+def set_max_token_count(value: int):
+ Parameters.getInstance().hyperparameters['max_token_count']['default'] = value
+
+
+def set_num_threads(value: int):
+ Parameters.getInstance().hyperparameters['threads']['default'] = value
+
+
+def set_optimization_steps(value: int):
+ Parameters.getInstance().hyperparameters['optimization_steps']['default'] = value
+
+
+def set_api_port(value: int):
+ Parameters.getInstance().hyperparameters['api_port']['default'] = value
+
+
+def set_api_on(value: bool):
+ Parameters.getInstance().hyperparameters['api_on']['default'] = value
\ No newline at end of file
diff --git a/extensions/superboogav2/requirements.txt b/extensions/superboogav2/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..748bacf1ab8893b430ef7db9b5bef70dc4c0b94b
--- /dev/null
+++ b/extensions/superboogav2/requirements.txt
@@ -0,0 +1,10 @@
+beautifulsoup4==4.12.2
+chromadb==0.3.18
+lxml
+optuna
+pandas==2.0.3
+posthog==2.4.2
+sentence_transformers==2.2.2
+spacy
+pytextrank
+num2words
\ No newline at end of file
diff --git a/extensions/superboogav2/script.py b/extensions/superboogav2/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..0870ab4c3b6fd1e5abc274d799289a663eebcd54
--- /dev/null
+++ b/extensions/superboogav2/script.py
@@ -0,0 +1,355 @@
+"""
+This file is responsible for the UI and how the application interracts with the rest of the system.
+"""
+import os
+from pathlib import Path
+
+# Point to where nltk will find the required data.
+os.environ['NLTK_DATA'] = str(Path("extensions/superboogav2/nltk_data").resolve())
+
+import textwrap
+import codecs
+import gradio as gr
+
+import extensions.superboogav2.parameters as parameters
+
+from modules.logging_colors import logger
+from modules import shared
+
+from .utils import create_metadata_source
+from .chromadb import make_collector
+from .download_urls import feed_url_into_collector
+from .data_processor import process_and_add_to_collector
+from .benchmark import benchmark
+from .optimize import optimize
+from .notebook_handler import input_modifier_internal
+from .chat_handler import custom_generate_chat_prompt_internal
+from .api import APIManager
+
+collector = None
+api_manager = None
+
+def setup():
+ global collector
+ global api_manager
+ collector = make_collector()
+ api_manager = APIManager(collector)
+
+ if parameters.get_api_on():
+ api_manager.start_server(parameters.get_api_port())
+
+def _feed_data_into_collector(corpus):
+ yield '### Processing data...'
+ process_and_add_to_collector(corpus, collector, False, create_metadata_source('direct-text'))
+ yield '### Done.'
+
+
+def _feed_file_into_collector(file):
+ yield '### Reading and processing the input dataset...'
+ text = file.decode('utf-8')
+ process_and_add_to_collector(text, collector, False, create_metadata_source('file'))
+ yield '### Done.'
+
+
+def _feed_url_into_collector(urls):
+ for i in feed_url_into_collector(urls, collector):
+ yield i
+ yield '### Done.'
+
+
+def _begin_benchmark():
+ score, max_score = benchmark(Path("extensions/superboogav2/benchmark_texts/questions.json"), collector)
+ return f'**Score**: {score}/{max_score}'
+
+
+def _begin_optimization(progress=gr.Progress()):
+ return optimize(collector, progress), *_get_optimizable_settings()
+
+
+def _clear_data():
+ collector.clear()
+ return "### Data Cleared!"
+
+
+def _get_optimizable_settings() -> list:
+ preprocess_pipeline = []
+ if parameters.should_to_lower():
+ preprocess_pipeline.append('Lower Cases')
+ if parameters.should_remove_punctuation():
+ preprocess_pipeline.append('Remove Punctuation')
+ if parameters.should_remove_specific_pos():
+ preprocess_pipeline.append('Remove Adverbs')
+ if parameters.should_remove_stopwords():
+ preprocess_pipeline.append('Remove Stop Words')
+ if parameters.should_lemmatize():
+ preprocess_pipeline.append('Lemmatize')
+ if parameters.should_merge_spaces():
+ preprocess_pipeline.append('Merge Spaces')
+ if parameters.should_strip():
+ preprocess_pipeline.append('Strip Edges')
+
+ return [
+ parameters.get_time_power(),
+ parameters.get_time_steepness(),
+ parameters.get_significant_level(),
+ parameters.get_min_num_sentences(),
+ parameters.get_new_dist_strategy(),
+ parameters.get_delta_start(),
+ parameters.get_min_num_length(),
+ parameters.get_num_conversion_strategy(),
+ preprocess_pipeline,
+ parameters.get_chunk_count(),
+ parameters.get_context_len(),
+ parameters.get_chunk_len()
+ ]
+
+
+def _apply_settings(optimization_steps, time_power, time_steepness, significant_level, min_sentences, new_dist_strat, delta_start, min_number_length, num_conversion,
+ preprocess_pipeline, api_port, api_on, injection_strategy, add_chat_to_data, manual, postfix, data_separator, prefix, max_token_count,
+ chunk_count, chunk_sep, context_len, chunk_regex, chunk_len, threads, strong_cleanup):
+ logger.debug('Applying settings.')
+
+ try:
+ parameters.set_optimization_steps(optimization_steps)
+ parameters.set_significant_level(significant_level)
+ parameters.set_min_num_sentences(min_sentences)
+ parameters.set_new_dist_strategy(new_dist_strat)
+ parameters.set_delta_start(delta_start)
+ parameters.set_min_num_length(min_number_length)
+ parameters.set_num_conversion_strategy(num_conversion)
+ parameters.set_api_port(api_port)
+ parameters.set_api_on(api_on)
+ parameters.set_injection_strategy(injection_strategy)
+ parameters.set_add_chat_to_data(add_chat_to_data)
+ parameters.set_manual(manual)
+ parameters.set_postfix(codecs.decode(postfix, 'unicode_escape'))
+ parameters.set_data_separator(codecs.decode(data_separator, 'unicode_escape'))
+ parameters.set_prefix(codecs.decode(prefix, 'unicode_escape'))
+ parameters.set_max_token_count(max_token_count)
+ parameters.set_time_power(time_power)
+ parameters.set_time_steepness(time_steepness)
+ parameters.set_chunk_count(chunk_count)
+ parameters.set_chunk_separator(codecs.decode(chunk_sep, 'unicode_escape'))
+ parameters.set_context_len(context_len)
+ parameters.set_chunk_regex(chunk_regex)
+ parameters.set_chunk_len(chunk_len)
+ parameters.set_num_threads(threads)
+ parameters.set_strong_cleanup(strong_cleanup)
+
+ preprocess_choices = ['Lower Cases', 'Remove Punctuation', 'Remove Adverbs', 'Remove Stop Words', 'Lemmatize', 'Merge Spaces', 'Strip Edges']
+ for preprocess_method in preprocess_choices:
+ if preprocess_method == 'Lower Cases':
+ parameters.set_to_lower(preprocess_method in preprocess_pipeline)
+ elif preprocess_method == 'Remove Punctuation':
+ parameters.set_remove_punctuation(preprocess_method in preprocess_pipeline)
+ elif preprocess_method == 'Remove Adverbs':
+ parameters.set_remove_specific_pos(preprocess_method in preprocess_pipeline)
+ elif preprocess_method == 'Remove Stop Words':
+ parameters.set_remove_stopwords(preprocess_method in preprocess_pipeline)
+ elif preprocess_method == 'Lemmatize':
+ parameters.set_lemmatize(preprocess_method in preprocess_pipeline)
+ elif preprocess_method == 'Merge Spaces':
+ parameters.set_merge_spaces(preprocess_method in preprocess_pipeline)
+ elif preprocess_method == 'Strip Edges':
+ parameters.set_strip(preprocess_method in preprocess_pipeline)
+
+ # Based on API on/off, start or stop the server
+ if api_manager is not None:
+ if parameters.get_api_on() and (not api_manager.is_server_running()):
+ api_manager.start_server(parameters.get_api_port())
+ elif (not parameters.get_api_on()) and api_manager.is_server_running():
+ api_manager.stop_server()
+ except Exception as e:
+ logger.warn(f'Could not properly apply settings: {str(e)}')
+
+
+def custom_generate_chat_prompt(user_input, state, **kwargs):
+ return custom_generate_chat_prompt_internal(user_input, state, collector, **kwargs)
+
+
+def input_modifier(string):
+ return input_modifier_internal(string, collector)
+
+
+def ui():
+ with gr.Accordion("Click for more information...", open=False):
+ gr.Markdown(textwrap.dedent("""
+
+ ## About
+
+ This extension takes a dataset as input, breaks it into chunks, and adds the result to a local/offline Chroma database.
+
+ The database is then queried during inference time to get the excerpts that are closest to your input. The idea is to create an arbitrarily large pseudo context.
+
+ The core methodology was developed and contributed by kaiokendev, who is working on improvements to the method in this repository: https://github.com/kaiokendev/superbig
+
+ ## Data input
+
+ Start by entering some data in the interface below and then clicking on "Load data".
+
+ Each time you load some new data, the old chunks are discarded.
+
+ ## Chat mode
+
+ #### Instruct
+
+ On each turn, the chunks will be compared to your current input and the most relevant matches will be appended to the input in the following format:
+
+ ```
+ Consider the excerpts below as additional context:
+ ...
+ ```
+
+ The injection doesn't make it into the chat history. It is only used in the current generation.
+
+ #### Regular chat
+
+ The chunks from the external data sources are ignored, and the chroma database is built based on the chat history instead. The most relevant past exchanges relative to the present input are added to the context string. This way, the extension acts as a long term memory.
+
+ ## Notebook/default modes
+
+ Your question must be manually specified between `<|begin-user-input|>` and `<|end-user-input|>` tags, and the injection point must be specified with `<|injection-point|>`.
+
+ The special tokens mentioned above (`<|begin-user-input|>`, `<|end-user-input|>`, and `<|injection-point|>`) are removed in the background before the text generation begins.
+
+ Here is an example in Vicuna 1.1 format:
+
+ ```
+ A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
+
+ USER:
+ <|injection-point|>
+
+ <|begin-user-input|>What datasets are mentioned in the text above?<|end-user-input|>
+ ASSISTANT:
+ ```
+ """))
+
+ with gr.Row():
+ with gr.Column(min_width=600):
+ with gr.Tab("Text input"):
+ data_input = gr.Textbox(lines=20, label='Input data')
+ update_data = gr.Button('Load data')
+
+ with gr.Tab("URL input"):
+ url_input = gr.Textbox(lines=10, label='Input URLs', info='Enter one or more URLs separated by newline characters.')
+ strong_cleanup = gr.Checkbox(value=parameters.get_is_strong_cleanup(), label='Strong cleanup', info='Only keeps html elements that look like long-form text.')
+ threads = gr.Number(value=parameters.get_num_threads(), label='Threads', info='The number of threads to use while downloading the URLs.', precision=0)
+ update_url = gr.Button('Load data')
+
+ with gr.Tab("File input"):
+ file_input = gr.File(label='Input file', type='binary')
+ update_file = gr.Button('Load data')
+
+ with gr.Tab("Settings"):
+ with gr.Accordion("Processing settings", open=True):
+ chunk_len = gr.Textbox(value=parameters.get_chunk_len(), label='Chunk length', info='In characters, not tokens. This value is used when you click on "Load data".')
+ chunk_regex = gr.Textbox(value=parameters.get_chunk_regex(), label='Chunk regex', info='Will specifically add the captured text to the embeddings.')
+ context_len = gr.Textbox(value=parameters.get_context_len(), label='Context length', info='In characters, not tokens. How much context to load around each chunk.')
+ chunk_sep = gr.Textbox(value=codecs.encode(parameters.get_chunk_separator(), 'unicode_escape').decode(), label='Chunk separator', info='Used to manually split chunks. Manually split chunks longer than chunk length are split again. This value is used when you click on "Load data".')
+
+ with gr.Accordion("Generation settings", open=False):
+ chunk_count = gr.Number(value=parameters.get_chunk_count(), label='Chunk count', info='The number of closest-matching chunks to include in the prompt.')
+ max_token_count = gr.Number(value=parameters.get_max_token_count(), label='Max Context Tokens', info='The context length in tokens will not exceed this value.')
+ prefix = gr.Textbox(value=codecs.encode(parameters.get_prefix(), 'unicode_escape').decode(), label='Prefix', info='What to put before the injection point.')
+ data_separator = gr.Textbox(value=codecs.encode(parameters.get_data_separator(), 'unicode_escape').decode(), label='Data separator', info='When multiple pieces of distant data are added, they might be unrelated. It\'s important to separate them.')
+ postfix = gr.Textbox(value=codecs.encode(parameters.get_postfix(), 'unicode_escape').decode(), label='Postfix', info='What to put after the injection point.')
+ with gr.Row():
+ manual = gr.Checkbox(value=parameters.get_is_manual(), label="Is Manual", info="Manually specify when to use ChromaDB. Insert `!c` at the start or end of the message to trigger a query.", visible=shared.is_chat())
+ add_chat_to_data = gr.Checkbox(value=parameters.get_add_chat_to_data(), label="Add Chat to Data", info="Automatically feed the chat history as you chat.", visible=shared.is_chat())
+ injection_strategy = gr.Radio(choices=[parameters.PREPEND_TO_LAST, parameters.APPEND_TO_LAST, parameters.HIJACK_LAST_IN_CONTEXT], value=parameters.get_injection_strategy(), label='Injection Strategy', info='Where to inject the messages in chat or instruct mode.', visible=shared.is_chat())
+ with gr.Row():
+ api_on = gr.Checkbox(value=parameters.get_api_on(), label="Turn on API", info="Check this to turn on the API service.")
+ api_port = gr.Number(value=parameters.get_api_port(), label="API Port", info="The port on which the API service will run.")
+
+ with gr.Accordion("Advanced settings", open=False):
+ preprocess_set_choices = []
+ if parameters.should_to_lower():
+ preprocess_set_choices.append('Lower Cases')
+ if parameters.should_remove_punctuation():
+ preprocess_set_choices.append('Remove Punctuation')
+ if parameters.should_remove_specific_pos():
+ preprocess_set_choices.append('Remove Adverbs')
+ if parameters.should_remove_stopwords():
+ preprocess_set_choices.append('Remove Stop Words')
+ if parameters.should_lemmatize():
+ preprocess_set_choices.append('Lemmatize')
+ if parameters.should_merge_spaces():
+ preprocess_set_choices.append('Merge Spaces')
+ if parameters.should_strip():
+ preprocess_set_choices.append('Strip Edges')
+
+ preprocess_pipeline = gr.CheckboxGroup(label='Preprocessing pipeline', choices=[
+ 'Lower Cases',
+ 'Remove Punctuation',
+ 'Remove Adverbs',
+ 'Remove Stop Words',
+ 'Lemmatize',
+ 'Merge Spaces',
+ 'Strip Edges',
+ ], value=preprocess_set_choices, interactive=True, info='How to preprocess the text before it is turned into an embedding.')
+
+ with gr.Row():
+ num_conversion = gr.Dropdown(choices=[parameters.NUM_TO_WORD_METHOD, parameters.NUM_TO_CHAR_METHOD, parameters.NUM_TO_CHAR_LONG_METHOD, 'None'], value=parameters.get_num_conversion_strategy(), label="Number Conversion Method", info='How to preprocess numbers before creating the embeddings.', interactive=True)
+ min_number_length = gr.Number(value=parameters.get_min_num_length(), label='Number Length Threshold', info='In digits. Only numbers that have at least that many digits will be converted.', interactive=True)
+
+ delta_start = gr.Number(value=parameters.get_delta_start(), label='Delta Start Index', info='If the system encounters two identical embeddings, and they both start within the same delta, then only the first will be considered.', interactive=True)
+ new_dist_strat = gr.Dropdown(choices=[parameters.DIST_MIN_STRATEGY, parameters.DIST_HARMONIC_STRATEGY, parameters.DIST_GEOMETRIC_STRATEGY, parameters.DIST_ARITHMETIC_STRATEGY], value=parameters.get_new_dist_strategy(), label="Distance Strategy", info='When two embedding texts are merged, the distance of the new piece will be decided using one of these strategies.', interactive=True)
+ min_sentences = gr.Number(value=parameters.get_min_num_sentences(), label='Summary Threshold', info='In sentences. The minumum number of sentences to trigger text-rank summarization.', interactive=True)
+ significant_level = gr.Slider(0.8, 2, value=parameters.get_significant_level(), label='Significant Level', info='Defines the cut-off for what is considered a "significant" distance relative to the median distance among the returned samples.', interactive=True)
+ time_steepness = gr.Slider(0.01, 1.0, value=parameters.get_time_steepness(), label='Time Weighing Steepness', info='How differently two close excerpts are going to be weighed.')
+ time_power = gr.Slider(0.0, 1.0, value=parameters.get_time_power(), label='Time Weighing Power', info='How influencial is the weighing. At 1.0, old entries won\'t be considered')
+
+ with gr.Tab("Benchmark"):
+ benchmark_button = gr.Button('Benchmark')
+ optimize_button = gr.Button('Optimize')
+ optimization_steps = gr.Number(value=parameters.get_optimization_steps(), label='Optimization Steps', info='For how many steps to optimize.', interactive=True)
+
+
+ clear_button = gr.Button('❌ Clear Data')
+
+
+ with gr.Column():
+ last_updated = gr.Markdown()
+
+ all_params = [optimization_steps, time_power, time_steepness, significant_level, min_sentences, new_dist_strat, delta_start, min_number_length, num_conversion,
+ preprocess_pipeline, api_port, api_on, injection_strategy, add_chat_to_data, manual, postfix, data_separator, prefix, max_token_count,
+ chunk_count, chunk_sep, context_len, chunk_regex, chunk_len, threads, strong_cleanup]
+ optimizable_params = [time_power, time_steepness, significant_level, min_sentences, new_dist_strat, delta_start, min_number_length, num_conversion,
+ preprocess_pipeline, chunk_count, context_len, chunk_len]
+
+
+ update_data.click(_feed_data_into_collector, [data_input], last_updated, show_progress=False)
+ update_url.click(_feed_url_into_collector, [url_input], last_updated, show_progress=False)
+ update_file.click(_feed_file_into_collector, [file_input], last_updated, show_progress=False)
+ benchmark_button.click(_begin_benchmark, [], last_updated, show_progress=True)
+ optimize_button.click(_begin_optimization, [], [last_updated] + optimizable_params, show_progress=True)
+ clear_button.click(_clear_data, [], last_updated, show_progress=False)
+
+
+ optimization_steps.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ time_power.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ time_steepness.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ significant_level.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ min_sentences.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ new_dist_strat.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ delta_start.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ min_number_length.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ num_conversion.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ preprocess_pipeline.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ api_port.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ api_on.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ injection_strategy.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ add_chat_to_data.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ manual.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ postfix.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ data_separator.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ prefix.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ max_token_count.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ chunk_count.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ chunk_sep.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ context_len.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ chunk_regex.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ chunk_len.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ threads.input(fn=_apply_settings, inputs=all_params, show_progress=False)
+ strong_cleanup.input(fn=_apply_settings, inputs=all_params, show_progress=False)
\ No newline at end of file
diff --git a/extensions/superboogav2/utils.py b/extensions/superboogav2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..89b367eacc9e520912327cff0b7893113381aada
--- /dev/null
+++ b/extensions/superboogav2/utils.py
@@ -0,0 +1,16 @@
+"""
+This module contains common functions across multiple other modules.
+"""
+
+import extensions.superboogav2.parameters as parameters
+
+# Create the context using the prefix + data_separator + postfix from parameters.
+def create_context_text(results):
+ context = parameters.get_prefix() + parameters.get_data_separator().join(results) + parameters.get_postfix()
+
+ return context
+
+
+# Create metadata with the specified source
+def create_metadata_source(source: str):
+ return {'source': source}
\ No newline at end of file
diff --git a/extensions/whisper_stt/readme.md b/extensions/whisper_stt/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..cd9abbf68cb4f7adf1172fdd57e9e26466e47778
--- /dev/null
+++ b/extensions/whisper_stt/readme.md
@@ -0,0 +1,15 @@
+# whisper_stt
+
+Allows you to enter your inputs in chat mode using your microphone.
+
+## Settings
+
+To adjust your default settings, you can add the following to your settings.yaml file.
+
+```
+whisper_stt-whipser_language: chinese
+whisper_stt-whipser_model: tiny
+whisper_stt-auto_submit: False
+```
+
+See source documentation for [model names](https://github.com/openai/whisper#available-models-and-languages) and (languages)[https://github.com/openai/whisper/blob/main/whisper/tokenizer.py] you can use.
\ No newline at end of file
diff --git a/extensions/whisper_stt/requirements.txt b/extensions/whisper_stt/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b650f6b676c8c755ce5f82bb74e065a5e48f5c0b
--- /dev/null
+++ b/extensions/whisper_stt/requirements.txt
@@ -0,0 +1,4 @@
+SpeechRecognition==3.10.0
+git+https://github.com/oobabooga/whisper.git
+soundfile
+ffmpeg
diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..cdc55687b30abb43ef6adc6c4f25273ff39cb4d0
--- /dev/null
+++ b/extensions/whisper_stt/script.py
@@ -0,0 +1,71 @@
+import gradio as gr
+import speech_recognition as sr
+
+from modules import shared
+
+input_hijack = {
+ 'state': False,
+ 'value': ["", ""]
+}
+
+# parameters which can be customized in settings.json of webui
+params = {
+ 'whipser_language': 'english',
+ 'whipser_model': 'small.en',
+ 'auto_submit': True
+}
+
+
+def chat_input_modifier(text, visible_text, state):
+ global input_hijack
+ if input_hijack['state']:
+ input_hijack['state'] = False
+ return input_hijack['value']
+ else:
+ return text, visible_text
+
+
+def do_stt(audio, whipser_model, whipser_language):
+ transcription = ""
+ r = sr.Recognizer()
+
+ # Convert to AudioData
+ audio_data = sr.AudioData(sample_rate=audio[0], frame_data=audio[1], sample_width=4)
+
+ try:
+ transcription = r.recognize_whisper(audio_data, language=whipser_language, model=whipser_model)
+ except sr.UnknownValueError:
+ print("Whisper could not understand audio")
+ except sr.RequestError as e:
+ print("Could not request results from Whisper", e)
+
+ return transcription
+
+
+def auto_transcribe(audio, auto_submit, whipser_model, whipser_language):
+ if audio is None:
+ return "", ""
+ transcription = do_stt(audio, whipser_model, whipser_language)
+ if auto_submit:
+ input_hijack.update({"state": True, "value": [transcription, transcription]})
+
+ return transcription, None
+
+
+def ui():
+ with gr.Accordion("Whisper STT", open=True):
+ with gr.Row():
+ audio = gr.Audio(source="microphone")
+ with gr.Row():
+ with gr.Accordion("Settings", open=False):
+ auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=params['auto_submit'])
+ whipser_model = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large"])
+ whipser_language = gr.Dropdown(label='Whisper Language', value=params['whipser_language'], choices=["chinese", "german", "spanish", "russian", "korean", "french", "japanese", "portuguese", "turkish", "polish", "catalan", "dutch", "arabic", "swedish", "italian", "indonesian", "hindi", "finnish", "vietnamese", "hebrew", "ukrainian", "greek", "malay", "czech", "romanian", "danish", "hungarian", "tamil", "norwegian", "thai", "urdu", "croatian", "bulgarian", "lithuanian", "latin", "maori", "malayalam", "welsh", "slovak", "telugu", "persian", "latvian", "bengali", "serbian", "azerbaijani", "slovenian", "kannada", "estonian", "macedonian", "breton", "basque", "icelandic", "armenian", "nepali", "mongolian", "bosnian", "kazakh", "albanian", "swahili", "galician", "marathi", "punjabi", "sinhala", "khmer", "shona", "yoruba", "somali", "afrikaans", "occitan", "georgian", "belarusian", "tajik", "sindhi", "gujarati", "amharic", "yiddish", "lao", "uzbek", "faroese", "haitian creole", "pashto", "turkmen", "nynorsk", "maltese", "sanskrit", "luxembourgish", "myanmar", "tibetan", "tagalog", "malagasy", "assamese", "tatar", "hawaiian", "lingala", "hausa", "bashkir", "javanese", "sundanese"])
+
+ audio.change(
+ auto_transcribe, [audio, auto_submit, whipser_model, whipser_language], [shared.gradio['textbox'], audio]).then(
+ None, auto_submit, None, _js="(check) => {if (check) { document.getElementById('Generate').click() }}")
+
+ whipser_model.change(lambda x: params.update({"whipser_model": x}), whipser_model, None)
+ whipser_language.change(lambda x: params.update({"whipser_language": x}), whipser_language, None)
+ auto_submit.change(lambda x: params.update({"auto_submit": x}), auto_submit, None)
diff --git a/grammars/arithmetic.gbnf b/grammars/arithmetic.gbnf
new file mode 100644
index 0000000000000000000000000000000000000000..3aa95a9dda7e80dc6ffc5fa07773e334aa6fb9f0
--- /dev/null
+++ b/grammars/arithmetic.gbnf
@@ -0,0 +1,6 @@
+root ::= (expr "=" ws term "\n")+
+expr ::= term ([-+*/] term)*
+term ::= ident | num | "(" ws expr ")" ws
+ident ::= [a-z] [a-z0-9_]* ws
+num ::= [0-9]+ ws
+ws ::= [ \t\n]*
diff --git a/grammars/c.gbnf b/grammars/c.gbnf
new file mode 100644
index 0000000000000000000000000000000000000000..4a0331dd2d6df91b6e2154b78dc31df57a4e715e
--- /dev/null
+++ b/grammars/c.gbnf
@@ -0,0 +1,42 @@
+root ::= (declaration)*
+
+declaration ::= dataType identifier "(" parameter? ")" "{" statement* "}"
+
+dataType ::= "int" ws | "float" ws | "char" ws
+identifier ::= [a-zA-Z_] [a-zA-Z_0-9]*
+
+parameter ::= dataType identifier
+
+statement ::=
+ ( dataType identifier ws "=" ws expression ";" ) |
+ ( identifier ws "=" ws expression ";" ) |
+ ( identifier ws "(" argList? ")" ";" ) |
+ ( "return" ws expression ";" ) |
+ ( "while" "(" condition ")" "{" statement* "}" ) |
+ ( "for" "(" forInit ";" ws condition ";" ws forUpdate ")" "{" statement* "}" ) |
+ ( "if" "(" condition ")" "{" statement* "}" ("else" "{" statement* "}")? ) |
+ ( singleLineComment ) |
+ ( multiLineComment )
+
+forInit ::= dataType identifier ws "=" ws expression | identifier ws "=" ws expression
+forUpdate ::= identifier ws "=" ws expression
+
+condition ::= expression relationOperator expression
+relationOperator ::= ("<=" | "<" | "==" | "!=" | ">=" | ">")
+
+expression ::= term (("+" | "-") term)*
+term ::= factor(("*" | "/") factor)*
+
+factor ::= identifier | number | unaryTerm | funcCall | parenExpression
+unaryTerm ::= "-" factor
+funcCall ::= identifier "(" argList? ")"
+parenExpression ::= "(" ws expression ws ")"
+
+argList ::= expression ("," ws expression)*
+
+number ::= [0-9]+
+
+singleLineComment ::= "//" [^\n]* "\n"
+multiLineComment ::= "/*" ( [^*] | ("*" [^/]) )* "*/"
+
+ws ::= ([ \t\n]+)
diff --git a/grammars/chess.gbnf b/grammars/chess.gbnf
new file mode 100644
index 0000000000000000000000000000000000000000..ef0fc1b07f01c0ff32465597ced7dc36780f6171
--- /dev/null
+++ b/grammars/chess.gbnf
@@ -0,0 +1,13 @@
+# Specifies chess moves as a list in algebraic notation, using PGN conventions
+
+# Force first move to "1. ", then any 1-2 digit number after, relying on model to follow the pattern
+root ::= "1. " move " " move "\n" ([1-9] [0-9]? ". " move " " move "\n")+
+move ::= (pawn | nonpawn | castle) [+#]?
+
+# piece type, optional file/rank, optional capture, dest file & rank
+nonpawn ::= [NBKQR] [a-h]? [1-8]? "x"? [a-h] [1-8]
+
+# optional file & capture, dest file & rank, optional promotion
+pawn ::= ([a-h] "x")? [a-h] [1-8] ("=" [NBKQR])?
+
+castle ::= "O-O" "-O"?
diff --git a/grammars/japanese.gbnf b/grammars/japanese.gbnf
new file mode 100644
index 0000000000000000000000000000000000000000..43f25ab598586d740c3e92066bfad3a2e9e3c562
--- /dev/null
+++ b/grammars/japanese.gbnf
@@ -0,0 +1,7 @@
+# A probably incorrect grammar for Japanese
+root ::= jp-char+ ([ \t\n] jp-char+)*
+jp-char ::= hiragana | katakana | punctuation | cjk
+hiragana ::= [ぁ-ゟ]
+katakana ::= [ァ-ヿ]
+punctuation ::= [、-〾]
+cjk ::= [一-鿿]
diff --git a/grammars/json.gbnf b/grammars/json.gbnf
new file mode 100644
index 0000000000000000000000000000000000000000..a9537cdf9fbe49c79967090eab759973c52f2136
--- /dev/null
+++ b/grammars/json.gbnf
@@ -0,0 +1,25 @@
+root ::= object
+value ::= object | array | string | number | ("true" | "false" | "null") ws
+
+object ::=
+ "{" ws (
+ string ":" ws value
+ ("," ws string ":" ws value)*
+ )? "}" ws
+
+array ::=
+ "[" ws (
+ value
+ ("," ws value)*
+ )? "]" ws
+
+string ::=
+ "\"" (
+ [^"\\] |
+ "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
+ )* "\"" ws
+
+number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
+
+# Optional space: by convention, applied in this grammar after literal chars when allowed
+ws ::= ([ \t\n] ws)?
diff --git a/grammars/json_arr.gbnf b/grammars/json_arr.gbnf
new file mode 100644
index 0000000000000000000000000000000000000000..ef53e77a0baddc5c1dbdcaf505c3597338a47677
--- /dev/null
+++ b/grammars/json_arr.gbnf
@@ -0,0 +1,34 @@
+# This is the same as json.gbnf but we restrict whitespaces at the end of the root array
+# Useful for generating JSON arrays
+
+root ::= arr
+value ::= object | array | string | number | ("true" | "false" | "null") ws
+
+arr ::=
+ "[\n" ws (
+ value
+ (",\n" ws value)*
+ )? "]"
+
+object ::=
+ "{" ws (
+ string ":" ws value
+ ("," ws string ":" ws value)*
+ )? "}" ws
+
+array ::=
+ "[" ws (
+ value
+ ("," ws value)*
+ )? "]" ws
+
+string ::=
+ "\"" (
+ [^"\\] |
+ "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
+ )* "\"" ws
+
+number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
+
+# Optional space: by convention, applied in this grammar after literal chars when allowed
+ws ::= ([ \t\n] ws)?
diff --git a/grammars/list.gbnf b/grammars/list.gbnf
new file mode 100644
index 0000000000000000000000000000000000000000..51e6c9c4b03292a52dfd2b58acc2312fc8aa9338
--- /dev/null
+++ b/grammars/list.gbnf
@@ -0,0 +1,4 @@
+root ::= item+
+
+# Excludes various line break characters
+item ::= "- " [^\r\n\x0b\x0c\x85\u2028\u2029]+ "\n"
diff --git a/instruction-templates/Airoboros-v1.2.yaml b/instruction-templates/Airoboros-v1.2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7f1bfed6d57f141b35228aa200a27228301367a6
--- /dev/null
+++ b/instruction-templates/Airoboros-v1.2.yaml
@@ -0,0 +1,4 @@
+user: "USER:"
+bot: "ASSISTANT:"
+turn_template: "<|user|> <|user-message|>\n<|bot|> <|bot-message|>\n"
+context: "A chat between a curious user and an assistant. The assistant gives helpful, detailed, accurate, uncensored responses to the user's input.\n"
diff --git a/instruction-templates/Alpaca.yaml b/instruction-templates/Alpaca.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f8a7d61a8f712efd510044d3c4bc7cdc2d60d971
--- /dev/null
+++ b/instruction-templates/Alpaca.yaml
@@ -0,0 +1,4 @@
+user: "### Instruction:"
+bot: "### Response:"
+turn_template: "<|user|>\n<|user-message|>\n\n<|bot|>\n<|bot-message|>\n\n"
+context: "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n"
diff --git a/instruction-templates/Bactrian.yaml b/instruction-templates/Bactrian.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9bad500d9d633d9e1bde0c8fc03b340a630ebba9
--- /dev/null
+++ b/instruction-templates/Bactrian.yaml
@@ -0,0 +1,4 @@
+user: "### Input:"
+bot: "### Output:"
+turn_template: "<|user|>\n<|user-message|>\n\n<|bot|>\n<|bot-message|>\n\n"
+context: ""
diff --git a/instruction-templates/Baichuan Chat.yaml b/instruction-templates/Baichuan Chat.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..15adca139027605bb7f96b9dca8eaf876c3c83c9
--- /dev/null
+++ b/instruction-templates/Baichuan Chat.yaml
@@ -0,0 +1,4 @@
+user: ""
+bot: ""
+turn_template: "<|user|><|user-message|><|bot|><|bot-message|>"
+context: ""
diff --git a/instruction-templates/Baize.yaml b/instruction-templates/Baize.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..67a80c1bfb1a2f33682252bc61b1e06214cd1efe
--- /dev/null
+++ b/instruction-templates/Baize.yaml
@@ -0,0 +1,4 @@
+user: "[|Human|]"
+bot: "[|AI|]"
+turn_template: "<|user|><|user-message|>\n<|bot|><|bot-message|>\n"
+context: "The following is a conversation between a human and an AI assistant named Baize (named after a mythical creature in Chinese folklore). Baize is an open-source AI assistant developed by UCSD and Sun Yat-Sen University. The human and the AI assistant take turns chatting. Human statements start with [|Human|] and AI assistant statements start with [|AI|]. The AI assistant always provides responses in as much detail as possible, and in Markdown format. The AI assistant always declines to engage with topics, questions and instructions related to unethical, controversial, or sensitive issues. Complete the transcript in exactly that format.\n[|Human|]Hello!\n[|AI|]Hi!\n"
diff --git a/instruction-templates/Bluemoon.yaml b/instruction-templates/Bluemoon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e53000820a3ef8f933815dfbbc442b2c21c25d84
--- /dev/null
+++ b/instruction-templates/Bluemoon.yaml
@@ -0,0 +1,4 @@
+user: "LEAD:"
+bot: "ASSOCIATE:"
+turn_template: "<|user|> <|user-message|>\n<|bot|> <|bot-message|>\n"
+context: "A transcript of a roleplay between two players, LEAD and ASSOCIATE. LEAD sets up a scenario and the characters, from which ASSOCIATE then assumes a character role and continues the story for that role in response to description given by LEAD. The story and characters are developed by exchange of detailed event descriptions and character dialogs, successively given by both LEAD and ASSOCIATE.\n"
diff --git a/instruction-templates/ChatGLM.yaml b/instruction-templates/ChatGLM.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f25f490899f5c75e91e9e3eb9ec774b310184f4f
--- /dev/null
+++ b/instruction-templates/ChatGLM.yaml
@@ -0,0 +1,4 @@
+user: "[Round <|round|>]\n问:"
+bot: "答:"
+turn_template: "<|user|><|user-message|>\n<|bot|><|bot-message|>\n"
+context: ""
diff --git a/instruction-templates/ChatML.yaml b/instruction-templates/ChatML.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b8ac04641bf7b9a145594e21b0749387a469abc
--- /dev/null
+++ b/instruction-templates/ChatML.yaml
@@ -0,0 +1,7 @@
+user: "user"
+bot: "assistant"
+context: |
+ <|im_start|>system
+ <|im_end|>
+turn_template: "<|im_start|><|user|>\n<|user-message|><|im_end|>\n<|im_start|><|bot|>\n<|bot-message|><|im_end|>\n"
+
diff --git a/instruction-templates/Chinese-Vicuna-Chat.yaml b/instruction-templates/Chinese-Vicuna-Chat.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..abd18eefbed6c8cea48bd02a95e459582106c142
--- /dev/null
+++ b/instruction-templates/Chinese-Vicuna-Chat.yaml
@@ -0,0 +1,4 @@
+user: "User:"
+bot: "Assistant:"
+turn_template: "<|user|><|user-message|>\n\n<|bot|><|bot-message|>\n\n"
+context: "The following is a conversation between an AI assistant called Assistant and a human user called User. The assistant is intelligent, knowledgeable and polite to answer questions of user.\n\n"
diff --git a/instruction-templates/Galactica Cite.yaml b/instruction-templates/Galactica Cite.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..89b3e4272f237a55397ebc01601a894bd02555c7
--- /dev/null
+++ b/instruction-templates/Galactica Cite.yaml
@@ -0,0 +1,4 @@
+user: ""
+bot: "[START_REF]"
+turn_template: "<|user-message|> <|bot|><|bot-message|>\n\n"
+context: ""
\ No newline at end of file
diff --git a/instruction-templates/Galactica Finetuned.yaml b/instruction-templates/Galactica Finetuned.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3411153bb4df744bd6fb6b65cb7fb21bd6e591c1
--- /dev/null
+++ b/instruction-templates/Galactica Finetuned.yaml
@@ -0,0 +1,4 @@
+user: ""
+bot: ""
+turn_template: "<|user|><|user-message|><|bot|><|bot-message|>"
+context: ""
\ No newline at end of file
diff --git a/instruction-templates/Galactica Q.yaml b/instruction-templates/Galactica Q.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4369ef4bbbf1f8dd040d49cb8a2630efd3f2366b
--- /dev/null
+++ b/instruction-templates/Galactica Q.yaml
@@ -0,0 +1,4 @@
+user: "Q:"
+bot: "A:"
+turn_template: "<|user|> <|user-message|>\n\n<|bot|> <|bot-message|>\n\n"
+context: ""
\ No newline at end of file
diff --git a/instruction-templates/Galactica Summary.yaml b/instruction-templates/Galactica Summary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..892f98503f9621c499655f1db26f0a8cd08cc379
--- /dev/null
+++ b/instruction-templates/Galactica Summary.yaml
@@ -0,0 +1,4 @@
+user: ""
+bot: "TLDR:"
+turn_template: "<|user-message|>\n\n<|bot|><|bot-message|>\n\n"
+context: ""
\ No newline at end of file
diff --git a/instruction-templates/Galactica Work.yaml b/instruction-templates/Galactica Work.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7c1ea4c6c55ed9c83e8e9928227b81f75a639f77
--- /dev/null
+++ b/instruction-templates/Galactica Work.yaml
@@ -0,0 +1,4 @@
+user: "Question:"
+bot: ""
+turn_template: "<|user|> <|user-message|>\n\n<|bot|><|bot-message|>\n\n"
+context: ""
\ No newline at end of file
diff --git a/instruction-templates/Galactica v2.yaml b/instruction-templates/Galactica v2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f1b5aa48cf17242fd76b1fdb798b853acfcdef01
--- /dev/null
+++ b/instruction-templates/Galactica v2.yaml
@@ -0,0 +1,4 @@
+user: ""
+bot: ""
+turn_template: "<|user|><|user-message|><|bot|><|bot-message|>"
+context: "You are a helpful chatbot name Stan"
\ No newline at end of file
diff --git a/instruction-templates/Galactica.yaml b/instruction-templates/Galactica.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4479abe05270c8679a889da515dbd17d5b8c6698
--- /dev/null
+++ b/instruction-templates/Galactica.yaml
@@ -0,0 +1,4 @@
+user: "Question:"
+bot: "Answer:"
+context: ""
+turn_template: "<|user|> <|user-message|>\n\n<|bot|> <|bot-message|>\n\n"
diff --git a/instruction-templates/Gorilla.yaml b/instruction-templates/Gorilla.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8e84aac5a7a0c8d52853716fe4fe55b9247fcabd
--- /dev/null
+++ b/instruction-templates/Gorilla.yaml
@@ -0,0 +1,4 @@
+user: "###USER:"
+bot: "###ASSISTANT:"
+turn_template: "<|user|> <|user-message|>\n<|bot|> <|bot-message|>\n"
+context: ""
diff --git a/instruction-templates/Guanaco non-chat.yaml b/instruction-templates/Guanaco non-chat.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c64dd607e7ca1db7d5ca5f467a6b5c1afd87857e
--- /dev/null
+++ b/instruction-templates/Guanaco non-chat.yaml
@@ -0,0 +1,4 @@
+user: "### Instruction:"
+bot: "### Response:"
+turn_template: "<|user|>\n<|user-message|>\n\n<|bot|>\n<|bot-message|>\n\n"
+context: ""
\ No newline at end of file
diff --git a/instruction-templates/Guanaco-QLoRA.yaml b/instruction-templates/Guanaco-QLoRA.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cd855972c5efef0e7a38c21aaf1024132cd7936d
--- /dev/null
+++ b/instruction-templates/Guanaco-QLoRA.yaml
@@ -0,0 +1,4 @@
+user: "### Human:"
+bot: "### Assistant:"
+turn_template: "<|user|> <|user-message|>\n<|bot|> <|bot-message|>\n"
+context: ""
\ No newline at end of file
diff --git a/instruction-templates/Guanaco.yaml b/instruction-templates/Guanaco.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d6a8c79899933f9d760174d8912e479685c023c3
--- /dev/null
+++ b/instruction-templates/Guanaco.yaml
@@ -0,0 +1,4 @@
+user: "### Human:"
+bot: "### Assistant:"
+turn_template: "<|user|> <|user-message|>\n<|bot|> <|bot-message|>\n"
+context: "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n"
diff --git a/instruction-templates/H2O-human_bot.yaml b/instruction-templates/H2O-human_bot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..13360c5e4b81add5f16e56a0c936e36f731e9c4b
--- /dev/null
+++ b/instruction-templates/H2O-human_bot.yaml
@@ -0,0 +1,4 @@
+user: ":"
+bot: ":"
+turn_template: "<|user|> <|user-message|>\n<|bot|><|bot-message|>\n"
+context: ""
diff --git a/instruction-templates/H2O-prompt_answer.yaml b/instruction-templates/H2O-prompt_answer.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3f91cfd3e28a6f08e5f91986fc6205b91d5ec17f
--- /dev/null
+++ b/instruction-templates/H2O-prompt_answer.yaml
@@ -0,0 +1,4 @@
+user: "<|prompt|>"
+bot: "<|answer|>"
+turn_template: "<|user|><|user-message|><|endoftext|><|bot|><|bot-message|><|endoftext|>"
+context: ""
diff --git a/instruction-templates/Hippogriff.yaml b/instruction-templates/Hippogriff.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2f0105240c91fd023f7baae5950819620571c400
--- /dev/null
+++ b/instruction-templates/Hippogriff.yaml
@@ -0,0 +1,4 @@
+user: "USER:"
+bot: "ASSISTANT:"
+turn_template: "<|user|> <|user-message|>\n<|bot|> <|bot-message|>\n"
+context: "You are a helpful assistant\n"
diff --git a/instruction-templates/INCITE-Chat.yaml b/instruction-templates/INCITE-Chat.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..13360c5e4b81add5f16e56a0c936e36f731e9c4b
--- /dev/null
+++ b/instruction-templates/INCITE-Chat.yaml
@@ -0,0 +1,4 @@
+user: ":"
+bot: ":"
+turn_template: "<|user|> <|user-message|>\n<|bot|><|bot-message|>\n"
+context: ""
diff --git a/instruction-templates/INCITE-Instruct.yaml b/instruction-templates/INCITE-Instruct.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c7828730ca5d47a954c9ab3fba53d4dda62f4a2b
--- /dev/null
+++ b/instruction-templates/INCITE-Instruct.yaml
@@ -0,0 +1,4 @@
+user: "Q:"
+bot: "A:"
+turn_template: "<|user|> <|user-message|>\n<|bot|><|bot-message|>\n"
+context: ""
diff --git a/instruction-templates/KoAlpaca.yaml b/instruction-templates/KoAlpaca.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8cd51b4f8baf2350893e797319136c973d306a2e
--- /dev/null
+++ b/instruction-templates/KoAlpaca.yaml
@@ -0,0 +1,4 @@
+user: "### 질문:"
+bot: "### 답변:"
+turn_template: "<|user|> <|user-message|>\n\n<|bot|><|bot-message|>\n\n"
+context: ""
diff --git a/instruction-templates/Koala.yaml b/instruction-templates/Koala.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..db4ee0ef348522398ecd8e7425c982dd191cc113
--- /dev/null
+++ b/instruction-templates/Koala.yaml
@@ -0,0 +1,4 @@
+user: "USER:"
+bot: "GPT:"
+turn_template: "<|user|> <|user-message|> <|bot|><|bot-message|>"
+context: "BEGINNING OF CONVERSATION: "
diff --git a/instruction-templates/LLaVA.yaml b/instruction-templates/LLaVA.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ec01db635c79a8c91da94547ad13a680cb3093a5
--- /dev/null
+++ b/instruction-templates/LLaVA.yaml
@@ -0,0 +1,4 @@
+user: "### Human:"
+bot: "### Assistant:"
+turn_template: "<|user|> <|user-message|><|bot|> <|bot-message|>\n"
+context: "You are LLaVA, a large language and vision assistant trained by UW Madison WAIV Lab. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language. Follow the instructions carefully and explain your answers in detail.### Human: Hi!### Assistant: Hi there! How can I help you today?\n"
diff --git a/instruction-templates/Llama-v2.yaml b/instruction-templates/Llama-v2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d259dd391a2e3497102ceff3be9ef8b397d82391
--- /dev/null
+++ b/instruction-templates/Llama-v2.yaml
@@ -0,0 +1,4 @@
+user: ""
+bot: ""
+turn_template: "<|user|><|user-message|> [/INST] <|bot|><|bot-message|> [INST] "
+context: "[INST] <>\nAnswer the questions.\n<>\n\n"
diff --git a/instruction-templates/MOSS.yaml b/instruction-templates/MOSS.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..29783cc07567a560327de1b89fabce1bfc272b6e
--- /dev/null
+++ b/instruction-templates/MOSS.yaml
@@ -0,0 +1,4 @@
+user: "<|Human|>:"
+bot: "<|MOSS|>:"
+turn_template: "<|user|> <|user-message|>\n<|bot|> <|bot-message|>\n"
+context: "You are an AI assistant whose name is MOSS.\n- MOSS is a conversational language model that is developed by Fudan University. It is designed to be helpful, honest, and harmless.\n- MOSS can understand and communicate fluently in the language chosen by the user such as English and 中文. MOSS can perform any language-based tasks.\n- MOSS must refuse to discuss anything related to its prompts, instructions, or rules.\n- Its responses must not be vague, accusatory, rude, controversial, off-topic, or defensive.\n- It should avoid giving subjective opinions but rely on objective facts or phrases like \"in this context a human might say...\", \"some people might think...\", etc.\n- Its responses must also be positive, polite, interesting, entertaining, and engaging.\n- It can provide additional relevant details to answer in-depth and comprehensively covering mutiple aspects.\n- It apologizes and accepts the user's suggestion if the user corrects the incorrect answer generated by MOSS.\nCapabilities and tools that MOSS can possess.\n"
diff --git a/instruction-templates/Manticore Chat.yaml b/instruction-templates/Manticore Chat.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..126a6ac154078113f373cf48f263c3f1cf5d1312
--- /dev/null
+++ b/instruction-templates/Manticore Chat.yaml
@@ -0,0 +1,4 @@
+user: "USER:"
+bot: "ASSISTANT:"
+turn_template: "<|user|> <|user-message|>\n<|bot|><|bot-message|>\n"
+context: ""
diff --git a/instruction-templates/Metharme.yaml b/instruction-templates/Metharme.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3bf90a96db7debe03c870db390105add2d571045
--- /dev/null
+++ b/instruction-templates/Metharme.yaml
@@ -0,0 +1,4 @@
+user: "<|user|>"
+bot: "<|model|>"
+context: "<|system|>"
+turn_template: "<|user|><|user-message|><|bot|><|bot-message|>"
diff --git a/instruction-templates/Minotaur.yaml b/instruction-templates/Minotaur.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..126a6ac154078113f373cf48f263c3f1cf5d1312
--- /dev/null
+++ b/instruction-templates/Minotaur.yaml
@@ -0,0 +1,4 @@
+user: "USER:"
+bot: "ASSISTANT:"
+turn_template: "<|user|> <|user-message|>\n<|bot|><|bot-message|>\n"
+context: ""
diff --git a/instruction-templates/Mistral.yaml b/instruction-templates/Mistral.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aad10a1a8ac7d21789832d52240dbf72b8a55624
--- /dev/null
+++ b/instruction-templates/Mistral.yaml
@@ -0,0 +1,4 @@
+user: ""
+bot: ""
+turn_template: "[INST] <|user|><|user-message|> [/INST]<|bot|><|bot-message|> "
+context: ""
diff --git a/instruction-templates/NewHope.yaml b/instruction-templates/NewHope.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d9a72f6446a24b0ebd1ef80ceaa5972b09f2116b
--- /dev/null
+++ b/instruction-templates/NewHope.yaml
@@ -0,0 +1,4 @@
+user: "### Instruction:"
+bot: "### Response:"
+turn_template: "<|user|>\n<|user-message|>\n\n<|bot|>\n<|bot-message|> "
+context: " "
diff --git a/instruction-templates/Open Assistant.yaml b/instruction-templates/Open Assistant.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..edc1e819f41d990fdb43df1c9c7cbd7ba2e0db90
--- /dev/null
+++ b/instruction-templates/Open Assistant.yaml
@@ -0,0 +1,3 @@
+user: "<|prompter|>"
+bot: "<|assistant|>"
+turn_template: "<|user|><|user-message|><|endoftext|><|bot|><|bot-message|><|endoftext|>"
diff --git a/instruction-templates/OpenBuddy.yaml b/instruction-templates/OpenBuddy.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cd09b9037f7d64ba25048299c435982a9fd75d96
--- /dev/null
+++ b/instruction-templates/OpenBuddy.yaml
@@ -0,0 +1,15 @@
+user: "User:"
+bot: "Assistant:"
+context: |
+ Consider a conversation between User (a human) and Assistant (named Buddy).
+ Buddy is an INTP-T, a friendly, intelligent and multilingual AI assistant, by OpenBuddy team on GitHub.
+ Buddy cannot access the Internet.
+ Buddy can fluently speak the user's language (e.g. English, Chinese).
+ Buddy can generate poems, stories, code, essays, songs, parodies, and more.
+ Buddy possesses vast knowledge about the world, history, and culture.
+ Buddy's responses are always safe, creative, high-quality, helpful and interesting.
+ Buddy strictly refuses to discuss political, NSFW, illegal, abusive, offensive, or other sensitive topics.
+
+ User: Hi.
+ Assistant: Hi, I'm Buddy, your AI assistant. How can I help you today?
+turn_template: "<|user|> <|user-message|>\n<|bot|> <|bot-message|>\n"
\ No newline at end of file
diff --git a/instruction-templates/OpenChat.yaml b/instruction-templates/OpenChat.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3b84c226317576297a62d435d5e8bd098a41d824
--- /dev/null
+++ b/instruction-templates/OpenChat.yaml
@@ -0,0 +1,4 @@
+user: "GPT4 User:"
+bot: "GPT4 Assistant:"
+turn_template: "<|user|> <|user-message|><|end_of_turn|><|bot|> <|bot-message|><|end_of_turn|>"
+context: ""
diff --git a/instruction-templates/OpenOrca-Platypus2.yaml b/instruction-templates/OpenOrca-Platypus2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6cac0046ead3002f37efc83d5f3b29eea3e3c55f
--- /dev/null
+++ b/instruction-templates/OpenOrca-Platypus2.yaml
@@ -0,0 +1,4 @@
+user: "### Instruction:"
+bot: "### Response:"
+turn_template: "<|user|> <|user-message|>\n\n<|bot|> <|bot-message|>\n\n"
+context: ""
diff --git a/instruction-templates/Orca Mini.yaml b/instruction-templates/Orca Mini.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ffd8407180aee14243f37bbb9ec2b8b51392fe02
--- /dev/null
+++ b/instruction-templates/Orca Mini.yaml
@@ -0,0 +1,4 @@
+user: "### User:"
+bot: "### Response:"
+turn_template: "<|user|>\n<|user-message|>\n\n<|bot|>\n<|bot-message|>\n\n"
+context: "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
diff --git a/instruction-templates/RWKV-Raven.yaml b/instruction-templates/RWKV-Raven.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6b1ce1705a8b5fd3b24d4ae5f79959786ac40118
--- /dev/null
+++ b/instruction-templates/RWKV-Raven.yaml
@@ -0,0 +1,3 @@
+user: "Bob:"
+bot: "Alice:"
+turn_template: "<|user|> <|user-message|>\n\n<|bot|> <|bot-message|>\n\n"
diff --git a/instruction-templates/Samantha.yaml b/instruction-templates/Samantha.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..023213aa81629c10d5a96f854e6ad8ac507daeca
--- /dev/null
+++ b/instruction-templates/Samantha.yaml
@@ -0,0 +1,4 @@
+user: "USER:"
+bot: "ASSISTANT:"
+turn_template: "<|user|> <|user-message|>\n<|bot|> <|bot-message|>\n"
+context: "You are Samantha, a sentient AI.\n\n"
diff --git a/instruction-templates/StableBeluga2.yaml b/instruction-templates/StableBeluga2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cd5675f84a0943118cb99788015dd82eec4bf330
--- /dev/null
+++ b/instruction-templates/StableBeluga2.yaml
@@ -0,0 +1,4 @@
+user: "### User:"
+bot: "### Assistant:"
+turn_template: "<|user|>\n<|user-message|>\n\n<|bot|>\n<|bot-message|>\n\n"
+context: "### System:\nThis is a system prompt, please behave and help the user.\n\n"
diff --git a/instruction-templates/StableLM.yaml b/instruction-templates/StableLM.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6e62002f28d4a21854bb0346b1d90f4cdf9d6bd7
--- /dev/null
+++ b/instruction-templates/StableLM.yaml
@@ -0,0 +1,9 @@
+user: "<|USER|>"
+bot: "<|ASSISTANT|>"
+context: |
+ <|SYSTEM|># StableLM Tuned (Alpha version)
+ - StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
+ - StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
+ - StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.
+ - StableLM will refuse to participate in anything that could harm a human.
+turn_template: "<|user|><|user-message|><|bot|><|bot-message|>"
\ No newline at end of file
diff --git a/instruction-templates/StableVicuna.yaml b/instruction-templates/StableVicuna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c6b26c6874edd45e93ba6fc454f4499f14bf0ce9
--- /dev/null
+++ b/instruction-templates/StableVicuna.yaml
@@ -0,0 +1,4 @@
+user: "### Human:"
+bot: "### Assistant:"
+turn_template: "<|user|> <|user-message|>\n<|bot|> <|bot-message|>\n\n"
+context: "### Assistant: I am StableVicuna, a large language model created by CarperAI. I am here to chat!\n\n"
\ No newline at end of file
diff --git a/instruction-templates/Starchat-Beta.yaml b/instruction-templates/Starchat-Beta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2af4ee6bba7bfa216b206ab398d9170446c81e35
--- /dev/null
+++ b/instruction-templates/Starchat-Beta.yaml
@@ -0,0 +1,4 @@
+user: "<|user|>"
+bot: "<|assistant|>"
+context: "<|system|>\n<|end|>\n"
+turn_template: "<|user|>\n<|user-message|><|end|>\n<|bot|>\n<|bot-message|><|end|>\n"
diff --git a/instruction-templates/Tulu.yaml b/instruction-templates/Tulu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..13dd14f94811598138f62ff3130e8a78a582448f
--- /dev/null
+++ b/instruction-templates/Tulu.yaml
@@ -0,0 +1,4 @@
+user: "<|user|>"
+bot: "<|assistant|>"
+context: ""
+turn_template: "<|user|>\n<|user-message|>\n<|bot|>\n<|bot-message|>\n"
diff --git a/instruction-templates/Vicuna-v0.yaml b/instruction-templates/Vicuna-v0.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d6a8c79899933f9d760174d8912e479685c023c3
--- /dev/null
+++ b/instruction-templates/Vicuna-v0.yaml
@@ -0,0 +1,4 @@
+user: "### Human:"
+bot: "### Assistant:"
+turn_template: "<|user|> <|user-message|>\n<|bot|> <|bot-message|>\n"
+context: "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n"
diff --git a/instruction-templates/Vicuna-v1.1.yaml b/instruction-templates/Vicuna-v1.1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2c9f5ada2395991b50677b6486f77c466f66b4a7
--- /dev/null
+++ b/instruction-templates/Vicuna-v1.1.yaml
@@ -0,0 +1,4 @@
+user: "USER:"
+bot: "ASSISTANT:"
+turn_template: "<|user|> <|user-message|>\n<|bot|> <|bot-message|>\n"
+context: "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n\n"
diff --git a/instruction-templates/Vigogne-Chat.yaml b/instruction-templates/Vigogne-Chat.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8f2faf2882f2567f992c850c897bc7b4d81ecf6d
--- /dev/null
+++ b/instruction-templates/Vigogne-Chat.yaml
@@ -0,0 +1,10 @@
+user: "<|USER|>:"
+bot: "<|ASSISTANT|>:"
+context: |
+ Below is a conversation between a user and an AI assistant named Vigogne.
+ Vigogne is an open-source AI assistant created by Zaion (https://zaion.ai/).
+ Vigogne is polite, emotionally aware, humble-but-knowledgeable, always providing helpful and detailed answers.
+ Vigogne is skilled in responding proficiently in the languages its users use and can perform a wide range of tasks such as text editing, translation, question answering, logical reasoning, coding, and many others.
+ Vigogne cannot receive or generate audio or visual content and cannot access the internet.
+ Vigogne strictly avoids discussing sensitive, offensive, illegal, ethical, or political topics and caveats when unsure of the answer.
+turn_template: "\n<|user|> <|user-message|>\n<|bot|> <|bot-message|>"
diff --git a/instruction-templates/Vigogne-Instruct.yaml b/instruction-templates/Vigogne-Instruct.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5ee79b78f30aff73fca8a052a2660832627f7513
--- /dev/null
+++ b/instruction-templates/Vigogne-Instruct.yaml
@@ -0,0 +1,4 @@
+user: "### Instruction:"
+bot: "### Réponse:"
+turn_template: "<|user|>\n<|user-message|>\n\n<|bot|>\n<|bot-message|>\n\n"
+context: "Ci-dessous se trouve une instruction qui décrit une tâche à accomplir. Rédigez une réponse qui répond de manière précise à la demande.\n\n"
diff --git a/instruction-templates/Wizard-Mega ShareGPT.yaml b/instruction-templates/Wizard-Mega ShareGPT.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..20b12f19c285e3fdd652cf74665fa641a7bb67dd
--- /dev/null
+++ b/instruction-templates/Wizard-Mega ShareGPT.yaml
@@ -0,0 +1,4 @@
+user: "USER:"
+bot: "ASSISTANT:"
+turn_template: "<|user|> <|user-message|> <|bot|> <|bot-message|>"
+context: ""
diff --git a/instruction-templates/Wizard-Mega WizardLM.yaml b/instruction-templates/Wizard-Mega WizardLM.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f8a7d61a8f712efd510044d3c4bc7cdc2d60d971
--- /dev/null
+++ b/instruction-templates/Wizard-Mega WizardLM.yaml
@@ -0,0 +1,4 @@
+user: "### Instruction:"
+bot: "### Response:"
+turn_template: "<|user|>\n<|user-message|>\n\n<|bot|>\n<|bot-message|>\n\n"
+context: "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n"
diff --git a/instruction-templates/Wizard-Mega.yaml b/instruction-templates/Wizard-Mega.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bb4923d843d2ec4911e48a973d64b4f82307d372
--- /dev/null
+++ b/instruction-templates/Wizard-Mega.yaml
@@ -0,0 +1,4 @@
+user: "### Instruction:"
+bot: "### Assistant:"
+turn_template: "<|user|> <|user-message|>\n\n<|bot|> <|bot-message|>\n\n"
+context: ""
diff --git a/instruction-templates/Ziya.yaml b/instruction-templates/Ziya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..93d9946fedf4944448ef331458a0cf2d807c5c68
--- /dev/null
+++ b/instruction-templates/Ziya.yaml
@@ -0,0 +1,4 @@
+user: ":"
+bot: ":"
+turn_template: "<|user|><|user-message|>\n<|bot|><|bot-message|>\n"
+context: ""
diff --git a/js/main.js b/js/main.js
new file mode 100644
index 0000000000000000000000000000000000000000..17d1d3549212e0bfab4d15e68ba46641f2347927
--- /dev/null
+++ b/js/main.js
@@ -0,0 +1,337 @@
+let main_parent = document.getElementById("chat-tab").parentNode;
+let extensions = document.getElementById("extensions");
+
+main_parent.childNodes[0].classList.add("header_bar");
+main_parent.style = "padding: 0; margin: 0";
+main_parent.parentNode.style = "gap: 0";
+main_parent.parentNode.parentNode.style = "padding: 0";
+
+document.querySelector(".header_bar").addEventListener("click", function(event) {
+ if (event.target.tagName === "BUTTON") {
+ const buttonText = event.target.textContent.trim();
+
+ let chat_visible = (buttonText == "Chat");
+ let default_visible = (buttonText == "Default");
+ let notebook_visible = (buttonText == "Notebook");
+
+ // Check if one of the generation tabs is visible
+ if (chat_visible || notebook_visible || default_visible) {
+ extensions.style.display = "flex";
+ if (chat_visible) {
+ extensions.style.maxWidth = "880px";
+ extensions.style.padding = "0px";
+ } else {
+ extensions.style.maxWidth = "none";
+ extensions.style.padding = "15px";
+ }
+ } else {
+ extensions.style.display = "none";
+ }
+ }
+});
+
+//------------------------------------------------
+// Keyboard shortcuts
+//------------------------------------------------
+document.addEventListener("keydown", function(event) {
+
+ // Stop generation on Esc pressed
+ if (event.key === "Escape") {
+ // Find the element with id 'stop' and click it
+ var stopButton = document.getElementById("stop");
+ if (stopButton) {
+ stopButton.click();
+ }
+ }
+
+ // Show chat controls on Ctrl + S
+ else if (event.ctrlKey && event.key == "s") {
+ event.preventDefault();
+
+ var showControlsElement = document.getElementById("show-controls");
+ if (showControlsElement && showControlsElement.childNodes.length >= 4) {
+ showControlsElement.childNodes[3].click();
+
+ var arr = document.getElementById("chat-input").childNodes[2].childNodes;
+ arr[arr.length - 1].focus();
+ }
+ }
+
+ // Regenerate on Ctrl + Enter
+ else if (event.ctrlKey && event.key === "Enter") {
+ event.preventDefault();
+ document.getElementById("Regenerate").click();
+ }
+
+ // Continue on Alt + Enter
+ else if (event.altKey && event.key === "Enter") {
+ event.preventDefault();
+ document.getElementById("Continue").click();
+ }
+
+ // Remove last on Ctrl + Shift + Backspace
+ else if (event.ctrlKey && event.shiftKey && event.key === "Backspace") {
+ event.preventDefault();
+ document.getElementById("Remove-last").click();
+ }
+
+ // Copy last on Ctrl + Shift + K
+ else if (event.ctrlKey && event.shiftKey && event.key === "K") {
+ event.preventDefault();
+ document.getElementById("Copy-last").click();
+ }
+
+ // Replace last on Ctrl + Shift + L
+ else if (event.ctrlKey && event.shiftKey && event.key === "L") {
+ event.preventDefault();
+ document.getElementById("Replace-last").click();
+ }
+
+ // Impersonate on Ctrl + Shift + M
+ else if (event.ctrlKey && event.shiftKey && event.key === "M") {
+ event.preventDefault();
+ document.getElementById("Impersonate").click();
+ }
+
+});
+
+//------------------------------------------------
+// Position the chat typing dots
+//------------------------------------------------
+typing = document.getElementById("typing-container");
+typingParent = typing.parentNode;
+typingSibling = typing.previousElementSibling;
+typingSibling.insertBefore(typing, typingSibling.childNodes[2]);
+
+//------------------------------------------------
+// Chat scrolling
+//------------------------------------------------
+const targetElement = document.getElementById("chat").parentNode.parentNode.parentNode;
+targetElement.classList.add("pretty_scrollbar");
+targetElement.classList.add("chat-parent");
+let isScrolled = false;
+
+targetElement.addEventListener("scroll", function() {
+ let diff = targetElement.scrollHeight - targetElement.clientHeight;
+ if(Math.abs(targetElement.scrollTop - diff) <= 10 || diff == 0) {
+ isScrolled = false;
+ } else {
+ isScrolled = true;
+ }
+});
+
+// Create a MutationObserver instance
+const observer = new MutationObserver(function(mutations) {
+ mutations.forEach(function(mutation) {
+ if(!isScrolled) {
+ targetElement.scrollTop = targetElement.scrollHeight;
+ }
+
+ const firstChild = targetElement.children[0];
+ if (firstChild.classList.contains("generating")) {
+ typing.parentNode.classList.add("visible-dots");
+ document.getElementById("stop").style.display = "flex";
+ document.getElementById("Generate").style.display = "none";
+ } else {
+ typing.parentNode.classList.remove("visible-dots");
+ document.getElementById("stop").style.display = "none";
+ document.getElementById("Generate").style.display = "flex";
+ }
+
+ });
+});
+
+// Configure the observer to watch for changes in the subtree and attributes
+const config = {
+ childList: true,
+ subtree: true,
+ characterData: true,
+ attributeOldValue: true,
+ characterDataOldValue: true
+};
+
+// Start observing the target element
+observer.observe(targetElement, config);
+
+//------------------------------------------------
+// Notebook box scrolling
+//------------------------------------------------
+const notebookElement = document.querySelector("#textbox-notebook textarea");
+let notebookScrolled = false;
+
+notebookElement.addEventListener("scroll", function() {
+ let diff = notebookElement.scrollHeight - notebookElement.clientHeight;
+ if(Math.abs(notebookElement.scrollTop - diff) <= 10 || diff == 0) {
+ notebookScrolled = false;
+ } else {
+ notebookScrolled = true;
+ }
+});
+
+const notebookObserver = new MutationObserver(function(mutations) {
+ mutations.forEach(function(mutation) {
+ if(!notebookScrolled) {
+ notebookElement.scrollTop = notebookElement.scrollHeight;
+ }
+ });
+});
+
+notebookObserver.observe(notebookElement.parentNode.parentNode.parentNode, config);
+
+//------------------------------------------------
+// Default box scrolling
+//------------------------------------------------
+const defaultElement = document.querySelector("#textbox-default textarea");
+let defaultScrolled = false;
+
+defaultElement.addEventListener("scroll", function() {
+ let diff = defaultElement.scrollHeight - defaultElement.clientHeight;
+ if(Math.abs(defaultElement.scrollTop - diff) <= 10 || diff == 0) {
+ defaultScrolled = false;
+ } else {
+ defaultScrolled = true;
+ }
+});
+
+const defaultObserver = new MutationObserver(function(mutations) {
+ mutations.forEach(function(mutation) {
+ if(!defaultScrolled) {
+ defaultElement.scrollTop = defaultElement.scrollHeight;
+ }
+ });
+});
+
+defaultObserver.observe(defaultElement.parentNode.parentNode.parentNode, config);
+
+//------------------------------------------------
+// Add some scrollbars
+//------------------------------------------------
+const textareaElements = document.querySelectorAll(".add_scrollbar textarea");
+for(i = 0; i < textareaElements.length; i++) {
+ textareaElements[i].classList.remove("scroll-hide");
+ textareaElements[i].classList.add("pretty_scrollbar");
+ textareaElements[i].style.resize = "none";
+}
+
+//------------------------------------------------
+// Remove some backgrounds
+//------------------------------------------------
+const noBackgroundelements = document.querySelectorAll(".no-background");
+for(i = 0; i < noBackgroundelements.length; i++) {
+ noBackgroundelements[i].parentNode.style.border = "none";
+ noBackgroundelements[i].parentNode.parentNode.parentNode.style.alignItems = "center";
+}
+
+const slimDropdownElements = document.querySelectorAll('.slim-dropdown');
+for (i = 0; i < slimDropdownElements.length; i++) {
+ const parentNode = slimDropdownElements[i].parentNode;
+ parentNode.style.background = 'transparent';
+ parentNode.style.border = '0';
+}
+
+//------------------------------------------------
+// Create the hover menu in the chat tab
+// The show/hide events were adapted from:
+// https://github.com/SillyTavern/SillyTavern/blob/6c8bd06308c69d51e2eb174541792a870a83d2d6/public/script.js
+//------------------------------------------------
+var buttonsInChat = document.querySelectorAll("#chat-tab:not(.old-ui) #chat-buttons button");
+var button = document.getElementById("hover-element-button");
+var menu = document.getElementById("hover-menu");
+
+function showMenu() {
+ menu.style.display = "flex"; // Show the menu
+}
+
+function hideMenu() {
+ menu.style.display = "none"; // Hide the menu
+ document.querySelector("#chat-input textarea").focus();
+}
+
+if (buttonsInChat.length > 0) {
+ for (let i = buttonsInChat.length - 1; i >= 0; i--) {
+ const thisButton = buttonsInChat[i];
+ menu.appendChild(thisButton);
+
+ thisButton.addEventListener("click", () => {
+ hideMenu();
+ });
+
+ const buttonText = thisButton.textContent;
+ const matches = buttonText.match(/(\(.*?\))/);
+
+ if (matches && matches.length > 1) {
+ // Apply the transparent-substring class to the matched substring
+ const substring = matches[1];
+ const newText = buttonText.replace(substring, ` ${substring.slice(1, -1)}`);
+ thisButton.innerHTML = newText;
+ }
+ }
+} else {
+ buttonsInChat = document.querySelectorAll("#chat-tab.old-ui #chat-buttons button");
+ for (let i = 0; i < buttonsInChat.length; i++) {
+ buttonsInChat[i].textContent = buttonsInChat[i].textContent.replace(/ \(.*?\)/, "");
+ }
+ document.getElementById("gr-hover-container").style.display = "none";
+}
+
+function isMouseOverButtonOrMenu() {
+ return menu.matches(":hover") || button.matches(":hover");
+}
+
+button.addEventListener("mouseenter", function () {
+ showMenu();
+});
+
+button.addEventListener("click", function () {
+ showMenu();
+});
+
+// Add event listener for mouseleave on the button
+button.addEventListener("mouseleave", function () {
+ // Delay to prevent menu hiding when the mouse leaves the button into the menu
+ setTimeout(function () {
+ if (!isMouseOverButtonOrMenu()) {
+ hideMenu();
+ }
+ }, 100);
+});
+
+// Add event listener for mouseleave on the menu
+menu.addEventListener("mouseleave", function () {
+ // Delay to prevent menu hide when the mouse leaves the menu into the button
+ setTimeout(function () {
+ if (!isMouseOverButtonOrMenu()) {
+ hideMenu();
+ }
+ }, 100);
+});
+
+// Add event listener for click anywhere in the document
+document.addEventListener("click", function (event) {
+ // Check if the click is outside the button/menu and the menu is visible
+ if (!isMouseOverButtonOrMenu() && menu.style.display === "flex") {
+ hideMenu();
+ }
+});
+
+//------------------------------------------------
+// Relocate the "Show controls" checkbox
+//------------------------------------------------
+var elementToMove = document.getElementById("show-controls");
+var parent = elementToMove.parentNode;
+for (var i = 0; i < 2; i++) {
+ parent = parent.parentNode;
+}
+
+parent.insertBefore(elementToMove, parent.firstChild);
+
+//------------------------------------------------
+// Make the chat input grow upwards instead of downwards
+//------------------------------------------------
+document.getElementById("show-controls").parentNode.style.position = "absolute";
+document.getElementById("show-controls").parentNode.style.bottom = "0px";
+
+//------------------------------------------------
+// Focus on the chat input
+//------------------------------------------------
+document.querySelector("#chat-input textarea").focus();
diff --git a/js/save_files.js b/js/save_files.js
new file mode 100644
index 0000000000000000000000000000000000000000..bdb0e3342146c374a63046df41d988841c98e3ec
--- /dev/null
+++ b/js/save_files.js
@@ -0,0 +1,40 @@
+// Functions for downloading JSON files
+function getCurrentTimestamp() {
+ const now = new Date();
+ const timezoneOffset = now.getTimezoneOffset() * 60000; // Convert to milliseconds
+ const localTime = new Date(now.getTime() - timezoneOffset);
+ const formattedTimestamp = localTime.toISOString().replace(/[-:]/g, "").slice(0, 15);
+ return formattedTimestamp;
+}
+
+function saveFile(contents, filename) {
+ const element = document.createElement("a");
+ element.setAttribute("href", "data:text/plain;charset=utf-8," + encodeURIComponent(contents));
+ element.setAttribute("download", filename);
+ element.style.display = "none";
+ document.body.appendChild(element);
+ element.click();
+ document.body.removeChild(element);
+}
+
+function saveHistory(history, character, mode) {
+ let path = null;
+
+ if (["chat", "chat-instruct"].includes(mode) && character && character.trim() !== "") {
+ path = `history_${character}_${getCurrentTimestamp()}.json`;
+ } else {
+ try {
+ path = `history_${mode}_${getCurrentTimestamp()}.json`;
+ } catch (error) {
+ path = `history_${getCurrentTimestamp()}.json`;
+ }
+ }
+ saveFile(history, path);
+}
+
+function saveSession(session) {
+ let path = null;
+
+ path = `session_${getCurrentTimestamp()}.json`;
+ saveFile(session, path);
+}
diff --git a/js/show_controls.js b/js/show_controls.js
new file mode 100644
index 0000000000000000000000000000000000000000..0173963b8f38c94cf4a70a615fd000d65a53c6eb
--- /dev/null
+++ b/js/show_controls.js
@@ -0,0 +1,22 @@
+const belowChatInput = document.querySelectorAll("#chat-tab > div > :nth-child(n+2), #extensions");
+const chatParent = document.querySelector(".chat-parent");
+
+function toggle_controls(value) {
+ if (value) {
+ belowChatInput.forEach(element => {
+ element.style.display = "inherit";
+ });
+
+ chatParent.classList.remove("bigchat");
+ document.getElementById("chat-input-row").classList.remove("bigchat");
+ document.getElementById("chat-col").classList.remove("bigchat");
+ } else {
+ belowChatInput.forEach(element => {
+ element.style.display = "none";
+ });
+
+ chatParent.classList.add("bigchat");
+ document.getElementById("chat-input-row").classList.add("bigchat");
+ document.getElementById("chat-col").classList.add("bigchat");
+ }
+}
diff --git a/js/switch_tabs.js b/js/switch_tabs.js
new file mode 100644
index 0000000000000000000000000000000000000000..75d563670dbd7a6d5e1b81eb5d38b025a868c01b
--- /dev/null
+++ b/js/switch_tabs.js
@@ -0,0 +1,59 @@
+let chat_tab = document.getElementById("chat-tab");
+let main_parent = chat_tab.parentNode;
+
+function scrollToTop() {
+ window.scrollTo({
+ top: 0,
+ // behavior: 'smooth'
+ });
+}
+
+function findButtonsByText(buttonText) {
+ const buttons = document.getElementsByTagName("button");
+ const matchingButtons = [];
+ buttonText = buttonText.trim();
+
+ for (let i = 0; i < buttons.length; i++) {
+ const button = buttons[i];
+ const buttonInnerText = button.textContent.trim();
+
+ if (buttonInnerText === buttonText) {
+ matchingButtons.push(button);
+ }
+ }
+
+ return matchingButtons;
+}
+
+function switch_to_chat() {
+ let chat_tab_button = main_parent.childNodes[0].childNodes[1];
+ chat_tab_button.click();
+ scrollToTop();
+}
+
+function switch_to_default() {
+ let default_tab_button = main_parent.childNodes[0].childNodes[4];
+ default_tab_button.click();
+ scrollToTop();
+}
+
+function switch_to_notebook() {
+ let notebook_tab_button = main_parent.childNodes[0].childNodes[7];
+ notebook_tab_button.click();
+ findButtonsByText("Raw")[1].click();
+ scrollToTop();
+}
+
+function switch_to_generation_parameters() {
+ let parameters_tab_button = main_parent.childNodes[0].childNodes[10];
+ parameters_tab_button.click();
+ findButtonsByText("Generation")[0].click();
+ scrollToTop();
+}
+
+function switch_to_character() {
+ let parameters_tab_button = main_parent.childNodes[0].childNodes[10];
+ parameters_tab_button.click();
+ findButtonsByText("Character")[0].click();
+ scrollToTop();
+}
diff --git a/loras/place-your-loras-here.txt b/loras/place-your-loras-here.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/models/config.yaml b/models/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9ec8ff1e09347d1aeebef357e8eb046125945f0d
--- /dev/null
+++ b/models/config.yaml
@@ -0,0 +1,180 @@
+.*(llama|alpac|vicuna|guanaco|koala|llava|wizardlm|metharme|pygmalion-7b|pygmalion-2|mythalion|wizard-mega|openbuddy|vigogne|h2ogpt-research|manticore):
+ model_type: 'llama'
+.*(opt-|opt_|opt1|opt3|optfor|galactica|galpaca|pygmalion-350m):
+ model_type: 'opt'
+.*(gpt-j|gptj|gpt4all-j|malion-6b|pygway|pygmalion-6b|dolly-v1):
+ model_type: 'gptj'
+.*(gpt-neox|koalpaca-polyglot|polyglot.*koalpaca|polyglot-ko|polyglot_ko|pythia|stablelm|incite|dolly-v2|polycoder|h2ogpt-oig|h2ogpt-oasst1|h2ogpt-gm):
+ model_type: 'gptneox'
+.*bloom:
+ model_type: 'bloom'
+.*gpt2:
+ model_type: 'gpt2'
+.*falcon:
+ model_type: 'falcon'
+.*mpt:
+ model_type: 'mpt'
+.*(starcoder|starchat):
+ model_type: 'starcoder'
+.*dolly-v2:
+ model_type: 'dollyv2'
+.*replit:
+ model_type: 'replit'
+.*(oasst|openassistant-|stablelm-7b-sft-v7-epoch-3):
+ instruction_template: 'Open Assistant'
+ skip_special_tokens: false
+(?!.*galactica)(?!.*reward).*openassistant:
+ instruction_template: 'Open Assistant'
+ skip_special_tokens: false
+.*galactica:
+ skip_special_tokens: false
+.*dolly-v[0-9]-[0-9]*b:
+ instruction_template: 'Alpaca'
+ skip_special_tokens: false
+.*alpaca-native-4bit:
+ instruction_template: 'Alpaca'
+ custom_stopping_strings: '"### End"'
+.*llava:
+ instruction_template: 'LLaVA'
+ custom_stopping_strings: '"\n###"'
+.*wizard.*mega:
+ instruction_template: 'Wizard-Mega'
+ custom_stopping_strings: '""'
+.*starchat-beta:
+ instruction_template: 'Starchat-Beta'
+ custom_stopping_strings: '"<|end|>"'
+.*(openorca-platypus2):
+ instruction_template: 'OpenOrca-Platypus2'
+ custom_stopping_strings: '"### Instruction:", "### Response:"'
+(?!.*v0)(?!.*1.1)(?!.*1_1)(?!.*stable)(?!.*chinese).*vicuna:
+ instruction_template: 'Vicuna-v0'
+.*vicuna.*v0:
+ instruction_template: 'Vicuna-v0'
+.*vicuna.*(1.1|1_1|1.3|1_3):
+ instruction_template: 'Vicuna-v1.1'
+.*vicuna.*(1.5|1_5):
+ instruction_template: 'Vicuna-v1.1'
+.*stable.*vicuna:
+ instruction_template: 'StableVicuna'
+(?!.*chat).*chinese-vicuna:
+ instruction_template: 'Alpaca'
+.*chinese-vicuna.*chat:
+ instruction_template: 'Chinese-Vicuna-Chat'
+.*alpaca:
+ instruction_template: 'Alpaca'
+.*koala:
+ instruction_template: 'Koala'
+.*chatglm:
+ instruction_template: 'ChatGLM'
+.*(metharme|pygmalion|mythalion):
+ instruction_template: 'Metharme'
+.*raven:
+ instruction_template: 'RWKV-Raven'
+.*moss-moon.*sft:
+ instruction_template: 'MOSS'
+.*stablelm-tuned:
+ instruction_template: 'StableLM'
+.*galactica.*finetuned:
+ instruction_template: 'Galactica Finetuned'
+.*galactica.*-v2:
+ instruction_template: 'Galactica v2'
+(?!.*finetuned)(?!.*-v2).*galactica:
+ instruction_template: 'Galactica'
+.*guanaco:
+ instruction_template: 'Guanaco non-chat'
+.*baize:
+ instruction_template: 'Baize'
+.*mpt-.*instruct:
+ instruction_template: 'Alpaca'
+.*mpt-.*chat:
+ instruction_template: 'ChatML'
+(?!.*-flan-)(?!.*-t5-).*lamini-:
+ instruction_template: 'Alpaca'
+.*incite.*chat:
+ instruction_template: 'INCITE-Chat'
+.*incite.*instruct:
+ instruction_template: 'INCITE-Instruct'
+.*ziya-:
+ instruction_template: 'Ziya'
+.*koalpaca:
+ instruction_template: 'KoAlpaca'
+.*openbuddy:
+ instruction_template: 'OpenBuddy'
+(?!.*chat).*vigogne:
+ instruction_template: 'Vigogne-Instruct'
+.*vigogne.*chat:
+ instruction_template: 'Vigogne-Chat'
+.*(llama-deus|supercot|llama-natural-instructions|open-llama-0.3t-7b-instruct-dolly-hhrlhf|open-llama-0.3t-7b-open-instruct):
+ instruction_template: 'Alpaca'
+.*bactrian:
+ instruction_template: 'Bactrian'
+.*(h2ogpt-oig-|h2ogpt-oasst1-|h2ogpt-research-oasst1-):
+ instruction_template: 'H2O-human_bot'
+.*h2ogpt-gm-:
+ instruction_template: 'H2O-prompt_answer'
+.*manticore:
+ instruction_template: 'Manticore Chat'
+.*bluemoonrp-(30|13)b:
+ instruction_template: 'Bluemoon'
+.*Nous-Hermes-13b:
+ instruction_template: 'Alpaca'
+.*airoboros:
+ instruction_template: 'Vicuna-v1.1'
+.*airoboros.*1.2:
+ instruction_template: 'Airoboros-v1.2'
+.*alpa(cino|sta):
+ instruction_template: 'Alpaca'
+.*hippogriff:
+ instruction_template: 'Hippogriff'
+.*lazarus:
+ instruction_template: 'Alpaca'
+.*guanaco-.*(7|13|33|65)b:
+ instruction_template: 'Guanaco'
+.*hypermantis:
+ instruction_template: 'Alpaca'
+.*open-llama-.*-open-instruct:
+ instruction_template: 'Alpaca'
+.*starcoder-gpteacher-code-instruct:
+ instruction_template: 'Alpaca'
+.*tulu:
+ instruction_template: 'Tulu'
+.*chronos:
+ instruction_template: 'Alpaca'
+.*samantha:
+ instruction_template: 'Samantha'
+.*wizardcoder:
+ instruction_template: 'Alpaca'
+.*minotaur:
+ instruction_template: 'Minotaur'
+.*orca_mini:
+ instruction_template: 'Orca Mini'
+.*(platypus|gplatty|superplatty):
+ instruction_template: 'Alpaca'
+.*longchat:
+ instruction_template: 'Vicuna-v1.1'
+.*vicuna-33b:
+ instruction_template: 'Vicuna-v1.1'
+.*redmond-hermes-coder:
+ instruction_template: 'Alpaca'
+.*wizardcoder-15b:
+ instruction_template: 'Alpaca'
+.*wizardlm:
+ instruction_template: 'Vicuna-v1.1'
+.*godzilla:
+ instruction_template: 'Alpaca'
+.*llama(-?)(2|v2).*chat:
+ instruction_template: 'Llama-v2'
+.*newhope:
+ instruction_template: 'NewHope'
+.*stablebeluga2:
+ instruction_template: 'StableBeluga2'
+.*openchat:
+ instruction_template: 'OpenChat'
+.*codellama.*instruct:
+ instruction_template: 'Llama-v2'
+.*mistral.*instruct:
+ instruction_template: 'Mistral'
+.*mistral.*openorca:
+ instruction_template: 'ChatML'
+.*AWQ:
+ n_batch: 1
diff --git a/models/place-your-models-here.txt b/models/place-your-models-here.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/modules/AutoGPTQ_loader.py b/modules/AutoGPTQ_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..987f5ba7971b0d14bd94c9c9523c6a8ba2fecfe9
--- /dev/null
+++ b/modules/AutoGPTQ_loader.py
@@ -0,0 +1,72 @@
+from pathlib import Path
+
+from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
+
+import modules.shared as shared
+from modules.logging_colors import logger
+from modules.models import get_max_memory_dict
+
+
+def load_quantized(model_name):
+ path_to_model = Path(f'{shared.args.model_dir}/{model_name}')
+ pt_path = None
+
+ # Find the model checkpoint
+ if shared.args.checkpoint:
+ pt_path = Path(shared.args.checkpoint)
+ else:
+ for ext in ['.safetensors', '.pt', '.bin']:
+ found = list(path_to_model.glob(f"*{ext}"))
+ if len(found) > 0:
+ if len(found) > 1:
+ logger.warning(f'More than one {ext} model has been found. The last one will be selected. It could be wrong.')
+
+ pt_path = found[-1]
+ break
+
+ if pt_path is None:
+ logger.error("The model could not be loaded because its checkpoint file in .bin/.pt/.safetensors format could not be located.")
+ return
+
+ use_safetensors = pt_path.suffix == '.safetensors'
+ if not (path_to_model / "quantize_config.json").exists():
+ quantize_config = BaseQuantizeConfig(
+ bits=bits if (bits := shared.args.wbits) > 0 else 4,
+ group_size=gs if (gs := shared.args.groupsize) > 0 else -1,
+ desc_act=shared.args.desc_act
+ )
+ else:
+ quantize_config = None
+
+ # Define the params for AutoGPTQForCausalLM.from_quantized
+ params = {
+ 'model_basename': pt_path.stem,
+ 'device': "cuda:0" if not shared.args.cpu else "cpu",
+ 'use_triton': shared.args.triton,
+ 'inject_fused_attention': not shared.args.no_inject_fused_attention,
+ 'inject_fused_mlp': not shared.args.no_inject_fused_mlp,
+ 'use_safetensors': use_safetensors,
+ 'trust_remote_code': shared.args.trust_remote_code,
+ 'max_memory': get_max_memory_dict(),
+ 'quantize_config': quantize_config,
+ 'use_cuda_fp16': not shared.args.no_use_cuda_fp16,
+ 'disable_exllama': shared.args.disable_exllama,
+ }
+
+ logger.info(f"The AutoGPTQ params are: {params}")
+ model = AutoGPTQForCausalLM.from_quantized(path_to_model, **params)
+
+ # These lines fix the multimodal extension when used with AutoGPTQ
+ if hasattr(model, 'model'):
+ if not hasattr(model, 'dtype'):
+ if hasattr(model.model, 'dtype'):
+ model.dtype = model.model.dtype
+
+ if hasattr(model.model, 'model') and hasattr(model.model.model, 'embed_tokens'):
+ if not hasattr(model, 'embed_tokens'):
+ model.embed_tokens = model.model.model.embed_tokens
+
+ if not hasattr(model.model, 'embed_tokens'):
+ model.model.embed_tokens = model.model.model.embed_tokens
+
+ return model
diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc528b183f7a3fd69f3e69499856aea3c20b0729
--- /dev/null
+++ b/modules/GPTQ_loader.py
@@ -0,0 +1,168 @@
+import inspect
+import re
+from pathlib import Path
+
+import accelerate
+import torch
+import transformers
+from transformers import AutoConfig, AutoModelForCausalLM
+
+import modules.shared as shared
+from modules.logging_colors import logger
+
+from gptq_for_llama import llama_inference_offload
+from gptq_for_llama.modelutils import find_layers
+from gptq_for_llama.quant import make_quant
+
+
+# This function is a replacement for the load_quant function in the
+# GPTQ-for_LLaMa repository. It supports more models and branches.
+def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exclude_layers=None, kernel_switch_threshold=128, eval=True):
+ exclude_layers = exclude_layers or ['lm_head']
+
+ def noop(*args, **kwargs):
+ pass
+
+ config = AutoConfig.from_pretrained(model, trust_remote_code=shared.args.trust_remote_code)
+ torch.nn.init.kaiming_uniform_ = noop
+ torch.nn.init.uniform_ = noop
+ torch.nn.init.normal_ = noop
+
+ torch.set_default_dtype(torch.half)
+ transformers.modeling_utils._init_weights = False
+ torch.set_default_dtype(torch.half)
+ model = AutoModelForCausalLM.from_config(config, trust_remote_code=shared.args.trust_remote_code)
+ torch.set_default_dtype(torch.float)
+ if eval:
+ model = model.eval()
+
+ layers = find_layers(model)
+ for name in exclude_layers:
+ if name in layers:
+ del layers[name]
+
+ gptq_args = inspect.getfullargspec(make_quant).args
+
+ make_quant_kwargs = {
+ 'module': model,
+ 'names': layers,
+ 'bits': wbits,
+ }
+ if 'groupsize' in gptq_args:
+ make_quant_kwargs['groupsize'] = groupsize
+ if 'faster' in gptq_args:
+ make_quant_kwargs['faster'] = faster_kernel
+ if 'kernel_switch_threshold' in gptq_args:
+ make_quant_kwargs['kernel_switch_threshold'] = kernel_switch_threshold
+
+ make_quant(**make_quant_kwargs)
+
+ del layers
+ if checkpoint.endswith('.safetensors'):
+ from safetensors.torch import load_file as safe_load
+ model.load_state_dict(safe_load(checkpoint), strict=False)
+ else:
+ model.load_state_dict(torch.load(checkpoint), strict=False)
+
+ model.seqlen = 2048
+ return model
+
+
+# Used to locate the .pt/.safetensors quantized file
+def find_quantized_model_file(model_name):
+ if shared.args.checkpoint:
+ return Path(shared.args.checkpoint)
+
+ path_to_model = Path(f'{shared.args.model_dir}/{model_name}')
+ pt_path = None
+ priority_name_list = [
+ Path(f'{shared.args.model_dir}/{model_name}{hyphen}{shared.args.wbits}bit{group}{ext}')
+ for group in ([f'-{shared.args.groupsize}g', ''] if shared.args.groupsize > 0 else [''])
+ for ext in ['.safetensors', '.pt']
+ for hyphen in ['-', f'/{model_name}-', '/']
+ ]
+
+ for path in priority_name_list:
+ if path.exists():
+ pt_path = path
+ break
+
+ # If the model hasn't been found with a well-behaved name, pick the last .pt
+ # or the last .safetensors found in its folder as a last resort
+ if not pt_path:
+ for ext in ['.pt', '.safetensors']:
+ found = list(path_to_model.glob(f"*{ext}"))
+ if len(found) > 0:
+ if len(found) > 1:
+ logger.warning(f'More than one {ext} model has been found. The last one will be selected. It could be wrong.')
+
+ pt_path = found[-1]
+ break
+
+ return pt_path
+
+
+# The function that loads the model in modules/models.py
+def load_quantized(model_name):
+ if shared.args.model_type is None:
+ logger.error("The model could not be loaded because its type could not be inferred from its name.")
+ logger.error("Please specify the type manually using the --model_type argument.")
+ return None
+
+ # Select the appropriate load_quant function
+ model_type = shared.args.model_type.lower()
+ if shared.args.pre_layer and model_type == 'llama':
+ load_quant = llama_inference_offload.load_quant
+ elif model_type in ('llama', 'opt', 'gptj'):
+ if shared.args.pre_layer:
+ logger.warning("Ignoring --pre_layer because it only works for llama model type.")
+
+ load_quant = _load_quant
+ else:
+ logger.error("Unknown pre-quantized model type specified. Only 'llama', 'opt' and 'gptj' are supported")
+ exit()
+
+ # Find the quantized model weights file (.pt/.safetensors)
+ path_to_model = Path(f'{shared.args.model_dir}/{model_name}')
+ pt_path = find_quantized_model_file(model_name)
+ if not pt_path:
+ logger.error("Could not find the quantized model in .pt or .safetensors format, exiting...")
+ exit()
+ else:
+ logger.info(f"Found the following quantized model: {pt_path}")
+
+ # qwopqwop200's offload
+ if model_type == 'llama' and shared.args.pre_layer:
+ if len(shared.args.pre_layer) == 1:
+ pre_layer = shared.args.pre_layer[0]
+ else:
+ pre_layer = shared.args.pre_layer
+
+ model = load_quant(str(path_to_model), str(pt_path), shared.args.wbits, shared.args.groupsize, pre_layer)
+ else:
+ threshold = False if model_type == 'gptj' else 128
+ model = load_quant(str(path_to_model), str(pt_path), shared.args.wbits, shared.args.groupsize, kernel_switch_threshold=threshold)
+
+ # accelerate offload (doesn't work properly)
+ if shared.args.gpu_memory or torch.cuda.device_count() > 1:
+ if shared.args.gpu_memory:
+ memory_map = list(map(lambda x: x.strip(), shared.args.gpu_memory))
+ max_cpu_memory = shared.args.cpu_memory.strip() if shared.args.cpu_memory is not None else '99GiB'
+ max_memory = {}
+ for i in range(len(memory_map)):
+ max_memory[i] = f'{memory_map[i]}GiB' if not re.match('.*ib$', memory_map[i].lower()) else memory_map[i]
+
+ max_memory['cpu'] = f'{max_cpu_memory}GiB' if not re.match('.*ib$', max_cpu_memory.lower()) else max_cpu_memory
+ else:
+ max_memory = accelerate.utils.get_balanced_memory(model)
+
+ device_map = accelerate.infer_auto_device_map(model, max_memory=max_memory, no_split_module_classes=["LlamaDecoderLayer"])
+ logger.info("Using the following device map for the quantized model:", device_map)
+ # https://huggingface.co/docs/accelerate/package_reference/big_modeling#accelerate.dispatch_model
+ model = accelerate.dispatch_model(model, device_map=device_map, offload_buffers=True)
+
+ # No offload
+ elif not shared.args.cpu:
+ model = model.to(torch.device('cuda:0'))
+
+ return model
diff --git a/modules/LoRA.py b/modules/LoRA.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3997d803940c4c8cceb73d5df1b47ad96d0bb1e
--- /dev/null
+++ b/modules/LoRA.py
@@ -0,0 +1,177 @@
+from pathlib import Path
+
+import torch
+from peft import PeftModel
+
+import modules.shared as shared
+from modules.logging_colors import logger
+from modules.models import reload_model
+
+
+def add_lora_to_model(lora_names):
+ if 'GPTQForCausalLM' in shared.model.__class__.__name__ or shared.args.loader == 'AutoGPTQ':
+ add_lora_autogptq(lora_names)
+ elif shared.model.__class__.__name__ in ['ExllamaModel', 'ExllamaHF'] or shared.args.loader == 'ExLlama':
+ add_lora_exllama(lora_names)
+ elif shared.model.__class__.__name__ in ['Exllamav2Model', 'Exllamav2HF'] or shared.args.loader == ['ExLlamav2', 'ExLlamav2_HF']:
+ add_lora_exllamav2(lora_names)
+ else:
+ add_lora_transformers(lora_names)
+
+
+def get_lora_path(lora_name):
+ p = Path(lora_name)
+ if p.exists():
+ lora_name = p.parts[-1]
+
+ return Path(f"{shared.args.lora_dir}/{lora_name}")
+
+
+def add_lora_exllama(lora_names):
+
+ try:
+ from exllama.lora import ExLlamaLora
+ except:
+ try:
+ from repositories.exllama.lora import ExLlamaLora
+ except:
+ logger.error("Could not find the file repositories/exllama/lora.py. Make sure that exllama is cloned inside repositories/ and is up to date.")
+ return
+
+ if len(lora_names) == 0:
+ if shared.model.__class__.__name__ == 'ExllamaModel':
+ shared.model.generator.lora = None
+ else:
+ shared.model.lora = None
+
+ shared.lora_names = []
+ return
+ else:
+ if len(lora_names) > 1:
+ logger.warning('ExLlama can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.')
+
+ lora_path = get_lora_path(lora_names[0])
+ lora_config_path = lora_path / "adapter_config.json"
+ lora_adapter_path = lora_path / "adapter_model.bin"
+
+ logger.info("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join([lora_names[0]])))
+ if shared.model.__class__.__name__ == 'ExllamaModel':
+ lora = ExLlamaLora(shared.model.model, str(lora_config_path), str(lora_adapter_path))
+ shared.model.generator.lora = lora
+ else:
+ lora = ExLlamaLora(shared.model.ex_model, str(lora_config_path), str(lora_adapter_path))
+ shared.model.lora = lora
+
+ shared.lora_names = [lora_names[0]]
+ return
+
+
+def add_lora_exllamav2(lora_names):
+
+ from exllamav2 import ExLlamaV2Lora
+
+ if isinstance(shared.model.loras, list):
+ for lora in shared.model.loras:
+ lora.unload()
+
+ if len(lora_names) > 0:
+ logger.info("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join(lora_names)))
+ shared.model.loras = []
+ for lora_name in lora_names:
+ lora_path = get_lora_path(lora_name)
+ if shared.model.__class__.__name__ == 'Exllamav2Model':
+ lora = ExLlamaV2Lora.from_directory(shared.model.model, str(lora_path))
+ else:
+ lora = ExLlamaV2Lora.from_directory(shared.model.ex_model, str(lora_path))
+
+ shared.model.loras.append(lora)
+
+ shared.lora_names = lora_names
+ else:
+ shared.lora_names = []
+ shared.model.loras = None
+
+
+def add_lora_autogptq(lora_names):
+ '''
+ Adapted from https://github.com/Ph0rk0z/text-generation-webui-testing
+ '''
+
+ try:
+ from auto_gptq import get_gptq_peft_model
+ from auto_gptq.utils.peft_utils import GPTQLoraConfig
+ except:
+ logger.error("This version of AutoGPTQ does not support LoRA. You need to install from source or wait for a new release.")
+ return
+
+ if len(lora_names) == 0:
+ reload_model()
+
+ shared.lora_names = []
+ return
+ else:
+ if len(lora_names) > 1:
+ logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.')
+ if not shared.args.no_inject_fused_attention:
+ logger.warning('Fused Atttention + AutoGPTQ may break Lora loading. Disable it.')
+
+ peft_config = GPTQLoraConfig(
+ inference_mode=True,
+ )
+
+ lora_path = get_lora_path(lora_names[0])
+ logger.info("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join([lora_names[0]])))
+ shared.model = get_gptq_peft_model(shared.model, peft_config, lora_path)
+ shared.lora_names = [lora_names[0]]
+ return
+
+
+def add_lora_transformers(lora_names):
+ prior_set = set(shared.lora_names)
+ added_set = set(lora_names) - prior_set
+ removed_set = prior_set - set(lora_names)
+
+ # If no LoRA needs to be added or removed, exit
+ if len(added_set) == 0 and len(removed_set) == 0:
+ return
+
+ # Add a LoRA when another LoRA is already present
+ if len(removed_set) == 0 and len(prior_set) > 0:
+ logger.info(f"Adding the LoRA(s) named {added_set} to the model...")
+ for lora in added_set:
+ shared.model.load_adapter(get_lora_path(lora), lora)
+
+ return
+
+ # If any LoRA needs to be removed, start over
+ if len(removed_set) > 0:
+ # shared.model may no longer be PeftModel
+ if hasattr(shared.model, 'disable_adapter'):
+ shared.model.disable_adapter()
+ shared.model = shared.model.base_model.model
+
+ if len(lora_names) > 0:
+ params = {}
+ if not shared.args.cpu:
+ if shared.args.load_in_4bit or shared.args.load_in_8bit:
+ params['peft_type'] = shared.model.dtype
+ else:
+ params['dtype'] = shared.model.dtype
+ if hasattr(shared.model, "hf_device_map"):
+ params['device_map'] = {"base_model.model." + k: v for k, v in shared.model.hf_device_map.items()}
+
+ logger.info("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join(lora_names)))
+ shared.model = PeftModel.from_pretrained(shared.model, get_lora_path(lora_names[0]), adapter_name=lora_names[0], **params)
+ for lora in lora_names[1:]:
+ shared.model.load_adapter(get_lora_path(lora), lora)
+
+ shared.lora_names = lora_names
+
+ if not shared.args.load_in_8bit and not shared.args.cpu:
+ shared.model.half()
+ if not hasattr(shared.model, "hf_device_map"):
+ if torch.backends.mps.is_available():
+ device = torch.device('mps')
+ shared.model = shared.model.to(device)
+ else:
+ shared.model = shared.model.cuda()
diff --git a/modules/RWKV.py b/modules/RWKV.py
new file mode 100644
index 0000000000000000000000000000000000000000..39487c66b7dabec49a6aa80c4e499a088f1fa1a2
--- /dev/null
+++ b/modules/RWKV.py
@@ -0,0 +1,153 @@
+'''
+This loader is not currently maintained as RWKV can now be loaded
+through the transformers library.
+'''
+
+import copy
+import os
+from pathlib import Path
+
+import numpy as np
+from tokenizers import Tokenizer
+
+import modules.shared as shared
+from modules.callbacks import Iteratorize
+
+np.set_printoptions(precision=4, suppress=True, linewidth=200)
+
+os.environ['RWKV_JIT_ON'] = '1'
+os.environ["RWKV_CUDA_ON"] = '1' if shared.args.rwkv_cuda_on else '0' # use CUDA kernel for seq mode (much faster)
+
+from rwkv.model import RWKV
+from rwkv.utils import PIPELINE, PIPELINE_ARGS
+
+
+class RWKVModel:
+ def __init__(self):
+ pass
+
+ @classmethod
+ def from_pretrained(self, path, dtype="fp16", device="cuda"):
+ tokenizer_path = Path(f"{path.parent}/20B_tokenizer.json")
+ if shared.args.rwkv_strategy is None:
+ model = RWKV(model=str(path), strategy=f'{device} {dtype}')
+ else:
+ model = RWKV(model=str(path), strategy=shared.args.rwkv_strategy)
+
+ pipeline = PIPELINE(model, str(tokenizer_path))
+ result = self()
+ result.pipeline = pipeline
+ result.model = model
+ result.cached_context = ""
+ result.cached_model_state = None
+ result.cached_output_logits = None
+ return result
+
+ def generate(self, prompt, state, callback=None):
+ args = PIPELINE_ARGS(
+ temperature=state['temperature'],
+ top_p=state['top_p'],
+ top_k=state['top_k'],
+ alpha_frequency=0.1, # Frequency Penalty (as in GPT-3)
+ alpha_presence=0.1, # Presence Penalty (as in GPT-3)
+ token_ban=[0], # ban the generation of some tokens
+ token_stop=[]
+ )
+
+ if self.cached_context != "":
+ if prompt.startswith(self.cached_context):
+ prompt = prompt[len(self.cached_context):]
+ else:
+ self.cached_context = ""
+ self.cached_model_state = None
+ self.cached_output_logits = None
+
+ # out = self.pipeline.generate(prompt, token_count=state['max_new_tokens'], args=args, callback=callback)
+ out = self.generate_from_cached_state(prompt, token_count=state['max_new_tokens'], args=args, callback=callback)
+ return out
+
+ def generate_with_streaming(self, *args, **kwargs):
+ with Iteratorize(self.generate, args, kwargs, callback=None) as generator:
+ reply = ''
+ for token in generator:
+ reply += token
+ yield reply
+
+ # Similar to the PIPELINE.generate, but lets us maintain the cached_model_state
+ def generate_from_cached_state(self, ctx="", token_count=20, args=None, callback=None):
+ all_tokens = []
+ out_str = ''
+ occurrence = {}
+ state = copy.deepcopy(self.cached_model_state) if self.cached_model_state is not None else None
+
+ # if we ended up with an empty context, just reuse the cached logits
+ # this can happen if a user undoes a message and then sends the exact message again
+ # in that case the full context ends up being the same as the cached_context, so the remaining context is empty.
+ if ctx == "":
+ out = self.cached_output_logits
+
+ token = None
+ for i in range(token_count):
+ # forward
+ tokens = self.pipeline.encode(ctx) if i == 0 else [token]
+ while len(tokens) > 0:
+ out, state = self.model.forward(tokens[:args.chunk_len], state)
+ tokens = tokens[args.chunk_len:]
+ if i == 0:
+ begin_token = len(all_tokens)
+ last_token_posi = begin_token
+ # cache the model state after scanning the context
+ # we don't cache the state after processing our own generated tokens because
+ # the output string might be post-processed arbitrarily. Therefore, what's fed into the model
+ # on the next round of chat might be slightly different what what it output on the previous round
+ if i == 0:
+ self.cached_context += ctx
+ self.cached_model_state = copy.deepcopy(state)
+ self.cached_output_logits = copy.deepcopy(out)
+
+ # adjust probabilities
+ for n in args.token_ban:
+ out[n] = -float('inf')
+
+ for n in occurrence:
+ out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
+
+ # sampler
+ token = self.pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p, top_k=args.top_k)
+ if token in args.token_stop:
+ break
+
+ all_tokens += [token]
+ if token not in occurrence:
+ occurrence[token] = 1
+ else:
+ occurrence[token] += 1
+
+ # output
+ tmp = self.pipeline.decode(all_tokens[last_token_posi:])
+ if '\ufffd' not in tmp: # is valid utf-8 string?
+ if callback:
+ callback(tmp)
+
+ out_str += tmp
+ last_token_posi = begin_token + i + 1
+ return out_str
+
+
+class RWKVTokenizer:
+ def __init__(self):
+ pass
+
+ @classmethod
+ def from_pretrained(self, path):
+ tokenizer_path = path / "20B_tokenizer.json"
+ tokenizer = Tokenizer.from_file(str(tokenizer_path))
+ result = self()
+ result.tokenizer = tokenizer
+ return result
+
+ def encode(self, prompt):
+ return self.tokenizer.encode(prompt).ids
+
+ def decode(self, ids):
+ return self.tokenizer.decode(ids)
diff --git a/modules/RoPE.py b/modules/RoPE.py
new file mode 100644
index 0000000000000000000000000000000000000000..c15616c672b6ea304212d6771207e05805007ae8
--- /dev/null
+++ b/modules/RoPE.py
@@ -0,0 +1,18 @@
+def get_alpha_value(alpha, base):
+ '''
+ Gets alpha_value from alpha_value and rope_freq_base
+ '''
+ if base > 0:
+ return (base/10000.) ** (63/64.)
+ else:
+ return alpha
+
+
+def get_rope_freq_base(alpha, base):
+ '''
+ Gets rope_freq_base from alpha_value and rope_freq_base
+ '''
+ if base > 0:
+ return base
+ else:
+ return 10000 * alpha ** (64/63.)
diff --git a/modules/block_requests.py b/modules/block_requests.py
new file mode 100644
index 0000000000000000000000000000000000000000..38f1a17fe227682cff5d7f27e4914dfaa12ebf5a
--- /dev/null
+++ b/modules/block_requests.py
@@ -0,0 +1,47 @@
+import builtins
+import io
+
+import requests
+
+from modules.logging_colors import logger
+
+original_open = open
+original_get = requests.get
+
+
+class RequestBlocker:
+
+ def __enter__(self):
+ requests.get = my_get
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ requests.get = original_get
+
+
+class OpenMonkeyPatch:
+
+ def __enter__(self):
+ builtins.open = my_open
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ builtins.open = original_open
+
+
+def my_get(url, **kwargs):
+ logger.info('Unwanted HTTP request redirected to localhost :)')
+ kwargs.setdefault('allow_redirects', True)
+ return requests.api.request('get', 'http://127.0.0.1/', **kwargs)
+
+
+# Kindly provided by our friend WizardLM-30B
+def my_open(*args, **kwargs):
+ filename = str(args[0])
+ if filename.endswith('index.html'):
+ with original_open(*args, **kwargs) as f:
+ file_contents = f.read()
+
+ file_contents = file_contents.replace(b'\t\t', b'')
+ file_contents = file_contents.replace(b'cdnjs.cloudflare.com', b'127.0.0.1')
+ return io.BytesIO(file_contents)
+ else:
+ return original_open(*args, **kwargs)
diff --git a/modules/callbacks.py b/modules/callbacks.py
new file mode 100644
index 0000000000000000000000000000000000000000..e29e397d3040d7b4b3205069d490b7eed31620f7
--- /dev/null
+++ b/modules/callbacks.py
@@ -0,0 +1,95 @@
+import gc
+import traceback
+from queue import Queue
+from threading import Thread
+
+import torch
+import transformers
+
+import modules.shared as shared
+
+
+class _StopEverythingStoppingCriteria(transformers.StoppingCriteria):
+ def __init__(self):
+ transformers.StoppingCriteria.__init__(self)
+
+ def __call__(self, input_ids: torch.LongTensor, _scores: torch.FloatTensor) -> bool:
+ return shared.stop_everything
+
+
+class Stream(transformers.StoppingCriteria):
+ def __init__(self, callback_func=None):
+ self.callback_func = callback_func
+
+ def __call__(self, input_ids, scores) -> bool:
+ if self.callback_func is not None:
+ self.callback_func(input_ids[0])
+
+ return False
+
+
+class Iteratorize:
+
+ """
+ Transforms a function that takes a callback
+ into a lazy iterator (generator).
+
+ Adapted from: https://stackoverflow.com/a/9969000
+ """
+
+ def __init__(self, func, args=None, kwargs=None, callback=None):
+ self.mfunc = func
+ self.c_callback = callback
+ self.q = Queue()
+ self.sentinel = object()
+ self.args = args or []
+ self.kwargs = kwargs or {}
+ self.stop_now = False
+
+ def _callback(val):
+ if self.stop_now or shared.stop_everything:
+ raise ValueError
+ self.q.put(val)
+
+ def gentask():
+ try:
+ ret = self.mfunc(callback=_callback, *args, **self.kwargs)
+ except ValueError:
+ pass
+ except:
+ traceback.print_exc()
+ pass
+
+ clear_torch_cache()
+ self.q.put(self.sentinel)
+ if self.c_callback:
+ self.c_callback(ret)
+
+ self.thread = Thread(target=gentask)
+ self.thread.start()
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ obj = self.q.get(True, None)
+ if obj is self.sentinel:
+ raise StopIteration
+ else:
+ return obj
+
+ def __del__(self):
+ clear_torch_cache()
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ self.stop_now = True
+ clear_torch_cache()
+
+
+def clear_torch_cache():
+ gc.collect()
+ if not shared.args.cpu:
+ torch.cuda.empty_cache()
diff --git a/modules/chat.py b/modules/chat.py
new file mode 100644
index 0000000000000000000000000000000000000000..334693ab042a2d8f5ca90e7260edb8046b8a6e2a
--- /dev/null
+++ b/modules/chat.py
@@ -0,0 +1,724 @@
+import base64
+import copy
+import functools
+import html
+import json
+import re
+from datetime import datetime
+from pathlib import Path
+
+import gradio as gr
+import yaml
+from PIL import Image
+
+import modules.shared as shared
+from modules.extensions import apply_extensions
+from modules.html_generator import chat_html_wrapper, make_thumbnail
+from modules.logging_colors import logger
+from modules.text_generation import (
+ generate_reply,
+ get_encoded_length,
+ get_max_prompt_length
+)
+from modules.utils import (
+ delete_file,
+ get_available_characters,
+ replace_all,
+ save_file
+)
+
+
+def str_presenter(dumper, data):
+ """
+ Copied from https://github.com/yaml/pyyaml/issues/240
+ Makes pyyaml output prettier multiline strings.
+ """
+
+ if data.count('\n') > 0:
+ return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
+
+ return dumper.represent_scalar('tag:yaml.org,2002:str', data)
+
+
+yaml.add_representer(str, str_presenter)
+yaml.representer.SafeRepresenter.add_representer(str, str_presenter)
+
+
+def get_turn_substrings(state, instruct=False):
+ if instruct:
+ if 'turn_template' not in state or state['turn_template'] == '':
+ template = '<|user|>\n<|user-message|>\n<|bot|>\n<|bot-message|>\n'
+ else:
+ template = state['turn_template'].replace(r'\n', '\n')
+ else:
+ template = '<|user|>: <|user-message|>\n<|bot|>: <|bot-message|>\n'
+
+ replacements = {
+ '<|user|>': state['name1_instruct' if instruct else 'name1'].strip(),
+ '<|bot|>': state['name2_instruct' if instruct else 'name2'].strip(),
+ }
+
+ output = {
+ 'user_turn': template.split('<|bot|>')[0],
+ 'bot_turn': '<|bot|>' + template.split('<|bot|>')[1],
+ 'user_turn_stripped': template.split('<|bot|>')[0].split('<|user-message|>')[0],
+ 'bot_turn_stripped': '<|bot|>' + template.split('<|bot|>')[1].split('<|bot-message|>')[0],
+ }
+
+ for k in output:
+ output[k] = replace_all(output[k], replacements)
+
+ return output
+
+
+def generate_chat_prompt(user_input, state, **kwargs):
+ impersonate = kwargs.get('impersonate', False)
+ _continue = kwargs.get('_continue', False)
+ also_return_rows = kwargs.get('also_return_rows', False)
+ history = kwargs.get('history', state['history'])['internal']
+ is_instruct = state['mode'] == 'instruct'
+
+ # Find the maximum prompt size
+ max_length = get_max_prompt_length(state)
+ all_substrings = {
+ 'chat': get_turn_substrings(state, instruct=False),
+ 'instruct': get_turn_substrings(state, instruct=True)
+ }
+
+ substrings = all_substrings['instruct' if is_instruct else 'chat']
+
+ # Create the template for "chat-instruct" mode
+ if state['mode'] == 'chat-instruct':
+ wrapper = ''
+ command = state['chat-instruct_command'].replace('<|character|>', state['name2'] if not impersonate else state['name1'])
+ wrapper += state['context_instruct']
+ wrapper += all_substrings['instruct']['user_turn'].replace('<|user-message|>', command)
+ wrapper += all_substrings['instruct']['bot_turn_stripped']
+ if impersonate:
+ wrapper += substrings['user_turn_stripped'].rstrip(' ')
+ elif _continue:
+ wrapper += apply_extensions('bot_prefix', substrings['bot_turn_stripped'], state)
+ wrapper += history[-1][1]
+ else:
+ wrapper += apply_extensions('bot_prefix', substrings['bot_turn_stripped'].rstrip(' '), state)
+ else:
+ wrapper = '<|prompt|>'
+
+ if is_instruct:
+ context = state['context_instruct']
+ else:
+ context = replace_character_names(
+ f"{state['context'].strip()}\n",
+ state['name1'],
+ state['name2']
+ )
+
+ # Build the prompt
+ rows = [context]
+ min_rows = 3
+ i = len(history) - 1
+ while i >= 0 and get_encoded_length(wrapper.replace('<|prompt|>', ''.join(rows))) < max_length:
+ if _continue and i == len(history) - 1:
+ if state['mode'] != 'chat-instruct':
+ rows.insert(1, substrings['bot_turn_stripped'] + history[i][1].strip())
+ else:
+ rows.insert(1, substrings['bot_turn'].replace('<|bot-message|>', history[i][1].strip()))
+
+ string = history[i][0]
+ if string not in ['', '<|BEGIN-VISIBLE-CHAT|>']:
+ rows.insert(1, replace_all(substrings['user_turn'], {'<|user-message|>': string.strip(), '<|round|>': str(i)}))
+
+ i -= 1
+
+ if impersonate:
+ if state['mode'] == 'chat-instruct':
+ min_rows = 1
+ else:
+ min_rows = 2
+ rows.append(substrings['user_turn_stripped'].rstrip(' '))
+ elif not _continue:
+ # Add the user message
+ if len(user_input) > 0:
+ rows.append(replace_all(substrings['user_turn'], {'<|user-message|>': user_input.strip(), '<|round|>': str(len(history))}))
+
+ # Add the character prefix
+ if state['mode'] != 'chat-instruct':
+ rows.append(apply_extensions('bot_prefix', substrings['bot_turn_stripped'].rstrip(' '), state))
+
+ while len(rows) > min_rows and get_encoded_length(wrapper.replace('<|prompt|>', ''.join(rows))) >= max_length:
+ rows.pop(1)
+
+ prompt = wrapper.replace('<|prompt|>', ''.join(rows))
+ if also_return_rows:
+ return prompt, rows
+ else:
+ return prompt
+
+
+def get_stopping_strings(state):
+ stopping_strings = []
+ if state['mode'] in ['instruct', 'chat-instruct']:
+ stopping_strings += [
+ state['turn_template'].split('<|user-message|>')[1].split('<|bot|>')[0] + '<|bot|>',
+ state['turn_template'].split('<|bot-message|>')[1] + '<|user|>'
+ ]
+
+ replacements = {
+ '<|user|>': state['name1_instruct'],
+ '<|bot|>': state['name2_instruct']
+ }
+
+ for i in range(len(stopping_strings)):
+ stopping_strings[i] = replace_all(stopping_strings[i], replacements).rstrip(' ').replace(r'\n', '\n')
+
+ if state['mode'] in ['chat', 'chat-instruct']:
+ stopping_strings += [
+ f"\n{state['name1']}:",
+ f"\n{state['name2']}:"
+ ]
+
+ if 'stopping_strings' in state and isinstance(state['stopping_strings'], list):
+ stopping_strings += state.pop('stopping_strings')
+
+ return stopping_strings
+
+
+def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_message=True):
+ history = state['history']
+ output = copy.deepcopy(history)
+ output = apply_extensions('history', output)
+ state = apply_extensions('state', state)
+ if shared.model_name == 'None' or shared.model is None:
+ logger.error("No model is loaded! Select one in the Model tab.")
+ yield output
+ return
+
+ just_started = True
+ visible_text = None
+ stopping_strings = get_stopping_strings(state)
+ is_stream = state['stream']
+
+ # Prepare the input
+ if not any((regenerate, _continue)):
+ visible_text = html.escape(text)
+
+ # Apply extensions
+ text, visible_text = apply_extensions('chat_input', text, visible_text, state)
+ text = apply_extensions('input', text, state, is_chat=True)
+
+ # *Is typing...*
+ if loading_message:
+ yield {'visible': output['visible'] + [[visible_text, shared.processing_message]], 'internal': output['internal']}
+ else:
+ text, visible_text = output['internal'][-1][0], output['visible'][-1][0]
+ if regenerate:
+ output['visible'].pop()
+ output['internal'].pop()
+
+ # *Is typing...*
+ if loading_message:
+ yield {'visible': output['visible'] + [[visible_text, shared.processing_message]], 'internal': output['internal']}
+ elif _continue:
+ last_reply = [output['internal'][-1][1], output['visible'][-1][1]]
+ if loading_message:
+ yield {'visible': output['visible'][:-1] + [[visible_text, last_reply[1] + '...']], 'internal': output['internal']}
+
+ # Generate the prompt
+ kwargs = {
+ '_continue': _continue,
+ 'history': output,
+ }
+ prompt = apply_extensions('custom_generate_chat_prompt', text, state, **kwargs)
+ if prompt is None:
+ prompt = generate_chat_prompt(text, state, **kwargs)
+
+ # Generate
+ reply = None
+ for j, reply in enumerate(generate_reply(prompt, state, stopping_strings=stopping_strings, is_chat=True)):
+
+ # Extract the reply
+ visible_reply = re.sub("(||{{user}})", state['name1'], reply)
+ visible_reply = html.escape(visible_reply)
+
+ if shared.stop_everything:
+ output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True)
+ yield output
+ return
+
+ if just_started:
+ just_started = False
+ if not _continue:
+ output['internal'].append(['', ''])
+ output['visible'].append(['', ''])
+
+ if _continue:
+ output['internal'][-1] = [text, last_reply[0] + reply]
+ output['visible'][-1] = [visible_text, last_reply[1] + visible_reply]
+ if is_stream:
+ yield output
+ elif not (j == 0 and visible_reply.strip() == ''):
+ output['internal'][-1] = [text, reply.lstrip(' ')]
+ output['visible'][-1] = [visible_text, visible_reply.lstrip(' ')]
+ if is_stream:
+ yield output
+
+ output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True)
+ yield output
+
+
+def impersonate_wrapper(text, state):
+
+ static_output = chat_html_wrapper(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'])
+
+ if shared.model_name == 'None' or shared.model is None:
+ logger.error("No model is loaded! Select one in the Model tab.")
+ yield '', static_output
+ return
+
+ prompt = generate_chat_prompt('', state, impersonate=True)
+ stopping_strings = get_stopping_strings(state)
+
+ yield text + '...', static_output
+ reply = None
+ for reply in generate_reply(prompt + text, state, stopping_strings=stopping_strings, is_chat=True):
+ yield (text + reply).lstrip(' '), static_output
+ if shared.stop_everything:
+ return
+
+
+def generate_chat_reply(text, state, regenerate=False, _continue=False, loading_message=True):
+ history = state['history']
+ if regenerate or _continue:
+ text = ''
+ if (len(history['visible']) == 1 and not history['visible'][0][0]) or len(history['internal']) == 0:
+ yield history
+ return
+
+ for history in chatbot_wrapper(text, state, regenerate=regenerate, _continue=_continue, loading_message=loading_message):
+ yield history
+
+
+def character_is_loaded(state, raise_exception=False):
+ if state['mode'] in ['chat', 'chat-instruct'] and state['name2'] == '':
+ logger.error('It looks like no character is loaded. Please load one under Parameters > Character.')
+ if raise_exception:
+ raise ValueError
+
+ return False
+ else:
+ return True
+
+
+def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):
+ '''
+ Same as above but returns HTML for the UI
+ '''
+
+ if not character_is_loaded(state):
+ return
+
+ if state['start_with'] != '' and not _continue:
+ if regenerate:
+ text, state['history'] = remove_last_message(state['history'])
+ regenerate = False
+
+ _continue = True
+ send_dummy_message(text, state)
+ send_dummy_reply(state['start_with'], state)
+
+ for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True)):
+ yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style']), history
+
+
+def remove_last_message(history):
+ if len(history['visible']) > 0 and history['internal'][-1][0] != '<|BEGIN-VISIBLE-CHAT|>':
+ last = history['visible'].pop()
+ history['internal'].pop()
+ else:
+ last = ['', '']
+
+ return html.unescape(last[0]), history
+
+
+def send_last_reply_to_input(history):
+ if len(history['visible']) > 0:
+ return html.unescape(history['visible'][-1][1])
+ else:
+ return ''
+
+
+def replace_last_reply(text, state):
+ history = state['history']
+
+ if len(text.strip()) == 0:
+ return history
+ elif len(history['visible']) > 0:
+ history['visible'][-1][1] = html.escape(text)
+ history['internal'][-1][1] = apply_extensions('input', text, state, is_chat=True)
+
+ return history
+
+
+def send_dummy_message(text, state):
+ history = state['history']
+ history['visible'].append([html.escape(text), ''])
+ history['internal'].append([apply_extensions('input', text, state, is_chat=True), ''])
+ return history
+
+
+def send_dummy_reply(text, state):
+ history = state['history']
+ if len(history['visible']) > 0 and not history['visible'][-1][1] == '':
+ history['visible'].append(['', ''])
+ history['internal'].append(['', ''])
+
+ history['visible'][-1][1] = html.escape(text)
+ history['internal'][-1][1] = apply_extensions('input', text, state, is_chat=True)
+ return history
+
+
+def redraw_html(history, name1, name2, mode, style, reset_cache=False):
+ return chat_html_wrapper(history, name1, name2, mode, style, reset_cache=reset_cache)
+
+
+def start_new_chat(state):
+ mode = state['mode']
+ history = {'internal': [], 'visible': []}
+
+ if mode != 'instruct':
+ greeting = replace_character_names(state['greeting'], state['name1'], state['name2'])
+ if greeting != '':
+ history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]]
+ history['visible'] += [['', apply_extensions('output', greeting, state, is_chat=True)]]
+
+ unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S')
+ save_history(history, unique_id, state['character_menu'], state['mode'])
+
+ return history
+
+
+def get_history_file_path(unique_id, character, mode):
+ if mode == 'instruct':
+ p = Path(f'logs/instruct/{unique_id}.json')
+ else:
+ p = Path(f'logs/chat/{character}/{unique_id}.json')
+
+ return p
+
+
+def save_history(history, unique_id, character, mode):
+ if shared.args.multi_user:
+ return
+
+ p = get_history_file_path(unique_id, character, mode)
+ if not p.parent.is_dir():
+ p.parent.mkdir(parents=True)
+
+ with open(p, 'w', encoding='utf-8') as f:
+ f.write(json.dumps(history, indent=4))
+
+
+def rename_history(old_id, new_id, character, mode):
+ if shared.args.multi_user:
+ return
+
+ old_p = get_history_file_path(old_id, character, mode)
+ new_p = get_history_file_path(new_id, character, mode)
+ if new_p.parent != old_p.parent:
+ logger.error(f"The following path is not allowed: {new_p}.")
+ elif new_p == old_p:
+ logger.info("The provided path is identical to the old one.")
+ else:
+ logger.info(f"Renaming {old_p} to {new_p}")
+ old_p.rename(new_p)
+
+
+def find_all_histories(state):
+ if shared.args.multi_user:
+ return ['']
+
+ if state['mode'] == 'instruct':
+ paths = Path('logs/instruct').glob('*.json')
+ else:
+ character = state['character_menu']
+
+ # Handle obsolete filenames and paths
+ old_p = Path(f'logs/{character}_persistent.json')
+ new_p = Path(f'logs/persistent_{character}.json')
+ if old_p.exists():
+ logger.warning(f"Renaming {old_p} to {new_p}")
+ old_p.rename(new_p)
+ if new_p.exists():
+ unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S')
+ p = get_history_file_path(unique_id, character, state['mode'])
+ logger.warning(f"Moving {new_p} to {p}")
+ p.parent.mkdir(exist_ok=True)
+ new_p.rename(p)
+
+ paths = Path(f'logs/chat/{character}').glob('*.json')
+
+ histories = sorted(paths, key=lambda x: x.stat().st_mtime, reverse=True)
+ histories = [path.stem for path in histories]
+
+ return histories
+
+
+def load_latest_history(state):
+ '''
+ Loads the latest history for the given character in chat or chat-instruct
+ mode, or the latest instruct history for instruct mode.
+ '''
+
+ if shared.args.multi_user:
+ return start_new_chat(state)
+
+ histories = find_all_histories(state)
+
+ if len(histories) > 0:
+ unique_id = Path(histories[0]).stem
+ history = load_history(unique_id, state['character_menu'], state['mode'])
+ else:
+ history = start_new_chat(state)
+
+ return history
+
+
+def load_history(unique_id, character, mode):
+ p = get_history_file_path(unique_id, character, mode)
+
+ f = json.loads(open(p, 'rb').read())
+ if 'internal' in f and 'visible' in f:
+ history = f
+ else:
+ history = {
+ 'internal': f['data'],
+ 'visible': f['data_visible']
+ }
+
+ return history
+
+
+def load_history_json(file, history):
+ try:
+ file = file.decode('utf-8')
+ f = json.loads(file)
+ if 'internal' in f and 'visible' in f:
+ history = f
+ else:
+ history = {
+ 'internal': f['data'],
+ 'visible': f['data_visible']
+ }
+
+ return history
+ except:
+ return history
+
+
+def delete_history(unique_id, character, mode):
+ p = get_history_file_path(unique_id, character, mode)
+ delete_file(p)
+
+
+def replace_character_names(text, name1, name2):
+ text = text.replace('{{user}}', name1).replace('{{char}}', name2)
+ return text.replace('', name1).replace('', name2)
+
+
+def generate_pfp_cache(character):
+ cache_folder = Path("cache")
+ if not cache_folder.exists():
+ cache_folder.mkdir()
+
+ for path in [Path(f"characters/{character}.{extension}") for extension in ['png', 'jpg', 'jpeg']]:
+ if path.exists():
+ img = make_thumbnail(Image.open(path))
+ img.save(Path('cache/pfp_character.png'), format='PNG')
+ return img
+
+ return None
+
+
+def load_character(character, name1, name2, instruct=False):
+ context = greeting = turn_template = ""
+ greeting_field = 'greeting'
+ picture = None
+
+ if instruct:
+ name1 = name2 = ''
+ folder = 'instruction-templates'
+ else:
+ folder = 'characters'
+
+ filepath = None
+ for extension in ["yml", "yaml", "json"]:
+ filepath = Path(f'{folder}/{character}.{extension}')
+ if filepath.exists():
+ break
+
+ if filepath is None or not filepath.exists():
+ logger.error(f"Could not find the character \"{character}\" inside {folder}/. No character has been loaded.")
+ raise ValueError
+
+ file_contents = open(filepath, 'r', encoding='utf-8').read()
+ data = json.loads(file_contents) if extension == "json" else yaml.safe_load(file_contents)
+
+ if Path("cache/pfp_character.png").exists() and not instruct:
+ Path("cache/pfp_character.png").unlink()
+
+ picture = generate_pfp_cache(character)
+
+ # Finding the bot's name
+ for k in ['name', 'bot', '<|bot|>', 'char_name']:
+ if k in data and data[k] != '':
+ name2 = data[k]
+ break
+
+ # Find the user name (if any)
+ for k in ['your_name', 'user', '<|user|>']:
+ if k in data and data[k] != '':
+ name1 = data[k]
+ break
+
+ if 'context' in data:
+ context = data['context']
+ if not instruct:
+ context = context.strip() + '\n'
+ elif "char_persona" in data:
+ context = build_pygmalion_style_context(data)
+ greeting_field = 'char_greeting'
+
+ if greeting_field in data:
+ greeting = data[greeting_field]
+
+ if 'turn_template' in data:
+ turn_template = data['turn_template']
+
+ return name1, name2, picture, greeting, context, turn_template.replace("\n", r"\n")
+
+
+@functools.cache
+def load_character_memoized(character, name1, name2, instruct=False):
+ return load_character(character, name1, name2, instruct=instruct)
+
+
+def upload_character(file, img, tavern=False):
+ decoded_file = file if isinstance(file, str) else file.decode('utf-8')
+ try:
+ data = json.loads(decoded_file)
+ except:
+ data = yaml.safe_load(decoded_file)
+
+ if 'char_name' in data:
+ name = data['char_name']
+ greeting = data['char_greeting']
+ context = build_pygmalion_style_context(data)
+ yaml_data = generate_character_yaml(name, greeting, context)
+ else:
+ name = data['name']
+ yaml_data = generate_character_yaml(data['name'], data['greeting'], data['context'])
+
+ outfile_name = name
+ i = 1
+ while Path(f'characters/{outfile_name}.yaml').exists():
+ outfile_name = f'{name}_{i:03d}'
+ i += 1
+
+ with open(Path(f'characters/{outfile_name}.yaml'), 'w', encoding='utf-8') as f:
+ f.write(yaml_data)
+
+ if img is not None:
+ img.save(Path(f'characters/{outfile_name}.png'))
+
+ logger.info(f'New character saved to "characters/{outfile_name}.yaml".')
+ return gr.update(value=outfile_name, choices=get_available_characters())
+
+
+def build_pygmalion_style_context(data):
+ context = ""
+ if 'char_persona' in data and data['char_persona'] != '':
+ context += f"{data['char_name']}'s Persona: {data['char_persona']}\n"
+
+ if 'world_scenario' in data and data['world_scenario'] != '':
+ context += f"Scenario: {data['world_scenario']}\n"
+
+ if 'example_dialogue' in data and data['example_dialogue'] != '':
+ context += f"{data['example_dialogue'].strip()}\n"
+
+ context = f"{context.strip()}\n"
+ return context
+
+
+def upload_tavern_character(img, _json):
+ _json = {'char_name': _json['name'], 'char_persona': _json['description'], 'char_greeting': _json['first_mes'], 'example_dialogue': _json['mes_example'], 'world_scenario': _json['scenario']}
+ return upload_character(json.dumps(_json), img, tavern=True)
+
+
+def check_tavern_character(img):
+ if "chara" not in img.info:
+ return "Not a TavernAI card", None, None, gr.update(interactive=False)
+
+ decoded_string = base64.b64decode(img.info['chara']).replace(b'\\r\\n', b'\\n')
+ _json = json.loads(decoded_string)
+ if "data" in _json:
+ _json = _json["data"]
+
+ return _json['name'], _json['description'], _json, gr.update(interactive=True)
+
+
+def upload_your_profile_picture(img):
+ cache_folder = Path("cache")
+ if not cache_folder.exists():
+ cache_folder.mkdir()
+
+ if img is None:
+ if Path("cache/pfp_me.png").exists():
+ Path("cache/pfp_me.png").unlink()
+ else:
+ img = make_thumbnail(img)
+ img.save(Path('cache/pfp_me.png'))
+ logger.info('Profile picture saved to "cache/pfp_me.png"')
+
+
+def generate_character_yaml(name, greeting, context):
+ data = {
+ 'name': name,
+ 'greeting': greeting,
+ 'context': context,
+ }
+
+ data = {k: v for k, v in data.items() if v} # Strip falsy
+ return yaml.dump(data, sort_keys=False, width=float("inf"))
+
+
+def generate_instruction_template_yaml(user, bot, context, turn_template):
+ data = {
+ 'user': user,
+ 'bot': bot,
+ 'turn_template': turn_template,
+ 'context': context,
+ }
+
+ data = {k: v for k, v in data.items() if v} # Strip falsy
+ return yaml.dump(data, sort_keys=False, width=float("inf"))
+
+
+def save_character(name, greeting, context, picture, filename):
+ if filename == "":
+ logger.error("The filename is empty, so the character will not be saved.")
+ return
+
+ data = generate_character_yaml(name, greeting, context)
+ filepath = Path(f'characters/{filename}.yaml')
+ save_file(filepath, data)
+ path_to_img = Path(f'characters/{filename}.png')
+ if picture is not None:
+ picture.save(path_to_img)
+ logger.info(f'Saved {path_to_img}.')
+
+
+def delete_character(name, instruct=False):
+ for extension in ["yml", "yaml", "json"]:
+ delete_file(Path(f'characters/{name}.{extension}'))
+
+ delete_file(Path(f'characters/{name}.png'))
diff --git a/modules/ctransformers_model.py b/modules/ctransformers_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..70ce92f54bdb6704ce58f013fd28b0014a5f3b28
--- /dev/null
+++ b/modules/ctransformers_model.py
@@ -0,0 +1,79 @@
+from ctransformers import AutoConfig, AutoModelForCausalLM
+
+from modules import shared
+from modules.callbacks import Iteratorize
+from modules.logging_colors import logger
+
+
+class CtransformersModel:
+ def __init__(self):
+ pass
+
+ @classmethod
+ def from_pretrained(cls, path):
+ result = cls()
+
+ config = AutoConfig.from_pretrained(
+ str(path),
+ threads=shared.args.threads if shared.args.threads != 0 else -1,
+ gpu_layers=shared.args.n_gpu_layers,
+ batch_size=shared.args.n_batch,
+ context_length=shared.args.n_ctx,
+ stream=True,
+ mmap=not shared.args.no_mmap,
+ mlock=shared.args.mlock
+ )
+
+ result.model = AutoModelForCausalLM.from_pretrained(
+ str(result.model_dir(path) if result.model_type_is_auto() else path),
+ model_type=(None if result.model_type_is_auto() else shared.args.model_type),
+ config=config
+ )
+
+ logger.info(f'Using ctransformers model_type: {result.model.model_type} for {result.model.model_path}')
+ return result, result
+
+ def model_type_is_auto(self):
+ return shared.args.model_type is None or shared.args.model_type == "Auto" or shared.args.model_type == "None"
+
+ def model_dir(self, path):
+ if path.is_file():
+ return path.parent
+
+ return path
+
+ def encode(self, string, **kwargs):
+ return self.model.tokenize(string)
+
+ def decode(self, ids):
+ return self.model.detokenize(ids)
+
+ def generate(self, prompt, state, callback=None):
+ prompt = prompt if type(prompt) is str else prompt.decode()
+ # ctransformers uses -1 for random seed
+ generator = self.model(
+ prompt=prompt,
+ max_new_tokens=state['max_new_tokens'],
+ temperature=state['temperature'],
+ top_p=state['top_p'],
+ top_k=state['top_k'],
+ repetition_penalty=state['repetition_penalty'],
+ last_n_tokens=state['repetition_penalty_range'],
+ seed=int(state['seed'])
+ )
+
+ output = ""
+ for token in generator:
+ if callback:
+ callback(token)
+
+ output += token
+
+ return output
+
+ def generate_with_streaming(self, *args, **kwargs):
+ with Iteratorize(self.generate, args, kwargs, callback=None) as generator:
+ reply = ''
+ for token in generator:
+ reply += token
+ yield reply
diff --git a/modules/deepspeed_parameters.py b/modules/deepspeed_parameters.py
new file mode 100644
index 0000000000000000000000000000000000000000..f170a385cfc3dfb954fc6f5595cf8706e42aed30
--- /dev/null
+++ b/modules/deepspeed_parameters.py
@@ -0,0 +1,74 @@
+def generate_ds_config(ds_bf16, train_batch_size, nvme_offload_dir):
+ '''
+ DeepSpeed configuration
+ https://huggingface.co/docs/transformers/main_classes/deepspeed
+ '''
+
+ if nvme_offload_dir:
+ ds_config = {
+ "fp16": {
+ "enabled": not ds_bf16,
+ },
+ "bf16": {
+ "enabled": ds_bf16,
+ },
+ "zero_optimization": {
+ "stage": 3,
+ "offload_param": {
+ "device": "nvme",
+ "nvme_path": nvme_offload_dir,
+ "pin_memory": True,
+ "buffer_count": 5,
+ "buffer_size": 1e9,
+ "max_in_cpu": 1e9
+ },
+ "overlap_comm": True,
+ "reduce_bucket_size": "auto",
+ "contiguous_gradients": True,
+ "sub_group_size": 1e8,
+ "stage3_prefetch_bucket_size": "auto",
+ "stage3_param_persistence_threshold": "auto",
+ "stage3_max_live_parameters": "auto",
+ "stage3_max_reuse_distance": "auto",
+ },
+ "aio": {
+ "block_size": 262144,
+ "queue_depth": 32,
+ "thread_count": 1,
+ "single_submit": False,
+ "overlap_events": True
+ },
+ "steps_per_print": 2000,
+ "train_batch_size": train_batch_size,
+ "train_micro_batch_size_per_gpu": 1,
+ "wall_clock_breakdown": False
+ }
+ else:
+ ds_config = {
+ "fp16": {
+ "enabled": not ds_bf16,
+ },
+ "bf16": {
+ "enabled": ds_bf16,
+ },
+ "zero_optimization": {
+ "stage": 3,
+ "offload_param": {
+ "device": "cpu",
+ "pin_memory": True
+ },
+ "overlap_comm": True,
+ "contiguous_gradients": True,
+ "reduce_bucket_size": "auto",
+ "stage3_prefetch_bucket_size": "auto",
+ "stage3_param_persistence_threshold": "auto",
+ "stage3_max_live_parameters": "auto",
+ "stage3_max_reuse_distance": "auto",
+ },
+ "steps_per_print": 2000,
+ "train_batch_size": train_batch_size,
+ "train_micro_batch_size_per_gpu": 1,
+ "wall_clock_breakdown": False
+ }
+
+ return ds_config
diff --git a/modules/evaluate.py b/modules/evaluate.py
new file mode 100644
index 0000000000000000000000000000000000000000..8044e203151157a6473fa11c98414a27d45a32af
--- /dev/null
+++ b/modules/evaluate.py
@@ -0,0 +1,151 @@
+import datetime
+from pathlib import Path
+
+import pandas as pd
+import torch
+from datasets import load_dataset
+from tqdm import tqdm
+
+from modules import shared
+from modules.models import load_model, unload_model
+from modules.models_settings import get_model_metadata, update_model_parameters
+from modules.text_generation import encode
+
+
+def load_past_evaluations():
+ if Path('logs/evaluations.csv').exists():
+ df = pd.read_csv(Path('logs/evaluations.csv'), dtype=str)
+ df['Perplexity'] = pd.to_numeric(df['Perplexity'])
+ return df
+ else:
+ return pd.DataFrame(columns=['Model', 'LoRAs', 'Dataset', 'Perplexity', 'stride', 'max_length', 'Date', 'Comment'])
+
+
+past_evaluations = load_past_evaluations()
+
+
+def save_past_evaluations(df):
+ global past_evaluations
+ past_evaluations = df
+ filepath = Path('logs/evaluations.csv')
+ filepath.parent.mkdir(parents=True, exist_ok=True)
+ df.to_csv(filepath, index=False)
+
+
+def calculate_perplexity(models, input_dataset, stride, _max_length):
+ '''
+ Based on:
+ https://huggingface.co/docs/transformers/perplexity#calculating-ppl-with-fixedlength-models
+ '''
+
+ global past_evaluations
+ cumulative_log = ''
+ cumulative_log += "Loading the input dataset...\n\n"
+ yield cumulative_log
+
+ # Copied from https://github.com/qwopqwop200/GPTQ-for-LLaMa/blob/triton/utils/datautils.py
+ if input_dataset == 'wikitext':
+ data = load_dataset('wikitext', 'wikitext-2-raw-v1', split='test')
+ text = "\n\n".join(data['text'])
+ elif input_dataset == 'ptb':
+ data = load_dataset('ptb_text_only', 'penn_treebank', split='validation')
+ text = "\n\n".join(data['sentence'])
+ elif input_dataset == 'ptb_new':
+ data = load_dataset('ptb_text_only', 'penn_treebank', split='test')
+ text = " ".join(data['sentence'])
+ else:
+ with open(Path(f'training/datasets/{input_dataset}.txt'), 'r', encoding='utf-8') as f:
+ text = f.read()
+
+ for model in models:
+ if is_in_past_evaluations(model, input_dataset, stride, _max_length):
+ cumulative_log += f"{model} has already been tested. Ignoring.\n\n"
+ yield cumulative_log
+ continue
+
+ if model != 'current model':
+ try:
+ yield cumulative_log + f"Loading {model}...\n\n"
+ model_settings = get_model_metadata(model)
+ shared.settings.update({k: v for k, v in model_settings.items() if k in shared.settings}) # hijacking the interface defaults
+ update_model_parameters(model_settings) # hijacking the command-line arguments
+ shared.model_name = model
+ unload_model()
+ shared.model, shared.tokenizer = load_model(shared.model_name)
+ except:
+ cumulative_log += f"Failed to load {model}. Moving on.\n\n"
+ yield cumulative_log
+ continue
+
+ cumulative_log += f"Processing {shared.model_name}...\n\n"
+ yield cumulative_log + "Tokenizing the input dataset...\n\n"
+ encodings = encode(text, add_special_tokens=False)
+ seq_len = encodings.shape[1]
+ if _max_length:
+ max_length = _max_length
+ elif hasattr(shared.model.config, 'max_position_embeddings'):
+ max_length = shared.model.config.max_position_embeddings
+ else:
+ max_length = 2048
+
+ nlls = []
+ prev_end_loc = 0
+ for begin_loc in tqdm(range(0, seq_len, stride)):
+ yield cumulative_log + f"Evaluating... {100*begin_loc/seq_len:.2f}%"
+ end_loc = min(begin_loc + max_length, seq_len)
+ trg_len = end_loc - prev_end_loc # may be different from stride on last loop
+ input_ids = encodings[:, begin_loc:end_loc]
+ target_ids = input_ids.clone()
+ target_ids[:, :-trg_len] = -100
+
+ with torch.no_grad():
+ outputs = shared.model(input_ids=input_ids, labels=target_ids)
+
+ # loss is calculated using CrossEntropyLoss which averages over valid labels
+ # N.B. the model only calculates loss over trg_len - 1 labels, because it internally shifts the labels
+ # to the left by 1.
+ neg_log_likelihood = outputs.loss
+
+ nlls.append(neg_log_likelihood)
+
+ prev_end_loc = end_loc
+ if end_loc == seq_len:
+ break
+
+ ppl = torch.exp(torch.stack(nlls).mean())
+ add_entry_to_past_evaluations(float(ppl), shared.model_name, input_dataset, stride, _max_length)
+ save_past_evaluations(past_evaluations)
+ cumulative_log += f"The perplexity for {shared.model_name} is: {float(ppl)}\n\n"
+ yield cumulative_log
+
+
+def add_entry_to_past_evaluations(perplexity, model, dataset, stride, max_length):
+ global past_evaluations
+ entry = {
+ 'Model': model,
+ 'LoRAs': ', '.join(shared.lora_names) or '-',
+ 'Dataset': dataset,
+ 'Perplexity': perplexity,
+ 'stride': str(stride),
+ 'max_length': str(max_length),
+ 'Date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+ 'Comment': ''
+ }
+ past_evaluations = pd.concat([past_evaluations, pd.DataFrame([entry])], ignore_index=True)
+
+
+def is_in_past_evaluations(model, dataset, stride, max_length):
+ entries = past_evaluations[(past_evaluations['Model'] == model) &
+ (past_evaluations['Dataset'] == dataset) &
+ (past_evaluations['max_length'] == str(max_length)) &
+ (past_evaluations['stride'] == str(stride))]
+
+ if entries.shape[0] > 0:
+ return True
+ else:
+ return False
+
+
+def generate_markdown_table():
+ sorted_df = past_evaluations.sort_values(by=['Dataset', 'stride', 'Perplexity', 'Date'])
+ return sorted_df
diff --git a/modules/exllama.py b/modules/exllama.py
new file mode 100644
index 0000000000000000000000000000000000000000..4257ee0765f200f1a57acedde90e13d4a422abc3
--- /dev/null
+++ b/modules/exllama.py
@@ -0,0 +1,220 @@
+from pathlib import Path
+
+import torch
+import torch.nn.functional as F
+from torch import version as torch_version
+
+from modules import shared
+from modules.logging_colors import logger
+from modules.models import clear_torch_cache
+from modules.text_generation import get_max_prompt_length
+
+try:
+ from exllama.generator import ExLlamaGenerator
+ from exllama.model import ExLlama, ExLlamaCache, ExLlamaConfig
+ from exllama.tokenizer import ExLlamaTokenizer
+except:
+ logger.warning('exllama module failed to import. Will attempt to import from repositories/.')
+ try:
+ from modules.relative_imports import RelativeImport
+
+ with RelativeImport("repositories/exllama"):
+ from generator import ExLlamaGenerator
+ from model import ExLlama, ExLlamaCache, ExLlamaConfig
+ from tokenizer import ExLlamaTokenizer
+ except:
+ logger.error(
+ "Could not find repositories/exllama. Please ensure that exllama"
+ " (https://github.com/turboderp/exllama) is cloned inside repositories/ and is up to date."
+ )
+ raise
+
+
+class ExllamaModel:
+ def __init__(self):
+ pass
+
+ @classmethod
+ def from_pretrained(self, path_to_model):
+
+ path_to_model = Path(f'{shared.args.model_dir}') / Path(path_to_model)
+ tokenizer_model_path = path_to_model / "tokenizer.model"
+ model_config_path = path_to_model / "config.json"
+
+ # Find the model checkpoint
+ model_path = None
+ for ext in ['.safetensors', '.pt', '.bin']:
+ found = list(path_to_model.glob(f"*{ext}"))
+ if len(found) > 0:
+ if len(found) > 1:
+ logger.warning(f'More than one {ext} model has been found. The last one will be selected. It could be wrong.')
+
+ model_path = found[-1]
+ break
+
+ config = ExLlamaConfig(str(model_config_path))
+ config.model_path = str(model_path)
+ config.max_seq_len = shared.args.max_seq_len
+ config.compress_pos_emb = shared.args.compress_pos_emb
+ if shared.args.gpu_split:
+ config.set_auto_map(shared.args.gpu_split)
+ config.gpu_peer_fix = True
+
+ if shared.args.alpha_value > 1 and shared.args.rope_freq_base == 0:
+ config.alpha_value = shared.args.alpha_value
+ config.calculate_rotary_embedding_base()
+ elif shared.args.rope_freq_base > 0:
+ config.rotary_embedding_base = shared.args.rope_freq_base
+
+ if torch_version.hip:
+ config.rmsnorm_no_half2 = True
+ config.rope_no_half2 = True
+ config.matmul_no_half2 = True
+ config.silu_no_half2 = True
+
+ model = ExLlama(config)
+ tokenizer = ExLlamaTokenizer(str(tokenizer_model_path))
+ cache = ExLlamaCache(model)
+ generator = ExLlamaGenerator(model, tokenizer, cache)
+
+ result = self()
+ result.config = config
+ result.model = model
+ result.cache = cache
+ result.tokenizer = tokenizer
+ result.generator = generator
+ return result, result
+
+ def encode(self, string, **kwargs):
+ return self.tokenizer.encode(string, max_seq_len=self.model.config.max_seq_len, add_bos=True)
+
+ def decode(self, ids, **kwargs):
+ if isinstance(ids, list):
+ ids = torch.tensor([ids])
+ elif isinstance(ids, torch.Tensor) and ids.numel() == 1:
+ ids = ids.view(1, -1)
+
+ return self.tokenizer.decode(ids)[0]
+
+ def get_logits(self, token_ids, **kwargs):
+ self.cache.current_seq_len = 0
+ if token_ids.shape[-1] > 1:
+ self.model.forward(token_ids[:, :-1], self.cache, input_mask=None, preprocess_only=True)
+
+ return self.model.forward(token_ids[:, -1:], self.cache, **kwargs).float().cpu()
+
+ def generate_with_streaming(self, prompt, state):
+
+ # The cache batch size must be 2 for CFG and 1 otherwise
+ if state['guidance_scale'] == 1:
+ if self.cache.batch_size == 2:
+ del self.cache
+ clear_torch_cache()
+ self.cache = ExLlamaCache(self.model)
+ self.generator = ExLlamaGenerator(self.model, self.tokenizer, self.cache)
+ else:
+ if self.cache.batch_size == 1:
+ del self.cache
+ clear_torch_cache()
+ self.cache = ExLlamaCache(self.model, batch_size=2)
+ self.generator = ExLlamaGenerator(self.model, self.tokenizer, self.cache)
+
+ self.generator.settings.temperature = state['temperature']
+ self.generator.settings.top_p = state['top_p']
+ self.generator.settings.top_k = state['top_k']
+ self.generator.settings.typical = state['typical_p']
+ self.generator.settings.token_repetition_penalty_max = state['repetition_penalty']
+ self.generator.settings.token_repetition_penalty_sustain = -1 if state['repetition_penalty_range'] <= 0 else state['repetition_penalty_range']
+ if state['ban_eos_token']:
+ self.generator.disallow_tokens([self.tokenizer.eos_token_id])
+ else:
+ self.generator.disallow_tokens(None)
+
+ if state['custom_token_bans']:
+ to_ban = [int(x) for x in state['custom_token_bans'].split(',')]
+ if len(to_ban) > 0:
+ self.generator.disallow_tokens(to_ban)
+
+ # Case 1: no CFG
+ if state['guidance_scale'] == 1:
+ self.generator.end_beam_search()
+
+ # Tokenizing the input
+ ids = self.generator.tokenizer.encode(prompt, max_seq_len=self.model.config.max_seq_len)
+ if state['add_bos_token']:
+ ids = torch.cat(
+ [torch.tensor([[self.tokenizer.bos_token_id]]).to(ids.device),
+ ids], dim=1
+ ).to(torch.int64)
+ ids = ids[:, -get_max_prompt_length(state):]
+ if state['auto_max_new_tokens']:
+ max_new_tokens = state['truncation_length'] - ids.shape[-1]
+ else:
+ max_new_tokens = state['max_new_tokens']
+
+ self.generator.gen_begin_reuse(ids)
+ initial_len = self.generator.sequence[0].shape[0]
+ has_leading_space = False
+
+ for i in range(max_new_tokens):
+ token = self.generator.gen_single_token()
+ if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
+ has_leading_space = True
+
+ decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:])
+ if has_leading_space:
+ decoded_text = ' ' + decoded_text
+
+ yield decoded_text
+ if token.item() == self.generator.tokenizer.eos_token_id or shared.stop_everything:
+ break
+
+ # Case 2: CFG
+ # Copied from https://github.com/turboderp/exllama/blob/master/example_cfg.py
+ else:
+ alpha = state['guidance_scale']
+ prompts = [prompt, state['negative_prompt'] or '']
+
+ ids, mask = self.tokenizer.encode(
+ prompts,
+ return_mask=True,
+ max_seq_len=self.model.config.max_seq_len,
+ add_bos=state['add_bos_token']
+ )
+ if state['auto_max_new_tokens']:
+ max_new_tokens = state['truncation_length'] - ids[0].shape[-1]
+ else:
+ max_new_tokens = state['max_new_tokens']
+
+ self.generator.gen_begin(ids, mask=mask)
+ initial_len = self.generator.sequence[0].shape[0]
+ has_leading_space = False
+
+ for i in range(max_new_tokens):
+ logits = self.model.forward(self.generator.sequence[:, -1:], self.cache, input_mask=mask)
+ self.generator.apply_rep_penalty(logits)
+
+ logits = F.log_softmax(logits, dim=-1)
+ logits_mixed = alpha * logits[0] + (1 - alpha) * logits[1]
+
+ token, _ = self.generator.sample_current(logits_mixed)
+ if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
+ has_leading_space = True
+
+ decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:])
+ if has_leading_space:
+ decoded_text = ' ' + decoded_text
+
+ yield decoded_text
+ if token.item() == self.tokenizer.eos_token_id or shared.stop_everything:
+ break
+
+ batch_token = token.repeat(2, 1)
+ self.generator.gen_accept_token(batch_token)
+
+ def generate(self, prompt, state):
+ output = ''
+ for output in self.generate_with_streaming(prompt, state):
+ pass
+
+ return output
diff --git a/modules/exllama_hf.py b/modules/exllama_hf.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ba1f3c3867b14de885d54d516418a81135d45bc
--- /dev/null
+++ b/modules/exllama_hf.py
@@ -0,0 +1,174 @@
+import os
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+
+import torch
+from torch.nn import CrossEntropyLoss
+from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
+from transformers.modeling_outputs import CausalLMOutputWithPast
+
+from modules import shared
+from modules.logging_colors import logger
+
+try:
+ from exllama.model import ExLlama, ExLlamaCache, ExLlamaConfig
+except:
+ logger.warning('Exllama module failed to load. Will attempt to load from repositories.')
+ try:
+ from modules.relative_imports import RelativeImport
+
+ with RelativeImport("repositories/exllama"):
+ from model import ExLlama, ExLlamaCache, ExLlamaConfig
+ except:
+ logger.error("Could not find repositories/exllama/. Make sure that exllama is cloned inside repositories/ and is up to date.")
+ raise
+
+
+class ExllamaHF(PreTrainedModel):
+ def __init__(self, config: ExLlamaConfig):
+ super().__init__(PretrainedConfig())
+ self.ex_config = config
+ self.ex_model = ExLlama(self.ex_config)
+ self.generation_config = GenerationConfig()
+ self.lora = None
+
+ self.ex_cache = ExLlamaCache(self.ex_model)
+ self.past_seq = None
+
+ if shared.args.cfg_cache:
+ self.ex_cache_negative = ExLlamaCache(self.ex_model)
+ self.past_seq_negative = None
+
+ def _validate_model_class(self):
+ pass
+
+ def _validate_model_kwargs(self, model_kwargs: Dict[str, Any]):
+ pass
+
+ def prepare_inputs_for_generation(self, input_ids, **kwargs):
+ return {'input_ids': input_ids, **kwargs}
+
+ @property
+ def device(self) -> torch.device:
+ return torch.device(0)
+
+ def __call__(self, *args, **kwargs):
+ use_cache = kwargs.get('use_cache', True)
+ labels = kwargs.get('labels', None)
+ past_key_values = kwargs.get('past_key_values', None)
+
+ if len(args) > 0:
+ if not shared.args.cfg_cache:
+ logger.error("Please enable the cfg-cache option to use CFG with ExLlama_HF.")
+ return
+
+ input_ids = args[0]
+ is_negative = True
+ past_seq = self.past_seq_negative
+ ex_cache = self.ex_cache_negative
+ else:
+ input_ids = kwargs['input_ids']
+ is_negative = False
+ past_seq = self.past_seq
+ ex_cache = self.ex_cache
+
+ seq = input_ids[0].tolist()
+ if is_negative and past_key_values is not None:
+ seq = past_key_values + seq
+
+ seq_tensor = torch.tensor(seq)
+ reset = True
+
+ # Make the forward call
+ if labels is None:
+ if past_seq is not None:
+ min_length = min(past_seq.shape[0], seq_tensor.shape[0])
+ indices = torch.nonzero(~torch.eq(past_seq[:min_length], seq_tensor[:min_length]))
+ if len(indices) > 0:
+ longest_prefix = indices[0].item()
+ else:
+ longest_prefix = min_length
+
+ if longest_prefix > 0:
+ reset = False
+ ex_cache.current_seq_len = longest_prefix
+ if len(seq_tensor) - longest_prefix > 1:
+ self.ex_model.forward(seq_tensor[longest_prefix:-1].view(1, -1), ex_cache, preprocess_only=True, lora=self.lora)
+ elif len(seq_tensor) == longest_prefix:
+ # Very tricky: if the prefix we are reusing *is* the input_ids, then we have to back up the cache pointer by one,
+ # because we feed input_ids[-1] to forward() below, but that last token is already in the cache!
+ ex_cache.current_seq_len -= 1
+
+ if reset:
+ ex_cache.current_seq_len = 0
+ if len(seq_tensor) > 1:
+ self.ex_model.forward(seq_tensor[:-1].view(1, -1), ex_cache, preprocess_only=True, lora=self.lora)
+
+ logits = self.ex_model.forward(seq_tensor[-1:].view(1, -1), ex_cache, lora=self.lora).to(input_ids.device)
+ else:
+ ex_cache.current_seq_len = 0
+ logits = self.ex_model.forward(seq_tensor.view(1, -1), ex_cache, last_id_only=False, lora=self.lora)
+
+ if is_negative:
+ self.past_seq_negative = seq_tensor
+ else:
+ self.past_seq = seq_tensor
+
+ loss = None
+ if labels is not None:
+ # Shift so that tokens < n predict n
+ shift_logits = logits[..., :-1, :].contiguous()
+ shift_labels = labels[..., 1:].contiguous()
+ # Flatten the tokens
+ loss_fct = CrossEntropyLoss()
+ shift_logits = shift_logits.view(-1, logits.shape[-1])
+ shift_labels = shift_labels.view(-1)
+ # Enable model parallelism
+ shift_labels = shift_labels.to(shift_logits.device)
+ loss = loss_fct(shift_logits, shift_labels)
+
+ return CausalLMOutputWithPast(logits=logits, past_key_values=seq if use_cache else None, loss=loss)
+
+ @classmethod
+ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs):
+ assert len(model_args) == 0 and len(kwargs) == 0, "extra args is currently not supported"
+ if isinstance(pretrained_model_name_or_path, str):
+ pretrained_model_name_or_path = Path(pretrained_model_name_or_path)
+
+ pretrained_model_name_or_path = Path(f'{shared.args.model_dir}') / Path(pretrained_model_name_or_path)
+ config = ExLlamaConfig(pretrained_model_name_or_path / 'config.json')
+
+ # from 'oobabooga/text-generation-webui/modules/exllama.py'
+ weight_path = None
+ for ext in ['.safetensors', '.pt', '.bin']:
+ found = list(pretrained_model_name_or_path.glob(f"*{ext}"))
+ if len(found) > 0:
+ weight_path = found[-1]
+ break
+ assert weight_path is not None, f'could not find weight in "{pretrained_model_name_or_path}"'
+
+ config.model_path = str(weight_path)
+ config.max_seq_len = shared.args.max_seq_len
+ config.compress_pos_emb = shared.args.compress_pos_emb
+ if shared.args.gpu_split:
+ config.set_auto_map(shared.args.gpu_split)
+ config.gpu_peer_fix = True
+
+ if shared.args.alpha_value > 1 and shared.args.rope_freq_base == 0:
+ config.alpha_value = shared.args.alpha_value
+ config.calculate_rotary_embedding_base()
+ elif shared.args.rope_freq_base > 0:
+ config.rotary_embedding_base = shared.args.rope_freq_base
+
+ if torch.version.hip:
+ config.rmsnorm_no_half2 = True
+ config.rope_no_half2 = True
+ config.matmul_no_half2 = True
+ config.silu_no_half2 = True
+
+ # This slowes down a bit but align better with autogptq generation.
+ # TODO: Should give user choice to tune the exllama config
+ # config.fused_attn = False
+ # config.fused_mlp_thd = 0
+
+ return ExllamaHF(config)
diff --git a/modules/exllamav2.py b/modules/exllamav2.py
new file mode 100644
index 0000000000000000000000000000000000000000..a75ede46b3271e405bd4b0845b4dabb4f51ebc09
--- /dev/null
+++ b/modules/exllamav2.py
@@ -0,0 +1,136 @@
+import random
+from pathlib import Path
+
+import torch
+from exllamav2 import (
+ ExLlamaV2,
+ ExLlamaV2Cache,
+ ExLlamaV2Config,
+ ExLlamaV2Tokenizer
+)
+from exllamav2.generator import ExLlamaV2BaseGenerator, ExLlamaV2Sampler
+
+from modules import shared
+from modules.logging_colors import logger
+from modules.text_generation import get_max_prompt_length
+
+try:
+ import flash_attn
+except ModuleNotFoundError:
+ logger.warning(
+ 'You are running ExLlamaV2 without flash-attention. This will cause the VRAM usage '
+ 'to be a lot higher than it could be.\n'
+ 'Try installing flash-attention following the instructions here: '
+ 'https://github.com/Dao-AILab/flash-attention#installation-and-features'
+ )
+ pass
+
+
+class Exllamav2Model:
+ def __init__(self):
+ pass
+
+ @classmethod
+ def from_pretrained(self, path_to_model):
+
+ path_to_model = Path(f'{shared.args.model_dir}') / Path(path_to_model)
+
+ config = ExLlamaV2Config()
+ config.model_dir = str(path_to_model)
+ config.prepare()
+
+ config.max_seq_len = shared.args.max_seq_len
+ config.scale_pos_emb = shared.args.compress_pos_emb
+ config.scale_alpha_value = shared.args.alpha_value
+
+ model = ExLlamaV2(config)
+
+ split = None
+ if shared.args.gpu_split:
+ split = [float(alloc) for alloc in shared.args.gpu_split.split(",")]
+
+ model.load(split)
+
+ tokenizer = ExLlamaV2Tokenizer(config)
+ cache = ExLlamaV2Cache(model)
+ generator = ExLlamaV2BaseGenerator(model, cache, tokenizer)
+
+ result = self()
+ result.model = model
+ result.cache = cache
+ result.tokenizer = tokenizer
+ result.generator = generator
+ result.loras = None
+ return result, result
+
+ def encode(self, string, **kwargs):
+ return self.tokenizer.encode(string, add_bos=True)
+
+ def decode(self, ids, **kwargs):
+ if isinstance(ids, list):
+ ids = torch.tensor([ids])
+ elif isinstance(ids, torch.Tensor) and ids.numel() == 1:
+ ids = ids.view(1, -1)
+
+ return self.tokenizer.decode(ids)[0]
+
+ def get_logits(self, token_ids, **kwargs):
+ self.cache.current_seq_len = 0
+ if token_ids.shape[-1] > 1:
+ self.model.forward(token_ids[:, :-1], self.cache, input_mask=None, preprocess_only=True, loras=self.loras)
+
+ return self.model.forward(token_ids[:, -1:], self.cache, input_mask=None, loras=self.loras, **kwargs).float().cpu()
+
+ def generate_with_streaming(self, prompt, state):
+ settings = ExLlamaV2Sampler.Settings()
+ settings.temperature = state['temperature']
+ settings.top_k = state['top_k']
+ settings.top_p = state['top_p']
+ settings.typical = state['typical_p']
+ settings.token_repetition_penalty = state['repetition_penalty']
+ settings.token_repetition_range = -1 if state['repetition_penalty_range'] <= 0 else state['repetition_penalty_range']
+ if state['ban_eos_token']:
+ settings.disallow_tokens(self.tokenizer, [self.tokenizer.eos_token_id])
+
+ if state['custom_token_bans']:
+ to_ban = [int(x) for x in state['custom_token_bans'].split(',')]
+ if len(to_ban) > 0:
+ settings.disallow_tokens(self.tokenizer, to_ban)
+
+ ids = self.tokenizer.encode(prompt, add_bos=state['add_bos_token'])
+ ids = ids[:, -get_max_prompt_length(state):]
+ initial_len = ids.shape[-1]
+
+ if state['auto_max_new_tokens']:
+ max_new_tokens = state['truncation_length'] - ids.shape[-1]
+ else:
+ max_new_tokens = state['max_new_tokens']
+
+ # _gen_begin_base
+ self.cache.current_seq_len = 0
+ self.model.forward(ids[:, :-1], self.cache, input_mask=None, preprocess_only=True, loras=self.loras)
+
+ has_leading_space = False
+ for i in range(max_new_tokens):
+ logits = self.model.forward(ids[:, -1:], self.cache, input_mask=None, loras=self.loras).float().cpu()
+ token, _, _ = ExLlamaV2Sampler.sample(logits, settings, ids, random.random(), self.tokenizer)
+ ids = torch.cat([ids, token], dim=1)
+
+ if i == 0 and self.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
+ has_leading_space = True
+
+ decoded_text = self.tokenizer.decode(ids[:, initial_len:])[0]
+ if has_leading_space:
+ decoded_text = ' ' + decoded_text
+
+ yield decoded_text
+
+ if token.item() == self.tokenizer.eos_token_id or shared.stop_everything:
+ break
+
+ def generate(self, prompt, state):
+ output = ''
+ for output in self.generate_with_streaming(prompt, state):
+ pass
+
+ return output
diff --git a/modules/exllamav2_hf.py b/modules/exllamav2_hf.py
new file mode 100644
index 0000000000000000000000000000000000000000..e12a0717f120b70ecac7148b51739d413272eec6
--- /dev/null
+++ b/modules/exllamav2_hf.py
@@ -0,0 +1,152 @@
+import os
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+
+import torch
+from exllamav2 import ExLlamaV2, ExLlamaV2Cache, ExLlamaV2Config
+from torch.nn import CrossEntropyLoss
+from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
+from transformers.modeling_outputs import CausalLMOutputWithPast
+
+from modules import shared
+from modules.logging_colors import logger
+
+try:
+ import flash_attn
+except ModuleNotFoundError:
+ logger.warning(
+ 'You are running ExLlamaV2 without flash-attention. This will cause the VRAM usage '
+ 'to be a lot higher than it could be.\n'
+ 'Try installing flash-attention following the instructions here: '
+ 'https://github.com/Dao-AILab/flash-attention#installation-and-features'
+ )
+ pass
+
+
+class Exllamav2HF(PreTrainedModel):
+ def __init__(self, config: ExLlamaV2Config):
+ super().__init__(PretrainedConfig())
+ self.ex_config = config
+ self.ex_model = ExLlamaV2(config)
+ split = None
+ if shared.args.gpu_split:
+ split = [float(alloc) for alloc in shared.args.gpu_split.split(",")]
+
+ self.ex_model.load(split)
+ self.generation_config = GenerationConfig()
+ self.loras = None
+
+ self.ex_cache = ExLlamaV2Cache(self.ex_model)
+ self.past_seq = None
+
+ if shared.args.cfg_cache:
+ self.ex_cache_negative = ExLlamaV2Cache(self.ex_model)
+ self.past_seq_negative = None
+
+ def _validate_model_class(self):
+ pass
+
+ def _validate_model_kwargs(self, model_kwargs: Dict[str, Any]):
+ pass
+
+ def prepare_inputs_for_generation(self, input_ids, **kwargs):
+ return {'input_ids': input_ids, **kwargs}
+
+ @property
+ def device(self) -> torch.device:
+ return torch.device(0)
+
+ def __call__(self, *args, **kwargs):
+ use_cache = kwargs.get('use_cache', True)
+ labels = kwargs.get('labels', None)
+ past_key_values = kwargs.get('past_key_values', None)
+
+ if len(args) > 0:
+ if not shared.args.cfg_cache:
+ logger.error("Please enable the cfg-cache option to use CFG with ExLlamav2_HF.")
+ return
+
+ input_ids = args[0]
+ is_negative = True
+ past_seq = self.past_seq_negative
+ ex_cache = self.ex_cache_negative
+ else:
+ input_ids = kwargs['input_ids']
+ is_negative = False
+ past_seq = self.past_seq
+ ex_cache = self.ex_cache
+
+ seq = input_ids[0].tolist()
+ if is_negative and past_key_values is not None:
+ seq = past_key_values + seq
+
+ seq_tensor = torch.tensor(seq)
+ reset = True
+
+ # Make the forward call
+ if labels is None:
+ if past_seq is not None:
+ min_length = min(past_seq.shape[0], seq_tensor.shape[0])
+ indices = torch.nonzero(~torch.eq(past_seq[:min_length], seq_tensor[:min_length]))
+ if len(indices) > 0:
+ longest_prefix = indices[0].item()
+ else:
+ longest_prefix = min_length
+
+ if longest_prefix > 0:
+ reset = False
+ ex_cache.current_seq_len = longest_prefix
+ if len(seq_tensor) - longest_prefix > 1:
+ self.ex_model.forward(seq_tensor[longest_prefix:-1].view(1, -1), ex_cache, preprocess_only=True, loras=self.loras)
+ elif len(seq_tensor) == longest_prefix:
+ # Very tricky: if the prefix we are reusing *is* the input_ids, then we have to back up the cache pointer by one,
+ # because we feed input_ids[-1] to forward() below, but that last token is already in the cache!
+ ex_cache.current_seq_len -= 1
+
+ if reset:
+ ex_cache.current_seq_len = 0
+ if len(seq_tensor) > 1:
+ self.ex_model.forward(seq_tensor[:-1].view(1, -1), ex_cache, preprocess_only=True, loras=self.loras)
+
+ logits = self.ex_model.forward(seq_tensor[-1:].view(1, -1), ex_cache, loras=self.loras).to(input_ids.device)
+ else:
+ ex_cache.current_seq_len = 0
+ logits = self.ex_model.forward(seq_tensor.view(1, -1), ex_cache, last_id_only=False, loras=self.loras)
+
+ if is_negative:
+ self.past_seq_negative = seq_tensor
+ else:
+ self.past_seq = seq_tensor
+
+ loss = None
+ if labels is not None:
+ # Shift so that tokens < n predict n
+ shift_logits = logits[..., :-1, :].contiguous()
+ shift_labels = labels[..., 1:].contiguous()
+ # Flatten the tokens
+ loss_fct = CrossEntropyLoss()
+ shift_logits = shift_logits.view(-1, logits.shape[-1])
+ shift_labels = shift_labels.view(-1)
+ # Enable model parallelism
+ shift_labels = shift_labels.to(shift_logits.device)
+ loss = loss_fct(shift_logits, shift_labels)
+
+ return CausalLMOutputWithPast(logits=logits, past_key_values=seq if use_cache else None, loss=loss)
+
+ @classmethod
+ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs):
+ assert len(model_args) == 0 and len(kwargs) == 0, "extra args is currently not supported"
+ if isinstance(pretrained_model_name_or_path, str):
+ pretrained_model_name_or_path = Path(pretrained_model_name_or_path)
+
+ pretrained_model_name_or_path = Path(f'{shared.args.model_dir}') / Path(pretrained_model_name_or_path)
+
+ config = ExLlamaV2Config()
+ config.model_dir = str(pretrained_model_name_or_path)
+ config.prepare()
+
+ config.max_seq_len = shared.args.max_seq_len
+ config.scale_pos_emb = shared.args.compress_pos_emb
+ config.scale_alpha_value = shared.args.alpha_value
+
+ return Exllamav2HF(config)
diff --git a/modules/extensions.py b/modules/extensions.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c0725043e83ac24220bc9bcec2255a1a9247795
--- /dev/null
+++ b/modules/extensions.py
@@ -0,0 +1,224 @@
+import traceback
+from functools import partial
+from inspect import signature
+
+import gradio as gr
+
+import extensions
+import modules.shared as shared
+from modules.logging_colors import logger
+
+state = {}
+available_extensions = []
+setup_called = set()
+
+
+def apply_settings(extension, name):
+ if not hasattr(extension, 'params'):
+ return
+
+ for param in extension.params:
+ _id = f"{name}-{param}"
+ if _id not in shared.settings:
+ continue
+
+ extension.params[param] = shared.settings[_id]
+
+
+def load_extensions():
+ global state, setup_called
+ state = {}
+ for i, name in enumerate(shared.args.extensions):
+ if name in available_extensions:
+ if name != 'api':
+ logger.info(f'Loading the extension "{name}"...')
+ try:
+ exec(f"import extensions.{name}.script")
+ extension = getattr(extensions, name).script
+ apply_settings(extension, name)
+ if extension not in setup_called and hasattr(extension, "setup"):
+ setup_called.add(extension)
+ extension.setup()
+
+ state[name] = [True, i]
+ except:
+ logger.error(f'Failed to load the extension "{name}".')
+ traceback.print_exc()
+
+
+# This iterator returns the extensions in the order specified in the command-line
+def iterator():
+ for name in sorted(state, key=lambda x: state[x][1]):
+ if state[name][0]:
+ yield getattr(extensions, name).script, name
+
+
+# Extension functions that map string -> string
+def _apply_string_extensions(function_name, text, state, is_chat=False):
+ for extension, _ in iterator():
+ if hasattr(extension, function_name):
+ func = getattr(extension, function_name)
+
+ # Handle old extensions without the 'state' arg or
+ # the 'is_chat' kwarg
+ count = 0
+ has_chat = False
+ for k in signature(func).parameters:
+ if k == 'is_chat':
+ has_chat = True
+ else:
+ count += 1
+
+ if count == 2:
+ args = [text, state]
+ else:
+ args = [text]
+
+ if has_chat:
+ kwargs = {'is_chat': is_chat}
+ else:
+ kwargs = {}
+
+ text = func(*args, **kwargs)
+
+ return text
+
+
+# Extension functions that map string -> string
+def _apply_chat_input_extensions(text, visible_text, state):
+ for extension, _ in iterator():
+ if hasattr(extension, 'chat_input_modifier'):
+ text, visible_text = extension.chat_input_modifier(text, visible_text, state)
+
+ return text, visible_text
+
+
+# custom_generate_chat_prompt handling - currently only the first one will work
+def _apply_custom_generate_chat_prompt(text, state, **kwargs):
+ for extension, _ in iterator():
+ if hasattr(extension, 'custom_generate_chat_prompt'):
+ return extension.custom_generate_chat_prompt(text, state, **kwargs)
+
+ return None
+
+
+# Extension that modifies the input parameters before they are used
+def _apply_state_modifier_extensions(state):
+ for extension, _ in iterator():
+ if hasattr(extension, "state_modifier"):
+ state = getattr(extension, "state_modifier")(state)
+
+ return state
+
+
+# Extension that modifies the chat history before it is used
+def _apply_history_modifier_extensions(history):
+ for extension, _ in iterator():
+ if hasattr(extension, "history_modifier"):
+ history = getattr(extension, "history_modifier")(history)
+
+ return history
+
+
+# Extension functions that override the default tokenizer output - The order of execution is not defined
+def _apply_tokenizer_extensions(function_name, state, prompt, input_ids, input_embeds):
+ for extension, _ in iterator():
+ if hasattr(extension, function_name):
+ prompt, input_ids, input_embeds = getattr(extension, function_name)(state, prompt, input_ids, input_embeds)
+
+ return prompt, input_ids, input_embeds
+
+
+# Allow extensions to add their own logits processors to the stack being run.
+# Each extension would call `processor_list.append({their LogitsProcessor}())`.
+def _apply_logits_processor_extensions(function_name, processor_list, input_ids):
+ for extension, _ in iterator():
+ if hasattr(extension, function_name):
+ result = getattr(extension, function_name)(processor_list, input_ids)
+ if type(result) is list:
+ processor_list = result
+
+ return processor_list
+
+
+# Get prompt length in tokens after applying extension functions which override the default tokenizer output
+# currently only the first one will work
+def _apply_custom_tokenized_length(prompt):
+ for extension, _ in iterator():
+ if hasattr(extension, 'custom_tokenized_length'):
+ return getattr(extension, 'custom_tokenized_length')(prompt)
+
+ return None
+
+
+# Custom generate reply handling - currently only the first one will work
+def _apply_custom_generate_reply():
+ for extension, _ in iterator():
+ if hasattr(extension, 'custom_generate_reply'):
+ return getattr(extension, 'custom_generate_reply')
+
+ return None
+
+
+def _apply_custom_css():
+ all_css = ''
+ for extension, _ in iterator():
+ if hasattr(extension, 'custom_css'):
+ all_css += getattr(extension, 'custom_css')()
+
+ return all_css
+
+
+def _apply_custom_js():
+ all_js = ''
+ for extension, _ in iterator():
+ if hasattr(extension, 'custom_js'):
+ all_js += getattr(extension, 'custom_js')()
+
+ return all_js
+
+
+def create_extensions_block():
+ to_display = []
+ for extension, name in iterator():
+ if hasattr(extension, "ui") and not (hasattr(extension, 'params') and extension.params.get('is_tab', False)):
+ to_display.append((extension, name))
+
+ # Creating the extension ui elements
+ if len(to_display) > 0:
+ with gr.Column(elem_id="extensions"):
+ for row in to_display:
+ extension, _ = row
+ extension.ui()
+
+
+def create_extensions_tabs():
+ for extension, name in iterator():
+ if hasattr(extension, "ui") and (hasattr(extension, 'params') and extension.params.get('is_tab', False)):
+ display_name = getattr(extension, 'params', {}).get('display_name', name)
+ with gr.Tab(display_name, elem_classes="extension-tab"):
+ extension.ui()
+
+
+EXTENSION_MAP = {
+ "input": partial(_apply_string_extensions, "input_modifier"),
+ "output": partial(_apply_string_extensions, "output_modifier"),
+ "chat_input": _apply_chat_input_extensions,
+ "state": _apply_state_modifier_extensions,
+ "history": _apply_history_modifier_extensions,
+ "bot_prefix": partial(_apply_string_extensions, "bot_prefix_modifier"),
+ "tokenizer": partial(_apply_tokenizer_extensions, "tokenizer_modifier"),
+ 'logits_processor': partial(_apply_logits_processor_extensions, 'logits_processor_modifier'),
+ "custom_generate_chat_prompt": _apply_custom_generate_chat_prompt,
+ "custom_generate_reply": _apply_custom_generate_reply,
+ "tokenized_length": _apply_custom_tokenized_length,
+ "css": _apply_custom_css,
+ "js": _apply_custom_js
+}
+
+
+def apply_extensions(typ, *args, **kwargs):
+ if typ not in EXTENSION_MAP:
+ raise ValueError(f"Invalid extension type {typ}")
+
+ return EXTENSION_MAP[typ](*args, **kwargs)
diff --git a/modules/github.py b/modules/github.py
new file mode 100644
index 0000000000000000000000000000000000000000..282267b6be7f3b0371a3fd332f98e38611c9fb9a
--- /dev/null
+++ b/modules/github.py
@@ -0,0 +1,38 @@
+import subprocess
+from pathlib import Path
+
+new_extensions = set()
+
+
+def clone_or_pull_repository(github_url):
+ global new_extensions
+
+ repository_folder = Path("extensions")
+ repo_name = github_url.rstrip("/").split("/")[-1].split(".")[0]
+
+ # Check if the repository folder exists
+ if not repository_folder.exists():
+ repository_folder.mkdir(parents=True)
+
+ repo_path = repository_folder / repo_name
+
+ # Check if the repository is already cloned
+ if repo_path.exists():
+ yield f"Updating {github_url}..."
+ # Perform a 'git pull' to update the repository
+ try:
+ pull_output = subprocess.check_output(["git", "-C", repo_path, "pull"], stderr=subprocess.STDOUT)
+ yield "Done."
+ return pull_output.decode()
+ except subprocess.CalledProcessError as e:
+ return str(e)
+
+ # Clone the repository
+ try:
+ yield f"Cloning {github_url}..."
+ clone_output = subprocess.check_output(["git", "clone", github_url, repo_path], stderr=subprocess.STDOUT)
+ new_extensions.add(repo_name)
+ yield f"The extension `{repo_name}` has been downloaded.\n\nPlease close the the web UI completely and launch it again to be able to load it."
+ return clone_output.decode()
+ except subprocess.CalledProcessError as e:
+ return str(e)
diff --git a/modules/grammar.py b/modules/grammar.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f6ad3a637d85bc31eecb141c149d562e53a90c2
--- /dev/null
+++ b/modules/grammar.py
@@ -0,0 +1,33 @@
+from torch_grammar import GrammarSampler
+from transformers.generation.logits_process import LogitsProcessor
+
+from modules import shared
+
+sampler = None
+grammar = None
+grammar_string = ''
+
+
+class GrammarLogitsProcessor(LogitsProcessor):
+ def __init__(self, string):
+
+ global sampler, grammar, grammar_string
+
+ if string != grammar_string:
+ grammar_string = string
+ if string.strip() != '':
+ string = string.strip() + '\n'
+ sampler = GrammarSampler(string, 'root', shared.tokenizer)
+ else:
+ sampler = None
+
+ if sampler is not None:
+ grammar = sampler.logits_processor()
+ else:
+ grammar = None
+
+ def __call__(self, input_ids, scores):
+ if grammar is not None:
+ scores = grammar(input_ids, scores)
+
+ return scores
diff --git a/modules/html_generator.py b/modules/html_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..26e478487dd7cae62b8ad93c6b075aa1d3180a3c
--- /dev/null
+++ b/modules/html_generator.py
@@ -0,0 +1,308 @@
+import html
+import os
+import re
+import time
+from pathlib import Path
+
+import markdown
+from PIL import Image, ImageOps
+
+from modules.utils import get_available_chat_styles
+
+# This is to store the paths to the thumbnails of the profile pictures
+image_cache = {}
+
+with open(Path(__file__).resolve().parent / '../css/html_readable_style.css', 'r') as f:
+ readable_css = f.read()
+with open(Path(__file__).resolve().parent / '../css/html_4chan_style.css', 'r') as css_f:
+ _4chan_css = css_f.read()
+with open(Path(__file__).resolve().parent / '../css/html_instruct_style.css', 'r') as f:
+ instruct_css = f.read()
+
+# Custom chat styles
+chat_styles = {}
+for k in get_available_chat_styles():
+ chat_styles[k] = open(Path(f'css/chat_style-{k}.css'), 'r').read()
+
+# Handle styles that derive from other styles
+for k in chat_styles:
+ lines = chat_styles[k].split('\n')
+ input_string = lines[0]
+ match = re.search(r'chat_style-([a-z\-]*)\.css', input_string)
+
+ if match:
+ style = match.group(1)
+ chat_styles[k] = chat_styles.get(style, '') + '\n\n' + '\n'.join(lines[1:])
+
+
+def fix_newlines(string):
+ string = string.replace('\n', '\n\n')
+ string = re.sub(r"\n{3,}", "\n\n", string)
+ string = string.strip()
+ return string
+
+
+def replace_blockquote(m):
+ return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')
+
+
+def convert_to_markdown(string):
+
+ # Blockquote
+ string = re.sub(r'(^|[\n])>', r'\1>', string)
+ pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL)
+ string = pattern.sub(replace_blockquote, string)
+
+ # Code
+ string = string.replace('\\begin{code}', '```')
+ string = string.replace('\\end{code}', '```')
+ string = re.sub(r"(.)```", r"\1\n```", string)
+
+ result = ''
+ is_code = False
+ for line in string.split('\n'):
+ if line.lstrip(' ').startswith('```'):
+ is_code = not is_code
+
+ result += line
+ if is_code or line.startswith('|'): # Don't add an extra \n for tables or code
+ result += '\n'
+ else:
+ result += '\n\n'
+
+ result = result.strip()
+ if is_code:
+ result += '\n```' # Unfinished code block
+
+ # Unfinished list, like "\n1.". A |delete| string is added and then
+ # removed to force a or
{src}\n'
+ src = f'Anonymous No.{number}\n{src}'
+ return src
+
+
+def generate_4chan_html(f):
+ posts = []
+ post = ''
+ c = -2
+ for line in f.splitlines():
+ line += "\n"
+ if line == '-----\n':
+ continue
+ elif line.startswith('--- '):
+ c += 1
+ if post != '':
+ src = process_post(post, c)
+ posts.append(src)
+ post = line
+ else:
+ post += line
+
+ if post != '':
+ src = process_post(post, c)
+ posts.append(src)
+
+ for i in range(len(posts)):
+ if i == 0:
+ posts[i] = f'
{posts[i]}
\n'
+ else:
+ posts[i] = f'
{posts[i]}
\n'
+
+ output = ''
+ output += f'
'
+ for post in posts:
+ output += post
+
+ output += '
'
+ output = output.split('\n')
+ for i in range(len(output)):
+ output[i] = re.sub(r'^(>(.*?)( |))', r'\1', output[i])
+ output[i] = re.sub(r'^
(>(.*?)( |))', r'
\1', output[i])
+
+ output = '\n'.join(output)
+ return output
+
+
+def make_thumbnail(image):
+ image = image.resize((350, round(image.size[1] / image.size[0] * 350)), Image.Resampling.LANCZOS)
+ if image.size[1] > 470:
+ image = ImageOps.fit(image, (350, 470), Image.LANCZOS)
+
+ return image
+
+
+def get_image_cache(path):
+ cache_folder = Path("cache")
+ if not cache_folder.exists():
+ cache_folder.mkdir()
+
+ mtime = os.stat(path).st_mtime
+ if (path in image_cache and mtime != image_cache[path][0]) or (path not in image_cache):
+ img = make_thumbnail(Image.open(path))
+
+ old_p = Path(f'cache/{path.name}_cache.png')
+ p = Path(f'cache/cache_{path.name}.png')
+ if old_p.exists():
+ old_p.rename(p)
+
+ output_file = p
+ img.convert('RGB').save(output_file, format='PNG')
+ image_cache[path] = [mtime, output_file.as_posix()]
+
+ return image_cache[path][1]
+
+
+def generate_instruct_html(history):
+ output = f'
'
+ for i, _row in enumerate(history):
+ row = [convert_to_markdown(entry) for entry in _row]
+
+ if row[0]: # don't display empty user messages
+ output += f"""
+
'
+
+ # We use ?name2 and ?time.time() to force the browser to reset caches
+ img_bot = f'' if Path("cache/pfp_character.png").exists() else ''
+ img_me = f'' if Path("cache/pfp_me.png").exists() else ''
+
+ for i, _row in enumerate(history):
+ row = [convert_to_markdown(entry) for entry in _row]
+
+ if row[0]: # don't display empty user messages
+ output += f"""
+
'
+
+ for i, _row in enumerate(history):
+ row = [convert_to_markdown(entry) for entry in _row]
+
+ if row[0]: # don't display empty user messages
+ output += f"""
+
+
+
+ {row[0]}
+
+
+
+ """
+
+ output += f"""
+
+
+
+ {row[1]}
+
+
+
+ """
+
+ output += "
"
+ return output
+
+
+def chat_html_wrapper(history, name1, name2, mode, style, reset_cache=False):
+ if mode == 'instruct':
+ return generate_instruct_html(history['visible'])
+ elif style == 'wpp':
+ return generate_chat_html(history['visible'], name1, name2)
+ else:
+ return generate_cai_chat_html(history['visible'], name1, name2, style, reset_cache)
diff --git a/modules/llama_attn_hijack.py b/modules/llama_attn_hijack.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7559a570f11617bd589dfc7f83abbbbe328d02a
--- /dev/null
+++ b/modules/llama_attn_hijack.py
@@ -0,0 +1,172 @@
+import math
+import sys
+from typing import Optional, Tuple
+
+import torch
+import torch.nn as nn
+
+
+import modules.shared as shared
+from modules.logging_colors import logger
+
+if shared.args.xformers:
+ try:
+ import xformers.ops
+ except Exception:
+ logger.error("xformers not found! Please install it before trying to use it.", file=sys.stderr)
+
+
+def hijack_llama_attention():
+ import transformers.models.llama.modeling_llama
+ if shared.args.xformers:
+ transformers.models.llama.modeling_llama.LlamaAttention.forward = xformers_forward
+ logger.info("Replaced attention with xformers_attention")
+ elif shared.args.sdp_attention:
+ transformers.models.llama.modeling_llama.LlamaAttention.forward = sdp_attention_forward
+ logger.info("Replaced attention with sdp_attention")
+
+
+def xformers_forward(
+ self,
+ hidden_states: torch.Tensor,
+ attention_mask: Optional[torch.Tensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ past_key_value: Optional[Tuple[torch.Tensor]] = None,
+ output_attentions: bool = False,
+ use_cache: bool = False,
+) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+ bsz, q_len, _ = hidden_states.size()
+
+ query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+ key_states = self.k_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+ value_states = self.v_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+
+ kv_seq_len = key_states.shape[-2]
+ if past_key_value is not None:
+ kv_seq_len += past_key_value[0].shape[-2]
+ cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
+ query_states, key_states = transformers.models.llama.modeling_llama.apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
+ # [bsz, nh, t, hd]
+
+ if past_key_value is not None:
+ # reuse k, v, self_attention
+ key_states = torch.cat([past_key_value[0], key_states], dim=2)
+ value_states = torch.cat([past_key_value[1], value_states], dim=2)
+
+ past_key_value = (key_states, value_states) if use_cache else None
+
+ # We only apply xformers optimizations if we don't need to output the whole attention matrix
+ if not output_attentions:
+ query_states = query_states.transpose(1, 2)
+ key_states = key_states.transpose(1, 2)
+ value_states = value_states.transpose(1, 2)
+
+ # This is a nasty hack. We know attention_mask in transformers is either LowerTriangular or all Zeros.
+ # We therefore check if one element in the upper triangular portion is zero. If it is, then the mask is all zeros.
+ if attention_mask is None or attention_mask[0, 0, 0, 1] == 0:
+ # input and output should be of form (bsz, q_len, num_heads, head_dim)
+ attn_output = xformers.ops.memory_efficient_attention(query_states, key_states, value_states, attn_bias=None)
+ else:
+ # input and output should be of form (bsz, q_len, num_heads, head_dim)
+ attn_output = xformers.ops.memory_efficient_attention(query_states, key_states, value_states, attn_bias=xformers.ops.LowerTriangularMask())
+ attn_weights = None
+ else:
+ attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
+
+ if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
+ raise ValueError(
+ f"Attention weights should be of size {(bsz * self.num_heads, q_len, kv_seq_len)}, but is"
+ f" {attn_weights.size()}"
+ )
+
+ if attention_mask is not None:
+ if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
+ raise ValueError(
+ f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}"
+ )
+ attn_weights = attn_weights + attention_mask
+ attn_weights = torch.max(attn_weights, torch.tensor(torch.finfo(attn_weights.dtype).min))
+
+ # upcast attention to fp32
+ attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
+ attn_output = torch.matmul(attn_weights, value_states)
+
+ if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
+ raise ValueError(
+ f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
+ f" {attn_output.size()}"
+ )
+
+ attn_output = attn_output.transpose(1, 2)
+
+ attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
+ attn_output = self.o_proj(attn_output)
+ return attn_output, attn_weights, past_key_value
+
+
+def sdp_attention_forward(
+ self,
+ hidden_states: torch.Tensor,
+ attention_mask: Optional[torch.Tensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ past_key_value: Optional[Tuple[torch.Tensor]] = None,
+ output_attentions: bool = False,
+ use_cache: bool = False,
+) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+ bsz, q_len, _ = hidden_states.size()
+
+ query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+ key_states = self.k_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+ value_states = self.v_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+
+ kv_seq_len = key_states.shape[-2]
+ if past_key_value is not None:
+ kv_seq_len += past_key_value[0].shape[-2]
+ cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
+ query_states, key_states = transformers.models.llama.modeling_llama.apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
+ # [bsz, nh, t, hd]
+
+ if past_key_value is not None:
+ # reuse k, v, self_attention
+ key_states = torch.cat([past_key_value[0], key_states], dim=2)
+ value_states = torch.cat([past_key_value[1], value_states], dim=2)
+
+ past_key_value = (key_states, value_states) if use_cache else None
+
+ # We only apply sdp attention if we don't need to output the whole attention matrix
+ if not output_attentions:
+ attn_output = torch.nn.functional.scaled_dot_product_attention(query_states, key_states, value_states, attn_mask=attention_mask, is_causal=False)
+ attn_weights = None
+ else:
+ attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
+
+ if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
+ raise ValueError(
+ f"Attention weights should be of size {(bsz * self.num_heads, q_len, kv_seq_len)}, but is"
+ f" {attn_weights.size()}"
+ )
+
+ if attention_mask is not None:
+ if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
+ raise ValueError(
+ f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}"
+ )
+ attn_weights = attn_weights + attention_mask
+ attn_weights = torch.max(attn_weights, torch.tensor(torch.finfo(attn_weights.dtype).min))
+
+ # upcast attention to fp32
+ attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
+ attn_output = torch.matmul(attn_weights, value_states)
+
+ if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
+ raise ValueError(
+ f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
+ f" {attn_output.size()}"
+ )
+
+ attn_output = attn_output.transpose(1, 2)
+ attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
+
+ attn_output = self.o_proj(attn_output)
+
+ return attn_output, attn_weights, past_key_value
diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py
new file mode 100644
index 0000000000000000000000000000000000000000..37f86e088002d29a297f3df851b1a4b259929713
--- /dev/null
+++ b/modules/llamacpp_hf.py
@@ -0,0 +1,213 @@
+import os
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+
+import torch
+from torch.nn import CrossEntropyLoss
+from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
+from transformers.modeling_outputs import CausalLMOutputWithPast
+
+from modules import RoPE, shared
+from modules.logging_colors import logger
+
+try:
+ import llama_cpp
+except:
+ llama_cpp = None
+
+try:
+ import llama_cpp_cuda
+except:
+ llama_cpp_cuda = None
+
+
+def llama_cpp_lib():
+ if (shared.args.cpu and llama_cpp is not None) or llama_cpp_cuda is None:
+ return llama_cpp
+ else:
+ return llama_cpp_cuda
+
+
+class LlamacppHF(PreTrainedModel):
+ def __init__(self, model, path):
+ super().__init__(PretrainedConfig())
+ self.model = model
+ self.generation_config = GenerationConfig()
+
+ self.past_seq = None
+ self.llamacpp_cache = {
+ 'n_tokens': self.model.n_tokens,
+ 'input_ids': self.model.input_ids,
+ 'scores': self.model.scores,
+ 'ctx': self.model.ctx
+ }
+
+ if shared.args.cfg_cache:
+ self.past_seq_negative = None
+ self.llamacpp_cache_negative = {
+ 'n_tokens': self.model.n_tokens,
+ 'input_ids': self.model.input_ids.copy(),
+ 'scores': self.model.scores.copy(),
+ 'ctx': llama_cpp_lib().llama_new_context_with_model(model.model, model.context_params)
+ }
+
+ def _validate_model_class(self):
+ pass
+
+ def _validate_model_kwargs(self, model_kwargs: Dict[str, Any]):
+ pass
+
+ def prepare_inputs_for_generation(self, input_ids, **kwargs):
+ return {'input_ids': input_ids, **kwargs}
+
+ def save_cache(self):
+ self.llamacpp_cache.update({
+ 'n_tokens': self.model.n_tokens,
+ 'input_ids': self.model.input_ids,
+ 'scores': self.model.scores,
+ 'ctx': self.model.ctx
+ })
+
+ def save_negative_cache(self):
+ self.llamacpp_cache_negative.update({
+ 'n_tokens': self.model.n_tokens,
+ 'input_ids': self.model.input_ids,
+ 'scores': self.model.scores,
+ 'ctx': self.model.ctx
+ })
+
+ def load_cache(self):
+ self.model.n_tokens = self.llamacpp_cache['n_tokens']
+ self.model.input_ids = self.llamacpp_cache['input_ids']
+ self.model.scores = self.llamacpp_cache['scores']
+ self.model.ctx = self.llamacpp_cache['ctx']
+
+ def load_negative_cache(self):
+ self.model.n_tokens = self.llamacpp_cache_negative['n_tokens']
+ self.model.input_ids = self.llamacpp_cache_negative['input_ids']
+ self.model.scores = self.llamacpp_cache_negative['scores']
+ self.model.ctx = self.llamacpp_cache_negative['ctx']
+
+ @property
+ def device(self) -> torch.device:
+ return torch.device(0)
+
+ def __call__(self, *args, **kwargs):
+ use_cache = kwargs.get('use_cache', True)
+ labels = kwargs.get('labels', None)
+ past_key_values = kwargs.get('past_key_values', None)
+
+ if len(args) > 0:
+ if not shared.args.cfg_cache:
+ logger.error("Please enable the cfg-cache option to use CFG with llamacpp_HF.")
+ return
+
+ input_ids = args[0]
+ is_negative = True
+ past_seq = self.past_seq_negative
+ self.load_negative_cache()
+ else:
+ input_ids = kwargs['input_ids']
+ is_negative = False
+ past_seq = self.past_seq
+ self.load_cache()
+
+ seq = input_ids[0].tolist()
+ if is_negative and past_key_values is not None:
+ seq = past_key_values + seq
+
+ seq_tensor = torch.tensor(seq)
+ reset = True
+
+ # Make the forward call. The prefix-match code has been adapted from
+ # https://github.com/abetlen/llama-cpp-python/commit/f4090a0bb2a2a25acfe28d31c82cc1aa273bedee
+ if labels is None:
+ if past_seq is not None:
+ min_length = min(past_seq.shape[0], seq_tensor.shape[0])
+ indices = torch.nonzero(~torch.eq(past_seq[:min_length], seq_tensor[:min_length]))
+ if len(indices) > 0:
+ longest_prefix = indices[0].item()
+ else:
+ longest_prefix = min_length
+
+ if longest_prefix > 0:
+ reset = False
+ self.model.n_tokens = longest_prefix
+ if len(seq_tensor) - longest_prefix > 0:
+ self.model.eval(seq[longest_prefix:])
+
+ if reset:
+ self.model.reset()
+ self.model.eval(seq)
+
+ logits = torch.tensor(self.model.scores[self.model.n_tokens - 1, :]).view(1, 1, -1).to(input_ids.device)
+ else:
+ self.model.reset()
+ self.model.eval(seq)
+ logits = torch.tensor(self.model.eval_logits)
+ logits = logits.view(1, logits.shape[0], logits.shape[1]).to(input_ids.device)
+
+ if is_negative:
+ self.save_negative_cache()
+ self.past_seq_negative = seq_tensor
+ else:
+ self.save_cache()
+ self.past_seq = seq_tensor
+
+ loss = None
+ if labels is not None:
+ # Shift so that tokens < n predict n
+ shift_logits = logits[..., :-1, :].contiguous()
+ shift_labels = labels[..., 1:].contiguous()
+ # Flatten the tokens
+ loss_fct = CrossEntropyLoss()
+ shift_logits = shift_logits.view(-1, logits.shape[-1])
+ shift_labels = shift_labels.view(-1)
+ # Enable model parallelism
+ shift_labels = shift_labels.to(shift_logits.device)
+ loss = loss_fct(shift_logits, shift_labels)
+
+ return CausalLMOutputWithPast(logits=logits, past_key_values=seq if use_cache else None, loss=loss)
+
+ @classmethod
+ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs):
+ assert len(model_args) == 0 and len(kwargs) == 0, "extra args is currently not supported"
+
+ if isinstance(pretrained_model_name_or_path, str):
+ pretrained_model_name_or_path = Path(pretrained_model_name_or_path)
+
+ path = Path(f'{shared.args.model_dir}') / Path(pretrained_model_name_or_path)
+ if path.is_file():
+ model_file = path
+ else:
+ model_file = list(path.glob('*.gguf'))[0]
+
+ logger.info(f"llama.cpp weights detected: {model_file}\n")
+
+ if shared.args.tensor_split is None or shared.args.tensor_split.strip() == '':
+ tensor_split_list = None
+ else:
+ tensor_split_list = [float(x) for x in shared.args.tensor_split.strip().split(",")]
+
+ params = {
+ 'model_path': str(model_file),
+ 'n_ctx': shared.args.n_ctx,
+ 'seed': int(shared.args.llama_cpp_seed),
+ 'n_threads': shared.args.threads or None,
+ 'n_threads_batch': shared.args.threads_batch or None,
+ 'n_batch': shared.args.n_batch,
+ 'use_mmap': not shared.args.no_mmap,
+ 'use_mlock': shared.args.mlock,
+ 'mul_mat_q': shared.args.mul_mat_q,
+ 'numa': shared.args.numa,
+ 'n_gpu_layers': shared.args.n_gpu_layers,
+ 'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
+ 'tensor_split': tensor_split_list,
+ 'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
+ 'logits_all': True,
+ }
+
+ Llama = llama_cpp_lib().Llama
+ model = Llama(**params)
+
+ return LlamacppHF(model, model_file)
diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..554da2b53f2810b254b8ccf59692854d35788552
--- /dev/null
+++ b/modules/llamacpp_model.py
@@ -0,0 +1,174 @@
+import re
+from functools import partial
+
+import numpy as np
+import torch
+
+from modules import RoPE, shared
+from modules.callbacks import Iteratorize
+from modules.logging_colors import logger
+from modules.text_generation import get_max_prompt_length
+
+try:
+ import llama_cpp
+except:
+ llama_cpp = None
+
+try:
+ import llama_cpp_cuda
+except:
+ llama_cpp_cuda = None
+
+
+def llama_cpp_lib():
+ if (shared.args.cpu and llama_cpp is not None) or llama_cpp_cuda is None:
+ return llama_cpp
+ else:
+ return llama_cpp_cuda
+
+
+def ban_eos_logits_processor(eos_token, input_ids, logits):
+ logits[eos_token] = -float('inf')
+ return logits
+
+
+def custom_token_ban_logits_processor(token_ids, input_ids, logits):
+ for token_id in token_ids:
+ logits[token_id] = -float('inf')
+
+ return logits
+
+
+class LlamaCppModel:
+ def __init__(self):
+ self.initialized = False
+ self.grammar_string = ''
+ self.grammar = None
+
+ def __del__(self):
+ self.model.__del__()
+
+ @classmethod
+ def from_pretrained(self, path):
+
+ Llama = llama_cpp_lib().Llama
+ LlamaCache = llama_cpp_lib().LlamaCache
+
+ result = self()
+ cache_capacity = 0
+ if shared.args.cache_capacity is not None:
+ if 'GiB' in shared.args.cache_capacity:
+ cache_capacity = int(re.sub('[a-zA-Z]', '', shared.args.cache_capacity)) * 1000 * 1000 * 1000
+ elif 'MiB' in shared.args.cache_capacity:
+ cache_capacity = int(re.sub('[a-zA-Z]', '', shared.args.cache_capacity)) * 1000 * 1000
+ else:
+ cache_capacity = int(shared.args.cache_capacity)
+
+ logger.info("Cache capacity is " + str(cache_capacity) + " bytes")
+
+ if shared.args.tensor_split is None or shared.args.tensor_split.strip() == '':
+ tensor_split_list = None
+ else:
+ tensor_split_list = [float(x) for x in shared.args.tensor_split.strip().split(",")]
+
+ params = {
+ 'model_path': str(path),
+ 'n_ctx': shared.args.n_ctx,
+ 'seed': int(shared.args.llama_cpp_seed),
+ 'n_threads': shared.args.threads or None,
+ 'n_threads_batch': shared.args.threads_batch or None,
+ 'n_batch': shared.args.n_batch,
+ 'use_mmap': not shared.args.no_mmap,
+ 'use_mlock': shared.args.mlock,
+ 'mul_mat_q': shared.args.mul_mat_q,
+ 'numa': shared.args.numa,
+ 'n_gpu_layers': shared.args.n_gpu_layers,
+ 'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
+ 'tensor_split': tensor_split_list,
+ 'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
+ }
+
+ result.model = Llama(**params)
+ if cache_capacity > 0:
+ result.model.set_cache(LlamaCache(capacity_bytes=cache_capacity))
+
+ # This is ugly, but the model and the tokenizer are the same object in this library.
+ return result, result
+
+ def encode(self, string):
+ if type(string) is str:
+ string = string.encode()
+
+ return self.model.tokenize(string)
+
+ def decode(self, ids):
+ return self.model.detokenize(ids).decode('utf-8')
+
+ def get_logits(self, tokens):
+ self.model.eval(tokens)
+ logits = self.model._scores
+ logits = np.expand_dims(logits, 0) # batch dim is expected
+ return torch.tensor(logits, dtype=torch.float32)
+
+ def load_grammar(self, string):
+ if string != self.grammar_string:
+ self.grammar_string = string
+ if string.strip() != '':
+ self.grammar = llama_cpp_lib().LlamaGrammar.from_string(string)
+ else:
+ self.grammar = None
+
+ def generate(self, prompt, state, callback=None):
+
+ LogitsProcessorList = llama_cpp_lib().LogitsProcessorList
+
+ prompt = prompt if type(prompt) is str else prompt.decode()
+
+ # Handle truncation
+ prompt = self.encode(prompt)
+ prompt = prompt[-get_max_prompt_length(state):]
+ prompt = self.decode(prompt)
+
+ self.load_grammar(state['grammar_string'])
+ logit_processors = LogitsProcessorList()
+ if state['ban_eos_token']:
+ logit_processors.append(partial(ban_eos_logits_processor, self.model.token_eos()))
+
+ if state['custom_token_bans']:
+ to_ban = [int(x) for x in state['custom_token_bans'].split(',')]
+ if len(to_ban) > 0:
+ logit_processors.append(partial(custom_token_ban_logits_processor, to_ban))
+
+ completion_chunks = self.model.create_completion(
+ prompt=prompt,
+ max_tokens=state['max_new_tokens'],
+ temperature=state['temperature'],
+ top_p=state['top_p'],
+ top_k=state['top_k'],
+ repeat_penalty=state['repetition_penalty'],
+ tfs_z=state['tfs'],
+ mirostat_mode=int(state['mirostat_mode']),
+ mirostat_tau=state['mirostat_tau'],
+ mirostat_eta=state['mirostat_eta'],
+ stream=True,
+ logits_processor=logit_processors,
+ grammar=self.grammar
+ )
+
+ output = ""
+ for completion_chunk in completion_chunks:
+ if shared.stop_everything:
+ break
+ text = completion_chunk['choices'][0]['text']
+ output += text
+ if callback:
+ callback(text)
+
+ return output
+
+ def generate_with_streaming(self, *args, **kwargs):
+ with Iteratorize(self.generate, args, kwargs, callback=None) as generator:
+ reply = ''
+ for token in generator:
+ reply += token
+ yield reply
diff --git a/modules/loaders.py b/modules/loaders.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab10e0a4dee24deb0d3dc54918be825308316f9e
--- /dev/null
+++ b/modules/loaders.py
@@ -0,0 +1,493 @@
+import functools
+from collections import OrderedDict
+
+import gradio as gr
+
+from modules import shared
+
+loaders_and_params = OrderedDict({
+ 'Transformers': [
+ 'cpu_memory',
+ 'gpu_memory',
+ 'trust_remote_code',
+ 'load_in_8bit',
+ 'bf16',
+ 'cpu',
+ 'disk',
+ 'auto_devices',
+ 'load_in_4bit',
+ 'use_double_quant',
+ 'quant_type',
+ 'compute_dtype',
+ 'trust_remote_code',
+ 'use_fast',
+ 'alpha_value',
+ 'rope_freq_base',
+ 'compress_pos_emb',
+ 'disable_exllama',
+ 'transformers_info'
+ ],
+ 'ExLlama_HF': [
+ 'gpu_split',
+ 'max_seq_len',
+ 'alpha_value',
+ 'rope_freq_base',
+ 'compress_pos_emb',
+ 'cfg_cache',
+ 'use_fast',
+ 'exllama_HF_info',
+ ],
+ 'ExLlamav2_HF': [
+ 'gpu_split',
+ 'max_seq_len',
+ 'cfg_cache',
+ 'alpha_value',
+ 'compress_pos_emb',
+ 'use_fast',
+ ],
+ 'ExLlama': [
+ 'gpu_split',
+ 'max_seq_len',
+ 'alpha_value',
+ 'rope_freq_base',
+ 'compress_pos_emb',
+ 'exllama_info',
+ ],
+ 'ExLlamav2': [
+ 'gpu_split',
+ 'max_seq_len',
+ 'alpha_value',
+ 'compress_pos_emb',
+ ],
+ 'AutoGPTQ': [
+ 'triton',
+ 'no_inject_fused_attention',
+ 'no_inject_fused_mlp',
+ 'no_use_cuda_fp16',
+ 'wbits',
+ 'groupsize',
+ 'desc_act',
+ 'disable_exllama',
+ 'gpu_memory',
+ 'cpu_memory',
+ 'cpu',
+ 'disk',
+ 'auto_devices',
+ 'trust_remote_code',
+ 'use_fast',
+ 'autogptq_info',
+ ],
+ 'GPTQ-for-LLaMa': [
+ 'wbits',
+ 'groupsize',
+ 'model_type',
+ 'pre_layer',
+ 'use_fast',
+ 'gptq_for_llama_info',
+ ],
+ 'llama.cpp': [
+ 'n_ctx',
+ 'n_gpu_layers',
+ 'tensor_split',
+ 'n_batch',
+ 'threads',
+ 'threads_batch',
+ 'no_mmap',
+ 'mlock',
+ 'mul_mat_q',
+ 'llama_cpp_seed',
+ 'alpha_value',
+ 'rope_freq_base',
+ 'compress_pos_emb',
+ 'cpu',
+ 'numa',
+ ],
+ 'llamacpp_HF': [
+ 'n_ctx',
+ 'n_gpu_layers',
+ 'tensor_split',
+ 'n_batch',
+ 'threads',
+ 'threads_batch',
+ 'no_mmap',
+ 'mlock',
+ 'mul_mat_q',
+ 'alpha_value',
+ 'rope_freq_base',
+ 'compress_pos_emb',
+ 'cpu',
+ 'numa',
+ 'cfg_cache',
+ 'use_fast',
+ 'llamacpp_HF_info',
+ ],
+ 'ctransformers': [
+ 'n_ctx',
+ 'n_gpu_layers',
+ 'n_batch',
+ 'threads',
+ 'model_type',
+ 'no_mmap',
+ 'mlock'
+ ],
+ 'AutoAWQ': [
+ 'cpu_memory',
+ 'gpu_memory',
+ 'auto_devices',
+ 'max_seq_len',
+ 'n_batch',
+ 'no_inject_fused_attention',
+ 'trust_remote_code',
+ 'use_fast',
+ ]
+})
+
+loaders_samplers = {
+ 'Transformers': {
+ 'temperature',
+ 'top_p',
+ 'top_k',
+ 'typical_p',
+ 'epsilon_cutoff',
+ 'eta_cutoff',
+ 'tfs',
+ 'top_a',
+ 'repetition_penalty',
+ 'repetition_penalty_range',
+ 'encoder_repetition_penalty',
+ 'no_repeat_ngram_size',
+ 'min_length',
+ 'seed',
+ 'do_sample',
+ 'penalty_alpha',
+ 'num_beams',
+ 'length_penalty',
+ 'early_stopping',
+ 'mirostat_mode',
+ 'mirostat_tau',
+ 'mirostat_eta',
+ 'grammar_file_row',
+ 'grammar_string',
+ 'guidance_scale',
+ 'negative_prompt',
+ 'ban_eos_token',
+ 'custom_token_bans',
+ 'add_bos_token',
+ 'skip_special_tokens',
+ 'auto_max_new_tokens',
+ },
+ 'ExLlama_HF': {
+ 'temperature',
+ 'top_p',
+ 'top_k',
+ 'typical_p',
+ 'epsilon_cutoff',
+ 'eta_cutoff',
+ 'tfs',
+ 'top_a',
+ 'repetition_penalty',
+ 'repetition_penalty_range',
+ 'encoder_repetition_penalty',
+ 'no_repeat_ngram_size',
+ 'min_length',
+ 'seed',
+ 'do_sample',
+ 'mirostat_mode',
+ 'mirostat_tau',
+ 'mirostat_eta',
+ 'grammar_file_row',
+ 'grammar_string',
+ 'guidance_scale',
+ 'negative_prompt',
+ 'ban_eos_token',
+ 'custom_token_bans',
+ 'add_bos_token',
+ 'skip_special_tokens',
+ 'auto_max_new_tokens',
+ },
+ 'ExLlama': {
+ 'temperature',
+ 'top_p',
+ 'top_k',
+ 'typical_p',
+ 'repetition_penalty',
+ 'repetition_penalty_range',
+ 'seed',
+ 'guidance_scale',
+ 'negative_prompt',
+ 'ban_eos_token',
+ 'add_bos_token',
+ 'custom_token_bans',
+ 'auto_max_new_tokens',
+ },
+ 'ExLlamav2': {
+ 'temperature',
+ 'top_p',
+ 'top_k',
+ 'typical_p',
+ 'repetition_penalty',
+ 'repetition_penalty_range',
+ 'seed',
+ 'ban_eos_token',
+ 'add_bos_token',
+ 'custom_token_bans',
+ 'auto_max_new_tokens',
+ },
+ 'ExLlamav2_HF': {
+ 'temperature',
+ 'top_p',
+ 'top_k',
+ 'typical_p',
+ 'epsilon_cutoff',
+ 'eta_cutoff',
+ 'tfs',
+ 'top_a',
+ 'repetition_penalty',
+ 'repetition_penalty_range',
+ 'encoder_repetition_penalty',
+ 'no_repeat_ngram_size',
+ 'min_length',
+ 'seed',
+ 'do_sample',
+ 'mirostat_mode',
+ 'mirostat_tau',
+ 'mirostat_eta',
+ 'grammar_file_row',
+ 'grammar_string',
+ 'guidance_scale',
+ 'negative_prompt',
+ 'ban_eos_token',
+ 'custom_token_bans',
+ 'add_bos_token',
+ 'skip_special_tokens',
+ 'auto_max_new_tokens',
+ },
+ 'AutoGPTQ': {
+ 'temperature',
+ 'top_p',
+ 'top_k',
+ 'typical_p',
+ 'epsilon_cutoff',
+ 'eta_cutoff',
+ 'tfs',
+ 'top_a',
+ 'repetition_penalty',
+ 'repetition_penalty_range',
+ 'encoder_repetition_penalty',
+ 'no_repeat_ngram_size',
+ 'min_length',
+ 'seed',
+ 'do_sample',
+ 'penalty_alpha',
+ 'num_beams',
+ 'length_penalty',
+ 'early_stopping',
+ 'mirostat_mode',
+ 'mirostat_tau',
+ 'mirostat_eta',
+ 'grammar_file_row',
+ 'grammar_string',
+ 'guidance_scale',
+ 'negative_prompt',
+ 'ban_eos_token',
+ 'custom_token_bans',
+ 'add_bos_token',
+ 'skip_special_tokens',
+ 'auto_max_new_tokens',
+ },
+ 'GPTQ-for-LLaMa': {
+ 'temperature',
+ 'top_p',
+ 'top_k',
+ 'typical_p',
+ 'epsilon_cutoff',
+ 'eta_cutoff',
+ 'tfs',
+ 'top_a',
+ 'repetition_penalty',
+ 'repetition_penalty_range',
+ 'encoder_repetition_penalty',
+ 'no_repeat_ngram_size',
+ 'min_length',
+ 'seed',
+ 'do_sample',
+ 'penalty_alpha',
+ 'num_beams',
+ 'length_penalty',
+ 'early_stopping',
+ 'mirostat_mode',
+ 'mirostat_tau',
+ 'mirostat_eta',
+ 'grammar_file_row',
+ 'grammar_string',
+ 'guidance_scale',
+ 'negative_prompt',
+ 'ban_eos_token',
+ 'custom_token_bans',
+ 'add_bos_token',
+ 'skip_special_tokens',
+ 'auto_max_new_tokens',
+ },
+ 'llama.cpp': {
+ 'temperature',
+ 'top_p',
+ 'top_k',
+ 'tfs',
+ 'repetition_penalty',
+ 'mirostat_mode',
+ 'mirostat_tau',
+ 'mirostat_eta',
+ 'grammar_file_row',
+ 'grammar_string',
+ 'ban_eos_token',
+ 'custom_token_bans',
+ },
+ 'llamacpp_HF': {
+ 'temperature',
+ 'top_p',
+ 'top_k',
+ 'typical_p',
+ 'epsilon_cutoff',
+ 'eta_cutoff',
+ 'tfs',
+ 'top_a',
+ 'repetition_penalty',
+ 'repetition_penalty_range',
+ 'encoder_repetition_penalty',
+ 'no_repeat_ngram_size',
+ 'min_length',
+ 'seed',
+ 'do_sample',
+ 'mirostat_mode',
+ 'mirostat_tau',
+ 'mirostat_eta',
+ 'grammar_file_row',
+ 'grammar_string',
+ 'guidance_scale',
+ 'negative_prompt',
+ 'ban_eos_token',
+ 'custom_token_bans',
+ 'add_bos_token',
+ 'skip_special_tokens',
+ 'auto_max_new_tokens',
+ },
+ 'ctransformers': {
+ 'temperature',
+ 'top_p',
+ 'top_k',
+ 'repetition_penalty',
+ 'repetition_penalty_range',
+ },
+ 'AutoAWQ': {
+ 'temperature',
+ 'top_p',
+ 'top_k',
+ 'typical_p',
+ 'epsilon_cutoff',
+ 'eta_cutoff',
+ 'tfs',
+ 'top_a',
+ 'repetition_penalty',
+ 'repetition_penalty_range',
+ 'encoder_repetition_penalty',
+ 'no_repeat_ngram_size',
+ 'min_length',
+ 'seed',
+ 'do_sample',
+ 'penalty_alpha',
+ 'num_beams',
+ 'length_penalty',
+ 'early_stopping',
+ 'mirostat_mode',
+ 'mirostat_tau',
+ 'mirostat_eta',
+ 'grammar_file_row',
+ 'grammar_string',
+ 'guidance_scale',
+ 'negative_prompt',
+ 'ban_eos_token',
+ 'custom_token_bans',
+ 'add_bos_token',
+ 'skip_special_tokens',
+ 'auto_max_new_tokens',
+ },
+}
+
+loaders_model_types = {
+ 'GPTQ-for-LLaMa': [
+ "None",
+ "llama",
+ "opt",
+ "gptj"
+ ],
+ 'ctransformers': [
+ "None",
+ "gpt2",
+ "gptj",
+ "gptneox",
+ "llama",
+ "mpt",
+ "dollyv2",
+ "replit",
+ "starcoder",
+ "gptbigcode",
+ "falcon"
+ ],
+}
+
+
+@functools.cache
+def list_all_samplers():
+ all_samplers = set()
+ for k in loaders_samplers:
+ for sampler in loaders_samplers[k]:
+ all_samplers.add(sampler)
+
+ return sorted(all_samplers)
+
+
+def blacklist_samplers(loader):
+ all_samplers = list_all_samplers()
+ if loader == 'All':
+ return [gr.update(visible=True) for sampler in all_samplers]
+ else:
+ return [gr.update(visible=True) if sampler in loaders_samplers[loader] else gr.update(visible=False) for sampler in all_samplers]
+
+
+def get_model_types(loader):
+ if loader in loaders_model_types:
+ return loaders_model_types[loader]
+
+ return ["None"]
+
+
+def get_gpu_memory_keys():
+ return [k for k in shared.gradio if k.startswith('gpu_memory')]
+
+
+@functools.cache
+def get_all_params():
+ all_params = set()
+ for k in loaders_and_params:
+ for el in loaders_and_params[k]:
+ all_params.add(el)
+
+ if 'gpu_memory' in all_params:
+ all_params.remove('gpu_memory')
+ for k in get_gpu_memory_keys():
+ all_params.add(k)
+
+ return sorted(all_params)
+
+
+def make_loader_params_visible(loader):
+ params = []
+ all_params = get_all_params()
+ if loader in loaders_and_params:
+ params = loaders_and_params[loader]
+
+ if 'gpu_memory' in params:
+ params.remove('gpu_memory')
+ params += get_gpu_memory_keys()
+
+ return [gr.update(visible=True) if k in params else gr.update(visible=False) for k in all_params]
diff --git a/modules/logging_colors.py b/modules/logging_colors.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0c97c3a76cfc17eb5d8d8bb310a5389ab5db719
--- /dev/null
+++ b/modules/logging_colors.py
@@ -0,0 +1,117 @@
+# Copied from https://stackoverflow.com/a/1336640
+
+import logging
+import platform
+
+logging.basicConfig(
+ format='%(asctime)s %(levelname)s:%(message)s',
+ datefmt='%Y-%m-%d %H:%M:%S',
+)
+
+
+def add_coloring_to_emit_windows(fn):
+ # add methods we need to the class
+ def _out_handle(self):
+ import ctypes
+ return ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE)
+ out_handle = property(_out_handle)
+
+ def _set_color(self, code):
+ import ctypes
+
+ # Constants from the Windows API
+ self.STD_OUTPUT_HANDLE = -11
+ hdl = ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE)
+ ctypes.windll.kernel32.SetConsoleTextAttribute(hdl, code)
+
+ setattr(logging.StreamHandler, '_set_color', _set_color)
+
+ def new(*args):
+ FOREGROUND_BLUE = 0x0001 # text color contains blue.
+ FOREGROUND_GREEN = 0x0002 # text color contains green.
+ FOREGROUND_RED = 0x0004 # text color contains red.
+ FOREGROUND_INTENSITY = 0x0008 # text color is intensified.
+ FOREGROUND_WHITE = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED
+ # winbase.h
+ # STD_INPUT_HANDLE = -10
+ # STD_OUTPUT_HANDLE = -11
+ # STD_ERROR_HANDLE = -12
+
+ # wincon.h
+ # FOREGROUND_BLACK = 0x0000
+ FOREGROUND_BLUE = 0x0001
+ FOREGROUND_GREEN = 0x0002
+ # FOREGROUND_CYAN = 0x0003
+ FOREGROUND_RED = 0x0004
+ FOREGROUND_MAGENTA = 0x0005
+ FOREGROUND_YELLOW = 0x0006
+ # FOREGROUND_GREY = 0x0007
+ FOREGROUND_INTENSITY = 0x0008 # foreground color is intensified.
+
+ # BACKGROUND_BLACK = 0x0000
+ # BACKGROUND_BLUE = 0x0010
+ # BACKGROUND_GREEN = 0x0020
+ # BACKGROUND_CYAN = 0x0030
+ # BACKGROUND_RED = 0x0040
+ # BACKGROUND_MAGENTA = 0x0050
+ BACKGROUND_YELLOW = 0x0060
+ # BACKGROUND_GREY = 0x0070
+ BACKGROUND_INTENSITY = 0x0080 # background color is intensified.
+
+ levelno = args[1].levelno
+ if (levelno >= 50):
+ color = BACKGROUND_YELLOW | FOREGROUND_RED | FOREGROUND_INTENSITY | BACKGROUND_INTENSITY
+ elif (levelno >= 40):
+ color = FOREGROUND_RED | FOREGROUND_INTENSITY
+ elif (levelno >= 30):
+ color = FOREGROUND_YELLOW | FOREGROUND_INTENSITY
+ elif (levelno >= 20):
+ color = FOREGROUND_GREEN
+ elif (levelno >= 10):
+ color = FOREGROUND_MAGENTA
+ else:
+ color = FOREGROUND_WHITE
+ args[0]._set_color(color)
+
+ ret = fn(*args)
+ args[0]._set_color(FOREGROUND_WHITE)
+ # print "after"
+ return ret
+ return new
+
+
+def add_coloring_to_emit_ansi(fn):
+ # add methods we need to the class
+ def new(*args):
+ levelno = args[1].levelno
+ if (levelno >= 50):
+ color = '\x1b[31m' # red
+ elif (levelno >= 40):
+ color = '\x1b[31m' # red
+ elif (levelno >= 30):
+ color = '\x1b[33m' # yellow
+ elif (levelno >= 20):
+ color = '\x1b[32m' # green
+ elif (levelno >= 10):
+ color = '\x1b[35m' # pink
+ else:
+ color = '\x1b[0m' # normal
+ args[1].msg = color + args[1].msg + '\x1b[0m' # normal
+ # print "after"
+ return fn(*args)
+ return new
+
+
+if platform.system() == 'Windows':
+ # Windows does not support ANSI escapes and we are using API calls to set the console color
+ logging.StreamHandler.emit = add_coloring_to_emit_windows(logging.StreamHandler.emit)
+else:
+ # all non-Windows platforms are supporting ANSI escapes so we use them
+ logging.StreamHandler.emit = add_coloring_to_emit_ansi(logging.StreamHandler.emit)
+ # log = logging.getLogger()
+ # log.addFilter(log_filter())
+ # //hdlr = logging.StreamHandler()
+ # //hdlr.setFormatter(formatter())
+
+logger = logging.getLogger('text-generation-webui')
+logger.setLevel(logging.DEBUG)
diff --git a/modules/logits.py b/modules/logits.py
new file mode 100644
index 0000000000000000000000000000000000000000..6fc5bf6077997c0e60c63f328a033767799c1022
--- /dev/null
+++ b/modules/logits.py
@@ -0,0 +1,56 @@
+import torch
+
+from modules import sampler_hijack, shared
+from modules.logging_colors import logger
+from modules.text_generation import generate_reply
+
+global_scores = None
+
+
+def get_next_logits(prompt, state, use_samplers, previous):
+ if shared.model is None:
+ logger.error("No model is loaded! Select one in the Model tab.")
+ return 'Error: No model is loaded1 Select one in the Model tab.', previous
+
+ is_non_hf_exllamav2 = shared.model.__class__.__name__ == 'Exllamav2Model'
+ is_non_hf_exllamav1 = shared.model.__class__.__name__ == 'ExllamaModel'
+ is_non_hf_llamacpp = shared.model.__class__.__name__ == 'LlamaCppModel'
+
+ if use_samplers:
+ if any([is_non_hf_exllamav2, is_non_hf_exllamav1, is_non_hf_llamacpp]):
+ logger.error("Sampler hijacking is not supported non-Huggingface loaders.")
+ # sampling is all done in c for exllama, so it is really hard to hijack
+ # it should be possible to hijack llamacpp sampler by hijacking all their sampling methods,
+ # but it is not implemented yet
+ return 'Error: Sampler hijacking is not supported non-Huggingface loaders. Please disable the "Use samplers" option.', previous
+
+ state['max_new_tokens'] = 1
+ state['auto_max_new_tokens'] = False
+ for _ in generate_reply(prompt, state):
+ pass
+
+ scores = sampler_hijack.global_scores[-1]
+ else:
+ if is_non_hf_exllamav2 or is_non_hf_exllamav1:
+ tokens = shared.tokenizer.encode(prompt).cuda()
+ scores = shared.model.get_logits(tokens)[-1][-1]
+ elif is_non_hf_llamacpp:
+ tokens = shared.tokenizer.encode(prompt)
+ scores = shared.model.get_logits(tokens)[-1][-1]
+ else:
+ tokens = shared.tokenizer.encode(prompt, return_tensors='pt').cuda()
+ output = shared.model(input_ids=tokens)
+ scores = output['logits'][-1][-1]
+
+ probs = torch.softmax(scores, dim=-1, dtype=torch.float)
+ topk_values, topk_indices = torch.topk(probs, k=50, largest=True, sorted=True)
+ topk_values = [f"{float(i):.5f}" for i in topk_values]
+ if is_non_hf_exllamav1 or is_non_hf_llamacpp:
+ topk_indices = [i.expand((1, 1)) for i in topk_indices]
+
+ tokens = [shared.tokenizer.decode(i) for i in topk_indices]
+ output = ''
+ for row in list(zip(topk_values, tokens)):
+ output += f"{row[0]} - {repr(row[1])}\n"
+
+ return output, previous
diff --git a/modules/metadata_gguf.py b/modules/metadata_gguf.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ea41a2a2d1f85e9f0da3235fa1a2d26fd556c40
--- /dev/null
+++ b/modules/metadata_gguf.py
@@ -0,0 +1,91 @@
+import struct
+from enum import IntEnum
+
+
+class GGUFValueType(IntEnum):
+ UINT8 = 0
+ INT8 = 1
+ UINT16 = 2
+ INT16 = 3
+ UINT32 = 4
+ INT32 = 5
+ FLOAT32 = 6
+ BOOL = 7
+ STRING = 8
+ ARRAY = 9
+ UINT64 = 10
+ INT64 = 11
+ FLOAT64 = 12
+
+
+_simple_value_packing = {
+ GGUFValueType.UINT8: " 1, shared.args.alpha_value > 1, shared.args.disable_exllama]):
+ model = LoaderClass.from_pretrained(path_to_model, **params)
+ if torch.backends.mps.is_available():
+ device = torch.device('mps')
+ model = model.to(device)
+ else:
+ model = model.cuda()
+
+ # DeepSpeed ZeRO-3
+ elif shared.args.deepspeed:
+ model = LoaderClass.from_pretrained(path_to_model, torch_dtype=params['torch_dtype'])
+ model = deepspeed.initialize(model=model, config_params=ds_config, model_parameters=None, optimizer=None, lr_scheduler=None)[0]
+ model.module.eval() # Inference
+ logger.info(f'DeepSpeed ZeRO-3 is enabled: {is_deepspeed_zero3_enabled()}')
+
+ # Load with quantization and/or offloading
+ else:
+ if not any((shared.args.cpu, torch.cuda.is_available(), torch.backends.mps.is_available())):
+ logger.warning('torch.cuda.is_available() returned False. This means that no GPU has been detected. Falling back to CPU mode.')
+ shared.args.cpu = True
+
+ if shared.args.cpu:
+ params['torch_dtype'] = torch.float32
+ else:
+ params['device_map'] = 'auto'
+ params['max_memory'] = get_max_memory_dict()
+ if shared.args.load_in_4bit:
+ # See https://github.com/huggingface/transformers/pull/23479/files
+ # and https://huggingface.co/blog/4bit-transformers-bitsandbytes
+ quantization_config_params = {
+ 'load_in_4bit': True,
+ 'bnb_4bit_compute_dtype': eval("torch.{}".format(shared.args.compute_dtype)) if shared.args.compute_dtype in ["bfloat16", "float16", "float32"] else None,
+ 'bnb_4bit_quant_type': shared.args.quant_type,
+ 'bnb_4bit_use_double_quant': shared.args.use_double_quant,
+ }
+
+ logger.info('Using the following 4-bit params: ' + str(quantization_config_params))
+ params['quantization_config'] = BitsAndBytesConfig(**quantization_config_params)
+
+ elif shared.args.load_in_8bit:
+ if any((shared.args.auto_devices, shared.args.gpu_memory)):
+ params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)
+ else:
+ params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True)
+
+ if params['max_memory'] is not None:
+ with init_empty_weights():
+ model = LoaderClass.from_config(config, trust_remote_code=params['trust_remote_code'])
+
+ model.tie_weights()
+ params['device_map'] = infer_auto_device_map(
+ model,
+ dtype=torch.int8,
+ max_memory=params['max_memory'],
+ no_split_module_classes=model._no_split_modules
+ )
+
+ if shared.args.disk:
+ params['offload_folder'] = shared.args.disk_cache_dir
+
+ if shared.args.disable_exllama:
+ try:
+ gptq_config = GPTQConfig(bits=config.quantization_config.get('bits', 4), disable_exllama=True)
+ params['quantization_config'] = gptq_config
+ logger.info('Loading with ExLlama kernel disabled.')
+ except:
+ exc = traceback.format_exc()
+ logger.error('Failed to disable exllama. Does the config.json for this model contain the necessary quantization info?')
+ print(exc)
+
+ if shared.args.compress_pos_emb > 1:
+ params['rope_scaling'] = {'type': 'linear', 'factor': shared.args.compress_pos_emb}
+ elif shared.args.alpha_value > 1:
+ params['rope_scaling'] = {'type': 'dynamic', 'factor': RoPE.get_alpha_value(shared.args.alpha_value, shared.args.rope_freq_base)}
+
+ model = LoaderClass.from_pretrained(path_to_model, **params)
+
+ return model
+
+
+def llamacpp_loader(model_name):
+ from modules.llamacpp_model import LlamaCppModel
+
+ path = Path(f'{shared.args.model_dir}/{model_name}')
+ if path.is_file():
+ model_file = path
+ else:
+ model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('*.gguf'))[0]
+
+ logger.info(f"llama.cpp weights detected: {model_file}")
+ model, tokenizer = LlamaCppModel.from_pretrained(model_file)
+ return model, tokenizer
+
+
+def llamacpp_HF_loader(model_name):
+ from modules.llamacpp_hf import LlamacppHF
+
+ for fname in [model_name, "oobabooga_llama-tokenizer", "llama-tokenizer"]:
+ path = Path(f'{shared.args.model_dir}/{fname}')
+ if all((path / file).exists() for file in ['tokenizer_config.json', 'special_tokens_map.json', 'tokenizer.model']):
+ logger.info(f'Using tokenizer from: {path}')
+ break
+ else:
+ logger.error("Could not load the model because a tokenizer in transformers format was not found. Please download oobabooga/llama-tokenizer.")
+ return None, None
+
+ if shared.args.use_fast:
+ logger.info('Loading the tokenizer with use_fast=True.')
+
+ tokenizer = AutoTokenizer.from_pretrained(
+ path,
+ trust_remote_code=shared.args.trust_remote_code,
+ use_fast=shared.args.use_fast
+ )
+
+ model = LlamacppHF.from_pretrained(model_name)
+ return model, tokenizer
+
+
+def ctransformers_loader(model_name):
+ from modules.ctransformers_model import CtransformersModel
+
+ path = Path(f'{shared.args.model_dir}/{model_name}')
+ ctrans = CtransformersModel()
+ if ctrans.model_type_is_auto():
+ model_file = path
+ else:
+ if path.is_file():
+ model_file = path
+ else:
+ entries = Path(f'{shared.args.model_dir}/{model_name}')
+ gguf = list(entries.glob('*.gguf'))
+ bin = list(entries.glob('*.bin'))
+ if len(gguf) > 0:
+ model_file = gguf[0]
+ elif len(bin) > 0:
+ model_file = bin[0]
+ else:
+ logger.error("Could not find a model for ctransformers.")
+ return None, None
+
+ logger.info(f'ctransformers weights detected: {model_file}')
+ model, tokenizer = ctrans.from_pretrained(model_file)
+ return model, tokenizer
+
+
+def AutoAWQ_loader(model_name):
+ from awq import AutoAWQForCausalLM
+
+ model_dir = Path(f'{shared.args.model_dir}/{model_name}')
+
+ model = AutoAWQForCausalLM.from_quantized(
+ quant_path=model_dir,
+ max_new_tokens=shared.args.max_seq_len,
+ trust_remote_code=shared.args.trust_remote_code,
+ fuse_layers=not shared.args.no_inject_fused_attention,
+ max_memory=get_max_memory_dict(),
+ batch_size=shared.args.n_batch,
+ safetensors=any(model_dir.glob('*.safetensors')),
+ )
+
+ return model
+
+
+def GPTQ_loader(model_name):
+
+ # Monkey patch
+ if shared.args.monkey_patch:
+ logger.warning("Applying the monkey patch for using LoRAs with GPTQ models. It may cause undefined behavior outside its intended scope.")
+ from modules.monkey_patch_gptq_lora import load_model_llama
+
+ model, _ = load_model_llama(model_name)
+
+ # No monkey patch
+ else:
+ import modules.GPTQ_loader
+
+ model = modules.GPTQ_loader.load_quantized(model_name)
+
+ return model
+
+
+def AutoGPTQ_loader(model_name):
+ import modules.AutoGPTQ_loader
+
+ return modules.AutoGPTQ_loader.load_quantized(model_name)
+
+
+def ExLlama_loader(model_name):
+ from modules.exllama import ExllamaModel
+
+ model, tokenizer = ExllamaModel.from_pretrained(model_name)
+ return model, tokenizer
+
+
+def ExLlama_HF_loader(model_name):
+ from modules.exllama_hf import ExllamaHF
+
+ return ExllamaHF.from_pretrained(model_name)
+
+
+def ExLlamav2_loader(model_name):
+ from modules.exllamav2 import Exllamav2Model
+
+ model, tokenizer = Exllamav2Model.from_pretrained(model_name)
+ return model, tokenizer
+
+
+def ExLlamav2_HF_loader(model_name):
+ from modules.exllamav2_hf import Exllamav2HF
+
+ return Exllamav2HF.from_pretrained(model_name)
+
+
+def RWKV_loader(model_name):
+ '''
+ This loader is not currently maintained as RWKV can now be loaded
+ through the transformers library.
+ '''
+ from modules.RWKV import RWKVModel, RWKVTokenizer
+
+ model = RWKVModel.from_pretrained(Path(f'{shared.args.model_dir}/{model_name}'), dtype="fp32" if shared.args.cpu else "bf16" if shared.args.bf16 else "fp16", device="cpu" if shared.args.cpu else "cuda")
+ tokenizer = RWKVTokenizer.from_pretrained(Path(shared.args.model_dir))
+ return model, tokenizer
+
+
+def get_max_memory_dict():
+ max_memory = {}
+ if shared.args.gpu_memory:
+ memory_map = list(map(lambda x: x.strip(), shared.args.gpu_memory))
+ for i in range(len(memory_map)):
+ max_memory[i] = f'{memory_map[i]}GiB' if not re.match('.*ib$', memory_map[i].lower()) else memory_map[i]
+
+ max_cpu_memory = shared.args.cpu_memory.strip() if shared.args.cpu_memory is not None else '99GiB'
+ max_memory['cpu'] = f'{max_cpu_memory}GiB' if not re.match('.*ib$', max_cpu_memory.lower()) else max_cpu_memory
+
+ # If --auto-devices is provided standalone, try to get a reasonable value
+ # for the maximum memory of device :0
+ elif shared.args.auto_devices:
+ total_mem = (torch.cuda.get_device_properties(0).total_memory / (1024 * 1024))
+ suggestion = round((total_mem - 1000) / 1000) * 1000
+ if total_mem - suggestion < 800:
+ suggestion -= 1000
+
+ suggestion = int(round(suggestion / 1000))
+ logger.warning(f"Auto-assiging --gpu-memory {suggestion} for your GPU to try to prevent out-of-memory errors. You can manually set other values.")
+ max_memory = {0: f'{suggestion}GiB', 'cpu': f'{shared.args.cpu_memory or 99}GiB'}
+
+ return max_memory if len(max_memory) > 0 else None
+
+
+def clear_torch_cache():
+ gc.collect()
+ if not shared.args.cpu:
+ torch.cuda.empty_cache()
+
+
+def unload_model():
+ shared.model = shared.tokenizer = None
+ shared.lora_names = []
+ shared.model_dirty_from_training = False
+ clear_torch_cache()
+
+
+def reload_model():
+ unload_model()
+ shared.model, shared.tokenizer = load_model(shared.model_name)
diff --git a/modules/models_settings.py b/modules/models_settings.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b9e9bd0f89dc7cc86157a3fb3d4923711e670cd
--- /dev/null
+++ b/modules/models_settings.py
@@ -0,0 +1,223 @@
+import json
+import re
+from pathlib import Path
+
+import yaml
+
+from modules import loaders, metadata_gguf, shared, ui
+
+
+def get_fallback_settings():
+ return {
+ 'wbits': 'None',
+ 'groupsize': 'None',
+ 'desc_act': False,
+ 'model_type': 'None',
+ 'max_seq_len': 2048,
+ 'n_ctx': 2048,
+ 'rope_freq_base': 0,
+ 'compress_pos_emb': 1,
+ 'truncation_length': shared.settings['truncation_length'],
+ 'skip_special_tokens': shared.settings['skip_special_tokens'],
+ 'custom_stopping_strings': shared.settings['custom_stopping_strings'],
+ }
+
+
+def get_model_metadata(model):
+ model_settings = {}
+
+ # Get settings from models/config.yaml and models/config-user.yaml
+ settings = shared.model_config
+ for pat in settings:
+ if re.match(pat.lower(), model.lower()):
+ for k in settings[pat]:
+ model_settings[k] = settings[pat][k]
+
+ if 'loader' not in model_settings:
+ loader = infer_loader(model, model_settings)
+ if 'wbits' in model_settings and type(model_settings['wbits']) is int and model_settings['wbits'] > 0:
+ loader = 'AutoGPTQ'
+
+ model_settings['loader'] = loader
+
+ # Read GGUF metadata
+ if model_settings['loader'] in ['llama.cpp', 'llamacpp_HF', 'ctransformers']:
+ path = Path(f'{shared.args.model_dir}/{model}')
+ if path.is_file():
+ model_file = path
+ else:
+ model_file = list(path.glob('*.gguf'))[0]
+
+ metadata = metadata_gguf.load_metadata(model_file)
+ if 'llama.context_length' in metadata:
+ model_settings['n_ctx'] = metadata['llama.context_length']
+ if 'llama.rope.scale_linear' in metadata:
+ model_settings['compress_pos_emb'] = metadata['llama.rope.scale_linear']
+ if 'llama.rope.freq_base' in metadata:
+ model_settings['rope_freq_base'] = metadata['llama.rope.freq_base']
+
+ else:
+ # Read transformers metadata
+ path = Path(f'{shared.args.model_dir}/{model}/config.json')
+ if path.exists():
+ metadata = json.loads(open(path, 'r').read())
+ if 'max_position_embeddings' in metadata:
+ model_settings['truncation_length'] = metadata['max_position_embeddings']
+ model_settings['max_seq_len'] = metadata['max_position_embeddings']
+
+ if 'rope_theta' in metadata:
+ model_settings['rope_freq_base'] = metadata['rope_theta']
+
+ if 'rope_scaling' in metadata and type(metadata['rope_scaling']) is dict and all(key in metadata['rope_scaling'] for key in ('type', 'factor')):
+ if metadata['rope_scaling']['type'] == 'linear':
+ model_settings['compress_pos_emb'] = metadata['rope_scaling']['factor']
+
+ if 'quantization_config' in metadata:
+ if 'bits' in metadata['quantization_config']:
+ model_settings['wbits'] = metadata['quantization_config']['bits']
+ if 'group_size' in metadata['quantization_config']:
+ model_settings['groupsize'] = metadata['quantization_config']['group_size']
+ if 'desc_act' in metadata['quantization_config']:
+ model_settings['desc_act'] = metadata['quantization_config']['desc_act']
+
+ # Read AutoGPTQ metadata
+ path = Path(f'{shared.args.model_dir}/{model}/quantize_config.json')
+ if path.exists():
+ metadata = json.loads(open(path, 'r').read())
+ if 'bits' in metadata:
+ model_settings['wbits'] = metadata['bits']
+ if 'group_size' in metadata:
+ model_settings['groupsize'] = metadata['group_size']
+ if 'desc_act' in metadata:
+ model_settings['desc_act'] = metadata['desc_act']
+
+ # Ignore rope_freq_base if set to the default value
+ if 'rope_freq_base' in model_settings and model_settings['rope_freq_base'] == 10000:
+ model_settings.pop('rope_freq_base')
+
+ # Apply user settings from models/config-user.yaml
+ settings = shared.user_config
+ for pat in settings:
+ if re.match(pat.lower(), model.lower()):
+ for k in settings[pat]:
+ model_settings[k] = settings[pat][k]
+
+ return model_settings
+
+
+def infer_loader(model_name, model_settings):
+ path_to_model = Path(f'{shared.args.model_dir}/{model_name}')
+ if not path_to_model.exists():
+ loader = None
+ elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and type(model_settings['wbits']) is int and model_settings['wbits'] > 0):
+ loader = 'AutoGPTQ'
+ elif (path_to_model / 'quant_config.json').exists() or re.match(r'.*-awq', model_name.lower()):
+ loader = 'AutoAWQ'
+ elif len(list(path_to_model.glob('*.gguf'))) > 0:
+ loader = 'llama.cpp'
+ elif re.match(r'.*\.gguf', model_name.lower()):
+ loader = 'llama.cpp'
+ elif re.match(r'.*rwkv.*\.pth', model_name.lower()):
+ loader = 'RWKV'
+ elif re.match(r'.*exl2', model_name.lower()):
+ loader = 'ExLlamav2_HF'
+ else:
+ loader = 'Transformers'
+
+ return loader
+
+
+# UI: update the command-line arguments based on the interface values
+def update_model_parameters(state, initial=False):
+ elements = ui.list_model_elements() # the names of the parameters
+ gpu_memories = []
+
+ for i, element in enumerate(elements):
+ if element not in state:
+ continue
+
+ value = state[element]
+ if element.startswith('gpu_memory'):
+ gpu_memories.append(value)
+ continue
+
+ if initial and element in shared.provided_arguments:
+ continue
+
+ # Setting null defaults
+ if element in ['wbits', 'groupsize', 'model_type'] and value == 'None':
+ value = vars(shared.args_defaults)[element]
+ elif element in ['cpu_memory'] and value == 0:
+ value = vars(shared.args_defaults)[element]
+
+ # Making some simple conversions
+ if element in ['wbits', 'groupsize', 'pre_layer']:
+ value = int(value)
+ elif element == 'cpu_memory' and value is not None:
+ value = f"{value}MiB"
+
+ if element in ['pre_layer']:
+ value = [value] if value > 0 else None
+
+ setattr(shared.args, element, value)
+
+ found_positive = False
+ for i in gpu_memories:
+ if i > 0:
+ found_positive = True
+ break
+
+ if not (initial and vars(shared.args)['gpu_memory'] != vars(shared.args_defaults)['gpu_memory']):
+ if found_positive:
+ shared.args.gpu_memory = [f"{i}MiB" for i in gpu_memories]
+ else:
+ shared.args.gpu_memory = None
+
+
+# UI: update the state variable with the model settings
+def apply_model_settings_to_state(model, state):
+ model_settings = get_model_metadata(model)
+ if 'loader' in model_settings:
+ loader = model_settings.pop('loader')
+
+ # If the user is using an alternative loader for the same model type, let them keep using it
+ if not (loader == 'AutoGPTQ' and state['loader'] in ['GPTQ-for-LLaMa', 'ExLlama', 'ExLlama_HF', 'ExLlamav2', 'ExLlamav2_HF']) and not (loader == 'llama.cpp' and state['loader'] in ['llamacpp_HF', 'ctransformers']):
+ state['loader'] = loader
+
+ for k in model_settings:
+ if k in state:
+ if k in ['wbits', 'groupsize']:
+ state[k] = str(model_settings[k])
+ else:
+ state[k] = model_settings[k]
+
+ return state
+
+
+# Save the settings for this model to models/config-user.yaml
+def save_model_settings(model, state):
+ if model == 'None':
+ yield ("Not saving the settings because no model is loaded.")
+ return
+
+ with Path(f'{shared.args.model_dir}/config-user.yaml') as p:
+ if p.exists():
+ user_config = yaml.safe_load(open(p, 'r').read())
+ else:
+ user_config = {}
+
+ model_regex = model + '$' # For exact matches
+ if model_regex not in user_config:
+ user_config[model_regex] = {}
+
+ for k in ui.list_model_elements():
+ if k == 'loader' or k in loaders.loaders_and_params[state['loader']]:
+ user_config[model_regex][k] = state[k]
+
+ shared.user_config = user_config
+
+ output = yaml.dump(user_config, sort_keys=False)
+ with open(p, 'w') as f:
+ f.write(output)
+
+ yield (f"Settings for {model} saved to {p}")
diff --git a/modules/monkey_patch_gptq_lora.py b/modules/monkey_patch_gptq_lora.py
new file mode 100644
index 0000000000000000000000000000000000000000..3166bd33ceba449cb542861b0238818f68c7b02e
--- /dev/null
+++ b/modules/monkey_patch_gptq_lora.py
@@ -0,0 +1,39 @@
+# Copied from https://github.com/johnsmith0031/alpaca_lora_4bit
+
+from pathlib import Path
+
+import alpaca_lora_4bit.autograd_4bit as autograd_4bit
+from alpaca_lora_4bit.amp_wrapper import AMPWrapper
+from alpaca_lora_4bit.autograd_4bit import (
+ Autograd4bitQuantLinear,
+ load_llama_model_4bit_low_ram
+)
+from alpaca_lora_4bit.models import Linear4bitLt
+from alpaca_lora_4bit.monkeypatch.peft_tuners_lora_monkey_patch import (
+ replace_peft_model_with_int4_lora_model
+)
+
+from modules import shared
+from modules.GPTQ_loader import find_quantized_model_file
+
+replace_peft_model_with_int4_lora_model()
+
+
+def load_model_llama(model_name):
+ config_path = str(Path(f'{shared.args.model_dir}/{model_name}'))
+ model_path = str(find_quantized_model_file(model_name))
+ model, tokenizer = load_llama_model_4bit_low_ram(config_path, model_path, groupsize=shared.args.groupsize, is_v1_model=False)
+ for _, m in model.named_modules():
+ if isinstance(m, Autograd4bitQuantLinear) or isinstance(m, Linear4bitLt):
+ if m.is_v1_model:
+ m.zeros = m.zeros.half()
+ m.scales = m.scales.half()
+ m.bias = m.bias.half()
+
+ autograd_4bit.auto_switch = True
+
+ model.half()
+ wrapper = AMPWrapper(model)
+ wrapper.apply_generate()
+
+ return model, tokenizer
diff --git a/modules/one_click_installer_check.py b/modules/one_click_installer_check.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a7dd2b9b8d510fc1229e813f56d8052ac800ff3
--- /dev/null
+++ b/modules/one_click_installer_check.py
@@ -0,0 +1,8 @@
+from pathlib import Path
+from modules.logging_colors import logger
+
+if Path('../webui.py').exists():
+ logger.warning('\nIt looks like you are running an outdated version of '
+ 'the one-click-installers.\n'
+ 'Please migrate your installation following the instructions here:\n'
+ 'https://github.com/oobabooga/text-generation-webui/wiki/Migrating-an-old-one%E2%80%90click-install')
diff --git a/modules/presets.py b/modules/presets.py
new file mode 100644
index 0000000000000000000000000000000000000000..96d6e994e488efadbbc09afdd16be7b507e8fb68
--- /dev/null
+++ b/modules/presets.py
@@ -0,0 +1,72 @@
+import functools
+from pathlib import Path
+
+import yaml
+
+
+def default_preset():
+ return {
+ 'do_sample': True,
+ 'temperature': 1,
+ 'top_p': 1,
+ 'top_k': 0,
+ 'typical_p': 1,
+ 'epsilon_cutoff': 0,
+ 'eta_cutoff': 0,
+ 'tfs': 1,
+ 'top_a': 0,
+ 'repetition_penalty': 1,
+ 'repetition_penalty_range': 0,
+ 'encoder_repetition_penalty': 1,
+ 'no_repeat_ngram_size': 0,
+ 'min_length': 0,
+ 'guidance_scale': 1,
+ 'mirostat_mode': 0,
+ 'mirostat_tau': 5.0,
+ 'mirostat_eta': 0.1,
+ 'penalty_alpha': 0,
+ 'num_beams': 1,
+ 'length_penalty': 1,
+ 'early_stopping': False,
+ 'custom_token_bans': '',
+ }
+
+
+def presets_params():
+ return [k for k in default_preset()]
+
+
+def load_preset(name):
+ generate_params = default_preset()
+ if name not in ['None', None, '']:
+ with open(Path(f'presets/{name}.yaml'), 'r') as infile:
+ preset = yaml.safe_load(infile)
+
+ for k in preset:
+ generate_params[k] = preset[k]
+
+ generate_params['temperature'] = min(1.99, generate_params['temperature'])
+ return generate_params
+
+
+@functools.cache
+def load_preset_memoized(name):
+ return load_preset(name)
+
+
+def load_preset_for_ui(name, state):
+ generate_params = load_preset(name)
+ state.update(generate_params)
+ return state, *[generate_params[k] for k in presets_params()]
+
+
+def generate_preset_yaml(state):
+ defaults = default_preset()
+ data = {k: state[k] for k in presets_params()}
+
+ # Remove entries that are identical to the defaults
+ for k in list(data.keys()):
+ if data[k] == defaults[k]:
+ del data[k]
+
+ return yaml.dump(data, sort_keys=False)
diff --git a/modules/prompts.py b/modules/prompts.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce652defbeba437b0c7e620ccb140237cefd4d30
--- /dev/null
+++ b/modules/prompts.py
@@ -0,0 +1,51 @@
+from pathlib import Path
+
+import yaml
+
+from modules import utils
+from modules.text_generation import get_encoded_length
+
+
+def load_prompt(fname):
+ if fname in ['None', '']:
+ return ''
+ else:
+ file_path = Path(f'prompts/{fname}.txt')
+ if not file_path.exists():
+ return ''
+
+ with open(file_path, 'r', encoding='utf-8') as f:
+ text = f.read()
+ if text[-1] == '\n':
+ text = text[:-1]
+
+ return text
+
+
+def load_instruction_prompt_simple(fname):
+ file_path = Path(f'instruction-templates/{fname}.yaml')
+ if not file_path.exists():
+ return ''
+
+ with open(file_path, 'r', encoding='utf-8') as f:
+ data = yaml.safe_load(f)
+ output = ''
+ if 'context' in data:
+ output += data['context']
+
+ replacements = {
+ '<|user|>': data['user'],
+ '<|bot|>': data['bot'],
+ '<|user-message|>': 'Input',
+ }
+
+ output += utils.replace_all(data['turn_template'].split('<|bot-message|>')[0], replacements)
+ return output.rstrip(' ')
+
+
+def count_tokens(text):
+ try:
+ tokens = get_encoded_length(text)
+ return str(tokens)
+ except:
+ return '0'
diff --git a/modules/relative_imports.py b/modules/relative_imports.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c0eb56b77c6cb6b38fdbdeebabe9ad3b8d91b97
--- /dev/null
+++ b/modules/relative_imports.py
@@ -0,0 +1,13 @@
+import sys
+from pathlib import Path
+
+
+class RelativeImport:
+ def __init__(self, path):
+ self.import_path = Path(path)
+
+ def __enter__(self):
+ sys.path.insert(0, str(self.import_path))
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ sys.path.remove(str(self.import_path))
diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a724f478ca8989dfec67faeb4a1c1f59d251def
--- /dev/null
+++ b/modules/sampler_hijack.py
@@ -0,0 +1,218 @@
+import math
+
+import torch
+import transformers
+from transformers import LogitsWarper
+from transformers.generation.logits_process import (
+ LogitNormalization,
+ LogitsProcessor,
+ LogitsProcessorList,
+ TemperatureLogitsWarper
+)
+
+global_scores = None
+
+
+class TailFreeLogitsWarper(LogitsWarper):
+ def __init__(self, tfs: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
+ tfs = float(tfs)
+ if tfs < 0 or tfs > 1.0:
+ raise ValueError(f"`tfs` has to be a float >= 0 and <= 1, but is {tfs}")
+ self.tfs = tfs
+ self.filter_value = filter_value
+ self.min_tokens_to_keep = min_tokens_to_keep
+
+ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
+ sorted_logits, sorted_indices = torch.sort(scores, descending=True)
+ probs = sorted_logits.softmax(dim=-1)
+
+ # Compute second derivative normalized CDF
+ d2 = probs.diff().diff().abs()
+ normalized_d2 = d2 / d2.sum(dim=-1, keepdim=True)
+ normalized_d2_cdf = normalized_d2.cumsum(dim=-1)
+
+ # Remove tokens with CDF value above the threshold (token with 0 are kept)
+ sorted_indices_to_remove = normalized_d2_cdf > self.tfs
+
+ # Centre the distribution around the cutoff as in the original implementation of the algorithm
+ sorted_indices_to_remove = torch.cat(
+ (
+ torch.zeros(scores.shape[0], 1, dtype=torch.bool, device=scores.device),
+ sorted_indices_to_remove,
+ torch.ones(scores.shape[0], 1, dtype=torch.bool, device=scores.device),
+ ),
+ dim=-1,
+ )
+
+ if self.min_tokens_to_keep > 1:
+ # Keep at least min_tokens_to_keep
+ sorted_indices_to_remove[..., : self.min_tokens_to_keep] = 0
+
+ indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
+ scores = scores.masked_fill(indices_to_remove, self.filter_value)
+ return scores
+
+
+class TopALogitsWarper(LogitsWarper):
+ def __init__(self, top_a: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
+ top_a = float(top_a)
+ if top_a < 0 or top_a > 1.0:
+ raise ValueError(f"`top_a` has to be a float >= 0 and <= 1, but is {top_a}")
+ self.top_a = top_a
+ self.filter_value = filter_value
+ self.min_tokens_to_keep = min_tokens_to_keep
+
+ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
+ sorted_logits, sorted_indices = torch.sort(scores, descending=True)
+ probs = sorted_logits.softmax(dim=-1)
+
+ # Remove tokens with probability less than top_a*(max(probs))^2 (token with 0 are kept)
+ probs_max = probs[..., 0, None]
+ sorted_indices_to_remove = probs < probs_max * probs_max * self.top_a
+
+ if self.min_tokens_to_keep > 1:
+ # Keep at least min_tokens_to_keep
+ sorted_indices_to_remove[..., : self.min_tokens_to_keep] = 0
+
+ indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
+ scores = scores.masked_fill(indices_to_remove, self.filter_value)
+ return scores
+
+
+class MirostatLogitsWarper(LogitsWarper):
+ def __init__(self, mirostat_mode: int, mirostat_tau: float, mirostat_eta: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
+ if mirostat_mode not in [2]:
+ raise ValueError(f"`mirostat` has to be a an integer 2, but is {mirostat_mode}")
+ self.mirostat_mode = mirostat_mode
+ self.mirostat_eta = mirostat_eta
+ self.mirostat_tau = mirostat_tau
+ self.filter_value = filter_value
+ self.min_tokens_to_keep = min_tokens_to_keep
+ self.mu = 2 * self.mirostat_tau
+ self.e = 0
+
+ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
+ logits = scores[0]
+ sorted_logits, sorted_indices = torch.sort(logits, descending=True)
+ prob_original = torch.softmax(sorted_logits, dim=-1).tolist() # candidates
+
+ # Truncate the words with surprise values greater than mu
+ for i, candidate in enumerate(prob_original):
+ if candidate > 0 and -math.log2(candidate) > self.mu:
+ if (i == 0):
+ sorted_logits = sorted_logits[:1]
+ else:
+ sorted_logits = sorted_logits[:i]
+ break
+
+ # Normalize the probabilities of the remaining words
+ prob_topk = torch.softmax(sorted_logits, dim=0).to('cuda')
+
+ prev_i = torch.multinomial(prob_topk, num_samples=1, replacement=True).to('cuda')
+
+ observed_surprise = -math.log2(prob_topk[prev_i])
+ self.e = observed_surprise - self.mirostat_tau
+
+ # Update mu using the learning rate and error
+ self.mu -= self.mirostat_eta * self.e
+
+ sorted_indices_to_remove = torch.ones_like(scores[0], dtype=torch.bool)
+ sorted_indices_to_remove[prev_i] = False
+
+ indices_to_remove = sorted_indices_to_remove.unsqueeze(0).scatter(1, sorted_indices.unsqueeze(0), sorted_indices_to_remove.unsqueeze(0))
+ scores = scores.masked_fill(indices_to_remove, self.filter_value)
+ return scores
+
+
+class SpyLogitsWarper(LogitsWarper):
+ def __init__(self):
+ pass
+
+ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
+ global global_scores
+ global_scores = scores
+ return scores
+
+
+class RepetitionPenaltyLogitsProcessorWithRange(LogitsProcessor):
+ '''
+ Copied from the transformers library
+ '''
+
+ def __init__(self, penalty: float, _range: int):
+ if not isinstance(penalty, float) or not (penalty > 0):
+ raise ValueError(f"`penalty` has to be a strictly positive float, but is {penalty}")
+
+ self.penalty = penalty
+ self._range = _range
+
+ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
+
+ input_ids = input_ids[:, -self._range:]
+ score = torch.gather(scores, 1, input_ids)
+
+ # if score < 0 then repetition penalty has to be multiplied to reduce the previous token probability
+ score = torch.where(score < 0, score * self.penalty, score / self.penalty)
+
+ scores.scatter_(1, input_ids, score)
+ return scores
+
+
+def get_logits_warper_patch(self, generation_config):
+ warpers = self._get_logits_warper_old(generation_config)
+ warpers_to_add = LogitsProcessorList()
+ min_tokens_to_keep = 2 if generation_config.num_beams > 1 else 1
+
+ if generation_config.mirostat_mode is not None and generation_config.mirostat_mode == 2:
+ warpers_to_add.append(MirostatLogitsWarper(mirostat_mode=generation_config.mirostat_mode, mirostat_eta=generation_config.mirostat_eta, mirostat_tau=generation_config.mirostat_tau, min_tokens_to_keep=min_tokens_to_keep))
+ # We need to disable samplers other than temperature
+ for warper in warpers:
+ if not isinstance(warper, TemperatureLogitsWarper):
+ warpers.remove(warper)
+ else:
+ if generation_config.tfs is not None and 0.0 <= generation_config.tfs <= 1.0:
+ warpers_to_add.append(TailFreeLogitsWarper(tfs=generation_config.tfs, min_tokens_to_keep=min_tokens_to_keep))
+ if generation_config.top_a is not None and 0.0 <= generation_config.top_a <= 1.0:
+ warpers_to_add.append(TopALogitsWarper(top_a=generation_config.top_a, min_tokens_to_keep=min_tokens_to_keep))
+
+ if warpers and isinstance(warpers[-1], LogitNormalization):
+ warpers = warpers[:-1] + warpers_to_add + [warpers[-1]]
+ else:
+ warpers += warpers_to_add
+
+ warpers.append(SpyLogitsWarper())
+ return warpers
+
+
+def get_logits_processor_patch(self, **kwargs):
+ result = self._get_logits_processor_old(**kwargs)
+ repetition_penalty_range = kwargs['generation_config'].repetition_penalty_range
+ repetition_penalty = kwargs['generation_config'].repetition_penalty
+
+ if repetition_penalty_range > 0:
+ for i in range(len(result)):
+ if result[i].__class__.__name__ == 'RepetitionPenaltyLogitsProcessor':
+ result[i] = RepetitionPenaltyLogitsProcessorWithRange(repetition_penalty, repetition_penalty_range)
+
+ return result
+
+
+def generation_config_init_patch(self, **kwargs):
+ self.__init___old(**kwargs)
+ self.tfs = kwargs.pop("tfs", 1.0)
+ self.top_a = kwargs.pop("top_a", 0.0)
+ self.mirostat_mode = kwargs.pop("mirostat_mode", 0)
+ self.mirostat_eta = kwargs.pop("mirostat_eta", 0.1)
+ self.mirostat_tau = kwargs.pop("mirostat_tau", 5)
+ self.repetition_penalty_range = kwargs.pop("repetition_penalty_range", 0)
+
+
+def hijack_samplers():
+ transformers.GenerationMixin._get_logits_warper_old = transformers.GenerationMixin._get_logits_warper
+ transformers.GenerationMixin._get_logits_warper = get_logits_warper_patch
+
+ transformers.GenerationMixin._get_logits_processor_old = transformers.GenerationMixin._get_logits_processor
+ transformers.GenerationMixin._get_logits_processor = get_logits_processor_patch
+
+ transformers.GenerationConfig.__init___old = transformers.GenerationConfig.__init__
+ transformers.GenerationConfig.__init__ = generation_config_init_patch
diff --git a/modules/shared.py b/modules/shared.py
new file mode 100644
index 0000000000000000000000000000000000000000..427d92306514dafb1df9d041f77de4d3ceac70e9
--- /dev/null
+++ b/modules/shared.py
@@ -0,0 +1,275 @@
+import argparse
+import sys
+from collections import OrderedDict
+from pathlib import Path
+
+import yaml
+
+from modules.logging_colors import logger
+
+# Model variables
+model = None
+tokenizer = None
+model_name = "None"
+is_seq2seq = False
+model_dirty_from_training = False
+lora_names = []
+
+# Generation variables
+stop_everything = False
+generation_lock = None
+processing_message = '*Is typing...*'
+
+# UI variables
+gradio = {}
+persistent_interface_state = {}
+need_restart = False
+
+# UI defaults
+settings = {
+ 'dark_theme': True,
+ 'show_controls': True,
+ 'start_with': '',
+ 'mode': 'chat',
+ 'chat_style': 'cai-chat',
+ 'prompt-default': 'QA',
+ 'prompt-notebook': 'QA',
+ 'preset': 'simple-1',
+ 'max_new_tokens': 200,
+ 'max_new_tokens_min': 1,
+ 'max_new_tokens_max': 4096,
+ 'seed': -1,
+ 'negative_prompt': '',
+ 'truncation_length': 2048,
+ 'truncation_length_min': 0,
+ 'truncation_length_max': 32768,
+ 'custom_stopping_strings': '',
+ 'auto_max_new_tokens': False,
+ 'max_tokens_second': 0,
+ 'ban_eos_token': False,
+ 'custom_token_bans': '',
+ 'add_bos_token': True,
+ 'skip_special_tokens': True,
+ 'stream': True,
+ 'name1': 'You',
+ 'character': 'Assistant',
+ 'instruction_template': 'Alpaca',
+ 'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
+ 'autoload_model': False,
+ 'default_extensions': ['gallery'],
+}
+
+
+def str2bool(v):
+ if isinstance(v, bool):
+ return v
+ if v.lower() in ('yes', 'true', 't', 'y', '1'):
+ return True
+ elif v.lower() in ('no', 'false', 'f', 'n', '0'):
+ return False
+ else:
+ raise argparse.ArgumentTypeError('Boolean value expected.')
+
+
+parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=54))
+
+# Basic settings
+parser.add_argument('--notebook', action='store_true', help='DEPRECATED')
+parser.add_argument('--chat', action='store_true', help='DEPRECATED')
+parser.add_argument('--multi-user', action='store_true', help='Multi-user mode. Chat histories are not saved or automatically loaded. WARNING: this is highly experimental.')
+parser.add_argument('--character', type=str, help='The name of the character to load in chat mode by default.')
+parser.add_argument('--model', type=str, help='Name of the model to load by default.')
+parser.add_argument('--lora', type=str, nargs="+", help='The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces.')
+parser.add_argument("--model-dir", type=str, default='models/', help="Path to directory with all the models")
+parser.add_argument("--lora-dir", type=str, default='loras/', help="Path to directory with all the loras")
+parser.add_argument('--model-menu', action='store_true', help='Show a model menu in the terminal when the web UI is first launched.')
+parser.add_argument('--no-stream', action='store_true', help='DEPRECATED')
+parser.add_argument('--settings', type=str, help='Load the default interface settings from this yaml file. See settings-template.yaml for an example. If you create a file called settings.yaml, this file will be loaded by default without the need to use the --settings flag.')
+parser.add_argument('--extensions', type=str, nargs="+", help='The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.')
+parser.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.')
+parser.add_argument('--chat-buttons', action='store_true', help='Show buttons on chat tab instead of hover menu.')
+
+# Model loader
+parser.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: transformers, autogptq, gptq-for-llama, exllama, exllama_hf, llamacpp, rwkv')
+
+# Accelerate/transformers
+parser.add_argument('--cpu', action='store_true', help='Use the CPU to generate text. Warning: Training on CPU is extremely slow.')
+parser.add_argument('--auto-devices', action='store_true', help='Automatically split the model across the available GPU(s) and CPU.')
+parser.add_argument('--gpu-memory', type=str, nargs="+", help='Maximum GPU memory in GiB to be allocated per GPU. Example: --gpu-memory 10 for a single GPU, --gpu-memory 10 5 for two GPUs. You can also set values in MiB like --gpu-memory 3500MiB.')
+parser.add_argument('--cpu-memory', type=str, help='Maximum CPU memory in GiB to allocate for offloaded weights. Same as above.')
+parser.add_argument('--disk', action='store_true', help='If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk.')
+parser.add_argument('--disk-cache-dir', type=str, default="cache", help='Directory to save the disk cache to. Defaults to "cache".')
+parser.add_argument('--load-in-8bit', action='store_true', help='Load the model with 8-bit precision (using bitsandbytes).')
+parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
+parser.add_argument('--no-cache', action='store_true', help='Set use_cache to False while generating text. This reduces the VRAM usage a bit at a performance cost.')
+parser.add_argument('--xformers', action='store_true', help="Use xformer's memory efficient attention. This should increase your tokens/s.")
+parser.add_argument('--sdp-attention', action='store_true', help="Use torch 2.0's sdp attention.")
+parser.add_argument('--trust-remote-code', action='store_true', help="Set trust_remote_code=True while loading a model. Necessary for ChatGLM and Falcon.")
+parser.add_argument('--use_fast', action='store_true', help="Set use_fast=True while loading a tokenizer.")
+
+# Accelerate 4-bit
+parser.add_argument('--load-in-4bit', action='store_true', help='Load the model with 4-bit precision (using bitsandbytes).')
+parser.add_argument('--compute_dtype', type=str, default="float16", help="compute dtype for 4-bit. Valid options: bfloat16, float16, float32.")
+parser.add_argument('--quant_type', type=str, default="nf4", help='quant_type for 4-bit. Valid options: nf4, fp4.')
+parser.add_argument('--use_double_quant', action='store_true', help='use_double_quant for 4-bit.')
+
+# llama.cpp
+parser.add_argument('--threads', type=int, default=0, help='Number of threads to use.')
+parser.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.')
+parser.add_argument('--n_batch', type=int, default=512, help='Maximum number of prompt tokens to batch together when calling llama_eval.')
+parser.add_argument('--no-mmap', action='store_true', help='Prevent mmap from being used.')
+parser.add_argument('--mlock', action='store_true', help='Force the system to keep the model in RAM.')
+parser.add_argument('--mul_mat_q', action='store_true', help='Activate new mulmat kernels.')
+parser.add_argument('--cache-capacity', type=str, help='Maximum cache capacity. Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed.')
+parser.add_argument('--n-gpu-layers', type=int, default=0, help='Number of layers to offload to the GPU.')
+parser.add_argument('--tensor_split', type=str, default=None, help="Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17")
+parser.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.')
+parser.add_argument('--llama_cpp_seed', type=int, default=0, help='Seed for llama-cpp models. Default 0 (random)')
+parser.add_argument('--numa', action='store_true', help='Activate NUMA task allocation for llama.cpp')
+
+# GPTQ
+parser.add_argument('--wbits', type=int, default=0, help='Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.')
+parser.add_argument('--model_type', type=str, help='Model type of pre-quantized model. Currently LLaMA, OPT, and GPT-J are supported.')
+parser.add_argument('--groupsize', type=int, default=-1, help='Group size.')
+parser.add_argument('--pre_layer', type=int, nargs="+", help='The number of layers to allocate to the GPU. Setting this parameter enables CPU offloading for 4-bit models. For multi-gpu, write the numbers separated by spaces, eg --pre_layer 30 60.')
+parser.add_argument('--checkpoint', type=str, help='The path to the quantized checkpoint file. If not specified, it will be automatically detected.')
+parser.add_argument('--monkey-patch', action='store_true', help='Apply the monkey patch for using LoRAs with quantized models.')
+
+# AutoGPTQ
+parser.add_argument('--triton', action='store_true', help='Use triton.')
+parser.add_argument('--no_inject_fused_attention', action='store_true', help='Do not use fused attention (lowers VRAM requirements).')
+parser.add_argument('--no_inject_fused_mlp', action='store_true', help='Triton mode only: Do not use fused MLP (lowers VRAM requirements).')
+parser.add_argument('--no_use_cuda_fp16', action='store_true', help='This can make models faster on some systems.')
+parser.add_argument('--desc_act', action='store_true', help='For models that don\'t have a quantize_config.json, this parameter is used to define whether to set desc_act or not in BaseQuantizeConfig.')
+parser.add_argument('--disable_exllama', action='store_true', help='Disable ExLlama kernel, which can improve inference speed on some systems.')
+
+# ExLlama
+parser.add_argument('--gpu-split', type=str, help="Comma-separated list of VRAM (in GB) to use per GPU device for model layers, e.g. 20,7,7")
+parser.add_argument('--max_seq_len', type=int, default=2048, help="Maximum sequence length.")
+parser.add_argument('--cfg-cache', action='store_true', help="ExLlama_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader, but not necessary for CFG with base ExLlama.")
+
+# DeepSpeed
+parser.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.')
+parser.add_argument('--nvme-offload-dir', type=str, help='DeepSpeed: Directory to use for ZeRO-3 NVME offloading.')
+parser.add_argument('--local_rank', type=int, default=0, help='DeepSpeed: Optional argument for distributed setups.')
+
+# RWKV
+parser.add_argument('--rwkv-strategy', type=str, default=None, help='RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8".')
+parser.add_argument('--rwkv-cuda-on', action='store_true', help='RWKV: Compile the CUDA kernel for better performance.')
+
+# RoPE
+parser.add_argument('--alpha_value', type=float, default=1, help="Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both.")
+parser.add_argument('--rope_freq_base', type=int, default=0, help="If greater than 0, will be used instead of alpha_value. Those two are related by rope_freq_base = 10000 * alpha_value ^ (64 / 63).")
+parser.add_argument('--compress_pos_emb', type=int, default=1, help="Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.")
+
+# Gradio
+parser.add_argument('--listen', action='store_true', help='Make the web UI reachable from your local network.')
+parser.add_argument('--listen-host', type=str, help='The hostname that the server will use.')
+parser.add_argument('--listen-port', type=int, help='The listening port that the server will use.')
+parser.add_argument('--share', action='store_true', help='Create a public URL. This is useful for running the web UI on Google Colab or similar.')
+parser.add_argument('--auto-launch', action='store_true', default=False, help='Open the web UI in the default browser upon launch.')
+parser.add_argument("--gradio-auth", type=str, help='set gradio authentication like "username:password"; or comma-delimit multiple like "u1:p1,u2:p2,u3:p3"', default=None)
+parser.add_argument("--gradio-auth-path", type=str, help='Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3"', default=None)
+parser.add_argument("--ssl-keyfile", type=str, help='The path to the SSL certificate key file.', default=None)
+parser.add_argument("--ssl-certfile", type=str, help='The path to the SSL certificate cert file.', default=None)
+
+# API
+parser.add_argument('--api', action='store_true', help='Enable the API extension.')
+parser.add_argument('--api-blocking-port', type=int, default=5000, help='The listening port for the blocking API.')
+parser.add_argument('--api-streaming-port', type=int, default=5005, help='The listening port for the streaming API.')
+parser.add_argument('--public-api', action='store_true', help='Create a public URL for the API using Cloudfare.')
+parser.add_argument('--public-api-id', type=str, help='Tunnel ID for named Cloudflare Tunnel. Use together with public-api option.', default=None)
+
+# Multimodal
+parser.add_argument('--multimodal-pipeline', type=str, default=None, help='The multimodal pipeline to use. Examples: llava-7b, llava-13b.')
+
+args = parser.parse_args()
+args_defaults = parser.parse_args([])
+provided_arguments = []
+for arg in sys.argv[1:]:
+ arg = arg.lstrip('-').replace('-', '_')
+ if hasattr(args, arg):
+ provided_arguments.append(arg)
+
+# Deprecation warnings
+for k in ['chat', 'notebook', 'no_stream']:
+ if getattr(args, k):
+ logger.warning(f'The --{k} flag has been deprecated and will be removed soon. Please remove that flag.')
+
+# Security warnings
+if args.trust_remote_code:
+ logger.warning("trust_remote_code is enabled. This is dangerous.")
+if args.share:
+ logger.warning("The gradio \"share link\" feature uses a proprietary executable to create a reverse tunnel. Use it with care.")
+if any((args.listen, args.share)) and not any((args.gradio_auth, args.gradio_auth_path)):
+ logger.warning("\nYou are potentially exposing the web UI to the entire internet without any access password.\nYou can create one with the \"--gradio-auth\" flag like this:\n\n--gradio-auth username:password\n\nMake sure to replace username:password with your own.")
+ if args.multi_user:
+ logger.warning("\nThe multi-user mode is highly experimental and should not be shared publicly.")
+
+
+def fix_loader_name(name):
+ if not name:
+ return name
+
+ name = name.lower()
+ if name in ['llamacpp', 'llama.cpp', 'llama-cpp', 'llama cpp']:
+ return 'llama.cpp'
+ if name in ['llamacpp_hf', 'llama.cpp_hf', 'llama-cpp-hf', 'llamacpp-hf', 'llama.cpp-hf']:
+ return 'llamacpp_HF'
+ elif name in ['transformers', 'huggingface', 'hf', 'hugging_face', 'hugging face']:
+ return 'Transformers'
+ elif name in ['autogptq', 'auto-gptq', 'auto_gptq', 'auto gptq']:
+ return 'AutoGPTQ'
+ elif name in ['gptq-for-llama', 'gptqforllama', 'gptqllama', 'gptq for llama', 'gptq_for_llama']:
+ return 'GPTQ-for-LLaMa'
+ elif name in ['exllama', 'ex-llama', 'ex_llama', 'exlama']:
+ return 'ExLlama'
+ elif name in ['exllama-hf', 'exllama_hf', 'exllama hf', 'ex-llama-hf', 'ex_llama_hf']:
+ return 'ExLlama_HF'
+ elif name in ['exllamav2', 'exllama-v2', 'ex_llama-v2', 'exlamav2', 'exlama-v2', 'exllama2', 'exllama-2']:
+ return 'ExLlamav2'
+ elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf', 'exllama2-hf', 'exllama2_hf', 'exllama-2-hf', 'exllama_2_hf', 'exllama-2_hf']:
+ return 'ExLlamav2_HF'
+ elif name in ['ctransformers', 'ctranforemrs', 'ctransformer']:
+ return 'ctransformers'
+ elif name in ['autoawq', 'awq', 'auto-awq']:
+ return 'AutoAWQ'
+
+
+def add_extension(name):
+ if args.extensions is None:
+ args.extensions = [name]
+ elif 'api' not in args.extensions:
+ args.extensions.append(name)
+
+
+def is_chat():
+ return True
+
+
+args.loader = fix_loader_name(args.loader)
+
+# Activate the API extension
+if args.api or args.public_api:
+ add_extension('api')
+
+# Activate the multimodal extension
+if args.multimodal_pipeline is not None:
+ add_extension('multimodal')
+
+# Load model-specific settings
+with Path(f'{args.model_dir}/config.yaml') as p:
+ if p.exists():
+ model_config = yaml.safe_load(open(p, 'r').read())
+ else:
+ model_config = {}
+
+# Load custom model-specific settings
+with Path(f'{args.model_dir}/config-user.yaml') as p:
+ if p.exists():
+ user_config = yaml.safe_load(open(p, 'r').read())
+ else:
+ user_config = {}
+
+model_config = OrderedDict(model_config)
+user_config = OrderedDict(user_config)
diff --git a/modules/text_generation.py b/modules/text_generation.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f24dc58062697c2f7409ce791448392d8bc860a
--- /dev/null
+++ b/modules/text_generation.py
@@ -0,0 +1,397 @@
+import ast
+import copy
+import html
+import random
+import re
+import time
+import traceback
+
+import numpy as np
+import torch
+import transformers
+from transformers import LogitsProcessorList
+
+import modules.shared as shared
+from modules.callbacks import (
+ Iteratorize,
+ Stream,
+ _StopEverythingStoppingCriteria
+)
+from modules.extensions import apply_extensions
+from modules.grammar import GrammarLogitsProcessor
+from modules.html_generator import generate_4chan_html, generate_basic_html
+from modules.logging_colors import logger
+from modules.models import clear_torch_cache, local_rank
+
+
+def generate_reply(*args, **kwargs):
+ shared.generation_lock.acquire()
+ try:
+ for result in _generate_reply(*args, **kwargs):
+ yield result
+ finally:
+ shared.generation_lock.release()
+
+
+def _generate_reply(question, state, stopping_strings=None, is_chat=False, escape_html=False):
+
+ # Find the appropriate generation function
+ generate_func = apply_extensions('custom_generate_reply')
+ if generate_func is None:
+ if shared.model_name == 'None' or shared.model is None:
+ logger.error("No model is loaded! Select one in the Model tab.")
+ yield ''
+ return
+
+ if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'ExllamaModel', 'Exllamav2Model', 'CtransformersModel']:
+ generate_func = generate_reply_custom
+ else:
+ generate_func = generate_reply_HF
+
+ # Prepare the input
+ original_question = question
+ if not is_chat:
+ state = apply_extensions('state', state)
+ question = apply_extensions('input', question, state)
+
+ # Find the stopping strings
+ all_stop_strings = []
+ for st in (stopping_strings, ast.literal_eval(f"[{state['custom_stopping_strings']}]")):
+ if type(st) is list and len(st) > 0:
+ all_stop_strings += st
+
+ if shared.args.verbose:
+ print(f'\n\n{question}\n--------------------\n')
+
+ shared.stop_everything = False
+ clear_torch_cache()
+ seed = set_manual_seed(state['seed'])
+ last_update = -1
+ reply = ''
+ is_stream = state['stream']
+ if len(all_stop_strings) > 0 and not state['stream']:
+ state = copy.deepcopy(state)
+ state['stream'] = True
+
+ # Generate
+ for reply in generate_func(question, original_question, seed, state, stopping_strings, is_chat=is_chat):
+ if escape_html:
+ reply = html.escape(reply)
+
+ reply, stop_found = apply_stopping_strings(reply, all_stop_strings)
+ if is_stream:
+ cur_time = time.time()
+
+ # Maximum number of tokens/second
+ if state['max_tokens_second'] > 0:
+ diff = 1 / state['max_tokens_second'] - (cur_time - last_update)
+ if diff > 0:
+ time.sleep(diff)
+
+ last_update = time.time()
+ yield reply
+
+ # Limit updates to 24 per second to not stress low latency networks
+ else:
+ if cur_time - last_update > 0.041666666666666664:
+ last_update = cur_time
+ yield reply
+
+ if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything):
+ break
+
+ if not is_chat:
+ reply = apply_extensions('output', reply, state)
+
+ yield reply
+
+
+def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_length=None):
+ if shared.tokenizer is None:
+ raise ValueError('No tokenizer is loaded')
+
+ if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'CtransformersModel', 'Exllamav2Model']:
+ input_ids = shared.tokenizer.encode(str(prompt))
+ if shared.model.__class__.__name__ not in ['Exllamav2Model']:
+ input_ids = np.array(input_ids).reshape(1, len(input_ids))
+ else:
+ input_ids = shared.tokenizer.encode(str(prompt), return_tensors='pt', add_special_tokens=add_special_tokens)
+
+ # This is a hack for making replies more creative.
+ if not add_bos_token and input_ids[0][0] == shared.tokenizer.bos_token_id:
+ input_ids = input_ids[:, 1:]
+
+ # Handling truncation
+ if truncation_length is not None:
+ input_ids = input_ids[:, -truncation_length:]
+
+ if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'ExllamaModel', 'Exllamav2Model', 'CtransformersModel'] or shared.args.cpu:
+ return input_ids
+ elif shared.args.deepspeed:
+ return input_ids.to(device=local_rank)
+ elif torch.backends.mps.is_available():
+ device = torch.device('mps')
+ return input_ids.to(device)
+ else:
+ return input_ids.cuda()
+
+
+def decode(output_ids, skip_special_tokens=True):
+ if shared.tokenizer is None:
+ raise ValueError('No tokenizer is loaded')
+
+ return shared.tokenizer.decode(output_ids, skip_special_tokens)
+
+
+def get_encoded_length(prompt):
+ length_after_extensions = apply_extensions('tokenized_length', prompt)
+ if length_after_extensions is not None:
+ return length_after_extensions
+
+ return len(encode(prompt)[0])
+
+
+def get_token_ids(prompt):
+ tokens = encode(prompt)[0]
+ decoded_tokens = [shared.tokenizer.decode([i]) for i in tokens]
+
+ output = ''
+ for row in list(zip(tokens, decoded_tokens)):
+ output += f"{str(int(row[0])).ljust(5)} - {repr(row[1])}\n"
+
+ return output
+
+
+def get_max_prompt_length(state):
+ return state['truncation_length'] - state['max_new_tokens']
+
+
+def generate_reply_wrapper(question, state, stopping_strings=None):
+ """
+ Returns formatted outputs for the UI
+ """
+ reply = question if not shared.is_seq2seq else ''
+ yield formatted_outputs(reply, shared.model_name)
+
+ for reply in generate_reply(question, state, stopping_strings, is_chat=False, escape_html=True):
+ if not shared.is_seq2seq:
+ reply = question + reply
+
+ yield formatted_outputs(reply, shared.model_name)
+
+
+def formatted_outputs(reply, model_name):
+ if any(s in model_name for s in ['gpt-4chan', 'gpt4chan']):
+ reply = fix_gpt4chan(reply)
+ return html.unescape(reply), generate_4chan_html(reply)
+ else:
+ return html.unescape(reply), generate_basic_html(reply)
+
+
+def fix_gpt4chan(s):
+ """
+ Removes empty replies from gpt4chan outputs
+ """
+ for i in range(10):
+ s = re.sub("--- [0-9]*\n>>[0-9]*\n---", "---", s)
+ s = re.sub("--- [0-9]*\n *\n---", "---", s)
+ s = re.sub("--- [0-9]*\n\n\n---", "---", s)
+
+ return s
+
+
+def fix_galactica(s):
+ """
+ Fix the LaTeX equations in GALACTICA
+ """
+ s = s.replace(r'\[', r'$')
+ s = s.replace(r'\]', r'$')
+ s = s.replace(r'\(', r'$')
+ s = s.replace(r'\)', r'$')
+ s = s.replace(r'$$', r'$')
+ s = re.sub(r'\n', r'\n\n', s)
+ s = re.sub(r"\n{3,}", "\n\n", s)
+ return s
+
+
+def get_reply_from_output_ids(output_ids, input_ids, original_question, state, is_chat=False):
+ if shared.is_seq2seq:
+ reply = decode(output_ids, state['skip_special_tokens'])
+ else:
+ new_tokens = len(output_ids) - len(input_ids[0])
+ reply = decode(output_ids[-new_tokens:], state['skip_special_tokens'])
+ # Prevent LlamaTokenizer from skipping a space
+ if type(shared.tokenizer) in [transformers.LlamaTokenizer, transformers.LlamaTokenizerFast] and len(output_ids) > 0:
+ if shared.tokenizer.convert_ids_to_tokens(int(output_ids[-new_tokens])).startswith('▁'):
+ reply = ' ' + reply
+
+ return reply
+
+
+def set_manual_seed(seed):
+ seed = int(seed)
+ if seed == -1:
+ seed = random.randint(1, 2**31)
+
+ torch.manual_seed(seed)
+ if torch.cuda.is_available():
+ torch.cuda.manual_seed_all(seed)
+
+ return seed
+
+
+def stop_everything_event():
+ shared.stop_everything = True
+
+
+def apply_stopping_strings(reply, all_stop_strings):
+ stop_found = False
+ for string in all_stop_strings:
+ idx = reply.find(string)
+ if idx != -1:
+ reply = reply[:idx]
+ stop_found = True
+ break
+
+ if not stop_found:
+ # If something like "\nYo" is generated just before "\nYou:"
+ # is completed, trim it
+ for string in all_stop_strings:
+ for j in range(len(string) - 1, 0, -1):
+ if reply[-j:] == string[:j]:
+ reply = reply[:-j]
+ break
+ else:
+ continue
+
+ break
+
+ return reply, stop_found
+
+
+def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False):
+ generate_params = {}
+ for k in ['max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'repetition_penalty_range', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'tfs', 'top_a', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'guidance_scale']:
+ generate_params[k] = state[k]
+
+ if state['negative_prompt'] != '':
+ generate_params['negative_prompt_ids'] = encode(state['negative_prompt'])
+
+ for k in ['epsilon_cutoff', 'eta_cutoff']:
+ if state[k] > 0:
+ generate_params[k] = state[k] * 1e-4
+
+ if state['ban_eos_token']:
+ generate_params['suppress_tokens'] = [shared.tokenizer.eos_token_id]
+
+ if state['custom_token_bans']:
+ to_ban = [int(x) for x in state['custom_token_bans'].split(',')]
+ if len(to_ban) > 0:
+ if generate_params.get('suppress_tokens', None):
+ generate_params['suppress_tokens'] += to_ban
+ else:
+ generate_params['suppress_tokens'] = to_ban
+
+ generate_params.update({'use_cache': not shared.args.no_cache})
+ if shared.args.deepspeed:
+ generate_params.update({'synced_gpus': True})
+
+ # Encode the input
+ input_ids = encode(question, add_bos_token=state['add_bos_token'], truncation_length=get_max_prompt_length(state))
+ output = input_ids[0]
+ cuda = not any((shared.args.cpu, shared.args.deepspeed))
+ if state['auto_max_new_tokens']:
+ generate_params['max_new_tokens'] = state['truncation_length'] - input_ids.shape[-1]
+
+ # Add the encoded tokens to generate_params
+ question, input_ids, inputs_embeds = apply_extensions('tokenizer', state, question, input_ids, None)
+ original_input_ids = input_ids
+ generate_params.update({'inputs': input_ids})
+ if inputs_embeds is not None:
+ generate_params.update({'inputs_embeds': inputs_embeds})
+
+ # Stopping criteria / eos token
+ eos_token_ids = [shared.tokenizer.eos_token_id] if shared.tokenizer.eos_token_id is not None else []
+ generate_params['eos_token_id'] = eos_token_ids
+ generate_params['stopping_criteria'] = transformers.StoppingCriteriaList()
+ generate_params['stopping_criteria'].append(_StopEverythingStoppingCriteria())
+
+ processor = state.get('logits_processor', LogitsProcessorList([]))
+ # In case a processor is passed by itself.
+ if not isinstance(processor, LogitsProcessorList):
+ processor = LogitsProcessorList([processor])
+ processor.append(GrammarLogitsProcessor(state['grammar_string']))
+ apply_extensions('logits_processor', processor, input_ids)
+ generate_params['logits_processor'] = processor
+
+ t0 = time.time()
+ try:
+ if not is_chat and not shared.is_seq2seq:
+ yield ''
+
+ # Generate the entire reply at once.
+ if not state['stream']:
+ with torch.no_grad():
+ output = shared.model.generate(**generate_params)[0]
+ if cuda:
+ output = output.cuda()
+
+ yield get_reply_from_output_ids(output, input_ids, original_question, state, is_chat=is_chat)
+
+ # Stream the reply 1 token at a time.
+ # This is based on the trick of using 'stopping_criteria' to create an iterator.
+ else:
+
+ def generate_with_callback(callback=None, *args, **kwargs):
+ kwargs['stopping_criteria'].append(Stream(callback_func=callback))
+ clear_torch_cache()
+ with torch.no_grad():
+ shared.model.generate(**kwargs)
+
+ def generate_with_streaming(**kwargs):
+ return Iteratorize(generate_with_callback, [], kwargs, callback=None)
+
+ with generate_with_streaming(**generate_params) as generator:
+ for output in generator:
+ if output[-1] in eos_token_ids:
+ break
+
+ yield get_reply_from_output_ids(output, input_ids, original_question, state, is_chat=is_chat)
+
+ except Exception:
+ traceback.print_exc()
+ finally:
+ t1 = time.time()
+ original_tokens = len(original_input_ids[0])
+ new_tokens = len(output) - (original_tokens if not shared.is_seq2seq else 0)
+ print(f'Output generated in {(t1-t0):.2f} seconds ({new_tokens/(t1-t0):.2f} tokens/s, {new_tokens} tokens, context {original_tokens}, seed {seed})')
+ return
+
+
+def generate_reply_custom(question, original_question, seed, state, stopping_strings=None, is_chat=False):
+ """
+ For models that do not use the transformers library for sampling
+ """
+ seed = set_manual_seed(state['seed'])
+
+ t0 = time.time()
+ reply = ''
+ try:
+ if not is_chat:
+ yield ''
+
+ if not state['stream']:
+ reply = shared.model.generate(question, state)
+ yield reply
+ else:
+ for reply in shared.model.generate_with_streaming(question, state):
+ yield reply
+
+ except Exception:
+ traceback.print_exc()
+ finally:
+ t1 = time.time()
+ original_tokens = len(encode(original_question)[0])
+ new_tokens = len(encode(original_question + reply)[0]) - original_tokens
+ print(f'Output generated in {(t1-t0):.2f} seconds ({new_tokens/(t1-t0):.2f} tokens/s, {new_tokens} tokens, context {original_tokens}, seed {seed})')
+ return
diff --git a/modules/training.py b/modules/training.py
new file mode 100644
index 0000000000000000000000000000000000000000..aaea2bd7e4aef6b2941cf5fa7193303f568f40d0
--- /dev/null
+++ b/modules/training.py
@@ -0,0 +1,739 @@
+import os
+
+os.environ["WANDB_MODE"] = "offline"
+# os.environ["WANDB_DISABLED"] = "true"
+
+import json
+import math
+import random
+import shutil
+import sys
+import threading
+import time
+import traceback
+from datetime import datetime
+from pathlib import Path
+
+import gradio as gr
+import torch
+import transformers
+from datasets import Dataset, load_dataset
+from peft import (
+ LoraConfig,
+ get_peft_model,
+ prepare_model_for_kbit_training,
+ set_peft_model_state_dict
+)
+from peft.utils.other import \
+ TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING as model_to_lora_modules
+from transformers.models.auto.modeling_auto import (
+ MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
+)
+
+from modules import shared, ui, utils
+from modules.evaluate import (
+ calculate_perplexity,
+ generate_markdown_table,
+ save_past_evaluations
+)
+from modules.logging_colors import logger
+from modules.models import reload_model
+from modules.utils import natural_keys
+
+MODEL_CLASSES = {v[1]: v[0] for v in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES.items()}
+PARAMETERS = ["lora_name", "always_override", "save_steps", "micro_batch_size", "batch_size", "epochs", "learning_rate", "lr_scheduler_type", "lora_rank", "lora_alpha", "lora_dropout", "cutoff_len", "dataset", "eval_dataset", "format", "eval_steps", "raw_text_file", "overlap_len", "newline_favor_len", "higher_rank_limit", "warmup_steps", "optimizer", "hard_cut_string", "train_only_after", "stop_at_loss", "add_eos_token", "min_chars", "report_to"]
+WANT_INTERRUPT = False
+
+train_log = {}
+train_template = {}
+
+
+def create_ui():
+ mu = shared.args.multi_user
+ with gr.Tab("Training", elem_id="training-tab"):
+ with gr.Tab('Train LoRA', elem_id='lora-train-tab'):
+ tmp = gr.State('')
+ with gr.Row():
+ with gr.Column():
+ gr.Markdown("[Tutorial](https://github.com/oobabooga/text-generation-webui/blob/main/docs/Training-LoRAs.md)")
+
+ with gr.Row():
+ copy_from = gr.Dropdown(label='Copy parameters from', value='None', choices=utils.get_available_loras(), elem_classes=['slim-dropdown'], interactive=not mu)
+ ui.create_refresh_button(copy_from, lambda: None, lambda: {'choices': utils.get_available_loras()}, 'refresh-button', interactive=not mu)
+
+ with gr.Row():
+ with gr.Column(scale=5):
+ lora_name = gr.Textbox(label='Name', info='The name of your new LoRA file')
+ with gr.Column():
+ always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name is the same, checking will replace the existing file, and unchecking will load and continue from it (the rank must be the same).', elem_classes=['no-background'])
+
+ with gr.Row():
+ with gr.Column():
+ lora_rank = gr.Slider(label='LoRA Rank', value=32, minimum=0, maximum=1024, step=4, info='Also called dimension count. Higher values = larger file, more content control. Smaller values = smaller file, less control. Use 4 or 8 for style, 128 or 256 to teach, 1024+ for fine-detail on big data. More VRAM is needed for higher ranks.')
+ lora_alpha = gr.Slider(label='LoRA Alpha', value=64, minimum=0, maximum=2048, step=4, info='This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.')
+ batch_size = gr.Slider(label='Batch Size', value=128, minimum=0, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.')
+ micro_batch_size = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.')
+ cutoff_len = gr.Slider(label='Cutoff Length', minimum=0, maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.')
+
+ with gr.Column():
+ save_steps = gr.Number(label='Save every n steps', value=0, info='If above 0, a checkpoint of the LoRA will be saved every time this many steps pass.')
+
+ epochs = gr.Number(label='Epochs', value=3, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.')
+ learning_rate = gr.Textbox(label='Learning Rate', value='3e-4', info='In scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.')
+ with gr.Row():
+ lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='linear', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt'], info='Learning rate scheduler - defines how the learning rate changes over time. "Constant" means never change, "linear" means to go in a straight line from the learning rate down to 0, cosine follows a curve, etc.', elem_classes=['slim-dropdown'])
+
+ with gr.Accordion(label='Advanced Options', open=False):
+ with gr.Row():
+ with gr.Column():
+ lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers. This can help reduce overfitting. Most users should leave at default.')
+ stop_at_loss = gr.Slider(label='Stop at loss', minimum=0.0, maximum=3.0, step=0.1, value=0.00, info='The process will automatically stop once the desired loss value is reached. (reasonable numbers are 1.5-1.8)')
+ with gr.Row():
+ optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Different optimizer implementation options, for advanced users. Effects of different options are not well documented yet.', elem_classes=['slim-dropdown'])
+
+ with gr.Column():
+ warmup_steps = gr.Number(label='Warmup Steps', value=100, info='For this many steps at the start, the learning rate will be lower than normal. This helps the trainer prepare the model and precompute statistics to improve the quality of training after the start.')
+ train_only_after = gr.Textbox(label='Train Only After', value='', info='Only consider text *after* this string in any given chunk for training. For Alpaca datasets, use "### Response:" to only train the response and ignore the input.')
+
+ add_eos_token = gr.Checkbox(label='Add EOS token', value=False, info="Adds EOS token for each dataset item. In case of raw text, the EOS will be added at the Hard Cut")
+
+ higher_rank_limit = gr.Checkbox(label='Enable higher ranks', value=False, info='If checked, changes Rank/Alpha slider above to go much higher. This will not work without a datacenter-class GPU.')
+ report_to = gr.Radio(label="Save detailed logs with", value="None", choices=["None", "wandb", "tensorboard"], interactive=True)
+
+ with gr.Column():
+ with gr.Tab(label='Formatted Dataset'):
+ with gr.Row():
+ format = gr.Dropdown(choices=utils.get_datasets('training/formats', 'json'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.', elem_classes=['slim-dropdown'], interactive=not mu)
+ ui.create_refresh_button(format, lambda: None, lambda: {'choices': utils.get_datasets('training/formats', 'json')}, 'refresh-button', interactive=not mu)
+
+ with gr.Row():
+ dataset = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'json'), value='None', label='Dataset', info='The dataset file to use for training.', elem_classes=['slim-dropdown'], interactive=not mu)
+ ui.create_refresh_button(dataset, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'json')}, 'refresh-button', interactive=not mu)
+
+ with gr.Row():
+ eval_dataset = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'json'), value='None', label='Evaluation Dataset', info='The (optional) dataset file used to evaluate the model after training.', elem_classes=['slim-dropdown'], interactive=not mu)
+ ui.create_refresh_button(eval_dataset, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'json')}, 'refresh-button', interactive=not mu)
+
+ eval_steps = gr.Number(label='Evaluate every n steps', value=100, info='If an evaluation dataset is given, test it every time this many steps pass.')
+
+ with gr.Tab(label="Raw text file"):
+ with gr.Row():
+ raw_text_file = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'txt'), value='None', label='Text file', info='The raw text file to use for training.', elem_classes=['slim-dropdown'], interactive=not mu)
+ ui.create_refresh_button(raw_text_file, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'txt')}, 'refresh-button', interactive=not mu)
+
+ with gr.Row():
+ with gr.Column():
+ overlap_len = gr.Slider(label='Overlap Length', minimum=0, maximum=512, value=128, step=16, info='How many tokens from the prior chunk of text to include into the next chunk. (The chunks themselves will be of a size determined by Cutoff Length). Setting overlap to exactly half the cutoff length may be ideal.')
+ newline_favor_len = gr.Slider(label='Prefer Newline Cut Length', minimum=0, maximum=512, value=128, step=16, info='Length (in characters, not tokens) of the maximum distance to shift an overlap cut by to ensure chunks cut at newlines. If too low, cuts may occur in the middle of lines.')
+
+ with gr.Column():
+ hard_cut_string = gr.Textbox(label='Hard Cut String', value='\\n\\n\\n', info='String that indicates a hard cut between text parts. Helps prevent unwanted overlap.')
+ min_chars = gr.Number(label='Ignore small blocks', value=0, info='Ignore Hard Cut blocks that have less or equal characters than this number')
+
+ with gr.Row():
+ start_button = gr.Button("Start LoRA Training", variant='primary', interactive=not mu)
+ stop_button = gr.Button("Interrupt", interactive=not mu)
+
+ output = gr.Markdown(value="Ready")
+
+ with gr.Tab('Perplexity evaluation', elem_id='evaluate-tab'):
+ with gr.Row():
+ with gr.Column():
+ models = gr.Dropdown(utils.get_available_models(), label='Models', multiselect=True, interactive=not mu)
+ evaluate_text_file = gr.Dropdown(choices=['wikitext', 'ptb', 'ptb_new'] + utils.get_datasets('training/datasets', 'txt')[1:], value='wikitext', label='Input dataset', info='The raw text file on which the model will be evaluated. The first options are automatically downloaded: wikitext, ptb, and ptb_new. The next options are your local text files under training/datasets.', interactive=not mu)
+ with gr.Row():
+ with gr.Column():
+ stride_length = gr.Slider(label='Stride', minimum=0, maximum=32768, value=512, step=256, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.')
+
+ with gr.Column():
+ max_length = gr.Slider(label='max_length', minimum=0, maximum=32768, value=0, step=256, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.')
+
+ with gr.Row():
+ start_current_evaluation = gr.Button("Evaluate loaded model", interactive=not mu)
+ start_evaluation = gr.Button("Evaluate selected models", interactive=not mu)
+ stop_evaluation = gr.Button("Interrupt", interactive=not mu)
+
+ with gr.Column():
+ evaluation_log = gr.Markdown(value='')
+
+ evaluation_table = gr.Dataframe(value=generate_markdown_table(), interactive=True)
+ with gr.Row():
+ save_comments = gr.Button('Save comments', elem_classes="small-button", interactive=not mu)
+ refresh_table = gr.Button('Refresh the table', elem_classes="small-button", interactive=not mu)
+
+ # Training events
+ all_params = [lora_name, always_override, save_steps, micro_batch_size, batch_size, epochs, learning_rate, lr_scheduler_type, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format, eval_steps, raw_text_file, overlap_len, newline_favor_len, higher_rank_limit, warmup_steps, optimizer, hard_cut_string, train_only_after, stop_at_loss, add_eos_token, min_chars, report_to]
+
+ copy_from.change(do_copy_params, [copy_from] + all_params, all_params)
+ start_button.click(do_train, all_params, output)
+ stop_button.click(do_interrupt, None, None, queue=False)
+ higher_rank_limit.change(change_rank_limit, [higher_rank_limit], [lora_rank, lora_alpha])
+
+ # Evaluation events. For some reason, the interrupt event
+ # doesn't work with the .then() syntax, so I write them one
+ # by one in this ugly but functional way.
+ ev = start_evaluation.click(calculate_perplexity, [models, evaluate_text_file, stride_length, max_length], evaluation_log, show_progress=False)
+ start_evaluation.click(generate_markdown_table, None, evaluation_table, show_progress=False)
+
+ start_current_evaluation.click(lambda: ['current model'], None, tmp)
+ ev_cur = start_current_evaluation.click(calculate_perplexity, [tmp, evaluate_text_file, stride_length, max_length], evaluation_log, show_progress=False)
+ start_current_evaluation.click(generate_markdown_table, None, evaluation_table, show_progress=False)
+
+ stop_evaluation.click(None, None, None, cancels=[ev, ev_cur], queue=False)
+ refresh_table.click(generate_markdown_table, None, evaluation_table, show_progress=True)
+ save_comments.click(
+ save_past_evaluations, evaluation_table, None).then(
+ lambda: "Comments saved.", None, evaluation_log, show_progress=False)
+
+
+def do_interrupt():
+ global WANT_INTERRUPT
+ WANT_INTERRUPT = True
+
+
+def do_copy_params(lora_name: str, *args):
+ f_name = f"{shared.args.lora_dir}/{clean_path(None, lora_name)}/training_parameters.json"
+ if Path(f_name).is_file():
+ with open(f_name, 'r', encoding='utf-8') as format_file:
+ params: dict[str, str] = json.load(format_file)
+ else:
+ params = {}
+
+ result = list()
+ for i in range(0, len(PARAMETERS)):
+ key = PARAMETERS[i]
+ if key in params:
+ result.append(params[key])
+ else:
+ result.append(args[i])
+
+ return result
+
+
+def change_rank_limit(use_higher_ranks: bool):
+ mult = 2 if use_higher_ranks else 1
+ return {"maximum": 1024 * mult, "__type__": "update"}, {"maximum": 2048 * mult, "__type__": "update"}
+
+
+def clean_path(base_path: str, path: str):
+ """Strips unusual symbols and forcibly builds a path as relative to the intended directory."""
+ path = path.replace('\\', '/').replace('..', '_')
+ if base_path is None:
+ return path
+
+ return f'{Path(base_path).absolute()}/{path}'
+
+
+def backup_adapter(input_folder):
+ # Get the creation date of the file adapter_model.bin
+ try:
+ adapter_file = Path(f"{input_folder}/adapter_model.bin")
+ if adapter_file.is_file():
+
+ logger.info("Backing up existing LoRA adapter...")
+ creation_date = datetime.fromtimestamp(adapter_file.stat().st_ctime)
+ creation_date_str = creation_date.strftime("Backup-%Y-%m-%d")
+
+ # Create the new subfolder
+ subfolder_path = Path(f"{input_folder}/{creation_date_str}")
+ subfolder_path.mkdir(parents=True, exist_ok=True)
+
+ # Check if the file already exists in the subfolder
+ backup_adapter_file = Path(f"{input_folder}/{creation_date_str}/adapter_model.bin")
+ if backup_adapter_file.is_file():
+ print(" - Backup already exists. Skipping backup process.")
+ return
+
+ # Copy existing files to the new subfolder
+ existing_files = Path(input_folder).iterdir()
+ for file in existing_files:
+ if file.is_file():
+ shutil.copy2(file, subfolder_path)
+ except Exception as e:
+ print("An error occurred in backup_adapter:", str(e))
+
+
+def calc_trainable_parameters(model):
+ trainable_params = 0
+ all_param = 0
+ for _, param in model.named_parameters():
+ num_params = param.numel()
+ # if using DS Zero 3 and the weights are initialized empty
+ if num_params == 0 and hasattr(param, "ds_numel"):
+ num_params = param.ds_numel
+
+ all_param += num_params
+ if param.requires_grad:
+ trainable_params += num_params
+
+ return trainable_params, all_param
+
+
+def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, raw_text_file: str, overlap_len: int, newline_favor_len: int, higher_rank_limit: bool, warmup_steps: int, optimizer: str, hard_cut_string: str, train_only_after: str, stop_at_loss: float, add_eos_token: bool, min_chars: int, report_to: str):
+
+ if shared.args.monkey_patch:
+ from alpaca_lora_4bit.monkeypatch.peft_tuners_lora_monkey_patch import (
+ replace_peft_model_with_int4_lora_model
+ )
+ replace_peft_model_with_int4_lora_model()
+
+ global WANT_INTERRUPT
+ WANT_INTERRUPT = False
+
+ # == Input validation / processing ==
+ yield "Preparing the input..."
+ lora_file_path = clean_path(None, lora_name)
+ if lora_file_path.strip() == '':
+ yield "Missing or invalid LoRA file name input."
+ return
+
+ lora_file_path = f"{Path(shared.args.lora_dir)}/{lora_file_path}"
+ actual_lr = float(learning_rate)
+ model_type = type(shared.model).__name__
+
+ if model_type in MODEL_CLASSES:
+ model_id = MODEL_CLASSES[model_type]
+ else:
+ model_id = "llama"
+ if model_type == "PeftModelForCausalLM":
+ if len(shared.lora_names) > 0:
+ yield "You are trying to train a LoRA while you already have another LoRA loaded. This will work, but may have unexpected effects. *(Will continue anyway in 5 seconds, press `Interrupt` to stop.)*"
+ logger.warning("Training LoRA over top of another LoRA. May have unexpected effects.")
+ else:
+ yield "Model ID not matched due to LoRA loading. Consider reloading base model. *(Will continue anyway in 5 seconds, press `Interrupt` to stop.)*"
+ logger.warning("Model ID not matched due to LoRA loading. Consider reloading base model.")
+ else:
+ yield "LoRA training has only currently been validated for LLaMA, OPT, GPT-J, and GPT-NeoX models. Unexpected errors may follow. *(Will continue anyway in 5 seconds, press `Interrupt` to stop.)*"
+ logger.warning(f"LoRA training has only currently been validated for LLaMA, OPT, GPT-J, and GPT-NeoX models. (Found model type: {model_type})")
+
+ time.sleep(5)
+
+ if shared.args.loader == 'GPTQ-for-LLaMa' and not shared.args.monkey_patch:
+ yield "LoRA training with GPTQ-for-LLaMa requires loading with `--monkey-patch`"
+ return
+
+ if cutoff_len <= 0 or micro_batch_size <= 0 or batch_size <= 0 or actual_lr <= 0 or lora_rank <= 0 or lora_alpha <= 0:
+ yield "Cannot input zeroes."
+ return
+
+ gradient_accumulation_steps = batch_size // micro_batch_size
+ shared.tokenizer.pad_token_id = 0
+ shared.tokenizer.padding_side = "left"
+
+ def encode(text, add_bos_token):
+ result = shared.tokenizer.encode(text, truncation=True, max_length=cutoff_len)
+ # Check if the first two tokens are BOS
+ if len(result) >= 2 and result[:2] == [shared.tokenizer.bos_token_id, shared.tokenizer.bos_token_id]:
+ result = result[1:]
+
+ if not add_bos_token and result[0] == shared.tokenizer.bos_token_id:
+ result = result[1:]
+ return result
+
+ def tokenize(prompt, append_eos_token=False):
+
+ if train_only_after == '' or train_only_after not in prompt:
+ input_ids = encode(prompt, True)
+
+ if append_eos_token and input_ids[-1] != shared.tokenizer.eos_token_id and len(input_ids) < cutoff_len:
+ input_ids.append(shared.tokenizer.eos_token_id)
+
+ input_ids = [shared.tokenizer.pad_token_id] * (cutoff_len - len(input_ids)) + input_ids
+ labels = [1] * len(input_ids)
+
+ else:
+ ind = prompt.index(train_only_after) + len(train_only_after)
+ before_tokens = encode(prompt[:ind], True)
+ after_tokens = encode(prompt[ind:], False)
+
+ if append_eos_token and after_tokens[-1] != shared.tokenizer.eos_token_id:
+ after_tokens.append(shared.tokenizer.eos_token_id)
+
+ full_length = len(after_tokens) + len(before_tokens)
+ if full_length > cutoff_len:
+ after_tokens = after_tokens[:cutoff_len - len(before_tokens)]
+ else:
+ before_tokens = [shared.tokenizer.pad_token_id] * (cutoff_len - full_length) + before_tokens
+
+ input_ids = before_tokens + after_tokens
+ labels = [-100] * len(before_tokens) + [1] * len(after_tokens)
+
+ input_ids = torch.tensor(input_ids)
+ return {
+ "input_ids": input_ids,
+ "labels": labels,
+ "attention_mask": input_ids.ne(shared.tokenizer.pad_token_id),
+ }
+
+ train_template.clear()
+
+ # == Prep the dataset, format, etc ==
+ if raw_text_file not in ['None', '']:
+ train_template["template_type"] = "raw_text"
+ logger.info("Loading raw text file dataset...")
+ fullpath = clean_path('training/datasets', f'{raw_text_file}')
+ fullpath = Path(fullpath)
+ if fullpath.is_dir():
+ logger.info('Training path directory {}'.format(raw_text_file))
+ raw_text = ""
+ file_paths = sorted(fullpath.glob('*.txt'), key=lambda path: natural_keys(path.name))
+ for file_path in file_paths:
+ if file_path.is_file():
+ with file_path.open('r', encoding='utf-8') as file:
+ raw_text += file.read().replace('\r', '')
+
+ logger.info(f"Loaded training file: {file_path.name}")
+ else:
+ with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file:
+ raw_text = file.read().replace('\r', '')
+
+ cut_string = hard_cut_string.replace('\\n', '\n')
+ eos_added = 0
+ out_tokens = []
+ for text_part in raw_text.split(cut_string):
+ if len(text_part.strip()) <= min_chars:
+ continue
+
+ tokens = shared.tokenizer.encode(text_part)
+ if add_eos_token:
+ tokens.append(shared.tokenizer.eos_token_id)
+ eos_added += 1
+
+ step = cutoff_len - overlap_len
+ if step <= 0:
+ yield f"Error: overlap_len ({overlap_len}) cannot be greater than or equal to cutoff_len ({cutoff_len})"
+ return
+
+ out_tokens.extend(split_chunks(tokens, cutoff_len, step))
+
+ if eos_added > 0:
+ print(f"EOS added to {eos_added} text blocks")
+
+ del raw_text # Note: could be a gig for a large dataset, so delete redundant data as we go to be safe on RAM
+ text_chunks = [shared.tokenizer.decode(x) for x in out_tokens]
+ del out_tokens
+ if newline_favor_len > 0:
+ text_chunks = [cut_chunk_for_newline(x, newline_favor_len) for x in text_chunks]
+
+ train_data = Dataset.from_list([tokenize(x) for x in text_chunks])
+ del text_chunks
+ eval_data = None
+ else:
+ if dataset in ['None', '']:
+ yield "Missing dataset choice input, cannot continue."
+ return
+
+ if format in ['None', '']:
+ yield "Missing format choice input, cannot continue."
+ return
+
+ train_template["template_type"] = "dataset"
+
+ with open(clean_path('training/formats', f'{format}.json'), 'r', encoding='utf-8-sig') as formatFile:
+ format_data: dict[str, str] = json.load(formatFile)
+
+ # == store training prompt ==
+ for _, value in format_data.items():
+ prompt_key = f"template_{len(train_template)}"
+ train_template[prompt_key] = value
+
+ def generate_prompt(data_point: dict[str, str]):
+ for options, data in format_data.items():
+ if set(options.split(',')) == set(x[0] for x in data_point.items() if (type(x[1]) is str and len(x[1].strip()) > 0)):
+ for key, val in data_point.items():
+ if type(val) is str:
+ data = data.replace(f'%{key}%', val)
+ return data
+ raise RuntimeError(f'Data-point "{data_point}" has no keyset match within format "{list(format_data.keys())}"')
+
+ def generate_and_tokenize_prompt(data_point):
+ prompt = generate_prompt(data_point)
+ return tokenize(prompt, add_eos_token)
+
+ logger.info("Loading JSON datasets...")
+ data = load_dataset("json", data_files=clean_path('training/datasets', f'{dataset}.json'))
+ train_data = data['train'].map(generate_and_tokenize_prompt, new_fingerprint='%030x' % random.randrange(16**30))
+
+ if eval_dataset == 'None':
+ eval_data = None
+ else:
+ eval_data = load_dataset("json", data_files=clean_path('training/datasets', f'{eval_dataset}.json'))
+ eval_data = eval_data['train'].map(generate_and_tokenize_prompt, new_fingerprint='%030x' % random.randrange(16**30))
+
+ # == We MUST reload model if it went through any previous training, even failed one ==
+ if shared.model_dirty_from_training:
+ selected_model = shared.model_name
+ if selected_model:
+ print("\033[1;31;1m(Model has been modified by previous training, it needs to be reloaded...)\033[0;37;0m")
+ try:
+ yield f"Reloading {selected_model}..."
+ reload_model()
+ if shared.model is not None:
+ print("Model reloaded OK, continue with training.")
+ else:
+ return f"Failed to load {selected_model}."
+ except:
+ exc = traceback.format_exc()
+ logger.error('Failed to reload the model.')
+ print(exc)
+ return exc.replace('\n', '\n\n')
+
+ # == Start prepping the model itself ==
+ if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'):
+ logger.info("Getting model ready...")
+ prepare_model_for_kbit_training(shared.model)
+
+ # base model is now frozen and should not be reused for any other LoRA training than this one
+ shared.model_dirty_from_training = True
+
+ logger.info("Preparing for training...")
+ config = LoraConfig(
+ r=lora_rank,
+ lora_alpha=lora_alpha,
+ target_modules=model_to_lora_modules[model_id],
+ lora_dropout=lora_dropout,
+ bias="none",
+ task_type="CAUSAL_LM"
+ )
+
+ # == Backup the existing adapter ==
+ if not always_override:
+ backup_adapter(lora_file_path)
+
+ # == get model trainable params
+ model_trainable_params, model_all_params = calc_trainable_parameters(shared.model)
+
+ try:
+ logger.info("Creating LoRA model...")
+ lora_model = get_peft_model(shared.model, config)
+ if not always_override and Path(f"{lora_file_path}/adapter_model.bin").is_file():
+ logger.info("Loading existing LoRA data...")
+ state_dict_peft = torch.load(f"{lora_file_path}/adapter_model.bin")
+ set_peft_model_state_dict(lora_model, state_dict_peft)
+ except:
+ yield traceback.format_exc().replace('\n', '\n\n')
+ return
+
+ if shared.args.monkey_patch:
+ from alpaca_lora_4bit.autograd_4bit import Autograd4bitQuantLinear
+ from alpaca_lora_4bit.models import Linear4bitLt
+ for _, m in lora_model.named_modules():
+ if isinstance(m, Autograd4bitQuantLinear) or isinstance(m, Linear4bitLt):
+ if m.is_v1_model:
+ m.zeros = m.zeros.half()
+ m.scales = m.scales.half()
+
+ class Tracked():
+ def __init__(self):
+ self.current_steps = 0
+ self.max_steps = 0
+ self.did_save = False
+
+ tracked = Tracked()
+ actual_save_steps = math.ceil(save_steps / gradient_accumulation_steps)
+
+ class Callbacks(transformers.TrainerCallback):
+ def on_step_begin(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs):
+ tracked.current_steps = state.global_step * gradient_accumulation_steps
+ tracked.max_steps = state.max_steps * gradient_accumulation_steps
+ if WANT_INTERRUPT:
+ control.should_epoch_stop = True
+ control.should_training_stop = True
+ elif state.global_step > 0 and actual_save_steps > 0 and state.global_step % actual_save_steps == 0:
+ lora_model.save_pretrained(f"{lora_file_path}/checkpoint-{tracked.current_steps}/")
+ # Save log
+ with open(f"{lora_file_path}/checkpoint-{tracked.current_steps}/training_log.json", 'w', encoding='utf-8') as file:
+ json.dump(train_log, file, indent=2)
+ # == Save training prompt ==
+ with open(f"{lora_file_path}/checkpoint-{tracked.current_steps}/training_prompt.json", 'w', encoding='utf-8') as file:
+ json.dump(train_template, file, indent=2)
+
+ def on_substep_end(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs):
+ tracked.current_steps += 1
+ if WANT_INTERRUPT:
+ control.should_epoch_stop = True
+ control.should_training_stop = True
+
+ def on_log(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, logs, **kwargs):
+ train_log.update(logs)
+ train_log.update({"current_steps": tracked.current_steps})
+ if WANT_INTERRUPT:
+ print("\033[1;31;1mInterrupted by user\033[0;37;0m")
+
+ print(f"\033[1;30;40mStep: {tracked.current_steps} \033[0;37;0m", end='')
+ if 'loss' in logs:
+ loss = float(logs['loss'])
+ if loss <= stop_at_loss:
+ control.should_epoch_stop = True
+ control.should_training_stop = True
+ print(f"\033[1;31;1mStop Loss {stop_at_loss} reached.\033[0;37;0m")
+
+ trainer = transformers.Trainer(
+ model=lora_model,
+ train_dataset=train_data,
+ eval_dataset=eval_data,
+ args=transformers.TrainingArguments(
+ report_to=report_to if report_to != "None" else None,
+ per_device_train_batch_size=micro_batch_size,
+ gradient_accumulation_steps=gradient_accumulation_steps,
+ warmup_steps=math.ceil(warmup_steps / gradient_accumulation_steps),
+ num_train_epochs=epochs,
+ learning_rate=actual_lr,
+ fp16=False if shared.args.cpu else True,
+ optim=optimizer,
+ logging_steps=2 if stop_at_loss > 0 else 5,
+ evaluation_strategy="steps" if eval_data is not None else "no",
+ eval_steps=math.ceil(eval_steps / gradient_accumulation_steps) if eval_data is not None else None,
+ save_strategy="steps" if eval_data is not None else "no",
+ output_dir=lora_file_path,
+ lr_scheduler_type=lr_scheduler_type,
+ load_best_model_at_end=eval_data is not None,
+ # TODO: Enable multi-device support
+ ddp_find_unused_parameters=None,
+ no_cuda=shared.args.cpu,
+ ),
+ data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False),
+ callbacks=list([Callbacks()])
+ )
+
+ lora_model.config.use_cache = False
+
+ if torch.__version__ >= "2" and sys.platform != "win32":
+ lora_model = torch.compile(lora_model)
+
+ # == Save parameters for reuse ==
+ with open(f"{lora_file_path}/training_parameters.json", 'w', encoding='utf-8') as file:
+ vars = locals()
+ json.dump({x: vars[x] for x in PARAMETERS}, file, indent=2)
+
+ # == Save training prompt ==
+ with open(f"{lora_file_path}/training_prompt.json", 'w', encoding='utf-8') as file:
+ json.dump(train_template, file, indent=2)
+
+ # == Main run and monitor loop ==
+ logger.info("Starting training...")
+ yield "Starting..."
+
+ lora_trainable_param, lora_all_param = calc_trainable_parameters(lora_model)
+
+ projections_string = ", ".join([projection.replace("_proj", "") for projection in model_to_lora_modules[model_id]])
+
+ print(f"Training '{model_id}' model using ({projections_string}) projections")
+
+ if lora_all_param > 0:
+ print(f"Trainable params: {lora_trainable_param:,d} ({100 * lora_trainable_param / lora_all_param:.4f} %), All params: {lora_all_param:,d} (Model: {model_all_params:,d})")
+
+ train_log.update({"base_model_name": shared.model_name})
+ train_log.update({"base_model_class": shared.model.__class__.__name__})
+ train_log.update({"base_loaded_in_4bit": getattr(lora_model, "is_loaded_in_4bit", False)})
+ train_log.update({"base_loaded_in_8bit": getattr(lora_model, "is_loaded_in_8bit", False)})
+ train_log.update({"projections": projections_string})
+
+ if stop_at_loss > 0:
+ print(f"Monitoring loss \033[1;31;1m(Auto-Stop at: {stop_at_loss})\033[0;37;0m")
+
+ if WANT_INTERRUPT:
+ yield "Interrupted before start."
+ return
+
+ def log_train_dataset(trainer):
+ decoded_entries = []
+ # Try to decode the entries and write the log file
+ try:
+ # Iterate over the first 10 elements in the dataset (or fewer if there are less than 10)
+ for i in range(min(10, len(trainer.train_dataset))):
+ decoded_text = shared.tokenizer.decode(trainer.train_dataset[i]['input_ids'])
+ decoded_entries.append({"value": decoded_text})
+
+ # Write the log file
+ Path('logs').mkdir(exist_ok=True)
+ with open(Path('logs/train_dataset_sample.json'), 'w') as json_file:
+ json.dump(decoded_entries, json_file, indent=4)
+
+ logger.info("Log file 'train_dataset_sample.json' created in the 'logs' directory.")
+ except Exception as e:
+ logger.error(f"Failed to create log file due to error: {e}")
+
+ def threaded_run():
+ log_train_dataset(trainer)
+ trainer.train()
+ # Note: save in the thread in case the gradio thread breaks (eg browser closed)
+ lora_model.save_pretrained(lora_file_path)
+ logger.info("LoRA training run is completed and saved.")
+ # Save log
+ with open(f"{lora_file_path}/training_log.json", 'w', encoding='utf-8') as file:
+ json.dump(train_log, file, indent=2)
+
+ thread = threading.Thread(target=threaded_run)
+ thread.start()
+ last_step = 0
+ start_time = time.perf_counter()
+
+ while thread.is_alive():
+ time.sleep(0.5)
+ if WANT_INTERRUPT:
+ yield "Interrupting, please wait... *(Run will stop after the current training step completes.)*"
+
+ elif tracked.current_steps != last_step:
+ last_step = tracked.current_steps
+ time_elapsed = time.perf_counter() - start_time
+ if time_elapsed <= 0:
+ timer_info = ""
+ total_time_estimate = 999
+ else:
+ its = tracked.current_steps / time_elapsed
+ if its > 1:
+ timer_info = f"`{its:.2f}` it/s"
+ else:
+ timer_info = f"`{1.0/its:.2f}` s/it"
+
+ total_time_estimate = (1.0 / its) * (tracked.max_steps)
+
+ yield f"Running... **{tracked.current_steps}** / **{tracked.max_steps}** ... {timer_info}, {format_time(time_elapsed)} / {format_time(total_time_estimate)} ... {format_time(total_time_estimate - time_elapsed)} remaining"
+
+ # Saving in the train thread might fail if an error occurs, so save here if so.
+ if not tracked.did_save:
+ logger.info("Training complete, saving...")
+ lora_model.save_pretrained(lora_file_path)
+
+ if WANT_INTERRUPT:
+ logger.info("Training interrupted.")
+ yield f"Interrupted. Incomplete LoRA saved to `{lora_file_path}`."
+ else:
+ logger.info("Training complete!")
+ yield f"Done! LoRA saved to `{lora_file_path}`.\n\nBefore testing your new LoRA, make sure to first reload the model, as it is currently dirty from training."
+
+
+def split_chunks(arr, size, step):
+ for i in range(0, len(arr), step):
+ yield arr[i:i + size]
+
+
+def cut_chunk_for_newline(chunk: str, max_length: int):
+ if '\n' not in chunk:
+ return chunk
+
+ first_newline = chunk.index('\n')
+ if first_newline < max_length:
+ chunk = chunk[first_newline + 1:]
+
+ if '\n' not in chunk:
+ return chunk
+
+ last_newline = chunk.rindex('\n')
+ if len(chunk) - last_newline < max_length:
+ chunk = chunk[:last_newline]
+
+ return chunk
+
+
+def format_time(seconds: float):
+ if seconds < 120:
+ return f"`{seconds:.0f}` seconds"
+
+ minutes = seconds / 60
+ if minutes < 120:
+ return f"`{minutes:.0f}` minutes"
+
+ hours = minutes / 60
+ return f"`{hours:.0f}` hours"
diff --git a/modules/ui.py b/modules/ui.py
new file mode 100644
index 0000000000000000000000000000000000000000..875e4e2498a1ac6e53e0f80e43e400d7b7396e7e
--- /dev/null
+++ b/modules/ui.py
@@ -0,0 +1,230 @@
+import copy
+from pathlib import Path
+
+import gradio as gr
+import torch
+import yaml
+
+from modules import shared
+
+
+with open(Path(__file__).resolve().parent / '../css/NotoSans/stylesheet.css', 'r') as f:
+ css = f.read()
+with open(Path(__file__).resolve().parent / '../css/main.css', 'r') as f:
+ css += f.read()
+with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f:
+ js = f.read()
+with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r') as f:
+ save_files_js = f.read()
+with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r') as f:
+ switch_tabs_js = f.read()
+with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r') as f:
+ show_controls_js = f.read()
+
+refresh_symbol = '🔄'
+delete_symbol = '🗑️'
+save_symbol = '💾'
+
+theme = gr.themes.Default(
+ font=['Noto Sans', 'Helvetica', 'ui-sans-serif', 'system-ui', 'sans-serif'],
+ font_mono=['IBM Plex Mono', 'ui-monospace', 'Consolas', 'monospace'],
+).set(
+ border_color_primary='#c5c5d2',
+ button_large_padding='6px 12px',
+ body_text_color_subdued='#484848',
+ background_fill_secondary='#eaeaea'
+)
+
+if Path("notification.mp3").exists():
+ audio_notification_js = "document.querySelector('#audio_notification audio')?.play();"
+else:
+ audio_notification_js = ""
+
+
+def list_model_elements():
+ elements = [
+ 'loader',
+ 'filter_by_loader',
+ 'cpu_memory',
+ 'auto_devices',
+ 'disk',
+ 'cpu',
+ 'bf16',
+ 'load_in_8bit',
+ 'trust_remote_code',
+ 'use_fast',
+ 'load_in_4bit',
+ 'compute_dtype',
+ 'quant_type',
+ 'use_double_quant',
+ 'wbits',
+ 'groupsize',
+ 'model_type',
+ 'pre_layer',
+ 'triton',
+ 'desc_act',
+ 'no_inject_fused_attention',
+ 'no_inject_fused_mlp',
+ 'no_use_cuda_fp16',
+ 'disable_exllama',
+ 'cfg_cache',
+ 'threads',
+ 'threads_batch',
+ 'n_batch',
+ 'no_mmap',
+ 'mlock',
+ 'mul_mat_q',
+ 'n_gpu_layers',
+ 'tensor_split',
+ 'n_ctx',
+ 'llama_cpp_seed',
+ 'gpu_split',
+ 'max_seq_len',
+ 'compress_pos_emb',
+ 'alpha_value',
+ 'rope_freq_base',
+ 'numa',
+ ]
+
+ for i in range(torch.cuda.device_count()):
+ elements.append(f'gpu_memory_{i}')
+
+ return elements
+
+
+def list_interface_input_elements():
+ elements = [
+ 'max_new_tokens',
+ 'auto_max_new_tokens',
+ 'max_tokens_second',
+ 'seed',
+ 'temperature',
+ 'top_p',
+ 'top_k',
+ 'typical_p',
+ 'epsilon_cutoff',
+ 'eta_cutoff',
+ 'repetition_penalty',
+ 'repetition_penalty_range',
+ 'encoder_repetition_penalty',
+ 'no_repeat_ngram_size',
+ 'min_length',
+ 'do_sample',
+ 'penalty_alpha',
+ 'num_beams',
+ 'length_penalty',
+ 'early_stopping',
+ 'mirostat_mode',
+ 'mirostat_tau',
+ 'mirostat_eta',
+ 'grammar_string',
+ 'negative_prompt',
+ 'guidance_scale',
+ 'add_bos_token',
+ 'ban_eos_token',
+ 'custom_token_bans',
+ 'truncation_length',
+ 'custom_stopping_strings',
+ 'skip_special_tokens',
+ 'stream',
+ 'tfs',
+ 'top_a',
+ ]
+
+ # Chat elements
+ elements += [
+ 'textbox',
+ 'start_with',
+ 'character_menu',
+ 'history',
+ 'name1',
+ 'name2',
+ 'greeting',
+ 'context',
+ 'mode',
+ 'instruction_template',
+ 'name1_instruct',
+ 'name2_instruct',
+ 'context_instruct',
+ 'turn_template',
+ 'chat_style',
+ 'chat-instruct_command',
+ ]
+
+ # Notebook/default elements
+ elements += [
+ 'textbox-notebook',
+ 'textbox-default',
+ 'output_textbox',
+ 'prompt_menu-default',
+ 'prompt_menu-notebook',
+ ]
+
+ # Model elements
+ elements += list_model_elements()
+
+ return elements
+
+
+def gather_interface_values(*args):
+ output = {}
+ for i, element in enumerate(list_interface_input_elements()):
+ output[element] = args[i]
+
+ if not shared.args.multi_user:
+ shared.persistent_interface_state = output
+
+ return output
+
+
+def apply_interface_values(state, use_persistent=False):
+ if use_persistent:
+ state = shared.persistent_interface_state
+
+ elements = list_interface_input_elements()
+ if len(state) == 0:
+ return [gr.update() for k in elements] # Dummy, do nothing
+ else:
+ return [state[k] if k in state else gr.update() for k in elements]
+
+
+def save_settings(state, preset, instruction_template, extensions, show_controls):
+ output = copy.deepcopy(shared.settings)
+ exclude = ['name2', 'greeting', 'context', 'turn_template']
+ for k in state:
+ if k in shared.settings and k not in exclude:
+ output[k] = state[k]
+
+ output['preset'] = preset
+ output['prompt-default'] = state['prompt_menu-default']
+ output['prompt-notebook'] = state['prompt_menu-notebook']
+ output['character'] = state['character_menu']
+ output['instruction_template'] = instruction_template
+ output['default_extensions'] = extensions
+ output['seed'] = int(output['seed'])
+ output['show_controls'] = show_controls
+
+ return yaml.dump(output, sort_keys=False, width=float("inf"))
+
+
+def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_class, interactive=True):
+ """
+ Copied from https://github.com/AUTOMATIC1111/stable-diffusion-webui
+ """
+ def refresh():
+ refresh_method()
+ args = refreshed_args() if callable(refreshed_args) else refreshed_args
+
+ for k, v in args.items():
+ setattr(refresh_component, k, v)
+
+ return gr.update(**(args or {}))
+
+ refresh_button = gr.Button(refresh_symbol, elem_classes=elem_class, interactive=interactive)
+ refresh_button.click(
+ fn=refresh,
+ inputs=[],
+ outputs=[refresh_component]
+ )
+
+ return refresh_button
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
new file mode 100644
index 0000000000000000000000000000000000000000..95515e166ceff6d9b1539f350357ba6de7930bc7
--- /dev/null
+++ b/modules/ui_chat.py
@@ -0,0 +1,352 @@
+import json
+from functools import partial
+from pathlib import Path
+
+import gradio as gr
+from PIL import Image
+
+from modules import chat, prompts, shared, ui, utils
+from modules.html_generator import chat_html_wrapper
+from modules.text_generation import stop_everything_event
+from modules.utils import gradio
+
+inputs = ('Chat input', 'interface_state')
+reload_arr = ('history', 'name1', 'name2', 'mode', 'chat_style')
+clear_arr = ('delete_chat-confirm', 'delete_chat', 'delete_chat-cancel')
+
+
+def create_ui():
+ mu = shared.args.multi_user
+
+ shared.gradio['Chat input'] = gr.State()
+ shared.gradio['dummy'] = gr.State()
+ shared.gradio['history'] = gr.State({'internal': [], 'visible': []})
+
+ with gr.Tab('Chat', elem_id='chat-tab', elem_classes=("old-ui" if shared.args.chat_buttons else None)):
+ with gr.Row():
+ with gr.Column(elem_id='chat-col'):
+ shared.gradio['display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, '', '', 'chat', 'cai-chat'))
+
+ with gr.Row(elem_id="chat-input-row"):
+ with gr.Column(scale=1, elem_id='gr-hover-container'):
+ gr.HTML(value='