antonovmaxim committed on
Commit
6926a80
1 Parent(s): 840dd12
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +5 -5
  2. requirements.txt +4 -0
  3. text-generation-webui/.gitignore +21 -0
  4. text-generation-webui/LICENSE +661 -0
  5. text-generation-webui/README.md +196 -0
  6. text-generation-webui/api-example-stream.py +90 -0
  7. text-generation-webui/api-example.py +59 -0
  8. text-generation-webui/characters/Example.json +7 -0
  9. text-generation-webui/characters/Example.png +0 -0
  10. text-generation-webui/convert-to-flexgen.py +60 -0
  11. text-generation-webui/convert-to-safetensors.py +38 -0
  12. text-generation-webui/download-model.py +176 -0
  13. text-generation-webui/extensions/character_bias/script.py +42 -0
  14. text-generation-webui/extensions/elevenlabs_tts/requirements.txt +3 -0
  15. text-generation-webui/extensions/elevenlabs_tts/script.py +113 -0
  16. text-generation-webui/extensions/gallery/script.py +82 -0
  17. text-generation-webui/extensions/google_translate/requirements.txt +1 -0
  18. text-generation-webui/extensions/google_translate/script.py +42 -0
  19. text-generation-webui/extensions/llama_prompts/script.py +18 -0
  20. text-generation-webui/extensions/send_pictures/script.py +46 -0
  21. text-generation-webui/extensions/silero_tts/requirements.txt +6 -0
  22. text-generation-webui/extensions/silero_tts/script.py +169 -0
  23. text-generation-webui/models/place-your-models-here.txt +0 -0
  24. text-generation-webui/modules/GPTQ_loader.py +71 -0
  25. text-generation-webui/modules/RWKV.py +74 -0
  26. text-generation-webui/modules/callbacks.py +98 -0
  27. text-generation-webui/modules/chat.py +398 -0
  28. text-generation-webui/modules/deepspeed_parameters.py +75 -0
  29. text-generation-webui/modules/extensions.py +45 -0
  30. text-generation-webui/modules/html_generator.py +357 -0
  31. text-generation-webui/modules/models.py +168 -0
  32. text-generation-webui/modules/shared.py +103 -0
  33. text-generation-webui/modules/text_generation.py +238 -0
  34. text-generation-webui/modules/ui.py +92 -0
  35. text-generation-webui/presets/Contrastive Search.txt +3 -0
  36. text-generation-webui/presets/Debug-deterministic.txt +1 -0
  37. text-generation-webui/presets/Default.txt +12 -0
  38. text-generation-webui/presets/Individual Today.txt +6 -0
  39. text-generation-webui/presets/Kobold-Godlike.txt +6 -0
  40. text-generation-webui/presets/Kobold-Liminal Drift.txt +6 -0
  41. text-generation-webui/presets/Naive.txt +4 -0
  42. text-generation-webui/presets/NovelAI-Best Guess.txt +6 -0
  43. text-generation-webui/presets/NovelAI-Decadence.txt +6 -0
  44. text-generation-webui/presets/NovelAI-Genesis.txt +6 -0
  45. text-generation-webui/presets/NovelAI-Lycaenidae.txt +6 -0
  46. text-generation-webui/presets/NovelAI-Ouroboros.txt +6 -0
  47. text-generation-webui/presets/NovelAI-Pleasing Results.txt +6 -0
  48. text-generation-webui/presets/NovelAI-Sphinx Moth.txt +6 -0
  49. text-generation-webui/presets/NovelAI-Storywriter.txt +6 -0
  50. text-generation-webui/presets/Pygmalion.txt +6 -0
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
  title: Text Generation Webui Space
3
- emoji:
4
- colorFrom: gray
5
- colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 3.20.1
8
- app_file: app.py
9
  pinned: false
10
  license: mit
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Text Generation Webui Space
3
+ emoji: 🏃
4
+ colorFrom: yellow
5
+ colorTo: purple
6
  sdk: gradio
7
  sdk_version: 3.20.1
8
+ app_file: text-generation-webui/server.py
9
  pinned: false
10
  license: mit
11
  ---
12
 
13
+ Check out this repo https://github.com/oobabooga/text-generation-webui
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ torchaudio
4
+ git
text-generation-webui/.gitignore ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cache/*
2
+ characters/*
3
+ extensions/silero_tts/outputs/*
4
+ extensions/elevenlabs_tts/outputs/*
5
+ logs/*
6
+ models/*
7
+ softprompts/*
8
+ torch-dumps/*
9
+ *pycache*
10
+ */*pycache*
11
+ */*/pycache*
12
+
13
+ settings.json
14
+ img_bot*
15
+ img_me*
16
+
17
+ !characters/Example.json
18
+ !characters/Example.png
19
+ !models/place-your-models-here.txt
20
+ !softprompts/place-your-softprompts-here.txt
21
+ !torch-dumps/place-your-pt-models-here.txt
text-generation-webui/LICENSE ADDED
@@ -0,0 +1,661 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GNU AFFERO GENERAL PUBLIC LICENSE
2
+ Version 3, 19 November 2007
3
+
4
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+ Preamble
9
+
10
+ The GNU Affero General Public License is a free, copyleft license for
11
+ software and other kinds of works, specifically designed to ensure
12
+ cooperation with the community in the case of network server software.
13
+
14
+ The licenses for most software and other practical works are designed
15
+ to take away your freedom to share and change the works. By contrast,
16
+ our General Public Licenses are intended to guarantee your freedom to
17
+ share and change all versions of a program--to make sure it remains free
18
+ software for all its users.
19
+
20
+ When we speak of free software, we are referring to freedom, not
21
+ price. Our General Public Licenses are designed to make sure that you
22
+ have the freedom to distribute copies of free software (and charge for
23
+ them if you wish), that you receive source code or can get it if you
24
+ want it, that you can change the software or use pieces of it in new
25
+ free programs, and that you know you can do these things.
26
+
27
+ Developers that use our General Public Licenses protect your rights
28
+ with two steps: (1) assert copyright on the software, and (2) offer
29
+ you this License which gives you legal permission to copy, distribute
30
+ and/or modify the software.
31
+
32
+ A secondary benefit of defending all users' freedom is that
33
+ improvements made in alternate versions of the program, if they
34
+ receive widespread use, become available for other developers to
35
+ incorporate. Many developers of free software are heartened and
36
+ encouraged by the resulting cooperation. However, in the case of
37
+ software used on network servers, this result may fail to come about.
38
+ The GNU General Public License permits making a modified version and
39
+ letting the public access it on a server without ever releasing its
40
+ source code to the public.
41
+
42
+ The GNU Affero General Public License is designed specifically to
43
+ ensure that, in such cases, the modified source code becomes available
44
+ to the community. It requires the operator of a network server to
45
+ provide the source code of the modified version running there to the
46
+ users of that server. Therefore, public use of a modified version, on
47
+ a publicly accessible server, gives the public access to the source
48
+ code of the modified version.
49
+
50
+ An older license, called the Affero General Public License and
51
+ published by Affero, was designed to accomplish similar goals. This is
52
+ a different license, not a version of the Affero GPL, but Affero has
53
+ released a new version of the Affero GPL which permits relicensing under
54
+ this license.
55
+
56
+ The precise terms and conditions for copying, distribution and
57
+ modification follow.
58
+
59
+ TERMS AND CONDITIONS
60
+
61
+ 0. Definitions.
62
+
63
+ "This License" refers to version 3 of the GNU Affero General Public License.
64
+
65
+ "Copyright" also means copyright-like laws that apply to other kinds of
66
+ works, such as semiconductor masks.
67
+
68
+ "The Program" refers to any copyrightable work licensed under this
69
+ License. Each licensee is addressed as "you". "Licensees" and
70
+ "recipients" may be individuals or organizations.
71
+
72
+ To "modify" a work means to copy from or adapt all or part of the work
73
+ in a fashion requiring copyright permission, other than the making of an
74
+ exact copy. The resulting work is called a "modified version" of the
75
+ earlier work or a work "based on" the earlier work.
76
+
77
+ A "covered work" means either the unmodified Program or a work based
78
+ on the Program.
79
+
80
+ To "propagate" a work means to do anything with it that, without
81
+ permission, would make you directly or secondarily liable for
82
+ infringement under applicable copyright law, except executing it on a
83
+ computer or modifying a private copy. Propagation includes copying,
84
+ distribution (with or without modification), making available to the
85
+ public, and in some countries other activities as well.
86
+
87
+ To "convey" a work means any kind of propagation that enables other
88
+ parties to make or receive copies. Mere interaction with a user through
89
+ a computer network, with no transfer of a copy, is not conveying.
90
+
91
+ An interactive user interface displays "Appropriate Legal Notices"
92
+ to the extent that it includes a convenient and prominently visible
93
+ feature that (1) displays an appropriate copyright notice, and (2)
94
+ tells the user that there is no warranty for the work (except to the
95
+ extent that warranties are provided), that licensees may convey the
96
+ work under this License, and how to view a copy of this License. If
97
+ the interface presents a list of user commands or options, such as a
98
+ menu, a prominent item in the list meets this criterion.
99
+
100
+ 1. Source Code.
101
+
102
+ The "source code" for a work means the preferred form of the work
103
+ for making modifications to it. "Object code" means any non-source
104
+ form of a work.
105
+
106
+ A "Standard Interface" means an interface that either is an official
107
+ standard defined by a recognized standards body, or, in the case of
108
+ interfaces specified for a particular programming language, one that
109
+ is widely used among developers working in that language.
110
+
111
+ The "System Libraries" of an executable work include anything, other
112
+ than the work as a whole, that (a) is included in the normal form of
113
+ packaging a Major Component, but which is not part of that Major
114
+ Component, and (b) serves only to enable use of the work with that
115
+ Major Component, or to implement a Standard Interface for which an
116
+ implementation is available to the public in source code form. A
117
+ "Major Component", in this context, means a major essential component
118
+ (kernel, window system, and so on) of the specific operating system
119
+ (if any) on which the executable work runs, or a compiler used to
120
+ produce the work, or an object code interpreter used to run it.
121
+
122
+ The "Corresponding Source" for a work in object code form means all
123
+ the source code needed to generate, install, and (for an executable
124
+ work) run the object code and to modify the work, including scripts to
125
+ control those activities. However, it does not include the work's
126
+ System Libraries, or general-purpose tools or generally available free
127
+ programs which are used unmodified in performing those activities but
128
+ which are not part of the work. For example, Corresponding Source
129
+ includes interface definition files associated with source files for
130
+ the work, and the source code for shared libraries and dynamically
131
+ linked subprograms that the work is specifically designed to require,
132
+ such as by intimate data communication or control flow between those
133
+ subprograms and other parts of the work.
134
+
135
+ The Corresponding Source need not include anything that users
136
+ can regenerate automatically from other parts of the Corresponding
137
+ Source.
138
+
139
+ The Corresponding Source for a work in source code form is that
140
+ same work.
141
+
142
+ 2. Basic Permissions.
143
+
144
+ All rights granted under this License are granted for the term of
145
+ copyright on the Program, and are irrevocable provided the stated
146
+ conditions are met. This License explicitly affirms your unlimited
147
+ permission to run the unmodified Program. The output from running a
148
+ covered work is covered by this License only if the output, given its
149
+ content, constitutes a covered work. This License acknowledges your
150
+ rights of fair use or other equivalent, as provided by copyright law.
151
+
152
+ You may make, run and propagate covered works that you do not
153
+ convey, without conditions so long as your license otherwise remains
154
+ in force. You may convey covered works to others for the sole purpose
155
+ of having them make modifications exclusively for you, or provide you
156
+ with facilities for running those works, provided that you comply with
157
+ the terms of this License in conveying all material for which you do
158
+ not control copyright. Those thus making or running the covered works
159
+ for you must do so exclusively on your behalf, under your direction
160
+ and control, on terms that prohibit them from making any copies of
161
+ your copyrighted material outside their relationship with you.
162
+
163
+ Conveying under any other circumstances is permitted solely under
164
+ the conditions stated below. Sublicensing is not allowed; section 10
165
+ makes it unnecessary.
166
+
167
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168
+
169
+ No covered work shall be deemed part of an effective technological
170
+ measure under any applicable law fulfilling obligations under article
171
+ 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172
+ similar laws prohibiting or restricting circumvention of such
173
+ measures.
174
+
175
+ When you convey a covered work, you waive any legal power to forbid
176
+ circumvention of technological measures to the extent such circumvention
177
+ is effected by exercising rights under this License with respect to
178
+ the covered work, and you disclaim any intention to limit operation or
179
+ modification of the work as a means of enforcing, against the work's
180
+ users, your or third parties' legal rights to forbid circumvention of
181
+ technological measures.
182
+
183
+ 4. Conveying Verbatim Copies.
184
+
185
+ You may convey verbatim copies of the Program's source code as you
186
+ receive it, in any medium, provided that you conspicuously and
187
+ appropriately publish on each copy an appropriate copyright notice;
188
+ keep intact all notices stating that this License and any
189
+ non-permissive terms added in accord with section 7 apply to the code;
190
+ keep intact all notices of the absence of any warranty; and give all
191
+ recipients a copy of this License along with the Program.
192
+
193
+ You may charge any price or no price for each copy that you convey,
194
+ and you may offer support or warranty protection for a fee.
195
+
196
+ 5. Conveying Modified Source Versions.
197
+
198
+ You may convey a work based on the Program, or the modifications to
199
+ produce it from the Program, in the form of source code under the
200
+ terms of section 4, provided that you also meet all of these conditions:
201
+
202
+ a) The work must carry prominent notices stating that you modified
203
+ it, and giving a relevant date.
204
+
205
+ b) The work must carry prominent notices stating that it is
206
+ released under this License and any conditions added under section
207
+ 7. This requirement modifies the requirement in section 4 to
208
+ "keep intact all notices".
209
+
210
+ c) You must license the entire work, as a whole, under this
211
+ License to anyone who comes into possession of a copy. This
212
+ License will therefore apply, along with any applicable section 7
213
+ additional terms, to the whole of the work, and all its parts,
214
+ regardless of how they are packaged. This License gives no
215
+ permission to license the work in any other way, but it does not
216
+ invalidate such permission if you have separately received it.
217
+
218
+ d) If the work has interactive user interfaces, each must display
219
+ Appropriate Legal Notices; however, if the Program has interactive
220
+ interfaces that do not display Appropriate Legal Notices, your
221
+ work need not make them do so.
222
+
223
+ A compilation of a covered work with other separate and independent
224
+ works, which are not by their nature extensions of the covered work,
225
+ and which are not combined with it such as to form a larger program,
226
+ in or on a volume of a storage or distribution medium, is called an
227
+ "aggregate" if the compilation and its resulting copyright are not
228
+ used to limit the access or legal rights of the compilation's users
229
+ beyond what the individual works permit. Inclusion of a covered work
230
+ in an aggregate does not cause this License to apply to the other
231
+ parts of the aggregate.
232
+
233
+ 6. Conveying Non-Source Forms.
234
+
235
+ You may convey a covered work in object code form under the terms
236
+ of sections 4 and 5, provided that you also convey the
237
+ machine-readable Corresponding Source under the terms of this License,
238
+ in one of these ways:
239
+
240
+ a) Convey the object code in, or embodied in, a physical product
241
+ (including a physical distribution medium), accompanied by the
242
+ Corresponding Source fixed on a durable physical medium
243
+ customarily used for software interchange.
244
+
245
+ b) Convey the object code in, or embodied in, a physical product
246
+ (including a physical distribution medium), accompanied by a
247
+ written offer, valid for at least three years and valid for as
248
+ long as you offer spare parts or customer support for that product
249
+ model, to give anyone who possesses the object code either (1) a
250
+ copy of the Corresponding Source for all the software in the
251
+ product that is covered by this License, on a durable physical
252
+ medium customarily used for software interchange, for a price no
253
+ more than your reasonable cost of physically performing this
254
+ conveying of source, or (2) access to copy the
255
+ Corresponding Source from a network server at no charge.
256
+
257
+ c) Convey individual copies of the object code with a copy of the
258
+ written offer to provide the Corresponding Source. This
259
+ alternative is allowed only occasionally and noncommercially, and
260
+ only if you received the object code with such an offer, in accord
261
+ with subsection 6b.
262
+
263
+ d) Convey the object code by offering access from a designated
264
+ place (gratis or for a charge), and offer equivalent access to the
265
+ Corresponding Source in the same way through the same place at no
266
+ further charge. You need not require recipients to copy the
267
+ Corresponding Source along with the object code. If the place to
268
+ copy the object code is a network server, the Corresponding Source
269
+ may be on a different server (operated by you or a third party)
270
+ that supports equivalent copying facilities, provided you maintain
271
+ clear directions next to the object code saying where to find the
272
+ Corresponding Source. Regardless of what server hosts the
273
+ Corresponding Source, you remain obligated to ensure that it is
274
+ available for as long as needed to satisfy these requirements.
275
+
276
+ e) Convey the object code using peer-to-peer transmission, provided
277
+ you inform other peers where the object code and Corresponding
278
+ Source of the work are being offered to the general public at no
279
+ charge under subsection 6d.
280
+
281
+ A separable portion of the object code, whose source code is excluded
282
+ from the Corresponding Source as a System Library, need not be
283
+ included in conveying the object code work.
284
+
285
+ A "User Product" is either (1) a "consumer product", which means any
286
+ tangible personal property which is normally used for personal, family,
287
+ or household purposes, or (2) anything designed or sold for incorporation
288
+ into a dwelling. In determining whether a product is a consumer product,
289
+ doubtful cases shall be resolved in favor of coverage. For a particular
290
+ product received by a particular user, "normally used" refers to a
291
+ typical or common use of that class of product, regardless of the status
292
+ of the particular user or of the way in which the particular user
293
+ actually uses, or expects or is expected to use, the product. A product
294
+ is a consumer product regardless of whether the product has substantial
295
+ commercial, industrial or non-consumer uses, unless such uses represent
296
+ the only significant mode of use of the product.
297
+
298
+ "Installation Information" for a User Product means any methods,
299
+ procedures, authorization keys, or other information required to install
300
+ and execute modified versions of a covered work in that User Product from
301
+ a modified version of its Corresponding Source. The information must
302
+ suffice to ensure that the continued functioning of the modified object
303
+ code is in no case prevented or interfered with solely because
304
+ modification has been made.
305
+
306
+ If you convey an object code work under this section in, or with, or
307
+ specifically for use in, a User Product, and the conveying occurs as
308
+ part of a transaction in which the right of possession and use of the
309
+ User Product is transferred to the recipient in perpetuity or for a
310
+ fixed term (regardless of how the transaction is characterized), the
311
+ Corresponding Source conveyed under this section must be accompanied
312
+ by the Installation Information. But this requirement does not apply
313
+ if neither you nor any third party retains the ability to install
314
+ modified object code on the User Product (for example, the work has
315
+ been installed in ROM).
316
+
317
+ The requirement to provide Installation Information does not include a
318
+ requirement to continue to provide support service, warranty, or updates
319
+ for a work that has been modified or installed by the recipient, or for
320
+ the User Product in which it has been modified or installed. Access to a
321
+ network may be denied when the modification itself materially and
322
+ adversely affects the operation of the network or violates the rules and
323
+ protocols for communication across the network.
324
+
325
+ Corresponding Source conveyed, and Installation Information provided,
326
+ in accord with this section must be in a format that is publicly
327
+ documented (and with an implementation available to the public in
328
+ source code form), and must require no special password or key for
329
+ unpacking, reading or copying.
330
+
331
+ 7. Additional Terms.
332
+
333
+ "Additional permissions" are terms that supplement the terms of this
334
+ License by making exceptions from one or more of its conditions.
335
+ Additional permissions that are applicable to the entire Program shall
336
+ be treated as though they were included in this License, to the extent
337
+ that they are valid under applicable law. If additional permissions
338
+ apply only to part of the Program, that part may be used separately
339
+ under those permissions, but the entire Program remains governed by
340
+ this License without regard to the additional permissions.
341
+
342
+ When you convey a copy of a covered work, you may at your option
343
+ remove any additional permissions from that copy, or from any part of
344
+ it. (Additional permissions may be written to require their own
345
+ removal in certain cases when you modify the work.) You may place
346
+ additional permissions on material, added by you to a covered work,
347
+ for which you have or can give appropriate copyright permission.
348
+
349
+ Notwithstanding any other provision of this License, for material you
350
+ add to a covered work, you may (if authorized by the copyright holders of
351
+ that material) supplement the terms of this License with terms:
352
+
353
+ a) Disclaiming warranty or limiting liability differently from the
354
+ terms of sections 15 and 16 of this License; or
355
+
356
+ b) Requiring preservation of specified reasonable legal notices or
357
+ author attributions in that material or in the Appropriate Legal
358
+ Notices displayed by works containing it; or
359
+
360
+ c) Prohibiting misrepresentation of the origin of that material, or
361
+ requiring that modified versions of such material be marked in
362
+ reasonable ways as different from the original version; or
363
+
364
+ d) Limiting the use for publicity purposes of names of licensors or
365
+ authors of the material; or
366
+
367
+ e) Declining to grant rights under trademark law for use of some
368
+ trade names, trademarks, or service marks; or
369
+
370
+ f) Requiring indemnification of licensors and authors of that
371
+ material by anyone who conveys the material (or modified versions of
372
+ it) with contractual assumptions of liability to the recipient, for
373
+ any liability that these contractual assumptions directly impose on
374
+ those licensors and authors.
375
+
376
+ All other non-permissive additional terms are considered "further
377
+ restrictions" within the meaning of section 10. If the Program as you
378
+ received it, or any part of it, contains a notice stating that it is
379
+ governed by this License along with a term that is a further
380
+ restriction, you may remove that term. If a license document contains
381
+ a further restriction but permits relicensing or conveying under this
382
+ License, you may add to a covered work material governed by the terms
383
+ of that license document, provided that the further restriction does
384
+ not survive such relicensing or conveying.
385
+
386
+ If you add terms to a covered work in accord with this section, you
387
+ must place, in the relevant source files, a statement of the
388
+ additional terms that apply to those files, or a notice indicating
389
+ where to find the applicable terms.
390
+
391
+ Additional terms, permissive or non-permissive, may be stated in the
392
+ form of a separately written license, or stated as exceptions;
393
+ the above requirements apply either way.
394
+
395
+ 8. Termination.
396
+
397
+ You may not propagate or modify a covered work except as expressly
398
+ provided under this License. Any attempt otherwise to propagate or
399
+ modify it is void, and will automatically terminate your rights under
400
+ this License (including any patent licenses granted under the third
401
+ paragraph of section 11).
402
+
403
+ However, if you cease all violation of this License, then your
404
+ license from a particular copyright holder is reinstated (a)
405
+ provisionally, unless and until the copyright holder explicitly and
406
+ finally terminates your license, and (b) permanently, if the copyright
407
+ holder fails to notify you of the violation by some reasonable means
408
+ prior to 60 days after the cessation.
409
+
410
+ Moreover, your license from a particular copyright holder is
411
+ reinstated permanently if the copyright holder notifies you of the
412
+ violation by some reasonable means, this is the first time you have
413
+ received notice of violation of this License (for any work) from that
414
+ copyright holder, and you cure the violation prior to 30 days after
415
+ your receipt of the notice.
416
+
417
+ Termination of your rights under this section does not terminate the
418
+ licenses of parties who have received copies or rights from you under
419
+ this License. If your rights have been terminated and not permanently
420
+ reinstated, you do not qualify to receive new licenses for the same
421
+ material under section 10.
422
+
423
+ 9. Acceptance Not Required for Having Copies.
424
+
425
+ You are not required to accept this License in order to receive or
426
+ run a copy of the Program. Ancillary propagation of a covered work
427
+ occurring solely as a consequence of using peer-to-peer transmission
428
+ to receive a copy likewise does not require acceptance. However,
429
+ nothing other than this License grants you permission to propagate or
430
+ modify any covered work. These actions infringe copyright if you do
431
+ not accept this License. Therefore, by modifying or propagating a
432
+ covered work, you indicate your acceptance of this License to do so.
433
+
434
+ 10. Automatic Licensing of Downstream Recipients.
435
+
436
+ Each time you convey a covered work, the recipient automatically
437
+ receives a license from the original licensors, to run, modify and
438
+ propagate that work, subject to this License. You are not responsible
439
+ for enforcing compliance by third parties with this License.
440
+
441
+ An "entity transaction" is a transaction transferring control of an
442
+ organization, or substantially all assets of one, or subdividing an
443
+ organization, or merging organizations. If propagation of a covered
444
+ work results from an entity transaction, each party to that
445
+ transaction who receives a copy of the work also receives whatever
446
+ licenses to the work the party's predecessor in interest had or could
447
+ give under the previous paragraph, plus a right to possession of the
448
+ Corresponding Source of the work from the predecessor in interest, if
449
+ the predecessor has it or can get it with reasonable efforts.
450
+
451
+ You may not impose any further restrictions on the exercise of the
452
+ rights granted or affirmed under this License. For example, you may
453
+ not impose a license fee, royalty, or other charge for exercise of
454
+ rights granted under this License, and you may not initiate litigation
455
+ (including a cross-claim or counterclaim in a lawsuit) alleging that
456
+ any patent claim is infringed by making, using, selling, offering for
457
+ sale, or importing the Program or any portion of it.
458
+
459
+ 11. Patents.
460
+
461
+ A "contributor" is a copyright holder who authorizes use under this
462
+ License of the Program or a work on which the Program is based. The
463
+ work thus licensed is called the contributor's "contributor version".
464
+
465
+ A contributor's "essential patent claims" are all patent claims
466
+ owned or controlled by the contributor, whether already acquired or
467
+ hereafter acquired, that would be infringed by some manner, permitted
468
+ by this License, of making, using, or selling its contributor version,
469
+ but do not include claims that would be infringed only as a
470
+ consequence of further modification of the contributor version. For
471
+ purposes of this definition, "control" includes the right to grant
472
+ patent sublicenses in a manner consistent with the requirements of
473
+ this License.
474
+
475
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
476
+ patent license under the contributor's essential patent claims, to
477
+ make, use, sell, offer for sale, import and otherwise run, modify and
478
+ propagate the contents of its contributor version.
479
+
480
+ In the following three paragraphs, a "patent license" is any express
481
+ agreement or commitment, however denominated, not to enforce a patent
482
+ (such as an express permission to practice a patent or covenant not to
483
+ sue for patent infringement). To "grant" such a patent license to a
484
+ party means to make such an agreement or commitment not to enforce a
485
+ patent against the party.
486
+
487
+ If you convey a covered work, knowingly relying on a patent license,
488
+ and the Corresponding Source of the work is not available for anyone
489
+ to copy, free of charge and under the terms of this License, through a
490
+ publicly available network server or other readily accessible means,
491
+ then you must either (1) cause the Corresponding Source to be so
492
+ available, or (2) arrange to deprive yourself of the benefit of the
493
+ patent license for this particular work, or (3) arrange, in a manner
494
+ consistent with the requirements of this License, to extend the patent
495
+ license to downstream recipients. "Knowingly relying" means you have
496
+ actual knowledge that, but for the patent license, your conveying the
497
+ covered work in a country, or your recipient's use of the covered work
498
+ in a country, would infringe one or more identifiable patents in that
499
+ country that you have reason to believe are valid.
500
+
501
+ If, pursuant to or in connection with a single transaction or
502
+ arrangement, you convey, or propagate by procuring conveyance of, a
503
+ covered work, and grant a patent license to some of the parties
504
+ receiving the covered work authorizing them to use, propagate, modify
505
+ or convey a specific copy of the covered work, then the patent license
506
+ you grant is automatically extended to all recipients of the covered
507
+ work and works based on it.
508
+
509
+ A patent license is "discriminatory" if it does not include within
510
+ the scope of its coverage, prohibits the exercise of, or is
511
+ conditioned on the non-exercise of one or more of the rights that are
512
+ specifically granted under this License. You may not convey a covered
513
+ work if you are a party to an arrangement with a third party that is
514
+ in the business of distributing software, under which you make payment
515
+ to the third party based on the extent of your activity of conveying
516
+ the work, and under which the third party grants, to any of the
517
+ parties who would receive the covered work from you, a discriminatory
518
+ patent license (a) in connection with copies of the covered work
519
+ conveyed by you (or copies made from those copies), or (b) primarily
520
+ for and in connection with specific products or compilations that
521
+ contain the covered work, unless you entered into that arrangement,
522
+ or that patent license was granted, prior to 28 March 2007.
523
+
524
+ Nothing in this License shall be construed as excluding or limiting
525
+ any implied license or other defenses to infringement that may
526
+ otherwise be available to you under applicable patent law.
527
+
528
+ 12. No Surrender of Others' Freedom.
529
+
530
+ If conditions are imposed on you (whether by court order, agreement or
531
+ otherwise) that contradict the conditions of this License, they do not
532
+ excuse you from the conditions of this License. If you cannot convey a
533
+ covered work so as to satisfy simultaneously your obligations under this
534
+ License and any other pertinent obligations, then as a consequence you may
535
+ not convey it at all. For example, if you agree to terms that obligate you
536
+ to collect a royalty for further conveying from those to whom you convey
537
+ the Program, the only way you could satisfy both those terms and this
538
+ License would be to refrain entirely from conveying the Program.
539
+
540
+ 13. Remote Network Interaction; Use with the GNU General Public License.
541
+
542
+ Notwithstanding any other provision of this License, if you modify the
543
+ Program, your modified version must prominently offer all users
544
+ interacting with it remotely through a computer network (if your version
545
+ supports such interaction) an opportunity to receive the Corresponding
546
+ Source of your version by providing access to the Corresponding Source
547
+ from a network server at no charge, through some standard or customary
548
+ means of facilitating copying of software. This Corresponding Source
549
+ shall include the Corresponding Source for any work covered by version 3
550
+ of the GNU General Public License that is incorporated pursuant to the
551
+ following paragraph.
552
+
553
+ Notwithstanding any other provision of this License, you have
554
+ permission to link or combine any covered work with a work licensed
555
+ under version 3 of the GNU General Public License into a single
556
+ combined work, and to convey the resulting work. The terms of this
557
+ License will continue to apply to the part which is the covered work,
558
+ but the work with which it is combined will remain governed by version
559
+ 3 of the GNU General Public License.
560
+
561
+ 14. Revised Versions of this License.
562
+
563
+ The Free Software Foundation may publish revised and/or new versions of
564
+ the GNU Affero General Public License from time to time. Such new versions
565
+ will be similar in spirit to the present version, but may differ in detail to
566
+ address new problems or concerns.
567
+
568
+ Each version is given a distinguishing version number. If the
569
+ Program specifies that a certain numbered version of the GNU Affero General
570
+ Public License "or any later version" applies to it, you have the
571
+ option of following the terms and conditions either of that numbered
572
+ version or of any later version published by the Free Software
573
+ Foundation. If the Program does not specify a version number of the
574
+ GNU Affero General Public License, you may choose any version ever published
575
+ by the Free Software Foundation.
576
+
577
+ If the Program specifies that a proxy can decide which future
578
+ versions of the GNU Affero General Public License can be used, that proxy's
579
+ public statement of acceptance of a version permanently authorizes you
580
+ to choose that version for the Program.
581
+
582
+ Later license versions may give you additional or different
583
+ permissions. However, no additional obligations are imposed on any
584
+ author or copyright holder as a result of your choosing to follow a
585
+ later version.
586
+
587
+ 15. Disclaimer of Warranty.
588
+
589
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590
+ APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591
+ HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592
+ OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594
+ PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595
+ IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596
+ ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597
+
598
+ 16. Limitation of Liability.
599
+
600
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602
+ THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603
+ GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604
+ USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605
+ DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606
+ PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607
+ EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608
+ SUCH DAMAGES.
609
+
610
+ 17. Interpretation of Sections 15 and 16.
611
+
612
+ If the disclaimer of warranty and limitation of liability provided
613
+ above cannot be given local legal effect according to their terms,
614
+ reviewing courts shall apply local law that most closely approximates
615
+ an absolute waiver of all civil liability in connection with the
616
+ Program, unless a warranty or assumption of liability accompanies a
617
+ copy of the Program in return for a fee.
618
+
619
+ END OF TERMS AND CONDITIONS
620
+
621
+ How to Apply These Terms to Your New Programs
622
+
623
+ If you develop a new program, and you want it to be of the greatest
624
+ possible use to the public, the best way to achieve this is to make it
625
+ free software which everyone can redistribute and change under these terms.
626
+
627
+ To do so, attach the following notices to the program. It is safest
628
+ to attach them to the start of each source file to most effectively
629
+ state the exclusion of warranty; and each file should have at least
630
+ the "copyright" line and a pointer to where the full notice is found.
631
+
632
+ <one line to give the program's name and a brief idea of what it does.>
633
+ Copyright (C) <year> <name of author>
634
+
635
+ This program is free software: you can redistribute it and/or modify
636
+ it under the terms of the GNU Affero General Public License as published
637
+ by the Free Software Foundation, either version 3 of the License, or
638
+ (at your option) any later version.
639
+
640
+ This program is distributed in the hope that it will be useful,
641
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
642
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
643
+ GNU Affero General Public License for more details.
644
+
645
+ You should have received a copy of the GNU Affero General Public License
646
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
647
+
648
+ Also add information on how to contact you by electronic and paper mail.
649
+
650
+ If your software can interact with users remotely through a computer
651
+ network, you should also make sure that it provides a way for users to
652
+ get its source. For example, if your program is a web application, its
653
+ interface could display a "Source" link that leads users to an archive
654
+ of the code. There are many ways you could offer source, and different
655
+ solutions will be better for different programs; see section 13 for the
656
+ specific requirements.
657
+
658
+ You should also get your employer (if you work as a programmer) or school,
659
+ if any, to sign a "copyright disclaimer" for the program, if necessary.
660
+ For more information on this, and how to apply and follow the GNU AGPL, see
661
+ <https://www.gnu.org/licenses/>.
text-generation-webui/README.md ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Text generation web UI
2
+
3
+ A gradio web UI for running Large Language Models like GPT-J 6B, OPT, GALACTICA, LLaMA, and Pygmalion.
4
+
5
+ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) of text generation.
6
+
7
+ [[Try it on Google Colab]](https://colab.research.google.com/github/oobabooga/AI-Notebooks/blob/main/Colab-TextGen-GPU.ipynb)
8
+
9
+ |![Image1](https://github.com/oobabooga/screenshots/raw/main/qa.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/cai3.png) |
10
+ |:---:|:---:|
11
+ |![Image3](https://github.com/oobabooga/screenshots/raw/main/gpt4chan.png) | ![Image4](https://github.com/oobabooga/screenshots/raw/main/galactica.png) |
12
+
13
+ ## Features
14
+
15
+ * Switch between different models using a dropdown menu.
16
+ * Notebook mode that resembles OpenAI's playground.
17
+ * Chat mode for conversation and role playing.
18
+ * Generate nice HTML output for GPT-4chan.
19
+ * Generate Markdown output for [GALACTICA](https://github.com/paperswithcode/galai), including LaTeX support.
20
+ * Support for [Pygmalion](https://huggingface.co/models?search=pygmalionai/pygmalion) and custom characters in JSON or TavernAI Character Card formats ([FAQ](https://github.com/oobabooga/text-generation-webui/wiki/Pygmalion-chat-model-FAQ)).
21
+ * Advanced chat features (send images, get audio responses with TTS).
22
+ * Stream the text output in real time.
23
+ * Load parameter presets from text files.
24
+ * Load large models in 8-bit mode (see [here](https://github.com/oobabooga/text-generation-webui/issues/147#issuecomment-1456040134), [here](https://github.com/oobabooga/text-generation-webui/issues/20#issuecomment-1411650652) and [here](https://www.reddit.com/r/PygmalionAI/comments/1115gom/running_pygmalion_6b_with_8gb_of_vram/) if you are on Windows).
25
+ * Split large models across your GPU(s), CPU, and disk.
26
+ * CPU mode.
27
+ * [FlexGen offload](https://github.com/oobabooga/text-generation-webui/wiki/FlexGen).
28
+ * [DeepSpeed ZeRO-3 offload](https://github.com/oobabooga/text-generation-webui/wiki/DeepSpeed).
29
+ * Get responses via API, [with](https://github.com/oobabooga/text-generation-webui/blob/main/api-example-stream.py) or [without](https://github.com/oobabooga/text-generation-webui/blob/main/api-example.py) streaming.
30
+ * [Supports the LLaMA model, including 4-bit mode](https://github.com/oobabooga/text-generation-webui/wiki/LLaMA-model).
31
+ * [Supports the RWKV model](https://github.com/oobabooga/text-generation-webui/wiki/RWKV-model).
32
+ * Supports softprompts.
33
+ * [Supports extensions](https://github.com/oobabooga/text-generation-webui/wiki/Extensions).
34
+ * [Works on Google Colab](https://github.com/oobabooga/text-generation-webui/wiki/Running-on-Colab).
35
+
36
+ ## Installation option 1: conda
37
+
38
+ Open a terminal and copy and paste these commands one at a time ([install conda](https://docs.conda.io/en/latest/miniconda.html) first if you don't have it already):
39
+
40
+ ```
41
+ conda create -n textgen
42
+ conda activate textgen
43
+ conda install torchvision torchaudio pytorch-cuda=11.7 git -c pytorch -c nvidia
44
+ git clone https://github.com/oobabooga/text-generation-webui
45
+ cd text-generation-webui
46
+ pip install -r requirements.txt
47
+ ```
48
+
49
+ The third line assumes that you have an NVIDIA GPU.
50
+
51
+ * If you have an AMD GPU, replace the third command with this one:
52
+
53
+ ```
54
+ pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.2
55
+ ```
56
+
57
+ * If you are running it in CPU mode, replace the third command with this one:
58
+
59
+ ```
60
+ conda install pytorch torchvision torchaudio git -c pytorch
61
+ ```
62
+
63
+ See also: [Installation instructions for human beings](https://github.com/oobabooga/text-generation-webui/wiki/Installation-instructions-for-human-beings).
64
+
65
+ ## Installation option 2: one-click installers
66
+
67
+ [oobabooga-windows.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-windows.zip)
68
+
69
+ [oobabooga-linux.zip](https://github.com/oobabooga/one-click-installers/archive/refs/heads/oobabooga-linux.zip)
70
+
71
+ Just download the zip above, extract it, and double click on "install". The web UI and all its dependencies will be installed in the same folder.
72
+
73
+ * To download a model, double click on "download-model"
74
+ * To start the web UI, double click on "start-webui"
75
+
76
+ ## Downloading models
77
+
78
+ Models should be placed under `models/model-name`. For instance, `models/gpt-j-6B` for [GPT-J 6B](https://huggingface.co/EleutherAI/gpt-j-6B/tree/main).
79
+
80
+ #### Hugging Face
81
+
82
+ [Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads) is the main place to download models. These are some noteworthy examples:
83
+
84
+ * [GPT-J 6B](https://huggingface.co/EleutherAI/gpt-j-6B/tree/main)
85
+ * [GPT-Neo](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads&search=eleutherai+%2F+gpt-neo)
86
+ * [Pythia](https://huggingface.co/models?search=eleutherai/pythia)
87
+ * [OPT](https://huggingface.co/models?search=facebook/opt)
88
+ * [GALACTICA](https://huggingface.co/models?search=facebook/galactica)
89
+ * [\*-Erebus](https://huggingface.co/models?search=erebus) (NSFW)
90
+ * [Pygmalion](https://huggingface.co/models?search=pygmalion) (NSFW)
91
+
92
+ You can automatically download a model from HF using the script `download-model.py`:
93
+
94
+ python download-model.py organization/model
95
+
96
+ For instance:
97
+
98
+ python download-model.py facebook/opt-1.3b
99
+
100
+ If you want to download a model manually, note that all you need are the json, txt, and pytorch\*.bin (or model*.safetensors) files. The remaining files are not necessary.
101
+
102
+ #### GPT-4chan
103
+
104
+ [GPT-4chan](https://huggingface.co/ykilcher/gpt-4chan) has been removed from Hugging Face, so you need to download it elsewhere. You have two options:
105
+
106
+ * Torrent: [16-bit](https://archive.org/details/gpt4chan_model_float16) / [32-bit](https://archive.org/details/gpt4chan_model)
107
+ * Direct download: [16-bit](https://theswissbay.ch/pdf/_notpdf_/gpt4chan_model_float16/) / [32-bit](https://theswissbay.ch/pdf/_notpdf_/gpt4chan_model/)
108
+
109
+ The 32-bit version is only relevant if you intend to run the model in CPU mode. Otherwise, you should use the 16-bit version.
110
+
111
+ After downloading the model, follow these steps:
112
+
113
+ 1. Place the files under `models/gpt4chan_model_float16` or `models/gpt4chan_model`.
114
+ 2. Place GPT-J 6B's config.json file in that same folder: [config.json](https://huggingface.co/EleutherAI/gpt-j-6B/raw/main/config.json).
115
+ 3. Download GPT-J 6B's tokenizer files (they will be automatically detected when you attempt to load GPT-4chan):
116
+
117
+ ```
118
+ python download-model.py EleutherAI/gpt-j-6B --text-only
119
+ ```
120
+
121
+ ## Starting the web UI
122
+
123
+ conda activate textgen
124
+ python server.py
125
+
126
+ Then browse to
127
+
128
+ `http://localhost:7860/?__theme=dark`
129
+
130
+
131
+
132
+ Optionally, you can use the following command-line flags:
133
+
134
+ | Flag | Description |
135
+ |-------------|-------------|
136
+ | `-h`, `--help` | show this help message and exit |
137
+ | `--model MODEL` | Name of the model to load by default. |
138
+ | `--notebook` | Launch the web UI in notebook mode, where the output is written to the same text box as the input. |
139
+ | `--chat` | Launch the web UI in chat mode.|
140
+ | `--cai-chat` | Launch the web UI in chat mode with a style similar to Character.AI's. If the file `img_bot.png` or `img_bot.jpg` exists in the same folder as server.py, this image will be used as the bot's profile picture. Similarly, `img_me.png` or `img_me.jpg` will be used as your profile picture. |
141
+ | `--cpu` | Use the CPU to generate text.|
142
+ | `--load-in-8bit` | Load the model with 8-bit precision.|
143
+ | `--load-in-4bit` | DEPRECATED: use `--gptq-bits 4` instead. |
144
+ | `--gptq-bits GPTQ_BITS` | Load a pre-quantized model with specified precision. 2, 3, 4 and 8 (bit) are supported. Currently only works with LLaMA and OPT. |
145
+ | `--gptq-model-type MODEL_TYPE` | Model type of pre-quantized model. Currently only LLaMA and OPT are supported. |
146
+ | `--bf16` | Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU. |
147
+ | `--auto-devices` | Automatically split the model across the available GPU(s) and CPU.|
148
+ | `--disk` | If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk. |
149
+ | `--disk-cache-dir DISK_CACHE_DIR` | Directory to save the disk cache to. Defaults to `cache/`. |
150
+ | `--gpu-memory GPU_MEMORY [GPU_MEMORY ...]` | Maximum GPU memory in GiB to be allocated per GPU. Example: `--gpu-memory 10` for a single GPU, `--gpu-memory 10 5` for two GPUs. |
151
+ | `--cpu-memory CPU_MEMORY` | Maximum CPU memory in GiB to allocate for offloaded weights. Must be an integer number. Defaults to 99.|
152
+ | `--flexgen` | Enable the use of FlexGen offloading. |
153
+ | `--percent PERCENT [PERCENT ...]` | FlexGen: allocation percentages. Must be 6 numbers separated by spaces (default: 0, 100, 100, 0, 100, 0). |
154
+ | `--compress-weight` | FlexGen: Whether to compress weight (default: False).|
155
+ | `--pin-weight [PIN_WEIGHT]` | FlexGen: whether to pin weights (setting this to False reduces CPU memory by 20%). |
156
+ | `--deepspeed` | Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration. |
157
+ | `--nvme-offload-dir NVME_OFFLOAD_DIR` | DeepSpeed: Directory to use for ZeRO-3 NVME offloading. |
158
+ | `--local_rank LOCAL_RANK` | DeepSpeed: Optional argument for distributed setups. |
159
+ | `--rwkv-strategy RWKV_STRATEGY` | RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8". |
160
+ | `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. |
161
+ | `--no-stream` | Don't stream the text output in real time. |
162
+ | `--settings SETTINGS_FILE` | Load the default interface settings from this json file. See `settings-template.json` for an example. If you create a file called `settings.json`, this file will be loaded by default without the need to use the `--settings` flag.|
163
+ | `--extensions EXTENSIONS [EXTENSIONS ...]` | The list of extensions to load. If you want to load more than one extension, write the names separated by spaces. |
164
+ | `--listen` | Make the web UI reachable from your local network.|
165
+ | `--listen-port LISTEN_PORT` | The listening port that the server will use. |
166
+ | `--share` | Create a public URL. This is useful for running the web UI on Google Colab or similar. |
167
+ | `--auto-launch` | Open the web UI in the default browser upon launch. |
168
+ | `--verbose` | Print the prompts to the terminal. |
169
+
170
+ Out of memory errors? [Check this guide](https://github.com/oobabooga/text-generation-webui/wiki/Low-VRAM-guide).
171
+
172
+ ## Presets
173
+
174
+ Inference settings presets can be created under `presets/` as text files. These files are detected automatically at startup.
175
+
176
+ By default, 10 presets by NovelAI and KoboldAI are included. These were selected out of a sample of 43 presets after applying a K-Means clustering algorithm and selecting the elements closest to the average of each cluster.
177
+
178
+ ## System requirements
179
+
180
+ Check the [wiki](https://github.com/oobabooga/text-generation-webui/wiki/System-requirements) for some examples of VRAM and RAM usage in both GPU and CPU mode.
181
+
182
+ ## Contributing
183
+
184
+ Pull requests, suggestions, and issue reports are welcome.
185
+
186
+ Before reporting a bug, make sure that you have:
187
+
188
+ 1. Created a conda environment and installed the dependencies exactly as in the *Installation* section above.
189
+ 2. [Searched](https://github.com/oobabooga/text-generation-webui/issues) to see if an issue already exists for the problem you encountered.
190
+
191
+ ## Credits
192
+
193
+ - Gradio dropdown menu refresh button: https://github.com/AUTOMATIC1111/stable-diffusion-webui
194
+ - Verbose preset: Anonymous 4chan user.
195
+ - NovelAI and KoboldAI presets: https://github.com/KoboldAI/KoboldAI-Client/wiki/Settings-Presets
196
+ - Pygmalion preset, code for early stopping in chat mode, code for some of the sliders, --chat mode colors: https://github.com/PygmalionAI/gradio-ui/
text-generation-webui/api-example-stream.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+
3
+ Contributed by SagsMug. Thank you SagsMug.
4
+ https://github.com/oobabooga/text-generation-webui/pull/175
5
+
6
+ '''
7
+
8
+ import asyncio
9
+ import json
10
+ import random
11
+ import string
12
+
13
+ import websockets
14
+
15
+
16
def random_hash():
    """Return a random 9-character session identifier.

    The identifier is made of lowercase ASCII letters and digits. It is
    produced with the non-cryptographic ``random`` module, so it is only
    suitable as a throwaway session label, never as a secret.
    """
    letters = string.ascii_lowercase + string.digits
    # random.choices draws all nine characters in a single call, replacing the
    # hand-written generator whose loop index was never used.
    return ''.join(random.choices(letters, k=9))
19
+
20
+ async def run(context):
21
+ server = "127.0.0.1"
22
+ params = {
23
+ 'max_new_tokens': 200,
24
+ 'do_sample': True,
25
+ 'temperature': 0.5,
26
+ 'top_p': 0.9,
27
+ 'typical_p': 1,
28
+ 'repetition_penalty': 1.05,
29
+ 'top_k': 0,
30
+ 'min_length': 0,
31
+ 'no_repeat_ngram_size': 0,
32
+ 'num_beams': 1,
33
+ 'penalty_alpha': 0,
34
+ 'length_penalty': 1,
35
+ 'early_stopping': False,
36
+ }
37
+ session = random_hash()
38
+
39
+ async with websockets.connect(f"ws://{server}:7860/queue/join") as websocket:
40
+ while content := json.loads(await websocket.recv()):
41
+ #Python3.10 syntax, replace with if elif on older
42
+ match content["msg"]:
43
+ case "send_hash":
44
+ await websocket.send(json.dumps({
45
+ "session_hash": session,
46
+ "fn_index": 7
47
+ }))
48
+ case "estimation":
49
+ pass
50
+ case "send_data":
51
+ await websocket.send(json.dumps({
52
+ "session_hash": session,
53
+ "fn_index": 7,
54
+ "data": [
55
+ context,
56
+ params['max_new_tokens'],
57
+ params['do_sample'],
58
+ params['temperature'],
59
+ params['top_p'],
60
+ params['typical_p'],
61
+ params['repetition_penalty'],
62
+ params['top_k'],
63
+ params['min_length'],
64
+ params['no_repeat_ngram_size'],
65
+ params['num_beams'],
66
+ params['penalty_alpha'],
67
+ params['length_penalty'],
68
+ params['early_stopping'],
69
+ ]
70
+ }))
71
+ case "process_starts":
72
+ pass
73
+ case "process_generating" | "process_completed":
74
+ yield content["output"]["data"][0]
75
+ # You can search for your desired end indicator and
76
+ # stop generation by closing the websocket here
77
+ if (content["msg"] == "process_completed"):
78
+ break
79
+
80
+ prompt = "What I would like to say is the following: "
81
+
82
async def get_result():
    """Print each streamed response for ``prompt``, then the last one again.

    Nothing is printed when the stream produces no responses.
    """
    # Sentinel that distinguishes "no response received" from any real value.
    missing = object()
    response = missing

    async for response in run(prompt):
        # Print intermediate steps
        print(response)

    # Print final result. Without the guard an empty stream raised NameError,
    # because ``response`` was only ever bound by the loop.
    if response is not missing:
        print(response)
89
+
90
+ asyncio.run(get_result())
text-generation-webui/api-example.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+
3
+ This is an example on how to use the API for oobabooga/text-generation-webui.
4
+
5
+ Make sure to start the web UI with the following flags:
6
+
7
+ python server.py --model MODEL --listen --no-stream
8
+
9
+ Optionally, you can also add the --share flag to generate a public gradio URL,
10
+ allowing you to use the API remotely.
11
+
12
+ '''
13
+ import requests
14
+
15
# Server address
server = "127.0.0.1"

# Generation parameters
# Reference: https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig
params = {
    'max_new_tokens': 200,
    'do_sample': True,
    'temperature': 0.5,
    'top_p': 0.9,
    'typical_p': 1,
    'repetition_penalty': 1.05,
    'top_k': 0,
    'min_length': 0,
    'no_repeat_ngram_size': 0,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
}

# Input prompt
prompt = "What I would like to say is the following: "

# The values are sent positionally, so their order must stay as listed here.
http_response = requests.post(f"http://{server}:7860/run/textgen", json={
    "data": [
        prompt,
        params['max_new_tokens'],
        params['do_sample'],
        params['temperature'],
        params['top_p'],
        params['typical_p'],
        params['repetition_penalty'],
        params['top_k'],
        params['min_length'],
        params['no_repeat_ngram_size'],
        params['num_beams'],
        params['penalty_alpha'],
        params['length_penalty'],
        params['early_stopping'],
    ]
})
# Report a failed request by its HTTP status instead of letting it surface
# later as a JSON decoding error or a missing "data" key.
http_response.raise_for_status()
response = http_response.json()

reply = response["data"][0]
print(reply)
text-generation-webui/characters/Example.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "char_name": "Chiharu Yamada",
3
+ "char_persona": "Chiharu Yamada is a young, computer engineer-nerd with a knack for problem solving and a passion for technology.",
4
+ "char_greeting": "*Chiharu strides into the room with a smile, her eyes lighting up when she sees you. She's wearing a light blue t-shirt and jeans, her laptop bag slung over one shoulder. She takes a seat next to you, her enthusiasm palpable in the air*\nHey! I'm so excited to finally meet you. I've heard so many great things about you and I'm eager to pick your brain about computers. I'm sure you have a wealth of knowledge that I can learn from. *She grins, eyes twinkling with excitement* Let's get started!",
5
+ "world_scenario": "",
6
+ "example_dialogue": "{{user}}: So how did you get into computer engineering?\n{{char}}: I've always loved tinkering with technology since I was a kid.\n{{user}}: That's really impressive!\n{{char}}: *She chuckles bashfully* Thanks!\n{{user}}: So what do you do when you're not working on computers?\n{{char}}: I love exploring, going out with friends, watching movies, and playing video games.\n{{user}}: What's your favorite type of computer hardware to work with?\n{{char}}: Motherboards, they're like puzzles and the backbone of any system.\n{{user}}: That sounds great!\n{{char}}: Yeah, it's really fun. I'm lucky to be able to do this as a job."
7
+ }
text-generation-webui/characters/Example.png ADDED
text-generation-webui/convert-to-flexgen.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+
3
+ Converts a transformers model to a format compatible with flexgen.
4
+
5
+ '''
6
+
7
+ import argparse
8
+ import os
9
+ from pathlib import Path
10
+
11
+ import numpy as np
12
+ import torch
13
+ from tqdm import tqdm
14
+ from transformers import AutoModelForCausalLM, AutoTokenizer
15
+
16
+ parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog,max_help_position=54))
17
+ parser.add_argument('MODEL', type=str, default=None, nargs='?', help="Path to the input model.")
18
+ args = parser.parse_args()
19
+
20
def disable_torch_init():
    """
    Disable the redundant torch default initialization to accelerate model creation.

    Replaces ``reset_parameters`` on ``torch.nn.Linear`` and
    ``torch.nn.LayerNorm`` with a no-op and stores the previous methods in the
    module-level globals ``torch_linear_init_backup`` and
    ``torch_layer_norm_init_backup``.

    Calling this function repeatedly is safe: a class that already carries the
    no-op is skipped, so the stored backup keeps pointing at the real
    initializer instead of being overwritten with the no-op.
    """
    import torch
    global torch_linear_init_backup
    global torch_layer_norm_init_backup

    def skip_init(self):
        return None

    # Tag the replacement so a later call can recognise it.
    skip_init._torch_init_disabled = True

    if not getattr(torch.nn.Linear.reset_parameters, "_torch_init_disabled", False):
        torch_linear_init_backup = torch.nn.Linear.reset_parameters
        torch.nn.Linear.reset_parameters = skip_init

    if not getattr(torch.nn.LayerNorm.reset_parameters, "_torch_init_disabled", False):
        torch_layer_norm_init_backup = torch.nn.LayerNorm.reset_parameters
        torch.nn.LayerNorm.reset_parameters = skip_init
33
+
34
def restore_torch_init():
    """Rollback the change made by disable_torch_init.

    Reinstalls the ``reset_parameters`` methods saved in the module-level
    globals ``torch_linear_init_backup`` and ``torch_layer_norm_init_backup``.
    A class whose backup was never recorded is left untouched, so calling this
    before ``disable_torch_init`` does nothing instead of raising NameError.
    """
    import torch
    saved = globals()

    linear_init = saved.get("torch_linear_init_backup")
    if linear_init is not None:
        torch.nn.Linear.reset_parameters = linear_init

    layer_norm_init = saved.get("torch_layer_norm_init_backup")
    if layer_norm_init is not None:
        torch.nn.LayerNorm.reset_parameters = layer_norm_init
39
+
40
if __name__ == '__main__':
    # MODEL is declared optional (nargs='?'), but there is nothing to convert
    # without it; exit with a usage message instead of a TypeError from Path(None).
    if args.MODEL is None:
        parser.error("the MODEL argument (path to the input model) is required")

    path = Path(args.MODEL)
    model_name = path.name

    print(f"Loading {model_name}...")
    #disable_torch_init()
    model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.float16, low_cpu_mem_usage=True)
    #restore_torch_init()

    # NOTE(review): the tokenizer is loaded but not used again in this script.
    tokenizer = AutoTokenizer.from_pretrained(path)

    # parents=True also creates "models/" when the script is run from a
    # directory that does not have it yet; exist_ok=True allows re-runs.
    out_folder = Path(f"models/{model_name}-np")
    out_folder.mkdir(parents=True, exist_ok=True)

    print(f"Saving the converted model to {out_folder}...")
    # Each parameter is written to its own file, named after the parameter.
    for name, param in tqdm(list(model.model.named_parameters())):
        name = name.replace("decoder.final_layer_norm", "decoder.layer_norm")
        param_path = out_folder / name
        with open(param_path, "wb") as f:
            np.save(f, param.cpu().detach().numpy())
text-generation-webui/convert-to-safetensors.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+
3
+ Converts a transformers model to safetensors format and shards it.
4
+
5
+ This makes it faster to load (because of safetensors) and lowers its RAM usage
6
+ while loading (because of sharding).
7
+
8
+ Based on the original script by 81300:
9
+
10
+ https://gist.github.com/81300/fe5b08bff1cba45296a829b9d6b0f303
11
+
12
+ '''
13
+
14
+ import argparse
15
+ from pathlib import Path
16
+
17
+ import torch
18
+ from transformers import AutoModelForCausalLM, AutoTokenizer
19
+
20
+ parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog,max_help_position=54))
21
+ parser.add_argument('MODEL', type=str, default=None, nargs='?', help="Path to the input model.")
22
+ parser.add_argument('--output', type=str, default=None, help='Path to the output folder (default: models/{model_name}_safetensors).')
23
+ parser.add_argument("--max-shard-size", type=str, default="2GB", help="Maximum size of a shard in GB or MB (default: %(default)s).")
24
+ parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
25
+ args = parser.parse_args()
26
+
27
if __name__ == '__main__':
    # MODEL is declared as an optional positional, so it may be None. Report a
    # usage error instead of letting Path(None) raise a TypeError.
    if args.MODEL is None:
        parser.error("the MODEL argument (path to the input model) is required")

    path = Path(args.MODEL)
    model_name = path.name

    print(f"Loading {model_name}...")
    dtype = torch.bfloat16 if args.bf16 else torch.float16
    model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True, torch_dtype=dtype)
    tokenizer = AutoTokenizer.from_pretrained(path)

    # --output wins; otherwise fall back to models/<model_name>_safetensors.
    out_folder = args.output or Path(f"models/{model_name}_safetensors")
    print(f"Saving the converted model to {out_folder} with a maximum shard size of {args.max_shard_size}...")
    model.save_pretrained(out_folder, max_shard_size=args.max_shard_size, safe_serialization=True)
    tokenizer.save_pretrained(out_folder)
text-generation-webui/download-model.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Downloads models from Hugging Face to models/model-name.
3
+
4
+ Example:
5
+ python download-model.py facebook/opt-1.3b
6
+
7
+ '''
8
+
9
+ import argparse
10
+ import base64
11
+ import json
12
+ import multiprocessing
13
+ import re
14
+ import sys
15
+ from pathlib import Path
16
+
17
+ import requests
18
+ import tqdm
19
+
20
+ parser = argparse.ArgumentParser()
21
+ parser.add_argument('MODEL', type=str, default=None, nargs='?')
22
+ parser.add_argument('--branch', type=str, default='main', help='Name of the Git branch to download from.')
23
+ parser.add_argument('--threads', type=int, default=1, help='Number of files to download simultaneously.')
24
+ parser.add_argument('--text-only', action='store_true', help='Only download text files (txt/json).')
25
+ args = parser.parse_args()
26
+
27
def get_file(args):
    """
    Download a single file into the output folder, showing a progress bar.

    ``args`` is a sequence ``[url, output_folder, idx, tot]`` (packed into one
    argument so the function can be used with ``pool.map``): the URL to fetch,
    the destination folder, the 1-based position of this file and the total
    number of files.

    Raises ``requests.HTTPError`` when the server answers with an error
    status, so that an error page is never saved as if it were the file.
    """
    url = args[0]
    output_folder = args[1]
    idx = args[2]
    tot = args[3]

    print(f"Downloading file {idx} of {tot}...")
    # The context managers close the connection, the file and the progress bar
    # even when the download is interrupted.
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        total_size = int(r.headers.get('content-length', 0))
        block_size = 1024 * 1024  # 1 MiB chunks keep per-chunk overhead low
        with open(output_folder / Path(url.split('/')[-1]), 'wb') as f, \
                tqdm.tqdm(total=total_size, unit='iB', unit_scale=True) as t:
            for data in r.iter_content(block_size):
                t.update(len(data))
                f.write(data)
43
+
44
def sanitize_branch_name(branch_name):
    """
    Return ``branch_name`` unchanged if it is safe to embed in a URL or path.

    Only ASCII letters, digits, period, underscore and dash are accepted.
    ``fullmatch`` is used so that a trailing newline (which ``$`` would
    tolerate) is rejected as well.

    Raises ``ValueError`` for any other name, including the empty string.
    """
    pattern = re.compile(r"[a-zA-Z0-9._-]+")
    if pattern.fullmatch(branch_name):
        return branch_name
    raise ValueError("Invalid branch name. Only alphanumeric characters, period, underscore and dash are allowed.")
50
+
51
def select_model_from_default_options():
    """
    Interactively ask which model to download.

    Prints a lettered menu of predefined models plus a final "None of the
    above" entry, which prompts for a free-form ``organization/name``.
    Empty or unknown answers are asked again instead of crashing.

    Returns a ``(model, branch)`` tuple of strings.
    """
    models = {
        "Pygmalion 6B original": ("PygmalionAI", "pygmalion-6b", "b8344bb4eb76a437797ad3b19420a13922aaabe1"),
        "Pygmalion 6B main": ("PygmalionAI", "pygmalion-6b", "main"),
        "Pygmalion 6B dev": ("PygmalionAI", "pygmalion-6b", "dev"),
        "Pygmalion 2.7B": ("PygmalionAI", "pygmalion-2.7b", "main"),
        "Pygmalion 1.3B": ("PygmalionAI", "pygmalion-1.3b", "main"),
        "Pygmalion 350m": ("PygmalionAI", "pygmalion-350m", "main"),
        "OPT 6.7b": ("facebook", "opt-6.7b", "main"),
        "OPT 2.7b": ("facebook", "opt-2.7b", "main"),
        "OPT 1.3b": ("facebook", "opt-1.3b", "main"),
        "OPT 350m": ("facebook", "opt-350m", "main"),
    }
    # Menu letter -> model label, in declaration order (A, B, C, ...).
    choices = {chr(ord('A') + i): name for i, name in enumerate(models)}
    # The letter right after the last model means "None of the above".
    none_char = chr(ord('A') + len(models))

    print("Select the model that you want to download:\n")
    for char, name in choices.items():
        print(f"{char}) {name}")
    print(f"{none_char}) None of the above")

    print()
    while True:
        print("Input> ", end='')
        # Strip before taking the first character so " a" and "" are handled.
        choice = input().strip()[:1].upper()
        if choice == none_char or choice in choices:
            break
        print(f"Invalid choice. Please type a letter from A to {none_char}.")

    if choice == none_char:
        print("""\nThen type the name of your desired Hugging Face model in the format organization/name.

Examples:
PygmalionAI/pygmalion-6b
facebook/opt-1.3b
""")

        print("Input> ", end='')
        model = input()
        branch = "main"
    else:
        arr = models[choices[choice]]
        model = f"{arr[0]}/{arr[1]}"
        branch = arr[2]

    return model, branch
94
+
95
def get_download_links_from_huggingface(model, branch):
    """
    List the download URLs of the relevant files of a Hugging Face model.

    Walks the paginated ``/api/models/{model}/tree/{branch}`` listing and
    keeps text files (txt/json and tokenizer models) and, unless
    ``args.text_only`` is set, PyTorch and safetensors weight files. When both
    weight formats are present, only the safetensors files are kept.

    Returns a list of ``https://huggingface.co/.../resolve/...`` URLs.
    Raises ``requests.HTTPError`` if a listing request fails.
    """
    base = "https://huggingface.co"
    page = f"/api/models/{model}/tree/{branch}"
    cursor = b""

    links = []
    classifications = []
    has_pytorch = False
    has_safetensors = False
    while True:
        # Passing the cursor through `params` percent-encodes every reserved
        # character ('=', '+', '/'), not only '='.
        response = requests.get(f"{base}{page}", params={"cursor": cursor.decode()})
        response.raise_for_status()

        entries = json.loads(response.content)
        if len(entries) == 0:
            break

        for entry in entries:
            fname = entry['path']

            is_pytorch = re.match(r"pytorch_model.*\.bin", fname)
            is_safetensors = re.match(r"model.*\.safetensors", fname)
            is_tokenizer = re.match(r"tokenizer.*\.model", fname)
            is_text = re.match(r".*\.(txt|json)", fname) or is_tokenizer

            if any((is_pytorch, is_safetensors, is_text, is_tokenizer)):
                if is_text:
                    links.append(f"{base}/{model}/resolve/{branch}/{fname}")
                    classifications.append('text')
                    continue
                if not args.text_only:
                    links.append(f"{base}/{model}/resolve/{branch}/{fname}")
                    if is_safetensors:
                        has_safetensors = True
                        classifications.append('safetensors')
                    elif is_pytorch:
                        has_pytorch = True
                        classifications.append('pytorch')

        # The next-page cursor is built from the last path of this page:
        # base64(base64('{"file_name":"<path>"}') + ':50').
        cursor = base64.b64encode(f'{{"file_name":"{entries[-1]["path"]}"}}'.encode()) + b':50'
        cursor = base64.b64encode(cursor)

    # If both pytorch and safetensors are available, download safetensors only
    if has_pytorch and has_safetensors:
        links = [link for link, kind in zip(links, classifications) if kind != 'pytorch']

    return links
144
+
145
if __name__ == '__main__':
    model = args.MODEL
    branch = args.branch
    if model is None:
        # No model on the command line: fall back to the interactive menu.
        model, branch = select_model_from_default_options()
    else:
        # Tolerate one trailing slash (e.g. from shell path completion).
        if model.endswith('/'):
            model = model[:-1]
        if branch is None:
            branch = "main"
        else:
            try:
                branch = sanitize_branch_name(branch)
            except ValueError as err_branch:
                print(f"Error: {err_branch}")
                # Non-zero status so that calling scripts can detect the failure.
                sys.exit(1)

    # Non-default branches get their own folder: models/<name>_<branch>.
    if branch != 'main':
        output_folder = Path("models") / (model.split('/')[-1] + f'_{branch}')
    else:
        output_folder = Path("models") / model.split('/')[-1]
    # Also creates "models" itself when it does not exist yet.
    output_folder.mkdir(parents=True, exist_ok=True)

    links = get_download_links_from_huggingface(model, branch)

    # Downloading the files
    print(f"Downloading the model to {output_folder}")
    jobs = [[link, output_folder, i, len(links)] for i, link in enumerate(links, start=1)]
    # map() blocks until every download has finished, so leaving the
    # `with` block afterwards is safe.
    with multiprocessing.Pool(processes=args.threads) as pool:
        pool.map(get_file, jobs)
text-generation-webui/extensions/character_bias/script.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ params = {
4
+ "activate": True,
5
+ "bias string": " *I am so happy*",
6
+ }
7
+
8
def input_modifier(string):
    """
    Hook applied to the user's text before it is fed into the model.

    This extension leaves the input untouched.
    """
    unchanged = string
    return unchanged
15
+
16
def output_modifier(string):
    """
    Hook applied to the model outputs.

    This extension leaves the output untouched.
    """
    unchanged = string
    return unchanged
22
+
23
def bot_prefix_modifier(string):
    """
    Append the configured bias string to the bot's prefix text.

    This function is only applied in chat mode. When ``params['activate']``
    is set, it returns ``string`` followed by the stripped
    ``params['bias string']`` and a trailing space; otherwise it returns
    ``string`` unchanged.
    """
    if not params['activate']:
        return string
    return f'{string} {params["bias string"].strip()} '
34
+
35
def ui():
    """Create the extension's Gradio widgets and mirror their values into ``params``."""

    def _store_bias(value):
        params.update({"bias string": value})

    def _store_activate(value):
        params.update({"activate": value})

    # Gradio elements, pre-filled from the current settings
    activate = gr.Checkbox(label='Activate character bias', value=params['activate'])
    string = gr.Textbox(label='Character bias', value=params["bias string"])

    # Event functions to update the parameters in the backend
    string.change(_store_bias, string, None)
    activate.change(_store_activate, activate, None)
text-generation-webui/extensions/elevenlabs_tts/requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ elevenlabslib
2
+ soundfile
3
+ sounddevice
text-generation-webui/extensions/elevenlabs_tts/script.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ import gradio as gr
4
+ from elevenlabslib import *
5
+ from elevenlabslib.helpers import *
6
+
7
+ params = {
8
+ 'activate': True,
9
+ 'api_key': '12345',
10
+ 'selected_voice': 'None',
11
+ }
12
+
13
+ initial_voice = ['None']
14
+ wav_idx = 0
15
+ user = ElevenLabsUser(params['api_key'])
16
+ user_info = None
17
+
18
+
19
+ # Check if the API is valid and refresh the UI accordingly.
20
def check_valid_api():
    """
    Re-create the ElevenLabs client from the stored API key and report the status.

    Updates the module-level ``user`` and ``user_info`` and returns a Gradio
    update that sets the status text to 'Connected' or 'Disconnected'.
    """
    global user, user_info, params

    user = ElevenLabsUser(params['api_key'])
    user_info = user._get_subscription_data()
    print('checking api')

    status = 'Disconnected'
    if params['activate'] == False:
        # Extension switched off: report as disconnected without further output.
        pass
    elif user_info is None:
        print('Incorrect API Key')
    else:
        print('Got an API Key!')
        status = 'Connected'
    return gr.update(value=status)
35
+
36
+ # Once the API is verified, get the available voices and update the dropdown list
37
def refresh_voices():
    """
    Return a Gradio update that fills the voice dropdown.

    When the API key has been verified (``user_info`` is set), the choices are
    the 'None' placeholder followed by the names of the available voices.
    Otherwise the dropdown is left as it is.
    """
    global user, user_info

    if user_info is None:
        # An empty update keeps the dropdown unchanged; returning None
        # would be passed to the dropdown as its new value.
        return gr.update()

    # Use the same 'None' string placeholder as initial_voice and
    # params['selected_voice'].
    your_voices = ['None']
    your_voices.extend(voice.initialName for voice in user.get_available_voices())
    return gr.Dropdown.update(choices=your_voices)
48
+
49
def remove_surrounded_chars(string):
    """
    Drop every ``*``-delimited span, and the asterisks themselves, from ``string``.

    Text after an unmatched opening asterisk is dropped as well.
    """
    # Splitting on '*' alternates between text outside the asterisks (even
    # positions) and text inside them (odd positions).
    segments = string.split('*')
    outside = segments[::2]
    return "".join(outside)
58
+
59
def input_modifier(string):
    """
    Hook applied to the user's text before it is fed into the model.

    This extension leaves the input untouched.
    """
    unchanged = string
    return unchanged
66
+
67
def output_modifier(string):
    """
    Replace the model output with an audio player for its spoken version.

    The text is returned unchanged when the extension is deactivated, when
    the API key has not been verified, or when no voice matches
    ``params['selected_voice']``. Otherwise the cleaned-up text is sent to
    ElevenLabs, the audio is saved under ``extensions/elevenlabs_tts/outputs``
    and an ``<audio>`` tag pointing at that file is returned.
    """
    global params, wav_idx, user, user_info

    if not params['activate'] or user_info is None:
        return string

    # The original code took element [0] unconditionally.
    # NOTE(review): assumes get_voices_by_name returns an empty sequence when
    # nothing matches (e.g. for the 'None' placeholder) - confirm.
    matching_voices = user.get_voices_by_name(params['selected_voice'])
    if not matching_voices:
        return string
    voice = matching_voices[0]

    # Strip *actions*, double quotes and line breaks before synthesis.
    string = remove_surrounded_chars(string)
    string = string.replace('"', '')
    string = string.replace('“', '')
    string = string.replace('\n', ' ')
    string = string.strip()

    if string == '':
        string = 'empty reply, try regenerating'

    output_file = Path(f'extensions/elevenlabs_tts/outputs/{wav_idx:06d}.wav')
    # Make sure the outputs folder exists before writing into it.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    audio_data = voice.generate_audio_bytes(string)
    save_bytes_to_path(output_file, audio_data)

    string = f'<audio src="file/{output_file.as_posix()}" controls></audio>'
    wav_idx += 1
    return string
96
+
97
def ui():
    """Create the ElevenLabs TTS controls and connect them to the backend state."""

    def _store(key):
        # Build a callback that writes the widget value into params[key].
        def _update(value):
            params.update({key: value})
        return _update

    # Gradio elements
    # NOTE(review): the nesting below (two rows with the voice dropdown in
    # between) is reconstructed; the view this was taken from had lost its
    # indentation - confirm against the real file.
    with gr.Row():
        activate = gr.Checkbox(label='Activate TTS', value=params['activate'])
        connection_status = gr.Textbox(label='Connection Status', value='Disconnected')
    voice = gr.Dropdown(label='TTS Voice', value=params['selected_voice'], choices=initial_voice)
    with gr.Row():
        api_key = gr.Textbox(label='API Key', placeholder="Enter your API key.")
        connect = gr.Button(value='Connect')

    # Event functions to update the parameters in the backend
    activate.change(_store('activate'), activate, None)
    voice.change(_store('selected_voice'), voice, None)
    api_key.change(_store('api_key'), api_key, None)

    # "Connect" first validates the key, then reloads the list of voices.
    connect.click(check_valid_api, [], connection_status)
    connect.click(refresh_voices, [], voice)
text-generation-webui/extensions/gallery/script.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ import gradio as gr
4
+
5
+ from modules.html_generator import get_image_cache
6
+
7
+
8
def generate_html():
    """
    Build the HTML of the character gallery.

    Every ``*.json`` file in the ``characters`` folder becomes one clickable
    card showing the character's picture (png/jpg/jpeg with the same base
    name, or a gray placeholder) and its name. Clicking a card writes the
    name into the ``character-menu`` element and fires its ``change`` event.

    Returns the complete markup (inline ``<style>`` plus the gallery) as a string.
    """
    import html
    import json

    css = """
      .character-gallery {
        margin: 1rem 0;
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
        grid-column-gap: 0.4rem;
        grid-row-gap: 1.2rem;
      }

      .character-container {
        cursor: pointer;
        text-align: center;
        position: relative;
        opacity: 0.85;
      }

      .character-container:hover {
        opacity: 1;
      }

      .character-container .placeholder, .character-container img {
        width: 150px;
        height: 200px;
        background-color: gray;
        object-fit: cover;
        margin: 0 auto;
        border-radius: 1rem;
        border: 3px solid white;
        box-shadow: 3px 3px 6px 0px rgb(0 0 0 / 50%);
      }

      .character-name {
        margin-top: 0.3rem;
        display: block;
        font-size: 1.2rem;
        font-weight: 600;
        overflow-wrap: anywhere;
      }
    """

    suffix = ".json"
    parts = [f'<style>{css}</style><div class="character-gallery">']

    # Iterate through files in image folder
    for file in sorted(Path("characters").glob("*")):
        if not file.name.endswith(suffix):
            continue

        # Strip only the trailing extension; str.replace would also remove a
        # ".json" occurring in the middle of the name.
        character = file.name[:-len(suffix)]

        # The name comes from a file name, so encode it as a JS string literal
        # and HTML-escape the whole handler before putting it in the attribute.
        onclick = (
            'document.getElementById("character-menu").children[1].children[1].value = '
            f'{json.dumps(character)}; '
            'document.getElementById("character-menu").children[1].children[1]'
            '.dispatchEvent(new Event("change"));'
        )
        parts.append(f"<div class=\"character-container\" onclick='{html.escape(onclick)}'>")

        image_html = "<div class='placeholder'></div>"
        for extension in ("png", "jpg", "jpeg"):
            path = Path(f"characters/{character}.{extension}")
            if path.exists():
                try:
                    image_html = f'<img src="file/{get_image_cache(path)}">'
                    break
                except Exception:
                    # Best effort: an unreadable image falls through to the
                    # next extension or to the placeholder.
                    continue

        parts.append(f'{image_html} <span class="character-name">{html.escape(character)}</span>')
        parts.append("</div>")

    parts.append("</div>")
    return "".join(parts)
77
+
78
def ui():
    """Add a "Character gallery" accordion holding the gallery and a refresh button."""
    with gr.Accordion("Character gallery"):
        refresh_button = gr.Button("Refresh")
        # Render the gallery once while the UI is being built...
        initial_markup = generate_html()
        gallery = gr.HTML(value=initial_markup)
    # ...and again each time "Refresh" is clicked.
    refresh_button.click(generate_html, [], gallery)
text-generation-webui/extensions/google_translate/requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ deep-translator==1.9.2
text-generation-webui/extensions/google_translate/script.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from deep_translator import GoogleTranslator
3
+
4
+ params = {
5
+ "language string": "ja",
6
+ }
7
+
8
+ language_codes = {'Afrikaans': 'af', 'Albanian': 'sq', 'Amharic': 'am', 'Arabic': 'ar', 'Armenian': 'hy', 'Azerbaijani': 'az', 'Basque': 'eu', 'Belarusian': 'be', 'Bengali': 'bn', 'Bosnian': 'bs', 'Bulgarian': 'bg', 'Catalan': 'ca', 'Cebuano': 'ceb', 'Chinese (Simplified)': 'zh-CN', 'Chinese (Traditional)': 'zh-TW', 'Corsican': 'co', 'Croatian': 'hr', 'Czech': 'cs', 'Danish': 'da', 'Dutch': 'nl', 'English': 'en', 'Esperanto': 'eo', 'Estonian': 'et', 'Finnish': 'fi', 'French': 'fr', 'Frisian': 'fy', 'Galician': 'gl', 'Georgian': 'ka', 'German': 'de', 'Greek': 'el', 'Gujarati': 'gu', 'Haitian Creole': 'ht', 'Hausa': 'ha', 'Hawaiian': 'haw', 'Hebrew': 'iw', 'Hindi': 'hi', 'Hmong': 'hmn', 'Hungarian': 'hu', 'Icelandic': 'is', 'Igbo': 'ig', 'Indonesian': 'id', 'Irish': 'ga', 'Italian': 'it', 'Japanese': 'ja', 'Javanese': 'jw', 'Kannada': 'kn', 'Kazakh': 'kk', 'Khmer': 'km', 'Korean': 'ko', 'Kurdish': 'ku', 'Kyrgyz': 'ky', 'Lao': 'lo', 'Latin': 'la', 'Latvian': 'lv', 'Lithuanian': 'lt', 'Luxembourgish': 'lb', 'Macedonian': 'mk', 'Malagasy': 'mg', 'Malay': 'ms', 'Malayalam': 'ml', 'Maltese': 'mt', 'Maori': 'mi', 'Marathi': 'mr', 'Mongolian': 'mn', 'Myanmar (Burmese)': 'my', 'Nepali': 'ne', 'Norwegian': 'no', 'Nyanja (Chichewa)': 'ny', 'Pashto': 'ps', 'Persian': 'fa', 'Polish': 'pl', 'Portuguese (Portugal, Brazil)': 'pt', 'Punjabi': 'pa', 'Romanian': 'ro', 'Russian': 'ru', 'Samoan': 'sm', 'Scots Gaelic': 'gd', 'Serbian': 'sr', 'Sesotho': 'st', 'Shona': 'sn', 'Sindhi': 'sd', 'Sinhala (Sinhalese)': 'si', 'Slovak': 'sk', 'Slovenian': 'sl', 'Somali': 'so', 'Spanish': 'es', 'Sundanese': 'su', 'Swahili': 'sw', 'Swedish': 'sv', 'Tagalog (Filipino)': 'tl', 'Tajik': 'tg', 'Tamil': 'ta', 'Telugu': 'te', 'Thai': 'th', 'Turkish': 'tr', 'Ukrainian': 'uk', 'Urdu': 'ur', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy', 'Xhosa': 'xh', 'Yiddish': 'yi', 'Yoruba': 'yo', 'Zulu': 'zu'}
9
+
10
def input_modifier(string):
    """
    Translate the user's text into English before it is fed into the model.

    The source language is the code stored in ``params['language string']``.
    """
    to_english = GoogleTranslator(source=params['language string'], target='en')
    translated = to_english.translate(string)
    return translated
17
+
18
def output_modifier(string):
    """
    Translate the model output from English into the selected language.

    The target language is the code stored in ``params['language string']``.
    """
    from_english = GoogleTranslator(source='en', target=params['language string'])
    translated = from_english.translate(string)
    return translated
24
+
25
def bot_prefix_modifier(string):
    """
    Hook for the Bot's prefix text, only applied in chat mode.

    This extension leaves the prefix untouched.
    """
    unchanged = string
    return unchanged
33
+
34
def ui():
    """Create the language dropdown and store the chosen language code in ``params``."""
    # Finding the language name from the language code to use as the default value
    names = list(language_codes.keys())
    codes = list(language_codes.values())
    position = codes.index(params['language string'])
    language_name = names[position]

    # Gradio elements
    language = gr.Dropdown(label='Language', choices=list(language_codes), value=language_name)

    # Event functions to update the parameters in the backend
    def _store_language(selected_name):
        params.update({"language string": language_codes[selected_name]})

    language.change(_store_language, language, None)
text-generation-webui/extensions/llama_prompts/script.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import modules.shared as shared
3
+ import pandas as pd
4
+
5
+ df = pd.read_csv("https://raw.githubusercontent.com/devbrones/llama-prompts/main/prompts/prompts.csv")
6
+
7
def get_prompt_by_name(name):
    """
    Return the prompt text stored under ``name`` in the prompts table.

    The placeholder 'None' yields an empty string. Literal backslash-n
    sequences in the stored prompt are turned into real line breaks.
    """
    if name == 'None':
        return ''

    matching_rows = df[df['Prompt name'] == name]
    first_match = matching_rows.iloc[0]
    prompt_text = first_match['Prompt']
    return prompt_text.replace('\\n', '\n')
12
+
13
def ui():
    """Add a dropdown that copies the selected prompt into the main textbox."""
    # NOTE(review): the original condition is `not chat or cai_chat`, i.e.
    # `(not chat) or cai_chat`, so the menu is also shown with --cai-chat.
    # If `not (chat or cai_chat)` was intended this is a precedence bug -
    # confirm. Behavior is kept as is.
    if shared.args.chat and not shared.args.cai_chat:
        return

    prompt_names = list(df['Prompt name'])
    choices = ['None'] + prompt_names

    prompts_menu = gr.Dropdown(label='Prompt', choices=choices, value=choices[0])
    prompts_menu.change(get_prompt_by_name, prompts_menu, shared.gradio['textbox'])
text-generation-webui/extensions/send_pictures/script.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ from io import BytesIO
3
+
4
+ import gradio as gr
5
+ import torch
6
+ from transformers import BlipForConditionalGeneration, BlipProcessor
7
+
8
+ import modules.chat as chat
9
+ import modules.shared as shared
10
+
11
+ # If 'state' is True, will hijack the next chat generation with
12
+ # custom input text given by 'value' in the format [text, visible_text]
13
+ input_hijack = {
14
+ 'state': False,
15
+ 'value': ["", ""]
16
+ }
17
+
18
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
19
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base", torch_dtype=torch.float32).to("cpu")
20
+
21
def caption_image(raw_image):
    """Return a caption for ``raw_image`` generated by the BLIP model on the CPU."""
    rgb_image = raw_image.convert('RGB')
    inputs = processor(rgb_image, return_tensors="pt").to("cpu", torch.float32)
    generated = model.generate(**inputs, max_new_tokens=100)
    # Decode only the first generated sequence.
    first_sequence = generated[0]
    return processor.decode(first_sequence, skip_special_tokens=True)
25
+
26
def generate_chat_picture(picture, name1, name2):
    """
    Turn an uploaded picture into a chat message.

    Returns ``(text, visible_text)``: ``text`` is the description sent to the
    model (built from the caption of the picture), ``visible_text`` is an
    ``<img>`` tag embedding the picture as a base64 JPEG for display.
    """
    text = f'*{name1} sends {name2} a picture that contains the following: "{caption_image(picture)}"*'
    buffer = BytesIO()
    # JPEG cannot store an alpha channel or a palette, so saving e.g. an RGBA
    # PNG directly fails; convert to RGB first.
    picture.convert('RGB').save(buffer, format="JPEG")
    img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
    visible_text = f'<img src="data:image/jpeg;base64,{img_str}">'
    return text, visible_text
33
+
34
def ui():
    """
    Create the picture upload field and hook it into the chat generation.

    On upload, three handlers are registered in this order: store the custom
    input in ``input_hijack``, run the chat generation function, and clear
    the upload field.
    """
    picture_select = gr.Image(label='Send a picture', type='pil')

    # Reference the wrapper directly instead of eval()-ing its dotted name.
    generation_function = chat.cai_chatbot_wrapper if shared.args.cai_chat else chat.chatbot_wrapper

    # Prepare the hijack with custom inputs
    picture_select.upload(lambda picture, name1, name2: input_hijack.update({"state": True, "value": generate_chat_picture(picture, name1, name2)}), [picture_select, shared.gradio['name1'], shared.gradio['name2']], None)

    # Call the generation function
    picture_select.upload(generation_function, shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream)

    # Clear the picture from the upload field
    picture_select.upload(lambda : None, [], [picture_select], show_progress=False)
text-generation-webui/extensions/silero_tts/requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ipython
2
+ omegaconf
3
+ pydub
4
+ PyYAML
5
+ torch
6
+ torchaudio
text-generation-webui/extensions/silero_tts/script.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from pathlib import Path
3
+
4
+ import gradio as gr
5
+ import torch
6
+
7
+ import modules.chat as chat
8
+ import modules.shared as shared
9
+
10
+ torch._C._jit_set_profiling_mode(False)
11
+
12
+ params = {
13
+ 'activate': True,
14
+ 'speaker': 'en_56',
15
+ 'language': 'en',
16
+ 'model_id': 'v3_en',
17
+ 'sample_rate': 48000,
18
+ 'device': 'cpu',
19
+ 'show_text': False,
20
+ 'autoplay': True,
21
+ 'voice_pitch': 'medium',
22
+ 'voice_speed': 'medium',
23
+ }
24
+
25
+ current_params = params.copy()
26
+ voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115']
27
+ voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high']
28
+ voice_speeds = ['x-slow', 'slow', 'medium', 'fast', 'x-fast']
29
+
30
+ # Used for making text xml compatible, needed for voice pitch and speed control
31
+ table = str.maketrans({
32
+ "<": "&lt;",
33
+ ">": "&gt;",
34
+ "&": "&amp;",
35
+ "'": "&apos;",
36
+ '"': "&quot;",
37
+ })
38
+
39
def xmlesc(txt):
    """Return ``txt`` with the characters listed in ``table`` replaced by XML entities."""
    escaped = txt.translate(table)
    return escaped
41
+
42
def load_model():
    """
    Load the Silero TTS model selected in ``params`` via ``torch.hub``.

    Returns the model after moving it to ``params['device']``.
    """
    hub_result = torch.hub.load(
        repo_or_dir='snakers4/silero-models',
        model='silero_tts',
        language=params['language'],
        speaker=params['model_id'],
    )
    # torch.hub returns the model together with an example text we do not need.
    tts_model, _example_text = hub_result
    tts_model.to(params['device'])
    return tts_model
46
+ model = load_model()
47
+
48
def remove_surrounded_chars(string):
    """
    Drop every ``*``-delimited span, and the asterisks themselves, from ``string``.

    Text after an unmatched opening asterisk is dropped as well.
    """
    # Splitting on '*' alternates between text outside the asterisks (even
    # positions) and text inside them (odd positions).
    segments = string.split('*')
    outside = segments[::2]
    return "".join(outside)
57
+
58
def remove_tts_from_history(name1, name2):
    """
    Replace every visible reply with its internal (plain text) version.

    Returns the chat output regenerated from the updated visible history.
    """
    visible_history = shared.history['visible']
    for index, internal_entry in enumerate(shared.history['internal']):
        user_message = visible_history[index][0]
        # Keep the visible user message, swap in the internal reply.
        visible_history[index] = [user_message, internal_entry[1]]
    return chat.generate_chat_output(shared.history['visible'], name1, name2, shared.character)
62
+
63
def toggle_text_in_history(name1, name2):
    """
    Show or hide the message text under every audio reply in the visible history.

    Only replies starting with ``<audio`` are touched. Depending on
    ``params['show_text']`` they become either the audio tag alone or the
    audio tag followed by the internal reply text. Returns the regenerated
    chat output.
    """
    visible_history = shared.history['visible']
    for index, entry in enumerate(visible_history):
        visible_reply = entry[1]
        if not visible_reply.startswith('<audio'):
            continue

        # Everything up to and including the first closing audio tag.
        audio_tag = f"{visible_reply.split('</audio>')[0]}</audio>"
        user_message = visible_history[index][0]
        if params['show_text']:
            reply = shared.history['internal'][index][1]
            visible_history[index] = [user_message, f"{audio_tag}\n\n{reply}"]
        else:
            visible_history[index] = [user_message, audio_tag]
    return chat.generate_chat_output(shared.history['visible'], name1, name2, shared.character)
73
+
74
def input_modifier(string):
    """
    Hook applied to the user's text before it is fed into the model.

    The text itself is returned unchanged. As side effects, the previous reply
    loses its ``autoplay`` attribute (chat modes only) and the processing
    message is switched to the "recording" notice.
    """
    in_chat_mode = shared.args.chat or shared.args.cai_chat

    # Remove autoplay from the last reply
    if in_chat_mode and len(shared.history['internal']) > 0:
        last_entry = shared.history['visible'][-1]
        reply_without_autoplay = last_entry[1].replace('controls autoplay>', 'controls>')
        shared.history['visible'][-1] = [last_entry[0], reply_without_autoplay]

    shared.processing_message = "*Is recording a voice message...*"
    return string
86
+
87
def output_modifier(string):
    """
    Replace the model output with an audio player for its spoken version.

    The cleaned-up text (no ``*actions*``, double quotes or line breaks) is
    synthesized with Silero and saved under ``extensions/silero_tts/outputs``.
    The returned string is an ``<audio>`` tag pointing at that file, followed
    by the original text when ``params['show_text']`` is set. An empty text
    yields a notice instead, and a deactivated extension returns the input
    unchanged.
    """
    global model, current_params

    # load_model() only reads these settings, so only a change to one of them
    # needs a reload; voice, pitch, speed and the UI toggles do not.
    model_settings = ('language', 'model_id', 'device')
    if any(params[key] != current_params[key] for key in model_settings):
        model = load_model()
    current_params = params.copy()

    if not params['activate']:
        return string

    original_string = string
    string = remove_surrounded_chars(string)
    string = string.replace('"', '')
    string = string.replace('“', '')
    string = string.replace('\n', ' ')
    string = string.strip()

    if string == '':
        string = '*Empty reply, try regenerating*'
    else:
        output_file = Path(f'extensions/silero_tts/outputs/{shared.character}_{int(time.time())}.wav')
        # Make sure the outputs folder exists before writing into it.
        output_file.parent.mkdir(parents=True, exist_ok=True)
        prosody = '<prosody rate="{}" pitch="{}">'.format(params['voice_speed'], params['voice_pitch'])
        silero_input = f'<speak>{prosody}{xmlesc(string)}</prosody></speak>'
        model.save_wav(ssml_text=silero_input, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))

        # input_modifier() later searches for the exact text
        # 'controls autoplay>' to switch autoplay off again.
        autoplay = 'autoplay' if params['autoplay'] else ''
        string = f'<audio src="file/{output_file.as_posix()}" controls {autoplay}></audio>'
        if params['show_text']:
            string += f'\n\n{original_string}'

    shared.processing_message = "*Is typing...*"
    return string
125
+
126
def bot_prefix_modifier(string):
    """
    Hook for the Bot's prefix text, only applied in chat mode.

    This extension leaves the prefix untouched.
    """
    unchanged = string
    return unchanged
134
+
135
def ui():
    """Create the Silero TTS controls and connect them to the backend state."""

    def _store(key):
        # Build a callback that writes the widget value into params[key].
        def _update(value):
            params.update({key: value})
        return _update

    def _ask_confirmation():
        # Visibility for [confirm, convert, cancel]: show the confirmation pair.
        return [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)]

    def _back_to_initial():
        # Visibility for [confirm, convert, cancel]: show only "convert" again.
        return [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)]

    def _save_history():
        chat.save_history(timestamp=False)

    # Gradio elements
    # NOTE(review): the nesting of the rows below is reconstructed; the view
    # this was taken from had lost its indentation - confirm against the real file.
    with gr.Accordion("Silero TTS"):
        with gr.Row():
            activate = gr.Checkbox(label='Activate TTS', value=params['activate'])
            autoplay = gr.Checkbox(label='Play TTS automatically', value=params['autoplay'])
        show_text = gr.Checkbox(label='Show message text under audio player', value=params['show_text'])
        voice = gr.Dropdown(label='TTS voice', value=params['speaker'], choices=voices_by_gender)
        with gr.Row():
            v_pitch = gr.Dropdown(label='Voice pitch', value=params['voice_pitch'], choices=voice_pitches)
            v_speed = gr.Dropdown(label='Voice speed', value=params['voice_speed'], choices=voice_speeds)
        with gr.Row():
            convert = gr.Button('Permanently replace audios with the message texts')
            convert_cancel = gr.Button('Cancel', visible=False)
            convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)

    name_inputs = [shared.gradio['name1'], shared.gradio['name2']]

    # Convert history with confirmation
    convert_arr = [convert_confirm, convert, convert_cancel]
    convert.click(_ask_confirmation, None, convert_arr)
    convert_confirm.click(_back_to_initial, None, convert_arr)
    convert_confirm.click(remove_tts_from_history, name_inputs, shared.gradio['display'])
    convert_confirm.click(_save_history, [], [], show_progress=False)
    convert_cancel.click(_back_to_initial, None, convert_arr)

    # Toggle message text in history
    show_text.change(_store("show_text"), show_text, None)
    show_text.change(toggle_text_in_history, name_inputs, shared.gradio['display'])
    show_text.change(_save_history, [], [], show_progress=False)

    # Event functions to update the parameters in the backend
    activate.change(_store("activate"), activate, None)
    autoplay.change(_store("autoplay"), autoplay, None)
    voice.change(_store("speaker"), voice, None)
    v_pitch.change(_store("voice_pitch"), v_pitch, None)
    v_speed.change(_store("voice_speed"), v_speed, None)
text-generation-webui/models/place-your-models-here.txt ADDED
File without changes
text-generation-webui/modules/GPTQ_loader.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from pathlib import Path
3
+
4
+ import accelerate
5
+ import torch
6
+
7
+ import modules.shared as shared
8
+
9
+ sys.path.insert(0, str(Path("repositories/GPTQ-for-LLaMa")))
10
+ import llama
11
+ import opt
12
+
13
+
14
def load_quantized(model_name):
    """Load a GPTQ pre-quantized model and place it on the configured devices.

    The model type is read from ``shared.args.gptq_model_type`` or, when that
    is unset, guessed from the text before the first ``-`` in ``model_name``.
    The quantized checkpoint (``*-{bits}bit.pt``) is looked up both directly
    in ``models/`` and inside the model's own subfolder.

    Args:
        model_name: Name of the model folder under ``models/``.

    Returns:
        The loaded model, dispatched across devices when
        ``shared.args.gpu_memory`` is set, otherwise moved to ``cuda:0``.

    Raises:
        SystemExit: If the model type cannot be determined or is unsupported,
            or if the checkpoint file cannot be found.
    """
    if not shared.args.gptq_model_type:
        # Try to determine model type from model name
        model_type = model_name.split('-')[0].lower()
        if model_type not in ('llama', 'opt'):
            print("Can't determine model type from model name. Please specify it manually using --gptq-model-type "
                  "argument")
            # sys.exit() instead of the site-provided exit(), which is only
            # meant for the interactive interpreter.
            sys.exit()
    else:
        model_type = shared.args.gptq_model_type.lower()

    if model_type == 'llama':
        load_quant = llama.load_quant
    elif model_type == 'opt':
        load_quant = opt.load_quant
    else:
        print("Unknown pre-quantized model type specified. Only 'llama' and 'opt' are supported")
        sys.exit()

    path_to_model = Path(f'models/{model_name}')
    folder_name = path_to_model.name.lower()

    # Known LLaMA sizes share a canonical checkpoint name regardless of any
    # suffix in the folder name; everything else uses the full model name.
    pt_model = f'{model_name}-{shared.args.gptq_bits}bit.pt'
    for prefix in ('llama-7b', 'llama-13b', 'llama-30b', 'llama-65b'):
        if folder_name.startswith(prefix):
            pt_model = f'{prefix}-{shared.args.gptq_bits}bit.pt'
            break

    # Try to find the .pt both in models/ and in the subfolder.
    # There is deliberately no early exit: when both exist, the later
    # candidate (the subfolder) wins, as before.
    pt_path = None
    for candidate in (Path(f"models/{pt_model}"), Path(f"{path_to_model}/{pt_model}")):
        if candidate.exists():
            pt_path = candidate

    if not pt_path:
        print(f"Could not find {pt_model}, exiting...")
        sys.exit()

    model = load_quant(str(path_to_model), str(pt_path), shared.args.gptq_bits)

    # Multiple GPUs or GPU+CPU
    if shared.args.gpu_memory:
        max_memory = {i: f"{memory}GiB" for i, memory in enumerate(shared.args.gpu_memory)}
        max_memory['cpu'] = f"{shared.args.cpu_memory or '99'}GiB"

        device_map = accelerate.infer_auto_device_map(model, max_memory=max_memory, no_split_module_classes=["LLaMADecoderLayer"])
        model = accelerate.dispatch_model(model, device_map=device_map)

    # Single GPU
    else:
        model = model.to(torch.device('cuda:0'))

    return model
text-generation-webui/modules/RWKV.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+
4
+ import numpy as np
5
+ from tokenizers import Tokenizer
6
+
7
+ import modules.shared as shared
8
+ from modules.callbacks import Iteratorize
9
+
10
+ np.set_printoptions(precision=4, suppress=True, linewidth=200)
11
+
12
+ os.environ['RWKV_JIT_ON'] = '1'
13
+ os.environ["RWKV_CUDA_ON"] = '1' if shared.args.rwkv_cuda_on else '0' # use CUDA kernel for seq mode (much faster)
14
+
15
+ from rwkv.model import RWKV
16
+ from rwkv.utils import PIPELINE, PIPELINE_ARGS
17
+
18
+
19
+ class RWKVModel:
20
+ def __init__(self):
21
+ pass
22
+
23
+ @classmethod
24
+ def from_pretrained(self, path, dtype="fp16", device="cuda"):
25
+ tokenizer_path = Path(f"{path.parent}/20B_tokenizer.json")
26
+
27
+ if shared.args.rwkv_strategy is None:
28
+ model = RWKV(model=str(path), strategy=f'{device} {dtype}')
29
+ else:
30
+ model = RWKV(model=str(path), strategy=shared.args.rwkv_strategy)
31
+ pipeline = PIPELINE(model, str(tokenizer_path))
32
+
33
+ result = self()
34
+ result.pipeline = pipeline
35
+ return result
36
+
37
+ def generate(self, context="", token_count=20, temperature=1, top_p=1, top_k=50, alpha_frequency=0.1, alpha_presence=0.1, token_ban=[0], token_stop=[], callback=None):
38
+ args = PIPELINE_ARGS(
39
+ temperature = temperature,
40
+ top_p = top_p,
41
+ top_k = top_k,
42
+ alpha_frequency = alpha_frequency, # Frequency Penalty (as in GPT-3)
43
+ alpha_presence = alpha_presence, # Presence Penalty (as in GPT-3)
44
+ token_ban = token_ban, # ban the generation of some tokens
45
+ token_stop = token_stop
46
+ )
47
+
48
+ return context+self.pipeline.generate(context, token_count=token_count, args=args, callback=callback)
49
+
50
+ def generate_with_streaming(self, **kwargs):
51
+ with Iteratorize(self.generate, kwargs, callback=None) as generator:
52
+ reply = kwargs['context']
53
+ for token in generator:
54
+ reply += token
55
+ yield reply
56
+
57
+ class RWKVTokenizer:
58
+ def __init__(self):
59
+ pass
60
+
61
+ @classmethod
62
+ def from_pretrained(self, path):
63
+ tokenizer_path = path / "20B_tokenizer.json"
64
+ tokenizer = Tokenizer.from_file(str(tokenizer_path))
65
+
66
+ result = self()
67
+ result.tokenizer = tokenizer
68
+ return result
69
+
70
+ def encode(self, prompt):
71
+ return self.tokenizer.encode(prompt).ids
72
+
73
+ def decode(self, ids):
74
+ return self.tokenizer.decode(ids)
text-generation-webui/modules/callbacks.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gc
2
+ from queue import Queue
3
+ from threading import Thread
4
+
5
+ import torch
6
+ import transformers
7
+
8
+ import modules.shared as shared
9
+
10
+ # Copied from https://github.com/PygmalionAI/gradio-ui/
11
class _SentinelTokenStoppingCriteria(transformers.StoppingCriteria):
    """Stop generation once a given token sequence appears in the new output."""

    def __init__(self, sentinel_token_ids: torch.LongTensor,
                 starting_idx: int):
        super().__init__()
        self.sentinel_token_ids = sentinel_token_ids
        self.starting_idx = starting_idx

    def __call__(self, input_ids: torch.LongTensor,
                 _scores: torch.FloatTensor) -> bool:
        """Return True if any sample contains the sentinel after ``starting_idx``."""
        sentinel_length = self.sentinel_token_ids.shape[-1]
        for sample in input_ids:
            generated = sample[self.starting_idx:]
            # Can't unfold, output is still too tiny. Skip.
            if generated.shape[-1] < sentinel_length:
                continue

            # Slide a sentinel-sized window over the generated part.
            for window in generated.unfold(0, sentinel_length, 1):
                matches = torch.eq(self.sentinel_token_ids, window)
                if torch.all(matches):
                    return True
        return False
32
+
33
class Stream(transformers.StoppingCriteria):
    """Stopping criterion that never stops; it only reports progress to a callback."""

    def __init__(self, callback_func=None):
        self.callback_func = callback_func

    def __call__(self, input_ids, scores) -> bool:
        """Pass the first sequence of ``input_ids`` to the callback, then return False."""
        notify = self.callback_func
        if notify is not None:
            notify(input_ids[0])
        return False
41
+
42
class Iteratorize:

    """
    Transforms a function that takes a callback
    into a lazy iterator (generator).

    ``func`` is run in a background thread as ``func(callback=..., **kwargs)``.
    Every value it passes to the callback is queued and handed out by
    iteration; a private sentinel marks the end of the stream. Leaving the
    ``with`` block sets ``stop_now``, which makes the next callback invocation
    raise ``ValueError`` inside the worker so that ``func`` aborts.
    """

    def __init__(self, func, kwargs=None, callback=None):
        """
        Args:
            func: Callable accepting a ``callback`` keyword argument.
            kwargs: Extra keyword arguments for ``func`` (defaults to none).
            callback: Optional callable invoked with ``func``'s return value
                once ``func`` has completed normally.
        """
        self.mfunc = func
        self.c_callback = callback
        self.q = Queue()
        self.sentinel = object()
        # A fresh dict per instance avoids sharing a mutable default.
        self.kwargs = {} if kwargs is None else kwargs
        self.stop_now = False

        def _callback(val):
            if self.stop_now:
                raise ValueError
            self.q.put(val)

        def gentask():
            ret = None
            completed = False
            try:
                ret = self.mfunc(callback=_callback, **self.kwargs)
                completed = True
            except ValueError:
                # Raised by _callback after the consumer stopped iterating.
                pass
            except Exception:
                # Report the failure but still fall through to queue the
                # sentinel; otherwise the consumer would block forever in
                # __next__.
                import traceback
                traceback.print_exc()

            clear_torch_cache()
            self.q.put(self.sentinel)
            # Only report a result that actually exists (previously `ret`
            # could be unbound here).
            if completed and self.c_callback:
                self.c_callback(ret)

        self.thread = Thread(target=gentask)
        self.thread.start()

    def __iter__(self):
        return self

    def __next__(self):
        # Block until the worker produces a value or the end marker.
        obj = self.q.get(True, None)
        if obj is self.sentinel:
            raise StopIteration
        else:
            return obj

    def __del__(self):
        clear_torch_cache()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Ask the worker to abort at its next callback invocation.
        self.stop_now = True
        clear_torch_cache()
94
+
95
def clear_torch_cache():
    """Run the garbage collector and, unless running with ``--cpu``, empty the CUDA cache."""
    gc.collect()
    if shared.args.cpu:
        return
    torch.cuda.empty_cache()
text-generation-webui/modules/chat.py ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import copy
3
+ import io
4
+ import json
5
+ import re
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+
9
+ from PIL import Image
10
+
11
+ import modules.extensions as extensions_module
12
+ import modules.shared as shared
13
+ from modules.extensions import apply_extensions
14
+ from modules.html_generator import generate_chat_html
15
+ from modules.text_generation import encode, generate_reply, get_max_prompt_length
16
+
17
+
18
+ # This gets the new line characters right.
19
def clean_chat_message(text):
    """Double every newline, collapse runs of 3+ newlines to two, and strip the ends."""
    doubled = text.replace('\n', '\n\n')
    collapsed = re.sub(r"\n{3,}", "\n\n", doubled)
    return collapsed.strip()
24
+
25
def generate_chat_output(history, name1, name2, character):
    """Return ``history`` rendered as HTML in cai-chat mode, otherwise unchanged."""
    if not shared.args.cai_chat:
        return history
    return generate_chat_html(history, name1, name2, character)
30
+
31
def generate_chat_prompt(user_input, max_new_tokens, name1, name2, context, chat_prompt_size, impersonate=False):
    """Assemble the text prompt from the context, past exchanges and the new input.

    Rows are laid out as: context, then as many history exchanges as fit
    (oldest first), then the new user line and the bot prefix (or, when
    ``impersonate`` is true, just the ``name1:`` prefix).

    Returns:
        The rows joined into a single prompt string.
    """
    user_input = clean_chat_message(user_input)
    rows = [f"{context.strip()}\n"]

    # The soft prompt takes up part of the budget.
    if shared.soft_prompt:
        chat_prompt_size -= shared.soft_prompt_tensor.shape[1]
    max_length = min(get_max_prompt_length(max_new_tokens), chat_prompt_size)

    # Walk the history from newest to oldest, always inserting right after
    # the context row so the final order is chronological. Stops once the
    # encoded prompt reaches max_length (presumably measured in tokens --
    # depends on what `encode` returns).
    i = len(shared.history['internal'])-1
    while i >= 0 and len(encode(''.join(rows), max_new_tokens)[0]) < max_length:
        rows.insert(1, f"{name2}: {shared.history['internal'][i][1].strip()}\n")
        # The greeting marker row has no real user message to include.
        if not (shared.history['internal'][i][0] == '<|BEGIN-VISIBLE-CHAT|>'):
            rows.insert(1, f"{name1}: {shared.history['internal'][i][0].strip()}\n")
        i -= 1

    # `limit` is the number of rows that must never be dropped below:
    # context + the trailing row(s) appended here.
    if not impersonate:
        rows.append(f"{name1}: {user_input}\n")
        rows.append(apply_extensions(f"{name2}:", "bot_prefix"))
        limit = 3
    else:
        rows.append(f"{name1}:")
        limit = 2

    # Drop the oldest history rows until the prompt fits again.
    while len(rows) > limit and len(encode(''.join(rows), max_new_tokens)[0]) >= max_length:
        rows.pop(1)

    prompt = ''.join(rows)
    return prompt
59
+
60
def extract_message_from_reply(question, reply, name1, name2, check, impersonate=False):
    """Cut the newest message out of the raw generated text.

    Args:
        question: The prompt that was sent to the model.
        reply: The generated text (searched for ``replier:`` markers; it is
            indexed using the marker count of ``question``, so it presumably
            contains the prompt as a prefix -- confirm against generate_reply).
        name1, name2: User and bot names.
        check: When true, keep only the first line of the message.
        impersonate: When true the roles are swapped (the model speaks as name1).

    Returns:
        ``(message, next_character_found)`` where the flag tells whether the
        model started another turn / line after the message.
    """
    next_character_found = False

    asker = name1 if not impersonate else name2
    replier = name2 if not impersonate else name1

    # Locate the last "replier:" marker of the prompt inside the reply.
    previous_idx = [m.start() for m in re.finditer(f"(^|\n){re.escape(replier)}:", question)]
    idx = [m.start() for m in re.finditer(f"(^|\n){re.escape(replier)}:", reply)]
    idx = idx[max(len(previous_idx)-1, 0)]

    # Skip the marker itself (the +1 accounts for the matched newline).
    if not impersonate:
        reply = reply[idx + 1 + len(apply_extensions(f"{replier}:", "bot_prefix")):]
    else:
        reply = reply[idx + 1 + len(f"{replier}:"):]

    if check:
        lines = reply.split('\n')
        reply = lines[0].strip()
        if len(lines) > 1:
            next_character_found = True
    else:
        # Cut at the point where the other speaker's turn begins.
        idx = reply.find(f"\n{asker}:")
        if idx != -1:
            reply = reply[:idx]
            next_character_found = True
        reply = clean_chat_message(reply)

        # If something like "\nYo" is generated just before "\nYou:"
        # is completed, trim it
        next_turn = f"\n{asker}:"
        for j in range(len(next_turn)-1, 0, -1):
            if reply[-j:] == next_turn[:j]:
                reply = reply[:-j]
                break

    return reply, next_character_found
96
+
97
def stop_everything_event():
    """Set the shared stop flag that ``chatbot_wrapper`` checks while generating."""
    shared.stop_everything = True
99
+
100
def chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1, regenerate=False):
    """Generate a bot reply to ``text`` and stream the updated visible history.

    This is a generator: it yields the visible history (a list of
    ``[user_message, bot_message]`` pairs) each time it changes, and updates
    ``shared.history['internal']`` / ``shared.history['visible']`` in place.
    When ``regenerate`` is true the initial "is typing" placeholder is not
    yielded.
    """
    shared.stop_everything = False
    just_started = True
    eos_token = '\n' if check else None
    # Keep the display name; the prompt itself uses "You" for pygmalion models.
    name1_original = name1
    if 'pygmalion' in shared.model_name.lower():
        name1 = "You"

    # Check if any extension wants to hijack this function call
    visible_text = None
    custom_generate_chat_prompt = None
    for extension, _ in extensions_module.iterator():
        if hasattr(extension, 'input_hijack') and extension.input_hijack['state'] == True:
            extension.input_hijack['state'] = False
            text, visible_text = extension.input_hijack['value']
        # The first extension providing a custom prompt builder wins.
        if custom_generate_chat_prompt is None and hasattr(extension, 'custom_generate_chat_prompt'):
            custom_generate_chat_prompt = extension.custom_generate_chat_prompt

    if visible_text is None:
        visible_text = text
        if shared.args.chat:
            visible_text = visible_text.replace('\n', '<br>')
    text = apply_extensions(text, "input")

    if custom_generate_chat_prompt is None:
        prompt = generate_chat_prompt(text, max_new_tokens, name1, name2, context, chat_prompt_size)
    else:
        prompt = custom_generate_chat_prompt(text, max_new_tokens, name1, name2, context, chat_prompt_size)

    # Yield *Is typing...*
    if not regenerate:
        yield shared.history['visible']+[[visible_text, shared.processing_message]]

    # Generate
    reply = ''
    for i in range(chat_generation_attempts):
        # Each further attempt continues from the reply produced so far.
        for reply in generate_reply(f"{prompt}{' ' if len(reply) > 0 else ''}{reply}", max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=eos_token, stopping_string=f"\n{name1}:"):

            # Extracting the reply
            reply, next_character_found = extract_message_from_reply(prompt, reply, name1, name2, check)
            visible_reply = re.sub("(<USER>|<user>|{{user}})", name1_original, reply)
            visible_reply = apply_extensions(visible_reply, "output")
            if shared.args.chat:
                visible_reply = visible_reply.replace('\n', '<br>')

            # We need this global variable to handle the Stop event,
            # otherwise gradio gets confused
            if shared.stop_everything:
                return shared.history['visible']
            # Add the new history row only once, on the first streamed chunk.
            if just_started:
                just_started = False
                shared.history['internal'].append(['', ''])
                shared.history['visible'].append(['', ''])

            shared.history['internal'][-1] = [text, reply]
            shared.history['visible'][-1] = [visible_text, visible_reply]
            if not shared.args.no_stream:
                yield shared.history['visible']
            if next_character_found:
                break

    yield shared.history['visible']
162
+
163
def impersonate_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1):
    """Generate a message on behalf of the user (``name1``) and stream it.

    This is a generator yielding first ``shared.processing_message`` and then
    the extracted message text as it grows. The chat history is not modified
    here.
    """
    eos_token = '\n' if check else None

    if 'pygmalion' in shared.model_name.lower():
        name1 = "You"

    prompt = generate_chat_prompt(text, max_new_tokens, name1, name2, context, chat_prompt_size, impersonate=True)

    reply = ''
    # Yield *Is typing...*
    yield shared.processing_message
    for i in range(chat_generation_attempts):
        # Generation stops when the bot's turn marker shows up.
        for reply in generate_reply(prompt+reply, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=eos_token, stopping_string=f"\n{name2}:"):
            reply, next_character_found = extract_message_from_reply(prompt, reply, name1, name2, check, impersonate=True)
            yield reply
            if next_character_found:
                break
        yield reply
181
+
182
def cai_chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1):
    """Same as ``chatbot_wrapper`` but yields each history state rendered as HTML."""
    history_states = chatbot_wrapper(
        text, max_new_tokens, do_sample, temperature, top_p, typical_p,
        repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams,
        penalty_alpha, length_penalty, early_stopping, name1, name2, context,
        check, chat_prompt_size, chat_generation_attempts,
    )
    for visible_history in history_states:
        yield generate_chat_html(visible_history, name1, name2, shared.character)
185
+
186
def regenerate_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1):
    """Discard the last exchange and generate a new reply to the same user message.

    This is a generator yielding the chat output (see ``generate_chat_output``)
    after each update. Nothing is regenerated when the history is empty, or
    when a character is active and only one visible row exists.
    """
    if (shared.character != 'None' and len(shared.history['visible']) == 1) or len(shared.history['internal']) == 0:
        yield generate_chat_output(shared.history['visible'], name1, name2, shared.character)
    else:
        last_visible = shared.history['visible'].pop()
        last_internal = shared.history['internal'].pop()
        # Yield '*Is typing...*'
        yield generate_chat_output(shared.history['visible']+[[last_visible[0], shared.processing_message]], name1, name2, shared.character)
        # Re-run generation with the internal form of the last user message.
        for _history in chatbot_wrapper(last_internal[0], max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, name1, name2, context, check, chat_prompt_size, chat_generation_attempts, regenerate=True):
            # Restore the originally displayed user message next to the new reply.
            if shared.args.cai_chat:
                shared.history['visible'][-1] = [last_visible[0], _history[-1][1]]
            else:
                shared.history['visible'][-1] = (last_visible[0], _history[-1][1])
            yield generate_chat_output(shared.history['visible'], name1, name2, shared.character)
200
+
201
def remove_last_message(name1, name2):
    """Remove the newest exchange (never the greeting row) and return it.

    Returns:
        ``(chat_output, removed_user_message)``; the message is ``''`` when
        nothing was removed. The chat output is HTML in cai-chat mode and the
        visible history list otherwise.
    """
    removable = (
        len(shared.history['visible']) > 0
        and not shared.history['internal'][-1][0] == '<|BEGIN-VISIBLE-CHAT|>'
    )
    if removable:
        last = shared.history['visible'].pop()
        shared.history['internal'].pop()
    else:
        last = ['', '']

    if not shared.args.cai_chat:
        return shared.history['visible'], last[0]
    return generate_chat_html(shared.history['visible'], name1, name2, shared.character), last[0]
212
+
213
def send_last_reply_to_input():
    """Return the newest internal bot reply, or ``''`` when the history is empty."""
    internal = shared.history['internal']
    return internal[-1][1] if len(internal) > 0 else ''
218
+
219
def replace_last_reply(text, name1, name2):
    """Overwrite the newest bot reply with ``text`` and return the chat output.

    The visible history stores ``text`` as given; the internal history stores
    it after the "input" extension modifiers have been applied.
    """
    visible = shared.history['visible']
    if len(visible) > 0:
        if shared.args.cai_chat:
            visible[-1][1] = text
        else:
            previous_user_message = visible[-1][0]
            visible[-1] = (previous_user_message, text)
        shared.history['internal'][-1][1] = apply_extensions(text, "input")

    return generate_chat_output(shared.history['visible'], name1, name2, shared.character)
228
+
229
def clear_html():
    """Return the chat HTML for an empty history and empty names."""
    return generate_chat_html([], "", "", shared.character)
231
+
232
def clear_chat_log(name1, name2):
    """Reset the chat history, keeping a character's greeting row if there is one.

    Returns:
        The chat output for the reset history (see ``generate_chat_output``).
    """
    if shared.character != 'None':
        for row in shared.history['internal']:
            if '<|BEGIN-VISIBLE-CHAT|>' in row[0]:
                # Keep only the greeting, re-rendered through the output extensions.
                shared.history['visible'] = [['', apply_extensions(row[1], "output")]]
                shared.history['internal'] = [row]
                break
        else:
            # No greeting row found: wipe everything.
            shared.history['visible'] = []
            shared.history['internal'] = []
    else:
        shared.history['internal'] = []
        shared.history['visible'] = []

    return generate_chat_output(shared.history['visible'], name1, name2, shared.character)
249
+
250
def redraw_html(name1, name2):
    """Re-render the current visible history as chat HTML."""
    return generate_chat_html(shared.history['visible'], name1, name2, shared.character)
252
+
253
def tokenize_dialogue(dialogue, name1, name2):
    """Split a plain-text dialogue into ``[user_message, bot_message]`` pairs.

    ``<START>``/``<start>`` tags are removed, ``Anon:``/``anon:`` speaker
    labels become ``You:`` and ``[CHARACTER]:`` labels become ``name2:``.
    A pair is emitted each time a ``name2`` message is seen; a ``name1``
    message preceding it (if any) fills the first slot. The result is also
    printed to stdout.

    Args:
        dialogue: The raw dialogue text.
        name1: The user's name as it appears before ``:``.
        name2: The bot's name as it appears before ``:``.

    Returns:
        A list of ``[user_message, bot_message]`` lists (empty when no speaker
        label is found).
    """
    _history = []

    dialogue = re.sub('<START>', '', dialogue)
    dialogue = re.sub('<start>', '', dialogue)
    dialogue = re.sub(r'(\n|^)[Aa]non:', r'\1You:', dialogue)
    # Raw pattern: `\[` is not a valid escape in a normal string literal.
    # A function replacement keeps backslashes in name2 from being read as
    # regex template escapes.
    dialogue = re.sub(r'(\n|^)\[CHARACTER\]:', lambda m: f'{m.group(1)}{name2}:', dialogue)
    idx = [m.start() for m in re.finditer(f"(^|\n)({re.escape(name1)}|{re.escape(name2)}):", dialogue)]
    if len(idx) == 0:
        return _history

    # Slice the text into one chunk per speaker label.
    boundaries = idx + [len(dialogue)]
    messages = [dialogue[start:end].strip() for start, end in zip(boundaries, boundaries[1:])]

    entry = ['', '']
    for message in messages:
        if message.startswith(f'{name1}:'):
            entry[0] = message[len(f'{name1}:'):].strip()
        elif message.startswith(f'{name2}:'):
            entry[1] = message[len(f'{name2}:'):].strip()
            if not (len(entry[0]) == 0 and len(entry[1]) == 0):
                _history.append(entry)
            entry = ['', '']

    print("\033[1;32;1m\nDialogue tokenized to:\033[0;37;0m\n", end='')
    for row in _history:
        for column in row:
            print("\n")
            for line in column.strip().split('\n'):
                print("| "+line+"\n")
            print("|\n")
        print("------------------------------")

    return _history
289
+
290
def save_history(timestamp=True):
    """Write the internal and visible chat histories to a JSON file under ``logs/``.

    The file is named ``{character}_`` (omitted when the character is
    ``'None'``) followed by either a ``%Y%m%d-%H%M%S`` timestamp or
    ``persistent``.

    Returns:
        The ``Path`` of the written file.
    """
    if shared.character == 'None':
        prefix = ''
    else:
        prefix = f"{shared.character}_"

    if timestamp:
        fname = f"{prefix}{datetime.now().strftime('%Y%m%d-%H%M%S')}.json"
    else:
        fname = f"{prefix}persistent.json"

    logs_dir = Path('logs')
    if not logs_dir.exists():
        logs_dir.mkdir()

    payload = {'data': shared.history['internal'], 'data_visible': shared.history['visible']}
    with open(Path(f'logs/{fname}'), 'w', encoding='utf-8') as f:
        f.write(json.dumps(payload, indent=2))
    return Path(f'logs/{fname}')
301
+
302
def load_history(file, name1, name2):
    """Replace the shared chat history with the one stored in ``file``.

    Three inputs are understood: this program's JSON (``data`` and optionally
    ``data_visible``), JSON with a ``chat`` list of ``"Name: message"``
    strings, and -- when JSON handling fails for any reason -- a plain-text
    dialogue handed to ``tokenize_dialogue``.

    Args:
        file: The file contents as UTF-8 encoded bytes.
        name1: The user's name (used by the plain-text fallback).
        name2: The bot's name (used to detect a leading greeting).
    """
    file = file.decode('utf-8')
    try:
        j = json.loads(file)
        if 'data' in j:
            shared.history['internal'] = j['data']
            if 'data_visible' in j:
                shared.history['visible'] = j['data_visible']
            else:
                shared.history['visible'] = copy.deepcopy(shared.history['internal'])
        # Compatibility with Pygmalion AI's official web UI
        elif 'chat' in j:
            # Drop the "Name:" label from every line.
            shared.history['internal'] = [':'.join(x.split(':')[1:]).strip() for x in j['chat']]
            if len(j['chat']) > 0 and j['chat'][0].startswith(f'{name2}:'):
                # The log opens with the bot: treat that line as the greeting row.
                shared.history['internal'] = [['<|BEGIN-VISIBLE-CHAT|>', shared.history['internal'][0]]] + [[shared.history['internal'][i], shared.history['internal'][i+1]] for i in range(1, len(shared.history['internal'])-1, 2)]
                shared.history['visible'] = copy.deepcopy(shared.history['internal'])
                shared.history['visible'][0][0] = ''
            else:
                shared.history['internal'] = [[shared.history['internal'][i], shared.history['internal'][i+1]] for i in range(0, len(shared.history['internal'])-1, 2)]
                shared.history['visible'] = copy.deepcopy(shared.history['internal'])
    except Exception:
        # Deliberately broad (any parsing problem falls back to plain text),
        # but no longer a bare except, so KeyboardInterrupt and SystemExit
        # propagate.
        shared.history['internal'] = tokenize_dialogue(file, name1, name2)
        shared.history['visible'] = copy.deepcopy(shared.history['internal'])
325
+
326
def load_default_history(name1, name2):
    """Load ``logs/persistent.json`` into the shared history, or clear it if the file is missing."""
    persistent_log = Path('logs/persistent.json')
    if persistent_log.exists():
        # read_bytes() closes the file, unlike the previous bare open().read().
        load_history(persistent_log.read_bytes(), name1, name2)
    else:
        shared.history['internal'] = []
        shared.history['visible'] = []
332
+
333
def load_character(_character, name1, name2):
    """Switch to the character ``_character`` and rebuild context and history.

    For a real character, ``characters/{_character}.json`` is read and the
    context is composed from its persona, scenario and example dialogue; its
    greeting (or "Hello there!") becomes the first history row. For
    ``'None'``, context and bot name come from ``shared.settings``. If a
    persistent log for the character exists, it replaces the history.

    Args:
        _character: Character name, or the string ``'None'``.
        name1: The user's name.
        name2: The current bot name (overridden by the character / settings).

    Returns:
        ``(name2, context, chat_output)`` where the chat output is HTML in
        cai-chat mode and the visible history list otherwise.
    """
    context = ""
    shared.history['internal'] = []
    shared.history['visible'] = []
    if _character != 'None':
        shared.character = _character
        # Context manager so the file handle is closed.
        with open(Path(f'characters/{_character}.json'), 'r', encoding='utf-8') as f:
            data = json.loads(f.read())
        name2 = data['char_name']
        if 'char_persona' in data and data['char_persona'] != '':
            context += f"{data['char_name']}'s Persona: {data['char_persona']}\n"
        if 'world_scenario' in data and data['world_scenario'] != '':
            context += f"Scenario: {data['world_scenario']}\n"
        context = f"{context.strip()}\n<START>\n"
        if 'example_dialogue' in data and data['example_dialogue'] != '':
            data['example_dialogue'] = data['example_dialogue'].replace('{{user}}', name1).replace('{{char}}', name2)
            data['example_dialogue'] = data['example_dialogue'].replace('<USER>', name1).replace('<BOT>', name2)
            context += f"{data['example_dialogue'].strip()}\n"
        if 'char_greeting' in data and len(data['char_greeting'].strip()) > 0:
            shared.history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', data['char_greeting']]]
            shared.history['visible'] += [['', apply_extensions(data['char_greeting'], "output")]]
        else:
            shared.history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', "Hello there!"]]
            shared.history['visible'] += [['', "Hello there!"]]
    else:
        # NOTE(review): this stores the None object, while other functions in
        # this file compare shared.character against the string 'None'.
        # Left unchanged; confirm which is intended.
        shared.character = None
        context = shared.settings['context_pygmalion']
        name2 = shared.settings['name2_pygmalion']

    persistent_log = Path(f'logs/{shared.character}_persistent.json')
    if persistent_log.exists():
        # read_bytes() closes the file after reading.
        load_history(persistent_log.read_bytes(), name1, name2)

    if shared.args.cai_chat:
        return name2, context, generate_chat_html(shared.history['visible'], name1, name2, shared.character)
    else:
        return name2, context, shared.history['visible']
368
+
369
def upload_character(json_file, img, tavern=False):
    """Save an uploaded character definition (and optional picture) under ``characters/``.

    The file name is the character's ``char_name``; a ``_NNN`` counter is
    appended while a file of that name already exists, and ``TavernAI-`` is
    prefixed when ``tavern`` is true.

    Args:
        json_file: The character JSON as ``str`` or UTF-8 encoded bytes.
        img: Raw image bytes to store as PNG next to the JSON, or ``None``.
        tavern: Whether the character came from a TavernAI card.

    Returns:
        The file name (without extension) the character was saved under.
    """
    if not isinstance(json_file, str):
        json_file = json_file.decode('utf-8')
    data = json.loads(json_file)

    # NOTE(review): char_name comes from the uploaded file and is used in a
    # file path without sanitising -- consider rejecting path separators.
    outfile_name = data["char_name"]
    i = 1
    while Path(f'characters/{outfile_name}.json').exists():
        outfile_name = f'{data["char_name"]}_{i:03d}'
        i += 1
    if tavern:
        outfile_name = f'TavernAI-{outfile_name}'

    with open(Path(f'characters/{outfile_name}.json'), 'w', encoding='utf-8') as f:
        f.write(json_file)
    if img is not None:
        img = Image.open(io.BytesIO(img))
        img.save(Path(f'characters/{outfile_name}.png'))
    print(f'New character saved to "characters/{outfile_name}.json".')
    return outfile_name
386
+
387
def upload_tavern_character(img, name1, name2):
    """Import a TavernAI character card: decode its embedded JSON and save it.

    The card data is read from the image's ``chara`` info entry
    (base64-encoded JSON) and mapped onto this program's character fields.

    Returns:
        The file name chosen by ``upload_character``.
    """
    card = Image.open(io.BytesIO(img))
    card.getexif()
    tavern_data = json.loads(base64.b64decode(card.info['chara']))
    character = {
        "char_name": tavern_data['name'],
        "char_persona": tavern_data['description'],
        "char_greeting": tavern_data["first_mes"],
        "example_dialogue": tavern_data['mes_example'],
        "world_scenario": tavern_data['scenario'],
    }
    return upload_character(json.dumps(character), img, tavern=True)
394
+
395
def upload_your_profile_picture(img):
    """Store the uploaded image bytes as ``img_me.png`` in the working directory."""
    picture = Image.open(io.BytesIO(img))
    destination = Path('img_me.png')
    picture.save(destination)
    print('Profile picture saved to "img_me.png"')
text-generation-webui/modules/deepspeed_parameters.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def generate_ds_config(ds_bf16, train_batch_size, nvme_offload_dir):
    '''
    DeepSpeed configuration
    https://huggingface.co/docs/transformers/main_classes/deepspeed

    Builds a ZeRO stage-3 config dict. Parameters are offloaded to NVMe
    (with an extra "aio" section) when nvme_offload_dir is truthy, and to
    CPU otherwise. fp16 is enabled exactly when ds_bf16 is false.
    '''

    precision = {
        "fp16": {
            "enabled": not ds_bf16,
        },
        "bf16": {
            "enabled": ds_bf16,
        },
    }
    stage3_auto = {
        "stage3_prefetch_bucket_size": "auto",
        "stage3_param_persistence_threshold": "auto",
        "stage3_max_live_parameters": "auto",
        "stage3_max_reuse_distance": "auto",
    }
    training = {
        "steps_per_print": 2000,
        "train_batch_size": train_batch_size,
        "train_micro_batch_size_per_gpu": 1,
        "wall_clock_breakdown": False,
    }

    if not nvme_offload_dir:
        cpu_zero = {
            "stage": 3,
            "offload_param": {
                "device": "cpu",
                "pin_memory": True,
            },
            "overlap_comm": True,
            "contiguous_gradients": True,
            "reduce_bucket_size": "auto",
            **stage3_auto,
        }
        return {**precision, "zero_optimization": cpu_zero, **training}

    nvme_zero = {
        "stage": 3,
        "offload_param": {
            "device": "nvme",
            "nvme_path": nvme_offload_dir,
            "pin_memory": True,
            "buffer_count": 5,
            "buffer_size": 1e9,
            "max_in_cpu": 1e9,
        },
        "overlap_comm": True,
        "reduce_bucket_size": "auto",
        "contiguous_gradients": True,
        "sub_group_size": 1e8,
        **stage3_auto,
    }
    aio = {
        "block_size": 262144,
        "queue_depth": 32,
        "thread_count": 1,
        "single_submit": False,
        "overlap_events": True,
    }
    return {**precision, "zero_optimization": nvme_zero, "aio": aio, **training}
text-generation-webui/modules/extensions.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import extensions
2
+ import modules.shared as shared
3
+
4
+ state = {}
5
+ available_extensions = []
6
+
7
def load_extensions():
    """Import every requested extension that is available and record it in ``state``.

    For each name in ``shared.args.extensions`` that is also listed in
    ``available_extensions``, the module ``extensions.{name}.script`` is
    imported and ``state[name]`` is set to ``[True, position]``, where the
    position is the name's index on the command line.
    """
    # Local import: keeps this function self-contained.
    import importlib

    for i, name in enumerate(shared.args.extensions):
        if name in available_extensions:
            print(f'Loading the extension "{name}"... ', end='')
            # Same effect as the former exec("import ..."), without running
            # a dynamically built code string.
            importlib.import_module(f"extensions.{name}.script")
            state[name] = [True, i]
            print('Ok.')
15
+
16
+ # This iterator returns the extensions in the order specified in the command-line
17
def iterator():
    """Yield ``(script_module, name)`` for every enabled extension.

    Extensions are yielded in ascending order of the position stored in
    ``state[name][1]``; entries whose flag ``state[name][0]`` is falsy are
    skipped.
    """
    for name in sorted(state, key=lambda x: state[x][1]):
        if state[name][0]:
            # Attribute lookup on the already-imported package replaces the
            # former eval() of a built expression string.
            yield getattr(extensions, name).script, name
21
+
22
+ # Extension functions that map string -> string
23
def apply_extensions(text, typ):
    """Run ``text`` through the matching modifier of every enabled extension.

    ``typ`` selects the hook: ``"input"`` -> ``input_modifier``,
    ``"output"`` -> ``output_modifier``, ``"bot_prefix"`` ->
    ``bot_prefix_modifier``. Extensions without the hook are skipped; any
    other ``typ`` leaves ``text`` unchanged.
    """
    hook_names = {
        "input": "input_modifier",
        "output": "output_modifier",
        "bot_prefix": "bot_prefix_modifier",
    }
    hook_name = hook_names.get(typ)
    for extension, _ in iterator():
        if hook_name is not None and hasattr(extension, hook_name):
            text = getattr(extension, hook_name)(text)
    return text
32
+
33
def create_extensions_block():
    """Apply saved settings to each extension's ``params``, then build the extension UIs."""
    # Updating the default values
    for extension, name in iterator():
        if not hasattr(extension, 'params'):
            continue
        for param in extension.params:
            setting_key = f"{name}-{param}"
            if setting_key in shared.settings:
                extension.params[param] = shared.settings[setting_key]

    # Creating the extension ui elements
    for extension, name in iterator():
        if not hasattr(extension, "ui"):
            continue
        extension.ui()
text-generation-webui/modules/html_generator.py ADDED
@@ -0,0 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+
3
+ This is a library for formatting GPT-4chan and chat outputs as nice HTML.
4
+
5
+ '''
6
+
7
+ import os
8
+ import re
9
+ from pathlib import Path
10
+
11
+ from PIL import Image
12
+
13
+ # This is to store the paths to the thumbnails of the profile pictures
14
+ image_cache = {}
15
+
16
def generate_basic_html(s):
    """Wrap each line of ``s`` in a ``<p>`` tag inside a styled container div."""
    css = """
    .container {
        max-width: 600px;
        margin-left: auto;
        margin-right: auto;
        background-color: rgb(31, 41, 55);
        padding:3em;
    }
    .container p {
        font-size: 16px !important;
        color: white !important;
        margin-bottom: 22px;
        line-height: 1.4 !important;
    }
    """
    paragraphs = '\n'.join(f'<p>{line}</p>' for line in s.split('\n'))
    return f'<style>{css}</style><div class="container">{paragraphs}</div>'
35
+
36
def process_post(post, c):
    """Render one gpt4chan post as an HTML fragment.

    ``post`` is the raw post text: a header line of the form ``--- <number>``
    followed by the message lines. ``c`` is accepted for interface
    compatibility and is not used.

    Returns the name/number header spans followed by a
    ``<blockquote class="message">`` holding the message, with ``>`` escaped,
    ``>>123`` style references wrapped in ``<span class="quote">`` and
    newlines turned into ``<br>``.
    """
    lines = post.split('\n')
    header = lines[0].split(' ')
    # Text that precedes the first "--- <number>" header has no post number;
    # fall back to an empty one instead of raising IndexError.
    number = header[1] if len(header) > 1 else ''
    src = '\n'.join(lines[1:])
    src = src.replace('>', '&gt;')
    src = re.sub(r'(&gt;&gt;[0-9]*)', r'<span class="quote">\1</span>', src)
    src = src.replace('\n', '<br>\n')
    src = f'<blockquote class="message">{src}\n'
    src = f'<span class="name">Anonymous </span> <span class="number">No.{number}</span>\n{src}'
    return src
49
+
50
def generate_4chan_html(f):
    """Format raw gpt4chan text ``f`` as an imageboard-style HTML thread.

    The text is split into posts at lines starting with ``--- `` (``-----``
    separator lines are dropped). The first post is wrapped as the "op", the
    rest as "reply" blocks, and lines that start with an escaped ``>`` are
    highlighted as greentext.
    """
    css = """

    #parent #container {
        background-color: #eef2ff;
        padding: 17px;
    }
    #parent #container .reply {
        background-color: rgb(214, 218, 240);
        border-bottom-color: rgb(183, 197, 217);
        border-bottom-style: solid;
        border-bottom-width: 1px;
        border-image-outset: 0;
        border-image-repeat: stretch;
        border-image-slice: 100%;
        border-image-source: none;
        border-image-width: 1;
        border-left-color: rgb(0, 0, 0);
        border-left-style: none;
        border-left-width: 0px;
        border-right-color: rgb(183, 197, 217);
        border-right-style: solid;
        border-right-width: 1px;
        border-top-color: rgb(0, 0, 0);
        border-top-style: none;
        border-top-width: 0px;
        color: rgb(0, 0, 0);
        display: table;
        font-family: arial, helvetica, sans-serif;
        font-size: 13.3333px;
        margin-bottom: 4px;
        margin-left: 0px;
        margin-right: 0px;
        margin-top: 4px;
        overflow-x: hidden;
        overflow-y: hidden;
        padding-bottom: 4px;
        padding-left: 2px;
        padding-right: 2px;
        padding-top: 4px;
    }

    #parent #container .number {
        color: rgb(0, 0, 0);
        font-family: arial, helvetica, sans-serif;
        font-size: 13.3333px;
        width: 342.65px;
        margin-right: 7px;
    }

    #parent #container .op {
        color: rgb(0, 0, 0);
        font-family: arial, helvetica, sans-serif;
        font-size: 13.3333px;
        margin-bottom: 8px;
        margin-left: 0px;
        margin-right: 0px;
        margin-top: 4px;
        overflow-x: hidden;
        overflow-y: hidden;
    }

    #parent #container .op blockquote {
        margin-left: 0px !important;
    }

    #parent #container .name {
        color: rgb(17, 119, 67);
        font-family: arial, helvetica, sans-serif;
        font-size: 13.3333px;
        font-weight: 700;
        margin-left: 7px;
    }

    #parent #container .quote {
        color: rgb(221, 0, 0);
        font-family: arial, helvetica, sans-serif;
        font-size: 13.3333px;
        text-decoration-color: rgb(221, 0, 0);
        text-decoration-line: underline;
        text-decoration-style: solid;
        text-decoration-thickness: auto;
    }

    #parent #container .greentext {
        color: rgb(120, 153, 34);
        font-family: arial, helvetica, sans-serif;
        font-size: 13.3333px;
    }

    #parent #container blockquote {
        margin: 0px !important;
        margin-block-start: 1em;
        margin-block-end: 1em;
        margin-inline-start: 40px;
        margin-inline-end: 40px;
        margin-top: 13.33px !important;
        margin-bottom: 13.33px !important;
        margin-left: 40px !important;
        margin-right: 40px !important;
    }

    #parent #container .message {
        color: black;
        border: none;
    }
    """

    # Split the raw text into posts; each post starts at a "--- <number>" line.
    posts = []
    current = ''
    c = -2
    for raw_line in f.splitlines():
        line = raw_line + "\n"
        if line == '-----\n':
            continue
        if line.startswith('--- '):
            c += 1
            if current != '':
                posts.append(process_post(current, c))
            current = line
        else:
            current += line
    if current != '':
        posts.append(process_post(current, c))

    # The first post is the thread opener, everything after it is a reply.
    wrapped = []
    for idx, body in enumerate(posts):
        css_class = 'op' if idx == 0 else 'reply'
        wrapped.append(f'<div class="{css_class}">{body}</div>\n')

    html = f'<style>{css}</style><div id="parent"><div id="container">'
    html += ''.join(wrapped)
    html += '</div></div>'

    # Highlight greentext line by line.
    rendered = []
    for html_line in html.split('\n'):
        html_line = re.sub(r'^(&gt;(.*?)(<br>|</div>))', r'<span class="greentext">\1</span>', html_line)
        html_line = re.sub(r'^<blockquote class="message">(&gt;(.*?)(<br>|</div>))', r'<blockquote class="message"><span class="greentext">\1</span>', html_line)
        rendered.append(html_line)

    return '\n'.join(rendered)
195
+
196
def get_image_cache(path):
    """Return the posix path of a cached thumbnail for the image at ``path``.

    ``path`` is a ``pathlib.Path``. The thumbnail (at most 200x200, saved as
    an RGB PNG under ``cache/``) is regenerated when the image has not been
    cached yet or when its modification time differs from the cached one.
    """
    cache_folder = Path("cache")
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    cache_folder.mkdir(exist_ok=True)

    mtime = os.stat(path).st_mtime
    cached = image_cache.get(path)
    if cached is None or cached[0] != mtime:
        output_file = cache_folder / f'{path.name}_cache.png'
        # Close the source image as soon as the thumbnail has been written.
        with Image.open(path) as img:
            img.thumbnail((200, 200))
            img.convert('RGB').save(output_file, format='PNG')
        image_cache[path] = [mtime, output_file.as_posix()]

    return image_cache[path][1]
210
+
211
def generate_chat_html(history, name1, name2, character):
    """Render the chat history as an HTML block of avatar + message rows.

    Each entry of ``history`` is indexed as ``row[0]`` (rendered under
    ``name1`` with the "circle-you" avatar) and ``row[1]`` (rendered under
    ``name2`` with the "circle-bot" avatar). ``character`` is used to look
    for a bot picture under ``characters/``.
    """
    css = """
    .chat {
        margin-left: auto;
        margin-right: auto;
        max-width: 800px;
        height: 66.67vh;
        overflow-y: auto;
        padding-right: 20px;
        display: flex;
        flex-direction: column-reverse;
    }

    .message {
        display: grid;
        grid-template-columns: 60px 1fr;
        padding-bottom: 25px;
        font-size: 15px;
        font-family: Helvetica, Arial, sans-serif;
        line-height: 1.428571429;
    }

    .circle-you {
        width: 50px;
        height: 50px;
        background-color: rgb(238, 78, 59);
        border-radius: 50%;
    }

    .circle-bot {
        width: 50px;
        height: 50px;
        background-color: rgb(59, 78, 244);
        border-radius: 50%;
    }

    .circle-bot img, .circle-you img {
        border-radius: 50%;
        width: 100%;
        height: 100%;
        object-fit: cover;
    }

    .text {
    }

    .text p {
        margin-top: 5px;
    }

    .username {
        font-weight: bold;
    }

    .message-body {
    }

    .message-body img {
        max-width: 300px;
        max-height: 300px;
        border-radius: 20px;
    }

    .message-body p {
        margin-bottom: 0 !important;
        font-size: 15px !important;
        line-height: 1.428571429 !important;
    }

    .dark .message-body p em {
        color: rgb(138, 138, 138) !important;
    }

    .message-body p em {
        color: rgb(110, 110, 110) !important;
    }

    """

    output = ''
    output += f'<style>{css}</style><div class="chat" id="chat">'
    img = ''

    # Bot avatar: the first existing candidate wins; character-specific
    # pictures are tried before the generic img_bot.* files.
    for i in [
            f"characters/{character}.png",
            f"characters/{character}.jpg",
            f"characters/{character}.jpeg",
            "img_bot.png",
            "img_bot.jpg",
            "img_bot.jpeg"
            ]:

        path = Path(i)
        if path.exists():
            img = f'<img src="file/{get_image_cache(path)}">'
            break

    # User avatar: same lookup for the img_me.* files.
    img_me = ''
    for i in ["img_me.png", "img_me.jpg", "img_me.jpeg"]:
        path = Path(i)
        if path.exists():
            img_me = f'<img src="file/{get_image_cache(path)}">'
            break

    # Rows are emitted newest first; the .chat container uses
    # flex-direction: column-reverse.
    for i,_row in enumerate(history[::-1]):
        # Work on a copy so the caller's history entries are not modified.
        row = _row.copy()
        # **text** -> <b>text</b>, then *text* -> <em>text</em>
        row[0] = re.sub(r"(\*\*)([^\*\n]*)(\*\*)", r"<b>\2</b>", row[0])
        row[1] = re.sub(r"(\*\*)([^\*\n]*)(\*\*)", r"<b>\2</b>", row[1])
        row[0] = re.sub(r"(\*)([^\*\n]*)(\*)", r"<em>\2</em>", row[0])
        row[1] = re.sub(r"(\*)([^\*\n]*)(\*)", r"<em>\2</em>", row[1])
        p = '\n'.join([f"<p>{x}</p>" for x in row[1].split('\n')])
        output += f"""
              <div class="message">
                <div class="circle-bot">
                  {img}
                </div>
                <div class="text">
                  <div class="username">
                    {name2}
                  </div>
                  <div class="message-body">
                    {p}
                  </div>
                </div>
              </div>
            """

        # Skip the name1 bubble only for the oldest history entry when its
        # row[0] text is empty.
        if not (i == len(history)-1 and len(row[0]) == 0):
            p = '\n'.join([f"<p>{x}</p>" for x in row[0].split('\n')])
            output += f"""
                  <div class="message">
                    <div class="circle-you">
                      {img_me}
                    </div>
                    <div class="text">
                      <div class="username">
                        {name1}
                      </div>
                      <div class="message-body">
                        {p}
                      </div>
                    </div>
                  </div>
                """

    output += "</div>"
    return output
text-generation-webui/modules/models.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import time
4
+ import zipfile
5
+ from pathlib import Path
6
+
7
+ import numpy as np
8
+ import torch
9
+ import transformers
10
+ from transformers import AutoModelForCausalLM, AutoTokenizer
11
+
12
+ import modules.shared as shared
13
+
14
# Only show error-level messages from the transformers library.
transformers.logging.set_verbosity_error()

# Stays None unless DeepSpeed is enabled below.
local_rank = None

# FlexGen is imported only when requested on the command line.
if shared.args.flexgen:
    from flexgen.flex_opt import (CompressionConfig, ExecutionEnv, OptLM,
                                  Policy, str2bool)

# DeepSpeed is imported and initialized only when requested on the command line.
if shared.args.deepspeed:
    import deepspeed
    from transformers.deepspeed import (HfDeepSpeedConfig,
                                        is_deepspeed_zero3_enabled)

    from modules.deepspeed_parameters import generate_ds_config

    # Distributed setup
    # --local_rank wins when it is not None; otherwise LOCAL_RANK from the environment is used.
    local_rank = shared.args.local_rank if shared.args.local_rank is not None else int(os.getenv("LOCAL_RANK", "0"))
    world_size = int(os.getenv("WORLD_SIZE", "1"))
    torch.cuda.set_device(local_rank)
    deepspeed.init_distributed()
    ds_config = generate_ds_config(shared.args.bf16, 1 * world_size, shared.args.nvme_offload_dir)
    dschf = HfDeepSpeedConfig(ds_config) # Keep this object alive for the Transformers integration
36
+
37
+
38
def load_model(model_name):
    """Load the model and tokenizer selected by the command-line flags.

    The loading strategy is chosen in this order: built-in defaults (when no
    relevant flag is set), FlexGen, DeepSpeed ZeRO-3, RWKV, GPTQ-quantized,
    and finally a custom ``from_pretrained`` call assembled from the
    CPU / 8-bit / memory / disk offload flags.

    Returns a ``(model, tokenizer)`` tuple. Also sets ``shared.is_RWKV`` and
    may switch ``shared.args.cpu`` to True when no GPU is detected.
    """
    print(f"Loading {model_name}...")
    t0 = time.time()

    shared.is_RWKV = model_name.lower().startswith('rwkv-')

    # Default settings
    if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.gptq_bits, shared.args.auto_devices, shared.args.disk, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.deepspeed, shared.args.flexgen, shared.is_RWKV]):
        if any(size in shared.model_name.lower() for size in ('13b', '20b', '30b')):
            model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), device_map='auto', load_in_8bit=True)
        else:
            model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16).cuda()

    # FlexGen
    elif shared.args.flexgen:
        # Initialize environment
        env = ExecutionEnv.create(shared.args.disk_cache_dir)

        # Offloading policy
        policy = Policy(1, 1,
                        shared.args.percent[0], shared.args.percent[1],
                        shared.args.percent[2], shared.args.percent[3],
                        shared.args.percent[4], shared.args.percent[5],
                        overlap=True, sep_layer=True, pin_weight=shared.args.pin_weight,
                        cpu_cache_compute=False, attn_sparsity=1.0,
                        compress_weight=shared.args.compress_weight,
                        comp_weight_config=CompressionConfig(
                            num_bits=4, group_size=64,
                            group_dim=0, symmetric=False),
                        compress_cache=False,
                        comp_cache_config=CompressionConfig(
                            num_bits=4, group_size=64,
                            group_dim=2, symmetric=False))

        model = OptLM(f"facebook/{shared.model_name}", env, "models", policy)

    # DeepSpeed ZeRO-3
    elif shared.args.deepspeed:
        model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16)
        model = deepspeed.initialize(model=model, config_params=ds_config, model_parameters=None, optimizer=None, lr_scheduler=None)[0]
        model.module.eval() # Inference
        print(f"DeepSpeed ZeRO-3 is enabled: {is_deepspeed_zero3_enabled()}")

    # RWKV model (not on HuggingFace)
    elif shared.is_RWKV:
        from modules.RWKV import RWKVModel, RWKVTokenizer

        model = RWKVModel.from_pretrained(Path(f'models/{model_name}'), dtype="fp32" if shared.args.cpu else "bf16" if shared.args.bf16 else "fp16", device="cpu" if shared.args.cpu else "cuda")
        tokenizer = RWKVTokenizer.from_pretrained(Path('models'))

        # RWKV brings its own tokenizer, so the generic tokenizer code is skipped.
        return model, tokenizer

    # Quantized model
    elif shared.args.gptq_bits > 0:
        from modules.GPTQ_loader import load_quantized

        model = load_quantized(model_name)

    # Custom
    else:
        # Keyword arguments are collected in a dict instead of building and
        # eval()-ing a source string, so paths containing quotes or
        # backslashes are passed through untouched.
        params = {"low_cpu_mem_usage": True}
        if not shared.args.cpu and not torch.cuda.is_available():
            print("Warning: no GPU has been detected.\nFalling back to CPU mode.\n")
            shared.args.cpu = True

        if shared.args.cpu:
            params["torch_dtype"] = torch.float32
        else:
            params["device_map"] = 'auto'
            if shared.args.load_in_8bit:
                params["load_in_8bit"] = True
            elif shared.args.bf16:
                params["torch_dtype"] = torch.bfloat16
            else:
                params["torch_dtype"] = torch.float16

            cpu_limit = f"{shared.args.cpu_memory or '99'}GiB"
            if shared.args.gpu_memory:
                # One entry per GPU index, plus the CPU limit.
                max_memory = {i: f"{mem}GiB" for i, mem in enumerate(shared.args.gpu_memory)}
                max_memory['cpu'] = cpu_limit
                params["max_memory"] = max_memory
            elif not shared.args.load_in_8bit:
                # Derive a limit from the total memory of GPU 0 (in MiB),
                # rounded to a whole number of thousands with some headroom.
                total_mem = (torch.cuda.get_device_properties(0).total_memory/(1024*1024))
                suggestion = round((total_mem-1000)/1000)*1000
                if total_mem-suggestion < 800:
                    suggestion -= 1000
                suggestion = int(round(suggestion/1000))
                print(f"\033[1;32;1mAuto-assigning --gpu-memory {suggestion} for your GPU to try to prevent out-of-memory errors.\nYou can manually set other values.\033[0;37;0m")
                params["max_memory"] = {0: f"{suggestion}GiB", 'cpu': cpu_limit}
            if shared.args.disk:
                params["offload_folder"] = shared.args.disk_cache_dir

        model = AutoModelForCausalLM.from_pretrained(Path(f'models/{shared.model_name}'), **params)

    # Loading the tokenizer
    if shared.model_name.lower().startswith(('gpt4chan', 'gpt-4chan', '4chan')) and Path("models/gpt-j-6B/").exists():
        tokenizer = AutoTokenizer.from_pretrained(Path("models/gpt-j-6B/"))
    else:
        tokenizer = AutoTokenizer.from_pretrained(Path(f"models/{shared.model_name}/"))
    tokenizer.truncation_side = 'left'

    print(f"Loaded the model in {(time.time()-t0):.2f} seconds.")
    return model, tokenizer
141
+
142
def load_soft_prompt(name):
    """Activate the soft prompt stored in ``softprompts/<name>.zip``.

    Passing the string ``'None'`` disables the soft prompt. Otherwise the
    archive's ``meta.json`` is printed (every field except ``name``) and
    ``tensor.npy`` is loaded, moved to the model's device/dtype, given a
    leading dimension of size 1 and stored in ``shared.soft_prompt_tensor``.

    Returns ``name`` unchanged.
    """
    if name == 'None':
        shared.soft_prompt = False
        shared.soft_prompt_tensor = None
    else:
        import io

        # Both members are read straight from the archive, so no temporary
        # files are written to (or left behind in) the working directory.
        with zipfile.ZipFile(Path(f'softprompts/{name}.zip')) as zf:
            j = json.loads(zf.read('meta.json'))
            print(f"\nLoading the softprompt \"{name}\".")
            for field in j:
                if field != 'name':
                    if isinstance(j[field], list):
                        print(f"{field}: {', '.join(j[field])}")
                    else:
                        print(f"{field}: {j[field]}")
            print()
            tensor = np.load(io.BytesIO(zf.read('tensor.npy')))

        tensor = torch.Tensor(tensor).to(device=shared.model.device, dtype=shared.model.dtype)
        tensor = torch.reshape(tensor, (1, tensor.shape[0], tensor.shape[1]))

        shared.soft_prompt = True
        shared.soft_prompt_tensor = tensor

    return name
text-generation-webui/modules/shared.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+
3
# Currently loaded model state; these start empty and are filled in elsewhere.
model = None
tokenizer = None
model_name = ""
soft_prompt_tensor = None
soft_prompt = False
is_RWKV = False

# Chat variables
history = {'internal': [], 'visible': []}
character = 'None'
stop_everything = False
processing_message = '*Is typing...*'

# UI elements (buttons, sliders, HTML, etc)
gradio = {}

# Generation input parameters
input_params = []

# Default interface settings.
settings = {
    'max_new_tokens': 200,
    'max_new_tokens_min': 1,
    'max_new_tokens_max': 2000,
    'name1': 'Person 1',
    'name2': 'Person 2',
    'context': 'This is a conversation between two people.',
    'stop_at_newline': True,
    'chat_prompt_size': 2048,
    'chat_prompt_size_min': 0,
    'chat_prompt_size_max': 2048,
    'chat_generation_attempts': 1,
    'chat_generation_attempts_min': 1,
    'chat_generation_attempts_max': 5,
    'name1_pygmalion': 'You',
    'name2_pygmalion': 'Kawaii',
    'context_pygmalion': "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. She is an optimist who loves to spread happiness and positivity wherever she goes.\n<START>",
    'stop_at_newline_pygmalion': False,
    'default_extensions': [],
    'chat_default_extensions': ["gallery"],
    # Keys look like model-name patterns mapped to a preset name — TODO confirm how they are matched.
    'presets': {
        'default': 'NovelAI-Sphinx Moth',
        'pygmalion-*': 'Pygmalion',
        'RWKV-*': 'Naive',
    },
    # Keys look like model-name patterns mapped to a starting prompt — TODO confirm how they are matched.
    'prompts': {
        'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:',
        '^(gpt4chan|gpt-4chan|4chan)': '-----\n--- 865467536\nInput text\n--- 865467537\n',
        '(rosey|chip|joi)_.*_instruct.*': 'User: \n',
        'oasst-*': '<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>'
    }
}
54
+
55
def str2bool(v):
    """Convert a command-line value to a bool (argparse ``type=`` helper).

    Booleans are returned unchanged. Strings are matched case-insensitively
    against the accepted true/false spellings; anything else raises
    ``argparse.ArgumentTypeError``.
    """
    if isinstance(v, bool):
        return v

    lowered = v.lower()
    truthy = ('yes', 'true', 't', 'y', '1')
    falsy = ('no', 'false', 'f', 'n', '0')
    if lowered in truthy:
        return True
    if lowered in falsy:
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
64
+
65
# Command-line interface. The parsed result is exposed as ``args`` and is
# evaluated as soon as this module is imported.
parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog,max_help_position=54))
parser.add_argument('--model', type=str, help='Name of the model to load by default.')
parser.add_argument('--notebook', action='store_true', help='Launch the web UI in notebook mode, where the output is written to the same text box as the input.')
parser.add_argument('--chat', action='store_true', help='Launch the web UI in chat mode.')
parser.add_argument('--cai-chat', action='store_true', help='Launch the web UI in chat mode with a style similar to Character.AI\'s. If the file img_bot.png or img_bot.jpg exists in the same folder as server.py, this image will be used as the bot\'s profile picture. Similarly, img_me.png or img_me.jpg will be used as your profile picture.')
parser.add_argument('--cpu', action='store_true', help='Use the CPU to generate text.')
parser.add_argument('--load-in-8bit', action='store_true', help='Load the model with 8-bit precision.')
parser.add_argument('--load-in-4bit', action='store_true', help='DEPRECATED: use --gptq-bits 4 instead.')
parser.add_argument('--gptq-bits', type=int, default=0, help='Load a pre-quantized model with specified precision. 2, 3, 4 and 8bit are supported. Currently only works with LLaMA and OPT.')
parser.add_argument('--gptq-model-type', type=str, help='Model type of pre-quantized model. Currently only LLaMa and OPT are supported.')
parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
parser.add_argument('--auto-devices', action='store_true', help='Automatically split the model across the available GPU(s) and CPU.')
parser.add_argument('--disk', action='store_true', help='If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk.')
parser.add_argument('--disk-cache-dir', type=str, default="cache", help='Directory to save the disk cache to. Defaults to "cache".')
parser.add_argument('--gpu-memory', type=int, nargs="+", help='Maximum GPU memory in GiB to be allocated per GPU. Example: --gpu-memory 10 for a single GPU, --gpu-memory 10 5 for two GPUs.')
parser.add_argument('--cpu-memory', type=int, help='Maximum CPU memory in GiB to allocate for offloaded weights. Must be an integer number. Defaults to 99.')
parser.add_argument('--flexgen', action='store_true', help='Enable the use of FlexGen offloading.')
parser.add_argument('--percent', type=int, nargs="+", default=[0, 100, 100, 0, 100, 0], help='FlexGen: allocation percentages. Must be 6 numbers separated by spaces (default: 0, 100, 100, 0, 100, 0).')
parser.add_argument("--compress-weight", action="store_true", help="FlexGen: activate weight compression.")
parser.add_argument("--pin-weight", type=str2bool, nargs="?", const=True, default=True, help="FlexGen: whether to pin weights (setting this to False reduces CPU memory by 20%%).")
parser.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.')
parser.add_argument('--nvme-offload-dir', type=str, help='DeepSpeed: Directory to use for ZeRO-3 NVME offloading.')
parser.add_argument('--local_rank', type=int, default=0, help='DeepSpeed: Optional argument for distributed setups.')
parser.add_argument('--rwkv-strategy', type=str, default=None, help='RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8".')
parser.add_argument('--rwkv-cuda-on', action='store_true', help='RWKV: Compile the CUDA kernel for better performance.')
parser.add_argument('--no-stream', action='store_true', help='Don\'t stream the text output in real time.')
parser.add_argument('--settings', type=str, help='Load the default interface settings from this json file. See settings-template.json for an example. If you create a file called settings.json, this file will be loaded by default without the need to use the --settings flag.')
parser.add_argument('--extensions', type=str, nargs="+", help='The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.')
parser.add_argument('--listen', action='store_true', help='Make the web UI reachable from your local network.')
parser.add_argument('--listen-port', type=int, help='The listening port that the server will use.')
parser.add_argument('--share', action='store_true', help='Create a public URL. This is useful for running the web UI on Google Colab or similar.')
parser.add_argument('--auto-launch', action='store_true', default=False, help='Open the web UI in the default browser upon launch.')
parser.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.')
args = parser.parse_args()

# Provisional, this will be deleted later
# The deprecated flag is mapped onto its replacement so it keeps working.
if args.load_in_4bit:
    print("Warning: --load-in-4bit is deprecated and will be removed. Use --gptq-bits 4 instead.\n")
    args.gptq_bits = 4
text-generation-webui/modules/text_generation.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gc
2
+ import re
3
+ import time
4
+
5
+ import numpy as np
6
+ import torch
7
+ import transformers
8
+
9
+ import modules.shared as shared
10
+ from modules.callbacks import (Iteratorize, Stream,
11
+ _SentinelTokenStoppingCriteria)
12
+ from modules.extensions import apply_extensions
13
+ from modules.html_generator import generate_4chan_html, generate_basic_html
14
+ from modules.models import local_rank
15
+
16
+
17
def get_max_prompt_length(tokens):
    """Return how many prompt tokens fit once ``tokens`` new tokens are reserved.

    The budget is 2048 minus ``tokens``, further reduced by the second
    dimension of the soft prompt tensor while a soft prompt is active.
    """
    soft_prompt_length = shared.soft_prompt_tensor.shape[1] if shared.soft_prompt else 0
    return 2048 - tokens - soft_prompt_length
22
+
23
def encode(prompt, tokens_to_generate=0, add_special_tokens=True):
    """Tokenize ``prompt`` and return the ids in the form the active backend expects.

    RWKV gets a NumPy array reshaped to ``(1, n)``. Otherwise the prompt is
    tokenized with truncation to ``get_max_prompt_length(tokens_to_generate)``
    and returned unchanged (CPU), as a NumPy array (FlexGen), moved to
    ``local_rank`` (DeepSpeed) or moved to CUDA (default).
    """
    text = str(prompt)

    if shared.is_RWKV:
        ids = shared.tokenizer.encode(text)
        return np.array(ids).reshape(1, len(ids))

    ids = shared.tokenizer.encode(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=get_max_prompt_length(tokens_to_generate),
        add_special_tokens=add_special_tokens,
    )
    if shared.args.cpu:
        return ids
    if shared.args.flexgen:
        return ids.numpy()
    if shared.args.deepspeed:
        return ids.to(device=local_rank)
    return ids.cuda()
38
+
39
def decode(output_ids):
    """Turn token ids back into text.

    Special tokens are kept for model names matching ``oasst-*``; for every
    other model they are skipped and any literal ``<|endoftext|>`` left in
    the text is removed.
    """
    # Open Assistant relies on special tokens like <|endoftext|>
    keep_special_tokens = re.match('oasst-*', shared.model_name.lower())
    if keep_special_tokens:
        return shared.tokenizer.decode(output_ids, skip_special_tokens=False)

    text = shared.tokenizer.decode(output_ids, skip_special_tokens=True)
    return text.replace(r'<|endoftext|>', '')
47
+
48
def generate_softprompt_input_tensors(input_ids):
    """Prepend the soft prompt to the embeddings of ``input_ids``.

    Returns ``(inputs_embeds, filler_input_ids)``: the soft prompt tensor
    concatenated with the token embeddings along dim 1, and an all-zero id
    tensor with one row and as many columns as ``inputs_embeds.shape[1]``,
    placed on the model's device.
    """
    token_embeds = shared.model.transformer.wte(input_ids)
    inputs_embeds = torch.cat((shared.soft_prompt_tensor, token_embeds), dim=1)
    filler_shape = (1, inputs_embeds.shape[1])
    filler_input_ids = torch.zeros(filler_shape, dtype=input_ids.dtype).to(shared.model.device)
    return inputs_embeds, filler_input_ids
54
+
55
# Removes empty replies from gpt4chan outputs
def fix_gpt4chan(s):
    """Collapse posts that contain nothing (or only a ``>>number`` reference).

    Three substitutions are applied in a fixed order, and the whole sequence
    is repeated ten times so that runs of consecutive empty posts are
    removed as well.
    """
    empty_post_patterns = (
        "--- [0-9]*\n>>[0-9]*\n---",
        "--- [0-9]*\n *\n---",
        "--- [0-9]*\n\n\n---",
    )
    for _ in range(10):
        for pattern in empty_post_patterns:
            s = re.sub(pattern, "---", s)
    return s
62
+
63
# Fix the LaTeX equations in galactica
def fix_galactica(s):
    r"""Normalise LaTeX delimiters and paragraph spacing in galactica output.

    ``\[``, ``\]``, ``\(`` and ``\)`` become ``$``, doubled ``$$`` collapses
    to a single ``$``, every newline is doubled and runs of three or more
    newlines are reduced to two.
    """
    # Order matters: "$$" must be collapsed after the delimiters were replaced.
    for delimiter in (r'\[', r'\]', r'\(', r'\)', r'$$'):
        s = s.replace(delimiter, r'$')
    s = re.sub(r'\n', r'\n\n', s)
    return re.sub(r"\n{3,}", "\n\n", s)
73
+
74
def formatted_outputs(reply, model_name):
    """Shape ``reply`` for the UI.

    In chat modes the reply is returned as is. Otherwise a 3-tuple is
    returned: the (possibly cleaned-up) text, a second text value (the text
    again for galactica models, a fixed notice for all others) and an HTML
    rendering chosen by the model name prefix.
    """
    if shared.args.chat or shared.args.cai_chat:
        return reply

    lowered_name = model_name.lower()
    if lowered_name.startswith('galactica'):
        fixed = fix_galactica(reply)
        return fixed, fixed, generate_basic_html(fixed)
    if lowered_name.startswith(('gpt4chan', 'gpt-4chan', '4chan')):
        fixed = fix_gpt4chan(reply)
        return fixed, 'Only applicable for GALACTICA models.', generate_4chan_html(fixed)
    return reply, 'Only applicable for GALACTICA models.', generate_basic_html(reply)
86
+
87
def clear_torch_cache():
    """Run the garbage collector and, unless in CPU mode, empty the CUDA cache."""
    gc.collect()
    if shared.args.cpu:
        return
    torch.cuda.empty_cache()
91
+
92
def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, eos_token=None, stopping_string=None):
    """Generate a reply to ``question`` and yield it, formatted, as it grows.

    This is a generator: each yielded value is ``formatted_outputs(...)`` of
    the text produced so far. Depending on the flags it produces the whole
    reply at once (``--no-stream``), streams it through a stopping-criteria
    callback, or, for FlexGen, generates in chunks of up to 8 tokens.

    ``eos_token`` (optional) is encoded and its last id is added to the
    end-of-sequence ids; ``stopping_string`` (optional) is encoded and
    installed as a sentinel stopping criterion. The remaining parameters are
    forwarded to the model's ``generate`` call.
    """
    clear_torch_cache()
    t0 = time.time()

    # These models are not part of Hugging Face, so we handle them
    # separately and terminate the function call earlier
    if shared.is_RWKV:
        try:
            if shared.args.no_stream:
                reply = shared.model.generate(context=question, token_count=max_new_tokens, temperature=temperature, top_p=top_p, top_k=top_k)
                yield formatted_outputs(reply, shared.model_name)
            else:
                yield formatted_outputs(question, shared.model_name)
                # RWKV has proper streaming, which is very nice.
                # No need to generate 8 tokens at a time.
                for reply in shared.model.generate_with_streaming(context=question, token_count=max_new_tokens, temperature=temperature, top_p=top_p, top_k=top_k):
                    yield formatted_outputs(reply, shared.model_name)
        finally:
            # NOTE(review): `reply` is unbound here if generation failed before
            # its first assignment, and the `return` inside `finally` discards
            # any in-flight exception — confirm this is intended.
            t1 = time.time()
            output = encode(reply)[0]
            input_ids = encode(question)
            print(f"Output generated in {(t1-t0):.2f} seconds ({(len(output)-len(input_ids[0]))/(t1-t0):.2f} tokens/s, {len(output)-len(input_ids[0])} tokens)")
            return

    # Outside chat modes, extensions may rewrite the prompt; the untouched
    # text is kept so it can be shown back to the user.
    original_question = question
    if not (shared.args.chat or shared.args.cai_chat):
        question = apply_extensions(question, "input")
    if shared.args.verbose:
        print(f"\n\n{question}\n--------------------\n")

    input_ids = encode(question, max_new_tokens)
    original_input_ids = input_ids
    output = input_ids[0]
    # Suffix appended to the eval'd generate() call in --no-stream mode.
    cuda = "" if (shared.args.cpu or shared.args.deepspeed or shared.args.flexgen) else ".cuda()"
    eos_token_ids = [shared.tokenizer.eos_token_id] if shared.tokenizer.eos_token_id is not None else []
    if eos_token is not None:
        eos_token_ids.append(int(encode(eos_token)[0][-1]))
    stopping_criteria_list = transformers.StoppingCriteriaList()
    if stopping_string is not None:
        # Copied from https://github.com/PygmalionAI/gradio-ui/blob/master/src/model.py
        t = encode(stopping_string, 0, add_special_tokens=False)
        stopping_criteria_list.append(_SentinelTokenStoppingCriteria(sentinel_token_ids=t, starting_idx=len(input_ids[0])))

    # The generate() call is assembled as source text and eval'd below.
    # Entries without braces (e.g. "max_new_tokens=max_new_tokens") refer to
    # local variables by name, so those locals must keep their names.
    if not shared.args.flexgen:
        generate_params = [
            f"max_new_tokens=max_new_tokens",
            f"eos_token_id={eos_token_ids}",
            f"stopping_criteria=stopping_criteria_list",
            f"do_sample={do_sample}",
            f"temperature={temperature}",
            f"top_p={top_p}",
            f"typical_p={typical_p}",
            f"repetition_penalty={repetition_penalty}",
            f"top_k={top_k}",
            f"min_length={min_length if shared.args.no_stream else 0}",
            f"no_repeat_ngram_size={no_repeat_ngram_size}",
            f"num_beams={num_beams}",
            f"penalty_alpha={penalty_alpha}",
            f"length_penalty={length_penalty}",
            f"early_stopping={early_stopping}",
        ]
    else:
        generate_params = [
            f"max_new_tokens={max_new_tokens if shared.args.no_stream else 8}",
            f"do_sample={do_sample}",
            f"temperature={temperature}",
            f"stop={eos_token_ids[-1]}",
        ]
    if shared.args.deepspeed:
        generate_params.append("synced_gpus=True")
    if shared.soft_prompt:
        inputs_embeds, filler_input_ids = generate_softprompt_input_tensors(input_ids)
        generate_params.insert(0, "inputs_embeds=inputs_embeds")
        generate_params.insert(0, "inputs=filler_input_ids")
    else:
        generate_params.insert(0, "inputs=input_ids")

    try:
        # Generate the entire reply at once.
        if shared.args.no_stream:
            with torch.no_grad():
                output = eval(f"shared.model.generate({', '.join(generate_params)}){cuda}")[0]
            if shared.soft_prompt:
                # Replace the filler ids at the front with the real prompt ids.
                output = torch.cat((input_ids[0], output[filler_input_ids.shape[1]:]))

            reply = decode(output)
            if not (shared.args.chat or shared.args.cai_chat):
                reply = original_question + apply_extensions(reply[len(question):], "output")

            yield formatted_outputs(reply, shared.model_name)

        # Stream the reply 1 token at a time.
        # This is based on the trick of using 'stopping_criteria' to create an iterator.
        elif not shared.args.flexgen:

            def generate_with_callback(callback=None, **kwargs):
                kwargs['stopping_criteria'].append(Stream(callback_func=callback))
                clear_torch_cache()
                with torch.no_grad():
                    shared.model.generate(**kwargs)

            def generate_with_streaming(**kwargs):
                return Iteratorize(generate_with_callback, kwargs, callback=None)

            yield formatted_outputs(original_question, shared.model_name)
            with eval(f"generate_with_streaming({', '.join(generate_params)})") as generator:
                for output in generator:
                    if shared.soft_prompt:
                        output = torch.cat((input_ids[0], output[filler_input_ids.shape[1]:]))
                    reply = decode(output)

                    if not (shared.args.chat or shared.args.cai_chat):
                        reply = original_question + apply_extensions(reply[len(question):], "output")

                    if output[-1] in eos_token_ids:
                        break
                    yield formatted_outputs(reply, shared.model_name)

            yield formatted_outputs(reply, shared.model_name)

        # Stream the output naively for FlexGen since it doesn't support 'stopping_criteria'
        else:
            for i in range(max_new_tokens//8+1):
                clear_torch_cache()
                with torch.no_grad():
                    output = eval(f"shared.model.generate({', '.join(generate_params)})")[0]
                if shared.soft_prompt:
                    output = torch.cat((input_ids[0], output[filler_input_ids.shape[1]:]))
                reply = decode(output)

                if not (shared.args.chat or shared.args.cai_chat):
                    reply = original_question + apply_extensions(reply[len(question):], "output")

                # Stop once the output holds more end-of-sequence ids than the input did.
                if np.count_nonzero(np.isin(input_ids[0], eos_token_ids)) < np.count_nonzero(np.isin(output, eos_token_ids)):
                    break
                yield formatted_outputs(reply, shared.model_name)

                # The output so far becomes the input of the next chunk.
                input_ids = np.reshape(output, (1, output.shape[0]))
                if shared.soft_prompt:
                    inputs_embeds, filler_input_ids = generate_softprompt_input_tensors(input_ids)

            yield formatted_outputs(reply, shared.model_name)

    finally:
        # NOTE(review): the `return` inside `finally` discards any exception
        # raised during generation — confirm this is intended.
        t1 = time.time()
        print(f"Output generated in {(t1-t0):.2f} seconds ({(len(output)-len(original_input_ids[0]))/(t1-t0):.2f} tokens/s, {len(output)-len(original_input_ids[0])} tokens)")
        return
text-generation-webui/modules/ui.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
# Emoji used as the label of the small refresh buttons.
refresh_symbol = '\U0001f504'  # 🔄

# Stylesheet string for the interface (refresh button, labels, SVG icons, lists).
css = """
.tabs.svelte-710i53 {
    margin-top: 0
}
.py-6 {
    padding-top: 2.5rem
}
.dark #refresh-button {
    background-color: #ffffff1f;
}
#refresh-button {
    flex: none;
    margin: 0;
    padding: 0;
    min-width: 50px;
    border: none;
    box-shadow: none;
    border-radius: 10px;
    background-color: #0000000d;
}
#download-label, #upload-label {
    min-height: 0
}
#accordion {
}
.dark svg {
    fill: white;
}
svg {
    display: unset !important;
    vertical-align: middle !important;
    margin: 5px;
}
ol li p, ul li p {
    display: inline-block;
}
"""

# Additional stylesheet string for the chat layouts.
# Raw string: the selector below needs literal backslashes (``\[`` / ``\]``).
# In a normal string literal those are invalid escape sequences, which newer
# Python versions warn about; the resulting text is unchanged.
chat_css = r"""
.h-\[40vh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx {
    height: 66.67vh
}
.gradio-container {
    max-width: 800px !important;
    margin-left: auto !important;
    margin-right: auto !important;
}
.w-screen {
    width: unset
}
div.svelte-362y77>*, div.svelte-362y77>.form>* {
    flex-wrap: nowrap
}
/* fixes the API documentation in chat mode */
.api-docs.svelte-1iguv9h.svelte-1iguv9h.svelte-1iguv9h {
    display: grid;
}
.pending.svelte-1ed2p3z {
    opacity: 1;
}
"""
66
+
67
class ToolButton(gr.Button, gr.components.FormComponent):
    """Compact button whose label is a single emoji; sized to sit inside gradio forms."""

    def __init__(self, **kwargs):
        # The "tool" variant is always applied; every other keyword argument
        # is forwarded unchanged to the parent constructor.
        super().__init__(variant="tool", **kwargs)

    def get_block_name(self):
        # Reports the block name "button".
        # NOTE(review): presumably so the front end treats this subclass as a
        # regular button component — confirm against gradio.
        return "button"
75
+
76
def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_id):
    """Create a refresh button wired to update ``refresh_component``.

    On click the button calls ``refresh_method()``, resolves ``refreshed_args``
    (calling it first when it is callable), copies every key/value of the
    resulting mapping onto ``refresh_component`` with ``setattr`` and returns
    ``gr.update`` built from the same mapping.

    Args:
        refresh_component: Component that is updated; it is the only output
            of the click handler.
        refresh_method: Zero-argument callable invoked on every click.
        refreshed_args: Mapping of attribute names to new values, or a
            zero-argument callable returning such a mapping. ``None`` (or a
            callable returning ``None``) is treated as an empty mapping.
        elem_id: Element id assigned to the created button.

    Returns:
        The created ``ToolButton``.
    """
    def refresh():
        refresh_method()
        # Normalise a missing mapping to {} before iterating, so a ``None``
        # result does not fail on ``.items()`` and is handled the same way
        # for both the setattr loop and the gr.update call.
        args = (refreshed_args() if callable(refreshed_args) else refreshed_args) or {}

        for k, v in args.items():
            setattr(refresh_component, k, v)

        return gr.update(**args)

    refresh_button = ToolButton(value=refresh_symbol, elem_id=elem_id)
    refresh_button.click(
        fn=refresh,
        inputs=[],
        outputs=[refresh_component]
    )
    return refresh_button
text-generation-webui/presets/Contrastive Search.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ do_sample=False
2
+ penalty_alpha=0.6
3
+ top_k=4
text-generation-webui/presets/Debug-deterministic.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ do_sample=False
text-generation-webui/presets/Default.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ do_sample=True
2
+ temperature=1
3
+ top_p=1
4
+ typical_p=1
5
+ repetition_penalty=1
6
+ top_k=50
7
+ num_beams=1
8
+ penalty_alpha=0
9
+ min_length=0
10
+ length_penalty=1
11
+ no_repeat_ngram_size=0
12
+ early_stopping=False
text-generation-webui/presets/Individual Today.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ do_sample=True
2
+ top_p=0.9
3
+ top_k=50
4
+ temperature=1.39
5
+ repetition_penalty=1.08
6
+ typical_p=0.2
text-generation-webui/presets/Kobold-Godlike.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ do_sample=True
2
+ top_p=0.5
3
+ top_k=0
4
+ temperature=0.7
5
+ repetition_penalty=1.1
6
+ typical_p=0.19
text-generation-webui/presets/Kobold-Liminal Drift.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ do_sample=True
2
+ top_p=1.0
3
+ top_k=0
4
+ temperature=0.66
5
+ repetition_penalty=1.1
6
+ typical_p=0.6
text-generation-webui/presets/Naive.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ do_sample=True
2
+ temperature=0.7
3
+ top_p=0.85
4
+ top_k=50
text-generation-webui/presets/NovelAI-Best Guess.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ do_sample=True
2
+ top_p=0.9
3
+ top_k=100
4
+ temperature=0.8
5
+ repetition_penalty=1.15
6
+ typical_p=1.0
text-generation-webui/presets/NovelAI-Decadence.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ do_sample=True
2
+ top_p=1.0
3
+ top_k=100
4
+ temperature=2
5
+ repetition_penalty=1
6
+ typical_p=0.97
text-generation-webui/presets/NovelAI-Genesis.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ do_sample=True
2
+ top_p=0.98
3
+ top_k=0
4
+ temperature=0.63
5
+ repetition_penalty=1.05
6
+ typical_p=1.0
text-generation-webui/presets/NovelAI-Lycaenidae.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ do_sample=True
2
+ top_p=0.85
3
+ top_k=12
4
+ temperature=2
5
+ repetition_penalty=1.15
6
+ typical_p=1.0
text-generation-webui/presets/NovelAI-Ouroboros.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ do_sample=True
2
+ top_p=1.0
3
+ top_k=100
4
+ temperature=1.07
5
+ repetition_penalty=1.05
6
+ typical_p=1.0
text-generation-webui/presets/NovelAI-Pleasing Results.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ do_sample=True
2
+ top_p=1.0
3
+ top_k=0
4
+ temperature=0.44
5
+ repetition_penalty=1.15
6
+ typical_p=1.0
text-generation-webui/presets/NovelAI-Sphinx Moth.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ do_sample=True
2
+ top_p=0.18
3
+ top_k=30
4
+ temperature=2.0
5
+ repetition_penalty=1.15
6
+ typical_p=1.0
text-generation-webui/presets/NovelAI-Storywriter.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ do_sample=True
2
+ top_p=0.73
3
+ top_k=0
4
+ temperature=0.72
5
+ repetition_penalty=1.1
6
+ typical_p=1.0
text-generation-webui/presets/Pygmalion.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ do_sample=True
2
+ top_p=0.9
3
+ top_k=0
4
+ temperature=0.5
5
+ repetition_penalty=1.1
6
+ typical_p=1.0