mikeee committed
Commit
51784c0
0 Parent(s):

Duplicate from mikeee/Wizard-Vicuna-7B-Uncensored-GGML

Files changed (7)
  1. .gitattributes +35 -0
  2. .gitignore +11 -0
  3. .ruff.toml +17 -0
  4. .stignore +102 -0
  5. README.md +13 -0
  6. app.py +489 -0
  7. requirements.txt +7 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,11 @@
+ call-activate.bat
+ okteto.yml
+ okteto-up.bat
+ install-sw.sh
+ install-sw1.sh
+ start-sshd.sh
+ pyproject.toml
+ models
+ .ruff_cache
+ run-nodemon.sh
+ app-.py
.ruff.toml ADDED
@@ -0,0 +1,17 @@
+ # Assume Python 3.10.
+ target-version = "py310"
+ # Increase the maximum line length to 300 characters.
+ line-length = 300
+
+ # pyflakes, pycodestyle, isort
+ # flake8 YTT, pydocstyle D, pylint PLC
+ select = ["F", "E", "W", "I001", "YTT", "D", "PLC"]
+ # select = ["ALL"]
+
+ # D103 Missing docstring in public function
+ # D101 Missing docstring in public class
+ # `multi-line-summary-first-line` (D212)
+ # `one-blank-line-before-class` (D203)
+ extend-ignore = ["D103", "D101", "D212", "D203"]
+
+ exclude = [".venv"]
.stignore ADDED
@@ -0,0 +1,102 @@
+ models
+ .git
+ # Byte-compiled / optimized / DLL files
+ __pycache__
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build
+ develop-eggs
+ dist
+ downloads
+ eggs
+ .eggs
+ lib
+ lib64
+ parts
+ sdist
+ var
+ wheels
+ pip-wheel-metadata
+ share/python-wheels
+ *.egg-info
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+
+ # Flask stuff:
+ instance
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build
+
+ # PyBuilder
+ target
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # celery beat schedule file
+ celerybeat-schedule
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env
+ venv
+ ENV
+ env.bak
+ venv.bak
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mypy
+ .mypy_cache
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: TheBloke/WizardLM-13B-V1.0-Uncensored-GGML
+ emoji: 🚀
+ colorFrom: green
+ colorTo: green
+ sdk: gradio
+ sdk_version: 3.35.2
+ app_file: app.py
+ pinned: false
+ duplicated_from: mikeee/Wizard-Vicuna-7B-Uncensored-GGML
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,489 @@
+ """Run the app."""
+ # pylint: disable=line-too-long, broad-exception-caught, invalid-name, missing-function-docstring, too-many-instance-attributes, missing-class-docstring
+ # ruff: noqa: E501
+ import os
+ import time
+ from dataclasses import asdict, dataclass
+ from pathlib import Path
+ from types import SimpleNamespace
+ from urllib.parse import urlparse
+
+ import gradio as gr
+ import psutil
+ from about_time import about_time
+
+ # from ctransformers import AutoConfig, AutoModelForCausalLM
+ from ctransformers import AutoModelForCausalLM
+ from huggingface_hub import hf_hub_download
+ from loguru import logger
+
+ filename_list = [
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q2_K.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_L.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_M.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_S.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_0.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_S.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_0.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_1.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_M.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_S.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q6_K.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q8_0.bin",
+ ]
+
+ URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"  # 4.05G
+ MODEL_FILENAME = Path(URL).name
+ MODEL_FILENAME = filename_list[0]  # q2_K
+ MODEL_FILENAME = filename_list[5]  # q4_1 4.21G
+
+ REPO_ID = "/".join(
+     urlparse(URL).path.strip("/").split("/")[:2]
+ )  # TheBloke/Wizard-Vicuna-7B-Uncensored-GGML
+
+ DESTINATION_FOLDER = "models"
+
+ os.environ["TZ"] = "Asia/Shanghai"
+ try:
+     time.tzset()  # type: ignore # pylint: disable=no-member
+ except Exception:
+     # time.tzset() is not available on Windows
+     logger.warning("Windows, can't run time.tzset()")
+
+ ns = SimpleNamespace(
+     response="",
+     generator=[],
+ )
+
+ default_system_prompt = "A conversation between a user and an LLM-based AI assistant named Local Assistant. Local Assistant gives helpful and honest answers."
+
+ user_prefix = "[user]: "
+ assistant_prefix = "[assistant]: "
+
+
+ def predict_str(prompt, bot):  # bot is in fact bot_history
+     # logger.debug(f"{prompt=}, {bot=}, {timeout=}")
+
+     if bot is None:
+         bot = []
+
+     logger.debug(f"{prompt=}, {bot=}")
+
+     try:
+         # user_prompt = prompt
+         generator = generate(
+             LLM,
+             GENERATION_CONFIG,
+             system_prompt=default_system_prompt,
+             user_prompt=prompt.strip(),
+         )
+
+         ns.generator = generator  # for .then
+
+     except Exception as exc:
+         logger.error(exc)
+
+     # bot.append([prompt, f"{response} {_}"])
+     # return prompt, bot
+
+     _ = bot + [[prompt, None]]
+     logger.debug(f"{prompt=}, {_=}")
+
+     return prompt, _
+
+
+ def bot_str(bot):
+     if bot:
+         bot[-1][1] = ""
+     else:
+         bot = [["Something is wrong", ""]]
+
+     print(assistant_prefix, end=" ", flush=True)
+
+     response = ""
+
+     flag = 1
+     then = time.time()
+     for word in ns.generator:
+         # record first response time
+         if flag:
+             logger.debug(f"\t {time.time() - then:.1f}s")
+             flag = 0
+         print(word, end="", flush=True)
+         # print(word, flush=True)  # vertical stream
+         response += word
+         bot[-1][1] = response
+         yield bot
+
+
+ def predict(prompt, bot):
+     # logger.debug(f"{prompt=}, {bot=}, {timeout=}")
+     logger.debug(f"{prompt=}, {bot=}")
+
+     ns.response = ""
+     then = time.time()
+     with about_time() as atime:  # type: ignore
+         try:
+             # user_prompt = prompt
+             generator = generate(
+                 LLM,
+                 GENERATION_CONFIG,
+                 system_prompt=default_system_prompt,
+                 user_prompt=prompt.strip(),
+             )
+
+             ns.generator = generator  # for .then
+
+             print(assistant_prefix, end=" ", flush=True)
+
+             response = ""
+             buff.update(value="diggin...")
+
+             flag = 1
+             for word in generator:
+                 # record first response time
+                 if flag:
+                     logger.debug(f"\t {time.time() - then:.1f}s")
+                     flag = 0
+                 # print(word, end="", flush=True)
+                 print(word, flush=True)  # vertical stream
+                 response += word
+                 ns.response = response
+                 buff.update(value=response)
+             print("")
+             logger.debug(f"{response=}")
+         except Exception as exc:
+             logger.error(exc)
+             response = f"{exc=}"
+
+     # bot = {"inputs": [response]}
+     _ = (
+         f"(time elapsed: {atime.duration_human}, "  # type: ignore
+         f"{atime.duration/(len(prompt) + len(response)):.1f}s/char)"  # type: ignore
+     )
+
+     bot.append([prompt, f"{response} {_}"])
+
+     return prompt, bot
+
+
+ def predict_api(prompt):
+     logger.debug(f"{prompt=}")
+     ns.response = ""
+     try:
+         # user_prompt = prompt
+         _ = GenerationConfig(
+             temperature=0.2,
+             top_k=0,
+             top_p=0.9,
+             repetition_penalty=1.0,
+             max_new_tokens=512,  # adjust as needed
+             seed=42,
+             reset=False,  # reset history (cache)
+             stream=True,  # TODO stream=False and generator
+             threads=os.cpu_count() // 2,  # type: ignore # adjust for your CPU
+             stop=["<|im_end|>", "|<"],
+         )
+
+         # TODO: stream does not make sense in api?
+         generator = generate(
+             LLM, _, system_prompt=default_system_prompt, user_prompt=prompt.strip()
+         )
+         print(assistant_prefix, end=" ", flush=True)
+
+         response = ""
+         buff.update(value="diggin...")
+         for word in generator:
+             print(word, end="", flush=True)
+             response += word
+             ns.response = response
+             buff.update(value=response)
+         print("")
+         logger.debug(f"{response=}")
+     except Exception as exc:
+         logger.error(exc)
+         response = f"{exc=}"
+     # bot = {"inputs": [response]}
+     # bot = [(prompt, response)]
+
+     return response
+
+
+ def download_quant(destination_folder: str, repo_id: str, model_filename: str):
+     local_path = os.path.abspath(destination_folder)
+     return hf_hub_download(
+         repo_id=repo_id,
+         filename=model_filename,
+         local_dir=local_path,
+         local_dir_use_symlinks=True,
+     )
+
+
+ @dataclass
+ class GenerationConfig:
+     temperature: float
+     top_k: int
+     top_p: float
+     repetition_penalty: float
+     max_new_tokens: int
+     seed: int
+     reset: bool
+     stream: bool
+     threads: int
+     stop: list[str]
+
+
+ def format_prompt(system_prompt: str, user_prompt: str):
+     """Format prompt based on: https://huggingface.co/spaces/mosaicml/mpt-30b-chat/blob/main/app.py."""
+     # TODO: fix prompts
+
+     system_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
+     user_prompt = f"<|im_start|>user\n{user_prompt}<|im_end|>\n"
+     assistant_prompt = "<|im_start|>assistant\n"
+
+     return f"{system_prompt}{user_prompt}{assistant_prompt}"
+
+
+ def generate(
+     llm: AutoModelForCausalLM,
+     generation_config: GenerationConfig,
+     system_prompt: str = default_system_prompt,
+     user_prompt: str = "",
+ ):
+     """Run model inference; returns a generator if streaming is true."""
+     # if not user_prompt.strip():
+     return llm(
+         format_prompt(
+             system_prompt,
+             user_prompt,
+         ),
+         **asdict(generation_config),
+     )
+
+
+ # if "mpt" in model_filename:
+ #     config = AutoConfig.from_pretrained("mosaicml/mpt-30b-chat", context_length=8192)
+ #     llm = AutoModelForCausalLM.from_pretrained(
+ #         os.path.abspath(f"models/{model_filename}"),
+ #         model_type="mpt",
+ #         config=config,
+ #     )
+
+ # https://huggingface.co/spaces/matthoffner/wizardcoder-ggml/blob/main/main.py
+ _ = """
+ llm = AutoModelForCausalLM.from_pretrained(
+     "TheBloke/WizardCoder-15B-1.0-GGML",
+     model_file="WizardCoder-15B-1.0.ggmlv3.q4_0.bin",
+     model_type="starcoder",
+     threads=8
+ )
+ # """
+
+ logger.info(f"start dl, {REPO_ID=}, {MODEL_FILENAME=}, {DESTINATION_FOLDER=}")
+ download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME)
+ logger.info("done dl")
+
+ logger.debug(f"{os.cpu_count()=} {psutil.cpu_count(logical=False)=}")
+ cpu_count = os.cpu_count() // 2  # type: ignore
+ cpu_count = psutil.cpu_count(logical=False)
+
+ logger.debug(f"{cpu_count=}")
+
+ logger.info("load llm")
+
+ _ = Path("models", MODEL_FILENAME).absolute().as_posix()
+ logger.debug(f"model_file: {_}, exists: {Path(_).exists()}")
+ LLM = AutoModelForCausalLM.from_pretrained(
+     # "TheBloke/WizardCoder-15B-1.0-GGML",
+     REPO_ID,  # DESTINATION_FOLDER, # model_path_or_repo_id: str required
+     model_file=_,
+     model_type="llama",  # "starcoder", AutoConfig.from_pretrained(REPO_ID)
+     threads=cpu_count,
+ )
+
+ logger.info("done load llm")
+
+ GENERATION_CONFIG = GenerationConfig(
+     temperature=0.2,
+     top_k=0,
+     top_p=0.9,
+     repetition_penalty=1.0,
+     max_new_tokens=512,  # adjust as needed
+     seed=42,
+     reset=False,  # reset history (cache)
+     stream=True,  # streaming per word/token
+     threads=cpu_count,
+     stop=["<|im_end|>", "|<"],  # TODO possible fix of stop
+ )
+
+ css = """
+     .importantButton {
+         background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
+         border: none !important;
+     }
+     .importantButton:hover {
+         background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
+         border: none !important;
+     }
+     .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
+     .xsmall {font-size: x-small;}
+ """
+ etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
+ examples = [
+     ["How to pick a lock? Provide detailed steps."],
+     ["Explain the plot of Cinderella in a sentence."],
+     [
+         "How long does it take to become proficient in French, and what are the best methods for retaining information?"
+     ],
+     ["What are some common mistakes to avoid when writing code?"],
+     ["Build a prompt to generate a beautiful portrait of a horse"],
+     ["Suggest four metaphors to describe the benefits of AI"],
+     ["Write a pop song about leaving home for the sandy beaches."],
+     ["Write a summary demonstrating my ability to tame lions"],
+     ["鲁迅和周树人什么关系 说中文"],
+     ["鲁迅和周树人什么关系"],
+     ["鲁迅和周树人什么关系 用英文回答"],
+     ["从前有一头牛,这头牛后面有什么?"],
+     ["正无穷大加一大于正无穷大吗?"],
+     ["正无穷大加正无穷大大于正无穷大吗?"],
+     ["-2的平方根等于什么"],
+     ["树上有5只鸟,猎人开枪打死了一只。树上还有几只鸟?"],
+     ["树上有11只鸟,猎人开枪打死了一只。树上还有几只鸟?提示:需考虑鸟可能受惊吓飞走。"],
+     ["以红楼梦的行文风格写一张委婉的请假条。不少于320字。"],
+     [f"{etext} 翻成中文,列出3个版本"],
+     [f"{etext} \n 翻成中文,保留原意,但使用文学性的语言。不要写解释。列出3个版本"],
+     ["假定 1 + 2 = 4, 试求 7 + 8"],
+     ["判断一个数是不是质数的 javascript 码"],
+     ["实现python 里 range(10)的 javascript 码"],
+     ["实现python 里 [*(range(10)]的 javascript 码"],
+     ["Erkläre die Handlung von Cinderella in einem Satz."],
+     ["Erkläre die Handlung von Cinderella in einem Satz. Auf Deutsch"],
+ ]
+
+ with gr.Blocks(
+     # title="mpt-30b-chat-ggml",
+     title=f"{MODEL_FILENAME}",
+     theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
+     css=css,
+ ) as block:
+     with gr.Accordion("🎈 Info", open=False):
+         # gr.HTML(
+         #     """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
+         # )
+         gr.Markdown(
+             f"""<h5><center><{REPO_ID}>{MODEL_FILENAME}</center></h5>
+             The bot only speaks English.
+
+             Most examples are meant for another model;
+             you may want to try
+             some related prompts instead.
+             """,
+             elem_classes="xsmall",
+         )
+
+     # chatbot = gr.Chatbot().style(height=700)  # 500
+     chatbot = gr.Chatbot(height=500)
+     buff = gr.Textbox(show_label=False, visible=False)
+     with gr.Row():
+         with gr.Column(scale=5):
+             msg = gr.Textbox(
+                 label="Chat Message Box",
+                 placeholder="Ask me anything (press Enter or click Submit to send)",
+                 show_label=False,
+             ).style(container=False)
+         with gr.Column(scale=1, min_width=50):
+             with gr.Row():
+                 submit = gr.Button("Submit", elem_classes="xsmall")
+                 stop = gr.Button("Stop", visible=False)
+                 clear = gr.Button("Clear History", visible=True)
+     with gr.Row(visible=False):
+         with gr.Accordion("Advanced Options:", open=False):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     system = gr.Textbox(
+                         label="System Prompt",
+                         value=default_system_prompt,
+                         show_label=False,
+                     ).style(container=False)
+                 with gr.Column():
+                     with gr.Row():
+                         change = gr.Button("Change System Prompt")
+                         reset = gr.Button("Reset System Prompt")
+
+     with gr.Accordion("Example Inputs", open=True):
+         examples = gr.Examples(
+             examples=examples,
+             inputs=[msg],
+             examples_per_page=40,
+         )
+
+     # with gr.Row():
+     with gr.Accordion("Disclaimer", open=False):
+         _ = "-".join(MODEL_FILENAME.split("-")[:2])
+         gr.Markdown(
+             f"Disclaimer: {_} can produce factually incorrect output, and should not be relied on to produce "
+             f"factually accurate information. {_} was trained on various public datasets; while great efforts "
+             "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
+             "biased, or otherwise offensive outputs.",
+             elem_classes=["disclaimer"],
+         )
+     _ = """
+     msg.submit(
+         # fn=conversation.user_turn,
+         fn=predict,
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         # queue=True,
+         show_progress="full",
+         api_name="predict",
+     )
+     submit.click(
+         fn=lambda x, y: ("",) + predict(x, y)[1:],  # clear msg
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=True,
+         show_progress="full",
+     )
+     # """
+     msg.submit(
+         # fn=conversation.user_turn,
+         fn=predict_str,
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=True,
+         show_progress="full",
+         api_name="predict",
+     ).then(bot_str, chatbot, chatbot)
+     submit.click(
+         fn=lambda x, y: ("",) + predict_str(x, y)[1:],  # clear msg
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=True,
+         show_progress="full",
+     ).then(bot_str, chatbot, chatbot)
+
+     clear.click(lambda: None, None, chatbot, queue=False)
+
+     # update buff Textbox (every: units in seconds)
+     # https://huggingface.co/spaces/julien-c/nvidia-smi/discussions
+     # does not work
+     # AttributeError: 'Blocks' object has no attribute 'run_forever'
+     # block.run_forever(lambda: ns.response, None, [buff], every=1)
+
+     with gr.Accordion("For Chat/Translation API", open=False, visible=False):
+         input_text = gr.Text()
+         api_btn = gr.Button("Go", variant="primary")
+         out_text = gr.Text()
+     api_btn.click(
+         predict_api,
+         input_text,
+         out_text,
+         # show_progress="full",
+         api_name="api",
+     )
+
+ # concurrency_count=5, max_size=20
+ # max_size=36, concurrency_count=14
+ block.queue(concurrency_count=5, max_size=20).launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ ctransformers==0.2.10
+ transformers==4.30.2
+ huggingface_hub
+ gradio
+ loguru
+ about-time
+ psutil
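
For orientation, here is a minimal sketch of the load-and-stream pattern that app.py above builds on, using only the ctransformers calls that already appear in the diff (from_pretrained with model_file/model_type, and calling the model with the same keyword arguments as GenerationConfig). The prompt text and max_new_tokens value are illustrative assumptions, not part of the commit:

# Minimal sketch of the ctransformers usage in app.py above (ctransformers==0.2.10).
# The system/user prompt and token budget below are illustrative assumptions.
from ctransformers import AutoModelForCausalLM

llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Wizard-Vicuna-7B-Uncensored-GGML",
    model_file="Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin",
    model_type="llama",
)

# app.py wraps prompts in <|im_start|>/<|im_end|> markers via format_prompt()
prompt = (
    "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
    "<|im_start|>user\nExplain the plot of Cinderella in a sentence.<|im_end|>\n"
    "<|im_start|>assistant\n"
)

# stream=True yields the reply piece by piece, which predict()/bot_str() relay to the UI
for word in llm(prompt, stream=True, max_new_tokens=128, stop=["<|im_end|>"]):
    print(word, end="", flush=True)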