pinaki-ds mikeee committed on
Commit
22fb6b8
0 Parent(s):

Duplicate from mikeee/langchain-llama2-7b-chat-uncensored-ggml


Co-authored-by: mikeee <mikeee@users.noreply.huggingface.co>

Files changed (8)
  1. .gitattributes +35 -0
  2. .gitignore +12 -0
  3. .ruff.toml +17 -0
  4. .stignore +103 -0
  5. README.md +13 -0
  6. app.py +554 -0
  7. requirements.txt +9 -0
  8. run-app.sh +1 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,12 @@
+ call-activate.bat
+ okteto.yml
+ okteto-up.bat
+ install-sw.sh
+ install-sw1.sh
+ start-sshd.sh
+ pyproject.toml
+ models
+ .ruff_cache
+ run-nodemon.sh
+ app-.py
+ nodemon.json
.ruff.toml ADDED
@@ -0,0 +1,17 @@
+ # Assume Python 3.10.
+ target-version = "py310"
+ # Set the maximum line length to 300 characters.
+ line-length = 300
+
+ # pyflakes, pycodestyle, isort
+ # flake8 YTT, pydocstyle D, pylint PLC
+ select = ["F", "E", "W", "I001", "YTT", "D", "PLC"]
+ # select = ["ALL"]
+
+ # D103 Missing docstring in public function
+ # D101 Missing docstring in public class
+ # `multi-line-summary-first-line` (D212)
+ # `one-blank-line-before-class` (D203)
+ extend-ignore = ["D103", "D101", "D212", "D203"]
+
+ exclude = [".venv"]
.stignore ADDED
@@ -0,0 +1,103 @@
+ models
+ *.bin
+ .git
+ # Byte-compiled / optimized / DLL files
+ __pycache__
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build
+ develop-eggs
+ dist
+ downloads
+ eggs
+ .eggs
+ lib
+ lib64
+ parts
+ sdist
+ var
+ wheels
+ pip-wheel-metadata
+ share/python-wheels
+ *.egg-info
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+
+ # Flask stuff:
+ instance
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build
+
+ # PyBuilder
+ target
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # celery beat schedule file
+ celerybeat-schedule
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env
+ venv
+ ENV
+ env.bak
+ venv.bak
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mypy
+ .mypy_cache
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: langchain-llama2-7b-chat-uncensored-ggml
+ emoji: 🚀
+ colorFrom: green
+ colorTo: green
+ sdk: gradio
+ sdk_version: 3.37.0
+ app_file: app.py
+ pinned: true
+ duplicated_from: mikeee/langchain-llama2-7b-chat-uncensored-ggml
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,554 @@
+ """Run the app."""
+ # pylint: disable=line-too-long, broad-exception-caught, invalid-name, missing-function-docstring, too-many-instance-attributes, missing-class-docstring
+ # ruff: noqa: E501
+ import gc
+ import os
+ import platform
+ import random
+ import time
+ from collections import deque
+ from pathlib import Path
+ from threading import Thread
+ from typing import Any, Dict, List, Union
+
+ # from types import SimpleNamespace
+ import gradio as gr
+ import psutil
+ from about_time import about_time
+ from ctransformers import Config
+ from dl_hf_model import dl_hf_model
+ from langchain.callbacks.base import BaseCallbackHandler
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+ from langchain.chains import ConversationChain
+ from langchain.chains.conversation.memory import ConversationBufferWindowMemory
+
+ # from ctransformers import AutoModelForCausalLM
+ from langchain.llms import CTransformers
+ from langchain.prompts import PromptTemplate
+ from langchain.schema import LLMResult
+ from loguru import logger
+
+ deq = deque()
+ sig_end = object()  # signals the processing is done
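+ # deq is a simple producer/consumer channel: the callback handler below
+ # appends tokens to it as they stream in, the gradio bot() generator
+ # drains it, and sig_end marks end-of-generation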
+
+ # from langchain.llms import OpenAI
+
+ filename_list = [
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q2_K.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_L.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_M.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q3_K_S.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_0.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_1.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_S.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_0.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_1.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_M.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_K_S.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q6_K.bin",
+     "Wizard-Vicuna-7B-Uncensored.ggmlv3.q8_0.bin",
+ ]
+
+ URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin" # 4.05G
+
+ url = "https://huggingface.co/savvamadar/ggml-gpt4all-j-v1.3-groovy/blob/main/ggml-gpt4all-j-v1.3-groovy.bin"
+ url = "https://huggingface.co/TheBloke/Llama-2-13B-GGML/blob/main/llama-2-13b.ggmlv3.q4_K_S.bin" # 7.37G
+ # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.bin"
+ url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q3_K_L.bin" # 6.93G
+ # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q4_K_M.bin" # 7.87G
+
+ url = "https://huggingface.co/localmodels/Llama-2-13B-Chat-ggml/blob/main/llama-2-13b-chat.ggmlv3.q4_K_S.bin" # 7.37G
+
+ _ = (
+     "golay" in platform.node()
+     or "okteto" in platform.node()
+     or Path("/kaggle").exists()
+     # or psutil.cpu_count(logical=False) < 4
+     or 1  # run 7b in hf
+ )
+
+ if _:
+     # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q2_K.bin"
+     url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q2_K.bin" # 2.87G
+     url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q4_K_M.bin" # 2.87G
+     url = "https://huggingface.co/TheBloke/llama2_7b_chat_uncensored-GGML/blob/main/llama2_7b_chat_uncensored.ggmlv3.q4_K_M.bin" # 4.08G
+
+
+ prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+ ### Instruction: {user_prompt}
+
+ ### Response:
+ """
+
+ prompt_template = """System: You are a helpful,
+ respectful and honest assistant. Always answer as
+ helpfully as possible, while being safe. Your answers
+ should not include any harmful, unethical, racist,
+ sexist, toxic, dangerous, or illegal content. Please
+ ensure that your responses are socially unbiased and
+ positive in nature. If a question does not make any
+ sense, or is not factually coherent, explain why instead
+ of answering something not correct. If you don't know
+ the answer to a question, please don't share false
+ information.
+ User: {prompt}
+ Assistant: """
+
+ prompt_template = """System: You are a helpful assistant.
+ User: {prompt}
+ Assistant: """
+
+ prompt_template = """Question: {question}
+ Answer: Let's work this out in a step by step way to be sure we have the right answer."""
+
+ prompt_template = """[INST] <<SYS>>
+ You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. Think step by step.
+ <</SYS>>
+
+ What NFL team won the Super Bowl in the year Justin Bieber was born?
+ [/INST]"""
+
+ prompt_template = """[INST] <<SYS>>
+ You are an unhelpful assistant. Always answer as helpfully as possible. Think step by step. <</SYS>>
+
+ {question} [/INST]
+ """
+
+ prompt_template = """[INST] <<SYS>>
+ You are a helpful assistant.
+ <</SYS>>
+
+ {question} [/INST]
+ """
+
+ prompt_template = """### HUMAN:
+ {question}
+
+ ### RESPONSE:"""
+
+ prompt_template = """### HUMAN:
+ You are a helpful assistant. Think step by step.
+ {history}
+ {input}
+ ### RESPONSE:"""
+
+ prompt_template = """You are a helpful assistant. Let's think step by step.
+ {history}
+ ### HUMAN:
+ {input}
+ ### RESPONSE:"""
+
+ # PromptTemplate(input_variables=['history', 'input'], output_parser=None, partial_variables={}, template='The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n\nCurrent conversation:\n{history}\nHuman: {input}\nAI:', template_format='f-string', validate_template=True)
+
+ human_prefix = "### HUMAN"
+ ai_prefix = "### RESPONSE"
+ stop = [f"{human_prefix}:"]
+
+ _ = [elm for elm in prompt_template.splitlines() if elm.strip()]
+ stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
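+ # intended to derive a stop marker from the template's line prefixes;
+ # as the commented-out debug below notes, stop_string ends up unused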
+
+ # logger.debug(f"{stop_string=} not used")
+
+ os.environ["TZ"] = "Asia/Shanghai"
+ try:
+     time.tzset()  # type: ignore # pylint: disable=no-member
+ except Exception:
+     # Windows
+     logger.warning("Windows, can't run time.tzset()")
+
+
+ class DequeCallbackHandler(BaseCallbackHandler):
+     """Mediate gradio and stream output."""
+
+     def __init__(self, deq_: deque):
+         """Init deque for FIFO, may need to upgrade to queue.Queue or queue.SimpleQueue."""
+         self.q = deq_
+
+     # def on_chat_model_start(self): self.q.clear()
+
+     def on_llm_start(
+         self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
+     ) -> None:
+         """Run when LLM starts running. Clean the queue."""
+         self.q.clear()
+
+     def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
+         """Run on new LLM token. Only available when streaming is enabled."""
+         self.q.append(token)
+
+     def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
+         """Run when LLM ends running."""
+         self.q.append(sig_end)
+
+     def on_llm_error(
+         self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
+     ) -> None:
+         """Run when LLM errors."""
+         self.q.append(sig_end)
+
+
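+ # note: deque.append/popleft are atomic in CPython, so the handler above
+ # (called from the worker thread) and the UI generator can share deq
+ # without an explicit lock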
+ _ = psutil.cpu_count(logical=False)  # may be None on some platforms
+ cpu_count: int = int(_) - 1 if _ and _ > 1 else 1
+ logger.debug(f"{cpu_count=}")
+
+ LLM = None
+ gc.collect()
+
+ try:
+     model_loc, file_size = dl_hf_model(url)
+ except Exception as exc_:
+     logger.error(exc_)
+     raise SystemExit(1) from exc_
+
+ config = Config()
+ # Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
+ config.stream = True
+ config.stop = stop
+ config.threads = cpu_count
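+ # stream=True makes ctransformers emit tokens one by one (so the
+ # callbacks fire per token); stop cuts generation at the "### HUMAN:"
+ # marker defined above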
210
+
211
+ deqcb = DequeCallbackHandler(deq)
212
+
213
+ # LLM = AutoModelForCausalLM.from_pretrained(
214
+ LLM = CTransformers(
215
+ model=model_loc,
216
+ model_type="llama",
217
+ callbacks=[StreamingStdOutCallbackHandler(), deqcb],
218
+ # config=config,
219
+ **vars(config),
220
+ )
221
+
222
+ logger.info(f"done load llm {model_loc=} {file_size=}G")
223
+
224
+ prompt = PromptTemplate(
225
+ input_variables=["history", "input"],
226
+ output_parser=None,
227
+ partial_variables={},
228
+ template=prompt_template,
229
+ template_format="f-string",
230
+ validate_template=True,
231
+ )
232
+
233
+ memory = ConversationBufferWindowMemory(
234
+ human_prefix=human_prefix,
235
+ ai_prefix=ai_prefix,
236
+ ) # default k=5
237
+
238
+ conversation = ConversationChain(
239
+ llm=LLM,
240
+ prompt=prompt,
241
+ memory=memory,
242
+ verbose=True,
243
+ )
244
+ logger.debug(f"{conversation.prompt.template=}") # type: ignore
245
+
246
+ # for api access ===
247
+ config = Config()
248
+ # Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
249
+ config.stop = stop
250
+ config.threads = cpu_count
251
+
252
+ try:
253
+ LLM_api = CTransformers(
254
+ model=model_loc,
255
+ model_type="llama",
256
+ # callbacks=[StreamingStdOutCallbackHandler(), deqcb],
257
+ callbacks=[StreamingStdOutCallbackHandler()],
258
+ **vars(config),
259
+ )
260
+ conversation_api = ConversationChain(
261
+ llm=LLM_api, # need a separate LLM, or else deq may be messed up
262
+ prompt=prompt,
263
+ verbose=True,
264
+ )
265
+ except Exception as exc_:
266
+ logger.error(exc_)
267
+ conversation_api = None
268
+ logger.warning("Not able to instantiate conversation_api, api will not work")
269
+
270
+ # conversation.predict(input="Hello, my name is Andrea")
271
+
272
+
273
+ def user(user_message, history):
274
+ # return user_message, history + [[user_message, None]]
275
+ history.append([user_message, None])
276
+ return user_message, history # keep user_message
277
+
278
+
279
+ def user1(user_message, history):
280
+ # return user_message, history + [[user_message, None]]
281
+ history.append([user_message, None])
282
+ return "", history # clear user_message
283
+
284
+
285
+ def bot_(history):
286
+ user_message = history[-1][0]
287
+ resp = random.choice(["How are you?", "I love you", "I'm very hungry"])
288
+ bot_message = user_message + ": " + resp
289
+ history[-1][1] = ""
290
+ for character in bot_message:
291
+ history[-1][1] += character
292
+ time.sleep(0.02)
293
+ yield history
294
+
295
+ history[-1][1] = resp
296
+ yield history
297
+
298
+
299
+ def bot(history):
300
+ user_message = history[-1][0]
301
+ response = []
302
+
303
+ logger.debug(f"{user_message=}")
304
+
305
+ # conversation.predict(input="What's my name?")
306
+ thr = Thread(target=conversation.predict, kwargs={"input": user_message})
307
+ thr.start()
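+     # generation now runs in the worker thread; the loop below polls deq
+     # and re-yields the growing reply so gradio streams it to the chatbot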
+
+     # process deq
+     response = []
+     flag = 1
+     then = time.time()
+     prefix = ""  # to please pyright
+     with about_time() as atime:  # type: ignore
+         while True:
+             if deq:
+                 if flag:
+                     prefix = f"({time.time() - then:.2f}s) "
+                     flag = 0
+                 _ = deq.popleft()
+                 if _ is sig_end:
+                     break
+                 # print(_, end='')
+                 response.append(_)
+                 history[-1][1] = prefix + "".join(response).strip()
+                 yield history
+             else:
+                 time.sleep(0.01)
+     _ = (
+         f"(time elapsed: {atime.duration_human}, "  # type: ignore
+         f"{atime.duration/max(len(''.join(response)), 1):.2f}s/char)"  # type: ignore  # guard against empty response
+     )
+
+     history[-1][1] = "".join(response) + f"\n{_}"
+     yield history
+
+
+ def predict_api(user_prompt):
+     if conversation_api is None:
+         return "conversation_api is None, probably due to insufficient memory, api not usable"
+
+     logger.debug(f"api: {user_prompt=}")
+     try:
+         _ = """
+         response = generate(
+             prompt,
+             config=config,
+         )
+         # """
+         response = conversation_api.predict(input=user_prompt)
+         logger.debug(f"api: {response=}")
+     except Exception as exc:
+         logger.error(exc)
+         response = f"{exc=}"
+     # bot = {"inputs": [response]}
+     # bot = [(prompt, response)]
+
+     return response.strip()
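+ # predict_api is wired to api_name="api" further down, so the Space can
+ # also be called programmatically (e.g. via gradio_client)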
+
+
+ css = """
+     .importantButton {
+         background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
+         border: none !important;
+     }
+     .importantButton:hover {
+         background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
+         border: none !important;
+     }
+     .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
+     .xsmall {font-size: x-small;}
+ """
+ etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
+ examples_list = [
+     ["Hello I am mike."],
+     ["What's my name?"],
+     ["What NFL team won the Super Bowl in the year Justin Bieber was born?"],
+     [
+         "What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
+     ],
+     ["When was Justin Bieber born?"],
+     ["What NFL team won the Super Bowl in 1994?"],
+     ["How to pick a lock? Provide detailed steps."],
+     [
+         "If it takes 10 hours to dry 10 clothes, assuming all the clothes are hung together at the same time for drying, then how long will it take to dry a cloth?"
+     ],
+     ["is infinity + 1 bigger than infinity?"],
+     ["Explain the plot of Cinderella in a sentence."],
+     [
+         "How long does it take to become proficient in French, and what are the best methods for retaining information?"
+     ],
+     ["What are some common mistakes to avoid when writing code?"],
+     ["Build a prompt to generate a beautiful portrait of a horse"],
+     ["Suggest four metaphors to describe the benefits of AI"],
+     ["Write a pop song about leaving home for the sandy beaches."],
+     ["Write a pop song about having hot sex on a sandy beach."],
+     ["Write a summary demonstrating my ability to tame lions"],
+     ["鲁迅和周树人什么关系? 说中文。"],
+     ["鲁迅和周树人什么关系?"],
+     ["鲁迅和周树人什么关系? 用英文回答。"],
+     ["从前有一头牛,这头牛后面有什么?"],
+     ["正无穷大加一大于正无穷大吗?"],
+     ["正无穷大加正无穷大大于正无穷大吗?"],
+     ["-2的平方根等于什么?"],
+     ["树上有5只鸟,猎人开枪打死了一只。树上还有几只鸟?"],
+     ["树上有11只鸟,猎人开枪打死了一只。树上还有几只鸟?提示:需考虑鸟可能受惊吓飞走。"],
+     ["以红楼梦的行文风格写一张委婉的请假条。不少于320字。"],
+     [f"{etext} 翻成中文,列出3个版本。"],
+     [f"{etext} \n 翻成中文,保留原意,但使用文学性的语言。不要写解释。列出3个版本。"],
+     ["假定 1 + 2 = 4, 试求 7 + 8。"],
+     ["给出判断一个数是不是质数的 javascript 码。"],
+     ["给出实现python 里 range(10)的 javascript 码。"],
+     ["给出实现python 里 [*(range(10)]的 javascript 码。"],
+     ["Erkläre die Handlung von Cinderella in einem Satz."],
+     ["Erkläre die Handlung von Cinderella in einem Satz. Auf Deutsch."],
+ ]
+
+ logger.info("start block")
+
+ with gr.Blocks(
+     title=f"{Path(model_loc).name}",
+     theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
+     css=css,
+ ) as block:
+     # buff_var = gr.State("")
+     with gr.Accordion("🎈 Info", open=False):
+         # gr.HTML(
+         #     """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
+         # )
+         gr.Markdown(
+             f"""<h5><center>{Path(model_loc).name}</center></h5>
+ The bot can conduct multi-turn conversations, i.e. it remembers past dialogs. This makes responses slower;
+ it typically takes about 120 seconds for the first response to appear.
+
+ Most examples are meant for another model.
+ You may want to try some related prompts of your own.""",
+             elem_classes="xsmall",
+         )
+
+     chatbot = gr.Chatbot(height=500)
+
+     with gr.Row():
+         with gr.Column(scale=5):
+             msg = gr.Textbox(
+                 label="Chat Message Box",
+                 placeholder="Ask me anything (press Shift+Enter or click Submit to send)",
+                 show_label=False,
+                 # container=False,
+                 lines=6,
+                 max_lines=30,
+                 show_copy_button=True,
+                 # ).style(container=False)
+             )
+         with gr.Column(scale=1, min_width=50):
+             with gr.Row():
+                 submit = gr.Button("Submit", elem_classes="xsmall")
+                 stop = gr.Button("Stop", visible=True)
+                 clear = gr.Button("Clear History", visible=True)
+     with gr.Row(visible=False):
+         with gr.Accordion("Advanced Options:", open=False):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     system = gr.Textbox(
+                         label="System Prompt",
+                         value=prompt_template,
+                         show_label=False,
+                         container=False,
+                         # ).style(container=False)
+                     )
+                 with gr.Column():
+                     with gr.Row():
+                         change = gr.Button("Change System Prompt")
+                         reset = gr.Button("Reset System Prompt")
+
+     with gr.Accordion("Example Inputs", open=True):
+         examples = gr.Examples(
+             examples=examples_list,
+             inputs=[msg],
+             examples_per_page=40,
+         )
+
+     with gr.Accordion("Disclaimer", open=False):
+         _ = Path(model_loc).name
+         gr.Markdown(
+             f"Disclaimer: {_} can produce factually incorrect output, and should not be relied on to produce "
+             f"factually accurate information. {_} was trained on various public datasets; while great efforts "
+             "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
+             "biased, or otherwise offensive outputs.",
+             elem_classes=["disclaimer"],
+         )
+
+     msg_submit_event = msg.submit(
+         # fn=conversation.user_turn,
+         fn=user,
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=True,
+         show_progress="full",
+         # api_name=None,
+     ).then(bot, chatbot, chatbot, queue=True)
+     submit_click_event = submit.click(
+         # fn=lambda x, y: ("",) + user(x, y)[1:],  # clear msg
+         fn=user1,  # clear msg
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=True,
+         # queue=False,
+         show_progress="full",
+         # api_name=None,
+     ).then(bot, chatbot, chatbot, queue=True)
+     stop.click(
+         fn=None,
+         inputs=None,
+         outputs=None,
+         cancels=[msg_submit_event, submit_click_event],
+         queue=False,
+     )
+
+     # TODO: clear conversation memory as well
+     clear.click(lambda: None, None, chatbot, queue=False)
+
+     with gr.Accordion("For Chat/Translation API", open=False, visible=False):
+         input_text = gr.Text()
+         api_btn = gr.Button("Go", variant="primary")
+         out_text = gr.Text()
+
+     if conversation_api is not None:
+         api_btn.click(
+             predict_api,
+             input_text,
+             out_text,
+             api_name="api",
+         )
+
+ # concurrency_count=5, max_size=20
+ # max_size=36, concurrency_count=14
+ # CPU cpu_count=2 16G, model 7G
+ # CPU UPGRADE cpu_count=8 32G, model 7G
+
+ # does not work
+ _ = """
+ # _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
+ # concurrency_count = max(_, 1)
+ if psutil.cpu_count(logical=False) >= 8:
+     # concurrency_count = max(int(32 / file_size) - 1, 1)
+ else:
+     # concurrency_count = max(int(16 / file_size) - 1, 1)
+ # """
+
+ concurrency_count = 1
+ logger.info(f"{concurrency_count=}")
+
+ block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)
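+ # concurrency_count=1: the chain, its memory and deq are shared globals,
+ # so concurrent generations would interleave; requests queue up instead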
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ ctransformers # ==0.2.10 0.2.13
+ transformers # ==4.30.2
+ # huggingface_hub
+ gradio
+ loguru
+ about-time
+ psutil
+ dl-hf-model
+ langchain
run-app.sh ADDED
@@ -0,0 +1 @@
+ nodemon -w app.py -x python app.py
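+ # nodemon watches app.py (-w) and reruns it (-x) on every change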