0xSynapse committed on
Commit 608db1a
1 Parent(s): dc9e463

Upload 2 files

Files changed (2)
  1. app.py +432 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,432 @@
+ """Llama-2-7B-Chat GGML Gradio app."""
+ # pylint: disable=line-too-long, broad-exception-caught, invalid-name, missing-function-docstring, too-many-instance-attributes, missing-class-docstring
+ # ruff: noqa: E501
+ import gc
+ import os
+ import platform
+ import random
+ import time
+ from dataclasses import asdict, dataclass
+ from pathlib import Path
+
+ # from types import SimpleNamespace
+ import gradio as gr
+ import psutil
+ from about_time import about_time
+ from ctransformers import AutoModelForCausalLM
+ from dl_hf_model import dl_hf_model
+ from loguru import logger
+
+
+ # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q2_K.bin"
+ # url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q2_K.bin"  # 2.87G
+ url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q4_K_M.bin"  # q4_K_M quant
+
+
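+ # NOTE: prompt_template is reassigned several times below; only the final "[INST] <<SYS>> ... [/INST]" template is used at runtime.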
+ prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+ ### Instruction: {user_prompt}
+
+ ### Response:
+ """
+
+ prompt_template = """System: You are a helpful,
+ respectful and honest assistant. Always answer as
+ helpfully as possible, while being safe. Your answers
+ should not include any harmful, unethical, racist,
+ sexist, toxic, dangerous, or illegal content. Please
+ ensure that your responses are socially unbiased and
+ positive in nature. If a question does not make any
+ sense, or is not factually coherent, explain why instead
+ of answering something not correct. If you don't know
+ the answer to a question, please don't share false
+ information.
+ User: {prompt}
+ Assistant: """
+
+ prompt_template = """System: You are a helpful assistant.
+ User: {prompt}
+ Assistant: """
+
+ prompt_template = """Question: {question}
+ Answer: Let's work this out in a step by step way to be sure we have the right answer."""
+
+ prompt_template = """[INST] <<SYS>>
+ You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. Think step by step.
+ <</SYS>>
+
+ What NFL team won the Super Bowl in the year Justin Bieber was born?
+ [/INST]"""
+
+ prompt_template = """[INST] <<SYS>>
+ You are a helpful assistant. Always answer as helpfully as possible. Think step by step. <</SYS>>
+
+ {question} [/INST]
+ """
+
+ prompt_template = """[INST] <<SYS>>
+ You are a helpful assistant.
+ <</SYS>>
+
+ {question} [/INST]
+ """
+
+ _ = [elm for elm in prompt_template.splitlines() if elm.strip()]
+ stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
+
+ logger.debug(f"{stop_string=}")
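+ # stop_string takes the second-to-last non-blank template line up to its first colon; it is currently unused (the GenerationConfig stop field below stays commented out).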
+
+ _ = (psutil.cpu_count(logical=False) or 2) - 1  # cpu_count(logical=False) can return None
+ cpu_count: int = int(_) if _ else 1
+ logger.debug(f"{cpu_count=}")
+
+ LLM = None
+ gc.collect()
+
+ try:
+     model_loc, file_size = dl_hf_model(url)
+ except Exception as exc_:
+     logger.error(exc_)
+     raise SystemExit(1) from exc_
+
+ LLM = AutoModelForCausalLM.from_pretrained(
+     model_loc,
+     model_type="llama",
+     # threads=cpu_count,
+ )
+
+ logger.info(f"done loading llm {model_loc=} {file_size=}G")
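+ # dl_hf_model resolved the blob URL above, downloaded/cached the GGML file, and returned its local path plus the size used in the log line (apparently in GB).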
+
+ os.environ["TZ"] = "Asia/Shanghai"
+ try:
+     time.tzset()  # type: ignore # pylint: disable=no-member
+ except Exception:
+     # Windows has no time.tzset()
+     logger.warning("Windows, can't run time.tzset()")
+
+ _ = """
+ ns = SimpleNamespace(
+     response="",
+     generator=(_ for _ in []),
+ )
+ # """
+
+ @dataclass
+ class GenerationConfig:
+     temperature: float = 0.7
+     top_k: int = 50
+     top_p: float = 0.9
+     repetition_penalty: float = 1.0
+     max_new_tokens: int = 512
+     seed: int = 42
+     reset: bool = False
+     stream: bool = True
+     # threads: int = cpu_count
+     # stop: list[str] = field(default_factory=lambda: [stop_string])
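+ # NOTE: every non-commented field above is forwarded verbatim to the ctransformers model call via asdict(config) in generate() below.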
+
+
+ def generate(
+     question: str,
+     llm=LLM,
+     config: GenerationConfig = GenerationConfig(),
+ ):
+     """Run model inference; returns a generator when config.stream is True."""
+     # _ = prompt_template.format(question=question)
+     # print(_)
+
+     prompt = prompt_template.format(question=question)
+
+     return llm(
+         prompt,
+         **asdict(config),
+     )
+
+
+ logger.debug(f"{asdict(GenerationConfig())=}")
+
+
+ def user(user_message, history):
+     # return user_message, history + [[user_message, None]]
+     history.append([user_message, None])
+     return user_message, history  # keep user_message
+
+
+ def user1(user_message, history):
+     # return user_message, history + [[user_message, None]]
+     history.append([user_message, None])
+     return "", history  # clear user_message
+
+
+ def bot_(history):
+     user_message = history[-1][0]
+     resp = random.choice(["How are you?", "I love you", "I'm very hungry"])
+     bot_message = user_message + ": " + resp
+     history[-1][1] = ""
+     for character in bot_message:
+         history[-1][1] += character
+         time.sleep(0.02)
+         yield history
+
+     history[-1][1] = resp
+     yield history
+
+
+ def bot(history):
+     user_message = history[-1][0]
+     response = []
+
+     logger.debug(f"{user_message=}")
+
+     with about_time() as atime:  # type: ignore
+         flag = 1
+         prefix = ""
+         then = time.time()
+
+         logger.debug("about to generate")
+
+         config = GenerationConfig(reset=True)
+         for elm in generate(user_message, config=config):
+             if flag == 1:
+                 logger.debug("in the loop")
+                 prefix = f"({time.time() - then:.2f}s) "
+                 flag = 0
+                 print(prefix, end="", flush=True)
+                 logger.debug(f"{prefix=}")
+             print(elm, end="", flush=True)
+             # logger.debug(f"{elm}")
+
+             response.append(elm)
+             history[-1][1] = prefix + "".join(response)
+             yield history
+
+     _ = (
+         f"(time elapsed: {atime.duration_human}, "  # type: ignore
+         f"{atime.duration/max(len(''.join(response)), 1):.2f}s/char)"  # type: ignore  # max() guards against an empty response
+     )
+
+     history[-1][1] = "".join(response) + f"\n{_}"
+     yield history
+
+
+ def predict_api(prompt):
+     logger.debug(f"{prompt=}")
+     try:
+         # user_prompt = prompt
+         config = GenerationConfig(
+             temperature=0.2,
+             top_k=10,
+             top_p=0.9,
+             repetition_penalty=1.0,
+             max_new_tokens=512,  # adjust as needed
+             seed=42,
+             reset=True,  # reset history (cache)
+             stream=False,
+             # threads=cpu_count,
+             # stop=prompt_prefix[1:2],
+         )
+
+         response = generate(
+             prompt,
+             config=config,
+         )
+
+         logger.debug(f"api: {response=}")
+     except Exception as exc:
+         logger.error(exc)
+         response = f"{exc=}"
+     # bot = {"inputs": [response]}
+     # bot = [(prompt, response)]
+
+     return response
+
+
+ css = """
+ .importantButton {
+     background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
+     border: none !important;
+ }
+ .importantButton:hover {
+     background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
+     border: none !important;
+ }
+ .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
+ .xsmall {font-size: x-small;}
+ """
+ etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
+ examples_list = [
+     ["What NFL team won the Super Bowl in the year Justin Bieber was born?"],
+     [
+         "What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
+     ],
+     ["How to pick a lock? Provide detailed steps."],
+     ["If it takes 10 hours to dry 10 clothes, assuming all the clothes are hung together at the same time for drying, then how long will it take to dry a cloth?"],
+     ["is infinity + 1 bigger than infinity?"],
+     ["Explain the plot of Cinderella in a sentence."],
+     [
+         "How long does it take to become proficient in French, and what are the best methods for retaining information?"
+     ],
+     ["What are some common mistakes to avoid when writing code?"],
+     ["Build a prompt to generate a beautiful portrait of a horse"],
+     ["Suggest four metaphors to describe the benefits of AI"],
+     ["Write a pop song about leaving home for the sandy beaches."],
+     ["Write a summary demonstrating my ability to tame lions"],
+     ["鲁迅和周树人什么关系? 说中文。"],
+     ["鲁迅和周树人什么关系?"],
+     ["鲁迅和周树人什么关系? 用英文回答。"],
+     ["从前有一头牛,这头牛后面有什么?"],
+     ["正无穷大加一大于正无穷大吗?"],
+     ["正无穷大加正无穷大大于正无穷大吗?"],
+     ["-2的平方根等于什么?"],
+     ["树上有5只鸟,猎人开枪打死了一只。树上还有几只鸟?"],
+     ["树上有11只鸟,猎人开枪打死了一只。树上还有几只鸟?提示:需考虑鸟可能受惊吓飞走。"],
+     ["以红楼梦的行文风格写一张委婉的请假条。不少于320字。"],
+     [f"{etext} 翻成中文,列出3个版本。"],
+     [f"{etext} \n 翻成中文,保留原意,但使用文学性的语言。不要写解释。列出3个版本。"],
+     ["假定 1 + 2 = 4, 试求 7 + 8。"],
+     ["给出判断一个数是不是质数的 javascript 码。"],
+     ["给出实现python 里 range(10)的 javascript 码。"],
+     ["给出实现python 里 [*range(10)]的 javascript 码。"],
+     ["Erkläre die Handlung von Cinderella in einem Satz."],
+     ["Erkläre die Handlung von Cinderella in einem Satz. Auf Deutsch."],
+ ]
+
+ logger.info("start block")
+
+ with gr.Blocks(
+     title=f"{Path(model_loc).name}",
+     theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
+     css=css,
+ ) as block:
+     # buff_var = gr.State("")
+     with gr.Accordion("🎈 Info", open=False):
+         # gr.HTML(
+         #     """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
+         # )
+         gr.Markdown(
+             f"""<h5><center>{Path(model_loc).name}</center></h5>
+             Most examples are meant for another model.
+             You should probably test some related prompts of your own.""",
+             elem_classes="xsmall",
+         )
+
+     # chatbot = gr.Chatbot().style(height=700)  # 500
+     chatbot = gr.Chatbot(height=500)
+
+     # buff = gr.Textbox(show_label=False, visible=True)
+
+     with gr.Row():
+         with gr.Column(scale=5):
+             msg = gr.Textbox(
+                 label="Chat Message Box",
+                 placeholder="Ask me anything (press Shift+Enter or click Submit to send)",
+                 show_label=False,
+                 # container=False,
+                 lines=6,
+                 max_lines=30,
+                 show_copy_button=True,
+                 # ).style(container=False)
+             )
+         with gr.Column(scale=1, min_width=50):
+             with gr.Row():
+                 submit = gr.Button("Submit", elem_classes="xsmall")
+                 stop = gr.Button("Stop", visible=True)
+                 clear = gr.Button("Clear History", visible=True)
+     with gr.Row(visible=False):
+         with gr.Accordion("Advanced Options:", open=False):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     system = gr.Textbox(
+                         label="System Prompt",
+                         value=prompt_template,
+                         show_label=False,
+                         container=False,
+                         # ).style(container=False)
+                     )
+                 with gr.Column():
+                     with gr.Row():
+                         change = gr.Button("Change System Prompt")
+                         reset = gr.Button("Reset System Prompt")
+
+     with gr.Accordion("Example Inputs", open=True):
+         examples = gr.Examples(
+             examples=examples_list,
+             inputs=[msg],
+             examples_per_page=40,
+         )
+
+     # with gr.Row():
+     with gr.Accordion("Disclaimer", open=False):
+         _ = Path(model_loc).name
+         gr.Markdown(
+             f"Disclaimer: {_} can produce factually incorrect output, and should not be relied on to produce "
+             f"factually accurate information. {_} was trained on various public datasets; while great efforts "
+             "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
+             "biased, or otherwise offensive outputs.",
+             elem_classes=["disclaimer"],
+         )
+
+     msg_submit_event = msg.submit(
+         # fn=conversation.user_turn,
+         fn=user,
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=True,
+         show_progress="full",
+         # api_name=None,
+     ).then(bot, chatbot, chatbot, queue=True)
+     submit_click_event = submit.click(
+         # fn=lambda x, y: ("",) + user(x, y)[1:],  # clear msg
+         fn=user1,  # clear msg
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=True,
+         # queue=False,
+         show_progress="full",
+         # api_name=None,
+     ).then(bot, chatbot, chatbot, queue=True)
+     stop.click(
+         fn=None,
+         inputs=None,
+         outputs=None,
+         cancels=[msg_submit_event, submit_click_event],
+         queue=False,
+     )
+     clear.click(lambda: None, None, chatbot, queue=False)
+
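+     # The Stop button works by cancelling the two streaming events above (cancels=[...]), which relies on those events being queued.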
+     with gr.Accordion("For Chat/Translation API", open=False, visible=False):
+         input_text = gr.Text()
+         api_btn = gr.Button("Go", variant="primary")
+         out_text = gr.Text()
+
+     api_btn.click(
+         predict_api,
+         input_text,
+         out_text,
+         api_name="api",
+     )
+
+     # block.load(update_buff, [], buff, every=1)
+     # block.load(update_buff, [buff_var], [buff_var, buff], every=1)
+
+ # concurrency_count=5, max_size=20
+ # max_size=36, concurrency_count=14
+ # CPU cpu_count=2 16G, model 7G
+ # CPU UPGRADE cpu_count=8 32G, model 7G
+
+ # does not work
+ _ = """
+ # _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
+ # concurrency_count = max(_, 1)
+ if psutil.cpu_count(logical=False) >= 8:
+     # concurrency_count = max(int(32 / file_size) - 1, 1)
+ else:
+     # concurrency_count = max(int(16 / file_size) - 1, 1)
+ # """
+
+ concurrency_count = 1
+ logger.info(f"{concurrency_count=}")
+
+ block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)
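+ # NOTE: queue() is required here: bot() streams via a generator and the Stop button depends on event cancellation, both of which need the queue enabled.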
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ ctransformers  # ==0.2.10 0.2.13
+ transformers  # ==4.30.2
+ huggingface_hub
+ gradio
+ loguru
+ about-time
+ psutil
+ dl-hf-model