dnzengou committed on
Commit
94007a0
1 Parent(s): ef4c506

add application files

Files changed (2)
  1. app.py +408 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,408 @@
+ """Run codes."""
+ # pylint: disable=line-too-long, broad-exception-caught, invalid-name, missing-function-docstring, too-many-instance-attributes, missing-class-docstring
+ # ruff: noqa: E501
+ import gc
+ import os
+ import platform
+ import random
+ import time
+ from dataclasses import asdict, dataclass
+ from pathlib import Path
+
+ # from types import SimpleNamespace
+ import gradio as gr
+ import psutil
+ from about_time import about_time
+ from ctransformers import AutoModelForCausalLM
+ from dl_hf_model import dl_hf_model
+ from loguru import logger
+
+
+
+
+ # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q2_K.bin"
+ # url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q2_K.bin"  # 2.87G
+ url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q4_K_M.bin"  # ~4G
+
+
+ prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+ ### Instruction: {user_prompt}
+
+ ### Response:
+ """
+
+ prompt_template = """System: You are a helpful,
+ respectful and honest assistant. Always answer as
+ helpfully as possible, while being safe. Your answers
+ should not include any harmful, unethical, racist,
+ sexist, toxic, dangerous, or illegal content. Please
+ ensure that your responses are socially unbiased and
+ positive in nature. If a question does not make any
+ sense, or is not factually coherent, explain why instead
+ of answering something not correct. If you don't know
+ the answer to a question, please don't share false
+ information.
+ User: {prompt}
+ Assistant: """
+
+ prompt_template = """System: You are a helpful assistant.
+ User: {prompt}
+ Assistant: """
+
+ prompt_template = """Question: {question}
+ Answer: Let's work this out in a step by step way to be sure we have the right answer."""
+
+ prompt_template = """[INST] <<SYS>>
+ You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. Think step by step.
+ <</SYS>>
+
+ What NFL team won the Super Bowl in the year Justin Bieber was born?
+ [/INST]"""
+
+ prompt_template = """[INST] <<SYS>>
+ You are an unhelpful assistant. Always answer as helpfully as possible. Think step by step. <</SYS>>
+
+ {question} [/INST]
+ """
+
+ prompt_template = """[INST] <<SYS>>
+ You are a helpful assistant.
+ <</SYS>>
+
+ {question} [/INST]
+ """
+
+ _ = [elm for elm in prompt_template.splitlines() if elm.strip()]
+ stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
+
+ logger.debug(f"{stop_string=}")
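+ # Each prompt_template assignment above overrides the previous one, so only the
+ # last [INST]/<<SYS>> template is actually used; stop_string is derived from it
+ # but is only referenced by the commented-out `stop` field of GenerationConfig.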
+
+ _ = psutil.cpu_count(logical=False) - 1
+ cpu_count: int = int(_) if _ else 1
+ logger.debug(f"{cpu_count=}")
+
+ LLM = None
+ gc.collect()
+
+ try:
+     model_loc, file_size = dl_hf_model(url)
+ except Exception as exc_:
+     logger.error(exc_)
+     raise SystemExit(1) from exc_
+
+ LLM = AutoModelForCausalLM.from_pretrained(
+     model_loc,
+     model_type="llama",
+     # threads=cpu_count,
+ )
+
+ logger.info(f"done load llm {model_loc=} {file_size=}G")
+
+ os.environ["TZ"] = "Asia/Shanghai"
+ try:
+     time.tzset()  # type: ignore # pylint: disable=no-member
+ except Exception:
+     # Windows
+     logger.warning("Windows, can't run time.tzset()")
+
+ _ = """
+ ns = SimpleNamespace(
+     response="",
+     generator=(_ for _ in []),
+ )
+ # """
+
+ @dataclass
+ class GenerationConfig:
+     temperature: float = 0.7
+     top_k: int = 50
+     top_p: float = 0.9
+     repetition_penalty: float = 1.0
+     max_new_tokens: int = 512
+     seed: int = 42
+     reset: bool = False
+     stream: bool = True
+     # threads: int = cpu_count
+     # stop: list[str] = field(default_factory=lambda: [stop_string])
+
+
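+ # generate() formats the question with the active prompt_template and calls the
+ # ctransformers model; per its docstring, with stream=True (the GenerationConfig
+ # default) it returns a generator of tokens, with stream=False a full string.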
+ def generate(
+     question: str,
+     llm=LLM,
+     config: GenerationConfig = GenerationConfig(),
+ ):
+     """Run model inference, will return a Generator if streaming is true."""
+     # _ = prompt_template.format(question=question)
+     # print(_)
+
+     prompt = prompt_template.format(question=question)
+
+     return llm(
+         prompt,
+         **asdict(config),
+     )
+
+
+ logger.debug(f"{asdict(GenerationConfig())=}")
+
+
+ def user(user_message, history):
+     # return user_message, history + [[user_message, None]]
+     history.append([user_message, None])
+     return user_message, history  # keep user_message
+
+
+ def user1(user_message, history):
+     # return user_message, history + [[user_message, None]]
+     history.append([user_message, None])
+     return "", history  # clear user_message
+
+
+ def bot_(history):
+     user_message = history[-1][0]
+     resp = random.choice(["How are you?", "I love you", "I'm very hungry"])
+     bot_message = user_message + ": " + resp
+     history[-1][1] = ""
+     for character in bot_message:
+         history[-1][1] += character
+         time.sleep(0.02)
+         yield history
+
+     history[-1][1] = resp
+     yield history
+
+
+ def bot(history):
+     user_message = history[-1][0]
+     response = []
+
+     logger.debug(f"{user_message=}")
+
+     with about_time() as atime:  # type: ignore
+         flag = 1
+         prefix = ""
+         then = time.time()
+
+         logger.debug("about to generate")
+
+         config = GenerationConfig(reset=True)
+         for elm in generate(user_message, config=config):
+             if flag == 1:
+                 logger.debug("in the loop")
+                 prefix = f"({time.time() - then:.2f}s) "
+                 flag = 0
+                 print(prefix, end="", flush=True)
+                 logger.debug(f"{prefix=}")
+             print(elm, end="", flush=True)
+             # logger.debug(f"{elm}")
+
+             response.append(elm)
+             history[-1][1] = prefix + "".join(response)
+             yield history
+
+     _ = (
+         f"(time elapsed: {atime.duration_human}, "  # type: ignore
+         f"{atime.duration/len(''.join(response)):.2f}s/char)"  # type: ignore
+     )
+
+     history[-1][1] = "".join(response) + f"\n{_}"
+     yield history
+
+
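+ # predict_api(): single, non-streaming generation used by the hidden
+ # "For Chat/Translation API" block below (exposed via api_name="api").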
+ def predict_api(prompt):
+     logger.debug(f"{prompt=}")
+     try:
+         # user_prompt = prompt
+         config = GenerationConfig(
+             temperature=0.2,
+             top_k=10,
+             top_p=0.9,
+             repetition_penalty=1.0,
+             max_new_tokens=512,  # adjust as needed
+             seed=42,
+             reset=True,  # reset history (cache)
+             stream=False,
+             # threads=cpu_count,
+             # stop=prompt_prefix[1:2],
+         )
+
+         response = generate(
+             prompt,
+             config=config,
+         )
+
+         logger.debug(f"api: {response=}")
+     except Exception as exc:
+         logger.error(exc)
+         response = f"{exc=}"
+     # bot = {"inputs": [response]}
+     # bot = [(prompt, response)]
+
+     return response
+
+
+ css = """
+     .importantButton {
+         background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
+         border: none !important;
+     }
+     .importantButton:hover {
+         background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
+         border: none !important;
+     }
+     .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
+     .xsmall {font-size: x-small;}
+ """
+ etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
+ examples_list = [
+     ["What is the capital of India?"],
+     ["How to play Chess? Provide detailed steps."],
+     ["If it takes 10 hours to dry 10 clothes, assuming all the clothes are hung together at the same time for drying, then how long will it take to dry a cloth?"],
+     ["Is infinity + 1 bigger than infinity?"],
+     ["Explain the plot of the 2023 movie Oppenheimer in a sentence."],
+     ["How long does it take to become proficient in French, and what are the best methods for retaining information?"],
+     ["What are some common mistakes to avoid when writing code?"],
+     ["Build a prompt to generate a beautiful portrait of a horse"],
+     ["Suggest four metaphors to describe the benefits of AI"],
+     ["Write the most important points of the Bhagavad Gita"],
+     ["Write a summary: why is it so hard to understand quantum mechanics?"],
+ ]
+
+ logger.info("start block")
+
+ with gr.Blocks(
+     title="LlamaGPT🤖",
+     theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
+     css=css,
+ ) as block:
+     # buff_var = gr.State("")
+     with gr.Accordion("LlamaGPT🧠", open=False):  # gr.Accordion takes no `style` kwarg; removed
+         gr.Markdown(
+             f"""<div style="text-align: center;">
+             <h5>Gradio Demo for Meta's Llama 2 7B-chat</h5><br>
+             A few example prompts are provided below to test the model; you should also try your own prompts.
+             </div>""",
+             elem_classes="xsmall",
+         )
+
+     # chatbot = gr.Chatbot().style(height=700)  # 500
+     chatbot = gr.Chatbot(height=500)
+
+     # buff = gr.Textbox(show_label=False, visible=True)
+
+     with gr.Row():
+         with gr.Column(scale=5):
+             msg = gr.Textbox(
+                 label="Chat Message Box",
+                 placeholder="Ask me anything (press Shift+Enter or click Submit to send)",
+                 show_label=False,
+                 # container=False,
+                 lines=6,
+                 max_lines=30,
+                 show_copy_button=True,
+                 # ).style(container=False)
+             )
+         with gr.Column(scale=1, min_width=50):
+             with gr.Row():
+                 submit = gr.Button("Submit", elem_classes="xsmall")
+                 stop = gr.Button("Stop", visible=True)
+                 clear = gr.Button("Clear History", visible=True)
+     with gr.Row(visible=False):
+         with gr.Accordion("Advanced Options:", open=False):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     system = gr.Textbox(
+                         label="System Prompt",
+                         value=prompt_template,
+                         show_label=False,
+                         container=False,
+                         # ).style(container=False)
+                     )
+                 with gr.Column():
+                     with gr.Row():
+                         change = gr.Button("Change System Prompt")
+                         reset = gr.Button("Reset System Prompt")
+
+     with gr.Accordion("Example Inputs", open=True):
+         examples = gr.Examples(
+             examples=examples_list,
+             inputs=[msg],
+             examples_per_page=40,
+         )
+
+     # with gr.Row():
+     with gr.Accordion("Disclaimer", open=False):
+         _ = Path(model_loc).name
+         gr.Markdown(
+             f"Disclaimer: {_} can produce factually incorrect output, and should not be relied on to produce "
+             f"factually accurate information. {_} was trained on various public datasets; while great efforts "
+             "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
+             "biased, or otherwise offensive outputs.",
+             elem_classes=["disclaimer"],
+         )
+
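+     # Event wiring: submitting the message box or clicking Submit appends the user
+     # turn and then streams bot(); Stop cancels both in-flight events; Clear resets
+     # the chat history.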
+     msg_submit_event = msg.submit(
+         # fn=conversation.user_turn,
+         fn=user,
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=True,
+         show_progress="full",
+         # api_name=None,
+     ).then(bot, chatbot, chatbot, queue=True)
+     submit_click_event = submit.click(
+         # fn=lambda x, y: ("",) + user(x, y)[1:],  # clear msg
+         fn=user1,  # clear msg
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=True,
+         # queue=False,
+         show_progress="full",
+         # api_name=None,
+     ).then(bot, chatbot, chatbot, queue=True)
+     stop.click(
+         fn=None,
+         inputs=None,
+         outputs=None,
+         cancels=[msg_submit_event, submit_click_event],
+         queue=False,
+     )
+     clear.click(lambda: None, None, chatbot, queue=False)
+
+     with gr.Accordion("For Chat/Translation API", open=False, visible=False):
+         input_text = gr.Text()
+         api_btn = gr.Button("Go", variant="primary")
+         out_text = gr.Text()
+
+     api_btn.click(
+         predict_api,
+         input_text,
+         out_text,
+         api_name="api",
+     )
+
+     # block.load(update_buff, [], buff, every=1)
+     # block.load(update_buff, [buff_var], [buff_var, buff], every=1)
+
+ # concurrency_count=5, max_size=20
+ # max_size=36, concurrency_count=14
+ # CPU cpu_count=2 16G, model 7G
+ # CPU UPGRADE cpu_count=8 32G, model 7G
+
+ # does not work
+ _ = """
+ # _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
+ # concurrency_count = max(_, 1)
+ if psutil.cpu_count(logical=False) >= 8:
+     # concurrency_count = max(int(32 / file_size) - 1, 1)
+ else:
+     # concurrency_count = max(int(16 / file_size) - 1, 1)
+ # """
+
+ concurrency_count = 1
+ logger.info(f"{concurrency_count=}")
+
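+ # Note: queue(concurrency_count=...) is the Gradio 3.x signature; Gradio 4 dropped
+ # this argument, so an unpinned `gradio` in requirements.txt may break this call.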
408
+ block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ ctransformers  # ==0.2.10 0.2.13
+ transformers  # ==4.30.2
+ huggingface_hub
+ gradio
+ loguru
+ about-time
+ psutil
+ dl-hf-model