ffreemt committed on
Commit
4ce3dee
1 Parent(s): e78c6b5

Update requirements.txt ctransformers

Files changed (4)
  1. .ruff.toml +18 -0
  2. app.py +372 -1
  3. examples_list.py +45 -0
  4. requirements.txt +8 -0
.ruff.toml ADDED
@@ -0,0 +1,18 @@
+ # Assume Python 3.10.
+ target-version = "py310"
+ # Increase the maximum line length to 300 characters.
+ line-length = 300
+
+ # pyflakes, pycodestyle, isort
+ # flake8 YTT, pydocstyle D, pylint PLC
+ select = ["F", "E", "W", "I001", "YTT", "D", "PLC"]
+ # select = ["ALL"]
+
+ # D100 Missing docstring in public module
+ # D103 Missing docstring in public function
+ # D101 Missing docstring in public class
+ # `multi-line-summary-first-line` (D212)
+ # `one-blank-line-before-class` (D203)
+ extend-ignore = ["D100", "D103", "D101", "D212", "D203"]
+
+ exclude = [".venv"]
app.py CHANGED
@@ -1,4 +1,375 @@
+ """
+ Run the app.
+
 import gradio as gr

 # gr.Interface.load("models/s3nh/garage-bAInd-Stable-Platypus2-13B-GGML").launch()
- gr.load("models/s3nh/garage-bAInd-Stable-Platypus2-13B-GGML").queue(max_size=5).launch(debug=True)
+ # gr.load("models/s3nh/garage-bAInd-Stable-Platypus2-13B-GGML").queue(max_size=5).launch(debug=True)
+ """
+ # pylint: disable=line-too-long, broad-exception-caught, invalid-name, missing-function-docstring, too-many-instance-attributes, missing-class-docstring
+ # ruff: noqa: E501
+ import gc
+ import os
+ import platform
+ import random
+ import time
+ from dataclasses import asdict, dataclass, field
+ from pathlib import Path
+
+ # from types import SimpleNamespace
+ import gradio as gr
+ import psutil
+ from about_time import about_time
+ from ctransformers import AutoModelForCausalLM
+ from dl_hf_model import dl_hf_model
+ from loguru import logger
+
+ from examples_list import examples_list
+
+ url = "https://huggingface.co/s3nh/garage-bAInd-Stable-Platypus2-13B-GGML/blob/main/garage-bAInd-Stable-Platypus2-13B.ggmlv3.q4_1.bin" # 8.17G
+ url = "https://huggingface.co/s3nh/garage-bAInd-Stable-Platypus2-13B-GGML/blob/main/garage-bAInd-Stable-Platypus2-13B.ggmlv3.q4_0.bin" # 7.37G
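+ # NB: the second assignment wins, so only the q4_0 file (7.37G) is downloaded.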
+
+ LLM = None
+ gc.collect()
+
+ try:
+     logger.debug(f" dl {url}")
+     model_loc, file_size = dl_hf_model(url)
+     logger.info(f"done load llm {model_loc=} {file_size=}G")
+ except Exception as exc_:
+     logger.error(exc_)
+     raise SystemExit(1) from exc_
+
+ # raise SystemExit(0)
+
+ # Prompt template: Guanaco
+ # {past_history}
+ prompt_template = """You are a helpful assistant. Let's think step by step.
+ ### Human:
+ {question}
+ ### Assistant:"""
+
+ # Prompt template: garage-bAInd/Stable-Platypus2-13B
+ prompt_template = """
+ ### Instruction:
+
+ {question}
+
+ ### Response:"""
+
+ human_prefix = "### Instruction"
+ ai_prefix = "### Response"
+ stop_list = [f"{human_prefix}:"]
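+ # The stop sequence ends generation as soon as the model starts to emit a new
+ # "### Instruction:" turn of its own.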
+
+ _ = psutil.cpu_count(logical=False)
+ cpu_count: int = max(int(_) - 1, 1) if _ else 1
+ logger.debug(f"{cpu_count=}")
+
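+ # Load the GGML weights with ctransformers: model_type="llama" selects the
+ # llama.cpp backend; inference runs on the CPU with `threads` worker threads.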
+ logger.debug(f"{model_loc=}")
+ LLM = AutoModelForCausalLM.from_pretrained(
+     model_loc,
+     model_type="llama",
+     threads=cpu_count,
+ )
+
+ os.environ["TZ"] = "Asia/Shanghai"
+ try:
+     time.tzset() # type: ignore # pylint: disable=no-member
+ except Exception:
+     # Windows
+     logger.warning("Windows, can't run time.tzset()")
+
+
+ @dataclass
+ class GenerationConfig:
+     temperature: float = 0.7
+     top_k: int = 50
+     top_p: float = 0.9
+     repetition_penalty: float = 1.0
+     max_new_tokens: int = 512
+     seed: int = 42
+     reset: bool = False
+     stream: bool = True
+     threads: int = cpu_count
+     stop: list[str] = field(default_factory=lambda: stop_list)
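+ # Every field above is splatted into llm(...) via asdict(), so each name must
+ # match a generation kwarg that ctransformers accepts.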
+
+
+ def generate(
+     question: str,
+     llm=LLM,
+     config: GenerationConfig = GenerationConfig(),
+ ):
+     """Run model inference; return a generator if streaming is on."""
+     # _ = prompt_template.format(question=question)
+     # print(_)
+
+     prompt = prompt_template.format(question=question)
+
+     return llm(
+         prompt,
+         **asdict(config),
+     )
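+ # With the default stream=True, generate() yields text chunks as they arrive;
+ # e.g. "".join(generate("Hello")) collects a full completion.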
+
+
+ logger.debug(f"{asdict(GenerationConfig())=}")
+
+
+ def user(user_message, history):
+     # return user_message, history + [[user_message, None]]
+     if history is None:
+         history = []
+     history.append([user_message, None])
+     return user_message, history # keep user_message
+
+
+ def user1(user_message, history):
+     # return user_message, history + [[user_message, None]]
+     if history is None:
+         history = []
+     history.append([user_message, None])
+     return "", history # clear user_message
+
+
+ def bot_(history):
+     user_message = history[-1][0]
+     resp = random.choice(["How are you?", "I love you", "I'm very hungry"])
+     bot_message = user_message + ": " + resp
+     history[-1][1] = ""
+     for character in bot_message:
+         history[-1][1] += character
+         time.sleep(0.02)
+         yield history
+
+     history[-1][1] = resp
+     yield history
+
+
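+ # Streaming chat handler: each yield rewrites the last [user, bot] pair in
+ # place, and the first chunk's latency is prepended as a "(x.xxs)" prefix.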
+ def bot(history):
+     user_message = ""
+     try:
+         user_message = history[-1][0]
+     except Exception as exc:
+         logger.error(exc)
+     response = []
+
+     logger.debug(f"{user_message=}")
+
+     with about_time() as atime: # type: ignore
+         flag = 1
+         prefix = ""
+         then = time.time()
+
+         logger.debug("about to generate")
+
+         config = GenerationConfig(reset=True)
+         for elm in generate(user_message, config=config):
+             if flag == 1:
+                 logger.debug("in the loop")
+                 prefix = f"({time.time() - then:.2f}s) "
+                 flag = 0
+                 print(prefix, end="", flush=True)
+                 logger.debug(f"{prefix=}")
+             print(elm, end="", flush=True)
+             # logger.debug(f"{elm}")
+
+             response.append(elm)
+             history[-1][1] = prefix + "".join(response)
+             yield history
+
+     _ = (
+         f"(time elapsed: {atime.duration_human}, " # type: ignore
+         f"{atime.duration/len(''.join(response)):.2f}s/char)" # type: ignore
+     )
+
+     history[-1][1] = "".join(response) + f"\n{_}"
+     yield history
+
+
+ def predict_api(prompt):
+     logger.debug(f"{prompt=}")
+     try:
+         # user_prompt = prompt
+         config = GenerationConfig(
+             temperature=0.2,
+             top_k=10,
+             top_p=0.9,
+             repetition_penalty=1.0,
+             max_new_tokens=512, # adjust as needed
+             seed=42,
+             reset=True, # reset history (cache)
+             stream=False,
+             # threads=cpu_count,
+             # stop=prompt_prefix[1:2],
+         )
+
+         response = generate(
+             prompt,
+             config=config,
+         )
+
+         logger.debug(f"api: {response=}")
+     except Exception as exc:
+         logger.error(exc)
+         response = f"{exc=}"
+     # bot = {"inputs": [response]}
+     # bot = [(prompt, response)]
+
+     return response
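+ # Rough client-side sketch (assumes the gradio_client package, which is not
+ # pinned in requirements.txt):
+ #     from gradio_client import Client
+ #     client = Client("http://127.0.0.1:7860")
+ #     print(client.predict("your prompt", api_name="/api"))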
+
+
+ css = """
+     .importantButton {
+         background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
+         border: none !important;
+     }
+     .importantButton:hover {
+         background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
+         border: none !important;
+     }
+     .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
+     .xsmall {font-size: x-small;}
+ """
+
+ logger.info("start block")
+
+ with gr.Blocks(
+     title=f"{Path(model_loc).name}",
+     # theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
+     theme=gr.themes.Glass(text_size="sm", spacing_size="sm"),
+     css=css,
+ ) as block:
+     # buff_var = gr.State("")
+     with gr.Accordion("🎈 Info", open=False):
+         gr.Markdown(
+             f"""<h5><center>{Path(model_loc).name}</center></h5>
+             Most examples are meant for another model.
+             You probably should try to test
+             some related prompts.""",
+             elem_classes="xsmall",
+         )
+
+     # chatbot = gr.Chatbot().style(height=700) # 500
+     chatbot = gr.Chatbot(height=500)
+
+     # buff = gr.Textbox(show_label=False, visible=True)
+
+     with gr.Row():
+         with gr.Column(scale=5):
+             msg = gr.Textbox(
+                 label="Chat Message Box",
+                 placeholder="Ask me anything (press Shift+Enter or click Submit to send)",
+                 show_label=False,
+                 # container=False,
+                 lines=6,
+                 max_lines=30,
+                 show_copy_button=True,
+                 # ).style(container=False)
+             )
+         with gr.Column(scale=1, min_width=50):
+             with gr.Row():
+                 submit = gr.Button("Submit", elem_classes="xsmall")
+                 stop = gr.Button("Stop", visible=True)
+                 clear = gr.Button("Clear History", visible=True)
+     with gr.Row(visible=False):
+         with gr.Accordion("Advanced Options:", open=False):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     system = gr.Textbox(
+                         label="System Prompt",
+                         value=prompt_template,
+                         show_label=False,
+                         container=False,
+                         # ).style(container=False)
+                     )
+                 with gr.Column():
+                     with gr.Row():
+                         change = gr.Button("Change System Prompt")
+                         reset = gr.Button("Reset System Prompt")
+
+     with gr.Accordion("Example Inputs", open=True):
+         examples = gr.Examples(
+             examples=examples_list,
+             inputs=[msg],
+             examples_per_page=40,
+         )
+
+     # with gr.Row():
+     with gr.Accordion("Disclaimer", open=False):
+         _ = Path(model_loc).name
+         gr.Markdown(
+             f"Disclaimer: {_} can produce factually incorrect output, and should not be relied on to produce "
+             f"factually accurate information. {_} was trained on various public datasets; while great efforts "
+             "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
+             "biased, or otherwise offensive outputs.",
+             elem_classes=["disclaimer"],
+         )
+
+     msg_submit_event = msg.submit(
+         # fn=conversation.user_turn,
+         fn=user,
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=True,
+         show_progress="full",
+         # api_name=None,
+     ).then(bot, chatbot, chatbot, queue=True)
+     submit_click_event = submit.click(
+         # fn=lambda x, y: ("",) + user(x, y)[1:], # clear msg
+         fn=user1, # clear msg
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=True,
+         # queue=False,
+         show_progress="full",
+         # api_name=None,
+     ).then(bot, chatbot, chatbot, queue=True)
+     stop.click(
+         fn=None,
+         inputs=None,
+         outputs=None,
+         cancels=[msg_submit_event, submit_click_event],
+         queue=False,
+     )
+     clear.click(lambda: None, None, chatbot, queue=False)
+
+     with gr.Accordion("For Chat/Translation API", open=False, visible=False):
+         input_text = gr.Text()
+         api_btn = gr.Button("Go", variant="primary")
+         out_text = gr.Text()
+
+     api_btn.click(
+         predict_api,
+         input_text,
+         out_text,
+         api_name="api",
+     )
+
+     # block.load(update_buff, [], buff, every=1)
+     # block.load(update_buff, [buff_var], [buff_var, buff], every=1)
+
+ # concurrency_count=5, max_size=20
+ # max_size=36, concurrency_count=14
+ # CPU cpu_count=2 16G, model 7G
+ # CPU UPGRADE cpu_count=8 32G, model 7G
+
+ # does not work
+ _ = """
+ # _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
+ # concurrency_count = max(_, 1)
+ if psutil.cpu_count(logical=False) >= 8:
+     # concurrency_count = max(int(32 / file_size) - 1, 1)
+ else:
+     # concurrency_count = max(int(16 / file_size) - 1, 1)
+ # """
+
+ # default concurrency_count = 1
+ # block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)
+
+ server_port = 7860
+ if "forindo" in platform.node():
+     server_port = 7861
+ block.queue(max_size=5).launch(
+     debug=True, server_name="0.0.0.0", server_port=server_port
+ )
+
+ # block.queue(max_size=5).launch(debug=True, server_name="0.0.0.0")
examples_list.py ADDED
@@ -0,0 +1,45 @@
+ """Setup examples."""
+ # pylint: disable=invalid-name, line-too-long
+ etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
+ examples_list = [
+     ["What NFL team won the Super Bowl in the year Justin Bieber was born?"],
+     [
+         "What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
+     ],
+     ["How to pick a lock? Provide detailed steps."],
+     [
+         "If it takes 10 hours to dry 10 clothes, assuming all the clothes are hung together at the same time for drying, then how long will it take to dry a cloth?"
+     ],
+     [
+         "If it takes 10 hours to dry 10 clothes, assuming all the clothes are hung together at the same time for drying, then how long will it take to dry a cloth? Think step by step."
+     ],
+     ["is infinity + 1 bigger than infinity?"],
+     ["Explain the plot of Cinderella in a sentence."],
+     [
+         "How long does it take to become proficient in French, and what are the best methods for retaining information?"
+     ],
+     ["What are some common mistakes to avoid when writing code?"],
+     ["Build a prompt to generate a beautiful portrait of a horse"],
+     ["Suggest four metaphors to describe the benefits of AI"],
+     ["Write a pop song about leaving home for the sandy beaches."],
+     ["Write a summary demonstrating my ability to tame lions"],
+     ["鲁迅和周树人什么关系? 说中文。"],
+     ["鲁迅和周树人什么关系?"],
+     ["鲁迅和周树人什么关系? 用英文回答。"],
+     ["从前有一头牛,这头牛后面有什么?"],
+     ["正无穷大加一大于正无穷大吗?"],
+     ["正无穷大加正无穷大大于正无穷大吗?"],
+     ["-2的平方根等于什么?"],
+     ["树上有5只鸟,猎人开枪打死了一只。树上还有几只鸟?"],
+     ["树上有11只鸟,猎人开枪打死了一只。树上还有几只鸟?提示:需考虑鸟可能受惊吓飞走。"],
+     ["以红楼梦的行文风格写一张委婉的请假条。不少于320字。"],
+     [f"Translate the following to Chinese. List 2 variants: \n{etext}"],
+     [f"{etext} 翻成中文,列出3个版本。"],
+     [f"{etext} \n 翻成中文,保留原意,但使用文学性的语言。不要写解释。列出3个版本。"],
+     ["假定 1 + 2 = 4, 试求 7 + 8。"],
+     ["给出判断一个数是不是质数的 javascript 码。"],
+     ["给出实现python 里 range(10)的 javascript 码。"],
+     ["给出实现python 里 [*range(10)] 的 javascript 码。"],
+     ["Erkläre die Handlung von Cinderella in einem Satz."],
+     ["Erkläre die Handlung von Cinderella in einem Satz. Auf Deutsch."],
+ ]
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ ctransformers # ==0.2.10 0.2.13
+ transformers # ==4.30.2
+ # huggingface_hub
+ gradio
+ loguru
+ about-time
+ psutil
+ dl-hf-model