Spaces:
Runtime error
Runtime error
ffreemt
committed on
Commit
·
6f9f106
1
Parent(s):
ad353f0
Update from collections import deque
Browse files
app.py
CHANGED
@@ -6,8 +6,8 @@ import os
|
|
6 |
import platform
|
7 |
import random
|
8 |
import time
|
|
|
9 |
from pathlib import Path
|
10 |
-
from queue import deque
|
11 |
from threading import Thread
|
12 |
from typing import Any, Dict, List, Union
|
13 |
|
@@ -134,7 +134,7 @@ You are a helpful assistant. Think step by step.
|
|
134 |
{input}
|
135 |
### RESPONSE:"""
|
136 |
|
137 |
-
prompt_template = """You are a helpful assistant.
|
138 |
{history}
|
139 |
### HUMAN:
|
140 |
{input}
|
@@ -186,7 +186,7 @@ class DequeCallbackHandler(BaseCallbackHandler):
|
|
186 |
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
|
187 |
) -> None:
|
188 |
"""Run when LLM errors."""
|
189 |
-
self.q.
|
190 |
|
191 |
|
192 |
_ = psutil.cpu_count(logical=False) - 1
|
@@ -203,6 +203,7 @@ except Exception as exc_:
|
|
203 |
raise SystemExit(1) from exc_
|
204 |
|
205 |
config = Config()
|
|
|
206 |
config.stream = True
|
207 |
config.stop = stop
|
208 |
config.threads=cpu_count
|
@@ -241,7 +242,31 @@ conversation = ConversationChain(
|
|
241 |
memory=memory,
|
242 |
verbose=True,
|
243 |
)
|
244 |
-
logger.debug(f"{conversation.prompt.template=}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
245 |
|
246 |
# conversation.predict(input="Hello, my name is Andrea")
|
247 |
|
@@ -286,6 +311,7 @@ def bot(history):
|
|
286 |
response = []
|
287 |
flag = 1
|
288 |
then = time.time()
|
|
|
289 |
with about_time() as atime: # type: ignore
|
290 |
while True:
|
291 |
if deq:
|
@@ -311,33 +337,18 @@ def bot(history):
|
|
311 |
|
312 |
|
313 |
def predict_api(user_prompt):
|
314 |
-
|
|
|
|
|
|
|
315 |
try:
|
316 |
-
# user_prompt = prompt
|
317 |
-
Config(
|
318 |
-
temperature=0.2,
|
319 |
-
top_k=10,
|
320 |
-
top_p=0.9,
|
321 |
-
repetition_penalty=1.0,
|
322 |
-
max_new_tokens=512, # adjust as needed
|
323 |
-
seed=42,
|
324 |
-
reset=True, # reset history (cache)
|
325 |
-
stream=False,
|
326 |
-
# threads=cpu_count,
|
327 |
-
# stop=prompt_prefix[1:2],
|
328 |
-
)
|
329 |
_ = """
|
330 |
response = generate(
|
331 |
prompt,
|
332 |
config=config,
|
333 |
)
|
334 |
# """
|
335 |
-
|
336 |
-
llm=LLM,
|
337 |
-
prompt=prompt,
|
338 |
-
verbose=True,
|
339 |
-
)
|
340 |
-
response = conversation1.predict(input=user_prompt)
|
341 |
logger.debug(f"api: {response=}")
|
342 |
except Exception as exc:
|
343 |
logger.error(exc)
|
@@ -368,6 +379,8 @@ examples_list = [
|
|
368 |
[
|
369 |
"What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
|
370 |
],
|
|
|
|
|
371 |
["How to pick a lock? Provide detailed steps."],
|
372 |
["If it takes 10 hours to dry 10 clothes, assuming all the clothes are hanged together at the same time for drying , then how long will it take to dry a cloth?"],
|
373 |
["is infinity + 1 bigger than infinity?"],
|
@@ -506,6 +519,8 @@ with gr.Blocks(
|
|
506 |
cancels=[msg_submit_event, submit_click_event],
|
507 |
queue=False,
|
508 |
)
|
|
|
|
|
509 |
clear.click(lambda: None, None, chatbot, queue=False)
|
510 |
|
511 |
with gr.Accordion("For Chat/Translation API", open=False, visible=False):
|
@@ -513,12 +528,13 @@ with gr.Blocks(
|
|
513 |
api_btn = gr.Button("Go", variant="primary")
|
514 |
out_text = gr.Text()
|
515 |
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
|
520 |
-
|
521 |
-
|
|
|
522 |
|
523 |
# block.load(update_buff, [], buff, every=1)
|
524 |
# block.load(update_buff, [buff_var], [buff_var, buff], every=1)
|
|
|
6 |
import platform
|
7 |
import random
|
8 |
import time
|
9 |
+
from collections import deque
|
10 |
from pathlib import Path
|
|
|
11 |
from threading import Thread
|
12 |
from typing import Any, Dict, List, Union
|
13 |
|
|
|
134 |
{input}
|
135 |
### RESPONSE:"""
|
136 |
|
137 |
+
prompt_template = """You are a helpful assistant. Let's think step by step.
|
138 |
{history}
|
139 |
### HUMAN:
|
140 |
{input}
|
|
|
186 |
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
|
187 |
) -> None:
|
188 |
"""Run when LLM errors."""
|
189 |
+
self.q.append(sig_end)
|
190 |
|
191 |
|
192 |
_ = psutil.cpu_count(logical=False) - 1
|
|
|
203 |
raise SystemExit(1) from exc_
|
204 |
|
205 |
config = Config()
|
206 |
+
# Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
|
207 |
config.stream = True
|
208 |
config.stop = stop
|
209 |
config.threads=cpu_count
|
|
|
242 |
memory=memory,
|
243 |
verbose=True,
|
244 |
)
|
245 |
+
logger.debug(f"{conversation.prompt.template=}") # type: ignore
|
246 |
+
|
247 |
+
# for api access ===
|
248 |
+
config = Config()
|
249 |
+
# Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
|
250 |
+
config.stop = stop
|
251 |
+
config.threads=cpu_count
|
252 |
+
|
253 |
+
try:
|
254 |
+
LLM_api = CTransformers(
|
255 |
+
model=model_loc,
|
256 |
+
model_type="llama",
|
257 |
+
# callbacks=[StreamingStdOutCallbackHandler(), deqcb],
|
258 |
+
callbacks=[StreamingStdOutCallbackHandler()],
|
259 |
+
**vars(config),
|
260 |
+
)
|
261 |
+
conversation_api = ConversationChain(
|
262 |
+
llm=LLM_api, # need a separate LLM, or else deq may be messed up
|
263 |
+
prompt=prompt,
|
264 |
+
verbose=True,
|
265 |
+
)
|
266 |
+
except Exception as exc_:
|
267 |
+
logger.error(exc_)
|
268 |
+
conversation_api = None
|
269 |
+
logger.warning("Not able to instantiate conversation_api, api will not work")
|
270 |
|
271 |
# conversation.predict(input="Hello, my name is Andrea")
|
272 |
|
|
|
311 |
response = []
|
312 |
flag = 1
|
313 |
then = time.time()
|
314 |
+
prefix = "" # to please pyright
|
315 |
with about_time() as atime: # type: ignore
|
316 |
while True:
|
317 |
if deq:
|
|
|
337 |
|
338 |
|
339 |
def predict_api(user_prompt):
|
340 |
+
if conversation_api is None:
|
341 |
+
return "conversation_api is None, probably due to insufficient memory, api not usable"
|
342 |
+
|
343 |
+
logger.debug(f"api: {user_prompt=}")
|
344 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
345 |
_ = """
|
346 |
response = generate(
|
347 |
prompt,
|
348 |
config=config,
|
349 |
)
|
350 |
# """
|
351 |
+
response = conversation_api.predict(input=user_prompt)
|
|
|
|
|
|
|
|
|
|
|
352 |
logger.debug(f"api: {response=}")
|
353 |
except Exception as exc:
|
354 |
logger.error(exc)
|
|
|
379 |
[
|
380 |
"What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
|
381 |
],
|
382 |
+
["When was Justin Bieber born?"],
|
383 |
+
["What NFL team won the Super Bowl in 1994?"],
|
384 |
["How to pick a lock? Provide detailed steps."],
|
385 |
["If it takes 10 hours to dry 10 clothes, assuming all the clothes are hanged together at the same time for drying , then how long will it take to dry a cloth?"],
|
386 |
["is infinity + 1 bigger than infinity?"],
|
|
|
519 |
cancels=[msg_submit_event, submit_click_event],
|
520 |
queue=False,
|
521 |
)
|
522 |
+
|
523 |
+
# TODO: clear conversation memory as well
|
524 |
clear.click(lambda: None, None, chatbot, queue=False)
|
525 |
|
526 |
with gr.Accordion("For Chat/Translation API", open=False, visible=False):
|
|
|
528 |
api_btn = gr.Button("Go", variant="primary")
|
529 |
out_text = gr.Text()
|
530 |
|
531 |
+
if conversation_api is not None:
|
532 |
+
api_btn.click(
|
533 |
+
predict_api,
|
534 |
+
input_text,
|
535 |
+
out_text,
|
536 |
+
api_name="api",
|
537 |
+
)
|
538 |
|
539 |
# block.load(update_buff, [], buff, every=1)
|
540 |
# block.load(update_buff, [buff_var], [buff_var, buff], every=1)
|