Spaces:
Runtime error
Runtime error
ffreemt
committed on
Commit
·
4180709
1
Parent(s):
6f9f106
Update
Browse files
app.py
CHANGED
@@ -29,7 +29,7 @@ from langchain.schema import LLMResult
|
|
29 |
from loguru import logger
|
30 |
|
31 |
deq = deque()
|
32 |
-
sig_end = object()
|
33 |
|
34 |
# from langchain.llms import OpenAI
|
35 |
|
@@ -206,7 +206,7 @@ config = Config()
|
|
206 |
# Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
|
207 |
config.stream = True
|
208 |
config.stop = stop
|
209 |
-
config.threads=cpu_count
|
210 |
|
211 |
deqcb = DequeCallbackHandler(deq)
|
212 |
|
@@ -221,14 +221,13 @@ LLM = CTransformers(
|
|
221 |
|
222 |
logger.info(f"done load llm {model_loc=} {file_size=}G")
|
223 |
|
224 |
-
|
225 |
prompt = PromptTemplate(
|
226 |
-
input_variables=[
|
227 |
output_parser=None,
|
228 |
partial_variables={},
|
229 |
template=prompt_template,
|
230 |
-
template_format=
|
231 |
-
validate_template=True
|
232 |
)
|
233 |
|
234 |
memory = ConversationBufferWindowMemory(
|
@@ -248,7 +247,7 @@ logger.debug(f"{conversation.prompt.template=}") # type: ignore
|
|
248 |
config = Config()
|
249 |
# Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
|
250 |
config.stop = stop
|
251 |
-
config.threads=cpu_count
|
252 |
|
253 |
try:
|
254 |
LLM_api = CTransformers(
|
@@ -332,7 +331,7 @@ def bot(history):
|
|
332 |
f"{atime.duration/len(''.join(response)):.2f}s/char)" # type: ignore
|
333 |
)
|
334 |
|
335 |
-
history[-1][1] = "".join(response)
|
336 |
yield history
|
337 |
|
338 |
|
@@ -373,8 +372,8 @@ css = """
|
|
373 |
"""
|
374 |
etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
|
375 |
examples_list = [
|
376 |
-
|
377 |
-
|
378 |
["What NFL team won the Super Bowl in the year Justin Bieber was born?"],
|
379 |
[
|
380 |
"What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
|
@@ -382,7 +381,9 @@ examples_list = [
|
|
382 |
["When was Justin Bieber born?"],
|
383 |
["What NFL team won the Super Bowl in 1994?"],
|
384 |
["How to pick a lock? Provide detailed steps."],
|
385 |
-
[
|
|
|
|
|
386 |
["is infinity + 1 bigger than infinity?"],
|
387 |
["Explain the plot of Cinderella in a sentence."],
|
388 |
[
|
@@ -429,7 +430,7 @@ with gr.Blocks(
|
|
429 |
gr.Markdown(
|
430 |
f"""<h5><center>{Path(model_loc).name}</center></h4>
|
431 |
The bot can conduct multi-turn conversations, i.e. it remembers past dialogs. The process time is longer.
|
432 |
-
It typically takes
|
433 |
|
434 |
Most examples are meant for another model.
|
435 |
You probably should try to test
|
@@ -437,11 +438,8 @@ with gr.Blocks(
|
|
437 |
elem_classes="xsmall",
|
438 |
)
|
439 |
|
440 |
-
# chatbot = gr.Chatbot().style(height=700) # 500
|
441 |
chatbot = gr.Chatbot(height=500)
|
442 |
|
443 |
-
# buff = gr.Textbox(show_label=False, visible=True)
|
444 |
-
|
445 |
with gr.Row():
|
446 |
with gr.Column(scale=5):
|
447 |
msg = gr.Textbox(
|
@@ -482,7 +480,6 @@ with gr.Blocks(
|
|
482 |
examples_per_page=40,
|
483 |
)
|
484 |
|
485 |
-
# with gr.Row():
|
486 |
with gr.Accordion("Disclaimer", open=False):
|
487 |
_ = Path(model_loc).name
|
488 |
gr.Markdown(
|
@@ -536,9 +533,6 @@ with gr.Blocks(
|
|
536 |
api_name="api",
|
537 |
)
|
538 |
|
539 |
-
# block.load(update_buff, [], buff, every=1)
|
540 |
-
# block.load(update_buff, [buff_var], [buff_var, buff], every=1)
|
541 |
-
|
542 |
# concurrency_count=5, max_size=20
|
543 |
# max_size=36, concurrency_count=14
|
544 |
# CPU cpu_count=2 16G, model 7G
|
|
|
29 |
from loguru import logger
|
30 |
|
31 |
deq = deque()
|
32 |
+
sig_end = object() # signals the processing is done
|
33 |
|
34 |
# from langchain.llms import OpenAI
|
35 |
|
|
|
206 |
# Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
|
207 |
config.stream = True
|
208 |
config.stop = stop
|
209 |
+
config.threads = cpu_count
|
210 |
|
211 |
deqcb = DequeCallbackHandler(deq)
|
212 |
|
|
|
221 |
|
222 |
logger.info(f"done load llm {model_loc=} {file_size=}G")
|
223 |
|
|
|
224 |
prompt = PromptTemplate(
|
225 |
+
input_variables=["history", "input"],
|
226 |
output_parser=None,
|
227 |
partial_variables={},
|
228 |
template=prompt_template,
|
229 |
+
template_format="f-string",
|
230 |
+
validate_template=True,
|
231 |
)
|
232 |
|
233 |
memory = ConversationBufferWindowMemory(
|
|
|
247 |
config = Config()
|
248 |
# Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
|
249 |
config.stop = stop
|
250 |
+
config.threads = cpu_count
|
251 |
|
252 |
try:
|
253 |
LLM_api = CTransformers(
|
|
|
331 |
f"{atime.duration/len(''.join(response)):.2f}s/char)" # type: ignore
|
332 |
)
|
333 |
|
334 |
+
history[-1][1] = "".join(response) + f"\n{_}"
|
335 |
yield history
|
336 |
|
337 |
|
|
|
372 |
"""
|
373 |
etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
|
374 |
examples_list = [
|
375 |
+
["Hello I am mike."],
|
376 |
+
["What's my name?"],
|
377 |
["What NFL team won the Super Bowl in the year Justin Bieber was born?"],
|
378 |
[
|
379 |
"What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
|
|
|
381 |
["When was Justin Bieber born?"],
|
382 |
["What NFL team won the Super Bowl in 1994?"],
|
383 |
["How to pick a lock? Provide detailed steps."],
|
384 |
+
[
|
385 |
+
"If it takes 10 hours to dry 10 clothes, assuming all the clothes are hanged together at the same time for drying , then how long will it take to dry a cloth?"
|
386 |
+
],
|
387 |
["is infinity + 1 bigger than infinity?"],
|
388 |
["Explain the plot of Cinderella in a sentence."],
|
389 |
[
|
|
|
430 |
gr.Markdown(
|
431 |
f"""<h5><center>{Path(model_loc).name}</center></h4>
|
432 |
The bot can conduct multi-turn conversations, i.e. it remembers past dialogs. The process time is longer.
|
433 |
+
It typically takes about 120 seconds for the first response to appear.
|
434 |
|
435 |
Most examples are meant for another model.
|
436 |
You probably should try to test
|
|
|
438 |
elem_classes="xsmall",
|
439 |
)
|
440 |
|
|
|
441 |
chatbot = gr.Chatbot(height=500)
|
442 |
|
|
|
|
|
443 |
with gr.Row():
|
444 |
with gr.Column(scale=5):
|
445 |
msg = gr.Textbox(
|
|
|
480 |
examples_per_page=40,
|
481 |
)
|
482 |
|
|
|
483 |
with gr.Accordion("Disclaimer", open=False):
|
484 |
_ = Path(model_loc).name
|
485 |
gr.Markdown(
|
|
|
533 |
api_name="api",
|
534 |
)
|
535 |
|
|
|
|
|
|
|
536 |
# concurrency_count=5, max_size=20
|
537 |
# max_size=36, concurrency_count=14
|
538 |
# CPU cpu_count=2 16G, model 7G
|