Tobias Bergmann
committed on
Commit · 668ee0d
1 Parent(s): 3a8892f
streaming
app.py CHANGED
@@ -29,34 +29,28 @@ pipe = Llama(
 def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS):
     if not message:
         return "", history
+
     prompt = message
-
-    # Initialize reply
-    reply = ""
-
     history.append([message, ""])
 
-    #
+    # Initialize reply for this round
+    reply = ""
+
+    # This will produce a generator of output chunks
     stream = pipe(
         prompt,
-        max_tokens=max_new_tokens,
+        max_tokens=max_new_tokens,
         stop=["</s>"],
         stream=True
     )
-
+
     for output in stream:
-        # This loop will receive partial output (one token at a time)
         new_text = output['choices'][0]['text']
-
-        #
-
-
-
-        history[-1][1] = reply
-
-        # Yield for incremental display on chat
-        yield "", history
-
+        reply += new_text
+        history[-1][1] = reply  # Update the current reply in history
+        yield "", history
+    return "", history  # Always return at the end to terminate the generator
+
 with gr.Blocks() as demo:
     gr.Markdown(DESCRIPTION)
     chatbot = gr.Chatbot()
@@ -69,5 +63,5 @@ with gr.Blocks() as demo:
         label="Max New Tokens",
     )
     textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot])
-
+
 demo.queue().launch()
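
For context, here is a minimal, self-contained sketch of how app.py reads after this commit. The imports, the Llama(...) constructor arguments, DESCRIPTION, DEFAULT_MAX_NEW_TOKENS, and the gr.Textbox/gr.Slider setup are assumptions, since the diff only shows the changed region:

# Minimal sketch of app.py after this commit. The Llama(...) arguments,
# DESCRIPTION, DEFAULT_MAX_NEW_TOKENS, and the Textbox/Slider kwargs are
# assumptions; the diff only shows the changed region.
from typing import List

import gradio as gr
from llama_cpp import Llama

DESCRIPTION = "Llama chat demo"   # assumption: defined earlier in app.py
DEFAULT_MAX_NEW_TOKENS = 256      # assumption: defined earlier in app.py

pipe = Llama(model_path="model.gguf")  # assumption: real constructor args not shown


def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS):
    if not message:
        return "", history  # note: in a generator, return only stops iteration

    prompt = message
    history.append([message, ""])

    # Initialize reply for this round
    reply = ""

    # stream=True makes llama-cpp-python return a generator of partial outputs
    stream = pipe(
        prompt,
        max_tokens=max_new_tokens,
        stop=["</s>"],
        stream=True
    )

    for output in stream:
        new_text = output['choices'][0]['text']
        reply += new_text
        history[-1][1] = reply  # update the bot half of the current chat pair
        yield "", history       # each yield repaints the Chatbot incrementally
    return "", history


with gr.Blocks() as demo:
    gr.Markdown(DESCRIPTION)
    chatbot = gr.Chatbot()
    textbox = gr.Textbox()  # assumption: the actual kwargs are not in the diff
    max_new_tokens_slider = gr.Slider(
        minimum=1,
        maximum=2048,
        value=DEFAULT_MAX_NEW_TOKENS,
        label="Max New Tokens",
    )
    textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot])

demo.queue().launch()  # queue() is what lets Gradio stream from a generator callback

Because predict contains yield, Gradio treats it as a generator callback (which requires demo.queue()); each yielded ("", history) pair clears the textbox and redraws the chat with the partial reply, which is what makes the fixed reply += new_text accumulation visible as token-by-token streaming.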