Tobias Bergmann committed
Commit 668ee0d · 1 Parent(s): 3a8892f
Files changed (1):
  1. app.py +13 -19

app.py CHANGED
@@ -29,34 +29,28 @@ pipe = Llama(
 def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS):
     if not message:
         return "", history
+
     prompt = message
-
-    # Initialize reply
-    reply = ""
-
     history.append([message, ""])
 
-    # Use stream=True for streaming
+    # Initialize reply for this round
+    reply = ""
+
+    # This will produce a generator of output chunks
     stream = pipe(
         prompt,
-        max_tokens=max_new_tokens,
+        max_tokens=max_new_tokens,
         stop=["</s>"],
         stream=True
     )
-
+
     for output in stream:
-        # This loop will receive partial output (one token at a time)
         new_text = output['choices'][0]['text']
-
-        # Append to the current reply
-        reply += new_text
-
-        # Update the history
-        history[-1][1] = reply
-
-        # Yield for incremental display on chat
-        yield "", history
-
+        reply += new_text
+        history[-1][1] = reply  # Update the current reply in history
+        yield "", history
+    return "", history  # Always return at the end to terminate the generator
+
 with gr.Blocks() as demo:
     gr.Markdown(DESCRIPTION)
     chatbot = gr.Chatbot()
@@ -69,5 +63,5 @@ with gr.Blocks() as demo:
         label="Max New Tokens",
     )
     textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot])
-
+
 demo.queue().launch()
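
For reference, below is a minimal, self-contained sketch of app.py as it plausibly stands after this commit. It is an illustration, not the repository file: the model path, DESCRIPTION, DEFAULT_MAX_NEW_TOKENS, the textbox placeholder, and the slider bounds are assumptions, since the diff does not show the top of the file. What it demonstrates is the streaming pattern the commit lands on: with stream=True, llama-cpp-python returns a generator of completion chunks, and because predict is itself a generator, Gradio (with demo.queue()) re-renders the chatbot on every yield, producing token-by-token output.

from typing import List

import gradio as gr
from llama_cpp import Llama

# Placeholder values -- the real ones are defined earlier in app.py,
# outside the part of the file this diff shows.
DESCRIPTION = "Llama.cpp streaming chat demo"
DEFAULT_MAX_NEW_TOKENS = 256

# Hypothetical model file; the actual Llama(...) arguments are not in the diff.
pipe = Llama(model_path="model.gguf")

def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS):
    if not message:
        return "", history

    prompt = message
    history.append([message, ""])

    # Accumulate the reply for this round.
    reply = ""

    # stream=True makes llama-cpp-python return a generator of chunks.
    stream = pipe(
        prompt,
        max_tokens=max_new_tokens,
        stop=["</s>"],
        stream=True,
    )

    for output in stream:
        new_text = output["choices"][0]["text"]
        reply += new_text
        history[-1][1] = reply  # update the bot half of the last turn
        yield "", history       # each yield redraws the chatbot incrementally
    return "", history

with gr.Blocks() as demo:
    gr.Markdown(DESCRIPTION)
    chatbot = gr.Chatbot()
    textbox = gr.Textbox(placeholder="Type a message...")  # assumed widget config
    max_new_tokens_slider = gr.Slider(
        minimum=1,
        maximum=1024,
        value=DEFAULT_MAX_NEW_TOKENS,
        label="Max New Tokens",
    )
    textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot])

demo.queue().launch()

Note that the final return "", history after the loop is legal inside a generator (Python attaches the value to StopIteration), but it is the yields, not the return value, that drive the incremental UI updates.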