reshinthadith radames committed on
Commit
c58f313
·
1 Parent(s): 14e5da3

add stop button (#16)

Browse files

- add stop generation button (70e5847840282b458fdfbb6270ad742996d66b35)
- return partial (875db5af231c2778e3467c9348f2429c813c1d51)


Co-authored-by: Radamés Ajna <radames@users.noreply.huggingface.co>

Files changed (1) hide show
  1. app.py +32 -20
app.py CHANGED
@@ -29,20 +29,25 @@ class StopOnTokens(StoppingCriteria):
29
  return True
30
  return False
31
 
32
- def chat(curr_system_message, user_message, history):
 
33
  # Append the user's message to the conversation history
34
- history = history + [[user_message, ""]]
35
- # Initialize a StopOnTokens object
 
 
 
36
  stop = StopOnTokens()
37
 
38
  # Construct the input message string for the model by concatenating the current system message and conversation history
39
  messages = curr_system_message + \
40
  "".join(["".join(["<|USER|>"+item[0], "<|ASSISTANT|>"+item[1]])
41
  for item in history])
42
-
43
  # Tokenize the messages string
44
  model_inputs = tok([messages], return_tensors="pt").to("cuda")
45
- streamer = TextIteratorStreamer(tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
 
46
  generate_kwargs = dict(
47
  model_inputs,
48
  streamer=streamer,
@@ -57,35 +62,42 @@ def chat(curr_system_message, user_message, history):
57
  t = Thread(target=m.generate, kwargs=generate_kwargs)
58
  t.start()
59
 
60
- print(history)
61
  # Initialize an empty string to store the generated text
62
  partial_text = ""
63
  for new_text in streamer:
64
- print(new_text)
65
  partial_text += new_text
66
  history[-1][1] = partial_text
67
  # Yield an empty string to cleanup the message textbox and the updated conversation history
68
- yield "", history
 
69
 
70
 
71
  with gr.Blocks() as demo:
72
- #history = gr.State([])
73
  gr.Markdown("## StableLM-Tuned-Alpha-7b Chat")
74
  gr.HTML('''<center><a href="https://huggingface.co/spaces/stabilityai/stablelm-tuned-alpha-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space to skip the queue and run in a private space</center>''')
75
  chatbot = gr.Chatbot().style(height=500)
76
  with gr.Row():
77
- with gr.Column(scale=0.70):
78
- msg = gr.Textbox(label="Chat Message Box", placeholder="Chat Message Box", show_label=False).style(container=False)
79
- with gr.Column(scale=0.30):
80
- with gr.Row():
81
- submit = gr.Button("Submit")
82
- clear = gr.Button("Clear")
 
 
83
  system_msg = gr.Textbox(
84
  start_message, label="System Message", interactive=False, visible=False)
85
 
86
- msg.submit(fn=chat, inputs=[system_msg, msg, chatbot], outputs=[msg, chatbot], queue=True)
87
- submit.click(fn=chat, inputs=[system_msg, msg, chatbot], outputs=[msg, chatbot], queue=True)
88
- clear.click(lambda: [None, []], None, [chatbot], queue=False)
 
 
 
 
89
 
90
- demo.queue(concurrency_count=2)
91
- demo.launch()
 
29
  return True
30
  return False
31
 
32
+
33
def user(message, history):
    """Queue the user's message as a new, unanswered turn in the chat history.

    Args:
        message: Text the user submitted in the message box.
        history: Existing conversation as a list of ``[user, assistant]``
            pairs, or ``None``/empty when there is no conversation yet.

    Returns:
        A ``("", new_history)`` tuple. The empty string clears the message
        textbox; ``new_history`` is a new list ending with ``[message, ""]``,
        whose empty reply slot is filled in later by ``chat``.
    """
    # The Clear button handler writes None into the chatbot, so tolerate a
    # missing history instead of failing on ``None + list``.
    turns = list(history) if history else []
    # Build a new list rather than mutating the caller's history in place.
    turns.append([message, ""])
    return "", turns
36
+
37
+
38
+ def chat(curr_system_message, history):
39
+ # Initialize a StopOnTokens object
40
  stop = StopOnTokens()
41
 
42
  # Construct the input message string for the model by concatenating the current system message and conversation history
43
  messages = curr_system_message + \
44
  "".join(["".join(["<|USER|>"+item[0], "<|ASSISTANT|>"+item[1]])
45
  for item in history])
46
+
47
  # Tokenize the messages string
48
  model_inputs = tok([messages], return_tensors="pt").to("cuda")
49
+ streamer = TextIteratorStreamer(
50
+ tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
51
  generate_kwargs = dict(
52
  model_inputs,
53
  streamer=streamer,
 
62
  t = Thread(target=m.generate, kwargs=generate_kwargs)
63
  t.start()
64
 
65
+ # print(history)
66
  # Initialize an empty string to store the generated text
67
  partial_text = ""
68
  for new_text in streamer:
69
+ # print(new_text)
70
  partial_text += new_text
71
  history[-1][1] = partial_text
72
  # Yield an empty string to cleanup the message textbox and the updated conversation history
73
+ yield history
74
+ return partial_text
75
 
76
 
77
  with gr.Blocks() as demo:
78
+ # history = gr.State([])
79
  gr.Markdown("## StableLM-Tuned-Alpha-7b Chat")
80
  gr.HTML('''<center><a href="https://huggingface.co/spaces/stabilityai/stablelm-tuned-alpha-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space to skip the queue and run in a private space</center>''')
81
  chatbot = gr.Chatbot().style(height=500)
82
  with gr.Row():
83
+ with gr.Column():
84
+ msg = gr.Textbox(label="Chat Message Box", placeholder="Chat Message Box",
85
+ show_label=False).style(container=False)
86
+ with gr.Column():
87
+ with gr.Row():
88
+ submit = gr.Button("Submit")
89
+ stop = gr.Button("Stop")
90
+ clear = gr.Button("Clear")
91
  system_msg = gr.Textbox(
92
  start_message, label="System Message", interactive=False, visible=False)
93
 
94
+ submit_event = msg.submit(fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False).then(
95
+ fn=chat, inputs=[system_msg, chatbot], outputs=[chatbot], queue=True)
96
+ submit_click_event = submit.click(fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False).then(
97
+ fn=chat, inputs=[system_msg, chatbot], outputs=[chatbot], queue=True)
98
+ stop.click(fn=None, inputs=None, outputs=None, cancels=[
99
+ submit_event, submit_click_event], queue=False)
100
+ clear.click(lambda: None, None, [chatbot], queue=False)
101
 
102
+ demo.queue(max_size=32, concurrency_count=2)
103
+ demo.launch()