stablelm-2-1_6b-zephyr

Paused

App Files Community

reshinthadith

radames committed on Apr 21, 2023

Commit

c58f313

•

1 Parent(s): 14e5da3

add stop button (#16)

Browse files

- add stop generation button (70e5847840282b458fdfbb6270ad742996d66b35)
- return partial (875db5af231c2778e3467c9348f2429c813c1d51)

Co-authored-by: Radamés Ajna <radames@users.noreply.huggingface.co>

Files changed (1) hide show

app.py +32 -20

app.py CHANGED Viewed

@@ -29,20 +29,25 @@ class StopOnTokens(StoppingCriteria):
                 return True
         return False
-def chat(curr_system_message, user_message, history):
     # Append the user's message to the conversation history
-    history = history + [[user_message, ""]]
-    # Initialize a StopOnTokens object
     stop = StopOnTokens()
     # Construct the input message string for the model by concatenating the current system message and conversation history
     messages = curr_system_message + \
         "".join(["".join(["<|USER|>"+item[0], "<|ASSISTANT|>"+item[1]])
                 for item in history])
     # Tokenize the messages string
     model_inputs = tok([messages], return_tensors="pt").to("cuda")
-    streamer = TextIteratorStreamer(tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
@@ -57,35 +62,42 @@ def chat(curr_system_message, user_message, history):
     t = Thread(target=m.generate, kwargs=generate_kwargs)
     t.start()
-    print(history)
     # Initialize an empty string to store the generated text
     partial_text = ""
     for new_text in streamer:
-        print(new_text)
         partial_text += new_text
         history[-1][1] = partial_text
         # Yield an empty string to cleanup the message textbox and the updated conversation history
-        yield "", history
 with gr.Blocks() as demo:
-    #history = gr.State([])
     gr.Markdown("## StableLM-Tuned-Alpha-7b Chat")
     gr.HTML('''<center><a href="https://huggingface.co/spaces/stabilityai/stablelm-tuned-alpha-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space to skip the queue and run in a private space</center>''')
     chatbot = gr.Chatbot().style(height=500)
     with gr.Row():
-        with gr.Column(scale=0.70):
-            msg = gr.Textbox(label="Chat Message Box", placeholder="Chat Message Box", show_label=False).style(container=False)
-        with gr.Column(scale=0.30):
-          with gr.Row():
-              submit = gr.Button("Submit")
-              clear = gr.Button("Clear")
     system_msg = gr.Textbox(
         start_message, label="System Message", interactive=False, visible=False)
-    msg.submit(fn=chat, inputs=[system_msg, msg, chatbot], outputs=[msg, chatbot], queue=True)
-    submit.click(fn=chat, inputs=[system_msg, msg, chatbot], outputs=[msg, chatbot], queue=True)
-    clear.click(lambda: [None, []], None, [chatbot], queue=False)
-demo.queue(concurrency_count=2)
-demo.launch()

                 return True
         return False
+def user(message, history):
     # Append the user's message to the conversation history
+    return "", history + [[message, ""]]
+def chat(curr_system_message, history):
+    # Initialize a StopOnTokens object
     stop = StopOnTokens()
     # Construct the input message string for the model by concatenating the current system message and conversation history
     messages = curr_system_message + \
         "".join(["".join(["<|USER|>"+item[0], "<|ASSISTANT|>"+item[1]])
                 for item in history])
     # Tokenize the messages string
     model_inputs = tok([messages], return_tensors="pt").to("cuda")
+    streamer = TextIteratorStreamer(
+        tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
     t = Thread(target=m.generate, kwargs=generate_kwargs)
     t.start()
+    # print(history)
     # Initialize an empty string to store the generated text
     partial_text = ""
     for new_text in streamer:
+        # print(new_text)
         partial_text += new_text
         history[-1][1] = partial_text
         # Yield an empty string to cleanup the message textbox and the updated conversation history
+        yield history
+    return partial_text
 with gr.Blocks() as demo:
+    # history = gr.State([])
     gr.Markdown("## StableLM-Tuned-Alpha-7b Chat")
     gr.HTML('''<center><a href="https://huggingface.co/spaces/stabilityai/stablelm-tuned-alpha-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space to skip the queue and run in a private space</center>''')
     chatbot = gr.Chatbot().style(height=500)
     with gr.Row():
+        with gr.Column():
+            msg = gr.Textbox(label="Chat Message Box", placeholder="Chat Message Box",
+                             show_label=False).style(container=False)
+        with gr.Column():
+            with gr.Row():
+                submit = gr.Button("Submit")
+                stop = gr.Button("Stop")
+                clear = gr.Button("Clear")
     system_msg = gr.Textbox(
         start_message, label="System Message", interactive=False, visible=False)
+    submit_event = msg.submit(fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False).then(
+        fn=chat, inputs=[system_msg, chatbot], outputs=[chatbot], queue=True)
+    submit_click_event = submit.click(fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False).then(
+        fn=chat, inputs=[system_msg, chatbot], outputs=[chatbot], queue=True)
+    stop.click(fn=None, inputs=None, outputs=None, cancels=[
+               submit_event, submit_click_event], queue=False)
+    clear.click(lambda: None, None, [chatbot], queue=False)
+demo.queue(max_size=32, concurrency_count=2)
+demo.launch()