stablelm-2-1_6b-zephyr

Paused

App Files Files Community

dmayhem93 committed on Jan 19

Commit

8a546d4

•

1 Parent(s): c58f313

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -36

app.py CHANGED Viewed

@@ -9,43 +9,27 @@ from threading import Thread
 print(f"Starting to load the model to memory")
 m = AutoModelForCausalLM.from_pretrained(
-    "stabilityai/stablelm-tuned-alpha-7b", torch_dtype=torch.float16).cuda()
-tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-7b")
-generator = pipeline('text-generation', model=m, tokenizer=tok, device=0)
 print(f"Sucessfully loaded the model to the memory")
-start_message = """<|SYSTEM|># StableAssistant
-- StableAssistant is A helpful and harmless Open Source AI Language Model developed by Stability and CarperAI.
-- StableAssistant is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
-- StableAssistant is more than just an information source, StableAssistant is also able to write poetry, short stories, and make jokes.
-- StableAssistant will refuse to participate in anything that could harm a human."""
-class StopOnTokens(StoppingCriteria):
-    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
-        stop_ids = [50278, 50279, 50277, 1, 0]
-        for stop_id in stop_ids:
-            if input_ids[0][-1] == stop_id:
-                return True
-        return False
 def user(message, history):
     # Append the user's message to the conversation history
     return "", history + [[message, ""]]
-def chat(curr_system_message, history):
-    # Initialize a StopOnTokens object
-    stop = StopOnTokens()
-    # Construct the input message string for the model by concatenating the current system message and conversation history
-    messages = curr_system_message + \
-        "".join(["".join(["<|USER|>"+item[0], "<|ASSISTANT|>"+item[1]])
-                for item in history])
     # Tokenize the messages string
-    model_inputs = tok([messages], return_tensors="pt").to("cuda")
     streamer = TextIteratorStreamer(
         tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
@@ -55,9 +39,8 @@ def chat(curr_system_message, history):
         do_sample=True,
         top_p=0.95,
         top_k=1000,
-        temperature=1.0,
         num_beams=1,
-        stopping_criteria=StoppingCriteriaList([stop])
     )
     t = Thread(target=m.generate, kwargs=generate_kwargs)
     t.start()
@@ -76,8 +59,8 @@ def chat(curr_system_message, history):
 with gr.Blocks() as demo:
     # history = gr.State([])
-    gr.Markdown("## StableLM-Tuned-Alpha-7b Chat")
-    gr.HTML('''<center><a href="https://huggingface.co/spaces/stabilityai/stablelm-tuned-alpha-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space to skip the queue and run in a private space</center>''')
     chatbot = gr.Chatbot().style(height=500)
     with gr.Row():
         with gr.Column():
@@ -88,13 +71,11 @@ with gr.Blocks() as demo:
                 submit = gr.Button("Submit")
                 stop = gr.Button("Stop")
                 clear = gr.Button("Clear")
-    system_msg = gr.Textbox(
-        start_message, label="System Message", interactive=False, visible=False)
     submit_event = msg.submit(fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False).then(
-        fn=chat, inputs=[system_msg, chatbot], outputs=[chatbot], queue=True)
     submit_click_event = submit.click(fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False).then(
-        fn=chat, inputs=[system_msg, chatbot], outputs=[chatbot], queue=True)
     stop.click(fn=None, inputs=None, outputs=None, cancels=[
                submit_event, submit_click_event], queue=False)
     clear.click(lambda: None, None, [chatbot], queue=False)

 print(f"Starting to load the model to memory")
 m = AutoModelForCausalLM.from_pretrained(
+    "stabilityai/stablelm-2-1_6b-zephyr", torch_dtype=torch.float16, trust_remote_code=True)
+tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-1_6b-zephyr", trust_remote_code=True)
+generator = pipeline('text-generation', model=m, tokenizer=tok)
 print(f"Sucessfully loaded the model to the memory")
+start_message = ""
 def user(message, history):
     # Append the user's message to the conversation history
     return "", history + [[message, ""]]
+def chat(history):  # Turn the chatbot history into a chat-template prompt and start generation on a worker thread
+    conversation = []  # role/content dicts passed to apply_chat_template below
+    for item in history:  # each item is a [user_message, assistant_reply] pair (see user())
+        conversation.append({"role": "user", "content": item[0]})
+        if item[1] is not None:
+            conversation.append({"role": "assistant", "content": item[1]})  # the reply is item[1]; item[0] is the user text
+    messages = tok.apply_chat_template(conversation, tokenize=False)  # the tokenizer is bound to `tok` in this file
     # Tokenize the messages string
+    model_inputs = tok([messages], return_tensors="pt")
     streamer = TextIteratorStreamer(
         tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         do_sample=True,
         top_p=0.95,
         top_k=1000,
+        temperature=0.75,
         num_beams=1,
     )
     t = Thread(target=m.generate, kwargs=generate_kwargs)
     t.start()
 with gr.Blocks() as demo:
     # history = gr.State([])
+    gr.Markdown("## Stable LM 1.6b Zephyr")
+    gr.HTML('''<center><a href="https://huggingface.co/spaces/stabilityai/stablelm-2-1_6b-zephyr?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space to skip the queue and run in a private space</center>''')
     chatbot = gr.Chatbot().style(height=500)
     with gr.Row():
         with gr.Column():
                 submit = gr.Button("Submit")
                 stop = gr.Button("Stop")
                 clear = gr.Button("Clear")
     submit_event = msg.submit(fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False).then(
+        fn=chat, inputs=[chatbot], outputs=[chatbot], queue=True)
     submit_click_event = submit.click(fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False).then(
+        fn=chat, inputs=[chatbot], outputs=[chatbot], queue=True)
     stop.click(fn=None, inputs=None, outputs=None, cancels=[
                submit_event, submit_click_event], queue=False)
     clear.click(lambda: None, None, [chatbot], queue=False)