dmayhem93 committed
Commit 8a546d4
1 Parent(s): c58f313

Update app.py

Files changed (1)
1. app.py +17 -36
app.py CHANGED
@@ -9,43 +9,27 @@ from threading import Thread
 
 print(f"Starting to load the model to memory")
 m = AutoModelForCausalLM.from_pretrained(
-    "stabilityai/stablelm-tuned-alpha-7b", torch_dtype=torch.float16).cuda()
-tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-7b")
-generator = pipeline('text-generation', model=m, tokenizer=tok, device=0)
+    "stabilityai/stablelm-2-1_6b-zephyr", torch_dtype=torch.float16, trust_remote_code=True)
+tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-1_6b-zephyr", trust_remote_code=True)
+generator = pipeline('text-generation', model=m, tokenizer=tok)
 print(f"Sucessfully loaded the model to the memory")
 
-start_message = """<|SYSTEM|># StableAssistant
-- StableAssistant is A helpful and harmless Open Source AI Language Model developed by Stability and CarperAI.
-- StableAssistant is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
-- StableAssistant is more than just an information source, StableAssistant is also able to write poetry, short stories, and make jokes.
-- StableAssistant will refuse to participate in anything that could harm a human."""
-
-
-class StopOnTokens(StoppingCriteria):
-    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
-        stop_ids = [50278, 50279, 50277, 1, 0]
-        for stop_id in stop_ids:
-            if input_ids[0][-1] == stop_id:
-                return True
-        return False
-
+start_message = ""
 
 
 def user(message, history):
     # Append the user's message to the conversation history
     return "", history + [[message, ""]]
 
 
-def chat(curr_system_message, history):
-    # Initialize a StopOnTokens object
-    stop = StopOnTokens()
-
-    # Construct the input message string for the model by concatenating the current system message and conversation history
-    messages = curr_system_message + \
-        "".join(["".join(["<|USER|>"+item[0], "<|ASSISTANT|>"+item[1]])
-                for item in history])
-
+def chat(history):
+    chat = []
+    for item in history:
+        chat.append({"role": "user", "content": item[0]})
+        if item[1] is not None:
+            chat.append({"role": "assistant", "content": item[0]})
+    messages = tokenizer.apply_chat_template(chat, tokenize=False)
+
     # Tokenize the messages string
-    model_inputs = tok([messages], return_tensors="pt").to("cuda")
+    model_inputs = tok([messages], return_tensors="pt")
     streamer = TextIteratorStreamer(
         tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
@@ -55,9 +39,8 @@ def chat(curr_system_message, history):
         do_sample=True,
         top_p=0.95,
         top_k=1000,
-        temperature=1.0,
+        temperature=0.75,
         num_beams=1,
-        stopping_criteria=StoppingCriteriaList([stop])
     )
     t = Thread(target=m.generate, kwargs=generate_kwargs)
     t.start()
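Two details of the new chat() look like slips rather than intent: it calls tokenizer.apply_chat_template although the tokenizer object is bound to tok, and it appends item[0] (the user message) as the assistant turn where item[1] appears to be meant. A corrected sketch of the whole generator follows, assuming the m/tok objects loaded above and a transformers release recent enough to provide apply_chat_template; the streaming tail is reconstructed from the visible pieces and hedged where the diff doesn't show it.

from threading import Thread                   # already imported at the top of app.py
from transformers import TextIteratorStreamer  # likewise

def chat(history):
    # Convert Gradio's [user, assistant] pairs into role/content messages.
    chat_messages = []
    for user_msg, assistant_msg in history:
        chat_messages.append({"role": "user", "content": user_msg})
        if assistant_msg:  # skip the empty placeholder for the reply in progress
            chat_messages.append({"role": "assistant", "content": assistant_msg})

    # Render the conversation with the model's own chat template.
    # add_generation_prompt=True is an assumption (the commit passes
    # tokenize=False only); it ends the prompt with an open assistant turn.
    messages = tok.apply_chat_template(
        chat_messages, tokenize=False, add_generation_prompt=True)

    model_inputs = tok([messages], return_tensors="pt")
    streamer = TextIteratorStreamer(
        tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=1024,  # assumption: this kwarg sits outside the shown hunks
        do_sample=True,
        top_p=0.95,
        top_k=1000,
        temperature=0.75,
        num_beams=1,
    )
    Thread(target=m.generate, kwargs=generate_kwargs).start()

    # Stream partial text into the last history entry as it is generated.
    partial = ""
    for new_text in streamer:
        partial += new_text
        history[-1][1] = partial
        yield history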
@@ -76,8 +59,8 @@ def chat(curr_system_message, history):
 
 with gr.Blocks() as demo:
     # history = gr.State([])
-    gr.Markdown("## StableLM-Tuned-Alpha-7b Chat")
-    gr.HTML('''<center><a href="https://huggingface.co/spaces/stabilityai/stablelm-tuned-alpha-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space to skip the queue and run in a private space</center>''')
+    gr.Markdown("## Stable LM 1.6b Zephyr")
+    gr.HTML('''<center><a href="https://huggingface.co/spaces/stabilityai/stablelm-2-1_6b-zephyr?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space to skip the queue and run in a private space</center>''')
     chatbot = gr.Chatbot().style(height=500)
     with gr.Row():
         with gr.Column():
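One unchanged context line, chatbot = gr.Chatbot().style(height=500), uses the Gradio 3.x styling API. In Gradio 4 the .style() method was removed and the argument moved into the constructor, so a port of this Space would need something like the following (a compatibility note, not part of this commit):

chatbot = gr.Chatbot(height=500)  # Gradio 4.x equivalent of .style(height=500)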
@@ -88,13 +71,11 @@ with gr.Blocks() as demo:
             submit = gr.Button("Submit")
             stop = gr.Button("Stop")
             clear = gr.Button("Clear")
-    system_msg = gr.Textbox(
-        start_message, label="System Message", interactive=False, visible=False)
 
     submit_event = msg.submit(fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False).then(
-        fn=chat, inputs=[system_msg, chatbot], outputs=[chatbot], queue=True)
+        fn=chat, inputs=[chatbot], outputs=[chatbot], queue=True)
     submit_click_event = submit.click(fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False).then(
-        fn=chat, inputs=[system_msg, chatbot], outputs=[chatbot], queue=True)
+        fn=chat, inputs=[chatbot], outputs=[chatbot], queue=True)
     stop.click(fn=None, inputs=None, outputs=None, cancels=[
         submit_event, submit_click_event], queue=False)
     clear.click(lambda: None, None, [chatbot], queue=False)
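The wiring keeps the two-step pattern: user() runs with queue=False so the message echoes into the chatbot immediately, then chat() streams on the queue, and Stop cancels any in-flight chain. With the hidden system_msg textbox deleted, the inputs list shrinks to [chatbot] to match the new one-argument chat(history). Condensed, the pattern is:

# Echo the user turn instantly, then stream the model reply on the queue.
submit_event = msg.submit(
    fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False
).then(
    fn=chat, inputs=[chatbot], outputs=[chatbot], queue=True
)
submit_click_event = submit.click(
    fn=user, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False
).then(
    fn=chat, inputs=[chatbot], outputs=[chatbot], queue=True
)
# Stop cancels whichever chain is running; Clear just empties the chatbot.
stop.click(fn=None, inputs=None, outputs=None,
           cancels=[submit_event, submit_click_event], queue=False)
clear.click(lambda: None, None, [chatbot], queue=False)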
 
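Finally, note what the loading change in the first hunk does at runtime: the old code called .cuda() and passed device=0 to the pipeline, while the new code loads float16 weights with trust_remote_code=True and never moves them, so the 1.6B model runs wherever transformers places it by default (CPU on a bare Space). A minimal sketch that restores explicit placement, where the device logic is an assumption rather than part of the commit:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumption: prefer a GPU when present and fall back to float32 on CPU,
# where float16 kernels are poorly supported.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

m = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stablelm-2-1_6b-zephyr",
    torch_dtype=dtype,
    trust_remote_code=True,
).to(device)
tok = AutoTokenizer.from_pretrained(
    "stabilityai/stablelm-2-1_6b-zephyr", trust_remote_code=True)

With explicit placement, chat() must also move its inputs, i.e. model_inputs = tok([messages], return_tensors="pt").to(device) — the same line the commit changed in the opposite direction when it dropped .to("cuda").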