kingabzpro committed
Commit 9d1c8f9
1 Parent(s): 3c20001

Update app.py

Files changed (1): app.py (+46, -28)
app.py CHANGED
@@ -1,6 +1,11 @@
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
+import time
+import numpy as np
+from torch.nn import functional as F
+import os
+from threading import Thread
 
 
 title = "🦅Falcon 🗨️ChatBot"
@@ -12,54 +17,67 @@ tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-rw-1b")
 model = AutoModelForCausalLM.from_pretrained(
     "tiiuae/falcon-rw-1b",
     trust_remote_code=True,
-    torch_dtype=torch.float16
+    torch_dtype=torch.float16,
+    load_in_8bit=True
 )
 
 
 class StopOnTokens(StoppingCriteria):
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
-        stop_ids = [29, 0]
+        stop_ids = [0]
         for stop_id in stop_ids:
             if input_ids[0][-1] == stop_id:
                 return True
         return False
 
-def predict(message, history):
-
-    history_transformer_format = history + [[message, ""]]
+
+def user(message, history):
+    # Append the user's message to the conversation history
+    return "", history + [[message, ""]]
+
+
+def chat(curr_system_message, history):
+    # Initialize a StopOnTokens object
     stop = StopOnTokens()
 
-    # Construct the input message string for the model by concatenating the current system message and conversation history
-    messages = "".join(["".join(["\n<human>:"+item[0], "\n<bot>:"+item[1]])  # curr_system_message +
-                        for item in history_transformer_format])
-
-    # Tokenize the messages string
-    model_inputs = tokenizer([messages], return_tensors="pt")
-    streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
+    # Construct the input message string for the model by concatenating the current system message and conversation history
+    messages = curr_system_message + \
+        "".join(["".join(["<user>: "+item[0], "<chatbot>: "+item[1]])
+                 for item in history])
+
+    # Tokenize the messages string
+    tokens = tokenizer([messages], return_tensors="pt")
+    streamer = TextIteratorStreamer(
+        tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
+
+    token_ids = tokens.input_ids
+    attention_mask = tokens.attention_mask
+
     generate_kwargs = dict(
-        model_inputs,
-        streamer=streamer,
-        max_new_tokens=1024,
+        input_ids=token_ids,
+        attention_mask=attention_mask,
+        streamer=streamer,
+        max_length=2048,
         do_sample=True,
-        top_p=0.95,
-        top_k=1000,
-        temperature=1.0,
-        num_beams=1,
+        num_return_sequences=1,
+        eos_token_id=tokenizer.eos_token_id,
+        temperature=0.7,
         stopping_criteria=StoppingCriteriaList([stop])
-        )
-    # t = Thread(target=model.generate, kwargs=generate_kwargs)
-    # t.start()
-    model.generate(**generate_kwargs)
+    )
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()
 
     # Initialize an empty string to store the generated text
-    partial_message = ""
-    for new_token in streamer:
-        if new_token != '<':
-            partial_message += new_token
-            yield partial_message
-
+    partial_text = ""
+    for new_text in streamer:
+        # print(new_text)
+        partial_text += new_text
+        history[-1][1] = partial_text
+        # Yield the updated history so the chat window streams the reply
+        yield history
+    return partial_text
 
-gr.ChatInterface(predict,
+gr.ChatInterface(chat,
                  title=title,
                  description=description,
                  examples=examples,
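A note on `StopOnTokens`: the stop list is hard-coded (it shrinks from [29, 0] to [0] in this commit), and nothing here ties 0 to the tokenizer's actual end-of-text id. A sketch of a variant that looks the id up instead, assuming the tokenizer defines one; this is not part of the commit:

class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Compare against the tokenizer's own EOS id rather than a hard-coded 0.
        return bool(input_ids[0][-1] == tokenizer.eos_token_id)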
 
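On the quantization change: `load_in_8bit=True` goes through the bitsandbytes integration, so it needs the `bitsandbytes` and `accelerate` packages and a CUDA device to hold the int8 weights, usually selected with a `device_map`. A minimal sketch under those assumptions; `device_map="auto"` is illustrative and not part of this commit:

import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-rw-1b",
    trust_remote_code=True,
    torch_dtype=torch.float16,   # dtype for the modules that stay unquantized
    load_in_8bit=True,           # int8 linear layers via bitsandbytes
    device_map="auto",           # let accelerate place the weights on the GPU
)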
 
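The central fix in the commit is moving `model.generate` onto a background thread. `TextIteratorStreamer` is a blocking iterator fed from inside `generate`, so the previous synchronous call meant nothing could be read from the streamer until generation had already finished (and the 10-second timeout could then fire). A standalone sketch of the pattern, assuming `model` and `tokenizer` are loaded as in app.py; the prompt is illustrative:

from threading import Thread
from transformers import TextIteratorStreamer

tokens = tokenizer(["<user>: Hello<chatbot>: "], return_tensors="pt")
streamer = TextIteratorStreamer(
    tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)

# generate() runs in the background and feeds decoded text into the streamer.
Thread(target=model.generate, kwargs=dict(
    input_ids=tokens.input_ids,
    attention_mask=tokens.attention_mask,
    streamer=streamer,
    max_new_tokens=64,
)).start()

# The main thread drains the streamer chunk by chunk while generation runs,
# which is what lets the UI update incrementally.
partial = ""
for chunk in streamer:
    partial += chunk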
 
 
 
 
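One wiring detail worth flagging: `gr.ChatInterface` calls its function as `fn(message, history)` and, when the function is a generator, displays each yielded string as the growing reply, which is the shape the old `predict` had. The new `chat(curr_system_message, history)` receives the user's message in its first parameter and yields the whole `history` list, a pattern that belongs to the lower-level `gr.Blocks` + `gr.Chatbot` API. A sketch of a generator shaped for `ChatInterface`, reusing the threaded-streamer pattern above; `chat_fn` is a hypothetical name, not from this commit:

from threading import Thread
from transformers import TextIteratorStreamer

def chat_fn(message, history):
    # Rebuild the prompt from prior [user, bot] turns plus the new message.
    prompt = "".join("<user>: " + u + "<chatbot>: " + b for u, b in history)
    prompt += "<user>: " + message + "<chatbot>: "
    tokens = tokenizer([prompt], return_tensors="pt")
    streamer = TextIteratorStreamer(
        tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
    Thread(target=model.generate, kwargs=dict(
        input_ids=tokens.input_ids,
        attention_mask=tokens.attention_mask,
        streamer=streamer,
        max_new_tokens=1024,
    )).start()
    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial  # ChatInterface renders this string as the streaming reply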