kingabzpro committed
Commit
64109dd
1 Parent(s): 32dbeaa

Update app.py

Files changed (1): app.py +9 -5
app.py CHANGED
@@ -1,7 +1,6 @@
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
-from threading import Thread
 
 
 title = "🦅Falcon 🗨️ChatBot"
@@ -9,7 +8,7 @@ description = "Falcon-RW-1B is a 1B parameters causal decoder-only model built b
 examples = [["How are you?"]]
 
 
-tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-rw-1b",torch_dtype=torch.float16)
+tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-rw-1b")
 model = AutoModelForCausalLM.from_pretrained(
     "tiiuae/falcon-rw-1b",
     trust_remote_code=True,
@@ -30,13 +29,16 @@ def predict(message, history):
     history_transformer_format = history + [[message, ""]]
     stop = StopOnTokens()
 
+    #Construct the input message string for the model by concatenating the current system message and conversation history
     messages = "".join(["".join(["\n<human>:"+item[0], "\n<bot>:"+item[1]])  #curr_system_message +
                         for item in history_transformer_format])
 
+    #Tokenize the messages string
     model_inputs = tokenizer([messages], return_tensors="pt")
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
+        streamer=streamer,
         max_new_tokens=1024,
         do_sample=True,
         top_p=0.95,
@@ -45,9 +47,11 @@ def predict(message, history):
         num_beams=1,
         stopping_criteria=StoppingCriteriaList([stop])
     )
-    t = Thread(target=model.generate, kwargs=generate_kwargs)
-    t.start()
+    #t = Thread(target=model.generate, kwargs=generate_kwargs)
+    #t.start()
+    model.generate(**generate_kwargs)
 
+    #Initialize an empty string to store the generated text
     partial_message = ""
     for new_token in streamer:
         if new_token != '<':
@@ -64,4 +68,4 @@ gr.ChatInterface(predict,
                  undo_btn="Delete Previous",
                  clear_btn="Clear",
                  chatbot=gr.Chatbot(height=300),
-                 textbox=gr.Textbox(placeholder="Chat with me")).launch()
+                 textbox=gr.Textbox(placeholder="Chat with me")).queue().launch()
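
A note on the pattern being changed here: transformers documents TextIteratorStreamer as a streamer meant to be consumed from a separate thread, and that is what the now commented-out Thread lines did. With the direct model.generate(**generate_kwargs) call, generation runs to completion and fills the streamer's internal queue before the for-loop drains it, so tokens only reach the UI once the full reply exists. Below is a minimal, self-contained sketch of the threaded variant for comparison. It assumes the same Falcon-RW-1B model; the StopOnTokens body is hypothetical, standing in for the class defined in the portion of app.py that the diff elides.

# Minimal sketch of the background-thread streaming pattern that the
# commented-out lines implemented. Assumptions: same Falcon-RW-1B model;
# StopOnTokens is a hypothetical stand-in for the class the diff elides.
import torch
from threading import Thread
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
    TextIteratorStreamer,
)

tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-rw-1b")
model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-rw-1b", trust_remote_code=True)

class StopOnTokens(StoppingCriteria):
    # Hypothetical implementation: stop as soon as the most recently
    # generated id is the end-of-sequence token.
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        return input_ids[0][-1].item() == tokenizer.eos_token_id

model_inputs = tokenizer(["\n<human>:How are you?\n<bot>:"], return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(
    model_inputs,
    streamer=streamer,
    max_new_tokens=1024,
    do_sample=True,
    top_p=0.95,
    num_beams=1,
    stopping_criteria=StoppingCriteriaList([StopOnTokens()]),
)

# generate() blocks until decoding finishes, so it runs in a worker thread
# while the main thread consumes tokens from the streamer as they arrive.
Thread(target=model.generate, kwargs=generate_kwargs).start()
partial_message = ""
for new_token in streamer:
    partial_message += new_token
    print(partial_message, flush=True)

The .queue() addition on the last line of the diff is related: in the Gradio 3.x releases this Space targets, handlers that yield partial results, as predict does, need queuing enabled for the interface to stream updates, which is presumably why launch() is now chained after queue().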