hysts (HF staff) committed
Commit 54995d2
1 parent: b4ca5ac
Files changed (1)
app.py +5 -5
app.py CHANGED
@@ -57,15 +57,15 @@ def generate(
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
 
-    chat = tokenizer.apply_chat_template(conversation, tokenize=False)
-    inputs = tokenizer(chat, return_tensors="pt", add_special_tokens=False).to("cuda")
-    if len(inputs) > MAX_INPUT_TOKEN_LENGTH:
-        inputs = inputs[-MAX_INPUT_TOKEN_LENGTH:]
+    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
+    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+    input_ids = input_ids.to(model.device)
 
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
-        inputs,
+        {"input_ids": input_ids},
         streamer=streamer,
         max_new_tokens=max_new_tokens,
         do_sample=True,
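
For context on why the deleted lines were buggy: tokenizer(chat, ...) returns a transformers BatchEncoding, and len() on a BatchEncoding counts its dict fields (input_ids, attention_mask, ...), not tokens, so the old trim condition effectively never fired, and slicing the BatchEncoding itself would have failed outright. A small sketch, assuming tokenizer is the AutoTokenizer instance app.py already loads:

# len() on a BatchEncoding counts fields, not tokens -- the old trim
# check compared the wrong quantity.
enc = tokenizer("hello world", return_tensors="pt")
print(len(enc))                   # e.g. 2 (input_ids, attention_mask), never > the token limit
print(enc["input_ids"].shape[1])  # the actual token count the new code checks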
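
Putting the hunk in context, here is a minimal sketch of how the full generate() path reads after this commit. The model_id, MAX_INPUT_TOKEN_LENGTH value, and sampling parameters are assumptions standing in for whatever app.py actually defines; only the lines shown in the diff are confirmed by the commit.

from threading import Thread
from typing import Iterator

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MAX_INPUT_TOKEN_LENGTH = 4096  # assumption; app.py defines its own limit
model_id = "meta-llama/Llama-2-7b-chat-hf"  # hypothetical stand-in for the demo's model

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")


def generate(message: str, chat_history: list[tuple[str, str]], max_new_tokens: int = 1024) -> Iterator[str]:
    conversation = []
    for user, assistant in chat_history:
        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
    conversation.append({"role": "user", "content": message})

    # New path: apply_chat_template tokenizes straight to a [1, seq_len] id tensor,
    # replacing the old render-to-string-then-retokenize round trip.
    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        # Keep only the most recent tokens and tell the user history was dropped.
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)  # follow the model's placement, not a hard-coded "cuda"

    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        {"input_ids": input_ids},
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
    )
    # model.generate blocks, so it runs in a worker thread while this generator
    # drains the streamer and yields partial text to Gradio.
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)

Passing {"input_ids": input_ids} as the positional argument to dict() keeps generate_kwargs a plain mapping that Thread can forward as keyword arguments to model.generate, and input_ids.to(model.device) sends the tensor wherever the model was actually placed instead of assuming "cuda".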