FINGU-AI committed
Commit: 9834006
Parent: e46f531

Update app.py

Files changed (1): app.py (+5 -3)
app.py CHANGED
@@ -7,6 +7,8 @@ import random
 import time
 import re
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextStreamer
+import transformers
+
 
 
 # Set an environment variable
@@ -45,10 +47,10 @@ def inference(query):
 
     tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
     outputs = model.generate(tokenized_chat, **generation_params)
-    decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+    decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=False)
     assistant_response = decoded_outputs[0].split("<|im_start|>assistant\n")[-1].strip()
-    response_ = assistant_response.replace('<|im_end|>', "")
-    return response_
+    # response_ = assistant_response.replace('<|im_end|>', "")
+    return assistant_response
     # outputs = model.generate(tokenized_chat, **generation_params, streamer=streamer)
     # return outputs
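For context, the edited lines isolate the assistant's turn from a ChatML-style transcript: decoding with skip_special_tokens=False keeps markers such as <|im_start|> in the decoded text, which is what lets the split on "<|im_start|>assistant\n" succeed. Below is a minimal sketch of that post-processing, run on a hard-coded sample transcript (the sample text is an illustrative assumption, standing in for tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]):

# Minimal sketch of the post-processing in inference(). With
# skip_special_tokens=False the ChatML markers survive decoding, so the
# split below can locate the assistant's reply; with the previous
# skip_special_tokens=True, the "<|im_start|>" marker would have been
# stripped and the split would return the whole transcript instead.
decoded = (
    "<|im_start|>user\nWhat is 2 + 2?<|im_end|>\n"
    "<|im_start|>assistant\n2 + 2 = 4.<|im_end|>"
)

assistant_response = decoded.split("<|im_start|>assistant\n")[-1].strip()
print(assistant_response)  # -> 2 + 2 = 4.<|im_end|>

# Note: because this commit comments out the replace('<|im_end|>', "")
# line, the trailing <|im_end|> marker is now part of the return value.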