Artples committed
Commit 3f60a5e
Parent: 619cddc

Update app.py

Files changed (1)
app.py +2 -2
app.py CHANGED
@@ -27,7 +27,7 @@ if torch.cuda.is_available():
     model_id = "FuseAI/FuseChat-7B-VaRM"
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
-    tokenizer.use_default_system_prompt = False
+    tokenizer.use_default_system_prompt = True
 
 
 @spaces.GPU(enable_queue=True)
@@ -54,7 +54,7 @@ def generate(
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(model.device)
 
-    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=False)
+    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         {"input_ids": input_ids},
         streamer=streamer,
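
For context on the second change, below is a minimal sketch (not part of this commit) of the standard TextIteratorStreamer pattern that app.py follows: model.generate blocks, so it runs in a background thread while the streamer yields decoded text chunks. The prompt handling and max_new_tokens value here are illustrative; the real app builds input_ids from the chat history via the tokenizer's chat template. The point of skip_special_tokens=True is that markers such as the model's end-of-turn token are dropped from the streamed text instead of leaking into the chat UI.

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "FuseAI/FuseChat-7B-VaRM"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# load_in_4bit=True as in app.py; this requires the bitsandbytes package.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)

# Illustrative prompt; the app derives input_ids from the conversation history.
input_ids = tokenizer("Hello!", return_tensors="pt").input_ids.to(model.device)

# skip_special_tokens=True (this commit's change) strips special tokens
# from the decoded stream before they reach the UI.
streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

generate_kwargs = dict(
    {"input_ids": input_ids},
    streamer=streamer,
    max_new_tokens=64,  # illustrative; the app exposes this as a slider
)

# Run generation in a background thread and print chunks as they arrive.
thread = Thread(target=model.generate, kwargs=generate_kwargs)
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()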