nttwt1597 committed
Commit f81b95a · verified · 1 Parent(s): 307ae39

Update app.py

Files changed (1): app.py +5 -5
app.py CHANGED
@@ -49,10 +49,10 @@ model, tokenizer = FastLanguageModel.from_pretrained(
     load_in_4bit = True,
 )
 model.load_adapter(peft_model_adapter_id, token=token)
-# terminators = [
-#     tokenizer.eos_token_id,
-#     tokenizer.convert_tokens_to_ids("<|eot_id|>")
-# ]
+terminators = [
+    tokenizer.eos_token_id,
+    tokenizer.convert_tokens_to_ids("<|eot_id|>")
+]
 FastLanguageModel.for_inference(model) # Enable native 2x faster inference
 
 from transformers import pipeline, TextIteratorStreamer
@@ -79,7 +79,7 @@ def run_model_on_text(text):
 
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=True, repetition_penalty=1.2,)
+    generation_kwargs = dict(inputs, streamer=streamer, eos_token_id=terminators, max_new_tokens=1024, do_sample=True, repetition_penalty=1.2,)
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
 
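
For context, a minimal sketch (not part of this commit) of how the changed lines fit together: the commit uncomments the terminators list and passes it as eos_token_id so generation stops at either the tokenizer's EOS token or "<|eot_id|>". The sketch assumes model and tokenizer are loaded as in the first hunk, text is the prompt string, and the consumer loop over the streamer is an illustrative assumption, not code from this repo.

# Hedged sketch: stream generation in a background thread, stopping on either terminator.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_generate(text):
    # Tokenize the prompt and move tensors to the model's device (assumed helper, not from app.py)
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
    generation_kwargs = dict(
        inputs,  # unpacks input_ids / attention_mask as keyword arguments to generate()
        streamer=streamer,
        eos_token_id=terminators,  # generation halts at whichever terminator appears first
        max_new_tokens=1024,
        do_sample=True,
        repetition_penalty=1.2,
    )
    # generate() runs in a background thread so the streamer can be consumed as tokens arrive
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    for chunk in streamer:
        yield chunk
    thread.join()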