Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -49,10 +49,10 @@ model, tokenizer = FastLanguageModel.from_pretrained(
|
|
| 49 |
load_in_4bit = True,
|
| 50 |
)
|
| 51 |
model.load_adapter(peft_model_adapter_id, token=token)
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
|
| 57 |
|
| 58 |
from transformers import pipeline, TextIteratorStreamer
|
|
@@ -79,7 +79,7 @@ def run_model_on_text(text):
|
|
| 79 |
|
| 80 |
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
| 81 |
|
| 82 |
-
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=True, repetition_penalty=1.2,)
|
| 83 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
| 84 |
thread.start()
|
| 85 |
|
|
|
|
| 49 |
load_in_4bit = True,
|
| 50 |
)
|
| 51 |
model.load_adapter(peft_model_adapter_id, token=token)
|
| 52 |
+
terminators = [
|
| 53 |
+
tokenizer.eos_token_id,
|
| 54 |
+
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
| 55 |
+
]
|
| 56 |
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
|
| 57 |
|
| 58 |
from transformers import pipeline, TextIteratorStreamer
|
|
|
|
| 79 |
|
| 80 |
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
| 81 |
|
| 82 |
+
generation_kwargs = dict(inputs, streamer=streamer,eos_token_id=terminators, max_new_tokens=1024, do_sample=True, repetition_penalty=1.2,)
|
| 83 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
| 84 |
thread.start()
|
| 85 |
|