Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -27,7 +27,7 @@ peft_model_adapter_id = "nttwt1597/test_v2_cancer_v3"
|
|
| 27 |
|
| 28 |
#Commented out IPython magic to ensure Python compatibility.
|
| 29 |
#%%capture
|
| 30 |
-
major_version, minor_version = torch.cuda.get_device_capability()
|
| 31 |
# Must install separately since Colab has torch 2.2.1, which breaks packages
|
| 32 |
#!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
|
| 33 |
# if major_version >= 8:
|
|
@@ -49,10 +49,12 @@ model, tokenizer = FastLanguageModel.from_pretrained(
|
|
| 49 |
load_in_4bit = True,
|
| 50 |
)
|
| 51 |
model.load_adapter(peft_model_adapter_id, token=token)
|
|
|
|
| 52 |
terminators = [
|
| 53 |
tokenizer.eos_token_id,
|
| 54 |
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
| 55 |
]
|
|
|
|
| 56 |
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
|
| 57 |
|
| 58 |
from transformers import pipeline, TextIteratorStreamer
|
|
@@ -76,9 +78,6 @@ def run_model_on_text(text):
|
|
| 76 |
prompt = format_prompt(text)
|
| 77 |
inputs = tokenizer(prompt, return_tensors='pt')
|
| 78 |
|
| 79 |
-
# prompt is a new string stored in memory is not cuda.
|
| 80 |
-
# inputs = inputs.to(device)
|
| 81 |
-
|
| 82 |
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
| 83 |
|
| 84 |
generation_kwargs = dict(inputs, streamer=streamer,eos_token_id=terminators, max_new_tokens=1024, repetition_penalty=1.8,)
|
|
|
|
| 27 |
|
| 28 |
#Commented out IPython magic to ensure Python compatibility.
|
| 29 |
#%%capture
|
| 30 |
+
# major_version, minor_version = torch.cuda.get_device_capability()
|
| 31 |
# Must install separately since Colab has torch 2.2.1, which breaks packages
|
| 32 |
#!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
|
| 33 |
# if major_version >= 8:
|
|
|
|
| 49 |
load_in_4bit = True,
|
| 50 |
)
|
| 51 |
model.load_adapter(peft_model_adapter_id, token=token)
|
| 52 |
+
|
| 53 |
terminators = [
|
| 54 |
tokenizer.eos_token_id,
|
| 55 |
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
| 56 |
]
|
| 57 |
+
|
| 58 |
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
|
| 59 |
|
| 60 |
from transformers import pipeline, TextIteratorStreamer
|
|
|
|
| 78 |
prompt = format_prompt(text)
|
| 79 |
inputs = tokenizer(prompt, return_tensors='pt')
|
| 80 |
|
|
|
|
|
|
|
|
|
|
| 81 |
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
| 82 |
|
| 83 |
generation_kwargs = dict(inputs, streamer=streamer,eos_token_id=terminators, max_new_tokens=1024, repetition_penalty=1.8,)
|