nttwt1597 committed
Commit f3ce64a · verified · 1 Parent(s): 2777380

Update app.py

Files changed (1): app.py (+3, -4)
app.py CHANGED
@@ -27,7 +27,7 @@ peft_model_adapter_id = "nttwt1597/test_v2_cancer_v3"
 
 #Commented out IPython magic to ensure Python compatibility.
 #%%capture
-major_version, minor_version = torch.cuda.get_device_capability()
+# major_version, minor_version = torch.cuda.get_device_capability()
 # Must install separately since Colab has torch 2.2.1, which breaks packages
 #!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
 # if major_version >= 8:
@@ -49,10 +49,12 @@ model, tokenizer = FastLanguageModel.from_pretrained(
     load_in_4bit = True,
 )
 model.load_adapter(peft_model_adapter_id, token=token)
+
 terminators = [
     tokenizer.eos_token_id,
     tokenizer.convert_tokens_to_ids("<|eot_id|>")
 ]
+
 FastLanguageModel.for_inference(model) # Enable native 2x faster inference
 
 from transformers import pipeline, TextIteratorStreamer
@@ -76,9 +78,6 @@ def run_model_on_text(text):
     prompt = format_prompt(text)
     inputs = tokenizer(prompt, return_tensors='pt')
 
-    # prompt is a new string stored in memory is not cuda.
-    # inputs = inputs.to(device)
-
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
     generation_kwargs = dict(inputs, streamer=streamer,eos_token_id=terminators, max_new_tokens=1024, repetition_penalty=1.8,)
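
For context on the streaming code in the last hunk: TextIteratorStreamer is designed to be consumed while model.generate() runs on a background thread, which is presumably how app.py drives these generation_kwargs. A minimal sketch of that pattern, assuming the model, tokenizer, terminators, and format_prompt defined earlier in the file; the function name stream_reply and the Thread wiring follow the standard transformers streaming recipe and are illustrative, not the file's exact code:

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(text):
    # Illustrative name; mirrors run_model_on_text from the diff above.
    prompt = format_prompt(text)
    inputs = tokenizer(prompt, return_tensors='pt')

    # Decode tokens as they are produced, dropping the echoed prompt and
    # special tokens such as <|eot_id|>.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = dict(
        inputs,                     # input_ids + attention_mask
        streamer=streamer,
        eos_token_id=terminators,   # stop on eos_token_id or <|eot_id|>
        max_new_tokens=1024,
        repetition_penalty=1.8,
    )

    # generate() blocks until it finishes, so it runs on a worker thread
    # while this generator yields partial text as it arrives.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    for chunk in streamer:
        yield chunk
    thread.join()

One device note: with a CUDA-resident model, the tokenized inputs generally need inputs.to(model.device) before generate(); the lines deleted in the last hunk were a commented-out version of that step.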