nttwt1597 committed
Commit f3ce64a · verified · 1 Parent(s): 2777380

Update app.py

Files changed (1): app.py (+3, -4)
app.py CHANGED
@@ -27,7 +27,7 @@ peft_model_adapter_id = "nttwt1597/test_v2_cancer_v3"
 
 #Commented out IPython magic to ensure Python compatibility.
 #%%capture
-major_version, minor_version = torch.cuda.get_device_capability()
+# major_version, minor_version = torch.cuda.get_device_capability()
 # Must install separately since Colab has torch 2.2.1, which breaks packages
 #!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
 # if major_version >= 8:
@@ -49,10 +49,12 @@ model, tokenizer = FastLanguageModel.from_pretrained(
     load_in_4bit = True,
 )
 model.load_adapter(peft_model_adapter_id, token=token)
+
 terminators = [
     tokenizer.eos_token_id,
     tokenizer.convert_tokens_to_ids("<|eot_id|>")
 ]
+
 FastLanguageModel.for_inference(model) # Enable native 2x faster inference
 
 from transformers import pipeline, TextIteratorStreamer
@@ -76,9 +78,6 @@ def run_model_on_text(text):
     prompt = format_prompt(text)
     inputs = tokenizer(prompt, return_tensors='pt')
 
-    # prompt is a new string stored in memory is not cuda.
-    # inputs = inputs.to(device)
-
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
     generation_kwargs = dict(inputs, streamer=streamer,eos_token_id=terminators, max_new_tokens=1024, repetition_penalty=1.8,)
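
For context on the streaming code in the last hunk: TextIteratorStreamer is designed to be consumed while model.generate() runs on a background thread, which is presumably how app.py drives these generation_kwargs. A minimal sketch of that pattern, assuming the model, tokenizer, terminators, and format_prompt defined earlier in the file; the function name stream_reply and the Thread wiring follow the standard transformers streaming recipe and are illustrative, not the file's exact code:

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(text):
    # Illustrative name; mirrors run_model_on_text from the diff above.
    prompt = format_prompt(text)
    inputs = tokenizer(prompt, return_tensors='pt')

    # Decode tokens as they are produced, dropping the echoed prompt and
    # special tokens such as <|eot_id|>.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = dict(
        inputs,                     # input_ids + attention_mask
        streamer=streamer,
        eos_token_id=terminators,   # stop on eos_token_id or <|eot_id|>
        max_new_tokens=1024,
        repetition_penalty=1.8,
    )

    # generate() blocks until it finishes, so it runs on a worker thread
    # while this generator yields partial text as it arrives.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    for chunk in streamer:
        yield chunk
    thread.join()

One device note: with a CUDA-resident model, the tokenized inputs generally need inputs.to(model.device) before generate(); the lines deleted in the last hunk were a commented-out version of that step.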