ammarnasr committed on
Commit
f2fdfe8
1 Parent(s): 1366ad0
Files changed (1) hide show
  1. handler.py +3 -2
handler.py CHANGED
@@ -1,5 +1,6 @@
1
  from transformers import AutoModelForCausalLM, AutoTokenizer
2
- from peft import PeftConfig, PeftModel
 
3
  import torch.cuda
4
  from typing import Any, Dict
5
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -7,7 +8,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
7
  class EndpointHandler():
8
  def __init__(self, path=""):
9
  config = PeftConfig.from_pretrained(path)
10
- model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, load_in_8bit=True, device_map='auto')
11
  self.tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
12
  # Load the Lora model
13
  self.model = PeftModel.from_pretrained(model, path)
 
1
  from transformers import AutoModelForCausalLM, AutoTokenizer
2
+ from peft import PeftConfig
3
+ from peft import PeftModel
4
  import torch.cuda
5
  from typing import Any, Dict
6
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
8
  class EndpointHandler():
9
  def __init__(self, path=""):
10
  config = PeftConfig.from_pretrained(path)
11
+ model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
12
  self.tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
13
  # Load the Lora model
14
  self.model = PeftModel.from_pretrained(model, path)