rlanner-echocap committed
Commit
742a64f
1 Parent(s): 0975dcd

Update handler.py


Moving device assignment to pipeline call

Files changed (1)
  1. handler.py +3 -7
handler.py CHANGED
@@ -10,14 +10,10 @@ dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.
 class EndpointHandler:
     def __init__(self, path=""):
         # load the model
-        tokenizer = AutoTokenizer.from_pretrained(path)
-
-        config = transformers.AutoConfig.from_pretrained(name, trust_remote_code=True)
-        config.init_device = 'cuda:0'
-
-        model = AutoModelForCausalLM.from_pretrained(path, config=config, device_map="auto", torch_dtype=dtype, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(path)
+        model = AutoModelForCausalLM.from_pretrained(path, device_map="auto", torch_dtype=dtype, trust_remote_code=True)
         # create inference pipeline
-        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
+        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device='cuda:0')
 
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
         inputs = data.pop("inputs", data)
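
For context, a minimal sketch of what the updated handler.py could look like as a whole after this commit. Only the lines visible in the diff above come from the actual file; the imports, the float16 fallback dtype (truncated in the hunk header), and the body of __call__ beyond its first line are assumptions based on the usual custom-handler pattern for Hugging Face Inference Endpoints.

from typing import Any, Dict, List

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# bfloat16 on compute capability 8.x GPUs; the fallback dtype is truncated in
# the diff header above, so float16 here is an assumption
dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16


class EndpointHandler:
    def __init__(self, path=""):
        # load the model
        tokenizer = AutoTokenizer.from_pretrained(path)
        model = AutoModelForCausalLM.from_pretrained(
            path, device_map="auto", torch_dtype=dtype, trust_remote_code=True
        )
        # create inference pipeline; device assignment now happens here
        # instead of via config.init_device on an AutoConfig
        self.pipeline = pipeline(
            "text-generation", model=model, tokenizer=tokenizer, device="cuda:0"
        )

    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
        inputs = data.pop("inputs", data)
        # hypothetical: forward optional generation parameters and return the
        # pipeline output (only the line above is taken from the diff)
        parameters = data.pop("parameters", {}) if isinstance(data, dict) else {}
        return self.pipeline(inputs, **parameters)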