philschmid HF staff committed on
Commit
4d500b9
1 Parent(s): a9d9c29

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +3 -3
handler.py CHANGED
@@ -2,15 +2,15 @@ import torch
2
  from typing import Dict, List, Any
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
 
5
# Pick the pipeline device index: first CUDA GPU when one exists, CPU otherwise
# (transformers pipelines use -1 to mean "run on CPU").
if torch.cuda.is_available():
    device = 0
else:
    device = -1
 
8
 
9
class EndpointHandler:
    """Inference-endpoint entry point wrapping a text-generation pipeline."""

    def __init__(self, path=""):
        # Load tokenizer and model weights from the checkpoint at `path`.
        tok = AutoTokenizer.from_pretrained(path)
        lm = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True)
        # Build the text-generation pipeline on the module-selected `device`
        # (defined at module level: 0 for the first GPU, -1 for CPU).
        self.pipeline = pipeline(
            "text-generation", model=lm, tokenizer=tok, device=device
        )
 
 
2
  from typing import Dict, List, Any
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
 
5
# Choose the half-precision compute dtype: bfloat16 on GPUs with compute
# capability >= 8 (Ampere and newer, which support bf16), float16 otherwise.
# Guarded with is_available() because torch.cuda.get_device_capability()
# raises a RuntimeError on a CPU-only host — the unguarded original crashed
# there at import time.
if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8:
    dtype = torch.bfloat16
else:
    dtype = torch.float16
7
 
8
 
9
class EndpointHandler:
    """Inference-endpoint entry point wrapping a text-generation pipeline."""

    def __init__(self, path=""):
        # Load the tokenizer and the model from the checkpoint at `path`.
        tokenizer = AutoTokenizer.from_pretrained(path)
        # device_map="auto" lets accelerate place the weights on the available
        # device(s); `dtype` is the module-level half-precision choice.
        model = AutoModelForCausalLM.from_pretrained(
            path, device_map="auto", torch_dtype=dtype
        )
        # Create the inference pipeline.  Do NOT pass `device=` here: this
        # commit deleted the module-level `device` variable, so the original
        # `device=device` raised a NameError — and a model already placed via
        # device_map="auto" must not be handed a `device` argument anyway.
        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)