philschmid HF staff committed on
Commit
abdc7a2
1 Parent(s): 1895bb8

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +2 -1
handler.py CHANGED
@@ -8,7 +8,8 @@ class EndpointHandler():
8
  # Preload all the elements you are going to need at inference.
9
  # pseudo:
10
  self.tokenizer = AutoTokenizer.from_pretrained("philschmid/falcon-40b-instruct-GPTQ-inference-endpoints", use_fast=False)
11
- self.model = AutoGPTQForCausalLM.from_quantized("philschmid/falcon-40b-instruct-GPTQ-inference-endpoints", device="cuda:0", use_triton=False, use_safetensors=True, trust_remote_code=True)
 
12
 
13
 
14
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
 
8
  # Preload all the elements you are going to need at inference.
9
  # pseudo:
10
  self.tokenizer = AutoTokenizer.from_pretrained("philschmid/falcon-40b-instruct-GPTQ-inference-endpoints", use_fast=False)
11
+ self.model = AutoGPTQForCausalLM.from_quantized("philschmid/falcon-40b-instruct-GPTQ-inference-endpoints", device="cuda:0", use_triton=False, use_safetensors=True, torch_dtype=torch.float32, trust_remote_code=True)
12
+
13
 
14
 
15
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]: