jscore2023 committed on
Commit
1f1bde4
1 Parent(s): 9a52399

Delete handler.py

Browse files
Files changed (1) hide show
  1. handler.py +0 -41
handler.py DELETED
@@ -1,41 +0,0 @@
1
- import torch
2
-
3
- from typing import Any, Dict
4
- from transformers import AutoModelForCausalLM, AutoTokenizer
5
-
6
-
7
class EndpointHandler:
    """Inference-endpoint handler: loads a causal LM and its tokenizer,
    then turns request payloads into generated text."""

    def __init__(self, path="", quantization_config=None):
        """Load the model and tokenizer from *path*.

        Args:
            path: Local directory or hub repo id of the model.
            quantization_config: Optional quantization config (e.g. a
                ``BitsAndBytesConfig``) forwarded to ``from_pretrained``.
                The original code passed an undefined name ``bnb_config``
                here, which raised ``NameError``; it is now an explicit,
                optional parameter defaulting to ``None`` (no quantization).
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForCausalLM.from_pretrained(
            path,
            return_dict=True,
            quantization_config=quantization_config,
            device_map="auto",
            trust_remote_code=True,
        )
        # device_map="auto" places the model; this device is used to move
        # the tokenized inputs alongside it.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

    def __call__(self, data: Dict[str, Any]) -> list[dict[str, str]]:
        """Handle one generation request.

        Args:
            data: Request payload. ``data["inputs"]`` is the prompt string
                (if absent, the whole payload is used as the prompt);
                ``data["parameters"]`` (optional) is a dict of kwargs
                forwarded verbatim to ``model.generate``.

        Returns:
            A one-element list ``[{"generated_text": <decoded output>}]``
            (the annotation previously claimed ``Dict[str, str]``, which
            did not match the actual return value).
        """
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)

        # Tokenize and move the input tensors next to the model.
        encoded = self.tokenizer(inputs, return_tensors="pt").to(self.device)

        # Forward any caller-supplied generation kwargs unchanged.
        if parameters is not None:
            outputs = self.model.generate(**encoded, **parameters)
        else:
            outputs = self.model.generate(**encoded)

        # Decode only the first (and only) sequence in the batch.
        prediction = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        return [{"generated_text": prediction}]