viethoangtranduong committed
Commit bde8fae
1 Parent(s): 1593b6e

Update handler.py

Files changed (1)
  1. handler.py +14 -76
handler.py CHANGED
@@ -2,84 +2,13 @@ import torch
  from typing import Dict, List, Any
  from transformers import AutoTokenizer, AutoModelForCausalLM

- # MAX_TOKENS_IN_BATCH = 4_000
- # DEFAULT_MAX_NEW_TOKENS = 10
-
-
- # class EndpointHandler():
- #     def __init__(self, path: str = ""):
-
- #         self.tokenizer = AutoTokenizer.from_pretrained(path)
- #         self.model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.float16)
- #         self.model = self.model.to('cuda:0')
-
- #     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
- #         """
- #         Args:
- #             data (:obj:):
- #                 includes the input data and the parameters for the inference.
- #         Return:
- #             A :obj:`list`:. The list contains the answer and scores of the inference inputs
- #         """
-
- #         prompts = [f"<human>: {prompt}\n<bot>:" for prompt in data["inputs"]]
-
- #         self.tokenizer.pad_token = self.tokenizer.eos_token
- #         inputs = self.tokenizer(prompts, padding=True, return_tensors='pt').to(self.model.device)
- #         input_length = inputs.input_ids.shape[1]
-
- #         outputs = self.model.generate(
- #             **inputs, **data["parameters"]
- #         )
-
- #         output_strs = self.tokenizer.batch_decode(outputs[:, input_length:], skip_special_tokens=True)
-
- #         return [{"generated_text": output_strs}]
-
- # import torch
- # from typing import Dict, List, Any
- # from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-
- # # get dtype
- # dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
-
-
- # class EndpointHandler:
- #     def __init__(self, path=""):
- #         # load the model
- #         tokenizer = AutoTokenizer.from_pretrained(path)
- #         model = AutoModelForCausalLM.from_pretrained(path, device_map="auto", torch_dtype=torch.float16)
-
- #     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
- #         """
- #         Args:
- #             data (:obj:):
- #                 includes the input data and the parameters for the inference.
- #         Return:
- #             A :obj:`list`:. The list contains the answer and scores of the inference inputs
- #         """
-
- #         inputs = data.pop("inputs", data)
- #         parameters = data.pop("parameters", None)
-
- #         prompts = [f"<human>: {prompt}\n<bot>:" for prompt in inputs]
-
- #         # pass inputs with all kwargs in data
- #         if parameters is not None:
- #             prediction = self.pipeline(inputs, **parameters)
- #         else:
- #             prediction = self.pipeline(inputs)
- #         # postprocess the prediction
- #         return prediction
-
-
- class EndpointHandler():
+ class EndpointHandler:
      def __init__(self, path: str = ""):

-         self.tokenizer = AutoTokenizer.from_pretrained(path)
+         self.tokenizer = AutoTokenizer.from_pretrained(path, padding_side = "left")
          self.model = AutoModelForCausalLM.from_pretrained(path, device_map = "auto", torch_dtype=torch.float16)

-     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
          """
          Args:
              data (:obj:):
@@ -87,13 +16,22 @@ class EndpointHandler():
          Return:
              A :obj:`list`:. The list contains the answer and scores of the inference inputs
          """
+
+         # process input
+         inputs_dict = data.pop("inputs", data)
+         parameters = data.pop("parameters", {})

-         prompts = [f"<human>: {prompt}\n<bot>:" for prompt in data["inputs"]]
+         prompts = [f"<human>: {prompt}\n<bot>:" for prompt in inputs_dict]

          self.tokenizer.pad_token = self.tokenizer.eos_token
-         inputs = self.tokenizer(prompts, padding=True, return_tensors='pt').to(self.model.device)
+
+         inputs = self.tokenizer(prompts, truncation=True, max_length=2048-512,
+                                 return_tensors='pt', padding=True).to(self.model.device)
          input_length = inputs.input_ids.shape[1]

+         if parameters.get("deterministic", False):
+             torch.manual_seed(42)
+
          outputs = self.model.generate(
              **inputs, max_new_tokens=512, do_sample=True, temperature=0.7, top_p=0.7, top_k=50
          )
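For reference, below is a minimal sketch of how the updated handler might be exercised; it is not part of the commit. The payload shape ({"inputs": [...], "parameters": {...}}) and the "deterministic" flag follow the code above, while the model path and the way the result is consumed are assumptions, since the diff ends at the generate() call and the decode/return step is not shown.

# Hypothetical smoke test for the updated EndpointHandler (illustration only).
# Assumes handler.py is importable and `model_path` points at a local copy of the model weights.
from handler import EndpointHandler

model_path = "./"  # placeholder path (assumption)
handler = EndpointHandler(path=model_path)

payload = {
    # a batch of raw prompts; each is wrapped as "<human>: ...\n<bot>:" inside __call__
    "inputs": [
        "What is the capital of France?",
        "Write a haiku about GPUs.",
    ],
    # setting "deterministic" makes __call__ seed torch before sampling
    "parameters": {"deterministic": True},
}

result = handler(payload)
print(result)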