booksouls committed
Commit 983b8e1
1 Parent(s): 820c012

upload handler.py and requirements.txt

Files changed (2)
  1. handler.py +43 -0
  2. requirements.txt +3 -0
handler.py ADDED
@@ -0,0 +1,43 @@
+ import torch
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, BitsAndBytesConfig
+ from typing import Any
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+         self.model = AutoModelForSeq2SeqLM.from_pretrained(path, device_map="auto")
+         self.tokenizer = AutoTokenizer.from_pretrained(path)
+
+     def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
+         inputs = data.get("inputs")
+         parameters = data.get("parameters")
+
+         if inputs is None:
+             raise ValueError("'inputs' is missing from the request body")
+
+         if not isinstance(inputs, str):
+             raise ValueError(f"Expected 'inputs' to be a str, but found {type(inputs)}")
+
+         if parameters is not None and not isinstance(parameters, dict):
+             raise ValueError(f"Expected 'parameters' to be a dict, but found {type(parameters)}")
+
+         # Truncate the input to 1024 tokens to prevent errors with BART and long text.
+         tokens = self.tokenizer(
+             inputs,
+             max_length=1024,
+             truncation=True,
+             return_tensors="pt",
+             return_attention_mask=False,
+         )
+
+         # Move the input_ids to the same device as the model to prevent device-mismatch errors.
+         input_ids = tokens.input_ids.to(self.model.device)
+
+         # Gradient calculation is not needed for inference.
+         with torch.no_grad():
+             if parameters is None:
+                 output = self.model.generate(input_ids)
+             else:
+                 output = self.model.generate(input_ids, **parameters)
+
+         generated_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
+         return {"generated_text": generated_text}
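
For context, a minimal local smoke test of this handler could look like the sketch below. The checkpoint name and payload values are illustrative assumptions, not part of the commit; any seq2seq checkpoint works, and a BART model matches the 1024-token truncation above.

# Hypothetical local test of handler.py; the model path and payload values
# are assumptions for illustration only.
from handler import EndpointHandler

handler = EndpointHandler(path="facebook/bart-large-cnn")

# The request body mirrors what the endpoint passes to __call__: a required
# "inputs" string plus an optional "parameters" dict forwarded to generate().
payload = {
    "inputs": "Long article text to summarize ...",
    "parameters": {"max_new_tokens": 128, "num_beams": 4},
}

result = handler(payload)
print(result["generated_text"])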
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ accelerate==0.31.0
+ bitsandbytes==0.43.1
+ transformers==4.40.2
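
Note: accelerate is what enables the device_map="auto" load in handler.py, and the bitsandbytes pin pairs with the BitsAndBytesConfig import there, although no quantization config is actually passed to from_pretrained in this commit.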