Update handler.py (#1)

- Update handler.py (28d71e804c8f898dab115c467cae57ba75117c0d)
- add accelerate (fcdae4394c3facf62719d9929ce63599fe1fdd7e)
- Create new file (f37fc950cf7c6ca71412d20b1fd1d5c83aeb04a1)

Co-authored-by: Philipp Schmid <philschmid@users.noreply.huggingface.co>

Files changed (2)

handler.py CHANGED

@@ -1,26 +1,28 @@
-import torch
-from typing import  Dict, List, Any
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 # check for GPU
-device = 0 if torch.cuda.is_available() else -1
-class EndpointHandler():
-    def __init__(self, path=""):
-        # load the model
-        tokenizer = AutoTokenizer.from_pretrained(path)
-        model = AutoModelForSeq2SeqLM.from_pretrained(path ,low_cpu_mem_usage=True)
-        # create inference pipeline
-        self.pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer,device=device)
-    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
-        inputs = data.pop("inputs", data)
-        parameters = data.pop("parameters", None)
-        # pass inputs with all kwargs in data
-        if parameters is not None:
-            prediction = self.pipeline(inputs, **parameters)
-        else:
-            prediction = self.pipeline(inputs)
-        # postprocess the prediction
-        return prediction

+import torch
+from typing import  Dict, List, Any
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 # check for GPU
+device = 0 if torch.cuda.is_available() else -1
+class EndpointHandler():
+    def __init__(self, path=""):
+        # load the model
+        tokenizer = AutoTokenizer.from_pretrained(path)
+        model = AutoModelForSeq2SeqLM.from_pretrained(path ,low_cpu_mem_usage=True)
+        # create inference pipeline
+        self.pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer,device=device)
+    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
+        inputs = data.pop("inputs", data)
+        parameters = data.pop("parameters", None)
+        # forward the optional "parameters" dict to the pipeline as keyword arguments
+        if parameters is not None:
+            prediction = self.pipeline(inputs, **parameters)
+        else:
+            prediction = self.pipeline(inputs)
+        # return the pipeline output unchanged (no post-processing is applied)
+        return prediction

requirements.txt ADDED

@@ -0,0 +1 @@
+accelerate