booksouls committed
Commit 983b8e1
1 Parent(s): 820c012

upload handler.py and requirements.txt

Files changed (2)
  1. handler.py +43 -0
  2. requirements.txt +3 -0
handler.py ADDED
@@ -0,0 +1,43 @@
+ import torch
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, BitsAndBytesConfig
+ from typing import Any
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+         self.model = AutoModelForSeq2SeqLM.from_pretrained(path, device_map="auto")
+         self.tokenizer = AutoTokenizer.from_pretrained(path)
+
+     def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
+         inputs = data.get("inputs")
+         parameters = data.get("parameters")
+
+         if inputs is None:
+             raise ValueError("'inputs' is missing from the request body")
+
+         if not isinstance(inputs, str):
+             raise ValueError(f"Expected 'inputs' to be a str, but found {type(inputs)}")
+
+         if parameters is not None and not isinstance(parameters, dict):
+             raise ValueError(f"Expected 'parameters' to be a dict, but found {type(parameters)}")
+
+         # Truncate the input to 1024 tokens to prevent errors with BART and long text.
+         tokens = self.tokenizer(
+             inputs,
+             max_length=1024,
+             truncation=True,
+             return_tensors="pt",
+             return_attention_mask=False,
+         )
+
+         # Move the input_ids to the same device as the model to prevent device-mismatch errors.
+         input_ids = tokens.input_ids.to(self.model.device)
+
+         # Gradient calculation is not needed for inference.
+         with torch.no_grad():
+             if parameters is None:
+                 output = self.model.generate(input_ids)
+             else:
+                 output = self.model.generate(input_ids, **parameters)
+
+         generated_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
+         return {"generated_text": generated_text}
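
For context, a minimal local smoke test of this handler could look like the sketch below. The checkpoint name and payload values are illustrative assumptions, not part of the commit; any seq2seq checkpoint works, and a BART model matches the 1024-token truncation above.

# Hypothetical local test of handler.py; the model path and payload values
# are assumptions for illustration only.
from handler import EndpointHandler

handler = EndpointHandler(path="facebook/bart-large-cnn")

# The request body mirrors what the endpoint passes to __call__: a required
# "inputs" string plus an optional "parameters" dict forwarded to generate().
payload = {
    "inputs": "Long article text to summarize ...",
    "parameters": {"max_new_tokens": 128, "num_beams": 4},
}

result = handler(payload)
print(result["generated_text"])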
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ accelerate==0.31.0
+ bitsandbytes==0.43.1
+ transformers==4.40.2
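
Note: accelerate is what enables the device_map="auto" load in handler.py, and the bitsandbytes pin pairs with the BitsAndBytesConfig import there, although no quantization config is actually passed to from_pretrained in this commit.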