ChirathD committed on
Commit edff4d8
1 Parent(s): 752710d

Create handler.py

Files changed (1)
  1. handler.py +64 -0
handler.py ADDED
@@ -0,0 +1,64 @@
+ from typing import Dict, Any
+ from PIL import Image
+ import torch
+ import io
+ import base64
+ from transformers import Blip2Processor, Blip2ForConditionalGeneration
+
+ # run on GPU when available, otherwise fall back to CPU
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+ class EndpointHandler():
+     def __init__(self, path=""):
+         # load the BLIP-2 processor and model from the Hub
+         print("####### Start Deploying #####")
+         self.processor = Blip2Processor.from_pretrained("ChirathD/Blip-2-test-1")
+         self.model = Blip2ForConditionalGeneration.from_pretrained("ChirathD/Blip-2-test-1")
+         self.model.to(device)
+         self.model.eval()
+
+     def __call__(self, data: Any) -> Dict[str, Any]:
+         """
+         Args:
+             data (:obj:`dict`):
+                 includes the base64-encoded input image under "inputs" and optional
+                 generation parameters under "parameters".
+         Return:
+             A :obj:`dict` of one list like {"captions": ["A hugging face at the office"]} containing:
+             - "captions": a list with the generated caption.
+         """
+         inputs = data.pop("inputs", data)
+         parameters = data.pop("parameters", {})
+
+         # decode the base64 payload into a PIL image
+         image_bytes = base64.b64decode(inputs)
+         image = Image.open(io.BytesIO(image_bytes))
+
+         # preprocess the image and generate a caption
+         processed = self.processor(images=image, return_tensors="pt")
+         pixel_values = processed.pixel_values.to(device)
+
+         with torch.no_grad():
+             generated_ids = self.model.generate(pixel_values=pixel_values, max_length=25, **parameters)
+         generated_caption = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+         print(generated_caption)
+
+         return {"captions": [generated_caption]}