rushabh14
/

sam-vit-base-with-handler

Model card Files Files and versions

CodeJackR committed on Jun 23

Commit

2f4ef92

·

1 Parent(s): e0fb0e6

Fix image upload errors

Files changed (1) hide show

handler.py +19 -12

handler.py CHANGED Viewed

@@ -29,26 +29,33 @@ class EndpointHandler():
             self.model = SamModel.from_pretrained("facebook/sam-vit-base").to(device)
             self.processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
-    def __call__(self, data):
         """
         Called on every HTTP request.
-        Expecting base64 encoded image in the 'inputs' field.
         """
         # 1. Parse and decode the input image
-        image_data = data.pop("inputs", None)
-        if not image_data:
-            raise ValueError("Missing 'inputs' key with a base64 image string.")
-        if isinstance(image_data, str) and image_data.startswith("data:"):
-            image_data = image_data.split(",", 1)[1]
-        image_bytes = base64.b64decode(image_data)
-        img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
         # 2. Prepare prompts and process the image
         height, width = img.size[1], img.size[0]
-        input_points = [[[width // 2, height // 2]]]  # Center point
-        input_labels = [[1]]  # Positive prompt
         inputs = self.processor(img, input_points=input_points, input_labels=input_labels, return_tensors="pt").to(device)

             self.model = SamModel.from_pretrained("facebook/sam-vit-base").to(device)
             self.processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
+    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
         Called on every HTTP request.
+        Handles both base64-encoded images and PIL images.
         """
         # 1. Parse and decode the input image
+        inputs = data.pop("inputs", None)
+        if inputs is None:
+            raise ValueError("Missing 'inputs' key in the payload.")
+        # Check the type of inputs to handle both base64 strings and pre-processed PIL Images
+        if isinstance(inputs, Image.Image):
+            # Input is already a PIL Image
+            img = inputs.convert("RGB")
+        elif isinstance(inputs, str):
+            # Input is a base64-encoded string
+            if inputs.startswith("data:"):
+                inputs = inputs.split(",", 1)[1]  # Handle data URL format
+            image_bytes = base64.b64decode(inputs)
+            img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+        else:
+            raise TypeError("Unsupported input type. Expected a PIL Image or a base64 encoded string.")
         # 2. Prepare prompts and process the image
         height, width = img.size[1], img.size[0]
+        input_points = [[[width // 2, height // 2]]]
+        input_labels = [[1]]
         inputs = self.processor(img, input_points=input_points, input_labels=input_labels, return_tensors="pt").to(device)