CodeJackR committed on
Commit
c78d04e
·
1 Parent(s): f9b3f94

Input image as image

Browse files
Files changed (1) hide show
  1. handler.py +23 -36
handler.py CHANGED
@@ -9,6 +9,9 @@ from transformers import SamModel, SamProcessor
9
  from typing import Dict, List, Any
10
  import torch.nn.functional as F
11
 
 
 
 
12
  class EndpointHandler():
13
  def __init__(self, path=""):
14
  """
@@ -17,51 +20,29 @@ class EndpointHandler():
17
  """
18
  try:
19
  # Load the model and processor from the local path
20
- self.model = SamModel.from_pretrained(path)
21
  self.processor = SamProcessor.from_pretrained(path)
22
  except Exception as e:
23
  # Fallback to loading from a known SAM model if local loading fails
24
  print(f"Failed to load from local path: {e}")
25
  print("Attempting to load from facebook/sam-vit-base")
26
- self.model = SamModel.from_pretrained("facebook/sam-vit-base")
27
  self.processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
28
 
29
- def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
30
  """
31
  Called on every HTTP request.
32
- Expecting base64 encoded image in the 'inputs' field or 'image' field.
 
 
33
  """
34
- # Handle different input formats
35
- if "inputs" in data:
36
- if isinstance(data["inputs"], str):
37
- # Handle data URL format (data:image/jpeg;base64,...)
38
- image_data = data["inputs"]
39
- if image_data.startswith("data:"):
40
- # Strip data URL prefix
41
- image_data = image_data.split(",", 1)[1]
42
- # Base64 encoded image
43
- image_bytes = base64.b64decode(image_data)
44
- elif isinstance(data["inputs"], dict) and "image" in data["inputs"]:
45
- # Nested structure with image field
46
- image_data = data["inputs"]["image"]
47
- if image_data.startswith("data:"):
48
- # Strip data URL prefix
49
- image_data = image_data.split(",", 1)[1]
50
- image_bytes = base64.b64decode(image_data)
51
- else:
52
- raise ValueError("Invalid input format. Expected base64 encoded image string.")
53
- elif "image" in data:
54
- # Direct image field
55
- image_data = data["image"]
56
- if image_data.startswith("data:"):
57
- # Strip data URL prefix
58
- image_data = image_data.split(",", 1)[1]
59
- image_bytes = base64.b64decode(image_data)
60
- else:
61
- raise ValueError("No image found in request. Expected 'inputs' or 'image' field with base64 encoded image.")
62
-
63
- # Process the image
64
- img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
65
 
66
  # SAM requires input prompts, so we'll generate a center point prompt
67
  height, width = img.size[1], img.size[0] # PIL returns (width, height)
@@ -120,8 +101,14 @@ class EndpointHandler():
120
  out.seek(0)
121
  mask_base64 = base64.b64encode(out.getvalue()).decode('utf-8')
122
 
 
 
 
 
 
 
123
  # Return in the expected format
124
- return [{"mask_png_base64": mask_base64, "num_masks": 1}]
125
 
126
  def main():
127
  # Hardcoded input and output paths
 
9
  from typing import Dict, List, Any
10
  import torch.nn.functional as F
11
 
12
+ # set device
13
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
14
+
15
  class EndpointHandler():
16
  def __init__(self, path=""):
17
  """
 
20
  """
21
  try:
22
  # Load the model and processor from the local path
23
+ self.model = SamModel.from_pretrained(path).to(device)
24
  self.processor = SamProcessor.from_pretrained(path)
25
  except Exception as e:
26
  # Fallback to loading from a known SAM model if local loading fails
27
  print(f"Failed to load from local path: {e}")
28
  print("Attempting to load from facebook/sam-vit-base")
29
+ self.model = SamModel.from_pretrained("facebook/sam-vit-base").to(device)
30
  self.processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
31
 
32
+ def __call__(self, data: Any) -> Any:
33
  """
34
  Called on every HTTP request.
35
+ Args:
36
+ data (:obj:):
37
+ includes the input data and the parameters for the inference.
38
  """
39
+ inputs = data.pop("inputs", data)
40
+ parameters = data.pop("parameters", {})
41
+
42
+ raw_images = [Image.open(io.BytesIO(_img)) for _img in inputs]
43
+
44
+ # img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
45
+ img = raw_images[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  # SAM requires input prompts, so we'll generate a center point prompt
48
  height, width = img.size[1], img.size[0] # PIL returns (width, height)
 
101
  out.seek(0)
102
  mask_base64 = base64.b64encode(out.getvalue()).decode('utf-8')
103
 
104
+ # Decode the returned mask and save
105
+ mask_bytes = base64.b64decode(mask_base64)
106
+ mask_img = Image.open(io.BytesIO(mask_bytes)).convert("RGB")
107
+ # mask_img.save(output_path, format="JPEG")
108
+ # print(f"Wrote mask to {output_path}")
109
+
110
  # Return in the expected format
111
+ return mask_img
112
 
113
  def main():
114
  # Hardcoded input and output paths