Tony Neel
committed on
Commit
·
bf7dfcc
1
Parent(s):
796780d
add test endpoints and make handler work
Browse files- README.md +12 -0
- __pycache__/handler.cpython-310.pyc +0 -0
- handler.py +58 -59
- test_flask.py +44 -0
- test_local.py +48 -0
README.md
CHANGED
@@ -8,8 +8,20 @@ Repository for SAM 2: Segment Anything in Images and Videos, a foundation model
|
|
8 |
|
9 |
The official code is publicly released in this [repo](https://github.com/facebookresearch/segment-anything-2/).
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
## Usage
|
12 |
|
|
|
|
|
13 |
For image prediction:
|
14 |
|
15 |
```python
|
|
|
8 |
|
9 |
The official code is publicly released in this [repo](https://github.com/facebookresearch/segment-anything-2/).
|
10 |
|
11 |
+
# SAM2 Small Inference Endpoint
|
12 |
+
|
13 |
+
This repository contains the code for running SAM2 (Segment Anything Model 2) small model as a Hugging Face inference endpoint.
|
14 |
+
|
15 |
+
## Model Details
|
16 |
+
|
17 |
+
- Model: SAM2 Hiera Small
|
18 |
+
- Source: facebook/sam2-hiera-small
|
19 |
+
- Type: Segmentation model
|
20 |
+
|
21 |
## Usage
|
22 |
|
23 |
+
Send a POST request with an image to get segmentation masks:
|
24 |
+
|
25 |
For image prediction:
|
26 |
|
27 |
```python
|
__pycache__/handler.cpython-310.pyc
ADDED
Binary file (2.2 kB). View file
|
|
handler.py
CHANGED
@@ -5,11 +5,34 @@ import numpy as np
|
|
5 |
from PIL import Image
|
6 |
import io
|
7 |
import base64
|
|
|
8 |
|
9 |
-
class EndpointHandler:
|
10 |
-
def __init__(self
|
11 |
-
"""Initialize the handler with
|
|
|
12 |
self.predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-small")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
def _load_image(self, image_data: Union[str, bytes]) -> Image.Image:
|
15 |
"""Load image from binary or base64 data"""
|
@@ -24,67 +47,43 @@ class EndpointHandler:
|
|
24 |
except Exception as e:
|
25 |
raise ValueError(f"Failed to load image: {str(e)}")
|
26 |
|
27 |
-
def __call__(self,
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
Dictionary containing masks and scores
|
37 |
-
"""
|
38 |
-
try:
|
39 |
-
# Handle different input formats
|
40 |
-
if isinstance(data, dict):
|
41 |
-
image_data = data.get("inputs", data)
|
42 |
-
# Get optional point prompts
|
43 |
-
point_coords = data.get("point_coords", None)
|
44 |
-
point_labels = data.get("point_labels", None)
|
45 |
-
else:
|
46 |
-
image_data = data
|
47 |
-
point_coords = None
|
48 |
-
point_labels = None
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
point_coords = np.array(point_coords)
|
61 |
-
point_labels = np.array(point_labels)
|
62 |
-
masks, scores, logits = self.predictor.predict(
|
63 |
point_coords=point_coords,
|
64 |
point_labels=point_labels
|
65 |
)
|
66 |
-
else:
|
67 |
-
# Default automatic mask generation
|
68 |
-
masks, scores, logits = self.predictor.predict()
|
69 |
-
|
70 |
-
# Convert outputs to JSON-serializable format
|
71 |
-
if masks is not None:
|
72 |
-
masks = [mask.tolist() for mask in masks]
|
73 |
-
scores = scores.tolist() if scores is not None else None
|
74 |
-
|
75 |
-
return {
|
76 |
-
"masks": masks,
|
77 |
-
"scores": scores,
|
78 |
-
"status": "success"
|
79 |
-
}
|
80 |
else:
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
|
|
85 |
|
86 |
-
|
|
|
87 |
return {
|
88 |
-
"
|
89 |
-
"
|
90 |
-
|
|
|
|
|
|
5 |
from PIL import Image
|
6 |
import io
|
7 |
import base64
|
8 |
+
# NOTE(review): the previous version subclassed huggingface_hub.InferenceEndpoint,
# but that class is a management client for the Endpoints API, not a handler base
# class — custom handlers are plain classes named EndpointHandler.
class EndpointHandler:
    """Inference-endpoint handler wrapping the SAM2 Hiera Small predictor."""

    def __init__(self, path: str = ""):
        """Load the SAM2 small image predictor once at endpoint start-up.

        Args:
            path: Model directory supplied by the Inference Endpoints runtime
                (unused here; the predictor is pulled from the Hub directly).
        """
        self.predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-small")
        # BUG FIX: the previous code commented out the MockPredictor class but
        # still executed `self.predictor = MockPredictor()` afterwards, which
        # raised NameError and clobbered the real predictor loaded above.
        # The dangling assignment and the dead commented-out mock are removed.
36 |
|
37 |
def _load_image(self, image_data: Union[str, bytes]) -> Image.Image:
|
38 |
"""Load image from binary or base64 data"""
|
|
|
47 |
except Exception as e:
|
48 |
raise ValueError(f"Failed to load image: {str(e)}")
|
49 |
|
50 |
+
def __call__(self, image_bytes):
    """Segment an image and return JSON-serializable masks and scores.

    Args:
        image_bytes: Raw image bytes, or a dict with key 'image' (bytes)
            plus optional 'point_coords' / 'point_labels' prompt lists.

    Returns:
        dict with 'masks' (nested lists), 'scores', and 'status' on
        success, or an 'error' payload when no masks were produced.
    """
    from contextlib import nullcontext  # local import: keeps file imports untouched

    # Unpack optional point prompts when the request is a dict.
    if isinstance(image_bytes, dict):
        point_coords = image_bytes.get('point_coords')
        point_labels = image_bytes.get('point_labels')
        image_bytes = image_bytes['image']
    else:
        point_coords = None
        point_labels = None

    # BUG FIX: predict() expects numpy arrays for prompts; the previous
    # version passed the raw Python lists straight through (the pre-refactor
    # code did convert with np.array).
    if point_coords is not None:
        point_coords = np.array(point_coords)
    if point_labels is not None:
        point_labels = np.array(point_labels)

    # Decode bytes -> RGB numpy array.
    image = Image.open(io.BytesIO(image_bytes))
    if image.mode != 'RGB':
        image = image.convert('RGB')
    image_array = np.array(image)

    # Run inference; bfloat16 autocast only when CUDA is available.
    # (Deduplicates the previous copy-pasted CUDA / CPU branches.)
    autocast_ctx = (torch.autocast("cuda", dtype=torch.bfloat16)
                    if torch.cuda.is_available() else nullcontext())
    with torch.inference_mode(), autocast_ctx:
        self.predictor.set_image(image_array)
        masks, scores, _ = self.predictor.predict(
            point_coords=point_coords,
            point_labels=point_labels
        )

    # Format output as plain lists so the result is JSON-serializable.
    if masks is not None:
        return {
            "masks": [mask.tolist() for mask in masks],
            "scores": scores.tolist() if scores is not None else None,
            "status": "success"
        }
    return {"error": "No masks generated", "status": "error"}
|
test_flask.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask, request, jsonify
from handler import EndpointHandler
import torch
import ast

app = Flask(__name__)

# Initialize the handler once at startup (loads the SAM2 model).
handler = EndpointHandler()

@app.route('/predict', methods=['POST'])
def predict():
    """Accept a multipart image upload (plus optional point prompts) and
    return the handler's segmentation result as JSON."""
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No file selected'}), 400

    # Read the file bytes
    image_bytes = file.read()

    # Get point prompts if provided
    point_coords = request.form.get('point_coords')
    point_labels = request.form.get('point_labels')

    # Process with handler
    try:
        if point_coords and point_labels:
            # SECURITY FIX: these fields come from the client; eval() would
            # execute arbitrary code from the request. ast.literal_eval only
            # parses Python literals and is safe for "[[500, 375]]" / "[1]".
            point_coords = ast.literal_eval(point_coords)  # e.g. "[[500, 375]]"
            point_labels = ast.literal_eval(point_labels)  # e.g. "[1]"
            result = handler({
                'image': image_bytes,
                'point_coords': point_coords,
                'point_labels': point_labels
            })
        else:
            result = handler(image_bytes)
        return jsonify(result)
    except Exception as e:
        return jsonify({'error': str(e)}), 500

if __name__ == '__main__':
    app.run(debug=True, port=5000)
|
test_local.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
from pathlib import Path

def test_endpoint(image_path, point_coords=None, point_labels=None):
    """POST an image (and optional point prompts) to the local Flask server
    and print a short summary of the response."""
    endpoint_url = "http://localhost:5000/predict"

    # Keep the file handle open for the duration of the upload.
    with open(image_path, 'rb') as image_file:
        form_fields = {}
        # Point prompts travel as stringified lists in the form data.
        if point_coords is not None and point_labels is not None:
            form_fields['point_coords'] = str(point_coords)
            form_fields['point_labels'] = str(point_labels)
        response = requests.post(endpoint_url,
                                 files={'file': image_file},
                                 data=form_fields)

    print(f"Status Code: {response.status_code}")
    if response.status_code == 200:
        result = response.json()
        print("\nSuccess!")
        print(f"Number of masks: {len(result['masks']) if 'masks' in result else 0}")
        print(f"Scores: {result['scores'] if 'scores' in result else None}")
    else:
        print(f"Error: {response.text}")

if __name__ == "__main__":
    # Test with your image
    image_path = Path("images/20250121_gauge_0001.jpg")
    if not image_path.exists():
        print(f"Error: Image not found at {image_path}")
        exit(1)

    # Test without points
    print("\nTesting without points...")
    print(f"Testing with image: {image_path}")
    test_endpoint(image_path)

    # Test with points
    print("\nTesting with points...")
    test_endpoint(
        image_path,
        point_coords=[[500, 375]],  # Example coordinates
        point_labels=[1]  # 1 for foreground
    )
|