Ayesha-Majeed committed on
Commit
20be3dc
·
verified ·
1 Parent(s): 3fc284c

Update binary_segmentation.py

Browse files
Files changed (1) hide show
  1. binary_segmentation.py +124 -40
binary_segmentation.py CHANGED
@@ -566,81 +566,165 @@ class BinarySegmenter:
566
  except ImportError:
567
  raise ImportError("RMBG requires: pip install transformers")
568
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
569
  def segment(
570
  self,
571
  image: np.ndarray,
572
  threshold: float = 0.5,
573
  return_type: Literal["mask", "rgba", "both"] = "mask"
574
  ) -> Tuple[Optional[np.ndarray], Optional[Image.Image]]:
575
- """
576
- Segment foreground object from image.
577
-
578
- Args:
579
- image: Input image as numpy array (H, W, 3) in RGB or BGR
580
- threshold: Threshold for binary mask (0-1)
581
- return_type: What to return - "mask", "rgba", or "both"
582
-
583
- Returns:
584
- Tuple of (binary_mask, rgba_image) based on return_type
585
- """
586
- # Convert BGR to RGB if needed
587
  if len(image.shape) == 3 and image.shape[2] == 3:
588
- if image[0, 0, 0] != image[0, 0, 2]: # Simple heuristic
589
- image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
590
- else:
591
- image_rgb = image
592
  else:
593
  raise ValueError("Input must be a color image (H, W, 3)")
594
-
595
- # Convert to PIL
 
 
 
596
  image_pil = Image.fromarray(image_rgb)
597
- original_size = image_pil.size
598
-
599
- # Transform
600
  input_tensor = self.transform(image_pil).unsqueeze(0).to(DEVICE)
601
  if DEVICE == "cpu":
602
  input_tensor = input_tensor.float()
603
 
604
-
605
  # Inference
606
  with torch.no_grad():
607
  if self.model_type == "u2netp":
608
  outputs = self.model(input_tensor)
609
- pred = outputs[0] # Main output
610
  else: # birefnet or rmbg
611
  pred = self.model(input_tensor)[-1].sigmoid()
612
-
613
- # Post-process
614
  pred = pred.squeeze().cpu().numpy()
615
-
616
- # Resize to original
617
- pred_resized = cv2.resize(pred, original_size, interpolation=cv2.INTER_LINEAR)
618
-
 
 
 
 
 
 
 
 
 
619
  # Normalize to 0-255
620
- pred_normalized = ((pred_resized - pred_resized.min()) /
621
- (pred_resized.max() - pred_resized.min() + 1e-8) * 255)
622
-
623
- # Create binary mask
 
 
624
  binary_mask = (pred_normalized > (threshold * 255)).astype(np.uint8) * 255
625
-
626
- # Optional: Morphological operations for cleaner mask
627
  kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
628
  binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
629
  binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel)
630
-
 
 
 
 
 
 
631
  # Create RGBA if needed
632
  rgba_image = None
633
  if return_type in ["rgba", "both"]:
634
- # Create 4-channel image
635
  rgba = np.dstack([image_rgb, binary_mask])
636
  rgba_image = Image.fromarray(rgba, mode='RGBA')
637
-
638
- # Return based on type
 
 
 
639
  if return_type == "mask":
640
  return binary_mask, None
641
  elif return_type == "rgba":
642
  return None, rgba_image
643
- else: # both
644
  return binary_mask, rgba_image
645
 
646
  def batch_segment(
 
566
  except ImportError:
567
  raise ImportError("RMBG requires: pip install transformers")
568
 
569
569
+ # NOTE: The previous segment() implementation (single-pixel BGR heuristic,
570
+ # PIL-size-based resize) was removed here rather than kept as commented-out
571
+ # code; it is preserved in git history (parent commit 3fc284c) and is
572
+ # superseded by the segment() implementation that follows.
646
  def segment(
647
  self,
648
  image: np.ndarray,
649
  threshold: float = 0.5,
650
  return_type: Literal["mask", "rgba", "both"] = "mask"
651
  ) -> Tuple[Optional[np.ndarray], Optional[Image.Image]]:
652
+
653
+ # Convert BGR to RGB
 
 
 
 
 
 
 
 
 
 
654
  if len(image.shape) == 3 and image.shape[2] == 3:
655
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
 
 
 
656
  else:
657
  raise ValueError("Input must be a color image (H, W, 3)")
658
+
659
+ # Store ORIGINAL dimensions (H, W) from numpy
660
+ orig_h, orig_w = image.shape[:2]
661
+
662
+ # Convert to PIL for transforms
663
  image_pil = Image.fromarray(image_rgb)
664
+
665
+ # Transform (model resizes internally e.g. 320x320 / 512x512)
 
666
  input_tensor = self.transform(image_pil).unsqueeze(0).to(DEVICE)
667
  if DEVICE == "cpu":
668
  input_tensor = input_tensor.float()
669
 
 
670
  # Inference
671
  with torch.no_grad():
672
  if self.model_type == "u2netp":
673
  outputs = self.model(input_tensor)
674
+ pred = outputs[0]
675
  else: # birefnet or rmbg
676
  pred = self.model(input_tensor)[-1].sigmoid()
677
+
678
+ # Post-process - squeeze to 2D
679
  pred = pred.squeeze().cpu().numpy()
680
+
681
+ # ✅ FIX: Resize back to ORIGINAL (width, height) for cv2
682
+ # cv2.resize takes (width, height) = (orig_w, orig_h)
683
+ pred_resized = cv2.resize(
684
+ pred,
685
+ (orig_w, orig_h), # ← correct order for cv2
686
+ interpolation=cv2.INTER_LINEAR
687
+ )
688
+
689
+ # Verify shape matches original
690
+ assert pred_resized.shape == (orig_h, orig_w), \
691
+ f"Shape mismatch! Got {pred_resized.shape}, expected ({orig_h}, {orig_w})"
692
+
693
  # Normalize to 0-255
694
+ pred_normalized = (
695
+ (pred_resized - pred_resized.min()) /
696
+ (pred_resized.max() - pred_resized.min() + 1e-8) * 255
697
+ )
698
+
699
+ # Binary mask
700
  binary_mask = (pred_normalized > (threshold * 255)).astype(np.uint8) * 255
701
+
702
+ # Morphological cleanup
703
  kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
704
  binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
705
  binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel)
706
+
707
+ # ✅ Verify final mask dimensions match input
708
+ assert binary_mask.shape == (orig_h, orig_w), \
709
+ f"Final mask mismatch! Got {binary_mask.shape}, expected ({orig_h}, {orig_w})"
710
+
711
+ logger.info(f"Input shape: ({orig_h}, {orig_w}) | Output mask shape: {binary_mask.shape} ✅")
712
+
713
  # Create RGBA if needed
714
  rgba_image = None
715
  if return_type in ["rgba", "both"]:
 
716
  rgba = np.dstack([image_rgb, binary_mask])
717
  rgba_image = Image.fromarray(rgba, mode='RGBA')
718
+
719
+ # Verify RGBA dimensions
720
+ assert rgba_image.size == (orig_w, orig_h), \
721
+ f"RGBA size mismatch! Got {rgba_image.size}, expected ({orig_w}, {orig_h})"
722
+
723
  if return_type == "mask":
724
  return binary_mask, None
725
  elif return_type == "rgba":
726
  return None, rgba_image
727
+ else:
728
  return binary_mask, rgba_image
729
 
730
  def batch_segment(