Spaces:

VikramSingh178
/

picpilot-server

Paused

App Files Files Community

VikramSingh178 committed on May 26

Commit

cca63d4

•

1 Parent(s): 88e9206

chore: Install libgl1-mesa-glx for compatibility with image processing libraries

Browse files

Files changed (4) hide show

models/yolov8s.pt.REMOVED.git-id +1 -0
run.sh +1 -0
scripts/__pycache__/config.cpython-310.pyc +0 -0
scripts/utils.py +100 -85

models/yolov8s.pt.REMOVED.git-id ADDED Viewed

	@@ -0,0 +1 @@


1	+ 5f7efb1ee991ebccb1ee9a360066829e6435a168

run.sh CHANGED Viewed

@@ -1,2 +1,3 @@
 apt-get update && apt-get install python3-dev
 pip install -r requirements.txt

 apt-get update && apt-get install python3-dev
 pip install -r requirements.txt
+apt install libgl1-mesa-glx

scripts/__pycache__/config.cpython-310.pyc CHANGED Viewed

Binary files a/scripts/__pycache__/config.cpython-310.pyc and b/scripts/__pycache__/config.cpython-310.pyc differ

scripts/utils.py CHANGED Viewed

@@ -4,7 +4,8 @@ from transformers import SamModel, SamProcessor
 import numpy as np
 from PIL import Image
 from config import SEGMENTATION_MODEL_NAME
 def accelerator():
     """
@@ -14,12 +15,11 @@ def accelerator():
         str: The name of the device accelerator ('cuda', 'mps', or 'cpu').
     """
     if torch.cuda.is_available():
-        device = "cuda"
     elif torch.backends.mps.is_available():
-        device = "mps"
     else:
-        device = "cpu"
-    return device
 class ImageAugmentation:
@@ -27,109 +27,124 @@ class ImageAugmentation:
     Class for centering an image on a white background using ROI.
     Attributes:
-        background_size (tuple): Size of the larger background where the image will be placed.
     """
-    def __init__(self, background_size=(1920, 1080)):
         """
         Initialize ImageAugmentation class.
         Args:
-            background_size (tuple, optional): Size of the larger background. Default is (1920, 1080).
         """
-        self.background_size = background_size
-    def center_image_on_background(self, image, roi):
         """
-        Center the input image on a larger background using ROI.
         Args:
-            image (numpy.ndarray): Input image.
-            roi (tuple): Coordinates of the region of interest (x, y, width, height).
         Returns:
-            numpy.ndarray: Image centered on a larger background.
         """
-        w, h = self.background_size
-        bg = np.ones((h, w, 3), dtype=np.uint8) * 255  # White background
-        x, y, roi_w, roi_h = roi
-        bg[
-            (h - roi_h) // 2 : (h - roi_h) // 2 + roi_h,
-            (w - roi_w) // 2 : (w - roi_w) // 2 + roi_w,
-        ] = image
-        return bg
-    def detect_region_of_interest(self, image):
         """
-        Detect the region of interest in the input image.
         Args:
-            image (numpy.ndarray): Input image.
         Returns:
-            tuple: Coordinates of the region of interest (x, y, width, height).
         """
-        # Convert image to grayscale
-        grayscale_image = np.array(Image.fromarray(image).convert("L"))
-        # Calculate bounding box of non-zero region
-        bbox = Image.fromarray(grayscale_image).getbbox()
         return bbox
-def generate_bbox(image):
-    """
-    Generate bounding box for the input image.
-    Args:
-        image_path (str): Path to the input image.
-    Returns:
-        tuple: Bounding box coordinates (x, y, width, height).
-    """
-    # Load YOLOv5 model
-    model = YOLO("../models/yolov8s.pt")
-    results = model(image)
-    # Get bounding box coordinates
-    bbox = results[0].boxes.xyxy.int().tolist()
-    return bbox
-def generate_mask(image):
-    """
-    Generates masks for the given image using a segmentation model.
-    Args:
-        image: The input image for which masks need to be generated.
-    Returns:
-        masks: A tensor containing the generated masks.
-    Raises:
-        None
-    """
-    model = SamModel.from_pretrained(SEGMENTATION_MODEL_NAME).to(device=accelerator())
-    processor = SamProcessor.from_pretrained(SEGMENTATION_MODEL_NAME)
-    inputs = processor(
-        image, input_boxes=[generate_bbox(image)], return_tensors="pt"
-    ).to(torch.float)
-    inputs.to(device=accelerator())
-    outputs = model(**inputs)
-    mask = processor.image_processor.post_process_masks(
-        outputs.pred_masks.cpu(),
-        inputs["original_sizes"].cpu(),
-        inputs["reshaped_input_sizes"].cpu(),
-    )
-    return mask
 if __name__ == "__main__":
-    augmenter = ImageAugmentation()
-    image_path = "/Users/vikram/Python/product_diffusion_api/sample_data/example1.jpg"
-    image = np.array(Image.open(image_path).convert("RGB"))
-    roi = augmenter.detect_region_of_interest(image)
-    centered_image = augmenter.center_image_on_background(image, roi)
-    masks = generate_mask(Image.fromarray(centered_image))
-    masks = np.array(masks)
-    mask_image = Image.fromarray(masks[0])
-    mask_image.save("mask.jpg")

 import numpy as np
 from PIL import Image
 from config import SEGMENTATION_MODEL_NAME
+import cv2
+import matplotlib.pyplot as plt
 def accelerator():
     """
         str: The name of the device accelerator ('cuda', 'mps', or 'cpu').
     """
     if torch.cuda.is_available():
+        return "cuda"
     elif torch.backends.mps.is_available():
+        return "mps"
     else:
+        return "cpu"
 class ImageAugmentation:
     Class for centering an image on a white background using ROI.
     Attributes:
+        target_width (int): Desired width of the extended image.
+        target_height (int): Desired height of the extended image.
+        roi_scale (float): Scale factor to determine the size of the region of interest (ROI) in the original image.
     """
+    def __init__(self, target_width, target_height, roi_scale=0.5):
         """
         Initialize ImageAugmentation class.
         Args:
+            target_width (int): Desired width of the extended image.
+            target_height (int): Desired height of the extended image.
+            roi_scale (float): Scale factor to determine the size of the region of interest (ROI) in the original image.
         """
+        self.target_width = target_width
+        self.target_height = target_height
+        self.roi_scale = roi_scale
+    def extend_image(self, image_path):
         """
+        Extends the given image to the specified target dimensions while maintaining the aspect ratio of the original image.
+        The image is centered based on the detected region of interest (ROI).
         Args:
+            image_path (str): The path to the image file.
         Returns:
+            PIL.Image.Image: The extended image with the specified dimensions.
         """
+        # Open the original image
+        original_image = cv2.imread(image_path)
+        # Convert the image to grayscale for better edge detection
+        gray_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
+        # Perform edge detection to find contours
+        edges = cv2.Canny(gray_image, 50, 150)
+        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+        # Find the largest contour (assumed to be the ROI)
+        largest_contour = max(contours, key=cv2.contourArea)
+        # Get the bounding box of the largest contour
+        x, y, w, h = cv2.boundingRect(largest_contour)
+        # Calculate the center of the bounding box
+        roi_center_x = x + w // 2
+        roi_center_y = y + h // 2
+        # Calculate the top-left coordinates of the ROI
+        roi_x = max(0, roi_center_x - self.target_width // 2)
+        roi_y = max(0, roi_center_y - self.target_height // 2)
+        # Crop the ROI from the original image
+        roi = original_image[roi_y:roi_y+self.target_height, roi_x:roi_x+self.target_width]
+        # Create a new white background image with the target dimensions
+        extended_image = np.ones((self.target_height, self.target_width, 3), dtype=np.uint8) * 255
+        # Calculate the paste position for centering the ROI
+        paste_x = (self.target_width - roi.shape[1]) // 2
+        paste_y = (self.target_height - roi.shape[0]) // 2
+        # Paste the ROI onto the white background
+        extended_image[paste_y:paste_y+roi.shape[0], paste_x:paste_x+roi.shape[1]] = roi
+        return Image.fromarray(cv2.cvtColor(extended_image, cv2.COLOR_BGR2RGB))
+    def generate_bbox(self, image):
         """
+        Generate bounding box for the input image.
         Args:
+            image: The input image.
         Returns:
+            list: Bounding box coordinates [x_min, y_min, x_max, y_max].
         """
+        model = YOLO("yolov8s.pt")
+        results = model(image)
+        bbox = results[0].boxes.xyxy.tolist()
         return bbox
+    def generate_mask(self, image, bbox):
+        """
+        Generates masks for the given image using a segmentation model.
+        Args:
+            image: The input image for which masks need to be generated.
+            bbox: Bounding box coordinates [x_min, y_min, x_max, y_max].
+        Returns:
+            numpy.ndarray: The generated mask.
+        """
+        model = SamModel.from_pretrained(SEGMENTATION_MODEL_NAME).to(device=accelerator())
+        processor = SamProcessor.from_pretrained(SEGMENTATION_MODEL_NAME)
+        # Ensure bbox is in the correct format
+        bbox_list = [bbox]  # Convert bbox to list of lists
+        # Pass bbox as a list of lists to SamProcessor
+        inputs = processor(image, input_boxes=bbox_list, return_tensors="pt").to(device=accelerator())
+        with torch.no_grad():
+          outputs = model(**inputs)
+        masks = processor.image_processor.post_process_masks(
+            outputs.pred_masks,
+            inputs["original_sizes"],
+            inputs["reshaped_input_sizes"],
+        )
+        return masks[0].cpu().numpy()
 if __name__ == "__main__":
+    augmenter = ImageAugmentation(target_width=1920, target_height=1080, roi_scale=0.3)
+    image_path = "/home/product_diffusion_api/sample_data/example1.jpg"
+    extended_image = augmenter.extend_image(image_path)
+    bbox = augmenter.generate_bbox(extended_image)
+    mask = augmenter.generate_mask(extended_image, bbox)
+    plt.imsave('mask.jpg', mask)
+    #Image.fromarray(mask).save("centered_image_with_mask.jpg")