Merge pull request #24 from VikramxD/v2
V2
Former-commit-id: 62e8d7135a648288fb058a3c574167c155d48711
- scripts/__pycache__/config.cpython-310.pyc +0 -0
- scripts/config.py +2 -1
- scripts/extended_image.png +0 -0
- scripts/mask.png +0 -0
- scripts/utils.py +53 -101
scripts/__pycache__/config.cpython-310.pyc
CHANGED
Binary files a/scripts/__pycache__/config.cpython-310.pyc and b/scripts/__pycache__/config.cpython-310.pyc differ
scripts/config.py
CHANGED
@@ -6,7 +6,8 @@ DATASET_NAME= "hahminlew/kream-product-blip-captions"
 PROJECT_NAME = "Product Photography"
 PRODUCTS_10k_DATASET = "VikramSingh178/Products-10k-BLIP-captions"
 CAPTIONING_MODEL_NAME = "Salesforce/blip-image-captioning-base"
-SEGMENTATION_MODEL_NAME = "facebook/sam-vit-
+SEGMENTATION_MODEL_NAME = "facebook/sam-vit-large"
+DETECTION_MODEL_NAME = "yolov8s"
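For reference, the two constants added here are consumed by the new code in scripts/utils.py (diffed below): DETECTION_MODEL_NAME names a YOLOv8 checkpoint for ultralytics.YOLO, and SEGMENTATION_MODEL_NAME a SAM checkpoint loaded through transformers. A minimal sketch of how they resolve, using only the loaders the PR itself imports:

```python
from ultralytics import YOLO
from transformers import SamModel, SamProcessor

from config import DETECTION_MODEL_NAME, SEGMENTATION_MODEL_NAME

# "yolov8s" resolves to the small YOLOv8 detector checkpoint (yolov8s.pt),
# which ultralytics downloads on first use.
detector = YOLO(DETECTION_MODEL_NAME)

# "facebook/sam-vit-large" is fetched from the Hugging Face Hub.
processor = SamProcessor.from_pretrained(SEGMENTATION_MODEL_NAME)
model = SamModel.from_pretrained(SEGMENTATION_MODEL_NAME)
```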
scripts/extended_image.png
ADDED
scripts/mask.png
ADDED
scripts/utils.py
CHANGED
@@ -2,10 +2,11 @@ import torch
 from ultralytics import YOLO
 from transformers import SamModel, SamProcessor
 import numpy as np
-from PIL import Image
-from config import SEGMENTATION_MODEL_NAME
-import cv2
-
+from PIL import Image, ImageOps
+from config import SEGMENTATION_MODEL_NAME, DETECTION_MODEL_NAME
+from diffusers.utils import load_image
+
+
 
 def accelerator():
     """
@@ -21,7 +22,6 @@ def accelerator():
     else:
         return "cpu"
 
-
 class ImageAugmentation:
     """
     Class for centering an image on a white background using ROI.
@@ -32,119 +32,71 @@ class ImageAugmentation:
         roi_scale (float): Scale factor to determine the size of the region of interest (ROI) in the original image.
     """
 
-    def __init__(self, target_width, target_height, roi_scale=0.
-        """
-        Initialize ImageAugmentation class.
-
-        Args:
-            target_width (int): Desired width of the extended image.
-            target_height (int): Desired height of the extended image.
-            roi_scale (float): Scale factor to determine the size of the region of interest (ROI) in the original image.
-        """
+    def __init__(self, target_width, target_height, roi_scale=0.6):
         self.target_width = target_width
         self.target_height = target_height
         self.roi_scale = roi_scale
 
-    def extend_image(self,
+    def extend_image(self, image: Image) -> Image:
         """
-        Extends
-        The image is centered based on the detected region of interest (ROI).
-
-        Args:
-            image_path (str): The path to the image file.
-
-        Returns:
-            PIL.Image.Image: The extended image with the specified dimensions.
+        Extends an image to fit within the specified target dimensions while maintaining the aspect ratio.
         """
-        ...
-        # Get the bounding box of the largest contour
-        x, y, w, h = cv2.boundingRect(largest_contour)
-
-        # Calculate the center of the bounding box
-        roi_center_x = x + w // 2
-        roi_center_y = y + h // 2
-
-        # Calculate the top-left coordinates of the ROI
-        roi_x = max(0, roi_center_x - self.target_width // 2)
-        roi_y = max(0, roi_center_y - self.target_height // 2)
-
-        # Crop the ROI from the original image
-        roi = original_image[roi_y:roi_y+self.target_height, roi_x:roi_x+self.target_width]
-
-        # Create a new white background image with the target dimensions
-        extended_image = np.ones((self.target_height, self.target_width, 3), dtype=np.uint8) * 255
-
-        # Calculate the paste position for centering the ROI
-        paste_x = (self.target_width - roi.shape[1]) // 2
-        paste_y = (self.target_height - roi.shape[0]) // 2
-
-        # Paste the ROI onto the white background
-        extended_image[paste_y:paste_y+roi.shape[0], paste_x:paste_x+roi.shape[1]] = roi
-
-        return Image.fromarray(cv2.cvtColor(extended_image, cv2.COLOR_BGR2RGB))
-
-
-    def generate_bbox(self, image):
-        """
-        ...
-
-        Args:
-            ...
-
-        Returns:
-            ...
-        """
-        ...
-
-    def generate_mask(self, image, bbox):
-        """
-        Generates masks for the given image using a segmentation model.
-
-        Args:
-            image: The input image for which masks need to be generated.
-            bbox: Bounding box coordinates [x_min, y_min, x_max, y_max].
-
-        """
-        model = SamModel.from_pretrained(SEGMENTATION_MODEL_NAME).to(device=accelerator())
-        processor = SamProcessor.from_pretrained(SEGMENTATION_MODEL_NAME)
-
-        # Ensure bbox is in the correct format
-        bbox_list = [bbox]  # Convert bbox to list of lists
-
-        # Pass bbox as a list of lists to SamProcessor
-        inputs = processor(image, input_boxes=bbox_list, return_tensors="pt").to(device=accelerator())
-        with torch.no_grad():
-            outputs = model(**inputs)
-        masks = processor.image_processor.post_process_masks(
-            outputs.pred_masks,
-            inputs["original_sizes"],
-            inputs["reshaped_input_sizes"],
-        )
+        original_width, original_height = image.size
+        scale = min(self.target_width / original_width, self.target_height / original_height)
+        new_width = int(original_width * scale * self.roi_scale)
+        new_height = int(original_height * scale * self.roi_scale)
+        resized_image = image.resize((new_width, new_height))
+        extended_image = Image.new("RGB", (self.target_width, self.target_height), "white")
+        paste_x = (self.target_width - new_width) // 2
+        paste_y = (self.target_height - new_height) // 2
+        extended_image.paste(resized_image, (paste_x, paste_y))
+        return extended_image
+
+    def generate_mask_from_bbox(self, image: Image, segmentation_model: str, detection_model) -> Image:
+        """
+        Generates a mask from the bounding box of an image using YOLO and SAM-ViT models.
+
+        Args:
+            image_path (str): The path to the input image.
+
+        Returns:
+            numpy.ndarray: The generated mask as a NumPy array.
+        """
+
+        yolo = YOLO(detection_model)
+        processor = SamProcessor.from_pretrained(segmentation_model)
+        model = SamModel.from_pretrained(segmentation_model).to(device=accelerator())
+        results = yolo(image)
+        bboxes = results[0].boxes.xyxy.tolist()
+        input_boxes = [[[bboxes[0]]]]
+        inputs = processor(load_image(image), input_boxes=input_boxes, return_tensors="pt").to("cuda")
+        with torch.no_grad():
+            outputs = model(**inputs)
+        mask = processor.image_processor.post_process_masks(
+            outputs.pred_masks.cpu(),
+            inputs["original_sizes"].cpu(),
+            inputs["reshaped_input_sizes"].cpu()
+        )[0][0][0].numpy()
+        mask_image = Image.fromarray(mask)
+        return mask_image
+
+    def invert_mask(self, mask_image: np.ndarray) -> np.ndarray:
+        """
+        Inverts the given mask image.
+        """
+        inverted_mask_pil = ImageOps.invert(mask_image.convert("L"))
+        return inverted_mask_pil
 
 if __name__ == "__main__":
-    augmenter = ImageAugmentation(target_width=
-    image_path = "/home/product_diffusion_api/sample_data/
-    mask = augmenter.
+    augmenter = ImageAugmentation(target_width=2560, target_height=1440, roi_scale=0.7)
+    image_path = "/home/product_diffusion_api/sample_data/example3.jpg"
+    image = Image.open(image_path)
+    extended_image = augmenter.extend_image(image)
+    mask = augmenter.generate_mask_from_bbox(extended_image, SEGMENTATION_MODEL_NAME, DETECTION_MODEL_NAME)
+    inverted_mask_image = augmenter.invert_mask(mask)
+    mask.save("mask.jpg")
+    inverted_mask_image.save("inverted_mask.jpg")
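A caveat when reading generate_mask_from_bbox above: post_process_masks yields boolean masks, and PIL.Image.fromarray does not accept bool NumPy arrays, so Image.fromarray(mask) as committed will typically raise a TypeError; note also that inputs is moved to a hard-coded "cuda" device while the model is placed via accelerator(), which diverges on CPU-only hosts. A minimal sketch of the usual cast (the 0/255 scaling and mode="L" are my assumption, not part of this commit):

```python
import numpy as np
from PIL import Image

# mask: boolean HxW array, e.g. post_process_masks(...)[0][0][0].numpy()
mask_uint8 = mask.astype(np.uint8) * 255   # bool -> {0, 255} grayscale
mask_image = Image.fromarray(mask_uint8, mode="L")
mask_image.save("mask.jpg")
```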
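The mask.jpg / inverted_mask.jpg pair produced by the __main__ block is the shape of input a diffusers inpainting pipeline consumes (white mask pixels are repainted, so the inverted mask regenerates the background while preserving the product). Nothing in this commit wires that up, so the following is a hypothetical sketch only; the pipeline class, checkpoint, and prompt are all assumptions:

```python
import torch
from diffusers import StableDiffusionInpaintPipeline

pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting",  # hypothetical checkpoint choice
    torch_dtype=torch.float16,
).to("cuda")

# extended_image / inverted_mask_image as produced by ImageAugmentation above;
# white pixels in mask_image mark the regions the pipeline repaints.
result = pipe(
    prompt="product on a marble countertop, soft studio lighting",  # hypothetical
    image=extended_image.resize((512, 512)),
    mask_image=inverted_mask_image.resize((512, 512)),
).images[0]
result.save("generated_background.jpg")
```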