VikramSingh178 committed on
Commit
a76141d
1 Parent(s): f3cfe0c

Update import statement for accelerator and add image augmentation functionality

Browse files
Files changed (4) hide show
  1. .gitignore +2 -0
  2. requirements.txt +2 -1
  3. scripts/config.py +1 -0
  4. scripts/utils.py +80 -74
.gitignore CHANGED
@@ -3,3 +3,5 @@
3
  variables.tf
4
  .terraform
5
  config.env
 
 
 
3
  variables.tf
4
  .terraform
5
  config.env
6
+ /scripts/yolov8s*
7
+ /scripts/*jpg
requirements.txt CHANGED
@@ -20,4 +20,5 @@ Jinja2
20
  datasets
21
  peft
22
  async-batcher
23
- ultralytics
 
 
20
  datasets
21
  peft
22
  async-batcher
23
+ ultralytics
24
+ opencv-python-headless
scripts/config.py CHANGED
@@ -6,6 +6,7 @@ DATASET_NAME= "hahminlew/kream-product-blip-captions"
6
  PROJECT_NAME = "Product Photography"
7
  PRODUCTS_10k_DATASET = "VikramSingh178/Products-10k-BLIP-captions"
8
  CAPTIONING_MODEL_NAME = "Salesforce/blip-image-captioning-base"
 
9
 
10
 
11
 
 
6
  PROJECT_NAME = "Product Photography"
7
  PRODUCTS_10k_DATASET = "VikramSingh178/Products-10k-BLIP-captions"
8
  CAPTIONING_MODEL_NAME = "Salesforce/blip-image-captioning-base"
9
+ SEGMENTATION_MODEL_NAME = "facebook/sam-vit-huge"
10
 
11
 
12
 
scripts/utils.py CHANGED
@@ -1,8 +1,9 @@
1
  import torch
2
  from ultralytics import YOLO
3
- from transformers import pipeline
4
- import cv2
5
  import numpy as np
 
 
6
 
7
 
8
  def accelerator():
@@ -22,84 +23,89 @@ def accelerator():
22
 
23
 
24
 
 
 
 
 
 
 
 
25
 
26
- def center_scaled_roi(image_path, bg_size, scale_factor):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  """
28
- Center and scale the region of interest (ROI) within a background image.
29
 
30
  Args:
31
- image_path (str): The path to the original image.
32
- bg_size (tuple): The size (width, height) of the background image.
33
- scale_factor (float): The scaling factor to apply to the ROI.
34
 
35
  Returns:
36
- numpy.ndarray: The background image with the scaled ROI centered.
37
-
38
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
- original_image = cv2.imread(image_path)
41
- height, width = original_image.shape[:2]
42
-
43
- # Convert the image to grayscale
44
- gray = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
45
-
46
- # Apply Gaussian blur to reduce noise
47
- blurred = cv2.GaussianBlur(gray, (5, 5), 0)
48
-
49
- # Perform edge detection using Canny
50
- edges = cv2.Canny(blurred, 50, 150)
51
-
52
- # Find contours in the edged image
53
- contours, _ = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
54
-
55
- # Initialize variables to store ROI coordinates
56
- roi_x, roi_y, roi_w, roi_h = 0, 0, 0, 0
57
-
58
- # Loop over the contours
59
- for contour in contours:
60
- # Approximate the contour
61
- peri = cv2.arcLength(contour, True)
62
- approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
63
-
64
- # If the contour has 4 vertices, it's likely a rectangle
65
- if len(approx) == 4:
66
- # Get the bounding box of the contour
67
- x, y, w, h = cv2.boundingRect(approx)
68
- roi_x, roi_y, roi_w, roi_h = x, y, w, h
69
- break
70
-
71
- # Calculate dimensions for the background
72
- bg_width, bg_height = bg_size
73
-
74
- # Resize the ROI based on the scale factor
75
- scaled_roi_w = int(roi_w * scale_factor)
76
- scaled_roi_h = int(roi_h * scale_factor)
77
-
78
- # Calculate offsets to center the scaled ROI within the background
79
- x_offset = (bg_width - scaled_roi_w) // 2
80
- y_offset = (bg_height - scaled_roi_h) // 2
81
-
82
- # Resize the original image
83
- scaled_image = cv2.resize(original_image, (scaled_roi_w, scaled_roi_h))
84
-
85
- # Create a blank background
86
- background = np.zeros((bg_height, bg_width, 3), dtype=np.uint8)
87
-
88
- # Place the scaled ROI onto the background
89
- background[y_offset:y_offset+scaled_roi_h, x_offset:x_offset+scaled_roi_w] = scaled_image
90
-
91
- return background
92
-
93
- # Define dimensions for the background (larger than the ROI)
94
- bg_width, bg_height = 800, 600
95
-
96
- # Define the scale factor
97
- scale_factor = 0.5 # Adjust this value as needed
98
-
99
- # Call the function to center the scaled ROI within the background
100
- centered_scaled_roi = center_scaled_roi('image.jpg', (bg_width, bg_height), scale_factor)
101
 
102
- # Display the centered scaled ROI
103
- cv2.imshow('Centered Scaled ROI', centered_scaled_roi)
104
- cv2.waitKey(0)
105
- cv2.destroyAllWindows()
 
1
  import torch
2
  from ultralytics import YOLO
3
+ from transformers import SamModel,SamProcessor
 
4
  import numpy as np
5
+ from PIL import Image
6
+ from config import SEGMENTATION_MODEL_NAME
7
 
8
 
9
  def accelerator():
 
23
 
24
 
25
 
26
+ class ImageAugmentation:
27
+ """
28
+ Class for centering an image on a white background using ROI.
29
+
30
+ Attributes:
31
+ background_size (tuple): Size of the larger background where the image will be placed.
32
+ """
33
 
34
+ def __init__(self, background_size=(1920, 1080)):
35
+ """
36
+ Initialize ImageAugmentation class.
37
+
38
+ Args:
39
+ background_size (tuple, optional): Size of the larger background. Default is (1920, 1080).
40
+ """
41
+ self.background_size = background_size
42
+
43
+ def center_image_on_background(self, image, roi):
44
+ """
45
+ Center the input image on a larger background using ROI.
46
+
47
+ Args:
48
+ image (numpy.ndarray): Input image.
49
+ roi (tuple): Coordinates of the region of interest (x, y, width, height).
50
+
51
+ Returns:
52
+ numpy.ndarray: Image centered on a larger background.
53
+ """
54
+ w, h = self.background_size
55
+ bg = np.ones((h, w, 3), dtype=np.uint8) * 255 # White background
56
+ x, y, roi_w, roi_h = roi
57
+ bg[(h - roi_h) // 2:(h - roi_h) // 2 + roi_h, (w - roi_w) // 2:(w - roi_w) // 2 + roi_w] = image
58
+ return bg
59
+
60
+ def detect_region_of_interest(self, image):
61
+ """
62
+ Detect the region of interest in the input image.
63
+
64
+ Args:
65
+ image (numpy.ndarray): Input image.
66
+
67
+ Returns:
68
+ tuple: Bounding box of the non-zero region as (left, upper, right, lower).
69
+ """
70
+ # Convert image to grayscale
71
+ grayscale_image = np.array(Image.fromarray(image).convert("L"))
72
+
73
+ # Calculate bounding box of non-zero region
74
+ bbox = Image.fromarray(grayscale_image).getbbox()
75
+ return bbox
76
+
77
+ def generate_bbox(image):
78
  """
79
+ Generate bounding box for the input image.
80
 
81
  Args:
82
+ image (numpy.ndarray): Input image passed to the detector.
 
 
83
 
84
  Returns:
85
+ list: One [x1, y1, x2, y2] integer box per detection.
 
86
  """
87
+ # Load YOLOv8 model
88
+ model = YOLO("yolov8s.pt")
89
+ results = model(image)
90
+ # Get bounding box coordinates
91
+ bbox = results[0].boxes.xyxy.int().tolist()
92
+ return bbox
93
+
94
+ def generate_mask():
95
+ model = SamModel.from_pretrained("SEGMENTATION_MODEL_NAMEz")
96
+ processor = SamProcessor.from_pretrained("SEGMENTATION_MODEL_NAME")
97
+
98
+
99
+
100
+
101
+ if __name__ == "__main__":
102
+ augmenter = ImageAugmentation()
103
+ image_path = "/Users/vikram/Python/product_diffusion_api/sample_data/example1.jpg"
104
+ image = np.array(Image.open(image_path).convert("RGB"))
105
+ roi = augmenter.detect_region_of_interest(image)
106
+ centered_image = augmenter.center_image_on_background(image, roi)
107
+ bbox = generate_bbox(centered_image)
108
+ print(bbox)
109
+
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111