VikramSingh178 committed on
Commit
cca63d4
1 Parent(s): 88e9206

chore: Install libgl1-mesa-glx for compatibility with image processing libraries

Browse files
models/yolov8s.pt.REMOVED.git-id ADDED
@@ -0,0 +1 @@
 
 
1
+ 5f7efb1ee991ebccb1ee9a360066829e6435a168
run.sh CHANGED
@@ -1,2 +1,3 @@
1
  apt-get update && apt-get install python3-dev
2
  pip install -r requirements.txt
 
 
1
  apt-get update && apt-get install python3-dev
2
  pip install -r requirements.txt
3
+ apt install libgl1-mesa-glx
scripts/__pycache__/config.cpython-310.pyc CHANGED
Binary files a/scripts/__pycache__/config.cpython-310.pyc and b/scripts/__pycache__/config.cpython-310.pyc differ
 
scripts/utils.py CHANGED
@@ -4,7 +4,8 @@ from transformers import SamModel, SamProcessor
4
  import numpy as np
5
  from PIL import Image
6
  from config import SEGMENTATION_MODEL_NAME
7
-
 
8
 
9
  def accelerator():
10
  """
@@ -14,12 +15,11 @@ def accelerator():
14
  str: The name of the device accelerator ('cuda', 'mps', or 'cpu').
15
  """
16
  if torch.cuda.is_available():
17
- device = "cuda"
18
  elif torch.backends.mps.is_available():
19
- device = "mps"
20
  else:
21
- device = "cpu"
22
- return device
23
 
24
 
25
  class ImageAugmentation:
@@ -27,109 +27,124 @@ class ImageAugmentation:
27
  Class for centering an image on a white background using ROI.
28
 
29
  Attributes:
30
- background_size (tuple): Size of the larger background where the image will be placed.
 
 
31
  """
32
 
33
- def __init__(self, background_size=(1920, 1080)):
34
  """
35
  Initialize ImageAugmentation class.
36
 
37
  Args:
38
- background_size (tuple, optional): Size of the larger background. Default is (1920, 1080).
 
 
39
  """
40
- self.background_size = background_size
 
 
41
 
42
- def center_image_on_background(self, image, roi):
43
  """
44
- Center the input image on a larger background using ROI.
 
45
 
46
  Args:
47
- image (numpy.ndarray): Input image.
48
- roi (tuple): Coordinates of the region of interest (x, y, width, height).
49
 
50
  Returns:
51
- numpy.ndarray: Image centered on a larger background.
52
  """
53
- w, h = self.background_size
54
- bg = np.ones((h, w, 3), dtype=np.uint8) * 255 # White background
55
- x, y, roi_w, roi_h = roi
56
- bg[
57
- (h - roi_h) // 2 : (h - roi_h) // 2 + roi_h,
58
- (w - roi_w) // 2 : (w - roi_w) // 2 + roi_w,
59
- ] = image
60
- return bg
61
-
62
- def detect_region_of_interest(self, image):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  """
64
- Detect the region of interest in the input image.
65
 
66
  Args:
67
- image (numpy.ndarray): Input image.
68
 
69
  Returns:
70
- tuple: Coordinates of the region of interest (x, y, width, height).
71
  """
72
- # Convert image to grayscale
73
- grayscale_image = np.array(Image.fromarray(image).convert("L"))
74
-
75
- # Calculate bounding box of non-zero region
76
- bbox = Image.fromarray(grayscale_image).getbbox()
77
  return bbox
78
 
 
 
 
79
 
80
- def generate_bbox(image):
81
- """
82
- Generate bounding box for the input image.
83
-
84
- Args:
85
- image_path (str): Path to the input image.
86
-
87
- Returns:
88
- tuple: Bounding box coordinates (x, y, width, height).
89
- """
90
- # Load YOLOv5 model
91
- model = YOLO("../models/yolov8s.pt")
92
- results = model(image)
93
- # Get bounding box coordinates
94
- bbox = results[0].boxes.xyxy.int().tolist()
95
- return bbox
96
-
97
-
98
- def generate_mask(image):
99
- """
100
- Generates masks for the given image using a segmentation model.
101
-
102
- Args:
103
- image: The input image for which masks need to be generated.
104
-
105
- Returns:
106
- masks: A tensor containing the generated masks.
107
-
108
- Raises:
109
- None
110
- """
111
- model = SamModel.from_pretrained(SEGMENTATION_MODEL_NAME).to(device=accelerator())
112
- processor = SamProcessor.from_pretrained(SEGMENTATION_MODEL_NAME)
113
- inputs = processor(
114
- image, input_boxes=[generate_bbox(image)], return_tensors="pt"
115
- ).to(torch.float)
116
- inputs.to(device=accelerator())
117
- outputs = model(**inputs)
118
- mask = processor.image_processor.post_process_masks(
119
- outputs.pred_masks.cpu(),
120
- inputs["original_sizes"].cpu(),
121
- inputs["reshaped_input_sizes"].cpu(),
122
- )
123
- return mask
124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  if __name__ == "__main__":
127
- augmenter = ImageAugmentation()
128
- image_path = "/Users/vikram/Python/product_diffusion_api/sample_data/example1.jpg"
129
- image = np.array(Image.open(image_path).convert("RGB"))
130
- roi = augmenter.detect_region_of_interest(image)
131
- centered_image = augmenter.center_image_on_background(image, roi)
132
- masks = generate_mask(Image.fromarray(centered_image))
133
- masks = np.array(masks)
134
- mask_image = Image.fromarray(masks[0])
135
- mask_image.save("mask.jpg")
 
4
  import numpy as np
5
  from PIL import Image
6
  from config import SEGMENTATION_MODEL_NAME
7
+ import cv2
8
+ import matplotlib.pyplot as plt
9
 
10
  def accelerator():
11
  """
 
15
  str: The name of the device accelerator ('cuda', 'mps', or 'cpu').
16
  """
17
  if torch.cuda.is_available():
18
+ return "cuda"
19
  elif torch.backends.mps.is_available():
20
+ return "mps"
21
  else:
22
+ return "cpu"
 
23
 
24
 
25
  class ImageAugmentation:
 
27
  Class for centering an image on a white background using ROI.
28
 
29
  Attributes:
30
+ target_width (int): Desired width of the extended image.
31
+ target_height (int): Desired height of the extended image.
32
+ roi_scale (float): Scale factor to determine the size of the region of interest (ROI) in the original image.
33
  """
34
 
35
+ def __init__(self, target_width, target_height, roi_scale=0.5):
36
  """
37
  Initialize ImageAugmentation class.
38
 
39
  Args:
40
+ target_width (int): Desired width of the extended image.
41
+ target_height (int): Desired height of the extended image.
42
+ roi_scale (float): Scale factor to determine the size of the region of interest (ROI) in the original image.
43
  """
44
+ self.target_width = target_width
45
+ self.target_height = target_height
46
+ self.roi_scale = roi_scale
47
 
48
+ def extend_image(self, image_path):
49
  """
50
+ Extends the given image to the specified target dimensions while maintaining the aspect ratio of the original image.
51
+ The image is centered based on the detected region of interest (ROI).
52
 
53
  Args:
54
+ image_path (str): The path to the image file.
 
55
 
56
  Returns:
57
+ PIL.Image.Image: The extended image with the specified dimensions.
58
  """
59
+ # Open the original image
60
+ original_image = cv2.imread(image_path)
61
+
62
+ # Convert the image to grayscale for better edge detection
63
+ gray_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
64
+
65
+ # Perform edge detection to find contours
66
+ edges = cv2.Canny(gray_image, 50, 150)
67
+ contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
68
+
69
+ # Find the largest contour (assumed to be the ROI)
70
+ largest_contour = max(contours, key=cv2.contourArea)
71
+
72
+ # Get the bounding box of the largest contour
73
+ x, y, w, h = cv2.boundingRect(largest_contour)
74
+
75
+ # Calculate the center of the bounding box
76
+ roi_center_x = x + w // 2
77
+ roi_center_y = y + h // 2
78
+
79
+ # Calculate the top-left coordinates of the ROI
80
+ roi_x = max(0, roi_center_x - self.target_width // 2)
81
+ roi_y = max(0, roi_center_y - self.target_height // 2)
82
+
83
+ # Crop the ROI from the original image
84
+ roi = original_image[roi_y:roi_y+self.target_height, roi_x:roi_x+self.target_width]
85
+
86
+ # Create a new white background image with the target dimensions
87
+ extended_image = np.ones((self.target_height, self.target_width, 3), dtype=np.uint8) * 255
88
+
89
+ # Calculate the paste position for centering the ROI
90
+ paste_x = (self.target_width - roi.shape[1]) // 2
91
+ paste_y = (self.target_height - roi.shape[0]) // 2
92
+
93
+ # Paste the ROI onto the white background
94
+ extended_image[paste_y:paste_y+roi.shape[0], paste_x:paste_x+roi.shape[1]] = roi
95
+
96
+ return Image.fromarray(cv2.cvtColor(extended_image, cv2.COLOR_BGR2RGB))
97
+
98
+
99
+ def generate_bbox(self, image):
100
  """
101
+ Generate bounding box for the input image.
102
 
103
  Args:
104
+ image: The input image.
105
 
106
  Returns:
107
+ list: Bounding box coordinates [x_min, y_min, x_max, y_max].
108
  """
109
+ model = YOLO("yolov8s.pt")
110
+ results = model(image)
111
+ bbox = results[0].boxes.xyxy.tolist()
 
 
112
  return bbox
113
 
114
+ def generate_mask(self, image, bbox):
115
+ """
116
+ Generates masks for the given image using a segmentation model.
117
 
118
+ Args:
119
+ image: The input image for which masks need to be generated.
120
+ bbox: Bounding box coordinates [x_min, y_min, x_max, y_max].
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
+ Returns:
123
+ numpy.ndarray: The generated mask.
124
+ """
125
+ model = SamModel.from_pretrained(SEGMENTATION_MODEL_NAME).to(device=accelerator())
126
+ processor = SamProcessor.from_pretrained(SEGMENTATION_MODEL_NAME)
127
+
128
+ # Ensure bbox is in the correct format
129
+ bbox_list = [bbox] # Convert bbox to list of lists
130
+
131
+ # Pass bbox as a list of lists to SamProcessor
132
+ inputs = processor(image, input_boxes=bbox_list, return_tensors="pt").to(device=accelerator())
133
+ with torch.no_grad():
134
+ outputs = model(**inputs)
135
+ masks = processor.image_processor.post_process_masks(
136
+ outputs.pred_masks,
137
+ inputs["original_sizes"],
138
+ inputs["reshaped_input_sizes"],
139
+ )
140
+
141
+ return masks[0].cpu().numpy()
142
 
143
  if __name__ == "__main__":
144
+ augmenter = ImageAugmentation(target_width=1920, target_height=1080, roi_scale=0.3)
145
+ image_path = "/home/product_diffusion_api/sample_data/example1.jpg"
146
+ extended_image = augmenter.extend_image(image_path)
147
+ bbox = augmenter.generate_bbox(extended_image)
148
+ mask = augmenter.generate_mask(extended_image, bbox)
149
+ plt.imsave('mask.jpg', mask)
150
+ #Image.fromarray(mask).save("centered_image_with_mask.jpg")