VikramSingh178 committed
Commit a76141d • Parent(s): f3cfe0c

Update import statement for accelerator and add image augmentation functionality

Files changed:
- .gitignore +2 -0
- requirements.txt +2 -1
- scripts/config.py +1 -0
- scripts/utils.py +80 -74
.gitignore CHANGED
@@ -3,3 +3,5 @@
 variables.tf
 .terraform
 config.env
+/scripts/yolov8s*
+/scripts/*jpg
requirements.txt CHANGED
@@ -20,4 +20,5 @@ Jinja2
 datasets
 peft
 async-batcher
-ultralytics
+ultralytics
+opencv-python-headless
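ultralytics itself imports OpenCV, and the headless build supplies the cv2 module without the GUI system libraries, which suits servers and Spaces. A quick sanity check that the two new dependencies resolve together (a sketch, assuming both packages installed cleanly):

import cv2                  # provided by opencv-python-headless
from ultralytics import YOLO

print(cv2.__version__)
model = YOLO("yolov8s.pt")  # fetches yolov8s.pt on first run, hence the new .gitignore entries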
scripts/config.py CHANGED
@@ -6,6 +6,7 @@ DATASET_NAME= "hahminlew/kream-product-blip-captions"
 PROJECT_NAME = "Product Photography"
 PRODUCTS_10k_DATASET = "VikramSingh178/Products-10k-BLIP-captions"
 CAPTIONING_MODEL_NAME = "Salesforce/blip-image-captioning-base"
+SEGMENTATION_MODEL_NAME = "facebook/sam-vit-huge"
 
 
 
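The new SEGMENTATION_MODEL_NAME constant is consumed by scripts/utils.py below; as a minimal sketch, loading it with the transformers SAM classes (mirroring the imports this commit adds) looks like:

from transformers import SamModel, SamProcessor
from config import SEGMENTATION_MODEL_NAME  # "facebook/sam-vit-huge"

model = SamModel.from_pretrained(SEGMENTATION_MODEL_NAME)
processor = SamProcessor.from_pretrained(SEGMENTATION_MODEL_NAME)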
scripts/utils.py CHANGED
@@ -1,8 +1,9 @@
 import torch
 from ultralytics import YOLO
-from transformers import
-import cv2
+from transformers import SamModel, SamProcessor
 import numpy as np
+from PIL import Image
+from config import SEGMENTATION_MODEL_NAME
 
 
 def accelerator():
@@ -22,84 +23,89 @@ def accelerator():
 
 
 
-def center_scaled_roi(image_path, bg_size, scale_factor):
+class ImageAugmentation:
+    """
+    Class for centering an image on a white background using ROI.
+
+    Attributes:
+        background_size (tuple): Size of the larger background where the image will be placed.
+    """
+
+    def __init__(self, background_size=(1920, 1080)):
+        """
+        Initialize ImageAugmentation class.
+
+        Args:
+            background_size (tuple, optional): Size of the larger background. Default is (1920, 1080).
+        """
+        self.background_size = background_size
+
+    def center_image_on_background(self, image, roi):
+        """
+        Center the input image on a larger background using ROI.
+
+        Args:
+            image (numpy.ndarray): Input image.
+            roi (tuple): Coordinates of the region of interest (x, y, width, height).
+
+        Returns:
+            numpy.ndarray: Image centered on a larger background.
+        """
+        w, h = self.background_size
+        bg = np.ones((h, w, 3), dtype=np.uint8) * 255  # White background
+        x, y, roi_w, roi_h = roi
+        bg[(h - roi_h) // 2:(h - roi_h) // 2 + roi_h, (w - roi_w) // 2:(w - roi_w) // 2 + roi_w] = image
+        return bg
+
+    def detect_region_of_interest(self, image):
+        """
+        Detect the region of interest in the input image.
+
+        Args:
+            image (numpy.ndarray): Input image.
+
+        Returns:
+            tuple: Region of interest as (left, upper, right, lower), per PIL's getbbox().
+        """
+        # Convert image to grayscale
+        grayscale_image = np.array(Image.fromarray(image).convert("L"))
+
+        # Calculate bounding box of non-zero region
+        bbox = Image.fromarray(grayscale_image).getbbox()
+        return bbox
+
+
+def generate_bbox(image):
     """
-
+    Generate bounding box for the input image.
 
     Args:
-        image_path (str):
-        bg_size (tuple): The size (width, height) of the background image.
-        scale_factor (float): The scaling factor to apply to the ROI.
+        image (numpy.ndarray): Input image.
 
     Returns:
-
-
+        list: Bounding box coordinates in (x1, y1, x2, y2) format.
     """
-    original_image = cv2.imread(image_path)
-    height, width = original_image.shape[:2]
-
-    # Convert the image to grayscale
-    gray = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
-
-    # Apply Gaussian blur to reduce noise
-    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
-
-    # Perform edge detection using Canny
-    edges = cv2.Canny(blurred, 50, 150)
-
-    # Find contours in the edged image
-    contours, _ = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-
-    # Initialize variables to store ROI coordinates
-    roi_x, roi_y, roi_w, roi_h = 0, 0, 0, 0
-
-    # Loop over the contours
-    for contour in contours:
-        # Approximate the contour
-        peri = cv2.arcLength(contour, True)
-        approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
-
-        # If the contour has 4 vertices, it's likely a rectangle
-        if len(approx) == 4:
-            # Get the bounding box of the contour
-            x, y, w, h = cv2.boundingRect(approx)
-            roi_x, roi_y, roi_w, roi_h = x, y, w, h
-            break
-
-    # Calculate dimensions for the background
-    bg_width, bg_height = bg_size
-
-    # Resize the ROI based on the scale factor
-    scaled_roi_w = int(roi_w * scale_factor)
-    scaled_roi_h = int(roi_h * scale_factor)
-
-    # Calculate offsets to center the scaled ROI within the background
-    x_offset = (bg_width - scaled_roi_w) // 2
-    y_offset = (bg_height - scaled_roi_h) // 2
-
-    # Resize the original image
-    scaled_image = cv2.resize(original_image, (scaled_roi_w, scaled_roi_h))
-
-    # Create a blank background
-    background = np.zeros((bg_height, bg_width, 3), dtype=np.uint8)
-
-    # Place the scaled ROI onto the background
-    background[y_offset:y_offset+scaled_roi_h, x_offset:x_offset+scaled_roi_w] = scaled_image
-
-    return background
-
-# Define dimensions for the background (larger than the ROI)
-bg_width, bg_height = 800, 600
-
-# Define the scale factor
-scale_factor = 0.5  # Adjust this value as needed
-
-# Call the function to center the scaled ROI within the background
-centered_scaled_roi = center_scaled_roi('image.jpg', (bg_width, bg_height), scale_factor)
-
-# Display the centered scaled ROI
-cv2.imshow('Centered Scaled ROI', centered_scaled_roi)
-cv2.waitKey(0)
-cv2.destroyAllWindows()
+    # Load YOLOv8 model
+    model = YOLO("yolov8s.pt")
+    results = model(image)
+    # Get bounding box coordinates
+    bbox = results[0].boxes.xyxy.int().tolist()
+    return bbox
+
+
+def generate_mask():
+    model = SamModel.from_pretrained(SEGMENTATION_MODEL_NAME)
+    processor = SamProcessor.from_pretrained(SEGMENTATION_MODEL_NAME)
+
+
+if __name__ == "__main__":
+    augmenter = ImageAugmentation()
+    image_path = "/Users/vikram/Python/product_diffusion_api/sample_data/example1.jpg"
+    image = np.array(Image.open(image_path).convert("RGB"))
+    roi = augmenter.detect_region_of_interest(image)
+    centered_image = augmenter.center_image_on_background(image, roi)
+    bbox = generate_bbox(centered_image)
+    print(bbox)
 
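One caveat on the new ImageAugmentation helpers: PIL's getbbox() returns corner coordinates (left, upper, right, lower) rather than (x, y, width, height), and center_image_on_background pastes the whole input image into a slice sized from those values, so the assignment only broadcasts when the detected box happens to match the image dimensions. A minimal crop-then-center sketch under the getbbox() convention (the helper name and signature are illustrative, not part of the commit):

import numpy as np

def center_roi_on_background(image, bbox, background_size=(1920, 1080)):
    """Crop the getbbox() region and paste it centered on a white canvas.

    bbox uses PIL's (left, upper, right, lower) convention.
    """
    w, h = background_size
    left, upper, right, lower = bbox
    roi = image[upper:lower, left:right]           # crop the detected region
    roi_h, roi_w = roi.shape[:2]
    bg = np.ones((h, w, 3), dtype=np.uint8) * 255  # white background
    y0, x0 = (h - roi_h) // 2, (w - roi_w) // 2
    bg[y0:y0 + roi_h, x0:x0 + roi_w] = roi
    return bg

Cropping first keeps the pasted region and the target slice the same shape for any detected box.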
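generate_mask is committed as a stub: it loads the SAM model and processor but does not yet run them. A sketch of one way it could be finished with the transformers SAM API, assuming a PIL image and the xyxy boxes that generate_bbox returns (the function name and signature here are hypothetical):

import torch
from PIL import Image
from transformers import SamModel, SamProcessor

def generate_mask_sketch(image, boxes):
    """Run SAM with bounding-box prompts and return post-processed masks.

    boxes is a list of [x1, y1, x2, y2] lists, as returned by generate_bbox.
    """
    model = SamModel.from_pretrained("facebook/sam-vit-huge")
    processor = SamProcessor.from_pretrained("facebook/sam-vit-huge")
    # Box prompts are nested per image, hence the extra wrapping list
    inputs = processor(image, input_boxes=[boxes], return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Resize the predicted masks back to the original image size
    masks = processor.image_processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"].cpu(),
        inputs["reshaped_input_sizes"].cpu(),
    )
    return masks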