import os
import io

# Keep model caches inside the working directory instead of the per-user
# defaults. These are set before the third-party imports below because a
# library may read its cache location when it is first imported.
# NOTE(review): confirm which of the imported packages actually do so.
os.environ["TORCH_HOME"] = "./pretrained-model"
os.environ["HUGGINGFACE_HUB_CACHE"] = "./pretrained-model"

from ultralytics import YOLO  # noqa: E402
import cv2  # noqa: E402
import numpy as np  # noqa: E402
from PIL import Image  # noqa: E402
from iopaint.single_processing import batch_inpaint_cv2  # noqa: E402
import gradio as gr  # noqa: E402
from bgremover import process  # noqa: E402


def resize_image(input_image_path, width=640, height=640):
    """Load an image file and letterbox it to ``width`` x ``height``.

    The image is scaled uniformly so that it fits inside the target size,
    then padded with a constant gray border (114, 114, 114) so the result
    has exactly the requested dimensions.

    Args:
        input_image_path: Path of the image file, read with ``cv2.imread``
            in ``IMREAD_COLOR`` mode.
        width: Target width in pixels.
        height: Target height in pixels.

    Returns:
        The resized and padded image as a NumPy array of shape
        ``(height, width, 3)``.

    Raises:
        gr.Error: If the image cannot be read or resized.
    """
    try:
        img = cv2.imread(input_image_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            raise ValueError(f"cannot read image: {input_image_path!r}")

        src_h, src_w = img.shape[:2]

        # Uniform scale factor (new / old) that fits the image in the target.
        scale = min(height / src_h, width / src_w)
        new_w = max(1, int(round(src_w * scale)))
        new_h = max(1, int(round(src_h * scale)))
        resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

        # Split the leftover space evenly between the two sides; the +/-0.1
        # nudges send an odd leftover pixel to the bottom/right edge.
        pad_w = (width - new_w) / 2
        pad_h = (height - new_h) / 2
        top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
        left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))

        return cv2.copyMakeBorder(
            resized, top, bottom, left, right,
            cv2.BORDER_CONSTANT, value=(114, 114, 114),
        )
    except Exception as e:
        raise gr.Error("Error in resizing image!") from e


def process_images(input_image, append_image, default_class="chair"):
    """Replace the first detected ``default_class`` object with another image.

    The main image is letterboxed, segmented with a YOLOv8 segmentation
    model, and the first detection whose class name equals ``default_class``
    is handed to ``repaitingAndMerge`` together with its mask and bounding
    box.

    Args:
        input_image: Path of the main image.
        append_image: Path of the object image to paste in.
        default_class: Class name (as listed in ``model.names``) to replace.

    Returns:
        The NumPy image returned by ``repaitingAndMerge``.

    Raises:
        gr.Error: If either path is missing, the main image cannot be
            resized, or no usable ``default_class`` detection is found.
    """
    if not input_image:
        raise gr.Error("Please upload a main image.")
    if not append_image:
        raise gr.Error("Please upload an object image.")

    img = resize_image(input_image)
    img_h, img_w = img.shape[:2]

    # Load the pretrained YOLOv8m segmentation model.
    model = YOLO('pretrained-model/yolov8m-seg.pt')

    # imgsz is given as (height, width); detections below conf=0.5 are dropped.
    results = model(img, imgsz=(img_h, img_w), conf=0.5)
    names = model.names

    kernel = np.ones((5, 5), np.uint8)  # 5x5 structuring element for dilation

    for result in results:
        for i, label in enumerate(result.boxes.cls):
            if names[int(label)] != default_class:
                continue

            # Move the mask to host memory before converting it to NumPy so
            # this also works when inference ran on an accelerator.
            mask = result.masks.data[i].cpu().numpy() * 255
            # Grow the mask slightly so the object's edges are covered too.
            dilated_mask = cv2.dilate(mask, kernel, iterations=2)
            # Bring the mask to the image's resolution.
            resized_mask = cv2.resize(dilated_mask, (img_w, img_h))

            # Bounding box of the whole mask, measured in image coordinates.
            x, y, w, h = cv2.boundingRect((resized_mask > 127).astype(np.uint8))
            if w == 0 or h == 0:
                # Empty mask: nothing to replace for this detection.
                continue

            return repaitingAndMerge(append_image, w, h, x, y, img, resized_mask)

    raise gr.Error(f'{default_class} object not found in the image')


def _to_rgba(image):
    """Convert an image loaded by ``cv2.imread`` to 4-channel RGBA."""
    if image.ndim == 2:
        return cv2.cvtColor(image, cv2.COLOR_GRAY2RGBA)
    if image.shape[2] == 3:
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGBA)
    return cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)


def repaitingAndMerge(append_image_path, width, height, xposition, yposition, input_base, mask_base):
    """Inpaint the masked region and paste the object image over it.

    Args:
        append_image_path: Path of the object image to paste.
        width: Width the object image is resized to before pasting.
        height: Height the object image is resized to before pasting.
        xposition: X coordinate of the paste position (left edge).
        yposition: Y coordinate of the paste position (top edge).
        input_base: Image passed to ``batch_inpaint_cv2`` as the base image.
        mask_base: Mask passed to ``batch_inpaint_cv2``.

    Returns:
        The merged image as a NumPy array.

    Raises:
        gr.Error: If the object image cannot be read.
    """
    print("lama inpainting start")
    # NOTE(review): the channel order returned by batch_inpaint_cv2 is not
    # visible here; Image.fromarray uses the array as-is - confirm.
    inpaint_result_np = batch_inpaint_cv2('lama', 'cpu', input_base, mask_base)
    print("lama inpainting end")

    final_image = Image.fromarray(inpaint_result_np)

    print("merge start")
    # IMREAD_UNCHANGED keeps an alpha channel when the file has one.
    append_image = cv2.imread(append_image_path, cv2.IMREAD_UNCHANGED)
    if append_image is None:
        raise gr.Error("Failed to read the object image.")

    resized_image = cv2.resize(append_image, (width, height), interpolation=cv2.INTER_AREA)
    # Object images may be grayscale, BGR or BGRA; normalise to RGBA.
    resized_image = _to_rgba(resized_image)

    # Remove the background from the object image.
    # NOTE(review): the result is used both as the pasted image and as the
    # paste mask, so it is presumably a PIL image with alpha - confirm.
    append_image_pil = process(resized_image)

    final_image.paste(append_image_pil, (xposition, yposition), append_image_pil)
    print("merge end")

    return np.array(final_image)