# Copyright (C) 2024-present Naver Corporation. All rights reserved.
# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
#
# --------------------------------------------------------
# utility functions about images (loading/converting...)
# --------------------------------------------------------
import os
import time

import numpy as np
import PIL.Image
from PIL import Image
from PIL.ImageOps import exif_transpose
import torch
import torchvision.transforms as tvf

# must be set before importing cv2 so OpenCV can read .exr files
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
import cv2  # noqa

import rembg
from rembg import remove
from segment_anything import sam_model_registry, SamPredictor

rembg_session = rembg.new_session()

def sam_init():
    """Download (if needed) and load the SAM ViT-H checkpoint, returning a predictor."""
    sam_checkpoint = "./sam_pt/sam_vit_h_4b8939.pth"
    if not os.path.exists(sam_checkpoint):
        os.system("wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth -P ./sam_pt/")
    model_type = "vit_h"
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    sam = sam_model_registry[model_type](checkpoint=sam_checkpoint).to(device=device)
    predictor = SamPredictor(sam)
    return predictor

def sam_segment(predictor, input_image, *bbox_coords):
    """Segment the object inside the (x_min, y_min, x_max, y_max) box prompt and
    return the input as an RGBA image whose alpha channel is the SAM mask."""
    bbox = np.array(bbox_coords)
    image = np.asarray(input_image)

    start_time = time.time()
    predictor.set_image(image)
    masks_bbox, scores_bbox, logits_bbox = predictor.predict(
        box=bbox,
        multimask_output=True,
    )
    print(f"SAM Time: {time.time() - start_time:.3f}s")

    out_image = np.zeros((image.shape[0], image.shape[1], 4), dtype=np.uint8)
    out_image[:, :, :3] = image
    out_image_bbox = out_image.copy()
    # keep the last of the multimask outputs as the alpha channel
    out_image_bbox[:, :, 3] = masks_bbox[-1].astype(np.uint8) * 255
    torch.cuda.empty_cache()
    return Image.fromarray(out_image_bbox, mode='RGBA')
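
# Example usage (a minimal sketch, not part of the original module): segment a
# single local image, using the full frame as the box prompt. "photo.jpg" and
# the _example_* name are assumptions for illustration.
def _example_sam_segment():
    image = Image.open("photo.jpg").convert("RGB")
    w, h = image.size
    rgba = sam_segment(predictor, image, 0, 0, w, h)  # box prompt = whole image
    rgba.save("photo_rgba.png")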

predictor = sam_init()

try:
    from pillow_heif import register_heif_opener  # noqa
    register_heif_opener()
    heif_support_enabled = True
except ImportError:
    heif_support_enabled = False

ImgNorm = tvf.Compose([tvf.ToTensor(), tvf.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
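# ImgNorm maps a PIL image to a C,H,W float tensor in [-1, 1]: ToTensor() scales
# uint8 pixels to [0, 1], then Normalize subtracts 0.5 and divides by 0.5, so a
# pure-white pixel becomes (1.0, 1.0, 1.0) and pure black becomes (-1.0, -1.0, -1.0).
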
def imread_cv2(path, options=cv2.IMREAD_COLOR):
    """ Open an image or a depthmap with opencv-python. """
    if path.endswith(('.exr', '.EXR')):
        options = cv2.IMREAD_ANYDEPTH
    img = cv2.imread(path, options)
    if img is None:
        raise IOError(f'Could not load image={path} with {options=}')
    if img.ndim == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img
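
# Example usage (illustrative only; file names are assumptions): color images
# come back as H,W,3 RGB uint8 arrays, while .exr files are read with
# IMREAD_ANYDEPTH and keep their float depth values.
def _example_imread_cv2():
    color = imread_cv2("photo.jpg")   # H,W,3 uint8, RGB channel order
    depth = imread_cv2("depth.exr")   # H,W float array
    print(color.shape, depth.dtype)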

def rgb(ftensor, true_shape=None):
    """Convert a (possibly normalized) image tensor to a float RGB array in [0, 1]."""
    if isinstance(ftensor, list):
        return [rgb(x, true_shape=true_shape) for x in ftensor]
    if isinstance(ftensor, torch.Tensor):
        ftensor = ftensor.detach().cpu().numpy()  # H,W,3
    if ftensor.ndim == 3 and ftensor.shape[0] == 3:
        ftensor = ftensor.transpose(1, 2, 0)
    elif ftensor.ndim == 4 and ftensor.shape[1] == 3:
        ftensor = ftensor.transpose(0, 2, 3, 1)
    if true_shape is not None:
        H, W = true_shape
        ftensor = ftensor[:H, :W]
    if ftensor.dtype == np.uint8:
        img = np.float32(ftensor) / 255
    else:
        # undo the (x - 0.5) / 0.5 normalization applied by ImgNorm
        img = (ftensor * 0.5) + 0.5
    return img.clip(min=0, max=1)
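
# Example usage (illustrative only): rgb() undoes ImgNorm, turning a normalized
# 3,H,W tensor back into an H,W,3 float image in [0, 1] suitable for display.
def _example_rgb():
    tensor = ImgNorm(PIL.Image.new("RGB", (32, 32), "white"))  # values ~1.0
    img01 = rgb(tensor)
    assert img01.shape == (32, 32, 3) and img01.max() <= 1.0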

def _resize_pil_image(img, long_edge_size):
    S = max(img.size)
    if S > long_edge_size:
        interp = PIL.Image.LANCZOS  # downscaling
    else:
        interp = PIL.Image.BICUBIC  # upscaling
    new_size = tuple(int(round(x * long_edge_size / S)) for x in img.size)
    return img.resize(new_size, interp)
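
# Example (illustrative only): the helper rescales the long edge to the target
# size while preserving the aspect ratio, e.g. 800x600 -> 512x384.
def _example_resize():
    im = PIL.Image.new("RGB", (800, 600))
    assert _resize_pil_image(im, 512).size == (512, 384)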

def load_images(folder_or_list, size, square_ok=False, verbose=True, do_remove_background=True, rembg_session=None):
    """ Open and convert all images in a list or folder to proper input format for DUSt3R. """
    if isinstance(folder_or_list, str):
        if verbose:
            print(f'>> Loading images from {folder_or_list}')
        root, folder_content = folder_or_list, sorted(os.listdir(folder_or_list))
    elif isinstance(folder_or_list, list):
        if verbose:
            print(f'>> Loading a list of {len(folder_or_list)} images')
        root, folder_content = '', folder_or_list
    else:
        raise ValueError(f'bad {folder_or_list=} ({type(folder_or_list)})')

    supported_images_extensions = ['.jpg', '.jpeg', '.png']
    if heif_support_enabled:
        supported_images_extensions += ['.heic', '.heif']
    supported_images_extensions = tuple(supported_images_extensions)

    imgs = []
    imgs_rgba = []
    for path in folder_content:
        if not path.lower().endswith(supported_images_extensions):
            continue
        img = exif_transpose(PIL.Image.open(os.path.join(root, path))).convert('RGB')
        # remove background if needed: rembg gives a rough alpha matte whose
        # bounding box is then used as a SAM box prompt for a cleaner mask
        if do_remove_background:
            if rembg_session is None:
                rembg_session = rembg.new_session()
            image_nobg = remove(img, alpha_matting=True, session=rembg_session)
            arr = np.asarray(image_nobg)[:, :, -1]
            x_nonzero = np.nonzero(arr.sum(axis=0))
            y_nonzero = np.nonzero(arr.sum(axis=1))
            x_min = int(x_nonzero[0].min())
            y_min = int(y_nonzero[0].min())
            x_max = int(x_nonzero[0].max())
            y_max = int(y_nonzero[0].max())
            input_image = sam_segment(predictor, img.convert('RGB'), x_min, y_min, x_max, y_max)
            foreground = np.array(input_image)[..., -1] > 127
        else:
            # no segmentation requested: treat the whole image as foreground
            # (img is RGB here, so it has no alpha channel to threshold)
            foreground = np.ones((img.size[1], img.size[0]), dtype=bool)
        W1, H1 = img.size
        if size == 224:
            # resize short side to 224 (then crop)
            img = _resize_pil_image(img, round(size * max(W1 / H1, H1 / W1)))
        else:
            # resize long side to 512
            img = _resize_pil_image(img, size)
        # resize foreground mask to match (nearest-neighbour keeps it binary)
        foreground = cv2.resize(foreground.astype(np.uint8), img.size, interpolation=cv2.INTER_NEAREST)

        W, H = img.size
        cx, cy = W // 2, H // 2
        if size == 224:
            half = min(cx, cy)
            img = img.crop((cx - half, cy - half, cx + half, cy + half))
            # crop the foreground mask the same way
            foreground = foreground[cy - half:cy + half, cx - half:cx + half]
        else:
            halfw, halfh = ((2 * cx) // 16) * 8, ((2 * cy) // 16) * 8
            if not square_ok and W == H:
                halfh = int(3 * halfw / 4)
            img = img.crop((cx - halfw, cy - halfh, cx + halfw, cy + halfh))
            # crop the foreground mask the same way
            foreground = foreground[cy - halfh:cy + halfh, cx - halfw:cx + halfw]
        W2, H2 = img.size
        if verbose:
            print(f' - adding {path} with resolution {W1}x{H1} --> {W2}x{H2}')
        imgs.append(dict(img=ImgNorm(img)[None], true_shape=np.int32(
            [img.size[::-1]]), idx=len(imgs), instance=str(len(imgs))))
        imgs_rgba.append(PIL.Image.fromarray(
            (255 * np.concatenate([np.array(img) / 255.0, foreground[..., None]], axis=-1)).astype(np.uint8)))

    assert imgs, 'no images found at ' + root
    if verbose:
        print(f' (Found {len(imgs)} images)')
    return imgs, imgs_rgba
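
# Example usage (a minimal sketch, not part of the original module): load a
# folder of images at the 512 setting and inspect the resulting view dicts.
# "./examples" is an assumed folder name; background removal runs rembg + SAM,
# so this needs the checkpoint downloaded by sam_init().
if __name__ == "__main__":
    views, views_rgba = load_images("./examples", size=512, verbose=True)
    for view in views:
        print(view["instance"], view["img"].shape, view["true_shape"])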