from os import path as osp import cv2 import numpy as np from torch.utils.data import Dataset from dataset.img_utils import masks2bbox, resize, crop class BaseDataset(Dataset): def __init__(self, data_paths, input_size=(224, 224)): self.data_paths = data_paths # RGB image files self.input_size = input_size opencv2py3d = np.eye(4) opencv2py3d[0, 0] = opencv2py3d[1, 1] = -1 self.opencv2py3d = opencv2py3d def __len__(self): return len(self.data_paths) def load_masks(self, rgb_file): person_mask_file = rgb_file.replace('.color.jpg', ".person_mask.png") if not osp.isfile(person_mask_file): person_mask_file = rgb_file.replace('.color.jpg', ".person_mask.jpg") obj_mask_file = None for pat in [".obj_rend_mask.png", ".obj_rend_mask.jpg", ".obj_mask.png", ".obj_mask.jpg", ".object_rend.png"]: obj_mask_file = rgb_file.replace('.color.jpg', pat) if osp.isfile(obj_mask_file): break person_mask = cv2.imread(person_mask_file, cv2.IMREAD_GRAYSCALE) obj_mask = cv2.imread(obj_mask_file, cv2.IMREAD_GRAYSCALE) return person_mask, obj_mask def get_crop_params(self, mask_hum, mask_obj, bbox_exp=1.0): "compute bounding box based on masks" bmin, bmax = masks2bbox([mask_hum, mask_obj]) crop_center = (bmin + bmax) // 2 # crop_size = np.max(bmax - bmin) crop_size = int(np.max(bmax - bmin) * bbox_exp) if crop_size % 2 == 1: crop_size += 1 # make sure it is an even number return bmax, bmin, crop_center, crop_size def is_behave_dataset(self, image_width): assert image_width in [2048, 1920, 1024, 960], f'unknwon image width {image_width}!' if image_width in [2048, 1024]: is_behave = True else: is_behave = False return is_behave def compute_K_roi(self, bbox_square, image_width=2048, image_height=1536, fx=979.7844, fy=979.840, cx=1018.952, cy=779.486): "return results in ndc coordinate, this is correct!!!" x, y, b, w = bbox_square assert b == w is_behave = self.is_behave_dataset(image_width) if is_behave: assert image_height / image_width == 0.75, f"invalid image aspect ratio: width={image_width}, height={image_height}" # the image might be rendered at different size ratio = image_width/2048. fx, fy = 979.7844*ratio, 979.840*ratio cx, cy = 1018.952*ratio, 779.486*ratio else: assert image_height / image_width == 9/16, f"invalid image aspect ratio: width={image_width}, height={image_height}" # intercap camera ratio = image_width/1920 fx, fy = 918.457763671875*ratio, 918.4373779296875*ratio cx, cy = 956.9661865234375*ratio, 555.944580078125*ratio cx, cy = cx - x, cy - y scale = b/2. # in ndc cx_ = (scale - cx)/scale cy_ = (scale - cy)/scale fx_ = fx/scale fy_ = fy/scale K_roi = np.array([ [fx_, 0, cx_, 0], [0., fy_, cy_, 0, ], [0, 0, 0, 1.], [0, 0, 1, 0] ]) return K_roi def crop_full_image(self, mask_hum, mask_obj, rgb_full, crop_masks, bbox_exp=1.0): """ crop the image based on the given masks :param mask_hum: :param mask_obj: :param rgb_full: :param crop_masks: a list of masks used to do the crop :return: Kroi, cropped human, object mask and RGB images (background masked out). """ bmax, bmin, crop_center, crop_size = self.get_crop_params(*crop_masks, bbox_exp) rgb = resize(crop(rgb_full, crop_center, crop_size), self.input_size) / 255. person_mask = resize(crop(mask_hum, crop_center, crop_size), self.input_size) / 255. obj_mask = resize(crop(mask_obj, crop_center, crop_size), self.input_size) / 255. xywh = np.concatenate([crop_center - crop_size // 2, np.array([crop_size, crop_size])]) Kroi = self.compute_K_roi(xywh, rgb_full.shape[1], rgb_full.shape[0]) # mask bkg out mask_comb = (person_mask > 0.5) | (obj_mask > 0.5) rgb = rgb * np.expand_dims(mask_comb, -1) return Kroi, obj_mask, person_mask, rgb