xiexh20's picture
add hdm demo v1
2fd6166
raw history blame
No virus
4.48 kB
from os import path as osp
import cv2
import numpy as np
from torch.utils.data import Dataset
from dataset.img_utils import masks2bbox, resize, crop
class BaseDataset(Dataset):
    """
    Base dataset that loads human/object masks for an RGB image, crops a
    square region around them and computes the camera matrix of that crop.
    """

    def __init__(self, data_paths, input_size=(224, 224)):
        """
        :param data_paths: sequence of RGB image files; its length is the dataset length
        :param input_size: size passed to ``resize`` for the cropped images and masks
        """
        self.data_paths = data_paths  # RGB image files
        self.input_size = input_size
        # 4x4 identity with the x and y axes negated.
        # NOTE(review): judging by the name this converts OpenCV camera
        # coordinates to the PyTorch3D convention -- confirm against the users.
        opencv2py3d = np.eye(4)
        opencv2py3d[0, 0] = opencv2py3d[1, 1] = -1
        self.opencv2py3d = opencv2py3d

    def __len__(self):
        """Return the number of RGB image files."""
        return len(self.data_paths)

    def load_masks(self, rgb_file):
        """
        Load the person and object masks stored next to an RGB image.

        Mask paths are derived by replacing '.color.jpg' in ``rgb_file`` with a
        mask suffix. The person mask is looked up as PNG first, then JPG. The
        object mask uses the first existing file among several known suffixes.

        :param rgb_file: path to the RGB image
        :return: (person_mask, obj_mask) as read by ``cv2.imread`` in grayscale
            mode; an entry is None when its file cannot be read.
        """
        person_mask_file = rgb_file.replace('.color.jpg', ".person_mask.png")
        if not osp.isfile(person_mask_file):
            person_mask_file = rgb_file.replace('.color.jpg', ".person_mask.jpg")

        obj_mask_file = None
        obj_patterns = [".obj_rend_mask.png", ".obj_rend_mask.jpg", ".obj_mask.png",
                        ".obj_mask.jpg", ".object_rend.png"]
        for pat in obj_patterns:
            obj_mask_file = rgb_file.replace('.color.jpg', pat)
            if osp.isfile(obj_mask_file):
                break
        # If no candidate exists, obj_mask_file stays at the last pattern and
        # cv2.imread below yields None instead of raising.

        person_mask = cv2.imread(person_mask_file, cv2.IMREAD_GRAYSCALE)
        obj_mask = cv2.imread(obj_mask_file, cv2.IMREAD_GRAYSCALE)
        return person_mask, obj_mask

    def get_crop_params(self, mask_hum, mask_obj, bbox_exp=1.0):
        """
        Compute square crop parameters from the bounding box of two masks.

        :param mask_hum: human mask
        :param mask_obj: object mask
        :param bbox_exp: factor applied to the longest bounding box side
        :return: bmax, bmin, crop_center, crop_size, where crop_size is an even integer
        """
        # presumably bmin/bmax are the min/max corners of the joint box -- see masks2bbox
        bmin, bmax = masks2bbox([mask_hum, mask_obj])
        crop_center = (bmin + bmax) // 2
        crop_size = int(np.max(bmax - bmin) * bbox_exp)
        if crop_size % 2 == 1:
            crop_size += 1  # make sure it is an even number
        return bmax, bmin, crop_center, crop_size

    def is_behave_dataset(self, image_width):
        """
        Tell which camera setup an image belongs to, based on its width.

        :param image_width: image width in pixels; must be 2048, 1920, 1024 or 960
        :return: True for widths 2048 and 1024, False for 1920 and 960
        :raises AssertionError: if the width is not one of the supported values
        """
        assert image_width in [2048, 1920, 1024, 960], f'unknown image width {image_width}!'
        return image_width in [2048, 1024]

    def compute_K_roi(self, bbox_square,
                      image_width=2048,
                      image_height=1536,
                      fx=979.7844, fy=979.840,
                      cx=1018.952, cy=779.486):
        """
        Compute the 4x4 camera matrix of a square crop, in NDC coordinates.

        The intrinsics are chosen from the image width (see is_behave_dataset)
        and scaled by the ratio between the actual and the reference width.

        :param bbox_square: (x, y, b, w) of the crop; b and w must be equal
        :param image_width: width of the full image
        :param image_height: height of the full image; must match the 4:3 or 16:9
            aspect ratio expected for the given width
        :param fx: ignored, always overwritten by the per-dataset value; kept for
            interface compatibility
        :param fy: ignored, see fx
        :param cx: ignored, see fx
        :param cy: ignored, see fx
        :return: 4x4 numpy array
        :raises AssertionError: on a non-square box, unsupported width or wrong aspect ratio
        """
        x, y, b, w = bbox_square
        assert b == w

        if self.is_behave_dataset(image_width):
            # exact integer check for a 4:3 image, avoids float equality
            assert image_height * 4 == image_width * 3, f"invalid image aspect ratio: width={image_width}, height={image_height}"
            # the image might be rendered at different size
            ratio = image_width / 2048.
            fx, fy = 979.7844 * ratio, 979.840 * ratio
            cx, cy = 1018.952 * ratio, 779.486 * ratio
        else:
            # exact integer check for a 16:9 image, avoids float equality
            assert image_height * 16 == image_width * 9, f"invalid image aspect ratio: width={image_width}, height={image_height}"
            # intercap camera
            ratio = image_width / 1920
            fx, fy = 918.457763671875 * ratio, 918.4373779296875 * ratio
            cx, cy = 956.9661865234375 * ratio, 555.944580078125 * ratio

        # principal point relative to the top-left corner of the crop
        cx, cy = cx - x, cy - y
        scale = b / 2.
        # in ndc: normalise by half the crop size
        cx_ = (scale - cx) / scale
        cy_ = (scale - cy) / scale
        fx_ = fx / scale
        fy_ = fy / scale
        K_roi = np.array([
            [fx_, 0, cx_, 0],
            [0., fy_, cy_, 0],
            [0, 0, 0, 1.],
            [0, 0, 1, 0],
        ])
        return K_roi

    def crop_full_image(self, mask_hum, mask_obj, rgb_full, crop_masks, bbox_exp=1.0):
        """
        Crop the image and masks to a square region defined by the given masks.

        :param mask_hum: full-size human mask
        :param mask_obj: full-size object mask
        :param rgb_full: full-size RGB image; its shape gives the image height and width
        :param crop_masks: the masks used to compute the crop; they are unpacked
            as the two mask arguments of get_crop_params, so exactly two are expected
        :param bbox_exp: bounding box expansion factor forwarded to get_crop_params
        :return: Kroi, cropped object mask, cropped human mask and cropped RGB
            image (background masked out), in this order.
        """
        bmax, bmin, crop_center, crop_size = self.get_crop_params(*crop_masks, bbox_exp)
        # division by 255 assumes 8-bit inputs -- TODO confirm for all callers
        rgb = resize(crop(rgb_full, crop_center, crop_size), self.input_size) / 255.
        person_mask = resize(crop(mask_hum, crop_center, crop_size), self.input_size) / 255.
        obj_mask = resize(crop(mask_obj, crop_center, crop_size), self.input_size) / 255.

        # crop box as (x, y, size, size), with (x, y) the crop's top-left corner
        xywh = np.concatenate([crop_center - crop_size // 2, np.array([crop_size, crop_size])])
        Kroi = self.compute_K_roi(xywh, rgb_full.shape[1], rgb_full.shape[0])

        # mask bkg out: keep only pixels covered by the human or the object
        mask_comb = (person_mask > 0.5) | (obj_mask > 0.5)
        rgb = rgb * np.expand_dims(mask_comb, -1)
        return Kroi, obj_mask, person_mask, rgb