Spaces:
Sleeping
Sleeping
File size: 4,484 Bytes
2fd6166 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
from os import path as osp
import cv2
import numpy as np
from torch.utils.data import Dataset
from dataset.img_utils import masks2bbox, resize, crop
class BaseDataset(Dataset):
    """Base dataset for paired human + object RGB frames.

    Provides shared utilities: loading person/object segmentation masks that
    sit next to each ``*.color.jpg`` file, computing a square crop around the
    joint human+object bounding box, and deriving the NDC camera intrinsics
    (``K_roi``) that correspond to that crop for the BEHAVE and InterCap
    camera setups.
    """
    def __init__(self, data_paths, input_size=(224, 224)):
        """
        :param data_paths: list of RGB image file paths (``*.color.jpg``)
        :param input_size: (width, height) each crop is resized to
        """
        self.data_paths = data_paths # RGB image files
        self.input_size = input_size
        # Flip x and y axes: converts the OpenCV camera convention to the
        # PyTorch3D convention (both axes negated, z unchanged).
        opencv2py3d = np.eye(4)
        opencv2py3d[0, 0] = opencv2py3d[1, 1] = -1
        self.opencv2py3d = opencv2py3d
    def __len__(self):
        return len(self.data_paths)
    def load_masks(self, rgb_file):
        """Load the person and object masks paired with an RGB file.

        The mask files live next to the RGB file and share its prefix; several
        historical naming patterns are tried for the object mask.

        :param rgb_file: path to a ``*.color.jpg`` image
        :return: (person_mask, obj_mask) as single-channel uint8 arrays
        :raises FileNotFoundError: if either mask image cannot be read
        """
        person_mask_file = rgb_file.replace('.color.jpg', ".person_mask.png")
        if not osp.isfile(person_mask_file):
            person_mask_file = rgb_file.replace('.color.jpg', ".person_mask.jpg")
        obj_mask_file = None
        # Object masks were saved under several naming conventions; try each
        # in order and keep the first one that exists on disk.
        for pat in [".obj_rend_mask.png", ".obj_rend_mask.jpg", ".obj_mask.png", ".obj_mask.jpg", ".object_rend.png"]:
            obj_mask_file = rgb_file.replace('.color.jpg', pat)
            if osp.isfile(obj_mask_file):
                break
        person_mask = cv2.imread(person_mask_file, cv2.IMREAD_GRAYSCALE)
        obj_mask = cv2.imread(obj_mask_file, cv2.IMREAD_GRAYSCALE)
        # cv2.imread returns None (no exception) when a file is missing or
        # unreadable; fail loudly here instead of crashing later downstream.
        if person_mask is None:
            raise FileNotFoundError(f"Cannot read person mask for {rgb_file}")
        if obj_mask is None:
            raise FileNotFoundError(f"Cannot read object mask for {rgb_file}")
        return person_mask, obj_mask
    def get_crop_params(self, mask_hum, mask_obj, bbox_exp=1.0):
        """Compute a square crop window covering both masks.

        :param mask_hum: person mask
        :param mask_obj: object mask
        :param bbox_exp: expansion factor applied to the tight bbox size
        :return: (bmax, bmin, crop_center, crop_size) where crop_size is even
        """
        bmin, bmax = masks2bbox([mask_hum, mask_obj])
        crop_center = (bmin + bmax) // 2
        # crop_size = np.max(bmax - bmin)
        crop_size = int(np.max(bmax - bmin) * bbox_exp)
        if crop_size % 2 == 1:
            crop_size += 1 # make sure it is an even number
        return bmax, bmin, crop_center, crop_size
    def is_behave_dataset(self, image_width):
        """Classify the source dataset from the image width.

        BEHAVE images are 2048 (or half-res 1024) wide; InterCap images are
        1920 (or half-res 960) wide.

        :param image_width: full image width in pixels
        :return: True for BEHAVE, False for InterCap
        """
        assert image_width in [2048, 1920, 1024, 960], f'unknown image width {image_width}!'
        return image_width in (2048, 1024)
    def compute_K_roi(self, bbox_square,
                      image_width=2048,
                      image_height=1536,
                      fx=979.7844, fy=979.840,
                      cx=1018.952, cy=779.486):
        """Compute the 4x4 NDC-space intrinsic matrix for a square crop.

        The full-image intrinsics are selected from the dataset type
        (BEHAVE vs InterCap, see :meth:`is_behave_dataset`) and scaled by the
        rendered image resolution, then re-centered and normalized to the
        crop's NDC coordinates.

        :param bbox_square: (x, y, b, w) crop window; b == w (square) required
        :param image_width: full image width in pixels
        :param image_height: full image height in pixels
        :param fx, fy, cx, cy: legacy defaults; note these are always
            overwritten below by the dataset-specific intrinsics and have no
            effect — kept only for interface compatibility.
        :return: 4x4 numpy array K in NDC convention
        """
        x, y, b, w = bbox_square
        assert b == w
        is_behave = self.is_behave_dataset(image_width)
        if is_behave:
            # 2048x1536 is exactly 4:3; both ratios are exact binary floats,
            # so the float equality here is safe.
            assert image_height / image_width == 0.75, f"invalid image aspect ratio: width={image_width}, height={image_height}"
            # the image might be rendered at different size
            ratio = image_width/2048.
            fx, fy = 979.7844*ratio, 979.840*ratio
            cx, cy = 1018.952*ratio, 779.486*ratio
        else:
            assert image_height / image_width == 9/16, f"invalid image aspect ratio: width={image_width}, height={image_height}"
            # intercap camera
            ratio = image_width/1920
            fx, fy = 918.457763671875*ratio, 918.4373779296875*ratio
            cx, cy = 956.9661865234375*ratio, 555.944580078125*ratio
        # Shift the principal point into the crop's local pixel frame.
        cx, cy = cx - x, cy - y
        scale = b/2.
        # in ndc
        cx_ = (scale - cx)/scale
        cy_ = (scale - cy)/scale
        fx_ = fx/scale
        fy_ = fy/scale
        K_roi = np.array([
            [fx_, 0, cx_, 0],
            [0., fy_, cy_, 0, ],
            [0, 0, 0, 1.],
            [0, 0, 1, 0]
        ])
        return K_roi
    def crop_full_image(self, mask_hum, mask_obj, rgb_full, crop_masks, bbox_exp=1.0):
        """
        crop the image based on the given masks
        :param mask_hum: person mask, same size as rgb_full
        :param mask_obj: object mask, same size as rgb_full
        :param rgb_full: full RGB image
        :param crop_masks: a list of masks used to do the crop
        :param bbox_exp: expansion factor for the crop bounding box
        :return: Kroi, cropped human, object mask and RGB images (background masked out),
            all resized to ``self.input_size`` and scaled to [0, 1].
        """
        bmax, bmin, crop_center, crop_size = self.get_crop_params(*crop_masks, bbox_exp)
        rgb = resize(crop(rgb_full, crop_center, crop_size), self.input_size) / 255.
        person_mask = resize(crop(mask_hum, crop_center, crop_size), self.input_size) / 255.
        obj_mask = resize(crop(mask_obj, crop_center, crop_size), self.input_size) / 255.
        # (x, y, w, h) of the square crop in full-image pixel coordinates.
        xywh = np.concatenate([crop_center - crop_size // 2, np.array([crop_size, crop_size])])
        Kroi = self.compute_K_roi(xywh, rgb_full.shape[1], rgb_full.shape[0])
        # mask bkg out: keep only pixels belonging to the person or the object
        mask_comb = (person_mask > 0.5) | (obj_mask > 0.5)
        rgb = rgb * np.expand_dims(mask_comb, -1)
        return Kroi, obj_mask, person_mask, rgb
|