Spaces:
Runtime error
Runtime error
""" | |
This file contains functions that are used to perform data augmentation. | |
""" | |
import cv2 | |
import numpy as np | |
import torch | |
from loguru import logger | |
def get_transform(center, scale, res, rot=0): | |
"""Generate transformation matrix.""" | |
h = 200 * scale | |
t = np.zeros((3, 3)) | |
t[0, 0] = float(res[1]) / h | |
t[1, 1] = float(res[0]) / h | |
t[0, 2] = res[1] * (-float(center[0]) / h + 0.5) | |
t[1, 2] = res[0] * (-float(center[1]) / h + 0.5) | |
t[2, 2] = 1 | |
if not rot == 0: | |
rot = -rot # To match direction of rotation from cropping | |
rot_mat = np.zeros((3, 3)) | |
rot_rad = rot * np.pi / 180 | |
sn, cs = np.sin(rot_rad), np.cos(rot_rad) | |
rot_mat[0, :2] = [cs, -sn] | |
rot_mat[1, :2] = [sn, cs] | |
rot_mat[2, 2] = 1 | |
# Need to rotate around center | |
t_mat = np.eye(3) | |
t_mat[0, 2] = -res[1] / 2 | |
t_mat[1, 2] = -res[0] / 2 | |
t_inv = t_mat.copy() | |
t_inv[:2, 2] *= -1 | |
t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t))) | |
return t | |
def transform(pt, center, scale, res, invert=0, rot=0): | |
"""Transform pixel location to different reference.""" | |
t = get_transform(center, scale, res, rot=rot) | |
if invert: | |
t = np.linalg.inv(t) | |
new_pt = np.array([pt[0] - 1, pt[1] - 1, 1.0]).T | |
new_pt = np.dot(t, new_pt) | |
return new_pt[:2].astype(int) + 1 | |
def rotate_2d(pt_2d, rot_rad): | |
x = pt_2d[0] | |
y = pt_2d[1] | |
sn, cs = np.sin(rot_rad), np.cos(rot_rad) | |
xx = x * cs - y * sn | |
yy = x * sn + y * cs | |
return np.array([xx, yy], dtype=np.float32) | |
def gen_trans_from_patch_cv( | |
c_x, c_y, src_width, src_height, dst_width, dst_height, scale, rot, inv=False | |
): | |
# augment size with scale | |
src_w = src_width * scale | |
src_h = src_height * scale | |
src_center = np.array([c_x, c_y], dtype=np.float32) | |
# augment rotation | |
rot_rad = np.pi * rot / 180 | |
src_downdir = rotate_2d(np.array([0, src_h * 0.5], dtype=np.float32), rot_rad) | |
src_rightdir = rotate_2d(np.array([src_w * 0.5, 0], dtype=np.float32), rot_rad) | |
dst_w = dst_width | |
dst_h = dst_height | |
dst_center = np.array([dst_w * 0.5, dst_h * 0.5], dtype=np.float32) | |
dst_downdir = np.array([0, dst_h * 0.5], dtype=np.float32) | |
dst_rightdir = np.array([dst_w * 0.5, 0], dtype=np.float32) | |
src = np.zeros((3, 2), dtype=np.float32) | |
src[0, :] = src_center | |
src[1, :] = src_center + src_downdir | |
src[2, :] = src_center + src_rightdir | |
dst = np.zeros((3, 2), dtype=np.float32) | |
dst[0, :] = dst_center | |
dst[1, :] = dst_center + dst_downdir | |
dst[2, :] = dst_center + dst_rightdir | |
if inv: | |
trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) | |
else: | |
trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) | |
trans = trans.astype(np.float32) | |
return trans | |
def generate_patch_image( | |
cvimg, | |
bbox, | |
scale, | |
rot, | |
out_shape, | |
interpl_strategy, | |
gauss_kernel=5, | |
gauss_sigma=8.0, | |
): | |
img = cvimg.copy() | |
bb_c_x = float(bbox[0]) | |
bb_c_y = float(bbox[1]) | |
bb_width = float(bbox[2]) | |
bb_height = float(bbox[3]) | |
trans = gen_trans_from_patch_cv( | |
bb_c_x, bb_c_y, bb_width, bb_height, out_shape[1], out_shape[0], scale, rot | |
) | |
# anti-aliasing | |
blur = cv2.GaussianBlur(img, (gauss_kernel, gauss_kernel), gauss_sigma) | |
img_patch = cv2.warpAffine( | |
blur, trans, (int(out_shape[1]), int(out_shape[0])), flags=interpl_strategy | |
) | |
img_patch = img_patch.astype(np.float32) | |
inv_trans = gen_trans_from_patch_cv( | |
bb_c_x, | |
bb_c_y, | |
bb_width, | |
bb_height, | |
out_shape[1], | |
out_shape[0], | |
scale, | |
rot, | |
inv=True, | |
) | |
return img_patch, trans, inv_trans | |
def augm_params(is_train, flip_prob, noise_factor, rot_factor, scale_factor): | |
"""Get augmentation parameters.""" | |
flip = 0 # flipping | |
pn = np.ones(3) # per channel pixel-noise | |
rot = 0 # rotation | |
sc = 1 # scaling | |
if is_train: | |
# We flip with probability 1/2 | |
if np.random.uniform() <= flip_prob: | |
flip = 1 | |
assert False, "Flipping not supported" | |
# Each channel is multiplied with a number | |
# in the area [1-opt.noiseFactor,1+opt.noiseFactor] | |
pn = np.random.uniform(1 - noise_factor, 1 + noise_factor, 3) | |
# The rotation is a number in the area [-2*rotFactor, 2*rotFactor] | |
rot = min( | |
2 * rot_factor, | |
max( | |
-2 * rot_factor, | |
np.random.randn() * rot_factor, | |
), | |
) | |
# The scale is multiplied with a number | |
# in the area [1-scaleFactor,1+scaleFactor] | |
sc = min( | |
1 + scale_factor, | |
max( | |
1 - scale_factor, | |
np.random.randn() * scale_factor + 1, | |
), | |
) | |
# but it is zero with probability 3/5 | |
if np.random.uniform() <= 0.6: | |
rot = 0 | |
augm_dict = {} | |
augm_dict["flip"] = flip | |
augm_dict["pn"] = pn | |
augm_dict["rot"] = rot | |
augm_dict["sc"] = sc | |
return augm_dict | |
def rgb_processing(is_train, rgb_img, center, bbox_dim, augm_dict, img_res): | |
rot = augm_dict["rot"] | |
sc = augm_dict["sc"] | |
pn = augm_dict["pn"] | |
scale = sc * bbox_dim | |
crop_dim = int(scale * 200) | |
# faster cropping!! | |
rgb_img = generate_patch_image( | |
rgb_img, | |
[center[0], center[1], crop_dim, crop_dim], | |
1.0, | |
rot, | |
[img_res, img_res], | |
cv2.INTER_CUBIC, | |
)[0] | |
# in the rgb image we add pixel noise in a channel-wise manner | |
rgb_img[:, :, 0] = np.minimum(255.0, np.maximum(0.0, rgb_img[:, :, 0] * pn[0])) | |
rgb_img[:, :, 1] = np.minimum(255.0, np.maximum(0.0, rgb_img[:, :, 1] * pn[1])) | |
rgb_img[:, :, 2] = np.minimum(255.0, np.maximum(0.0, rgb_img[:, :, 2] * pn[2])) | |
rgb_img = np.transpose(rgb_img.astype("float32"), (2, 0, 1)) / 255.0 | |
return rgb_img | |
def transform_kp2d(kp2d, bbox): | |
# bbox: (cx, cy, scale) in the original image space | |
# scale is normalized | |
assert isinstance(kp2d, np.ndarray) | |
assert len(kp2d.shape) == 2 | |
cx, cy, scale = bbox | |
s = 200 * scale # to px | |
cap_dim = 1000 # px | |
factor = cap_dim / (1.5 * s) | |
kp2d_cropped = np.copy(kp2d) | |
kp2d_cropped[:, 0] -= cx - 1.5 / 2 * s | |
kp2d_cropped[:, 1] -= cy - 1.5 / 2 * s | |
kp2d_cropped[:, 0] *= factor | |
kp2d_cropped[:, 1] *= factor | |
return kp2d_cropped | |
def j2d_processing(kp, center, bbox_dim, augm_dict, img_res): | |
"""Process gt 2D keypoints and apply all augmentation transforms.""" | |
scale = augm_dict["sc"] * bbox_dim | |
rot = augm_dict["rot"] | |
nparts = kp.shape[0] | |
for i in range(nparts): | |
kp[i, 0:2] = transform( | |
kp[i, 0:2] + 1, | |
center, | |
scale, | |
[img_res, img_res], | |
rot=rot, | |
) | |
# convert to normalized coordinates | |
kp = normalize_kp2d_np(kp, img_res) | |
kp = kp.astype("float32") | |
return kp | |
def pose_processing(pose, augm_dict): | |
"""Process SMPL theta parameters and apply all augmentation transforms.""" | |
rot = augm_dict["rot"] | |
# rotation or the pose parameters | |
pose[:3] = rot_aa(pose[:3], rot) | |
# flip the pose parameters | |
# (72),float | |
pose = pose.astype("float32") | |
return pose | |
def rot_aa(aa, rot): | |
"""Rotate axis angle parameters.""" | |
# pose parameters | |
R = np.array( | |
[ | |
[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0], | |
[np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0], | |
[0, 0, 1], | |
] | |
) | |
# find the rotation of the body in camera frame | |
per_rdg, _ = cv2.Rodrigues(aa) | |
# apply the global rotation to the global orientation | |
resrot, _ = cv2.Rodrigues(np.dot(R, per_rdg)) | |
aa = (resrot.T)[0] | |
return aa | |
def denormalize_images(images): | |
images = images * torch.tensor([0.229, 0.224, 0.225], device=images.device).reshape( | |
1, 3, 1, 1 | |
) | |
images = images + torch.tensor([0.485, 0.456, 0.406], device=images.device).reshape( | |
1, 3, 1, 1 | |
) | |
return images | |
def read_img(img_fn, dummy_shape): | |
try: | |
cv_img = _read_img(img_fn) | |
except: | |
logger.warning(f"Unable to load {img_fn}") | |
cv_img = np.zeros(dummy_shape, dtype=np.float32) | |
return cv_img, False | |
return cv_img, True | |
def _read_img(img_fn): | |
img = cv2.cvtColor(cv2.imread(img_fn), cv2.COLOR_BGR2RGB) | |
return img.astype(np.float32) | |
def normalize_kp2d_np(kp2d: np.ndarray, img_res): | |
assert kp2d.shape[1] == 3 | |
kp2d_normalized = kp2d.copy() | |
kp2d_normalized[:, :2] = 2.0 * kp2d[:, :2] / img_res - 1.0 | |
return kp2d_normalized | |
def unnormalize_2d_kp(kp_2d_np: np.ndarray, res): | |
assert kp_2d_np.shape[1] == 3 | |
kp_2d = np.copy(kp_2d_np) | |
kp_2d[:, :2] = 0.5 * res * (kp_2d[:, :2] + 1) | |
return kp_2d | |
def normalize_kp2d(kp2d: torch.Tensor, img_res): | |
assert len(kp2d.shape) == 3 | |
kp2d_normalized = kp2d.clone() | |
kp2d_normalized[:, :, :2] = 2.0 * kp2d[:, :, :2] / img_res - 1.0 | |
return kp2d_normalized | |
def unormalize_kp2d(kp2d_normalized: torch.Tensor, img_res): | |
assert len(kp2d_normalized.shape) == 3 | |
assert kp2d_normalized.shape[2] == 2 | |
kp2d = kp2d_normalized.clone() | |
kp2d = 0.5 * img_res * (kp2d + 1) | |
return kp2d | |
def get_wp_intrix(fixed_focal: float, img_res): | |
# consruct weak perspective on patch | |
camera_center = np.array([img_res // 2, img_res // 2]) | |
intrx = torch.zeros([3, 3]) | |
intrx[0, 0] = fixed_focal | |
intrx[1, 1] = fixed_focal | |
intrx[2, 2] = 1.0 | |
intrx[0, -1] = camera_center[0] | |
intrx[1, -1] = camera_center[1] | |
return intrx | |
def get_aug_intrix( | |
intrx, fixed_focal: float, img_res, use_gt_k, bbox_cx, bbox_cy, scale | |
): | |
""" | |
This function returns camera intrinsics under scaling. | |
If use_gt_k, the GT K is used, but scaled based on the amount of scaling in the patch. | |
Else, we construct an intrinsic camera with a fixed focal length and fixed camera center. | |
""" | |
if not use_gt_k: | |
# consruct weak perspective on patch | |
intrx = get_wp_intrix(fixed_focal, img_res) | |
else: | |
# update the GT intrinsics (full image space) | |
# such that it matches the scale of the patch | |
dim = scale * 200.0 # bbox size | |
k_scale = float(img_res) / dim # resized_dim / bbox_size in full image space | |
""" | |
# x1 and y1: top-left corner of bbox | |
intrinsics after data augmentation | |
fx' = k*fx | |
fy' = k*fy | |
cx' = k*(cx - x1) | |
cy' = k*(cy - y1) | |
""" | |
intrx[0, 0] *= k_scale # k*fx | |
intrx[1, 1] *= k_scale # k*fy | |
intrx[0, 2] -= bbox_cx - dim / 2.0 | |
intrx[1, 2] -= bbox_cy - dim / 2.0 | |
intrx[0, 2] *= k_scale | |
intrx[1, 2] *= k_scale | |
return intrx | |