import math import logging import cv2 import random import numpy as np from normalization.body_normalization import BODY_IDENTIFIERS from normalization.hand_normalization import HAND_IDENTIFIERS HAND_IDENTIFIERS = [id + "_0" for id in HAND_IDENTIFIERS] + [id + "_1" for id in HAND_IDENTIFIERS] ARM_IDENTIFIERS_ORDER = ["neck", "$side$Shoulder", "$side$Elbow", "$side$Wrist"] def __random_pass(prob): return random.random() < prob def __numpy_to_dictionary(data_array: np.ndarray) -> dict: """ Supplementary method converting a NumPy array of body landmark data into dictionaries. The array data must match the order of the BODY_IDENTIFIERS list. """ output = {} for landmark_index, identifier in enumerate(BODY_IDENTIFIERS): output[identifier] = data_array[:, landmark_index].tolist() return output def __dictionary_to_numpy(landmarks_dict: dict) -> np.ndarray: """ Supplementary method converting dictionaries of body landmark data into respective NumPy arrays. The resulting array will match the order of the BODY_IDENTIFIERS list. """ output = np.empty(shape=(len(landmarks_dict["leftEar"]), len(BODY_IDENTIFIERS), 2)) for landmark_index, identifier in enumerate(BODY_IDENTIFIERS): output[:, landmark_index, 0] = np.array(landmarks_dict[identifier])[:, 0] output[:, landmark_index, 1] = np.array(landmarks_dict[identifier])[:, 1] return output def __rotate(origin: tuple, point: tuple, angle: float): """ Rotates a point counterclockwise by a given angle around a given origin. :param origin: Landmark in the (X, Y) format of the origin from which to count angle of rotation :param point: Landmark in the (X, Y) format to be rotated :param angle: Angle under which the point shall be rotated :return: New landmarks (coordinates) """ ox, oy = origin px, py = point qx = ox + math.cos(angle) * (px - ox) - math.sin(angle) * (py - oy) qy = oy + math.sin(angle) * (px - ox) + math.cos(angle) * (py - oy) return qx, qy def __preprocess_row_sign(sign: dict) -> (dict, dict): """ Supplementary method splitting the single-dictionary skeletal data into two dictionaries of body and hand landmarks respectively. """ sign_eval = sign if "nose_X" in sign_eval: body_landmarks = {identifier: [(x, y) for x, y in zip(sign_eval[identifier + "_X"], sign_eval[identifier + "_Y"])] for identifier in BODY_IDENTIFIERS} hand_landmarks = {identifier: [(x, y) for x, y in zip(sign_eval[identifier + "_X"], sign_eval[identifier + "_Y"])] for identifier in HAND_IDENTIFIERS} else: body_landmarks = {identifier: sign_eval[identifier] for identifier in BODY_IDENTIFIERS} hand_landmarks = {identifier: sign_eval[identifier] for identifier in HAND_IDENTIFIERS} return body_landmarks, hand_landmarks def __wrap_sign_into_row(body_identifiers: dict, hand_identifiers: dict) -> dict: """ Supplementary method for merging body and hand data into a single dictionary. """ return {**body_identifiers, **hand_identifiers} def augment_rotate(sign: dict, angle_range: tuple) -> dict: """ AUGMENTATION TECHNIQUE. All the joint coordinates in each frame are rotated by a random angle up to 13 degrees with the center of rotation lying in the center of the frame, which is equal to [0.5; 0.5]. :param sign: Dictionary with sequential skeletal data of the signing person :param angle_range: Tuple containing the angle range (minimal and maximal angle in degrees) to randomly choose the angle by which the landmarks will be rotated from :return: Dictionary with augmented (by rotation) sequential skeletal data of the signing person """ body_landmarks, hand_landmarks = __preprocess_row_sign(sign) angle = math.radians(random.uniform(*angle_range)) body_landmarks = {key: [__rotate((0.5, 0.5), frame, angle) for frame in value] for key, value in body_landmarks.items()} hand_landmarks = {key: [__rotate((0.5, 0.5), frame, angle) for frame in value] for key, value in hand_landmarks.items()} return __wrap_sign_into_row(body_landmarks, hand_landmarks) def augment_shear(sign: dict, type: str, squeeze_ratio: tuple) -> dict: """ AUGMENTATION TECHNIQUE. - Squeeze. All the frames are squeezed from both horizontal sides. Two different random proportions up to 15% of the original frame's width for both left and right side are cut. - Perspective transformation. The joint coordinates are projected onto a new plane with a spatially defined center of projection, which simulates recording the sign video with a slight tilt. Each time, the right or left side, as well as the proportion by which both the width and height will be reduced, are chosen randomly. This proportion is selected from a uniform distribution on the [0; 1) interval. Subsequently, the new plane is delineated by reducing the width at the desired side and the respective vertical edge (height) at both of its adjacent corners. :param sign: Dictionary with sequential skeletal data of the signing person :param type: Type of shear augmentation to perform (either 'squeeze' or 'perspective') :param squeeze_ratio: Tuple containing the relative range from what the proportion of the original width will be randomly chosen. These proportions will either be cut from both sides or used to construct the new projection :return: Dictionary with augmented (by squeezing or perspective transformation) sequential skeletal data of the signing person """ body_landmarks, hand_landmarks = __preprocess_row_sign(sign) if type == "squeeze": move_left = random.uniform(*squeeze_ratio) move_right = random.uniform(*squeeze_ratio) src = np.array(((0, 1), (1, 1), (0, 0), (1, 0)), dtype=np.float32) dest = np.array(((0 + move_left, 1), (1 - move_right, 1), (0 + move_left, 0), (1 - move_right, 0)), dtype=np.float32) mtx = cv2.getPerspectiveTransform(src, dest) elif type == "perspective": move_ratio = random.uniform(*squeeze_ratio) src = np.array(((0, 1), (1, 1), (0, 0), (1, 0)), dtype=np.float32) if __random_pass(0.5): dest = np.array(((0 + move_ratio, 1 - move_ratio), (1, 1), (0 + move_ratio, 0 + move_ratio), (1, 0)), dtype=np.float32) else: dest = np.array(((0, 1), (1 - move_ratio, 1 - move_ratio), (0, 0), (1 - move_ratio, 0 + move_ratio)), dtype=np.float32) mtx = cv2.getPerspectiveTransform(src, dest) else: logging.error("Unsupported shear type provided.") return {} landmarks_array = __dictionary_to_numpy(body_landmarks) augmented_landmarks = cv2.perspectiveTransform(np.array(landmarks_array, dtype=np.float32), mtx) augmented_zero_landmark = cv2.perspectiveTransform(np.array([[[0, 0]]], dtype=np.float32), mtx)[0][0] augmented_landmarks = np.stack([np.where(sub == augmented_zero_landmark, [0, 0], sub) for sub in augmented_landmarks]) body_landmarks = __numpy_to_dictionary(augmented_landmarks) return __wrap_sign_into_row(body_landmarks, hand_landmarks) def augment_arm_joint_rotate(sign: dict, probability: float, angle_range: tuple) -> dict: """ AUGMENTATION TECHNIQUE. The joint coordinates of both arms are passed successively, and the impending landmark is slightly rotated with respect to the current one. The chance of each joint to be rotated is 3:10 and the angle of alternation is a uniform random angle up to +-4 degrees. This simulates slight, negligible variances in each execution of a sign, which do not change its semantic meaning. :param sign: Dictionary with sequential skeletal data of the signing person :param probability: Probability of each joint to be rotated (float from the range [0, 1]) :param angle_range: Tuple containing the angle range (minimal and maximal angle in degrees) to randomly choose the angle by which the landmarks will be rotated from :return: Dictionary with augmented (by arm joint rotation) sequential skeletal data of the signing person """ body_landmarks, hand_landmarks = __preprocess_row_sign(sign) # Iterate over both directions (both hands) for side in ["left", "right"]: # Iterate gradually over the landmarks on arm for landmark_index, landmark_origin in enumerate(ARM_IDENTIFIERS_ORDER): landmark_origin = landmark_origin.replace("$side$", side) # End the process on the current hand if the landmark is not present if landmark_origin not in body_landmarks: break # Perform rotation by provided probability if __random_pass(probability): angle = math.radians(random.uniform(*angle_range)) for to_be_rotated in ARM_IDENTIFIERS_ORDER[landmark_index + 1:]: to_be_rotated = to_be_rotated.replace("$side$", side) # Skip if the landmark is not present if to_be_rotated not in body_landmarks: continue body_landmarks[to_be_rotated] = [__rotate(body_landmarks[landmark_origin][frame_index], frame, angle) for frame_index, frame in enumerate(body_landmarks[to_be_rotated])] return __wrap_sign_into_row(body_landmarks, hand_landmarks) if __name__ == "__main__": pass