import torch

from maskrcnn_benchmark.config import cfg

# transpose
FLIP_LEFT_RIGHT = 0
FLIP_TOP_BOTTOM = 1


class Keypoints(object):
    """Container for per-instance keypoints attached to an image.

    The keypoints are stored as a float32 tensor in ``self.keypoints``. When
    the first dimension is non-zero the data is viewed as
    ``(num_instances, -1, 3)``; the last dimension is read elsewhere in this
    file as ``(x, y, visibility)``.

    Arguments:
        keypoints: tensor or anything ``torch.as_tensor`` accepts.
        size: image size; index 0 is used as width and index 1 as height.
        mode: opaque value stored on the instance and forwarded to copies.
    """

    def __init__(self, keypoints, size, mode=None):
        # FIXME remove check once we have better integration with device
        # in my version this would consistently return a CPU tensor
        device = keypoints.device if isinstance(keypoints, torch.Tensor) else torch.device("cpu")
        keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device)
        num_keypoints = keypoints.shape[0]
        if num_keypoints:
            # Only reshape non-empty data: with zero elements the -1 dimension
            # cannot be inferred.
            keypoints = keypoints.view(num_keypoints, -1, 3)

        # TODO should I split them?
        # self.visibility = keypoints[..., 2]
        self.keypoints = keypoints  # [..., :2]

        self.size = size
        self.mode = mode
        self.extra_fields = {}

    def crop(self, box):
        """Cropping is not supported for keypoints."""
        raise NotImplementedError()

    def resize(self, size, *args, **kwargs):
        """Return a copy whose x/y coordinates are rescaled to ``size``.

        Arguments:
            size: target ``(width, height)``; each entry is divided by the
                matching entry of ``self.size`` to get the scale factor.

        Extra positional/keyword arguments are accepted and ignored.
        Extra fields are carried over to the returned object unchanged.
        """
        ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(size, self.size))
        ratio_w, ratio_h = ratios
        resized_data = self.keypoints.clone()
        # An empty container built from ``[]`` is 1-D and cannot be indexed
        # with ``[..., 0]``; there is nothing to scale in that case anyway.
        if resized_data.numel():
            resized_data[..., 0] *= ratio_w
            resized_data[..., 1] *= ratio_h
        keypoints = type(self)(resized_data, size, self.mode)
        for k, v in self.extra_fields.items():
            keypoints.add_field(k, v)
        return keypoints

    def transpose(self, method):
        """Return a horizontally flipped copy of the keypoints.

        Arguments:
            method: must be ``FLIP_LEFT_RIGHT``.

        Raises:
            NotImplementedError: for any other ``method``.

        Relies on ``self.FLIP_INDS`` (provided by subclasses) to reorder the
        keypoints so that left/right labels are swapped along with the
        coordinates.
        """
        if method not in (FLIP_LEFT_RIGHT,):
            raise NotImplementedError("Only FLIP_LEFT_RIGHT implemented")

        if self.keypoints.numel() == 0:
            # Nothing to flip; also avoids 2-D indexing into a 1-D empty tensor.
            flipped_data = self.keypoints.clone()
        else:
            flip_inds = self.FLIP_INDS
            # Indexing with an index tensor yields a new tensor, so the
            # in-place edits below do not touch self.keypoints.
            flipped_data = self.keypoints[:, flip_inds]
            width = self.size[0]
            TO_REMOVE = 1
            # Flip x coordinates
            flipped_data[..., 0] = width - flipped_data[..., 0] - TO_REMOVE

            # Maintain COCO convention that if visibility == 0, then x, y = 0
            inds = flipped_data[..., 2] == 0
            flipped_data[inds] = 0

        keypoints = type(self)(flipped_data, self.size, self.mode)
        for k, v in self.extra_fields.items():
            keypoints.add_field(k, v)
        return keypoints

    def to(self, *args, **kwargs):
        """Return a copy with the tensor moved/cast via ``Tensor.to``.

        Extra fields that expose a ``to`` attribute are converted with the
        same arguments; all other fields are passed through as they are.
        """
        keypoints = type(self)(self.keypoints.to(*args, **kwargs), self.size, self.mode)
        for k, v in self.extra_fields.items():
            if hasattr(v, "to"):
                v = v.to(*args, **kwargs)
            keypoints.add_field(k, v)
        return keypoints

    def __getitem__(self, item):
        """Return a new object holding ``self.keypoints[item]``.

        Every extra field is indexed with the same ``item``.
        """
        keypoints = type(self)(self.keypoints[item], self.size, self.mode)
        for k, v in self.extra_fields.items():
            keypoints.add_field(k, v[item])
        return keypoints

    def add_field(self, field, field_data):
        """Store ``field_data`` under the key ``field``."""
        self.extra_fields[field] = field_data

    def get_field(self, field):
        """Return the extra field stored under ``field`` (KeyError if absent)."""
        return self.extra_fields[field]

    def __repr__(self):
        s = self.__class__.__name__ + "("
        s += "num_instances={}, ".format(len(self.keypoints))
        s += "image_width={}, ".format(self.size[0])
        s += "image_height={})".format(self.size[1])
        return s


class PersonKeypoints(Keypoints):
    """Keypoints for people, using the 17 names listed in ``_NAMES``.

    If ``cfg.MODEL.ROI_KEYPOINT_HEAD.KEYPOINT_NAME`` is non-empty, it is used
    as the active list of names (``self.NAMES``); otherwise the full
    ``_NAMES`` list is used. ``FLIP_MAP``, ``FLIP_INDS`` and ``CONNECTIONS``
    are derived from the active list.
    """

    _NAMES = [
        "nose",
        "left_eye",
        "right_eye",
        "left_ear",
        "right_ear",
        "left_shoulder",
        "right_shoulder",
        "left_elbow",
        "right_elbow",
        "left_wrist",
        "right_wrist",
        "left_hip",
        "right_hip",
        "left_knee",
        "right_knee",
        "left_ankle",
        "right_ankle",
    ]
    _FLIP_MAP = {
        "left_eye": "right_eye",
        "left_ear": "right_ear",
        "left_shoulder": "right_shoulder",
        "left_elbow": "right_elbow",
        "left_wrist": "right_wrist",
        "left_hip": "right_hip",
        "left_knee": "right_knee",
        "left_ankle": "right_ankle",
    }

    def __init__(self, *args, **kwargs):
        super(PersonKeypoints, self).__init__(*args, **kwargs)
        configured_names = cfg.MODEL.ROI_KEYPOINT_HEAD.KEYPOINT_NAME
        if len(configured_names) > 0:
            self.NAMES = configured_names
            # Keep a left/right pair only when BOTH sides are configured.
            # Otherwise _create_flip_indices would look up a name that is not
            # in NAMES and raise ValueError; an unpaired keypoint simply maps
            # to itself when flipped.
            self.FLIP_MAP = {
                left: right
                for left, right in PersonKeypoints._FLIP_MAP.items()
                if left in configured_names and right in configured_names
            }
        else:
            self.NAMES = PersonKeypoints._NAMES
            self.FLIP_MAP = PersonKeypoints._FLIP_MAP
        self.FLIP_INDS = self._create_flip_indices(self.NAMES, self.FLIP_MAP)
        self.CONNECTIONS = self._kp_connections(self.NAMES)

    def to_coco_format(self):
        """Return one flat ``[x, y, v, x, y, v, ...]`` list per instance.

        Each list has ``3 * len(_NAMES)`` entries laid out in ``_NAMES``
        order. Slots for names that are not in ``self.NAMES`` stay at 0.
        """
        # Position of every active keypoint inside the full _NAMES layout,
        # computed once instead of once per value per instance.
        slots = [PersonKeypoints._NAMES.index(name) for name in self.NAMES]
        coco_result = []
        for i in range(self.keypoints.shape[0]):
            coco_kps = [0] * len(PersonKeypoints._NAMES) * 3
            for ki, slot in enumerate(slots):
                coco_kps[3 * slot] = self.keypoints[i, ki, 0].item()
                coco_kps[3 * slot + 1] = self.keypoints[i, ki, 1].item()
                coco_kps[3 * slot + 2] = self.keypoints[i, ki, 2].item()
            coco_result.append(coco_kps)
        return coco_result

    def _create_flip_indices(self, names, flip_map):
        """Return a tensor giving, for each name, the index of its mirror.

        Names that do not appear in ``flip_map`` (as key or value) map to
        their own index.
        """
        full_flip_map = flip_map.copy()
        full_flip_map.update({v: k for k, v in flip_map.items()})
        flipped_names = [full_flip_map.get(name, name) for name in names]
        flip_indices = [names.index(name) for name in flipped_names]
        return torch.tensor(flip_indices)

    def _kp_connections(self, keypoints):
        """Return ``[index_a, index_b]`` pairs for the skeleton edges.

        Arguments:
            keypoints: sequence of keypoint names; indices refer to it.

        Edges with an endpoint missing from ``keypoints`` are skipped.
        """
        CONNECTIONS = [
            ["left_eye", "right_eye"],
            ["left_eye", "nose"],
            ["right_eye", "nose"],
            ["right_eye", "right_ear"],
            ["left_eye", "left_ear"],
            ["right_shoulder", "right_elbow"],
            ["right_elbow", "right_wrist"],
            ["left_shoulder", "left_elbow"],
            ["left_elbow", "left_wrist"],
            ["right_hip", "right_knee"],
            ["right_knee", "right_ankle"],
            ["left_hip", "left_knee"],
            ["left_knee", "left_ankle"],
            ["right_shoulder", "left_shoulder"],
            ["right_hip", "left_hip"],
        ]
        # Membership and index lookup both use the same sequence so the
        # returned indices are always valid for the names that were passed in.
        kp_lines = [
            [keypoints.index(conn[0]), keypoints.index(conn[1])]
            for conn in CONNECTIONS
            if conn[0] in keypoints and conn[1] in keypoints
        ]
        return kp_lines


# TODO make this nicer, this is a direct translation from C2 (but removing the inner loop)
def keypoints_to_heat_map(keypoints, rois, heatmap_size):
    """Map keypoints to linear indices of a square heatmap over each roi.

    Arguments:
        keypoints: tensor whose last dimension is read as (x, y, visibility).
            Presumably shaped (num_rois, num_keypoints, 3) -- confirm against
            the caller.
        rois: tensor whose columns 0..3 are read as (x1, y1, x2, y2).
        heatmap_size: side length of the heatmap.

    Returns:
        (heatmaps, valid): ``heatmaps`` holds ``y * heatmap_size + x`` for
        each keypoint, zeroed where ``valid`` is 0; ``valid`` is 1 where the
        keypoint falls inside the heatmap and has visibility > 0. Both are
        long tensors. For empty ``rois`` two empty long tensors are returned.
    """
    if rois.numel() == 0:
        return rois.new().long(), rois.new().long()
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    scale_x = heatmap_size / (rois[:, 2] - rois[:, 0])
    scale_y = heatmap_size / (rois[:, 3] - rois[:, 1])

    # Add a trailing axis so per-roi values broadcast over the keypoints.
    offset_x = offset_x[:, None]
    offset_y = offset_y[:, None]
    scale_x = scale_x[:, None]
    scale_y = scale_y[:, None]

    x = keypoints[..., 0]
    y = keypoints[..., 1]

    # Keypoints lying exactly on the right/bottom roi edge would scale to
    # heatmap_size (one past the last cell); remember them so they can be
    # put into the last cell below.
    x_boundary_inds = x == rois[:, 2][:, None]
    y_boundary_inds = y == rois[:, 3][:, None]

    x = (x - offset_x) * scale_x
    x = x.floor().long()
    y = (y - offset_y) * scale_y
    y = y.floor().long()

    x[x_boundary_inds] = heatmap_size - 1
    y[y_boundary_inds] = heatmap_size - 1

    valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
    vis = keypoints[..., 2] > 0
    valid = (valid_loc & vis).long()

    lin_ind = y * heatmap_size + x
    heatmaps = lin_ind * valid

    return heatmaps, valid