# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Zigang Geng (zigang@mail.ustc.edu.cn)
# ------------------------------------------------------------------------------
from __future__ import annotations
import logging
import os
import json
import copy
import math
import random
from pathlib import Path
from typing import Any
import cv2
import numpy as np
import torch
import torchvision
from einops import rearrange
from PIL import Image
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from pycocotools.coco import COCO

logger = logging.getLogger(__name__)
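
# Fixed RGB palette for the keypoint markers drawn in generate_target; the
# color names are also interpolated into the generated text prompts.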
colors = {
    'red': (255, 0, 0),
    'green': (0, 255, 0),
    'blue': (0, 0, 255),
    'yellow': (255, 255, 0),
    'cyan': (0, 255, 255),
    'magenta': (255, 0, 255),
    'gray': (128, 128, 128),
    'white': (255, 255, 255),
    'black': (0, 0, 0),
}


def readTXT(txt_path):
    with open(txt_path, 'r') as f:
        listInTXT = [line.strip() for line in f]
    return listInTXT


class PoseDataset(Dataset):
    def __init__(self, root, image_set, is_train, max_prompt_num=5, min_prompt_num=1,
                 radius=10, size=256, transparency=0.0, sample_weight=1.0, transform=None):
        self.sample_weight = sample_weight
        self.max_prompt_num = max_prompt_num
        self.min_prompt_num = min_prompt_num
        self.radius = radius
        self.transparency = transparency
        self.num_joints = 0
        self.pixel_std = 200
        self.flip_pairs = []
        self.parent_ids = []
        self.keypoints_type = {}
        self.is_train = is_train
        self.image_set = image_set
        self.root = root
        self.scale_factor = 0.35
        self.rotation_factor = 45
        self.flip = True
        self.num_joints_half_body = 8
        self.prob_half_body = 0.3
        self.image_size = np.array((size, size))
        self.heatmap_size = np.array((size, size))
        self.transform = transform
        self.db = []
        pose_diverse_prompt_path = 'dataset/prompt/prompt_pose.txt'
        with open(pose_diverse_prompt_path) as f:
            self.pose_diverse_prompt_list = [line.strip('\n') for line in f]

    def _get_db(self):
        raise NotImplementedError

    def evaluate(self, preds, output_dir, *args, **kwargs):
        raise NotImplementedError
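
    # Half-body augmentation: with probability prob_half_body, the person box
    # is shrunk to cover only the upper- or lower-body keypoints (HRNet-style),
    # exposing the model to truncated poses. Returns (None, None) when too few
    # joints are visible to form a stable crop.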
    def half_body_transform(self, joints, joints_vis):
        upper_joints = []
        lower_joints = []
        for joint_id in range(self.num_joints):
            if joints_vis[joint_id][0] > 0:
                if joint_id in self.upper_body_ids:
                    upper_joints.append(joints[joint_id])
                else:
                    lower_joints.append(joints[joint_id])
        # Pick the upper body half of the time when it has enough visible joints.
        if np.random.rand() < 0.5 and len(upper_joints) > 2:
            selected_joints = upper_joints
        else:
            selected_joints = lower_joints \
                if len(lower_joints) > 2 else upper_joints
        if len(selected_joints) < 2:
            return None, None
        selected_joints = np.array(selected_joints, dtype=np.float32)
        center = selected_joints.mean(axis=0)[:2]
        left_top = np.amin(selected_joints, axis=0)
        right_bottom = np.amax(selected_joints, axis=0)
        w = right_bottom[0] - left_top[0]
        h = right_bottom[1] - left_top[1]
        # Expand the tight keypoint box to the target aspect ratio.
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array(
            [
                w * 1.0 / self.pixel_std,
                h * 1.0 / self.pixel_std
            ],
            dtype=np.float32
        )
        scale = scale * 1.5
        return center, scale

    def __len__(self):
        return int(len(self.db) * self.sample_weight)
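
    # sample_weight re-weights this dataset when several datasets are mixed:
    # a weight > 1 oversamples by wrapping idx around with a modulo, while a
    # weight < 1 subsamples by striding through the db with a random offset.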
    def __getitem__(self, idx):
        if self.sample_weight >= 1:
            idx = idx % len(self.db)
        else:
            idx = int(idx / self.sample_weight) + random.randint(0, int(1 / self.sample_weight) - 1)
        db_rec = copy.deepcopy(self.db[idx])
        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        )
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']
        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0
        if self.is_train:
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis
                )
                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0
            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(
                    joints, joints_vis, data_numpy.shape[1], self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1
        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans,
            (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)
        if self.transform:
            input = self.transform(input)
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
        target, prompt = self.generate_target(input, joints, joints_vis)
        # return Image.fromarray(input), Image.fromarray(target), prompt
        # Rescale both images from [0, 255] to [-1, 1] and move channels first.
        image_0 = rearrange(2 * torch.tensor(np.array(input)).float() / 255 - 1, "h w c -> c h w")
        image_1 = rearrange(2 * torch.tensor(np.array(target)).float() / 255 - 1, "h w c -> c h w")
        return dict(edited=image_1, edit=dict(c_concat=image_0, c_crossattn=prompt))
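
    # generate_target paints a filled circle in a random color at each joint
    # of a random keypoint subset and builds the matching instruction string
    # from the templates in prompt_pose.txt, which are formatted with {color}
    # and {joint} fields.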
    def generate_target(self, input, joints, joints_vis):
        '''
        :param input: [height, width, 3]
        :param joints: [num_joints, 3]
        :param joints_vis: [num_joints, 3]
        :return: target, prompt
        '''
        radius = self.radius
        target = copy.deepcopy(input)
        joint_num = random.randint(self.min_prompt_num, self.max_prompt_num)
        joint_ids = np.random.choice(self.num_joints, joint_num, replace=False)
        random_color_names = random.sample(list(colors.keys()), len(joint_ids))
        prompt = ""
        for color_idx, joint_id in enumerate(joint_ids):
            feat_stride = self.image_size / self.heatmap_size
            mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
            mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
            # Check that any part of the marker is in-bounds
            ul = [int(mu_x - radius), int(mu_y - radius)]
            br = [int(mu_x + radius + 1), int(mu_y + radius + 1)]
            if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \
                    or br[0] < 0 or br[1] < 0:
                # If not, mark the joint invisible and skip it
                joints_vis[joint_id][0] = 0
                continue
            # Binary disk of radius `radius` used as the marker stencil
            marker_size = 2 * radius + 1
            g = np.zeros((marker_size, marker_size))
            x, y = np.indices((marker_size, marker_size))
            mask = (x - radius) ** 2 + (y - radius) ** 2 <= radius ** 2 + 1
            g[mask] = 1
            # Usable marker range
            g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0]
            g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1]
            # Image range
            img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0])
            img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1])
            v = joints_vis[joint_id][0]
            random_color_name = random_color_names[color_idx]
            random_color = colors[random_color_name]
            # The prompt line is added for every sampled in-bounds joint; the
            # marker itself is only drawn when the joint is visible.
            prompt += random.choice(self.pose_diverse_prompt_list).format(
                color=random_color_name,
                joint=self.keypoints_type[joint_id])
            if v > 0.5:
                # Alpha-blend the marker color into the target image
                target[img_y[0]:img_y[1], img_x[0]:img_x[1]][g[g_y[0]:g_y[1], g_x[0]:g_x[1]] > 0] \
                    = self.transparency * target[img_y[0]:img_y[1], img_x[0]:img_x[1]][g[g_y[0]:g_y[1], g_x[0]:g_x[1]] > 0] \
                    + (1 - self.transparency) * np.array(random_color)
        return target, prompt


class COCODataset(PoseDataset):
    def __init__(self, root, image_set, is_train, max_prompt_num=5, min_prompt_num=1,
                 radius=10, size=256, transparency=0.0, sample_weight=1.0, transform=None):
        super().__init__(root, image_set, is_train, max_prompt_num, min_prompt_num,
                         radius, size, transparency, sample_weight, transform)
        self.keypoints_type = {
            0: "nose",
            1: "left eye",
            2: "right eye",
            3: "left ear",
            4: "right ear",
            5: "left shoulder",
            6: "right shoulder",
            7: "left elbow",
            8: "right elbow",
            9: "left wrist",
            10: "right wrist",
            11: "left hip",
            12: "right hip",
            13: "left knee",
            14: "right knee",
            15: "left ankle",
            16: "right ankle"
        }
        self.image_width = size
        self.image_height = size
        self.aspect_ratio = self.image_width * 1.0 / self.image_height
        self.pixel_std = 200
        self.coco = COCO(self._get_ann_file_keypoint())
        # deal with class names
        cats = [cat['name']
                for cat in self.coco.loadCats(self.coco.getCatIds())]
        self.classes = ['__background__'] + cats
        logger.info('=> classes: {}'.format(self.classes))
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            [
                (self._class_to_coco_ind[cls], self._class_to_ind[cls])
                for cls in self.classes[1:]
            ]
        )
        # load image file names
        self.image_set_index = self._load_image_set_index()
        self.num_images = len(self.image_set_index)
        logger.info('=> num_images: {}'.format(self.num_images))
        self.num_joints = 17
        self.flip_pairs = [[1, 2], [3, 4], [5, 6], [7, 8],
                           [9, 10], [11, 12], [13, 14], [15, 16]]
        self.parent_ids = None
        self.upper_body_ids = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
        self.lower_body_ids = (11, 12, 13, 14, 15, 16)
        # Subclasses (CrowdPose, AIC) load their own db after this __init__.
        if 'coco' in self.root:
            self.db = self._get_db()
            logger.info('=> load {} samples'.format(len(self.db)))

    def _get_ann_file_keypoint(self):
        """ self.root / annotations / person_keypoints_train2017.json """
        if 'coco' in self.root:
            prefix = 'person_keypoints' \
                if 'test' not in self.image_set else 'image_info'
            return os.path.join(
                self.root,
                'annotations',
                prefix + '_' + self.image_set + '.json'
            )
        elif 'crowdpose' in self.root:
            prefix = 'crowdpose'
            return os.path.join(
                self.root,
                'json',
                prefix + '_' + self.image_set + '.json'
            )
        elif 'aic' in self.root:
            prefix = 'aic'
            return os.path.join(
                self.root,
                'annotations',
                prefix + '_' + self.image_set + '.json'
            )
        else:
            raise ValueError('Please write the path for this new dataset.')

    def _load_image_set_index(self):
        """ image id: int """
        image_ids = self.coco.getImgIds()
        return image_ids

    def _get_db(self):
        gt_db = self._load_coco_keypoint_annotations()
        return gt_db

    def _load_coco_keypoint_annotations(self):
        """ ground truth bbox and keypoints """
        gt_db = []
        for index in self.image_set_index:
            gt_db.extend(self._load_coco_keypoint_annotation_kernel(index))
        return gt_db

    def _load_coco_keypoint_annotation_kernel(self, index):
        """
        coco ann: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id']
        iscrowd:
            crowd instances are handled by marking their overlaps with all categories to -1
            and later excluded in training
        bbox:
            [x1, y1, w, h]
        :param index: coco image id
        :return: db entry
        """
        im_ann = self.coco.loadImgs(index)[0]
        width = im_ann['width']
        height = im_ann['height']
        annIds = self.coco.getAnnIds(imgIds=index, iscrowd=False)
        objs = self.coco.loadAnns(annIds)
        # sanitize bboxes
        valid_objs = []
        for obj in objs:
            x, y, w, h = obj['bbox']
            x1 = np.max((0, x))
            y1 = np.max((0, y))
            x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
            # CrowdPose boxes are accepted unconditionally; the area filter
            # below only applies to COCO-style annotations.
            if 'crowdpose' in self.root:
                obj['area'] = 1
            if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)
        objs = valid_objs
        rec = []
        for obj in objs:
            cls = self._coco_ind_to_class_ind[obj['category_id']]
            if cls != 1:
                continue
            # ignore objs without keypoints annotation
            if max(obj['keypoints']) == 0:
                continue
            joints_3d = np.zeros((self.num_joints, 3), dtype=np.float32)
            joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float32)
            for ipt in range(self.num_joints):
                joints_3d[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
                joints_3d[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
                joints_3d[ipt, 2] = 0
                t_vis = obj['keypoints'][ipt * 3 + 2]
                if t_vis > 1:
                    t_vis = 1
                joints_3d_vis[ipt, 0] = t_vis
                joints_3d_vis[ipt, 1] = t_vis
                joints_3d_vis[ipt, 2] = 0
            center, scale = self._box2cs(obj['clean_bbox'][:4])
            rec.append({
                'image': self.image_path_from_index(index, im_ann),
                'center': center,
                'scale': scale,
                'joints_3d': joints_3d,
                'joints_3d_vis': joints_3d_vis,
                'filename': '',
                'imgnum': 0,
            })
        return rec

    def _box2cs(self, box):
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1.25
        return center, scale
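
    # Worked example (hypothetical numbers): a box with x=100, y=50, w=80,
    # h=160 and aspect_ratio=1.0 is widened to w=160, giving
    # center=(140, 130) and scale=(160/200)*1.25 = (1.0, 1.0) in
    # pixel_std units.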

    def image_path_from_index(self, index, im_ann):
        """ example: images / train2017 / 000000119993.jpg """
        if 'coco' in self.root:
            file_name = '%012d.jpg' % index
            if '2014' in self.image_set:
                file_name = 'COCO_%s_' % self.image_set + file_name
            prefix = 'test2017' if 'test' in self.image_set else self.image_set
            data_name = prefix
            image_path = os.path.join(
                self.root, 'images', data_name, file_name)
            return image_path
        elif 'crowdpose' in self.root:
            file_name = f'{index}.jpg'
            image_path = os.path.join(
                self.root, 'images', file_name)
            return image_path
        elif 'aic' in self.root:
            file_name = im_ann["file_name"]
            image_path = os.path.join(
                self.root, 'ai_challenger_keypoint_train_20170902', 'keypoint_train_images_20170902', file_name)
            return image_path
        else:
            raise ValueError('Please write the path for this new dataset.')
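

# The helpers below implement the standard top-down pose pipeline: flip_back
# and fliplr_joints swap left-right joint channels after a horizontal flip,
# and get_affine_transform builds the crop-and-rotate warp that maps the
# person box (center plus scale in pixel_std units) onto the network input.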
def flip_back(output_flipped, matched_parts):
    '''
    output_flipped: numpy.ndarray(batch_size, num_joints, height, width)
    '''
    assert output_flipped.ndim == 4, \
        'output_flipped should be [batch_size, num_joints, height, width]'
    output_flipped = output_flipped[:, :, :, ::-1]
    for pair in matched_parts:
        tmp = output_flipped[:, pair[0], :, :].copy()
        output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
        output_flipped[:, pair[1], :, :] = tmp
    return output_flipped


def fliplr_joints(joints, joints_vis, width, matched_parts):
    """
    flip coords
    """
    # Flip horizontal
    joints[:, 0] = width - joints[:, 0] - 1
    # Change left-right parts
    for pair in matched_parts:
        joints[pair[0], :], joints[pair[1], :] = \
            joints[pair[1], :], joints[pair[0], :].copy()
        joints_vis[pair[0], :], joints_vis[pair[1], :] = \
            joints_vis[pair[1], :], joints_vis[pair[0], :].copy()
    return joints * joints_vis, joints_vis


def get_affine_transform(
    center, scale, rot, output_size,
    shift=np.array([0, 0], dtype=np.float32), inv=0
):
    if not isinstance(scale, (np.ndarray, list)):
        scale = np.array([scale, scale])
    # 200.0 matches the pixel_std used when the scales were created.
    scale_tmp = scale * 200.0
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]
    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)
    # Three point pairs (center, rotated offset, and a perpendicular third
    # point) determine the affine warp.
    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
    return trans
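

# Example (hypothetical values): with center=(320, 240), scale=(1.0, 1.0),
# rot=0 and output_size=(256, 256), the transform maps the 200x200 window
# around (320, 240) onto the 256x256 output, i.e. a uniform 1.28x zoom plus
# a translation.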


def affine_transform(pt, t):
    new_pt = np.array([pt[0], pt[1], 1.]).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]


def get_3rd_point(a, b):
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)


def get_dir(src_point, rot_rad):
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)
    src_result = [0, 0]
    src_result[0] = src_point[0] * cs - src_point[1] * sn
    src_result[1] = src_point[0] * sn + src_point[1] * cs
    return src_result


class CrowdPoseDataset(COCODataset):
    def __init__(self, root, image_set, is_train, max_prompt_num=5, min_prompt_num=1,
                 radius=10, size=256, transparency=0.0, sample_weight=1.0, transform=None):
        super().__init__(root, image_set, is_train, max_prompt_num, min_prompt_num,
                         radius, size, transparency, sample_weight, transform)
        self.keypoints_type = {
            0: 'left_shoulder',
            1: 'right_shoulder',
            2: 'left_elbow',
            3: 'right_elbow',
            4: 'left_wrist',
            5: 'right_wrist',
            6: 'left_hip',
            7: 'right_hip',
            8: 'left_knee',
            9: 'right_knee',
            10: 'left_ankle',
            11: 'right_ankle',
            12: 'top_head',
            13: 'neck'
        }
        self.num_joints = 14
        self.prob_half_body = -1
        self.flip_pairs = [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11]]
        self.parent_ids = None
        self.upper_body_ids = (0, 1, 2, 3, 4, 5, 12, 13)
        self.lower_body_ids = (6, 7, 8, 9, 10, 11)
        self.db = self._get_db()
        logger.info('=> load {} samples'.format(len(self.db)))


class AICDataset(COCODataset):
    def __init__(self, root, image_set, is_train, max_prompt_num=5, min_prompt_num=1,
                 radius=10, size=256, transparency=0.0, sample_weight=1.0, transform=None):
        super().__init__(root, image_set, is_train, max_prompt_num, min_prompt_num,
                         radius, size, transparency, sample_weight, transform)
        self.keypoints_type = {
            0: "right_shoulder",
            1: "right_elbow",
            2: "right_wrist",
            3: "left_shoulder",
            4: "left_elbow",
            5: "left_wrist",
            6: "right_hip",
            7: "right_knee",
            8: "right_ankle",
            9: "left_hip",
            10: "left_knee",
            11: "left_ankle",
            12: "head_top",
            13: "neck"
        }
        self.num_joints = 14
        self.prob_half_body = -1
        self.flip_pairs = [[0, 3], [1, 4], [2, 5], [6, 9], [7, 10], [8, 11]]
        self.parent_ids = None
        self.upper_body_ids = (0, 1, 2, 3, 4, 5, 12, 13)
        self.lower_body_ids = (6, 7, 8, 9, 10, 11)
        self.db = self._get_db()
        logger.info('=> load {} samples'.format(len(self.db)))


class MPIIDataset(PoseDataset):
    def __init__(self, root, image_set, is_train, max_prompt_num=5, min_prompt_num=1,
                 radius=10, size=256, transparency=0.0, sample_weight=1.0, transform=None):
        super().__init__(root, image_set, is_train, max_prompt_num, min_prompt_num,
                         radius, size, transparency, sample_weight, transform)
        self.keypoints_type = {
            0: 'right_ankle',
            1: 'right_knee',
            2: 'right_hip',
            3: 'left_hip',
            4: 'left_knee',
            5: 'left_ankle',
            6: 'pelvis',
            7: 'thorax',
            8: 'upper_neck',
            9: 'head_top',
            10: 'right_wrist',
            11: 'right_elbow',
            12: 'right_shoulder',
            13: 'left_shoulder',
            14: 'left_elbow',
            15: 'left_wrist'
        }
        self.data_format = 'jpg'
        self.num_joints = 16
        self.prob_half_body = -1
        self.flip_pairs = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]]
        self.parent_ids = None
        self.upper_body_ids = (7, 8, 9, 10, 11, 12, 13, 14, 15)
        self.lower_body_ids = (0, 1, 2, 3, 4, 5, 6)
        self.db = self._get_db()
        logger.info('=> load {} samples'.format(len(self.db)))

    def _get_db(self):
        # load the train/val split annotations
        file_name = os.path.join(
            self.root, 'annot', self.image_set + '.json'
        )
        with open(file_name) as anno_file:
            anno = json.load(anno_file)
        gt_db = []
        for a in anno:
            image_name = a['image']
            c = np.array(a['center'], dtype=np.float32)
            s = np.array([a['scale'], a['scale']], dtype=np.float32)
            # Adjust center/scale slightly to avoid cropping limbs
            if c[0] != -1:
                c[1] = c[1] + 15 * s[1]
                s = s * 1.25
            # MPII uses MATLAB's 1-based indexing, so convert to 0-based.
            c = c - 1
            joints_3d = np.zeros((self.num_joints, 3), dtype=np.float32)
            joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float32)
            if self.image_set != 'test':
                joints = np.array(a['joints'])
                joints[:, 0:2] = joints[:, 0:2] - 1
                joints_vis = np.array(a['joints_vis'])
                assert len(joints) == self.num_joints, \
                    'joint num diff: {} vs {}'.format(len(joints),
                                                      self.num_joints)
                joints_3d[:, 0:2] = joints[:, 0:2]
                joints_3d_vis[:, 0] = joints_vis[:]
                joints_3d_vis[:, 1] = joints_vis[:]
            image_dir = 'images.zip@' if self.data_format == 'zip' else 'images'
            gt_db.append(
                {
                    'image': os.path.join(self.root, image_dir, image_name),
                    'center': c,
                    'scale': s,
                    'joints_3d': joints_3d,
                    'joints_3d_vis': joints_3d_vis,
                    'filename': '',
                    'imgnum': 0,
                }
            )
        return gt_db
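

if __name__ == '__main__':
    # Minimal usage sketch. The paths are assumptions, not part of the module:
    # a COCO-style layout under data/coco (with
    # annotations/person_keypoints_train2017.json and images/train2017/) and
    # the prompt templates at dataset/prompt/prompt_pose.txt.
    dataset = COCODataset(
        root='data/coco',        # hypothetical dataset root
        image_set='train2017',
        is_train=True,
        size=256,
    )
    sample = dataset[0]
    print(sample['edit']['c_crossattn'])     # generated instruction prompt
    print(sample['edited'].shape)            # torch.Size([3, 256, 256])
    print(sample['edit']['c_concat'].shape)  # matching source image tensor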