Upload 2120 files

7b7527a almost 3 years ago

24.2 kB

	# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	"""
	this code is base on https://github.com/open-mmlab/mmpose
	"""
	import os
	import cv2
	import numpy as np
	import json
	import copy
	import pycocotools
	from pycocotools.coco import COCO
	from .dataset import DetDataset
	from ppdet.core.workspace import register, serializable


	@serializable
	class KeypointBottomUpBaseDataset(DetDataset):
	"""Base class for bottom-up datasets.

	All datasets should subclass it.
	All subclasses should overwrite:
	Methods:`_get_imganno`

	Args:
	dataset_dir (str): Root path to the dataset.
	anno_path (str): Relative path to the annotation file.
	image_dir (str): Path to a directory where images are held.
	Default: None.
	num_joints (int): keypoint numbers
	transform (composed(operators)): A sequence of data transforms.
	shard (list): [rank, worldsize], the distributed env params
	test_mode (bool): Store True when building test or
	validation dataset. Default: False.
	"""

	def __init__(self,
	dataset_dir,
	image_dir,
	anno_path,
	num_joints,
	transform=[],
	shard=[0, 1],
	test_mode=False):
	super().__init__(dataset_dir, image_dir, anno_path)
	self.image_info = {}
	self.ann_info = {}

	self.img_prefix = os.path.join(dataset_dir, image_dir)
	self.transform = transform
	self.test_mode = test_mode

	self.ann_info['num_joints'] = num_joints
	self.img_ids = []

	def parse_dataset(self):
	pass

	def __len__(self):
	"""Get dataset length."""
	return len(self.img_ids)

	def _get_imganno(self, idx):
	"""Get anno for a single image."""
	raise NotImplementedError

	def __getitem__(self, idx):
	"""Prepare image for training given the index."""
	records = copy.deepcopy(self._get_imganno(idx))
	records['image'] = cv2.imread(records['image_file'])
	records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
	if 'mask' in records:
	records['mask'] = (records['mask'] + 0).astype('uint8')
	records = self.transform(records)
	return records

	def parse_dataset(self):
	return


	@register
	@serializable
	class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
	"""COCO dataset for bottom-up pose estimation.

	The dataset loads raw features and apply specified transforms
	to return a dict containing the image tensors and other information.

	COCO keypoint indexes::

	0: 'nose',
	1: 'left_eye',
	2: 'right_eye',
	3: 'left_ear',
	4: 'right_ear',
	5: 'left_shoulder',
	6: 'right_shoulder',
	7: 'left_elbow',
	8: 'right_elbow',
	9: 'left_wrist',
	10: 'right_wrist',
	11: 'left_hip',
	12: 'right_hip',
	13: 'left_knee',
	14: 'right_knee',
	15: 'left_ankle',
	16: 'right_ankle'

	Args:
	dataset_dir (str): Root path to the dataset.
	anno_path (str): Relative path to the annotation file.
	image_dir (str): Path to a directory where images are held.
	Default: None.
	num_joints (int): keypoint numbers
	transform (composed(operators)): A sequence of data transforms.
	shard (list): [rank, worldsize], the distributed env params
	test_mode (bool): Store True when building test or
	validation dataset. Default: False.
	"""

	def __init__(self,
	dataset_dir,
	image_dir,
	anno_path,
	num_joints,
	transform=[],
	shard=[0, 1],
	test_mode=False,
	return_mask=True,
	return_bbox=True,
	return_area=True,
	return_class=True):
	super().__init__(dataset_dir, image_dir, anno_path, num_joints,
	transform, shard, test_mode)

	self.ann_file = os.path.join(dataset_dir, anno_path)
	self.shard = shard
	self.test_mode = test_mode
	self.return_mask = return_mask
	self.return_bbox = return_bbox
	self.return_area = return_area
	self.return_class = return_class

	def parse_dataset(self):
	self.coco = COCO(self.ann_file)

	self.img_ids = self.coco.getImgIds()
	if not self.test_mode:
	self.img_ids_tmp = []
	for img_id in self.img_ids:
	ann_ids = self.coco.getAnnIds(imgIds=img_id)
	anno = self.coco.loadAnns(ann_ids)
	anno = [obj for obj in anno if obj['iscrowd'] == 0]
	if len(anno) == 0:
	continue
	self.img_ids_tmp.append(img_id)
	self.img_ids = self.img_ids_tmp

	blocknum = int(len(self.img_ids) / self.shard[1])
	self.img_ids = self.img_ids[(blocknum * self.shard[0]):(blocknum * (
	self.shard[0] + 1))]
	self.num_images = len(self.img_ids)
	self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
	self.dataset_name = 'coco'

	cat_ids = self.coco.getCatIds()
	self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
	print('=> num_images: {}'.format(self.num_images))

	@staticmethod
	def _get_mapping_id_name(imgs):
	"""
	Args:
	imgs (dict): dict of image info.

	Returns:
	tuple: Image name & id mapping dicts.

	- id2name (dict): Mapping image id to name.
	- name2id (dict): Mapping image name to id.
	"""
	id2name = {}
	name2id = {}
	for image_id, image in imgs.items():
	file_name = image['file_name']
	id2name[image_id] = file_name
	name2id[file_name] = image_id

	return id2name, name2id

	def _get_imganno(self, idx):
	"""Get anno for a single image.

	Args:
	idx (int): image idx

	Returns:
	dict: info for model training
	"""
	coco = self.coco
	img_id = self.img_ids[idx]
	ann_ids = coco.getAnnIds(imgIds=img_id)
	anno = coco.loadAnns(ann_ids)

	anno = [
	obj for obj in anno
	if obj['iscrowd'] == 0 and obj['num_keypoints'] > 0
	]

	db_rec = {}
	joints, orgsize = self._get_joints(anno, idx)
	db_rec['gt_joints'] = joints
	db_rec['im_shape'] = orgsize

	if self.return_bbox:
	db_rec['gt_bbox'] = self._get_bboxs(anno, idx)

	if self.return_class:
	db_rec['gt_class'] = self._get_labels(anno, idx)

	if self.return_area:
	db_rec['gt_areas'] = self._get_areas(anno, idx)

	if self.return_mask:
	db_rec['mask'] = self._get_mask(anno, idx)

	db_rec['im_id'] = img_id
	db_rec['image_file'] = os.path.join(self.img_prefix,
	self.id2name[img_id])

	return db_rec

	def _get_joints(self, anno, idx):
	"""Get joints for all people in an image."""
	num_people = len(anno)

	joints = np.zeros(
	(num_people, self.ann_info['num_joints'], 3), dtype=np.float32)

	for i, obj in enumerate(anno):
	joints[i, :self.ann_info['num_joints'], :3] = \
	np.array(obj['keypoints']).reshape([-1, 3])

	img_info = self.coco.loadImgs(self.img_ids[idx])[0]
	orgsize = np.array([img_info['height'], img_info['width'], 1])

	return joints, orgsize

	def _get_bboxs(self, anno, idx):
	num_people = len(anno)
	gt_bboxes = np.zeros((num_people, 4), dtype=np.float32)

	for idx, obj in enumerate(anno):
	if 'bbox' in obj:
	gt_bboxes[idx, :] = obj['bbox']

	gt_bboxes[:, 2] += gt_bboxes[:, 0]
	gt_bboxes[:, 3] += gt_bboxes[:, 1]
	return gt_bboxes

	def _get_labels(self, anno, idx):
	num_people = len(anno)
	gt_labels = np.zeros((num_people, 1), dtype=np.float32)

	for idx, obj in enumerate(anno):
	if 'category_id' in obj:
	catid = obj['category_id']
	gt_labels[idx, 0] = self.catid2clsid[catid]
	return gt_labels

	def _get_areas(self, anno, idx):
	num_people = len(anno)
	gt_areas = np.zeros((num_people, ), dtype=np.float32)

	for idx, obj in enumerate(anno):
	if 'area' in obj:
	gt_areas[idx, ] = obj['area']
	return gt_areas

	def _get_mask(self, anno, idx):
	"""Get ignore masks to mask out losses."""
	coco = self.coco
	img_info = coco.loadImgs(self.img_ids[idx])[0]

	m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32)

	for obj in anno:
	if 'segmentation' in obj:
	if obj['iscrowd']:
	rle = pycocotools.mask.frPyObjects(obj['segmentation'],
	img_info['height'],
	img_info['width'])
	m += pycocotools.mask.decode(rle)
	elif obj['num_keypoints'] == 0:
	rles = pycocotools.mask.frPyObjects(obj['segmentation'],
	img_info['height'],
	img_info['width'])
	for rle in rles:
	m += pycocotools.mask.decode(rle)

	return m < 0.5


	@register
	@serializable
	class KeypointBottomUpCrowdPoseDataset(KeypointBottomUpCocoDataset):
	"""CrowdPose dataset for bottom-up pose estimation.

	The dataset loads raw features and apply specified transforms
	to return a dict containing the image tensors and other information.

	CrowdPose keypoint indexes::

	0: 'left_shoulder',
	1: 'right_shoulder',
	2: 'left_elbow',
	3: 'right_elbow',
	4: 'left_wrist',
	5: 'right_wrist',
	6: 'left_hip',
	7: 'right_hip',
	8: 'left_knee',
	9: 'right_knee',
	10: 'left_ankle',
	11: 'right_ankle',
	12: 'top_head',
	13: 'neck'

	Args:
	dataset_dir (str): Root path to the dataset.
	anno_path (str): Relative path to the annotation file.
	image_dir (str): Path to a directory where images are held.
	Default: None.
	num_joints (int): keypoint numbers
	transform (composed(operators)): A sequence of data transforms.
	shard (list): [rank, worldsize], the distributed env params
	test_mode (bool): Store True when building test or
	validation dataset. Default: False.
	"""

	def __init__(self,
	dataset_dir,
	image_dir,
	anno_path,
	num_joints,
	transform=[],
	shard=[0, 1],
	test_mode=False):
	super().__init__(dataset_dir, image_dir, anno_path, num_joints,
	transform, shard, test_mode)

	self.ann_file = os.path.join(dataset_dir, anno_path)
	self.shard = shard
	self.test_mode = test_mode

	def parse_dataset(self):
	self.coco = COCO(self.ann_file)

	self.img_ids = self.coco.getImgIds()
	if not self.test_mode:
	self.img_ids = [
	img_id for img_id in self.img_ids
	if len(self.coco.getAnnIds(
	imgIds=img_id, iscrowd=None)) > 0
	]
	blocknum = int(len(self.img_ids) / self.shard[1])
	self.img_ids = self.img_ids[(blocknum * self.shard[0]):(blocknum * (
	self.shard[0] + 1))]
	self.num_images = len(self.img_ids)
	self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)

	self.dataset_name = 'crowdpose'
	print('=> num_images: {}'.format(self.num_images))


	@serializable
	class KeypointTopDownBaseDataset(DetDataset):
	"""Base class for top_down datasets.

	All datasets should subclass it.
	All subclasses should overwrite:
	Methods:`_get_db`

	Args:
	dataset_dir (str): Root path to the dataset.
	image_dir (str): Path to a directory where images are held.
	anno_path (str): Relative path to the annotation file.
	num_joints (int): keypoint numbers
	transform (composed(operators)): A sequence of data transforms.
	"""

	def __init__(self,
	dataset_dir,
	image_dir,
	anno_path,
	num_joints,
	transform=[]):
	super().__init__(dataset_dir, image_dir, anno_path)
	self.image_info = {}
	self.ann_info = {}

	self.img_prefix = os.path.join(dataset_dir, image_dir)
	self.transform = transform

	self.ann_info['num_joints'] = num_joints
	self.db = []

	def __len__(self):
	"""Get dataset length."""
	return len(self.db)

	def _get_db(self):
	"""Get a sample"""
	raise NotImplementedError

	def __getitem__(self, idx):
	"""Prepare sample for training given the index."""
	records = copy.deepcopy(self.db[idx])
	records['image'] = cv2.imread(records['image_file'], cv2.IMREAD_COLOR \|
	cv2.IMREAD_IGNORE_ORIENTATION)
	records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
	records['score'] = records['score'] if 'score' in records else 1
	records = self.transform(records)
	# print('records', records)
	return records


	@register
	@serializable
	class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
	"""COCO dataset for top-down pose estimation.

	The dataset loads raw features and apply specified transforms
	to return a dict containing the image tensors and other information.

	COCO keypoint indexes:

	0: 'nose',
	1: 'left_eye',
	2: 'right_eye',
	3: 'left_ear',
	4: 'right_ear',
	5: 'left_shoulder',
	6: 'right_shoulder',
	7: 'left_elbow',
	8: 'right_elbow',
	9: 'left_wrist',
	10: 'right_wrist',
	11: 'left_hip',
	12: 'right_hip',
	13: 'left_knee',
	14: 'right_knee',
	15: 'left_ankle',
	16: 'right_ankle'

	Args:
	dataset_dir (str): Root path to the dataset.
	image_dir (str): Path to a directory where images are held.
	anno_path (str): Relative path to the annotation file.
	num_joints (int): Keypoint numbers
	trainsize (list):[w, h] Image target size
	transform (composed(operators)): A sequence of data transforms.
	bbox_file (str): Path to a detection bbox file
	Default: None.
	use_gt_bbox (bool): Whether to use ground truth bbox
	Default: True.
	pixel_std (int): The pixel std of the scale
	Default: 200.
	image_thre (float): The threshold to filter the detection box
	Default: 0.0.
	"""

	def __init__(self,
	dataset_dir,
	image_dir,
	anno_path,
	num_joints,
	trainsize,
	transform=[],
	bbox_file=None,
	use_gt_bbox=True,
	pixel_std=200,
	image_thre=0.0):
	super().__init__(dataset_dir, image_dir, anno_path, num_joints,
	transform)

	self.bbox_file = bbox_file
	self.use_gt_bbox = use_gt_bbox
	self.trainsize = trainsize
	self.pixel_std = pixel_std
	self.image_thre = image_thre
	self.dataset_name = 'coco'

	def parse_dataset(self):
	if self.use_gt_bbox:
	self.db = self._load_coco_keypoint_annotations()
	else:
	self.db = self._load_coco_person_detection_results()

	def _load_coco_keypoint_annotations(self):
	coco = COCO(self.get_anno())
	img_ids = coco.getImgIds()
	gt_db = []
	for index in img_ids:
	im_ann = coco.loadImgs(index)[0]
	width = im_ann['width']
	height = im_ann['height']
	file_name = im_ann['file_name']
	im_id = int(im_ann["id"])

	annIds = coco.getAnnIds(imgIds=index, iscrowd=False)
	objs = coco.loadAnns(annIds)

	valid_objs = []
	for obj in objs:
	x, y, w, h = obj['bbox']
	x1 = np.max((0, x))
	y1 = np.max((0, y))
	x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
	y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
	if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
	obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
	valid_objs.append(obj)
	objs = valid_objs

	rec = []
	for obj in objs:
	if max(obj['keypoints']) == 0:
	continue

	joints = np.zeros(
	(self.ann_info['num_joints'], 3), dtype=np.float32)
	joints_vis = np.zeros(
	(self.ann_info['num_joints'], 3), dtype=np.float32)
	for ipt in range(self.ann_info['num_joints']):
	joints[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
	joints[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
	joints[ipt, 2] = 0
	t_vis = obj['keypoints'][ipt * 3 + 2]
	if t_vis > 1:
	t_vis = 1
	joints_vis[ipt, 0] = t_vis
	joints_vis[ipt, 1] = t_vis
	joints_vis[ipt, 2] = 0

	center, scale = self._box2cs(obj['clean_bbox'][:4])
	rec.append({
	'image_file': os.path.join(self.img_prefix, file_name),
	'center': center,
	'scale': scale,
	'gt_joints': joints,
	'joints_vis': joints_vis,
	'im_id': im_id,
	})
	gt_db.extend(rec)

	return gt_db

	def _box2cs(self, box):
	x, y, w, h = box[:4]
	center = np.zeros((2), dtype=np.float32)
	center[0] = x + w * 0.5
	center[1] = y + h * 0.5
	aspect_ratio = self.trainsize[0] * 1.0 / self.trainsize[1]

	if w > aspect_ratio * h:
	h = w * 1.0 / aspect_ratio
	elif w < aspect_ratio * h:
	w = h * aspect_ratio
	scale = np.array(
	[w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
	dtype=np.float32)
	if center[0] != -1:
	scale = scale * 1.25

	return center, scale

	def _load_coco_person_detection_results(self):
	all_boxes = None
	bbox_file_path = os.path.join(self.dataset_dir, self.bbox_file)
	with open(bbox_file_path, 'r') as f:
	all_boxes = json.load(f)

	if not all_boxes:
	print('=> Load %s fail!' % bbox_file_path)
	return None

	kpt_db = []
	for n_img in range(0, len(all_boxes)):
	det_res = all_boxes[n_img]
	if det_res['category_id'] != 1:
	continue
	file_name = det_res[
	'filename'] if 'filename' in det_res else '%012d.jpg' % det_res[
	'image_id']
	img_name = os.path.join(self.img_prefix, file_name)
	box = det_res['bbox']
	score = det_res['score']
	im_id = int(det_res['image_id'])

	if score < self.image_thre:
	continue

	center, scale = self._box2cs(box)
	joints = np.zeros(
	(self.ann_info['num_joints'], 3), dtype=np.float32)
	joints_vis = np.ones(
	(self.ann_info['num_joints'], 3), dtype=np.float32)
	kpt_db.append({
	'image_file': img_name,
	'im_id': im_id,
	'center': center,
	'scale': scale,
	'score': score,
	'gt_joints': joints,
	'joints_vis': joints_vis,
	})

	return kpt_db


	@register
	@serializable
	class KeypointTopDownMPIIDataset(KeypointTopDownBaseDataset):
	"""MPII dataset for topdown pose estimation.

	The dataset loads raw features and apply specified transforms
	to return a dict containing the image tensors and other information.

	MPII keypoint indexes::

	0: 'right_ankle',
	1: 'right_knee',
	2: 'right_hip',
	3: 'left_hip',
	4: 'left_knee',
	5: 'left_ankle',
	6: 'pelvis',
	7: 'thorax',
	8: 'upper_neck',
	9: 'head_top',
	10: 'right_wrist',
	11: 'right_elbow',
	12: 'right_shoulder',
	13: 'left_shoulder',
	14: 'left_elbow',
	15: 'left_wrist',

	Args:
	dataset_dir (str): Root path to the dataset.
	image_dir (str): Path to a directory where images are held.
	anno_path (str): Relative path to the annotation file.
	num_joints (int): Keypoint numbers
	trainsize (list):[w, h] Image target size
	transform (composed(operators)): A sequence of data transforms.
	"""

	def __init__(self,
	dataset_dir,
	image_dir,
	anno_path,
	num_joints,
	transform=[]):
	super().__init__(dataset_dir, image_dir, anno_path, num_joints,
	transform)

	self.dataset_name = 'mpii'

	def parse_dataset(self):
	with open(self.get_anno()) as anno_file:
	anno = json.load(anno_file)

	gt_db = []
	for a in anno:
	image_name = a['image']
	im_id = a['image_id'] if 'image_id' in a else int(
	os.path.splitext(image_name)[0])

	c = np.array(a['center'], dtype=np.float32)
	s = np.array([a['scale'], a['scale']], dtype=np.float32)

	# Adjust center/scale slightly to avoid cropping limbs
	if c[0] != -1:
	c[1] = c[1] + 15 * s[1]
	s = s * 1.25
	c = c - 1

	joints = np.zeros(
	(self.ann_info['num_joints'], 3), dtype=np.float32)
	joints_vis = np.zeros(
	(self.ann_info['num_joints'], 3), dtype=np.float32)
	if 'gt_joints' in a:
	joints_ = np.array(a['gt_joints'])
	joints_[:, 0:2] = joints_[:, 0:2] - 1
	joints_vis_ = np.array(a['joints_vis'])
	assert len(joints_) == self.ann_info[
	'num_joints'], 'joint num diff: {} vs {}'.format(
	len(joints_), self.ann_info['num_joints'])

	joints[:, 0:2] = joints_[:, 0:2]
	joints_vis[:, 0] = joints_vis_[:]
	joints_vis[:, 1] = joints_vis_[:]

	gt_db.append({
	'image_file': os.path.join(self.img_prefix, image_name),
	'im_id': im_id,
	'center': c,
	'scale': s,
	'gt_joints': joints,
	'joints_vis': joints_vis
	})
	print("number length: {}".format(len(gt_db)))
	self.db = gt_db