|
|
|
import copy |
|
|
|
import numpy as np |
|
import pytest |
|
import torch |
|
|
|
from mmpose.models.detectors import PoseWarper, TopDown |
|
|
|
|
|
def test_vipnas_forward():
    """Smoke-test train and inference forward passes of a ViPNAS_ResNet
    based TopDown detector."""
    # COCO-style 17-keypoint channel layout.
    channel_cfg = dict(
        num_output_channels=17,
        dataset_joints=17,
        dataset_channel=[list(range(17))],
        inference_channel=list(range(17)))

    model_cfg = dict(
        type='TopDown',
        pretrained=None,
        backbone=dict(type='ViPNAS_ResNet', depth=50),
        keypoint_head=dict(
            type='ViPNASHeatmapSimpleHead',
            in_channels=608,
            out_channels=channel_cfg['num_output_channels'],
            loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
        train_cfg=dict(),
        test_cfg=dict(
            flip_test=True,
            post_process='default',
            shift_heatmap=True,
            modulate_kernel=11))

    model = TopDown(model_cfg['backbone'], None, model_cfg['keypoint_head'],
                    model_cfg['train_cfg'], model_cfg['test_cfg'],
                    model_cfg['pretrained'])

    # Build a demo batch and unpack the pieces the forward call needs.
    batch = _demo_mm_inputs((1, 3, 256, 256))
    imgs = batch.pop('imgs')
    target = batch.pop('target')
    target_weight = batch.pop('target_weight')
    img_metas = batch.pop('img_metas')

    # Training-mode forward must produce a dict of losses.
    losses = model.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    # Inference-mode forward should run without gradient tracking.
    with torch.no_grad():
        _ = model.forward(imgs, img_metas=img_metas, return_loss=False)
|
|
|
|
|
def test_topdown_forward():
    """Smoke-test TopDown detectors (several backbone/head combinations)
    in both train and inference forward modes."""
    # --- Case 1: ResNet-18 backbone + single-stage simple heatmap head ---
    model_cfg = dict(
        type='TopDown',
        pretrained=None,
        backbone=dict(type='ResNet', depth=18),
        keypoint_head=dict(
            type='TopdownHeatmapSimpleHead',
            in_channels=512,
            out_channels=17,
            loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
        train_cfg=dict(),
        test_cfg=dict(
            flip_test=True,
            post_process='default',
            shift_heatmap=True,
            modulate_kernel=11))

    detector = TopDown(model_cfg['backbone'], None, model_cfg['keypoint_head'],
                       model_cfg['train_cfg'], model_cfg['test_cfg'],
                       model_cfg['pretrained'])

    # init_weights must reject a non-str pretrained argument.
    with pytest.raises(TypeError):
        detector.init_weights(pretrained=dict())
    detector.pretrained = model_cfg['pretrained']
    detector.init_weights()

    input_shape = (1, 3, 256, 256)
    mm_inputs = _demo_mm_inputs(input_shape)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    # Train-mode forward returns a dict of losses.
    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    # Inference-mode forward should run without gradients.
    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)

    # --- Case 2: Hourglass backbone + multi-stage head, single MSE loss ---
    model_cfg = dict(
        type='TopDown',
        pretrained=None,
        backbone=dict(
            type='HourglassNet',
            num_stacks=1,
        ),
        keypoint_head=dict(
            type='TopdownHeatmapMultiStageHead',
            in_channels=256,
            out_channels=17,
            num_stages=1,
            num_deconv_layers=0,
            extra=dict(final_conv_kernel=1, ),
            loss_keypoint=dict(type='JointsMSELoss', use_target_weight=False)),
        train_cfg=dict(),
        test_cfg=dict(
            flip_test=True,
            post_process='default',
            shift_heatmap=True,
            modulate_kernel=11))

    detector = TopDown(model_cfg['backbone'], None, model_cfg['keypoint_head'],
                       model_cfg['train_cfg'], model_cfg['test_cfg'],
                       model_cfg['pretrained'])

    # Reuse the (1, 3, 256, 256) inputs from case 1 — shapes match.
    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)

    # --- Case 3: same Hourglass setup, but loss given as a list ---
    model_cfg = dict(
        type='TopDown',
        pretrained=None,
        backbone=dict(
            type='HourglassNet',
            num_stacks=1,
        ),
        keypoint_head=dict(
            type='TopdownHeatmapMultiStageHead',
            in_channels=256,
            out_channels=17,
            num_stages=1,
            num_deconv_layers=0,
            extra=dict(final_conv_kernel=1, ),
            loss_keypoint=[
                dict(
                    type='JointsMSELoss',
                    use_target_weight=True,
                    loss_weight=1.)
            ]),
        train_cfg=dict(),
        test_cfg=dict(
            flip_test=True,
            post_process='default',
            shift_heatmap=True,
            modulate_kernel=11))

    detector = TopDown(model_cfg['backbone'], None, model_cfg['keypoint_head'],
                       model_cfg['train_cfg'], model_cfg['test_cfg'],
                       model_cfg['pretrained'])

    detector.init_weights()

    input_shape = (1, 3, 256, 256)
    mm_inputs = _demo_mm_inputs(input_shape, num_outputs=None)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)

    # --- Case 4: RSN backbone + MSMU head with 4 output units and a
    # mixed MSE/OHKM-MSE loss list; targets carry a stage dimension ---
    model_cfg = dict(
        type='TopDown',
        pretrained=None,
        backbone=dict(
            type='RSN',
            unit_channels=256,
            num_stages=1,
            num_units=4,
            num_blocks=[2, 2, 2, 2],
            num_steps=4,
            norm_cfg=dict(type='BN')),
        keypoint_head=dict(
            type='TopdownHeatmapMSMUHead',
            out_shape=(64, 48),
            unit_channels=256,
            out_channels=17,
            num_stages=1,
            num_units=4,
            use_prm=False,
            norm_cfg=dict(type='BN'),
            loss_keypoint=[dict(type='JointsMSELoss', use_target_weight=True)]
            * 3 + [dict(type='JointsOHKMMSELoss', use_target_weight=True)]),
        train_cfg=dict(num_units=4),
        test_cfg=dict(
            flip_test=True,
            post_process='default',
            shift_heatmap=False,
            unbiased_decoding=False,
            modulate_kernel=5))

    detector = TopDown(model_cfg['backbone'], None, model_cfg['keypoint_head'],
                       model_cfg['train_cfg'], model_cfg['test_cfg'],
                       model_cfg['pretrained'])

    detector.init_weights()

    input_shape = (1, 3, 256, 192)
    # num_outputs=4 adds the per-unit stage dimension to the targets.
    mm_inputs = _demo_mm_inputs(input_shape, num_outputs=4)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
        _ = detector.forward_dummy(imgs)

    # --- Case 5: ResNet-18 + GAP neck + regression head (inference only) ---
    model_cfg = dict(
        type='TopDown',
        pretrained=None,
        backbone=dict(type='ResNet', depth=18),
        neck=dict(type='GlobalAveragePooling'),
        keypoint_head=dict(
            type='DeepposeRegressionHead',
            in_channels=512,
            num_joints=17,
            loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
        train_cfg=dict(),
        test_cfg=dict(
            flip_test=True,
            regression_flip_shift=True,
        ))

    detector = TopDown(model_cfg['backbone'], model_cfg['neck'],
                       model_cfg['keypoint_head'], model_cfg['train_cfg'],
                       model_cfg['test_cfg'], model_cfg['pretrained'])

    # init_weights must reject a non-str pretrained argument.
    with pytest.raises(TypeError):
        detector.init_weights(pretrained=dict())
    detector.pretrained = model_cfg['pretrained']
    detector.init_weights()

    input_shape = (1, 3, 256, 256)
    mm_inputs = _demo_mm_inputs(input_shape)

    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
|
|
|
|
|
def test_posewarper_forward():
    """Smoke-test the PoseWarper detector (multi-frame pose estimation)
    across several configuration variants."""
    # --- Base config: HRNet-w48 backbone + PoseWarperNeck, two frames,
    # tensors of all frames concatenated along the batch dim ---
    model_cfg = dict(
        type='PoseWarper',
        pretrained=None,
        backbone=dict(
            type='HRNet',
            in_channels=3,
            extra=dict(
                stage1=dict(
                    num_modules=1,
                    num_branches=1,
                    block='BOTTLENECK',
                    num_blocks=(4, ),
                    num_channels=(64, )),
                stage2=dict(
                    num_modules=1,
                    num_branches=2,
                    block='BASIC',
                    num_blocks=(4, 4),
                    num_channels=(48, 96)),
                stage3=dict(
                    num_modules=4,
                    num_branches=3,
                    block='BASIC',
                    num_blocks=(4, 4, 4),
                    num_channels=(48, 96, 192)),
                stage4=dict(
                    num_modules=3,
                    num_branches=4,
                    block='BASIC',
                    num_blocks=(4, 4, 4, 4),
                    num_channels=(48, 96, 192, 384))),
            frozen_stages=4,
        ),
        concat_tensors=True,
        neck=dict(
            type='PoseWarperNeck',
            in_channels=48,
            freeze_trans_layer=True,
            out_channels=17,
            inner_channels=128,
            deform_groups=17,
            dilations=(3, 6, 12, 18, 24),
            trans_conv_kernel=1,
            res_blocks_cfg=dict(block='BASIC', num_blocks=20),
            offsets_kernel=3,
            deform_conv_kernel=3),
        keypoint_head=dict(
            type='TopdownHeatmapSimpleHead',
            in_channels=17,
            out_channels=17,
            num_deconv_layers=0,
            extra=dict(final_conv_kernel=0, ),
            loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
        train_cfg=dict(),
        test_cfg=dict(
            flip_test=False,
            post_process='default',
            shift_heatmap=True,
            modulate_kernel=11))

    detector = PoseWarper(model_cfg['backbone'], model_cfg['neck'],
                          model_cfg['keypoint_head'], model_cfg['train_cfg'],
                          model_cfg['test_cfg'], model_cfg['pretrained'], None,
                          model_cfg['concat_tensors'])
    assert detector.concat_tensors

    detector.init_weights()

    # Two frames per sample; imgs becomes a list of tensors.
    input_shape = (2, 3, 64, 64)
    num_frames = 2
    mm_inputs = _demo_mm_inputs(input_shape, None, num_frames)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    # Train-mode forward returns a dict of losses.
    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    # Inference-mode and dummy forwards run without gradients.
    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
        _ = detector.forward_dummy(imgs)

    # --- Variant: per-frame processing (concat_tensors=False) ---
    model_cfg_copy = copy.deepcopy(model_cfg)
    model_cfg_copy['concat_tensors'] = False

    detector = PoseWarper(model_cfg_copy['backbone'], model_cfg_copy['neck'],
                          model_cfg_copy['keypoint_head'],
                          model_cfg_copy['train_cfg'],
                          model_cfg_copy['test_cfg'],
                          model_cfg_copy['pretrained'], None,
                          model_cfg_copy['concat_tensors'])
    assert not detector.concat_tensors

    detector.init_weights()

    input_shape = (2, 3, 64, 64)
    num_frames = 2
    mm_inputs = _demo_mm_inputs(input_shape, None, num_frames)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
        _ = detector.forward_dummy(imgs)

    # --- Variant: flip-test enabled at inference ---
    model_cfg_copy = copy.deepcopy(model_cfg)
    model_cfg_copy['test_cfg']['flip_test'] = True

    detector = PoseWarper(model_cfg_copy['backbone'], model_cfg_copy['neck'],
                          model_cfg_copy['keypoint_head'],
                          model_cfg_copy['train_cfg'],
                          model_cfg_copy['test_cfg'],
                          model_cfg_copy['pretrained'], None,
                          model_cfg_copy['concat_tensors'])

    detector.init_weights()

    input_shape = (1, 3, 64, 64)
    num_frames = 2
    mm_inputs = _demo_mm_inputs(input_shape, None, num_frames)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
        _ = detector.forward_dummy(imgs)

    # --- Variant: fewer dilations in the neck ---
    model_cfg_copy = copy.deepcopy(model_cfg)
    model_cfg_copy['neck']['dilations'] = (3, 6, 12)

    detector = PoseWarper(model_cfg_copy['backbone'], model_cfg_copy['neck'],
                          model_cfg_copy['keypoint_head'],
                          model_cfg_copy['train_cfg'],
                          model_cfg_copy['test_cfg'],
                          model_cfg_copy['pretrained'], None,
                          model_cfg_copy['concat_tensors'])

    detector.init_weights()

    input_shape = (2, 3, 64, 64)
    num_frames = 2
    mm_inputs = _demo_mm_inputs(input_shape, None, num_frames)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
        _ = detector.forward_dummy(imgs)

    # --- Variant: ResNet-18 backbone (neck in_channels adjusted to its
    # 512-channel output; head consumes the neck's 17-channel output) ---
    model_cfg_copy = copy.deepcopy(model_cfg)
    model_cfg_copy['backbone'] = dict(type='ResNet', depth=18)
    model_cfg_copy['neck']['in_channels'] = 512
    model_cfg_copy['keypoint_head'] = dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=17,
        out_channels=17,
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))

    detector = PoseWarper(model_cfg_copy['backbone'], model_cfg_copy['neck'],
                          model_cfg_copy['keypoint_head'],
                          model_cfg_copy['train_cfg'],
                          model_cfg_copy['test_cfg'],
                          model_cfg_copy['pretrained'], None,
                          model_cfg_copy['concat_tensors'])

    detector.init_weights()

    input_shape = (1, 3, 64, 64)
    num_frames = 2
    mm_inputs = _demo_mm_inputs(input_shape, None, num_frames)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
        _ = detector.forward_dummy(imgs)
|
|
|
|
|
def _demo_mm_inputs( |
|
input_shape=(1, 3, 256, 256), num_outputs=None, num_frames=1): |
|
"""Create a superset of inputs needed to run test or train batches. |
|
|
|
Args: |
|
input_shape (tuple): |
|
input batch dimensions |
|
num_frames (int): |
|
number of frames for each sample, default: 1, |
|
if larger than 1, return a list of tensors |
|
""" |
|
(N, C, H, W) = input_shape |
|
|
|
rng = np.random.RandomState(0) |
|
|
|
imgs = rng.rand(*input_shape) |
|
if num_outputs is not None: |
|
target = np.zeros([N, num_outputs, 17, H // 4, W // 4], |
|
dtype=np.float32) |
|
target_weight = np.ones([N, num_outputs, 17, 1], dtype=np.float32) |
|
else: |
|
target = np.zeros([N, 17, H // 4, W // 4], dtype=np.float32) |
|
target_weight = np.ones([N, 17, 1], dtype=np.float32) |
|
|
|
img_metas = [{ |
|
'img_shape': (H, W, C), |
|
'center': np.array([W / 2, H / 2]), |
|
'scale': np.array([0.5, 0.5]), |
|
'bbox_score': 1.0, |
|
'bbox_id': 0, |
|
'flip_pairs': [], |
|
'inference_channel': np.arange(17), |
|
'image_file': '<demo>.png', |
|
'frame_weight': np.random.uniform(0, 1, num_frames), |
|
} for _ in range(N)] |
|
|
|
mm_inputs = { |
|
'target': torch.FloatTensor(target), |
|
'target_weight': torch.FloatTensor(target_weight), |
|
'img_metas': img_metas |
|
} |
|
|
|
if num_frames == 1: |
|
imgs = torch.FloatTensor(rng.rand(*input_shape)).requires_grad_(True) |
|
else: |
|
|
|
imgs = [ |
|
torch.FloatTensor(rng.rand(*input_shape)).requires_grad_(True) |
|
for _ in range(num_frames) |
|
] |
|
|
|
mm_inputs['imgs'] = imgs |
|
return mm_inputs |
|
|