|
|
|
import copy |
|
|
|
import numpy as np |
|
import pytest |
|
import torch |
|
|
|
from mmpose.models.detectors import PoseWarper, TopDown |
|
|
|
|
|
def test_vipnas_forward():
    """Smoke-test train and inference forward passes of a ViPNAS_ResNet
    based TopDown detector."""
    # COCO-style 17-keypoint channel layout.
    channel_cfg = dict(
        num_output_channels=17,
        dataset_joints=17,
        dataset_channel=[list(range(17))],
        inference_channel=list(range(17)))

    model_cfg = dict(
        type='TopDown',
        pretrained=None,
        backbone=dict(type='ViPNAS_ResNet', depth=50),
        keypoint_head=dict(
            type='ViPNASHeatmapSimpleHead',
            in_channels=608,
            out_channels=channel_cfg['num_output_channels'],
            loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
        train_cfg=dict(),
        test_cfg=dict(
            flip_test=True,
            post_process='default',
            shift_heatmap=True,
            modulate_kernel=11))

    model = TopDown(model_cfg['backbone'], None, model_cfg['keypoint_head'],
                    model_cfg['train_cfg'], model_cfg['test_cfg'],
                    model_cfg['pretrained'])

    # Build a demo batch and unpack the pieces the forward call needs.
    batch = _demo_mm_inputs((1, 3, 256, 256))
    imgs = batch.pop('imgs')
    target = batch.pop('target')
    target_weight = batch.pop('target_weight')
    img_metas = batch.pop('img_metas')

    # Training-mode forward must produce a dict of losses.
    losses = model.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    # Inference-mode forward should run without gradient tracking.
    with torch.no_grad():
        _ = model.forward(imgs, img_metas=img_metas, return_loss=False)
|
|
|
|
|
def test_topdown_forward():
    """Smoke-test TopDown detectors (several backbone/head combinations)
    in both train and inference forward modes."""
    # --- Case 1: ResNet-18 backbone + single-stage simple heatmap head ---
    model_cfg = dict(
        type='TopDown',
        pretrained=None,
        backbone=dict(type='ResNet', depth=18),
        keypoint_head=dict(
            type='TopdownHeatmapSimpleHead',
            in_channels=512,
            out_channels=17,
            loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
        train_cfg=dict(),
        test_cfg=dict(
            flip_test=True,
            post_process='default',
            shift_heatmap=True,
            modulate_kernel=11))

    detector = TopDown(model_cfg['backbone'], None, model_cfg['keypoint_head'],
                       model_cfg['train_cfg'], model_cfg['test_cfg'],
                       model_cfg['pretrained'])

    # init_weights must reject a non-str pretrained argument.
    with pytest.raises(TypeError):
        detector.init_weights(pretrained=dict())
    detector.pretrained = model_cfg['pretrained']
    detector.init_weights()

    input_shape = (1, 3, 256, 256)
    mm_inputs = _demo_mm_inputs(input_shape)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    # Train-mode forward returns a dict of losses.
    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    # Inference-mode forward should run without gradients.
    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)

    # --- Case 2: Hourglass backbone + multi-stage head, single MSE loss ---
    model_cfg = dict(
        type='TopDown',
        pretrained=None,
        backbone=dict(
            type='HourglassNet',
            num_stacks=1,
        ),
        keypoint_head=dict(
            type='TopdownHeatmapMultiStageHead',
            in_channels=256,
            out_channels=17,
            num_stages=1,
            num_deconv_layers=0,
            extra=dict(final_conv_kernel=1, ),
            loss_keypoint=dict(type='JointsMSELoss', use_target_weight=False)),
        train_cfg=dict(),
        test_cfg=dict(
            flip_test=True,
            post_process='default',
            shift_heatmap=True,
            modulate_kernel=11))

    detector = TopDown(model_cfg['backbone'], None, model_cfg['keypoint_head'],
                       model_cfg['train_cfg'], model_cfg['test_cfg'],
                       model_cfg['pretrained'])

    # Reuse the (1, 3, 256, 256) inputs from case 1 — shapes match.
    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)

    # --- Case 3: same Hourglass setup, but loss given as a list ---
    model_cfg = dict(
        type='TopDown',
        pretrained=None,
        backbone=dict(
            type='HourglassNet',
            num_stacks=1,
        ),
        keypoint_head=dict(
            type='TopdownHeatmapMultiStageHead',
            in_channels=256,
            out_channels=17,
            num_stages=1,
            num_deconv_layers=0,
            extra=dict(final_conv_kernel=1, ),
            loss_keypoint=[
                dict(
                    type='JointsMSELoss',
                    use_target_weight=True,
                    loss_weight=1.)
            ]),
        train_cfg=dict(),
        test_cfg=dict(
            flip_test=True,
            post_process='default',
            shift_heatmap=True,
            modulate_kernel=11))

    detector = TopDown(model_cfg['backbone'], None, model_cfg['keypoint_head'],
                       model_cfg['train_cfg'], model_cfg['test_cfg'],
                       model_cfg['pretrained'])

    detector.init_weights()

    input_shape = (1, 3, 256, 256)
    mm_inputs = _demo_mm_inputs(input_shape, num_outputs=None)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)

    # --- Case 4: RSN backbone + MSMU head with 4 output units and a
    # mixed MSE/OHKM-MSE loss list; targets carry a stage dimension ---
    model_cfg = dict(
        type='TopDown',
        pretrained=None,
        backbone=dict(
            type='RSN',
            unit_channels=256,
            num_stages=1,
            num_units=4,
            num_blocks=[2, 2, 2, 2],
            num_steps=4,
            norm_cfg=dict(type='BN')),
        keypoint_head=dict(
            type='TopdownHeatmapMSMUHead',
            out_shape=(64, 48),
            unit_channels=256,
            out_channels=17,
            num_stages=1,
            num_units=4,
            use_prm=False,
            norm_cfg=dict(type='BN'),
            loss_keypoint=[dict(type='JointsMSELoss', use_target_weight=True)]
            * 3 + [dict(type='JointsOHKMMSELoss', use_target_weight=True)]),
        train_cfg=dict(num_units=4),
        test_cfg=dict(
            flip_test=True,
            post_process='default',
            shift_heatmap=False,
            unbiased_decoding=False,
            modulate_kernel=5))

    detector = TopDown(model_cfg['backbone'], None, model_cfg['keypoint_head'],
                       model_cfg['train_cfg'], model_cfg['test_cfg'],
                       model_cfg['pretrained'])

    detector.init_weights()

    input_shape = (1, 3, 256, 192)
    # num_outputs=4 adds the per-unit stage dimension to the targets.
    mm_inputs = _demo_mm_inputs(input_shape, num_outputs=4)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
        _ = detector.forward_dummy(imgs)

    # --- Case 5: ResNet-18 + GAP neck + regression head (inference only) ---
    model_cfg = dict(
        type='TopDown',
        pretrained=None,
        backbone=dict(type='ResNet', depth=18),
        neck=dict(type='GlobalAveragePooling'),
        keypoint_head=dict(
            type='DeepposeRegressionHead',
            in_channels=512,
            num_joints=17,
            loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True)),
        train_cfg=dict(),
        test_cfg=dict(
            flip_test=True,
            regression_flip_shift=True,
        ))

    detector = TopDown(model_cfg['backbone'], model_cfg['neck'],
                       model_cfg['keypoint_head'], model_cfg['train_cfg'],
                       model_cfg['test_cfg'], model_cfg['pretrained'])

    # init_weights must reject a non-str pretrained argument.
    with pytest.raises(TypeError):
        detector.init_weights(pretrained=dict())
    detector.pretrained = model_cfg['pretrained']
    detector.init_weights()

    input_shape = (1, 3, 256, 256)
    mm_inputs = _demo_mm_inputs(input_shape)

    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
|
|
|
|
|
def test_posewarper_forward():
    """Smoke-test the PoseWarper detector (multi-frame pose estimation)
    across several configuration variants."""
    # --- Base config: HRNet-w48 backbone + PoseWarperNeck, two frames,
    # tensors of all frames concatenated along the batch dim ---
    model_cfg = dict(
        type='PoseWarper',
        pretrained=None,
        backbone=dict(
            type='HRNet',
            in_channels=3,
            extra=dict(
                stage1=dict(
                    num_modules=1,
                    num_branches=1,
                    block='BOTTLENECK',
                    num_blocks=(4, ),
                    num_channels=(64, )),
                stage2=dict(
                    num_modules=1,
                    num_branches=2,
                    block='BASIC',
                    num_blocks=(4, 4),
                    num_channels=(48, 96)),
                stage3=dict(
                    num_modules=4,
                    num_branches=3,
                    block='BASIC',
                    num_blocks=(4, 4, 4),
                    num_channels=(48, 96, 192)),
                stage4=dict(
                    num_modules=3,
                    num_branches=4,
                    block='BASIC',
                    num_blocks=(4, 4, 4, 4),
                    num_channels=(48, 96, 192, 384))),
            frozen_stages=4,
        ),
        concat_tensors=True,
        neck=dict(
            type='PoseWarperNeck',
            in_channels=48,
            freeze_trans_layer=True,
            out_channels=17,
            inner_channels=128,
            deform_groups=17,
            dilations=(3, 6, 12, 18, 24),
            trans_conv_kernel=1,
            res_blocks_cfg=dict(block='BASIC', num_blocks=20),
            offsets_kernel=3,
            deform_conv_kernel=3),
        keypoint_head=dict(
            type='TopdownHeatmapSimpleHead',
            in_channels=17,
            out_channels=17,
            num_deconv_layers=0,
            extra=dict(final_conv_kernel=0, ),
            loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
        train_cfg=dict(),
        test_cfg=dict(
            flip_test=False,
            post_process='default',
            shift_heatmap=True,
            modulate_kernel=11))

    detector = PoseWarper(model_cfg['backbone'], model_cfg['neck'],
                          model_cfg['keypoint_head'], model_cfg['train_cfg'],
                          model_cfg['test_cfg'], model_cfg['pretrained'], None,
                          model_cfg['concat_tensors'])
    assert detector.concat_tensors

    detector.init_weights()

    # Two frames per sample; imgs becomes a list of tensors.
    input_shape = (2, 3, 64, 64)
    num_frames = 2
    mm_inputs = _demo_mm_inputs(input_shape, None, num_frames)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    # Train-mode forward returns a dict of losses.
    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    # Inference-mode and dummy forwards run without gradients.
    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
        _ = detector.forward_dummy(imgs)

    # --- Variant: per-frame processing (concat_tensors=False) ---
    model_cfg_copy = copy.deepcopy(model_cfg)
    model_cfg_copy['concat_tensors'] = False

    detector = PoseWarper(model_cfg_copy['backbone'], model_cfg_copy['neck'],
                          model_cfg_copy['keypoint_head'],
                          model_cfg_copy['train_cfg'],
                          model_cfg_copy['test_cfg'],
                          model_cfg_copy['pretrained'], None,
                          model_cfg_copy['concat_tensors'])
    assert not detector.concat_tensors

    detector.init_weights()

    input_shape = (2, 3, 64, 64)
    num_frames = 2
    mm_inputs = _demo_mm_inputs(input_shape, None, num_frames)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
        _ = detector.forward_dummy(imgs)

    # --- Variant: flip-test enabled at inference ---
    model_cfg_copy = copy.deepcopy(model_cfg)
    model_cfg_copy['test_cfg']['flip_test'] = True

    detector = PoseWarper(model_cfg_copy['backbone'], model_cfg_copy['neck'],
                          model_cfg_copy['keypoint_head'],
                          model_cfg_copy['train_cfg'],
                          model_cfg_copy['test_cfg'],
                          model_cfg_copy['pretrained'], None,
                          model_cfg_copy['concat_tensors'])

    detector.init_weights()

    input_shape = (1, 3, 64, 64)
    num_frames = 2
    mm_inputs = _demo_mm_inputs(input_shape, None, num_frames)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
        _ = detector.forward_dummy(imgs)

    # --- Variant: fewer dilations in the neck ---
    model_cfg_copy = copy.deepcopy(model_cfg)
    model_cfg_copy['neck']['dilations'] = (3, 6, 12)

    detector = PoseWarper(model_cfg_copy['backbone'], model_cfg_copy['neck'],
                          model_cfg_copy['keypoint_head'],
                          model_cfg_copy['train_cfg'],
                          model_cfg_copy['test_cfg'],
                          model_cfg_copy['pretrained'], None,
                          model_cfg_copy['concat_tensors'])

    detector.init_weights()

    input_shape = (2, 3, 64, 64)
    num_frames = 2
    mm_inputs = _demo_mm_inputs(input_shape, None, num_frames)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
        _ = detector.forward_dummy(imgs)

    # --- Variant: ResNet-18 backbone (neck in_channels adjusted to its
    # 512-channel output; head consumes the neck's 17-channel output) ---
    model_cfg_copy = copy.deepcopy(model_cfg)
    model_cfg_copy['backbone'] = dict(type='ResNet', depth=18)
    model_cfg_copy['neck']['in_channels'] = 512
    model_cfg_copy['keypoint_head'] = dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=17,
        out_channels=17,
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))

    detector = PoseWarper(model_cfg_copy['backbone'], model_cfg_copy['neck'],
                          model_cfg_copy['keypoint_head'],
                          model_cfg_copy['train_cfg'],
                          model_cfg_copy['test_cfg'],
                          model_cfg_copy['pretrained'], None,
                          model_cfg_copy['concat_tensors'])

    detector.init_weights()

    input_shape = (1, 3, 64, 64)
    num_frames = 2
    mm_inputs = _demo_mm_inputs(input_shape, None, num_frames)

    imgs = mm_inputs.pop('imgs')
    target = mm_inputs.pop('target')
    target_weight = mm_inputs.pop('target_weight')
    img_metas = mm_inputs.pop('img_metas')

    losses = detector.forward(
        imgs, target, target_weight, img_metas, return_loss=True)
    assert isinstance(losses, dict)

    with torch.no_grad():
        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)
        _ = detector.forward_dummy(imgs)
|
|
|
|
|
def _demo_mm_inputs( |
|
input_shape=(1, 3, 256, 256), num_outputs=None, num_frames=1): |
|
"""Create a superset of inputs needed to run test or train batches. |
|
|
|
Args: |
|
input_shape (tuple): |
|
input batch dimensions |
|
num_frames (int): |
|
number of frames for each sample, default: 1, |
|
if larger than 1, return a list of tensors |
|
""" |
|
(N, C, H, W) = input_shape |
|
|
|
rng = np.random.RandomState(0) |
|
|
|
imgs = rng.rand(*input_shape) |
|
if num_outputs is not None: |
|
target = np.zeros([N, num_outputs, 17, H // 4, W // 4], |
|
dtype=np.float32) |
|
target_weight = np.ones([N, num_outputs, 17, 1], dtype=np.float32) |
|
else: |
|
target = np.zeros([N, 17, H // 4, W // 4], dtype=np.float32) |
|
target_weight = np.ones([N, 17, 1], dtype=np.float32) |
|
|
|
img_metas = [{ |
|
'img_shape': (H, W, C), |
|
'center': np.array([W / 2, H / 2]), |
|
'scale': np.array([0.5, 0.5]), |
|
'bbox_score': 1.0, |
|
'bbox_id': 0, |
|
'flip_pairs': [], |
|
'inference_channel': np.arange(17), |
|
'image_file': '<demo>.png', |
|
'frame_weight': np.random.uniform(0, 1, num_frames), |
|
} for _ in range(N)] |
|
|
|
mm_inputs = { |
|
'target': torch.FloatTensor(target), |
|
'target_weight': torch.FloatTensor(target_weight), |
|
'img_metas': img_metas |
|
} |
|
|
|
if num_frames == 1: |
|
imgs = torch.FloatTensor(rng.rand(*input_shape)).requires_grad_(True) |
|
else: |
|
|
|
imgs = [ |
|
torch.FloatTensor(rng.rand(*input_shape)).requires_grad_(True) |
|
for _ in range(num_frames) |
|
] |
|
|
|
mm_inputs['imgs'] = imgs |
|
return mm_inputs |
|
|