# Copyright (c) OpenMMLab. All rights reserved.
"""pytest tests/test_forward.py."""
import copy
from os.path import dirname, exists, join

import numpy as np
import pytest
import torch


def _get_config_directory():
    """Find the predefined detector config directory."""
    try:
        # Assume we are running in the source mmdetection repo
        repo_dpath = dirname(dirname(dirname(__file__)))
    except NameError:
        # For IPython development when this __file__ is not defined
        import mmdet
        repo_dpath = dirname(dirname(mmdet.__file__))
    config_dpath = join(repo_dpath, 'configs')
    if not exists(config_dpath):
        raise Exception('Cannot find config path')
    return config_dpath


def _get_config_module(fname):
    """Load a configuration as a python module."""
    from mmcv import Config
    config_dpath = _get_config_directory()
    config_fpath = join(config_dpath, fname)
    config_mod = Config.fromfile(config_fpath)
    return config_mod


def _get_detector_cfg(fname):
    """Grab configs necessary to create a detector.

    These are deep copied to allow for safe modification of parameters
    without influencing other tests.
    """
    config = _get_config_module(fname)
    model = copy.deepcopy(config.model)
    return model


def _replace_r50_with_r18(model):
    """Replace ResNet50 with ResNet18 in config."""
    model = copy.deepcopy(model)
    if model.backbone.type == 'ResNet':
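        # With ResNet18's BasicBlock (expansion 1) the four stages output
        # base_channels * 2**i channels, so base_channels=2 yields
        # [2, 4, 8, 16]; the neck's in_channels must be updated to match.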
        model.backbone.depth = 18
        model.backbone.base_channels = 2
        model.neck.in_channels = [2, 4, 8, 16]
    return model


def test_sparse_rcnn_forward():
    config_path = 'sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py'
    model = _get_detector_cfg(config_path)
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None
    from mmdet.models import build_detector
    detector = build_detector(model)
    detector.init_weights()
    input_shape = (1, 3, 100, 100)
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[5])
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    # Test forward train with non-empty truth batch
    detector.train()
    gt_bboxes = list(mm_inputs['gt_bboxes'])
    gt_labels = list(mm_inputs['gt_labels'])
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0
    detector.forward_dummy(imgs)
    # Test forward train with an empty truth batch
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    gt_bboxes = list(mm_inputs['gt_bboxes'])
    gt_labels = list(mm_inputs['gt_labels'])
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0
    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      rescale=True,
                                      return_loss=False)
            batch_results.append(result)
    # test empty proposal in roi_head
    with torch.no_grad():
        # test no proposal in the whole batch
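        # A sketch of the argument order, assuming Sparse R-CNN's
        # SparseRoIHead.simple_test(x, proposal_boxes, proposal_features,
        # img_metas, imgs_whwh): the raw image stands in for the feature
        # list, and an empty proposal_boxes tensor drives the
        # zero-proposal code path.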
        detector.roi_head.simple_test([imgs[0][None, :]],
                                      torch.empty((1, 0, 4)),
                                      torch.empty((1, 100, 4)),
                                      [img_metas[0]], torch.ones((1, 4)))


def test_rpn_forward():
    model = _get_detector_cfg('rpn/rpn_r50_fpn_1x_coco.py')
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None
    from mmdet.models import build_detector
    detector = build_detector(model)
    input_shape = (1, 3, 100, 100)
    mm_inputs = _demo_mm_inputs(input_shape)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    # Test forward train
    gt_bboxes = mm_inputs['gt_bboxes']
    losses = detector.forward(
        imgs, img_metas, gt_bboxes=gt_bboxes, return_loss=True)
    assert isinstance(losses, dict)
    # Test forward test
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      return_loss=False)
            batch_results.append(result)


# NOTE: the config list below is a representative subset; the upstream test
# parametrizes over additional single-stage configs.
@pytest.mark.parametrize('cfg_file', [
    'retinanet/retinanet_r50_fpn_1x_coco.py',
])
def test_single_stage_forward_gpu(cfg_file):
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    model = _get_detector_cfg(cfg_file)
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None
    from mmdet.models import build_detector
    detector = build_detector(model)
    input_shape = (2, 3, 128, 128)
    mm_inputs = _demo_mm_inputs(input_shape)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    detector = detector.cuda()
    imgs = imgs.cuda()
    # Test forward train
    gt_bboxes = [b.cuda() for b in mm_inputs['gt_bboxes']]
    gt_labels = [g.cuda() for g in mm_inputs['gt_labels']]
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)
    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      return_loss=False)
            batch_results.append(result)


def test_faster_rcnn_ohem_forward():
    model = _get_detector_cfg(
        'faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py')
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None
    from mmdet.models import build_detector
    detector = build_detector(model)
    input_shape = (1, 3, 100, 100)
    # Test forward train with a non-empty truth batch
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[10])
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_labels = mm_inputs['gt_labels']
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0
    # Test forward train with an empty truth batch
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_labels = mm_inputs['gt_labels']
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0
    # Test RoI forward train with empty proposals
    feature = detector.extract_feat(imgs[0][None, :])
    losses = detector.roi_head.forward_train(
        feature,
        img_metas, [torch.empty((0, 5))],
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels)
    assert isinstance(losses, dict)


# NOTE: the config list below is a representative subset of the two-stage
# configs exercised upstream.
@pytest.mark.parametrize('cfg_file', [
    'cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py',
    'htc/htc_r50_fpn_1x_coco.py',
    'panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py',
    'scnet/scnet_r50_fpn_20e_coco.py',
])
def test_two_stage_forward(cfg_file):
    models_with_semantic = [
        'htc/htc_r50_fpn_1x_coco.py',
        'panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py',
        'scnet/scnet_r50_fpn_20e_coco.py',
    ]
    with_semantic = cfg_file in models_with_semantic
    model = _get_detector_cfg(cfg_file)
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None
    # Save cost: shrink the 1203-class LVIS v1 heads down to 80 classes
    if cfg_file in [
            'seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py'  # noqa: E501
    ]:
        model.roi_head.bbox_head.num_classes = 80
        model.roi_head.bbox_head.loss_cls.num_classes = 80
        model.roi_head.mask_head.num_classes = 80
        model.test_cfg.rcnn.score_thr = 0.05
        model.test_cfg.rcnn.max_per_img = 100
    from mmdet.models import build_detector
    detector = build_detector(model)
    input_shape = (1, 3, 128, 128)
    # Test forward train with a non-empty truth batch
    mm_inputs = _demo_mm_inputs(
        input_shape, num_items=[10], with_semantic=with_semantic)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    losses = detector.forward(imgs, img_metas, return_loss=True, **mm_inputs)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    loss.requires_grad_(True)
    assert float(loss.item()) > 0
    loss.backward()
    # Test forward train with an empty truth batch
    mm_inputs = _demo_mm_inputs(
        input_shape, num_items=[0], with_semantic=with_semantic)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    losses = detector.forward(imgs, img_metas, return_loss=True, **mm_inputs)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    loss.requires_grad_(True)
    assert float(loss.item()) > 0
    loss.backward()
    # Test RoI forward train with empty proposals
    if cfg_file in [
            'panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py'  # noqa: E501
    ]:
        mm_inputs.pop('gt_semantic_seg')
    feature = detector.extract_feat(imgs[0][None, :])
    losses = detector.roi_head.forward_train(feature, img_metas,
                                             [torch.empty((0, 5))],
                                             **mm_inputs)
    assert isinstance(losses, dict)
    # Test forward test
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      return_loss=False)
            batch_results.append(result)
    cascade_models = [
        'cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py',
        'htc/htc_r50_fpn_1x_coco.py',
        'scnet/scnet_r50_fpn_20e_coco.py',
    ]
    # test empty proposal in roi_head
    with torch.no_grad():
        # test no proposal in the whole batch
        detector.simple_test(
            imgs[0][None, :], [img_metas[0]], proposals=[torch.empty((0, 4))])
        # test no proposal of aug
        features = detector.extract_feats([imgs[0][None, :]] * 2)
        detector.roi_head.aug_test(features, [torch.empty((0, 4))] * 2,
                                   [[img_metas[0]]] * 2)
        # test rcnn_test_cfg is None
        if cfg_file not in cascade_models:
            feature = detector.extract_feat(imgs[0][None, :])
            bboxes, scores = detector.roi_head.simple_test_bboxes(
                feature, [img_metas[0]], [torch.empty((0, 4))], None)
            assert all([bbox.shape == torch.Size((0, 4)) for bbox in bboxes])
            assert all([
                score.shape == torch.Size(
                    (0, detector.roi_head.bbox_head.fc_cls.out_features))
                for score in scores
            ])
        # test no proposal in some images of the batch
        x1y1 = torch.randint(1, 100, (10, 2)).float()
        # x2y2 must be greater than x1y1
        x2y2 = x1y1 + torch.randint(1, 100, (10, 2))
        detector.simple_test(
            imgs[0][None, :].repeat(2, 1, 1, 1), [img_metas[0]] * 2,
            proposals=[torch.empty((0, 4)),
                       torch.cat([x1y1, x2y2], dim=-1)])
        # test no proposal of aug
        detector.roi_head.aug_test(
            features, [torch.cat([x1y1, x2y2], dim=-1),
                       torch.empty((0, 4))], [[img_metas[0]]] * 2)
        # test rcnn_test_cfg is None
        if cfg_file not in cascade_models:
            feature = detector.extract_feat(imgs[0][None, :].repeat(
                2, 1, 1, 1))
            bboxes, scores = detector.roi_head.simple_test_bboxes(
                feature, [img_metas[0]] * 2,
                [torch.empty((0, 4)),
                 torch.cat([x1y1, x2y2], dim=-1)], None)
            assert bboxes[0].shape == torch.Size((0, 4))
            assert scores[0].shape == torch.Size(
                (0, detector.roi_head.bbox_head.fc_cls.out_features))


# NOTE: representative subset; the upstream test parametrizes over many
# single-stage configs.
@pytest.mark.parametrize('cfg_file', [
    'retinanet/retinanet_r50_fpn_1x_coco.py',
])
def test_single_stage_forward_cpu(cfg_file):
    model = _get_detector_cfg(cfg_file)
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None
    from mmdet.models import build_detector
    detector = build_detector(model)
    input_shape = (1, 3, 300, 300)
    mm_inputs = _demo_mm_inputs(input_shape)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    # Test forward train
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_labels = mm_inputs['gt_labels']
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)
    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      return_loss=False)
            batch_results.append(result)


def _demo_mm_inputs(input_shape=(1, 3, 300, 300),
                    num_items=None, num_classes=10,
                    with_semantic=False):  # yapf: disable
    """Create a superset of inputs needed to run test or train batches.

    Args:
        input_shape (tuple): input batch dimensions.
        num_items (None | List[int]): specifies the number of boxes in
            each batch item.
        num_classes (int): number of different labels a box might have.
        with_semantic (bool): whether to also return demo semantic
            segmentation ground truth.
    """
    from mmdet.core import BitmapMasks
    (N, C, H, W) = input_shape
    rng = np.random.RandomState(0)
    imgs = rng.rand(*input_shape)
    img_metas = [{
        'img_shape': (H, W, C),
        'ori_shape': (H, W, C),
        'pad_shape': (H, W, C),
        'filename': '<demo>.png',
        'scale_factor': np.array([1.1, 1.2, 1.1, 1.2]),
        'flip': False,
        'flip_direction': None,
    } for _ in range(N)]
    gt_bboxes = []
    gt_labels = []
    gt_masks = []
    for batch_idx in range(N):
        if num_items is None:
            num_boxes = rng.randint(1, 10)
        else:
            num_boxes = num_items[batch_idx]
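        # Draw random box centers and sizes in relative coordinates, then
        # convert to absolute corner (x1, y1, x2, y2) format and clip to the
        # image bounds so every generated box is valid.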
        cx, cy, bw, bh = rng.rand(num_boxes, 4).T
        tl_x = ((cx * W) - (W * bw / 2)).clip(0, W)
        tl_y = ((cy * H) - (H * bh / 2)).clip(0, H)
        br_x = ((cx * W) + (W * bw / 2)).clip(0, W)
        br_y = ((cy * H) + (H * bh / 2)).clip(0, H)
        boxes = np.vstack([tl_x, tl_y, br_x, br_y]).T
        class_idxs = rng.randint(1, num_classes, size=num_boxes)
        gt_bboxes.append(torch.FloatTensor(boxes))
        gt_labels.append(torch.LongTensor(class_idxs))
        mask = np.random.randint(0, 2, (len(boxes), H, W), dtype=np.uint8)
        gt_masks.append(BitmapMasks(mask, H, W))
    mm_inputs = {
        'imgs': torch.FloatTensor(imgs).requires_grad_(True),
        'img_metas': img_metas,
        'gt_bboxes': gt_bboxes,
        'gt_labels': gt_labels,
        'gt_bboxes_ignore': None,
        'gt_masks': gt_masks,
    }
    if with_semantic:
        # assume gt_semantic_seg uses a scale of 1/8 of the img
        gt_semantic_seg = np.random.randint(
            0, num_classes, (1, 1, H // 8, W // 8), dtype=np.uint8)
        mm_inputs.update(
            {'gt_semantic_seg': torch.ByteTensor(gt_semantic_seg)})
    return mm_inputs


def test_yolact_forward():
    model = _get_detector_cfg('yolact/yolact_r50_1x8_coco.py')
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None
    from mmdet.models import build_detector
    detector = build_detector(model)
    input_shape = (1, 3, 100, 100)
    mm_inputs = _demo_mm_inputs(input_shape)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    # Test forward train
    detector.train()
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_labels = mm_inputs['gt_labels']
    gt_masks = mm_inputs['gt_masks']
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        gt_masks=gt_masks,
        return_loss=True)
    assert isinstance(losses, dict)
    # Test forward dummy for get_flops
    detector.forward_dummy(imgs)
    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      rescale=True,
                                      return_loss=False)
            batch_results.append(result)


def test_detr_forward():
    model = _get_detector_cfg('detr/detr_r50_8x2_150e_coco.py')
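    # ResNet18's final stage outputs 512 channels (vs. 2048 for ResNet50),
    # so the head's input width must be reduced to match the new backbone.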
    model.backbone.depth = 18
    model.bbox_head.in_channels = 512
    model.backbone.init_cfg = None
    from mmdet.models import build_detector
    detector = build_detector(model)
    input_shape = (1, 3, 100, 100)
    mm_inputs = _demo_mm_inputs(input_shape)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    # Test forward train with non-empty truth batch
    detector.train()
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_labels = mm_inputs['gt_labels']
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0
    # Test forward train with an empty truth batch
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_labels = mm_inputs['gt_labels']
    losses = detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0
    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      rescale=True,
                                      return_loss=False)
            batch_results.append(result)


def test_inference_detector():
    from mmcv import ConfigDict
    from mmdet.apis import inference_detector
    from mmdet.models import build_detector

    # small RetinaNet
    num_class = 3
    model_dict = dict(
        type='RetinaNet',
        backbone=dict(
            type='ResNet',
            depth=18,
            num_stages=4,
            out_indices=(3, ),
            norm_cfg=dict(type='BN', requires_grad=False),
            norm_eval=True,
            style='pytorch'),
        neck=None,
        bbox_head=dict(
            type='RetinaHead',
            num_classes=num_class,
            in_channels=512,
            stacked_convs=1,
            feat_channels=256,
            anchor_generator=dict(
                type='AnchorGenerator',
                octave_base_scale=4,
                scales_per_octave=3,
                ratios=[0.5],
                strides=[32]),
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[.0, .0, .0, .0],
                target_stds=[1.0, 1.0, 1.0, 1.0]),
        ),
        test_cfg=dict(
            nms_pre=1000,
            min_bbox_size=0,
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100))
    rng = np.random.RandomState(0)
    img1 = rng.rand(100, 100, 3)
    img2 = rng.rand(100, 100, 3)
    model = build_detector(ConfigDict(model_dict))
    config = _get_config_module('retinanet/retinanet_r50_fpn_1x_coco.py')
    model.cfg = config
    # test single image
    result = inference_detector(model, img1)
    assert len(result) == num_class
    # test multiple images
    result = inference_detector(model, [img1, img2])
    assert len(result) == 2 and len(result[0]) == num_class


def test_yolox_random_size():
    from mmdet.models import build_detector
    model = _get_detector_cfg('yolox/yolox_tiny_8x8_300e_coco.py')
    model.random_size_range = (2, 2)
    model.input_size = (64, 96)
    model.random_size_interval = 1
    detector = build_detector(model)
    input_shape = (1, 3, 64, 64)
    mm_inputs = _demo_mm_inputs(input_shape)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    # Test forward train with non-empty truth batch
    detector.train()
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_labels = mm_inputs['gt_labels']
    detector.forward(
        imgs,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        return_loss=True)
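    # Rough expectation, assuming YOLOX's usual size multiplier of 32: with
    # random_size_range pinned to (2, 2) the sampled multiplier is always 2,
    # and with the 64:96 aspect ratio of input_size the randomly chosen
    # training size resolves back to (2 * 32, 3 * 32) = (64, 96).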
    assert detector._input_size == (64, 96)


def test_maskformer_forward():
    model_cfg = _get_detector_cfg(
        'maskformer/maskformer_r50_mstrain_16x1_75e_coco.py')
    base_channels = 32
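    # Shrink every embedding / feature width to base_channels to keep the
    # test light. num_feats is set to base_channels // 2 because the sine
    # positional encoding concatenates an x- and a y-embedding, so its
    # output width is 2 * num_feats (an assumption about mmdet's
    # SinePositionalEncoding).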
    model_cfg.backbone.depth = 18
    model_cfg.backbone.init_cfg = None
    model_cfg.backbone.base_channels = base_channels
    model_cfg.panoptic_head.in_channels = [
        base_channels * 2**i for i in range(4)
    ]
    model_cfg.panoptic_head.feat_channels = base_channels
    model_cfg.panoptic_head.out_channels = base_channels
    model_cfg.panoptic_head.pixel_decoder.encoder.\
        transformerlayers.attn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.pixel_decoder.encoder.\
        transformerlayers.ffn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.pixel_decoder.encoder.\
        transformerlayers.ffn_cfgs.feedforward_channels = base_channels * 8
    model_cfg.panoptic_head.pixel_decoder.\
        positional_encoding.num_feats = base_channels // 2
    model_cfg.panoptic_head.positional_encoding.\
        num_feats = base_channels // 2
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.attn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.ffn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.ffn_cfgs.feedforward_channels = base_channels * 8
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.feedforward_channels = base_channels * 8

    from mmdet.core import BitmapMasks
    from mmdet.models import build_detector
    detector = build_detector(model_cfg)
    # Test forward train with non-empty truth batch
    detector.train()
    img_metas = [
        {
            'batch_input_shape': (128, 160),
            'img_shape': (126, 160, 3),
            'ori_shape': (63, 80, 3),
            'pad_shape': (128, 160, 3)
        },
    ]
    img = torch.rand((1, 3, 128, 160))
    gt_bboxes = None
    gt_labels = [
        torch.tensor([10]).long(),
    ]
    thing_mask1 = np.zeros((1, 128, 160), dtype=np.int32)
    thing_mask1[0, :50] = 1
    gt_masks = [
        BitmapMasks(thing_mask1, 128, 160),
    ]
    stuff_mask1 = torch.zeros((1, 128, 160)).long()
    stuff_mask1[0, :50] = 10
    stuff_mask1[0, 50:] = 100
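    # Semantic map sketch: the top rows carry class 10 (the same id as
    # gt_labels, i.e. the thing instance) and the rest carry id 100, which
    # under the 133-class COCO panoptic split (80 things + 53 stuff) is
    # presumably a stuff class.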
    gt_semantic_seg = [
        stuff_mask1,
    ]
    losses = detector.forward(
        img=img,
        img_metas=img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        gt_masks=gt_masks,
        gt_semantic_seg=gt_semantic_seg,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0
    # Test forward train with an empty truth batch
    gt_bboxes = [
        torch.empty((0, 4)).float(),
    ]
    gt_labels = [
        torch.empty((0, )).long(),
    ]
    mask = np.zeros((0, 128, 160), dtype=np.uint8)
    gt_masks = [
        BitmapMasks(mask, 128, 160),
    ]
    gt_semantic_seg = [
        torch.randint(0, 133, (0, 128, 160)),
    ]
    losses = detector.forward(
        img,
        img_metas,
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        gt_masks=gt_masks,
        gt_semantic_seg=gt_semantic_seg,
        return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0
    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in img]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      rescale=True,
                                      return_loss=False)
            batch_results.append(result)


# NOTE: the panoptic and instance config names below are the expected
# Mask2Former configs; treat the exact list as an assumption about the
# upstream parametrization.
@pytest.mark.parametrize('cfg_file', [
    'mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic.py',
    'mask2former/mask2former_r50_lsj_8x2_50e_coco.py',
])
def test_mask2former_forward(cfg_file):
    # Test Panoptic Segmentation and Instance Segmentation
    model_cfg = _get_detector_cfg(cfg_file)
    base_channels = 32
    model_cfg.backbone.depth = 18
    model_cfg.backbone.init_cfg = None
    model_cfg.backbone.base_channels = base_channels
    model_cfg.panoptic_head.in_channels = [
        base_channels * 2**i for i in range(4)
    ]
    model_cfg.panoptic_head.feat_channels = base_channels
    model_cfg.panoptic_head.out_channels = base_channels
    model_cfg.panoptic_head.pixel_decoder.encoder.\
        transformerlayers.attn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.pixel_decoder.encoder.\
        transformerlayers.ffn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.pixel_decoder.encoder.\
        transformerlayers.ffn_cfgs.feedforward_channels = base_channels * 4
    model_cfg.panoptic_head.pixel_decoder.\
        positional_encoding.num_feats = base_channels // 2
    model_cfg.panoptic_head.positional_encoding.\
        num_feats = base_channels // 2
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.attn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.ffn_cfgs.embed_dims = base_channels
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.ffn_cfgs.feedforward_channels = base_channels * 8
    model_cfg.panoptic_head.transformer_decoder.\
        transformerlayers.feedforward_channels = base_channels * 8
    num_stuff_classes = model_cfg.panoptic_head.num_stuff_classes

    from mmdet.core import BitmapMasks
    from mmdet.models import build_detector
    detector = build_detector(model_cfg)
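    # _forward_train closes over img, img_metas and the gt_* names assigned
    # below, so each scenario only needs to rebind those variables before
    # calling it again.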
    def _forward_train():
        losses = detector.forward(
            img,
            img_metas,
            gt_bboxes=gt_bboxes,
            gt_labels=gt_labels,
            gt_masks=gt_masks,
            gt_semantic_seg=gt_semantic_seg,
            return_loss=True)
        assert isinstance(losses, dict)
        loss, _ = detector._parse_losses(losses)
        assert float(loss.item()) > 0
    # Test forward train with non-empty truth batch
    detector.train()
    img_metas = [
        {
            'batch_input_shape': (128, 160),
            'img_shape': (126, 160, 3),
            'ori_shape': (63, 80, 3),
            'pad_shape': (128, 160, 3)
        },
    ]
    img = torch.rand((1, 3, 128, 160))
    gt_bboxes = None
    gt_labels = [
        torch.tensor([10]).long(),
    ]
    thing_mask1 = np.zeros((1, 128, 160), dtype=np.int32)
    thing_mask1[0, :50] = 1
    gt_masks = [
        BitmapMasks(thing_mask1, 128, 160),
    ]
    stuff_mask1 = torch.zeros((1, 128, 160)).long()
    stuff_mask1[0, :50] = 10
    stuff_mask1[0, 50:] = 100
    gt_semantic_seg = [
        stuff_mask1,
    ]
    _forward_train()
    # Test forward train with non-empty truth batch and gt_semantic_seg=None
    gt_semantic_seg = None
    _forward_train()
    # Test forward train with an empty truth batch
    gt_bboxes = [
        torch.empty((0, 4)).float(),
    ]
    gt_labels = [
        torch.empty((0, )).long(),
    ]
    mask = np.zeros((0, 128, 160), dtype=np.uint8)
    gt_masks = [
        BitmapMasks(mask, 128, 160),
    ]
    gt_semantic_seg = [
        torch.randint(0, 133, (0, 128, 160)),
    ]
    _forward_train()
    # Test forward train with an empty truth batch and gt_semantic_seg=None
    gt_semantic_seg = None
    _forward_train()
    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in img]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      rescale=True,
                                      return_loss=False)
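            # Panoptic configs (num_stuff_classes > 0) return one dict per
            # image, while instance-only configs return the usual tuple
            # (typically bbox and mask results).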
            if num_stuff_classes > 0:
                assert isinstance(result[0], dict)
            else:
                assert isinstance(result[0], tuple)
            batch_results.append(result)