# Copyright (c) OpenMMLab. All rights reserved.
from unittest import TestCase

import numpy as np
import torch
from mmengine.config import ConfigDict
from mmengine.structures import InstanceData

from mmdet import *  # noqa
from mmdet.models.dense_heads import SOLOV2Head
from mmdet.structures.mask import BitmapMasks


def _rand_masks(num_items, bboxes, img_w, img_h):
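    """Generate random binary GT masks inside each bbox as BitmapMasks."""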
    rng = np.random.RandomState(0)
    masks = np.zeros((num_items, img_h, img_w))
    for i, bbox in enumerate(bboxes):
        bbox = bbox.astype(np.int32)
        mask = (rng.rand(1, bbox[3] - bbox[1], bbox[2] - bbox[0]) >
                0.3).astype(np.int64)
        masks[i:i + 1, bbox[1]:bbox[3], bbox[0]:bbox[2]] = mask
    return BitmapMasks(masks, height=img_h, width=img_w)


def _fake_mask_feature_head():
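    """Return a minimal mask_feature_head config for SOLOV2Head tests."""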
    mask_feature_head = ConfigDict(
        feat_channels=128,
        start_level=0,
        end_level=3,
        out_channels=256,
        mask_stride=4,
        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))
    return mask_feature_head


class TestSOLOv2Head(TestCase):

    def test_solov2_head_loss(self):
        """Tests mask head loss when truth is empty and non-empty."""
        s = 256
        img_metas = [{
            'img_shape': (s, s, 3),
            'ori_shape': (s, s, 3),
            'scale_factor': 1,
            'batch_input_shape': (s, s, 3)
        }]

        mask_feature_head = _fake_mask_feature_head()

        mask_head = SOLOV2Head(
            num_classes=4, in_channels=1, mask_feature_head=mask_feature_head)

        # The SOLOv2 head expects multiple levels of features per image
        feats = []
        for i in range(len(mask_head.strides)):
            feats.append(
                torch.rand(1, 1, s // (2**(i + 2)), s // (2**(i + 2))))
        feats = tuple(feats)

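        # forward() returns multi-level kernel predictions, classification
        # scores and the shared mask feature map used by loss_by_feat below.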
        mask_outs = mask_head.forward(feats)

        # Test that empty ground truth encourages the network to
        # predict background
        gt_instances = InstanceData()
        gt_instances.bboxes = torch.empty(0, 4)
        gt_instances.labels = torch.LongTensor([])
        gt_instances.masks = _rand_masks(0, gt_instances.bboxes.numpy(), s, s)

        empty_gt_losses = mask_head.loss_by_feat(
            *mask_outs,
            batch_gt_instances=[gt_instances],
            batch_img_metas=img_metas)
        # When there is no ground truth, the cls loss should be nonzero
        # but there should be no mask loss.
        empty_cls_loss = empty_gt_losses['loss_cls']
        empty_mask_loss = empty_gt_losses['loss_mask']
        self.assertGreater(empty_cls_loss.item(), 0,
                           'cls loss should be non-zero')
        self.assertEqual(
            empty_mask_loss.item(), 0,
            'there should be no mask loss when there are no true masks')

        # When the ground truth is non-empty, both the cls and mask losses
        # should be nonzero for random inputs
        gt_instances = InstanceData()
        gt_instances.bboxes = torch.Tensor(
            [[23.6667, 23.8757, 238.6326, 151.8874]])
        gt_instances.labels = torch.LongTensor([2])
        gt_instances.masks = _rand_masks(1, gt_instances.bboxes.numpy(), s, s)

        one_gt_losses = mask_head.loss_by_feat(
            *mask_outs,
            batch_gt_instances=[gt_instances],
            batch_img_metas=img_metas)
        onegt_cls_loss = one_gt_losses['loss_cls']
        onegt_mask_loss = one_gt_losses['loss_mask']
        self.assertGreater(onegt_cls_loss.item(), 0,
                           'cls loss should be non-zero')
        self.assertGreater(onegt_mask_loss.item(), 0,
                           'mask loss should be non-zero')

    def test_solov2_head_empty_result(self):
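        """Tests that empty predictions produce an empty InstanceData."""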
        s = 256
        img_metas = {
            'img_shape': (s, s, 3),
            'ori_shape': (s, s, 3),
            'scale_factor': 1,
            'batch_input_shape': (s, s, 3)
        }

        mask_feature_head = _fake_mask_feature_head()
        mask_head = SOLOV2Head(
            num_classes=4, in_channels=1, mask_feature_head=mask_feature_head)

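        # Empty prediction tensors simulate an image with no candidate
        # instances.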
        kernel_preds = torch.empty(0, 128)
        cls_scores = torch.empty(0, 80)
        mask_feats = torch.empty(0, 16, 16)
        test_cfg = ConfigDict(
            score_thr=0.1,
            mask_thr=0.5,
        )
        results = mask_head._predict_by_feat_single(
            kernel_preds=kernel_preds,
            cls_scores=cls_scores,
            mask_feats=mask_feats,
            img_meta=img_metas,
            cfg=test_cfg)

        self.assertIsInstance(results, InstanceData)
        self.assertEqual(len(results), 0)