import torch
import torch.nn as nn
from mmcv.cnn import ConvModule

from ..builder import HEADS
from ..utils import SelfAttentionBlock as _SelfAttentionBlock
from .decode_head import BaseDecodeHead

class PPMConcat(nn.ModuleList):
    """Pyramid Pooling Module that only concatenates the pooled features of
    each scale.

    Args:
        pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
            Module.
    """

    def __init__(self, pool_scales=(1, 3, 6, 8)):
        super(PPMConcat, self).__init__(
            [nn.AdaptiveAvgPool2d(pool_scale) for pool_scale in pool_scales])

    def forward(self, feats):
        """Forward function."""
        ppm_outs = []
        for ppm in self:
            ppm_out = ppm(feats)
            # Flatten each pooled map to (batch, channels, scale * scale).
            ppm_outs.append(ppm_out.view(*feats.shape[:2], -1))
        # Concatenate all scales along the flattened spatial dimension.
        concat_outs = torch.cat(ppm_outs, dim=2)
        return concat_outs
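
# A minimal shape sketch for PPMConcat (illustrative only, assuming an NCHW
# feature map; not part of the module itself):
#
#     feats = torch.rand(2, 16, 32, 32)
#     ppm = PPMConcat(pool_scales=(1, 3, 6, 8))
#     out = ppm(feats)
#     # 1*1 + 3*3 + 6*6 + 8*8 = 110 pooled positions per channel.
#     assert out.shape == (2, 16, 110)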


class SelfAttentionBlock(_SelfAttentionBlock):
    """Self-Attention Block used by ANN.

    Compared with the generic ``SelfAttentionBlock``, the key/value feature
    is compressed by a ``PPMConcat`` module and the query feature may be
    max-pooled, which makes the attention asymmetric.

    Args:
        low_in_channels (int): Input channels of lower level feature,
            which is the key feature for self-attention.
        high_in_channels (int): Input channels of higher level feature,
            which is the query feature for self-attention.
        channels (int): Output channels of key/query transform.
        out_channels (int): Output channels.
        share_key_query (bool): Whether share projection weight between key
            and query projection.
        query_scale (int): The scale of query feature map.
        key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
            Module of key feature.
        conv_cfg (dict|None): Config of conv layers.
        norm_cfg (dict|None): Config of norm layers.
        act_cfg (dict|None): Config of activation layers.
    """

    def __init__(self, low_in_channels, high_in_channels, channels,
                 out_channels, share_key_query, query_scale, key_pool_scales,
                 conv_cfg, norm_cfg, act_cfg):
        # Keys/values are spatially compressed by pyramid pooling.
        key_psp = PPMConcat(key_pool_scales)
        if query_scale > 1:
            query_downsample = nn.MaxPool2d(kernel_size=query_scale)
        else:
            query_downsample = None
        super(SelfAttentionBlock, self).__init__(
            key_in_channels=low_in_channels,
            query_in_channels=high_in_channels,
            channels=channels,
            out_channels=out_channels,
            share_key_query=share_key_query,
            query_downsample=query_downsample,
            key_downsample=key_psp,
            key_query_num_convs=1,
            key_query_norm=True,
            value_out_num_convs=1,
            value_out_norm=False,
            matmul_norm=True,
            with_out=True,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
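
# A hedged construction sketch (the channel sizes are illustrative, loosely
# following ResNet stage widths, and are not mandated by this file):
#
#     block = SelfAttentionBlock(
#         low_in_channels=1024, high_in_channels=2048, channels=256,
#         out_channels=2048, share_key_query=False, query_scale=1,
#         key_pool_scales=(1, 3, 6, 8), conv_cfg=None,
#         norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'))
#
# With key_pool_scales=(1, 3, 6, 8) the key/value features are pooled to a
# fixed 110 positions, so attention cost grows linearly with the number of
# query pixels instead of quadratically with image size.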


class AFNB(nn.Module):
    """Asymmetric Fusion Non-local Block (AFNB)

    Args:
        low_in_channels (int): Input channels of lower level feature,
            which is the key feature for self-attention.
        high_in_channels (int): Input channels of higher level feature,
            which is the query feature for self-attention.
        channels (int): Output channels of key/query transform.
        out_channels (int): Output channels.
        query_scales (tuple[int]): The scales of query feature map.
            Default: (1,)
        key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
            Module of key feature.
        conv_cfg (dict|None): Config of conv layers.
        norm_cfg (dict|None): Config of norm layers.
        act_cfg (dict|None): Config of activation layers.
    """

    def __init__(self, low_in_channels, high_in_channels, channels,
                 out_channels, query_scales, key_pool_scales, conv_cfg,
                 norm_cfg, act_cfg):
        super(AFNB, self).__init__()
        # One attention stage per query scale; key and query projections are
        # not shared because they come from different feature levels.
        self.stages = nn.ModuleList()
        for query_scale in query_scales:
            self.stages.append(
                SelfAttentionBlock(
                    low_in_channels=low_in_channels,
                    high_in_channels=high_in_channels,
                    channels=channels,
                    out_channels=out_channels,
                    share_key_query=False,
                    query_scale=query_scale,
                    key_pool_scales=key_pool_scales,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg))
        self.bottleneck = ConvModule(
            out_channels + high_in_channels,
            out_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)

    def forward(self, low_feats, high_feats):
        """Forward function."""
        priors = [stage(high_feats, low_feats) for stage in self.stages]
        # Sum the contexts from all query scales, then fuse with the high
        # level feature through a 1x1 conv.
        context = torch.stack(priors, dim=0).sum(dim=0)
        output = self.bottleneck(torch.cat([context, high_feats], 1))
        return output
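
# A hedged usage sketch for AFNB (shapes are illustrative only):
#
#     afnb = AFNB(
#         low_in_channels=1024, high_in_channels=2048, channels=256,
#         out_channels=2048, query_scales=(1, ),
#         key_pool_scales=(1, 3, 6, 8), conv_cfg=None,
#         norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'))
#     low_feats = torch.rand(2, 1024, 64, 64)
#     high_feats = torch.rand(2, 2048, 64, 64)
#     out = afnb(low_feats, high_feats)  # (2, 2048, 64, 64)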


class APNB(nn.Module):
    """Asymmetric Pyramid Non-local Block (APNB)

    Args:
        in_channels (int): Input channels of key/query feature,
            which is the key feature for self-attention.
        channels (int): Output channels of key/query transform.
        out_channels (int): Output channels.
        query_scales (tuple[int]): The scales of query feature map.
            Default: (1,)
        key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid
            Module of key feature.
        conv_cfg (dict|None): Config of conv layers.
        norm_cfg (dict|None): Config of norm layers.
        act_cfg (dict|None): Config of activation layers.
    """

    def __init__(self, in_channels, channels, out_channels, query_scales,
                 key_pool_scales, conv_cfg, norm_cfg, act_cfg):
        super(APNB, self).__init__()
        # Keys and queries come from the same feature map, so the projection
        # weights are shared.
        self.stages = nn.ModuleList()
        for query_scale in query_scales:
            self.stages.append(
                SelfAttentionBlock(
                    low_in_channels=in_channels,
                    high_in_channels=in_channels,
                    channels=channels,
                    out_channels=out_channels,
                    share_key_query=True,
                    query_scale=query_scale,
                    key_pool_scales=key_pool_scales,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg))
        self.bottleneck = ConvModule(
            2 * in_channels,
            out_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

    def forward(self, feats):
        """Forward function."""
        priors = [stage(feats, feats) for stage in self.stages]
        context = torch.stack(priors, dim=0).sum(dim=0)
        output = self.bottleneck(torch.cat([context, feats], 1))
        return output
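
# A hedged usage sketch for APNB (shapes are illustrative; note the
# bottleneck takes 2 * in_channels, so out_channels is expected to equal
# in_channels here):
#
#     apnb = APNB(
#         in_channels=512, channels=256, out_channels=512,
#         query_scales=(1, ), key_pool_scales=(1, 3, 6, 8), conv_cfg=None,
#         norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'))
#     x = torch.rand(2, 512, 64, 64)
#     out = apnb(x)  # (2, 512, 64, 64)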


@HEADS.register_module()
class ANNHead(BaseDecodeHead):
    """Asymmetric Non-local Neural Networks for Semantic Segmentation.

    This head is the implementation of `ANNNet
    <https://arxiv.org/abs/1908.07678>`_.

    Args:
        project_channels (int): Projection channels for Nonlocal.
        query_scales (tuple[int]): The scales of query feature map.
            Default: (1,)
        key_pool_scales (tuple[int]): The pooling scales of key feature map.
            Default: (1, 3, 6, 8).
    """

    def __init__(self,
                 project_channels,
                 query_scales=(1, ),
                 key_pool_scales=(1, 3, 6, 8),
                 **kwargs):
        super(ANNHead, self).__init__(
            input_transform='multiple_select', **kwargs)
        # Exactly two input levels are fused: a low level (key) feature and
        # a high level (query) feature.
        assert len(self.in_channels) == 2
        low_in_channels, high_in_channels = self.in_channels
        self.project_channels = project_channels
        # Fuse the two feature levels with an AFNB block.
        self.fusion = AFNB(
            low_in_channels=low_in_channels,
            high_in_channels=high_in_channels,
            out_channels=high_in_channels,
            channels=project_channels,
            query_scales=query_scales,
            key_pool_scales=key_pool_scales,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)
        # Reduce channels before the pyramid non-local block.
        self.bottleneck = ConvModule(
            high_in_channels,
            self.channels,
            3,
            padding=1,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)
        self.context = APNB(
            in_channels=self.channels,
            out_channels=self.channels,
            channels=project_channels,
            query_scales=query_scales,
            key_pool_scales=key_pool_scales,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

    def forward(self, inputs):
        """Forward function."""
        low_feats, high_feats = self._transform_inputs(inputs)
        output = self.fusion(low_feats, high_feats)
        output = self.dropout(output)
        output = self.bottleneck(output)
        output = self.context(output)
        output = self.cls_seg(output)
        return output
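
# A hedged config sketch for building this head through the registry. The
# values are illustrative, loosely following the ANN configs shipped with
# mmsegmentation, and are not guaranteed to match any released file:
#
#     decode_head=dict(
#         type='ANNHead',
#         in_channels=[1024, 2048],
#         in_index=[2, 3],
#         channels=512,
#         project_channels=256,
#         query_scales=(1, ),
#         key_pool_scales=(1, 3, 6, 8),
#         dropout_ratio=0.1,
#         num_classes=19,
#         norm_cfg=dict(type='SyncBN', requires_grad=True),
#         align_corners=False)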