import torch
from torch import nn

from ..utils import constant_init, kaiming_init
from .registry import PLUGIN_LAYERS


def last_zero_init(m):
    """Zero-initialize the last layer of ``m`` (or ``m`` itself if it is not
    an ``nn.Sequential``)."""
    if isinstance(m, nn.Sequential):
        constant_init(m[-1], val=0)
    else:
        constant_init(m, val=0)


@PLUGIN_LAYERS.register_module()
class ContextBlock(nn.Module):
    """ContextBlock module in GCNet.

    See 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond'
    (https://arxiv.org/abs/1904.11492) for details.

    Args:
        in_channels (int): Channels of the input feature map.
        ratio (float): Ratio of channels of the transform bottleneck.
        pooling_type (str): Pooling method for context modeling.
            Options are 'att' and 'avg', which stand for attention pooling
            and average pooling, respectively. Default: 'att'.
        fusion_types (Sequence[str]): Fusion methods for feature fusion.
            Options are 'channel_add' and 'channel_mul', which stand for
            channel-wise addition and multiplication, respectively.
            Default: ('channel_add', ).
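
    Example:
        A minimal usage sketch; the concrete tensor sizes below are
        illustrative assumptions, not requirements of the module:

        >>> import torch
        >>> block = ContextBlock(in_channels=64, ratio=1. / 16)
        >>> x = torch.randn(2, 64, 32, 32)
        >>> out = block(x)
        >>> out.shape  # the block preserves the input shape
        torch.Size([2, 64, 32, 32])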
    """

    _abbr_ = 'context_block'

    def __init__(self,
                 in_channels,
                 ratio,
                 pooling_type='att',
                 fusion_types=('channel_add', )):
        super(ContextBlock, self).__init__()
        assert pooling_type in ['avg', 'att']
        assert isinstance(fusion_types, (list, tuple))
        valid_fusion_types = ['channel_add', 'channel_mul']
        assert all([f in valid_fusion_types for f in fusion_types])
        assert len(fusion_types) > 0, 'at least one fusion should be used'
        self.in_channels = in_channels
        self.ratio = ratio
        self.planes = int(in_channels * ratio)
        self.pooling_type = pooling_type
        self.fusion_types = fusion_types
        if pooling_type == 'att':
            self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=1)
            self.softmax = nn.Softmax(dim=2)
        else:
            self.avg_pool = nn.AdaptiveAvgPool2d(1)
        if 'channel_add' in fusion_types:
            self.channel_add_conv = nn.Sequential(
                nn.Conv2d(self.in_channels, self.planes, kernel_size=1),
                nn.LayerNorm([self.planes, 1, 1]),
                nn.ReLU(inplace=True),
                nn.Conv2d(self.planes, self.in_channels, kernel_size=1))
        else:
            self.channel_add_conv = None
        if 'channel_mul' in fusion_types:
            self.channel_mul_conv = nn.Sequential(
                nn.Conv2d(self.in_channels, self.planes, kernel_size=1),
                nn.LayerNorm([self.planes, 1, 1]),
                nn.ReLU(inplace=True),
                nn.Conv2d(self.planes, self.in_channels, kernel_size=1))
        else:
            self.channel_mul_conv = None
        self.reset_parameters()

    def reset_parameters(self):
        if self.pooling_type == 'att':
            kaiming_init(self.conv_mask, mode='fan_in')
            self.conv_mask.inited = True

        if self.channel_add_conv is not None:
            last_zero_init(self.channel_add_conv)
        if self.channel_mul_conv is not None:
            last_zero_init(self.channel_mul_conv)

    def spatial_pool(self, x):
        batch, channel, height, width = x.size()
        if self.pooling_type == 'att':
            input_x = x
            # [N, C, H * W]
            input_x = input_x.view(batch, channel, height * width)
            # [N, 1, C, H * W]
            input_x = input_x.unsqueeze(1)
            # [N, 1, H, W]
            context_mask = self.conv_mask(x)
            # [N, 1, H * W]
            context_mask = context_mask.view(batch, 1, height * width)
            # [N, 1, H * W]
            context_mask = self.softmax(context_mask)
            # [N, 1, H * W, 1]
            context_mask = context_mask.unsqueeze(-1)
            # [N, 1, C, 1]
            context = torch.matmul(input_x, context_mask)
            # [N, C, 1, 1]
            context = context.view(batch, channel, 1, 1)
        else:
            # [N, C, 1, 1]
            context = self.avg_pool(x)

        return context

    def forward(self, x):
        # [N, C, 1, 1]
        context = self.spatial_pool(x)

        out = x
        if self.channel_mul_conv is not None:
            # [N, C, 1, 1]
            channel_mul_term = torch.sigmoid(self.channel_mul_conv(context))
            out = out * channel_mul_term
        if self.channel_add_conv is not None:
            # [N, C, 1, 1]
            channel_add_term = self.channel_add_conv(context)
            out = out + channel_add_term

        return out