Spaces:
Sleeping
Sleeping
import torch | |
import torch.nn.functional as F | |
from torch import nn | |
from .deform_conv import ModulatedDeformConv | |
from .dyrelu import h_sigmoid, DYReLU | |
class Conv3x3Norm(torch.nn.Module):
    """3x3 convolution (padding 1), optionally followed by GroupNorm.

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels of the convolution.
        stride: Stride of the convolution.
        deformable: If True, the convolution is a ``ModulatedDeformConv``;
            otherwise it is a plain ``nn.Conv2d``.
        use_gn: If True, the convolution output is normalized with
            ``nn.GroupNorm`` using 16 groups (PyTorch requires
            ``out_channels`` to be divisible by the group count).
            If False, no normalization is applied.
    """

    def __init__(self, in_channels, out_channels, stride, deformable=False, use_gn=False):
        super().__init__()

        # Both convolution types are built with the same 3x3 / padding-1 geometry.
        conv_cls = ModulatedDeformConv if deformable else nn.Conv2d
        self.conv = conv_cls(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)

        # The attribute keeps the name ``bn`` (although it holds a GroupNorm) so
        # that parameter names in existing checkpoints stay unchanged.
        self.bn = nn.GroupNorm(num_groups=16, num_channels=out_channels) if use_gn else None

    def forward(self, input, **kwargs):
        """Apply the convolution and, when configured, the normalization.

        Args:
            input: Tensor passed to the convolution.
            **kwargs: Extra keyword arguments forwarded unchanged to the
                convolution (``DyConv`` passes ``offset``/``mask`` this way).

        Returns:
            The convolved (and optionally normalized) tensor.
        """
        x = self.conv(input, **kwargs)
        # Explicit None check: do not rely on the truthiness of an nn.Module.
        if self.bn is not None:
            x = self.bn(x)
        return x
class DyConv(nn.Module):
    """Fuse every level of a feature list with its neighbouring levels.

    For each level, the output is built from up to three branches:

    * the current level, through a stride-1 conv (``self.DyConv[1]``);
    * the previous level ``x[level - 1]``, through a stride-2 conv
      (``self.DyConv[2]``);
    * the next level ``x[level + 1]``, through a stride-1 conv
      (``self.DyConv[0]``) whose output is bilinearly resized to the
      current level's spatial size.

    The branches are averaged (optionally re-weighted by ``AttnConv``) and
    passed through ``self.relu``.

    Args:
        in_channels: Channels of the incoming feature maps.
        out_channels: Channels produced by every branch conv.
        conv_func: Callable ``(in_channels, out_channels, stride) -> module``
            used to build the three branch convs.
        use_dyfuse: If True, build ``AttnConv`` + ``h_sigmoid`` and use them to
            scale each branch before averaging.
        use_dyrelu: If True, use ``DYReLU(in_channels, out_channels)`` as the
            final activation instead of ``nn.ReLU``.
        use_deform: If True, predict ``offset``/``mask`` tensors from the
            current level and pass them as keyword arguments to *every* branch
            conv, so ``conv_func`` must build modules that accept them.
    """

    # The offset predictor emits 27 channels that forward() splits 18 / 9.
    # Presumably (x, y) offsets for the 9 taps of a 3x3 kernel plus one
    # modulation mask per tap - confirm against ModulatedDeformConv.
    _OFFSET_CHANNELS = 18
    _MASK_CHANNELS = 9

    def __init__(
        self,
        in_channels=256,
        out_channels=256,
        conv_func=Conv3x3Norm,
        use_dyfuse=True,
        use_dyrelu=False,
        use_deform=False,
    ):
        super().__init__()

        # Order matters: forward() addresses these by index (0: next level,
        # 1: current level, 2: previous level with stride 2).
        self.DyConv = nn.ModuleList()
        self.DyConv.append(conv_func(in_channels, out_channels, 1))
        self.DyConv.append(conv_func(in_channels, out_channels, 1))
        self.DyConv.append(conv_func(in_channels, out_channels, 2))

        if use_dyfuse:
            # Global average pool -> 1x1 conv to a single channel -> ReLU:
            # yields one scalar weight per sample for each branch.
            self.AttnConv = nn.Sequential(
                nn.AdaptiveAvgPool2d(1),
                nn.Conv2d(in_channels, 1, kernel_size=1),
                nn.ReLU(inplace=True),
            )
            self.h_sigmoid = h_sigmoid()
        else:
            self.AttnConv = None

        if use_dyrelu:
            self.relu = DYReLU(in_channels, out_channels)
        else:
            self.relu = nn.ReLU()

        if use_deform:
            self.offset = nn.Conv2d(
                in_channels,
                self._OFFSET_CHANNELS + self._MASK_CHANNELS,
                kernel_size=3,
                stride=1,
                padding=1,
            )
        else:
            self.offset = None

        self.init_weights()

    @staticmethod
    def _init_conv_weights(container):
        """Set every ``nn.Conv2d`` inside *container* to N(0, 0.01) weights and zero bias."""
        for m in container.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight.data, 0, 0.01)
                if m.bias is not None:
                    m.bias.data.zero_()

    def init_weights(self):
        """Initialize the ``nn.Conv2d`` layers of the branch convs and of ``AttnConv``.

        Only ``self.DyConv`` and ``self.AttnConv`` are visited; ``self.offset``
        and ``self.relu`` keep their default initialization.
        """
        self._init_conv_weights(self.DyConv)
        if self.AttnConv is not None:
            self._init_conv_weights(self.AttnConv)

    def forward(self, x):
        """Compute the fused feature map for every level.

        Args:
            x: Indexable sequence of 4-D feature tensors, one per level. The
                branch outputs are stacked, so the stride-2 conv of level
                ``i - 1`` must produce the same spatial size as level ``i``.

        Returns:
            A list with one fused tensor per input level (empty if ``x`` is empty).
        """
        next_x = []
        last_level = len(x) - 1

        for level, feature in enumerate(x):
            conv_args = {}
            if self.offset is not None:
                # Offsets and masks are predicted from the current level and
                # shared by all three branch convs.
                offset_mask = self.offset(feature)
                offset = offset_mask[:, : self._OFFSET_CHANNELS, :, :]
                mask = offset_mask[:, self._OFFSET_CHANNELS :, :, :].sigmoid()
                conv_args = dict(offset=offset, mask=mask)

            temp_fea = [self.DyConv[1](feature, **conv_args)]

            if level > 0:
                temp_fea.append(self.DyConv[2](x[level - 1], **conv_args))

            if level < last_level:
                # F.upsample_bilinear is deprecated; this is its documented
                # equivalent (bilinear with align_corners=True).
                temp_fea.append(
                    F.interpolate(
                        self.DyConv[0](x[level + 1], **conv_args),
                        size=[feature.size(2), feature.size(3)],
                        mode="bilinear",
                        align_corners=True,
                    )
                )

            if self.AttnConv is not None:
                # Scale every branch by its own attention weight before averaging.
                attn_fea = [self.AttnConv(fea) for fea in temp_fea]
                spa_pyr_attn = self.h_sigmoid(torch.stack(attn_fea))
                fused = torch.stack(temp_fea) * spa_pyr_attn
            else:
                fused = torch.stack(temp_fea)

            # Average over the branch dimension (1 to 3 branches per level).
            mean_fea = torch.mean(fused, dim=0, keepdim=False)
            next_x.append(self.relu(mean_fea))

        return next_x
class DyHead(nn.Module):
    """Stack of ``DyConv`` blocks configured from ``cfg.MODEL.DYHEAD``.

    Args:
        cfg: Configuration object; the fields ``CHANNELS``, ``USE_GN``,
            ``USE_DYRELU``, ``USE_DYFUSE``, ``USE_DFCONV`` and ``NUM_CONVS``
            under ``cfg.MODEL.DYHEAD`` are read.
        in_channels: Input channels of the first ``DyConv``; every later block
            takes ``CHANNELS`` inputs.
    """

    def __init__(self, cfg, in_channels):
        super().__init__()
        self.cfg = cfg

        head_cfg = cfg.MODEL.DYHEAD
        channels = head_cfg.CHANNELS
        use_gn = head_cfg.USE_GN
        use_dyrelu = head_cfg.USE_DYRELU
        use_dyfuse = head_cfg.USE_DYFUSE
        use_deform = head_cfg.USE_DFCONV

        def make_conv(i, o, s):
            # Branch-conv factory handed to DyConv: binds the norm/deform options.
            return Conv3x3Norm(i, o, s, deformable=use_deform, use_gn=use_gn)

        blocks = [
            DyConv(
                channels if idx else in_channels,  # only the first block sees in_channels
                channels,
                conv_func=make_conv,
                use_dyrelu=use_dyrelu,
                use_dyfuse=use_dyfuse,
                use_deform=use_deform,
            )
            for idx in range(head_cfg.NUM_CONVS)
        ]
        self.dyhead_tower = nn.Sequential(*blocks)

    def forward(self, x):
        """Run ``x`` through the tower and return its output."""
        return self.dyhead_tower(x)