| |
| |
|
|
| import warnings |
|
|
| import torch.nn as nn |
| from mmcv.cnn import ConvModule |
| from mmengine.model import BaseModule |
| from torch.nn.modules.batchnorm import _BatchNorm |
|
|
| from mmdet.registry import MODELS |
|
|
|
|
class ResBlock(BaseModule):
    """Residual unit used throughout the Darknet backbone.

    The input first goes through a 1x1 ConvModule that halves the number of
    channels, then through a 3x3 ConvModule (padding 1) that restores the
    original channel count. The untouched input is added to that result, so
    the block keeps the channel count of its input. With the default configs
    each ConvModule is Conv + BN + LeakyReLU, following the YoloV3 layout
    where the first conv has half as many filters as the second.

    Args:
        in_channels (int): Number of input (and output) channels. Must be
            even, because the bottleneck uses ``in_channels // 2`` channels.
        conv_cfg (dict): Config dict for convolution layer. Default: None.
        norm_cfg (dict): Dictionary to construct and config norm layer.
            Default: dict(type='BN', requires_grad=True)
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='LeakyReLU', negative_slope=0.1).
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None
    """

    def __init__(self,
                 in_channels,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
                 init_cfg=None):
        super().__init__(init_cfg)
        assert in_channels % 2 == 0
        bottleneck_channels = in_channels // 2

        # 1x1 squeeze: in_channels -> in_channels / 2.
        self.conv1 = ConvModule(
            in_channels,
            bottleneck_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        # 3x3 expand back to in_channels; padding=1 is passed alongside the
        # 3x3 kernel so the skip connection can be added element-wise.
        self.conv2 = ConvModule(
            bottleneck_channels,
            in_channels,
            3,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

    def forward(self, x):
        """Apply both convolutions and add the input as a skip connection."""
        return self.conv2(self.conv1(x)) + x
|
|
|
|
@MODELS.register_module()
class Darknet(BaseModule):
    """Darknet backbone.

    The network is a 3x3 stem convolution (``conv1``, 3 -> 32 channels)
    followed by one ``conv_res_block{i}`` per stage listed in
    ``arch_settings[depth]``. Each of those blocks is a stride-2 3x3
    ConvModule followed by a number of :class:`ResBlock` modules.

    Args:
        depth (int): Depth of Darknet. Currently only support 53.
        out_indices (Sequence[int]): Output from which stages. The indices
            refer to positions in ``self.cr_blocks``: 0 is ``conv1`` and
            ``i`` (i >= 1) is ``conv_res_block{i}``. Default: (3, 4, 5).
        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
            The first ``frozen_stages`` entries of ``self.cr_blocks`` are
            frozen. -1 means not freezing any parameters. Default: -1.
        conv_cfg (dict): Config dict for convolution layer. Default: None.
        norm_cfg (dict): Dictionary to construct and config norm layer.
            Default: dict(type='BN', requires_grad=True)
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='LeakyReLU', negative_slope=0.1).
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only. Default: True.
        pretrained (str, optional): model pretrained path. Deprecated in
            favour of ``init_cfg``; the two cannot be given together.
            Default: None
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None

    Example:
        >>> from mmdet.models import Darknet
        >>> import torch
        >>> self = Darknet(depth=53)
        >>> _ = self.eval()
        >>> inputs = torch.rand(1, 3, 416, 416)
        >>> level_outputs = self.forward(inputs)
        >>> for level_out in level_outputs:
        ...     print(tuple(level_out.shape))
        ...
        (1, 256, 52, 52)
        (1, 512, 26, 26)
        (1, 1024, 13, 13)
    """

    # depth -> (number of ResBlocks per stage,
    #           (in_channels, out_channels) per stage)
    arch_settings = {
        53: ((1, 2, 8, 8, 4), ((32, 64), (64, 128), (128, 256), (256, 512),
                               (512, 1024)))
    }

    def __init__(self,
                 depth=53,
                 out_indices=(3, 4, 5),
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
                 norm_eval=True,
                 pretrained=None,
                 init_cfg=None):
        super().__init__(init_cfg)
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for darknet')

        self.depth = depth
        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.layers, self.channels = self.arch_settings[depth]

        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

        # Stem convolution.
        self.conv1 = ConvModule(3, 32, 3, padding=1, **cfg)

        # Ordered names of the sub-modules run by ``forward``; index 0 is the
        # stem, indices 1.. are the conv + residual stages.
        self.cr_blocks = ['conv1']
        for i, n_layers in enumerate(self.layers):
            layer_name = f'conv_res_block{i + 1}'
            in_c, out_c = self.channels[i]
            self.add_module(
                layer_name,
                self.make_conv_res_block(in_c, out_c, n_layers, **cfg))
            self.cr_blocks.append(layer_name)

        self.norm_eval = norm_eval

        assert not (init_cfg and pretrained), \
            'init_cfg and pretrained cannot be specified at the same time'
        if isinstance(pretrained, str):
            warnings.warn('DeprecationWarning: pretrained is deprecated, '
                          'please use "init_cfg" instead')
            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
        elif pretrained is None:
            # Only install the default initialisation when the caller did not
            # provide an init_cfg of their own.
            if init_cfg is None:
                self.init_cfg = [
                    dict(type='Kaiming', layer='Conv2d'),
                    dict(
                        type='Constant',
                        val=1,
                        layer=['_BatchNorm', 'GroupNorm'])
                ]
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        """Run every block in order and collect the selected outputs.

        Args:
            x: Input passed to ``conv1``.

        Returns:
            tuple: Outputs of the blocks whose index in ``self.cr_blocks``
            is listed in ``self.out_indices``, in execution order.
        """
        outs = []
        for i, layer_name in enumerate(self.cr_blocks):
            cr_block = getattr(self, layer_name)
            x = cr_block(x)
            if i in self.out_indices:
                outs.append(x)

        return tuple(outs)

    def _freeze_stages(self):
        """Put the first ``frozen_stages`` blocks in eval mode, without grad.

        A ``frozen_stages`` value of zero or below freezes nothing.
        """
        if self.frozen_stages >= 0:
            for i in range(self.frozen_stages):
                m = getattr(self, self.cr_blocks[i])
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def train(self, mode=True):
        """Switch between training and evaluation mode.

        Frozen stages are re-frozen after the mode switch, and when
        ``norm_eval`` is set every ``_BatchNorm`` module is kept in eval mode
        even while training.

        Args:
            mode (bool): ``True`` for training mode, ``False`` for
                evaluation mode. Default: True.

        Returns:
            Darknet: ``self``, as ``torch.nn.Module.train`` does, so that
            ``model.train()`` and ``model.eval()`` can be chained or
            assigned.
        """
        super().train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                if isinstance(m, _BatchNorm):
                    m.eval()
        # nn.Module.eval() is implemented as ``return self.train(False)``;
        # without this return both train() and eval() would yield None.
        return self

    @staticmethod
    def make_conv_res_block(in_channels,
                            out_channels,
                            res_repeat,
                            conv_cfg=None,
                            norm_cfg=dict(type='BN', requires_grad=True),
                            act_cfg=dict(type='LeakyReLU',
                                         negative_slope=0.1)):
        """In Darknet backbone, ConvLayer is usually followed by ResBlock. This
        function will make that. The Conv layers always have 3x3 filters with
        stride=2. The number of the filters in Conv layer is the same as the
        out channels of the ResBlock.

        Args:
            in_channels (int): The number of input channels.
            out_channels (int): The number of output channels.
            res_repeat (int): The number of ResBlocks.
            conv_cfg (dict): Config dict for convolution layer. Default: None.
            norm_cfg (dict): Dictionary to construct and config norm layer.
                Default: dict(type='BN', requires_grad=True)
            act_cfg (dict): Config dict for activation layer.
                Default: dict(type='LeakyReLU', negative_slope=0.1).

        Returns:
            nn.Sequential: A module named ``conv`` followed by modules named
            ``res0`` .. ``res{res_repeat - 1}``.
        """

        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

        model = nn.Sequential()
        # Stride-2 3x3 conv that changes the channel count.
        model.add_module(
            'conv',
            ConvModule(
                in_channels, out_channels, 3, stride=2, padding=1, **cfg))
        for idx in range(res_repeat):
            model.add_module(f'res{idx}', ResBlock(out_channels, **cfg))
        return model
|
|