# Copyright (c) OpenMMLab. All rights reserved.
import math
from typing import Optional, Sequence

import torch.nn as nn
from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule
from mmdet.models.backbones.csp_darknet import CSPLayer
from mmdet.utils import ConfigType, OptMultiConfig

from mmyolo.registry import MODELS
from .base_yolo_neck import BaseYOLONeck


@MODELS.register_module()
class CSPNeXtPAFPN(BaseYOLONeck):
    """Path Aggregation Network with CSPNeXt blocks.

    Args:
        in_channels (Sequence[int]): Number of input channels per scale.
            Each entry is multiplied by ``widen_factor`` and truncated to
            ``int`` before being handed to the base class.
        out_channels (int): Number of output channels (used at each scale).
            Scaled by ``widen_factor`` in the same way as ``in_channels``.
        deepen_factor (float): Depth multiplier, multiply number of
            blocks in CSP layer by this amount. Defaults to 1.0.
        widen_factor (float): Width multiplier, multiply number of
            channels in each layer by this amount. Defaults to 1.0.
        num_csp_blocks (int): Number of bottlenecks in CSPLayer before
            depth scaling; the effective count is
            ``round(num_csp_blocks * deepen_factor)``. Defaults to 3.
        freeze_all (bool): Passed through unchanged to ``BaseYOLONeck``
            (presumably freezes the neck's parameters - confirm against
            the base class). Defaults to False.
        use_depthwise (bool): Whether to build the reduce, downsample,
            output and top-down 1x1 convolutions with
            ``DepthwiseSeparableConvModule`` instead of ``ConvModule``.
            This flag is not forwarded to the ``CSPLayer`` blocks.
            Defaults to False.
        expand_ratio (float): Ratio to adjust the number of channels of the
            hidden layer. Defaults to 0.5.
        upsample_cfg (dict): Keyword arguments for ``nn.Upsample``.
            Default: `dict(scale_factor=2, mode='nearest')`
        conv_cfg (dict, optional): Config dict for convolution layer,
            forwarded to the ``CSPLayer`` blocks only.
            Default: None, which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='BN')
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='SiLU', inplace=True)
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: Kaiming uniform initialization of ``Conv2d`` layers
            (``a=sqrt(5)``, ``mode='fan_in'``,
            ``nonlinearity='leaky_relu'``).
    """

    def __init__(
        self,
        in_channels: Sequence[int],
        out_channels: int,
        deepen_factor: float = 1.0,
        widen_factor: float = 1.0,
        num_csp_blocks: int = 3,
        freeze_all: bool = False,
        use_depthwise: bool = False,
        expand_ratio: float = 0.5,
        upsample_cfg: ConfigType = dict(scale_factor=2, mode='nearest'),
        conv_cfg: Optional[ConfigType] = None,
        norm_cfg: ConfigType = dict(type='BN'),
        act_cfg: ConfigType = dict(type='SiLU', inplace=True),
        init_cfg: OptMultiConfig = dict(
            type='Kaiming',
            layer='Conv2d',
            a=math.sqrt(5),
            distribution='uniform',
            mode='fan_in',
            nonlinearity='leaky_relu')
    ) -> None:
        # These attributes are read by the ``build_*`` hooks below, so they
        # are assigned before ``super().__init__()`` - presumably the base
        # constructor invokes those hooks. Keep this ordering.
        self.num_csp_blocks = round(num_csp_blocks * deepen_factor)
        self.conv = (
            DepthwiseSeparableConvModule if use_depthwise else ConvModule)
        self.upsample_cfg = upsample_cfg
        self.expand_ratio = expand_ratio
        self.conv_cfg = conv_cfg

        super().__init__(
            in_channels=[
                int(channel * widen_factor) for channel in in_channels
            ],
            out_channels=int(out_channels * widen_factor),
            deepen_factor=deepen_factor,
            widen_factor=widen_factor,
            freeze_all=freeze_all,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            init_cfg=init_cfg)

    def _build_csp_layer(self, in_channels: int,
                         out_channels: int) -> nn.Module:
        """Build a CSPNeXt ``CSPLayer`` with this neck's shared settings.

        Args:
            in_channels (int): Input channels of the layer.
            out_channels (int): Output channels of the layer.

        Returns:
            nn.Module: The CSP layer.
        """
        # NOTE(review): ``use_depthwise`` is intentionally not forwarded
        # here, matching the original behaviour of this class - confirm
        # whether that is desired before changing it, since it would alter
        # the built architecture.
        return CSPLayer(
            in_channels,
            out_channels,
            num_blocks=self.num_csp_blocks,
            add_identity=False,
            use_cspnext_block=True,
            expand_ratio=self.expand_ratio,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

    def build_reduce_layer(self, idx: int) -> nn.Module:
        """build reduce layer.

        Only the last scale gets a real layer: a 1x1 conv mapping
        ``in_channels[idx]`` to ``in_channels[idx - 1]``. Every other scale
        is passed through with ``nn.Identity``.

        Args:
            idx (int): layer idx.

        Returns:
            nn.Module: The reduce layer.
        """
        if idx != len(self.in_channels) - 1:
            return nn.Identity()

        return self.conv(
            self.in_channels[idx],
            self.in_channels[idx - 1],
            1,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

    def build_upsample_layer(self, *args, **kwargs) -> nn.Module:
        """build upsample layer.

        All positional and keyword arguments are ignored; the layer is
        configured solely from ``self.upsample_cfg``.

        Returns:
            nn.Module: The upsample layer.
        """
        return nn.Upsample(**self.upsample_cfg)

    def build_top_down_layer(self, idx: int) -> nn.Module:
        """build top down layer.

        The layer is a CSP layer taking ``2 * in_channels[idx - 1]``
        channels and producing ``in_channels[idx - 1]``. For ``idx != 1`` it
        is followed by a 1x1 conv that reduces to ``in_channels[idx - 2]``.

        Args:
            idx (int): layer idx.

        Returns:
            nn.Module: The top down layer.
        """
        channels = self.in_channels[idx - 1]
        csp_layer = self._build_csp_layer(channels * 2, channels)
        if idx == 1:
            return csp_layer

        return nn.Sequential(
            csp_layer,
            self.conv(
                channels,
                self.in_channels[idx - 2],
                kernel_size=1,
                norm_cfg=self.norm_cfg,
                act_cfg=self.act_cfg))

    def build_downsample_layer(self, idx: int) -> nn.Module:
        """build downsample layer.

        A 3x3, stride-2 conv that keeps the channel count at
        ``in_channels[idx]``.

        Args:
            idx (int): layer idx.

        Returns:
            nn.Module: The downsample layer.
        """
        channels = self.in_channels[idx]
        return self.conv(
            channels,
            channels,
            kernel_size=3,
            stride=2,
            padding=1,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

    def build_bottom_up_layer(self, idx: int) -> nn.Module:
        """build bottom up layer.

        A CSP layer taking ``2 * in_channels[idx]`` channels and producing
        ``in_channels[idx + 1]``.

        Args:
            idx (int): layer idx.

        Returns:
            nn.Module: The bottom up layer.
        """
        return self._build_csp_layer(self.in_channels[idx] * 2,
                                     self.in_channels[idx + 1])

    def build_out_layer(self, idx: int) -> nn.Module:
        """build out layer.

        A 3x3 conv mapping ``in_channels[idx]`` to ``self.out_channels``.

        Args:
            idx (int): layer idx.

        Returns:
            nn.Module: The out layer.
        """
        return self.conv(
            self.in_channels[idx],
            self.out_channels,
            3,
            padding=1,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)