from typing import Callable, List, Optional

import torch
from torch import Tensor

from .vision_transformer_utils import _log_api_usage_once


interpolate = torch.nn.functional.interpolate


class FrozenBatchNorm2d(torch.nn.Module):
""" |
|
BatchNorm2d where the batch statistics and the affine parameters are fixed |
|
|
|
Args: |
|
num_features (int): Number of features ``C`` from an expected input of size ``(N, C, H, W)`` |
|
eps (float): a value added to the denominator for numerical stability. Default: 1e-5 |
|
""" |
|
|
|
def __init__( |
|
self, |
|
num_features: int, |
|
eps: float = 1e-5, |
|
): |
|
super().__init__() |
|
_log_api_usage_once(self) |
|
self.eps = eps |
|
self.register_buffer("weight", torch.ones(num_features)) |
|
self.register_buffer("bias", torch.zeros(num_features)) |
|
self.register_buffer("running_mean", torch.zeros(num_features)) |
|
self.register_buffer("running_var", torch.ones(num_features)) |
|
|
|
def _load_from_state_dict( |
|
self, |
|
state_dict: dict, |
|
prefix: str, |
|
local_metadata: dict, |
|
strict: bool, |
|
missing_keys: List[str], |
|
unexpected_keys: List[str], |
|
error_msgs: List[str], |
|
): |
|
num_batches_tracked_key = prefix + "num_batches_tracked" |
|
if num_batches_tracked_key in state_dict: |
|
del state_dict[num_batches_tracked_key] |
|
|
|
super()._load_from_state_dict( |
|
state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs |
|
) |
|
|
|
def forward(self, x: Tensor) -> Tensor: |
|
|
|
|
|
w = self.weight.reshape(1, -1, 1, 1) |
|
b = self.bias.reshape(1, -1, 1, 1) |
|
rv = self.running_var.reshape(1, -1, 1, 1) |
|
rm = self.running_mean.reshape(1, -1, 1, 1) |
|
scale = w * (rv + self.eps).rsqrt() |
|
bias = b - rm * scale |
|
return x * scale + bias |
|
|
|
def __repr__(self) -> str: |
|
return f"{self.__class__.__name__}({self.weight.shape[0]}, eps={self.eps})" |
|
|
|
|
|
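
# Usage sketch (illustrative, not part of the original module): recursively swap
# every ``torch.nn.BatchNorm2d`` in a model for ``FrozenBatchNorm2d``, copying the
# learned affine parameters and running statistics. The helper name
# ``_freeze_batch_norm`` is hypothetical and assumes the default BatchNorm2d
# configuration (affine=True, track_running_stats=True).
def _freeze_batch_norm(module: torch.nn.Module) -> torch.nn.Module:
    if isinstance(module, torch.nn.BatchNorm2d):
        frozen = FrozenBatchNorm2d(module.num_features, eps=module.eps)
        frozen.weight.copy_(module.weight.detach())
        frozen.bias.copy_(module.bias.detach())
        frozen.running_mean.copy_(module.running_mean)
        frozen.running_var.copy_(module.running_var)
        return frozen
    # Recurse into children and replace them in place.
    for name, child in list(module.named_children()):
        setattr(module, name, _freeze_batch_norm(child))
    return module
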
class ConvNormActivation(torch.nn.Sequential):
    """
    Configurable block used for Convolution-Normalization-Activation blocks.

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
        kernel_size (int, optional): Size of the convolving kernel. Default: 3
        stride (int, optional): Stride of the convolution. Default: 1
        padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None, in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation``
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer that will be stacked on top of the convolution layer. If ``None`` this layer won't be used. Default: ``torch.nn.BatchNorm2d``
        activation_layer (Callable[..., torch.nn.Module], optional): Activation function which will be stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer won't be used. Default: ``torch.nn.ReLU``
        dilation (int): Spacing between kernel elements. Default: 1
        inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place. Default: ``True``
        bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included if ``norm_layer is None``.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        stride: int = 1,
        padding: Optional[int] = None,
        groups: int = 1,
        norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        dilation: int = 1,
        inplace: Optional[bool] = True,
        bias: Optional[bool] = None,
    ) -> None:
        # "Same"-style padding for odd kernel sizes, taking dilation into account.
        if padding is None:
            padding = (kernel_size - 1) // 2 * dilation
        # The conv bias is redundant when a normalization layer follows it.
        if bias is None:
            bias = norm_layer is None
        layers = [
            torch.nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                dilation=dilation,
                groups=groups,
                bias=bias,
            )
        ]
        if norm_layer is not None:
            layers.append(norm_layer(out_channels))
        if activation_layer is not None:
            params = {} if inplace is None else {"inplace": inplace}
            layers.append(activation_layer(**params))
        super().__init__(*layers)
        _log_api_usage_once(self)
        self.out_channels = out_channels
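
# Usage sketch (illustrative, not part of the original module): a standard
# Conv-BN-ReLU stem that halves the spatial resolution. The function name
# ``_example_conv_bn_relu_stem`` is hypothetical; it only shows how the
# constructor arguments map onto the generated ``torch.nn.Sequential``.
def _example_conv_bn_relu_stem(in_channels: int = 3, out_channels: int = 64) -> ConvNormActivation:
    # With kernel_size=3 and dilation=1, padding defaults to
    # (3 - 1) // 2 * 1 = 1, so stride=2 exactly halves H and W.
    return ConvNormActivation(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=2,
        norm_layer=torch.nn.BatchNorm2d,
        activation_layer=torch.nn.ReLU,
    )
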
class SqueezeExcitation(torch.nn.Module):
    """
    This block implements the Squeeze-and-Excitation block from https://arxiv.org/abs/1709.01507 (see Fig. 1).
    Parameters ``activation`` and ``scale_activation`` correspond to ``delta`` and ``sigma`` in eq. 3.

    Args:
        input_channels (int): Number of channels in the input image
        squeeze_channels (int): Number of squeeze channels
        activation (Callable[..., torch.nn.Module], optional): ``delta`` activation. Default: ``torch.nn.ReLU``
        scale_activation (Callable[..., torch.nn.Module]): ``sigma`` activation. Default: ``torch.nn.Sigmoid``
    """

    def __init__(
        self,
        input_channels: int,
        squeeze_channels: int,
        activation: Callable[..., torch.nn.Module] = torch.nn.ReLU,
        scale_activation: Callable[..., torch.nn.Module] = torch.nn.Sigmoid,
    ) -> None:
        super().__init__()
        _log_api_usage_once(self)
        self.avgpool = torch.nn.AdaptiveAvgPool2d(1)
        self.fc1 = torch.nn.Conv2d(input_channels, squeeze_channels, 1)
        self.fc2 = torch.nn.Conv2d(squeeze_channels, input_channels, 1)
        self.activation = activation()
        self.scale_activation = scale_activation()

    def _scale(self, input: Tensor) -> Tensor:
        # Squeeze: global average pool to (N, C, 1, 1), then excite through the
        # two 1x1 convolutions to produce per-channel weights in [0, 1].
        scale = self.avgpool(input)
        scale = self.fc1(scale)
        scale = self.activation(scale)
        scale = self.fc2(scale)
        return self.scale_activation(scale)

    def forward(self, input: Tensor) -> Tensor:
        scale = self._scale(input)
        return scale * input
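
# Usage sketch (illustrative, not part of the original module): re-weight a
# 64-channel feature map with squeeze_channels = input_channels // 4; the
# reduction ratio of 4 is an arbitrary choice for the demo. The __main__ guard
# keeps the check from running on import.
if __name__ == "__main__":
    se_block = SqueezeExcitation(input_channels=64, squeeze_channels=16)
    features = torch.randn(2, 64, 32, 32)
    reweighted = se_block(features)
    # Squeeze-and-Excitation only rescales channels, so the shape is preserved.
    assert reweighted.shape == features.shape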