from typing import Callable, List, Optional

import torch
from torch import Tensor

from .vision_transformer_utils import _log_api_usage_once

interpolate = torch.nn.functional.interpolate


# This is not in nn
class FrozenBatchNorm2d(torch.nn.Module):
    """
    BatchNorm2d where the batch statistics and the affine parameters are fixed.

    Args:
        num_features (int): Number of features ``C`` from an expected input of size ``(N, C, H, W)``
        eps (float): a value added to the denominator for numerical stability. Default: 1e-5
    """
    def __init__(
        self,
        num_features: int,
        eps: float = 1e-5,
    ):
        super().__init__()
        _log_api_usage_once(self)
        self.eps = eps
        self.register_buffer("weight", torch.ones(num_features))
        self.register_buffer("bias", torch.zeros(num_features))
        self.register_buffer("running_mean", torch.zeros(num_features))
        self.register_buffer("running_var", torch.ones(num_features))
    def _load_from_state_dict(
        self,
        state_dict: dict,
        prefix: str,
        local_metadata: dict,
        strict: bool,
        missing_keys: List[str],
        unexpected_keys: List[str],
        error_msgs: List[str],
    ):
        # Frozen BN keeps no ``num_batches_tracked`` buffer, so drop that key from
        # checkpoints saved with a regular BatchNorm2d before loading.
        num_batches_tracked_key = prefix + "num_batches_tracked"
        if num_batches_tracked_key in state_dict:
            del state_dict[num_batches_tracked_key]
        super()._load_from_state_dict(
            state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
        )
    def forward(self, x: Tensor) -> Tensor:
        # move reshapes to the beginning
        # to make it fuser-friendly
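        # Frozen BN is a fixed per-channel affine transform:
        #   y = (x - running_mean) / sqrt(running_var + eps) * weight + bias
        # which is folded below into y = x * scale + bias with precomputable constants.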
        w = self.weight.reshape(1, -1, 1, 1)
        b = self.bias.reshape(1, -1, 1, 1)
        rv = self.running_var.reshape(1, -1, 1, 1)
        rm = self.running_mean.reshape(1, -1, 1, 1)
        scale = w * (rv + self.eps).rsqrt()
        bias = b - rm * scale
        return x * scale + bias
    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.weight.shape[0]}, eps={self.eps})"
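

# A minimal usage sketch (assumption: illustrative only, not part of the original
# module). FrozenBatchNorm2d behaves like an eval-mode BatchNorm2d whose
# statistics never update, which is why it is commonly used to freeze backbone
# normalization layers:
#
#   bn = FrozenBatchNorm2d(num_features=64)
#   x = torch.randn(2, 64, 32, 32)
#   y = bn(x)  # pure per-channel affine transform; shape is unchanged
#   assert y.shape == x.shape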


class ConvNormActivation(torch.nn.Sequential):
    """
    Configurable block used for Convolution-Normalization-Activation blocks.

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
        kernel_size (int, optional): Size of the convolving kernel. Default: 3
        stride (int, optional): Stride of the convolution. Default: 1
        padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None, in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation``
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer that will be stacked on top of the convolution layer. If ``None`` this layer won't be used. Default: ``torch.nn.BatchNorm2d``
        activation_layer (Callable[..., torch.nn.Module], optional): Activation function which will be stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer won't be used. Default: ``torch.nn.ReLU``
        dilation (int): Spacing between kernel elements. Default: 1
        inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place. Default: ``True``
        bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included if ``norm_layer is None``.
    """
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int = 3,
        stride: int = 1,
        padding: Optional[int] = None,
        groups: int = 1,
        norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        dilation: int = 1,
        inplace: Optional[bool] = True,
        bias: Optional[bool] = None,
    ) -> None:
        if padding is None:
            padding = (kernel_size - 1) // 2 * dilation
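        # Assumed rationale (not stated in the original): for odd kernel sizes the
        # default above is "same" padding, so a stride-1 convolution preserves the
        # input's spatial dimensions even with dilation > 1.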
        if bias is None:
            bias = norm_layer is None
        layers = [
            torch.nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                dilation=dilation,
                groups=groups,
                bias=bias,
            )
        ]
        if norm_layer is not None:
            layers.append(norm_layer(out_channels))
        if activation_layer is not None:
            params = {} if inplace is None else {"inplace": inplace}
            layers.append(activation_layer(**params))
        super().__init__(*layers)
        _log_api_usage_once(self)
        self.out_channels = out_channels
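

# A minimal usage sketch (assumption: illustrative only). Because the block is an
# nn.Sequential of Conv2d -> norm -> activation, swapping components is just a
# matter of passing different callables:
#
#   block = ConvNormActivation(3, 16)                                  # Conv-BN-ReLU
#   block = ConvNormActivation(3, 16, norm_layer=None)                 # Conv-ReLU; conv gets a bias
#   block = ConvNormActivation(3, 16, activation_layer=torch.nn.SiLU)  # Conv-BN-SiLU
#   y = block(torch.randn(1, 3, 64, 64))  # default padding keeps H and W at 64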


class SqueezeExcitation(torch.nn.Module):
    """
    This block implements the Squeeze-and-Excitation block from https://arxiv.org/abs/1709.01507 (see Fig. 1).

    Parameters ``activation`` and ``scale_activation`` correspond to ``delta`` and ``sigma`` in eq. 3.

    Args:
        input_channels (int): Number of channels in the input image
        squeeze_channels (int): Number of squeeze channels
        activation (Callable[..., torch.nn.Module], optional): ``delta`` activation. Default: ``torch.nn.ReLU``
        scale_activation (Callable[..., torch.nn.Module]): ``sigma`` activation. Default: ``torch.nn.Sigmoid``
    """
    def __init__(
        self,
        input_channels: int,
        squeeze_channels: int,
        activation: Callable[..., torch.nn.Module] = torch.nn.ReLU,
        scale_activation: Callable[..., torch.nn.Module] = torch.nn.Sigmoid,
    ) -> None:
        super().__init__()
        _log_api_usage_once(self)
        self.avgpool = torch.nn.AdaptiveAvgPool2d(1)
        self.fc1 = torch.nn.Conv2d(input_channels, squeeze_channels, 1)
        self.fc2 = torch.nn.Conv2d(squeeze_channels, input_channels, 1)
        self.activation = activation()
        self.scale_activation = scale_activation()

    def _scale(self, input: Tensor) -> Tensor:
        # Squeeze: global average pool to (N, C, 1, 1); excite: two 1x1 convs
        # produce per-channel attention weights in scale_activation's range.
        scale = self.avgpool(input)
        scale = self.fc1(scale)
        scale = self.activation(scale)
        scale = self.fc2(scale)
        return self.scale_activation(scale)

    def forward(self, input: Tensor) -> Tensor:
        scale = self._scale(input)
        return scale * input
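

# A minimal usage sketch (assumption: illustrative only). Squeeze channels are
# typically a fraction of the input channels (e.g. a reduction ratio of 4-16):
#
#   se = SqueezeExcitation(input_channels=64, squeeze_channels=16)
#   x = torch.randn(2, 64, 32, 32)
#   y = se(x)  # x reweighted per channel; shape is unchanged
#   assert y.shape == x.shape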