from __future__ import division, absolute_import

import torch
from torch import nn
from torch.nn import functional as F

__all__ = ['MuDeep']


class ConvBlock(nn.Module):
    """Basic convolutional block.

    convolution + batch normalization + relu.

    Args:
        in_c (int): number of input channels.
        out_c (int): number of output channels.
        k (int or tuple): kernel size.
        s (int or tuple): stride.
        p (int or tuple): padding.
    """

    def __init__(self, in_c, out_c, k, s, p):
        super(ConvBlock, self).__init__()
        self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p)
        self.bn = nn.BatchNorm2d(out_c)

    def forward(self, x):
        return F.relu(self.bn(self.conv(x)))
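
# Example (hypothetical values, not part of the original file):
# ConvBlock(3, 48, k=3, s=1, p=1) maps (B, 3, H, W) to (B, 48, H, W),
# since a 3x3 convolution with stride 1 and padding 1 keeps the spatial size.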

class ConvLayers(nn.Module):
    """Preprocessing layers."""

    def __init__(self):
        super(ConvLayers, self).__init__()
        self.conv1 = ConvBlock(3, 48, k=3, s=1, p=1)
        self.conv2 = ConvBlock(48, 96, k=3, s=1, p=1)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.maxpool(x)
        return x
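
# Shape note (derived from the layers above, assuming the default
# (3, 256, 128) input expected by MuDeep): both convolutions keep the
# spatial size and the max pooling halves it, so ConvLayers outputs a
# (96, 128, 64) feature map.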

class MultiScaleA(nn.Module):
    """Multi-scale stream layer A (Sec.3.1)"""

    def __init__(self):
        super(MultiScaleA, self).__init__()
        self.stream1 = nn.Sequential(
            ConvBlock(96, 96, k=1, s=1, p=0),
            ConvBlock(96, 24, k=3, s=1, p=1),
        )
        self.stream2 = nn.Sequential(
            nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
            ConvBlock(96, 24, k=1, s=1, p=0),
        )
        self.stream3 = ConvBlock(96, 24, k=1, s=1, p=0)
        self.stream4 = nn.Sequential(
            ConvBlock(96, 16, k=1, s=1, p=0),
            ConvBlock(16, 24, k=3, s=1, p=1),
            ConvBlock(24, 24, k=3, s=1, p=1),
        )

    def forward(self, x):
        s1 = self.stream1(x)
        s2 = self.stream2(x)
        s3 = self.stream3(x)
        s4 = self.stream4(x)
        y = torch.cat([s1, s2, s3, s4], dim=1)
        return y
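
# Shape note: each of the four streams outputs 24 channels at the input
# resolution, so the concatenation yields 96 channels again, e.g.
# (96, 128, 64) -> (96, 128, 64).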

class Reduction(nn.Module):
    """Reduction layer (Sec.3.1)"""

    def __init__(self):
        super(Reduction, self).__init__()
        self.stream1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.stream2 = ConvBlock(96, 96, k=3, s=2, p=1)
        self.stream3 = nn.Sequential(
            ConvBlock(96, 48, k=1, s=1, p=0),
            ConvBlock(48, 56, k=3, s=1, p=1),
            ConvBlock(56, 64, k=3, s=2, p=1),
        )

    def forward(self, x):
        s1 = self.stream1(x)
        s2 = self.stream2(x)
        s3 = self.stream3(x)
        y = torch.cat([s1, s2, s3], dim=1)
        return y
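
# Shape note: every stream downsamples by stride 2, and the concatenated
# channels sum to 96 + 96 + 64 = 256, e.g. (96, 128, 64) -> (256, 64, 32).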

class MultiScaleB(nn.Module):
    """Multi-scale stream layer B (Sec.3.1)"""

    def __init__(self):
        super(MultiScaleB, self).__init__()
        self.stream1 = nn.Sequential(
            nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
            ConvBlock(256, 256, k=1, s=1, p=0),
        )
        self.stream2 = nn.Sequential(
            ConvBlock(256, 64, k=1, s=1, p=0),
            ConvBlock(64, 128, k=(1, 3), s=1, p=(0, 1)),
            ConvBlock(128, 256, k=(3, 1), s=1, p=(1, 0)),
        )
        self.stream3 = ConvBlock(256, 256, k=1, s=1, p=0)
        self.stream4 = nn.Sequential(
            ConvBlock(256, 64, k=1, s=1, p=0),
            ConvBlock(64, 64, k=(1, 3), s=1, p=(0, 1)),
            ConvBlock(64, 128, k=(3, 1), s=1, p=(1, 0)),
            ConvBlock(128, 128, k=(1, 3), s=1, p=(0, 1)),
            ConvBlock(128, 256, k=(3, 1), s=1, p=(1, 0)),
        )

    def forward(self, x):
        s1 = self.stream1(x)
        s2 = self.stream2(x)
        s3 = self.stream3(x)
        s4 = self.stream4(x)
        return s1, s2, s3, s4
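
# Shape note: all four streams preserve both resolution and channel count,
# so each returned tensor is (256, 64, 32) for the default input; unlike
# MultiScaleA, the streams are not concatenated here but passed to the
# saliency-based Fusion layer below.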

class Fusion(nn.Module):
    """Saliency-based learning fusion layer (Sec.3.2)"""

    def __init__(self):
        super(Fusion, self).__init__()
        self.a1 = nn.Parameter(torch.rand(1, 256, 1, 1))
        self.a2 = nn.Parameter(torch.rand(1, 256, 1, 1))
        self.a3 = nn.Parameter(torch.rand(1, 256, 1, 1))
        self.a4 = nn.Parameter(torch.rand(1, 256, 1, 1))

        # We add an average pooling layer to reduce the spatial dimension
        # of feature maps, which differs from the original paper.
        self.avgpool = nn.AvgPool2d(kernel_size=4, stride=4, padding=0)

    def forward(self, x1, x2, x3, x4):
        s1 = self.a1.expand_as(x1) * x1
        s2 = self.a2.expand_as(x2) * x2
        s3 = self.a3.expand_as(x3) * x3
        s4 = self.a4.expand_as(x4) * x4
        y = self.avgpool(s1 + s2 + s3 + s4)
        return y
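
# Shape note: the weighted sum keeps (256, 64, 32), and the 4x4 average
# pooling reduces it to (256, 16, 8), matching the 256 * 16 * 8 input
# dimension of MuDeep's fully connected layer.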

class MuDeep(nn.Module):
    """Multiscale deep neural network.

    Reference:
        Qian et al. Multi-scale Deep Learning Architectures
        for Person Re-identification. ICCV 2017.

    Public keys:
        - ``mudeep``: Multiscale deep neural network.
    """

    def __init__(self, num_classes, loss='softmax', **kwargs):
        super(MuDeep, self).__init__()
        self.loss = loss

        self.block1 = ConvLayers()
        self.block2 = MultiScaleA()
        self.block3 = Reduction()
        self.block4 = MultiScaleB()
        self.block5 = Fusion()

        # Because of this fully connected layer, the input image has to be
        # fixed in shape, i.e. (3, 256, 128), such that the last convolutional
        # feature maps are of shape (256, 16, 8). If the input shape is
        # changed, the input dimension of this layer has to be changed
        # accordingly.
        self.fc = nn.Sequential(
            nn.Linear(256 * 16 * 8, 4096),
            nn.BatchNorm1d(4096),
            nn.ReLU(),
        )
        self.classifier = nn.Linear(4096, num_classes)
        self.feat_dim = 4096

    def featuremaps(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(*x)
        return x

    def forward(self, x):
        x = self.featuremaps(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        y = self.classifier(x)

        if not self.training:
            return x

        if self.loss == 'softmax':
            return y
        elif self.loss == 'triplet':
            return y, x
        else:
            raise KeyError('Unsupported loss: {}'.format(self.loss))
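

# Minimal usage sketch (not from the original file): run a dummy forward
# pass to check the shape assumptions noted above. The identity count 751
# is an arbitrary placeholder (Market-1501-sized); the batch size must be
# greater than 1 in train mode because of the BatchNorm1d layer in self.fc.
if __name__ == '__main__':
    model = MuDeep(num_classes=751, loss='softmax')

    model.train()
    imgs = torch.rand(2, 3, 256, 128)  # fixed input shape required by self.fc
    logits = model(imgs)
    print(logits.shape)  # torch.Size([2, 751])

    model.eval()
    with torch.no_grad():
        feats = model(imgs)  # eval mode returns the 4096-dim fc features
    print(feats.shape)  # torch.Size([2, 4096])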