IDD-YOLO-Tracking

Runtime error

IDD-YOLO-Tracking / trackers /strongsort /deep /models /mudeep.py

Bhaskar Saranga

Added tracker

e215925 over 1 year ago

6.3 kB

	from __future__ import division, absolute_import
	import torch
	from torch import nn
	from torch.nn import functional as F

	__all__ = ['MuDeep']


	class ConvBlock(nn.Module):
	    """2-D convolution followed by batch normalization and ReLU.

	    Args:
	        in_c (int): number of input channels.
	        out_c (int): number of output channels.
	        k (int or tuple): kernel size.
	        s (int or tuple): stride.
	        p (int or tuple): padding.
	    """

	    def __init__(self, in_c, out_c, k, s, p):
	        super(ConvBlock, self).__init__()
	        # Attribute names are part of the checkpoint key layout
	        # ("conv.weight", "bn.running_mean", ...); keep them stable.
	        self.conv = nn.Conv2d(
	            in_channels=in_c,
	            out_channels=out_c,
	            kernel_size=k,
	            stride=s,
	            padding=p,
	        )
	        self.bn = nn.BatchNorm2d(num_features=out_c)

	    def forward(self, x):
	        """Return ``relu(bn(conv(x)))``."""
	        activations = self.conv(x)
	        activations = self.bn(activations)
	        return F.relu(activations)


	class ConvLayers(nn.Module):
	    """Preprocessing layers.

	    Two 3x3 convolutional blocks (3 -> 48 -> 96 channels, stride 1)
	    followed by a 3x3 max pooling with stride 2.
	    """

	    def __init__(self):
	        super(ConvLayers, self).__init__()
	        self.conv1 = ConvBlock(in_c=3, out_c=48, k=3, s=1, p=1)
	        self.conv2 = ConvBlock(in_c=48, out_c=96, k=3, s=1, p=1)
	        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

	    def forward(self, x):
	        """Run both convolutional blocks, then downsample with max pooling."""
	        return self.maxpool(self.conv2(self.conv1(x)))


	class MultiScaleA(nn.Module):
	    """Multi-scale stream layer A (Sec.3.1).

	    Four parallel streams each map a 96-channel input to 24 channels
	    at unchanged spatial resolution; their outputs are concatenated
	    along the channel dimension (4 x 24 = 96 channels).
	    """

	    def __init__(self):
	        super(MultiScaleA, self).__init__()
	        # Stream 1: 1x1 conv then 3x3 conv.
	        self.stream1 = nn.Sequential(
	            ConvBlock(in_c=96, out_c=96, k=1, s=1, p=0),
	            ConvBlock(in_c=96, out_c=24, k=3, s=1, p=1),
	        )
	        # Stream 2: 3x3 average pooling then 1x1 conv.
	        self.stream2 = nn.Sequential(
	            nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
	            ConvBlock(in_c=96, out_c=24, k=1, s=1, p=0),
	        )
	        # Stream 3: a single 1x1 conv.
	        self.stream3 = ConvBlock(in_c=96, out_c=24, k=1, s=1, p=0)
	        # Stream 4: 1x1 bottleneck followed by two stacked 3x3 convs.
	        self.stream4 = nn.Sequential(
	            ConvBlock(in_c=96, out_c=16, k=1, s=1, p=0),
	            ConvBlock(in_c=16, out_c=24, k=3, s=1, p=1),
	            ConvBlock(in_c=24, out_c=24, k=3, s=1, p=1),
	        )

	    def forward(self, x):
	        """Apply every stream to ``x`` and concatenate along dim 1."""
	        branches = (self.stream1, self.stream2, self.stream3, self.stream4)
	        outputs = [branch(x) for branch in branches]
	        return torch.cat(outputs, dim=1)


	class Reduction(nn.Module):
	    """Reduction layer (Sec.3.1).

	    Three parallel stride-2 streams whose outputs are concatenated
	    along the channel dimension: a max pooling (channel count unchanged),
	    a 3x3 conv producing 96 channels, and a 1x1 -> 3x3 -> 3x3 conv stack
	    producing 64 channels.
	    """

	    def __init__(self):
	        super(Reduction, self).__init__()
	        self.stream1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
	        self.stream2 = ConvBlock(in_c=96, out_c=96, k=3, s=2, p=1)
	        # Only the last conv of this stack carries the stride.
	        self.stream3 = nn.Sequential(
	            ConvBlock(in_c=96, out_c=48, k=1, s=1, p=0),
	            ConvBlock(in_c=48, out_c=56, k=3, s=1, p=1),
	            ConvBlock(in_c=56, out_c=64, k=3, s=2, p=1),
	        )

	    def forward(self, x):
	        """Apply every stream to ``x`` and concatenate along dim 1."""
	        branches = (self.stream1, self.stream2, self.stream3)
	        outputs = [branch(x) for branch in branches]
	        return torch.cat(outputs, dim=1)


	class MultiScaleB(nn.Module):
	    """Multi-scale stream layer B (Sec.3.1).

	    Four parallel streams, each mapping a 256-channel input to 256
	    channels at unchanged spatial resolution. Unlike layer A, the
	    stream outputs are returned separately rather than concatenated.
	    """

	    def __init__(self):
	        super(MultiScaleB, self).__init__()
	        # Stream 1: 3x3 average pooling then 1x1 conv.
	        self.stream1 = nn.Sequential(
	            nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
	            ConvBlock(in_c=256, out_c=256, k=1, s=1, p=0),
	        )
	        # Stream 2: 1x1 bottleneck, then a 1x3 / 3x1 factorized conv pair.
	        self.stream2 = nn.Sequential(
	            ConvBlock(in_c=256, out_c=64, k=1, s=1, p=0),
	            ConvBlock(in_c=64, out_c=128, k=(1, 3), s=1, p=(0, 1)),
	            ConvBlock(in_c=128, out_c=256, k=(3, 1), s=1, p=(1, 0)),
	        )
	        # Stream 3: a single 1x1 conv.
	        self.stream3 = ConvBlock(in_c=256, out_c=256, k=1, s=1, p=0)
	        # Stream 4: 1x1 bottleneck, then two 1x3 / 3x1 factorized pairs.
	        self.stream4 = nn.Sequential(
	            ConvBlock(in_c=256, out_c=64, k=1, s=1, p=0),
	            ConvBlock(in_c=64, out_c=64, k=(1, 3), s=1, p=(0, 1)),
	            ConvBlock(in_c=64, out_c=128, k=(3, 1), s=1, p=(1, 0)),
	            ConvBlock(in_c=128, out_c=128, k=(1, 3), s=1, p=(0, 1)),
	            ConvBlock(in_c=128, out_c=256, k=(3, 1), s=1, p=(1, 0)),
	        )

	    def forward(self, x):
	        """Return the four stream outputs as a tuple ``(s1, s2, s3, s4)``."""
	        branches = (self.stream1, self.stream2, self.stream3, self.stream4)
	        return tuple(branch(x) for branch in branches)


	class Fusion(nn.Module):
	    """Saliency-based learning fusion layer (Sec.3.2).

	    Each of the four input streams is scaled by its own learnable
	    per-channel weight of shape (1, 256, 1, 1); the scaled streams are
	    summed and then average-pooled.
	    """

	    def __init__(self):
	        super(Fusion, self).__init__()
	        # One weight tensor per stream, drawn from torch.rand.
	        self.a1 = nn.Parameter(torch.rand(1, 256, 1, 1))
	        self.a2 = nn.Parameter(torch.rand(1, 256, 1, 1))
	        self.a3 = nn.Parameter(torch.rand(1, 256, 1, 1))
	        self.a4 = nn.Parameter(torch.rand(1, 256, 1, 1))

	        # Not part of the original paper: a 4x4 average pooling that
	        # shrinks the spatial size of the fused feature maps.
	        self.avgpool = nn.AvgPool2d(kernel_size=4, stride=4, padding=0)

	    def forward(self, x1, x2, x3, x4):
	        """Return the pooled, weighted sum of the four streams."""
	        scales = (self.a1, self.a2, self.a3, self.a4)
	        streams = (x1, x2, x3, x4)
	        weighted = [
	            scale.expand_as(stream) * stream
	            for scale, stream in zip(scales, streams)
	        ]
	        # Accumulate left to right: ((s1 + s2) + s3) + s4.
	        fused = weighted[0]
	        for term in weighted[1:]:
	            fused = fused + term
	        return self.avgpool(fused)


	class MuDeep(nn.Module):
	    """Multiscale deep neural network.

	    Reference:
	        Qian et al. Multi-scale Deep Learning Architectures
	        for Person Re-identification. ICCV 2017.

	    Public keys:
	        - ``mudeep``: Multiscale deep neural network.

	    Args:
	        num_classes (int): output size of the final classifier layer.
	        loss (str): ``'softmax'`` or ``'triplet'``; selects what
	            :meth:`forward` returns in training mode. Default ``'softmax'``.
	        **kwargs: accepted for call compatibility and ignored.
	    """

	    def __init__(self, num_classes, loss='softmax', **kwargs):
	        super(MuDeep, self).__init__()
	        self.loss = loss

	        self.block1 = ConvLayers()
	        self.block2 = MultiScaleA()
	        self.block3 = Reduction()
	        self.block4 = MultiScaleB()
	        self.block5 = Fusion()

	        # Due to this fully connected layer, input image has to be fixed
	        # in shape, i.e. (3, 256, 128), such that the last convolutional
	        # feature maps are of shape (256, 16, 8). If input shape is changed,
	        # the input dimension of this layer has to be changed accordingly.
	        self.fc = nn.Sequential(
	            nn.Linear(256 * 16 * 8, 4096),
	            nn.BatchNorm1d(4096),
	            nn.ReLU(),
	        )
	        self.classifier = nn.Linear(4096, num_classes)
	        self.feat_dim = 4096

	    def featuremaps(self, x):
	        """Run the convolutional trunk (block1 .. block5) on ``x``.

	        ``block4`` returns a tuple of four stream outputs, which is
	        unpacked into the four inputs of the fusion layer ``block5``.
	        """
	        x = self.block1(x)
	        x = self.block2(x)
	        x = self.block3(x)
	        x = self.block4(x)
	        x = self.block5(*x)
	        return x

	    def forward(self, x):
	        """Compute embeddings and, in training mode, class logits.

	        Returns:
	            - eval mode: the 4096-d output of ``self.fc``.
	            - training, ``loss == 'softmax'``: the classifier logits.
	            - training, ``loss == 'triplet'``: ``(logits, features)``.

	        Raises:
	            KeyError: in training mode when ``self.loss`` is neither
	                ``'softmax'`` nor ``'triplet'``.
	        """
	        fmaps = self.featuremaps(x)
	        # torch.flatten falls back to a copy when the feature maps are not
	        # contiguous in NCHW order (e.g. channels_last execution), where
	        # ``.view`` would raise instead.
	        features = self.fc(torch.flatten(fmaps, 1))

	        if not self.training:
	            # Inference only needs the embedding; skip the classifier,
	            # whose output would be discarded anyway.
	            return features

	        if self.loss == 'softmax':
	            return self.classifier(features)
	        if self.loss == 'triplet':
	            return self.classifier(features), features
	        raise KeyError('Unsupported loss: {}'.format(self.loss))