"""DeepPhys - 2D Convolutional Attention Network. DeepPhys: Video-Based Physiological Measurement Using Convolutional Attention Networks ECCV, 2018 Weixuan Chen, Daniel McDuff """ import torch import torch.nn as nn class Attention_mask(nn.Module): def __init__(self): super(Attention_mask, self).__init__() def forward(self, x): xsum = torch.sum(x, dim=2, keepdim=True) xsum = torch.sum(xsum, dim=3, keepdim=True) xshape = tuple(x.size()) return x / xsum * xshape[2] * xshape[3] * 0.5 def get_config(self): """May be generated manually. """ config = super(Attention_mask, self).get_config() return config class DeepPhys(nn.Module): def __init__(self, in_channels=3, nb_filters1=32, nb_filters2=64, kernel_size=3, dropout_rate1=0.25, dropout_rate2=0.5, pool_size=(2, 2), nb_dense=128, img_size=36): """Definition of DeepPhys. Args: in_channels: the number of input channel. Default: 3 img_size: height/width of each frame. Default: 36. Returns: DeepPhys model. """ super(DeepPhys, self).__init__() self.in_channels = in_channels self.kernel_size = kernel_size self.dropout_rate1 = dropout_rate1 self.dropout_rate2 = dropout_rate2 self.pool_size = pool_size self.nb_filters1 = nb_filters1 self.nb_filters2 = nb_filters2 self.nb_dense = nb_dense # Motion branch convs self.motion_conv1 = nn.Conv2d(self.in_channels, self.nb_filters1, kernel_size=self.kernel_size, padding=(1, 1), bias=True) self.motion_conv2 = nn.Conv2d(self.nb_filters1, self.nb_filters1, kernel_size=self.kernel_size, bias=True) self.motion_conv3 = nn.Conv2d(self.nb_filters1, self.nb_filters2, kernel_size=self.kernel_size, padding=(1, 1), bias=True) self.motion_conv4 = nn.Conv2d(self.nb_filters2, self.nb_filters2, kernel_size=self.kernel_size, bias=True) # Apperance branch convs self.apperance_conv1 = nn.Conv2d(self.in_channels, self.nb_filters1, kernel_size=self.kernel_size, padding=(1, 1), bias=True) self.apperance_conv2 = nn.Conv2d(self.nb_filters1, self.nb_filters1, kernel_size=self.kernel_size, bias=True) self.apperance_conv3 = nn.Conv2d(self.nb_filters1, self.nb_filters2, kernel_size=self.kernel_size, padding=(1, 1), bias=True) self.apperance_conv4 = nn.Conv2d(self.nb_filters2, self.nb_filters2, kernel_size=self.kernel_size, bias=True) # Attention layers self.apperance_att_conv1 = nn.Conv2d(self.nb_filters1, 1, kernel_size=1, padding=(0, 0), bias=True) self.attn_mask_1 = Attention_mask() self.apperance_att_conv2 = nn.Conv2d(self.nb_filters2, 1, kernel_size=1, padding=(0, 0), bias=True) self.attn_mask_2 = Attention_mask() # Avg pooling self.avg_pooling_1 = nn.AvgPool2d(self.pool_size) self.avg_pooling_2 = nn.AvgPool2d(self.pool_size) self.avg_pooling_3 = nn.AvgPool2d(self.pool_size) # Dropout layers self.dropout_1 = nn.Dropout(self.dropout_rate1) self.dropout_2 = nn.Dropout(self.dropout_rate1) self.dropout_3 = nn.Dropout(self.dropout_rate1) self.dropout_4 = nn.Dropout(self.dropout_rate2) # Dense layers if img_size == 36: self.final_dense_1 = nn.Linear(3136, self.nb_dense, bias=True) elif img_size == 72: self.final_dense_1 = nn.Linear(16384, self.nb_dense, bias=True) elif img_size == 96: self.final_dense_1 = nn.Linear(30976, self.nb_dense, bias=True) else: raise Exception('Unsupported image size') self.final_dense_2 = nn.Linear(self.nb_dense, 1, bias=True) def forward(self, inputs, params=None): diff_input = inputs[:, :3, :, :] raw_input = inputs[:, 3:, :, :] d1 = torch.tanh(self.motion_conv1(diff_input)) d2 = torch.tanh(self.motion_conv2(d1)) r1 = torch.tanh(self.apperance_conv1(raw_input)) r2 = torch.tanh(self.apperance_conv2(r1)) g1 = 
torch.sigmoid(self.apperance_att_conv1(r2)) g1 = self.attn_mask_1(g1) gated1 = d2 * g1 d3 = self.avg_pooling_1(gated1) d4 = self.dropout_1(d3) r3 = self.avg_pooling_2(r2) r4 = self.dropout_2(r3) d5 = torch.tanh(self.motion_conv3(d4)) d6 = torch.tanh(self.motion_conv4(d5)) r5 = torch.tanh(self.apperance_conv3(r4)) r6 = torch.tanh(self.apperance_conv4(r5)) g2 = torch.sigmoid(self.apperance_att_conv2(r6)) g2 = self.attn_mask_2(g2) gated2 = d6 * g2 d7 = self.avg_pooling_3(gated2) d8 = self.dropout_3(d7) d9 = d8.reshape(d8.size(0), -1) d10 = torch.tanh(self.final_dense_1(d9)) d11 = self.dropout_4(d10) out = self.final_dense_2(d11) return out
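

# --- Minimal smoke test (not part of the original module) --------------------
# A hedged sketch of how the model might be exercised: it assumes the default
# 36x36 resolution and the 6-channel input layout used in forward() above
# (3 normalized-difference channels stacked on 3 raw channels). The batch size
# and random input are illustrative only.
if __name__ == '__main__':
    model = DeepPhys(img_size=36)
    model.eval()

    # Batch of 4 frames: 6 channels, 36x36 pixels.
    dummy = torch.randn(4, 6, 36, 36)
    with torch.no_grad():
        out = model(dummy)
    print(out.shape)  # expected: torch.Size([4, 1]), one value per frame

    # The attention mask rescales a sigmoid map so each spatial map sums to
    # H * W * 0.5, per Attention_mask.forward above.
    mask = Attention_mask()(torch.sigmoid(torch.randn(1, 1, 36, 36)))
    print(mask.sum())  # expected: ~648.0 (= 36 * 36 * 0.5)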