# DepthPoseEstimation/layers.py
# Copyright Niantic 2019. Patent Pending. All rights reserved.
#
# This software is licensed under the terms of the Monodepth2 licence
# which allows for non-commercial use only, the full terms of which are made
# available in the LICENSE file.
from __future__ import absolute_import, division, print_function
import numpy as np
from scipy.spatial.transform import Rotation as R
import torch
import torch.nn as nn
import torch.nn.functional as F
# from torchmetrics.image.fid import FrechetInceptionDistance
class SpatialTransformer(nn.Module):
def __init__(self, size, mode='bilinear'):
"""
Instiantiate the block
:param size: size of input to the spatial transformer block
:param mode: method of interpolation for grid_sampler
"""
super(SpatialTransformer, self).__init__()
# Create sampling grid
vectors = [torch.arange(0, s) for s in size]
grids = torch.meshgrid(vectors)
grid = torch.stack(grids) # y, x, z
grid = torch.unsqueeze(grid, 0) # add batch
grid = grid.type(torch.FloatTensor)
self.register_buffer('grid', grid)
self.mode = mode
def forward(self, src, flow):
"""
Push the src and flow through the spatial transform block
:param src: the source image
:param flow: the output from the U-Net
"""
new_locs = self.grid + flow
shape = flow.shape[2:]
        # Normalize grid values to [-1, 1] for grid_sample (align_corners=True convention)
        for i in range(len(shape)):
            new_locs[:, i, ...] = 2 * (new_locs[:, i, ...] / (shape[i] - 1) - 0.5)
        if len(shape) == 2:
            new_locs = new_locs.permute(0, 2, 3, 1)
            new_locs = new_locs[..., [1, 0]]  # (y, x) -> (x, y) as grid_sample expects
        elif len(shape) == 3:
            new_locs = new_locs.permute(0, 2, 3, 4, 1)
            new_locs = new_locs[..., [2, 1, 0]]
        return F.grid_sample(src, new_locs, mode=self.mode, padding_mode="border", align_corners=True)
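# Usage sketch (shapes illustrative, not from the original code): warp a source
# image with a dense per-pixel displacement field in (y, x) order.
#
#   stn = SpatialTransformer(size=(192, 640))
#   src = torch.randn(1, 3, 192, 640)
#   flow = torch.zeros(1, 2, 192, 640)   # zero flow -> identity warp
#   warped = stn(src, flow)              # equals src up to interpolation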
class optical_flow(nn.Module):
def __init__(self, size, batch_size, height, width, eps=1e-7):
super(optical_flow, self).__init__()
        # Create sampling grid (same layout as SpatialTransformer: channels are (y, x))
        vectors = [torch.arange(0, s) for s in size]
        grids = torch.meshgrid(vectors, indexing='ij')  # explicit 'ij' keeps the pre-1.10 default behaviour
        grid = torch.stack(grids)  # y, x, z
        grid = torch.unsqueeze(grid, 0)  # add batch dim
        grid = grid.float()
self.register_buffer('grid', grid)
self.batch_size = batch_size
self.height = height
self.width = width
self.eps = eps
    def forward(self, points, K, T):
        P = torch.matmul(K, T)[:, :3, :]
        cam_points = torch.matmul(P, points)
        pix_coords = cam_points[:, :2, :] / (cam_points[:, 2, :].unsqueeze(1) + self.eps)
        pix_coords = pix_coords.view(self.batch_size, 2, self.height, self.width)
        flow = pix_coords[:, [1, 0], ...] - self.grid  # reorder (x, y) -> (y, x), then subtract the grid
        return flow
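# Usage sketch: given homogeneous camera points from BackprojectDepth (defined
# below) plus intrinsics K and relative pose T (both taken here as (B, 4, 4),
# an assumption), this returns the rigid flow induced by depth and camera motion.
#
#   flow_layer = optical_flow((192, 640), batch_size=1, height=192, width=640)
#   rigid_flow = flow_layer(cam_points, K, T)   # (1, 2, 192, 640), (y, x) order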
def get_corresponding_map(data):
"""
:param data: unnormalized coordinates Bx2xHxW
:return: Bx1xHxW
"""
B, _, H, W = data.size()
    x = data[:, 0, :, :].view(B, -1)  # BxN (N = H*W)
    y = data[:, 1, :, :].view(B, -1)
    x_f = torch.floor(x)        # floor coordinates, before clamping
    x_floor = x_f.clamp(0, W - 1)
    y_f = torch.floor(y)
    y_floor = y_f.clamp(0, H - 1)
    x_c = x_f + 1               # ceil coordinates, before clamping
    x_ceil = x_c.clamp(0, W - 1)
    y_c = y_f + 1
    y_ceil = y_c.clamp(0, H - 1)
    # a corner is out of bounds if clamping moved it
    x_ceil_out = x_c != x_ceil
    y_ceil_out = y_c != y_ceil
    x_floor_out = x_f != x_floor
    y_floor_out = y_f != y_floor
    invalid = torch.cat([x_ceil_out | y_ceil_out,
                         x_ceil_out | y_floor_out,
                         x_floor_out | y_ceil_out,
                         x_floor_out | y_floor_out], dim=1)
# encode coordinates, since the scatter function can only index along one axis
corresponding_map = torch.zeros(B, H * W).type_as(data)
indices = torch.cat([x_ceil + y_ceil * W,
x_ceil + y_floor * W,
x_floor + y_ceil * W,
x_floor + y_floor * W], 1).long() # BxN (N=4*H*W)
values = torch.cat([(1 - torch.abs(x - x_ceil)) * (1 - torch.abs(y - y_ceil)),
(1 - torch.abs(x - x_ceil)) * (1 - torch.abs(y - y_floor)),
(1 - torch.abs(x - x_floor)) * (1 - torch.abs(y - y_ceil)),
(1 - torch.abs(x - x_floor)) * (1 - torch.abs(y - y_floor))],
1)
    values[invalid] = 0
    corresponding_map.scatter_add_(1, indices, values)
# decode coordinates
corresponding_map = corresponding_map.view(B, H, W)
return corresponding_map.unsqueeze(1)
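# Illustration (assumed shapes): an identity coordinate map hits every pixel
# exactly once, so the correspondence map is all ones; occluded pixels, which
# nobody maps to, would receive (close to) zero.
#
#   ys, xs = torch.meshgrid(torch.arange(4.), torch.arange(4.), indexing='ij')
#   coords = torch.stack([xs, ys]).unsqueeze(0)   # (1, 2, 4, 4), channel 0 = x
#   cmap = get_corresponding_map(coords)          # all ones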
class get_occu_mask_backward(nn.Module):
def __init__(self, size):
super(get_occu_mask_backward, self).__init__()
        # Create sampling grid (channels are (y, x))
        vectors = [torch.arange(0, s) for s in size]
        grids = torch.meshgrid(vectors, indexing='ij')  # explicit 'ij' keeps the pre-1.10 default behaviour
        grid = torch.stack(grids)  # y, x, z
        grid = torch.unsqueeze(grid, 0)  # add batch dim
        grid = grid.float()
        self.register_buffer('grid', grid)
    def forward(self, flow, th=0.95):
        new_locs = self.grid + flow
        new_locs = new_locs[:, [1, 0], ...]  # (y, x) -> (x, y) for get_corresponding_map
        corr_map = get_corresponding_map(new_locs)
        occu_map = corr_map
        occu_mask = (occu_map > th).float()  # 1 where enough flow mass lands on the pixel (visible)
        return occu_mask, occu_map
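# Usage sketch (hypothetical inputs): given the backward flow of the other
# view, pixels where little flow mass lands are treated as occluded.
#
#   occ = get_occu_mask_backward((192, 640))
#   occu_mask, occu_map = occ(flow21)   # flow21: (B, 2, 192, 640), (y, x) order
#   # occu_mask == 1 where the pixel remains visible, 0 where it is occluded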
class get_occu_mask_bidirection(nn.Module):
def __init__(self, size, mode='bilinear'):
super(get_occu_mask_bidirection, self).__init__()
        # Create sampling grid (channels are (y, x))
        vectors = [torch.arange(0, s) for s in size]
        grids = torch.meshgrid(vectors, indexing='ij')  # explicit 'ij' keeps the pre-1.10 default behaviour
        grid = torch.stack(grids)  # y, x, z
        grid = torch.unsqueeze(grid, 0)  # add batch dim
        grid = grid.float()
        self.register_buffer('grid', grid)
self.mode = mode
def forward(self, flow12, flow21, scale=0.01, bias=0.5):
new_locs = self.grid + flow12
shape = flow12.shape[2:]
        # Normalize grid values to [-1, 1] for grid_sample (align_corners=True convention)
        for i in range(len(shape)):
            new_locs[:, i, ...] = 2 * (new_locs[:, i, ...] / (shape[i] - 1) - 0.5)
        if len(shape) == 2:
            new_locs = new_locs.permute(0, 2, 3, 1)
            new_locs = new_locs[..., [1, 0]]  # (y, x) -> (x, y) as grid_sample expects
        elif len(shape) == 3:
            new_locs = new_locs.permute(0, 2, 3, 4, 1)
            new_locs = new_locs[..., [2, 1, 0]]
        flow21_warped = F.grid_sample(flow21, new_locs, mode=self.mode, padding_mode="border", align_corners=True)
        flow12_diff = torch.abs(flow12 + flow21_warped)  # forward-backward consistency error
# mag = (flow12 * flow12).sum(1, keepdim=True) + \
# (flow21_warped * flow21_warped).sum(1, keepdim=True)
# occ_thresh = scale * mag + bias
# occ_mask = (flow12_diff * flow12_diff).sum(1, keepdim=True) < occ_thresh
return flow12_diff
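# Usage sketch: the returned |flow12 + warp(flow21)| is a forward-backward
# consistency error, large where the two flows disagree (typically occlusions);
# the commented thresholding above is one way to binarize it.
#
#   occ_bi = get_occu_mask_bidirection((192, 640))
#   fb_err = occ_bi(flow12, flow21)     # (B, 2, 192, 640)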
# functions
def _axis_angle_rotation(axis: str, angle: torch.Tensor) -> torch.Tensor:
"""
Return the rotation matrices for one of the rotations about an axis
of which Euler angles describe, for each value of the angle given.
Args:
        axis: Axis label "X", "Y", or "Z".
        angle: Euler angles in radians, as a tensor of any shape.
Returns:
Rotation matrices as tensor of shape (..., 3, 3).
"""
cos = torch.cos(angle)
sin = torch.sin(angle)
one = torch.ones_like(angle)
zero = torch.zeros_like(angle)
if axis == "X":
R_flat = (one, zero, zero, zero, cos, -sin, zero, sin, cos)
elif axis == "Y":
R_flat = (cos, zero, sin, zero, one, zero, -sin, zero, cos)
elif axis == "Z":
R_flat = (cos, -sin, zero, sin, cos, zero, zero, zero, one)
else:
raise ValueError("letter must be either X, Y or Z.")
return torch.stack(R_flat, -1).reshape(angle.shape + (3, 3))
def euler_angles_to_matrix(euler_angles: torch.Tensor, convention: str) -> torch.Tensor:
"""
Convert rotations given as Euler angles in radians to rotation matrices.
Args:
euler_angles: Euler angles in radians as tensor of shape (..., 3).
        convention: Convention string of three uppercase letters from
            {"X", "Y", "Z"}.
    Returns:
        Homogeneous rotation matrices as a tensor of shape (B, 4, 4).
"""
if euler_angles.dim() == 0 or euler_angles.shape[-1] != 3:
raise ValueError("Invalid input euler angles.")
if len(convention) != 3:
raise ValueError("Convention must have 3 letters.")
if convention[1] in (convention[0], convention[2]):
raise ValueError(f"Invalid convention {convention}.")
for letter in convention:
if letter not in ("X", "Y", "Z"):
raise ValueError(f"Invalid letter {letter} in convention string.")
matrices = [
_axis_angle_rotation(c, e)
for c, e in zip(convention, torch.unbind(euler_angles, -1))
]
    # compose the three axis rotations, then embed in a homogeneous 4x4 transform
    rotation_matrices = torch.matmul(torch.matmul(matrices[0], matrices[1]), matrices[2])
    rot = torch.zeros((rotation_matrices.shape[0], 4, 4)).to(device=rotation_matrices.device)
    rot[:, :3, :3] = rotation_matrices.squeeze()
    rot[:, 3, 3] = 1
return rot
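# Round-trip sketch (values illustrative): a batch of 'ZYX' Euler angles maps
# to homogeneous 4x4 transforms with zero translation, and the 3x3 block can
# be decoded back by matrix_to_euler_angles (defined below).
#
#   eul = torch.tensor([[0.1, 0.2, 0.3]])                    # (1, 3), radians
#   T = euler_angles_to_matrix(eul, 'ZYX')                   # (1, 4, 4)
#   eul_back = matrix_to_euler_angles(T[:, :3, :3], 'ZYX')   # ~= eul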
def _angle_from_tan(
axis: str, other_axis: str, data, horizontal: bool, tait_bryan: bool
) -> torch.Tensor:
"""
Extract the first or third Euler angle from the two members of
the matrix which are positive constant times its sine and cosine.
Args:
        axis: Axis label "X", "Y", or "Z" for the angle we are finding.
        other_axis: Axis label "X", "Y", or "Z" for the middle axis in the
            convention.
data: Rotation matrices as tensor of shape (..., 3, 3).
horizontal: Whether we are looking for the angle for the third axis,
which means the relevant entries are in the same row of the
rotation matrix. If not, they are in the same column.
tait_bryan: Whether the first and third axes in the convention differ.
Returns:
Euler Angles in radians for each matrix in data as a tensor
of shape (...).
"""
i1, i2 = {"X": (2, 1), "Y": (0, 2), "Z": (1, 0)}[axis]
if horizontal:
i2, i1 = i1, i2
even = (axis + other_axis) in ["XY", "YZ", "ZX"]
if horizontal == even:
return torch.atan2(data[..., i1], data[..., i2])
if tait_bryan:
return torch.atan2(-data[..., i2], data[..., i1])
return torch.atan2(data[..., i2], -data[..., i1])
def matrix_2_euler_vector(matrix, convention='ZYX', roll=True):
    """Flatten a batched homogeneous transform into a per-sample (euler, translation) 6-vector."""
    euler = matrix_to_euler_angles(matrix[:, :3, :3], convention)  # to match scipy: euler = -euler and transpose of this
    if roll:
        euler[..., 0] = 0.0  # zero the first angle of the convention for every sample
    t = matrix[:, :3, 3]
    out = torch.cat([euler, t], dim=-1)  # (B, 6)
    return out
def _index_from_letter(letter: str) -> int:
if letter == "X":
return 0
if letter == "Y":
return 1
if letter == "Z":
return 2
raise ValueError("letter must be either X, Y or Z.")
def matrix_to_euler_angles(matrix: torch.Tensor, convention: str) -> torch.Tensor:
"""
Convert rotations given as rotation matrices to Euler angles in radians.
Args:
matrix: Rotation matrices as tensor of shape (..., 3, 3).
convention: Convention string of three uppercase letters.
Returns:
Euler angles in radians as tensor of shape (..., 3).
"""
if len(convention) != 3:
raise ValueError("Convention must have 3 letters.")
if convention[1] in (convention[0], convention[2]):
raise ValueError(f"Invalid convention {convention}.")
for letter in convention:
if letter not in ("X", "Y", "Z"):
raise ValueError(f"Invalid letter {letter} in convention string.")
if matrix.size(-1) != 3 or matrix.size(-2) != 3:
raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.")
i0 = _index_from_letter(convention[0])
i2 = _index_from_letter(convention[2])
tait_bryan = i0 != i2
if tait_bryan:
central_angle = torch.asin(
matrix[..., i0, i2] * (-1.0 if i0 - i2 in [-1, 2] else 1.0)
)
else:
central_angle = torch.acos(matrix[..., i0, i0])
o = (
_angle_from_tan(
convention[0], convention[1], matrix[..., i2], False, tait_bryan
),
central_angle,
_angle_from_tan(
convention[2], convention[1], matrix[..., i0, :], True, tait_bryan
),
)
return torch.stack(o, -1)
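# Small check (illustrative): a pure rotation about Z decomposes with the full
# angle in the first slot of the 'ZYX' convention.
#
#   m = _axis_angle_rotation("Z", torch.tensor([0.5]))   # (1, 3, 3)
#   ang = matrix_to_euler_angles(m, 'ZYX')               # ~[[0.5, 0.0, 0.0]]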
def computeFID(real_images, fake_images, fid_criterion):
    # fid_criterion is expected to be a torchmetrics FrechetInceptionDistance instance
fid_criterion.update(real_images, real=True)
fid_criterion.update(fake_images, real=False)
return fid_criterion.compute()
class SLlog(nn.Module):
    """Scale-invariant log (SILog) loss; non-positive pixels are masked out."""
    def __init__(self):
        super(SLlog, self).__init__()
    def forward(self, fake1, real1):
        if fake1.shape != real1.shape:
            _, _, H, W = real1.shape
            fake1 = F.interpolate(fake1, size=(H, W), mode='bilinear', align_corners=False)
        # filter out invalid (non-positive) pixels
        real = real1.clone()
        fake = fake1.clone()
        N = (real > 0).float().sum()
        mask = (real <= 0) | (fake <= 0)
        fake[mask] = 1.
        real[mask] = 1.
        loss_ = torch.log(real) - torch.log(fake)
        loss = torch.sqrt((torch.sum(loss_ ** 2) / N) - ((torch.sum(loss_) / N) ** 2))
        return loss
class RMSE_log(nn.Module):
    """RMSE of the log-depth difference over pixels where real < 1."""
    def __init__(self, use_cuda):
        super(RMSE_log, self).__init__()
        self.eps = 1e-8
        self.use_cuda = use_cuda
    def forward(self, fake, real):
        mask = real < 1.
        n, _, h, w = real.size()
        fake = F.interpolate(fake, size=(h, w), mode='bilinear', align_corners=False)
        fake = fake + self.eps  # avoid log(0)
        N = len(real[mask])
        loss = torch.sqrt(torch.sum(torch.abs(torch.log(real[mask]) - torch.log(fake[mask])) ** 2) / N)
        return loss
def depth_to_disp(depth, min_disp=0.00001, max_disp=1.000001):
    """Convert the network's sigmoid output into a scaled depth and the
    corresponding disparity (the inverse mapping of disp_to_depth below).
    """
min_depth = 1 / max_disp
max_depth = 1 / min_disp
scaled_depth = min_depth + (max_depth - min_depth) * depth
disp = 1 / scaled_depth
return scaled_depth, disp
def disp_to_depth(disp, min_depth, max_depth):
"""Convert network's sigmoid output into depth prediction
The formula for this conversion is given in the 'additional considerations'
section of the paper.
"""
min_disp = 1 / max_depth
max_disp = 1 / min_depth
scaled_disp = min_disp + (max_disp - min_disp) * disp
depth = 1 / scaled_disp
return scaled_disp, depth
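# Worked example with monodepth2's usual range (min_depth=0.1, max_depth=100):
#
#   scaled_disp, depth = disp_to_depth(torch.tensor([0., 1.]), 0.1, 100)
#   # disp = 0 -> scaled_disp = 0.01 -> depth = 100.0 (far plane)
#   # disp = 1 -> scaled_disp = 10.0 -> depth = 0.1   (near plane)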
def disp_to_depth_no_scaling(disp):
    """Convert disparity to depth by direct inversion, with no affine rescaling.
    """
depth = 1 / (disp + 1e-7)
return depth
def transformation_from_parameters(axisangle, translation, invert=False):
"""Convert the network's (axisangle, translation) output into a 4x4 matrix
"""
R = rot_from_axisangle(axisangle)
t = translation.clone()
    if invert:
        R = R.transpose(1, 2)
        t *= -1
T = get_translation_matrix(t)
if invert:
M = torch.matmul(R, T)
else:
M = torch.matmul(T, R)
return M
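# Usage sketch: the pose network's outputs are typically (B, 1, 3) each; zero
# inputs give the identity transform.
#
#   axisangle = torch.zeros(1, 1, 3)
#   translation = torch.zeros(1, 1, 3)
#   M = transformation_from_parameters(axisangle, translation)   # identity, (1, 4, 4)
#   M_inv = transformation_from_parameters(axisangle, translation, invert=True)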
def transformation_from_parameters_euler(euler, translation, invert=False):
"""Convert the network's (axisangle, translation) output into a 4x4 matrix
"""
# R = torch.transpose(euler_angles_to_matrix(euler, 'ZYX'), 0, 1).permute(1, 0, 2) # to match with scipy euler = -euler and transpose of this
R = euler_angles_to_matrix(euler, 'ZYX') # to match with scipy euler = -euler and transpose of this
t = translation.clone()
if invert:
R = R.transpose(1, 2)
t *= -1
T = get_translation_matrix(t)
if invert:
M = torch.matmul(R, T)
else:
M = torch.matmul(T, R)
return M
def get_translation_matrix(translation_vector):
"""Convert a translation vector into a 4x4 transformation matrix
"""
T = torch.zeros(translation_vector.shape[0], 4, 4).to(device=translation_vector.device)
t = translation_vector.contiguous().view(-1, 3, 1)
T[:, 0, 0] = 1
T[:, 1, 1] = 1
T[:, 2, 2] = 1
T[:, 3, 3] = 1
T[:, :3, 3, None] = t
return T
def rot_from_euler(vec):
    """Build a scipy Rotation from Euler angles given in degrees ('zyx' order)."""
    rot = R.from_euler('zyx', vec, degrees=True)
    return rot
def rot_from_axisangle(vec):
"""Convert an axisangle rotation into a 4x4 transformation matrix
(adapted from https://github.com/Wallacoloo/printipi)
Input 'vec' has to be Bx1x3
"""
angle = torch.norm(vec, 2, 2, True)
axis = vec / (angle + 1e-7)
ca = torch.cos(angle)
sa = torch.sin(angle)
C = 1 - ca
x = axis[..., 0].unsqueeze(1)
y = axis[..., 1].unsqueeze(1)
z = axis[..., 2].unsqueeze(1)
xs = x * sa
ys = y * sa
zs = z * sa
xC = x * C
yC = y * C
zC = z * C
xyC = x * yC
yzC = y * zC
zxC = z * xC
rot = torch.zeros((vec.shape[0], 4, 4)).to(device=vec.device)
rot[:, 0, 0] = torch.squeeze(x * xC + ca)
rot[:, 0, 1] = torch.squeeze(xyC - zs)
rot[:, 0, 2] = torch.squeeze(zxC + ys)
rot[:, 1, 0] = torch.squeeze(xyC + zs)
rot[:, 1, 1] = torch.squeeze(y * yC + ca)
rot[:, 1, 2] = torch.squeeze(yzC - xs)
rot[:, 2, 0] = torch.squeeze(zxC - ys)
rot[:, 2, 1] = torch.squeeze(yzC + xs)
rot[:, 2, 2] = torch.squeeze(z * zC + ca)
rot[:, 3, 3] = 1
return rot
class ConvBlock(nn.Module):
"""Layer to perform a convolution followed by ELU
"""
def __init__(self, in_channels, out_channels):
super(ConvBlock, self).__init__()
self.conv = Conv3x3(in_channels, out_channels)
self.nonlin = nn.ELU(inplace=True)
def forward(self, x):
out = self.conv(x)
out = self.nonlin(out)
return out
def batchNorm(num_ch_dec):
return nn.BatchNorm2d(num_ch_dec)
class Conv3x3(nn.Module):
"""Layer to pad and convolve input
"""
def __init__(self, in_channels, out_channels, use_refl=True):
super(Conv3x3, self).__init__()
if use_refl:
self.pad = nn.ReflectionPad2d(1)
else:
self.pad = nn.ZeroPad2d(1)
self.conv = nn.Conv2d(int(in_channels), int(out_channels), 3)
def forward(self, x):
out = self.pad(x)
out = self.conv(out)
return out
class BackprojectDepth(nn.Module):
"""Layer to transform a depth image into a point cloud
"""
def __init__(self, batch_size, height, width):
super(BackprojectDepth, self).__init__()
self.batch_size = batch_size
self.height = height
self.width = width
meshgrid = np.meshgrid(range(self.width), range(self.height), indexing='xy')
self.id_coords = np.stack(meshgrid, axis=0).astype(np.float32)
self.id_coords = nn.Parameter(torch.from_numpy(self.id_coords),
requires_grad=False)
self.ones = nn.Parameter(torch.ones(self.batch_size, 1, self.height * self.width),
requires_grad=False)
self.pix_coords = torch.unsqueeze(torch.stack(
[self.id_coords[0].view(-1), self.id_coords[1].view(-1)], 0), 0)
self.pix_coords = self.pix_coords.repeat(batch_size, 1, 1)
self.pix_coords = nn.Parameter(torch.cat([self.pix_coords, self.ones], 1),
requires_grad=False)
def forward(self, depth, inv_K):
cam_points = torch.matmul(inv_K[:, :3, :3], self.pix_coords)
cam_points = depth.view(self.batch_size, 1, -1) * cam_points
cam_points = torch.cat([cam_points, self.ones], 1)
return cam_points
class Project3D(nn.Module):
"""Layer which projects 3D points into a camera with intrinsics K and at position T
"""
def __init__(self, batch_size, height, width, eps=1e-7):
super(Project3D, self).__init__()
self.batch_size = batch_size
self.height = height
self.width = width
self.eps = eps
def forward(self, points, K, T):
P = torch.matmul(K, T)[:, :3, :]
cam_points = torch.matmul(P, points)
pix_coords = cam_points[:, :2, :] / (cam_points[:, 2, :].unsqueeze(1) + self.eps)
pix_coords = pix_coords.view(self.batch_size, 2, self.height, self.width)
pix_coords = pix_coords.permute(0, 2, 3, 1)
pix_coords[..., 0] /= self.width - 1
pix_coords[..., 1] /= self.height - 1
pix_coords = (pix_coords - 0.5) * 2
return pix_coords
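# How the two layers compose in a monodepth2-style view-synthesis step
# (a sketch; B, H, W, K, inv_K, T, depth and src are assumed inputs):
#
#   backproject = BackprojectDepth(B, H, W)
#   project = Project3D(B, H, W)
#   cam_points = backproject(depth, inv_K)   # (B, 4, H*W) homogeneous 3D points
#   pix_coords = project(cam_points, K, T)   # (B, H, W, 2), normalized to [-1, 1]
#   pred = F.grid_sample(src, pix_coords, padding_mode="border", align_corners=True)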
def upsample(x):
"""Upsample input tensor by a factor of 2
"""
return F.interpolate(x, scale_factor=2, mode="nearest")
class deconv(nn.Module):
"""Layer to perform a convolution followed by ELU
"""
def __init__(self, ch_in, ch_out):
super(deconv, self).__init__()
self.deconvlayer = nn.ConvTranspose2d(ch_in, ch_out, 3, stride=2, padding=1)
def forward(self, x):
out = self.deconvlayer(x)
return out
def get_smooth_loss_gauss_mask(disp, img, gauss_mask):
"""Computes the smoothness loss for a disparity image
The color image is used for edge-aware smoothness
"""
grad_disp_x = torch.abs(disp[:, :, :, :-1] - disp[:, :, :, 1:])
grad_disp_y = torch.abs(disp[:, :, :-1, :] - disp[:, :, 1:, :])
    grad_img_x = torch.mean(torch.abs(img[:, :, :, :-1] - img[:, :, :, 1:]), 1, keepdim=True)
    grad_img_y = torch.mean(torch.abs(img[:, :, :-1, :] - img[:, :, 1:, :]), 1, keepdim=True)
    grad_disp_x *= torch.exp(-grad_img_x)
    grad_disp_y *= torch.exp(-grad_img_y)
    # weight the edge-aware gradients by the Gaussian mask before taking the mean
    grad_disp_x *= gauss_mask[:, :, :, :-1]
    grad_disp_y *= gauss_mask[:, :, :-1, :]
return grad_disp_x.mean() + grad_disp_y.mean()
def get_smooth_loss(disp, img):
"""Computes the smoothness loss for a disparity image
The color image is used for edge-aware smoothness
"""
grad_disp_x = torch.abs(disp[:, :, :, :-1] - disp[:, :, :, 1:])
grad_disp_y = torch.abs(disp[:, :, :-1, :] - disp[:, :, 1:, :])
grad_img_x = torch.mean(torch.abs(img[:, :, :, :-1] - img[:, :, :, 1:]), 1, keepdim=True)
grad_img_y = torch.mean(torch.abs(img[:, :, :-1, :] - img[:, :, 1:, :]), 1, keepdim=True)
grad_disp_x *= torch.exp(-grad_img_x)
grad_disp_y *= torch.exp(-grad_img_y)
return grad_disp_x.mean() + grad_disp_y.mean()
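# Typical call (as in monodepth2): mean-normalize the disparity first so the
# smoothness penalty is invariant to the disparity scale.
#
#   norm_disp = disp / (disp.mean(2, True).mean(3, True) + 1e-7)
#   smooth_loss = get_smooth_loss(norm_disp, color)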
class SSIM(nn.Module):
"""Layer to compute the SSIM loss between a pair of images
"""
def __init__(self):
super(SSIM, self).__init__()
self.mu_x_pool = nn.AvgPool2d(3, 1)
self.mu_y_pool = nn.AvgPool2d(3, 1)
self.sig_x_pool = nn.AvgPool2d(3, 1)
self.sig_y_pool = nn.AvgPool2d(3, 1)
self.sig_xy_pool = nn.AvgPool2d(3, 1)
self.refl = nn.ReflectionPad2d(1)
self.C1 = 0.01 ** 2
self.C2 = 0.03 ** 2
def forward(self, x, y):
x = self.refl(x)
y = self.refl(y)
mu_x = self.mu_x_pool(x)
mu_y = self.mu_y_pool(y)
sigma_x = self.sig_x_pool(x ** 2) - mu_x ** 2
sigma_y = self.sig_y_pool(y ** 2) - mu_y ** 2
sigma_xy = self.sig_xy_pool(x * y) - mu_x * mu_y
SSIM_n = (2 * mu_x * mu_y + self.C1) * (2 * sigma_xy + self.C2)
SSIM_d = (mu_x ** 2 + mu_y ** 2 + self.C1) * (sigma_x + sigma_y + self.C2)
return torch.clamp((1 - SSIM_n / SSIM_d) / 2, 0, 1)
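# Commonly combined with an L1 term into a photometric loss; the 0.85/0.15
# weighting follows monodepth2.
#
#   ssim = SSIM()
#   l1 = torch.abs(target - pred).mean(1, True)
#   photometric = 0.85 * ssim(pred, target).mean(1, True) + 0.15 * l1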
def compute_depth_errors(gt, pred):
"""Computation of error metrics between predicted and ground truth depths
"""
thresh = torch.max((gt / pred), (pred / gt))
a1 = (thresh < 1.25 ).float().mean()
a2 = (thresh < 1.25 ** 2).float().mean()
a3 = (thresh < 1.25 ** 3).float().mean()
rmse = (gt - pred) ** 2
rmse = torch.sqrt(rmse.mean())
rmse_log = (torch.log(gt) - torch.log(pred)) ** 2
rmse_log = torch.sqrt(rmse_log.mean())
abs_rel = torch.mean(torch.abs(gt - pred) / gt)
sq_rel = torch.mean((gt - pred) ** 2 / gt)
return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3
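# These metrics assume gt and pred are already masked to valid, positive
# depths, e.g. (a sketch):
#
#   mask = gt > 0
#   abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 = compute_depth_errors(gt[mask], pred[mask])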
""" Parts of the U-Net model """
class InstanceNormDoubleConv(nn.Module):
    """(convolution => [IN] => ReLU) => (convolution => [BN] => ReLU)"""
    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.InstanceNorm2d(mid_channels, affine=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
def forward(self, x):
return self.double_conv(x)
class DoubleConv(nn.Module):
"""(convolution => [BN] => ReLU) * 2"""
def __init__(self, in_channels, out_channels, mid_channels=None):
super().__init__()
if not mid_channels:
mid_channels = out_channels
self.double_conv = nn.Sequential(
nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(mid_channels),
nn.ReLU(inplace=True),
nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
)
def forward(self, x):
return self.double_conv(x)
class DoubleConvIN(nn.Module):
    """(convolution => [IN] => ReLU) * 2"""
    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.InstanceNorm2d(mid_channels, affine=True),  # device placement is left to the caller's .to()
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.InstanceNorm2d(out_channels, affine=True),
            nn.ReLU(inplace=True))
def forward(self, x):
return self.double_conv(x)
class Down(nn.Module):
"""Downscaling with maxpool then double conv"""
def __init__(self, in_channels, out_channels):
super().__init__()
self.maxpool_conv = nn.Sequential(
nn.MaxPool2d(2),
DoubleConv(in_channels, out_channels)
)
def forward(self, x):
return self.maxpool_conv(x)
class DownIN(nn.Module):
"""Downscaling with maxpool then double conv"""
def __init__(self, in_channels, out_channels):
super().__init__()
self.maxpool_conv = nn.Sequential(
nn.MaxPool2d(2),
DoubleConvIN(in_channels, out_channels)
)
def forward(self, x):
return self.maxpool_conv(x)
class Up(nn.Module):
"""Upscaling then double conv"""
def __init__(self, in_channels, out_channels, bilinear=True):
super().__init__()
# if bilinear, use the normal convolutions to reduce the number of channels
if bilinear:
self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
else:
self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
self.conv = DoubleConv(in_channels, out_channels)
def forward(self, x1, x2):
x1 = self.up(x1)
# input is CHW
diffY = x2.size()[2] - x1.size()[2]
diffX = x2.size()[3] - x1.size()[3]
x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
diffY // 2, diffY - diffY // 2])
# if you have padding issues, see
# https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
# https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
x = torch.cat([x2, x1], dim=1)
return self.conv(x)
class OutConv(nn.Module):
def __init__(self, in_channels, out_channels):
super(OutConv, self).__init__()
self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
def forward(self, x):
return self.conv(x)
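# Sketch of how these parts assemble into a small U-Net (class name and channel
# widths are illustrative, not from the original code):
#
#   class TinyUNet(nn.Module):
#       def __init__(self):
#           super().__init__()
#           self.inc = DoubleConv(3, 64)
#           self.down1 = Down(64, 128)
#           self.up1 = Up(64 + 128, 64)   # skip (64ch) + upsampled bottleneck (128ch)
#           self.outc = OutConv(64, 1)
#       def forward(self, x):
#           x1 = self.inc(x)
#           x2 = self.down1(x1)
#           return self.outc(self.up1(x2, x1))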
class UpIN(nn.Module):
"""Upscaling then double conv"""
def __init__(self, in_channels, out_channels, bilinear=True):
super().__init__()
# if bilinear, use the normal convolutions to reduce the number of channels
if bilinear:
self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
self.conv = DoubleConvIN(in_channels, out_channels, in_channels // 2)
else:
self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
self.conv = DoubleConvIN(in_channels, out_channels)
def forward(self, x1, x2):
x1 = self.up(x1)
# input is CHW
diffY = x2.size()[2] - x1.size()[2]
diffX = x2.size()[3] - x1.size()[3]
x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
diffY // 2, diffY - diffY // 2])
# if you have padding issues, see
# https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
# https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
x = torch.cat([x2, x1], dim=1)
return self.conv(x)
# def gaussian_fn(M, std):
# n = torch.arange(0, M) - (M - 1.0) / 2.0
# sig2 = 2 * std * std
# w = torch.exp(-n ** 2 / sig2)
# return w
# def gkern(kernlen=256, std=128):
# """Returns a 2D Gaussian kernel array."""
# gkern1d = gaussian_fn(kernlen, std=std)
# gkern2d = torch.outer(gkern1d, gkern1d)
# return gkern2d
# A = np.random.rand(256*256).reshape([256,256])
# A = torch.from_numpy(A)
# guassian_filter = gkern(256, std=32)
# class GaussianLayer(nn.Module):
# def __init__(self):
# super(GaussianLayer, self).__init__()
# self.seq = nn.Sequential(
# nn.ReflectionPad2d(10),
# nn.Conv2d(3, 3, 21, stride=1, padding=0, bias=None, groups=3)
# )
# self.weights_init()
# def forward(self, x):
# return self.seq(x)
# def weights_init(self):
# n= np.zeros((21,21))
# n[10,10] = 1
# k = scipy.ndimage.gaussian_filter(n,sigma=3)
# for name, f in self.named_parameters():
# f.data.copy_(torch.from_numpy(k))