Yiting1009 committed · Commit 5d87992 · Parent: 8b1d8da

Upload 26 files

Files changed:
- .gitattributes +2 -0
- app.py +27 -0
- canyons_intrinsics.json +5 -0
- flatiron_1.tiff +3 -0
- flatiron_2.tiff +3 -0
- horse_canyon_1.tiff +0 -0
- horse_canyon_2.tiff +0 -0
- src/.DS_Store +0 -0
- src/networks/__init__.py +7 -0
- src/networks/__pycache__/__init__.cpython-39.pyc +0 -0
- src/networks/__pycache__/depth_decoder.cpython-39.pyc +0 -0
- src/networks/__pycache__/pose_cnn.cpython-39.pyc +0 -0
- src/networks/__pycache__/pose_decoder.cpython-39.pyc +0 -0
- src/networks/__pycache__/resnet_encoder.cpython-39.pyc +0 -0
- src/networks/__pycache__/restoration_model.cpython-39.pyc +0 -0
- src/networks/depth_decoder.py +101 -0
- src/networks/pose_cnn.py +47 -0
- src/networks/pose_decoder.py +52 -0
- src/networks/resnet_encoder.py +431 -0
- src/networks/restoration_model.py +273 -0
- src/weights/depth.pth +3 -0
- src/weights/encoder.pth +3 -0
- src/weights/pose.pth +3 -0
- src/weights/pose_encoder.pth +3 -0
- src/weights/uie_model.pth +3 -0
- test_simple.py +169 -0
- utils.py +118 -0
.gitattributes
CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+flatiron_1.tiff filter=lfs diff=lfs merge=lfs -text
+flatiron_2.tiff filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,27 @@
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
import warnings
warnings.filterwarnings('ignore')
import gradio as gr
from PIL import Image
from test_simple import test_simple

def predict(image: Image):
    return test_simple(image)

title = "Underwater Image Restoration"

iface = gr.Interface(
    predict,
    inputs=gr.Image(type="pil"),
    outputs="image",
    title=title,
    allow_flagging="never",
    examples=[
        ["flatiron_1.tiff"],
        ["flatiron_2.tiff"],
        ["horse_canyon_1.tiff"],
        ["horse_canyon_2.tiff"],
    ],
)
iface.launch(share=True)
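Note: the Space entry point is a plain Gradio app, so it can also be exercised locally. A minimal sketch, assuming the bundled weights and example .tiff files above sit in the working directory (the output filename is made up for illustration):

# Run the app:  python app.py
# Or call the predictor directly on one of the bundled examples:
from PIL import Image
from test_simple import test_simple

restored = test_simple(Image.open("flatiron_1.tiff"))  # returns a PIL image
restored.save("flatiron_1_restored.png")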
canyons_intrinsics.json
ADDED
@@ -0,0 +1,5 @@
[
  [1.21, 0, 0.5],
  [0, 1.93, 0.5],
  [0, 0, 1.0]
]
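Note: these are normalised intrinsics (focal lengths and principal point divided by image width/height). A minimal sketch of how they become the 1/4-resolution pixel intrinsics the cost volume expects, mirroring load_and_preprocess_intrinsics in test_simple.py; the 320x192 resize is an assumption for illustration, the real values come from the encoder checkpoint:

import json
import numpy as np

# Scale normalised intrinsics to pixel units at 1/4 of the network input size.
K = np.eye(4)
K[:3, :3] = np.array(json.load(open("canyons_intrinsics.json")))
resize_width, resize_height = 320, 192   # assumed; test_simple.py reads these from encoder.pth
K[0, :] *= resize_width // 4             # fx, cx in pixels at 80 px width
K[1, :] *= resize_height // 4            # fy, cy in pixels at 48 px height
invK = np.linalg.pinv(K)                 # inverse used for backprojection
print(K[:3, :3])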
flatiron_1.tiff
ADDED
Git LFS Details
flatiron_2.tiff
ADDED
Git LFS Details
horse_canyon_1.tiff
ADDED
horse_canyon_2.tiff
ADDED
src/.DS_Store
ADDED
Binary file (6.15 kB).
src/networks/__init__.py
ADDED
@@ -0,0 +1,7 @@
# flake8: noqa: F401
from .resnet_encoder import ResnetEncoder, ResnetEncoderMatching
from .depth_decoder import DepthDecoder
from .pose_decoder import PoseDecoder
from .pose_cnn import PoseCNN
from .restoration_model import MainModel
# from .layers import BackprojectDepth, Project3D, ConvBlock, Conv3x3, upsample
src/networks/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (422 Bytes).

src/networks/__pycache__/depth_decoder.cpython-39.pyc
ADDED
Binary file (3.17 kB).

src/networks/__pycache__/pose_cnn.cpython-39.pyc
ADDED
Binary file (1.35 kB).

src/networks/__pycache__/pose_decoder.cpython-39.pyc
ADDED
Binary file (1.77 kB).

src/networks/__pycache__/resnet_encoder.cpython-39.pyc
ADDED
Binary file (12.7 kB).

src/networks/__pycache__/restoration_model.cpython-39.pyc
ADDED
Binary file (7.88 kB).
src/networks/depth_decoder.py
ADDED
@@ -0,0 +1,101 @@
# Copyright Niantic 2021. Patent Pending. All rights reserved.
#
# This software is licensed under the terms of the ManyDepth licence
# which allows for non-commercial use only, the full terms of which are made
# available in the LICENSE file.

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from collections import OrderedDict


class ConvBlock(nn.Module):
    """Layer to perform a convolution followed by ELU
    """

    def __init__(self, in_channels, out_channels):
        super(ConvBlock, self).__init__()

        self.conv = Conv3x3(in_channels, out_channels)
        self.nonlin = nn.ELU(inplace=True)

    def forward(self, x):
        out = self.conv(x)
        out = self.nonlin(out)
        return out


class Conv3x3(nn.Module):
    """Layer to pad and convolve input
    """

    def __init__(self, in_channels, out_channels, use_refl=True):
        super(Conv3x3, self).__init__()

        if use_refl:
            self.pad = nn.ReflectionPad2d(1)
        else:
            self.pad = nn.ZeroPad2d(1)
        self.conv = nn.Conv2d(int(in_channels), int(out_channels), 3)

    def forward(self, x):
        out = self.pad(x)
        out = self.conv(out)
        return out


def upsample(x):
    """Upsample input tensor by a factor of 2
    """
    return F.interpolate(x, scale_factor=2, mode="nearest")


class DepthDecoder(nn.Module):
    def __init__(self, num_ch_enc, scales=range(4), num_output_channels=1, use_skips=True):
        super(DepthDecoder, self).__init__()

        self.num_output_channels = num_output_channels
        self.use_skips = use_skips
        self.upsample_mode = 'nearest'
        self.scales = scales

        self.num_ch_enc = num_ch_enc
        self.num_ch_dec = np.array([16, 32, 64, 128, 256])

        # decoder
        self.convs = OrderedDict()
        for i in range(4, -1, -1):
            # upconv_0
            num_ch_in = self.num_ch_enc[-1] if i == 4 else self.num_ch_dec[i + 1]
            num_ch_out = self.num_ch_dec[i]
            self.convs[("upconv", i, 0)] = ConvBlock(num_ch_in, num_ch_out)

            # upconv_1
            num_ch_in = self.num_ch_dec[i]
            if self.use_skips and i > 0:
                num_ch_in += self.num_ch_enc[i - 1]
            num_ch_out = self.num_ch_dec[i]
            self.convs[("upconv", i, 1)] = ConvBlock(num_ch_in, num_ch_out)

        for s in self.scales:
            self.convs[("dispconv", s)] = Conv3x3(self.num_ch_dec[s], self.num_output_channels)

        self.decoder = nn.ModuleList(list(self.convs.values()))
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_features):
        self.outputs = {}

        # decoder
        x = input_features[-1]
        for i in range(4, -1, -1):
            x = self.convs[("upconv", i, 0)](x)
            x = [upsample(x)]
            if self.use_skips and i > 0:
                x += [input_features[i - 1]]
            x = torch.cat(x, 1)
            x = self.convs[("upconv", i, 1)](x)
            if i in self.scales:
                self.outputs[("disp", i)] = self.sigmoid(self.convs[("dispconv", i)](x))

        return self.outputs
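Note: a minimal shape-check sketch of how this decoder pairs with the ResnetEncoder defined in resnet_encoder.py below; the 192x320 input size and the untrained weights are assumptions for illustration only.

import torch
from src.networks import ResnetEncoder, DepthDecoder

encoder = ResnetEncoder(18, pretrained=False)
decoder = DepthDecoder(num_ch_enc=encoder.num_ch_enc, scales=range(4))

with torch.no_grad():
    feats = encoder(torch.rand(1, 3, 192, 320))   # 5 feature maps, strides 2..32
    outputs = decoder(feats)

# ("disp", 0) is the full-resolution sigmoid disparity map: (1, 1, 192, 320)
print(outputs[("disp", 0)].shape)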
src/networks/pose_cnn.py
ADDED
@@ -0,0 +1,47 @@
# Copyright Niantic 2021. Patent Pending. All rights reserved.
#
# This software is licensed under the terms of the ManyDepth licence
# which allows for non-commercial use only, the full terms of which are made
# available in the LICENSE file.

import torch.nn as nn


class PoseCNN(nn.Module):
    def __init__(self, num_input_frames):
        super(PoseCNN, self).__init__()

        self.num_input_frames = num_input_frames

        self.convs = {}
        self.convs[0] = nn.Conv2d(3 * num_input_frames, 16, 7, 2, 3)
        self.convs[1] = nn.Conv2d(16, 32, 5, 2, 2)
        self.convs[2] = nn.Conv2d(32, 64, 3, 2, 1)
        self.convs[3] = nn.Conv2d(64, 128, 3, 2, 1)
        self.convs[4] = nn.Conv2d(128, 256, 3, 2, 1)
        self.convs[5] = nn.Conv2d(256, 256, 3, 2, 1)
        self.convs[6] = nn.Conv2d(256, 256, 3, 2, 1)

        self.pose_conv = nn.Conv2d(256, 6 * (num_input_frames - 1), 1)

        self.num_convs = len(self.convs)

        self.relu = nn.ReLU(True)

        self.net = nn.ModuleList(list(self.convs.values()))

    def forward(self, out):

        for i in range(self.num_convs):
            out = self.convs[i](out)
            out = self.relu(out)

        out = self.pose_conv(out)
        out = out.mean(3).mean(2)

        out = 0.01 * out.view(-1, self.num_input_frames - 1, 1, 6)

        axisangle = out[..., :3]
        translation = out[..., 3:]

        return axisangle, translation
src/networks/pose_decoder.py
ADDED
@@ -0,0 +1,52 @@
# Copyright Niantic 2021. Patent Pending. All rights reserved.
#
# This software is licensed under the terms of the ManyDepth licence
# which allows for non-commercial use only, the full terms of which are made
# available in the LICENSE file.

import torch
import torch.nn as nn
from collections import OrderedDict


class PoseDecoder(nn.Module):
    def __init__(self, num_ch_enc, num_input_features, num_frames_to_predict_for=None, stride=1):
        super(PoseDecoder, self).__init__()

        self.num_ch_enc = num_ch_enc
        self.num_input_features = num_input_features

        if num_frames_to_predict_for is None:
            num_frames_to_predict_for = num_input_features - 1
        self.num_frames_to_predict_for = num_frames_to_predict_for

        self.convs = OrderedDict()
        self.convs[("squeeze")] = nn.Conv2d(self.num_ch_enc[-1], 256, 1)
        self.convs[("pose", 0)] = nn.Conv2d(num_input_features * 256, 256, 3, stride, 1)
        self.convs[("pose", 1)] = nn.Conv2d(256, 256, 3, stride, 1)
        self.convs[("pose", 2)] = nn.Conv2d(256, 6 * num_frames_to_predict_for, 1)

        self.relu = nn.ReLU()

        self.net = nn.ModuleList(list(self.convs.values()))

    def forward(self, input_features):
        last_features = [f[-1] for f in input_features]

        cat_features = [self.relu(self.convs["squeeze"](f)) for f in last_features]
        cat_features = torch.cat(cat_features, 1)

        out = cat_features
        for i in range(3):
            out = self.convs[("pose", i)](out)
            if i != 2:
                out = self.relu(out)

        out = out.mean(3).mean(2)

        out = 0.01 * out.view(-1, self.num_frames_to_predict_for, 1, 6)

        axisangle = out[..., :3]
        translation = out[..., 3:]

        return axisangle, translation
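Note: a sketch of the pose branch wired the same way test_simple.py wires it (pose ResnetEncoder on two concatenated frames, then this decoder, then utils.transformation_from_parameters). The random frames and the 192x320 size are stand-ins for illustration, not the real inputs.

import torch
from src.networks import ResnetEncoder, PoseDecoder
from utils import transformation_from_parameters

pose_enc = ResnetEncoder(18, pretrained=False, num_input_images=2)
pose_dec = PoseDecoder(pose_enc.num_ch_enc, num_input_features=1, num_frames_to_predict_for=2)

frame_prev = torch.rand(1, 3, 192, 320)
frame_curr = torch.rand(1, 3, 192, 320)

with torch.no_grad():
    feats = pose_enc(torch.cat([frame_prev, frame_curr], 1))   # 6-channel input
    axisangle, translation = pose_dec([feats])                 # decoder expects a list of feature lists
    pose = transformation_from_parameters(axisangle[:, 0], translation[:, 0], invert=True)

print(pose.shape)  # (1, 4, 4) relative camera transform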
src/networks/resnet_encoder.py
ADDED
@@ -0,0 +1,431 @@
# Copyright Niantic 2021. Patent Pending. All rights reserved.
#
# This software is licensed under the terms of the ManyDepth licence
# which allows for non-commercial use only, the full terms of which are made
# available in the LICENSE file.

import os
os.environ["MKL_NUM_THREADS"] = "1"  # noqa F402
os.environ["NUMEXPR_NUM_THREADS"] = "1"  # noqa F402
os.environ["OMP_NUM_THREADS"] = "1"  # noqa F402

import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torch.utils.model_zoo as model_zoo


class BackprojectDepth(nn.Module):
    """Layer to transform a depth image into a point cloud
    """

    def __init__(self, batch_size, height, width):
        super(BackprojectDepth, self).__init__()

        self.batch_size = batch_size
        self.height = height
        self.width = width

        meshgrid = np.meshgrid(range(self.width), range(self.height), indexing='xy')
        self.id_coords = np.stack(meshgrid, axis=0).astype(np.float32)
        self.id_coords = nn.Parameter(torch.from_numpy(self.id_coords),
                                      requires_grad=False)

        self.ones = nn.Parameter(torch.ones(self.batch_size, 1, self.height * self.width),
                                 requires_grad=False)

        self.pix_coords = torch.unsqueeze(torch.stack(
            [self.id_coords[0].view(-1), self.id_coords[1].view(-1)], 0), 0)
        self.pix_coords = self.pix_coords.repeat(batch_size, 1, 1)
        self.pix_coords = nn.Parameter(torch.cat([self.pix_coords, self.ones], 1),
                                       requires_grad=False)

    def forward(self, depth, inv_K):
        cam_points = torch.matmul(inv_K[:, :3, :3], self.pix_coords)
        cam_points = depth.view(self.batch_size, 1, -1) * cam_points
        cam_points = torch.cat([cam_points, self.ones], 1)

        return cam_points


class Project3D(nn.Module):
    """Layer which projects 3D points into a camera with intrinsics K and at position T
    """

    def __init__(self, batch_size, height, width, eps=1e-7):
        super(Project3D, self).__init__()

        self.batch_size = batch_size
        self.height = height
        self.width = width
        self.eps = eps

    def forward(self, points, K, T):
        P = torch.matmul(K, T)[:, :3, :]

        cam_points = torch.matmul(P, points)

        pix_coords = cam_points[:, :2, :] / (cam_points[:, 2, :].unsqueeze(1) + self.eps)
        pix_coords = pix_coords.view(self.batch_size, 2, self.height, self.width)
        pix_coords = pix_coords.permute(0, 2, 3, 1)
        pix_coords[..., 0] /= self.width - 1
        pix_coords[..., 1] /= self.height - 1
        pix_coords = (pix_coords - 0.5) * 2
        return pix_coords


class ResNetMultiImageInput(models.ResNet):
    """Constructs a resnet model with varying number of input images.
    Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
    """

    def __init__(self, block, layers, num_classes=1000, num_input_images=1):
        super(ResNetMultiImageInput, self).__init__(block, layers)
        self.inplanes = 64
        self.conv1 = nn.Conv2d(
            num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)


def resnet_multiimage_input(num_layers, pretrained=False, num_input_images=1):
    """Constructs a ResNet model.
    Args:
        num_layers (int): Number of resnet layers. Must be 18 or 50
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        num_input_images (int): Number of frames stacked as input
    """
    assert num_layers in [18, 50], "Can only run with 18 or 50 layer resnet"
    blocks = {18: [2, 2, 2, 2], 50: [3, 4, 6, 3]}[num_layers]
    block_type = {18: models.resnet.BasicBlock, 50: models.resnet.Bottleneck}[num_layers]
    model = ResNetMultiImageInput(block_type, blocks, num_input_images=num_input_images)

    if pretrained:
        loaded = model_zoo.load_url(models.resnet.model_urls['resnet{}'.format(num_layers)])
        loaded['conv1.weight'] = torch.cat(
            [loaded['conv1.weight']] * num_input_images, 1) / num_input_images
        model.load_state_dict(loaded)
    return model


class ResnetEncoderMatching(nn.Module):
    """Resnet encoder adapted to include a cost volume after the 2nd block.

    Setting adaptive_bins=True will recompute the depth bins used for matching upon each
    forward pass - this is required for training from monocular video as there is an unknown scale.
    """

    def __init__(self, num_layers, pretrained, input_height, input_width,
                 min_depth_bin=0.1, max_depth_bin=20.0, num_depth_bins=96,
                 adaptive_bins=False, depth_binning='linear'):

        super(ResnetEncoderMatching, self).__init__()

        self.adaptive_bins = adaptive_bins
        self.depth_binning = depth_binning
        self.set_missing_to_max = True

        self.num_ch_enc = np.array([64, 64, 128, 256, 512])
        self.num_depth_bins = num_depth_bins
        # we build the cost volume at 1/4 resolution
        self.matching_height, self.matching_width = input_height // 4, input_width // 4

        self.is_cuda = False
        self.warp_depths = None
        self.depth_bins = None

        resnets = {18: models.resnet18,
                   34: models.resnet34,
                   50: models.resnet50,
                   101: models.resnet101,
                   152: models.resnet152}

        if num_layers not in resnets:
            raise ValueError("{} is not a valid number of resnet layers".format(num_layers))

        encoder = resnets[num_layers](pretrained)
        self.layer0 = nn.Sequential(encoder.conv1, encoder.bn1, encoder.relu)
        self.layer1 = nn.Sequential(encoder.maxpool, encoder.layer1)
        self.layer2 = encoder.layer2
        self.layer3 = encoder.layer3
        self.layer4 = encoder.layer4

        if num_layers > 34:
            self.num_ch_enc[1:] *= 4

        self.backprojector = BackprojectDepth(batch_size=self.num_depth_bins,
                                              height=self.matching_height,
                                              width=self.matching_width)
        self.projector = Project3D(batch_size=self.num_depth_bins,
                                   height=self.matching_height,
                                   width=self.matching_width)

        self.compute_depth_bins(min_depth_bin, max_depth_bin)

        self.prematching_conv = nn.Sequential(nn.Conv2d(64, out_channels=16,
                                                        kernel_size=1, stride=1, padding=0),
                                              nn.ReLU(inplace=True)
                                              )

        self.reduce_conv = nn.Sequential(nn.Conv2d(self.num_ch_enc[1] + self.num_depth_bins,
                                                   out_channels=self.num_ch_enc[1],
                                                   kernel_size=3, stride=1, padding=1),
                                         nn.ReLU(inplace=True)
                                         )

    def compute_depth_bins(self, min_depth_bin, max_depth_bin):
        """Compute the depths bins used to build the cost volume. Bins will depend upon
        self.depth_binning, to either be linear in depth (linear) or linear in inverse depth
        (inverse)"""

        if self.depth_binning == 'inverse':
            self.depth_bins = 1 / np.linspace(1 / max_depth_bin,
                                              1 / min_depth_bin,
                                              self.num_depth_bins)[::-1]  # maintain depth order

        elif self.depth_binning == 'linear':
            self.depth_bins = np.linspace(min_depth_bin, max_depth_bin, self.num_depth_bins)
        else:
            raise NotImplementedError
        self.depth_bins = torch.from_numpy(self.depth_bins).float()

        self.warp_depths = []
        for depth in self.depth_bins:
            depth = torch.ones((1, self.matching_height, self.matching_width)) * depth
            self.warp_depths.append(depth)
        self.warp_depths = torch.stack(self.warp_depths, 0).float()
        if self.is_cuda:
            self.warp_depths = self.warp_depths.cuda()

    def match_features(self, current_feats, lookup_feats, relative_poses, K, invK):
        """Compute a cost volume based on L1 difference between current_feats and lookup_feats.

        We backwards warp the lookup_feats into the current frame using the estimated relative
        pose, known intrinsics and using hypothesised depths self.warp_depths (which are either
        linear in depth or linear in inverse depth).

        If relative_pose == 0 then this indicates that the lookup frame is missing (i.e. we are
        at the start of a sequence), and so we skip it"""

        batch_cost_volume = []  # store all cost volumes of the batch
        cost_volume_masks = []  # store locations of '0's in cost volume for confidence

        for batch_idx in range(len(current_feats)):

            volume_shape = (self.num_depth_bins, self.matching_height, self.matching_width)
            cost_volume = torch.zeros(volume_shape, dtype=torch.float, device=current_feats.device)
            counts = torch.zeros(volume_shape, dtype=torch.float, device=current_feats.device)

            # select an item from batch of ref feats
            _lookup_feats = lookup_feats[batch_idx:batch_idx + 1]
            _lookup_poses = relative_poses[batch_idx:batch_idx + 1]

            _K = K[batch_idx:batch_idx + 1]
            _invK = invK[batch_idx:batch_idx + 1]
            world_points = self.backprojector(self.warp_depths, _invK)

            # loop through ref images adding to the current cost volume
            for lookup_idx in range(_lookup_feats.shape[1]):
                lookup_feat = _lookup_feats[:, lookup_idx]  # 1 x C x H x W
                lookup_pose = _lookup_poses[:, lookup_idx]

                # ignore missing images
                if lookup_pose.sum() == 0:
                    continue

                lookup_feat = lookup_feat.repeat([self.num_depth_bins, 1, 1, 1])
                pix_locs = self.projector(world_points, _K, lookup_pose)
                warped = F.grid_sample(lookup_feat, pix_locs, padding_mode='zeros', mode='bilinear',
                                       align_corners=True)

                # mask values landing outside the image (and near the border)
                # we want to ignore edge pixels of the lookup images and the current image
                # because of zero padding in ResNet
                # Masking of ref image border
                x_vals = (pix_locs[..., 0].detach() / 2 + 0.5) * (
                    self.matching_width - 1)  # convert from (-1, 1) to pixel values
                y_vals = (pix_locs[..., 1].detach() / 2 + 0.5) * (self.matching_height - 1)

                edge_mask = (x_vals >= 2.0) * (x_vals <= self.matching_width - 2) * \
                            (y_vals >= 2.0) * (y_vals <= self.matching_height - 2)
                edge_mask = edge_mask.float()

                # masking of current image
                current_mask = torch.zeros_like(edge_mask)
                current_mask[:, 2:-2, 2:-2] = 1.0
                edge_mask = edge_mask * current_mask

                diffs = torch.abs(warped - current_feats[batch_idx:batch_idx + 1]).mean(
                    1) * edge_mask

                # integrate into cost volume
                cost_volume = cost_volume + diffs
                counts = counts + (diffs > 0).float()
            # average over lookup images
            cost_volume = cost_volume / (counts + 1e-7)

            # if some missing values for a pixel location (i.e. some depths landed outside) then
            # set to max of existing values
            missing_val_mask = (cost_volume == 0).float()
            if self.set_missing_to_max:
                cost_volume = cost_volume * (1 - missing_val_mask) + \
                    cost_volume.max(0)[0].unsqueeze(0) * missing_val_mask
            batch_cost_volume.append(cost_volume)
            cost_volume_masks.append(missing_val_mask)

        batch_cost_volume = torch.stack(batch_cost_volume, 0)
        cost_volume_masks = torch.stack(cost_volume_masks, 0)

        return batch_cost_volume, cost_volume_masks

    def feature_extraction(self, image, return_all_feats=False):
        """ Run feature extraction on an image - first 2 blocks of ResNet"""

        image = (image - 0.45) / 0.225  # imagenet normalisation
        feats_0 = self.layer0(image)
        feats_1 = self.layer1(feats_0)

        if return_all_feats:
            return [feats_0, feats_1]
        else:
            return feats_1

    def indices_to_disparity(self, indices):
        """Convert cost volume indices to 1/depth for visualisation"""

        batch, height, width = indices.shape
        depth = self.depth_bins[indices.reshape(-1).cpu()]
        disp = 1 / depth.reshape((batch, height, width))
        return disp

    def compute_confidence_mask(self, cost_volume, num_bins_threshold=None):
        """ Returns a 'confidence' mask based on how many times a depth bin was observed"""

        if num_bins_threshold is None:
            num_bins_threshold = self.num_depth_bins
        confidence_mask = ((cost_volume > 0).sum(1) == num_bins_threshold).float()

        return confidence_mask

    def forward(self, current_image, lookup_images, poses, K, invK,
                min_depth_bin=None, max_depth_bin=None
                ):

        # feature extraction
        self.features = self.feature_extraction(current_image, return_all_feats=True)
        current_feats = self.features[-1]
        # print('current_feats:', current_feats.shape)

        # feature extraction on lookup images - disable gradients to save memory
        with torch.no_grad():
            if self.adaptive_bins:
                self.compute_depth_bins(min_depth_bin, max_depth_bin)

            batch_size, num_frames, chns, height, width = lookup_images.shape
            lookup_images = lookup_images.reshape(batch_size * num_frames, chns, height, width)
            lookup_feats = self.feature_extraction(lookup_images,
                                                   return_all_feats=False)
            _, chns, height, width = lookup_feats.shape
            lookup_feats = lookup_feats.reshape(batch_size, num_frames, chns, height, width)
            # print('lookup_feats:', lookup_feats.shape)

            # warp features to find cost volume
            cost_volume, missing_mask = \
                self.match_features(current_feats, lookup_feats, poses, K, invK)
            confidence_mask = self.compute_confidence_mask(cost_volume.detach() *
                                                           (1 - missing_mask.detach()))

        # for visualisation - ignore 0s in cost volume for minimum
        viz_cost_vol = cost_volume.clone().detach()
        viz_cost_vol[viz_cost_vol == 0] = 100
        mins, argmin = torch.min(viz_cost_vol, 1)
        lowest_cost = self.indices_to_disparity(argmin)

        # mask the cost volume based on the confidence
        cost_volume *= confidence_mask.unsqueeze(1)
        post_matching_feats = self.reduce_conv(torch.cat([self.features[-1], cost_volume], 1))
        # print('post_matching_feats:', post_matching_feats.shape)

        self.features.append(self.layer2(post_matching_feats))
        self.features.append(self.layer3(self.features[-1]))
        self.features.append(self.layer4(self.features[-1]))

        return self.features, lowest_cost, confidence_mask

    def cuda(self):
        super().cuda()
        self.backprojector.cuda()
        self.projector.cuda()
        self.is_cuda = True
        if self.warp_depths is not None:
            self.warp_depths = self.warp_depths.cuda()

    def cpu(self):
        super().cpu()
        self.backprojector.cpu()
        self.projector.cpu()
        self.is_cuda = False
        if self.warp_depths is not None:
            self.warp_depths = self.warp_depths.cpu()

    def to(self, device):
        if str(device) == 'cpu':
            self.cpu()
        elif str(device) == 'cuda':
            self.cuda()
        else:
            raise NotImplementedError


class ResnetEncoder(nn.Module):
    """Pytorch module for a resnet encoder
    """

    def __init__(self, num_layers, pretrained, num_input_images=1, **kwargs):
        super(ResnetEncoder, self).__init__()

        self.num_ch_enc = np.array([64, 64, 128, 256, 512])

        resnets = {18: models.resnet18,
                   34: models.resnet34,
                   50: models.resnet50,
                   101: models.resnet101,
                   152: models.resnet152}

        if num_layers not in resnets:
            raise ValueError("{} is not a valid number of resnet layers".format(num_layers))

        if num_input_images > 1:
            self.encoder = resnet_multiimage_input(num_layers, pretrained, num_input_images)
        else:
            self.encoder = resnets[num_layers](pretrained)

        if num_layers > 34:
            self.num_ch_enc[1:] *= 4

    def forward(self, input_image):
        self.features = []
        x = (input_image - 0.45) / 0.225
        x = self.encoder.conv1(x)
        x = self.encoder.bn1(x)
        self.features.append(self.encoder.relu(x))
        self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1])))
        self.features.append(self.encoder.layer2(self.features[-1]))
        self.features.append(self.encoder.layer3(self.features[-1]))
        self.features.append(self.encoder.layer4(self.features[-1]))

        return self.features
src/networks/restoration_model.py
ADDED
@@ -0,0 +1,273 @@
import os
os.environ["MKL_NUM_THREADS"] = "1"  # noqa F402
os.environ["NUMEXPR_NUM_THREADS"] = "1"  # noqa F402
os.environ["OMP_NUM_THREADS"] = "1"  # noqa F402

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor


def make_model():
    return MainModel()


class DoubleConv(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(DoubleConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1, bias=False, padding_mode='reflect'),
            nn.GroupNorm(num_channels=out_ch, num_groups=8, affine=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1, bias=False, padding_mode='reflect'),
            nn.GroupNorm(num_channels=out_ch, num_groups=8, affine=True),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.conv(x)
        return x


class InDoubleConv(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(InDoubleConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 9, stride=4, padding=4, bias=False, padding_mode='reflect'),
            nn.GroupNorm(num_channels=out_ch, num_groups=8, affine=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1, bias=False, padding_mode='reflect'),
            nn.GroupNorm(num_channels=out_ch, num_groups=8, affine=True),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.conv(x)
        return x


class InConv(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(InConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 64, 7, stride=4, padding=3, bias=False, padding_mode='reflect'),
            nn.GroupNorm(num_channels=64, num_groups=8, affine=True),
            nn.ReLU(inplace=True)
        )
        self.convf = nn.Sequential(
            nn.Conv2d(64, 64, 3, padding=1, bias=False, padding_mode='reflect'),
            nn.GroupNorm(num_channels=64, num_groups=8, affine=True),
            nn.ReLU(inplace=False)
        )

    def forward(self, x):
        R = x[:, 0:1, :, :]
        G = x[:, 1:2, :, :]
        B = x[:, 2:3, :, :]
        xR = torch.unsqueeze(self.conv(R), 1)
        xG = torch.unsqueeze(self.conv(G), 1)
        xB = torch.unsqueeze(self.conv(B), 1)
        x = torch.cat([xR, xG, xB], 1)
        x, _ = torch.min(x, dim=1)
        return self.convf(x)


class SKConv(nn.Module):
    def __init__(self, outfeatures=64, infeatures=1, M=4, L=32):

        super(SKConv, self).__init__()
        self.M = M
        self.convs = nn.ModuleList([])
        in_conv = InConv(in_ch=infeatures, out_ch=outfeatures)
        for i in range(M):
            if i == 0:
                self.convs.append(in_conv)
            else:
                self.convs.append(nn.Sequential(
                    nn.Upsample(scale_factor=1/(2**i), mode='bilinear', align_corners=True),
                    in_conv,
                    nn.Upsample(scale_factor=2**i, mode='bilinear', align_corners=True)
                ))
        self.fc = nn.Linear(outfeatures, L)
        self.fcs = nn.ModuleList([])
        for i in range(M):
            self.fcs.append(
                nn.Linear(L, outfeatures)
            )
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        for i, conv in enumerate(self.convs):
            fea = conv(x).unsqueeze(dim=1)
            if i == 0:
                feas = fea
            else:
                feas = torch.cat([feas, fea], dim=1)
        fea_U = torch.sum(feas, dim=1)    # fea_U: (1, 64, H, W)
        fea_s = fea_U.mean(-1).mean(-1)   # (1, 64)
        fea_z = self.fc(fea_s)            # (1, 32)
        for i, fc in enumerate(self.fcs):
            vector = fc(fea_z).unsqueeze(dim=1)
            if i == 0:
                attention_vectors = vector
            else:
                attention_vectors = torch.cat([attention_vectors, vector], dim=1)
        attention_vectors = self.softmax(attention_vectors)               # (1, 3, 64)
        attention_vectors = attention_vectors.unsqueeze(-1).unsqueeze(-1)  # (1, 3, 64, 1, 1)
        fea_v = (feas * attention_vectors).sum(dim=1)                     # (1, 64, H, W)
        return fea_v


class estimation(nn.Module):
    def __init__(self):
        super(estimation, self).__init__()

        self.InConv = SKConv(outfeatures=64, infeatures=1, M=3, L=32)

        self.convt_1 = DoubleConv(64, 64)
        self.up_1 = nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True)
        self.OutConv_1 = nn.Conv2d(64, 6, 3, padding=1, stride=1, bias=False, padding_mode='reflect')

        self.convt_2 = DoubleConv(64, 64)
        self.up_2 = nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True)
        self.OutConv_2 = nn.Conv2d(64, 3, 3, padding=1, stride=1, bias=False, padding_mode='reflect')

        self.inconv_1 = InDoubleConv(3, 64)
        self.maxpool_1 = nn.MaxPool2d(15, 7)
        self.doubleconv_1 = DoubleConv(64, 64)
        self.pool_1 = nn.AdaptiveAvgPool2d(1)
        self.dense_1 = nn.Linear(64, 3, bias=False)

        self.inconv_2 = InDoubleConv(3, 64)
        self.maxpool_2 = nn.MaxPool2d(15, 7)
        self.doubleconv_2 = DoubleConv(64, 64)
        self.pool_2 = nn.AdaptiveAvgPool2d(1)
        self.dense_2 = nn.Linear(64, 3, bias=False)

    def forward(self, x):

        xmin = self.InConv(x)

        beta = self.OutConv_1(self.up_1(self.convt_1(xmin)))
        beta = torch.sigmoid(beta) + 1e-12

        atm = self.inconv_2(x)
        atm = torch.mul(atm, xmin)
        atm = self.pool_2(self.doubleconv_2(self.maxpool_2(atm)))
        atm = atm.view(-1, 64)
        atm = torch.sigmoid(self.dense_2(atm))

        return beta, atm


class JNet(torch.nn.Module):
    def __init__(self, num=64):
        super().__init__()
        self.conv1 = torch.nn.Sequential(
            torch.nn.ReflectionPad2d(1),
            torch.nn.Conv2d(3, num, 3, 1, 0),
            torch.nn.InstanceNorm2d(num),
            torch.nn.ReLU()
        )
        self.conv2 = torch.nn.Sequential(
            torch.nn.ReflectionPad2d(1),
            torch.nn.Conv2d(num, num, 3, 1, 0),
            torch.nn.InstanceNorm2d(num),
            torch.nn.ReLU()
        )
        self.conv3 = torch.nn.Sequential(
            torch.nn.ReflectionPad2d(1),
            torch.nn.Conv2d(num, num, 3, 1, 0),
            torch.nn.InstanceNorm2d(num),
            torch.nn.ReLU()
        )
        self.conv4 = torch.nn.Sequential(
            torch.nn.ReflectionPad2d(1),
            torch.nn.Conv2d(num, num, 3, 1, 0),
            torch.nn.InstanceNorm2d(num),
            torch.nn.ReLU()
        )
        self.final = torch.nn.Sequential(
            torch.nn.Conv2d(num, 3, 1, 1, 0),
            torch.nn.Sigmoid()
        )

    def forward(self, data):
        data = self.conv1(data)
        data = self.conv2(data)
        data = self.conv3(data)
        data = self.conv4(data)
        data1 = self.final(data)

        return data1


class TNet(torch.nn.Module):
    def __init__(self, num=64):
        super().__init__()
        self.conv1 = torch.nn.Sequential(
            torch.nn.ReflectionPad2d(1),
            torch.nn.Conv2d(3, num, 3, 1, 0),
            torch.nn.InstanceNorm2d(num),
            torch.nn.ReLU()
        )
        self.conv2 = torch.nn.Sequential(
            torch.nn.ReflectionPad2d(1),
            torch.nn.Conv2d(num, num, 3, 1, 0),
            torch.nn.InstanceNorm2d(num),
            torch.nn.ReLU()
        )
        self.conv3 = torch.nn.Sequential(
            torch.nn.ReflectionPad2d(1),
            torch.nn.Conv2d(num, num, 3, 1, 0),
            torch.nn.InstanceNorm2d(num),
            torch.nn.ReLU()
        )
        self.conv4 = torch.nn.Sequential(
            torch.nn.ReflectionPad2d(1),
            torch.nn.Conv2d(num, num, 3, 1, 0),
            torch.nn.InstanceNorm2d(num),
            torch.nn.ReLU()
        )
        self.final = torch.nn.Sequential(
            torch.nn.Conv2d(num, 6, 1, 1, 0),
            torch.nn.Sigmoid()
        )

    def forward(self, data):
        data = self.conv1(data)
        data = self.conv2(data)
        data = self.conv3(data)
        data = self.conv4(data)
        data1 = self.final(data)

        return data1


class MainModel(nn.Module):
    def __init__(self):
        super().__init__()

        self.estimation = estimation()
        self.Jnet = JNet()
        # self.unet_J = UNet(n_channels=3, n_classes=3, bilinear=True)
        # self.Tnet = TNet()

    def forward(self, img):

        beta, A = self.estimation(img)
        beta_d = beta[:, :3, :, :]
        beta_b = beta[:, 3:, :, :]
        J = self.Jnet(img)
        A = torch.unsqueeze(torch.unsqueeze(A, 2), 2)
        A = A.expand_as(J)

        return [beta_d, beta_b], J, A


def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv2d') != -1:
        m.weight.data.normal_(0.0, 0.001)
    if classname.find('Linear') != -1:
        m.weight.data.normal_(0.0, 0.001)
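Note: MainModel only predicts the ingredients ([beta_d, beta_b], J, A); test_simple.py combines them with the predicted depth d via the underwater image-formation model I = J·exp(-beta_d·d) + A·(1 - exp(-beta_b·d)). A minimal sketch of that composition step, mirroring test_simple.py; the random image, random depth, 256x256 size and untrained weights are assumptions for illustration only.

import torch
from src.networks import MainModel

model = MainModel().eval()
img = torch.rand(1, 3, 256, 256)          # stand-in for the underwater image in [0, 1]
depth = torch.rand(1, 1, 256, 256) * 20   # stand-in for the depth map from the depth branch

with torch.no_grad():
    (beta_d, beta_b), J, A = model(img)
    beta_d, beta_b = 5.0 * beta_d, 5.0 * beta_b        # same scaling as test_simple.py
    direct = J * torch.exp(-beta_d * depth)            # attenuated scene radiance
    backscatter = (1 - torch.exp(-beta_b * depth)) * A # veiling light
    I_rec = direct + backscatter                       # reconstructed underwater image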
src/weights/depth.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:185fdc2788a039352584f942fbd7c47e70eb32472ee92770e4eb90c9ee8f3cd7
size 12621521

src/weights/encoder.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b1c1df619da0aed60d1bb68b4a0012b0d6f541836f64ffe6a10aa098ef4c0732
size 76780611

src/weights/pose.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:31360cc629502594dd329c756ad71b2ce6e2c42ae580b52bf07f399b3d9a2322
size 5260687

src/weights/pose_encoder.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2c6b0a44f764c42ea39d272abf6a072ba13430da7efea43dfdbd6a2e73a0562e
size 46875213

src/weights/uie_model.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b13897691126aa5aa806aa95dc1599186546e5c585c26fad9c30e60d6b2d7a5f
size 2300389
test_simple.py
ADDED
@@ -0,0 +1,169 @@
# Copyright Niantic 2021. Patent Pending. All rights reserved.
#
# This software is licensed under the terms of the ManyDepth licence
# which allows for non-commercial use only, the full terms of which are made
# available in the LICENSE file.

import os
import time
import json
import argparse
import numpy as np
from PIL import Image
import matplotlib as mpl
import matplotlib.cm as cm

import torch
from torch import Tensor
import torchvision
from torchvision import transforms
import torch.nn.functional as F

from src.networks import *
from utils import transformation_from_parameters, disp_to_depth, line


def load_and_preprocess_image(image, resize_width, resize_height):
    image_ori = image.convert('RGB')
    W, H = image_ori.size
    W_resized = W - W % 32
    H_resized = H - H % 32
    img_ori_npy = np.array(image_ori)[0:H_resized, 0:W_resized]

    image = image_ori.resize((resize_width, resize_height), Image.Resampling.LANCZOS)
    image = transforms.ToTensor()(image)
    image_ori = transforms.ToTensor()(img_ori_npy).unsqueeze(0)
    image = line(image).unsqueeze(0)
    if torch.cuda.is_available():
        return image_ori.cuda(), image.cuda(), (H, W)
    return image_ori, image, (H, W)


def load_and_preprocess_intrinsics(intrinsics_path, resize_width, resize_height):
    K = np.eye(4)
    with open(intrinsics_path, 'r') as f:
        K[:3, :3] = np.array(json.load(f))

    # Convert normalised intrinsics to 1/4 size unnormalised intrinsics.
    # (The cost volume construction expects the intrinsics corresponding to 1/4 size images)
    K[0, :] *= resize_width // 4
    K[1, :] *= resize_height // 4

    invK = torch.Tensor(np.linalg.pinv(K)).unsqueeze(0)
    K = torch.Tensor(K).unsqueeze(0)

    if torch.cuda.is_available():
        return K.cuda(), invK.cuda()
    return K, invK


def tensor2img(img: Tensor) -> np.ndarray:
    return (255.0 * img.permute(1, 2, 0).cpu().detach().numpy()).astype(np.uint8)


def test_simple(image: Image):
    """Function to predict for a single image or folder of images
    """
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    # Loading pretrained model
    encoder_dict = torch.load("src/weights/encoder.pth", map_location=device)
    encoder = ResnetEncoderMatching(18, False,
                                    input_width=encoder_dict['width'],
                                    input_height=encoder_dict['height'],
                                    adaptive_bins=True,
                                    min_depth_bin=encoder_dict['min_depth_bin'],
                                    max_depth_bin=encoder_dict['max_depth_bin'],
                                    depth_binning='linear',
                                    num_depth_bins=96)

    filtered_dict_enc = {k: v for k, v in encoder_dict.items() if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)

    depth_decoder = DepthDecoder(num_ch_enc=encoder.num_ch_enc, scales=range(4))

    loaded_dict = torch.load("src/weights/depth.pth", map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    pose_enc_dict = torch.load("src/weights/pose_encoder.pth", map_location=device)
    pose_dec_dict = torch.load("src/weights/pose.pth", map_location=device)

    pose_enc = ResnetEncoder(18, False, num_input_images=2)
    pose_dec = PoseDecoder(pose_enc.num_ch_enc,
                           num_input_features=1,
                           num_frames_to_predict_for=2)

    pose_enc.load_state_dict(pose_enc_dict, strict=True)
    pose_dec.load_state_dict(pose_dec_dict, strict=True)

    restoration_dict = torch.load("src/weights/uie_model.pth", map_location=device)
    uie_model = MainModel()
    uie_model.load_state_dict(restoration_dict, strict=False)

    # Setting states of networks
    encoder.eval()
    depth_decoder.eval()
    pose_enc.eval()
    pose_dec.eval()
    uie_model.eval()
    if torch.cuda.is_available():
        encoder.cuda()
        depth_decoder.cuda()
        pose_enc.cuda()
        pose_dec.cuda()
        uie_model.cuda()

    # Load input data
    input_image_ori, input_image, original_size = load_and_preprocess_image(image,
                                                                            resize_width=encoder_dict['width'],
                                                                            resize_height=encoder_dict['height'])
    source_image_ori, source_image, _ = load_and_preprocess_image(image,
                                                                  resize_width=encoder_dict['width'],
                                                                  resize_height=encoder_dict['height'])

    K, invK = load_and_preprocess_intrinsics('canyons_intrinsics.json',
                                             resize_width=encoder_dict['width'],
                                             resize_height=encoder_dict['height'])

    with torch.no_grad():

        # Estimate poses
        pose_inputs = [source_image, input_image]
        pose_inputs = [pose_enc(torch.cat(pose_inputs, 1))]
        axisangle, translation = pose_dec(pose_inputs)
        pose = transformation_from_parameters(axisangle[:, 0], translation[:, 0], invert=True)

        pose *= 0  # zero poses are a signal to the encoder not to construct a cost volume
        source_image *= 0

        # Estimate depth
        output, lowest_cost, _ = encoder(current_image=input_image,
                                         lookup_images=source_image.unsqueeze(1),
                                         poses=pose.unsqueeze(1),
                                         K=K,
                                         invK=invK,
                                         min_depth_bin=encoder_dict['min_depth_bin'],
                                         max_depth_bin=encoder_dict['max_depth_bin'])

        output = depth_decoder(output)

        sigmoid_output = output[("disp", 0)]
        _, depth_output = disp_to_depth(sigmoid_output, min_depth=0.1, max_depth=20)
        sigmoid_output_resized = F.interpolate(
            sigmoid_output, original_size, mode="bilinear", align_corners=False)
        sigmoid_output_resized = sigmoid_output_resized.cpu().numpy()[:, 0]
        depth = F.interpolate(
            depth_output, input_image_ori.shape[2:], mode="bilinear", align_corners=False)

        beta, J, A = uie_model(input_image_ori)

        beta[0] = 5.0 * beta[0]
        beta[1] = 5.0 * beta[1]

        t1 = torch.exp(-beta[0] * depth)
        D1 = J * t1
        B1 = (1 - torch.exp(-beta[1] * depth)) * A
        I_rec = D1 + B1

        # tensor2img returns a uint8 array, so use Image.fromarray (Image.open expects a path or file)
        J_out = Image.fromarray(tensor2img(J[0]))

    return J_out
utils.py
ADDED
@@ -0,0 +1,118 @@
# Copyright Niantic 2021. Patent Pending. All rights reserved.
#
# This software is licensed under the terms of the ManyDepth licence
# which allows for non-commercial use only, the full terms of which are made
# available in the LICENSE file.

import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F


def disp_to_depth(disp, min_depth=0.1, max_depth=100):
    """Convert network's sigmoid output into depth prediction
    The formula for this conversion is given in the 'additional considerations'
    section of the paper.
    """
    min_disp = 1 / max_depth  # 0.05
    max_disp = 1 / min_depth  # 10
    scaled_disp = min_disp + (max_disp - min_disp) * disp
    depth = 1 / scaled_disp
    return scaled_disp, depth


def transformation_from_parameters(axisangle, translation, invert=False):
    """Convert the network's (axisangle, translation) output into a 4x4 matrix
    """
    R = rot_from_axisangle(axisangle)
    t = translation.clone()

    if invert:
        R = R.transpose(1, 2)
        t *= -1

    T = get_translation_matrix(t)

    if invert:
        M = torch.matmul(R, T)
    else:
        M = torch.matmul(T, R)

    return M


def get_translation_matrix(translation_vector):
    """Convert a translation vector into a 4x4 transformation matrix
    """
    T = torch.zeros(translation_vector.shape[0], 4, 4).to(device=translation_vector.device)

    t = translation_vector.contiguous().view(-1, 3, 1)

    T[:, 0, 0] = 1
    T[:, 1, 1] = 1
    T[:, 2, 2] = 1
    T[:, 3, 3] = 1
    T[:, :3, 3, None] = t

    return T


def rot_from_axisangle(vec):
    """Convert an axisangle rotation into a 4x4 transformation matrix
    (adapted from https://github.com/Wallacoloo/printipi)
    Input 'vec' has to be Bx1x3
    """
    angle = torch.norm(vec, 2, 2, True)
    axis = vec / (angle + 1e-7)

    ca = torch.cos(angle)
    sa = torch.sin(angle)
    C = 1 - ca

    x = axis[..., 0].unsqueeze(1)
    y = axis[..., 1].unsqueeze(1)
    z = axis[..., 2].unsqueeze(1)

    xs = x * sa
    ys = y * sa
    zs = z * sa
    xC = x * C
    yC = y * C
    zC = z * C
    xyC = x * yC
    yzC = y * zC
    zxC = z * xC

    rot = torch.zeros((vec.shape[0], 4, 4)).to(device=vec.device)

    rot[:, 0, 0] = torch.squeeze(x * xC + ca)
    rot[:, 0, 1] = torch.squeeze(xyC - zs)
    rot[:, 0, 2] = torch.squeeze(zxC + ys)
    rot[:, 1, 0] = torch.squeeze(xyC + zs)
    rot[:, 1, 1] = torch.squeeze(y * yC + ca)
    rot[:, 1, 2] = torch.squeeze(yzC - xs)
    rot[:, 2, 0] = torch.squeeze(zxC - ys)
    rot[:, 2, 1] = torch.squeeze(yzC + xs)
    rot[:, 2, 2] = torch.squeeze(z * zC + ca)
    rot[:, 3, 3] = 1

    return rot


def normalize(img):
    return (img - img.min()) / (img.max() - img.min())


def line(img):
    img = img.unsqueeze(0)
    if img.shape[1] == 1:
        q5, q95 = torch.quantile(img.flatten(), q=torch.tensor((0.05, 0.95), device=img.device))

        img[img < q5] = q5
        img[img > q95] = q95

        return normalize(img)
    elif img.shape[1] == 3:
        for c in range(3):
            img[:, c:c+1] = line(img[:, c:c+1])
        return img.squeeze()
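Note: a quick worked check of disp_to_depth with the values test_simple.py passes (min_depth=0.1, max_depth=20); the numbers below follow directly from the formula above.

import torch
from utils import disp_to_depth

disp = torch.tensor([0.0, 0.5, 1.0])           # sigmoid outputs from the depth decoder
scaled_disp, depth = disp_to_depth(disp, min_depth=0.1, max_depth=20)
# scaled_disp = 0.05 + (10 - 0.05) * disp  ->  [0.05, 5.025, 10.0]
# depth = 1 / scaled_disp                  ->  [20.0, ~0.199, 0.1]
print(scaled_disp, depth)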