geowizard

Runtime error

App Files Files Community

lemonaddie committed on Mar 23

Commit

2e23827

•

1 Parent(s): 87f795e

Upload 11 files

Browse files

Files changed (11) hide show

utils/batch_size.py +63 -0
utils/colormap.py +45 -0
utils/common.py +42 -0
utils/dataset_configuration.py +81 -0
utils/de_normalized.py +33 -0
utils/depth2normal.py +186 -0
utils/depth_ensemble.py +115 -0
utils/image_util.py +83 -0
utils/normal_ensemble.py +22 -0
utils/seed_all.py +33 -0
utils/surface_normal.py +213 -0

utils/batch_size.py ADDED Viewed

	@@ -0,0 +1,63 @@

+# A reimplemented version in public environments by Xiao Fu and Mu Hu
+import torch
+import math
+# Search table for suggested max. inference batch size.
+# Each entry pairs an input resolution ("res"), a total-VRAM threshold
+# ("total_vram", compared by find_batch_size against the device total in GiB)
+# and a dtype with the batch size ("bs") recorded for that configuration.
+bs_search_table = [
+    # tested on A100-PCIE-80GB
+    {"res": 768, "total_vram": 79, "bs": 35, "dtype": torch.float32},
+    {"res": 1024, "total_vram": 79, "bs": 20, "dtype": torch.float32},
+    # tested on A100-PCIE-40GB
+    {"res": 768, "total_vram": 39, "bs": 15, "dtype": torch.float32},
+    {"res": 1024, "total_vram": 39, "bs": 8, "dtype": torch.float32},
+    {"res": 768, "total_vram": 39, "bs": 30, "dtype": torch.float16},
+    {"res": 1024, "total_vram": 39, "bs": 15, "dtype": torch.float16},
+    # tested on RTX3090, RTX4090
+    {"res": 512, "total_vram": 23, "bs": 20, "dtype": torch.float32},
+    {"res": 768, "total_vram": 23, "bs": 7, "dtype": torch.float32},
+    {"res": 1024, "total_vram": 23, "bs": 3, "dtype": torch.float32},
+    {"res": 512, "total_vram": 23, "bs": 40, "dtype": torch.float16},
+    {"res": 768, "total_vram": 23, "bs": 18, "dtype": torch.float16},
+    {"res": 1024, "total_vram": 23, "bs": 10, "dtype": torch.float16},
+    # tested on GTX1080Ti
+    {"res": 512, "total_vram": 10, "bs": 5, "dtype": torch.float32},
+    {"res": 768, "total_vram": 10, "bs": 2, "dtype": torch.float32},
+    {"res": 512, "total_vram": 10, "bs": 10, "dtype": torch.float16},
+    {"res": 768, "total_vram": 10, "bs": 5, "dtype": torch.float16},
+    {"res": 1024, "total_vram": 10, "bs": 3, "dtype": torch.float16},
+]
+def find_batch_size(ensemble_size: int, input_res: int, dtype: torch.dtype) -> int:
+    """
+    Automatically search for suitable operating batch size.
+    Args:
+        ensemble_size (`int`):
+            Number of predictions to be ensembled.
+        input_res (`int`):
+            Operating resolution of the input image.
+    Returns:
+        `int`: Operating batch size.
+    """
+    if not torch.cuda.is_available():
+        return 1
+    total_vram = torch.cuda.mem_get_info()[1] / 1024.0**3
+    filtered_bs_search_table = [s for s in bs_search_table if s["dtype"] == dtype]
+    for settings in sorted(
+        filtered_bs_search_table,
+        key=lambda k: (k["res"], -k["total_vram"]),
+    ):
+        if input_res <= settings["res"] and total_vram >= settings["total_vram"]:
+            bs = settings["bs"]
+            if bs > ensemble_size:
+                bs = ensemble_size
+            elif bs > math.ceil(ensemble_size / 2) and bs < ensemble_size:
+                bs = math.ceil(ensemble_size / 2)
+            return bs
+    return 1

utils/colormap.py ADDED Viewed

	@@ -0,0 +1,45 @@

+# A reimplemented version in public environments by Xiao Fu and Mu Hu
+import numpy as np
+import cv2
+def kitti_colormap(disparity, maxval=-1):
+	"""
+	A utility function to reproduce KITTI fake colormap
+	Arguments:
+	  - disparity: numpy float32 array of dimension HxW
+	  - maxval: maximum disparity value for normalization (if equal to -1, the maximum value in disparity will be used)
+	Returns a numpy uint8 array of shape HxWx3.
+	Pixels whose disparity is not strictly positive are set to 0 in the output.
+	"""
+	# Any negative maxval (not only -1) triggers the automatic maximum.
+	if maxval < 0:
+		maxval = np.max(disparity)
+	# Eight colour stops; only the first three columns of each row are read below.
+	colormap = np.asarray([[0,0,0,114],[0,0,1,185],[1,0,0,114],[1,0,1,174],[0,1,0,114],[0,1,1,185],[1,1,0,114],[1,1,1,0]])
+	weights = np.asarray([8.771929824561404,5.405405405405405,8.771929824561404,5.747126436781609,8.771929824561404,5.405405405405405,8.771929824561404,0])
+	# Bin boundaries on the normalised [0, 1] axis, one per colour stop.
+	cumsum = np.asarray([0,0.114,0.299,0.413,0.587,0.701,0.8859999999999999,0.9999999999999999])
+	colored_disp = np.zeros([disparity.shape[0], disparity.shape[1], 3])
+	# Normalise by maxval and clamp to [0, 1]; trailing axis added for broadcasting.
+	values = np.expand_dims(np.minimum(np.maximum(disparity/maxval, 0.), 1.), -1)
+	# cumsum tiled to HxWx8 so every pixel can be compared against every boundary.
+	bins = np.repeat(np.repeat(np.expand_dims(np.expand_dims(cumsum,axis=0),axis=0), disparity.shape[1], axis=1), disparity.shape[0], axis=0)
+	# Boundaries below the value are masked with -1000, so argmax picks the
+	# nearest boundary at or above the value; subtracting 1 gives the lower stop.
+	diffs = np.where((np.repeat(values, 8, axis=-1) - bins) > 0, -1000, (np.repeat(values, 8, axis=-1) - bins))
+	index = np.argmax(diffs, axis=-1)-1
+	# w is the blend weight of the lower stop; (1 - w) goes to the next stop.
+	w = 1-(values[:,:,0]-cumsum[index])*np.asarray(weights)[index]
+	# Output channels are written in reverse order relative to the colormap columns.
+	colored_disp[:,:,2] = (w*colormap[index][:,:,0] + (1.-w)*colormap[index+1][:,:,0])
+	colored_disp[:,:,1] = (w*colormap[index][:,:,1] + (1.-w)*colormap[index+1][:,:,1])
+	colored_disp[:,:,0] = (w*colormap[index][:,:,2] + (1.-w)*colormap[index+1][:,:,2])
+	# Zero out non-positive disparities, then scale to 0..255.
+	return (colored_disp*np.expand_dims((disparity>0),-1)*255).astype(np.uint8)
+def read_16bit_gt(path):
+	"""
+	A utility function to read KITTI 16bit gt
+	Arguments:
+	  - path: filepath
+	Returns a numpy float32 array of shape HxW.
+	"""
+	gt = cv2.imread(path,-1).astype(np.float32)/256.
+	return gt

utils/common.py ADDED Viewed

	@@ -0,0 +1,42 @@

+# A reimplemented version in public environments by Xiao Fu and Mu Hu
+import json
+import yaml
+import logging
+import os
+import numpy as np
+import sys
+def load_loss_scheme(loss_config):
+    with open(loss_config, 'r') as f:
+        loss_json = yaml.safe_load(f)
+    return loss_json
+# Set DEBUG to a truthy value to switch the logger below to DEBUG verbosity.
+DEBUG =0
+# getLogger() without a name returns the root logger, so the level and handler
+# configured here take effect for the whole process when this module is imported.
+logger = logging.getLogger()
+if DEBUG:
+    #coloredlogs.install(level='DEBUG')
+    logger.setLevel(logging.DEBUG)
+else:
+    #coloredlogs.install(level='INFO')
+    logger.setLevel(logging.INFO)
+# Stream handler with default arguments; records carry timestamp, source
+# file and line, level and message.
+strhdlr = logging.StreamHandler()
+logger.addHandler(strhdlr)
+formatter = logging.Formatter('%(asctime)s [%(filename)s:%(lineno)d] %(levelname)s %(message)s')
+strhdlr.setFormatter(formatter)
+def count_parameters(model):
+    return sum(p.numel() for p in model.parameters() if p.requires_grad)
+def check_path(path):
+    if not os.path.exists(path):
+        os.makedirs(path, exist_ok=True)

utils/dataset_configuration.py ADDED Viewed

	@@ -0,0 +1,81 @@

+# A reimplemented version in public environments by Xiao Fu and Mu Hu
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+import sys
+sys.path.append("..")
+from dataloader.mix_loader import MixDataset
+from torch.utils.data import DataLoader
+from dataloader import transforms
+import os
+# Get Dataset Here
+def prepare_dataset(data_dir=None,
+                    batch_size=1,
+                    test_batch=1,
+                    datathread=4,
+                    logger=None):
+    # set the config parameters
+    dataset_config_dict = dict()
+    train_dataset = MixDataset(data_dir=data_dir)
+    img_height, img_width = train_dataset.get_img_size()
+    datathread = datathread
+    if os.environ.get('datathread') is not None:
+        datathread = int(os.environ.get('datathread'))
+    if logger is not None:
+        logger.info("Use %d processes to load data..." % datathread)
+    train_loader = DataLoader(train_dataset, batch_size = batch_size, \
+                            shuffle = True, num_workers = datathread, \
+                            pin_memory = True)
+    num_batches_per_epoch = len(train_loader)
+    dataset_config_dict['num_batches_per_epoch'] = num_batches_per_epoch
+    dataset_config_dict['img_size'] = (img_height,img_width)
+    return train_loader, dataset_config_dict
+def depth_scale_shift_normalization(depth):
+    bsz = depth.shape[0]
+    depth_ = depth[:,0,:,:].reshape(bsz,-1).cpu().numpy()
+    min_value = torch.from_numpy(np.percentile(a=depth_,q=2,axis=1)).to(depth)[...,None,None,None]
+    max_value = torch.from_numpy(np.percentile(a=depth_,q=98,axis=1)).to(depth)[...,None,None,None]
+    normalized_depth = ((depth - min_value)/(max_value-min_value+1e-5) - 0.5) * 2
+    normalized_depth = torch.clip(normalized_depth, -1., 1.)
+    return normalized_depth
+def resize_max_res_tensor(input_tensor, mode, recom_resolution=768):
+    assert input_tensor.shape[1]==3
+    original_H, original_W = input_tensor.shape[2:]
+    downscale_factor = min(recom_resolution/original_H, recom_resolution/original_W)
+    if mode == 'normal':
+        resized_input_tensor = F.interpolate(input_tensor,
+                                            scale_factor=downscale_factor,
+                                            mode='nearest')
+    else:
+        resized_input_tensor = F.interpolate(input_tensor,
+                                            scale_factor=downscale_factor,
+                                            mode='bilinear',
+                                            align_corners=False)
+    if mode == 'depth':
+        return resized_input_tensor / downscale_factor
+    else:
+        return resized_input_tensor

utils/de_normalized.py ADDED Viewed

	@@ -0,0 +1,33 @@

+# A reimplemented version in public environments by Xiao Fu and Mu Hu
+import numpy as np
+from scipy.optimize import least_squares
+import torch
+def align_scale_shift(pred, target, clip_max):
+    mask = (target > 0) & (target < clip_max)
+    if mask.sum() > 10:
+        target_mask = target[mask]
+        pred_mask = pred[mask]
+        scale, shift = np.polyfit(pred_mask, target_mask, deg=1)
+        return scale, shift
+    else:
+        return 1, 0
+def align_scale(pred: torch.Tensor, target: torch.Tensor):
+    """
+    Scale `pred` so that its median matches the median of `target`.
+    Only positions where target > 0 are used. With 10 or fewer such positions
+    the scale is the plain integer 1.
+    Returns (scaled prediction, scale).
+    """
+    mask = target > 0
+    if torch.sum(mask) > 10:
+        # 1e-8 guards against division by a zero median.
+        scale = torch.median(target[mask]) / (torch.median(pred[mask]) + 1e-8)
+    else:
+        scale = 1
+    pred_scale = pred * scale
+    return pred_scale, scale
+def align_shift(pred: torch.tensor, target: torch.tensor):
+    mask = target > 0
+    if torch.sum(mask) > 10:
+        shift = torch.median(target[mask]) - (torch.median(pred[mask]) + 1e-8)
+    else:
+        shift = 0
+    pred_shift = pred + shift
+    return pred_shift, shift

utils/depth2normal.py ADDED Viewed

	@@ -0,0 +1,186 @@

+# A reimplemented version in public environments by Xiao Fu and Mu Hu
+import pickle
+import os
+import h5py
+import numpy as np
+import cv2
+import torch
+import torch.nn as nn
+import glob
+def init_image_coor(height, width):
+    x_row = np.arange(0, width)
+    x = np.tile(x_row, (height, 1))
+    x = x[np.newaxis, :, :]
+    x = x.astype(np.float32)
+    x = torch.from_numpy(x.copy()).cuda()
+    u_u0 = x - width/2.0
+    y_col = np.arange(0, height)  # y_col = np.arange(0, height)
+    y = np.tile(y_col, (width, 1)).T
+    y = y[np.newaxis, :, :]
+    y = y.astype(np.float32)
+    y = torch.from_numpy(y.copy()).cuda()
+    v_v0 = y - height/2.0
+    return u_u0, v_v0
+def depth_to_xyz(depth, focal_length):
+    b, c, h, w = depth.shape
+    u_u0, v_v0 = init_image_coor(h, w)
+    x = u_u0 * depth / focal_length[0]
+    y = v_v0 * depth / focal_length[1]
+    z = depth
+    pw = torch.cat([x, y, z], 1).permute(0, 2, 3, 1) # [b, h, w, c]
+    return pw
+def get_surface_normal(xyz, patch_size=5):
+    """
+    Estimate per-pixel surface normals from a point map.
+    For every pixel, sums of coordinate products over a patch_size x patch_size
+    window form a 3x3 matrix (ATA) and sums of the coordinates form a 3-vector
+    (AT1); the normal is the solution n of ATA n = AT1, scaled to unit length
+    and flipped where its dot product with the point is positive.
+    Requires CUDA: intermediate tensors are created with .cuda().
+    Returns a tensor with the shape of AT1, i.e. [h, w, 3, 1].
+    """
+    # xyz: [1, h, w, 3]
+    x, y, z = torch.unbind(xyz, dim=3)
+    # Add a leading axis so each coordinate map is a valid conv2d input.
+    x = torch.unsqueeze(x, 0)
+    y = torch.unsqueeze(y, 0)
+    z = torch.unsqueeze(z, 0)
+    xx = x * x
+    yy = y * y
+    zz = z * z
+    xy = x * y
+    xz = x * z
+    yz = y * z
+    # Convolving with an all-ones kernel sums each quantity over the patch.
+    patch_weight = torch.ones((1, 1, patch_size, patch_size), requires_grad=False).cuda()
+    xx_patch = nn.functional.conv2d(xx, weight=patch_weight, padding=int(patch_size / 2))
+    yy_patch = nn.functional.conv2d(yy, weight=patch_weight, padding=int(patch_size / 2))
+    zz_patch = nn.functional.conv2d(zz, weight=patch_weight, padding=int(patch_size / 2))
+    xy_patch = nn.functional.conv2d(xy, weight=patch_weight, padding=int(patch_size / 2))
+    xz_patch = nn.functional.conv2d(xz, weight=patch_weight, padding=int(patch_size / 2))
+    yz_patch = nn.functional.conv2d(yz, weight=patch_weight, padding=int(patch_size / 2))
+    # Nine entries in row-major order of the symmetric 3x3 matrix.
+    ATA = torch.stack([xx_patch, xy_patch, xz_patch, xy_patch, yy_patch, yz_patch, xz_patch, yz_patch, zz_patch],
+                      dim=4)
+    # NOTE(review): squeeze() drops every size-1 axis, so this presumably
+    # expects h > 1 and w > 1 - confirm.
+    ATA = torch.squeeze(ATA)
+    ATA = torch.reshape(ATA, (ATA.size(0), ATA.size(1), 3, 3))
+    # Small diagonal term added to every per-pixel matrix before solving.
+    eps_identity = 1e-6 * torch.eye(3, device=ATA.device, dtype=ATA.dtype)[None, None, :, :].repeat([ATA.size(0), ATA.size(1), 1, 1])
+    ATA = ATA + eps_identity
+    x_patch = nn.functional.conv2d(x, weight=patch_weight, padding=int(patch_size / 2))
+    y_patch = nn.functional.conv2d(y, weight=patch_weight, padding=int(patch_size / 2))
+    z_patch = nn.functional.conv2d(z, weight=patch_weight, padding=int(patch_size / 2))
+    AT1 = torch.stack([x_patch, y_patch, z_patch], dim=4)
+    AT1 = torch.squeeze(AT1)
+    AT1 = torch.unsqueeze(AT1, 3)
+    # The image is processed as a patch_num x patch_num grid of tiles.
+    patch_num = 4
+    patch_x = int(AT1.size(1) / patch_num)
+    patch_y = int(AT1.size(0) / patch_num)
+    # NOTE(review): n_img starts as random values; rows/columns beyond
+    # patch_num * patch_y / patch_num * patch_x are never overwritten below.
+    n_img = torch.randn(AT1.shape).cuda()
+    overlap = patch_size // 2 + 1
+    # From here on x and y are tile indices, shadowing the coordinate tensors.
+    for x in range(int(patch_num)):
+        for y in range(int(patch_num)):
+            # Flags are 0 on the image border, where a tile cannot be extended.
+            left_flg = 0 if x == 0 else 1
+            right_flg = 0 if x == patch_num -1 else 1
+            top_flg = 0 if y == 0 else 1
+            btm_flg = 0 if y == patch_num - 1 else 1
+            at1 = AT1[y * patch_y - top_flg * overlap:(y + 1) * patch_y + btm_flg * overlap,
+                  x * patch_x - left_flg * overlap:(x + 1) * patch_x + right_flg * overlap]
+            ata = ATA[y * patch_y - top_flg * overlap:(y + 1) * patch_y + btm_flg * overlap,
+                  x * patch_x - left_flg * overlap:(x + 1) * patch_x + right_flg * overlap]
+            # n_img_tmp, _ = torch.solve(at1, ata)
+            n_img_tmp = torch.linalg.solve(ata, at1)
+            # Discard the overlap margin and keep only the tile interior.
+            n_img_tmp_select = n_img_tmp[top_flg * overlap:patch_y + top_flg * overlap, left_flg * overlap:patch_x + left_flg * overlap, :, :]
+            n_img[y * patch_y:y * patch_y + patch_y, x * patch_x:x * patch_x + patch_x, :, :] = n_img_tmp_select
+    # No epsilon here: a zero-length solution yields non-finite values.
+    n_img_L2 = torch.sqrt(torch.sum(n_img ** 2, dim=2, keepdim=True))
+    n_img_norm = n_img / n_img_L2
+    # re-orient normals consistently
+    orient_mask = torch.sum(torch.squeeze(n_img_norm) * torch.squeeze(xyz), dim=2) > 0
+    n_img_norm[orient_mask] *= -1
+    return n_img_norm
+def get_surface_normalv2(xyz, patch_size=5):
+    """
+    Estimate surface normals from cross products of neighbouring point differences.
+    xyz: xyz coordinates, [b, h, w, c]
+    Two normals are computed per pixel, each as
+    cross(left - right, top - bottom), from two sets of neighbour samples,
+    and their normalised sum is returned.
+    NOTE(review): the padding slice below only matches xyz for odd
+    patch_size >= 3 - confirm callers never pass other values.
+    return: normal [h, w, 3, b]
+    """
+    b, h, w, c = xyz.shape
+    half_patch = patch_size // 2
+    # Zero-pad by half_patch on every side so neighbours exist at the border.
+    xyz_pad = torch.zeros((b, h + patch_size - 1, w + patch_size - 1, c), dtype=xyz.dtype, device=xyz.device)
+    xyz_pad[:, half_patch:-half_patch, half_patch:-half_patch, :] = xyz
+    # Outer samples: neighbours half_patch pixels away on each side.
+    xyz_left = xyz_pad[:, half_patch:half_patch + h, :w, :]  # p4
+    xyz_right = xyz_pad[:, half_patch:half_patch + h, -w:, :]  # p6
+    xyz_top = xyz_pad[:, :h, half_patch:half_patch + w, :]  # p2
+    xyz_bottom = xyz_pad[:, -h:, half_patch:half_patch + w, :]  # p8
+    xyz_horizon = xyz_left - xyz_right  # p4p6
+    xyz_vertical = xyz_top - xyz_bottom  # p2p8
+    # Second set: left/top samples start one pixel further in; the right/bottom
+    # slices select the same columns/rows as xyz_right/xyz_bottom above.
+    xyz_left_in = xyz_pad[:, half_patch:half_patch + h, 1:w+1, :]  # p4
+    xyz_right_in = xyz_pad[:, half_patch:half_patch + h, patch_size-1:patch_size-1+w, :]  # p6
+    xyz_top_in = xyz_pad[:, 1:h+1, half_patch:half_patch + w, :]  # p2
+    xyz_bottom_in = xyz_pad[:, patch_size-1:patch_size-1+h, half_patch:half_patch + w, :]  # p8
+    xyz_horizon_in = xyz_left_in - xyz_right_in  # p4p6
+    xyz_vertical_in = xyz_top_in - xyz_bottom_in  # p2p8
+    n_img_1 = torch.cross(xyz_horizon_in, xyz_vertical_in, dim=3)
+    n_img_2 = torch.cross(xyz_horizon, xyz_vertical, dim=3)
+    # re-orient normals consistently
+    orient_mask = torch.sum(n_img_1 * xyz, dim=3) > 0
+    n_img_1[orient_mask] *= -1
+    orient_mask = torch.sum(n_img_2 * xyz, dim=3) > 0
+    n_img_2[orient_mask] *= -1
+    # Normalise each estimate; 1e-8 avoids division by zero.
+    n_img1_L2 = torch.sqrt(torch.sum(n_img_1 ** 2, dim=3, keepdim=True))
+    n_img1_norm = n_img_1 / (n_img1_L2 + 1e-8)
+    n_img2_L2 = torch.sqrt(torch.sum(n_img_2 ** 2, dim=3, keepdim=True))
+    n_img2_norm = n_img_2 / (n_img2_L2 + 1e-8)
+    # average 2 norms
+    n_img_aver = n_img1_norm + n_img2_norm
+    n_img_aver_L2 = torch.sqrt(torch.sum(n_img_aver ** 2, dim=3, keepdim=True))
+    n_img_aver_norm = n_img_aver / (n_img_aver_L2 + 1e-8)
+    # re-orient normals consistently
+    orient_mask = torch.sum(n_img_aver_norm * xyz, dim=3) > 0
+    n_img_aver_norm[orient_mask] *= -1
+    n_img_aver_norm_out = n_img_aver_norm.permute((1, 2, 3, 0))  # [h, w, c, b]
+    return n_img_aver_norm_out
+def surface_normal_from_depth(depth, focal_length, valid_mask=None):
+    # para depth: depth map, [b, c, h, w]
+    b, c, h, w = depth.shape
+    focal_length = focal_length[:, None, None, None]
+    depth_filter = nn.functional.avg_pool2d(depth, kernel_size=3, stride=1, padding=1)
+    #depth_filter = nn.functional.avg_pool2d(depth_filter, kernel_size=3, stride=1, padding=1)
+    xyz = depth_to_xyz(depth_filter, focal_length)
+    sn_batch = []
+    for i in range(b):
+        xyz_i = xyz[i, :][None, :, :, :]
+        #normal = get_surface_normalv2(xyz_i)
+        normal = get_surface_normal(xyz_i)
+        sn_batch.append(normal)
+    sn_batch = torch.cat(sn_batch, dim=3).permute((3, 2, 0, 1))  # [b, c, h, w]
+    if valid_mask != None:
+        mask_invalid = (~valid_mask).repeat(1, 3, 1, 1)
+        sn_batch[mask_invalid] = 0.0
+    return sn_batch

utils/depth_ensemble.py ADDED Viewed

	@@ -0,0 +1,115 @@

+# A reimplemented version in public environments by Xiao Fu and Mu Hu
+import numpy as np
+import torch
+from scipy.optimize import minimize
+def inter_distances(tensors: torch.Tensor):
+    """
+    To calculate the distance between each two depth maps.
+    """
+    distances = []
+    for i, j in torch.combinations(torch.arange(tensors.shape[0])):
+        arr1 = tensors[i : i + 1]
+        arr2 = tensors[j : j + 1]
+        distances.append(arr1 - arr2)
+    dist = torch.concat(distances, dim=0)
+    return dist
+def ensemble_depths(input_images:torch.Tensor,
+                    regularizer_strength: float =0.02,
+                    max_iter: int =2,
+                    tol:float =1e-3,
+                    reduction: str='median',
+                    max_res: int=None):
+    """
+    To ensemble multiple affine-invariant depth images (up to scale and shift),
+        by aligning estimating the scale and shift
+    """
+    device = input_images.device
+    dtype = input_images.dtype
+    np_dtype = np.float32
+    original_input = input_images.clone()
+    n_img = input_images.shape[0]
+    ori_shape = input_images.shape
+    if max_res is not None:
+        scale_factor = torch.min(max_res / torch.tensor(ori_shape[-2:]))
+        if scale_factor < 1:
+            downscaler = torch.nn.Upsample(scale_factor=scale_factor, mode="nearest")
+            input_images = downscaler(torch.from_numpy(input_images)).numpy()
+    # init guess
+    _min = np.min(input_images.reshape((n_img, -1)).cpu().numpy(), axis=1) # get the min value of each possible depth
+    _max = np.max(input_images.reshape((n_img, -1)).cpu().numpy(), axis=1) # get the max value of each possible depth
+    s_init = 1.0 / (_max - _min).reshape((-1, 1, 1)) #(10,1,1) : re-scale'f scale
+    t_init = (-1 * s_init.flatten() * _min.flatten()).reshape((-1, 1, 1)) #(10,1,1)
+    x = np.concatenate([s_init, t_init]).reshape(-1).astype(np_dtype) #(20,)
+    input_images = input_images.to(device)
+    # objective function
+    def closure(x):
+        l = len(x)
+        s = x[: int(l / 2)]
+        t = x[int(l / 2) :]
+        s = torch.from_numpy(s).to(dtype=dtype).to(device)
+        t = torch.from_numpy(t).to(dtype=dtype).to(device)
+        transformed_arrays = input_images * s.view((-1, 1, 1)) + t.view((-1, 1, 1))
+        dists = inter_distances(transformed_arrays)
+        sqrt_dist = torch.sqrt(torch.mean(dists**2))
+        if "mean" == reduction:
+            pred = torch.mean(transformed_arrays, dim=0)
+        elif "median" == reduction:
+            pred = torch.median(transformed_arrays, dim=0).values
+        else:
+            raise ValueError
+        near_err = torch.sqrt((0 - torch.min(pred)) ** 2)
+        far_err = torch.sqrt((1 - torch.max(pred)) ** 2)
+        err = sqrt_dist + (near_err + far_err) * regularizer_strength
+        err = err.detach().cpu().numpy().astype(np_dtype)
+        return err
+    res = minimize(
+        closure, x, method="BFGS", tol=tol, options={"maxiter": max_iter, "disp": False}
+    )
+    x = res.x
+    l = len(x)
+    s = x[: int(l / 2)]
+    t = x[int(l / 2) :]
+    # Prediction
+    s = torch.from_numpy(s).to(dtype=dtype).to(device)
+    t = torch.from_numpy(t).to(dtype=dtype).to(device)
+    transformed_arrays = original_input * s.view(-1, 1, 1) + t.view(-1, 1, 1) #[10,H,W]
+    if "mean" == reduction:
+        aligned_images = torch.mean(transformed_arrays, dim=0)
+        std = torch.std(transformed_arrays, dim=0)
+        uncertainty = std
+    elif "median" == reduction:
+        aligned_images = torch.median(transformed_arrays, dim=0).values
+        # MAD (median absolute deviation) as uncertainty indicator
+        abs_dev = torch.abs(transformed_arrays - aligned_images)
+        mad = torch.median(abs_dev, dim=0).values
+        uncertainty = mad
+    # Scale and shift to [0, 1]
+    _min = torch.min(aligned_images)
+    _max = torch.max(aligned_images)
+    aligned_images = (aligned_images - _min) / (_max - _min)
+    uncertainty /= _max - _min
+    return aligned_images, uncertainty

utils/image_util.py ADDED Viewed

	@@ -0,0 +1,83 @@

+# A reimplemented version in public environments by Xiao Fu and Mu Hu
+import matplotlib
+import numpy as np
+import torch
+from PIL import Image
+def resize_max_res(img: Image.Image, max_edge_resolution: int) -> Image.Image:
+    """
+    Resize image to limit maximum edge length while keeping aspect ratio.
+    Args:
+        img (`Image.Image`):
+            Image to be resized.
+        max_edge_resolution (`int`):
+            Maximum edge length (pixel).
+    Returns:
+        `Image.Image`: Resized image.
+    """
+    original_width, original_height = img.size
+    downscale_factor = min(
+        max_edge_resolution / original_width, max_edge_resolution / original_height
+    )
+    new_width = int(original_width * downscale_factor)
+    new_height = int(original_height * downscale_factor)
+    resized_img = img.resize((new_width, new_height))
+    return resized_img
+def colorize_depth_maps(
+    depth_map, min_depth, max_depth, cmap="Spectral", valid_mask=None
+):
+    """
+    Colorize depth maps.
+    """
+    assert len(depth_map.shape) >= 2, "Invalid dimension"
+    if isinstance(depth_map, torch.Tensor):
+        depth = depth_map.detach().clone().squeeze().numpy()
+    elif isinstance(depth_map, np.ndarray):
+        depth = depth_map.copy().squeeze()
+    # reshape to [ (B,) H, W ]
+    if depth.ndim < 3:
+        depth = depth[np.newaxis, :, :]
+    # colorize
+    cm = matplotlib.colormaps[cmap]
+    depth = ((depth - min_depth) / (max_depth - min_depth)).clip(0, 1)
+    img_colored_np = cm(depth, bytes=False)[:, :, :, 0:3]  # value from 0 to 1
+    img_colored_np = np.rollaxis(img_colored_np, 3, 1)
+    if valid_mask is not None:
+        if isinstance(depth_map, torch.Tensor):
+            valid_mask = valid_mask.detach().numpy()
+        valid_mask = valid_mask.squeeze()  # [H, W] or [B, H, W]
+        if valid_mask.ndim < 3:
+            valid_mask = valid_mask[np.newaxis, np.newaxis, :, :]
+        else:
+            valid_mask = valid_mask[:, np.newaxis, :, :]
+        valid_mask = np.repeat(valid_mask, 3, axis=1)
+        img_colored_np[~valid_mask] = 0
+    if isinstance(depth_map, torch.Tensor):
+        img_colored = torch.from_numpy(img_colored_np).float()
+    elif isinstance(depth_map, np.ndarray):
+        img_colored = img_colored_np
+    return img_colored
+def chw2hwc(chw):
+    assert 3 == len(chw.shape)
+    if isinstance(chw, torch.Tensor):
+        hwc = torch.permute(chw, (1, 2, 0))
+    elif isinstance(chw, np.ndarray):
+        hwc = np.moveaxis(chw, 0, -1)
+    return hwc

utils/normal_ensemble.py ADDED Viewed

	@@ -0,0 +1,22 @@

+# A reimplemented version in public environments by Xiao Fu and Mu Hu
+import numpy as np
+import torch
+def ensemble_normals(input_images:torch.Tensor):
+    normal_preds = input_images
+    bsz, d, h, w = normal_preds.shape
+    normal_preds = normal_preds / (torch.norm(normal_preds, p=2, dim=1).unsqueeze(1)+1e-5)
+    phi = torch.atan2(normal_preds[:,1,:,:], normal_preds[:,0,:,:]).mean(dim=0)
+    theta = torch.atan2(torch.norm(normal_preds[:,:2,:,:], p=2, dim=1), normal_preds[:,2,:,:]).mean(dim=0)
+    normal_pred = torch.zeros((d,h,w)).to(normal_preds)
+    normal_pred[0,:,:] = torch.sin(theta) * torch.cos(phi)
+    normal_pred[1,:,:] = torch.sin(theta) * torch.sin(phi)
+    normal_pred[2,:,:] = torch.cos(theta)
+    angle_error = torch.acos(torch.cosine_similarity(normal_pred[None], normal_preds, dim=1))
+    normal_idx = torch.argmin(angle_error.reshape(bsz,-1).sum(-1))
+    return normal_preds[normal_idx]

utils/seed_all.py ADDED Viewed

	@@ -0,0 +1,33 @@

+# Copyright 2023 Bingxin Ke, ETH Zurich. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------------
+# If you find this code useful, we kindly ask you to cite our paper in your work.
+# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
+# More information about the method can be found at https://marigoldmonodepth.github.io
+# --------------------------------------------------------------------------
+import numpy as np
+import random
+import torch
+def seed_all(seed: int = 0):
+    """
+    Set random seeds of all components.
+    """
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)

utils/surface_normal.py ADDED Viewed

	@@ -0,0 +1,213 @@

+# A reimplemented version in public environments by Xiao Fu and Mu Hu
+import torch
+import numpy as np
+import torch.nn as nn
+def init_image_coor(height, width):
+    x_row = np.arange(0, width)
+    x = np.tile(x_row, (height, 1))
+    x = x[np.newaxis, :, :]
+    x = x.astype(np.float32)
+    x = torch.from_numpy(x.copy()).cuda()
+    u_u0 = x - width/2.0
+    y_col = np.arange(0, height)  # y_col = np.arange(0, height)
+    y = np.tile(y_col, (width, 1)).T
+    y = y[np.newaxis, :, :]
+    y = y.astype(np.float32)
+    y = torch.from_numpy(y.copy()).cuda()
+    v_v0 = y - height/2.0
+    return u_u0, v_v0
+def depth_to_xyz(depth, focal_length):
+    b, c, h, w = depth.shape
+    u_u0, v_v0 = init_image_coor(h, w)
+    x = u_u0 * depth / focal_length
+    y = v_v0 * depth / focal_length
+    z = depth
+    pw = torch.cat([x, y, z], 1).permute(0, 2, 3, 1) # [b, h, w, c]
+    return pw
+def get_surface_normal(xyz, patch_size=3):
+    # xyz: [1, h, w, 3]
+    x, y, z = torch.unbind(xyz, dim=3)
+    x = torch.unsqueeze(x, 0)
+    y = torch.unsqueeze(y, 0)
+    z = torch.unsqueeze(z, 0)
+    xx = x * x
+    yy = y * y
+    zz = z * z
+    xy = x * y
+    xz = x * z
+    yz = y * z
+    patch_weight = torch.ones((1, 1, patch_size, patch_size), requires_grad=False).cuda()
+    xx_patch = nn.functional.conv2d(xx, weight=patch_weight, padding=int(patch_size / 2))
+    yy_patch = nn.functional.conv2d(yy, weight=patch_weight, padding=int(patch_size / 2))
+    zz_patch = nn.functional.conv2d(zz, weight=patch_weight, padding=int(patch_size / 2))
+    xy_patch = nn.functional.conv2d(xy, weight=patch_weight, padding=int(patch_size / 2))
+    xz_patch = nn.functional.conv2d(xz, weight=patch_weight, padding=int(patch_size / 2))
+    yz_patch = nn.functional.conv2d(yz, weight=patch_weight, padding=int(patch_size / 2))
+    ATA = torch.stack([xx_patch, xy_patch, xz_patch, xy_patch, yy_patch, yz_patch, xz_patch, yz_patch, zz_patch],
+                      dim=4)
+    ATA = torch.squeeze(ATA)
+    ATA = torch.reshape(ATA, (ATA.size(0), ATA.size(1), 3, 3))
+    eps_identity = 1e-6 * torch.eye(3, device=ATA.device, dtype=ATA.dtype)[None, None, :, :].repeat([ATA.size(0), ATA.size(1), 1, 1])
+    ATA = ATA + eps_identity
+    x_patch = nn.functional.conv2d(x, weight=patch_weight, padding=int(patch_size / 2))
+    y_patch = nn.functional.conv2d(y, weight=patch_weight, padding=int(patch_size / 2))
+    z_patch = nn.functional.conv2d(z, weight=patch_weight, padding=int(patch_size / 2))
+    AT1 = torch.stack([x_patch, y_patch, z_patch], dim=4)
+    AT1 = torch.squeeze(AT1)
+    AT1 = torch.unsqueeze(AT1, 3)
+    patch_num = 4
+    patch_x = int(AT1.size(1) / patch_num)
+    patch_y = int(AT1.size(0) / patch_num)
+    n_img = torch.randn(AT1.shape).cuda()
+    overlap = patch_size // 2 + 1
+    for x in range(int(patch_num)):
+        for y in range(int(patch_num)):
+            left_flg = 0 if x == 0 else 1
+            right_flg = 0 if x == patch_num -1 else 1
+            top_flg = 0 if y == 0 else 1
+            btm_flg = 0 if y == patch_num - 1 else 1
+            at1 = AT1[y * patch_y - top_flg * overlap:(y + 1) * patch_y + btm_flg * overlap,
+                  x * patch_x - left_flg * overlap:(x + 1) * patch_x + right_flg * overlap]
+            ata = ATA[y * patch_y - top_flg * overlap:(y + 1) * patch_y + btm_flg * overlap,
+                  x * patch_x - left_flg * overlap:(x + 1) * patch_x + right_flg * overlap]
+            n_img_tmp, _ = torch.solve(at1, ata)
+            n_img_tmp_select = n_img_tmp[top_flg * overlap:patch_y + top_flg * overlap, left_flg * overlap:patch_x + left_flg * overlap, :, :]
+            n_img[y * patch_y:y * patch_y + patch_y, x * patch_x:x * patch_x + patch_x, :, :] = n_img_tmp_select
+    n_img_L2 = torch.sqrt(torch.sum(n_img ** 2, dim=2, keepdim=True))
+    n_img_norm = n_img / n_img_L2
+    # re-orient normals consistently
+    orient_mask = torch.sum(torch.squeeze(n_img_norm) * torch.squeeze(xyz), dim=2) > 0
+    n_img_norm[orient_mask] *= -1
+    return n_img_norm
+def get_surface_normalv2(xyz, patch_size=3):
+    """
+    Estimate surface normals from cross products of neighbouring point differences.
+    xyz: xyz coordinates, [b, h, w, c]
+    Two normals are computed per pixel, each as
+    cross(left - right, top - bottom), from two sets of neighbour samples,
+    and their normalised sum is returned.
+    NOTE(review): the padding slice below only matches xyz for odd
+    patch_size >= 3 - confirm callers never pass other values.
+    return: normal [h, w, 3, b]
+    """
+    b, h, w, c = xyz.shape
+    half_patch = patch_size // 2
+    # Zero-pad by half_patch on every side so neighbours exist at the border.
+    xyz_pad = torch.zeros((b, h + patch_size - 1, w + patch_size - 1, c), dtype=xyz.dtype, device=xyz.device)
+    xyz_pad[:, half_patch:-half_patch, half_patch:-half_patch, :] = xyz
+    # Outer samples: neighbours half_patch pixels away on each side.
+    xyz_left = xyz_pad[:, half_patch:half_patch + h, :w, :]  # p4
+    xyz_right = xyz_pad[:, half_patch:half_patch + h, -w:, :]  # p6
+    xyz_top = xyz_pad[:, :h, half_patch:half_patch + w, :]  # p2
+    xyz_bottom = xyz_pad[:, -h:, half_patch:half_patch + w, :]  # p8
+    xyz_horizon = xyz_left - xyz_right  # p4p6
+    xyz_vertical = xyz_top - xyz_bottom  # p2p8
+    # Second set: left/top samples start one pixel further in; the right/bottom
+    # slices select the same columns/rows as xyz_right/xyz_bottom above.
+    xyz_left_in = xyz_pad[:, half_patch:half_patch + h, 1:w+1, :]  # p4
+    xyz_right_in = xyz_pad[:, half_patch:half_patch + h, patch_size-1:patch_size-1+w, :]  # p6
+    xyz_top_in = xyz_pad[:, 1:h+1, half_patch:half_patch + w, :]  # p2
+    xyz_bottom_in = xyz_pad[:, patch_size-1:patch_size-1+h, half_patch:half_patch + w, :]  # p8
+    xyz_horizon_in = xyz_left_in - xyz_right_in  # p4p6
+    xyz_vertical_in = xyz_top_in - xyz_bottom_in  # p2p8
+    n_img_1 = torch.cross(xyz_horizon_in, xyz_vertical_in, dim=3)
+    n_img_2 = torch.cross(xyz_horizon, xyz_vertical, dim=3)
+    # re-orient normals consistently
+    orient_mask = torch.sum(n_img_1 * xyz, dim=3) > 0
+    n_img_1[orient_mask] *= -1
+    orient_mask = torch.sum(n_img_2 * xyz, dim=3) > 0
+    n_img_2[orient_mask] *= -1
+    # Normalise each estimate; 1e-8 avoids division by zero.
+    n_img1_L2 = torch.sqrt(torch.sum(n_img_1 ** 2, dim=3, keepdim=True))
+    n_img1_norm = n_img_1 / (n_img1_L2 + 1e-8)
+    n_img2_L2 = torch.sqrt(torch.sum(n_img_2 ** 2, dim=3, keepdim=True))
+    n_img2_norm = n_img_2 / (n_img2_L2 + 1e-8)
+    # average 2 norms
+    n_img_aver = n_img1_norm + n_img2_norm
+    n_img_aver_L2 = torch.sqrt(torch.sum(n_img_aver ** 2, dim=3, keepdim=True))
+    n_img_aver_norm = n_img_aver / (n_img_aver_L2 + 1e-8)
+    # re-orient normals consistently
+    orient_mask = torch.sum(n_img_aver_norm * xyz, dim=3) > 0
+    n_img_aver_norm[orient_mask] *= -1
+    n_img_aver_norm_out = n_img_aver_norm.permute((1, 2, 3, 0))  # [h, w, c, b]
+    return n_img_aver_norm_out
+def surface_normal_from_depth(depth, focal_length, valid_mask=None):
+    # para depth: depth map, [b, c, h, w]
+    b, c, h, w = depth.shape
+    focal_length = focal_length[:, None, None, None]
+    depth_filter = nn.functional.avg_pool2d(depth, kernel_size=3, stride=1, padding=1)
+    depth_filter = nn.functional.avg_pool2d(depth_filter, kernel_size=3, stride=1, padding=1)
+    xyz = depth_to_xyz(depth_filter, focal_length)
+    sn_batch = []
+    for i in range(b):
+        xyz_i = xyz[i, :][None, :, :, :]
+        normal = get_surface_normalv2(xyz_i)
+        sn_batch.append(normal)
+    sn_batch = torch.cat(sn_batch, dim=3).permute((3, 2, 0, 1))  # [b, c, h, w]
+    mask_invalid = (~valid_mask).repeat(1, 3, 1, 1)
+    sn_batch[mask_invalid] = 0.0
+    return sn_batch
+def vis_normal(normal):
+    """
+    Visualize surface normal. Transfer surface normal value from [-1, 1] to [0, 255]
+    @para normal: surface normal, [h, w, 3], numpy.array
+    """
+    n_img_L2 = np.sqrt(np.sum(normal ** 2, axis=2, keepdims=True))
+    n_img_norm = normal / (n_img_L2 + 1e-8)
+    normal_vis = n_img_norm * 127
+    normal_vis += 128
+    normal_vis = normal_vis.astype(np.uint8)
+    return normal_vis
+def vis_normal2(normals):
+    '''
+    Montage of normal maps. Vectors are unit length and backfaces thresholded.
+    '''
+    x = normals[:, :, 0] # horizontal; pos right
+    y = normals[:, :, 1] # depth; pos far
+    z = normals[:, :, 2] # vertical; pos up
+    backfacing = (z > 0)
+    norm = np.sqrt(np.sum(normals**2, axis=2))
+    zero = (norm < 1e-5)
+    x += 1.0; x *= 0.5
+    y += 1.0; y *= 0.5
+    z = np.abs(z)
+    x[zero] = 0.0
+    y[zero] = 0.0
+    z[zero] = 0.0
+    normals[:, :, 0] = x  # horizontal; pos right
+    normals[:, :, 1] = y  # depth; pos far
+    normals[:, :, 2] = z # vertical; pos up
+    return normals
+if __name__ == '__main__':
+    import cv2, os