diff --git a/.gitignore b/.gitignore index 44ba5fb4af464c337cfb7ddca5bea9544e7fc494..e058d9cb376260cdec47bbb3b45bef2b78d6dc1c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,169 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + # pixi environments .pixi *.egg-info - +thirdparty/* +data/* +checkpoints/* \ No newline at end of file diff --git a/config/default.yaml b/config/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e69ebc8cc04b4855e43b6305a1eced9eda15a59 --- /dev/null +++ b/config/default.yaml @@ -0,0 +1,19 @@ +### DPVO Config File ### + +# VO config (increase for better accuracy) +PATCHES_PER_FRAME: 96 +REMOVAL_WINDOW: 22 +OPTIMIZATION_WINDOW: 10 +PATCH_LIFETIME: 13 + +# threshold for keyframe removal +KEYFRAME_THRESH: 15.0 + +# camera motion model +MOTION_MODEL: 'DAMPED_LINEAR' +MOTION_DAMPING: 0.5 + +# maybe use mixed precision for inference +MIXED_PRECISION: True + +GRADIENT_BIAS: False diff --git a/config/fast.yaml b/config/fast.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3482c4313c12acba504fbeccbfb0c3c605ec53e4 --- /dev/null +++ b/config/fast.yaml @@ -0,0 +1,20 @@ + +### DPVO Config File ### + +# VO config (increase for better accuracy) +PATCHES_PER_FRAME: 48 +REMOVAL_WINDOW: 16 +OPTIMIZATION_WINDOW: 7 +PATCH_LIFETIME: 11 + +# threshold for keyframe removal +KEYFRAME_THRESH: 15.0 + +# camera motion model +MOTION_MODEL: 'DAMPED_LINEAR' +MOTION_DAMPING: 0.5 + +# maybe use mixed precision for inference +MIXED_PRECISION: True + +GRADIENT_BIAS: False \ No newline at end of file diff --git a/mini_dpvo/__init__.py b/mini_dpvo/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mini_dpvo/altcorr/__init__.py b/mini_dpvo/altcorr/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e94d30c390bcff235acfb7feee2ea3c4554e011b --- /dev/null +++ b/mini_dpvo/altcorr/__init__.py @@ -0,0 +1 @@ +from .correlation import corr, patchify \ No newline at end of file diff --git a/mini_dpvo/altcorr/correlation.cpp b/mini_dpvo/altcorr/correlation.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7cd16793c8fc3381bde54a163145048e4ee0c560 --- /dev/null +++ b/mini_dpvo/altcorr/correlation.cpp @@ -0,0 +1,63 @@ +#include +#include + +// CUDA forward declarations +std::vector corr_cuda_forward( + torch::Tensor fmap1, + torch::Tensor fmap2, + torch::Tensor coords, + torch::Tensor ii, + torch::Tensor jj, + int radius); + +std::vector corr_cuda_backward( + torch::Tensor fmap1, + torch::Tensor fmap2, + torch::Tensor coords, + torch::Tensor ii, + torch::Tensor jj, + torch::Tensor corr_grad, + int radius); + +std::vector patchify_cuda_forward( + torch::Tensor net, torch::Tensor coords, int radius); + +std::vector patchify_cuda_backward( + torch::Tensor net, torch::Tensor coords, torch::Tensor gradient, int radius); + +std::vector corr_forward( + torch::Tensor fmap1, + torch::Tensor fmap2, + torch::Tensor coords, + torch::Tensor ii, + torch::Tensor jj, int radius) { + return corr_cuda_forward(fmap1, fmap2, coords, ii, jj, radius); +} + +std::vector corr_backward( + torch::Tensor fmap1, + torch::Tensor fmap2, + torch::Tensor coords, + torch::Tensor ii, + torch::Tensor jj, + torch::Tensor corr_grad, int radius) { + return corr_cuda_backward(fmap1, fmap2, coords, ii, jj, corr_grad, radius); +} + +std::vector patchify_forward( + torch::Tensor net, torch::Tensor coords, int radius) { + return patchify_cuda_forward(net, coords, radius); +} + +std::vector patchify_backward( + torch::Tensor net, torch::Tensor coords, torch::Tensor gradient, int radius) { + return patchify_cuda_backward(net, coords, gradient, radius); +} + 
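The wrapper functions above are exposed to Python through the PYBIND11_MODULE block that follows and are consumed by mini_dpvo/altcorr/correlation.py via `import cuda_corr`. The build script itself is not part of this diff; as an illustration only, a minimal setup.py that compiles these two sources into a `cuda_corr` extension with torch.utils.cpp_extension could look like the sketch below (the file name, package layout, and absence of extra compile flags are assumptions, not the repository's actual build configuration).

```python
# Hypothetical build sketch (not part of this diff): compiles correlation.cpp and
# correlation_kernel.cu into the `cuda_corr` module that correlation.py imports.
from setuptools import setup
from torch.utils.cpp_extension import CUDAExtension, BuildExtension

setup(
    name="cuda_corr",
    ext_modules=[
        CUDAExtension(
            name="cuda_corr",
            sources=[
                "mini_dpvo/altcorr/correlation.cpp",
                "mini_dpvo/altcorr/correlation_kernel.cu",
            ],
        ),
    ],
    cmdclass={"build_ext": BuildExtension},
)
```

Once built (for example with `pip install -e .`, or an equivalent JIT `torch.utils.cpp_extension.load` call), the autograd wrappers `corr` and `patchify` defined in correlation.py forward to these compiled ops.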
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("forward", &corr_forward, "CORR forward"); + m.def("backward", &corr_backward, "CORR backward"); + + m.def("patchify_forward", &patchify_forward, "PATCHIFY forward"); + m.def("patchify_backward", &patchify_backward, "PATCHIFY backward"); +} \ No newline at end of file diff --git a/mini_dpvo/altcorr/correlation.py b/mini_dpvo/altcorr/correlation.py new file mode 100644 index 0000000000000000000000000000000000000000..9b9d8d9d4b01fb603b9978c8788a717d7961b20c --- /dev/null +++ b/mini_dpvo/altcorr/correlation.py @@ -0,0 +1,74 @@ +import torch +import cuda_corr + +class CorrLayer(torch.autograd.Function): + @staticmethod + def forward(ctx, fmap1, fmap2, coords, ii, jj, radius, dropout): + """ forward correlation """ + ctx.save_for_backward(fmap1, fmap2, coords, ii, jj) + ctx.radius = radius + ctx.dropout = dropout + corr, = cuda_corr.forward(fmap1, fmap2, coords, ii, jj, radius) + + return corr + + @staticmethod + def backward(ctx, grad): + """ backward correlation """ + fmap1, fmap2, coords, ii, jj = ctx.saved_tensors + + if ctx.dropout < 1: + perm = torch.rand(len(ii), device="cuda") < ctx.dropout + coords = coords[:,perm] + grad = grad[:,perm] + ii = ii[perm] + jj = jj[perm] + + fmap1_grad, fmap2_grad = \ + cuda_corr.backward(fmap1, fmap2, coords, ii, jj, grad, ctx.radius) + + return fmap1_grad, fmap2_grad, None, None, None, None, None + + +class PatchLayer(torch.autograd.Function): + @staticmethod + def forward(ctx, net, coords, radius): + """ forward patchify """ + ctx.radius = radius + ctx.save_for_backward(net, coords) + + patches, = cuda_corr.patchify_forward(net, coords, radius) + return patches + + @staticmethod + def backward(ctx, grad): + """ backward patchify """ + net, coords = ctx.saved_tensors + grad, = cuda_corr.patchify_backward(net, coords, grad, ctx.radius) + + return grad, None, None + +def patchify(net, coords, radius, mode='bilinear'): + """ extract patches """ + + patches = PatchLayer.apply(net, coords, radius) + + if mode == 'bilinear': + offset = (coords - coords.floor()).to(net.device) + dx, dy = offset[:,:,None,None,None].unbind(dim=-1) + + d = 2 * radius + 1 + x00 = (1-dy) * (1-dx) * patches[...,:d,:d] + x01 = (1-dy) * ( dx) * patches[...,:d,1:] + x10 = ( dy) * (1-dx) * patches[...,1:,:d] + x11 = ( dy) * ( dx) * patches[...,1:,1:] + + return x00 + x01 + x10 + x11 + + return patches + + +def corr(fmap1, fmap2, coords, ii, jj, radius=1, dropout=1): + return CorrLayer.apply(fmap1, fmap2, coords, ii, jj, radius, dropout) + + diff --git a/mini_dpvo/altcorr/correlation_kernel.cu b/mini_dpvo/altcorr/correlation_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..f4faa73863c071e126e62a77bffb6bcc98f8d55b --- /dev/null +++ b/mini_dpvo/altcorr/correlation_kernel.cu @@ -0,0 +1,333 @@ +#include +#include +#include +#include + +using namespace torch::indexing; + +#define THREADS 256 +#define BLOCKS(n) (n + THREADS - 1) / THREADS + +__forceinline__ __device__ +bool within_bounds(int h, int w, int H, int W) { + return h >= 0 && h < H && w >= 0 && w < W; +} + +template +__global__ void patchify_forward_kernel(int R, + const torch::PackedTensorAccessor32 net, + const torch::PackedTensorAccessor32 coords, + torch::PackedTensorAccessor32 patches) +{ + // diameter + const int D = 2*R + 2; + + const int B = coords.size(0); + const int M = coords.size(1); + const int C = net.size(1); + const int H = net.size(2); + const int W = net.size(3); + + int n = blockIdx.x * blockDim.x + threadIdx.x; + if (n < B * M * D * 
D) { + const int ii = n % D; n /= D; + const int jj = n % D; n /= D; + const int m = n % M; n /= M; + + const float x = coords[n][m][0]; + const float y = coords[n][m][1]; + const int i = static_cast(floor(y)) + (ii - R); + const int j = static_cast(floor(x)) + (jj - R); + + if (within_bounds(i, j, H, W)) { + for (int k=0; k +__global__ void patchify_backward_kernel(int R, + const torch::PackedTensorAccessor32 patch_gradient, + const torch::PackedTensorAccessor32 coords, + torch::PackedTensorAccessor32 gradient) +{ + // diameter + const int D = 2*R + 2; + + const int B = coords.size(0); + const int M = coords.size(1); + const int C = gradient.size(1); + const int H = gradient.size(2); + const int W = gradient.size(3); + + int n = blockIdx.x * blockDim.x + threadIdx.x; + if (n < B * M * D * D) { + const int ii = n % D; n /= D; + const int jj = n % D; n /= D; + const int m = n % M; n /= M; + + const float x = coords[n][m][0]; + const float y = coords[n][m][1]; + const int i = static_cast(floor(y)) + (ii - R); + const int j = static_cast(floor(x)) + (jj - R); + + if (within_bounds(i, j, H, W)) { + for (int k=0; k +__global__ void corr_forward_kernel(int R, + const torch::PackedTensorAccessor32 fmap1, + const torch::PackedTensorAccessor32 fmap2, + const torch::PackedTensorAccessor32 coords, + const torch::PackedTensorAccessor32 us, + const torch::PackedTensorAccessor32 vs, + torch::PackedTensorAccessor32 corr) +{ + // diameter + const int D = 2*R + 2; + + const int B = coords.size(0); + const int M = coords.size(1); + const int H = coords.size(3); + const int W = coords.size(4); + + const int C = fmap1.size(2); + const int H2 = fmap2.size(3); + const int W2 = fmap2.size(4); + + int n = blockIdx.x * blockDim.x + threadIdx.x; + + if (n < B * M * H * W * D * D) { + const int jj = n % D; n /= D; + const int ii = n % D; n /= D; + const int j0 = n % W; n /= W; + const int i0 = n % H; n /= H; + const int m = n % M; n /= M; + + const int ix = us[m]; + const int jx = vs[m]; + + const float x = coords[n][m][0][i0][j0]; + const float y = coords[n][m][1][i0][j0]; + + const int i1 = static_cast(floor(y)) + (ii - R); + const int j1 = static_cast(floor(x)) + (jj - R); + + scalar_t s = 0; + if (within_bounds(i1, j1, H2, W2)) { + + #pragma unroll 8 + for (int i=0; i +__global__ void corr_backward_kernel(int R, + const torch::PackedTensorAccessor32 fmap1, + const torch::PackedTensorAccessor32 fmap2, + const torch::PackedTensorAccessor32 coords, + const torch::PackedTensorAccessor32 us, + const torch::PackedTensorAccessor32 vs, + const torch::PackedTensorAccessor32 corr_grad, + torch::PackedTensorAccessor32 fmap1_grad, + torch::PackedTensorAccessor32 fmap2_grad) +{ + // diameter + const int D = 2*R + 2; + + const int B = coords.size(0); + const int M = coords.size(1); + const int H = coords.size(3); + const int W = coords.size(4); + + const int C = fmap1.size(2); + const int H2 = fmap2.size(3); + const int W2 = fmap2.size(4); + + int n = blockIdx.x * blockDim.x + threadIdx.x; + + if (n < B * M * H * W * D * D) { + const int jj = n % D; n /= D; + const int ii = n % D; n /= D; + const int j0 = n % W; n /= W; + const int i0 = n % H; n /= H; + const int m = n % M; n /= M; + + const int ix = us[m]; + const int jx = vs[m]; + + const float x = coords[n][m][0][i0][j0]; + const float y = coords[n][m][1][i0][j0]; + + const int i1 = static_cast(floor(y)) + (ii - R); + const int j1 = static_cast(floor(x)) + (jj - R); + + const scalar_t g = (scalar_t) corr_grad[n][m][ii][jj][i0][j0]; + + if (within_bounds(i1, j1, H2, W2)) { 
+ #pragma unroll 32 + for (int i=0; i corr_cuda_forward( + torch::Tensor fmap1, + torch::Tensor fmap2, + torch::Tensor coords, + torch::Tensor ii, + torch::Tensor jj, + int radius) +{ + const int B = coords.size(0); + const int M = coords.size(1); + + const int H = coords.size(3); + const int W = coords.size(4); + const int D = 2 * radius + 2; + + auto opts = fmap1.options(); + auto corr = torch::empty({B, M, D, D, H, W}, opts); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF(fmap1.type(), "corr_forward_kernel", ([&] { + corr_forward_kernel<<>>(radius, + fmap1.packed_accessor32(), + fmap2.packed_accessor32(), + coords.packed_accessor32(), + ii.packed_accessor32(), + jj.packed_accessor32(), + corr.packed_accessor32()); + })); + + torch::Tensor x = coords.index({Slice(), Slice(), 0, None, None}); + torch::Tensor y = coords.index({Slice(), Slice(), 1, None, None}); + torch::Tensor dx = x - x.floor(); dx = dx.to(fmap1.dtype()); + torch::Tensor dy = y - y.floor(); dy = dy.to(fmap2.dtype()); + + torch::Tensor out; + out = (1 - dx) * (1 - dy) * corr.index({Slice(), Slice(), Slice(0, D-1), Slice(0, D-1)}); + out += (dx) * (1 - dy) * corr.index({Slice(), Slice(), Slice(0, D-1), Slice(1, D-0)}); + out += (1 - dx) * (dy) * corr.index({Slice(), Slice(), Slice(1, D-0), Slice(0, D-1)}); + out += (dx) * (dy) * corr.index({Slice(), Slice(), Slice(1, D-0), Slice(1, D-0)}); + + return { out.permute({0,1,3,2,4,5}) }; +} + + +std::vector corr_cuda_backward( + torch::Tensor fmap1, + torch::Tensor fmap2, + torch::Tensor coords, + torch::Tensor ii, + torch::Tensor jj, + torch::Tensor grad, + int radius) +{ + const int B = coords.size(0); + const int M = coords.size(1); + + const int H = coords.size(3); + const int W = coords.size(4); + const int D = 2 * radius + 2; + + grad = grad.permute({0,1,3,2,4,5}).contiguous(); + torch::Tensor x = coords.index({Slice(), Slice(), 0, None, None}); + torch::Tensor y = coords.index({Slice(), Slice(), 1, None, None}); + torch::Tensor dx = x - x.floor(); + torch::Tensor dy = y - y.floor(); + + auto opts = torch::TensorOptions().dtype(torch::kFloat).device(torch::kCUDA); + torch::Tensor g1 = torch::zeros({B, M, D, D, H, W}, grad.options()); + torch::Tensor g2 = torch::zeros({B, M, D, D, H, W}, grad.options()); + torch::Tensor g3 = torch::zeros({B, M, D, D, H, W}, grad.options()); + torch::Tensor g4 = torch::zeros({B, M, D, D, H, W}, grad.options()); + + g1.index_put_({Slice(), Slice(), Slice(0, D-1), Slice(0, D-1)}, (1 - dx) * (1 - dy) * grad); + g2.index_put_({Slice(), Slice(), Slice(0, D-1), Slice(1, D-0)}, (dx) * (1 - dy) * grad); + g3.index_put_({Slice(), Slice(), Slice(1, D-0), Slice(0, D-1)}, (1 - dx) * (dy) * grad); + g4.index_put_({Slice(), Slice(), Slice(1, D-0), Slice(1, D-0)}, (dx) * (dy) * grad); + + torch::Tensor corr_grad = g1 + g2 + g3 + g4; + auto fmap1_grad = torch::zeros_like(fmap1); + auto fmap2_grad = torch::zeros_like(fmap2); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF(fmap1.type(), "corr_backward_kernel", ([&] { + corr_backward_kernel<<>>(radius, + fmap1.packed_accessor32(), + fmap2.packed_accessor32(), + coords.packed_accessor32(), + ii.packed_accessor32(), + jj.packed_accessor32(), + corr_grad.packed_accessor32(), + fmap1_grad.packed_accessor32(), + fmap2_grad.packed_accessor32()); + })); + + return {fmap1_grad, fmap2_grad}; +} + +std::vector patchify_cuda_forward( + torch::Tensor net, torch::Tensor coords, int radius) +{ + const int B = coords.size(0); + const int M = coords.size(1); + const int C = net.size(1); + const int D = 2 * radius + 2; + + auto opts = 
net.options(); + auto patches = torch::zeros({B, M, C, D, D}, opts); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF(net.type(), "patchify_forward_kernel", ([&] { + patchify_forward_kernel<<>>(radius, + net.packed_accessor32(), + coords.packed_accessor32(), + patches.packed_accessor32()); + })); + + return { patches }; +} + + +std::vector patchify_cuda_backward( + torch::Tensor net, + torch::Tensor coords, + torch::Tensor gradient, + int radius) +{ + const int B = coords.size(0); + const int M = coords.size(1); + const int C = net.size(1); + const int H = net.size(2); + const int W = net.size(3); + const int D = 2 * radius + 2; + + torch::Tensor net_gradient = torch::zeros_like(net); + + AT_DISPATCH_FLOATING_TYPES_AND_HALF(net.type(), "patchify_backward_kernel", ([&] { + patchify_backward_kernel<<>>(radius, + gradient.packed_accessor32(), + coords.packed_accessor32(), + net_gradient.packed_accessor32()); + })); + + return { net_gradient }; +} \ No newline at end of file diff --git a/mini_dpvo/api/__init__.py b/mini_dpvo/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mini_dpvo/api/inference.py b/mini_dpvo/api/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..00d79ea567ab28e8a0ba89d311807a92eef8df51 --- /dev/null +++ b/mini_dpvo/api/inference.py @@ -0,0 +1,190 @@ +import numpy as np +import os +import torch +from pathlib import Path +from multiprocessing import Process, Queue +from yacs.config import CfgNode + +from mini_dpvo.utils import Timer +from mini_dpvo.dpvo import DPVO +from mini_dpvo.stream import image_stream, video_stream + +import rerun as rr +from jaxtyping import UInt8, Float64, Float32 +from scipy.spatial.transform import Rotation +from dataclasses import dataclass + +from timeit import default_timer as timer + + +@dataclass +class DPVOPrediction: + final_poses: Float32[torch.Tensor, "num_keyframes 7"] # noqa: F722 + tstamps: Float64[torch.Tensor, "num_keyframes"] # noqa: F821 + final_points: Float32[torch.Tensor, "buffer_size*num_patches 3"] # noqa: F722 + final_colors: UInt8[torch.Tensor, "buffer_size num_patches 3"] # noqa: F722 + + +def log_trajectory( + parent_log_path: Path, + poses: Float32[torch.Tensor, "buffer_size 7"], + points: Float32[torch.Tensor, "buffer_size*num_patches 3"], + colors: UInt8[torch.Tensor, "buffer_size num_patches 3"], + intri_np: Float64[np.ndarray, "4"], + bgr_hw3: UInt8[np.ndarray, "h w 3"], +): + cam_log_path = f"{parent_log_path}/camera" + rr.log(f"{cam_log_path}/pinhole/image", rr.Image(bgr_hw3[..., ::-1])) + rr.log( + f"{cam_log_path}/pinhole", + rr.Pinhole( + height=bgr_hw3.shape[0], + width=bgr_hw3.shape[1], + focal_length=[intri_np[0], intri_np[1]], + principal_point=[intri_np[2], intri_np[3]], + ), + ) + + poses_mask = ~(poses[:, :6] == 0).all(dim=1) + points_mask = ~(points == 0).all(dim=1) + + nonzero_poses = poses[poses_mask] + nonzero_points = points[points_mask] + + last_index = nonzero_poses.shape[0] - 1 + # get last non-zero pose, and the index of the last non-zero pose + quat_pose = nonzero_poses[last_index].numpy(force=True) + trans_quat = quat_pose[:3] + rotation_quat = Rotation.from_quat(quat_pose[3:]) + + mat3x3 = rotation_quat.as_matrix() + rr.log( + f"{cam_log_path}", + rr.Transform3D(translation=trans_quat, mat3x3=mat3x3, from_parent=True), + ) + + # outlier removal + trajectory_center = np.median(nonzero_poses[:, :3].numpy(force=True), axis=0) + radii = lambda a: np.linalg.norm(a - trajectory_center, 
axis=1) + points_np = nonzero_points.view(-1, 3).numpy(force=True) + colors_np = colors.view(-1, 3)[points_mask].numpy(force=True) + inlier_mask = ( + radii(points_np) < radii(nonzero_poses[:, :3].numpy(force=True)).max() * 5 + ) + points_filtered = points_np[inlier_mask] + colors_filtered = colors_np[inlier_mask] + + # log all points and colors at the same time + rr.log( + f"{parent_log_path}/pointcloud", + rr.Points3D( + positions=points_filtered, + colors=colors_filtered, + ), + ) + + +def log_final( + parent_log_path: Path, + final_poses: Float32[torch.Tensor, "num_keyframes 7"], + tstamps: Float64[torch.Tensor, "num_keyframes"], # noqa: F821 + final_points: Float32[torch.Tensor, "buffer_size*num_patches 3"], + final_colors: UInt8[torch.Tensor, "buffer_size num_patches 3"], +): + for idx, (pose_quat, tstamp) in enumerate(zip(final_poses, tstamps)): + cam_log_path = f"{parent_log_path}/camera_{idx}" + trans_quat = pose_quat[:3] + R_33 = Rotation.from_quat(pose_quat[3:]).as_matrix() + rr.log( + f"{cam_log_path}", + rr.Transform3D(translation=trans_quat, mat3x3=R_33, from_parent=False), + ) + + +def create_reader( + imagedir: str, calib: str, stride: int, skip: int, queue: Queue +) -> Process: + if os.path.isdir(imagedir): + reader = Process( + target=image_stream, args=(queue, imagedir, calib, stride, skip) + ) + else: + reader = Process( + target=video_stream, args=(queue, imagedir, calib, stride, skip) + ) + + return reader + + +@torch.no_grad() +def run( + cfg: CfgNode, + network_path: str, + imagedir: str, + calib: str, + stride: int = 1, + skip: int = 0, + vis_during: bool = True, + timeit: bool = False, +) -> tuple[DPVOPrediction, float]: + slam = None + queue = Queue(maxsize=8) + reader: Process = create_reader(imagedir, calib, stride, skip, queue) + reader.start() + + if vis_during: + parent_log_path = Path("world") + rr.log(f"{parent_log_path}", rr.ViewCoordinates.RDF, timeless=True) + + start = timer() + + while True: + t: int + bgr_hw3: UInt8[np.ndarray, "h w 3"] + intri_np: Float64[np.ndarray, "4"] + (t, bgr_hw3, intri_np) = queue.get() + # queue will have a (-1, image, intrinsics) tuple when the reader is done + if t < 0: + break + + if vis_during: + rr.set_time_sequence(timeline="timestep", sequence=t) + + bgr_3hw: UInt8[torch.Tensor, "h w 3"] = ( + torch.from_numpy(bgr_hw3).permute(2, 0, 1).cuda() + ) + intri_torch: Float64[torch.Tensor, "4"] = torch.from_numpy(intri_np).cuda() + + if slam is None: + slam = DPVO(cfg, network_path, ht=bgr_3hw.shape[1], wd=bgr_3hw.shape[2]) + + with Timer("SLAM", enabled=timeit): + slam(t, bgr_3hw, intri_torch) + + if slam.is_initialized and vis_during: + poses: Float32[torch.Tensor, "buffer_size 7"] = slam.poses_ + points: Float32[torch.Tensor, "buffer_size*num_patches 3"] = slam.points_ + colors: UInt8[torch.Tensor, "buffer_size num_patches 3"] = slam.colors_ + log_trajectory(parent_log_path, poses, points, colors, intri_np, bgr_hw3) + + for _ in range(12): + slam.update() + + total_time: float = timer() - start + print(f"Total time: {total_time:.2f}s") + + reader.join() + + final_poses: Float32[torch.Tensor, "num_keyframes 7"] + tstamps: Float64[torch.Tensor, "num_keyframes"] # noqa: F821 + + final_poses, tstamps = slam.terminate() + final_points: Float32[torch.Tensor, "buffer_size*num_patches 3"] = slam.points_ + final_colors: UInt8[torch.Tensor, "buffer_size num_patches 3"] = slam.colors_ + dpvo_pred = DPVOPrediction( + final_poses=final_poses, + tstamps=tstamps, + final_points=final_points, + final_colors=final_colors, + ) + return 
dpvo_pred, total_time diff --git a/mini_dpvo/ba.py b/mini_dpvo/ba.py new file mode 100644 index 0000000000000000000000000000000000000000..d23ccc4db5e86773172d3bb0d1b061f192143590 --- /dev/null +++ b/mini_dpvo/ba.py @@ -0,0 +1,182 @@ +import torch +from torch_scatter import scatter_sum + +from . import fastba +from . import lietorch +from .lietorch import SE3 + +from .utils import Timer + +from . import projective_ops as pops + +class CholeskySolver(torch.autograd.Function): + @staticmethod + def forward(ctx, H, b): + # don't crash training if cholesky decomp fails + U, info = torch.linalg.cholesky_ex(H) + + if torch.any(info): + ctx.failed = True + return torch.zeros_like(b) + + xs = torch.cholesky_solve(b, U) + ctx.save_for_backward(U, xs) + ctx.failed = False + + return xs + + @staticmethod + def backward(ctx, grad_x): + if ctx.failed: + return None, None + + U, xs = ctx.saved_tensors + dz = torch.cholesky_solve(grad_x, U) + dH = -torch.matmul(xs, dz.transpose(-1,-2)) + + return dH, dz + +# utility functions for scattering ops +def safe_scatter_add_mat(A, ii, jj, n, m): + v = (ii >= 0) & (jj >= 0) & (ii < n) & (jj < m) + return scatter_sum(A[:,v], ii[v]*m + jj[v], dim=1, dim_size=n*m) + +def safe_scatter_add_vec(b, ii, n): + v = (ii >= 0) & (ii < n) + return scatter_sum(b[:,v], ii[v], dim=1, dim_size=n) + +# apply retraction operator to inv-depth maps +def disp_retr(disps, dz, ii): + ii = ii.to(device=dz.device) + return disps + scatter_sum(dz, ii, dim=1, dim_size=disps.shape[1]) + +# apply retraction operator to poses +def pose_retr(poses, dx, ii): + ii = ii.to(device=dx.device) + return poses.retr(scatter_sum(dx, ii, dim=1, dim_size=poses.shape[1])) + +def block_matmul(A, B): + """ block matrix multiply """ + b, n1, m1, p1, q1 = A.shape + b, n2, m2, p2, q2 = B.shape + A = A.permute(0, 1, 3, 2, 4).reshape(b, n1*p1, m1*q1) + B = B.permute(0, 1, 3, 2, 4).reshape(b, n2*p2, m2*q2) + return torch.matmul(A, B).reshape(b, n1, p1, m2, q2).permute(0, 1, 3, 2, 4) + +def block_solve(A, B, ep=1.0, lm=1e-4): + """ block matrix solve """ + b, n1, m1, p1, q1 = A.shape + b, n2, m2, p2, q2 = B.shape + A = A.permute(0, 1, 3, 2, 4).reshape(b, n1*p1, m1*q1) + B = B.permute(0, 1, 3, 2, 4).reshape(b, n2*p2, m2*q2) + + A = A + (ep + lm * A) * torch.eye(n1*p1, device=A.device) + + X = CholeskySolver.apply(A, B) + return X.reshape(b, n1, p1, m2, q2).permute(0, 1, 3, 2, 4) + + +def block_show(A): + import matplotlib.pyplot as plt + b, n1, m1, p1, q1 = A.shape + A = A.permute(0, 1, 3, 2, 4).reshape(b, n1*p1, m1*q1) + plt.imshow(A[0].detach().cpu().numpy()) + plt.show() + +def BA(poses, patches, intrinsics, targets, weights, lmbda, ii, jj, kk, bounds, ep=100.0, PRINT=False, fixedp=1, structure_only=False): + """ bundle adjustment """ + + b = 1 + n = max(ii.max().item(), jj.max().item()) + 1 + + coords, v, (Ji, Jj, Jz) = \ + pops.transform(poses, patches, intrinsics, ii, jj, kk, jacobian=True) + + p = coords.shape[3] + r = targets - coords[...,p//2,p//2,:] + + v *= (r.norm(dim=-1) < 250).float() + + in_bounds = \ + (coords[...,p//2,p//2,0] > bounds[0]) & \ + (coords[...,p//2,p//2,1] > bounds[1]) & \ + (coords[...,p//2,p//2,0] < bounds[2]) & \ + (coords[...,p//2,p//2,1] < bounds[3]) + + v *= in_bounds.float() + + if PRINT: + print((r * v[...,None]).norm(dim=-1).mean().item()) + + r = (v[...,None] * r).unsqueeze(dim=-1) + weights = (v[...,None] * weights).unsqueeze(dim=-1) + + wJiT = (weights * Ji).transpose(2,3) + wJjT = (weights * Jj).transpose(2,3) + wJzT = (weights * Jz).transpose(2,3) + + Bii = 
torch.matmul(wJiT, Ji) + Bij = torch.matmul(wJiT, Jj) + Bji = torch.matmul(wJjT, Ji) + Bjj = torch.matmul(wJjT, Jj) + + Eik = torch.matmul(wJiT, Jz) + Ejk = torch.matmul(wJjT, Jz) + + vi = torch.matmul(wJiT, r) + vj = torch.matmul(wJjT, r) + + # fix first pose + ii = ii.clone() + jj = jj.clone() + + n = n - fixedp + ii = ii - fixedp + jj = jj - fixedp + + kx, kk = torch.unique(kk, return_inverse=True, sorted=True) + m = len(kx) + + B = safe_scatter_add_mat(Bii, ii, ii, n, n).view(b, n, n, 6, 6) + \ + safe_scatter_add_mat(Bij, ii, jj, n, n).view(b, n, n, 6, 6) + \ + safe_scatter_add_mat(Bji, jj, ii, n, n).view(b, n, n, 6, 6) + \ + safe_scatter_add_mat(Bjj, jj, jj, n, n).view(b, n, n, 6, 6) + + E = safe_scatter_add_mat(Eik, ii, kk, n, m).view(b, n, m, 6, 1) + \ + safe_scatter_add_mat(Ejk, jj, kk, n, m).view(b, n, m, 6, 1) + + C = safe_scatter_add_vec(torch.matmul(wJzT, Jz), kk, m) + + v = safe_scatter_add_vec(vi, ii, n).view(b, n, 1, 6, 1) + \ + safe_scatter_add_vec(vj, jj, n).view(b, n, 1, 6, 1) + + w = safe_scatter_add_vec(torch.matmul(wJzT, r), kk, m) + + if isinstance(lmbda, torch.Tensor): + lmbda = lmbda.reshape(*C.shape) + + Q = 1.0 / (C + lmbda) + + ### solve w/ schur complement ### + EQ = E * Q[:,None] + + if structure_only or n == 0: + dZ = (Q * w).view(b, -1, 1, 1) + + else: + S = B - block_matmul(EQ, E.permute(0,2,1,4,3)) + y = v - block_matmul(EQ, w.unsqueeze(dim=2)) + dX = block_solve(S, y, ep=ep, lm=1e-4) + + dZ = Q * (w - block_matmul(E.permute(0,2,1,4,3), dX).squeeze(dim=-1)) + dX = dX.view(b, -1, 6) + dZ = dZ.view(b, -1, 1, 1) + + x, y, disps = patches.unbind(dim=2) + disps = disp_retr(disps, dZ, kx).clamp(min=1e-3, max=10.0) + patches = torch.stack([x, y, disps], dim=2) + + if not structure_only and n > 0: + poses = pose_retr(poses, dX, fixedp + torch.arange(n)) + + return poses, patches diff --git a/mini_dpvo/blocks.py b/mini_dpvo/blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..9642de1088b29483b6fea355a04d7247f33b2330 --- /dev/null +++ b/mini_dpvo/blocks.py @@ -0,0 +1,118 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +import torch_scatter + +class LayerNorm1D(nn.Module): + def __init__(self, dim): + super(LayerNorm1D, self).__init__() + self.norm = nn.LayerNorm(dim, eps=1e-4) + + def forward(self, x): + return self.norm(x.transpose(1,2)).transpose(1,2) + +class GatedResidual(nn.Module): + def __init__(self, dim): + super().__init__() + + self.gate = nn.Sequential( + nn.Linear(dim, dim), + nn.Sigmoid()) + + self.res = nn.Sequential( + nn.Linear(dim, dim), + nn.ReLU(inplace=True), + nn.Linear(dim, dim)) + + def forward(self, x): + return x + self.gate(x) * self.res(x) + +class SoftAgg(nn.Module): + def __init__(self, dim=512, expand=True): + super(SoftAgg, self).__init__() + self.dim = dim + self.expand = expand + self.f = nn.Linear(self.dim, self.dim) + self.g = nn.Linear(self.dim, self.dim) + self.h = nn.Linear(self.dim, self.dim) + + def forward(self, x, ix): + _, jx = torch.unique(ix, return_inverse=True) + w = torch_scatter.scatter_softmax(self.g(x), jx, dim=1) + y = torch_scatter.scatter_sum(self.f(x) * w, jx, dim=1) + + if self.expand: + return self.h(y)[:,jx] + + return self.h(y) + +class SoftAggBasic(nn.Module): + def __init__(self, dim=512, expand=True): + super(SoftAggBasic, self).__init__() + self.dim = dim + self.expand = expand + self.f = nn.Linear(self.dim, self.dim) + self.g = nn.Linear(self.dim, 1) + self.h = nn.Linear(self.dim, self.dim) + + def forward(self, x, ix): + _, jx = torch.unique(ix, 
return_inverse=True) + w = torch_scatter.scatter_softmax(self.g(x), jx, dim=1) + y = torch_scatter.scatter_sum(self.f(x) * w, jx, dim=1) + + if self.expand: + return self.h(y)[:,jx] + + return self.h(y) + + +### Gradient Clipping and Zeroing Operations ### + +GRAD_CLIP = 0.1 + +class GradClip(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + return x + + @staticmethod + def backward(ctx, grad_x): + grad_x = torch.where(torch.isnan(grad_x), torch.zeros_like(grad_x), grad_x) + return grad_x.clamp(min=-0.01, max=0.01) + +class GradientClip(nn.Module): + def __init__(self): + super(GradientClip, self).__init__() + + def forward(self, x): + return GradClip.apply(x) + +class GradZero(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + return x + + @staticmethod + def backward(ctx, grad_x): + grad_x = torch.where(torch.isnan(grad_x), torch.zeros_like(grad_x), grad_x) + grad_x = torch.where(torch.abs(grad_x) > GRAD_CLIP, torch.zeros_like(grad_x), grad_x) + return grad_x + +class GradientZero(nn.Module): + def __init__(self): + super(GradientZero, self).__init__() + + def forward(self, x): + return GradZero.apply(x) + + +class GradMag(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + return x + + @staticmethod + def backward(ctx, grad_x): + print(grad_x.abs().mean()) + return grad_x diff --git a/mini_dpvo/config.py b/mini_dpvo/config.py new file mode 100644 index 0000000000000000000000000000000000000000..be1c50032b6d359286b5a69965354db5cbf2193f --- /dev/null +++ b/mini_dpvo/config.py @@ -0,0 +1,27 @@ +from yacs.config import CfgNode as CN + +_C = CN() + +# max number of keyframes +_C.BUFFER_SIZE = 2048 + +# bias patch selection towards high gradient regions? +_C.GRADIENT_BIAS = True + +# VO config (increase for better accuracy) +_C.PATCHES_PER_FRAME = 80 +_C.REMOVAL_WINDOW = 20 +_C.OPTIMIZATION_WINDOW = 12 +_C.PATCH_LIFETIME = 12 + +# threshold for keyframe removal +_C.KEYFRAME_INDEX = 4 +_C.KEYFRAME_THRESH = 12.5 + +# camera motion model +_C.MOTION_MODEL = 'DAMPED_LINEAR' +_C.MOTION_DAMPING = 0.5 + +_C.MIXED_PRECISION = True + +cfg = _C diff --git a/mini_dpvo/data_readers/__init__.py b/mini_dpvo/data_readers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/mini_dpvo/data_readers/__init__.py @@ -0,0 +1 @@ + diff --git a/mini_dpvo/data_readers/augmentation.py b/mini_dpvo/data_readers/augmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..6d1fba71fa5a0203b7aa73bb958204ce7e58034d --- /dev/null +++ b/mini_dpvo/data_readers/augmentation.py @@ -0,0 +1,66 @@ +import torch +import torchvision.transforms as transforms +import numpy as np +import torch.nn.functional as F + + +class RGBDAugmentor: + """ perform augmentation on RGB-D video """ + + def __init__(self, crop_size): + self.crop_size = crop_size + self.augcolor = transforms.Compose([ + transforms.ToPILImage(), + transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2/3.14), + transforms.RandomGrayscale(p=0.1), + transforms.RandomInvert(p=0.1), + transforms.ToTensor()]) + + self.max_scale = 0.5 + + def spatial_transform(self, images, depths, poses, intrinsics): + """ cropping and resizing """ + ht, wd = images.shape[2:] + + max_scale = self.max_scale + min_scale = np.log2(np.maximum( + (self.crop_size[0] + 1) / float(ht), + (self.crop_size[1] + 1) / float(wd))) + + scale = 1 + if np.random.rand() < 0.8: + scale = 2 ** np.random.uniform(0.0, max_scale) + + 
intrinsics = scale * intrinsics + + ht1 = int(scale * ht) + wd1 = int(scale * wd) + + depths = depths.unsqueeze(dim=1) + + images = F.interpolate(images, (ht1, wd1), mode='bicubic', align_corners=False) + depths = F.interpolate(depths, (ht1, wd1), recompute_scale_factor=False) + + # always perform center crop (TODO: try non-center crops) + y0 = (images.shape[2] - self.crop_size[0]) // 2 + x0 = (images.shape[3] - self.crop_size[1]) // 2 + + intrinsics = intrinsics - torch.tensor([0.0, 0.0, x0, y0]) + images = images[:, :, y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] + depths = depths[:, :, y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]] + + depths = depths.squeeze(dim=1) + return images, poses, depths, intrinsics + + def color_transform(self, images): + """ color jittering """ + num, ch, ht, wd = images.shape + images = images.permute(1, 2, 3, 0).reshape(ch, ht, wd*num) + images = 255 * self.augcolor(images[[2,1,0]] / 255.0) + return images[[2,1,0]].reshape(ch, ht, wd, num).permute(3,0,1,2).contiguous() + + def __call__(self, images, poses, depths, intrinsics): + if np.random.rand() < 0.5: + images = self.color_transform(images) + + return self.spatial_transform(images, depths, poses, intrinsics) diff --git a/mini_dpvo/data_readers/base.py b/mini_dpvo/data_readers/base.py new file mode 100644 index 0000000000000000000000000000000000000000..847d0d8c13152678e47cf9f50c4b71f7a1d0f7ed --- /dev/null +++ b/mini_dpvo/data_readers/base.py @@ -0,0 +1,176 @@ +import numpy as np +import torch +import torch.utils.data as data +import torch.nn.functional as F + +import csv +import os +import cv2 +import math +import random +import json +import pickle +import os.path as osp + +from .augmentation import RGBDAugmentor +from .rgbd_utils import * + +class RGBDDataset(data.Dataset): + def __init__(self, name, datapath, n_frames=4, crop_size=[480,640], fmin=10.0, fmax=75.0, aug=True, sample=True): + """ Base class for RGBD dataset """ + self.aug = None + self.root = datapath + self.name = name + + self.aug = aug + self.sample = sample + + self.n_frames = n_frames + self.fmin = fmin # exclude very easy examples + self.fmax = fmax # exclude very hard examples + + if self.aug: + self.aug = RGBDAugmentor(crop_size=crop_size) + + # building dataset is expensive, cache so only needs to be performed once + cur_path = osp.dirname(osp.abspath(__file__)) + if not os.path.isdir(osp.join(cur_path, 'cache')): + os.mkdir(osp.join(cur_path, 'cache')) + + self.scene_info = \ + pickle.load(open('datasets/TartanAir.pickle', 'rb'))[0] + + self._build_dataset_index() + + def _build_dataset_index(self): + self.dataset_index = [] + for scene in self.scene_info: + if not self.__class__.is_test_scene(scene): + graph = self.scene_info[scene]['graph'] + for i in graph: + if i < len(graph) - 65: + self.dataset_index.append((scene, i)) + else: + print("Reserving {} for validation".format(scene)) + + @staticmethod + def image_read(image_file): + return cv2.imread(image_file) + + @staticmethod + def depth_read(depth_file): + return np.load(depth_file) + + def build_frame_graph(self, poses, depths, intrinsics, f=16, max_flow=256): + """ compute optical flow distance between all pairs of frames """ + def read_disp(fn): + depth = self.__class__.depth_read(fn)[f//2::f, f//2::f] + depth[depth < 0.01] = np.mean(depth) + return 1.0 / depth + + poses = np.array(poses) + intrinsics = np.array(intrinsics) / f + + disps = np.stack(list(map(read_disp, depths)), 0) + d = f * compute_distance_matrix_flow(poses, disps, intrinsics) + + graph = {} + 
for i in range(d.shape[0]): + j, = np.where(d[i] < max_flow) + graph[i] = (j, d[i,j]) + + return graph + + def __getitem__(self, index): + """ return training video """ + + index = index % len(self.dataset_index) + scene_id, ix = self.dataset_index[index] + + frame_graph = self.scene_info[scene_id]['graph'] + images_list = self.scene_info[scene_id]['images'] + depths_list = self.scene_info[scene_id]['depths'] + poses_list = self.scene_info[scene_id]['poses'] + intrinsics_list = self.scene_info[scene_id]['intrinsics'] + + # stride = np.random.choice([1,2,3]) + + d = np.random.uniform(self.fmin, self.fmax) + s = 1 + + inds = [ ix ] + + while len(inds) < self.n_frames: + # get other frames within flow threshold + + if self.sample: + k = (frame_graph[ix][1] > self.fmin) & (frame_graph[ix][1] < self.fmax) + frames = frame_graph[ix][0][k] + + # prefer frames forward in time + if np.count_nonzero(frames[frames > ix]): + ix = np.random.choice(frames[frames > ix]) + + elif ix + 1 < len(images_list): + ix = ix + 1 + + elif np.count_nonzero(frames): + ix = np.random.choice(frames) + + else: + i = frame_graph[ix][0].copy() + g = frame_graph[ix][1].copy() + + g[g > d] = -1 + if s > 0: + g[i <= ix] = -1 + else: + g[i >= ix] = -1 + + if len(g) > 0 and np.max(g) > 0: + ix = i[np.argmax(g)] + else: + if ix + s >= len(images_list) or ix + s < 0: + s *= -1 + + ix = ix + s + + inds += [ ix ] + + + images, depths, poses, intrinsics = [], [], [], [] + for i in inds: + images.append(self.__class__.image_read(images_list[i])) + depths.append(self.__class__.depth_read(depths_list[i])) + poses.append(poses_list[i]) + intrinsics.append(intrinsics_list[i]) + + images = np.stack(images).astype(np.float32) + depths = np.stack(depths).astype(np.float32) + poses = np.stack(poses).astype(np.float32) + intrinsics = np.stack(intrinsics).astype(np.float32) + + images = torch.from_numpy(images).float() + images = images.permute(0, 3, 1, 2) + + disps = torch.from_numpy(1.0 / depths) + poses = torch.from_numpy(poses) + intrinsics = torch.from_numpy(intrinsics) + + if self.aug: + images, poses, disps, intrinsics = \ + self.aug(images, poses, disps, intrinsics) + + # normalize depth + s = .7 * torch.quantile(disps, .98) + disps = disps / s + poses[...,:3] *= s + + return images, poses, disps, intrinsics + + def __len__(self): + return len(self.dataset_index) + + def __imul__(self, x): + self.dataset_index *= x + return self diff --git a/mini_dpvo/data_readers/factory.py b/mini_dpvo/data_readers/factory.py new file mode 100644 index 0000000000000000000000000000000000000000..7bb0b8a0f41c92f23deaac888d0241cbc36428de --- /dev/null +++ b/mini_dpvo/data_readers/factory.py @@ -0,0 +1,26 @@ + +import pickle +import os +import os.path as osp + +# RGBD-Dataset +from .tartan import TartanAir + +def dataset_factory(dataset_list, **kwargs): + """ create a combined dataset """ + + from torch.utils.data import ConcatDataset + + dataset_map = { + 'tartan': (TartanAir, ), + } + + db_list = [] + for key in dataset_list: + # cache datasets for faster future loading + db = dataset_map[key][0](**kwargs) + + print("Dataset {} has {} images".format(key, len(db))) + db_list.append(db) + + return ConcatDataset(db_list) diff --git a/mini_dpvo/data_readers/frame_utils.py b/mini_dpvo/data_readers/frame_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f977726724e6442fcd060178c1fe7ac7428ba611 --- /dev/null +++ b/mini_dpvo/data_readers/frame_utils.py @@ -0,0 +1,164 @@ +import numpy as np +from PIL import Image +from os.path import 
* +import re +import cv2 +cv2.setNumThreads(0) + + +TAG_CHAR = np.array([202021.25], np.float32) + +def readFlowKITTI(filename): + flow = cv2.imread(filename, cv2.IMREAD_ANYDEPTH|cv2.IMREAD_COLOR) + flow = flow[:,:,::-1].astype(np.float32) + flow, valid = flow[:, :, :2], flow[:, :, 2] + flow = (flow - 2**15) / 64.0 + return flow, valid + +def readFlow(fn): + """ Read .flo file in Middlebury format""" + # Code adapted from: + # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy + + # WARNING: this will work on little-endian architectures (eg Intel x86) only! + # print 'fn = %s'%(fn) + with open(fn, 'rb') as f: + magic = np.fromfile(f, np.float32, count=1) + if 202021.25 != magic: + print('Magic number incorrect. Invalid .flo file') + return None + else: + w = np.fromfile(f, np.int32, count=1) + h = np.fromfile(f, np.int32, count=1) + # print 'Reading %d x %d flo file\n' % (w, h) + data = np.fromfile(f, np.float32, count=2*int(w)*int(h)) + # Reshape data into 3D array (columns, rows, bands) + # The reshape here is for visualization, the original code is (w,h,2) + return np.resize(data, (int(h), int(w), 2)) + +def readPFM(file): + file = open(file, 'rb') + + color = None + width = None + height = None + scale = None + endian = None + + header = file.readline().rstrip() + if header == b'PF': + color = True + elif header == b'Pf': + color = False + else: + raise Exception('Not a PFM file.') + + try: + dim_match = re.match(rb'^(\d+)\s(\d+)\s$', file.readline()) + except: + dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline()) + + if dim_match: + width, height = map(int, dim_match.groups()) + else: + raise Exception('Malformed PFM header.') + + scale = float(file.readline().rstrip()) + if scale < 0: # little-endian + endian = '<' + scale = -scale + else: + endian = '>' # big-endian + + data = np.fromfile(file, endian + 'f') + shape = (height, width, 3) if color else (height, width) + + data = np.reshape(data, shape) + data = np.flipud(data) + return data + + +def writeFlow(filename,uv,v=None): + """ Write optical flow to file. + + If v is None, uv is assumed to contain both u and v channels, + stacked in depth. + Original code by Deqing Sun, adapted from Daniel Scharstein. + """ + nBands = 2 + + if v is None: + assert(uv.ndim == 3) + assert(uv.shape[2] == 2) + u = uv[:,:,0] + v = uv[:,:,1] + else: + u = uv + + assert(u.shape == v.shape) + height,width = u.shape + f = open(filename,'wb') + # write the header + f.write(TAG_CHAR) + np.array(width).astype(np.int32).tofile(f) + np.array(height).astype(np.int32).tofile(f) + # arrange into matrix form + tmp = np.zeros((height, width*nBands)) + tmp[:,np.arange(width)*2] = u + tmp[:,np.arange(width)*2 + 1] = v + tmp.astype(np.float32).tofile(f) + f.close() + + +def readDPT(filename): + """ Read depth data from file, return as numpy array. """ + f = open(filename,'rb') + check = np.fromfile(f,dtype=np.float32,count=1)[0] + TAG_FLOAT = 202021.25 + TAG_CHAR = 'PIEH' + assert check == TAG_FLOAT, ' depth_read:: Wrong tag in flow file (should be: {0}, is: {1}). Big-endian machine? 
'.format(TAG_FLOAT,check) + width = np.fromfile(f,dtype=np.int32,count=1)[0] + height = np.fromfile(f,dtype=np.int32,count=1)[0] + size = width*height + assert width > 0 and height > 0 and size > 1 and size < 100000000, ' depth_read:: Wrong input size (width = {0}, height = {1}).'.format(width,height) + depth = np.fromfile(f,dtype=np.float32,count=-1).reshape((height,width)) + return depth + +def cam_read(filename): + """ Read camera data, return (M,N) tuple. + M is the intrinsic matrix, N is the extrinsic matrix, so that + x = M*N*X, + with x being a point in homogeneous image pixel coordinates, X being a + point in homogeneous world coordinates.""" + f = open(filename,'rb') + check = np.fromfile(f,dtype=np.float32,count=1)[0] + M = np.fromfile(f,dtype='float64',count=9).reshape((3,3)) + N = np.fromfile(f,dtype='float64',count=12).reshape((3,4)) + + E = np.eye(4) + E[0:3,:] = N + + fx, fy, cx, cy = M[0,0], M[1,1], M[0,2], M[1,2] + kvec = np.array([fx, fy, cx, cy]) + + q = Rotation.from_matrix(E[:3,:3]).as_quat() + pvec = np.concatenate([E[:3,3], q], 0) + + return pvec, kvec + + +def read_gen(file_name, pil=False): + ext = splitext(file_name)[-1] + if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg': + return Image.open(file_name) + elif ext == '.bin' or ext == '.raw': + return np.load(file_name) + elif ext == '.flo': + return readFlow(file_name).astype(np.float32) + elif ext == '.pfm': + return readPFM(file_name).astype(np.float32) + elif ext == '.dpt': + return readDPT(file_name).astype(np.float32) + elif ext == '.cam': + return cam_read(file_name) + return [] diff --git a/mini_dpvo/data_readers/rgbd_utils.py b/mini_dpvo/data_readers/rgbd_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..381ef12aea96a1484b1dfaad46ca96d5c7484a5c --- /dev/null +++ b/mini_dpvo/data_readers/rgbd_utils.py @@ -0,0 +1,188 @@ +import numpy as np +import os.path as osp + +import torch +from ..lietorch import SE3 + +from scipy.spatial.transform import Rotation + +def parse_list(filepath, skiprows=0): + """ read list data """ + data = np.loadtxt(filepath, delimiter=' ', dtype=np.unicode_, skiprows=skiprows) + return data + +def associate_frames(tstamp_image, tstamp_depth, tstamp_pose, max_dt=1.0): + """ pair images, depths, and poses """ + associations = [] + for i, t in enumerate(tstamp_image): + if tstamp_pose is None: + j = np.argmin(np.abs(tstamp_depth - t)) + if (np.abs(tstamp_depth[j] - t) < max_dt): + associations.append((i, j)) + + else: + j = np.argmin(np.abs(tstamp_depth - t)) + k = np.argmin(np.abs(tstamp_pose - t)) + + if (np.abs(tstamp_depth[j] - t) < max_dt) and \ + (np.abs(tstamp_pose[k] - t) < max_dt): + associations.append((i, j, k)) + + return associations + +def loadtum(datapath, frame_rate=-1): + """ read video data in tum-rgbd format """ + if osp.isfile(osp.join(datapath, 'groundtruth.txt')): + pose_list = osp.join(datapath, 'groundtruth.txt') + + elif osp.isfile(osp.join(datapath, 'pose.txt')): + pose_list = osp.join(datapath, 'pose.txt') + + else: + return None, None, None, None + + image_list = osp.join(datapath, 'rgb.txt') + depth_list = osp.join(datapath, 'depth.txt') + + calib_path = osp.join(datapath, 'calibration.txt') + intrinsic = None + if osp.isfile(calib_path): + intrinsic = np.loadtxt(calib_path, delimiter=' ') + intrinsic = intrinsic.astype(np.float64) + + image_data = parse_list(image_list) + depth_data = parse_list(depth_list) + pose_data = parse_list(pose_list, skiprows=1) + pose_vecs = pose_data[:,1:].astype(np.float64) + + 
tstamp_image = image_data[:,0].astype(np.float64) + tstamp_depth = depth_data[:,0].astype(np.float64) + tstamp_pose = pose_data[:,0].astype(np.float64) + associations = associate_frames(tstamp_image, tstamp_depth, tstamp_pose) + + # print(len(tstamp_image)) + # print(len(associations)) + + indicies = range(len(associations))[::5] + + # indicies = [ 0 ] + # for i in range(1, len(associations)): + # t0 = tstamp_image[associations[indicies[-1]][0]] + # t1 = tstamp_image[associations[i][0]] + # if t1 - t0 > 1.0 / frame_rate: + # indicies += [ i ] + + images, poses, depths, intrinsics, tstamps = [], [], [], [], [] + for ix in indicies: + (i, j, k) = associations[ix] + images += [ osp.join(datapath, image_data[i,1]) ] + depths += [ osp.join(datapath, depth_data[j,1]) ] + poses += [ pose_vecs[k] ] + tstamps += [ tstamp_image[i] ] + + if intrinsic is not None: + intrinsics += [ intrinsic ] + + return images, depths, poses, intrinsics, tstamps + + +def all_pairs_distance_matrix(poses, beta=2.5): + """ compute distance matrix between all pairs of poses """ + poses = np.array(poses, dtype=np.float32) + poses[:,:3] *= beta # scale to balence rot + trans + poses = SE3(torch.from_numpy(poses)) + + r = (poses[:,None].inv() * poses[None,:]).log() + return r.norm(dim=-1).cpu().numpy() + +def pose_matrix_to_quaternion(pose): + """ convert 4x4 pose matrix to (t, q) """ + q = Rotation.from_matrix(pose[:3, :3]).as_quat() + return np.concatenate([pose[:3, 3], q], axis=0) + +def compute_distance_matrix_flow(poses, disps, intrinsics): + """ compute flow magnitude between all pairs of frames """ + if not isinstance(poses, SE3): + poses = torch.from_numpy(poses).float().cuda()[None] + poses = SE3(poses).inv() + + disps = torch.from_numpy(disps).float().cuda()[None] + intrinsics = torch.from_numpy(intrinsics).float().cuda()[None] + + N = poses.shape[1] + + ii, jj = torch.meshgrid(torch.arange(N), torch.arange(N)) + ii = ii.reshape(-1).cuda() + jj = jj.reshape(-1).cuda() + + MAX_FLOW = 100.0 + matrix = np.zeros((N, N), dtype=np.float32) + + s = 2048 + for i in range(0, ii.shape[0], s): + flow1, val1 = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s]) + flow2, val2 = pops.induced_flow(poses, disps, intrinsics, jj[i:i+s], ii[i:i+s]) + + flow = torch.stack([flow1, flow2], dim=2) + val = torch.stack([val1, val2], dim=2) + + mag = flow.norm(dim=-1).clamp(max=MAX_FLOW) + mag = mag.view(mag.shape[1], -1) + val = val.view(val.shape[1], -1) + + mag = (mag * val).mean(-1) / val.mean(-1) + mag[val.mean(-1) < 0.7] = np.inf + + i1 = ii[i:i+s].cpu().numpy() + j1 = jj[i:i+s].cpu().numpy() + matrix[i1, j1] = mag.cpu().numpy() + + return matrix + + +def compute_distance_matrix_flow2(poses, disps, intrinsics, beta=0.4): + """ compute flow magnitude between all pairs of frames """ + # if not isinstance(poses, SE3): + # poses = torch.from_numpy(poses).float().cuda()[None] + # poses = SE3(poses).inv() + + # disps = torch.from_numpy(disps).float().cuda()[None] + # intrinsics = torch.from_numpy(intrinsics).float().cuda()[None] + + N = poses.shape[1] + + ii, jj = torch.meshgrid(torch.arange(N), torch.arange(N)) + ii = ii.reshape(-1) + jj = jj.reshape(-1) + + MAX_FLOW = 128.0 + matrix = np.zeros((N, N), dtype=np.float32) + + s = 2048 + for i in range(0, ii.shape[0], s): + flow1a, val1a = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s], tonly=True) + flow1b, val1b = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s]) + flow2a, val2a = pops.induced_flow(poses, disps, intrinsics, jj[i:i+s], 
ii[i:i+s], tonly=True) + flow2b, val2b = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s]) + + flow1 = flow1a + beta * flow1b + val1 = val1a * val2b + + flow2 = flow2a + beta * flow2b + val2 = val2a * val2b + + flow = torch.stack([flow1, flow2], dim=2) + val = torch.stack([val1, val2], dim=2) + + mag = flow.norm(dim=-1).clamp(max=MAX_FLOW) + mag = mag.view(mag.shape[1], -1) + val = val.view(val.shape[1], -1) + + mag = (mag * val).mean(-1) / val.mean(-1) + mag[val.mean(-1) < 0.8] = np.inf + + i1 = ii[i:i+s].cpu().numpy() + j1 = jj[i:i+s].cpu().numpy() + matrix[i1, j1] = mag.cpu().numpy() + + return matrix diff --git a/mini_dpvo/data_readers/tartan.py b/mini_dpvo/data_readers/tartan.py new file mode 100644 index 0000000000000000000000000000000000000000..3539f733a11a89564e6b0a94bcb0353ce9716e1e --- /dev/null +++ b/mini_dpvo/data_readers/tartan.py @@ -0,0 +1,110 @@ + +import numpy as np +import torch +import glob +import cv2 +import os +import os.path as osp + +from ..lietorch import SE3 +from .base import RGBDDataset + +# cur_path = osp.dirname(osp.abspath(__file__)) +# test_split = osp.join(cur_path, 'tartan_test.txt') +# test_split = open(test_split).read().split() + + +test_split = [ + "abandonedfactory/abandonedfactory/Easy/P011", + "abandonedfactory/abandonedfactory/Hard/P011", + "abandonedfactory_night/abandonedfactory_night/Easy/P013", + "abandonedfactory_night/abandonedfactory_night/Hard/P014", + "amusement/amusement/Easy/P008", + "amusement/amusement/Hard/P007", + "carwelding/carwelding/Easy/P007", + "endofworld/endofworld/Easy/P009", + "gascola/gascola/Easy/P008", + "gascola/gascola/Hard/P009", + "hospital/hospital/Easy/P036", + "hospital/hospital/Hard/P049", + "japanesealley/japanesealley/Easy/P007", + "japanesealley/japanesealley/Hard/P005", + "neighborhood/neighborhood/Easy/P021", + "neighborhood/neighborhood/Hard/P017", + "ocean/ocean/Easy/P013", + "ocean/ocean/Hard/P009", + "office2/office2/Easy/P011", + "office2/office2/Hard/P010", + "office/office/Hard/P007", + "oldtown/oldtown/Easy/P007", + "oldtown/oldtown/Hard/P008", + "seasidetown/seasidetown/Easy/P009", + "seasonsforest/seasonsforest/Easy/P011", + "seasonsforest/seasonsforest/Hard/P006", + "seasonsforest_winter/seasonsforest_winter/Easy/P009", + "seasonsforest_winter/seasonsforest_winter/Hard/P018", + "soulcity/soulcity/Easy/P012", + "soulcity/soulcity/Hard/P009", + "westerndesert/westerndesert/Easy/P013", + "westerndesert/westerndesert/Hard/P007", +] + + +class TartanAir(RGBDDataset): + + # scale depths to balance rot & trans + DEPTH_SCALE = 5.0 + + def __init__(self, mode='training', **kwargs): + self.mode = mode + self.n_frames = 2 + super(TartanAir, self).__init__(name='TartanAir', **kwargs) + + @staticmethod + def is_test_scene(scene): + # print(scene, any(x in scene for x in test_split)) + return any(x in scene for x in test_split) + + def _build_dataset(self): + from tqdm import tqdm + print("Building TartanAir dataset") + + scene_info = {} + scenes = glob.glob(osp.join(self.root, '*/*/*/*')) + for scene in tqdm(sorted(scenes)): + images = sorted(glob.glob(osp.join(scene, 'image_left/*.png'))) + depths = sorted(glob.glob(osp.join(scene, 'depth_left/*.npy'))) + + if len(images) != len(depths): + continue + + poses = np.loadtxt(osp.join(scene, 'pose_left.txt'), delimiter=' ') + poses = poses[:, [1, 2, 0, 4, 5, 3, 6]] + poses[:,:3] /= TartanAir.DEPTH_SCALE + intrinsics = [TartanAir.calib_read()] * len(images) + + # graph of co-visible frames based on flow + graph = self.build_frame_graph(poses, 
depths, intrinsics) + + scene = '/'.join(scene.split('/')) + scene_info[scene] = {'images': images, 'depths': depths, + 'poses': poses, 'intrinsics': intrinsics, 'graph': graph} + + return scene_info + + @staticmethod + def calib_read(): + return np.array([320.0, 320.0, 320.0, 240.0]) + + @staticmethod + def image_read(image_file): + return cv2.imread(image_file) + + @staticmethod + def depth_read(depth_file): + depth = np.load(depth_file) / TartanAir.DEPTH_SCALE + depth[depth==np.nan] = 1.0 + depth[depth==np.inf] = 1.0 + return depth + + diff --git a/mini_dpvo/data_readers/tartan_test.txt b/mini_dpvo/data_readers/tartan_test.txt new file mode 100644 index 0000000000000000000000000000000000000000..d52d88da87b3b2096624162da3872116eefb6003 --- /dev/null +++ b/mini_dpvo/data_readers/tartan_test.txt @@ -0,0 +1,32 @@ +abandonedfactory/abandonedfactory/Easy/P011 +abandonedfactory/abandonedfactory/Hard/P011 +abandonedfactory_night/abandonedfactory_night/Easy/P013 +abandonedfactory_night/abandonedfactory_night/Hard/P014 +amusement/amusement/Easy/P008 +amusement/amusement/Hard/P007 +carwelding/carwelding/Easy/P007 +endofworld/endofworld/Easy/P009 +gascola/gascola/Easy/P008 +gascola/gascola/Hard/P009 +hospital/hospital/Easy/P036 +hospital/hospital/Hard/P049 +japanesealley/japanesealley/Easy/P007 +japanesealley/japanesealley/Hard/P005 +neighborhood/neighborhood/Easy/P021 +neighborhood/neighborhood/Hard/P017 +ocean/ocean/Easy/P013 +ocean/ocean/Hard/P009 +office2/office2/Easy/P011 +office2/office2/Hard/P010 +office/office/Hard/P007 +oldtown/oldtown/Easy/P007 +oldtown/oldtown/Hard/P008 +seasidetown/seasidetown/Easy/P009 +seasonsforest/seasonsforest/Easy/P011 +seasonsforest/seasonsforest/Hard/P006 +seasonsforest_winter/seasonsforest_winter/Easy/P009 +seasonsforest_winter/seasonsforest_winter/Hard/P018 +soulcity/soulcity/Easy/P012 +soulcity/soulcity/Hard/P009 +westerndesert/westerndesert/Easy/P013 +westerndesert/westerndesert/Hard/P007 diff --git a/mini_dpvo/dpvo.py b/mini_dpvo/dpvo.py new file mode 100644 index 0000000000000000000000000000000000000000..43d7d62518d56c57785d5b910f62d9719845d430 --- /dev/null +++ b/mini_dpvo/dpvo.py @@ -0,0 +1,410 @@ +import torch +import numpy as np +import torch.nn.functional as F + +from . import fastba +from . import altcorr +from . import lietorch +from .lietorch import SE3 + +from .net import VONet + +from .utils import Timer, flatmeshgrid +from . 
import projective_ops as pops + +autocast = torch.cuda.amp.autocast +Id = SE3.Identity(1, device="cuda") + + +class DPVO: + def __init__(self, cfg, network, ht=480, wd=640): + self.cfg = cfg + self.load_weights(network) + self.is_initialized = False + self.enable_timing = False + + self.n = 0 # number of frames + self.m = 0 # number of patches + self.M = self.cfg.PATCHES_PER_FRAME + self.N = self.cfg.BUFFER_SIZE + + self.ht = ht # image height + self.wd = wd # image width + + DIM = self.DIM + RES = self.RES + + ### state attributes ### + self.tlist = [] + self.counter = 0 + + # dummy image for visualization + self.image_ = torch.zeros(self.ht, self.wd, 3, dtype=torch.uint8, device="cpu") + + self.tstamps_ = torch.zeros(self.N, dtype=torch.float64, device="cuda") + self.poses_ = torch.zeros(self.N, 7, dtype=torch.float32, device="cuda") + self.patches_ = torch.zeros( + self.N, self.M, 3, self.P, self.P, dtype=torch.float, device="cuda" + ) + self.intrinsics_ = torch.zeros(self.N, 4, dtype=torch.float, device="cuda") + + self.points_ = torch.zeros(self.N * self.M, 3, dtype=torch.float, device="cuda") + self.colors_ = torch.zeros(self.N, self.M, 3, dtype=torch.uint8, device="cuda") + + self.index_ = torch.zeros(self.N, self.M, dtype=torch.long, device="cuda") + self.index_map_ = torch.zeros(self.N, dtype=torch.long, device="cuda") + + ### network attributes ### + self.mem = 32 + + if self.cfg.MIXED_PRECISION: + self.kwargs = kwargs = {"device": "cuda", "dtype": torch.half} + else: + self.kwargs = kwargs = {"device": "cuda", "dtype": torch.float} + + self.imap_ = torch.zeros(self.mem, self.M, DIM, **kwargs) + self.gmap_ = torch.zeros(self.mem, self.M, 128, self.P, self.P, **kwargs) + + ht = ht // RES + wd = wd // RES + + self.fmap1_ = torch.zeros(1, self.mem, 128, ht // 1, wd // 1, **kwargs) + self.fmap2_ = torch.zeros(1, self.mem, 128, ht // 4, wd // 4, **kwargs) + + # feature pyramid + self.pyramid = (self.fmap1_, self.fmap2_) + + self.net = torch.zeros(1, 0, DIM, **kwargs) + self.ii = torch.as_tensor([], dtype=torch.long, device="cuda") + self.jj = torch.as_tensor([], dtype=torch.long, device="cuda") + self.kk = torch.as_tensor([], dtype=torch.long, device="cuda") + + # initialize poses to identity matrix + self.poses_[:, 6] = 1.0 + + # store relative poses for removed frames + self.delta = {} + + def load_weights(self, network): + # load network from checkpoint file + if isinstance(network, str): + from collections import OrderedDict + + state_dict = torch.load(network) + new_state_dict = OrderedDict() + for k, v in state_dict.items(): + if "update.lmbda" not in k: + new_state_dict[k.replace("module.", "")] = v + + self.network = VONet() + self.network.load_state_dict(new_state_dict) + + else: + self.network = network + + # steal network attributes + self.DIM = self.network.DIM + self.RES = self.network.RES + self.P = self.network.P + + self.network.cuda() + self.network.eval() + + # if self.cfg.MIXED_PRECISION: + # self.network.half() + + @property + def poses(self): + return self.poses_.view(1, self.N, 7) + + @property + def patches(self): + return self.patches_.view(1, self.N * self.M, 3, 3, 3) + + @property + def intrinsics(self): + return self.intrinsics_.view(1, self.N, 4) + + @property + def ix(self): + return self.index_.view(-1) + + @property + def imap(self): + return self.imap_.view(1, self.mem * self.M, self.DIM) + + @property + def gmap(self): + return self.gmap_.view(1, self.mem * self.M, 128, 3, 3) + + def get_pose(self, t): + if t in self.traj: + return SE3(self.traj[t]) + 
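+ # frames dropped by keyframing only keep a relative pose: self.delta[t]
+ # stores (t0, dP) with dP = pose[k] * pose[k-1]^-1 at removal time, so
+ # recurse until a timestamp still present in self.traj is found and
+ # chain the relative motions, i.e. pose(t) = dP * pose(t0)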
+ t0, dP = self.delta[t] + return dP * self.get_pose(t0) + + def terminate(self): + """interpolate missing poses""" + print("Terminating...") + self.traj = {} + for i in range(self.n): + current_t: int = self.tstamps_[i].item() + self.traj[current_t] = self.poses_[i] + + poses = [self.get_pose(t) for t in range(self.counter)] + poses = lietorch.stack(poses, dim=0) + poses = poses.inv().data.cpu().numpy() + tstamps = np.array(self.tlist, dtype=np.float64) + + return poses, tstamps + + def corr(self, coords, indicies=None): + """local correlation volume""" + ii, jj = indicies if indicies is not None else (self.kk, self.jj) + ii1 = ii % (self.M * self.mem) + jj1 = jj % (self.mem) + corr1 = altcorr.corr(self.gmap, self.pyramid[0], coords / 1, ii1, jj1, 3) + corr2 = altcorr.corr(self.gmap, self.pyramid[1], coords / 4, ii1, jj1, 3) + return torch.stack([corr1, corr2], -1).view(1, len(ii), -1) + + def reproject(self, indicies=None): + """reproject patch k from i -> j""" + (ii, jj, kk) = indicies if indicies is not None else (self.ii, self.jj, self.kk) + coords = pops.transform( + SE3(self.poses), self.patches, self.intrinsics, ii, jj, kk + ) + return coords.permute(0, 1, 4, 2, 3).contiguous() + + def append_factors(self, ii, jj): + self.jj = torch.cat([self.jj, jj]) + self.kk = torch.cat([self.kk, ii]) + self.ii = torch.cat([self.ii, self.ix[ii]]) + + net = torch.zeros(1, len(ii), self.DIM, **self.kwargs) + self.net = torch.cat([self.net, net], dim=1) + + def remove_factors(self, m): + self.ii = self.ii[~m] + self.jj = self.jj[~m] + self.kk = self.kk[~m] + self.net = self.net[:, ~m] + + def motion_probe(self): + """kinda hacky way to ensure enough motion for initialization""" + kk = torch.arange(self.m - self.M, self.m, device="cuda") + jj = self.n * torch.ones_like(kk) + ii = self.ix[kk] + + net = torch.zeros(1, len(ii), self.DIM, **self.kwargs) + coords = self.reproject(indicies=(ii, jj, kk)) + + with autocast(enabled=self.cfg.MIXED_PRECISION): + corr = self.corr(coords, indicies=(kk, jj)) + ctx = self.imap[:, kk % (self.M * self.mem)] + net, (delta, weight, _) = self.network.update( + net, ctx, corr, None, ii, jj, kk + ) + + return torch.quantile(delta.norm(dim=-1).float(), 0.5) + + def motionmag(self, i, j): + k = (self.ii == i) & (self.jj == j) + ii = self.ii[k] + jj = self.jj[k] + kk = self.kk[k] + + flow = pops.flow_mag( + SE3(self.poses), self.patches, self.intrinsics, ii, jj, kk, beta=0.5 + ) + return flow.mean().item() + + def keyframe(self): + i = self.n - self.cfg.KEYFRAME_INDEX - 1 + j = self.n - self.cfg.KEYFRAME_INDEX + 1 + m = self.motionmag(i, j) + self.motionmag(j, i) + + if m / 2 < self.cfg.KEYFRAME_THRESH: + k = self.n - self.cfg.KEYFRAME_INDEX + t0 = self.tstamps_[k - 1].item() + t1 = self.tstamps_[k].item() + + dP = SE3(self.poses_[k]) * SE3(self.poses_[k - 1]).inv() + self.delta[t1] = (t0, dP) + + to_remove = (self.ii == k) | (self.jj == k) + self.remove_factors(to_remove) + + self.kk[self.ii > k] -= self.M + self.ii[self.ii > k] -= 1 + self.jj[self.jj > k] -= 1 + + for i in range(k, self.n - 1): + self.tstamps_[i] = self.tstamps_[i + 1] + self.colors_[i] = self.colors_[i + 1] + self.poses_[i] = self.poses_[i + 1] + self.patches_[i] = self.patches_[i + 1] + self.intrinsics_[i] = self.intrinsics_[i + 1] + + self.imap_[i % self.mem] = self.imap_[(i + 1) % self.mem] + self.gmap_[i % self.mem] = self.gmap_[(i + 1) % self.mem] + self.fmap1_[0, i % self.mem] = self.fmap1_[0, (i + 1) % self.mem] + self.fmap2_[0, i % self.mem] = self.fmap2_[0, (i + 1) % self.mem] + + self.n -= 1 + 
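+ # a redundant keyframe was removed above: shrink the frame count here,
+ # the patch count on the next line, and then prune factors whose source
+ # frame falls outside REMOVAL_WINDOW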
self.m -= self.M + + to_remove = self.ix[self.kk] < self.n - self.cfg.REMOVAL_WINDOW + self.remove_factors(to_remove) + + def update(self): + with Timer("other", enabled=self.enable_timing): + coords = self.reproject() + + with autocast(enabled=True): + corr = self.corr(coords) + ctx = self.imap[:, self.kk % (self.M * self.mem)] + self.net, (delta, weight, _) = self.network.update( + self.net, ctx, corr, None, self.ii, self.jj, self.kk + ) + + lmbda = torch.as_tensor([1e-4], device="cuda") + weight = weight.float() + target = coords[..., self.P // 2, self.P // 2] + delta.float() + + with Timer("BA", enabled=self.enable_timing): + t0 = self.n - self.cfg.OPTIMIZATION_WINDOW if self.is_initialized else 1 + t0 = max(t0, 1) + + try: + fastba.BA( + self.poses, + self.patches, + self.intrinsics, + target, + weight, + lmbda, + self.ii, + self.jj, + self.kk, + t0, + self.n, + 2, + ) + except: + print("Warning BA failed...") + + points = pops.point_cloud( + SE3(self.poses), + self.patches[:, : self.m], + self.intrinsics, + self.ix[: self.m], + ) + points = (points[..., 1, 1, :3] / points[..., 1, 1, 3:]).reshape(-1, 3) + self.points_[: len(points)] = points[:] + + def __edges_all(self): + return flatmeshgrid( + torch.arange(0, self.m, device="cuda"), + torch.arange(0, self.n, device="cuda"), + indexing="ij", + ) + + def __edges_forw(self): + r = self.cfg.PATCH_LIFETIME + t0 = self.M * max((self.n - r), 0) + t1 = self.M * max((self.n - 1), 0) + return flatmeshgrid( + torch.arange(t0, t1, device="cuda"), + torch.arange(self.n - 1, self.n, device="cuda"), + indexing="ij", + ) + + def __edges_back(self): + r = self.cfg.PATCH_LIFETIME + t0 = self.M * max((self.n - 1), 0) + t1 = self.M * max((self.n - 0), 0) + return flatmeshgrid( + torch.arange(t0, t1, device="cuda"), + torch.arange(max(self.n - r, 0), self.n, device="cuda"), + indexing="ij", + ) + + def __call__(self, tstamp: int, image, intrinsics) -> None: + """track new frame""" + + if (self.n + 1) >= self.N: + raise Exception( + f'The buffer size is too small. 
You can increase it using "--buffer {self.N*2}"' + ) + + image = 2 * (image[None, None] / 255.0) - 0.5 + + with autocast(enabled=self.cfg.MIXED_PRECISION): + fmap, gmap, imap, patches, _, clr = self.network.patchify( + image, + patches_per_image=self.cfg.PATCHES_PER_FRAME, + gradient_bias=self.cfg.GRADIENT_BIAS, + return_color=True, + ) + + ### update state attributes ### + self.tlist.append(tstamp) + self.tstamps_[self.n] = self.counter + self.intrinsics_[self.n] = intrinsics / self.RES + + # color info for visualization + clr = (clr[0, :, [2, 1, 0]] + 0.5) * (255.0 / 2) + self.colors_[self.n] = clr.to(torch.uint8) + + self.index_[self.n + 1] = self.n + 1 + self.index_map_[self.n + 1] = self.m + self.M + + if self.n > 1: + if self.cfg.MOTION_MODEL == "DAMPED_LINEAR": + P1 = SE3(self.poses_[self.n - 1]) + P2 = SE3(self.poses_[self.n - 2]) + + xi = self.cfg.MOTION_DAMPING * (P1 * P2.inv()).log() + tvec_qvec = (SE3.exp(xi) * P1).data + self.poses_[self.n] = tvec_qvec + else: + tvec_qvec = self.poses[self.n - 1] + self.poses_[self.n] = tvec_qvec + + # TODO better depth initialization + patches[:, :, 2] = torch.rand_like(patches[:, :, 2, 0, 0, None, None]) + if self.is_initialized: + s = torch.median(self.patches_[self.n - 3 : self.n, :, 2]) + patches[:, :, 2] = s + + self.patches_[self.n] = patches + + ### update network attributes ### + self.imap_[self.n % self.mem] = imap.squeeze() + self.gmap_[self.n % self.mem] = gmap.squeeze() + self.fmap1_[:, self.n % self.mem] = F.avg_pool2d(fmap[0], 1, 1) + self.fmap2_[:, self.n % self.mem] = F.avg_pool2d(fmap[0], 4, 4) + + self.counter += 1 + if self.n > 0 and not self.is_initialized: + if self.motion_probe() < 2.0: + self.delta[self.counter - 1] = (self.counter - 2, Id[0]) + return + + self.n += 1 + self.m += self.M + + # relative pose + self.append_factors(*self.__edges_forw()) + self.append_factors(*self.__edges_back()) + + if self.n == 8 and not self.is_initialized: + self.is_initialized = True + + for itr in range(12): + self.update() + + elif self.is_initialized: + self.update() + self.keyframe() diff --git a/mini_dpvo/extractor.py b/mini_dpvo/extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..119141ea07368269e8fa42772d9da3bed8d90629 --- /dev/null +++ b/mini_dpvo/extractor.py @@ -0,0 +1,264 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class ResidualBlock(nn.Module): + def __init__(self, in_planes, planes, norm_fn='group', stride=1): + super(ResidualBlock, self).__init__() + + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, stride=stride) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1) + self.relu = nn.ReLU(inplace=True) + + num_groups = planes // 8 + + if norm_fn == 'group': + self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes) + self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes) + if not stride == 1: + self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes) + + elif norm_fn == 'batch': + self.norm1 = nn.BatchNorm2d(planes) + self.norm2 = nn.BatchNorm2d(planes) + if not stride == 1: + self.norm3 = nn.BatchNorm2d(planes) + + elif norm_fn == 'instance': + self.norm1 = nn.InstanceNorm2d(planes) + self.norm2 = nn.InstanceNorm2d(planes) + if not stride == 1: + self.norm3 = nn.InstanceNorm2d(planes) + + elif norm_fn == 'none': + self.norm1 = nn.Sequential() + self.norm2 = nn.Sequential() + if not stride == 1: + self.norm3 = nn.Sequential() + + if stride == 1: + self.downsample = None + + else: + 
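+ # strided block: project the skip branch with a 1x1 conv and norm3 so
+ # its shape matches the main branch before the residual addition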
self.downsample = nn.Sequential( + nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm3) + + def forward(self, x): + y = x + y = self.relu(self.norm1(self.conv1(y))) + y = self.relu(self.norm2(self.conv2(y))) + + if self.downsample is not None: + x = self.downsample(x) + + return self.relu(x+y) + + +class BottleneckBlock(nn.Module): + def __init__(self, in_planes, planes, norm_fn='group', stride=1): + super(BottleneckBlock, self).__init__() + + self.conv1 = nn.Conv2d(in_planes, planes//4, kernel_size=1, padding=0) + self.conv2 = nn.Conv2d(planes//4, planes//4, kernel_size=3, padding=1, stride=stride) + self.conv3 = nn.Conv2d(planes//4, planes, kernel_size=1, padding=0) + self.relu = nn.ReLU(inplace=True) + + num_groups = planes // 8 + + if norm_fn == 'group': + self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4) + self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4) + self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes) + if not stride == 1: + self.norm4 = nn.GroupNorm(num_groups=num_groups, num_channels=planes) + + elif norm_fn == 'batch': + self.norm1 = nn.BatchNorm2d(planes//4) + self.norm2 = nn.BatchNorm2d(planes//4) + self.norm3 = nn.BatchNorm2d(planes) + if not stride == 1: + self.norm4 = nn.BatchNorm2d(planes) + + elif norm_fn == 'instance': + self.norm1 = nn.InstanceNorm2d(planes//4) + self.norm2 = nn.InstanceNorm2d(planes//4) + self.norm3 = nn.InstanceNorm2d(planes) + if not stride == 1: + self.norm4 = nn.InstanceNorm2d(planes) + + elif norm_fn == 'none': + self.norm1 = nn.Sequential() + self.norm2 = nn.Sequential() + self.norm3 = nn.Sequential() + if not stride == 1: + self.norm4 = nn.Sequential() + + if stride == 1: + self.downsample = None + + else: + self.downsample = nn.Sequential( + nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm4) + + def forward(self, x): + y = x + y = self.relu(self.norm1(self.conv1(y))) + y = self.relu(self.norm2(self.conv2(y))) + y = self.relu(self.norm3(self.conv3(y))) + + if self.downsample is not None: + x = self.downsample(x) + + return self.relu(x+y) + +DIM=32 + +class BasicEncoder(nn.Module): + def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0, multidim=False): + super(BasicEncoder, self).__init__() + self.norm_fn = norm_fn + self.multidim = multidim + + if self.norm_fn == 'group': + self.norm1 = nn.GroupNorm(num_groups=8, num_channels=DIM) + + elif self.norm_fn == 'batch': + self.norm1 = nn.BatchNorm2d(DIM) + + elif self.norm_fn == 'instance': + self.norm1 = nn.InstanceNorm2d(DIM) + + elif self.norm_fn == 'none': + self.norm1 = nn.Sequential() + + self.conv1 = nn.Conv2d(3, DIM, kernel_size=7, stride=2, padding=3) + self.relu1 = nn.ReLU(inplace=True) + + self.in_planes = DIM + self.layer1 = self._make_layer(DIM, stride=1) + self.layer2 = self._make_layer(2*DIM, stride=2) + self.layer3 = self._make_layer(4*DIM, stride=2) + + # output convolution + self.conv2 = nn.Conv2d(4*DIM, output_dim, kernel_size=1) + + if self.multidim: + self.layer4 = self._make_layer(256, stride=2) + self.layer5 = self._make_layer(512, stride=2) + + self.in_planes = 256 + self.layer6 = self._make_layer(256, stride=1) + + self.in_planes = 128 + self.layer7 = self._make_layer(128, stride=1) + + self.up1 = nn.Conv2d(512, 256, 1) + self.up2 = nn.Conv2d(256, 128, 1) + self.conv3 = nn.Conv2d(128, output_dim, kernel_size=1) + + if dropout > 0: + self.dropout = nn.Dropout2d(p=dropout) + else: + self.dropout = None + + for m in self.modules(): + if isinstance(m, nn.Conv2d): 
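+ # Kaiming (He) initialization for conv weights; the norm layers handled
+ # below are initialized to unit scale and zero bias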
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)): + if m.weight is not None: + nn.init.constant_(m.weight, 1) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def _make_layer(self, dim, stride=1): + layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride) + layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1) + layers = (layer1, layer2) + + self.in_planes = dim + return nn.Sequential(*layers) + + def forward(self, x): + b, n, c1, h1, w1 = x.shape + x = x.view(b*n, c1, h1, w1) + + x = self.conv1(x) + x = self.norm1(x) + x = self.relu1(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + + x = self.conv2(x) + + _, c2, h2, w2 = x.shape + return x.view(b, n, c2, h2, w2) + + +class BasicEncoder4(nn.Module): + def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0, multidim=False): + super(BasicEncoder4, self).__init__() + self.norm_fn = norm_fn + self.multidim = multidim + + if self.norm_fn == 'group': + self.norm1 = nn.GroupNorm(num_groups=8, num_channels=DIM) + + elif self.norm_fn == 'batch': + self.norm1 = nn.BatchNorm2d(DIM) + + elif self.norm_fn == 'instance': + self.norm1 = nn.InstanceNorm2d(DIM) + + elif self.norm_fn == 'none': + self.norm1 = nn.Sequential() + + self.conv1 = nn.Conv2d(3, DIM, kernel_size=7, stride=2, padding=3) + self.relu1 = nn.ReLU(inplace=True) + + self.in_planes = DIM + self.layer1 = self._make_layer(DIM, stride=1) + self.layer2 = self._make_layer(2*DIM, stride=2) + + # output convolution + self.conv2 = nn.Conv2d(2*DIM, output_dim, kernel_size=1) + + if dropout > 0: + self.dropout = nn.Dropout2d(p=dropout) + else: + self.dropout = None + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)): + if m.weight is not None: + nn.init.constant_(m.weight, 1) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def _make_layer(self, dim, stride=1): + layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride) + layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1) + layers = (layer1, layer2) + + self.in_planes = dim + return nn.Sequential(*layers) + + def forward(self, x): + b, n, c1, h1, w1 = x.shape + x = x.view(b*n, c1, h1, w1) + + x = self.conv1(x) + x = self.norm1(x) + x = self.relu1(x) + + x = self.layer1(x) + x = self.layer2(x) + + x = self.conv2(x) + + _, c2, h2, w2 = x.shape + return x.view(b, n, c2, h2, w2) diff --git a/mini_dpvo/fastba/__init__.py b/mini_dpvo/fastba/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..16ec66ade2ef4bc7b99673e0144a9e9a4bff2998 --- /dev/null +++ b/mini_dpvo/fastba/__init__.py @@ -0,0 +1 @@ +from .ba import BA, neighbors, reproject \ No newline at end of file diff --git a/mini_dpvo/fastba/ba.cpp b/mini_dpvo/fastba/ba.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e1bfb0343a17d5a125add3f1105bf6bec37c310d --- /dev/null +++ b/mini_dpvo/fastba/ba.cpp @@ -0,0 +1,157 @@ +#include +#include +#include +#include +#include + + +std::vector cuda_ba( + torch::Tensor poses, + torch::Tensor patches, + torch::Tensor intrinsics, + torch::Tensor target, + torch::Tensor weight, + torch::Tensor lmbda, + torch::Tensor ii, + torch::Tensor jj, + torch::Tensor kk, + int t0, int t1, int iterations); + + +torch::Tensor cuda_reproject( + torch::Tensor poses, + torch::Tensor patches, + 
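+ // forward declaration; the CUDA implementation lives in ba_cuda.cu and
+ // both the ba and reproject wrappers below are exported to Python via
+ // PYBIND11_MODULE at the end of this file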
torch::Tensor intrinsics, + torch::Tensor ii, + torch::Tensor jj, + torch::Tensor kk); + +std::vector ba( + torch::Tensor poses, + torch::Tensor patches, + torch::Tensor intrinsics, + torch::Tensor target, + torch::Tensor weight, + torch::Tensor lmbda, + torch::Tensor ii, + torch::Tensor jj, + torch::Tensor kk, + int t0, int t1, int iterations) { + return cuda_ba(poses, patches, intrinsics, target, weight, lmbda, ii, jj, kk, t0, t1, iterations); +} + + +torch::Tensor reproject( + torch::Tensor poses, + torch::Tensor patches, + torch::Tensor intrinsics, + torch::Tensor ii, + torch::Tensor jj, + torch::Tensor kk) { + return cuda_reproject(poses, patches, intrinsics, ii, jj, kk); +} + +// std::vector neighbors(torch::Tensor ii, torch::Tensor jj) +// { +// ii = ii.to(torch::kCPU); +// jj = jj.to(torch::kCPU); +// auto ii_data = ii.accessor(); +// auto jj_data = jj.accessor(); + +// std::unordered_map> graph; +// std::unordered_map> index; +// for (int i=0; i < ii.size(0); i++) { +// const long ix = ii_data[i]; +// const long jx = jj_data[i]; +// if (graph.find(ix) == graph.end()) { +// graph[ix] = std::vector(); +// index[ix] = std::vector(); +// } +// graph[ix].push_back(jx); +// index[ix].push_back( i); +// } + +// auto opts = torch::TensorOptions().dtype(torch::kInt64); +// torch::Tensor ix = torch::empty({ii.size(0)}, opts); +// torch::Tensor jx = torch::empty({jj.size(0)}, opts); + +// auto ix_data = ix.accessor(); +// auto jx_data = jx.accessor(); + +// for (std::pair> element : graph) { +// std::vector& v = graph[element.first]; +// std::vector& idx = index[element.first]; + +// std::stable_sort(idx.begin(), idx.end(), +// [&v](size_t i, size_t j) {return v[i] < v[j];}); + +// ix_data[idx.front()] = -1; +// jx_data[idx.back()] = -1; + +// for (int i=0; i < idx.size(); i++) { +// ix_data[idx[i]] = (i > 0) ? idx[i-1] : -1; +// jx_data[idx[i]] = (i < idx.size() - 1) ? idx[i+1] : -1; +// } +// } + +// ix = ix.to(torch::kCUDA); +// jx = jx.to(torch::kCUDA); + +// return {ix, jx}; +// } + + +std::vector neighbors(torch::Tensor ii, torch::Tensor jj) +{ + + auto tup = torch::_unique(ii, true, true); + torch::Tensor uniq = std::get<0>(tup).to(torch::kCPU); + torch::Tensor perm = std::get<1>(tup).to(torch::kCPU); + + jj = jj.to(torch::kCPU); + auto jj_accessor = jj.accessor(); + + auto perm_accessor = perm.accessor(); + std::vector> index(uniq.size(0)); + for (int i=0; i < ii.size(0); i++) { + index[perm_accessor[i]].push_back(i); + } + + auto opts = torch::TensorOptions().dtype(torch::kInt64); + torch::Tensor ix = torch::empty({ii.size(0)}, opts); + torch::Tensor jx = torch::empty({ii.size(0)}, opts); + + auto ix_accessor = ix.accessor(); + auto jx_accessor = jx.accessor(); + + for (int i=0; i& idx = index[i]; + std::stable_sort(idx.begin(), idx.end(), + [&jj_accessor](size_t i, size_t j) {return jj_accessor[i] < jj_accessor[j];}); + + for (int i=0; i < idx.size(); i++) { + ix_accessor[idx[i]] = (i > 0) ? idx[i-1] : -1; + jx_accessor[idx[i]] = (i < idx.size() - 1) ? 
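+ // factors of each source frame ii are sorted by target frame jj; ix/jx
+ // record the previous/next factor index in that ordering, or -1 at
+ // either end of the chain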
idx[i+1] : -1; + } + } + + // for (int i=0; i= 0) std::cout << jj_accessor[ix_accessor[i]] << " "; + // if (jx_accessor[i] >= 0) std::cout << jj_accessor[jx_accessor[i]] << " "; + // std::cout << std::endl; + // } + + ix = ix.to(torch::kCUDA); + jx = jx.to(torch::kCUDA); + + return {ix, jx}; +} + + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("forward", &ba, "BA forward operator"); + m.def("neighbors", &neighbors, "temporal neighboor indicies"); + m.def("reproject", &reproject, "temporal neighboor indicies"); + +} \ No newline at end of file diff --git a/mini_dpvo/fastba/ba.py b/mini_dpvo/fastba/ba.py new file mode 100644 index 0000000000000000000000000000000000000000..d01d2128319177d0b3d5facf0f53b7693397b245 --- /dev/null +++ b/mini_dpvo/fastba/ba.py @@ -0,0 +1,8 @@ +import torch +import cuda_ba + +neighbors = cuda_ba.neighbors +reproject = cuda_ba.reproject + +def BA(poses, patches, intrinsics, target, weight, lmbda, ii, jj, kk, t0, t1, iterations=2): + return cuda_ba.forward(poses.data, patches, intrinsics, target, weight, lmbda, ii, jj, kk, t0, t1, iterations) \ No newline at end of file diff --git a/mini_dpvo/fastba/ba_cuda.cu b/mini_dpvo/fastba/ba_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..aae791722559a74420ef17e4a4bf97c001f2d24f --- /dev/null +++ b/mini_dpvo/fastba/ba_cuda.cu @@ -0,0 +1,575 @@ +#include +#include +#include + +#include +#include +#include + + +#define GPU_1D_KERNEL_LOOP(i, n) \ + for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i 1e-4) { + float a = (1 - cosf(theta)) / theta_sq; + crossInplace(phi, tau); + t[0] += a * tau[0]; + t[1] += a * tau[1]; + t[2] += a * tau[2]; + + float b = (theta - sinf(theta)) / (theta * theta_sq); + crossInplace(phi, tau); + t[0] += b * tau[0]; + t[1] += b * tau[1]; + t[2] += b * tau[2]; + } +} + + +__device__ void +retrSE3(const float *xi, const float* t, const float* q, float* t1, float* q1) { + // retraction on SE3 manifold + + float dt[3] = {0, 0, 0}; + float dq[4] = {0, 0, 0, 1}; + + expSE3(xi, dt, dq); + + q1[0] = dq[3] * q[0] + dq[0] * q[3] + dq[1] * q[2] - dq[2] * q[1]; + q1[1] = dq[3] * q[1] + dq[1] * q[3] + dq[2] * q[0] - dq[0] * q[2]; + q1[2] = dq[3] * q[2] + dq[2] * q[3] + dq[0] * q[1] - dq[1] * q[0]; + q1[3] = dq[3] * q[3] - dq[0] * q[0] - dq[1] * q[1] - dq[2] * q[2]; + + actSO3(dq, t, t1); + t1[0] += dt[0]; + t1[1] += dt[1]; + t1[2] += dt[2]; +} + + + +__global__ void pose_retr_kernel(const int t0, const int t1, + torch::PackedTensorAccessor32 poses, + torch::PackedTensorAccessor32 update) +{ + GPU_1D_KERNEL_LOOP(i, t1 - t0) { + const float t = t0 + i; + float t1[3], t0[3] = { poses[t][0], poses[t][1], poses[t][2] }; + float q1[4], q0[4] = { poses[t][3], poses[t][4], poses[t][5], poses[t][6] }; + + float xi[6] = { + update[i][0], + update[i][1], + update[i][2], + update[i][3], + update[i][4], + update[i][5], + }; + + retrSE3(xi, t0, q0, t1, q1); + + poses[t][0] = t1[0]; + poses[t][1] = t1[1]; + poses[t][2] = t1[2]; + poses[t][3] = q1[0]; + poses[t][4] = q1[1]; + poses[t][5] = q1[2]; + poses[t][6] = q1[3]; + } +} + + +__global__ void patch_retr_kernel( + torch::PackedTensorAccessor32 index, + torch::PackedTensorAccessor32 patches, + torch::PackedTensorAccessor32 update) +{ + GPU_1D_KERNEL_LOOP(n, index.size(0)) { + const int p = patches.size(2); + const int ix = index[n]; + + float d = patches[ix][2][0][0]; + d = d + update[n]; + d = (d > 20) ? 
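+ // inverse-depth retraction: the update is additive, values above 20 are
+ // reset to 1.0, and the result is clamped below at 1e-4 before it is
+ // written back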
1.0 : d; + d = max(d, 1e-4); + + for (int i=0; i poses, + const torch::PackedTensorAccessor32 patches, + const torch::PackedTensorAccessor32 intrinsics, + const torch::PackedTensorAccessor32 target, + const torch::PackedTensorAccessor32 weight, + const torch::PackedTensorAccessor32 lmbda, + const torch::PackedTensorAccessor32 ii, + const torch::PackedTensorAccessor32 jj, + const torch::PackedTensorAccessor32 kk, + const torch::PackedTensorAccessor32 ku, + torch::PackedTensorAccessor32 B, + torch::PackedTensorAccessor32 E, + torch::PackedTensorAccessor32 C, + torch::PackedTensorAccessor32 v, + torch::PackedTensorAccessor32 u, const int t0) +{ + + __shared__ float fx, fy, cx, cy; + if (threadIdx.x == 0) { + fx = intrinsics[0][0]; + fy = intrinsics[0][1]; + cx = intrinsics[0][2]; + cy = intrinsics[0][3]; + } + + __syncthreads(); + + GPU_1D_KERNEL_LOOP(n, ii.size(0)) { + int k = ku[n]; + int ix = ii[n]; + int jx = jj[n]; + int kx = kk[n]; + + float ti[3] = { poses[ix][0], poses[ix][1], poses[ix][2] }; + float tj[3] = { poses[jx][0], poses[jx][1], poses[jx][2] }; + float qi[4] = { poses[ix][3], poses[ix][4], poses[ix][5], poses[ix][6] }; + float qj[4] = { poses[jx][3], poses[jx][4], poses[jx][5], poses[jx][6] }; + + float Xi[4], Xj[4]; + Xi[0] = (patches[kx][0][1][1] - cx) / fx; + Xi[1] = (patches[kx][1][1][1] - cy) / fy; + Xi[2] = 1.0; + Xi[3] = patches[kx][2][1][1]; + + float tij[3], qij[4]; + relSE3(ti, qi, tj, qj, tij, qij); + actSE3(tij, qij, Xi, Xj); + + const float X = Xj[0]; + const float Y = Xj[1]; + const float Z = Xj[2]; + const float W = Xj[3]; + + const float d = (Z >= 0.2) ? 1.0 / Z : 0.0; + const float d2 = d * d; + + const float x1 = fx * (X / Z) + cx; + const float y1 = fy * (Y / Z) + cy; + + const float rx = target[n][0] - x1; + const float ry = target[n][1] - y1; + + const bool in_bounds = (sqrt(rx*rx + ry*ry) < 128) && (Z > 0.2) && + (x1 > -64) && (y1 > -64) && (x1 < 2*cx + 64) && (y1 < 2*cy + 64); + + const float mask = in_bounds ? 
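+ // keep the residual only when the reprojection lands within a 64-pixel
+ // border around the image (extent approximated as 2*cx by 2*cy), the
+ // point is in front of the camera (Z > 0.2), and the residual magnitude
+ // is under 128 px; otherwise the weight is zeroed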
1.0 : 0.0; + + ix = ix - t0; + jx = jx - t0; + + { + const float r = target[n][0] - x1; + const float w = mask * weight[n][0]; + + float Jz = fx * (tij[0] * d - tij[2] * (X * d2)); + float Ji[6], Jj[6] = {fx*W*d, 0, fx*-X*W*d2, fx*-X*Y*d2, fx*(1+X*X*d2), fx*-Y*d}; + + adjSE3(tij, qij, Jj, Ji); + + for (int i=0; i<6; i++) { + for (int j=0; j<6; j++) { + if (ix >= 0) + atomicAdd(&B[6*ix+i][6*ix+j], w * Ji[i] * Ji[j]); + if (jx >= 0) + atomicAdd(&B[6*jx+i][6*jx+j], w * Jj[i] * Jj[j]); + if (ix >= 0 && jx >= 0) { + atomicAdd(&B[6*ix+i][6*jx+j], -w * Ji[i] * Jj[j]); + atomicAdd(&B[6*jx+i][6*ix+j], -w * Jj[i] * Ji[j]); + } + } + } + + for (int i=0; i<6; i++) { + if (ix >= 0) + atomicAdd(&E[6*ix+i][k], -w * Jz * Ji[i]); + if (jx >= 0) + atomicAdd(&E[6*jx+i][k], w * Jz * Jj[i]); + } + + for (int i=0; i<6; i++) { + if (ix >= 0) + atomicAdd(&v[6*ix+i], -w * r * Ji[i]); + if (jx >= 0) + atomicAdd(&v[6*jx+i], w * r * Jj[i]); + } + + atomicAdd(&C[k], w * Jz * Jz); + atomicAdd(&u[k], w * r * Jz); + } + + { + const float r = target[n][1] - y1; + const float w = mask * weight[n][1]; + + float Jz = fy * (tij[1] * d - tij[2] * (Y * d2)); + float Ji[6], Jj[6] = {0, fy*W*d, fy*-Y*W*d2, fy*(-1-Y*Y*d2), fy*(X*Y*d2), fy*X*d}; + + adjSE3(tij, qij, Jj, Ji); + + for (int i=0; i<6; i++) { + for (int j=0; j<6; j++) { + if (ix >= 0) + atomicAdd(&B[6*ix+i][6*ix+j], w * Ji[i] * Ji[j]); + if (jx >= 0) + atomicAdd(&B[6*jx+i][6*jx+j], w * Jj[i] * Jj[j]); + if (ix >= 0 && jx >= 0) { + atomicAdd(&B[6*ix+i][6*jx+j], -w * Ji[i] * Jj[j]); + atomicAdd(&B[6*jx+i][6*ix+j], -w * Jj[i] * Ji[j]); + } + } + } + + for (int i=0; i<6; i++) { + if (ix >= 0) + atomicAdd(&E[6*ix+i][k], -w * Jz * Ji[i]); + if (jx >= 0) + atomicAdd(&E[6*jx+i][k], w * Jz * Jj[i]); + } + + for (int i=0; i<6; i++) { + if (ix >= 0) + atomicAdd(&v[6*ix+i], -w * r * Ji[i]); + if (jx >= 0) + atomicAdd(&v[6*jx+i], w * r * Jj[i]); + } + + atomicAdd(&C[k], w * Jz * Jz); + atomicAdd(&u[k], w * r * Jz); + } + } +} + + +__global__ void reproject( + const torch::PackedTensorAccessor32 poses, + const torch::PackedTensorAccessor32 patches, + const torch::PackedTensorAccessor32 intrinsics, + const torch::PackedTensorAccessor32 ii, + const torch::PackedTensorAccessor32 jj, + const torch::PackedTensorAccessor32 kk, + torch::PackedTensorAccessor32 coords) { + + __shared__ float fx, fy, cx, cy; + if (threadIdx.x == 0) { + fx = intrinsics[0][0]; + fy = intrinsics[0][1]; + cx = intrinsics[0][2]; + cy = intrinsics[0][3]; + } + + __syncthreads(); + + GPU_1D_KERNEL_LOOP(n, ii.size(0)) { + int ix = ii[n]; + int jx = jj[n]; + int kx = kk[n]; + + float ti[3] = { poses[ix][0], poses[ix][1], poses[ix][2] }; + float tj[3] = { poses[jx][0], poses[jx][1], poses[jx][2] }; + float qi[4] = { poses[ix][3], poses[ix][4], poses[ix][5], poses[ix][6] }; + float qj[4] = { poses[jx][3], poses[jx][4], poses[jx][5], poses[jx][6] }; + + float tij[3], qij[4]; + relSE3(ti, qi, tj, qj, tij, qij); + + float Xi[4], Xj[4]; + for (int i=0; i cuda_ba( + torch::Tensor poses, + torch::Tensor patches, + torch::Tensor intrinsics, + torch::Tensor target, + torch::Tensor weight, + torch::Tensor lmbda, + torch::Tensor ii, + torch::Tensor jj, + torch::Tensor kk, + const int t0, const int t1, const int iterations) +{ + + auto ktuple = torch::_unique(kk, true, true); + torch::Tensor kx = std::get<0>(ktuple); + torch::Tensor ku = std::get<1>(ktuple); + + const int N = t1 - t0; // number of poses + const int M = kx.size(0); // number of patches + const int P = patches.size(3); // patch size + + auto opts = 
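+ // Gauss-Newton normal equations with the usual pose/structure split:
+ // B is the 6Nx6N pose block, E the 6NxM pose-depth coupling, C the
+ // diagonal depth block, and v/u the right-hand sides; depths are
+ // eliminated further below via the Schur complement S = B - E Q E^T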
torch::TensorOptions() + .dtype(torch::kFloat32).device(torch::kCUDA); + + poses = poses.view({-1, 7}); + patches = patches.view({-1,3,P,P}); + intrinsics = intrinsics.view({-1, 4}); + + target = target.view({-1, 2}); + weight = weight.view({-1, 2}); + + const int num = ii.size(0); + torch::Tensor B = torch::empty({6*N, 6*N}, opts); + torch::Tensor E = torch::empty({6*N, 1*M}, opts); + torch::Tensor C = torch::empty({M}, opts); + + torch::Tensor v = torch::empty({6*N}, opts); + torch::Tensor u = torch::empty({1*M}, opts); + + for (int itr=0; itr < iterations; itr++) { + + B.zero_(); + E.zero_(); + C.zero_(); + v.zero_(); + u.zero_(); + + v = v.view({6*N}); + u = u.view({1*M}); + + reprojection_residuals_and_hessian<<>>( + poses.packed_accessor32(), + patches.packed_accessor32(), + intrinsics.packed_accessor32(), + target.packed_accessor32(), + weight.packed_accessor32(), + lmbda.packed_accessor32(), + ii.packed_accessor32(), + jj.packed_accessor32(), + kk.packed_accessor32(), + ku.packed_accessor32(), + B.packed_accessor32(), + E.packed_accessor32(), + C.packed_accessor32(), + v.packed_accessor32(), + u.packed_accessor32(), t0); + + v = v.view({6*N, 1}); + u = u.view({1*M, 1}); + + torch::Tensor Q = 1.0 / (C + lmbda).view({1, M}); + + if (t1 - t0 == 0) { + + torch::Tensor Qt = torch::transpose(Q, 0, 1); + torch::Tensor dZ = Qt * u; + + dZ = dZ.view({M}); + + patch_retr_kernel<<>>( + kx.packed_accessor32(), + patches.packed_accessor32(), + dZ.packed_accessor32()); + + } + + else { + + torch::Tensor EQ = E * Q; + torch::Tensor Et = torch::transpose(E, 0, 1); + torch::Tensor Qt = torch::transpose(Q, 0, 1); + + torch::Tensor S = B - torch::matmul(EQ, Et); + torch::Tensor y = v - torch::matmul(EQ, u); + + torch::Tensor I = torch::eye(6*N, opts); + S += I * (1e-4 * S + 1.0); + + + torch::Tensor U = torch::linalg::cholesky(S); + torch::Tensor dX = torch::cholesky_solve(y, U); + torch::Tensor dZ = Qt * (u - torch::matmul(Et, dX)); + + dX = dX.view({N, 6}); + dZ = dZ.view({M}); + + pose_retr_kernel<<>>(t0, t1, + poses.packed_accessor32(), + dX.packed_accessor32()); + + patch_retr_kernel<<>>( + kx.packed_accessor32(), + patches.packed_accessor32(), + dZ.packed_accessor32()); + } + } + + return {}; +} + + +torch::Tensor cuda_reproject( + torch::Tensor poses, + torch::Tensor patches, + torch::Tensor intrinsics, + torch::Tensor ii, + torch::Tensor jj, + torch::Tensor kk) +{ + + const int N = ii.size(0); + const int P = patches.size(3); // patch size + + poses = poses.view({-1, 7}); + patches = patches.view({-1,3,P,P}); + intrinsics = intrinsics.view({-1, 4}); + + auto opts = torch::TensorOptions() + .dtype(torch::kFloat32).device(torch::kCUDA); + + torch::Tensor coords = torch::empty({N, 2, P, P}, opts); + + reproject<<>>( + poses.packed_accessor32(), + patches.packed_accessor32(), + intrinsics.packed_accessor32(), + ii.packed_accessor32(), + jj.packed_accessor32(), + kk.packed_accessor32(), + coords.packed_accessor32()); + + return coords.view({1, N, 2, P, P}); + +} \ No newline at end of file diff --git a/mini_dpvo/lietorch/__init__.py b/mini_dpvo/lietorch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..275d68e289eb3ccd5c6980b4e886520ca506863e --- /dev/null +++ b/mini_dpvo/lietorch/__init__.py @@ -0,0 +1,2 @@ +__all__ = ['groups'] +from .groups import LieGroupParameter, SO3, RxSO3, SE3, Sim3, cat, stack diff --git a/mini_dpvo/lietorch/broadcasting.py b/mini_dpvo/lietorch/broadcasting.py new file mode 100644 index 
0000000000000000000000000000000000000000..9015a3354b57a9ebf5269abf148bdd30545e6380 --- /dev/null +++ b/mini_dpvo/lietorch/broadcasting.py @@ -0,0 +1,31 @@ +import torch +import numpy as np + +def check_broadcastable(x, y): + assert len(x.shape) == len(y.shape) + for (n, m) in zip(x.shape[:-1], y.shape[:-1]): + assert n==m or n==1 or m==1 + +def broadcast_inputs(x, y): + """ Automatic broadcasting of missing dimensions """ + if y is None: + xs, xd = x.shape[:-1], x.shape[-1] + return (x.view(-1, xd).contiguous(), ), x.shape[:-1] + + check_broadcastable(x, y) + + xs, xd = x.shape[:-1], x.shape[-1] + ys, yd = y.shape[:-1], y.shape[-1] + out_shape = [max(n,m) for (n,m) in zip(xs,ys)] + + if x.shape[:-1] == y.shape[-1]: + x1 = x.view(-1, xd) + y1 = y.view(-1, yd) + + else: + x_expand = [m if n==1 else 1 for (n,m) in zip(xs, ys)] + y_expand = [n if m==1 else 1 for (n,m) in zip(xs, ys)] + x1 = x.repeat(x_expand + [1]).reshape(-1, xd).contiguous() + y1 = y.repeat(y_expand + [1]).reshape(-1, yd).contiguous() + + return (x1, y1), tuple(out_shape) diff --git a/mini_dpvo/lietorch/gradcheck.py b/mini_dpvo/lietorch/gradcheck.py new file mode 100644 index 0000000000000000000000000000000000000000..d94dccb6969238e68b5d38a12bfaf5c1c41950f2 --- /dev/null +++ b/mini_dpvo/lietorch/gradcheck.py @@ -0,0 +1,592 @@ +import torch + +TORCH_MAJOR = int(torch.__version__.split('.')[0]) +TORCH_MINOR = int(torch.__version__.split('.')[1]) + +from torch.types import _TensorOrTensors +if TORCH_MAJOR == 1 and TORCH_MINOR < 8: + from torch._six import container_abcs, istuple +else: + import collections.abc as container_abcs + +import torch.testing +from torch.overrides import is_tensor_like +from itertools import product +import warnings +from typing import Callable, Union, Optional, Iterable, List + +def zero_gradients(x): + if isinstance(x, torch.Tensor): + if x.grad is not None: + x.grad.detach_() + x.grad.zero_() + elif isinstance(x, container_abcs.Iterable): + for elem in x: + zero_gradients(elem) + + +def make_jacobian(input, num_out): + if is_tensor_like(input): + if not input.is_floating_point() and not input.is_complex(): + return None + if not input.requires_grad: + return None + return input.new_zeros((input.nelement(), num_out), dtype=input.dtype, layout=torch.strided) + elif isinstance(input, container_abcs.Iterable) and not isinstance(input, str): + jacobians = list(filter( + lambda x: x is not None, (make_jacobian(elem, num_out) for elem in input))) + if not jacobians: + return None + return type(input)(jacobians) # type: ignore + else: + return None + + +def iter_tensors(x: Union[torch.Tensor, Iterable[torch.Tensor]], only_requiring_grad: bool = False) -> Iterable[torch.Tensor]: + if is_tensor_like(x): + # mypy doesn't narrow type of `x` to torch.Tensor + if x.requires_grad or not only_requiring_grad: # type: ignore + yield x # type: ignore + elif isinstance(x, container_abcs.Iterable) and not isinstance(x, str): + for elem in x: + for result in iter_tensors(elem, only_requiring_grad): + yield result + +def get_numerical_jacobian(fn, input, target=None, eps=1e-3, grad_out=1.0): + """ + input: input to `fn` + target: the Tensors wrt whom Jacobians are calculated (default=`input`) + grad_out: grad output value used to calculate gradients. + + Note that `target` may not even be part of `input` to `fn`, so please be + **very careful** in this to not clone `target`. 
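+ returns: a Jacobian block for every tensor in `target` that is floating
+ point or complex and requires grad, estimated with central
+ differences of step `eps`.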
+ """ + if target is None: + target = input + output_size = fn(input).numel() + jacobian = make_jacobian(target, output_size) + + # It's much easier to iterate over flattened lists of tensors. + # These are reference to the same objects in jacobian, so any changes + # will be reflected in it as well. + x_tensors = iter_tensors(target, True) + j_tensors = iter_tensors(jacobian) + + def update_jacobians(x, idx, d, d_idx, is_mkldnn=False): + + # compute_jacobian only works for pure real + # or pure imaginary delta + def compute_gradient(delta): + # we currently assume that the norm of delta equals eps + assert(delta == eps or delta == (eps * 1j)) + + def fn_out(): + if not is_mkldnn: + # x is a view into input and so this works + return fn(input).clone() + else: + # convert the dense tensor back to have mkldnn layout + return fn([x.to_mkldnn()]) + + orig = x[idx].item() + x[idx] = orig - delta + outa = fn_out() + x[idx] = orig + delta + outb = fn_out() + x[idx] = orig + r = (outb - outa) / (2 * eps) + return r.detach().reshape(-1) + + # for details on the algorithm used here, refer: + # Section 3.5.3 https://arxiv.org/pdf/1701.00392.pdf + # s = fn(z) where z = x for real valued input + # and z = x + yj for complex valued input + ds_dx = compute_gradient(eps) + if x.is_complex(): # C -> C, C -> R + ds_dy = compute_gradient(eps * 1j) + # conjugate wirtinger derivative + conj_w_d = 0.5 * (ds_dx + ds_dy * 1j) + # wirtinger derivative + w_d = 0.5 * (ds_dx - ds_dy * 1j) + d[d_idx] = grad_out.conjugate() * conj_w_d + grad_out * w_d.conj() + elif ds_dx.is_complex(): # R -> C + # w_d = conj_w_d = 0.5 * ds_dx + # dL_dz_conj = 0.5 * [grad_out.conj() * ds_dx + grad_out * ds_dx.conj()] + # = 0.5 * [grad_out.conj() * ds_dx + (grad_out.conj() * ds_dx).conj()] + # = 0.5 * 2 * real(grad_out.conj() * ds_dx) + # = real(grad_out.conj() * ds_dx) + d[d_idx] = torch.real(grad_out.conjugate() * ds_dx) + else: # R -> R + d[d_idx] = ds_dx * grad_out + + # TODO: compare structure + for x_tensor, d_tensor in zip(x_tensors, j_tensors): + if x_tensor.is_sparse: + def get_stride(size): + dim = len(size) + tmp = 1 + stride = [0] * dim + for i in reversed(range(dim)): + stride[i] = tmp + tmp *= size[i] + return stride + + x_nnz = x_tensor._nnz() + x_size = list(x_tensor.size()) + x_indices = x_tensor._indices().t() + x_values = x_tensor._values() + x_stride = get_stride(x_size) + + # Use .data here to get around the version check + x_values = x_values.data + + for i in range(x_nnz): + x_value = x_values[i] + for x_idx in product(*[range(m) for m in x_values.size()[1:]]): + indices = x_indices[i].tolist() + list(x_idx) + d_idx = sum(indices[k] * x_stride[k] for k in range(len(x_size))) + update_jacobians(x_value, x_idx, d_tensor, d_idx) + elif x_tensor.layout == torch._mkldnn: # type: ignore + # Use .data here to get around the version check + x_tensor = x_tensor.data + if len(input) != 1: + raise ValueError('gradcheck currently only supports functions with 1 input, but got: ', + len(input)) + for d_idx, x_idx in enumerate(product(*[range(m) for m in x_tensor.size()])): + # this is really inefficient, but without indexing implemented, there's + # not really a better way than converting back and forth + x_tensor_dense = x_tensor.to_dense() + update_jacobians(x_tensor_dense, x_idx, d_tensor, d_idx, is_mkldnn=True) + else: + # Use .data here to get around the version check + x_tensor = x_tensor.data + for d_idx, x_idx in enumerate(product(*[range(m) for m in x_tensor.size()])): + update_jacobians(x_tensor, x_idx, d_tensor, 
d_idx) + + return jacobian + + +def get_analytical_jacobian(input, output, nondet_tol=0.0, grad_out=1.0): + # it is easier to call to_dense() on the sparse output than + # to modify analytical jacobian + if output.is_sparse: + raise ValueError('Sparse output is not supported at gradcheck yet. ' + 'Please call to_dense() on the output of fn for gradcheck.') + if output.layout == torch._mkldnn: # type: ignore + raise ValueError('MKLDNN output is not supported at gradcheck yet. ' + 'Please call to_dense() on the output of fn for gradcheck.') + diff_input_list = list(iter_tensors(input, True)) + jacobian = make_jacobian(input, output.numel()) + jacobian_reentrant = make_jacobian(input, output.numel()) + grad_output = torch.zeros_like(output, memory_format=torch.legacy_contiguous_format) + flat_grad_output = grad_output.view(-1) + reentrant = True + correct_grad_sizes = True + correct_grad_types = True + + for i in range(flat_grad_output.numel()): + flat_grad_output.zero_() + flat_grad_output[i] = grad_out + for jacobian_c in (jacobian, jacobian_reentrant): + grads_input = torch.autograd.grad(output, diff_input_list, grad_output, + retain_graph=True, allow_unused=True) + for jacobian_x, d_x, x in zip(jacobian_c, grads_input, diff_input_list): + if d_x is not None and d_x.size() != x.size(): + correct_grad_sizes = False + elif d_x is not None and d_x.dtype != x.dtype: + correct_grad_types = False + elif jacobian_x.numel() != 0: + if d_x is None: + jacobian_x[:, i].zero_() + else: + d_x_dense = d_x.to_dense() if not d_x.layout == torch.strided else d_x + assert jacobian_x[:, i].numel() == d_x_dense.numel() + jacobian_x[:, i] = d_x_dense.contiguous().view(-1) + + for jacobian_x, jacobian_reentrant_x in zip(jacobian, jacobian_reentrant): + if jacobian_x.numel() != 0 and (jacobian_x - jacobian_reentrant_x).abs().max() > nondet_tol: + reentrant = False + + return jacobian, reentrant, correct_grad_sizes, correct_grad_types + + +def _as_tuple(x): + if TORCH_MAJOR == 1 and TORCH_MINOR < 8: + b_tuple = istuple(x) + else: + b_tuple = isinstance(x, tuple) + + if b_tuple: + return x + elif isinstance(x, list): + return tuple(x) + else: + return x, + + + +def _differentiable_outputs(x): + return tuple(o for o in _as_tuple(x) if o.requires_grad) + + +# Note [VarArg of Tensors] +# ~~~~~~~~~~~~~~~~~~~~~~~~ +# 'func' accepts a vararg of tensors, which isn't expressable in the type system at the moment. +# If https://mypy.readthedocs.io/en/latest/additional_features.html?highlight=callable#extended-callable-types is accepted, +# the '...' first argument of Callable can be replaced with VarArg(Tensor). +# For now, we permit any input. +# the '...' first argument of Callable can be replaced with VarArg(Tensor). +# For now, we permit any input. + +def gradcheck( + func: Callable[..., Union[_TensorOrTensors]], # See Note [VarArg of Tensors] + inputs: _TensorOrTensors, + eps: float = 1e-6, + atol: float = 1e-5, + rtol: float = 1e-3, + raise_exception: bool = True, + check_sparse_nnz: bool = False, + nondet_tol: float = 0.0, + check_undefined_grad: bool = True, + check_grad_dtypes: bool = False +) -> bool: + r"""Check gradients computed via small finite differences against analytical + gradients w.r.t. tensors in :attr:`inputs` that are of floating point or complex type + and with ``requires_grad=True``. + + The check between numerical and analytical gradients uses :func:`~torch.allclose`. + + For complex functions, no notion of Jacobian exists. 
Gradcheck verifies if the numerical and + analytical values of Wirtinger and Conjugate Wirtinger derivative are consistent. The gradient + computation is done under the assumption that the overall function has a real valued output. + For functions with complex output, gradcheck compares the numerical and analytical gradients + for two values of :attr:`grad_output`: 1 and 1j. For more details, check out + :ref:`complex_autograd-doc`. + + .. note:: + The default values are designed for :attr:`input` of double precision. + This check will likely fail if :attr:`input` is of less precision, e.g., + ``FloatTensor``. + + .. warning:: + If any checked tensor in :attr:`input` has overlapping memory, i.e., + different indices pointing to the same memory address (e.g., from + :func:`torch.expand`), this check will likely fail because the numerical + gradients computed by point perturbation at such indices will change + values at all other indices that share the same memory address. + + Args: + func (function): a Python function that takes Tensor inputs and returns + a Tensor or a tuple of Tensors + inputs (tuple of Tensor or Tensor): inputs to the function + eps (float, optional): perturbation for finite differences + atol (float, optional): absolute tolerance + rtol (float, optional): relative tolerance + raise_exception (bool, optional): indicating whether to raise an exception if + the check fails. The exception gives more information about the + exact nature of the failure. This is helpful when debugging gradchecks. + check_sparse_nnz (bool, optional): if True, gradcheck allows for SparseTensor input, + and for any SparseTensor at input, gradcheck will perform check at nnz positions only. + nondet_tol (float, optional): tolerance for non-determinism. When running + identical inputs through the differentiation, the results must either match + exactly (default, 0.0) or be within this tolerance. + check_undefined_grad (bool, options): if True, check if undefined output grads + are supported and treated as zeros, for ``Tensor`` outputs. + + Returns: + True if all differences satisfy allclose condition + """ + def fail_test(msg): + if raise_exception: + raise RuntimeError(msg) + return False + + tupled_inputs = _as_tuple(inputs) + if not check_sparse_nnz and any(t.is_sparse for t in tupled_inputs if isinstance(t, torch.Tensor)): + return fail_test('gradcheck expects all tensor inputs are dense when check_sparse_nnz is set to False.') + + # Make sure that gradients are saved for at least one input + any_input_requiring_grad = False + for idx, inp in enumerate(tupled_inputs): + if is_tensor_like(inp) and inp.requires_grad: + if not (inp.dtype == torch.float64 or inp.dtype == torch.complex128): + warnings.warn( + f'Input #{idx} requires gradient and ' + 'is not a double precision floating point or complex. ' + 'This check will likely fail if all the inputs are ' + 'not of double precision floating point or complex. ') + content = inp._values() if inp.is_sparse else inp + # TODO: To cover more problematic cases, replace stride = 0 check with + # "any overlap in memory" once we have a proper function to check it. + if content.layout is not torch._mkldnn: # type: ignore + if not all(st > 0 or sz <= 1 for st, sz in zip(content.stride(), content.size())): + raise RuntimeError( + 'The {}th input has a dimension with stride 0. gradcheck only ' + 'supports inputs that are non-overlapping to be able to ' + 'compute the numerical gradients correctly. 
You should call ' + '.contiguous on the input before passing it to gradcheck.') + any_input_requiring_grad = True + inp.retain_grad() + if not any_input_requiring_grad: + raise ValueError( + 'gradcheck expects at least one input tensor to require gradient, ' + 'but none of the them have requires_grad=True.') + + func_out = func(*tupled_inputs) + output = _differentiable_outputs(func_out) + + if not output: + for i, o in enumerate(func_out): + def fn(input): + return _as_tuple(func(*input))[i] + numerical = get_numerical_jacobian(fn, tupled_inputs, eps=eps) + for n in numerical: + if torch.ne(n, 0).sum() > 0: + return fail_test('Numerical gradient for function expected to be zero') + return True + + for i, o in enumerate(output): + if not o.requires_grad: + continue + + def fn(input): + return _as_tuple(func(*input))[i] + + analytical, reentrant, correct_grad_sizes, correct_grad_types = get_analytical_jacobian(tupled_inputs, + o, + nondet_tol=nondet_tol) + numerical = get_numerical_jacobian(fn, tupled_inputs, eps=eps) + + return analytical, numerical + + out_is_complex = o.is_complex() + + if out_is_complex: + # analytical vjp with grad_out = 1.0j + analytical_with_imag_grad_out, reentrant_with_imag_grad_out, \ + correct_grad_sizes_with_imag_grad_out, correct_grad_types_with_imag_grad_out \ + = get_analytical_jacobian(tupled_inputs, o, nondet_tol=nondet_tol, grad_out=1j) + numerical_with_imag_grad_out = get_numerical_jacobian(fn, tupled_inputs, eps=eps, grad_out=1j) + + if not correct_grad_types and check_grad_dtypes: + return fail_test('Gradient has dtype mismatch') + + if out_is_complex and not correct_grad_types_with_imag_grad_out and check_grad_dtypes: + return fail_test('Gradient (calculated using complex valued grad output) has dtype mismatch') + + if not correct_grad_sizes: + return fail_test('Analytical gradient has incorrect size') + + if out_is_complex and not correct_grad_sizes_with_imag_grad_out: + return fail_test('Analytical gradient (calculated using complex valued grad output) has incorrect size') + + def checkIfNumericalAnalyticAreClose(a, n, j, error_str=''): + if not torch.allclose(a, n, rtol, atol): + return fail_test(error_str + 'Jacobian mismatch for output %d with respect to input %d,\n' + 'numerical:%s\nanalytical:%s\n' % (i, j, n, a)) + + inp_tensors = iter_tensors(tupled_inputs, True) + + for j, (a, n, inp) in enumerate(zip(analytical, numerical, inp_tensors)): + if a.numel() != 0 or n.numel() != 0: + if o.is_complex(): + # C -> C, R -> C + a_with_imag_grad_out = analytical_with_imag_grad_out[j] + n_with_imag_grad_out = numerical_with_imag_grad_out[j] + checkIfNumericalAnalyticAreClose(a_with_imag_grad_out, n_with_imag_grad_out, j, + "Gradients failed to compare equal for grad output = 1j. ") + if inp.is_complex(): + # C -> R, C -> C + checkIfNumericalAnalyticAreClose(a, n, j, + "Gradients failed to compare equal for grad output = 1. ") + else: + # R -> R, R -> C + checkIfNumericalAnalyticAreClose(a, n, j) + + + def not_reentrant_error(error_str=''): + error_msg = "Backward" + error_str + " is not reentrant, i.e., running backward with same \ + input and grad_output multiple times gives different values, \ + although analytical gradient matches numerical gradient. 
\ + The tolerance for nondeterminism was {}.".format(nondet_tol) + return fail_test(error_msg) + + if not reentrant: + return not_reentrant_error() + + if out_is_complex and not reentrant_with_imag_grad_out: + return not_reentrant_error(' (calculated using complex valued grad output)') + + # check if the backward multiplies by grad_output + output = _differentiable_outputs(func(*tupled_inputs)) + if any([o.requires_grad for o in output]): + diff_input_list: List[torch.Tensor] = list(iter_tensors(tupled_inputs, True)) + if not diff_input_list: + raise RuntimeError("no Tensors requiring grad found in input") + grads_input = torch.autograd.grad(output, diff_input_list, + [torch.zeros_like(o, memory_format=torch.legacy_contiguous_format) for o in output], + allow_unused=True) + for gi, di in zip(grads_input, diff_input_list): + if gi is None: + continue + if isinstance(gi, torch.Tensor) and gi.layout != torch.strided: + if gi.layout != di.layout: + return fail_test('grad is incorrect layout (' + str(gi.layout) + ' is not ' + str(di.layout) + ')') + if gi.layout == torch.sparse_coo: + if gi.sparse_dim() != di.sparse_dim(): + return fail_test('grad is sparse tensor, but has incorrect sparse_dim') + if gi.dense_dim() != di.dense_dim(): + return fail_test('grad is sparse tensor, but has incorrect dense_dim') + gi = gi.to_dense() + di = di.to_dense() + if not gi.eq(0).all(): + return fail_test('backward not multiplied by grad_output') + if gi.dtype != di.dtype or gi.device != di.device or gi.is_sparse != di.is_sparse: + return fail_test("grad is incorrect type") + if gi.size() != di.size(): + return fail_test('grad is incorrect size') + + if check_undefined_grad: + def warn_bc_breaking(): + warnings.warn(( + 'Backwards compatibility: New undefined gradient support checking ' + 'feature is enabled by default, but it may break existing callers ' + 'of this function. If this is true for you, you can call this ' + 'function with "check_undefined_grad=False" to disable the feature')) + + def check_undefined_grad_support(output_to_check): + grads_output = [torch.zeros_like(o, memory_format=torch.legacy_contiguous_format) for o in output_to_check] + try: + grads_input = torch.autograd.grad(output_to_check, + diff_input_list, + grads_output, + allow_unused=True) + except RuntimeError: + warn_bc_breaking() + return fail_test(( + 'Expected backward function to handle undefined output grads. ' + 'Please look at "Notes about undefined output gradients" in ' + '"tools/autograd/derivatives.yaml"')) + + for gi, i in zip(grads_input, diff_input_list): + if (gi is not None) and (not gi.eq(0).all()): + warn_bc_breaking() + return fail_test(( + 'Expected all input grads to be undefined or zero when all output grads are undefined ' + 'or zero. Please look at "Notes about undefined output gradients" in ' + '"tools/autograd/derivatives.yaml"')) + return True + + # All backward functions must work properly if all output grads are undefined + outputs_to_check = [[ + torch._C._functions.UndefinedGrad()(o) for o in _differentiable_outputs(func(*tupled_inputs)) + # This check filters out Tensor-likes that aren't instances of Tensor. 
+ if isinstance(o, torch.Tensor) + ]] + + # If there are multiple output grads, we should be able to undef one at a time without error + if len(outputs_to_check[0]) > 1: + for undef_grad_idx in range(len(output)): + output_to_check = _differentiable_outputs(func(*tupled_inputs)) + outputs_to_check.append([ + torch._C._functions.UndefinedGrad()(o) if idx == undef_grad_idx else o + for idx, o in enumerate(output_to_check)]) + + for output_to_check in outputs_to_check: + if not check_undefined_grad_support(output_to_check): + return False + + return True + + +def gradgradcheck( + func: Callable[..., _TensorOrTensors], # See Note [VarArg of Tensors] + inputs: _TensorOrTensors, + grad_outputs: Optional[_TensorOrTensors] = None, + eps: float = 1e-6, + atol: float = 1e-5, + rtol: float = 1e-3, + gen_non_contig_grad_outputs: bool = False, + raise_exception: bool = True, + nondet_tol: float = 0.0, + check_undefined_grad: bool = True, + check_grad_dtypes: bool = False +) -> bool: + r"""Check gradients of gradients computed via small finite differences + against analytical gradients w.r.t. tensors in :attr:`inputs` and + :attr:`grad_outputs` that are of floating point or complex type and with + ``requires_grad=True``. + + This function checks that backpropagating through the gradients computed + to the given :attr:`grad_outputs` are correct. + + The check between numerical and analytical gradients uses :func:`~torch.allclose`. + + .. note:: + The default values are designed for :attr:`input` and + :attr:`grad_outputs` of double precision. This check will likely fail if + they are of less precision, e.g., ``FloatTensor``. + + .. warning:: + If any checked tensor in :attr:`input` and :attr:`grad_outputs` has + overlapping memory, i.e., different indices pointing to the same memory + address (e.g., from :func:`torch.expand`), this check will likely fail + because the numerical gradients computed by point perturbation at such + indices will change values at all other indices that share the same + memory address. + + Args: + func (function): a Python function that takes Tensor inputs and returns + a Tensor or a tuple of Tensors + inputs (tuple of Tensor or Tensor): inputs to the function + grad_outputs (tuple of Tensor or Tensor, optional): The gradients with + respect to the function's outputs. + eps (float, optional): perturbation for finite differences + atol (float, optional): absolute tolerance + rtol (float, optional): relative tolerance + gen_non_contig_grad_outputs (bool, optional): if :attr:`grad_outputs` is + ``None`` and :attr:`gen_non_contig_grad_outputs` is ``True``, the + randomly generated gradient outputs are made to be noncontiguous + raise_exception (bool, optional): indicating whether to raise an exception if + the check fails. The exception gives more information about the + exact nature of the failure. This is helpful when debugging gradchecks. + nondet_tol (float, optional): tolerance for non-determinism. When running + identical inputs through the differentiation, the results must either match + exactly (default, 0.0) or be within this tolerance. Note that a small amount + of nondeterminism in the gradient will lead to larger inaccuracies in + the second derivative. 
+ check_undefined_grad (bool, options): if True, check if undefined output grads + are supported and treated as zeros + + Returns: + True if all differences satisfy allclose condition + """ + tupled_inputs = _as_tuple(inputs) + + if grad_outputs is None: + # If grad_outputs is not specified, create random Tensors of the same + # shape, type, and device as the outputs + def randn_like(x): + y = torch.testing.randn_like( + x if (x.is_floating_point() or x.is_complex()) else x.double(), memory_format=torch.legacy_contiguous_format) + if gen_non_contig_grad_outputs: + y = torch.testing.make_non_contiguous(y) + return y.requires_grad_() + outputs = _as_tuple(func(*tupled_inputs)) + tupled_grad_outputs = tuple(randn_like(x) for x in outputs) + else: + tupled_grad_outputs = _as_tuple(grad_outputs) + + num_outputs = len(tupled_grad_outputs) + + def new_func(*args): + input_args = args[:-num_outputs] + grad_outputs = args[-num_outputs:] + outputs = _differentiable_outputs(func(*input_args)) + input_args = tuple(x for x in input_args if isinstance(x, torch.Tensor) and x.requires_grad) + grad_inputs = torch.autograd.grad(outputs, input_args, grad_outputs, create_graph=True) + return grad_inputs + + return gradcheck(new_func, tupled_inputs + tupled_grad_outputs, eps, atol, rtol, raise_exception, + nondet_tol=nondet_tol, check_undefined_grad=check_undefined_grad, + check_grad_dtypes=check_grad_dtypes) diff --git a/mini_dpvo/lietorch/group_ops.py b/mini_dpvo/lietorch/group_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..f2564c2854cf409e3544f44c4f6c24a91000336f --- /dev/null +++ b/mini_dpvo/lietorch/group_ops.py @@ -0,0 +1,102 @@ +import lietorch_backends +import torch +import torch.nn.functional as F + + + +class GroupOp(torch.autograd.Function): + """ group operation base class """ + + @classmethod + def forward(cls, ctx, group_id, *inputs): + ctx.group_id = group_id + ctx.save_for_backward(*inputs) + out = cls.forward_op(ctx.group_id, *inputs) + return out + + @classmethod + def backward(cls, ctx, grad): + error_str = "Backward operation not implemented for {}".format(cls) + assert cls.backward_op is not None, error_str + + inputs = ctx.saved_tensors + grad = grad.contiguous() + grad_inputs = cls.backward_op(ctx.group_id, grad, *inputs) + return (None, ) + tuple(grad_inputs) + + +class Exp(GroupOp): + """ exponential map """ + forward_op, backward_op = lietorch_backends.expm, lietorch_backends.expm_backward + +class Log(GroupOp): + """ logarithm map """ + forward_op, backward_op = lietorch_backends.logm, lietorch_backends.logm_backward + +class Inv(GroupOp): + """ group inverse """ + forward_op, backward_op = lietorch_backends.inv, lietorch_backends.inv_backward + +class Mul(GroupOp): + """ group multiplication """ + forward_op, backward_op = lietorch_backends.mul, lietorch_backends.mul_backward + +class Adj(GroupOp): + """ adjoint operator """ + forward_op, backward_op = lietorch_backends.adj, lietorch_backends.adj_backward + +class AdjT(GroupOp): + """ adjoint operator """ + forward_op, backward_op = lietorch_backends.adjT, lietorch_backends.adjT_backward + +class Act3(GroupOp): + """ action on point """ + forward_op, backward_op = lietorch_backends.act, lietorch_backends.act_backward + +class Act4(GroupOp): + """ action on point """ + forward_op, backward_op = lietorch_backends.act4, lietorch_backends.act4_backward + +class Jinv(GroupOp): + """ adjoint operator """ + forward_op, backward_op = lietorch_backends.Jinv, None + +class ToMatrix(GroupOp): + """ convert to 
matrix representation """ + forward_op, backward_op = lietorch_backends.as_matrix, None + + + + +### conversion operations to/from Euclidean embeddings ### + +class FromVec(torch.autograd.Function): + """ convert vector into group object """ + + @classmethod + def forward(cls, ctx, group_id, *inputs): + ctx.group_id = group_id + ctx.save_for_backward(*inputs) + return inputs[0] + + @classmethod + def backward(cls, ctx, grad): + inputs = ctx.saved_tensors + J = lietorch_backends.projector(ctx.group_id, *inputs) + return None, torch.matmul(grad.unsqueeze(-2), torch.linalg.pinv(J)).squeeze(-2) + +class ToVec(torch.autograd.Function): + """ convert group object to vector """ + + @classmethod + def forward(cls, ctx, group_id, *inputs): + ctx.group_id = group_id + ctx.save_for_backward(*inputs) + return inputs[0] + + @classmethod + def backward(cls, ctx, grad): + inputs = ctx.saved_tensors + J = lietorch_backends.projector(ctx.group_id, *inputs) + return None, torch.matmul(grad.unsqueeze(-2), J).squeeze(-2) + diff --git a/mini_dpvo/lietorch/groups.py b/mini_dpvo/lietorch/groups.py new file mode 100644 index 0000000000000000000000000000000000000000..1ebd1740b5e379c8aeebaac7f45caeefbfdb67f0 --- /dev/null +++ b/mini_dpvo/lietorch/groups.py @@ -0,0 +1,322 @@ +import torch +import numpy as np + +# group operations implemented in cuda +from .group_ops import Exp, Log, Inv, Mul, Adj, AdjT, Jinv, Act3, Act4, ToMatrix, ToVec, FromVec +from .broadcasting import broadcast_inputs + + +class LieGroupParameter(torch.Tensor): + """ Wrapper class for LieGroup """ + + from torch._C import _disabled_torch_function_impl + __torch_function__ = _disabled_torch_function_impl + + def __new__(cls, group, requires_grad=True): + data = torch.zeros(group.tangent_shape, + device=group.data.device, + dtype=group.data.dtype, + requires_grad=True) + + return torch.Tensor._make_subclass(cls, data, requires_grad) + + def __init__(self, group): + self.group = group + + def retr(self): + return self.group.retr(self) + + def log(self): + return self.retr().log() + + def inv(self): + return self.retr().inv() + + def adj(self, a): + return self.retr().adj(a) + + def __mul__(self, other): + if isinstance(other, LieGroupParameter): + return self.retr() * other.retr() + else: + return self.retr() * other + + def add_(self, update, alpha): + self.group = self.group.exp(alpha*update) * self.group + + def __getitem__(self, index): + return self.retr().__getitem__(index) + + +class LieGroup: + """ Base class for Lie Group """ + + def __init__(self, data): + self.data = data + + def __repr__(self): + return "{}: size={}, device={}, dtype={}".format( + self.group_name, self.shape, self.device, self.dtype) + + @property + def shape(self): + return self.data.shape[:-1] + + @property + def device(self): + return self.data.device + + @property + def dtype(self): + return self.data.dtype + + def vec(self): + return self.apply_op(ToVec, self.data) + + @property + def tangent_shape(self): + return self.data.shape[:-1] + (self.manifold_dim,) + + @classmethod + def Identity(cls, *batch_shape, **kwargs): + """ Construct identity element with batch shape """ + + if isinstance(batch_shape[0], tuple): + batch_shape = batch_shape[0] + + elif isinstance(batch_shape[0], list): + batch_shape = tuple(batch_shape[0]) + + numel = np.prod(batch_shape) + data = cls.id_elem.reshape(1,-1) + + if 'device' in kwargs: + data = data.to(kwargs['device']) + + if 'dtype' in kwargs: + data = data.type(kwargs['dtype']) + + data = data.repeat(numel, 1) + return 
cls(data).view(batch_shape) + + @classmethod + def IdentityLike(cls, G): + return cls.Identity(G.shape, device=G.data.device, dtype=G.data.dtype) + + @classmethod + def InitFromVec(cls, data): + return cls(cls.apply_op(FromVec, data)) + + @classmethod + def Random(cls, *batch_shape, sigma=1.0, **kwargs): + """ Construct random element with batch_shape by random sampling in tangent space""" + + if isinstance(batch_shape[0], tuple): + batch_shape = batch_shape[0] + + elif isinstance(batch_shape[0], list): + batch_shape = tuple(batch_shape[0]) + + tangent_shape = batch_shape + (cls.manifold_dim,) + xi = torch.randn(tangent_shape, **kwargs) + return cls.exp(sigma * xi) + + @classmethod + def apply_op(cls, op, x, y=None): + """ Apply group operator """ + inputs, out_shape = broadcast_inputs(x, y) + + data = op.apply(cls.group_id, *inputs) + return data.view(out_shape + (-1,)) + + @classmethod + def exp(cls, x): + """ exponential map: x -> X """ + return cls(cls.apply_op(Exp, x)) + + def quaternion(self): + """ extract quaternion """ + return self.apply_op(Quat, self.data) + + def log(self): + """ logarithm map """ + return self.apply_op(Log, self.data) + + def inv(self): + """ group inverse """ + return self.__class__(self.apply_op(Inv, self.data)) + + def mul(self, other): + """ group multiplication """ + return self.__class__(self.apply_op(Mul, self.data, other.data)) + + def retr(self, a): + """ retraction: Exp(a) * X """ + dX = self.__class__.apply_op(Exp, a) + return self.__class__(self.apply_op(Mul, dX, self.data)) + + def adj(self, a): + """ adjoint operator: b = A(X) * a """ + return self.apply_op(Adj, self.data, a) + + def adjT(self, a): + """ transposed adjoint operator: b = a * A(X) """ + return self.apply_op(AdjT, self.data, a) + + def Jinv(self, a): + return self.apply_op(Jinv, self.data, a) + + def act(self, p): + """ action on a point cloud """ + + # action on point + if p.shape[-1] == 3: + return self.apply_op(Act3, self.data, p) + + # action on homogeneous point + elif p.shape[-1] == 4: + return self.apply_op(Act4, self.data, p) + + def matrix(self): + """ convert element to 4x4 matrix """ + I = torch.eye(4, dtype=self.dtype, device=self.device) + I = I.view([1] * (len(self.data.shape) - 1) + [4, 4]) + return self.__class__(self.data[...,None,:]).act(I).transpose(-1,-2) + + def translation(self): + """ extract translation component """ + p = torch.as_tensor([0.0, 0.0, 0.0, 1.0], dtype=self.dtype, device=self.device) + p = p.view([1] * (len(self.data.shape) - 1) + [4,]) + return self.apply_op(Act4, self.data, p) + + def detach(self): + return self.__class__(self.data.detach()) + + def view(self, dims): + data_reshaped = self.data.view(dims + (self.embedded_dim,)) + return self.__class__(data_reshaped) + + def __mul__(self, other): + # group multiplication + + if isinstance(other, LieGroup): + return self.mul(other) + + # action on point + elif isinstance(other, torch.Tensor): + return self.act(other) + + def __getitem__(self, index): + return self.__class__(self.data[index]) + + def __setitem__(self, index, item): + self.data[index] = item.data + + def to(self, *args, **kwargs): + return self.__class__(self.data.to(*args, **kwargs)) + + def cpu(self): + return self.__class__(self.data.cpu()) + + def cuda(self): + return self.__class__(self.data.cuda()) + + def float(self, device): + return self.__class__(self.data.float()) + + def double(self, device): + return self.__class__(self.data.double()) + + def unbind(self, dim=0): + return [self.__class__(x) for x in 
self.data.unbind(dim=dim)] + + +class SO3(LieGroup): + group_name = 'SO3' + group_id = 1 + manifold_dim = 3 + embedded_dim = 4 + + # unit quaternion + id_elem = torch.as_tensor([0.0, 0.0, 0.0, 1.0]) + + def __init__(self, data): + if isinstance(data, SE3): + data = data.data[..., 3:7] + + super(SO3, self).__init__(data) + + +class RxSO3(LieGroup): + group_name = 'RxSO3' + group_id = 2 + manifold_dim = 4 + embedded_dim = 5 + + # unit quaternion + id_elem = torch.as_tensor([0.0, 0.0, 0.0, 1.0, 1.0]) + + def __init__(self, data): + if isinstance(data, Sim3): + data = data.data[..., 3:8] + + super(RxSO3, self).__init__(data) + + +class SE3(LieGroup): + group_name = 'SE3' + group_id = 3 + manifold_dim = 6 + embedded_dim = 7 + + # translation, unit quaternion + id_elem = torch.as_tensor([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]) + + def __init__(self, data): + if isinstance(data, SO3): + translation = torch.zeros_like(data.data[...,:3]) + data = torch.cat([translation, data.data], -1) + + super(SE3, self).__init__(data) + + def scale(self, s): + t, q = self.data.split([3,4], -1) + t = t * s.unsqueeze(-1) + return SE3(torch.cat([t, q], dim=-1)) + + +class Sim3(LieGroup): + group_name = 'Sim3' + group_id = 4 + manifold_dim = 7 + embedded_dim = 8 + + # translation, unit quaternion, scale + id_elem = torch.as_tensor([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0]) + + def __init__(self, data): + + if isinstance(data, SO3): + scale = torch.ones_like(SO3.data[...,:1]) + translation = torch.zeros_like(SO3.data[...,:3]) + data = torch.cat([translation, SO3.data, scale], -1) + + elif isinstance(data, SE3): + scale = torch.ones_like(data.data[...,:1]) + data = torch.cat([data.data, scale], -1) + + elif isinstance(data, Sim3): + data = data.data + + super(Sim3, self).__init__(data) + + +def cat(group_list, dim): + """ Concatenate groups along dimension """ + data = torch.cat([X.data for X in group_list], dim=dim) + return group_list[0].__class__(data) + +def stack(group_list, dim): + """ Concatenate groups along dimension """ + data = torch.stack([X.data for X in group_list], dim=dim) + return group_list[0].__class__(data) diff --git a/mini_dpvo/lietorch/include/common.h b/mini_dpvo/lietorch/include/common.h new file mode 100644 index 0000000000000000000000000000000000000000..4ddab4622dd5132c6711ce9287ad3020f9db4fb7 --- /dev/null +++ b/mini_dpvo/lietorch/include/common.h @@ -0,0 +1,12 @@ +#ifndef COMMON_H +#define COMMON_H + +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_RUNTIME_NO_MALLOC + +#define EPS 1e-6 +#define PI 3.14159265358979323846 + + +#endif + diff --git a/mini_dpvo/lietorch/include/dispatch.h b/mini_dpvo/lietorch/include/dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9c010a814498ef294cf3ee06363058f4678a2cad --- /dev/null +++ b/mini_dpvo/lietorch/include/dispatch.h @@ -0,0 +1,48 @@ +#ifndef DISPATCH_H +#define DISPATCH_H + +#include + +#include "so3.h" +#include "rxso3.h" +#include "se3.h" +#include "sim3.h" + + +#define PRIVATE_CASE_TYPE(group_index, enum_type, type, ...) \ + case enum_type: { \ + using scalar_t = type; \ + switch (group_index) { \ + case 1: { \ + using group_t = SO3; \ + return __VA_ARGS__(); \ + } \ + case 2: { \ + using group_t = RxSO3; \ + return __VA_ARGS__(); \ + } \ + case 3: { \ + using group_t = SE3; \ + return __VA_ARGS__(); \ + } \ + case 4: { \ + using group_t = Sim3; \ + return __VA_ARGS__(); \ + } \ + } \ + } \ + +#define DISPATCH_GROUP_AND_FLOATING_TYPES(GROUP_INDEX, TYPE, NAME, ...) 
\ + [&] { \ + const auto& the_type = TYPE; \ + /* don't use TYPE again in case it is an expensive or side-effect op */ \ + at::ScalarType _st = ::detail::scalar_type(the_type); \ + switch (_st) { \ + PRIVATE_CASE_TYPE(GROUP_INDEX, at::ScalarType::Double, double, __VA_ARGS__) \ + PRIVATE_CASE_TYPE(GROUP_INDEX, at::ScalarType::Float, float, __VA_ARGS__) \ + default: break; \ + } \ + }() + +#endif + diff --git a/mini_dpvo/lietorch/include/lietorch_cpu.h b/mini_dpvo/lietorch/include/lietorch_cpu.h new file mode 100644 index 0000000000000000000000000000000000000000..ba7916747ede616e446ac4793ccc8401364461a3 --- /dev/null +++ b/mini_dpvo/lietorch/include/lietorch_cpu.h @@ -0,0 +1,51 @@ + +#ifndef LIETORCH_CPU_H_ +#define LIETORCH_CPU_H_ + +#include +#include + + +// unary operations +torch::Tensor exp_forward_cpu(int, torch::Tensor); +std::vector exp_backward_cpu(int, torch::Tensor, torch::Tensor); + +torch::Tensor log_forward_cpu(int, torch::Tensor); +std::vector log_backward_cpu(int, torch::Tensor, torch::Tensor); + +torch::Tensor inv_forward_cpu(int, torch::Tensor); +std::vector inv_backward_cpu(int, torch::Tensor, torch::Tensor); + +// binary operations +torch::Tensor mul_forward_cpu(int, torch::Tensor, torch::Tensor); +std::vector mul_backward_cpu(int, torch::Tensor, torch::Tensor, torch::Tensor); + +torch::Tensor adj_forward_cpu(int, torch::Tensor, torch::Tensor); +std::vector adj_backward_cpu(int, torch::Tensor, torch::Tensor, torch::Tensor); + +torch::Tensor adjT_forward_cpu(int, torch::Tensor, torch::Tensor); +std::vector adjT_backward_cpu(int, torch::Tensor, torch::Tensor, torch::Tensor); + +torch::Tensor act_forward_cpu(int, torch::Tensor, torch::Tensor); +std::vector act_backward_cpu(int, torch::Tensor, torch::Tensor, torch::Tensor); + +torch::Tensor act4_forward_cpu(int, torch::Tensor, torch::Tensor); +std::vector act4_backward_cpu(int, torch::Tensor, torch::Tensor, torch::Tensor); + + +// conversion operations +// std::vector to_vec_backward_cpu(int, torch::Tensor, torch::Tensor); +// std::vector from_vec_backward_cpu(int, torch::Tensor, torch::Tensor); + +// utility operations +torch::Tensor orthogonal_projector_cpu(int, torch::Tensor); + +torch::Tensor as_matrix_forward_cpu(int, torch::Tensor); + +torch::Tensor jleft_forward_cpu(int, torch::Tensor, torch::Tensor); + + +#endif + + + \ No newline at end of file diff --git a/mini_dpvo/lietorch/include/lietorch_gpu.h b/mini_dpvo/lietorch/include/lietorch_gpu.h new file mode 100644 index 0000000000000000000000000000000000000000..6a3841ee4c4348a6c2de43c8e6ad310266f83a31 --- /dev/null +++ b/mini_dpvo/lietorch/include/lietorch_gpu.h @@ -0,0 +1,51 @@ + +#ifndef LIETORCH_GPU_H_ +#define LIETORCH_GPU_H_ + +#include +#include +#include +#include + + +// unary operations +torch::Tensor exp_forward_gpu(int, torch::Tensor); +std::vector exp_backward_gpu(int, torch::Tensor, torch::Tensor); + +torch::Tensor log_forward_gpu(int, torch::Tensor); +std::vector log_backward_gpu(int, torch::Tensor, torch::Tensor); + +torch::Tensor inv_forward_gpu(int, torch::Tensor); +std::vector inv_backward_gpu(int, torch::Tensor, torch::Tensor); + +// binary operations +torch::Tensor mul_forward_gpu(int, torch::Tensor, torch::Tensor); +std::vector mul_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor); + +torch::Tensor adj_forward_gpu(int, torch::Tensor, torch::Tensor); +std::vector adj_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor); + +torch::Tensor adjT_forward_gpu(int, torch::Tensor, torch::Tensor); +std::vector adjT_backward_gpu(int, 
torch::Tensor, torch::Tensor, torch::Tensor); + +torch::Tensor act_forward_gpu(int, torch::Tensor, torch::Tensor); +std::vector act_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor); + +torch::Tensor act4_forward_gpu(int, torch::Tensor, torch::Tensor); +std::vector act4_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor); + +// conversion operations +// std::vector to_vec_backward_gpu(int, torch::Tensor, torch::Tensor); +// std::vector from_vec_backward_gpu(int, torch::Tensor, torch::Tensor); + +// utility operators +torch::Tensor orthogonal_projector_gpu(int, torch::Tensor); + +torch::Tensor as_matrix_forward_gpu(int, torch::Tensor); + +torch::Tensor jleft_forward_gpu(int, torch::Tensor, torch::Tensor); + +#endif + + + \ No newline at end of file diff --git a/mini_dpvo/lietorch/include/rxso3.h b/mini_dpvo/lietorch/include/rxso3.h new file mode 100644 index 0000000000000000000000000000000000000000..e0afbe218fa501187357de340527ffbbf6ef28b7 --- /dev/null +++ b/mini_dpvo/lietorch/include/rxso3.h @@ -0,0 +1,324 @@ + +#ifndef RxSO3_HEADER +#define RxSO3_HEADER + +#include +#include +#include + +#include "common.h" + +template +class RxSO3 { + public: + const static int constexpr K = 4; // manifold dimension + const static int constexpr N = 5; // embedding dimension + + using Vector3 = Eigen::Matrix; + using Vector4 = Eigen::Matrix; + using Matrix3 = Eigen::Matrix; + + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + + using Point = Eigen::Matrix; + using Point4 = Eigen::Matrix; + + using Quaternion = Eigen::Quaternion; + using Transformation = Eigen::Matrix; + using Adjoint = Eigen::Matrix; + + + EIGEN_DEVICE_FUNC RxSO3(Quaternion const& q, Scalar const s) + : unit_quaternion(q), scale(s) { + unit_quaternion.normalize(); + }; + + EIGEN_DEVICE_FUNC RxSO3(const Scalar *data) : unit_quaternion(data), scale(data[4]) { + unit_quaternion.normalize(); + }; + + EIGEN_DEVICE_FUNC RxSO3() { + unit_quaternion = Quaternion::Identity(); + scale = Scalar(1.0); + } + + EIGEN_DEVICE_FUNC RxSO3 inv() { + return RxSO3(unit_quaternion.conjugate(), 1.0/scale); + } + + EIGEN_DEVICE_FUNC Data data() const { + Data data_vec; data_vec << unit_quaternion.coeffs(), scale; + return data_vec; + } + + EIGEN_DEVICE_FUNC RxSO3 operator*(RxSO3 const& other) { + return RxSO3(unit_quaternion * other.unit_quaternion, scale * other.scale); + } + + EIGEN_DEVICE_FUNC Point operator*(Point const& p) const { + const Quaternion& q = unit_quaternion; + Point uv = q.vec().cross(p); uv += uv; + return scale * (p + q.w()*uv + q.vec().cross(uv)); + } + + EIGEN_DEVICE_FUNC Point4 act4(Point4 const& p) const { + Point4 p1; p1 << this->operator*(p.template segment<3>(0)), p(3); + return p1; + } + + EIGEN_DEVICE_FUNC Adjoint Adj() const { + Adjoint Ad = Adjoint::Identity(); + Ad.template block<3,3>(0,0) = unit_quaternion.toRotationMatrix(); + return Ad; + } + + EIGEN_DEVICE_FUNC Transformation Matrix() const { + return scale * unit_quaternion.toRotationMatrix(); + } + + EIGEN_DEVICE_FUNC Eigen::Matrix Matrix4x4() const { + Eigen::Matrix T; + T = Eigen::Matrix::Identity(); + T.template block<3,3>(0,0) = Matrix(); + return T; + } + + EIGEN_DEVICE_FUNC Eigen::Matrix orthogonal_projector() const { + // jacobian action on a point + Eigen::Matrix J = Eigen::Matrix::Zero(); + + J.template block<3,3>(0,0) = 0.5 * ( + unit_quaternion.w() * Matrix3::Identity() + + SO3::hat(-unit_quaternion.vec()) + ); + + J.template block<1,3>(3,0) = 0.5 * (-unit_quaternion.vec()); + + // scale + J(4,3) = scale; + + return J; + } + 
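+  // Usage sketch for the scaled-rotation action (illustrative only; assumes
+  // Eigen::Quaterniond and a host-side double instantiation of this class).
+  // operator* above applies the quaternion rotation in expanded cross-product
+  // form and then scales, so it should agree with the matrix action:
+  //
+  //   Eigen::Quaterniond q = Eigen::Quaterniond::UnitRandom();
+  //   RxSO3<double> X(q, 2.0);                        // rotation q, scale 2
+  //   Eigen::Vector3d p = Eigen::Vector3d::Random();
+  //   Eigen::Vector3d p1 = X * p;                     // quaternion form
+  //   Eigen::Vector3d p2 = X.Matrix() * p;            // 2 * R * p
+  //   // p1 and p2 should match to machine precision
+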
+ EIGEN_DEVICE_FUNC Transformation Rotation() const { + return unit_quaternion.toRotationMatrix(); + } + + EIGEN_DEVICE_FUNC Tangent Adj(Tangent const& a) const { + return Adj() * a; + } + + EIGEN_DEVICE_FUNC Tangent AdjT(Tangent const& a) const { + return Adj().transpose() * a; + } + + EIGEN_DEVICE_FUNC static Transformation hat(Tangent const& phi_sigma) { + Vector3 const phi = phi_sigma.template segment<3>(0); + return SO3::hat(phi) + phi(3) * Transformation::Identity(); + } + + EIGEN_DEVICE_FUNC static Adjoint adj(Tangent const& phi_sigma) { + Vector3 const phi = phi_sigma.template segment<3>(0); + Matrix3 const Phi = SO3::hat(phi); + + Adjoint ad = Adjoint::Zero(); + ad.template block<3,3>(0,0) = Phi; + + return ad; + } + + EIGEN_DEVICE_FUNC Tangent Log() const { + using std::abs; + using std::atan; + using std::sqrt; + + Scalar squared_n = unit_quaternion.vec().squaredNorm(); + Scalar w = unit_quaternion.w(); + Scalar two_atan_nbyw_by_n; + + /// Atan-based log thanks to + /// + /// C. Hertzberg et al.: + /// "Integrating Generic Sensor Fusion Algorithms with Sound State + /// Representation through Encapsulation of Manifolds" + /// Information Fusion, 2011 + + if (squared_n < EPS * EPS) { + two_atan_nbyw_by_n = Scalar(2) / w - Scalar(2.0/3.0) * (squared_n) / (w * w * w); + } else { + Scalar n = sqrt(squared_n); + if (abs(w) < EPS) { + if (w > Scalar(0)) { + two_atan_nbyw_by_n = PI / n; + } else { + two_atan_nbyw_by_n = -PI / n; + } + } else { + two_atan_nbyw_by_n = Scalar(2) * atan(n / w) / n; + } + } + + Tangent phi_sigma; + phi_sigma << two_atan_nbyw_by_n * unit_quaternion.vec(), log(scale); + + return phi_sigma; + } + + EIGEN_DEVICE_FUNC static RxSO3 Exp(Tangent const& phi_sigma) { + Vector3 phi = phi_sigma.template segment<3>(0); + Scalar scale = exp(phi_sigma(3)); + + Scalar theta2 = phi.squaredNorm(); + Scalar theta = sqrt(theta2); + Scalar imag_factor; + Scalar real_factor; + + if (theta < EPS) { + Scalar theta4 = theta2 * theta2; + imag_factor = Scalar(0.5) - Scalar(1.0/48.0) * theta2 + Scalar(1.0/3840.0) * theta4; + real_factor = Scalar(1) - Scalar(1.0/8.0) * theta2 + Scalar(1.0/384.0) * theta4; + } else { + imag_factor = sin(.5 * theta) / theta; + real_factor = cos(.5 * theta); + } + + Quaternion q(real_factor, imag_factor*phi.x(), imag_factor*phi.y(), imag_factor*phi.z()); + return RxSO3(q, scale); + } + + EIGEN_DEVICE_FUNC static Matrix3 calcW(Tangent const& phi_sigma) { + // left jacobian + using std::abs; + Matrix3 const I = Matrix3::Identity(); + Scalar const one(1); + Scalar const half(0.5); + + Vector3 const phi = phi_sigma.template segment<3>(0); + Scalar const sigma = phi_sigma(3); + Scalar const theta = phi.norm(); + + Matrix3 const Phi = SO3::hat(phi); + Matrix3 const Phi2 = Phi * Phi; + Scalar const scale = exp(sigma); + + Scalar A, B, C; + if (abs(sigma) < EPS) { + C = one; + if (abs(theta) < EPS) { + A = half; + B = Scalar(1. 
/ 6.); + } else { + Scalar theta_sq = theta * theta; + A = (one - cos(theta)) / theta_sq; + B = (theta - sin(theta)) / (theta_sq * theta); + } + } else { + C = (scale - one) / sigma; + if (abs(theta) < EPS) { + Scalar sigma_sq = sigma * sigma; + A = ((sigma - one) * scale + one) / sigma_sq; + B = (scale * half * sigma_sq + scale - one - sigma * scale) / + (sigma_sq * sigma); + } else { + Scalar theta_sq = theta * theta; + Scalar a = scale * sin(theta); + Scalar b = scale * cos(theta); + Scalar c = theta_sq + sigma * sigma; + A = (a * sigma + (one - b) * theta) / (theta * c); + B = (C - ((b - one) * sigma + a * theta) / (c)) * one / (theta_sq); + } + } + return A * Phi + B * Phi2 + C * I; + } + + EIGEN_DEVICE_FUNC static Matrix3 calcWInv(Tangent const& phi_sigma) { + // left jacobian inverse + Matrix3 const I = Matrix3::Identity(); + Scalar const half(0.5); + Scalar const one(1); + Scalar const two(2); + + Vector3 const phi = phi_sigma.template segment<3>(0); + Scalar const sigma = phi_sigma(3); + Scalar const theta = phi.norm(); + Scalar const scale = exp(sigma); + + Matrix3 const Phi = SO3::hat(phi); + Matrix3 const Phi2 = Phi * Phi; + Scalar const scale_sq = scale * scale; + Scalar const theta_sq = theta * theta; + Scalar const sin_theta = sin(theta); + Scalar const cos_theta = cos(theta); + + Scalar a, b, c; + if (abs(sigma * sigma) < EPS) { + c = one - half * sigma; + a = -half; + if (abs(theta_sq) < EPS) { + b = Scalar(1. / 12.); + } else { + b = (theta * sin_theta + two * cos_theta - two) / + (two * theta_sq * (cos_theta - one)); + } + } else { + Scalar const scale_cu = scale_sq * scale; + c = sigma / (scale - one); + if (abs(theta_sq) < EPS) { + a = (-sigma * scale + scale - one) / ((scale - one) * (scale - one)); + b = (scale_sq * sigma - two * scale_sq + scale * sigma + two * scale) / + (two * scale_cu - Scalar(6) * scale_sq + Scalar(6) * scale - two); + } else { + Scalar const s_sin_theta = scale * sin_theta; + Scalar const s_cos_theta = scale * cos_theta; + a = (theta * s_cos_theta - theta - sigma * s_sin_theta) / + (theta * (scale_sq - two * s_cos_theta + one)); + b = -scale * + (theta * s_sin_theta - theta * sin_theta + sigma * s_cos_theta - + scale * sigma + sigma * cos_theta - sigma) / + (theta_sq * (scale_cu - two * scale * s_cos_theta - scale_sq + + two * s_cos_theta + scale - one)); + } + } + return a * Phi + b * Phi2 + c * I; + } + + EIGEN_DEVICE_FUNC static Adjoint left_jacobian(Tangent const& phi_sigma) { + // left jacobian + Adjoint J = Adjoint::Identity(); + Vector3 phi = phi_sigma.template segment<3>(0); + J.template block<3,3>(0,0) = SO3::left_jacobian(phi); + return J; + } + + EIGEN_DEVICE_FUNC static Adjoint left_jacobian_inverse(Tangent const& phi_sigma) { + // left jacobian inverse + Adjoint Jinv = Adjoint::Identity(); + Vector3 phi = phi_sigma.template segment<3>(0); + Jinv.template block<3,3>(0,0) = SO3::left_jacobian_inverse(phi); + return Jinv; + } + + EIGEN_DEVICE_FUNC static Eigen::Matrix act_jacobian(Point const& p) { + // jacobian action on a point + Eigen::Matrix Ja; + Ja << SO3::hat(-p), p; + return Ja; + } + + EIGEN_DEVICE_FUNC static Eigen::Matrix act4_jacobian(Point4 const& p) { + // jacobian action on a point + Eigen::Matrix J = Eigen::Matrix::Zero(); + J.template block<3,3>(0,0) = SO3::hat(-p.template segment<3>(0)); + J.template block<3,1>(0,3) = p.template segment<3>(0); + return J; + } + + private: + Quaternion unit_quaternion; + Scalar scale; +}; + +#endif + + diff --git a/mini_dpvo/lietorch/include/se3.h b/mini_dpvo/lietorch/include/se3.h 
new file mode 100644 index 0000000000000000000000000000000000000000..1707d06b34087f5cc4b6c4f8eb5988841391497b --- /dev/null +++ b/mini_dpvo/lietorch/include/se3.h @@ -0,0 +1,229 @@ + +#ifndef SE3_HEADER +#define SE3_HEADER + +#include +#include +#include + +#include "common.h" +#include "so3.h" + + +template +class SE3 { + public: + const static int constexpr K = 6; // manifold dimension + const static int constexpr N = 7; // embedding dimension + + using Vector3 = Eigen::Matrix; + using Vector4 = Eigen::Matrix; + using Matrix3 = Eigen::Matrix; + + using Tangent = Eigen::Matrix; + using Point = Eigen::Matrix; + using Point4 = Eigen::Matrix; + using Data = Eigen::Matrix; + using Transformation = Eigen::Matrix; + using Adjoint = Eigen::Matrix; + + EIGEN_DEVICE_FUNC SE3() { translation = Vector3::Zero(); } + + EIGEN_DEVICE_FUNC SE3(SO3 const& so3, Vector3 const& t) : so3(so3), translation(t) {}; + + EIGEN_DEVICE_FUNC SE3(const Scalar *data) : translation(data), so3(data+3) {}; + + EIGEN_DEVICE_FUNC SE3 inv() { + return SE3(so3.inv(), -(so3.inv()*translation)); + } + + EIGEN_DEVICE_FUNC Data data() const { + Data data_vec; data_vec << translation, so3.data(); + return data_vec; + } + + EIGEN_DEVICE_FUNC SE3 operator*(SE3 const& other) { + return SE3(so3 * other.so3, translation + so3 * other.translation); + } + + EIGEN_DEVICE_FUNC Point operator*(Point const& p) const { + return so3 * p + translation; + } + + EIGEN_DEVICE_FUNC Point4 act4(Point4 const& p) const { + Point4 p1; p1 << so3 * p.template segment<3>(0) + translation * p(3), p(3); + return p1; + } + + EIGEN_DEVICE_FUNC Adjoint Adj() const { + Matrix3 R = so3.Matrix(); + Matrix3 tx = SO3::hat(translation); + Matrix3 Zer = Matrix3::Zero(); + + Adjoint Ad; + Ad << R, tx*R, Zer, R; + + return Ad; + } + + EIGEN_DEVICE_FUNC Transformation Matrix() const { + Transformation T = Transformation::Identity(); + T.template block<3,3>(0,0) = so3.Matrix(); + T.template block<3,1>(0,3) = translation; + return T; + } + + EIGEN_DEVICE_FUNC Transformation Matrix4x4() const { + return Matrix(); + } + + EIGEN_DEVICE_FUNC Tangent Adj(Tangent const& a) const { + return Adj() * a; + } + + EIGEN_DEVICE_FUNC Tangent AdjT(Tangent const& a) const { + return Adj().transpose() * a; + } + + + EIGEN_DEVICE_FUNC static Transformation hat(Tangent const& tau_phi) { + Vector3 tau = tau_phi.template segment<3>(0); + Vector3 phi = tau_phi.template segment<3>(3); + + Transformation TauPhi = Transformation::Zero(); + TauPhi.template block<3,3>(0,0) = SO3::hat(phi); + TauPhi.template block<3,1>(0,3) = tau; + + return TauPhi; + } + + EIGEN_DEVICE_FUNC static Adjoint adj(Tangent const& tau_phi) { + Vector3 tau = tau_phi.template segment<3>(0); + Vector3 phi = tau_phi.template segment<3>(3); + + Matrix3 Tau = SO3::hat(tau); + Matrix3 Phi = SO3::hat(phi); + Matrix3 Zer = Matrix3::Zero(); + + Adjoint ad; + ad << Phi, Tau, Zer, Phi; + + return ad; + } + + EIGEN_DEVICE_FUNC Eigen::Matrix orthogonal_projector() const { + // jacobian action on a point + Eigen::Matrix J = Eigen::Matrix::Zero(); + J.template block<3,3>(0,0) = Matrix3::Identity(); + J.template block<3,3>(0,3) = SO3::hat(-translation); + J.template block<4,4>(3,3) = so3.orthogonal_projector(); + + return J; + } + + EIGEN_DEVICE_FUNC Tangent Log() const { + Vector3 phi = so3.Log(); + Matrix3 Vinv = SO3::left_jacobian_inverse(phi); + + Tangent tau_phi; + tau_phi << Vinv * translation, phi; + + return tau_phi; + } + + EIGEN_DEVICE_FUNC static SE3 Exp(Tangent const& tau_phi) { + Vector3 tau = tau_phi.template segment<3>(0); 
+ Vector3 phi = tau_phi.template segment<3>(3); + + SO3 so3 = SO3::Exp(phi); + Vector3 t = SO3::left_jacobian(phi) * tau; + + return SE3(so3, t); + } + + EIGEN_DEVICE_FUNC static Matrix3 calcQ(Tangent const& tau_phi) { + // Q matrix + Vector3 tau = tau_phi.template segment<3>(0); + Vector3 phi = tau_phi.template segment<3>(3); + Matrix3 Tau = SO3::hat(tau); + Matrix3 Phi = SO3::hat(phi); + + Scalar theta = phi.norm(); + Scalar theta_pow2 = theta * theta; + Scalar theta_pow4 = theta_pow2 * theta_pow2; + + Scalar coef1 = (theta < EPS) ? + Scalar(1.0/6.0) - Scalar(1.0/120.0) * theta_pow2 : + (theta - sin(theta)) / (theta_pow2 * theta); + + Scalar coef2 = (theta < EPS) ? + Scalar(1.0/24.0) - Scalar(1.0/720.0) * theta_pow2 : + (theta_pow2 + 2*cos(theta) - 2) / (2 * theta_pow4); + + Scalar coef3 = (theta < EPS) ? + Scalar(1.0/120.0) - Scalar(1.0/2520.0) * theta_pow2 : + (2*theta - 3*sin(theta) + theta*cos(theta)) / (2 * theta_pow4 * theta); + + Matrix3 Q = Scalar(0.5) * Tau + + coef1 * (Phi*Tau + Tau*Phi + Phi*Tau*Phi) + + coef2 * (Phi*Phi*Tau + Tau*Phi*Phi - 3*Phi*Tau*Phi) + + coef3 * (Phi*Tau*Phi*Phi + Phi*Phi*Tau*Phi); + + return Q; + } + + EIGEN_DEVICE_FUNC static Adjoint left_jacobian(Tangent const& tau_phi) { + // left jacobian + Vector3 phi = tau_phi.template segment<3>(3); + Matrix3 J = SO3::left_jacobian(phi); + Matrix3 Q = SE3::calcQ(tau_phi); + Matrix3 Zer = Matrix3::Zero(); + + Adjoint J6x6; + J6x6 << J, Q, Zer, J; + + return J6x6; + } + + EIGEN_DEVICE_FUNC static Adjoint left_jacobian_inverse(Tangent const& tau_phi) { + // left jacobian inverse + Vector3 tau = tau_phi.template segment<3>(0); + Vector3 phi = tau_phi.template segment<3>(3); + Matrix3 Jinv = SO3::left_jacobian_inverse(phi); + Matrix3 Q = SE3::calcQ(tau_phi); + Matrix3 Zer = Matrix3::Zero(); + + Adjoint J6x6; + J6x6 << Jinv, -Jinv * Q * Jinv, Zer, Jinv; + + return J6x6; + + } + + EIGEN_DEVICE_FUNC static Eigen::Matrix act_jacobian(Point const& p) { + // jacobian action on a point + Eigen::Matrix J; + J.template block<3,3>(0,0) = Matrix3::Identity(); + J.template block<3,3>(0,3) = SO3::hat(-p); + return J; + } + + EIGEN_DEVICE_FUNC static Eigen::Matrix act4_jacobian(Point4 const& p) { + // jacobian action on a point + Eigen::Matrix J = Eigen::Matrix::Zero(); + J.template block<3,3>(0,0) = p(3) * Matrix3::Identity(); + J.template block<3,3>(0,3) = SO3::hat(-p.template segment<3>(0)); + return J; + } + + + + + private: + SO3 so3; + Vector3 translation; + +}; + +#endif + diff --git a/mini_dpvo/lietorch/include/sim3.h b/mini_dpvo/lietorch/include/sim3.h new file mode 100644 index 0000000000000000000000000000000000000000..eb072223a44ebc3748ca58628682014c3b40a849 --- /dev/null +++ b/mini_dpvo/lietorch/include/sim3.h @@ -0,0 +1,217 @@ + +#ifndef Sim3_HEADER +#define Sim3_HEADER + +#include +#include + +#include +#include + +#include "common.h" +#include "so3.h" +#include "rxso3.h" + + +template +class Sim3 { + public: + const static int constexpr K = 7; // manifold dimension + const static int constexpr N = 8; // embedding dimension + + using Vector3 = Eigen::Matrix; + using Vector4 = Eigen::Matrix; + using Matrix3 = Eigen::Matrix; + + using Tangent = Eigen::Matrix; + using Point = Eigen::Matrix; + using Point4 = Eigen::Matrix; + using Data = Eigen::Matrix; + using Transformation = Eigen::Matrix; + using Adjoint = Eigen::Matrix; + + EIGEN_DEVICE_FUNC Sim3() { + translation = Vector3::Zero(); + } + + EIGEN_DEVICE_FUNC Sim3(RxSO3 const& rxso3, Vector3 const& t) + : rxso3(rxso3), translation(t) {}; + + EIGEN_DEVICE_FUNC Sim3(const 
Scalar *data) + : translation(data), rxso3(data+3) {}; + + EIGEN_DEVICE_FUNC Sim3 inv() { + return Sim3(rxso3.inv(), -(rxso3.inv() * translation)); + } + + EIGEN_DEVICE_FUNC Data data() const { + Data data_vec; data_vec << translation, rxso3.data(); + return data_vec; + } + + EIGEN_DEVICE_FUNC Sim3 operator*(Sim3 const& other) { + return Sim3(rxso3 * other.rxso3, translation + rxso3 * other.translation); + } + + EIGEN_DEVICE_FUNC Point operator*(Point const& p) const { + return (rxso3 * p) + translation; + } + + EIGEN_DEVICE_FUNC Point4 act4(Point4 const& p) const { + Point4 p1; p1 << rxso3 * p.template segment<3>(0) + p(3) * translation , p(3); + return p1; + } + + EIGEN_DEVICE_FUNC Transformation Matrix() const { + Transformation T = Transformation::Identity(); + T.template block<3,3>(0,0) = rxso3.Matrix(); + T.template block<3,1>(0,3) = translation; + return T; + } + + EIGEN_DEVICE_FUNC Transformation Matrix4x4() const { + Transformation T = Transformation::Identity(); + T.template block<3,3>(0,0) = rxso3.Matrix(); + T.template block<3,1>(0,3) = translation; + return T; + } + + EIGEN_DEVICE_FUNC Eigen::Matrix orthogonal_projector() const { + // jacobian action on a point + Eigen::Matrix J = Eigen::Matrix::Zero(); + J.template block<3,3>(0,0) = Matrix3::Identity(); + J.template block<3,3>(0,3) = SO3::hat(-translation); + J.template block<3,1>(0,6) = translation; + J.template block<5,5>(3,3) = rxso3.orthogonal_projector(); + return J; + } + + EIGEN_DEVICE_FUNC Adjoint Adj() const { + Adjoint Ad = Adjoint::Identity(); + Matrix3 sR = rxso3.Matrix(); + Matrix3 tx = SO3::hat(translation); + Matrix3 R = rxso3.Rotation(); + + Ad.template block<3,3>(0,0) = sR; + Ad.template block<3,3>(0,3) = tx * R; + Ad.template block<3,1>(0,6) = -translation; + Ad.template block<3,3>(3,3) = R; + + return Ad; + } + + EIGEN_DEVICE_FUNC Tangent Adj(Tangent const& a) const { + return Adj() * a; + } + + EIGEN_DEVICE_FUNC Tangent AdjT(Tangent const& a) const { + return Adj().transpose() * a; + } + + EIGEN_DEVICE_FUNC static Transformation hat(Tangent const& tau_phi_sigma) { + Vector3 tau = tau_phi_sigma.template segment<3>(0); + Vector3 phi = tau_phi_sigma.template segment<3>(3); + Scalar sigma = tau_phi_sigma(6); + + Matrix3 Phi = SO3::hat(phi); + Matrix3 I = Matrix3::Identity(); + + Transformation Omega = Transformation::Zero(); + Omega.template block<3,3>(0,0) = Phi + sigma * I; + Omega.template block<3,1>(0,3) = tau; + + return Omega; + } + + EIGEN_DEVICE_FUNC static Adjoint adj(Tangent const& tau_phi_sigma) { + Adjoint ad = Adjoint::Zero(); + Vector3 tau = tau_phi_sigma.template segment<3>(0); + Vector3 phi = tau_phi_sigma.template segment<3>(3); + Scalar sigma = tau_phi_sigma(6); + + Matrix3 Tau = SO3::hat(tau); + Matrix3 Phi = SO3::hat(phi); + Matrix3 I = Matrix3::Identity(); + + ad.template block<3,3>(0,0) = Phi + sigma * I; + ad.template block<3,3>(0,3) = Tau; + ad.template block<3,1>(0,6) = -tau; + ad.template block<3,3>(3,3) = Phi; + + return ad; + } + + + EIGEN_DEVICE_FUNC Tangent Log() const { + // logarithm map + Vector4 phi_sigma = rxso3.Log(); + Matrix3 W = RxSO3::calcW(phi_sigma); + + Tangent tau_phi_sigma; + tau_phi_sigma << W.inverse() * translation, phi_sigma; + + return tau_phi_sigma; + } + + EIGEN_DEVICE_FUNC static Sim3 Exp(Tangent const& tau_phi_sigma) { + // exponential map + Vector3 tau = tau_phi_sigma.template segment<3>(0); + Vector4 phi_sigma = tau_phi_sigma.template segment<4>(3); + + RxSO3 rxso3 = RxSO3::Exp(phi_sigma); + Matrix3 W = RxSO3::calcW(phi_sigma); + + return Sim3(rxso3, 
W*tau); + } + + EIGEN_DEVICE_FUNC static Adjoint left_jacobian(Tangent const& tau_phi_sigma) { + // left jacobian + Adjoint const Xi = adj(tau_phi_sigma); + Adjoint const Xi2 = Xi * Xi; + Adjoint const Xi4 = Xi2 * Xi2; + + return Adjoint::Identity() + + Scalar(1.0/2.0)*Xi + + Scalar(1.0/6.0)*Xi2 + + Scalar(1.0/24.0)*Xi*Xi2 + + Scalar(1.0/120.0)*Xi4; + + Scalar(1.0/720.0)*Xi*Xi4; + } + + EIGEN_DEVICE_FUNC static Adjoint left_jacobian_inverse(Tangent const& tau_phi_sigma) { + // left jacobian inverse + Adjoint const Xi = adj(tau_phi_sigma); + Adjoint const Xi2 = Xi * Xi; + Adjoint const Xi4 = Xi2 * Xi2; + + return Adjoint::Identity() + - Scalar(1.0/2.0)*Xi + + Scalar(1.0/12.0)*Xi2 + - Scalar(1.0/720.0)*Xi4; + } + + EIGEN_DEVICE_FUNC static Eigen::Matrix act_jacobian(Point const& p) { + // jacobian action on a point + Eigen::Matrix J; + J.template block<3,3>(0,0) = Matrix3::Identity(); + J.template block<3,3>(0,3) = SO3::hat(-p); + J.template block<3,1>(0,6) = p; + return J; + } + + EIGEN_DEVICE_FUNC static Eigen::Matrix act4_jacobian(Point4 const& p) { + // jacobian action on a point + Eigen::Matrix J = Eigen::Matrix::Zero(); + J.template block<3,3>(0,0) = p(3) * Matrix3::Identity(); + J.template block<3,3>(0,3) = SO3::hat(-p.template segment<3>(0)); + J.template block<3,1>(0,6) = p.template segment<3>(0); + return J; + } + + private: + Vector3 translation; + RxSO3 rxso3; +}; + +#endif + diff --git a/mini_dpvo/lietorch/include/so3.h b/mini_dpvo/lietorch/include/so3.h new file mode 100644 index 0000000000000000000000000000000000000000..bdd9b44a01e6daf7d4685ba3189d2e027c4fa58a --- /dev/null +++ b/mini_dpvo/lietorch/include/so3.h @@ -0,0 +1,229 @@ + +#ifndef SO3_HEADER +#define SO3_HEADER + +#include +#include +#include +#include + +#include "common.h" + +template +class SO3 { + public: + const static int constexpr K = 3; // manifold dimension + const static int constexpr N = 4; // embedding dimension + + using Vector3 = Eigen::Matrix; + using Vector4 = Eigen::Matrix; + using Matrix3 = Eigen::Matrix; + + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + + using Point = Eigen::Matrix; + using Point4 = Eigen::Matrix; + using Transformation = Eigen::Matrix; + using Adjoint = Eigen::Matrix; + using Quaternion = Eigen::Quaternion; + + EIGEN_DEVICE_FUNC SO3(Quaternion const& q) : unit_quaternion(q) { + unit_quaternion.normalize(); + }; + + EIGEN_DEVICE_FUNC SO3(const Scalar *data) : unit_quaternion(data) { + unit_quaternion.normalize(); + }; + + EIGEN_DEVICE_FUNC SO3() { + unit_quaternion = Quaternion::Identity(); + } + + EIGEN_DEVICE_FUNC SO3 inv() { + return SO3(unit_quaternion.conjugate()); + } + + EIGEN_DEVICE_FUNC Data data() const { + return unit_quaternion.coeffs(); + } + + EIGEN_DEVICE_FUNC SO3 operator*(SO3 const& other) { + return SO3(unit_quaternion * other.unit_quaternion); + } + + EIGEN_DEVICE_FUNC Point operator*(Point const& p) const { + const Quaternion& q = unit_quaternion; + Point uv = q.vec().cross(p); + uv += uv; + return p + q.w()*uv + q.vec().cross(uv); + } + + EIGEN_DEVICE_FUNC Point4 act4(Point4 const& p) const { + Point4 p1; p1 << this->operator*(p.template segment<3>(0)), p(3); + return p1; + } + + EIGEN_DEVICE_FUNC Adjoint Adj() const { + return unit_quaternion.toRotationMatrix(); + } + + EIGEN_DEVICE_FUNC Transformation Matrix() const { + return unit_quaternion.toRotationMatrix(); + } + + EIGEN_DEVICE_FUNC Eigen::Matrix Matrix4x4() const { + Eigen::Matrix T = Eigen::Matrix::Identity(); + T.template block<3,3>(0,0) = Matrix(); + return T; + } + + 
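+  // Usage sketch for the exponential/logarithm pair (illustrative only;
+  // assumes <cmath> for M_PI and a double instantiation of this class).
+  // Exp maps an axis-angle vector phi (|phi| = rotation angle) to a unit
+  // quaternion and Log inverts it, so a quarter turn about z behaves as:
+  //
+  //   Eigen::Vector3d phi(0.0, 0.0, M_PI / 2);
+  //   SO3<double> R = SO3<double>::Exp(phi);
+  //   Eigen::Vector3d p = R * Eigen::Vector3d(1, 0, 0);  // ~ (0, 1, 0)
+  //   Eigen::Vector3d phi_back = R.Log();                // ~ (0, 0, pi/2)
+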
EIGEN_DEVICE_FUNC Eigen::Matrix orthogonal_projector() const { + // jacobian action on a point + Eigen::Matrix J = Eigen::Matrix::Zero(); + J.template block<3,3>(0,0) = 0.5 * ( + unit_quaternion.w() * Matrix3::Identity() + + SO3::hat(-unit_quaternion.vec()) + ); + + J.template block<1,3>(3,0) = 0.5 * (-unit_quaternion.vec()); + return J; + } + + EIGEN_DEVICE_FUNC Tangent Adj(Tangent const& a) const { + return Adj() * a; + } + + EIGEN_DEVICE_FUNC Tangent AdjT(Tangent const& a) const { + return Adj().transpose() * a; + } + + EIGEN_DEVICE_FUNC static Transformation hat(Tangent const& phi) { + Transformation Phi; + Phi << + 0.0, -phi(2), phi(1), + phi(2), 0.0, -phi(0), + -phi(1), phi(0), 0.0; + + return Phi; + } + + EIGEN_DEVICE_FUNC static Adjoint adj(Tangent const& phi) { + return SO3::hat(phi); + } + + EIGEN_DEVICE_FUNC Tangent Log() const { + using std::abs; + using std::atan; + using std::sqrt; + Scalar squared_n = unit_quaternion.vec().squaredNorm(); + Scalar w = unit_quaternion.w(); + + Scalar two_atan_nbyw_by_n; + + /// Atan-based log thanks to + /// + /// C. Hertzberg et al.: + /// "Integrating Generic Sensor Fusion Algorithms with Sound State + /// Representation through Encapsulation of Manifolds" + /// Information Fusion, 2011 + + if (squared_n < EPS * EPS) { + // If quaternion is normalized and n=0, then w should be 1; + // w=0 should never happen here! + Scalar squared_w = w * w; + two_atan_nbyw_by_n = + Scalar(2) / w - Scalar(2.0/3.0) * (squared_n) / (w * squared_w); + } else { + Scalar n = sqrt(squared_n); + if (abs(w) < EPS) { + if (w > Scalar(0)) { + two_atan_nbyw_by_n = Scalar(PI) / n; + } else { + two_atan_nbyw_by_n = -Scalar(PI) / n; + } + } else { + two_atan_nbyw_by_n = Scalar(2) * atan(n / w) / n; + } + } + + return two_atan_nbyw_by_n * unit_quaternion.vec(); + } + + EIGEN_DEVICE_FUNC static SO3 Exp(Tangent const& phi) { + Scalar theta2 = phi.squaredNorm(); + Scalar theta = sqrt(theta2); + Scalar imag_factor; + Scalar real_factor; + + if (theta < EPS) { + Scalar theta4 = theta2 * theta2; + imag_factor = Scalar(0.5) - Scalar(1.0/48.0) * theta2 + Scalar(1.0/3840.0) * theta4; + real_factor = Scalar(1) - Scalar(1.0/8.0) * theta2 + Scalar(1.0/384.0) * theta4; + } else { + imag_factor = sin(.5 * theta) / theta; + real_factor = cos(.5 * theta); + } + + Quaternion q(real_factor, imag_factor*phi.x(), imag_factor*phi.y(), imag_factor*phi.z()); + return SO3(q); + } + + EIGEN_DEVICE_FUNC static Adjoint left_jacobian(Tangent const& phi) { + // left jacobian + Matrix3 I = Matrix3::Identity(); + Matrix3 Phi = SO3::hat(phi); + Matrix3 Phi2 = Phi * Phi; + + Scalar theta2 = phi.squaredNorm(); + Scalar theta = sqrt(theta2); + + Scalar coef1 = (theta < EPS) ? + Scalar(1.0/2.0) - Scalar(1.0/24.0) * theta2 : + (1.0 - cos(theta)) / theta2; + + Scalar coef2 = (theta < EPS) ? + Scalar(1.0/6.0) - Scalar(1.0/120.0) * theta2 : + (theta - sin(theta)) / (theta2 * theta); + + return I + coef1 * Phi + coef2 * Phi2; + } + + EIGEN_DEVICE_FUNC static Adjoint left_jacobian_inverse(Tangent const& phi) { + // left jacobian inverse + Matrix3 I = Matrix3::Identity(); + Matrix3 Phi = SO3::hat(phi); + Matrix3 Phi2 = Phi * Phi; + + Scalar theta2 = phi.squaredNorm(); + Scalar theta = sqrt(theta2); + Scalar half_theta = Scalar(.5) * theta ; + + Scalar coef2 = (theta < EPS) ? 
Scalar(1.0/12.0) : + (Scalar(1) - + theta * cos(half_theta) / (Scalar(2) * sin(half_theta))) / + (theta * theta); + + return I + Scalar(-0.5) * Phi + coef2 * Phi2; + } + + EIGEN_DEVICE_FUNC static Eigen::Matrix act_jacobian(Point const& p) { + // jacobian action on a point + return SO3::hat(-p); + } + + EIGEN_DEVICE_FUNC static Eigen::Matrix act4_jacobian(Point4 const& p) { + // jacobian action on a point + Eigen::Matrix J = Eigen::Matrix::Zero(); + J.template block<3,3>(0,0) = SO3::hat(-p.template segment<3>(0)); + return J; + } + + private: + Quaternion unit_quaternion; + +}; + +#endif + + diff --git a/mini_dpvo/lietorch/run_tests.py b/mini_dpvo/lietorch/run_tests.py new file mode 100644 index 0000000000000000000000000000000000000000..79a05cf8567e8fcc196ab436c351aeb900ec3400 --- /dev/null +++ b/mini_dpvo/lietorch/run_tests.py @@ -0,0 +1,302 @@ +import torch +import lietorch + +from lietorch import SO3, RxSO3, SE3, Sim3 +from gradcheck import gradcheck, get_analytical_jacobian + + +### forward tests ### + +def make_homogeneous(p): + return torch.cat([p, torch.ones_like(p[...,:1])], dim=-1) + +def matv(A, b): + return torch.matmul(A, b[...,None])[..., 0] + +def test_exp_log(Group, device='cuda'): + """ check Log(Exp(x)) == x """ + a = .2*torch.randn(2,3,4,5,6,7,Group.manifold_dim, device=device).double() + b = Group.exp(a).log() + assert torch.allclose(a,b,atol=1e-8), "should be identity" + print("\t-", Group, "Passed exp-log test") + +def test_inv(Group, device='cuda'): + """ check X * X^{-1} == 0 """ + X = Group.exp(.1*torch.randn(2,3,4,5,Group.manifold_dim, device=device).double()) + a = (X * X.inv()).log() + assert torch.allclose(a, torch.zeros_like(a), atol=1e-8), "should be 0" + print("\t-", Group, "Passed inv test") + +def test_adj(Group, device='cuda'): + """ check X * Exp(a) == Exp(Adj(X,a)) * X 0 """ + X = Group.exp(torch.randn(2,3,4,5, Group.manifold_dim, device=device).double()) + a = torch.randn(2,3,4,5, Group.manifold_dim, device=device).double() + + b = X.adj(a) + Y1 = X * Group.exp(a) + Y2 = Group.exp(b) * X + + c = (Y1 * Y2.inv()).log() + assert torch.allclose(c, torch.zeros_like(c), atol=1e-8), "should be 0" + print("\t-", Group, "Passed adj test") + + +def test_act(Group, device='cuda'): + X = Group.exp(torch.randn(1, Group.manifold_dim, device=device).double()) + p = torch.randn(1,3,device=device).double() + + p1 = X.act(p) + p2 = matv(X.matrix(), make_homogeneous(p)) + + assert torch.allclose(p1, p2[...,:3], atol=1e-8), "should be 0" + print("\t-", Group, "Passed act test") + + +### backward tests ### +def test_exp_log_grad(Group, device='cuda', tol=1e-8): + + D = Group.manifold_dim + + def fn(a): + return Group.exp(a).log() + + a = torch.zeros(1, Group.manifold_dim, requires_grad=True, device=device).double() + analytical, reentrant, correct_grad_sizes, correct_grad_types = \ + get_analytical_jacobian((a,), fn(a)) + + assert torch.allclose(analytical[0], torch.eye(D, device=device).double(), atol=tol) + + a = .2 * torch.randn(1, Group.manifold_dim, requires_grad=True, device=device).double() + analytical, reentrant, correct_grad_sizes, correct_grad_types = \ + get_analytical_jacobian((a,), fn(a)) + + assert torch.allclose(analytical[0], torch.eye(D, device=device).double(), atol=tol) + + print("\t-", Group, "Passed eye-grad test") + + +def test_inv_log_grad(Group, device='cuda', tol=1e-8): + + D = Group.manifold_dim + X = Group.exp(.2*torch.randn(1,D,device=device).double()) + + def fn(a): + return (Group.exp(a) * X).inv().log() + + a = torch.zeros(1, D, 
requires_grad=True, device=device).double() + analytical, numerical = gradcheck(fn, [a], eps=1e-4) + + # assert torch.allclose(analytical[0], numerical[0], atol=tol) + if not torch.allclose(analytical[0], numerical[0], atol=tol): + print(analytical[0]) + print(numerical[0]) + + print("\t-", Group, "Passed inv-grad test") + + +def test_adj_grad(Group, device='cuda'): + D = Group.manifold_dim + X = Group.exp(.5*torch.randn(1,Group.manifold_dim, device=device).double()) + + def fn(a, b): + return (Group.exp(a) * X).adj(b) + + a = torch.zeros(1, D, requires_grad=True, device=device).double() + b = torch.randn(1, D, requires_grad=True, device=device).double() + + analytical, numerical = gradcheck(fn, [a, b], eps=1e-4) + assert torch.allclose(analytical[0], numerical[0], atol=1e-8) + assert torch.allclose(analytical[1], numerical[1], atol=1e-8) + + print("\t-", Group, "Passed adj-grad test") + + +def test_adjT_grad(Group, device='cuda'): + D = Group.manifold_dim + X = Group.exp(.5*torch.randn(1,Group.manifold_dim, device=device).double()) + + def fn(a, b): + return (Group.exp(a) * X).adjT(b) + + a = torch.zeros(1, D, requires_grad=True, device=device).double() + b = torch.randn(1, D, requires_grad=True, device=device).double() + + analytical, numerical = gradcheck(fn, [a, b], eps=1e-4) + + assert torch.allclose(analytical[0], numerical[0], atol=1e-8) + assert torch.allclose(analytical[1], numerical[1], atol=1e-8) + + print("\t-", Group, "Passed adjT-grad test") + + +def test_act_grad(Group, device='cuda'): + D = Group.manifold_dim + X = Group.exp(5*torch.randn(1,D, device=device).double()) + + def fn(a, b): + return (X*Group.exp(a)).act(b) + + a = torch.zeros(1, D, requires_grad=True, device=device).double() + b = torch.randn(1, 3, requires_grad=True, device=device).double() + + analytical, numerical = gradcheck(fn, [a, b], eps=1e-4) + + assert torch.allclose(analytical[0], numerical[0], atol=1e-8) + assert torch.allclose(analytical[1], numerical[1], atol=1e-8) + + print("\t-", Group, "Passed act-grad test") + + +def test_matrix_grad(Group, device='cuda'): + D = Group.manifold_dim + X = Group.exp(torch.randn(1, D, device=device).double()) + + def fn(a): + return (Group.exp(a) * X).matrix() + + a = torch.zeros(1, D, requires_grad=True, device=device).double() + analytical, numerical = gradcheck(fn, [a], eps=1e-4) + assert torch.allclose(analytical[0], numerical[0], atol=1e-6) + + print("\t-", Group, "Passed matrix-grad test") + + +def extract_translation_grad(Group, device='cuda'): + """ prototype function """ + + D = Group.manifold_dim + X = Group.exp(5*torch.randn(1,D, device=device).double()) + + def fn(a): + return (Group.exp(a)*X).translation() + + a = torch.zeros(1, D, requires_grad=True, device=device).double() + + analytical, numerical = gradcheck(fn, [a], eps=1e-4) + + assert torch.allclose(analytical[0], numerical[0], atol=1e-8) + print("\t-", Group, "Passed translation grad test") + + +def test_vec_grad(Group, device='cuda', tol=1e-6): + + D = Group.manifold_dim + X = Group.exp(5*torch.randn(1,D, device=device).double()) + + def fn(a): + return (Group.exp(a)*X).vec() + + a = torch.zeros(1, D, requires_grad=True, device=device).double() + + analytical, numerical = gradcheck(fn, [a], eps=1e-4) + + assert torch.allclose(analytical[0], numerical[0], atol=tol) + print("\t-", Group, "Passed tovec grad test") + + +def test_fromvec_grad(Group, device='cuda', tol=1e-6): + + def fn(a): + if Group == SO3: + a = a / a.norm(dim=-1, keepdim=True) + + elif Group == RxSO3: + q, s = a.split([4, 1], 
dim=-1) + q = q / q.norm(dim=-1, keepdim=True) + a = torch.cat([q, s.exp()], dim=-1) + + elif Group == SE3: + t, q = a.split([3, 4], dim=-1) + q = q / q.norm(dim=-1, keepdim=True) + a = torch.cat([t, q], dim=-1) + + elif Group == Sim3: + t, q, s = a.split([3, 4, 1], dim=-1) + q = q / q.norm(dim=-1, keepdim=True) + a = torch.cat([t, q, s.exp()], dim=-1) + + return Group.InitFromVec(a).vec() + + D = Group.embedded_dim + a = torch.randn(1, 2, D, requires_grad=True, device=device).double() + + analytical, numerical = gradcheck(fn, [a], eps=1e-4) + + assert torch.allclose(analytical[0], numerical[0], atol=tol) + print("\t-", Group, "Passed fromvec grad test") + + + +def scale(device='cuda'): + + def fn(a, s): + X = SE3.exp(a) + X.scale(s) + return X.log() + + s = torch.rand(1, requires_grad=True, device=device).double() + a = torch.randn(1, 6, requires_grad=True, device=device).double() + + analytical, numerical = gradcheck(fn, [a, s], eps=1e-3) + print(analytical[1]) + print(numerical[1]) + + + assert torch.allclose(analytical[0], numerical[0], atol=1e-8) + assert torch.allclose(analytical[1], numerical[1], atol=1e-8) + + print("\t-", "Passed se3-to-sim3 test") + + +if __name__ == '__main__': + + + print("Testing lietorch forward pass (CPU) ...") + for Group in [SO3, RxSO3, SE3, Sim3]: + test_exp_log(Group, device='cpu') + test_inv(Group, device='cpu') + test_adj(Group, device='cpu') + test_act(Group, device='cpu') + + print("Testing lietorch backward pass (CPU)...") + for Group in [SO3, RxSO3, SE3, Sim3]: + if Group == Sim3: + tol = 1e-3 + else: + tol = 1e-8 + + test_exp_log_grad(Group, device='cpu', tol=tol) + test_inv_log_grad(Group, device='cpu', tol=tol) + test_adj_grad(Group, device='cpu') + test_adjT_grad(Group, device='cpu') + test_act_grad(Group, device='cpu') + test_matrix_grad(Group, device='cpu') + extract_translation_grad(Group, device='cpu') + test_vec_grad(Group, device='cpu') + test_fromvec_grad(Group, device='cpu') + + print("Testing lietorch forward pass (GPU) ...") + for Group in [SO3, RxSO3, SE3, Sim3]: + test_exp_log(Group, device='cuda') + test_inv(Group, device='cuda') + test_adj(Group, device='cuda') + test_act(Group, device='cuda') + + print("Testing lietorch backward pass (GPU)...") + for Group in [SO3, RxSO3, SE3, Sim3]: + if Group == Sim3: + tol = 1e-3 + else: + tol = 1e-8 + + test_exp_log_grad(Group, device='cuda', tol=tol) + test_inv_log_grad(Group, device='cuda', tol=tol) + test_adj_grad(Group, device='cuda') + test_adjT_grad(Group, device='cuda') + test_act_grad(Group, device='cuda') + test_matrix_grad(Group, device='cuda') + extract_translation_grad(Group, device='cuda') + test_vec_grad(Group, device='cuda') + test_fromvec_grad(Group, device='cuda') + + diff --git a/mini_dpvo/lietorch/src/lietorch.cpp b/mini_dpvo/lietorch/src/lietorch.cpp new file mode 100644 index 0000000000000000000000000000000000000000..36c83eb27a732f7c2a871cc22a17b5c7c7d8db84 --- /dev/null +++ b/mini_dpvo/lietorch/src/lietorch.cpp @@ -0,0 +1,317 @@ +#include +#include +#include "lietorch_gpu.h" +#include "lietorch_cpu.h" + + +#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") + + +/* Interface for cuda and c++ group operations + + enum group_t { SO3=1, SE3=2, Sim3=3 }; + X, Y, Z: (uppercase) Lie Group Elements + a, b, c: (lowercase) Lie Algebra Elements +*/ + +// Unary operations +torch::Tensor expm(int group_index, torch::Tensor a) { + CHECK_CONTIGUOUS(a); + if (a.device().type() == torch::DeviceType::CPU) { + return exp_forward_cpu(group_index, a); + + } 
else if (a.device().type() == torch::DeviceType::CUDA) { + return exp_forward_gpu(group_index, a); + } + + return a; +} + +std::vector expm_backward(int group_index, torch::Tensor grad, torch::Tensor a) { + CHECK_CONTIGUOUS(a); + CHECK_CONTIGUOUS(grad); + if (a.device().type() == torch::DeviceType::CPU) { + return exp_backward_cpu(group_index, grad, a); + + } else if (a.device().type() == torch::DeviceType::CUDA) { + return exp_backward_gpu(group_index, grad, a); + } + + return {}; +} + +torch::Tensor logm(int group_index, torch::Tensor X) { + CHECK_CONTIGUOUS(X); + if (X.device().type() == torch::DeviceType::CPU) { + return log_forward_cpu(group_index, X); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return log_forward_gpu(group_index, X); + } + + return X; +} + +std::vector logm_backward(int group_index, torch::Tensor grad, torch::Tensor X) { + CHECK_CONTIGUOUS(X); + CHECK_CONTIGUOUS(grad); + + if (X.device().type() == torch::DeviceType::CPU) { + return log_backward_cpu(group_index, grad, X); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return log_backward_gpu(group_index, grad, X); + } + + return {}; +} + +torch::Tensor inv(int group_index, torch::Tensor X) { + CHECK_CONTIGUOUS(X); + + if (X.device().type() == torch::DeviceType::CPU) { + return inv_forward_cpu(group_index, X); + } else if (X.device().type() == torch::DeviceType::CUDA) { + return inv_forward_gpu(group_index, X); + } + + return X; +} + +std::vector inv_backward(int group_index, torch::Tensor grad, torch::Tensor X) { + CHECK_CONTIGUOUS(X); + CHECK_CONTIGUOUS(grad); + + if (X.device().type() == torch::DeviceType::CPU) { + return inv_backward_cpu(group_index, grad, X); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return inv_backward_gpu(group_index, grad, X); + } + + return {}; +} + +// Binary operations + +torch::Tensor mul(int group_index, torch::Tensor X, torch::Tensor Y) { + CHECK_CONTIGUOUS(X); + CHECK_CONTIGUOUS(Y); + + if (X.device().type() == torch::DeviceType::CPU) { + return mul_forward_cpu(group_index, X, Y); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return mul_forward_gpu(group_index, X, Y); + } + + return X; +} + +std::vector mul_backward(int group_index, torch::Tensor grad, torch::Tensor X, torch::Tensor Y) { + CHECK_CONTIGUOUS(X); + CHECK_CONTIGUOUS(Y); + CHECK_CONTIGUOUS(grad); + + if (X.device().type() == torch::DeviceType::CPU) { + return mul_backward_cpu(group_index, grad, X, Y); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return mul_backward_gpu(group_index, grad, X, Y); + } + + return {}; +} + +torch::Tensor adj(int group_index, torch::Tensor X, torch::Tensor a) { + CHECK_CONTIGUOUS(X); + CHECK_CONTIGUOUS(a); + + if (X.device().type() == torch::DeviceType::CPU) { + return adj_forward_cpu(group_index, X, a); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return adj_forward_gpu(group_index, X, a); + } + + return X; +} + +std::vector adj_backward(int group_index, torch::Tensor grad, torch::Tensor X, torch::Tensor a) { + CHECK_CONTIGUOUS(X); + CHECK_CONTIGUOUS(a); + CHECK_CONTIGUOUS(grad); + + if (X.device().type() == torch::DeviceType::CPU) { + return adj_backward_cpu(group_index, grad, X, a); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return adj_backward_gpu(group_index, grad, X, a); + } + + return {}; +} + +torch::Tensor adjT(int group_index, torch::Tensor X, torch::Tensor a) { + CHECK_CONTIGUOUS(X); + CHECK_CONTIGUOUS(a); + + if (X.device().type() == 
torch::DeviceType::CPU) { + return adjT_forward_cpu(group_index, X, a); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return adjT_forward_gpu(group_index, X, a); + } + + return X; +} + +std::vector adjT_backward(int group_index, torch::Tensor grad, torch::Tensor X, torch::Tensor a) { + CHECK_CONTIGUOUS(X); + CHECK_CONTIGUOUS(a); + CHECK_CONTIGUOUS(grad); + + if (X.device().type() == torch::DeviceType::CPU) { + return adjT_backward_cpu(group_index, grad, X, a); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return adjT_backward_gpu(group_index, grad, X, a); + } + + return {}; +} + + +torch::Tensor act(int group_index, torch::Tensor X, torch::Tensor p) { + CHECK_CONTIGUOUS(X); + CHECK_CONTIGUOUS(p); + + if (X.device().type() == torch::DeviceType::CPU) { + return act_forward_cpu(group_index, X, p); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return act_forward_gpu(group_index, X, p); + } + + return X; +} + +std::vector act_backward(int group_index, torch::Tensor grad, torch::Tensor X, torch::Tensor p) { + CHECK_CONTIGUOUS(X); + CHECK_CONTIGUOUS(p); + CHECK_CONTIGUOUS(grad); + + if (X.device().type() == torch::DeviceType::CPU) { + return act_backward_cpu(group_index, grad, X, p); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return act_backward_gpu(group_index, grad, X, p); + } + + return {}; +} + +torch::Tensor act4(int group_index, torch::Tensor X, torch::Tensor p) { + CHECK_CONTIGUOUS(X); + CHECK_CONTIGUOUS(p); + + if (X.device().type() == torch::DeviceType::CPU) { + return act4_forward_cpu(group_index, X, p); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return act4_forward_gpu(group_index, X, p); + } + + return X; +} + +std::vector act4_backward(int group_index, torch::Tensor grad, torch::Tensor X, torch::Tensor p) { + CHECK_CONTIGUOUS(X); + CHECK_CONTIGUOUS(p); + CHECK_CONTIGUOUS(grad); + + if (X.device().type() == torch::DeviceType::CPU) { + return act4_backward_cpu(group_index, grad, X, p); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return act4_backward_gpu(group_index, grad, X, p); + } + + return {}; +} + + +torch::Tensor projector(int group_index, torch::Tensor X) { + CHECK_CONTIGUOUS(X); + + if (X.device().type() == torch::DeviceType::CPU) { + return orthogonal_projector_cpu(group_index, X); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return orthogonal_projector_gpu(group_index, X); + } + + return X; +} + + +torch::Tensor as_matrix(int group_index, torch::Tensor X) { + CHECK_CONTIGUOUS(X); + + if (X.device().type() == torch::DeviceType::CPU) { + return as_matrix_forward_cpu(group_index, X); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return as_matrix_forward_gpu(group_index, X); + } + + return X; +} + +torch::Tensor Jinv(int group_index, torch::Tensor X, torch::Tensor a) { + CHECK_CONTIGUOUS(X); + CHECK_CONTIGUOUS(a); + + if (X.device().type() == torch::DeviceType::CPU) { + return jleft_forward_cpu(group_index, X, a); + + } else if (X.device().type() == torch::DeviceType::CUDA) { + return jleft_forward_gpu(group_index, X, a); + } + + return a; +} + +// {exp, log, inv, mul, adj, adjT, act, act4} forward/backward bindings +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("expm", &expm, "exp map forward"); + m.def("expm_backward", &expm_backward, "exp map backward"); + + m.def("logm", &logm, "log map forward"); + m.def("logm_backward", &logm_backward, "log map backward"); + + m.def("inv", &inv, "inverse operator"); + 
m.def("inv_backward", &inv_backward, "inverse operator backward"); + + m.def("mul", &mul, "group operator"); + m.def("mul_backward", &mul_backward, "group operator backward"); + + m.def("adj", &adj, "adjoint operator"); + m.def("adj_backward", &adj_backward, "adjoint operator backward"); + + m.def("adjT", &adjT, "transposed adjoint operator"); + m.def("adjT_backward", &adjT_backward, "transposed adjoint operator backward"); + + m.def("act", &act, "action on point"); + m.def("act_backward", &act_backward, "action on point backward"); + + m.def("act4", &act4, "action on homogeneous point"); + m.def("act4_backward", &act4_backward, "action on homogeneous point backward"); + + // functions with no gradient + m.def("as_matrix", &as_matrix, "convert to matrix"); + m.def("projector", &projector, "orthogonal projection matrix"); + m.def("Jinv", &Jinv, "left inverse jacobian operator"); + +}; + diff --git a/mini_dpvo/lietorch/src/lietorch_cpu.cpp b/mini_dpvo/lietorch/src/lietorch_cpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3a388a12d09f294bd333b2197447d27a3b5a336b --- /dev/null +++ b/mini_dpvo/lietorch/src/lietorch_cpu.cpp @@ -0,0 +1,657 @@ + +#include "lietorch_cpu.h" +#include + +#include +#include "common.h" +#include "dispatch.h" + +#include "so3.h" +#include "rxso3.h" +#include "se3.h" +#include "sim3.h" + + +template +void exp_forward_kernel(const scalar_t* a_ptr, scalar_t* X_ptr, int batch_size) { + // exponential map forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(X_ptr + i*Group::N) = Group::Exp(a).data(); + } + }); +} + +template +void exp_backward_kernel(const scalar_t* grad, const scalar_t* a_ptr, scalar_t* da, int batch_size) { + // exponential map backward kernel + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(da + i*Group::K) = dX * Group::left_jacobian(a); + } + }); +} + +template +void log_forward_kernel(const scalar_t* X_ptr, scalar_t* a_ptr, int batch_size) { + // logarithm map forward kernel + using Tangent = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(a_ptr + i*Group::K) = a; + } + }); +} + +template +void log_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t* dX, int batch_size) { + // logarithm map backward kernel + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(dX + i*Group::N) = da * Group::left_jacobian_inverse(a); + } + }); +} + +template +void inv_forward_kernel(const scalar_t* X_ptr, scalar_t* Y_ptr, int batch_size) { + // group inverse forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(Y_ptr + i*Group::N) = X.inv().data(); + } + }); +} + +template +void inv_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t *dX, int batch_size) { + // group inverse backward kernel + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(dX + i*Group::N) = -dY * Y.Adj(); + } + }); +} + +template +void mul_forward_kernel(const scalar_t* X_ptr, const scalar_t* Y_ptr, 
scalar_t* Z_ptr, int batch_size) { + // group multiplication forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(Z_ptr + i*Group::N) = Z.data(); + } + }); +} + +template +void mul_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* Y_ptr, scalar_t* dX, scalar_t* dY, int batch_size) { + // group multiplication backward kernel + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + using Data = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(dX + i*Group::N) = dZ; + Eigen::Map(dY + i*Group::N) = dZ * X.Adj(); + } + }); +} + +template +void adj_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int batch_size) { + // adjoint forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(b_ptr + i*Group::K) = X.Adj(a); + } + }); +} + +template +void adj_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* dX, scalar_t* da, int batch_size) { + // adjoint backward kernel + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + using Data = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(da + i*Group::K) = db * X.Adj(); + Eigen::Map(dX + i*Group::N) = -db * Group::adj(b); + } + }); +} + +template +void adjT_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int batch_size) { + // adjoint forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(b_ptr + i*Group::K) = X.AdjT(a); + } + }); +} + +template +void adjT_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* dX, scalar_t* da, int batch_size) { + // adjoint backward kernel + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + using Data = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(da + i*Group::K) = X.Adj(db); + Eigen::Map(dX + i*Group::N) = -a * Group::adj(X.Adj(db)); + } + }); +} + + +template +void act_forward_kernel(const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* q_ptr, int batch_size) { + // action on point forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + using Point = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(q_ptr + i*3) = X * p; + } + }); +} + +template +void act_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* dX, scalar_t* dp, int batch_size) { + // adjoint backward kernel + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + using Point = Eigen::Matrix; + using PointGrad = Eigen::Matrix; + using Transformation = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(dp + i*3) = dq * X.Matrix().template block<3,3>(0,0); + Eigen::Map(dX + i*Group::N) = dq * Group::act_jacobian(X*p); + } + }); +} + + +// template +// void tovec_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t* dX, int batch_size) { +// // group inverse forward kernel +// using Data = 
Eigen::Matrix; +// using Grad = Eigen::Matrix; + +// at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { +// for (int64_t i=start; i(dX + i*Group::N) = g * X.vec_jacobian(); +// } +// }); +// } + +// template +// void fromvec_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t* dX, int batch_size) { +// // group inverse forward kernel +// using Data = Eigen::Matrix; +// using Grad = Eigen::Matrix; + +// at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { +// for (int64_t i=start; i(dX + i*Group::N) = g * X.vec_jacobian(); +// } +// }); +// } + + +template +void act4_forward_kernel(const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* q_ptr, int batch_size) { + // action on homogeneous point forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + using Point = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(q_ptr + i*4) = X.act4(p); + } + }); +} + +template +void act4_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* dX, scalar_t* dp, int batch_size) { + // action on homogeneous point backward kernel + + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + using Point = Eigen::Matrix; + using PointGrad = Eigen::Matrix; + using Transformation = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(dp + i*4) = dq * X.Matrix4x4(); + const Point q = X.act4(p); + Eigen::Map(dX + i*Group::N) = dq * Group::act4_jacobian(q); + } + }); +} + +template +void as_matrix_forward_kernel(const scalar_t* X_ptr, scalar_t* T_ptr, int batch_size) { + // group inverse forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + using Matrix4 = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(T_ptr + i*16) = X.Matrix4x4(); + } + }); +} + +template +void orthogonal_projector_kernel(const scalar_t* X_ptr, scalar_t* P_ptr, int batch_size) { + // group inverse forward kernel + using Proj = Eigen::Matrix; + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(P_ptr + i*Group::N*Group::N) = X.orthogonal_projector(); + } + }); +} + +template +void jleft_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int batch_size) { + // left-jacobian inverse action + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + + at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) { + for (int64_t i=start; i(b_ptr + i*Group::K) = b; + } + }); +} + +// unary operations + +torch::Tensor exp_forward_cpu(int group_id, torch::Tensor a) { + int batch_size = a.size(0); + torch::Tensor X; + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, a.type(), "exp_forward_kernel", ([&] { + X = torch::zeros({batch_size, group_t::N}, a.options()); + exp_forward_kernel( + a.data_ptr(), + X.data_ptr(), + batch_size); + })); + + return X; +} + +std::vector exp_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor a) { + int batch_size = a.size(0); + torch::Tensor da = torch::zeros(a.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, a.type(), "exp_backward_kernel", ([&] { + exp_backward_kernel( + grad.data_ptr(), + a.data_ptr(), + da.data_ptr(), + batch_size); + })); + + return {da}; +} + +torch::Tensor log_forward_cpu(int group_id, torch::Tensor X) { + int batch_size = X.size(0); + torch::Tensor a; + + 
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "log_forward_kernel", ([&] { + a = torch::zeros({batch_size, group_t::K}, X.options()); + log_forward_kernel( + X.data_ptr(), + a.data_ptr(), + batch_size); + })); + + return a; +} + +std::vector log_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X) { + int batch_size = X.size(0); + torch::Tensor dX = torch::zeros(X.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "log_backward_kernel", ([&] { + log_backward_kernel( + grad.data_ptr(), + X.data_ptr(), + dX.data_ptr(), + batch_size); + })); + + return {dX}; +} + +torch::Tensor inv_forward_cpu(int group_id, torch::Tensor X) { + int batch_size = X.size(0); + torch::Tensor Y = torch::zeros_like(X); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "inv_forward_kernel", ([&] { + inv_forward_kernel( + X.data_ptr(), + Y.data_ptr(), + batch_size); + })); + + return Y; +} + +std::vector inv_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X) { + int batch_size = X.size(0); + torch::Tensor dX = torch::zeros(X.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "inv_backward_kernel", ([&] { + inv_backward_kernel( + grad.data_ptr(), + X.data_ptr(), + dX.data_ptr(), + batch_size); + })); + + return {dX}; +} + +// binary operations +torch::Tensor mul_forward_cpu(int group_id, torch::Tensor X, torch::Tensor Y) { + int batch_size = X.size(0); + torch::Tensor Z = torch::zeros_like(X); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "mul_forward_kernel", ([&] { + mul_forward_kernel( + X.data_ptr(), + Y.data_ptr(), + Z.data_ptr(), + batch_size); + })); + + return Z; +} + +std::vector mul_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor Y) { + int batch_size = X.size(0); + torch::Tensor dX = torch::zeros(X.sizes(), grad.options()); + torch::Tensor dY = torch::zeros(Y.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "mul_backward_kernel", ([&] { + mul_backward_kernel( + grad.data_ptr(), + X.data_ptr(), + Y.data_ptr(), + dX.data_ptr(), + dY.data_ptr(), + batch_size); + })); + + return {dX, dY}; +} + +torch::Tensor adj_forward_cpu(int group_id, torch::Tensor X, torch::Tensor a) { + int batch_size = X.size(0); + torch::Tensor b = torch::zeros(a.sizes(), a.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adj_forward_kernel", ([&] { + adj_forward_kernel( + X.data_ptr(), + a.data_ptr(), + b.data_ptr(), + batch_size); + })); + + return b; +} + +std::vector adj_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor a) { + int batch_size = X.size(0); + torch::Tensor dX = torch::zeros(X.sizes(), grad.options()); + torch::Tensor da = torch::zeros(a.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adj_backward_kernel", ([&] { + adj_backward_kernel( + grad.data_ptr(), + X.data_ptr(), + a.data_ptr(), + dX.data_ptr(), + da.data_ptr(), + batch_size); + })); + + return {dX, da}; +} + + +torch::Tensor adjT_forward_cpu(int group_id, torch::Tensor X, torch::Tensor a) { + int batch_size = X.size(0); + torch::Tensor b = torch::zeros(a.sizes(), a.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adjT_forward_kernel", ([&] { + adjT_forward_kernel( + X.data_ptr(), + a.data_ptr(), + b.data_ptr(), + batch_size); + })); + + return b; +} + +std::vector adjT_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor a) { + int batch_size = 
X.size(0); + torch::Tensor dX = torch::zeros(X.sizes(), grad.options()); + torch::Tensor da = torch::zeros(a.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adjT_backward_kernel", ([&] { + adjT_backward_kernel( + grad.data_ptr(), + X.data_ptr(), + a.data_ptr(), + dX.data_ptr(), + da.data_ptr(), + batch_size); + })); + + return {dX, da}; +} + + +torch::Tensor act_forward_cpu(int group_id, torch::Tensor X, torch::Tensor p) { + int batch_size = X.size(0); + torch::Tensor q = torch::zeros(p.sizes(), p.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act_forward_kernel", ([&] { + act_forward_kernel( + X.data_ptr(), + p.data_ptr(), + q.data_ptr(), + batch_size); + })); + + return q; +} + +std::vector act_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor p) { + int batch_size = X.size(0); + torch::Tensor dX = torch::zeros(X.sizes(), grad.options()); + torch::Tensor dp = torch::zeros(p.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act_backward_kernel", ([&] { + act_backward_kernel( + grad.data_ptr(), + X.data_ptr(), + p.data_ptr(), + dX.data_ptr(), + dp.data_ptr(), + batch_size); + })); + + return {dX, dp}; +} + + +torch::Tensor act4_forward_cpu(int group_id, torch::Tensor X, torch::Tensor p) { + int batch_size = X.size(0); + torch::Tensor q = torch::zeros(p.sizes(), p.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act4_forward_kernel", ([&] { + act4_forward_kernel( + X.data_ptr(), + p.data_ptr(), + q.data_ptr(), + batch_size); + })); + + return q; +} + +std::vector act4_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor p) { + int batch_size = X.size(0); + torch::Tensor dX = torch::zeros(X.sizes(), grad.options()); + torch::Tensor dp = torch::zeros(p.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act4_backward_kernel", ([&] { + act4_backward_kernel( + grad.data_ptr(), + X.data_ptr(), + p.data_ptr(), + dX.data_ptr(), + dp.data_ptr(), + batch_size); + })); + + return {dX, dp}; +} + + +torch::Tensor as_matrix_forward_cpu(int group_id, torch::Tensor X) { + int batch_size = X.size(0); + torch::Tensor T4x4 = torch::zeros({X.size(0), 4, 4}, X.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "as_matrix_forward_kernel", ([&] { + as_matrix_forward_kernel( + X.data_ptr(), + T4x4.data_ptr(), + batch_size); + })); + + return T4x4; +} + + +torch::Tensor orthogonal_projector_cpu(int group_id, torch::Tensor X) { + int batch_size = X.size(0); + torch::Tensor P; + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "orthogonal_projector_kernel", ([&] { + P = torch::zeros({X.size(0), group_t::N, group_t::N}, X.options()); + orthogonal_projector_kernel(X.data_ptr(), P.data_ptr(), batch_size); + })); + + return P; +} + + + +torch::Tensor jleft_forward_cpu(int group_id, torch::Tensor X, torch::Tensor a) { + int batch_size = X.size(0); + torch::Tensor b = torch::zeros(a.sizes(), a.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "jleft_forward_kernel", ([&] { + jleft_forward_kernel( + X.data_ptr(), + a.data_ptr(), + b.data_ptr(), + batch_size); + })); + + return b; +} \ No newline at end of file diff --git a/mini_dpvo/lietorch/src/lietorch_gpu.cu b/mini_dpvo/lietorch/src/lietorch_gpu.cu new file mode 100644 index 0000000000000000000000000000000000000000..14dcac9a67358c4499dc007d3c314cc7875d8713 --- /dev/null +++ b/mini_dpvo/lietorch/src/lietorch_gpu.cu @@ -0,0 
+1,601 @@ + +#include "lietorch_gpu.h" +#include + +#include "common.h" +#include "dispatch.h" + +#include "so3.h" +#include "rxso3.h" +#include "se3.h" +#include "sim3.h" + +#define GPU_1D_KERNEL_LOOP(i, n) \ + for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i +__global__ void exp_forward_kernel(const scalar_t* a_ptr, scalar_t* X_ptr, int num_threads) { + // exponential map forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Tangent a(a_ptr + i*Group::K); + Eigen::Map(X_ptr + i*Group::N) = Group::Exp(a).data(); + } +} + +template +__global__ void exp_backward_kernel(const scalar_t* grad, const scalar_t* a_ptr, scalar_t* da, int num_threads) { + // exponential map backward kernel + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + using Data = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Tangent a(a_ptr + i*Group::K); + Grad dX(grad + i*Group::N); + Eigen::Map(da + i*Group::K) = dX * Group::left_jacobian(a); + } +} + +template +__global__ void log_forward_kernel(const scalar_t* X_ptr, scalar_t* a_ptr, int num_threads) { + // logarithm map forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Tangent a = Group(X_ptr + i*Group::N).Log(); + Eigen::Map(a_ptr + i*Group::K) = a; + } +} + +template +__global__ void log_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t* dX, int num_threads) { + // logarithm map backward kernel + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + using Data = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Tangent a = Group(X_ptr + i*Group::N).Log(); + Grad da(grad + i*Group::K); + Eigen::Map(dX + i*Group::N) = da * Group::left_jacobian_inverse(a); + } +} + +template +__global__ void inv_forward_kernel(const scalar_t* X_ptr, scalar_t* Y_ptr, int num_threads) { + // group inverse forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Group X(X_ptr + i*Group::N); + Eigen::Map(Y_ptr + i*Group::N) = X.inv().data(); + } +} + + +template +__global__ void inv_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t *dX, int num_threads) { + // group inverse backward kernel + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + using Data = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Group Y = Group(X_ptr + i*Group::N).inv(); + Grad dY(grad + i*Group::N); + Eigen::Map(dX + i*Group::N) = -dY * Y.Adj(); + } +} + + +template +__global__ void mul_forward_kernel(const scalar_t* X_ptr, const scalar_t* Y_ptr, scalar_t* Z_ptr, int num_threads) { + // group multiplication forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Group Z = Group(X_ptr + i*Group::N) * Group(Y_ptr + i*Group::N); + Eigen::Map(Z_ptr + i*Group::N) = Z.data(); + } +} + +template +__global__ void mul_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* Y_ptr, scalar_t* dX, scalar_t* dY, int num_threads) { + // group multiplication backward kernel + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + using Data = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Grad dZ(grad + i*Group::N); + Group X(X_ptr + i*Group::N); + Eigen::Map(dX + i*Group::N) = dZ; + Eigen::Map(dY + i*Group::N) = dZ * X.Adj(); + } +} + +template +__global__ void adj_forward_kernel(const scalar_t* X_ptr, const scalar_t* 
a_ptr, scalar_t* b_ptr, int num_threads) { + // adjoint forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Group X(X_ptr + i*Group::N); + Tangent a(a_ptr + i*Group::K); + Eigen::Map(b_ptr + i*Group::K) = X.Adj(a); + } +} + +template +__global__ void adj_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* dX, scalar_t* da, int num_threads) { + // adjoint backward kernel + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + using Data = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Group X(X_ptr + i*Group::N); + Grad db(grad + i*Group::K); + + Tangent a(a_ptr + i*Group::K); + Tangent b = X.Adj() * a; + + Eigen::Map(da + i*Group::K) = db * X.Adj(); + Eigen::Map(dX + i*Group::N) = -db * Group::adj(b); + } +} + + +template +__global__ void adjT_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int num_threads) { + // adjoint forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Group X(X_ptr + i*Group::N); + Tangent a(a_ptr + i*Group::K); + Eigen::Map(b_ptr + i*Group::K) = X.AdjT(a); + } +} + +template +__global__ void adjT_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* dX, scalar_t* da, int num_threads) { + // adjoint backward kernel + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + using Data = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Group X(X_ptr + i*Group::N); + Tangent db(grad + i*Group::K); + Grad a(a_ptr + i*Group::K); + + Eigen::Map(da + i*Group::K) = X.Adj(db); + Eigen::Map(dX + i*Group::N) = -a * Group::adj(X.Adj(db)); + } +} + +template +__global__ void act_forward_kernel(const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* q_ptr, int num_threads) { + // action on point forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + using Point = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Group X(X_ptr + i*Group::N); + Point p(p_ptr + i*3); + Eigen::Map(q_ptr + i*3) = X * p; + } +} + +template +__global__ void act_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* dX, scalar_t* dp, int num_threads) { + // adjoint backward kernel + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + using Point = Eigen::Matrix; + using PointGrad = Eigen::Matrix; + using Transformation = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Group X(X_ptr + i*Group::N); + Point p(p_ptr + i*3); + PointGrad dq(grad + i*3); + + Eigen::Map(dp + i*3) = dq * X.Matrix4x4().block<3,3>(0,0); + Eigen::Map(dX + i*Group::N) = dq * Group::act_jacobian(X*p); + } +} + + +template +__global__ void act4_forward_kernel(const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* q_ptr, int num_threads) { + // action on point forward kernel + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + using Point = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Group X(X_ptr + i*Group::N); + Point p(p_ptr + i*4); + Eigen::Map(q_ptr + i*4) = X.act4(p); + } +} + +template +__global__ void act4_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* dX, scalar_t* dp, int num_threads) { + // adjoint backward kernel + using Tangent = Eigen::Matrix; + using Grad = Eigen::Matrix; + using Point = Eigen::Matrix; + using PointGrad = Eigen::Matrix; + using Transformation = Eigen::Matrix; + + 
GPU_1D_KERNEL_LOOP(i, num_threads) { + Group X(X_ptr + i*Group::N); + Point p(p_ptr + i*4); + PointGrad dq(grad + i*4); + + Eigen::Map(dp + i*4) = dq * X.Matrix4x4(); + const Point q = X.act4(p); + Eigen::Map(dX + i*Group::N) = dq * Group::act4_jacobian(q); + } +} + +template +__global__ void as_matrix_forward_kernel(const scalar_t* X_ptr, scalar_t* T_ptr, int num_threads) { + // convert to 4x4 matrix representation + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + using Matrix4 = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Group X(X_ptr + i*Group::N); + Eigen::Map(T_ptr + i*16) = X.Matrix4x4(); + } +} + +template +__global__ void orthogonal_projector_kernel(const scalar_t* X_ptr, scalar_t* P_ptr, int num_threads) { + // orthogonal projection matrix + using Proj = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Group X(X_ptr + i*Group::N); + Eigen::Map(P_ptr + i*Group::N*Group::N) = X.orthogonal_projector(); + } +} + +template +__global__ void jleft_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int num_threads) { + // left jacobian inverse action + using Tangent = Eigen::Matrix; + using Data = Eigen::Matrix; + + GPU_1D_KERNEL_LOOP(i, num_threads) { + Group X(X_ptr + i*Group::N); + Tangent a(a_ptr + i*Group::K); + Tangent b = Group::left_jacobian_inverse(X.Log()) * a; + Eigen::Map(b_ptr + i*Group::K) = b; + } +} + +// unary operations + +torch::Tensor exp_forward_gpu(int group_id, torch::Tensor a) { + int batch_size = a.size(0); + torch::Tensor X; + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, a.type(), "exp_forward_kernel", ([&] { + X = torch::zeros({batch_size, group_t::N}, a.options()); + exp_forward_kernel<<>>( + a.data_ptr(), + X.data_ptr(), + batch_size); + })); + + return X; +} + +std::vector exp_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor a) { + int batch_size = a.size(0); + torch::Tensor da = torch::zeros(a.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, a.type(), "exp_backward_kernel", ([&] { + exp_backward_kernel<<>>( + grad.data_ptr(), + a.data_ptr(), + da.data_ptr(), + batch_size); + })); + + return {da}; +} + +torch::Tensor log_forward_gpu(int group_id, torch::Tensor X) { + int batch_size = X.size(0); + torch::Tensor a; + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "log_forward_kernel", ([&] { + a = torch::zeros({batch_size, group_t::K}, X.options()); + log_forward_kernel<<>>( + X.data_ptr(), + a.data_ptr(), + batch_size); + })); + + return a; +} + +std::vector log_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X) { + int batch_size = X.size(0); + torch::Tensor dX = torch::zeros(X.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "log_backward_kernel", ([&] { + log_backward_kernel<<>>( + grad.data_ptr(), + X.data_ptr(), + dX.data_ptr(), + batch_size); + })); + + return {dX}; +} + +torch::Tensor inv_forward_gpu(int group_id, torch::Tensor X) { + int batch_size = X.size(0); + torch::Tensor Y = torch::zeros_like(X); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "inv_forward_kernel", ([&] { + inv_forward_kernel<<>>( + X.data_ptr(), + Y.data_ptr(), + batch_size); + })); + + return Y; +} + +std::vector inv_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X) { + int batch_size = X.size(0); + torch::Tensor dX = torch::zeros(X.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "inv_backward_kernel", ([&] { + inv_backward_kernel<<>>( + grad.data_ptr(), + 
X.data_ptr(), + dX.data_ptr(), + batch_size); + })); + + return {dX}; +} + +// binary operations +torch::Tensor mul_forward_gpu(int group_id, torch::Tensor X, torch::Tensor Y) { + int batch_size = X.size(0); + torch::Tensor Z = torch::zeros_like(X); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "mul_forward_kernel", ([&] { + mul_forward_kernel<<>>( + X.data_ptr(), + Y.data_ptr(), + Z.data_ptr(), + batch_size); + })); + + return Z; +} + +std::vector mul_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor Y) { + int batch_size = X.size(0); + torch::Tensor dX = torch::zeros(X.sizes(), grad.options()); + torch::Tensor dY = torch::zeros(Y.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "mul_backward_kernel", ([&] { + mul_backward_kernel<<>>( + grad.data_ptr(), + X.data_ptr(), + Y.data_ptr(), + dX.data_ptr(), + dY.data_ptr(), + batch_size); + })); + + return {dX, dY}; +} + +torch::Tensor adj_forward_gpu(int group_id, torch::Tensor X, torch::Tensor a) { + int batch_size = X.size(0); + torch::Tensor b = torch::zeros(a.sizes(), a.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adj_forward_kernel", ([&] { + adj_forward_kernel<<>>( + X.data_ptr(), + a.data_ptr(), + b.data_ptr(), + batch_size); + })); + + return b; +} + +std::vector adj_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor a) { + int batch_size = X.size(0); + torch::Tensor dX = torch::zeros(X.sizes(), grad.options()); + torch::Tensor da = torch::zeros(a.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adj_backward_kernel", ([&] { + adj_backward_kernel<<>>( + grad.data_ptr(), + X.data_ptr(), + a.data_ptr(), + dX.data_ptr(), + da.data_ptr(), + batch_size); + })); + + return {dX, da}; +} + + +torch::Tensor adjT_forward_gpu(int group_id, torch::Tensor X, torch::Tensor a) { + int batch_size = X.size(0); + torch::Tensor b = torch::zeros(a.sizes(), a.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adjT_forward_kernel", ([&] { + adjT_forward_kernel<<>>( + X.data_ptr(), + a.data_ptr(), + b.data_ptr(), + batch_size); + })); + + return b; +} + +std::vector adjT_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor a) { + int batch_size = X.size(0); + torch::Tensor dX = torch::zeros(X.sizes(), grad.options()); + torch::Tensor da = torch::zeros(a.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adjT_backward_kernel", ([&] { + adjT_backward_kernel<<>>( + grad.data_ptr(), + X.data_ptr(), + a.data_ptr(), + dX.data_ptr(), + da.data_ptr(), + batch_size); + })); + + return {dX, da}; +} + + + +torch::Tensor act_forward_gpu(int group_id, torch::Tensor X, torch::Tensor p) { + int batch_size = X.size(0); + torch::Tensor q = torch::zeros(p.sizes(), p.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act_forward_kernel", ([&] { + act_forward_kernel<<>>( + X.data_ptr(), + p.data_ptr(), + q.data_ptr(), + batch_size); + })); + + return q; +} + +std::vector act_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor p) { + int batch_size = X.size(0); + torch::Tensor dX = torch::zeros(X.sizes(), grad.options()); + torch::Tensor dp = torch::zeros(p.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act_backward_kernel", ([&] { + act_backward_kernel<<>>( + grad.data_ptr(), + X.data_ptr(), + p.data_ptr(), + dX.data_ptr(), + dp.data_ptr(), + batch_size); + 
})); + + return {dX, dp}; +} + +torch::Tensor act4_forward_gpu(int group_id, torch::Tensor X, torch::Tensor p) { + int batch_size = X.size(0); + torch::Tensor q = torch::zeros(p.sizes(), p.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act4_forward_kernel", ([&] { + act4_forward_kernel<<>>( + X.data_ptr(), + p.data_ptr(), + q.data_ptr(), + batch_size); + })); + + return q; +} + +std::vector act4_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor p) { + int batch_size = X.size(0); + torch::Tensor dX = torch::zeros(X.sizes(), grad.options()); + torch::Tensor dp = torch::zeros(p.sizes(), grad.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act4_backward_kernel", ([&] { + act4_backward_kernel<<>>( + grad.data_ptr(), + X.data_ptr(), + p.data_ptr(), + dX.data_ptr(), + dp.data_ptr(), + batch_size); + })); + + return {dX, dp}; +} + + +torch::Tensor as_matrix_forward_gpu(int group_id, torch::Tensor X) { + int batch_size = X.size(0); + torch::Tensor T4x4 = torch::zeros({X.size(0), 4, 4}, X.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "as_matrix_forward_kernel", ([&] { + as_matrix_forward_kernel<<>>( + X.data_ptr(), + T4x4.data_ptr(), + batch_size); + })); + + return T4x4; +} + + +torch::Tensor orthogonal_projector_gpu(int group_id, torch::Tensor X) { + int batch_size = X.size(0); + torch::Tensor P; + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "orthogonal_projector_kernel", ([&] { + P = torch::zeros({X.size(0), group_t::N, group_t::N}, X.options()); + orthogonal_projector_kernel<<>>( + X.data_ptr(), + P.data_ptr(), + batch_size); + })); + + return P; +} + + +torch::Tensor jleft_forward_gpu(int group_id, torch::Tensor X, torch::Tensor a) { + int batch_size = X.size(0); + torch::Tensor b = torch::zeros(a.sizes(), a.options()); + + DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "jleft_forward_kernel", ([&] { + jleft_forward_kernel<<>>( + X.data_ptr(), + a.data_ptr(), + b.data_ptr(), + batch_size); + })); + + return b; +} diff --git a/mini_dpvo/logger.py b/mini_dpvo/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..7efeeffbccccdc008c3876d72076fa704c763725 --- /dev/null +++ b/mini_dpvo/logger.py @@ -0,0 +1,58 @@ + +import torch +from torch.utils.tensorboard import SummaryWriter + + +SUM_FREQ = 100 + +class Logger: + def __init__(self, name, scheduler): + self.total_steps = 0 + self.running_loss = {} + self.writer = None + self.name = name + self.scheduler = scheduler + + def _print_training_status(self): + if self.writer is None: + self.writer = SummaryWriter("runs/{}".format(self.name)) + print([k for k in self.running_loss]) + + lr = self.scheduler.get_lr().pop() + metrics_data = [self.running_loss[k]/SUM_FREQ for k in self.running_loss.keys()] + training_str = "[{:6d}, {:10.7f}] ".format(self.total_steps+1, lr) + metrics_str = ("{:10.4f}, "*len(metrics_data)).format(*metrics_data) + + # print the training status + print(training_str + metrics_str) + + for key in self.running_loss: + val = self.running_loss[key] / SUM_FREQ + self.writer.add_scalar(key, val, self.total_steps) + self.running_loss[key] = 0.0 + + def push(self, metrics): + + for key in metrics: + if key not in self.running_loss: + self.running_loss[key] = 0.0 + + self.running_loss[key] += metrics[key] + + if self.total_steps % SUM_FREQ == SUM_FREQ-1: + self._print_training_status() + self.running_loss = {} + + self.total_steps += 1 + + def write_dict(self, results): + if self.writer is None: + 
self.writer = SummaryWriter("runs/{}".format(self.name)) + print([k for k in self.running_loss]) + + for key in results: + self.writer.add_scalar(key, results[key], self.total_steps) + + def close(self): + self.writer.close() + diff --git a/mini_dpvo/net.py b/mini_dpvo/net.py new file mode 100644 index 0000000000000000000000000000000000000000..82c31265a43f0dd2d93c9d7cdcd1705250558fbb --- /dev/null +++ b/mini_dpvo/net.py @@ -0,0 +1,270 @@ +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from collections import OrderedDict + +import torch_scatter +from torch_scatter import scatter_sum + +from . import fastba +from . import altcorr +from . import lietorch +from .lietorch import SE3 + +from .extractor import BasicEncoder, BasicEncoder4 +from .blocks import GradientClip, GatedResidual, SoftAgg + +from .utils import * +from .ba import BA +from . import projective_ops as pops + +autocast = torch.cuda.amp.autocast +import matplotlib.pyplot as plt + +DIM = 384 + +class Update(nn.Module): + def __init__(self, p): + super(Update, self).__init__() + + self.c1 = nn.Sequential( + nn.Linear(DIM, DIM), + nn.ReLU(inplace=True), + nn.Linear(DIM, DIM)) + + self.c2 = nn.Sequential( + nn.Linear(DIM, DIM), + nn.ReLU(inplace=True), + nn.Linear(DIM, DIM)) + + self.norm = nn.LayerNorm(DIM, eps=1e-3) + + self.agg_kk = SoftAgg(DIM) + self.agg_ij = SoftAgg(DIM) + + self.gru = nn.Sequential( + nn.LayerNorm(DIM, eps=1e-3), + GatedResidual(DIM), + nn.LayerNorm(DIM, eps=1e-3), + GatedResidual(DIM), + ) + + self.corr = nn.Sequential( + nn.Linear(2*49*p*p, DIM), + nn.ReLU(inplace=True), + nn.Linear(DIM, DIM), + nn.LayerNorm(DIM, eps=1e-3), + nn.ReLU(inplace=True), + nn.Linear(DIM, DIM), + ) + + self.d = nn.Sequential( + nn.ReLU(inplace=False), + nn.Linear(DIM, 2), + GradientClip()) + + self.w = nn.Sequential( + nn.ReLU(inplace=False), + nn.Linear(DIM, 2), + GradientClip(), + nn.Sigmoid()) + + + def forward(self, net, inp, corr, flow, ii, jj, kk): + """ update operator """ + + net = net + inp + self.corr(corr) + net = self.norm(net) + + ix, jx = fastba.neighbors(kk, jj) + mask_ix = (ix >= 0).float().reshape(1, -1, 1) + mask_jx = (jx >= 0).float().reshape(1, -1, 1) + + net = net + self.c1(mask_ix * net[:,ix]) + net = net + self.c2(mask_jx * net[:,jx]) + + net = net + self.agg_kk(net, kk) + net = net + self.agg_ij(net, ii*12345 + jj) + + net = self.gru(net) + + return net, (self.d(net), self.w(net), None) + + +class Patchifier(nn.Module): + def __init__(self, patch_size=3): + super(Patchifier, self).__init__() + self.patch_size = patch_size + self.fnet = BasicEncoder4(output_dim=128, norm_fn='instance') + self.inet = BasicEncoder4(output_dim=DIM, norm_fn='none') + + def __image_gradient(self, images): + gray = ((images + 0.5) * (255.0 / 2)).sum(dim=2) + dx = gray[...,:-1,1:] - gray[...,:-1,:-1] + dy = gray[...,1:,:-1] - gray[...,:-1,:-1] + g = torch.sqrt(dx**2 + dy**2) + g = F.avg_pool2d(g, 4, 4) + return g + + def forward(self, images, patches_per_image=80, disps=None, gradient_bias=False, return_color=False): + """ extract patches from input images """ + fmap = self.fnet(images) / 4.0 + imap = self.inet(images) / 4.0 + + b, n, c, h, w = fmap.shape + P = self.patch_size + + # bias patch selection towards regions with high gradient + if gradient_bias: + g = self.__image_gradient(images) + x = torch.randint(1, w-1, size=[n, 3*patches_per_image], device="cuda") + y = torch.randint(1, h-1, size=[n, 3*patches_per_image], device="cuda") + + coords = torch.stack([x, y], dim=-1).float() + g 
= altcorr.patchify(g[0,:,None], coords, 0).view(n, 3 * patches_per_image) + + ix = torch.argsort(g, dim=1) + x = torch.gather(x, 1, ix[:, -patches_per_image:]) + y = torch.gather(y, 1, ix[:, -patches_per_image:]) + + else: + x = torch.randint(1, w-1, size=[n, patches_per_image], device="cuda") + y = torch.randint(1, h-1, size=[n, patches_per_image], device="cuda") + + coords = torch.stack([x, y], dim=-1).float() + imap = altcorr.patchify(imap[0], coords, 0).view(b, -1, DIM, 1, 1) + gmap = altcorr.patchify(fmap[0], coords, P//2).view(b, -1, 128, P, P) + + if return_color: + clr = altcorr.patchify(images[0], 4*(coords + 0.5), 0).view(b, -1, 3) + + if disps is None: + disps = torch.ones(b, n, h, w, device="cuda") + + grid, _ = coords_grid_with_index(disps, device=fmap.device) + patches = altcorr.patchify(grid[0], coords, P//2).view(b, -1, 3, P, P) + + index = torch.arange(n, device="cuda").view(n, 1) + index = index.repeat(1, patches_per_image).reshape(-1) + + if return_color: + return fmap, gmap, imap, patches, index, clr + + return fmap, gmap, imap, patches, index + + +class CorrBlock: + def __init__(self, fmap, gmap, radius=3, dropout=0.2, levels=[1,4]): + self.dropout = dropout + self.radius = radius + self.levels = levels + + self.gmap = gmap + self.pyramid = pyramidify(fmap, lvls=levels) + + def __call__(self, ii, jj, coords): + corrs = [] + for i in range(len(self.levels)): + corrs += [ altcorr.corr(self.gmap, self.pyramid[i], coords / self.levels[i], ii, jj, self.radius, self.dropout) ] + return torch.stack(corrs, -1).view(1, len(ii), -1) + + +class VONet(nn.Module): + def __init__(self, use_viewer=False): + super(VONet, self).__init__() + self.P = 3 + self.patchify = Patchifier(self.P) + self.update = Update(self.P) + + self.DIM = DIM + self.RES = 4 + + + @autocast(enabled=False) + def forward(self, images, poses, disps, intrinsics, M=1024, STEPS=12, P=1, structure_only=False, rescale=False): + """ Estimates SE3 or Sim3 between pair of frames """ + + images = 2 * (images / 255.0) - 0.5 + intrinsics = intrinsics / 4.0 + disps = disps[:, :, 1::4, 1::4].float() + + fmap, gmap, imap, patches, ix = self.patchify(images, disps=disps) + + corr_fn = CorrBlock(fmap, gmap) + + b, N, c, h, w = fmap.shape + p = self.P + + patches_gt = patches.clone() + Ps = poses + + d = patches[..., 2, p//2, p//2] + patches = set_depth(patches, torch.rand_like(d)) + + kk, jj = flatmeshgrid(torch.where(ix < 8)[0], torch.arange(0,8, device="cuda")) + ii = ix[kk] + + imap = imap.view(b, -1, DIM) + net = torch.zeros(b, len(kk), DIM, device="cuda", dtype=torch.float) + + Gs = SE3.IdentityLike(poses) + + if structure_only: + Gs.data[:] = poses.data[:] + + traj = [] + bounds = [-64, -64, w + 64, h + 64] + + while len(traj) < STEPS: + Gs = Gs.detach() + patches = patches.detach() + + n = ii.max() + 1 + if len(traj) >= 8 and n < images.shape[1]: + if not structure_only: Gs.data[:,n] = Gs.data[:,n-1] + kk1, jj1 = flatmeshgrid(torch.where(ix < n)[0], torch.arange(n, n+1, device="cuda")) + kk2, jj2 = flatmeshgrid(torch.where(ix == n)[0], torch.arange(0, n+1, device="cuda")) + + ii = torch.cat([ix[kk1], ix[kk2], ii]) + jj = torch.cat([jj1, jj2, jj]) + kk = torch.cat([kk1, kk2, kk]) + + net1 = torch.zeros(b, len(kk1) + len(kk2), DIM, device="cuda") + net = torch.cat([net1, net], dim=1) + + if np.random.rand() < 0.1: + k = (ii != (n - 4)) & (jj != (n - 4)) + ii = ii[k] + jj = jj[k] + kk = kk[k] + net = net[:,k] + + patches[:,ix==n,2] = torch.median(patches[:,(ix == n-1) | (ix == n-2),2]) + n = ii.max() + 1 + + coords = 
pops.transform(Gs, patches, intrinsics, ii, jj, kk) + coords1 = coords.permute(0, 1, 4, 2, 3).contiguous() + + corr = corr_fn(kk, jj, coords1) + net, (delta, weight, _) = self.update(net, imap[:,kk], corr, None, ii, jj, kk) + + lmbda = 1e-4 + target = coords[...,p//2,p//2,:] + delta + + ep = 10 + for itr in range(2): + Gs, patches = BA(Gs, patches, intrinsics, target, weight, lmbda, ii, jj, kk, + bounds, ep=ep, fixedp=1, structure_only=structure_only) + + kl = torch.as_tensor(0) + dij = (ii - jj).abs() + k = (dij > 0) & (dij <= 2) + + coords = pops.transform(Gs, patches, intrinsics, ii[k], jj[k], kk[k]) + coords_gt, valid, _ = pops.transform(Ps, patches_gt, intrinsics, ii[k], jj[k], kk[k], jacobian=True) + + traj.append((valid, coords, coords_gt, Gs[:,:n], Ps[:,:n], kl)) + + return traj + diff --git a/mini_dpvo/plot_utils.py b/mini_dpvo/plot_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d4676a07fb301578b6207b31d0e2cd29915a28da --- /dev/null +++ b/mini_dpvo/plot_utils.py @@ -0,0 +1,52 @@ +from copy import deepcopy + +import matplotlib.pyplot as plt +import numpy as np +from evo.core import sync +from evo.core.trajectory import PoseTrajectory3D +# from evo.tools import plot +from pathlib import Path + + +def make_traj(args) -> PoseTrajectory3D: + if isinstance(args, tuple): + traj, tstamps = args + return PoseTrajectory3D(positions_xyz=traj[:,:3], orientations_quat_wxyz=traj[:,3:], timestamps=tstamps) + assert isinstance(args, PoseTrajectory3D), type(args) + return deepcopy(args) + +def best_plotmode(traj): + _, i1, i2 = np.argsort(np.var(traj.positions_xyz, axis=0)) + plot_axes = "xyz"[i2] + "xyz"[i1] + return getattr(plot.PlotMode, plot_axes) + +def plot_trajectory(pred_traj, gt_traj=None, title="", filename="", align=True, correct_scale=True): + pred_traj = make_traj(pred_traj) + + if gt_traj is not None: + gt_traj = make_traj(gt_traj) + gt_traj, pred_traj = sync.associate_trajectories(gt_traj, pred_traj) + + if align: + pred_traj.align(gt_traj, correct_scale=correct_scale) + + plot_collection = plot.PlotCollection("PlotCol") + fig = plt.figure(figsize=(8, 8)) + plot_mode = best_plotmode(gt_traj if (gt_traj is not None) else pred_traj) + ax = plot.prepare_axis(fig, plot_mode) + ax.set_title(title) + if gt_traj is not None: + plot.traj(ax, plot_mode, gt_traj, '--', 'gray', "Ground Truth") + plot.traj(ax, plot_mode, pred_traj, '-', 'blue', "Predicted") + plot_collection.add_figure("traj (error)", fig) + plot_collection.export(filename, confirm_overwrite=False) + plt.close(fig=fig) + print(f"Saved {filename}") + +def save_trajectory_tum_format(traj, filename): + traj = make_traj(traj) + tostr = lambda a: ' '.join(map(str, a)) + with Path(filename).open('w') as f: + for i in range(traj.num_poses): + f.write(f"{traj.timestamps[i]} {tostr(traj.positions_xyz[i])} {tostr(traj.orientations_quat_wxyz[i][[1,2,3,0]])}\n") + print(f"Saved {filename}") diff --git a/mini_dpvo/projective_ops.py b/mini_dpvo/projective_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..beb6610d4e169b03693be89a0abe670c65eccac8 --- /dev/null +++ b/mini_dpvo/projective_ops.py @@ -0,0 +1,121 @@ +import torch +import torch.nn.functional as F + +from .lietorch import SE3, Sim3 + +MIN_DEPTH = 0.2 + +def extract_intrinsics(intrinsics): + return intrinsics[...,None,None,:].unbind(dim=-1) + +def coords_grid(ht, wd, **kwargs): + y, x = torch.meshgrid( + torch.arange(ht).to(**kwargs).float(), + torch.arange(wd).to(**kwargs).float()) + + return torch.stack([x, y], dim=-1) + + 
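# --- illustrative aside (not part of the diff) --------------------------------
# Minimal standalone sketch of the pinhole back-projection / projection pair
# that iproj() and proj() below implement, using the same fx, fy, cx, cy
# intrinsics and inverse-depth convention. In the real pipeline a rigid-body
# transform is applied between the two steps; here the identity is used, so the
# round trip simply returns the input pixel. The function and variable names
# are placeholders for illustration only.
import torch

def pinhole_roundtrip_sketch(x, y, d, fx, fy, cx, cy):
    # back-project pixel (x, y) with inverse depth d to homogeneous X = (xn, yn, 1, d)
    xn = (x - cx) / fx
    yn = (y - cy) / fy
    X = torch.stack([xn, yn, torch.ones_like(d), d], dim=-1)
    # re-project: divide by the z component (clamped as in proj()) and apply the intrinsics
    Xc, Yc, Zc, _ = X.unbind(dim=-1)
    invz = 1.0 / Zc.clamp(min=0.1)
    return fx * (invz * Xc) + cx, fy * (invz * Yc) + cy
# -------------------------------------------------------------------------------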
+def iproj(patches, intrinsics): + """ inverse projection """ + x, y, d = patches.unbind(dim=2) + fx, fy, cx, cy = intrinsics[...,None,None].unbind(dim=2) + + i = torch.ones_like(d) + xn = (x - cx) / fx + yn = (y - cy) / fy + + X = torch.stack([xn, yn, i, d], dim=-1) + return X + + +def proj(X, intrinsics, depth=False): + """ projection """ + + X, Y, Z, W = X.unbind(dim=-1) + fx, fy, cx, cy = intrinsics[...,None,None].unbind(dim=2) + + # d = 0.01 * torch.ones_like(Z) + # d[Z > 0.01] = 1.0 / Z[Z > 0.01] + # d = torch.ones_like(Z) + # d[Z.abs() > 0.1] = 1.0 / Z[Z.abs() > 0.1] + + d = 1.0 / Z.clamp(min=0.1) + x = fx * (d * X) + cx + y = fy * (d * Y) + cy + + if depth: + return torch.stack([x, y, d], dim=-1) + + return torch.stack([x, y], dim=-1) + + +def transform(poses, patches, intrinsics, ii, jj, kk, depth=False, valid=False, jacobian=False, tonly=False): + """ projective transform """ + + # backproject + X0 = iproj(patches[:,kk], intrinsics[:,ii]) + + # transform + Gij = poses[:, jj] * poses[:, ii].inv() + + if tonly: + Gij[...,3:] = torch.as_tensor([0,0,0,1], device=Gij.device) + + X1 = Gij[:,:,None,None] * X0 + + # project + x1 = proj(X1, intrinsics[:,jj], depth) + + + if jacobian: + p = X1.shape[2] + X, Y, Z, H = X1[...,p//2,p//2,:].unbind(dim=-1) + o = torch.zeros_like(H) + i = torch.zeros_like(H) + + fx, fy, cx, cy = intrinsics[:,jj].unbind(dim=-1) + + d = torch.zeros_like(Z) + d[Z.abs() > 0.2] = 1.0 / Z[Z.abs() > 0.2] + + Ja = torch.stack([ + H, o, o, o, Z, -Y, + o, H, o, -Z, o, X, + o, o, H, Y, -X, o, + o, o, o, o, o, o, + ], dim=-1).view(1, len(ii), 4, 6) + + Jp = torch.stack([ + fx*d, o, -fx*X*d*d, o, + o, fy*d, -fy*Y*d*d, o, + ], dim=-1).view(1, len(ii), 2, 4) + + Jj = torch.matmul(Jp, Ja) + Ji = -Gij[:,:,None].adjT(Jj) + + Jz = torch.matmul(Jp, Gij.matrix()[...,:,3:]) + + return x1, (Z > 0.2).float(), (Ji, Jj, Jz) + + if valid: + return x1, (X1[...,2] > 0.2).float() + + return x1 + +def point_cloud(poses, patches, intrinsics, ix): + """ generate point cloud from patches """ + return poses[:,ix,None,None].inv() * iproj(patches, intrinsics[:,ix]) + + +def flow_mag(poses, patches, intrinsics, ii, jj, kk, beta=0.3): + """ projective transform """ + + coords0 = transform(poses, patches, intrinsics, ii, ii, kk) + coords1 = transform(poses, patches, intrinsics, ii, jj, kk, tonly=False) + coords2 = transform(poses, patches, intrinsics, ii, jj, kk, tonly=True) + + flow1 = (coords1 - coords0).norm(dim=-1) + flow2 = (coords2 - coords0).norm(dim=-1) + + return beta * flow1 + (1-beta) * flow2 diff --git a/mini_dpvo/stream.py b/mini_dpvo/stream.py new file mode 100644 index 0000000000000000000000000000000000000000..74ea7cee7dfacdba8f6aff54f26c1c52430b5ddc --- /dev/null +++ b/mini_dpvo/stream.py @@ -0,0 +1,92 @@ +import cv2 +import numpy as np +from pathlib import Path +from itertools import chain +from multiprocessing import Queue + + +def image_stream( + queue: Queue, imagedir: str, calib: str, stride: int, skip: int = 0 +) -> None: + """image generator""" + + calib = np.loadtxt(calib, delimiter=" ") + fx, fy, cx, cy = calib[:4] + + K = np.eye(3) + K[0, 0] = fx + K[0, 2] = cx + K[1, 1] = fy + K[1, 2] = cy + + img_exts = ["*.png", "*.jpeg", "*.jpg"] + image_list = sorted(chain.from_iterable(Path(imagedir).glob(e) for e in img_exts))[ + skip::stride + ] + + for t, imfile in enumerate(image_list): + image = cv2.imread(str(imfile)) + if len(calib) > 4: + image = cv2.undistort(image, K, calib[4:]) + + if 0: + image = cv2.resize(image, None, fx=0.5, fy=0.5) + intrinsics = np.array([fx / 2, fy / 
2, cx / 2, cy / 2]) + + else: + intrinsics = np.array([fx, fy, cx, cy]) + + h, w, _ = image.shape + image = image[: h - h % 16, : w - w % 16] + + queue.put((t, image, intrinsics)) + + queue.put((-1, image, intrinsics)) + + +def video_stream( + queue: Queue, imagedir: str, calib: str, stride: int, skip: int = 0 +) -> None: + """video generator""" + + calib = np.loadtxt(calib, delimiter=" ") + fx, fy, cx, cy = calib[:4] + + K = np.eye(3) + K[0, 0] = fx + K[0, 2] = cx + K[1, 1] = fy + K[1, 2] = cy + + cap = cv2.VideoCapture(imagedir) + + t = 0 + + for _ in range(skip): + ret, image = cap.read() + + while True: + # Capture frame-by-frame + for _ in range(stride): + ret, image = cap.read() + # if frame is read correctly ret is True + if not ret: + break + + if not ret: + break + + if len(calib) > 4: + image = cv2.undistort(image, K, calib[4:]) + + image = cv2.resize(image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA) + h, w, _ = image.shape + image = image[: h - h % 16, : w - w % 16] + + intrinsics = np.array([fx * 0.5, fy * 0.5, cx * 0.5, cy * 0.5]) + queue.put((t, image, intrinsics)) + + t += 1 + + queue.put((-1, image, intrinsics)) + cap.release() diff --git a/mini_dpvo/utils.py b/mini_dpvo/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..2ab5193591b6f1d69b71102dd0faa1717bbef925 --- /dev/null +++ b/mini_dpvo/utils.py @@ -0,0 +1,92 @@ +import torch +import torch.nn.functional as F + + +all_times = [] + + +class Timer: + def __init__(self, name: str, enabled: bool = True): + self.name = name + self.enabled = enabled + + if self.enabled: + self.start = torch.cuda.Event(enable_timing=True) + self.end = torch.cuda.Event(enable_timing=True) + + def __enter__(self): + if self.enabled: + self.start.record() + + def __exit__(self, type, value, traceback): + global all_times + if self.enabled: + self.end.record() + torch.cuda.synchronize() + + elapsed = self.start.elapsed_time(self.end) + all_times.append(elapsed) + print(f"{self.name}: {elapsed:.2f}ms") + + +def coords_grid(b, n, h, w, **kwargs): + """coordinate grid""" + x = torch.arange(0, w, dtype=torch.float, **kwargs) + y = torch.arange(0, h, dtype=torch.float, **kwargs) + coords = torch.stack(torch.meshgrid(y, x, indexing="ij")) + return coords[[1, 0]].view(1, 1, 2, h, w).repeat(b, n, 1, 1, 1) + + +def coords_grid_with_index(d, **kwargs): + """coordinate grid with frame index""" + b, n, h, w = d.shape + i = torch.ones_like(d) + x = torch.arange(0, w, dtype=torch.float, **kwargs) + y = torch.arange(0, h, dtype=torch.float, **kwargs) + + y, x = torch.stack(torch.meshgrid(y, x, indexing="ij")) + y = y.view(1, 1, h, w).repeat(b, n, 1, 1) + x = x.view(1, 1, h, w).repeat(b, n, 1, 1) + + coords = torch.stack([x, y, d], dim=2) + index = torch.arange(0, n, dtype=torch.float, **kwargs) + index = index.view(1, n, 1, 1, 1).repeat(b, 1, 1, h, w) + + return coords, index + + +def patchify(x, patch_size=3): + """extract patches from video""" + b, n, c, h, w = x.shape + x = x.view(b * n, c, h, w) + y = F.unfold(x, patch_size) + y = y.transpose(1, 2) + return y.reshape(b, -1, c, patch_size, patch_size) + + +def pyramidify(fmap, lvls=[1]): + """turn fmap into a pyramid""" + b, n, c, h, w = fmap.shape + + pyramid = [] + for lvl in lvls: + gmap = F.avg_pool2d(fmap.view(b * n, c, h, w), lvl, stride=lvl) + pyramid += [gmap.view(b, n, c, h // lvl, w // lvl)] + + return pyramid + + +def all_pairs_exclusive(n, **kwargs): + ii, jj = torch.meshgrid(torch.arange(n, **kwargs), torch.arange(n, **kwargs)) + k = ii != jj + return 
ii[k].reshape(-1), jj[k].reshape(-1) + + +def set_depth(patches, depth): + patches[..., 2, :, :] = depth[..., None, None] + return patches + + +def flatmeshgrid(*args, **kwargs): + grid = torch.meshgrid(*args, **kwargs) + return (x.reshape(-1) for x in grid) diff --git a/pixi.toml b/pixi.toml index e4089996b28e8b7817b39b2c6b0de4ae8c115bb8..1f250f8d5cc00f44f398a208b3a296982dab3dec 100644 --- a/pixi.toml +++ b/pixi.toml @@ -10,7 +10,17 @@ platforms = ["linux-64"] libc = { family="glibc", version="2.31" } [tasks] -app = "python pixi_app.py" +download-model = """ + test -e checkpoints/dpvo.pth + || ( + wget https://www.dropbox.com/s/nap0u8zslspdwm4/models.zip + && unzip models.zip -d checkpoints + ) +""" +post-install = {cmd="python -m pip install -e .", depends_on=["download-model"]} +old-app = "python pixi_app.py" +app = {cmd="python tools/app.py", depends_on=["post-install"], outputs=["mini_dpvo.egg-info/PKG-INFO"]} + [dependencies] python = "3.11.*" diff --git a/tools/app.py b/tools/app.py new file mode 100644 index 0000000000000000000000000000000000000000..9fab861f9c3bb310558162af8014b7413bf2601b --- /dev/null +++ b/tools/app.py @@ -0,0 +1,148 @@ +import gradio as gr + +# import spaces +from gradio_rerun import Rerun +import rerun as rr +import rerun.blueprint as rrb +from pathlib import Path +import uuid +import mmcv +import spaces + +from mini_dpvo.api.inference import run +from mini_dpvo.config import cfg as base_cfg + +base_cfg.merge_from_file("config/fast.yaml") +base_cfg.BUFFER_SIZE = 2048 + + +def create_blueprint(image_name_list: list[str], log_path: Path) -> rrb.Blueprint: + # dont show 2d views if there are more than 4 images as to not clutter the view + if len(image_name_list) > 4: + blueprint = rrb.Blueprint( + rrb.Horizontal( + rrb.Spatial3DView(origin=f"{log_path}"), + ), + collapse_panels=True, + ) + else: + blueprint = rrb.Blueprint( + rrb.Horizontal( + contents=[ + rrb.Spatial3DView(origin=f"{log_path}"), + rrb.Vertical( + contents=[ + rrb.Spatial2DView( + origin=f"{log_path}/camera_{i}/pinhole/", + contents=[ + "+ $origin/**", + ], + ) + for i in range(len(image_name_list)) + ] + ), + ], + column_shares=[3, 1], + ), + collapse_panels=True, + ) + return blueprint + + +@spaces.GPU +def predict(video_file_path: str, stride: int) -> tuple[str, str]: + # check if is list or string and if not raise error + if not isinstance(video_file_path, str): + raise gr.Error( + f"Something is wrong with your input video, got: {type(video_file_path)}" + ) + + uuid_str = str(uuid.uuid4()) + filename = Path(f"/tmp/gradio/{uuid_str}.rrd") + if not filename.parent.exists(): + filename.parent.mkdir(parents=True) + rr.init(f"{uuid_str}") + + calib_path = "data/calib/iphone.txt" + if not Path(calib_path).exists(): + gr.Error(f"Calibration file not found at {calib_path}") + + dpvo_pred, time_taken = run( + cfg=base_cfg, + network_path="checkpoints/dpvo.pth", + imagedir=video_file_path, + calib="data/calib/iphone.txt", + stride=stride, + skip=0, + vis_during=True, + ) + + # blueprint: rrb.Blueprint = create_blueprint(image_name_list, log_path) + # rr.send_blueprint(blueprint) + + rr.set_time_sequence("sequence", 0) + # log_optimized_result(optimized_results, log_path) + rr.save(filename.as_posix()) + return filename.as_posix(), f"Total time: {time_taken:.2f}s" + + +def on_file_upload(video_file_path: str) -> None: + video_reader = mmcv.VideoReader(video_file_path) + video_info = f""" + **Video Info:** + - Number of Frames: {video_reader.frame_cnt} + - FPS: {round(video_reader.fps)} + """ + return 
video_info + + +with gr.Blocks( + css=""".gradio-container {margin: 0 !important; min-width: 100%};""", + title="Mini-DPVO Demo", +) as demo: + # scene state is saved so that you can change conf_thr, cam_size... without rerunning the inference + gr.HTML('Mini-DPVO Demo') + gr.HTML( + 'Unofficial DPVO demo using the mini-dpvo pip package' + ) + gr.HTML( + 'Learn more about mini-dpvo here
' + ) + with gr.Tab(label="Video Input"): + with gr.Column(): + with gr.Row(): + video_input = gr.File( + height=300, + file_count="single", + file_types=[".mp4", ".mov"], + label="Video", + ) + with gr.Column(): + video_info = gr.Markdown( + value=""" + **Video Info:** + """ + ) + time_taken = gr.Textbox(label="Time Taken") + with gr.Accordion(label="Advanced", open=False): + stride = gr.Slider( + label="Stride", + minimum=1, + maximum=5, + step=1, + value=2, + ) + run_btn_single = gr.Button("Run") + rerun_viewer_single = Rerun(height=900) + run_btn_single.click( + fn=predict, + inputs=[video_input, stride], + outputs=[rerun_viewer_single, time_taken], + ) + + video_input.upload( + fn=on_file_upload, inputs=[video_input], outputs=[video_info] + ) + + +demo.launch(share=False) diff --git a/tools/demo.py b/tools/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..4cf15ad605b2ce4f7690e8b5e908778c671078b2 --- /dev/null +++ b/tools/demo.py @@ -0,0 +1,36 @@ +from argparse import ArgumentParser +import rerun as rr +from mini_dpvo.api.inference import run +from mini_dpvo.config import cfg as base_cfg + + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("--network-path", type=str, default="checkpoints/dpvo.pth") + parser.add_argument("--imagedir", type=str) + parser.add_argument("--calib", type=str) + parser.add_argument("--stride", type=int, default=2) + parser.add_argument("--skip", type=int, default=0) + parser.add_argument("--buffer", type=int, default=2048) + parser.add_argument("--config", default="config/default.yaml") + parser.add_argument("--vis-during", action="store_true") + rr.script_add_args(parser) + args = parser.parse_args() + rr.script_setup(args, "mini_dpvo") + + base_cfg.merge_from_file(args.config) + base_cfg.BUFFER_SIZE = args.buffer + + print("Running with config...") + print(base_cfg) + + run( + base_cfg, + args.network_path, + args.imagedir, + args.calib, + args.stride, + args.skip, + vis_during=args.vis_during, + ) + rr.script_teardown(args)
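# --- illustrative aside (not part of the diff) --------------------------------
# The stream readers in mini_dpvo/stream.py (image_stream / video_stream) push
# (t, image, intrinsics) tuples into a multiprocessing.Queue and signal
# end-of-stream with t == -1. A minimal consumer sketch of that protocol is
# shown below, assuming the reader runs in a separate process; the imagedir
# path is a placeholder and the calib path mirrors the one used in tools/app.py.
from multiprocessing import Process, Queue

from mini_dpvo.stream import image_stream

def consume_frames(imagedir: str = "data/frames", calib: str = "data/calib/iphone.txt", stride: int = 2) -> int:
    queue = Queue(maxsize=8)
    reader = Process(target=image_stream, args=(queue, imagedir, calib, stride, 0))
    reader.start()
    n_frames = 0
    while True:
        t, image, intrinsics = queue.get()
        if t < 0:  # sentinel pushed by the reader when no frames remain
            break
        n_frames += 1
        # hand (t, image, intrinsics) to the VO front end here
    reader.join()
    return n_frames
# -------------------------------------------------------------------------------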