Spaces:

kazuto1011
/

r2dm

Running

App Files Files Community

kazuto1011 committed on May 8

Commit

059842e

•

1 Parent(s): 365f709

update

Browse files

Files changed (4) hide show

app.py +127 -93
rendering.py +96 -0
requirements.txt +0 -1
style.css +15 -0

app.py CHANGED Viewed

@@ -1,12 +1,60 @@
 import einops
 import gradio as gr
 import matplotlib.cm as cm
-import matplotlib.pyplot as plt
 import numpy as np
 import plotly.graph_objects as go
 import torch
 import torch.nn.functional as F
 if torch.cuda.is_available():
     device = "cuda"
 elif torch.backends.mps.is_available():
@@ -15,16 +63,28 @@ else:
     device = "cpu"
 torch.set_grad_enabled(False)
 device = torch.device(device)
-ddpm, lidar_utils, _ = torch.hub.load(
-    "kazuto1011/r2dm",
-    "pretrained_r2dm",
-    device=device,
-)
-def colorize(tensor, cmap_fn=cm.turbo):
     colors = cmap_fn(np.linspace(0, 1, 256))[:, :3]
     colors = torch.from_numpy(colors).to(tensor)
     tensor = tensor.squeeze(1) if tensor.ndim == 4 else tensor
@@ -34,42 +94,37 @@ def colorize(tensor, cmap_fn=cm.turbo):
     return tensor
-def render_point_cloud(output, cmap):
-    output = lidar_utils.denormalize(output.clamp(-1, 1))
-    depth = lidar_utils.revert_depth(output[:, [0]])
-    rflct = output[:, [1]]
-    point = lidar_utils.to_xyz(depth).cpu().numpy()
-    point = einops.rearrange(point, "1 c h w -> c (h w)")
-    # angle = lidar_utils.ray_angles.rad2deg()
     fig = go.Figure(
         data=[
             go.Scatter3d(
-                x=-point[0],
-                y=-point[1],
-                z=point[2],
                 mode="markers",
-                marker=dict(
-                    size=1,
-                    color=point[2],
-                    colorscale="viridis",
-                    autocolorscale=False,
-                    cauto=False,
-                    cmin=-2,
-                    cmax=0.5,
-                ),
-                # text=[
-                #     f"depth: {float(d):.2f}m<br>"
-                #     + f"reflectance: {float(r):.2f}<br>"
-                #     + f"elevation: {float(e):.2f}°<br>"
-                #     + f"azimuth: {float(a):.2f}°"
-                #     for d, r, e, a in zip(
-                #         einops.rearrange(depth, "1 1 h w -> (h w)"),
-                #         einops.rearrange(rflct, "1 1 h w -> (h w)"),
-                #         einops.rearrange(angle[0, 0], "h w -> (h w)"),
-                #         einops.rearrange(angle[0, 1], "h w -> (h w)"),
-                #     )
-                # ],
-                # hoverinfo="text",
             )
         ],
         layout=dict(
@@ -85,71 +140,50 @@ def render_point_cloud(output, cmap):
         ),
     )
     depth = depth / lidar_utils.max_depth
-    depth = colorize(depth, cmap)[0].permute(1, 2, 0).cpu().numpy()
-    rflct = colorize(rflct, cmap)[0].permute(1, 2, 0).cpu().numpy()
-    return depth, rflct, fig
-def generate(num_steps, cmap_name, progress=gr.Progress()):
-    num_steps = int(num_steps)
-    x = ddpm.randn(1, *ddpm.sampling_shape, device=ddpm.device)
-    steps = torch.linspace(1.0, 0.0, num_steps + 1, device=ddpm.device)[None]
-    for i in progress.tqdm(range(num_steps), desc="Generating LiDAR data"):
-        step_t = steps[:, i]
-        step_s = steps[:, i + 1]
-        x = ddpm.p_step(x, step_t, step_s)
-    return render_point_cloud(x, plt.colormaps.get_cmap(cmap_name))
-with gr.Blocks() as demo:
-    gr.Markdown(
-        """
-        # R2DM
-        > **LiDAR Data Synthesis with Denoising Diffusion Probabilistic Models**<br>
-        Kazuto Nakashima, Ryo Kurazume<br>
-        ICRA 2024<br>
-        [[Project]](https://kazuto1011.github.io/r2dm/) [[arXiv]](https://arxiv.org/abs/2309.09256) [[Code]](https://github.com/kazuto1011/r2dm)
-        R2DM is a denoising diffusion probabilistic model (DDPM) for LiDAR range/reflectance generation based on the equirectangular representation.
-        """
-    )
-    with gr.Row():
         with gr.Column():
-            gr.Textbox(device, label="Device")
-            num_steps = gr.Dropdown(
-                choices=[2**i for i in range(2, 10)],
-                value=16,
-                label="number of sampling steps (>256 is recommended)",
             )
-            cmap_name = gr.Dropdown(
-                choices=plt.colormaps(),
-                value="turbo",
-                label="colormap for range/reflectance images",
             )
-            btn = gr.Button(value="Generate random samples")
         with gr.Column():
-            range_view = gr.Image(
-                type="numpy",
-                image_mode="RGB",
-                label="Range image",
-                scale=1,
-            )
-            rflct_view = gr.Image(
-                type="numpy",
-                image_mode="RGB",
-                label="Reflectance image",
-                scale=1,
-            )
-            point_view = gr.Plot(
-                label="Point cloud",
-                scale=1,
-            )
     btn.click(
         generate,
-        inputs=[num_steps, cmap_name],
         outputs=[range_view, rflct_view, point_view],
     )

 import einops
 import gradio as gr
 import matplotlib.cm as cm
 import numpy as np
 import plotly.graph_objects as go
 import torch
 import torch.nn.functional as F
+from rendering import estimate_surface_normal
+DESCRIPTION = """
+<div class="head">
+<div class="title">LiDAR Data Synthesis with Denoising Diffusion Probabilistic Models</div>
+<div class="authors">
+<a href="https://kazuto1011.github.io/" target="_blank" rel="noopener"> Kazuto Nakashima</a>
+&nbsp;&nbsp;&nbsp;
+<a href="https://robotics.ait.kyushu-u.ac.jp/kurazume/en/" target="_blank" rel="noopener"> Ryo Kurazume</a>
+</div>
+<div class="affiliations">Kyushu University</div>
+<div class="conference">ICRA 2024</div>
+<div class="materials">
+<a href="https://kazuto1011.github.io/r2dm">Project</a> |
+<a href="https://arxiv.org/abs/2309.09256">Paper</a> |
+<a href="https://github.com/kazuto1011/r2dm">Code</a>
+</div>
+<br>
+<div class="description">
+This is a demo of our paper "LiDAR Data Synthesis with Denoising Diffusion Probabilistic Models" presented at ICRA 2024.<br>
+We propose <strong>R2DM</strong>, a continuous-time diffusion model for LiDAR data generation based on the equirectangular range/reflectance image representation.<br>
+</div>
+<br>
+</div>
+"""
+RUN_LOCALLY = """
+To run this demo locally:
+```bash
+git clone https://huggingface.co/spaces/kazuto1011/r2dm
+```
+```bash
+cd r2dm
+```
+```bash
+pip install -r requirements.txt
+```
+```bash
+pip install gradio
+```
+```bash
+gradio app.py
+```
+"""
+THEME = gr.themes.Default(font=gr.themes.GoogleFont("Titillium Web"))
 if torch.cuda.is_available():
     device = "cuda"
 elif torch.backends.mps.is_available():
     device = "cpu"
 torch.set_grad_enabled(False)
+torch.backends.cudnn.benchmark = True
 device = torch.device(device)
+model_dict = {
+    "KITTI Raw (64x512)": torch.hub.load(
+        "kazuto1011/r2dm",
+        "pretrained_r2dm",
+        config="r2dm-h-kittiraw-300k",
+        device="cpu",
+        show_info=False,
+    ),
+    "KITTI-360 (64x1024)": torch.hub.load(
+        "kazuto1011/r2dm",
+        "pretrained_r2dm",
+        config="r2dm-h-kitti360-300k",
+        device="cpu",
+        show_info=False,
+    ),
+}
+def colorize(tensor: torch.Tensor, cmap_fn=cm.turbo):
     colors = cmap_fn(np.linspace(0, 1, 256))[:, :3]
     colors = torch.from_numpy(colors).to(tensor)
     tensor = tensor.squeeze(1) if tensor.ndim == 4 else tensor
     return tensor
+def generate(num_steps: int, sampling_mode: str, dataset: str, progress=gr.Progress()):
+    # model setup
+    model, lidar_utils, _ = model_dict[dataset]
+    model.to(device)
+    lidar_utils.to(device)
+    # sampling
+    num_steps = int(num_steps)
+    x = model.randn(1, *model.sampling_shape, device=model.device)
+    steps = torch.linspace(1.0, 0.0, num_steps + 1, device=model.device)[None]
+    for i in progress.tqdm(range(num_steps), desc="Generating LiDAR data"):
+        step_t = steps[:, i]
+        step_s = steps[:, i + 1]
+        x = model.p_step(x, step_t, step_s, mode=sampling_mode.lower())
+    # rendering point cloud
+    x = lidar_utils.denormalize(x.clamp(-1, 1))
+    depth = lidar_utils.revert_depth(x[:, [0]])
+    rflct = x[:, [1]]
+    point = lidar_utils.to_xyz(depth)
+    color = (-estimate_surface_normal(point) + 1) / 2
+    point = einops.rearrange(point, "1 c h w -> (h w) c").cpu().numpy()
+    color = einops.rearrange(color, "1 c h w -> (h w) c").cpu().numpy()
     fig = go.Figure(
         data=[
             go.Scatter3d(
+                x=-point[..., 0],
+                y=-point[..., 1],
+                z=point[..., 2],
                 mode="markers",
+                marker=dict(size=1, color=color),
             )
         ],
         layout=dict(
         ),
     )
     depth = depth / lidar_utils.max_depth
+    depth = colorize(depth, cm.turbo)[0].permute(1, 2, 0).cpu().numpy()
+    rflct = colorize(rflct, cm.turbo)[0].permute(1, 2, 0).cpu().numpy()
+    model.cpu()
+    lidar_utils.cpu()
+    return depth, rflct, fig
+with gr.Blocks(css="./style.css", theme=THEME) as demo:
+    gr.HTML(DESCRIPTION)
+    with gr.Row(variant="panel"):
         with gr.Column():
+            gr.Textbox(device, label="Running device")
+            dataset = gr.Dropdown(
+                choices=list(model_dict.keys()),
+                value=list(model_dict.keys())[0],
+                label="Dataset",
             )
+            sampling_mode = gr.Dropdown(
+                choices=["DDPM", "DDIM"],
+                value="DDPM",
+                label="Sampler",
             )
+            num_steps = gr.Dropdown(
+                choices=[2**i for i in range(5, 11)],
+                value=32,
+                label="Number of sampling steps (>256 is recommended)",
+            )
+            btn = gr.Button(value="Generate")
         with gr.Column():
+            range_view = gr.Image(type="numpy", label="Range image")
+            rflct_view = gr.Image(type="numpy", label="Reflectance image")
+            point_view = gr.Plot(label="Point cloud")
+    with gr.Row(variant="panel"):
+        gr.Markdown(RUN_LOCALLY)
     btn.click(
         generate,
+        inputs=[num_steps, sampling_mode, dataset],
         outputs=[range_view, rflct_view, point_view],
+        concurrency_limit=1,
     )

rendering.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import torch
+import torch.nn.functional as F
+def estimate_surface_normal(
+    points: torch.Tensor, d: int = 2, mode: str = "closest"
+) -> torch.Tensor:
+    # Estimate a unit surface normal for every pixel of an organized
+    # (image-shaped) point cloud.
+    #
+    # Args:
+    #   points: tensor of shape (B,3,H,W); any other rank or channel count
+    #           fails the assertions below.
+    #   d:      pixel distance from each anchor pixel to the neighbors used
+    #           to build the two edge vectors (also the padding width).
+    #   mode:   "closest" uses, per pixel, the single neighbor pair with the
+    #           smallest summed distance to the anchor; "mean" averages the
+    #           cross products of all 8 neighbor pairs.
+    #
+    # Returns:
+    #   tensor of shape (B,3,H,W) holding the normalized normals.
+    #
+    # Raises:
+    #   AssertionError:      if `points` is not (B,3,H,W).
+    #   NotImplementedError: if `mode` is neither "closest" nor "mean".
+    #
+    # estimate surface normal from coordinated point clouds
+    # re-implemented the following codes with pytorch:
+    # https://github.com/wkentaro/morefusion/blob/master/morefusion/geometry/estimate_pointcloud_normals.py
+    # https://github.com/jmccormac/pySceneNetRGBD/blob/master/calculate_surface_normals.py
+    assert points.dim() == 4, f"expected (B,3,H,W), but got {points.shape}"
+    B, C, H, W = points.shape
+    assert C == 3, f"expected C==3, but got {C}"
+    device = points.device
+    # Pad by d on every side so that anchor +/- d never indexes out of range.
+    # F.pad lists the last dimension first: (0, 0, d, d) pads only H
+    # (replicating the border rows) and (d, d, 0, 0) pads only W (wrapping
+    # around). Presumably W is the azimuth axis of a 360-degree scan, hence
+    # the circular padding -- TODO confirm against the caller.
+    # points = F.pad(points, (0, 0, d, d), mode="constant", value=float("inf"))
+    points = F.pad(points, (0, 0, d, d), mode="replicate")
+    points = F.pad(points, (d, d, 0, 0), mode="circular")
+    points = points.permute(0, 2, 3, 1)  # (B,H,W,3)
+    # 8 adjacent offsets
+    #  -----------
+    # | 7 | 6 | 5 |
+    #  -----------
+    # | 0 |   | 4 |
+    #  -----------
+    # | 1 | 2 | 3 |
+    #  -----------
+    # NOTE(review): column 0 of `offsets` is added to the row index and
+    # column 1 to the column index below, so entry 0 = (-d, 0) addresses the
+    # previous row rather than the left neighbor; the diagram above looks
+    # transposed relative to the (dh,dw) convention actually used -- confirm.
+    offsets = torch.tensor(
+        [
+            # (dh,dw)
+            (-d, 0),  # 0
+            (-d, d),  # 1
+            (0, d),  # 2
+            (d, d),  # 3
+            (d, 0),  # 4
+            (d, -d),  # 5
+            (0, -d),  # 6
+            (-d, -d),  # 7
+        ],
+        device=device,
+    )
+    # (B,H,W) indices
+    b = torch.arange(B, device=device)[:, None, None]
+    h = torch.arange(H, device=device)[None, :, None]
+    w = torch.arange(W, device=device)[None, None, :]
+    k = torch.arange(8, device=device)
+    # anchor points
+    # (+d shifts the indices into the padded tensor so they address the
+    # original, unpadded pixels)
+    b1 = b[:, None]  # (B,1,1,1)
+    h1 = h[:, None] + d  # (1,1,H,1)
+    w1 = w[:, None] + d  # (1,1,1,W)
+    anchors = points[b1, h1, w1]  # (B,H,W,3) -> (B,1,H,W,3)
+    # neighbor points
+    offset = offsets[k]  # (8,2)
+    b2 = b1
+    h2 = h1 + offset[None, :, 0, None, None]  # (1,8,H,1)
+    w2 = w1 + offset[None, :, 1, None, None]  # (1,8,1,W)
+    points1 = points[b2, h2, w2]  # (B,8,H,W,3)
+    # another neighbor points
+    # (neighbor k is paired with neighbor (k + 2) % 8, i.e. the entry two
+    # steps further along the ring of offsets, a quarter turn away)
+    offset = offsets[(k + 2) % 8]
+    b3 = b1
+    h3 = h1 + offset[None, :, 0, None, None]
+    w3 = w1 + offset[None, :, 1, None, None]
+    points2 = points[b3, h3, w3]  # (B,8,H,W,3)
+    if mode == "closest":
+        # find the closest neighbor pair
+        # (score of a pair = sum of the Euclidean distances from the anchor
+        # to each of its two neighbors; the smallest score wins per pixel)
+        diff = torch.norm(points1 - anchors, dim=4)
+        diff = diff + torch.norm(points2 - anchors, dim=4)
+        i = torch.argmin(diff, dim=1)  # (B,H,W)
+        # get normals by cross product
+        # (advanced indexing with the (B,H,W)-broadcast b/h/w grids and the
+        # per-pixel winner i drops the pair axis)
+        anchors = anchors[b, 0, h, w]  # (B,H,W,3)
+        points1 = points1[b, i, h, w]  # (B,H,W,3)
+        points2 = points2[b, i, h, w]  # (B,H,W,3)
+        vector1 = points1 - anchors
+        vector2 = points2 - anchors
+        normals = torch.cross(vector1, vector2, dim=-1)  # (B,H,W,3)
+    elif mode == "mean":
+        # get normals by cross product
+        # (one cross product per neighbor pair, averaged over the 8 pairs
+        # before the normalization below)
+        vector1 = points1 - anchors
+        vector2 = points2 - anchors
+        normals = torch.cross(vector1, vector2, dim=-1)  # (B,8,H,W,3)
+        normals = normals.mean(dim=1)  # (B,H,W,3)
+    else:
+        raise NotImplementedError(mode)
+    # Scale to unit length; the 1e-8 keeps the division finite when a
+    # cross product is the zero vector.
+    normals = normals / (torch.norm(normals, dim=3, keepdim=True) + 1e-8)
+    normals = normals.permute(0, 3, 1, 2)  # (B,3,H,W)
+    return normals

requirements.txt CHANGED Viewed

@@ -1,5 +1,4 @@
 einops
-kornia
 matplotlib
 numpy
 torch

 einops
 matplotlib
 numpy
 torch

style.css ADDED Viewed

	@@ -0,0 +1,15 @@

+.head {
+  text-align: center;
+  display: block;
+  font-size: var(--text-xl);
+}
+.title {
+  font-size: var(--text-xxl);
+  font-weight: bold;
+  margin-top: 2rem;
+}
+.description {
+  font-size: var(--text-lg);
+}