Work committed on
Commit
206b602
1 Parent(s): 9997a4d

update from lgm to lgm hf

Browse files
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the complete set of changes.
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. acc_configs/gpu1.yaml +0 -15
  3. acc_configs/gpu4.yaml +0 -15
  4. acc_configs/gpu6.yaml +0 -15
  5. acc_configs/gpu8.yaml +0 -15
  6. app.py +56 -25
  7. convert.py +0 -462
  8. core/__pycache__/__init__.cpython-39.pyc +0 -0
  9. core/__pycache__/attention.cpython-39.pyc +0 -0
  10. core/__pycache__/gs.cpython-39.pyc +0 -0
  11. core/__pycache__/models.cpython-39.pyc +0 -0
  12. core/__pycache__/options.cpython-39.pyc +0 -0
  13. core/__pycache__/provider_objaverse.cpython-39.pyc +0 -0
  14. core/__pycache__/unet.cpython-39.pyc +0 -0
  15. core/__pycache__/utils.cpython-39.pyc +0 -0
  16. core/models.py +7 -4
  17. core/options.py +7 -7
  18. core/unet.py +7 -7
  19. data_test/anya_rgba.png +0 -0
  20. data_test/bird.jpg +0 -0
  21. data_test/bird_rgba.png +0 -0
  22. data_test/boy.jpg +0 -0
  23. data_test/cat_statue.jpg +0 -0
  24. data_test/catstatue_rgba.png +0 -0
  25. data_test/dragontoy.jpg +0 -0
  26. data_test/frog_sweater.jpg +0 -0
  27. data_test/gso_rabbit.jpg +0 -0
  28. diff-gaussian-rasterization/.gitignore +7 -0
  29. diff-gaussian-rasterization/.gitmodules +3 -0
  30. diff-gaussian-rasterization/CMakeLists.txt +36 -0
  31. diff-gaussian-rasterization/LICENSE.md +83 -0
  32. diff-gaussian-rasterization/README.md +35 -0
  33. diff-gaussian-rasterization/cuda_rasterizer/auxiliary.h +175 -0
  34. diff-gaussian-rasterization/cuda_rasterizer/backward.cu +712 -0
  35. diff-gaussian-rasterization/cuda_rasterizer/backward.h +70 -0
  36. diff-gaussian-rasterization/cuda_rasterizer/config.h +19 -0
  37. diff-gaussian-rasterization/cuda_rasterizer/forward.cu +466 -0
  38. diff-gaussian-rasterization/cuda_rasterizer/forward.h +68 -0
  39. diff-gaussian-rasterization/cuda_rasterizer/rasterizer.h +94 -0
  40. diff-gaussian-rasterization/cuda_rasterizer/rasterizer_impl.cu +447 -0
  41. diff-gaussian-rasterization/cuda_rasterizer/rasterizer_impl.h +73 -0
  42. diff-gaussian-rasterization/diff_gaussian_rasterization/__init__.py +224 -0
  43. diff-gaussian-rasterization/ext.cpp +19 -0
  44. diff-gaussian-rasterization/rasterize_points.cu +229 -0
  45. diff-gaussian-rasterization/rasterize_points.h +70 -0
  46. diff-gaussian-rasterization/setup.py +34 -0
  47. diff-gaussian-rasterization/third_party/glm/.appveyor.yml +92 -0
  48. diff-gaussian-rasterization/third_party/glm/.gitignore +61 -0
  49. diff-gaussian-rasterization/third_party/glm/.travis.yml +388 -0
  50. diff-gaussian-rasterization/third_party/glm/CMakeLists.txt +45 -0
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
acc_configs/gpu1.yaml DELETED
@@ -1,15 +0,0 @@
1
- compute_environment: LOCAL_MACHINE
2
- debug: false
3
- distributed_type: 'NO'
4
- downcast_bf16: 'no'
5
- machine_rank: 0
6
- main_training_function: main
7
- mixed_precision: bf16
8
- num_machines: 1
9
- num_processes: 1
10
- rdzv_backend: static
11
- same_network: true
12
- tpu_env: []
13
- tpu_use_cluster: false
14
- tpu_use_sudo: false
15
- use_cpu: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
acc_configs/gpu4.yaml DELETED
@@ -1,15 +0,0 @@
1
- compute_environment: LOCAL_MACHINE
2
- debug: false
3
- distributed_type: MULTI_GPU
4
- downcast_bf16: 'no'
5
- machine_rank: 0
6
- main_training_function: main
7
- mixed_precision: fp16
8
- num_machines: 1
9
- num_processes: 4
10
- rdzv_backend: static
11
- same_network: true
12
- tpu_env: []
13
- tpu_use_cluster: false
14
- tpu_use_sudo: false
15
- use_cpu: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
acc_configs/gpu6.yaml DELETED
@@ -1,15 +0,0 @@
1
- compute_environment: LOCAL_MACHINE
2
- debug: false
3
- distributed_type: MULTI_GPU
4
- downcast_bf16: 'no'
5
- machine_rank: 0
6
- main_training_function: main
7
- mixed_precision: fp16
8
- num_machines: 1
9
- num_processes: 6
10
- rdzv_backend: static
11
- same_network: true
12
- tpu_env: []
13
- tpu_use_cluster: false
14
- tpu_use_sudo: false
15
- use_cpu: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
acc_configs/gpu8.yaml DELETED
@@ -1,15 +0,0 @@
1
- compute_environment: LOCAL_MACHINE
2
- debug: false
3
- distributed_type: MULTI_GPU
4
- downcast_bf16: 'no'
5
- machine_rank: 0
6
- main_training_function: main
7
- mixed_precision: bf16
8
- num_machines: 1
9
- num_processes: 8
10
- rdzv_backend: static
11
- same_network: true
12
- tpu_env: []
13
- tpu_use_cluster: false
14
- tpu_use_sudo: false
15
- use_cpu: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import os
 
 
2
  import tyro
3
  import imageio
4
  import numpy as np
@@ -11,6 +13,12 @@ from safetensors.torch import load_file
11
  import rembg
12
  import gradio as gr
13
 
 
 
 
 
 
 
14
  import kiui
15
  from kiui.op import recenter
16
  from kiui.cam import orbit_camera
@@ -19,12 +27,26 @@ from core.options import AllConfigs, Options
19
  from core.models import LGM
20
  from mvdream.pipeline_mvdream import MVDreamPipeline
21
 
 
 
22
  IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
23
  IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
24
  GRADIO_VIDEO_PATH = 'gradio_output.mp4'
25
  GRADIO_PLY_PATH = 'gradio_output.ply'
26
 
27
- opt = tyro.cli(AllConfigs)
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  # model
30
  model = LGM(opt)
@@ -45,7 +67,7 @@ model = model.half().to(device)
45
  model.eval()
46
 
47
  tan_half_fov = np.tan(0.5 * np.deg2rad(opt.fovy))
48
- proj_matrix = torch.zeros(4, 4, dtype=torch.float32, device=device)
49
  proj_matrix[0, 0] = 1 / tan_half_fov
50
  proj_matrix[1, 1] = 1 / tan_half_fov
51
  proj_matrix[2, 2] = (opt.zfar + opt.znear) / (opt.zfar - opt.znear)
@@ -73,6 +95,7 @@ pipe_image = pipe_image.to(device)
73
  bg_remover = rembg.new_session()
74
 
75
  # process function
 
76
  def process(input_image, prompt, prompt_neg='', input_elevation=0, input_num_steps=30, input_seed=42):
77
 
78
  # seed
@@ -105,7 +128,7 @@ def process(input_image, prompt, prompt_neg='', input_elevation=0, input_num_ste
105
  image = image.astype(np.float32) / 255.0
106
  image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
107
  mv_image = pipe_image(prompt, image, negative_prompt=prompt_neg, num_inference_steps=input_num_steps, guidance_scale=5.0, elevation=input_elevation)
108
-
109
  mv_image_grid = np.concatenate([
110
  np.concatenate([mv_image[1], mv_image[2]], axis=1),
111
  np.concatenate([mv_image[3], mv_image[0]], axis=1),
@@ -124,21 +147,21 @@ def process(input_image, prompt, prompt_neg='', input_elevation=0, input_num_ste
124
  with torch.autocast(device_type='cuda', dtype=torch.float16):
125
  # generate gaussians
126
  gaussians = model.forward_gaussians(input_image)
127
-
128
  # save gaussians
129
  model.gs.save_ply(gaussians, output_ply_path)
130
-
131
- # render 360 video
132
  images = []
133
  elevation = 0
134
  if opt.fancy_video:
135
  azimuth = np.arange(0, 720, 4, dtype=np.int32)
136
  for azi in tqdm.tqdm(azimuth):
137
-
138
  cam_poses = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
139
 
140
  cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
141
-
142
  # cameras needed by gaussian rasterizer
143
  cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
144
  cam_view_proj = cam_view @ proj_matrix # [V, 4, 4]
@@ -151,11 +174,11 @@ def process(input_image, prompt, prompt_neg='', input_elevation=0, input_num_ste
151
  else:
152
  azimuth = np.arange(0, 360, 2, dtype=np.int32)
153
  for azi in tqdm.tqdm(azimuth):
154
-
155
  cam_poses = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
156
 
157
  cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
158
-
159
  # cameras needed by gaussian rasterizer
160
  cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
161
  cam_view_proj = cam_view @ proj_matrix # [V, 4, 4]
@@ -179,7 +202,8 @@ _DESCRIPTION = '''
179
  <a style="display:inline-block; margin-left: .5em" href="https://github.com/3DTopia/LGM"><img src='https://img.shields.io/github/stars/3DTopia/LGM?style=social'/></a>
180
  </div>
181
 
182
- * Input can be only text, only image, or both image and text.
 
183
  * If you find the output unsatisfying, try using different seeds!
184
  '''
185
 
@@ -189,7 +213,7 @@ with block:
189
  with gr.Column(scale=1):
190
  gr.Markdown('# ' + _TITLE)
191
  gr.Markdown(_DESCRIPTION)
192
-
193
  with gr.Row(variant='panel'):
194
  with gr.Column(scale=1):
195
  # input image
@@ -207,43 +231,50 @@ with block:
207
  # gen button
208
  button_gen = gr.Button("Generate")
209
 
210
-
211
  with gr.Column(scale=1):
212
  with gr.Tab("Video"):
213
  # final video results
214
  output_video = gr.Video(label="video")
215
  # ply file
216
- output_file = gr.File(label="ply")
217
  with gr.Tab("Multi-view Image"):
218
  # multi-view results
219
  output_image = gr.Image(interactive=False, show_label=False)
220
 
221
  button_gen.click(process, inputs=[input_image, input_text, input_neg_text, input_elevation, input_num_steps, input_seed], outputs=[output_image, output_video, output_file])
222
-
223
  gr.Examples(
224
  examples=[
225
- "data_test/anya_rgba.png",
226
- "data_test/bird_rgba.png",
227
- "data_test/catstatue_rgba.png",
 
 
 
228
  ],
229
  inputs=[input_image],
230
  outputs=[output_image, output_video, output_file],
231
  fn=lambda x: process(input_image=x, prompt=''),
232
- cache_examples=False,
233
  label='Image-to-3D Examples'
234
  )
235
 
236
  gr.Examples(
237
  examples=[
238
- "a motorbike",
239
- "a hamburger",
240
- "a furry red fox head",
 
 
 
 
241
  ],
242
  inputs=[input_text],
243
  outputs=[output_image, output_video, output_file],
244
  fn=lambda x: process(input_image=None, prompt=x),
245
- cache_examples=False,
246
  label='Text-to-3D Examples'
247
  )
248
-
249
- block.launch(server_name="0.0.0.0", share=False)
 
1
  import os
2
+ import shlex
3
+ import subprocess
4
  import tyro
5
  import imageio
6
  import numpy as np
 
13
  import rembg
14
  import gradio as gr
15
 
16
+ # download checkpoints
17
+ from huggingface_hub import hf_hub_download
18
+ ckpt_path = hf_hub_download(repo_id="ashawkey/LGM", filename="model_fp16.safetensors")
19
+
20
+ subprocess.run(shlex.split("pip install wheel/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl"))
21
+
22
  import kiui
23
  from kiui.op import recenter
24
  from kiui.cam import orbit_camera
 
27
  from core.models import LGM
28
  from mvdream.pipeline_mvdream import MVDreamPipeline
29
 
30
+ import spaces
31
+
32
  IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
33
  IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
34
  GRADIO_VIDEO_PATH = 'gradio_output.mp4'
35
  GRADIO_PLY_PATH = 'gradio_output.ply'
36
 
37
+ # opt = tyro.cli(AllConfigs)
38
+ opt = Options(
39
+ input_size=256,
40
+ up_channels=(1024, 1024, 512, 256, 128), # one more decoder
41
+ up_attention=(True, True, True, False, False),
42
+ splat_size=128,
43
+ output_size=512, # render & supervise Gaussians at a higher resolution.
44
+ batch_size=8,
45
+ num_views=8,
46
+ gradient_accumulation_steps=1,
47
+ mixed_precision='bf16',
48
+ resume=ckpt_path,
49
+ )
50
 
51
  # model
52
  model = LGM(opt)
 
67
  model.eval()
68
 
69
  tan_half_fov = np.tan(0.5 * np.deg2rad(opt.fovy))
70
+ proj_matrix = torch.zeros(4, 4, dtype=torch.float32).to(device)
71
  proj_matrix[0, 0] = 1 / tan_half_fov
72
  proj_matrix[1, 1] = 1 / tan_half_fov
73
  proj_matrix[2, 2] = (opt.zfar + opt.znear) / (opt.zfar - opt.znear)
 
95
  bg_remover = rembg.new_session()
96
 
97
  # process function
98
+ @spaces.GPU
99
  def process(input_image, prompt, prompt_neg='', input_elevation=0, input_num_steps=30, input_seed=42):
100
 
101
  # seed
 
128
  image = image.astype(np.float32) / 255.0
129
  image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
130
  mv_image = pipe_image(prompt, image, negative_prompt=prompt_neg, num_inference_steps=input_num_steps, guidance_scale=5.0, elevation=input_elevation)
131
+
132
  mv_image_grid = np.concatenate([
133
  np.concatenate([mv_image[1], mv_image[2]], axis=1),
134
  np.concatenate([mv_image[3], mv_image[0]], axis=1),
 
147
  with torch.autocast(device_type='cuda', dtype=torch.float16):
148
  # generate gaussians
149
  gaussians = model.forward_gaussians(input_image)
150
+
151
  # save gaussians
152
  model.gs.save_ply(gaussians, output_ply_path)
153
+
154
+ # render 360 video
155
  images = []
156
  elevation = 0
157
  if opt.fancy_video:
158
  azimuth = np.arange(0, 720, 4, dtype=np.int32)
159
  for azi in tqdm.tqdm(azimuth):
160
+
161
  cam_poses = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
162
 
163
  cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
164
+
165
  # cameras needed by gaussian rasterizer
166
  cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
167
  cam_view_proj = cam_view @ proj_matrix # [V, 4, 4]
 
174
  else:
175
  azimuth = np.arange(0, 360, 2, dtype=np.int32)
176
  for azi in tqdm.tqdm(azimuth):
177
+
178
  cam_poses = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
179
 
180
  cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
181
+
182
  # cameras needed by gaussian rasterizer
183
  cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
184
  cam_view_proj = cam_view @ proj_matrix # [V, 4, 4]
 
202
  <a style="display:inline-block; margin-left: .5em" href="https://github.com/3DTopia/LGM"><img src='https://img.shields.io/github/stars/3DTopia/LGM?style=social'/></a>
203
  </div>
204
 
205
+ * Input can be only text, only image, or both image and text.
206
+ * Output is a `ply` file containing the 3D Gaussians, please check our [repo](https://github.com/3DTopia/LGM/blob/main/readme.md) for visualization and mesh conversion.
207
  * If you find the output unsatisfying, try using different seeds!
208
  '''
209
 
 
213
  with gr.Column(scale=1):
214
  gr.Markdown('# ' + _TITLE)
215
  gr.Markdown(_DESCRIPTION)
216
+
217
  with gr.Row(variant='panel'):
218
  with gr.Column(scale=1):
219
  # input image
 
231
  # gen button
232
  button_gen = gr.Button("Generate")
233
 
234
+
235
  with gr.Column(scale=1):
236
  with gr.Tab("Video"):
237
  # final video results
238
  output_video = gr.Video(label="video")
239
  # ply file
240
+ output_file = gr.File(label="3D Gaussians (ply format)")
241
  with gr.Tab("Multi-view Image"):
242
  # multi-view results
243
  output_image = gr.Image(interactive=False, show_label=False)
244
 
245
  button_gen.click(process, inputs=[input_image, input_text, input_neg_text, input_elevation, input_num_steps, input_seed], outputs=[output_image, output_video, output_file])
246
+
247
  gr.Examples(
248
  examples=[
249
+ "data_test/frog_sweater.jpg",
250
+ "data_test/bird.jpg",
251
+ "data_test/boy.jpg",
252
+ "data_test/cat_statue.jpg",
253
+ "data_test/dragontoy.jpg",
254
+ "data_test/gso_rabbit.jpg",
255
  ],
256
  inputs=[input_image],
257
  outputs=[output_image, output_video, output_file],
258
  fn=lambda x: process(input_image=x, prompt=''),
259
+ cache_examples=True,
260
  label='Image-to-3D Examples'
261
  )
262
 
263
  gr.Examples(
264
  examples=[
265
+ "teddy bear",
266
+ "hamburger",
267
+ "oldman's head sculpture",
268
+ "headphone",
269
+ "motorbike",
270
+ "mech suit"
271
+
272
  ],
273
  inputs=[input_text],
274
  outputs=[output_image, output_video, output_file],
275
  fn=lambda x: process(input_image=None, prompt=x),
276
+ cache_examples=True,
277
  label='Text-to-3D Examples'
278
  )
279
+
280
+ block.launch()
convert.py DELETED
@@ -1,462 +0,0 @@
1
-
2
- import os
3
- import tyro
4
- import tqdm
5
- import numpy as np
6
- import torch
7
- import torch.nn as nn
8
- import torch.nn.functional as F
9
-
10
- from core.options import AllConfigs, Options
11
- from core.gs import GaussianRenderer
12
-
13
- import mcubes
14
- import nerfacc
15
- import nvdiffrast.torch as dr
16
-
17
- import kiui
18
- from kiui.mesh import Mesh
19
- from kiui.mesh_utils import clean_mesh, decimate_mesh
20
- from kiui.mesh_utils import laplacian_smooth_loss, normal_consistency
21
- from kiui.op import uv_padding, safe_normalize, inverse_sigmoid
22
- from kiui.cam import orbit_camera, get_perspective
23
- from kiui.nn import MLP, trunc_exp
24
- from kiui.gridencoder import GridEncoder
25
-
26
- def get_rays(pose, h, w, fovy, opengl=True):
27
-
28
- x, y = torch.meshgrid(
29
- torch.arange(w, device=pose.device),
30
- torch.arange(h, device=pose.device),
31
- indexing="xy",
32
- )
33
- x = x.flatten()
34
- y = y.flatten()
35
-
36
- cx = w * 0.5
37
- cy = h * 0.5
38
- focal = h * 0.5 / np.tan(0.5 * np.deg2rad(fovy))
39
-
40
- camera_dirs = F.pad(
41
- torch.stack(
42
- [
43
- (x - cx + 0.5) / focal,
44
- (y - cy + 0.5) / focal * (-1.0 if opengl else 1.0),
45
- ],
46
- dim=-1,
47
- ),
48
- (0, 1),
49
- value=(-1.0 if opengl else 1.0),
50
- ) # [hw, 3]
51
-
52
- rays_d = camera_dirs @ pose[:3, :3].transpose(0, 1) # [hw, 3]
53
- rays_o = pose[:3, 3].unsqueeze(0).expand_as(rays_d) # [hw, 3]
54
-
55
- rays_d = safe_normalize(rays_d)
56
-
57
- return rays_o, rays_d
58
-
59
- # Triple renderer of gaussians, gaussian, and diso mesh.
60
- # gaussian --> nerf --> mesh
61
- class Converter(nn.Module):
62
- def __init__(self, opt: Options):
63
- super().__init__()
64
-
65
- self.opt = opt
66
- self.device = torch.device("cuda")
67
-
68
- # gs renderer
69
- self.tan_half_fov = np.tan(0.5 * np.deg2rad(opt.fovy))
70
- self.proj_matrix = torch.zeros(4, 4, dtype=torch.float32, device=self.device)
71
- self.proj_matrix[0, 0] = 1 / self.tan_half_fov
72
- self.proj_matrix[1, 1] = 1 / self.tan_half_fov
73
- self.proj_matrix[2, 2] = (opt.zfar + opt.znear) / (opt.zfar - opt.znear)
74
- self.proj_matrix[3, 2] = - (opt.zfar * opt.znear) / (opt.zfar - opt.znear)
75
- self.proj_matrix[2, 3] = 1
76
-
77
- self.gs_renderer = GaussianRenderer(opt)
78
-
79
- self.gaussians = self.gs_renderer.load_ply(opt.test_path).to(self.device)
80
-
81
- # nerf renderer
82
- if not self.opt.force_cuda_rast:
83
- self.glctx = dr.RasterizeGLContext()
84
- else:
85
- self.glctx = dr.RasterizeCudaContext()
86
-
87
- self.step = 0
88
- self.render_step_size = 5e-3
89
- self.aabb = torch.tensor([-1.0, -1.0, -1.0, 1.0, 1.0, 1.0], device=self.device)
90
- self.estimator = nerfacc.OccGridEstimator(roi_aabb=self.aabb, resolution=64, levels=1)
91
-
92
- self.encoder_density = GridEncoder(num_levels=12) # VMEncoder(output_dim=16, mode='sum')
93
- self.encoder = GridEncoder(num_levels=12)
94
- self.mlp_density = MLP(self.encoder_density.output_dim, 1, 32, 2, bias=False)
95
- self.mlp = MLP(self.encoder.output_dim, 3, 32, 2, bias=False)
96
-
97
- # mesh renderer
98
- self.proj = torch.from_numpy(get_perspective(self.opt.fovy)).float().to(self.device)
99
- self.v = self.f = None
100
- self.vt = self.ft = None
101
- self.deform = None
102
- self.albedo = None
103
-
104
-
105
- @torch.no_grad()
106
- def render_gs(self, pose):
107
-
108
- cam_poses = torch.from_numpy(pose).unsqueeze(0).to(self.device)
109
- cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
110
-
111
- # cameras needed by gaussian rasterizer
112
- cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
113
- cam_view_proj = cam_view @ self.proj_matrix # [V, 4, 4]
114
- cam_pos = - cam_poses[:, :3, 3] # [V, 3]
115
-
116
- out = self.gs_renderer.render(self.gaussians.unsqueeze(0), cam_view.unsqueeze(0), cam_view_proj.unsqueeze(0), cam_pos.unsqueeze(0))
117
- image = out['image'].squeeze(1).squeeze(0) # [C, H, W]
118
- alpha = out['alpha'].squeeze(2).squeeze(1).squeeze(0) # [H, W]
119
-
120
- return image, alpha
121
-
122
- def get_density(self, xs):
123
- # xs: [..., 3]
124
- prefix = xs.shape[:-1]
125
- xs = xs.view(-1, 3)
126
- feats = self.encoder_density(xs)
127
- density = trunc_exp(self.mlp_density(feats))
128
- density = density.view(*prefix, 1)
129
- return density
130
-
131
- def render_nerf(self, pose):
132
-
133
- pose = torch.from_numpy(pose.astype(np.float32)).to(self.device)
134
-
135
- # get rays
136
- resolution = self.opt.output_size
137
- rays_o, rays_d = get_rays(pose, resolution, resolution, self.opt.fovy)
138
-
139
- # update occ grid
140
- if self.training:
141
- def occ_eval_fn(xs):
142
- sigmas = self.get_density(xs)
143
- return self.render_step_size * sigmas
144
-
145
- self.estimator.update_every_n_steps(self.step, occ_eval_fn=occ_eval_fn, occ_thre=0.01, n=8)
146
- self.step += 1
147
-
148
- # render
149
- def sigma_fn(t_starts, t_ends, ray_indices):
150
- t_origins = rays_o[ray_indices]
151
- t_dirs = rays_d[ray_indices]
152
- xs = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0
153
- sigmas = self.get_density(xs)
154
- return sigmas.squeeze(-1)
155
-
156
- with torch.no_grad():
157
- ray_indices, t_starts, t_ends = self.estimator.sampling(
158
- rays_o,
159
- rays_d,
160
- sigma_fn=sigma_fn,
161
- near_plane=0.01,
162
- far_plane=100,
163
- render_step_size=self.render_step_size,
164
- stratified=self.training,
165
- cone_angle=0,
166
- )
167
-
168
- t_origins = rays_o[ray_indices]
169
- t_dirs = rays_d[ray_indices]
170
- xs = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0
171
- sigmas = self.get_density(xs).squeeze(-1)
172
- rgbs = torch.sigmoid(self.mlp(self.encoder(xs)))
173
-
174
- n_rays=rays_o.shape[0]
175
- weights, trans, alphas = nerfacc.render_weight_from_density(t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=n_rays)
176
- color = nerfacc.accumulate_along_rays(weights, values=rgbs, ray_indices=ray_indices, n_rays=n_rays)
177
- alpha = nerfacc.accumulate_along_rays(weights, values=None, ray_indices=ray_indices, n_rays=n_rays)
178
-
179
- color = color + 1 * (1.0 - alpha)
180
-
181
- color = color.view(resolution, resolution, 3).clamp(0, 1).permute(2, 0, 1).contiguous()
182
- alpha = alpha.view(resolution, resolution).clamp(0, 1).contiguous()
183
-
184
- return color, alpha
185
-
186
- def fit_nerf(self, iters=512, resolution=128):
187
-
188
- self.opt.output_size = resolution
189
-
190
- optimizer = torch.optim.Adam([
191
- {'params': self.encoder_density.parameters(), 'lr': 1e-2},
192
- {'params': self.encoder.parameters(), 'lr': 1e-2},
193
- {'params': self.mlp_density.parameters(), 'lr': 1e-3},
194
- {'params': self.mlp.parameters(), 'lr': 1e-3},
195
- ])
196
-
197
- print(f"[INFO] fitting nerf...")
198
- pbar = tqdm.trange(iters)
199
- for i in pbar:
200
-
201
- ver = np.random.randint(-45, 45)
202
- hor = np.random.randint(-180, 180)
203
- rad = np.random.uniform(1.5, 3.0)
204
-
205
- pose = orbit_camera(ver, hor, rad)
206
-
207
- image_gt, alpha_gt = self.render_gs(pose)
208
- image_pred, alpha_pred = self.render_nerf(pose)
209
-
210
- # if i % 200 == 0:
211
- # kiui.vis.plot_image(image_gt, alpha_gt, image_pred, alpha_pred)
212
-
213
- loss_mse = F.mse_loss(image_pred, image_gt) + 0.1 * F.mse_loss(alpha_pred, alpha_gt)
214
- loss = loss_mse #+ 0.1 * self.encoder_density.tv_loss() #+ 0.0001 * self.encoder_density.density_loss()
215
-
216
- loss.backward()
217
- self.encoder_density.grad_total_variation(1e-8)
218
-
219
- optimizer.step()
220
- optimizer.zero_grad()
221
-
222
- pbar.set_description(f"MSE = {loss_mse.item():.6f}")
223
-
224
- print(f"[INFO] finished fitting nerf!")
225
-
226
- def render_mesh(self, pose):
227
-
228
- h = w = self.opt.output_size
229
-
230
- v = self.v + self.deform
231
- f = self.f
232
-
233
- pose = torch.from_numpy(pose.astype(np.float32)).to(v.device)
234
-
235
- # get v_clip and render rgb
236
- v_cam = torch.matmul(F.pad(v, pad=(0, 1), mode='constant', value=1.0), torch.inverse(pose).T).float().unsqueeze(0)
237
- v_clip = v_cam @ self.proj.T
238
-
239
- rast, rast_db = dr.rasterize(self.glctx, v_clip, f, (h, w))
240
-
241
- alpha = torch.clamp(rast[..., -1:], 0, 1).contiguous() # [1, H, W, 1]
242
- alpha = dr.antialias(alpha, rast, v_clip, f).clamp(0, 1).squeeze(-1).squeeze(0) # [H, W] important to enable gradients!
243
-
244
- if self.albedo is None:
245
- xyzs, _ = dr.interpolate(v.unsqueeze(0), rast, f) # [1, H, W, 3]
246
- xyzs = xyzs.view(-1, 3)
247
- mask = (alpha > 0).view(-1)
248
- image = torch.zeros_like(xyzs, dtype=torch.float32)
249
- if mask.any():
250
- masked_albedo = torch.sigmoid(self.mlp(self.encoder(xyzs[mask].detach(), bound=1)))
251
- image[mask] = masked_albedo.float()
252
- else:
253
- texc, texc_db = dr.interpolate(self.vt.unsqueeze(0), rast, self.ft, rast_db=rast_db, diff_attrs='all')
254
- image = torch.sigmoid(dr.texture(self.albedo.unsqueeze(0), texc, uv_da=texc_db)) # [1, H, W, 3]
255
-
256
- image = image.view(1, h, w, 3)
257
- # image = dr.antialias(image, rast, v_clip, f).clamp(0, 1)
258
- image = image.squeeze(0).permute(2, 0, 1).contiguous() # [3, H, W]
259
- image = alpha * image + (1 - alpha)
260
-
261
- return image, alpha
262
-
263
- def fit_mesh(self, iters=2048, resolution=512, decimate_target=5e4):
264
-
265
- self.opt.output_size = resolution
266
-
267
- # init mesh from nerf
268
- grid_size = 256
269
- sigmas = np.zeros([grid_size, grid_size, grid_size], dtype=np.float32)
270
-
271
- S = 128
272
- density_thresh = 10
273
-
274
- X = torch.linspace(-1, 1, grid_size).split(S)
275
- Y = torch.linspace(-1, 1, grid_size).split(S)
276
- Z = torch.linspace(-1, 1, grid_size).split(S)
277
-
278
- for xi, xs in enumerate(X):
279
- for yi, ys in enumerate(Y):
280
- for zi, zs in enumerate(Z):
281
- xx, yy, zz = torch.meshgrid(xs, ys, zs, indexing='ij')
282
- pts = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1) # [S, 3]
283
- val = self.get_density(pts.to(self.device))
284
- sigmas[xi * S: xi * S + len(xs), yi * S: yi * S + len(ys), zi * S: zi * S + len(zs)] = val.reshape(len(xs), len(ys), len(zs)).detach().cpu().numpy() # [S, 1] --> [x, y, z]
285
-
286
- print(f'[INFO] marching cubes thresh: {density_thresh} ({sigmas.min()} ~ {sigmas.max()})')
287
-
288
- vertices, triangles = mcubes.marching_cubes(sigmas, density_thresh)
289
- vertices = vertices / (grid_size - 1.0) * 2 - 1
290
-
291
- # clean
292
- vertices = vertices.astype(np.float32)
293
- triangles = triangles.astype(np.int32)
294
- vertices, triangles = clean_mesh(vertices, triangles, remesh=True, remesh_size=0.01)
295
- if triangles.shape[0] > decimate_target:
296
- vertices, triangles = decimate_mesh(vertices, triangles, decimate_target, optimalplacement=False)
297
-
298
- self.v = torch.from_numpy(vertices).contiguous().float().to(self.device)
299
- self.f = torch.from_numpy(triangles).contiguous().int().to(self.device)
300
- self.deform = nn.Parameter(torch.zeros_like(self.v)).to(self.device)
301
-
302
- # fit mesh from gs
303
- lr_factor = 1
304
- optimizer = torch.optim.Adam([
305
- {'params': self.encoder.parameters(), 'lr': 1e-3 * lr_factor},
306
- {'params': self.mlp.parameters(), 'lr': 1e-3 * lr_factor},
307
- {'params': self.deform, 'lr': 1e-4},
308
- ])
309
-
310
- print(f"[INFO] fitting mesh...")
311
- pbar = tqdm.trange(iters)
312
- for i in pbar:
313
-
314
- ver = np.random.randint(-10, 10)
315
- hor = np.random.randint(-180, 180)
316
- rad = self.opt.cam_radius # np.random.uniform(1, 2)
317
-
318
- pose = orbit_camera(ver, hor, rad)
319
-
320
- image_gt, alpha_gt = self.render_gs(pose)
321
- image_pred, alpha_pred = self.render_mesh(pose)
322
-
323
- loss_mse = F.mse_loss(image_pred, image_gt) + 0.1 * F.mse_loss(alpha_pred, alpha_gt)
324
- # loss_lap = laplacian_smooth_loss(self.v + self.deform, self.f)
325
- loss_normal = normal_consistency(self.v + self.deform, self.f)
326
- loss_offsets = (self.deform ** 2).sum(-1).mean()
327
- loss = loss_mse + 0.001 * loss_normal + 0.1 * loss_offsets
328
-
329
- loss.backward()
330
-
331
- optimizer.step()
332
- optimizer.zero_grad()
333
-
334
- # remesh periodically
335
- if i > 0 and i % 512 == 0:
336
- vertices = (self.v + self.deform).detach().cpu().numpy()
337
- triangles = self.f.detach().cpu().numpy()
338
- vertices, triangles = clean_mesh(vertices, triangles, remesh=True, remesh_size=0.01)
339
- if triangles.shape[0] > decimate_target:
340
- vertices, triangles = decimate_mesh(vertices, triangles, decimate_target, optimalplacement=False)
341
- self.v = torch.from_numpy(vertices).contiguous().float().to(self.device)
342
- self.f = torch.from_numpy(triangles).contiguous().int().to(self.device)
343
- self.deform = nn.Parameter(torch.zeros_like(self.v)).to(self.device)
344
- lr_factor *= 0.5
345
- optimizer = torch.optim.Adam([
346
- {'params': self.encoder.parameters(), 'lr': 1e-3 * lr_factor},
347
- {'params': self.mlp.parameters(), 'lr': 1e-3 * lr_factor},
348
- {'params': self.deform, 'lr': 1e-4},
349
- ])
350
-
351
- pbar.set_description(f"MSE = {loss_mse.item():.6f}")
352
-
353
- # last clean
354
- vertices = (self.v + self.deform).detach().cpu().numpy()
355
- triangles = self.f.detach().cpu().numpy()
356
- vertices, triangles = clean_mesh(vertices, triangles, remesh=False)
357
- self.v = torch.from_numpy(vertices).contiguous().float().to(self.device)
358
- self.f = torch.from_numpy(triangles).contiguous().int().to(self.device)
359
- self.deform = nn.Parameter(torch.zeros_like(self.v).to(self.device))
360
-
361
- print(f"[INFO] finished fitting mesh!")
362
-
363
- # uv mesh refine
364
- def fit_mesh_uv(self, iters=512, resolution=512, texture_resolution=1024, padding=2):
365
-
366
- self.opt.output_size = resolution
367
-
368
- # unwrap uv
369
- print(f"[INFO] uv unwrapping...")
370
- mesh = Mesh(v=self.v, f=self.f, albedo=None, device=self.device)
371
- mesh.auto_normal()
372
- mesh.auto_uv()
373
-
374
- self.vt = mesh.vt
375
- self.ft = mesh.ft
376
-
377
- # render uv maps
378
- h = w = texture_resolution
379
- uv = mesh.vt * 2.0 - 1.0 # uvs to range [-1, 1]
380
- uv = torch.cat((uv, torch.zeros_like(uv[..., :1]), torch.ones_like(uv[..., :1])), dim=-1) # [N, 4]
381
-
382
- rast, _ = dr.rasterize(self.glctx, uv.unsqueeze(0), mesh.ft, (h, w)) # [1, h, w, 4]
383
- xyzs, _ = dr.interpolate(mesh.v.unsqueeze(0), rast, mesh.f) # [1, h, w, 3]
384
- mask, _ = dr.interpolate(torch.ones_like(mesh.v[:, :1]).unsqueeze(0), rast, mesh.f) # [1, h, w, 1]
385
-
386
- # masked query
387
- xyzs = xyzs.view(-1, 3)
388
- mask = (mask > 0).view(-1)
389
-
390
- albedo = torch.zeros(h * w, 3, device=self.device, dtype=torch.float32)
391
-
392
- if mask.any():
393
- print(f"[INFO] querying texture...")
394
-
395
- xyzs = xyzs[mask] # [M, 3]
396
-
397
- # batched inference to avoid OOM
398
- batch = []
399
- head = 0
400
- while head < xyzs.shape[0]:
401
- tail = min(head + 640000, xyzs.shape[0])
402
- batch.append(torch.sigmoid(self.mlp(self.encoder(xyzs[head:tail]))).float())
403
- head += 640000
404
-
405
- albedo[mask] = torch.cat(batch, dim=0)
406
-
407
- albedo = albedo.view(h, w, -1)
408
- mask = mask.view(h, w)
409
- albedo = uv_padding(albedo, mask, padding)
410
-
411
- # optimize texture
412
- self.albedo = nn.Parameter(inverse_sigmoid(albedo)).to(self.device)
413
-
414
- optimizer = torch.optim.Adam([
415
- {'params': self.albedo, 'lr': 1e-3},
416
- ])
417
-
418
- print(f"[INFO] fitting mesh texture...")
419
- pbar = tqdm.trange(iters)
420
- for i in pbar:
421
-
422
- # shrink to front view as we care more about it...
423
- ver = np.random.randint(-5, 5)
424
- hor = np.random.randint(-15, 15)
425
- rad = self.opt.cam_radius # np.random.uniform(1, 2)
426
-
427
- pose = orbit_camera(ver, hor, rad)
428
-
429
- image_gt, alpha_gt = self.render_gs(pose)
430
- image_pred, alpha_pred = self.render_mesh(pose)
431
-
432
- loss_mse = F.mse_loss(image_pred, image_gt)
433
- loss = loss_mse
434
-
435
- loss.backward()
436
-
437
- optimizer.step()
438
- optimizer.zero_grad()
439
-
440
- pbar.set_description(f"MSE = {loss_mse.item():.6f}")
441
-
442
- print(f"[INFO] finished fitting mesh texture!")
443
-
444
-
445
- @torch.no_grad()
446
- def export_mesh(self, path):
447
-
448
- mesh = Mesh(v=self.v, f=self.f, vt=self.vt, ft=self.ft, albedo=torch.sigmoid(self.albedo), device=self.device)
449
- mesh.auto_normal()
450
- mesh.write(path)
451
-
452
-
453
- opt = tyro.cli(AllConfigs)
454
-
455
- # load a saved ply and convert to mesh
456
- assert opt.test_path.endswith('.ply'), '--test_path must be a .ply file saved by infer.py'
457
-
458
- converter = Converter(opt).cuda()
459
- converter.fit_nerf()
460
- converter.fit_mesh()
461
- converter.fit_mesh_uv()
462
- converter.export_mesh(opt.test_path.replace('.ply', '.glb'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
core/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (123 Bytes). View file
 
core/__pycache__/attention.cpython-39.pyc ADDED
Binary file (4.36 kB). View file
 
core/__pycache__/gs.cpython-39.pyc ADDED
Binary file (5.48 kB). View file
 
core/__pycache__/models.cpython-39.pyc ADDED
Binary file (4.47 kB). View file
 
core/__pycache__/options.cpython-39.pyc ADDED
Binary file (2.46 kB). View file
 
core/__pycache__/provider_objaverse.cpython-39.pyc ADDED
Binary file (7.74 kB). View file
 
core/__pycache__/unet.cpython-39.pyc ADDED
Binary file (7.45 kB). View file
 
core/__pycache__/utils.cpython-39.pyc ADDED
Binary file (2.54 kB). View file
 
core/models.py CHANGED
@@ -131,9 +131,12 @@ class LGM(nn.Module):
131
 
132
  results['gaussians'] = gaussians
133
 
134
- # always use white bg
135
- bg_color = torch.ones(3, dtype=torch.float32, device=gaussians.device)
136
-
 
 
 
137
  # use the other views for rendering and supervision
138
  results = self.gs.render(gaussians, data['cam_view'], data['cam_view_proj'], data['cam_pos'], bg_color=bg_color)
139
  pred_images = results['image'] # [B, V, C, output_size, output_size]
@@ -168,4 +171,4 @@ class LGM(nn.Module):
168
  psnr = -10 * torch.log10(torch.mean((pred_images.detach() - gt_images) ** 2))
169
  results['psnr'] = psnr
170
 
171
- return results
 
131
 
132
  results['gaussians'] = gaussians
133
 
134
+ # random bg for training
135
+ if self.training:
136
+ bg_color = torch.rand(3, dtype=torch.float32, device=gaussians.device)
137
+ else:
138
+ bg_color = torch.ones(3, dtype=torch.float32, device=gaussians.device)
139
+
140
  # use the other views for rendering and supervision
141
  results = self.gs.render(gaussians, data['cam_view'], data['cam_view_proj'], data['cam_pos'], bg_color=bg_color)
142
  pred_images = results['image'] # [B, V, C, output_size, output_size]
 
171
  psnr = -10 * torch.log10(torch.mean((pred_images.detach() - gt_images) ** 2))
172
  results['psnr'] = psnr
173
 
174
+ return results
core/options.py CHANGED
@@ -9,16 +9,16 @@ class Options:
9
  # Unet image input size
10
  input_size: int = 256
11
  # Unet definition
12
- down_channels: Tuple[int, ...] = (64, 128, 256, 512, 1024, 1024)
13
- down_attention: Tuple[bool, ...] = (False, False, False, True, True, True)
14
  mid_attention: bool = True
15
- up_channels: Tuple[int, ...] = (1024, 1024, 512, 256)
16
- up_attention: Tuple[bool, ...] = (True, True, True, False)
17
  # Unet output size, dependent on the input_size and U-Net structure!
18
  splat_size: int = 64
19
  # gaussian render size
20
  output_size: int = 256
21
-
22
  ### dataset
23
  # data mode (only support s3 now)
24
  data_mode: Literal['s3'] = 's3'
@@ -40,7 +40,7 @@ class Options:
40
  ### training
41
  # workspace
42
  workspace: str = './workspace'
43
- # resume
44
  resume: Optional[str] = None
45
  # batch size (per-GPU)
46
  batch_size: int = 8
@@ -117,4 +117,4 @@ config_defaults['tiny'] = Options(
117
  mixed_precision='bf16',
118
  )
119
 
120
- AllConfigs = tyro.extras.subcommand_type_from_defaults(config_defaults, config_doc)
 
9
  # Unet image input size
10
  input_size: int = 256
11
  # Unet definition
12
+ down_channels: Tuple[int] = (64, 128, 256, 512, 1024, 1024)
13
+ down_attention: Tuple[bool] = (False, False, False, True, True, True)
14
  mid_attention: bool = True
15
+ up_channels: Tuple[int] = (1024, 1024, 512, 256)
16
+ up_attention: Tuple[bool] = (True, True, True, False)
17
  # Unet output size, dependent on the input_size and U-Net structure!
18
  splat_size: int = 64
19
  # gaussian render size
20
  output_size: int = 256
21
+
22
  ### dataset
23
  # data mode (only support s3 now)
24
  data_mode: Literal['s3'] = 's3'
 
40
  ### training
41
  # workspace
42
  workspace: str = './workspace'
43
+ # resume
44
  resume: Optional[str] = None
45
  # batch size (per-GPU)
46
  batch_size: int = 8
 
117
  mixed_precision='bf16',
118
  )
119
 
120
+ AllConfigs = tyro.extras.subcommand_type_from_defaults(config_defaults, config_doc)
core/unet.py CHANGED
@@ -3,10 +3,10 @@ import torch.nn as nn
3
  import torch.nn.functional as F
4
 
5
  import numpy as np
6
- from typing import Tuple, Literal
7
  from functools import partial
8
 
9
- from core.attention import MemEffAttention
10
 
11
  class MVAttention(nn.Module):
12
  def __init__(
@@ -236,11 +236,11 @@ class UNet(nn.Module):
236
  self,
237
  in_channels: int = 3,
238
  out_channels: int = 3,
239
- down_channels: Tuple[int, ...] = (64, 128, 256, 512, 1024),
240
- down_attention: Tuple[bool, ...] = (False, False, False, True, True),
241
  mid_attention: bool = True,
242
- up_channels: Tuple[int, ...] = (1024, 512, 256),
243
- up_attention: Tuple[bool, ...] = (True, True, False),
244
  layers_per_block: int = 2,
245
  skip_scale: float = np.sqrt(0.5),
246
  ):
@@ -316,4 +316,4 @@ class UNet(nn.Module):
316
  x = F.silu(x)
317
  x = self.conv_out(x) # [B, Cout, H', W']
318
 
319
- return x
 
3
  import torch.nn.functional as F
4
 
5
  import numpy as np
6
+ from typing import Tuple, Optional, Literal
7
  from functools import partial
8
 
9
+ from core.attention import MemEffAttention, MemEffCrossAttention
10
 
11
  class MVAttention(nn.Module):
12
  def __init__(
 
236
  self,
237
  in_channels: int = 3,
238
  out_channels: int = 3,
239
+ down_channels: Tuple[int] = (64, 128, 256, 512, 1024),
240
+ down_attention: Tuple[bool] = (False, False, False, True, True),
241
  mid_attention: bool = True,
242
+ up_channels: Tuple[int] = (1024, 512, 256),
243
+ up_attention: Tuple[bool] = (True, True, False),
244
  layers_per_block: int = 2,
245
  skip_scale: float = np.sqrt(0.5),
246
  ):
 
316
  x = F.silu(x)
317
  x = self.conv_out(x) # [B, Cout, H', W']
318
 
319
+ return x
data_test/anya_rgba.png DELETED
Binary file (32.9 kB)
 
data_test/bird.jpg ADDED
data_test/bird_rgba.png DELETED
Binary file (56.2 kB)
 
data_test/boy.jpg ADDED
data_test/cat_statue.jpg ADDED
data_test/catstatue_rgba.png DELETED
Binary file (45.5 kB)
 
data_test/dragontoy.jpg ADDED
data_test/frog_sweater.jpg ADDED
data_test/gso_rabbit.jpg ADDED
diff-gaussian-rasterization/.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ build/
2
+ diff_gaussian_rasterization.egg-info/
3
+ dist/
4
+
5
+ __pycache__
6
+
7
+ *.so
diff-gaussian-rasterization/.gitmodules ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [submodule "third_party/glm"]
2
+ path = third_party/glm
3
+ url = https://github.com/g-truc/glm.git
diff-gaussian-rasterization/CMakeLists.txt ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact george.drettakis@inria.fr
10
+ #
11
+
12
+ cmake_minimum_required(VERSION 3.20)
13
+
14
+ # Default GPU targets (sm_75, sm_86). Override with -DCMAKE_CUDA_ARCHITECTURES=... or the CUDAARCHS environment variable; must be set before project() enables CUDA.
15
+ if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT DEFINED ENV{CUDAARCHS})
16
+   set(CMAKE_CUDA_ARCHITECTURES 75 86)
17
+ endif()
18
+ project(DiffRast LANGUAGES CUDA CXX)
19
+
20
+ set(CMAKE_CXX_STANDARD 17)
21
+ set(CMAKE_CXX_EXTENSIONS OFF)
22
+ set(CMAKE_CUDA_STANDARD 17)
23
+
24
+ add_library(CudaRasterizer # static/shared is left to BUILD_SHARED_LIBS; CUDA_ARCHITECTURES is initialised from CMAKE_CUDA_ARCHITECTURES
25
+ cuda_rasterizer/backward.h
26
+ cuda_rasterizer/backward.cu
27
+ cuda_rasterizer/forward.h
28
+ cuda_rasterizer/forward.cu
29
+ cuda_rasterizer/auxiliary.h
30
+ cuda_rasterizer/rasterizer_impl.cu
31
+ cuda_rasterizer/rasterizer_impl.h
32
+ cuda_rasterizer/rasterizer.h
33
+ )
34
+
35
+ target_include_directories(CudaRasterizer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/cuda_rasterizer) # headers exposed to consumers
36
+ target_include_directories(CudaRasterizer PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/third_party/glm ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) # build-only dependencies
diff-gaussian-rasterization/LICENSE.md ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Gaussian-Splatting License
2
+ ===========================
3
+
4
+ **Inria** and **the Max Planck Institut for Informatik (MPII)** hold all the ownership rights on the *Software* named **gaussian-splatting**.
5
+ The *Software* is in the process of being registered with the Agence pour la Protection des
6
+ Programmes (APP).
7
+
8
+ The *Software* is still being developed by the *Licensor*.
9
+
10
+ *Licensor*'s goal is to allow the research community to use, test and evaluate
11
+ the *Software*.
12
+
13
+ ## 1. Definitions
14
+
15
+ *Licensee* means any person or entity that uses the *Software* and distributes
16
+ its *Work*.
17
+
18
+ *Licensor* means the owners of the *Software*, i.e Inria and MPII
19
+
20
+ *Software* means the original work of authorship made available under this
21
+ License ie gaussian-splatting.
22
+
23
+ *Work* means the *Software* and any additions to or derivative works of the
24
+ *Software* that are made available under this License.
25
+
26
+
27
+ ## 2. Purpose
28
+ This license is intended to define the rights granted to the *Licensee* by
29
+ Licensors under the *Software*.
30
+
31
+ ## 3. Rights granted
32
+
33
+ For the above reasons Licensors have decided to distribute the *Software*.
34
+ Licensors grant non-exclusive rights to use the *Software* for research purposes
35
+ to research users (both academic and industrial), free of charge, without right
36
+ to sublicense.. The *Software* may be used "non-commercially", i.e., for research
37
+ and/or evaluation purposes only.
38
+
39
+ Subject to the terms and conditions of this License, you are granted a
40
+ non-exclusive, royalty-free, license to reproduce, prepare derivative works of,
41
+ publicly display, publicly perform and distribute its *Work* and any resulting
42
+ derivative works in any form.
43
+
44
+ ## 4. Limitations
45
+
46
+ **4.1 Redistribution.** You may reproduce or distribute the *Work* only if (a) you do
47
+ so under this License, (b) you include a complete copy of this License with
48
+ your distribution, and (c) you retain without modification any copyright,
49
+ patent, trademark, or attribution notices that are present in the *Work*.
50
+
51
+ **4.2 Derivative Works.** You may specify that additional or different terms apply
52
+ to the use, reproduction, and distribution of your derivative works of the *Work*
53
+ ("Your Terms") only if (a) Your Terms provide that the use limitation in
54
+ Section 2 applies to your derivative works, and (b) you identify the specific
55
+ derivative works that are subject to Your Terms. Notwithstanding Your Terms,
56
+ this License (including the redistribution requirements in Section 3.1) will
57
+ continue to apply to the *Work* itself.
58
+
59
+ **4.3** Any other use without of prior consent of Licensors is prohibited. Research
60
+ users explicitly acknowledge having received from Licensors all information
61
+ allowing to appreciate the adequacy between of the *Software* and their needs and
62
+ to undertake all necessary precautions for its execution and use.
63
+
64
+ **4.4** The *Software* is provided both as a compiled library file and as source
65
+ code. In case of using the *Software* for a publication or other results obtained
66
+ through the use of the *Software*, users are strongly encouraged to cite the
67
+ corresponding publications as explained in the documentation of the *Software*.
68
+
69
+ ## 5. Disclaimer
70
+
71
+ THE USER CANNOT USE, EXPLOIT OR DISTRIBUTE THE *SOFTWARE* FOR COMMERCIAL PURPOSES
72
+ WITHOUT PRIOR AND EXPLICIT CONSENT OF LICENSORS. YOU MUST CONTACT INRIA FOR ANY
73
+ UNAUTHORIZED USE: stip-sophia.transfert@inria.fr . ANY SUCH ACTION WILL
74
+ CONSTITUTE A FORGERY. THIS *SOFTWARE* IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES
75
+ OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, WITH REGARDS TO COMMERCIAL
76
+ USE, PROFESSIONNAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALISATION OR
77
+ ADAPTATION. UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL INRIA OR THE
78
+ AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
79
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
80
+ GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION)
81
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
82
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, OUT OF OR
83
+ IN CONNECTION WITH THE *SOFTWARE* OR THE USE OR OTHER DEALINGS IN THE *SOFTWARE*.
diff-gaussian-rasterization/README.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Differential Gaussian Rasterization
2
+
3
+ **NOTE**: this is a modified version of the [original repository](https://github.com/graphdeco-inria/diff-gaussian-rasterization) that adds depth and alpha rendering (both forward and backward).
4
+
5
+ ```python
6
+ rendered_image, radii, rendered_depth, rendered_alpha = rasterizer(
7
+ means3D=means3D,
8
+ means2D=means2D,
9
+ shs=shs,
10
+ colors_precomp=colors_precomp,
11
+ opacities=opacity,
12
+ scales=scales,
13
+ rotations=rotations,
14
+ cov3D_precomp=cov3D_precomp,
15
+ )
16
+ ```
17
+
18
+
19
+ Used as the rasterization engine for the paper "3D Gaussian Splatting for Real-Time Radiance Field Rendering". If you can make use of it in your own research, please be so kind as to cite us.
20
+
21
+ <section class="section" id="BibTeX">
22
+ <div class="container is-max-desktop content">
23
+ <h2 class="title">BibTeX</h2>
24
+ <pre><code>@Article{kerbl3Dgaussians,
25
+ author = {Kerbl, Bernhard and Kopanas, Georgios and Leimk{\"u}hler, Thomas and Drettakis, George},
26
+ title = {3D Gaussian Splatting for Real-Time Radiance Field Rendering},
27
+ journal = {ACM Transactions on Graphics},
28
+ number = {4},
29
+ volume = {42},
30
+ month = {July},
31
+ year = {2023},
32
+ url = {https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/}
33
+ }</code></pre>
34
+ </div>
35
+ </section>
diff-gaussian-rasterization/cuda_rasterizer/auxiliary.h ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #ifndef CUDA_RASTERIZER_AUXILIARY_H_INCLUDED
13
+ #define CUDA_RASTERIZER_AUXILIARY_H_INCLUDED
14
+
15
+ #include "config.h"
16
+ #include "stdio.h"
17
+
18
+ #define BLOCK_SIZE (BLOCK_X * BLOCK_Y) // Product of the two block dimensions; BLOCK_X / BLOCK_Y are not defined in this header (presumably in config.h)
19
+ #define NUM_WARPS (BLOCK_SIZE/32) // BLOCK_SIZE divided by 32 (presumably the warp size - confirm)
20
+
21
+ // Constant factors of the spherical harmonics basis: SH_C0 and SH_C1 are scalars, SH_C2 holds 5 and SH_C3 holds 7 entries
22
+ __device__ const float SH_C0 = 0.28209479177387814f;
23
+ __device__ const float SH_C1 = 0.4886025119029199f;
24
+ __device__ const float SH_C2[] = {
25
+ 1.0925484305920792f,
26
+ -1.0925484305920792f,
27
+ 0.31539156525252005f,
28
+ -1.0925484305920792f,
29
+ 0.5462742152960396f
30
+ };
31
+ __device__ const float SH_C3[] = {
32
+ -0.5900435899266435f,
33
+ 2.890611442640554f,
34
+ -0.4570457994644658f,
35
+ 0.3731763325901154f,
36
+ -0.4570457994644658f,
37
+ 1.445305721320277f,
38
+ -0.5900435899266435f
39
+ };
40
+
41
+ __forceinline__ __device__ float ndc2Pix(float v, int S) // Maps v to ((v + 1) * S - 1) / 2, so v = -1 gives -0.5 and v = 1 gives S - 0.5
42
+ {
43
+ return ((v + 1.0) * S - 1.0) * 0.5; // the literals are doubles: evaluated in double, narrowed to float on return
44
+ }
45
+
46
+ __forceinline__ __device__ void getRect(const float2 p, int max_radius, uint2& rect_min, uint2& rect_max, dim3 grid) // Writes the range of block indices covered by the square of half-width max_radius around p, clamped to [0, grid.x] x [0, grid.y]
47
+ {
48
+ rect_min = { // lower corner: divide by the block dimensions, truncate to int, then clamp
49
+ min(grid.x, max((int)0, (int)((p.x - max_radius) / BLOCK_X))),
50
+ min(grid.y, max((int)0, (int)((p.y - max_radius) / BLOCK_Y)))
51
+ };
52
+ rect_max = { // upper corner: adding BLOCK_X - 1 / BLOCK_Y - 1 before dividing biases the truncation upwards
53
+ min(grid.x, max((int)0, (int)((p.x + max_radius + BLOCK_X - 1) / BLOCK_X))),
54
+ min(grid.y, max((int)0, (int)((p.y + max_radius + BLOCK_Y - 1) / BLOCK_Y)))
55
+ };
56
+ }
57
+
58
+ __forceinline__ __device__ float3 transformPoint4x3(const float3& p, const float* matrix) // Applies the 3x3 part of a 16-float matrix to p and adds the offset stored in matrix[12..14]
59
+ {
60
+ float3 transformed = { // element (row r, column c) is read from matrix[4 * c + r]
61
+ matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12],
62
+ matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13],
63
+ matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14],
64
+ };
65
+ return transformed;
66
+ }
67
+
68
+ __forceinline__ __device__ float4 transformPoint4x4(const float3& p, const float* matrix) // Multiplies the 16-float matrix with (p.x, p.y, p.z, 1) and returns all four components; no division by w happens here
69
+ {
70
+ float4 transformed = { // element (row r, column c) is read from matrix[4 * c + r]
71
+ matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12],
72
+ matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13],
73
+ matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14],
74
+ matrix[3] * p.x + matrix[7] * p.y + matrix[11] * p.z + matrix[15]
75
+ };
76
+ return transformed;
77
+ }
78
+
79
+ __forceinline__ __device__ float3 transformVec4x3(const float3& p, const float* matrix) // Applies only the 3x3 part of the matrix to p; the offset in matrix[12..14] is not added
80
+ {
81
+ float3 transformed = {
82
+ matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z,
83
+ matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z,
84
+ matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z,
85
+ };
86
+ return transformed;
87
+ }
88
+
89
+ __forceinline__ __device__ float3 transformVec4x3Transpose(const float3& p, const float* matrix) // Applies the transpose of the 3x3 part used by transformVec4x3 to p (indices 0,1,2 / 4,5,6 / 8,9,10 per output component)
90
+ {
91
+ float3 transformed = {
92
+ matrix[0] * p.x + matrix[1] * p.y + matrix[2] * p.z,
93
+ matrix[4] * p.x + matrix[5] * p.y + matrix[6] * p.z,
94
+ matrix[8] * p.x + matrix[9] * p.y + matrix[10] * p.z,
95
+ };
96
+ return transformed;
97
+ }
98
+
99
+ __forceinline__ __device__ float dnormvdz(float3 v, float3 dv) // Returns the same value as the z component computed by dnormvdv(float3, float3): the z part of the gradient through v / |v|, given gradient dv w.r.t. the normalized vector
100
+ {
101
+ float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; // squared length of v
102
+ float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); // 1 / |v|^3; no guard against a zero-length v
103
+ float dnormvdz = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32;
104
+ return dnormvdz;
105
+ }
106
+
107
+ __forceinline__ __device__ float3 dnormvdv(float3 v, float3 dv) // Back-propagates gradient dv (w.r.t. v / |v|) through the normalization and returns the gradient w.r.t. v
108
+ {
109
+ float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; // squared length of v
110
+ float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); // 1 / |v|^3; no guard against a zero-length v
111
+
112
+ float3 dnormvdv;
113
+ dnormvdv.x = ((+sum2 - v.x * v.x) * dv.x - v.y * v.x * dv.y - v.z * v.x * dv.z) * invsum32; // each row is (|v|^2 * I - v * v^T) applied to dv, scaled by 1 / |v|^3
114
+ dnormvdv.y = (-v.x * v.y * dv.x + (sum2 - v.y * v.y) * dv.y - v.z * v.y * dv.z) * invsum32;
115
+ dnormvdv.z = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32;
116
+ return dnormvdv;
117
+ }
118
+
119
+ __forceinline__ __device__ float4 dnormvdv(float4 v, float4 dv) // Four-component overload: back-propagates gradient dv (w.r.t. v / |v|) through the normalization and returns the gradient w.r.t. v
120
+ {
121
+ float sum2 = v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w; // squared length of v
122
+ float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); // 1 / |v|^3; no guard against a zero-length v
123
+
124
+ float4 vdv = { v.x * dv.x, v.y * dv.y, v.z * dv.z, v.w * dv.w }; // component-wise products v_i * dv_i
125
+ float vdv_sum = vdv.x + vdv.y + vdv.z + vdv.w; // dot(v, dv)
126
+ float4 dnormvdv;
127
+ dnormvdv.x = ((sum2 - v.x * v.x) * dv.x - v.x * (vdv_sum - vdv.x)) * invsum32; // (vdv_sum - vdv.x) is the dot product over the other three components
128
+ dnormvdv.y = ((sum2 - v.y * v.y) * dv.y - v.y * (vdv_sum - vdv.y)) * invsum32;
129
+ dnormvdv.z = ((sum2 - v.z * v.z) * dv.z - v.z * (vdv_sum - vdv.z)) * invsum32;
130
+ dnormvdv.w = ((sum2 - v.w * v.w) * dv.w - v.w * (vdv_sum - vdv.w)) * invsum32;
131
+ return dnormvdv;
132
+ }
133
+
134
+ __forceinline__ __device__ float sigmoid(float x) // Logistic function 1 / (1 + e^(-x))
135
+ {
136
+ return 1.0f / (1.0f + expf(-x));
137
+ }
138
+
139
+ __forceinline__ __device__ bool in_frustum(int idx, // Returns true when point idx has a view-space z greater than 0.2; its view-space position is written to p_view either way
140
+ const float* orig_points, // read as three consecutive floats (x, y, z) per point
141
+ const float* viewmatrix, // passed to transformPoint4x3 to obtain p_view
142
+ const float* projmatrix, // no longer read; kept for interface compatibility
143
+ bool prefiltered, // when true, a rejected point is reported with printf and the kernel is trapped
144
+ float3& p_view)
145
+ {
146
+ float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] };
147
+
148
+ // Only the view-space depth is tested below, so the projected
149
+ // position of the point is not computed. projmatrix stays in the
150
+ // signature so that existing callers keep compiling.
151
+ (void)projmatrix;
152
+ p_view = transformPoint4x3(p_orig, viewmatrix);
153
+
154
+ if (p_view.z <= 0.2f) // at or behind the 0.2 depth threshold: reject
155
+ {
156
+ if (prefiltered)
157
+ {
158
+ printf("Point is filtered although prefiltered is set. This shouldn't happen!");
159
+ __trap();
160
+ }
161
+ return false;
162
+ }
163
+ return true;
164
+ }
165
+
166
+ #define CHECK_CUDA(A, debug) /* Runs statement A; if debug is true, also synchronizes the device and throws std::runtime_error on failure. Expands to two statements (not wrapped in do/while). NOTE(review): std::cerr and std::runtime_error need <iostream> and <stdexcept>, which this header does not include itself. */ \
167
+ A; if(debug) { \
168
+ auto ret = cudaDeviceSynchronize(); \
169
+ if (ret != cudaSuccess) { \
170
+ std::cerr << "\n[CUDA ERROR] in " << __FILE__ << "\nLine " << __LINE__ << ": " << cudaGetErrorString(ret); \
171
+ throw std::runtime_error(cudaGetErrorString(ret)); \
172
+ } \
173
+ }
174
+
175
+ #endif
diff-gaussian-rasterization/cuda_rasterizer/backward.cu ADDED
@@ -0,0 +1,712 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #include "backward.h"
13
+ #include "auxiliary.h"
14
+ #include <cooperative_groups.h>
15
+ #include <cooperative_groups/reduce.h>
16
+ namespace cg = cooperative_groups;
17
+
18
+ // Backward pass for the conversion of spherical harmonics to RGB for
19
+ // Gaussian idx: writes dL/dSH into dL_dshs and adds the view-dependent part of dL/dmean to dL_dmeans[idx].
20
+ __device__ void computeColorFromSH(int idx, int deg, int max_coeffs, const glm::vec3* means, glm::vec3 campos, const float* shs, const bool* clamped, const glm::vec3* dL_dcolor, glm::vec3* dL_dmeans, glm::vec3* dL_dshs)
21
+ {
22
+ // Compute intermediate values, as it is done during forward
23
+ glm::vec3 pos = means[idx];
24
+ glm::vec3 dir_orig = pos - campos;
25
+ glm::vec3 dir = dir_orig / glm::length(dir_orig); // unit vector from campos to the Gaussian's mean
26
+
27
+ glm::vec3* sh = ((glm::vec3*)shs) + idx * max_coeffs; // shs viewed as vec3 coefficients, max_coeffs per Gaussian
28
+
29
+ // Use PyTorch rule for clamping: if clamping was applied,
30
+ // gradient becomes 0.
31
+ glm::vec3 dL_dRGB = dL_dcolor[idx];
32
+ dL_dRGB.x *= clamped[3 * idx + 0] ? 0 : 1; // clamped holds one flag per colour channel
33
+ dL_dRGB.y *= clamped[3 * idx + 1] ? 0 : 1;
34
+ dL_dRGB.z *= clamped[3 * idx + 2] ? 0 : 1;
35
+
36
+ glm::vec3 dRGBdx(0, 0, 0); // derivatives of the colour w.r.t. the direction components, accumulated per degree below
37
+ glm::vec3 dRGBdy(0, 0, 0);
38
+ glm::vec3 dRGBdz(0, 0, 0);
39
+ float x = dir.x;
40
+ float y = dir.y;
41
+ float z = dir.z;
42
+
43
+ // Target location for this Gaussian to write SH gradients to
44
+ glm::vec3* dL_dsh = dL_dshs + idx * max_coeffs;
45
+
46
+ // No tricks here, just high school-level calculus.
47
+ float dRGBdsh0 = SH_C0;
48
+ dL_dsh[0] = dRGBdsh0 * dL_dRGB;
49
+ if (deg > 0)
50
+ {
51
+ float dRGBdsh1 = -SH_C1 * y;
52
+ float dRGBdsh2 = SH_C1 * z;
53
+ float dRGBdsh3 = -SH_C1 * x;
54
+ dL_dsh[1] = dRGBdsh1 * dL_dRGB;
55
+ dL_dsh[2] = dRGBdsh2 * dL_dRGB;
56
+ dL_dsh[3] = dRGBdsh3 * dL_dRGB;
57
+
58
+ dRGBdx = -SH_C1 * sh[3];
59
+ dRGBdy = -SH_C1 * sh[1];
60
+ dRGBdz = SH_C1 * sh[2];
61
+
62
+ if (deg > 1)
63
+ {
64
+ float xx = x * x, yy = y * y, zz = z * z;
65
+ float xy = x * y, yz = y * z, xz = x * z;
66
+
67
+ float dRGBdsh4 = SH_C2[0] * xy;
68
+ float dRGBdsh5 = SH_C2[1] * yz;
69
+ float dRGBdsh6 = SH_C2[2] * (2.f * zz - xx - yy);
70
+ float dRGBdsh7 = SH_C2[3] * xz;
71
+ float dRGBdsh8 = SH_C2[4] * (xx - yy);
72
+ dL_dsh[4] = dRGBdsh4 * dL_dRGB;
73
+ dL_dsh[5] = dRGBdsh5 * dL_dRGB;
74
+ dL_dsh[6] = dRGBdsh6 * dL_dRGB;
75
+ dL_dsh[7] = dRGBdsh7 * dL_dRGB;
76
+ dL_dsh[8] = dRGBdsh8 * dL_dRGB;
77
+
78
+ dRGBdx += SH_C2[0] * y * sh[4] + SH_C2[2] * 2.f * -x * sh[6] + SH_C2[3] * z * sh[7] + SH_C2[4] * 2.f * x * sh[8];
79
+ dRGBdy += SH_C2[0] * x * sh[4] + SH_C2[1] * z * sh[5] + SH_C2[2] * 2.f * -y * sh[6] + SH_C2[4] * 2.f * -y * sh[8];
80
+ dRGBdz += SH_C2[1] * y * sh[5] + SH_C2[2] * 2.f * 2.f * z * sh[6] + SH_C2[3] * x * sh[7];
81
+
82
+ if (deg > 2)
83
+ {
84
+ float dRGBdsh9 = SH_C3[0] * y * (3.f * xx - yy);
85
+ float dRGBdsh10 = SH_C3[1] * xy * z;
86
+ float dRGBdsh11 = SH_C3[2] * y * (4.f * zz - xx - yy);
87
+ float dRGBdsh12 = SH_C3[3] * z * (2.f * zz - 3.f * xx - 3.f * yy);
88
+ float dRGBdsh13 = SH_C3[4] * x * (4.f * zz - xx - yy);
89
+ float dRGBdsh14 = SH_C3[5] * z * (xx - yy);
90
+ float dRGBdsh15 = SH_C3[6] * x * (xx - 3.f * yy);
91
+ dL_dsh[9] = dRGBdsh9 * dL_dRGB;
92
+ dL_dsh[10] = dRGBdsh10 * dL_dRGB;
93
+ dL_dsh[11] = dRGBdsh11 * dL_dRGB;
94
+ dL_dsh[12] = dRGBdsh12 * dL_dRGB;
95
+ dL_dsh[13] = dRGBdsh13 * dL_dRGB;
96
+ dL_dsh[14] = dRGBdsh14 * dL_dRGB;
97
+ dL_dsh[15] = dRGBdsh15 * dL_dRGB; // highest index written: max_coeffs must be at least 16 when deg > 2
98
+
99
+ dRGBdx += (
100
+ SH_C3[0] * sh[9] * 3.f * 2.f * xy +
101
+ SH_C3[1] * sh[10] * yz +
102
+ SH_C3[2] * sh[11] * -2.f * xy +
103
+ SH_C3[3] * sh[12] * -3.f * 2.f * xz +
104
+ SH_C3[4] * sh[13] * (-3.f * xx + 4.f * zz - yy) +
105
+ SH_C3[5] * sh[14] * 2.f * xz +
106
+ SH_C3[6] * sh[15] * 3.f * (xx - yy));
107
+
108
+ dRGBdy += (
109
+ SH_C3[0] * sh[9] * 3.f * (xx - yy) +
110
+ SH_C3[1] * sh[10] * xz +
111
+ SH_C3[2] * sh[11] * (-3.f * yy + 4.f * zz - xx) +
112
+ SH_C3[3] * sh[12] * -3.f * 2.f * yz +
113
+ SH_C3[4] * sh[13] * -2.f * xy +
114
+ SH_C3[5] * sh[14] * -2.f * yz +
115
+ SH_C3[6] * sh[15] * -3.f * 2.f * xy);
116
+
117
+ dRGBdz += (
118
+ SH_C3[1] * sh[10] * xy +
119
+ SH_C3[2] * sh[11] * 4.f * 2.f * yz +
120
+ SH_C3[3] * sh[12] * 3.f * (2.f * zz - xx - yy) +
121
+ SH_C3[4] * sh[13] * 4.f * 2.f * xz +
122
+ SH_C3[5] * sh[14] * (xx - yy));
123
+ }
124
+ }
125
+ }
126
+
127
+ // The view direction is an input to the computation. View direction
128
+ // is influenced by the Gaussian's mean, so SHs gradients
129
+ // must propagate back into 3D position.
130
+ glm::vec3 dL_ddir(glm::dot(dRGBdx, dL_dRGB), glm::dot(dRGBdy, dL_dRGB), glm::dot(dRGBdz, dL_dRGB));
131
+
132
+ // Account for normalization of direction
133
+ float3 dL_dmean = dnormvdv(float3{ dir_orig.x, dir_orig.y, dir_orig.z }, float3{ dL_ddir.x, dL_ddir.y, dL_ddir.z });
134
+
135
+ // Gradients of loss w.r.t. Gaussian means, but only the portion
136
+ // that is caused because the mean affects the view-dependent color.
137
+ // Additional mean gradient is accumulated in below methods.
138
+ dL_dmeans[idx] += glm::vec3(dL_dmean.x, dL_dmean.y, dL_dmean.z); // += : added to whatever dL_dmeans[idx] already holds
139
+ }
140
+
141
+ // Backward version of the INVERSE 2D covariance matrix computation
142
+ // (because of its length, launched as a separate kernel before the other
143
+ // backward steps contained in preprocess). Writes dL_dcov and dL_dmeans for each processed Gaussian.
144
+ __global__ void computeCov2DCUDA(int P,
145
+ const float3* means,
146
+ const int* radii,
147
+ const float* cov3Ds,
148
+ const float h_x, float h_y,
149
+ const float tan_fovx, float tan_fovy,
150
+ const float* view_matrix,
151
+ const float* dL_dconics,
152
+ float3* dL_dmeans,
153
+ float* dL_dcov)
154
+ {
155
+ auto idx = cg::this_grid().thread_rank(); // the thread's rank in the grid is used as the Gaussian index
156
+ if (idx >= P || !(radii[idx] > 0)) // skip threads beyond P and Gaussians whose radius is not positive
157
+ return;
158
+
159
+ // Reading location of 3D covariance for this Gaussian
160
+ const float* cov3D = cov3Ds + 6 * idx; // 6 floats per Gaussian, expanded to the symmetric 3x3 Vrk below
161
+
162
+ // Fetch gradients, recompute 2D covariance and relevant
163
+ // intermediate forward results needed in the backward.
164
+ float3 mean = means[idx];
165
+ float3 dL_dconic = { dL_dconics[4 * idx], dL_dconics[4 * idx + 1], dL_dconics[4 * idx + 3] }; // stride of 4 floats per Gaussian; entries 0, 1 and 3 are used
166
+ float3 t = transformPoint4x3(mean, view_matrix);
167
+
168
+ const float limx = 1.3f * tan_fovx;
169
+ const float limy = 1.3f * tan_fovy;
170
+ const float txtz = t.x / t.z;
171
+ const float tytz = t.y / t.z;
172
+ t.x = min(limx, max(-limx, txtz)) * t.z; // clamp x/z and y/z to +-1.3 * tan(fov)
173
+ t.y = min(limy, max(-limy, tytz)) * t.z;
174
+
175
+ const float x_grad_mul = txtz < -limx || txtz > limx ? 0 : 1; // 0 when the clamp above was active, so no gradient flows to t.x / t.y in that case
176
+ const float y_grad_mul = tytz < -limy || tytz > limy ? 0 : 1;
177
+
178
+ glm::mat3 J = glm::mat3(h_x / t.z, 0.0f, -(h_x * t.x) / (t.z * t.z),
179
+ 0.0f, h_y / t.z, -(h_y * t.y) / (t.z * t.z),
180
+ 0, 0, 0);
181
+
182
+ glm::mat3 W = glm::mat3(
183
+ view_matrix[0], view_matrix[4], view_matrix[8],
184
+ view_matrix[1], view_matrix[5], view_matrix[9],
185
+ view_matrix[2], view_matrix[6], view_matrix[10]);
186
+
187
+ glm::mat3 Vrk = glm::mat3(
188
+ cov3D[0], cov3D[1], cov3D[2],
189
+ cov3D[1], cov3D[3], cov3D[4],
190
+ cov3D[2], cov3D[4], cov3D[5]);
191
+
192
+ glm::mat3 T = W * J;
193
+
194
+ glm::mat3 cov2D = glm::transpose(T) * glm::transpose(Vrk) * T;
195
+
196
+ // Use helper variables for 2D covariance entries. More compact.
197
+ float a = cov2D[0][0] += 0.3f; // 0.3 is added to both diagonal entries (presumably mirroring the forward pass - confirm against forward.cu)
198
+ float b = cov2D[0][1];
199
+ float c = cov2D[1][1] += 0.3f;
200
+
201
+ float denom = a * c - b * b; // determinant of the 2x2 covariance
202
+ float dL_da = 0, dL_db = 0, dL_dc = 0;
203
+ float denom2inv = 1.0f / ((denom * denom) + 0.0000001f); // small constant avoids division by zero
204
+
205
+ if (denom2inv != 0)
206
+ {
207
+ // Gradients of loss w.r.t. entries of 2D covariance matrix,
208
+ // given gradients of loss w.r.t. conic matrix (inverse covariance matrix).
209
+ // e.g., dL / da = dL / d_conic_a * d_conic_a / d_a
210
+ dL_da = denom2inv * (-c * c * dL_dconic.x + 2 * b * c * dL_dconic.y + (denom - a * c) * dL_dconic.z);
211
+ dL_dc = denom2inv * (-a * a * dL_dconic.z + 2 * a * b * dL_dconic.y + (denom - a * c) * dL_dconic.x);
212
+ dL_db = denom2inv * 2 * (b * c * dL_dconic.x - (denom + 2 * b * b) * dL_dconic.y + a * b * dL_dconic.z);
213
+
214
+ // Gradients of loss L w.r.t. each 3D covariance matrix (Vrk) entry,
215
+ // given gradients w.r.t. 2D covariance matrix (diagonal).
216
+ // cov2D = transpose(T) * transpose(Vrk) * T;
217
+ dL_dcov[6 * idx + 0] = (T[0][0] * T[0][0] * dL_da + T[0][0] * T[1][0] * dL_db + T[1][0] * T[1][0] * dL_dc);
218
+ dL_dcov[6 * idx + 3] = (T[0][1] * T[0][1] * dL_da + T[0][1] * T[1][1] * dL_db + T[1][1] * T[1][1] * dL_dc);
219
+ dL_dcov[6 * idx + 5] = (T[0][2] * T[0][2] * dL_da + T[0][2] * T[1][2] * dL_db + T[1][2] * T[1][2] * dL_dc);
220
+
221
+ // Gradients of loss L w.r.t. each 3D covariance matrix (Vrk) entry,
222
+ // given gradients w.r.t. 2D covariance matrix (off-diagonal).
223
+ // Off-diagonal elements appear twice --> double the gradient.
224
+ // cov2D = transpose(T) * transpose(Vrk) * T;
225
+ dL_dcov[6 * idx + 1] = 2 * T[0][0] * T[0][1] * dL_da + (T[0][0] * T[1][1] + T[0][1] * T[1][0]) * dL_db + 2 * T[1][0] * T[1][1] * dL_dc;
226
+ dL_dcov[6 * idx + 2] = 2 * T[0][0] * T[0][2] * dL_da + (T[0][0] * T[1][2] + T[0][2] * T[1][0]) * dL_db + 2 * T[1][0] * T[1][2] * dL_dc;
227
+ dL_dcov[6 * idx + 4] = 2 * T[0][2] * T[0][1] * dL_da + (T[0][1] * T[1][2] + T[0][2] * T[1][1]) * dL_db + 2 * T[1][1] * T[1][2] * dL_dc;
228
+ }
229
+ else
230
+ {
231
+ for (int i = 0; i < 6; i++) // zero all six covariance gradients for this Gaussian
232
+ dL_dcov[6 * idx + i] = 0;
233
+ }
234
+
235
+ // Gradients of loss w.r.t. upper 2x3 portion of intermediate matrix T
236
+ // cov2D = transpose(T) * transpose(Vrk) * T;
237
+ float dL_dT00 = 2 * (T[0][0] * Vrk[0][0] + T[0][1] * Vrk[0][1] + T[0][2] * Vrk[0][2]) * dL_da +
238
+ (T[1][0] * Vrk[0][0] + T[1][1] * Vrk[0][1] + T[1][2] * Vrk[0][2]) * dL_db;
239
+ float dL_dT01 = 2 * (T[0][0] * Vrk[1][0] + T[0][1] * Vrk[1][1] + T[0][2] * Vrk[1][2]) * dL_da +
240
+ (T[1][0] * Vrk[1][0] + T[1][1] * Vrk[1][1] + T[1][2] * Vrk[1][2]) * dL_db;
241
+ float dL_dT02 = 2 * (T[0][0] * Vrk[2][0] + T[0][1] * Vrk[2][1] + T[0][2] * Vrk[2][2]) * dL_da +
242
+ (T[1][0] * Vrk[2][0] + T[1][1] * Vrk[2][1] + T[1][2] * Vrk[2][2]) * dL_db;
243
+ float dL_dT10 = 2 * (T[1][0] * Vrk[0][0] + T[1][1] * Vrk[0][1] + T[1][2] * Vrk[0][2]) * dL_dc +
244
+ (T[0][0] * Vrk[0][0] + T[0][1] * Vrk[0][1] + T[0][2] * Vrk[0][2]) * dL_db;
245
+ float dL_dT11 = 2 * (T[1][0] * Vrk[1][0] + T[1][1] * Vrk[1][1] + T[1][2] * Vrk[1][2]) * dL_dc +
246
+ (T[0][0] * Vrk[1][0] + T[0][1] * Vrk[1][1] + T[0][2] * Vrk[1][2]) * dL_db;
247
+ float dL_dT12 = 2 * (T[1][0] * Vrk[2][0] + T[1][1] * Vrk[2][1] + T[1][2] * Vrk[2][2]) * dL_dc +
248
+ (T[0][0] * Vrk[2][0] + T[0][1] * Vrk[2][1] + T[0][2] * Vrk[2][2]) * dL_db;
249
+
250
+ // Gradients of loss w.r.t. upper 3x2 non-zero entries of Jacobian matrix
251
+ // T = W * J
252
+ float dL_dJ00 = W[0][0] * dL_dT00 + W[0][1] * dL_dT01 + W[0][2] * dL_dT02;
253
+ float dL_dJ02 = W[2][0] * dL_dT00 + W[2][1] * dL_dT01 + W[2][2] * dL_dT02;
254
+ float dL_dJ11 = W[1][0] * dL_dT10 + W[1][1] * dL_dT11 + W[1][2] * dL_dT12;
255
+ float dL_dJ12 = W[2][0] * dL_dT10 + W[2][1] * dL_dT11 + W[2][2] * dL_dT12;
256
+
257
+ float tz = 1.f / t.z; // powers of 1 / t.z reused in the derivatives below
258
+ float tz2 = tz * tz;
259
+ float tz3 = tz2 * tz;
260
+
261
+ // Gradients of loss w.r.t. transformed Gaussian mean t
262
+ float dL_dtx = x_grad_mul * -h_x * tz2 * dL_dJ02;
263
+ float dL_dty = y_grad_mul * -h_y * tz2 * dL_dJ12;
264
+ float dL_dtz = -h_x * tz2 * dL_dJ00 - h_y * tz2 * dL_dJ11 + (2 * h_x * t.x) * tz3 * dL_dJ02 + (2 * h_y * t.y) * tz3 * dL_dJ12;
265
+
266
+ // Account for transformation of mean to t
267
+ // t = transformPoint4x3(mean, view_matrix);
268
+ float3 dL_dmean = transformVec4x3Transpose({ dL_dtx, dL_dty, dL_dtz }, view_matrix);
269
+
270
+ // Gradients of loss w.r.t. Gaussian means, but only the portion
271
+ // that is caused because the mean affects the covariance matrix.
272
+ // Additional mean gradient is accumulated in BACKWARD::preprocess.
273
+ dL_dmeans[idx] = dL_dmean; // plain assignment: replaces any previous value in dL_dmeans[idx]
274
+ }
275
+
276
+ // Backward pass for the conversion of scale and rotation to a 3D covariance matrix for one Gaussian.
277
+ // Reads the 6 packed gradient entries at dL_dcov3Ds + 6 * idx and writes dL_dscales[idx] and dL_drots[idx].
278
+ __device__ void computeCov3D(int idx, const glm::vec3 scale, float mod, const glm::vec4 rot, const float* dL_dcov3Ds, glm::vec3* dL_dscales, glm::vec4* dL_drots)
279
+ {
280
+ // Recompute (intermediate) results for the 3D covariance computation.
281
+ glm::vec4 q = rot; // used exactly as passed in: no normalization is applied here
282
+ float r = q.x;
283
+ float x = q.y;
284
+ float y = q.z;
285
+ float z = q.w;
286
+
287
+ glm::mat3 R = glm::mat3(
288
+ 1.f - 2.f * (y * y + z * z), 2.f * (x * y - r * z), 2.f * (x * z + r * y),
289
+ 2.f * (x * y + r * z), 1.f - 2.f * (x * x + z * z), 2.f * (y * z - r * x),
290
+ 2.f * (x * z - r * y), 2.f * (y * z + r * x), 1.f - 2.f * (x * x + y * y)
291
+ );
292
+
293
+ glm::mat3 S = glm::mat3(1.0f);
294
+
295
+ glm::vec3 s = mod * scale;
296
+ S[0][0] = s.x;
297
+ S[1][1] = s.y;
298
+ S[2][2] = s.z;
299
+
300
+ glm::mat3 M = S * R;
301
+
302
+ const float* dL_dcov3D = dL_dcov3Ds + 6 * idx;
303
+
304
+ // dL_dcov3D packs the six unique entries of the symmetric gradient in the
305
+ // order [0][0], [0][1], [0][2], [1][1], [1][2], [2][2].
306
+ // Each off-diagonal value is split evenly (0.5f) between its two mirrored entries.
307
+ // Convert per-element covariance loss gradients to matrix form
308
+ glm::mat3 dL_dSigma = glm::mat3(
309
+ dL_dcov3D[0], 0.5f * dL_dcov3D[1], 0.5f * dL_dcov3D[2],
310
+ 0.5f * dL_dcov3D[1], dL_dcov3D[3], 0.5f * dL_dcov3D[4],
311
+ 0.5f * dL_dcov3D[2], 0.5f * dL_dcov3D[4], dL_dcov3D[5]
312
+ );
313
+
314
+ // Compute loss gradient w.r.t. matrix M
315
+ // dSigma_dM = 2 * M
316
+ glm::mat3 dL_dM = 2.0f * M * dL_dSigma;
317
+
318
+ glm::mat3 Rt = glm::transpose(R);
319
+ glm::mat3 dL_dMt = glm::transpose(dL_dM);
320
+
321
+ // Gradients of loss w.r.t. scale
322
+ glm::vec3* dL_dscale = dL_dscales + idx;
323
+ dL_dscale->x = glm::dot(Rt[0], dL_dMt[0]);
324
+ dL_dscale->y = glm::dot(Rt[1], dL_dMt[1]);
325
+ dL_dscale->z = glm::dot(Rt[2], dL_dMt[2]);
326
+
327
+ dL_dMt[0] *= s.x;
328
+ dL_dMt[1] *= s.y;
329
+ dL_dMt[2] *= s.z;
330
+
331
+ // Gradients of loss w.r.t. the quaternion components (r, x, y, z) that build R above
332
+ glm::vec4 dL_dq;
333
+ dL_dq.x = 2 * z * (dL_dMt[0][1] - dL_dMt[1][0]) + 2 * y * (dL_dMt[2][0] - dL_dMt[0][2]) + 2 * x * (dL_dMt[1][2] - dL_dMt[2][1]);
334
+ dL_dq.y = 2 * y * (dL_dMt[1][0] + dL_dMt[0][1]) + 2 * z * (dL_dMt[2][0] + dL_dMt[0][2]) + 2 * r * (dL_dMt[1][2] - dL_dMt[2][1]) - 4 * x * (dL_dMt[2][2] + dL_dMt[1][1]);
335
+ dL_dq.z = 2 * x * (dL_dMt[1][0] + dL_dMt[0][1]) + 2 * r * (dL_dMt[2][0] - dL_dMt[0][2]) + 2 * z * (dL_dMt[1][2] + dL_dMt[2][1]) - 4 * y * (dL_dMt[2][2] + dL_dMt[0][0]);
336
+ dL_dq.w = 2 * r * (dL_dMt[0][1] - dL_dMt[1][0]) + 2 * x * (dL_dMt[2][0] + dL_dMt[0][2]) + 2 * y * (dL_dMt[1][2] + dL_dMt[2][1]) - 4 * z * (dL_dMt[1][1] + dL_dMt[0][0]);
337
+
338
+ // Store the quaternion gradient directly: q is used without normalization, so no normalization Jacobian is applied
339
+ float4* dL_drot = (float4*)(dL_drots + idx);
340
+ *dL_drot = float4{ dL_dq.x, dL_dq.y, dL_dq.z, dL_dq.w };
341
+ }
342
+
343
+ // Backward pass of the preprocessing steps, except for the covariance computation
344
+ // and inversion (those are handled by a previous kernel call). One thread per Gaussian;
345
+ // threads with idx >= P or a non-positive radii[idx] return without writing anything.
346
+ template<int C>
347
+ __global__ void preprocessCUDA(
348
+ int P, int D, int M,
349
+ const float3* means,
350
+ const int* radii,
351
+ const float* shs,
352
+ const bool* clamped,
353
+ const glm::vec3* scales,
354
+ const glm::vec4* rotations,
355
+ const float scale_modifier,
356
+ const float* view,
357
+ const float* proj,
358
+ const glm::vec3* campos,
359
+ const float3* dL_dmean2D,
360
+ glm::vec3* dL_dmeans,
361
+ float* dL_dcolor,
362
+ float* dL_ddepth,
363
+ float* dL_dcov3D,
364
+ float* dL_dsh,
365
+ glm::vec3* dL_dscale,
366
+ glm::vec4* dL_drot)
367
+ {
368
+ auto idx = cg::this_grid().thread_rank();
369
+ if (idx >= P || !(radii[idx] > 0))
370
+ return;
371
+
372
+ float3 m = means[idx];
373
+
374
+ // Taking care of gradients from the screenspace points
375
+ float4 m_hom = transformPoint4x4(m, proj);
376
+ float m_w = 1.0f / (m_hom.w + 0.0000001f); // small epsilon guards the division when m_hom.w is 0
377
+
378
+ // Compute loss gradient w.r.t. 3D means due to gradients of 2D means
379
+ // from rendering procedure
380
+ glm::vec3 dL_dmean;
381
+ float mul1 = (proj[0] * m.x + proj[4] * m.y + proj[8] * m.z + proj[12]) * m_w * m_w;
382
+ float mul2 = (proj[1] * m.x + proj[5] * m.y + proj[9] * m.z + proj[13]) * m_w * m_w;
383
+ dL_dmean.x = (proj[0] * m_w - proj[3] * mul1) * dL_dmean2D[idx].x + (proj[1] * m_w - proj[3] * mul2) * dL_dmean2D[idx].y;
384
+ dL_dmean.y = (proj[4] * m_w - proj[7] * mul1) * dL_dmean2D[idx].x + (proj[5] * m_w - proj[7] * mul2) * dL_dmean2D[idx].y;
385
+ dL_dmean.z = (proj[8] * m_w - proj[11] * mul1) * dL_dmean2D[idx].x + (proj[9] * m_w - proj[11] * mul2) * dL_dmean2D[idx].y;
386
+
387
+ // That's the second part of the mean gradient. Previous computation
388
+ // of cov2D and following SH conversion also affects it.
389
+ dL_dmeans[idx] += dL_dmean;
390
+
391
+ // mul3 below is view[2]*x + view[6]*y + view[10]*z + view[14], with the homogeneous w taken as 1
392
+ // for view^T * [x,y,z,1] (presumably the view-space depth of the mean - confirm against the forward pass).
393
+
394
+ // Compute loss gradient w.r.t. 3D means due to gradients of depth
395
+ // from rendering procedure
396
+ glm::vec3 dL_dmean2;
397
+ float mul3 = view[2] * m.x + view[6] * m.y + view[10] * m.z + view[14];
398
+ dL_dmean2.x = (view[2] - view[3] * mul3) * dL_ddepth[idx];
399
+ dL_dmean2.y = (view[6] - view[7] * mul3) * dL_ddepth[idx];
400
+ dL_dmean2.z = (view[10] - view[11] * mul3) * dL_ddepth[idx];
401
+
402
+ // That's the third part of the mean gradient.
403
+ dL_dmeans[idx] += dL_dmean2;
404
+
405
+ // Compute gradient updates due to computing colors from SHs (skipped when shs is null)
406
+ if (shs)
407
+ computeColorFromSH(idx, D, M, (glm::vec3*)means, *campos, shs, clamped, (glm::vec3*)dL_dcolor, (glm::vec3*)dL_dmeans, (glm::vec3*)dL_dsh);
408
+
409
+ // Compute gradient updates due to computing covariance from scale/rotation (skipped when scales is null)
410
+ if (scales)
411
+ computeCov3D(idx, scales[idx], scale_modifier, rotations[idx], dL_dcov3D, dL_dscale, dL_drot);
412
+ }
413
+
414
+ // Backward version of the rendering procedure. Each thread block handles one BLOCK_X x BLOCK_Y tile, one thread per pixel, walking the tile's Gaussians back to front.
415
+ template <uint32_t C>
416
+ __global__ void __launch_bounds__(BLOCK_X * BLOCK_Y)
417
+ renderCUDA(
418
+ const uint2* __restrict__ ranges,
419
+ const uint32_t* __restrict__ point_list,
420
+ int W, int H,
421
+ const float* __restrict__ bg_color,
422
+ const float2* __restrict__ points_xy_image,
423
+ const float4* __restrict__ conic_opacity,
424
+ const float* __restrict__ colors,
425
+ const float* __restrict__ depths,
426
+ const float* __restrict__ alphas,
427
+ const uint32_t* __restrict__ n_contrib,
428
+ const float* __restrict__ dL_dpixels,
429
+ const float* __restrict__ dL_dpixel_depths,
430
+ const float* __restrict__ dL_dalphas,
431
+ float3* __restrict__ dL_dmean2D,
432
+ float4* __restrict__ dL_dconic2D,
433
+ float* __restrict__ dL_dopacity,
434
+ float* __restrict__ dL_dcolors,
435
+ float* __restrict__ dL_ddepths
436
+ )
437
+ {
438
+ // We rasterize again. Compute necessary block info.
439
+ auto block = cg::this_thread_block();
440
+ const uint32_t horizontal_blocks = (W + BLOCK_X - 1) / BLOCK_X;
441
+ const uint2 pix_min = { block.group_index().x * BLOCK_X, block.group_index().y * BLOCK_Y };
442
+ const uint2 pix_max = { min(pix_min.x + BLOCK_X, W), min(pix_min.y + BLOCK_Y , H) };
443
+ const uint2 pix = { pix_min.x + block.thread_index().x, pix_min.y + block.thread_index().y };
444
+ const uint32_t pix_id = W * pix.y + pix.x;
445
+ const float2 pixf = { (float)pix.x, (float)pix.y };
446
+
447
+ const bool inside = pix.x < W&& pix.y < H;
448
+ const uint2 range = ranges[block.group_index().y * horizontal_blocks + block.group_index().x];
449
+
450
+ const int rounds = ((range.y - range.x + BLOCK_SIZE - 1) / BLOCK_SIZE);
451
+
452
+ bool done = !inside; // threads outside the image still help with loading but never enter the per-Gaussian loop
453
+ int toDo = range.y - range.x;
454
+
455
+ __shared__ int collected_id[BLOCK_SIZE];
456
+ __shared__ float2 collected_xy[BLOCK_SIZE];
457
+ __shared__ float4 collected_conic_opacity[BLOCK_SIZE];
458
+ __shared__ float collected_colors[C * BLOCK_SIZE];
459
+ __shared__ float collected_depths[BLOCK_SIZE];
460
+
461
+ // In the forward, we stored the final value for T, the
462
+ // product of all (1 - alpha) factors.
463
+ const float T_final = inside ? (1 - alphas[pix_id]) : 0;
464
+ float T = T_final;
465
+
466
+ // We start from the back. The ID of the last contributing
467
+ // Gaussian is known from each pixel from the forward.
468
+ uint32_t contributor = toDo;
469
+ const int last_contributor = inside ? n_contrib[pix_id] : 0;
470
+
471
+ float accum_rec[C] = { 0 };
472
+ float dL_dpixel[C];
473
+ float accum_depth_rec = 0;
474
+ float dL_dpixel_depth;
475
+ float accum_alpha_rec = 0;
476
+ float dL_dalpha;
477
+ if (inside) {
478
+ for (int i = 0; i < C; i++)
479
+ dL_dpixel[i] = dL_dpixels[i * H * W + pix_id];
480
+ dL_dpixel_depth = dL_dpixel_depths[pix_id];
481
+ dL_dalpha = dL_dalphas[pix_id];
482
+ }
483
+
484
+ float last_alpha = 0;
485
+ float last_color[C] = { 0 };
486
+ float last_depth = 0;
487
+
488
+ // Gradient of pixel coordinate w.r.t. normalized
489
+ // screen-space viewport coordinates (-1 to 1)
490
+ const float ddelx_dx = 0.5 * W;
491
+ const float ddely_dy = 0.5 * H;
492
+
493
+ // Traverse all Gaussians
494
+ for (int i = 0; i < rounds; i++, toDo -= BLOCK_SIZE)
495
+ {
496
+ // Load auxiliary data into shared memory, start in the BACK
497
+ // and load them in reverse order.
498
+ block.sync();
499
+ const int progress = i * BLOCK_SIZE + block.thread_rank();
500
+ if (range.x + progress < range.y)
501
+ {
502
+ const int coll_id = point_list[range.y - progress - 1];
503
+ collected_id[block.thread_rank()] = coll_id;
504
+ collected_xy[block.thread_rank()] = points_xy_image[coll_id];
505
+ collected_conic_opacity[block.thread_rank()] = conic_opacity[coll_id];
506
+ for (int i = 0; i < C; i++) // NOTE: this i shadows the outer round counter
507
+ collected_colors[i * BLOCK_SIZE + block.thread_rank()] = colors[coll_id * C + i];
508
+ collected_depths[block.thread_rank()] = depths[coll_id];
509
+ }
510
+ block.sync();
511
+
512
+ // Iterate over Gaussians
513
+ for (int j = 0; !done && j < min(BLOCK_SIZE, toDo); j++)
514
+ {
515
+ // Keep track of current Gaussian ID. Skip, if this one
516
+ // is behind the last contributor for this pixel.
517
+ contributor--;
518
+ if (contributor >= last_contributor)
519
+ continue;
520
+
521
+ // Compute blending values, as before.
522
+ const float2 xy = collected_xy[j];
523
+ const float2 d = { xy.x - pixf.x, xy.y - pixf.y };
524
+ const float4 con_o = collected_conic_opacity[j];
525
+ const float power = -0.5f * (con_o.x * d.x * d.x + con_o.z * d.y * d.y) - con_o.y * d.x * d.y;
526
+ if (power > 0.0f)
527
+ continue;
528
+
529
+ const float G = exp(power);
530
+ const float alpha = min(0.99f, con_o.w * G);
531
+ if (alpha < 1.0f / 255.0f)
532
+ continue;
533
+
534
+ T = T / (1.f - alpha); // divide out this Gaussian's (1 - alpha) factor from the running product
535
+ const float dchannel_dcolor = alpha * T;
536
+ const float dpixel_depth_ddepth = alpha * T;
537
+
538
+ // Propagate gradients to per-Gaussian colors and keep
539
+ // gradients w.r.t. alpha (blending factor for a Gaussian/pixel
540
+ // pair).
541
+ float dL_dopa = 0.0f;
542
+ const int global_id = collected_id[j];
543
+ for (int ch = 0; ch < C; ch++)
544
+ {
545
+ const float c = collected_colors[ch * BLOCK_SIZE + j];
546
+ // Update last color (to be used in the next iteration)
547
+ accum_rec[ch] = last_alpha * last_color[ch] + (1.f - last_alpha) * accum_rec[ch];
548
+ last_color[ch] = c;
549
+
550
+ const float dL_dchannel = dL_dpixel[ch];
551
+ dL_dopa += (c - accum_rec[ch]) * dL_dchannel;
552
+ // Update the gradients w.r.t. color of the Gaussian.
553
+ // Atomic, since this pixel is just one of potentially
554
+ // many that were affected by this Gaussian.
555
+ atomicAdd(&(dL_dcolors[global_id * C + ch]), dchannel_dcolor * dL_dchannel);
556
+ }
557
+
558
+ // Propagate gradients from pixel depth to opacity
559
+ const float c_d = collected_depths[j];
560
+ accum_depth_rec = last_alpha * last_depth + (1.f - last_alpha) * accum_depth_rec;
561
+ last_depth = c_d;
562
+ dL_dopa += (c_d - accum_depth_rec) * dL_dpixel_depth;
563
+ atomicAdd(&(dL_ddepths[global_id]), dpixel_depth_ddepth * dL_dpixel_depth);
564
+
565
+ // Propagate gradients from pixel alpha (weights_sum) to opacity
566
+ accum_alpha_rec = last_alpha + (1.f - last_alpha) * accum_alpha_rec;
567
+ dL_dopa += (1 - accum_alpha_rec) * dL_dalpha; //- (alpha - accum_alpha_rec) * dL_dalpha;
568
+
569
+ dL_dopa *= T;
570
+ // Update last alpha (to be used in the next iteration)
571
+ last_alpha = alpha;
572
+
573
+ // Account for fact that alpha also influences how much of
574
+ // the background color is added if nothing left to blend
575
+ float bg_dot_dpixel = 0;
576
+ for (int i = 0; i < C; i++) // NOTE: this i shadows the outer round counter
577
+ bg_dot_dpixel += bg_color[i] * dL_dpixel[i];
578
+ dL_dopa += (-T_final / (1.f - alpha)) * bg_dot_dpixel;
579
+
580
+
581
+ // Helpful reusable temporary variables
582
+ const float dL_dG = con_o.w * dL_dopa;
583
+ const float gdx = G * d.x;
584
+ const float gdy = G * d.y;
585
+ const float dG_ddelx = -gdx * con_o.x - gdy * con_o.y;
586
+ const float dG_ddely = -gdy * con_o.z - gdx * con_o.y;
587
+
588
+ // Update gradients w.r.t. 2D mean position of the Gaussian
589
+ atomicAdd(&dL_dmean2D[global_id].x, dL_dG * dG_ddelx * ddelx_dx);
590
+ atomicAdd(&dL_dmean2D[global_id].y, dL_dG * dG_ddely * ddely_dy);
591
+
592
+ // Update gradients w.r.t. 2D covariance (2x2 matrix, symmetric); the three unique entries are written to .x, .y and .w
593
+ atomicAdd(&dL_dconic2D[global_id].x, -0.5f * gdx * d.x * dL_dG);
594
+ atomicAdd(&dL_dconic2D[global_id].y, -0.5f * gdx * d.y * dL_dG);
595
+ atomicAdd(&dL_dconic2D[global_id].w, -0.5f * gdy * d.y * dL_dG);
596
+
597
+ // Update gradients w.r.t. opacity of the Gaussian
598
+ atomicAdd(&(dL_dopacity[global_id]), G * dL_dopa);
599
+ }
600
+ }
601
+ }
602
+ // Host-side launcher for the backward preprocessing: runs computeCov2DCUDA, then preprocessCUDA, each with 256 threads per block and enough blocks to cover P Gaussians.
603
+ void BACKWARD::preprocess(
604
+ int P, int D, int M,
605
+ const float3* means3D,
606
+ const int* radii,
607
+ const float* shs,
608
+ const bool* clamped,
609
+ const glm::vec3* scales,
610
+ const glm::vec4* rotations,
611
+ const float scale_modifier,
612
+ const float* cov3Ds,
613
+ const float* viewmatrix,
614
+ const float* projmatrix,
615
+ const float focal_x, float focal_y,
616
+ const float tan_fovx, float tan_fovy,
617
+ const glm::vec3* campos,
618
+ const float3* dL_dmean2D,
619
+ const float* dL_dconic,
620
+ glm::vec3* dL_dmean3D,
621
+ float* dL_dcolor,
622
+ float* dL_ddepth,
623
+ float* dL_dcov3D,
624
+ float* dL_dsh,
625
+ glm::vec3* dL_dscale,
626
+ glm::vec4* dL_drot)
627
+ {
628
+ // Propagate gradients for the path of 2D conic matrix computation.
629
+ // Somewhat long, thus it is its own kernel rather than being part of
630
+ // "preprocess". When done, loss gradient w.r.t. 3D means has been
631
+ // modified and gradient w.r.t. 3D covariance matrix has been computed.
632
+ computeCov2DCUDA << <(P + 255) / 256, 256 >> > (
633
+ P,
634
+ means3D,
635
+ radii,
636
+ cov3Ds,
637
+ focal_x,
638
+ focal_y,
639
+ tan_fovx,
640
+ tan_fovy,
641
+ viewmatrix,
642
+ dL_dconic,
643
+ (float3*)dL_dmean3D,
644
+ dL_dcov3D);
645
+
646
+ // Propagate gradients for remaining steps: finish 3D mean gradients,
647
+ // propagate color gradients to SH (if desired), propagate 3D covariance
648
+ // matrix gradients to scale and rotation. Arguments already have the kernel's parameter types, so no casts are needed.
649
+ preprocessCUDA<NUM_CHANNELS> << < (P + 255) / 256, 256 >> > (
650
+ P, D, M,
651
+ means3D,
652
+ radii,
653
+ shs,
654
+ clamped,
655
+ scales,
656
+ rotations,
657
+ scale_modifier,
658
+ viewmatrix,
659
+ projmatrix,
660
+ campos,
661
+ dL_dmean2D,
662
+ dL_dmean3D,
663
+ dL_dcolor,
664
+ dL_ddepth,
665
+ dL_dcov3D,
666
+ dL_dsh,
667
+ dL_dscale,
668
+ dL_drot);
669
+ }
670
+ // Host-side launcher for the backward rendering kernel: forwards every argument to renderCUDA<NUM_CHANNELS> using the caller's grid and block dimensions.
671
+ void BACKWARD::render(
672
+ const dim3 grid, const dim3 block,
673
+ const uint2* ranges,
674
+ const uint32_t* point_list,
675
+ int W, int H,
676
+ const float* bg_color,
677
+ const float2* means2D,
678
+ const float4* conic_opacity,
679
+ const float* colors,
680
+ const float* depths,
681
+ const float* alphas,
682
+ const uint32_t* n_contrib,
683
+ const float* dL_dpixels,
684
+ const float* dL_dpixel_depths,
685
+ const float* dL_dalphas,
686
+ float3* dL_dmean2D,
687
+ float4* dL_dconic2D,
688
+ float* dL_dopacity,
689
+ float* dL_dcolors,
690
+ float* dL_ddepths)
691
+ {
692
+ // Single kernel launch; arguments are grouped by role and passed through unchanged.
693
+ renderCUDA<NUM_CHANNELS><<<grid, block>>>(
694
+ // Per-tile ranges into the Gaussian id list, and the image size.
695
+ ranges, point_list,
696
+ W, H,
697
+ // Background colour and per-Gaussian data indexed by Gaussian id.
698
+ bg_color, means2D,
699
+ conic_opacity, colors, depths,
700
+ // Per-pixel results stored by the forward pass.
701
+ alphas, n_contrib,
702
+ // Incoming per-pixel loss gradients.
703
+ dL_dpixels,
704
+ dL_dpixel_depths,
705
+ dL_dalphas,
706
+ // Outgoing per-Gaussian gradients (accumulated atomically by the kernel).
707
+ dL_dmean2D,
708
+ dL_dconic2D,
709
+ dL_dopacity,
710
+ dL_dcolors,
711
+ dL_ddepths);
712
+ }
diff-gaussian-rasterization/cuda_rasterizer/backward.h ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #ifndef CUDA_RASTERIZER_BACKWARD_H_INCLUDED
13
+ #define CUDA_RASTERIZER_BACKWARD_H_INCLUDED
14
+
15
+ #include <cuda.h>
16
+ #include "cuda_runtime.h"
17
+ #include "device_launch_parameters.h"
18
+ #define GLM_FORCE_CUDA
19
+ #include <glm/glm.hpp>
20
+ // Host-side entry points of the backward pass. render() launches the backward rendering kernel with the given grid/block and turns per-pixel gradients into per-Gaussian gradients.
21
+ namespace BACKWARD
22
+ {
23
+ void render(
24
+ const dim3 grid, const dim3 block,
25
+ const uint2* ranges,
26
+ const uint32_t* point_list,
27
+ int W, int H,
28
+ const float* bg_color,
29
+ const float2* means2D,
30
+ const float4* conic_opacity,
31
+ const float* colors,
32
+ const float* depths,
33
+ const float* alphas,
34
+ const uint32_t* n_contrib,
35
+ const float* dL_dpixels,
36
+ const float* dL_dpixel_depths,
37
+ const float* dL_dalphas,
38
+ float3* dL_dmean2D,
39
+ float4* dL_dconic2D,
40
+ float* dL_dopacity,
41
+ float* dL_dcolors,
42
+ float* dL_ddepths);
43
+ // preprocess() propagates the 2D mean / conic / color / depth gradients of P Gaussians to 3D means, 3D covariances, SH coefficients, scales and rotations.
44
+ void preprocess(
45
+ int P, int D, int M,
46
+ const float3* means,
47
+ const int* radii,
48
+ const float* shs,
49
+ const bool* clamped,
50
+ const glm::vec3* scales,
51
+ const glm::vec4* rotations,
52
+ const float scale_modifier,
53
+ const float* cov3Ds,
54
+ const float* view,
55
+ const float* proj,
56
+ const float focal_x, float focal_y,
57
+ const float tan_fovx, float tan_fovy,
58
+ const glm::vec3* campos,
59
+ const float3* dL_dmean2D,
60
+ const float* dL_dconics,
61
+ glm::vec3* dL_dmeans,
62
+ float* dL_dcolor,
63
+ float* dL_ddepth,
64
+ float* dL_dcov3D,
65
+ float* dL_dsh,
66
+ glm::vec3* dL_dscale,
67
+ glm::vec4* dL_drot);
68
+ }
69
+
70
+ #endif
diff-gaussian-rasterization/cuda_rasterizer/config.h ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #ifndef CUDA_RASTERIZER_CONFIG_H_INCLUDED
13
+ #define CUDA_RASTERIZER_CONFIG_H_INCLUDED
14
+ // Compile-time rasterizer settings.
15
+ #define NUM_CHANNELS 3 // Default 3, RGB
16
+ #define BLOCK_X 16 // tile width in pixels; the render kernels use BLOCK_X * BLOCK_Y as their launch bound
17
+ #define BLOCK_Y 16 // tile height in pixels
18
+
19
+ #endif
diff-gaussian-rasterization/cuda_rasterizer/forward.cu ADDED
@@ -0,0 +1,466 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #include "forward.h"
13
+ #include "auxiliary.h"
14
+ #include <cooperative_groups.h>
15
+ #include <cooperative_groups/reduce.h>
16
+ namespace cg = cooperative_groups;
17
+
18
+ // Forward method for converting the input spherical harmonics
19
+ // coefficients of each Gaussian to a simple RGB color. Also records in clamped[3 * idx + 0..2] which channels were cut off at 0.
20
+ __device__ glm::vec3 computeColorFromSH(int idx, int deg, int max_coeffs, const glm::vec3* means, glm::vec3 campos, const float* shs, bool* clamped)
21
+ {
22
+ // The implementation is loosely based on code for
23
+ // "Differentiable Point-Based Radiance Fields for
24
+ // Efficient View Synthesis" by Zhang et al. (2022)
25
+ glm::vec3 pos = means[idx];
26
+ glm::vec3 dir = pos - campos;
27
+ dir = dir / glm::length(dir); // NOTE(review): dir has zero length if the Gaussian sits exactly at campos
28
+
29
+ glm::vec3* sh = ((glm::vec3*)shs) + idx * max_coeffs;
30
+ glm::vec3 result = SH_C0 * sh[0];
31
+
32
+ if (deg > 0)
33
+ {
34
+ float x = dir.x;
35
+ float y = dir.y;
36
+ float z = dir.z;
37
+ result = result - SH_C1 * y * sh[1] + SH_C1 * z * sh[2] - SH_C1 * x * sh[3];
38
+
39
+ if (deg > 1)
40
+ {
41
+ float xx = x * x, yy = y * y, zz = z * z;
42
+ float xy = x * y, yz = y * z, xz = x * z;
43
+ result = result +
44
+ SH_C2[0] * xy * sh[4] +
45
+ SH_C2[1] * yz * sh[5] +
46
+ SH_C2[2] * (2.0f * zz - xx - yy) * sh[6] +
47
+ SH_C2[3] * xz * sh[7] +
48
+ SH_C2[4] * (xx - yy) * sh[8];
49
+
50
+ if (deg > 2)
51
+ {
52
+ result = result +
53
+ SH_C3[0] * y * (3.0f * xx - yy) * sh[9] +
54
+ SH_C3[1] * xy * z * sh[10] +
55
+ SH_C3[2] * y * (4.0f * zz - xx - yy) * sh[11] +
56
+ SH_C3[3] * z * (2.0f * zz - 3.0f * xx - 3.0f * yy) * sh[12] +
57
+ SH_C3[4] * x * (4.0f * zz - xx - yy) * sh[13] +
58
+ SH_C3[5] * z * (xx - yy) * sh[14] +
59
+ SH_C3[6] * x * (xx - 3.0f * yy) * sh[15];
60
+ }
61
+ }
62
+ }
63
+ result += 0.5f;
64
+
65
+ // RGB colors are clamped to non-negative values. If values are
66
+ // clamped, we need to keep track of this for the backward pass.
67
+ clamped[3 * idx + 0] = (result.x < 0);
68
+ clamped[3 * idx + 1] = (result.y < 0);
69
+ clamped[3 * idx + 2] = (result.z < 0);
70
+ return glm::max(result, 0.0f);
71
+ }
72
+
73
+ // Forward version of 2D covariance matrix computation. Returns the three unique entries (cov[0][0], cov[0][1], cov[1][1]) of the filtered 2x2 covariance.
74
+ __device__ float3 computeCov2D(const float3& mean, float focal_x, float focal_y, float tan_fovx, float tan_fovy, const float* cov3D, const float* viewmatrix)
75
+ {
76
+ // The following models the steps outlined by equations 29
77
+ // and 31 in "EWA Splatting" (Zwicker et al., 2002).
78
+ // Additionally considers aspect / scaling of viewport.
79
+ // Transposes used to account for row-/column-major conventions.
80
+ float3 t = transformPoint4x3(mean, viewmatrix);
81
+ // Clamp t.x / t.z and t.y / t.z to +-1.3 * tan_fovx / tan_fovy before building the Jacobian.
82
+ const float limx = 1.3f * tan_fovx;
83
+ const float limy = 1.3f * tan_fovy;
84
+ const float txtz = t.x / t.z;
85
+ const float tytz = t.y / t.z;
86
+ t.x = min(limx, max(-limx, txtz)) * t.z;
87
+ t.y = min(limy, max(-limy, tytz)) * t.z;
88
+
89
+ glm::mat3 J = glm::mat3(
90
+ focal_x / t.z, 0.0f, -(focal_x * t.x) / (t.z * t.z),
91
+ 0.0f, focal_y / t.z, -(focal_y * t.y) / (t.z * t.z),
92
+ 0, 0, 0);
93
+
94
+ glm::mat3 W = glm::mat3(
95
+ viewmatrix[0], viewmatrix[4], viewmatrix[8],
96
+ viewmatrix[1], viewmatrix[5], viewmatrix[9],
97
+ viewmatrix[2], viewmatrix[6], viewmatrix[10]);
98
+
99
+ glm::mat3 T = W * J;
100
+ // Rebuild the full symmetric 3x3 matrix from the six packed cov3D entries.
101
+ glm::mat3 Vrk = glm::mat3(
102
+ cov3D[0], cov3D[1], cov3D[2],
103
+ cov3D[1], cov3D[3], cov3D[4],
104
+ cov3D[2], cov3D[4], cov3D[5]);
105
+
106
+ glm::mat3 cov = glm::transpose(T) * glm::transpose(Vrk) * T;
107
+
108
+ // Apply low-pass filter: every Gaussian should be at least
109
+ // one pixel wide/high. Discard 3rd row and column.
110
+ cov[0][0] += 0.3f;
111
+ cov[1][1] += 0.3f;
112
+ return { float(cov[0][0]), float(cov[0][1]), float(cov[1][1]) };
113
+ }
114
+
115
+ // Forward method for converting scale and rotation properties of each
116
+ // Gaussian to a 3D covariance matrix in world space. The quaternion is used
117
+ // as given: the normalization step is disabled in this version (see below). Writes six floats to cov3D.
118
+ __device__ void computeCov3D(const glm::vec3 scale, float mod, const glm::vec4 rot, float* cov3D)
119
+ {
120
+ // Create scaling matrix
121
+ glm::mat3 S = glm::mat3(1.0f);
122
+ S[0][0] = mod * scale.x;
123
+ S[1][1] = mod * scale.y;
124
+ S[2][2] = mod * scale.z;
125
+
126
+ // Quaternion is NOT normalized here (the division by its length is commented out); presumably callers pass unit quaternions - confirm
127
+ glm::vec4 q = rot;// / glm::length(rot);
128
+ float r = q.x;
129
+ float x = q.y;
130
+ float y = q.z;
131
+ float z = q.w;
132
+
133
+ // Compute rotation matrix from quaternion
134
+ glm::mat3 R = glm::mat3(
135
+ 1.f - 2.f * (y * y + z * z), 2.f * (x * y - r * z), 2.f * (x * z + r * y),
136
+ 2.f * (x * y + r * z), 1.f - 2.f * (x * x + z * z), 2.f * (y * z - r * x),
137
+ 2.f * (x * z - r * y), 2.f * (y * z + r * x), 1.f - 2.f * (x * x + y * y)
138
+ );
139
+
140
+ glm::mat3 M = S * R;
141
+
142
+ // Compute 3D world covariance matrix Sigma
143
+ glm::mat3 Sigma = glm::transpose(M) * M;
144
+
145
+ // Covariance is symmetric, only store upper right
146
+ cov3D[0] = Sigma[0][0];
147
+ cov3D[1] = Sigma[0][1];
148
+ cov3D[2] = Sigma[0][2];
149
+ cov3D[3] = Sigma[1][1];
150
+ cov3D[4] = Sigma[1][2];
151
+ cov3D[5] = Sigma[2][2];
152
+ }
153
+
154
+ // Perform initial steps for each Gaussian prior to rasterization. One thread per Gaussian; a Gaussian that is rejected keeps radii[idx] == 0 and tiles_touched[idx] == 0.
155
+ template<int C>
156
+ __global__ void preprocessCUDA(int P, int D, int M,
157
+ const float* orig_points,
158
+ const glm::vec3* scales,
159
+ const float scale_modifier,
160
+ const glm::vec4* rotations,
161
+ const float* opacities,
162
+ const float* shs,
163
+ bool* clamped,
164
+ const float* cov3D_precomp,
165
+ const float* colors_precomp,
166
+ const float* viewmatrix,
167
+ const float* projmatrix,
168
+ const glm::vec3* cam_pos,
169
+ const int W, int H,
170
+ const float tan_fovx, float tan_fovy,
171
+ const float focal_x, float focal_y,
172
+ int* radii,
173
+ float2* points_xy_image,
174
+ float* depths,
175
+ float* cov3Ds,
176
+ float* rgb,
177
+ float4* conic_opacity,
178
+ const dim3 grid,
179
+ uint32_t* tiles_touched,
180
+ bool prefiltered)
181
+ {
182
+ auto idx = cg::this_grid().thread_rank();
183
+ if (idx >= P)
184
+ return;
185
+
186
+ // Initialize radius and touched tiles to 0. If this isn't changed,
187
+ // this Gaussian will not be processed further.
188
+ radii[idx] = 0;
189
+ tiles_touched[idx] = 0;
190
+
191
+ // Perform near culling, quit if outside.
192
+ float3 p_view;
193
+ if (!in_frustum(idx, orig_points, viewmatrix, projmatrix, prefiltered, p_view))
194
+ return;
195
+
196
+ // Transform point by projecting
197
+ float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] };
198
+ float4 p_hom = transformPoint4x4(p_orig, projmatrix);
199
+ float p_w = 1.0f / (p_hom.w + 0.0000001f); // small epsilon guards the division when p_hom.w is 0
200
+ float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w };
201
+
202
+ // If 3D covariance matrix is precomputed, use it, otherwise compute
203
+ // from scaling and rotation parameters.
204
+ const float* cov3D;
205
+ if (cov3D_precomp != nullptr)
206
+ {
207
+ cov3D = cov3D_precomp + idx * 6;
208
+ }
209
+ else
210
+ {
211
+ computeCov3D(scales[idx], scale_modifier, rotations[idx], cov3Ds + idx * 6);
212
+ cov3D = cov3Ds + idx * 6;
213
+ }
214
+
215
+ // Compute 2D screen-space covariance matrix
216
+ float3 cov = computeCov2D(p_orig, focal_x, focal_y, tan_fovx, tan_fovy, cov3D, viewmatrix);
217
+
218
+ // Invert covariance (EWA algorithm)
219
+ float det = (cov.x * cov.z - cov.y * cov.y);
220
+ if (det == 0.0f)
221
+ return;
222
+ float det_inv = 1.f / det;
223
+ float3 conic = { cov.z * det_inv, -cov.y * det_inv, cov.x * det_inv };
224
+
225
+ // Compute extent in screen space (by finding eigenvalues of
226
+ // 2D covariance matrix). Use extent to compute a bounding rectangle
227
+ // of screen-space tiles that this Gaussian overlaps with. Quit if
228
+ // rectangle covers 0 tiles.
229
+ float mid = 0.5f * (cov.x + cov.z);
230
+ float lambda1 = mid + sqrt(max(0.1f, mid * mid - det));
231
+ float lambda2 = mid - sqrt(max(0.1f, mid * mid - det));
232
+ float my_radius = ceil(3.f * sqrt(max(lambda1, lambda2)));
233
+ float2 point_image = { ndc2Pix(p_proj.x, W), ndc2Pix(p_proj.y, H) };
234
+ uint2 rect_min, rect_max;
235
+ getRect(point_image, my_radius, rect_min, rect_max, grid);
236
+ if ((rect_max.x - rect_min.x) * (rect_max.y - rect_min.y) == 0)
237
+ return;
238
+
239
+ // If colors have been precomputed, use them, otherwise convert
240
+ // spherical harmonics coefficients to RGB color.
241
+ if (colors_precomp == nullptr)
242
+ {
243
+ glm::vec3 result = computeColorFromSH(idx, D, M, (glm::vec3*)orig_points, *cam_pos, shs, clamped);
244
+ rgb[idx * C + 0] = result.x; // NOTE(review): exactly three channels are written here regardless of C
245
+ rgb[idx * C + 1] = result.y;
246
+ rgb[idx * C + 2] = result.z;
247
+ }
248
+
249
+ // Store some useful helper data for the next steps.
250
+ depths[idx] = p_view.z;
251
+ radii[idx] = my_radius;
252
+ points_xy_image[idx] = point_image;
253
+ // Inverse 2D covariance and opacity neatly pack into one float4
254
+ conic_opacity[idx] = { conic.x, conic.y, conic.z, opacities[idx] };
255
+ tiles_touched[idx] = (rect_max.y - rect_min.y) * (rect_max.x - rect_min.x);
256
+ }
257
+
258
// Tile-based blending kernel. One thread block handles one screen tile and
// each thread owns one pixel of it. In every round the block first stages a
// batch of up to BLOCK_SIZE Gaussians in shared memory, then every thread
// that is still active blends that batch, in list order, into its pixel.
//
//   ranges           per-tile [x, y) span into point_list
//   point_list       Gaussian indices, consumed in the order stored
//   W, H             image width / height in pixels
//   points_xy_image  per-Gaussian 2D centre (compared against pixel coords)
//   features         per-Gaussian feature vector, CHANNELS floats each
//   depths           per-Gaussian depth value
//   conic_opacity    per-Gaussian (conic.x, conic.y, conic.z, opacity)
//   out_alpha        per-pixel accumulated blending weight
//   n_contrib        per-pixel position of the last Gaussian that was blended
//   bg_color         background colour, CHANNELS floats
//   out_color        output image, channel-major (ch * H * W + pixel)
//   out_depth        per-pixel blending-weighted depth
template <uint32_t CHANNELS>
__global__ void __launch_bounds__(BLOCK_X * BLOCK_Y)
renderCUDA(
	const uint2* __restrict__ ranges,
	const uint32_t* __restrict__ point_list,
	int W, int H,
	const float2* __restrict__ points_xy_image,
	const float* __restrict__ features,
	const float* __restrict__ depths,
	const float4* __restrict__ conic_opacity,
	float* __restrict__ out_alpha,
	uint32_t* __restrict__ n_contrib,
	const float* __restrict__ bg_color,
	float* __restrict__ out_color,
	float* __restrict__ out_depth)
{
	// Work out which tile this block covers and which pixel this thread owns.
	auto tile = cg::this_thread_block();
	const uint32_t tiles_per_row = (W + BLOCK_X - 1) / BLOCK_X;
	const uint2 tile_origin = { tile.group_index().x * BLOCK_X, tile.group_index().y * BLOCK_Y };
	const uint2 pixel = { tile_origin.x + tile.thread_index().x, tile_origin.y + tile.thread_index().y };
	const uint32_t pixel_idx = W * pixel.y + pixel.x;
	const float2 pixel_f = { (float)pixel.x, (float)pixel.y };

	// Threads that fall outside the image never blend, but they still take
	// part in the cooperative loads and barriers below.
	const bool in_image = pixel.x < W && pixel.y < H;
	bool finished = !in_image;

	// Span of sorted-list entries assigned to this tile.
	const uint2 span = ranges[tile.group_index().y * tiles_per_row + tile.group_index().x];
	const int rounds = ((span.y - span.x + BLOCK_SIZE - 1) / BLOCK_SIZE);
	int remaining = span.y - span.x;

	// Shared staging area for one batch of Gaussians.
	__shared__ int batch_id[BLOCK_SIZE];
	__shared__ float2 batch_xy[BLOCK_SIZE];
	__shared__ float4 batch_conic_opacity[BLOCK_SIZE];

	// Per-pixel accumulators.
	float transmittance = 1.0f;
	uint32_t visited = 0;
	uint32_t last_used = 0;
	float color_acc[CHANNELS] = { 0 };
	float alpha_acc = 0;
	float depth_acc = 0;

	for (int round = 0; round < rounds; round++, remaining -= BLOCK_SIZE)
	{
		// Stop as soon as every thread of the block reports it is finished.
		// This call also acts as the barrier separating consecutive rounds.
		if (__syncthreads_count(finished) == BLOCK_SIZE)
			break;

		// Each thread stages at most one Gaussian of the next batch.
		int fetch = round * BLOCK_SIZE + tile.thread_rank();
		if (span.x + fetch < span.y)
		{
			int gaussian = point_list[span.x + fetch];
			batch_id[tile.thread_rank()] = gaussian;
			batch_xy[tile.thread_rank()] = points_xy_image[gaussian];
			batch_conic_opacity[tile.thread_rank()] = conic_opacity[gaussian];
		}
		tile.sync();

		// Blend the staged batch; the last batch may be only partially filled.
		const int batch_len = min(BLOCK_SIZE, remaining);
		for (int k = 0; !finished && k < batch_len; k++)
		{
			// Count every list entry looked at, whether or not it contributes.
			visited++;

			// Evaluate the Gaussian exponent at this pixel using the conic
			// (cf. "Surface Splatting" by Zwicker et al., 2001).
			const float2 centre = batch_xy[k];
			const float2 delta = { centre.x - pixel_f.x, centre.y - pixel_f.y };
			const float4 co = batch_conic_opacity[k];
			const float power = -0.5f * (co.x * delta.x * delta.x + co.z * delta.y * delta.y) - co.y * delta.x * delta.y;
			if (power > 0.0f)
				continue;

			// Eq. (2) of the 3D Gaussian splatting paper: alpha is the
			// opacity scaled by the exponential falloff, clamped to 0.99.
			// Contributions below 1/255 are skipped.
			const float alpha = min(0.99f, co.w * exp(power));
			if (alpha < 1.0f / 255.0f)
				continue;

			// Once the remaining transmittance would drop below 1e-4 this
			// pixel is considered saturated and takes no further input.
			const float next_transmittance = transmittance * (1 - alpha);
			if (next_transmittance < 0.0001f)
			{
				finished = true;
				continue;
			}

			// Eq. (3) of the 3D Gaussian splatting paper.
			for (int ch = 0; ch < CHANNELS; ch++)
				color_acc[ch] += features[batch_id[k] * CHANNELS + ch] * alpha * transmittance;
			alpha_acc += alpha * transmittance;
			depth_acc += depths[batch_id[k]] * alpha * transmittance;

			transmittance = next_transmittance;

			// Remember how far into the list the last contribution was.
			last_used = visited;
		}
	}

	// Only threads that own a real pixel write results.
	if (in_image)
	{
		n_contrib[pixel_idx] = last_used;
		for (int ch = 0; ch < CHANNELS; ch++)
			out_color[ch * H * W + pixel_idx] = color_acc[ch] + transmittance * bg_color[ch];
		out_alpha[pixel_idx] = alpha_acc; //1 - T;
		out_depth[pixel_idx] = depth_acc;
	}
}
382
+
383
// Host-side launcher for the tile rasterization kernel. Every argument is
// forwarded unchanged to renderCUDA<NUM_CHANNELS>, which is launched with the
// caller-supplied grid and block dimensions. No error checking or
// synchronization is performed here.
void FORWARD::render(
	const dim3 grid, dim3 block,
	const uint2* ranges,
	const uint32_t* point_list,
	int W, int H,
	const float2* means2D,
	const float* colors,
	const float* depths,
	const float4* conic_opacity,
	float* out_alpha,
	uint32_t* n_contrib,
	const float* bg_color,
	float* out_color,
	float* out_depth)
{
	renderCUDA<NUM_CHANNELS><<<grid, block>>>(
		ranges, point_list,
		W, H,
		means2D, colors, depths, conic_opacity,
		out_alpha, n_contrib,
		bg_color,
		out_color, out_depth);
}
412
+
413
// Host-side launcher for the per-Gaussian preprocessing kernel. It starts
// preprocessCUDA<NUM_CHANNELS> with 256 threads per block and enough blocks
// to cover all P Gaussians, forwarding the arguments as-is.
void FORWARD::preprocess(int P, int D, int M,
	const float* means3D,
	const glm::vec3* scales,
	const float scale_modifier,
	const glm::vec4* rotations,
	const float* opacities,
	const float* shs,
	bool* clamped,
	const float* cov3D_precomp,
	const float* colors_precomp,
	const float* viewmatrix,
	const float* projmatrix,
	const glm::vec3* cam_pos,
	const int W, int H,
	const float focal_x, float focal_y,
	const float tan_fovx, float tan_fovy,
	int* radii,
	float2* means2D,
	float* depths,
	float* cov3Ds,
	float* rgb,
	float4* conic_opacity,
	const dim3 grid,
	uint32_t* tiles_touched,
	bool prefiltered)
{
	const int threads_per_block = 256;
	const int num_blocks = (P + threads_per_block - 1) / threads_per_block;

	// NOTE: the kernel is handed the tan_fov pair *before* the focal pair,
	// i.e. in the opposite order to this wrapper's own parameter list.
	preprocessCUDA<NUM_CHANNELS><<<num_blocks, threads_per_block>>>(
		P, D, M,
		means3D,
		scales, scale_modifier,
		rotations,
		opacities,
		shs, clamped,
		cov3D_precomp, colors_precomp,
		viewmatrix, projmatrix,
		cam_pos,
		W, H,
		tan_fovx, tan_fovy,
		focal_x, focal_y,
		radii,
		means2D, depths, cov3Ds, rgb, conic_opacity,
		grid,
		tiles_touched,
		prefiltered);
}
diff-gaussian-rasterization/cuda_rasterizer/forward.h ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #ifndef CUDA_RASTERIZER_FORWARD_H_INCLUDED
13
+ #define CUDA_RASTERIZER_FORWARD_H_INCLUDED
14
+
15
+ #include <cuda.h>
16
+ #include "cuda_runtime.h"
17
+ #include "device_launch_parameters.h"
18
+ #define GLM_FORCE_CUDA
19
+ #include <glm/glm.hpp>
20
+
21
+ namespace FORWARD
22
+ {
23
+ // Perform initial steps for each Gaussian prior to rasterization.
24
+ void preprocess(int P, int D, int M,
25
+ const float* orig_points,
26
+ const glm::vec3* scales,
27
+ const float scale_modifier,
28
+ const glm::vec4* rotations,
29
+ const float* opacities,
30
+ const float* shs,
31
+ bool* clamped,
32
+ const float* cov3D_precomp,
33
+ const float* colors_precomp,
34
+ const float* viewmatrix,
35
+ const float* projmatrix,
36
+ const glm::vec3* cam_pos,
37
+ const int W, int H,
38
+ const float focal_x, float focal_y,
39
+ const float tan_fovx, float tan_fovy,
40
+ int* radii,
41
+ float2* points_xy_image,
42
+ float* depths,
43
+ float* cov3Ds,
44
+ float* colors,
45
+ float4* conic_opacity,
46
+ const dim3 grid,
47
+ uint32_t* tiles_touched,
48
+ bool prefiltered);
49
+
50
+ // Main rasterization method.
51
+ void render(
52
+ const dim3 grid, dim3 block,
53
+ const uint2* ranges,
54
+ const uint32_t* point_list,
55
+ int W, int H,
56
+ const float2* points_xy_image,
57
+ const float* features,
58
+ const float* depths,
59
+ const float4* conic_opacity,
60
+ float* out_alpha,
61
+ uint32_t* n_contrib,
62
+ const float* bg_color,
63
+ float* out_color,
64
+ float* out_depth);
65
+ }
66
+
67
+
68
+ #endif
diff-gaussian-rasterization/cuda_rasterizer/rasterizer.h ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #ifndef CUDA_RASTERIZER_H_INCLUDED
13
+ #define CUDA_RASTERIZER_H_INCLUDED
14
+
15
+ #include <vector>
16
+ #include <functional>
17
+
18
namespace CudaRasterizer
{
	// Public, static-only interface of the CUDA Gaussian rasterizer.
	class Rasterizer
	{
	public:

		// Runs the frustum test for each of the P points in means3D and
		// stores the result per point in `present`.
		static void markVisible(
			int P,
			float* means3D,
			float* viewmatrix,
			float* projmatrix,
			bool* present);

		// Forward rendering pass.
		// The three buffer callbacks are each invoked with a byte count and
		// must return a pointer to a scratch buffer of at least that size.
		// Results are written to out_color / out_depth / out_alpha and, when
		// given, radii. Returns the number of Gaussian/tile instances that
		// were produced for rendering.
		static int forward(
			std::function<char* (size_t)> geometryBuffer,
			std::function<char* (size_t)> binningBuffer,
			std::function<char* (size_t)> imageBuffer,
			const int P, int D, int M,
			const float* background,
			const int width, int height,
			const float* means3D,
			const float* shs,
			const float* colors_precomp,
			const float* opacities,
			const float* scales,
			const float scale_modifier,
			const float* rotations,
			const float* cov3D_precomp,
			const float* viewmatrix,
			const float* projmatrix,
			const float* cam_pos,
			const float tan_fovx, float tan_fovy,
			const bool prefiltered,
			float* out_color,
			float* out_depth,
			float* out_alpha,
			int* radii = nullptr,
			bool debug = false);

		// Backward pass matching forward().
		// geom_buffer / binning_buffer / image_buffer are the scratch buffers
		// filled by the forward pass and R is the instance count it returned.
		// The dL_dpix* / dL_dalphas inputs are per-pixel loss gradients; the
		// remaining dL_* pointers receive the computed gradients.
		static void backward(
			const int P, int D, int M, int R,
			const float* background,
			const int width, int height,
			const float* means3D,
			const float* shs,
			const float* colors_precomp,
			const float* alphas,
			const float* scales,
			const float scale_modifier,
			const float* rotations,
			const float* cov3D_precomp,
			const float* viewmatrix,
			const float* projmatrix,
			const float* campos,
			const float tan_fovx, float tan_fovy,
			const int* radii,
			char* geom_buffer,
			char* binning_buffer,
			char* image_buffer,
			const float* dL_dpix,
			const float* dL_dpix_depth,
			const float* dL_dalphas,
			float* dL_dmean2D,
			float* dL_dconic,
			float* dL_dopacity,
			float* dL_dcolor,
			float* dL_ddepth,
			float* dL_dmean3D,
			float* dL_dcov3D,
			float* dL_dsh,
			float* dL_dscale,
			float* dL_drot,
			bool debug);
	};
}
93
+
94
+ #endif
diff-gaussian-rasterization/cuda_rasterizer/rasterizer_impl.cu ADDED
@@ -0,0 +1,447 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #include "rasterizer_impl.h"
13
+ #include <iostream>
14
+ #include <fstream>
15
+ #include <algorithm>
16
+ #include <numeric>
17
+ #include <cuda.h>
18
+ #include "cuda_runtime.h"
19
+ #include "device_launch_parameters.h"
20
+ #include <cub/cub.cuh>
21
+ #include <cub/device/device_radix_sort.cuh>
22
+ #define GLM_FORCE_CUDA
23
+ #include <glm/glm.hpp>
24
+
25
+ #include <cooperative_groups.h>
26
+ #include <cooperative_groups/reduce.h>
27
+ namespace cg = cooperative_groups;
28
+
29
+ #include "auxiliary.h"
30
+ #include "forward.h"
31
+ #include "backward.h"
32
+
33
// CPU helper: binary-searches for the position just above the most
// significant set bit of n (e.g. 1 -> 1, 4 -> 3, 256 -> 9). The result is
// never smaller than 1, so n == 0 also yields 1.
uint32_t getHigherMsb(uint32_t n)
{
	// Start in the middle of the word and halve the stride every iteration.
	uint32_t msb = sizeof(n) * 4;
	for (uint32_t stride = msb / 2; stride > 0; stride /= 2)
	{
		// Move up while bits remain at or above `msb`, otherwise move down.
		const bool bits_above = (n >> msb) != 0;
		msb = bits_above ? msb + stride : msb - stride;
	}

	// The search lands on an odd position; a final adjustment is needed when
	// a bit is still set at or above it.
	return (n >> msb) ? msb + 1 : msb;
}
51
+
52
// Kernel wrapper around the coarse frustum containment test from the
// auxiliary header: one thread per point, writing the test result for
// point i to present[i].
__global__ void checkFrustum(int P,
	const float* orig_points,
	const float* viewmatrix,
	const float* projmatrix,
	bool* present)
{
	const auto point = cg::this_grid().thread_rank();

	// Surplus threads of the last block have nothing to do.
	if (point < P)
	{
		// in_frustum also reports a view-space position; it is not needed here.
		float3 view_pos_unused;
		present[point] = in_frustum(point, orig_points, viewmatrix, projmatrix, false, view_pos_unused);
	}
}
67
+
68
// Emits one (key, value) pair for every tile a Gaussian's bounding rectangle
// overlaps. One thread per Gaussian (1:N mapping).
//
//   key   = | tile ID (upper 32 bits) | raw bits of the depth (lower 32 bits) |
//   value = index of the Gaussian
//
// Sorting the values by these keys orders the Gaussian IDs first by tile and
// then by depth. offsets[i] must hold the inclusive prefix sum of the
// per-Gaussian tile counts, so Gaussian i writes starting at offsets[i - 1].
__global__ void duplicateWithKeys(
	int P,
	const float2* points_xy,
	const float* depths,
	const uint32_t* offsets,
	uint64_t* gaussian_keys_unsorted,
	uint32_t* gaussian_values_unsorted,
	int* radii,
	dim3 grid)
{
	const auto gaussian = cg::this_grid().thread_rank();

	// Skip surplus threads and Gaussians marked invisible (radius <= 0).
	if (gaussian >= P || radii[gaussian] <= 0)
		return;

	// First output slot owned by this Gaussian.
	uint32_t slot = (gaussian == 0) ? 0 : offsets[gaussian - 1];

	// Tile rectangle covered by the Gaussian.
	uint2 tile_lo, tile_hi;
	getRect(points_xy[gaussian], radii[gaussian], tile_lo, tile_hi, grid);

	// The depth contributes its raw 32-bit pattern to the key.
	const uint32_t depth_bits = *((uint32_t*)&depths[gaussian]);

	for (int ty = tile_lo.y; ty < tile_hi.y; ty++)
	{
		for (int tx = tile_lo.x; tx < tile_hi.x; tx++)
		{
			uint64_t tile_key = ty * grid.x + tx;
			gaussian_keys_unsorted[slot] = (tile_key << 32) | depth_bits;
			gaussian_values_unsorted[slot] = gaussian;
			slot++;
		}
	}
}
112
+
113
// Finds, in the sorted key list, where each tile's run of entries starts and
// ends. One thread per list entry: a thread writes a range boundary whenever
// its entry's tile ID (upper 32 key bits) differs from the previous entry's,
// and the first / last threads close the outer ends of the list.
__global__ void identifyTileRanges(int L, uint64_t* point_list_keys, uint2* ranges)
{
	const auto entry = cg::this_grid().thread_rank();
	if (entry >= L)
		return;

	const uint32_t tile_here = point_list_keys[entry] >> 32;

	if (entry > 0)
	{
		// A change of tile ID ends the previous tile's range and starts ours.
		const uint32_t tile_before = point_list_keys[entry - 1] >> 32;
		if (tile_here != tile_before)
		{
			ranges[tile_before].y = entry;
			ranges[tile_here].x = entry;
		}
	}
	else
	{
		// The very first entry always opens its tile's range.
		ranges[tile_here].x = 0;
	}

	// The very last entry always closes its tile's range.
	if (entry == L - 1)
		ranges[tile_here].y = L;
}
139
+
140
// Flags each of the P Gaussians as visible or not by launching the
// checkFrustum kernel (256 threads per block, one thread per Gaussian).
// The result for Gaussian i is stored in present[i].
void CudaRasterizer::Rasterizer::markVisible(
	int P,
	float* means3D,
	float* viewmatrix,
	float* projmatrix,
	bool* present)
{
	const int threads_per_block = 256;
	const int num_blocks = (P + threads_per_block - 1) / threads_per_block;

	checkFrustum<<<num_blocks, threads_per_block>>>(
		P, means3D, viewmatrix, projmatrix, present);
}
154
+
155
// Carves the per-Gaussian working arrays for P Gaussians out of `chunk`,
// each aligned to 128 bytes, and advances `chunk` past everything it took.
// The carving order determines the buffer layout, so it must not change.
CudaRasterizer::GeometryState CudaRasterizer::GeometryState::fromChunk(char*& chunk, size_t P)
{
	GeometryState state;

	obtain(chunk, state.depths, P, 128);
	obtain(chunk, state.clamped, P * 3, 128);
	obtain(chunk, state.internal_radii, P, 128);
	obtain(chunk, state.means2D, P, 128);
	obtain(chunk, state.cov3D, P * 6, 128);
	obtain(chunk, state.conic_opacity, P, 128);
	obtain(chunk, state.rgb, P * 3, 128);
	obtain(chunk, state.tiles_touched, P, 128);

	// Called with a null workspace, CUB only reports the temporary storage
	// size the prefix sum needs (into scan_size); that space is reserved next.
	cub::DeviceScan::InclusiveSum(nullptr, state.scan_size, state.tiles_touched, state.tiles_touched, P);
	obtain(chunk, state.scanning_space, state.scan_size, 128);

	obtain(chunk, state.point_offsets, P, 128);
	return state;
}
171
+
172
// Carves the image-sized working arrays (N entries each, 128-byte aligned)
// out of `chunk` and advances `chunk` past them.
CudaRasterizer::ImageState CudaRasterizer::ImageState::fromChunk(char*& chunk, size_t N)
{
	ImageState state;
	obtain(chunk, state.n_contrib, N, 128);
	obtain(chunk, state.ranges, N, 128);
	return state;
}
179
+
180
// Carves the key/value lists used for sorting P Gaussian/tile instances out
// of `chunk` (128-byte aligned each), followed by the temporary storage the
// CUB radix sort asks for, and advances `chunk` past everything it took.
CudaRasterizer::BinningState CudaRasterizer::BinningState::fromChunk(char*& chunk, size_t P)
{
	BinningState state;

	obtain(chunk, state.point_list, P, 128);
	obtain(chunk, state.point_list_unsorted, P, 128);
	obtain(chunk, state.point_list_keys, P, 128);
	obtain(chunk, state.point_list_keys_unsorted, P, 128);

	// Called with a null workspace, CUB only reports the temporary storage
	// size the sort needs (into sorting_size); that space is reserved next.
	cub::DeviceRadixSort::SortPairs(
		nullptr, state.sorting_size,
		state.point_list_keys_unsorted, state.point_list_keys,
		state.point_list_unsorted, state.point_list, P);
	obtain(chunk, state.list_sorting_space, state.sorting_size, 128);

	return state;
}
194
+
195
// Forward rendering procedure for differentiable rasterization
// of Gaussians.
//
// Stages: per-Gaussian preprocessing -> prefix sum of touched-tile counts ->
// key/value duplication per (Gaussian, tile) -> radix sort by [tile | depth]
// -> per-tile range detection -> per-tile blending.
//
// The three buffer callbacks are each called once with the number of bytes
// required and must return a buffer of at least that size. When `radii` is
// null, an internal array is used instead. When `debug` is true, CHECK_CUDA
// is asked to verify every stage.
//
// Returns the number of Gaussian/tile instances that were rendered.
//
// An empty scene (P == 0) is handled explicitly: the per-Gaussian stages are
// skipped (they would otherwise launch zero-sized grids and read one element
// in front of the offsets array), the instance count is 0, and the image is
// still produced by the blending stage from empty tile ranges.
int CudaRasterizer::Rasterizer::forward(
	std::function<char* (size_t)> geometryBuffer,
	std::function<char* (size_t)> binningBuffer,
	std::function<char* (size_t)> imageBuffer,
	const int P, int D, int M,
	const float* background,
	const int width, int height,
	const float* means3D,
	const float* shs,
	const float* colors_precomp,
	const float* opacities,
	const float* scales,
	const float scale_modifier,
	const float* rotations,
	const float* cov3D_precomp,
	const float* viewmatrix,
	const float* projmatrix,
	const float* cam_pos,
	const float tan_fovx, float tan_fovy,
	const bool prefiltered,
	float* out_color,
	float* out_depth,
	float* out_alpha,
	int* radii,
	bool debug)
{
	const float focal_y = height / (2.0f * tan_fovy);
	const float focal_x = width / (2.0f * tan_fovx);

	size_t chunk_size = required<GeometryState>(P);
	char* chunkptr = geometryBuffer(chunk_size);
	GeometryState geomState = GeometryState::fromChunk(chunkptr, P);

	if (radii == nullptr)
	{
		radii = geomState.internal_radii;
	}

	dim3 tile_grid((width + BLOCK_X - 1) / BLOCK_X, (height + BLOCK_Y - 1) / BLOCK_Y, 1);
	dim3 block(BLOCK_X, BLOCK_Y, 1);

	// Dynamically resize image-based auxiliary buffers during training
	size_t img_chunk_size = required<ImageState>(width * height);
	char* img_chunkptr = imageBuffer(img_chunk_size);
	ImageState imgState = ImageState::fromChunk(img_chunkptr, width * height);

	if (NUM_CHANNELS != 3 && colors_precomp == nullptr)
	{
		throw std::runtime_error("For non-RGB, provide precomputed Gaussian colors!");
	}

	// Total number of Gaussian/tile instances; stays 0 for an empty scene.
	int num_rendered = 0;

	if (P > 0)
	{
		// Run preprocessing per-Gaussian (transformation, bounding, conversion of SHs to RGB)
		CHECK_CUDA(FORWARD::preprocess(
			P, D, M,
			means3D,
			(glm::vec3*)scales,
			scale_modifier,
			(glm::vec4*)rotations,
			opacities,
			shs,
			geomState.clamped,
			cov3D_precomp,
			colors_precomp,
			viewmatrix, projmatrix,
			(glm::vec3*)cam_pos,
			width, height,
			focal_x, focal_y,
			tan_fovx, tan_fovy,
			radii,
			geomState.means2D,
			geomState.depths,
			geomState.cov3D,
			geomState.rgb,
			geomState.conic_opacity,
			tile_grid,
			geomState.tiles_touched,
			prefiltered
		), debug)

		// Compute prefix sum over full list of touched tile counts by Gaussians
		// E.g., [2, 3, 0, 2, 1] -> [2, 5, 5, 7, 8]
		CHECK_CUDA(cub::DeviceScan::InclusiveSum(geomState.scanning_space, geomState.scan_size, geomState.tiles_touched, geomState.point_offsets, P), debug)

		// Retrieve total number of Gaussian instances to launch and resize aux buffers.
		// The last prefix-sum entry is the total; it only exists when P > 0.
		CHECK_CUDA(cudaMemcpy(&num_rendered, geomState.point_offsets + P - 1, sizeof(int), cudaMemcpyDeviceToHost), debug);
	}

	size_t binning_chunk_size = required<BinningState>(num_rendered);
	char* binning_chunkptr = binningBuffer(binning_chunk_size);
	BinningState binningState = BinningState::fromChunk(binning_chunkptr, num_rendered);

	if (P > 0)
	{
		// For each instance to be rendered, produce adequate [ tile | depth ] key
		// and corresponding duplicated Gaussian indices to be sorted
		duplicateWithKeys << <(P + 255) / 256, 256 >> > (
			P,
			geomState.means2D,
			geomState.depths,
			geomState.point_offsets,
			binningState.point_list_keys_unsorted,
			binningState.point_list_unsorted,
			radii,
			tile_grid)
		CHECK_CUDA(, debug)
	}

	// Only the depth bits plus the bits actually used by tile IDs are sorted.
	int bit = getHigherMsb(tile_grid.x * tile_grid.y);

	// Sort complete list of (duplicated) Gaussian indices by keys
	CHECK_CUDA(cub::DeviceRadixSort::SortPairs(
		binningState.list_sorting_space,
		binningState.sorting_size,
		binningState.point_list_keys_unsorted, binningState.point_list_keys,
		binningState.point_list_unsorted, binningState.point_list,
		num_rendered, 0, 32 + bit), debug)

	// Tiles without any Gaussian keep the empty range {0, 0}.
	CHECK_CUDA(cudaMemset(imgState.ranges, 0, tile_grid.x * tile_grid.y * sizeof(uint2)), debug);

	// Identify start and end of per-tile workloads in sorted list
	if (num_rendered > 0)
		identifyTileRanges << <(num_rendered + 255) / 256, 256 >> > (
			num_rendered,
			binningState.point_list_keys,
			imgState.ranges);
	CHECK_CUDA(, debug);

	// Let each tile blend its range of Gaussians independently in parallel
	const float* feature_ptr = colors_precomp != nullptr ? colors_precomp : geomState.rgb;
	CHECK_CUDA(FORWARD::render(
		tile_grid, block,
		imgState.ranges,
		binningState.point_list,
		width, height,
		geomState.means2D,
		feature_ptr,
		geomState.depths,
		geomState.conic_opacity,
		out_alpha,
		imgState.n_contrib,
		background,
		out_color,
		out_depth), debug);

	return num_rendered;
}
340
+
341
// Backward pass matching forward(): turns per-pixel loss gradients into
// gradients for the Gaussian parameters.
//
// geom_buffer / binning_buffer / img_buffer must be the scratch buffers that
// the forward pass filled, and R the instance count it returned; the working
// arrays are re-derived from them with the same layout. When `radii` is null
// the internally stored radii are used. When `debug` is true, CHECK_CUDA is
// asked to verify both stages.
void CudaRasterizer::Rasterizer::backward(
	const int P, int D, int M, int R,
	const float* background,
	const int width, int height,
	const float* means3D,
	const float* shs,
	const float* colors_precomp,
	const float* alphas,
	const float* scales,
	const float scale_modifier,
	const float* rotations,
	const float* cov3D_precomp,
	const float* viewmatrix,
	const float* projmatrix,
	const float* campos,
	const float tan_fovx, float tan_fovy,
	const int* radii,
	char* geom_buffer,
	char* binning_buffer,
	char* img_buffer,
	const float* dL_dpix,
	const float* dL_dpix_depth,
	const float* dL_dalphas,
	float* dL_dmean2D,
	float* dL_dconic,
	float* dL_dopacity,
	float* dL_dcolor,
	float* dL_ddepth,
	float* dL_dmean3D,
	float* dL_dcov3D,
	float* dL_dsh,
	float* dL_dscale,
	float* dL_drot,
	bool debug)
{
	// Re-create views onto the buffers written by the forward pass.
	GeometryState geom = GeometryState::fromChunk(geom_buffer, P);
	BinningState binning = BinningState::fromChunk(binning_buffer, R);
	ImageState img = ImageState::fromChunk(img_buffer, width * height);

	if (radii == nullptr)
	{
		radii = geom.internal_radii;
	}

	const float focal_y = height / (2.0f * tan_fovy);
	const float focal_x = width / (2.0f * tan_fovx);

	const dim3 tile_grid((width + BLOCK_X - 1) / BLOCK_X, (height + BLOCK_Y - 1) / BLOCK_Y, 1);
	const dim3 block(BLOCK_X, BLOCK_Y, 1);

	// Stage 1: from per-pixel loss gradients to gradients w.r.t. 2D mean
	// position, conic matrix, opacity, colour and depth of each Gaussian.
	// Caller-supplied colours take precedence over the ones computed from SHs.
	const float* color_ptr = colors_precomp;
	if (color_ptr == nullptr)
	{
		color_ptr = geom.rgb;
	}
	const float* depth_ptr = geom.depths;
	CHECK_CUDA(BACKWARD::render(
		tile_grid,
		block,
		img.ranges,
		binning.point_list,
		width, height,
		background,
		geom.means2D,
		geom.conic_opacity,
		color_ptr,
		depth_ptr,
		alphas,
		img.n_contrib,
		dL_dpix,
		dL_dpix_depth,
		dL_dalphas,
		(float3*)dL_dmean2D,
		(float4*)dL_dconic,
		dL_dopacity,
		dL_dcolor,
		dL_ddepth), debug)

	// Stage 2: the rest of preprocessing. A caller-supplied 3D covariance
	// takes precedence over the one computed from scales/rotations in the
	// forward pass.
	const float* cov3D_ptr = cov3D_precomp;
	if (cov3D_ptr == nullptr)
	{
		cov3D_ptr = geom.cov3D;
	}
	CHECK_CUDA(BACKWARD::preprocess(P, D, M,
		(float3*)means3D,
		radii,
		shs,
		geom.clamped,
		(glm::vec3*)scales,
		(glm::vec4*)rotations,
		scale_modifier,
		cov3D_ptr,
		viewmatrix,
		projmatrix,
		focal_x, focal_y,
		tan_fovx, tan_fovy,
		(glm::vec3*)campos,
		(float3*)dL_dmean2D,
		dL_dconic,
		(glm::vec3*)dL_dmean3D,
		dL_dcolor,
		dL_ddepth,
		dL_dcov3D,
		dL_dsh,
		(glm::vec3*)dL_dscale,
		(glm::vec4*)dL_drot), debug)
}
diff-gaussian-rasterization/cuda_rasterizer/rasterizer_impl.h ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #pragma once
13
+
14
+ #include <iostream>
15
+ #include <vector>
16
+ #include "rasterizer.h"
17
+ #include <cuda_runtime_api.h>
18
+
19
+ namespace CudaRasterizer
20
+ {
21
+ template <typename T>
22
+ static void obtain(char*& chunk, T*& ptr, std::size_t count, std::size_t alignment)
23
+ {
24
+ std::size_t offset = (reinterpret_cast<std::uintptr_t>(chunk) + alignment - 1) & ~(alignment - 1);
25
+ ptr = reinterpret_cast<T*>(offset);
26
+ chunk = reinterpret_cast<char*>(ptr + count);
27
+ }
28
+
29
+ struct GeometryState
30
+ {
31
+ size_t scan_size;
32
+ float* depths;
33
+ char* scanning_space;
34
+ bool* clamped;
35
+ int* internal_radii;
36
+ float2* means2D;
37
+ float* cov3D;
38
+ float4* conic_opacity;
39
+ float* rgb;
40
+ uint32_t* point_offsets;
41
+ uint32_t* tiles_touched;
42
+
43
+ static GeometryState fromChunk(char*& chunk, size_t P);
44
+ };
45
+
46
+ struct ImageState
47
+ {
48
+ uint2* ranges;
49
+ uint32_t* n_contrib;
50
+
51
+ static ImageState fromChunk(char*& chunk, size_t N);
52
+ };
53
+
54
+ struct BinningState
55
+ {
56
+ size_t sorting_size;
57
+ uint64_t* point_list_keys_unsorted;
58
+ uint64_t* point_list_keys;
59
+ uint32_t* point_list_unsorted;
60
+ uint32_t* point_list;
61
+ char* list_sorting_space;
62
+
63
+ static BinningState fromChunk(char*& chunk, size_t P);
64
+ };
65
+
66
+ template<typename T>
67
+ size_t required(size_t P)
68
+ {
69
+ char* size = nullptr;
70
+ T::fromChunk(size, P);
71
+ return ((size_t)size) + 128;
72
+ }
73
+ };
diff-gaussian-rasterization/diff_gaussian_rasterization/__init__.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact george.drettakis@inria.fr
10
+ #
11
+
12
+ from typing import NamedTuple
13
+ import torch.nn as nn
14
+ import torch
15
+ from . import _C
16
+
17
def cpu_deep_copy_tuple(input_tuple):
    """Return a tuple mirroring ``input_tuple`` with every tensor copied to CPU.

    Each ``torch.Tensor`` element is replaced by an independent CPU clone;
    elements of any other type are passed through unchanged.
    """
    def _snapshot(entry):
        if isinstance(entry, torch.Tensor):
            return entry.cpu().clone()
        return entry

    return tuple(_snapshot(entry) for entry in input_tuple)
20
+
21
def rasterize_gaussians(
    means3D,
    means2D,
    sh,
    colors_precomp,
    opacities,
    scales,
    rotations,
    cov3Ds_precomp,
    raster_settings,
):
    """Functional entry point for the differentiable rasterizer.

    Passes all arguments, positionally and in the same order, to
    ``_RasterizeGaussians.apply`` and returns its result.
    """
    forwarded = (
        means3D,
        means2D,
        sh,
        colors_precomp,
        opacities,
        scales,
        rotations,
        cov3Ds_precomp,
        raster_settings,
    )
    return _RasterizeGaussians.apply(*forwarded)
43
+
44
class _RasterizeGaussians(torch.autograd.Function):
    """Autograd bridge to the compiled C++/CUDA rasterizer in ``_C``.

    ``forward`` calls ``_C.rasterize_gaussians`` and ``backward`` calls
    ``_C.rasterize_gaussians_backward``. When ``raster_settings.debug`` is
    set, the arguments of a failing call are saved to disk before the error
    is re-raised.
    """

    @staticmethod
    def forward(
        ctx,
        means3D,
        means2D,
        sh,
        colors_precomp,
        opacities,
        scales,
        rotations,
        cov3Ds_precomp,
        raster_settings,
    ):
        """Rasterize the Gaussians.

        Returns:
            ``(color, radii, depth, alpha)`` as produced by the extension.
        """
        # Restructure arguments the way that the C++ lib expects them
        args = (
            raster_settings.bg,
            means3D,
            colors_precomp,
            opacities,
            scales,
            rotations,
            raster_settings.scale_modifier,
            cov3Ds_precomp,
            raster_settings.viewmatrix,
            raster_settings.projmatrix,
            raster_settings.tanfovx,
            raster_settings.tanfovy,
            raster_settings.image_height,
            raster_settings.image_width,
            sh,
            raster_settings.sh_degree,
            raster_settings.campos,
            raster_settings.prefiltered,
            raster_settings.debug,
        )

        # Invoke C++/CUDA rasterizer
        if raster_settings.debug:
            # Copy the arguments before they can be corrupted
            cpu_args = cpu_deep_copy_tuple(args)
            try:
                num_rendered, color, depth, alpha, radii, geomBuffer, binningBuffer, imgBuffer = _C.rasterize_gaussians(*args)
            except Exception:
                torch.save(cpu_args, "snapshot_fw.dump")
                print("\nAn error occurred in forward. Please forward snapshot_fw.dump for debugging.")
                # Bare raise keeps the original exception and traceback.
                raise
        else:
            num_rendered, color, depth, alpha, radii, geomBuffer, binningBuffer, imgBuffer = _C.rasterize_gaussians(*args)

        # Keep relevant tensors for backward
        ctx.raster_settings = raster_settings
        ctx.num_rendered = num_rendered
        ctx.save_for_backward(colors_precomp, means3D, scales, rotations, cov3Ds_precomp, radii, sh, geomBuffer, binningBuffer, imgBuffer, alpha)
        return color, radii, depth, alpha

    @staticmethod
    def backward(ctx, grad_color, grad_radii, grad_depth, grad_alpha):
        """Compute input gradients from the gradients of the forward outputs.

        ``grad_radii`` is accepted because ``radii`` is a forward output, but
        it is not used. Returns one entry per ``forward`` input (after
        ``ctx``), in the same order; ``raster_settings`` gets ``None``.
        """
        # Restore necessary values from context
        num_rendered = ctx.num_rendered
        raster_settings = ctx.raster_settings
        colors_precomp, means3D, scales, rotations, cov3Ds_precomp, radii, sh, geomBuffer, binningBuffer, imgBuffer, alpha = ctx.saved_tensors

        # Restructure args as C++ method expects them
        args = (
            raster_settings.bg,
            means3D,
            radii,
            colors_precomp,
            scales,
            rotations,
            raster_settings.scale_modifier,
            cov3Ds_precomp,
            raster_settings.viewmatrix,
            raster_settings.projmatrix,
            raster_settings.tanfovx,
            raster_settings.tanfovy,
            grad_color,
            grad_depth,
            grad_alpha,
            sh,
            raster_settings.sh_degree,
            raster_settings.campos,
            geomBuffer,
            num_rendered,
            binningBuffer,
            imgBuffer,
            alpha,
            raster_settings.debug,
        )

        # Compute gradients for relevant tensors by invoking backward method
        if raster_settings.debug:
            # Copy the arguments before they can be corrupted
            cpu_args = cpu_deep_copy_tuple(args)
            try:
                grad_means2D, grad_colors_precomp, grad_opacities, grad_means3D, grad_cov3Ds_precomp, grad_sh, grad_scales, grad_rotations = _C.rasterize_gaussians_backward(*args)
            except Exception:
                torch.save(cpu_args, "snapshot_bw.dump")
                print("\nAn error occurred in backward. Writing snapshot_bw.dump for debugging.\n")
                # Bare raise keeps the original exception and traceback.
                raise
        else:
            grad_means2D, grad_colors_precomp, grad_opacities, grad_means3D, grad_cov3Ds_precomp, grad_sh, grad_scales, grad_rotations = _C.rasterize_gaussians_backward(*args)

        grads = (
            grad_means3D,
            grad_means2D,
            grad_sh,
            grad_colors_precomp,
            grad_opacities,
            grad_scales,
            grad_rotations,
            grad_cov3Ds_precomp,
            None,
        )

        return grads
159
+
160
class GaussianRasterizationSettings(NamedTuple):
    # Immutable bundle of settings that the rasterizer wrappers read and pass
    # on to the compiled extension alongside the per-Gaussian tensors.

    # Output image size.
    image_height: int
    image_width: int

    # Passed to the extension as tan_fovx / tan_fovy.
    tanfovx: float
    tanfovy: float

    # Passed to the extension as the background tensor.
    bg: torch.Tensor

    # Scalar passed next to the scales / rotations tensors.
    scale_modifier: float

    # Camera matrices and camera position tensors.
    viewmatrix: torch.Tensor
    projmatrix: torch.Tensor
    sh_degree: int
    campos: torch.Tensor

    # Flags passed straight through to the extension; `debug` additionally
    # enables the argument snapshots taken by the autograd function.
    prefiltered: bool
    debug: bool
173
+
174
class GaussianRasterizer(nn.Module):
    """Module wrapper around ``rasterize_gaussians`` with fixed settings."""

    def __init__(self, raster_settings):
        super().__init__()
        self.raster_settings = raster_settings

    def markVisible(self, positions):
        """Return the result of ``_C.mark_visible`` for ``positions``.

        The call runs under ``torch.no_grad()`` and uses the view and
        projection matrices from ``self.raster_settings``.
        """
        # Mark visible points (based on frustum culling for camera) with a boolean
        with torch.no_grad():
            raster_settings = self.raster_settings
            visible = _C.mark_visible(
                positions,
                raster_settings.viewmatrix,
                raster_settings.projmatrix)

        return visible

    def forward(self, means3D, means2D, opacities, shs = None, colors_precomp = None, scales = None, rotations = None, cov3D_precomp = None):
        """Validate the optional inputs and invoke the rasterization routine.

        Exactly one of ``shs`` / ``colors_precomp`` must be given, and either
        both ``scales`` and ``rotations`` or ``cov3D_precomp`` alone.

        Raises:
            Exception: if the optional inputs violate the rules above.

        Returns:
            Whatever ``rasterize_gaussians`` returns.
        """
        raster_settings = self.raster_settings

        # Both missing or both present are equally invalid.
        if (shs is None) == (colors_precomp is None):
            raise Exception('Please provide exactly one of either SHs or precomputed colors!')

        pair_incomplete = scales is None or rotations is None
        pair_touched = scales is not None or rotations is not None
        has_cov = cov3D_precomp is not None
        if (pair_incomplete and not has_cov) or (pair_touched and has_cov):
            raise Exception('Please provide exactly one of either scale/rotation pair or precomputed 3D covariance!')

        def _or_empty(value):
            # Omitted optional inputs are replaced by an empty tensor.
            return torch.Tensor([]) if value is None else value

        shs = _or_empty(shs)
        colors_precomp = _or_empty(colors_precomp)
        scales = _or_empty(scales)
        rotations = _or_empty(rotations)
        cov3D_precomp = _or_empty(cov3D_precomp)

        # Invoke C++/CUDA rasterization routine
        return rasterize_gaussians(
            means3D,
            means2D,
            shs,
            colors_precomp,
            opacities,
            scales,
            rotations,
            cov3D_precomp,
            raster_settings,
        )
224
+
diff-gaussian-rasterization/ext.cpp ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #include <torch/extension.h>
13
+ #include "rasterize_points.h"
14
+
15
+ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
16
+ m.def("rasterize_gaussians", &RasterizeGaussiansCUDA);
17
+ m.def("rasterize_gaussians_backward", &RasterizeGaussiansBackwardCUDA);
18
+ m.def("mark_visible", &markVisible);
19
+ }
diff-gaussian-rasterization/rasterize_points.cu ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #include <math.h>
13
+ #include <torch/extension.h>
14
+ #include <cstdio>
15
+ #include <sstream>
16
+ #include <iostream>
17
+ #include <tuple>
18
+ #include <stdio.h>
19
+ #include <cuda_runtime_api.h>
20
+ #include <memory>
21
+ #include "cuda_rasterizer/config.h"
22
+ #include "cuda_rasterizer/rasterizer.h"
23
+ #include <fstream>
24
+ #include <string>
25
+ #include <functional>
26
+
27
+ std::function<char*(size_t N)> resizeFunctional(torch::Tensor& t) {
28
+ auto lambda = [&t](size_t N) {
29
+ t.resize_({(long long)N});
30
+ return reinterpret_cast<char*>(t.contiguous().data_ptr());
31
+ };
32
+ return lambda;
33
+ }
34
+
35
+ std::tuple<int, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
36
+ RasterizeGaussiansCUDA(
37
+ const torch::Tensor& background,
38
+ const torch::Tensor& means3D,
39
+ const torch::Tensor& colors,
40
+ const torch::Tensor& opacity,
41
+ const torch::Tensor& scales,
42
+ const torch::Tensor& rotations,
43
+ const float scale_modifier,
44
+ const torch::Tensor& cov3D_precomp,
45
+ const torch::Tensor& viewmatrix,
46
+ const torch::Tensor& projmatrix,
47
+ const float tan_fovx,
48
+ const float tan_fovy,
49
+ const int image_height,
50
+ const int image_width,
51
+ const torch::Tensor& sh,
52
+ const int degree,
53
+ const torch::Tensor& campos,
54
+ const bool prefiltered,
55
+ const bool debug)
56
+ {
57
+ if (means3D.ndimension() != 2 || means3D.size(1) != 3) {
58
+ AT_ERROR("means3D must have dimensions (num_points, 3)");
59
+ }
60
+
61
+ const int P = means3D.size(0);
62
+ const int H = image_height;
63
+ const int W = image_width;
64
+
65
+ auto int_opts = means3D.options().dtype(torch::kInt32);
66
+ auto float_opts = means3D.options().dtype(torch::kFloat32);
67
+
68
+ torch::Tensor out_color = torch::full({NUM_CHANNELS, H, W}, 0.0, float_opts);
69
+ torch::Tensor out_depth = torch::full({1, H, W}, 0.0, float_opts);
70
+ torch::Tensor out_alpha = torch::full({1, H, W}, 0.0, float_opts);
71
+ torch::Tensor radii = torch::full({P}, 0, means3D.options().dtype(torch::kInt32));
72
+
73
+ torch::Device device(torch::kCUDA);
74
+ torch::TensorOptions options(torch::kByte);
75
+ torch::Tensor geomBuffer = torch::empty({0}, options.device(device));
76
+ torch::Tensor binningBuffer = torch::empty({0}, options.device(device));
77
+ torch::Tensor imgBuffer = torch::empty({0}, options.device(device));
78
+ std::function<char*(size_t)> geomFunc = resizeFunctional(geomBuffer);
79
+ std::function<char*(size_t)> binningFunc = resizeFunctional(binningBuffer);
80
+ std::function<char*(size_t)> imgFunc = resizeFunctional(imgBuffer);
81
+
82
+ int rendered = 0;
83
+ if(P != 0)
84
+ {
85
+ int M = 0;
86
+ if(sh.size(0) != 0)
87
+ {
88
+ M = sh.size(1);
89
+ }
90
+
91
+ rendered = CudaRasterizer::Rasterizer::forward(
92
+ geomFunc,
93
+ binningFunc,
94
+ imgFunc,
95
+ P, degree, M,
96
+ background.contiguous().data<float>(),
97
+ W, H,
98
+ means3D.contiguous().data<float>(),
99
+ sh.contiguous().data_ptr<float>(),
100
+ colors.contiguous().data<float>(),
101
+ opacity.contiguous().data<float>(),
102
+ scales.contiguous().data_ptr<float>(),
103
+ scale_modifier,
104
+ rotations.contiguous().data_ptr<float>(),
105
+ cov3D_precomp.contiguous().data<float>(),
106
+ viewmatrix.contiguous().data<float>(),
107
+ projmatrix.contiguous().data<float>(),
108
+ campos.contiguous().data<float>(),
109
+ tan_fovx,
110
+ tan_fovy,
111
+ prefiltered,
112
+ out_color.contiguous().data<float>(),
113
+ out_depth.contiguous().data<float>(),
114
+ out_alpha.contiguous().data<float>(),
115
+ radii.contiguous().data<int>(),
116
+ debug);
117
+ }
118
+ return std::make_tuple(rendered, out_color, out_depth, out_alpha, radii, geomBuffer, binningBuffer, imgBuffer);
119
+ }
120
+
121
+ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
122
+ RasterizeGaussiansBackwardCUDA(
123
+ const torch::Tensor& background,
124
+ const torch::Tensor& means3D,
125
+ const torch::Tensor& radii,
126
+ const torch::Tensor& colors,
127
+ const torch::Tensor& scales,
128
+ const torch::Tensor& rotations,
129
+ const float scale_modifier,
130
+ const torch::Tensor& cov3D_precomp,
131
+ const torch::Tensor& viewmatrix,
132
+ const torch::Tensor& projmatrix,
133
+ const float tan_fovx,
134
+ const float tan_fovy,
135
+ const torch::Tensor& dL_dout_color,
136
+ const torch::Tensor& dL_dout_depth,
137
+ const torch::Tensor& dL_dout_alpha,
138
+ const torch::Tensor& sh,
139
+ const int degree,
140
+ const torch::Tensor& campos,
141
+ const torch::Tensor& geomBuffer,
142
+ const int R,
143
+ const torch::Tensor& binningBuffer,
144
+ const torch::Tensor& imageBuffer,
145
+ const torch::Tensor& alphas,
146
+ const bool debug)
147
+ {
148
+ const int P = means3D.size(0);
149
+ const int H = dL_dout_color.size(1);
150
+ const int W = dL_dout_color.size(2);
151
+
152
+ int M = 0;
153
+ if(sh.size(0) != 0)
154
+ {
155
+ M = sh.size(1);
156
+ }
157
+
158
+ torch::Tensor dL_dmeans3D = torch::zeros({P, 3}, means3D.options());
159
+ torch::Tensor dL_dmeans2D = torch::zeros({P, 3}, means3D.options());
160
+ torch::Tensor dL_dcolors = torch::zeros({P, NUM_CHANNELS}, means3D.options());
161
+ torch::Tensor dL_ddepths = torch::zeros({P, 1}, means3D.options());
162
+ torch::Tensor dL_dconic = torch::zeros({P, 2, 2}, means3D.options());
163
+ torch::Tensor dL_dopacity = torch::zeros({P, 1}, means3D.options());
164
+ torch::Tensor dL_dcov3D = torch::zeros({P, 6}, means3D.options());
165
+ torch::Tensor dL_dsh = torch::zeros({P, M, 3}, means3D.options());
166
+ torch::Tensor dL_dscales = torch::zeros({P, 3}, means3D.options());
167
+ torch::Tensor dL_drotations = torch::zeros({P, 4}, means3D.options());
168
+
169
+ if(P != 0)
170
+ {
171
+ CudaRasterizer::Rasterizer::backward(P, degree, M, R,
172
+ background.contiguous().data<float>(),
173
+ W, H,
174
+ means3D.contiguous().data<float>(),
175
+ sh.contiguous().data<float>(),
176
+ colors.contiguous().data<float>(),
177
+ alphas.contiguous().data<float>(),
178
+ scales.data_ptr<float>(),
179
+ scale_modifier,
180
+ rotations.data_ptr<float>(),
181
+ cov3D_precomp.contiguous().data<float>(),
182
+ viewmatrix.contiguous().data<float>(),
183
+ projmatrix.contiguous().data<float>(),
184
+ campos.contiguous().data<float>(),
185
+ tan_fovx,
186
+ tan_fovy,
187
+ radii.contiguous().data<int>(),
188
+ reinterpret_cast<char*>(geomBuffer.contiguous().data_ptr()),
189
+ reinterpret_cast<char*>(binningBuffer.contiguous().data_ptr()),
190
+ reinterpret_cast<char*>(imageBuffer.contiguous().data_ptr()),
191
+ dL_dout_color.contiguous().data<float>(),
192
+ dL_dout_depth.contiguous().data<float>(),
193
+ dL_dout_alpha.contiguous().data<float>(),
194
+ dL_dmeans2D.contiguous().data<float>(),
195
+ dL_dconic.contiguous().data<float>(),
196
+ dL_dopacity.contiguous().data<float>(),
197
+ dL_dcolors.contiguous().data<float>(),
198
+ dL_ddepths.contiguous().data<float>(),
199
+ dL_dmeans3D.contiguous().data<float>(),
200
+ dL_dcov3D.contiguous().data<float>(),
201
+ dL_dsh.contiguous().data<float>(),
202
+ dL_dscales.contiguous().data<float>(),
203
+ dL_drotations.contiguous().data<float>(),
204
+ debug);
205
+ }
206
+
207
+ return std::make_tuple(dL_dmeans2D, dL_dcolors, dL_dopacity, dL_dmeans3D, dL_dcov3D, dL_dsh, dL_dscales, dL_drotations);
208
+ }
209
+
210
+ torch::Tensor markVisible(
211
+ torch::Tensor& means3D,
212
+ torch::Tensor& viewmatrix,
213
+ torch::Tensor& projmatrix)
214
+ {
215
+ const int P = means3D.size(0);
216
+
217
+ torch::Tensor present = torch::full({P}, false, means3D.options().dtype(at::kBool));
218
+
219
+ if(P != 0)
220
+ {
221
+ CudaRasterizer::Rasterizer::markVisible(P,
222
+ means3D.contiguous().data<float>(),
223
+ viewmatrix.contiguous().data<float>(),
224
+ projmatrix.contiguous().data<float>(),
225
+ present.contiguous().data<bool>());
226
+ }
227
+
228
+ return present;
229
+ }
diff-gaussian-rasterization/rasterize_points.h ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #pragma once
13
+ #include <torch/extension.h>
14
+ #include <cstdio>
15
+ #include <tuple>
16
+ #include <string>
17
+
18
+ std::tuple<int, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
19
+ RasterizeGaussiansCUDA(
20
+ const torch::Tensor& background,
21
+ const torch::Tensor& means3D,
22
+ const torch::Tensor& colors,
23
+ const torch::Tensor& opacity,
24
+ const torch::Tensor& scales,
25
+ const torch::Tensor& rotations,
26
+ const float scale_modifier,
27
+ const torch::Tensor& cov3D_precomp,
28
+ const torch::Tensor& viewmatrix,
29
+ const torch::Tensor& projmatrix,
30
+ const float tan_fovx,
31
+ const float tan_fovy,
32
+ const int image_height,
33
+ const int image_width,
34
+ const torch::Tensor& sh,
35
+ const int degree,
36
+ const torch::Tensor& campos,
37
+ const bool prefiltered,
38
+ const bool debug);
39
+
40
+ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
41
+ RasterizeGaussiansBackwardCUDA(
42
+ const torch::Tensor& background,
43
+ const torch::Tensor& means3D,
44
+ const torch::Tensor& radii,
45
+ const torch::Tensor& colors,
46
+ const torch::Tensor& scales,
47
+ const torch::Tensor& rotations,
48
+ const float scale_modifier,
49
+ const torch::Tensor& cov3D_precomp,
50
+ const torch::Tensor& viewmatrix,
51
+ const torch::Tensor& projmatrix,
52
+ const float tan_fovx,
53
+ const float tan_fovy,
54
+ const torch::Tensor& dL_dout_color,
55
+ const torch::Tensor& dL_dout_depth,
56
+ const torch::Tensor& dL_dout_alpha,
57
+ const torch::Tensor& sh,
58
+ const int degree,
59
+ const torch::Tensor& campos,
60
+ const torch::Tensor& geomBuffer,
61
+ const int R,
62
+ const torch::Tensor& binningBuffer,
63
+ const torch::Tensor& imageBuffer,
64
+ const torch::Tensor& alpha,
65
+ const bool debug);
66
+
67
+ torch::Tensor markVisible(
68
+ torch::Tensor& means3D,
69
+ torch::Tensor& viewmatrix,
70
+ torch::Tensor& projmatrix);
diff-gaussian-rasterization/setup.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact george.drettakis@inria.fr
10
+ #
11
+
12
from setuptools import setup
from torch.utils.cpp_extension import CUDAExtension, BuildExtension
import os

# Directory containing this script. The original evaluated this expression
# and discarded the result, then recomputed it inline below.
_HERE = os.path.dirname(os.path.abspath(__file__))

# Build the `diff_gaussian_rasterization._C` CUDA extension; nvcc gets an
# include path pointing at the bundled third_party/glm directory.
setup(
    name="diff_gaussian_rasterization",
    packages=['diff_gaussian_rasterization'],
    ext_modules=[
        CUDAExtension(
            name="diff_gaussian_rasterization._C",
            sources=[
                "cuda_rasterizer/rasterizer_impl.cu",
                "cuda_rasterizer/forward.cu",
                "cuda_rasterizer/backward.cu",
                "rasterize_points.cu",
                "ext.cpp",
            ],
            extra_compile_args={"nvcc": ["-I" + os.path.join(_HERE, "third_party/glm/")]},
        )
    ],
    cmdclass={
        'build_ext': BuildExtension
    }
)
diff-gaussian-rasterization/third_party/glm/.appveyor.yml ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ shallow_clone: true
2
+
3
+ platform:
4
+ - x86
5
+ - x64
6
+
7
+ configuration:
8
+ - Debug
9
+ - Release
10
+
11
+ image:
12
+ - Visual Studio 2013
13
+ - Visual Studio 2015
14
+ - Visual Studio 2017
15
+ - Visual Studio 2019
16
+
17
+ environment:
18
+ matrix:
19
+ - GLM_ARGUMENTS: -DGLM_TEST_FORCE_PURE=ON
20
+ - GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_SSE2=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON
21
+ - GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON
22
+ - GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_14=ON
23
+ - GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_17=ON
24
+
25
+ matrix:
26
+ exclude:
27
+ - image: Visual Studio 2013
28
+ GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON
29
+ - image: Visual Studio 2013
30
+ GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_14=ON
31
+ - image: Visual Studio 2013
32
+ GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_17=ON
33
+ - image: Visual Studio 2013
34
+ configuration: Debug
35
+ - image: Visual Studio 2015
36
+ GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_SSE2=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON
37
+ - image: Visual Studio 2015
38
+ GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_14=ON
39
+ - image: Visual Studio 2015
40
+ GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_17=ON
41
+ - image: Visual Studio 2015
42
+ platform: x86
43
+ - image: Visual Studio 2015
44
+ configuration: Debug
45
+ - image: Visual Studio 2017
46
+ platform: x86
47
+ - image: Visual Studio 2017
48
+ configuration: Debug
49
+ - image: Visual Studio 2019
50
+ platform: x64
51
+
52
+ branches:
53
+ only:
54
+ - master
55
+
56
+ before_build:
57
+ - ps: |
58
+ mkdir build
59
+ cd build
60
+
61
+ if ("$env:APPVEYOR_JOB_NAME" -match "Image: Visual Studio 2013") {
62
+ $env:generator="Visual Studio 12 2013"
63
+ }
64
+ if ("$env:APPVEYOR_JOB_NAME" -match "Image: Visual Studio 2015") {
65
+ $env:generator="Visual Studio 14 2015"
66
+ }
67
+ if ("$env:APPVEYOR_JOB_NAME" -match "Image: Visual Studio 2017") {
68
+ $env:generator="Visual Studio 15 2017"
69
+ }
70
+ if ("$env:APPVEYOR_JOB_NAME" -match "Image: Visual Studio 2019") {
71
+ $env:generator="Visual Studio 16 2019"
72
+ }
73
+ if ($env:PLATFORM -eq "x64") {
74
+ $env:generator="$env:generator Win64"
75
+ }
76
+ echo generator="$env:generator"
77
+ cmake .. -G "$env:generator" -DCMAKE_INSTALL_PREFIX="$env:APPVEYOR_BUILD_FOLDER/install" -DGLM_QUIET=ON -DGLM_TEST_ENABLE=ON "$env:GLM_ARGUMENTS"
78
+
79
+ build_script:
80
+ - cmake --build . --parallel --config %CONFIGURATION% -- /m /v:minimal
81
+ - cmake --build . --target install --parallel --config %CONFIGURATION% -- /m /v:minimal
82
+
83
+ test_script:
84
+ - ctest --parallel 4 --verbose -C %CONFIGURATION%
85
+ - cd ..
86
+ - ps: |
87
+ mkdir build_test_cmake
88
+ cd build_test_cmake
89
+ cmake ..\test\cmake\ -G "$env:generator" -DCMAKE_PREFIX_PATH="$env:APPVEYOR_BUILD_FOLDER/install"
90
+ - cmake --build . --parallel --config %CONFIGURATION% -- /m /v:minimal
91
+
92
+ deploy: off
diff-gaussian-rasterization/third_party/glm/.gitignore ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Compiled Object files
2
+ *.slo
3
+ *.lo
4
+ *.o
5
+ *.obj
6
+
7
+ # Precompiled Headers
8
+ *.gch
9
+ *.pch
10
+
11
+ # Compiled Dynamic libraries
12
+ *.so
13
+ *.dylib
14
+ *.dll
15
+
16
+ # Fortran module files
17
+ *.mod
18
+
19
+ # Compiled Static libraries
20
+ *.lai
21
+ *.la
22
+ *.a
23
+ *.lib
24
+
25
+ # Executables
26
+ *.exe
27
+ *.out
28
+ *.app
29
+
30
+ # CMake
31
+ CMakeCache.txt
32
+ CMakeFiles
33
+ cmake_install.cmake
34
+ install_manifest.txt
35
+ *.cmake
36
+ !glmConfig.cmake
37
+ !glmConfig-version.cmake
38
+ # ^ May need to add future .cmake files as exceptions
39
+
40
+ # Test logs
41
+ Testing/*
42
+
43
+ # Test input
44
+ test/gtc/*.dds
45
+
46
+ # Project Files
47
+ Makefile
48
+ *.cbp
49
+ *.user
50
+
51
+ # Misc.
52
+ *.log
53
+
54
+ # local build(s)
55
+ build*
56
+
57
+ /.vs
58
+ /.vscode
59
+ /CMakeSettings.json
60
+ .DS_Store
61
+ *.swp
diff-gaussian-rasterization/third_party/glm/.travis.yml ADDED
@@ -0,0 +1,388 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ language: cpp
2
+
3
+ branches:
4
+ only:
5
+ - master
6
+ - stable
7
+
8
+ jobs:
9
+ include:
10
+ - name: "Xcode 7.3 C++98 pure release"
11
+ os: osx
12
+ osx_image: xcode7.3
13
+ env:
14
+ - MATRIX_EVAL=""
15
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_98=ON -DGLM_TEST_FORCE_PURE=ON"
16
+
17
+ - name: "Xcode 7.3 C++98 sse2 release"
18
+ os: osx
19
+ osx_image: xcode7.3
20
+ env:
21
+ - MATRIX_EVAL=""
22
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_98=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE2=ON"
23
+
24
+ - name: "Xcode 7.3 C++98 ms release"
25
+ os: osx
26
+ osx_image: xcode7.3
27
+ env:
28
+ - MATRIX_EVAL=""
29
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_98=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON"
30
+
31
+ - name: "XCode 7.3 C++11 pure release"
32
+ os: osx
33
+ osx_image: xcode7.3
34
+ env:
35
+ - MATRIX_EVAL=""
36
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_FORCE_PURE=ON"
37
+
38
+ - name: "XCode 7.3 C++11 sse2 release"
39
+ os: osx
40
+ osx_image: xcode7.3
41
+ env:
42
+ - MATRIX_EVAL=""
43
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE3=ON"
44
+
45
+ - name: "XCode 10.3 C++11 sse2 release"
46
+ os: osx
47
+ osx_image: xcode10.3
48
+ env:
49
+ - MATRIX_EVAL=""
50
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE3=ON"
51
+
52
+ - name: "XCode 12.2 C++11 sse2 release"
53
+ os: osx
54
+ osx_image: xcode12.2
55
+ env:
56
+ - MATRIX_EVAL=""
57
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE3=ON"
58
+ - CTEST_ENV="--parallel 4 --output-on-failure"
59
+ - CMAKE_ENV="--parallel"
60
+
61
+ - name: "XCode 12.2 C++11 sse2 debug"
62
+ os: osx
63
+ osx_image: xcode12.2
64
+ env:
65
+ - MATRIX_EVAL=""
66
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE3=ON"
67
+ - CTEST_ENV="--parallel 4 --output-on-failure"
68
+ - CMAKE_ENV="--parallel"
69
+
70
+ - name: "XCode 12.2 C++11 avx debug"
71
+ os: osx
72
+ osx_image: xcode12.2
73
+ env:
74
+ - MATRIX_EVAL=""
75
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_AVX=ON"
76
+ - CTEST_ENV="--parallel 4 --output-on-failure"
77
+ - CMAKE_ENV="--parallel"
78
+
79
+ - name: "XCode 12.2 C++14 avx debug"
80
+ os: osx
81
+ osx_image: xcode12.2
82
+ env:
83
+ - MATRIX_EVAL=""
84
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_AVX=ON"
85
+ - CTEST_ENV="--parallel 4 --output-on-failure"
86
+ - CMAKE_ENV="--parallel"
87
+
88
+ - name: "XCode 12.2 C++14 pure debug"
89
+ os: osx
90
+ osx_image: xcode12.2
91
+ env:
92
+ - MATRIX_EVAL=""
93
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_FORCE_PURE=ON"
94
+ - CTEST_ENV="--parallel 4 --output-on-failure"
95
+ - CMAKE_ENV="--parallel"
96
+
97
+ - name: "XCode 12.2 C++17 pure debug"
98
+ os: osx
99
+ osx_image: xcode12.2
100
+ env:
101
+ - MATRIX_EVAL=""
102
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_FORCE_PURE=ON"
103
+ - CTEST_ENV="--parallel 4 --output-on-failure"
104
+ - CMAKE_ENV="--parallel"
105
+
106
+ - name: "XCode 12.2 C++17 sse2 debug"
107
+ os: osx
108
+ osx_image: xcode12.2
109
+ env:
110
+ - MATRIX_EVAL=""
111
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE2=ON"
112
+ - CTEST_ENV="--parallel 4 --output-on-failure"
113
+ - CMAKE_ENV="--parallel"
114
+
115
+ - name: "XCode 12.2 C++17 sse2 release"
116
+ os: osx
117
+ osx_image: xcode12.2
118
+ env:
119
+ - MATRIX_EVAL=""
120
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE2=ON"
121
+ - CTEST_ENV="--parallel 4 --output-on-failure"
122
+ - CMAKE_ENV="--parallel"
123
+
124
+ - name: "XCode 12.2 C++17 avx release"
125
+ os: osx
126
+ osx_image: xcode12.2
127
+ env:
128
+ - MATRIX_EVAL=""
129
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_AVX=ON"
130
+ - CTEST_ENV="--parallel 4 --output-on-failure"
131
+ - CMAKE_ENV="--parallel"
132
+
133
+ - name: "GCC 4.9 C++98 pure release"
134
+ os: linux
135
+ dist: Xenial
136
+ addons:
137
+ apt:
138
+ sources:
139
+ - ubuntu-toolchain-r-test
140
+ packages:
141
+ - g++-4.9
142
+ env:
143
+ - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
144
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_98=ON -DGLM_TEST_FORCE_PURE=ON"
145
+ - CTEST_ENV="--parallel 4 --output-on-failure"
146
+ - CMAKE_ENV="--parallel"
147
+
148
+ - name: "GCC 4.9 C++98 pure debug"
149
+ os: linux
150
+ dist: Xenial
151
+ addons:
152
+ apt:
153
+ sources:
154
+ - ubuntu-toolchain-r-test
155
+ packages:
156
+ - g++-4.9
157
+ env:
158
+ - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
159
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_98=ON -DGLM_TEST_FORCE_PURE=ON"
160
+ - CTEST_ENV="--parallel 4 --output-on-failure"
161
+ - CMAKE_ENV="--parallel"
162
+
163
+ - name: "GCC 4.9 C++98 ms debug"
164
+ os: linux
165
+ dist: Xenial
166
+ addons:
167
+ apt:
168
+ sources:
169
+ - ubuntu-toolchain-r-test
170
+ packages:
171
+ - g++-4.9
172
+ env:
173
+ - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
174
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_98=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON"
175
+ - CTEST_ENV="--parallel 4 --output-on-failure"
176
+ - CMAKE_ENV="--parallel"
177
+
178
+ - name: "GCC 4.9 C++11 ms debug"
179
+ os: linux
180
+ dist: Xenial
181
+ addons:
182
+ apt:
183
+ sources:
184
+ - ubuntu-toolchain-r-test
185
+ packages:
186
+ - g++-4.9
187
+ env:
188
+ - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
189
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON"
190
+ - CTEST_ENV="--parallel 4 --output-on-failure"
191
+ - CMAKE_ENV="--parallel"
192
+
193
+ - name: "GCC 4.9 C++11 pure debug"
194
+ os: linux
195
+ dist: Xenial
196
+ addons:
197
+ apt:
198
+ sources:
199
+ - ubuntu-toolchain-r-test
200
+ packages:
201
+ - g++-4.9
202
+ env:
203
+ - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
204
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_FORCE_PURE=ON"
205
+ - CTEST_ENV="--parallel 4 --output-on-failure"
206
+ - CMAKE_ENV="--parallel"
207
+
208
+ - name: "GCC 6 C++14 pure debug"
209
+ os: linux
210
+ dist: bionic
211
+ addons:
212
+ apt:
213
+ sources:
214
+ - ubuntu-toolchain-r-test
215
+ packages:
216
+ - g++-6
217
+ env:
218
+ - MATRIX_EVAL="CC=gcc-6 && CXX=g++-6"
219
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_FORCE_PURE=ON"
220
+ - CTEST_ENV="--parallel 4 --output-on-failure"
221
+ - CMAKE_ENV="--parallel"
222
+
223
+ - name: "GCC 6 C++14 ms debug"
224
+ os: linux
225
+ dist: bionic
226
+ addons:
227
+ apt:
228
+ sources:
229
+ - ubuntu-toolchain-r-test
230
+ packages:
231
+ - g++-6
232
+ env:
233
+ - MATRIX_EVAL="CC=gcc-6 && CXX=g++-6"
234
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON"
235
+ - CTEST_ENV="--parallel 4 --output-on-failure"
236
+ - CMAKE_ENV="--parallel"
237
+
238
+ - name: "GCC 7 C++17 ms debug"
239
+ os: linux
240
+ dist: bionic
241
+ addons:
242
+ apt:
243
+ sources:
244
+ - ubuntu-toolchain-r-test
245
+ packages:
246
+ - g++-7
247
+ env:
248
+ - MATRIX_EVAL="CC=gcc-7 && CXX=g++-7"
249
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON"
250
+ - CTEST_ENV="--parallel 4 --output-on-failure"
251
+ - CMAKE_ENV="--parallel"
252
+
253
+ - name: "GCC 7 C++17 pure debug"
254
+ os: linux
255
+ dist: bionic
256
+ addons:
257
+ apt:
258
+ sources:
259
+ - ubuntu-toolchain-r-test
260
+ packages:
261
+ - g++-7
262
+ env:
263
+ - MATRIX_EVAL="CC=gcc-7 && CXX=g++-7"
264
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_FORCE_PURE=ON"
265
+ - CTEST_ENV="--parallel 4 --output-on-failure"
266
+ - CMAKE_ENV="--parallel"
267
+
268
+ - name: "GCC 10 C++17 pure debug"
269
+ os: linux
270
+ dist: bionic
271
+ addons:
272
+ apt:
273
+ sources:
274
+ - ubuntu-toolchain-r-test
275
+ packages:
276
+ - g++-10
277
+ env:
278
+ - MATRIX_EVAL="CC=gcc-10 && CXX=g++-10"
279
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_FORCE_PURE=ON"
280
+ - CTEST_ENV="--parallel 4 --output-on-failure"
281
+ - CMAKE_ENV="--parallel"
282
+
283
+ - name: "GCC 10 C++17 pure release"
284
+ os: linux
285
+ dist: bionic
286
+ addons:
287
+ apt:
288
+ sources:
289
+ - ubuntu-toolchain-r-test
290
+ packages:
291
+ - g++-10
292
+ env:
293
+ - MATRIX_EVAL="CC=gcc-10 && CXX=g++-10"
294
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_FORCE_PURE=ON"
295
+ - CTEST_ENV="--parallel 4 --output-on-failure"
296
+ - CMAKE_ENV="--parallel"
297
+
298
+ - name: "Clang C++14 pure release"
299
+ os: linux
300
+ dist: xenial  # canonical lowercase spelling of the Travis distribution identifier
301
+ env:
302
+ - MATRIX_EVAL="CC=clang && CXX=clang++"
303
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_FORCE_PURE=ON"
304
+ - CTEST_ENV="--parallel 4 --output-on-failure"
305
+ - CMAKE_ENV="--parallel"
306
+
307
+ - name: "Clang C++14 pure debug"
308
+ os: linux
309
+ dist: xenial  # canonical lowercase spelling of the Travis distribution identifier
310
+ env:
311
+ - MATRIX_EVAL="CC=clang && CXX=clang++"
312
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_FORCE_PURE=ON"
313
+ - CTEST_ENV="--parallel 4 --output-on-failure"
314
+ - CMAKE_ENV="--parallel"
315
+
316
+ - name: "Clang C++14 sse2 debug"
317
+ os: linux
318
+ dist: xenial  # canonical lowercase spelling of the Travis distribution identifier
319
+ env:
320
+ - MATRIX_EVAL="CC=clang && CXX=clang++"
321
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE2=ON"
322
+ - CTEST_ENV="--parallel 4 --output-on-failure"
323
+ - CMAKE_ENV="--parallel"
324
+
325
+ - name: "Clang C++14 sse2 debug (focal)"  # suffix distinguishes this job from the same-named job on the older dist
326
+ os: linux
327
+ dist: focal
328
+ env:
329
+ - MATRIX_EVAL="CC=clang && CXX=clang++"
330
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE2=ON"
331
+ - CTEST_ENV="--parallel 4 --output-on-failure"
332
+ - CMAKE_ENV="--parallel"
333
+
334
+ - name: "Clang C++17 sse2 debug"
335
+ os: linux
336
+ dist: focal
337
+ env:
338
+ - MATRIX_EVAL="CC=clang && CXX=clang++"
339
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE2=ON"
340
+ - CTEST_ENV="--parallel 4 --output-on-failure"
341
+ - CMAKE_ENV="--parallel"
342
+
343
+ - name: "Clang C++17 avx2 debug"
344
+ os: linux
345
+ dist: focal
346
+ env:
347
+ - MATRIX_EVAL="CC=clang && CXX=clang++"
348
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_AVX2=ON"
349
+ - CTEST_ENV="--parallel 4 --output-on-failure"
350
+ - CMAKE_ENV="--parallel"
351
+
352
+ - name: "Clang C++17 pure debug"
353
+ os: linux
354
+ dist: focal
355
+ env:
356
+ - MATRIX_EVAL="CC=clang && CXX=clang++"
357
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_FORCE_PURE=ON"
358
+ - CTEST_ENV="--parallel 4 --output-on-failure"
359
+ - CMAKE_ENV="--parallel"
360
+
361
+ - name: "Clang C++17 pure release"
362
+ os: linux
363
+ dist: focal
364
+ env:
365
+ - MATRIX_EVAL="CC=clang && CXX=clang++"
366
+ - CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_FORCE_PURE=ON"
367
+ - CTEST_ENV="--parallel 4 --output-on-failure"
368
+ - CMAKE_ENV="--parallel"
369
+
370
+ before_script:
371
+ - cmake --version
372
+ - eval "${MATRIX_EVAL}"  # applies the per-job CC=... && CXX=... assignments
373
+
374
+ script:  # build + test + install, then configure and build test/cmake against the installed package
375
+ - ${CC} --version
376
+ - mkdir ./build
377
+ - cd ./build
378
+ - cmake -DCMAKE_INSTALL_PREFIX=$TRAVIS_BUILD_DIR/install -DCMAKE_CXX_COMPILER=$CXX ${CMAKE_BUILD_ENV} ..
379
+ - cmake --build . ${CMAKE_ENV}
380
+ - ctest ${CTEST_ENV}
381
+ - cmake --build . --target install ${CMAKE_ENV}
382
+ - cd $TRAVIS_BUILD_DIR
383
+ - mkdir ./build_test_cmake
384
+ - cd ./build_test_cmake
385
+ - cmake -DCMAKE_CXX_COMPILER=$CXX $TRAVIS_BUILD_DIR/test/cmake/ -DCMAKE_PREFIX_PATH=$TRAVIS_BUILD_DIR/install
386
+ - cmake --build .
387
+
388
+
diff-gaussian-rasterization/third_party/glm/CMakeLists.txt ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cmake_minimum_required(VERSION 3.2 FATAL_ERROR)
2
+ cmake_policy(VERSION 3.2)
3
+
4
+ # Read the GLM_VERSION_* macros from setup.hpp; each regex capture below becomes one version component.
5
+ file(READ "${CMAKE_CURRENT_SOURCE_DIR}/glm/detail/setup.hpp" GLM_SETUP_FILE)
6
+ string(REGEX MATCH "#define[ ]+GLM_VERSION_MAJOR[ ]+([0-9]+)" _ "${GLM_SETUP_FILE}")
7
+ set(GLM_VERSION_MAJOR "${CMAKE_MATCH_1}")
8
+ string(REGEX MATCH "#define[ ]+GLM_VERSION_MINOR[ ]+([0-9]+)" _ "${GLM_SETUP_FILE}")
9
+ set(GLM_VERSION_MINOR "${CMAKE_MATCH_1}")
10
+ string(REGEX MATCH "#define[ ]+GLM_VERSION_PATCH[ ]+([0-9]+)" _ "${GLM_SETUP_FILE}")
11
+ set(GLM_VERSION_PATCH "${CMAKE_MATCH_1}")
12
+ string(REGEX MATCH "#define[ ]+GLM_VERSION_REVISION[ ]+([0-9]+)" _ "${GLM_SETUP_FILE}")
13
+ set(GLM_VERSION_REVISION "${CMAKE_MATCH_1}")
14
+ # Assemble major.minor.patch.revision and hand it to project() as the project version.
15
+ set(GLM_VERSION ${GLM_VERSION_MAJOR}.${GLM_VERSION_MINOR}.${GLM_VERSION_PATCH}.${GLM_VERSION_REVISION})
16
+ project(glm VERSION ${GLM_VERSION} LANGUAGES CXX)
17
+ message(STATUS "GLM: Version " ${GLM_VERSION})
18
+ # NOTE(review): the glm target aliased below is expected to be created by the glm/ subdirectory - confirm.
19
+ add_subdirectory(glm)
20
+ add_library(glm::glm ALIAS glm)
21
+
22
+ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")  # true only when this is the top-level CMakeLists.txt
23
+     include(GNUInstallDirs)  # provides CMAKE_INSTALL_INCLUDEDIR / CMAKE_INSTALL_LIBDIR used by the install rules
24
+     include(CPack)
25
+     install(DIRECTORY glm DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} PATTERN "CMakeLists.txt" EXCLUDE)
26
+     install(EXPORT glm FILE glmConfig.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/glm NAMESPACE glm::)
27
+     include(CMakePackageConfigHelpers)
28
+     write_basic_package_version_file("${CMAKE_CURRENT_BINARY_DIR}/glmConfigVersion.cmake" COMPATIBILITY AnyNewerVersion)
29
+     install(FILES "${CMAKE_CURRENT_BINARY_DIR}/glmConfigVersion.cmake" DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/glm)
30
+
31
+     include(CTest)  # declares the BUILD_TESTING option checked below
32
+     if(BUILD_TESTING)
33
+         add_subdirectory(test)
34
+     endif()
35
+
36
+ endif()
37
+
38
+ if(NOT TARGET uninstall)
39
+     # Generate the uninstall script and run it from the same directory, so this also works when GLM is a subproject.
40
+     configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in"
41
+         "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" @ONLY)
42
+     add_custom_target(uninstall
43
+         COMMAND "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
44
+         VERBATIM)
45
+ endif()