Spaces:
Runtime error
Runtime error
Work
commited on
Commit
•
206b602
1
Parent(s):
9997a4d
update from lgm to lgm hf
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .DS_Store +0 -0
- acc_configs/gpu1.yaml +0 -15
- acc_configs/gpu4.yaml +0 -15
- acc_configs/gpu6.yaml +0 -15
- acc_configs/gpu8.yaml +0 -15
- app.py +56 -25
- convert.py +0 -462
- core/__pycache__/__init__.cpython-39.pyc +0 -0
- core/__pycache__/attention.cpython-39.pyc +0 -0
- core/__pycache__/gs.cpython-39.pyc +0 -0
- core/__pycache__/models.cpython-39.pyc +0 -0
- core/__pycache__/options.cpython-39.pyc +0 -0
- core/__pycache__/provider_objaverse.cpython-39.pyc +0 -0
- core/__pycache__/unet.cpython-39.pyc +0 -0
- core/__pycache__/utils.cpython-39.pyc +0 -0
- core/models.py +7 -4
- core/options.py +7 -7
- core/unet.py +7 -7
- data_test/anya_rgba.png +0 -0
- data_test/bird.jpg +0 -0
- data_test/bird_rgba.png +0 -0
- data_test/boy.jpg +0 -0
- data_test/cat_statue.jpg +0 -0
- data_test/catstatue_rgba.png +0 -0
- data_test/dragontoy.jpg +0 -0
- data_test/frog_sweater.jpg +0 -0
- data_test/gso_rabbit.jpg +0 -0
- diff-gaussian-rasterization/.gitignore +7 -0
- diff-gaussian-rasterization/.gitmodules +3 -0
- diff-gaussian-rasterization/CMakeLists.txt +36 -0
- diff-gaussian-rasterization/LICENSE.md +83 -0
- diff-gaussian-rasterization/README.md +35 -0
- diff-gaussian-rasterization/cuda_rasterizer/auxiliary.h +175 -0
- diff-gaussian-rasterization/cuda_rasterizer/backward.cu +712 -0
- diff-gaussian-rasterization/cuda_rasterizer/backward.h +70 -0
- diff-gaussian-rasterization/cuda_rasterizer/config.h +19 -0
- diff-gaussian-rasterization/cuda_rasterizer/forward.cu +466 -0
- diff-gaussian-rasterization/cuda_rasterizer/forward.h +68 -0
- diff-gaussian-rasterization/cuda_rasterizer/rasterizer.h +94 -0
- diff-gaussian-rasterization/cuda_rasterizer/rasterizer_impl.cu +447 -0
- diff-gaussian-rasterization/cuda_rasterizer/rasterizer_impl.h +73 -0
- diff-gaussian-rasterization/diff_gaussian_rasterization/__init__.py +224 -0
- diff-gaussian-rasterization/ext.cpp +19 -0
- diff-gaussian-rasterization/rasterize_points.cu +229 -0
- diff-gaussian-rasterization/rasterize_points.h +70 -0
- diff-gaussian-rasterization/setup.py +34 -0
- diff-gaussian-rasterization/third_party/glm/.appveyor.yml +92 -0
- diff-gaussian-rasterization/third_party/glm/.gitignore +61 -0
- diff-gaussian-rasterization/third_party/glm/.travis.yml +388 -0
- diff-gaussian-rasterization/third_party/glm/CMakeLists.txt +45 -0
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
|
|
acc_configs/gpu1.yaml
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
compute_environment: LOCAL_MACHINE
|
2 |
-
debug: false
|
3 |
-
distributed_type: 'NO'
|
4 |
-
downcast_bf16: 'no'
|
5 |
-
machine_rank: 0
|
6 |
-
main_training_function: main
|
7 |
-
mixed_precision: bf16
|
8 |
-
num_machines: 1
|
9 |
-
num_processes: 1
|
10 |
-
rdzv_backend: static
|
11 |
-
same_network: true
|
12 |
-
tpu_env: []
|
13 |
-
tpu_use_cluster: false
|
14 |
-
tpu_use_sudo: false
|
15 |
-
use_cpu: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
acc_configs/gpu4.yaml
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
compute_environment: LOCAL_MACHINE
|
2 |
-
debug: false
|
3 |
-
distributed_type: MULTI_GPU
|
4 |
-
downcast_bf16: 'no'
|
5 |
-
machine_rank: 0
|
6 |
-
main_training_function: main
|
7 |
-
mixed_precision: fp16
|
8 |
-
num_machines: 1
|
9 |
-
num_processes: 4
|
10 |
-
rdzv_backend: static
|
11 |
-
same_network: true
|
12 |
-
tpu_env: []
|
13 |
-
tpu_use_cluster: false
|
14 |
-
tpu_use_sudo: false
|
15 |
-
use_cpu: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
acc_configs/gpu6.yaml
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
compute_environment: LOCAL_MACHINE
|
2 |
-
debug: false
|
3 |
-
distributed_type: MULTI_GPU
|
4 |
-
downcast_bf16: 'no'
|
5 |
-
machine_rank: 0
|
6 |
-
main_training_function: main
|
7 |
-
mixed_precision: fp16
|
8 |
-
num_machines: 1
|
9 |
-
num_processes: 6
|
10 |
-
rdzv_backend: static
|
11 |
-
same_network: true
|
12 |
-
tpu_env: []
|
13 |
-
tpu_use_cluster: false
|
14 |
-
tpu_use_sudo: false
|
15 |
-
use_cpu: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
acc_configs/gpu8.yaml
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
compute_environment: LOCAL_MACHINE
|
2 |
-
debug: false
|
3 |
-
distributed_type: MULTI_GPU
|
4 |
-
downcast_bf16: 'no'
|
5 |
-
machine_rank: 0
|
6 |
-
main_training_function: main
|
7 |
-
mixed_precision: bf16
|
8 |
-
num_machines: 1
|
9 |
-
num_processes: 8
|
10 |
-
rdzv_backend: static
|
11 |
-
same_network: true
|
12 |
-
tpu_env: []
|
13 |
-
tpu_use_cluster: false
|
14 |
-
tpu_use_sudo: false
|
15 |
-
use_cpu: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
import os
|
|
|
|
|
2 |
import tyro
|
3 |
import imageio
|
4 |
import numpy as np
|
@@ -11,6 +13,12 @@ from safetensors.torch import load_file
|
|
11 |
import rembg
|
12 |
import gradio as gr
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
import kiui
|
15 |
from kiui.op import recenter
|
16 |
from kiui.cam import orbit_camera
|
@@ -19,12 +27,26 @@ from core.options import AllConfigs, Options
|
|
19 |
from core.models import LGM
|
20 |
from mvdream.pipeline_mvdream import MVDreamPipeline
|
21 |
|
|
|
|
|
22 |
IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
|
23 |
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
|
24 |
GRADIO_VIDEO_PATH = 'gradio_output.mp4'
|
25 |
GRADIO_PLY_PATH = 'gradio_output.ply'
|
26 |
|
27 |
-
opt = tyro.cli(AllConfigs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
# model
|
30 |
model = LGM(opt)
|
@@ -45,7 +67,7 @@ model = model.half().to(device)
|
|
45 |
model.eval()
|
46 |
|
47 |
tan_half_fov = np.tan(0.5 * np.deg2rad(opt.fovy))
|
48 |
-
proj_matrix = torch.zeros(4, 4, dtype=torch.float32
|
49 |
proj_matrix[0, 0] = 1 / tan_half_fov
|
50 |
proj_matrix[1, 1] = 1 / tan_half_fov
|
51 |
proj_matrix[2, 2] = (opt.zfar + opt.znear) / (opt.zfar - opt.znear)
|
@@ -73,6 +95,7 @@ pipe_image = pipe_image.to(device)
|
|
73 |
bg_remover = rembg.new_session()
|
74 |
|
75 |
# process function
|
|
|
76 |
def process(input_image, prompt, prompt_neg='', input_elevation=0, input_num_steps=30, input_seed=42):
|
77 |
|
78 |
# seed
|
@@ -105,7 +128,7 @@ def process(input_image, prompt, prompt_neg='', input_elevation=0, input_num_ste
|
|
105 |
image = image.astype(np.float32) / 255.0
|
106 |
image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
|
107 |
mv_image = pipe_image(prompt, image, negative_prompt=prompt_neg, num_inference_steps=input_num_steps, guidance_scale=5.0, elevation=input_elevation)
|
108 |
-
|
109 |
mv_image_grid = np.concatenate([
|
110 |
np.concatenate([mv_image[1], mv_image[2]], axis=1),
|
111 |
np.concatenate([mv_image[3], mv_image[0]], axis=1),
|
@@ -124,21 +147,21 @@ def process(input_image, prompt, prompt_neg='', input_elevation=0, input_num_ste
|
|
124 |
with torch.autocast(device_type='cuda', dtype=torch.float16):
|
125 |
# generate gaussians
|
126 |
gaussians = model.forward_gaussians(input_image)
|
127 |
-
|
128 |
# save gaussians
|
129 |
model.gs.save_ply(gaussians, output_ply_path)
|
130 |
-
|
131 |
-
# render 360 video
|
132 |
images = []
|
133 |
elevation = 0
|
134 |
if opt.fancy_video:
|
135 |
azimuth = np.arange(0, 720, 4, dtype=np.int32)
|
136 |
for azi in tqdm.tqdm(azimuth):
|
137 |
-
|
138 |
cam_poses = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
|
139 |
|
140 |
cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
|
141 |
-
|
142 |
# cameras needed by gaussian rasterizer
|
143 |
cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
|
144 |
cam_view_proj = cam_view @ proj_matrix # [V, 4, 4]
|
@@ -151,11 +174,11 @@ def process(input_image, prompt, prompt_neg='', input_elevation=0, input_num_ste
|
|
151 |
else:
|
152 |
azimuth = np.arange(0, 360, 2, dtype=np.int32)
|
153 |
for azi in tqdm.tqdm(azimuth):
|
154 |
-
|
155 |
cam_poses = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
|
156 |
|
157 |
cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
|
158 |
-
|
159 |
# cameras needed by gaussian rasterizer
|
160 |
cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
|
161 |
cam_view_proj = cam_view @ proj_matrix # [V, 4, 4]
|
@@ -179,7 +202,8 @@ _DESCRIPTION = '''
|
|
179 |
<a style="display:inline-block; margin-left: .5em" href="https://github.com/3DTopia/LGM"><img src='https://img.shields.io/github/stars/3DTopia/LGM?style=social'/></a>
|
180 |
</div>
|
181 |
|
182 |
-
* Input can be only text, only image, or both image and text.
|
|
|
183 |
* If you find the output unsatisfying, try using different seeds!
|
184 |
'''
|
185 |
|
@@ -189,7 +213,7 @@ with block:
|
|
189 |
with gr.Column(scale=1):
|
190 |
gr.Markdown('# ' + _TITLE)
|
191 |
gr.Markdown(_DESCRIPTION)
|
192 |
-
|
193 |
with gr.Row(variant='panel'):
|
194 |
with gr.Column(scale=1):
|
195 |
# input image
|
@@ -207,43 +231,50 @@ with block:
|
|
207 |
# gen button
|
208 |
button_gen = gr.Button("Generate")
|
209 |
|
210 |
-
|
211 |
with gr.Column(scale=1):
|
212 |
with gr.Tab("Video"):
|
213 |
# final video results
|
214 |
output_video = gr.Video(label="video")
|
215 |
# ply file
|
216 |
-
output_file = gr.File(label="ply")
|
217 |
with gr.Tab("Multi-view Image"):
|
218 |
# multi-view results
|
219 |
output_image = gr.Image(interactive=False, show_label=False)
|
220 |
|
221 |
button_gen.click(process, inputs=[input_image, input_text, input_neg_text, input_elevation, input_num_steps, input_seed], outputs=[output_image, output_video, output_file])
|
222 |
-
|
223 |
gr.Examples(
|
224 |
examples=[
|
225 |
-
"data_test/
|
226 |
-
"data_test/
|
227 |
-
"data_test/
|
|
|
|
|
|
|
228 |
],
|
229 |
inputs=[input_image],
|
230 |
outputs=[output_image, output_video, output_file],
|
231 |
fn=lambda x: process(input_image=x, prompt=''),
|
232 |
-
cache_examples=
|
233 |
label='Image-to-3D Examples'
|
234 |
)
|
235 |
|
236 |
gr.Examples(
|
237 |
examples=[
|
238 |
-
"
|
239 |
-
"
|
240 |
-
"
|
|
|
|
|
|
|
|
|
241 |
],
|
242 |
inputs=[input_text],
|
243 |
outputs=[output_image, output_video, output_file],
|
244 |
fn=lambda x: process(input_image=None, prompt=x),
|
245 |
-
cache_examples=
|
246 |
label='Text-to-3D Examples'
|
247 |
)
|
248 |
-
|
249 |
-
block.launch(
|
|
|
1 |
import os
|
2 |
+
import shlex
|
3 |
+
import subprocess
|
4 |
import tyro
|
5 |
import imageio
|
6 |
import numpy as np
|
|
|
13 |
import rembg
|
14 |
import gradio as gr
|
15 |
|
16 |
+
# download checkpoints
|
17 |
+
from huggingface_hub import hf_hub_download
|
18 |
+
ckpt_path = hf_hub_download(repo_id="ashawkey/LGM", filename="model_fp16.safetensors")
|
19 |
+
|
20 |
+
subprocess.run(shlex.split("pip install wheel/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl"))
|
21 |
+
|
22 |
import kiui
|
23 |
from kiui.op import recenter
|
24 |
from kiui.cam import orbit_camera
|
|
|
27 |
from core.models import LGM
|
28 |
from mvdream.pipeline_mvdream import MVDreamPipeline
|
29 |
|
30 |
+
import spaces
|
31 |
+
|
32 |
IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
|
33 |
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
|
34 |
GRADIO_VIDEO_PATH = 'gradio_output.mp4'
|
35 |
GRADIO_PLY_PATH = 'gradio_output.ply'
|
36 |
|
37 |
+
# opt = tyro.cli(AllConfigs)
|
38 |
+
opt = Options(
|
39 |
+
input_size=256,
|
40 |
+
up_channels=(1024, 1024, 512, 256, 128), # one more decoder
|
41 |
+
up_attention=(True, True, True, False, False),
|
42 |
+
splat_size=128,
|
43 |
+
output_size=512, # render & supervise Gaussians at a higher resolution.
|
44 |
+
batch_size=8,
|
45 |
+
num_views=8,
|
46 |
+
gradient_accumulation_steps=1,
|
47 |
+
mixed_precision='bf16',
|
48 |
+
resume=ckpt_path,
|
49 |
+
)
|
50 |
|
51 |
# model
|
52 |
model = LGM(opt)
|
|
|
67 |
model.eval()
|
68 |
|
69 |
tan_half_fov = np.tan(0.5 * np.deg2rad(opt.fovy))
|
70 |
+
proj_matrix = torch.zeros(4, 4, dtype=torch.float32).to(device)
|
71 |
proj_matrix[0, 0] = 1 / tan_half_fov
|
72 |
proj_matrix[1, 1] = 1 / tan_half_fov
|
73 |
proj_matrix[2, 2] = (opt.zfar + opt.znear) / (opt.zfar - opt.znear)
|
|
|
95 |
bg_remover = rembg.new_session()
|
96 |
|
97 |
# process function
|
98 |
+
@spaces.GPU
|
99 |
def process(input_image, prompt, prompt_neg='', input_elevation=0, input_num_steps=30, input_seed=42):
|
100 |
|
101 |
# seed
|
|
|
128 |
image = image.astype(np.float32) / 255.0
|
129 |
image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
|
130 |
mv_image = pipe_image(prompt, image, negative_prompt=prompt_neg, num_inference_steps=input_num_steps, guidance_scale=5.0, elevation=input_elevation)
|
131 |
+
|
132 |
mv_image_grid = np.concatenate([
|
133 |
np.concatenate([mv_image[1], mv_image[2]], axis=1),
|
134 |
np.concatenate([mv_image[3], mv_image[0]], axis=1),
|
|
|
147 |
with torch.autocast(device_type='cuda', dtype=torch.float16):
|
148 |
# generate gaussians
|
149 |
gaussians = model.forward_gaussians(input_image)
|
150 |
+
|
151 |
# save gaussians
|
152 |
model.gs.save_ply(gaussians, output_ply_path)
|
153 |
+
|
154 |
+
# render 360 video
|
155 |
images = []
|
156 |
elevation = 0
|
157 |
if opt.fancy_video:
|
158 |
azimuth = np.arange(0, 720, 4, dtype=np.int32)
|
159 |
for azi in tqdm.tqdm(azimuth):
|
160 |
+
|
161 |
cam_poses = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
|
162 |
|
163 |
cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
|
164 |
+
|
165 |
# cameras needed by gaussian rasterizer
|
166 |
cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
|
167 |
cam_view_proj = cam_view @ proj_matrix # [V, 4, 4]
|
|
|
174 |
else:
|
175 |
azimuth = np.arange(0, 360, 2, dtype=np.int32)
|
176 |
for azi in tqdm.tqdm(azimuth):
|
177 |
+
|
178 |
cam_poses = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
|
179 |
|
180 |
cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
|
181 |
+
|
182 |
# cameras needed by gaussian rasterizer
|
183 |
cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
|
184 |
cam_view_proj = cam_view @ proj_matrix # [V, 4, 4]
|
|
|
202 |
<a style="display:inline-block; margin-left: .5em" href="https://github.com/3DTopia/LGM"><img src='https://img.shields.io/github/stars/3DTopia/LGM?style=social'/></a>
|
203 |
</div>
|
204 |
|
205 |
+
* Input can be only text, only image, or both image and text.
|
206 |
+
* Output is a `ply` file containing the 3D Gaussians, please check our [repo](https://github.com/3DTopia/LGM/blob/main/readme.md) for visualization and mesh conversion.
|
207 |
* If you find the output unsatisfying, try using different seeds!
|
208 |
'''
|
209 |
|
|
|
213 |
with gr.Column(scale=1):
|
214 |
gr.Markdown('# ' + _TITLE)
|
215 |
gr.Markdown(_DESCRIPTION)
|
216 |
+
|
217 |
with gr.Row(variant='panel'):
|
218 |
with gr.Column(scale=1):
|
219 |
# input image
|
|
|
231 |
# gen button
|
232 |
button_gen = gr.Button("Generate")
|
233 |
|
234 |
+
|
235 |
with gr.Column(scale=1):
|
236 |
with gr.Tab("Video"):
|
237 |
# final video results
|
238 |
output_video = gr.Video(label="video")
|
239 |
# ply file
|
240 |
+
output_file = gr.File(label="3D Gaussians (ply format)")
|
241 |
with gr.Tab("Multi-view Image"):
|
242 |
# multi-view results
|
243 |
output_image = gr.Image(interactive=False, show_label=False)
|
244 |
|
245 |
button_gen.click(process, inputs=[input_image, input_text, input_neg_text, input_elevation, input_num_steps, input_seed], outputs=[output_image, output_video, output_file])
|
246 |
+
|
247 |
gr.Examples(
|
248 |
examples=[
|
249 |
+
"data_test/frog_sweater.jpg",
|
250 |
+
"data_test/bird.jpg",
|
251 |
+
"data_test/boy.jpg",
|
252 |
+
"data_test/cat_statue.jpg",
|
253 |
+
"data_test/dragontoy.jpg",
|
254 |
+
"data_test/gso_rabbit.jpg",
|
255 |
],
|
256 |
inputs=[input_image],
|
257 |
outputs=[output_image, output_video, output_file],
|
258 |
fn=lambda x: process(input_image=x, prompt=''),
|
259 |
+
cache_examples=True,
|
260 |
label='Image-to-3D Examples'
|
261 |
)
|
262 |
|
263 |
gr.Examples(
|
264 |
examples=[
|
265 |
+
"teddy bear",
|
266 |
+
"hamburger",
|
267 |
+
"oldman's head sculpture",
|
268 |
+
"headphone",
|
269 |
+
"motorbike",
|
270 |
+
"mech suit"
|
271 |
+
|
272 |
],
|
273 |
inputs=[input_text],
|
274 |
outputs=[output_image, output_video, output_file],
|
275 |
fn=lambda x: process(input_image=None, prompt=x),
|
276 |
+
cache_examples=True,
|
277 |
label='Text-to-3D Examples'
|
278 |
)
|
279 |
+
|
280 |
+
block.launch()
|
convert.py
DELETED
@@ -1,462 +0,0 @@
|
|
1 |
-
|
2 |
-
import os
|
3 |
-
import tyro
|
4 |
-
import tqdm
|
5 |
-
import numpy as np
|
6 |
-
import torch
|
7 |
-
import torch.nn as nn
|
8 |
-
import torch.nn.functional as F
|
9 |
-
|
10 |
-
from core.options import AllConfigs, Options
|
11 |
-
from core.gs import GaussianRenderer
|
12 |
-
|
13 |
-
import mcubes
|
14 |
-
import nerfacc
|
15 |
-
import nvdiffrast.torch as dr
|
16 |
-
|
17 |
-
import kiui
|
18 |
-
from kiui.mesh import Mesh
|
19 |
-
from kiui.mesh_utils import clean_mesh, decimate_mesh
|
20 |
-
from kiui.mesh_utils import laplacian_smooth_loss, normal_consistency
|
21 |
-
from kiui.op import uv_padding, safe_normalize, inverse_sigmoid
|
22 |
-
from kiui.cam import orbit_camera, get_perspective
|
23 |
-
from kiui.nn import MLP, trunc_exp
|
24 |
-
from kiui.gridencoder import GridEncoder
|
25 |
-
|
26 |
-
def get_rays(pose, h, w, fovy, opengl=True):
|
27 |
-
|
28 |
-
x, y = torch.meshgrid(
|
29 |
-
torch.arange(w, device=pose.device),
|
30 |
-
torch.arange(h, device=pose.device),
|
31 |
-
indexing="xy",
|
32 |
-
)
|
33 |
-
x = x.flatten()
|
34 |
-
y = y.flatten()
|
35 |
-
|
36 |
-
cx = w * 0.5
|
37 |
-
cy = h * 0.5
|
38 |
-
focal = h * 0.5 / np.tan(0.5 * np.deg2rad(fovy))
|
39 |
-
|
40 |
-
camera_dirs = F.pad(
|
41 |
-
torch.stack(
|
42 |
-
[
|
43 |
-
(x - cx + 0.5) / focal,
|
44 |
-
(y - cy + 0.5) / focal * (-1.0 if opengl else 1.0),
|
45 |
-
],
|
46 |
-
dim=-1,
|
47 |
-
),
|
48 |
-
(0, 1),
|
49 |
-
value=(-1.0 if opengl else 1.0),
|
50 |
-
) # [hw, 3]
|
51 |
-
|
52 |
-
rays_d = camera_dirs @ pose[:3, :3].transpose(0, 1) # [hw, 3]
|
53 |
-
rays_o = pose[:3, 3].unsqueeze(0).expand_as(rays_d) # [hw, 3]
|
54 |
-
|
55 |
-
rays_d = safe_normalize(rays_d)
|
56 |
-
|
57 |
-
return rays_o, rays_d
|
58 |
-
|
59 |
-
# Triple renderer of gaussians, gaussian, and diso mesh.
|
60 |
-
# gaussian --> nerf --> mesh
|
61 |
-
class Converter(nn.Module):
|
62 |
-
def __init__(self, opt: Options):
|
63 |
-
super().__init__()
|
64 |
-
|
65 |
-
self.opt = opt
|
66 |
-
self.device = torch.device("cuda")
|
67 |
-
|
68 |
-
# gs renderer
|
69 |
-
self.tan_half_fov = np.tan(0.5 * np.deg2rad(opt.fovy))
|
70 |
-
self.proj_matrix = torch.zeros(4, 4, dtype=torch.float32, device=self.device)
|
71 |
-
self.proj_matrix[0, 0] = 1 / self.tan_half_fov
|
72 |
-
self.proj_matrix[1, 1] = 1 / self.tan_half_fov
|
73 |
-
self.proj_matrix[2, 2] = (opt.zfar + opt.znear) / (opt.zfar - opt.znear)
|
74 |
-
self.proj_matrix[3, 2] = - (opt.zfar * opt.znear) / (opt.zfar - opt.znear)
|
75 |
-
self.proj_matrix[2, 3] = 1
|
76 |
-
|
77 |
-
self.gs_renderer = GaussianRenderer(opt)
|
78 |
-
|
79 |
-
self.gaussians = self.gs_renderer.load_ply(opt.test_path).to(self.device)
|
80 |
-
|
81 |
-
# nerf renderer
|
82 |
-
if not self.opt.force_cuda_rast:
|
83 |
-
self.glctx = dr.RasterizeGLContext()
|
84 |
-
else:
|
85 |
-
self.glctx = dr.RasterizeCudaContext()
|
86 |
-
|
87 |
-
self.step = 0
|
88 |
-
self.render_step_size = 5e-3
|
89 |
-
self.aabb = torch.tensor([-1.0, -1.0, -1.0, 1.0, 1.0, 1.0], device=self.device)
|
90 |
-
self.estimator = nerfacc.OccGridEstimator(roi_aabb=self.aabb, resolution=64, levels=1)
|
91 |
-
|
92 |
-
self.encoder_density = GridEncoder(num_levels=12) # VMEncoder(output_dim=16, mode='sum')
|
93 |
-
self.encoder = GridEncoder(num_levels=12)
|
94 |
-
self.mlp_density = MLP(self.encoder_density.output_dim, 1, 32, 2, bias=False)
|
95 |
-
self.mlp = MLP(self.encoder.output_dim, 3, 32, 2, bias=False)
|
96 |
-
|
97 |
-
# mesh renderer
|
98 |
-
self.proj = torch.from_numpy(get_perspective(self.opt.fovy)).float().to(self.device)
|
99 |
-
self.v = self.f = None
|
100 |
-
self.vt = self.ft = None
|
101 |
-
self.deform = None
|
102 |
-
self.albedo = None
|
103 |
-
|
104 |
-
|
105 |
-
@torch.no_grad()
|
106 |
-
def render_gs(self, pose):
|
107 |
-
|
108 |
-
cam_poses = torch.from_numpy(pose).unsqueeze(0).to(self.device)
|
109 |
-
cam_poses[:, :3, 1:3] *= -1 # invert up & forward direction
|
110 |
-
|
111 |
-
# cameras needed by gaussian rasterizer
|
112 |
-
cam_view = torch.inverse(cam_poses).transpose(1, 2) # [V, 4, 4]
|
113 |
-
cam_view_proj = cam_view @ self.proj_matrix # [V, 4, 4]
|
114 |
-
cam_pos = - cam_poses[:, :3, 3] # [V, 3]
|
115 |
-
|
116 |
-
out = self.gs_renderer.render(self.gaussians.unsqueeze(0), cam_view.unsqueeze(0), cam_view_proj.unsqueeze(0), cam_pos.unsqueeze(0))
|
117 |
-
image = out['image'].squeeze(1).squeeze(0) # [C, H, W]
|
118 |
-
alpha = out['alpha'].squeeze(2).squeeze(1).squeeze(0) # [H, W]
|
119 |
-
|
120 |
-
return image, alpha
|
121 |
-
|
122 |
-
def get_density(self, xs):
|
123 |
-
# xs: [..., 3]
|
124 |
-
prefix = xs.shape[:-1]
|
125 |
-
xs = xs.view(-1, 3)
|
126 |
-
feats = self.encoder_density(xs)
|
127 |
-
density = trunc_exp(self.mlp_density(feats))
|
128 |
-
density = density.view(*prefix, 1)
|
129 |
-
return density
|
130 |
-
|
131 |
-
def render_nerf(self, pose):
|
132 |
-
|
133 |
-
pose = torch.from_numpy(pose.astype(np.float32)).to(self.device)
|
134 |
-
|
135 |
-
# get rays
|
136 |
-
resolution = self.opt.output_size
|
137 |
-
rays_o, rays_d = get_rays(pose, resolution, resolution, self.opt.fovy)
|
138 |
-
|
139 |
-
# update occ grid
|
140 |
-
if self.training:
|
141 |
-
def occ_eval_fn(xs):
|
142 |
-
sigmas = self.get_density(xs)
|
143 |
-
return self.render_step_size * sigmas
|
144 |
-
|
145 |
-
self.estimator.update_every_n_steps(self.step, occ_eval_fn=occ_eval_fn, occ_thre=0.01, n=8)
|
146 |
-
self.step += 1
|
147 |
-
|
148 |
-
# render
|
149 |
-
def sigma_fn(t_starts, t_ends, ray_indices):
|
150 |
-
t_origins = rays_o[ray_indices]
|
151 |
-
t_dirs = rays_d[ray_indices]
|
152 |
-
xs = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0
|
153 |
-
sigmas = self.get_density(xs)
|
154 |
-
return sigmas.squeeze(-1)
|
155 |
-
|
156 |
-
with torch.no_grad():
|
157 |
-
ray_indices, t_starts, t_ends = self.estimator.sampling(
|
158 |
-
rays_o,
|
159 |
-
rays_d,
|
160 |
-
sigma_fn=sigma_fn,
|
161 |
-
near_plane=0.01,
|
162 |
-
far_plane=100,
|
163 |
-
render_step_size=self.render_step_size,
|
164 |
-
stratified=self.training,
|
165 |
-
cone_angle=0,
|
166 |
-
)
|
167 |
-
|
168 |
-
t_origins = rays_o[ray_indices]
|
169 |
-
t_dirs = rays_d[ray_indices]
|
170 |
-
xs = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0
|
171 |
-
sigmas = self.get_density(xs).squeeze(-1)
|
172 |
-
rgbs = torch.sigmoid(self.mlp(self.encoder(xs)))
|
173 |
-
|
174 |
-
n_rays=rays_o.shape[0]
|
175 |
-
weights, trans, alphas = nerfacc.render_weight_from_density(t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=n_rays)
|
176 |
-
color = nerfacc.accumulate_along_rays(weights, values=rgbs, ray_indices=ray_indices, n_rays=n_rays)
|
177 |
-
alpha = nerfacc.accumulate_along_rays(weights, values=None, ray_indices=ray_indices, n_rays=n_rays)
|
178 |
-
|
179 |
-
color = color + 1 * (1.0 - alpha)
|
180 |
-
|
181 |
-
color = color.view(resolution, resolution, 3).clamp(0, 1).permute(2, 0, 1).contiguous()
|
182 |
-
alpha = alpha.view(resolution, resolution).clamp(0, 1).contiguous()
|
183 |
-
|
184 |
-
return color, alpha
|
185 |
-
|
186 |
-
def fit_nerf(self, iters=512, resolution=128):
|
187 |
-
|
188 |
-
self.opt.output_size = resolution
|
189 |
-
|
190 |
-
optimizer = torch.optim.Adam([
|
191 |
-
{'params': self.encoder_density.parameters(), 'lr': 1e-2},
|
192 |
-
{'params': self.encoder.parameters(), 'lr': 1e-2},
|
193 |
-
{'params': self.mlp_density.parameters(), 'lr': 1e-3},
|
194 |
-
{'params': self.mlp.parameters(), 'lr': 1e-3},
|
195 |
-
])
|
196 |
-
|
197 |
-
print(f"[INFO] fitting nerf...")
|
198 |
-
pbar = tqdm.trange(iters)
|
199 |
-
for i in pbar:
|
200 |
-
|
201 |
-
ver = np.random.randint(-45, 45)
|
202 |
-
hor = np.random.randint(-180, 180)
|
203 |
-
rad = np.random.uniform(1.5, 3.0)
|
204 |
-
|
205 |
-
pose = orbit_camera(ver, hor, rad)
|
206 |
-
|
207 |
-
image_gt, alpha_gt = self.render_gs(pose)
|
208 |
-
image_pred, alpha_pred = self.render_nerf(pose)
|
209 |
-
|
210 |
-
# if i % 200 == 0:
|
211 |
-
# kiui.vis.plot_image(image_gt, alpha_gt, image_pred, alpha_pred)
|
212 |
-
|
213 |
-
loss_mse = F.mse_loss(image_pred, image_gt) + 0.1 * F.mse_loss(alpha_pred, alpha_gt)
|
214 |
-
loss = loss_mse #+ 0.1 * self.encoder_density.tv_loss() #+ 0.0001 * self.encoder_density.density_loss()
|
215 |
-
|
216 |
-
loss.backward()
|
217 |
-
self.encoder_density.grad_total_variation(1e-8)
|
218 |
-
|
219 |
-
optimizer.step()
|
220 |
-
optimizer.zero_grad()
|
221 |
-
|
222 |
-
pbar.set_description(f"MSE = {loss_mse.item():.6f}")
|
223 |
-
|
224 |
-
print(f"[INFO] finished fitting nerf!")
|
225 |
-
|
226 |
-
def render_mesh(self, pose):
|
227 |
-
|
228 |
-
h = w = self.opt.output_size
|
229 |
-
|
230 |
-
v = self.v + self.deform
|
231 |
-
f = self.f
|
232 |
-
|
233 |
-
pose = torch.from_numpy(pose.astype(np.float32)).to(v.device)
|
234 |
-
|
235 |
-
# get v_clip and render rgb
|
236 |
-
v_cam = torch.matmul(F.pad(v, pad=(0, 1), mode='constant', value=1.0), torch.inverse(pose).T).float().unsqueeze(0)
|
237 |
-
v_clip = v_cam @ self.proj.T
|
238 |
-
|
239 |
-
rast, rast_db = dr.rasterize(self.glctx, v_clip, f, (h, w))
|
240 |
-
|
241 |
-
alpha = torch.clamp(rast[..., -1:], 0, 1).contiguous() # [1, H, W, 1]
|
242 |
-
alpha = dr.antialias(alpha, rast, v_clip, f).clamp(0, 1).squeeze(-1).squeeze(0) # [H, W] important to enable gradients!
|
243 |
-
|
244 |
-
if self.albedo is None:
|
245 |
-
xyzs, _ = dr.interpolate(v.unsqueeze(0), rast, f) # [1, H, W, 3]
|
246 |
-
xyzs = xyzs.view(-1, 3)
|
247 |
-
mask = (alpha > 0).view(-1)
|
248 |
-
image = torch.zeros_like(xyzs, dtype=torch.float32)
|
249 |
-
if mask.any():
|
250 |
-
masked_albedo = torch.sigmoid(self.mlp(self.encoder(xyzs[mask].detach(), bound=1)))
|
251 |
-
image[mask] = masked_albedo.float()
|
252 |
-
else:
|
253 |
-
texc, texc_db = dr.interpolate(self.vt.unsqueeze(0), rast, self.ft, rast_db=rast_db, diff_attrs='all')
|
254 |
-
image = torch.sigmoid(dr.texture(self.albedo.unsqueeze(0), texc, uv_da=texc_db)) # [1, H, W, 3]
|
255 |
-
|
256 |
-
image = image.view(1, h, w, 3)
|
257 |
-
# image = dr.antialias(image, rast, v_clip, f).clamp(0, 1)
|
258 |
-
image = image.squeeze(0).permute(2, 0, 1).contiguous() # [3, H, W]
|
259 |
-
image = alpha * image + (1 - alpha)
|
260 |
-
|
261 |
-
return image, alpha
|
262 |
-
|
263 |
-
def fit_mesh(self, iters=2048, resolution=512, decimate_target=5e4):
|
264 |
-
|
265 |
-
self.opt.output_size = resolution
|
266 |
-
|
267 |
-
# init mesh from nerf
|
268 |
-
grid_size = 256
|
269 |
-
sigmas = np.zeros([grid_size, grid_size, grid_size], dtype=np.float32)
|
270 |
-
|
271 |
-
S = 128
|
272 |
-
density_thresh = 10
|
273 |
-
|
274 |
-
X = torch.linspace(-1, 1, grid_size).split(S)
|
275 |
-
Y = torch.linspace(-1, 1, grid_size).split(S)
|
276 |
-
Z = torch.linspace(-1, 1, grid_size).split(S)
|
277 |
-
|
278 |
-
for xi, xs in enumerate(X):
|
279 |
-
for yi, ys in enumerate(Y):
|
280 |
-
for zi, zs in enumerate(Z):
|
281 |
-
xx, yy, zz = torch.meshgrid(xs, ys, zs, indexing='ij')
|
282 |
-
pts = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1) # [S, 3]
|
283 |
-
val = self.get_density(pts.to(self.device))
|
284 |
-
sigmas[xi * S: xi * S + len(xs), yi * S: yi * S + len(ys), zi * S: zi * S + len(zs)] = val.reshape(len(xs), len(ys), len(zs)).detach().cpu().numpy() # [S, 1] --> [x, y, z]
|
285 |
-
|
286 |
-
print(f'[INFO] marching cubes thresh: {density_thresh} ({sigmas.min()} ~ {sigmas.max()})')
|
287 |
-
|
288 |
-
vertices, triangles = mcubes.marching_cubes(sigmas, density_thresh)
|
289 |
-
vertices = vertices / (grid_size - 1.0) * 2 - 1
|
290 |
-
|
291 |
-
# clean
|
292 |
-
vertices = vertices.astype(np.float32)
|
293 |
-
triangles = triangles.astype(np.int32)
|
294 |
-
vertices, triangles = clean_mesh(vertices, triangles, remesh=True, remesh_size=0.01)
|
295 |
-
if triangles.shape[0] > decimate_target:
|
296 |
-
vertices, triangles = decimate_mesh(vertices, triangles, decimate_target, optimalplacement=False)
|
297 |
-
|
298 |
-
self.v = torch.from_numpy(vertices).contiguous().float().to(self.device)
|
299 |
-
self.f = torch.from_numpy(triangles).contiguous().int().to(self.device)
|
300 |
-
self.deform = nn.Parameter(torch.zeros_like(self.v)).to(self.device)
|
301 |
-
|
302 |
-
# fit mesh from gs
|
303 |
-
lr_factor = 1
|
304 |
-
optimizer = torch.optim.Adam([
|
305 |
-
{'params': self.encoder.parameters(), 'lr': 1e-3 * lr_factor},
|
306 |
-
{'params': self.mlp.parameters(), 'lr': 1e-3 * lr_factor},
|
307 |
-
{'params': self.deform, 'lr': 1e-4},
|
308 |
-
])
|
309 |
-
|
310 |
-
print(f"[INFO] fitting mesh...")
|
311 |
-
pbar = tqdm.trange(iters)
|
312 |
-
for i in pbar:
|
313 |
-
|
314 |
-
ver = np.random.randint(-10, 10)
|
315 |
-
hor = np.random.randint(-180, 180)
|
316 |
-
rad = self.opt.cam_radius # np.random.uniform(1, 2)
|
317 |
-
|
318 |
-
pose = orbit_camera(ver, hor, rad)
|
319 |
-
|
320 |
-
image_gt, alpha_gt = self.render_gs(pose)
|
321 |
-
image_pred, alpha_pred = self.render_mesh(pose)
|
322 |
-
|
323 |
-
loss_mse = F.mse_loss(image_pred, image_gt) + 0.1 * F.mse_loss(alpha_pred, alpha_gt)
|
324 |
-
# loss_lap = laplacian_smooth_loss(self.v + self.deform, self.f)
|
325 |
-
loss_normal = normal_consistency(self.v + self.deform, self.f)
|
326 |
-
loss_offsets = (self.deform ** 2).sum(-1).mean()
|
327 |
-
loss = loss_mse + 0.001 * loss_normal + 0.1 * loss_offsets
|
328 |
-
|
329 |
-
loss.backward()
|
330 |
-
|
331 |
-
optimizer.step()
|
332 |
-
optimizer.zero_grad()
|
333 |
-
|
334 |
-
# remesh periodically
|
335 |
-
if i > 0 and i % 512 == 0:
|
336 |
-
vertices = (self.v + self.deform).detach().cpu().numpy()
|
337 |
-
triangles = self.f.detach().cpu().numpy()
|
338 |
-
vertices, triangles = clean_mesh(vertices, triangles, remesh=True, remesh_size=0.01)
|
339 |
-
if triangles.shape[0] > decimate_target:
|
340 |
-
vertices, triangles = decimate_mesh(vertices, triangles, decimate_target, optimalplacement=False)
|
341 |
-
self.v = torch.from_numpy(vertices).contiguous().float().to(self.device)
|
342 |
-
self.f = torch.from_numpy(triangles).contiguous().int().to(self.device)
|
343 |
-
self.deform = nn.Parameter(torch.zeros_like(self.v)).to(self.device)
|
344 |
-
lr_factor *= 0.5
|
345 |
-
optimizer = torch.optim.Adam([
|
346 |
-
{'params': self.encoder.parameters(), 'lr': 1e-3 * lr_factor},
|
347 |
-
{'params': self.mlp.parameters(), 'lr': 1e-3 * lr_factor},
|
348 |
-
{'params': self.deform, 'lr': 1e-4},
|
349 |
-
])
|
350 |
-
|
351 |
-
pbar.set_description(f"MSE = {loss_mse.item():.6f}")
|
352 |
-
|
353 |
-
# last clean
|
354 |
-
vertices = (self.v + self.deform).detach().cpu().numpy()
|
355 |
-
triangles = self.f.detach().cpu().numpy()
|
356 |
-
vertices, triangles = clean_mesh(vertices, triangles, remesh=False)
|
357 |
-
self.v = torch.from_numpy(vertices).contiguous().float().to(self.device)
|
358 |
-
self.f = torch.from_numpy(triangles).contiguous().int().to(self.device)
|
359 |
-
self.deform = nn.Parameter(torch.zeros_like(self.v).to(self.device))
|
360 |
-
|
361 |
-
print(f"[INFO] finished fitting mesh!")
|
362 |
-
|
363 |
-
# uv mesh refine
|
364 |
-
def fit_mesh_uv(self, iters=512, resolution=512, texture_resolution=1024, padding=2):
|
365 |
-
|
366 |
-
self.opt.output_size = resolution
|
367 |
-
|
368 |
-
# unwrap uv
|
369 |
-
print(f"[INFO] uv unwrapping...")
|
370 |
-
mesh = Mesh(v=self.v, f=self.f, albedo=None, device=self.device)
|
371 |
-
mesh.auto_normal()
|
372 |
-
mesh.auto_uv()
|
373 |
-
|
374 |
-
self.vt = mesh.vt
|
375 |
-
self.ft = mesh.ft
|
376 |
-
|
377 |
-
# render uv maps
|
378 |
-
h = w = texture_resolution
|
379 |
-
uv = mesh.vt * 2.0 - 1.0 # uvs to range [-1, 1]
|
380 |
-
uv = torch.cat((uv, torch.zeros_like(uv[..., :1]), torch.ones_like(uv[..., :1])), dim=-1) # [N, 4]
|
381 |
-
|
382 |
-
rast, _ = dr.rasterize(self.glctx, uv.unsqueeze(0), mesh.ft, (h, w)) # [1, h, w, 4]
|
383 |
-
xyzs, _ = dr.interpolate(mesh.v.unsqueeze(0), rast, mesh.f) # [1, h, w, 3]
|
384 |
-
mask, _ = dr.interpolate(torch.ones_like(mesh.v[:, :1]).unsqueeze(0), rast, mesh.f) # [1, h, w, 1]
|
385 |
-
|
386 |
-
# masked query
|
387 |
-
xyzs = xyzs.view(-1, 3)
|
388 |
-
mask = (mask > 0).view(-1)
|
389 |
-
|
390 |
-
albedo = torch.zeros(h * w, 3, device=self.device, dtype=torch.float32)
|
391 |
-
|
392 |
-
if mask.any():
|
393 |
-
print(f"[INFO] querying texture...")
|
394 |
-
|
395 |
-
xyzs = xyzs[mask] # [M, 3]
|
396 |
-
|
397 |
-
# batched inference to avoid OOM
|
398 |
-
batch = []
|
399 |
-
head = 0
|
400 |
-
while head < xyzs.shape[0]:
|
401 |
-
tail = min(head + 640000, xyzs.shape[0])
|
402 |
-
batch.append(torch.sigmoid(self.mlp(self.encoder(xyzs[head:tail]))).float())
|
403 |
-
head += 640000
|
404 |
-
|
405 |
-
albedo[mask] = torch.cat(batch, dim=0)
|
406 |
-
|
407 |
-
albedo = albedo.view(h, w, -1)
|
408 |
-
mask = mask.view(h, w)
|
409 |
-
albedo = uv_padding(albedo, mask, padding)
|
410 |
-
|
411 |
-
# optimize texture
|
412 |
-
self.albedo = nn.Parameter(inverse_sigmoid(albedo)).to(self.device)
|
413 |
-
|
414 |
-
optimizer = torch.optim.Adam([
|
415 |
-
{'params': self.albedo, 'lr': 1e-3},
|
416 |
-
])
|
417 |
-
|
418 |
-
print(f"[INFO] fitting mesh texture...")
|
419 |
-
pbar = tqdm.trange(iters)
|
420 |
-
for i in pbar:
|
421 |
-
|
422 |
-
# shrink to front view as we care more about it...
|
423 |
-
ver = np.random.randint(-5, 5)
|
424 |
-
hor = np.random.randint(-15, 15)
|
425 |
-
rad = self.opt.cam_radius # np.random.uniform(1, 2)
|
426 |
-
|
427 |
-
pose = orbit_camera(ver, hor, rad)
|
428 |
-
|
429 |
-
image_gt, alpha_gt = self.render_gs(pose)
|
430 |
-
image_pred, alpha_pred = self.render_mesh(pose)
|
431 |
-
|
432 |
-
loss_mse = F.mse_loss(image_pred, image_gt)
|
433 |
-
loss = loss_mse
|
434 |
-
|
435 |
-
loss.backward()
|
436 |
-
|
437 |
-
optimizer.step()
|
438 |
-
optimizer.zero_grad()
|
439 |
-
|
440 |
-
pbar.set_description(f"MSE = {loss_mse.item():.6f}")
|
441 |
-
|
442 |
-
print(f"[INFO] finished fitting mesh texture!")
|
443 |
-
|
444 |
-
|
445 |
-
@torch.no_grad()
|
446 |
-
def export_mesh(self, path):
|
447 |
-
|
448 |
-
mesh = Mesh(v=self.v, f=self.f, vt=self.vt, ft=self.ft, albedo=torch.sigmoid(self.albedo), device=self.device)
|
449 |
-
mesh.auto_normal()
|
450 |
-
mesh.write(path)
|
451 |
-
|
452 |
-
|
453 |
-
opt = tyro.cli(AllConfigs)
|
454 |
-
|
455 |
-
# load a saved ply and convert to mesh
|
456 |
-
assert opt.test_path.endswith('.ply'), '--test_path must be a .ply file saved by infer.py'
|
457 |
-
|
458 |
-
converter = Converter(opt).cuda()
|
459 |
-
converter.fit_nerf()
|
460 |
-
converter.fit_mesh()
|
461 |
-
converter.fit_mesh_uv()
|
462 |
-
converter.export_mesh(opt.test_path.replace('.ply', '.glb'))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (123 Bytes). View file
|
|
core/__pycache__/attention.cpython-39.pyc
ADDED
Binary file (4.36 kB). View file
|
|
core/__pycache__/gs.cpython-39.pyc
ADDED
Binary file (5.48 kB). View file
|
|
core/__pycache__/models.cpython-39.pyc
ADDED
Binary file (4.47 kB). View file
|
|
core/__pycache__/options.cpython-39.pyc
ADDED
Binary file (2.46 kB). View file
|
|
core/__pycache__/provider_objaverse.cpython-39.pyc
ADDED
Binary file (7.74 kB). View file
|
|
core/__pycache__/unet.cpython-39.pyc
ADDED
Binary file (7.45 kB). View file
|
|
core/__pycache__/utils.cpython-39.pyc
ADDED
Binary file (2.54 kB). View file
|
|
core/models.py
CHANGED
@@ -131,9 +131,12 @@ class LGM(nn.Module):
|
|
131 |
|
132 |
results['gaussians'] = gaussians
|
133 |
|
134 |
-
#
|
135 |
-
|
136 |
-
|
|
|
|
|
|
|
137 |
# use the other views for rendering and supervision
|
138 |
results = self.gs.render(gaussians, data['cam_view'], data['cam_view_proj'], data['cam_pos'], bg_color=bg_color)
|
139 |
pred_images = results['image'] # [B, V, C, output_size, output_size]
|
@@ -168,4 +171,4 @@ class LGM(nn.Module):
|
|
168 |
psnr = -10 * torch.log10(torch.mean((pred_images.detach() - gt_images) ** 2))
|
169 |
results['psnr'] = psnr
|
170 |
|
171 |
-
return results
|
|
|
131 |
|
132 |
results['gaussians'] = gaussians
|
133 |
|
134 |
+
# random bg for training
|
135 |
+
if self.training:
|
136 |
+
bg_color = torch.rand(3, dtype=torch.float32, device=gaussians.device)
|
137 |
+
else:
|
138 |
+
bg_color = torch.ones(3, dtype=torch.float32, device=gaussians.device)
|
139 |
+
|
140 |
# use the other views for rendering and supervision
|
141 |
results = self.gs.render(gaussians, data['cam_view'], data['cam_view_proj'], data['cam_pos'], bg_color=bg_color)
|
142 |
pred_images = results['image'] # [B, V, C, output_size, output_size]
|
|
|
171 |
psnr = -10 * torch.log10(torch.mean((pred_images.detach() - gt_images) ** 2))
|
172 |
results['psnr'] = psnr
|
173 |
|
174 |
+
return results
|
core/options.py
CHANGED
@@ -9,16 +9,16 @@ class Options:
|
|
9 |
# Unet image input size
|
10 |
input_size: int = 256
|
11 |
# Unet definition
|
12 |
-
down_channels: Tuple[int
|
13 |
-
down_attention: Tuple[bool
|
14 |
mid_attention: bool = True
|
15 |
-
up_channels: Tuple[int
|
16 |
-
up_attention: Tuple[bool
|
17 |
# Unet output size, dependent on the input_size and U-Net structure!
|
18 |
splat_size: int = 64
|
19 |
# gaussian render size
|
20 |
output_size: int = 256
|
21 |
-
|
22 |
### dataset
|
23 |
# data mode (only support s3 now)
|
24 |
data_mode: Literal['s3'] = 's3'
|
@@ -40,7 +40,7 @@ class Options:
|
|
40 |
### training
|
41 |
# workspace
|
42 |
workspace: str = './workspace'
|
43 |
-
# resume
|
44 |
resume: Optional[str] = None
|
45 |
# batch size (per-GPU)
|
46 |
batch_size: int = 8
|
@@ -117,4 +117,4 @@ config_defaults['tiny'] = Options(
|
|
117 |
mixed_precision='bf16',
|
118 |
)
|
119 |
|
120 |
-
AllConfigs = tyro.extras.subcommand_type_from_defaults(config_defaults, config_doc)
|
|
|
9 |
# Unet image input size
|
10 |
input_size: int = 256
|
11 |
# Unet definition
|
12 |
+
down_channels: Tuple[int] = (64, 128, 256, 512, 1024, 1024)
|
13 |
+
down_attention: Tuple[bool] = (False, False, False, True, True, True)
|
14 |
mid_attention: bool = True
|
15 |
+
up_channels: Tuple[int] = (1024, 1024, 512, 256)
|
16 |
+
up_attention: Tuple[bool] = (True, True, True, False)
|
17 |
# Unet output size, dependent on the input_size and U-Net structure!
|
18 |
splat_size: int = 64
|
19 |
# gaussian render size
|
20 |
output_size: int = 256
|
21 |
+
|
22 |
### dataset
|
23 |
# data mode (only support s3 now)
|
24 |
data_mode: Literal['s3'] = 's3'
|
|
|
40 |
### training
|
41 |
# workspace
|
42 |
workspace: str = './workspace'
|
43 |
+
# resume
|
44 |
resume: Optional[str] = None
|
45 |
# batch size (per-GPU)
|
46 |
batch_size: int = 8
|
|
|
117 |
mixed_precision='bf16',
|
118 |
)
|
119 |
|
120 |
+
AllConfigs = tyro.extras.subcommand_type_from_defaults(config_defaults, config_doc)
|
core/unet.py
CHANGED
@@ -3,10 +3,10 @@ import torch.nn as nn
|
|
3 |
import torch.nn.functional as F
|
4 |
|
5 |
import numpy as np
|
6 |
-
from typing import Tuple, Literal
|
7 |
from functools import partial
|
8 |
|
9 |
-
from core.attention import MemEffAttention
|
10 |
|
11 |
class MVAttention(nn.Module):
|
12 |
def __init__(
|
@@ -236,11 +236,11 @@ class UNet(nn.Module):
|
|
236 |
self,
|
237 |
in_channels: int = 3,
|
238 |
out_channels: int = 3,
|
239 |
-
down_channels: Tuple[int
|
240 |
-
down_attention: Tuple[bool
|
241 |
mid_attention: bool = True,
|
242 |
-
up_channels: Tuple[int
|
243 |
-
up_attention: Tuple[bool
|
244 |
layers_per_block: int = 2,
|
245 |
skip_scale: float = np.sqrt(0.5),
|
246 |
):
|
@@ -316,4 +316,4 @@ class UNet(nn.Module):
|
|
316 |
x = F.silu(x)
|
317 |
x = self.conv_out(x) # [B, Cout, H', W']
|
318 |
|
319 |
-
return x
|
|
|
3 |
import torch.nn.functional as F
|
4 |
|
5 |
import numpy as np
|
6 |
+
from typing import Tuple, Optional, Literal
|
7 |
from functools import partial
|
8 |
|
9 |
+
from core.attention import MemEffAttention, MemEffCrossAttention
|
10 |
|
11 |
class MVAttention(nn.Module):
|
12 |
def __init__(
|
|
|
236 |
self,
|
237 |
in_channels: int = 3,
|
238 |
out_channels: int = 3,
|
239 |
+
down_channels: Tuple[int] = (64, 128, 256, 512, 1024),
|
240 |
+
down_attention: Tuple[bool] = (False, False, False, True, True),
|
241 |
mid_attention: bool = True,
|
242 |
+
up_channels: Tuple[int] = (1024, 512, 256),
|
243 |
+
up_attention: Tuple[bool] = (True, True, False),
|
244 |
layers_per_block: int = 2,
|
245 |
skip_scale: float = np.sqrt(0.5),
|
246 |
):
|
|
|
316 |
x = F.silu(x)
|
317 |
x = self.conv_out(x) # [B, Cout, H', W']
|
318 |
|
319 |
+
return x
|
data_test/anya_rgba.png
DELETED
Binary file (32.9 kB)
|
|
data_test/bird.jpg
ADDED
data_test/bird_rgba.png
DELETED
Binary file (56.2 kB)
|
|
data_test/boy.jpg
ADDED
data_test/cat_statue.jpg
ADDED
data_test/catstatue_rgba.png
DELETED
Binary file (45.5 kB)
|
|
data_test/dragontoy.jpg
ADDED
data_test/frog_sweater.jpg
ADDED
data_test/gso_rabbit.jpg
ADDED
diff-gaussian-rasterization/.gitignore
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
build/
|
2 |
+
diff_gaussian_rasterization.egg-info/
|
3 |
+
dist/
|
4 |
+
|
5 |
+
__pycache__
|
6 |
+
|
7 |
+
*.so
|
diff-gaussian-rasterization/.gitmodules
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
[submodule "third_party/glm"]
|
2 |
+
path = third_party/glm
|
3 |
+
url = https://github.com/g-truc/glm.git
|
diff-gaussian-rasterization/CMakeLists.txt
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#
|
2 |
+
# Copyright (C) 2023, Inria
|
3 |
+
# GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
# All rights reserved.
|
5 |
+
#
|
6 |
+
# This software is free for non-commercial, research and evaluation use
|
7 |
+
# under the terms of the LICENSE.md file.
|
8 |
+
#
|
9 |
+
# For inquiries contact george.drettakis@inria.fr
|
10 |
+
#
|
11 |
+
|
12 |
+
cmake_minimum_required(VERSION 3.20)
|
13 |
+
|
14 |
+
project(DiffRast LANGUAGES CUDA CXX)
|
15 |
+
|
16 |
+
set(CMAKE_CXX_STANDARD 17)
|
17 |
+
set(CMAKE_CXX_EXTENSIONS OFF)
|
18 |
+
set(CMAKE_CUDA_STANDARD 17)
|
19 |
+
|
20 |
+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
21 |
+
|
22 |
+
add_library(CudaRasterizer
|
23 |
+
cuda_rasterizer/backward.h
|
24 |
+
cuda_rasterizer/backward.cu
|
25 |
+
cuda_rasterizer/forward.h
|
26 |
+
cuda_rasterizer/forward.cu
|
27 |
+
cuda_rasterizer/auxiliary.h
|
28 |
+
cuda_rasterizer/rasterizer_impl.cu
|
29 |
+
cuda_rasterizer/rasterizer_impl.h
|
30 |
+
cuda_rasterizer/rasterizer.h
|
31 |
+
)
|
32 |
+
|
33 |
+
set_target_properties(CudaRasterizer PROPERTIES CUDA_ARCHITECTURES "75;86")
|
34 |
+
|
35 |
+
target_include_directories(CudaRasterizer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/cuda_rasterizer)
|
36 |
+
target_include_directories(CudaRasterizer PRIVATE third_party/glm ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
|
diff-gaussian-rasterization/LICENSE.md
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Gaussian-Splatting License
|
2 |
+
===========================
|
3 |
+
|
4 |
+
**Inria** and **the Max Planck Institut for Informatik (MPII)** hold all the ownership rights on the *Software* named **gaussian-splatting**.
|
5 |
+
The *Software* is in the process of being registered with the Agence pour la Protection des
|
6 |
+
Programmes (APP).
|
7 |
+
|
8 |
+
The *Software* is still being developed by the *Licensor*.
|
9 |
+
|
10 |
+
*Licensor*'s goal is to allow the research community to use, test and evaluate
|
11 |
+
the *Software*.
|
12 |
+
|
13 |
+
## 1. Definitions
|
14 |
+
|
15 |
+
*Licensee* means any person or entity that uses the *Software* and distributes
|
16 |
+
its *Work*.
|
17 |
+
|
18 |
+
*Licensor* means the owners of the *Software*, i.e Inria and MPII
|
19 |
+
|
20 |
+
*Software* means the original work of authorship made available under this
|
21 |
+
License ie gaussian-splatting.
|
22 |
+
|
23 |
+
*Work* means the *Software* and any additions to or derivative works of the
|
24 |
+
*Software* that are made available under this License.
|
25 |
+
|
26 |
+
|
27 |
+
## 2. Purpose
|
28 |
+
This license is intended to define the rights granted to the *Licensee* by
|
29 |
+
Licensors under the *Software*.
|
30 |
+
|
31 |
+
## 3. Rights granted
|
32 |
+
|
33 |
+
For the above reasons Licensors have decided to distribute the *Software*.
|
34 |
+
Licensors grant non-exclusive rights to use the *Software* for research purposes
|
35 |
+
to research users (both academic and industrial), free of charge, without right
|
36 |
+
to sublicense.. The *Software* may be used "non-commercially", i.e., for research
|
37 |
+
and/or evaluation purposes only.
|
38 |
+
|
39 |
+
Subject to the terms and conditions of this License, you are granted a
|
40 |
+
non-exclusive, royalty-free, license to reproduce, prepare derivative works of,
|
41 |
+
publicly display, publicly perform and distribute its *Work* and any resulting
|
42 |
+
derivative works in any form.
|
43 |
+
|
44 |
+
## 4. Limitations
|
45 |
+
|
46 |
+
**4.1 Redistribution.** You may reproduce or distribute the *Work* only if (a) you do
|
47 |
+
so under this License, (b) you include a complete copy of this License with
|
48 |
+
your distribution, and (c) you retain without modification any copyright,
|
49 |
+
patent, trademark, or attribution notices that are present in the *Work*.
|
50 |
+
|
51 |
+
**4.2 Derivative Works.** You may specify that additional or different terms apply
|
52 |
+
to the use, reproduction, and distribution of your derivative works of the *Work*
|
53 |
+
("Your Terms") only if (a) Your Terms provide that the use limitation in
|
54 |
+
Section 2 applies to your derivative works, and (b) you identify the specific
|
55 |
+
derivative works that are subject to Your Terms. Notwithstanding Your Terms,
|
56 |
+
this License (including the redistribution requirements in Section 3.1) will
|
57 |
+
continue to apply to the *Work* itself.
|
58 |
+
|
59 |
+
**4.3** Any other use without of prior consent of Licensors is prohibited. Research
|
60 |
+
users explicitly acknowledge having received from Licensors all information
|
61 |
+
allowing to appreciate the adequacy between of the *Software* and their needs and
|
62 |
+
to undertake all necessary precautions for its execution and use.
|
63 |
+
|
64 |
+
**4.4** The *Software* is provided both as a compiled library file and as source
|
65 |
+
code. In case of using the *Software* for a publication or other results obtained
|
66 |
+
through the use of the *Software*, users are strongly encouraged to cite the
|
67 |
+
corresponding publications as explained in the documentation of the *Software*.
|
68 |
+
|
69 |
+
## 5. Disclaimer
|
70 |
+
|
71 |
+
THE USER CANNOT USE, EXPLOIT OR DISTRIBUTE THE *SOFTWARE* FOR COMMERCIAL PURPOSES
|
72 |
+
WITHOUT PRIOR AND EXPLICIT CONSENT OF LICENSORS. YOU MUST CONTACT INRIA FOR ANY
|
73 |
+
UNAUTHORIZED USE: stip-sophia.transfert@inria.fr . ANY SUCH ACTION WILL
|
74 |
+
CONSTITUTE A FORGERY. THIS *SOFTWARE* IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES
|
75 |
+
OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, WITH REGARDS TO COMMERCIAL
|
76 |
+
USE, PROFESSIONNAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALISATION OR
|
77 |
+
ADAPTATION. UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL INRIA OR THE
|
78 |
+
AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
79 |
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
80 |
+
GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION)
|
81 |
+
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
82 |
+
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, OUT OF OR
|
83 |
+
IN CONNECTION WITH THE *SOFTWARE* OR THE USE OR OTHER DEALINGS IN THE *SOFTWARE*.
|
diff-gaussian-rasterization/README.md
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Differential Gaussian Rasterization
|
2 |
+
|
3 |
+
**NOTE**: this is a modified version to support depth & alpha rendering (both forward and backward) from the [original repository](https://github.com/graphdeco-inria/diff-gaussian-rasterization).
|
4 |
+
|
5 |
+
```python
|
6 |
+
rendered_image, radii, rendered_depth, rendered_alpha = rasterizer(
|
7 |
+
means3D=means3D,
|
8 |
+
means2D=means2D,
|
9 |
+
shs=shs,
|
10 |
+
colors_precomp=colors_precomp,
|
11 |
+
opacities=opacity,
|
12 |
+
scales=scales,
|
13 |
+
rotations=rotations,
|
14 |
+
cov3D_precomp=cov3D_precomp,
|
15 |
+
)
|
16 |
+
```
|
17 |
+
|
18 |
+
|
19 |
+
Used as the rasterization engine for the paper "3D Gaussian Splatting for Real-Time Rendering of Radiance Fields". If you can make use of it in your own research, please be so kind to cite us.
|
20 |
+
|
21 |
+
<section class="section" id="BibTeX">
|
22 |
+
<div class="container is-max-desktop content">
|
23 |
+
<h2 class="title">BibTeX</h2>
|
24 |
+
<pre><code>@Article{kerbl3Dgaussians,
|
25 |
+
author = {Kerbl, Bernhard and Kopanas, Georgios and Leimk{\"u}hler, Thomas and Drettakis, George},
|
26 |
+
title = {3D Gaussian Splatting for Real-Time Radiance Field Rendering},
|
27 |
+
journal = {ACM Transactions on Graphics},
|
28 |
+
number = {4},
|
29 |
+
volume = {42},
|
30 |
+
month = {July},
|
31 |
+
year = {2023},
|
32 |
+
url = {https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/}
|
33 |
+
}</code></pre>
|
34 |
+
</div>
|
35 |
+
</section>
|
diff-gaussian-rasterization/cuda_rasterizer/auxiliary.h
ADDED
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#ifndef CUDA_RASTERIZER_AUXILIARY_H_INCLUDED
|
13 |
+
#define CUDA_RASTERIZER_AUXILIARY_H_INCLUDED
|
14 |
+
|
15 |
+
#include "config.h"
|
16 |
+
#include "stdio.h"
|
17 |
+
|
18 |
+
#define BLOCK_SIZE (BLOCK_X * BLOCK_Y)
|
19 |
+
#define NUM_WARPS (BLOCK_SIZE/32)
|
20 |
+
|
21 |
+
// Spherical harmonics coefficients
|
22 |
+
__device__ const float SH_C0 = 0.28209479177387814f;
|
23 |
+
__device__ const float SH_C1 = 0.4886025119029199f;
|
24 |
+
__device__ const float SH_C2[] = {
|
25 |
+
1.0925484305920792f,
|
26 |
+
-1.0925484305920792f,
|
27 |
+
0.31539156525252005f,
|
28 |
+
-1.0925484305920792f,
|
29 |
+
0.5462742152960396f
|
30 |
+
};
|
31 |
+
__device__ const float SH_C3[] = {
|
32 |
+
-0.5900435899266435f,
|
33 |
+
2.890611442640554f,
|
34 |
+
-0.4570457994644658f,
|
35 |
+
0.3731763325901154f,
|
36 |
+
-0.4570457994644658f,
|
37 |
+
1.445305721320277f,
|
38 |
+
-0.5900435899266435f
|
39 |
+
};
|
40 |
+
|
41 |
+
__forceinline__ __device__ float ndc2Pix(float v, int S)
|
42 |
+
{
|
43 |
+
return ((v + 1.0) * S - 1.0) * 0.5;
|
44 |
+
}
|
45 |
+
|
46 |
+
__forceinline__ __device__ void getRect(const float2 p, int max_radius, uint2& rect_min, uint2& rect_max, dim3 grid)
|
47 |
+
{
|
48 |
+
rect_min = {
|
49 |
+
min(grid.x, max((int)0, (int)((p.x - max_radius) / BLOCK_X))),
|
50 |
+
min(grid.y, max((int)0, (int)((p.y - max_radius) / BLOCK_Y)))
|
51 |
+
};
|
52 |
+
rect_max = {
|
53 |
+
min(grid.x, max((int)0, (int)((p.x + max_radius + BLOCK_X - 1) / BLOCK_X))),
|
54 |
+
min(grid.y, max((int)0, (int)((p.y + max_radius + BLOCK_Y - 1) / BLOCK_Y)))
|
55 |
+
};
|
56 |
+
}
|
57 |
+
|
58 |
+
__forceinline__ __device__ float3 transformPoint4x3(const float3& p, const float* matrix)
|
59 |
+
{
|
60 |
+
float3 transformed = {
|
61 |
+
matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12],
|
62 |
+
matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13],
|
63 |
+
matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14],
|
64 |
+
};
|
65 |
+
return transformed;
|
66 |
+
}
|
67 |
+
|
68 |
+
__forceinline__ __device__ float4 transformPoint4x4(const float3& p, const float* matrix)
|
69 |
+
{
|
70 |
+
float4 transformed = {
|
71 |
+
matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12],
|
72 |
+
matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13],
|
73 |
+
matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14],
|
74 |
+
matrix[3] * p.x + matrix[7] * p.y + matrix[11] * p.z + matrix[15]
|
75 |
+
};
|
76 |
+
return transformed;
|
77 |
+
}
|
78 |
+
|
79 |
+
__forceinline__ __device__ float3 transformVec4x3(const float3& p, const float* matrix)
|
80 |
+
{
|
81 |
+
float3 transformed = {
|
82 |
+
matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z,
|
83 |
+
matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z,
|
84 |
+
matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z,
|
85 |
+
};
|
86 |
+
return transformed;
|
87 |
+
}
|
88 |
+
|
89 |
+
__forceinline__ __device__ float3 transformVec4x3Transpose(const float3& p, const float* matrix)
|
90 |
+
{
|
91 |
+
float3 transformed = {
|
92 |
+
matrix[0] * p.x + matrix[1] * p.y + matrix[2] * p.z,
|
93 |
+
matrix[4] * p.x + matrix[5] * p.y + matrix[6] * p.z,
|
94 |
+
matrix[8] * p.x + matrix[9] * p.y + matrix[10] * p.z,
|
95 |
+
};
|
96 |
+
return transformed;
|
97 |
+
}
|
98 |
+
|
99 |
+
__forceinline__ __device__ float dnormvdz(float3 v, float3 dv)
|
100 |
+
{
|
101 |
+
float sum2 = v.x * v.x + v.y * v.y + v.z * v.z;
|
102 |
+
float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2);
|
103 |
+
float dnormvdz = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32;
|
104 |
+
return dnormvdz;
|
105 |
+
}
|
106 |
+
|
107 |
+
__forceinline__ __device__ float3 dnormvdv(float3 v, float3 dv)
|
108 |
+
{
|
109 |
+
float sum2 = v.x * v.x + v.y * v.y + v.z * v.z;
|
110 |
+
float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2);
|
111 |
+
|
112 |
+
float3 dnormvdv;
|
113 |
+
dnormvdv.x = ((+sum2 - v.x * v.x) * dv.x - v.y * v.x * dv.y - v.z * v.x * dv.z) * invsum32;
|
114 |
+
dnormvdv.y = (-v.x * v.y * dv.x + (sum2 - v.y * v.y) * dv.y - v.z * v.y * dv.z) * invsum32;
|
115 |
+
dnormvdv.z = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32;
|
116 |
+
return dnormvdv;
|
117 |
+
}
|
118 |
+
|
119 |
+
__forceinline__ __device__ float4 dnormvdv(float4 v, float4 dv)
|
120 |
+
{
|
121 |
+
float sum2 = v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w;
|
122 |
+
float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2);
|
123 |
+
|
124 |
+
float4 vdv = { v.x * dv.x, v.y * dv.y, v.z * dv.z, v.w * dv.w };
|
125 |
+
float vdv_sum = vdv.x + vdv.y + vdv.z + vdv.w;
|
126 |
+
float4 dnormvdv;
|
127 |
+
dnormvdv.x = ((sum2 - v.x * v.x) * dv.x - v.x * (vdv_sum - vdv.x)) * invsum32;
|
128 |
+
dnormvdv.y = ((sum2 - v.y * v.y) * dv.y - v.y * (vdv_sum - vdv.y)) * invsum32;
|
129 |
+
dnormvdv.z = ((sum2 - v.z * v.z) * dv.z - v.z * (vdv_sum - vdv.z)) * invsum32;
|
130 |
+
dnormvdv.w = ((sum2 - v.w * v.w) * dv.w - v.w * (vdv_sum - vdv.w)) * invsum32;
|
131 |
+
return dnormvdv;
|
132 |
+
}
|
133 |
+
|
134 |
+
__forceinline__ __device__ float sigmoid(float x)
|
135 |
+
{
|
136 |
+
return 1.0f / (1.0f + expf(-x));
|
137 |
+
}
|
138 |
+
|
139 |
+
__forceinline__ __device__ bool in_frustum(int idx,
|
140 |
+
const float* orig_points,
|
141 |
+
const float* viewmatrix,
|
142 |
+
const float* projmatrix,
|
143 |
+
bool prefiltered,
|
144 |
+
float3& p_view)
|
145 |
+
{
|
146 |
+
float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] };
|
147 |
+
|
148 |
+
// Bring points to screen space
|
149 |
+
float4 p_hom = transformPoint4x4(p_orig, projmatrix);
|
150 |
+
float p_w = 1.0f / (p_hom.w + 0.0000001f);
|
151 |
+
float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w };
|
152 |
+
p_view = transformPoint4x3(p_orig, viewmatrix);
|
153 |
+
|
154 |
+
if (p_view.z <= 0.2f)// || ((p_proj.x < -1.3 || p_proj.x > 1.3 || p_proj.y < -1.3 || p_proj.y > 1.3)))
|
155 |
+
{
|
156 |
+
if (prefiltered)
|
157 |
+
{
|
158 |
+
printf("Point is filtered although prefiltered is set. This shouldn't happen!");
|
159 |
+
__trap();
|
160 |
+
}
|
161 |
+
return false;
|
162 |
+
}
|
163 |
+
return true;
|
164 |
+
}
|
165 |
+
|
166 |
+
#define CHECK_CUDA(A, debug) \
|
167 |
+
A; if(debug) { \
|
168 |
+
auto ret = cudaDeviceSynchronize(); \
|
169 |
+
if (ret != cudaSuccess) { \
|
170 |
+
std::cerr << "\n[CUDA ERROR] in " << __FILE__ << "\nLine " << __LINE__ << ": " << cudaGetErrorString(ret); \
|
171 |
+
throw std::runtime_error(cudaGetErrorString(ret)); \
|
172 |
+
} \
|
173 |
+
}
|
174 |
+
|
175 |
+
#endif
|
diff-gaussian-rasterization/cuda_rasterizer/backward.cu
ADDED
@@ -0,0 +1,712 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#include "backward.h"
|
13 |
+
#include "auxiliary.h"
|
14 |
+
#include <cooperative_groups.h>
|
15 |
+
#include <cooperative_groups/reduce.h>
|
16 |
+
namespace cg = cooperative_groups;
|
17 |
+
|
18 |
+
// Backward pass of the spherical-harmonics (SH) to RGB conversion for a
// single Gaussian.
//
// The normalized camera-to-mean direction is rebuilt from means[idx] and
// campos, then two results are produced:
//   * the loss gradient w.r.t. each active SH coefficient of Gaussian idx is
//     written to dL_dshs[idx * max_coeffs + k];
//   * the part of the mean gradient that exists because the view direction
//     depends on the mean is ADDED to dL_dmeans[idx].
//
// idx         index of the Gaussian handled by this call
// deg         active SH degree; only the bands for deg 0..3 are implemented
// max_coeffs  per-Gaussian stride (in vec3 coefficients) of shs and dL_dshs
// means       Gaussian means
// campos      camera position
// shs         SH coefficients, reinterpreted here as vec3 triples
// clamped     three flags per Gaussian; a set flag zeroes the incoming
//             gradient of the corresponding color channel
// dL_dcolor   incoming loss gradient w.r.t. each Gaussian's RGB color
// dL_dmeans   mean gradients (accumulated into)
// dL_dshs     SH coefficient gradients (overwritten for the active bands)
__device__ void computeColorFromSH(int idx, int deg, int max_coeffs, const glm::vec3* means, glm::vec3 campos, const float* shs, const bool* clamped, const glm::vec3* dL_dcolor, glm::vec3* dL_dmeans, glm::vec3* dL_dshs)
{
	// Rebuild the forward intermediates: unit direction from camera to mean.
	const glm::vec3 to_point = means[idx] - campos;
	const glm::vec3 view_dir = to_point / glm::length(to_point);
	const float x = view_dir.x;
	const float y = view_dir.y;
	const float z = view_dir.z;

	// Coefficients read / gradients written for this Gaussian.
	const glm::vec3* sh = reinterpret_cast<const glm::vec3*>(shs) + idx * max_coeffs;
	glm::vec3* grad_sh = dL_dshs + idx * max_coeffs;

	// A channel that was clamped in the forward pass gets its incoming
	// gradient multiplied by zero.
	glm::vec3 grad_rgb = dL_dcolor[idx];
	grad_rgb.x *= clamped[3 * idx + 0] ? 0 : 1;
	grad_rgb.y *= clamped[3 * idx + 1] ? 0 : 1;
	grad_rgb.z *= clamped[3 * idx + 2] ? 0 : 1;

	// Derivatives of the color w.r.t. the components of the unit direction.
	glm::vec3 dcolor_dx(0, 0, 0);
	glm::vec3 dcolor_dy(0, 0, 0);
	glm::vec3 dcolor_dz(0, 0, 0);

	// Band 0: constant basis function.
	grad_sh[0] = SH_C0 * grad_rgb;

	if (deg > 0)
	{
		// Band 1: basis functions linear in the direction.
		grad_sh[1] = (-SH_C1 * y) * grad_rgb;
		grad_sh[2] = (SH_C1 * z) * grad_rgb;
		grad_sh[3] = (-SH_C1 * x) * grad_rgb;

		dcolor_dx = -SH_C1 * sh[3];
		dcolor_dy = -SH_C1 * sh[1];
		dcolor_dz = SH_C1 * sh[2];

		if (deg > 1)
		{
			// Band 2: quadratic basis functions.
			const float xx = x * x, yy = y * y, zz = z * z;
			const float xy = x * y, yz = y * z, xz = x * z;

			grad_sh[4] = (SH_C2[0] * xy) * grad_rgb;
			grad_sh[5] = (SH_C2[1] * yz) * grad_rgb;
			grad_sh[6] = (SH_C2[2] * (2.f * zz - xx - yy)) * grad_rgb;
			grad_sh[7] = (SH_C2[3] * xz) * grad_rgb;
			grad_sh[8] = (SH_C2[4] * (xx - yy)) * grad_rgb;

			dcolor_dx += SH_C2[0] * y * sh[4] + SH_C2[2] * 2.f * -x * sh[6] + SH_C2[3] * z * sh[7] + SH_C2[4] * 2.f * x * sh[8];
			dcolor_dy += SH_C2[0] * x * sh[4] + SH_C2[1] * z * sh[5] + SH_C2[2] * 2.f * -y * sh[6] + SH_C2[4] * 2.f * -y * sh[8];
			dcolor_dz += SH_C2[1] * y * sh[5] + SH_C2[2] * 2.f * 2.f * z * sh[6] + SH_C2[3] * x * sh[7];

			if (deg > 2)
			{
				// Band 3: cubic basis functions.
				grad_sh[9] = (SH_C3[0] * y * (3.f * xx - yy)) * grad_rgb;
				grad_sh[10] = (SH_C3[1] * xy * z) * grad_rgb;
				grad_sh[11] = (SH_C3[2] * y * (4.f * zz - xx - yy)) * grad_rgb;
				grad_sh[12] = (SH_C3[3] * z * (2.f * zz - 3.f * xx - 3.f * yy)) * grad_rgb;
				grad_sh[13] = (SH_C3[4] * x * (4.f * zz - xx - yy)) * grad_rgb;
				grad_sh[14] = (SH_C3[5] * z * (xx - yy)) * grad_rgb;
				grad_sh[15] = (SH_C3[6] * x * (xx - 3.f * yy)) * grad_rgb;

				dcolor_dx += (
					SH_C3[0] * sh[9] * 3.f * 2.f * xy +
					SH_C3[1] * sh[10] * yz +
					SH_C3[2] * sh[11] * -2.f * xy +
					SH_C3[3] * sh[12] * -3.f * 2.f * xz +
					SH_C3[4] * sh[13] * (-3.f * xx + 4.f * zz - yy) +
					SH_C3[5] * sh[14] * 2.f * xz +
					SH_C3[6] * sh[15] * 3.f * (xx - yy));

				dcolor_dy += (
					SH_C3[0] * sh[9] * 3.f * (xx - yy) +
					SH_C3[1] * sh[10] * xz +
					SH_C3[2] * sh[11] * (-3.f * yy + 4.f * zz - xx) +
					SH_C3[3] * sh[12] * -3.f * 2.f * yz +
					SH_C3[4] * sh[13] * -2.f * xy +
					SH_C3[5] * sh[14] * -2.f * yz +
					SH_C3[6] * sh[15] * -3.f * 2.f * xy);

				dcolor_dz += (
					SH_C3[1] * sh[10] * xy +
					SH_C3[2] * sh[11] * 4.f * 2.f * yz +
					SH_C3[3] * sh[12] * 3.f * (2.f * zz - xx - yy) +
					SH_C3[4] * sh[13] * 4.f * 2.f * xz +
					SH_C3[5] * sh[14] * (xx - yy));
			}
		}
	}

	// Chain rule through the color: loss gradient w.r.t. the unit direction.
	const glm::vec3 grad_dir(glm::dot(dcolor_dx, grad_rgb), glm::dot(dcolor_dy, grad_rgb), glm::dot(dcolor_dz, grad_rgb));

	// Chain rule through the normalization of the camera-to-mean vector.
	const float3 grad_mean = dnormvdv(float3{ to_point.x, to_point.y, to_point.z }, float3{ grad_dir.x, grad_dir.y, grad_dir.z });

	// Only the view-dependent-color share of the mean gradient is added
	// here; the caller accumulates the other contributions separately.
	dL_dmeans[idx] += glm::vec3(grad_mean.x, grad_mean.y, grad_mean.z);
}
|
140 |
+
|
141 |
+
// Backward pass of the 2D covariance computation and its inversion (the
// "conic"). Launched as its own kernel, one thread per Gaussian, before the
// remaining preprocessing backward steps.
//
// For every Gaussian with idx < P and radii[idx] > 0 the kernel
//   * recomputes the view-space mean, the projection Jacobian J, T = W * J
//     and the 2D covariance (with 0.3 added to both diagonal entries),
//   * converts the conic gradient dL_dconics (4 floats per Gaussian, entries
//     0, 1 and 3 are read) into gradients w.r.t. the 2D covariance,
//   * writes the gradient w.r.t. the six stored 3D covariance entries to
//     dL_dcov[6 * idx .. 6 * idx + 5],
//   * OVERWRITES dL_dmeans[idx] with the mean gradient that flows through
//     the covariance projection.
// Threads for other indices return without writing anything.
//
// h_x, h_y            scale factors of the projection Jacobian
// tan_fovx, tan_fovy  used to clamp t.x / t.z and t.y / t.z to +-1.3 * tan_fov
// view_matrix         16 floats; indices 0..2, 4..6, 8..10 are read for W
__global__ void computeCov2DCUDA(int P,
	const float3* means,
	const int* radii,
	const float* cov3Ds,
	const float h_x, float h_y,
	const float tan_fovx, float tan_fovy,
	const float* view_matrix,
	const float* dL_dconics,
	float3* dL_dmeans,
	float* dL_dcov)
{
	const auto idx = cg::this_grid().thread_rank();
	if (idx >= P)
		return;
	if (!(radii[idx] > 0))
		return;

	// Per-Gaussian input covariance and output gradient slots (6 floats each).
	const float* cov3D = cov3Ds + 6 * idx;
	float* grad_cov3D = dL_dcov + 6 * idx;

	// Fetch the incoming gradient and recompute the forward intermediates.
	const float3 mean = means[idx];
	const float3 dL_dconic = { dL_dconics[4 * idx], dL_dconics[4 * idx + 1], dL_dconics[4 * idx + 3] };
	float3 t = transformPoint4x3(mean, view_matrix);

	// Clamp the projected ratios; a clamped axis passes no gradient to t.x / t.y.
	const float limx = 1.3f * tan_fovx;
	const float limy = 1.3f * tan_fovy;
	const float txtz = t.x / t.z;
	const float tytz = t.y / t.z;
	t.x = min(limx, max(-limx, txtz)) * t.z;
	t.y = min(limy, max(-limy, tytz)) * t.z;

	const float x_grad_mul = (txtz < -limx || txtz > limx) ? 0.f : 1.f;
	const float y_grad_mul = (tytz < -limy || tytz > limy) ? 0.f : 1.f;

	glm::mat3 J = glm::mat3(
		h_x / t.z, 0.0f, -(h_x * t.x) / (t.z * t.z),
		0.0f, h_y / t.z, -(h_y * t.y) / (t.z * t.z),
		0, 0, 0);

	glm::mat3 W = glm::mat3(
		view_matrix[0], view_matrix[4], view_matrix[8],
		view_matrix[1], view_matrix[5], view_matrix[9],
		view_matrix[2], view_matrix[6], view_matrix[10]);

	glm::mat3 Vrk = glm::mat3(
		cov3D[0], cov3D[1], cov3D[2],
		cov3D[1], cov3D[3], cov3D[4],
		cov3D[2], cov3D[4], cov3D[5]);

	glm::mat3 T = W * J;

	glm::mat3 cov2D = glm::transpose(T) * glm::transpose(Vrk) * T;

	// Entries of the 2D covariance after adding 0.3 to the diagonal.
	cov2D[0][0] += 0.3f;
	cov2D[1][1] += 0.3f;
	const float a = cov2D[0][0];
	const float b = cov2D[0][1];
	const float c = cov2D[1][1];

	const float denom = a * c - b * b;
	const float denom2inv = 1.0f / ((denom * denom) + 0.0000001f);

	float dL_da = 0, dL_db = 0, dL_dc = 0;

	if (denom2inv == 0)
	{
		// No usable inverse: the 3D covariance receives a zero gradient.
		for (int k = 0; k < 6; k++)
			grad_cov3D[k] = 0;
	}
	else
	{
		// Gradients w.r.t. the 2D covariance entries, given the gradients
		// w.r.t. the conic (inverse covariance) entries,
		// e.g. dL / da = dL / d_conic_a * d_conic_a / d_a
		dL_da = denom2inv * (-c * c * dL_dconic.x + 2 * b * c * dL_dconic.y + (denom - a * c) * dL_dconic.z);
		dL_dc = denom2inv * (-a * a * dL_dconic.z + 2 * a * b * dL_dconic.y + (denom - a * c) * dL_dconic.x);
		dL_db = denom2inv * 2 * (b * c * dL_dconic.x - (denom + 2 * b * b) * dL_dconic.y + a * b * dL_dconic.z);

		// cov2D = transpose(T) * transpose(Vrk) * T
		// Diagonal entries of the 3D covariance.
		grad_cov3D[0] = (T[0][0] * T[0][0] * dL_da + T[0][0] * T[1][0] * dL_db + T[1][0] * T[1][0] * dL_dc);
		grad_cov3D[3] = (T[0][1] * T[0][1] * dL_da + T[0][1] * T[1][1] * dL_db + T[1][1] * T[1][1] * dL_dc);
		grad_cov3D[5] = (T[0][2] * T[0][2] * dL_da + T[0][2] * T[1][2] * dL_db + T[1][2] * T[1][2] * dL_dc);

		// Off-diagonal entries: each appears twice in the symmetric matrix,
		// hence the doubled terms.
		grad_cov3D[1] = 2 * T[0][0] * T[0][1] * dL_da + (T[0][0] * T[1][1] + T[0][1] * T[1][0]) * dL_db + 2 * T[1][0] * T[1][1] * dL_dc;
		grad_cov3D[2] = 2 * T[0][0] * T[0][2] * dL_da + (T[0][0] * T[1][2] + T[0][2] * T[1][0]) * dL_db + 2 * T[1][0] * T[1][2] * dL_dc;
		grad_cov3D[4] = 2 * T[0][2] * T[0][1] * dL_da + (T[0][1] * T[1][2] + T[0][2] * T[1][1]) * dL_db + 2 * T[1][1] * T[1][2] * dL_dc;
	}

	// Products of T[0] / T[1] with Vrk[k]; each is used by two of the six
	// gradients below.
	const float t0_v0 = T[0][0] * Vrk[0][0] + T[0][1] * Vrk[0][1] + T[0][2] * Vrk[0][2];
	const float t0_v1 = T[0][0] * Vrk[1][0] + T[0][1] * Vrk[1][1] + T[0][2] * Vrk[1][2];
	const float t0_v2 = T[0][0] * Vrk[2][0] + T[0][1] * Vrk[2][1] + T[0][2] * Vrk[2][2];
	const float t1_v0 = T[1][0] * Vrk[0][0] + T[1][1] * Vrk[0][1] + T[1][2] * Vrk[0][2];
	const float t1_v1 = T[1][0] * Vrk[1][0] + T[1][1] * Vrk[1][1] + T[1][2] * Vrk[1][2];
	const float t1_v2 = T[1][0] * Vrk[2][0] + T[1][1] * Vrk[2][1] + T[1][2] * Vrk[2][2];

	// Gradients w.r.t. the six entries T[0][*] and T[1][*] of T.
	const float dL_dT00 = 2 * t0_v0 * dL_da + t1_v0 * dL_db;
	const float dL_dT01 = 2 * t0_v1 * dL_da + t1_v1 * dL_db;
	const float dL_dT02 = 2 * t0_v2 * dL_da + t1_v2 * dL_db;
	const float dL_dT10 = 2 * t1_v0 * dL_dc + t0_v0 * dL_db;
	const float dL_dT11 = 2 * t1_v1 * dL_dc + t0_v1 * dL_db;
	const float dL_dT12 = 2 * t1_v2 * dL_dc + t0_v2 * dL_db;

	// Gradients w.r.t. the four non-zero Jacobian entries (T = W * J).
	const float dL_dJ00 = W[0][0] * dL_dT00 + W[0][1] * dL_dT01 + W[0][2] * dL_dT02;
	const float dL_dJ02 = W[2][0] * dL_dT00 + W[2][1] * dL_dT01 + W[2][2] * dL_dT02;
	const float dL_dJ11 = W[1][0] * dL_dT10 + W[1][1] * dL_dT11 + W[1][2] * dL_dT12;
	const float dL_dJ12 = W[2][0] * dL_dT10 + W[2][1] * dL_dT11 + W[2][2] * dL_dT12;

	const float inv_z = 1.f / t.z;
	const float inv_z2 = inv_z * inv_z;
	const float inv_z3 = inv_z2 * inv_z;

	// Gradients w.r.t. the view-space mean t.
	const float dL_dtx = x_grad_mul * -h_x * inv_z2 * dL_dJ02;
	const float dL_dty = y_grad_mul * -h_y * inv_z2 * dL_dJ12;
	const float dL_dtz = -h_x * inv_z2 * dL_dJ00 - h_y * inv_z2 * dL_dJ11 + (2 * h_x * t.x) * inv_z3 * dL_dJ02 + (2 * h_y * t.y) * inv_z3 * dL_dJ12;

	// Undo t = transformPoint4x3(mean, view_matrix).
	const float3 dL_dmean = transformVec4x3Transpose({ dL_dtx, dL_dty, dL_dtz }, view_matrix);

	// This is only the covariance-related share of the mean gradient. It is
	// stored with '=', so later stages must add their shares on top of it.
	dL_dmeans[idx] = dL_dmean;
}
|
275 |
+
|
276 |
+
// Backward pass for the conversion of scale and rotation to a 3D covariance
// matrix, for one Gaussian.
//
// Recomputes R (from the quaternion), S = diag(mod * scale) and M = S * R,
// then turns the gradient w.r.t. the six stored covariance entries into
//   * dL_dscales[idx]: gradient w.r.t. the scale vector, and
//   * dL_drots[idx]:   gradient w.r.t. the quaternion components.
// Both outputs are overwritten.
//
// idx         index of the Gaussian
// scale       per-axis scale of the Gaussian
// mod         scalar multiplied onto scale before use
// rot         quaternion with components (r, x, y, z) stored in (x, y, z, w).
//             It is used exactly as passed in: this function does not
//             normalize it, and the gradient written to dL_drots is w.r.t.
//             these components with no normalization term.
// dL_dcov3Ds  gradients w.r.t. the stored covariance entries, 6 per Gaussian
//             in the order (0,0) (0,1) (0,2) (1,1) (1,2) (2,2)
__device__ void computeCov3D(int idx, const glm::vec3 scale, float mod, const glm::vec4 rot, const float* dL_dcov3Ds, glm::vec3* dL_dscales, glm::vec4* dL_drots)
{
	// Recompute the intermediate results of the 3D covariance computation.
	const float r = rot.x;
	const float x = rot.y;
	const float y = rot.z;
	const float z = rot.w;

	glm::mat3 R = glm::mat3(
		1.f - 2.f * (y * y + z * z), 2.f * (x * y - r * z), 2.f * (x * z + r * y),
		2.f * (x * y + r * z), 1.f - 2.f * (x * x + z * z), 2.f * (y * z - r * x),
		2.f * (x * z - r * y), 2.f * (y * z + r * x), 1.f - 2.f * (x * x + y * y)
	);

	glm::mat3 S = glm::mat3(1.0f);

	glm::vec3 s = mod * scale;
	S[0][0] = s.x;
	S[1][1] = s.y;
	S[2][2] = s.z;

	glm::mat3 M = S * R;

	const float* dL_dcov3D = dL_dcov3Ds + 6 * idx;

	// Convert the per-entry covariance gradients to matrix form. The
	// off-diagonal gradients are split evenly between the two symmetric
	// positions.
	glm::mat3 dL_dSigma = glm::mat3(
		dL_dcov3D[0], 0.5f * dL_dcov3D[1], 0.5f * dL_dcov3D[2],
		0.5f * dL_dcov3D[1], dL_dcov3D[3], 0.5f * dL_dcov3D[4],
		0.5f * dL_dcov3D[2], 0.5f * dL_dcov3D[4], dL_dcov3D[5]
	);

	// Loss gradient w.r.t. matrix M (dSigma_dM = 2 * M).
	glm::mat3 dL_dM = 2.0f * M * dL_dSigma;

	glm::mat3 Rt = glm::transpose(R);
	glm::mat3 dL_dMt = glm::transpose(dL_dM);

	// Gradients of loss w.r.t. scale.
	glm::vec3* dL_dscale = dL_dscales + idx;
	dL_dscale->x = glm::dot(Rt[0], dL_dMt[0]);
	dL_dscale->y = glm::dot(Rt[1], dL_dMt[1]);
	dL_dscale->z = glm::dot(Rt[2], dL_dMt[2]);

	// Fold the scale into dL_dMt so the quaternion terms below can use it
	// directly.
	dL_dMt[0] *= s.x;
	dL_dMt[1] *= s.y;
	dL_dMt[2] *= s.z;

	// Gradients of loss w.r.t. the quaternion components used to build R.
	glm::vec4 dL_dq;
	dL_dq.x = 2 * z * (dL_dMt[0][1] - dL_dMt[1][0]) + 2 * y * (dL_dMt[2][0] - dL_dMt[0][2]) + 2 * x * (dL_dMt[1][2] - dL_dMt[2][1]);
	dL_dq.y = 2 * y * (dL_dMt[1][0] + dL_dMt[0][1]) + 2 * z * (dL_dMt[2][0] + dL_dMt[0][2]) + 2 * r * (dL_dMt[1][2] - dL_dMt[2][1]) - 4 * x * (dL_dMt[2][2] + dL_dMt[1][1]);
	dL_dq.z = 2 * x * (dL_dMt[1][0] + dL_dMt[0][1]) + 2 * r * (dL_dMt[2][0] - dL_dMt[0][2]) + 2 * z * (dL_dMt[1][2] + dL_dMt[2][1]) - 4 * y * (dL_dMt[2][2] + dL_dMt[0][0]);
	dL_dq.w = 2 * r * (dL_dMt[0][1] - dL_dMt[1][0]) + 2 * x * (dL_dMt[2][0] + dL_dMt[0][2]) + 2 * y * (dL_dMt[1][2] + dL_dMt[2][1]) - 4 * z * (dL_dMt[1][1] + dL_dMt[0][0]);

	// Store the quaternion gradient as-is (no normalization backward step).
	float4* dL_drot = (float4*)(dL_drots + idx);
	*dL_drot = float4{ dL_dq.x, dL_dq.y, dL_dq.z, dL_dq.w };
}
|
342 |
+
|
343 |
+
// Backward pass of the preprocessing steps, except for the 2D covariance
// computation and inversion (those are handled by computeCov2DCUDA, which
// must have run first because it overwrites dL_dmeans[idx] while this kernel
// accumulates into it).
//
// One thread per Gaussian; threads with idx >= P or radii[idx] <= 0 return
// without touching any output. For every other Gaussian the kernel adds to
// dL_dmeans[idx]
//   1. the gradient coming from the 2D (screen-space) mean, through proj,
//   2. the gradient coming from the per-Gaussian depth, through view,
//   3. if shs is non-null, the view-dependent color gradient
//      (computeColorFromSH, which also fills dL_dsh),
// and, if scales is non-null, converts dL_dcov3D into dL_dscale / dL_drot
// via computeCov3D.
//
// P  number of Gaussians
// D  active SH degree, forwarded to computeColorFromSH
// M  SH coefficients per Gaussian, forwarded to computeColorFromSH
// The template parameter C is not referenced in the body.
template<int C>
__global__ void preprocessCUDA(
	int P, int D, int M,
	const float3* means,
	const int* radii,
	const float* shs,
	const bool* clamped,
	const glm::vec3* scales,
	const glm::vec4* rotations,
	const float scale_modifier,
	const float* view,
	const float* proj,
	const glm::vec3* campos,
	const float3* dL_dmean2D,
	glm::vec3* dL_dmeans,
	float* dL_dcolor,
	float* dL_ddepth,
	float* dL_dcov3D,
	float* dL_dsh,
	glm::vec3* dL_dscale,
	glm::vec4* dL_drot)
{
	auto idx = cg::this_grid().thread_rank();
	if (idx >= P || !(radii[idx] > 0))
		return;

	const float3 m = means[idx];

	// Read the incoming per-Gaussian gradients once instead of re-indexing
	// the arrays in every expression below.
	const float3 grad_mean2D = dL_dmean2D[idx];
	const float grad_depth = dL_ddepth[idx];

	// Taking care of gradients from the screenspace points.
	float4 m_hom = transformPoint4x4(m, proj);
	float m_w = 1.0f / (m_hom.w + 0.0000001f);

	// Loss gradient w.r.t. 3D means due to gradients of 2D means from the
	// rendering procedure.
	glm::vec3 dL_dmean;
	float mul1 = (proj[0] * m.x + proj[4] * m.y + proj[8] * m.z + proj[12]) * m_w * m_w;
	float mul2 = (proj[1] * m.x + proj[5] * m.y + proj[9] * m.z + proj[13]) * m_w * m_w;
	dL_dmean.x = (proj[0] * m_w - proj[3] * mul1) * grad_mean2D.x + (proj[1] * m_w - proj[3] * mul2) * grad_mean2D.y;
	dL_dmean.y = (proj[4] * m_w - proj[7] * mul1) * grad_mean2D.x + (proj[5] * m_w - proj[7] * mul2) * grad_mean2D.y;
	dL_dmean.z = (proj[8] * m_w - proj[11] * mul1) * grad_mean2D.x + (proj[9] * m_w - proj[11] * mul2) * grad_mean2D.y;

	// That's the second part of the mean gradient. The previous computation
	// of cov2D and the following SH conversion also affect it.
	dL_dmeans[idx] += dL_dmean;

	// Loss gradient w.r.t. 3D means due to gradients of depth from the
	// rendering procedure. mul3 combines view[2], view[6], view[10] and
	// view[14] with the mean.
	glm::vec3 dL_dmean2;
	float mul3 = view[2] * m.x + view[6] * m.y + view[10] * m.z + view[14];
	dL_dmean2.x = (view[2] - view[3] * mul3) * grad_depth;
	dL_dmean2.y = (view[6] - view[7] * mul3) * grad_depth;
	dL_dmean2.z = (view[10] - view[11] * mul3) * grad_depth;

	// That's the third part of the mean gradient.
	dL_dmeans[idx] += dL_dmean2;

	// Compute gradient updates due to computing colors from SHs.
	if (shs)
		computeColorFromSH(idx, D, M, (glm::vec3*)means, *campos, shs, clamped, (glm::vec3*)dL_dcolor, (glm::vec3*)dL_dmeans, (glm::vec3*)dL_dsh);

	// Compute gradient updates due to computing covariance from scale/rotation.
	if (scales)
		computeCov3D(idx, scales[idx], scale_modifier, rotations[idx], dL_dcov3D, dL_dscale, dL_drot);
}
|
413 |
+
|
414 |
+
// Backward version of the rendering procedure.
//
// Launched with one thread per pixel and one thread block per
// BLOCK_X x BLOCK_Y tile. Each block walks the Gaussians of its tile
// (point_list[ranges[tile].x .. ranges[tile].y)) from back to front in
// batches of BLOCK_SIZE that are staged in shared memory. Every in-image
// thread replays the blending for its pixel and atomically accumulates
//   dL_dcolors   gradient w.r.t. per-Gaussian color (C channels)
//   dL_ddepths   gradient w.r.t. per-Gaussian depth
//   dL_dmean2D   gradient w.r.t. the 2D mean (x and y components)
//   dL_dconic2D  gradient w.r.t. the conic (x, y and w components)
//   dL_dopacity  gradient w.r.t. opacity
// All outputs are only added to (atomicAdd); nothing in this kernel zeroes
// them first.
//
// alphas      per-pixel accumulated alpha from the forward pass; the final
//             transmittance is recovered as 1 - alphas[pix]
// n_contrib   per-pixel index bound of the last contributing Gaussian
// dL_dpixels  incoming color gradient, read as dL_dpixels[ch * H * W + pix]
// bg_color    background color; read once for every in-image pixel
template <uint32_t C>
__global__ void __launch_bounds__(BLOCK_X * BLOCK_Y)
renderCUDA(
	const uint2* __restrict__ ranges,
	const uint32_t* __restrict__ point_list,
	int W, int H,
	const float* __restrict__ bg_color,
	const float2* __restrict__ points_xy_image,
	const float4* __restrict__ conic_opacity,
	const float* __restrict__ colors,
	const float* __restrict__ depths,
	const float* __restrict__ alphas,
	const uint32_t* __restrict__ n_contrib,
	const float* __restrict__ dL_dpixels,
	const float* __restrict__ dL_dpixel_depths,
	const float* __restrict__ dL_dalphas,
	float3* __restrict__ dL_dmean2D,
	float4* __restrict__ dL_dconic2D,
	float* __restrict__ dL_dopacity,
	float* __restrict__ dL_dcolors,
	float* __restrict__ dL_ddepths
)
{
	// We rasterize again. Compute necessary block info.
	auto block = cg::this_thread_block();
	const uint32_t horizontal_blocks = (W + BLOCK_X - 1) / BLOCK_X;
	const uint2 pix_min = { block.group_index().x * BLOCK_X, block.group_index().y * BLOCK_Y };
	const uint2 pix = { pix_min.x + block.thread_index().x, pix_min.y + block.thread_index().y };
	const uint32_t pix_id = W * pix.y + pix.x;
	const float2 pixf = { (float)pix.x, (float)pix.y };

	const bool inside = pix.x < W && pix.y < H;
	const uint2 range = ranges[block.group_index().y * horizontal_blocks + block.group_index().x];

	const int rounds = ((range.y - range.x + BLOCK_SIZE - 1) / BLOCK_SIZE);

	// Threads outside the image still help with loading but never blend.
	bool done = !inside;
	int toDo = range.y - range.x;

	__shared__ int collected_id[BLOCK_SIZE];
	__shared__ float2 collected_xy[BLOCK_SIZE];
	__shared__ float4 collected_conic_opacity[BLOCK_SIZE];
	__shared__ float collected_colors[C * BLOCK_SIZE];
	__shared__ float collected_depths[BLOCK_SIZE];

	// In the forward, we stored the final value for T, the
	// product of all (1 - alpha) factors.
	const float T_final = inside ? (1 - alphas[pix_id]) : 0;
	float T = T_final;

	// We start from the back. The ID of the last contributing
	// Gaussian is known from each pixel from the forward.
	uint32_t contributor = toDo;
	const uint32_t last_contributor = inside ? n_contrib[pix_id] : 0;

	float accum_rec[C] = { 0 };
	float accum_depth_rec = 0;
	float accum_alpha_rec = 0;

	// Incoming per-pixel gradients. Zero-initialized so that out-of-image
	// threads never hold indeterminate values.
	float dL_dpixel[C] = { 0 };
	float dL_dpixel_depth = 0;
	float dL_dalpha = 0;
	// Dot product of the background color with the incoming color gradient.
	// It does not depend on the Gaussian, so it is computed once here.
	float bg_dot_dpixel = 0;
	if (inside)
	{
		for (int ch = 0; ch < C; ch++)
		{
			dL_dpixel[ch] = dL_dpixels[ch * H * W + pix_id];
			bg_dot_dpixel += bg_color[ch] * dL_dpixel[ch];
		}
		dL_dpixel_depth = dL_dpixel_depths[pix_id];
		dL_dalpha = dL_dalphas[pix_id];
	}

	float last_alpha = 0;
	float last_color[C] = { 0 };
	float last_depth = 0;

	// Gradient of pixel coordinate w.r.t. normalized
	// screen-space viewport coordinates (-1 to 1)
	const float ddelx_dx = 0.5 * W;
	const float ddely_dy = 0.5 * H;

	// Traverse all Gaussians
	for (int i = 0; i < rounds; i++, toDo -= BLOCK_SIZE)
	{
		// Load auxiliary data into shared memory, start in the BACK
		// and load them in reverse order.
		block.sync();
		const int progress = i * BLOCK_SIZE + block.thread_rank();
		if (range.x + progress < range.y)
		{
			const int coll_id = point_list[range.y - progress - 1];
			collected_id[block.thread_rank()] = coll_id;
			collected_xy[block.thread_rank()] = points_xy_image[coll_id];
			collected_conic_opacity[block.thread_rank()] = conic_opacity[coll_id];
			for (int ch = 0; ch < C; ch++)
				collected_colors[ch * BLOCK_SIZE + block.thread_rank()] = colors[coll_id * C + ch];
			collected_depths[block.thread_rank()] = depths[coll_id];
		}
		block.sync();

		// Iterate over Gaussians
		for (int j = 0; !done && j < min(BLOCK_SIZE, toDo); j++)
		{
			// Keep track of current Gaussian ID. Skip, if this one
			// is behind the last contributor for this pixel.
			contributor--;
			if (contributor >= last_contributor)
				continue;

			// Compute blending values, as before.
			const float2 xy = collected_xy[j];
			const float2 d = { xy.x - pixf.x, xy.y - pixf.y };
			const float4 con_o = collected_conic_opacity[j];
			const float power = -0.5f * (con_o.x * d.x * d.x + con_o.z * d.y * d.y) - con_o.y * d.x * d.y;
			if (power > 0.0f)
				continue;

			const float G = exp(power);
			const float alpha = min(0.99f, con_o.w * G);
			if (alpha < 1.0f / 255.0f)
				continue;

			// Undo this Gaussian's (1 - alpha) factor to get the
			// transmittance in front of it.
			T = T / (1.f - alpha);
			const float dchannel_dcolor = alpha * T;
			const float dpixel_depth_ddepth = alpha * T;

			// Propagate gradients to per-Gaussian colors and keep
			// gradients w.r.t. alpha (blending factor for a Gaussian/pixel
			// pair).
			float dL_dopa = 0.0f;
			const int global_id = collected_id[j];
			for (int ch = 0; ch < C; ch++)
			{
				const float c = collected_colors[ch * BLOCK_SIZE + j];
				// Update last color (to be used in the next iteration)
				accum_rec[ch] = last_alpha * last_color[ch] + (1.f - last_alpha) * accum_rec[ch];
				last_color[ch] = c;

				const float dL_dchannel = dL_dpixel[ch];
				dL_dopa += (c - accum_rec[ch]) * dL_dchannel;
				// Update the gradients w.r.t. color of the Gaussian.
				// Atomic, since this pixel is just one of potentially
				// many that were affected by this Gaussian.
				atomicAdd(&(dL_dcolors[global_id * C + ch]), dchannel_dcolor * dL_dchannel);
			}

			// Propagate gradients from pixel depth to opacity
			const float c_d = collected_depths[j];
			accum_depth_rec = last_alpha * last_depth + (1.f - last_alpha) * accum_depth_rec;
			last_depth = c_d;
			dL_dopa += (c_d - accum_depth_rec) * dL_dpixel_depth;
			atomicAdd(&(dL_ddepths[global_id]), dpixel_depth_ddepth * dL_dpixel_depth);

			// Propagate gradients from pixel alpha (weights_sum) to opacity
			accum_alpha_rec = last_alpha + (1.f - last_alpha) * accum_alpha_rec;
			dL_dopa += (1 - accum_alpha_rec) * dL_dalpha;

			dL_dopa *= T;
			// Update last alpha (to be used in the next iteration)
			last_alpha = alpha;

			// Account for fact that alpha also influences how much of
			// the background color is added if nothing left to blend
			dL_dopa += (-T_final / (1.f - alpha)) * bg_dot_dpixel;

			// Helpful reusable temporary variables
			const float dL_dG = con_o.w * dL_dopa;
			const float gdx = G * d.x;
			const float gdy = G * d.y;
			const float dG_ddelx = -gdx * con_o.x - gdy * con_o.y;
			const float dG_ddely = -gdy * con_o.z - gdx * con_o.y;

			// Update gradients w.r.t. 2D mean position of the Gaussian
			atomicAdd(&dL_dmean2D[global_id].x, dL_dG * dG_ddelx * ddelx_dx);
			atomicAdd(&dL_dmean2D[global_id].y, dL_dG * dG_ddely * ddely_dy);

			// Update gradients w.r.t. 2D covariance (2x2 matrix, symmetric)
			atomicAdd(&dL_dconic2D[global_id].x, -0.5f * gdx * d.x * dL_dG);
			atomicAdd(&dL_dconic2D[global_id].y, -0.5f * gdx * d.y * dL_dG);
			atomicAdd(&dL_dconic2D[global_id].w, -0.5f * gdy * d.y * dL_dG);

			// Update gradients w.r.t. opacity of the Gaussian
			atomicAdd(&(dL_dopacity[global_id]), G * dL_dopa);
		}
	}
}
|
602 |
+
|
603 |
+
// Host-side entry point for the backward pass of preprocessing.
//
// Launches two kernels on the default stream, in this order:
//   1. computeCov2DCUDA - backward of the 2D conic computation. It stores the
//      covariance-related mean gradient in dL_dmean3D and fills dL_dcov3D.
//   2. preprocessCUDA   - adds the remaining mean gradients to dL_dmean3D,
//      propagates color gradients to SH coefficients (when shs is non-null)
//      and covariance gradients to scale / rotation (when scales is non-null).
// Both kernels use one thread per Gaussian. No synchronization or error
// check is performed here; that is left to the caller.
//
// P  number of Gaussians; nothing is launched when P <= 0
// D  active SH degree
// M  number of SH coefficients per Gaussian
void BACKWARD::preprocess(
	int P, int D, int M,
	const float3* means3D,
	const int* radii,
	const float* shs,
	const bool* clamped,
	const glm::vec3* scales,
	const glm::vec4* rotations,
	const float scale_modifier,
	const float* cov3Ds,
	const float* viewmatrix,
	const float* projmatrix,
	const float focal_x, float focal_y,
	const float tan_fovx, float tan_fovy,
	const glm::vec3* campos,
	const float3* dL_dmean2D,
	const float* dL_dconic,
	glm::vec3* dL_dmean3D,
	float* dL_dcolor,
	float* dL_ddepth,
	float* dL_dcov3D,
	float* dL_dsh,
	glm::vec3* dL_dscale,
	glm::vec4* dL_drot)
{
	// A grid with zero blocks is an invalid launch configuration, so there
	// is nothing useful to do for an empty set of Gaussians.
	if (P <= 0)
		return;

	constexpr int kThreadsPerBlock = 256;
	const int num_blocks = (P + kThreadsPerBlock - 1) / kThreadsPerBlock;

	// Propagate gradients for the path of 2D conic matrix computation.
	// Somewhat long, thus it is its own kernel rather than being part of
	// "preprocess". When done, loss gradient w.r.t. 3D means has been
	// modified and gradient w.r.t. 3D covariance matrix has been computed.
	computeCov2DCUDA<<<num_blocks, kThreadsPerBlock>>>(
		P,
		means3D,
		radii,
		cov3Ds,
		focal_x,
		focal_y,
		tan_fovx,
		tan_fovy,
		viewmatrix,
		dL_dconic,
		reinterpret_cast<float3*>(dL_dmean3D),
		dL_dcov3D);

	// Propagate gradients for remaining steps: finish 3D mean gradients,
	// propagate color gradients to SH (if desired), propagate 3D covariance
	// matrix gradients to scale and rotation.
	preprocessCUDA<NUM_CHANNELS><<<num_blocks, kThreadsPerBlock>>>(
		P, D, M,
		means3D,
		radii,
		shs,
		clamped,
		scales,
		rotations,
		scale_modifier,
		viewmatrix,
		projmatrix,
		campos,
		dL_dmean2D,
		dL_dmean3D,
		dL_dcolor,
		dL_ddepth,
		dL_dcov3D,
		dL_dsh,
		dL_dscale,
		dL_drot);
}
|
670 |
+
|
671 |
+
// Host-side entry point for the backward pass of rendering.
//
// Forwards every argument unchanged to renderCUDA<NUM_CHANNELS>, launched
// with the caller-supplied grid / block dimensions on the default stream.
// The launch is asynchronous; no synchronization or error check happens here.
void BACKWARD::render(
	const dim3 grid, const dim3 block,
	const uint2* ranges,
	const uint32_t* point_list,
	int W, int H,
	const float* bg_color,
	const float2* means2D,
	const float4* conic_opacity,
	const float* colors,
	const float* depths,
	const float* alphas,
	const uint32_t* n_contrib,
	const float* dL_dpixels,
	const float* dL_dpixel_depths,
	const float* dL_dalphas,
	float3* dL_dmean2D,
	float4* dL_dconic2D,
	float* dL_dopacity,
	float* dL_dcolors,
	float* dL_ddepths)
{
	renderCUDA<NUM_CHANNELS><<<grid, block>>>(
		// Tile bookkeeping and image size
		ranges, point_list, W, H,
		// Forward-pass inputs and stored results
		bg_color, means2D, conic_opacity, colors, depths, alphas, n_contrib,
		// Incoming per-pixel gradients
		dL_dpixels, dL_dpixel_depths, dL_dalphas,
		// Outgoing per-Gaussian gradients
		dL_dmean2D, dL_dconic2D, dL_dopacity, dL_dcolors, dL_ddepths);
}
|
diff-gaussian-rasterization/cuda_rasterizer/backward.h
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#ifndef CUDA_RASTERIZER_BACKWARD_H_INCLUDED
|
13 |
+
#define CUDA_RASTERIZER_BACKWARD_H_INCLUDED
|
14 |
+
|
15 |
+
#include <cuda.h>
|
16 |
+
#include "cuda_runtime.h"
|
17 |
+
#include "device_launch_parameters.h"
|
18 |
+
#define GLM_FORCE_CUDA
|
19 |
+
#include <glm/glm.hpp>
|
20 |
+
|
21 |
+
namespace BACKWARD
|
22 |
+
{
|
23 |
+
void render(
|
24 |
+
const dim3 grid, dim3 block,
|
25 |
+
const uint2* ranges,
|
26 |
+
const uint32_t* point_list,
|
27 |
+
int W, int H,
|
28 |
+
const float* bg_color,
|
29 |
+
const float2* means2D,
|
30 |
+
const float4* conic_opacity,
|
31 |
+
const float* colors,
|
32 |
+
const float* depths,
|
33 |
+
const float* alphas,
|
34 |
+
const uint32_t* n_contrib,
|
35 |
+
const float* dL_dpixels,
|
36 |
+
const float* dL_dpixel_depths,
|
37 |
+
const float* dL_dalphas,
|
38 |
+
float3* dL_dmean2D,
|
39 |
+
float4* dL_dconic2D,
|
40 |
+
float* dL_dopacity,
|
41 |
+
float* dL_dcolors,
|
42 |
+
float* dL_ddepths);
|
43 |
+
|
44 |
+
void preprocess(
|
45 |
+
int P, int D, int M,
|
46 |
+
const float3* means,
|
47 |
+
const int* radii,
|
48 |
+
const float* shs,
|
49 |
+
const bool* clamped,
|
50 |
+
const glm::vec3* scales,
|
51 |
+
const glm::vec4* rotations,
|
52 |
+
const float scale_modifier,
|
53 |
+
const float* cov3Ds,
|
54 |
+
const float* view,
|
55 |
+
const float* proj,
|
56 |
+
const float focal_x, float focal_y,
|
57 |
+
const float tan_fovx, float tan_fovy,
|
58 |
+
const glm::vec3* campos,
|
59 |
+
const float3* dL_dmean2D,
|
60 |
+
const float* dL_dconics,
|
61 |
+
glm::vec3* dL_dmeans,
|
62 |
+
float* dL_dcolor,
|
63 |
+
float* dL_ddepth,
|
64 |
+
float* dL_dcov3D,
|
65 |
+
float* dL_dsh,
|
66 |
+
glm::vec3* dL_dscale,
|
67 |
+
glm::vec4* dL_drot);
|
68 |
+
}
|
69 |
+
|
70 |
+
#endif
|
diff-gaussian-rasterization/cuda_rasterizer/config.h
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#ifndef CUDA_RASTERIZER_CONFIG_H_INCLUDED
#define CUDA_RASTERIZER_CONFIG_H_INCLUDED

// Compile-time configuration shared by the rasterizer kernels.

// Number of feature channels handled per Gaussian / per pixel.
#define NUM_CHANNELS 3 // Default 3, RGB

// Tile size in pixels. The render kernel maps one thread block to one
// BLOCK_X x BLOCK_Y tile and uses the product as its launch bound.
#define BLOCK_X 16
#define BLOCK_Y 16

#endif // CUDA_RASTERIZER_CONFIG_H_INCLUDED
|
diff-gaussian-rasterization/cuda_rasterizer/forward.cu
ADDED
@@ -0,0 +1,466 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#include "forward.h"
|
13 |
+
#include "auxiliary.h"
|
14 |
+
#include <cooperative_groups.h>
|
15 |
+
#include <cooperative_groups/reduce.h>
|
16 |
+
namespace cg = cooperative_groups;
|
17 |
+
|
18 |
+
// Forward method for converting the input spherical harmonics
|
19 |
+
// coefficients of each Gaussian to a simple RGB color.
|
20 |
+
__device__ glm::vec3 computeColorFromSH(int idx, int deg, int max_coeffs, const glm::vec3* means, glm::vec3 campos, const float* shs, bool* clamped)
|
21 |
+
{
|
22 |
+
// The implementation is loosely based on code for
|
23 |
+
// "Differentiable Point-Based Radiance Fields for
|
24 |
+
// Efficient View Synthesis" by Zhang et al. (2022)
|
25 |
+
glm::vec3 pos = means[idx];
|
26 |
+
glm::vec3 dir = pos - campos;
|
27 |
+
dir = dir / glm::length(dir);
|
28 |
+
|
29 |
+
glm::vec3* sh = ((glm::vec3*)shs) + idx * max_coeffs;
|
30 |
+
glm::vec3 result = SH_C0 * sh[0];
|
31 |
+
|
32 |
+
if (deg > 0)
|
33 |
+
{
|
34 |
+
float x = dir.x;
|
35 |
+
float y = dir.y;
|
36 |
+
float z = dir.z;
|
37 |
+
result = result - SH_C1 * y * sh[1] + SH_C1 * z * sh[2] - SH_C1 * x * sh[3];
|
38 |
+
|
39 |
+
if (deg > 1)
|
40 |
+
{
|
41 |
+
float xx = x * x, yy = y * y, zz = z * z;
|
42 |
+
float xy = x * y, yz = y * z, xz = x * z;
|
43 |
+
result = result +
|
44 |
+
SH_C2[0] * xy * sh[4] +
|
45 |
+
SH_C2[1] * yz * sh[5] +
|
46 |
+
SH_C2[2] * (2.0f * zz - xx - yy) * sh[6] +
|
47 |
+
SH_C2[3] * xz * sh[7] +
|
48 |
+
SH_C2[4] * (xx - yy) * sh[8];
|
49 |
+
|
50 |
+
if (deg > 2)
|
51 |
+
{
|
52 |
+
result = result +
|
53 |
+
SH_C3[0] * y * (3.0f * xx - yy) * sh[9] +
|
54 |
+
SH_C3[1] * xy * z * sh[10] +
|
55 |
+
SH_C3[2] * y * (4.0f * zz - xx - yy) * sh[11] +
|
56 |
+
SH_C3[3] * z * (2.0f * zz - 3.0f * xx - 3.0f * yy) * sh[12] +
|
57 |
+
SH_C3[4] * x * (4.0f * zz - xx - yy) * sh[13] +
|
58 |
+
SH_C3[5] * z * (xx - yy) * sh[14] +
|
59 |
+
SH_C3[6] * x * (xx - 3.0f * yy) * sh[15];
|
60 |
+
}
|
61 |
+
}
|
62 |
+
}
|
63 |
+
result += 0.5f;
|
64 |
+
|
65 |
+
// RGB colors are clamped to positive values. If values are
|
66 |
+
// clamped, we need to keep track of this for the backward pass.
|
67 |
+
clamped[3 * idx + 0] = (result.x < 0);
|
68 |
+
clamped[3 * idx + 1] = (result.y < 0);
|
69 |
+
clamped[3 * idx + 2] = (result.z < 0);
|
70 |
+
return glm::max(result, 0.0f);
|
71 |
+
}
|
72 |
+
|
73 |
+
// Forward version of 2D covariance matrix computation
|
74 |
+
__device__ float3 computeCov2D(const float3& mean, float focal_x, float focal_y, float tan_fovx, float tan_fovy, const float* cov3D, const float* viewmatrix)
|
75 |
+
{
|
76 |
+
// The following models the steps outlined by equations 29
|
77 |
+
// and 31 in "EWA Splatting" (Zwicker et al., 2002).
|
78 |
+
// Additionally considers aspect / scaling of viewport.
|
79 |
+
// Transposes used to account for row-/column-major conventions.
|
80 |
+
float3 t = transformPoint4x3(mean, viewmatrix);
|
81 |
+
|
82 |
+
const float limx = 1.3f * tan_fovx;
|
83 |
+
const float limy = 1.3f * tan_fovy;
|
84 |
+
const float txtz = t.x / t.z;
|
85 |
+
const float tytz = t.y / t.z;
|
86 |
+
t.x = min(limx, max(-limx, txtz)) * t.z;
|
87 |
+
t.y = min(limy, max(-limy, tytz)) * t.z;
|
88 |
+
|
89 |
+
glm::mat3 J = glm::mat3(
|
90 |
+
focal_x / t.z, 0.0f, -(focal_x * t.x) / (t.z * t.z),
|
91 |
+
0.0f, focal_y / t.z, -(focal_y * t.y) / (t.z * t.z),
|
92 |
+
0, 0, 0);
|
93 |
+
|
94 |
+
glm::mat3 W = glm::mat3(
|
95 |
+
viewmatrix[0], viewmatrix[4], viewmatrix[8],
|
96 |
+
viewmatrix[1], viewmatrix[5], viewmatrix[9],
|
97 |
+
viewmatrix[2], viewmatrix[6], viewmatrix[10]);
|
98 |
+
|
99 |
+
glm::mat3 T = W * J;
|
100 |
+
|
101 |
+
glm::mat3 Vrk = glm::mat3(
|
102 |
+
cov3D[0], cov3D[1], cov3D[2],
|
103 |
+
cov3D[1], cov3D[3], cov3D[4],
|
104 |
+
cov3D[2], cov3D[4], cov3D[5]);
|
105 |
+
|
106 |
+
glm::mat3 cov = glm::transpose(T) * glm::transpose(Vrk) * T;
|
107 |
+
|
108 |
+
// Apply low-pass filter: every Gaussian should be at least
|
109 |
+
// one pixel wide/high. Discard 3rd row and column.
|
110 |
+
cov[0][0] += 0.3f;
|
111 |
+
cov[1][1] += 0.3f;
|
112 |
+
return { float(cov[0][0]), float(cov[0][1]), float(cov[1][1]) };
|
113 |
+
}
|
114 |
+
|
115 |
+
// Forward method for converting scale and rotation properties of each
|
116 |
+
// Gaussian to a 3D covariance matrix in world space. Also takes care
|
117 |
+
// of quaternion normalization.
|
118 |
+
__device__ void computeCov3D(const glm::vec3 scale, float mod, const glm::vec4 rot, float* cov3D)
|
119 |
+
{
|
120 |
+
// Create scaling matrix
|
121 |
+
glm::mat3 S = glm::mat3(1.0f);
|
122 |
+
S[0][0] = mod * scale.x;
|
123 |
+
S[1][1] = mod * scale.y;
|
124 |
+
S[2][2] = mod * scale.z;
|
125 |
+
|
126 |
+
// Normalize quaternion to get valid rotation
|
127 |
+
glm::vec4 q = rot;// / glm::length(rot);
|
128 |
+
float r = q.x;
|
129 |
+
float x = q.y;
|
130 |
+
float y = q.z;
|
131 |
+
float z = q.w;
|
132 |
+
|
133 |
+
// Compute rotation matrix from quaternion
|
134 |
+
glm::mat3 R = glm::mat3(
|
135 |
+
1.f - 2.f * (y * y + z * z), 2.f * (x * y - r * z), 2.f * (x * z + r * y),
|
136 |
+
2.f * (x * y + r * z), 1.f - 2.f * (x * x + z * z), 2.f * (y * z - r * x),
|
137 |
+
2.f * (x * z - r * y), 2.f * (y * z + r * x), 1.f - 2.f * (x * x + y * y)
|
138 |
+
);
|
139 |
+
|
140 |
+
glm::mat3 M = S * R;
|
141 |
+
|
142 |
+
// Compute 3D world covariance matrix Sigma
|
143 |
+
glm::mat3 Sigma = glm::transpose(M) * M;
|
144 |
+
|
145 |
+
// Covariance is symmetric, only store upper right
|
146 |
+
cov3D[0] = Sigma[0][0];
|
147 |
+
cov3D[1] = Sigma[0][1];
|
148 |
+
cov3D[2] = Sigma[0][2];
|
149 |
+
cov3D[3] = Sigma[1][1];
|
150 |
+
cov3D[4] = Sigma[1][2];
|
151 |
+
cov3D[5] = Sigma[2][2];
|
152 |
+
}
|
153 |
+
|
154 |
+
// Perform initial steps for each Gaussian prior to rasterization.
|
155 |
+
template<int C>
|
156 |
+
__global__ void preprocessCUDA(int P, int D, int M,
|
157 |
+
const float* orig_points,
|
158 |
+
const glm::vec3* scales,
|
159 |
+
const float scale_modifier,
|
160 |
+
const glm::vec4* rotations,
|
161 |
+
const float* opacities,
|
162 |
+
const float* shs,
|
163 |
+
bool* clamped,
|
164 |
+
const float* cov3D_precomp,
|
165 |
+
const float* colors_precomp,
|
166 |
+
const float* viewmatrix,
|
167 |
+
const float* projmatrix,
|
168 |
+
const glm::vec3* cam_pos,
|
169 |
+
const int W, int H,
|
170 |
+
const float tan_fovx, float tan_fovy,
|
171 |
+
const float focal_x, float focal_y,
|
172 |
+
int* radii,
|
173 |
+
float2* points_xy_image,
|
174 |
+
float* depths,
|
175 |
+
float* cov3Ds,
|
176 |
+
float* rgb,
|
177 |
+
float4* conic_opacity,
|
178 |
+
const dim3 grid,
|
179 |
+
uint32_t* tiles_touched,
|
180 |
+
bool prefiltered)
|
181 |
+
{
|
182 |
+
auto idx = cg::this_grid().thread_rank();
|
183 |
+
if (idx >= P)
|
184 |
+
return;
|
185 |
+
|
186 |
+
// Initialize radius and touched tiles to 0. If this isn't changed,
|
187 |
+
// this Gaussian will not be processed further.
|
188 |
+
radii[idx] = 0;
|
189 |
+
tiles_touched[idx] = 0;
|
190 |
+
|
191 |
+
// Perform near culling, quit if outside.
|
192 |
+
float3 p_view;
|
193 |
+
if (!in_frustum(idx, orig_points, viewmatrix, projmatrix, prefiltered, p_view))
|
194 |
+
return;
|
195 |
+
|
196 |
+
// Transform point by projecting
|
197 |
+
float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] };
|
198 |
+
float4 p_hom = transformPoint4x4(p_orig, projmatrix);
|
199 |
+
float p_w = 1.0f / (p_hom.w + 0.0000001f);
|
200 |
+
float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w };
|
201 |
+
|
202 |
+
// If 3D covariance matrix is precomputed, use it, otherwise compute
|
203 |
+
// from scaling and rotation parameters.
|
204 |
+
const float* cov3D;
|
205 |
+
if (cov3D_precomp != nullptr)
|
206 |
+
{
|
207 |
+
cov3D = cov3D_precomp + idx * 6;
|
208 |
+
}
|
209 |
+
else
|
210 |
+
{
|
211 |
+
computeCov3D(scales[idx], scale_modifier, rotations[idx], cov3Ds + idx * 6);
|
212 |
+
cov3D = cov3Ds + idx * 6;
|
213 |
+
}
|
214 |
+
|
215 |
+
// Compute 2D screen-space covariance matrix
|
216 |
+
float3 cov = computeCov2D(p_orig, focal_x, focal_y, tan_fovx, tan_fovy, cov3D, viewmatrix);
|
217 |
+
|
218 |
+
// Invert covariance (EWA algorithm)
|
219 |
+
float det = (cov.x * cov.z - cov.y * cov.y);
|
220 |
+
if (det == 0.0f)
|
221 |
+
return;
|
222 |
+
float det_inv = 1.f / det;
|
223 |
+
float3 conic = { cov.z * det_inv, -cov.y * det_inv, cov.x * det_inv };
|
224 |
+
|
225 |
+
// Compute extent in screen space (by finding eigenvalues of
|
226 |
+
// 2D covariance matrix). Use extent to compute a bounding rectangle
|
227 |
+
// of screen-space tiles that this Gaussian overlaps with. Quit if
|
228 |
+
// rectangle covers 0 tiles.
|
229 |
+
float mid = 0.5f * (cov.x + cov.z);
|
230 |
+
float lambda1 = mid + sqrt(max(0.1f, mid * mid - det));
|
231 |
+
float lambda2 = mid - sqrt(max(0.1f, mid * mid - det));
|
232 |
+
float my_radius = ceil(3.f * sqrt(max(lambda1, lambda2)));
|
233 |
+
float2 point_image = { ndc2Pix(p_proj.x, W), ndc2Pix(p_proj.y, H) };
|
234 |
+
uint2 rect_min, rect_max;
|
235 |
+
getRect(point_image, my_radius, rect_min, rect_max, grid);
|
236 |
+
if ((rect_max.x - rect_min.x) * (rect_max.y - rect_min.y) == 0)
|
237 |
+
return;
|
238 |
+
|
239 |
+
// If colors have been precomputed, use them, otherwise convert
|
240 |
+
// spherical harmonics coefficients to RGB color.
|
241 |
+
if (colors_precomp == nullptr)
|
242 |
+
{
|
243 |
+
glm::vec3 result = computeColorFromSH(idx, D, M, (glm::vec3*)orig_points, *cam_pos, shs, clamped);
|
244 |
+
rgb[idx * C + 0] = result.x;
|
245 |
+
rgb[idx * C + 1] = result.y;
|
246 |
+
rgb[idx * C + 2] = result.z;
|
247 |
+
}
|
248 |
+
|
249 |
+
// Store some useful helper data for the next steps.
|
250 |
+
depths[idx] = p_view.z;
|
251 |
+
radii[idx] = my_radius;
|
252 |
+
points_xy_image[idx] = point_image;
|
253 |
+
// Inverse 2D covariance and opacity neatly pack into one float4
|
254 |
+
conic_opacity[idx] = { conic.x, conic.y, conic.z, opacities[idx] };
|
255 |
+
tiles_touched[idx] = (rect_max.y - rect_min.y) * (rect_max.x - rect_min.x);
|
256 |
+
}
|
257 |
+
|
258 |
+
// Main rasterization method. Collaboratively works on one tile per
|
259 |
+
// block, each thread treats one pixel. Alternates between fetching
|
260 |
+
// and rasterizing data.
|
261 |
+
template <uint32_t CHANNELS>
|
262 |
+
__global__ void __launch_bounds__(BLOCK_X * BLOCK_Y)
|
263 |
+
renderCUDA(
|
264 |
+
const uint2* __restrict__ ranges,
|
265 |
+
const uint32_t* __restrict__ point_list,
|
266 |
+
int W, int H,
|
267 |
+
const float2* __restrict__ points_xy_image,
|
268 |
+
const float* __restrict__ features,
|
269 |
+
const float* __restrict__ depths,
|
270 |
+
const float4* __restrict__ conic_opacity,
|
271 |
+
float* __restrict__ out_alpha,
|
272 |
+
uint32_t* __restrict__ n_contrib,
|
273 |
+
const float* __restrict__ bg_color,
|
274 |
+
float* __restrict__ out_color,
|
275 |
+
float* __restrict__ out_depth)
|
276 |
+
{
|
277 |
+
// Identify current tile and associated min/max pixel range.
|
278 |
+
auto block = cg::this_thread_block();
|
279 |
+
uint32_t horizontal_blocks = (W + BLOCK_X - 1) / BLOCK_X;
|
280 |
+
uint2 pix_min = { block.group_index().x * BLOCK_X, block.group_index().y * BLOCK_Y };
|
281 |
+
uint2 pix_max = { min(pix_min.x + BLOCK_X, W), min(pix_min.y + BLOCK_Y , H) };
|
282 |
+
uint2 pix = { pix_min.x + block.thread_index().x, pix_min.y + block.thread_index().y };
|
283 |
+
uint32_t pix_id = W * pix.y + pix.x;
|
284 |
+
float2 pixf = { (float)pix.x, (float)pix.y };
|
285 |
+
|
286 |
+
// Check if this thread is associated with a valid pixel or outside.
|
287 |
+
bool inside = pix.x < W&& pix.y < H;
|
288 |
+
// Done threads can help with fetching, but don't rasterize
|
289 |
+
bool done = !inside;
|
290 |
+
|
291 |
+
// Load start/end range of IDs to process in bit sorted list.
|
292 |
+
uint2 range = ranges[block.group_index().y * horizontal_blocks + block.group_index().x];
|
293 |
+
const int rounds = ((range.y - range.x + BLOCK_SIZE - 1) / BLOCK_SIZE);
|
294 |
+
int toDo = range.y - range.x;
|
295 |
+
|
296 |
+
// Allocate storage for batches of collectively fetched data.
|
297 |
+
__shared__ int collected_id[BLOCK_SIZE];
|
298 |
+
__shared__ float2 collected_xy[BLOCK_SIZE];
|
299 |
+
__shared__ float4 collected_conic_opacity[BLOCK_SIZE];
|
300 |
+
|
301 |
+
// Initialize helper variables
|
302 |
+
float T = 1.0f;
|
303 |
+
uint32_t contributor = 0;
|
304 |
+
uint32_t last_contributor = 0;
|
305 |
+
float C[CHANNELS] = { 0 };
|
306 |
+
float weight = 0;
|
307 |
+
float D = 0;
|
308 |
+
|
309 |
+
// Iterate over batches until all done or range is complete
|
310 |
+
for (int i = 0; i < rounds; i++, toDo -= BLOCK_SIZE)
|
311 |
+
{
|
312 |
+
// End if entire block votes that it is done rasterizing
|
313 |
+
int num_done = __syncthreads_count(done);
|
314 |
+
if (num_done == BLOCK_SIZE)
|
315 |
+
break;
|
316 |
+
|
317 |
+
// Collectively fetch per-Gaussian data from global to shared
|
318 |
+
int progress = i * BLOCK_SIZE + block.thread_rank();
|
319 |
+
if (range.x + progress < range.y)
|
320 |
+
{
|
321 |
+
int coll_id = point_list[range.x + progress];
|
322 |
+
collected_id[block.thread_rank()] = coll_id;
|
323 |
+
collected_xy[block.thread_rank()] = points_xy_image[coll_id];
|
324 |
+
collected_conic_opacity[block.thread_rank()] = conic_opacity[coll_id];
|
325 |
+
}
|
326 |
+
block.sync();
|
327 |
+
|
328 |
+
// Iterate over current batch
|
329 |
+
for (int j = 0; !done && j < min(BLOCK_SIZE, toDo); j++)
|
330 |
+
{
|
331 |
+
// Keep track of current position in range
|
332 |
+
contributor++;
|
333 |
+
|
334 |
+
// Resample using conic matrix (cf. "Surface
|
335 |
+
// Splatting" by Zwicker et al., 2001)
|
336 |
+
float2 xy = collected_xy[j];
|
337 |
+
float2 d = { xy.x - pixf.x, xy.y - pixf.y };
|
338 |
+
float4 con_o = collected_conic_opacity[j];
|
339 |
+
float power = -0.5f * (con_o.x * d.x * d.x + con_o.z * d.y * d.y) - con_o.y * d.x * d.y;
|
340 |
+
if (power > 0.0f)
|
341 |
+
continue;
|
342 |
+
|
343 |
+
// Eq. (2) from 3D Gaussian splatting paper.
|
344 |
+
// Obtain alpha by multiplying with Gaussian opacity
|
345 |
+
// and its exponential falloff from mean.
|
346 |
+
// Avoid numerical instabilities (see paper appendix).
|
347 |
+
float alpha = min(0.99f, con_o.w * exp(power));
|
348 |
+
if (alpha < 1.0f / 255.0f)
|
349 |
+
continue;
|
350 |
+
float test_T = T * (1 - alpha);
|
351 |
+
if (test_T < 0.0001f)
|
352 |
+
{
|
353 |
+
done = true;
|
354 |
+
continue;
|
355 |
+
}
|
356 |
+
|
357 |
+
// Eq. (3) from 3D Gaussian splatting paper.
|
358 |
+
for (int ch = 0; ch < CHANNELS; ch++)
|
359 |
+
C[ch] += features[collected_id[j] * CHANNELS + ch] * alpha * T;
|
360 |
+
weight += alpha * T;
|
361 |
+
D += depths[collected_id[j]] * alpha * T;
|
362 |
+
|
363 |
+
T = test_T;
|
364 |
+
|
365 |
+
// Keep track of last range entry to update this
|
366 |
+
// pixel.
|
367 |
+
last_contributor = contributor;
|
368 |
+
}
|
369 |
+
}
|
370 |
+
|
371 |
+
// All threads that treat valid pixel write out their final
|
372 |
+
// rendering data to the frame and auxiliary buffers.
|
373 |
+
if (inside)
|
374 |
+
{
|
375 |
+
n_contrib[pix_id] = last_contributor;
|
376 |
+
for (int ch = 0; ch < CHANNELS; ch++)
|
377 |
+
out_color[ch * H * W + pix_id] = C[ch] + T * bg_color[ch];
|
378 |
+
out_alpha[pix_id] = weight; //1 - T;
|
379 |
+
out_depth[pix_id] = D;
|
380 |
+
}
|
381 |
+
}
|
382 |
+
|
383 |
+
void FORWARD::render(
|
384 |
+
const dim3 grid, dim3 block,
|
385 |
+
const uint2* ranges,
|
386 |
+
const uint32_t* point_list,
|
387 |
+
int W, int H,
|
388 |
+
const float2* means2D,
|
389 |
+
const float* colors,
|
390 |
+
const float* depths,
|
391 |
+
const float4* conic_opacity,
|
392 |
+
float* out_alpha,
|
393 |
+
uint32_t* n_contrib,
|
394 |
+
const float* bg_color,
|
395 |
+
float* out_color,
|
396 |
+
float* out_depth)
|
397 |
+
{
|
398 |
+
renderCUDA<NUM_CHANNELS> << <grid, block >> > (
|
399 |
+
ranges,
|
400 |
+
point_list,
|
401 |
+
W, H,
|
402 |
+
means2D,
|
403 |
+
colors,
|
404 |
+
depths,
|
405 |
+
conic_opacity,
|
406 |
+
out_alpha,
|
407 |
+
n_contrib,
|
408 |
+
bg_color,
|
409 |
+
out_color,
|
410 |
+
out_depth);
|
411 |
+
}
|
412 |
+
|
413 |
+
void FORWARD::preprocess(int P, int D, int M,
|
414 |
+
const float* means3D,
|
415 |
+
const glm::vec3* scales,
|
416 |
+
const float scale_modifier,
|
417 |
+
const glm::vec4* rotations,
|
418 |
+
const float* opacities,
|
419 |
+
const float* shs,
|
420 |
+
bool* clamped,
|
421 |
+
const float* cov3D_precomp,
|
422 |
+
const float* colors_precomp,
|
423 |
+
const float* viewmatrix,
|
424 |
+
const float* projmatrix,
|
425 |
+
const glm::vec3* cam_pos,
|
426 |
+
const int W, int H,
|
427 |
+
const float focal_x, float focal_y,
|
428 |
+
const float tan_fovx, float tan_fovy,
|
429 |
+
int* radii,
|
430 |
+
float2* means2D,
|
431 |
+
float* depths,
|
432 |
+
float* cov3Ds,
|
433 |
+
float* rgb,
|
434 |
+
float4* conic_opacity,
|
435 |
+
const dim3 grid,
|
436 |
+
uint32_t* tiles_touched,
|
437 |
+
bool prefiltered)
|
438 |
+
{
|
439 |
+
preprocessCUDA<NUM_CHANNELS> << <(P + 255) / 256, 256 >> > (
|
440 |
+
P, D, M,
|
441 |
+
means3D,
|
442 |
+
scales,
|
443 |
+
scale_modifier,
|
444 |
+
rotations,
|
445 |
+
opacities,
|
446 |
+
shs,
|
447 |
+
clamped,
|
448 |
+
cov3D_precomp,
|
449 |
+
colors_precomp,
|
450 |
+
viewmatrix,
|
451 |
+
projmatrix,
|
452 |
+
cam_pos,
|
453 |
+
W, H,
|
454 |
+
tan_fovx, tan_fovy,
|
455 |
+
focal_x, focal_y,
|
456 |
+
radii,
|
457 |
+
means2D,
|
458 |
+
depths,
|
459 |
+
cov3Ds,
|
460 |
+
rgb,
|
461 |
+
conic_opacity,
|
462 |
+
grid,
|
463 |
+
tiles_touched,
|
464 |
+
prefiltered
|
465 |
+
);
|
466 |
+
}
|
diff-gaussian-rasterization/cuda_rasterizer/forward.h
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#ifndef CUDA_RASTERIZER_FORWARD_H_INCLUDED
|
13 |
+
#define CUDA_RASTERIZER_FORWARD_H_INCLUDED
|
14 |
+
|
15 |
+
#include <cuda.h>
|
16 |
+
#include "cuda_runtime.h"
|
17 |
+
#include "device_launch_parameters.h"
|
18 |
+
#define GLM_FORCE_CUDA
|
19 |
+
#include <glm/glm.hpp>
|
20 |
+
|
21 |
+
namespace FORWARD
|
22 |
+
{
|
23 |
+
// Perform initial steps for each Gaussian prior to rasterization.
|
24 |
+
void preprocess(int P, int D, int M,
|
25 |
+
const float* orig_points,
|
26 |
+
const glm::vec3* scales,
|
27 |
+
const float scale_modifier,
|
28 |
+
const glm::vec4* rotations,
|
29 |
+
const float* opacities,
|
30 |
+
const float* shs,
|
31 |
+
bool* clamped,
|
32 |
+
const float* cov3D_precomp,
|
33 |
+
const float* colors_precomp,
|
34 |
+
const float* viewmatrix,
|
35 |
+
const float* projmatrix,
|
36 |
+
const glm::vec3* cam_pos,
|
37 |
+
const int W, int H,
|
38 |
+
const float focal_x, float focal_y,
|
39 |
+
const float tan_fovx, float tan_fovy,
|
40 |
+
int* radii,
|
41 |
+
float2* points_xy_image,
|
42 |
+
float* depths,
|
43 |
+
float* cov3Ds,
|
44 |
+
float* colors,
|
45 |
+
float4* conic_opacity,
|
46 |
+
const dim3 grid,
|
47 |
+
uint32_t* tiles_touched,
|
48 |
+
bool prefiltered);
|
49 |
+
|
50 |
+
// Main rasterization method.
|
51 |
+
void render(
|
52 |
+
const dim3 grid, dim3 block,
|
53 |
+
const uint2* ranges,
|
54 |
+
const uint32_t* point_list,
|
55 |
+
int W, int H,
|
56 |
+
const float2* points_xy_image,
|
57 |
+
const float* features,
|
58 |
+
const float* depths,
|
59 |
+
const float4* conic_opacity,
|
60 |
+
float* out_alpha,
|
61 |
+
uint32_t* n_contrib,
|
62 |
+
const float* bg_color,
|
63 |
+
float* out_color,
|
64 |
+
float* out_depth);
|
65 |
+
}
|
66 |
+
|
67 |
+
|
68 |
+
#endif
|
diff-gaussian-rasterization/cuda_rasterizer/rasterizer.h
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#ifndef CUDA_RASTERIZER_H_INCLUDED
|
13 |
+
#define CUDA_RASTERIZER_H_INCLUDED
|
14 |
+
|
15 |
+
#include <vector>
|
16 |
+
#include <functional>
|
17 |
+
|
18 |
+
namespace CudaRasterizer
{
	// Static facade over the CUDA rasterizer; it exposes only static
	// methods, so no instance is needed to call them.
	class Rasterizer
	{
	public:

		// Writes one flag per Gaussian into `present`.
		// NOTE(review): presumably the result of a frustum test -- confirm
		// against the implementation.
		static void markVisible(
			int P,
			float* means3D,
			float* viewmatrix,
			float* projmatrix,
			bool* present);

		// Forward pass. The three callbacks each take a byte count and
		// return a char* buffer (geometry, binning and image state
		// respectively). Results go to out_color, out_depth, out_alpha and,
		// if non-null, radii.
		// NOTE(review): the meaning of the int return value is not visible
		// from this header -- confirm against the implementation.
		static int forward(
			std::function<char* (size_t)> geometryBuffer,
			std::function<char* (size_t)> binningBuffer,
			std::function<char* (size_t)> imageBuffer,
			const int P, int D, int M,
			const float* background,
			const int width, int height,
			const float* means3D,
			const float* shs,
			const float* colors_precomp,
			const float* opacities,
			const float* scales,
			const float scale_modifier,
			const float* rotations,
			const float* cov3D_precomp,
			const float* viewmatrix,
			const float* projmatrix,
			const float* cam_pos,
			const float tan_fovx, float tan_fovy,
			const bool prefiltered,
			float* out_color,
			float* out_depth,
			float* out_alpha,
			int* radii = nullptr,
			bool debug = false);

		// Backward pass. Takes the raw geometry/binning/image buffers as
		// char pointers plus the const dL_dpix, dL_dpix_depth and dL_dalphas
		// inputs; the non-const dL_d* pointers are writable buffers.
		static void backward(
			const int P, int D, int M, int R,
			const float* background,
			const int width, int height,
			const float* means3D,
			const float* shs,
			const float* colors_precomp,
			const float* alphas,
			const float* scales,
			const float scale_modifier,
			const float* rotations,
			const float* cov3D_precomp,
			const float* viewmatrix,
			const float* projmatrix,
			const float* campos,
			const float tan_fovx, float tan_fovy,
			const int* radii,
			char* geom_buffer,
			char* binning_buffer,
			char* image_buffer,
			const float* dL_dpix,
			const float* dL_dpix_depth,
			const float* dL_dalphas,
			float* dL_dmean2D,
			float* dL_dconic,
			float* dL_dopacity,
			float* dL_dcolor,
			float* dL_ddepth,
			float* dL_dmean3D,
			float* dL_dcov3D,
			float* dL_dsh,
			float* dL_dscale,
			float* dL_drot,
			bool debug);
	};
} // namespace CudaRasterizer
|
93 |
+
|
94 |
+
#endif
|
diff-gaussian-rasterization/cuda_rasterizer/rasterizer_impl.cu
ADDED
@@ -0,0 +1,447 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#include "rasterizer_impl.h"
|
13 |
+
#include <iostream>
|
14 |
+
#include <fstream>
|
15 |
+
#include <algorithm>
|
16 |
+
#include <numeric>
|
17 |
+
#include <cuda.h>
|
18 |
+
#include "cuda_runtime.h"
|
19 |
+
#include "device_launch_parameters.h"
|
20 |
+
#include <cub/cub.cuh>
|
21 |
+
#include <cub/device/device_radix_sort.cuh>
|
22 |
+
#define GLM_FORCE_CUDA
|
23 |
+
#include <glm/glm.hpp>
|
24 |
+
|
25 |
+
#include <cooperative_groups.h>
|
26 |
+
#include <cooperative_groups/reduce.h>
|
27 |
+
namespace cg = cooperative_groups;
|
28 |
+
|
29 |
+
#include "auxiliary.h"
|
30 |
+
#include "forward.h"
|
31 |
+
#include "backward.h"
|
32 |
+
|
33 |
+
// CPU helper: returns the position one above the most significant set bit
// of n, i.e. the number of bits needed to represent n. The result is never
// below 1 (n == 0 and n == 1 both yield 1) and never above 32.
uint32_t getHigherMsb(uint32_t n)
{
	uint32_t bits = 1;
	// Grow the bit count while something is still set above it. The
	// bits < 32 check keeps the shift amount within the width of n.
	while (bits < 32 && (n >> bits) != 0)
		++bits;
	return bits;
}
|
51 |
+
|
52 |
+
// Wrapper method to call auxiliary coarse frustum containment test.
|
53 |
+
// Mark all Gaussians that pass it.
|
54 |
+
__global__ void checkFrustum(int P,
|
55 |
+
const float* orig_points,
|
56 |
+
const float* viewmatrix,
|
57 |
+
const float* projmatrix,
|
58 |
+
bool* present)
|
59 |
+
{
|
60 |
+
auto idx = cg::this_grid().thread_rank();
|
61 |
+
if (idx >= P)
|
62 |
+
return;
|
63 |
+
|
64 |
+
float3 p_view;
|
65 |
+
present[idx] = in_frustum(idx, orig_points, viewmatrix, projmatrix, false, p_view);
|
66 |
+
}
|
67 |
+
|
68 |
+
// Generates one key/value pair for all Gaussian / tile overlaps.
|
69 |
+
// Run once per Gaussian (1:N mapping).
|
70 |
+
__global__ void duplicateWithKeys(
	int P,
	const float2* points_xy,
	const float* depths,
	const uint32_t* offsets,
	uint64_t* gaussian_keys_unsorted,
	uint32_t* gaussian_values_unsorted,
	int* radii,
	dim3 grid)
{
	auto idx = cg::this_grid().thread_rank();
	if (idx >= P)
		return;

	// Gaussians with a non-positive radius emit no key/value pairs.
	if (radii[idx] <= 0)
		return;

	// First output slot of this Gaussian: the entry preceding it in
	// `offsets` (0 for the very first Gaussian).
	uint32_t write_pos = (idx == 0) ? 0 : offsets[idx - 1];

	uint2 rect_min, rect_max;
	getRect(points_xy[idx], radii[idx], rect_min, rect_max, grid);

	// Lower half of every key: the raw bit pattern of this Gaussian's depth.
	const uint64_t depth_bits = *((uint32_t*)&depths[idx]);

	// Emit one key/value pair per tile in the rectangle. The key is
	// | tile ID | depth | and the value is the Gaussian's index, so sorting
	// by key orders the indices by tile first and by depth second.
	for (int y = rect_min.y; y < rect_max.y; y++)
	{
		for (int x = rect_min.x; x < rect_max.x; x++)
		{
			uint64_t tile_id = y * grid.x + x;
			gaussian_keys_unsorted[write_pos] = (tile_id << 32) | depth_bits;
			gaussian_values_unsorted[write_pos] = idx;
			write_pos++;
		}
	}
}
|
112 |
+
|
113 |
+
// Check keys to see if it is at the start/end of one tile's range in
|
114 |
+
// the full sorted list. If yes, write start/end of this tile.
|
115 |
+
// Run once per instanced (duplicated) Gaussian ID.
|
116 |
+
__global__ void identifyTileRanges(int L, uint64_t* point_list_keys, uint2* ranges)
{
	auto idx = cg::this_grid().thread_rank();
	if (idx >= L)
		return;

	// The tile ID occupies the upper 32 bits of each key.
	const uint32_t tile_here = point_list_keys[idx] >> 32;

	if (idx > 0)
	{
		// A change of tile between neighbouring entries closes the previous
		// tile's range (.y) and opens the current tile's range (.x).
		const uint32_t tile_before = point_list_keys[idx - 1] >> 32;
		if (tile_here != tile_before)
		{
			ranges[tile_before].y = idx;
			ranges[tile_here].x = idx;
		}
	}
	else
	{
		// The first entry opens its tile's range at 0.
		ranges[tile_here].x = 0;
	}

	// The last entry closes its tile's range at L.
	if (idx == L - 1)
		ranges[tile_here].y = L;
}
|
139 |
+
|
140 |
+
// Mark Gaussians as visible/invisible, based on view frustum testing
|
141 |
+
void CudaRasterizer::Rasterizer::markVisible(
|
142 |
+
int P,
|
143 |
+
float* means3D,
|
144 |
+
float* viewmatrix,
|
145 |
+
float* projmatrix,
|
146 |
+
bool* present)
|
147 |
+
{
|
148 |
+
checkFrustum << <(P + 255) / 256, 256 >> > (
|
149 |
+
P,
|
150 |
+
means3D,
|
151 |
+
viewmatrix, projmatrix,
|
152 |
+
present);
|
153 |
+
}
|
154 |
+
|
155 |
+
CudaRasterizer::GeometryState CudaRasterizer::GeometryState::fromChunk(char*& chunk, size_t P)
{
	// Carve the per-Gaussian arrays out of `chunk` one after another, each
	// aligned to 128 bytes. `chunk` ends up past the last array. The order of
	// the obtain() calls defines the memory layout and must not change.
	const size_t align = 128;
	GeometryState state;
	obtain(chunk, state.depths, P, align);
	obtain(chunk, state.clamped, P * 3, align);
	obtain(chunk, state.internal_radii, P, align);
	obtain(chunk, state.means2D, P, align);
	obtain(chunk, state.cov3D, P * 6, align);
	obtain(chunk, state.conic_opacity, P, align);
	obtain(chunk, state.rgb, P * 3, align);
	obtain(chunk, state.tiles_touched, P, align);
	// Called with a null temp-storage pointer so that CUB only reports the
	// scratch size it needs for a scan over P items (stored in scan_size).
	cub::DeviceScan::InclusiveSum(nullptr, state.scan_size, state.tiles_touched, state.tiles_touched, P);
	obtain(chunk, state.scanning_space, state.scan_size, align);
	obtain(chunk, state.point_offsets, P, align);
	return state;
}
|
171 |
+
|
172 |
+
CudaRasterizer::ImageState CudaRasterizer::ImageState::fromChunk(char*& chunk, size_t N)
{
	// Carve two N-element arrays out of `chunk`, each aligned to 128 bytes.
	// The order of the obtain() calls defines the memory layout.
	const size_t align = 128;
	ImageState state;
	obtain(chunk, state.n_contrib, N, align);
	obtain(chunk, state.ranges, N, align);
	return state;
}
|
179 |
+
|
180 |
+
CudaRasterizer::BinningState CudaRasterizer::BinningState::fromChunk(char*& chunk, size_t P)
{
	// Carve the key/value lists (sorted and unsorted) out of `chunk`, each
	// aligned to 128 bytes. The order of the obtain() calls defines the
	// memory layout and must not change.
	const size_t align = 128;
	BinningState state;
	obtain(chunk, state.point_list, P, align);
	obtain(chunk, state.point_list_unsorted, P, align);
	obtain(chunk, state.point_list_keys, P, align);
	obtain(chunk, state.point_list_keys_unsorted, P, align);
	// Called with a null temp-storage pointer so that CUB only reports the
	// scratch size it needs to sort P pairs (stored in sorting_size).
	cub::DeviceRadixSort::SortPairs(
		nullptr, state.sorting_size,
		state.point_list_keys_unsorted, state.point_list_keys,
		state.point_list_unsorted, state.point_list, P);
	obtain(chunk, state.list_sorting_space, state.sorting_size, align);
	return state;
}
|
194 |
+
|
195 |
+
// Forward rendering procedure for differentiable rasterization
|
196 |
+
// of Gaussians.
|
197 |
+
int CudaRasterizer::Rasterizer::forward(
|
198 |
+
std::function<char* (size_t)> geometryBuffer,
|
199 |
+
std::function<char* (size_t)> binningBuffer,
|
200 |
+
std::function<char* (size_t)> imageBuffer,
|
201 |
+
const int P, int D, int M,
|
202 |
+
const float* background,
|
203 |
+
const int width, int height,
|
204 |
+
const float* means3D,
|
205 |
+
const float* shs,
|
206 |
+
const float* colors_precomp,
|
207 |
+
const float* opacities,
|
208 |
+
const float* scales,
|
209 |
+
const float scale_modifier,
|
210 |
+
const float* rotations,
|
211 |
+
const float* cov3D_precomp,
|
212 |
+
const float* viewmatrix,
|
213 |
+
const float* projmatrix,
|
214 |
+
const float* cam_pos,
|
215 |
+
const float tan_fovx, float tan_fovy,
|
216 |
+
const bool prefiltered,
|
217 |
+
float* out_color,
|
218 |
+
float* out_depth,
|
219 |
+
float* out_alpha,
|
220 |
+
int* radii,
|
221 |
+
bool debug)
|
222 |
+
{
|
223 |
+
const float focal_y = height / (2.0f * tan_fovy);
|
224 |
+
const float focal_x = width / (2.0f * tan_fovx);
|
225 |
+
|
226 |
+
size_t chunk_size = required<GeometryState>(P);
|
227 |
+
char* chunkptr = geometryBuffer(chunk_size);
|
228 |
+
GeometryState geomState = GeometryState::fromChunk(chunkptr, P);
|
229 |
+
|
230 |
+
if (radii == nullptr)
|
231 |
+
{
|
232 |
+
radii = geomState.internal_radii;
|
233 |
+
}
|
234 |
+
|
235 |
+
dim3 tile_grid((width + BLOCK_X - 1) / BLOCK_X, (height + BLOCK_Y - 1) / BLOCK_Y, 1);
|
236 |
+
dim3 block(BLOCK_X, BLOCK_Y, 1);
|
237 |
+
|
238 |
+
// Dynamically resize image-based auxiliary buffers during training
|
239 |
+
size_t img_chunk_size = required<ImageState>(width * height);
|
240 |
+
char* img_chunkptr = imageBuffer(img_chunk_size);
|
241 |
+
ImageState imgState = ImageState::fromChunk(img_chunkptr, width * height);
|
242 |
+
|
243 |
+
if (NUM_CHANNELS != 3 && colors_precomp == nullptr)
|
244 |
+
{
|
245 |
+
throw std::runtime_error("For non-RGB, provide precomputed Gaussian colors!");
|
246 |
+
}
|
247 |
+
|
248 |
+
// Run preprocessing per-Gaussian (transformation, bounding, conversion of SHs to RGB)
|
249 |
+
CHECK_CUDA(FORWARD::preprocess(
|
250 |
+
P, D, M,
|
251 |
+
means3D,
|
252 |
+
(glm::vec3*)scales,
|
253 |
+
scale_modifier,
|
254 |
+
(glm::vec4*)rotations,
|
255 |
+
opacities,
|
256 |
+
shs,
|
257 |
+
geomState.clamped,
|
258 |
+
cov3D_precomp,
|
259 |
+
colors_precomp,
|
260 |
+
viewmatrix, projmatrix,
|
261 |
+
(glm::vec3*)cam_pos,
|
262 |
+
width, height,
|
263 |
+
focal_x, focal_y,
|
264 |
+
tan_fovx, tan_fovy,
|
265 |
+
radii,
|
266 |
+
geomState.means2D,
|
267 |
+
geomState.depths,
|
268 |
+
geomState.cov3D,
|
269 |
+
geomState.rgb,
|
270 |
+
geomState.conic_opacity,
|
271 |
+
tile_grid,
|
272 |
+
geomState.tiles_touched,
|
273 |
+
prefiltered
|
274 |
+
), debug)
|
275 |
+
|
276 |
+
// Compute prefix sum over full list of touched tile counts by Gaussians
|
277 |
+
// E.g., [2, 3, 0, 2, 1] -> [2, 5, 5, 7, 8]
|
278 |
+
CHECK_CUDA(cub::DeviceScan::InclusiveSum(geomState.scanning_space, geomState.scan_size, geomState.tiles_touched, geomState.point_offsets, P), debug)
|
279 |
+
|
280 |
+
// Retrieve total number of Gaussian instances to launch and resize aux buffers
|
281 |
+
int num_rendered;
|
282 |
+
CHECK_CUDA(cudaMemcpy(&num_rendered, geomState.point_offsets + P - 1, sizeof(int), cudaMemcpyDeviceToHost), debug);
|
283 |
+
|
284 |
+
size_t binning_chunk_size = required<BinningState>(num_rendered);
|
285 |
+
char* binning_chunkptr = binningBuffer(binning_chunk_size);
|
286 |
+
BinningState binningState = BinningState::fromChunk(binning_chunkptr, num_rendered);
|
287 |
+
|
288 |
+
// For each instance to be rendered, produce adequate [ tile | depth ] key
|
289 |
+
// and corresponding dublicated Gaussian indices to be sorted
|
290 |
+
duplicateWithKeys << <(P + 255) / 256, 256 >> > (
|
291 |
+
P,
|
292 |
+
geomState.means2D,
|
293 |
+
geomState.depths,
|
294 |
+
geomState.point_offsets,
|
295 |
+
binningState.point_list_keys_unsorted,
|
296 |
+
binningState.point_list_unsorted,
|
297 |
+
radii,
|
298 |
+
tile_grid)
|
299 |
+
CHECK_CUDA(, debug)
|
300 |
+
|
301 |
+
int bit = getHigherMsb(tile_grid.x * tile_grid.y);
|
302 |
+
|
303 |
+
// Sort complete list of (duplicated) Gaussian indices by keys
|
304 |
+
CHECK_CUDA(cub::DeviceRadixSort::SortPairs(
|
305 |
+
binningState.list_sorting_space,
|
306 |
+
binningState.sorting_size,
|
307 |
+
binningState.point_list_keys_unsorted, binningState.point_list_keys,
|
308 |
+
binningState.point_list_unsorted, binningState.point_list,
|
309 |
+
num_rendered, 0, 32 + bit), debug)
|
310 |
+
|
311 |
+
CHECK_CUDA(cudaMemset(imgState.ranges, 0, tile_grid.x * tile_grid.y * sizeof(uint2)), debug);
|
312 |
+
|
313 |
+
// Identify start and end of per-tile workloads in sorted list
|
314 |
+
if (num_rendered > 0)
|
315 |
+
identifyTileRanges << <(num_rendered + 255) / 256, 256 >> > (
|
316 |
+
num_rendered,
|
317 |
+
binningState.point_list_keys,
|
318 |
+
imgState.ranges);
|
319 |
+
CHECK_CUDA(, debug);
|
320 |
+
|
321 |
+
// Let each tile blend its range of Gaussians independently in parallel
|
322 |
+
const float* feature_ptr = colors_precomp != nullptr ? colors_precomp : geomState.rgb;
|
323 |
+
CHECK_CUDA(FORWARD::render(
|
324 |
+
tile_grid, block,
|
325 |
+
imgState.ranges,
|
326 |
+
binningState.point_list,
|
327 |
+
width, height,
|
328 |
+
geomState.means2D,
|
329 |
+
feature_ptr,
|
330 |
+
geomState.depths,
|
331 |
+
geomState.conic_opacity,
|
332 |
+
out_alpha,
|
333 |
+
imgState.n_contrib,
|
334 |
+
background,
|
335 |
+
out_color,
|
336 |
+
out_depth), debug);
|
337 |
+
|
338 |
+
return num_rendered;
|
339 |
+
}
|
340 |
+
|
341 |
+
// Produce necessary gradients for optimization, corresponding
|
342 |
+
// to forward render pass
|
343 |
+
void CudaRasterizer::Rasterizer::backward(
|
344 |
+
const int P, int D, int M, int R,
|
345 |
+
const float* background,
|
346 |
+
const int width, int height,
|
347 |
+
const float* means3D,
|
348 |
+
const float* shs,
|
349 |
+
const float* colors_precomp,
|
350 |
+
const float* alphas,
|
351 |
+
const float* scales,
|
352 |
+
const float scale_modifier,
|
353 |
+
const float* rotations,
|
354 |
+
const float* cov3D_precomp,
|
355 |
+
const float* viewmatrix,
|
356 |
+
const float* projmatrix,
|
357 |
+
const float* campos,
|
358 |
+
const float tan_fovx, float tan_fovy,
|
359 |
+
const int* radii,
|
360 |
+
char* geom_buffer,
|
361 |
+
char* binning_buffer,
|
362 |
+
char* img_buffer,
|
363 |
+
const float* dL_dpix,
|
364 |
+
const float* dL_dpix_depth,
|
365 |
+
const float* dL_dalphas,
|
366 |
+
float* dL_dmean2D,
|
367 |
+
float* dL_dconic,
|
368 |
+
float* dL_dopacity,
|
369 |
+
float* dL_dcolor,
|
370 |
+
float* dL_ddepth,
|
371 |
+
float* dL_dmean3D,
|
372 |
+
float* dL_dcov3D,
|
373 |
+
float* dL_dsh,
|
374 |
+
float* dL_dscale,
|
375 |
+
float* dL_drot,
|
376 |
+
bool debug)
|
377 |
+
{
|
378 |
+
GeometryState geomState = GeometryState::fromChunk(geom_buffer, P);
|
379 |
+
BinningState binningState = BinningState::fromChunk(binning_buffer, R);
|
380 |
+
ImageState imgState = ImageState::fromChunk(img_buffer, width * height);
|
381 |
+
|
382 |
+
if (radii == nullptr)
|
383 |
+
{
|
384 |
+
radii = geomState.internal_radii;
|
385 |
+
}
|
386 |
+
|
387 |
+
const float focal_y = height / (2.0f * tan_fovy);
|
388 |
+
const float focal_x = width / (2.0f * tan_fovx);
|
389 |
+
|
390 |
+
const dim3 tile_grid((width + BLOCK_X - 1) / BLOCK_X, (height + BLOCK_Y - 1) / BLOCK_Y, 1);
|
391 |
+
const dim3 block(BLOCK_X, BLOCK_Y, 1);
|
392 |
+
|
393 |
+
// Compute loss gradients w.r.t. 2D mean position, conic matrix,
|
394 |
+
// opacity and RGB of Gaussians from per-pixel loss gradients.
|
395 |
+
// If we were given precomputed colors and not SHs, use them.
|
396 |
+
const float* color_ptr = (colors_precomp != nullptr) ? colors_precomp : geomState.rgb;
|
397 |
+
const float* depth_ptr = geomState.depths;
|
398 |
+
CHECK_CUDA(BACKWARD::render(
|
399 |
+
tile_grid,
|
400 |
+
block,
|
401 |
+
imgState.ranges,
|
402 |
+
binningState.point_list,
|
403 |
+
width, height,
|
404 |
+
background,
|
405 |
+
geomState.means2D,
|
406 |
+
geomState.conic_opacity,
|
407 |
+
color_ptr,
|
408 |
+
depth_ptr,
|
409 |
+
alphas,
|
410 |
+
imgState.n_contrib,
|
411 |
+
dL_dpix,
|
412 |
+
dL_dpix_depth,
|
413 |
+
dL_dalphas,
|
414 |
+
(float3*)dL_dmean2D,
|
415 |
+
(float4*)dL_dconic,
|
416 |
+
dL_dopacity,
|
417 |
+
dL_dcolor,
|
418 |
+
dL_ddepth), debug)
|
419 |
+
|
420 |
+
// Take care of the rest of preprocessing. Was the precomputed covariance
|
421 |
+
// given to us or a scales/rot pair? If precomputed, pass that. If not,
|
422 |
+
// use the one we computed ourselves.
|
423 |
+
const float* cov3D_ptr = (cov3D_precomp != nullptr) ? cov3D_precomp : geomState.cov3D;
|
424 |
+
CHECK_CUDA(BACKWARD::preprocess(P, D, M,
|
425 |
+
(float3*)means3D,
|
426 |
+
radii,
|
427 |
+
shs,
|
428 |
+
geomState.clamped,
|
429 |
+
(glm::vec3*)scales,
|
430 |
+
(glm::vec4*)rotations,
|
431 |
+
scale_modifier,
|
432 |
+
cov3D_ptr,
|
433 |
+
viewmatrix,
|
434 |
+
projmatrix,
|
435 |
+
focal_x, focal_y,
|
436 |
+
tan_fovx, tan_fovy,
|
437 |
+
(glm::vec3*)campos,
|
438 |
+
(float3*)dL_dmean2D,
|
439 |
+
dL_dconic,
|
440 |
+
(glm::vec3*)dL_dmean3D,
|
441 |
+
dL_dcolor,
|
442 |
+
dL_ddepth,
|
443 |
+
dL_dcov3D,
|
444 |
+
dL_dsh,
|
445 |
+
(glm::vec3*)dL_dscale,
|
446 |
+
(glm::vec4*)dL_drot), debug)
|
447 |
+
}
|
diff-gaussian-rasterization/cuda_rasterizer/rasterizer_impl.h
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#pragma once
|
13 |
+
|
14 |
+
#include <iostream>
|
15 |
+
#include <vector>
|
16 |
+
#include "rasterizer.h"
|
17 |
+
#include <cuda_runtime_api.h>
|
18 |
+
|
19 |
+
namespace CudaRasterizer
|
20 |
+
{
|
21 |
+
template <typename T>
|
22 |
+
static void obtain(char*& chunk, T*& ptr, std::size_t count, std::size_t alignment)
|
23 |
+
{
|
24 |
+
std::size_t offset = (reinterpret_cast<std::uintptr_t>(chunk) + alignment - 1) & ~(alignment - 1);
|
25 |
+
ptr = reinterpret_cast<T*>(offset);
|
26 |
+
chunk = reinterpret_cast<char*>(ptr + count);
|
27 |
+
}
|
28 |
+
|
29 |
+
struct GeometryState
|
30 |
+
{
|
31 |
+
size_t scan_size;
|
32 |
+
float* depths;
|
33 |
+
char* scanning_space;
|
34 |
+
bool* clamped;
|
35 |
+
int* internal_radii;
|
36 |
+
float2* means2D;
|
37 |
+
float* cov3D;
|
38 |
+
float4* conic_opacity;
|
39 |
+
float* rgb;
|
40 |
+
uint32_t* point_offsets;
|
41 |
+
uint32_t* tiles_touched;
|
42 |
+
|
43 |
+
static GeometryState fromChunk(char*& chunk, size_t P);
|
44 |
+
};
|
45 |
+
|
46 |
+
struct ImageState
|
47 |
+
{
|
48 |
+
uint2* ranges;
|
49 |
+
uint32_t* n_contrib;
|
50 |
+
|
51 |
+
static ImageState fromChunk(char*& chunk, size_t N);
|
52 |
+
};
|
53 |
+
|
54 |
+
struct BinningState
|
55 |
+
{
|
56 |
+
size_t sorting_size;
|
57 |
+
uint64_t* point_list_keys_unsorted;
|
58 |
+
uint64_t* point_list_keys;
|
59 |
+
uint32_t* point_list_unsorted;
|
60 |
+
uint32_t* point_list;
|
61 |
+
char* list_sorting_space;
|
62 |
+
|
63 |
+
static BinningState fromChunk(char*& chunk, size_t P);
|
64 |
+
};
|
65 |
+
|
66 |
+
template<typename T>
|
67 |
+
size_t required(size_t P)
|
68 |
+
{
|
69 |
+
char* size = nullptr;
|
70 |
+
T::fromChunk(size, P);
|
71 |
+
return ((size_t)size) + 128;
|
72 |
+
}
|
73 |
+
};
|
diff-gaussian-rasterization/diff_gaussian_rasterization/__init__.py
ADDED
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#
|
2 |
+
# Copyright (C) 2023, Inria
|
3 |
+
# GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
# All rights reserved.
|
5 |
+
#
|
6 |
+
# This software is free for non-commercial, research and evaluation use
|
7 |
+
# under the terms of the LICENSE.md file.
|
8 |
+
#
|
9 |
+
# For inquiries contact george.drettakis@inria.fr
|
10 |
+
#
|
11 |
+
|
12 |
+
from typing import NamedTuple
|
13 |
+
import torch.nn as nn
|
14 |
+
import torch
|
15 |
+
from . import _C
|
16 |
+
|
17 |
+
def cpu_deep_copy_tuple(input_tuple):
    """Return a tuple in which every tensor of ``input_tuple`` is a CPU clone.

    Items that are not ``torch.Tensor`` instances are carried over unchanged.
    """
    def _snapshot(entry):
        if isinstance(entry, torch.Tensor):
            return entry.cpu().clone()
        return entry

    return tuple(map(_snapshot, input_tuple))
|
20 |
+
|
21 |
+
def rasterize_gaussians(
    means3D,
    means2D,
    sh,
    colors_precomp,
    opacities,
    scales,
    rotations,
    cov3Ds_precomp,
    raster_settings,
):
    """Run the rasterizer through the autograd function ``_RasterizeGaussians``.

    All arguments are forwarded positionally, in the order given here.
    """
    forwarded = (
        means3D,
        means2D,
        sh,
        colors_precomp,
        opacities,
        scales,
        rotations,
        cov3Ds_precomp,
        raster_settings,
    )
    return _RasterizeGaussians.apply(*forwarded)
|
43 |
+
|
44 |
+
class _RasterizeGaussians(torch.autograd.Function):
    """Autograd wrapper around the C++/CUDA rasterizer in ``_C``."""

    @staticmethod
    def forward(
        ctx,
        means3D,
        means2D,
        sh,
        colors_precomp,
        opacities,
        scales,
        rotations,
        cov3Ds_precomp,
        raster_settings,
    ):
        """Rasterize and return ``(color, radii, depth, alpha)``.

        With ``raster_settings.debug`` set, the arguments are copied to the
        CPU first and written to ``snapshot_fw.dump`` if the call fails.
        """
        settings = raster_settings

        # Argument order expected by the C++ library.
        args = (
            settings.bg,
            means3D,
            colors_precomp,
            opacities,
            scales,
            rotations,
            settings.scale_modifier,
            cov3Ds_precomp,
            settings.viewmatrix,
            settings.projmatrix,
            settings.tanfovx,
            settings.tanfovy,
            settings.image_height,
            settings.image_width,
            sh,
            settings.sh_degree,
            settings.campos,
            settings.prefiltered,
            settings.debug
        )

        # Invoke the C++/CUDA rasterizer.
        if not settings.debug:
            num_rendered, color, depth, alpha, radii, geomBuffer, binningBuffer, imgBuffer = _C.rasterize_gaussians(*args)
        else:
            cpu_args = cpu_deep_copy_tuple(args)  # snapshot before they can be corrupted
            try:
                num_rendered, color, depth, alpha, radii, geomBuffer, binningBuffer, imgBuffer = _C.rasterize_gaussians(*args)
            except Exception as ex:
                torch.save(cpu_args, "snapshot_fw.dump")
                print("\nAn error occured in forward. Please forward snapshot_fw.dump for debugging.")
                raise ex

        # Keep what backward needs.
        ctx.raster_settings = settings
        ctx.num_rendered = num_rendered
        ctx.save_for_backward(colors_precomp, means3D, scales, rotations, cov3Ds_precomp, radii, sh, geomBuffer, binningBuffer, imgBuffer, alpha)
        return color, radii, depth, alpha

    @staticmethod
    def backward(ctx, grad_color, grad_radii, grad_depth, grad_alpha):
        """Return one gradient per ``forward`` input; ``None`` for the settings.

        With ``raster_settings.debug`` set, the arguments are copied to the
        CPU first and written to ``snapshot_bw.dump`` if the call fails.
        """
        # Restore what forward stored.
        settings = ctx.raster_settings
        num_rendered = ctx.num_rendered
        colors_precomp, means3D, scales, rotations, cov3Ds_precomp, radii, sh, geomBuffer, binningBuffer, imgBuffer, alpha = ctx.saved_tensors

        # Argument order expected by the C++ method.
        args = (
            settings.bg,
            means3D,
            radii,
            colors_precomp,
            scales,
            rotations,
            settings.scale_modifier,
            cov3Ds_precomp,
            settings.viewmatrix,
            settings.projmatrix,
            settings.tanfovx,
            settings.tanfovy,
            grad_color,
            grad_depth,
            grad_alpha,
            sh,
            settings.sh_degree,
            settings.campos,
            geomBuffer,
            num_rendered,
            binningBuffer,
            imgBuffer,
            alpha,
            settings.debug,
        )

        # Compute the gradients through the C++ backward method.
        if not settings.debug:
            grad_means2D, grad_colors_precomp, grad_opacities, grad_means3D, grad_cov3Ds_precomp, grad_sh, grad_scales, grad_rotations = _C.rasterize_gaussians_backward(*args)
        else:
            cpu_args = cpu_deep_copy_tuple(args)  # snapshot before they can be corrupted
            try:
                grad_means2D, grad_colors_precomp, grad_opacities, grad_means3D, grad_cov3Ds_precomp, grad_sh, grad_scales, grad_rotations = _C.rasterize_gaussians_backward(*args)
            except Exception as ex:
                torch.save(cpu_args, "snapshot_bw.dump")
                print("\nAn error occured in backward. Writing snapshot_bw.dump for debugging.\n")
                raise ex

        # Same order as the inputs of forward (after ctx); raster_settings
        # receives no gradient.
        return (
            grad_means3D,
            grad_means2D,
            grad_sh,
            grad_colors_precomp,
            grad_opacities,
            grad_scales,
            grad_rotations,
            grad_cov3Ds_precomp,
            None,
        )
|
159 |
+
|
160 |
+
class GaussianRasterizationSettings(NamedTuple):
    """Immutable bundle of the settings handed to the rasterizer calls."""

    image_height: int
    image_width: int
    tanfovx: float
    tanfovy: float
    bg: torch.Tensor
    scale_modifier: float
    viewmatrix: torch.Tensor
    projmatrix: torch.Tensor
    sh_degree: int
    campos: torch.Tensor
    prefiltered: bool
    debug: bool
|
173 |
+
|
174 |
+
class GaussianRasterizer(nn.Module):
    """Module front end for the Gaussian rasterizer, bound to fixed settings."""

    def __init__(self, raster_settings):
        super().__init__()
        self.raster_settings = raster_settings

    def markVisible(self, positions):
        """Return the result of ``_C.mark_visible`` for ``positions``.

        Uses the view and projection matrices of the stored settings and runs
        without gradient tracking.
        """
        settings = self.raster_settings
        with torch.no_grad():
            visible = _C.mark_visible(
                positions,
                settings.viewmatrix,
                settings.projmatrix)

        return visible

    def forward(self, means3D, means2D, opacities, shs = None, colors_precomp = None, scales = None, rotations = None, cov3D_precomp = None):
        """Validate the optional inputs and call ``rasterize_gaussians``.

        Exactly one of ``shs`` / ``colors_precomp`` must be given, and exactly
        one of the ``scales`` + ``rotations`` pair / ``cov3D_precomp``.
        Inputs left as ``None`` are passed on as empty tensors.

        Raises:
            Exception: if either "exactly one" rule is violated.
        """
        raster_settings = self.raster_settings

        # Both missing or both present is an error.
        if (shs is None) == (colors_precomp is None):
            raise Exception('Please provide excatly one of either SHs or precomputed colors!')

        have_cov = cov3D_precomp is not None
        missing_scale_or_rot = scales is None or rotations is None
        have_scale_or_rot = scales is not None or rotations is not None
        if (missing_scale_or_rot and not have_cov) or (have_scale_or_rot and have_cov):
            raise Exception('Please provide exactly one of either scale/rotation pair or precomputed 3D covariance!')

        # Replace absent optional inputs by empty tensors.
        shs = torch.Tensor([]) if shs is None else shs
        colors_precomp = torch.Tensor([]) if colors_precomp is None else colors_precomp
        scales = torch.Tensor([]) if scales is None else scales
        rotations = torch.Tensor([]) if rotations is None else rotations
        cov3D_precomp = torch.Tensor([]) if cov3D_precomp is None else cov3D_precomp

        # Invoke the C++/CUDA rasterization routine.
        return rasterize_gaussians(
            means3D,
            means2D,
            shs,
            colors_precomp,
            opacities,
            scales,
            rotations,
            cov3D_precomp,
            raster_settings,
        )
|
224 |
+
|
diff-gaussian-rasterization/ext.cpp
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#include <torch/extension.h>
|
13 |
+
#include "rasterize_points.h"
|
14 |
+
|
15 |
+
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  // Expose the three C++ entry points under their Python-side names.
  m.def("mark_visible", &markVisible);
  m.def("rasterize_gaussians", &RasterizeGaussiansCUDA);
  m.def("rasterize_gaussians_backward", &RasterizeGaussiansBackwardCUDA);
}
|
diff-gaussian-rasterization/rasterize_points.cu
ADDED
@@ -0,0 +1,229 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#include <math.h>
|
13 |
+
#include <torch/extension.h>
|
14 |
+
#include <cstdio>
|
15 |
+
#include <sstream>
|
16 |
+
#include <iostream>
|
17 |
+
#include <tuple>
|
18 |
+
#include <stdio.h>
|
19 |
+
#include <cuda_runtime_api.h>
|
20 |
+
#include <memory>
|
21 |
+
#include "cuda_rasterizer/config.h"
|
22 |
+
#include "cuda_rasterizer/rasterizer.h"
|
23 |
+
#include <fstream>
|
24 |
+
#include <string>
|
25 |
+
#include <functional>
|
26 |
+
|
27 |
+
std::function<char*(size_t N)> resizeFunctional(torch::Tensor& t) {
    // The returned callable resizes `t` to N elements and returns its data
    // pointer as char*. `t` is captured by reference, so the tensor has to
    // outlive the callable.
    return [&t](size_t N) {
        t.resize_({(long long)N});
        return reinterpret_cast<char*>(t.contiguous().data_ptr());
    };
}
|
34 |
+
|
35 |
+
std::tuple<int, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
|
36 |
+
RasterizeGaussiansCUDA(
|
37 |
+
const torch::Tensor& background,
|
38 |
+
const torch::Tensor& means3D,
|
39 |
+
const torch::Tensor& colors,
|
40 |
+
const torch::Tensor& opacity,
|
41 |
+
const torch::Tensor& scales,
|
42 |
+
const torch::Tensor& rotations,
|
43 |
+
const float scale_modifier,
|
44 |
+
const torch::Tensor& cov3D_precomp,
|
45 |
+
const torch::Tensor& viewmatrix,
|
46 |
+
const torch::Tensor& projmatrix,
|
47 |
+
const float tan_fovx,
|
48 |
+
const float tan_fovy,
|
49 |
+
const int image_height,
|
50 |
+
const int image_width,
|
51 |
+
const torch::Tensor& sh,
|
52 |
+
const int degree,
|
53 |
+
const torch::Tensor& campos,
|
54 |
+
const bool prefiltered,
|
55 |
+
const bool debug)
|
56 |
+
{
|
57 |
+
if (means3D.ndimension() != 2 || means3D.size(1) != 3) {
|
58 |
+
AT_ERROR("means3D must have dimensions (num_points, 3)");
|
59 |
+
}
|
60 |
+
|
61 |
+
const int P = means3D.size(0);
|
62 |
+
const int H = image_height;
|
63 |
+
const int W = image_width;
|
64 |
+
|
65 |
+
auto int_opts = means3D.options().dtype(torch::kInt32);
|
66 |
+
auto float_opts = means3D.options().dtype(torch::kFloat32);
|
67 |
+
|
68 |
+
torch::Tensor out_color = torch::full({NUM_CHANNELS, H, W}, 0.0, float_opts);
|
69 |
+
torch::Tensor out_depth = torch::full({1, H, W}, 0.0, float_opts);
|
70 |
+
torch::Tensor out_alpha = torch::full({1, H, W}, 0.0, float_opts);
|
71 |
+
torch::Tensor radii = torch::full({P}, 0, means3D.options().dtype(torch::kInt32));
|
72 |
+
|
73 |
+
torch::Device device(torch::kCUDA);
|
74 |
+
torch::TensorOptions options(torch::kByte);
|
75 |
+
torch::Tensor geomBuffer = torch::empty({0}, options.device(device));
|
76 |
+
torch::Tensor binningBuffer = torch::empty({0}, options.device(device));
|
77 |
+
torch::Tensor imgBuffer = torch::empty({0}, options.device(device));
|
78 |
+
std::function<char*(size_t)> geomFunc = resizeFunctional(geomBuffer);
|
79 |
+
std::function<char*(size_t)> binningFunc = resizeFunctional(binningBuffer);
|
80 |
+
std::function<char*(size_t)> imgFunc = resizeFunctional(imgBuffer);
|
81 |
+
|
82 |
+
int rendered = 0;
|
83 |
+
if(P != 0)
|
84 |
+
{
|
85 |
+
int M = 0;
|
86 |
+
if(sh.size(0) != 0)
|
87 |
+
{
|
88 |
+
M = sh.size(1);
|
89 |
+
}
|
90 |
+
|
91 |
+
rendered = CudaRasterizer::Rasterizer::forward(
|
92 |
+
geomFunc,
|
93 |
+
binningFunc,
|
94 |
+
imgFunc,
|
95 |
+
P, degree, M,
|
96 |
+
background.contiguous().data<float>(),
|
97 |
+
W, H,
|
98 |
+
means3D.contiguous().data<float>(),
|
99 |
+
sh.contiguous().data_ptr<float>(),
|
100 |
+
colors.contiguous().data<float>(),
|
101 |
+
opacity.contiguous().data<float>(),
|
102 |
+
scales.contiguous().data_ptr<float>(),
|
103 |
+
scale_modifier,
|
104 |
+
rotations.contiguous().data_ptr<float>(),
|
105 |
+
cov3D_precomp.contiguous().data<float>(),
|
106 |
+
viewmatrix.contiguous().data<float>(),
|
107 |
+
projmatrix.contiguous().data<float>(),
|
108 |
+
campos.contiguous().data<float>(),
|
109 |
+
tan_fovx,
|
110 |
+
tan_fovy,
|
111 |
+
prefiltered,
|
112 |
+
out_color.contiguous().data<float>(),
|
113 |
+
out_depth.contiguous().data<float>(),
|
114 |
+
out_alpha.contiguous().data<float>(),
|
115 |
+
radii.contiguous().data<int>(),
|
116 |
+
debug);
|
117 |
+
}
|
118 |
+
return std::make_tuple(rendered, out_color, out_depth, out_alpha, radii, geomBuffer, binningBuffer, imgBuffer);
|
119 |
+
}
|
120 |
+
|
121 |
+
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
|
122 |
+
RasterizeGaussiansBackwardCUDA(
|
123 |
+
const torch::Tensor& background,
|
124 |
+
const torch::Tensor& means3D,
|
125 |
+
const torch::Tensor& radii,
|
126 |
+
const torch::Tensor& colors,
|
127 |
+
const torch::Tensor& scales,
|
128 |
+
const torch::Tensor& rotations,
|
129 |
+
const float scale_modifier,
|
130 |
+
const torch::Tensor& cov3D_precomp,
|
131 |
+
const torch::Tensor& viewmatrix,
|
132 |
+
const torch::Tensor& projmatrix,
|
133 |
+
const float tan_fovx,
|
134 |
+
const float tan_fovy,
|
135 |
+
const torch::Tensor& dL_dout_color,
|
136 |
+
const torch::Tensor& dL_dout_depth,
|
137 |
+
const torch::Tensor& dL_dout_alpha,
|
138 |
+
const torch::Tensor& sh,
|
139 |
+
const int degree,
|
140 |
+
const torch::Tensor& campos,
|
141 |
+
const torch::Tensor& geomBuffer,
|
142 |
+
const int R,
|
143 |
+
const torch::Tensor& binningBuffer,
|
144 |
+
const torch::Tensor& imageBuffer,
|
145 |
+
const torch::Tensor& alphas,
|
146 |
+
const bool debug)
|
147 |
+
{
|
148 |
+
const int P = means3D.size(0);
|
149 |
+
const int H = dL_dout_color.size(1);
|
150 |
+
const int W = dL_dout_color.size(2);
|
151 |
+
|
152 |
+
int M = 0;
|
153 |
+
if(sh.size(0) != 0)
|
154 |
+
{
|
155 |
+
M = sh.size(1);
|
156 |
+
}
|
157 |
+
|
158 |
+
torch::Tensor dL_dmeans3D = torch::zeros({P, 3}, means3D.options());
|
159 |
+
torch::Tensor dL_dmeans2D = torch::zeros({P, 3}, means3D.options());
|
160 |
+
torch::Tensor dL_dcolors = torch::zeros({P, NUM_CHANNELS}, means3D.options());
|
161 |
+
torch::Tensor dL_ddepths = torch::zeros({P, 1}, means3D.options());
|
162 |
+
torch::Tensor dL_dconic = torch::zeros({P, 2, 2}, means3D.options());
|
163 |
+
torch::Tensor dL_dopacity = torch::zeros({P, 1}, means3D.options());
|
164 |
+
torch::Tensor dL_dcov3D = torch::zeros({P, 6}, means3D.options());
|
165 |
+
torch::Tensor dL_dsh = torch::zeros({P, M, 3}, means3D.options());
|
166 |
+
torch::Tensor dL_dscales = torch::zeros({P, 3}, means3D.options());
|
167 |
+
torch::Tensor dL_drotations = torch::zeros({P, 4}, means3D.options());
|
168 |
+
|
169 |
+
if(P != 0)
|
170 |
+
{
|
171 |
+
CudaRasterizer::Rasterizer::backward(P, degree, M, R,
|
172 |
+
background.contiguous().data<float>(),
|
173 |
+
W, H,
|
174 |
+
means3D.contiguous().data<float>(),
|
175 |
+
sh.contiguous().data<float>(),
|
176 |
+
colors.contiguous().data<float>(),
|
177 |
+
alphas.contiguous().data<float>(),
|
178 |
+
scales.data_ptr<float>(),
|
179 |
+
scale_modifier,
|
180 |
+
rotations.data_ptr<float>(),
|
181 |
+
cov3D_precomp.contiguous().data<float>(),
|
182 |
+
viewmatrix.contiguous().data<float>(),
|
183 |
+
projmatrix.contiguous().data<float>(),
|
184 |
+
campos.contiguous().data<float>(),
|
185 |
+
tan_fovx,
|
186 |
+
tan_fovy,
|
187 |
+
radii.contiguous().data<int>(),
|
188 |
+
reinterpret_cast<char*>(geomBuffer.contiguous().data_ptr()),
|
189 |
+
reinterpret_cast<char*>(binningBuffer.contiguous().data_ptr()),
|
190 |
+
reinterpret_cast<char*>(imageBuffer.contiguous().data_ptr()),
|
191 |
+
dL_dout_color.contiguous().data<float>(),
|
192 |
+
dL_dout_depth.contiguous().data<float>(),
|
193 |
+
dL_dout_alpha.contiguous().data<float>(),
|
194 |
+
dL_dmeans2D.contiguous().data<float>(),
|
195 |
+
dL_dconic.contiguous().data<float>(),
|
196 |
+
dL_dopacity.contiguous().data<float>(),
|
197 |
+
dL_dcolors.contiguous().data<float>(),
|
198 |
+
dL_ddepths.contiguous().data<float>(),
|
199 |
+
dL_dmeans3D.contiguous().data<float>(),
|
200 |
+
dL_dcov3D.contiguous().data<float>(),
|
201 |
+
dL_dsh.contiguous().data<float>(),
|
202 |
+
dL_dscales.contiguous().data<float>(),
|
203 |
+
dL_drotations.contiguous().data<float>(),
|
204 |
+
debug);
|
205 |
+
}
|
206 |
+
|
207 |
+
return std::make_tuple(dL_dmeans2D, dL_dcolors, dL_dopacity, dL_dmeans3D, dL_dcov3D, dL_dsh, dL_dscales, dL_drotations);
|
208 |
+
}
|
209 |
+
|
210 |
+
/**
 * Builds a boolean mask with one entry per row of means3D.
 *
 * The mask is created as all-false with means3D's tensor options (dtype
 * overridden to bool) and is filled in by
 * CudaRasterizer::Rasterizer::markVisible when there is at least one point.
 * For P == 0 the empty mask is returned without touching the backend.
 *
 * @param means3D    tensor whose first dimension gives the point count P
 * @param viewmatrix forwarded to the backend as a float pointer
 * @param projmatrix forwarded to the backend as a float pointer
 * @return bool tensor of shape {P}
 */
torch::Tensor markVisible(
	torch::Tensor& means3D,
	torch::Tensor& viewmatrix,
	torch::Tensor& projmatrix)
{
	const int P = means3D.size(0);

	torch::Tensor present = torch::full({P}, false, means3D.options().dtype(at::kBool));

	if (P != 0)
	{
		// data_ptr<T>() replaces the deprecated data<T>() accessor.
		// `present` comes from torch::full and is therefore contiguous, so the
		// backend writes straight into the tensor we return.
		CudaRasterizer::Rasterizer::markVisible(P,
			means3D.contiguous().data_ptr<float>(),
			viewmatrix.contiguous().data_ptr<float>(),
			projmatrix.contiguous().data_ptr<float>(),
			present.data_ptr<bool>());
	}

	return present;
}
|
diff-gaussian-rasterization/rasterize_points.h
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#pragma once
|
13 |
+
#include <torch/extension.h>
|
14 |
+
#include <cstdio>
|
15 |
+
#include <tuple>
|
16 |
+
#include <string>
|
17 |
+
|
18 |
+
std::tuple<int, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
|
19 |
+
RasterizeGaussiansCUDA(
|
20 |
+
const torch::Tensor& background,
|
21 |
+
const torch::Tensor& means3D,
|
22 |
+
const torch::Tensor& colors,
|
23 |
+
const torch::Tensor& opacity,
|
24 |
+
const torch::Tensor& scales,
|
25 |
+
const torch::Tensor& rotations,
|
26 |
+
const float scale_modifier,
|
27 |
+
const torch::Tensor& cov3D_precomp,
|
28 |
+
const torch::Tensor& viewmatrix,
|
29 |
+
const torch::Tensor& projmatrix,
|
30 |
+
const float tan_fovx,
|
31 |
+
const float tan_fovy,
|
32 |
+
const int image_height,
|
33 |
+
const int image_width,
|
34 |
+
const torch::Tensor& sh,
|
35 |
+
const int degree,
|
36 |
+
const torch::Tensor& campos,
|
37 |
+
const bool prefiltered,
|
38 |
+
const bool debug);
|
39 |
+
|
40 |
+
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
|
41 |
+
RasterizeGaussiansBackwardCUDA(
|
42 |
+
const torch::Tensor& background,
|
43 |
+
const torch::Tensor& means3D,
|
44 |
+
const torch::Tensor& radii,
|
45 |
+
const torch::Tensor& colors,
|
46 |
+
const torch::Tensor& scales,
|
47 |
+
const torch::Tensor& rotations,
|
48 |
+
const float scale_modifier,
|
49 |
+
const torch::Tensor& cov3D_precomp,
|
50 |
+
const torch::Tensor& viewmatrix,
|
51 |
+
const torch::Tensor& projmatrix,
|
52 |
+
const float tan_fovx,
|
53 |
+
const float tan_fovy,
|
54 |
+
const torch::Tensor& dL_dout_color,
|
55 |
+
const torch::Tensor& dL_dout_depth,
|
56 |
+
const torch::Tensor& dL_dout_alpha,
|
57 |
+
const torch::Tensor& sh,
|
58 |
+
const int degree,
|
59 |
+
const torch::Tensor& campos,
|
60 |
+
const torch::Tensor& geomBuffer,
|
61 |
+
const int R,
|
62 |
+
const torch::Tensor& binningBuffer,
|
63 |
+
const torch::Tensor& imageBuffer,
|
64 |
+
const torch::Tensor& alpha,
|
65 |
+
const bool debug);
|
66 |
+
|
67 |
+
// Returns a bool tensor with one entry per row of means3D
// (implemented in rasterize_points.cu).
torch::Tensor markVisible(
	torch::Tensor& means3D,
	torch::Tensor& viewmatrix,
	torch::Tensor& projmatrix);
|
diff-gaussian-rasterization/setup.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#
|
2 |
+
# Copyright (C) 2023, Inria
|
3 |
+
# GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
# All rights reserved.
|
5 |
+
#
|
6 |
+
# This software is free for non-commercial, research and evaluation use
|
7 |
+
# under the terms of the LICENSE.md file.
|
8 |
+
#
|
9 |
+
# For inquiries contact george.drettakis@inria.fr
|
10 |
+
#
|
11 |
+
|
12 |
+
from setuptools import setup
|
13 |
+
from torch.utils.cpp_extension import CUDAExtension, BuildExtension
|
14 |
+
import os
|
15 |
+
# Absolute directory containing this setup.py. It anchors the include path
# for the GLM headers below, so the build does not depend on the current
# working directory. (The original computed this value and discarded it,
# then recomputed it inline.)
_HERE = os.path.dirname(os.path.abspath(__file__))

setup(
    name="diff_gaussian_rasterization",
    packages=['diff_gaussian_rasterization'],
    ext_modules=[
        CUDAExtension(
            name="diff_gaussian_rasterization._C",
            sources=[
                "cuda_rasterizer/rasterizer_impl.cu",
                "cuda_rasterizer/forward.cu",
                "cuda_rasterizer/backward.cu",
                "rasterize_points.cu",
                "ext.cpp",
            ],
            # Only nvcc receives the extra include flag.
            extra_compile_args={
                "nvcc": ["-I" + os.path.join(_HERE, "third_party/glm/")],
            },
        )
    ],
    cmdclass={
        'build_ext': BuildExtension
    }
)
|
diff-gaussian-rasterization/third_party/glm/.appveyor.yml
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
shallow_clone: true
|
2 |
+
|
3 |
+
platform:
|
4 |
+
- x86
|
5 |
+
- x64
|
6 |
+
|
7 |
+
configuration:
|
8 |
+
- Debug
|
9 |
+
- Release
|
10 |
+
|
11 |
+
image:
|
12 |
+
- Visual Studio 2013
|
13 |
+
- Visual Studio 2015
|
14 |
+
- Visual Studio 2017
|
15 |
+
- Visual Studio 2019
|
16 |
+
|
17 |
+
environment:
|
18 |
+
matrix:
|
19 |
+
- GLM_ARGUMENTS: -DGLM_TEST_FORCE_PURE=ON
|
20 |
+
- GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_SSE2=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON
|
21 |
+
- GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON
|
22 |
+
- GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_14=ON
|
23 |
+
- GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_17=ON
|
24 |
+
|
25 |
+
matrix:
|
26 |
+
exclude:
|
27 |
+
- image: Visual Studio 2013
|
28 |
+
GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON
|
29 |
+
- image: Visual Studio 2013
|
30 |
+
GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_14=ON
|
31 |
+
- image: Visual Studio 2013
|
32 |
+
GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_17=ON
|
33 |
+
- image: Visual Studio 2013
|
34 |
+
configuration: Debug
|
35 |
+
- image: Visual Studio 2015
|
36 |
+
GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_SSE2=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON
|
37 |
+
- image: Visual Studio 2015
|
38 |
+
GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_14=ON
|
39 |
+
- image: Visual Studio 2015
|
40 |
+
GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_17=ON
|
41 |
+
- image: Visual Studio 2015
|
42 |
+
platform: x86
|
43 |
+
- image: Visual Studio 2015
|
44 |
+
configuration: Debug
|
45 |
+
- image: Visual Studio 2017
|
46 |
+
platform: x86
|
47 |
+
- image: Visual Studio 2017
|
48 |
+
configuration: Debug
|
49 |
+
- image: Visual Studio 2019
|
50 |
+
platform: x64
|
51 |
+
|
52 |
+
branches:
|
53 |
+
only:
|
54 |
+
- master
|
55 |
+
|
56 |
+
before_build:
|
57 |
+
- ps: |
|
58 |
+
mkdir build
|
59 |
+
cd build
|
60 |
+
|
61 |
+
if ("$env:APPVEYOR_JOB_NAME" -match "Image: Visual Studio 2013") {
|
62 |
+
$env:generator="Visual Studio 12 2013"
|
63 |
+
}
|
64 |
+
if ("$env:APPVEYOR_JOB_NAME" -match "Image: Visual Studio 2015") {
|
65 |
+
$env:generator="Visual Studio 14 2015"
|
66 |
+
}
|
67 |
+
if ("$env:APPVEYOR_JOB_NAME" -match "Image: Visual Studio 2017") {
|
68 |
+
$env:generator="Visual Studio 15 2017"
|
69 |
+
}
|
70 |
+
if ("$env:APPVEYOR_JOB_NAME" -match "Image: Visual Studio 2019") {
|
71 |
+
$env:generator="Visual Studio 16 2019"
|
72 |
+
}
|
73 |
+
if ($env:PLATFORM -eq "x64") {
|
74 |
+
$env:generator="$env:generator Win64"
|
75 |
+
}
|
76 |
+
echo generator="$env:generator"
|
77 |
+
cmake .. -G "$env:generator" -DCMAKE_INSTALL_PREFIX="$env:APPVEYOR_BUILD_FOLDER/install" -DGLM_QUIET=ON -DGLM_TEST_ENABLE=ON "$env:GLM_ARGUMENTS"
|
78 |
+
|
79 |
+
build_script:
|
80 |
+
- cmake --build . --parallel --config %CONFIGURATION% -- /m /v:minimal
|
81 |
+
- cmake --build . --target install --parallel --config %CONFIGURATION% -- /m /v:minimal
|
82 |
+
|
83 |
+
test_script:
|
84 |
+
- ctest --parallel 4 --verbose -C %CONFIGURATION%
|
85 |
+
- cd ..
|
86 |
+
- ps: |
|
87 |
+
mkdir build_test_cmake
|
88 |
+
cd build_test_cmake
|
89 |
+
cmake ..\test\cmake\ -G "$env:generator" -DCMAKE_PREFIX_PATH="$env:APPVEYOR_BUILD_FOLDER/install"
|
90 |
+
- cmake --build . --parallel --config %CONFIGURATION% -- /m /v:minimal
|
91 |
+
|
92 |
+
deploy: off
|
diff-gaussian-rasterization/third_party/glm/.gitignore
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Compiled Object files
|
2 |
+
*.slo
|
3 |
+
*.lo
|
4 |
+
*.o
|
5 |
+
*.obj
|
6 |
+
|
7 |
+
# Precompiled Headers
|
8 |
+
*.gch
|
9 |
+
*.pch
|
10 |
+
|
11 |
+
# Compiled Dynamic libraries
|
12 |
+
*.so
|
13 |
+
*.dylib
|
14 |
+
*.dll
|
15 |
+
|
16 |
+
# Fortran module files
|
17 |
+
*.mod
|
18 |
+
|
19 |
+
# Compiled Static libraries
|
20 |
+
*.lai
|
21 |
+
*.la
|
22 |
+
*.a
|
23 |
+
*.lib
|
24 |
+
|
25 |
+
# Executables
|
26 |
+
*.exe
|
27 |
+
*.out
|
28 |
+
*.app
|
29 |
+
|
30 |
+
# CMake
|
31 |
+
CMakeCache.txt
|
32 |
+
CMakeFiles
|
33 |
+
cmake_install.cmake
|
34 |
+
install_manifest.txt
|
35 |
+
*.cmake
|
36 |
+
!glmConfig.cmake
|
37 |
+
!glmConfig-version.cmake
|
38 |
+
# ^ May need to add future .cmake files as exceptions
|
39 |
+
|
40 |
+
# Test logs
|
41 |
+
Testing/*
|
42 |
+
|
43 |
+
# Test input
|
44 |
+
test/gtc/*.dds
|
45 |
+
|
46 |
+
# Project Files
|
47 |
+
Makefile
|
48 |
+
*.cbp
|
49 |
+
*.user
|
50 |
+
|
51 |
+
# Misc.
|
52 |
+
*.log
|
53 |
+
|
54 |
+
# local build(s)
|
55 |
+
build*
|
56 |
+
|
57 |
+
/.vs
|
58 |
+
/.vscode
|
59 |
+
/CMakeSettings.json
|
60 |
+
.DS_Store
|
61 |
+
*.swp
|
diff-gaussian-rasterization/third_party/glm/.travis.yml
ADDED
@@ -0,0 +1,388 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
language: cpp
|
2 |
+
|
3 |
+
branches:
|
4 |
+
only:
|
5 |
+
- master
|
6 |
+
- stable
|
7 |
+
|
8 |
+
jobs:
|
9 |
+
include:
|
10 |
+
- name: "Xcode 7.3 C++98 pure release"
|
11 |
+
os: osx
|
12 |
+
osx_image: xcode7.3
|
13 |
+
env:
|
14 |
+
- MATRIX_EVAL=""
|
15 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_98=ON -DGLM_TEST_FORCE_PURE=ON"
|
16 |
+
|
17 |
+
- name: "Xcode 7.3 C++98 sse2 release"
|
18 |
+
os: osx
|
19 |
+
osx_image: xcode7.3
|
20 |
+
env:
|
21 |
+
- MATRIX_EVAL=""
|
22 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_98=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE2=ON"
|
23 |
+
|
24 |
+
- name: "Xcode 7.3 C++98 ms release"
|
25 |
+
os: osx
|
26 |
+
osx_image: xcode7.3
|
27 |
+
env:
|
28 |
+
- MATRIX_EVAL=""
|
29 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_98=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON"
|
30 |
+
|
31 |
+
- name: "XCode 7.3 C++11 pure release"
|
32 |
+
os: osx
|
33 |
+
osx_image: xcode7.3
|
34 |
+
env:
|
35 |
+
- MATRIX_EVAL=""
|
36 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_FORCE_PURE=ON"
|
37 |
+
|
38 |
+
- name: "XCode 7.3 C++11 sse2 release"
|
39 |
+
os: osx
|
40 |
+
osx_image: xcode7.3
|
41 |
+
env:
|
42 |
+
- MATRIX_EVAL=""
|
43 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE3=ON"
|
44 |
+
|
45 |
+
- name: "XCode 10.3 C++11 sse2 release"
|
46 |
+
os: osx
|
47 |
+
osx_image: xcode10.3
|
48 |
+
env:
|
49 |
+
- MATRIX_EVAL=""
|
50 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE3=ON"
|
51 |
+
|
52 |
+
- name: "XCode 12.2 C++11 sse2 release"
|
53 |
+
os: osx
|
54 |
+
osx_image: xcode12.2
|
55 |
+
env:
|
56 |
+
- MATRIX_EVAL=""
|
57 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE3=ON"
|
58 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
59 |
+
- CMAKE_ENV="--parallel"
|
60 |
+
|
61 |
+
- name: "XCode 12.2 C++11 sse2 debug"
|
62 |
+
os: osx
|
63 |
+
osx_image: xcode12.2
|
64 |
+
env:
|
65 |
+
- MATRIX_EVAL=""
|
66 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE3=ON"
|
67 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
68 |
+
- CMAKE_ENV="--parallel"
|
69 |
+
|
70 |
+
- name: "XCode 12.2 C++11 avx debug"
|
71 |
+
os: osx
|
72 |
+
osx_image: xcode12.2
|
73 |
+
env:
|
74 |
+
- MATRIX_EVAL=""
|
75 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_AVX=ON"
|
76 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
77 |
+
- CMAKE_ENV="--parallel"
|
78 |
+
|
79 |
+
- name: "XCode 12.2 C++14 avx debug"
|
80 |
+
os: osx
|
81 |
+
osx_image: xcode12.2
|
82 |
+
env:
|
83 |
+
- MATRIX_EVAL=""
|
84 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_AVX=ON"
|
85 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
86 |
+
- CMAKE_ENV="--parallel"
|
87 |
+
|
88 |
+
- name: "XCode 12.2 C++14 pure debug"
|
89 |
+
os: osx
|
90 |
+
osx_image: xcode12.2
|
91 |
+
env:
|
92 |
+
- MATRIX_EVAL=""
|
93 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_FORCE_PURE=ON"
|
94 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
95 |
+
- CMAKE_ENV="--parallel"
|
96 |
+
|
97 |
+
- name: "XCode 12.2 C++17 pure debug"
|
98 |
+
os: osx
|
99 |
+
osx_image: xcode12.2
|
100 |
+
env:
|
101 |
+
- MATRIX_EVAL=""
|
102 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_FORCE_PURE=ON"
|
103 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
104 |
+
- CMAKE_ENV="--parallel"
|
105 |
+
|
106 |
+
- name: "XCode 12.2 C++17 sse2 debug"
|
107 |
+
os: osx
|
108 |
+
osx_image: xcode12.2
|
109 |
+
env:
|
110 |
+
- MATRIX_EVAL=""
|
111 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE2=ON"
|
112 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
113 |
+
- CMAKE_ENV="--parallel"
|
114 |
+
|
115 |
+
- name: "XCode 12.2 C++17 sse2 release"
|
116 |
+
os: osx
|
117 |
+
osx_image: xcode12.2
|
118 |
+
env:
|
119 |
+
- MATRIX_EVAL=""
|
120 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE2=ON"
|
121 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
122 |
+
- CMAKE_ENV="--parallel"
|
123 |
+
|
124 |
+
- name: "XCode 12.2 C++17 avx release"
|
125 |
+
os: osx
|
126 |
+
osx_image: xcode12.2
|
127 |
+
env:
|
128 |
+
- MATRIX_EVAL=""
|
129 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_AVX=ON"
|
130 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
131 |
+
- CMAKE_ENV="--parallel"
|
132 |
+
|
133 |
+
- name: "GCC 4.9 C++98 pure release"
|
134 |
+
os: linux
|
135 |
+
dist: Xenial
|
136 |
+
addons:
|
137 |
+
apt:
|
138 |
+
sources:
|
139 |
+
- ubuntu-toolchain-r-test
|
140 |
+
packages:
|
141 |
+
- g++-4.9
|
142 |
+
env:
|
143 |
+
- MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
|
144 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_98=ON -DGLM_TEST_FORCE_PURE=ON"
|
145 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
146 |
+
- CMAKE_ENV="--parallel"
|
147 |
+
|
148 |
+
- name: "GCC 4.9 C++98 pure debug"
|
149 |
+
os: linux
|
150 |
+
dist: Xenial
|
151 |
+
addons:
|
152 |
+
apt:
|
153 |
+
sources:
|
154 |
+
- ubuntu-toolchain-r-test
|
155 |
+
packages:
|
156 |
+
- g++-4.9
|
157 |
+
env:
|
158 |
+
- MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
|
159 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_98=ON -DGLM_TEST_FORCE_PURE=ON"
|
160 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
161 |
+
- CMAKE_ENV="--parallel"
|
162 |
+
|
163 |
+
- name: "GCC 4.9 C++98 ms debug"
|
164 |
+
os: linux
|
165 |
+
dist: Xenial
|
166 |
+
addons:
|
167 |
+
apt:
|
168 |
+
sources:
|
169 |
+
- ubuntu-toolchain-r-test
|
170 |
+
packages:
|
171 |
+
- g++-4.9
|
172 |
+
env:
|
173 |
+
- MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
|
174 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_98=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON"
|
175 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
176 |
+
- CMAKE_ENV="--parallel"
|
177 |
+
|
178 |
+
- name: "GCC 4.9 C++11 ms debug"
|
179 |
+
os: linux
|
180 |
+
dist: Xenial
|
181 |
+
addons:
|
182 |
+
apt:
|
183 |
+
sources:
|
184 |
+
- ubuntu-toolchain-r-test
|
185 |
+
packages:
|
186 |
+
- g++-4.9
|
187 |
+
env:
|
188 |
+
- MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
|
189 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON"
|
190 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
191 |
+
- CMAKE_ENV="--parallel"
|
192 |
+
|
193 |
+
- name: "GCC 4.9 C++11 pure debug"
|
194 |
+
os: linux
|
195 |
+
dist: Xenial
|
196 |
+
addons:
|
197 |
+
apt:
|
198 |
+
sources:
|
199 |
+
- ubuntu-toolchain-r-test
|
200 |
+
packages:
|
201 |
+
- g++-4.9
|
202 |
+
env:
|
203 |
+
- MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
|
204 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_11=ON -DGLM_TEST_FORCE_PURE=ON"
|
205 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
206 |
+
- CMAKE_ENV="--parallel"
|
207 |
+
|
208 |
+
- name: "GCC 6 C++14 pure debug"
|
209 |
+
os: linux
|
210 |
+
dist: bionic
|
211 |
+
addons:
|
212 |
+
apt:
|
213 |
+
sources:
|
214 |
+
- ubuntu-toolchain-r-test
|
215 |
+
packages:
|
216 |
+
- g++-6
|
217 |
+
env:
|
218 |
+
- MATRIX_EVAL="CC=gcc-6 && CXX=g++-6"
|
219 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_FORCE_PURE=ON"
|
220 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
221 |
+
- CMAKE_ENV="--parallel"
|
222 |
+
|
223 |
+
- name: "GCC 6 C++14 ms debug"
|
224 |
+
os: linux
|
225 |
+
dist: bionic
|
226 |
+
addons:
|
227 |
+
apt:
|
228 |
+
sources:
|
229 |
+
- ubuntu-toolchain-r-test
|
230 |
+
packages:
|
231 |
+
- g++-6
|
232 |
+
env:
|
233 |
+
- MATRIX_EVAL="CC=gcc-6 && CXX=g++-6"
|
234 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON"
|
235 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
236 |
+
- CMAKE_ENV="--parallel"
|
237 |
+
|
238 |
+
- name: "GCC 7 C++17 ms debug"
|
239 |
+
os: linux
|
240 |
+
dist: bionic
|
241 |
+
addons:
|
242 |
+
apt:
|
243 |
+
sources:
|
244 |
+
- ubuntu-toolchain-r-test
|
245 |
+
packages:
|
246 |
+
- g++-7
|
247 |
+
env:
|
248 |
+
- MATRIX_EVAL="CC=gcc-7 && CXX=g++-7"
|
249 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON"
|
250 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
251 |
+
- CMAKE_ENV="--parallel"
|
252 |
+
|
253 |
+
- name: "GCC 7 C++17 pure debug"
|
254 |
+
os: linux
|
255 |
+
dist: bionic
|
256 |
+
addons:
|
257 |
+
apt:
|
258 |
+
sources:
|
259 |
+
- ubuntu-toolchain-r-test
|
260 |
+
packages:
|
261 |
+
- g++-7
|
262 |
+
env:
|
263 |
+
- MATRIX_EVAL="CC=gcc-7 && CXX=g++-7"
|
264 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_FORCE_PURE=ON"
|
265 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
266 |
+
- CMAKE_ENV="--parallel"
|
267 |
+
|
268 |
+
- name: "GCC 10 C++17 pure debug"
|
269 |
+
os: linux
|
270 |
+
dist: bionic
|
271 |
+
addons:
|
272 |
+
apt:
|
273 |
+
sources:
|
274 |
+
- ubuntu-toolchain-r-test
|
275 |
+
packages:
|
276 |
+
- g++-10
|
277 |
+
env:
|
278 |
+
- MATRIX_EVAL="CC=gcc-10 && CXX=g++-10"
|
279 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_FORCE_PURE=ON"
|
280 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
281 |
+
- CMAKE_ENV="--parallel"
|
282 |
+
|
283 |
+
- name: "GCC 10 C++17 pure release"
|
284 |
+
os: linux
|
285 |
+
dist: bionic
|
286 |
+
addons:
|
287 |
+
apt:
|
288 |
+
sources:
|
289 |
+
- ubuntu-toolchain-r-test
|
290 |
+
packages:
|
291 |
+
- g++-10
|
292 |
+
env:
|
293 |
+
- MATRIX_EVAL="CC=gcc-10 && CXX=g++-10"
|
294 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_FORCE_PURE=ON"
|
295 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
296 |
+
- CMAKE_ENV="--parallel"
|
297 |
+
|
298 |
+
- name: "Clang C++14 pure release"
|
299 |
+
os: linux
|
300 |
+
dist: Xenial
|
301 |
+
env:
|
302 |
+
- MATRIX_EVAL="CC=clang && CXX=clang++"
|
303 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_FORCE_PURE=ON"
|
304 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
305 |
+
- CMAKE_ENV="--parallel"
|
306 |
+
|
307 |
+
- name: "Clang C++14 pure debug"
|
308 |
+
os: linux
|
309 |
+
dist: Xenial
|
310 |
+
env:
|
311 |
+
- MATRIX_EVAL="CC=clang && CXX=clang++"
|
312 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_FORCE_PURE=ON"
|
313 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
314 |
+
- CMAKE_ENV="--parallel"
|
315 |
+
|
316 |
+
- name: "Clang C++14 sse2 debug"
|
317 |
+
os: linux
|
318 |
+
dist: Xenial
|
319 |
+
env:
|
320 |
+
- MATRIX_EVAL="CC=clang && CXX=clang++"
|
321 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE2=ON"
|
322 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
323 |
+
- CMAKE_ENV="--parallel"
|
324 |
+
|
325 |
+
- name: "Clang C++14 sse2 debug"
|
326 |
+
os: linux
|
327 |
+
dist: focal
|
328 |
+
env:
|
329 |
+
- MATRIX_EVAL="CC=clang && CXX=clang++"
|
330 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_14=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE2=ON"
|
331 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
332 |
+
- CMAKE_ENV="--parallel"
|
333 |
+
|
334 |
+
- name: "Clang C++17 sse2 debug"
|
335 |
+
os: linux
|
336 |
+
dist: focal
|
337 |
+
env:
|
338 |
+
- MATRIX_EVAL="CC=clang && CXX=clang++"
|
339 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_SSE2=ON"
|
340 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
341 |
+
- CMAKE_ENV="--parallel"
|
342 |
+
|
343 |
+
- name: "Clang C++17 avx2 debug"
|
344 |
+
os: linux
|
345 |
+
dist: focal
|
346 |
+
env:
|
347 |
+
- MATRIX_EVAL="CC=clang && CXX=clang++"
|
348 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_SIMD_AVX2=ON"
|
349 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
350 |
+
- CMAKE_ENV="--parallel"
|
351 |
+
|
352 |
+
- name: "Clang C++17 pure debug"
|
353 |
+
os: linux
|
354 |
+
dist: focal
|
355 |
+
env:
|
356 |
+
- MATRIX_EVAL="CC=clang && CXX=clang++"
|
357 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Debug -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_FORCE_PURE=ON"
|
358 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
359 |
+
- CMAKE_ENV="--parallel"
|
360 |
+
|
361 |
+
- name: "Clang C++17 pure release"
|
362 |
+
os: linux
|
363 |
+
dist: focal
|
364 |
+
env:
|
365 |
+
- MATRIX_EVAL="CC=clang && CXX=clang++"
|
366 |
+
- CMAKE_BUILD_ENV="-DCMAKE_BUILD_TYPE=Release -DGLM_TEST_ENABLE=ON -DGLM_TEST_ENABLE_CXX_17=ON -DGLM_TEST_FORCE_PURE=ON"
|
367 |
+
- CTEST_ENV="--parallel 4 --output-on-failure"
|
368 |
+
- CMAKE_ENV="--parallel"
|
369 |
+
|
370 |
+
before_script:
  - cmake --version
  # Each job's MATRIX_EVAL is either empty or assigns CC and CXX.
  - eval "${MATRIX_EVAL}"

script:
  - ${CC} --version
  - mkdir ./build
  - cd ./build
  # Pass the compiler through $CXX. The previous $COMPILER is not assigned
  # anywhere in this configuration, so it always expanded to an empty string.
  - cmake -DCMAKE_INSTALL_PREFIX=$TRAVIS_BUILD_DIR/install -DCMAKE_CXX_COMPILER=$CXX ${CMAKE_BUILD_ENV} ..
  - cmake --build . ${CMAKE_ENV}
  - ctest ${CTEST_ENV}
  - cmake --build . --target install ${CMAKE_ENV}
  # Second stage: configure and build the consumer project in test/cmake
  # against the tree that was just installed.
  - cd $TRAVIS_BUILD_DIR
  - mkdir ./build_test_cmake
  - cd ./build_test_cmake
  - cmake -DCMAKE_CXX_COMPILER=$CXX $TRAVIS_BUILD_DIR/test/cmake/ -DCMAKE_PREFIX_PATH=$TRAVIS_BUILD_DIR/install
  - cmake --build .
|
387 |
+
|
388 |
+
|
diff-gaussian-rasterization/third_party/glm/CMakeLists.txt
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Top-level build script for GLM: derives the project version from the
# GLM_VERSION_* macros in glm/detail/setup.hpp, declares the project, and
# exposes the `glm` target (defined in the glm/ subdirectory) as glm::glm.
#
# The minimum stays at 3.2. The "...3.22" upper bound opts in to newer policy
# defaults where available, so CMake releases that have dropped compatibility
# with pre-3.5 policy versions still accept this file. CMake older than 3.12
# ignores the "...<max>" suffix. cmake_minimum_required() already sets the
# policy version, so no separate cmake_policy(VERSION ...) call is needed.
cmake_minimum_required(VERSION 3.2...3.22 FATAL_ERROR)

# Pull each version component out of the header. The file contents are passed
# quoted so that any ';' in the header is not treated as a list separator.
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/glm/detail/setup.hpp" GLM_SETUP_FILE)
foreach(_glm_component MAJOR MINOR PATCH REVISION)
  string(REGEX MATCH
    "#define[ ]+GLM_VERSION_${_glm_component}[ ]+([0-9]+)"
    _glm_version_match
    "${GLM_SETUP_FILE}")
  if("${_glm_version_match}" STREQUAL "")
    # Fail here with a precise message rather than later inside project().
    message(FATAL_ERROR
      "GLM: could not find GLM_VERSION_${_glm_component} in glm/detail/setup.hpp")
  endif()
  set(GLM_VERSION_${_glm_component} "${CMAKE_MATCH_1}")
endforeach()
unset(_glm_version_match)

set(GLM_VERSION ${GLM_VERSION_MAJOR}.${GLM_VERSION_MINOR}.${GLM_VERSION_PATCH}.${GLM_VERSION_REVISION})
project(glm VERSION ${GLM_VERSION} LANGUAGES CXX)
message(STATUS "GLM: Version " ${GLM_VERSION})

# The glm/ subdirectory is expected to define the `glm` target; consumers
# should link the namespaced alias.
add_subdirectory(glm)
add_library(glm::glm ALIAS glm)
|
21 |
+
|
22 |
+
# Packaging, install rules and the test suite are only set up when GLM is the
# top-level project, i.e. not when it is pulled in through add_subdirectory().
# Both operands are quoted so the comparison is a plain string comparison.
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")

  # Defines CMAKE_INSTALL_INCLUDEDIR / CMAKE_INSTALL_LIBDIR used below, so the
  # install rules do not depend on another directory having included it first.
  include(GNUInstallDirs)

  include(CPack)
  install(DIRECTORY glm
    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
    PATTERN "CMakeLists.txt" EXCLUDE)
  install(EXPORT glm
    FILE glmConfig.cmake
    DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/glm"
    NAMESPACE glm::)

  # Generate and install the package version file next to glmConfig.cmake.
  include(CMakePackageConfigHelpers)
  write_basic_package_version_file("glmConfigVersion.cmake" COMPATIBILITY AnyNewerVersion)
  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/glmConfigVersion.cmake"
    DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/glm")

  # CTest defines BUILD_TESTING; tests are added only when it is enabled.
  include(CTest)
  if(BUILD_TESTING)
    add_subdirectory(test)
  endif()

endif()
|
37 |
+
|
38 |
+
# Provide an `uninstall` target unless an enclosing project already defines one.
if(NOT TARGET uninstall)
  # Generate the uninstall script into this directory's binary dir. The legacy
  # IMMEDIATE keyword is dropped; it is a legacy no-op in configure_file().
  configure_file(
    "${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in"
    "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
    @ONLY)

  # Run the script from where configure_file() wrote it. CMAKE_BINARY_DIR is
  # the top of the whole build and differs from the current binary dir when
  # GLM is built as a subproject.
  add_custom_target(uninstall
    COMMAND "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake")
endif()
|