#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact george.drettakis@inria.fr
#
import math
from typing import Optional

import numpy as np
import torch
import torch.nn.functional as F
from easydict import EasyDict as edict

from ..representations.gaussian import Gaussian
from .sh_utils import eval_sh


def intrinsics_to_projection(
intrinsics: torch.Tensor,
near: float,
far: float,
) -> torch.Tensor:
"""
Convert OpenCV-style camera intrinsics matrix to OpenGL perspective projection matrix.
This function transforms a standard 3x3 camera intrinsics matrix into a 4x4 perspective
projection matrix compatible with OpenGL rendering pipeline. The resulting matrix
properly handles the coordinate system differences between computer vision and
computer graphics conventions.
    Args:
        intrinsics (torch.Tensor): [3, 3] OpenCV intrinsics matrix with focal lengths and
            principal point normalized by the image dimensions (i.e. expressed in [0, 1])
        near (float): Distance to the near clipping plane (must be positive)
        far (float): Distance to the far clipping plane (must be greater than near)
Returns:
torch.Tensor: [4, 4] OpenGL perspective projection matrix for rendering
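
    Example:
        A minimal sketch; the intrinsics values below are illustrative and assume
        normalization by the image size:

        >>> K = torch.tensor([[0.7, 0.0, 0.5],
        ...                   [0.0, 0.7, 0.5],
        ...                   [0.0, 0.0, 1.0]])
        >>> intrinsics_to_projection(K, near=0.1, far=100.0).shape
        torch.Size([4, 4])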
"""
# Extract focal lengths and principal point from intrinsics matrix
fx, fy = intrinsics[0, 0], intrinsics[1, 1] # Focal lengths in x and y directions
cx, cy = intrinsics[0, 2], intrinsics[1, 2] # Principal point coordinates
# Initialize empty 4x4 projection matrix
ret = torch.zeros((4, 4), dtype=intrinsics.dtype, device=intrinsics.device)
# Fill in the projection matrix components
ret[0, 0] = 2 * fx # Scale for x axis based on horizontal focal length
ret[1, 1] = 2 * fy # Scale for y axis based on vertical focal length
ret[0, 2] = 2 * cx - 1 # X offset based on principal point (OpenCV to OpenGL conversion)
    ret[1, 2] = -2 * cy + 1  # Y offset based on principal point (with flipped Y axis)
ret[2, 2] = far / (far - near) # Handle depth mapping to clip space
ret[2, 3] = near * far / (near - far) # Term for perspective division in clip space
ret[3, 2] = 1. # Enable perspective division
return ret


def render(viewpoint_camera, pc: Gaussian, pipe, bg_color: torch.Tensor,
           scaling_modifier: float = 1.0, override_color: Optional[torch.Tensor] = None):
"""
Render the scene using 3D Gaussians.
This function performs the rasterization of 3D Gaussian points into a 2D image from a given viewpoint.
Args:
viewpoint_camera: Camera parameters including position, view transform, and projection
pc (Gaussian): Point cloud represented as 3D Gaussians
pipe: Pipeline configuration parameters
bg_color (torch.Tensor): Background color tensor (must be on GPU)
scaling_modifier (float): Scale modifier for the Gaussian splats
override_color (torch.Tensor, optional): Custom colors to override computed SH-based colors
Returns:
edict: Dictionary containing rendered image, viewspace points, visibility filter, and radii information
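
    Example:
        A minimal sketch of a call; the inputs are assumed to be prepared elsewhere
        (e.g. by ``GaussianRenderer.render`` below):

        >>> out = render(camera_dict, pc, pipe, bg_color)  # doctest: +SKIP
        >>> image, visible = out.render, out.visibility_filter  # doctest: +SKIP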
"""
# Lazy import of the rasterization module to avoid circular dependencies
# or to improve startup performance when not needed immediately
if 'GaussianRasterizer' not in globals():
from diff_gaussian_rasterization import GaussianRasterizer, GaussianRasterizationSettings
# Create zero tensor for screen space points
# This tensor will hold gradients of the 2D (screen-space) means for optimization
screenspace_points = torch.zeros_like(pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda") + 0
    try:
        # Retain gradients on this non-leaf tensor so they can be read after backward
        screenspace_points.retain_grad()
    except Exception:
        # retain_grad may fail when autograd is disabled (e.g. pure inference)
        pass
# Calculate camera frustum parameters from the field of view
tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)
# Get kernel size from the pipeline configuration
kernel_size = pipe.kernel_size
# Initialize subpixel offset for all pixels (used for anti-aliasing)
subpixel_offset = torch.zeros((int(viewpoint_camera.image_height), int(viewpoint_camera.image_width), 2),
dtype=torch.float32, device="cuda")
# Configure the Gaussian rasterization settings with all necessary parameters
raster_settings = GaussianRasterizationSettings(
image_height=int(viewpoint_camera.image_height),
image_width=int(viewpoint_camera.image_width),
tanfovx=tanfovx,
tanfovy=tanfovy,
kernel_size=kernel_size,
subpixel_offset=subpixel_offset,
bg=bg_color,
scale_modifier=scaling_modifier,
viewmatrix=viewpoint_camera.world_view_transform,
projmatrix=viewpoint_camera.full_proj_transform,
sh_degree=pc.active_sh_degree,
campos=viewpoint_camera.camera_center,
prefiltered=False,
debug=pipe.debug
)
# Create the rasterizer with the configured settings
rasterizer = GaussianRasterizer(raster_settings=raster_settings)
# Get the Gaussian 3D positions and opacities
means3D = pc.get_xyz
means2D = screenspace_points
opacity = pc.get_opacity
# Handle covariance computation options
# Either use precomputed 3D covariance or let the rasterizer compute it from scales and rotations
scales = None
rotations = None
cov3D_precomp = None
if pipe.compute_cov3D_python:
# Compute 3D covariances in Python before rasterization
cov3D_precomp = pc.get_covariance(scaling_modifier)
else:
# Let the rasterizer compute covariances from scale and rotation
scales = pc.get_scaling
rotations = pc.get_rotation
# Handle color computation options
# Either use override colors, precomputed colors from SHs, or let the rasterizer compute colors from SHs
shs = None
colors_precomp = None
if override_color is None:
if pipe.convert_SHs_python:
# Convert spherical harmonics to RGB colors in Python
shs_view = pc.get_features.transpose(1, 2).view(-1, 3, (pc.max_sh_degree+1)**2)
# Calculate the view direction from Gaussian center to camera
dir_pp = (pc.get_xyz - viewpoint_camera.camera_center.repeat(pc.get_features.shape[0], 1))
dir_pp_normalized = dir_pp/dir_pp.norm(dim=1, keepdim=True)
# Evaluate spherical harmonics to get RGB colors
sh2rgb = eval_sh(pc.active_sh_degree, shs_view, dir_pp_normalized)
# Apply offset and clamp to ensure valid color values
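            # (the 0.5 offset recenters the SH DC term so neutral colors land at mid-gray)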
colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0)
else:
# Let the rasterizer convert SHs to colors
shs = pc.get_features
else:
# Use provided override colors
colors_precomp = override_color
# Perform the rasterization to generate the final rendered image
# This projects the 3D Gaussians to 2D and blends them according to their opacities
rendered_image, radii = rasterizer(
means3D = means3D,
means2D = means2D,
shs = shs,
colors_precomp = colors_precomp,
opacities = opacity,
scales = scales,
rotations = rotations,
cov3D_precomp = cov3D_precomp
)
# Return the rendering results in a dictionary
# radii > 0 creates a filter for visible Gaussians (those not frustum-culled)
return edict({"render": rendered_image,
"viewspace_points": screenspace_points,
"visibility_filter" : radii > 0,
"radii": radii})


class GaussianRenderer:
"""
A renderer for Gaussian Splatting that converts 3D Gaussian primitives into 2D images.
This renderer projects 3D Gaussian splats onto a 2D image plane using the provided
camera parameters, handling the rasterization process through an optimized backend.
Args:
rendering_options (dict): Configuration options for rendering including resolution,
depth range, background color, and supersampling level.
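
    Example:
        A minimal sketch (the option values are illustrative):

        >>> renderer = GaussianRenderer({
        ...     "resolution": 512,
        ...     "near": 0.8,
        ...     "far": 1.6,
        ...     "ssaa": 2,
        ...     "bg_color": (1.0, 1.0, 1.0),
        ... })  # doctest: +SKIP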
"""
    def __init__(self, rendering_options: Optional[dict] = None) -> None:
# Initialize default pipeline parameters
self.pipe = edict({
"kernel_size": 0.1, # Size of the Gaussian kernel for rasterization
"convert_SHs_python": False, # Whether to convert Spherical Harmonics to colors in Python
"compute_cov3D_python": False, # Whether to compute 3D covariance matrices in Python
"scale_modifier": 1.0, # Global scaling factor for all Gaussians
"debug": False # Enable/disable debug mode
})
# Initialize default rendering options
self.rendering_options = edict({
"resolution": None, # Output image resolution (width and height)
"near": None, # Near clipping plane distance
"far": None, # Far clipping plane distance
"ssaa": 1, # Super-sampling anti-aliasing factor (1 = disabled)
"bg_color": 'random', # Background color ('random' or specific color)
})
# Update with user-provided options
        self.rendering_options.update(rendering_options or {})
# Initialize background color (will be set during rendering)
self.bg_color = None

    def render(
        self,
        gaussian: Gaussian,
        extrinsics: torch.Tensor,
        intrinsics: torch.Tensor,
        colors_overwrite: Optional[torch.Tensor] = None
    ) -> edict:
"""
Render the 3D Gaussian representation from a given camera viewpoint.
This method projects the 3D Gaussians onto a 2D image plane using the provided camera parameters,
handling the full rendering pipeline including projection, rasterization, and optional supersampling.
        Args:
            gaussian: The Gaussian representation containing positions, features, and other attributes
            extrinsics (torch.Tensor): (4, 4) world-to-camera extrinsics matrix defining camera
                position and orientation
            intrinsics (torch.Tensor): (3, 3) OpenCV intrinsics matrix with focal lengths and
                principal point normalized by the image dimensions
            colors_overwrite (torch.Tensor): Optional (N, 3) tensor to override Gaussian colors
Returns:
edict containing:
color (torch.Tensor): (3, H, W) rendered color image
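
        Example:
            A minimal sketch (shapes are illustrative and follow the options set above):

            >>> image = renderer.render(gaussian, extrinsics, intrinsics).color  # doctest: +SKIP
            >>> image.shape  # doctest: +SKIP
            torch.Size([3, 512, 512])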
"""
# Extract rendering parameters from options
resolution = self.rendering_options["resolution"]
near = self.rendering_options["near"]
far = self.rendering_options["far"]
ssaa = self.rendering_options["ssaa"] # Super-sampling anti-aliasing factor
# Set background color based on rendering options
if self.rendering_options["bg_color"] == 'random':
# Randomly choose either black or white background
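            # (randomizing the background is a common training-time trick so the model
            # does not bake in a fixed background color)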
self.bg_color = torch.zeros(3, dtype=torch.float32, device="cuda")
if np.random.rand() < 0.5:
self.bg_color += 1
else:
# Use specified background color
self.bg_color = torch.tensor(self.rendering_options["bg_color"], dtype=torch.float32, device="cuda")
# Prepare camera parameters for the renderer
view = extrinsics # World-to-camera transform
# Convert OpenCV intrinsics to OpenGL projection matrix
perspective = intrinsics_to_projection(intrinsics, near, far)
# Extract camera center from extrinsics (inverse of view matrix)
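        # For a rigid transform [R | t], inverse(view)[:3, 3] equals -R^T @ t,
        # i.e. the camera position in world coordinates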
camera = torch.inverse(view)[:3, 3]
        # Calculate field of view from the normalized focal lengths
        # (valid because the intrinsics are expressed in units of the image size)
focalx = intrinsics[0, 0]
focaly = intrinsics[1, 1]
fovx = 2 * torch.atan(0.5 / focalx) # Horizontal FoV in radians
fovy = 2 * torch.atan(0.5 / focaly) # Vertical FoV in radians
# Build complete camera parameter dictionary
camera_dict = edict({
"image_height": resolution * ssaa, # Apply supersampling if enabled
"image_width": resolution * ssaa,
"FoVx": fovx,
"FoVy": fovy,
"znear": near,
"zfar": far,
"world_view_transform": view.T.contiguous(), # Transpose for OpenGL convention
"projection_matrix": perspective.T.contiguous(),
"full_proj_transform": (perspective @ view).T.contiguous(), # Combined projection and view
"camera_center": camera
})
# Perform the actual rendering using the 3D Gaussian rasterizer
        render_ret = render(camera_dict, gaussian, self.pipe, self.bg_color,
                            override_color=colors_overwrite, scaling_modifier=self.pipe.scale_modifier)
# Handle supersampling by downsampling the high-resolution render to the target resolution
if ssaa > 1:
# Use bilinear interpolation with antialiasing to downsample the image
render_ret.render = F.interpolate(render_ret.render[None],
size=(resolution, resolution),
mode='bilinear',
align_corners=False,
antialias=True).squeeze()
# Return the final rendered color image
ret = edict({
'color': render_ret['render']
})
return ret