Spaces:

jev-aleks
/

SceneDINO

Running on Zero

App Files Files Community

SceneDINO / scenedino /renderer /nerf.py

jev-aleks

scenedino init

9e15541 15 days ago

raw

history blame contribute delete

25.9 kB

	"""
	NeRF differentiable renderer.
	References:
	https://github.com/bmild/nerf
	https://github.com/kwea123/nerf_pl
	"""
	import torch
	import torch.autograd.profiler as profiler
	from dotmap import DotMap


	class _RenderWrapper(torch.nn.Module):
	def __init__(self, net, renderer, simple_output):
	super().__init__()
	self.net = net
	self.renderer = renderer
	self.simple_output = simple_output

	def forward(
	self,
	rays,
	want_weights=False,
	want_alphas=False,
	want_z_samps=False,
	want_rgb_samps=False,
	sample_from_dist=None,
	):
	if rays.shape[0] == 0:
	return (
	torch.zeros(0, 3, device=rays.device),
	torch.zeros(0, device=rays.device),
	)

	outputs = self.renderer(
	self.net,
	rays,
	want_weights=want_weights and not self.simple_output,
	want_alphas=want_alphas and not self.simple_output,
	want_z_samps=want_z_samps and not self.simple_output,
	want_rgb_samps=want_rgb_samps and not self.simple_output,
	sample_from_dist=sample_from_dist,
	)
	if self.simple_output:
	if self.renderer.using_fine:
	rgb = outputs.fine.rgb
	depth = outputs.fine.depth
	else:
	rgb = outputs.coarse.rgb
	depth = outputs.coarse.depth
	return rgb, depth
	else:
	# Make DotMap to dict to support DataParallel
	return outputs.toDict()


	class NeRFRenderer(torch.nn.Module):
	"""
	NeRF differentiable renderer
	:param n_coarse number of coarse (binned uniform) samples
	:param n_fine number of fine (importance) samples
	:param n_fine_depth number of expected depth samples
	:param noise_std noise to add to sigma. We do not use it
	:param depth_std noise for depth samples
	:param eval_batch_size ray batch size for evaluation
	:param white_bkgd if true, background color is white; else black
	:param lindisp if to use samples linear in disparity instead of distance
	:param sched ray sampling schedule. list containing 3 lists of equal length.
	sched[0] is list of iteration numbers,
	sched[1] is list of coarse sample numbers,
	sched[2] is list of fine sample numbers
	"""

	def __init__(
	self,
	n_coarse=128,
	n_fine=0,
	n_fine_depth=0,
	noise_std=0.0,
	depth_std=0.01,
	eval_batch_size=100000,
	white_bkgd=False,
	lindisp=False,
	sched=None, # ray sampling schedule for coarse and fine rays
	hard_alpha_cap=False,
	render_mode="volumetric",
	surface_sigmoid_scale=.1,
	render_flow=False,
	normalize_dino=False,
	):
	super().__init__()
	self.n_coarse, self.n_fine = n_coarse, n_fine
	self.n_fine_depth = n_fine_depth

	self.noise_std = noise_std
	self.depth_std = depth_std

	self.eval_batch_size = eval_batch_size
	self.white_bkgd = white_bkgd
	self.lindisp = lindisp
	if lindisp:
	print("Using linear displacement rays")
	self.using_fine = n_fine > 0
	self.sched = sched
	if sched is not None and len(sched) == 0:
	self.sched = None
	self.register_buffer(
	"iter_idx", torch.tensor(0, dtype=torch.long), persistent=True
	)
	self.register_buffer(
	"last_sched", torch.tensor(0, dtype=torch.long), persistent=True
	)
	self.hard_alpha_cap = hard_alpha_cap

	assert render_mode in ("volumetric", "surface", "neus")
	self.render_mode = render_mode
	self.only_surface_color = (self.render_mode == "surface")
	self.surface_sigmoid_scale = surface_sigmoid_scale
	self.render_flow = render_flow
	self.normalize_dino = normalize_dino

	def sample_coarse(self, rays):
	"""
	Stratified sampling. Note this is different from original NeRF slightly.
	:param rays ray [origins (3), directions (3), near (1), far (1)] (B, 8)
	:return (B, Kc)
	"""
	device = rays.device
	near, far = rays[:, 6:7], rays[:, 7:8] # (B, 1)

	step = 1.0 / self.n_coarse
	B = rays.shape[0]
	z_steps = torch.linspace(0, 1 - step, self.n_coarse, device=device) # (Kc)
	z_steps = z_steps.unsqueeze(0).repeat(B, 1) # (B, Kc)
	z_steps += torch.rand_like(z_steps) * step
	if not self.lindisp: # Use linear sampling in depth space
	return near * (1 - z_steps) + far * z_steps # (B, Kf)
	else: # Use linear sampling in disparity space
	return 1 / (1 / near * (1 - z_steps) + 1 / far * z_steps) # (B, Kf)

	# Use linear sampling in depth space
	return near * (1 - z_steps) + far * z_steps # (B, Kc)

	def sample_coarse_from_dist(self, rays, weights, z_samp):
	device = rays.device
	B = rays.shape[0]

	num_bins = weights.shape[-1]
	num_samples = self.n_coarse

	weights = weights.detach() + 1e-5 # Prevent division by zero
	pdf = weights / torch.sum(weights, -1, keepdim=True) # (B, Kc)
	cdf = torch.cumsum(pdf, -1) # (B, Kc)
	cdf = torch.cat([torch.zeros_like(cdf[:, :1]), cdf], -1) # (B, Kc+1)

	u = torch.rand(B, num_samples, dtype=torch.float32, device=device) # (B, Kf)
	interval_ids = torch.searchsorted(cdf, u, right=True) - 1 # (B, Kf)
	interval_ids = torch.clamp(interval_ids, 0, num_samples - 1)
	interval_interp = torch.rand_like(interval_ids, dtype=torch.float32)

	# z_samps describe the centers of the respective histogram bins. Therefore, we have to extend them to the left and right
	if self.lindisp:
	z_samp = 1 / z_samp

	centers = 0.5 * (z_samp[:, 1:] + z_samp[:, :-1])
	interval_borders = torch.cat((z_samp[:, :1], centers, z_samp[:, -1:]), dim=-1)

	left_border = torch.gather(interval_borders, dim=-1, index=interval_ids)
	right_border = torch.gather(interval_borders, dim=-1, index=interval_ids + 1)

	z_samp_new = (
	left_border * (1 - interval_interp) + right_border * interval_interp
	)

	if self.lindisp:
	z_samp_new = 1 / z_samp_new

	assert not torch.any(torch.isnan(z_samp_new))

	return z_samp_new

	def sample_fine(self, rays, weights):
	"""min
	Weighted stratified (importance) sample
	:param rays ray [origins (3), directions (3), near (1), far (1)] (B, 8)
	:param weights (B, Kc)
	:return (B, Kf-Kfd)
	"""
	device = rays.device
	B = rays.shape[0]

	weights = weights.detach() + 1e-5 # Prevent division by zero
	pdf = weights / torch.sum(weights, -1, keepdim=True) # (B, Kc)
	cdf = torch.cumsum(pdf, -1) # (B, Kc)
	cdf = torch.cat([torch.zeros_like(cdf[:, :1]), cdf], -1) # (B, Kc+1)

	u = torch.rand(
	B, self.n_fine - self.n_fine_depth, dtype=torch.float32, device=device
	) # (B, Kf)
	inds = torch.searchsorted(cdf, u, right=True).float() - 1.0 # (B, Kf)
	inds = torch.clamp_min(inds, 0.0)

	z_steps = (inds + torch.rand_like(inds)) / self.n_coarse # (B, Kf)

	near, far = rays[:, 6:7], rays[:, 7:8] # (B, 1)
	if not self.lindisp: # Use linear sampling in depth space
	z_samp = near * (1 - z_steps) + far * z_steps # (B, Kf)
	else: # Use linear sampling in disparity space
	z_samp = 1 / (1 / near * (1 - z_steps) + 1 / far * z_steps) # (B, Kf)

	assert not torch.any(torch.isnan(z_samp))

	return z_samp

	def sample_fine_depth(self, rays, depth):
	"""
	Sample around specified depth
	:param rays ray [origins (3), directions (3), near (1), far (1)] (B, 8)
	:param depth (B)
	:return (B, Kfd)
	"""
	z_samp = depth.unsqueeze(1).repeat((1, self.n_fine_depth))
	z_samp += torch.randn_like(z_samp) * self.depth_std
	# Clamp does not support tensor bounds
	z_samp = torch.max(torch.min(z_samp, rays[:, 7:8]), rays[:, 6:7])

	assert not torch.any(torch.isnan(z_samp))

	return z_samp

	def composite(self, model, rays, z_samp, coarse=True, sb=0):
	"""
	Render RGB and depth for each ray using NeRF alpha-compositing formula,
	given sampled positions along each ray (see sample_*)
	:param model should return (B, (r, g, b, sigma)) when called with (B, (x, y, z))
	should also support 'coarse' boolean argument
	:param rays ray [origins (3), directions (3), near (1), far (1)] (B, 8)
	:param z_samp z positions sampled for each ray (B, K)
	:param coarse whether to evaluate using coarse NeRF
	:param sb super-batch dimension; 0 = disable
	:return weights (B, K), rgb (B, 3), depth (B)
	"""
	with profiler.record_function("renderer_composite"):
	B, K = z_samp.shape
	r_dim = rays.shape[-1]

	deltas = z_samp[:, 1:] - z_samp[:, :-1] # (B, K-1)
	delta_inf = 1e10 * torch.ones_like(deltas[:, :1]) # infty (B, 1)
	# delta_inf = rays[:, -1:] - z_samp[:, -1:]
	deltas = torch.cat([deltas, delta_inf], -1) # (B, K)

	# (B, K, 3)
	points = rays[:, None, :3] + z_samp.unsqueeze(2) * rays[:, None, 3:6]
	points = points.reshape(-1, 3) # (B*K, 3)

	if r_dim > 8:
	ray_info = rays[:, None, 8:].expand(-1, K, -1)
	else:
	ray_info = None

	if hasattr(model, "use_viewdirs"):
	use_viewdirs = model.use_viewdirs
	else:
	use_viewdirs = None

	viewdirs_all = []
	rgbs_all, invalid_all, sigmas_all, extras_all, state_dicts_all = [], [], [], [], []

	if sb > 0:
	points = points.reshape(
	sb, -1, 3
	) # (SB, B'*K, 3) B' is real ray batch size

	if ray_info is not None:
	ray_info = ray_info.reshape(sb, -1, ray_info.shape[-1])

	eval_batch_dim = 1
	eval_batch_size = (self.eval_batch_size - 1) // sb + 1
	else:
	eval_batch_size = self.eval_batch_size
	eval_batch_dim = 0

	split_points = torch.split(points, eval_batch_size, dim=eval_batch_dim)

	if ray_info is not None:
	split_ray_infos = torch.split(ray_info, eval_batch_size, dim=eval_batch_dim)
	else:
	split_ray_infos = [None for _ in split_points]

	if use_viewdirs:
	dim1 = K
	viewdirs = rays[:, None, 3:6].expand(-1, dim1, -1)
	if sb > 0:
	viewdirs = viewdirs.reshape(sb, -1, 3) # (SB, B'*K, 3)
	else:
	viewdirs = viewdirs.reshape(-1, 3) # (B*K, 3)
	split_viewdirs = torch.split(
	viewdirs, eval_batch_size, dim=eval_batch_dim
	)

	for i, pnts in enumerate(split_points):
	dirs = split_viewdirs[i]
	infos = split_ray_infos[i]

	rgbs, invalid, sigmas, extras, state_dict = model(
	pnts, coarse=coarse, viewdirs=dirs, only_density=self.only_surface_color, ray_info=ray_info, render_flow=self.render_flow
	)
	rgbs_all.append(rgbs)
	invalid_all.append(invalid)
	sigmas_all.append(sigmas)
	extras_all.append(extras)

	viewdirs_all.append(dirs)
	if state_dict is not None:
	state_dicts_all.append(state_dict)
	else:
	for i, pnts in enumerate(split_points):
	infos = split_ray_infos[i]

	rgbs, invalid, sigmas, extras, state_dict = model(pnts, coarse=coarse, only_density=self.only_surface_color, ray_info=infos, render_flow=self.render_flow)
	rgbs_all.append(rgbs)
	invalid_all.append(invalid)
	sigmas_all.append(sigmas)
	extras_all.append(extras)

	if state_dict is not None:
	state_dicts_all.append(state_dict)

	points, viewdirs = None, None

	# (BK, 4) OR (SB, B'K, 4)
	if not self.only_surface_color:
	rgbs = torch.cat(rgbs_all, dim=eval_batch_dim)
	else:
	rgbs = None
	invalid = torch.cat(invalid_all, dim=eval_batch_dim)
	sigmas = torch.cat(sigmas_all, dim=eval_batch_dim)

	if not extras_all[0] is None:
	extras = torch.cat(extras_all, dim=eval_batch_dim)
	else:
	extras = None

	deltas = deltas.float()
	sigmas = sigmas.float()

	if (
	state_dicts_all is not None and len(state_dicts_all) != 0
	): ## not empty in a list
	state_dicts = {
	key: torch.cat(
	[state_dicts[key] for state_dicts in state_dicts_all],
	dim=eval_batch_dim,
	)
	for key in state_dicts_all[0].keys()
	}
	else:
	state_dicts = None

	if rgbs is not None:
	rgbs = rgbs.reshape(B, K, -1) # (B, K, 4 or 5)
	invalid = invalid.reshape(B, K, -1)
	sigmas = sigmas.reshape(B, K)

	if extras is not None:
	extras = extras.reshape(B, K, -1)

	if state_dicts is not None:
	state_dicts = {
	key: value.reshape(B, K, *value.shape[2:])
	for key, value in state_dicts.items()
	} # BxKx... (BxKxn_viewsx...)

	if self.training and self.noise_std > 0.0:
	sigmas = sigmas + torch.randn_like(sigmas) * self.noise_std

	alphas = 1 - torch.exp(
	-deltas.abs() * torch.relu(sigmas)
	) # (B, K) (delta should be positive anyways)

	if self.hard_alpha_cap:
	alphas[:, -1] = 1

	deltas, sigmas = None, None

	alphas_shifted = torch.cat(
	[torch.ones_like(alphas[:, :1]), 1 - alphas + 1e-10], -1
	) # (B, K+1) = [1, a1, a2, ...]
	T = torch.cumprod(alphas_shifted, -1) # (B)
	weights = alphas * T[:, :-1] # (B, K)
	# alphas = None
	alphas_shifted = None

	depth_final = torch.sum(weights * z_samp, -1) # (B)
	state_dicts["dino_features"] = torch.sum(state_dicts["dino_features"].mul_(weights.unsqueeze(-1)), -2)

	if self.render_mode == "neus":
	# dist_from_surf = z_samp - depth_final[..., None]
	indices = torch.arange(0, weights.shape[-1], device=weights.device, dtype=weights.dtype).unsqueeze(0)
	surface_index = torch.sum(weights * indices, dim=-1, keepdim=True)
	dist_from_surf = surface_index - indices
	weights = torch.exp(-.5 * (dist_from_surf * self.surface_sigmoid_scale) ** 2)
	weights = weights / torch.sum(weights, dim=-1, keepdim=True)

	if not self.only_surface_color:
	rgb_final = torch.sum(weights.unsqueeze(-1) * rgbs, -2) # (B, 3)
	else:
	surface_points = rays[:, None, :3] + depth_final[:, None, None] * rays[:, None, 3:6]
	surface_points = surface_points.reshape(sb, -1, 3)

	if ray_info is not None:
	ray_info = ray_info.reshape(sb, -1, K, ray_info.shape[-1])[:, :, 0, :]

	rgb_final, invalid_colors = model.sample_colors(surface_points, ray_info=ray_info, render_flow=self.render_flow)
	rgb_final = rgb_final.permute(0, 2, 1, 3).reshape(B, -1)
	invalid_colors = invalid_colors.permute(0, 2, 1, 3).reshape(B, 1, -1)
	invalid = ((invalid > .5) \| invalid_colors).float()

	if self.white_bkgd:
	# White background
	pix_alpha = weights.sum(dim=1) # (B), pixel alpha
	rgb_final = rgb_final + 1 - pix_alpha.unsqueeze(-1) # (B, 3)

	if extras is not None:
	extras_final = torch.sum(weights.unsqueeze(-1) * extras, -2) # (B, extras)
	else:
	extras_final = None

	for name, x in [("weights", weights), ("rgb_final", rgb_final), ("depth_final", depth_final), ("alphas", alphas), ("invalid", invalid), ("z_samp", z_samp)]:
	if torch.any(torch.isnan(x)):
	print(f"Detected NaN in {name} ({x.dtype}):")
	print(x)
	exit()

	if ray_info is not None:
	ray_info = rays[:, None, 8:]

	# return (weights, rgb_final, depth_final, alphas, invalid, z_samp, rgbs, viewdirs)
	return (
	weights,
	rgb_final,
	depth_final,
	alphas,
	invalid,
	z_samp,
	rgbs,
	ray_info,
	extras_final,
	state_dicts,
	)

	def forward(
	self,
	model,
	rays,
	want_weights=False,
	want_alphas=False,
	want_z_samps=False,
	want_rgb_samps=False,
	sample_from_dist=None,
	):
	"""
	:model nerf model, should return (SB, B, (r, g, b, sigma))
	when called with (SB, B, (x, y, z)), for multi-object:
	SB = 'super-batch' = size of object batch,
	B = size of per-object ray batch.
	Should also support 'coarse' boolean argument for coarse NeRF.
	:param rays ray spec [origins (3), directions (3), near (1), far (1)] (SB, B, 8)
	:param want_weights if true, returns compositing weights (SB, B, K)
	:return render dict
	"""
	with profiler.record_function("renderer_forward"):
	if self.sched is not None and self.last_sched.item() > 0:
	self.n_coarse = self.sched[1][self.last_sched.item() - 1]
	self.n_fine = self.sched[2][self.last_sched.item() - 1]

	assert len(rays.shape) == 3
	superbatch_size = rays.shape[0]
	r_dim = rays.shape[-1]

	rays = rays.reshape(-1, r_dim) # (SB * B, 8)

	if sample_from_dist is None:
	z_coarse = self.sample_coarse(rays) # (B, Kc)
	else:
	prop_weights, prop_z_samp = sample_from_dist
	n_samples = prop_weights.shape[-1]
	prop_weights = prop_weights.reshape(-1, n_samples)
	prop_z_samp = prop_z_samp.reshape(-1, n_samples)
	z_coarse = self.sample_coarse_from_dist(rays, prop_weights, prop_z_samp)
	z_coarse, _ = torch.sort(z_coarse, dim=-1)
	coarse_composite = self.composite(
	model,
	rays,
	z_coarse,
	coarse=True,
	sb=superbatch_size,
	)

	outputs = DotMap(
	coarse=self._format_outputs(
	coarse_composite,
	superbatch_size,
	want_weights=want_weights,
	want_alphas=want_alphas,
	want_z_samps=want_z_samps,
	want_rgb_samps=want_rgb_samps,
	),
	)
	outputs.state_dict = coarse_composite[-1]

	if self.using_fine:
	all_samps = [z_coarse]
	if self.n_fine - self.n_fine_depth > 0:
	all_samps.append(
	self.sample_fine(rays, coarse_composite[0].detach())
	) # (B, Kf - Kfd)
	if self.n_fine_depth > 0:
	all_samps.append(
	self.sample_fine_depth(rays, coarse_composite[2])
	) # (B, Kfd)
	z_combine = torch.cat(all_samps, dim=-1) # (B, Kc + Kf)
	z_combine_sorted, argsort = torch.sort(z_combine, dim=-1)
	fine_composite = self.composite(
	model,
	rays,
	z_combine_sorted,
	coarse=False,
	sb=superbatch_size,
	)
	outputs.fine = self._format_outputs(
	fine_composite,
	superbatch_size,
	want_weights=want_weights,
	want_alphas=want_alphas,
	want_z_samps=want_z_samps,
	want_rgb_samps=want_rgb_samps,
	)

	return outputs

	def _format_outputs(
	self,
	rendered_outputs,
	superbatch_size,
	want_weights=False,
	want_alphas=False,
	want_z_samps=False,
	want_rgb_samps=False,
	):
	(
	weights,
	rgb_final,
	depth,
	alphas,
	invalid,
	z_samps,
	rgb_samps,
	ray_info,
	extras,
	state_dict,
	) = rendered_outputs
	n_smps = weights.shape[-1]
	out_d_rgb = rgb_final.shape[-1]
	out_d_i = invalid.shape[-1]
	out_d_dino = state_dict["dino_features"].shape[-1]

	if superbatch_size > 0:
	rgb_final = rgb_final.reshape(superbatch_size, -1, out_d_rgb)
	depth = depth.reshape(superbatch_size, -1)
	invalid = invalid.reshape(superbatch_size, -1, n_smps, out_d_i)
	ret_dict = DotMap(rgb=rgb_final, depth=depth, invalid=invalid)
	if ray_info is not None:
	ri_shape = ray_info.shape[-1]
	ray_info = ray_info.reshape(superbatch_size, -1, ri_shape)
	ret_dict.ray_info = ray_info
	if extras is not None:
	extras_shape = extras.shape[-1]
	extras = extras.reshape(superbatch_size, -1, extras_shape)
	ret_dict.extras = extras
	if want_weights:
	weights = weights.reshape(superbatch_size, -1, n_smps)
	ret_dict.weights = weights
	if want_alphas:
	alphas = alphas.reshape(superbatch_size, -1, n_smps)
	ret_dict.alphas = alphas
	if want_z_samps:
	z_samps = z_samps.reshape(superbatch_size, -1, n_smps)
	ret_dict.z_samps = z_samps
	if want_rgb_samps:
	rgb_samps = rgb_samps.reshape(superbatch_size, -1, n_smps, out_d_rgb)
	ret_dict.rgb_samps = rgb_samps
	if "dino_features" in state_dict:
	dino_features = state_dict["dino_features"].reshape(superbatch_size, -1, out_d_dino)
	ret_dict.dino_features = dino_features
	if "invalid_features" in state_dict:
	invalid_features = state_dict["invalid_features"].reshape(superbatch_size, -1, n_smps, out_d_i)
	ret_dict.invalid_features = invalid_features
	return ret_dict

	def sched_step(self, steps=1):
	"""
	Called each training iteration to update sample numbers
	according to schedule
	"""
	if self.sched is None:
	return
	self.iter_idx += steps
	while (
	self.last_sched.item() < len(self.sched[0])
	and self.iter_idx.item() >= self.sched[0][self.last_sched.item()]
	):
	self.n_coarse = self.sched[1][self.last_sched.item()]
	self.n_fine = self.sched[2][self.last_sched.item()]
	print(
	"INFO: NeRF sampling resolution changed on schedule ==> c",
	self.n_coarse,
	"f",
	self.n_fine,
	)
	self.last_sched += 1

	@classmethod
	def from_conf(cls, conf, white_bkgd=False, eval_batch_size=100000):
	return cls(
	conf.get("n_coarse", 128),
	conf.get("n_fine", 0),
	n_fine_depth=conf.get("n_fine_depth", 0),
	noise_std=conf.get("noise_std", 0.0),
	depth_std=conf.get("depth_std", 0.01),
	white_bkgd=conf.get("white_bkgd", white_bkgd),
	lindisp=conf.get("lindisp", True),
	eval_batch_size=conf.get("eval_batch_size", eval_batch_size),
	sched=conf.get("sched", None),
	hard_alpha_cap=conf.get("hard_alpha_cap", False),
	render_mode=conf.get("render_mode", "volumetric"),
	surface_sigmoid_scale=conf.get("surface_sigmoid_scale", 1),
	render_flow=conf.get("render_flow", False),
	normalize_dino=conf.get("normalize_dino", False),
	)

	def bind_parallel(self, net, gpus=None, simple_output=False):
	"""
	Returns a wrapper module compatible with DataParallel.
	Specifically, it renders rays with this renderer
	but always using the given network instance.
	Specify a list of GPU ids in 'gpus' to apply DataParallel automatically.
	:param net A PixelNeRF network
	:param gpus list of GPU ids to parallize to. If length is 1,
	does not parallelize
	:param simple_output only returns rendered (rgb, depth) instead of the
	full render output map. Saves data tranfer cost.
	:return torch module
	"""
	wrapped = _RenderWrapper(net, self, simple_output=simple_output)
	if gpus is not None and len(gpus) > 1:
	print("Using multi-GPU", gpus)
	wrapped = torch.nn.DataParallel(wrapped, gpus, dim=1)
	return wrapped