correct some comments
Browse files
- encoding.py +0 -41
- main_nerf.py +10 -10
- nerf/network_grid.py +1 -0
- nerf/utils.py +1 -1
encoding.py
CHANGED
@@ -2,46 +2,6 @@ import torch
|
|
2 |
import torch.nn as nn
|
3 |
import torch.nn.functional as F
|
4 |
|
5 |
-
class FreqEncoder(nn.Module):
|
6 |
-
def __init__(self, input_dim, max_freq_log2, N_freqs,
|
7 |
-
log_sampling=True, include_input=True,
|
8 |
-
periodic_fns=(torch.sin, torch.cos)):
|
9 |
-
|
10 |
-
super().__init__()
|
11 |
-
|
12 |
-
self.input_dim = input_dim
|
13 |
-
self.include_input = include_input
|
14 |
-
self.periodic_fns = periodic_fns
|
15 |
-
|
16 |
-
self.output_dim = 0
|
17 |
-
if self.include_input:
|
18 |
-
self.output_dim += self.input_dim
|
19 |
-
|
20 |
-
self.output_dim += self.input_dim * N_freqs * len(self.periodic_fns)
|
21 |
-
|
22 |
-
if log_sampling:
|
23 |
-
self.freq_bands = 2. ** torch.linspace(0., max_freq_log2, N_freqs)
|
24 |
-
else:
|
25 |
-
self.freq_bands = torch.linspace(2. ** 0., 2. ** max_freq_log2, N_freqs)
|
26 |
-
|
27 |
-
self.freq_bands = self.freq_bands.numpy().tolist()
|
28 |
-
|
29 |
-
def forward(self, input, **kwargs):
|
30 |
-
|
31 |
-
out = []
|
32 |
-
if self.include_input:
|
33 |
-
out.append(input)
|
34 |
-
|
35 |
-
for i in range(len(self.freq_bands)):
|
36 |
-
freq = self.freq_bands[i]
|
37 |
-
for p_fn in self.periodic_fns:
|
38 |
-
out.append(p_fn(input * freq))
|
39 |
-
|
40 |
-
out = torch.cat(out, dim=-1)
|
41 |
-
|
42 |
-
|
43 |
-
return out
|
44 |
-
|
45 |
def get_encoder(encoding, input_dim=3,
|
46 |
multires=6,
|
47 |
degree=4,
|
@@ -52,7 +12,6 @@ def get_encoder(encoding, input_dim=3,
|
|
52 |
return lambda x, **kwargs: x, input_dim
|
53 |
|
54 |
elif encoding == 'frequency':
|
55 |
-
#encoder = FreqEncoder(input_dim=input_dim, max_freq_log2=multires-1, N_freqs=multires, log_sampling=True)
|
56 |
from freqencoder import FreqEncoder
|
57 |
encoder = FreqEncoder(input_dim=input_dim, degree=multires)
|
58 |
|
|
|
2 |
import torch.nn as nn
|
3 |
import torch.nn.functional as F
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
def get_encoder(encoding, input_dim=3,
|
6 |
multires=6,
|
7 |
degree=4,
|
|
|
12 |
return lambda x, **kwargs: x, input_dim
|
13 |
|
14 |
elif encoding == 'frequency':
|
|
|
15 |
from freqencoder import FreqEncoder
|
16 |
encoder = FreqEncoder(input_dim=input_dim, degree=multires)
|
17 |
|
main_nerf.py
CHANGED
@@ -5,8 +5,6 @@ from nerf.provider import NeRFDataset
|
|
5 |
from nerf.utils import *
|
6 |
from optimizer import Shampoo
|
7 |
|
8 |
-
from nerf.sd import StableDiffusion
|
9 |
-
from nerf.clip import CLIP
|
10 |
from nerf.gui import NeRFGUI
|
11 |
|
12 |
# torch.autograd.set_detect_anomaly(True)
|
@@ -14,8 +12,8 @@ from nerf.gui import NeRFGUI
|
|
14 |
if __name__ == '__main__':
|
15 |
|
16 |
parser = argparse.ArgumentParser()
|
17 |
-
parser.add_argument('--text', help="text prompt")
|
18 |
-
parser.add_argument('-O', action='store_true', help="equals --fp16 --cuda_ray --[…truncated]
|
19 |
parser.add_argument('--test', action='store_true', help="test mode")
|
20 |
parser.add_argument('--workspace', type=str, default='workspace')
|
21 |
parser.add_argument('--guidance', type=str, default='stable-diffusion', help='choose from [stable-diffusion, clip]')
|
@@ -31,7 +29,7 @@ if __name__ == '__main__':
|
|
31 |
parser.add_argument('--upsample_steps', type=int, default=0, help="num steps up-sampled per ray (only valid when not using --cuda_ray)")
|
32 |
parser.add_argument('--update_extra_interval', type=int, default=16, help="iter interval to update extra status (only valid when using --cuda_ray)")
|
33 |
parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when not using --cuda_ray)")
|
34 |
-
parser.add_argument('--albedo_iters', type=int, default=15000, help="training iters")
|
35 |
# model options
|
36 |
parser.add_argument('--bg_radius', type=float, default=1.4, help="if positive, use a background model at sphere(bg_radius)")
|
37 |
parser.add_argument('--density_thresh', type=float, default=10, help="threshold for density grid to be occupied")
|
@@ -39,8 +37,8 @@ if __name__ == '__main__':
|
|
39 |
parser.add_argument('--fp16', action='store_true', help="use amp mixed precision training")
|
40 |
parser.add_argument('--backbone', type=str, default='grid', help="nerf backbone, choose from [grid, tcnn, vanilla]")
|
41 |
# rendering resolution in training
|
42 |
-
parser.add_argument('--w', type=int, default=64, help="render width for [. . . truncated]
|
43 |
-
parser.add_argument('--h', type=int, default=64, help="render height for [. . . truncated]
|
44 |
|
45 |
### dataset options
|
46 |
parser.add_argument('--bound', type=float, default=1, help="assume the scene is bounded in box(-bound, bound)")
|
@@ -48,7 +46,7 @@ if __name__ == '__main__':
|
|
48 |
parser.add_argument('--min_near', type=float, default=0.1, help="minimum near distance for camera")
|
49 |
parser.add_argument('--radius_range', type=float, nargs='*', default=[1.0, 1.5], help="training camera radius range")
|
50 |
parser.add_argument('--fovy_range', type=float, nargs='*', default=[40, 70], help="training camera fovy range")
|
51 |
-
parser.add_argument('--dir_text', action='store_true', help="direction[…truncated]
|
52 |
|
53 |
### GUI options
|
54 |
parser.add_argument('--gui', action='store_true', help="start a GUI")
|
@@ -58,7 +56,7 @@ if __name__ == '__main__':
|
|
58 |
parser.add_argument('--fovy', type=float, default=60, help="default GUI camera fovy")
|
59 |
parser.add_argument('--light_theta', type=float, default=60, help="default GUI light direction")
|
60 |
parser.add_argument('--light_phi', type=float, default=0, help="default GUI light direction")
|
61 |
-
parser.add_argument('--max_spp', type=int, default=[…truncated]
|
62 |
|
63 |
opt = parser.parse_args()
|
64 |
|
@@ -87,7 +85,7 @@ if __name__ == '__main__':
|
|
87 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
88 |
|
89 |
if opt.test:
|
90 |
-
guidance = None # […truncated]
|
91 |
|
92 |
trainer = Trainer('ngp', opt, model, guidance, device=device, workspace=opt.workspace, fp16=opt.fp16, use_checkpoint=opt.ckpt)
|
93 |
|
@@ -103,8 +101,10 @@ if __name__ == '__main__':
|
|
103 |
else:
|
104 |
|
105 |
if opt.guidance == 'stable-diffusion':
|
|
|
106 |
guidance = StableDiffusion(device)
|
107 |
elif opt.guidance == 'clip':
|
|
|
108 |
guidance = CLIP(device)
|
109 |
else:
|
110 |
raise NotImplementedError(f'--guidance {opt.guidance} is not implemented.')
|
|
|
5 |
from nerf.utils import *
|
6 |
from optimizer import Shampoo
|
7 |
|
|
|
|
|
8 |
from nerf.gui import NeRFGUI
|
9 |
|
10 |
# torch.autograd.set_detect_anomaly(True)
|
|
|
12 |
if __name__ == '__main__':
|
13 |
|
14 |
parser = argparse.ArgumentParser()
|
15 |
+
parser.add_argument('--text', default=None, help="text prompt")
|
16 |
+
parser.add_argument('-O', action='store_true', help="equals --fp16 --cuda_ray --dir_text")
|
17 |
parser.add_argument('--test', action='store_true', help="test mode")
|
18 |
parser.add_argument('--workspace', type=str, default='workspace')
|
19 |
parser.add_argument('--guidance', type=str, default='stable-diffusion', help='choose from [stable-diffusion, clip]')
|
|
|
29 |
parser.add_argument('--upsample_steps', type=int, default=0, help="num steps up-sampled per ray (only valid when not using --cuda_ray)")
|
30 |
parser.add_argument('--update_extra_interval', type=int, default=16, help="iter interval to update extra status (only valid when using --cuda_ray)")
|
31 |
parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when not using --cuda_ray)")
|
32 |
+
parser.add_argument('--albedo_iters', type=int, default=15000, help="training iters that only use albedo shading")
|
33 |
# model options
|
34 |
parser.add_argument('--bg_radius', type=float, default=1.4, help="if positive, use a background model at sphere(bg_radius)")
|
35 |
parser.add_argument('--density_thresh', type=float, default=10, help="threshold for density grid to be occupied")
|
|
|
37 |
parser.add_argument('--fp16', action='store_true', help="use amp mixed precision training")
|
38 |
parser.add_argument('--backbone', type=str, default='grid', help="nerf backbone, choose from [grid, tcnn, vanilla]")
|
39 |
# rendering resolution in training
|
40 |
+
parser.add_argument('--w', type=int, default=64, help="render width for NeRF in training")
|
41 |
+
parser.add_argument('--h', type=int, default=64, help="render height for NeRF in training")
|
42 |
|
43 |
### dataset options
|
44 |
parser.add_argument('--bound', type=float, default=1, help="assume the scene is bounded in box(-bound, bound)")
|
|
|
46 |
parser.add_argument('--min_near', type=float, default=0.1, help="minimum near distance for camera")
|
47 |
parser.add_argument('--radius_range', type=float, nargs='*', default=[1.0, 1.5], help="training camera radius range")
|
48 |
parser.add_argument('--fovy_range', type=float, nargs='*', default=[40, 70], help="training camera fovy range")
|
49 |
+
parser.add_argument('--dir_text', action='store_true', help="direction-encode the text prompt, by appending front/side/back/overhead view")
|
50 |
|
51 |
### GUI options
|
52 |
parser.add_argument('--gui', action='store_true', help="start a GUI")
|
|
|
56 |
parser.add_argument('--fovy', type=float, default=60, help="default GUI camera fovy")
|
57 |
parser.add_argument('--light_theta', type=float, default=60, help="default GUI light direction")
|
58 |
parser.add_argument('--light_phi', type=float, default=0, help="default GUI light direction")
|
59 |
+
parser.add_argument('--max_spp', type=int, default=1, help="GUI rendering max sample per pixel")
|
60 |
|
61 |
opt = parser.parse_args()
|
62 |
|
|
|
85 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
86 |
|
87 |
if opt.test:
|
88 |
+
guidance = None # no need to load guidance model at test
|
89 |
|
90 |
trainer = Trainer('ngp', opt, model, guidance, device=device, workspace=opt.workspace, fp16=opt.fp16, use_checkpoint=opt.ckpt)
|
91 |
|
|
|
101 |
else:
|
102 |
|
103 |
if opt.guidance == 'stable-diffusion':
|
104 |
+
from nerf.sd import StableDiffusion
|
105 |
guidance = StableDiffusion(device)
|
106 |
elif opt.guidance == 'clip':
|
107 |
+
from nerf.clip import CLIP
|
108 |
guidance = CLIP(device)
|
109 |
else:
|
110 |
raise NotImplementedError(f'--guidance {opt.guidance} is not implemented.')
|
nerf/network_grid.py
CHANGED
@@ -64,6 +64,7 @@ class NeRFNetwork(NeRFRenderer):
|
|
64 |
else:
|
65 |
self.bg_net = None
|
66 |
|
|
|
67 |
def gaussian(self, x):
|
68 |
# x: [B, N, 3]
|
69 |
|
|
|
64 |
else:
|
65 |
self.bg_net = None
|
66 |
|
67 |
+
# add a density blob to the scene center
|
68 |
def gaussian(self, x):
|
69 |
# x: [B, N, 3]
|
70 |
|
nerf/utils.py
CHANGED
@@ -209,6 +209,7 @@ class Trainer(object):
|
|
209 |
self.guidance = guidance
|
210 |
|
211 |
if self.guidance is not None:
|
|
|
212 |
|
213 |
for p in self.guidance.parameters():
|
214 |
p.requires_grad = False
|
@@ -401,7 +402,6 @@ class Trainer(object):
|
|
401 |
|
402 |
return pred_rgb, pred_depth, loss
|
403 |
|
404 |
-
# moved out bg_color and perturb for more flexible control...
|
405 |
def test_step(self, data, bg_color=None, perturb=False):
|
406 |
rays_o = data['rays_o'] # [B, N, 3]
|
407 |
rays_d = data['rays_d'] # [B, N, 3]
|
|
|
209 |
self.guidance = guidance
|
210 |
|
211 |
if self.guidance is not None:
|
212 |
+
assert ref_text is not None, 'Training must provide a text prompt!'
|
213 |
|
214 |
for p in self.guidance.parameters():
|
215 |
p.requires_grad = False
|
|
|
402 |
|
403 |
return pred_rgb, pred_depth, loss
|
404 |
|
|
|
405 |
def test_step(self, data, bg_color=None, perturb=False):
|
406 |
rays_o = data['rays_o'] # [B, N, 3]
|
407 |
rays_d = data['rays_d'] # [B, N, 3]
|