ashawkey committed
Commit b4991f6
1 Parent(s): a64a526

update requirements

Files changed (5):
  1. main.py +151 -0
  2. nerf/renderer.py +1 -1
  3. readme.md +7 -6
  4. requirements.txt +2 -1
  5. scripts/run.sh +3 -3
main.py ADDED
@@ -0,0 +1,151 @@
+import torch
+import argparse
+
+from nerf.provider import NeRFDataset
+from nerf.utils import *
+from optimizer import Shampoo
+
+from nerf.gui import NeRFGUI
+
+# torch.autograd.set_detect_anomaly(True)
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--text', default=None, help="text prompt")
+    parser.add_argument('-O', action='store_true', help="equals --fp16 --cuda_ray --dir_text")
+    parser.add_argument('-O2', action='store_true', help="equals --fp16 --dir_text")
+    parser.add_argument('--test', action='store_true', help="test mode")
+    parser.add_argument('--save_mesh', action='store_true', help="export an obj mesh with texture")
+    parser.add_argument('--workspace', type=str, default='workspace')
+    parser.add_argument('--guidance', type=str, default='stable-diffusion', help='choose from [stable-diffusion, clip]')
+    parser.add_argument('--seed', type=int, default=0)
+
+    ### training options
+    parser.add_argument('--iters', type=int, default=15000, help="training iters")
+    parser.add_argument('--lr', type=float, default=1e-3, help="initial learning rate")
+    parser.add_argument('--ckpt', type=str, default='latest')
+    parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
+    parser.add_argument('--max_steps', type=int, default=1024, help="max num steps sampled per ray (only valid when using --cuda_ray)")
+    parser.add_argument('--num_steps', type=int, default=256, help="num steps sampled per ray (only valid when not using --cuda_ray)")
+    parser.add_argument('--upsample_steps', type=int, default=0, help="num steps up-sampled per ray (only valid when not using --cuda_ray)")
+    parser.add_argument('--update_extra_interval', type=int, default=16, help="iter interval to update extra status (only valid when using --cuda_ray)")
+    parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when not using --cuda_ray)")
+    parser.add_argument('--albedo_iters', type=int, default=15000, help="training iters that only use albedo shading")
+    # model options
+    parser.add_argument('--bg_radius', type=float, default=1.4, help="if positive, use a background model at sphere(bg_radius)")
+    parser.add_argument('--density_thresh', type=float, default=10, help="threshold for density grid to be occupied")
+    # network backbone
+    parser.add_argument('--fp16', action='store_true', help="use amp mixed precision training")
+    parser.add_argument('--backbone', type=str, default='grid', help="nerf backbone, choose from [grid, tcnn, vanilla]")
+    # rendering resolution in training
+    parser.add_argument('--w', type=int, default=128, help="render width for NeRF in training")
+    parser.add_argument('--h', type=int, default=128, help="render height for NeRF in training")
+
+    ### dataset options
+    parser.add_argument('--bound', type=float, default=1, help="assume the scene is bounded in box(-bound, bound)")
+    parser.add_argument('--dt_gamma', type=float, default=0, help="dt_gamma (>=0) for adaptive ray marching. set to 0 to disable, >0 to accelerate rendering (but usually with worse quality)")
+    parser.add_argument('--min_near', type=float, default=0.1, help="minimum near distance for camera")
+    parser.add_argument('--radius_range', type=float, nargs='*', default=[1.0, 1.5], help="training camera radius range")
+    parser.add_argument('--fovy_range', type=float, nargs='*', default=[40, 70], help="training camera fovy range")
+    parser.add_argument('--dir_text', action='store_true', help="direction-encode the text prompt, by appending front/side/back/overhead view")
+    parser.add_argument('--angle_overhead', type=float, default=30, help="[0, angle_overhead] is the overhead region")
+    parser.add_argument('--angle_front', type=float, default=30, help="[0, angle_front] is the front region, [180, 180+angle_front] the back region, otherwise the side region.")
+
+    parser.add_argument('--lambda_entropy', type=float, default=1e-4, help="loss scale for alpha entropy")
+    parser.add_argument('--lambda_orient', type=float, default=1e-2, help="loss scale for orientation")
+
+    ### GUI options
+    parser.add_argument('--gui', action='store_true', help="start a GUI")
+    parser.add_argument('--W', type=int, default=800, help="GUI width")
+    parser.add_argument('--H', type=int, default=800, help="GUI height")
+    parser.add_argument('--radius', type=float, default=3, help="default GUI camera radius from center")
+    parser.add_argument('--fovy', type=float, default=60, help="default GUI camera fovy")
+    parser.add_argument('--light_theta', type=float, default=60, help="default GUI light direction in [0, 180], corresponding to elevation [90, -90]")
+    parser.add_argument('--light_phi', type=float, default=0, help="default GUI light direction in [0, 360), azimuth")
+    parser.add_argument('--max_spp', type=int, default=1, help="GUI rendering max sample per pixel")
+
+    opt = parser.parse_args()
+
+    if opt.O:
+        opt.fp16 = True
+        opt.cuda_ray = True
+        opt.dir_text = True
+    elif opt.O2:
+        opt.fp16 = True
+        opt.dir_text = True
+
+    if opt.backbone == 'vanilla':
+        from nerf.network import NeRFNetwork
+    elif opt.backbone == 'tcnn':
+        from nerf.network_tcnn import NeRFNetwork
+    elif opt.backbone == 'grid':
+        from nerf.network_grid import NeRFNetwork
+    else:
+        raise NotImplementedError(f'--backbone {opt.backbone} is not implemented!')
+
+    print(opt)
+
+    seed_everything(opt.seed)
+
+    model = NeRFNetwork(opt)
+
+    print(model)
+
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+    if opt.test:
+        guidance = None # no need to load guidance model at test
+
+        trainer = Trainer('ngp', opt, model, guidance, device=device, workspace=opt.workspace, fp16=opt.fp16, use_checkpoint=opt.ckpt)
+
+        if opt.gui:
+            gui = NeRFGUI(opt, trainer)
+            gui.render()
+
+        else:
+            test_loader = NeRFDataset(opt, device=device, type='test', H=opt.H, W=opt.W, size=100).dataloader()
+            trainer.test(test_loader)
+
+            if opt.save_mesh:
+                trainer.save_mesh(resolution=256)
+
+    else:
+
+        if opt.guidance == 'stable-diffusion':
+            from nerf.sd import StableDiffusion
+            guidance = StableDiffusion(device)
+        elif opt.guidance == 'clip':
+            from nerf.clip import CLIP
+            guidance = CLIP(device)
+        else:
+            raise NotImplementedError(f'--guidance {opt.guidance} is not implemented.')
+
+        optimizer = lambda model: torch.optim.Adam(model.get_params(opt.lr), betas=(0.9, 0.99), eps=1e-15)
+        # optimizer = lambda model: Shampoo(model.get_params(opt.lr))
+
+        train_loader = NeRFDataset(opt, device=device, type='train', H=opt.h, W=opt.w, size=100).dataloader()
+
+        # decay to 0.01 * init_lr at last iter step
+        scheduler = lambda optimizer: optim.lr_scheduler.LambdaLR(optimizer, lambda iter: 0.01 ** min(iter / opt.iters, 1))
+
+        trainer = Trainer('ngp', opt, model, guidance, device=device, workspace=opt.workspace, optimizer=optimizer, ema_decay=0.95, fp16=opt.fp16, lr_scheduler=scheduler, use_checkpoint=opt.ckpt, eval_interval=1)
+
+        if opt.gui:
+            trainer.train_loader = train_loader # attach dataloader to trainer
+
+            gui = NeRFGUI(opt, trainer)
+            gui.render()
+
+        else:
+            valid_loader = NeRFDataset(opt, device=device, type='val', H=opt.H, W=opt.W, size=5).dataloader()

+            max_epoch = np.ceil(opt.iters / len(train_loader)).astype(np.int32)
+            trainer.train(train_loader, valid_loader, max_epoch)
+
+            # also test
+            test_loader = NeRFDataset(opt, device=device, type='test', H=opt.H, W=opt.W, size=100).dataloader()
+            trainer.test(test_loader)
+
+            if opt.save_mesh:
+                trainer.save_mesh(resolution=256)
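
The `scheduler` lambda near the end of `main.py` decays the learning rate exponentially to 1% of its initial value over `opt.iters` steps, then clamps. A quick standalone check of the multiplier it computes, using the `--iters` default from this file:

```python
# LR multiplier from main.py: 0.01 ** min(iter / opt.iters, 1).
# Decays from 1.0 at step 0 to 0.01 at the final step, then stays clamped.
iters = 15000  # matches the --iters default above
for step in (0, 7500, 15000, 20000):
    mult = 0.01 ** min(step / iters, 1)
    print(f"step {step:>5}: lr multiplier = {mult:.4f}")
# step     0: lr multiplier = 1.0000
# step  7500: lr multiplier = 0.1000
# step 15000: lr multiplier = 0.0100
# step 20000: lr multiplier = 0.0100
```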
nerf/renderer.py CHANGED
@@ -168,7 +168,7 @@ class NeRFRenderer(nn.Module):
         from sklearn.neighbors import NearestNeighbors
         from scipy.ndimage import binary_dilation, binary_erosion
 
-        glctx = dr.RasterizeGLContext()
+        glctx = dr.RasterizeCudaContext()
 
         atlas = xatlas.Atlas()
         atlas.add_mesh(v_np, f_np)
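
The swap from `RasterizeGLContext` to `RasterizeCudaContext` matters on headless machines: nvdiffrast's GL context needs an OpenGL display, while the CUDA context rasterizes directly on the GPU. A minimal sketch (not part of this commit) of a defensive variant that prefers the CUDA rasterizer and falls back to OpenGL:

```python
import nvdiffrast.torch as dr

def make_raster_context():
    """Prefer the CUDA rasterizer (no display needed); fall back to OpenGL."""
    try:
        return dr.RasterizeCudaContext()
    except Exception:
        # Older nvdiffrast builds may only ship the OpenGL rasterizer.
        return dr.RasterizeGLContext()
```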
readme.md CHANGED
@@ -68,19 +68,20 @@ First time running will take some time to compile the CUDA extensions.
 
 ```bash
 ### stable-dreamfusion setting
-# train with text prompt
+## train with text prompt
 # `-O` equals `--cuda_ray --fp16 --dir_text`
-python main_nerf.py --text "a hamburger" --workspace trial -O
+python main.py --text "a hamburger" --workspace trial -O
 
+## after the training is finished:
 # test (exporting 360 video, and an obj mesh with png texture)
-python main_nerf.py --text "a hamburger" --workspace trial -O --test
+python main.py --workspace trial -O --test
 
 # test with a GUI (free view control!)
-python main_nerf.py --text "a hamburger" --workspace trial -O --test --gui
+python main.py --workspace trial -O --test --gui
 
 ### dreamfields (CLIP) setting
-python main_nerf.py --text "a hamburger" --workspace trial_clip -O --guidance clip
-python main_nerf.py --text "a hamburger" --workspace trial_clip -O --test --gui --guidance clip
+python main.py --text "a hamburger" --workspace trial_clip -O --guidance clip
+python main.py --text "a hamburger" --workspace trial_clip -O --test --gui --guidance clip
 ```
 
 # Code organization & Advanced tips
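
Note that the updated test commands omit `--text`: as the new `main.py` above shows, test mode skips loading the guidance model entirely (`guidance = None`) and restores the trained NeRF from the workspace checkpoint, so the prompt is only needed for training.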
requirements.txt CHANGED
@@ -15,4 +15,5 @@ dearpygui
 scipy
 diffusers
 transformers
-xatlas
+xatlas
+scikit-learn
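
The added `scikit-learn` dependency backs the `from sklearn.neighbors import NearestNeighbors` import visible in the `nerf/renderer.py` context above (the package installs as `scikit-learn` but is imported as `sklearn`).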
scripts/run.sh CHANGED
@@ -1,5 +1,5 @@
 #! /bin/bash
 
-CUDA_VISIBLE_DEVICES=1 python main_nerf.py -O --text "a DSLR photo of cthulhu" --workspace trial_cthulhu
-CUDA_VISIBLE_DEVICES=1 python main_nerf.py -O --text "a DSLR photo of a squirrel" --workspace trial_squirrel
-CUDA_VISIBLE_DEVICES=1 python main_nerf.py -O --text "a DSLR photo of a cat lying on its side batting at a ball of yarn" --workspace trial_cat_lying
+CUDA_VISIBLE_DEVICES=1 python main.py -O --text "a DSLR photo of cthulhu" --workspace trial_cthulhu
+CUDA_VISIBLE_DEVICES=1 python main.py -O --text "a DSLR photo of a squirrel" --workspace trial_squirrel
+CUDA_VISIBLE_DEVICES=1 python main.py -O --text "a DSLR photo of a cat lying on its side batting at a ball of yarn" --workspace trial_cat_lying