fix lr strategy
Browse files
- main.py +6 -6
- nerf/sd.py +3 -3
- readme.md +3 -3
main.py
CHANGED
@@ -23,7 +23,7 @@ if __name__ == '__main__':
|
|
23 |
parser.add_argument('--seed', type=int, default=0)
|
24 |
|
25 |
### training options
|
26 |
-
parser.add_argument('--iters', type=int, default=
|
27 |
parser.add_argument('--lr', type=float, default=1e-3, help="initial learning rate")
|
28 |
parser.add_argument('--ckpt', type=str, default='latest')
|
29 |
parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
|
@@ -52,7 +52,7 @@ if __name__ == '__main__':
|
|
52 |
parser.add_argument('--fovy_range', type=float, nargs='*', default=[40, 70], help="training camera fovy range")
|
53 |
parser.add_argument('--dir_text', action='store_true', help="direction-encode the text prompt, by appending front/side/back/overhead view")
|
54 |
parser.add_argument('--angle_overhead', type=float, default=30, help="[0, angle_overhead] is the overhead region")
|
55 |
-
parser.add_argument('--angle_front', type=float, default=
|
56 |
|
57 |
parser.add_argument('--lambda_entropy', type=float, default=1e-4, help="loss scale for alpha entropy")
|
58 |
parser.add_argument('--lambda_opacity', type=float, default=0, help="loss scale for alpha value")
|
@@ -106,7 +106,7 @@ if __name__ == '__main__':
|
|
106 |
if opt.test:
|
107 |
guidance = None # no need to load guidance model at test
|
108 |
|
109 |
-
trainer = Trainer('
|
110 |
|
111 |
if opt.gui:
|
112 |
gui = NeRFGUI(opt, trainer)
|
@@ -135,10 +135,10 @@ if __name__ == '__main__':
|
|
135 |
|
136 |
train_loader = NeRFDataset(opt, device=device, type='train', H=opt.h, W=opt.w, size=100).dataloader()
|
137 |
|
138 |
-
|
139 |
-
scheduler = lambda optimizer: optim.lr_scheduler.
|
140 |
|
141 |
-
trainer = Trainer('
|
142 |
|
143 |
if opt.gui:
|
144 |
trainer.train_loader = train_loader # attach dataloader to trainer
|
|
|
23 |
parser.add_argument('--seed', type=int, default=0)
|
24 |
|
25 |
### training options
|
26 |
+
parser.add_argument('--iters', type=int, default=10000, help="training iters")
|
27 |
parser.add_argument('--lr', type=float, default=1e-3, help="initial learning rate")
|
28 |
parser.add_argument('--ckpt', type=str, default='latest')
|
29 |
parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
|
|
|
52 |
parser.add_argument('--fovy_range', type=float, nargs='*', default=[40, 70], help="training camera fovy range")
|
53 |
parser.add_argument('--dir_text', action='store_true', help="direction-encode the text prompt, by appending front/side/back/overhead view")
|
54 |
parser.add_argument('--angle_overhead', type=float, default=30, help="[0, angle_overhead] is the overhead region")
|
55 |
+
parser.add_argument('--angle_front', type=float, default=60, help="[0, angle_front] is the front region, [180, 180+angle_front] the back region, otherwise the side region.")
|
56 |
|
57 |
parser.add_argument('--lambda_entropy', type=float, default=1e-4, help="loss scale for alpha entropy")
|
58 |
parser.add_argument('--lambda_opacity', type=float, default=0, help="loss scale for alpha value")
|
|
|
106 |
if opt.test:
|
107 |
guidance = None # no need to load guidance model at test
|
108 |
|
109 |
+
trainer = Trainer('df', opt, model, guidance, device=device, workspace=opt.workspace, fp16=opt.fp16, use_checkpoint=opt.ckpt)
|
110 |
|
111 |
if opt.gui:
|
112 |
gui = NeRFGUI(opt, trainer)
|
|
|
135 |
|
136 |
train_loader = NeRFDataset(opt, device=device, type='train', H=opt.h, W=opt.w, size=100).dataloader()
|
137 |
|
138 |
+
scheduler = lambda optimizer: optim.lr_scheduler.LambdaLR(optimizer, lambda iter: 0.1 ** min(iter / opt.iters, 1))
|
139 |
+
# scheduler = lambda optimizer: optim.lr_scheduler.OneCycleLR(optimizer, max_lr=opt.lr, total_steps=opt.iters, pct_start=0.1)
|
140 |
|
141 |
+
trainer = Trainer('df', opt, model, guidance, device=device, workspace=opt.workspace, optimizer=optimizer, ema_decay=0.95, fp16=opt.fp16, lr_scheduler=scheduler, use_checkpoint=opt.ckpt, eval_interval=opt.eval_interval, scheduler_update_every_step=True)
|
142 |
|
143 |
if opt.gui:
|
144 |
trainer.train_loader = train_loader # attach dataloader to trainer
|
nerf/sd.py
CHANGED
@@ -94,9 +94,9 @@ class StableDiffusion(nn.Module):
|
|
94 |
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
95 |
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
|
96 |
|
97 |
-
# w(t),
|
98 |
-
|
99 |
-
w = self.alphas[t] ** 0.5 * (1 - self.alphas[t])
|
100 |
grad = w * (noise_pred - noise)
|
101 |
|
102 |
# clip grad for stable training?
|
|
|
94 |
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
95 |
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
|
96 |
|
97 |
+
# w(t), sigma_t^2
|
98 |
+
w = (1 - self.alphas[t])
|
99 |
+
# w = self.alphas[t] ** 0.5 * (1 - self.alphas[t])
|
100 |
grad = w * (noise_pred - noise)
|
101 |
|
102 |
# clip grad for stable training?
|
readme.md
CHANGED
@@ -17,13 +17,13 @@ This project is a **work-in-progress**, and contains lots of differences from th
|
|
17 |
|
18 |
|
19 |
## Notable differences from the paper
|
20 |
-
* Since the Imagen model is not publicly available, we use [Stable Diffusion](https://github.com/CompVis/stable-diffusion) to replace it (implementation from [diffusers](https://github.com/huggingface/diffusers)). Different from Imagen, Stable-Diffusion is a latent diffusion model, which diffuses in a latent space instead of the original image space. Therefore, we need the loss to propagate back from the VAE's encoder part too, which introduces extra time cost in training. Currently,
|
21 |
* We use the [multi-resolution grid encoder](https://github.com/NVlabs/instant-ngp/) to implement the NeRF backbone (implementation from [torch-ngp](https://github.com/ashawkey/torch-ngp)), which enables much faster rendering (~10FPS at 800x800).
|
22 |
* We use the Adam optimizer with a larger initial learning rate.
|
23 |
|
24 |
|
25 |
## TODOs
|
26 |
-
*
|
27 |
* Better mesh (improve the surface quality).
|
28 |
|
29 |
# Install
|
@@ -82,7 +82,7 @@ python main.py --text "a hamburger" --workspace trial -O
|
|
82 |
|
83 |
# if the above command fails to generate things (learns an empty scene), maybe try:
|
84 |
# 1. disable random lambertian shading, simply use albedo as color:
|
85 |
-
python main.py --text "a hamburger" --workspace trial -O --albedo_iters
|
86 |
# 2. use a smaller density regularization weight:
|
87 |
python main.py --text "a hamburger" --workspace trial -O --lambda_entropy 1e-5
|
88 |
|
|
|
17 |
|
18 |
|
19 |
## Notable differences from the paper
|
20 |
+
* Since the Imagen model is not publicly available, we use [Stable Diffusion](https://github.com/CompVis/stable-diffusion) to replace it (implementation from [diffusers](https://github.com/huggingface/diffusers)). Unlike Imagen, Stable Diffusion is a latent diffusion model, which diffuses in a latent space instead of the original image space. Therefore, the loss also needs to propagate back through the VAE's encoder, which adds extra time cost in training. Currently, 10000 training steps take about 3 hours on a V100.
|
21 |
* We use the [multi-resolution grid encoder](https://github.com/NVlabs/instant-ngp/) to implement the NeRF backbone (implementation from [torch-ngp](https://github.com/ashawkey/torch-ngp)), which enables much faster rendering (~10FPS at 800x800).
|
22 |
* We use the Adam optimizer with a larger initial learning rate.
|
23 |
|
24 |
|
25 |
## TODOs
|
26 |
+
* Alleviate the multi-face [Janus problem](https://twitter.com/poolio/status/1578045212236034048).
|
27 |
* Better mesh (improve the surface quality).
|
28 |
|
29 |
# Install
|
|
|
82 |
|
83 |
# if the above command fails to generate things (learns an empty scene), maybe try:
|
84 |
# 1. disable random lambertian shading, simply use albedo as color:
|
85 |
+
python main.py --text "a hamburger" --workspace trial -O --albedo_iters 10000 # i.e., set --albedo_iters >= --iters, which defaults to 10000
|
86 |
# 2. use a smaller density regularization weight:
|
87 |
python main.py --text "a hamburger" --workspace trial -O --lambda_entropy 1e-5
|
88 |
|