ashawkey committed on
Commit
76ff91e
1 Parent(s): e81396d

fix lr strategy

Browse files
Files changed (3) hide show
  1. main.py +6 -6
  2. nerf/sd.py +3 -3
  3. readme.md +3 -3
main.py CHANGED
@@ -23,7 +23,7 @@ if __name__ == '__main__':
23
  parser.add_argument('--seed', type=int, default=0)
24
 
25
  ### training options
26
- parser.add_argument('--iters', type=int, default=15000, help="training iters")
27
  parser.add_argument('--lr', type=float, default=1e-3, help="initial learning rate")
28
  parser.add_argument('--ckpt', type=str, default='latest')
29
  parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
@@ -52,7 +52,7 @@ if __name__ == '__main__':
52
  parser.add_argument('--fovy_range', type=float, nargs='*', default=[40, 70], help="training camera fovy range")
53
  parser.add_argument('--dir_text', action='store_true', help="direction-encode the text prompt, by appending front/side/back/overhead view")
54
  parser.add_argument('--angle_overhead', type=float, default=30, help="[0, angle_overhead] is the overhead region")
55
- parser.add_argument('--angle_front', type=float, default=30, help="[0, angle_front] is the front region, [180, 180+angle_front] the back region, otherwise the side region.")
56
 
57
  parser.add_argument('--lambda_entropy', type=float, default=1e-4, help="loss scale for alpha entropy")
58
  parser.add_argument('--lambda_opacity', type=float, default=0, help="loss scale for alpha value")
@@ -106,7 +106,7 @@ if __name__ == '__main__':
106
  if opt.test:
107
  guidance = None # no need to load guidance model at test
108
 
109
- trainer = Trainer('ngp', opt, model, guidance, device=device, workspace=opt.workspace, fp16=opt.fp16, use_checkpoint=opt.ckpt)
110
 
111
  if opt.gui:
112
  gui = NeRFGUI(opt, trainer)
@@ -135,10 +135,10 @@ if __name__ == '__main__':
135
 
136
  train_loader = NeRFDataset(opt, device=device, type='train', H=opt.h, W=opt.w, size=100).dataloader()
137
 
138
- # decay to 0.01 * init_lr at last iter step
139
- scheduler = lambda optimizer: optim.lr_scheduler.LambdaLR(optimizer, lambda iter: 0.01 ** min(iter / opt.iters, 1))
140
 
141
- trainer = Trainer('ngp', opt, model, guidance, device=device, workspace=opt.workspace, optimizer=optimizer, ema_decay=0.95, fp16=opt.fp16, lr_scheduler=scheduler, use_checkpoint=opt.ckpt, eval_interval=opt.eval_interval)
142
 
143
  if opt.gui:
144
  trainer.train_loader = train_loader # attach dataloader to trainer
 
23
  parser.add_argument('--seed', type=int, default=0)
24
 
25
  ### training options
26
+ parser.add_argument('--iters', type=int, default=10000, help="training iters")
27
  parser.add_argument('--lr', type=float, default=1e-3, help="initial learning rate")
28
  parser.add_argument('--ckpt', type=str, default='latest')
29
  parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
 
52
  parser.add_argument('--fovy_range', type=float, nargs='*', default=[40, 70], help="training camera fovy range")
53
  parser.add_argument('--dir_text', action='store_true', help="direction-encode the text prompt, by appending front/side/back/overhead view")
54
  parser.add_argument('--angle_overhead', type=float, default=30, help="[0, angle_overhead] is the overhead region")
55
+ parser.add_argument('--angle_front', type=float, default=60, help="[0, angle_front] is the front region, [180, 180+angle_front] the back region, otherwise the side region.")
56
 
57
  parser.add_argument('--lambda_entropy', type=float, default=1e-4, help="loss scale for alpha entropy")
58
  parser.add_argument('--lambda_opacity', type=float, default=0, help="loss scale for alpha value")
 
106
  if opt.test:
107
  guidance = None # no need to load guidance model at test
108
 
109
+ trainer = Trainer('df', opt, model, guidance, device=device, workspace=opt.workspace, fp16=opt.fp16, use_checkpoint=opt.ckpt)
110
 
111
  if opt.gui:
112
  gui = NeRFGUI(opt, trainer)
 
135
 
136
  train_loader = NeRFDataset(opt, device=device, type='train', H=opt.h, W=opt.w, size=100).dataloader()
137
 
138
+ scheduler = lambda optimizer: optim.lr_scheduler.LambdaLR(optimizer, lambda iter: 0.1 ** min(iter / opt.iters, 1))
139
+ # scheduler = lambda optimizer: optim.lr_scheduler.OneCycleLR(optimizer, max_lr=opt.lr, total_steps=opt.iters, pct_start=0.1)
140
 
141
+ trainer = Trainer('df', opt, model, guidance, device=device, workspace=opt.workspace, optimizer=optimizer, ema_decay=0.95, fp16=opt.fp16, lr_scheduler=scheduler, use_checkpoint=opt.ckpt, eval_interval=opt.eval_interval, scheduler_update_every_step=True)
142
 
143
  if opt.gui:
144
  trainer.train_loader = train_loader # attach dataloader to trainer
nerf/sd.py CHANGED
@@ -94,9 +94,9 @@ class StableDiffusion(nn.Module):
94
  noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
95
  noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
96
 
97
- # w(t), alpha_t * sigma_t^2
98
- # w = (1 - self.alphas[t])
99
- w = self.alphas[t] ** 0.5 * (1 - self.alphas[t])
100
  grad = w * (noise_pred - noise)
101
 
102
  # clip grad for stable training?
 
94
  noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
95
  noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
96
 
97
+ # w(t), sigma_t^2
98
+ w = (1 - self.alphas[t])
99
+ # w = self.alphas[t] ** 0.5 * (1 - self.alphas[t])
100
  grad = w * (noise_pred - noise)
101
 
102
  # clip grad for stable training?
readme.md CHANGED
@@ -17,13 +17,13 @@ This project is a **work-in-progress**, and contains lots of differences from th
17
 
18
 
19
  ## Notable differences from the paper
20
- * Since the Imagen model is not publicly available, we use [Stable Diffusion](https://github.com/CompVis/stable-diffusion) to replace it (implementation from [diffusers](https://github.com/huggingface/diffusers)). Unlike Imagen, Stable Diffusion is a latent diffusion model, which diffuses in a latent space instead of the original image space. Therefore, we need the loss to propagate back through the VAE's encoder part too, which introduces extra time cost in training. Currently, 15000 training steps take about 5 hours on a V100.
21
  * We use the [multi-resolution grid encoder](https://github.com/NVlabs/instant-ngp/) to implement the NeRF backbone (implementation from [torch-ngp](https://github.com/ashawkey/torch-ngp)), which enables much faster rendering (~10FPS at 800x800).
22
  * We use the Adam optimizer with a larger initial learning rate.
23
 
24
 
25
  ## TODOs
26
- * The normal evaluation & shading part.
27
  * Better mesh (improve the surface quality).
28
 
29
  # Install
@@ -82,7 +82,7 @@ python main.py --text "a hamburger" --workspace trial -O
82
 
83
  # if the above command fails to generate things (learns an empty scene), maybe try:
84
  # 1. disable random lambertian shading, simply use albedo as color:
85
- python main.py --text "a hamburger" --workspace trial -O --albedo_iters 15000 # i.e., set --albedo_iters >= --iters, which defaults to 15000
86
  # 2. use a smaller density regularization weight:
87
  python main.py --text "a hamburger" --workspace trial -O --lambda_entropy 1e-5
88
 
 
17
 
18
 
19
  ## Notable differences from the paper
20
+ * Since the Imagen model is not publicly available, we use [Stable Diffusion](https://github.com/CompVis/stable-diffusion) to replace it (implementation from [diffusers](https://github.com/huggingface/diffusers)). Unlike Imagen, Stable Diffusion is a latent diffusion model, which diffuses in a latent space instead of the original image space. Therefore, we need the loss to propagate back through the VAE's encoder part too, which introduces extra time cost in training. Currently, 10000 training steps take about 3 hours on a V100.
21
  * We use the [multi-resolution grid encoder](https://github.com/NVlabs/instant-ngp/) to implement the NeRF backbone (implementation from [torch-ngp](https://github.com/ashawkey/torch-ngp)), which enables much faster rendering (~10FPS at 800x800).
22
  * We use the Adam optimizer with a larger initial learning rate.
23
 
24
 
25
  ## TODOs
26
+ * Alleviate the multi-face [Janus problem](https://twitter.com/poolio/status/1578045212236034048).
27
  * Better mesh (improve the surface quality).
28
 
29
  # Install
 
82
 
83
  # if the above command fails to generate things (learns an empty scene), maybe try:
84
  # 1. disable random lambertian shading, simply use albedo as color:
85
+ python main.py --text "a hamburger" --workspace trial -O --albedo_iters 10000 # i.e., set --albedo_iters >= --iters, which defaults to 10000
86
  # 2. use a smaller density regularization weight:
87
  python main.py --text "a hamburger" --workspace trial -O --lambda_entropy 1e-5
88