Webaverse
/

Stable-Dreamfusion

stable-diffusion

dreamfusion

text2mesh

Model card Files Files and versions Community

ashawkey commited on Oct 10, 2022

Commit

76ff91e

•

1 Parent(s): e81396d

fix lr strategy

Browse files

Files changed (3) hide show

main.py +6 -6
nerf/sd.py +3 -3
readme.md +3 -3

main.py CHANGED Viewed

@@ -23,7 +23,7 @@ if __name__ == '__main__':
     parser.add_argument('--seed', type=int, default=0)
     ### training options
-    parser.add_argument('--iters', type=int, default=15000, help="training iters")
     parser.add_argument('--lr', type=float, default=1e-3, help="initial learning rate")
     parser.add_argument('--ckpt', type=str, default='latest')
     parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
@@ -52,7 +52,7 @@ if __name__ == '__main__':
     parser.add_argument('--fovy_range', type=float, nargs='*', default=[40, 70], help="training camera fovy range")
     parser.add_argument('--dir_text', action='store_true', help="direction-encode the text prompt, by appending front/side/back/overhead view")
     parser.add_argument('--angle_overhead', type=float, default=30, help="[0, angle_overhead] is the overhead region")
-    parser.add_argument('--angle_front', type=float, default=30, help="[0, angle_front] is the front region, [180, 180+angle_front] the back region, otherwise the side region.")
     parser.add_argument('--lambda_entropy', type=float, default=1e-4, help="loss scale for alpha entropy")
     parser.add_argument('--lambda_opacity', type=float, default=0, help="loss scale for alpha value")
@@ -106,7 +106,7 @@ if __name__ == '__main__':
     if opt.test:
         guidance = None # no need to load guidance model at test
-        trainer = Trainer('ngp', opt, model, guidance, device=device, workspace=opt.workspace, fp16=opt.fp16, use_checkpoint=opt.ckpt)
         if opt.gui:
             gui = NeRFGUI(opt, trainer)
@@ -135,10 +135,10 @@ if __name__ == '__main__':
         train_loader = NeRFDataset(opt, device=device, type='train', H=opt.h, W=opt.w, size=100).dataloader()
-        # decay to 0.01 * init_lr at last iter step
-        scheduler = lambda optimizer: optim.lr_scheduler.LambdaLR(optimizer, lambda iter: 0.01 ** min(iter / opt.iters, 1))
-        trainer = Trainer('ngp', opt, model, guidance, device=device, workspace=opt.workspace, optimizer=optimizer, ema_decay=0.95, fp16=opt.fp16, lr_scheduler=scheduler, use_checkpoint=opt.ckpt, eval_interval=opt.eval_interval)
         if opt.gui:
             trainer.train_loader = train_loader # attach dataloader to trainer

     parser.add_argument('--seed', type=int, default=0)
     ### training options
+    parser.add_argument('--iters', type=int, default=10000, help="training iters")
     parser.add_argument('--lr', type=float, default=1e-3, help="initial learning rate")
     parser.add_argument('--ckpt', type=str, default='latest')
     parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
     parser.add_argument('--fovy_range', type=float, nargs='*', default=[40, 70], help="training camera fovy range")
     parser.add_argument('--dir_text', action='store_true', help="direction-encode the text prompt, by appending front/side/back/overhead view")
     parser.add_argument('--angle_overhead', type=float, default=30, help="[0, angle_overhead] is the overhead region")
+    parser.add_argument('--angle_front', type=float, default=60, help="[0, angle_front] is the front region, [180, 180+angle_front] the back region, otherwise the side region.")
     parser.add_argument('--lambda_entropy', type=float, default=1e-4, help="loss scale for alpha entropy")
     parser.add_argument('--lambda_opacity', type=float, default=0, help="loss scale for alpha value")
     if opt.test:
         guidance = None # no need to load guidance model at test
+        trainer = Trainer('df', opt, model, guidance, device=device, workspace=opt.workspace, fp16=opt.fp16, use_checkpoint=opt.ckpt)
         if opt.gui:
             gui = NeRFGUI(opt, trainer)
         train_loader = NeRFDataset(opt, device=device, type='train', H=opt.h, W=opt.w, size=100).dataloader()
+        scheduler = lambda optimizer: optim.lr_scheduler.LambdaLR(optimizer, lambda iter: 0.1 ** min(iter / opt.iters, 1))
+        # scheduler = lambda optimizer: optim.lr_scheduler.OneCycleLR(optimizer, max_lr=opt.lr, total_steps=opt.iters, pct_start=0.1)
+        trainer = Trainer('df', opt, model, guidance, device=device, workspace=opt.workspace, optimizer=optimizer, ema_decay=0.95, fp16=opt.fp16, lr_scheduler=scheduler, use_checkpoint=opt.ckpt, eval_interval=opt.eval_interval, scheduler_update_every_step=True)
         if opt.gui:
             trainer.train_loader = train_loader # attach dataloader to trainer

nerf/sd.py CHANGED Viewed

@@ -94,9 +94,9 @@ class StableDiffusion(nn.Module):
         noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
         noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
-        # w(t), alpha_t * sigma_t^2
-        # w = (1 - self.alphas[t])
-        w = self.alphas[t] ** 0.5 * (1 - self.alphas[t])
         grad = w * (noise_pred - noise)
         # clip grad for stable training?

         noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
         noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+        # w(t), sigma_t^2
+        w = (1 - self.alphas[t])
+        # w = self.alphas[t] ** 0.5 * (1 - self.alphas[t])
         grad = w * (noise_pred - noise)
         # clip grad for stable training?

readme.md CHANGED Viewed

@@ -17,13 +17,13 @@ This project is a **work-in-progress**, and contains lots of differences from th
 ## Notable differences from the paper
-* Since the Imagen model is not publicly available, we use [Stable Diffusion](https://github.com/CompVis/stable-diffusion) to replace it (implementation from [diffusers](https://github.com/huggingface/diffusers)). Different from Imagen, Stable-Diffusion is a latent diffusion model, which diffuses in a latent space instead of the original image space. Therefore, we need the loss to propagate back from the VAE's encoder part too, which introduces extra time cost in training. Currently, 15000 training steps take about 5 hours to train on a V100.
 * We use the [multi-resolution grid encoder](https://github.com/NVlabs/instant-ngp/) to implement the NeRF backbone (implementation from [torch-ngp](https://github.com/ashawkey/torch-ngp)), which enables much faster rendering (~10FPS at 800x800).
 * We use the Adam optimizer with a larger initial learning rate.
 ## TODOs
-* The normal evaluation & shading part.
 * Better mesh (improve the surface quality).
 # Install
@@ -82,7 +82,7 @@ python main.py --text "a hamburger" --workspace trial -O
 # if the above command fails to generate things (learns an empty scene), maybe try:
 # 1. disable random lambertian shading, simply use albedo as color:
-python main.py --text "a hamburger" --workspace trial -O --albedo_iters 15000 # i.e., set --albedo_iters >= --iters, which is default to 15000
 # 2. use a smaller density regularization weight:
 python main.py --text "a hamburger" --workspace trial -O --lambda_entropy 1e-5

 ## Notable differences from the paper
+* Since the Imagen model is not publicly available, we use [Stable Diffusion](https://github.com/CompVis/stable-diffusion) to replace it (implementation from [diffusers](https://github.com/huggingface/diffusers)). Different from Imagen, Stable-Diffusion is a latent diffusion model, which diffuses in a latent space instead of the original image space. Therefore, we need the loss to propagate back from the VAE's encoder part too, which introduces extra time cost in training. Currently, 10000 training steps take about 3 hours to train on a V100.
 * We use the [multi-resolution grid encoder](https://github.com/NVlabs/instant-ngp/) to implement the NeRF backbone (implementation from [torch-ngp](https://github.com/ashawkey/torch-ngp)), which enables much faster rendering (~10FPS at 800x800).
 * We use the Adam optimizer with a larger initial learning rate.
 ## TODOs
+* Alleviate the multi-face [Janus problem](https://twitter.com/poolio/status/1578045212236034048).
 * Better mesh (improve the surface quality).
 # Install
 # if the above command fails to generate things (learns an empty scene), maybe try:
 # 1. disable random lambertian shading, simply use albedo as color:
+python main.py --text "a hamburger" --workspace trial -O --albedo_iters 10000 # i.e., set --albedo_iters >= --iters, which is default to 10000
 # 2. use a smaller density regularization weight:
 python main.py --text "a hamburger" --workspace trial -O --lambda_entropy 1e-5