ashawkey committed
Commit e81396d
1 Parent(s): 3de5f93

enable random lambertian shading in training

Files changed (4)
  1. assets/update_logs.md +4 -0
  2. main.py +5 -5
  3. nerf/provider.py +1 -1
  4. readme.md +14 -5
assets/update_logs.md CHANGED
@@ -1,3 +1,7 @@
+### 2022.10.9
+* The shading (partially) starts to work; at least it won't make the scene empty. For some prompts it shows better results (a less severe Janus problem). The textureless rendering mode is still disabled.
+* Enable shading by default (--albedo_iters 1000).
+
 ### 2022.10.5
 * Basic reproduction finished.
 * Non --cuda_ray, --tcnn are not working, need to fix.
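To make the changelog entry above concrete, here is a minimal sketch (not the repository's exact code; `choose_shading` and `shade` are hypothetical names) of how the per-step shading mode could be picked once `--albedo_iters` steps have passed: pure albedo at first, then a random choice between albedo and lambertian, with the still-disabled textureless mode left out.

```python
import random

def choose_shading(global_step, albedo_iters=1000):
    # Warm up with plain albedo rendering, then randomly mix in
    # lambertian shading, as described in the 2022.10.9 entry above.
    if global_step < albedo_iters:
        return 'albedo'
    # textureless mode is still disabled in this commit, so only two choices.
    return random.choice(['albedo', 'lambertian'])

def shade(albedo, normal, light_dir, mode, ambient=0.1):
    # Simple lambertian term on top of the predicted albedo;
    # `normal` and `light_dir` are assumed to be unit-length 3-vectors.
    if mode == 'albedo':
        return albedo
    n_dot_l = max(0.0, sum(n * l for n, l in zip(normal, light_dir)))
    return [c * (ambient + (1 - ambient) * n_dot_l) for c in albedo]
```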
main.py CHANGED
@@ -32,7 +32,7 @@ if __name__ == '__main__':
     parser.add_argument('--upsample_steps', type=int, default=64, help="num steps up-sampled per ray (only valid when not using --cuda_ray)")
     parser.add_argument('--update_extra_interval', type=int, default=16, help="iter interval to update extra status (only valid when using --cuda_ray)")
     parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when not using --cuda_ray)")
-    parser.add_argument('--albedo_iters', type=int, default=15000, help="training iters that only use albedo shading")
+    parser.add_argument('--albedo_iters', type=int, default=1000, help="training iters that only use albedo shading")
     # model options
     parser.add_argument('--bg_radius', type=float, default=1.4, help="if positive, use a background model at sphere(bg_radius)")
     parser.add_argument('--density_thresh', type=float, default=10, help="threshold for density grid to be occupied")
@@ -75,14 +75,14 @@ if __name__ == '__main__':
         opt.dir_text = True
         # use occupancy grid to prune ray sampling, faster rendering.
         opt.cuda_ray = True
-        opt.lambda_entropy = 1e-4
-        opt.lambda_opacity = 0
+        # opt.lambda_entropy = 1e-4
+        # opt.lambda_opacity = 0
 
     elif opt.O2:
         opt.fp16 = True
         opt.dir_text = True
-        opt.lambda_entropy = 1e-3
-        opt.lambda_opacity = 1e-3 # no occupancy grid, so use a stronger opacity loss.
+        opt.lambda_entropy = 1e-4 # necessary to keep non-empty
+        opt.lambda_opacity = 3e-3 # no occupancy grid, so use a stronger opacity loss.
 
     if opt.backbone == 'vanilla':
         from nerf.network import NeRFNetwork
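For context on the `lambda_entropy` / `lambda_opacity` weights changed above, a rough sketch of the kind of regularizers such weights typically scale, written against the rendered per-ray opacity. This is an assumption about the loss shape, not the repository's implementation; `density_regularizers` is a hypothetical helper.

```python
import torch

def density_regularizers(alphas, lambda_entropy=1e-4, lambda_opacity=3e-3, eps=1e-5):
    # `alphas`: accumulated opacity per ray in [0, 1], shape [N].
    alphas = alphas.clamp(eps, 1 - eps)
    # Binary entropy pushes each ray towards being fully empty or fully opaque.
    entropy = -(alphas * torch.log(alphas) + (1 - alphas) * torch.log(1 - alphas)).mean()
    # Mean opacity penalizes filling the whole volume; weighted more strongly in -O2,
    # where no occupancy grid prunes empty space (per the comment in the diff above).
    opacity = alphas.mean()
    return lambda_entropy * entropy + lambda_opacity * opacity
```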
nerf/provider.py CHANGED
@@ -55,7 +55,7 @@ def get_view_direction(thetas, phis, overhead, front):
     return res
 
 
-def rand_poses(size, device, radius_range=[1, 1.5], theta_range=[0, 150], phi_range=[0, 360], return_dirs=False, angle_overhead=30, angle_front=60, jitter=False):
+def rand_poses(size, device, radius_range=[1, 1.5], theta_range=[0, 100], phi_range=[0, 360], return_dirs=False, angle_overhead=30, angle_front=60, jitter=False):
     ''' generate random poses from an orbit camera
     Args:
         size: batch size of generated poses.
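The only change above narrows `theta_range` from [0, 150] to [0, 100] degrees, so random cameras no longer dip far below the object. A minimal sketch of how positions can be drawn from these ranges (assumed and simplified from `rand_poses`: only camera centers, no pose matrices; `sample_orbit_positions` is a hypothetical name):

```python
import numpy as np

def sample_orbit_positions(size, radius_range=(1.0, 1.5), theta_range=(0, 100), phi_range=(0, 360)):
    # theta: polar angle measured from the up axis, phi: azimuth, both in degrees.
    radius = np.random.uniform(*radius_range, size)
    theta = np.deg2rad(np.random.uniform(*theta_range, size))
    phi = np.deg2rad(np.random.uniform(*phi_range, size))
    # Spherical -> Cartesian with y as the up axis; cameras orbit the origin.
    x = radius * np.sin(theta) * np.sin(phi)
    y = radius * np.cos(theta)
    z = radius * np.sin(theta) * np.cos(phi)
    return np.stack([x, y, z], axis=-1)  # [size, 3] camera centers, all looking at the origin
```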
readme.md CHANGED
@@ -73,14 +73,24 @@ First time running will take some time to compile the CUDA extensions.
 
 ```bash
 ### stable-dreamfusion setting
-## train with text prompt
+## train with text prompt (with the default settings)
 # `-O` equals `--cuda_ray --fp16 --dir_text`
+# `--cuda_ray` enables instant-ngp-like occupancy grid based acceleration.
+# `--fp16` enables half-precision training.
+# `--dir_text` enables view-dependent prompting.
 python main.py --text "a hamburger" --workspace trial -O
 
+# if the above command fails to generate things (learns an empty scene), maybe try:
+# 1. disable random lambertian shading, simply use albedo as color:
+python main.py --text "a hamburger" --workspace trial -O --albedo_iters 15000 # i.e., set --albedo_iters >= --iters, which defaults to 15000
+# 2. use a smaller density regularization weight:
+python main.py --text "a hamburger" --workspace trial -O --lambda_entropy 1e-5
+
 ## after the training is finished:
-# test (exporting 360 video, and an obj mesh with png texture)
+# test (exporting 360 video)
 python main.py --workspace trial -O --test
-
+# also save a mesh (with obj, mtl, and png texture)
+python main.py --workspace trial -O --test --save_mesh
 # test with a GUI (free view control!)
 python main.py --workspace trial -O --test --gui
 
@@ -103,7 +113,7 @@ pred_rgb_512 = F.interpolate(pred_rgb, (512, 512), mode='bilinear', align_corner
 latents = self.encode_imgs(pred_rgb_512)
 ... # timestep sampling, noise adding and UNet noise predicting
 # 3. the SDS loss, since UNet part is ignored and cannot simply autodiff, we manually set the grad for latents.
-w = (1 - self.scheduler.alphas_cumprod[t]).to(self.device)
+w = self.alphas[t] ** 0.5 * (1 - self.alphas[t])
 grad = w * (noise_pred - noise)
 latents.backward(gradient=grad, retain_graph=True)
 ```
@@ -119,7 +129,6 @@ latents.backward(gradient=grad, retain_graph=True)
 Training is faster if only sample 128 points uniformly per ray (5h --> 2.5h).
 More testing is needed...
 * Shading & normal evaluation: `./nerf/network*.py > NeRFNetwork > forward`. Current implementation harms training and is disabled.
-    * use `--albedo_iters 1000` to enable random shading mode after 1000 steps from albedo, lambertian, and textureless.
 * light direction: current implementation uses a plane light source, instead of a point light source...
 * View-dependent prompting: `./nerf/provider.py > get_view_direction`.
     * use `--angle_overhead, --angle_front` to set the border. How to better divide front/back/side regions?
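The readme hunk above changes the SDS weighting from w = 1 - alpha_bar_t to w = sqrt(alpha_bar_t) * (1 - alpha_bar_t). A small self-contained sketch of that gradient step (hypothetical `sds_grad` helper; `alphas_cumprod` stands in for the scheduler's cumulative alphas, i.e. `self.alphas` in the snippet):

```python
import torch

def sds_grad(noise_pred, noise, alphas_cumprod, t):
    # Updated weighting from the diff above: w(t) = sqrt(alpha_bar_t) * (1 - alpha_bar_t).
    # (The previous version used w(t) = 1 - alpha_bar_t.)
    alpha_bar = alphas_cumprod[t]
    w = alpha_bar ** 0.5 * (1 - alpha_bar)
    # The UNet is treated as frozen, so the weighted residual is injected
    # directly as the gradient of the latents (no autodiff through the UNet).
    return w * (noise_pred - noise)

# plugged in exactly where the readme snippet calls latents.backward:
# latents.backward(gradient=sds_grad(noise_pred, noise, alphas_cumprod, t), retain_graph=True)
```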