Xsmos committed on
Commit
55adee8
·
verified ·
1 Parent(s): 74cd8f2

0804-202215

Browse files
context_unet.py CHANGED
@@ -330,7 +330,7 @@ class ContextUnet(nn.Module):
330
  elif image_size == 128:
331
  channel_mult = (1, 1, 2, 3, 4)
332
  elif image_size == 64:
333
- channel_mult = (2,2,4,4,4)#(1, 2, 4)#(2,4,4,4,8)#(1, 2, 2, 4, 4)#(1, 2, 2, 4, 8)#(1, 1, 2, 2, 4, 4)#(1, 2, 4, 8, 16)#(1, 2, 3, 4)#(1, 2, 4, 6, 8)#(1, 2, 2, 4)#(1, 2, 8, 8, 8)#(1, 2, 4)#(1, 2, 2, 4)#(0.5,1,2,2,4,4)#(1, 1, 2, 2, 4, 4)#
334
  elif image_size == 32:
335
  channel_mult = (1, 2, 2, 4)
336
  elif image_size == 28:
@@ -550,4 +550,4 @@ class ContextUnet(nn.Module):
550
  h = self.out(h)
551
  # print("self.out(h)", "h.shape =", h.shape)
552
 
553
- return h
 
330
  elif image_size == 128:
331
  channel_mult = (1, 1, 2, 3, 4)
332
  elif image_size == 64:
333
+ channel_mult = (0.5,1,1,2,2)#(1,1,2)#(1,2)#(1,1,2,2)#(1,1,2,2,4)#(2,2,4,4,4)#(1, 2, 4)#(2,4,4,4,8)#(1, 2, 2, 4, 4)#(1, 2, 2, 4, 8)#(1, 1, 2, 2, 4, 4)#(1, 2, 4, 8, 16)#(1, 2, 3, 4)#(1, 2, 4, 6, 8)#(1, 2, 2, 4)#(1, 2, 8, 8, 8)#(1, 2, 4)#(1, 2, 2, 4)#(0.5,1,2,2,4,4)#(1, 1, 2, 2, 4, 4)#
334
  elif image_size == 32:
335
  channel_mult = (1, 2, 2, 4)
336
  elif image_size == 28:
 
550
  h = self.out(h)
551
  # print("self.out(h)", "h.shape =", h.shape)
552
 
553
+ return h
diffusion.py CHANGED
@@ -256,13 +256,13 @@ class TrainConfig:
256
  # repeat = 2
257
 
258
  # dim = 2
259
- dim = 2
260
- stride = (2,4) if dim == 2 else (2,2,2)
261
- num_image = 3000#300#3000#6000#30#60#6000#1000#2000#20000#15000#7000#25600#3000#10000#1000#10000#5000#2560#800#2560
262
- batch_size = 10#50#10#50#20#50#1#2#50#20#2#100 # 10
263
- n_epoch = 50#1#50#5#50#5#50#100#50#100#30#120#5#4# 10#50#20#20#2#5#25 # 120
264
  HII_DIM = 64
265
- num_redshift = 512#64#512#64#512#64#256CUDAoom#128#64#512#128#64#512#256#256#64#512#128
266
  channel = 1
267
  img_shape = (channel, HII_DIM, num_redshift) if dim == 2 else (channel, HII_DIM, HII_DIM, num_redshift)
268
 
@@ -303,7 +303,7 @@ class TrainConfig:
303
  # params = params
304
  # data_dir = './data' # data directory
305
 
306
- use_fp16 = False
307
  dtype = torch.float16 if use_fp16 else torch.float32
308
  mixed_precision = "fp16"
309
  gradient_accumulation_steps = 1
 
256
  # repeat = 2
257
 
258
  # dim = 2
259
+ dim = 3#2
260
+ stride = (2,4) if dim == 2 else (2,2,4)
261
+ num_image = 480#1200#120#3000#300#3000#6000#30#60#6000#1000#2000#20000#15000#7000#25600#3000#10000#1000#10000#5000#2560#800#2560
262
+ batch_size = 1#10#50#10#50#20#50#1#2#50#20#2#100 # 10
263
+ n_epoch = 50#1#50#10#1#50#1#50#5#50#5#50#100#50#100#30#120#5#4# 10#50#20#20#2#5#25 # 120
264
  HII_DIM = 64
265
+ num_redshift = 512#256#512#256#512#256#512#64#512#64#512#64#256CUDAoom#128#64#512#128#64#512#256#256#64#512#128
266
  channel = 1
267
  img_shape = (channel, HII_DIM, num_redshift) if dim == 2 else (channel, HII_DIM, HII_DIM, num_redshift)
268
 
 
303
  # params = params
304
  # data_dir = './data' # data directory
305
 
306
+ use_fp16 = True
307
  dtype = torch.float16 if use_fp16 else torch.float32
308
  mixed_precision = "fp16"
309
  gradient_accumulation_steps = 1
phoenix_diffusion.sbatch CHANGED
@@ -2,10 +2,10 @@
2
  #SBATCH -J diffusion # Job name
3
  #SBATCH -A gts-jw254-coda20
4
  #SBATCH -qembers
5
- #SBATCH -N8 --gpus-per-node=RTX_6000:1 # -C A100-80GB # Number of nodes and cores per node required
6
  #SBATCH --ntasks-per-node=1
7
- #SBATCH --mem-per-gpu=8G # Memory per core
8
- #SBATCH -t 03:10:00 # Duration of the job (Ex: 15 mins)
9
  #SBATCH -oReport-%j # Combined output and error messages file
10
  #SBATCH --error=error-%j
11
  #SBATCH --mail-type=BEGIN,END,FAIL # Mail preferences
@@ -30,7 +30,7 @@ export MASTER_PORT=$MASTER_PORT
30
 
31
  srun python diffusion.py \
32
  --train 1 \
33
- --resume outputs/model_state-N3000-device_count1-node4-epoch49-172.27.149.191 \
34
  --num_new_img_per_gpu 50 \
35
  --max_num_img_per_gpu 10 \
36
 
 
2
  #SBATCH -J diffusion # Job name
3
  #SBATCH -A gts-jw254-coda20
4
  #SBATCH -qembers
5
+ #SBATCH -N10 --gpus-per-node=V100:1 -C V100-32GB # Number of nodes and GPUs per node required
6
  #SBATCH --ntasks-per-node=1
7
+ #SBATCH --mem-per-gpu=16G # Memory per GPU
8
+ #SBATCH -t 08:00:00 # Duration of the job (Ex: 15 mins)
9
  #SBATCH -oReport-%j # Combined output and error messages file
10
  #SBATCH --error=error-%j
11
  #SBATCH --mail-type=BEGIN,END,FAIL # Mail preferences
 
30
 
31
  srun python diffusion.py \
32
  --train 1 \
33
+ --resume outputs/model_state-N3000-device_count1-node8-epoch49-172.27.149.181 \
34
  --num_new_img_per_gpu 50 \
35
  --max_num_img_per_gpu 10 \
36
 
quantify_results.ipynb CHANGED
The diff for this file is too large to render. See raw diff