0804-202215
Browse files
- context_unet.py +2 -2
- diffusion.py +7 -7
- phoenix_diffusion.sbatch +4 -4
- quantify_results.ipynb +0 -0
context_unet.py
CHANGED
|
@@ -330,7 +330,7 @@ class ContextUnet(nn.Module):
|
|
| 330 |
elif image_size == 128:
|
| 331 |
channel_mult = (1, 1, 2, 3, 4)
|
| 332 |
elif image_size == 64:
|
| 333 |
-
channel_mult = (2,2,4,4,4)#(1, 2, 4)#(2,4,4,4,8)#(1, 2, 2, 4, 4)#(1, 2, 2, 4, 8)#(1, 1, 2, 2, 4, 4)#(1, 2, 4, 8, 16)#(1, 2, 3, 4)#(1, 2, 4, 6, 8)#(1, 2, 2, 4)#(1, 2, 8, 8, 8)#(1, 2, 4)#(1, 2, 2, 4)#(0.5,1,2,2,4,4)#(1, 1, 2, 2, 4, 4)#
|
| 334 |
elif image_size == 32:
|
| 335 |
channel_mult = (1, 2, 2, 4)
|
| 336 |
elif image_size == 28:
|
|
@@ -550,4 +550,4 @@ class ContextUnet(nn.Module):
|
|
| 550 |
h = self.out(h)
|
| 551 |
# print("self.out(h)", "h.shape =", h.shape)
|
| 552 |
|
| 553 |
-
return h
|
|
|
|
| 330 |
elif image_size == 128:
|
| 331 |
channel_mult = (1, 1, 2, 3, 4)
|
| 332 |
elif image_size == 64:
|
| 333 |
+
channel_mult = (0.5,1,1,2,2)#(1,1,2)#(1,2)#(1,1,2,2)#(1,1,2,2,4)#(2,2,4,4,4)#(1, 2, 4)#(2,4,4,4,8)#(1, 2, 2, 4, 4)#(1, 2, 2, 4, 8)#(1, 1, 2, 2, 4, 4)#(1, 2, 4, 8, 16)#(1, 2, 3, 4)#(1, 2, 4, 6, 8)#(1, 2, 2, 4)#(1, 2, 8, 8, 8)#(1, 2, 4)#(1, 2, 2, 4)#(0.5,1,2,2,4,4)#(1, 1, 2, 2, 4, 4)#
|
| 334 |
elif image_size == 32:
|
| 335 |
channel_mult = (1, 2, 2, 4)
|
| 336 |
elif image_size == 28:
|
|
|
|
| 550 |
h = self.out(h)
|
| 551 |
# print("self.out(h)", "h.shape =", h.shape)
|
| 552 |
|
| 553 |
+
return h
|
diffusion.py
CHANGED
|
@@ -256,13 +256,13 @@ class TrainConfig:
|
|
| 256 |
# repeat = 2
|
| 257 |
|
| 258 |
# dim = 2
|
| 259 |
-
dim = 2
|
| 260 |
-
stride = (2,4) if dim == 2 else (2,2,
|
| 261 |
-
num_image = 3000#300#3000#6000#30#60#6000#1000#2000#20000#15000#7000#25600#3000#10000#1000#10000#5000#2560#800#2560
|
| 262 |
-
batch_size = 10#50#10#50#20#50#1#2#50#20#2#100 # 10
|
| 263 |
-
n_epoch = 50#1#50#5#50#5#50#100#50#100#30#120#5#4# 10#50#20#20#2#5#25 # 120
|
| 264 |
HII_DIM = 64
|
| 265 |
-
num_redshift = 512#64#512#64#512#64#256CUDAoom#128#64#512#128#64#512#256#256#64#512#128
|
| 266 |
channel = 1
|
| 267 |
img_shape = (channel, HII_DIM, num_redshift) if dim == 2 else (channel, HII_DIM, HII_DIM, num_redshift)
|
| 268 |
|
|
@@ -303,7 +303,7 @@ class TrainConfig:
|
|
| 303 |
# params = params
|
| 304 |
# data_dir = './data' # data directory
|
| 305 |
|
| 306 |
-
use_fp16 =
|
| 307 |
dtype = torch.float16 if use_fp16 else torch.float32
|
| 308 |
mixed_precision = "fp16"
|
| 309 |
gradient_accumulation_steps = 1
|
|
|
|
| 256 |
# repeat = 2
|
| 257 |
|
| 258 |
# dim = 2
|
| 259 |
+
dim = 3#2
|
| 260 |
+
stride = (2,4) if dim == 2 else (2,2,4)
|
| 261 |
+
num_image = 480#1200#120#3000#300#3000#6000#30#60#6000#1000#2000#20000#15000#7000#25600#3000#10000#1000#10000#5000#2560#800#2560
|
| 262 |
+
batch_size = 1#10#50#10#50#20#50#1#2#50#20#2#100 # 10
|
| 263 |
+
n_epoch = 50#1#50#10#1#50#1#50#5#50#5#50#100#50#100#30#120#5#4# 10#50#20#20#2#5#25 # 120
|
| 264 |
HII_DIM = 64
|
| 265 |
+
num_redshift = 512#256#512#256#512#256#512#64#512#64#512#64#256CUDAoom#128#64#512#128#64#512#256#256#64#512#128
|
| 266 |
channel = 1
|
| 267 |
img_shape = (channel, HII_DIM, num_redshift) if dim == 2 else (channel, HII_DIM, HII_DIM, num_redshift)
|
| 268 |
|
|
|
|
| 303 |
# params = params
|
| 304 |
# data_dir = './data' # data directory
|
| 305 |
|
| 306 |
+
use_fp16 = True
|
| 307 |
dtype = torch.float16 if use_fp16 else torch.float32
|
| 308 |
mixed_precision = "fp16"
|
| 309 |
gradient_accumulation_steps = 1
|
phoenix_diffusion.sbatch
CHANGED
|
@@ -2,10 +2,10 @@
|
|
| 2 |
#SBATCH -J diffusion # Job name
|
| 3 |
#SBATCH -A gts-jw254-coda20
|
| 4 |
#SBATCH -qembers
|
| 5 |
-
#SBATCH -
|
| 6 |
#SBATCH --ntasks-per-node=1
|
| 7 |
-
#SBATCH --mem-per-gpu=
|
| 8 |
-
#SBATCH -t
|
| 9 |
#SBATCH -oReport-%j # Combined output and error messages file
|
| 10 |
#SBATCH --error=error-%j
|
| 11 |
#SBATCH --mail-type=BEGIN,END,FAIL # Mail preferences
|
|
@@ -30,7 +30,7 @@ export MASTER_PORT=$MASTER_PORT
|
|
| 30 |
|
| 31 |
srun python diffusion.py \
|
| 32 |
--train 1 \
|
| 33 |
-
--resume outputs/model_state-N3000-device_count1-
|
| 34 |
--num_new_img_per_gpu 50 \
|
| 35 |
--max_num_img_per_gpu 10 \
|
| 36 |
|
|
|
|
| 2 |
#SBATCH -J diffusion # Job name
|
| 3 |
#SBATCH -A gts-jw254-coda20
|
| 4 |
#SBATCH -qembers
|
| 5 |
+
#SBATCH -N10 --gpus-per-node=V100:1 -C V100-32GB # Number of nodes and cores per node required
|
| 6 |
#SBATCH --ntasks-per-node=1
|
| 7 |
+
#SBATCH --mem-per-gpu=16G # Memory per core
|
| 8 |
+
#SBATCH -t 08:00:00 # Duration of the job (Ex: 15 mins)
|
| 9 |
#SBATCH -oReport-%j # Combined output and error messages file
|
| 10 |
#SBATCH --error=error-%j
|
| 11 |
#SBATCH --mail-type=BEGIN,END,FAIL # Mail preferences
|
|
|
|
| 30 |
|
| 31 |
srun python diffusion.py \
|
| 32 |
--train 1 \
|
| 33 |
+
--resume outputs/model_state-N3000-device_count1-node8-epoch49-172.27.149.181 \
|
| 34 |
--num_new_img_per_gpu 50 \
|
| 35 |
--max_num_img_per_gpu 10 \
|
| 36 |
|
quantify_results.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|