Mehdi Cherti committed
Commit eeb9dce
1 Parent(s): a4a6a13

add scripts

scripts/eval_all.sh ADDED
@@ -0,0 +1,34 @@
+ #!/bin/bash
+ #for model in ddgan_sd_v10 ddgan_laion2b_v2 ddgan_ddb_v1 ddgan_ddb_v2 ddgan_ddb_v3 ddgan_ddb_v4;do
+ #for model in ddgan_ddb_v2 ddgan_ddb_v3 ddgan_ddb_v4 ddgan_ddb_v5;do
+ #for model in ddgan_ddb_v4 ddgan_ddb_v6 ddgan_ddb_v7 ddgan_laion_aesthetic_v15;do
+ #for model in ddgan_ddb_v6;do
+ #for model in ddgan_laion_aesthetic_v15;do
+ #for model in ddgan_ddb_v3 ddgan_ddb_v11 ddgan_laion_aesthetic_v15;do
+ #for model in ddgan_ddb_v3 ddgan_ddb_v11 ddgan_ddb_v2;do
+ #for model in ddgan_ddb_v6 ddgan_ddb_v4 ddgan_ddb_v10 ddgan_ddb_v9;do
+ #for model in ddgan_ddb_v3 ddgan_ddb_v11;do
+ for model in ddgan_ddb_v11;do
+ #for model in ddgan_ddb_v3;do
+     if [ "$model" == "ddgan_ddb_v3" ]; then
+         bs=32
+     elif [ "$model" == "ddgan_laion_aesthetic_v15" ]; then
+         bs=32
+     elif [ "$model" == "ddgan_ddb_v6" ]; then
+         bs=32
+     elif [ "$model" == "ddgan_ddb_v4" ]; then
+         bs=16
+     elif [ "$model" == "ddgan_ddb_v9" ]; then
+         bs=16
+     elif [ "$model" == "ddgan_ddb_v10" ]; then
+         bs=16
+     elif [ "$model" == "ddgan_ddb_v11" ]; then
+         bs=16
+     else
+         bs=64
+     fi
+     sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh test_ddgan.py --name $model --cond-text=parti_prompts.txt --batch-size=$bs --epoch=-1 --compute-image-reward --eval-name=parti_image_reward
+     #sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh test_ddgan.py --name $model --cond-text=parti_prompts.txt --batch-size=$bs --epoch=-1 --compute-clip-score --eval-name=parti;
+     #sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh test_ddgan.py --name $model --fid --real-img-dir inception_statistics_coco_val2014_256x256.npz --cond-text coco_val2014_captions.txt --batch-size=$bs --epoch=-1 --nb-images-for-fid=30000 --eval-name=coco --compute-clip-score;
+     #sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh test_ddgan.py --name $model --cond-text=drawbench.txt --batch-size=$bs --epoch=-1 --compute-clip-score --eval-name=drawbench;
+ done
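Note: the if/elif chain in eval_all.sh maps each model to a per-model batch size (32 or 16, with 64 as the default). A behaviorally equivalent and more compact way to express the same mapping is a bash case statement; the sketch below is illustrative only and is not part of the committed script.

    # Illustrative sketch (not part of the commit): the same model -> batch-size
    # mapping as the if/elif chain above, written as a case statement.
    case "$model" in
        ddgan_ddb_v3|ddgan_laion_aesthetic_v15|ddgan_ddb_v6) bs=32 ;;
        ddgan_ddb_v4|ddgan_ddb_v9|ddgan_ddb_v10|ddgan_ddb_v11) bs=16 ;;
        *) bs=64 ;;
    esac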
scripts/fid.sh ADDED
(empty file added)
scripts/init.sh ADDED
@@ -0,0 +1,14 @@
+ ml purge
+ ml use $OTHERSTAGES
+ ml Stages/2022
+ ml GCC/11.2.0
+ ml OpenMPI/4.1.2
+ ml CUDA/11.5
+ ml cuDNN/8.3.1.22-CUDA-11.5
+ ml NCCL/2.12.7-1-CUDA-11.5
+ ml PyTorch/1.11-CUDA-11.5
+ ml Horovod/0.24
+ ml torchvision/0.12.0
+ source /p/home/jusers/cherti1/jureca/ccstdl/code/feed_forward_vqgan_clip/envs/jureca_2022/bin/activate
+ export HOROVOD_CACHE_CAPACITY=4096
+ export CUDA_VISIBLE_DEVICES=0,1,2,3
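For context, the DDP launcher scripts added below pick up this environment by sourcing this file. A minimal interactive check (assuming the same modules and virtualenv path are available on a login node) might look like:

    # Load the 2022 stage modules and the virtualenv, then confirm that the
    # PyTorch 1.11 module is the one being picked up.
    source scripts/init.sh
    python -c "import torch; print(torch.__version__)"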
scripts/run_hdfml.sh ADDED
@@ -0,0 +1,25 @@
+ #!/bin/bash -x
+ #SBATCH --account=cstdl
+ #SBATCH --nodes=8
+ #SBATCH --ntasks-per-node=4
+ #SBATCH --cpus-per-task=8
+ #SBATCH --time=06:00:00
+ #SBATCH --gres=gpu
+ #SBATCH --partition=batch
+ ml purge
+ ml use $OTHERSTAGES
+ ml Stages/2022
+ ml GCC/11.2.0
+ ml OpenMPI/4.1.2
+ ml CUDA/11.5
+ ml cuDNN/8.3.1.22-CUDA-11.5
+ ml NCCL/2.12.7-1-CUDA-11.5
+ ml PyTorch/1.11-CUDA-11.5
+ ml Horovod/0.24
+ ml torchvision/0.12.0
+ source envs/hdfml/bin/activate
+ export CUDA_VISIBLE_DEVICES=0,1,2,3
+ echo "Job id: $SLURM_JOB_ID"
+ export TOKENIZERS_PARALLELISM=false
+ export NCCL_ASYNC_ERROR_HANDLING=1
+ srun python -u $*
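As with the other launchers, this script is meant to be submitted with sbatch, with the Python entry point and its arguments appended. Following the invocation pattern used in scripts/eval_all.sh above, a submission might look like the following (the entry point and flags are illustrative, not prescribed by this script):

    # Example submission on HDFML; test_ddgan.py and its flags mirror the
    # pattern from scripts/eval_all.sh and are only illustrative here.
    sbatch scripts/run_hdfml.sh test_ddgan.py --name ddgan_ddb_v11 \
        --cond-text=parti_prompts.txt --batch-size=16 --epoch=-1 \
        --compute-clip-score --eval-name=parti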
scripts/run_jurecadc_conda.sh ADDED
@@ -0,0 +1,23 @@
+ #!/bin/bash -x
+ #SBATCH --account=zam
+ #SBATCH --nodes=1
+ #SBATCH --ntasks-per-node=4
+ #SBATCH --cpus-per-task=24
+ #SBATCH --time=06:00:00
+ #SBATCH --gres=gpu:4
+ #SBATCH --partition=dc-gpu
+ ml CUDA
+ source /p/project/laionize/miniconda/bin/activate
+ conda activate ddgan
+ #source scripts/init_2022.sh
+ #source scripts/init_2020.sh
+ #source scripts/init.sh
+ export CUDA_VISIBLE_DEVICES=0,1,2,3
+ echo "Job id: $SLURM_JOB_ID"
+ export TOKENIZERS_PARALLELISM=false
+ #export NCCL_ASYNC_ERROR_HANDLING=1
+ export NCCL_IB_TIMEOUT=50
+ export UCX_RC_TIMEOUT=4s
+ export NCCL_IB_RETRY_CNT=10
+ export TORCH_DISTRIBUTED_DEBUG=INFO
+ srun python -u $*
scripts/run_jurecadc_ddp.sh ADDED
@@ -0,0 +1,21 @@
+ #!/bin/bash -x
+ #SBATCH --account=zam
+ #SBATCH --nodes=1
+ #SBATCH --ntasks-per-node=4
+ #SBATCH --cpus-per-task=24
+ #SBATCH --time=06:00:00
+ #SBATCH --gres=gpu:4
+ #SBATCH --partition=dc-gpu
+ source set_torch_distributed_vars.sh
+ #source scripts/init_2022.sh
+ #source scripts/init_2020.sh
+ source scripts/init.sh
+ export CUDA_VISIBLE_DEVICES=0,1,2,3
+ echo "Job id: $SLURM_JOB_ID"
+ export TOKENIZERS_PARALLELISM=false
+ #export NCCL_ASYNC_ERROR_HANDLING=1
+ export NCCL_IB_TIMEOUT=50
+ export UCX_RC_TIMEOUT=4s
+ export NCCL_IB_RETRY_CNT=10
+ export TRANSFORMERS_CACHE=cache
+ srun python -u $*
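This is the launcher that scripts/eval_all.sh above submits. With the active loop there ($model=ddgan_ddb_v11, $bs=16), the expanded sbatch call is shown below; options given on the sbatch command line (partition, time, node and task counts) take precedence over the corresponding #SBATCH directives in this file.

    # Expanded form of the sbatch call issued by scripts/eval_all.sh for
    # model ddgan_ddb_v11 (bs=16).
    sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh \
        test_ddgan.py --name ddgan_ddb_v11 --cond-text=parti_prompts.txt \
        --batch-size=16 --epoch=-1 --compute-image-reward --eval-name=parti_image_reward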
scripts/run_jusuf_ddp.sh ADDED
@@ -0,0 +1,14 @@
+ #!/bin/bash -x
+ #SBATCH --account=zam
+ #SBATCH --nodes=1
+ #SBATCH --ntasks-per-node=1
+ #SBATCH --cpus-per-task=24
+ #SBATCH --time=06:00:00
+ #SBATCH --gres=gpu:1
+ #SBATCH --partition=gpus
+ source set_torch_distributed_vars.sh
+ source scripts/init.sh
+ export CUDA_VISIBLE_DEVICES=0,1,2,3
+ echo "Job id: $SLURM_JOB_ID"
+ export TOKENIZERS_PARALLELISM=false
+ srun python -u $*
scripts/run_juwelsbooster_conda.sh ADDED
@@ -0,0 +1,19 @@
+ #!/bin/bash -x
+ #SBATCH --account=laionize
+ #SBATCH --nodes=1
+ #SBATCH --ntasks-per-node=4
+ #SBATCH --cpus-per-task=24
+ #SBATCH --time=06:00:00
+ #SBATCH --gres=gpu:4
+ #SBATCH --partition=booster
+ ml CUDA
+ source /p/project/laionize/miniconda/bin/activate
+ conda activate ddgan
+ export CUDA_VISIBLE_DEVICES=0,1,2,3
+ echo "Job id: $SLURM_JOB_ID"
+ export TOKENIZERS_PARALLELISM=false
+ #export NCCL_ASYNC_ERROR_HANDLING=1
+ export NCCL_IB_TIMEOUT=50
+ export UCX_RC_TIMEOUT=4s
+ export NCCL_IB_RETRY_CNT=10
+ srun python -u $*
scripts/run_juwelsbooster_ddp.sh ADDED
@@ -0,0 +1,17 @@
+ #!/bin/bash -x
+ #SBATCH --account=covidnetx
+ #SBATCH --nodes=4
+ #SBATCH --ntasks-per-node=4
+ #SBATCH --cpus-per-task=24
+ #SBATCH --time=06:00:00
+ #SBATCH --gres=gpu:4
+ #SBATCH --partition=booster
+ source set_torch_distributed_vars.sh
+ #source scripts/init_2022.sh
+ #source scripts/init_2020.sh
+ source scripts/init.sh
+ export CUDA_VISIBLE_DEVICES=0,1,2,3
+ echo "Job id: $SLURM_JOB_ID"
+ export TOKENIZERS_PARALLELISM=false
+ export NCCL_ASYNC_ERROR_HANDLING=1
+ srun python -u $*