Mehdi Cherti committed
Commit: a8858ff
1 Parent(s): 8ab4de9
remove scripts
- eval_all.sh +0 -21
- scripts/fid.sh +0 -0
- scripts/init.sh +0 -14
- scripts/init_2020.sh +0 -69
- scripts/init_2022.sh +0 -34
- scripts/run_hdfml.sh +0 -25
- scripts/run_jurecadc_conda.sh +0 -23
- scripts/run_jurecadc_ddp.sh +0 -20
- scripts/run_jusuf_ddp.sh +0 -14
- scripts/run_juwelsbooster_conda.sh +0 -19
- scripts/run_juwelsbooster_ddp.sh +0 -17
eval_all.sh
DELETED
@@ -1,21 +0,0 @@
-#!/bin/bash
-#for model in ddgan_sd_v10 ddgan_laion2b_v2 ddgan_ddb_v1 ddgan_ddb_v2 ddgan_ddb_v3 ddgan_ddb_v4;do
-#for model in ddgan_ddb_v2 ddgan_ddb_v3 ddgan_ddb_v4 ddgan_ddb_v5;do
-#for model in ddgan_ddb_v4 ddgan_ddb_v6 ddgan_ddb_v7 ddgan_laion_aesthetic_v15;do
-#for model in ddgan_ddb_v6;do
-for model in ddgan_laion_aesthetic_v15;do
-    if [ "$model" == "ddgan_ddb_v3" ]; then
-        bs=32
-    elif [ "$model" == "ddgan_laion_aesthetic_v15" ]; then
-        bs=32
-    elif [ "$model" == "ddgan_ddb_v6" ]; then
-        bs=32
-    elif [ "$model" == "ddgan_ddb_v4" ]; then
-        bs=16
-    else
-        bs=64
-    fi
-    sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh run.py test $model --cond-text=parti_prompts.txt --batch-size=$bs --epoch=-1 --compute-clip-score --eval-name=parti;
-    sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh run.py test $model --fid --real-img-dir inception_statistics_coco_val2014_256x256.npz --cond-text coco_val2014_captions.txt --batch-size=$bs --epoch=-1 --nb-images-for-fid=30000 --eval-name=coco --compute-clip-score;
-    sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh run.py test $model --cond-text=drawbench.txt --batch-size=$bs --epoch=-1 --compute-clip-score --eval-name=drawbench;
-done
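For reference, the evaluation that eval_all.sh drove can still be launched without the deleted wrapper scripts. The sketch below re-expresses the three test runs for one model as direct srun calls; the model name, batch size, data files, and run.py flags are copied verbatim from the deleted script, and it is an assumption that run.py still accepts them and that the commands run inside a SLURM allocation.

#!/bin/bash
# Sketch: the three evaluations from the removed eval_all.sh for a single model,
# without the (also removed) scripts/run_jurecadc_ddp.sh sbatch wrapper.
model=ddgan_laion_aesthetic_v15   # one of the models listed in the deleted script
bs=32                             # batch size the deleted script used for this model

# Parti prompts: CLIP score only.
srun python -u run.py test $model --cond-text=parti_prompts.txt \
    --batch-size=$bs --epoch=-1 --compute-clip-score --eval-name=parti

# COCO val2014 captions: FID over 30k generated images plus CLIP score.
srun python -u run.py test $model --fid \
    --real-img-dir inception_statistics_coco_val2014_256x256.npz \
    --cond-text coco_val2014_captions.txt --batch-size=$bs --epoch=-1 \
    --nb-images-for-fid=30000 --eval-name=coco --compute-clip-score

# DrawBench prompts: CLIP score only.
srun python -u run.py test $model --cond-text=drawbench.txt \
    --batch-size=$bs --epoch=-1 --compute-clip-score --eval-name=drawbench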
scripts/fid.sh
DELETED
File without changes
scripts/init.sh
DELETED
@@ -1,14 +0,0 @@
-ml purge
-ml use $OTHERSTAGES
-ml Stages/2022
-ml GCC/11.2.0
-ml OpenMPI/4.1.2
-ml CUDA/11.5
-ml cuDNN/8.3.1.22-CUDA-11.5
-ml NCCL/2.12.7-1-CUDA-11.5
-ml PyTorch/1.11-CUDA-11.5
-ml Horovod/0.24
-ml torchvision/0.12.0
-source /p/home/jusers/cherti1/jureca/ccstdl/code/feed_forward_vqgan_clip/envs/jureca_2022/bin/activate
-export HOROVOD_CACHE_CAPACITY=4096
-export CUDA_VISIBLE_DEVICES=0,1,2,3
scripts/init_2020.sh
DELETED
@@ -1,69 +0,0 @@
-machine=$(cat /etc/FZJ/systemname)
-if [[ "$machine" == jurecadc ]]; then
-    export CUDA_VISIBLE_DEVICES=0,1,2,3
-    #ml use $OTHERSTAGES
-    #ml Stages/2020
-    #ml GCC/9.3.0
-    #ml OpenMPI/4.1.0rc1
-    #ml CUDA/11.0
-    #ml cuDNN/8.0.2.39-CUDA-11.0
-    #ml NCCL/2.8.3-1-CUDA-11.0
-    #ml PyTorch
-    #ml Horovod/0.20.3-Python-3.8.5
-    #ml scikit
-    #source /p/project/covidnetx/environments/jureca/bin/activate
-    ml purge
-    ml use $OTHERSTAGES
-    ml Stages/2020
-    ml GCC/10.3.0
-    ml OpenMPI/4.1.1
-    ml Horovod/0.23.0-Python-3.8.5
-    ml scikit
-    source /p/project/covidnetx/environments/jureca/bin/activate
-fi
-if [[ "$machine" == juwelsbooster ]]; then
-    export CUDA_VISIBLE_DEVICES=0,1,2,3
-    #ml use $OTHERSTAGES
-    #ml Stages/2020
-    #ml GCC/9.3.0
-    #ml OpenMPI/4.1.0rc1
-    #ml CUDA/11.0
-    #ml cuDNN/8.0.2.39-CUDA-11.0
-    #ml NCCL/2.8.3-1-CUDA-11.0
-    #ml PyTorch
-    #ml Horovod/0.20.3-Python-3.8.5
-    #ml scikit
-
-    #ml Stages/2021
-    #ml GCC
-    #ml OpenMPI
-    #ml CUDA
-    #ml cuDNN
-    #ml NCCL
-    #ml PyTorch
-    #ml Horovod
-    #ml scikit
-
-    ml purge
-    ml use $OTHERSTAGES
-    ml Stages/2020
-    ml GCC/10.3.0
-    ml OpenMPI/4.1.1
-    ml Horovod/0.23.0-Python-3.8.5
-    ml scikit
-    source /p/project/covidnetx/environments/juwels_booster/bin/activate
-fi
-if [[ "$machine" == jusuf ]]; then
-    ml purge
-    ml use $OTHERSTAGES
-    ml Stages/2020
-    ml GCC/9.3.0
-    ml OpenMPI/4.1.0rc1
-    ml CUDA/11.0
-    ml cuDNN/8.0.2.39-CUDA-11.0
-    ml NCCL/2.8.3-1-CUDA-11.0
-    ml PyTorch
-    ml Horovod/0.20.3-Python-3.8.5
-    #ml scikit
-    source /p/project/covidnetx/environments/jusuf/bin/activate
-fi
scripts/init_2022.sh
DELETED
@@ -1,34 +0,0 @@
-machine=$(cat /etc/FZJ/systemname)
-if [[ "$machine" == jurecadc ]]; then
-    export CUDA_VISIBLE_DEVICES=0,1,2,3
-    ml purge
-    ml use $OTHERSTAGES
-    ml Stages/2022
-    ml GCC/11.2.0
-    ml OpenMPI/4.1.2
-    ml CUDA/11.5
-    ml cuDNN/8.3.1.22-CUDA-11.5
-    ml NCCL/2.12.7-1-CUDA-11.5
-    ml PyTorch/1.11-CUDA-11.5
-    ml Horovod/0.24
-    ml torchvision/0.12.0
-    source /p/project/covidnetx/environments/jureca_2022/bin/activate
-fi
-if [[ "$machine" == juwelsbooster ]]; then
-    export CUDA_VISIBLE_DEVICES=0,1,2,3
-    ml purge
-    ml use $OTHERSTAGES
-    ml Stages/2022
-    ml GCC/11.2.0
-    ml OpenMPI/4.1.2
-    ml CUDA/11.5
-    ml cuDNN/8.3.1.22-CUDA-11.5
-    ml NCCL/2.12.7-1-CUDA-11.5
-    ml PyTorch/1.11-CUDA-11.5
-    ml Horovod/0.24
-    ml torchvision/0.12.0
-    source /p/project/covidnetx/environments/juwels_booster_2022/bin/activate
-fi
-if [[ "$machine" == jusuf ]]; then
-    echo not supported
-fi
scripts/run_hdfml.sh
DELETED
@@ -1,25 +0,0 @@
-#!/bin/bash -x
-#SBATCH --account=cstdl
-#SBATCH --nodes=8
-#SBATCH --ntasks-per-node=4
-#SBATCH --cpus-per-task=8
-#SBATCH --time=06:00:00
-#SBATCH --gres=gpu
-#SBATCH --partition=batch
-ml purge
-ml use $OTHERSTAGES
-ml Stages/2022
-ml GCC/11.2.0
-ml OpenMPI/4.1.2
-ml CUDA/11.5
-ml cuDNN/8.3.1.22-CUDA-11.5
-ml NCCL/2.12.7-1-CUDA-11.5
-ml PyTorch/1.11-CUDA-11.5
-ml Horovod/0.24
-ml torchvision/0.12.0
-source envs/hdfml/bin/activate
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-echo "Job id: $SLURM_JOB_ID"
-export TOKENIZERS_PARALLELISM=false
-export NCCL_ASYNC_ERROR_HANDLING=1
-srun python -u $*
scripts/run_jurecadc_conda.sh
DELETED
@@ -1,23 +0,0 @@
-#!/bin/bash -x
-#SBATCH --account=zam
-#SBATCH --nodes=1
-#SBATCH --ntasks-per-node=4
-#SBATCH --cpus-per-task=24
-#SBATCH --time=06:00:00
-#SBATCH --gres=gpu:4
-#SBATCH --partition=dc-gpu
-ml CUDA
-source /p/project/laionize/miniconda/bin/activate
-conda activate ddgan
-#source scripts/init_2022.sh
-#source scripts/init_2020.sh
-#source scripts/init.sh
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-echo "Job id: $SLURM_JOB_ID"
-export TOKENIZERS_PARALLELISM=false
-#export NCCL_ASYNC_ERROR_HANDLING=1
-export NCCL_IB_TIMEOUT=50
-export UCX_RC_TIMEOUT=4s
-export NCCL_IB_RETRY_CNT=10
-export TORCH_DISTRIBUTED_DEBUG=INFO
-srun python -u $*
scripts/run_jurecadc_ddp.sh
DELETED
@@ -1,20 +0,0 @@
-#!/bin/bash -x
-#SBATCH --account=zam
-#SBATCH --nodes=1
-#SBATCH --ntasks-per-node=4
-#SBATCH --cpus-per-task=24
-#SBATCH --time=06:00:00
-#SBATCH --gres=gpu:4
-#SBATCH --partition=dc-gpu
-source set_torch_distributed_vars.sh
-#source scripts/init_2022.sh
-#source scripts/init_2020.sh
-source scripts/init.sh
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-echo "Job id: $SLURM_JOB_ID"
-export TOKENIZERS_PARALLELISM=false
-#export NCCL_ASYNC_ERROR_HANDLING=1
-export NCCL_IB_TIMEOUT=50
-export UCX_RC_TIMEOUT=4s
-export NCCL_IB_RETRY_CNT=10
-srun python -u $*
scripts/run_jusuf_ddp.sh
DELETED
@@ -1,14 +0,0 @@
-#!/bin/bash -x
-#SBATCH --account=zam
-#SBATCH --nodes=1
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=24
-#SBATCH --time=06:00:00
-#SBATCH --gres=gpu:1
-#SBATCH --partition=gpus
-source set_torch_distributed_vars.sh
-source scripts/init.sh
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-echo "Job id: $SLURM_JOB_ID"
-export TOKENIZERS_PARALLELISM=false
-srun python -u $*
scripts/run_juwelsbooster_conda.sh
DELETED
@@ -1,19 +0,0 @@
-#!/bin/bash -x
-#SBATCH --account=laionize
-#SBATCH --nodes=1
-#SBATCH --ntasks-per-node=4
-#SBATCH --cpus-per-task=24
-#SBATCH --time=06:00:00
-#SBATCH --gres=gpu:4
-#SBATCH --partition=booster
-ml CUDA
-source /p/project/laionize/miniconda/bin/activate
-conda activate ddgan
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-echo "Job id: $SLURM_JOB_ID"
-export TOKENIZERS_PARALLELISM=false
-#export NCCL_ASYNC_ERROR_HANDLING=1
-export NCCL_IB_TIMEOUT=50
-export UCX_RC_TIMEOUT=4s
-export NCCL_IB_RETRY_CNT=10
-srun python -u $*
scripts/run_juwelsbooster_ddp.sh
DELETED
@@ -1,17 +0,0 @@
-#!/bin/bash -x
-#SBATCH --account=covidnetx
-#SBATCH --nodes=4
-#SBATCH --ntasks-per-node=4
-#SBATCH --cpus-per-task=24
-#SBATCH --time=06:00:00
-#SBATCH --gres=gpu:4
-#SBATCH --partition=booster
-source set_torch_distributed_vars.sh
-#source scripts/init_2022.sh
-#source scripts/init_2020.sh
-source scripts/init.sh
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-echo "Job id: $SLURM_JOB_ID"
-export TOKENIZERS_PARALLELISM=false
-export NCCL_ASYNC_ERROR_HANDLING=1
-srun python -u $*