Spaces:
Runtime error
Runtime error
File size: 572 Bytes
eeb9dce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
#!/bin/bash -x
#
# Slurm batch script: loads a fixed module stack, activates a Python virtual
# environment, and launches `python -u` through srun.
#
# Usage:
#   sbatch <this-script> <python-script> [args...]
#
# Every positional argument given to this script is forwarded unchanged to
# `python -u`.
#
# NOTE: the #SBATCH directives must stay ahead of the first executable
# command; only comments and blank lines may precede them.
#
#SBATCH --account=cstdl
#SBATCH --nodes=8
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --time=06:00:00
# NOTE(review): `--gres=gpu` carries no explicit count, while the job starts
# 4 tasks per node and exposes devices 0-3 below. Confirm whether this site
# needs `--gres=gpu:4` to allocate all four GPUs.
#SBATCH --gres=gpu
#SBATCH --partition=batch

# Start from a clean module environment, then load the pinned software stack.
ml purge
# Quoted so the path is passed as a single word even if it contains
# whitespace or glob characters.
ml use "$OTHERSTAGES"
ml Stages/2022
ml GCC/11.2.0
ml OpenMPI/4.1.2
ml CUDA/11.5
ml cuDNN/8.3.1.22-CUDA-11.5
ml NCCL/2.12.7-1-CUDA-11.5
ml PyTorch/1.11-CUDA-11.5
ml Horovod/0.24
ml torchvision/0.12.0

# The path is relative, so it is resolved against the directory the job
# runs in.
# NOTE(review): confirm jobs are always submitted from the project root.
source envs/hdfml/bin/activate

export CUDA_VISIBLE_DEVICES=0,1,2,3
echo "Job id: $SLURM_JOB_ID"
export TOKENIZERS_PARALLELISM=false
export NCCL_ASYNC_ERROR_HANDLING=1

# "$@" forwards each argument as its own word. The previous unquoted $*
# re-split arguments on whitespace and expanded glob characters, corrupting
# any argument that contained spaces or wildcards.
srun python -u "$@"
|