#!/bin/sh
### General options
### -- specify queue --
#BSUB -q gpuv100
### -- set the job name --
#BSUB -J fulem_2g_excl_196
### -- ask for number of cores (default: 1) --
#BSUB -n 8
### -- specify that the cores must be on the same host --
#BSUB -R "span[hosts=1]"
### -- select the resources: 2 GPUs in exclusive process mode --
#BSUB -gpu "num=2:mode=exclusive_process"
### -- set walltime limit: hh:mm -- maximum 24 hours for GPU-queues right now
#BSUB -W 15:00
# request 5GB of system-memory
# NOTE(review): many LSF sites reserve rusage[mem] per core/slot, which with
# -n 8 would total 40GB -- confirm against the cluster documentation.
#BSUB -R "rusage[mem=5GB]"
### -- set the email address --
# please uncomment the following line and put in your e-mail address,
# if you want to receive e-mail notifications on a non-default address
##BSUB -u your_email_address
### -- send notification at start --
#BSUB -B
### -- send notification at completion --
#BSUB -N
### -- Specify the output and error file. %J is the job-id --
### -- -o and -e mean append, -oo and -eo mean overwrite --
#BSUB -o gpu_%J.out
#BSUB -e gpu_%J.err
# -- end of LSF options --
# -- job body --
# Log the GPUs visible to this job. Purely diagnostic: a failure here is
# reported in the job's error file but does not stop the run.
nvidia-smi

# Load the CUDA toolkit and the cuDNN build that targets it. Abort early if
# either module is unavailable instead of spending the allocation on a run
# that cannot use the GPUs.
module load cuda/10.1 || exit 1
module load cudnn/v7.6.5.32-prod-cuda-10.1 || exit 1

# Record which python3 interpreter is first on PATH for later debugging.
echo "checking python bin location"
command -v python3 || echo "python3 not found on PATH" >&2

# Experiment name handed to the Makefile's `train` target; keep it in sync
# with the job name given to `#BSUB -J`.
exp_name=fulem_2g_excl_196

echo "training txt2motion diffusion model..."
# Last command: its exit status becomes the job's exit status.
make train EXP="$exp_name"