#!/bin/bash
# SLURM SUBMIT SCRIPT
#
# Batch job that activates the user's virtualenv and launches
# realfake/train_cluster.py through srun on one 8-GPU node.
#
# sbatch requires the first line of a batch script to be a `#!` interpreter
# line, and only honours `#SBATCH` directives that appear before the first
# executable command, so keep the directive block directly below this header.

# Resources: one node on the g40 partition, 8 GPUs, 6 CPU cores per GPU.
#SBATCH --partition=g40
#SBATCH --nodes=1
#SBATCH --gpus=8
#SBATCH --cpus-per-gpu=6
# Accounting / bookkeeping labels.
#SBATCH --job-name=realfake
#SBATCH --comment=laion
# Ask SLURM to send SIGUSR1 roughly 90 seconds before the time limit.
#SBATCH --signal=SIGUSR1@90

# Use the Python environment installed under the submitting user's home.
source "${HOME}/venv/bin/activate"

# Verbose NCCL logging, useful when diagnosing multi-GPU communication issues.
export NCCL_DEBUG=INFO
# Have Python dump tracebacks on fatal errors (segfaults, aborts, ...).
export PYTHONFAULTHANDLER=1
# Make the project checkout importable from the training script.
export PYTHONPATH="${HOME}/realfake"

echo "Working directory: $(pwd)"

# The script path below is relative, so the job must start from the directory
# that contains the `realfake/` checkout (see the working directory echoed
# above).
# Flags are presumably: -jf metadata file, -mn model name, -e epochs,
# -bs batch size -- TODO confirm against train_cluster.py.
# NOTE(review): no --ntasks/--ntasks-per-node is requested although 8 devices
# with DDP are asked for; verify that the launcher is expected to spawn the
# per-GPU workers itself rather than rely on one SLURM task per GPU.
srun python3 realfake/train_cluster.py \
  -jf "${HOME}/realfake/metadata/prepared.2000k.jsonl" \
  -mn convnext_large -e 5 -bs 128 \
  --acceleratorparams.devices=8 \
  --acceleratorparams.strategy=ddp_find_unused_parameters_false