Spaces:
Sleeping
Sleeping
#!/bin/bash
#SBATCH --partition=batch
#SBATCH --job-name=test
#SBATCH --output=test.out
#SBATCH --error=test.err
#SBATCH --time=23:00:00
#SBATCH --mem=110G
#SBATCH --gres=gpu:a100:4
#SBATCH --cpus-per-task=16
#
# SLURM batch job: picks a free TCP port and launches train.py through
# torchrun on this node.
#
# The directives above submit to the "batch" partition as job "test",
# write stdout/stderr to test.out/test.err, and request a 23 hour time
# limit, 110G of memory, four a100 GPUs and 16 CPUs per task.
#
# NOTE: sbatch requires the "#!" line to be the very first line of the
# submitted file, and only reads #SBATCH directives that appear before the
# first executable command. Keep nothing but comments above this point.

## run the application:
job_name=test # Name of the experiment
cfg_path="train_configs/224_v2_llama2_video_stage_2.yaml" # path to the config file
# Number of worker processes torchrun starts on this node.
# NOTE(review): the job requests 4 GPUs via --gres but only 1 process is
# launched here -- confirm this mismatch is intended.
number_of_gpus=1
# cd ../../

# The kernel's ephemeral port range bounds the random search below.
port_range_file=/proc/sys/net/ipv4/ip_local_port_range
if [[ -r "${port_range_file}" ]]; then
  read -r LOWERPORT UPPERPORT < "${port_range_file}"
else
  # Fall back to the usual Linux default range when the file is unreadable.
  printf 'warning: cannot read %s; assuming 32768-60999\n' \
    "${port_range_file}" >&2
  LOWERPORT=32768
  UPPERPORT=60999
fi

#######################################
# Print a random port that no socket is currently listening on.
# Globals:   LOWERPORT, UPPERPORT (read) - inclusive bounds of the search
# Arguments: none
# Outputs:   the chosen port number on stdout
# Returns:   0 on success, 1 if shuf could not produce a candidate
#######################################
find_free_port() {
  local candidate
  while :; do
    # Stop instead of looping forever on an empty candidate if shuf fails.
    candidate=$(shuf -i "${LOWERPORT}-${UPPERPORT}" -n 1) || return 1
    # ss prints listening sockets as "addr:port " -- the trailing space in
    # the pattern keeps e.g. :4000 from matching :40001.
    ss -lpn | grep -q ":${candidate} " || break
  done
  printf '%s\n' "${candidate}"
}

# The port is only checked, not reserved: another process may still grab it
# between this check and torchrun binding to it.
PORT=$(find_free_port) || {
  printf 'error: could not pick a port in %s-%s\n' \
    "${LOWERPORT}" "${UPPERPORT}" >&2
  exit 1
}
echo "Port is $PORT"

# Last command: its exit status becomes the exit status of the job script.
torchrun --master-port "${PORT}" --nproc-per-node "${number_of_gpus}" train.py --job_name "${job_name}" --cfg-path "${cfg_path}"