Spaces:
Runtime error
Runtime error
File size: 2,089 Bytes
54199b6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# Positional arguments (all optional):
#   $1  exp   experiment name; also used as the srun --job-name  (default: test)
#   $2  gpu   process/GPU count handed to torchrun or srun        (default: 1)
#   $3  type  'local' runs through torchrun, any other value
#             selects the slurm (srun) launcher                   (default: local)
#   $4+       extra arguments, collected verbatim into $extra_args
exp=${1:-'test'}
gpu=${2:-'1'}
type=${3:-'local'} # choose slurm if you are running on a cluster with slurm scheduler

# Everything from the 4th positional argument onwards is flattened into one
# space-joined string, for both launcher types. There is no upper bound on
# the number of extra arguments (an earlier ":99" length cap silently dropped
# anything past the 99th).
# ${@:4} (not "${*:4}") is deliberate: in an assignment it joins with a plain
# space even when the caller has changed IFS.
# shellcheck disable=SC2124
extra_args=${@:4}

if [[ "$type" != 'local' ]]; then
  # slurm settings are fixed values; positional arguments 4 and 5 are NOT
  # consumed as quotatype/partition (they belong to $extra_args).
  # Replace YOUR_PARTITION with a real partition name before submitting.
  quotatype=spot
  partition=YOUR_PARTITION
fi
# Derive the log directory from the path held in $name: a leading "configs"
# becomes "logs" and every ".sh" becomes "/$exp"
# (e.g. configs/foo.sh with exp=test -> logs/foo/test).
# NOTE(review): $name is not assigned anywhere in this file; presumably the
# script that sources this one sets it beforehand -- confirm.
name=${name/#configs/logs}
name=${name//.sh//$exp}
work_dir="${name}"

# Timestamp of the form YYYYmmdd_HHMMSS; not used further in this file.
now=$(date +"%Y%m%d_%H%M%S")

# Quoted (and protected with --) so a path containing spaces, glob characters
# or a leading dash yields exactly one directory tree instead of being split.
mkdir -p -- "$work_dir"

# Value passed to srun as --cpus-per-task.
ncpu='4'

if [[ "$quotatype" == 'reserved_normal' ]]; then
  # Single quotes keep ${gpu} as literal text in this value.
  # NOTE(review): presumably it is expanded later when the assembled command
  # string is evaluated by the caller -- confirm.
  quotatype='reserved --phx-priority=${gpu} normal'
fi
# Pick dataset locations and the launcher command prefix ($header) for the
# selected $type. $header always ends with "... src.training.main " (note the
# trailing space).
# NOTE(review): the trailing space presumably lets callers append arguments
# directly to $header -- confirm against the sourcing scripts.
case "$type" in
  local)
    # Single-node launch through torchrun, one process per requested GPU.
    header="torchrun --nproc_per_node=${gpu} --nnodes=1 --max_restarts=3 -m src.training.main "

    # Only ava_path and the local_* locations are defined for this mode;
    # data_path, simulacra_path and region_path are left as they were.
    ava_path=/mnt/afs/xswu/datasets/AVA/images
    local_ava_path=/mnt/afs/xswu/datasets/AVA
    local_data_path=/mnt/afs/xswu/datasets/preference
    local_simulacra_path=/mnt/afs/xswu/datasets/simulacra
    local_region_path=/mnt/afs/xswu/datasets/regional_dataset
    local_ranking_path=/mnt/afs/xswu/datasets/HPDv2
    local_benchmark_path=/mnt/afs/xswu/datasets/benchmark
    local_ImageReward_path=/mnt/afs/xswu/datasets/ImageReward
    local_pap_path=/mnt/afs/xswu/datasets/PAP
    ;;
  *)
    # Any other launcher type: s3:// sources plus lustre-hosted copies.
    data_path=s3://preference_images/
    ava_path=s3://AVA/
    simulacra_path=s3://simulacra/
    region_path=/mnt/lustre/wuxiaoshi1.vendor/datasets/regional_dataset/

    local_data_path=/mnt/lustre/wuxiaoshi1.vendor/datasets/human_preference
    local_ava_path=/mnt/lustre/wuxiaoshi1.vendor/datasets/AVA
    local_simulacra_path=/mnt/lustre/wuxiaoshi1.vendor/datasets/simulacra
    local_region_path=/mnt/lustre/wuxiaoshi1.vendor/datasets/regional_dataset
    local_ranking_path=/mnt/lustre/wuxiaoshi1.vendor/datasets/ranking_dataset
    local_benchmark_path=/mnt/lustre/wuxiaoshi1.vendor/datasets/benchmark
    local_ImageReward_path=/mnt/lustre/wuxiaoshi1.vendor/datasets/ImageReward

    # srun prefix, assembled piecewise; the resulting string is a single line.
    header="srun --async --partition=$partition -n${gpu} --mpi=pmi2 --gres=gpu:$gpu"
    header+=" --ntasks-per-node=${gpu} --quotatype=$quotatype"
    header+=" --job-name=$exp --cpus-per-task=$ncpu --kill-on-bad-exit=1"
    header+=" -o local.out python -m src.training.main "
    ;;
esac
|