#!/usr/bin/env bash
# Launch DramaBox IC-LoRA training. Wraps src/train.py with accelerate.
#
# Usage:
#   ./scripts/train.sh --config configs/training_args.yaml --gpus 2,3,4,5,6
#
# Environment overrides: GPUS, NUM_PROCS, TRAIN_VAL_GPU, PYTHON.
set -euo pipefail

CONFIG=""
GPUS=${GPUS:-0,1,2,3,4,5,6,7}
NUM_PROCS=${NUM_PROCS:-}
TRAIN_VAL_GPU=${TRAIN_VAL_GPU:-}
EXTRA_ARGS=()  # unrecognized flags are forwarded verbatim to train.py

while [[ $# -gt 0 ]]; do
  case "$1" in
    --config)        CONFIG="$2"; shift 2;;
    --gpus)          GPUS="$2"; shift 2;;
    --num-procs)     NUM_PROCS="$2"; shift 2;;
    --train-val-gpu) TRAIN_VAL_GPU="$2"; shift 2;;
    *)               EXTRA_ARGS+=("$1"); shift;;
  esac
done

if [[ -z "$CONFIG" ]]; then
  echo "Usage: $0 --config <yaml> [--gpus 2,3,4,5,6] [--num-procs N] [--train-val-gpu N]" >&2
  exit 1
fi

REPO="$(cd "$(dirname "$0")/.." && pwd)"
# NOTE(fix): the old default '/usr/bin/env python' was always invoked quoted as
# one word ("$PYTHON"), which can never resolve to an executable. Default to a
# plain interpreter name; PATH lookup replaces the env(1) indirection.
PYTHON="${PYTHON:-python}"

# Default num_procs to the number of entries in the comma-separated GPU list.
if [[ -z "$NUM_PROCS" ]]; then
  IFS=',' read -r -a _gpus <<< "$GPUS"
  NUM_PROCS=${#_gpus[@]}
fi

# get <key> <default>: read one value from the YAML config. Scalars print
# as-is; lists (data_dir, speaker_index) print space-joined. Config path, key
# and default travel via argv so quotes/backslashes in them cannot break (or
# inject code into) the Python snippet, unlike the old string interpolation.
get() {
  "$PYTHON" - "$CONFIG" "$1" "$2" <<'PY'
import sys, yaml
_, path, key, default = sys.argv
with open(path) as f:
    cfg = yaml.safe_load(f)
val = cfg.get(key, default)
print(' '.join(map(str, val)) if isinstance(val, (list, tuple)) else val)
PY
}

DATA_DIRS=$(get data_dir "")
SPK_IDX=$(get speaker_index "")
OUT_DIR=$(get output_dir "tts_iclora_v1")
if [[ "$OUT_DIR" != /* ]]; then OUT_DIR="$REPO/$OUT_DIR"; fi
CKPT=$(get checkpoint "$REPO/ltx-2.3-22b-dev.safetensors")
FULL_CKPT=$(get full_checkpoint "$REPO/ltx-2.3-22b-dev.safetensors")
BASE_MODEL=$(get base_model dev)
LORA_RANK=$(get lora_rank 128)
LORA_ALPHA=$(get lora_alpha 128)
LORA_DROPOUT=$(get lora_dropout 0.0)
RESUME_LORA=$(get resume_lora "")
if [[ -n "$RESUME_LORA" && "$RESUME_LORA" != /* ]]; then RESUME_LORA="$REPO/$RESUME_LORA"; fi
REF_RATIO=$(get ref_ratio 0.3)
MAX_REF=$(get max_ref_tokens 200)
TEXT_DROP=$(get text_dropout 0.4)
STEPS=$(get steps 10000)
LR=$(get lr 0.0001)
SCHED=$(get lr_scheduler cosine)
BATCH=$(get batch_size 1)
GRAD_ACC=$(get grad_accum 4)
GRAD_NORM=$(get max_grad_norm 1.0)
SAVE_EVERY=$(get save_every 500)
LOG_EVERY=$(get log_every 50)
SEED=$(get seed 53)
WARMUP=$(get warmup_steps 500)
VAL_CFG=$(get val_config "")
if [[ -n "$VAL_CFG" && "$VAL_CFG" != /* ]]; then VAL_CFG="$REPO/configs/$VAL_CFG"; fi

mkdir -p "$OUT_DIR"

CMD=( "$PYTHON" -u -m accelerate.commands.launch
      --num_processes="$NUM_PROCS" --mixed_precision=bf16
      "$REPO/src/train.py" )
# data_dir / speaker_index may be YAML lists (space-joined by `get`); word
# splitting here is intentional so each entry becomes its own argv element.
# shellcheck disable=SC2206
CMD+=( --data-dir $DATA_DIRS --speaker-index $SPK_IDX )
CMD+=( --output-dir "$OUT_DIR"
      --checkpoint "$CKPT" --full-checkpoint "$FULL_CKPT" --base-model "$BASE_MODEL"
      --lora-rank "$LORA_RANK" --lora-alpha "$LORA_ALPHA" --lora-dropout "$LORA_DROPOUT"
      --ref-ratio "$REF_RATIO" --max-ref-tokens "$MAX_REF" --text-dropout "$TEXT_DROP"
      --steps "$STEPS" --lr "$LR" --lr-scheduler "$SCHED"
      --batch-size "$BATCH" --grad-accum "$GRAD_ACC" --max-grad-norm "$GRAD_NORM"
      --save-every "$SAVE_EVERY" --log-every "$LOG_EVERY" --seed "$SEED"
      --warmup-steps "$WARMUP" )
if [[ -n "$RESUME_LORA" ]]; then CMD+=( --resume-lora "$RESUME_LORA" ); fi
if [[ -n "$VAL_CFG" ]]; then CMD+=( --val-config "$VAL_CFG" ); fi
# Pass-through args kept as an array (previously an unquoted string, which
# mangled any forwarded argument containing spaces). The ${arr[@]+...} guard
# keeps `set -u` happy on bash < 4.4 when the array is empty.
CMD+=( ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"} )

LAUNCH_ENV=( "CUDA_VISIBLE_DEVICES=$GPUS" )
if [[ -n "$TRAIN_VAL_GPU" ]]; then LAUNCH_ENV+=( "TRAIN_VAL_GPU=$TRAIN_VAL_GPU" ); fi

echo "==== launching DramaBox training ===="
echo " GPUs: $GPUS (procs: $NUM_PROCS)"
echo " out: $OUT_DIR"
echo " ckpt: $CKPT"
echo " steps: $STEPS lr: $LR $SCHED warmup: $WARMUP"
echo "======================================"
env "${LAUNCH_ENV[@]}" "${CMD[@]}"