ricl / scripts /train_debug_interactive.sh
doanh25032004's picture
Add files using upload-large-folder tool
991941e verified
#!/bin/bash
# Interactive debug training (no SLURM - runs directly on allocated node).
#
# Runs a short RICL training job with scripts/train_pi0_fast_ricl.py using
# the `pi0_fast_ricl_libero` config, with W&B disabled, overwrite enabled and
# resume disabled (see the flags at the bottom of the argument list).
#
# Usage: bash scripts/train_debug_interactive.sh
# Exit status: 0 on success; 1 if the project directory cannot be entered;
#              otherwise the non-zero exit status of the training process.

# Abort on unhandled errors, on use of unset variables, and on pipeline failures.
set -euo pipefail

# Shared filesystem locations. Every path below is derived from VLA_ROOT.
readonly VLA_ROOT="/projects/extern/kisski/kisski-spath/dir.project/VLA_Groot"
readonly REPO_DIR="${VLA_ROOT}/in_context_learning/ricl_openpi"

# Hyper-parameters that appear both in the banner and on the command line.
# Keeping them in one place guarantees the banner reports what is actually run.
readonly NUM_TRAIN_STEPS=500
readonly BATCH_SIZE=2
readonly ACTION_HORIZON=15
readonly LAMBDA=10.0

# The relative paths used below (scripts/..., rag/...) are resolved against the
# repository root, so refuse to continue if we cannot get there.
cd "${REPO_DIR}" || {
  echo "ERROR: cannot change directory to ${REPO_DIR}" >&2
  exit 1
}

# Prepend the project sources. The ${VAR:+...} form only adds the ':' separator
# when PYTHONPATH is already set and non-empty; a dangling ':' would create an
# empty entry, which Python interprets as the current directory.
export PYTHONPATH="${REPO_DIR}/src${PYTHONPATH:+:${PYTHONPATH}}"
export HF_HOME="${VLA_ROOT}/hf_cache"

# Memory optimization for JAX/XLA: do not preallocate GPU memory up front.
export XLA_PYTHON_CLIENT_PREALLOCATE=false

echo "=========================================="
echo "RICL Interactive Debug Training"
echo "=========================================="
echo "Config:"
echo " Steps: ${NUM_TRAIN_STEPS} (quick test)"
echo " Batch: ${BATCH_SIZE} (reduced to fit single GPU memory)"
echo " Action Horizon: ${ACTION_HORIZON}"
echo " Lambda: ${LAMBDA}"
echo "=========================================="

# Build the argument list as an array so each flag stays a single word.
# NOTE(review): `--model.lamda` is spelled exactly as in the original
# invocation; presumably it matches the training script's option name.
train_args=(
  pi0_fast_ricl_libero
  --exp-name="ricl_libero_debug_interactive"
  --project-name="ricl_openpi"
  --libero-data-dir="${VLA_ROOT}/merged_libero_mask_depth_noops_lerobot_10"
  --libero-context-dir="rag/ricl_training_context_libero_10_test"
  --model.lamda="${LAMBDA}"
  --model.use-action-interpolation
  --model.num-retrieved-observations=1
  --model.action-horizon="${ACTION_HORIZON}"
  --model.action-dim=7
  --model.max-token-len=250
  --batch-size="${BATCH_SIZE}"
  --num-train-steps="${NUM_TRAIN_STEPS}"
  --save-interval=500
  --log-interval=10
  --lr-schedule.peak-lr=1e-4
  --seed=42
  --no-wandb-enabled
  --overwrite
  --no-resume
)

# Capture the exit status explicitly (instead of letting `set -e` abort) so a
# failure is reported clearly and propagated to the caller.
train_status=0
python scripts/train_pi0_fast_ricl.py "${train_args[@]}" || train_status=$?

echo ""
if (( train_status != 0 )); then
  echo "==========================================" >&2
  echo "Debug Training FAILED (exit code ${train_status})" >&2
  echo "==========================================" >&2
  exit "${train_status}"
fi

echo "=========================================="
echo "Debug Training Complete!"
echo "=========================================="