PhysioJEPA / scripts /pod_bootstrap.sh
guychuk's picture
Upload folder using huggingface_hub
31e2456 verified
#!/usr/bin/env bash
# Bootstrap a training run. Run this on the RunPod pod.
#
# Usage: <this script> <model_letter A|B|C|F> <run_name>
#   $1  model letter (required)
#   $2  run name (required)
#
# Strict mode: abort on errors, on unset variables, and on a failure in any
# stage of a pipeline.
set -o errexit -o nounset -o pipefail

# ${N:?msg} aborts the script with msg when the positional is missing or empty.
MODEL="${1:?model letter required}"
RUN_NAME="${2:?run name required}"

printf '[bootstrap] model=%s run=%s\n' "$MODEL" "$RUN_NAME"
# Locate the repository checkout under /workspace and make it the working
# directory. Either capitalisation of the directory name is accepted; the
# first candidate that exists wins. Exits with status 1 when neither exists.
cd /workspace
REPO_DIR=""
for candidate in PhysioJEPA physiojepa; do
  if [[ -d "$candidate" ]]; then
    REPO_DIR="$candidate"
    break
  fi
done
if [[ -z "$REPO_DIR" ]]; then
  # Report the failure on stderr so it is not mixed into regular output.
  echo "no repo dir found at /workspace/{PhysioJEPA,physiojepa}" >&2
  exit 1
fi
cd "$REPO_DIR"
# Dependencies go straight into the image's system Python, which already has
# torch 2.4.1+cu124 wired up; only the extra packages are installed on top.
PY=/usr/bin/python3
"$PY" -m pip install --quiet --upgrade pip

# Extra requirements (with minimum versions) added to the system site-packages.
extra_pkgs=(
  'datasets>=4.8.4'
  'einops>=0.8.2'
  'matplotlib>=3.10.0'
  'neurokit2>=0.2.13'
  'python-dotenv>=1.0'
  'pyyaml>=6.0'
  'scikit-learn>=1.5'
  'scipy>=1.13'
  'tqdm>=4.66'
  'wandb>=0.18'
  'wfdb>=4.3.1'
  'huggingface_hub>=0.25'
  'requests'
)
"$PY" -m pip install --quiet "${extra_pkgs[@]}"

# Interpreter used by every later step of the script.
RUN_PY="$PY"
# Stage env keys: the launcher is expected to have sent /workspace/.env to the
# pod. When that file is present, copy it into the current directory as .env;
# when it is absent, carry on without it.
staged_env=/workspace/.env
if [[ -f "$staged_env" ]]; then
  cp "$staged_env" .env
fi
# Step 1: prepare data. The step is skipped when the index file already
# exists, so re-running the bootstrap does not repeat the preparation.
mimic_root=/workspace/cache/mimic
mimic_index=/workspace/cache/mimic_index.json
if [[ ! -f "$mimic_index" ]]; then
  printf '%s\n' "[bootstrap] downloading MIMIC shards + building index"
  PYTHONPATH=src "$RUN_PY" scripts/prepare_data.py \
    --root "$mimic_root" \
    --index "$mimic_index"
fi
# Write the shard_roots JSON for the trainer: the sorted paths of every
# shard_* entry under /workspace/cache/mimic that contains a
# dataset_info.json. The number of shards found is printed.
# The heredoc delimiter is quoted so the shell expands nothing inside it.
PYTHONPATH=src "$RUN_PY" - <<'PY'
import json
import pathlib

cache = pathlib.Path('/workspace/cache')
roots = sorted(
    str(shard)
    for shard in (cache / 'mimic').glob('shard_*')
    if (shard / 'dataset_info.json').exists()
)
(cache / 'shard_roots.json').write_text(json.dumps(roots))
print('shards:', len(roots))
PY
# Step 2: train.
# Runs scripts/train.py unbuffered, with stderr merged into stdout, and tees
# the combined stream to /workspace/runs/<run_name>.log. Because the script
# runs with pipefail, a failure of either train.py or tee aborts it before
# the final "done" line.
train_log="/workspace/runs/${RUN_NAME}.log"

# tee opens the log file as soon as the pipeline starts, so its directory has
# to exist up front. Without this, a fresh pod (no /workspace/runs yet) loses
# the log and the pipeline reports failure even when training succeeds.
mkdir -p -- "$(dirname -- "$train_log")"

echo "[bootstrap] launching training: model=$MODEL"
PYTHONPATH=src PYTHONUNBUFFERED=1 "$RUN_PY" -u scripts/train.py \
  --config configs/base.yaml \
  --model "$MODEL" \
  --run_name "$RUN_NAME" \
  --epochs 25 \
  --shard_roots_json /workspace/cache/shard_roots.json \
  --index_path /workspace/cache/mimic_index.json \
  --output_dir /workspace/runs \
  --num_workers 8 \
  --subset_frac 0.10 \
  --log_every 25 \
  2>&1 | tee "$train_log"
echo "[bootstrap] done"