Spaces:
Paused
Paused
# ─────────────────────────────────────────────────────────────────────────────
# setup.sh — Medical VQA Environment Setup
# Supported: Vast.ai (CUDA), Google Colab, local macOS (CPU/MPS)
#
# Usage:
#   chmod +x setup.sh && bash setup.sh
#   bash setup.sh --colab      # Google Colab mode (skip git config)
#   bash setup.sh --offline    # Offline mode (do not sync WandB)
#   bash setup.sh --skip-nltk  # Skip NLTK data download
# ─────────────────────────────────────────────────────────────────────────────
set -euo pipefail

# ── Parse CLI flags ──────────────────────────────────────────────────────────
COLAB_MODE=0
OFFLINE_MODE=0
SKIP_NLTK=0
for arg in "$@"; do
  case $arg in
    --colab)     COLAB_MODE=1 ;;
    --offline)   OFFLINE_MODE=1 ;;
    --skip-nltk) SKIP_NLTK=1 ;;
    # Fix: unknown flags were silently ignored, hiding typos (e.g. --ofline).
    # The color helpers are defined further down, so plain echo to stderr here.
    *) echo "[WARN] Unknown flag: $arg (ignored)" >&2 ;;
  esac
done
# ── Colors ───────────────────────────────────────────────────────────────────
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'

# Logging helpers: colored [INFO]/[WARN]/[ERROR] prefixes; error() aborts
# the whole script with exit status 1.
info()  { echo -e "${GREEN}[INFO]${NC} $*"; }
warn()  { echo -e "${YELLOW}[WARN]${NC} $*"; }
error() { echo -e "${RED}[ERROR]${NC} $*"; exit 1; }
# Intro banner (runtime text reproduced verbatim from the original).
cat <<'BANNER'

ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
 π₯ Medical VQA β Environment Setup
 Project: DL Final 523H0173 & 523H0178
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ

BANNER
# ── 1. Python version check ──────────────────────────────────────────────────
# `|| true` keeps `set -e` from killing the script silently when neither
# binary exists; we emit an explicit error instead.
PYTHON=$(command -v python3 || command -v python) || true
[ -n "${PYTHON:-}" ] || error "Python not found on PATH"

# Ask Python itself for "major.minor": the original `grep -oP` relies on a
# GNU-only flag and breaks on macOS/BSD grep, which this script supports.
PY_VER=$("$PYTHON" -c 'import sys; print("%d.%d" % sys.version_info[:2])')
PY_MAJOR=${PY_VER%%.*}
PY_MINOR=${PY_VER#*.}
info "Python $PY_VER tαΊ‘i: $($PYTHON -c 'import sys; print(sys.executable)')"
# Require Python >= 3.10.
if [ "$PY_MAJOR" -lt 3 ] || { [ "$PY_MAJOR" -eq 3 ] && [ "$PY_MINOR" -lt 10 ]; }; then
  error "CαΊ§n Python β₯ 3.10 (hiα»n tαΊ‘i: $PY_VER)"
fi
# ── 2. GPU detection ─────────────────────────────────────────────────────────
# NOTE(review): this runs BEFORE `pip install`, so `import torch` can fail on
# a fresh machine even though a CUDA GPU is present. Fall back to nvidia-smi
# so the hardware is still reported correctly in that case.
CUDA_AVAILABLE=$($PYTHON -c "import torch; print(torch.cuda.is_available())" 2>/dev/null || echo "False")
if [ "$CUDA_AVAILABLE" = "True" ]; then
  GPU_NAME=$($PYTHON -c "import torch; print(torch.cuda.get_device_name(0))" 2>/dev/null || echo "Unknown")
  VRAM=$($PYTHON -c "import torch; print(round(torch.cuda.get_device_properties(0).total_memory/1e9,1))" 2>/dev/null || echo "?")
  info "GPU: $GPU_NAME | VRAM: ${VRAM}GB"
elif command -v nvidia-smi >/dev/null 2>&1; then
  GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -n1 || echo "Unknown")
  info "GPU (nvidia-smi): $GPU_NAME (torch not installed yet, re-checked at training time)"
else
  warn "KhΓ΄ng phΓ‘t hiα»n CUDA GPU β training sαΊ½ rαΊ₯t chαΊm trΓͺn CPU"
fi
# ── 3. Install pip packages ──────────────────────────────────────────────────
info "CΓ i ΔαΊ·t dependencies tα»« requirements.txt..."
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REQ_FILE="$SCRIPT_DIR/requirements.txt"
[ -f "$REQ_FILE" ] || error "KhΓ΄ng tΓ¬m thαΊ₯y $REQ_FILE"

# Upgrade pip first.
"$PYTHON" -m pip install --upgrade pip --quiet

# Main requirements: try quietly to reduce noise, retry verbosely on failure
# so the actual pip error is visible.
if ! "$PYTHON" -m pip install -r "$REQ_FILE" --quiet; then
  warn "CΓ i ΔαΊ·t silent thαΊ₯t bαΊ‘i, thα» vα»i verbose..."
  "$PYTHON" -m pip install -r "$REQ_FILE"
fi

# wandb with a minimum-version constraint.
"$PYTHON" -m pip install "wandb>=0.16.0" --quiet
info "β Dependencies ΔΓ£ cΓ i xong"
# ── 4. NLTK data download ────────────────────────────────────────────────────
if [ "$SKIP_NLTK" -eq 0 ]; then
  info "TαΊ£i NLTK data (punkt, wordnet)..."
  # Quoted heredoc instead of a double-quoted `-c "..."` string: the Python
  # source reaches the interpreter verbatim, with no shell interpolation risk.
  "$PYTHON" - <<'PYEOF'
import nltk
import ssl

# Work around certificate issues on some hosts by allowing an unverified
# HTTPS context for the NLTK downloads only.
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

for pkg in ['punkt', 'punkt_tab', 'wordnet', 'averaged_perceptron_tagger', 'stopwords']:
    try:
        nltk.download(pkg, quiet=True)
    except Exception as e:
        # Best-effort: a single failed corpus should not abort setup.
        print(f' [WARN] NLTK {pkg}: {e}')
print(' NLTK data OK')
PYEOF
fi
# ── 5. Python path configuration ─────────────────────────────────────────────
info "CαΊ₯u hΓ¬nh Python path..."
# Drop a .pth file into site-packages so Python always adds the project root
# to sys.path, regardless of the current working directory.
SITE_PACKAGES=$($PYTHON -c "import site; print(site.getsitepackages()[0])" 2>/dev/null || \
               $PYTHON -c "import site; print(site.getusersitepackages())")
PTH_FILE="$SITE_PACKAGES/medical_vqa.pth"
# if/else instead of the `A && B || C` antipattern, where C would also run
# whenever B failed (not only when A did).
if echo "$SCRIPT_DIR" > "$PTH_FILE" 2>/dev/null; then
  info "β Path cαΊ₯u hΓ¬nh tαΊ‘i: $PTH_FILE"
else
  warn "KhΓ΄ng thα» ghi vΓ o site-packages, thα» export PYTHONPATH thα»§ cΓ΄ng."
fi
# Also export PYTHONPATH for the current session.
export PYTHONPATH="$SCRIPT_DIR:${PYTHONPATH:-}"
info "PYTHONPATH = $PYTHONPATH"
# ── 6. .env file ─────────────────────────────────────────────────────────────
ENV_FILE="$SCRIPT_DIR/.env"
ENV_EXAMPLE="$SCRIPT_DIR/.env.example"
# Seed .env from the template on first run.
if [ ! -f "$ENV_FILE" ] && [ -f "$ENV_EXAMPLE" ]; then
  cp "$ENV_EXAMPLE" "$ENV_FILE"
  warn "ΔΓ£ tαΊ‘o .env tα»« .env.example β HΓ£y Δiα»n WANDB_API_KEY!"
fi
if [ -f "$ENV_FILE" ]; then
  # Source .env, skipping comments and blank lines. POSIX class [[:space:]]
  # instead of GNU-only `\s`, so BSD grep (macOS) works too.
  set -a
  source <(grep -v '^[[:space:]]*#' "$ENV_FILE" | grep -v '^[[:space:]]*$') 2>/dev/null || true
  set +a
  info ".env ΔΓ£ Δược load"
fi
# ── 7. WandB login ───────────────────────────────────────────────────────────
if [ "$OFFLINE_MODE" -eq 1 ]; then
  export WANDB_MODE=offline
  info "WandB: OFFLINE mode (sync sau bαΊ±ng: wandb sync)"
elif [ -n "${WANDB_API_KEY:-}" ]; then
  # NOTE(review): `wandb login` has no `--quiet` flag; with stderr discarded
  # the original invocation failed silently and always took the warn branch.
  # Dropped the bogus flag and replaced `A && B || C` with explicit if/else.
  if $PYTHON -m wandb login "$WANDB_API_KEY" --relogin 2>/dev/null; then
    info "β WandB logged in (entity: SpringWang08)"
  else
    warn "WandB login thαΊ₯t bαΊ‘i β kiα»m tra WANDB_API_KEY"
  fi
else
  warn "WANDB_API_KEY chΖ°a Δược set β WandB sαΊ½ bα» bα» qua khi training"
  warn " Set bαΊ±ng: export WANDB_API_KEY=your_key"
  warn " HoαΊ·c Δiα»n vΓ o file .env"
fi
# ── 8. HuggingFace login ─────────────────────────────────────────────────────
if [ -n "${HF_TOKEN:-}" ]; then
  # Read the token from the environment inside Python instead of splicing it
  # into the source string: a token containing quotes or backslashes would
  # break (or worse, alter) the generated code. HF_TOKEN is exported either
  # by the user or by the `set -a` sourcing of .env above.
  if $PYTHON -c "import os; from huggingface_hub import login; login(token=os.environ['HF_TOKEN'], add_to_git_credential=False)" 2>/dev/null; then
    info "β HuggingFace logged in"
  else
    warn "HF login thαΊ₯t bαΊ‘i β dataset cΓ΄ng khai vαΊ«n tαΊ£i Δược"
  fi
else
  warn "HF_TOKEN chΖ°a Δược set (khΓ΄ng cαΊ§n nαΊΏu dataset lΓ public)"
fi
# ── 9. Create required project directories ───────────────────────────────────
info "TαΊ‘o thΖ° mα»₯c dα»± Γ‘n..."
for d in checkpoints logs/history results/charts data scripts; do
  mkdir -p "$SCRIPT_DIR/$d"
done
info "β ThΖ° mα»₯c sαΊ΅n sΓ ng"
# ── 10. Smoke-test third-party imports ───────────────────────────────────────
# Exits non-zero (aborting setup via `set -e`) when any dependency is missing.
info "Kiα»m tra imports..."
$PYTHON - <<'PYEOF'
import importlib
import sys

# module name -> human-readable display name (insertion order preserved).
CHECKS = {
    "torch": "PyTorch",
    "torchvision": "TorchVision",
    "transformers": "Transformers",
    "datasets": "HF Datasets",
    "peft": "PEFT (LoRA)",
    "trl": "TRL (SFT/DPO)",
    "wandb": "WandB",
    "nltk": "NLTK",
    "bert_score": "BERTScore",
    "rouge_score": "ROUGE",
    "sklearn": "Scikit-learn",
    "matplotlib": "Matplotlib",
    "yaml": "PyYAML",
    "dotenv": "python-dotenv",
    "cv2": "OpenCV",
}
ok, fail = [], []
for module_name, display in CHECKS.items():
    try:
        importlib.import_module(module_name)
    except ImportError:
        fail.append(display)
    else:
        ok.append(display)
print(f" β OK ({len(ok)}): {', '.join(ok)}")
if fail:
    print(f" β MISSING ({len(fail)}): {', '.join(fail)}")
    sys.exit(1)
PYEOF
# ── 11. Check project src modules ────────────────────────────────────────────
# Best-effort: failures are reported but do NOT abort setup (no sys.exit),
# so a broken src module still leaves the environment usable.
info "Kiα»m tra src modules..."
$PYTHON - <<'PYEOF'
import importlib

MODULES = (
    "src.models.medical_vqa_model",
    "src.models.transformer_decoder",
    "src.engine.trainer",
    "src.engine.medical_eval",
    "src.data.medical_dataset",
    "src.utils.text_utils",
    "src.utils.translator",
)
loaded, broken = [], []
for dotted in MODULES:
    short = dotted.rsplit(".", 1)[-1]
    try:
        importlib.import_module(dotted)
    except Exception as e:
        broken.append(f"{short} ({e})")
    else:
        loaded.append(short)
print(f" β src OK ({len(loaded)}): {', '.join(loaded)}")
if broken:
    print(f" β src FAIL ({len(broken)}): {', '.join(broken)}")
PYEOF
# ── Done ─────────────────────────────────────────────────────────────────────
# Closing banner with next steps (runtime text reproduced verbatim).
cat <<'BANNER'

ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
 β Setup hoΓ n tαΊ₯t!

 TiαΊΏp theo:
 export WANDB_API_KEY=your_key # nαΊΏu chΖ°a cΓ³
 python train_medical.py --variant A1
 python train_medical.py --variant A2
 python train_medical.py --variant B1
 python train_medical.py --variant B2
 python train_medical.py --variant DPO

 So sΓ‘nh 5 model sau khi train xong:
 python scripts/compare_models.py
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ

BANNER