#!/bin/bash
#
# Seriguela setup validation.
#
# Runs a series of checks (Python environment, Python packages, environment
# variables, GPU/CUDA, Wandb and HuggingFace authentication, dataset access,
# presence of project scripts) and prints a coloured report.
# Exit status: 0 when no check recorded an error, 1 otherwise.

set -e

# ANSI colour sequences. They are stored with a literal backslash and are only
# turned into real escape bytes when printed (echo -e / printf %b).
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Reporting helpers. Each takes the message as $1 and writes one line to
# stdout. Colour codes go through %b so their escapes are expanded; the
# message goes through %s so backslashes in it (paths, Python error text)
# are printed verbatim instead of being interpreted.
# NOTE(review): the status glyphs were reconstructed from mis-encoded bytes
# in the reviewed copy of this file — confirm against the original.
print_success() { printf '%b✅%b %s\n' "$GREEN" "$NC" "$1"; }
print_error() { printf '%b❌%b %s\n' "$RED" "$NC" "$1"; }
print_warning() { printf '%b⚠️%b %s\n' "$YELLOW" "$NC" "$1"; }
print_header() { printf '\n%b========== %s ==========%b\n' "$BLUE" "$1" "$NC"; }

# Number of failed checks; incremented by each failing check and used at the
# end of the script to choose the exit status.
ERRORS=0
| |
|
print_header "Seriguela Setup Validation"

# Pick the directory the remaining (relative-path) checks run from:
# the fixed checkout under /home/ubuntu if present, otherwise a
# "seriguela" directory below the current one, otherwise stay put.
project_dir=.
if [ -d "/home/ubuntu/seriguela" ]; then
  project_dir=/home/ubuntu/seriguela
elif [ -d "$(pwd)/seriguela" ]; then
  project_dir=seriguela
fi
cd "$project_dir"
| |
|
print_header "1. Python Environment"

# Interpreter: probe by running it, then report the version string it prints.
if python3 --version &> /dev/null; then
  PYTHON_VERSION=$(python3 --version)
  print_success "Python installed: $PYTHON_VERSION"
else
  print_error "Python not found"
  ERRORS=$((ERRORS + 1))
fi

# Virtual environment: activate it so the pip/python checks below see its
# packages. The activate script is checked explicitly because sourcing a
# missing file under `set -e` would abort the whole validation without a
# summary.
if [ -d "venv" ]; then
  if [ -f "venv/bin/activate" ]; then
    print_success "Virtual environment exists"
    # shellcheck disable=SC1091
    source venv/bin/activate
  else
    print_error "Virtual environment is incomplete (venv/bin/activate missing)"
    ERRORS=$((ERRORS + 1))
  fi
else
  print_error "Virtual environment not found"
  ERRORS=$((ERRORS + 1))
fi

# pip: the second space-separated field of `pip --version` is the version.
if pip --version &> /dev/null; then
  PIP_VERSION=$(pip --version | cut -d' ' -f2)
  print_success "pip version: $PIP_VERSION"
else
  print_error "pip not found"
  ERRORS=$((ERRORS + 1))
fi
| |
|
print_header "2. Python Packages"

# Each entry is "<importable module>:<human-readable description>".
PACKAGES=(
  "transformers:Hugging Face Transformers"
  "torch:PyTorch"
  "wandb:Weights & Biases"
  "peft:Parameter-Efficient Fine-Tuning"
  "datasets:Hugging Face Datasets"
)

for entry in "${PACKAGES[@]}"; do
  # Split on the first colon: module name before it, description after it.
  module=${entry%%:*}
  label=${entry#*:}

  # A package counts as installed when the import succeeds.
  if ! python3 -c "import $module" &> /dev/null; then
    print_error "$label ($module) not installed"
    ERRORS=$((ERRORS + 1))
    continue
  fi

  # Not every module exposes __version__; fall back to "unknown".
  module_version=$(python3 -c "import $module; print($module.__version__)" 2>/dev/null || echo "unknown")
  print_success "$label ($module) - version $module_version"
done
| |
|
# Wandb minimum-version check (warning only; never increments ERRORS).
# When wandb cannot be imported the version falls back to "0.0.0".
WANDB_VERSION=$(python3 -c "import wandb; print(wandb.__version__)" 2>/dev/null || echo "0.0.0")
REQUIRED_VERSION="0.24.0"

# Compare with packaging.version. The two versions are handed to Python via
# the environment (quoted heredoc, no shell interpolation into the source).
# Exit codes of the helper: 0 = new enough, 1 = too old,
# 2 = comparison impossible (packaging missing or version unparsable).
# `|| version_status=$?` records the status without tripping `set -e`.
version_status=0
WANDB_VERSION="$WANDB_VERSION" REQUIRED_VERSION="$REQUIRED_VERSION" \
  python3 - << 'VERSIONCHECK' || version_status=$?
import os
import sys

try:
    from packaging import version
except ImportError:
    sys.exit(2)

try:
    current = version.parse(os.environ["WANDB_VERSION"])
    required = version.parse(os.environ["REQUIRED_VERSION"])
except Exception:
    sys.exit(2)

sys.exit(0 if current >= required else 1)
VERSIONCHECK

case "$version_status" in
  0)
    print_success "Wandb version $WANDB_VERSION (>= $REQUIRED_VERSION required)"
    ;;
  1)
    print_warning "Wandb version $WANDB_VERSION is older than recommended $REQUIRED_VERSION"
    print_warning "New API key format (wandb_v1_...) requires Wandb >= 0.24.0"
    ;;
  *)
    # Do not claim the version is too old when the comparison itself failed.
    print_warning "Could not compare Wandb version $WANDB_VERSION with $REQUIRED_VERSION (is the 'packaging' module installed?)"
    ;;
esac
| |
|
print_header "3. Environment Variables"

#######################################
# Source a dotenv-style file and export every variable it assigns.
# Uses allexport (set -a) instead of prefixing each line with "export":
# blank lines and indented comments are then simply ignored, whereas a
# bare "export" would print every exported variable (secrets included).
# Arguments: $1 - path to the env file
# Returns:   1 if the file does not exist, otherwise the status of `source`
#######################################
load_env_file() {
  local env_file=$1
  [[ -f "$env_file" ]] || return 1
  # `source` searches PATH for names without a slash; force a path lookup.
  [[ "$env_file" == */* ]] || env_file="./$env_file"
  set -a
  # shellcheck disable=SC1090
  source "$env_file"
  local rc=$?
  set +a
  return "$rc"
}

if [ -f ".env" ]; then
  load_env_file "./.env"
  print_success ".env file loaded"
else
  print_warning ".env file not found"
fi
| |
|
# HF_TOKEN is optional: its absence is only a warning.
# Only the length is printed, never the value itself.
if [[ -z "$HF_TOKEN" ]]; then
  print_warning "HF_TOKEN not set (model won't be pushed to Hub)"
else
  print_success "HF_TOKEN set (${#HF_TOKEN} characters)"
fi

# WANDB_API_KEY is mandatory: its absence counts as an error.
if [[ -z "$WANDB_API_KEY" ]]; then
  print_error "WANDB_API_KEY not set"
  ERRORS=$((ERRORS + 1))
else
  print_success "WANDB_API_KEY set (${#WANDB_API_KEY} characters)"
fi
| |
|
print_header "4. GPU / CUDA"

# Driver level: nvidia-smi must run; report name and total memory of the
# first GPU it lists.
if ! nvidia-smi &> /dev/null; then
  print_error "GPU not detected (nvidia-smi failed)"
  ERRORS=$((ERRORS + 1))
else
  GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader | head -n 1)
  GPU_MEMORY=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader | head -n 1)
  print_success "GPU detected: $GPU_NAME ($GPU_MEMORY)"
fi

# Framework level: PyTorch itself must report CUDA as available.
if ! python3 -c "import torch; assert torch.cuda.is_available()" &> /dev/null; then
  print_error "CUDA not available in PyTorch"
  ERRORS=$((ERRORS + 1))
else
  CUDA_VERSION=$(python3 -c "import torch; print(torch.version.cuda)")
  GPU_COUNT=$(python3 -c "import torch; print(torch.cuda.device_count())")
  print_success "CUDA available: version $CUDA_VERSION ($GPU_COUNT GPU(s))"
fi
| |
|
print_header "5. Wandb Authentication"

# Attempt a real login with the configured key. The key is passed to Python
# through the environment and the heredoc delimiter is quoted, so the secret
# is never spliced into Python source (a quote or backslash in the key could
# otherwise break or alter the program).
if [ -n "$WANDB_API_KEY" ]; then
  if WANDB_API_KEY="$WANDB_API_KEY" python3 - << 'WANDBCHECK'
import os
import sys

import wandb

try:
    result = wandb.login(key=os.environ["WANDB_API_KEY"], relogin=True)
    if result:
        print("Login successful")
        sys.exit(0)
    else:
        print("Login failed")
        sys.exit(1)
except Exception as e:
    print(f"Error: {e}")
    sys.exit(1)
WANDBCHECK
  then
    print_success "Wandb authentication successful"

    # Best-effort lookup of the account name; any failure yields "unknown".
    # NOTE(review): the viewer appears to be a dict in some wandb releases and
    # an object with a `username` attribute in others — both are handled;
    # confirm against the installed version.
    WANDB_USER=$(python3 - << 'GETUSER'
import wandb

try:
    viewer = wandb.Api().viewer
    if isinstance(viewer, dict):
        name = viewer.get("username")
    else:
        name = getattr(viewer, "username", None)
    print(name or "unknown")
except Exception:
    print("unknown")
GETUSER
    )
    print_success "Logged in as: $WANDB_USER"
  else
    print_error "Wandb authentication failed"
    ERRORS=$((ERRORS + 1))
  fi
else
  print_warning "Skipping Wandb auth (no API key)"
fi
| |
|
print_header "6. HuggingFace Authentication"

# Validate the token by asking the Hub who it belongs to. The token is passed
# through the environment and the heredoc delimiter is quoted, so the secret
# is never interpolated into Python source.
if [ -n "$HF_TOKEN" ]; then
  if HF_TOKEN="$HF_TOKEN" python3 - << 'HFCHECK'
import os
import sys

from huggingface_hub import HfApi

try:
    api = HfApi(token=os.environ["HF_TOKEN"])
    user = api.whoami()
    print(f"Login successful: {user.get('name', 'unknown')}")
    sys.exit(0)
except Exception as e:
    print(f"Error: {e}")
    sys.exit(1)
HFCHECK
  then
    print_success "HuggingFace authentication successful"
  else
    print_error "HuggingFace authentication failed"
    ERRORS=$((ERRORS + 1))
  fi
else
  print_warning "Skipping HF auth (no token)"
fi
| |
|
print_header "7. Dataset Access"

# Open the training split in streaming mode as an accessibility probe.
# A failure here is reported as a warning only and does not count as an error.
if python3 - << 'DATASETCHECK'
import sys

from datasets import load_dataset

try:
    # Streaming mode: resolve the dataset without fetching it in full.
    ds = load_dataset("augustocsc/sintetico_natural", split="train", streaming=True)
    print("Dataset accessible")
    sys.exit(0)
except Exception as e:
    print(f"Error: {e}")
    sys.exit(1)
DATASETCHECK
then
  print_success "Dataset accessible: augustocsc/sintetico_natural"
else
  print_warning "Could not verify dataset access (may require authentication)"
fi
| |
|
print_header "8. Scripts"

# Project files expected relative to the current directory.
# A missing file is only a warning and does not count as an error.
SCRIPTS=(
  "scripts/train.py"
  "scripts/evaluate.py"
  "scripts/generate.py"
  "scripts/aws/monitor_training_auto.sh"
  "scripts/aws/analyze_model.sh"
)

for path in "${SCRIPTS[@]}"; do
  if [[ ! -f "$path" ]]; then
    print_warning "$path not found"
    continue
  fi
  print_success "$path exists"
done
| |
|
# Final verdict: a green banner and exit 0 when no check recorded an error,
# otherwise a red banner with the error count and exit 1.
# NOTE(review): the box-drawing characters, emoji and inner spacing of the
# banners were reconstructed from mis-encoded, whitespace-collapsed text in
# the reviewed copy of this file — confirm against the original.
print_header "Validation Summary"
echo ""

# ERRORS is quoted so an empty value yields a clean test failure rather than
# a malformed `[` expression.
if [ "$ERRORS" -eq 0 ]; then
  echo -e "${GREEN}╔══════════════════════════════════════╗${NC}"
  echo -e "${GREEN}║                                      ║${NC}"
  echo -e "${GREEN}║     ✅ ALL VALIDATIONS PASSED ✅     ║${NC}"
  echo -e "${GREEN}║                                      ║${NC}"
  echo -e "${GREEN}║        Ready for training! 🚀        ║${NC}"
  echo -e "${GREEN}║                                      ║${NC}"
  echo -e "${GREEN}╚══════════════════════════════════════╝${NC}"
  echo ""
  echo "You can now run:"
  echo " python scripts/train.py --help"
  echo " bash scripts/aws/run_all_training.sh"
  echo ""
  exit 0
else
  echo -e "${RED}╔══════════════════════════════════════╗${NC}"
  echo -e "${RED}║                                      ║${NC}"
  echo -e "${RED}║       ❌ VALIDATION FAILED ❌        ║${NC}"
  echo -e "${RED}║                                      ║${NC}"
  echo -e "${RED}║           $ERRORS error(s) found           ║${NC}"
  echo -e "${RED}║                                      ║${NC}"
  echo -e "${RED}╚══════════════════════════════════════╝${NC}"
  echo ""
  echo "Please fix the errors above before training."
  echo ""
  exit 1
fi
| |
|