# Spaces: Running on A10G
# Use CUDA 12.1 base image.
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04

# Avoid interactive apt prompts. Declared as ARG so the setting applies to the
# build only and is not baked into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Install Python 3.11 and other essentials.
# - python3.11-venv replaces python3-pip: the distro python3-pip package is
#   bound to the system python3 (3.10 on Ubuntu 22.04), not to python3.11.
# - build-essential and ca-certificates are listed explicitly because
#   --no-install-recommends no longer pulls them in implicitly.
#   NOTE(review): build-essential is kept on the assumption that pip source
#   builds / Triton need a C compiler — confirm before removing.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        git \
        python3.11 \
        python3.11-dev \
        python3.11-venv \
    && rm -rf /var/lib/apt/lists/*

# Make Python 3.11 the default `python` AND the interpreter behind `pip`.
# Previously `python` pointed at 3.11 while `pip` installed into 3.10's
# site-packages, so packages installed at build time were not importable by
# the interpreter used in CMD. A virtual environment placed first on PATH
# guarantees both commands resolve to the same interpreter.
ENV VIRTUAL_ENV=/opt/venv
RUN python3.11 -m venv "${VIRTUAL_ENV}"
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

WORKDIR /app

# Upgrade pip and the packaging toolchain inside the virtual environment.
RUN python -m pip install --no-cache-dir -U pip setuptools wheel
# PyTorch stack with CUDA 12.1 support, resolved against the PyTorch cu121
# wheel index instead of the default package index.
RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cu121 \
        torch==2.4.0 \
        triton \
        xformers
# Unsloth is installed straight from its Git repository and is left to
# resolve its own compatible TRL/PEFT stack; the remaining project
# dependencies are installed in the same resolver pass.
RUN pip install --no-cache-dir \
        "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" \
        datasets wandb matplotlib \
        fastapi uvicorn pydantic
# Copy the project files into the working directory.
COPY . .

# Install the local package in editable mode. --no-cache-dir keeps pip's
# download/wheel cache out of the image layer, consistent with the other
# pip steps in this file.
RUN pip install --no-cache-dir -e .

# Make scripts executable.
RUN chmod +x scripts/*.py
# Runtime defaults, declared together in a single ENV instruction.
ENV MODEL_NAME="meta-llama/Llama-3.2-3B-Instruct" \
    OUTPUT_DIR="outputs/commitguard-llama-3b-grpo" \
    WANDB_PROJECT="commitguard"

# Default command: run training and push to Hub.
# HF_TOKEN and WANDB_API_KEY are intentionally not defined here; they should
# be set as Space Secrets.
CMD ["python", "scripts/train_grpo.py", "--samples", "200", "--max-steps", "300", "--push-to-hub"]