Spaces:

TeamGenKI
/

LLMServer

Paused

LLMServer / Dockerfile

Added gitignore and fixed Dockerfile.

2bd30ac 4 months ago

1.3 kB

	# Base image: the "slim" variant of the Python 3.10 image (apt-based, see the
	# package install step below). Python 3.10 is used for better compatibility
	# with ML libraries.
	FROM python:3.10-slim

	# Working directory for the rest of the build and for the running container;
	# relative paths in later COPY/RUN/CMD instructions resolve against /app.
	WORKDIR /app

	# Install git and required system dependencies.
	# --no-install-recommends keeps optional "recommended" packages out of the
	# image; ca-certificates is listed explicitly so git over HTTPS does not
	# depend on the base image happening to ship it.
	# The apt package lists are removed in the same layer so they never add to
	# the image size.
	RUN apt-get update && \
	apt-get install -y --no-install-recommends \
	ca-certificates \
	git && \
	apt-get clean && \
	rm -rf /var/lib/apt/lists/*

	# Bring in only the dependency manifest at this point: this layer and the
	# install below stay cached until requirements.txt itself changes.
	COPY requirements.txt ./

	# Install the Python dependencies without keeping pip's download cache
	RUN pip install --no-cache-dir --requirement requirements.txt

	# Add the remaining build context (the application source) to the working
	# directory
	COPY . ./

	# Make sure the model checkpoint directory exists
	RUN mkdir --parents /app/checkpoints

	# Download the Llama 2 model using litgpt.
	#
	# The Hugging Face token is supplied as a BuildKit secret rather than an
	# ARG/ENV, so it is never recorded in an image layer, in the image
	# environment, or in `docker history`:
	#   docker build --secret id=HF_TOKEN,env=HF_TOKEN .
	# The secret is only readable at /run/secrets/HF_TOKEN during this RUN step.
	# Python reads it from the process environment instead of having it
	# interpolated into the source string, so quote characters in the token
	# cannot break the command. logout() removes the token that login() stored
	# on disk before the layer is committed.
	# If the runtime also needs the token, inject it when the container starts
	# (e.g. `docker run -e HF_TOKEN=...` or a Space secret).
	# Only proceed if the secret is provided and non-empty.
	# NOTE(review): confirm that the repo id 'meta-llama/Llama-2-3b-chat-hf'
	# exists on the Hub; it must stay in sync with MODEL_PATH.
	RUN --mount=type=secret,id=HF_TOKEN \
	if [ -s /run/secrets/HF_TOKEN ]; then \
	HF_TOKEN="$(cat /run/secrets/HF_TOKEN)" \
	python -c "import os; from huggingface_hub import login, logout; from litgpt.cli import download; login(os.environ['HF_TOKEN']); download('meta-llama/Llama-2-3b-chat-hf', '/app/checkpoints'); logout()"; \
	else \
	echo "No Hugging Face token provided. Model will need to be downloaded separately."; \
	fi

	# Runtime environment baked into the image: host/port settings for the LLM
	# engine and the checkpoint location (presumably read by the application at
	# startup -- confirm against main/main.py)
	ENV LLM_ENGINE_HOST=0.0.0.0 \
	LLM_ENGINE_PORT=8001 \
	MODEL_PATH=/app/checkpoints/meta-llama/Llama-2-3b-chat-hf

	# Document the TCP port the service is expected to listen on (same value as
	# LLM_ENGINE_PORT)
	EXPOSE 8001/tcp

	# Default process: start the application entry script (exec form, no shell)
	CMD ["python", "main/main.py"]