# ---------------------------------------------------------
# STAGE 1: Grab the official, high-speed Llama.cpp server
# ---------------------------------------------------------
# This stage runs no build steps of its own; it exists only so that a later
# stage can pull files out of the upstream image via `COPY --from=builder`.
# NOTE(review): `:server` is a tag with no version or digest, so the image
# contents (including where the server binary lives) may differ between
# builds -- consider pinning to a specific build tag or digest.
FROM ghcr.io/ggerganov/llama.cpp:server AS builder

# ---------------------------------------------------------
# STAGE 2: Build a secure environment for Hugging Face
# ---------------------------------------------------------
FROM ubuntu:22.04

# Install what the copied llama-server binary needs at runtime:
#   ca-certificates - lets the HTTPS model download verify TLS certificates
#   libcurl4        - libcurl shared library used for the --hf-repo download
#   libgomp1        - OpenMP runtime the upstream binary is linked against
# (the upstream llama.cpp server image installs libcurl and libgomp1 next to
# the binary; copying only the executable leaves them behind).
# --no-install-recommends keeps unrelated packages out, and the apt lists are
# deleted in the same layer so they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        libcurl4 \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Create the exact security user Hugging Face requires (UID 1000).
# The switch to this user happens only after the root-only package install.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user
WORKDIR $HOME/app

# Copy the server executable from Stage 1 into our secure environment.
# --chown makes the file belong to the runtime user instead of root (COPY
# writes files as root by default, regardless of the active USER).
# NOTE(review): the source path /llama-server depends on the layout of the
# upstream image; verify it against the image tag actually being pulled.
COPY --chown=user:user --from=builder /llama-server $HOME/app/llama-server

# Open the communication port (documentation only; must match --port below)
EXPOSE 7860

# Launch! It natively downloads Atomight and starts the OpenAI-compatible API.
# Exec (JSON) form is used, so no shell is involved and no $VAR expansion
# happens inside these arguments.
# NOTE(review): the --api-key value is baked into the image and is readable by
# anyone who can inspect it; consider supplying it at runtime (for example
# from a Space secret) rather than hard-coding it here.
CMD ["./llama-server", "--hf-repo", "NovatasticRoScript/atomight-1.5b-inference-Q4_K_M-GGUF", "--hf-file", "atomight-1.5b-inference.Q4_K_M.gguf", "--host", "0.0.0.0", "--port", "7860", "--ctx-size", "2048", "--api-key", "free_token_99"]