# ---------------------------------------------------------
# STAGE 1: Source of the prebuilt llama.cpp server binary
# ---------------------------------------------------------
# NOTE(review): ":server" is a moving tag, so builds are not reproducible and
# the location of the binary inside the image may differ between releases.
# This file expects it at /llama-server — confirm, and consider pinning a
# digest or a versioned tag.
FROM ghcr.io/ggerganov/llama.cpp:server AS builder

# ---------------------------------------------------------
# STAGE 2: Minimal runtime image for Hugging Face Spaces
# ---------------------------------------------------------
FROM ubuntu:22.04

# Runtime dependencies:
#   ca-certificates - lets the server verify TLS when downloading the model
#   libcurl4        - HTTP client library used for the --hf-repo download
#   libgomp1        - OpenMP runtime
# The upstream server image installs libcurl and libgomp in its runtime stage;
# presumably the copied binary is dynamically linked against them and would
# fail to start without them — verify with `ldd llama-server` if the upstream
# image changes.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        libcurl4 \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Run as an unprivileged user with UID 1000, the UID the Hugging Face Spaces
# Docker guide asks images to use.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user
WORKDIR $HOME/app

# Copy the server executable out of stage 1. COPY writes files as root
# regardless of USER, so hand ownership to the runtime user explicitly.
COPY --from=builder --chown=user /llama-server $HOME/app/llama-server

# Port the server listens on (must match --port below).
EXPOSE 7860

# Start the server. --hf-repo/--hf-file make llama-server fetch the GGUF model
# from the Hugging Face Hub at startup; --host 0.0.0.0 accepts connections
# from outside the container.
# NOTE(review): the API key is hardcoded here and therefore readable by anyone
# who can see this file or inspect the image. It is left unchanged so existing
# clients keep working; if it is meant to be private, supply it from a Space
# secret instead.
CMD ["./llama-server", \
     "--hf-repo", "NovatasticRoScript/atomight-1.5b-inference-Q4_K_M-GGUF", \
     "--hf-file", "atomight-1.5b-inference.Q4_K_M.gguf", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--ctx-size", "2048", \
     "--api-key", "free_token_99"]