# Using a standard Ubuntu base image
FROM ubuntu:22.04

# Environment variables
ENV CACHE_SIZE=16000
ENV MODEL_NAME="Qwen-7B-gguf"
ENV MODEL_FILE="qwen7b-q4_0.gguf"
ENV MODEL_USER="MatrixStudio"
ENV DEFAULT_MODEL_BRANCH="main"
ENV MODEL_URL="https://huggingface.co/${MODEL_USER}/${MODEL_NAME}/resolve/${DEFAULT_MODEL_BRANCH}/${MODEL_FILE}"

# Installing necessary packages
RUN apt-get update && apt-get upgrade -y \
    && apt-get install -y build-essential python3 python3-pip wget curl git \
    --no-install-recommends \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Setting up the working directory
WORKDIR /app

# Cloning the llama.cpp repository
RUN git clone https://github.com/ggerganov/llama.cpp.git

# Moving to the llama.cpp directory and building the project.
# Note: llama.cpp's build system changes over time; recent revisions build
# with CMake and name the binary llama-server, so pinning a tag or commit
# known to produce ./server via `make` is advisable here.
WORKDIR /app/llama.cpp
RUN make

# List contents to verify the location of the server executable
RUN ls -la

# Model download process
RUN mkdir -p models/7B && \
    wget -O models/7B/${MODEL_FILE} ${MODEL_URL}

# Changing ownership to a non-root user
RUN useradd -m -u 1000 user && chown -R user:user /app
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Setting up the application
EXPOSE 8080

# Start the server. Shell form is used deliberately: the exec (JSON array)
# form of CMD performs no environment-variable substitution, so
# ${MODEL_FILE} would have been passed through literally. --host 0.0.0.0
# makes the server reachable from outside the container (it binds to
# 127.0.0.1 by default), and -c reuses CACHE_SIZE instead of hardcoding
# 16000 a second time.
CMD /app/llama.cpp/server -m /app/llama.cpp/models/7B/${MODEL_FILE} -c ${CACHE_SIZE} --host 0.0.0.0 --port 8080
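
# --- Usage sketch (not part of the build) --------------------------------
# A minimal example of building and running this image. The image tag
# "llama-qwen" and the host-side port mapping are arbitrary choices for
# illustration, not anything mandated above:
#
#   docker build -t llama-qwen .
#   docker run --rm -p 8080:8080 llama-qwen
#
# Note that the model file is baked into the image at build time, so the
# first build downloads several gigabytes; bind-mounting a host models
# directory instead is a common alternative when rebuilds should stay cheap.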
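
# Once the container is up, the llama.cpp HTTP server can be exercised via
# its /completion endpoint; the prompt text and n_predict value below are
# illustrative only:
#
#   curl http://localhost:8080/completion \
#     -H "Content-Type: application/json" \
#     -d '{"prompt": "Building a website can be done in", "n_predict": 64}'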