# Qwen-7B-gguf-API / Dockerfile
# Using a standard Ubuntu base image
FROM ubuntu:22.04
# Environment variables
# CACHE_SIZE is the context size in tokens, passed to the server's -c flag at runtime
ENV CACHE_SIZE=16000
ENV MODEL_NAME="Qwen-7B-gguf"
ENV MODEL_FILE="qwen7b-q4_0.gguf"
ENV MODEL_USER="MatrixStudio"
ENV DEFAULT_MODEL_BRANCH="main"
ENV MODEL_URL="https://huggingface.co/${MODEL_USER}/${MODEL_NAME}/resolve/${DEFAULT_MODEL_BRANCH}/${MODEL_FILE}"
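# With the defaults above, MODEL_URL resolves to:
#   https://huggingface.co/MatrixStudio/Qwen-7B-gguf/resolve/main/qwen7b-q4_0.gguf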
# Installing build tools; ca-certificates is listed explicitly because wget and
# git only Recommend it, and --no-install-recommends would otherwise skip it,
# breaking the HTTPS clone and model download below
RUN apt-get update && apt-get upgrade -y \
&& apt-get install -y --no-install-recommends \
build-essential python3 python3-pip wget curl git ca-certificates \
&& apt-get clean && rm -rf /var/lib/apt/lists/*
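# python3/python3-pip appear intended for llama.cpp's model-conversion scripts;
# they are not strictly required just to serve an already-converted GGUF file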
# Setting up the working directory
WORKDIR /app
# Cloning the llama.cpp repository
RUN git clone https://github.com/ggerganov/llama.cpp.git
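# Note: the clone above is unpinned, so builds track llama.cpp's master branch.
# Newer revisions dropped the Makefile in favor of CMake and renamed the server
# binary to llama-server; pinning a known-good revision keeps this build
# reproducible, e.g. (placeholder, not a real tag):
# RUN git -C llama.cpp checkout <known-good-tag>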
# Moving to the llama.cpp directory and building the project in parallel
WORKDIR /app/llama.cpp
RUN make -j$(nproc)
# List contents to verify the location of the server executable
RUN ls -la
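# With the Makefile build above, the server binary lands in the repo root,
# i.e. at /app/llama.cpp/server, which is the path used in CMD below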
# Model download process
RUN mkdir -p models/7B && \
wget -O models/7B/${MODEL_FILE} ${MODEL_URL}
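# Optional integrity check, sketched with a placeholder digest; substitute the
# published sha256 of the model file before enabling it:
# RUN echo "<expected-sha256>  models/7B/${MODEL_FILE}" | sha256sum -c -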
# Creating a non-root user and giving it ownership of /app
RUN useradd -m -u 1000 user && chown -R user:user /app
USER user
ENV HOME=/home/user \
PATH=/home/user/.local/bin:$PATH
# Exposing the llama.cpp server's default HTTP port
EXPOSE 8080
# Using the shell form of CMD so ${MODEL_FILE} and ${CACHE_SIZE} expand at runtime
# (the JSON exec form performs no variable substitution), and binding to 0.0.0.0
# so the server is reachable through Docker's port mapping
CMD /app/llama.cpp/server --host 0.0.0.0 -m /app/llama.cpp/models/7B/${MODEL_FILE} -c ${CACHE_SIZE}
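# Example usage (image tag is illustrative):
#   docker build -t qwen7b-gguf-api .
#   docker run -p 8080:8080 qwen7b-gguf-api
#   curl http://localhost:8080/completion -d '{"prompt": "Hello", "n_predict": 32}'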