Spaces:
Sleeping
Sleeping
File size: 1,290 Bytes
4b621cf 859ea12 4b621cf 3fda329 85ab4ac 4b621cf 0e486fd 1d2a4c2 4b621cf 43b8c19 274c38a 4b621cf 859ea12 4b621cf 859ea12 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# Image that downloads a GGUF model from Hugging Face, builds llama.cpp from
# source, and serves the model over HTTP on port 7860 through the
# `llama_cpp.server` Python module (presumably provided by requirements.txt --
# confirm).
FROM ubuntu:22.04

# Coordinates of the model to download. Kept as ENV (not ARG) so they remain
# visible in the running container, as before.
ENV MODEL_NAME="Qwen-7B-gguf" \
    MODEL_FILE="qwen7b-q4_0.gguf" \
    MODEL_USER="MatrixStudio" \
    DEFAULT_MODEL_BRANCH="main"
# Must be its own instruction: values set by an ENV line can only be
# substituted by instructions that come after it.
ENV MODEL_URL="https://huggingface.co/${MODEL_USER}/${MODEL_NAME}/resolve/${DEFAULT_MODEL_BRANCH}/${MODEL_FILE}"

# OS packages. update + install share one layer so the package index is never
# stale, and the index is removed in the same layer to keep the image small.
# ca-certificates and python3-dev are listed explicitly because
# --no-install-recommends would otherwise drop them (HTTPS downloads and
# native pip builds need them).
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        python3 \
        python3-dev \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Create the unprivileged runtime user up front and hand it /app, so that
# everything built or downloaded below is owned by it from the start. A
# recursive chown/chmod in a later layer would store a second copy of every
# file it touches, including the multi-GB model.
RUN useradd -m -u 1000 user \
    && mkdir -p /app \
    && chown user:user /app

WORKDIR /app

# Python dependencies are installed system-wide (as root), before dropping
# privileges.
COPY --chown=user:user ./requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir --upgrade -r /app/requirements.txt

# Everything from here on runs as the unprivileged user.
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Branch or tag of llama.cpp to build. The default tracks upstream; override
# with `--build-arg LLAMA_CPP_REF=<tag>` for a reproducible build.
# NOTE(review): recent upstream revisions are built with CMake; if `make`
# fails, pin this to a ref that still ships a Makefile build -- confirm.
ARG LLAMA_CPP_REF=master
RUN git clone --depth 1 --branch "${LLAMA_CPP_REF}" https://github.com/ggerganov/llama.cpp.git

WORKDIR /app/llama.cpp
RUN make

# Fetch the model weights. dot:giga keeps the build log short for large files.
RUN mkdir -p models/7B \
    && wget --progress=dot:giga -O "models/7B/${MODEL_FILE}" "${MODEL_URL}"

EXPOSE 7860

# Alternative entry points, kept for reference:
# Adjust the CMD to use the absolute path of the server executable
# CMD ["/app/llama.cpp/server", "-m", "/app/llama.cpp/models/7B/qwen7b-q4_0.gguf", "-c", "16000", "--host", "0.0.0.0", "--port", "7860"]
# CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

# Exec form performs no variable expansion, so the model path is spelled out;
# keep it in sync with MODEL_FILE above.
CMD ["python3", "-m", "llama_cpp.server", "--model", "/app/llama.cpp/models/7B/qwen7b-q4_0.gguf", "--host", "0.0.0.0", "--port", "7860", "--n_threads", "16"]
|