# llama-2-gguf / Dockerfile
# Source: Hugging Face Space by md-vasim — app file, revision 771e453 (523 bytes)
# syntax=docker/dockerfile:1

# Full (non-slim) python:3.9 kept on purpose: requirements.txt likely pulls in
# packages with native extensions (e.g. llama-cpp-python) that need a compiler
# at install time — NOTE(review): confirm before switching to python:3.9-slim.
FROM python:3.9

WORKDIR /code

# Model coordinates. ENV (not ARG) so the running app can also read them.
ENV REPO=TheBloke/Llama-2-7B-Chat-GGUF \
    MODEL_NAME=llama-2-7b-chat.Q5_K_M.gguf

# huggingface_hub provides the `huggingface-cli` used for the model download.
# --no-cache-dir keeps pip's wheel cache out of the image layer.
RUN pip install --no-cache-dir huggingface_hub

# Dependency manifest copied alone so this layer is cached until
# requirements.txt itself changes (not on every source edit).
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Download the model BEFORE copying the app source: this layer is multi-GB,
# and placing it above `COPY . .` means source-code changes no longer
# invalidate it and force a re-download.
# --local-dir-use-symlinks False materialises the real file in /code instead
# of a symlink into the HF cache, so the cache directory is redundant and is
# removed in the same layer to keep it out of the image.
RUN huggingface-cli download \
        ${REPO} \
        ${MODEL_NAME} \
        --local-dir . \
        --local-dir-use-symlinks False \
    && rm -rf /root/.cache/huggingface

# Application source last — the most frequently changing layer.
COPY . .

# Documentation only (does not publish the port); uvicorn below binds it.
# NOTE(review): container runs as root — consider a non-root USER if the
# hosting platform permits it.
EXPOSE 7860

# Exec form: uvicorn is PID 1 and receives SIGTERM from `docker stop`.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]