Spaces:

imperialwool
/

llama-cpp-api

Sleeping

File size: 1,131 Bytes

e3396ba
 
85f179c
e3396ba
85f179c
 
e3396ba
 
7fd3f9f
85f179c
4966618
 
 
 
 
 
e3396ba
 
6c6b28d
1fb7f0b
7fd3f9f
85f179c
e3396ba
85f179c
 
 
e3396ba
d8751a6

# Base image: Debian bookworm (slim) with CPython 3.11.5.
# Any Debian-based Python image should work as long as apt-get is available.
FROM python:3.11.5-slim-bookworm

# System packages, installed in a single layer so that `apt-get update` and
# `apt-get install` always run together (no stale package index) and the
# package lists are removed in the same layer that created them.
#   - build-essential / cmake / gcc: compile llama-cpp-python from source
#   - libopenblas-dev / pkg-config:  provide the OpenBLAS backend that the
#                                    CMAKE_ARGS below ask for
#   - wget / ca-certificates:        download the model over HTTPS
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        gcc \
        libopenblas-dev \
        pkg-config \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Update the Python packaging tools, then build llama-cpp-python with BLAS
# enabled. This is done as root so the packages land in the system
# site-packages and their entry points are on PATH for every user.
# NOTE(review): llama-cpp-python is unpinned; consider pinning a version for
# reproducible builds.
RUN python3 -m pip install --no-cache-dir -U pip setuptools wheel \
    && CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" \
        pip install --no-cache-dir llama-cpp-python

# Unprivileged runtime user (UID 1000) that owns the application directory.
# This replaces the former `chmod -R 777 /app`: the app can still write to its
# own directory, but nothing is world-writable.
RUN useradd --create-home --uid 1000 user \
    && mkdir -p /app \
    && chown user:user /app
WORKDIR /app

# Install Python dependencies before copying the rest of the sources, so this
# layer is rebuilt only when requirements.txt changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt

# Download the model to /app/model.bin. Kept above the source COPY so that
# editing application code does not trigger a new multi-gigabyte download.
# You can use other models! Or comment out this RUN and ship your own model
# named "model.bin" in the Space/repo/Docker build context instead (a
# model.bin present in the build context overrides the downloaded one).
# NOTE(review): the URL tracks the mutable `main` revision and the download is
# not checksum-verified; consider pinning a revision and checking a sha256.
RUN wget -q -O model.bin https://huggingface.co/lucianosb/llama-2-7b-langchain-chat-GGUF/resolve/main/llama-2-7b-langchain-chat-q8_0.gguf

# Application sources, owned by the runtime user.
COPY --chown=user:user . /app

# Drop root privileges for runtime.
USER user

# Port the server listens on (documentation only; does not publish the port).
EXPOSE 7860

# Serve the Quart app (`app` object in app.py) with uvicorn on all interfaces.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]