FROM python:3.9

WORKDIR /code

# Install wget
RUN apt-get update && apt-get install -y wget

# Copy the requirements.txt file into the container at /code/requirements.txt
COPY ./requirements.txt /code/requirements.txt

# Install the Python packages specified in requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Set the environment variables for the Hugging Face and transformers caches
ENV TRANSFORMERS_CACHE=/code/cache/huggingface/transformers
ENV HF_HOME=/code/cache/huggingface

# Create the cache directory with correct permissions
RUN mkdir -p /code/cache/huggingface/transformers && \
    chmod -R 777 /code/cache/huggingface

# Download the llama-2-7b-chat.ggmlv3.q8_0.bin model file into the container
# Replace the URL below if the model should be downloaded from elsewhere
RUN wget -O /code/llama-2-7b-chat.ggmlv3.q8_0.bin https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q8_0.bin

# Copy the application code and the FAISS vector store into the container
COPY ./main.py /code/main.py
COPY ./model_on_cli.py /code/model_on_cli.py
COPY ./db_faiss /code/db_faiss

# Set the command to run the Uvicorn server, serving the app defined in main.py
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
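
For reference, a minimal build-and-run sketch. The image tag llama-chat-api is a hypothetical name, and the port mapping assumes main:app is the FastAPI app served on port 7860, as in the CMD above:

# Build the image from the directory containing this Dockerfile
# (llama-chat-api is a placeholder tag; use any name you like)
docker build -t llama-chat-api .

# Run the container, publishing the Uvicorn port on the host
docker run -p 7860:7860 llama-chat-api

Note that the model file is downloaded at build time and is several gigabytes, so the first build takes a while and the resulting image is large; this trades image size for a container that starts without any network access to Hugging Face.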