# VisionDoc-RAG-Backend / Dockerfile
# Page header (not Dockerfile instructions): "lucioY250's picture"
# Last commit message: "Update Dockerfile"
# Commit: 07f12e1 (verified)
### Dockerfile for VisionDoc RAG - Hugging Face Complete ###
FROM python:3.11-slim-bookworm
# --- Env Config ---
# Cache, font and data locations for the libraries installed further down.
# All cache-style paths live under /tmp; NLTK_DATA points at /app/nltk_data.
# FONTCONFIG_FILE names /tmp/fonts.conf, which a later RUN step in this
# Dockerfile writes.
# NOTE(review): PERSIST_DIR is presumably read by the application to locate
# its Chroma store — confirm against the server code.
# NOTE(review): recent transformers releases document TRANSFORMERS_CACHE as
# deprecated in favour of HF_HOME; both are kept so behaviour is unchanged.
ENV FONTCONFIG_FILE=/tmp/fonts.conf \
    FONTCONFIG_PATH=/etc/fonts \
    HF_HOME=/tmp/.cache/huggingface \
    MPLCONFIGDIR=/tmp/matplotlib \
    NLTK_DATA=/app/nltk_data \
    PERSIST_DIR=/tmp/chroma_store \
    SENTENCE_TRANSFORMERS_HOME=/tmp/.cache/sentence_transformers \
    TRANSFORMERS_CACHE=/tmp/.cache/huggingface \
    XDG_CACHE_HOME=/tmp/.cache
# --- System Dependencies ---
# Installs the OS packages the Python stack shells out to or links against,
# then removes the apt package lists in the same layer to keep it small.
#   fontconfig     - font configuration tooling (see the FONTCONFIG_* variables)
#   libgl1         - provides libGL.so.1
#   poppler-utils  - PDF command-line utilities
#   tesseract-ocr  - OCR engine
# libgl1 replaces the previous libgl1-mesa-glx entry: that name was only a
# transitional package pulling in libgl1, and it has no install candidate on
# the Debian bookworm release this base image uses (NOTE(review): confirm on
# the package tracker), which makes `apt-get install` fail the build.
RUN apt-get update && apt-get install -y --no-install-recommends \
        fontconfig \
        libgl1 \
        poppler-utils \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
# --- Working Directory ---
# Relative paths in the instructions below, and the process started by CMD,
# resolve against /app.
WORKDIR /app
# --- Python Dependencies ---
# Only the requirements file is copied at this point (the application source
# is copied later), so this layer is rebuilt only when requirements.txt changes.
COPY server/requirements.txt /app/requirements.txt
RUN python -m pip install --no-cache-dir --requirement requirements.txt
# --- Pre-download NLTK Data ---
# Fetches the tokenizer and POS-tagger resources into /app/nltk_data (the
# directory NLTK_DATA points at) so they are already present in the image.
# - 'punkt_tab' is downloaded alongside 'punkt': NLTK 3.9+ tokenizers load
#   punkt_tab instead of the pickled punkt models, and the '_eng' tagger
#   requested here suggests that NLTK generation is in use
#   (NOTE(review): confirm against the pinned nltk version).
# - nltk.download() returns False on failure rather than raising, so the
#   results are collected and the build is aborted if any package is missing
#   instead of silently producing an image without the data.
RUN mkdir -p /app/nltk_data && \
    python -c "import sys, nltk; \
    packages = ('punkt', 'punkt_tab', 'averaged_perceptron_tagger', 'averaged_perceptron_tagger_eng'); \
    failed = [name for name in packages if not nltk.download(name, download_dir='/app/nltk_data')]; \
    sys.exit('NLTK download failed: ' + ', '.join(failed) if failed else 0)"
# --- Pre-download HuggingFace Model with proper permissions ---
# Build-time step, in order:
#   1. create the Hugging Face hub and sentence-transformers cache directories;
#   2. set mode 0777 on /tmp/.cache and each cache directory (non-recursive);
#   3. instantiate SentenceTransformer('BAAI/bge-m3') with its cache_folder set
#      to /tmp/.cache/sentence_transformers, which downloads the model there.
# The sentence_transformers import is deliberately placed after the progress
# messages, matching the original order of output.
RUN python -c "import os; \
    print('Setting up cache directories...'); \
    cache_dirs = ('/tmp/.cache/huggingface/hub', '/tmp/.cache/sentence_transformers'); \
    [os.makedirs(path, exist_ok=True) for path in cache_dirs]; \
    [os.chmod(path, 0o777) for path in ('/tmp/.cache', '/tmp/.cache/huggingface') + cache_dirs]; \
    print('Downloading BAAI/bge-m3 model...'); \
    from sentence_transformers import SentenceTransformer; \
    model = SentenceTransformer('BAAI/bge-m3', cache_folder='/tmp/.cache/sentence_transformers'); \
    print('BAAI/bge-m3 model downloaded successfully')"
# Make the NLTK data reachable at /nltk_data too, via a symlink to /app/nltk_data.
RUN ln --symbolic /app/nltk_data /nltk_data
# --- Copy Application Code ---
# Copies the contents of the build context's server/ directory into /app.
# Placed after the dependency and model layers so that source-only changes
# do not invalidate them.
COPY server/ /app/
# --- Create all necessary directories with proper permissions ---
# One layer that:
#   1. creates every directory the application and its libraries write to;
#   2. recursively sets mode 777 on them, so they are writable by any user;
#   3. writes a minimal fontconfig file (FONTCONFIG_FILE) whose only setting
#      is a cache directory of /tmp/fonts.
# NOTE(review): the recursive chmod over /tmp/.cache also touches the model
# files downloaded in an earlier layer, which may duplicate them in this
# layer — consider whether that is acceptable for image size.
RUN mkdir -p \
        /app/chroma_store \
        /app/static \
        /app/temp \
        /app/uploaded_pdfs \
        /tmp/.cache/huggingface/hub \
        /tmp/.cache/sentence_transformers \
        /tmp/chroma_store \
        /tmp/fonts \
        /tmp/matplotlib \
        /tmp/static \
        /tmp/uploads \
    && chmod -R 777 \
        /app/chroma_store \
        /app/static \
        /app/temp \
        /app/uploaded_pdfs \
        /tmp/.cache \
        /tmp/chroma_store \
        /tmp/fonts \
        /tmp/matplotlib \
        /tmp/static \
        /tmp/uploads \
    && printf '%s\n' '<?xml version="1.0"?><fontconfig><cachedir>/tmp/fonts</cachedir></fontconfig>' > /tmp/fonts.conf
# --- Expose Port ---
# Documents the port the server listens on; it does not publish the port.
EXPOSE 8000/tcp
# --- Run Command ---
# Exec-form CMD: starts uvicorn directly (no shell wrapper), serving the `app`
# object from the `main` module on all interfaces, port 8000.
CMD [ \
    "uvicorn", "main:app", \
    "--host", "0.0.0.0", \
    "--port", "8000" \
]