# Captured from a Hugging Face Spaces page (Space status at capture time: Sleeping).
### Dockerfile for VisionDoc RAG - Hugging Face Complete ###
# Base image: CPython 3.11 on the Debian bookworm "slim" variant.
FROM python:3.11-slim-bookworm
# --- Env Config ---
# Cache, config and data locations for the libraries used by the app.
# Everything except NLTK_DATA points below /tmp or /etc.
#   MPLCONFIGDIR / XDG_CACHE_HOME       generic config + cache directories
#   FONTCONFIG_PATH / FONTCONFIG_FILE   fontconfig lookup dir and config file
#                                       (the file is generated later in this
#                                       Dockerfile)
#   NLTK_DATA                           NLTK corpora baked into the image
#   HF_HOME / TRANSFORMERS_CACHE        Hugging Face cache root; both are set
#                                       to the same path
#   SENTENCE_TRANSFORMERS_HOME          sentence-transformers model cache
#   PERSIST_DIR                         read by the application (usage is not
#                                       visible in this file; presumably the
#                                       Chroma store location - confirm)
ENV MPLCONFIGDIR=/tmp/matplotlib \
    XDG_CACHE_HOME=/tmp/.cache \
    FONTCONFIG_PATH=/etc/fonts \
    FONTCONFIG_FILE=/tmp/fonts.conf \
    NLTK_DATA=/app/nltk_data \
    HF_HOME=/tmp/.cache/huggingface \
    TRANSFORMERS_CACHE=/tmp/.cache/huggingface \
    SENTENCE_TRANSFORMERS_HOME=/tmp/.cache/sentence_transformers \
    PERSIST_DIR=/tmp/chroma_store
# --- System Dependencies ---
# OS packages installed with apt (sorted alphabetically for easy diffing):
#   fontconfig     font configuration tooling
#   libgl1         OpenGL runtime library; used instead of libgl1-mesa-glx,
#                  which is only a transitional package on bookworm and is
#                  not shipped by newer Debian releases
#   poppler-utils  PDF command-line utilities
#   tesseract-ocr  OCR engine
# update + install + list cleanup stay in one layer so no stale apt index is
# cached and the package lists do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        fontconfig \
        libgl1 \
        poppler-utils \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
# --- Working Directory ---
WORKDIR /app

# --- Python Dependencies ---
# Only the requirements file is copied at this point, so the (slow) install
# layer is rebuilt only when the dependency list itself changes.
COPY server/requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
# --- Pre-download NLTK Data ---
# Fetch tokenizer and POS-tagger data into /app/nltk_data (the NLTK_DATA
# location) at build time. Both the legacy ids (punkt,
# averaged_perceptron_tagger) and the newer-format ids (punkt_tab,
# averaged_perceptron_tagger_eng) are fetched, so the data is present
# whichever format the installed NLTK release looks up.
# NOTE(review): nltk.download() reports failure through its return value
# rather than an exception, so a failed download does not fail the build.
RUN mkdir -p /app/nltk_data && \
    python -c "import nltk; [nltk.download(pkg, download_dir='/app/nltk_data') for pkg in ('punkt', 'punkt_tab', 'averaged_perceptron_tagger', 'averaged_perceptron_tagger_eng')]"
# --- Pre-download HuggingFace Model with proper permissions ---
# Download BAAI/bge-m3 into the sentence-transformers cache and make the whole
# /tmp/.cache tree world-writable in the SAME layer. A recursive chmod issued
# in a later layer would store a second copy of every model file it touches.
RUN echo 'Setting up cache directories...' && \
    mkdir -p /tmp/.cache/huggingface/hub /tmp/.cache/sentence_transformers && \
    echo 'Downloading BAAI/bge-m3 model...' && \
    python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('BAAI/bge-m3', cache_folder='/tmp/.cache/sentence_transformers'); print('BAAI/bge-m3 model downloaded successfully')" && \
    chmod -R 777 /tmp/.cache

# Make the NLTK data directory reachable at /nltk_data as well.
RUN ln -s /app/nltk_data /nltk_data

# --- Copy Application Code ---
# Copied after the dependency and model layers so source edits do not
# invalidate them.
COPY ./server /app

# --- Create all necessary directories with proper permissions ---
# Upload, static, temp, matplotlib, fontconfig-cache and Chroma directories.
# /tmp/.cache is intentionally absent here: it was created and opened up in
# the model step above, and nothing in between writes to it.
# NOTE(review): the directories are world-writable because no USER is
# declared in this file; presumably the hosting platform runs the container
# under a non-root uid - confirm, and tighten if a fixed user is introduced.
# The last command writes a minimal fontconfig file (the FONTCONFIG_FILE
# target) that only redirects the font cache to /tmp/fonts.
RUN mkdir -p \
        /app/uploaded_pdfs /app/static /app/temp /app/chroma_store \
        /tmp/uploads /tmp/static /tmp/matplotlib /tmp/fonts /tmp/chroma_store && \
    chmod -R 777 \
        /app/uploaded_pdfs /app/static /app/temp /app/chroma_store \
        /tmp/uploads /tmp/static /tmp/matplotlib /tmp/fonts /tmp/chroma_store && \
    echo '<?xml version="1.0"?><fontconfig><cachedir>/tmp/fonts</cachedir></fontconfig>' > /tmp/fonts.conf
# --- Expose Port ---
# Documents the port the server listens on; it does not publish the port.
EXPOSE 8000

# --- Run Command ---
# Exec (JSON-array) form: uvicorn is started directly, without a wrapping
# shell, and serves the `app` object from the `main` module on all interfaces.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]