Separate everything into files
Browse files- Dockerfile +35 -28
- download_model.py +36 -0
- patch_vllm.sh +16 -0
- requirements.txt +5 -0
Dockerfile
CHANGED
|
@@ -1,25 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# Base: official vLLM OpenAI-compatible server (tested version family)
|
| 2 |
FROM vllm/vllm-openai:v0.9.1
|
| 3 |
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
|
|
|
|
| 7 |
|
| 8 |
-
#
|
| 9 |
-
#
|
| 10 |
-
#
|
| 11 |
-
RUN sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
|
| 12 |
-
from DotsOCR import modeling_dots_ocr_vllm' $(which vllm)
|
| 13 |
|
| 14 |
-
#
|
| 15 |
-
|
|
|
|
| 16 |
|
| 17 |
-
#
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
#
|
|
|
|
| 21 |
COPY start_server.sh /home/user/app/start_server.sh
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# Switch to the "user" user
|
| 25 |
USER user
|
|
@@ -39,20 +51,7 @@ ENV HF_HUB_ENABLE_HF_TRANSFER=1 \
|
|
| 39 |
# Pre-download the model repo using Hugging Face cache
|
| 40 |
# Note: dots.ocr requires the directory name to avoid '.' (see model card).
|
| 41 |
ARG MODEL_ID=rednote-hilab/dots.ocr
|
| 42 |
-
RUN python3
|
| 43 |
-
from huggingface_hub import snapshot_download
|
| 44 |
-
import os
|
| 45 |
-
# Download model to HF cache (default location: ~/.cache/huggingface/hub)
|
| 46 |
-
# This automatically handles caching, deduplication, and proper directory structure
|
| 47 |
-
model_path = snapshot_download(
|
| 48 |
-
repo_id=os.environ.get("MODEL_ID", "rednote-hilab/dots.ocr"),
|
| 49 |
-
allow_patterns=["*"]
|
| 50 |
-
)
|
| 51 |
-
print(f"Model downloaded to: {model_path}")
|
| 52 |
-
# Write the model path to a file for later use
|
| 53 |
-
with open("/home/user/app/model_path.txt", "w") as f:
|
| 54 |
-
f.write(model_path)
|
| 55 |
-
PY
|
| 56 |
|
| 57 |
# Set the model path from the downloaded location
|
| 58 |
RUN HF_MODEL_PATH=$(cat /home/user/app/model_path.txt) && \
|
|
@@ -63,9 +62,17 @@ RUN HF_MODEL_PATH=$(cat /home/user/app/model_path.txt) && \
|
|
| 63 |
ENV HF_MODEL_PATH=/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr
|
| 64 |
ENV PYTHONPATH="/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr:${PYTHONPATH}"
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
# Expose the Space port
|
| 67 |
EXPOSE 7860
|
| 68 |
ENV PORT=7860
|
| 69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
# Use the startup script as entrypoint
|
| 71 |
ENTRYPOINT ["/home/user/app/start_server.sh"]
|
|
|
|
# =============================================================================
# vLLM Server with DotsOCR Model for Hugging Face Spaces
# =============================================================================

# Base: official vLLM OpenAI-compatible server (tested version family)
FROM vllm/vllm-openai:v0.9.1

# Add metadata labels
LABEL maintainer="kybtech-vllm-dotsocr"
LABEL description="vLLM server with DotsOCR model for document OCR"
LABEL version="1.0"

# =============================================================================
# SYSTEM SETUP (as root)
# =============================================================================

# Install additional Python packages.
# --no-cache-dir keeps the pip download cache out of the image layer (DL3042).
COPY requirements.txt /tmp/requirements.txt
RUN pip3 install --no-cache-dir -r /tmp/requirements.txt

# Patch vLLM entrypoint to register DotsOCR model.
# This must be done as root before switching to user (required for HF Spaces).
COPY patch_vllm.sh /tmp/patch_vllm.sh
RUN chmod +x /tmp/patch_vllm.sh && /tmp/patch_vllm.sh

# Create the unprivileged user, then copy application files owned by it.
# --chown is required: without it /home/user/app is root-owned and the later
# model-download step (run as "user") cannot write model_path.txt there.
RUN useradd -m -u 1000 user
COPY --chown=user:user start_server.sh /home/user/app/start_server.sh
COPY --chown=user:user download_model.py /home/user/app/download_model.py
RUN chmod +x /home/user/app/start_server.sh /home/user/app/download_model.py

# =============================================================================
# APPLICATION SETUP (as user)
# =============================================================================

# Switch to the "user" user
USER user
|
|
|
|
# Pre-download the model repo into the Hugging Face cache at build time so
# the container starts without a cold download.
# Note: dots.ocr requires the directory name to avoid '.' (see model card).
ARG MODEL_ID=rednote-hilab/dots.ocr
RUN MODEL_ID=$MODEL_ID python3 /home/user/app/download_model.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
# Set the model path from the downloaded location
|
| 57 |
RUN HF_MODEL_PATH=$(cat /home/user/app/model_path.txt) && \
|
|
|
|
# NOTE(review): these point at the HF cache *root* for the repo, not the
# resolved snapshots/<revision> directory — presumably start_server.sh resolves
# the real path via /home/user/app/model_path.txt; confirm.
ENV HF_MODEL_PATH=/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr
ENV PYTHONPATH="/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr:${PYTHONPATH}"

# =============================================================================
# RUNTIME CONFIGURATION
# =============================================================================

# Expose the Space port (documentation only; HF Spaces publishes it)
EXPOSE 7860
ENV PORT=7860

# Health check (httpx instead of curl, which the base image may lack).
# PORT must be read via os.environ — a bare {PORT} in the f-string is an
# undefined Python name and would make every probe fail with NameError.
# raise_for_status() makes non-2xx responses count as unhealthy.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
  CMD python3 -c "import os, httpx; httpx.get('http://localhost:' + os.environ.get('PORT', '7860') + '/health', timeout=5).raise_for_status()" || exit 1

# Use the startup script as entrypoint
ENTRYPOINT ["/home/user/app/start_server.sh"]
|
download_model.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env python3
"""
Download the dots.ocr model from Hugging Face Hub.

Downloads the full model repo into the HF cache (default:
~/.cache/huggingface/hub) and writes the resolved snapshot path to a file
so the server startup script can locate it.

Environment variables:
    MODEL_ID:        repo to download (default: rednote-hilab/dots.ocr)
    MODEL_PATH_FILE: where to write the snapshot path
                     (default: /home/user/app/model_path.txt)

Exits nonzero with a short message if the download fails, so a Docker
`RUN` invoking this script fails the build cleanly.
"""

import os
import sys

from huggingface_hub import snapshot_download


def main():
    # Get model ID / output file from environment, falling back to the
    # defaults used by the Dockerfile.
    model_id = os.environ.get("MODEL_ID", "rednote-hilab/dots.ocr")
    output_file = os.environ.get("MODEL_PATH_FILE", "/home/user/app/model_path.txt")

    print(f"Downloading model: {model_id}")

    # snapshot_download fetches the entire repo by default and handles
    # caching, deduplication, and directory layout; no allow_patterns needed.
    try:
        model_path = snapshot_download(repo_id=model_id)
    except Exception as exc:  # network/auth/disk errors -> fail the build step
        print(f"Model download failed: {exc}", file=sys.stderr)
        sys.exit(1)

    print(f"Model downloaded to: {model_path}")

    # Persist the resolved snapshot path for later use by start_server.sh.
    with open(output_file, "w") as f:
        f.write(model_path)

    print(f"Model path written to: {output_file}")


if __name__ == "__main__":
    main()
|
patch_vllm.sh
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# patch_vllm.sh
# Patches the vLLM entrypoint script to register the DotsOCR model.
# Must run as root (modifies the installed vllm console script in place).

set -euo pipefail

# Resolve the vllm console script once; quote it everywhere (SC2046 —
# an unquoted $(which vllm) would word-split on unusual paths).
VLLM_BIN="$(command -v vllm)"

echo "Patching vLLM entrypoint at ${VLLM_BIN} to register DotsOCR model..."

# Insert the DotsOCR import immediately after vLLM's own CLI import so the
# custom model class is registered before main() runs.
sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
from DotsOCR import modeling_dots_ocr_vllm' "${VLLM_BIN}"

# Show the patched region; grep exits nonzero (failing the build via set -e)
# if the anchor line was not found, i.e. the patch did not apply.
echo "vLLM script after patch:"
grep -A 1 'from vllm.entrypoints.cli.main import main' "${VLLM_BIN}"

echo "vLLM patching completed successfully!"
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Additional packages required for dots.ocr
|
| 2 |
+
flash_attn==2.8.0.post2
|
| 3 |
+
transformers==4.51.3
|
| 4 |
+
huggingface_hub>=0.24.0
|
| 5 |
+
httpx>=0.24.0
|