tommulder committed on
Commit
1a22905
·
1 Parent(s): 65907ea

Separate everything into files

Browse files
Files changed (4) hide show
  1. Dockerfile +35 -28
  2. download_model.py +36 -0
  3. patch_vllm.sh +16 -0
  4. requirements.txt +5 -0
Dockerfile CHANGED
@@ -1,25 +1,37 @@
 
 
 
 
1
  # Base: official vLLM OpenAI-compatible server (tested version family)
2
  FROM vllm/vllm-openai:v0.9.1
3
 
4
- # Install required packages as root
5
- RUN pip3 install flash_attn==2.8.0.post2
6
- RUN pip3 install transformers==4.51.3
 
7
 
8
- # vLLM needs the custom model to be registered before main() runs.
9
- # The model authors recommend importing their vLLM adapter into the vllm CLI module.
10
- # Do this as root before switching to user (required for HF Spaces)
11
- RUN sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
12
- from DotsOCR import modeling_dots_ocr_vllm' $(which vllm)
13
 
14
- # Show the patched part of the vllm script for verification
15
- RUN grep -A 1 'from vllm.entrypoints.cli.main import main' $(which vllm)
 
16
 
17
- # Set up a new user named "user" with user ID 1000
18
- RUN useradd -m -u 1000 user
 
 
19
 
20
- # Copy the startup script and make it executable (as root)
 
21
  COPY start_server.sh /home/user/app/start_server.sh
22
- RUN chmod +x /home/user/app/start_server.sh
 
 
 
 
 
23
 
24
  # Switch to the "user" user
25
  USER user
@@ -39,20 +51,7 @@ ENV HF_HUB_ENABLE_HF_TRANSFER=1 \
39
  # Pre-download the model repo using Hugging Face cache
40
  # Note: dots.ocr requires the directory name to avoid '.' (see model card).
41
  ARG MODEL_ID=rednote-hilab/dots.ocr
42
- RUN python3 - <<'PY'
43
- from huggingface_hub import snapshot_download
44
- import os
45
- # Download model to HF cache (default location: ~/.cache/huggingface/hub)
46
- # This automatically handles caching, deduplication, and proper directory structure
47
- model_path = snapshot_download(
48
- repo_id=os.environ.get("MODEL_ID", "rednote-hilab/dots.ocr"),
49
- allow_patterns=["*"]
50
- )
51
- print(f"Model downloaded to: {model_path}")
52
- # Write the model path to a file for later use
53
- with open("/home/user/app/model_path.txt", "w") as f:
54
- f.write(model_path)
55
- PY
56
 
57
  # Set the model path from the downloaded location
58
  RUN HF_MODEL_PATH=$(cat /home/user/app/model_path.txt) && \
@@ -63,9 +62,17 @@ RUN HF_MODEL_PATH=$(cat /home/user/app/model_path.txt) && \
63
  ENV HF_MODEL_PATH=/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr
64
  ENV PYTHONPATH="/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr:${PYTHONPATH}"
65
 
 
 
 
 
66
  # Expose the Space port
67
  EXPOSE 7860
68
  ENV PORT=7860
69
 
 
 
 
 
70
  # Use the startup script as entrypoint
71
  ENTRYPOINT ["/home/user/app/start_server.sh"]
 
1
+ # =============================================================================
2
+ # vLLM Server with DotsOCR Model for Hugging Face Spaces
3
+ # =============================================================================
4
+
5
  # Base: official vLLM OpenAI-compatible server (tested version family)
6
  FROM vllm/vllm-openai:v0.9.1
7
 
8
+ # Add metadata labels
9
+ LABEL maintainer="kybtech-vllm-dotsocr"
10
+ LABEL description="vLLM server with DotsOCR model for document OCR"
11
+ LABEL version="1.0"
12
 
13
+ # =============================================================================
14
+ # SYSTEM SETUP (as root)
15
+ # =============================================================================
 
 
16
 
17
+ # Install additional Python packages
18
+ COPY requirements.txt /tmp/requirements.txt
19
+ RUN pip3 install --no-cache-dir -r /tmp/requirements.txt
20
 
21
+ # Patch vLLM entrypoint to register DotsOCR model
22
+ # This must be done as root before switching to user (required for HF Spaces)
23
+ COPY patch_vllm.sh /tmp/patch_vllm.sh
24
+ RUN chmod +x /tmp/patch_vllm.sh && /tmp/patch_vllm.sh
25
 
26
+ # Create user and copy application files
27
+ RUN useradd -m -u 1000 user
28
  COPY start_server.sh /home/user/app/start_server.sh
29
+ COPY download_model.py /home/user/app/download_model.py
30
+ RUN chmod +x /home/user/app/start_server.sh /home/user/app/download_model.py
31
+
32
+ # =============================================================================
33
+ # APPLICATION SETUP (as user)
34
+ # =============================================================================
35
 
36
  # Switch to the "user" user
37
  USER user
 
51
  # Pre-download the model repo using Hugging Face cache
52
  # Note: dots.ocr requires the directory name to avoid '.' (see model card).
53
  ARG MODEL_ID=rednote-hilab/dots.ocr
54
+ RUN MODEL_ID=$MODEL_ID python3 /home/user/app/download_model.py
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  # Set the model path from the downloaded location
57
  RUN HF_MODEL_PATH=$(cat /home/user/app/model_path.txt) && \
 
62
  ENV HF_MODEL_PATH=/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr
63
  ENV PYTHONPATH="/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr:${PYTHONPATH}"
64
 
65
+ # =============================================================================
66
+ # RUNTIME CONFIGURATION
67
+ # =============================================================================
68
+
69
  # Expose the Space port
70
  EXPOSE 7860
71
  ENV PORT=7860
72
 
73
+ # Add health check (using httpx instead of curl)
74
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
75
+ CMD python3 -c "import httpx; httpx.get('http://localhost:${PORT:-7860}/health', timeout=5).raise_for_status()" || exit 1
76
+
77
  # Use the startup script as entrypoint
78
  ENTRYPOINT ["/home/user/app/start_server.sh"]
download_model.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Download the dots.ocr model from Hugging Face Hub.
4
+ This script downloads the model to the HF cache and writes the path to a file.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ from huggingface_hub import snapshot_download
10
+
11
+
12
def main():
    """Fetch the model snapshot from the Hugging Face Hub and record its local path.

    The repository ID is read from the ``MODEL_ID`` environment variable and
    falls back to ``rednote-hilab/dots.ocr``. The path returned by
    ``snapshot_download`` is written to ``/home/user/app/model_path.txt``.
    """
    repo = os.environ.get("MODEL_ID", "rednote-hilab/dots.ocr")
    print(f"Downloading model: {repo}")

    # Request every file in the repo ("*"); the cache location is left to huggingface_hub.
    snapshot_dir = snapshot_download(repo_id=repo, allow_patterns=["*"])
    print(f"Model downloaded to: {snapshot_dir}")

    # Persist the snapshot path so a later step can read it back from this file.
    path_file = "/home/user/app/model_path.txt"
    with open(path_file, "w") as handle:
        handle.write(snapshot_dir)
    print(f"Model path written to: {path_file}")
33
+
34
+
35
+ if __name__ == "__main__":
36
+ main()
patch_vllm.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# patch_vllm.sh
# Patches the vLLM entrypoint script so that it imports the DotsOCR vLLM
# adapter on the line after the vLLM CLI main() import.

set -euo pipefail

echo "Patching vLLM entrypoint to register DotsOCR model..."

# Resolve the entrypoint once; abort with a clear message if it is not on PATH.
VLLM_BIN="$(command -v vllm)" || {
    echo "ERROR: 'vllm' executable not found on PATH" >&2
    exit 1
}

# Insert the DotsOCR import directly after the vLLM CLI import line.
sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
from DotsOCR import modeling_dots_ocr_vllm' "$VLLM_BIN"

echo "vLLM script after patch:"
grep -A 1 'from vllm.entrypoints.cli.main import main' "$VLLM_BIN"

# sed exits 0 even when the anchor line is absent, so confirm the import landed.
if ! grep -q '^from DotsOCR import modeling_dots_ocr_vllm$' "$VLLM_BIN"; then
    echo "ERROR: DotsOCR import was not added to $VLLM_BIN" >&2
    exit 1
fi

echo "vLLM patching completed successfully!"
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Additional packages required for dots.ocr
2
+ flash_attn==2.8.0.post2
3
+ transformers==4.51.3
4
+ huggingface_hub>=0.24.0
5
+ httpx>=0.24.0