tommulder committed
Commit 5394e5b · 1 Parent(s): e612421

Using hf cache for the download location

Files changed (1)
  1. Dockerfile +20 -16
Dockerfile CHANGED
@@ -24,28 +24,32 @@ ENV HF_HUB_ENABLE_HF_TRANSFER=1 \
 RUN pip install --no-cache-dir --upgrade pip && \
     pip install --no-cache-dir "transformers>=4.51.1" "huggingface_hub>=0.24.0"
 
-# Pre-download the model repo into /weights/DotsOCR
+# Pre-download the model repo using Hugging Face cache
 # Note: dots.ocr requires the directory name to avoid '.' (see model card).
 ARG MODEL_ID=rednote-hilab/dots.ocr
 RUN python3 - <<'PY'
 from huggingface_hub import snapshot_download
-import os, shutil
-root = "/weights"
-os.makedirs(root, exist_ok=True)
-# Download into a temp dir then move everything under /weights/DotsOCR
-tmp = snapshot_download(repo_id=os.environ.get("MODEL_ID","rednote-hilab/dots.ocr"),
-                        local_dir=os.path.join(root,"_tmp"), allow_patterns=["*"])
-dst = os.path.join(root,"DotsOCR")
-os.makedirs(dst, exist_ok=True)
-for name in os.listdir(tmp):
-    if name == "DotsOCR": continue
-    shutil.move(os.path.join(tmp,name), os.path.join(dst,name))
-shutil.rmtree(tmp, ignore_errors=True)
+import os
+# Download model to HF cache (default location: ~/.cache/huggingface/hub)
+# This automatically handles caching, deduplication, and proper directory structure
+model_path = snapshot_download(
+    repo_id=os.environ.get("MODEL_ID", "rednote-hilab/dots.ocr"),
+    allow_patterns=["*"]
+)
+print(f"Model downloaded to: {model_path}")
+# Write the model path to a file for later use
+with open("/home/user/app/model_path.txt", "w") as f:
+    f.write(model_path)
 PY
 
-# Ensure the model code is importable as a top-level package "DotsOCR"
-ENV HF_MODEL_PATH=/weights/DotsOCR
-ENV PYTHONPATH="/weights:${PYTHONPATH}"
+# Set the model path from the downloaded location
+RUN HF_MODEL_PATH=$(cat /home/user/app/model_path.txt) && \
+    echo "export HF_MODEL_PATH=$HF_MODEL_PATH" >> /home/user/.bashrc && \
+    echo "export PYTHONPATH=\"$HF_MODEL_PATH:\$PYTHONPATH\"" >> /home/user/.bashrc
+
+# Set default environment variables (will be overridden by .bashrc in interactive shells)
+ENV HF_MODEL_PATH=/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr
+ENV PYTHONPATH="/home/user/.cache/huggingface/hub/models--rednote-hilab--dots.ocr:${PYTHONPATH}"
 
 # vLLM needs the custom model to be registered before main() runs.
 # The model authors recommend importing their vLLM adapter into the vllm CLI module.
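A note on consuming the cached weights at runtime (not part of this commit): the ENV fallback points at the repo's cache root (models--rednote-hilab--dots.ocr), while the downloaded files actually live under a snapshots/<revision> subdirectory of that root, so a serving script may prefer to resolve the real snapshot path from the cache or from the model_path.txt file written during the build. Below is a minimal sketch under those assumptions; the helper name is hypothetical and huggingface_hub is assumed to be installed in the runtime image.

# Hypothetical runtime helper (not part of this commit): resolve the cached
# snapshot directory instead of relying on the hard-coded cache-root default.
import os
from huggingface_hub import snapshot_download

def resolve_model_path(repo_id: str = "rednote-hilab/dots.ocr") -> str:
    # With local_files_only=True, snapshot_download returns the path of the
    # already-downloaded snapshot (.../snapshots/<revision>) without any
    # network access; it raises if the repo is not present in the cache.
    return snapshot_download(repo_id=repo_id, local_files_only=True)

if __name__ == "__main__":
    # Prefer the path recorded at build time, fall back to cache resolution.
    recorded = "/home/user/app/model_path.txt"
    if os.path.exists(recorded):
        with open(recorded) as f:
            model_path = f.read().strip()
    else:
        model_path = resolve_model_path(
            os.environ.get("MODEL_ID", "rednote-hilab/dots.ocr")
        )
    print(f"Serving model from: {model_path}")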