Using hf cache for the download location
Browse files- Dockerfile +20 -16
Dockerfile
CHANGED
|
@@ -24,28 +24,32 @@ ENV HF_HUB_ENABLE_HF_TRANSFER=1 \
|
|
| 24 |
RUN pip install --no-cache-dir --upgrade pip && \
|
| 25 |
pip install --no-cache-dir "transformers>=4.51.1" "huggingface_hub>=0.24.0"
|
| 26 |
|
| 27 |
-
# Pre-download the model repo
|
| 28 |
# Note: dots.ocr requires the directory name to avoid '.' (see model card).
|
| 29 |
ARG MODEL_ID=rednote-hilab/dots.ocr
|
| 30 |
RUN python3 - <<'PY'
|
| 31 |
from huggingface_hub import snapshot_download
|
| 32 |
-
import os
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
for
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
shutil.rmtree(tmp, ignore_errors=True)
|
| 44 |
PY
|
| 45 |
|
| 46 |
-
#
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
# vLLM needs the custom model to be registered before main() runs.
|
| 51 |
# The model authors recommend importing their vLLM adapter into the vllm CLI module.
|
|
|
|
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir "transformers>=4.51.1" "huggingface_hub>=0.24.0"

# Pre-download the model repo using the Hugging Face cache.
# Note: dots.ocr requires the directory name to avoid '.' (see model card),
# so the snapshot is exposed through a dot-free symlink created below.
ARG MODEL_ID=rednote-hilab/dots.ocr
RUN python3 - <<'PY'
import os
from huggingface_hub import snapshot_download

# Download into the HF cache (default location: ~/.cache/huggingface/hub).
# snapshot_download returns the concrete snapshot directory
# (.../models--<org>--<name>/snapshots/<revision>) — that directory, not the
# top-level models--* cache dir, is what contains the model files.
model_path = snapshot_download(
    repo_id=os.environ.get("MODEL_ID", "rednote-hilab/dots.ocr"),
    allow_patterns=["*"],
)
print(f"Model downloaded to: {model_path}")

# Record the resolved snapshot path for later build steps / runtime use.
with open("/home/user/app/model_path.txt", "w") as f:
    f.write(model_path)

# Stable, dot-free alias to the snapshot so the ENV fallback below can
# reference a fixed path (the snapshot dir name embeds a revision hash
# that is unknown at Dockerfile-authoring time).
os.symlink(model_path, "/home/user/app/dots_ocr")
PY

# Export the resolved snapshot path for interactive shells.
RUN HF_MODEL_PATH=$(cat /home/user/app/model_path.txt) && \
    echo "export HF_MODEL_PATH=$HF_MODEL_PATH" >> /home/user/.bashrc && \
    echo "export PYTHONPATH=\"$HF_MODEL_PATH:\$PYTHONPATH\"" >> /home/user/.bashrc

# Fallback for non-interactive processes (.bashrc is not sourced for them):
# point at the dot-free symlink, which always resolves to the downloaded
# snapshot. The previously hardcoded models--rednote-hilab--dots.ocr cache
# dir was NOT the snapshot dir and contained '.', which dots.ocr forbids.
ENV HF_MODEL_PATH=/home/user/app/dots_ocr
ENV PYTHONPATH="/home/user/app/dots_ocr:${PYTHONPATH}"

# vLLM needs the custom model to be registered before main() runs.
# The model authors recommend importing their vLLM adapter into the vllm CLI module.