| """Deploy Bee backend to HF Space `cuilabs/bee` via curated git push. |
| |
| The Space's Dockerfile only consumes a subset of the repo. Pushing the |
| full monorepo (apps/, packages/, docs/, tests/, supabase/, ...) bloats |
| the Space's git history with ~140k lines that the Docker build ignores. |
| |
| This script builds a focused deploy by: |
| 1. Resolving the current `master` commit SHA. |
| 2. Copying ONLY the paths the Dockerfile needs into a temp dir. |
| 3. Initialising a fresh git repo there, committing as |
| "HF Space backend deploy [<sha>]". |
4. Force-pushing to the space remote's `main` branch — HF Spaces
| build from the current tree, not the git history; force-push is |
| correct (no commit data is lost; the source of truth is GitHub). |
| 5. Cleaning up the temp dir. |
| |
| The Space rebuild starts automatically after the push (~2-10 min, |
| visible at https://huggingface.co/spaces/cuilabs/bee). |
| |
| Usage: |
| python scripts/deploy_hf_space.py [--dry-run] |
| |
| Authentication: the script reuses the credentials baked into the |
| `space` git remote (https://huggingface.co/spaces/cuilabs/bee). If |
| you've never pushed before, run `huggingface-cli login` first or set |
| HF_TOKEN in the environment so the http auth helper picks it up. |
| |
Curation list — kept in sync with the Dockerfile's COPY directives.
| Update both when adding new runtime dependencies. |
| """ |
| from __future__ import annotations |
|
|
| import argparse |
| import os |
| import shutil |
| import subprocess |
| import sys |
| import tempfile |
| from pathlib import Path |
|
|
# Repo root: this script lives in scripts/, so two parents up is the monorepo root.
ROOT = Path(__file__).resolve().parent.parent


# Paths the Space's Dockerfile COPY directives consume. Keep this list in
# sync with the Dockerfile (see module docstring); a missing entry aborts
# the deploy with exit status 2.
REQUIRED_PATHS: list[str] = [
    "Dockerfile",
    "requirements.docker.txt",
    "requirements.txt",
    "README.md",
    ".env.example",
    "pyproject.toml",
    "bee",
    "scripts",
]


# Data artifacts shipped only when present locally; the deploy proceeds
# (with a notice) when they are absent rather than failing.
OPTIONAL_PATHS: list[str] = [
    "data/datasets",
    "data/rag_index",
    "data/lora_checkpoints",
]


# Junk/caches excluded from every directory copy (passed as the `ignore`
# callable to shutil.copytree).
IGNORE = shutil.ignore_patterns(
    "__pycache__",
    "*.pyc",
    "*.pyo",
    ".pytest_cache",
    ".DS_Store",
    ".mypy_cache",
    ".ruff_cache",
    "*.log",
    ".venv",
    "node_modules",
)


# Files larger than this (10 MiB) are deleted from the deploy tree before
# committing — presumably to stay under HF's plain-git file-size limit
# (larger files would require git-lfs). TODO confirm the exact limit.
MAX_FILE_SIZE: int = 10 * 1024 * 1024

# Target HF Space git repo and the branch its builder watches.
SPACE_REMOTE: str = "https://huggingface.co/spaces/cuilabs/bee"
SPACE_BRANCH: str = "main"


# YAML frontmatter HF Spaces require at the top of README.md; injected by
# the deploy when the repo README doesn't already start with a `---` block.
# NOTE(review): the "π" and "β" characters below look like mojibake
# (probably an emoji and an em dash) — confirm against the live Space
# settings before changing; this text is pushed verbatim to the Space.
HF_SPACE_FRONTMATTER = """---
title: Bee Intelligence Engine
emoji: π
colorFrom: yellow
colorTo: gray
sdk: docker
app_port: 7860
pinned: true
license: apache-2.0
short_description: The Intelligence Engine β domain LoRA adapters
---

"""
|
|
|
|
def run(cmd: list[str], cwd: Path) -> subprocess.CompletedProcess[str]:
    """Execute *cmd* in *cwd*, raising CalledProcessError on non-zero exit.

    stdout/stderr are captured and decoded as text so callers can read
    `.stdout` directly (e.g. for `git rev-parse`).
    """
    return subprocess.run(
        cmd,
        cwd=cwd,
        check=True,
        text=True,
        capture_output=True,
    )
|
|
|
|
def _copy_required(tmp: Path) -> None:
    """Copy every REQUIRED_PATHS entry into *tmp*, exiting with status 2 if
    any is absent.

    README.md is special-cased: HF Spaces require YAML frontmatter at the
    top of the README, so HF_SPACE_FRONTMATTER is prepended when the repo
    README doesn't already begin with a `---` block.
    """
    for rel in REQUIRED_PATHS:
        src = ROOT / rel
        if not src.exists():
            # A missing required path would break the Docker build; abort
            # before touching the remote.
            print(f" β MISSING required path: {rel}")
            sys.exit(2)
        dst = tmp / rel
        dst.parent.mkdir(parents=True, exist_ok=True)
        if src.is_dir():
            shutil.copytree(src, dst, ignore=IGNORE)
        elif rel == "README.md":
            content = src.read_text(encoding="utf-8")
            if not content.lstrip().startswith("---"):
                dst.write_text(HF_SPACE_FRONTMATTER + content, encoding="utf-8")
                print(f" + {rel} (with injected HF frontmatter)")
                continue  # already logged with its own message
            shutil.copy2(src, dst)
        else:
            shutil.copy2(src, dst)
        print(f" + {rel}")


def _copy_optional(tmp: Path) -> None:
    """Copy each OPTIONAL_PATHS entry that exists locally; absence is not
    an error — it is reported and skipped."""
    for rel in OPTIONAL_PATHS:
        src = ROOT / rel
        if not src.exists():
            print(f" - {rel} (optional, not present, skipped)")
            continue
        dst = tmp / rel
        dst.parent.mkdir(parents=True, exist_ok=True)
        if src.is_dir():
            shutil.copytree(src, dst, ignore=IGNORE)
        else:
            shutil.copy2(src, dst)
        print(f" + {rel} (optional, present)")


def _strip_large_files(tmp: Path) -> None:
    """Delete files larger than MAX_FILE_SIZE from the deploy tree and
    report what was removed (plain-git pushes reject huge files)."""
    stripped: list[tuple[Path, int]] = []
    # Snapshot the walk with list() so unlinking can't disturb iteration.
    for f in list(tmp.rglob("*")):
        if not f.is_file():
            continue
        size = f.stat().st_size  # stat once; reused for the report below
        if size > MAX_FILE_SIZE:
            stripped.append((f, size))
            f.unlink()
    if stripped:
        print(f"\n stripped {len(stripped)} file(s) larger than {MAX_FILE_SIZE // (1024 * 1024)} MiB:")
        for f, size in stripped:
            rel = f.relative_to(tmp)
            print(f" - {rel} ({size / 1024 / 1024:.1f} MiB)")


def _commit_and_push(tmp: Path, sha: str, full_sha: str) -> None:
    """Create a fresh single-commit repo in *tmp* and force-push it to the
    Space's branch. Exits with git's status code if the push fails."""
    run(["git", "init", "-q", "--initial-branch=main"], cwd=tmp)
    run(["git", "config", "user.name", "Bee Deploy"], cwd=tmp)
    run(["git", "config", "user.email", "ops@cuilabs.io"], cwd=tmp)
    run(["git", "add", "-A"], cwd=tmp)
    # Short SHA in the subject, full SHA in the body for traceability.
    run(["git", "commit", "-q", "-m",
         f"HF Space backend deploy [{sha}]\n\nGitHub master: {full_sha}"], cwd=tmp)

    run(["git", "remote", "add", "space", SPACE_REMOTE], cwd=tmp)
    # Deliberately not using run() here: a failed push should surface git's
    # own stderr as a readable message, not a CalledProcessError traceback.
    push = subprocess.run(
        ["git", "push", "--force", "space", f"main:{SPACE_BRANCH}"],
        cwd=tmp, capture_output=True, text=True,
    )
    if push.returncode != 0:
        print(f" push failed:\n{push.stderr}", file=sys.stderr)
        sys.exit(push.returncode)
    print(f"\n pushed β {SPACE_REMOTE}:{SPACE_BRANCH}")
    # Plain strings below — the original f-prefixes had no placeholders (F541).
    print(" HF Space is rebuilding now. Verify at:")
    print(" https://huggingface.co/spaces/cuilabs/bee")
    print(" https://cuilabs-bee.hf.space/v1/adapters (404 β still building)")


def main() -> None:
    """Build the curated deploy tree in a temp dir and push it to the HF
    Space (or just report the tree size with --dry-run)."""
    p = argparse.ArgumentParser()
    p.add_argument("--dry-run", action="store_true",
                   help="build the deploy tree but skip the push")
    args = p.parse_args()

    sha = run(["git", "rev-parse", "--short", "HEAD"], cwd=ROOT).stdout.strip()
    full_sha = run(["git", "rev-parse", "HEAD"], cwd=ROOT).stdout.strip()
    branch = run(["git", "rev-parse", "--abbrev-ref", "HEAD"], cwd=ROOT).stdout.strip()
    print(f"deploying {sha} (branch {branch}) to {SPACE_REMOTE}:{SPACE_BRANCH}")

    with tempfile.TemporaryDirectory() as tmp_name:
        tmp = Path(tmp_name)
        _copy_required(tmp)
        _copy_optional(tmp)
        _strip_large_files(tmp)

        if args.dry_run:
            # Walk the tree once; the original walked it twice for the summary.
            files = [f for f in tmp.rglob("*") if f.is_file()]
            total_bytes = sum(f.stat().st_size for f in files)
            print(f"\n[dry-run] {len(files)} files, {total_bytes:,} bytes total. Skipping push.")
            return

        _commit_and_push(tmp, sha, full_sha)
|
|
|
|
if __name__ == "__main__":
    # Script entry point: `python scripts/deploy_hf_space.py [--dry-run]`.
    main()
|
|