| |
| """ |
| Bootstrap this project into your own Hugging Face Space and/or Endpoint repo. |
| |
| Examples: |
| python scripts/hf_clone.py space --repo-id your-name/ace-step-lora-studio |
| python scripts/hf_clone.py endpoint --repo-id your-name/ace-step-endpoint |
| python scripts/hf_clone.py af3-endpoint --repo-id your-name/af3-caption-endpoint |
| python scripts/hf_clone.py af3-nvidia-endpoint --repo-id your-name/af3-nvidia-endpoint |
| python scripts/hf_clone.py all --space-repo-id your-name/ace-step-lora-studio --endpoint-repo-id your-name/ace-step-endpoint |
| """ |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import os |
| import shutil |
| import tempfile |
| from pathlib import Path |
| from typing import Iterable |
|
|
| from huggingface_hub import HfApi |
|
|
|
|
# Repository root: this file lives in scripts/, so the project root is one
# directory up from it.
PROJECT_ROOT = Path(__file__).resolve().parents[1]


# Directory names excluded from every snapshot; matched against ANY component
# of a relative path, so files inside these directories are skipped too.
COMMON_SKIP_DIRS = {
    ".git",
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
    ".venv",
    "venv",
    "env",
    ".idea",
    ".vscode",
    ".cache",
    ".huggingface",
    ".gradio",
    "checkpoints",
    "lora_output",
    "outputs",
    "artifacts",
    "models",
    "datasets",
    "Lora-ace-step",
}


# Exact file names excluded from every snapshot (.env may hold secrets such as
# the HF token read by _resolve_token).
COMMON_SKIP_FILES = {
    ".env",
}


# File-name prefixes excluded from every snapshot.
COMMON_SKIP_PREFIXES = (
    "song_summaries_llm",
)


# File extensions (compared lowercased) excluded from every snapshot:
# audio files, model weights, and bytecode/log artifacts.
COMMON_SKIP_SUFFIXES = {
    ".wav",
    ".flac",
    ".mp3",
    ".ogg",
    ".opus",
    ".m4a",
    ".aac",
    ".pt",
    ".bin",
    ".safetensors",
    ".ckpt",
    ".onnx",
    ".log",
    ".pyc",
    ".pyo",
    ".pyd",
}


# Per-file size cap for the Space snapshot (30 MiB); larger files are reported
# as skipped rather than uploaded.
MAX_FILE_BYTES = 30 * 1024 * 1024
|
|
|
|
def _should_skip_common(rel_path: Path, is_dir: bool) -> bool:
    """Return True when *rel_path* matches one of the shared skip rules.

    Rules, checked in order: a blocked directory name anywhere in the
    path, an exact blocked file name, a blocked name prefix, and — for
    files only — a blocked extension (case-insensitive).
    """
    name = rel_path.name
    for component in rel_path.parts:
        if component in COMMON_SKIP_DIRS:
            return True
    if name in COMMON_SKIP_FILES:
        return True
    if name.startswith(COMMON_SKIP_PREFIXES):
        return True
    return (not is_dir) and rel_path.suffix.lower() in COMMON_SKIP_SUFFIXES
|
|
|
|
| def _copy_file(src: Path, dst: Path) -> None: |
| dst.parent.mkdir(parents=True, exist_ok=True) |
| shutil.copy2(src, dst) |
|
|
|
|
def _stage_space_snapshot(staging_dir: Path) -> tuple[int, int, list[str]]:
    """Copy a filtered snapshot of the project tree into *staging_dir*.

    Walks everything under PROJECT_ROOT, applies the common skip rules
    and the per-file size cap, and mirrors the surviving files into
    *staging_dir* preserving their relative layout.

    Returns:
        (copied_count, total_bytes_copied, skipped_entries); skipped
        entries are relative-path strings — directories carry a trailing
        "/", oversized files a size annotation.
    """
    copied = 0
    bytes_total = 0
    skipped: list[str] = []

    for src in PROJECT_ROOT.rglob("*"):
        rel = src.relative_to(PROJECT_ROOT)

        # Fix: directories must never fall through to the file-copy path
        # below — shutil.copy2 raises on a directory. rglob already yields
        # their contained files individually, so an unconditional continue
        # is correct; skipped dirs are merely recorded for reporting.
        if src.is_dir():
            if _should_skip_common(rel, is_dir=True):
                skipped.append(f"{rel}/")
            continue

        if _should_skip_common(rel, is_dir=False):
            skipped.append(str(rel))
            continue

        size = src.stat().st_size
        if size > MAX_FILE_BYTES:
            skipped.append(f"{rel} (>{MAX_FILE_BYTES // (1024 * 1024)}MB)")
            continue

        _copy_file(src, staging_dir / rel)
        copied += 1
        bytes_total += size

    return copied, bytes_total, skipped
|
|
|
|
def _iter_endpoint_paths() -> Iterable[Path]:
    """Yield the project paths that belong in the endpoint repo snapshot.

    The handler, requirements, packages file and the acestep package are
    yielded when present, followed by the endpoint README template.
    """
    candidates = [
        PROJECT_ROOT / "handler.py",
        PROJECT_ROOT / "requirements.txt",
        PROJECT_ROOT / "packages.txt",
        PROJECT_ROOT / "acestep",
        PROJECT_ROOT / "templates" / "hf-endpoint" / "README.md",
    ]
    yield from (path for path in candidates if path.exists())
|
|
|
|
def _stage_endpoint_snapshot(staging_dir: Path) -> tuple[int, int]:
    """Stage the minimal file set for the ACE-Step endpoint repo.

    Top-level files land at the staging root (the templated README is
    renamed to README.md); directories are mirrored recursively under
    their own name, applying the common skip rules plus an extra
    audio-extension filter.

    Returns:
        (copied_count, total_bytes_copied)
    """
    copied = 0
    bytes_total = 0
    audio_exts = {".wav", ".flac", ".mp3", ".ogg"}

    def _stage_one(source: Path, target: Path) -> None:
        nonlocal copied, bytes_total
        _copy_file(source, target)
        copied += 1
        bytes_total += source.stat().st_size

    for src in _iter_endpoint_paths():
        if src.is_file():
            # The templates/hf-endpoint README becomes the repo README.
            if src.name == "README.md" and "templates" in src.parts:
                _stage_one(src, staging_dir / "README.md")
            else:
                _stage_one(src, staging_dir / src.name)
        elif src.is_dir():
            for nested in src.rglob("*"):
                if nested.is_dir():
                    continue
                rel_nested = nested.relative_to(src)
                skip_key = Path(src.name) / rel_nested
                if _should_skip_common(skip_key, is_dir=False):
                    continue
                if nested.suffix.lower() in audio_exts:
                    continue
                _stage_one(nested, staging_dir / src.name / rel_nested)

    return copied, bytes_total
|
|
|
|
def _iter_qwen_endpoint_template_paths() -> Iterable[tuple[Path, Path]]:
    """Yield (source, destination) pairs for the Qwen2-Audio endpoint template."""
    template_dir = PROJECT_ROOT / "templates" / "hf-qwen-caption-endpoint"
    for filename in ("handler.py", "requirements.txt", "README.md"):
        candidate = template_dir / filename
        if candidate.exists():
            yield candidate, Path(filename)
|
|
|
|
def _stage_qwen_endpoint_snapshot(staging_dir: Path) -> tuple[int, int]:
    """Copy the Qwen endpoint template files into *staging_dir*.

    Returns:
        (copied_count, total_bytes_copied)
    """
    staged = 0
    staged_bytes = 0
    for source, rel_dst in _iter_qwen_endpoint_template_paths():
        _copy_file(source, staging_dir / rel_dst)
        staged += 1
        staged_bytes += source.stat().st_size
    return staged, staged_bytes
|
|
|
|
def _iter_af3_endpoint_template_paths() -> Iterable[tuple[Path, Path]]:
    """Yield (source, destination) pairs for the AF3 caption endpoint template."""
    base = PROJECT_ROOT / "templates" / "hf-af3-caption-endpoint"
    wanted = ("handler.py", "requirements.txt", "README.md")
    for filename in wanted:
        path = base / filename
        if path.exists():
            yield path, Path(filename)
|
|
|
|
def _stage_af3_endpoint_snapshot(staging_dir: Path) -> tuple[int, int]:
    """Copy the AF3 caption endpoint template files into *staging_dir*.

    Returns:
        (copied_count, total_bytes_copied)
    """
    file_count = 0
    byte_count = 0
    for source, rel_dst in _iter_af3_endpoint_template_paths():
        _copy_file(source, staging_dir / rel_dst)
        file_count += 1
        byte_count += source.stat().st_size
    return file_count, byte_count
|
|
|
|
def _iter_af3_nvidia_endpoint_template_paths() -> Iterable[tuple[Path, Path]]:
    """Yield (source, destination) pairs for the AF3 NVIDIA-stack endpoint template."""
    base = PROJECT_ROOT / "templates" / "hf-af3-nvidia-endpoint"
    for filename in ("handler.py", "requirements.txt", "README.md"):
        source = base / filename
        if source.exists():
            yield source, Path(filename)
|
|
|
|
def _stage_af3_nvidia_endpoint_snapshot(staging_dir: Path) -> tuple[int, int]:
    """Copy the AF3 NVIDIA-stack endpoint template files into *staging_dir*.

    Returns:
        (copied_count, total_bytes_copied)
    """
    count = 0
    size_sum = 0
    for source, rel_dst in _iter_af3_nvidia_endpoint_template_paths():
        _copy_file(source, staging_dir / rel_dst)
        count += 1
        size_sum += source.stat().st_size
    return count, size_sum
|
|
|
|
def _resolve_token(arg_token: str) -> str | None:
    """Resolve an HF token: CLI argument, then env vars, then the project .env.

    Returns None when no token can be found anywhere.
    """
    if arg_token:
        return arg_token

    for var in ("HF_TOKEN", "hf_token"):
        value = os.getenv(var)
        if value:
            return value

    dotenv_path = PROJECT_ROOT / ".env"
    if not dotenv_path.exists():
        return None
    for raw_line in dotenv_path.read_text(encoding="utf-8").splitlines():
        stripped = raw_line.strip()
        if not stripped or stripped.startswith("#") or "=" not in stripped:
            continue
        key, _, value = stripped.partition("=")
        if key.strip() in {"HF_TOKEN", "hf_token"}:
            # Drop surrounding quotes a .env file commonly carries.
            return value.strip().strip('"').strip("'")
    return None
|
|
|
|
| def _ensure_repo( |
| api: HfApi, |
| repo_id: str, |
| repo_type: str, |
| private: bool, |
| space_sdk: str | None = None, |
| ) -> None: |
| kwargs = { |
| "repo_id": repo_id, |
| "repo_type": repo_type, |
| "private": private, |
| "exist_ok": True, |
| } |
| if repo_type == "space" and space_sdk: |
| kwargs["space_sdk"] = space_sdk |
| api.create_repo(**kwargs) |
|
|
|
|
| def _upload_snapshot( |
| api: HfApi, |
| repo_id: str, |
| repo_type: str, |
| folder_path: Path, |
| commit_message: str, |
| ) -> None: |
| api.upload_folder( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| folder_path=str(folder_path), |
| commit_message=commit_message, |
| delete_patterns=[], |
| ) |
|
|
|
|
| def _fmt_mb(num_bytes: int) -> str: |
| return f"{num_bytes / (1024 * 1024):.2f} MB" |
|
|
|
|
def clone_space(repo_id: str, private: bool, token: str | None, dry_run: bool) -> None:
    """Stage a filtered project snapshot and push it to a Gradio Space repo.

    With *dry_run* the snapshot is staged and summarized but nothing is
    uploaded.
    """
    with tempfile.TemporaryDirectory(prefix="hf_space_clone_") as tmp_dir:
        staging_root = Path(tmp_dir)
        file_count, total_bytes, skipped = _stage_space_snapshot(staging_root)
        print(f"[space] staged files: {file_count}, size: {_fmt_mb(total_bytes)}")

        if skipped:
            print(f"[space] skipped entries: {len(skipped)}")
            overflow = len(skipped) - 20
            for entry in skipped[:20]:
                print(f" - {entry}")
            if overflow > 0:
                print(f" ... and {overflow} more")

        if dry_run:
            print("[space] dry-run complete (nothing uploaded).")
            return

        hub = HfApi(token=token)
        _ensure_repo(hub, repo_id=repo_id, repo_type="space", private=private, space_sdk="gradio")
        _upload_snapshot(
            hub,
            repo_id=repo_id,
            repo_type="space",
            folder_path=staging_root,
            commit_message="Bootstrap ACE-Step LoRA Studio Space",
        )
        print(f"[space] uploaded to https://huggingface.co/spaces/{repo_id}")
|
|
|
|
def clone_endpoint(repo_id: str, private: bool, token: str | None, dry_run: bool) -> None:
    """Stage the ACE-Step endpoint file set and push it to a model repo.

    With *dry_run* the files are staged and summarized but not uploaded.
    """
    with tempfile.TemporaryDirectory(prefix="hf_endpoint_clone_") as scratch:
        staging_root = Path(scratch)
        file_count, total_bytes = _stage_endpoint_snapshot(staging_root)
        print(f"[endpoint] staged files: {file_count}, size: {_fmt_mb(total_bytes)}")

        if dry_run:
            print("[endpoint] dry-run complete (nothing uploaded).")
            return

        hub = HfApi(token=token)
        _ensure_repo(hub, repo_id=repo_id, repo_type="model", private=private)
        _upload_snapshot(
            hub,
            repo_id=repo_id,
            repo_type="model",
            folder_path=staging_root,
            commit_message="Bootstrap ACE-Step custom endpoint repo",
        )
        print(f"[endpoint] uploaded to https://huggingface.co/{repo_id}")
|
|
|
|
def clone_qwen_endpoint(repo_id: str, private: bool, token: str | None, dry_run: bool) -> None:
    """Stage the Qwen2-Audio endpoint template and push it to a model repo.

    With *dry_run* the files are staged and summarized but not uploaded.
    """
    with tempfile.TemporaryDirectory(prefix="hf_qwen_endpoint_clone_") as scratch:
        staging_root = Path(scratch)
        file_count, total_bytes = _stage_qwen_endpoint_snapshot(staging_root)
        print(f"[qwen-endpoint] staged files: {file_count}, size: {_fmt_mb(total_bytes)}")

        if dry_run:
            print("[qwen-endpoint] dry-run complete (nothing uploaded).")
            return

        hub = HfApi(token=token)
        _ensure_repo(hub, repo_id=repo_id, repo_type="model", private=private)
        _upload_snapshot(
            hub,
            repo_id=repo_id,
            repo_type="model",
            folder_path=staging_root,
            commit_message="Bootstrap Qwen2-Audio custom endpoint repo",
        )
        print(f"[qwen-endpoint] uploaded to https://huggingface.co/{repo_id}")
|
|
|
|
def clone_af3_endpoint(repo_id: str, private: bool, token: str | None, dry_run: bool) -> None:
    """Stage the Audio Flamingo 3 endpoint template and push it to a model repo.

    With *dry_run* the files are staged and summarized but not uploaded.
    """
    with tempfile.TemporaryDirectory(prefix="hf_af3_endpoint_clone_") as scratch:
        staging_root = Path(scratch)
        file_count, total_bytes = _stage_af3_endpoint_snapshot(staging_root)
        print(f"[af3-endpoint] staged files: {file_count}, size: {_fmt_mb(total_bytes)}")

        if dry_run:
            print("[af3-endpoint] dry-run complete (nothing uploaded).")
            return

        hub = HfApi(token=token)
        _ensure_repo(hub, repo_id=repo_id, repo_type="model", private=private)
        _upload_snapshot(
            hub,
            repo_id=repo_id,
            repo_type="model",
            folder_path=staging_root,
            commit_message="Bootstrap Audio Flamingo 3 custom endpoint repo",
        )
        print(f"[af3-endpoint] uploaded to https://huggingface.co/{repo_id}")
|
|
|
|
def clone_af3_nvidia_endpoint(repo_id: str, private: bool, token: str | None, dry_run: bool) -> None:
    """Stage the AF3 NVIDIA-stack endpoint template and push it to a model repo.

    With *dry_run* the files are staged and summarized but not uploaded.
    """
    with tempfile.TemporaryDirectory(prefix="hf_af3_nvidia_endpoint_clone_") as scratch:
        staging_root = Path(scratch)
        file_count, total_bytes = _stage_af3_nvidia_endpoint_snapshot(staging_root)
        print(f"[af3-nvidia-endpoint] staged files: {file_count}, size: {_fmt_mb(total_bytes)}")

        if dry_run:
            print("[af3-nvidia-endpoint] dry-run complete (nothing uploaded).")
            return

        hub = HfApi(token=token)
        _ensure_repo(hub, repo_id=repo_id, repo_type="model", private=private)
        _upload_snapshot(
            hub,
            repo_id=repo_id,
            repo_type="model",
            folder_path=staging_root,
            commit_message="Bootstrap Audio Flamingo 3 NVIDIA-stack endpoint repo",
        )
        print(f"[af3-nvidia-endpoint] uploaded to https://huggingface.co/{repo_id}")
|
|
|
|
def build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser: one subcommand per bootstrap target plus 'all'.

    Returns:
        The configured ArgumentParser (subcommand stored in args.cmd).
    """

    def _add_common_flags(sub: argparse.ArgumentParser, repo_help: str) -> None:
        # Every single-target subcommand shares these four options; keeping
        # them in one place removes the five-fold duplication.
        sub.add_argument("--repo-id", required=True, help=repo_help)
        sub.add_argument("--private", action="store_true", help="Create repo as private.")
        sub.add_argument("--token", type=str, default="", help="HF token (default: HF_TOKEN env var).")
        sub.add_argument("--dry-run", action="store_true", help="Stage files only; do not upload.")

    parser = argparse.ArgumentParser(description="Clone this project into your own HF Space/Endpoint repos.")
    subparsers = parser.add_subparsers(dest="cmd", required=True)

    _add_common_flags(
        subparsers.add_parser("space", help="Create/update your HF Space from this project."),
        "Target space repo id, e.g. username/my-space.",
    )
    _add_common_flags(
        subparsers.add_parser("endpoint", help="Create/update your custom endpoint model repo."),
        "Target model repo id, e.g. username/my-endpoint.",
    )
    _add_common_flags(
        subparsers.add_parser("qwen-endpoint", help="Create/update Qwen2-Audio custom endpoint repo."),
        "Target model repo id, e.g. username/my-qwen-endpoint.",
    )
    _add_common_flags(
        subparsers.add_parser("af3-endpoint", help="Create/update Audio Flamingo 3 custom endpoint repo."),
        "Target model repo id, e.g. username/my-af3-endpoint.",
    )
    _add_common_flags(
        subparsers.add_parser(
            "af3-nvidia-endpoint",
            help="Create/update AF3 NVIDIA-stack (llava+stage35) endpoint repo.",
        ),
        "Target model repo id, e.g. username/my-af3-nvidia-endpoint.",
    )

    # 'all' uses distinct flag names because it drives two repos at once.
    p_all = subparsers.add_parser("all", help="Run both Space and Endpoint bootstrap.")
    p_all.add_argument("--space-repo-id", required=True, help="Target space repo id.")
    p_all.add_argument("--endpoint-repo-id", required=True, help="Target endpoint model repo id.")
    p_all.add_argument("--space-private", action="store_true", help="Create Space as private.")
    p_all.add_argument("--endpoint-private", action="store_true", help="Create endpoint repo as private.")
    p_all.add_argument("--token", type=str, default="", help="HF token (default: HF_TOKEN env var).")
    p_all.add_argument("--dry-run", action="store_true", help="Stage files only; do not upload.")

    return parser
|
|
|
|
def main() -> int:
    """CLI entry point; returns a process exit code (0 success, 1 no token)."""
    args = build_parser().parse_args()
    token = _resolve_token(args.token)
    dry_run = bool(args.dry_run)

    # A real upload needs credentials; a dry run does not.
    if not token and not dry_run:
        print("HF token not found. Set HF_TOKEN or pass --token.")
        return 1

    if args.cmd not in {"space", "endpoint", "qwen-endpoint", "af3-endpoint", "af3-nvidia-endpoint"}:
        # 'all' bootstraps both the Space and the endpoint repo.
        clone_space(args.space_repo_id, private=bool(args.space_private), token=token, dry_run=dry_run)
        clone_endpoint(
            args.endpoint_repo_id,
            private=bool(args.endpoint_private),
            token=token,
            dry_run=dry_run,
        )
        return 0

    dispatch = {
        "space": clone_space,
        "endpoint": clone_endpoint,
        "qwen-endpoint": clone_qwen_endpoint,
        "af3-endpoint": clone_af3_endpoint,
        "af3-nvidia-endpoint": clone_af3_nvidia_endpoint,
    }
    dispatch[args.cmd](args.repo_id, private=bool(args.private), token=token, dry_run=dry_run)
    return 0
|
|
|
|
| if __name__ == "__main__": |
| raise SystemExit(main()) |
|
|