distill-m-6a3lnzvb-code / scripts /backup_to_hf.py
Delta-Vector's picture
add phase-2 ultra-conservative sweep (J,K,L,M) + waiter that auto-launches after phase 1 from the best ckpt
729546e verified
#!/usr/bin/env python3
"""Push the distill code/configs to the HF backup repo.
Usage:
.venv/bin/python scripts/backup_to_hf.py "<commit message>"
"""
import os
import sys
from pathlib import Path
from huggingface_hub import HfApi, CommitOperationAdd, create_commit
# Hub repository that main() commits to, and its repository type.
REPO_ID = "Delta-Vector/distill-m-6a3lnzvb-code"
REPO_TYPE = "model"

# Paths to mirror to the repo, written relative to the project root.
# main() walks this list in order and skips entries that do not exist locally.
INCLUDE = [
    "distill.py",

    # Top-level configs.
    "configs/base.toml",
    "configs/zero_14_17.toml",
    "configs/replicate_zero4.toml",
    "configs/grow40_winning.toml",
    "configs/grow40_simple.toml",
    "configs/grow40_winning_v2.toml",

    # Sweep configs A-I.
    "configs/sweep/A_resume_lr1e7_cos.toml",
    "configs/sweep/B_resume_lr5e8_cos.toml",
    "configs/sweep/C_resume_lr2e8_cos.toml",
    "configs/sweep/D_resume_lr1e7_const.toml",
    "configs/sweep/E_resume_lr5e8_b95.toml",
    "configs/sweep/F_cold_lr1e7_grow40.toml",
    "configs/sweep/G_cold_lr2e7_grow40.toml",
    "configs/sweep/H_cold_lr1e7_32L.toml",
    "configs/sweep/I_cold_paramgroups_grow40.toml",

    # Phase-2 sweep configs J-M.
    "configs/sweep/J_phase2_lr5e9_const.toml",
    "configs/sweep/K_phase2_lr2e8_const.toml",
    "configs/sweep/L_phase2_lr1e8_warmup500.toml",
    "configs/sweep/M_phase2_lr2e8_largebatch.toml",

    "configs/accelerate.yaml",

    # Scripts (this file included).
    "scripts/backup_to_hf.py",
    "scripts/run_sweep.sh",
    "scripts/run_sweep_rerun.sh",
    "scripts/run_hparam_sweep.sh",
    "scripts/run_phase2_sweep.sh",

    # Project-level files.
    "pyproject.toml",
    "requirements.lock.txt",
]
def main():
    """Upload the paths listed in ``INCLUDE`` to the backup repo as one commit.

    The commit message is taken from the first command-line argument; when it
    is absent or blank, ``"update"`` is used instead. The Hugging Face token is
    read from the ``HF_TOKEN`` environment variable, and the process exits with
    status 1 if that variable is unset or empty.

    Each ``INCLUDE`` entry is resolved against the project root (the parent of
    the directory that contains this script):

    * a regular file is uploaded under its listed path;
    * a directory contributes every regular file found beneath it;
    * anything else is reported as missing and skipped.

    If nothing is left to upload, no commit is created.
    """
    cli_msg = sys.argv[1] if len(sys.argv) > 1 else ""
    # A blank message falls back to the default as well: create_commit
    # refuses an empty commit message.
    msg = cli_msg if cli_msg.strip() else "update"

    token = os.environ.get("HF_TOKEN")
    if not token:
        print("HF_TOKEN env var required", file=sys.stderr)
        sys.exit(1)

    root = Path(__file__).resolve().parent.parent

    ops = []
    seen = set()  # repo paths already queued, so overlapping entries add once
    for rel in INCLUDE:
        local = root / rel
        if local.is_file():
            targets = [(rel, local)]
        elif local.is_dir():
            # CommitOperationAdd takes one file at a time, so a directory
            # entry is expanded into its files. Sorting keeps the order of
            # operations deterministic between runs.
            targets = [
                (child.relative_to(root).as_posix(), child)
                for child in sorted(local.rglob("*"))
                if child.is_file()
            ]
        else:
            print(f" skip (missing): {rel}")
            continue

        for repo_path, path in targets:
            if repo_path in seen:
                continue
            seen.add(repo_path)
            ops.append(
                CommitOperationAdd(path_in_repo=repo_path, path_or_fileobj=str(path))
            )
            print(f" add: {repo_path}")

    if not ops:
        print("nothing to upload")
        return

    api = HfApi(token=token)
    api.create_commit(
        repo_id=REPO_ID,
        repo_type=REPO_TYPE,
        operations=ops,
        commit_message=msg,
    )
    print(f"pushed {len(ops)} files to {REPO_ID}: {msg}")
# Run the backup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()