"""Entry point for the GraphTestbed scoring server on HF Spaces.
On boot:
1. snapshot_download the companion dataset repo (lanczos/graphtestbed-gt by
default) into /data: gt/*.csv, leaderboard.db, submissions/**/*.csv.
2. Spawn a daemon thread that every BACKUP_INTERVAL seconds:
a. SELECT COUNT(*) FROM submissions; bail if unchanged.
b. sqlite3.Connection.backup() into a temp file (atomic, lock-safe).
c. upload_file the temp file β†’ leaderboard.db in the dataset repo.
d. upload_folder /data/submissions/ β†’ submissions/ in the dataset repo
(huggingface_hub diffs by content-hash; unchanged files don't transfer).
3. Hand off to server/api.py via Flask app.run(threaded=True).
Env vars (all have sensible defaults baked into the Dockerfile):
HF_TOKEN required write scope on GT_DATASET_REPO
GT_DATASET_REPO optional default: lanczos/graphtestbed-gt
GT_DATA_ROOT optional default: /data
GT_BACKUP_INTERVAL optional default: 60 (seconds)
PORT optional default: 7860
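
Local smoke test (illustrative only; the token and repo name below are
placeholders, not real values):

    HF_TOKEN=hf_xxx GT_DATASET_REPO=you/your-gt python server/space/space_entry.py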
"""
from __future__ import annotations
import os
import sqlite3
import sys
import threading
import time
from pathlib import Path
from huggingface_hub import snapshot_download, upload_file, upload_folder
HF_TOKEN = os.environ.get("HF_TOKEN")
HF_REPO = os.environ.get("GT_DATASET_REPO", "lanczos/graphtestbed-gt")
DATA_DIR = Path(os.environ.get("GT_DATA_ROOT", "/data"))
GT_DIR = DATA_DIR / "gt"
DB_PATH = DATA_DIR / "leaderboard.db"
ARCHIVE_DIR = DATA_DIR / "submissions"
BACKUP_INTERVAL = int(os.environ.get("GT_BACKUP_INTERVAL", "60"))
PORT = int(os.environ.get("PORT", "7860"))
def _require_token() -> str:
if not HF_TOKEN:
raise SystemExit(
"HF_TOKEN is unset. Set it as a Space secret with write scope on "
f"{HF_REPO}."
)
return HF_TOKEN
def bootstrap() -> None:
"""Pull GT files, leaderboard, and submission archive from the dataset repo."""
token = _require_token()
for d in (DATA_DIR, GT_DIR, ARCHIVE_DIR):
d.mkdir(parents=True, exist_ok=True)
print(f"snapshot_download {HF_REPO} β†’ {DATA_DIR}", flush=True)
try:
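        # allow_patterns (below) restricts the pull to the files the server
        # actually reads; anything else in the dataset repo stays remote.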
snapshot_download(
HF_REPO,
repo_type="dataset",
local_dir=str(DATA_DIR),
allow_patterns=["gt/*.csv", "leaderboard.db", "submissions/**/*.csv"],
token=token,
)
except Exception as e:
# First-deploy or empty repo: keep going with empty /data.
print(f"snapshot_download warning ({type(e).__name__}): {e}", flush=True)
n_gt = len(list(GT_DIR.glob("*.csv")))
print(f"GT files present: {n_gt}", flush=True)
    if DB_PATH.exists():
        conn = sqlite3.connect(DB_PATH)
        try:
            n = int(conn.execute("SELECT COUNT(*) FROM submissions").fetchone()[0])
            print(f"restored leaderboard.db ({n} submissions)", flush=True)
        except sqlite3.OperationalError:
            print("leaderboard.db present but no submissions table yet", flush=True)
        finally:
            conn.close()
else:
print("no prior leaderboard.db; starting fresh", flush=True)
def _submission_count() -> int:
if not DB_PATH.exists():
return 0
try:
conn = sqlite3.connect(DB_PATH)
try:
row = conn.execute("SELECT COUNT(*) FROM submissions").fetchone()
return int(row[0]) if row else 0
finally:
conn.close()
except sqlite3.OperationalError:
return 0
def _atomic_db_copy(dst: Path) -> None:
"""sqlite3.backup() is lock-safe β€” readers/writers stay consistent."""
src = sqlite3.connect(DB_PATH)
try:
target = sqlite3.connect(dst)
try:
src.backup(target)
finally:
target.close()
finally:
src.close()
def backup_loop() -> None:
token = _require_token()
last_count = -1
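    # The -1 sentinel guarantees one initial push, so a brand-new dataset repo
    # gets seeded even when the restored database has zero submissions.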
print(f"backup_loop started (interval={BACKUP_INTERVAL}s)", flush=True)
while True:
time.sleep(BACKUP_INTERVAL)
n = _submission_count()
if n == last_count:
continue
        tmp = DATA_DIR / "_leaderboard.db.tmp"
        try:
            _atomic_db_copy(tmp)
            upload_file(
                path_or_fileobj=str(tmp),
                path_in_repo="leaderboard.db",
                repo_id=HF_REPO, repo_type="dataset",
                token=token,
                commit_message=f"backup leaderboard ({n} submissions)",
            )
        except Exception as e:
            print(f"leaderboard backup failed: {type(e).__name__}: {e}", flush=True)
            continue
        finally:
            tmp.unlink(missing_ok=True)  # remove the temp copy, success or not
if ARCHIVE_DIR.exists() and any(ARCHIVE_DIR.rglob("*.csv")):
try:
upload_folder(
folder_path=str(ARCHIVE_DIR),
path_in_repo="submissions",
repo_id=HF_REPO, repo_type="dataset",
token=token,
commit_message=f"archive submissions ({n} total)",
allow_patterns=["**/*.csv"],
)
except Exception as e:
print(f"submission archive failed: {type(e).__name__}: {e}", flush=True)
last_count = n
print(f"backup pushed: {n} submissions", flush=True)
def main() -> int:
bootstrap()
    # Ensure server/api.py resolves its paths against what we just bootstrapped.
os.environ.setdefault("GT_DIR", str(GT_DIR))
os.environ.setdefault("GT_DB", str(DB_PATH))
os.environ.setdefault("GT_ARCHIVE_DIR", str(ARCHIVE_DIR))
threading.Thread(target=backup_loop, daemon=True).start()
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
    from api import app  # noqa: E402 (env vars must be set before this import)
print(f"serving on 0.0.0.0:{PORT}", flush=True)
app.run(host="0.0.0.0", port=PORT, threaded=True, use_reloader=False)
return 0
if __name__ == "__main__":
raise SystemExit(main())