"""Entry point for the GraphTestbed scoring server on HF Spaces.
On boot:
1. snapshot_download the companion dataset repo (lanczos/graphtestbed-gt by
default) into /data: gt/*.csv, leaderboard.db, submissions/**/*.csv.
2. Spawn a daemon thread that every BACKUP_INTERVAL seconds:
a. SELECT COUNT(*) FROM submissions; bail if unchanged.
b. sqlite3.Connection.backup() into a temp file (atomic, lock-safe).
c. upload_file the temp file β†’ leaderboard.db in the dataset repo.
d. upload_folder /data/submissions/ β†’ submissions/ in the dataset repo
(huggingface_hub diffs by content-hash; unchanged files don't transfer).
3. Hand off to server/api.py via Flask app.run(threaded=True).
Env vars (all have sensible defaults baked into the Dockerfile):
HF_TOKEN required write scope on GT_DATASET_REPO
GT_DATASET_REPO optional default: lanczos/graphtestbed-gt
GT_DATA_ROOT optional default: /data
GT_BACKUP_INTERVAL optional default: 60 (seconds)
PORT optional default: 7860
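
Local smoke test (illustrative only; the token and repo name below are
placeholders, not real values):

    HF_TOKEN=hf_xxx GT_DATASET_REPO=you/your-gt python server/space/space_entry.py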
"""
from __future__ import annotations
import os
import sqlite3
import sys
import threading
import time
from pathlib import Path
from huggingface_hub import snapshot_download, upload_file, upload_folder
HF_TOKEN = os.environ.get("HF_TOKEN")
HF_REPO = os.environ.get("GT_DATASET_REPO", "lanczos/graphtestbed-gt")
DATA_DIR = Path(os.environ.get("GT_DATA_ROOT", "/data"))
GT_DIR = DATA_DIR / "gt"
DB_PATH = DATA_DIR / "leaderboard.db"
ARCHIVE_DIR = DATA_DIR / "submissions"
BACKUP_INTERVAL = int(os.environ.get("GT_BACKUP_INTERVAL", "60"))
PORT = int(os.environ.get("PORT", "7860"))
def _require_token() -> str:
if not HF_TOKEN:
raise SystemExit(
"HF_TOKEN is unset. Set it as a Space secret with write scope on "
f"{HF_REPO}."
)
return HF_TOKEN
def bootstrap() -> None:
"""Pull GT files, leaderboard, and submission archive from the dataset repo."""
token = _require_token()
for d in (DATA_DIR, GT_DIR, ARCHIVE_DIR):
d.mkdir(parents=True, exist_ok=True)
print(f"snapshot_download {HF_REPO} β†’ {DATA_DIR}", flush=True)
try:
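        # allow_patterns (below) restricts the pull to the files the server
        # actually reads; anything else in the dataset repo stays remote.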
snapshot_download(
HF_REPO,
repo_type="dataset",
local_dir=str(DATA_DIR),
allow_patterns=["gt/*.csv", "leaderboard.db", "submissions/**/*.csv"],
token=token,
)
except Exception as e:
# First-deploy or empty repo: keep going with empty /data.
print(f"snapshot_download warning ({type(e).__name__}): {e}", flush=True)
n_gt = len(list(GT_DIR.glob("*.csv")))
print(f"GT files present: {n_gt}", flush=True)
    if DB_PATH.exists():
        conn = sqlite3.connect(DB_PATH)
        try:
            n = int(conn.execute("SELECT COUNT(*) FROM submissions").fetchone()[0])
            print(f"restored leaderboard.db ({n} submissions)", flush=True)
        except sqlite3.OperationalError:
            print("leaderboard.db present but no submissions table yet", flush=True)
        finally:
            conn.close()
else:
print("no prior leaderboard.db; starting fresh", flush=True)
def _submission_count() -> int:
if not DB_PATH.exists():
return 0
try:
conn = sqlite3.connect(DB_PATH)
try:
row = conn.execute("SELECT COUNT(*) FROM submissions").fetchone()
return int(row[0]) if row else 0
finally:
conn.close()
except sqlite3.OperationalError:
return 0
def _atomic_db_copy(dst: Path) -> None:
"""sqlite3.backup() is lock-safe β€” readers/writers stay consistent."""
src = sqlite3.connect(DB_PATH)
try:
target = sqlite3.connect(dst)
try:
src.backup(target)
finally:
target.close()
finally:
src.close()
def backup_loop() -> None:
token = _require_token()
last_count = -1
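    # The -1 sentinel guarantees one initial push, so a brand-new dataset repo
    # gets seeded even when the restored database has zero submissions.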
print(f"backup_loop started (interval={BACKUP_INTERVAL}s)", flush=True)
while True:
time.sleep(BACKUP_INTERVAL)
n = _submission_count()
if n == last_count:
continue
        tmp = DATA_DIR / "_leaderboard.db.tmp"
        try:
            _atomic_db_copy(tmp)
            upload_file(
                path_or_fileobj=str(tmp),
                path_in_repo="leaderboard.db",
                repo_id=HF_REPO, repo_type="dataset",
                token=token,
                commit_message=f"backup leaderboard ({n} submissions)",
            )
        except Exception as e:
            print(f"leaderboard backup failed: {type(e).__name__}: {e}", flush=True)
            continue
        finally:
            tmp.unlink(missing_ok=True)  # remove the temp copy, success or not
if ARCHIVE_DIR.exists() and any(ARCHIVE_DIR.rglob("*.csv")):
try:
upload_folder(
folder_path=str(ARCHIVE_DIR),
path_in_repo="submissions",
repo_id=HF_REPO, repo_type="dataset",
token=token,
commit_message=f"archive submissions ({n} total)",
allow_patterns=["**/*.csv"],
)
except Exception as e:
print(f"submission archive failed: {type(e).__name__}: {e}", flush=True)
last_count = n
print(f"backup pushed: {n} submissions", flush=True)
def main() -> int:
bootstrap()
    # Ensure server/api.py resolves its paths against what we just bootstrapped.
os.environ.setdefault("GT_DIR", str(GT_DIR))
os.environ.setdefault("GT_DB", str(DB_PATH))
os.environ.setdefault("GT_ARCHIVE_DIR", str(ARCHIVE_DIR))
threading.Thread(target=backup_loop, daemon=True).start()
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
    from api import app  # noqa: E402 (env vars must be set before this import)
print(f"serving on 0.0.0.0:{PORT}", flush=True)
app.run(host="0.0.0.0", port=PORT, threaded=True, use_reloader=False)
return 0
if __name__ == "__main__":
raise SystemExit(main())