# bee/scripts/push_kaggle_tpu_kernel.py
# (Bee Deploy — HF Space backend deploy, commits de0cba5 / 5e21013)
"""Build and push the bee-train-online-tpu Kaggle kernel from local source.
Sister script to scripts/push_kaggle_kernel.py — same workflow, different
kernel ID, different runner source (workers/kaggle-tpu-train/train.py),
different accelerator (TpuV6E8). Both kernels can run concurrently on
distinct Kaggle quota pools (30h/week GPU vs 20h/week TPU).
Source of truth: workers/kaggle-tpu-train/train.py (content between
`# === KAGGLE-PASTE START ===` and `# === KAGGLE-PASTE END ===`).
Pre-flight guard: refuses to push if the kernel is already running or
queued (unless --force). Same lesson as the GPU script.
"""
from __future__ import annotations
import argparse
import json
import re
import subprocess
import sys
from pathlib import Path
# Repo root inferred from this file's location (scripts/ is one level below root).
REPO_ROOT = Path(__file__).resolve().parent.parent
# Source of truth for the kernel's code: the TPU runner with paste markers.
SOURCE = REPO_ROOT / "workers/kaggle-tpu-train/train.py"
# Scratch directory where the notebook + kernel-metadata.json are staged for push.
PUSH_DIR = Path("/tmp/bee-kaggle-tpu-push")
# Distinct kernel ID so it runs alongside the GPU kernel without colliding
# in the user's kernel list. Same secrets dataset attaches.
KERNEL_ID = "ceocxx/bee-train-online-tpu"
SECRETS_DATASET = "ceocxx/bee-secrets"
def kernel_status(kernel_id: str) -> str:
    """Best-effort query of a Kaggle kernel's worker status.

    Runs ``kaggle kernels status <kernel_id>`` and parses the
    ``KernelWorkerStatus.<NAME>`` token out of its stdout.

    Returns the lowercased status name (e.g. ``"running"``), or ``""``
    when the CLI is missing, times out, or prints nothing parseable —
    callers treat empty as "unknown" and proceed.
    """
    command = ["kaggle", "kernels", "status", kernel_id]
    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=30,
            check=False,
        )
    except Exception:
        # Missing binary, timeout, etc. — status is simply unknown.
        return ""
    match = re.search(r'status\s+"KernelWorkerStatus\.([A-Z_]+)"', proc.stdout)
    if match is None:
        return ""
    return match.group(1).lower()
def _extract_cell_source() -> str:
    """Read SOURCE and return the code between the KAGGLE-PASTE markers.

    Exits the process with an error message when the markers are missing,
    since there is nothing sane to push in that case.
    """
    src = SOURCE.read_text(encoding="utf-8")
    m = re.search(
        r"# === KAGGLE-PASTE START ===\n(.*?)# === KAGGLE-PASTE END ===",
        src,
        re.DOTALL,
    )
    if not m:
        sys.exit("paste markers not found in workers/kaggle-tpu-train/train.py")
    return m.group(1).rstrip() + "\n"


def _build_notebook(cell_source: str) -> dict:
    """Return an nbformat-4 notebook dict wrapping *cell_source* in one code cell."""
    return {
        "metadata": {
            "kernelspec": {"language": "python", "display_name": "Python 3", "name": "python3"},
            "language_info": {
                "pygments_lexer": "ipython3",
                "nbconvert_exporter": "python",
                "version": "3.12",
                "file_extension": ".py",
                "codemirror_mode": {"name": "ipython", "version": 3},
                "name": "python",
                "mimetype": "text/x-python",
            },
            "kaggle": {
                # Kaggle's TPU offering as of 2026-05 is v6e-8 (8 cores).
                "accelerator": "TpuV6E8",
                "dataSources": [{"sourceType": "datasetVersion", "datasetId": SECRETS_DATASET}],
                "isInternetEnabled": True,
                "language": "python",
                "sourceType": "notebook",
                # `isGpuEnabled` stays false for TPU kernels; Kaggle infers
                # the accelerator from the metadata above.
                "isGpuEnabled": False,
            },
        },
        "nbformat_minor": 4,
        "nbformat": 4,
        "cells": [
            {"cell_type": "code", "source": cell_source, "metadata": {"trusted": True},
             "outputs": [], "execution_count": None}
        ],
    }


def _write_push_dir(cell_source: str) -> None:
    """Stage the notebook and kernel-metadata.json under PUSH_DIR for `kaggle push`."""
    PUSH_DIR.mkdir(parents=True, exist_ok=True)
    (PUSH_DIR / "bee-train-online-tpu.ipynb").write_text(
        json.dumps(_build_notebook(cell_source)), encoding="utf-8"
    )
    meta = {
        "id": KERNEL_ID,
        "title": "bee-train-online-tpu",
        "code_file": "bee-train-online-tpu.ipynb",
        "language": "python",
        "kernel_type": "notebook",
        "is_private": True,
        "enable_gpu": False,
        "enable_tpu": True,
        "enable_internet": True,
        "keywords": [],
        "dataset_sources": [SECRETS_DATASET],
        "kernel_sources": [],
        "competition_sources": [],
        "model_sources": [],
    }
    (PUSH_DIR / "kernel-metadata.json").write_text(json.dumps(meta, indent=2), encoding="utf-8")


def main() -> None:
    """Build the TPU kernel notebook from local source and push it to Kaggle.

    Pre-flight guard: refuses to push while the kernel is running/queued
    (a second push would spawn a duplicate session and burn quota) unless
    --force is given. Exits 2 on refusal, or with the CLI's return code
    when the push itself fails.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--force",
        action="store_true",
        help="Push even if the kernel is currently running/queued (use with care).",
    )
    args = parser.parse_args()

    status = kernel_status(KERNEL_ID)
    if status in {"running", "queued"} and not args.force:
        # NOTE: em dashes below were mojibake ("β€”") in the original — repaired.
        print(
            f"[refuse] {KERNEL_ID} status={status!r} — pushing now would create "
            f"a duplicate session and waste Kaggle quota.\n"
            f"    Use --force to override, or wait for the current run "
            f"to finish (the cron will pick up automatically).",
            file=sys.stderr,
        )
        sys.exit(2)
    if status:
        print(f"[ok] {KERNEL_ID} status={status!r} — proceeding to push.")

    cell_source = _extract_cell_source()
    _write_push_dir(cell_source)
    print(f"wrote {PUSH_DIR}/bee-train-online-tpu.ipynb ({len(cell_source)} chars in cell)")
    print(f"dataset_sources: [{SECRETS_DATASET}]")

    # Force TPU v6e-8 explicitly — same lesson as the GPU side, where the
    # CLI silently fell back to the default if the accelerator string
    # didn't match Kaggle's expected enum. `TpuV6E8` is the documented
    # Kaggle API value as of 2026-05.
    res = subprocess.run(
        ["kaggle", "kernels", "push", "-p", str(PUSH_DIR),
         "--accelerator", "TpuV6E8"],
        capture_output=True, text=True,
    )
    print(res.stdout.strip())
    if res.returncode != 0:
        print(res.stderr.strip(), file=sys.stderr)
        sys.exit(res.returncode)
if __name__ == "__main__":
    # CLI entry point — the module stays side-effect-free on import.
    main()