# bee/scripts/push_kaggle_tpu_kernel.py
# (Bee Deploy — HF Space backend deploy, commits de0cba5 / 5e21013)
"""Build and push the bee-train-online-tpu Kaggle kernel from local source.
Sister script to scripts/push_kaggle_kernel.py — same workflow, different
kernel ID, different runner source (workers/kaggle-tpu-train/train.py),
different accelerator (TpuV6E8). Both kernels can run concurrently on
distinct Kaggle quota pools (30h/week GPU vs 20h/week TPU).
Source of truth: workers/kaggle-tpu-train/train.py (content between
`# === KAGGLE-PASTE START ===` and `# === KAGGLE-PASTE END ===`).
Pre-flight guard: refuses to push if the kernel is already running or
queued (unless --force). Same lesson as the GPU script.
"""
from __future__ import annotations
import argparse
import json
import re
import subprocess
import sys
from pathlib import Path
# Repo root inferred from this file's location (scripts/ is one level below root).
REPO_ROOT = Path(__file__).resolve().parent.parent
# Source of truth for the kernel's code: the TPU runner with paste markers.
SOURCE = REPO_ROOT / "workers/kaggle-tpu-train/train.py"
# Scratch directory where the notebook + kernel-metadata.json are staged for push.
PUSH_DIR = Path("/tmp/bee-kaggle-tpu-push")
# Distinct kernel ID so it runs alongside the GPU kernel without colliding
# in the user's kernel list. Same secrets dataset attaches.
KERNEL_ID = "ceocxx/bee-train-online-tpu"
SECRETS_DATASET = "ceocxx/bee-secrets"
def kernel_status(kernel_id: str) -> str:
    """Best-effort query of a Kaggle kernel's worker status.

    Runs ``kaggle kernels status <kernel_id>`` and parses the
    ``KernelWorkerStatus.<NAME>`` token out of its stdout.

    Returns the lowercased status name (e.g. ``"running"``), or ``""``
    when the CLI is missing, times out, or prints nothing parseable —
    callers treat empty as "unknown" and proceed.
    """
    command = ["kaggle", "kernels", "status", kernel_id]
    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=30,
            check=False,
        )
    except Exception:
        # Missing binary, timeout, etc. — status is simply unknown.
        return ""
    match = re.search(r'status\s+"KernelWorkerStatus\.([A-Z_]+)"', proc.stdout)
    if match is None:
        return ""
    return match.group(1).lower()
def _extract_cell_source() -> str:
    """Read SOURCE and return the code between the KAGGLE-PASTE markers.

    Exits the process with an error message when the markers are missing,
    since there is nothing sane to push in that case.
    """
    src = SOURCE.read_text(encoding="utf-8")
    m = re.search(
        r"# === KAGGLE-PASTE START ===\n(.*?)# === KAGGLE-PASTE END ===",
        src,
        re.DOTALL,
    )
    if not m:
        sys.exit("paste markers not found in workers/kaggle-tpu-train/train.py")
    return m.group(1).rstrip() + "\n"


def _build_notebook(cell_source: str) -> dict:
    """Return an nbformat-4 notebook dict wrapping *cell_source* in one code cell."""
    return {
        "metadata": {
            "kernelspec": {"language": "python", "display_name": "Python 3", "name": "python3"},
            "language_info": {
                "pygments_lexer": "ipython3",
                "nbconvert_exporter": "python",
                "version": "3.12",
                "file_extension": ".py",
                "codemirror_mode": {"name": "ipython", "version": 3},
                "name": "python",
                "mimetype": "text/x-python",
            },
            "kaggle": {
                # Kaggle's TPU offering as of 2026-05 is v6e-8 (8 cores).
                "accelerator": "TpuV6E8",
                "dataSources": [{"sourceType": "datasetVersion", "datasetId": SECRETS_DATASET}],
                "isInternetEnabled": True,
                "language": "python",
                "sourceType": "notebook",
                # `isGpuEnabled` stays false for TPU kernels; Kaggle infers
                # the accelerator from the metadata above.
                "isGpuEnabled": False,
            },
        },
        "nbformat_minor": 4,
        "nbformat": 4,
        "cells": [
            {"cell_type": "code", "source": cell_source, "metadata": {"trusted": True},
             "outputs": [], "execution_count": None}
        ],
    }


def _write_push_dir(cell_source: str) -> None:
    """Stage the notebook and kernel-metadata.json under PUSH_DIR for `kaggle push`."""
    PUSH_DIR.mkdir(parents=True, exist_ok=True)
    (PUSH_DIR / "bee-train-online-tpu.ipynb").write_text(
        json.dumps(_build_notebook(cell_source)), encoding="utf-8"
    )
    meta = {
        "id": KERNEL_ID,
        "title": "bee-train-online-tpu",
        "code_file": "bee-train-online-tpu.ipynb",
        "language": "python",
        "kernel_type": "notebook",
        "is_private": True,
        "enable_gpu": False,
        "enable_tpu": True,
        "enable_internet": True,
        "keywords": [],
        "dataset_sources": [SECRETS_DATASET],
        "kernel_sources": [],
        "competition_sources": [],
        "model_sources": [],
    }
    (PUSH_DIR / "kernel-metadata.json").write_text(json.dumps(meta, indent=2), encoding="utf-8")


def main() -> None:
    """Build the TPU kernel notebook from local source and push it to Kaggle.

    Pre-flight guard: refuses to push while the kernel is running/queued
    (a second push would spawn a duplicate session and burn quota) unless
    --force is given. Exits 2 on refusal, or with the CLI's return code
    when the push itself fails.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--force",
        action="store_true",
        help="Push even if the kernel is currently running/queued (use with care).",
    )
    args = parser.parse_args()

    status = kernel_status(KERNEL_ID)
    if status in {"running", "queued"} and not args.force:
        # NOTE: em dashes below were mojibake ("β€”") in the original — repaired.
        print(
            f"[refuse] {KERNEL_ID} status={status!r} — pushing now would create "
            f"a duplicate session and waste Kaggle quota.\n"
            f"    Use --force to override, or wait for the current run "
            f"to finish (the cron will pick up automatically).",
            file=sys.stderr,
        )
        sys.exit(2)
    if status:
        print(f"[ok] {KERNEL_ID} status={status!r} — proceeding to push.")

    cell_source = _extract_cell_source()
    _write_push_dir(cell_source)
    print(f"wrote {PUSH_DIR}/bee-train-online-tpu.ipynb ({len(cell_source)} chars in cell)")
    print(f"dataset_sources: [{SECRETS_DATASET}]")

    # Force TPU v6e-8 explicitly — same lesson as the GPU side, where the
    # CLI silently fell back to the default if the accelerator string
    # didn't match Kaggle's expected enum. `TpuV6E8` is the documented
    # Kaggle API value as of 2026-05.
    res = subprocess.run(
        ["kaggle", "kernels", "push", "-p", str(PUSH_DIR),
         "--accelerator", "TpuV6E8"],
        capture_output=True, text=True,
    )
    print(res.stdout.strip())
    if res.returncode != 0:
        print(res.stderr.strip(), file=sys.stderr)
        sys.exit(res.returncode)
if __name__ == "__main__":
    # CLI entry point — the module stays side-effect-free on import.
    main()