| """Create the 6 cuilabs/bee-* HF model repos with honest, auditable READMEs. |
| |
| This is a one-shot bootstrap. Each repo: |
| - Is private at first (we toggle public when adapters are real and validated) |
| - Gets a README that states WHAT the repo represents and WHAT'S TRAINED today |
| - Does not pretend tiers without compute are "production-ready" |
| |
| Mapping mirrors apps/workspace/src/lib/models/catalog.ts (`hf_repo` field): |
| |
bee-cell    → cuilabs/bee-cell    (active — Kaggle T4 trains here)
bee-comb    → cuilabs/bee-comb    (placeholder — same base, larger context)
bee-hive    → cuilabs/bee-hive    (placeholder — 3B base, needs paid GPU)
bee-swarm   → cuilabs/bee-swarm   (placeholder — 7B+ base, needs paid GPU)
bee-enclave → cuilabs/bee-enclave (placeholder — deployment mode of Hive/Swarm)
bee-ignite  → cuilabs/bee-ignite  (placeholder — experimental R&D track)
| """ |
| from __future__ import annotations |
|
|
| import os |
|
|
# One entry per Bee tier, mirroring the `hf_repo` field of the workspace
# catalog (see module docstring). Consumed by main(): each dict feeds
# readme_for() and the repo-creation call.
#
# Keys (all str):
#   name       - HF repo id ("org/repo")
#   tagline    - one-line blurb rendered at the top of the README
#   tier       - human-readable tier label
#   base_model - HF id of the base model, or a "(TBD ...)" placeholder
#   status     - markdown paragraph stating what is actually trained today
#
# NOTE(review): the stray "β" glyphs inside these strings look like
# mojibake for an em-dash/arrow (CP1253-style corruption of U+2014/U+2192)
# — confirm the intended character before publishing these READMEs.
REPOS: list[dict[str, str]] = [
    # Tier 1 — the only tier with live training today (Kaggle T4).
    {
        "name": "cuilabs/bee-cell",
        "tagline": "Private AI on your own machine.",
        "tier": "Bee Cell β production tier 1",
        "base_model": "HuggingFaceTB/SmolLM2-360M-Instruct",
        "status": (
            "**Active training.** This repo holds LoRA adapters trained on "
            "the [`cuilabs/bee-interactions`](https://huggingface.co/datasets/cuilabs/bee-interactions) "
            "dataset by the Kaggle notebook "
            "[`ceocxx/bee-train-online`](https://www.kaggle.com/code/ceocxx/bee-train-online), "
            "kicked daily by the Vercel cron at "
            "`/api/cron/kaggle-dispatch` in the [Bee monorepo](https://github.com/cuilabs/bee). "
            "Adapter branches are named `<domain>/<utc-timestamp>` "
            "(e.g. `general/2026-04-28-1430`)."
        ),
    },
    # Tier 2 — waits on Cell pipeline validation before fan-out.
    {
        "name": "cuilabs/bee-comb",
        "tagline": "Workstation-grade AI for serious builders.",
        "tier": "Bee Comb β production tier 2",
        "base_model": "(TBD β same family as Cell, larger context)",
        "status": (
            "**Placeholder repo.** Bee Comb shares Cell's training pipeline "
            "but with a wider context window and higher throughput targets. "
            "No adapters trained yet β we are validating the Cell pipeline "
            "first before scaling up. Once Cell adapters are demonstrably "
            "useful, the same training script will fan out to Comb."
        ),
    },
    # Tier 3 — blocked on paid GPU compute.
    {
        "name": "cuilabs/bee-hive",
        "tagline": "Team-grade AI for startups and SMBs.",
        "tier": "Bee Hive β production tier 3",
        "base_model": "(TBD β 3B parameter class)",
        "status": (
            "**Placeholder repo.** Bee Hive is a 3B-class model. Kaggle's "
            "free T4 cannot train 3B with LoRA at useful throughput; this "
            "tier needs paid GPU (Modal, RunPod, Lambda, or HF Inference "
            "Endpoints). No adapters trained yet β compute provisioning "
            "pending. The `training_runs` Postgres table already supports "
            "this tier via `model_id = 'bee-hive'`."
        ),
    },
    # Tier 4 — blocked on H100-class compute.
    {
        "name": "cuilabs/bee-swarm",
        "tagline": "Enterprise-grade AI with quantum reasoning.",
        "tier": "Bee Swarm β production tier 4",
        "base_model": "(TBD β 7B+ parameter class)",
        "status": (
            "**Placeholder repo.** Bee Swarm is a 7B+ class model with "
            "quantum-reasoning routing enabled. Requires H100-class compute. "
            "No adapters trained yet β compute provisioning pending."
        ),
    },
    # Deployment mode, not a model tier — inherits Hive/Swarm weights.
    {
        "name": "cuilabs/bee-enclave",
        "tagline": "Private, auditable deployment of any Hive/Swarm workload.",
        "tier": "Bee Enclave β deployment mode (not a separate model)",
        "base_model": "(inherits Hive or Swarm weights at deploy time)",
        "status": (
            "**Placeholder repo / deployment artefact.** Bee Enclave is "
            "**not** a separate model tier above Swarm; it is a deployment "
            "*mode* that wraps Hive/Swarm-class capability inside a private, "
            "auditable boundary (data residency, audit logs, tenant-specific "
            "adapters, PQC transport). This repo will hold tenant-pinned "
            "snapshots of Hive or Swarm adapters once those exist. Billed "
            "per-deployment, not per-token."
        ),
    },
    # R&D track — research-only, hidden from public menus.
    {
        "name": "cuilabs/bee-ignite",
        "tagline": "Experimental Bee-native architecture.",
        "tier": "Bee Ignite β research / R&D",
        "base_model": "(experimental β Bee-native MoE + SSM + custom attention)",
        "status": (
            "**Placeholder repo / R&D track.** Bee Ignite is the experimental "
            "Bee-native architecture (custom attention, SSM memory, MoE "
            "routing, hierarchical neural compression). Research-only until "
            "benchmark-validated. Hidden from public model menus by default. "
            "No commercial availability."
        ),
    },
]
|
|
|
|
def readme_for(r: dict) -> str:
    """Render the README.md markdown for a single entry of ``REPOS``.

    Expects the keys ``name``, ``tagline``, ``tier``, ``base_model`` and
    ``status``; returns the complete README text, YAML front matter included.
    """
    full_name = r["name"]
    short_name = full_name.split("/")[1]  # repo part of "org/repo"
    base = r["base_model"]
    return f"""---
license: other
language:
- en
library_name: peft
pipeline_tag: text-generation
tags:
- bee
- cuilabs
- lora
---

# {full_name}

**{r['tagline']}**

{r['tier']} β published by [CUI Labs Pte. Ltd.](https://www.cuilabs.io)

---

## What this repo holds

LoRA adapters for the **{short_name}** tier of the Bee
Intelligence Engine. Base model: {base}.

{r['status']}

## Branch convention

`<domain>/<utc-timestamp>`, e.g. `programming/2026-04-28-1430`. The
`main` branch may be empty until the first successful training run. To
load a specific domain adapter:

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained("{base}", torch_dtype="auto")
tok = AutoTokenizer.from_pretrained("{base}")
model = PeftModel.from_pretrained(base, "{full_name}", revision="programming/2026-04-28-1430")
```

## Training transparency

Every adapter version corresponds to a row in the `training_runs`
Postgres table on [workspace.bee.cuilabs.io](https://workspace.bee.cuilabs.io)
with `model_id = "{short_name}"`. Metrics are real loss
values from the actual run, not estimates. Status is one of
`completed`, `partial`, or `failed` β partial means the run finished
cleanly but had nothing to train on yet.

## License

Adapter weights: see [CUI Labs licensing](https://www.cuilabs.io). Base
model weights are governed by their respective upstream licenses.
"""
|
|
|
|
def main() -> None:
    """Create every repo in ``REPOS`` (private) and upload its README.

    Requires a write-scoped ``HF_TOKEN`` env var; exits via SystemExit when
    it is missing. Failures on one repo are printed and skipped so the
    remaining repos still get bootstrapped (deliberate best-effort).
    """
    token = os.environ.get("HF_TOKEN")
    if not token:
        raise SystemExit("HF_TOKEN env var required")

    # Imported lazily so the module can be imported/inspected without the
    # huggingface_hub dependency installed.
    from huggingface_hub import HfApi

    # HfApi carries the token, so per-call token= arguments are unnecessary.
    api = HfApi(token=token)
    for r in REPOS:
        print(f"\n=== {r['name']} ===")
        try:
            # Private until adapters are real and validated (see module
            # docstring); exist_ok makes the bootstrap idempotent on re-runs.
            api.create_repo(
                r["name"],
                repo_type="model",
                private=True,
                exist_ok=True,
            )
            print(" repo: ok (created or already existed)")
        except Exception as e:  # best-effort: report and move to next repo
            print(f" repo: ERROR {e!r}")
            continue

        readme = readme_for(r)
        try:
            api.upload_file(
                path_or_fileobj=readme.encode("utf-8"),
                path_in_repo="README.md",
                repo_id=r["name"],
                repo_type="model",
                commit_message="bootstrap: tier-honest README",
            )
            print(f" README: uploaded ({len(readme)} bytes)")
        except Exception as e:  # best-effort: README failure is non-fatal
            print(f" README: ERROR {e!r}")
|
|
|
# Script entry point: run the bootstrap only on direct execution, not on import.
if __name__ == "__main__":
    main()
|
|