| |
| """Codette Merge-Only Script — Merge orchestrator LoRA into base Llama 3.1 8B. |
| |
| Lightweight script that: |
| 1. Downloads the orchestrator adapter from HF |
| 2. Loads the base model on CPU (float16) to avoid GPU OOM |
| 3. Merges LoRA weights into base |
| 4. Uploads merged model to Raiff1982/codette-llama-3.1-8b-merged |
| |
| Designed to run on HF Jobs with cpu-basic or a10g-small. |
| """ |
|
|
| import subprocess, sys |
|
|
# Bootstrap step: the job image is not assumed to ship the ML stack, so the
# required packages are installed with pip before the heavy imports run.
print(
    "=" * 60,
    "Codette Orchestrator Merge — Installing Dependencies",
    "=" * 60,
    sep="\n",
)
# Invoke pip through the running interpreter so the packages land in the
# environment this script is executing in; check_call raises on failure.
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "-q"]
    + ["torch", "transformers>=4.40.0", "peft>=0.10.0"]
    + ["accelerate>=0.28.0", "huggingface_hub>=0.22.0"]
    + ["sentencepiece", "protobuf", "safetensors"]
)
print("Dependencies installed.\n")
|
|
| import os, gc, torch, traceback |
| from datetime import datetime |
| from huggingface_hub import HfApi, snapshot_download |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
| from peft import PeftModel |
|
|
| |
# Hub repo id of the base model that the LoRA adapter is merged into.
MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
# Hub repo holding the LoRA adapters; only its "orchestrator/" files are fetched.
ADAPTER_REPO = "Raiff1982/codette-lora-adapters"
# Hub repo that receives the merged model upload.
MERGED_REPO = "Raiff1982/codette-llama-3.1-8b-merged"
# Access token read from the environment; an empty string when HF_TOKEN is unset.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
|
|
def _find_adapter_dir(download_root):
    """Return the directory under *download_root* that holds the adapter.

    The ``orchestrator`` subfolder is preferred. When it does not exist, the
    download root itself is accepted if it contains ``adapter_config.json``.
    Returns ``None`` when neither location is usable.
    """
    preferred = os.path.join(download_root, "orchestrator")
    if os.path.exists(preferred):
        return preferred

    print(f"ERROR: Adapter not found at {preferred}")
    if os.path.exists(os.path.join(download_root, "adapter_config.json")):
        return download_root
    return None


def _build_model_card():
    """Render the README.md (model card) text for the merged repository."""
    return f"""---
license: llama3.1
base_model: {MODEL_NAME}
tags:
- codette
- multi-perspective-reasoning
- orchestrator
- phase6+
- lora-merged
---

# Codette Orchestrator Model (Merged)

**Base Model**: {MODEL_NAME}
**Merged Adapter**: Orchestrator (Phase 6+ framework)
**Created**: {datetime.now().isoformat()}

## Overview

This is the Codette orchestrator model — Llama 3.1 8B Instruct with the
orchestrator LoRA adapter merged into the base weights. It serves as the
central reasoning coordinator for the Codette multi-perspective AI system.

## Capabilities

- **Query Classification**: Routes queries as SIMPLE/MEDIUM/COMPLEX
- **Adapter Routing**: Selects optimal perspective combinations
- **Coherence Monitoring**: Tracks Γ field health (target: 0.4-0.8)
- **Semantic Tension**: Detects and manages ξ between perspectives
- **Multi-Agent Debate**: Coordinates rounds with conflict resolution
- **AEGIS Governance**: 6-framework ethical validation
- **Synthesis**: Integrates diverse perspectives into unified responses

## Framework Metrics

- **ψ (Psi)**: 5D state vector (psi, tau, chi, phi, lambda)
- **ξ (Xi)**: Epistemic tension = 0.6*semantic + 0.4*heuristic
- **Γ (Gamma)**: System coherence/health score

## Usage

Use as standalone model or pair with 8 perspective LoRA adapters:
- Newton (analytical physics)
- DaVinci (creative synthesis)
- Empathy (emotional intelligence)
- Philosophy (conceptual analysis)
- Quantum (probabilistic reasoning)
- Consciousness (meta-cognition / RC+ξ)
- Multi-Perspective (integration)
- Systems Architecture (design)

Adapters: https://huggingface.co/{ADAPTER_REPO}
"""


def main():
    """Merge the orchestrator LoRA adapter into the base model and publish it.

    Steps performed, in order:
      1. Download the ``orchestrator/`` files from ``ADAPTER_REPO``.
      2. Load ``MODEL_NAME`` on CPU in float16.
      3. Apply the adapter and fold its weights into the base model.
      4. Save the merged weights, tokenizer and a README.md model card
         to ``/tmp/merged_model``.
      5. Create ``MERGED_REPO`` if needed and upload the folder.

    Raises:
        SystemExit: with status 1 when no usable adapter directory is found
            after the download, so the job is reported as failed instead of
            exiting successfully without having merged anything.
    """
    # An unset HF_TOKEN arrives as "". Pass None in that case so the Hub
    # client can use its own credential lookup instead of an empty token.
    token = HF_TOKEN or None
    api = HfApi(token=token)

    # ── Step 1: download the adapter ──
    print("=" * 60)
    print("Step 1: Downloading orchestrator adapter")
    print("=" * 60)
    download_root = "/tmp/adapter_download"
    snapshot_download(
        repo_id=ADAPTER_REPO,
        allow_patterns=["orchestrator/*"],
        local_dir=download_root,
        token=token,
    )

    adapter_dir = _find_adapter_dir(download_root)
    if adapter_dir is None:
        # Dump what was actually downloaded to make the failure diagnosable.
        print("ERROR: No adapter_config.json found. Listing downloaded files:")
        for root, _dirs, files in os.walk(download_root):
            for name in files:
                print(f" {os.path.join(root, name)}")
        raise SystemExit(1)

    print(f" Adapter ready at: {adapter_dir}")
    print(f" Files: {os.listdir(adapter_dir)}")

    # ── Step 2: load the base model ──
    print("\n" + "=" * 60)
    print("Step 2: Loading base model on CPU (float16)")
    print("=" * 60)
    print(" This avoids GPU OOM — merge is a one-time weight operation.")

    # NOTE(review): trust_remote_code=True is kept from the original; it is
    # presumably unnecessary for a stock Llama checkpoint — confirm and drop.
    base_model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16,
        device_map="cpu",
        trust_remote_code=True,
        token=token,
        low_cpu_mem_usage=True,
    )
    print(" Base model loaded on CPU.")

    # ── Step 3: merge the adapter ──
    print("\n" + "=" * 60)
    print("Step 3: Merging LoRA adapter into base model")
    print("=" * 60)

    print(" Loading orchestrator LoRA adapter...")
    merged_model = PeftModel.from_pretrained(base_model, adapter_dir)

    print(" Merging weights (this may take a few minutes on CPU)...")
    merged_model = merged_model.merge_and_unload()
    print(" Merge complete!")

    # ── Step 4: save weights, tokenizer and model card ──
    merged_dir = "/tmp/merged_model"
    print(f"\n Saving merged model to {merged_dir}...")
    os.makedirs(merged_dir, exist_ok=True)
    merged_model.save_pretrained(merged_dir, safe_serialization=True)

    print(" Saving tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=token)
    tokenizer.save_pretrained(merged_dir)

    # The card contains non-ASCII characters (Γ, ξ, ψ, —), so the encoding is
    # pinned; the platform default may not be able to encode them.
    with open(os.path.join(merged_dir, "README.md"), "w", encoding="utf-8") as f:
        f.write(_build_model_card())

    # Drop the in-memory weights before the upload; they are on disk now.
    del base_model, merged_model
    gc.collect()

    # ── Step 5: upload ──
    print("\n" + "=" * 60)
    print(f"Step 5: Uploading merged model to {MERGED_REPO}")
    print("=" * 60)

    # exist_ok tolerates an already-existing repo while still surfacing real
    # failures (bad token, network errors) instead of mislabelling them.
    api.create_repo(MERGED_REPO, private=False, exist_ok=True, token=token)
    print(" Repo ready.")

    api.upload_folder(
        folder_path=merged_dir,
        repo_id=MERGED_REPO,
        token=token,
    )
    print(f" Uploaded: https://huggingface.co/{MERGED_REPO}")

    print("\n" + "=" * 60)
    print("MERGE COMPLETE!")
    print("=" * 60)
|
|
# Call main() only when this file is executed directly as a script.
if __name__ == "__main__":
    main()
|
|