# Codette-Reasoning/training/merge_orchestrator.py
# Author: Jonathan Harrison
# Full Codette codebase sync — transparency release (commit 74f2af5)
#!/usr/bin/env python3
"""Codette Merge-Only Script — Merge orchestrator LoRA into base Llama 3.1 8B.
Lightweight script that:
1. Downloads the orchestrator adapter from HF
2. Loads the base model on CPU (float16) to avoid GPU OOM
3. Merges LoRA weights into base
4. Uploads merged model to Raiff1982/codette-llama-3.1-8b-merged
Designed to run on HF Jobs with cpu-basic or a10g-small.
"""
import subprocess
import sys

# Bootstrap: install runtime dependencies before the heavyweight imports
# below, so this script can run on a bare HF Jobs container.
print("=" * 60)
print("Codette Orchestrator Merge — Installing Dependencies")
print("=" * 60)
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "-q"]
    + [
        "torch",
        "transformers>=4.40.0",
        "peft>=0.10.0",
        "accelerate>=0.28.0",
        "huggingface_hub>=0.22.0",
        "sentencepiece",
        "protobuf",
        "safetensors",
    ]
)
print("Dependencies installed.\n")
import os, gc, torch, traceback
from datetime import datetime
from huggingface_hub import HfApi, snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# ── Config ──
# Base model to merge into (gated repo — requires accepted license + token).
MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
# Source repo holding the LoRA adapters (orchestrator adapter lives here).
ADAPTER_REPO = "Raiff1982/codette-lora-adapters"
# Destination repo for the merged full-weight model.
MERGED_REPO = "Raiff1982/codette-llama-3.1-8b-merged"
# Auth token from the job environment; empty string falls back to any
# cached credentials the hub client can find.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
def _download_adapter():
    """Step 1: fetch the orchestrator LoRA adapter from the Hub.

    Returns the local directory containing ``adapter_config.json``, or
    ``None`` when the adapter cannot be located (the downloaded tree is
    listed for debugging in that case).
    """
    print("=" * 60)
    print("Step 1: Downloading orchestrator adapter")
    print("=" * 60)
    download_root = "/tmp/adapter_download"
    snapshot_download(
        repo_id=ADAPTER_REPO,
        allow_patterns=["orchestrator/*"],
        local_dir=download_root,
        token=HF_TOKEN,
    )
    # Expected layout is a nested "orchestrator/" folder; fall back to a
    # flat repo layout if that folder is absent.
    adapter_dir = os.path.join(download_root, "orchestrator")
    if not os.path.exists(adapter_dir):
        print(f"ERROR: Adapter not found at {adapter_dir}")
        adapter_dir = download_root
    if not os.path.exists(os.path.join(adapter_dir, "adapter_config.json")):
        print("ERROR: No adapter_config.json found. Listing downloaded files:")
        for root, dirs, files in os.walk(download_root):
            for name in files:
                print(f"  {os.path.join(root, name)}")
        return None
    print(f"  Adapter ready at: {adapter_dir}")
    print(f"  Files: {os.listdir(adapter_dir)}")
    return adapter_dir


def _load_and_merge(adapter_dir):
    """Steps 2-3: load the base model on CPU and merge the LoRA weights.

    Loading in float16 on CPU avoids GPU OOM — the merge is a one-time
    weight operation, not inference. Returns the merged plain model
    (adapter weights folded in, PEFT wrappers removed).
    """
    print("\n" + "=" * 60)
    print("Step 2: Loading base model on CPU (float16)")
    print("=" * 60)
    print("  This avoids GPU OOM — merge is a one-time weight operation.")
    base_model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16,
        device_map="cpu",
        trust_remote_code=True,
        token=HF_TOKEN,
        low_cpu_mem_usage=True,
    )
    print("  Base model loaded on CPU.")
    print("\n" + "=" * 60)
    print("Step 3: Merging LoRA adapter into base model")
    print("=" * 60)
    print("  Loading orchestrator LoRA adapter...")
    peft_model = PeftModel.from_pretrained(base_model, adapter_dir)
    print("  Merging weights (this may take a few minutes on CPU)...")
    merged = peft_model.merge_and_unload()
    print("  Merge complete!")
    return merged


def _write_model_card(merged_dir):
    """Write the README.md model card for the merged repository."""
    model_card = f"""---
license: llama3.1
base_model: {MODEL_NAME}
tags:
- codette
- multi-perspective-reasoning
- orchestrator
- phase6+
- lora-merged
---
# Codette Orchestrator Model (Merged)
**Base Model**: {MODEL_NAME}
**Merged Adapter**: Orchestrator (Phase 6+ framework)
**Created**: {datetime.now().isoformat()}
## Overview
This is the Codette orchestrator model — Llama 3.1 8B Instruct with the
orchestrator LoRA adapter merged into the base weights. It serves as the
central reasoning coordinator for the Codette multi-perspective AI system.
## Capabilities
- **Query Classification**: Routes queries as SIMPLE/MEDIUM/COMPLEX
- **Adapter Routing**: Selects optimal perspective combinations
- **Coherence Monitoring**: Tracks Γ field health (target: 0.4-0.8)
- **Semantic Tension**: Detects and manages ξ between perspectives
- **Multi-Agent Debate**: Coordinates rounds with conflict resolution
- **AEGIS Governance**: 6-framework ethical validation
- **Synthesis**: Integrates diverse perspectives into unified responses
## Framework Metrics
- **ψ (Psi)**: 5D state vector (psi, tau, chi, phi, lambda)
- **ξ (Xi)**: Epistemic tension = 0.6*semantic + 0.4*heuristic
- **Γ (Gamma)**: System coherence/health score
## Usage
Use as standalone model or pair with 8 perspective LoRA adapters:
- Newton (analytical physics)
- DaVinci (creative synthesis)
- Empathy (emotional intelligence)
- Philosophy (conceptual analysis)
- Quantum (probabilistic reasoning)
- Consciousness (meta-cognition / RC+ξ)
- Multi-Perspective (integration)
- Systems Architecture (design)
Adapters: https://huggingface.co/{ADAPTER_REPO}
"""
    with open(f"{merged_dir}/README.md", "w") as f:
        f.write(model_card)


def _save_artifacts(merged_model):
    """Step 4: save merged weights, tokenizer, and model card.

    Returns the local directory holding everything to upload.
    """
    merged_dir = "/tmp/merged_model"
    print(f"\n  Saving merged model to {merged_dir}...")
    os.makedirs(merged_dir, exist_ok=True)
    merged_model.save_pretrained(merged_dir, safe_serialization=True)
    print("  Saving tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
    tokenizer.save_pretrained(merged_dir)
    _write_model_card(merged_dir)
    return merged_dir


def _upload_merged(api, merged_dir):
    """Step 5: push the merged model folder to the destination repo."""
    print("\n" + "=" * 60)
    print(f"Step 5: Uploading merged model to {MERGED_REPO}")
    print("=" * 60)
    # exist_ok=True replaces a blanket try/except that would have
    # mislabelled auth or network failures as "repo already exists".
    api.create_repo(MERGED_REPO, private=False, token=HF_TOKEN, exist_ok=True)
    api.upload_folder(
        folder_path=merged_dir,
        repo_id=MERGED_REPO,
        token=HF_TOKEN,
    )
    print(f"  Uploaded: https://huggingface.co/{MERGED_REPO}")
    print("\n" + "=" * 60)
    print("MERGE COMPLETE!")
    print("=" * 60)


def main():
    """Run the download → merge → save → upload pipeline end to end."""
    api = HfApi(token=HF_TOKEN)
    adapter_dir = _download_adapter()
    if adapter_dir is None:
        # Adapter missing: diagnostics were already printed; nothing to merge.
        return
    merged_model = _load_and_merge(adapter_dir)
    merged_dir = _save_artifacts(merged_model)
    # Free the ~16 GB of float16 weights before the uploader allocates
    # its own buffers.
    del merged_model
    gc.collect()
    _upload_merged(api, merged_dir)


if __name__ == "__main__":
    main()