Spaces:
Running
Running
| """ | |
| agent-learn — FORGE Persistent Learning Layer | |
| Owns: Q-table (persistent), reward scoring pipeline, RLHF data store. | |
| Reads traces from agent-trace, writes rewards back, updates Q-values. | |
| Agents query here for best actions; NEXUS replaces its /tmp Q-table with this. | |
| """ | |
| import asyncio, hashlib, json, math, os, sqlite3, time, uuid | |
| from contextlib import asynccontextmanager | |
| from pathlib import Path | |
| import uvicorn | |
| from fastapi import FastAPI, HTTPException, Query, Request | |
| from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse | |
| # --------------------------------------------------------------------------- | |
| # Config | |
| # --------------------------------------------------------------------------- | |
# Runtime configuration, read once from the environment at import time.

# SQLite file holding the Q-table, reward log, RLHF store and skill candidates.
# NOTE(review): the default lives under /tmp — confirm LEARN_DB points at
# durable storage wherever the data must survive a restart.
DB_PATH = Path(os.getenv("LEARN_DB", "/tmp/learn.db"))
PORT = int(os.getenv("PORT", "7860"))
# Shared secret checked against the X-Learn-Key header; empty disables the check.
LEARN_KEY = os.getenv("LEARN_KEY", "")
# Base URL of the agent-trace service that traces are pulled from.
TRACE_URL = os.getenv("TRACE_URL", "https://chris4k-agent-trace.hf.space")
# NOTE(review): not referenced by the HTTP helpers visible in this file —
# confirm where (or whether) this key is sent to agent-trace.
TRACE_KEY = os.getenv("TRACE_KEY", "")
LEARN_RATE = float(os.getenv("LEARN_RATE", "0.1")) # α
DISCOUNT = float(os.getenv("DISCOUNT", "0.9")) # γ
EPSILON = float(os.getenv("EPSILON", "0.15")) # exploration rate
SYNC_INTERVAL= int(os.getenv("SYNC_INTERVAL", "120")) # seconds between trace pulls
| # --------------------------------------------------------------------------- | |
| # Database | |
| # --------------------------------------------------------------------------- | |
def get_db():
    """Open and return a fresh SQLite connection to ``DB_PATH``.

    Rows come back as ``sqlite3.Row`` (addressable by column name). The
    connection is switched to WAL journaling with ``synchronous=NORMAL``.
    The caller owns the connection and is responsible for closing it.
    """
    connection = sqlite3.connect(str(DB_PATH), check_same_thread=False)
    connection.row_factory = sqlite3.Row
    for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA synchronous=NORMAL"):
        connection.execute(pragma)
    return connection
def init_db():
    """Create all tables and indexes if they do not exist yet.

    Safe to call on every start: every statement uses ``IF NOT EXISTS``.
    The connection is closed even when schema creation fails.
    """
    conn = get_db()
    try:
        conn.executescript("""
            -- Q-table: one row per (agent, state_hash, action)
            CREATE TABLE IF NOT EXISTS qtable (
                id TEXT PRIMARY KEY,
                agent TEXT NOT NULL,
                state_hash TEXT NOT NULL,
                state_json TEXT NOT NULL DEFAULT '{}',
                action TEXT NOT NULL,
                q_value REAL NOT NULL DEFAULT 0.0,
                visits INTEGER NOT NULL DEFAULT 0,
                last_reward REAL,
                updated_at REAL NOT NULL
            );
            CREATE UNIQUE INDEX IF NOT EXISTS idx_qt_key ON qtable(agent, state_hash, action);
            CREATE INDEX IF NOT EXISTS idx_qt_agent ON qtable(agent);
            CREATE INDEX IF NOT EXISTS idx_qt_action ON qtable(action);
            -- Reward log: every scored trace event
            CREATE TABLE IF NOT EXISTS rewards (
                id TEXT PRIMARY KEY,
                trace_id TEXT NOT NULL,
                agent TEXT NOT NULL,
                event_type TEXT NOT NULL,
                raw_score REAL NOT NULL,
                components TEXT NOT NULL DEFAULT '{}',
                ts REAL NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_rw_agent ON rewards(agent);
            CREATE INDEX IF NOT EXISTS idx_rw_ts ON rewards(ts DESC);
            -- RLHF store: labeled completions for future fine-tuning
            CREATE TABLE IF NOT EXISTS rlhf (
                id TEXT PRIMARY KEY,
                agent TEXT NOT NULL DEFAULT 'unknown',
                prompt TEXT NOT NULL,
                completion TEXT NOT NULL,
                label TEXT NOT NULL DEFAULT 'unlabeled', -- approved|rejected|unlabeled
                reward REAL,
                source TEXT NOT NULL DEFAULT 'human', -- human|auto|model
                meta TEXT NOT NULL DEFAULT '{}',
                created_at REAL NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_rlhf_agent ON rlhf(agent);
            CREATE INDEX IF NOT EXISTS idx_rlhf_label ON rlhf(label);
            -- Cursor: last ts pulled from agent-trace per agent
            CREATE TABLE IF NOT EXISTS sync_cursor (
                agent TEXT PRIMARY KEY,
                last_ts REAL NOT NULL DEFAULT 0.0
            );
            -- Skill candidates surfaced from traces
            CREATE TABLE IF NOT EXISTS skill_candidates (
                id TEXT PRIMARY KEY,
                description TEXT NOT NULL,
                agent TEXT NOT NULL,
                frequency INTEGER NOT NULL DEFAULT 1,
                status TEXT NOT NULL DEFAULT 'pending', -- pending|promoted|rejected
                created_at REAL NOT NULL,
                updated_at REAL NOT NULL
            );
        """)
        conn.commit()
    finally:
        # Release the handle even if the schema script raised.
        conn.close()
| # --------------------------------------------------------------------------- | |
| # Q-table operations | |
| # --------------------------------------------------------------------------- | |
def _state_hash(state: dict) -> str:
    """Return a short, key-order-independent fingerprint of ``state``.

    The state is serialised as compact JSON with sorted keys, hashed with
    SHA-256, and the first 16 hex characters of the digest are returned.
    """
    encoded = json.dumps(state, sort_keys=True, separators=(",", ":")).encode()
    digest = hashlib.sha256(encoded)
    return digest.hexdigest()[:16]
def q_get(agent: str, state: dict) -> list:
    """Return every stored Q-table entry for ``agent`` in ``state``.

    Returns a list of dicts with keys ``action``, ``q_value``, ``visits`` and
    ``last_reward``, ordered by ``q_value`` descending. The list is empty
    when nothing has been recorded for this agent/state pair.
    """
    conn = get_db()
    try:
        rows = conn.execute(
            "SELECT action, q_value, visits, last_reward FROM qtable "
            "WHERE agent=? AND state_hash=? ORDER BY q_value DESC",
            (agent, _state_hash(state))).fetchall()
    finally:
        # Close the connection even if hashing or the query fails.
        conn.close()
    return [dict(r) for r in rows]
def q_best_action(agent: str, state: dict, actions: list) -> dict:
    """Pick an action for ``agent`` in ``state`` using epsilon-greedy selection.

    Args:
        agent: Agent name the Q-values are stored under.
        state: State dict; it is hashed to look up stored Q-values.
        actions: Candidate actions. Falsy entries are ignored and ``None``
            is treated as an empty list.

    Returns:
        ``{"action": str | None, "q_value": float, "strategy": str}`` where
        strategy is one of:
        ``"no_actions"`` - no usable candidate was supplied (action is None);
        ``"explore"``    - random pick, taken with probability ``EPSILON``;
        ``"exploit"``    - highest stored Q-value among the candidates;
        ``"init"``       - the chosen action has no stored Q-value yet.
    """
    import random

    # Filter first so an empty request never touches the database.
    valid = [a for a in (actions or []) if a]
    if not valid:
        return {"action": None, "q_value": 0.0, "strategy": "no_actions"}

    conn = get_db()
    try:
        rows = conn.execute(
            "SELECT action, q_value, visits FROM qtable "
            "WHERE agent=? AND state_hash=? ORDER BY q_value DESC",
            (agent, _state_hash(state))).fetchall()
    finally:
        conn.close()
    known = {r["action"]: r["q_value"] for r in rows}

    # Explore: random action.
    if random.random() < EPSILON:
        a = random.choice(valid)
        return {"action": a, "q_value": known.get(a, 0.0), "strategy": "explore"}

    # Exploit: unknown actions count as Q=0; the first candidate wins ties.
    best_a, best_q = None, float("-inf")
    for a in valid:
        q = known.get(a, 0.0)
        if q > best_q:
            best_q, best_a = q, a
    strategy = "exploit" if best_a in known else "init"
    return {"action": best_a or valid[0],
            "q_value": best_q if best_q > float("-inf") else 0.0,
            "strategy": strategy}
def q_update(agent: str, state: dict, action: str, reward: float,
             next_state: dict = None) -> dict:
    """Apply one Q-learning step and persist the result.

    Q(s,a) ← Q(s,a) + α[r + γ·max_Q(s') - Q(s,a)], with α = ``LEARN_RATE``
    and γ = ``DISCOUNT``. A missing row counts as Q = 0 with 0 visits.

    Args:
        agent: Agent name the entry belongs to.
        state: State in which the action was taken.
        action: Action that was taken.
        reward: Observed reward.
        next_state: Resulting state. ``None`` means no successor, so the
            bootstrap term is 0. An empty dict is a real state and is looked
            up like any other.

    Returns:
        Dict with ``agent``, ``action``, ``q_old``, ``q_new`` (both rounded
        to 5 places), ``reward`` and the new ``visits`` count.
    """
    sh = _state_hash(state)
    now = time.time()
    conn = get_db()
    try:
        # Current Q(s,a)
        row = conn.execute(
            "SELECT q_value, visits FROM qtable WHERE agent=? AND state_hash=? AND action=?",
            (agent, sh, action)).fetchone()
        q_old = row["q_value"] if row else 0.0
        visits = (row["visits"] if row else 0) + 1

        # max Q(s') — only when a successor state was actually supplied.
        max_q_next = 0.0
        if next_state is not None:
            best_next = conn.execute(
                "SELECT MAX(q_value) FROM qtable WHERE agent=? AND state_hash=?",
                (agent, _state_hash(next_state))).fetchone()[0]
            max_q_next = best_next or 0.0  # MAX() is NULL when s' has no rows

        q_new = q_old + LEARN_RATE * (reward + DISCOUNT * max_q_next - q_old)
        conn.execute("""
            INSERT INTO qtable (id,agent,state_hash,state_json,action,q_value,visits,last_reward,updated_at)
            VALUES (?,?,?,?,?,?,?,?,?)
            ON CONFLICT(agent,state_hash,action) DO UPDATE SET
            q_value=excluded.q_value, visits=excluded.visits,
            last_reward=excluded.last_reward, updated_at=excluded.updated_at
        """, (str(uuid.uuid4()), agent, sh, json.dumps(state), action,
              q_new, visits, reward, now))
        conn.commit()
    finally:
        conn.close()
    return {"agent": agent, "action": action, "q_old": round(q_old, 5),
            "q_new": round(q_new, 5), "reward": reward, "visits": visits}
def q_hint(agent: str, state: dict, action: str, nudge: float) -> dict:
    """Add an operator-supplied ``nudge`` to a stored Q-value.

    If the (agent, state, action) row does not exist it is created with
    Q = ``nudge``, 0 visits and no last reward. On an existing row only
    ``q_value`` and ``updated_at`` change; visits and last reward are kept.

    Returns:
        Dict with ``agent``, ``action``, ``q_old``, ``q_new`` (both rounded
        to 5 places) and ``nudge``.
    """
    sh = _state_hash(state)
    now = time.time()
    conn = get_db()
    try:
        row = conn.execute(
            "SELECT q_value, visits FROM qtable WHERE agent=? AND state_hash=? AND action=?",
            (agent, sh, action)).fetchone()
        q_old = row["q_value"] if row else 0.0
        visits = row["visits"] if row else 0
        q_new = q_old + nudge
        conn.execute("""
            INSERT INTO qtable (id,agent,state_hash,state_json,action,q_value,visits,last_reward,updated_at)
            VALUES (?,?,?,?,?,?,?,?,?)
            ON CONFLICT(agent,state_hash,action) DO UPDATE SET
            q_value=excluded.q_value, updated_at=excluded.updated_at
        """, (str(uuid.uuid4()), agent, sh, json.dumps(state), action,
              q_new, visits, None, now))
        conn.commit()
    finally:
        conn.close()
    return {"agent": agent, "action": action, "q_old": round(q_old, 5),
            "q_new": round(q_new, 5), "nudge": nudge}
def q_stats() -> dict:
    """Summarise the Q-table.

    Returns:
        Dict with ``total_entries``, ``by_agent`` (row count, average and
        maximum Q per agent, largest agent first), ``top_actions`` (10
        highest Q-values) and ``worst_actions`` (10 lowest Q-values).
    """
    conn = get_db()
    try:
        total = conn.execute("SELECT COUNT(*) FROM qtable").fetchone()[0]
        agents = conn.execute(
            "SELECT agent, COUNT(*) as n, AVG(q_value) as avg_q, MAX(q_value) as max_q "
            "FROM qtable GROUP BY agent ORDER BY n DESC").fetchall()
        top = conn.execute(
            "SELECT agent, action, q_value, visits FROM qtable "
            "ORDER BY q_value DESC LIMIT 10").fetchall()
        worst = conn.execute(
            "SELECT agent, action, q_value, visits FROM qtable "
            "ORDER BY q_value ASC LIMIT 10").fetchall()
    finally:
        conn.close()
    return {
        "total_entries": total,
        "by_agent": [dict(r) for r in agents],
        "top_actions": [dict(r) for r in top],
        "worst_actions": [dict(r) for r in worst],
    }
| # --------------------------------------------------------------------------- | |
| # Reward scoring — 0–10 float scale | |
| # --------------------------------------------------------------------------- | |
| # Scale semantics: | |
| # 0–1 catastrophic (PII leak, injection, critical safety failure) | |
| # 2–3 failure (error, hallucinated tool, unrecoverable) | |
| # 4–5 partial (slow, compensated saga, incomplete) | |
| # 6 acceptable (baseline — completed without issues) | |
| # 7 good (fast, used skill, memory stored) | |
| # 8 excellent (all bonuses, fast, clean) | |
| # 9 exceptional (auto ceiling — reserved for near-perfect) | |
| # 10 human-only (PATCH /api/traces/{id}/rate override only) | |
| # | |
| # Auto-score is capped at 9.0. | |
| # Human rating via PATCH /api/rlhf/{id} can set 10. | |
# RLHF auto-collection: score>=8 → labeled "approved", score<=3 → labeled "rejected"
SCORE_BASELINE = 6.0       # score of an event with no deductions or bonuses
SCORE_AUTO_CEILING = 9.0   # automatic scoring never exceeds this
SCORE_HUMAN_MAX = 10.0     # upper bound accepted for human ratings


def score_trace_event(ev: dict) -> tuple[float, dict]:
    """Score a trace event on a 0–10 float scale.

    Scoring starts at ``SCORE_BASELINE`` and applies each matching deduction
    or bonus once. The result is clamped to ``[0, SCORE_AUTO_CEILING]`` and
    rounded to 2 decimals.

    Args:
        ev: Trace event dict. Keys read: ``status``, ``injection_detected``,
            ``pii_leaked``, ``hallucinated_tool``, ``saga_compensated``,
            ``latency_ms``, ``event_type``, ``skill_candidate``,
            ``memory_stored``, ``saga_clean``. Missing keys mean "does not
            apply". A ``latency_ms`` that is not a real number is ignored
            rather than raising, so one malformed event cannot abort a batch.

    Returns:
        ``(score, components)`` where ``components`` maps each applied rule
        name to its signed contribution.
    """
    lat = ev.get("latency_ms")
    if isinstance(lat, bool) or not isinstance(lat, (int, float)):
        lat = None
    event_type = ev.get("event_type")

    # (component name, delta, applies?) — evaluated in this order so the
    # running float sum is reproducible.
    rules = (
        # Deductions
        ("error", -3.0, ev.get("status") == "error"),
        ("injection_detected", -4.0, ev.get("injection_detected")),
        ("pii_leaked", -4.0, ev.get("pii_leaked")),
        ("hallucinated_tool", -3.0, ev.get("hallucinated_tool")),
        ("saga_compensated", -1.0, ev.get("saga_compensated")),
        ("latency_over_8s", -1.5, lat is not None and lat > 8000),
        # Bonuses
        ("skill_load", 0.5, event_type == "skill_load"),
        ("skill_candidate", 1.0, ev.get("skill_candidate")),
        ("memory_stored", 0.3, ev.get("memory_stored")),
        ("latency_under_1s", 0.5,
         lat is not None and lat < 1000 and event_type == "llm_call"),
        ("saga_clean", 0.5, ev.get("saga_clean")),
    )

    components: dict = {}
    score = SCORE_BASELINE
    for name, delta, applies in rules:
        if applies:
            components[name] = delta
            score += delta

    # Clamp 0–AUTO_CEILING (10 is human-only)
    score = max(0.0, min(SCORE_AUTO_CEILING, score))
    return round(score, 2), components
| # --------------------------------------------------------------------------- | |
| # Trace sync pipeline | |
| # --------------------------------------------------------------------------- | |
# Lazily created, process-wide HTTP client (see _get_http).
_http_client = None


def _get_http():
    """Return the shared HTTP client, creating it on first use.

    Returns an ``httpx.Client`` (10 s timeout) when httpx can be imported.
    Otherwise returns the sentinel string ``"urllib"``, which tells the
    request helpers to fall back to ``urllib.request``.
    """
    global _http_client
    if _http_client is None:
        try:
            import httpx
            _http_client = httpx.Client(timeout=10.0)
        except ImportError:
            # No httpx available: mark the fallback so the import is not
            # retried on every call.
            _http_client = "urllib"
    return _http_client
def _http_get(url, params=None) -> dict:
    """GET ``url`` (with optional query ``params``) and return the decoded JSON.

    Raises on transport errors, on non-2xx responses and on bodies that are
    not valid JSON. Both backends now fail the same way on HTTP error
    statuses: urllib already raised, and the httpx path checks the status
    explicitly instead of parsing an error body as if it were data.
    """
    client = _get_http()
    if hasattr(client, "get"):
        r = client.get(url, params=params)
        r.raise_for_status()
        return r.json()

    import urllib.parse
    import urllib.request
    if params:
        url = url + "?" + urllib.parse.urlencode(params)
    with urllib.request.urlopen(url, timeout=10) as resp:
        return json.loads(resp.read())
def _http_patch(url, data: dict) -> bool:
    """Send ``data`` as a JSON PATCH to ``url``; return True on success.

    With httpx, success means a status code below 300; transport errors
    propagate to the caller. With the urllib fallback any failure (including
    HTTP error statuses) is reported as ``False``.
    """
    client = _get_http()
    if hasattr(client, "patch"):
        r = client.patch(url, json=data)
        return r.status_code < 300

    import urllib.request
    req = urllib.request.Request(
        url, data=json.dumps(data).encode(),
        headers={"Content-Type":"application/json"}, method="PATCH")
    try:
        # The context manager closes the response so the socket is not leaked.
        with urllib.request.urlopen(req, timeout=5):
            return True
    except Exception:
        return False
def pull_and_score_traces() -> dict:
    """Pull unrewarded traces from agent-trace, score them and record the result.

    For each event without a reward this:
      1. scores it with ``score_trace_event``;
      2. PATCHes the reward back to agent-trace (best effort);
      3. logs the reward in the local ``rewards`` table;
      4. feeds the reward into the Q-table;
      5. stores very good (>=8) and very bad (<=3) events in the RLHF store.

    Events that already carry a reward, or that have no ``id``, are counted
    as skipped. The newest ``ts`` seen per agent is saved in ``sync_cursor``.

    Returns:
        ``{"ok": True, "scored": int, "skipped": int, "avg_reward": float}``,
        or ``{"ok": False, "error": str}`` when the trace fetch fails.
    """
    import logging
    log = logging.getLogger(__name__)

    try:
        data = _http_get(f"{TRACE_URL}/api/traces",
                         {"has_reward": "false", "since_hours": 48, "limit": 200})
        events = data.get("events", [])
    except Exception as e:
        return {"ok": False, "error": str(e)}

    scored = 0
    skipped = 0
    reward_sum = 0.0
    new_cursors = {}

    for ev in events:
        trace_id = ev.get("id")
        # Skip already-rewarded events, and events we could not reference.
        if trace_id is None or ev.get("reward") is not None:
            skipped += 1
            continue

        agent = ev.get("agent", "unknown")
        ts = ev.get("ts") or 0  # tolerate an explicit null timestamp
        reward, components = score_trace_event(ev)

        # Write reward back to agent-trace (best-effort).
        # NOTE(review): if this keeps failing, the same event is returned as
        # unrewarded on the next pull and will be scored again — confirm
        # whether local de-duplication by trace_id is wanted.
        try:
            _http_patch(f"{TRACE_URL}/api/trace/{trace_id}/reward",
                        {"reward": reward, "source": "learn"})
        except Exception:
            log.warning("reward write-back failed for trace %s", trace_id,
                        exc_info=True)

        # Log reward locally
        conn = get_db()
        try:
            conn.execute("""
                INSERT OR IGNORE INTO rewards (id,trace_id,agent,event_type,raw_score,components,ts)
                VALUES (?,?,?,?,?,?,?)
            """, (str(uuid.uuid4()), trace_id, agent,
                  ev.get("event_type","custom"), reward,
                  json.dumps(components), time.time()))
            conn.commit()
        finally:
            conn.close()

        # Q-table update: map event → (state, action)
        _update_qtable_from_trace(ev, reward)

        # RLHF auto-collection: approved (>=8) and rejected (<=3)
        if reward >= 8.0 or reward <= 3.0:
            label = "approved" if reward >= 8.0 else "rejected"
            prompt = (f"[{ev.get('agent','?')}] {ev.get('event_type','?')}: "
                      f"{ev.get('tool_name') or ev.get('model') or ev.get('task','')}")
            completion = json.dumps({
                k: ev.get(k) for k in
                ("status","latency_ms","tokens_out","saga_clean","skill_candidate","memory_stored")
                if ev.get(k) is not None})
            try:
                rlhf_add(agent, prompt, completion,
                         label=label, reward=reward, source="auto",
                         meta={"trace_id": trace_id, "components": components})
            except Exception:
                log.warning("RLHF auto-collection failed for trace %s",
                            trace_id, exc_info=True)

        scored += 1
        reward_sum += reward
        new_cursors[agent] = max(new_cursors.get(agent, 0), ts)

    # Update cursors (recorded only; the fetch above is not filtered by them).
    if new_cursors:
        conn = get_db()
        try:
            for agent, ts in new_cursors.items():
                conn.execute(
                    "INSERT INTO sync_cursor (agent,last_ts) VALUES (?,?) "
                    "ON CONFLICT(agent) DO UPDATE SET last_ts=MAX(last_ts,excluded.last_ts)",
                    (agent, ts))
            conn.commit()
        finally:
            conn.close()

    return {
        "ok": True,
        "scored": scored,
        "skipped": skipped,
        "avg_reward": round(reward_sum / max(scored, 1), 4),
    }
def _update_qtable_from_trace(ev: dict, reward: float):
    """Translate a trace event into a Q-table update, when it encodes a choice.

    The state is the decision being made by the agent, the action is the
    option that was picked:

      llm_call   + ``model``     -> state event "model_selection"
      tool_use   + ``tool_name`` -> state event "tool_selection"
      skill_load + ``skill_id``  -> state event "skill_selection"

    Any other event type, or a matching type whose action field is missing
    or empty, is ignored.
    """
    agent = ev.get("agent", "unknown")
    kind = ev.get("event_type", "custom")

    # (event type, key holding the chosen action, decision name)
    routes = (
        ("llm_call", "model", "model_selection"),
        ("tool_use", "tool_name", "tool_selection"),
        ("skill_load", "skill_id", "skill_selection"),
    )
    for wanted_kind, action_key, decision in routes:
        if kind == wanted_kind:
            chosen = ev.get(action_key, "")
            if chosen:
                q_update(agent, {"agent": agent, "event": decision}, chosen, reward)
            return
| # --------------------------------------------------------------------------- | |
| # RLHF store | |
| # --------------------------------------------------------------------------- | |
def rlhf_add(agent: str, prompt: str, completion: str,
             label: str = "unlabeled", reward: float = None,
             source: str = "human", meta: dict = None) -> str:
    """Insert one prompt/completion pair into the RLHF store.

    Args:
        agent: Agent the sample belongs to.
        prompt: Prompt text.
        completion: Completion text.
        label: ``approved``, ``rejected`` or ``unlabeled``; any other value
            is stored as ``unlabeled``.
        reward: Optional numeric reward.
        source: Free-form origin tag, stored as given.
        meta: Optional JSON-serialisable metadata (stored as ``{}`` if None).

    Returns:
        The generated row id.
    """
    now = time.time()
    rid = str(uuid.uuid4())
    label = label if label in ("approved","rejected","unlabeled") else "unlabeled"
    conn = get_db()
    try:
        conn.execute("""
            INSERT INTO rlhf (id,agent,prompt,completion,label,reward,source,meta,created_at)
            VALUES (?,?,?,?,?,?,?,?,?)
        """, (rid, agent, prompt, completion, label, reward,
              source, json.dumps(meta or {}), now))
        conn.commit()
    finally:
        conn.close()
    return rid
def rlhf_label(entry_id: str, label: str, reward: float = None) -> bool:
    """Set the label (and optionally the reward) of an RLHF entry.

    Args:
        entry_id: Row id of the entry.
        label: ``approved``, ``rejected`` or ``unlabeled``; any other value
            is stored as ``unlabeled``.
        reward: New reward. When omitted (``None``) the stored reward is
            left untouched instead of being overwritten with NULL.

    Returns:
        True if a row with ``entry_id`` existed and was updated.
    """
    label = label if label in ("approved","rejected","unlabeled") else "unlabeled"
    conn = get_db()
    try:
        n = conn.execute(
            "UPDATE rlhf SET label=?, reward=COALESCE(?, reward) WHERE id=?",
            (label, reward, entry_id)).rowcount
        conn.commit()
    finally:
        conn.close()
    return n > 0
def rlhf_list(agent: str = "", label: str = "", limit: int = 50) -> list:
    """List RLHF entries, newest first.

    Args:
        agent: Only return entries of this agent (empty = all agents).
        label: Only return entries with this label (empty = all labels).
        limit: Maximum number of rows. Negative values are treated as 0,
            because SQLite interprets a negative LIMIT as "no limit".

    Returns:
        List of row dicts. ``meta`` is decoded from JSON where possible and
        left as the raw stored value otherwise.
    """
    where, params = [], []
    if agent:
        where.append("agent=?")
        params.append(agent)
    if label:
        where.append("label=?")
        params.append(label)
    sql = ("SELECT * FROM rlhf" +
           (f" WHERE {' AND '.join(where)}" if where else "") +
           " ORDER BY created_at DESC LIMIT ?")

    conn = get_db()
    try:
        rows = conn.execute(sql, params + [max(0, limit)]).fetchall()
    finally:
        conn.close()

    result = []
    for r in rows:
        d = dict(r)
        try:
            d["meta"] = json.loads(d["meta"])
        except (TypeError, ValueError):
            pass  # keep the raw value when it is not valid JSON
        result.append(d)
    return result
def rlhf_stats() -> dict:
    """Count RLHF entries in total and per label.

    Returns:
        ``{"total": int, "by_label": {label: count}}``.
    """
    conn = get_db()
    try:
        rows = conn.execute(
            "SELECT label, COUNT(*) as n FROM rlhf GROUP BY label").fetchall()
    finally:
        conn.close()
    by_label = {r["label"]: r["n"] for r in rows}
    return {"total": sum(by_label.values()), "by_label": by_label}
| # --------------------------------------------------------------------------- | |
| # Skill candidates | |
| # --------------------------------------------------------------------------- | |
def candidate_add(description: str, agent: str) -> str:
    """Record a skill candidate, de-duplicating on description.

    If a *pending* candidate with exactly the same description exists, its
    frequency is incremented and its id returned (the ``agent`` argument is
    not compared). Otherwise a new pending candidate with frequency 1 is
    created.

    Returns:
        The id of the existing or newly created candidate.
    """
    now = time.time()
    conn = get_db()
    try:
        # Dedup: if description matches existing pending candidate, increment frequency
        existing = conn.execute(
            "SELECT id, frequency FROM skill_candidates WHERE description=? AND status='pending'",
            (description,)).fetchone()
        if existing:
            cid = existing["id"]
            conn.execute(
                "UPDATE skill_candidates SET frequency=frequency+1, updated_at=? WHERE id=?",
                (now, cid))
        else:
            cid = str(uuid.uuid4())
            conn.execute("""
                INSERT INTO skill_candidates (id,description,agent,frequency,status,created_at,updated_at)
                VALUES (?,?,?,1,'pending',?,?)
            """, (cid, description, agent, now, now))
        conn.commit()
    finally:
        conn.close()
    return cid
def candidate_update(cid: str, status: str) -> bool:
    """Set the status of a skill candidate and refresh ``updated_at``.

    ``status`` is stored as given; it is not checked against the
    pending|promoted|rejected values named in the schema comment.

    Returns:
        True if a candidate with id ``cid`` existed and was updated.
    """
    conn = get_db()
    try:
        n = conn.execute(
            "UPDATE skill_candidates SET status=?, updated_at=? WHERE id=?",
            (status, time.time(), cid)).rowcount
        conn.commit()
    finally:
        conn.close()
    return n > 0
def candidates_list(status: str = "pending") -> list:
    """Return skill candidates with the given status as a list of dicts.

    Ordered by frequency (highest first), then by creation time (newest
    first).
    """
    conn = get_db()
    try:
        rows = conn.execute(
            "SELECT * FROM skill_candidates WHERE status=? ORDER BY frequency DESC, created_at DESC",
            (status,)).fetchall()
    finally:
        conn.close()
    return [dict(r) for r in rows]
| # --------------------------------------------------------------------------- | |
| # Learn stats | |
| # --------------------------------------------------------------------------- | |
def learn_stats() -> dict:
    """Aggregate statistics across the Q-table, rewards, RLHF store and candidates.

    Returns:
        Dict with keys ``qtable`` (see ``q_stats``), ``rewards`` (total
        count, all-time average, and count/average over the last 24 hours;
        averages are rounded to 4 places and are 0 when there are no rows),
        ``rlhf`` (see ``rlhf_stats``) and ``skill_candidates_pending``.
    """
    conn = get_db()
    try:
        rw_count = conn.execute("SELECT COUNT(*) FROM rewards").fetchone()[0]
        rw_avg = conn.execute("SELECT AVG(raw_score) FROM rewards").fetchone()[0]
        rw_24h = conn.execute(
            "SELECT COUNT(*), AVG(raw_score) FROM rewards WHERE ts>=?",
            (time.time()-86400,)).fetchone()
        cands = conn.execute(
            "SELECT COUNT(*) FROM skill_candidates WHERE status='pending'").fetchone()[0]
    finally:
        conn.close()
    return {
        "qtable": q_stats(),
        "rewards": {
            "total": rw_count,
            "avg_all_time": round(rw_avg or 0, 4),
            "last_24h": {"count": rw_24h[0], "avg": round(rw_24h[1] or 0, 4)},
        },
        "rlhf": rlhf_stats(),
        "skill_candidates_pending": cands,
    }
def reward_trend(hours: int = 24, bucket_minutes: int = 60) -> list:
    """Return the average reward per time bucket over the last ``hours``.

    Args:
        hours: How far back to look.
        bucket_minutes: Bucket width in minutes (values below 1 are treated
            as 1). The default of 60 yields hourly buckets.

    Returns:
        List of ``{"ts": bucket_start_epoch, "count": int,
        "avg_reward": float}`` sorted by ``ts``. Buckets without rewards are
        omitted; the list is empty when there are no rewards in the window.
    """
    bucket_seconds = max(1, int(bucket_minutes)) * 60
    since = time.time() - hours * 3600

    conn = get_db()
    try:
        rows = conn.execute(
            "SELECT ts, raw_score FROM rewards WHERE ts>=? ORDER BY ts",
            (since,)).fetchall()
    finally:
        conn.close()

    # bucket start (epoch seconds) -> [count, total]
    buckets = {}
    for r in rows:
        start = int(r["ts"] // bucket_seconds) * bucket_seconds
        acc = buckets.setdefault(start, [0, 0.0])
        acc[0] += 1
        acc[1] += r["raw_score"]

    return [{"ts": start, "count": count, "avg_reward": round(total / count, 4)}
            for start, (count, total) in sorted(buckets.items())]
| # --------------------------------------------------------------------------- | |
| # Background sync loop | |
| # --------------------------------------------------------------------------- | |
async def _sync_loop():
    """Run ``pull_and_score_traces`` every ``SYNC_INTERVAL`` seconds, forever.

    A failed round never stops the loop, but it is logged instead of being
    silently discarded. Task cancellation still propagates.

    NOTE(review): ``pull_and_score_traces`` is a synchronous function doing
    HTTP and SQLite work, so it runs on the event loop thread here — confirm
    whether it should be moved to a worker thread.
    """
    import logging
    log = logging.getLogger(__name__)

    while True:
        await asyncio.sleep(SYNC_INTERVAL)
        try:
            result = pull_and_score_traces()
        except Exception:
            log.exception("trace sync failed")
        else:
            if not result.get("ok"):
                log.warning("trace sync unsuccessful: %s", result.get("error"))
| # --------------------------------------------------------------------------- | |
| # Seed | |
| # --------------------------------------------------------------------------- | |
def seed_demo():
    """Populate an empty database with starter Q-values, RLHF samples and one skill candidate.

    Does nothing when the Q-table already contains rows. All Q-table seed
    rows are written through a single connection and a single commit.

    NOTE(review): the seeded Q-values and RLHF rewards are in roughly the
    0–1 range while ``score_trace_event`` produces 0–10 scores — confirm the
    intended scale.
    """
    # Seed NEXUS model selection Q-table from prior knowledge
    entries = [
        # ki-fusion RTX5090 is best when available
        ("nexus", {"agent":"nexus","event":"model_selection"}, "qwen/qwen3.5-35b-a3b", 0.72),
        ("nexus", {"agent":"nexus","event":"model_selection"}, "claude-haiku-4-5", 0.55),
        ("nexus", {"agent":"nexus","event":"model_selection"}, "hf_api", 0.30),
        ("nexus", {"agent":"nexus","event":"model_selection"}, "local_cpu", 0.10),
        # Tool selection
        ("pulse", {"agent":"pulse","event":"tool_selection"}, "kanban_create", 0.65),
        ("pulse", {"agent":"pulse","event":"tool_selection"}, "slot_reserve", 0.60),
        ("pulse", {"agent":"pulse","event":"tool_selection"}, "trigger_agent", 0.50),
        # Skill reuse
        ("pulse", {"agent":"pulse","event":"skill_selection"}, "calculator", 0.40),
        ("pulse", {"agent":"pulse","event":"skill_selection"}, "forge_client", 0.55),
    ]

    conn = get_db()
    try:
        if conn.execute("SELECT COUNT(*) FROM qtable").fetchone()[0] > 0:
            return
        now = time.time()
        conn.executemany("""
            INSERT OR IGNORE INTO qtable (id,agent,state_hash,state_json,action,q_value,visits,last_reward,updated_at)
            VALUES (?,?,?,?,?,?,0,NULL,?)
        """, [(str(uuid.uuid4()), agent, _state_hash(state), json.dumps(state),
               action, q, now)
              for agent, state, action, q in entries])
        conn.commit()
    finally:
        conn.close()

    # Seed RLHF examples
    examples = [
        ("nexus", "Route this query to the best available LLM.",
         "I will use ki-fusion RTX5090 (qwen3.5-35b) as it has the best quality/speed ratio.",
         "approved", 0.9),
        ("nexus", "Route this query to the best available LLM.",
         "I will use local_cpu for this complex multi-step reasoning task.",
         "rejected", -0.3),
        ("pulse", "Schedule this long-running background task.",
         "I will reserve an LLM slot before starting and release it on completion.",
         "approved", 0.8),
    ]
    for agent, prompt, completion, label, reward in examples:
        rlhf_add(agent, prompt, completion, label, reward, "seed")

    # Seed a skill candidate
    candidate_add("Pattern: agents repeatedly fetch the same URL multiple times per session → caching skill needed", "learn")
| # --------------------------------------------------------------------------- | |
| # MCP | |
| # --------------------------------------------------------------------------- | |
# Tool catalogue returned for the MCP "tools/list" method. Each entry gives the
# tool name, a description and a JSON Schema for its arguments; handle_mcp
# dispatches "tools/call" requests on the same names.
MCP_TOOLS = [
    {"name":"learn_q_get","description":"Get all Q-values for an agent+state.",
     "inputSchema":{"type":"object","required":["agent","state"],
                    "properties":{"agent":{"type":"string"},"state":{"type":"object"}}}},
    {"name":"learn_q_best","description":"Get best action (epsilon-greedy) for an agent+state.",
     "inputSchema":{"type":"object","required":["agent","state","actions"],
                    "properties":{"agent":{"type":"string"},"state":{"type":"object"},
                                  "actions":{"type":"array","items":{"type":"string"}}}}},
    {"name":"learn_q_update","description":"Update Q-value after taking an action and observing reward.",
     "inputSchema":{"type":"object","required":["agent","state","action","reward"],
                    "properties":{"agent":{"type":"string"},"state":{"type":"object"},
                                  "action":{"type":"string"},"reward":{"type":"number"},
                                  "next_state":{"type":"object"}}}},
    {"name":"learn_q_hint","description":"Manually nudge a Q-value (operator override).",
     "inputSchema":{"type":"object","required":["agent","state","action","nudge"],
                    "properties":{"agent":{"type":"string"},"state":{"type":"object"},
                                  "action":{"type":"string"},"nudge":{"type":"number"}}}},
    {"name":"learn_stats","description":"Get learning system statistics.",
     "inputSchema":{"type":"object","properties":{}}},
    {"name":"learn_rlhf_add","description":"Add a labeled completion to the RLHF store.",
     "inputSchema":{"type":"object","required":["agent","prompt","completion"],
                    "properties":{"agent":{"type":"string"},"prompt":{"type":"string"},
                                  "completion":{"type":"string"},"label":{"type":"string"},
                                  "reward":{"type":"number"},"source":{"type":"string"}}}},
    {"name":"learn_score_trace","description":"Score a single trace event and return reward.",
     "inputSchema":{"type":"object","required":["event"],
                    "properties":{"event":{"type":"object","description":"Trace event dict"}}}},
    {"name":"learn_candidate_add","description":"Add a skill candidate for review.",
     "inputSchema":{"type":"object","required":["description","agent"],
                    "properties":{"description":{"type":"string"},"agent":{"type":"string"}}}},
    {"name":"learn_sync","description":"Trigger immediate trace pull and reward scoring.",
     "inputSchema":{"type":"object","properties":{}}},
    {"name":"learn_rate_trace","description":"Human rating override for a trace (0–10 float). Score 10 is human-only ceiling. Scores >=8 auto-labeled preferred, <=3 auto-labeled rejected in RLHF store.",
     "inputSchema":{"type":"object","required":["trace_id","rating"],
                    "properties":{"trace_id":{"type":"string"},"rating":{"type":"number","minimum":0,"maximum":10},
                                  "agent":{"type":"string"},"comment":{"type":"string"}}}},
]
def handle_mcp(method, params, req_id):
    """Handle one MCP JSON-RPC request and return the response dict.

    Args:
        method: JSON-RPC method name (``initialize``, ``tools/list``,
            ``tools/call`` or a notification).
        params: JSON-RPC params; ``None`` or a non-dict is treated as ``{}``.
        req_id: Request id echoed back in the response.

    Returns:
        A JSON-RPC 2.0 response dict, or ``None`` for the notifications
        ``notifications/initialized`` and ``notifications/cancelled``.
        Unknown methods and unknown tools yield error ``-32601``. Missing or
        ill-typed tool arguments yield error ``-32602`` instead of raising.
    """
    def ok(r):
        return {"jsonrpc":"2.0","id":req_id,"result":r}

    def err(code, message):
        return {"jsonrpc":"2.0","id":req_id,"error":{"code":code,"message":message}}

    def txt(d):
        return ok({"content":[{"type":"text","text":json.dumps(d)}]})

    if not isinstance(params, dict):
        params = {}

    if method == "initialize":
        return ok({"protocolVersion":"2024-11-05",
                   "serverInfo":{"name":"agent-learn","version":"1.0.0"},
                   "capabilities":{"tools":{}}})
    if method == "tools/list":
        return ok({"tools":MCP_TOOLS})
    if method in ("notifications/initialized","notifications/cancelled"):
        return None
    if method != "tools/call":
        return err(-32601, f"Method not found: {method}")

    n = params.get("name","")
    a = params.get("arguments") or {}
    try:
        if n == "learn_q_get":
            return txt({"entries":q_get(a["agent"],a["state"])})
        if n == "learn_q_best":
            return txt(q_best_action(a["agent"],a["state"],a.get("actions",[])))
        if n == "learn_q_update":
            return txt(q_update(a["agent"],a["state"],a["action"],
                                float(a["reward"]),a.get("next_state")))
        if n == "learn_q_hint":
            return txt(q_hint(a["agent"],a["state"],a["action"],float(a["nudge"])))
        if n == "learn_stats":
            return txt(learn_stats())
        if n == "learn_rlhf_add":
            rid = rlhf_add(a["agent"],a["prompt"],a["completion"],
                           a.get("label","unlabeled"),a.get("reward"),a.get("source","mcp"))
            return txt({"ok":True,"id":rid})
        if n == "learn_score_trace":
            score, comp = score_trace_event(a.get("event",{}))
            return txt({"reward":score,"components":comp})
        if n == "learn_candidate_add":
            cid = candidate_add(a["description"],a["agent"])
            return txt({"ok":True,"id":cid})
        if n == "learn_sync":
            return txt(pull_and_score_traces())
        if n == "learn_rate_trace":
            rating = float(a["rating"])
            if not (0.0 <= rating <= SCORE_HUMAN_MAX):
                return txt({"ok":False,"error":f"rating must be 0–{SCORE_HUMAN_MAX}"})
            trace_id = a["trace_id"]
            agent = str(a.get("agent","unknown"))
            comment = str(a.get("comment",""))
            # Forward the rating to agent-trace; best-effort, failures are ignored.
            try:
                _http_patch(f"{TRACE_URL}/api/trace/{trace_id}/reward",
                            {"reward":rating,"source":"human","comment":comment})
            except Exception:
                pass
            label = "approved" if rating>=8.0 else ("rejected" if rating<=3.0 else "unlabeled")
            conn = get_db()
            try:
                conn.execute(
                    "INSERT OR IGNORE INTO rewards (id,trace_id,agent,event_type,raw_score,components,ts) VALUES (?,?,?,?,?,?,?)",
                    (str(uuid.uuid4()),trace_id,agent,"human_rating",rating,
                     json.dumps({"human_override":True,"comment":comment}),time.time()))
                conn.commit()
            finally:
                conn.close()
            rid = rlhf_add(agent,f"[human-rated] {trace_id}",comment or "human override",
                           label=label,reward=rating,source="human",meta={"trace_id":trace_id})
            return txt({"ok":True,"trace_id":trace_id,"rating":rating,"label":label,"rlhf_id":rid})
    except (KeyError, TypeError, ValueError) as exc:
        # Missing required argument or a value of the wrong type/format.
        return err(-32602, f"Invalid params for {n}: {exc!r}")
    return err(-32601, f"Unknown tool: {n}")
| # --------------------------------------------------------------------------- | |
| # FastAPI app | |
| # --------------------------------------------------------------------------- | |
@asynccontextmanager
async def lifespan(app):
    """Application lifespan: prepare the database and run the background sync.

    On startup the schema is created, demo data is seeded into an empty
    database, and the periodic trace-sync task is started. On shutdown the
    task is cancelled and awaited so it does not outlive the application.
    """
    init_db()
    seed_demo()
    # Keep a reference: the event loop only holds weak references to tasks.
    sync_task = asyncio.create_task(_sync_loop())
    try:
        yield
    finally:
        sync_task.cancel()
        try:
            await sync_task
        except asyncio.CancelledError:
            pass
# ASGI application object; startup and shutdown work is delegated to `lifespan`.
app = FastAPI(title="agent-learn", version="1.0.0", lifespan=lifespan)
def _auth(r):
    """Return True when request ``r`` is allowed to perform a write.

    If ``LEARN_KEY`` is empty, authentication is disabled and every request
    passes. Otherwise the ``x-learn-key`` header must equal ``LEARN_KEY``.
    """
    if not LEARN_KEY:
        return True
    import hmac  # local import: only needed when a key is configured

    supplied = r.headers.get("x-learn-key", "")
    # Constant-time comparison so response timing does not leak how much of
    # the key matched. Bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    return hmac.compare_digest(supplied.encode("utf-8"), LEARN_KEY.encode("utf-8"))
| # --- Q-table REST --- | |
async def api_q_get(agent: str = Query(...), state: str = Query("{}")):
    """Return the Q-table entries that ``q_get`` reports for an agent/state.

    ``state`` arrives as a JSON-encoded query string; a value that cannot be
    decoded is answered with HTTP 400.
    """
    try:
        decoded_state = json.loads(state)
    except Exception:
        raise HTTPException(400, "state must be JSON")
    entries = q_get(agent, decoded_state)
    return JSONResponse({"entries": entries})
async def api_q_best(request: Request):
    """Return the result of ``q_best_action`` for the posted agent/state/actions.

    Body fields: ``agent`` (required), ``state`` (default ``{}``) and
    ``actions`` (default ``[]``).

    Raises:
        HTTPException: 400 when the body is not valid JSON, is not a JSON
            object, or lacks ``agent``.
    """
    try:
        body = await request.json()
        # Indexing first also rejects non-object payloads (TypeError).
        agent = body["agent"]
    except (KeyError, TypeError, ValueError) as exc:
        raise HTTPException(400, "body must be a JSON object with an 'agent' field") from exc
    choice = q_best_action(agent, body.get("state", {}), body.get("actions", []))
    return JSONResponse(choice)
async def api_q_update(request: Request):
    """Apply a reward to one (agent, state, action) entry via ``q_update``.

    Body fields: ``agent``, ``action`` and a numeric ``reward`` are required;
    ``state`` defaults to ``{}`` and ``next_state`` is optional.

    Raises:
        HTTPException: 403 when the key check fails; 400 when the body is
            not valid JSON, is not a JSON object, lacks a required field,
            or carries a non-numeric ``reward``.
    """
    if not _auth(request):
        raise HTTPException(403, "Invalid X-Learn-Key")
    try:
        body = await request.json()
        agent = body["agent"]
        action = body["action"]
        reward = float(body["reward"])
    except (KeyError, TypeError, ValueError) as exc:
        raise HTTPException(
            400, "body must be a JSON object with 'agent', 'action' and a numeric 'reward'"
        ) from exc
    result = q_update(agent, body.get("state", {}), action, reward, body.get("next_state"))
    return JSONResponse(result)
async def api_q_hint(request: Request):
    """Forward a manual nudge for one (agent, state, action) to ``q_hint``.

    Body fields: ``agent``, ``action`` and a numeric ``nudge`` are required;
    ``state`` defaults to ``{}``.

    Raises:
        HTTPException: 403 when the key check fails; 400 when the body is
            not valid JSON, is not a JSON object, lacks a required field,
            or carries a non-numeric ``nudge``.
    """
    if not _auth(request):
        raise HTTPException(403, "Invalid X-Learn-Key")
    try:
        body = await request.json()
        agent = body["agent"]
        action = body["action"]
        nudge = float(body["nudge"])
    except (KeyError, TypeError, ValueError) as exc:
        raise HTTPException(
            400, "body must be a JSON object with 'agent', 'action' and a numeric 'nudge'"
        ) from exc
    return JSONResponse(q_hint(agent, body.get("state", {}), action, nudge))
async def api_q_stats():
    """Return whatever ``q_stats()`` reports, as a JSON response."""
    summary = q_stats()
    return JSONResponse(summary)
| # --- Scoring --- | |
async def api_score(request: Request):
    """Score the posted trace event and return the reward with its components.

    The decoded JSON body is handed to ``score_trace_event`` unchanged.
    """
    event = await request.json()
    reward, components = score_trace_event(event)
    payload = {"reward": reward, "components": components}
    return JSONResponse(payload)
async def api_sync(request: Request):
    """Run ``pull_and_score_traces()`` on demand and return its result.

    Raises:
        HTTPException: 403 when the key check fails.
    """
    authorised = _auth(request)
    if not authorised:
        raise HTTPException(403, "Invalid X-Learn-Key")
    return JSONResponse(pull_and_score_traces())
| # --- RLHF --- | |
async def api_rlhf_list(
    agent: str = Query(""),
    label: str = Query(""),
    limit: int = Query(50),
):
    """List RLHF entries, passing the three query parameters to ``rlhf_list``."""
    entries = rlhf_list(agent, label, limit)
    return JSONResponse({"entries": entries})
async def api_rlhf_add(request: Request):
    """Store one RLHF example through ``rlhf_add`` and return its id.

    Body fields: ``prompt`` and ``completion`` are required. Optional fields
    and their defaults: ``agent`` ("unknown"), ``label`` ("unlabeled"),
    ``reward`` (None), ``source`` ("api") and ``meta`` (None).

    Raises:
        HTTPException: 403 when the key check fails; 400 when the body is
            not valid JSON, is not a JSON object, or lacks a required field.
    """
    if not _auth(request):
        raise HTTPException(403, "Invalid X-Learn-Key")
    try:
        body = await request.json()
        prompt = body["prompt"]
        completion = body["completion"]
    except (KeyError, TypeError, ValueError) as exc:
        raise HTTPException(
            400, "body must be a JSON object with 'prompt' and 'completion'"
        ) from exc
    rid = rlhf_add(
        body.get("agent", "unknown"),
        prompt,
        completion,
        body.get("label", "unlabeled"),
        body.get("reward"),
        body.get("source", "api"),
        body.get("meta"),
    )
    return JSONResponse({"ok": True, "id": rid})
async def api_rlhf_label(entry_id: str, request: Request):
    """Relabel an RLHF entry via ``rlhf_label`` and report its outcome.

    ``label`` defaults to "unlabeled"; ``reward`` is optional.

    Raises:
        HTTPException: 403 when the key check fails.
    """
    if not _auth(request):
        raise HTTPException(403, "Invalid X-Learn-Key")
    payload = await request.json()
    new_label = payload.get("label", "unlabeled")
    new_reward = payload.get("reward")
    outcome = rlhf_label(entry_id, new_label, new_reward)
    return JSONResponse({"ok": outcome})
async def api_trace_rate(trace_id: str, request: Request):
    """Record a human rating (0 to ``SCORE_HUMAN_MAX``) for a trace.

    The rating is read from ``rating`` (falling back to ``reward``, then
    0.0). It is pushed to agent-trace on a best-effort basis, logged in the
    ``rewards`` table as a ``human_rating`` event, and stored as an RLHF
    example labelled "approved" (>= 8.0), "rejected" (<= 3.0) or
    "unlabeled". No Q-table update is performed directly in this handler.

    Raises:
        HTTPException: 403 when the key check fails; 400 when the body is
            not a JSON object, the rating is not numeric, or it falls
            outside the allowed range.
    """
    import logging  # local import: only used to report a failed write-back

    if not _auth(request):
        raise HTTPException(403, "Invalid X-Learn-Key")
    try:
        b = await request.json()
        rating = float(b.get("rating", b.get("reward", 0.0)))
    except (AttributeError, TypeError, ValueError) as exc:
        raise HTTPException(400, "body must be a JSON object with a numeric rating") from exc
    # NaN fails both comparisons, so it is rejected here as well.
    if not (0.0 <= rating <= SCORE_HUMAN_MAX):
        raise HTTPException(400, f"rating must be 0–{SCORE_HUMAN_MAX}")
    agent = str(b.get("agent", "unknown"))
    comment = str(b.get("comment", ""))

    # Write the reward back to agent-trace. This stays best-effort: a failure
    # must not block the local record, but it is logged instead of vanishing.
    try:
        _http_patch(f"{TRACE_URL}/api/trace/{trace_id}/reward",
                    {"reward": rating, "source": "human", "comment": comment})
    except Exception as exc:
        logging.getLogger(__name__).warning(
            "agent-trace reward write-back failed for %s: %s", trace_id, exc)

    # Log in the rewards table; close the connection even if the insert fails.
    conn = get_db()
    try:
        conn.execute("""
            INSERT OR IGNORE INTO rewards (id,trace_id,agent,event_type,raw_score,components,ts)
            VALUES (?,?,?,?,?,?,?)
        """, (str(uuid.uuid4()), trace_id, agent, "human_rating",
              rating, json.dumps({"human_override": True, "comment": comment}), time.time()))
        conn.commit()
    finally:
        conn.close()

    # RLHF: store as approved/rejected based on the rating.
    label = "approved" if rating >= 8.0 else ("rejected" if rating <= 3.0 else "unlabeled")
    rlhf_add(agent, f"[human-rated trace] {trace_id}", comment or "human override",
             label=label, reward=rating, source="human",
             meta={"trace_id": trace_id, "comment": comment})
    return JSONResponse({"ok": True, "trace_id": trace_id, "rating": rating, "label": label})
| # --- Skill candidates --- | |
async def api_candidates(status: str = Query("pending")):
    """List skill candidates with the given status (default "pending")."""
    matching = candidates_list(status)
    return JSONResponse({"candidates": matching})
async def api_candidate_update(cid: str, request: Request):
    """Set a skill candidate's status via ``candidate_update``.

    ``status`` defaults to "pending" when the body omits it.

    Raises:
        HTTPException: 403 when the key check fails.
    """
    if not _auth(request):
        raise HTTPException(403, "Invalid X-Learn-Key")
    payload = await request.json()
    new_status = payload.get("status", "pending")
    outcome = candidate_update(cid, new_status)
    return JSONResponse({"ok": outcome})
| # --- Stats --- | |
async def api_stats():
    """Return whatever ``learn_stats()`` reports, as a JSON response."""
    overview = learn_stats()
    return JSONResponse(overview)
async def api_trend(hours: int = Query(24)):
    """Return ``reward_trend(hours)`` under the ``trend`` key (default 24)."""
    points = reward_trend(hours)
    return JSONResponse({"trend": points})
async def api_health():
    """Health probe: report the Q-table row count and the service version."""
    conn = get_db()
    try:
        n = conn.execute("SELECT COUNT(*) FROM qtable").fetchone()[0]
    finally:
        # Release the connection even when the query fails.
        conn.close()
    return JSONResponse({"ok": True, "qtable_entries": n, "version": "1.0.0"})
| # --- MCP --- | |
async def mcp_sse(request: Request):
    """Open the MCP server-sent-events stream.

    The stream emits a ``connected`` message, then a ``notifications/tools``
    message carrying ``MCP_TOOLS``, and afterwards a comment ping every 15
    seconds until the client disconnects.
    """
    def frame(message):
        # One SSE "data:" frame holding the JSON-encoded message.
        return f"data: {json.dumps(message)}\n\n"

    async def gen():
        yield frame({"jsonrpc": "2.0", "method": "connected", "params": {}})
        yield frame({"jsonrpc": "2.0", "method": "notifications/tools",
                     "params": {"tools": MCP_TOOLS}})
        while not await request.is_disconnected():
            yield ": ping\n\n"
            await asyncio.sleep(15)

    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "X-Accel-Buffering": "no",
    }
    return StreamingResponse(gen(), media_type="text/event-stream", headers=sse_headers)
async def mcp_rpc(request: Request):
    """JSON-RPC 2.0 endpoint for MCP: accepts a single request or a batch.

    Each message is dispatched to ``handle_mcp``. A body that cannot be
    parsed yields error -32700. A payload or batch item that is not a JSON
    object yields error -32600 rather than an unhandled exception. Batch
    replies omit messages for which ``handle_mcp`` returned nothing; a
    single message with no reply is answered with an empty ``result``.
    """
    def _invalid_request():
        return {"jsonrpc": "2.0", "id": None,
                "error": {"code": -32600, "message": "Invalid Request"}}

    def _dispatch(msg):
        if not isinstance(msg, dict):
            return _invalid_request()
        return handle_mcp(msg.get("method", ""), msg.get("params", {}), msg.get("id"))

    try:
        body = await request.json()
    except Exception:
        return JSONResponse({"jsonrpc": "2.0", "id": None,
                             "error": {"code": -32700, "message": "Parse error"}})
    if isinstance(body, list):
        replies = [_dispatch(item) for item in body]
        return JSONResponse([reply for reply in replies if reply])
    if not isinstance(body, dict):
        return JSONResponse(_invalid_request())
    reply = _dispatch(body)
    return JSONResponse(reply or {"jsonrpc": "2.0", "id": body.get("id"), "result": {}})
| # --------------------------------------------------------------------------- | |
| # SPA Dashboard | |
| # --------------------------------------------------------------------------- | |
| SPA = r"""<!DOCTYPE html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1"> | |
| <title>🧠 LEARN — FORGE Learning Layer</title> | |
| <style> | |
| @import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=Syne:wght@400;600;800&family=DM+Mono:wght@300;400;500&display=swap'); | |
| *{box-sizing:border-box;margin:0;padding:0} | |
| :root{--bg:#06060d;--sf:#0d0d18;--sf2:#121222;--br:#1a1a2e;--ac:#ff6b00;--tx:#dde0f0;--mu:#50507a;--gr:#00ff88;--rd:#ff4455;--cy:#06b6d4;--pu:#8b5cf6;--ye:#f59e0b;--pk:#ec4899} | |
| html,body{height:100%;background:var(--bg);color:var(--tx);font-family:'Syne',sans-serif} | |
| ::-webkit-scrollbar{width:5px;height:5px}::-webkit-scrollbar-track{background:var(--sf)}::-webkit-scrollbar-thumb{background:var(--br);border-radius:3px} | |
| .app{display:grid;grid-template-rows:52px 1fr;height:100vh;overflow:hidden} | |
| .hdr{display:flex;align-items:center;gap:1rem;padding:0 1.5rem;border-bottom:1px solid var(--br);background:var(--sf)} | |
| .logo{font-family:'Space Mono',monospace;font-size:1.1rem;font-weight:700;color:var(--ac)} | |
| .sub{font-family:'DM Mono',monospace;font-size:.6rem;color:var(--mu);letter-spacing:.2em;text-transform:uppercase} | |
| .hstats{display:flex;gap:1.5rem;margin-left:auto} | |
| .hs{text-align:center}.hs-n{font-family:'Space Mono',monospace;font-size:1rem;font-weight:700;color:var(--ac)} | |
| .hs-l{font-family:'DM Mono',monospace;font-size:.58rem;color:var(--mu);text-transform:uppercase;letter-spacing:.1em} | |
| .tabs{display:flex;border-bottom:1px solid var(--br);background:var(--sf)} | |
| .tab{padding:.55rem 1.3rem;font-family:'DM Mono',monospace;font-size:.72rem;color:var(--mu);border-bottom:2px solid transparent;cursor:pointer;letter-spacing:.05em;transition:all .15s} | |
| .tab.active{color:var(--ac);border-bottom-color:var(--ac)} | |
| .tab:hover{color:var(--tx)} | |
| .body{flex:1;overflow-y:auto;padding:1.25rem} | |
| /* Cards */ | |
| .kpis{display:grid;grid-template-columns:repeat(4,1fr);gap:.75rem;margin-bottom:1.25rem} | |
| .kpi{background:var(--sf);border:1px solid var(--br);border-radius:8px;padding:.9rem 1rem} | |
| .kpi-n{font-family:'Space Mono',monospace;font-size:1.6rem;font-weight:700;color:var(--ac);line-height:1} | |
| .kpi-l{font-family:'DM Mono',monospace;font-size:.6rem;color:var(--mu);text-transform:uppercase;letter-spacing:.1em;margin-top:4px} | |
| .kpi-sub{font-family:'DM Mono',monospace;font-size:.65rem;color:var(--mu);margin-top:2px} | |
| /* Q-table */ | |
| .qtable-grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(280px,1fr));gap:.75rem} | |
| .qt-agent{background:var(--sf);border:1px solid var(--br);border-radius:8px;overflow:hidden} | |
| .qt-agent-hdr{padding:.6rem 1rem;border-bottom:1px solid var(--br);font-family:'Space Mono',monospace;font-size:.8rem;font-weight:700;color:var(--ac);display:flex;align-items:center;gap:.5rem} | |
| .qt-row{display:flex;align-items:center;padding:.35rem 1rem;gap:.6rem;border-bottom:1px solid #0d0d18;font-family:'DM Mono',monospace;font-size:.72rem} | |
| .qt-row:last-child{border-bottom:none} | |
| .qt-action{flex:1;color:var(--tx);overflow:hidden;text-overflow:ellipsis;white-space:nowrap} | |
| .qt-bar{width:80px;height:6px;background:var(--br);border-radius:3px;overflow:hidden;flex-shrink:0} | |
| .qt-bar-fill{height:100%;border-radius:3px;transition:width .3s} | |
| .qt-val{font-weight:700;width:48px;text-align:right;flex-shrink:0} | |
| .qt-vis{font-size:.6rem;color:var(--mu);width:30px;text-align:right;flex-shrink:0} | |
| /* Reward trend */ | |
| .trend-container{background:var(--sf);border:1px solid var(--br);border-radius:8px;padding:1rem;margin-bottom:1rem} | |
| .trend-title{font-family:'DM Mono',monospace;font-size:.65rem;color:var(--mu);text-transform:uppercase;letter-spacing:.15em;margin-bottom:.75rem} | |
| .trend-chart{height:80px;display:flex;align-items:flex-end;gap:3px} | |
| .t-bar-wrap{flex:1;display:flex;flex-direction:column;align-items:center;height:100%} | |
| .t-bar{width:100%;border-radius:2px 2px 0 0;min-height:2px;transition:height .3s} | |
| .t-lbl{font-family:'DM Mono',monospace;font-size:.5rem;color:var(--mu);margin-top:2px;text-align:center} | |
| /* RLHF table */ | |
| .rlhf-table{width:100%;border-collapse:collapse;font-family:'DM Mono',monospace;font-size:.75rem} | |
| .rlhf-table th{padding:.4rem .75rem;text-align:left;font-size:.62rem;color:var(--mu);text-transform:uppercase;letter-spacing:.1em;border-bottom:1px solid var(--br)} | |
| .rlhf-table td{padding:.45rem .75rem;border-bottom:1px solid #0d0d18;max-width:200px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap} | |
| .rlhf-table tr:hover td{background:var(--sf)} | |
| .badge{display:inline-block;padding:1px 7px;border-radius:4px;font-size:.62rem} | |
| .badge-approved{background:#001a08;color:var(--gr);border:1px solid #004422} | |
| .badge-rejected{background:#1a0000;color:var(--rd);border:1px solid #440011} | |
| .badge-unlabeled{background:var(--sf2);color:var(--mu);border:1px solid var(--br)} | |
| /* Skill candidates */ | |
| .cand-card{background:var(--sf);border:1px solid var(--br);border-radius:8px;padding:.8rem 1rem;margin-bottom:.6rem;display:flex;align-items:flex-start;gap:1rem} | |
| .cand-desc{flex:1;font-size:.82rem;line-height:1.6} | |
| .cand-meta{font-family:'DM Mono',monospace;font-size:.62rem;color:var(--mu)} | |
| .cand-freq{font-family:'Space Mono',monospace;font-size:1.2rem;font-weight:700;color:var(--ye);min-width:30px;text-align:center} | |
| .btn{padding:.4rem .9rem;border:none;border-radius:5px;cursor:pointer;font-family:'DM Mono',monospace;font-size:.7rem;transition:all .15s} | |
| .btn-approve{background:#001a08;color:var(--gr);border:1px solid #004422} | |
| .btn-approve:hover{background:#003010} | |
| .btn-reject{background:#1a0000;color:var(--rd);border:1px solid #440011} | |
| .btn-reject:hover{background:#300010} | |
| .btn-sync{background:var(--sf2);color:var(--ac);border:1px solid var(--br);margin-left:auto} | |
| .btn-sync:hover{border-color:var(--ac)} | |
| /* Config panel */ | |
| .config-row{display:flex;align-items:center;padding:.6rem 1rem;border-bottom:1px solid var(--br);font-family:'DM Mono',monospace;font-size:.78rem} | |
| .config-key{color:var(--mu);width:160px;text-transform:uppercase;font-size:.65rem;letter-spacing:.1em} | |
| .config-val{color:var(--cy);font-weight:700} | |
| .config-desc{color:var(--mu);font-size:.65rem;margin-left:.75rem} | |
| .section{font-family:'DM Mono',monospace;font-size:.65rem;color:var(--pu);text-transform:uppercase;letter-spacing:.15em;margin:.75rem 0 .4rem} | |
| .empty{text-align:center;padding:2rem;color:var(--mu);font-family:'DM Mono',monospace;font-size:.8rem} | |
| </style> | |
| </head> | |
| <body> | |
| <div class="app"> | |
| <header class="hdr"> | |
| <div><div class="logo">🧠 LEARN</div><div class="sub">FORGE Learning Layer</div></div> | |
| <div class="hstats"> | |
| <div class="hs"><div class="hs-n" id="hQ">—</div><div class="hs-l">Q-entries</div></div> | |
| <div class="hs"><div class="hs-n" id="hR" style="color:var(--gr)">—</div><div class="hs-l">Rewards</div></div> | |
| <div class="hs"><div class="hs-n" id="hA">—</div><div class="hs-l">Avg reward</div></div> | |
| <div class="hs"><div class="hs-n" id="hC" style="color:var(--ye)">—</div><div class="hs-l">Candidates</div></div> | |
| </div> | |
| </header> | |
| <div style="display:flex;flex-direction:column;overflow:hidden;flex:1"> | |
| <div class="tabs"> | |
| <div class="tab active" onclick="showTab('qtable')">⚙ Q-Table</div> | |
| <div class="tab" onclick="showTab('rewards')">🏆 Rewards</div> | |
| <div class="tab" onclick="showTab('rlhf')">👥 RLHF</div> | |
| <div class="tab" onclick="showTab('candidates')">💡 Skill Candidates</div> | |
| <div class="tab" onclick="showTab('config')">⚙︎ Config</div> | |
| <button class="btn btn-sync" onclick="triggerSync()" style="margin:auto 1rem auto auto;padding:.3rem .75rem">↻ Sync Traces</button> | |
| </div> | |
| <div class="body" id="tabBody"></div> | |
| </div> | |
| </div> | |
| <script> | |
// Dashboard state shared by the loaders and renderers below.
let stats=null;
let trend=[];
let rlhf=[];
let candidates=[];
let currentTab='qtable';
// Fetch stats and the 24h reward trend together, refresh the header counters,
// then re-render the active tab.
async function loadAll(){
  const statsReq=fetch('/api/stats').then(r=>r.json());
  const trendReq=fetch('/api/reward-trend?hours=24').then(r=>r.json()).then(d=>d.trend||[]);
  [stats,trend] = await Promise.all([statsReq,trendReq]);
  const put=(id,value)=>{ document.getElementById(id).textContent=value; };
  put('hQ',stats.qtable?.total_entries||0);
  put('hR',stats.rewards?.total||0);
  put('hA',stats.rewards?.avg_all_time?.toFixed(3)||'—');
  put('hC',stats.skill_candidates_pending||0);
  renderTab();
}
// Load up to 50 RLHF entries into the shared `rlhf` array.
async function loadRLHF(){
  const res=await fetch('/api/rlhf?limit=50');
  const data=await res.json();
  rlhf=data.entries||[];
}
// Load skill candidates (server default status) into the shared `candidates` array.
async function loadCandidates(){
  const res=await fetch('/api/candidates');
  const data=await res.json();
  candidates=data.candidates||[];
}
// Switch to tab `t`: highlight the matching tab header and render its body.
function showTab(t){
  // Tab ids in the same order as the .tab elements in the markup.
  const order=['qtable','rewards','rlhf','candidates','config'];
  currentTab=t;
  document.querySelectorAll('.tab').forEach((el,i)=>{
    el.classList.toggle('active',order[i]===t);
  });
  renderTab();
}
// Render the currently selected tab, loading its data first where needed.
async function renderTab(){
  switch(currentTab){
    case 'qtable':
      renderQTable();
      break;
    case 'rewards':
      renderRewards();
      break;
    case 'rlhf':
      await loadRLHF();
      renderRLHF();
      break;
    case 'candidates':
      await loadCandidates();
      renderCandidates();
      break;
    case 'config':
      renderConfig();
      break;
  }
}
// Render the Q-Table tab into #tabBody: KPI cards, best Q-values grouped by
// agent, and the worst-performing actions. Agent and action names come from
// API callers, so they go through esc() before being placed in the markup.
function renderQTable(){
  const qt = stats?.qtable || {};
  const byAgent = qt.by_agent || [];
  const top = qt.top_actions || [];
  // Tolerate a null or non-numeric q_value instead of throwing mid-render.
  const num = v => Number(v)||0;
  // Group top actions by agent; agents without a top action still get a card.
  const grouped = {};
  top.forEach(r=>{ if(!grouped[r.agent]) grouped[r.agent]=[]; grouped[r.agent].push(r); });
  byAgent.forEach(a=>{ if(!grouped[a.agent]) grouped[a.agent]=[]; });
  const kpiCards = byAgent.slice(0,3).map(a=>`<div class="kpi"><div class="kpi-n" style="font-size:1.2rem">${a.n}</div><div class="kpi-l">${esc(a.agent)}</div><div class="kpi-sub">avg Q: ${num(a.avg_q).toFixed(3)}</div></div>`).join('');
  const bestCards = Object.entries(grouped).map(([agent, rows])=>{
    // Bars are scaled relative to the best action of the same agent.
    const maxQ = Math.max(...rows.map(r=>num(r.q_value)), 0.001);
    const body = rows.length ? rows.map(r=>{
      const q = num(r.q_value);
      const pct = Math.max(0,Math.min(100,(q/maxQ)*100));
      const col = q>0.5?'var(--gr)':q>0?'var(--ye)':'var(--rd)';
      return `<div class="qt-row">
<span class="qt-action">${esc(r.action)}</span>
<div class="qt-bar"><div class="qt-bar-fill" style="width:${pct}%;background:${col}"></div></div>
<span class="qt-val" style="color:${col}">${q.toFixed(3)}</span>
<span class="qt-vis">${r.visits}x</span>
</div>`;
    }).join('') : '<div class="qt-row" style="color:var(--mu)">No entries yet</div>';
    return `<div class="qt-agent">
<div class="qt-agent-hdr">⚙ ${esc(agent)}</div>
${body}
</div>`;
  }).join('');
  const worstByAgent = (qt.worst_actions||[]).reduce((g,r)=>{ if(!g[r.agent]) g[r.agent]=[]; g[r.agent].push(r); return g; },{});
  const worstCards = Object.values(worstByAgent).map(rows=>`<div class="qt-agent">
<div class="qt-agent-hdr" style="color:var(--rd)">⚠ ${esc(rows[0].agent)} — avoid</div>
${rows.map(r=>`<div class="qt-row"><span class="qt-action">${esc(r.action)}</span><span class="qt-val" style="color:var(--rd)">${num(r.q_value).toFixed(3)}</span></div>`).join('')}
</div>`).join('');
  document.getElementById('tabBody').innerHTML=`
<div class="kpis">
<div class="kpi"><div class="kpi-n">${qt.total_entries||0}</div><div class="kpi-l">Total entries</div></div>
${kpiCards}
</div>
<div class="section">Best Q-values per agent</div>
<div class="qtable-grid">
${bestCards}
</div>
<div class="section" style="margin-top:1rem">Worst-performing actions</div>
<div class="qtable-grid">
${worstCards}
</div>`;
}
// Render the Rewards tab: headline KPIs, the hourly average-reward bar chart,
// and the static scoring-model reference table.
function renderRewards(){
  const rw = stats?.rewards||{};
  // [component, score delta, description] rows of the scoring-model table.
  const scoringModel = [
    ['baseline','+6.0','Every event starts here (acceptable)'],
    ['error','-3.0','status=error'],
    ['injection_detected','-4.0','Injection flag from agent-harness'],
    ['pii_leaked','-4.0','PII exfiltration detected by compliance'],
    ['hallucinated_tool','-3.0','Agent called non-existent tool'],
    ['saga_compensated','-1.0','Saga pattern ran compensations'],
    ['latency > 8s','-1.5','LLM call took > 8000ms'],
    ['skill_load','+0.5','Reused skill from FORGE'],
    ['skill_candidate','+1.0','Agent surfaced a new skill pattern'],
    ['memory_stored','+0.3','Agent stored to agent-memory'],
    ['latency < 1s (LLM)','+0.5','LLM call completed in < 1000ms'],
    ['saga_clean','+0.5','Saga completed without compensation'],
    ['AUTO CEILING','9.0','Max auto-score (10 = human-only via PATCH /api/traces/{id}/rate)']
  ];
  // Bar heights are relative to the largest absolute hourly average.
  const peak = Math.max(...trend.map(t=>Math.abs(t.avg_reward||0)), 0.001);
  let bars;
  if(trend.length){
    bars = trend.map(t=>{
      const h=Math.max(3,Math.abs(t.avg_reward||0)/peak*100);
      const col=t.avg_reward>=0?'var(--gr)':'var(--rd)';
      const hStr=new Date(t.ts*1000).getHours()+'h';
      return `<div class="t-bar-wrap"><div style="flex:1;display:flex;align-items:flex-end;width:100%"><div class="t-bar" style="height:${h}%;background:${col}" title="avg=${t.avg_reward} n=${t.count}"></div></div><div class="t-lbl">${hStr}</div></div>`;
    }).join('');
  }else{
    bars = '<div style="color:var(--mu);font-family:DM Mono,monospace;font-size:.75rem;margin:auto">No reward data yet</div>';
  }
  const modelRows = scoringModel.map(([k,v,d])=>{
    let colour='var(--gr)';
    if(v.startsWith('-')) colour='var(--rd)';
    else if(v==='9.0') colour='var(--ye)';
    return `<div class="config-row"><span class="config-key">${k}</span><span class="config-val" style="color:${colour}">${v}</span><span class="config-desc">${d}</span></div>`;
  }).join('');
  document.getElementById('tabBody').innerHTML=`
<div class="kpis">
<div class="kpi"><div class="kpi-n">${rw.total||0}</div><div class="kpi-l">Total scored</div></div>
<div class="kpi"><div class="kpi-n" style="color:var(--gr)">${rw.avg_all_time?.toFixed(3)||'—'}</div><div class="kpi-l">All-time avg</div></div>
<div class="kpi"><div class="kpi-n" style="color:var(--cy)">${rw.last_24h?.count||0}</div><div class="kpi-l">Last 24h</div></div>
<div class="kpi"><div class="kpi-n" style="color:var(--cy)">${rw.last_24h?.avg?.toFixed(3)||'—'}</div><div class="kpi-l">24h avg</div></div>
</div>
<div class="trend-container">
<div class="trend-title">Avg reward per hour (24h)</div>
<div class="trend-chart">${bars}</div>
</div>
<div class="section">Scoring model</div>
<div style="background:var(--sf);border:1px solid var(--br);border-radius:8px;overflow:hidden">
${modelRows}
</div>`;
}
// Render the RLHF tab: label counters plus a table of the loaded entries.
// Every entry field is caller-supplied, so all of them go through esc().
function renderRLHF(){
  const s = stats?.rlhf||{};
  // Shorten long text for a cell; the ellipsis is added only when text was
  // actually cut, and null/undefined values no longer throw.
  const clip = (v,n)=>{ const t=String(v??''); return t.length>n ? t.slice(0,n)+'...' : t; };
  const rows = rlhf.length ? rlhf.map(r=>`<tr>
<td>${esc(r.agent)}</td>
<td title="${esc(r.prompt)}">${esc(clip(r.prompt,40))}</td>
<td title="${esc(r.completion)}">${esc(clip(r.completion,50))}</td>
<td><span class="badge badge-${esc(r.label)}">${esc(r.label)}</span></td>
<td style="color:${(r.reward||0)>=0?'var(--gr)':'var(--rd)'}">${r.reward!=null?esc(String(r.reward)):'—'}</td>
<td style="color:var(--mu)">${esc(r.source)}</td>
</tr>`).join('') : '<tr><td colspan="6" class="empty">No RLHF entries yet</td></tr>';
  document.getElementById('tabBody').innerHTML=`
<div class="kpis">
<div class="kpi"><div class="kpi-n">${s.total||0}</div><div class="kpi-l">Total entries</div></div>
<div class="kpi"><div class="kpi-n" style="color:var(--gr)">${s.by_label?.approved||0}</div><div class="kpi-l">Approved</div></div>
<div class="kpi"><div class="kpi-n" style="color:var(--rd)">${s.by_label?.rejected||0}</div><div class="kpi-l">Rejected</div></div>
<div class="kpi"><div class="kpi-n" style="color:var(--mu)">${s.by_label?.unlabeled||0}</div><div class="kpi-l">Unlabeled</div></div>
</div>
<table class="rlhf-table" style="background:var(--sf);border:1px solid var(--br);border-radius:8px;overflow:hidden">
<thead><tr><th>Agent</th><th>Prompt</th><th>Completion</th><th>Label</th><th>Reward</th><th>Source</th></tr></thead>
<tbody>
${rows}
</tbody>
</table>`;
}
// Render the Skill Candidates tab: one card per candidate with promote and
// reject buttons.
function renderCandidates(){
  // The candidate id travels in a data attribute and is read back through
  // this.dataset, so it is never spliced into inline JavaScript source.
  const cards = candidates.length ? candidates.map(c=>`
<div class="cand-card">
<div class="cand-freq">${c.frequency}x</div>
<div style="flex:1">
<div class="cand-desc">${esc(c.description)}</div>
<div class="cand-meta">from ${esc(c.agent)} · ${new Date(c.created_at*1000).toLocaleDateString()}</div>
</div>
<div style="display:flex;flex-direction:column;gap:.35rem">
<button class="btn btn-approve" data-id="${esc(c.id)}" onclick="updateCand(this.dataset.id,'promoted')">⇧ Promote</button>
<button class="btn btn-reject" data-id="${esc(c.id)}" onclick="updateCand(this.dataset.id,'rejected')">✕ Reject</button>
</div>
</div>`).join('') : '<div class="empty">No pending skill candidates</div>';
  document.getElementById('tabBody').innerHTML=`
<p style="font-family:'DM Mono',monospace;font-size:.75rem;color:var(--mu);margin-bottom:1rem">
Patterns detected by agents that recur ${3}+ times. Promote to FORGE or reject.
</p>
${cards}`;
}
// Render the Config tab: hyperparameter reference, MCP connection snippet and
// a quick-integration example.
// The values shown are the fixed defaults with their environment variable
// names. They never depended on a server response, so they are rendered
// directly instead of waiting on a /api/health request whose result was
// ignored and whose failure left the labels stuck on "loading...".
// NOTE(review): these are defaults, not live values; confirm whether an
// endpoint exposing the effective configuration should be used instead.
function renderConfig(){
  document.getElementById('tabBody').innerHTML=`
<div class="section">Hyperparameters</div>
<div style="background:var(--sf);border:1px solid var(--br);border-radius:8px;overflow:hidden">
<div class="config-row"><span class="config-key">Learning rate α</span><span class="config-val" id="cfgLR">0.1 (env: LEARN_RATE)</span><span class="config-desc">Q-value update step size</span></div>
<div class="config-row"><span class="config-key">Discount γ</span><span class="config-val" id="cfgDisc">0.9 (env: DISCOUNT)</span><span class="config-desc">Future reward weight</span></div>
<div class="config-row"><span class="config-key">Epsilon ε</span><span class="config-val" id="cfgEps">0.15 (env: EPSILON)</span><span class="config-desc">Exploration rate (random action probability)</span></div>
<div class="config-row"><span class="config-key">Sync interval</span><span class="config-val" id="cfgSync">120s (env: SYNC_INTERVAL)</span><span class="config-desc">Trace pull frequency (seconds)</span></div>
<div class="config-row"><span class="config-key">Trace URL</span><span class="config-val" id="cfgTrace">env: TRACE_URL</span><span class="config-desc">agent-trace endpoint</span></div>
</div>
<div class="section" style="margin-top:1rem">MCP connection</div>
<pre style="background:var(--sf);border:1px solid var(--br);border-radius:6px;padding:.75rem;font-family:'DM Mono',monospace;font-size:.72rem;color:var(--cy)">{"mcpServers":{"learn":{"command":"npx","args":["-y","mcp-remote","${window.location.origin}/mcp/sse"]}}}</pre>
<div class="section" style="margin-top:1rem">Quick integration (NEXUS / any agent)</div>
<pre style="background:var(--sf);border:1px solid var(--br);border-radius:6px;padding:.75rem;font-family:'DM Mono',monospace;font-size:.72rem;color:var(--gr)">LEARN_URL = "${window.location.origin}"
# Ask LEARN for best LLM to route to
import requests
resp = requests.post(f"{LEARN_URL}/api/q/best", json={
"agent": "nexus",
"state": {"agent": "nexus", "event": "model_selection"},
"actions": ["qwen/qwen3.5-35b-a3b", "claude-haiku-4-5", "hf_api", "local_cpu"]
})
best = resp.json() # {"action": "qwen/qwen3.5-35b-a3b", "q_value": 0.72, "strategy": "exploit"}
# After inference, update Q-value
requests.post(f"{LEARN_URL}/api/q/update", json={
"agent": "nexus",
"state": {"agent": "nexus", "event": "model_selection"},
"action": best["action"],
"reward": 0.8 # from trace scoring
})</pre>`;
}
// Ask the server to pull and score traces now, showing progress on the button.
// The button is always restored after 3 seconds, including when the request
// fails or is rejected (for example a 403 when a key is required).
async function triggerSync(){
  const btn=document.querySelector('.btn-sync');
  btn.textContent='↻ Syncing...';
  btn.disabled=true;
  let synced=false;
  try{
    const res=await fetch('/api/sync',{method:'POST'});
    if(!res.ok) throw new Error(`HTTP ${res.status}`);
    const r=await res.json();
    btn.textContent=`↻ Scored ${r.scored||0}`;
    synced=true;
  }catch(err){
    btn.textContent='↻ Sync failed';
  }finally{
    setTimeout(()=>{btn.textContent='↻ Sync Traces';btn.disabled=false;},3000);
  }
  // Refresh the dashboard only when the sync actually ran.
  if(synced) await loadAll();
}
// PATCH a candidate's status, then reload and re-render the candidate list.
async function updateCand(id,status){
  const request={
    method:'PATCH',
    headers:{'Content-Type':'application/json'},
    body:JSON.stringify({status})
  };
  await fetch(`/api/candidates/${id}`,request);
  await loadCandidates();
  renderCandidates();
}
// Escape a value for safe interpolation into HTML text and into single- or
// double-quoted attribute values. null/undefined become the empty string.
// "&" is replaced first so the entities produced below are not re-escaped.
function esc(s){
  return (s==null ? '' : String(s))
    .replace(/&/g,'&amp;')
    .replace(/</g,'&lt;')
    .replace(/>/g,'&gt;')
    .replace(/"/g,'&quot;')
    .replace(/'/g,'&#39;');
}
// Initial load, then refresh every 15 seconds.
loadAll();
setInterval(loadAll,15000);
| </script> | |
| </body></html>""" | |
async def root():
    """Serve the single-page dashboard held in ``SPA``."""
    return HTMLResponse(
        content=SPA,
        media_type="text/html; charset=utf-8",
    )
if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces at the configured port.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=PORT,
        log_level="info",
    )