| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>AI System Architecture — Reference Blueprint</title> |
| <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.1/css/all.min.css" rel="stylesheet"> |
| <!-- Open connections to both Google Fonts origins early: the stylesheet comes from fonts.googleapis.com, the font files from fonts.gstatic.com (fetched in CORS mode, hence crossorigin). --> |
| <link rel="preconnect" href="https://fonts.googleapis.com"> |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> |
| <link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:ital,wght@0,300;0,400;0,500;0,600;0,700;1,400&amp;family=Outfit:wght@300;400;500;600;700&amp;display=swap" rel="stylesheet"> |
| <style> |
| /* Design tokens for the whole page. Everything below reads colours and fonts through these custom properties. */ |
| :root { |
| /* surfaces, darkest to lightest */ |
| --bg:#05080f; --bg1:#0a0f1a; --bg2:#101728; --bg3:#182038; |
| /* hairlines, faintest to strongest */ |
| --border:#1e2a45; --border2:#2a3a5c; --border3:#3a4a6b; |
| /* body text, secondary text, tertiary/label text */ |
| /* NOTE(review): --text3 is used for small labels on the dark surfaces; its contrast looks low, so check it against WCAG AA. */ |
| --text:#c8d0e0; --text2:#6a7a9b; --text3:#3a4a6b; |
| --white:#e8ecf4; |
| --accent:#4a90ff; --accent2:#7c5cfc; |
| /* named palette */ |
| --green:#22c982; --amber:#f5a623; --rose:#f06292; --cyan:#00d4e6; |
| --red:#ef5350; --orange:#ff8a50; --lime:#84cc16; --sky:#38bdf8; |
|
|
| /* per-section colours; the markup hands these to components inline as --lc (layer rows) or --xc (cross-cutting bars) */ |
| --c-present:#ef5350; --c-orch:#22c982; --c-llm:#7c5cfc; |
| --c-data:#f5a623; --c-infra:#38bdf8; |
| --c-sec:#f06292; --c-obs:#9c7cfc; --c-conf:#6a7a9b; |
| --c-mem:#00d4e6; --c-life:#84cc16; --c-dev:#ff8a50; |
|
|
| /* font stacks; both families are loaded from Google Fonts in <head> */ |
| --mono:'IBM Plex Mono',monospace; |
| --sans:'Outfit',sans-serif; |
| } |
| /* Reset: zero default spacing and use border-box sizing everywhere, pseudo-elements included. */ |
| *,*::before,*::after{margin:0;padding:0;box-sizing:border-box} |
| /* 87.5% equals 14px at the default 16px browser setting, but still scales when the user changes their preferred font size (a fixed px value would not). All rem sizes below derive from this. */ |
| html{font-size:87.5%} |
| body{font-family:var(--sans);background:var(--bg);color:var(--text);min-height:100vh;overflow-x:hidden} |
| /* Film-grain overlay: inline SVG turbulence noise at 2.5% opacity, on top of everything but click-through. */ |
| body::after{content:'';position:fixed;inset:0;background:url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.8' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)' opacity='0.025'/%3E%3C/svg%3E");pointer-events:none;z-index:9999} |
| /* Faint 60px blueprint grid behind the content. */ |
| body::before{content:'';position:fixed;inset:0;background-image:linear-gradient(var(--border) 1px,transparent 1px),linear-gradient(90deg,var(--border) 1px,transparent 1px);background-size:60px 60px;opacity:0.1;pointer-events:none} |
| ::selection{background:var(--accent);color:var(--bg)} |
| /* Keyboard focus ring that stays visible on the dark surfaces. */ |
| :focus-visible{outline:2px solid var(--accent);outline-offset:2px} |
|
|
| /* ═══ HEADER ═══ */ |
| /* Wrapping flex row; position:relative + z-index:10 stacks it above the fixed body::before grid. */ |
| .hdr{padding:20px 28px 0;display:flex;align-items:center;gap:16px;flex-wrap:wrap;position:relative;z-index:10} |
| /* Square logo badge; its ::after lays a 6% accent tint over the box. */ |
| .hdr-mark{width:32px;height:32px;border:2px solid var(--accent);border-radius:7px;display:flex;align-items:center;justify-content:center;font-family:var(--mono);font-weight:700;font-size:0.7rem;color:var(--accent);position:relative;overflow:hidden;flex-shrink:0} |
| .hdr-mark::after{content:'';position:absolute;inset:0;background:var(--accent);opacity:0.06} |
| .hdr h1{font-family:var(--mono);font-size:1.05rem;font-weight:600;color:var(--white);letter-spacing:-0.02em} |
| /* <b> inside the title is used for the accent-coloured separators. */ |
| .hdr h1 b{color:var(--accent)} |
| /* Keyboard-shortcut hints; margin-left:auto pushes the group to the right edge of the row. */ |
| .hdr-r{margin-left:auto;font-family:var(--mono);font-size:0.68rem;color:var(--text3);display:flex;gap:12px;align-items:center} |
| .hdr-r kbd{display:inline-block;padding:1px 5px;border:1px solid var(--border2);border-radius:3px;font-size:0.65rem;background:var(--bg2);color:var(--text3);margin:0 1px} |
|
|
| /* ═══ TOOLBAR ═══ */ |
| /* Row of small buttons under the header; wraps when space runs out. */ |
| .toolbar{padding:10px 28px;display:flex;gap:6px;align-items:center;flex-wrap:wrap;position:relative;z-index:10} |
| /* .tb is one toolbar button: mono label with an optional leading <i> icon, never wrapping. */ |
| .tb{font-family:var(--mono);font-size:0.67rem;padding:5px 12px;border-radius:5px;border:1px solid var(--border2);background:var(--bg2);color:var(--text2);cursor:pointer;transition:all 0.2s;display:flex;align-items:center;gap:5px;white-space:nowrap} |
| .tb:hover{border-color:var(--accent);color:var(--white)} |
| /* Highlighted variant. NOTE(review): no element in the static markup carries .on; presumably toggled by script, so confirm. */ |
| .tb.on{border-color:var(--accent);color:var(--accent);background:rgba(74,144,255,0.06)} |
| .tb i{font-size:0.62rem} |
| /* 1px vertical divider between button groups. */ |
| .tb-sep{width:1px;height:18px;background:var(--border);margin:0 2px} |
|
|
| /* ═══ MAIN LAYOUT ═══ */ |
| /* Two columns: a fluid diagram column and a fixed 400px detail column. */ |
| .main { |
| display: grid; |
| grid-template-columns: 1fr 400px; |
| gap: 16px; |
| padding: 0 28px 32px; |
| position: relative; |
| z-index: 10; |
| } |
| /* At 1080px and below: collapse to one column and tighten the horizontal gutters of header, toolbar and main. */ |
| @media (max-width: 1080px) { |
| .main { |
| grid-template-columns: 1fr; |
| } |
| .hdr, |
| .toolbar, |
| .main { |
| padding-left: 12px; |
| padding-right: 12px; |
| } |
| } |
|
|
| /* ═══ VIZ PANEL ═══ */ |
| /* Card that holds the architecture diagram; clips its decorative glow. */ |
| .viz { |
| background: var(--bg1); |
| border: 1px solid var(--border); |
| border-radius: 12px; |
| padding: 16px; |
| position: relative; |
| overflow: hidden; |
| } |
| /* Soft accent glow anchored off the top-left corner; ignores pointer events. */ |
| .viz::before { |
| content: ''; |
| position: absolute; |
| width: 500px; |
| height: 500px; |
| background: radial-gradient(circle,rgba(74,144,255,0.03),transparent 70%); |
| top: -120px; |
| left: -120px; |
| pointer-events: none; |
| } |
|
|
| /* ═══ ARCHITECTURE GRID ═══ */ |
| /* Column 1 (auto) holds the cross-cutting bars, column 2 (1fr) the core stack. */ |
| .arch{display:grid;grid-template-columns:auto 1fr;gap:0;position:relative} |
|
|
| /* cross-cutting sidebar */ |
| .xcut{display:flex;gap:4px;padding-right:10px} |
| /* One vertical bar per cross-cutting concern. Each bar's colour arrives through the --xc custom property set inline on the element. */ |
| .xbar{width:36px;border-radius:6px;border:1px solid var(--border);cursor:pointer;transition:all 0.25s;position:relative;display:flex;flex-direction:column;align-items:center;justify-content:center;gap:6px;padding:8px 0} |
| .xbar:hover{border-color:var(--border2);background:rgba(255,255,255,0.015)} |
| /* Selected state: border and glow are --xc blended with transparent. */ |
| .xbar.sel{border-color:color-mix(in srgb, var(--xc) 50%, transparent);box-shadow:0 0 16px color-mix(in srgb, var(--xc) 12%, transparent)} |
| .xbar .xdot{width:6px;height:6px;border-radius:50%;background:var(--xc)} |
| .xbar .xicon{font-size:0.7rem;color:var(--xc);opacity:0.7} |
| /* Vertical label: vertical-rl writing mode plus a 180deg rotation so the text reads bottom-to-top. */ |
| .xbar .xlabel{writing-mode:vertical-rl;text-orientation:mixed;font-family:var(--mono);font-size:0.55rem;font-weight:600;text-transform:uppercase;letter-spacing:0.12em;color:var(--text3);transform:rotate(180deg)} |
|
|
| /* core stack */ |
| .stack{display:flex;flex-direction:column;gap:4px} |
|
|
| /* One layer row: icon, name, tags. The layer colour arrives through the --lc custom property set inline on each .lyr and is mixed into the background at 5% / 8% / 12% for rest / hover / selected. */ |
| .lyr{display:grid;grid-template-columns:30px 1fr auto;align-items:center;gap:10px;padding:11px 14px;border-radius:7px;cursor:pointer;border:1px solid transparent;transition:all 0.22s;position:relative;background:color-mix(in srgb, var(--lc) 5%, var(--bg2))} |
| /* 3px colour strip on the left edge; invisible at rest, half-visible on hover, solid when selected. */ |
| .lyr::before{content:'';position:absolute;left:0;top:0;bottom:0;width:3px;border-radius:3px 0 0 3px;background:var(--lc);opacity:0;transition:opacity 0.2s} |
| .lyr:hover{background:color-mix(in srgb, var(--lc) 8%, var(--bg2));transform:translateX(3px)} |
| .lyr:hover::before{opacity:0.5} |
| .lyr.sel{background:color-mix(in srgb, var(--lc) 12%, var(--bg2));border-color:color-mix(in srgb, var(--lc) 30%, transparent);transform:translateX(5px);box-shadow:0 0 20px color-mix(in srgb, var(--lc) 10%, transparent)} |
| .lyr.sel::before{opacity:1} |
| /* One-shot glow pulse; its final frame matches the .lyr.sel box-shadow. NOTE(review): .flash is not present in the static markup; presumably added by script. */ |
| .lyr.flash{animation:flashLyr 0.5s ease-out} |
| @keyframes flashLyr{0%{box-shadow:0 0 0 0 color-mix(in srgb, var(--lc) 50%, transparent)}50%{box-shadow:0 0 28px 4px color-mix(in srgb, var(--lc) 35%, transparent)}100%{box-shadow:0 0 20px color-mix(in srgb, var(--lc) 10%, transparent)}} |
|
|
| /* Row internals: icon tile, name with optional <small> subtitle, right-aligned tag chips. */ |
| .lyr .ico{width:30px;height:30px;border-radius:6px;display:flex;align-items:center;justify-content:center;font-size:12px;background:color-mix(in srgb, var(--lc) 15%, var(--bg3));color:var(--lc)} |
| .lyr .nm{font-weight:600;font-size:0.85rem} |
| .lyr .nm small{font-weight:400;color:var(--text2);font-size:0.72rem;margin-left:6px} |
| .lyr .tgs{display:flex;gap:3px;flex-wrap:wrap;justify-content:flex-end} |
| .lyr .tgs span{font-family:var(--mono);font-size:0.54rem;padding:2px 5px;border-radius:3px;background:rgba(255,255,255,0.04);color:var(--text3);letter-spacing:0.02em} |
|
|
| /* Connector between two layer rows: a centred 1px vertical line, 6px tall. */ |
| .fcon{display:flex;justify-content:center;height:6px;position:relative} |
| .fcon::before{content:'';width:1px;height:100%;background:var(--border2)} |
|
|
| /* ═══ LIFECYCLE LOOP ═══ */ |
| /* Clickable strip below the layer rows: icon, label, then a chain of step chips. Coloured with --c-life. */ |
| .lifecycle{margin-top:10px;border:1px solid var(--border);border-radius:7px;padding:10px 14px;display:flex;align-items:center;gap:10px;cursor:pointer;transition:all 0.22s} |
| .lifecycle:hover{border-color:var(--border2);background:rgba(132,204,22,0.03)} |
| .lifecycle.sel{border-color:color-mix(in srgb, var(--c-life) 40%, transparent);box-shadow:0 0 16px color-mix(in srgb, var(--c-life) 10%, transparent)} |
| .lifecycle .lf-icon{color:var(--c-life);font-size:0.75rem} |
| .lifecycle .lf-label{font-family:var(--mono);font-size:0.7rem;font-weight:600;color:var(--c-life)} |
| .lifecycle .lf-steps{font-family:var(--mono);font-size:0.58rem;color:var(--text3);display:flex;gap:4px;flex-wrap:wrap;align-items:center} |
| /* Every span is a bordered chip by default… */ |
| .lifecycle .lf-steps span{padding:2px 6px;border-radius:3px;border:1px solid var(--border);background:var(--bg2)} |
| /* …except .arr, which strips the chip styling so arrows render as bare glyphs. */ |
| .lifecycle .lf-steps .arr{border:none;background:none;color:var(--text3);padding:0} |
|
|
| /* ═══ DEV PRACTICES ═══ */ |
| /* Dashed, dimmed strip (opacity 0.7 at rest); hover and the selected state bring it to full opacity with a --c-dev border. */ |
| .devprac{margin-top:6px;border:1px dashed var(--border2);border-radius:7px;padding:10px 14px;display:flex;align-items:center;gap:10px;cursor:pointer;transition:all 0.22s;opacity:0.7} |
| .devprac:hover{opacity:1;border-color:var(--c-dev)} |
| .devprac.sel{opacity:1;border-color:var(--c-dev);box-shadow:0 0 12px color-mix(in srgb, var(--c-dev) 8%, transparent)} |
| .devprac .dp-icon{color:var(--c-dev);font-size:0.7rem} |
| .devprac .dp-label{font-family:var(--mono);font-size:0.68rem;font-weight:600;color:var(--c-dev)} |
| .devprac .dp-sub{font-family:var(--mono);font-size:0.56rem;color:var(--text3)} |
|
|
| /* ═══ DETAIL PANEL ═══ */ |
| /* Sticky right-hand card: tab strip on top, scrollable body below. Height is capped to the viewport so the body scrolls rather than the page. */ |
| .det{background:var(--bg1);border:1px solid var(--border);border-radius:12px;overflow:hidden;display:flex;flex-direction:column;max-height:calc(100vh - 100px);position:sticky;top:12px} |
| .det-tabs{display:flex;border-bottom:1px solid var(--border);background:var(--bg2);flex-shrink:0} |
| /* .dt is one tab; .act marks the active tab with an accent underline. */ |
| .dt{flex:1;padding:9px 6px;text-align:center;font-family:var(--mono);font-size:0.63rem;font-weight:500;color:var(--text3);cursor:pointer;border-bottom:2px solid transparent;transition:all 0.2s;text-transform:uppercase;letter-spacing:0.06em} |
| .dt:hover{color:var(--text2)} |
| .dt.act{color:var(--accent);border-bottom-color:var(--accent)} |
| .det-body{padding:20px;overflow-y:auto;flex:1} |
| /* Slim scrollbar for engines that implement the ::-webkit-scrollbar pseudo-elements. */ |
| .det-body::-webkit-scrollbar{width:5px} |
| .det-body::-webkit-scrollbar-track{background:transparent} |
| .det-body::-webkit-scrollbar-thumb{background:var(--border2);border-radius:3px} |
| /* Same look through the standard properties, only where the pseudo-elements are unsupported, so engines that honour the rules above keep their exact rendering. */ |
| @supports not selector(::-webkit-scrollbar){.det-body{scrollbar-width:thin;scrollbar-color:var(--border2) transparent}} |
|
|
| /* Building blocks for content shown inside the detail body: title, colour bar, section headings, paragraphs, bullet lists, badges, code samples and diagrams. */ |
| .d-title{font-family:var(--mono);font-size:1rem;font-weight:700;color:var(--white);margin-bottom:3px} |
| /* Short bar under the title; no background is set here. NOTE(review): presumably coloured inline per section, so confirm in the render code. */ |
| .d-bar{width:32px;height:3px;border-radius:2px;margin-bottom:16px} |
| .d-h{font-family:var(--mono);font-size:0.64rem;font-weight:600;text-transform:uppercase;letter-spacing:0.1em;color:var(--text3);margin:18px 0 6px} |
| /* NOTE(review): :first-of-type matches by element type, not by class; this only removes the top margin if the first .d-h is also the first element of its tag among its siblings. */ |
| .d-h:first-of-type{margin-top:0} |
| .d-p{font-size:0.82rem;line-height:1.6;color:var(--text)} |
| .d-ul{list-style:none;padding:0} |
| .d-ul li{padding:4px 0;font-size:0.8rem;color:var(--text);display:flex;gap:7px;line-height:1.45} |
| /* Custom bullet glyph in place of the removed list marker. */ |
| .d-ul li::before{content:'▸';color:var(--text3);flex-shrink:0;font-size:0.65rem;margin-top:2px} |
| .d-badges{display:flex;flex-wrap:wrap;gap:4px} |
| .d-badge{font-family:var(--mono);font-size:0.63rem;padding:3px 8px;border-radius:4px;background:var(--bg3);border:1px solid var(--border);color:var(--text2);transition:all 0.2s;cursor:default} |
| .d-badge:hover{border-color:var(--accent);color:var(--white)} |
| /* Code sample box; white-space:pre keeps the sample's line breaks and spacing, long lines scroll horizontally. */ |
| .d-code{background:var(--bg);border:1px solid var(--border);border-radius:7px;padding:12px 14px;margin-top:6px;font-family:var(--mono);font-size:0.64rem;line-height:1.75;color:var(--text2);overflow-x:auto;white-space:pre} |
| /* Syntax-highlight classes used by the spans inside code samples: k, s, c, f, t. */ |
| .d-code .k{color:var(--accent)} .d-code .s{color:var(--green)} .d-code .c{color:var(--text3);font-style:italic} .d-code .f{color:var(--amber)} .d-code .t{color:var(--cyan)} |
| .d-pattern{background:var(--bg);border:1px solid var(--border);border-radius:7px;padding:14px;margin-top:6px} |
| .d-pattern svg{width:100%;display:block} |
|
|
| /* Placeholder shown in the detail body before anything is selected: icon and hint text, centred in both directions. */ |
| .empty { |
| display: flex; |
| flex-direction: column; |
| align-items: center; |
| justify-content: center; |
| height: 350px; |
| color: var(--text3); |
| gap: 8px; |
| text-align: center; |
| } |
| .empty i { |
| font-size: 2rem; |
| opacity: 0.2; |
| } |
| .empty span { |
| font-size: 0.8rem; |
| } |
|
|
| /* ═══ TRACE BAR ═══ */ |
| /* Status bar pinned to the bottom of the viewport. Hidden (display:none) until the .show class is added. */ |
| .trace-bar{position:fixed;bottom:0;left:0;right:0;background:var(--bg2);border-top:1px solid var(--border);padding:9px 28px;display:none;align-items:center;gap:14px;z-index:100;font-family:var(--mono);font-size:0.68rem} |
| .trace-bar.show{display:flex} |
| /* Progress track across the top edge of the bar; .t-fill is the gradient fill and its width changes are animated over 0.35s. */ |
| .trace-bar .t-prog{height:3px;background:var(--border);border-radius:2px;position:absolute;top:0;left:0;right:0} |
| .trace-bar .t-fill{height:100%;background:linear-gradient(90deg,var(--accent),var(--accent2));border-radius:2px;transition:width 0.35s;width:0%} |
| /* Text slots: label, description (takes the remaining width), elapsed time. */ |
| .trace-bar .t-lbl{color:var(--accent);font-weight:600} |
| .trace-bar .t-desc{color:var(--text2);flex:1} |
| .trace-bar .t-time{color:var(--text3)} |
|
|
| /* stagger */ |
| /* Entrance animation: fade and slide up from 10px below. Only the start frame is declared, so the end frame is each element's own computed style and the animation never pins opacity or transform to fixed values. */ |
| @keyframes sUp{from{opacity:0;transform:translateY(10px)}} |
| /* fill-mode is `backwards`, not `both`: the hidden start frame is held during the per-item delay, but once the animation ends opacity and transform go back to the normal cascade. With `both` the final frame stayed applied and overrode .devprac's resting opacity:0.7 and the translateX on .lyr:hover / .lyr.sel. */ |
| .lyr,.xbar,.lifecycle,.devprac{animation:sUp 0.35s ease backwards} |
| .xbar:nth-child(1){animation-delay:.05s}.xbar:nth-child(2){animation-delay:.1s}.xbar:nth-child(3){animation-delay:.15s}.xbar:nth-child(4){animation-delay:.2s} |
| /* Layer rows are the odd children of .stack; the even ones are .fcon connectors. */ |
| .lyr:nth-child(1){animation-delay:.12s}.lyr:nth-child(3){animation-delay:.18s}.lyr:nth-child(5){animation-delay:.24s}.lyr:nth-child(7){animation-delay:.30s}.lyr:nth-child(9){animation-delay:.36s} |
| /* Skip the entrance motion for users who ask for reduced motion. */ |
| @media (prefers-reduced-motion:reduce){.lyr,.xbar,.lifecycle,.devprac{animation:none}} |
| </style> |
| </head> |
| <body> |
|
|
| <!-- Page header: logo badge, title and keyboard-shortcut hints. Uses the header landmark; the badge and the leading // are decoration and are hidden from assistive tech so the heading reads cleanly. --> |
| <header class="hdr"> |
| <div class="hdr-mark" aria-hidden="true">AI</div> |
| <h1><b aria-hidden="true">//</b> AI System Architecture <b>·</b> Reference Blueprint</h1> |
| <div class="hdr-r"> |
| <span><kbd>↑</kbd><kbd>↓</kbd> nav</span> |
| <span><kbd>Enter</kbd> tab</span> |
| <span><kbd>T</kbd> trace</span> |
| </div> |
| </header> |
|
|
| <!-- Trace launchers. The inline handlers are kept as they are: runTrace and the trace* arguments are expected to be globals provided by the page script, which is outside this block. type="button" is explicit so the buttons never submit if the toolbar is ever placed inside a form; icons are decorative and hidden from assistive tech. --> |
| <div class="toolbar"> |
| <button class="tb" type="button" onclick="runTrace(traceReq)"><i class="fas fa-route" aria-hidden="true"></i> Trace Request</button> |
| <button class="tb" type="button" onclick="runTrace(traceRag)"><i class="fas fa-magnifying-glass" aria-hidden="true"></i> RAG Flow</button> |
| <button class="tb" type="button" onclick="runTrace(traceAgent)"><i class="fas fa-robot" aria-hidden="true"></i> Agent Loop</button> |
| <div class="tb-sep"></div> |
| <button class="tb" type="button" onclick="runTrace(traceFeedback)"><i class="fas fa-arrows-spin" aria-hidden="true"></i> Feedback Loop</button> |
| </div> |
|
|
| <div class="main"> |
| |
| <div class="viz"> |
| <div class="arch"> |
| |
| <!-- Cross-cutting concerns, one vertical bar each. data-key names the section; --xc supplies the bar's colour to the .xbar rules. Icons are decorative (the label carries the name) and are hidden from assistive tech. --> |
| <div class="xcut"> |
| <div class="xbar" data-key="security" style="--xc:var(--c-sec)"> |
| <span class="xdot"></span> |
| <i class="fas fa-shield-halved xicon" aria-hidden="true"></i> |
| <span class="xlabel">Security</span> |
| </div> |
| <div class="xbar" data-key="observability" style="--xc:var(--c-obs)"> |
| <span class="xdot"></span> |
| <i class="fas fa-chart-line xicon" aria-hidden="true"></i> |
| <span class="xlabel">Observability</span> |
| </div> |
| <div class="xbar" data-key="configuration" style="--xc:var(--c-conf)"> |
| <span class="xdot"></span> |
| <i class="fas fa-gear xicon" aria-hidden="true"></i> |
| <span class="xlabel">Config</span> |
| </div> |
| <div class="xbar" data-key="memory" style="--xc:var(--c-mem)"> |
| <span class="xdot"></span> |
| <i class="fas fa-brain xicon" aria-hidden="true"></i> |
| <span class="xlabel">Memory</span> |
| </div> |
| </div> |
|
|
| |
| <div class="stack"> |
| <!-- The five core layers, top to bottom, separated by .fcon connectors. The element order matters: the stagger delays in the stylesheet target .lyr as the odd children of .stack. data-key names the section; --lc supplies the row colour. Icons are decorative and hidden from assistive tech; literal ampersands in text are written as &amp;. --> |
| <div class="lyr" data-key="presentation" style="--lc:var(--c-present)"> |
| <div class="ico"><i class="fas fa-desktop" aria-hidden="true"></i></div> |
| <span class="nm">Presentation <small>UI · API · CLI</small></span> |
| <div class="tgs"><span>Gradio</span><span>Vue</span><span>Chainlit</span><span>SSE</span><span>CLI</span></div> |
| </div> |
| <div class="fcon"></div> |
| <div class="lyr" data-key="orchestration" style="--lc:var(--c-orch)"> |
| <div class="ico"><i class="fas fa-sitemap" aria-hidden="true"></i></div> |
| <span class="nm">Orchestration &amp; Agents</span> |
| <div class="tgs"><span>smolagents</span><span>CrewAI</span><span>LangGraph</span><span>MCP</span><span>A2A</span></div> |
| </div> |
| <div class="fcon"></div> |
| <div class="lyr" data-key="llm" style="--lc:var(--c-llm)"> |
| <div class="ico"><i class="fas fa-microchip" aria-hidden="true"></i></div> |
| <span class="nm">LLM Services <small>inference · embed · cache</small></span> |
| <div class="tgs"><span>Claude</span><span>DeepSeek</span><span>Kimi</span><span>Llama</span><span>LoRA</span></div> |
| </div> |
| <div class="fcon"></div> |
| <div class="lyr" data-key="data" style="--lc:var(--c-data)"> |
| <div class="ico"><i class="fas fa-database" aria-hidden="true"></i></div> |
| <span class="nm">Data &amp; Retrieval <small>store · pipeline · KG</small></span> |
| <div class="tgs"><span>pgvector</span><span>ChromaDB</span><span>Neo4j</span><span>MinIO</span><span>ETL</span></div> |
| </div> |
| <div class="fcon"></div> |
| <div class="lyr" data-key="infrastructure" style="--lc:var(--c-infra)"> |
| <div class="ico"><i class="fas fa-server" aria-hidden="true"></i></div> |
| <span class="nm">Infrastructure <small>compute · deploy · scale</small></span> |
| <div class="tgs"><span>Docker</span><span>K8s</span><span>Terraform</span><span>GPU</span><span>CI/CD</span></div> |
| </div> |
|
|
| |
| <!-- Continuous-improvement loop strip. The icon and the arrow glyphs are visual connectors only, so they are hidden from assistive tech and the step names are read on their own. --> |
| <div class="lifecycle" data-key="lifecycle"> |
| <i class="fas fa-arrows-spin lf-icon" aria-hidden="true"></i> |
| <span class="lf-label">Continuous Improvement</span> |
| <div class="lf-steps"> |
| <span>Feedback</span><span class="arr" aria-hidden="true">→</span> |
| <span>Eval</span><span class="arr" aria-hidden="true">→</span> |
| <span>Fine-tune</span><span class="arr" aria-hidden="true">→</span> |
| <span>Deploy</span><span class="arr" aria-hidden="true">→</span> |
| <span>Monitor</span><span class="arr" aria-hidden="true">→</span> |
| <span class="arr" aria-hidden="true">↻</span> |
| </div> |
| </div> |
|
|
| |
| <!-- Development-practices strip; the icon is decorative and hidden from assistive tech. --> |
| <div class="devprac" data-key="devpractices"> |
| <i class="fas fa-code dp-icon" aria-hidden="true"></i> |
| <span class="dp-label">Dev Practices</span> |
| <span class="dp-sub">Testing · CI/CD · Code Review · Documentation · LLM Eval</span> |
| </div> |
| </div> |
| </div> |
| </div> |
|
|
| |
| <!-- Detail panel: four tabs (data-tab = overview | stack | code | patterns; .act marks the active one) above a scrollable body. #detBody starts with the empty-state placeholder; presumably the page script swaps in section content on selection. The placeholder icon is decorative and hidden from assistive tech. --> |
| <div class="det"> |
| <div class="det-tabs"> |
| <div class="dt act" data-tab="overview">Overview</div> |
| <div class="dt" data-tab="stack">Stack</div> |
| <div class="dt" data-tab="code">Code</div> |
| <div class="dt" data-tab="patterns">Patterns</div> |
| </div> |
| <div class="det-body" id="detBody"> |
| <div class="empty"> |
| <i class="fas fa-layer-group" aria-hidden="true"></i> |
| <span>Click any layer or bar to explore<br>Press <kbd style="font-family:var(--mono);font-size:0.65rem;padding:1px 4px;border:1px solid var(--border2);border-radius:3px;background:var(--bg2)">T</kbd> to trace a request</span> |
| </div> |
| </div> |
| </div> |
| </div> |
|
|
| |
| <!-- Trace status bar: hidden by the stylesheet until the .show class is added. #tFill is the progress fill (starts at width 0%); #tLbl, #tDesc and #tTime are the label, description and time slots, each starting with a placeholder value. The dot icon is decorative and hidden from assistive tech. --> |
| <div class="trace-bar" id="traceBar"> |
| <div class="t-prog"><div class="t-fill" id="tFill"></div></div> |
| <i class="fas fa-circle-dot" aria-hidden="true" style="color:var(--accent);font-size:0.55rem"></i> |
| <span class="t-lbl" id="tLbl">—</span> |
| <span class="t-desc" id="tDesc">—</span> |
| <span class="t-time" id="tTime">0ms</span> |
| </div> |
|
|
| <script> |
| // ═══════════════════════════════════════════════════════ |
| // DATA — all 11 sections |
| // ═══════════════════════════════════════════════════════ |
| const D = { |
| |
| // ─── CORE STACK ─────────────────────────────────────── |
| |
| presentation: { |
| title:"Presentation", color:"var(--c-present)", |
| overview:{ |
| purpose:"Every interface a user or system touches — browser UIs, chat, voice, API endpoints, CLI tools, mobile SDKs. Not all consumers are humans with browsers.", |
| components:[ |
| "Web frameworks — Vue 3, React, Svelte, Astro, plain HTML+Alpine", |
| "AI-native UIs — Gradio, Streamlit, Chainlit, Open WebUI", |
| "Browser agents — OpenClaw / PicoClaw (single-file HTML + WebGPU/WebLLM)", |
| "API surfaces — REST, GraphQL, gRPC endpoints for machine consumers", |
| "CLI / Terminal — Claude Code pattern, click-based TUIs, REPL interfaces", |
| "Multi-modal I/O — text, voice (Whisper STT / TTS), image, video, PDF", |
| "Real-time — WebSockets, SSE (server-sent events), streaming token delivery", |
| "Mobile / native — React Native, Flutter, native SDKs wrapping the API layer" |
| ] |
| }, |
| stack:["Vue 3","React","Svelte","Gradio","Streamlit","Chainlit","Open WebUI","OpenClaw","WebSockets","SSE","Whisper","FastAPI","GraphQL","gRPC","CLI"], |
| code:`<span class="c"><!-- OpenClaw: single-file browser agent (WebGPU) --></span> |
| <span class="k"><script</span> type=<span class="s">"module"</span><span class="k">></span> |
| <span class="k">import</span> { CreateMLCEngine } <span class="k">from</span> |
| <span class="s">"https://esm.run/@mlc-ai/web-llm"</span>; |
| |
| <span class="c">// Hermes-3 running entirely client-side</span> |
| <span class="k">const</span> engine = <span class="k">await</span> CreateMLCEngine( |
| <span class="s">"Hermes-3-Llama-3.1-8B-q4f16_1-MLC"</span>, |
| { initProgressCallback: p => |
| status.textContent = p.text } |
| ); |
| |
| <span class="c">// stream response with tool-calling</span> |
| <span class="k">const</span> stream = <span class="k">await</span> engine.chat.completions |
| .create({ |
| messages: conversation, |
| stream: <span class="f">true</span>, |
| tools: toolDefinitions, |
| }); |
| |
| <span class="k">for await</span> (<span class="k">const</span> chunk <span class="k">of</span> stream) { |
| output.textContent += |
| chunk.choices[<span class="f">0</span>]?.delta?.content || <span class="s">""</span>; |
| } |
| <span class="k"></script></span>`, |
| patterns:`<svg viewBox="0 0 340 90" xmlns="http://www.w3.org/2000/svg"> |
| <text x="8" y="14" fill="#ef5350" font-size="8" font-family="IBM Plex Mono" font-weight="600">Multi-Surface Architecture</text> |
| <rect x="8" y="24" width="60" height="55" rx="4" fill="none" stroke="#ef5350" stroke-width="1.2"/> |
| <text x="38" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Browser</text> |
| <text x="38" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Chat UI</text> |
| <rect x="76" y="24" width="60" height="55" rx="4" fill="none" stroke="#ff8a50" stroke-width="1.2"/> |
| <text x="106" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">CLI</text> |
| <text x="106" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Terminal</text> |
| <rect x="144" y="24" width="60" height="55" rx="4" fill="none" stroke="#f5a623" stroke-width="1.2"/> |
| <text x="174" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">API</text> |
| <text x="174" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">REST/gRPC</text> |
| <rect x="212" y="24" width="60" height="55" rx="4" fill="none" stroke="#22c982" stroke-width="1.2"/> |
| <text x="242" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Mobile</text> |
| <text x="242" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">SDK</text> |
| <rect x="280" y="24" width="52" height="55" rx="4" fill="none" stroke="#00d4e6" stroke-width="1.2"/> |
| <text x="306" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Voice</text> |
| <text x="306" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Whisper</text> |
| </svg>` |
| }, |
| |
| orchestration: { |
| title:"Orchestration & Agents", color:"var(--c-orch)", |
| overview:{ |
| purpose:"The brain — agent lifecycle, multi-step reasoning (ReAct), RAG pipelines, tool routing, workflow orchestration, human-in-the-loop, retries & circuit breakers.", |
| components:[ |
| "Agent frameworks — smolagents (HF), CrewAI, AutoGen, Agent Zero, Hermes function-calling", |
| "Workflow engines — LangGraph, Prefect, Temporal, Airflow", |
| "RAG pipeline — chunking strategy → embed → retrieve → rerank → generate", |
| "Tool routing — MCP servers, OpenAI-style function calling, A2A (Agent-to-Agent protocol)", |
| "Prompt management — versioning, templates, A/B testing, dynamic few-shot selection", |
| "Context window management — token budgeting, sliding window, summarization", |
| "Resilience — retry with exponential backoff, circuit breakers, fallback chains, timeout policies", |
| "Human-in-the-loop — approval gates, escalation, confidence thresholds" |
| ] |
| }, |
| stack:["smolagents","CrewAI","AutoGen","Agent Zero","Hermes","LangGraph","Prefect","Temporal","LangChain","MCP","A2A","Haystack","DSPy"], |
| code:`<span class="c"># smolagents: multi-step code agent with MCP tools</span> |
| <span class="k">from</span> smolagents <span class="k">import</span> ( |
| CodeAgent, HfApiModel, tool |
| ) |
| |
| <span class="t">@tool</span> |
| <span class="k">def</span> <span class="f">search_catalog</span>(query: str) -> str: |
| <span class="s">"""Search product catalog."""</span> |
| <span class="k">return</span> catalog.search(query, limit=<span class="f">10</span>) |
| |
| <span class="t">@tool</span> |
| <span class="k">def</span> <span class="f">check_gdpr_status</span>( |
| system: str, record_id: str |
| ) -> str: |
| <span class="s">"""Check deletion status."""</span> |
| <span class="k">return</span> deltionprotovcoll.check(system, record_id) |
| |
| model = HfApiModel(<span class="s">"Qwen/Qwen2.5-72B"</span>) |
| agent = CodeAgent( |
| tools=[search_catalog, check_gdpr_status], |
| model=model, |
| max_steps=<span class="f">8</span>, |
| planning_interval=<span class="f">3</span>, |
| ) |
| |
| <span class="c"># Agent will: plan → search → observe → check → report</span> |
| result = agent.run( |
| <span class="s">"Find frozen pizza products, check their"</span> |
| <span class="s">" price in SAP"</span> |
| )`, |
| patterns:`<svg viewBox="0 0 340 110" xmlns="http://www.w3.org/2000/svg"> |
| <text x="8" y="14" fill="#22c982" font-size="8" font-family="IBM Plex Mono" font-weight="600">ReAct Agent Loop + Circuit Breaker</text> |
| <rect x="120" y="24" width="80" height="26" rx="5" fill="none" stroke="#22c982" stroke-width="1.3"/> |
| <text x="160" y="41" text-anchor="middle" fill="#c8d0e0" font-size="8" font-family="IBM Plex Mono">Think/Plan</text> |
| <path d="M200 37 Q240 37 240 57 Q240 77 200 77" fill="none" stroke="#4a90ff" stroke-width="1"/> |
| <text x="255" y="60" fill="#4a90ff" font-size="7" font-family="IBM Plex Mono">Act (tools)</text> |
| <rect x="120" y="64" width="80" height="26" rx="5" fill="none" stroke="#f5a623" stroke-width="1.3"/> |
| <text x="160" y="81" text-anchor="middle" fill="#c8d0e0" font-size="8" font-family="IBM Plex Mono">Observe</text> |
| <path d="M120 77 Q80 77 80 57 Q80 37 120 37" fill="none" stroke="#f06292" stroke-width="1"/> |
| <text x="42" y="60" fill="#f06292" font-size="7" font-family="IBM Plex Mono">Loop</text> |
| <rect x="120" y="93" width="80" height="14" rx="3" fill="none" stroke="#ef5350" stroke-width="0.8" stroke-dasharray="3"/> |
| <text x="160" y="103" text-anchor="middle" fill="#ef5350" font-size="6" font-family="IBM Plex Mono">circuit breaker / timeout</text> |
| </svg>` |
| }, |
| |
| llm: { |
| title:"LLM Services", color:"var(--c-llm)", |
| overview:{ |
| purpose:"Three operationally distinct sub-services: real-time inference (low latency, auto-scale), embedding service (high throughput, batch), offline fine-tuning (GPU-intensive, scheduled). Each scales differently.", |
| components:[ |
| "ᴿᵀ Inference — Commercial: Claude, GPT-4o, Gemini 2, Kimi (Moonshot); Open: DeepSeek-V3/R1, Llama 3.x, Mistral, Qwen 2.5", |
| "ᴿᵀ Inference — Local: LM Studio, Ollama, vLLM, TGI, llama.cpp", |
| "ᴿᵀ Prompt caching — Claude prompt caching, Gemini context caching (massive cost reduction)", |
| "ᴿᵀ Structured output — JSON mode, tool use schemas, constrained generation (Outlines, LMQL)", |
| "ᴿᵀ Multimodal — vision (image→text), audio (Whisper), PDF parsing, video understanding", |
| "ᴱᴹᴮ Embedding service — BGE-M3, Nomic-embed, OpenAI text-embedding-3, sentence-transformers", |
| "ᶠᵀ Fine-tuning — LoRA, QLoRA, full SFT, DPO/RLHF, Unsloth, axolotl", |
| "ᶠᵀ Quantization — GGUF, AWQ, GPTQ, BitNet 1.58b, EXL2", |
| "Routing — litellm, OpenRouter, latency-based + cost-based routing, fallback chains" |
| ] |
| }, |
| stack:["Claude","GPT-4o","Gemini 2","Kimi","DeepSeek-V3","Llama 3","Mistral","Qwen 2.5","LM Studio","Ollama","vLLM","TGI","litellm","BGE-M3","Nomic","LoRA","QLoRA","Unsloth","BitNet","Outlines","LMQL"], |
| code:`<span class="c"># litellm: multi-provider routing + caching</span> |
| <span class="k">from</span> litellm <span class="k">import</span> Router |
| |
| router = Router(model_list=[ |
| {<span class="s">"model_name"</span>: <span class="s">"best"</span>, |
| <span class="s">"litellm_params"</span>: { |
| <span class="s">"model"</span>: <span class="s">"claude-sonnet-4-20250514"</span>, |
| <span class="s">"cache"</span>: {<span class="s">"type"</span>: <span class="s">"prompt_caching"</span>}}}, |
| {<span class="s">"model_name"</span>: <span class="s">"best"</span>, |
| <span class="s">"litellm_params"</span>: { |
| <span class="s">"model"</span>: <span class="s">"deepseek/deepseek-chat"</span>}}, |
| {<span class="s">"model_name"</span>: <span class="s">"local"</span>, |
| <span class="s">"litellm_params"</span>: { |
| <span class="s">"model"</span>: <span class="s">"ollama/llama3.1"</span>, |
| <span class="s">"api_base"</span>: <span class="s">"http://192.168.188.25:1234"</span>}}, |
| ], routing_strategy=<span class="s">"latency-based-routing"</span>) |
| |
| <span class="c"># structured output (JSON mode)</span> |
| resp = <span class="k">await</span> router.acompletion( |
| model=<span class="s">"best"</span>, |
| messages=[{<span class="s">"role"</span>:<span class="s">"user"</span>, <span class="s">"content"</span>:prompt}], |
| response_format={<span class="s">"type"</span>: <span class="s">"json_object"</span>}, |
| )`, |
| patterns:`<svg viewBox="0 0 340 100" xmlns="http://www.w3.org/2000/svg"> |
| <text x="8" y="14" fill="#7c5cfc" font-size="8" font-family="IBM Plex Mono" font-weight="600">Three Sub-Services (different scaling profiles)</text> |
| <rect x="8" y="24" width="100" height="65" rx="5" fill="none" stroke="#7c5cfc" stroke-width="1.2"/> |
| <text x="58" y="40" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono" font-weight="600">RT Inference</text> |
| <text x="58" y="52" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">low latency</text> |
| <text x="58" y="62" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">auto-scale</text> |
| <text x="58" y="72" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">prompt cache</text> |
| <text x="58" y="82" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">multi-modal</text> |
| <rect x="118" y="24" width="100" height="65" rx="5" fill="none" stroke="#22c982" stroke-width="1.2"/> |
| <text x="168" y="40" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono" font-weight="600">Embedding Svc</text> |
| <text x="168" y="52" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">high throughput</text> |
| <text x="168" y="62" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">batch-friendly</text> |
| <text x="168" y="72" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">CPU or GPU</text> |
| <rect x="228" y="24" width="100" height="65" rx="5" fill="none" stroke="#f5a623" stroke-width="1.2"/> |
| <text x="278" y="40" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono" font-weight="600">Fine-tuning</text> |
| <text x="278" y="52" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">GPU-intensive</text> |
| <text x="278" y="62" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">scheduled/batch</text> |
| <text x="278" y="72" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">LoRA/QLoRA</text> |
| <text x="278" y="82" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">quantization</text> |
| </svg>` |
| }, |
| |
| data: { |
| title:"Data & Retrieval", color:"var(--c-data)", |
| overview:{ |
| purpose:"Storage AND the pipeline that feeds it. Covers relational, vector, document, graph, cache, object storage, plus the ingestion/chunking/ETL pipeline that most RAG projects actually break on.", |
| components:[ |
| "Relational — PostgreSQL, MySQL, SQLite", |
| "Vector stores — pgvector, ChromaDB, Qdrant, Weaviate, Pinecone", |
| "Graph — Neo4j, ArangoDB (knowledge graphs, entity relationships)", |
| "Document — MongoDB, Elasticsearch (full-text + BM25)", |
| "Cache — Redis, DragonflyDB, Memcached (hot tier)", |
| "Object/File — MinIO (S3-compat, S3 Object Lock for WORM/GDPR)", |
| "Ingestion pipeline — web scraping, XML/RSS feeds, PDF parsing, OCR", |
| "Chunking strategies — fixed-size, semantic, parent-child, recursive", |
| "Data versioning — DVC, lakeFS (reproducible experiments)", |
| "Storage tiers — hot (Redis) → warm (PostgreSQL) → cold (S3/MinIO)" |
| ] |
| }, |
| stack:["PostgreSQL","pgvector","ChromaDB","Qdrant","Weaviate","Neo4j","MongoDB","Elasticsearch","Redis","DragonflyDB","MinIO","DVC","lakeFS","Unstructured","LlamaParse"], |
| code:`<span class="c">-- pgvector: hybrid search (vector + BM25)</span> |
| <span class="k">WITH</span> semantic <span class="k">AS</span> ( |
| <span class="k">SELECT</span> id, content, metadata, |
| embedding <span class="k"><=></span> <span class="s">$1</span> <span class="k">AS</span> vec_dist, |
| ts_rank(tsv, plainto_tsquery(<span class="s">$2</span>)) <span class="k">AS</span> bm25 |
| <span class="k">FROM</span> documents |
| <span class="k">WHERE</span> embedding <span class="k"><=></span> <span class="s">$1</span> < <span class="f">0.8</span> |
| <span class="k">AND</span> tenant_id = <span class="s">$3</span> <span class="c">-- data residency</span> |
| ) |
| <span class="k">SELECT</span> *, (<span class="f">0.7</span> * (<span class="f">1</span>-vec_dist) + <span class="f">0.3</span> * bm25) |
| <span class="k">AS</span> score |
| <span class="k">FROM</span> semantic <span class="k">ORDER BY</span> score <span class="k">DESC</span> |
| <span class="k">LIMIT</span> <span class="f">10</span>; |
| |
| <span class="c">-- Knowledge graph enrichment</span> |
| <span class="c">-- MATCH (p:Product)-[:HAS_CATEGORY]->(c)</span> |
| <span class="c">-- WHERE p.name =~ '.*Pizza.*'</span> |
| <span class="c">-- RETURN p, c, p.gdpr_status</span>`, |
| patterns:`<svg viewBox="0 0 340 100" xmlns="http://www.w3.org/2000/svg"> |
| <text x="8" y="14" fill="#f5a623" font-size="8" font-family="IBM Plex Mono" font-weight="600">Ingestion Pipeline + Hybrid Retrieval</text> |
| <rect x="8" y="26" width="50" height="24" rx="4" fill="none" stroke="#6a7a9b" stroke-width="1"/> |
| <text x="33" y="42" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Source</text> |
| <line x1="58" y1="38" x2="72" y2="38" stroke="#2a3a5c" stroke-width="1"/> |
| <rect x="72" y="26" width="50" height="24" rx="4" fill="none" stroke="#ff8a50" stroke-width="1"/> |
| <text x="97" y="42" text-anchor="middle" fill="#ff8a50" font-size="6" font-family="IBM Plex Mono">Parse</text> |
| <line x1="122" y1="38" x2="136" y2="38" stroke="#2a3a5c" stroke-width="1"/> |
| <rect x="136" y="26" width="50" height="24" rx="4" fill="none" stroke="#22c982" stroke-width="1"/> |
| <text x="161" y="42" text-anchor="middle" fill="#22c982" font-size="6" font-family="IBM Plex Mono">Chunk</text> |
| <line x1="186" y1="38" x2="200" y2="38" stroke="#2a3a5c" stroke-width="1"/> |
| <rect x="200" y="26" width="50" height="24" rx="4" fill="none" stroke="#7c5cfc" stroke-width="1"/> |
| <text x="225" y="42" text-anchor="middle" fill="#7c5cfc" font-size="6" font-family="IBM Plex Mono">Embed</text> |
| <line x1="250" y1="38" x2="264" y2="38" stroke="#2a3a5c" stroke-width="1"/> |
| <rect x="264" y="26" width="62" height="24" rx="4" fill="none" stroke="#f5a623" stroke-width="1.2"/> |
| <text x="295" y="42" text-anchor="middle" fill="#f5a623" font-size="6" font-family="IBM Plex Mono">Store</text> |
| <text x="170" y="70" text-anchor="middle" fill="#3a4a6b" font-size="7" font-family="IBM Plex Mono">↑ This pipeline is where most RAG projects break</text> |
| <text x="170" y="82" text-anchor="middle" fill="#3a4a6b" font-size="7" font-family="IBM Plex Mono">Hot (Redis) → Warm (PostgreSQL) → Cold (MinIO/S3)</text> |
| </svg>` |
| }, |
| |
| infrastructure: { |
| title:"Infrastructure", color:"var(--c-infra)", |
| overview:{ |
| purpose:"Without this layer, nothing runs. Compute provisioning, containerization, orchestration, GPU management, networking, CI/CD deployment pipelines.", |
| components:[ |
| "Containers — Docker, Podman, OCI images", |
| "Orchestration — Kubernetes, Docker Compose, Nomad", |
| "IaC — Terraform, Pulumi, Ansible, CloudFormation", |
| "GPU provisioning — NVIDIA Container Toolkit, MIG, time-slicing", |
| "CI/CD — GitHub Actions, GitLab CI, ArgoCD, Flux", |
| "Networking — Ingress (Nginx, Traefik), service mesh (Istio, Linkerd)", |
| "Registries — Harbor, ECR, GHCR (container images + model artifacts)", |
| "Environments — dev → staging → prod with promotion gates" |
| ] |
| }, |
| stack:["Docker","Kubernetes","Terraform","Pulumi","Ansible","GitHub Actions","ArgoCD","NVIDIA Container Toolkit","Traefik","Nginx","Istio","Harbor"], |
| code:`<span class="c"># docker-compose.yml — AI platform stack</span> |
| <span class="k">services</span>: |
| <span class="k">api</span>: |
| image: <span class="s">ai-platform/api:latest</span> |
| environment: |
| - <span class="s">LLM_PROVIDER=litellm</span> |
| - <span class="s">VECTOR_DB=pgvector</span> |
| depends_on: [postgres, redis, minio] |
| |
| <span class="k">vllm</span>: |
| image: <span class="s">vllm/vllm-openai:latest</span> |
| deploy: |
| resources: |
| reservations: |
| devices: |
| - driver: <span class="s">nvidia</span> |
| count: <span class="f">1</span> |
| capabilities: [<span class="s">gpu</span>] |
| command: > |
| --model <span class="s">meta-llama/Llama-3.1-8B</span> |
| --max-model-len <span class="f">8192</span> |
| --gpu-memory-utilization <span class="f">0.9</span> |
| |
| <span class="k">postgres</span>: |
| image: <span class="s">pgvector/pgvector:pg16</span> |
| volumes: [<span class="s">pg_data:/var/lib/postgresql/data</span>] |
| |
| <span class="k">minio</span>: |
| image: <span class="s">minio/minio:latest</span> |
| command: <span class="s">server /data --console-address ":9001"</span> |
| environment: |
| - <span class="s">MINIO_OBJECT_LOCKING=on</span> <span class="c"># WORM for GDPR</span>`, |
| patterns:`<svg viewBox="0 0 340 90" xmlns="http://www.w3.org/2000/svg"> |
| <text x="8" y="14" fill="#38bdf8" font-size="8" font-family="IBM Plex Mono" font-weight="600">Deployment Pipeline</text> |
| <rect x="8" y="28" width="55" height="22" rx="4" fill="none" stroke="#6a7a9b" stroke-width="1"/> |
| <text x="35" y="43" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">git push</text> |
| <line x1="63" y1="39" x2="78" y2="39" stroke="#2a3a5c" stroke-width="1"/> |
| <rect x="78" y="28" width="55" height="22" rx="4" fill="none" stroke="#ff8a50" stroke-width="1"/> |
| <text x="105" y="43" text-anchor="middle" fill="#ff8a50" font-size="6" font-family="IBM Plex Mono">CI test</text> |
| <line x1="133" y1="39" x2="148" y2="39" stroke="#2a3a5c" stroke-width="1"/> |
| <rect x="148" y="28" width="55" height="22" rx="4" fill="none" stroke="#7c5cfc" stroke-width="1"/> |
| <text x="175" y="43" text-anchor="middle" fill="#7c5cfc" font-size="6" font-family="IBM Plex Mono">Build</text> |
| <line x1="203" y1="39" x2="218" y2="39" stroke="#2a3a5c" stroke-width="1"/> |
| <rect x="218" y="28" width="55" height="22" rx="4" fill="none" stroke="#22c982" stroke-width="1"/> |
| <text x="245" y="43" text-anchor="middle" fill="#22c982" font-size="6" font-family="IBM Plex Mono">Deploy</text> |
| <line x1="273" y1="39" x2="288" y2="39" stroke="#2a3a5c" stroke-width="1"/> |
| <rect x="288" y="28" width="44" height="22" rx="4" fill="none" stroke="#38bdf8" stroke-width="1.2"/> |
| <text x="310" y="43" text-anchor="middle" fill="#38bdf8" font-size="6" font-family="IBM Plex Mono">Prod</text> |
| <text x="170" y="70" text-anchor="middle" fill="#3a4a6b" font-size="7" font-family="IBM Plex Mono">DEV → STAGING → PROD with promotion gates</text> |
| </svg>` |
| }, |
| |
| // ─── CROSS-CUTTING ──────────────────────────────────── |
| |
| security: { |
| title:"Security & Compliance", color:"var(--c-sec)", |
| overview:{ |
| purpose:"Applied AT every layer independently — auth at the API gateway, PII redaction in orchestration, output validation at presentation, GDPR compliance in data, cost controls per user.", |
| components:[ |
| "Authentication — OAuth2, OIDC, JWT, SAML, Keycloak", |
| "Authorization — RBAC, ABAC, OPA policies, row-level security", |
| "AI Safety — Llama Guard 3, NeMo Guardrails, Claude built-in moderation", |
| "PII detection & redaction — Presidio, custom NER, regex (multi-language DE/NL/FR)", |
| "Prompt injection defense — input sanitization, output validation, sandwich defense", |
| "GDPR compliance — workflows, audit trails, data residency per country", |
| "Data residency / sovereignty — DE vs NL vs FR data routing (critical for multi-country)", |
| "Rate limiting & abuse prevention — per-user, per-model, per-endpoint", |
| "Cost controls — budget caps per user/team, model access policies", |
| "Content moderation — toxicity, bias, factuality scoring" |
| ] |
| }, |
| stack:["OAuth2","JWT","Keycloak","OPA","Llama Guard 3","NeMo Guardrails","Presidio","RBAC","ABAC","GDPR","S3 Object Lock"], |
| code:`<span class="c"># multi-layer security: applied at each layer</span> |
| |
| <span class="c"># ① Presentation: rate limit</span> |
| <span class="t">@app.middleware</span>(<span class="s">"http"</span>) |
| <span class="k">async def</span> <span class="f">rate_limit</span>(request, call_next): |
| user = get_user(request) |
| <span class="k">if</span> <span class="k">await</span> redis.incr(f<span class="s">"rl:{user.id}"</span>) > <span class="f">100</span>: |
| <span class="k">raise</span> HTTPException(<span class="f">429</span>) |
| <span class="k">return await</span> call_next(request) |
| |
| <span class="c"># ② Orchestration: PII redaction</span> |
| analyzer = AnalyzerEngine() |
| results = analyzer.analyze(user_input, <span class="s">"de"</span>) |
| redacted = anonymize(user_input, results) |
| |
| <span class="c"># ③ LLM: guardrails</span> |
| rails = LLMRails(guardrails_config) |
| safe = <span class="k">await</span> rails.generate(messages) |
| |
| <span class="c"># ④ Data: residency routing</span> |
| db = get_db_for_country(user.country) |
| <span class="c"># DE → eu-central, NL → eu-west, FR → eu-west</span>`, |
| patterns:`<svg viewBox="0 0 340 100" xmlns="http://www.w3.org/2000/svg"> |
| <text x="8" y="14" fill="#f06292" font-size="8" font-family="IBM Plex Mono" font-weight="600">Security Applied Per Layer (not wrapping)</text> |
| <line x1="18" y1="25" x2="18" y2="90" stroke="#f06292" stroke-width="1.5" opacity="0.5"/> |
| <rect x="30" y="24" width="290" height="14" rx="3" fill="none" stroke="#ef5350" stroke-width="0.8"/> |
| <text x="175" y="34" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Presentation: rate limit, auth, CORS</text> |
| <rect x="30" y="42" width="290" height="14" rx="3" fill="none" stroke="#22c982" stroke-width="0.8"/> |
| <text x="175" y="52" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Orchestration: PII redaction, prompt injection defense</text> |
| <rect x="30" y="60" width="290" height="14" rx="3" fill="none" stroke="#7c5cfc" stroke-width="0.8"/> |
| <text x="175" y="70" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">LLM: guardrails, output validation, cost caps</text> |
| <rect x="30" y="78" width="290" height="14" rx="3" fill="none" stroke="#f5a623" stroke-width="0.8"/> |
| <text x="175" y="88" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Data: encryption, residency, GDPR AI ACT</text> |
| </svg>` |
| }, |
| |
| observability: { |
| title:"Observability", color:"var(--c-obs)", |
| overview:{ |
| purpose:"Instruments INSIDE each layer — not around it. LLM-specific traces (token cost, latency, quality), system metrics, distributed tracing, cost analytics, alerting.", |
| components:[ |
| "LLM Observability — LangSmith, LangFuse, Helicone (cost/token/latency per call)", |
| "Infrastructure metrics — Prometheus + Grafana dashboards", |
| "Distributed tracing — OpenTelemetry, Jaeger, Tempo (end-to-end request tracing)", |
| "Log aggregation — Loki, ELK, Datadog logs", |
| "Cost analytics — per-model, per-user, per-feature cost breakdowns", |
| "Quality metrics — RAGAS scores, hallucination rate, retrieval precision/recall", |
| "Alerting — PagerDuty, Opsgenie, Slack (latency SLA, error rate, cost spike)", |
| "Custom dashboards — model comparison, A/B test results, drift detection" |
| ] |
| }, |
| stack:["LangSmith","LangFuse","Helicone","Prometheus","Grafana","OpenTelemetry","Jaeger","Loki","Datadog","Tempo","PagerDuty"], |
| code:`<span class="c"># observability: instrumented at each layer</span> |
| <span class="k">from</span> langfuse.decorators <span class="k">import</span> observe |
| <span class="k">from</span> opentelemetry <span class="k">import</span> trace |
| <span class="k">from</span> prometheus_client <span class="k">import</span> Histogram |
| |
| tracer = trace.get_tracer(<span class="s">"ai-platform"</span>) |
| llm_lat = Histogram(<span class="s">"llm_seconds"</span>, <span class="s">"LLM latency"</span>, |
| [<span class="s">"model"</span>,<span class="s">"provider"</span>]) |
| |
| <span class="t">@observe</span>(name=<span class="s">"rag-pipeline"</span>) |
| <span class="k">async def</span> <span class="f">answer</span>(query): |
| <span class="k">with</span> tracer.start_as_current_span(<span class="s">"retrieve"</span>): |
| chunks = <span class="k">await</span> retrieve(query) |
| |
| <span class="k">with</span> tracer.start_as_current_span(<span class="s">"generate"</span>): |
| <span class="k">with</span> llm_lat.labels(<span class="s">"claude"</span>,<span class="s">"anthropic"</span>)\\ |
| .time(): |
| resp = <span class="k">await</span> generate(query, chunks) |
| |
| <span class="c"># auto: tokens, cost, latency → LangFuse</span> |
| <span class="c"># auto: spans → Jaeger via OTel</span> |
| <span class="c"># auto: llm_seconds → Prometheus → Grafana</span> |
| <span class="k">return</span> resp`, |
| patterns:`<svg viewBox="0 0 340 90" xmlns="http://www.w3.org/2000/svg"> |
| <text x="8" y="14" fill="#9c7cfc" font-size="8" font-family="IBM Plex Mono" font-weight="600">Observability Stack (Three Pillars + LLM)</text> |
| <rect x="8" y="26" width="75" height="50" rx="5" fill="none" stroke="#9c7cfc" stroke-width="1"/> |
| <text x="45" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Metrics</text> |
| <text x="45" y="56" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Prometheus</text> |
| <text x="45" y="66" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Grafana</text> |
| <rect x="92" y="26" width="75" height="50" rx="5" fill="none" stroke="#4a90ff" stroke-width="1"/> |
| <text x="129" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Traces</text> |
| <text x="129" y="56" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">OTel</text> |
| <text x="129" y="66" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Jaeger</text> |
| <rect x="176" y="26" width="75" height="50" rx="5" fill="none" stroke="#22c982" stroke-width="1"/> |
| <text x="213" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Logs</text> |
| <text x="213" y="56" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Loki</text> |
| <text x="213" y="66" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">ELK</text> |
| <rect x="260" y="26" width="72" height="50" rx="5" fill="none" stroke="#f5a623" stroke-width="1.2"/> |
| <text x="296" y="44" text-anchor="middle" fill="#f5a623" font-size="7" font-family="IBM Plex Mono" font-weight="600">LLM</text> |
| <text x="296" y="56" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">LangFuse</text> |
| <text x="296" y="66" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">cost+quality</text> |
| </svg>` |
| }, |
| |
| configuration: { |
| title:"Configuration", color:"var(--c-conf)", |
| overview:{ |
| purpose:"Foundational — every layer reads from it. Secrets, env vars, model parameters, feature flags, runtime toggles. Not the outermost layer; it's the first thing any layer bootstraps from.", |
| components:[ |
| "Secret management — Vault, SOPS, AWS Secrets Manager, sealed-secrets", |
| "Config distribution — ConfigMaps, etcd, Consul, Spring Cloud Config", |
| "Token & API key rotation with automatic expiry and alerting", |
| "Model parameter registry — MLflow, W&B (hyperparams, versions, metadata)", |
| "Feature flags — LaunchDarkly, Unleash, Flagsmith (toggle features per env/user)", |
| "Environment promotion — dev → staging → prod with config drift detection" |
| ] |
| }, |
| stack:["Vault","etcd","Consul","ConfigMaps","MLflow","W&B","LaunchDarkly","Unleash","SOPS","dotenv","AWS SSM"], |
| code:`<span class="c"># config hierarchy: secrets → env → flags → runtime</span> |
| <span class="k">from</span> pydantic_settings <span class="k">import</span> BaseSettings |
| |
| <span class="k">class</span> <span class="t">Config</span>(BaseSettings): |
| <span class="c"># from Vault (highest priority)</span> |
| anthropic_api_key: str |
| db_password: str |
| |
| <span class="c"># from env / ConfigMap</span> |
| default_model: str = <span class="s">"claude-sonnet-4-20250514"</span> |
| fallback_models: list = [ |
| <span class="s">"deepseek-chat"</span>, <span class="s">"ollama/llama3.1"</span> |
| ] |
| embedding_model: str = <span class="s">"BAAI/bge-m3"</span> |
| max_tokens: int = <span class="f">4096</span> |
| |
| <span class="c"># from feature flags (runtime toggleable)</span> |
| enable_streaming: bool = <span class="f">True</span> |
| enable_prompt_caching: bool = <span class="f">True</span> |
| rag_reranking_enabled: bool = <span class="f">False</span> |
| |
| <span class="k">class</span> <span class="t">Config</span>: |
| env_prefix = <span class="s">"AI_"</span>`, |
| patterns:`<svg viewBox="0 0 340 75" xmlns="http://www.w3.org/2000/svg"> |
| <text x="8" y="14" fill="#6a7a9b" font-size="8" font-family="IBM Plex Mono" font-weight="600">Config Cascade (priority order)</text> |
| <rect x="8" y="24" width="75" height="40" rx="5" fill="none" stroke="#ef5350" stroke-width="1.2"/> |
| <text x="45" y="42" text-anchor="middle" fill="#ef5350" font-size="7" font-family="IBM Plex Mono">Vault</text> |
| <text x="45" y="53" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">secrets ①</text> |
| <line x1="83" y1="44" x2="93" y2="44" stroke="#2a3a5c" stroke-width="1"/> |
| <rect x="93" y="24" width="75" height="40" rx="5" fill="none" stroke="#f5a623" stroke-width="1"/> |
| <text x="130" y="42" text-anchor="middle" fill="#f5a623" font-size="7" font-family="IBM Plex Mono">Env</text> |
| <text x="130" y="53" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">config ②</text> |
| <line x1="168" y1="44" x2="178" y2="44" stroke="#2a3a5c" stroke-width="1"/> |
| <rect x="178" y="24" width="75" height="40" rx="5" fill="none" stroke="#22c982" stroke-width="1"/> |
| <text x="215" y="42" text-anchor="middle" fill="#22c982" font-size="7" font-family="IBM Plex Mono">Flags</text> |
| <text x="215" y="53" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">runtime ③</text> |
| <line x1="253" y1="44" x2="263" y2="44" stroke="#2a3a5c" stroke-width="1"/> |
| <rect x="263" y="24" width="68" height="40" rx="5" fill="none" stroke="#4a90ff" stroke-width="1"/> |
| <text x="297" y="42" text-anchor="middle" fill="#4a90ff" font-size="7" font-family="IBM Plex Mono">Default</text> |
| <text x="297" y="53" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">code ④</text> |
| </svg>` |
| }, |
| |
| memory: { |
| title:"Memory & State", color:"var(--c-mem)", |
| overview:{ |
| purpose:"Conversation history, agent working memory, episodic memory, session state, user preferences. Redis-as-cache is NOT the same as a memory system. Critical for multi-turn agents.", |
| components:[ |
| "Conversation history — per-session message buffer, sliding window + summarization", |
| "Agent working memory — scratchpad for multi-step reasoning, intermediate results", |
| "Episodic memory — long-term retrieval of past interactions (mem0, Zep, custom)", |
| "Semantic memory — knowledge distilled from conversations into structured facts", |
| "Session state — user context, preferences, auth tokens, active tool state", |
| "Shared state — multi-agent shared blackboard for cooperative task solving", |
| "Persistence — conversation → DB, with TTL policies and GDPR-compliant deletion" |
| ] |
| }, |
| stack:["Redis","mem0","Zep","PostgreSQL","LangGraph checkpointing","Custom episodic stores"], |
| code:`<span class="c"># memory system: layered (working → episodic → semantic)</span> |
| <span class="k">from</span> mem0 <span class="k">import</span> Memory |
| |
| memory = Memory.from_config({ |
| <span class="s">"vector_store"</span>: { |
| <span class="s">"provider"</span>: <span class="s">"qdrant"</span>, |
| <span class="s">"config"</span>: {<span class="s">"url"</span>: <span class="s">"http://localhost:6333"</span>} |
| } |
| }) |
| |
| <span class="c"># add interaction to long-term memory</span> |
| memory.add( |
| <span class="s">"User asked about GDPR Art.17 deletion"</span> |
| <span class="s">" for SAP records in DE region"</span>, |
| user_id=<span class="s">"christof"</span>, |
| metadata={<span class="s">"topic"</span>: <span class="s">"gdpr"</span>, <span class="s">"system"</span>: <span class="s">"sap"</span>} |
| ) |
| |
| <span class="c"># retrieve relevant memories for context</span> |
| memories = memory.search( |
| <span class="s">"GDPR deletion status"</span>, |
| user_id=<span class="s">"christof"</span>, |
| limit=<span class="f">5</span> |
| ) |
| <span class="c"># → inject into system prompt as context</span>`, |
| patterns:`<svg viewBox="0 0 340 90" xmlns="http://www.w3.org/2000/svg"> |
| <text x="8" y="14" fill="#00d4e6" font-size="8" font-family="IBM Plex Mono" font-weight="600">Memory Hierarchy</text> |
| <rect x="8" y="26" width="100" height="50" rx="5" fill="none" stroke="#ef5350" stroke-width="1"/> |
| <text x="58" y="42" text-anchor="middle" fill="#ef5350" font-size="7" font-family="IBM Plex Mono">Working</text> |
| <text x="58" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">current turn</text> |
| <text x="58" y="64" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">scratchpad</text> |
| <rect x="118" y="26" width="100" height="50" rx="5" fill="none" stroke="#00d4e6" stroke-width="1.2"/> |
| <text x="168" y="42" text-anchor="middle" fill="#00d4e6" font-size="7" font-family="IBM Plex Mono">Episodic</text> |
| <text x="168" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">past interactions</text> |
| <text x="168" y="64" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">mem0 / Zep</text> |
| <rect x="228" y="26" width="100" height="50" rx="5" fill="none" stroke="#f5a623" stroke-width="1"/> |
| <text x="278" y="42" text-anchor="middle" fill="#f5a623" font-size="7" font-family="IBM Plex Mono">Semantic</text> |
| <text x="278" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">distilled facts</text> |
| <text x="278" y="64" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">knowledge graph</text> |
| </svg>` |
| }, |
| |
| // ─── LIFECYCLE & DEV ────────────────────────────────── |
| |
| lifecycle: { |
| title:"Continuous Improvement Loop", color:"var(--c-life)", |
| overview:{ |
| purpose:"The closed loop that makes the system LEARN. User feedback → evaluation → fine-tuning → deployment → monitoring → repeat. Without this, your system is frozen at day-one quality.", |
| components:[ |
| "User feedback — thumbs up/down, corrections, explicit ratings", |
| "Automated eval — RAGAS, DeepEval, custom eval harnesses on golden datasets", |
| "Drift detection — embedding drift, answer quality degradation over time", |
| "Fine-tuning trigger — when eval scores drop below threshold → retrain", |
| "Model versioning — MLflow model registry, A/B testing between versions", |
| "Canary deployment — gradual rollout of new model versions (5% → 25% → 100%)", |
| "Governance — approval gates for production model updates, rollback capability" |
| ] |
| }, |
| stack:["MLflow","DVC","RAGAS","DeepEval","W&B","ArgoCD","Canary deploys","A/B testing"], |
| code:`<span class="c"># feedback loop: eval → trigger retrain</span> |
| <span class="k">from</span> mlflow <span class="k">import</span> MlflowClient |
| |
| client = MlflowClient() |
| |
| <span class="c"># 1. evaluate current model on golden set</span> |
| scores = run_eval_suite( |
| model=<span class="s">"production"</span>, |
| dataset=<span class="s">"golden-v3"</span>, |
| metrics=[<span class="s">"relevancy"</span>,<span class="s">"faithfulness"</span>,<span class="s">"recall"</span>] |
| ) |
| |
| <span class="c"># 2. check if degraded</span> |
| <span class="k">if</span> scores[<span class="s">"relevancy"</span>] < <span class="f">0.75</span>: |
| <span class="c"># 3. trigger fine-tune with recent feedback</span> |
| new_run = trigger_finetune( |
| base_model=<span class="s">"llama3.1-8b"</span>, |
| dataset=<span class="s">"feedback-2025-q2"</span>, |
| method=<span class="s">"qlora"</span>, |
| ) |
| <span class="c"># 4. register + canary deploy</span> |
| client.transition_model_version_stage( |
| name=<span class="s">"rag-model"</span>, |
| version=new_run.version, |
| stage=<span class="s">"Staging"</span> <span class="c"># → canary 5%</span> |
| )`, |
| patterns:`<svg viewBox="0 0 340 90" xmlns="http://www.w3.org/2000/svg"> |
| <text x="8" y="14" fill="#84cc16" font-size="8" font-family="IBM Plex Mono" font-weight="600">Continuous Improvement Cycle</text> |
| <rect x="120" y="22" width="70" height="22" rx="4" fill="none" stroke="#84cc16" stroke-width="1.2"/> |
| <text x="155" y="37" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Deploy</text> |
| <path d="M190 33 Q230 33 230 50 Q230 67 190 67" fill="none" stroke="#9c7cfc" stroke-width="1"/> |
| <text x="240" y="53" fill="#9c7cfc" font-size="6" font-family="IBM Plex Mono">Monitor</text> |
| <rect x="120" y="56" width="70" height="22" rx="4" fill="none" stroke="#f5a623" stroke-width="1.2"/> |
| <text x="155" y="71" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Eval</text> |
| <path d="M120 67 Q80 67 80 50 Q80 33 120 33" fill="none" stroke="#ef5350" stroke-width="1"/> |
| <text x="48" y="53" fill="#ef5350" font-size="6" font-family="IBM Plex Mono">Feedback</text> |
| <text x="155" y="51" text-anchor="middle" fill="#3a4a6b" font-size="6" font-family="IBM Plex Mono">fine-tune</text> |
| </svg>` |
| }, |
| |
| devpractices: { |
| title:"Dev Practices", color:"var(--c-dev)", |
| overview:{ |
| purpose:"Development-time concerns — NOT a runtime layer. Testing, CI/CD, code review, documentation, LLM evals. These live in your pipeline, not in your production stack.", |
| components:[ |
| "Unit & Integration testing — pytest, vitest, Jest", |
| "E2E testing — Playwright, Cypress, Selenium", |
| "LLM Evaluation — DeepEval, RAGAS, custom eval harnesses, golden datasets", |
| "Prompt regression — compare outputs across model versions on fixed inputs", |
| "CI/CD — GitHub Actions, GitLab CI, pre-commit hooks", |
| "Code review — PR templates, architecture decision records (ADRs)", |
| "Documentation — API docs (OpenAPI), runbooks, architecture diagrams (this!)", |
| "Linting & formatting — ruff, black, prettier, eslint" |
| ] |
| }, |
| stack:["pytest","vitest","Playwright","Cypress","DeepEval","RAGAS","GitHub Actions","pre-commit","ruff","black","ADRs"], |
| code:`<span class="c"># CI pipeline: test → eval → deploy</span> |
| <span class="c"># .github/workflows/ai-platform.yml</span> |
| <span class="k">name</span>: <span class="s">AI Platform CI</span> |
| <span class="k">on</span>: [push, pull_request] |
| <span class="k">jobs</span>: |
| <span class="k">test</span>: |
| <span class="k">steps</span>: |
| - <span class="s">pytest tests/ -x --tb=short</span> |
| - <span class="s">playwright test e2e/</span> |
| |
| <span class="k">llm-eval</span>: |
| <span class="k">needs</span>: test |
| <span class="k">steps</span>: |
| - <span class="s">python eval/run_golden_set.py</span> |
| - <span class="k">if</span>: <span class="s">steps.eval.outputs.score < 0.7</span> |
| <span class="k">run</span>: <span class="s">echo "::error::Eval below threshold"</span> |
| |
| <span class="k">deploy</span>: |
| <span class="k">needs</span>: llm-eval |
| <span class="k">if</span>: <span class="s">github.ref == 'refs/heads/main'</span> |
| <span class="k">steps</span>: |
| - <span class="s">argocd app sync ai-platform --strategy canary</span>`, |
| patterns:`<svg viewBox="0 0 340 70" xmlns="http://www.w3.org/2000/svg"> |
| <text x="8" y="14" fill="#ff8a50" font-size="8" font-family="IBM Plex Mono" font-weight="600">Dev-Time vs Runtime (category distinction)</text> |
| <rect x="8" y="26" width="155" height="35" rx="5" fill="none" stroke="#ff8a50" stroke-width="1.2" stroke-dasharray="4"/> |
| <text x="85" y="40" text-anchor="middle" fill="#ff8a50" font-size="7" font-family="IBM Plex Mono">DEV-TIME</text> |
| <text x="85" y="52" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">test · lint · eval · CI/CD · review</text> |
| <rect x="175" y="26" width="155" height="35" rx="5" fill="none" stroke="#4a90ff" stroke-width="1.2"/> |
| <text x="252" y="40" text-anchor="middle" fill="#4a90ff" font-size="7" font-family="IBM Plex Mono">RUNTIME</text> |
| <text x="252" y="52" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">security · observability · config · memory</text> |
| </svg>` |
| } |
| |
| }; |
| |
| // ═══════════════════════════════════════════════════════ |
| // RENDERING |
| // ═══════════════════════════════════════════════════════ |
| let curKey=null, curTab='overview'; |
| const detBody=document.getElementById('detBody'); |
| const tabs=document.querySelectorAll('.dt'); |
| const allEl=document.querySelectorAll('.lyr,.xbar,.lifecycle,.devprac'); |
| |
| function render(key,tab){ |
| const d=D[key]; if(!d)return; |
| curKey=key; curTab=tab||'overview'; |
| tabs.forEach(t=>t.classList.toggle('act',t.dataset.tab===curTab)); |
| let h=`<div class="d-title">${d.title}</div><div class="d-bar" style="background:${d.color}"></div>`; |
| if(curTab==='overview'){ |
| h+=`<div class="d-h">Purpose</div><p class="d-p">${d.overview.purpose}</p>`; |
| h+=`<div class="d-h">Components</div><ul class="d-ul">${d.overview.components.map(c=>`<li>${c}</li>`).join('')}</ul>`; |
| }else if(curTab==='stack'){ |
| h+=`<div class="d-h">Technology Stack</div><div class="d-badges">${d.stack.map(t=>`<span class="d-badge">${t}</span>`).join('')}</div>`; |
| }else if(curTab==='code'){ |
| h+=`<div class="d-h">Example</div><div class="d-code">${d.code}</div>`; |
| }else if(curTab==='patterns'){ |
| h+=`<div class="d-h">Architecture Pattern</div><div class="d-pattern">${d.patterns}</div>`; |
| } |
| detBody.innerHTML=h; |
| } |
| |
| allEl.forEach(el=>{ |
| el.addEventListener('click',e=>{ |
| e.stopPropagation(); |
| const key=el.dataset.key; if(!key||!D[key])return; |
| allEl.forEach(x=>x.classList.remove('sel')); |
| el.classList.add('sel'); |
| render(key,curTab); |
| }); |
| }); |
| tabs.forEach(t=>t.addEventListener('click',()=>{if(curKey)render(curKey,t.dataset.tab)})); |
| |
| // ═══ KEYBOARD ═══ |
| const nav=['security','observability','configuration','memory','presentation','orchestration','llm','data','infrastructure','lifecycle','devpractices']; |
| let ni=-1; |
| document.addEventListener('keydown',e=>{ |
| if(e.key==='ArrowDown'||e.key==='ArrowUp'){ |
| e.preventDefault(); |
| ni=e.key==='ArrowDown'?Math.min(ni+1,nav.length-1):Math.max(ni-1,0); |
| const el=document.querySelector(`[data-key="${nav[ni]}"]`); |
| if(el){el.click();el.scrollIntoView({behavior:'smooth',block:'nearest'})} |
| } |
| if(e.key==='Enter'&&curKey){ |
| const ts=['overview','stack','code','patterns']; |
| render(curKey,ts[(ts.indexOf(curTab)+1)%ts.length]); |
| } |
| if((e.key==='t'||e.key==='T')&&!tracing)runTrace(traceReq); |
| }); |
| |
| // ═══ TRACE ═══ |
| let tracing=false; |
| const traceReq=[ |
| {key:'presentation', lbl:'① Presentation', desc:'User sends chat message via WebSocket',time:'0ms'}, |
| {key:'security', lbl:'② Security', desc:'Rate limit check → auth → input sanitization → PII redaction',time:'15ms'}, |
| {key:'configuration',lbl:'③ Config', desc:'Load model routing config, feature flags',time:'18ms'}, |
| {key:'orchestration', lbl:'④ Orchestration', desc:'Agent plans: Think → retrieve → generate',time:'32ms'}, |
| {key:'memory', lbl:'⑤ Memory', desc:'Load conversation history + relevant episodic memories',time:'45ms'}, |
| {key:'data', lbl:'⑥ Data', desc:'Hybrid pgvector search: 0.7×vector + 0.3×BM25, top-10',time:'78ms'}, |
| {key:'llm', lbl:'⑦ LLM', desc:'Claude generates with context + prompt cache hit (streaming)',time:'320ms'}, |
| {key:'security', lbl:'⑧ Security', desc:'Output validation: PII check, guardrails, toxicity',time:'328ms'}, |
| {key:'observability',lbl:'⑨ Observability', desc:'Trace logged → LangFuse (tokens, latency, cost, quality)',time:'332ms'}, |
| {key:'presentation', lbl:'⑩ Presentation', desc:'SSE stream delivers tokens to user',time:'340ms'}, |
| ]; |
| const traceRag=[ |
| {key:'presentation', lbl:'① Query', desc:'User: "What is GDPR Art.17?"',time:'0ms'}, |
| {key:'orchestration',lbl:'② Plan', desc:'RAG strategy selected: hybrid search + rerank',time:'12ms'}, |
| {key:'llm', lbl:'③ Embed', desc:'Query → BGE-M3 embedding (1024d)',time:'25ms'}, |
| {key:'data', lbl:'④ Retrieve', desc:'pgvector hybrid: 10 candidates from 50k chunks',time:'48ms'}, |
| {key:'orchestration',lbl:'⑤ Rerank', desc:'Cross-encoder reranks → top-5 relevant chunks',time:'110ms'}, |
| {key:'memory', lbl:'⑥ Context', desc:'Inject conversation history + past GDPR queries',time:'118ms'}, |
| {key:'llm', lbl:'⑦ Generate', desc:'Claude generates answer with 5 chunks + memory',time:'420ms'}, |
| {key:'observability',lbl:'⑧ Eval', desc:'RAGAS: relevancy=0.92 faithfulness=0.88 recall=0.85',time:'435ms'}, |
| ]; |
| const traceAgent=[ |
| {key:'presentation', lbl:'① Task', desc:'User: "Check pizza products GDPR deletion status"',time:'0ms'}, |
| {key:'orchestration',lbl:'② Plan', desc:'Agent creates 3-step plan (search → check → report)',time:'180ms'}, |
| {key:'orchestration',lbl:'③ Act', desc:'MCP tool call: search_catalog("frozen pizza")',time:'220ms'}, |
| {key:'data', lbl:'④ Fetch', desc:'Product catalog returns 12 pizza SKUs',time:'340ms'}, |
| {key:'orchestration',lbl:'⑤ Observe', desc:'Agent parses results, iterates GDPR checks',time:'500ms'}, |
| {key:'orchestration',lbl:'⑥ Act', desc:'MCP: check_gdpr_status(system="SAP", batch=12)',time:'540ms'}, |
| {key:'data', lbl:'⑦ Fetch', desc:'Deletion protocoll returns status for all 12 records',time:'680ms'}, |
| {key:'llm', lbl:'⑧ Synthesize',desc:'Agent generates formatted summary report',time:'1100ms'}, |
| {key:'observability',lbl:'⑨ Trace', desc:'8 steps, 3 tool calls, 4200 tokens logged',time:'1120ms'}, |
| {key:'presentation', lbl:'⑩ Respond', desc:'Table + status summary streamed to user',time:'1200ms'}, |
| ]; |
| const traceFeedback=[ |
| {key:'observability',lbl:'① Monitor', desc:'Quality score drift detected: relevancy dropped 0.82→0.71',time:'0ms'}, |
| {key:'data', lbl:'② Collect', desc:'Aggregate 2000 user feedback samples from last 30 days',time:'5s'}, |
| {key:'llm', lbl:'③ Fine-tune', desc:'QLoRA training on feedback dataset (RTX 5090, 2 epochs)',time:'45min'}, |
| {key:'infrastructure',lbl:'④ Build', desc:'Quantize to GGUF Q4_K_M, build container, push registry',time:'52min'}, |
| {key:'infrastructure',lbl:'⑤ Deploy', desc:'Canary rollout: 5% traffic to new model version',time:'53min'}, |
| {key:'observability',lbl:'⑥ Validate', desc:'A/B eval: new model relevancy=0.86 vs baseline=0.71',time:'2h'}, |
| {key:'configuration',lbl:'⑦ Promote', desc:'Feature flag: route 100% to new model, archive old',time:'2h'}, |
| ]; |
| |
| function runTrace(steps){ |
| if(tracing)return; tracing=true; |
| const bar=document.getElementById('traceBar'),fill=document.getElementById('tFill'); |
| const lbl=document.getElementById('tLbl'),desc=document.getElementById('tDesc'),tm=document.getElementById('tTime'); |
| bar.classList.add('show'); fill.style.width='0%'; |
| let i=0; |
| (function next(){ |
| if(i>=steps.length){setTimeout(()=>{bar.classList.remove('show');tracing=false},1800);return} |
| const s=steps[i]; |
| fill.style.width=((i+1)/steps.length*100).toFixed(0)+'%'; |
| lbl.textContent=s.lbl; desc.textContent=s.desc; tm.textContent=s.time; |
| const el=document.querySelector(`[data-key="${s.key}"]`); |
| if(el){el.classList.add('flash');el.click();setTimeout(()=>el.classList.remove('flash'),500)} |
| i++; setTimeout(next,700); |
| })(); |
| } |
| </script> |
| </body> |
| </html> |