<!doctype html>
<!-- Blame header: Eric Xu — "Redesign CTR calibration for marketer workflow — metric input upfront, not buried" (commit 9166125, unverified) -->
<html lang="en">
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>SGO — Semantic Gradient Optimization</title> | |
| <style> | |
| :root { | |
| --bg: #0a0a0f; | |
| --surface: #12121a; | |
| --surface2: #1a1a26; | |
| --border: #2a2a3a; | |
| --text: #e0e0e8; | |
| --text2: #8888a0; | |
| --accent: #6c5ce7; | |
| --accent2: #a29bfe; | |
| --green: #00b894; | |
| --yellow: #fdcb6e; | |
| --red: #e17055; | |
| --orange: #e67e22; | |
| --radius: 12px; | |
| } | |
| * { margin: 0; padding: 0; box-sizing: border-box; } | |
| body { | |
| font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; | |
| background: var(--bg); | |
| color: var(--text); | |
| line-height: 1.6; | |
| min-height: 100vh; | |
| } | |
| .container { max-width: 900px; margin: 0 auto; padding: 24px 20px; } | |
| /* Header */ | |
| header { | |
| text-align: center; | |
| padding: 48px 0 32px; | |
| border-bottom: 1px solid var(--border); | |
| margin-bottom: 32px; | |
| } | |
/* Gradient-filled page title: the gradient background is clipped to the glyph
   shapes and the text fill is made transparent so the gradient shows through. */
header h1 {
  font-size: 2rem;
  font-weight: 700;
  letter-spacing: -0.03em;
  background: linear-gradient(135deg, var(--accent2), var(--accent));
  -webkit-background-clip: text;
  background-clip: text; /* standard property alongside the prefixed one */
  -webkit-text-fill-color: transparent;
}
| header p { color: var(--text2); margin-top: 8px; font-size: 1rem; } | |
| .config-badge { | |
| display: inline-block; | |
| margin-top: 12px; | |
| padding: 4px 12px; | |
| background: var(--surface2); | |
| border: 1px solid var(--border); | |
| border-radius: 20px; | |
| font-size: 0.8rem; | |
| color: var(--text2); | |
| } | |
| .config-badge.ok { border-color: var(--green); color: var(--green); } | |
| .config-badge.warn { border-color: var(--yellow); color: var(--yellow); } | |
| /* Steps */ | |
| .step { | |
| background: var(--surface); | |
| border: 1px solid var(--border); | |
| border-radius: var(--radius); | |
| padding: 28px; | |
| margin-bottom: 20px; | |
| transition: border-color 0.2s; | |
| } | |
| .step.active { border-color: var(--accent); } | |
| .step.done { border-color: var(--green); } | |
| .step-header { | |
| display: flex; | |
| align-items: center; | |
| gap: 12px; | |
| margin-bottom: 16px; | |
| } | |
| .step-num { | |
| width: 32px; height: 32px; | |
| border-radius: 50%; | |
| background: var(--surface2); | |
| border: 2px solid var(--border); | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| font-size: 0.85rem; | |
| font-weight: 600; | |
| flex-shrink: 0; | |
| } | |
| .step.active .step-num { border-color: var(--accent); color: var(--accent); } | |
| .step.done .step-num { border-color: var(--green); background: var(--green); color: var(--bg); } | |
| .step-title { font-size: 1.1rem; font-weight: 600; } | |
| .step-desc { color: var(--text2); font-size: 0.9rem; margin-bottom: 16px; } | |
| /* Forms */ | |
| textarea, input, select { | |
| width: 100%; | |
| background: var(--surface2); | |
| border: 1px solid var(--border); | |
| border-radius: 8px; | |
| color: var(--text); | |
| padding: 12px; | |
| font-family: inherit; | |
| font-size: 0.9rem; | |
| resize: vertical; | |
| transition: border-color 0.2s; | |
| } | |
| textarea:focus, input:focus, select:focus { | |
| outline: none; | |
| border-color: var(--accent); | |
| } | |
| textarea { min-height: 160px; } | |
| label { | |
| display: block; | |
| font-size: 0.85rem; | |
| font-weight: 500; | |
| margin-bottom: 6px; | |
| color: var(--text2); | |
| } | |
| .field { margin-bottom: 16px; } | |
| /* Buttons */ | |
| button { | |
| background: var(--accent); | |
| color: white; | |
| border: none; | |
| border-radius: 8px; | |
| padding: 10px 24px; | |
| font-size: 0.9rem; | |
| font-weight: 600; | |
| cursor: pointer; | |
| transition: opacity 0.2s, transform 0.1s; | |
| } | |
| button:hover { opacity: 0.9; } | |
| button:active { transform: scale(0.98); } | |
| button:disabled { opacity: 0.4; cursor: not-allowed; } | |
| button.secondary { | |
| background: var(--surface2); | |
| border: 1px solid var(--border); | |
| color: var(--text); | |
| } | |
| .btn-row { display: flex; gap: 10px; flex-wrap: wrap; } | |
| /* Segments editor */ | |
| .segments-list { display: flex; flex-direction: column; gap: 8px; margin-bottom: 12px; } | |
| .seg-row { | |
| display: flex; gap: 8px; align-items: center; | |
| } | |
| .seg-row input:first-child { flex: 3; } | |
| .seg-row input:nth-child(2) { flex: 1; max-width: 80px; text-align: center; } | |
| .seg-row button { padding: 8px 12px; background: var(--surface2); border: 1px solid var(--border); } | |
| /* Progress */ | |
| .progress-bar { | |
| width: 100%; | |
| height: 6px; | |
| background: var(--surface2); | |
| border-radius: 3px; | |
| overflow: hidden; | |
| margin: 12px 0; | |
| } | |
| .progress-fill { | |
| height: 100%; | |
| background: linear-gradient(90deg, var(--accent), var(--accent2)); | |
| border-radius: 3px; | |
| transition: width 0.3s; | |
| width: 0%; | |
| } | |
| .progress-fill.pulsing { | |
| animation: pulse-bar 1.5s ease-in-out infinite; | |
| } | |
| @keyframes pulse-bar { | |
| 0%, 100% { opacity: 1; } | |
| 50% { opacity: 0.5; } | |
| } | |
| .progress-text { | |
| font-size: 0.85rem; | |
| color: var(--text2); | |
| margin-bottom: 8px; | |
| } | |
| .eval-log { | |
| max-height: 200px; | |
| overflow-y: auto; | |
| font-family: 'JetBrains Mono', 'Fira Code', monospace; | |
| font-size: 0.8rem; | |
| background: var(--bg); | |
| border-radius: 8px; | |
| padding: 12px; | |
| margin-top: 12px; | |
| } | |
| .eval-log div { padding: 2px 0; } | |
| .eval-log .pos { color: var(--green); } | |
| .eval-log .neu { color: var(--yellow); } | |
| .eval-log .neg { color: var(--red); } | |
| .eval-log .err { color: var(--red); opacity: 0.7; } | |
| /* Results */ | |
| .score-big { | |
| font-size: 3rem; | |
| font-weight: 700; | |
| text-align: center; | |
| padding: 24px; | |
| } | |
| .score-big span { font-size: 1.2rem; color: var(--text2); font-weight: 400; } | |
| .stats-row { | |
| display: flex; | |
| justify-content: center; | |
| gap: 32px; | |
| margin: 16px 0; | |
| } | |
| .stat { | |
| text-align: center; | |
| } | |
| .stat-val { font-size: 1.5rem; font-weight: 600; } | |
| .stat-label { font-size: 0.8rem; color: var(--text2); } | |
| .stat.pos .stat-val { color: var(--green); } | |
| .stat.neu .stat-val { color: var(--yellow); } | |
| .stat.neg .stat-val { color: var(--red); } | |
| .results-details { | |
| background: var(--bg); | |
| border-radius: 8px; | |
| padding: 16px; | |
| margin-top: 16px; | |
| white-space: pre-wrap; | |
| font-family: 'JetBrains Mono', 'Fira Code', monospace; | |
| font-size: 0.8rem; | |
| max-height: 400px; | |
| overflow-y: auto; | |
| line-height: 1.5; | |
| } | |
| /* Changes editor */ | |
| .change-card { | |
| background: var(--surface2); | |
| border: 1px solid var(--border); | |
| border-radius: 8px; | |
| padding: 16px; | |
| margin-bottom: 10px; | |
| } | |
| .change-card .field { margin-bottom: 10px; } | |
| .change-card input, .change-card textarea { background: var(--bg); } | |
| .change-card textarea { min-height: 60px; } | |
| .change-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px; } | |
| .change-header span { font-weight: 600; font-size: 0.9rem; } | |
| /* Gradient */ | |
| .gradient-table { | |
| width: 100%; | |
| border-collapse: collapse; | |
| margin: 16px 0; | |
| font-size: 0.85rem; | |
| } | |
| .gradient-table th { | |
| text-align: left; | |
| padding: 10px 12px; | |
| border-bottom: 2px solid var(--border); | |
| color: var(--text2); | |
| font-weight: 500; | |
| } | |
| .gradient-table td { | |
| padding: 10px 12px; | |
| border-bottom: 1px solid var(--border); | |
| } | |
| .gradient-table tr:hover { background: var(--surface2); } | |
| .delta-pos { color: var(--green); font-weight: 600; } | |
| .delta-neg { color: var(--red); font-weight: 600; } | |
| .delta-bar { | |
| height: 8px; | |
| border-radius: 4px; | |
| display: inline-block; | |
| vertical-align: middle; | |
| } | |
| /* Templates */ | |
| .template-chips { | |
| display: flex; | |
| gap: 8px; | |
| margin-bottom: 12px; | |
| flex-wrap: wrap; | |
| } | |
| .template-chip { | |
| padding: 6px 14px; | |
| background: var(--surface2); | |
| border: 1px solid var(--border); | |
| border-radius: 20px; | |
| font-size: 0.8rem; | |
| cursor: pointer; | |
| color: var(--text2); | |
| transition: all 0.2s; | |
| } | |
| .template-chip:hover { border-color: var(--accent); color: var(--text); } | |
| /* Download button */ | |
| .btn-download { | |
| background: var(--surface2); | |
| border: 1px solid var(--green); | |
| color: var(--green); | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 6px; | |
| } | |
| .btn-download:hover { background: color-mix(in srgb, var(--green) 15%, var(--surface2)); } | |
| /* Responsive */ | |
| @media (max-width: 600px) { | |
| .container { padding: 16px 12px; } | |
| .step { padding: 20px; } | |
| .stats-row { gap: 20px; } | |
| header h1 { font-size: 1.5rem; } | |
| } | |
| /* Utility */ | |
| .hidden { display: none ; } | |
| .mt-12 { margin-top: 12px; } | |
| .mt-16 { margin-top: 16px; } | |
| .mb-8 { margin-bottom: 8px; } | |
| .text-center { text-align: center; } | |
| </style> | |
| </head> | |
| <body> | |
| <div class="container"> | |
<!-- Page header. Both badges start hidden via inline display:none; init() fills
     them from /api/config and clears the inline display, and saveApiKey()
     rewrites #configBadge with the chosen model. -->
<header>
  <h1>Semantic Gradient Optimization</h1>
  <p>Build a panel. See what they think. Test what to change next.</p>
  <div id="configBadge" class="config-badge" style="display:none"></div>
  <div id="nemotronBadge" class="config-badge" style="display:none;margin-left:8px"></div>
</header>
<!-- API key setup: hidden by default; init() reveals it when /api/config reports
     no key and cfg.is_spaces is false. saveApiKey() reads the three inputs below
     by id, so the ids must not change. -->
<div class="step hidden" id="apiKeySetup" style="border-color:var(--yellow)">
  <div class="step-header">
    <div class="step-num" style="border-color:var(--yellow);color:var(--yellow)">!</div>
    <div class="step-title">Connect your LLM</div>
  </div>
  <p class="step-desc">SGO works with any OpenAI-compatible API. Your key stays in your browser and is sent to the server only via encrypted headers — never logged, stored, or visible in URLs.</p>
  <div class="field">
    <label for="apiKeyInput">API key</label>
    <!-- autocomplete="off": the key is meant to live in page memory only -->
    <input type="password" id="apiKeyInput" placeholder="sk-..." autocomplete="off">
  </div>
  <div class="field">
    <label for="apiBaseUrl">Base URL</label>
    <input type="text" id="apiBaseUrl" inputmode="url" placeholder="e.g. https://openrouter.ai/api/v1 or https://api.openai.com/v1">
  </div>
  <div class="field">
    <label for="apiModel">Model</label>
    <input type="text" id="apiModel" placeholder="e.g. gpt-4o-mini or anthropic/claude-sonnet-4" value="openai/gpt-4o-mini">
  </div>
  <button type="button" onclick="saveApiKey()">Connect</button>
</div>
<!-- Panel data setup: init() reveals this card when cfg.nemotron_available is
     false; on Spaces it additionally hides #nemotronPathField.
     setupNemotron() finds its button with querySelector('#nemotronSetup button'),
     so "Load personas" must remain the first <button> inside this card. -->
<div class="step hidden" id="nemotronSetup" style="border-color:var(--yellow)">
  <div class="step-header">
    <div class="step-num" style="border-color:var(--yellow);color:var(--yellow)">!</div>
    <div class="step-title">Panel data needed</div>
  </div>
  <p class="step-desc">SGO panels are built from real census-grounded personas. Pick a country to load — this is a one-time setup.</p>
  <div style="display:flex;gap:12px;align-items:flex-end;flex-wrap:wrap">
    <div class="field" style="flex:1;min-width:120px">
      <label for="nemotronDataset">Country</label>
      <select id="nemotronDataset" style="padding:10px 12px">
        <option value="USA">USA (6M personas)</option>
        <option value="Japan">Japan (6M)</option>
        <option value="India">India (21M)</option>
        <option value="Singapore">Singapore (888K)</option>
        <option value="Brazil">Brazil (6M)</option>
        <option value="France">France (6M)</option>
      </select>
    </div>
    <div class="field" id="nemotronPathField" style="flex:2;min-width:200px">
      <label for="nemotronPath">Folder</label>
      <input type="text" id="nemotronPath" placeholder="e.g. data/nemotron">
    </div>
  </div>
  <div class="btn-row">
    <button type="button" onclick="setupNemotron()">Load personas</button>
  </div>
  <div id="nemotronStatus" class="hidden mt-16">
    <!-- Live region so the loading message written by setupNemotron() is announced -->
    <div class="progress-text" id="nemotronStatusText" aria-live="polite"></div>
    <div class="progress-bar"><div class="progress-fill" id="nemotronProgressBar" style="width:0%"></div></div>
  </div>
</div>
<!-- STEP 1: Entity + Evaluate (one click) -->
<div class="step active" id="step1">
  <div class="step-header">
    <div class="step-num">1</div>
    <div class="step-title">Build your panel review</div>
  </div>
  <p class="step-desc">Paste what you want reviewed, set your goal and audience, and let the panel react.</p>
  <!-- Chips stay <span>s so the global `button` rules do not restyle them;
       role/tabindex plus the key handler make them reachable and operable
       from the keyboard (Enter/Space forwards to the existing click handler). -->
  <div class="template-chips">
    <span class="template-chip" role="button" tabindex="0" onclick="loadTemplate('product')" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();this.click()}">Product</span>
    <span class="template-chip" role="button" tabindex="0" onclick="loadTemplate('resume')" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();this.click()}">Resume</span>
    <span class="template-chip" role="button" tabindex="0" onclick="loadTemplate('pitch')" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();this.click()}">Pitch</span>
    <span class="template-chip" role="button" tabindex="0" onclick="loadTemplate('policy')" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();this.click()}">Policy</span>
    <span class="template-chip" role="button" tabindex="0" onclick="loadTemplate('listing')" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();this.click()}">Listing</span>
    <span class="template-chip" role="button" tabindex="0" onclick="loadTemplate('blog')" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();this.click()}">Blog Post</span>
  </div>
  <div class="field">
    <label for="entityText">What should the panel review?</label>
    <textarea id="entityText" placeholder="Paste the text someone would actually see: landing page, resume, pitch, profile, policy..."></textarea>
  </div>
  <div class="field">
    <div style="display:flex;justify-content:space-between;align-items:center">
      <label for="goalText">What outcome are you optimizing for?</label>
      <button type="button" class="secondary" style="padding:4px 12px;font-size:0.75rem" onclick="inferSpec()">Auto-fill</button>
    </div>
    <input type="text" id="goalText" placeholder="e.g. 'Increase conversion rate' or 'Get more interview callbacks'">
  </div>
  <div class="field">
    <div style="display:flex;justify-content:space-between;align-items:center">
      <label for="cohortDesc">Who should be on the panel?</label>
      <button type="button" class="secondary" style="padding:4px 12px;font-size:0.75rem" onclick="inferSpec()">Auto-fill</button>
    </div>
    <input type="text" id="cohortDesc" placeholder="e.g. 'Engineering managers at mid-stage startups' or 'US consumers aged 25-45'">
  </div>
  <!-- Optional metric anchor. The visible label points at the value input; the
       other controls in the row carry their own accessible names. -->
  <div class="field" id="metricAnchorField">
    <label for="calMetricValue">Know the current performance?</label>
    <div style="display:flex;gap:8px;align-items:center;flex-wrap:wrap">
      <select id="calMetricName" aria-label="Metric" style="width:auto;min-width:100px;padding:8px 10px">
        <option value="CTR">CTR</option>
        <option value="conversion rate">Conversion rate</option>
        <option value="open rate">Open rate</option>
        <option value="revenue">Revenue</option>
        <option value="">Custom...</option>
      </select>
      <input type="text" id="calMetricNameCustom" class="hidden" placeholder="Metric name"
             aria-label="Custom metric name"
             style="width:120px;padding:8px 10px">
      <input type="number" id="calMetricValue" step="any" placeholder="e.g. 2.1"
             style="width:100px;padding:8px 10px">
      <input type="text" id="calMetricUnit" value="%" aria-label="Unit" style="width:50px;padding:8px 10px;text-align:center">
      <div id="calStatus" class="hidden" style="font-size:0.85rem;margin-left:4px"></div>
    </div>
    <p style="font-size:0.75rem;color:var(--text2);margin-top:4px">
      Optional — if set, SGO translates score changes into predicted metric changes.
    </p>
  </div>
  <details class="mb-8">
    <summary style="cursor:pointer;color:var(--text2);font-size:0.85rem">Panel settings</summary>
    <div style="padding:12px 0">
      <div class="field">
        <label for="panelSize">Panel size (more people = steadier results)</label>
        <input type="number" id="panelSize" value="30" min="5" max="80"
               style="width:80px;padding:6px;text-align:center">
      </div>
      <label style="display:inline-flex;align-items:center;gap:8px;margin:0;font-size:0.85rem;cursor:pointer">
        <input type="checkbox" id="biasCalibration" checked style="width:auto;margin:0">
        Reduce framing and authority effects
      </label>
    </div>
  </details>
  <button type="button" onclick="runFullPipeline()" id="goBtn">Build panel and review</button>
  <div id="pipelineProgress" class="hidden mt-16">
    <div class="progress-text" id="pipelineProgressText" aria-live="polite">Starting...</div>
    <div class="progress-bar"><div class="progress-fill" id="pipelineProgressBar"></div></div>
    <div class="eval-log" id="evalLog"></div>
  </div>
  <div id="evalResults" class="hidden mt-16">
    <div class="score-big" id="avgScore">0<span>/10</span></div>
    <div class="stats-row">
      <div class="stat pos"><div class="stat-val" id="posCount">0</div><div class="stat-label">would say yes</div></div>
      <div class="stat neu"><div class="stat-val" id="neuCount">0</div><div class="stat-label">unsure</div></div>
      <div class="stat neg"><div class="stat-val" id="negCount">0</div><div class="stat-label">would say no</div></div>
    </div>
    <details>
      <summary style="cursor:pointer;color:var(--text2);font-size:0.9rem">Full analysis</summary>
      <div class="results-details" id="evalAnalysis"></div>
    </details>
    <div class="btn-row mt-16">
      <button type="button" onclick="runDirections()">Test what to change next</button>
      <button type="button" class="secondary" onclick="goToStep(3)">Check panel realism</button>
      <button type="button" class="btn-download" onclick="downloadReport()">⤓ Download report</button>
    </div>
  </div>
</div>
<!-- STEP 2: Directions. The card starts hidden; its progress area is visible
     as soon as the card is shown, and #cfResults stays hidden until results
     are rendered by script. -->
<div class="step hidden" id="step2">
  <div class="step-header">
    <div class="step-num">2</div>
    <div class="step-title">Test what to change next</div>
  </div>
  <p class="step-desc">Testing changes with people who are <em>on the fence</em> — the ones who could go either way.</p>
  <div id="cfProgress" class="mt-16">
    <div class="progress-text" id="cfProgressText" aria-live="polite">Analyzing concerns...</div>
    <div class="progress-bar"><div class="progress-fill" id="cfProgressBar"></div></div>
    <div class="eval-log" id="cfLog"></div>
  </div>
  <div id="cfResults" class="hidden mt-16">
    <h3 style="margin-bottom:12px">Priority Actions</h3>
    <table class="gradient-table" id="gradientTable">
      <thead>
        <tr><th scope="col">#</th><th scope="col">Change</th><th scope="col">Avg Impact</th><th scope="col">Range</th><th scope="col">Helps</th><th scope="col">Hurts</th></tr>
      </thead>
      <tbody></tbody>
    </table>
    <div id="gradientText" class="hidden"></div>
    <div id="changesTested" class="hidden"></div>
    <div class="btn-row mt-16">
      <button type="button" class="btn-download" onclick="downloadReport()">⤓ Download full report</button>
    </div>
  </div>
</div>
<!-- STEP 3: Bias Audit -->
<div class="step hidden" id="step3">
  <div class="step-header">
    <div class="step-num">3</div>
    <div class="step-title">Check panel realism</div>
  </div>
  <p class="step-desc">
    See whether your panel reacts more like real people or more like an LLM.
    Based on <a href="https://arxiv.org/abs/2509.13588" target="_blank" rel="noopener noreferrer" style="color:var(--accent2)">CoBRA (CHI'26)</a>.
  </p>
  <div class="field">
    <!-- This label names the checkbox group below (via aria-labelledby), since
         it has no single control to point at. -->
    <label id="probeChecksLabel">Checks to run</label>
    <div role="group" aria-labelledby="probeChecksLabel" style="display:flex;gap:16px;margin-bottom:12px">
      <label style="display:flex;align-items:center;gap:6px;margin:0;font-size:0.85rem">
        <input type="checkbox" id="probeFraming" checked> Does wording change the score?
      </label>
      <label style="display:flex;align-items:center;gap:6px;margin:0;font-size:0.85rem">
        <input type="checkbox" id="probeAuthority" checked> Do trust signals change the score?
      </label>
      <label style="display:flex;align-items:center;gap:6px;margin:0;font-size:0.85rem">
        <input type="checkbox" id="probeOrder" checked> Does section order change the score?
      </label>
    </div>
  </div>
  <div class="btn-row">
    <button type="button" onclick="runBiasAudit()" id="auditBtn">Run checks</button>
    <!-- Flexible spacer: pushes the sample-size control to the right edge -->
    <div style="flex:1"></div>
    <label style="display:flex;align-items:center;gap:6px;margin:0">
      <span style="font-size:0.8rem;color:var(--text2)">Sample size:</span>
      <input type="number" id="auditSample" value="10" min="1" max="50"
             style="width:60px;padding:6px;text-align:center">
    </label>
  </div>
  <div id="auditProgress" class="hidden mt-16">
    <div class="progress-text" id="auditProgressText" aria-live="polite">Running bias probes...</div>
    <div class="progress-bar"><div class="progress-fill" id="auditProgressBar"></div></div>
  </div>
  <div id="auditResults" class="hidden mt-16">
    <h3 style="margin-bottom:12px">Panel Realism Check</h3>
    <table class="gradient-table" id="auditTable">
      <thead>
        <tr><th scope="col">Check</th><th scope="col">Shifted %</th><th scope="col">Avg score change</th><th scope="col">Human baseline</th><th scope="col">Assessment</th></tr>
      </thead>
      <tbody></tbody>
    </table>
    <details class="mt-12">
      <summary style="cursor:pointer;color:var(--text2);font-size:0.9rem">Full report</summary>
      <div class="results-details" id="auditReport"></div>
    </details>
    <div class="btn-row mt-16">
      <button type="button" class="secondary" onclick="rerunWithCalibration()">Run again with realism tuning</button>
      <button type="button" class="secondary" onclick="goToStep(2)">Test what to change next</button>
      <button type="button" class="btn-download" onclick="downloadReport()">⤓ Download full report</button>
    </div>
  </div>
</div>
| </div> | |
| <script> | |
| const TEMPLATES = { | |
| product: `# Beacon — Infrastructure Monitoring for Growing Teams | |
| ## One-liner | |
| Beacon watches your servers, databases, and APIs so your on-call engineer doesn't have to stare at dashboards at 3am. | |
| ## Key features | |
| - Anomaly detection that learns your traffic patterns — no manual threshold tuning | |
| - Incident timelines that auto-correlate logs, metrics, and deploys | |
| - PagerDuty, Slack, and Opsgenie integrations out of the box | |
| - 5-minute setup: one-line agent install, auto-discovers services | |
| ## Pricing | |
| - Starter: $49/mo — 10 hosts, 7-day retention, email alerts | |
| - Team: $149/mo — 50 hosts, 30-day retention, all integrations | |
| - Enterprise: Custom — unlimited hosts, SSO, SLA, dedicated support | |
| ## Trust signals | |
| - Used by 820 engineering teams | |
| - 99.97% uptime over the past 12 months | |
| - SOC 2 Type II certified | |
| - Founded by ex-Datadog and ex-AWS engineers | |
| ## Target user | |
| Engineering teams of 5-50 who've outgrown free tools but don't need (or want to pay for) enterprise observability platforms. | |
| ## What's NOT included | |
| - No APM / distributed tracing (planned Q4) | |
| - No log management (we integrate with your existing log provider) | |
| - No free tier | |
| - No on-premise deployment option`, | |
| resume: `# Jordan Nakamura | |
| ## Target role | |
| Senior Product Manager — B2B SaaS, growth stage | |
| ## Summary | |
| Product manager with 7 years of experience shipping B2B tools that grow revenue. Led the payments platform at Brex from $2M to $18M ARR. I care about talking to users, shipping fast, and measuring what matters. | |
| ## Experience | |
| - **Brex** — Senior Product Manager (2021–2025) | |
| Owned the payments platform. Launched instant payouts (drove 34% activation lift), redesigned onboarding (cut time-to-value from 14 days to 3), and ran pricing experiments that increased ARPU 22%. | |
| - **Figma** — Product Manager (2019–2021) | |
| Shipped the plugin marketplace and developer API. Grew plugin ecosystem from 200 to 4,000+ plugins. Managed 2 engineers and 1 designer. | |
| - **Deloitte Digital** — Business Analyst (2017–2019) | |
| Client-facing consulting for Fortune 500 digital transformations. Led requirements gathering and stakeholder alignment for a $4M CRM migration. | |
| ## Education | |
| - MBA, Kellogg School of Management, 2021 | |
| - BS Computer Science, UC San Diego, 2017 | |
| ## Skills | |
| - Product strategy, A/B testing, SQL, user research, pricing | |
| - Tools: Amplitude, Mixpanel, Linear, Figma, dbt`, | |
| pitch: `# Archway — AI Compliance for Regulated Industries | |
| ## Problem | |
| Financial services firms spend $12B/year on compliance staff manually reviewing documents, flagging risks, and filing reports. A single missed filing costs $50K-$5M in fines. The work is repetitive, error-prone, and doesn't scale. | |
| ## Solution | |
| Archway uses LLMs fine-tuned on regulatory filings to automate compliance review. Upload a document, get a risk assessment in seconds with citations to specific regulations. Integrates with existing GRC platforms. | |
| ## Market | |
| - TAM: $28B (global RegTech) | |
| - SAM: $4.2B (US financial services compliance automation) | |
| - SOM: $180M (mid-market banks and fintech, our beachhead) | |
| ## Traction | |
| - $840K ARR, growing 28% month-over-month | |
| - 14 paying customers (3 banks, 11 fintech companies) | |
| - 92% gross retention, 118% net retention | |
| - Processing 45,000 documents/month | |
| ## Team | |
| - **Maya Torres** (CEO) — 10 years at Goldman Sachs compliance, built their internal ML screening tool | |
| - **Raj Patel** (CTO) — Former Palantir, led NLP team. Published 8 papers on document understanding | |
| - **Lisa Chang** (Head of Sales) — Scaled Plaid's financial services GTM from $5M to $40M | |
| ## Ask | |
| Raising $6M Series A to hire 5 engineers, 3 enterprise sales reps, and expand to insurance vertical.`, | |
| policy: `# Proposed Remote Work Policy — Meridian Technologies | |
| ## Summary | |
| Effective Q3, Meridian Technologies will transition from a hybrid (3 days in-office) model to a flexible-first policy where employees choose their work location, with 4 required in-person days per month for team collaboration. | |
| ## Key Changes | |
| - No default in-office days. Employees choose where to work daily. | |
| - 4 "collaboration days" per month — the team picks the dates together | |
| - $1,500/year home office stipend for all remote-eligible employees | |
| - Core hours: 10am-3pm local time (for meetings and availability) | |
| - Managers cannot require in-office presence beyond the 4 collaboration days | |
| ## Who This Affects | |
| - All full-time employees in engineering, product, design, marketing, and operations (approx. 340 people) | |
| - Customer support and facilities teams remain on existing schedules | |
| - New hires in their first 90 days will have 8 collaboration days/month | |
| ## What We Expect | |
| - Maintained or improved sprint velocity and shipping cadence | |
| - Employee satisfaction scores above 7.5/10 on quarterly surveys | |
| - No degradation in cross-team collaboration metrics | |
| ## What We're NOT Changing | |
| - Compensation bands remain location-based | |
| - Performance review criteria unchanged | |
| - Existing PTO and benefits policies unchanged`, | |
| listing: `# Sunny 2BR Condo in Logan Square — $1,850/mo | |
| Prime location on Milwaukee Ave, 2 blocks from the Blue Line. This updated 2-bedroom, 1-bath unit features hardwood floors throughout, in-unit washer/dryer, and a private balcony overlooking a tree-lined street. | |
| ## Unit Details | |
| - 2 bedrooms, 1 bathroom, ~950 sq ft | |
| - 3rd floor of a walk-up (no elevator) | |
| - In-unit washer/dryer | |
| - Central AC and gas heat | |
| - Updated kitchen with dishwasher, gas stove, granite counters | |
| - Private south-facing balcony | |
| - One parking spot included ($75 value) | |
| ## Building & Neighborhood | |
| - 6-unit building, quiet and well-maintained | |
| - 2-minute walk to Logan Square Blue Line | |
| - Walkable to Longman & Eagle, Lost Lake, Gaslight Coffee | |
| - Whole Foods 4 blocks north | |
| ## Lease Terms | |
| - 12-month lease, available August 1 | |
| - $1,850/month, $1,850 security deposit | |
| - Pets allowed (dogs under 40 lbs, $250 pet deposit) | |
| - Credit check and proof of income required (2.5x rent) | |
| ## What's NOT included | |
| - Utilities (electric, gas, internet) — tenant responsibility | |
| - Storage unit — available for $50/mo | |
| - No doorman or package room`, | |
| blog: `# Why We Stopped Doing Sprint Planning | |
| After 3 years of two-week sprints, our team of 8 engineers quietly stopped doing sprint planning. Here's what happened and what we do instead. | |
| ## The Problem | |
| Sprint planning took 2 hours every other Monday. We'd estimate stories, negotiate scope, and commit to a sprint goal. Then reality would hit: a production incident on Tuesday, a customer escalation on Wednesday, a "quick favor" from the CEO on Thursday. | |
| By Friday, the sprint backlog was fiction. We'd hit maybe 60% of our committed stories, feel bad about it, and repeat. | |
| The problem wasn't discipline. It was that our planning horizon didn't match our reality. Two weeks is too long when your priorities shift daily. | |
| ## What We Do Instead | |
| **Daily priorities, weekly themes.** Each Monday, the team picks a theme for the week — usually one large initiative or problem area. Each morning in standup, people pull from a prioritized backlog. No commitments, no estimates, no sprint goals. | |
| **Ship when ready.** We deploy 3-4 times per day. There's no "end of sprint" release. Features go out when they're done, behind feature flags. | |
| **Weekly retro, not sprint review.** Every Friday we ask: what shipped this week? What blocked us? What should we do differently? No velocity charts. No burndown. | |
| ## Results After 6 Months | |
| - Deploy frequency: 2x/sprint → 15x/week | |
| - Engineer satisfaction (internal survey): 6.2 → 8.1 | |
| - Customer-reported bugs: down 40% | |
| - Time spent in planning meetings: 4 hrs/month → 30 min/month | |
| ## The Catch | |
| This only works because we have a strong product manager who keeps the backlog prioritized and a team that communicates well async. If your team struggles with alignment, removing sprint structure might make things worse, not better.` | |
| }; | |
// Page-level state. All three start as null and are assigned by code outside
// this section (presumably once a run produces a session / results — confirm).
let sessionId = null;
let evalResultsData = null;
let lastGradientData = null;

// LLM credentials — stored only in browser JS memory, never persisted.
// saveApiKey() fills these from the "Connect your LLM" form.
let llmApiKey = '';
let llmBaseUrl = '';
let llmModel = '';

/**
 * Build the request headers for API calls.
 *
 * Always includes a JSON Content-Type. When an API key has been entered, the
 * key is added as X-LLM-Key, and the base URL / model are added as
 * X-LLM-Base / X-LLM-Model if they are non-empty. Without a key, base URL and
 * model are not sent even if set.
 *
 * Credentials travel in custom headers rather than Authorization because the
 * HF proxy intercepts Authorization.
 *
 * @returns {Object<string, string>} a fresh headers object on every call
 */
function llmHeaders() {
  const headers = {'Content-Type': 'application/json'};
  if (!llmApiKey) {
    return headers;
  }
  headers['X-LLM-Key'] = llmApiKey;
  if (llmBaseUrl) headers['X-LLM-Base'] = llmBaseUrl;
  if (llmModel) headers['X-LLM-Model'] = llmModel;
  return headers;
}
/**
 * Map an API path to the URL to request.
 *
 * Currently returns the path unchanged: credentials are deliberately kept out
 * of URLs because URLs get logged.
 *
 * @param {string} path - request path, e.g. '/api/config'
 * @returns {string} the same path
 */
function apiUrl(path) {
  return path;
}
/**
 * XSS sanitization helper: escape a value for safe insertion into HTML text
 * or a quoted attribute.
 *
 * null / undefined become the empty string; any other value is stringified
 * first. The ampersand is replaced first so the entities produced by the
 * later replacements are not escaped a second time.
 *
 * @param {*} str - value to escape
 * @returns {string} the escaped text
 */
function esc(str) {
  if (str == null) return '';
  return String(str)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
// ── Init ──
/**
 * Load /api/config and reflect it in the header badges and setup cards.
 *
 * LLM badge:
 *   - key configured            → green badge showing cfg.model
 *   - no key, not on Spaces     → yellow "No API key" badge + API key card
 *   - no key, on Spaces         → nothing shown
 * Panel-data badge:
 *   - data available            → green badge
 *   - missing, not on Spaces    → yellow badge + setup card, folder pre-filled
 *   - missing, on Spaces        → setup card with the folder field hidden
 *
 * A failed fetch or a non-JSON response rejects the returned promise; nothing
 * is caught here.
 *
 * @returns {Promise<void>}
 */
async function init() {
  const resp = await fetch('/api/config');
  const cfg = await resp.json();

  const el = (id) => document.getElementById(id);
  // Fill a header badge and clear its inline display:none.
  const showBadge = (node, text, variant) => {
    node.textContent = text;
    node.className = `config-badge ${variant}`;
    node.style.display = '';
  };
  const onSpaces = Boolean(cfg.is_spaces);

  const badge = el('configBadge');
  if (cfg.has_api_key) {
    showBadge(badge, cfg.model, 'ok');
  } else if (!onSpaces) {
    showBadge(badge, 'No API key', 'warn');
    el('apiKeySetup').classList.remove('hidden');
  }

  const nemBadge = el('nemotronBadge');
  if (cfg.nemotron_available) {
    // NOTE(review): label says "1M" while the country picker advertises 6M+
    // persona sets — confirm the intended text.
    showBadge(nemBadge, 'Nemotron 1M', 'ok');
  } else if (!onSpaces) {
    showBadge(nemBadge, 'No panel data', 'warn');
    el('nemotronSetup').classList.remove('hidden');
    el('nemotronPath').value = 'data/nemotron';
  } else {
    // On Spaces without data — show setup but hide folder field
    el('nemotronSetup').classList.remove('hidden');
    el('nemotronPathField').classList.add('hidden');
  }
  // Changes are auto-generated from evaluation concerns
}
/**
 * Store the LLM credentials typed into the API key setup panel.
 *
 * Copies key, base URL and model (default 'openai/gpt-4o-mini') into the
 * in-memory credential variables, shows the model in the config badge and
 * hides the setup panel. Alerts and returns if no key was entered.
 */
function saveApiKey() {
  const key = document.getElementById('apiKeyInput').value.trim();
  if (!key) return alert('Enter your API key.');

  llmApiKey = key;
  llmBaseUrl = document.getElementById('apiBaseUrl').value.trim();
  llmModel = document.getElementById('apiModel').value.trim() || 'openai/gpt-4o-mini';

  const badge = document.getElementById('configBadge');
  badge.textContent = llmModel;
  badge.className = 'config-badge ok';
  // init() only un-hides the badge on some paths; make sure the saved model
  // is actually visible no matter which path ran.
  badge.style.display = '';

  document.getElementById('apiKeySetup').classList.add('hidden');
}
/**
 * Ask the server to load the persona dataset and reflect progress in the UI.
 *
 * POSTs {path, dataset} to /api/nemotron/setup. While waiting, the progress
 * bar creeps forward (capped below 85%) purely to show activity. On success
 * the badge is updated and the setup panel is hidden after 2 s; on failure
 * the bar turns red, the error is shown and the button is re-enabled so the
 * user can retry.
 */
async function setupNemotron() {
  const path = document.getElementById('nemotronPath').value.trim() || 'data/nemotron';
  const dataset = document.getElementById('nemotronDataset').value;
  const btn = document.querySelector('#nemotronSetup button');
  const status = document.getElementById('nemotronStatus');
  const text = document.getElementById('nemotronStatusText');
  const bar = document.getElementById('nemotronProgressBar');

  btn.disabled = true;
  btn.textContent = 'Loading...';
  status.classList.remove('hidden');
  text.textContent = `Loading ${dataset} personas — this may take 2-5 minutes on first run...`;
  text.style.color = '';

  // A previous failed attempt leaves the bar red; clear that before retrying.
  bar.style.background = '';
  // Animate progress bar to show activity
  bar.style.width = '10%';
  const progressInterval = setInterval(() => {
    const current = parseFloat(bar.style.width);
    if (current < 85) bar.style.width = (current + 2) + '%';
  }, 3000);

  try {
    const resp = await fetch('/api/nemotron/setup', {
      method: 'POST',
      headers: llmHeaders(),
      body: JSON.stringify({path, dataset}),
    });
    // Error responses are not guaranteed to be JSON; fall back to the status.
    const data = await resp.json().catch(() => ({}));
    if (!resp.ok) throw new Error(data.detail || `HTTP ${resp.status}`);

    bar.style.width = '100%';
    const countLabel = typeof data.count === 'number' ? `${data.count.toLocaleString()} ` : '';
    text.textContent = `${countLabel}${dataset} personas ready`;
    text.style.color = 'var(--green)';

    const nemBadge = document.getElementById('nemotronBadge');
    nemBadge.textContent = `${dataset} personas`;
    nemBadge.className = 'config-badge ok';
    // init() leaves this badge hidden on some paths; show it now.
    nemBadge.style.display = '';

    setTimeout(() => {
      document.getElementById('nemotronSetup').classList.add('hidden');
    }, 2000);
  } catch (e) {
    bar.style.width = '100%';
    bar.style.background = 'var(--red)';
    text.textContent = `Failed to load personas: ${e.message}`;
    text.style.color = 'var(--red)';
    btn.disabled = false;
    btn.textContent = 'Load personas';
  } finally {
    clearInterval(progressInterval);
  }
}
// ── Templates ──
/**
 * Put one of the built-in sample texts into the main text area.
 *
 * @param {string} name Key into TEMPLATES; an unknown key clears the field.
 */
function loadTemplate(name) {
  const draftField = document.getElementById('entityText');
  const sample = TEMPLATES[name];
  draftField.value = sample ? sample : '';
}
// ── Step navigation ──
/**
 * Make step `n` the active one among #step1..#step3.
 *
 * Steps before `n` are marked 'done', step `n` is marked 'active', and later
 * steps are hidden. Step elements that do not exist are skipped.
 *
 * @param {number} n Step number to activate.
 */
function goToStep(n) {
  for (const index of [1, 2, 3]) {
    const panel = document.getElementById(`step${index}`);
    if (!panel) continue;

    const classes = panel.classList;
    const state = index < n ? 'done' : index === n ? 'active' : 'hidden';
    switch (state) {
      case 'done':
        classes.remove('hidden', 'active');
        classes.add('done');
        break;
      case 'active':
        classes.remove('hidden', 'done');
        classes.add('active');
        break;
      default:
        classes.add('hidden');
        classes.remove('active', 'done');
    }
  }
}
// ── Logging helper ──
/**
 * Append one line to the evaluation log (#evalLog) and scroll it into view.
 *
 * The line is added as a new <div> node with its text set via textContent,
 * so the message is never parsed as HTML and the existing log content is not
 * re-serialized on every call.
 *
 * @param {string} msg Text of the log line.
 * @param {string} [cls=''] CSS class for the line (the callers in this file
 *   pass 'pos', 'neg', 'neu' or 'err').
 */
function logStep(msg, cls = '') {
  const log = document.getElementById('evalLog');
  const entry = document.createElement('div');
  entry.className = cls == null ? '' : String(cls);
  entry.textContent = msg == null ? '' : String(msg);
  log.appendChild(entry);
  log.scrollTop = log.scrollHeight;
}
// ── Step 1: Full pipeline (one click) ──
/**
 * Auto-fill the goal and audience fields from the pasted text.
 *
 * Does nothing when both fields already have content. Otherwise POSTs the
 * text to /api/infer-spec and fills only the fields that are still empty.
 * While waiting, the step-1 secondary buttons are disabled and show an
 * elapsed-seconds counter. This is best-effort: a failure is logged to the
 * console and the fields are left untouched; buttons and placeholders are
 * always restored.
 */
async function inferSpec() {
  const text = document.getElementById('entityText').value.trim();
  if (!text) return alert('Paste something to review first.');

  const goalField = document.getElementById('goalText');
  const audienceField = document.getElementById('cohortDesc');
  if (goalField.value.trim() && audienceField.value.trim()) return;

  // Show loading state with elapsed timer
  const btns = document.querySelectorAll('#step1 button.secondary');
  btns.forEach(b => { b.disabled = true; b.dataset.origText = b.textContent; });
  let elapsed = 0;
  const tick = () => { elapsed++; btns.forEach(b => { b.textContent = `Thinking (${elapsed}s)`; }); };
  tick();
  const timer = setInterval(tick, 1000);
  if (!goalField.value.trim()) goalField.placeholder = 'Thinking...';
  if (!audienceField.value.trim()) audienceField.placeholder = 'Thinking...';

  try {
    const resp = await fetch(apiUrl('/api/infer-spec'), {
      method: 'POST',
      headers: llmHeaders(),
      body: JSON.stringify({entity_text: text}),
    });
    // An error body must not be mistaken for an inferred spec.
    if (!resp.ok) throw new Error(`infer-spec returned HTTP ${resp.status}`);
    const data = await resp.json();
    // Re-check emptiness: the user may have typed while the request was in flight.
    if (!goalField.value.trim() && data.goal) goalField.value = data.goal;
    if (!audienceField.value.trim() && data.audience) audienceField.value = data.audience;
  } catch (e) {
    // Best-effort: keep the UI usable, but leave a trace for debugging.
    console.warn('Auto-fill of goal/audience failed:', e);
  } finally {
    clearInterval(timer);
    btns.forEach(b => { b.disabled = false; b.textContent = b.dataset.origText || 'Auto-fill'; });
    if (!goalField.value.trim()) goalField.placeholder = "e.g. 'Increase conversion rate' or 'Get more interview callbacks'";
    if (!audienceField.value.trim()) audienceField.placeholder = "e.g. 'Engineering managers at mid-stage startups' or 'US consumers aged 25-45'";
  }
}
/**
 * Run the whole review with one click.
 *
 * Phases: (0) infer goal/audience if either is empty, (1) create a session,
 * (2) ask for panel segments and scale them to the requested panel size,
 * (3) generate the cohort and upload it to the session, (4) stream the
 * evaluation over SSE and fill in the results panel.
 *
 * Any failure is shown in the progress text and the log. The button, the
 * elapsed-time ticker and the pulsing bar are always reset afterwards.
 */
async function runFullPipeline() {
  const text = document.getElementById('entityText').value.trim();
  if (!text) return alert('Paste something for the panel to review first.');

  const btn = document.getElementById('goBtn');
  const progressBar = document.getElementById('pipelineProgressBar');
  const progressText = document.getElementById('pipelineProgressText');

  btn.disabled = true;
  document.getElementById('pipelineProgress').classList.remove('hidden');
  document.getElementById('evalResults').classList.add('hidden');
  document.getElementById('evalLog').innerHTML = '';
  progressBar.style.width = '5%';

  // Auto-infer goal + audience if not provided
  const goalField = document.getElementById('goalText');
  const audienceField = document.getElementById('cohortDesc');
  if (!goalField.value.trim() || !audienceField.value.trim()) {
    progressText.textContent = 'Understanding your goal and audience...';
    logStep('Understanding what you want to optimize and who should judge it...');
    await inferSpec();
    if (goalField.value) logStep(`Goal: ${goalField.value}`, 'pos');
    if (audienceField.value) logStep(`Audience: ${audienceField.value}`, 'pos');
  }

  const biasCal = document.getElementById('biasCalibration').checked;
  const panelSize = parseInt(document.getElementById('panelSize').value, 10) || 30;
  const audienceCtx = audienceField.value.trim();

  // Declared outside `try`: a binding created inside the try block would be
  // out of scope in `finally`, and clearing it there would throw.
  let timerInterval = null;

  // Build an Error from a non-OK response, preferring the server's `detail`.
  const httpError = async (resp, what) => {
    const err = await resp.json().catch(() => ({}));
    return new Error(`${what}: ${err.detail || resp.status}`);
  };

  try {
    // Phase 1: Create session
    progressText.textContent = 'Setting up...';
    const sessResp = await fetch('/api/session', {
      method: 'POST',
      headers: llmHeaders(),
      body: JSON.stringify({entity_text: text}),
    });
    if (!sessResp.ok) throw await httpError(sessResp, 'Failed to create session');
    const sessData = await sessResp.json();
    sessionId = sessData.session_id;

    // Store goal/audience in session for report generation.
    // Fire-and-forget on purpose: the review itself does not depend on it.
    fetch(`/api/session/${sessionId}`, {
      method: 'PATCH',
      headers: llmHeaders(),
      body: JSON.stringify({goal: goalField.value.trim(), audience: audienceCtx}),
    }).catch(() => {});

    // Start elapsed timer: re-writes the current status with a "(Ns)" suffix.
    const startTime = Date.now();
    timerInterval = setInterval(() => {
      const elapsed = Math.round((Date.now() - startTime) / 1000);
      const base = progressText.textContent.replace(/ \(\d+s\)$/, '');
      progressText.textContent = `${base} (${elapsed}s)`;
    }, 1000);
    progressBar.classList.add('pulsing');

    // Phase 2: Suggest segments
    progressText.textContent = 'Choosing panel segments...';
    progressBar.style.width = '10%';
    logStep('Asking LLM to choose panel segments...');
    const segResp = await fetch(apiUrl('/api/suggest-segments'), {
      method: 'POST',
      headers: llmHeaders(),
      body: JSON.stringify({
        entity_text: text,
        audience_context: audienceCtx || `People who would evaluate: ${text.substring(0, 200)}`,
      }),
    });
    if (!segResp.ok) throw await httpError(segResp, 'Failed to choose segments');
    const segData = await segResp.json();
    const segments = segData.segments || [];

    // Scale segment counts to match requested panel size (minimum 2 each;
    // a segment without a count is treated as 8).
    const totalSuggested = segments.reduce((a, s) => a + (s.count || 8), 0);
    const scale = panelSize / (totalSuggested || 1);
    segments.forEach(s => { s.count = Math.max(2, Math.round((s.count || 8) * scale)); });
    segments.forEach(s => logStep(` Added segment: ${s.label} (${s.count} people)`));
    logStep(`${segments.length} segments chosen`, 'pos');
    progressBar.style.width = '20%';

    // Phase 3: Generate cohort
    progressText.textContent = 'Building your panel (this takes 30-60s)...';
    logStep('Building panel members for each segment...');
    const desc = audienceCtx || `People evaluating: ${text.substring(0, 200)}`;
    const cohortResp = await fetch(apiUrl('/api/cohort/generate'), {
      method: 'POST',
      headers: llmHeaders(),
      body: JSON.stringify({description: desc, audience_context: audienceCtx, segments, parallel: 3}),
    });
    if (!cohortResp.ok) throw await httpError(cohortResp, 'Failed to build panel');
    const cohortData = await cohortResp.json();

    // Upload cohort to our session; evaluating without it makes no sense,
    // so a failed upload aborts the run.
    const uploadResp = await fetch(`/api/cohort/upload/${sessionId}`, {
      method: 'POST',
      headers: llmHeaders(),
      body: JSON.stringify(cohortData.cohort),
    });
    if (!uploadResp.ok) throw await httpError(uploadResp, 'Failed to save panel');

    const src = cohortData.source === 'nemotron' ? 'census-grounded (Nemotron)' : 'LLM-generated';
    if (cohortData.filters && Object.keys(cohortData.filters).length > 0) {
      const f = cohortData.filters;
      const parts = [];
      if (f.sex) parts.push(f.sex);
      if (f.age_min || f.age_max) parts.push(`age ${f.age_min||'?'}-${f.age_max||'?'}`);
      if (f.city) parts.push(f.city);
      if (f.state) parts.push(f.state);
      if (f.occupation) parts.push(f.occupation);
      logStep(`Filtered: ${parts.join(', ')}`, 'pos');
    }
    logStep(`${cohortData.cohort_size} panel members ready — ${src}`, 'pos');
    if (panelSize < 40) {
      logStep(`Tip: a panel of 40-60 gives more reliable results across segments`, 'neu');
    }
    progressBar.style.width = '35%';

    // Phase 4: Evaluate via SSE
    progressText.textContent = 'Evaluating...';
    logStep('Running the panel review — each member scores what you wrote...');

    await new Promise((resolve, reject) => {
      const params = new URLSearchParams({parallel: 5, bias_calibration: biasCal});
      const es = new EventSource(`/api/evaluate/stream/${sessionId}?${params}`);

      es.addEventListener('start', (e) => {
        const d = JSON.parse(e.data);
        const calLabel = d.bias_calibration ? ' (bias-calibrated)' : '';
        progressText.textContent = `Evaluating ${d.total} personas${calLabel}...`;
      });

      es.addEventListener('progress', (e) => {
        const d = JSON.parse(e.data);
        // The evaluation phase owns the 35%..100% stretch of the bar.
        const base = 35;
        const fraction = d.total ? d.done / d.total : 0;
        const pct = base + Math.round(fraction * (100 - base));
        progressBar.style.width = pct + '%';
        progressText.textContent = `${d.done}/${d.total} evaluated`;
        const cls = d.error ? 'err' : d.action === 'positive' ? 'pos' : d.action === 'negative' ? 'neg' : 'neu';
        const icon = d.error ? 'ERR' : d.action === 'positive' ? '+' : d.action === 'negative' ? '-' : '~';
        const score = d.score != null ? `${d.score}/10` : '';
        logStep(`[${d.done}/${d.total}] ${d.name}: ${icon} ${score}`, cls);
      });

      es.addEventListener('complete', (e) => {
        es.close();
        const d = JSON.parse(e.data);
        evalResultsData = d.results;
        progressBar.style.width = '100%';
        progressText.textContent = `Done in ${d.elapsed}s`;
        document.getElementById('avgScore').innerHTML = `${esc(d.avg_score)}<span>/10</span>`;
        document.getElementById('posCount').textContent = d.positive;
        document.getElementById('neuCount').textContent = d.neutral;
        document.getElementById('negCount').textContent = d.negative;
        document.getElementById('evalAnalysis').textContent = d.analysis;
        document.getElementById('evalResults').classList.remove('hidden');
        // Auto-apply calibration if user entered a metric value before running eval
        const calVal = parseFloat(document.getElementById('calMetricValue').value);
        if (calVal > 0) applyCalibration();
        resolve();
      });

      es.onerror = () => { es.close(); reject(new Error('Connection lost — you can try again without losing your draft')); };
    });
  } catch (e) {
    progressText.textContent = `Error: ${e.message}`;
    logStep(`Error: ${e.message}`, 'err');
  } finally {
    btn.disabled = false;
    if (timerInterval !== null) clearInterval(timerInterval);
    progressBar.classList.remove('pulsing');
  }
}
/**
 * Tick the bias-calibration checkbox, return to step 1 and start the full
 * pipeline again (the pipeline reads the checkbox when it starts).
 */
function rerunWithCalibration() {
  const calibrationToggle = document.getElementById('biasCalibration');
  calibrationToggle.checked = true;

  goToStep(1);
  runFullPipeline();
}
// ── Step 2: Directions (auto-flow) ──
// Candidate changes returned by /api/suggest-changes for the current run.
let suggestedChanges = [];

/**
 * Work out which changes would move the "on-the-fence" panel members.
 *
 * Phases: (1) collect the distinct concerns of members who scored 4-7,
 * (2) ask the server to propose changes for those concerns, (3) register the
 * probe configuration, receive a ticket, and stream the counterfactual
 * results over SSE, then render the gradient table.
 *
 * Requires a session and evaluation results. Failures are reported in the
 * step's progress text.
 */
async function runDirections() {
  if (!sessionId || !evalResultsData) return alert('Run evaluation first.');

  goToStep(2);
  const progressBar = document.getElementById('cfProgressBar');
  const progressText = document.getElementById('cfProgressText');
  const log = document.getElementById('cfLog');

  document.getElementById('cfResults').classList.add('hidden');
  log.innerHTML = '';
  progressBar.style.width = '5%';

  const entityText = document.getElementById('entityText').value.trim();
  const goal = document.getElementById('goalText').value.trim();

  // Append markup to the log without re-parsing what is already there.
  const appendLog = (markup) => {
    log.insertAdjacentHTML('beforeend', markup);
    log.scrollTop = log.scrollHeight;
  };
  // Build an Error from a non-OK response, preferring the server's `detail`.
  const httpError = async (resp, what) => {
    const err = await resp.json().catch(() => ({}));
    return new Error(`${what}: ${err.detail || resp.status}`);
  };

  try {
    // Phase 1: Extract concerns from persuadable middle
    progressText.textContent = 'Looking at what\u2019s holding back the on-the-fence group...';
    const persuadable = evalResultsData.filter(r => r && r.score >= 4 && r.score <= 7);
    // A Set keeps first-seen order while dropping duplicates.
    const concernSet = new Set();
    persuadable.forEach(r => (r.concerns || []).forEach(c => concernSet.add(c)));
    const concerns = [...concernSet];

    concerns.slice(0, 8).forEach(c => {
      appendLog(`<div style="color:var(--text2)">Concern: ${esc(c)}</div>`);
    });
    appendLog(`<div>${concerns.length} concerns from ${persuadable.length} on-the-fence panel members</div>`);
    progressBar.style.width = '15%';

    // Phase 2: LLM generates candidate changes from concerns
    progressText.textContent = 'Proposing changes to test...';
    const suggestResp = await fetch(apiUrl('/api/suggest-changes'), {
      method: 'POST',
      headers: llmHeaders(),
      body: JSON.stringify({entity_text: entityText, goal, concerns}),
    });
    if (!suggestResp.ok) throw await httpError(suggestResp, 'Failed to propose changes');
    const suggestData = await suggestResp.json();
    suggestedChanges = suggestData.changes || [];

    suggestedChanges.forEach(c => {
      appendLog(`<div class="pos">Change: ${esc(c.label)} — ${esc(c.description)}</div>`);
    });
    progressBar.style.width = '25%';

    // Phase 3: Run counterfactual probes
    progressText.textContent = 'Testing changes against persuadable middle...';

    // POST config first, get a ticket, then SSE with just the ticket
    const prepResp = await fetch(`/api/counterfactual/prepare/${sessionId}`, {
      method: 'POST',
      headers: llmHeaders(),
      body: JSON.stringify({
        changes: suggestedChanges,
        goal: goal,
        min_score: 4,
        max_score: 7,
        parallel: 5,
      }),
    });
    if (!prepResp.ok) throw await httpError(prepResp, 'Failed to prepare probes');
    const {ticket} = await prepResp.json();
    if (!ticket) throw new Error('Failed to prepare probes: no ticket returned');

    await new Promise((resolve, reject) => {
      const es = new EventSource(`/api/counterfactual/stream/${sessionId}?ticket=${encodeURIComponent(ticket)}`);

      es.addEventListener('start', (e) => {
        const d = JSON.parse(e.data);
        const goalLabel = d.goal ? ` toward "${d.goal}"` : '';
        progressText.textContent =
          `Probing ${d.total} evaluators across ${d.changes} changes${goalLabel}...`;
      });

      es.addEventListener('goal_weights', (e) => {
        const d = JSON.parse(e.data);
        progressText.textContent = d.message;
        appendLog(`<div>${esc(d.message)}</div>`);
      });

      es.addEventListener('progress', (e) => {
        const d = JSON.parse(e.data);
        // The probing phase owns the 25%..100% stretch of the bar.
        const fraction = d.total ? d.done / d.total : 0;
        const pct = 25 + Math.round(fraction * 75);
        progressBar.style.width = pct + '%';
        progressText.textContent = `${d.done}/${d.total} probed`;
        const delta = d.best_delta > 0 ? `+${d.best_delta}` : d.best_delta;
        const changeName = (suggestedChanges.find(c => c.id === d.best_change) || {}).label || d.best_change;
        appendLog(`<div>${esc(d.name)} (orig ${esc(d.original_score)}): best ${esc(delta)} from "${esc(changeName)}"</div>`);
      });

      es.addEventListener('complete', (e) => {
        es.close();
        const d = JSON.parse(e.data);
        progressBar.style.width = '100%';
        progressText.textContent = d.elapsed ? `Done in ${d.elapsed}s` : 'Done';

        if (d.error) {
          progressText.textContent = d.error;
          reject(new Error(d.error));
          return;
        }

        if (d.calibration) currentCalibration = d.calibration;
        lastGradientData = {results: d.results, changes: suggestedChanges, ranked: d.ranked};
        renderGradientTable(d.results, suggestedChanges, d.ranked, d.calibrated);
        document.getElementById('gradientText').textContent = d.gradient;
        document.getElementById('changesTested').textContent =
          suggestedChanges.map(c => `${c.label}: ${c.description}`).join('\n');
        document.getElementById('cfResults').classList.remove('hidden');
        resolve();
      });

      es.onerror = () => { es.close(); reject(new Error('Connection lost — you can try again without losing your draft')); };
    });
  } catch (e) {
    progressText.textContent = `Error: ${e.message}`;
  }
}
/**
 * Render the ranked list of tested changes into #gradientTable.
 *
 * Each change gets a clickable summary row plus a hidden detail row listing
 * who it helped or hurt. When `calibrated` carries items, two extra columns
 * show the impact in the user's metric and the Range column is dropped.
 *
 * @param {Array} results Raw per-evaluator results; only used by the legacy
 *   fallback when `ranked` is empty.
 * @param {Array} changes Tested changes ({id, label, description}).
 * @param {Array} ranked Backend-ranked rows (respects goal weights / VJP);
 *   recomputed client-side, unweighted, when missing.
 * @param {Object} [calibrated] Calibrated view ({items, current_metric}).
 */
function renderGradientTable(results, changes, ranked, calibrated) {
  // Non-negative numbers get an explicit '+'; negatives already carry '-'.
  const signed = (n) => `${n >= 0 ? '+' : ''}${n}`;
  const changeList = changes || [];

  // Use backend-provided ranked data (respects goal weights / VJP) when available,
  // falling back to client-side aggregation only for legacy responses.
  if (!ranked || !ranked.length) {
    // Legacy fallback: recompute from raw results (unweighted)
    const labels = {};
    const descs = {};
    changeList.forEach(c => { labels[c.id] = c.label; descs[c.id] = c.description; });

    const byChange = {};
    (results || []).filter(r => r && r.counterfactuals).forEach(r => {
      const ev = r._evaluator || {};
      (r.counterfactuals || []).forEach(cf => {
        const cid = cf.change_id;
        if (!byChange[cid]) byChange[cid] = {deltas: [], pos: 0, neg: 0, details: []};
        const delta = cf.delta || 0;
        byChange[cid].deltas.push(delta);
        if (delta > 0) byChange[cid].pos++;
        if (delta < 0) byChange[cid].neg++;
        byChange[cid].details.push({
          name: ev.name || '?', age: ev.age || '',
          occupation: ev.occupation || '', delta, reasoning: cf.reasoning || '',
        });
      });
    });

    ranked = Object.entries(byChange).map(([cid, d]) => {
      const avg = d.deltas.reduce((a, b) => a + b, 0) / d.deltas.length;
      d.details.sort((a, b) => b.delta - a.delta);
      return {
        id: cid, label: labels[cid] || cid, desc: descs[cid] || '',
        avg_delta: avg, min_delta: Math.min(...d.deltas), max_delta: Math.max(...d.deltas),
        positive: d.pos, negative: d.neg, details: d.details,
      };
    });
    ranked.sort((a, b) => b.avg_delta - a.avg_delta);
  } else {
    // Attach descriptions from changes list
    const descs = {};
    changeList.forEach(c => { descs[c.id] = c.description; });
    ranked.forEach(r => { if (!r.desc) r.desc = descs[r.id] || ''; });
  }

  // Build calibration lookup if available
  const calLookup = {};
  const hasCal = Boolean(calibrated && calibrated.items && calibrated.items.length > 0);
  if (hasCal) {
    calibrated.items.forEach(item => { calLookup[item.id] = item; });
  }

  // Update table header
  const thead = document.querySelector('#gradientTable thead tr');
  const headerMetric = (currentCalibration && currentCalibration.metric_name) || 'Metric';
  thead.innerHTML = hasCal
    ? `<th>#</th><th>Change</th><th>Score</th><th>${esc(headerMetric)} Impact</th><th>Predicted ${esc(headerMetric)}</th><th>Helps</th><th>Hurts</th>`
    : '<th>#</th><th>Change</th><th>Avg Impact</th><th>Range</th><th>Helps</th><th>Hurts</th>';

  // Show calibration summary above table (the element is created on first use)
  let calSummaryEl = document.getElementById('calSummary');
  if (!calSummaryEl) {
    calSummaryEl = document.createElement('div');
    calSummaryEl.id = 'calSummary';
    calSummaryEl.style.cssText = 'font-size:0.85rem;margin-bottom:12px';
    const table = document.getElementById('gradientTable');
    table.parentElement.insertBefore(calSummaryEl, table);
  }
  if (hasCal && currentCalibration) {
    const mn = currentCalibration.metric_name || 'metric';
    const mu = currentCalibration.metric_unit || '';
    calSummaryEl.innerHTML = `<span style="color:var(--accent2)">Calibrated to ${esc(mn)}</span> — current: <strong>${esc(calibrated.current_metric)}${esc(mu)}</strong>`;
    calSummaryEl.classList.remove('hidden');
  } else {
    calSummaryEl.classList.add('hidden');
  }

  // Collect all rows and assign once, so the table body is parsed a single time.
  const rows = [];
  ranked.forEach((r, i) => {
    const avg = Number(r.avg_delta) || 0;
    const cls = avg >= 0 ? 'delta-pos' : 'delta-neg';
    const barWidth = Math.min(Math.abs(avg) * 30, 120);
    const barColor = avg >= 0 ? 'var(--green)' : 'var(--red)';
    const rowId = `gradient-detail-${i}`;

    const calItem = calLookup[r.id];
    let calCols = '';
    if (hasCal && calItem) {
      const mu = (currentCalibration && currentCalibration.metric_unit) || '';
      const md = calItem.metric_delta;
      const mdCls = md >= 0 ? 'delta-pos' : 'delta-neg';
      calCols = `
        <td class="${mdCls}">${md >= 0 ? '+' : ''}${esc(formatMetric(md, mu))}</td>
        <td style="font-weight:600">${esc(formatMetric(calItem.predicted_metric, mu))}</td>
      `;
    } else if (hasCal) {
      calCols = '<td>—</td><td>—</td>';
    }

    const helpHurtCols = `<td style="color:var(--green)">${esc(r.positive)}</td>
        <td style="color:var(--red)">${esc(r.negative)}</td>`;
    const rangeCols = hasCal
      ? helpHurtCols
      : `<td style="color:var(--text2)">${esc(signed(r.min_delta))} to ${esc(signed(r.max_delta))}</td>
        ${helpHurtCols}`;

    // Summary row (clickable)
    rows.push(`
      <tr onclick="document.getElementById('${rowId}').classList.toggle('hidden')" style="cursor:pointer">
        <td>${i + 1}</td>
        <td>
          <div style="font-weight:600">${esc(r.label)}</div>
          <div style="font-size:0.75rem;color:var(--text2);margin-top:2px">${esc(r.desc)}</div>
        </td>
        <td class="${cls}">
          ${avg >= 0 ? '+' : ''}${avg.toFixed(1)}
          <span class="delta-bar" style="width:${barWidth}px;background:${barColor};margin-left:8px"></span>
        </td>
        ${calCols}${rangeCols}
      </tr>
    `);

    // Detail row (hidden by default): top 5 helped, top 3 hurt, count of unaffected
    const details = r.details || [];
    const helped = details.filter(d => d.delta > 0).slice(0, 5);
    const hurt = details.filter(d => d.delta < 0).slice(0, 3);
    const neutral = details.filter(d => d.delta === 0).length;

    const who = (d) => `<strong>${esc(d.name)}</strong> (${esc(d.age)}, ${esc(d.occupation)}): ${esc(d.reasoning)}`;
    let detailHtml = '<div style="padding:12px 16px;font-size:0.8rem;line-height:1.6">';
    if (helped.length) {
      detailHtml += '<div style="color:var(--green);font-weight:600;margin-bottom:4px">Helps:</div>';
      helped.forEach(d => {
        detailHtml += `<div style="margin-left:12px;margin-bottom:4px">+${esc(d.delta)} ${who(d)}</div>`;
      });
    }
    if (hurt.length) {
      detailHtml += '<div style="color:var(--red);font-weight:600;margin-top:8px;margin-bottom:4px">Hurts:</div>';
      hurt.forEach(d => {
        detailHtml += `<div style="margin-left:12px;margin-bottom:4px">${esc(d.delta)} ${who(d)}</div>`;
      });
    }
    if (neutral) {
      detailHtml += `<div style="color:var(--text2);margin-top:8px">${neutral} evaluators unaffected</div>`;
    }
    detailHtml += '</div>';

    rows.push(`
      <tr id="${rowId}" class="hidden">
        <td colspan="${hasCal ? 7 : 6}" style="padding:0;background:var(--bg);border-bottom:2px solid var(--border)">${detailHtml}</td>
      </tr>
    `);
  });

  document.querySelector('#gradientTable tbody').innerHTML = rows.join('');
}
// ── Step 5: Bias Audit ──
/**
 * Run the selected bias probes against the current session and show the
 * results table.
 *
 * Streams /api/bias-audit/stream/<session> over SSE. Each probe's shifted
 * percentage is compared with a fixed expected baseline; more than 10 points
 * above is "Over-biased", more than 10 below is "Under-biased". If any probe
 * is over-biased, the bias-calibration checkbox is ticked automatically.
 */
function runBiasAudit() {
  if (!sessionId) return alert('Start with Step 1 to create a panel review.');

  const probes = [];
  if (document.getElementById('probeFraming').checked) probes.push('framing');
  if (document.getElementById('probeAuthority').checked) probes.push('authority');
  if (document.getElementById('probeOrder').checked) probes.push('order');
  if (probes.length === 0) return alert('Select at least one probe.');

  const sample = parseInt(document.getElementById('auditSample').value, 10) || 10;
  const btn = document.getElementById('auditBtn');
  const progressBar = document.getElementById('auditProgressBar');
  const progressText = document.getElementById('auditProgressText');

  btn.disabled = true;
  document.getElementById('auditProgress').classList.remove('hidden');
  document.getElementById('auditResults').classList.add('hidden');
  // A previous audit leaves the bar full; start this run from empty.
  progressBar.style.width = '0%';

  // Expected shifted-% per probe; a probe without an entry gets no assessment.
  const baselines = {framing: 30, authority: 20, order: 0};
  const fixed = (v, digits) => (typeof v === 'number' ? v.toFixed(digits) : '—');
  // Single source of truth for the verdict, used by both the table and the
  // auto-check of the calibration box.
  const assess = (a) => {
    const expected = baselines[a.probe];
    if (expected === undefined) return {expected, label: '—', style: '', over: false};
    const gap = a.shifted_pct - expected;
    if (gap > 10) return {expected, label: 'Over-biased', style: 'color:var(--red)', over: true};
    if (gap < -10) return {expected, label: 'Under-biased', style: 'color:var(--yellow)', over: false};
    return {expected, label: 'Well-calibrated', style: 'color:var(--green)', over: false};
  };

  let probesDone = 0;
  const totalProbes = probes.length;

  const params = new URLSearchParams({probes: probes.join(','), sample, parallel: 5});
  const es = new EventSource(`/api/bias-audit/stream/${sessionId}?${params}`);

  es.addEventListener('start', (e) => {
    const d = JSON.parse(e.data);
    progressText.textContent =
      `Running ${d.probes.length} probes on ${d.sample_size} evaluators (${d.model})...`;
  });

  es.addEventListener('probe_start', (e) => {
    const d = JSON.parse(e.data);
    progressText.textContent = `Running ${d.probe} probe...`;
  });

  es.addEventListener('probe_complete', (e) => {
    probesDone++;
    const pct = Math.round(probesDone / totalProbes * 100);
    progressBar.style.width = pct + '%';
    const d = JSON.parse(e.data);
    // NOTE(review): assumes a failed probe reports `analysis.error` here, as
    // the entries of the final `analyses` list do — confirm against the server.
    const analysis = d.analysis || {};
    const outcome = (analysis.error || typeof analysis.shifted_pct !== 'number')
      ? 'failed'
      : `${analysis.shifted_pct}% shifted`;
    progressText.textContent =
      `${d.probe}: ${outcome} (${probesDone}/${totalProbes} probes done)`;
  });

  es.addEventListener('complete', (e) => {
    es.close();
    const d = JSON.parse(e.data);
    const analyses = d.analyses || [];
    progressBar.style.width = '100%';
    progressText.textContent = 'Audit complete';

    const rows = analyses.map(a => {
      if (a.error) {
        return `<tr><td>${esc(a.probe)}</td><td colspan="4">Error: ${esc(a.error)}</td></tr>`;
      }
      const verdict = assess(a);
      return `
        <tr>
          <td style="font-weight:600">${esc(a.probe)}</td>
          <td>${fixed(a.shifted_pct, 1)}%</td>
          <td>${fixed(a.avg_abs_delta, 2)}</td>
          <td style="color:var(--text2)">${verdict.expected !== undefined ? verdict.expected + '%' : '—'}</td>
          <td style="${verdict.style};font-weight:600">${verdict.label}</td>
        </tr>
      `;
    });
    document.querySelector('#auditTable tbody').innerHTML = rows.join('');

    document.getElementById('auditReport').textContent = d.report;
    document.getElementById('auditResults').classList.remove('hidden');

    // If over-biased, auto-check the bias calibration checkbox
    const hasOverBias = analyses.some(a => !a.error && assess(a).over);
    if (hasOverBias) {
      document.getElementById('biasCalibration').checked = true;
    }

    btn.disabled = false;
  });

  es.onerror = () => {
    es.close();
    progressText.textContent = 'Connection lost — you can try again without losing your draft';
    btn.disabled = false;
  };
}
// ── Metric Calibration ──
// Calibration object most recently received from the server; null until one is set.
let currentCalibration = null;

/**
 * Format a metric value with its unit for display.
 *
 *   '%'   → two decimals with a trailing percent sign    ("1.50%")
 *   '$'   → two decimals with a leading dollar sign; the
 *           minus sign goes before the symbol            ("-$1.50")
 *   other → four decimals, then the unit if there is one ("0.5000 pts")
 *
 * @param {number} value Metric value; numeric strings are accepted.
 * @param {string} unit Unit label, may be empty.
 * @returns {string} Formatted value, or '—' when the value is missing or
 *   not a finite number.
 */
function formatMetric(value, unit) {
  const n = Number(value);
  if (value == null || value === '' || !Number.isFinite(n)) return '—';
  if (unit === '%') return n.toFixed(2) + '%';
  if (unit === '$') return (n < 0 ? '-$' : '$') + Math.abs(n).toFixed(2);
  return n.toFixed(4) + (unit ? ' ' + unit : '');
}
// Metric-name dropdown: the free-text name field is visible only while the
// option with an empty value is selected. Any change re-applies calibration.
document.getElementById('calMetricName').addEventListener('change', function () {
  const customField = document.getElementById('calMetricNameCustom');
  const wantsCustom = this.value === '';

  customField.classList.toggle('hidden', !wantsCustom);
  if (wantsCustom) customField.focus();

  applyCalibration();
});
// Handle of the pending debounce timer, if any.
let _calDebounce = null;

/**
 * Debounced reaction to edits of the metric value/unit inputs.
 *
 * Restarts a 600 ms timer on every call. When it fires: a positive metric
 * value re-applies calibration; otherwise an existing calibration is cleared.
 */
function debouncedApplyCalibration() {
  const settle = () => {
    const entered = parseFloat(document.getElementById('calMetricValue').value);
    if (entered > 0) {
      applyCalibration();
      return;
    }
    if (currentCalibration) clearCalibration();
  };

  clearTimeout(_calDebounce);
  _calDebounce = setTimeout(settle, 600);
}

['calMetricValue', 'calMetricUnit'].forEach((fieldId) => {
  document.getElementById(fieldId).addEventListener('input', debouncedApplyCalibration);
});
/**
 * Resolve the name of the metric being calibrated.
 *
 * A non-empty dropdown value is returned as-is. When the dropdown value is
 * empty, the trimmed text of the custom-name field is used instead, with
 * 'metric' as the fallback if that is blank too.
 *
 * @returns {string}
 */
function getMetricName() {
  const chosen = document.getElementById('calMetricName').value;
  if (chosen !== '') {
    return chosen;
  }
  const typed = document.getElementById('calMetricNameCustom').value.trim();
  return typed === '' ? 'metric' : typed;
}
/**
 * Average the numeric scores in evalResultsData.
 *
 * Entries that are falsy, or whose `score` is not of type number, are ignored.
 *
 * @returns {number|null} The mean score, or null when no entry qualifies
 *   (including when evalResultsData itself is falsy).
 */
function getMeanScore() {
  const rows = evalResultsData || [];
  let total = 0;
  let count = 0;
  rows.forEach((entry) => {
    if (entry && typeof entry.score === 'number') {
      total += entry.score;
      count += 1;
    }
  });
  return count === 0 ? null : total / count;
}
// Sequence number of the most recent calibration request; lets a slow, older
// response be recognised and dropped instead of overwriting a newer one.
let _calRequestSeq = 0;

/**
 * Anchor the current mean evaluation score to the metric value the user
 * entered, and refresh the UI from the server's response.
 *
 * Returns without doing anything when there is no session, the metric value
 * is not a positive number, or no positive mean score is available.
 * Otherwise POSTs a single anchor to /api/calibrate/<sessionId>. On success
 * it stores the calibration in `currentCalibration`, shows a green status
 * line and, if the response carries a calibrated gradient and gradient data
 * has been kept from an earlier render, re-renders the gradient table. On
 * failure it shows the error in red in the same status element.
 *
 * @returns {Promise<void>} Request failures are reported in the status
 *   element rather than thrown.
 */
async function applyCalibration() {
  if (!sessionId) return;
  const metricName = getMetricName();
  const metricValue = parseFloat(document.getElementById('calMetricValue').value);
  const metricUnit = document.getElementById('calMetricUnit').value.trim() || '';
  if (!metricValue || metricValue <= 0) return;
  const meanScore = getMeanScore();
  if (!meanScore || meanScore <= 0) return;
  const anchors = [{mean_score: meanScore, metric_value: metricValue}];

  // The change handler and the debounced input handler can both have a
  // request in flight; only the newest one may touch state or the DOM.
  const requestSeq = ++_calRequestSeq;
  const isStale = () => requestSeq !== _calRequestSeq;
  const status = document.getElementById('calStatus');
  try {
    const resp = await fetch(`/api/calibrate/${sessionId}`, {
      method: 'POST',
      headers: llmHeaders(),
      body: JSON.stringify({metric_name: metricName, metric_unit: metricUnit, anchors}),
    });
    // An error body that is not JSON (or is empty) must not surface a parser
    // error to the user; fall through to the generic message instead.
    const data = await resp.json().catch(() => null);
    if (isStale()) return;
    if (!resp.ok || !data) {
      // `detail` is only shown when it is plain text; anything else would
      // stringify to "[object Object]".
      const detail = data && typeof data.detail === 'string' ? data.detail : '';
      throw new Error(detail || 'Calibration failed');
    }
    currentCalibration = data.calibration;
    status.innerHTML = `<span style="color:var(--green)">Anchored: score ${meanScore.toFixed(1)} = ${metricValue}${esc(metricUnit)} ${esc(metricName)}</span>`;
    status.classList.remove('hidden');
    // Re-render gradient table with calibration if it exists
    if (data.calibrated_gradient && lastGradientData) {
      renderGradientTable(lastGradientData.results, lastGradientData.changes, lastGradientData.ranked, data.calibrated_gradient);
    }
  } catch (e) {
    if (isStale()) return;
    status.innerHTML = `<span style="color:var(--red)">${esc(e.message)}</span>`;
    status.classList.remove('hidden');
  }
}
/**
 * Remove the session's calibration on the server and reset the calibration UI.
 *
 * Does nothing without a session. After the DELETE succeeds it forgets
 * `currentCalibration`, hides the status line, blanks the metric value input,
 * re-renders the gradient table (if gradient data exists) without calibration
 * and hides the calibration summary. If the request fails, the UI is left as
 * it was and the failure is shown in the status line.
 *
 * @returns {Promise<void>} Request failures are reported in the status
 *   element rather than thrown.
 */
async function clearCalibration() {
  if (!sessionId) return;
  try {
    const resp = await fetch(`/api/calibrate/${sessionId}`, {method: 'DELETE', headers: llmHeaders()});
    // NOTE(review): 404 is treated as success on the assumption that the API
    // answers 404 when there is no calibration to delete — confirm.
    if (!resp.ok && resp.status !== 404) throw new Error('Could not clear calibration');
  } catch (e) {
    // Keep local state untouched so the page does not claim the calibration
    // is gone while the server still holds it.
    const status = document.getElementById('calStatus');
    status.innerHTML = `<span style="color:var(--red)">${esc(e.message)}</span>`;
    status.classList.remove('hidden');
    return;
  }
  currentCalibration = null;
  document.getElementById('calStatus').classList.add('hidden');
  document.getElementById('calMetricValue').value = '';
  // Re-render gradient table without calibration
  if (lastGradientData) {
    renderGradientTable(lastGradientData.results, lastGradientData.changes, lastGradientData.ranked, null);
  }
  // Clear summary above gradient table
  const calSummaryEl = document.getElementById('calSummary');
  if (calSummaryEl) calSummaryEl.classList.add('hidden');
}
// ── Download report ──
/**
 * Start a browser download of the current session's report.
 *
 * Alerts and returns if no session exists yet. Otherwise clicks a temporary
 * link to /api/report/<sessionId> whose `download` attribute suggests the
 * file name sgo-report-<sessionId>.md, then removes the link again.
 */
function downloadReport() {
  if (!sessionId) {
    alert('Run an evaluation first.');
    return;
  }
  const link = Object.assign(document.createElement('a'), {
    href: `/api/report/${sessionId}`,
    download: `sgo-report-${sessionId}.md`,
  });
  const { body } = document;
  body.appendChild(link);
  link.click();
  body.removeChild(link);
}
// Boot — runs last, after the top-level listeners above have been attached.
// init() is defined outside this section; presumably it performs the page's
// start-up work — confirm there.
init();
| </script> | |
| </body> | |
| </html> | |