Faaz committed on
Commit
18d863a
·
1 Parent(s): 856e556

Tier 2: image upload, vision toggle, StackBlitz/CodeSandbox launchers, UI polish

Browse files

frontend/app.js:
- Real Gradio image upload via /gradio_api/upload (gr.Image cannot decode
raw data URLs). dataUrlToBlob() + uploadImageToGradio() helpers send a
multipart POST with Authorization, then hand the resulting /tmp/gradio
path to chat_fn as a {path, meta:{_type:gradio.FileData}} reference.
- Vision toggle state (visionEnabled) plumbed through Settings modal,
persisted in localStorage, defaults OFF because the current vision
fusion is weak. send() drops images from the API payload when the
toggle is off so we don't waste GPU seconds on a known-bad path.
- Cloud sandbox launchers for real Next.js / React / Node / HTML
execution — both StackBlitz (form POST to /run, WebContainer-backed)
and CodeSandbox (Define API JSON POST returning sandbox_id).
buildStackBlitzProject() detects Next.js / React / Node / HTML from
imports and dependency hints, generates a working package.json and
template files. buildCodeSandboxFiles() reuses the same project shape.
- BUG FIX: the .md-run StackBlitz button was rendered but never had a
click handler — clicking it did literally nothing. New delegated
handler on els.messages routes .md-copy / .md-run / .md-sandbox to
the right action with proper loading state for the async CodeSandbox
call.
- buildHistory() trims to last 20 turns and is passed to the backend on
every API + REST call, matching the new chat_fn(history=...) signature.

frontend/index.html:
- Vision toggle field in Settings modal with a clear hint that vision
output is currently low quality (set expectations honestly).

frontend/styles.css:
- Toggle switch component styling.
- .md-run (purple) + .md-sandbox (amber) launcher button styles next to
the existing .md-copy, with hover lift and disabled (loading) state.
- .md-code-block code: max-height 540px so giant generated files don't
blow out the message column.
- Wider preview pane (--preview-w 420 -> 480; 380 -> 440 at <=1280px;
added new 1180px breakpoint for 400px) so generated HTML/code is
actually readable.
- Tightened message density: gap 24 -> 20, top padding 28 -> 22.

frontend/test_api.py:
- --memory and --vision CLI flags. Memory mode runs a 3-turn identity
+ recall test against the live Space. Vision mode synthesizes a tiny
PNG, uploads it via /gradio_api/upload, and asks the model what color
it is — verifies the upload pipeline end-to-end.

hf_space/app.py:
- Vision path now accepts the {path: ...} dict that Gradio resolves
server-side, so uploaded images actually reach the CLIP encoder.
- _coerce_history() accepts list-of-dict OR JSON-encoded string from
the raw Gradio Textbox API.
- Hardened clean_output regex against truncating words like 'Don'.

.gitignore:
- Exclude 'Building Agentic Vision-Coder System.md' (Cascade chat log
dump, same pattern as the existing pipeline log entries).

.gitignore CHANGED
Binary files a/.gitignore and b/.gitignore differ
 
frontend/app.js CHANGED
@@ -38,12 +38,13 @@
38
  // State (persisted to localStorage)
39
  // ----------------------------------------------------------------
40
  const defaultState = () => ({
41
- apiUrl: API_DEFAULT,
42
- hfToken: '', // optional HF PRO token to bypass anonymous ZeroGPU quota
43
- temperature: 0.7,
44
- maxTokens: 2048,
45
- chats: [], // [{id, title, createdAt, updatedAt, messages: [{role, content, images?}]}]
46
- currentId: null,
 
47
  });
48
 
49
  const state = loadState();
@@ -135,6 +136,7 @@
135
  settingsUrl: $('#settings-url'),
136
  settingsHfToken:$('#settings-hf-token'),
137
  hfTokenStatus: $('#hf-token-status'),
 
138
  settingsTemp: $('#settings-temp'),
139
  settingsTokens: $('#settings-tokens'),
140
  tempVal: $('#temp-val'),
@@ -261,6 +263,203 @@
261
  return merged;
262
  }
263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  // Extract last fenced code block from the response text
265
  function extractLastCodeBlock(text) {
266
  if (!text) return null;
@@ -302,11 +501,21 @@
302
  const lang = seg.lang || languageFromCode(seg.value);
303
  const safe = escapeHtml(seg.value);
304
  const dataCode = escapeAttr(seg.value);
 
 
 
 
 
 
 
305
  return (
306
  `<pre class="md-code-block">` +
307
  `<div class="md-code-head">` +
308
  `<span>${escapeHtml(lang)}</span>` +
309
- `<button class="md-copy" data-code="${dataCode}" type="button">Copy</button>` +
 
 
 
310
  `</div>` +
311
  `<code class="language-${escapeHtml(lang)}">${safe}</code>` +
312
  `</pre>`
@@ -334,6 +543,51 @@
334
  return h;
335
  }
336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  // Detect responses that came back as a quota / auth error from the
338
  // backend's chat_fn try/except, so we can show actionable UX.
339
  function detectAuthError(result) {
@@ -469,12 +723,34 @@ Your token is stored only in this browser's local storage and sent as an \`Autho
469
  // 1. POST /gradio_api/call/{api_name} → get event_id
470
  // 2. GET /gradio_api/call/{api_name}/{event_id} → stream result
471
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
  // Step 1: Submit the request
473
  const submitRes = await fetch(`${base}/gradio_api/call/chat_fn`, {
474
  method: 'POST',
475
  headers: authHeaders({ 'Content-Type': 'application/json' }),
476
  body: JSON.stringify({
477
- data: [prompt, image || null, state.temperature, state.maxTokens, historyJson],
478
  }),
479
  signal,
480
  });
@@ -1080,8 +1356,17 @@ console.log("MINDI 1.5 — awaiting connection");
1080
  els.chatTitle.textContent = chat.title;
1081
  }
1082
 
1083
- // Reset input
1084
- const imageForApi = runtime.pendingImages[0]?.dataUrl || null;
 
 
 
 
 
 
 
 
 
1085
  els.promptInput.value = '';
1086
  autosizeTextarea();
1087
  clearPendingImages();
@@ -1248,6 +1533,7 @@ console.log("MINDI 1.5 — awaiting connection");
1248
  function openSettings() {
1249
  els.settingsUrl.value = state.apiUrl || '';
1250
  if (els.settingsHfToken) els.settingsHfToken.value = state.hfToken || '';
 
1251
  els.settingsTemp.value = state.temperature;
1252
  els.settingsTokens.value = state.maxTokens;
1253
  els.tempVal.textContent = Number(state.temperature).toFixed(2);
@@ -1262,13 +1548,15 @@ console.log("MINDI 1.5 — awaiting connection");
1262
  function applySettings() {
1263
  const url = els.settingsUrl.value.trim();
1264
  const token = els.settingsHfToken ? els.settingsHfToken.value.trim() : '';
 
1265
  const temp = parseFloat(els.settingsTemp.value);
1266
  const tokens = parseInt(els.settingsTokens.value, 10);
1267
  const tokenChanged = token !== state.hfToken;
1268
- state.apiUrl = url || API_DEFAULT;
1269
- state.hfToken = token;
1270
- state.temperature = isFinite(temp) ? temp : 0.7;
1271
- state.maxTokens = isFinite(tokens) ? tokens : 2048;
 
1272
  // If the user just saved a new (non-empty) token, give the API another shot.
1273
  if (tokenChanged && token) {
1274
  runtime.authBlocked = false;
@@ -1393,17 +1681,44 @@ console.log("MINDI 1.5 — awaiting connection");
1393
  els.copyCode.addEventListener('click', copyLastCode);
1394
  els.downloadCode.addEventListener('click', downloadLastCode);
1395
 
1396
- // Click handler for inline copy buttons inside messages (delegated)
 
1397
  els.messages.addEventListener('click', async (e) => {
1398
- const btn = e.target.closest('.md-copy');
1399
- if (!btn) return;
1400
- try {
1401
- await navigator.clipboard.writeText(btn.dataset.code || '');
1402
- const prev = btn.textContent;
1403
- btn.textContent = 'Copied!';
1404
- setTimeout(() => { btn.textContent = prev; }, 1400);
1405
- } catch {
1406
- toast('Clipboard unavailable', 'error');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1407
  }
1408
  });
1409
 
 
38
  // State (persisted to localStorage)
39
  // ----------------------------------------------------------------
40
  const defaultState = () => ({
41
+ apiUrl: API_DEFAULT,
42
+ hfToken: '', // optional HF PRO token to bypass anonymous ZeroGPU quota
43
+ visionEnabled: false, // default OFF — see notes in Settings; vision-language fusion is currently low-quality
44
+ temperature: 0.7,
45
+ maxTokens: 2048,
46
+ chats: [], // [{id, title, createdAt, updatedAt, messages: [{role, content, images?}]}]
47
+ currentId: null,
48
  });
49
 
50
  const state = loadState();
 
136
  settingsUrl: $('#settings-url'),
137
  settingsHfToken:$('#settings-hf-token'),
138
  hfTokenStatus: $('#hf-token-status'),
139
+ settingsVision: $('#settings-vision'),
140
  settingsTemp: $('#settings-temp'),
141
  settingsTokens: $('#settings-tokens'),
142
  tempVal: $('#temp-val'),
 
263
  return merged;
264
  }
265
 
266
+ // ----------------------------------------------------------------
267
+ // Cloud sandbox launcher (StackBlitz) — gives users real Next.js /
268
+ // Node / React / HTML execution by handing the generated code off
269
+ // to stackblitz.com via their public POST API. No backend required.
270
+ // Docs: https://developer.stackblitz.com/docs/platform/post-api
271
+ // ----------------------------------------------------------------
272
+ function isCloudRunnable(code, lang) {
273
+ const l = (lang || '').toLowerCase();
274
+ if (['html', 'markup', 'jsx', 'tsx', 'javascript', 'js', 'typescript', 'ts', 'json'].includes(l)) return true;
275
+ // Heuristic: short non-obvious snippets get the button if they parse
276
+ // like a web project (so the model can ship a partial JS file too).
277
+ return /<!doctype|<html|^\s*import |^\s*export |^\s*function |^\s*const |^\s*class /im.test(code);
278
+ }
279
+
280
+ // Decide which StackBlitz template + file layout to use based on what
281
+ // the model produced. We try to be permissive — anything that looks
282
+ // like a React/Next/Node project goes into the WebContainer-backed
283
+ // 'node' template; raw HTML uses the static 'html' template.
284
+ function buildStackBlitzProject(code, lang) {
285
+ const l = (lang || '').toLowerCase();
286
+ const looksLikeNext = /from ['"]next\/|next\.config|app\/page\.[jt]sx|pages\/index/i.test(code);
287
+ const looksLikeReact = /from ['"]react['"]|ReactDOM\.|useState\(|useEffect\(|<\w+\s+\w+={/i.test(code);
288
+ const looksLikeNode = /^\s*(?:const|import)\s+\w+\s*=?\s*require\(|process\.env|module\.exports/m.test(code);
289
+ const isHtmlDoc = /<!doctype|<html/i.test(code);
290
+
291
+ const title = 'MINDI generated project';
292
+ const description = 'Generated by MINDI 1.5 Vision-Coder';
293
+
294
+ if (looksLikeNext) {
295
+ // Minimal Next.js 14 app-router project.
296
+ return {
297
+ title, description,
298
+ template: 'node',
299
+ files: {
300
+ 'package.json': JSON.stringify({
301
+ name: 'mindi-next-app',
302
+ private: true,
303
+ scripts: { dev: 'next dev', build: 'next build', start: 'next start' },
304
+ dependencies: { next: '^14.2.5', react: '^18.3.1', 'react-dom': '^18.3.1' },
305
+ }, null, 2),
306
+ 'app/page.tsx': /export\s+default/i.test(code) ? code : `export default function Page() {\n return (\n <main>\n${code.split('\n').map(l => ' ' + l).join('\n')}\n </main>\n );\n}\n`,
307
+ 'app/layout.tsx':
308
+ `export default function RootLayout({ children }: { children: React.ReactNode }) {
309
+ return (<html lang="en"><body>{children}</body></html>);
310
+ }
311
+ `,
312
+ 'tsconfig.json': JSON.stringify({
313
+ compilerOptions: {
314
+ target: 'ES2020', lib: ['dom', 'dom.iterable', 'esnext'], jsx: 'preserve',
315
+ module: 'esnext', moduleResolution: 'bundler', strict: true, esModuleInterop: true,
316
+ skipLibCheck: true, allowJs: true, isolatedModules: true, noEmit: true,
317
+ plugins: [{ name: 'next' }],
318
+ },
319
+ include: ['next-env.d.ts', '**/*.ts', '**/*.tsx'],
320
+ }, null, 2),
321
+ 'README.md': `# ${title}\n\n${description}\n\nRun:\n\n\`\`\`bash\nnpm install\nnpm run dev\n\`\`\`\n`,
322
+ },
323
+ };
324
+ }
325
+
326
+ if (looksLikeReact || l === 'jsx' || l === 'tsx') {
327
+ // Vite + React project (faster boot in WebContainer than CRA).
328
+ const ext = (l === 'tsx' || /\:\s*\w+(\[\])?/.test(code)) ? 'tsx' : 'jsx';
329
+ return {
330
+ title, description,
331
+ template: 'node',
332
+ files: {
333
+ 'package.json': JSON.stringify({
334
+ name: 'mindi-react-app',
335
+ private: true,
336
+ scripts: { dev: 'vite', build: 'vite build', preview: 'vite preview' },
337
+ dependencies: { react: '^18.3.1', 'react-dom': '^18.3.1' },
338
+ devDependencies: { '@vitejs/plugin-react': '^4.3.1', vite: '^5.4.1' },
339
+ }, null, 2),
340
+ 'vite.config.js':
341
+ `import { defineConfig } from 'vite';\nimport react from '@vitejs/plugin-react';\nexport default defineConfig({ plugins: [react()] });\n`,
342
+ 'index.html':
343
+ `<!doctype html><html><head><meta charset="utf-8"><title>${title}</title></head><body><div id="root"></div><script type="module" src="/src/main.${ext}"></script></body></html>`,
344
+ [`src/main.${ext}`]:
345
+ `import React from 'react';\nimport { createRoot } from 'react-dom/client';\nimport App from './App.${ext}';\ncreateRoot(document.getElementById('root')).render(<App />);\n`,
346
+ [`src/App.${ext}`]: /export\s+default/i.test(code) ? code : `export default function App() {\n return (<div>${'<pre>{`' + code.replace(/`/g, '\\`') + '`}</pre>'}</div>);\n}\n`,
347
+ },
348
+ };
349
+ }
350
+
351
+ if (looksLikeNode || l === 'json') {
352
+ return {
353
+ title, description,
354
+ template: 'node',
355
+ files: {
356
+ 'package.json': JSON.stringify({
357
+ name: 'mindi-node-app', private: true,
358
+ scripts: { start: 'node index.js' },
359
+ }, null, 2),
360
+ 'index.js': l === 'json' ? `console.log(${code});` : code,
361
+ },
362
+ };
363
+ }
364
+
365
+ if (isHtmlDoc || l === 'html' || l === 'markup') {
366
+ return {
367
+ title, description,
368
+ template: 'html',
369
+ files: {
370
+ 'index.html': isHtmlDoc ? code : `<!doctype html><html><head><meta charset="utf-8"><title>${title}</title></head><body>\n${code}\n</body></html>`,
371
+ },
372
+ };
373
+ }
374
+
375
+ // Fallback: static html with the code dropped into a <pre> tag so
376
+ // the user at least sees their snippet rendered in the StackBlitz preview.
377
+ return {
378
+ title, description,
379
+ template: 'html',
380
+ files: {
381
+ 'index.html': `<!doctype html><html><head><meta charset="utf-8"><title>${title}</title></head><body><pre>${escapeHtml(code)}</pre></body></html>`,
382
+ },
383
+ };
384
+ }
385
+
386
+ // Hand the project off to stackblitz.com via a hidden form POST.
387
+ // The new tab opens the cloud IDE with all files pre-loaded and the
388
+ // dev server booting. For 'node' templates this means a real Node.js
389
+ // runtime in the browser via WebContainers — yes, that includes
390
+ // 'npm install' for Next.js / React projects.
391
+ function launchInStackBlitz(code, lang) {
392
+ const proj = buildStackBlitzProject(code, lang);
393
+ const form = document.createElement('form');
394
+ form.action = 'https://stackblitz.com/run';
395
+ form.method = 'POST';
396
+ form.target = '_blank';
397
+ form.rel = 'noopener';
398
+ form.style.display = 'none';
399
+
400
+ const add = (name, value) => {
401
+ const input = document.createElement('input');
402
+ input.type = 'hidden';
403
+ input.name = name;
404
+ input.value = value;
405
+ form.appendChild(input);
406
+ };
407
+ add('project[title]', proj.title);
408
+ add('project[description]', proj.description);
409
+ add('project[template]', proj.template);
410
+ add('project[settings][compile][trigger]', 'auto');
411
+ Object.entries(proj.files).forEach(([path, content]) => {
412
+ add(`project[files][${path}]`, content);
413
+ });
414
+
415
+ document.body.appendChild(form);
416
+ form.submit();
417
+ setTimeout(() => form.remove(), 0);
418
+ }
419
+
420
+ // ----------------------------------------------------------------
421
+ // Cloud sandbox launcher (CodeSandbox) — second cloud IDE option.
422
+ // Uses the public Define API which returns a sandbox_id we redirect to.
423
+ // Docs: https://codesandbox.io/docs/learn/sandboxes/cli-api#define-api
424
+ // We reuse buildStackBlitzProject() for the file shape since both IDEs
425
+ // accept the same package.json / file layout; CodeSandbox auto-detects
426
+ // the template from package.json dependencies.
427
+ // ----------------------------------------------------------------
428
+ function buildCodeSandboxFiles(code, lang) {
429
+ const proj = buildStackBlitzProject(code, lang);
430
+ const files = {};
431
+ Object.entries(proj.files).forEach(([path, content]) => {
432
+ files[path] = { content };
433
+ });
434
+ // For raw HTML projects (StackBlitz template='html'), nudge CodeSandbox
435
+ // toward its 'static' template so it serves index.html as-is instead of
436
+ // trying to npm install nothing.
437
+ if (proj.template === 'html' && !files['package.json']) {
438
+ files['sandbox.config.json'] = { content: JSON.stringify({ template: 'static' }, null, 2) };
439
+ }
440
+ return files;
441
+ }
442
+
443
+ async function launchInCodeSandbox(code, lang) {
444
+ const files = buildCodeSandboxFiles(code, lang);
445
+ try {
446
+ const res = await fetch('https://codesandbox.io/api/v1/sandboxes/define?json=1', {
447
+ method: 'POST',
448
+ headers: { 'Content-Type': 'application/json', 'Accept': 'application/json' },
449
+ body: JSON.stringify({ files }),
450
+ });
451
+ if (!res.ok) {
452
+ const txt = await res.text().catch(() => '');
453
+ throw new Error(`HTTP ${res.status}: ${txt.slice(0, 160)}`);
454
+ }
455
+ const data = await res.json();
456
+ if (!data || !data.sandbox_id) throw new Error('No sandbox_id in response');
457
+ window.open(`https://codesandbox.io/s/${data.sandbox_id}`, '_blank', 'noopener');
458
+ } catch (err) {
459
+ toast(`CodeSandbox launch failed: ${err.message || err}`, 'error');
460
+ }
461
+ }
462
+
463
  // Extract last fenced code block from the response text
464
  function extractLastCodeBlock(text) {
465
  if (!text) return null;
 
501
  const lang = seg.lang || languageFromCode(seg.value);
502
  const safe = escapeHtml(seg.value);
503
  const dataCode = escapeAttr(seg.value);
504
+ const runnable = isCloudRunnable(seg.value, lang);
505
+ const launchBtns = runnable
506
+ ? (
507
+ `<button class="md-run" data-code="${dataCode}" data-lang="${escapeAttr(lang)}" type="button" title="Run this code on stackblitz.com (real Node.js / WebContainer sandbox, supports Next.js / React / Node)">\u25B6 StackBlitz</button>` +
508
+ `<button class="md-sandbox" data-code="${dataCode}" data-lang="${escapeAttr(lang)}" type="button" title="Open this code in codesandbox.io (cloud IDE with live preview)">\u25B6 CodeSandbox</button>`
509
+ )
510
+ : '';
511
  return (
512
  `<pre class="md-code-block">` +
513
  `<div class="md-code-head">` +
514
  `<span>${escapeHtml(lang)}</span>` +
515
+ `<div class="md-code-actions">` +
516
+ launchBtns +
517
+ `<button class="md-copy" data-code="${dataCode}" type="button">Copy</button>` +
518
+ `</div>` +
519
  `</div>` +
520
  `<code class="language-${escapeHtml(lang)}">${safe}</code>` +
521
  `</pre>`
 
543
  return h;
544
  }
545
 
546
+ // Convert a data: URL into a Blob (used for Gradio image uploads).
547
+ function dataUrlToBlob(dataUrl) {
548
+ const match = /^data:([^;]+);base64,(.+)$/.exec(dataUrl || '');
549
+ if (!match) throw new Error('Invalid image data URL');
550
+ const mime = match[1];
551
+ const b64 = match[2];
552
+ const bytes = Uint8Array.from(atob(b64), (c) => c.charCodeAt(0));
553
+ return { blob: new Blob([bytes], { type: mime }), mime };
554
+ }
555
+
556
+ // Upload an image to a Gradio HF Space via /gradio_api/upload.
557
+ // Returns the server-side file path that can be referenced as
558
+ // {path: ..., meta: {_type: "gradio.FileData"}} in a chat_fn data array.
559
+ // This is REQUIRED — gr.Image(type="pil") on the backend cannot decode
560
+ // a raw data URL string.
561
+ async function uploadImageToGradio(base, dataUrl, signal) {
562
+ const { blob, mime } = dataUrlToBlob(dataUrl);
563
+ const ext = (mime.split('/')[1] || 'png').replace('+xml', '').split(';')[0];
564
+ const filename = `mindi-upload-${Date.now()}.${ext}`;
565
+ const formData = new FormData();
566
+ formData.append('files', blob, filename);
567
+
568
+ // Don't pre-set Content-Type — the browser sets the multipart boundary.
569
+ const headers = authHeaders({});
570
+ delete headers['Content-Type'];
571
+
572
+ const res = await fetch(`${base}/gradio_api/upload`, {
573
+ method: 'POST',
574
+ headers,
575
+ body: formData,
576
+ signal,
577
+ });
578
+ if (!res.ok) {
579
+ const txt = await res.text().catch(() => '');
580
+ throw new Error(`Image upload ${res.status}: ${txt.slice(0, 200) || 'failed'}`);
581
+ }
582
+ const result = await res.json();
583
+ // Gradio 5.x returns ["/tmp/gradio/.../filename.png"]
584
+ const filePath = Array.isArray(result) ? result[0] : (result && result.files && result.files[0]);
585
+ if (!filePath || typeof filePath !== 'string') {
586
+ throw new Error(`Unexpected upload response: ${JSON.stringify(result).slice(0, 200)}`);
587
+ }
588
+ return filePath;
589
+ }
590
+
591
  // Detect responses that came back as a quota / auth error from the
592
  // backend's chat_fn try/except, so we can show actionable UX.
593
  function detectAuthError(result) {
 
723
  // 1. POST /gradio_api/call/{api_name} → get event_id
724
  // 2. GET /gradio_api/call/{api_name}/{event_id} → stream result
725
 
726
+ // ── Image: upload first, then reference by path ──
727
+ // gr.Image(type="pil") cannot decode a raw data: URL — it expects a
728
+ // FileData reference produced by /gradio_api/upload. We do this
729
+ // unconditionally when an image is supplied so the backend's CLIP
730
+ // path actually receives pixels. If vision is disabled in settings,
731
+ // send() drops the image before calling us.
732
+ let imageArg = null;
733
+ if (image && typeof image === 'string' && image.startsWith('data:')) {
734
+ try {
735
+ const filePath = await uploadImageToGradio(base, image, signal);
736
+ imageArg = {
737
+ path: filePath,
738
+ meta: { _type: 'gradio.FileData' },
739
+ orig_name: filePath.split('/').pop() || 'image.png',
740
+ };
741
+ } catch (e) {
742
+ console.warn('[mindi] Image upload to Gradio failed:', e);
743
+ toast(`Image upload failed: ${e.message || e}. Sending text only.`, 'error', 5000);
744
+ imageArg = null;
745
+ }
746
+ }
747
+
748
  // Step 1: Submit the request
749
  const submitRes = await fetch(`${base}/gradio_api/call/chat_fn`, {
750
  method: 'POST',
751
  headers: authHeaders({ 'Content-Type': 'application/json' }),
752
  body: JSON.stringify({
753
+ data: [prompt, imageArg, state.temperature, state.maxTokens, historyJson],
754
  }),
755
  signal,
756
  });
 
1356
  els.chatTitle.textContent = chat.title;
1357
  }
1358
 
1359
+ // Reset input.
1360
+ // If vision is disabled in Settings, drop the image before calling
1361
+ // the API so we don't waste an upload round-trip on something the
1362
+ // backend will ignore. The image still appears in the user message
1363
+ // for the chat record.
1364
+ const imageForApi = state.visionEnabled
1365
+ ? (runtime.pendingImages[0]?.dataUrl || null)
1366
+ : null;
1367
+ if (!state.visionEnabled && runtime.pendingImages.length) {
1368
+ toast('Vision is disabled \u2014 image attached for record only. Enable it in Settings to send to the model.', 'info', 4500);
1369
+ }
1370
  els.promptInput.value = '';
1371
  autosizeTextarea();
1372
  clearPendingImages();
 
1533
  function openSettings() {
1534
  els.settingsUrl.value = state.apiUrl || '';
1535
  if (els.settingsHfToken) els.settingsHfToken.value = state.hfToken || '';
1536
+ if (els.settingsVision) els.settingsVision.checked = !!state.visionEnabled;
1537
  els.settingsTemp.value = state.temperature;
1538
  els.settingsTokens.value = state.maxTokens;
1539
  els.tempVal.textContent = Number(state.temperature).toFixed(2);
 
1548
  function applySettings() {
1549
  const url = els.settingsUrl.value.trim();
1550
  const token = els.settingsHfToken ? els.settingsHfToken.value.trim() : '';
1551
+ const vision = !!(els.settingsVision && els.settingsVision.checked);
1552
  const temp = parseFloat(els.settingsTemp.value);
1553
  const tokens = parseInt(els.settingsTokens.value, 10);
1554
  const tokenChanged = token !== state.hfToken;
1555
+ state.apiUrl = url || API_DEFAULT;
1556
+ state.hfToken = token;
1557
+ state.visionEnabled = vision;
1558
+ state.temperature = isFinite(temp) ? temp : 0.7;
1559
+ state.maxTokens = isFinite(tokens) ? tokens : 2048;
1560
  // If the user just saved a new (non-empty) token, give the API another shot.
1561
  if (tokenChanged && token) {
1562
  runtime.authBlocked = false;
 
1681
  els.copyCode.addEventListener('click', copyLastCode);
1682
  els.downloadCode.addEventListener('click', downloadLastCode);
1683
 
1684
+ // Delegated click handler for code-block action buttons inside messages
1685
+ // (Copy, Run-in-StackBlitz, Open-in-CodeSandbox).
1686
  els.messages.addEventListener('click', async (e) => {
1687
+ const copyBtn = e.target.closest('.md-copy');
1688
+ if (copyBtn) {
1689
+ try {
1690
+ await navigator.clipboard.writeText(copyBtn.dataset.code || '');
1691
+ const prev = copyBtn.textContent;
1692
+ copyBtn.textContent = 'Copied!';
1693
+ setTimeout(() => { copyBtn.textContent = prev; }, 1400);
1694
+ } catch {
1695
+ toast('Clipboard unavailable', 'error');
1696
+ }
1697
+ return;
1698
+ }
1699
+
1700
+ const runBtn = e.target.closest('.md-run');
1701
+ if (runBtn) {
1702
+ try {
1703
+ launchInStackBlitz(runBtn.dataset.code || '', runBtn.dataset.lang || '');
1704
+ } catch (err) {
1705
+ toast(`StackBlitz launch failed: ${err.message || err}`, 'error');
1706
+ }
1707
+ return;
1708
+ }
1709
+
1710
+ const sbxBtn = e.target.closest('.md-sandbox');
1711
+ if (sbxBtn) {
1712
+ const prev = sbxBtn.textContent;
1713
+ sbxBtn.disabled = true;
1714
+ sbxBtn.textContent = '\u25B6 Opening\u2026';
1715
+ try {
1716
+ await launchInCodeSandbox(sbxBtn.dataset.code || '', sbxBtn.dataset.lang || '');
1717
+ } finally {
1718
+ sbxBtn.textContent = prev;
1719
+ sbxBtn.disabled = false;
1720
+ }
1721
+ return;
1722
  }
1723
  });
1724
 
frontend/index.html CHANGED
@@ -297,6 +297,17 @@
297
  <span class="field-hint">Paste a PRO HF token to bypass anonymous ZeroGPU quota. Stored only in this browser. <a href="https://huggingface.co/settings/tokens" target="_blank" rel="noopener">Get a token →</a></span>
298
  </label>
299
 
 
 
 
 
 
 
 
 
 
 
 
300
  <label class="field">
301
  <span class="field-label">Temperature <em class="field-value" id="temp-val">0.7</em></span>
302
  <input id="settings-temp" type="range" min="0" max="2" step="0.05" value="0.7" />
 
297
  <span class="field-hint">Paste a PRO HF token to bypass anonymous ZeroGPU quota. Stored only in this browser. <a href="https://huggingface.co/settings/tokens" target="_blank" rel="noopener">Get a token →</a></span>
298
  </label>
299
 
300
+ <label class="field field-toggle">
301
+ <span class="field-toggle-row">
302
+ <span class="field-label">Vision input</span>
303
+ <span class="toggle">
304
+ <input id="settings-vision" type="checkbox" />
305
+ <span class="toggle-slider"></span>
306
+ </span>
307
+ </span>
308
+ <span class="field-hint">Send attached images to MINDI's CLIP encoder. <strong>Off by default</strong> — the current vision-language fusion is an early build and produces low-quality answers on images. Leave off until the next vision retraining ships. Attaching an image still records it in the chat.</span>
309
+ </label>
310
+
311
  <label class="field">
312
  <span class="field-label">Temperature <em class="field-value" id="temp-val">0.7</em></span>
313
  <input id="settings-temp" type="range" min="0" max="2" step="0.05" value="0.7" />
frontend/styles.css CHANGED
@@ -55,7 +55,7 @@
55
  --r-xl: 20px;
56
 
57
  --sidebar-w: 280px;
58
- --preview-w: 420px;
59
  --header-h: 56px;
60
 
61
  /* Motion */
@@ -564,8 +564,8 @@ body.sidebar-open .scrim { opacity: 1; pointer-events: auto; }
564
  flex: 1;
565
  display: none;
566
  flex-direction: column;
567
- gap: 24px;
568
- padding: 28px 24px 16px;
569
  overflow-y: auto;
570
  scroll-behavior: smooth;
571
  }
@@ -686,16 +686,52 @@ body.sidebar-open .scrim { opacity: 1; pointer-events: auto; }
686
  border-bottom: 1px solid var(--border);
687
  }
688
  .md-code-head span:first-child { color: var(--c-code); font-weight: 500; }
689
- .md-copy {
 
 
 
 
 
 
 
 
 
690
  font-family: var(--mono);
691
  font-size: 11px;
 
 
692
  color: var(--text-mute);
693
  padding: 4px 10px;
694
  border-radius: 4px;
695
  border: 1px solid var(--border);
696
- transition: background .2s var(--ease), color .2s var(--ease), border-color .2s var(--ease);
 
697
  }
698
  .md-copy:hover { background: var(--hover); color: var(--text); border-color: var(--border-2); }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
699
  .md-code-block code {
700
  display: block;
701
  font-family: var(--mono);
@@ -704,6 +740,8 @@ body.sidebar-open .scrim { opacity: 1; pointer-events: auto; }
704
  padding: 14px 16px;
705
  overflow-x: auto;
706
  white-space: pre;
 
 
707
  }
708
 
709
  /* Loading message */
@@ -1097,6 +1135,58 @@ body.preview-hidden .preview { display: none; }
1097
  color: var(--text-mute);
1098
  line-height: 1.5;
1099
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1100
  .field input[type="url"],
1101
  .field input[type="password"] {
1102
  padding: 9px 12px;
@@ -1212,8 +1302,11 @@ pre[class*="language-"], code[class*="language-"] {
1212
  .token.regex, .token.important, .token.variable { color: #f87171 !important; }
1213
 
1214
  /* ============ RESPONSIVE ============ */
 
 
 
1215
  @media (max-width: 1180px) {
1216
- :root { --preview-w: 380px; }
1217
  }
1218
  @media (max-width: 1024px) {
1219
  .app { grid-template-columns: var(--sidebar-w) 1fr; }
 
55
  --r-xl: 20px;
56
 
57
  --sidebar-w: 280px;
58
+ --preview-w: 480px;
59
  --header-h: 56px;
60
 
61
  /* Motion */
 
564
  flex: 1;
565
  display: none;
566
  flex-direction: column;
567
+ gap: 20px;
568
+ padding: 22px 24px 14px;
569
  overflow-y: auto;
570
  scroll-behavior: smooth;
571
  }
 
686
  border-bottom: 1px solid var(--border);
687
  }
688
  .md-code-head span:first-child { color: var(--c-code); font-weight: 500; }
689
+ .md-code-actions {
690
+ display: flex;
691
+ gap: 6px;
692
+ align-items: center;
693
+ flex-wrap: wrap;
694
+ justify-content: flex-end;
695
+ }
696
+ .md-copy,
697
+ .md-run,
698
+ .md-sandbox {
699
  font-family: var(--mono);
700
  font-size: 11px;
701
+ font-weight: 500;
702
+ letter-spacing: .02em;
703
  color: var(--text-mute);
704
  padding: 4px 10px;
705
  border-radius: 4px;
706
  border: 1px solid var(--border);
707
+ white-space: nowrap;
708
+ transition: background .2s var(--ease), color .2s var(--ease), border-color .2s var(--ease), transform .15s var(--ease);
709
  }
710
  .md-copy:hover { background: var(--hover); color: var(--text); border-color: var(--border-2); }
711
+ .md-run {
712
+ color: #d8c8ff;
713
+ border-color: rgba(124, 58, 237, .35);
714
+ background: rgba(124, 58, 237, .08);
715
+ }
716
+ .md-run:hover {
717
+ background: rgba(124, 58, 237, .18);
718
+ color: #fff;
719
+ border-color: rgba(124, 58, 237, .55);
720
+ transform: translateY(-1px);
721
+ }
722
+ .md-sandbox {
723
+ color: #fde68a;
724
+ border-color: rgba(245, 158, 11, .32);
725
+ background: rgba(245, 158, 11, .06);
726
+ }
727
+ .md-sandbox:hover {
728
+ background: rgba(245, 158, 11, .15);
729
+ color: #fff;
730
+ border-color: rgba(245, 158, 11, .55);
731
+ transform: translateY(-1px);
732
+ }
733
+ .md-sandbox:disabled,
734
+ .md-run:disabled { opacity: .55; cursor: progress; transform: none; }
735
  .md-code-block code {
736
  display: block;
737
  font-family: var(--mono);
 
740
  padding: 14px 16px;
741
  overflow-x: auto;
742
  white-space: pre;
743
+ max-height: 540px;
744
+ overflow-y: auto;
745
  }
746
 
747
  /* Loading message */
 
1135
  color: var(--text-mute);
1136
  line-height: 1.5;
1137
  }
1138
+
1139
+ /* Toggle switch (used inside .field-toggle) */
1140
+ .field-toggle .field-toggle-row {
1141
+ display: flex;
1142
+ justify-content: space-between;
1143
+ align-items: center;
1144
+ gap: 12px;
1145
+ }
1146
+ .toggle {
1147
+ position: relative;
1148
+ display: inline-block;
1149
+ width: 40px;
1150
+ height: 22px;
1151
+ flex: 0 0 40px;
1152
+ }
1153
+ .toggle input {
1154
+ opacity: 0;
1155
+ width: 0;
1156
+ height: 0;
1157
+ }
1158
+ .toggle-slider {
1159
+ position: absolute;
1160
+ inset: 0;
1161
+ background: rgba(124, 58, 237, .12);
1162
+ border: 1px solid var(--border-2);
1163
+ border-radius: 999px;
1164
+ cursor: pointer;
1165
+ transition: background-color .2s var(--ease), border-color .2s var(--ease);
1166
+ }
1167
+ .toggle-slider::before {
1168
+ content: '';
1169
+ position: absolute;
1170
+ left: 2px;
1171
+ top: 50%;
1172
+ transform: translateY(-50%);
1173
+ width: 16px;
1174
+ height: 16px;
1175
+ border-radius: 50%;
1176
+ background: #c8c2e0;
1177
+ transition: transform .2s var(--ease), background-color .2s var(--ease);
1178
+ }
1179
+ .toggle input:checked + .toggle-slider {
1180
+ background: rgba(124, 58, 237, .55);
1181
+ border-color: rgba(124, 58, 237, .8);
1182
+ }
1183
+ .toggle input:checked + .toggle-slider::before {
1184
+ transform: translate(18px, -50%);
1185
+ background: #fff;
1186
+ }
1187
+ .toggle input:focus-visible + .toggle-slider {
1188
+ box-shadow: 0 0 0 3px rgba(124, 58, 237, .25);
1189
+ }
1190
  .field input[type="url"],
1191
  .field input[type="password"] {
1192
  padding: 9px 12px;
 
1302
  .token.regex, .token.important, .token.variable { color: #f87171 !important; }
1303
 
1304
  /* ============ RESPONSIVE ============ */
1305
+ @media (max-width: 1280px) {
1306
+ :root { --preview-w: 440px; }
1307
+ }
1308
  @media (max-width: 1180px) {
1309
+ :root { --preview-w: 400px; }
1310
  }
1311
  @media (max-width: 1024px) {
1312
  .app { grid-template-columns: var(--sidebar-w) 1fr; }
frontend/test_api.py CHANGED
@@ -8,7 +8,7 @@ Modes:
8
  python test_api.py "<prompt>" [maxtok] # single custom prompt
9
  python test_api.py --memory # multi-turn identity + memory test
10
  """
11
- import os, sys, time, json
12
  import requests
13
 
14
  BASE = os.environ.get("MINDI_API", "https://mindigenous-mindi-chat.hf.space")
@@ -17,6 +17,7 @@ TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
17
  ARGS = [a for a in sys.argv[1:] if not a.startswith("--")]
18
  FLAGS = [a for a in sys.argv[1:] if a.startswith("--")]
19
  MEMORY_MODE = "--memory" in FLAGS
 
20
  PROMPT = ARGS[0] if ARGS else "Write hello world in Python"
21
  MAXTOK = int(ARGS[1]) if len(ARGS) > 1 else 256
22
 
@@ -44,18 +45,42 @@ for path in ("/gradio_api/config", "/config"):
44
  except Exception as e:
45
  print(f" {path} failed:", e)
46
 
47
- def call_api(prompt: str, history: list | None = None, max_tokens: int = 256, preview_chars: int = 1200) -> dict | None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  """Submit a single chat_fn request and stream its SSE result.
49
 
50
  Returns the parsed {response, sections} dict from the 'complete' event,
51
  or None on failure.
52
  """
53
  history_json = json.dumps(history) if history else ""
 
 
 
54
  start = time.time()
55
  resp = requests.post(
56
  BASE + "/gradio_api/call/chat_fn",
57
  headers=HEADERS,
58
- json={"data": [prompt, None, 0.7, max_tokens, history_json]},
59
  timeout=30,
60
  )
61
  if resp.status_code != 200:
@@ -137,6 +162,35 @@ if MEMORY_MODE:
137
  print(" [FAIL] Model did NOT identify as MINDI")
138
  if "gpt" in text or "claude" in text or "gemini" in text:
139
  print(" [WARN] Response still mentions GPT/Claude/Gemini")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  else:
141
  print("\n=== Step 2: API generation test ===")
142
  print(f"Prompt: {PROMPT!r} | max_tokens={MAXTOK}")
 
8
  python test_api.py "<prompt>" [maxtok] # single custom prompt
9
  python test_api.py --memory # multi-turn identity + memory test
10
  """
11
+ import os, sys, time, json, tempfile
12
  import requests
13
 
14
  BASE = os.environ.get("MINDI_API", "https://mindigenous-mindi-chat.hf.space")
 
17
  ARGS = [a for a in sys.argv[1:] if not a.startswith("--")]
18
  FLAGS = [a for a in sys.argv[1:] if a.startswith("--")]
19
  MEMORY_MODE = "--memory" in FLAGS
20
+ VISION_MODE = "--vision" in FLAGS
21
  PROMPT = ARGS[0] if ARGS else "Write hello world in Python"
22
  MAXTOK = int(ARGS[1]) if len(ARGS) > 1 else 256
23
 
 
45
  except Exception as e:
46
  print(f" {path} failed:", e)
47
 
48
+ def upload_image(path: str) -> dict | None:
49
+ """POST an image to /gradio_api/upload and return the FileData reference."""
+ # Returns a Gradio FileData-shaped dict ({path, meta, orig_name}) suitable
+ # for the image slot of chat_fn, or None on any failure (missing file,
+ # non-200 response, unexpected body). Callers must handle None.
50
+ if not os.path.exists(path):
51
+ print(f" [upload] file not found: {path}")
52
+ return None
+ # Strip any Content-Type header from the shared HEADERS so requests can
+ # set the correct multipart/form-data boundary itself.
53
+ upload_headers = {k: v for k, v in HEADERS.items() if k.lower() != "content-type"}
54
+ with open(path, "rb") as fh:
+ # NOTE(review): the MIME type is hard-coded to image/png even when the
+ # file is a JPEG/WebP — confirm the Space tolerates a mismatched type,
+ # or derive it from the extension (e.g. mimetypes.guess_type).
55
+ files = {"files": (os.path.basename(path), fh, "image/png")}
56
+ resp = requests.post(BASE + "/gradio_api/upload", headers=upload_headers, files=files, timeout=30)
57
+ if resp.status_code != 200:
58
+ print(f" [upload] {resp.status_code}: {resp.text[:200]}")
59
+ return None
+ # Gradio's upload endpoint responds with a JSON list of server-side
+ # temp paths; we only ever send one file, so take the first entry.
60
+ body = resp.json()
61
+ file_path = body[0] if isinstance(body, list) else None
62
+ if not file_path:
63
+ print(f" [upload] unexpected: {body}")
64
+ return None
+ # meta._type marks this dict as a gradio.FileData reference so chat_fn's
+ # gr.Image input resolves the server-side path instead of raw bytes.
65
+ return {"path": file_path, "meta": {"_type": "gradio.FileData"}, "orig_name": os.path.basename(path)}
66
+
67
+
68
+ def call_api(prompt: str, history: list | None = None, max_tokens: int = 256,
69
+ preview_chars: int = 1200, image_path: str | None = None) -> dict | None:
70
  """Submit a single chat_fn request and stream its SSE result.
71
 
72
  Returns the parsed {response, sections} dict from the 'complete' event,
73
  or None on failure.
74
  """
75
  history_json = json.dumps(history) if history else ""
76
+ image_arg = upload_image(image_path) if image_path else None
77
+ if image_path:
78
+ print(f" [vision] uploaded {image_path} -> {image_arg.get('path') if image_arg else 'FAILED'}")
79
  start = time.time()
80
  resp = requests.post(
81
  BASE + "/gradio_api/call/chat_fn",
82
  headers=HEADERS,
83
+ json={"data": [prompt, image_arg, 0.7, max_tokens, history_json]},
84
  timeout=30,
85
  )
86
  if resp.status_code != 200:
 
162
  print(" [FAIL] Model did NOT identify as MINDI")
163
  if "gpt" in text or "claude" in text or "gemini" in text:
164
  print(" [WARN] Response still mentions GPT/Claude/Gemini")
165
+ elif VISION_MODE:
166
+ # Vision pipeline test — upload a tiny synthetic PNG and ask MINDI
167
+ # to describe it. Verifies the /gradio_api/upload + chat_fn(image=...) path.
+ # NOTE(review): ARGS[1] is reused as the prompt below, but the top of the
+ # script already ran `MAXTOK = int(ARGS[1]) if len(ARGS) > 1 else 256`,
+ # which raises ValueError when --vision is invoked with a textual second
+ # argument (e.g. `test_api.py --vision img.png "Describe"`). Confirm and
+ # guard MAXTOK parsing for vision mode.
168
+ print("\n=== Vision mode: image upload + describe test ===")
+ # First positional arg is an optional image path; default to a reusable
+ # synthetic image in the OS temp dir.
169
+ img_path = ARGS[0] if ARGS else os.path.join(tempfile.gettempdir(), "mindi_test_dot.png")
170
+ if not os.path.exists(img_path):
171
+ try:
+ # Pillow is an optional dependency here; only needed when no image
+ # was supplied and the cached synthetic one doesn't exist yet.
172
+ from PIL import Image, ImageDraw
173
+ img = Image.new("RGB", (256, 256), color=(20, 20, 30))
174
+ d = ImageDraw.Draw(img)
+ # Purple rectangle outline + yellow filled ellipse: shapes/colors
+ # chosen to match the cue words asserted below.
175
+ d.rectangle((40, 40, 216, 216), outline=(120, 80, 255), width=4)
176
+ d.ellipse((96, 96, 160, 160), fill=(255, 200, 80))
177
+ img.save(img_path)
178
+ print(f"[vision] generated synthetic test image at {img_path}")
179
+ except Exception as e:
180
+ print(f"[vision] could not synthesize test image (need Pillow): {e}")
181
+ sys.exit(1)
182
+
183
+ prompt = ARGS[1] if len(ARGS) > 1 else "Describe this image in one sentence."
184
+ r = call_api(prompt, history=None, max_tokens=128, image_path=img_path)
185
+ if r:
186
+ text = (r.get("response") or "").lower()
187
+ # Loose checks: did the model engage with image content at all?
+ # Deliberately lenient: the current vision fusion is known to be weak,
+ # so any cue hit counts as PASS and a miss only WARNs (never fails).
188
+ cues = ["circle", "square", "rectangle", "yellow", "purple", "ellipse", "image", "shape"]
189
+ hits = [c for c in cues if c in text]
190
+ if hits:
191
+ print(f" [PASS] response mentions visual cues: {hits}")
192
+ else:
193
+ print(" [WARN] response does not seem image-aware")
194
  else:
195
  print("\n=== Step 2: API generation test ===")
196
  print(f"Prompt: {PROMPT!r} | max_tokens={MAXTOK}")
hf_space/app.py CHANGED
@@ -50,12 +50,25 @@ def parse_output(text: str) -> dict:
50
  _CHAT_TOKEN_PATTERN = re.compile(
51
  r"<\|(?:im_start|im_end|endoftext|fim_prefix|fim_middle|fim_suffix|fim_pad|repo_name|file_sep)\|>"
52
  )
 
 
 
 
53
 
54
 
55
  def clean_output(text: str) -> str:
56
  """Strip Qwen chat-template artifacts and any leading role prefix."""
 
 
57
  text = _CHAT_TOKEN_PATTERN.sub("", text)
58
- text = re.sub(r"^\s*(system|user|assistant)\s*\n", "", text)
 
 
 
 
 
 
 
59
  return text.strip()
60
 
61
 
 
50
  _CHAT_TOKEN_PATTERN = re.compile(
51
  r"<\|(?:im_start|im_end|endoftext|fim_prefix|fim_middle|fim_suffix|fim_pad|repo_name|file_sep)\|>"
52
  )
53
+ # Match a role line ONLY if it stands alone at the very start of the text
54
+ # followed by an explicit newline. The previous '\s*' wildcard could swallow
55
+ # leading content when the model emitted weird sequences in the vision path.
56
+ _ROLE_PREFIX_PATTERN = re.compile(r"^(?:system|user|assistant)\n")
57
 
58
 
59
  def clean_output(text: str) -> str:
60
  """Strip Qwen chat-template artifacts and any leading role prefix."""
+ # NOTE(review): this debug path prints the full raw model output, which
+ # may include user-provided content — confirm MINDI_DEBUG_RAW is never
+ # enabled in production logs.
61
+ if os.environ.get("MINDI_DEBUG_RAW") == "1":
62
+ print(f"[clean_output] RAW ({len(text)} chars): {text!r}")
+ # Remove Qwen special tokens (<|im_start|>, <|endoftext|>, FIM markers, ...).
63
  text = _CHAT_TOKEN_PATTERN.sub("", text)
64
+ # Apply role-prefix strip up to twice: handles the vision-path case where
+ # the model occasionally emits 'assistant\n' followed by stray noise like
+ # an extra 'user\n' before the real reply.
65
+ # _ROLE_PREFIX_PATTERN is ^-anchored without re.MULTILINE, so each pass
+ # only matches a role word at the very start of the (already-stripped)
+ # string; the loop exits early once no prefix remains.
66
+ for _ in range(2):
67
+ new_text = _ROLE_PREFIX_PATTERN.sub("", text, count=1)
68
+ if new_text == text:
69
+ break
70
+ text = new_text
71
  return text.strip()
73
 
74