Faaz committed on
Commit
18d863a
·
1 Parent(s): 856e556

Tier 2: image upload, vision toggle, StackBlitz/CodeSandbox launchers, UI polish

Browse files

frontend/app.js:
- Real Gradio image upload via /gradio_api/upload (gr.Image cannot decode
raw data URLs). dataUrlToBlob() + uploadImageToGradio() helpers send a
multipart POST with Authorization, then hand the resulting /tmp/gradio
path to chat_fn as a {path, meta:{_type:gradio.FileData}} reference.
- Vision toggle state (visionEnabled) plumbed through Settings modal,
persisted in localStorage, defaults OFF because the current vision
fusion is weak. send() drops images from the API payload when the
toggle is off so we don't waste GPU seconds on a known-bad path.
- Cloud sandbox launchers for real Next.js / React / Node / HTML
execution — both StackBlitz (form POST to /run, WebContainer-backed)
and CodeSandbox (Define API JSON POST returning sandbox_id).
buildStackBlitzProject() detects Next.js / React / Node / HTML from
imports and dependency hints, generates a working package.json and
template files. buildCodeSandboxFiles() reuses the same project shape.
- BUG FIX: the .md-run StackBlitz button was rendered but never had a
click handler — clicking it did literally nothing. New delegated
handler on els.messages routes .md-copy / .md-run / .md-sandbox to
the right action with proper loading state for the async CodeSandbox
call.
- buildHistory() trims to last 20 turns and is passed to the backend on
every API + REST call, matching the new chat_fn(history=...) signature.

frontend/index.html:
- Vision toggle field in Settings modal with a clear hint that vision
output is currently low quality (set expectations honestly).

frontend/styles.css:
- Toggle switch component styling.
- .md-run (purple) + .md-sandbox (amber) launcher button styles next to
the existing .md-copy, with hover lift and disabled (loading) state.
- .md-code-block code: max-height 540px so giant generated files don't
blow out the message column.
- Wider preview pane (--preview-w 420 -> 480; 380 -> 440 at <=1280px;
added new 1180px breakpoint for 400px) so generated HTML/code is
actually readable.
- Tightened message density: gap 24 -> 20, top padding 28 -> 22.

frontend/test_api.py:
- --memory and --vision CLI flags. Memory mode runs a 3-turn identity
+ recall test against the live Space. Vision mode synthesizes a tiny
PNG, uploads it via /gradio_api/upload, and asks the model what color
it is — verifies the upload pipeline end-to-end.

hf_space/app.py:
- Vision path now accepts the {path: ...} dict that Gradio resolves
server-side, so uploaded images actually reach the CLIP encoder.
- _coerce_history() accepts list-of-dict OR JSON-encoded string from
the raw Gradio Textbox API.
- Hardened clean_output regex against truncating words like 'Don'.

.gitignore:
- Exclude 'Building Agentic Vision-Coder System.md' (Cascade chat log
dump, same pattern as the existing pipeline log entries).

.gitignore CHANGED
Binary files a/.gitignore and b/.gitignore differ
 
frontend/app.js CHANGED
@@ -38,12 +38,13 @@
38
  // State (persisted to localStorage)
39
  // ----------------------------------------------------------------
40
  const defaultState = () => ({
41
- apiUrl: API_DEFAULT,
42
- hfToken: '', // optional HF PRO token to bypass anonymous ZeroGPU quota
43
- temperature: 0.7,
44
- maxTokens: 2048,
45
- chats: [], // [{id, title, createdAt, updatedAt, messages: [{role, content, images?}]}]
46
- currentId: null,
 
47
  });
48
 
49
  const state = loadState();
@@ -135,6 +136,7 @@
135
  settingsUrl: $('#settings-url'),
136
  settingsHfToken:$('#settings-hf-token'),
137
  hfTokenStatus: $('#hf-token-status'),
 
138
  settingsTemp: $('#settings-temp'),
139
  settingsTokens: $('#settings-tokens'),
140
  tempVal: $('#temp-val'),
@@ -261,6 +263,203 @@
261
  return merged;
262
  }
263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  // Extract last fenced code block from the response text
265
  function extractLastCodeBlock(text) {
266
  if (!text) return null;
@@ -302,11 +501,21 @@
302
  const lang = seg.lang || languageFromCode(seg.value);
303
  const safe = escapeHtml(seg.value);
304
  const dataCode = escapeAttr(seg.value);
 
 
 
 
 
 
 
305
  return (
306
  `<pre class="md-code-block">` +
307
  `<div class="md-code-head">` +
308
  `<span>${escapeHtml(lang)}</span>` +
309
- `<button class="md-copy" data-code="${dataCode}" type="button">Copy</button>` +
 
 
 
310
  `</div>` +
311
  `<code class="language-${escapeHtml(lang)}">${safe}</code>` +
312
  `</pre>`
@@ -334,6 +543,51 @@
334
  return h;
335
  }
336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  // Detect responses that came back as a quota / auth error from the
338
  // backend's chat_fn try/except, so we can show actionable UX.
339
  function detectAuthError(result) {
@@ -469,12 +723,34 @@ Your token is stored only in this browser's local storage and sent as an \`Autho
469
  // 1. POST /gradio_api/call/{api_name} → get event_id
470
  // 2. GET /gradio_api/call/{api_name}/{event_id} → stream result
471
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
  // Step 1: Submit the request
473
  const submitRes = await fetch(`${base}/gradio_api/call/chat_fn`, {
474
  method: 'POST',
475
  headers: authHeaders({ 'Content-Type': 'application/json' }),
476
  body: JSON.stringify({
477
- data: [prompt, image || null, state.temperature, state.maxTokens, historyJson],
478
  }),
479
  signal,
480
  });
@@ -1080,8 +1356,17 @@ console.log("MINDI 1.5 — awaiting connection");
1080
  els.chatTitle.textContent = chat.title;
1081
  }
1082
 
1083
- // Reset input
1084
- const imageForApi = runtime.pendingImages[0]?.dataUrl || null;
 
 
 
 
 
 
 
 
 
1085
  els.promptInput.value = '';
1086
  autosizeTextarea();
1087
  clearPendingImages();
@@ -1248,6 +1533,7 @@ console.log("MINDI 1.5 — awaiting connection");
1248
  function openSettings() {
1249
  els.settingsUrl.value = state.apiUrl || '';
1250
  if (els.settingsHfToken) els.settingsHfToken.value = state.hfToken || '';
 
1251
  els.settingsTemp.value = state.temperature;
1252
  els.settingsTokens.value = state.maxTokens;
1253
  els.tempVal.textContent = Number(state.temperature).toFixed(2);
@@ -1262,13 +1548,15 @@ console.log("MINDI 1.5 — awaiting connection");
1262
  function applySettings() {
1263
  const url = els.settingsUrl.value.trim();
1264
  const token = els.settingsHfToken ? els.settingsHfToken.value.trim() : '';
 
1265
  const temp = parseFloat(els.settingsTemp.value);
1266
  const tokens = parseInt(els.settingsTokens.value, 10);
1267
  const tokenChanged = token !== state.hfToken;
1268
- state.apiUrl = url || API_DEFAULT;
1269
- state.hfToken = token;
1270
- state.temperature = isFinite(temp) ? temp : 0.7;
1271
- state.maxTokens = isFinite(tokens) ? tokens : 2048;
 
1272
  // If the user just saved a new (non-empty) token, give the API another shot.
1273
  if (tokenChanged && token) {
1274
  runtime.authBlocked = false;
@@ -1393,17 +1681,44 @@ console.log("MINDI 1.5 — awaiting connection");
1393
  els.copyCode.addEventListener('click', copyLastCode);
1394
  els.downloadCode.addEventListener('click', downloadLastCode);
1395
 
1396
- // Click handler for inline copy buttons inside messages (delegated)
 
1397
  els.messages.addEventListener('click', async (e) => {
1398
- const btn = e.target.closest('.md-copy');
1399
- if (!btn) return;
1400
- try {
1401
- await navigator.clipboard.writeText(btn.dataset.code || '');
1402
- const prev = btn.textContent;
1403
- btn.textContent = 'Copied!';
1404
- setTimeout(() => { btn.textContent = prev; }, 1400);
1405
- } catch {
1406
- toast('Clipboard unavailable', 'error');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1407
  }
1408
  });
1409
 
 
38
  // State (persisted to localStorage)
39
  // ----------------------------------------------------------------
40
  const defaultState = () => ({
41
+ apiUrl: API_DEFAULT,
42
+ hfToken: '', // optional HF PRO token to bypass anonymous ZeroGPU quota
43
+ visionEnabled: false, // default OFF — see notes in Settings; vision-language fusion is currently low-quality
44
+ temperature: 0.7,
45
+ maxTokens: 2048,
46
+ chats: [], // [{id, title, createdAt, updatedAt, messages: [{role, content, images?}]}]
47
+ currentId: null,
48
  });
49
 
50
  const state = loadState();
 
136
  settingsUrl: $('#settings-url'),
137
  settingsHfToken:$('#settings-hf-token'),
138
  hfTokenStatus: $('#hf-token-status'),
139
+ settingsVision: $('#settings-vision'),
140
  settingsTemp: $('#settings-temp'),
141
  settingsTokens: $('#settings-tokens'),
142
  tempVal: $('#temp-val'),
 
263
  return merged;
264
  }
265
 
266
+ // ----------------------------------------------------------------
267
+ // Cloud sandbox launcher (StackBlitz) — gives users real Next.js /
268
+ // Node / React / HTML execution by handing the generated code off
269
+ // to stackblitz.com via their public POST API. No backend required.
270
+ // Docs: https://developer.stackblitz.com/docs/platform/post-api
271
+ // ----------------------------------------------------------------
272
+ function isCloudRunnable(code, lang) {
273
+ const l = (lang || '').toLowerCase();
274
+ if (['html', 'markup', 'jsx', 'tsx', 'javascript', 'js', 'typescript', 'ts', 'json'].includes(l)) return true;
275
+ // Heuristic: short non-obvious snippets get the button if they parse
276
+ // like a web project (so the model can ship a partial JS file too).
277
+ return /<!doctype|<html|^\s*import |^\s*export |^\s*function |^\s*const |^\s*class /im.test(code);
278
+ }
279
+
280
+ // Decide which StackBlitz template + file layout to use based on what
281
+ // the model produced. We try to be permissive — anything that looks
282
+ // like a React/Next/Node project goes into the WebContainer-backed
283
+ // 'node' template; raw HTML uses the static 'html' template.
284
+ function buildStackBlitzProject(code, lang) {
285
+ const l = (lang || '').toLowerCase();
286
+ const looksLikeNext = /from ['"]next\/|next\.config|app\/page\.[jt]sx|pages\/index/i.test(code);
287
+ const looksLikeReact = /from ['"]react['"]|ReactDOM\.|useState\(|useEffect\(|<\w+\s+\w+={/i.test(code);
288
+ const looksLikeNode = /^\s*(?:const|import)\s+\w+\s*=?\s*require\(|process\.env|module\.exports/m.test(code);
289
+ const isHtmlDoc = /<!doctype|<html/i.test(code);
290
+
291
+ const title = 'MINDI generated project';
292
+ const description = 'Generated by MINDI 1.5 Vision-Coder';
293
+
294
+ if (looksLikeNext) {
295
+ // Minimal Next.js 14 app-router project.
296
+ return {
297
+ title, description,
298
+ template: 'node',
299
+ files: {
300
+ 'package.json': JSON.stringify({
301
+ name: 'mindi-next-app',
302
+ private: true,
303
+ scripts: { dev: 'next dev', build: 'next build', start: 'next start' },
304
+ dependencies: { next: '^14.2.5', react: '^18.3.1', 'react-dom': '^18.3.1' },
305
+ }, null, 2),
306
+ 'app/page.tsx': /export\s+default/i.test(code) ? code : `export default function Page() {\n return (\n <main>\n${code.split('\n').map(l => ' ' + l).join('\n')}\n </main>\n );\n}\n`,
307
+ 'app/layout.tsx':
308
+ `export default function RootLayout({ children }: { children: React.ReactNode }) {
309
+ return (<html lang="en"><body>{children}</body></html>);
310
+ }
311
+ `,
312
+ 'tsconfig.json': JSON.stringify({
313
+ compilerOptions: {
314
+ target: 'ES2020', lib: ['dom', 'dom.iterable', 'esnext'], jsx: 'preserve',
315
+ module: 'esnext', moduleResolution: 'bundler', strict: true, esModuleInterop: true,
316
+ skipLibCheck: true, allowJs: true, isolatedModules: true, noEmit: true,
317
+ plugins: [{ name: 'next' }],
318
+ },
319
+ include: ['next-env.d.ts', '**/*.ts', '**/*.tsx'],
320
+ }, null, 2),
321
+ 'README.md': `# ${title}\n\n${description}\n\nRun:\n\n\`\`\`bash\nnpm install\nnpm run dev\n\`\`\`\n`,
322
+ },
323
+ };
324
+ }
325
+
326
+ if (looksLikeReact || l === 'jsx' || l === 'tsx') {
327
+ // Vite + React project (faster boot in WebContainer than CRA).
328
+ const ext = (l === 'tsx' || /\:\s*\w+(\[\])?/.test(code)) ? 'tsx' : 'jsx';
329
+ return {
330
+ title, description,
331
+ template: 'node',
332
+ files: {
333
+ 'package.json': JSON.stringify({
334
+ name: 'mindi-react-app',
335
+ private: true,
336
+ scripts: { dev: 'vite', build: 'vite build', preview: 'vite preview' },
337
+ dependencies: { react: '^18.3.1', 'react-dom': '^18.3.1' },
338
+ devDependencies: { '@vitejs/plugin-react': '^4.3.1', vite: '^5.4.1' },
339
+ }, null, 2),
340
+ 'vite.config.js':
341
+ `import { defineConfig } from 'vite';\nimport react from '@vitejs/plugin-react';\nexport default defineConfig({ plugins: [react()] });\n`,
342
+ 'index.html':
343
+ `<!doctype html><html><head><meta charset="utf-8"><title>${title}</title></head><body><div id="root"></div><script type="module" src="/src/main.${ext}"></script></body></html>`,
344
+ [`src/main.${ext}`]:
345
+ `import React from 'react';\nimport { createRoot } from 'react-dom/client';\nimport App from './App.${ext}';\ncreateRoot(document.getElementById('root')).render(<App />);\n`,
346
+ [`src/App.${ext}`]: /export\s+default/i.test(code) ? code : `export default function App() {\n return (<div>${'<pre>{`' + code.replace(/`/g, '\\`') + '`}</pre>'}</div>);\n}\n`,
347
+ },
348
+ };
349
+ }
350
+
351
+ if (looksLikeNode || l === 'json') {
352
+ return {
353
+ title, description,
354
+ template: 'node',
355
+ files: {
356
+ 'package.json': JSON.stringify({
357
+ name: 'mindi-node-app', private: true,
358
+ scripts: { start: 'node index.js' },
359
+ }, null, 2),
360
+ 'index.js': l === 'json' ? `console.log(${code});` : code,
361
+ },
362
+ };
363
+ }
364
+
365
+ if (isHtmlDoc || l === 'html' || l === 'markup') {
366
+ return {
367
+ title, description,
368
+ template: 'html',
369
+ files: {
370
+ 'index.html': isHtmlDoc ? code : `<!doctype html><html><head><meta charset="utf-8"><title>${title}</title></head><body>\n${code}\n</body></html>`,
371
+ },
372
+ };
373
+ }
374
+
375
+ // Fallback: static html with the code dropped into a <pre> tag so
376
+ // the user at least sees their snippet rendered in the StackBlitz preview.
377
+ return {
378
+ title, description,
379
+ template: 'html',
380
+ files: {
381
+ 'index.html': `<!doctype html><html><head><meta charset="utf-8"><title>${title}</title></head><body><pre>${escapeHtml(code)}</pre></body></html>`,
382
+ },
383
+ };
384
+ }
385
+
386
+ // Hand the project off to stackblitz.com via a hidden form POST.
387
+ // The new tab opens the cloud IDE with all files pre-loaded and the
388
+ // dev server booting. For 'node' templates this means a real Node.js
389
+ // runtime in the browser via WebContainers — yes, that includes
390
+ // 'npm install' for Next.js / React projects.
391
+ function launchInStackBlitz(code, lang) {
392
+ const proj = buildStackBlitzProject(code, lang);
393
+ const form = document.createElement('form');
394
+ form.action = 'https://stackblitz.com/run';
395
+ form.method = 'POST';
396
+ form.target = '_blank';
397
+ form.rel = 'noopener';
398
+ form.style.display = 'none';
399
+
400
+ const add = (name, value) => {
401
+ const input = document.createElement('input');
402
+ input.type = 'hidden';
403
+ input.name = name;
404
+ input.value = value;
405
+ form.appendChild(input);
406
+ };
407
+ add('project[title]', proj.title);
408
+ add('project[description]', proj.description);
409
+ add('project[template]', proj.template);
410
+ add('project[settings][compile][trigger]', 'auto');
411
+ Object.entries(proj.files).forEach(([path, content]) => {
412
+ add(`project[files][${path}]`, content);
413
+ });
414
+
415
+ document.body.appendChild(form);
416
+ form.submit();
417
+ setTimeout(() => form.remove(), 0);
418
+ }
419
+
420
+ // ----------------------------------------------------------------
421
+ // Cloud sandbox launcher (CodeSandbox) — second cloud IDE option.
422
+ // Uses the public Define API which returns a sandbox_id we redirect to.
423
+ // Docs: https://codesandbox.io/docs/learn/sandboxes/cli-api#define-api
424
+ // We reuse buildStackBlitzProject() for the file shape since both IDEs
425
+ // accept the same package.json / file layout; CodeSandbox auto-detects
426
+ // the template from package.json dependencies.
427
+ // ----------------------------------------------------------------
428
+ function buildCodeSandboxFiles(code, lang) {
429
+ const proj = buildStackBlitzProject(code, lang);
430
+ const files = {};
431
+ Object.entries(proj.files).forEach(([path, content]) => {
432
+ files[path] = { content };
433
+ });
434
+ // For raw HTML projects (StackBlitz template='html'), nudge CodeSandbox
435
+ // toward its 'static' template so it serves index.html as-is instead of
436
+ // trying to npm install nothing.
437
+ if (proj.template === 'html' && !files['package.json']) {
438
+ files['sandbox.config.json'] = { content: JSON.stringify({ template: 'static' }, null, 2) };
439
+ }
440
+ return files;
441
+ }
442
+
443
+ async function launchInCodeSandbox(code, lang) {
444
+ const files = buildCodeSandboxFiles(code, lang);
445
+ try {
446
+ const res = await fetch('https://codesandbox.io/api/v1/sandboxes/define?json=1', {
447
+ method: 'POST',
448
+ headers: { 'Content-Type': 'application/json', 'Accept': 'application/json' },
449
+ body: JSON.stringify({ files }),
450
+ });
451
+ if (!res.ok) {
452
+ const txt = await res.text().catch(() => '');
453
+ throw new Error(`HTTP ${res.status}: ${txt.slice(0, 160)}`);
454
+ }
455
+ const data = await res.json();
456
+ if (!data || !data.sandbox_id) throw new Error('No sandbox_id in response');
457
+ window.open(`https://codesandbox.io/s/${data.sandbox_id}`, '_blank', 'noopener');
458
+ } catch (err) {
459
+ toast(`CodeSandbox launch failed: ${err.message || err}`, 'error');
460
+ }
461
+ }
462
+
463
  // Extract last fenced code block from the response text
464
  function extractLastCodeBlock(text) {
465
  if (!text) return null;
 
501
  const lang = seg.lang || languageFromCode(seg.value);
502
  const safe = escapeHtml(seg.value);
503
  const dataCode = escapeAttr(seg.value);
504
+ const runnable = isCloudRunnable(seg.value, lang);
505
+ const launchBtns = runnable
506
+ ? (
507
+ `<button class="md-run" data-code="${dataCode}" data-lang="${escapeAttr(lang)}" type="button" title="Run this code on stackblitz.com (real Node.js / WebContainer sandbox, supports Next.js / React / Node)">\u25B6 StackBlitz</button>` +
508
+ `<button class="md-sandbox" data-code="${dataCode}" data-lang="${escapeAttr(lang)}" type="button" title="Open this code in codesandbox.io (cloud IDE with live preview)">\u25B6 CodeSandbox</button>`
509
+ )
510
+ : '';
511
  return (
512
  `<pre class="md-code-block">` +
513
  `<div class="md-code-head">` +
514
  `<span>${escapeHtml(lang)}</span>` +
515
+ `<div class="md-code-actions">` +
516
+ launchBtns +
517
+ `<button class="md-copy" data-code="${dataCode}" type="button">Copy</button>` +
518
+ `</div>` +
519
  `</div>` +
520
  `<code class="language-${escapeHtml(lang)}">${safe}</code>` +
521
  `</pre>`
 
543
  return h;
544
  }
545
 
546
+ // Convert a data: URL into a Blob (used for Gradio image uploads).
547
+ function dataUrlToBlob(dataUrl) {
548
+ const match = /^data:([^;]+);base64,(.+)$/.exec(dataUrl || '');
549
+ if (!match) throw new Error('Invalid image data URL');
550
+ const mime = match[1];
551
+ const b64 = match[2];
552
+ const bytes = Uint8Array.from(atob(b64), (c) => c.charCodeAt(0));
553
+ return { blob: new Blob([bytes], { type: mime }), mime };
554
+ }
555
+
556
+ // Upload an image to a Gradio HF Space via /gradio_api/upload.
557
+ // Returns the server-side file path that can be referenced as
558
+ // {path: ..., meta: {_type: "gradio.FileData"}} in a chat_fn data array.
559
+ // This is REQUIRED — gr.Image(type="pil") on the backend cannot decode
560
+ // a raw data URL string.
561
+ async function uploadImageToGradio(base, dataUrl, signal) {
562
+ const { blob, mime } = dataUrlToBlob(dataUrl);
563
+ const ext = (mime.split('/')[1] || 'png').replace('+xml', '').split(';')[0];
564
+ const filename = `mindi-upload-${Date.now()}.${ext}`;
565
+ const formData = new FormData();
566
+ formData.append('files', blob, filename);
567
+
568
+ // Don't pre-set Content-Type — the browser sets the multipart boundary.
569
+ const headers = authHeaders({});
570
+ delete headers['Content-Type'];
571
+
572
+ const res = await fetch(`${base}/gradio_api/upload`, {
573
+ method: 'POST',
574
+ headers,
575
+ body: formData,
576
+ signal,
577
+ });
578
+ if (!res.ok) {
579
+ const txt = await res.text().catch(() => '');
580
+ throw new Error(`Image upload ${res.status}: ${txt.slice(0, 200) || 'failed'}`);
581
+ }
582
+ const result = await res.json();
583
+ // Gradio 5.x returns ["/tmp/gradio/.../filename.png"]
584
+ const filePath = Array.isArray(result) ? result[0] : (result && result.files && result.files[0]);
585
+ if (!filePath || typeof filePath !== 'string') {
586
+ throw new Error(`Unexpected upload response: ${JSON.stringify(result).slice(0, 200)}`);
587
+ }
588
+ return filePath;
589
+ }
590
+
591
  // Detect responses that came back as a quota / auth error from the
592
  // backend's chat_fn try/except, so we can show actionable UX.
593
  function detectAuthError(result) {
 
723
  // 1. POST /gradio_api/call/{api_name} → get event_id
724
  // 2. GET /gradio_api/call/{api_name}/{event_id} → stream result
725
 
726
+ // ── Image: upload first, then reference by path ──
727
+ // gr.Image(type="pil") cannot decode a raw data: URL — it expects a
728
+ // FileData reference produced by /gradio_api/upload. We do this
729
+ // unconditionally when an image is supplied so the backend's CLIP
730
+ // path actually receives pixels. If vision is disabled in settings,
731
+ // send() drops the image before calling us.
732
+ let imageArg = null;
733
+ if (image && typeof image === 'string' && image.startsWith('data:')) {
734
+ try {
735
+ const filePath = await uploadImageToGradio(base, image, signal);
736
+ imageArg = {
737
+ path: filePath,
738
+ meta: { _type: 'gradio.FileData' },
739
+ orig_name: filePath.split('/').pop() || 'image.png',
740
+ };
741
+ } catch (e) {
742
+ console.warn('[mindi] Image upload to Gradio failed:', e);
743
+ toast(`Image upload failed: ${e.message || e}. Sending text only.`, 'error', 5000);
744
+ imageArg = null;
745
+ }
746
+ }
747
+
748
  // Step 1: Submit the request
749
  const submitRes = await fetch(`${base}/gradio_api/call/chat_fn`, {
750
  method: 'POST',
751
  headers: authHeaders({ 'Content-Type': 'application/json' }),
752
  body: JSON.stringify({
753
+ data: [prompt, imageArg, state.temperature, state.maxTokens, historyJson],
754
  }),
755
  signal,
756
  });
 
1356
  els.chatTitle.textContent = chat.title;
1357
  }
1358
 
1359
+ // Reset input.
1360
+ // If vision is disabled in Settings, drop the image before calling
1361
+ // the API so we don't waste an upload round-trip on something the
1362
+ // backend will ignore. The image still appears in the user message
1363
+ // for the chat record.
1364
+ const imageForApi = state.visionEnabled
1365
+ ? (runtime.pendingImages[0]?.dataUrl || null)
1366
+ : null;
1367
+ if (!state.visionEnabled && runtime.pendingImages.length) {
1368
+ toast('Vision is disabled \u2014 image attached for record only. Enable it in Settings to send to the model.', 'info', 4500);
1369
+ }
1370
  els.promptInput.value = '';
1371
  autosizeTextarea();
1372
  clearPendingImages();
 
1533
  function openSettings() {
1534
  els.settingsUrl.value = state.apiUrl || '';
1535
  if (els.settingsHfToken) els.settingsHfToken.value = state.hfToken || '';
1536
+ if (els.settingsVision) els.settingsVision.checked = !!state.visionEnabled;
1537
  els.settingsTemp.value = state.temperature;
1538
  els.settingsTokens.value = state.maxTokens;
1539
  els.tempVal.textContent = Number(state.temperature).toFixed(2);
 
1548
  function applySettings() {
1549
  const url = els.settingsUrl.value.trim();
1550
  const token = els.settingsHfToken ? els.settingsHfToken.value.trim() : '';
1551
+ const vision = !!(els.settingsVision && els.settingsVision.checked);
1552
  const temp = parseFloat(els.settingsTemp.value);
1553
  const tokens = parseInt(els.settingsTokens.value, 10);
1554
  const tokenChanged = token !== state.hfToken;
1555
+ state.apiUrl = url || API_DEFAULT;
1556
+ state.hfToken = token;
1557
+ state.visionEnabled = vision;
1558
+ state.temperature = isFinite(temp) ? temp : 0.7;
1559
+ state.maxTokens = isFinite(tokens) ? tokens : 2048;
1560
  // If the user just saved a new (non-empty) token, give the API another shot.
1561
  if (tokenChanged && token) {
1562
  runtime.authBlocked = false;
 
1681
  els.copyCode.addEventListener('click', copyLastCode);
1682
  els.downloadCode.addEventListener('click', downloadLastCode);
1683
 
1684
+ // Delegated click handler for code-block action buttons inside messages
1685
+ // (Copy, Run-in-StackBlitz, Open-in-CodeSandbox).
1686
  els.messages.addEventListener('click', async (e) => {
1687
+ const copyBtn = e.target.closest('.md-copy');
1688
+ if (copyBtn) {
1689
+ try {
1690
+ await navigator.clipboard.writeText(copyBtn.dataset.code || '');
1691
+ const prev = copyBtn.textContent;
1692
+ copyBtn.textContent = 'Copied!';
1693
+ setTimeout(() => { copyBtn.textContent = prev; }, 1400);
1694
+ } catch {
1695
+ toast('Clipboard unavailable', 'error');
1696
+ }
1697
+ return;
1698
+ }
1699
+
1700
+ const runBtn = e.target.closest('.md-run');
1701
+ if (runBtn) {
1702
+ try {
1703
+ launchInStackBlitz(runBtn.dataset.code || '', runBtn.dataset.lang || '');
1704
+ } catch (err) {
1705
+ toast(`StackBlitz launch failed: ${err.message || err}`, 'error');
1706
+ }
1707
+ return;
1708
+ }
1709
+
1710
+ const sbxBtn = e.target.closest('.md-sandbox');
1711
+ if (sbxBtn) {
1712
+ const prev = sbxBtn.textContent;
1713
+ sbxBtn.disabled = true;
1714
+ sbxBtn.textContent = '\u25B6 Opening\u2026';
1715
+ try {
1716
+ await launchInCodeSandbox(sbxBtn.dataset.code || '', sbxBtn.dataset.lang || '');
1717
+ } finally {
1718
+ sbxBtn.textContent = prev;
1719
+ sbxBtn.disabled = false;
1720
+ }
1721
+ return;
1722
  }
1723
  });
1724
 
frontend/index.html CHANGED
@@ -297,6 +297,17 @@
297
  <span class="field-hint">Paste a PRO HF token to bypass anonymous ZeroGPU quota. Stored only in this browser. <a href="https://huggingface.co/settings/tokens" target="_blank" rel="noopener">Get a token →</a></span>
298
  </label>
299
 
 
 
 
 
 
 
 
 
 
 
 
300
  <label class="field">
301
  <span class="field-label">Temperature <em class="field-value" id="temp-val">0.7</em></span>
302
  <input id="settings-temp" type="range" min="0" max="2" step="0.05" value="0.7" />
 
297
  <span class="field-hint">Paste a PRO HF token to bypass anonymous ZeroGPU quota. Stored only in this browser. <a href="https://huggingface.co/settings/tokens" target="_blank" rel="noopener">Get a token →</a></span>
298
  </label>
299
 
300
+ <label class="field field-toggle">
301
+ <span class="field-toggle-row">
302
+ <span class="field-label">Vision input</span>
303
+ <span class="toggle">
304
+ <input id="settings-vision" type="checkbox" />
305
+ <span class="toggle-slider"></span>
306
+ </span>
307
+ </span>
308
+ <span class="field-hint">Send attached images to MINDI's CLIP encoder. <strong>Off by default</strong> — the current vision-language fusion is an early build and produces low-quality answers on images. Leave off until the next vision retraining ships. Attaching an image still records it in the chat.</span>
309
+ </label>
310
+
311
  <label class="field">
312
  <span class="field-label">Temperature <em class="field-value" id="temp-val">0.7</em></span>
313
  <input id="settings-temp" type="range" min="0" max="2" step="0.05" value="0.7" />
frontend/styles.css CHANGED
@@ -55,7 +55,7 @@
55
  --r-xl: 20px;
56
 
57
  --sidebar-w: 280px;
58
- --preview-w: 420px;
59
  --header-h: 56px;
60
 
61
  /* Motion */
@@ -564,8 +564,8 @@ body.sidebar-open .scrim { opacity: 1; pointer-events: auto; }
564
  flex: 1;
565
  display: none;
566
  flex-direction: column;
567
- gap: 24px;
568
- padding: 28px 24px 16px;
569
  overflow-y: auto;
570
  scroll-behavior: smooth;
571
  }
@@ -686,16 +686,52 @@ body.sidebar-open .scrim { opacity: 1; pointer-events: auto; }
686
  border-bottom: 1px solid var(--border);
687
  }
688
  .md-code-head span:first-child { color: var(--c-code); font-weight: 500; }
689
- .md-copy {
 
 
 
 
 
 
 
 
 
690
  font-family: var(--mono);
691
  font-size: 11px;
 
 
692
  color: var(--text-mute);
693
  padding: 4px 10px;
694
  border-radius: 4px;
695
  border: 1px solid var(--border);
696
- transition: background .2s var(--ease), color .2s var(--ease), border-color .2s var(--ease);
 
697
  }
698
  .md-copy:hover { background: var(--hover); color: var(--text); border-color: var(--border-2); }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
699
  .md-code-block code {
700
  display: block;
701
  font-family: var(--mono);
@@ -704,6 +740,8 @@ body.sidebar-open .scrim { opacity: 1; pointer-events: auto; }
704
  padding: 14px 16px;
705
  overflow-x: auto;
706
  white-space: pre;
 
 
707
  }
708
 
709
  /* Loading message */
@@ -1097,6 +1135,58 @@ body.preview-hidden .preview { display: none; }
1097
  color: var(--text-mute);
1098
  line-height: 1.5;
1099
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1100
  .field input[type="url"],
1101
  .field input[type="password"] {
1102
  padding: 9px 12px;
@@ -1212,8 +1302,11 @@ pre[class*="language-"], code[class*="language-"] {
1212
  .token.regex, .token.important, .token.variable { color: #f87171 !important; }
1213
 
1214
  /* ============ RESPONSIVE ============ */
 
 
 
1215
  @media (max-width: 1180px) {
1216
- :root { --preview-w: 380px; }
1217
  }
1218
  @media (max-width: 1024px) {
1219
  .app { grid-template-columns: var(--sidebar-w) 1fr; }
 
55
  --r-xl: 20px;
56
 
57
  --sidebar-w: 280px;
58
+ --preview-w: 480px;
59
  --header-h: 56px;
60
 
61
  /* Motion */
 
564
  flex: 1;
565
  display: none;
566
  flex-direction: column;
567
+ gap: 20px;
568
+ padding: 22px 24px 14px;
569
  overflow-y: auto;
570
  scroll-behavior: smooth;
571
  }
 
686
  border-bottom: 1px solid var(--border);
687
  }
688
  .md-code-head span:first-child { color: var(--c-code); font-weight: 500; }
689
+ .md-code-actions {
690
+ display: flex;
691
+ gap: 6px;
692
+ align-items: center;
693
+ flex-wrap: wrap;
694
+ justify-content: flex-end;
695
+ }
696
+ .md-copy,
697
+ .md-run,
698
+ .md-sandbox {
699
  font-family: var(--mono);
700
  font-size: 11px;
701
+ font-weight: 500;
702
+ letter-spacing: .02em;
703
  color: var(--text-mute);
704
  padding: 4px 10px;
705
  border-radius: 4px;
706
  border: 1px solid var(--border);
707
+ white-space: nowrap;
708
+ transition: background .2s var(--ease), color .2s var(--ease), border-color .2s var(--ease), transform .15s var(--ease);
709
  }
710
  .md-copy:hover { background: var(--hover); color: var(--text); border-color: var(--border-2); }
711
+ .md-run {
712
+ color: #d8c8ff;
713
+ border-color: rgba(124, 58, 237, .35);
714
+ background: rgba(124, 58, 237, .08);
715
+ }
716
+ .md-run:hover {
717
+ background: rgba(124, 58, 237, .18);
718
+ color: #fff;
719
+ border-color: rgba(124, 58, 237, .55);
720
+ transform: translateY(-1px);
721
+ }
722
+ .md-sandbox {
723
+ color: #fde68a;
724
+ border-color: rgba(245, 158, 11, .32);
725
+ background: rgba(245, 158, 11, .06);
726
+ }
727
+ .md-sandbox:hover {
728
+ background: rgba(245, 158, 11, .15);
729
+ color: #fff;
730
+ border-color: rgba(245, 158, 11, .55);
731
+ transform: translateY(-1px);
732
+ }
733
+ .md-sandbox:disabled,
734
+ .md-run:disabled { opacity: .55; cursor: progress; transform: none; }
735
  .md-code-block code {
736
  display: block;
737
  font-family: var(--mono);
 
740
  padding: 14px 16px;
741
  overflow-x: auto;
742
  white-space: pre;
743
+ max-height: 540px;
744
+ overflow-y: auto;
745
  }
746
 
747
  /* Loading message */
 
1135
  color: var(--text-mute);
1136
  line-height: 1.5;
1137
  }
1138
+
1139
+ /* Toggle switch (used inside .field-toggle) */
1140
+ .field-toggle .field-toggle-row {
1141
+ display: flex;
1142
+ justify-content: space-between;
1143
+ align-items: center;
1144
+ gap: 12px;
1145
+ }
1146
+ .toggle {
1147
+ position: relative;
1148
+ display: inline-block;
1149
+ width: 40px;
1150
+ height: 22px;
1151
+ flex: 0 0 40px;
1152
+ }
1153
+ .toggle input {
1154
+ opacity: 0;
1155
+ width: 0;
1156
+ height: 0;
1157
+ }
1158
+ .toggle-slider {
1159
+ position: absolute;
1160
+ inset: 0;
1161
+ background: rgba(124, 58, 237, .12);
1162
+ border: 1px solid var(--border-2);
1163
+ border-radius: 999px;
1164
+ cursor: pointer;
1165
+ transition: background-color .2s var(--ease), border-color .2s var(--ease);
1166
+ }
1167
+ .toggle-slider::before {
1168
+ content: '';
1169
+ position: absolute;
1170
+ left: 2px;
1171
+ top: 50%;
1172
+ transform: translateY(-50%);
1173
+ width: 16px;
1174
+ height: 16px;
1175
+ border-radius: 50%;
1176
+ background: #c8c2e0;
1177
+ transition: transform .2s var(--ease), background-color .2s var(--ease);
1178
+ }
1179
+ .toggle input:checked + .toggle-slider {
1180
+ background: rgba(124, 58, 237, .55);
1181
+ border-color: rgba(124, 58, 237, .8);
1182
+ }
1183
+ .toggle input:checked + .toggle-slider::before {
1184
+ transform: translate(18px, -50%);
1185
+ background: #fff;
1186
+ }
1187
+ .toggle input:focus-visible + .toggle-slider {
1188
+ box-shadow: 0 0 0 3px rgba(124, 58, 237, .25);
1189
+ }
1190
  .field input[type="url"],
1191
  .field input[type="password"] {
1192
  padding: 9px 12px;
 
1302
  .token.regex, .token.important, .token.variable { color: #f87171 !important; }
1303
 
1304
  /* ============ RESPONSIVE ============ */
1305
+ @media (max-width: 1280px) {
1306
+ :root { --preview-w: 440px; }
1307
+ }
1308
  @media (max-width: 1180px) {
1309
+ :root { --preview-w: 400px; }
1310
  }
1311
  @media (max-width: 1024px) {
1312
  .app { grid-template-columns: var(--sidebar-w) 1fr; }
frontend/test_api.py CHANGED
@@ -8,7 +8,7 @@ Modes:
8
  python test_api.py "<prompt>" [maxtok] # single custom prompt
9
  python test_api.py --memory # multi-turn identity + memory test
10
  """
11
- import os, sys, time, json
12
  import requests
13
 
14
  BASE = os.environ.get("MINDI_API", "https://mindigenous-mindi-chat.hf.space")
@@ -17,6 +17,7 @@ TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
17
  ARGS = [a for a in sys.argv[1:] if not a.startswith("--")]
18
  FLAGS = [a for a in sys.argv[1:] if a.startswith("--")]
19
  MEMORY_MODE = "--memory" in FLAGS
 
20
  PROMPT = ARGS[0] if ARGS else "Write hello world in Python"
21
  MAXTOK = int(ARGS[1]) if len(ARGS) > 1 else 256
22
 
@@ -44,18 +45,42 @@ for path in ("/gradio_api/config", "/config"):
44
  except Exception as e:
45
  print(f" {path} failed:", e)
46
 
47
- def call_api(prompt: str, history: list | None = None, max_tokens: int = 256, preview_chars: int = 1200) -> dict | None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  """Submit a single chat_fn request and stream its SSE result.
49
 
50
  Returns the parsed {response, sections} dict from the 'complete' event,
51
  or None on failure.
52
  """
53
  history_json = json.dumps(history) if history else ""
 
 
 
54
  start = time.time()
55
  resp = requests.post(
56
  BASE + "/gradio_api/call/chat_fn",
57
  headers=HEADERS,
58
- json={"data": [prompt, None, 0.7, max_tokens, history_json]},
59
  timeout=30,
60
  )
61
  if resp.status_code != 200:
@@ -137,6 +162,35 @@ if MEMORY_MODE:
137
  print(" [FAIL] Model did NOT identify as MINDI")
138
  if "gpt" in text or "claude" in text or "gemini" in text:
139
  print(" [WARN] Response still mentions GPT/Claude/Gemini")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  else:
141
  print("\n=== Step 2: API generation test ===")
142
  print(f"Prompt: {PROMPT!r} | max_tokens={MAXTOK}")
 
8
  python test_api.py "<prompt>" [maxtok] # single custom prompt
9
  python test_api.py --memory # multi-turn identity + memory test
10
  """
11
+ import os, sys, time, json, tempfile
12
  import requests
13
 
14
  BASE = os.environ.get("MINDI_API", "https://mindigenous-mindi-chat.hf.space")
 
17
  ARGS = [a for a in sys.argv[1:] if not a.startswith("--")]
18
  FLAGS = [a for a in sys.argv[1:] if a.startswith("--")]
19
  MEMORY_MODE = "--memory" in FLAGS
20
+ VISION_MODE = "--vision" in FLAGS
21
  PROMPT = ARGS[0] if ARGS else "Write hello world in Python"
22
  MAXTOK = int(ARGS[1]) if len(ARGS) > 1 else 256
23
 
 
45
  except Exception as e:
46
  print(f" {path} failed:", e)
47
 
48
+ def upload_image(path: str) -> dict | None:
49
+ """POST an image to /gradio_api/upload and return the FileData reference."""
+ # Returns a Gradio FileData-shaped dict ({path, meta, orig_name}) suitable
+ # for the image slot of chat_fn, or None on any failure (missing file,
+ # non-200 response, unexpected body). Callers must handle None.
50
+ if not os.path.exists(path):
51
+ print(f" [upload] file not found: {path}")
52
+ return None
+ # Strip any Content-Type header from the shared HEADERS so requests can
+ # set the correct multipart/form-data boundary itself.
53
+ upload_headers = {k: v for k, v in HEADERS.items() if k.lower() != "content-type"}
54
+ with open(path, "rb") as fh:
+ # NOTE(review): the MIME type is hard-coded to image/png even when the
+ # file is a JPEG/WebP — confirm the Space tolerates a mismatched type,
+ # or derive it from the extension (e.g. mimetypes.guess_type).
55
+ files = {"files": (os.path.basename(path), fh, "image/png")}
56
+ resp = requests.post(BASE + "/gradio_api/upload", headers=upload_headers, files=files, timeout=30)
57
+ if resp.status_code != 200:
58
+ print(f" [upload] {resp.status_code}: {resp.text[:200]}")
59
+ return None
+ # Gradio's upload endpoint responds with a JSON list of server-side
+ # temp paths; we only ever send one file, so take the first entry.
60
+ body = resp.json()
61
+ file_path = body[0] if isinstance(body, list) else None
62
+ if not file_path:
63
+ print(f" [upload] unexpected: {body}")
64
+ return None
+ # meta._type marks this dict as a gradio.FileData reference so chat_fn's
+ # gr.Image input resolves the server-side path instead of raw bytes.
65
+ return {"path": file_path, "meta": {"_type": "gradio.FileData"}, "orig_name": os.path.basename(path)}
66
+
67
+
68
+ def call_api(prompt: str, history: list | None = None, max_tokens: int = 256,
69
+ preview_chars: int = 1200, image_path: str | None = None) -> dict | None:
70
  """Submit a single chat_fn request and stream its SSE result.
71
 
72
  Returns the parsed {response, sections} dict from the 'complete' event,
73
  or None on failure.
74
  """
75
  history_json = json.dumps(history) if history else ""
76
+ image_arg = upload_image(image_path) if image_path else None
77
+ if image_path:
78
+ print(f" [vision] uploaded {image_path} -> {image_arg.get('path') if image_arg else 'FAILED'}")
79
  start = time.time()
80
  resp = requests.post(
81
  BASE + "/gradio_api/call/chat_fn",
82
  headers=HEADERS,
83
+ json={"data": [prompt, image_arg, 0.7, max_tokens, history_json]},
84
  timeout=30,
85
  )
86
  if resp.status_code != 200:
 
162
  print(" [FAIL] Model did NOT identify as MINDI")
163
  if "gpt" in text or "claude" in text or "gemini" in text:
164
  print(" [WARN] Response still mentions GPT/Claude/Gemini")
165
+ elif VISION_MODE:
166
+ # Vision pipeline test — upload a tiny synthetic PNG and ask MINDI
167
+ # to describe it. Verifies the /gradio_api/upload + chat_fn(image=...) path.
+ # NOTE(review): ARGS[1] is reused as the prompt below, but the top of the
+ # script already ran `MAXTOK = int(ARGS[1]) if len(ARGS) > 1 else 256`,
+ # which raises ValueError when --vision is invoked with a textual second
+ # argument (e.g. `test_api.py --vision img.png "Describe"`). Confirm and
+ # guard MAXTOK parsing for vision mode.
168
+ print("\n=== Vision mode: image upload + describe test ===")
+ # First positional arg is an optional image path; default to a reusable
+ # synthetic image in the OS temp dir.
169
+ img_path = ARGS[0] if ARGS else os.path.join(tempfile.gettempdir(), "mindi_test_dot.png")
170
+ if not os.path.exists(img_path):
171
+ try:
+ # Pillow is an optional dependency here; only needed when no image
+ # was supplied and the cached synthetic one doesn't exist yet.
172
+ from PIL import Image, ImageDraw
173
+ img = Image.new("RGB", (256, 256), color=(20, 20, 30))
174
+ d = ImageDraw.Draw(img)
+ # Purple rectangle outline + yellow filled ellipse: shapes/colors
+ # chosen to match the cue words asserted below.
175
+ d.rectangle((40, 40, 216, 216), outline=(120, 80, 255), width=4)
176
+ d.ellipse((96, 96, 160, 160), fill=(255, 200, 80))
177
+ img.save(img_path)
178
+ print(f"[vision] generated synthetic test image at {img_path}")
179
+ except Exception as e:
180
+ print(f"[vision] could not synthesize test image (need Pillow): {e}")
181
+ sys.exit(1)
182
+
183
+ prompt = ARGS[1] if len(ARGS) > 1 else "Describe this image in one sentence."
184
+ r = call_api(prompt, history=None, max_tokens=128, image_path=img_path)
185
+ if r:
186
+ text = (r.get("response") or "").lower()
187
+ # Loose checks: did the model engage with image content at all?
+ # Deliberately lenient: the current vision fusion is known to be weak,
+ # so any cue hit counts as PASS and a miss only WARNs (never fails).
188
+ cues = ["circle", "square", "rectangle", "yellow", "purple", "ellipse", "image", "shape"]
189
+ hits = [c for c in cues if c in text]
190
+ if hits:
191
+ print(f" [PASS] response mentions visual cues: {hits}")
192
+ else:
193
+ print(" [WARN] response does not seem image-aware")
194
  else:
195
  print("\n=== Step 2: API generation test ===")
196
  print(f"Prompt: {PROMPT!r} | max_tokens={MAXTOK}")
hf_space/app.py CHANGED
@@ -50,12 +50,25 @@ def parse_output(text: str) -> dict:
50
  _CHAT_TOKEN_PATTERN = re.compile(
51
  r"<\|(?:im_start|im_end|endoftext|fim_prefix|fim_middle|fim_suffix|fim_pad|repo_name|file_sep)\|>"
52
  )
 
 
 
 
53
 
54
 
55
  def clean_output(text: str) -> str:
56
  """Strip Qwen chat-template artifacts and any leading role prefix."""
 
 
57
  text = _CHAT_TOKEN_PATTERN.sub("", text)
58
- text = re.sub(r"^\s*(system|user|assistant)\s*\n", "", text)
 
 
 
 
 
 
 
59
  return text.strip()
60
 
61
 
 
50
  _CHAT_TOKEN_PATTERN = re.compile(
51
  r"<\|(?:im_start|im_end|endoftext|fim_prefix|fim_middle|fim_suffix|fim_pad|repo_name|file_sep)\|>"
52
  )
53
+ # Match a role line ONLY if it stands alone at the very start of the text
54
+ # followed by an explicit newline. The previous '\s*' wildcard could swallow
55
+ # leading content when the model emitted weird sequences in the vision path.
56
+ _ROLE_PREFIX_PATTERN = re.compile(r"^(?:system|user|assistant)\n")
57
 
58
 
59
  def clean_output(text: str) -> str:
60
  """Strip Qwen chat-template artifacts and any leading role prefix."""
+ # NOTE(review): this debug path prints the full raw model output, which
+ # may include user-provided content — confirm MINDI_DEBUG_RAW is never
+ # enabled in production logs.
61
+ if os.environ.get("MINDI_DEBUG_RAW") == "1":
62
+ print(f"[clean_output] RAW ({len(text)} chars): {text!r}")
+ # Remove Qwen special tokens (<|im_start|>, <|endoftext|>, FIM markers, ...).
63
  text = _CHAT_TOKEN_PATTERN.sub("", text)
64
+ # Apply role-prefix strip up to twice: handles the vision-path case where
+ # the model occasionally emits 'assistant\n' followed by stray noise like
+ # an extra 'user\n' before the real reply.
65
+ # _ROLE_PREFIX_PATTERN is ^-anchored without re.MULTILINE, so each pass
+ # only matches a role word at the very start of the (already-stripped)
+ # string; the loop exits early once no prefix remains.
66
+ for _ in range(2):
67
+ new_text = _ROLE_PREFIX_PATTERN.sub("", text, count=1)
68
+ if new_text == text:
69
+ break
70
+ text = new_text
71
  return text.strip()
73
 
74