<!-- Source: HarbourSOFT · commit "Update index.html" (0829c1b, verified) -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta
name="viewport"
content="width=device-width,initial-scale=1,viewport-fit=cover" />
<title>Qwen2.5‑Coder‑3B‑Instruct — WebGPU (ONNX)</title>
<meta name="color-scheme" content="light" />
<style>
/* Design tokens: palette, status colours, corner radius, border and shadow reused by the rules below. */
:root{
--bg:#f6f8fc; --panel:#ffffff; --text:#0b1220; --muted:#5f6c7b; --accent:#2b6cff;
--ok:#1aa36f; --warn:#b7791f; --err:#d64545;
--radius:14px; --border:1px solid rgba(0,16,61,.1);
--shadow:0 6px 24px rgba(2,16,56,.08)
}
/* Border-box sizing everywhere so padding does not widen width:100% controls. */
*{box-sizing:border-box}
/* Page background: two soft radial highlights layered over the flat --bg colour. */
body{
margin:0;
background:
radial-gradient(900px 500px at -10% -10%, #e6efff 0%, transparent 60%),
radial-gradient(900px 500px at 110% 10%, #ecf3ff 0%, transparent 60%),
var(--bg);
color:var(--text);
font:16px/1.6 system-ui,-apple-system,Segoe UI,Roboto,Inter,Arial,sans-serif;
padding:20px
}
/* Top bar: title on the left, action buttons on the right. */
header{
display:flex;gap:16px;align-items:center;justify-content:space-between;
padding:14px 16px;border-radius:var(--radius);
background:var(--panel);border:var(--border);box-shadow:var(--shadow)
}
h1{margin:0;font-size:22px}
/* Small secondary text (field captions, notes). */
.muted{color:var(--muted);font-size:12px}
/* Layout helpers: wrapping flex row and auto-fitting column grid. */
.row{display:flex;align-items:center;gap:8px;flex-wrap:wrap}
.grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(240px,1fr));gap:12px}
/* Panel container used for each page section. */
.card{
background:var(--panel);border:var(--border);border-radius:var(--radius);
padding:14px;margin-top:16px;box-shadow:var(--shadow)
}
/* Form controls share the panel border and radius. */
textarea{width:100%;height:180px;background:#fff;border:var(--border);border-radius:12px;color:var(--text);padding:10px}
input[type="text"],select{width:100%;background:#fff;border:var(--border);border-radius:12px;color:var(--text);padding:10px}
/* Buttons: primary (default), .secondary (grey gradient) and .ghost (outlined). */
button{
appearance:none;border:0;border-radius:12px;padding:10px 14px;cursor:pointer;font-weight:600;
background:linear-gradient(180deg,#4f80ff,#2b6cff);color:white;box-shadow:0 4px 14px rgba(43,108,255,.25)
}
button.secondary{background:linear-gradient(180deg,#a4adc2,#8791aa);box-shadow:none;color:#102148}
button.ghost{background:#fff;border:var(--border);color:#223150;box-shadow:none}
/* Chat transcript: fixed-height scrolling column of message bubbles. */
#chat{
display:flex;flex-direction:column;gap:8px;height:280px;overflow:auto;border-radius:12px;background:#fff;border:var(--border);padding:10px
}
/* Message bubbles; pre-wrap keeps line breaks in streamed text. .me aligns right, .bot aligns left. */
.msg{padding:10px;border-radius:10px;max-width:90%;white-space:pre-wrap}
.me{background:#e7efff;align-self:flex-end}
.bot{background:#f4f6fb;align-self:flex-start}
/* Debug log pane. The script appends plain text terminated by "\n", so
   whitespace must be preserved; without it every entry collapses into one
   run-on paragraph. overflow-wrap keeps long URLs from forcing a horizontal
   scrollbar. */
#log{
  background:#fff;color:#24324a;border:var(--border);border-radius:12px;height:160px;overflow:auto;padding:8px;font-size:12px;
  white-space:pre-wrap;overflow-wrap:anywhere
}
/* Status pill; the .ok / .warn / .err modifiers tint background and border. */
.chip{
display:inline-flex;align-items:center;gap:6px;border-radius:999px;padding:6px 10px;border:var(--border);background:#fff
}
.ok{background:#e6f6ef;border-color:rgba(26,163,111,.35)}
.warn{background:#fff2df;border-color:rgba(183,121,31,.35)}
.err{background:#ffe8e8;border-color:rgba(214,69,69,.35)}
/* Grid of example-prompt buttons. */
.examples{display:grid;grid-template-columns:repeat(auto-fill,minmax(220px,1fr));gap:10px;margin-top:10px}
.examples button{justify-content:flex-start}
/* Checkbox + caption pair. */
.toggle{display:flex;align-items:center;gap:8px}
/* Download progress bar. */
progress{width:260px;height:10px}
a.link{color:var(--accent);text-decoration:none}
a.link:hover{text-decoration:underline}
/* Helper text shown under form fields. */
.hint{font-size:12px;color:var(--muted)}
</style>
</head>
<body>
<header>
<div>
<h1>Qwen2.5‑Coder‑3B‑Instruct — WebGPU (ONNX)</h1>
<div class="muted">Pure front‑end · Transformers.js v3 + ONNX Runtime Web · WebGPU preferred</div>
</div>
<div class="row">
  <!-- Header actions. Buttons are wired up by id in the module script; #cancel and
       #status start hidden and are shown/hidden through their inline display style. -->
  <button id="load" type="button">Load Model</button>
  <button id="cancel" class="secondary" type="button" style="display:none;">Cancel &amp; Abort Downloads</button>
  <button id="clearlog" class="ghost" type="button">Clear Log</button>
  <span id="status" class="chip" role="status" style="display:none"></span>
</div>
</header>
<div class="card">
<div class="row" style="margin-bottom:8px">
<span class="chip warn">First run will download ~2 GB+ (q4* ONNX weights). Desktop Chrome/Edge recommended. HTTPS or localhost required for WebGPU.</span>
</div>
<!-- Configuration: model source, execution device, quantisation and optional local mirror.
     Each caption is tied to its control with for/id so it is announced and clickable. -->
<div class="grid" style="margin-bottom:8px">
  <div>
    <label class="muted" for="modelId">Model ID or local path</label>
    <input id="modelId" type="text" value="onnx-community/Qwen2.5-Coder-3B-Instruct" />
    <div class="hint">If you mirror locally, ensure it contains an <code>onnx/</code> directory.</div>
  </div>
  <div>
    <label class="muted" for="deviceSel">Device</label>
    <select id="deviceSel">
      <option value="webgpu" selected>webgpu (preferred)</option>
      <option value="wasm">wasm (CPU)</option>
    </select>
  </div>
  <div>
    <label class="muted" for="dtypeSel">dtype (quantization/precision)</label>
    <select id="dtypeSel">
      <option value="q4f16" selected>q4f16 (smaller graph; recommended)</option>
      <option value="q4">q4</option>
      <option value="int8">int8</option>
      <option value="fp16">fp16</option>
    </select>
  </div>
  <div>
    <label class="muted" for="localPath">Local mirror (optional)</label>
    <input id="localPath" type="text" placeholder="/models/" />
    <div class="row">
      <label class="toggle"><input id="localOnly" type="checkbox" /> Local only (disable remote)</label>
    </div>
    <div class="hint">Example mirror path: <code>/models/onnx-community/Qwen2.5-Coder-3B-Instruct/onnx/*</code></div>
  </div>
</div>
<!-- Confirmation step: visible on first paint; the script hides it while loading
     and shows it again after a cancel or a failed load. -->
<div id="confirm">
  <p>This will download model files (potentially several GB) and run inference via WebGPU. Proceed?</p>
  <div class="row">
    <button id="confirm-yes">Yes, download now</button>
  </div>
</div>
<!-- Loading / progress: hidden until a download starts. The bar gets an explicit
     accessible name because the adjacent text is not programmatically tied to it. -->
<div id="loading" style="display:none">
  <div class="row" style="align-items:center">
    <progress id="progress" value="0" max="100" aria-label="Model download progress"></progress>
    <span id="progress-label" class="muted">0%</span>
    <span class="muted">Downloading… do not close this tab.</span>
  </div>
  <div class="muted">Note: Safari/Firefox may need experimental flags; if WebGPU is unavailable, the app will fall back to WASM.</div>
</div>
<!-- Chat UI: hidden until the script reports the model ready.
     The system-prompt caption is bound to its input, the "Status" caption is a plain
     span (it labels no form control), and the prompt box has an accessible name
     instead of relying on its placeholder. -->
<div id="ui" style="display:none">
  <div class="grid">
    <div>
      <label class="muted" for="sys">System prompt (optional)</label>
      <input id="sys" type="text" placeholder="You are a senior coding assistant. Keep answers concise." />
    </div>
    <div>
      <span class="muted">Status</span>
      <div class="row"><span id="status2" class="chip" style="display:inline-flex"></span></div>
    </div>
  </div>
  <div id="chat" aria-live="polite" aria-busy="false"></div>
  <textarea id="prompt" aria-label="Prompt" placeholder="Ask something (you can paste code)…"></textarea>
  <div class="row">
    <button id="send" type="button">Send</button>
    <button id="stop" class="ghost" type="button" disabled>Stop</button>
    <button id="clear" class="ghost" type="button">Clear Chat</button>
  </div>
</div>
</div>
<div class="card">
  <!-- Section heading is an h2 so the outline does not jump from h1 to h3;
       the font-size keeps the previous h3 appearance. -->
  <h2 style="margin:6px 0;font-size:1.17em">Examples</h2>
  <div class="toggle"><input id="autorun" type="checkbox" checked/> <label for="autorun" class="muted">Auto‑run on click</label></div>
  <!-- Populated by the script with one button per example prompt. -->
  <div id="examples" class="examples"></div>
</div>
<div class="card">
  <!-- Section heading is an h2 so the outline does not jump from h1 to h3;
       the font-size keeps the previous h3 appearance. -->
  <h2 style="margin:6px 0;font-size:1.17em">Debug log</h2>
  <div class="row" style="margin-bottom:8px">
    <button id="clear-hf-cache" class="ghost" type="button">Clear HF browser caches (may require refresh)</button>
  </div>
  <!-- The script appends one text line per log entry here. -->
  <div id="log" role="log" aria-live="polite"></div>
</div>
<script type="module">
// ================= Configuration =================
// Model used when the "Model ID" field is left empty.
const DEFAULT_MODEL_ID = 'onnx-community/Qwen2.5-Coder-3B-Instruct';
// Library URL, pinned to an exact release so the dynamic import is reproducible.
const TRANSFORMERS_CDN = 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.6';
/**
 * Build the options object handed to the pipeline factory.
 * @param {string} device  'webgpu' | 'wasm'
 * @param {string} dtype   'q4f16' | 'q4' | 'int8' | 'fp16'
 * @returns {{device: string, dtype: string, progress_callback: Function}}
 */
const makeOptions = (device, dtype) => {
  return {
    device: device,
    dtype: dtype,
    // Download progress events are routed to the progress bar / debug log.
    progress_callback: setProgress,
  };
};
// Canned prompts rendered as buttons in the "Examples" card.
// Each entry: `title` is the button caption, `sys` is copied into the
// system-prompt field and `prompt` into the prompt textarea when clicked.
// Multi-line prompts use template literals; their line breaks are part of the prompt text.
const EXAMPLES = [
{
title: 'Parse Apache logs (Python)',
sys: 'You are a senior coding assistant. Keep answers concise and show tested code.',
prompt: 'Write a Python function parse_log(line: str) that parses Apache combined log format into a dict with keys ip, time, method, path, status, bytes, referrer, ua. Include robust regex, timezone handling, and 5 pytest unit tests.'
},
{
title: 'Refactor callbacks → async/await (Node.js)',
sys: 'You are a pragmatic JS refactoring assistant.',
prompt:
`Refactor this Node.js callback code to async/await with proper error handling and backpressure using streams:
const fs = require('fs');
fs.readFile('in.txt', (e, d) => {
if (e) throw e;
fs.writeFile('out.txt', d.toString().toUpperCase(), err => {
if (err) throw err;
console.log('done');
});
});
Provide a short explanation.`
},
{
title: 'Cohort retention SQL (Postgres)',
sys: 'You are a data engineer.',
prompt: 'Given events(user_id, event_time, event_name) with sign_up and active events, write a SQL that computes weekly user retention (cohorted by signup week) as a pivoted table. Assume UTC timestamps. Explain indexes briefly.'
},
{
title: 'Unit tests with pytest',
sys: 'You are a Python testing expert.',
prompt: 'Generate pytest tests for a function normalize_phone(s: str) that returns E.164 format or raises ValueError. Cover edge cases and property tests with hypothesis.'
},
{
title: 'Explain code step by step',
sys: 'Be a clear explainer for junior developers.',
prompt:
`Explain the following code step by step, then suggest two improvements for readability and performance:
from collections import defaultdict
def f(nums):
d = defaultdict(int)
for x in nums:
d[x] += 1
m = max(d.values())
return [k for k, v in d.items() if v == m]`
},
{
title: 'Regex with explanation',
sys: 'You write readable regex with comments.',
prompt: 'Write a single regex that matches a valid IPv4 or IPv6 address. Provide a commented, multi-line version and a short, single-line version, plus examples of matches and non-matches.'
}
];
// ================= DOM =================
// Shorthand for a single-element CSS-selector lookup.
const $ = (selector) => document.querySelector(selector);

// Output panes
const logDiv = $('#log');
const chatDiv = $('#chat');
// Header actions
const loadBtn = $('#load');
const cancelBtn = $('#cancel');
const clearBtn = $('#clearlog');
// Main-card sections toggled during the load flow
const ui = $('#ui');
const loading = $('#loading');
const confirmBox = $('#confirm');
// Download progress widgets
const pbar = $('#progress');
const plabel = $('#progress-label');
// Chat inputs and controls
const sysEl = $('#sys');
const promptEl = $('#prompt');
const sendBtn = $('#send');
const stopBtn = $('#stop');
const clearChatBtn = $('#clear');
// Status chips (header and chat panel)
const statusChip = $('#status');
const status2 = $('#status2');
// Example prompts
const exWrap = $('#examples');
const autoRun = $('#autorun');
// Configuration fields
const modelIdEl = $('#modelId');
const deviceSel = $('#deviceSel');
const dtypeSel = $('#dtypeSel');
const localPathEl = $('#localPath');
const localOnlyEl = $('#localOnly');

// ================= State =================
// Dynamically imported Transformers.js module (null until first load).
let hf = null;
// Text-generation pipeline (null until a model has been loaded).
let pipe = null;
// InterruptableStoppingCriteria instance used by the Stop button.
let stopping = null;
// TextStreamer of the generation currently running, if any.
let currentStreamer = null;
// AbortController backing the cancellable fetch wrapper.
let abortCtrl = null;
// Callback that reinstates the original global fetch; null when not wrapped.
let restoreFetch = null;
// ================= Utils =================
/**
 * Append one line to the on-page debug log and mirror it to the console.
 *
 * Strings are written verbatim. Error objects are rendered as "Name: message"
 * (JSON.stringify would yield "{}"). Everything else is JSON-encoded, falling
 * back to String() for values JSON cannot represent (circular structures,
 * BigInt, undefined) so that the logger itself never throws.
 *
 * @param {...*} a  Values to log; joined with a single space.
 */
function log(...a){
  const render = (x) => {
    if (typeof x === 'string') return x;
    if (x instanceof Error) return `${x.name}: ${x.message}`;
    try{
      const json = JSON.stringify(x);
      return json === undefined ? String(x) : json;
    }catch{
      return String(x);
    }
  };
  const s = a.map(render).join(' ');
  logDiv.textContent += s + '\n';
  // Keep the newest entry in view.
  logDiv.scrollTop = logDiv.scrollHeight;
  console.log('[LOG]', ...a);
}
/**
 * Show a status chip.
 * @param {HTMLElement} el    Chip element to update.
 * @param {string} [kind]     Modifier class ('ok' | 'warn' | 'err'); falsy for neutral.
 * @param {string} text       Caption to display.
 */
function chip(el, kind, text){
  const variant = kind ? kind : '';
  el.className = `chip ${variant}`;
  el.textContent = text;
  el.style.display = 'inline-flex';
}
/** Hide a status chip without touching its text. */
function clearChip(el){
  el.style.display = 'none';
}
/**
 * Normalise a progress reading to an integer percentage in [0, 100].
 *
 * Readings above 1 are taken to be percentages already; readings at or below 1
 * are taken to be fractions and scaled by 100. Missing or non-numeric readings
 * yield 0 instead of NaN, because assigning NaN to a <progress> value throws.
 *
 * @param {number|null|undefined} progress
 * @returns {number} integer percentage, clamped to 0..100
 */
function toPct(progress){
  if (progress == null) return 0;
  const value = Number(progress);
  if (Number.isNaN(value)) return 0;
  const scaled = value > 1 ? value : value * 100;
  return Math.max(0, Math.min(100, Math.round(scaled)));
}
/**
 * Progress callback passed to the pipeline factory.
 *
 * 'progress' events update the progress bar and its label; every other status
 * is written to the debug log.
 *
 * When the event carries byte counters (`loaded` / `total`) the percentage is
 * computed from them directly. That avoids the scale guess in toPct(), which
 * would display a reading such as 0.4 (percent) as 40%.
 *
 * @param {{status?: string, progress?: number, loaded?: number, total?: number,
 *          name?: string, file?: string}} evt
 */
function setProgress(evt){
  if(!evt || !evt.status) return;
  const source = evt.name || evt.file || '';
  if(evt.status !== 'progress'){
    log(`status: ${evt.status} ${source}`.trim());
    return;
  }
  const { loaded, total } = evt;
  const hasBytes = Number.isFinite(loaded) && Number.isFinite(total) && total > 0;
  const pct = hasBytes
    ? Math.max(0, Math.min(100, Math.round((loaded / total) * 100)))
    : toPct(evt.progress);
  pbar.value = pct;
  const label = `${pct}% ${source}`.trim();
  plabel.textContent = label || `${pct}%`;
}
/**
 * Append a message bubble to the chat transcript and scroll it into view.
 * @param {string} text        Bubble text (set as plain text, not HTML).
 * @param {boolean} [me=false] true for the user's bubble, false for the model's.
 * @returns {HTMLDivElement} the new bubble, so callers can stream text into it.
 */
function addMsg(text, me=false){
  const bubble = document.createElement('div');
  bubble.className = me ? 'msg me' : 'msg bot';
  bubble.textContent = text;
  chatDiv.appendChild(bubble);
  chatDiv.scrollTop = chatDiv.scrollHeight;
  return bubble;
}
/**
 * Decide whether an error should trigger a retry on the WASM backend.
 * The decision is a case-sensitive substring match on the error text.
 * @param {*} err  Error object, string, or anything else that was thrown.
 * @returns {boolean}
 */
function needWasmFallback(err){
  const text = String(err?.message || err || '');
  const markers = [
    'GroupQueryAttention',
    'Input "key" is expected to have 3, 4, or 5 dimensions',
    'WebGPU',
    'GPU',
    'Device',
  ];
  return markers.some((marker) => text.includes(marker));
}
// ================= Examples =================
/**
 * Rebuild the example-prompt buttons from EXAMPLES.
 * Clicking a button copies the example's system prompt and prompt into the
 * form and, when "Auto-run" is ticked, triggers Send.
 */
function renderExamples(){
  exWrap.innerHTML = '';
  for (const example of EXAMPLES){
    const btn = document.createElement('button');
    btn.className = 'ghost';
    btn.textContent = example.title;
    btn.addEventListener('click', () => {
      sysEl.value = example.sys || '';
      promptEl.value = example.prompt || '';
      if (autoRun.checked) sendBtn.click();
    });
    exWrap.appendChild(btn);
  }
}
// Populate the examples card once at start-up.
renderExamples();
// ================= Import & env =================
/**
 * Dynamically import the library from TRANSFORMERS_CDN and keep the module in `hf`.
 * @returns {Promise<object>} the imported module namespace
 */
async function importTransformers(){
  log('Importing library:', TRANSFORMERS_CDN);
  const mod = await import(TRANSFORMERS_CDN);
  hf = mod;
  return mod;
}
/**
 * Apply the "Local mirror" form settings to the library environment (`hf.env`).
 *
 * - A non-empty mirror path is normalised to end in "/" and stored in
 *   `localModelPath`.
 * - Local loading is switched on whenever a mirror path is given or
 *   "Local only" is ticked. The library keeps local models disabled by default
 *   when running in a browser, so without this the path would be ignored and
 *   "Local only" would leave no model source enabled at all.
 * - `allowRemoteModels` is written on every call, not only when the box is
 *   ticked: the module (and its env) persists across load attempts, so a
 *   previous "Local only" attempt would otherwise keep remote loading disabled.
 */
async function configureEnv(){
  const localPath = (localPathEl.value || '').trim();
  const localOnly = !!localOnlyEl.checked;
  if(localPath){
    hf.env.localModelPath = localPath.endsWith('/') ? localPath : localPath + '/';
    log('env.localModelPath =', hf.env.localModelPath);
  }
  if(localPath || localOnly){
    hf.env.allowLocalModels = true;
    log('env.allowLocalModels = true');
  }
  hf.env.allowRemoteModels = !localOnly;
  if(localOnly){
    log('env.allowRemoteModels = false (local only)');
  }
  // hf.env.useBrowserCache = true; // default is true
}
// ========= True cancel (experimental): override fetch with AbortController =========
/**
 * Replace window.fetch with a wrapper that attaches an AbortSignal to every
 * request that does not already carry one, so in-flight downloads can be
 * cancelled. No-op when the wrapper is already installed.
 *
 * Side effects: sets the module-level `abortCtrl` and `restoreFetch`.
 *
 * Implementation notes:
 * - The caller's `init` object is copied, never mutated, and `null` is accepted.
 * - The wrapper closes over its own controller rather than reading the
 *   nullable `abortCtrl` global, so a stale reference to the wrapper that is
 *   called after restoration cannot dereference null.
 * - Restoration puts back the very function that was found, not a bound copy.
 */
function enableAbortableFetch(){
  if(restoreFetch) return; // already wrapped
  const controller = new AbortController();
  const nativeFetch = window.fetch;
  abortCtrl = controller;
  window.fetch = (input, init) => {
    const opts = { ...(init ?? {}) };
    if(!opts.signal) opts.signal = controller.signal;
    return nativeFetch.call(window, input, opts);
  };
  restoreFetch = ()=>{
    window.fetch = nativeFetch;
    restoreFetch = null;
    abortCtrl = null;
  };
  log('Fetch overridden with AbortController (experimental).');
}
/**
 * Abort requests tied to the current AbortController (if any) and always
 * reinstate the original fetch afterwards.
 */
function abortDownloads(){
  try{
    if (abortCtrl != null && abortCtrl.abort != null) {
      abortCtrl.abort();
    }
    log('AbortController: aborted ongoing downloads.');
  }catch(abortErr){
    log('Abort error:', abortErr?.message || abortErr);
  }finally{
    if (restoreFetch != null) restoreFetch();
  }
}
// ================= Events =================
// "Load Model": (re)show the confirmation step.
loadBtn.addEventListener('click', () => {
  confirmBox.style.display = 'block';
});

// "Clear Log": empty the debug pane.
clearBtn.addEventListener('click', () => {
  logDiv.textContent = '';
});

// "Clear HF browser caches": delete every Cache Storage bucket whose name
// mentions transformers / hugging / hf (case-insensitive).
$('#clear-hf-cache').addEventListener('click', async () => {
  try{
    if (!('caches' in window)) {
      log('Cache API not available in this browser.');
      return;
    }
    const names = await caches.keys();
    const hfNames = names.filter((name) => /transformers|hugging|hf/i.test(name));
    for (const name of hfNames) {
      await caches.delete(name);
    }
    log('Cleared HF-related caches. You may refresh the page.');
  }catch(cacheErr){
    log('Cache clear error:', cacheErr?.message || cacheErr);
  }
});

// "Clear Chat": drop all message bubbles.
clearChatBtn.addEventListener('click', () => {
  chatDiv.innerHTML = '';
});
// "Cancel": hide the loading UI, abort in-flight requests, flash a warning chip
// and return to the confirmation step.
cancelBtn.addEventListener('click', () => {
  for (const el of [cancelBtn, loading]) {
    el.style.display = 'none';
  }
  abortDownloads(); // actually abort network requests
  chip(statusChip, 'warn', 'Canceled / aborted');
  setTimeout(() => { clearChip(statusChip); }, 1200);
  // back to initial UI
  confirmBox.style.display = 'block';
});
// "Yes, download now": import the library, apply env settings, build the
// text-generation pipeline and reveal the chat UI.
//
// Changes from the earlier flow:
// - The Cancel button is shown for the duration of the load. It was never made
//   visible before, so downloads could not be aborted from the UI.
// - WebGPU is confirmed by requesting an adapter; `'gpu' in navigator` alone
//   only shows that the API object exists.
// - A user-initiated abort is reported as a cancel, not as "Load failed".
// - The fetch wrapper is restored once, in `finally`.
$('#confirm-yes').addEventListener('click', async ()=>{
  // Signal of this attempt's AbortController; lets `catch` tell cancel from failure.
  let signal = null;
  try{
    // Prepare UI
    confirmBox.style.display='none';
    loading.style.display='block';
    cancelBtn.style.display='inline-block';
    pbar.value=0; plabel.textContent='0%';
    chip(statusChip,'','Preparing…');
    // Enable abortable downloads
    enableAbortableFetch();
    signal = abortCtrl?.signal ?? null;
    // Import library
    await importTransformers();
    await configureEnv();
    // Device selection with auto fallback if WebGPU unsupported
    let device = deviceSel.value; // 'webgpu' | 'wasm'
    if (device === 'webgpu') {
      const adapter = ('gpu' in navigator)
        ? await navigator.gpu.requestAdapter().catch(()=>null)
        : null;
      if (!adapter) {
        // Logged as well, because the chip is replaced a few lines below.
        log('WebGPU not detected — falling back to WASM');
        chip(statusChip,'warn','WebGPU not detected — falling back to WASM');
        device = 'wasm';
      }
    }
    const dtype = dtypeSel.value; // 'q4f16' | 'q4' | 'int8' | 'fp16'
    const modelId = (modelIdEl.value || DEFAULT_MODEL_ID).trim();
    const baseOpts = makeOptions(device, dtype);
    // Build pipeline
    chip(statusChip,'','Creating text-generation pipeline…');
    pipe = await hf.pipeline('text-generation', modelId, baseOpts);
    stopping = new hf.InterruptableStoppingCriteria();
    loading.style.display='none';
    chip(statusChip,'ok','Model ready'); setTimeout(()=>clearChip(statusChip),1200);
    chip(status2,'ok', `device=${device}, dtype=${dtype}`);
    ui.style.display='block';
  }catch(err){
    loading.style.display='none';
    if (signal?.aborted) {
      log('Load canceled by user.');
      chip(statusChip,'warn','Canceled / aborted');
    } else {
      chip(statusChip,'err','Load failed');
      log('❌ Load failed:', err?.message||err);
    }
    // Back to initial
    confirmBox.style.display='block';
  }finally{
    // End of download phase — hide Cancel and restore original fetch
    cancelBtn.style.display='none';
    restoreFetch?.();
  }
});
/**
 * Reflect "generation in progress" in the UI.
 * While busy: the chat transcript is marked aria-busy, Stop is enabled and
 * Send is disabled so a second generation cannot be started on top of the
 * running one.
 * @param {boolean} b  true while a generation is running
 */
function setBusy(b){
  chatDiv.setAttribute('aria-busy', b ? 'true' : 'false');
  stopBtn.disabled = !b;
  sendBtn.disabled = !!b;
}
/**
 * Run the pipeline once; if it fails with an error that needWasmFallback()
 * recognises, rebuild the pipeline on the 'wasm' device and run it again.
 *
 * @param {Array<{role: string, content: string}>} messages  Chat messages.
 * @param {object} genOpts   Generation options forwarded to the pipeline.
 * @param {object} streamer  Streamer added to the options of each attempt.
 * @returns {Promise<*>} whatever the pipeline call resolves to
 * @throws the first error when it does not qualify for the fallback, or any
 *         error raised while rebuilding / re-running on WASM
 */
async function safeGenerate(messages, genOpts, streamer){
  const modelId = (modelIdEl.value || DEFAULT_MODEL_ID).trim();
  // Reads `pipe` at call time, so the retry uses the rebuilt pipeline.
  const run = () => pipe(messages, { ...genOpts, streamer });

  let firstError;
  try{
    return await run();
  }catch(e){
    firstError = e;
  }

  log('Generation error (1st attempt):', firstError?.message||firstError);
  if(!needWasmFallback(firstError)){
    throw firstError;
  }
  chip(status2,'warn','WebGPU error — falling back to WASM…');
  pipe = await hf.pipeline('text-generation', modelId, makeOptions('wasm', dtypeSel.value));
  return await run();
}
// "Send": add the prompt to the transcript and stream the model's reply into a
// placeholder bubble.
sendBtn.addEventListener('click', async ()=>{
  // Guards sit outside the try block: a click that does nothing must not reach
  // the `finally` below, which would otherwise clear the busy state (and
  // disable Stop) for a generation that is still running.
  if(!pipe){ chip(status2,'warn','Model not loaded'); return; }
  if(currentStreamer) return; // a reply is still being generated
  const user=promptEl.value.trim();
  if(!user) return;
  promptEl.value='';
  const sys=sysEl.value.trim();
  addMsg(user,true);
  const botEl=addMsg('…');
  // Text streamed so far; declared here so the catch block can see it.
  let outText = '';
  try{
    setBusy(true); chip(status2,'','Generating…');
    // Chat messages (Qwen2.5 provides chat_template; pass messages directly)
    const messages = [];
    if (sys) messages.push({ role: 'system', content: sys });
    messages.push({ role: 'user', content: user });
    // Streamer: append each decoded chunk to the placeholder bubble
    currentStreamer = new hf.TextStreamer(pipe.tokenizer, {
      skip_prompt: true,
      callback_function: (chunk) => { outText += chunk; botEl.textContent = outText; }
    });
    // Stoppable
    stopping?.reset?.();
    // NOTE(review): temperature/top_p are passed without do_sample — presumably
    // decoding stays greedy and these two are ignored; confirm against the library.
    const out = await safeGenerate(messages, {
      max_new_tokens: 5120,
      temperature: 0.01,
      top_p: 0.9,
      repetition_penalty: 1.05,
      stopping_criteria: stopping,
    }, currentStreamer);
    // Nothing was streamed: fall back to the returned result.
    if (!outText && Array.isArray(out) && out[0]?.generated_text) {
      botEl.textContent = (typeof out[0].generated_text === 'string')
        ? out[0].generated_text
        : JSON.stringify(out[0].generated_text);
    }
    chip(status2,'ok','Done'); setTimeout(()=>clearChip(status2), 1200);
  }catch(err){
    log('❌ Generation error:', err?.message||err);
    chip(status2,'err','Generation failed');
    // Do not leave the "…" placeholder hanging when no text ever arrived.
    if(!outText) botEl.textContent='(generation failed — see debug log)';
  }finally{
    setBusy(false); currentStreamer=null;
  }
});
// "Stop": ask the stopping criteria to interrupt the running generation.
// Best-effort: a failure is logged, not thrown, so the button never breaks the page.
stopBtn.addEventListener('click',()=>{
  try{
    stopping?.interrupt?.();
    chip(status2,'warn','Stopped'); setTimeout(()=>clearChip(status2),1200);
  }catch(e){
    log('Stop error:', e?.message||e);
  }
});
// Global errors: surface uncaught exceptions and unhandled promise rejections
// in the debug log.
window.addEventListener('error', (evt) => {
  const { message, filename, lineno, colno } = evt;
  log('window.error:', message, filename, `${lineno}:${colno}`);
});
window.addEventListener('unhandledrejection', (evt) => {
  const reason = evt.reason;
  log('unhandledrejection:', reason?.message || reason);
});
</script>
</body>
</html>