<!-- webgpu-smoke-test / index.html | jscmp4 | devug | f22a52c (verified) -->
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Web AI - 长音频修复版</title>
<style>
/* ---- Page shell ---- */
body {
  font-family: -apple-system, sans-serif;
  max-width: 800px;
  margin: 2rem auto;
  padding: 0 1rem;
  color: #333;
}

.container {
  background: #fff;
  padding: 25px;
  border-radius: 12px;
  box-shadow: 0 4px 15px rgba(0, 0, 0, 0.05);
}

/* ---- Progress bar (hidden until the script shows it) ---- */
.progress-wrapper {
  width: 100%;
  background-color: #e9ecef;
  border-radius: 8px;
  height: 20px;
  margin: 15px 0;
  overflow: hidden;
  display: none;
}

.progress-bar {
  height: 100%;
  background-color: #28a745;
  width: 0%;
  text-align: center;
  line-height: 20px;
  color: white;
  font-size: 12px;
  font-weight: bold;
  transition: width 0.2s ease;
}

/* Animated diagonal stripes while the "processing" class is set. */
.progress-bar.processing {
  background-image: linear-gradient(
    45deg,
    rgba(255, 255, 255, .15) 25%,
    transparent 25%,
    transparent 50%,
    rgba(255, 255, 255, .15) 50%,
    rgba(255, 255, 255, .15) 75%,
    transparent 75%,
    transparent
  );
  background-size: 1rem 1rem;
  animation: stripes 1s linear infinite;
}

@keyframes stripes {
  from { background-position: 1rem 0; }
  to { background-position: 0 0; }
}

/* ---- File drop zone ---- */
#drop-zone {
  border: 2px dashed #ccc;
  border-radius: 10px;
  padding: 40px 20px;
  text-align: center;
  cursor: pointer;
  background: #fafafa;
  margin-bottom: 20px;
  transition: 0.2s;
}

#drop-zone.drag-over {
  border-color: #007bff;
  background-color: #eef6ff;
}

/* The native file input is hidden; the script opens it from the drop zone. */
#file-upload {
  display: none;
}

/* ---- Controls ---- */
.controls {
  display: flex;
  gap: 10px;
  margin-bottom: 15px;
}

select,
button {
  padding: 10px;
  border-radius: 6px;
  border: 1px solid #ddd;
}

/* Must stay after the shared select/button rule: it overrides border and padding. */
button {
  background: #000;
  color: #fff;
  border: none;
  cursor: pointer;
  font-weight: bold;
  padding: 10px 25px;
}

button:disabled {
  background: #ccc;
  cursor: not-allowed;
}

/* ---- Transcript output ---- */
#result-area {
  width: 100%;
  height: 300px;
  padding: 15px;
  border: 1px solid #ddd;
  border-radius: 6px;
  font-family: monospace;
  resize: vertical;
  background: #fdfdfd;
  margin-top: 15px;
}
</style>
</head>
<body>
<h1>🚀 21分钟长音频修复版 (解码分离)</h1>
<p>解决了 "AudioContext is not available" 错误。主线程解码,后台线程计算。</p>
<!-- Main card: status line, progress bar, file picker, controls and transcript output.
     Every id below is looked up by the page script, so ids must stay unchanged. -->
<div class="container">
  <!-- role="status" makes assistive technology announce status text changes. -->
  <div id="status" role="status">🔵 正在启动引擎...</div>
  <div class="progress-wrapper" id="progress-wrapper">
    <div class="progress-bar" id="progress-bar">0%</div>
  </div>
  <div id="drop-zone">
    <p>☁️ 拖入音频文件 (mp3, m4a, wav)</p>
  </div>
  <input type="file" id="file-upload" accept="audio/*,video/*,.m4a,.wav,.mp3">
  <div class="controls">
    <!-- No visible label in this layout, so the select gets an accessible name. -->
    <select id="language-select" aria-label="识别语言">
      <option value="auto">🌐 自动识别语言</option>
      <option value="chinese">🇨🇳 中文</option>
      <option value="english">🇺🇸 英文</option>
    </select>
    <!-- Explicit type="button": these trigger script actions and never submit a form. -->
    <button type="button" id="run-btn" disabled>开始转换</button>
    <!-- The inline display value is toggled by the page script. -->
    <button type="button" id="stop-btn" style="background:#d9534f; display:none;">停止</button>
  </div>
  <!-- A placeholder is not a label; aria-label names the transcript output. -->
  <textarea id="result-area" aria-label="转换结果" placeholder="等待转换..."></textarea>
</div>
<script id="worker-code" type="javascript/worker">
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
// Library configuration (see the Transformers.js `env` settings): do not look
// for local model files, and use the browser cache for downloaded files.
env.allowLocalModels = false;
env.useBrowserCache = true;

// The speech-recognition pipeline; stays null until a 'load' message succeeds.
let transcriber = null;

// Turn any thrown value into readable text. Not every failure is an Error
// object, and `undefined.message` would surface to the user as "undefined".
const describeError = (err) => (err && err.message ? err.message : String(err));

/**
 * Worker message handler.
 *
 * Incoming messages:
 *   { type: 'load' }                  - create the Whisper pipeline.
 *   { type: 'run', audio, language }  - transcribe `audio`; `language` is
 *                                       'auto' or a language name.
 *
 * Outgoing messages:
 *   { type: 'status', text }              - progress text for the UI.
 *   { type: 'download_progress', percent } - model download progress.
 *   { type: 'ready' }                     - the pipeline was created.
 *   { type: 'result', text }              - the transcript.
 *   { type: 'error', error }              - failure description (string).
 */
self.onmessage = async (e) => {
  const msg = e.data;

  if (msg.type === 'load') {
    try {
      self.postMessage({ type: 'status', text: '⏳ 后台加载模型 (Whisper-Base)...' });
      // Use the base model to reduce hallucinated output.
      transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-base', {
        progress_callback: (data) => {
          if (data.status !== 'progress') return;
          const percent = Math.round((data.loaded / data.total) * 100);
          // A missing or zero `total` gives NaN/Infinity; skip instead of
          // showing "NaN%" in the progress bar.
          if (!Number.isFinite(percent)) return;
          self.postMessage({ type: 'download_progress', percent });
        }
      });
      self.postMessage({ type: 'ready' });
    } catch (err) {
      self.postMessage({ type: 'error', error: describeError(err) });
    }
    return;
  }

  if (msg.type === 'run') {
    // Guard against a run request arriving before (or after a failed) load;
    // otherwise the call below fails with an opaque TypeError.
    if (!transcriber) {
      self.postMessage({ type: 'error', error: '模型尚未加载完成,请稍候再试' });
      return;
    }
    try {
      self.postMessage({ type: 'status', text: '🚀 模型正在推理中...' });
      // msg.audio is already decoded by the page (a Float32Array of samples),
      // so no AudioContext is needed in the worker.
      const output = await transcriber(msg.audio, {
        chunk_length_s: 30,
        stride_length_s: 5,
        task: 'transcribe',
        language: msg.language !== 'auto' ? msg.language : undefined,
        return_timestamps: true, // key to preventing repeated output
        no_repeat_ngram_size: 2, // key to preventing repeated output
        temperature: 0, // reduce randomness
      });
      self.postMessage({ type: 'result', text: output.text });
    } catch (err) {
      self.postMessage({ type: 'error', error: describeError(err) });
    }
  }
};
</script>
<script type="module">
// Build the worker from the text of the inline <script id="worker-code">
// element: wrap it in a Blob, mint an object URL, and start it as a module worker.
const workerBlob = new Blob(
  [document.querySelector('#worker-code').textContent],
  { type: 'text/javascript' }
);
const workerUrl = URL.createObjectURL(workerBlob);
const worker = new Worker(workerUrl, { type: 'module' });

// Look up every UI element once; the list order matches the destructuring order.
const [
  statusEl,
  progressBar,
  progressWrapper,
  runBtn,
  stopBtn,
  resultArea,
  dropZone,
  fileInput,
  langSelect,
] = [
  'status',
  'progress-bar',
  'progress-wrapper',
  'run-btn',
  'stop-btn',
  'result-area',
  'drop-zone',
  'file-upload',
  'language-select',
].map((id) => document.getElementById(id));

// The file most recently chosen by the user (null until one is picked).
let currentFile = null;
// --- Core fix: decode the audio on the main thread ---
/**
 * Decode a media file into mono PCM samples at 16 kHz.
 *
 * @param {Blob} file - the chosen file; only `file.arrayBuffer()` is used.
 * @returns {Promise<Float32Array>} mono samples. A single-channel source
 *   returns its channel data directly; a multi-channel source returns the
 *   per-sample average of all channels.
 * @throws whatever `file.arrayBuffer()` or `decodeAudioData()` rejects with
 *   (for example when the file is not decodable audio).
 */
async function decodeAudio(file) {
  // 1. Create an AudioContext with the sample rate forced to 16000
  //    (Whisper needs 16 kHz input).
  const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
  const audioCtx = new AudioContextCtor({ sampleRate: 16000 });
  try {
    // 2. Read the file into an ArrayBuffer.
    const arrayBuffer = await file.arrayBuffer();
    // 3. Decode (this step has to happen on the main thread).
    const audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
    // 4. Produce mono data. Taking only channel 0 would silently drop
    //    anything present only in the other channels, so average them.
    const channelCount = audioBuffer.numberOfChannels;
    if (channelCount === 1) {
      return audioBuffer.getChannelData(0);
    }
    const mono = new Float32Array(audioBuffer.length);
    for (let c = 0; c < channelCount; c++) {
      const channel = audioBuffer.getChannelData(c);
      for (let i = 0; i < mono.length; i++) {
        mono[i] += channel[i];
      }
    }
    for (let i = 0; i < mono.length; i++) {
      mono[i] /= channelCount;
    }
    return mono;
  } finally {
    // 5. Always close the context, including when reading or decoding threw,
    //    so a failed attempt does not leak it.
    try {
      await audioCtx.close();
    } catch (closeErr) {
      // A failed close must not mask the decode result or the decode error.
      console.warn(closeErr);
    }
  }
}
// ------------------------------------
// True once the worker has reported a loaded model. The run button is only
// re-enabled while this is set, so a failed model load cannot start a run.
let engineReady = false;

// Return the controls to their idle state after a run ends or fails.
function restoreIdleUi() {
  progressBar.classList.remove('processing');
  progressWrapper.style.display = 'none';
  stopBtn.style.display = 'none';
  runBtn.disabled = !engineReady;
}

// Show a failure in the status line and return the controls to idle.
function showFailure(text) {
  statusEl.innerText = "❌ " + text;
  statusEl.style.color = "red";
  restoreIdleUi();
}

/**
 * Handle messages posted by the worker. `e.data.type` selects the action:
 *   'download_progress' - show the bar at `percent`.
 *   'ready'             - model loaded; enable the run button.
 *   'status'            - show `text` in the status line.
 *   'result'            - put `text` into the result area.
 *   'error'             - show `error` and restore the controls.
 */
worker.onmessage = (e) => {
  const msg = e.data;
  switch (msg.type) {
    case 'download_progress':
      progressWrapper.style.display = 'block';
      progressBar.style.width = msg.percent + '%';
      progressBar.innerText = msg.percent + '%';
      if (msg.percent === 100) statusEl.innerText = "⏳ 下载完成,正在编译...";
      break;
    case 'ready':
      engineReady = true;
      statusEl.innerText = "✅ 引擎就绪";
      statusEl.style.color = "green";
      progressBar.style.width = '0%';
      restoreIdleUi();
      break;
    case 'status':
      // Clear any colour left by an earlier error or success message.
      statusEl.style.color = '';
      statusEl.innerText = msg.text;
      break;
    case 'result':
      resultArea.value = msg.text;
      statusEl.innerText = "✅ 转换完成!";
      // Set the colour explicitly so a success after an error is not shown in red.
      statusEl.style.color = "green";
      restoreIdleUi();
      break;
    case 'error':
      showFailure(msg.error || "未知错误");
      break;
  }
};

// Without this, a worker script that fails to load or throws outside its own
// try/catch would leave the page stuck on the start-up status text.
worker.onerror = (event) => {
  showFailure(event.message || "后台线程加载失败,请刷新页面重试");
};

// Ask the worker to start loading the model straight away.
worker.postMessage({ type: 'load' });
/**
 * Remember the chosen file and show its name in the status line and drop zone.
 *
 * @param {File} file - the file the user dropped or picked.
 */
function handleFile(file) {
  currentFile = file;
  statusEl.innerText = `📂 已加载: ${file.name}`;
  // The file name is untrusted text: assigning it through innerHTML would
  // parse any markup it contains. Build the node and set textContent instead.
  const label = document.createElement('p');
  label.textContent = `📄 ${file.name}`;
  dropZone.replaceChildren(label);
}
// --- File selection: drag and drop, click, or keyboard ---

// preventDefault on dragover is what allows the drop event to fire here.
dropZone.addEventListener('dragover', (e) => {
  e.preventDefault();
  dropZone.classList.add('drag-over');
});
dropZone.addEventListener('dragleave', (e) => {
  e.preventDefault();
  dropZone.classList.remove('drag-over');
});
dropZone.addEventListener('drop', (e) => {
  e.preventDefault();
  dropZone.classList.remove('drag-over');
  // Only the first dropped file is used.
  if (e.dataTransfer.files.length) handleFile(e.dataTransfer.files[0]);
});

// Clicking the zone opens the hidden native file picker.
dropZone.addEventListener('click', () => fileInput.click());

// The zone is a plain <div>, so on its own it cannot be focused or activated
// from the keyboard. Expose it as a button and handle Enter and Space.
dropZone.setAttribute('role', 'button');
dropZone.tabIndex = 0;
dropZone.addEventListener('keydown', (e) => {
  if (e.key === 'Enter' || e.key === ' ') {
    e.preventDefault(); // stop Space from scrolling the page
    fileInput.click();
  }
});

fileInput.addEventListener('change', (e) => {
  if (e.target.files.length) handleFile(e.target.files[0]);
});
/**
 * Run button: decode the chosen file on the main thread, then hand the
 * samples to the worker for transcription. The worker's reply is handled by
 * worker.onmessage; this handler only deals with decode and send failures.
 */
runBtn.addEventListener('click', async () => {
  if (!currentFile) return alert("请先上传文件");

  runBtn.disabled = true;
  stopBtn.style.display = 'inline-block';
  progressWrapper.style.display = 'block';
  progressBar.classList.add('processing');
  progressBar.innerText = "解码中...";
  // Clear any colour left over from an earlier error or success message.
  statusEl.style.color = '';
  statusEl.innerText = "⏳ 正在预处理音频 (解码)...";

  try {
    // 1. Decode on the main thread first.
    const audioData = await decodeAudio(currentFile);
    // Nothing to transcribe: report it here instead of sending an empty
    // buffer to the model.
    if (audioData.length === 0) throw new Error("音频内容为空");

    progressBar.innerText = "计算中...";
    statusEl.innerText = "🚀 音频数据已发送给后台,正在推理...";

    // 2. Send the decoded samples to the worker. The underlying buffer is
    //    passed in the transfer list, so ownership moves to the worker
    //    without a copy and audioData is unusable here afterwards.
    worker.postMessage({
      type: 'run',
      audio: audioData,
      language: langSelect.value
    }, [audioData.buffer]);
  } catch (err) {
    console.error(err);
    // Thrown values are not always Error objects; avoid printing "undefined".
    const reason = err && err.message ? err.message : String(err);
    statusEl.innerText = "❌ 解码失败: " + reason;
    statusEl.style.color = "red";
    // Put every control back to idle, not only the run button; otherwise the
    // stop button and the progress bar stay visible after a failed decode.
    runBtn.disabled = false;
    progressBar.classList.remove('processing');
    progressWrapper.style.display = 'none';
    stopBtn.style.display = 'none';
  }
});
// Stop button: after the user confirms, terminate the worker and reload the page.
stopBtn.addEventListener('click', () => {
  const confirmed = confirm("确定要终止吗?");
  if (!confirmed) return;
  worker.terminate();
  location.reload();
});
</script>
</body>
</html>