import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.12/dist/transformers.min.js';

/**
 * Browser UI controller for in-page speech transcription.
 *
 * Wires the page's controls (file input, URL input, sample button,
 * transcribe button, copy button) to a lazily created
 * `automatic-speech-recognition` pipeline and renders the result.
 *
 * Expects these elements to exist in the document: `#audio-file`,
 * `#audio-url`, `#load-url`, `#sample-btn`, `#transcribe-btn`, `#copy-btn`,
 * `#timestamps`, `#word-timestamps`, `#progress-container`, `#progress-text`,
 * `#result-section`, `#transcription-text`, `#chunks-container`,
 * `.file-label` and `.upload-section`.
 */
class SpeechTranscriber {
  constructor() {
    // Pipeline instance; created on first transcription by loadModel().
    this.transcriber = null;
    // URL (remote or blob:) of the audio that will be transcribed.
    this.currentAudio = null;
    // blob: URL created for an uploaded file, kept so it can be revoked.
    this.currentObjectUrl = null;
    // True while a transcription is running; blocks re-entrant clicks.
    this.isTranscribing = false;
    this.init();
  }

  /** Performs start-up work; currently only attaches the DOM listeners. */
  async init() {
    this.setupEventListeners();
  }

  /** Attaches click/change handlers and drag-and-drop support to the page controls. */
  setupEventListeners() {
    const fileInput = document.getElementById('audio-file');
    const loadUrlBtn = document.getElementById('load-url');
    const sampleBtn = document.getElementById('sample-btn');
    const transcribeBtn = document.getElementById('transcribe-btn');
    const copyBtn = document.getElementById('copy-btn');

    fileInput.addEventListener('change', (e) => this.handleFileUpload(e));
    loadUrlBtn.addEventListener('click', () => this.loadAudioFromUrl());
    sampleBtn.addEventListener('click', () => this.loadSampleAudio());
    transcribeBtn.addEventListener('click', () => this.transcribeAudio());
    copyBtn.addEventListener('click', () => this.copyText());

    // Drag and drop
    const fileLabel = document.querySelector('.file-label');
    fileLabel.addEventListener('dragover', (e) => {
      // Without preventDefault the browser refuses the drop.
      e.preventDefault();
      fileLabel.classList.add('drag-over');
    });
    fileLabel.addEventListener('dragleave', () => {
      fileLabel.classList.remove('drag-over');
    });
    fileLabel.addEventListener('drop', (e) => {
      e.preventDefault();
      fileLabel.classList.remove('drag-over');
      if (e.dataTransfer.files.length) {
        // Mirror the dropped files onto the input so both paths share one handler.
        fileInput.files = e.dataTransfer.files;
        this.handleFileUpload({ target: fileInput });
      }
    });
  }

  /**
   * Records a new audio source and releases the previous blob: URL, if any.
   *
   * @param {string} source - URL that will be passed to the pipeline.
   * @param {boolean} [isObjectUrl=false] - Whether `source` came from
   *   `URL.createObjectURL` and therefore must be revoked when replaced.
   */
  setCurrentAudio(source, isObjectUrl = false) {
    if (this.currentObjectUrl) {
      // Object URLs keep their Blob alive until revoked.
      URL.revokeObjectURL(this.currentObjectUrl);
    }
    this.currentObjectUrl = isObjectUrl ? source : null;
    this.currentAudio = source;
  }

  /**
   * Accepts the first selected file when its MIME type starts with `audio/`.
   *
   * @param {{target: {files: FileList}}} event - Change event (or a
   *   look-alike object, as used by the drop handler).
   */
  handleFileUpload(event) {
    const file = event.target.files[0];
    if (!file) {
      return;
    }
    if (!file.type.startsWith('audio/')) {
      console.warn('Ignoring non-audio file:', file.name);
      return;
    }
    this.setCurrentAudio(URL.createObjectURL(file), true);
    this.enableTranscribeButton();
    this.showAudioPreview(file.name);
  }

  /** Uses the trimmed value of `#audio-url` as the audio source, if non-empty. */
  loadAudioFromUrl() {
    const url = document.getElementById('audio-url').value.trim();
    if (url) {
      this.setCurrentAudio(url);
      this.enableTranscribeButton();
      this.showAudioPreview(url);
    }
  }

  /** Selects the bundled sample recording as the audio source. */
  loadSampleAudio() {
    this.setCurrentAudio(
      'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav',
    );
    this.enableTranscribeButton();
    this.showAudioPreview('Sample JFK Speech');
  }

  /**
   * Shows a "Loaded: ..." note in `.upload-section`, replacing any earlier note.
   *
   * @param {string} source - Label to display (file name, URL or description).
   */
  showAudioPreview(source) {
    const uploadSection = document.querySelector('.upload-section');
    // Drop earlier previews so repeated loads do not stack up.
    uploadSection
      .querySelectorAll('.audio-preview')
      .forEach((stale) => stale.remove());

    const preview = document.createElement('div');
    preview.className = 'audio-preview';
    // textContent, not innerHTML: `source` may be a user-typed URL or a file name.
    preview.textContent = `\n\nLoaded: ${source}\n\n`;
    uploadSection.appendChild(preview);
  }

  /** Enables the `#transcribe-btn` button. */
  enableTranscribeButton() {
    document.getElementById('transcribe-btn').disabled = false;
  }

  /**
   * Loads the model if needed, transcribes the current audio and renders the
   * result. Failures are logged and reported through showError(); the
   * returned promise does not reject for them.
   *
   * @returns {Promise<void>}
   */
  async transcribeAudio() {
    // Ignore clicks with nothing loaded or while a run is already in flight.
    if (!this.currentAudio || this.isTranscribing) {
      return;
    }

    const timestamps = document.getElementById('timestamps').checked;
    const wordTimestamps = document.getElementById('word-timestamps').checked;

    // Word-level timestamps take precedence over chunk-level ones.
    const options = {};
    if (wordTimestamps) {
      options.return_timestamps = 'word';
    } else if (timestamps) {
      options.return_timestamps = true;
    }

    this.isTranscribing = true;
    try {
      this.showProgress('Loading model...');
      await this.loadModel();
      this.showProgress('Transcribing audio...');
      const output = await this.transcriber(this.currentAudio, options);
      this.displayResults(output);
    } catch (error) {
      console.error('Transcription error:', error);
      this.showError(`Transcription failed: ${error.message}`);
    } finally {
      this.isTranscribing = false;
    }
  }

  /**
   * Creates the speech-recognition pipeline on first use and caches it on
   * `this.transcriber`; later calls are no-ops.
   *
   * @returns {Promise<void>}
   */
  async loadModel() {
    if (this.transcriber) {
      return;
    }
    this.transcriber = await pipeline(
      'automatic-speech-recognition',
      'Xenova/whisper-tiny.en',
      {
        progress_callback: (progress) => {
          if (progress.status !== 'progress') {
            return;
          }
          // transformers.js reports `progress` as a percentage (0-100), so it
          // must not be scaled by 100 again. Skip non-finite values, which
          // would otherwise render as "NaN%".
          if (!Number.isFinite(progress.progress)) {
            return;
          }
          const percent = Math.round(progress.progress);
          this.showProgress(`Loading model... \n${percent}%`);
        },
      },
    );
  }

  /**
   * Shows the progress container with `message` and hides the result section.
   *
   * @param {string} message - Status text to display.
   */
  showProgress(message) {
    const progressContainer = document.getElementById('progress-container');
    const progressText = document.getElementById('progress-text');
    progressContainer.style.display = 'block';
    progressText.textContent = message;
    // Hide results section
    document.getElementById('result-section').style.display = 'none';
  }

  /**
   * Formats one timestamp bound for display.
   *
   * @param {number|null|undefined} seconds - Bound in seconds.
   * @returns {string} e.g. `"1.50s"`, or `"?"` when the bound is not a number.
   */
  formatTimestamp(seconds) {
    return typeof seconds === 'number' ? `${seconds.toFixed(2)}s` : '?';
  }

  /**
   * Renders the transcription text and, when present, one entry per chunk.
   *
   * @param {{text: string, chunks?: Array<{text: string, timestamp?: Array}>}} output
   *   Result object returned by the pipeline call.
   */
  displayResults(output) {
    document.getElementById('progress-container').style.display = 'none';

    const resultSection = document.getElementById('result-section');
    const transcriptionText = document.getElementById('transcription-text');
    const chunksContainer = document.getElementById('chunks-container');

    transcriptionText.textContent = output.text;
    chunksContainer.innerHTML = '';

    if (output.chunks && output.chunks.length > 0) {
      const chunksList = document.createElement('div');
      chunksList.className = 'chunks-list';

      for (const chunk of output.chunks) {
        const chunkElement = document.createElement('div');
        chunkElement.className = 'chunk-item';
        // textContent keeps transcribed text from being parsed as markup.
        if (chunk.timestamp) {
          const [start, end] = chunk.timestamp;
          // formatTimestamp tolerates a missing bound (e.g. null), which
          // would throw if `.toFixed()` were called on it directly.
          chunkElement.textContent = ` [${this.formatTimestamp(start)} - ${this.formatTimestamp(end)}] ${chunk.text} `;
        } else {
          chunkElement.textContent = `${chunk.text}`;
        }
        chunksList.appendChild(chunkElement);
      }

      chunksContainer.appendChild(chunksList);
    }

    resultSection.style.display = 'block';
  }

  /**
   * Hides the progress container and reports `message` via `alert`.
   *
   * @param {string} message - Text shown to the user.
   */
  showError(message) {
    document.getElementById('progress-container').style.display = 'none';
    alert(message);
  }

  /**
   * Copies the transcription text to the clipboard and briefly changes the
   * copy button label to "Copied!". Failures are logged, not thrown.
   *
   * @returns {Promise<void>}
   */
  async copyText() {
    const text = document.getElementById('transcription-text').textContent;
    try {
      // Inside the try so that a missing `navigator.clipboard` is logged
      // like any other copy failure instead of escaping as an uncaught error.
      await navigator.clipboard.writeText(text);
    } catch (err) {
      console.error('Failed to copy text: ', err);
      return;
    }

    const copyBtn = document.getElementById('copy-btn');
    const originalText = copyBtn.textContent;
    copyBtn.textContent = 'Copied!';
    setTimeout(() => {
      copyBtn.textContent = originalText;
    }, 2000);
  }
}

// Initialize the application when the page loads. If the document has already
// been parsed by the time this module runs, DOMContentLoaded will not fire
// again, so start immediately in that case.
const startSpeechTranscriber = () => {
  new SpeechTranscriber();
};

if (document.readyState === 'loading') {
  document.addEventListener('DOMContentLoaded', startSpeechTranscriber);
} else {
  startSpeechTranscriber();
}