const http = require('http');
const url = require('url');
const wavefile = require('wavefile');

class MyPipeline {
  static task = 'automatic-speech-recognition';
  static model = 'Xenova/whisper-tiny.en';
  static instance = null;

  static async getInstance(progress_callback = null) {
    if (this.instance === null) {
      let { pipeline, env } = await import('@huggingface/transformers');

      // NOTE: Uncomment this to change the cache directory
      // env.cacheDir = './.cache';

      this.instance = pipeline(this.task, this.model, { progress_callback });
    }
    return this.instance;
  }
}

// Optionally warm up the model at startup instead of on the first request:
// MyPipeline.getInstance();

http.createServer(async (req, res) => {
  res.writeHead(200, { 'Content-Type': 'text/plain' });
  const u = url.parse(req.url, true);

  if (u.query.q) {
    const transcriber = await MyPipeline.getInstance();

    // Example input: 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav'
    let buffer = Buffer.from(await fetch(u.query.q).then(x => x.arrayBuffer()));

    // Read the .wav file and convert it to the required format
    let wav = new wavefile.WaveFile(buffer);
    wav.toBitDepth('32f'); // Pipeline expects input as a Float32Array
    wav.toSampleRate(16000); // Whisper expects audio with a sampling rate of 16000
    let audioData = wav.getSamples();
    if (Array.isArray(audioData)) {
      if (audioData.length > 1) {
        const SCALING_FACTOR = Math.sqrt(2);

        // Merge channels (into first channel to save memory)
        for (let i = 0; i < audioData[0].length; ++i) {
          audioData[0][i] = SCALING_FACTOR * (audioData[0][i] + audioData[1][i]) / 2;
        }
      }

      // Select first channel
      audioData = audioData[0];
    }

    // Run model
    let start = performance.now();
    let output = await transcriber(audioData);
    let end = performance.now();

    res.write(`Execution duration: ${(end - start) / 1000} seconds\n`);
    res.end(JSON.stringify(output));
  } else {
    res.end('Empty query');
  }
}).listen(8080);
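// A minimal usage sketch, assuming Node 18+ (for the global fetch), the script
// saved as server.js (a hypothetical filename), and the sample clip from the
// dataset URL referenced above:
//
//   node server.js
//   curl 'http://localhost:8080/?q=https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav'
//
// The first request downloads and loads the model, so it is noticeably slower
// than subsequent ones; the cached pipeline instance is reused after that.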