// audioUtils.ts | |
const SAMPLE_RATE = 16000; // ASR models typically expect 16kHz audio | |
/** | |
* Reads an audio Blob (or File) and converts it to a Float32Array of PCM audio data | |
* at a specified sample rate. | |
* @param file The audio Blob or File. | |
* @returns A Promise resolving with the Float32Array of resampled audio data. | |
*/ | |
export async function readAudio(file: Blob): Promise<Float32Array> { | |
const audioContext = new AudioContext(); // Use a standard AudioContext to decode initially | |
const arrayBuffer = await file.arrayBuffer(); | |
// Decode the audio data from the ArrayBuffer. This handles various formats (mp3, wav, webm, etc.) | |
// and gives you an AudioBuffer with raw PCM data at the original sample rate. | |
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer); | |
// Check if resampling is needed | |
if (audioBuffer.sampleRate === SAMPLE_RATE) { | |
// If sample rate matches, return the first channel's data directly | |
// Ensure it's mono; if stereo, you might need to mix or pick a channel | |
if (audioBuffer.numberOfChannels > 1) { | |
// Basic mixing or just take the first channel for simplicity | |
// For ASR, mono is usually sufficient and expected by models | |
const monoData = audioBuffer.getChannelData(0); | |
// If needed, mix channels: | |
// const channelData1 = audioBuffer.getChannelData(0); | |
// const channelData2 = audioBuffer.getChannelData(1); | |
// const monoData = new Float32Array(channelData1.length); | |
// for (let i = 0; i < monoData.length; i++) { | |
// monoData[i] = (channelData1[i] + channelData2[i]) / 2; | |
// } | |
return monoData; | |
} else { | |
return audioBuffer.getChannelData(0); // Already mono | |
} | |
} else { | |
// Resampling is needed | |
const targetSampleRate = SAMPLE_RATE; | |
const numberOfChannels = 1; // ASR models typically expect mono input | |
// Calculate the length of the resampled buffer | |
const duration = audioBuffer.duration; | |
const resampledLength = Math.ceil(duration * targetSampleRate); | |
// Create an OfflineAudioContext for resampling | |
// This context renders audio offline and allows changing the sample rate | |
const offlineAudioContext = new OfflineAudioContext( | |
numberOfChannels, | |
resampledLength, | |
targetSampleRate | |
); | |
// Create a buffer source node from the original AudioBuffer | |
const source = offlineAudioContext.createBufferSource(); | |
source.buffer = audioBuffer; | |
// Connect the source to the offline context's destination | |
source.connect(offlineAudioContext.destination); | |
// Start the source (playback in the offline context) | |
source.start(0); | |
// Render the audio. This performs the resampling. | |
const resampledBuffer = await offlineAudioContext.startRendering(); | |
// Return the resampled audio data from the first channel | |
return resampledBuffer.getChannelData(0); | |
} | |
} |