```ts
// audioUtils.ts

const SAMPLE_RATE = 16000; // ASR models typically expect 16kHz audio

/**
 * Reads an audio Blob (or File) and converts it to a Float32Array of mono PCM
 * audio data at the target sample rate.
 * @param file The audio Blob or File.
 * @returns A Promise resolving with the Float32Array of resampled audio data.
 */
export async function readAudio(file: Blob): Promise<Float32Array> {
  // Use a standard AudioContext to decode the input initially.
  const audioContext = new AudioContext();
  try {
    const arrayBuffer = await file.arrayBuffer();

    // Decode the audio data from the ArrayBuffer. This handles various
    // formats (mp3, wav, webm, etc.) and yields an AudioBuffer with raw
    // PCM data at the file's original sample rate.
    const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);

    if (audioBuffer.sampleRate === SAMPLE_RATE) {
      // Sample rate already matches; return the first channel's data.
      // For ASR, mono is usually sufficient and expected by models, so for
      // stereo input we simply take channel 0 here. (If you prefer, you can
      // downmix by averaging the samples of both channels instead.)
      return audioBuffer.getChannelData(0);
    }

    // Resampling is needed. ASR models typically expect mono input, so we
    // render into a single-channel OfflineAudioContext at the target rate;
    // the Web Audio API downmixes multi-channel sources automatically.
    const resampledLength = Math.ceil(audioBuffer.duration * SAMPLE_RATE);
    const offlineAudioContext = new OfflineAudioContext(
      1, // numberOfChannels: mono
      resampledLength,
      SAMPLE_RATE
    );

    // Play the original buffer through the offline context and render it.
    // Rendering performs the resampling (and any downmixing) in one pass.
    const source = offlineAudioContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(offlineAudioContext.destination);
    source.start(0);
    const resampledBuffer = await offlineAudioContext.startRendering();

    // Return the resampled mono audio data.
    return resampledBuffer.getChannelData(0);
  } finally {
    // Release the decoding context's resources.
    await audioContext.close();
  }
}
```
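To see `readAudio` in context, you can feed it a `File` from an `<input type="file">` element (or any fetched `Blob`) and hand the resulting `Float32Array` to an ASR model. The sketch below is a minimal usage example under some assumptions: it uses Transformers.js (`@xenova/transformers`) with the `Xenova/whisper-tiny.en` checkpoint, and an `#audio-file` input element, none of which this utility itself depends on.

```ts
// demo.ts — a minimal usage sketch; the ASR pipeline and model name are
// illustrative assumptions, not part of audioUtils.ts.
import { pipeline } from "@xenova/transformers";
import { readAudio } from "./audioUtils";

const input = document.querySelector<HTMLInputElement>("#audio-file")!;

input.addEventListener("change", async () => {
  const file = input.files?.[0];
  if (!file) return;

  // Decode and resample to 16kHz mono Float32Array PCM.
  const audioData = await readAudio(file);

  // Transformers.js ASR pipelines accept raw Float32Array audio sampled
  // at 16kHz, which is exactly what readAudio produces.
  const transcriber = await pipeline(
    "automatic-speech-recognition",
    "Xenova/whisper-tiny.en"
  );
  const output = await transcriber(audioData);
  console.log(output); // e.g. { text: "..." }
});
```

Because `readAudio` always returns mono 16kHz PCM regardless of the input file's format, the downstream model code never needs to worry about sample rates or channel layouts.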