// audioUtils.ts | |
const SAMPLE_RATE = 16000; // ASR models typically expect 16kHz audio | |
/** | |
* Reads an audio Blob (or File) and converts it to a Float32Array of PCM audio data | |
* at a specified sample rate. | |
* @param file The audio Blob or File. | |
* @returns A Promise resolving with the Float32Array of resampled audio data. | |
*/ | |
export async function readAudio(file: Blob): Promise<Float32Array> { | |
const audioContext = new AudioContext(); // Use a standard AudioContext to decode initially | |
const arrayBuffer = await file.arrayBuffer(); | |
// Decode the audio data from the ArrayBuffer. This handles various formats (mp3, wav, webm, etc.) | |
// and gives you an AudioBuffer with raw PCM data at the original sample rate. | |
const audioBuffer = await audioContext.decodeAudioData(arrayBuffer); | |
// Check if resampling is needed | |
if (audioBuffer.sampleRate === SAMPLE_RATE) { | |
// If sample rate matches, return the first channel's data directly | |
// Ensure it's mono; if stereo, you might need to mix or pick a channel | |
if (audioBuffer.numberOfChannels > 1) { | |
// Basic mixing or just take the first channel for simplicity | |
// For ASR, mono is usually sufficient and expected by models | |
const monoData = audioBuffer.getChannelData(0); | |
// If needed, mix channels: | |
// const channelData1 = audioBuffer.getChannelData(0); | |
// const channelData2 = audioBuffer.getChannelData(1); | |
// const monoData = new Float32Array(channelData1.length); | |
// for (let i = 0; i < monoData.length; i++) { | |
// monoData[i] = (channelData1[i] + channelData2[i]) / 2; | |
// } | |
return monoData; | |
} else { | |
return audioBuffer.getChannelData(0); // Already mono | |
} | |
} else { | |
// Resampling is needed | |
const targetSampleRate = SAMPLE_RATE; | |
const numberOfChannels = 1; // ASR models typically expect mono input | |
// Calculate the length of the resampled buffer | |
const duration = audioBuffer.duration; | |
const resampledLength = Math.ceil(duration * targetSampleRate); | |
// Create an OfflineAudioContext for resampling | |
// This context renders audio offline and allows changing the sample rate | |
const offlineAudioContext = new OfflineAudioContext( | |
numberOfChannels, | |
resampledLength, | |
targetSampleRate | |
); | |
// Create a buffer source node from the original AudioBuffer | |
const source = offlineAudioContext.createBufferSource(); | |
source.buffer = audioBuffer; | |
// Connect the source to the offline context's destination | |
source.connect(offlineAudioContext.destination); | |
// Start the source (playback in the offline context) | |
source.start(0); | |
// Render the audio. This performs the resampling. | |
const resampledBuffer = await offlineAudioContext.startRendering(); | |
// Return the resampled audio data from the first channel | |
return resampledBuffer.getChannelData(0); | |
} | |
} |