Zamanonymize3

Sleeping

Upload 10 files

5397a6e verified about 1 month ago

1.34 kB

	import librosa
	import torch
	def preprocess_audio(file_path, target_sr=16000):
		"""
		Read an audio file and convert it to the requested sampling rate.

		Parameters:
		file_path (str): Location of the audio file to read.
		target_sr (int): Sampling rate of the returned signal. Defaults to 16000 Hz.

		Returns:
		np.ndarray: The audio samples resampled to ``target_sr``.
		"""
		# sr=None asks librosa to keep the file's own sampling rate, so the
		# rate conversion is done explicitly in the resample call below.
		waveform, native_sr = librosa.load(file_path, sr=None)
		return librosa.resample(waveform, orig_sr=native_sr, target_sr=target_sr)

	def transcribe_audio(model, processor, audio, target_sr=16000):
		"""
		Transcribes the given audio using the Whisper model.

		Parameters:
		model: The Whisper model; must provide ``generate``. If it exposes a
			``device`` attribute, the input features are moved to that device.
		processor: Callable that prepares the input features (its result must
			provide ``input_features``) and offers ``batch_decode``.
		audio (np.ndarray): The resampled audio data.
		target_sr (int): Sampling rate of ``audio``, forwarded to the processor.
			Defaults to 16000 Hz.

		Returns:
		transcription (str): The transcribed text from the audio (first item
			of the decoded batch).
		"""
		input_features = processor(audio, sampling_rate=target_sr, return_tensors="pt").input_features

		# The processor builds its tensors on the CPU. When the model lives on
		# another device (e.g. a GPU), generate() would fail with a device
		# mismatch, so place the features next to the model first.
		model_device = getattr(model, "device", None)
		if model_device is not None:
			input_features = input_features.to(model_device)

		# Inference only: no gradients are needed.
		with torch.no_grad():
			predicted_ids = model.generate(input_features)

		return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]