Spaces:
Sleeping
Sleeping
import librosa | |
import torch | |
def preprocess_audio(file_path, target_sr=16000): | |
""" | |
Loads and resamples audio from the specified file. | |
Parameters: | |
file_path (str): Path to the audio file. | |
target_sr (int): Target sampling rate. Defaults to 16000 Hz. | |
Returns: | |
resampled_audio (np.ndarray): Resampled audio data. | |
""" | |
audio_input, sample_rate = librosa.load(file_path, sr=None) # Keep original sample rate | |
resampled_audio = librosa.resample(audio_input, orig_sr=sample_rate, target_sr=target_sr) | |
return resampled_audio | |
def transcribe_audio(model, processor, audio, target_sr=16000): | |
""" | |
Transcribes the given audio using the Whisper model. | |
Parameters: | |
model: The Whisper model. | |
processor: The processor used for preparing the input features. | |
audio (np.ndarray): The resampled audio data. | |
target_sr (int): The target sampling rate for the audio. | |
Returns: | |
transcription (str): The transcribed text from the audio. | |
""" | |
input_features = processor(audio, sampling_rate=target_sr, return_tensors="pt").input_features | |
with torch.no_grad(): | |
predicted_ids = model.generate(input_features) | |
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0] | |
return transcription | |