voice / my_utils.py
Altadmin's picture
Upload 38 files
890b83f
raw
history blame
969 Bytes
import ffmpeg
import numpy as np
import requests
import logging
import wave
import librosa
def _pcm_to_float32(frames, sample_width):
    """Decode little-endian integer PCM bytes into float32 samples.

    Samples are scaled by the largest positive value representable at the
    given width, so the output lies in approximately [-1, 1].
    """
    if sample_width == 1:
        # 8-bit WAV samples are unsigned with the zero level at 128.
        ints = np.frombuffer(frames, dtype=np.uint8).astype(np.int16) - 128
    elif sample_width == 2:
        ints = np.frombuffer(frames, dtype="<i2")
    elif sample_width == 3:
        # NumPy has no 24-bit integer type: place each 3-byte sample in the
        # upper bytes of an int32 and shift back down to sign-extend it.
        raw = np.frombuffer(frames, dtype=np.uint8).reshape(-1, 3)
        padded = np.zeros((raw.shape[0], 4), dtype=np.uint8)
        padded[:, 1:] = raw
        ints = padded.view("<i4").reshape(-1) >> 8
    elif sample_width == 4:
        ints = np.frombuffer(frames, dtype="<i4")
    else:
        raise ValueError(f"unsupported sample width: {sample_width} bytes")

    full_scale = 2 ** (8 * sample_width - 1) - 1
    return ints.astype(np.float32) / full_scale


def load_audio(file_path, sr):
    """Load a PCM WAV file as a mono float32 signal at sample rate ``sr``.

    Args:
        file_path: Path (or file-like object) accepted by ``wave.open``.
        sr: Target sample rate in Hz. The audio is resampled with
            ``librosa.resample`` when the file's rate differs.

    Returns:
        A 1-D ``numpy.float32`` array with samples in approximately [-1, 1].
        Multi-channel files are downmixed to mono by averaging the channels.

    Raises:
        RuntimeError: If the file cannot be opened, decoded, or resampled.
            The underlying exception is attached as ``__cause__``.
    """
    try:
        with wave.open(file_path, 'rb') as audio_file:
            channels = audio_file.getnchannels()
            sample_width = audio_file.getsampwidth()
            frame_rate = audio_file.getframerate()
            frames = audio_file.readframes(audio_file.getnframes())

        audio_data = _pcm_to_float32(frames, sample_width)

        if channels > 1:
            # Frames are interleaved (L, R, L, R, ...); de-interleave and
            # average so the result is a genuine mono signal rather than a
            # channel-interleaved stream misread as mono.
            audio_data = audio_data.reshape(-1, channels).mean(axis=1)

        # Resample only when needed; skip empty signals, which have nothing
        # to resample.
        if frame_rate != sr and audio_data.size:
            audio_data = librosa.resample(audio_data, orig_sr=frame_rate, target_sr=sr)
    except Exception as e:
        raise RuntimeError(f"Failed to load audio: {e}") from e
    return audio_data