Spaces:
Runtime error
Runtime error
"""Package which defines utility functions for voice conversion.""" | |
import numpy as np | |
from numpy.typing import NDArray | |
import ffmpeg | |
def load_audio(file: str, sr: int) -> NDArray[np.float32]:
    """
    Load an audio file into a numpy array with a target sample rate.

    A subprocess is launched to decode the given audio file while
    down-mixing to a single channel and resampling to `sr` as
    necessary.

    Parameters
    ----------
    file : str
        Path to the audio file. Any mix of surrounding spaces, double
        quotes, carriage returns and newlines (as typically left behind
        when a path is copy-pasted) is removed before the path is used.
    sr : int
        Target sample rate.

    Returns
    -------
    NDArray[np.float32]
        One-dimensional array holding the decoded 32-bit float samples.

    Raises
    ------
    RuntimeError
        If the audio file cannot be loaded.

    See Also
    --------
    https://github.com/openai/whisper/blob/main/whisper/audio.py#L26

    Notes
    -----
    Requires the ffmpeg CLI and `typed-ffmpeg` package to be installed.

    """
    try:
        # NOTE strip every leading/trailing space, double quote,
        # carriage return and newline in a single pass. A chain of
        # single-character strips is order-dependent and leaves
        # residue for inputs such as ' "path" \n' or 'path\r\n'.
        # This stays inside the try block so that a non-string
        # argument is still reported as a RuntimeError.
        file = file.strip(' "\r\n')
        out, _ = (
            ffmpeg.input(file, threads=0)
            .output(
                filename="-",  # write the decoded stream to stdout
                f="f32le",
                acodec="pcm_f32le",
                ac=1,
                ar=sr,
            )
            .run(
                cmd=["ffmpeg", "-nostdin"],
                capture_stdout=True,
                capture_stderr=True,
            )
        )
    except Exception as e:
        err_msg = f"Failed to load audio: {e}"
        raise RuntimeError(err_msg) from e

    # NOTE np.frombuffer over an immutable bytes object yields a
    # read-only view; flatten() returns a copy that callers can modify.
    return np.frombuffer(out, np.float32).flatten()