import shutil
import subprocess
import tempfile
from typing import Optional

import numpy as np
import numpy.typing as npt
import pytube


def download_yt_audio(url: str, max_length: Optional[int]) -> str:
    """Download the first audio-only stream of a YouTube video to a temp dir.

    Parameters
    ----------
    url: str
        The URL of the YouTube video.

    max_length: Optional[int]
        Upper bound compared against ``yt.length`` (pytube reports the
        video length in seconds). ``None`` disables the check.

    Returns
    -------
    The path of the downloaded file, located inside a freshly created
    temporary directory. Neither the file nor the directory is removed by
    this function; the caller is responsible for cleaning them up.

    Raises
    ------
    ValueError
        If the video is longer than ``max_length`` or has no audio-only
        stream.
    TypeError
        If pytube does not return the downloaded path as a string.
    """
    yt = pytube.YouTube(url)
    if max_length is not None and yt.length > max_length:
        raise ValueError(f"Youtube video exceeds max length of {max_length}")

    # `first()` yields None when the filter matches nothing; fail with a
    # clear message instead of an AttributeError on `.download`.
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError(f"No audio-only stream available for {url}")

    tmp_dir = tempfile.mkdtemp()
    try:
        fname = audio_stream.download(output_path=tmp_dir)
        # Explicit check rather than `assert`, which is stripped under -O.
        if not isinstance(fname, str):
            raise TypeError(
                f"Expected a file path from pytube, got {type(fname).__name__}"
            )
    except BaseException:
        # Do not leave an orphaned temp directory behind on failure
        # (including interruption); the original error is re-raised as is.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise
    return fname


def check_ffmpeg_installed() -> None:
    """Verify that the ``ffmpeg`` executable can be launched.

    Raises
    ------
    RuntimeError
        If no ``ffmpeg`` executable is found.
    subprocess.CalledProcessError
        If ``ffmpeg -version`` exits with a non-zero status.
    """
    cmd = ["ffmpeg", "-version"]  # sic: ffmpeg options take a single dash
    try:
        # Capture the output so the version banner is not echoed to the
        # caller's stdout/stderr every time this check runs.
        subprocess.run(cmd, check=True, capture_output=True)
    except FileNotFoundError as exc:
        raise RuntimeError("This feature requires ffmpeg to be installed") from exc


# from openai whisper
def load_audio(file: str, sampling_rate: int) -> npt.NDArray[np.float32]:
    """Open an audio file and read as mono waveform, resampling as necessary

    Parameters
    ----------
    file: str
        The audio file to open

    sampling_rate: int
        The sample rate to resample the audio if necessary

    Returns
    -------
    A NumPy array containing the audio waveform, in float32 dtype.

    Raises
    ------
    RuntimeError
        If ffmpeg is not installed, or if ffmpeg fails to decode ``file``
        (the message then carries ffmpeg's stderr output).
    """
    check_ffmpeg_installed()  # BB

    # This launches a subprocess to decode audio while down-mixing
    # and resampling as necessary. Requires the ffmpeg CLI in PATH.
    # Output is raw signed 16-bit little-endian PCM, mono, on stdout ("-").
    # fmt: off
    cmd = [
        "ffmpeg",
        "-nostdin",
        "-threads", "0",
        "-i", file,
        "-f", "s16le",
        "-ac", "1",
        "-acodec", "pcm_s16le",
        "-ar", str(sampling_rate),
        "-"
    ]
    # fmt: on
    try:
        out = subprocess.run(cmd, capture_output=True, check=True).stdout
    except subprocess.CalledProcessError as e:
        # errors="replace": ffmpeg diagnostics are not guaranteed to be valid
        # UTF-8, and a decode failure here would hide the real error.
        detail = e.stderr.decode(errors="replace")
        raise RuntimeError(f"Failed to load audio: {detail}") from e

    # "<i2" pins the little-endian layout requested from ffmpeg (s16le)
    # regardless of host byte order. Dividing by 32768 scales the int16
    # samples into [-1.0, 1.0); `astype` already returns a fresh 1-D array.
    return np.frombuffer(out, np.dtype("<i2")).astype(np.float32) / 32768.0