import subprocess
import tempfile

import numpy as np
import numpy.typing as npt
import pytube


def download_yt_audio(url: str, max_length: int) -> str:
    """Download the first audio-only stream of a YouTube video

    Parameters
    ----------
    url: str
        The URL of the YouTube video

    max_length: int
        The maximum allowed value of the video's ``length`` attribute
        (presumably seconds -- confirm against pytube). Passing ``None``
        disables the check.

    Returns
    -------
    The path of the downloaded file. It lives in a freshly created temporary
    directory that is NOT removed by this function; the caller owns it.

    Raises
    ------
    ValueError
        If the video exceeds ``max_length`` or has no audio-only stream.

    """
    import shutil  # local import: only needed for cleanup on failure

    yt = pytube.YouTube(url)
    if max_length is not None and yt.length > max_length:
        raise ValueError(f"Youtube video exceeds max length of {max_length}")

    # ``first()`` yields None when the filter matches nothing; fail with a
    # clear message instead of an AttributeError on ``None.download``.
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError(f"No audio-only stream available for {url}")

    tmp_dir = tempfile.mkdtemp()
    try:
        fname = stream.download(output_path=tmp_dir)
    except BaseException:
        # Do not leak the temporary directory when the download fails or is
        # interrupted; on success the directory must outlive this call.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise

    assert isinstance(fname, str)  # narrows the type for static checkers
    return fname


def check_ffmpeg_installed() -> None:
    """Verify that the ``ffmpeg`` executable can be launched

    Runs ``ffmpeg -version`` and discards its output, so the version banner
    does not end up on the caller's stdout/stderr.

    Raises
    ------
    RuntimeError
        If no ``ffmpeg`` executable can be found.
    subprocess.CalledProcessError
        If ``ffmpeg`` is found but exits with a non-zero status.

    """
    cmd = ["ffmpeg", "-version"]
    try:
        subprocess.run(
            cmd,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except FileNotFoundError as exc:
        raise RuntimeError("This feature requires ffmpeg to be installed") from exc


# from openai whisper
def load_audio(file: str, sampling_rate: int) -> npt.NDArray[np.float32]:
    """Open an audio file and read as mono waveform, resampling as necessary

    Parameters
    ----------
    file: str
        The audio file to open

    sampling_rate: int
        The sample rate to resample the audio if necessary

    Returns
    -------
    A one-dimensional NumPy array containing the audio waveform, in float32
    dtype, scaled from signed 16-bit PCM into the range [-1.0, 1.0).

    Raises
    ------
    RuntimeError
        If ffmpeg is not installed, or if ffmpeg exits with a non-zero status
        while decoding ``file`` (its stderr is included in the message).

    """
    check_ffmpeg_installed()

    # This launches a subprocess to decode audio while down-mixing
    # and resampling as necessary.  Requires the ffmpeg CLI in PATH.
    # fmt: off
    cmd = [
        "ffmpeg",
        "-nostdin",
        "-threads", "0",
        "-i", file,
        "-f", "s16le",
        "-ac", "1",
        "-acodec", "pcm_s16le",
        "-ar", str(sampling_rate),
        "-"
    ]
    # fmt: on
    try:
        out = subprocess.run(cmd, capture_output=True, check=True).stdout
    except subprocess.CalledProcessError as e:
        # ffmpeg's stderr may echo file names or metadata in arbitrary
        # encodings; decode leniently so the real failure is never masked
        # by a UnicodeDecodeError.
        detail = e.stderr.decode(errors="replace")
        raise RuntimeError(f"Failed to load audio: {detail}") from e

    # frombuffer already yields a 1-D array; astype makes the only copy needed.
    return np.frombuffer(out, np.int16).astype(np.float32) / 32768.0