Benjamin Bossan
Add youtube transcription processor
history blame
1.91 kB
import subprocess
import tempfile
import numpy as np
import numpy.typing as npt
import pytube
def download_yt_audio(url: str, max_length: int) -> str:
    """Download the audio track of a YouTube video into a temp directory.

    Parameters
    ----------
    url: str
        URL of the YouTube video.

    max_length: int
        Upper bound compared against ``yt.length``. Passing ``None``
        disables the check.

    Returns
    -------
    Path of the downloaded audio file. The file lives in a directory
    created with ``tempfile.mkdtemp()``; nothing here removes it, so
    cleanup is left to the caller.

    Raises
    ------
    ValueError
        If the video is longer than ``max_length`` or offers no audio-only
        stream.
    """
    yt = pytube.YouTube(url)
    if (max_length is not None) and (yt.length > max_length):
        raise ValueError(f"Youtube video exceeds max length of {max_length}")

    # ``first()`` yields None when the filter matches nothing; fail with a
    # clear message instead of an AttributeError on the download call.
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError("Youtube video has no audio-only stream")

    tmp_path = tempfile.mkdtemp()
    fname = audio_stream.download(output_path=tmp_path)
    # Narrow the type for static checkers.
    assert isinstance(fname, str)
    return fname
def check_ffmpeg_installed() -> None:
    """Verify that the ``ffmpeg`` executable can be launched.

    Raises
    ------
    RuntimeError
        If no ``ffmpeg`` executable is found on PATH.
    subprocess.CalledProcessError
        If ``ffmpeg`` is found but exits with a non-zero status.
    """
    # The single dash is intentional: ffmpeg spells the flag "-version".
    cmd = ["ffmpeg", "-version"]
    try:
        # Capture the output so the version banner is not echoed to the
        # console every time this check runs.
        subprocess.run(cmd, capture_output=True, check=True)
    except FileNotFoundError as exc:
        raise RuntimeError("This feature requires ffmpeg to be installed") from exc
# from openai whisper
def load_audio(file: str, sampling_rate: int) -> npt.NDArray[np.float32]:
    """Open an audio file and read as mono waveform, resampling as necessary

    Parameters
    ----------
    file: str
        The audio file to open

    sampling_rate: int
        The sample rate to resample the audio if necessary

    Returns
    -------
    A NumPy array containing the audio waveform, in float32 dtype.

    Raises
    ------
    RuntimeError
        If ffmpeg is not installed, or if ffmpeg fails to decode ``file``.
    """
    check_ffmpeg_installed()  # BB

    # This launches a subprocess to decode audio while down-mixing
    # and resampling as necessary. Requires the ffmpeg CLI in PATH.
    # fmt: off
    cmd = [
        "ffmpeg",
        "-nostdin",  # never let ffmpeg read from the parent's stdin
        "-threads", "0",
        "-i", file,
        "-f", "s16le",
        "-ac", "1",
        "-acodec", "pcm_s16le",
        "-ar", str(sampling_rate),
        "-",  # write the raw PCM stream to stdout so it can be captured
    ]
    # fmt: on
    try:
        out = subprocess.run(cmd, capture_output=True, check=True).stdout
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e

    # Signed 16-bit samples are scaled by 1/32768 into float32.
    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0