h2oaimichalmarszalek's picture
development
3469f37
import backoff
from langchain_community.document_loaders import YoutubeLoader
from smolagents import tool
@tool
def load_youtube(video_url: str, phrase: str) -> str:
    """
    Load a YouTube video's transcript and return the part that starts at a given phrase.

    The transcript is fetched with YoutubeLoader and lower-cased. The phrase is lower-cased
    as well and has "." and "?" removed before it is searched for. Loading is retried with
    exponential backoff on any exception, including an empty loader result.

    Args:
        video_url (str): The YouTube video URL to load the transcript from.
        phrase (str): The phrase to look for in the transcript.

    Returns:
        str: The lower-cased transcript from the first occurrence of the phrase to the end, or the whole lower-cased transcript when the phrase is not found.

    Raises:
        Exception: The last error raised while loading once the retries are exhausted (at most 8 tries or 60 seconds); a ValueError when the loader keeps returning no document.

    Examples:
        Illustrative only (needs network access):

        >>> load_youtube("https://www.youtube.com/watch?v=dQw4w9WgXcQ", "What is the answer?")  # doctest: +SKIP
        "what is the answer ..."
    """

    @backoff.on_exception(backoff.expo, Exception, max_tries=8, max_time=60)
    def _loader(url: str) -> str:
        docs = YoutubeLoader.from_youtube_url(url).load()
        if not docs:
            # Raised inside the decorated function so an empty result is retried as well.
            raise ValueError("Empty document")
        return docs[0].page_content.lower()

    # Only "." and "?" are removed from the phrase; the transcript text itself is only lower-cased.
    needle = phrase.replace(".", "").replace("?", "").lower()
    content = _loader(video_url)

    # A single scan: find() returns -1 when the phrase is absent, in which case
    # the full transcript is returned unchanged.
    start = content.find(needle)
    return content[start:] if start != -1 else content