|
|
|
from smolagents import tool
|
|
from youtube_transcript_api import YouTubeTranscriptApi
|
|
from bs4 import BeautifulSoup
|
|
from tools.fetch import fetch_webpage
|
|
|
|
@tool
def get_youtube_transcript(video_id: str) -> str:
    """
    Retrieves the transcript text of a YouTube video identified by its video ID.

    Args:
        video_id (str): The ID of the YouTube video. Pass in the video ID, NOT the video URL. For a video with the URL https://www.youtube.com/watch?v=12345 the ID is 12345.

    Returns:
        str: The transcript of the YouTube video as a single string, with each transcript line separated by a newline character.
    """
    # Fetch the transcript and convert it to a list of plain dict entries.
    entries = YouTubeTranscriptApi().fetch(video_id).to_raw_data()

    # Keep only the text of each entry, one entry per output line.
    return "\n".join(entry["text"] for entry in entries)
|
|
|
|
|
|
@tool
def get_youtube_title_description(video_url: str) -> str:
    """
    Fetches the title and description of a YouTube video given its URL.

    Args:
        video_url (str): The url of the YouTube video.

    Returns:
        str: The title and description of the YouTube video. A placeholder text is used for any field that cannot be found in the page.
    """
    # Download the raw HTML (not markdown) so the <meta> tags are still present.
    html = fetch_webpage(video_url, convert_to_markdown=False)
    soup = BeautifulSoup(html, "html.parser")

    # Start from the placeholders and overwrite them only when the page
    # provides a value. Using .get() instead of ["content"] means a matching
    # <meta> tag without a content attribute yields the placeholder rather
    # than raising KeyError.
    title = "No title found"
    title_tag = soup.find("meta", {"name": "title"})
    if title_tag is not None:
        title = title_tag.get("content", title)

    description = "No description found"
    description_tag = soup.find("meta", {"name": "description"})
    if description_tag is not None:
        description = description_tag.get("content", description)

    return f"Title: {title}\nDescription: {description}"
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: fetch the metadata and the transcript of one video
    # and print whatever comes back.
    from dotenv import load_dotenv

    # The call parentheses matter: a bare `load_dotenv` only references the
    # function and never loads the .env file.
    load_dotenv()

    video_id = "1htKBjuUWec"
    video_url = "https://www.youtube.com/watch?v=" + video_id

    try:
        title_description = get_youtube_title_description(video_url)
        print(title_description)
    except Exception as e:
        print(f"Error fetching title and description: {e}")

    try:
        transcript = get_youtube_transcript(video_id)
    except Exception as e:
        print(f"Error fetching transcript: {e}")
    else:
        # Print only on success; after a failed fetch `transcript` is unbound
        # and printing it would raise NameError.
        print(transcript)