Spaces:
Runtime error
Runtime error
from smolagents import tool | |
from youtube_transcript_api import YouTubeTranscriptApi | |
from bs4 import BeautifulSoup | |
from tools.fetch import fetch_webpage | |
# Fetch the transcript of a YouTube video from its video ID.
def get_youtube_transcript(video_id: str) -> str:
    """
    Return the transcript text of a YouTube video.

    Args:
        video_id (str): The YouTube video ID, NOT the video URL. For a video
            with the URL https://www.youtube.com/watch?v=12345 the ID is 12345.

    Returns:
        str: The text of every transcript entry, joined into a single string
            with one entry per line.
    """
    entries = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
    # Each raw entry is a dict such as
    # {'text': 'Hey there', 'start': 0.0, 'duration': 1.54};
    # timing information is dropped and only the 'text' value is kept.
    return "\n".join(entry["text"] for entry in entries)
# Fetch the title and description of a YouTube video from its URL.
def get_youtube_title_description(video_url: str) -> str:
    """
    Fetch the title and description of a YouTube video given its URL.

    The page is retrieved through ``fetch_webpage`` (called with
    ``convert_to_markdown=False``; presumably this yields the raw HTML --
    confirm against tools.fetch) and parsed with BeautifulSoup. The title and
    description are read from the ``<meta name="title">`` and
    ``<meta name="description">`` tags.

    Args:
        video_url (str): The URL of the YouTube video.

    Returns:
        str: A two-line string of the form
            "Title: <title>\\nDescription: <description>". When a meta tag is
            absent, or present without a ``content`` attribute, the
            corresponding "No title found" / "No description found"
            placeholder is used instead.
    """
    page = fetch_webpage(video_url, convert_to_markdown=False)
    soup = BeautifulSoup(page, "html.parser")

    def meta_content(name: str, fallback: str) -> str:
        # Return the content attribute of <meta name=...>, or the fallback.
        tag = soup.find("meta", {"name": name})
        if tag is None:
            return fallback
        # Tag.get avoids a KeyError when the tag carries no content attribute.
        return tag.get("content", fallback)

    title = meta_content("title", "No title found")
    description = meta_content("description", "No description found")
    return f"Title: {title}\nDescription: {description}"
if __name__ == "__main__":
    # Manual smoke test: fetch and print the metadata and transcript of one video.
    from dotenv import load_dotenv

    # load_dotenv has to be called; a bare reference to the name does nothing.
    load_dotenv()

    video_id = "1htKBjuUWec"  # Replace with your YouTube video ID
    video_url = "https://www.youtube.com/watch?v=" + video_id

    # Get the title and description
    try:
        title_description = get_youtube_title_description(video_url)
    except Exception as e:
        print(f"Error fetching title and description: {e}")
    else:
        print(title_description)

    # Get the transcript; print it only when the fetch succeeded, otherwise
    # `transcript` would be unbound and printing it would raise NameError.
    try:
        transcript = get_youtube_transcript(video_id)
    except Exception as e:
        print(f"Error fetching transcript: {e}")
    else:
        print(transcript)