ConversAI/src/components/loaders/youtubeLoader.py
techconsptrs's picture
UPDATE: code update
1802405
from typing import Iterable, List, Union

from langchain_community.document_loaders import YoutubeLoader

from src.utils.exceptions import CustomException
from src.utils.functions import cleanText
from src.utils.logging import logger
class YoutubeTranscriptLoader:
    """Fetch YouTube transcripts and return them as one cleaned string."""

    def __init__(self):
        """Initialize the YoutubeTranscriptLoader."""
        pass

    @staticmethod
    def _splitUrls(urls: Union[str, Iterable[str]]) -> List[str]:
        """Return the distinct URLs from *urls* in first-seen order."""
        if isinstance(urls, str):
            # A bare string is a comma-separated list; iterating it directly
            # would walk it character by character.
            pieces = (piece.strip() for piece in urls.split(","))
            candidates = [piece for piece in pieces if piece]
        else:
            candidates = urls
        # dict.fromkeys de-duplicates while keeping insertion order, so the
        # joined output is deterministic (a set would scramble it).
        return list(dict.fromkeys(candidates))

    def getTranscripts(self, urls: Union[str, Iterable[str]]) -> str:
        """
        Retrieve transcripts for one or more YouTube URLs.

        Args:
            urls: Either a single string of comma-separated YouTube URLs, or
                an iterable of URL strings. Duplicates are fetched only once.

        Returns:
            str: The cleaned transcripts joined by newlines, in the order the
            URLs were first given. A URL whose transcript cannot be loaded
            contributes an empty entry. Returns "" when no URLs are supplied.
        """
        texts = []
        for url in self._splitUrls(urls):
            try:
                loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
                doc = " ".join([x.page_content for x in loader.load()])
                texts.append(cleanText(text=doc))
            except Exception as e:
                # Best effort: one failing video must not abort the batch.
                logger.error(CustomException(e))
                texts.append("")  # Append an empty string on error
        return "\n".join(texts)