Spaces:
Running
Running
File size: 1,137 Bytes
7e24b41 1802405 7e24b41 1802405 7e24b41 1802405 7e24b41 1802405 7e24b41 1802405 7e24b41 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
from langchain_community.document_loaders import YoutubeLoader
from src.utils.exceptions import CustomException
from src.utils.functions import cleanText
from src.utils.logging import logger
class YoutubeTranscriptLoader:
    """Fetch and combine transcripts for one or more YouTube videos."""

    def __init__(self):
        """Initialize the YoutubeTranscriptLoader (no state is kept)."""

    def getTranscripts(self, urls: str) -> str:
        """
        Retrieve transcripts for the given YouTube URLs.

        Args:
            urls (str): Comma-separated YouTube URLs to fetch transcripts
                from. For backward compatibility an iterable of URL strings
                (e.g. a list) is accepted as well. Surrounding whitespace is
                stripped, blank entries are ignored, and duplicate URLs are
                fetched only once (first occurrence wins).

        Returns:
            str: The cleaned transcripts joined by newlines, in the order the
            URLs were first given. A URL whose transcript cannot be loaded
            contributes an empty string (the failure is logged, not raised).
            Returns "" when no URLs are supplied.
        """
        # A plain string must be split on commas: iterating over it directly
        # (or over set(urls)) would yield single characters, not URLs.
        if isinstance(urls, str):
            candidates = urls.split(",")
        else:
            candidates = list(urls or [])

        stripped = (
            item.strip() if isinstance(item, str) else item
            for item in candidates
        )
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # combined output is deterministic (a set would not guarantee order).
        unique_urls = list(dict.fromkeys(item for item in stripped if item))

        texts = []
        for url in unique_urls:
            try:
                loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
                doc = " ".join(page.page_content for page in loader.load())
                texts.append(cleanText(text=doc))
            except Exception as e:
                # Best-effort: one failing video must not abort the batch.
                logger.error(CustomException(e))
                texts.append("")  # Append an empty string on error
        return "\n".join(texts)