File size: 1,137 Bytes
7e24b41
 
 
 
 
 
 
1802405
7e24b41
 
1802405
 
 
 
 
 
 
 
 
 
7e24b41
 
 
1802405
7e24b41
1802405
7e24b41
 
1802405
 
7e24b41
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from typing import Iterable, List, Union

from langchain_community.document_loaders import YoutubeLoader

from src.utils.exceptions import CustomException
from src.utils.functions import cleanText
from src.utils.logging import logger

class YoutubeTranscriptLoader:
    """Fetch transcripts for one or more YouTube videos and merge them."""

    def __init__(self):
        """Initialize the YoutubeTranscriptLoader."""

    @staticmethod
    def _splitUrls(urls: Union[str, Iterable[str]]) -> List[str]:
        """Normalize ``urls`` into an ordered list of unique, non-empty URLs."""
        if not urls:
            return []
        # A plain string is treated as a comma-separated list. Iterating it
        # directly would walk over single characters instead of URLs.
        candidates = urls.split(",") if isinstance(urls, str) else urls
        stripped = (str(candidate).strip() for candidate in candidates)
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # combined transcript is deterministic (a set would shuffle it).
        return list(dict.fromkeys(url for url in stripped if url))

    def getTranscripts(self, urls: Union[str, Iterable[str]]) -> str:
        """
        Retrieve transcripts for the given YouTube URLs.

        Args:
            urls: Comma-separated YouTube URLs in a single string, or an
                iterable of URL strings. Surrounding whitespace, empty
                entries and duplicates are ignored; the first occurrence
                of each URL determines its position in the output.

        Returns:
            str: One line per unique URL, joined by newlines. Each line is
            the URL's transcript passed through ``cleanText``; a URL whose
            transcript could not be loaded contributes an empty line.
            Returns an empty string when no URLs are supplied.
        """
        texts = []
        for url in self._splitUrls(urls):
            try:
                loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
                doc = " ".join([x.page_content for x in loader.load()])
                texts.append(cleanText(text=doc))
            except Exception as e:
                # Best effort: one failing video must not abort the others.
                logger.error(CustomException(e))
                texts.append("")  # Append an empty string on error

        return "\n".join(texts)