File size: 2,375 Bytes
01186d8
 
d616b4c
30ffa0e
33b9b1f
 
d8d794a
 
 
 
 
 
da4e402
d8d794a
 
 
b114e5e
 
d616b4c
 
6056455
d616b4c
33b9b1f
d616b4c
6056455
d616b4c
1db6e4e
 
9b3c11b
30ffa0e
33b9b1f
 
 
 
 
 
 
 
 
 
 
 
 
 
30ffa0e
 
 
 
 
 
 
 
05dbca8
 
 
 
 
 
 
 
30ffa0e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from langchain_google_community import GooglePlacesTool
from langchain_community.agent_toolkits.load_tools import load_tools
from smolagents.tools import Tool, tool
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.proxies import WebshareProxyConfig
import os

# Expose LangChain's GooglePlacesTool as a smolagents Tool. The LangChain tool
# is instantiated at import time.
# NOTE(review): presumably this needs a Google Places API key in the
# environment — confirm against the langchain_google_community docs.
google_map_tool = Tool.from_langchain(GooglePlacesTool())

from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

# Expose LangChain's Wikipedia query tool as a smolagents Tool; the API wrapper
# is configured with top_k_results=2.
wikipedia_tool = Tool.from_langchain(WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(top_k_results=2)))

# Convert every LangChain tool returned by load_tools(["arxiv"]) into a
# smolagents Tool.
arxiv_tools = list(map(Tool.from_langchain, load_tools(["arxiv"])))

# Tool groups exported by this module.
community_tools = [google_map_tool]
search_tools = [wikipedia_tool] + arxiv_tools

@tool
def get_youtube_transcript_from_url(video_url: str) -> str:
    """
    Get the transcript of a YouTube video, using a Webshare proxy when proxy credentials are configured. If the transcript cannot be fetched, a locally stored transcript file for the video is returned instead.
    Args:
        video_url: The URL of the YouTube video (e.g. https://www.youtube.com/watch?v=dQw4w9WgXcQ). Short youtu.be links and /embed/ or /shorts/ URLs are accepted too.
    Returns:
        The transcript of the YouTube video as a string
    Raises:
        ValueError: If no usable video id can be extracted from the URL.
        FileNotFoundError: If fetching fails and no local transcript file exists for the video.
    """
    # Function-scope imports: the extra stdlib names are only needed here, so
    # the module's import block does not have to change.
    import re
    from urllib.parse import parse_qs, urlparse

    # Prefer the "v" query parameter (regardless of its position in the query
    # string); otherwise use the last path segment, which covers
    # youtu.be/<id>, /embed/<id>, /shorts/<id> and a bare id.
    parsed_url = urlparse(video_url.strip())
    query_ids = parse_qs(parsed_url.query).get("v")
    if query_ids:
        video_id = query_ids[0]
    else:
        video_id = parsed_url.path.rstrip("/").rsplit("/", 1)[-1]

    # The id is later interpolated into a filesystem path, so reject anything
    # that is not a plain token (no separators, no dots, not empty).
    if not re.fullmatch(r"[\w-]+", video_id):
        raise ValueError(f"Could not extract a YouTube video id from URL: {video_url!r}")

    try:
        # Get proxy credentials from environment variables
        proxy_username = os.getenv("WEBSHARE_PROXY_USERNAME")
        proxy_password = os.getenv("WEBSHARE_PROXY_PASSWORD")

        # Configure proxy if credentials are available
        if proxy_username and proxy_password:
            proxy_config = WebshareProxyConfig(
                proxy_username=proxy_username,
                proxy_password=proxy_password,
            )
            ytt_api = YouTubeTranscriptApi(proxy_config=proxy_config)
        else:
            ytt_api = YouTubeTranscriptApi()

        fetched_transcript = ytt_api.fetch(video_id)

        # Iterating the fetched transcript yields snippet objects whose text
        # is exposed as the ``text`` attribute (they are not dicts).
        return "".join(f"{snippet.text}\n" for snippet in fetched_transcript)
    except Exception:
        # Deliberate best-effort fallback: any failure while fetching
        # (blocked IP, disabled captions, network error, ...) falls back to a
        # manually saved transcript at transcripts/<video_id>.txt next to this
        # file. A missing file raises FileNotFoundError, implicitly chained to
        # the original fetch error.
        curr_dir = os.path.dirname(os.path.abspath(__file__))
        transcript_path = os.path.join(curr_dir, "transcripts", f"{video_id}.txt")
        with open(transcript_path, "r", encoding="utf-8") as f:
            return f.read()