Spaces:
Sleeping
Sleeping
File size: 2,375 Bytes
01186d8 d616b4c 30ffa0e 33b9b1f d8d794a da4e402 d8d794a b114e5e d616b4c 6056455 d616b4c 33b9b1f d616b4c 6056455 d616b4c 1db6e4e 9b3c11b 30ffa0e 33b9b1f 30ffa0e 05dbca8 30ffa0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
from langchain_google_community import GooglePlacesTool
from langchain_community.agent_toolkits.load_tools import load_tools
from smolagents.tools import Tool, tool
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.proxies import WebshareProxyConfig
import os
# Google Places lookup, wrapped so it can be used as a smolagents tool.
google_map_tool = Tool.from_langchain(GooglePlacesTool())

from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

# Wikipedia search configured with top_k_results=2.
wikipedia_tool = Tool.from_langchain(
    WikipediaQueryRun(
        api_wrapper=WikipediaAPIWrapper(top_k_results=2),
    )
)

# Every LangChain tool loaded under the "arxiv" name, each wrapped for smolagents.
arxiv_tools = [
    Tool.from_langchain(langchain_tool)
    for langchain_tool in load_tools(["arxiv"])
]

# Tool groups exposed by this module.
community_tools = [google_map_tool]
search_tools = [wikipedia_tool] + arxiv_tools
@tool
def get_youtube_transcript_from_url(video_url: str) -> str:
    """
    Get the transcript of a YouTube video, using a Webshare proxy when credentials are configured.

    The video ID is extracted from the URL; watch, youtu.be, shorts, embed and live links are
    supported. If the transcript cannot be fetched from YouTube, a manually saved transcript
    stored as transcripts/<video_id>.txt next to this file is returned instead.

    Args:
        video_url: The URL of the YouTube video (e.g. https://www.youtube.com/watch?v=dQw4w9WgXcQ)

    Returns:
        The transcript of the YouTube video as a string

    Raises:
        ValueError: If no valid video ID can be extracted from the URL.
        FileNotFoundError: If fetching fails and no manual transcript exists for the video.
    """
    # Imported locally so the tool function does not need extra module-level imports.
    import re
    from urllib.parse import parse_qs, urlparse

    def extract_video_id(url: str) -> str:
        """Return the video ID contained in a YouTube URL, or raise ValueError."""
        url = url.strip()
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
        segments = [segment for segment in parsed.path.split("/") if segment]
        query_ids = parse_qs(parsed.query).get("v")

        candidate = ""
        if query_ids:
            # Standard watch URL; other parameters such as &t=10s are ignored.
            candidate = query_ids[0]
        elif host in {"youtu.be", "www.youtu.be"} and segments:
            # Short link: https://youtu.be/<id>
            candidate = segments[0]
        elif len(segments) >= 2 and segments[0] in {"shorts", "embed", "live", "v"}:
            # Path-style link: https://www.youtube.com/shorts/<id>
            candidate = segments[1]
        elif "=" in url:
            # Legacy fallback: whatever follows the first "=" (e.g. a bare "v=<id>").
            candidate = url.split("=")[1].split("&")[0]

        # The ID is later used as a file name, so only the YouTube ID alphabet is
        # accepted; this also rules out path separators and "..".
        if not re.fullmatch(r"[A-Za-z0-9_-]+", candidate):
            raise ValueError(f"Could not extract a YouTube video ID from URL: {url!r}")
        return candidate

    video_id = extract_video_id(video_url)

    try:
        # Get proxy credentials from environment variables
        proxy_username = os.getenv("WEBSHARE_PROXY_USERNAME")
        proxy_password = os.getenv("WEBSHARE_PROXY_PASSWORD")

        # Configure proxy if credentials are available
        if proxy_username and proxy_password:
            proxy_config = WebshareProxyConfig(
                proxy_username=proxy_username,
                proxy_password=proxy_password,
            )
            ytt_api = YouTubeTranscriptApi(proxy_config=proxy_config)
        else:
            ytt_api = YouTubeTranscriptApi()

        fetched_transcript = ytt_api.fetch(video_id)
        # The fetched transcript is iterable; each snippet exposes its text as an
        # attribute (snippets are objects, not dicts). One snippet per line.
        return "".join(f"{snippet.text}\n" for snippet in fetched_transcript)
    except Exception as fetch_error:
        # Deliberate best-effort: any failure while fetching falls back to a
        # manually saved transcript stored next to this file.
        curr_dir = os.path.dirname(os.path.abspath(__file__))
        transcript_path = os.path.join(curr_dir, "transcripts", f"{video_id}.txt")
        try:
            with open(transcript_path, "r", encoding="utf-8") as f:
                return f.read()
        except FileNotFoundError:
            # Same exception type as before, but the message now reports why the
            # online fetch failed instead of hiding it.
            raise FileNotFoundError(
                f"Could not fetch the transcript for video '{video_id}' "
                f"({type(fetch_error).__name__}: {fetch_error}) and no manual "
                f"transcript exists at {transcript_path}"
            ) from fetch_error
|