Final_Assignment_Template / community_tools.py
huytofu92's picture
Prompts!
b114e5e
from langchain_google_community import GooglePlacesTool
from langchain_community.agent_toolkits.load_tools import load_tools
from smolagents.tools import Tool, tool
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.proxies import WebshareProxyConfig
import os
# Google Places lookup, wrapped so smolagents agents can call the LangChain tool.
google_map_tool = Tool.from_langchain(GooglePlacesTool())

from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

# Wikipedia query tool; the API wrapper is configured with top_k_results=2.
wikipedia_tool = Tool.from_langchain(
    WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(top_k_results=2))
)

# load_tools(["arxiv"]) returns a list of LangChain tools; convert each one.
# The loop variable is deliberately not called `tool`, to avoid confusion with
# the `tool` decorator imported from smolagents.
arxiv_tools = [Tool.from_langchain(lc_tool) for lc_tool in load_tools(["arxiv"])]

# Tool groups exported by this module.
community_tools = [google_map_tool]
search_tools = [wikipedia_tool] + arxiv_tools
@tool
def get_youtube_transcript_from_url(video_url: str)->str:
    """
    Get the transcript of a YouTube video using proxy configuration

    The transcript is fetched through the YouTube transcript API, going through a
    Webshare proxy when both WEBSHARE_PROXY_USERNAME and WEBSHARE_PROXY_PASSWORD
    are set in the environment. If the API call fails for any reason, the
    transcript is read from transcripts/<video_id>.txt next to this file.

    Args:
        video_url: The URL of the YouTube video (e.g. https://www.youtube.com/watch?v=dQw4w9WgXcQ)
            Short youtu.be links and /shorts/, /embed/ and /live/ paths are accepted as well.

    Returns:
        The transcript of the YouTube video as a string
    """
    # Local imports keep the tool function self-contained.
    import re
    from urllib.parse import parse_qs, urlparse

    # --- 1. Extract the video ID from the URL --------------------------------
    # Splitting on "=" breaks as soon as the URL carries any other query
    # parameter (e.g. "&t=10s") or has no query string at all (youtu.be/<id>),
    # so parse the URL properly instead.
    cleaned_url = video_url.strip()
    if "://" not in cleaned_url:
        # urlparse only recognises the host when a scheme is present.
        cleaned_url = f"https://{cleaned_url}"
    parsed_url = urlparse(cleaned_url)
    hostname = (parsed_url.hostname or "").lower()
    path_parts = [part for part in parsed_url.path.split("/") if part]

    if hostname == "youtu.be" or hostname.endswith(".youtu.be"):
        # Short links: https://youtu.be/<id>
        video_id = path_parts[0] if path_parts else ""
    else:
        # Standard links: https://www.youtube.com/watch?v=<id>&...
        video_id = parse_qs(parsed_url.query).get("v", [""])[0]
        if (
            not video_id
            and len(path_parts) >= 2
            and path_parts[0] in {"embed", "shorts", "live", "v"}
        ):
            # Path-style links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
            video_id = path_parts[1]

    # The ID is later interpolated into a file path, so only accept the
    # URL-safe characters YouTube IDs are made of (no separators or dots).
    if not re.fullmatch(r"[A-Za-z0-9_-]+", video_id):
        raise ValueError(
            f"Could not extract a YouTube video ID from URL: {video_url!r}"
        )

    # --- 2. Try the transcript API -------------------------------------------
    try:
        # Get proxy credentials from environment variables
        proxy_username = os.getenv("WEBSHARE_PROXY_USERNAME")
        proxy_password = os.getenv("WEBSHARE_PROXY_PASSWORD")

        # Configure proxy if credentials are available
        if proxy_username and proxy_password:
            proxy_config = WebshareProxyConfig(
                proxy_username=proxy_username,
                proxy_password=proxy_password,
            )
            ytt_api = YouTubeTranscriptApi(proxy_config=proxy_config)
        else:
            ytt_api = YouTubeTranscriptApi()

        fetched_transcript = ytt_api.fetch(video_id)

        # The fetched transcript is iterable and yields snippet objects whose
        # text is exposed as an attribute (they are not subscriptable).
        # One snippet per line, each terminated by a newline.
        return "".join(f"{snippet.text}\n" for snippet in fetched_transcript)
    except Exception as api_error:
        # --- 3. Best-effort fallback: manually saved transcript --------------
        # Deliberately broad: any API/network/proxy failure falls back to the
        # local copy shipped alongside this module.
        curr_dir = os.path.dirname(os.path.abspath(__file__))
        transcript_path = os.path.join(curr_dir, "transcripts", f"{video_id}.txt")
        try:
            with open(transcript_path, "r", encoding="utf-8") as f:
                return f.read()
        except FileNotFoundError:
            # Keep the original exception type, but surface why the API path
            # failed instead of silently discarding that error.
            raise FileNotFoundError(
                f"Transcript API failed for video {video_id!r} ({api_error!r}) "
                f"and no manual transcript exists at {transcript_path}"
            ) from api_error