# Final_Assignment_Template / langg_agent_tools.py
# Author: diego.sancristobal
# Commit a14137a — feat: :sparkles: Functional agent obtaining 9 out of 20
import json
import logging
import sys
from urllib.parse import parse_qs, urlparse

from langchain_community.document_loaders import (
    ArxivLoader,
    WebBaseLoader,
    WikipediaLoader,
    YoutubeLoader,
)
from langchain_community.tools import DuckDuckGoSearchResults, TavilySearchResults
from langchain_core.tools import tool
from youtube_transcript_api import YouTubeTranscriptApi
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
@tool
def add(values: list[int]) -> float:
    """
    Add all numbers in a list together.

    Args:
        values: A list of numbers to sum (an empty list yields 0.0).

    Returns:
        The sum of all numbers in the list, as a float so the runtime
        value matches the declared return type.
    """
    # Lazy %-style args avoid formatting the message when INFO is disabled.
    logger.info("Adding numbers: %s", values)
    # float() reconciles sum()'s int result with the annotated float return.
    return float(sum(values))
@tool
def subtract(a: int, b: int) -> int:
    """
    Subtract one number from another.

    Args:
        a: The first number (minuend)
        b: The second number (subtrahend)

    Returns:
        The difference a - b
    """
    logger.info(f"Subtracting {a} - {b}")
    difference = a - b
    return difference
@tool
def multiply(a: int, b: int) -> int:
    """
    Compute the product of two numbers.

    Args:
        a: The first factor
        b: The second factor

    Returns:
        The product a * b
    """
    logger.info(f"Multiplying {a} * {b}")
    product = a * b
    return product
@tool
def divide(a: int, b: int) -> float:
    """
    Divide one number by another.

    Args:
        a: The dividend
        b: The divisor

    Returns:
        The quotient a / b (raises ZeroDivisionError when b is 0,
        matching Python's native division semantics)
    """
    logger.info(f"Dividing {a} / {b}")
    quotient = a / b
    return quotient
@tool
def modulo(a: int, b: int) -> int:
    """
    Compute the remainder of an integer division.

    Args:
        a: The dividend
        b: The divisor

    Returns:
        The remainder a % b (follows Python semantics: the result takes
        the sign of the divisor)
    """
    logger.info(f"Calculating modulo of {a} % {b}")
    remainder = a % b
    return remainder
@tool
def wikipedia_search(query: str) -> str:
    """
    Search Wikipedia for information.

    Args:
        query: The query to search for

    Returns:
        Up to three matching articles formatted as delimited text blocks,
        or an explicit no-results message.
    """
    logger.info(f"Searching Wikipedia for: {query}")
    docs_found = WikipediaLoader(query=query, load_max_docs=3).load()
    if not docs_found:
        # Give the agent an explicit signal instead of an empty string.
        return "No Wikipedia results found for the query."
    formatted_results = []
    for i, doc in enumerate(docs_found, 1):
        source = doc.metadata.get('source', 'Unknown source')
        title = doc.metadata.get('title', 'Untitled')
        # Full article body; fall back to a placeholder when empty.
        content = doc.page_content.strip() or "No content available"
        formatted_results.append(
            f"""--- DOCUMENT {i} START ---
Source: {source}
Title: {title}
Content: {content}
--- DOCUMENT {i} END ---"""
        )
    return "\n\n".join(formatted_results)
@tool
def arxiv_search(query: str) -> str:
    """
    Search ArXiv for research papers.

    Args:
        query: The query to search for

    Returns:
        Up to three matching papers with abstracts, formatted as delimited
        text blocks, or an explicit no-results message.
    """
    logger.info(f"Searching ArXiv for: {query}")
    docs_found = ArxivLoader(query=query, load_max_docs=3).load()
    if not docs_found:
        # Explicit signal for the agent instead of an empty string.
        return "No ArXiv results found for the query."
    formatted_results = []
    for i, doc in enumerate(docs_found, 1):
        source = doc.metadata.get('source', 'Unknown source')
        title = doc.metadata.get('title', 'Untitled')
        # ArxivLoader places the paper text/abstract in page_content.
        abstract = doc.page_content.strip() or "No abstract available"
        formatted_results.append(
            f"""--- DOCUMENT {i} START ---
Source: {source}
Title: {title}
Abstract: {abstract}
--- DOCUMENT {i} END ---"""
        )
    return "\n\n".join(formatted_results)
@tool
def web_search(query: str) -> str:
    """
    Fetch and format the content of one or two web pages.

    Note: despite the name, this is NOT a keyword search — WebBaseLoader
    only fetches URLs, so the query must be a URL (or list of URLs).
    Use a search tool (DuckDuckGo/Tavily) for free-text queries.

    Args:
        query: A single URL string (must start with 'http'), or a list
            of URLs (at most the first two are fetched).

    Returns:
        The fetched page content formatted as delimited text blocks, or
        an error message when no valid URL was given or fetching failed.
    """
    logger.info(f"Searching the web for: {query}")
    try:
        # Defensive: annotation says str, but tolerate a list of URLs too.
        if isinstance(query, str):
            urls = [query] if query.startswith('http') else []
        else:
            urls = query
        if not urls:
            return "No valid URLs provided for web search."
        # Cap the number of pages fetched to keep responses bounded.
        urls = urls[:2]
        docs_found = WebBaseLoader(urls).load()
        formatted_results = []
        for i, doc in enumerate(docs_found, 1):
            source = doc.metadata.get('source', 'Unknown source')
            title = doc.metadata.get('title', 'Untitled')
            # Full page text is returned untruncated.
            content = doc.page_content.strip() or "No content available"
            formatted_results.append(
                f"""--- DOCUMENT {i} START ---
Source: {source}
Title: {title}
Content: {content}
--- DOCUMENT {i} END ---"""
            )
        return "\n\n".join(formatted_results)
    except Exception as e:
        # Surface the failure to the agent rather than crashing the run.
        return f"Error during web search: {str(e)}"
@tool
def youtube_transcript(url: str) -> dict:
    """
    Get the transcript of a YouTube video.

    Args:
        url: A YouTube watch URL, e.g. "https://www.youtube.com/watch?v=ID"
            or the short form "https://youtu.be/ID".

    Returns:
        A dict with key "youtube_transcript" mapping to the full transcript
        text (annotation fixed to dict: the original declared str but
        returned a dict).
    """
    logger.info(f"Getting transcript of YouTube video: {url}")
    # Parse the video id robustly: the old string-partition approach kept
    # trailing query params (e.g. "&t=30s") and failed on youtu.be links.
    parsed = urlparse(url)
    if parsed.netloc.endswith("youtu.be"):
        video_id = parsed.path.lstrip("/")
    else:
        video_id = parse_qs(parsed.query).get("v", [""])[0]
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    transcript_text = " ".join(item["text"] for item in transcript)
    return {"youtube_transcript": transcript_text}
@tool
def python_interpreter(code: str) -> str:
    """
    Execute Python code and return its captured stdout.

    Args:
        code: Python code to execute

    Returns:
        The output printed by the code, a success message when the code
        prints nothing, or an error message.

    Note:
        This is a best-effort sandbox only. The keyword screening below is
        substring-based and easy to bypass; do not rely on it for truly
        untrusted input.
    """
    from io import StringIO

    # Capture the current stdout BEFORE entering the try block so the
    # finally clause can never reference an unbound name (the original
    # assigned old_stdout inside try, risking a NameError on early failure).
    old_stdout = sys.stdout
    try:
        sys.stdout = captured_output = StringIO()
        # Curated names exposed to the executed code.
        local_namespace = {
            '__builtins__': __builtins__,
            'print': print,
            'len': len,
            'str': str,
            'int': int,
            'float': float,
            'list': list,
            'dict': dict,
            'tuple': tuple,
            'set': set,
            'range': range,
            'enumerate': enumerate,
            'zip': zip,
            'map': map,
            'filter': filter,
            'sum': sum,
            'max': max,
            'min': min,
            'abs': abs,
            'round': round,
            'sorted': sorted,
            'reversed': reversed,
            'any': any,
            'all': all,
            'isinstance': isinstance,
            'type': type,
            'hasattr': hasattr,
            'getattr': getattr,
            'setattr': setattr,
            'dir': dir,
            'help': help,
        }
        # Modules considered safe to import from executed code.
        allowed_modules = {
            'math', 'random', 'datetime', 'json', 'collections',
            'itertools', 'functools', 're', 'statistics'
        }
        # NOTE(review): substring screening — "exec" also matches words like
        # "execute", and the allow-list only checks that some safe module
        # name appears anywhere in the code. Best-effort, not a sandbox.
        dangerous_keywords = ['import os', 'import sys', 'import subprocess', 'exec', 'eval', '__import__', 'open(', 'file(']
        for keyword in dangerous_keywords:
            if keyword in code.lower():
                if keyword.startswith('import') and any(module in code for module in allowed_modules):
                    continue
                return f"Error: Potentially dangerous operation detected: {keyword}"
        # Empty globals __builtins__ blocks implicit builtin lookup; the
        # curated names above are supplied via the locals mapping.
        exec(code, {"__builtins__": {}}, local_namespace)
        output = captured_output.getvalue()
        return output if output else "Code executed successfully (no output)"
    except Exception as e:
        return f"Error executing code: {str(e)}"
    finally:
        # Always restore stdout, on success and failure alike.
        sys.stdout = old_stdout
@tool
def web_search_duckduckgo(query: str) -> dict:
    """Run a DuckDuckGo search and return at most 3 formatted results.

    Args:
        query: The search query."""
    searcher = DuckDuckGoSearchResults(output_format="list", max_results=3)
    results = searcher.invoke(input=query)
    documents = [
        f'<Document>\n{entry.get("content", "")}\n</Document>'
        for entry in results
    ]
    return {"web_results": "\n\n---\n\n".join(documents)}
@tool
def youtube_loader(youtube_url: str) -> dict:
    """Load a YouTube video's transcript and return the transcript documents.

    Args:
        youtube_url: The YouTube video URL."""
    docs = YoutubeLoader.from_youtube_url(youtube_url, add_video_info=True).load()
    return {"youtube_transcript": docs}
@tool
def web_search_tavily(query: str) -> dict:
    """Run a Tavily search and return at most 3 formatted results.

    Args:
        query: The search query."""
    searcher = TavilySearchResults(max_results=3)
    results = searcher.invoke({'query': query})
    documents = [
        f'<Document>\n{entry.get("content", "")}\n</Document>'
        for entry in results
    ]
    return {"web_results": "\n\n---\n\n".join(documents)}