GAIA-Agent / src /tools.py
Mikkel Skovdal
Finishing touches
fb62e9e
from smolagents import tool
from langchain_community.document_loaders import WikipediaLoader
@tool
def multiply(a: int, b: int) -> int:
    """Return the product of two integers.

    Args:
        a: The first factor.
        b: The second factor.

    Returns:
        The value of a multiplied by b.
    """
    product = a * b
    return product
@tool
def add(a: int, b: int) -> int:
    """Return the sum of two integers.

    Args:
        a: The first addend.
        b: The second addend.

    Returns:
        The value of a plus b.
    """
    total = a + b
    return total
@tool
def subtract(a: int, b: int) -> int:
    """Return the difference between two integers.

    Args:
        a: The number to subtract from.
        b: The number to subtract.

    Returns:
        The value of a minus b.
    """
    difference = a - b
    return difference
@tool
def divide(a: int, b: int) -> float:
    """Divide one number by another using true division.

    Args:
        a: The dividend.
        b: The divisor. Must not be zero.

    Returns:
        The quotient of a divided by b. True division yields a float even
        for integer operands, so the result is never truncated.

    Raises:
        ValueError: If b is zero.
    """
    # Fail with a descriptive message instead of a bare ZeroDivisionError.
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b
@tool
def modulus(a: int, b: int) -> int:
    """Return the remainder of dividing one integer by another.

    Args:
        a: The dividend.
        b: The divisor.

    Returns:
        The value of a modulo b, computed with Python's % operator.
    """
    remainder = a % b
    return remainder
@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return at most 2 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "wiki_results". Its value is one string
        in which every loaded document is wrapped in a Document tag carrying
        its source and page metadata, with documents separated by "---".
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # A generator is enough here; join consumes it without an intermediate list.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    return {"wiki_results": formatted_search_docs}
@tool
def read_excel_file(file_path: str, query: str) -> str:
    """Load an Excel file with pandas and return a basic textual summary.

    The summary lists the number of rows and columns, the column names, and
    the output of DataFrame.describe(). The query argument is accepted but is
    not used by the current implementation.

    Args:
        file_path: Path to the Excel file
        query: Question about the data

    Returns:
        The summary text, or a message starting with "Error" when pandas is
        not installed or the file cannot be loaded or analyzed.
    """
    # Only the pandas import itself maps to the "not installed" message.
    # read_excel can also raise ImportError (missing Excel engine), which
    # must not be misreported as pandas being absent.
    try:
        import pandas as pd
    except ImportError:
        return "Error: pandas is not installed. Please install it with 'pip install pandas'."

    try:
        df = pd.read_excel(file_path)
        # Column labels may be non-strings (ints, timestamps); str.join
        # would raise TypeError on them, so convert each label first.
        column_names = ", ".join(str(column) for column in df.columns)
        summary_parts = [
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n",
            f"Columns: {column_names}\n\n",
            "Summary statistics:\n",
            str(df.describe()),
        ]
        return "".join(summary_parts)
    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Error analyzing Excel file: {e}"
@tool
def transcribe_audio_file(mp3_file_path: str) -> str:
    """Transcribe the speech in an mp3 file to text.

    The mp3 is converted to a temporary wav file, which is read back and sent
    to the Google speech recognizer. The temporary file is removed afterwards.

    Args:
        mp3_file_path (str): Path to the mp3 file.

    Returns:
        The recognized text, or a message starting with
        "Error transcribing mp3 file:" if any step fails.
    """
    try:
        import os
        import tempfile

        import speech_recognition as sr
        from pydub import AudioSegment

        audio = AudioSegment.from_mp3(mp3_file_path)
        recognizer = sr.Recognizer()
        # Convert inside a temporary directory so the intermediate wav file
        # neither overwrites an existing file next to the mp3 nor is left
        # behind on disk after the call.
        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_file = os.path.join(tmp_dir, "audio.wav")
            # export() hands back the open output file; close it right away
            # so the directory can be removed cleanly.
            audio.export(wav_file, format="wav").close()
            with sr.AudioFile(wav_file) as source:
                audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Error transcribing mp3 file: {e}"
@tool
def transcribe_from_youtube(youtube_id: str) -> str:
    """Fetch the transcript of a youtube video and return it as plain text.

    Args:
        youtube_id (str): ID of the youtube video. Not the full URL. Example: "dQw4w9WgXcQ"

    Returns:
        The transcript snippets joined by single spaces, or a fixed
        failure message when the transcript cannot be retrieved.
    """
    try:
        from youtube_transcript_api import YouTubeTranscriptApi

        fetched_transcript = YouTubeTranscriptApi().fetch(youtube_id)
        return " ".join(snippet.text for snippet in fetched_transcript)
    except Exception:
        # Best-effort by design, but catch Exception rather than everything
        # so KeyboardInterrupt and SystemExit still propagate.
        return "Could not extract transcript from YouTube video."