simoncwang's picture
changed some tools and prompting
84013cf
from smolagents import SpeechToTextTool, FinalAnswerTool, GoogleSearchTool, VisitWebpageTool, WikipediaSearchTool
from smolagents import Tool, tool
from langchain_community.agent_toolkits.load_tools import load_tools
import wikipedia
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()
from youtube_transcript_api import YouTubeTranscriptApi
# uses youtube transcript api to fetch the transcript of a YouTube video
class YouTubeTranscript(Tool):
    """Tool that fetches a YouTube video's transcript with per-snippet timestamps.

    The transcript is retrieved through ``YouTubeTranscriptApi`` and rendered
    as one ``[start - end] text`` line per snippet.
    """

    name = "youtube_transcript"
    description = "Fetches the transcript of a YouTube video including timestamps given its URL."
    inputs = {
        "video_url": {
            "type": "string",
            "description": "The URL of the YouTube video to fetch the transcript for."
        }
    }
    output_type = "string"

    def get_video_id(self, video_url: str) -> str:
        """Extract the video ID from a YouTube URL.

        Supported shapes (scheme optional, any ``youtube.com`` subdomain):

        * ``youtu.be/<id>`` -- extra query parameters or a trailing slash
          are ignored.
        * ``youtube.com/watch?...v=<id>...`` -- ``v`` may appear anywhere in
          the query string.
        * ``youtube.com/shorts/<id>``, ``/embed/<id>``, ``/live/<id>``,
          ``/v/<id>``.

        Args:
            video_url: The URL of the video.

        Returns:
            The video ID.

        Raises:
            ValueError: If no video ID can be found in ``video_url``.
        """
        # Imported locally so the method carries its own dependency.
        from urllib.parse import parse_qs, urlparse

        # Path prefixes whose following segment is the video ID.
        id_path_prefixes = ("shorts", "embed", "live", "v")

        url = video_url.strip()
        # urlparse only fills in the host when a scheme is present; the
        # previous substring-based check accepted scheme-less URLs, so keep
        # accepting them.
        if "://" not in url:
            url = "https://" + url

        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
        segments = [part for part in parsed.path.split("/") if part]

        video_id = ""
        if host == "youtu.be":
            if segments:
                video_id = segments[0]
        elif host == "youtube.com" or host.endswith(".youtube.com"):
            if segments[:1] == ["watch"]:
                # Decide by host and path, not by substring: a URL such as
                # ``watch?v=ID&feature=youtu.be`` must not be treated as a
                # short link.
                video_id = parse_qs(parsed.query).get("v", [""])[0]
            elif len(segments) >= 2 and segments[0] in id_path_prefixes:
                video_id = segments[1]

        if not video_id:
            raise ValueError("Invalid YouTube URL format.")
        return video_id

    def forward(self, video_url: str) -> str:
        """Fetch the transcript for ``video_url`` and format it with timestamps.

        Args:
            video_url: The URL of the video.

        Returns:
            One line per snippet in the form ``[start - end] text``, where
            ``end`` is ``start + duration`` and both are printed with two
            decimals; or ``"No transcript available for this video."`` when
            the fetched transcript contains no snippets.

        Raises:
            ValueError: If ``video_url`` is not a recognised YouTube URL.
            Errors raised by ``YouTubeTranscriptApi.fetch`` are not caught
            here and propagate to the caller.
        """
        ytt_api = YouTubeTranscriptApi()
        video_id = self.get_video_id(video_url)
        fetched_transcript = ytt_api.fetch(video_id)

        lines = [
            f"[{snippet.start:.2f} - {snippet.start + snippet.duration:.2f}] {snippet.text}"
            for snippet in fetched_transcript
        ]
        raw_text = "\n".join(lines)
        return raw_text.strip() if raw_text else "No transcript available for this video."
# uses openai model to analyze an image from a URL and answer questions about it
class AnalyzeImage(Tool):
    """Tool that asks an OpenAI model a question about an image given by URL."""

    name = "image_analyzer"
    description = "Given a URL to an image, can analyze it to answer questions."
    inputs = {
        "question": {
            "type": "string",
            "description": "A question about the image. E.g. 'What is in this image?'"
        },
        "image_url": {
            "type": "string",
            "description": "A URL to an image to analyze."
        }
    }
    output_type = "string"

    def forward(self, question: str, image_url: str) -> str:
        """Send the question and image URL to the model and return its text reply.

        A new ``OpenAI()`` client is constructed with default arguments on
        every call. The request is a single user message carrying one
        ``input_text`` part and one ``input_image`` part.

        Args:
            question: The question to ask about the image.
            image_url: URL of the image to analyze.

        Returns:
            The ``output_text`` attribute of the response.
        """
        openai_client = OpenAI()

        text_part = {"type": "input_text", "text": question}
        image_part = {"type": "input_image", "image_url": image_url}
        user_message = {"role": "user", "content": [text_part, image_part]}

        reply = openai_client.responses.create(
            model="gpt-4.1-mini",
            input=[user_message],
        )
        return reply.output_text
class Multiply(Tool):
    """Tool that multiplies two numbers."""

    name = "multiply"
    description = "Multiplies two numbers and returns the result."
    inputs = {
        "a": {"type": "number", "description": "The first number to multiply."},
        "b": {"type": "number", "description": "The second number to multiply."},
    }
    output_type = "number"

    def forward(self, a: float, b: float) -> float:
        """Return the product ``a * b``."""
        product = a * b
        return product
class Divide(Tool):
    """Tool that divides one number by another."""

    name = "divide"
    description = "Divides the first number by the second and returns the result."
    inputs = {
        "a": {"type": "number", "description": "The number to be divided."},
        "b": {"type": "number", "description": "The number to divide by. Must not be zero."},
    }
    output_type = "number"

    def forward(self, a: float, b: float) -> float:
        """Return the quotient ``a / b``.

        Raises:
            ValueError: If ``b`` equals zero.
        """
        if b != 0:
            return a / b
        raise ValueError("Division by zero is not allowed.")
class Add(Tool):
    """Tool that adds two numbers."""

    name = "add"
    description = "Adds two numbers and returns the result."
    inputs = {
        "a": {"type": "number", "description": "The first number to add."},
        "b": {"type": "number", "description": "The second number to add."},
    }
    output_type = "number"

    def forward(self, a: float, b: float) -> float:
        """Return the sum ``a + b``."""
        total = a + b
        return total
class Subtract(Tool):
    """Tool that subtracts one number from another."""

    name = "subtract"
    description = "Subtracts the second number from the first and returns the result."
    inputs = {
        "a": {"type": "number", "description": "The number to subtract from."},
        "b": {"type": "number", "description": "The number to subtract."},
    }
    output_type = "number"

    def forward(self, a: float, b: float) -> float:
        """Return the difference ``a - b``."""
        difference = a - b
        return difference
class Modulus(Tool):
    """Tool that computes the remainder of one number divided by another."""

    name = "modulus"
    description = "Calculates the modulus of the first number by the second and returns the result."
    inputs = {
        "a": {"type": "number", "description": "The dividend."},
        "b": {"type": "number", "description": "The divisor. Must not be zero."},
    }
    output_type = "number"

    def forward(self, a: float, b: float) -> float:
        """Return ``a % b``.

        Raises:
            ValueError: If ``b`` equals zero.
        """
        if b != 0:
            return a % b
        raise ValueError("Division by zero is not allowed.")
class CalculatorTool(Tool):
    """Tool that evaluates an arithmetic expression supplied as a string.

    The result is returned as a string, matching ``output_type``. Failures
    are reported in the returned string instead of being raised.
    """

    name = "calculator"
    # The description previously promised a float, but the tool's declared
    # output type and its actual return value are both strings.
    description = "Evaluates simple arithmetic and returns the result as a string"
    inputs = {
        "expression": {
            "type": "string",
            "description": "An arithmetic expression to solve"
        }
    }
    output_type = "string"

    def forward(self, expression: str) -> str:
        """Evaluate ``expression`` and return the numeric result as text.

        Args:
            expression: The expression to evaluate.

        Returns:
            ``str(result)`` when the expression evaluates to an ``int`` or
            ``float``; otherwise a message starting with
            ``"Error evaluating expression: "`` describing the failure
            (including the case where the result is not a number).
        """
        # SECURITY NOTE(review): ``expression`` comes from the agent/LLM and
        # is therefore untrusted. eval() with an empty ``__builtins__`` is
        # NOT a sandbox -- attribute access on literals can still reach
        # arbitrary objects. Consider replacing this with an ``ast``-based
        # evaluator restricted to numeric operators.
        allowed_names = {"__builtins__": {}}
        try:
            result = eval(expression, allowed_names)
            if isinstance(result, (int, float)):
                # str() stays inside the try: converting a very large int
                # can itself raise, and that must become an error string.
                return str(result)
            else:
                raise ValueError("Expression did not return a number.")
        except Exception as e:
            return f"Error evaluating expression: {e}"
class WikiSearchTool(Tool):
    """Tool that returns a short Wikipedia summary for a search query."""

    name = "wiki_search"
    description = "Searches Wikipedia and returns a summary of the top result."
    inputs = {
        "query": {
            "type": "string",
            "description": "The search query to find information on Wikipedia."
        }
    }
    output_type = "string"

    def forward(self, query: str) -> str:
        """Look up ``query`` with ``wikipedia.summary`` and return the text.

        Args:
            query: The search query.

        Returns:
            The summary requested with ``sentences=5``. On failure a
            message is returned instead of raising: up to five candidate
            topics for a disambiguation error, a fixed "not found" message
            for a page error, or ``"Error: ..."`` for anything else.
        """
        try:
            return wikipedia.summary(query, sentences=5)
        except wikipedia.DisambiguationError as ambiguous:
            candidates = ", ".join(ambiguous.options[:5])
            return "Disambiguation error. Possible topics: " + candidates
        except wikipedia.PageError:
            return "No Wikipedia page found for the given query."
        except Exception as err:
            return "Error: " + str(err)
# for testing
if __name__ == "__main__":
    # Manual smoke test: instantiate each tool, print a banner, then print
    # the result of one sample ``forward`` call.
    # NOTE(review): these calls presumably reach external services
    # (Wikipedia, YouTube, OpenAI) -- confirm credentials/network first.
    demo_runs = (
        (
            WikiSearchTool,
            "Testing WikiSearchTool:\n",
            ("Python programming language",),
        ),
        (
            CalculatorTool,
            "\nTesting CalculatorTool:\n",
            ("2 + 2 * 3",),
        ),
        (
            YouTubeTranscript,
            "\nTesting YouTubeTranscript:\n",
            ("https://www.youtube.com/watch?v=JSH2GW2Aaxw",),
        ),
        (
            AnalyzeImage,
            "\nTesting AnalyzeImage:\n",
            (
                "What is in this image?",
                "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
            ),
        ),
    )

    for tool_cls, banner, call_args in demo_runs:
        # Build the tool first, then print, so each tool is constructed
        # just before its own banner.
        demo_tool = tool_cls()
        print(banner)
        print(demo_tool.forward(*call_args))