# References:
# https://docs.crewai.com/introduction
# https://ai.google.dev/gemini-api/docs
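"""CrewAI tool definitions that wrap Gemini multimodal APIs and a Stagehand web browser."""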
import os, re, time
from agents.models.llms import (
LLM_WEB_SEARCH,
LLM_WEB_BROWSER,
LLM_IMAGE_ANALYSIS,
LLM_AUDIO_ANALYSIS,
LLM_VIDEO_ANALYSIS,
LLM_YOUTUBE_ANALYSIS,
LLM_DOCUMENT_ANALYSIS,
LLM_CODE_GENERATION,
LLM_CODE_EXECUTION,
LLM_IMAGE_TO_FEN,
LLM_ALGEBRAIC_NOTATION,
LLM_FINAL_ANSWER,
LLM_FALLBACK,
THINKING_LEVEL_WEB_SEARCH,
THINKING_LEVEL_MEDIA_ANALYSIS,
THINKING_LEVEL_YOUTUBE_ANALYSIS,
THINKING_LEVEL_DOCUMENT_ANALYSIS,
THINKING_LEVEL_CODE_GENERATION,
THINKING_LEVEL_CODE_EXECUTION,
THINKING_LEVEL_IMAGE_TO_FEN,
THINKING_LEVEL_ALGEBRAIC_NOTATION,
THINKING_LEVEL_FINAL_ANSWER
)
from agents.models.prompts import (
PROMPT_IMG_TO_FEN,
PROMPT_ALGEBRAIC_NOTATION,
PROMPT_FINAL_ANSWER
)
from crewai.tools import tool
from crewai_tools import StagehandTool
from google import genai
from google.genai import types
from utils.utils import (
read_docx_text,
read_pptx_text,
is_ext
)
class AITools:
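    # Builds a Gemini client from the GEMINI_API_KEY environment variable.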
    @staticmethod
    def _get_client():
return genai.Client(api_key=os.environ["GEMINI_API_KEY"])
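    # Daily free-tier quota errors surface as HTTP 429 with status RESOURCE_EXHAUSTED.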
    @staticmethod
    def _is_rate_limit_error(exception):
error_str = str(exception)
return "429" in error_str and "RESOURCE_EXHAUSTED" in error_str
    @staticmethod
    def _media_analysis_tool(tool_name: str, model: str, question: str, file_path: str) -> str:
print("")
print(f"๐Ÿ› ๏ธ AITools: {tool_name}: question={question}, file_path={file_path}")
client = AITools._get_client()
current_model = model
for attempt in range(2):
try:
file = client.files.upload(file=file_path)
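                # Poll the Files API until the upload has been processed.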
while True:
media_file = client.files.get(name=file.name)
if media_file.state == "ACTIVE":
break
elif media_file.state == "FAILED":
raise RuntimeError("Media file processing failed")
time.sleep(1)
config_params = {}
if current_model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_MEDIA_ANALYSIS
)
response = client.models.generate_content(
model=current_model,
contents=[file, question],
config=types.GenerateContentConfig(**config_params)
)
result = response.text
print(f"๐Ÿ› ๏ธ AITools: {tool_name}: model={current_model}")
if current_model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: {tool_name}: thinking_level={THINKING_LEVEL_MEDIA_ANALYSIS}")
print(f"๐Ÿ› ๏ธ AITools: {tool_name}: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: {tool_name}: Daily rate limit hit with {current_model}, falling back to {LLM_FALLBACK}")
current_model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: {tool_name}: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
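    # Returns the stdout of the model's sandboxed code execution, if any part has one.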
    @staticmethod
    def _extract_execution_result(response):
for part in response.candidates[0].content.parts:
if part.code_execution_result is not None:
return part.code_execution_result.output
return None
@tool("Web Search Tool")
def web_search_tool(question: str) -> str:
"""Given a question only, search the web to answer the question.
Args:
question (str): Question to answer
Returns:
str: Answer to the question
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: web_search_tool: question={question}")
client = AITools._get_client()
model = LLM_WEB_SEARCH
for attempt in range(2):
try:
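                # Ground the answer with Gemini's built-in Google Search tool.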
config_params = {"tools": [types.Tool(google_search=types.GoogleSearch())]}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_WEB_SEARCH
)
response = client.models.generate_content(
model=model,
contents=question,
config=types.GenerateContentConfig(**config_params)
)
result = response.text
print(f"๐Ÿ› ๏ธ AITools: web_search_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: web_search_tool: thinking_level={THINKING_LEVEL_WEB_SEARCH}")
print(f"๐Ÿ› ๏ธ AITools: web_search_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: web_search_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: web_search_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
@tool("Web Browser Tool")
def web_browser_tool(question: str, url: str) -> str:
"""Given a question and URL, load the URL and act, extract, or observe to answer the question.
Args:
question (str): Question about a URL
url (str): The target URL (must be http/https). "http://"/"https://" will be auto-added if missing.
Returns:
str: Answer to the question
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: web_browser_tool: question={question}, url={url}")
try:
url_str = url.strip()
if not url_str.lower().startswith(("http://", "https://")):
url_str = f"https://{url_str}"
with StagehandTool(
api_key=os.environ["BROWSERBASE_API_KEY"],
project_id=os.environ["BROWSERBASE_PROJECT_ID"],
model_api_key=os.environ["ANTHROPIC_API_KEY"],
model_name=LLM_WEB_BROWSER,
dom_settle_timeout_ms=5000,
headless=True,
self_heal=True,
wait_for_captcha_solves=True,
verbose=3
) as stagehand_tool:
result = stagehand_tool.run(
instruction=question,
url=url_str,
command_type="act" # TODO: act, extract, observe
)
print(f"๐Ÿ› ๏ธ AITools: web_browser_tool: model={LLM_WEB_BROWSER}")
print(f"๐Ÿ› ๏ธ AITools: web_browser_tool: command_type=act")
print(f"๐Ÿ› ๏ธ AITools: web_browser_tool: result={result}")
return result
except Exception as e:
print(f"โš ๏ธ AITools: web_browser_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
@tool("Image Analysis Tool")
def image_analysis_tool(question: str, file_path: str) -> str:
"""Given a question and image file, analyze the image to answer the question.
Args:
question (str): Question about an image file
file_path (str): The image file path
Returns:
str: Answer to the question about the image file
Raises:
RuntimeError: If processing fails
"""
return AITools._media_analysis_tool("image_analysis_tool", LLM_IMAGE_ANALYSIS, question, file_path)
@tool("Audio Analysis Tool")
def audio_analysis_tool(question: str, file_path: str) -> str:
"""Given a question and audio file, analyze the audio to answer the question.
Args:
question (str): Question about an audio file
file_path (str): The audio file path
Returns:
str: Answer to the question about the audio file
Raises:
RuntimeError: If processing fails
"""
return AITools._media_analysis_tool("audio_analysis_tool", LLM_AUDIO_ANALYSIS, question, file_path)
@tool("Video Analysis Tool")
def video_analysis_tool(question: str, file_path: str) -> str:
"""Given a question and video file, analyze the video to answer the question.
Args:
question (str): Question about a video file
file_path (str): The video file path
Returns:
str: Answer to the question about the video file
Raises:
RuntimeError: If processing fails
"""
return AITools._media_analysis_tool("video_analysis_tool", LLM_VIDEO_ANALYSIS, question, file_path)
@tool("YouTube Analysis Tool")
def youtube_analysis_tool(question: str, url: str) -> str:
"""Given a question and YouTube URL, analyze the video to answer the question.
Args:
question (str): Question about a YouTube video
url (str): The YouTube URL
Returns:
str: Answer to the question about the YouTube video
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: youtube_analysis_tool: question={question}, url={url}")
client = AITools._get_client()
model = LLM_YOUTUBE_ANALYSIS
for attempt in range(2):
try:
config_params = {}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_YOUTUBE_ANALYSIS
)
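                # Gemini accepts a public YouTube URL directly as file_data; no download needed.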
                response = client.models.generate_content(
                    model=model,
                    contents=types.Content(
                        parts=[types.Part(file_data=types.FileData(file_uri=url)),
                               types.Part(text=question)]
                    ),
                    config=types.GenerateContentConfig(**config_params)
                )
                result = response.text
print(f"๐Ÿ› ๏ธ AITools: youtube_analysis_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: youtube_analysis_tool: thinking_level={THINKING_LEVEL_YOUTUBE_ANALYSIS}")
print(f"๐Ÿ› ๏ธ AITools: youtube_analysis_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: youtube_analysis_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: youtube_analysis_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
@tool("Document Analysis Tool")
def document_analysis_tool(question: str, file_path: str) -> str:
"""Given a question and document file, analyze the document to answer the question.
Args:
question (str): Question about a document file
file_path (str): The document file path
Returns:
str: Answer to the question about the document file
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: document_analysis_tool: question={question}, file_path={file_path}")
client = AITools._get_client()
model = LLM_DOCUMENT_ANALYSIS
for attempt in range(2):
try:
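                # .docx/.pptx are not uploaded; their text is extracted locally and
                # inlined into the prompt. All other formats go through the Files API.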
contents = []
if is_ext(file_path, ".docx"):
text_data = read_docx_text(file_path)
contents = [f"{question}\n{text_data}"]
print(f"๐Ÿ› ๏ธ Text data:\n{text_data}")
elif is_ext(file_path, ".pptx"):
text_data = read_pptx_text(file_path)
contents = [f"{question}\n{text_data}"]
print(f"๐Ÿ› ๏ธ Text data:\n{text_data}")
else:
file = client.files.upload(file=file_path)
contents = [file, question]
config_params = {}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_DOCUMENT_ANALYSIS
)
response = client.models.generate_content(
model=model,
contents=contents,
config=types.GenerateContentConfig(**config_params)
)
result = response.text
print(f"๐Ÿ› ๏ธ AITools: document_analysis_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: document_analysis_tool: thinking_level={THINKING_LEVEL_DOCUMENT_ANALYSIS}")
print(f"๐Ÿ› ๏ธ AITools: document_analysis_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: document_analysis_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: document_analysis_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
@tool("Code Generation and Execution Tool")
def code_generation_and_execution_tool(question: str, json_data: str) -> str:
"""Given a question and JSON data, generate and execute code to answer the question.
Args:
question (str): Question to answer
            json_data (str): The JSON data
Returns:
str: Answer to the question
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: code_generation_and_execution_tool: question={question}, json_data={json_data}")
client = AITools._get_client()
model = LLM_CODE_GENERATION
for attempt in range(2):
try:
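                # Let Gemini write and run Python in its sandbox; only the execution
                # output (not the generated code) is returned to the caller.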
config_params = {"tools": [types.Tool(code_execution=types.ToolCodeExecution)]}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_CODE_GENERATION
)
response = client.models.generate_content(
model=model,
contents=[f"{question}\n{json_data}"],
config=types.GenerateContentConfig(**config_params),
)
result = AITools._extract_execution_result(response)
print(f"๐Ÿ› ๏ธ AITools: code_generation_and_execution_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: code_generation_and_execution_tool: thinking_level={THINKING_LEVEL_CODE_GENERATION}")
print(f"๐Ÿ› ๏ธ AITools: code_generation_and_execution_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: code_generation_and_execution_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: code_generation_and_execution_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
@tool("Code Execution Tool")
def code_execution_tool(question: str, file_path: str) -> str:
"""Given a question and Python file, execute the file to answer the question.
Args:
question (str): Question to answer
file_path (str): The Python file path
Returns:
str: Answer to the question
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: code_execution_tool: question={question}, file_path={file_path}")
client = AITools._get_client()
model = LLM_CODE_EXECUTION
for attempt in range(2):
try:
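                # Upload the Python file and let Gemini's sandbox execute it.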
file = client.files.upload(file=file_path)
config_params = {"tools": [types.Tool(code_execution=types.ToolCodeExecution)]}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_CODE_EXECUTION
)
response = client.models.generate_content(
model=model,
contents=[file, question],
config=types.GenerateContentConfig(**config_params),
)
result = AITools._extract_execution_result(response)
print(f"๐Ÿ› ๏ธ AITools: code_execution_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: code_execution_tool: thinking_level={THINKING_LEVEL_CODE_EXECUTION}")
print(f"๐Ÿ› ๏ธ AITools: code_execution_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: code_execution_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: code_execution_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
@tool("Image to FEN Tool")
def img_to_fen_tool(question: str, file_path: str, active_color: str) -> str:
"""Given a chess question, image file, and active color, return the FEN.
Args:
question (str): The chess question
file_path (str): The image file path
active_color (str): The active color
Returns:
str: FEN of the chess position
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: img_to_fen_tool: question={question}, file_path={file_path}, active_color={active_color}")
client = AITools._get_client()
model = LLM_IMAGE_TO_FEN
for attempt in range(2):
try:
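                # Send the board image inline (PNG assumed) alongside the FEN prompt.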
                with open(file_path, "rb") as f:
                    img_bytes = f.read()
                prompt = PROMPT_IMG_TO_FEN.format(question=question, active_color=active_color)
                content = types.Content(
                    parts=[
                        types.Part(text=prompt),
                        types.Part(
                            inline_data=types.Blob(
                                mime_type="image/png",
                                data=img_bytes,
                            )
                        )
                    ]
                )
config_params = {}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_IMAGE_TO_FEN
)
response = client.models.generate_content(
model=model,
contents=[content],
config=types.GenerateContentConfig(**config_params)
)
result = None
                for part in response.candidates[0].content.parts:
if part.text is not None:
result = part.text
break
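                # Prefer a strict FEN match (placement, active color, castling,
                # en passant, halfmove and fullmove clocks); otherwise fall back to
                # the first line that merely looks like a FEN.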
fen_pattern = r'\b([rnbqkpRNBQKP1-8\/]+\s+[wb]\s+(?:-|[KQkq]+)\s+(?:-|[a-h][36])\s+\d+\s+\d+)\b'
is_match = re.search(fen_pattern, result)
if is_match:
result = is_match.group(1)
else:
lines = result.strip().split("\n")
for line in lines:
line = line.strip()
if "/" in line and (" w " in line or " b " in line):
result = line
break
print(f"๐Ÿ› ๏ธ AITools: img_to_fen_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: img_to_fen_tool: thinking_level={THINKING_LEVEL_IMAGE_TO_FEN}")
print(f"๐Ÿ› ๏ธ AITools: img_to_fen_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: img_to_fen_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: img_to_fen_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
@tool("Algebraic Notation Tool")
def algebraic_notation_tool(question: str, file_path: str, position_evaluation: str) -> str:
"""Given a chess question, image file, and position evaluation in UCI notation, answer the question in algebraic notation.
Args:
question (str): The chess question
file_path (str): The image file path
position_evaluation (str): The position evaluation in UCI notation
Returns:
str: Answer to the question in algebraic notation
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: algebraic_notation_tool: question={question}, file_path={file_path}, position_evaluation={position_evaluation}")
client = AITools._get_client()
model = LLM_ALGEBRAIC_NOTATION
for attempt in range(2):
try:
                with open(file_path, "rb") as f:
                    img_bytes = f.read()
                prompt = PROMPT_ALGEBRAIC_NOTATION.format(question=question, position_evaluation=position_evaluation)
                content = types.Content(
                    parts=[
                        types.Part(text=prompt),
                        types.Part(
                            inline_data=types.Blob(
                                mime_type="image/png",
                                data=img_bytes,
                            )
                        )
                    ]
                )
config_params = {}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_ALGEBRAIC_NOTATION
)
response = client.models.generate_content(
model=model,
contents=[content],
config=types.GenerateContentConfig(**config_params)
)
result = None
                for part in response.candidates[0].content.parts:
if part.text is not None:
result = part.text
break
print(f"๐Ÿ› ๏ธ AITools: algebraic_notation_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: algebraic_notation_tool: thinking_level={THINKING_LEVEL_ALGEBRAIC_NOTATION}")
print(f"๐Ÿ› ๏ธ AITools: algebraic_notation_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: algebraic_notation_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: algebraic_notation_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
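    # Not registered as a crewai tool: called directly to normalize the final answer.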
    @staticmethod
    def final_answer_tool(question: str, answer: str) -> str:
"""Given a question and initial answer, get the final answer.
Args:
question (str): The question
answer (str): The initial answer
Returns:
str: Final answer
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: final_answer_tool: question={question}, answer={answer}")
client = AITools._get_client()
model = LLM_FINAL_ANSWER
for attempt in range(2):
try:
prompt = PROMPT_FINAL_ANSWER.format(question=question, answer=answer)
config_params = {}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_FINAL_ANSWER
)
response = client.models.generate_content(
model=model,
contents=[prompt],
config=types.GenerateContentConfig(**config_params)
)
result = response.text.strip()
print(f"๐Ÿ› ๏ธ AITools: final_answer_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: final_answer_tool: thinking_level={THINKING_LEVEL_FINAL_ANSWER}")
print(f"๐Ÿ› ๏ธ AITools: final_answer_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: final_answer_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: final_answer_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
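# A minimal manual smoke test, guarded so importing this module stays side-effect
# free. This is a sketch only: it assumes GEMINI_API_KEY is set and that crewai's
# @tool objects expose .run(); the question below is purely illustrative.
if __name__ == "__main__":
    draft = AITools.web_search_tool.run(question="What is the capital of Australia?")
    print(AITools.final_answer_tool("What is the capital of Australia?", draft))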