# References:
# https://docs.crewai.com/introduction
# https://ai.google.dev/gemini-api/docs
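"""CrewAI tool definitions that wrap Gemini multimodal APIs and a Stagehand web browser."""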
import os, re, time
from agents.models.llms import (
LLM_WEB_SEARCH,
LLM_WEB_BROWSER,
LLM_IMAGE_ANALYSIS,
LLM_AUDIO_ANALYSIS,
LLM_VIDEO_ANALYSIS,
LLM_YOUTUBE_ANALYSIS,
LLM_DOCUMENT_ANALYSIS,
LLM_CODE_GENERATION,
LLM_CODE_EXECUTION,
LLM_IMAGE_TO_FEN,
LLM_ALGEBRAIC_NOTATION,
LLM_FINAL_ANSWER,
LLM_FALLBACK,
THINKING_LEVEL_WEB_SEARCH,
THINKING_LEVEL_MEDIA_ANALYSIS,
THINKING_LEVEL_YOUTUBE_ANALYSIS,
THINKING_LEVEL_DOCUMENT_ANALYSIS,
THINKING_LEVEL_CODE_GENERATION,
THINKING_LEVEL_CODE_EXECUTION,
THINKING_LEVEL_IMAGE_TO_FEN,
THINKING_LEVEL_ALGEBRAIC_NOTATION,
THINKING_LEVEL_FINAL_ANSWER
)
from agents.models.prompts import (
PROMPT_IMG_TO_FEN,
PROMPT_ALGEBRAIC_NOTATION,
PROMPT_FINAL_ANSWER
)
from crewai.tools import tool
from crewai_tools import StagehandTool
from google import genai
from google.genai import types
from utils.utils import (
read_docx_text,
read_pptx_text,
is_ext
)
class AITools:
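    # Builds a Gemini client from the GEMINI_API_KEY environment variable.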
    @staticmethod
    def _get_client():
return genai.Client(api_key=os.environ["GEMINI_API_KEY"])
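    # Daily free-tier quota errors surface as HTTP 429 with status RESOURCE_EXHAUSTED.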
    @staticmethod
    def _is_rate_limit_error(exception):
error_str = str(exception)
return "429" in error_str and "RESOURCE_EXHAUSTED" in error_str
    @staticmethod
    def _media_analysis_tool(tool_name: str, model: str, question: str, file_path: str) -> str:
print("")
print(f"๐Ÿ› ๏ธ AITools: {tool_name}: question={question}, file_path={file_path}")
client = AITools._get_client()
current_model = model
for attempt in range(2):
try:
file = client.files.upload(file=file_path)
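                # Poll the Files API until the upload has been processed.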
while True:
media_file = client.files.get(name=file.name)
if media_file.state == "ACTIVE":
break
elif media_file.state == "FAILED":
raise RuntimeError("Media file processing failed")
time.sleep(1)
config_params = {}
if current_model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_MEDIA_ANALYSIS
)
response = client.models.generate_content(
model=current_model,
contents=[file, question],
config=types.GenerateContentConfig(**config_params)
)
result = response.text
print(f"๐Ÿ› ๏ธ AITools: {tool_name}: model={current_model}")
if current_model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: {tool_name}: thinking_level={THINKING_LEVEL_MEDIA_ANALYSIS}")
print(f"๐Ÿ› ๏ธ AITools: {tool_name}: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: {tool_name}: Daily rate limit hit with {current_model}, falling back to {LLM_FALLBACK}")
current_model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: {tool_name}: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
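    # Returns the stdout of the model's sandboxed code execution, if any part has one.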
    @staticmethod
    def _extract_execution_result(response):
for part in response.candidates[0].content.parts:
if part.code_execution_result is not None:
return part.code_execution_result.output
return None
@tool("Web Search Tool")
def web_search_tool(question: str) -> str:
"""Given a question only, search the web to answer the question.
Args:
question (str): Question to answer
Returns:
str: Answer to the question
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: web_search_tool: question={question}")
client = AITools._get_client()
model = LLM_WEB_SEARCH
for attempt in range(2):
try:
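                # Ground the answer with Gemini's built-in Google Search tool.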
config_params = {"tools": [types.Tool(google_search=types.GoogleSearch())]}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_WEB_SEARCH
)
response = client.models.generate_content(
model=model,
contents=question,
config=types.GenerateContentConfig(**config_params)
)
result = response.text
print(f"๐Ÿ› ๏ธ AITools: web_search_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: web_search_tool: thinking_level={THINKING_LEVEL_WEB_SEARCH}")
print(f"๐Ÿ› ๏ธ AITools: web_search_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: web_search_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: web_search_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
@tool("Web Browser Tool")
def web_browser_tool(question: str, url: str) -> str:
"""Given a question and URL, load the URL and act, extract, or observe to answer the question.
Args:
question (str): Question about a URL
url (str): The target URL (must be http/https). "http://"/"https://" will be auto-added if missing.
Returns:
str: Answer to the question
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: web_browser_tool: question={question}, url={url}")
try:
url_str = url.strip()
if not url_str.lower().startswith(("http://", "https://")):
url_str = f"https://{url_str}"
with StagehandTool(
api_key=os.environ["BROWSERBASE_API_KEY"],
project_id=os.environ["BROWSERBASE_PROJECT_ID"],
model_api_key=os.environ["ANTHROPIC_API_KEY"],
model_name=LLM_WEB_BROWSER,
dom_settle_timeout_ms=5000,
headless=True,
self_heal=True,
wait_for_captcha_solves=True,
verbose=3
) as stagehand_tool:
result = stagehand_tool.run(
instruction=question,
url=url_str,
command_type="act" # TODO: act, extract, observe
)
print(f"๐Ÿ› ๏ธ AITools: web_browser_tool: model={LLM_WEB_BROWSER}")
print(f"๐Ÿ› ๏ธ AITools: web_browser_tool: command_type=act")
print(f"๐Ÿ› ๏ธ AITools: web_browser_tool: result={result}")
return result
except Exception as e:
print(f"โš ๏ธ AITools: web_browser_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
@tool("Image Analysis Tool")
def image_analysis_tool(question: str, file_path: str) -> str:
"""Given a question and image file, analyze the image to answer the question.
Args:
question (str): Question about an image file
file_path (str): The image file path
Returns:
str: Answer to the question about the image file
Raises:
RuntimeError: If processing fails
"""
return AITools._media_analysis_tool("image_analysis_tool", LLM_IMAGE_ANALYSIS, question, file_path)
@tool("Audio Analysis Tool")
def audio_analysis_tool(question: str, file_path: str) -> str:
"""Given a question and audio file, analyze the audio to answer the question.
Args:
question (str): Question about an audio file
file_path (str): The audio file path
Returns:
str: Answer to the question about the audio file
Raises:
RuntimeError: If processing fails
"""
return AITools._media_analysis_tool("audio_analysis_tool", LLM_AUDIO_ANALYSIS, question, file_path)
@tool("Video Analysis Tool")
def video_analysis_tool(question: str, file_path: str) -> str:
"""Given a question and video file, analyze the video to answer the question.
Args:
question (str): Question about a video file
file_path (str): The video file path
Returns:
str: Answer to the question about the video file
Raises:
RuntimeError: If processing fails
"""
return AITools._media_analysis_tool("video_analysis_tool", LLM_VIDEO_ANALYSIS, question, file_path)
@tool("YouTube Analysis Tool")
def youtube_analysis_tool(question: str, url: str) -> str:
"""Given a question and YouTube URL, analyze the video to answer the question.
Args:
question (str): Question about a YouTube video
url (str): The YouTube URL
Returns:
str: Answer to the question about the YouTube video
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: youtube_analysis_tool: question={question}, url={url}")
client = AITools._get_client()
model = LLM_YOUTUBE_ANALYSIS
for attempt in range(2):
try:
config_params = {}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_YOUTUBE_ANALYSIS
)
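                # Gemini accepts a public YouTube URL directly as file_data; no download needed.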
                response = client.models.generate_content(
                    model=model,
                    contents=types.Content(
                        parts=[types.Part(file_data=types.FileData(file_uri=url)),
                               types.Part(text=question)]
                    ),
                    config=types.GenerateContentConfig(**config_params)
                )
                result = response.text
print(f"๐Ÿ› ๏ธ AITools: youtube_analysis_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: youtube_analysis_tool: thinking_level={THINKING_LEVEL_YOUTUBE_ANALYSIS}")
print(f"๐Ÿ› ๏ธ AITools: youtube_analysis_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: youtube_analysis_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: youtube_analysis_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
@tool("Document Analysis Tool")
def document_analysis_tool(question: str, file_path: str) -> str:
"""Given a question and document file, analyze the document to answer the question.
Args:
question (str): Question about a document file
file_path (str): The document file path
Returns:
str: Answer to the question about the document file
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: document_analysis_tool: question={question}, file_path={file_path}")
client = AITools._get_client()
model = LLM_DOCUMENT_ANALYSIS
for attempt in range(2):
try:
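                # .docx/.pptx are not uploaded; their text is extracted locally and
                # inlined into the prompt. All other formats go through the Files API.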
contents = []
if is_ext(file_path, ".docx"):
text_data = read_docx_text(file_path)
contents = [f"{question}\n{text_data}"]
print(f"๐Ÿ› ๏ธ Text data:\n{text_data}")
elif is_ext(file_path, ".pptx"):
text_data = read_pptx_text(file_path)
contents = [f"{question}\n{text_data}"]
print(f"๐Ÿ› ๏ธ Text data:\n{text_data}")
else:
file = client.files.upload(file=file_path)
contents = [file, question]
config_params = {}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_DOCUMENT_ANALYSIS
)
response = client.models.generate_content(
model=model,
contents=contents,
config=types.GenerateContentConfig(**config_params)
)
result = response.text
print(f"๐Ÿ› ๏ธ AITools: document_analysis_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: document_analysis_tool: thinking_level={THINKING_LEVEL_DOCUMENT_ANALYSIS}")
print(f"๐Ÿ› ๏ธ AITools: document_analysis_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: document_analysis_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: document_analysis_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
@tool("Code Generation and Execution Tool")
def code_generation_and_execution_tool(question: str, json_data: str) -> str:
"""Given a question and JSON data, generate and execute code to answer the question.
Args:
question (str): Question to answer
            json_data (str): The JSON data
Returns:
str: Answer to the question
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: code_generation_and_execution_tool: question={question}, json_data={json_data}")
client = AITools._get_client()
model = LLM_CODE_GENERATION
for attempt in range(2):
try:
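                # Let Gemini write and run Python in its sandbox; only the execution
                # output (not the generated code) is returned to the caller.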
config_params = {"tools": [types.Tool(code_execution=types.ToolCodeExecution)]}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_CODE_GENERATION
)
response = client.models.generate_content(
model=model,
contents=[f"{question}\n{json_data}"],
config=types.GenerateContentConfig(**config_params),
)
result = AITools._extract_execution_result(response)
print(f"๐Ÿ› ๏ธ AITools: code_generation_and_execution_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: code_generation_and_execution_tool: thinking_level={THINKING_LEVEL_CODE_GENERATION}")
print(f"๐Ÿ› ๏ธ AITools: code_generation_and_execution_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: code_generation_and_execution_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: code_generation_and_execution_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
@tool("Code Execution Tool")
def code_execution_tool(question: str, file_path: str) -> str:
"""Given a question and Python file, execute the file to answer the question.
Args:
question (str): Question to answer
file_path (str): The Python file path
Returns:
str: Answer to the question
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: code_execution_tool: question={question}, file_path={file_path}")
client = AITools._get_client()
model = LLM_CODE_EXECUTION
for attempt in range(2):
try:
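                # Upload the Python file and let Gemini's sandbox execute it.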
file = client.files.upload(file=file_path)
config_params = {"tools": [types.Tool(code_execution=types.ToolCodeExecution)]}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_CODE_EXECUTION
)
response = client.models.generate_content(
model=model,
contents=[file, question],
config=types.GenerateContentConfig(**config_params),
)
result = AITools._extract_execution_result(response)
print(f"๐Ÿ› ๏ธ AITools: code_execution_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: code_execution_tool: thinking_level={THINKING_LEVEL_CODE_EXECUTION}")
print(f"๐Ÿ› ๏ธ AITools: code_execution_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: code_execution_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: code_execution_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
@tool("Image to FEN Tool")
def img_to_fen_tool(question: str, file_path: str, active_color: str) -> str:
"""Given a chess question, image file, and active color, return the FEN.
Args:
question (str): The chess question
file_path (str): The image file path
active_color (str): The active color
Returns:
str: FEN of the chess position
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: img_to_fen_tool: question={question}, file_path={file_path}, active_color={active_color}")
client = AITools._get_client()
model = LLM_IMAGE_TO_FEN
for attempt in range(2):
try:
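                # Send the board image inline (PNG assumed) alongside the FEN prompt.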
                with open(file_path, "rb") as f:
                    img_bytes = f.read()
                prompt = PROMPT_IMG_TO_FEN.format(question=question, active_color=active_color)
                content = types.Content(
                    parts=[
                        types.Part(text=prompt),
                        types.Part(
                            inline_data=types.Blob(
                                mime_type="image/png",
                                data=img_bytes,
                            )
                        )
                    ]
                )
config_params = {}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_IMAGE_TO_FEN
)
response = client.models.generate_content(
model=model,
contents=[content],
config=types.GenerateContentConfig(**config_params)
)
result = None
                for part in response.candidates[0].content.parts:
if part.text is not None:
result = part.text
break
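                # Prefer a strict FEN match (placement, active color, castling,
                # en passant, halfmove and fullmove clocks); otherwise fall back to
                # the first line that merely looks like a FEN.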
fen_pattern = r'\b([rnbqkpRNBQKP1-8\/]+\s+[wb]\s+(?:-|[KQkq]+)\s+(?:-|[a-h][36])\s+\d+\s+\d+)\b'
is_match = re.search(fen_pattern, result)
if is_match:
result = is_match.group(1)
else:
lines = result.strip().split("\n")
for line in lines:
line = line.strip()
if "/" in line and (" w " in line or " b " in line):
result = line
break
print(f"๐Ÿ› ๏ธ AITools: img_to_fen_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: img_to_fen_tool: thinking_level={THINKING_LEVEL_IMAGE_TO_FEN}")
print(f"๐Ÿ› ๏ธ AITools: img_to_fen_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: img_to_fen_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: img_to_fen_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
@tool("Algebraic Notation Tool")
def algebraic_notation_tool(question: str, file_path: str, position_evaluation: str) -> str:
"""Given a chess question, image file, and position evaluation in UCI notation, answer the question in algebraic notation.
Args:
question (str): The chess question
file_path (str): The image file path
position_evaluation (str): The position evaluation in UCI notation
Returns:
str: Answer to the question in algebraic notation
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: algebraic_notation_tool: question={question}, file_path={file_path}, position_evaluation={position_evaluation}")
client = AITools._get_client()
model = LLM_ALGEBRAIC_NOTATION
for attempt in range(2):
try:
                with open(file_path, "rb") as f:
                    img_bytes = f.read()
                prompt = PROMPT_ALGEBRAIC_NOTATION.format(question=question, position_evaluation=position_evaluation)
                content = types.Content(
                    parts=[
                        types.Part(text=prompt),
                        types.Part(
                            inline_data=types.Blob(
                                mime_type="image/png",
                                data=img_bytes,
                            )
                        )
                    ]
                )
config_params = {}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_ALGEBRAIC_NOTATION
)
response = client.models.generate_content(
model=model,
contents=[content],
config=types.GenerateContentConfig(**config_params)
)
result = None
                for part in response.candidates[0].content.parts:
if part.text is not None:
result = part.text
break
print(f"๐Ÿ› ๏ธ AITools: algebraic_notation_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: algebraic_notation_tool: thinking_level={THINKING_LEVEL_ALGEBRAIC_NOTATION}")
print(f"๐Ÿ› ๏ธ AITools: algebraic_notation_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: algebraic_notation_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: algebraic_notation_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
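    # Not registered as a crewai tool: called directly to normalize the final answer.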
    @staticmethod
    def final_answer_tool(question: str, answer: str) -> str:
"""Given a question and initial answer, get the final answer.
Args:
question (str): The question
answer (str): The initial answer
Returns:
str: Final answer
Raises:
RuntimeError: If processing fails
"""
print("")
print(f"๐Ÿ› ๏ธ AITools: final_answer_tool: question={question}, answer={answer}")
client = AITools._get_client()
model = LLM_FINAL_ANSWER
for attempt in range(2):
try:
prompt = PROMPT_FINAL_ANSWER.format(question=question, answer=answer)
config_params = {}
if model != LLM_FALLBACK:
config_params["thinking_config"] = types.ThinkingConfig(
thinking_level=THINKING_LEVEL_FINAL_ANSWER
)
response = client.models.generate_content(
model=model,
contents=[prompt],
config=types.GenerateContentConfig(**config_params)
)
result = response.text.strip()
print(f"๐Ÿ› ๏ธ AITools: final_answer_tool: model={model}")
if model != LLM_FALLBACK:
print(f"๐Ÿ› ๏ธ AITools: final_answer_tool: thinking_level={THINKING_LEVEL_FINAL_ANSWER}")
print(f"๐Ÿ› ๏ธ AITools: final_answer_tool: result={result}")
return result
except Exception as e:
if attempt == 0 and AITools._is_rate_limit_error(e):
print(f"โš ๏ธ AITools: final_answer_tool: Daily rate limit hit with {model}, falling back to {LLM_FALLBACK}")
model = LLM_FALLBACK
continue
print(f"โš ๏ธ AITools: final_answer_tool: exception={str(e)}")
raise RuntimeError(f"Processing failed: {str(e)}")
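# A minimal manual smoke test, guarded so importing this module stays side-effect
# free. This is a sketch only: it assumes GEMINI_API_KEY is set and that crewai's
# @tool objects expose .run(); the question below is purely illustrative.
if __name__ == "__main__":
    draft = AITools.web_search_tool.run(question="What is the capital of Australia?")
    print(AITools.final_answer_tool("What is the capital of Australia?", draft))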