agents-course-Final_Assignment

Paused

App Files Files Community

agents-course-Final_Assignment / tools.py

lucasnseq

Update tools.py

32bb869 verified 10 days ago

raw

history blame contribute delete

4.61 kB

	# Libs
	import os
	import requests
	import pandas as pd
	import google.genai as genai
	import base64
	from openai import OpenAI
	from smolagents import Tool

	# Local
	from consts import DEFAULT_API_URL

	# Dynamic model ID: use the model selected in app.py when it is available,
	# otherwise fall back to a built-in default.
	try:
	    from app import _SELECTED_MODEL_ID

	    # A falsy ID is deliberately reported as ImportError so that it takes the
	    # same fallback path as a failed import.
	    if not _SELECTED_MODEL_ID:
	        raise ImportError("Model ID not set in app.py")
	except ImportError:
	    _SELECTED_MODEL_ID = "gpt-4.1-mini"

	class GetTaskFileTool(Tool):
	    """Tool that downloads the file attached to a task and saves it locally."""

	    name = "get_task_file_tool"
	    description = """This tool downloads the file content associated with the given task_id if exists. Returns absolute file path"""
	    inputs = {
	        "task_id": {"type": "string", "description": "Task id"},
	        "file_name": {"type": "string", "description": "File name"},
	    }
	    output_type = "string"

	    def forward(self, task_id: str, file_name: str) -> str:
	        """Download the file for ``task_id`` and write it to the working directory.

	        Args:
	            task_id: Identifier inserted into the ``/files/{task_id}`` URL.
	            file_name: Name to store the download under. Only its final path
	                component is used.

	        Returns:
	            Absolute path of the file that was written.

	        Raises:
	            ValueError: If ``file_name`` has no usable final path component.
	            requests.HTTPError: If the API responds with an error status.
	        """
	        # file_name is chosen by the agent; dropping any directory part keeps
	        # values such as "../../x" or "/abs/path" from writing outside the
	        # current working directory.
	        safe_name = os.path.basename(file_name or "")
	        if not safe_name or safe_name in (".", ".."):
	            # Fail before the network round trip instead of after it.
	            raise ValueError(f"Invalid file name: {file_name!r}")

	        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=15)
	        response.raise_for_status()

	        with open(safe_name, "wb") as file:
	            file.write(response.content)
	        return os.path.abspath(safe_name)

	class LoadXlsxFileTool(Tool):
	    """Tool that reads an Excel file from disk with pandas."""

	    name = "load_xlsx_file_tool"
	    description = """This tool loads xlsx file into pandas and returns it"""
	    inputs = {
	        "file_path": {"type": "string", "description": "File path"},
	    }
	    output_type = "object"

	    def forward(self, file_path: str) -> object:
	        """Return whatever ``pd.read_excel`` produces for ``file_path`` with default options."""
	        loaded = pd.read_excel(file_path)
	        return loaded

	class LoadTextFileTool(Tool):
	    """Tool that returns the full contents of a UTF-8 text file."""

	    name = "load_text_file_tool"
	    description = """This tool loads any text file"""
	    inputs = {
	        "file_path": {"type": "string", "description": "File path"}
	    }
	    output_type = "string"

	    def forward(self, file_path: str) -> str:
	        """Read ``file_path`` as UTF-8 text and return it as a single string.

	        Args:
	            file_path: Path of the file to read.

	        Returns:
	            The decoded file contents.

	        Raises:
	            OSError: If the file cannot be opened.
	            UnicodeDecodeError: If the bytes are not valid UTF-8.
	        """
	        # Return annotation is ``str`` to agree with ``output_type = "string"``.
	        with open(file_path, "r", encoding="utf-8") as file:
	            return file.read()

	class AnalyzeImageTool(Tool):
	    """Tool that sends an image and a task description to an LLM and returns its text reply.

	    Model IDs containing ``"gemini"`` are routed to the Google GenAI client;
	    every other model ID is sent through the OpenAI Responses API.
	    """

	    name = "analyze_image_tool"
	    description = """This tool performs a custom analysis of the provided image and returns the corresponding result."""
	    inputs = {
	        "image_path": {"type": "string", "description": "Image path"},
	        "task": {"type": "string", "description": "Task to perform on the image, be detailed and clear"},
	    }
	    output_type = "string"

	    def __init__(self, model_id=None):
	        """Store the model to use; a falsy ``model_id`` selects ``"gpt-4.1-mini"``."""
	        super().__init__()
	        self.model_id = model_id or "gpt-4.1-mini"

	    def forward(self, image_path: str, task: str) -> str:
	        """
	        Analyze the image at `image_path` according to `task` and return the textual result.

	        Failures in the back-end call are not raised; they are returned as an
	        "Error analyzing image: ..." message so the calling agent can retry.
	        """
	        header = "Image analysis result:\n\n"
	        llm_instruction = (
	            "You are a highly capable image analysis tool, designed to examine images and deliver detailed descriptions, "
	            "insights, and relevant interpretations based on the task at hand.\n\n"
	            "Approach the task methodically and provide a thorough and well-reasoned response to the following:\n\n---\nTask:\n"
	            f"{task}\n\n"
	        )
	        try:
	            if "gemini" in self.model_id:
	                return header + self._analyze_with_gemini(image_path, llm_instruction)
	            return header + self._analyze_with_openai(image_path, llm_instruction)
	        except Exception as e:
	            # Deliberate best-effort: report the problem as text instead of raising.
	            return f"Error analyzing image: {e}.\nPlease try again."

	    @staticmethod
	    def _guess_mime_type(image_path: str) -> str:
	        """Return the image MIME type implied by the file extension, defaulting to JPEG."""
	        # Local import keeps this class self-contained.
	        import mimetypes

	        guessed, _ = mimetypes.guess_type(image_path)
	        if guessed and guessed.startswith("image/"):
	            return guessed
	        # Unknown or non-image extension: keep the previous hard-coded value.
	        return "image/jpeg"

	    def _analyze_with_gemini(self, image_path: str, task: str) -> str:
	        """Send the image and prompt to a Gemini model and return the first text part.

	        Raises:
	            ValueError: If ``GOOGLEAI_API_KEY`` is not set.
	        """
	        api_key = os.getenv("GOOGLEAI_API_KEY")
	        if not api_key:
	            raise ValueError("Environment variable GOOGLEAI_API_KEY is not set.")
	        client = genai.Client(api_key=api_key)

	        with open(image_path, "rb") as f:
	            image_data = f.read()

	        contents = [
	            # Label the bytes with their real type so PNG/WebP/GIF are not
	            # announced as JPEG.
	            {"inline_data": {"mime_type": self._guess_mime_type(image_path), "data": image_data}},
	            {"text": task},
	        ]
	        response = client.models.generate_content(model=self.model_id, contents=contents)
	        return response.candidates[0].content.parts[0].text

	    def _analyze_with_openai(self, image_path: str, task: str) -> str:
	        """Send the image and prompt through the OpenAI Responses API and return the reply text.

	        Raises:
	            ValueError: If ``OPENAI_API_KEY`` is not set.
	        """
	        # Same explicit key check as the Gemini path, for a consistent error.
	        api_key = os.getenv("OPENAI_API_KEY")
	        if not api_key:
	            raise ValueError("Environment variable OPENAI_API_KEY is not set.")
	        client = OpenAI(api_key=api_key)

	        with open(image_path, "rb") as f:
	            encoded_image = base64.b64encode(f.read()).decode("utf-8")

	        mime_type = self._guess_mime_type(image_path)
	        payload = [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "input_text", "text": task},
	                    {"type": "input_image", "image_url": f"data:{mime_type};base64,{encoded_image}"},
	                ],
	            }
	        ]
	        response = client.responses.create(model=self.model_id, input=payload)
	        # output_text aggregates the text of the response; indexing output[0]
	        # breaks when the first output item is not a message.
	        return response.output_text