AI_Agents_Course_Submission

Running

AI_Agents_Course_Submission / tools /multimodal_tools.py

omarequalmars

added excel/csv analysis

a1c1d9a 3 months ago

15.2 kB

	# tools/multimodal_tools.py
	import requests
	import json
	import pandas as pd
	import os # ✅ Added for file operations
	import io # ✅ Added for code execution
	import contextlib # ✅ Added for code execution
	import ast # ✅ Added for code validation
	import traceback # ✅ Added for error handling
	from typing import Optional, Dict, Any
	from .utils import encode_image_to_base64, validate_file_exists, get_env_var, logger

	class MultimodalTools:
	    """Free multimodal AI tools using OpenRouter and other free services.

	    Methods report most failures by returning a human-readable string that
	    starts with "Error" instead of raising, so callers can forward the text
	    to an agent unchanged.
	    """

	    def __init__(self, openrouter_key: Optional[str] = None):
	        """
	        Store the API credentials and the request defaults.

	        Args:
	            openrouter_key: OpenRouter API key. When omitted (or empty), the
	                value returned by get_env_var("OPENROUTER_API_KEY", None)
	                is used instead.
	        """
	        self.openrouter_key = openrouter_key or get_env_var("OPENROUTER_API_KEY", None)
	        self.openrouter_url = "https://openrouter.ai/api/v1/chat/completions"
	        self.headers = {
	            "Authorization": f"Bearer {self.openrouter_key}",
	            "Content-Type": "application/json",
	            "HTTP-Referer": "https://your-app.com",  # Optional: for analytics
	            "X-Title": "Multimodal Tools",  # Optional: for analytics
	        }

	        # Model identifiers sent in the "model" field of each request
	        self.vision_model = "google/gemini-2.5-flash-preview-05-20"
	        self.text_model = "google/gemini-2.5-flash-preview-05-20"

	    def _make_openrouter_request(self, payload: Dict[str, Any]) -> str:
	        """
	        POST a chat-completion payload to OpenRouter and return the reply text.

	        Args:
	            payload: JSON-serialisable request body

	        Returns:
	            Content of the first choice's message, or an error string when
	            the request fails or the response has no choices
	        """
	        try:
	            response = requests.post(
	                self.openrouter_url,
	                headers=self.headers,
	                json=payload,
	                timeout=60,
	            )
	            response.raise_for_status()

	            result = response.json()
	            # Guard against non-dict bodies and a missing/None/empty
	            # "choices" entry before indexing into it.
	            choices = result.get('choices') if isinstance(result, dict) else None
	            if choices:
	                return choices[0]['message']['content']

	            logger.error(f"Unexpected response format: {result}")
	            return "Error: Invalid response format"

	        except requests.exceptions.RequestException as e:
	            logger.error(f"OpenRouter API request failed: {str(e)}")
	            return f"Error making API request: {str(e)}"
	        except Exception as e:
	            logger.error(f"Unexpected error: {str(e)}")
	            return f"Unexpected error: {str(e)}"

	    def _build_text_payload(self, prompt: str, max_tokens: int = 2048) -> Dict[str, Any]:
	        """Build a single-user-message request body for the text model."""
	        return {
	            "model": self.text_model,
	            "messages": [
	                {
	                    "role": "user",
	                    "content": prompt,
	                }
	            ],
	            "temperature": 0,
	            "max_tokens": max_tokens,
	        }

	    def analyze_image(self, image_path: str, question: str = "Describe this image in detail") -> str:
	        """
	        Analyze image content using multimodal AI

	        Args:
	            image_path: Path to image file
	            question: Question about the image

	        Returns:
	            AI analysis of the image, or an error string
	        """
	        if not validate_file_exists(image_path):
	            return f"Error: Image file not found at {image_path}"

	        try:
	            encoded_image = encode_image_to_base64(image_path)

	            # NOTE(review): the data URL always declares image/jpeg,
	            # whatever the real file type is - confirm that
	            # encode_image_to_base64 produces JPEG data or that the
	            # provider ignores the declared type.
	            payload = {
	                "model": self.vision_model,
	                "messages": [
	                    {
	                        "role": "user",
	                        "content": [
	                            {"type": "text", "text": question},
	                            {
	                                "type": "image_url",
	                                "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
	                            },
	                        ],
	                    }
	                ],
	                "temperature": 0,
	                "max_tokens": 2048,
	            }

	            return self._make_openrouter_request(payload)

	        except Exception as e:
	            error_msg = f"Error analyzing image: {str(e)}"
	            logger.error(error_msg)
	            return error_msg

	    def extract_text_from_image(self, image_path: str) -> str:
	        """
	        Extract text from image using OCR via multimodal AI

	        Args:
	            image_path: Path to image file

	        Returns:
	            Extracted text from image
	        """
	        ocr_prompt = """Extract all visible text from this image.
	Return only the text content without any additional commentary or formatting.
	If no text is visible, return 'No text found'."""

	        return self.analyze_image(image_path, ocr_prompt)

	    def analyze_audio_transcript(self, transcript: str, question: str = "Summarize this audio content") -> str:
	        """
	        Analyze audio content via transcript

	        Args:
	            transcript: Audio transcript text
	            question: Question about the audio content

	        Returns:
	            AI analysis of the audio content, or an error string when the
	            transcript is empty or whitespace only
	        """
	        if not transcript.strip():
	            return "Error: Empty transcript provided"

	        try:
	            payload = self._build_text_payload(
	                f"Audio transcript: {transcript}\n\nQuestion: {question}"
	            )
	            return self._make_openrouter_request(payload)

	        except Exception as e:
	            error_msg = f"Error analyzing audio transcript: {str(e)}"
	            logger.error(error_msg)
	            return error_msg

	    def analyze_excel_file(self, file_path: str, question: str) -> str:
	        """
	        Analyze Excel or CSV file content using AI

	        The file is loaded with pandas, summarised as text (shape, columns,
	        head, dtypes, describe()) and that summary - not the full data - is
	        sent to the model together with the question.

	        Args:
	            file_path: Path to Excel (.xlsx) or CSV file
	            question: Question about the data

	        Returns:
	            AI analysis of the spreadsheet data, or an error string
	        """
	        if not validate_file_exists(file_path):
	            return f"Error: File not found at {file_path}"

	        try:
	            # Try reading as Excel first, then fall back to CSV
	            try:
	                df = pd.read_excel(file_path)
	            except Exception:
	                try:
	                    df = pd.read_csv(file_path)
	                except Exception as e:
	                    return f"Error reading file: Unable to read as Excel or CSV - {str(e)}"

	            # Convert dataframe to text representation for AI analysis
	            data_summary = f"""
	Data file analysis:
	- Shape: {df.shape[0]} rows, {df.shape[1]} columns
	- Columns: {list(df.columns)}

	First few rows:
	{df.head().to_string()}

	Data types:
	{df.dtypes.to_string()}

	Summary statistics:
	{df.describe().to_string()}
	"""

	            payload = self._build_text_payload(
	                f"Analyze this spreadsheet data and answer the question.\n\n{data_summary}\n\nQuestion: {question}"
	            )
	            return self._make_openrouter_request(payload)

	        except Exception as e:
	            error_msg = f"Error analyzing Excel file: {str(e)}"
	            logger.error(error_msg)
	            return error_msg

	    # Python code processing
	    def _validate_python_code(self, code: str) -> bool:
	        """Return True when the code parses as Python, False on a SyntaxError."""
	        try:
	            ast.parse(code)
	        except SyntaxError:
	            return False
	        return True

	    def _execute_python_code(self, code: str) -> str:
	        """
	        Execute Python code with a restricted set of builtins and capture output

	        The code runs exactly once. If its last top-level statement is an
	        expression, that expression is evaluated separately so its value can
	        be reported as "Final result".

	        Args:
	            code: Python source to run

	        Returns:
	            Captured stdout/stderr plus the final expression value (when not
	            None), a fixed success message when nothing was produced, or
	            "Error: <ExceptionType>: <message>" when the code fails
	        """
	        # SECURITY NOTE(review): limiting __builtins__ is NOT a real sandbox;
	        # restricted exec() can be escaped through object introspection.
	        # Do not run untrusted files here without process-level isolation.
	        stdout = io.StringIO()
	        stderr = io.StringIO()
	        return_value = None

	        # A single namespace is used as the globals of the executed code.
	        # With separate globals/locals dicts, top-level names land in locals
	        # and functions defined by the script cannot see each other.
	        namespace = {
	            '__builtins__': {
	                'print': print,
	                'len': len,
	                'str': str,
	                'int': int,
	                'float': float,
	                'list': list,
	                'dict': dict,
	                'sum': sum,
	                'max': max,
	                'min': min,
	                'abs': abs,
	                'round': round,
	                'range': range,
	                'enumerate': enumerate,
	                'zip': zip,
	            },
	            # Lets `if __name__ == "__main__":` guards run as in a script.
	            '__name__': '__main__',
	        }

	        try:
	            tree = ast.parse(code, filename='<string>')

	            # Detach a trailing expression statement at the AST level; this
	            # also works when the expression spans several source lines.
	            final_expr = None
	            if tree.body and isinstance(tree.body[-1], ast.Expr):
	                final_expr = ast.Expression(body=tree.body.pop().value)

	            with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
	                exec(compile(tree, '<string>', 'exec'), namespace)
	                if final_expr is not None:
	                    return_value = eval(compile(final_expr, '<string>', 'eval'), namespace)

	            # Combine captured streams
	            output = stdout.getvalue()
	            captured_err = stderr.getvalue()
	            if captured_err:
	                output += "\n" + captured_err

	            # Add return value if it exists
	            if return_value is not None:
	                output += f"\n\nFinal result: {return_value}"

	            return output.strip() or "Code executed successfully (no output)"

	        except Exception as e:
	            # Report the failure as text; output captured so far is dropped
	            error_output = f"Error: {type(e).__name__}: {str(e)}"
	            logger.error(f"Code execution error: {error_output}")
	            return error_output

	    def analyze_python_file(self, file_path: str, question: str = "What is the final output of this code?") -> str:
	        """
	        Read and analyze Python code file

	        When the question mentions output/result/execute/run/final, the file
	        is executed and short results (< 50 characters) are additionally
	        explained by the text model. Otherwise the code is only sent to the
	        model for analysis.

	        Args:
	            file_path: Path to Python (.py) file
	            question: Question about the code

	        Returns:
	            Analysis or execution result of the Python code, or an error string
	        """
	        if not validate_file_exists(file_path):
	            return f"Error: Python file not found at {file_path}"

	        try:
	            # Read the Python file
	            with open(file_path, 'r', encoding='utf-8') as f:
	                code_content = f.read()

	            if not code_content.strip():
	                return "Error: Python file is empty"

	            # Validate syntax
	            if not self._validate_python_code(code_content):
	                return "Error: Python file contains syntax errors"

	            # Substring match on the lower-cased question decides whether
	            # the code is executed or only analysed.
	            lowered_question = question.lower()
	            wants_execution = any(
	                keyword in lowered_question
	                for keyword in ['output', 'result', 'execute', 'run', 'final']
	            )

	            if not wants_execution:
	                # Just analyze the code without execution. The source is
	                # embedded in the fenced block so the model can see it.
	                payload = self._build_text_payload(
	                    f"Analyze this Python code and answer the question.\n\nPython code:\n```python\n{code_content}\n```\n\nQuestion: {question}"
	                )
	                return self._make_openrouter_request(payload)

	            logger.info(f"Executing Python code from {file_path}")
	            execution_result = self._execute_python_code(code_content)

	            # Longer results are returned as-is
	            if len(execution_result) >= 50:
	                return execution_result

	            # Short result: add AI analysis, giving the model both the
	            # source code and what it printed.
	            payload = self._build_text_payload(
	                f"Python code:\n```python\n{code_content}\n```\n\nExecution result: {execution_result}\n\nQuestion: {question}",
	                max_tokens=1024,
	            )
	            ai_analysis = self._make_openrouter_request(payload)
	            return f"Execution result: {execution_result}\n\nAnalysis: {ai_analysis}"

	        except Exception as e:
	            error_msg = f"Error analyzing Python file: {str(e)}"
	            logger.error(error_msg)
	            return error_msg

	    def describe_image(self, image_path: str) -> str:
	        """Get a detailed description of an image"""
	        return self.analyze_image(
	            image_path,
	            "Provide a detailed, objective description of this image including objects, people, colors, setting, and any notable details."
	        )

	    def answer_visual_question(self, image_path: str, question: str) -> str:
	        """Answer a specific question about an image"""
	        return self.analyze_image(image_path, question)

	# Convenience functions for direct use
	def analyze_image(image_path: str, question: str = "Describe this image in detail") -> str:
	    """
	    Module-level shortcut for MultimodalTools.analyze_image

	    A fresh MultimodalTools instance is created with default arguments for
	    each call.

	    Args:
	        image_path: Path to image file
	        question: Question about the image

	    Returns:
	        Whatever MultimodalTools.analyze_image returns
	    """
	    return MultimodalTools().analyze_image(image_path, question)

	def extract_text(image_path: str) -> str:
	    """
	    Module-level shortcut for MultimodalTools.extract_text_from_image

	    A fresh MultimodalTools instance is created with default arguments for
	    each call.

	    Args:
	        image_path: Path to image file

	    Returns:
	        Whatever MultimodalTools.extract_text_from_image returns
	    """
	    return MultimodalTools().extract_text_from_image(image_path)

	def analyze_transcript(transcript: str, question: str = "Summarize this content") -> str:
	    """
	    Module-level shortcut for MultimodalTools.analyze_audio_transcript

	    A fresh MultimodalTools instance is created with default arguments for
	    each call.

	    Args:
	        transcript: Audio transcript text
	        question: Question about the transcript

	    Returns:
	        Whatever MultimodalTools.analyze_audio_transcript returns
	    """
	    return MultimodalTools().analyze_audio_transcript(transcript, question)

	def analyze_excel(file_path: str, question: str) -> str:
	    """
	    Module-level shortcut for MultimodalTools.analyze_excel_file

	    A fresh MultimodalTools instance is created with default arguments for
	    each call.

	    Args:
	        file_path: Path to Excel or CSV file
	        question: Question about the data

	    Returns:
	        Whatever MultimodalTools.analyze_excel_file returns
	    """
	    return MultimodalTools().analyze_excel_file(file_path, question)

	# ✅ NEW FUNCTION - Added Python code convenience function
	def analyze_python(file_path: str, question: str = "What is the final output of this code?") -> str:
	    """
	    Module-level shortcut for MultimodalTools.analyze_python_file

	    A fresh MultimodalTools instance is created with default arguments for
	    each call.

	    Args:
	        file_path: Path to Python (.py) file
	        question: Question about the code

	    Returns:
	        Whatever MultimodalTools.analyze_python_file returns
	    """
	    return MultimodalTools().analyze_python_file(file_path, question)