AI_Agents_Course_Submission

Running

File size: 15,198 Bytes

# tools/multimodal_tools.py
import requests
import json
import pandas as pd
import os  # ✅ Added for file operations
import io  # ✅ Added for code execution
import contextlib  # ✅ Added for code execution
import ast  # ✅ Added for code validation
import traceback  # ✅ Added for error handling
from typing import Optional, Dict, Any
from .utils import encode_image_to_base64, validate_file_exists, get_env_var, logger

class MultimodalTools:
    """Multimodal helper tools backed by the OpenRouter chat-completions API.

    Offers image analysis / OCR, transcript analysis, spreadsheet analysis and
    Python-file analysis (optionally executing the file in a restricted
    namespace). Every public method returns a string; failures are reported
    as error-message strings instead of being raised.
    """

    # MIME types that may be declared in the image data URL. Anything else
    # falls back to JPEG, which is what was previously always declared.
    _DATA_URL_IMAGE_TYPES = ("image/png", "image/jpeg", "image/webp", "image/gif")
    _FALLBACK_IMAGE_TYPE = "image/jpeg"

    def __init__(self, openrouter_key: Optional[str] = None):
        """Store the API key and build the request configuration.

        Args:
            openrouter_key: OpenRouter API key. When omitted (or falsy) the key
                is looked up with ``get_env_var("OPENROUTER_API_KEY", None)``.
        """
        self.openrouter_key = openrouter_key or get_env_var("OPENROUTER_API_KEY", None)
        self.openrouter_url = "https://openrouter.ai/api/v1/chat/completions"
        self.headers = {
            "Authorization": f"Bearer {self.openrouter_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://your-app.com",  # Optional: for analytics
            "X-Title": "Multimodal Tools"  # Optional: for analytics
        }

        # The same model is used for both vision and text-only requests
        self.vision_model = "google/gemini-2.5-flash-preview-05-20"
        self.text_model = "google/gemini-2.5-flash-preview-05-20"

    def _make_openrouter_request(self, payload: Dict[str, Any]) -> str:
        """
        POST a chat-completions payload to OpenRouter and return the reply text

        Args:
            payload: JSON-serialisable request body (model, messages, ...)

        Returns:
            Content of the first choice's message, or an error-message string
            when the request fails or the response has no choices
        """
        try:
            response = requests.post(
                self.openrouter_url,
                headers=self.headers,
                json=payload,
                timeout=60
            )
            response.raise_for_status()

            result = response.json()
            if 'choices' in result and len(result['choices']) > 0:
                return result['choices'][0]['message']['content']
            else:
                logger.error(f"Unexpected response format: {result}")
                return "Error: Invalid response format"

        except requests.exceptions.RequestException as e:
            logger.error(f"OpenRouter API request failed: {str(e)}")
            return f"Error making API request: {str(e)}"
        except Exception as e:
            # Covers malformed JSON and unexpected response shapes
            logger.error(f"Unexpected error: {str(e)}")
            return f"Unexpected error: {str(e)}"

    @staticmethod
    def _guess_image_mime_type(image_path: str) -> str:
        """
        Pick the MIME type to declare in the data URL for an image file

        Args:
            image_path: Path to image file (only its extension is inspected)

        Returns:
            The guessed type when it is one of ``_DATA_URL_IMAGE_TYPES``,
            otherwise ``"image/jpeg"``
        """
        import mimetypes  # local import: leaves the module's import block untouched

        guessed, _ = mimetypes.guess_type(image_path)
        if guessed in MultimodalTools._DATA_URL_IMAGE_TYPES:
            return guessed
        return MultimodalTools._FALLBACK_IMAGE_TYPE

    def analyze_image(self, image_path: str, question: str = "Describe this image in detail") -> str:
        """
        Analyze image content using multimodal AI

        Args:
            image_path: Path to image file
            question: Question about the image

        Returns:
            AI analysis of the image, or an error-message string
        """
        if not validate_file_exists(image_path):
            return f"Error: Image file not found at {image_path}"

        try:
            encoded_image = encode_image_to_base64(image_path)
            # Declare the real image type instead of always claiming JPEG
            mime_type = self._guess_image_mime_type(image_path)

            payload = {
                "model": self.vision_model,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": question},
                            {
                                "type": "image_url",
                                "image_url": {"url": f"data:{mime_type};base64,{encoded_image}"}
                            }
                        ]
                    }
                ],
                "temperature": 0,
                "max_tokens": 2048
            }

            return self._make_openrouter_request(payload)

        except Exception as e:
            error_msg = f"Error analyzing image: {str(e)}"
            logger.error(error_msg)
            return error_msg

    def extract_text_from_image(self, image_path: str) -> str:
        """
        Extract text from image using OCR via multimodal AI

        Args:
            image_path: Path to image file

        Returns:
            Extracted text from image (the prompt asks the model to answer
            'No text found' when nothing is visible)
        """
        ocr_prompt = """Extract all visible text from this image. 
        Return only the text content without any additional commentary or formatting. 
        If no text is visible, return 'No text found'."""

        return self.analyze_image(image_path, ocr_prompt)

    def analyze_audio_transcript(self, transcript: str, question: str = "Summarize this audio content") -> str:
        """
        Analyze audio content via transcript

        Args:
            transcript: Audio transcript text
            question: Question about the audio content

        Returns:
            AI analysis of the audio content, or an error-message string
        """
        # Treat None the same as an empty / whitespace-only transcript
        if not transcript or not transcript.strip():
            return "Error: Empty transcript provided"

        try:
            payload = {
                "model": self.text_model,
                "messages": [
                    {
                        "role": "user",
                        "content": f"Audio transcript: {transcript}\n\nQuestion: {question}"
                    }
                ],
                "temperature": 0,
                "max_tokens": 2048
            }

            return self._make_openrouter_request(payload)

        except Exception as e:
            error_msg = f"Error analyzing audio transcript: {str(e)}"
            logger.error(error_msg)
            return error_msg

    def analyze_excel_file(self, file_path: str, question: str) -> str:
        """
        Analyze Excel or CSV file content using AI

        Only a textual summary (shape, columns, first rows, dtypes and
        ``describe()`` statistics) is sent to the model, not the full data.

        Args:
            file_path: Path to Excel (.xlsx) or CSV file
            question: Question about the data

        Returns:
            AI analysis of the spreadsheet data, or an error-message string
        """
        if not validate_file_exists(file_path):
            return f"Error: File not found at {file_path}"

        try:
            # Try reading as Excel first, then CSV
            try:
                df = pd.read_excel(file_path)
            except Exception:
                try:
                    df = pd.read_csv(file_path)
                except Exception as e:
                    return f"Error reading file: Unable to read as Excel or CSV - {str(e)}"

            # Convert dataframe to text representation for AI analysis
            data_summary = f"""
Data file analysis:
- Shape: {df.shape[0]} rows, {df.shape[1]} columns
- Columns: {list(df.columns)}

First few rows:
{df.head().to_string()}

Data types:
{df.dtypes.to_string()}

Summary statistics:
{df.describe().to_string()}
"""

            payload = {
                "model": self.text_model,
                "messages": [
                    {
                        "role": "user",
                        "content": f"Analyze this spreadsheet data and answer the question.\n\n{data_summary}\n\nQuestion: {question}"
                    }
                ],
                "temperature": 0,
                "max_tokens": 2048
            }

            return self._make_openrouter_request(payload)

        except Exception as e:
            error_msg = f"Error analyzing Excel file: {str(e)}"
            logger.error(error_msg)
            return error_msg

    def _validate_python_code(self, code: str) -> bool:
        """Return True when *code* parses as Python, False on a SyntaxError"""
        try:
            ast.parse(code)
            return True
        except SyntaxError:
            return False

    def _execute_python_code(self, code: str) -> str:
        """
        Execute Python code with a reduced set of builtins and capture output

        The code runs exactly once in a single namespace, so functions it
        defines can call each other. If the last statement is an expression,
        its value is reported as the final result.

        Args:
            code: Python source to run

        Returns:
            Captured stdout/stderr, followed by "Final result: <value>" when
            the trailing expression is not None; a fixed success message when
            there is nothing to report; or "Error: <Type>: <message>" when
            parsing or execution raises
        """
        stdout = io.StringIO()
        stderr = io.StringIO()
        return_value = None

        # NOTE(security): a reduced __builtins__ dict is NOT a real sandbox.
        # It only removes conveniences such as import and open(); do not run
        # untrusted files through this method without process-level isolation.
        namespace = {
            '__builtins__': {
                'print': print,
                'len': len,
                'str': str,
                'int': int,
                'float': float,
                'list': list,
                'dict': dict,
                'sum': sum,
                'max': max,
                'min': min,
                'abs': abs,
                'round': round,
                'range': range,
                'enumerate': enumerate,
                'zip': zip,
            }
        }

        try:
            tree = ast.parse(code, filename="<string>")

            # Split a trailing expression off at the AST level, so multi-line
            # expressions and "a; b" lines are handled correctly.
            trailing_expr = None
            if tree.body and isinstance(tree.body[-1], ast.Expr):
                trailing_expr = ast.Expression(body=tree.body.pop().value)

            with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
                # One dict serves as globals and locals: with separate dicts,
                # top-level functions would not be able to see each other.
                exec(compile(tree, "<string>", "exec"), namespace)
                if trailing_expr is not None:
                    return_value = eval(compile(trailing_expr, "<string>", "eval"), namespace)

            output = stdout.getvalue()
            if stderr.getvalue():
                output += "\n" + stderr.getvalue()

            # Add return value if it exists
            if return_value is not None:
                output += f"\n\nFinal result: {return_value}"

            output = output.strip()
            if output:
                return output
            return "Code executed successfully (no output)"

        except Exception as e:
            error_output = f"Error: {type(e).__name__}: {str(e)}"
            logger.error(f"Code execution error: {error_output}")
            return error_output

    def analyze_python_file(self, file_path: str, question: str = "What is the final output of this code?") -> str:
        """
        Read and analyze Python code file

        When the question mentions output/result/execute/run/final, the file
        is executed; short execution results (under 50 characters) are also
        sent to the model together with the source for an explanation.
        Otherwise the source is only sent to the model for analysis.

        Args:
            file_path: Path to Python (.py) file
            question: Question about the code

        Returns:
            Analysis or execution result of the Python code, or an
            error-message string
        """
        if not validate_file_exists(file_path):
            return f"Error: Python file not found at {file_path}"

        try:
            # Read the Python file
            with open(file_path, 'r', encoding='utf-8') as f:
                code_content = f.read()

            if not code_content.strip():
                return "Error: Python file is empty"

            # Validate syntax
            if not self._validate_python_code(code_content):
                return "Error: Python file contains syntax errors"

            # If question asks for output/result, execute the code
            if any(keyword in question.lower() for keyword in ['output', 'result', 'execute', 'run', 'final']):
                logger.info(f"Executing Python code from {file_path}")
                execution_result = self._execute_python_code(code_content)

                # Also provide AI analysis if needed
                if len(execution_result) < 50:  # Short result, add AI analysis
                    payload = {
                        "model": self.text_model,
                        "messages": [
                            {
                                "role": "user",
                                # The source itself must be inside the fence,
                                # otherwise the model has nothing to analyze
                                "content": f"Python code:\n```python\n{code_content}\n```\n\nExecution result: {execution_result}\n\nQuestion: {question}"
                            }
                        ],
                        "temperature": 0,
                        "max_tokens": 1024
                    }

                    ai_analysis = self._make_openrouter_request(payload)
                    return f"Execution result: {execution_result}\n\nAnalysis: {ai_analysis}"
                else:
                    return execution_result
            else:
                # Just analyze the code without execution
                payload = {
                    "model": self.text_model,
                    "messages": [
                        {
                            "role": "user",
                            "content": f"Analyze this Python code and answer the question.\n\nPython code:\n```python\n{code_content}\n```\n\nQuestion: {question}"
                        }
                    ],
                    "temperature": 0,
                    "max_tokens": 2048
                }

                return self._make_openrouter_request(payload)

        except Exception as e:
            error_msg = f"Error analyzing Python file: {str(e)}"
            logger.error(error_msg)
            return error_msg

    def describe_image(self, image_path: str) -> str:
        """Get a detailed description of an image"""
        return self.analyze_image(
            image_path,
            "Provide a detailed, objective description of this image including objects, people, colors, setting, and any notable details."
        )

    def answer_visual_question(self, image_path: str, question: str) -> str:
        """Answer a specific question about an image"""
        return self.analyze_image(image_path, question)

# Convenience functions for direct use
def analyze_image(image_path: str, question: str = "Describe this image in detail") -> str:
    """
    Analyze an image through a freshly created MultimodalTools instance

    Args:
        image_path: Path to image file
        question: Question about the image

    Returns:
        Whatever MultimodalTools.analyze_image returns for these arguments
    """
    return MultimodalTools().analyze_image(image_path, question)

def extract_text(image_path: str) -> str:
    """
    Extract text from an image through a freshly created MultimodalTools instance

    Args:
        image_path: Path to image file

    Returns:
        Whatever MultimodalTools.extract_text_from_image returns for this path
    """
    return MultimodalTools().extract_text_from_image(image_path)

def analyze_transcript(transcript: str, question: str = "Summarize this content") -> str:
    """
    Analyze an audio transcript through a freshly created MultimodalTools instance

    Args:
        transcript: Audio transcript text
        question: Question about the transcript

    Returns:
        Whatever MultimodalTools.analyze_audio_transcript returns for these arguments
    """
    return MultimodalTools().analyze_audio_transcript(transcript, question)

def analyze_excel(file_path: str, question: str) -> str:
    """
    Analyze an Excel/CSV file through a freshly created MultimodalTools instance

    Args:
        file_path: Path to the spreadsheet file
        question: Question about the data

    Returns:
        Whatever MultimodalTools.analyze_excel_file returns for these arguments
    """
    return MultimodalTools().analyze_excel_file(file_path, question)

# ✅ NEW FUNCTION - Added Python code convenience function
def analyze_python(file_path: str, question: str = "What is the final output of this code?") -> str:
    """
    Analyze a Python file through a freshly created MultimodalTools instance

    Args:
        file_path: Path to the Python file
        question: Question about the code

    Returns:
        Whatever MultimodalTools.analyze_python_file returns for these arguments
    """
    return MultimodalTools().analyze_python_file(file_path, question)