ai-final-assessment / tools /excel_analysis_tool.py
antonchirikalov's picture
Final fix
c752db9
"""
Excel analysis tool for the AI agent project.
"""
import os
import pandas as pd
from typing import Optional
from .base_tool import EnhancedTool
class ExcelAnalysisTool(EnhancedTool):
"""Tool for analyzing Excel spreadsheets."""
name = "ExcelAnalysisTool"
description = "Analyze a downloaded Excel or CSV file associated with a task ID."
inputs = {
"task_id": {
"type": "string",
"description": "Task ID for which the Excel/CSV file has been downloaded"
},
"query": {
"type": "string",
"description": "Query describing what to analyze in the file",
"nullable": True
}
}
output_type = "string"
def forward(self, task_id: str, query: Optional[str] = None) -> str:
"""
Analyze an Excel/CSV file.
Args:
task_id: Task ID for which the file has been downloaded
query: Query describing what to analyze
Returns:
Analysis results
"""
try:
# Construct filename based on task_id
filename = f"{task_id}_downloaded_file"
# Check if file exists
if not os.path.exists(filename):
return f"Error: File for task {task_id} does not exist. Please download it first."
# Try to determine file type and read accordingly
try:
# First try Excel format
df = pd.read_excel(filename, engine="openpyxl")
except Exception as excel_error:
# If Excel reading fails, try CSV
try:
df = pd.read_csv(filename)
except Exception as csv_error:
# If CSV reading fails, try TSV
try:
df = pd.read_csv(filename, sep='\t')
except Exception as tsv_error:
return f"Error: Unable to read file as Excel, CSV, or TSV. Original error: {str(excel_error)}"
# Basic analysis if no specific query
if not query:
# Create a basic overview of the data
info = []
info.append(f"Number of rows: {df.shape[0]}")
info.append(f"Number of columns: {df.shape[1]}")
info.append(f"Column names: {', '.join(df.columns.tolist())}")
# Data types
info.append("\nData types:")
for column, dtype in df.dtypes.items():
info.append(f"- {column}: {dtype}")
# Basic statistics for numeric columns
if df.select_dtypes(include=['number']).shape[1] > 0:
info.append("\nBasic statistics for numeric columns:")
desc = df.describe().to_string()
info.append(desc)
# Sample data
info.append("\nFirst 5 rows:")
info.append(df.head().to_string())
return "\n".join(info)
# Handle specific query
query_lower = query.lower()
if "sum" in query_lower or "total" in query_lower:
# Extract column name if specified
for col in df.columns:
if col.lower() in query_lower and col in df.select_dtypes(include=['number']).columns:
return f"Sum of values in column {col}: {df[col].sum()}"
# Otherwise sum all numeric columns
return df.select_dtypes(include=['number']).sum().to_string()
elif "average" in query_lower or "mean" in query_lower:
# Extract column name if specified
for col in df.columns:
if col.lower() in query_lower and col in df.select_dtypes(include=['number']).columns:
return f"Average value in column {col}: {df[col].mean()}"
# Otherwise calculate means for all numeric columns
return df.select_dtypes(include=['number']).mean().to_string()
elif "maximum" in query_lower or "max" in query_lower:
# Extract column name if specified
for col in df.columns:
if col.lower() in query_lower and col in df.select_dtypes(include=['number']).columns:
return f"Maximum value in column {col}: {df[col].max()}"
# Otherwise find maxima for all numeric columns
return df.select_dtypes(include=['number']).max().to_string()
elif "minimum" in query_lower or "min" in query_lower:
# Extract column name if specified
for col in df.columns:
if col.lower() in query_lower and col in df.select_dtypes(include=['number']).columns:
return f"Minimum value in column {col}: {df[col].min()}"
# Otherwise find minima for all numeric columns
return df.select_dtypes(include=['number']).min().to_string()
elif "count" in query_lower:
# Count non-empty values
return df.count().to_string()
else:
# Default: return basic info and first few rows
return f"Analysis for query '{query}':\n\n{df.head().to_string()}"
except Exception as e:
return f"Error analyzing Excel file: {str(e)}"