Spaces:
Runtime error
Runtime error
""" | |
Excel analysis tool for the AI agent project. | |
""" | |
import os | |
import pandas as pd | |
from typing import Optional | |
from .base_tool import EnhancedTool | |
class ExcelAnalysisTool(EnhancedTool): | |
"""Tool for analyzing Excel spreadsheets.""" | |
name = "ExcelAnalysisTool" | |
description = "Analyze a downloaded Excel or CSV file associated with a task ID." | |
inputs = { | |
"task_id": { | |
"type": "string", | |
"description": "Task ID for which the Excel/CSV file has been downloaded" | |
}, | |
"query": { | |
"type": "string", | |
"description": "Query describing what to analyze in the file", | |
"nullable": True | |
} | |
} | |
output_type = "string" | |
def forward(self, task_id: str, query: Optional[str] = None) -> str: | |
""" | |
Analyze an Excel/CSV file. | |
Args: | |
task_id: Task ID for which the file has been downloaded | |
query: Query describing what to analyze | |
Returns: | |
Analysis results | |
""" | |
try: | |
# Construct filename based on task_id | |
filename = f"{task_id}_downloaded_file" | |
# Check if file exists | |
if not os.path.exists(filename): | |
return f"Error: File for task {task_id} does not exist. Please download it first." | |
# Try to determine file type and read accordingly | |
try: | |
# First try Excel format | |
df = pd.read_excel(filename, engine="openpyxl") | |
except Exception as excel_error: | |
# If Excel reading fails, try CSV | |
try: | |
df = pd.read_csv(filename) | |
except Exception as csv_error: | |
# If CSV reading fails, try TSV | |
try: | |
df = pd.read_csv(filename, sep='\t') | |
except Exception as tsv_error: | |
return f"Error: Unable to read file as Excel, CSV, or TSV. Original error: {str(excel_error)}" | |
# Basic analysis if no specific query | |
if not query: | |
# Create a basic overview of the data | |
info = [] | |
info.append(f"Number of rows: {df.shape[0]}") | |
info.append(f"Number of columns: {df.shape[1]}") | |
info.append(f"Column names: {', '.join(df.columns.tolist())}") | |
# Data types | |
info.append("\nData types:") | |
for column, dtype in df.dtypes.items(): | |
info.append(f"- {column}: {dtype}") | |
# Basic statistics for numeric columns | |
if df.select_dtypes(include=['number']).shape[1] > 0: | |
info.append("\nBasic statistics for numeric columns:") | |
desc = df.describe().to_string() | |
info.append(desc) | |
# Sample data | |
info.append("\nFirst 5 rows:") | |
info.append(df.head().to_string()) | |
return "\n".join(info) | |
# Handle specific query | |
query_lower = query.lower() | |
if "sum" in query_lower or "total" in query_lower: | |
# Extract column name if specified | |
for col in df.columns: | |
if col.lower() in query_lower and col in df.select_dtypes(include=['number']).columns: | |
return f"Sum of values in column {col}: {df[col].sum()}" | |
# Otherwise sum all numeric columns | |
return df.select_dtypes(include=['number']).sum().to_string() | |
elif "average" in query_lower or "mean" in query_lower: | |
# Extract column name if specified | |
for col in df.columns: | |
if col.lower() in query_lower and col in df.select_dtypes(include=['number']).columns: | |
return f"Average value in column {col}: {df[col].mean()}" | |
# Otherwise calculate means for all numeric columns | |
return df.select_dtypes(include=['number']).mean().to_string() | |
elif "maximum" in query_lower or "max" in query_lower: | |
# Extract column name if specified | |
for col in df.columns: | |
if col.lower() in query_lower and col in df.select_dtypes(include=['number']).columns: | |
return f"Maximum value in column {col}: {df[col].max()}" | |
# Otherwise find maxima for all numeric columns | |
return df.select_dtypes(include=['number']).max().to_string() | |
elif "minimum" in query_lower or "min" in query_lower: | |
# Extract column name if specified | |
for col in df.columns: | |
if col.lower() in query_lower and col in df.select_dtypes(include=['number']).columns: | |
return f"Minimum value in column {col}: {df[col].min()}" | |
# Otherwise find minima for all numeric columns | |
return df.select_dtypes(include=['number']).min().to_string() | |
elif "count" in query_lower: | |
# Count non-empty values | |
return df.count().to_string() | |
else: | |
# Default: return basic info and first few rows | |
return f"Analysis for query '{query}':\n\n{df.head().to_string()}" | |
except Exception as e: | |
return f"Error analyzing Excel file: {str(e)}" |