NexDatawork-Mini-Agent / src /agents /dataframe_agent.py
svar-chandak
Revamp docs, add requirements, and modularize agents
5a3fcad
"""
DataFrame Analysis Agent
This module implements the pandas DataFrame analysis agent that processes
CSV files and answers natural language questions about the data.
The agent uses LangChain's create_pandas_dataframe_agent to enable
natural language interaction with pandas DataFrames.
Example:
>>> from src.agents import ask_agent
>>> result = ask_agent(files, "What is the average revenue by region?", model=azure_llm)
"""
import io
import contextlib
from typing import List, Optional, Any
import pandas as pd
from langchain.agents import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from ..prompts import get_analysis_prompt
class DataFrameAgent:
    """
    Natural-language question answering over a pandas DataFrame.

    Thin wrapper around LangChain's ``create_pandas_dataframe_agent``. The
    instance only stores configuration; a new LangChain agent is built on
    every call to :meth:`analyze`.

    Attributes:
        model: The LLM passed to LangChain as ``llm`` (e.g., AzureChatOpenAI).
        verbose: Passed to LangChain as its ``verbose`` flag.

    Example:
        >>> agent = DataFrameAgent(model=azure_llm)
        >>> df = pd.read_csv("sales.csv")
        >>> result = agent.analyze(df, "What are the top 5 products by revenue?")
    """

    def __init__(self, model: Any, verbose: bool = True):
        """
        Store the configuration used for every analysis.

        Args:
            model: LangChain-compatible chat model instance used for inference.
            verbose: Value forwarded as the agent's ``verbose`` flag
                (default: True).
        """
        self.verbose = verbose
        self.model = model

    def analyze(self, df: pd.DataFrame, question: str) -> str:
        """
        Answer a natural language question about ``df``.

        Builds a LangChain pandas agent for ``df``, expands ``question`` into
        the full prompt via ``get_analysis_prompt``, and invokes the agent.

        Args:
            df: The pandas DataFrame to analyze.
            question: The natural language question about the data.

        Returns:
            str: The ``"output"`` entry of the agent response, or the string
            form of the whole response when that key is absent. If anything
            fails along the way, the string ``"Analysis error: <exception>"``
            is returned instead; this method does not raise ``Exception``
            subclasses to the caller.

        Example:
            >>> result = agent.analyze(sales_df, "Show monthly revenue trends")
            >>> print(result)
        """
        try:
            # Zero-shot ReAct agent: no few-shot examples are supplied.
            agent_options = {
                "llm": self.model,
                "df": df,
                "verbose": self.verbose,
                "agent_type": AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                # Explicit opt-in so the agent may execute generated code.
                "allow_dangerous_code": True,
                # Let the agent recover from malformed LLM output.
                "handle_parsing_errors": True,
            }
            executor = create_pandas_dataframe_agent(**agent_options)

            prompt = get_analysis_prompt(question)

            # Anything printed to stdout during the run goes into an
            # in-memory sink that is never read, so it is discarded.
            with contextlib.redirect_stdout(io.StringIO()):
                response = executor.invoke(prompt)

            return response.get("output", str(response))
        except Exception as exc:
            # Report every failure as text rather than propagating it.
            return f"Analysis error: {exc}"
def _resolve_csv_source(file_obj: Any) -> Any:
    """
    Return the value to hand to ``pd.read_csv`` for one uploaded item.

    Path strings and ``os.PathLike`` objects are passed through unchanged;
    anything else is treated as an upload wrapper whose ``.name`` attribute
    holds the CSV path.
    """
    # Detect path-like objects via ``__fspath__`` instead of reading ``.name``:
    # ``pathlib.Path.name`` is only the final path component, not the path.
    if isinstance(file_obj, str) or hasattr(file_obj, "__fspath__"):
        return file_obj
    return file_obj.name


def ask_agent(
    files: List[Any],
    question: str,
    model: Optional[Any] = None
) -> str:
    """
    Analyze uploaded CSV files and answer a question about the data.

    Convenience function that loads every CSV, concatenates them into one
    DataFrame, and runs a ``DataFrameAgent`` on the result.

    Args:
        files: Iterable of CSV sources. Each item is either a path
            (``str`` or ``os.PathLike``) or an object whose ``.name``
            attribute is the CSV path, such as the objects produced by
            Gradio's file upload component.
        question: The natural language question to answer about the data.
        model: LLM model to use. There is no fallback: when it is None an
            error message is returned and no analysis is attempted.

    Returns:
        str: The analysis result, or an error message. Errors are always
        reported through the return value:

        * ``"Could not read CSV files: ..."`` when ``files`` is empty/None
          or any file cannot be loaded or concatenated.
        * ``"Error: No LLM model provided. ..."`` when ``model`` is None.

    Note:
        Multiple CSV files are concatenated row-wise into a single DataFrame
        before analysis. Ensure files have compatible schemas for meaningful
        results.

    Example:
        >>> # With Gradio file input
        >>> result = ask_agent(uploaded_files, "What is the total revenue?", model=llm)
    """
    # Step 1: Load and concatenate all uploaded CSV files.
    if files is None:
        files = []
    try:
        dataframes = [pd.read_csv(_resolve_csv_source(f)) for f in files]
        if not dataframes:
            # pd.concat([]) would fail with an internal-sounding message;
            # report the real problem instead.
            return "Could not read CSV files: no files were provided"
        combined_df = pd.concat(dataframes, ignore_index=True)
    except Exception as e:
        return f"Could not read CSV files: {e}"

    # Step 2: Create agent and perform analysis.
    if model is None:
        return "Error: No LLM model provided. Please configure the model first."

    agent = DataFrameAgent(model=model)
    return agent.analyze(combined_df, question)