Spaces:

NexDatawork
/

NexDatawork-Mini-Agent

Runtime error

NexDatawork-Mini-Agent / src /agents /dataframe_agent.py

svar-chandak

Revamp docs, add requirements, and modularize agents

5a3fcad 2 months ago

5.18 kB

	"""
	DataFrame Analysis Agent

	This module implements the pandas DataFrame analysis agent that processes
	CSV files and answers natural language questions about the data.

	The agent uses LangChain's create_pandas_dataframe_agent to enable
	natural language interaction with pandas DataFrames.

	Example:
	>>> from src.agents import ask_agent
	>>> result = ask_agent(files, "What is the average revenue by region?", model=llm)
	"""

	import io
	import contextlib
	from typing import List, Optional, Any

	import pandas as pd
	from langchain.agents import AgentType
	from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

	from ..prompts import get_analysis_prompt


	class DataFrameAgent:
	    """
	    Agent for analyzing pandas DataFrames using natural language queries.

	    This agent wraps LangChain's pandas DataFrame agent and provides
	    a simplified interface for data analysis tasks.

	    Attributes:
	        model: The LLM model to use for inference (e.g., AzureChatOpenAI).
	        verbose: Value forwarded as ``verbose`` to the underlying agent.
	        last_trace: Text written to stdout while the agent was invoked
	            during the most recent ``analyze`` call. Empty before the
	            first call or when nothing was printed.

	    Example:
	        >>> agent = DataFrameAgent(model=azure_llm)
	        >>> df = pd.read_csv("sales.csv")
	        >>> result = agent.analyze(df, "What are the top 5 products by revenue?")
	    """

	    def __init__(self, model: Any, verbose: bool = True):
	        """
	        Initialize the DataFrame agent.

	        Args:
	            model: The LLM model instance to use for inference.
	                Must be a LangChain-compatible chat model.
	            verbose: Enable verbose output for debugging (default: True).
	        """
	        self.model = model
	        self.verbose = verbose
	        self.last_trace: str = ""

	    def analyze(self, df: pd.DataFrame, question: str) -> str:
	        """
	        Analyze a DataFrame and answer a natural language question.

	        This method creates a LangChain pandas agent, constructs the full
	        prompt, and invokes the agent to generate insights. Anything the
	        agent prints to stdout while running is kept out of the real
	        stdout and stored in ``self.last_trace`` instead.

	        Args:
	            df: The pandas DataFrame to analyze.
	            question: The natural language question about the data.

	        Returns:
	            str: The value stored under ``"output"`` in the agent response
	            (or the stringified response when that key is absent or the
	            response is not a dict). If anything goes wrong, the string
	            ``"Analysis error: <details>"`` is returned instead; this
	            method does not propagate exceptions to the caller.

	        Example:
	            >>> result = agent.analyze(sales_df, "Show monthly revenue trends")
	            >>> print(result)
	        """
	        # Reset up front so a failure before the agent runs never leaves a
	        # stale trace from an earlier call behind.
	        self.last_trace = ""
	        buffer = io.StringIO()

	        try:
	            # Create the pandas DataFrame agent with ZERO_SHOT_REACT approach
	            # This agent type can handle tasks without needing few-shot examples
	            #
	            # NOTE(review): allow_dangerous_code=True lets the agent execute
	            # model-generated Python in this process. Only run this against
	            # trusted inputs or inside a sandbox.
	            pandas_agent = create_pandas_dataframe_agent(
	                llm=self.model,
	                df=df,
	                verbose=self.verbose,
	                agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
	                allow_dangerous_code=True,  # Required for code execution
	                handle_parsing_errors=True,  # Gracefully handle LLM parsing issues
	            )

	            # Construct the full prompt with prefix and suffix
	            full_prompt = get_analysis_prompt(question)

	            # Redirect stdout so the agent's printed output is captured
	            # rather than written to the process's stdout.
	            with contextlib.redirect_stdout(buffer):
	                result = pandas_agent.invoke(full_prompt)

	            # Extract the final output from the agent response. Guard the
	            # dict access so a non-dict response is returned as text instead
	            # of surfacing as a confusing AttributeError message.
	            if isinstance(result, dict):
	                return result.get("output", str(result))
	            return str(result)

	        except Exception as e:
	            # Deliberate boundary: callers receive a displayable string.
	            return f"Analysis error: {e}"

	        finally:
	            # Keep whatever was captured, on success and on failure alike.
	            self.last_trace = buffer.getvalue()


	def ask_agent(
	    files: List[Any],
	    question: str,
	    model: Optional[Any] = None
	) -> str:
	    """
	    Analyze uploaded CSV files and answer a question about the data.

	    This is a convenience function that handles file loading, DataFrame
	    concatenation, and agent invocation in one call.

	    Args:
	        files: The CSV inputs. Each item is either a path (``str`` or
	            ``os.PathLike``) or an object whose ``.name`` attribute holds
	            the path to a CSV file.
	        question: The natural language question to answer about the data.
	        model: The LLM model to use. When None, no analysis is attempted
	            and an error message is returned.

	    Returns:
	        str: The analysis result returned by ``DataFrameAgent.analyze``,
	        or an error message when the files cannot be read or no model
	        was supplied. Errors are reported as strings, not raised.

	    Note:
	        Multiple CSV files are concatenated into a single DataFrame before
	        analysis. Ensure files have compatible schemas for meaningful results.

	    Example:
	        >>> result = ask_agent(uploaded_files, "What is the total revenue?", model=llm)
	    """
	    # Imported locally so this function does not depend on a module-level
	    # ``import os`` being present in the file.
	    import os

	    # Step 0: An empty or missing selection would otherwise surface as an
	    # opaque pandas/TypeError message; report it in plain words instead.
	    if not files:
	        return "Could not read CSV files: no files were provided."

	    # Step 1: Load and concatenate all uploaded CSV files
	    try:
	        dataframes = [
	            # Path-like items are used directly; anything else is expected
	            # to expose the path via ``.name``. The isinstance check comes
	            # first because pathlib.Path also has a ``.name`` (basename only).
	            pd.read_csv(f if isinstance(f, (str, os.PathLike)) else f.name)
	            for f in files
	        ]
	        combined_df = pd.concat(dataframes, ignore_index=True)
	    except Exception as e:
	        return f"Could not read CSV files: {e}"

	    # Step 2: Create agent and perform analysis
	    if model is None:
	        return "Error: No LLM model provided. Please configure the model first."

	    agent = DataFrameAgent(model=model)
	    return agent.analyze(combined_df, question)