Spaces:
Runtime error
Runtime error
| """ | |
| DataFrame Analysis Agent | |
| This module implements the pandas DataFrame analysis agent that processes | |
| CSV files and answers natural language questions about the data. | |
| The agent uses LangChain's create_pandas_dataframe_agent to enable | |
| natural language interaction with pandas DataFrames. | |
| Example: | |
| >>> from src.agents import ask_agent | |
| >>> result = ask_agent(files, "What is the average revenue by region?") | |
| """ | |
| import io | |
| import contextlib | |
| from typing import List, Optional, Any | |
| import pandas as pd | |
| from langchain.agents import AgentType | |
| from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent | |
| from ..prompts import get_analysis_prompt | |
class DataFrameAgent:
    """
    Agent for analyzing pandas DataFrames using natural language queries.

    This agent wraps LangChain's pandas DataFrame agent and provides
    a simplified interface for data analysis tasks.

    Attributes:
        model: The LLM model to use for inference (e.g., AzureChatOpenAI).
        verbose: Whether to enable verbose logging for debugging.
        last_trace: Everything written to stdout while the agent ran during
            the most recent ``analyze`` call ("" before the first call).

    Example:
        >>> agent = DataFrameAgent(model=azure_llm)
        >>> df = pd.read_csv("sales.csv")
        >>> result = agent.analyze(df, "What are the top 5 products by revenue?")
    """

    def __init__(self, model: Any, verbose: bool = True) -> None:
        """
        Initialize the DataFrame agent.

        Args:
            model: The LLM model instance to use for inference.
                Must be a LangChain-compatible chat model.
            verbose: Enable verbose output for debugging (default: True).
        """
        self.model = model
        self.verbose = verbose
        self.last_trace = ""

    def analyze(self, df: pd.DataFrame, question: str) -> str:
        """
        Analyze a DataFrame and answer a natural language question.

        This method creates a LangChain pandas agent, constructs the full
        prompt, and invokes the agent to generate insights.

        Args:
            df: The pandas DataFrame to analyze.
            question: The natural language question about the data.

        Returns:
            str: The agent's answer. If anything fails while building or
            running the agent, the string ``"Analysis error: <details>"``
            is returned instead; this method does not raise.

        Example:
            >>> result = agent.analyze(sales_df, "Show monthly revenue trends")
            >>> print(result)  # Markdown formatted analysis
        """
        self.last_trace = ""
        buffer = io.StringIO()
        try:
            # NOTE(review): allow_dangerous_code=True opts in to running
            # model-generated Python in this process. Only expose this to
            # trusted users/data, or run it inside a sandbox.
            pandas_agent = create_pandas_dataframe_agent(
                llm=self.model,
                df=df,
                verbose=self.verbose,
                agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                allow_dangerous_code=True,
                # Executor options must go through agent_executor_kwargs;
                # loose keyword arguments are not forwarded to the executor,
                # so passing handle_parsing_errors directly has no effect.
                agent_executor_kwargs={"handle_parsing_errors": True},
            )

            # Wrap the user's question in the project's analysis prompt.
            full_prompt = get_analysis_prompt(question)

            # Keep the agent's console output off the process stdout and
            # retain it for callers via ``last_trace``.
            with contextlib.redirect_stdout(buffer):
                result = pandas_agent.invoke(full_prompt)
        except Exception as e:
            # UI-facing boundary: report the failure as text, never raise.
            return f"Analysis error: {e}"
        finally:
            self.last_trace = buffer.getvalue()

        # The executor normally returns a dict with an "output" key; fall
        # back to the string form for any other shape of response.
        if isinstance(result, dict):
            return result.get("output", str(result))
        return str(result)
def _csv_paths(files: Any) -> List[Any]:
    """Return the CSV path for every upload entry in *files*."""
    import os  # function-scope import: only needed for the PathLike check

    def is_path(value: Any) -> bool:
        return isinstance(value, (str, os.PathLike))

    if files is None:
        entries = []
    elif is_path(files) or hasattr(files, "name"):
        # A single upload (path or file-like object) rather than a list.
        entries = [files]
    else:
        entries = list(files)

    # Path-like entries are used as-is (a pathlib.Path's ``.name`` is only
    # the final component, so it must not be read here); any other entry is
    # expected to carry the full path in ``.name``.
    return [entry if is_path(entry) else entry.name for entry in entries]


def ask_agent(
    files: List[Any],
    question: str,
    model: Optional[Any] = None
) -> str:
    """
    Analyze uploaded CSV files and answer a question about the data.

    This is a convenience function that handles file loading, DataFrame
    concatenation, and agent invocation in one call.

    Args:
        files: Uploaded CSV files. Each entry is either a path
            (``str`` / ``os.PathLike``) or an object whose ``.name``
            attribute is the path, as produced by Gradio's file upload
            component. A single entry instead of a list is also accepted.
        question: The natural language question to answer about the data.
        model: LLM model to use. If None, an error message is returned.

    Returns:
        str: The analysis result, or an error message. Messages starting
        with ``"Could not read CSV files:"`` mean loading failed (including
        the case where no files were given).

    Note:
        Multiple CSV files are concatenated into a single DataFrame before
        analysis. Ensure files have compatible schemas for meaningful results.

    Example:
        >>> # With Gradio file input
        >>> result = ask_agent(uploaded_files, "What is the total revenue?")
    """
    # Step 1: Load and concatenate all uploaded CSV files.
    try:
        paths = _csv_paths(files)
        if not paths:
            return "Could not read CSV files: no files were provided."
        dataframes = [pd.read_csv(path) for path in paths]
        combined_df = pd.concat(dataframes, ignore_index=True)
    except Exception as e:
        # UI-facing boundary: any read/parse failure becomes a message.
        return f"Could not read CSV files: {e}"

    # Step 2: Create agent and perform analysis.
    if model is None:
        return "Error: No LLM model provided. Please configure the model first."
    agent = DataFrameAgent(model=model)
    return agent.analyze(combined_df, question)