"""Agent for working with pandas objects.""" from io import IOBase from typing import Any, Dict, List, Optional, Sequence, Tuple, Union from langchain._api import warn_deprecated from langchain.agents import AgentExecutor, BaseSingleActionAgent from langchain_experimental.agents.agent_toolkits.pandas.prompt import ( FUNCTIONS_WITH_DF, FUNCTIONS_WITH_MULTI_DF, MULTI_DF_PREFIX, MULTI_DF_PREFIX_FUNCTIONS, PREFIX, PREFIX_FUNCTIONS, SUFFIX_NO_DF, SUFFIX_WITH_DF, SUFFIX_WITH_MULTI_DF, ) from langchain.agents.mrkl.base import ZeroShotAgent from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent from langchain.agents.types import AgentType from langchain.callbacks.base import BaseCallbackManager from langchain.chains.llm import LLMChain from langchain.schema import BasePromptTemplate from langchain.schema.language_model import BaseLanguageModel from langchain.schema.messages import SystemMessage from langchain.tools import BaseTool from langchain_experimental.tools.python.tool import PythonAstREPLTool def _get_multi_prompt( dfs: List[Any], prefix: Optional[str] = None, suffix: Optional[str] = None, input_variables: Optional[List[str]] = None, include_df_in_prompt: Optional[bool] = True, number_of_head_rows: int = 5, ) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]: num_dfs = len(dfs) if suffix is not None: suffix_to_use = suffix include_dfs_head = True elif include_df_in_prompt: suffix_to_use = SUFFIX_WITH_MULTI_DF include_dfs_head = True else: suffix_to_use = SUFFIX_NO_DF include_dfs_head = False if input_variables is None: input_variables = ["input", "agent_scratchpad", "num_dfs"] if include_dfs_head: input_variables += ["dfs_head"] if prefix is None: prefix = MULTI_DF_PREFIX df_locals = {} for i, dataframe in enumerate(dfs): df_locals[f"df{i + 1}"] = dataframe tools = [PythonAstREPLTool(locals=df_locals)] prompt = ZeroShotAgent.create_prompt( tools, prefix=prefix, suffix=suffix_to_use, input_variables=input_variables ) partial_prompt = prompt.partial() if "dfs_head" in input_variables: dfs_head = "\n\n".join([d.head(number_of_head_rows).to_markdown() for d in dfs]) partial_prompt = partial_prompt.partial(num_dfs=str(num_dfs), dfs_head=dfs_head) if "num_dfs" in input_variables: partial_prompt = partial_prompt.partial(num_dfs=str(num_dfs)) return partial_prompt, tools def _get_single_prompt( df: Any, prefix: Optional[str] = None, suffix: Optional[str] = None, input_variables: Optional[List[str]] = None, include_df_in_prompt: Optional[bool] = True, number_of_head_rows: int = 5, format_instructions=FORMAT_INSTRUCTIONS, ) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]: if suffix is not None: suffix_to_use = suffix include_df_head = True elif include_df_in_prompt: suffix_to_use = SUFFIX_WITH_DF include_df_head = True else: suffix_to_use = SUFFIX_NO_DF include_df_head = False if input_variables is None: input_variables = ["input", "agent_scratchpad"] if include_df_head: input_variables += ["df_head"] if prefix is None: prefix = PREFIX tools = [PythonAstREPLTool(locals={"df": df})] prompt = ZeroShotAgent.create_prompt( tools, prefix=prefix, suffix=suffix_to_use, input_variables=input_variables, format_instructions=format_instructions, ) partial_prompt = prompt.partial() if "df_head" in input_variables: partial_prompt = partial_prompt.partial( df_head=str(df.head(number_of_head_rows).to_markdown()) ) return partial_prompt, tools def _get_prompt_and_tools( df: Any, prefix: Optional[str] = None, suffix: Optional[str] = None, input_variables: Optional[List[str]] = None, include_df_in_prompt: Optional[bool] = True, number_of_head_rows: int = 5, format_instructions=FORMAT_INSTRUCTIONS, ) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]: try: import pandas as pd pd.set_option("display.max_columns", None) except ImportError: raise ImportError( "pandas package not found, please install with `pip install pandas`" ) if include_df_in_prompt is not None and suffix is not None: raise ValueError("If suffix is specified, include_df_in_prompt should not be.") if isinstance(df, list): for item in df: if not isinstance(item, pd.DataFrame): raise ValueError(f"Expected pandas object, got {type(df)}") return _get_multi_prompt( df, prefix=prefix, suffix=suffix, input_variables=input_variables, include_df_in_prompt=include_df_in_prompt, number_of_head_rows=number_of_head_rows, ) else: if not isinstance(df, pd.DataFrame): raise ValueError(f"Expected pandas object, got {type(df)}") return _get_single_prompt( df, prefix=prefix, suffix=suffix, input_variables=input_variables, include_df_in_prompt=include_df_in_prompt, number_of_head_rows=number_of_head_rows, format_instructions=format_instructions, ) def _get_functions_single_prompt( df: Any, prefix: Optional[str] = None, suffix: Optional[str] = None, include_df_in_prompt: Optional[bool] = True, number_of_head_rows: int = 5, ) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]: if suffix is not None: suffix_to_use = suffix if include_df_in_prompt: suffix_to_use = suffix_to_use.format( df_head=str(df.head(number_of_head_rows).to_markdown()) ) elif include_df_in_prompt: suffix_to_use = FUNCTIONS_WITH_DF.format( df_head=str(df.head(number_of_head_rows).to_markdown()) ) else: suffix_to_use = "" if prefix is None: prefix = PREFIX_FUNCTIONS tools = [PythonAstREPLTool(locals={"df": df})] system_message = SystemMessage(content=prefix + suffix_to_use) prompt = OpenAIFunctionsAgent.create_prompt(system_message=system_message) return prompt, tools def _get_functions_multi_prompt( dfs: Any, prefix: Optional[str] = None, suffix: Optional[str] = None, include_df_in_prompt: Optional[bool] = True, number_of_head_rows: int = 5, ) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]: if suffix is not None: suffix_to_use = suffix if include_df_in_prompt: dfs_head = "\n\n".join( [d.head(number_of_head_rows).to_markdown() for d in dfs] ) suffix_to_use = suffix_to_use.format( dfs_head=dfs_head, ) elif include_df_in_prompt: dfs_head = "\n\n".join([d.head(number_of_head_rows).to_markdown() for d in dfs]) suffix_to_use = FUNCTIONS_WITH_MULTI_DF.format( dfs_head=dfs_head, ) else: suffix_to_use = "" if prefix is None: prefix = MULTI_DF_PREFIX_FUNCTIONS prefix = prefix.format(num_dfs=str(len(dfs))) df_locals = {} for i, dataframe in enumerate(dfs): df_locals[f"df{i + 1}"] = dataframe tools = [PythonAstREPLTool(locals=df_locals)] system_message = SystemMessage(content=prefix + suffix_to_use) prompt = OpenAIFunctionsAgent.create_prompt(system_message=system_message) return prompt, tools def _get_functions_prompt_and_tools( df: Any, prefix: Optional[str] = None, suffix: Optional[str] = None, input_variables: Optional[List[str]] = None, include_df_in_prompt: Optional[bool] = True, number_of_head_rows: int = 5, ) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]: try: import pandas as pd pd.set_option("display.max_columns", None) except ImportError: raise ImportError( "pandas package not found, please install with `pip install pandas`" ) if input_variables is not None: raise ValueError("`input_variables` is not supported at the moment.") if include_df_in_prompt is not None and suffix is not None: raise ValueError("If suffix is specified, include_df_in_prompt should not be.") if isinstance(df, list): for item in df: if not isinstance(item, pd.DataFrame): raise ValueError(f"Expected pandas object, got {type(df)}") return _get_functions_multi_prompt( df, prefix=prefix, suffix=suffix, include_df_in_prompt=include_df_in_prompt, number_of_head_rows=number_of_head_rows, ) else: if not isinstance(df, pd.DataFrame): raise ValueError(f"Expected pandas object, got {type(df)}") return _get_functions_single_prompt( df, prefix=prefix, suffix=suffix, include_df_in_prompt=include_df_in_prompt, number_of_head_rows=number_of_head_rows, ) def create_pandas_dataframe_agent( llm: BaseLanguageModel, df: Any, agent_type: AgentType = AgentType.ZERO_SHOT_REACT_DESCRIPTION, callback_manager: Optional[BaseCallbackManager] = None, prefix: Optional[str] = None, suffix: Optional[str] = None, input_variables: Optional[List[str]] = None, verbose: bool = False, return_intermediate_steps: bool = False, max_iterations: Optional[int] = 15, max_execution_time: Optional[float] = None, early_stopping_method: str = "force", agent_executor_kwargs: Optional[Dict[str, Any]] = None, include_df_in_prompt: Optional[bool] = True, number_of_head_rows: int = 5, extra_tools: Sequence[BaseTool] = (), format_instructions="", **kwargs: Any, ) -> AgentExecutor: """Construct a pandas agent from an LLM and dataframe.""" warn_deprecated( since="0.0.314", message=( "On 2023-10-27 this module will be be deprecated from langchain, and " "will be available from the langchain-experimental package." "This code is already available in langchain-experimental." "See https://github.com/langchain-ai/langchain/discussions/11680." ), pending=True, ) agent: BaseSingleActionAgent if agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION: prompt, base_tools = _get_prompt_and_tools( df, prefix=prefix, suffix=suffix, input_variables=input_variables, include_df_in_prompt=include_df_in_prompt, number_of_head_rows=number_of_head_rows, format_instructions=format_instructions, ) tools = base_tools + list(extra_tools) llm_chain = LLMChain( llm=llm, prompt=prompt, callback_manager=callback_manager, ) tool_names = [tool.name for tool in tools] agent = ZeroShotAgent( llm_chain=llm_chain, allowed_tools=tool_names, callback_manager=callback_manager, **kwargs, ) elif agent_type == AgentType.OPENAI_FUNCTIONS: _prompt, base_tools = _get_functions_prompt_and_tools( df, prefix=prefix, suffix=suffix, input_variables=input_variables, include_df_in_prompt=include_df_in_prompt, number_of_head_rows=number_of_head_rows, ) tools = base_tools + list(extra_tools) agent = OpenAIFunctionsAgent( llm=llm, prompt=_prompt, tools=tools, callback_manager=callback_manager, **kwargs, ) else: raise ValueError(f"Agent type {agent_type} not supported at the moment.") return AgentExecutor.from_agent_and_tools( agent=agent, tools=tools, callback_manager=callback_manager, verbose=verbose, return_intermediate_steps=return_intermediate_steps, max_iterations=max_iterations, max_execution_time=max_execution_time, early_stopping_method=early_stopping_method, **(agent_executor_kwargs or {}), ) def create_csv_agent( llm: BaseLanguageModel, path: Union[str, IOBase, List[Union[str, IOBase]]], pandas_kwargs: Optional[dict] = None, **kwargs: Any, ) -> AgentExecutor: """Create csv agent by loading to a dataframe and using pandas agent.""" try: import pandas as pd except ImportError: raise ImportError( "pandas package not found, please install with `pip install pandas`" ) _kwargs = pandas_kwargs or {} if isinstance(path, (str, IOBase)): df = pd.read_csv(path, **_kwargs) elif isinstance(path, list): df = [] for item in path: if not isinstance(item, (str, IOBase)): raise ValueError(f"Expected str or file-like object, got {type(path)}") df.append(pd.read_csv(item, **_kwargs)) else: raise ValueError(f"Expected str, list, or file-like object, got {type(path)}") return create_pandas_dataframe_agent(llm, df, **kwargs)