import json
import re

import gradio as gr
import pandas as pd
from langchain.agents import create_pandas_dataframe_agent
from langchain.chat_models import ChatOpenAI

# Matches a leading run of non-word characters and underscores (bullets,
# dashes, asterisks, ...). Digits and letters are word characters, so a
# numbered line such as "1. Foo" keeps its number.
_LEADING_NON_ALNUM = re.compile(r"^[\W_]+")


def _parse_tasks(planning):
    """Split the agent's planning answer into a list of non-empty task strings.

    Each line is stripped of surrounding whitespace and of any leading
    non-alphanumeric characters; lines that end up empty are dropped.
    """
    cleaned = (
        _LEADING_NON_ALNUM.sub("", line.strip()) for line in planning.splitlines()
    )
    return [task for task in cleaned if task]


def process_inputs(open_api_key, open_api_model, description, csv_file):
    """Run an LLM-driven exploratory analysis over an uploaded CSV file.

    The function builds a pandas dataframe agent, asks it to describe the
    data, derives a list of analysis tasks, runs each task, then asks for
    plots and hypotheses. Every task result is also appended to
    ``result.txt`` in the current working directory.

    Args:
        open_api_key: OpenAI API key passed to ``ChatOpenAI``.
        open_api_model: Name of the chat model to use.
        description: User-supplied context, prepended to the agent prompt.
        csv_file: Uploaded file object; its ``name`` attribute is read as the
            path of the CSV to load.

    Returns:
        A JSON-formatted string with the keys ``description``, ``planning``,
        ``result``, ``plots`` and ``hypothesis``; or a human-readable error
        message if the data could not be loaded or the agent could not be
        created.
    """
    # Set up the agent. This is the UI boundary, so any failure (missing
    # upload, unreadable CSV, rejected credentials) is reported to the user
    # instead of propagating.
    try:
        df = pd.read_csv(csv_file.name)
        df_agent = create_pandas_dataframe_agent(
            ChatOpenAI(
                temperature=0,
                openai_api_key=open_api_key,
                model=open_api_model,
            ),
            df,
            verbose=True,
        )
    except Exception as e:
        print(e)
        return f"Failed to set up the agent: {e}"

    # Prepend the user's context to the agent's prompt template.
    df_agent.agent.llm_chain.prompt.template = (
        "Context: " + description + df_agent.agent.llm_chain.prompt.template
    )

    # Planning
    history = {}
    # Kept separate from the `description` parameter so the user's input is
    # not silently overwritten by the agent's answer.
    data_description = df_agent.run("Describe the data")
    history["description"] = data_description

    planning = df_agent.run(
        f"Description: {data_description} Based on this description and the "
        "data itself, what kind of analysis can we perform on the data? "
        "Return result in bullet points without the bullets, just new lines"
    )
    print(planning)

    tasks = _parse_tasks(planning)
    history["planning"] = tasks

    # Perform tasks. The context manager guarantees the log file is closed
    # even when one of the agent runs raises.
    result = []
    with open("result.txt", "a", encoding="utf-8") as log_file:
        for task in tasks:
            print(f"The task is: {task}")
            data = df_agent.run(task)
            result.append(data)
            log_file.write(data + "\n")
    history["result"] = result

    plots = df_agent.run(
        "Based on the information given below, create some insightful plots "
        "using matplotlib, seaborn or plotly with python_repl_ast. Save these "
        "plots in the root directory with a relevant name, and return a dict "
        "where keys are the name of the files and the values are the insights "
        "dervied from the plot. Information about the data is as follows: "
        f"{history}"
    )
    history["plots"] = plots

    hypothesis = df_agent.run(
        "Based on the information, data, and context given, form valid "
        "hypothesis for further investigations. "
        f"Information about the data is as follows: {history}"
    )
    history["hypothesis"] = hypothesis

    # json.dumps returns the serialized text; json.dump would write into a
    # file object and return None.
    return json.dumps(history, indent=2)


# Gradio UI: four inputs mapped positionally onto process_inputs' parameters,
# one text output showing the JSON report (or an error message).
iface = gr.Interface(
    fn=process_inputs,
    inputs=[
        gr.Textbox(lines=1, label="OpenAI API Key"),
        gr.Dropdown(["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4"], label="Model"),
        gr.Textbox(lines=2, label="Brief description of the data"),
        gr.File(label="Upload CSV Only"),
    ],
    outputs=gr.Textbox(),
)

# Launched at import time, as in the original script.
iface.launch()