# NOTE: removed hosting-page scrape residue ("Spaces: / Runtime error") that was not part of the source file.
import chainlit as cl | |
import pandas as pd | |
import io | |
import matplotlib.pyplot as plt | |
import base64 | |
from io import BytesIO | |
from pandasai import SmartDataframe | |
import pandas as pd | |
from pandasai.llm import OpenAI | |
from io import StringIO | |
import matplotlib.pyplot as plt | |
import csv | |
from collections import defaultdict | |
import os | |
from langchain.agents import AgentExecutor, AgentType, initialize_agent | |
from langchain.agents.structured_chat.prompt import SUFFIX | |
from langchain.chat_models import ChatOpenAI | |
from langchain.memory import ConversationBufferMemory | |
from chainlit.action import Action | |
from chainlit.input_widget import Select, Switch, Slider | |
from langchain.tools import StructuredTool, Tool | |
# this is our tool - which is what allows our agent to generate images in the first place!
# the `description` field is of utmost importance as it is what the LLM "brain" uses to determine
# which tool to use for a given input.
# Module-level flag: set to True once any CSV has been uploaded.
# NOTE(review): as a module global this is shared across all user sessions of the process.
got_csv = False
@cl.on_chat_start  # register with Chainlit so this actually runs at chat start
async def start():
    """
    This is called when the Chainlit chat is started!

    Renders the model / streaming / temperature settings widgets, then builds
    the agent with the default selections so the first message can be handled
    without requiring a settings change.
    """
    settings = await cl.ChatSettings(
        [
            Select(
                id="Model",
                label="OpenAI - Model",
                values=["gpt-3.5-turbo", "gpt-4-1106-preview"],
                initial_index=1,  # default to gpt-4-1106-preview
            ),
            Switch(id="Streaming", label="OpenAI - Stream Tokens", initial=True),
            Slider(
                id="Temperature",
                label="OpenAI - Temperature",
                initial=0,
                min=0,
                max=2,
                step=0.1,
            ),
        ]
    ).send()
    # Build the agent immediately with the defaults just sent to the client.
    await setup_agent(settings)
@cl.on_settings_update  # rebuild the agent whenever the user changes settings
async def setup_agent(settings):
    """(Re)build the LangChain agent from the chat settings.

    Args:
        settings: mapping with "Model", "Streaming" and "Temperature" keys,
            produced by the ChatSettings widgets defined in ``start``.

    Side effects:
        Stores the constructed agent in the Chainlit user session under "agent".
    """
    print("Setup agent with following settings: ", settings)
    # We set up our agent with the user selected (or default) settings here.
    llm = ChatOpenAI(
        temperature=settings["Temperature"],
        streaming=settings["Streaming"],
        model=settings["Model"],
    )
    # Conversation memory lets the agent refer back to earlier turns.
    memory = get_memory()
    # Prepend the running chat history to the standard structured-chat suffix
    # so every prompt carries the conversation so far.
    _suffix = "Chat history:\n{chat_history}\n\n" + SUFFIX
    # The agent decides between responding with text or invoking the
    # chart-generating tool.
    agent = initialize_agent(
        llm=llm,
        tools=[generate_most_valuable_feature],
        agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
        memory=memory,
        agent_kwargs={
            "suffix": _suffix,
            "input_variables": ["input", "agent_scratchpad", "chat_history"],
        },
    )
    # Store per-user so concurrent sessions don't share a single agent.
    cl.user_session.set("agent", agent)
def get_memory():
    """Build the conversation memory used by the agent.

    The memory key "chat_history" matches the ``{chat_history}`` placeholder
    injected into the agent's prompt suffix, letting the agent remember what
    was said before.
    """
    memory = ConversationBufferMemory(memory_key="chat_history")
    return memory
def _read_survey_columns(source):
    """Read the survey CSV into a ``{header: [string values]}`` mapping.

    Args:
        source: a filesystem path, a file-like object (the Chainlit upload
            arrives as BytesIO), or None to fall back to the legacy
            hard-coded "upload.csv" path.
    """
    if source is None:
        # Backwards-compatible fallback to the previous hard-coded location.
        handle, owns_handle = open("upload.csv"), True
    elif hasattr(source, "read"):
        data = source.read()
        if isinstance(data, bytes):
            data = data.decode("utf-8")
        handle, owns_handle = StringIO(data), False
    else:
        handle, owns_handle = open(source), True
    columns = defaultdict(list)
    try:
        reader = csv.reader(handle)
        headers = next(reader, [])  # tolerate an empty file
        for row in reader:
            for header, value in zip(headers, row):
                columns[header].append(value)
    finally:
        if owns_handle:
            handle.close()
    return columns


def find_most_valuable_feature(csv_file):
    """Summarize the top "Most Valuable Feature" answers and plot the counts.

    Args:
        csv_file: path or file-like object holding the survey CSV; when None
            the legacy "upload.csv" file is read instead.

    Returns:
        BytesIO containing a PNG bar chart, positioned at the start.
    """
    print("find_most_valuable_feature")
    print(csv_file)
    smart_llm = OpenAI(api_token=os.environ["OPENAI_API_KEY"])
    # Bug fix: the uploaded file was previously ignored in favor of a
    # hard-coded "upload.csv" — read from the provided source instead.
    columns = _read_survey_columns(csv_file)
    # Manually create a DataFrame with an explicit column selection so any
    # unexpected CSV columns are dropped (all values kept as strings).
    smart_df = pd.DataFrame({
        "ID": columns["ID"],
        "Date and Time": columns["Date and Time"],
        "Business Unit": columns["Business Unit"],
        "Usage Change": columns["Usage Change"],
        "Wolftech Improvement": columns["Wolftech Improvement"],
        "Likelihood to Recommend": columns["Likelihood to Recommend"],
        "Effective Training": columns["Effective Training"],
        "Most Valuable Feature": columns["Most Valuable Feature"]
    })
    smart_df = SmartDataframe(smart_df, config={"llm": smart_llm})
    out = smart_df.chat('Summarize the top three "Most Valuable Feature" for people where Usage Changed was Increased?')
    print(out)
    # NOTE(review): assumes pandasai returns a DataFrame with
    # "Most Valuable Feature" and "Count" columns — confirm this holds for
    # the pinned pandasai version.
    df = out
    # Plotting
    plt.figure(figsize=(10, 6))
    plt.bar(df["Most Valuable Feature"], df["Count"], color='blue')
    plt.xlabel('Most Valuable Feature')
    plt.ylabel('Count')
    plt.title('Count of Most Valuable Features')
    plt.xticks(rotation=45, ha="right")  # Rotate labels for better readability
    plt.tight_layout()  # Adjust layout for better fit
    # Save the plot to a BytesIO object
    image_buffer = BytesIO()
    plt.savefig(image_buffer, format='png')
    plt.close()  # release the figure so repeated calls don't leak memory
    image_buffer.seek(0)
    return image_buffer
# Expose the analysis function to the agent as a LangChain tool. The
# `description` is what the LLM uses to decide when to invoke it, and
# `return_direct=True` sends the tool output straight back to the user.
generate_most_valuable_feature = Tool.from_function(
    func=find_most_valuable_feature,
    name="Find most valuable feature",
    # was an f-string with no placeholders; plain literal is equivalent
    description="Useful for finding the most valuable feature from a CSV file",
    return_direct=True,
)
def process_and_analyze_data(csv_file):
    """Render a bar chart of response counts per 'Business Unit'.

    Args:
        csv_file: path or file-like object readable by ``pandas.read_csv``.

    Returns:
        BytesIO containing the chart as a PNG, positioned at the start.
    """
    # Read CSV file
    csv_data = pd.read_csv(csv_file)
    # Logging to check data loading
    print(f"CSV Data Loaded: {csv_data.head()}")
    # Count of responses in each category of 'Business Unit'
    business_unit_counts = csv_data['Business Unit'].value_counts()
    # Plotting the count of responses in each 'Business Unit' category
    plt.figure(figsize=(10, 6))
    business_unit_counts.plot(kind='bar')
    plt.title('Count of Responses by Business Unit')
    plt.xlabel('Business Unit')
    plt.ylabel('Count')
    plt.xticks(rotation=45)
    plt.tight_layout()
    # Save the plot to a BytesIO object
    image_buffer = BytesIO()
    plt.savefig(image_buffer, format='png')
    plt.close()  # release the figure so repeated calls don't leak memory
    image_buffer.seek(0)
    return image_buffer
# Function to handle message events
@cl.on_message  # register with Chainlit so incoming messages reach this handler
async def handle_message(message: cl.Message):
    """Handle an incoming chat message.

    If the message carries a CSV attachment, run the feature analysis and
    reply with the generated chart; otherwise route the text to the agent,
    or prompt for a CSV if none has been uploaded yet.
    """
    global got_csv, agent
    # Retrieve the first CSV attachment (if any) from the message elements.
    csv_file = next(
        (
            io.BytesIO(file.content)
            for file in message.elements or []
            if file.mime and "csv" in file.mime
        ),
        None,
    )
    # Logging to check file retrieval
    print(f"CSV File: {csv_file}")
    if csv_file:
        got_csv = True
        try:
            image_buffer = find_most_valuable_feature(csv_file)
            # Get bytes data from BytesIO object and cache it in the session.
            image_data = image_buffer.getvalue()
            name = "chart"
            cl.user_session.set(name, image_data)
            cl.user_session.set("generated_image", name)
            await cl.Message(content="Based on the people who increased usage, here are the most valuable features...").send()
            generated_image = cl.user_session.get(name)
            agent = cl.user_session.get("agent")
            # Run the agent for its side effects on memory; the chart message
            # below is what the user sees for this turn.
            res = await cl.make_async(agent.run)(
                input=message.content, callbacks=[cl.LangchainCallbackHandler()]
            )
            elements = [
                cl.Image(
                    content=generated_image,
                    name=name,
                    display="inline",
                    size="large",
                )
            ]
            await cl.Message(content=name, elements=elements, actions=[]).send()
        except Exception as e:
            # Surface the failure to the user rather than crashing the handler.
            await cl.Message(content=f"An error occurred: {str(e)}").send()
    else:
        if not got_csv:
            await cl.Message(content="Please upload a CSV file.").send()
        else:
            # Bug fix: fetch the agent from the user session (where
            # setup_agent stores it) instead of relying on the module-level
            # global, which is only assigned in the CSV branch and is shared
            # across sessions.
            agent = cl.user_session.get("agent")
            res = await cl.make_async(agent.run)(
                input=message.content, callbacks=[cl.LangchainCallbackHandler()]
            )
            await cl.Message(content=res).send()
# Run the ChainLit app
# NOTE(review): Chainlit apps are normally launched with `chainlit run app.py`;
# confirm that `cl.run()` exists in the pinned chainlit version before relying
# on this entry point.
if __name__ == "__main__":
    cl.run()