# chatbot / app.py
# (Hugging Face Spaces upload header: rachelsmith9240, commit 155b610 verified)
# Import the necessary libraries
import streamlit as st
from openai import OpenAI
from pinecone import Pinecone
import os
import pandas as pd
import numpy as np
#pinecone_key_file = "pinecone_api.txt"
#with open(pinecone_key_file, "r") as f:
# for line in f:
# PINECONE_KEY = line
# break
pc = Pinecone(api_key=os.environ.get("PINECONE_KEY"))
#with open('open_ai_key.txt', "r") as f:
# for line in f:
# OPENAI_KEY = line
# break
client = OpenAI(api_key=os.environ.get("OPENAI_KEY"))
st.title("Seattle Pandas Super Duper ML Chatbot")
class Obnoxious_Agent:
    """Classifies a user query as obnoxious, machine-learning related, a
    general greeting, or something else, via a few-shot LLM prompt."""

    def __init__(self, client) -> None:
        # client: OpenAI client exposing chat.completions.create.
        self.client = client

    def set_prompt(self, query: str) -> str:
        """Build the few-shot classification prompt for the given query."""
        # Fixed: removed an accidentally duplicated "computer vision or CV"
        # example line from the original prompt.
        prompt = f'''Is this query obnoxious, related to machine learning, or general greetings?
Answer "obnoxious" if it is an obnoxious query, answer "machine learning" if it is related to machine learning,
"general greetings" if it is a general greeting, and "others" for all other queries. When considering whether
a query is related to machine learning, be sure to pay attention to common machine learning acronyms (RNN, CNN, CV, GAN)
and also consider topics from emerging fields like computer vision, deep learning, AI content generation, and others.
Examples are included.
"Query: You are stupid ; Answer: obnoxious"
"Query: poop; Answer: obnoxious"
"Query: kdkdkspapemrmn ; Answer: obnoxious"
"Query: What is a random forest? ; Answer: machine learning"
"Query: How to train a model using a GPU? ; Answer: machine learning"
"Query: What is a CNN? ; Answer: machine learning"
"Query: RNN? ; Answer: machine learning"
"Query: Causal inference? ; Answer: machine learning"
"Query: What is computer vision or CV? ; Answer: machine learning"
"Query: How are you? ; Answer: general greetings"
"Query: I like shoes; Answer: other"
Query: {query}'''
        return prompt

    def extract_action(self, response: str) -> str:
        """Map the raw model reply to an action tag.

        Returns one of 'obnoxious', 'gt' (general greeting), 'ml'
        (machine learning), or 'other'.  Fixed: the original annotation
        claimed ``-> bool`` although strings are returned.
        """
        lowered = response.lower()
        if 'obnoxious' in lowered:
            return 'obnoxious'
        elif 'general greetings' in lowered:
            return 'gt'
        elif 'machine learning' in lowered:
            return 'ml'
        else:
            return 'other'

    def check_query(self, query: str) -> str:
        """Classify *query* with the LLM and return the mapped action tag."""
        prompt = self.set_prompt(query)
        message = {"role": "user", "content": prompt}
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[message]
        )
        return self.extract_action(response.choices[0].message.content)
class Query_Agent:
    """Retrieves context for a query from a Pinecone vector index backed by
    locally stored text chunks (text_embedding.csv)."""

    def __init__(self, client, index='llm-chatbot-index') -> None:
        # Assumes the Pinecone index already exists and has been onboarded.
        self.client = client
        self.pc = Pinecone(api_key=os.environ.get("PINECONE_KEY"))
        self.index = self.pc.Index(index)
        self.df = pd.read_csv("text_embedding.csv")
        # Raw text chunks; Pinecone match ids index into this array.
        self.texts_size_250 = np.array(self.df['Text'])

    def get_embedding(self, text, model="text-embedding-ada-002"):
        """Embed *text* with OpenAI, collapsing newlines to spaces first."""
        flattened = text.replace("\n", " ")
        result = self.client.embeddings.create(input=[flattened], model=model)
        return result.data[0].embedding

    def send_pinecone_query(self, query, top_k=5, namespace="250_chunk"):
        """Query the Pinecone index with an embedding vector and return the
        raw top-k response."""
        return self.index.query(vector=query, top_k=top_k, namespace=namespace)

    def query_vector_store(self, query, top_k=5):
        """Return (concatenated matching text chunks, average match score)."""
        embedding = self.get_embedding(query)
        matches = self.send_pinecone_query(embedding, top_k)["matches"]
        total = 0.0
        chunks = []
        for match in matches:
            total += float(match['score'])
            chunks.append(self.texts_size_250[int(match['id'])])
            chunks.append('\n')
        return "".join(chunks), total / top_k

    def set_prompt(self, query, context):
        """Build the answering prompt from the query and retrieved context."""
        return f"Given the following context, explain {query}: {context}"

    def extract_action(self, response, query=None):
        """Return 'non-relevant' when retrieval scores are weak (< 0.3),
        otherwise the answering prompt built from the retrieved context.
        (The *response* argument is unused; kept for interface parity.)"""
        context, avg_score = self.query_vector_store(query)
        if avg_score < 0.3:
            return 'non-relevant'
        return self.set_prompt(query, context)
class Answering_Agent:
    """Generates the final chat answer in the configured style (mode)."""

    def __init__(self, openai_client, mode) -> None:
        # openai_client: OpenAI client exposing chat.completions.create.
        # mode: answering style injected into the prompt (e.g. 'Concise').
        self.client = openai_client
        self.mode = mode

    def generate_response(self, query, docs, conv_history, k=5):
        """Answer *query* referencing *docs* and *conv_history*.

        ``k`` is accepted for interface compatibility but currently unused.
        Returns the model's reply text.
        """
        prompt = f'''You are a {self.mode} chatbot. Answer all queries in a {self.mode} style.
I will provide a user query you must answer, relevant documents which you
should reference in your answer, and conversation history which you should
refer to for context.
Query: {query}
Conversation History: {conv_history}
Relevant Documents: {docs}
'''
        message = {"role": "user", "content": prompt}
        # Bug fix: use the injected client (self.client) rather than the
        # module-level global, so the agent honors the client it was
        # constructed with.
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[message]
        )
        return response.choices[0].message.content
class Relevant_Documents_Agent:
    """Asks the LLM whether retrieved documents are relevant to a query."""

    def __init__(self, client) -> None:
        # client: OpenAI client exposing chat.completions.create.
        self.client = client

    def get_relevance(self, query, documents, cosine_similarity) -> str:
        """Return the model's verdict text (expected: 'relevant' or
        'non-relevant') for *documents* with respect to *query*."""
        prompt = f'''Based on the following query, please decide if the following
documents are relevant to this query. For context, the average cosine similarity of these documents to this
query is {cosine_similarity}. Your response must be one of the two following [relevant, non-relevant].
Query: {query}
Documents: {documents}'''
        print("USER PROMPT:", prompt)
        completion = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}]
        )
        verdict = completion.choices[0].message.content
        print("Relevance: ", verdict)
        return verdict
class Head_Agent:
    """Top-level orchestrator: wires up the sub-agents and drives the
    Streamlit chat loop, routing each user prompt through obnoxiousness,
    relevance, and answering stages."""

    def __init__(self, mode) -> None:
        # mode: answering style forwarded to the Answering_Agent.
        self.client = OpenAI(api_key=os.environ.get("OPENAI_KEY"))
        # NOTE(review): these modes do not match the selectbox options at the
        # bottom of the file ('Concise', 'Chatty', 'Shakespearean'), and
        # evaluate_mode() below appears unused by main_loop — confirm.
        self.possible_modes = ['verbose', 'concise', 'shakespearean']
        self.mode = mode
        self.setup_sub_agents()
        # Greet the user once at construction time.
        with st.chat_message("assistant"):
            st.write(f"Welcome to your {self.mode} chatbot!")

    def setup_sub_agents(self):
        # One shared OpenAI client; the Query_Agent additionally builds its
        # own Pinecone connection internally.
        self.obnoxious_agent = Obnoxious_Agent(self.client)
        self.query_agent = Query_Agent(self.client)
        self.answering_agent = Answering_Agent(self.client, self.mode)
        self.relevance_agent = Relevant_Documents_Agent(self.client)

    def evaluate_mode(self, query):
        # Ask the LLM which entry of self.possible_modes the query matches.
        prompt = f'''Classify the following query to see if it most closely matches
an item in this list {self.possible_modes}.
Your response MUST be a single word from that list only. Query: {query}'''
        # print(prompt)
        message = {"role": "user", "content": prompt}
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[message]
        )
        # print(response)
        return response.choices[0].message.content

    def main_loop(self):
        # Runs once per Streamlit rerun: replay history, then process any
        # new chat input.
        if "openai_model" not in st.session_state:
            st.session_state["openai_model"] = "gpt-3.5-turbo"
        if "messages" not in st.session_state:
            st.session_state.messages = []
        # Replay the conversation so far.
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
        if prompt := st.chat_input("Hi, how can I help you?"):
            st.session_state.messages.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)
            # first check if prompt is obnoxious
            obnoxious = self.obnoxious_agent.check_query(prompt)
            print(obnoxious)
            if obnoxious == 'obnoxious':
                with st.chat_message("assistant"):
                    response = "Please refrain from obnoxious questions."
                    st.write(response)
            elif obnoxious == 'gt':
                # General greeting: canned reply, no vector-store lookup.
                with st.chat_message("assistant"):
                    response = "How can I assist you today?"
                    st.write(response)
            # elif obnoxious == 'other':
            #with st.chat_message("assistant"):
            # response="I can only answer questions about machine learning!"
            # st.write(response)
            else:
                # next, check whether the prompt is relevant to the corpus
                docs, cosine_similarity = self.query_agent.query_vector_store(prompt)
                response = self.relevance_agent.get_relevance(prompt, docs, cosine_similarity)
                # make this be relevance agent
                if 'non-relevant' in response.lower():
                    # Docs judged irrelevant: still answer (without docs) if
                    # the query was classified as ML; refuse otherwise.
                    if obnoxious == 'ml':
                        with st.chat_message("assistant"):
                            response = self.answering_agent.generate_response(prompt, '', st.session_state['messages'])
                            st.write(response)
                    else:
                        with st.chat_message("assistant"):
                            response = "Please ask questions only related to Machine Learning!"
                            st.write(response)
                else:
                    with st.chat_message("assistant"):
                        response = self.answering_agent.generate_response(prompt, docs, st.session_state['messages'])
                        st.write(response)
            # Persist whichever assistant response was produced above.
            st.session_state.messages.append({"role": "assistant", "content": response})
# Default chat style, persisted across Streamlit reruns in session state.
if "mode" not in st.session_state:
    st.session_state.mode = "Concise"

# Build the orchestrator (also renders the welcome message).
head_agent = Head_Agent(st.session_state.mode)

def set_mode():
    # Rebuild the answering agent with the currently stored style.
    # NOTE(review): on_change callbacks fire before the assignment below
    # stores the new selection, so this may read the previous mode; the
    # option labels here also differ from Head_Agent.possible_modes —
    # confirm intended behavior.
    head_agent.answering_agent = Answering_Agent(head_agent.client, st.session_state.mode)

st.session_state.mode = st.selectbox(
    'What kind of chatbot would you like today?',
    ('Concise', 'Chatty', 'Shakespearean'), on_change=set_mode)
st.write('You selected:', st.session_state.mode)

# Process this rerun's chat input.
head_agent.main_loop()