Update app.py
Add a routing agent to help determine when the vector DB is needed for a query and when it should be avoided.
We will likely need to add another route option that helps us construct a new query to the vector DB from the chat-history context (rather than just the individual prompt on its own).
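The diff below calls `routing_agent(prompt, OPENAI_API, message_history)` but does not define it in this file. A minimal sketch of what such a route classifier might look like, assuming the legacy `openai` (pre-1.0) SDK that the rest of app.py uses; the system-prompt wording and the "1"/"2" labels are assumptions, not the committed code:

```python
import openai

def routing_agent(prompt, openai_api_key, message_history):
    """Return "1" when the course-catalog vector DB should be queried,
    "2" when the chat model can answer from conversation context alone.
    Hypothetical sketch: the prompt wording and labels are assumptions."""
    openai.api_key = openai_api_key
    system_prompt = (
        "You route queries for a course-finding assistant. Reply with exactly "
        "one character: '1' if answering requires searching the course catalog, "
        "'2' if the chat history already contains enough information.\n\n"
        "Chat history: " + message_history
    )
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        temperature=0,  # deterministic routing decision
    )
    return response["choices"][0]["message"]["content"].strip()
```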
app.py
CHANGED
@@ -7,6 +7,8 @@ import os
 import json
 import getpass
 import openai
+
+from openai import OpenAI
 
 from langchain.vectorstores import Pinecone
 from langchain.embeddings import OpenAIEmbeddings
@@ -35,10 +37,6 @@ index = pinecone.Index(index_name)
 
 k = 5
 
-
-
-
-
 st.title("USC GPT - Find the perfect class")
 
 class_time = st.slider(
@@ -57,6 +55,23 @@ units = st.slider(
 assistant = st.chat_message("assistant")
 initial_message = "How can I help you today?"
 
+def get_rag_results(prompt):
+    '''
+    1. Remove filters from the prompt to optimize the success of the RAG step.
+    2. Query the Pinecone DB and return the top 25 results based on cosine similarity.
+    3. Rerank the results from the vector DB using a BERT-based cross encoder.
+    '''
+    query = prompt
+    query = filter_agent(query, OPENAI_API)  # strip filter criteria from the prompt
+    response = index.query(
+        vector=embeddings.embed_query(query),
+        top_k=25,
+        include_metadata=True
+    )
+    response = reranker(query, response)  # BERT cross encoder for reranking
+
+    return response
+
 if "messages" not in st.session_state:
     st.session_state.messages = []
     with st.chat_message("assistant"):
@@ -72,59 +87,33 @@ if prompt := st.chat_input("What kind of class are you looking for?"):
     with st.chat_message("assistant"):
         message_placeholder = st.empty()
         full_response = ""
-
-
-
-
-
-
-
-
-
-
-
+
+        messages = [{"role": m["role"], "content": m["content"]}
+                    for m in st.session_state.messages]
+        message_history = " ".join([message["content"] for message in messages])
+
+        route = routing_agent(prompt, OPENAI_API, message_history)
+
+        if route == "1":
+            ## Option 1: answer with help from the vector DB
+            rag_response = get_rag_results(prompt)
+            result_query = 'Original Query:' + prompt + 'Query Results:' + str(rag_response)
+            assistant_response = results_agent(result_query, OPENAI_API)
+        else:
+            ## Option 2: answer without accessing the database
+            assistant_response = openai.ChatCompletion.create(
+                model="gpt-4",
+                messages=[
+                    {"role": m["role"], "content": m["content"]}
+                    for m in st.session_state.messages
+                ]
+            )["choices"][0]["message"]["content"]
+
+        ## Display the response regardless of route
         for chunk in assistant_response.split():
             full_response += chunk + " "
             time.sleep(0.05)
             message_placeholder.markdown(full_response + "▌")
         message_placeholder.markdown(full_response)
         st.session_state.messages.append({"role": "assistant", "content": full_response})
-
-
-
-
-
-# if prompt := st.chat_input("What kind of class are you looking for?"):
-#     # Display user message in chat message container
-#     with st.chat_message("user"):
-#         st.markdown(prompt)
-#     # Add user message to chat history
-#     st.session_state.messages.append({"role": "user", "content": prompt})
-
-#     response = filter_agent(prompt, OPENAI_API)
-#     query = response
-
-#     response = index.query(
-#         vector= embeddings.embed_query(query),
-#         # filter= build_filter(json),
-#         top_k=5,
-#         include_metadata=True
-#     )
-#     response = reranker(query, response)
-#     result_query = 'Original Query:' + query + 'Query Results:' + str(response)
-#     assistant_response = results_agent(result_query, OPENAI_API)
-
-#     if assistant_response:
-#         with st.chat_message("assistant"):
-#             message_placeholder = st.empty()
-#             full_response = ""
-#             # Simulate stream of response with milliseconds delay
-#             for chunk in assistant_response.split():
-#                 full_response += chunk + " "
-#                 time.sleep(0.05)
-#                 # Add a blinking cursor to simulate typing
-#                 message_placeholder.markdown(full_response + "▌")
-#             message_placeholder.markdown(full_response)
-#             # Add assistant response to chat history
-#             st.session_state.messages.append({"role": "assistant", "content": full_response})
 
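Note that the new `from openai import OpenAI` import belongs to the 1.x SDK, while the non-DB branch above calls the legacy pre-1.0 `openai.ChatCompletion` interface; only one of the two styles works against a given installed version. If the project moves fully to the 1.x client, the fallback call would look roughly like this (a sketch, assuming `OPENAI_API` holds the API key):

```python
from openai import OpenAI

client = OpenAI(api_key=OPENAI_API)  # assumes OPENAI_API holds the key
completion = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": m["role"], "content": m["content"]}
              for m in st.session_state.messages],
)
assistant_response = completion.choices[0].message.content
```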
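`reranker` is likewise defined outside this file. One plausible shape for the BERT-based cross-encoder step the `get_rag_results` docstring describes, using `sentence-transformers`; the model name, the `"text"` metadata field, and the final top-5 cut are assumptions, not the committed code:

```python
from sentence_transformers import CrossEncoder

# Hypothetical sketch of the reranking step; the model name and the metadata
# layout of the Pinecone matches are assumptions.
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

def reranker(query, pinecone_response, top_n=5):
    """Re-score Pinecone matches with a cross encoder and keep the best top_n."""
    matches = pinecone_response["matches"]
    pairs = [(query, m["metadata"]["text"]) for m in matches]
    scores = cross_encoder.predict(pairs)  # one relevance score per (query, doc) pair
    ranked = sorted(zip(scores, matches), key=lambda pair: pair[0], reverse=True)
    return [match for _, match in ranked[:top_n]]
```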