Update app.py
app.py CHANGED
@@ -52,7 +52,7 @@ class MyCallbackHandler(BaseCallbackHandler):
             [{"role": "assistant", "content": thought}, {"role": "assistant", "content": calling_tool}]
         )
         # Add the response to the chat window
-        with
+        with st.chat_message("assistant"):
             st.markdown(thought)
             st.markdown(calling_tool)
 
@@ -83,7 +83,7 @@ class MyCallbackHandler(BaseCallbackHandler):
         st.session_state.messages.append(
             {"role": "assistant", "content": tool_output}
         )
-        with
+        with st.chat_message("assistant"):
             st.markdown(tool_output)
 
 my_callback_handler = MyCallbackHandler()
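Both callback hunks follow the same pattern: persist the step to st.session_state.messages, then mirror it into the chat UI. A minimal sketch of a handler built this way (hypothetical class name; on_agent_action and on_tool_end are the standard BaseCallbackHandler hooks this diff appears to be overriding):

    import streamlit as st
    from langchain.callbacks.base import BaseCallbackHandler

    class StreamlitChatCallbackHandler(BaseCallbackHandler):
        """Mirrors agent steps into the Streamlit chat as they happen."""

        def on_agent_action(self, action, **kwargs):
            # action.log carries the LLM's thought plus the chosen tool and input
            with st.chat_message("assistant"):
                st.markdown(action.log)

        def on_tool_end(self, output, **kwargs):
            # raw tool output, shown as an intermediate assistant message
            with st.chat_message("assistant"):
                st.markdown(str(output))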
@@ -121,7 +121,7 @@ if 'bm25_n_similar_documents' not in st.session_state:
     st.session_state['bm25_n_similar_documents'] = 5 # number of chunks returned by bm25 retriever (keyword)
 
 if 'retriever_config' not in st.session_state:
-    st.session_state['retriever_config'] = '
+    st.session_state['retriever_config'] = 'Ensemble (Both Re-Ranked)' # choose one of ['semantic', 'keyword', 'ensemble']
 
 if 'keyword_retriever_weight' not in st.session_state:
     st.session_state['keyword_retriever_weight'] = 0.3 # choose between 0 and 1, only when using ensemble
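Each default here is guarded by its own `not in st.session_state` check. The same initialization can be written more compactly as a single loop over a defaults table (a sketch; values copied from the diff):

    import streamlit as st

    defaults = {
        'bm25_n_similar_documents': 5,    # chunks returned by the BM25 (keyword) retriever
        'retriever_config': 'Ensemble (Both Re-Ranked)',
        'keyword_retriever_weight': 0.3,  # between 0 and 1, only used by the ensemble retriever
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value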
@@ -160,7 +160,6 @@ countries = [
 
 
 ################################ Get LLM and Embeddings ################################
-# when LLM config change we will call the function again
 def get_llm():
     # This is an inference endpoint API from huggingface, the model is not run locally, it is run on huggingface
     # It is a free API that is very good for deploying online for quick testing without users having to deploy a local LLM
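Per the comments, the model is served by Hugging Face's hosted inference API rather than run locally. A sketch of what such a factory can look like with LangChain's HuggingFaceEndpoint wrapper; the body below is an assumption for illustration, not the app's actual implementation (it reads the same session-state keys the sidebar sets, and expects HUGGINGFACEHUB_API_TOKEN in the environment):

    import streamlit as st
    from langchain_community.llms import HuggingFaceEndpoint

    def get_llm():
        # hosted inference API: nothing runs locally, so the Space stays lightweight
        return HuggingFaceEndpoint(
            repo_id=st.session_state.get('model', 'mistralai/Mixtral-8x7B-Instruct-v0.1'),
            temperature=st.session_state.get('temperature', 0.05),
            max_new_tokens=st.session_state.get('max_new_tokens', 500),
        )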
@@ -185,6 +184,11 @@ def get_embeddings():
 llm = get_llm()
 hf_embeddings = get_embeddings()
 
+# when LLM config is changed we will call this function
+def update_llm():
+    global llm
+    llm = get_llm()
+
 
 ################################ Download and Initialize Pre-Built Retrievers ################################
 
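The `global` rebuild works because Streamlit re-runs the whole script on every widget interaction, so update_llm only has to swap the module-level object before later code uses it. An alternative sketch without globals is to cache the constructor on its config, so a changed widget value builds a fresh instance and an unchanged one is reused (assumes a config-taking variant of get_llm, as in the sketch above):

    import streamlit as st
    from langchain_community.llms import HuggingFaceEndpoint

    @st.cache_resource
    def build_llm(model: str, temperature: float, max_new_tokens: int):
        # cached per unique (model, temperature, max_new_tokens) tuple
        return HuggingFaceEndpoint(repo_id=model, temperature=temperature,
                                   max_new_tokens=max_new_tokens)

    llm = build_llm(st.session_state['model'],
                    st.session_state['temperature'],
                    st.session_state['max_new_tokens'])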
@@ -238,6 +242,12 @@ def get_retrievers():
 
 chroma_db, bm25_retrievers = get_retrievers()
 
+# when retriever config is changed we will call this function
+def update_retrievers():
+    global chroma_db
+    global bm25_retrievers
+    chroma_db, bm25_retrievers = get_retrievers()
+
 ################################ Tools for Agent to Use ################################
 
 # The most important tool is the first one, which uses a RetrievalQA chain to answer a question about a specific country's ESG policies,
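get_retrievers is rebuilt wholesale whenever a document setting changes, mirroring update_llm above. A sketch of what building the two retriever families typically involves (hypothetical helper; the app's plural bm25_retrievers suggests one keyword retriever per country, which this sketch flattens to a single collection):

    import streamlit as st
    from langchain_community.retrievers import BM25Retriever
    from langchain_community.vectorstores import Chroma

    def build_retrievers(docs, embeddings):
        # semantic retriever backed by a Chroma vector store
        chroma_db = Chroma.from_documents(docs, embeddings)
        # keyword retriever; k mirrors the session-state setting
        bm25 = BM25Retriever.from_documents(docs)
        bm25.k = st.session_state['bm25_n_similar_documents']
        return chroma_db, bm25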
@@ -276,7 +286,7 @@ def retrieve_answer_for_country(query_and_country: str) -> str: # TODO, change d
     # ensemble (below) reranks results from both retrievers above
     ensemble = EnsembleRetriever(retrievers=[bm, chroma], weights=[st.session_state['keyword_retriever_weight'], 1 - st.session_state['keyword_retriever_weight']])
     # for user to make selection
-    retrievers = {'
+    retrievers = {'Ensemble (Both Re-Ranked)': ensemble, 'Semantic (Chroma DB)': chroma, 'Keyword (BM 2.5)': bm}
 
     qa = RetrievalQA.from_chain_type(
         llm=llm,
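With the dictionary populated, the user's sidebar choice selects the retriever by key. A sketch of how the pieces plug together, using the names from the surrounding function (bm, chroma, llm); return_source_documents=True is an assumption that would feed the source-documents page further down:

    import streamlit as st
    from langchain.chains import RetrievalQA
    from langchain.retrievers import EnsembleRetriever

    w = st.session_state['keyword_retriever_weight']
    ensemble = EnsembleRetriever(retrievers=[bm, chroma], weights=[w, 1 - w])
    retrievers = {'Ensemble (Both Re-Ranked)': ensemble,
                  'Semantic (Chroma DB)': chroma,
                  'Keyword (BM 2.5)': bm}

    qa = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retrievers[st.session_state['retriever_config']],
        return_source_documents=True,  # assumed: lets the source-docs page show what was used
    )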
@@ -362,30 +372,91 @@ agent = initialize_agent(
     # max_iterations=10
 )
 
-
-if "menu" not in st.session_state:
-    st.session_state["menu"] = [
-        "Chatbot",
-        "Chat Config",
-        "Document, Retriever, Web Scraping Config",
-        "Source Documents for Last Query",
-    ]
-
+
 ################################ Sidebar with Menu ################################
 with st.sidebar:
-
-
-
-
-
+    page = option_menu("Chatbot",
+                       [
+                           "Main Chatbot",
+                           "View Source Docs for Last Query",
+                           "Scrape or Upload Docs",
+                       ],
+                       icons=['house', 'gear', 'gear', 'gear'],
+                       menu_icon="", default_index=0)
+
+    with st.container(border = True):
+        st.write("DO NOT NAVIGATE between pages or change the config while the agent is still generating messages in the chat. Wait for the query to complete first.")
+        st.write("")
+
+    with st.expander("LLM Config", expanded = True):
+
+        st.selectbox(
+            "HuggingFace Inference Model",
+            options=["mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.2"],
+            on_change=update_llm,
+            key="model"
+        )
+
+        st.slider(
+            "Temperature",
+            0.0, 1.0, 0.05,
+            #value = st.session_state['temperature'],
+            on_change=update_llm,
+            key="temperature"
+        )
+
+        st.slider(
+            "Max Tokens Generated",
+            200, 1000,
+            on_change=update_llm,
+            key="max_new_tokens"
+        )
+
+    with st.expander("Document Config", expanded = True):
+        st.selectbox(
+            "Chunk Size",
+            options=[500, 600, 700, 800, 900, 1000, 1250, 1500, 1750, 2000, 2250, 2500, 2750, 3000],
+            on_change=update_retrievers,
+            key="chunk_size"
+        )
+
+        st.selectbox(
+            "Chunk Overlap",
+            options=[50, 100, 150, 200],
+            on_change=update_retrievers,
+            key="chunk_overlap"
+        )
+
+    with st.expander("Retriever Config", expanded = True):
+
+        st.selectbox(
+            "Retriever to Use",
+            options=['Ensemble (Both Re-Ranked)', 'Semantic (Chroma DB)', 'Keyword (BM 2.5)'],
+            key="retriever_config"
+        )
+
+        st.slider(
+            "Keyword Retriever Weight (If using ensemble retriever, this is the weight of the keyword retriever, semantic retriever would be 1 minus this value)",
+            0.0, 0.05, 1.0,
+            key="keyword_retriever_weight"
+        )
+
+        st.slider(
+            "Number of Relevant Documents Returned by Keyword Retriever",
+            0, 1, 20,
+            key="bm25_n_similar_documents"
+        )
+
+        st.slider(
+            "Number of Relevant Documents Returned by Semantic Retriever",
+            0, 1, 20,
+            key="chroma_n_similar_documents"
+        )
 
-tab1, tab2, tab3 = st.tabs(["Cat", "Dog", "Owl"])
 
 ################################ Main Chatbot Page ################################
-
-
-#st.header("Chat")
-messages = st.container()
+if page == "Main Chatbot":
+    st.subheader("Chatbot")
 
 # Store the conversation in the session state.
 # Used to render the chat conversation.
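One thing to double-check in the new retriever sliders: st.slider takes positional arguments in the order (label, min_value, max_value, value), so `0.0, 0.05, 1.0` and `0, 1, 20` each set a default above the maximum, which raises a StreamlitAPIException at runtime. A corrected sketch under the presumable intent (the exact bounds and defaults here are assumptions):

    import streamlit as st

    # (label, min_value, max_value, value): the default must sit inside the bounds
    st.slider("Keyword Retriever Weight", 0.0, 1.0, 0.3, step=0.05,
              key="keyword_retriever_weight")
    st.slider("Number of Relevant Documents Returned by Keyword Retriever",
              1, 20, 5, key="bm25_n_similar_documents")
    st.slider("Number of Relevant Documents Returned by Semantic Retriever",
              1, 20, 5, key="chroma_n_similar_documents")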
@@ -403,7 +474,7 @@ with tab1:
 
     # Loop through each message in the session state and render it as a chat message
     for message in st.session_state.messages:
-        with
+        with st.chat_message(message["role"]):
             st.markdown(message["content"])
 
     # We take questions/instructions from the chat input to pass to the LLM
@@ -419,7 +490,7 @@ with tab1:
     )
 
     # Add our input to the chat window
-    with
+    with st.chat_message("user"):
         st.markdown(formatted_user_query)
 
     # Let user know agent is planning the actions
@@ -430,7 +501,7 @@ with tab1:
         {"role": "assistant", "content": action_plan_message}
     )
     # Add the response to the chat window
-    with
+    with st.chat_message("assistant"):
         st.markdown(action_plan_message)
 
     results = agent(user_query)
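Taken together, the main-page hunks follow Streamlit's standard chat pattern: replay history on each re-run, then append and render both sides of the new turn. A condensed sketch of that flow (message format and agent call as in the diff; the prompt text is illustrative):

    import streamlit as st

    # replay history on every script re-run
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # new turn: record it, show it, answer it
    if user_query := st.chat_input("Ask about a country's ESG policies"):
        st.session_state.messages.append({"role": "user", "content": user_query})
        with st.chat_message("user"):
            st.markdown(user_query)

        results = agent(user_query)      # callbacks stream the intermediate steps
        response = results["output"]
        st.session_state.messages.append({"role": "assistant", "content": response})
        with st.chat_message("assistant"):
            st.markdown(response)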
@@ -442,63 +513,32 @@ with tab1:
             )
 
             # Add the response to the chat window
-            with
+            with st.chat_message("assistant"):
                 st.markdown(response)
 
 
-################################ Chat Config Page ################################
-# for changing config like temperature etc.
-with tab2:
-    # if page == "Chat Config":
-    #     st.header(page)
-
-    st.selectbox(
-        "HuggingFace Inference Model",
-        options=["mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.2"],
-        on_change=get_llm,
-        key="model"
-    )
-
-    st.slider(
-        "Temperature",
-        0.0, 1.0, 0.05,
-        #value = st.session_state['temperature'],
-        on_change=get_llm,
-        key="temperature"
-    )
-
 
 ################################ Document Page ################################
 # to scrape new documents from DuckDuckGo
 # to change parameters like chunk size
 # to upload own PDF
 # to override existing data on new scraped data or new pdf uploaded
-with tab3:
-    # if page == "Document, Retriever, Web Scraping Config":
-    #     st.header(page)
-
-    st.selectbox(
-        "Chunk Size",
-        options=[500, 600, 700, 800, 900, 1000, 1250, 1500, 1750, 2000, 2250, 2500, 2750, 3000],
-        on_change=get_retrievers,
-        key="chunk_size"
-    )
-
-    st.selectbox(
-        "Chunk Overlap",
-        options=[50, 100, 150, 200],
-        on_change=get_retrievers,
-        key="chunk_overlap"
-    )
 
 
-
-
-
+
+################################ Source Documents Page ################################
+if page == "View Source Docs for Last Query":
+    st.header("Source Documents for Last Query")
     try:
         st.subheader(st.session_state['source_documents'][0])
         for doc in st.session_state['source_documents'][1:]:
-            st.write("Source: " + doc.metadata['source'])
+            #st.write("Source: " + doc.metadata['source'])
             st.write(doc)
     except:
         st.write("No source documents retrieved yet. Please run a user query before coming back to this page.")
+
+
+
+# in main app, add configuration for user to scrape new data from DuckDuckGo
+# in main app, add configuration for user to upload PDF to override country's existing policies in vectorstore
+
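For the new source-documents page to have anything to render, the query path has to stash what the retriever returned. A sketch of the producer side (the source_documents key and title-first layout are taken from the page code above; the rest is assumed):

    import streamlit as st

    result = qa.invoke({"query": user_query})   # qa built with return_source_documents=True
    st.session_state['source_documents'] = (
        [f"Source documents for: {user_query}"] + result["source_documents"]
    )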