Update app.py
app.py CHANGED
@@ -1,5 +1,4 @@
 import streamlit as st
-#from streamlit_chat import message
 from streamlit_option_menu import option_menu
 
 import os
@@ -21,14 +20,17 @@ from langchain.retrievers import EnsembleRetriever # to use chroma and
 from langchain.prompts import PromptTemplate
 from langchain.chains import LLMChain
 
+# for printing intermediate steps of agent (actions, tool calling etc.)
+from langchain.callbacks.base import BaseCallbackHandler
+
 import warnings
 warnings.filterwarnings("ignore", category=FutureWarning)
 warnings.filterwarnings("ignore", category=DeprecationWarning)
 
 # os.environ['HUGGINGFACEHUB_API_TOKEN'] = 'your_api_key' # for using HuggingFace Inference API
 
-from langchain.callbacks.base import BaseCallbackHandler
 
+################################ Callback ################################
 # callback is needed to print intermediate steps of agent reasoning in the chatbot
 # i.e. when action is taken, when tool is called, when tool call is complete etc.
 class MyCallbackHandler(BaseCallbackHandler):
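The body of MyCallbackHandler (file lines 34-86) falls outside the hunks shown here. As a rough illustrative sketch only, not the code from this commit, such a handler typically overrides LangChain's BaseCallbackHandler hooks and writes each agent step into the Streamlit chat; the exact hooks used and the message formatting below are assumptions:

class MyCallbackHandler(BaseCallbackHandler):
    def on_agent_action(self, action, **kwargs):
        # fired when the agent decides to call a tool
        msg = f"Calling tool '{action.tool}' with input: {action.tool_input}"
        st.session_state.messages.append({"role": "assistant", "content": msg})
        with st.chat_message("assistant"):
            st.markdown(msg)

    def on_tool_end(self, output, **kwargs):
        # fired when the tool call completes
        msg = f"Tool call complete. Output: {output}"
        st.session_state.messages.append({"role": "assistant", "content": msg})
        with st.chat_message("assistant"):
            st.markdown(msg)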
@@ -86,20 +88,15 @@ class MyCallbackHandler(BaseCallbackHandler):
 
 my_callback_handler = MyCallbackHandler()
 
-# # Set the webpage title
-# st.set_page_config(
-#     page_title="Your own AI-Chat!",
-#     layout="wide"
-# )
-
-# llm for HuggingFace Inference API
-# model = "mistralai/Mistral-7B-Instruct-v0.2"
-model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 
-
-#
+################################ Configs ################################
+# Set the webpage title
+st.set_page_config(
+    page_title="ESG Countries Chatbot",
+    # layout="wide"
+)
 
-# Document
+# Document Config
 if 'chunk_size' not in st.session_state:
     st.session_state['chunk_size'] = 1000 # choose one of [500, 600, 700, 800, 900, 1000, 1250, 1500, 1750, 2000, 2250, 2500, 2750, 3000]
 
@@ -116,8 +113,7 @@ if 'countries_to_scrape' not in st.session_state:
 # in main app, add configuration for user to scrape new data from DuckDuckGo
 # in main app, add configuration for user to upload PDF to override country's existing policies in vectorstore
 
-
-# Retriever config
+# Retriever Config
 if 'chroma_n_similar_documents' not in st.session_state:
     st.session_state['chroma_n_similar_documents'] = 5 # number of chunks returned by chroma vector store retriever (semantic)
 
@@ -135,11 +131,16 @@ if 'source_documents' not in st.session_state:
 
 
 # LLM config
+# LLM from HuggingFace Inference API
+if 'model' not in st.session_state:
+    st.session_state['model'] = "mistralai/Mixtral-8x7B-Instruct-v0.1" # or "mistralai/Mistral-7B-Instruct-v0.2"
+
 if 'temperature' not in st.session_state:
     st.session_state['temperature'] = 0.25
 
 if 'max_new_tokens' not in st.session_state:
     st.session_state['max_new_tokens'] = 500 # max tokens generated by LLM
+
 
 # This is the list of countries present in the vector store, since the vector store is previously prepared as they take very long to prepare
 # This is for the RetrievalQA tool later to check, because even if the country given to it is not in the vector store,
@@ -157,21 +158,22 @@ countries = [
     "Germany",
 ]
 
-
-
+
+################################ Get LLM and Embeddings ################################
+@st.cache_data # only going to get this once instead of every time the page refreshes,
+# unless the LLM config changes, in which case the function is called again
+def get_llm():
 # This is an inference endpoint API from huggingface, the model is not run locally, it is run on huggingface
 # It is a free API that is very good for deploying online for quick testing without users having to deploy a local LLM
-llm = HuggingFaceHub(repo_id=model,
+    llm = HuggingFaceHub(repo_id=st.session_state['model'],
                          model_kwargs={
-                             'temperature':
-                             "max_new_tokens":
+                             'temperature': st.session_state['temperature'],
+                             "max_new_tokens": st.session_state['max_new_tokens']
                          },
                          )
     return llm
 
-
-
-@st.cache_data # only going to get once
+@st.cache_data # only going to get this once instead of every time the page refreshes
 def get_embeddings():
     with st.spinner(f'Getting HuggingFaceEmbeddings'):
         # We use HuggingFaceEmbeddings() as it is open source and free to use.
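One caveat worth noting: st.cache_data keys its cache on the function's arguments, so a zero-argument get_llm() that reads st.session_state internally will not automatically re-run when the model, temperature, or max_new_tokens settings change. A minimal sketch of the common workaround, passing the config in as arguments so the cache key changes with them (an illustration, not the commit's code; st.cache_resource is the usual choice for client objects like an LLM wrapper):

@st.cache_resource
def get_llm(model_id: str, temperature: float, max_new_tokens: int):
    # cache key now includes the config values, so changing any of them rebuilds the LLM
    return HuggingFaceHub(
        repo_id=model_id,
        model_kwargs={"temperature": temperature, "max_new_tokens": max_new_tokens},
    )

llm = get_llm(st.session_state['model'],
              st.session_state['temperature'],
              st.session_state['max_new_tokens'])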
@@ -179,8 +181,13 @@ def get_embeddings():
         hf_embeddings = HuggingFaceEmbeddings()
     return hf_embeddings
 
+# call above functions
+llm = get_llm()
 hf_embeddings = get_embeddings()
 
+
+################################ Download and Initialize Pre-Built Retrievers ################################
+
 # Chromadb vector stores have already been pre-created for the countries above for each of the different chunk sizes and overlaps, and zipped up,
 # to save time when experimenting as the embeddings take a long time to generate.
 # The existing stores will be pulled from google drive above when the app starts. When using the existing vector stores,
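The code that actually reopens the pre-built Chroma store sits outside the hunks shown here. As a hedged sketch of how a persisted Chroma store is typically loaded with the embeddings created above (the persist_directory name below is purely hypothetical):

from langchain.vectorstores import Chroma

# reopen a pre-built, persisted vector store using the same embeddings
chroma_db = Chroma(persist_directory=f"chroma_db_{st.session_state['chunk_size']}",
                   embedding_function=hf_embeddings)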
@@ -213,7 +220,7 @@ with st.spinner(f'Setting up pre-built chroma vector store'):
 # The retrievers with different chunking sizes and overlaps and countries were created in advance and saved as pickle files and pulled using !wget.
 # Need to initialize one BM25Retriever for each country so the search results later in the main app can be limited to just a particular country.
 # (Chroma DB gives an option to filter metadata for just a particular country during the retrieval process but BM25 does not because it makes use of an external ranking library.)
-# A separate retriever was
+# A separate retriever was hence pre-built for each unique country and each unique chunk size and overlap.
 bm25_retrievers = {} # to store retrievers for different countries
 with st.spinner(f'Setting up pre-built bm25 retrievers'):
     for country in countries:
@@ -222,11 +229,16 @@ with st.spinner(f'Setting up pre-built bm25 retrievers'):
             bm25_retriever = pickle.load(handle)
         bm25_retrievers[country] = bm25_retriever
 
-#
+# One retriever above is semantic based and the other is keyword based
+# Both retrievers will be used
+# Then Langchain's EnsembleRetriever will be used to rerank both their results to give final output to RetrievalQA chain below
+
+################################ Tools for Agent to Use ################################
+
 # The most important tool is the first one, which uses a RetrievalQA chain to answer a question about a specific country's ESG policies,
 # e.g. carbon emissions policy of Singapore.
 # By calling this tool multiple times, the agent is able to look at the responses from this tool for both countries and compare them.
-# This is far better than just retrieving relevant chunks for the user's query and
+# This is far better than just retrieving relevant chunks for the user's query and throwing everything to a single RetrievalQA chain to process
 # Multi input tools are not available, hence we have to prompt the agent to give an input list as a string
 # then use ast.literal_eval to convert it back into a list
 @tool
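Because single-input tools are used, the agent is prompted to encode the two inputs (query and country) as one list-like string, which the tool then converts back with ast.literal_eval. A minimal sketch of that parsing step, with hypothetical names rather than the commit's actual implementation of retrieve_answer_for_country:

import ast

def parse_query_and_country(query_and_country: str):
    # the agent is prompted to pass e.g. "['carbon emissions policy', 'Singapore']"
    query, country = ast.literal_eval(query_and_country)
    return query.strip(), country.strip()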
@@ -251,11 +263,14 @@ def retrieve_answer_for_country(query_and_country: str) -> str: # TODO, change d
     then there is no record for the country and no answer can be obtained."""
 
         # different retrievers
-
+        # keyword
+        bm = bm25_retrievers[country]
         bm.k = st.session_state['bm25_n_similar_documents']
-
-
+        # semantic
+        chroma = chroma_db.as_retriever(search_kwargs={'filter': {'country':country}, 'k': st.session_state['chroma_n_similar_documents']})
+        # ensemble (below) reranks results from both retrievers above
         ensemble = EnsembleRetriever(retrievers=[bm, chroma], weights=[st.session_state['keyword_retriever_weight'], 1 - st.session_state['keyword_retriever_weight']])
+        # for user to make selection
         retrievers = {'ensemble': ensemble, 'semantic': chroma, 'keyword': bm}
 
         qa = RetrievalQA.from_chain_type(
@@ -265,8 +280,10 @@ def retrieve_answer_for_country(query_and_country: str) -> str: # TODO, change d
             return_source_documents=True # returned in result['source_documents']
         )
         result = qa(query)
+        # add to source documents session state so it can be loaded later in the other menu
+        # all source documents linked to answer any query (or part of it) are visible
         st.session_state['source_documents'].append(f"Documents retrieved for agent query '{query}' for country '{country}'.")
-        st.session_state['source_documents'].append(result['source_documents'])
+        st.session_state['source_documents'].append(result['source_documents'])
        return f"{query.capitalize()} for {country}: " + result['result']
 
     except Exception as e:
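Only the tail of the RetrievalQA.from_chain_type call appears in the hunk above; its opening arguments are outside the diff. A sketch of what a typical call looks like with the retrievers defined earlier, where the chain_type and the session-state key for the retriever choice are assumptions rather than the commit's actual values:

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # assumed chain type
    retriever=retrievers[st.session_state.get('retriever_type', 'ensemble')],  # hypothetical key
    return_source_documents=True,
)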
@@ -319,10 +336,12 @@ def compare(query:str) -> str:
     Give as much elaboration in your answer as possible but they MUST be from the earlier context.
     Do not give details that cannot be found in the earlier context."""
 
+# equip tools with callbacks
 retrieve_answer_for_country.callbacks = [my_callback_handler]
 compare.callbacks = [my_callback_handler]
 generic_chat_llm.callbacks = [my_callback_handler]
 
+# Initialize
 agent = initialize_agent(
     [retrieve_answer_for_country, compare], # tools
     #[retrieve_answer_for_country, generic_chat_llm, compare],
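Only the first arguments of initialize_agent appear in the diff; the remaining parameters sit outside the hunks. A sketch of a typical call with LangChain's legacy agent API, where the agent type and extra flags are assumptions rather than the commit's actual settings:

from langchain.agents import initialize_agent, AgentType

agent = initialize_agent(
    [retrieve_answer_for_country, compare],       # tools
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,  # assumed agent type
    callbacks=[my_callback_handler],
    handle_parsing_errors=True,
    verbose=True,
)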
@@ -347,7 +366,7 @@ if "menu" not in st.session_state:
         "Source Documents for Last Query",
     ]
 
-
+################################ Sidebar with Menu ################################
 with st.sidebar:
     st.subheader("DO NOT NAVIGATE between pages when agent is still generating messages in the chat. Wait for query to complete first.")
     st.write("")
@@ -356,6 +375,7 @@ with st.sidebar:
     st.spinner("test")
 
 
+################################ Main Chatbot Page ################################
 if page == "Chatbot":
     st.header("Chat")
 
@@ -373,27 +393,19 @@ if page == "Chatbot":
         """}
     ]
 
-
-    st.session_state.current_response = ""
-
-    # Loop through each message in the session state and render it as a chat message.
+    # Loop through each message in the session state and render it as a chat message
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.markdown(message["content"])
 
-    # We initialize the quantized LLM from a local path.
-    # Currently most parameters are fixed but we can make them
-    # configurable.
-    #llm_chain = create_chain(retriever)
-
     # We take questions/instructions from the chat input to pass to the LLM
     if user_query := st.chat_input("Your message here", key="user_input"):
-        # remove source documents option from menu while query is running
 
+        # reset source documents list during a new query
         st.session_state['source_documents'] = [f"User query: '{user_query}'"] # reset source documents list
 
-        formatted_user_query = f":blue[{user_query}]"
         # Add our input to the session state
+        formatted_user_query = f":blue[{user_query}]"
         st.session_state.messages.append(
             {"role": "user", "content": formatted_user_query}
         )
@@ -413,10 +425,6 @@ if page == "Chatbot":
         with st.chat_message("assistant"):
             st.markdown(action_plan_message)
 
-        # Pass our input to the llm chain and capture the final responses.
-        # It is worth noting that the Stream Handler is already receiving the
-        # streaming response as the llm is generating. We get our response
-        # here once the llm has finished generating the complete response.
         results = agent(user_query)
         response = f":blue[The answer to your query is:] {results['output']}"
 
@@ -430,14 +438,22 @@ if page == "Chatbot":
         st.markdown(response)
 
 
+################################ Chat Config Page ################################
+# for changing config like temperature etc.
 if page == "Chat Config":
     st.header(page)
 
 
+################################ Document Page ################################
+# to scrape new documents from DuckDuckGo
+# to change parameters like chunk size
+# to upload own PDF
+# to override existing data with newly scraped data or a newly uploaded PDF
 if page == "Document, Retriever, Web Scraping Config":
     st.header(page)
 
 
+################################ Source Documents Page ################################
 if page == "Source Documents for Last Query":
     st.header(page)
     try: