bohmian committed on
Commit dfe22da · verified · 1 Parent(s): e653334

Create app.py

Files changed (1):
  1. app.py +391 -0
app.py ADDED
@@ -0,0 +1,391 @@
+ import streamlit as st
+ from streamlit_chat import message
+
+ import os
+ from langchain.llms import HuggingFaceHub # for calling the HuggingFace Inference API (free for our use case)
+ from langchain.embeddings import HuggingFaceEmbeddings # to let the program know which embeddings the vector store was embedded with earlier
+
+ # to set up the agent and tools which will be used to answer questions later
+ from langchain.agents import initialize_agent
+ from langchain.agents import tool # decorator so each function will be recognized as a tool
+ from langchain.chains.retrieval_qa.base import RetrievalQA # to answer questions from the vector store retriever
+ # from langchain.chains.question_answering import load_qa_chain # to further customize the qa chain if needed
+ from langchain.vectorstores import Chroma # vector store for the retriever
+ import ast # to parse the user's string input into a list for one of the tools (agent tools do not support 2 inputs)
+ # from langchain.memory import ConversationBufferMemory # not used as of now
+ import pickle # for loading the bm25 retriever
+ from langchain.retrievers import EnsembleRetriever # to combine the chroma and bm25 retrievers
+
+ # for defining a generic LLMChain as a generic chat tool (if needed)
+ from langchain.prompts import PromptTemplate
+ from langchain.chains import LLMChain
+
+ import warnings
+ warnings.filterwarnings("ignore", category=FutureWarning)
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
+
+ # os.environ['HUGGINGFACEHUB_API_TOKEN'] = 'your_api_key' # for using the HuggingFace Inference API
+
+
+ from langchain.callbacks.base import BaseCallbackHandler
+
+ class MyCallbackHandler(BaseCallbackHandler):
+     def __init__(self):
+         self.tokens = []
+
+     def on_llm_new_token(self, token, **kwargs) -> None: # HuggingFaceHub() cannot stream
+         self.tokens.append(token)
+         print(token)
+
+     def on_agent_action(self, action, **kwargs):
+         """Run on agent action."""
+         print("\n\nnew action", action)
+         thought = action.log.replace('\n', '  \n') # two trailing spaces so streamlit markdown will recognize it as a newline
+         tool_called = action.tool
+         # tool_input = action.tool_input
+         calling_tool = f"I am calling the '{tool_called}' tool and waiting for it to give me a result..."
+         st.session_state.messages.extend(
+             [{"role": "assistant", "content": thought}, {"role": "assistant", "content": calling_tool}]
+         )
+         # Add the response to the chat window
+         with st.chat_message("assistant"):
+             st.markdown(thought)
+             st.markdown(calling_tool)
+
+     # def on_agent_finish(self, finish, **kwargs):
+     #     """Run on agent end."""
+     #     # print("\n\nEnd", finish)
+     #     finish_string = finish.log.replace('\n', '  \n') # so streamlit will recognize it as a newline
+     #     st.session_state.messages.append(
+     #         {"role": "assistant", "content": finish_string}
+     #     )
+     #     with st.chat_message("assistant"):
+     #         st.markdown(finish_string)
+
+     # def on_llm_start(self, serialized, prompts, **kwargs):
+     #     """Run when LLM starts running."""
+     #     print("LLM Start: ", prompts)
+
+     # def on_llm_end(self, response, **kwargs):
+     #     """Run when LLM ends running."""
+     #     print(response)
+
+     def on_tool_end(self, output, **kwargs):
+         """Run when tool ends running."""
+         # print("\n\nTool End: ", output)
+         tool_output = f"Tool Output: {output} \n \nI am processing the output from the tool..."
+         st.session_state.messages.append(
+             {"role": "assistant", "content": tool_output}
+         )
+         with st.chat_message("assistant"):
+             st.markdown(tool_output)
+
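+ # One shared handler instance; it is attached to the tools and to the agent below so that the agent's
+ # intermediate thoughts, tool calls and tool outputs are mirrored into the Streamlit chat as they happen.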
+ my_callback_handler = MyCallbackHandler()
+
+ # # Set the webpage title
+ # st.set_page_config(
+ #     page_title="Your own AI-Chat!",
+ #     layout="wide"
+ # )
+
+ # llm for the HuggingFace Inference API
+ # model = "mistralai/Mistral-7B-Instruct-v0.2"
+ model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+
+ # with st.spinner('Downloading pre-built Chroma and BM25 vector stores'):
+ #     chroma_db = Chroma(persist_directory=persist_directory, embedding_function=hf_embeddings)
+
+ # Document config
+ if 'chunk_size' not in st.session_state:
+     st.session_state['chunk_size'] = 1000 # choose one of [500, 600, 700, 800, 900, 1000, 1250, 1500, 1750, 2000, 2250, 2500, 2750, 3000]
+
+ if 'chunk_overlap' not in st.session_state:
+     st.session_state['chunk_overlap'] = 100 # choose one of [50, 100, 150, 200]
+
+ # scraping results using DuckDuckGo
+ if 'top_n_results' not in st.session_state:
+     st.session_state['top_n_results'] = 10 # this is for returning the top n search results using DuckDuckGo
+
+ if 'countries_to_scrape' not in st.session_state:
+     st.session_state['countries_to_scrape'] = [] # countries the user selects to scrape new data for
+
+ # in the main app, add configuration for the user to scrape new data from DuckDuckGo
+ # in the main app, add configuration for the user to upload a PDF to override a country's existing policies in the vector store
+
+
+ # Retriever config
+ if 'chroma_n_similar_documents' not in st.session_state:
+     st.session_state['chroma_n_similar_documents'] = 5 # number of chunks returned by the chroma vector store retriever (semantic)
+
+ if 'bm25_n_similar_documents' not in st.session_state:
+     st.session_state['bm25_n_similar_documents'] = 5 # number of chunks returned by the bm25 retriever (keyword)
+
+ if 'retriever_config' not in st.session_state:
+     st.session_state['retriever_config'] = 'ensemble' # choose one of ['semantic', 'keyword', 'ensemble']
+
+ if 'keyword_retriever_weight' not in st.session_state:
+     st.session_state['keyword_retriever_weight'] = 0.3 # choose between 0 and 1, only used for the ensemble retriever
+
+ if 'source_documents' not in st.session_state:
+     st.session_state['source_documents'] = [] # this is to store all source documents for a particular search
+
+
+ # LLM config
+ if 'temperature' not in st.session_state:
+     st.session_state['temperature'] = 0.25
+
+ if 'max_new_tokens' not in st.session_state:
+     st.session_state['max_new_tokens'] = 500 # max tokens generated by the LLM
+
+ # This is the list of countries present in the vector store, which was prepared in advance since the embeddings take very long to build.
+ # The RetrievalQA tool later checks against this list, because even if the country given to it is not in the vector store,
+ # it would still filter the vector store by that country and return an empty result instead of raising an error.
+ # We have to manually return an error message so that the agent using the tool knows.
+ # The countries were reduced to just 6 as the time taken to build the embeddings for the chunks is too long.
+ # However, having more countries **will not affect** the quality of the answers when comparing 2 countries in the RAG application,
+ # as the RAG only picks out document chunks for the 2 countries of interest.
+ countries = [
+     "Australia",
+     "China",
+     "Japan",
+     "Malaysia",
+     "Singapore",
+     "Germany",
+ ]
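+ # Note: these names must match the 'country' metadata values stored with each chunk in the vector store,
+ # which is why the country passed to the retrieval tool is .capitalize()'d before filtering.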
+
+ @st.cache_data # only needs to be created once
+ def get_llm(temp=st.session_state['temperature'], tokens=st.session_state['max_new_tokens']):
+     # This is an inference endpoint API from HuggingFace; the model is not run locally, it runs on HuggingFace's servers.
+     # It is a free API that is very useful for deploying online for quick testing without users having to run a local LLM.
+     llm = HuggingFaceHub(repo_id=model,
+                          model_kwargs={
+                              'temperature': temp,
+                              'max_new_tokens': tokens
+                          },
+                          )
+     return llm
+
+ llm = get_llm(st.session_state['temperature'], tokens=st.session_state['max_new_tokens'])
+
+ @st.cache_data # only needs to be created once
+ def get_embeddings():
+     with st.spinner('Getting HuggingFaceEmbeddings'):
+         # We use HuggingFaceEmbeddings() as it is open source and free to use.
+         # Initialize the default hf model for embedding the tokenized texts into vectors with semantic meanings
+         hf_embeddings = HuggingFaceEmbeddings()
+     return hf_embeddings
+
+ hf_embeddings = get_embeddings()
+
+ # Chromadb vector stores have already been pre-created for the countries above, for each of the different chunk sizes and overlaps,
+ # to save time when experimenting, as the embeddings take a long time to generate.
+ # The existing stores are pulled in (previously with !wget) when the app starts. When using the existing vector stores,
+ # we just need to change the name of the persist directory when selecting different chunk sizes and overlaps.
+ # Not implemented here: later in the main app, if the user chooses to scrape new data or to override with their own PDF, a new chromadb would be created.
+ persist_directory = f"chromadb/chromadb_esg_countries_chunk_{st.session_state['chunk_size']}_overlap_{st.session_state['chunk_overlap']}"
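+ # e.g. with the default settings above this resolves to "chromadb/chromadb_esg_countries_chunk_1000_overlap_100"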
+ with st.spinner('Setting up pre-built chroma vector store'):
+     chroma_db = Chroma(persist_directory=persist_directory, embedding_function=hf_embeddings)
+
+ # Initialize BM25 Retrievers
+ # Unlike Chroma (semantic), BM25 is a keyword-based algorithm that performs well on queries containing keywords, without capturing the semantic meaning of the query terms,
+ # hence there is no need to embed the text with HuggingFaceEmbeddings and it is relatively faster to set up.
+ # The retrievers for the different chunk sizes, overlaps and countries were created in advance, saved as pickle files and pulled using !wget.
+ # We need to initialize one BM25Retriever per country so the search results later in the main app can be limited to just a particular country.
+ # (Chroma DB gives an option to filter metadata for just a particular country during the retrieval process, but BM25 does not, because it makes use of an external ranking library.)
+ # A separate retriever was saved for each country.
+ bm25_retrievers = {} # to store retrievers for the different countries
+ with st.spinner('Setting up pre-built bm25 retrievers'):
+     for country in countries:
+         bm25_filename = f"bm25/bm25_esg_countries_{country}_chunk_{st.session_state['chunk_size']}_overlap_{st.session_state['chunk_overlap']}.pickle"
+         with open(bm25_filename, 'rb') as handle:
+             bm25_retriever = pickle.load(handle)
+         bm25_retrievers[country] = bm25_retriever
+
+ # Tools for the LLM to Use
+ # The most important tool is the first one, which uses a RetrievalQA chain to answer a question about a specific country's ESG policies,
+ # e.g. the carbon emissions policy of Singapore.
+ # By calling this tool multiple times, the agent is able to look at the responses from this tool for both countries and compare them.
+ # This is far better than just retrieving relevant chunks for the user's query and throwing everything at a single RetrievalQA chain to process.
+ # Multi-input tools are not available, hence we have to prompt the agent to give its input as a list formatted as a string,
+ # then use ast.literal_eval to convert it back into a list.
+ @tool
+ def retrieve_answer_for_country(query_and_country: str) -> str: # TODO, change diff chain type diff version answers, change
+     """Gives answer to a query about a single country's public ESG policy.
+     The input list should be of the following format:
+     [query, country]
+     The first element of the list is the user query, surrounded by double quotes.
+     The second element is the full name of the country involved, surrounded by double quotes, for example "Singapore".
+     The 2 inputs are separated by a comma. Do not write a list comprehension.
+     The 2 inputs, together, are surrounded by square brackets as it is a list.
+     Do not put multiple countries into the input at once. Instead use this tool multiple times, one time for each country.
+     If you have multiple queries to ask about a country, break the query into separate parts and use this tool multiple times, one for each query.
+     """
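+     # e.g. the agent is expected to pass a string such as: '["What is the carbon emissions policy?", "Singapore"]'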
+     try:
+         query_and_country_list = ast.literal_eval(query_and_country)
+         query = query_and_country_list[0]
+         country = query_and_country_list[1].capitalize() # in case the LLM did not capitalize the first letter, as filtering on metadata is case sensitive
+         if country not in countries:
+             return """The country that you input into the tool cannot be found.
+             If you did not make a mistake and the country that you input is indeed what the user asked,
+             then there is no record for the country and no answer can be obtained."""
+
+         # different retrievers
+         bm = bm25_retrievers[country] # keyword based
+         bm.k = st.session_state['bm25_n_similar_documents']
+         chroma = chroma_db.as_retriever(search_kwargs={'filter': {'country': country}, 'k': st.session_state['chroma_n_similar_documents']}) # semantic
+         # ensemble (below) reranks results from both retrievers
+         ensemble = EnsembleRetriever(retrievers=[bm, chroma], weights=[st.session_state['keyword_retriever_weight'], 1 - st.session_state['keyword_retriever_weight']])
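+         # with the default keyword_retriever_weight of 0.3, the bm25 (keyword) results get weight 0.3 and the chroma (semantic) results get weight 0.7 during reranking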
240
+ retrievers = {'ensemble': ensemble, 'semantic': chroma, 'keyword': bm}
241
+
242
+ qa = RetrievalQA.from_chain_type(
243
+ llm=llm,
244
+ chain_type='stuff',
245
+ retriever=retrievers[st.session_state['retriever_config']], # selected retriever based on user config
246
+ return_source_documents=True # returned in result['source_documents']
247
+ )
248
+ result = qa(query)
249
+ st.session_state['source_documents'].append(result['source_documents']) # let user know what source docs are used
250
+ return result['result']
251
+
252
+ except Exception as e:
253
+ return f"""There is an error using this tool: {e}. Check if you have input anything wrongly and try again.
254
+ Remember the 2 inputs, query and country, must both be surrounded by double quotes.
255
+ The 2 inputs, together, are surrounded by square brackets as it is a list."""
256
+
257
+ # if a user tries to casually chat with the agent chatbot, the LLM will be able to use this tool to reply instead
258
+ # this is optional, better to let user's know the chatbot is not for casual chatting
259
+ @tool
260
+ def generic_chat_llm(query: str) -> str:
261
+ """Use this tool for general queries and casual chat. Forward the user input directly into this tool, do not come up with your own input.
262
+ This tool IS NOT FOR MAKING COMPARISONS of anything.
263
+ This tool IS NOT FOR FINDING ESG POLICY of any country!
264
+ It is only for casual chat! Do not use this tool unnecessarily!
265
+ """
266
+ try:
267
+ # Second Generic Tool
268
+ prompt = PromptTemplate(
269
+ input_variables=["query"],
270
+ template="{query}"
271
+ )
272
+
273
+ llm_chain = LLMChain(llm=llm, prompt=prompt)
274
+ return llm_chain.run(query)
275
+
276
+ except Exception as e:
277
+ return f"""There is an error using this tool: {e}. Check if you have input anything wrongly and try again.
278
+ If you have already tried 2 times, do not try anymore, there is no response for your input.
279
+ Move on to the next step of your plan."""
280
+
281
+ # sometimes the agent will suddenly ask for a 'compare' tool even though it was not given this tool
282
+ # hence I have decided to give it this tool that gives a prompt to remind it to look at past information
283
+ # and decide whether it is time to darw a conclusion
284
+ # tools cannot have no input, hence I let the agent input a 'query' parameter even though it is not used
285
+ # having the query as input let the LLM 'recall' what is being asked
286
+ # instead of it being lost all the way at the start of the ReAct process
287
+ @tool
288
+ def compare(query:str) -> str:
289
+ """Use this tool to give you hints and instructions on how you can compare between policies of countries.
290
+ Use this tool only at one of your final steps, do not use it at the start.
291
+ When putting the query into this tool, look at the entire query that the user has asked at the start,
292
+ do not leave any details in the query out.
293
+ """
294
+ return f"""Look at all your previous observations to answer the user query.
295
+ Use as much relevant information as possible but only from your previous thoughts and observations.
296
+ If you need more details, you can use a tool to find out more. If you have enough information,
297
+ use your reasoning to answer them to the best of your ability. Give as much detail as you want in your answer."""
298
+
+ retrieve_answer_for_country.callbacks = [my_callback_handler]
+ compare.callbacks = [my_callback_handler]
+ generic_chat_llm.callbacks = [my_callback_handler]
+
+ agent = initialize_agent(
+     [retrieve_answer_for_country, compare], # tools
+     # [retrieve_answer_for_country, generic_chat_llm, compare],
+     llm=llm,
+     agent="zero-shot-react-description", # this is good
+     verbose=False,
+     handle_parsing_errors=True,
+     return_intermediate_steps=True,
+     callbacks=[my_callback_handler]
+     # memory=ConversationBufferMemory(
+     #     memory_key="chat_history", return_messages=True
+     # ),
+     # max_iterations=10
+ )
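+ # The zero-shot ReAct agent plans one step at a time: it decides which tool to call based only on the
+ # tool docstrings above, observes the result, and repeats until it can produce a final answer.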
+
+
+ # Create a header element
+ st.header("Chat")
+
+ col1, col2 = st.columns(2)
+ # with col1:
+
+ # Store the conversation in the session state.
+ # Used to render the chat conversation.
+ # Initialize it with the first message for users to be greeted with
+ if "messages" not in st.session_state:
+     st.session_state.messages = [
+         {"role": "assistant", "content": "How may I help you today?"}
+     ]
+
+ if "current_response" not in st.session_state:
+     st.session_state.current_response = ""
+
+ # Loop through each message in the session state and render it as a chat message.
+ for message in st.session_state.messages:
+     with st.chat_message(message["role"]):
+         st.markdown(message["content"])
+
+ # The LLM and agent were already initialized above via the HuggingFace Inference API (no local quantized model is used).
+ # Currently most parameters are fixed but we can make them configurable.
+ # llm_chain = create_chain(retriever)
+
+ # We take questions/instructions from the chat input to pass to the LLM
+ if user_query := st.chat_input("Your message here", key="user_input"):
+
+     # Add our input to the session state
+     st.session_state.messages.append(
+         {"role": "user", "content": user_query}
+     )
+
+     # Add our input to the chat window
+     with st.chat_message("user"):
+         st.markdown(user_query)
+
+     # Let the user know the agent is planning its actions
+     action_plan_message = "Please wait while I plan out a best set of actions to obtain the information and answer your query."
+
+     # Add the response to the session state
+     st.session_state.messages.append(
+         {"role": "assistant", "content": action_plan_message}
+     )
+     # Add the response to the chat window
+     with st.chat_message("assistant"):
+         st.markdown(action_plan_message)
+
+     # Pass our input to the agent and capture the final response.
+     # The callback handler has already rendered the agent's intermediate thoughts, tool calls and tool
+     # outputs in the chat as they happened; here we only collect the final answer once the agent finishes.
+     results = agent(user_query)
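+     # results is a dict: with return_intermediate_steps=True it contains the final answer under 'output'
+     # as well as the 'intermediate_steps' taken by the agent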
375
+ response = f"The answer to your query is: {results['output']}"
376
+
377
+ # Add the response to the session state
378
+ st.session_state.messages.append(
379
+ {"role": "assistant", "content": response}
380
+ )
381
+
382
+ # Add the response to the chat window
383
+ with st.chat_message("assistant"):
384
+ st.markdown(response)
385
+
386
+
387
+ # with col2:
388
+ # st.write("hi")
389
+
390
+
391
+