safety-copilot

Running

App Files Files Community

Asankhaya Sharma committed on Feb 20, 2024

Commit

dfd217b

1 Parent(s): 6128070

new format for chat

Browse files

Files changed (3) hide show

main.py +92 -37
question.py +0 -85
requirements.txt +3 -2

main.py CHANGED Viewed

@@ -1,42 +1,76 @@
 # main.py
 import os
-import tempfile
 import streamlit as st
-from question import chat_with_doc
-from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
-from langchain.vectorstores import SupabaseVectorStore
 from supabase import Client, create_client
-from stats import get_usage
 supabase_url = st.secrets.SUPABASE_URL
 supabase_key = st.secrets.SUPABASE_KEY
 openai_api_key = st.secrets.openai_api_key
 anthropic_api_key = st.secrets.anthropic_api_key
 hf_api_key = st.secrets.hf_api_key
-supabase: Client = create_client(supabase_url, supabase_key)
-self_hosted = st.secrets.self_hosted
 username = st.secrets.username
-# embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
 embeddings = HuggingFaceInferenceAPIEmbeddings(
     api_key=hf_api_key,
     model_name="BAAI/bge-large-en-v1.5"
 )
-vector_store = SupabaseVectorStore(supabase, embeddings, query_name='match_documents', table_name="documents")
-models = ["meta-llama/Llama-2-70b-chat-hf", "mistralai/Mixtral-8x7B-Instruct-v0.1"]
-if openai_api_key:
-    models += ["gpt-3.5-turbo", "gpt-4"]
-if anthropic_api_key:
-    models += ["claude-v1", "claude-v1.3",
-               "claude-instant-v1-100k", "claude-instant-v1.1-100k"]
 # Set the theme
 st.set_page_config(
     page_title="Securade.ai - Safety Copilot",
@@ -54,25 +88,46 @@ st.title("👷‍♂️ Safety Copilot 🦺")
 st.markdown("Chat with your personal safety assistant about any health & safety related queries.")
 st.markdown("Up-to-date with latest OSH regulations for Singapore, Indonesia, Malaysia & other parts of Asia.")
-st.markdown("---\n\n")
-# Initialize session state variables
-if 'model' not in st.session_state:
-    st.session_state['model'] = "meta-llama/Llama-2-70b-chat-hf"
-if 'temperature' not in st.session_state:
-    st.session_state['temperature'] = 0.1
-if 'chunk_size' not in st.session_state:
-    st.session_state['chunk_size'] = 500
-if 'chunk_overlap' not in st.session_state:
-    st.session_state['chunk_overlap'] = 0
-if 'max_tokens' not in st.session_state:
-    st.session_state['max_tokens'] = 500
-if 'username' not in st.session_state:
-    st.session_state['username'] = username
-stats = str(get_usage(supabase))
-chat_with_doc(st.session_state['model'], vector_store, stats_db=supabase, stats=stats)
-st.markdown("---\n\n")

 # main.py
 import os
 import streamlit as st
+import anthropic
+from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
+from langchain_community.vectorstores import SupabaseVectorStore
+from langchain_community.llms import HuggingFaceEndpoint
+from langchain_community.vectorstores import SupabaseVectorStore
+from langchain.chains import ConversationalRetrievalChain
+from langchain.memory import ConversationBufferMemory
 from supabase import Client, create_client
+from streamlit.logger import get_logger
+from stats import get_usage, add_usage
 supabase_url = st.secrets.SUPABASE_URL
 supabase_key = st.secrets.SUPABASE_KEY
 openai_api_key = st.secrets.openai_api_key
 anthropic_api_key = st.secrets.anthropic_api_key
 hf_api_key = st.secrets.hf_api_key
 username = st.secrets.username
+supabase: Client = create_client(supabase_url, supabase_key)
+logger = get_logger(__name__)
 embeddings = HuggingFaceInferenceAPIEmbeddings(
     api_key=hf_api_key,
     model_name="BAAI/bge-large-en-v1.5"
 )
+if 'chat_history' not in st.session_state:
+    st.session_state['chat_history'] = []
+vector_store = SupabaseVectorStore(supabase, embeddings, query_name='match_documents', table_name="documents")
+memory = ConversationBufferMemory(memory_key="chat_history", input_key='question', output_key='answer', return_messages=True)
+model = "meta-llama/Llama-2-70b-chat-hf" #mistralai/Mixtral-8x7B-Instruct-v0.1
+temperature = 0.1
+max_tokens = 500
+stats = str(get_usage(supabase))
+def response_generator(query):
+    """Answer ``query`` with the retrieval-augmented Hugging Face chat model.
+
+    Records the request through ``add_usage``, builds a
+    ``ConversationalRetrievalChain`` on top of the module-level
+    ``vector_store`` and ``memory``, and runs it on the question.
+
+    Args:
+        query: The question text entered by the user.
+
+    Returns:
+        The chain's answer when the retriever returned at least one source
+        document; otherwise a fixed apology message pointing the user to
+        copilot@securade.ai.
+    """
+    add_usage(supabase, "chat", "prompt" + query, {"model": model, "temperature": temperature})
+    logger.info('Using HF model %s', model)
+    endpoint_url = "https://api-inference.huggingface.co/models/" + model
+    model_kwargs = {
+        "temperature": temperature,
+        "max_new_tokens": max_tokens,
+        "return_full_text": False,
+    }
+    hf = HuggingFaceEndpoint(
+        endpoint_url=endpoint_url,
+        task="text-generation",
+        huggingfacehub_api_token=hf_api_key,
+        model_kwargs=model_kwargs,
+    )
+    # The retriever is asked for at most 4 matches, with a 0.6 score threshold
+    # and a filter on the configured ``username``.
+    retriever = vector_store.as_retriever(
+        search_kwargs={"score_threshold": 0.6, "k": 4, "filter": {"user": username}}
+    )
+    qa = ConversationalRetrievalChain.from_llm(
+        hf,
+        retriever=retriever,
+        memory=memory,
+        verbose=True,
+        return_source_documents=True,
+    )
+    # Generate model's response. ``invoke`` replaces the deprecated
+    # ``Chain.__call__`` and returns the same output dict.
+    model_response = qa.invoke({"question": query})
+    sources = model_response["source_documents"]
+    logger.info('Result: %s', model_response["answer"])
+    logger.info('Sources: %s', sources)
+    # Without any supporting document the generated answer is discarded in
+    # favour of the canned fallback below.
+    if sources:
+        return model_response["answer"]
+    return "I am sorry, I do not have enough information to provide an answer. If there is a public source of data that you would like to add, please email copilot@securade.ai."
 # Set the theme
 st.set_page_config(
     page_title="Securade.ai - Safety Copilot",
 st.markdown("Chat with your personal safety assistant about any health & safety related queries.")
 st.markdown("Up-to-date with latest OSH regulations for Singapore, Indonesia, Malaysia & other parts of Asia.")
+st.markdown("_"+ stats + " queries answered!_")
+# Make sure the history list exists in session state before it is read below.
+if 'chat_history' not in st.session_state:
+    st.session_state['chat_history'] = []
+# Replay every stored message (dicts with "role" and "content" keys) so the
+# conversation is shown again on each app rerun.
+for message in st.session_state.chat_history:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+# Accept user input; the body runs only when chat_input yields a truthy prompt.
+if prompt := st.chat_input("Ask a question"):
+    # Store the user's message in the chat history.
+    st.session_state.chat_history.append({"role": "user", "content": prompt})
+    # Show the user's message in its own chat message container.
+    with st.chat_message("user"):
+        st.markdown(prompt)
+    # Keep a spinner visible while response_generator produces the answer.
+    with st.spinner('Safety briefing in progress... Your customized guidance is en route.'):
+        response = response_generator(prompt)
+    # Show the assistant's reply in its own chat message container.
+    with st.chat_message("assistant"):
+        st.markdown(response)
+    # Store the assistant's reply so it is replayed on the next rerun.
+    st.session_state.chat_history.append({"role": "assistant", "content": response})
+# query = st.text_area("## Ask a question (" + stats + " queries answered so far)", max_chars=500)
+# columns = st.columns(2)
+# with columns[0]:
+#     button = st.button("Ask")
+# with columns[1]:
+#     clear_history = st.button("Clear History", type='secondary')
+# st.markdown("---\n\n")
+# if clear_history:
+#     # Clear memory in Langchain
+#     memory.clear()
+#     st.session_state['chat_history'] = []
+#     st.experimental_rerun()

question.py DELETED Viewed

@@ -1,85 +0,0 @@
-import anthropic
-import streamlit as st
-from streamlit.logger import get_logger
-from langchain.chains import ConversationalRetrievalChain
-from langchain.memory import ConversationBufferMemory
-from langchain.llms import OpenAI
-from langchain.llms import HuggingFaceEndpoint
-from langchain.chat_models import ChatAnthropic
-from langchain.vectorstores import SupabaseVectorStore
-from stats import add_usage
-memory = ConversationBufferMemory(memory_key="chat_history", input_key='question', output_key='answer', return_messages=True)
-openai_api_key = st.secrets.openai_api_key
-anthropic_api_key = st.secrets.anthropic_api_key
-hf_api_key = st.secrets.hf_api_key
-logger = get_logger(__name__)
-def chat_with_doc(model, vector_store: SupabaseVectorStore, stats_db, stats):
-    if 'chat_history' not in st.session_state:
-        st.session_state['chat_history'] = []
-    query = st.text_area("## Ask a question (" + stats + " queries answered so far)", max_chars=500)
-    columns = st.columns(2)
-    with columns[0]:
-        button = st.button("Ask")
-    with columns[1]:
-        clear_history = st.button("Clear History", type='secondary')
-    st.markdown("---\n\n")
-    if clear_history:
-        # Clear memory in Langchain
-        memory.clear()
-        st.session_state['chat_history'] = []
-        st.experimental_rerun()
-    if button:
-        qa = None
-        add_usage(stats_db, "chat", "prompt" + query, {"model": model, "temperature": st.session_state['temperature']})
-        if model.startswith("gpt"):
-            logger.info('Using OpenAI model %s', model)
-            qa = ConversationalRetrievalChain.from_llm(
-                OpenAI(
-                    model_name=st.session_state['model'], openai_api_key=openai_api_key, temperature=st.session_state['temperature'], max_tokens=st.session_state['max_tokens']), vector_store.as_retriever(), memory=memory, verbose=True)
-        elif anthropic_api_key and model.startswith("claude"):
-            logger.info('Using Anthropics model %s', model)
-            qa = ConversationalRetrievalChain.from_llm(
-                ChatAnthropic(
-                    model=st.session_state['model'], anthropic_api_key=anthropic_api_key, temperature=st.session_state['temperature'], max_tokens_to_sample=st.session_state['max_tokens']), vector_store.as_retriever(), memory=memory, verbose=True, max_tokens_limit=102400)
-        elif hf_api_key:
-            logger.info('Using HF model %s', model)
-            # print(st.session_state['max_tokens'])
-            endpoint_url = ("https://api-inference.huggingface.co/models/"+ model)
-            model_kwargs = {"temperature" : st.session_state['temperature'],
-                            "max_new_tokens" : st.session_state['max_tokens'],
-                            "return_full_text" : False}
-            hf = HuggingFaceEndpoint(
-                endpoint_url=endpoint_url,
-                task="text-generation",
-                huggingfacehub_api_token=hf_api_key,
-                model_kwargs=model_kwargs
-            )
-            qa = ConversationalRetrievalChain.from_llm(hf, retriever=vector_store.as_retriever(search_kwargs={"score_threshold": 0.6, "k": 4,"filter": {"user": st.session_state["username"]}}), memory=memory, verbose=True, return_source_documents=True)
-        print("Question>")
-        print(query)
-        st.session_state['chat_history'].append(("You", query))
-        # Generate model's response and add it to chat history
-        model_response = qa({"question": query})
-        logger.info('Result: %s', model_response["answer"])
-        sources = model_response["source_documents"]
-        logger.info('Sources: %s', model_response["source_documents"])
-        if len(sources) > 0:
-            st.session_state['chat_history'].append(("Safety Copilot", model_response["answer"]))
-        else:
-            st.session_state['chat_history'].append(("Safety Copilot", "I am sorry, I do not have enough information to provide an answer. If there is a public source of data that you would like to add, please email copilot@securade.ai."))
-        # Display chat history
-        st.empty()
-        chat_history = st.session_state['chat_history']
-        for speaker, text in chat_history:
-            st.markdown(f"**{speaker}:** {text}")

requirements.txt CHANGED Viewed

@@ -1,9 +1,10 @@
-langchain==0.1.0
 Markdown==3.4.3
 openai==0.27.6
 pdf2image==1.16.3
 pypdf==3.8.1
-streamlit==1.22.0
 StrEnum==0.4.10
 supabase==1.0.3
 tiktoken==0.4.0

+langchain-community==0.0.20
+langchain==0.1.7
 Markdown==3.4.3
 openai==0.27.6
 pdf2image==1.16.3
 pypdf==3.8.1
+streamlit==1.31.0
 StrEnum==0.4.10
 supabase==1.0.3
 tiktoken==0.4.0