Spaces:

deepset
/

retrieval-augmentation-svb

Runtime error

App Files Community

maybeMayank committed on Mar 28, 2023

Commit

c24940a

1 Parent(s): d26c2ca

made changes

Browse files

Files changed (3) hide show

.streamlit/config.toml +10 -0
app.py +50 -28
backend_utils.py +13 -17

.streamlit/config.toml CHANGED Viewed

@@ -1,3 +1,13 @@
 [theme]
 base = "light"
 font="monospace"

 [theme]
 base = "light"
 font="monospace"
+[global]
+# By default, Streamlit checks if the Python watchdog module is available and, if not, prints a warning asking for you to install it. The watchdog module is not required, but highly recommended. It improves Streamlit's ability to detect changes to files in your filesystem.
+# If you'd like to turn off this warning, set this to True.
+# Default: false
+disableWatchdogWarning = true
+# If True, will show a warning when you run a Streamlit-enabled script via "python my_script.py".
+# Default: true
+showWarningOnDirectExecution = false

app.py CHANGED Viewed

@@ -1,59 +1,81 @@
 import streamlit as st
-from backend_utils import app_init, set_q1, set_q2, set_q3, set_q4, set_q5
-st.markdown("<center> <h1> Haystack Demo </h1> </center>", unsafe_allow_html=True)
-if st.session_state.get('pipelines_loaded', False):
-    with st.spinner('Loading pipelines...'):
-        p1, p2, p3 = app_init()
-        st.success('Pipelines are loaded', icon="✅")
-        st.session_state['pipelines_loaded'] = True
 placeholder = st.empty()
 with placeholder:
     search_bar, button = st.columns([3, 1])
     with search_bar:
-        username = st.text_area(f"", max_chars=200, key='query')
     with button:
-        st.write("")
-        st.write("")
         run_pressed = st.button("Run")
-st.radio("Type", ("Retrieval Augmented", "Retrieval Augmented with Web Search"), key="query_type")
-# st.sidebar.selectbox(
-#      "Example Questions:",
-#      QUERIES,
-#      key='q_drop_down', on_change=set_question)
 c1, c2, c3, c4, c5 = st.columns(5)
 with c1:
-    st.button('Example Q1', on_click=set_q1)
 with c2:
-    st.button('Example Q2', on_click=set_q2)
 with c3:
-    st.button('Example Q3', on_click=set_q3)
 with c4:
-    st.button('Example Q4', on_click=set_q4)
 with c5:
-    st.button('Example Q5', on_click=set_q5)
-st.markdown("<h4> Answer with PLAIN GPT </h4>", unsafe_allow_html=True)
 placeholder_plain_gpt = st.empty()
-st.text("")
-st.text("")
-st.markdown(f"<h4> Answer with {st.session_state['query_type'].upper()} </h4>", unsafe_allow_html=True)
 placeholder_retrieval_augmented = st.empty()
 if st.session_state.get('query') and run_pressed:
     input = st.session_state['query']
-    p1, p2, p3 = app_init()
-    answers = p1.run(input)
     placeholder_plain_gpt.markdown(answers['results'][0])
     if st.session_state.get("query_type", "Retrieval Augmented") == "Retrieval Augmented":
-        answers_2 = p2.run(input)
     else:
         answers_2 = p3.run(input)
     placeholder_retrieval_augmented.markdown(answers_2['results'][0])

 import streamlit as st
+from backend_utils import (get_plain_pipeline, get_retrieval_augmented_pipeline,
+                           get_web_retrieval_augmented_pipeline, set_q1, set_q2, set_q3, set_q4, set_q5, QUERIES)
+st.set_page_config(
+    page_title="Retrieval Augmentation with Haystack",
+)
+st.markdown("<center> <h2> Reduce Hallucinations with Retrieval Augmentation </h2> </center>", unsafe_allow_html=True)
+st.markdown("Ask a question about the collapse of the Silicon Valley Bank (SVB).", unsafe_allow_html=True)
+# if not st.session_state.get('pipelines_loaded', False):
+#     with st.spinner('Loading pipelines... \n This may take a few mins and might also fail if OpenAI API server is down.'):
+#         p1, p2, p3 = app_init()
+#         st.success('Pipelines are loaded', icon="✅")
+#         st.session_state['pipelines_loaded'] = True
 placeholder = st.empty()
 with placeholder:
     search_bar, button = st.columns([3, 1])
     with search_bar:
+        username = st.text_area(f" ", max_chars=200, key='query')
     with button:
+        st.write(" ")
+        st.write(" ")
         run_pressed = st.button("Run")
+st.markdown("<center> <h5> Example questions </h5> </center>", unsafe_allow_html=True)
+st.write(" ")
+st.write(" ")
 c1, c2, c3, c4, c5 = st.columns(5)
 with c1:
+    st.button(QUERIES[0], on_click=set_q1)
 with c2:
+    st.button(QUERIES[1], on_click=set_q2)
 with c3:
+    st.button(QUERIES[2], on_click=set_q3)
 with c4:
+    st.button(QUERIES[3], on_click=set_q4)
 with c5:
+    st.button(QUERIES[4], on_click=set_q5)
+st.write(" ")
+st.radio("Answer Type:", ("Retrieval Augmented (Static news dataset)", "Retrieval Augmented with Web Search"), key="query_type")
+# st.sidebar.selectbox(
+#      "Example Questions:",
+#      QUERIES,
+#      key='q_drop_down', on_change=set_question)
+st.markdown("<h5> Answer with GPT's Internal Knowledge </h5>", unsafe_allow_html=True)
 placeholder_plain_gpt = st.empty()
+st.text(" ")
+st.text(" ")
+st.markdown(f"<h5> Answer with {st.session_state['query_type']} </h5>", unsafe_allow_html=True)
 placeholder_retrieval_augmented = st.empty()
 if st.session_state.get('query') and run_pressed:
     input = st.session_state['query']
+    with st.spinner('Loading pipelines... \n This may take a few mins and might also fail if OpenAI API server is down.'):
+        p1 = get_plain_pipeline()
+    with st.spinner('Fetching answers from GPT\'s internal knowledge... '
+                    '\n This may take a few mins and might also fail if OpenAI API server is down.'):
+        answers = p1.run(input)
     placeholder_plain_gpt.markdown(answers['results'][0])
     if st.session_state.get("query_type", "Retrieval Augmented") == "Retrieval Augmented":
+        with st.spinner(
+                'Loading Retrieval Augmented pipeline... \
+                n This may take a few mins and might also fail if OpenAI API server is down.'):
+            p2 = get_retrieval_augmented_pipeline()
+        with st.spinner('Fetching relevant documents from documented stores and calculating answers... '
+                        '\n This may take a few mins and might also fail if OpenAI API server is down.'):
+            answers_2 = p2.run(input)
     else:
+        p3 = get_web_retrieval_augmented_pipeline()
         answers_2 = p3.run(input)
     placeholder_retrieval_augmented.markdown(answers_2['results'][0])

backend_utils.py CHANGED Viewed

@@ -1,5 +1,3 @@
-import os
 import streamlit as st
 from haystack import Pipeline
 from haystack.document_stores import FAISSDocumentStore
@@ -15,14 +13,8 @@ QUERIES = [
     "When did SVB collapse?"
 ]
-def ChangeWidgetFontSize(wgt_txt, wch_font_size = '12px'):
-    htmlstr = """<script>var elements = window.parent.document.querySelectorAll('*'), i;
-                    for (i = 0; i < elements.length; ++i) { if (elements[i].innerText == |wgt_txt|)
-                        { elements[i].style.fontSize='""" + wch_font_size + """';} } </script>  """
-    htmlstr = htmlstr.replace('|wgt_txt|', "'" + wgt_txt + "'")
 def get_plain_pipeline():
     prompt_open_ai = PromptModel(model_name_or_path="text-davinci-003", api_key=st.secrets["OPENAI_API_KEY"])
     # Now let make one PromptNode use the default model and the other one the OpenAI model:
@@ -33,6 +25,7 @@ def get_plain_pipeline():
     return pipeline
 def get_retrieval_augmented_pipeline():
     ds = FAISSDocumentStore(faiss_index_path="data/my_faiss_index.faiss",
                             faiss_config_path="data/my_faiss_index.json")
@@ -62,6 +55,7 @@ def get_retrieval_augmented_pipeline():
     return pipeline
 def get_web_retrieval_augmented_pipeline():
     search_key = st.secrets["WEBRET_API_KEY"]
     web_retriever = WebRetriever(api_key=search_key, search_engine_provider="SerperDev")
@@ -82,13 +76,16 @@ def get_web_retrieval_augmented_pipeline():
     return pipeline
-@st.cache_resource(show_spinner=False)
-def app_init():
-    os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]
-    p1 = get_plain_pipeline()
-    p2 = get_retrieval_augmented_pipeline()
-    p3 = get_web_retrieval_augmented_pipeline()
-    return p1, p2, p3
 if 'query' not in st.session_state:
@@ -117,4 +114,3 @@ def set_q4():
 def set_q5():
     st.session_state['query'] = QUERIES[4]

 import streamlit as st
 from haystack import Pipeline
 from haystack.document_stores import FAISSDocumentStore
     "When did SVB collapse?"
 ]
+@st.cache_resource(show_spinner=False)
 def get_plain_pipeline():
     prompt_open_ai = PromptModel(model_name_or_path="text-davinci-003", api_key=st.secrets["OPENAI_API_KEY"])
     # Now let make one PromptNode use the default model and the other one the OpenAI model:
     return pipeline
+@st.cache_resource(show_spinner=False)
 def get_retrieval_augmented_pipeline():
     ds = FAISSDocumentStore(faiss_index_path="data/my_faiss_index.faiss",
                             faiss_config_path="data/my_faiss_index.json")
     return pipeline
+@st.cache_resource(show_spinner=False)
 def get_web_retrieval_augmented_pipeline():
     search_key = st.secrets["WEBRET_API_KEY"]
     web_retriever = WebRetriever(api_key=search_key, search_engine_provider="SerperDev")
     return pipeline
+# @st.cache_resource(show_spinner=False)
+# def app_init():
+#     print("Loading Pipelines...")
+#     p1 = get_plain_pipeline()
+#     print("Loaded Plain Pipeline")
+#     p2 = get_retrieval_augmented_pipeline()
+#     print("Loaded Retrieval Augmented Pipeline")
+#     p3 = get_web_retrieval_augmented_pipeline()
+#     print("Loaded Web Retrieval Augmented Pipeline")
+#     return p1, p2, p3
 if 'query' not in st.session_state:
 def set_q5():
     st.session_state['query'] = QUERIES[4]