Spaces:

bohmian
/

esg_countries_chatbot

Running

App Files Community

bohmian committed on Feb 15, 2024

Commit

72d76ec

verified ·

1 Parent(s): bfef8be

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -23

app.py CHANGED Viewed

@@ -36,6 +36,7 @@ from web_scrape_and_pdf_loader import (
     pdf_loader_local
 )
 import glob
 # os.environ['HUGGINGFACEHUB_API_TOKEN'] = 'your_api_key' # for using HuggingFace Inference API
@@ -604,28 +605,21 @@ if page == "Scrape or Upload Own Docs":
         options=options
     )
-    with st.container(border = True):
         st.subheader(f"Selected Option: {option}")
-        if option == options[0]:
             uploaded_pdf = st.file_uploader("Upload a PDF")
             if uploaded_pdf:
                 temp_file = "./temp.pdf"
                 with open(temp_file, "wb") as file:
                     file.write(uploaded_pdf.getvalue())
                     pdf_filename, = uploaded_pdf.name
-                raw_pdf_documents = pdf_loader_local(pdf_filename, country)
-                # vectorstore for this country will be stored in "chroma_db/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
-                # can be used to override existing vectorstore for this country in sidebar document configuration
-                setup_chromadb_vectorstore(all_documents, chunk_size, chunk_overlap, country)
-                # vectorstore for this country will be stored in "bm25/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
-                # can be used to override existing vectorstore for this country in sidebar document configuration
-                setup_bm25_retriever(all_documents, chunk_size, chunk_overlap, country)
-        if option == options[1]:
             n_search_results = st.number_input(
                                         "How many DuckDuckGo search results would you like to scrape?",
                                         0, 20,
@@ -635,15 +629,21 @@ if page == "Scrape or Upload Own Docs":
                                         "Search Term",
                                         value = f"{country_scrape_upload} sustainability esg newest updated public policy document government",
                                     )
-            all_links, df_links = duckduckgo_scrape(country, search_term, n_search_results)
-            all_documents = process_links_load_documents(all_links)
-    # # vectorstore for this country will be stored in "chroma_db/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
-    # # can be used to override existing vectorstore for this country in sidebar document configuration
-    # setup_chromadb_vectorstore(all_documents, chunk_size, chunk_overlap, country)
-    # # vectorstore for this country will be stored in "bm25/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
-    # # can be used to override existing vectorstore for this country in sidebar document configuration
-    # setup_bm25_retriever(all_documents, chunk_size, chunk_overlap, country)

     pdf_loader_local
 )
+# look for new retrievers that user created (to override existing ones if user chooses)
 import glob
 # os.environ['HUGGINGFACEHUB_API_TOKEN'] = 'your_api_key' # for using HuggingFace Inference API
         options=options
     )
+    if option == options[0]:
         st.subheader(f"Selected Option: {option}")
+        with st.form(key='upload_pdf_form'):
             uploaded_pdf = st.file_uploader("Upload a PDF")
             if uploaded_pdf:
                 temp_file = "./temp.pdf"
                 with open(temp_file, "wb") as file:
                     file.write(uploaded_pdf.getvalue())
                     pdf_filename, = uploaded_pdf.name
+            submit_upload_pdf = st.form_submit_button(label='Submit')
+    if option == options[1]:
+        st.subheader(f"Selected Option: {option}")
+        with st.form(key='upload_pdf_form'):
             n_search_results = st.number_input(
                                         "How many DuckDuckGo search results would you like to scrape?",
                                         0, 20,
                                         "Search Term",
                                         value = f"{country_scrape_upload} sustainability esg newest updated public policy document government",
                                     )
+            submit_scrape_web = st.form_submit_button(label='Submit')
+    if submit_upload_pdf | submit_scrape_web:
+        if submit_upload_pdf:
+            all_documents = pdf_loader_local(pdf_filename, country)
+        if submit_scrape_web:
+            all_links, df_links = duckduckgo_scrape(country, search_term, n_search_results)
+            all_documents = process_links_load_documents(all_links)
+        # vectorstore for this country will be stored in "chroma_db/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
+        # can be used to override existing vectorstore for this country in sidebar document configuration
+        setup_chromadb_vectorstore(all_documents, chunk_size, chunk_overlap, country)
+        # BM25 retriever for this country will be stored in "bm25/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
+        # can be used to override the existing BM25 retriever for this country in sidebar document configuration
+        setup_bm25_retriever(all_documents, chunk_size, chunk_overlap, country)