Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -631,6 +631,17 @@ if page == "Scrape or Upload Own Docs":
|
|
631 |
submit_scrape_web = False
|
632 |
submit_scrape_vector_store = False
|
633 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
634 |
# form for user to configure pdf loading options
|
635 |
if option == options[0]:
|
636 |
with st.form(key='upload_pdf_form'):
|
@@ -641,7 +652,13 @@ if page == "Scrape or Upload Own Docs":
|
|
641 |
with open(temp_file, "wb") as file:
|
642 |
file.write(uploaded_pdf.getvalue())
|
643 |
pdf_filename, = uploaded_pdf.name
|
644 |
-
submit_upload_pdf = st.form_submit_button(label='Upload and Create Vector Store')
|
|
|
|
|
|
|
|
|
|
|
|
|
645 |
|
646 |
# form for user to configure web scraping for duckduckgo
|
647 |
if option == options[1]:
|
@@ -667,31 +684,9 @@ if page == "Scrape or Upload Own Docs":
|
|
667 |
st.write(df_links)
|
668 |
except:
|
669 |
st.write("Waiting for web scraping results.")
|
670 |
-
submit_scrape_vector_store = st.form_submit_button(label='Create Vector Store from Search Results')
|
671 |
-
|
672 |
-
def get_new_retrievers(all_documents, chunk_size, chunk_overlap, country_scrape_upload):
|
673 |
-
with st.spinner('Setting up new bm25 retrievers with documents, can take very long...'):
|
674 |
-
# vectorstore for this country will be stored in "bm25/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
|
675 |
-
# can be used to override existing vectorstore for this country in sidebar document configuration
|
676 |
-
setup_bm25_retriever(all_documents, chunk_size, chunk_overlap, country_scrape_upload)
|
677 |
-
|
678 |
-
|
679 |
-
with st.spinner('Setting up new chromadb vectores with documents, can take 5 mins and above...'):
|
680 |
-
# vectorstore for this country will be stored in "chroma_db/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
|
681 |
-
# can be used to override existing vectorstore for this country in sidebar document configuration
|
682 |
-
setup_chromadb_vectorstore(all_documents, chunk_size, chunk_overlap, country_scrape_upload)
|
683 |
-
|
684 |
-
if (submit_upload_pdf | submit_scrape_vector_store):
|
685 |
-
if submit_upload_pdf:
|
686 |
-
with st.spinner('Generating documents from PDF...'):
|
687 |
-
all_documents = pdf_loader_local(pdf_filename, country_scrape_upload)
|
688 |
-
get_new_retrievers(all_documents, st.session_state['chunk_size'], st.session_state['chunk_overlap'], country_scrape_upload)
|
689 |
-
|
690 |
-
if submit_scrape_vector_store:
|
691 |
-
with st.spinner('Generating documents from web search results...'):
|
692 |
-
all_documents = process_links_load_documents(all_links)
|
693 |
-
get_new_retrievers(all_documents, st.session_state['chunk_size'], st.session_state['chunk_overlap'], country_scrape_upload)
|
694 |
-
|
695 |
-
|
696 |
-
|
697 |
|
|
|
|
|
|
|
|
|
|
631 |
submit_scrape_web = False
|
632 |
submit_scrape_vector_store = False
|
633 |
|
634 |
+
def get_new_retrievers(all_documents, chunk_size, chunk_overlap, country_scrape_upload):
|
635 |
+
with st.spinner('Setting up new bm25 retrievers with documents, can take very long...'):
|
636 |
+
# vectorstore for this country will be stored in "bm25/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
|
637 |
+
# can be used to override existing vectorstore for this country in sidebar document configuration
|
638 |
+
setup_bm25_retriever(all_documents, chunk_size, chunk_overlap, country_scrape_upload)
|
639 |
+
|
640 |
+
with st.spinner('Setting up new chromadb vectores with documents, can take 5 mins and above...'):
|
641 |
+
# vectorstore for this country will be stored in "chroma_db/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
|
642 |
+
# can be used to override existing vectorstore for this country in sidebar document configuration
|
643 |
+
setup_chromadb_vectorstore(all_documents, chunk_size, chunk_overlap, country_scrape_upload)
|
644 |
+
|
645 |
# form for user to configure pdf loading options
|
646 |
if option == options[0]:
|
647 |
with st.form(key='upload_pdf_form'):
|
|
|
652 |
with open(temp_file, "wb") as file:
|
653 |
file.write(uploaded_pdf.getvalue())
|
654 |
pdf_filename, = uploaded_pdf.name
|
655 |
+
submit_upload_pdf = st.form_submit_button(label='Upload and Create Vector Store')
|
656 |
+
|
657 |
+
if submit_upload_pdf:
|
658 |
+
with st.spinner('Generating documents from PDF...'):
|
659 |
+
all_documents = pdf_loader_local(pdf_filename, country_scrape_upload)
|
660 |
+
get_new_retrievers(all_documents, st.session_state['chunk_size'], st.session_state['chunk_overlap'], country_scrape_upload)
|
661 |
+
|
662 |
|
663 |
# form for user to configure web scraping for duckduckgo
|
664 |
if option == options[1]:
|
|
|
684 |
st.write(df_links)
|
685 |
except:
|
686 |
st.write("Waiting for web scraping results.")
|
687 |
+
submit_scrape_vector_store = st.form_submit_button(label='Create Vector Store from Search Results')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
688 |
|
689 |
+
if submit_scrape_vector_store:
|
690 |
+
with st.spinner('Generating documents from web search results...'):
|
691 |
+
all_documents = process_links_load_documents(all_links)
|
692 |
+
get_new_retrievers(all_documents, st.session_state['chunk_size'], st.session_state['chunk_overlap'], country_scrape_upload)
|