Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -632,15 +632,15 @@ if page == "Scrape or Upload Own Docs":
|
|
632 |
submit_scrape_vector_store = False
|
633 |
|
634 |
def get_new_retrievers(all_documents, chunk_size, chunk_overlap, country_scrape_upload):
|
635 |
-
with st.spinner('Setting up new bm25 retrievers with documents, can take very long...'):
|
636 |
# vectorstore for this country will be stored in "bm25/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
|
637 |
# can be used to override existing vectorstore for this country in sidebar document configuration
|
638 |
-
|
639 |
|
640 |
-
with st.spinner('Setting up new chromadb vectores with documents, can take 5 mins and above...'):
|
641 |
# vectorstore for this country will be stored in "chroma_db/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
|
642 |
# can be used to override existing vectorstore for this country in sidebar document configuration
|
643 |
-
|
644 |
|
645 |
# form for user to configure pdf loading options
|
646 |
if option == options[0]:
|
@@ -655,8 +655,8 @@ if page == "Scrape or Upload Own Docs":
|
|
655 |
submit_upload_pdf = st.form_submit_button(label='Upload and Create Vector Store')
|
656 |
|
657 |
if submit_upload_pdf:
|
658 |
-
with st.spinner('Generating documents from PDF...'):
|
659 |
-
|
660 |
get_new_retrievers(all_documents, st.session_state['chunk_size'], st.session_state['chunk_overlap'], country_scrape_upload)
|
661 |
|
662 |
|
@@ -687,7 +687,7 @@ if page == "Scrape or Upload Own Docs":
|
|
687 |
submit_scrape_vector_store = st.form_submit_button(label='Create Vector Store from Search Results')
|
688 |
|
689 |
if submit_scrape_vector_store:
|
690 |
-
with st.spinner('Generating documents from web search results...'):
|
691 |
-
|
692 |
get_new_retrievers(all_documents, st.session_state['chunk_size'], st.session_state['chunk_overlap'], country_scrape_upload)
|
693 |
st.write("Done.")
|
|
|
632 |
submit_scrape_vector_store = False
|
633 |
|
634 |
def get_new_retrievers(all_documents, chunk_size, chunk_overlap, country_scrape_upload):
|
635 |
+
#with st.spinner('Setting up new bm25 retrievers with documents, can take very long...'):
|
636 |
# vectorstore for this country will be stored in "bm25/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
|
637 |
# can be used to override existing vectorstore for this country in sidebar document configuration
|
638 |
+
setup_bm25_retriever(all_documents, chunk_size, chunk_overlap, country_scrape_upload)
|
639 |
|
640 |
+
#with st.spinner('Setting up new chromadb vectores with documents, can take 5 mins and above...'):
|
641 |
# vectorstore for this country will be stored in "chroma_db/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
|
642 |
# can be used to override existing vectorstore for this country in sidebar document configuration
|
643 |
+
setup_chromadb_vectorstore(all_documents, chunk_size, chunk_overlap, country_scrape_upload)
|
644 |
|
645 |
# form for user to configure pdf loading options
|
646 |
if option == options[0]:
|
|
|
655 |
submit_upload_pdf = st.form_submit_button(label='Upload and Create Vector Store')
|
656 |
|
657 |
if submit_upload_pdf:
|
658 |
+
#with st.spinner('Generating documents from PDF...'):
|
659 |
+
all_documents = pdf_loader_local(pdf_filename, country_scrape_upload)
|
660 |
get_new_retrievers(all_documents, st.session_state['chunk_size'], st.session_state['chunk_overlap'], country_scrape_upload)
|
661 |
|
662 |
|
|
|
687 |
submit_scrape_vector_store = st.form_submit_button(label='Create Vector Store from Search Results')
|
688 |
|
689 |
if submit_scrape_vector_store:
|
690 |
+
#with st.spinner('Generating documents from web search results...'):
|
691 |
+
all_documents = process_links_load_documents(all_links)
|
692 |
get_new_retrievers(all_documents, st.session_state['chunk_size'], st.session_state['chunk_overlap'], country_scrape_upload)
|
693 |
st.write("Done.")
|