bohmian committed on
Commit
e809ac3
1 Parent(s): 953fb9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -28
app.py CHANGED
@@ -631,6 +631,17 @@ if page == "Scrape or Upload Own Docs":
631
  submit_scrape_web = False
632
  submit_scrape_vector_store = False
633
 
 
 
 
 
 
 
 
 
 
 
 
634
  # form for user to configure pdf loading options
635
  if option == options[0]:
636
  with st.form(key='upload_pdf_form'):
@@ -641,7 +652,13 @@ if page == "Scrape or Upload Own Docs":
641
  with open(temp_file, "wb") as file:
642
  file.write(uploaded_pdf.getvalue())
643
  pdf_filename, = uploaded_pdf.name
644
- submit_upload_pdf = st.form_submit_button(label='Upload and Create Vector Store')
 
 
 
 
 
 
645
 
646
  # form for user to configure web scraping for duckduckgo
647
  if option == options[1]:
@@ -667,31 +684,9 @@ if page == "Scrape or Upload Own Docs":
667
  st.write(df_links)
668
  except:
669
  st.write("Waiting for web scraping results.")
670
- submit_scrape_vector_store = st.form_submit_button(label='Create Vector Store from Search Results')
671
-
672
- def get_new_retrievers(all_documents, chunk_size, chunk_overlap, country_scrape_upload):
673
- with st.spinner('Setting up new bm25 retrievers with documents, can take very long...'):
674
- # vectorstore for this country will be stored in "bm25/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
675
- # can be used to override existing vectorstore for this country in sidebar document configuration
676
- setup_bm25_retriever(all_documents, chunk_size, chunk_overlap, country_scrape_upload)
677
-
678
-
679
- with st.spinner('Setting up new chromadb vectores with documents, can take 5 mins and above...'):
680
- # vectorstore for this country will be stored in "chroma_db/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
681
- # can be used to override existing vectorstore for this country in sidebar document configuration
682
- setup_chromadb_vectorstore(all_documents, chunk_size, chunk_overlap, country_scrape_upload)
683
-
684
- if (submit_upload_pdf | submit_scrape_vector_store):
685
- if submit_upload_pdf:
686
- with st.spinner('Generating documents from PDF...'):
687
- all_documents = pdf_loader_local(pdf_filename, country_scrape_upload)
688
- get_new_retrievers(all_documents, st.session_state['chunk_size'], st.session_state['chunk_overlap'], country_scrape_upload)
689
-
690
- if submit_scrape_vector_store:
691
- with st.spinner('Generating documents from web search results...'):
692
- all_documents = process_links_load_documents(all_links)
693
- get_new_retrievers(all_documents, st.session_state['chunk_size'], st.session_state['chunk_overlap'], country_scrape_upload)
694
-
695
-
696
-
697
 
 
 
 
 
 
631
  submit_scrape_web = False
632
  submit_scrape_vector_store = False
633
 
634
+ def get_new_retrievers(all_documents, chunk_size, chunk_overlap, country_scrape_upload):
635
+ with st.spinner('Setting up new bm25 retrievers with documents, can take very long...'):
636
+ # vectorstore for this country will be stored in "bm25/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
637
+ # can be used to override existing vectorstore for this country in sidebar document configuration
638
+ setup_bm25_retriever(all_documents, chunk_size, chunk_overlap, country_scrape_upload)
639
+
640
+ with st.spinner('Setting up new chromadb vectores with documents, can take 5 mins and above...'):
641
+ # vectorstore for this country will be stored in "chroma_db/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
642
+ # can be used to override existing vectorstore for this country in sidebar document configuration
643
+ setup_chromadb_vectorstore(all_documents, chunk_size, chunk_overlap, country_scrape_upload)
644
+
645
  # form for user to configure pdf loading options
646
  if option == options[0]:
647
  with st.form(key='upload_pdf_form'):
 
652
  with open(temp_file, "wb") as file:
653
  file.write(uploaded_pdf.getvalue())
654
  pdf_filename, = uploaded_pdf.name
655
+ submit_upload_pdf = st.form_submit_button(label='Upload and Create Vector Store')
656
+
657
+ if submit_upload_pdf:
658
+ with st.spinner('Generating documents from PDF...'):
659
+ all_documents = pdf_loader_local(pdf_filename, country_scrape_upload)
660
+ get_new_retrievers(all_documents, st.session_state['chunk_size'], st.session_state['chunk_overlap'], country_scrape_upload)
661
+
662
 
663
  # form for user to configure web scraping for duckduckgo
664
  if option == options[1]:
 
684
  st.write(df_links)
685
  except:
686
  st.write("Waiting for web scraping results.")
687
+ submit_scrape_vector_store = st.form_submit_button(label='Create Vector Store from Search Results')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
688
 
689
+ if submit_scrape_vector_store:
690
+ with st.spinner('Generating documents from web search results...'):
691
+ all_documents = process_links_load_documents(all_links)
692
+ get_new_retrievers(all_documents, st.session_state['chunk_size'], st.session_state['chunk_overlap'], country_scrape_upload)