bohmian committed on
Commit
72d76ec
·
verified ·
1 Parent(s): bfef8be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -23
app.py CHANGED
@@ -36,6 +36,7 @@ from web_scrape_and_pdf_loader import (
36
  pdf_loader_local
37
  )
38
 
 
39
  import glob
40
 
41
  # os.environ['HUGGINGFACEHUB_API_TOKEN'] = 'your_api_key' # for using HuggingFace Inference API
@@ -604,28 +605,21 @@ if page == "Scrape or Upload Own Docs":
604
  options=options
605
  )
606
 
607
- with st.container(border = True):
608
  st.subheader(f"Selected Option: {option}")
609
- if option == options[0]:
610
-
611
  uploaded_pdf = st.file_uploader("Upload a PDF")
612
  if uploaded_pdf:
613
  temp_file = "./temp.pdf"
614
  with open(temp_file, "wb") as file:
615
  file.write(uploaded_pdf.getvalue())
616
  pdf_filename, = uploaded_pdf.name
617
-
618
- raw_pdf_documents = pdf_loader_local(pdf_filename, country)
619
-
620
- # vectorstore for this country will be stored in "chroma_db/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
621
- # can be used to override existing vectorstore for this country in sidebar document configuration
622
- setup_chromadb_vectorstore(all_documents, chunk_size, chunk_overlap, country)
623
-
624
- # vectorstore for this country will be stored in "bm25/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
625
- # can be used to override existing vectorstore for this country in sidebar document configuration
626
- setup_bm25_retriever(all_documents, chunk_size, chunk_overlap, country)
627
-
628
- if option == options[1]:
629
  n_search_results = st.number_input(
630
  "How many DuckDuckGo search results would you like to scrape?",
631
  0, 20,
@@ -635,15 +629,21 @@ if page == "Scrape or Upload Own Docs":
635
  "Search Term",
636
  value = f"{country_scrape_upload} sustainability esg newest updated public policy document government",
637
  )
 
638
 
639
- all_links, df_links = duckduckgo_scrape(country, search_term, n_search_results)
640
- all_documents = process_links_load_documents(all_links)
641
 
642
- # # vectorstore for this country will be stored in "chroma_db/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
643
- # # can be used to override existing vectorstore for this country in sidebar document configuration
644
- # setup_chromadb_vectorstore(all_documents, chunk_size, chunk_overlap, country)
 
 
 
645
 
646
- # # vectorstore for this country will be stored in "bm25/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
647
- # # can be used to override existing vectorstore for this country in sidebar document configuration
648
- # setup_bm25_retriever(all_documents, chunk_size, chunk_overlap, country)
649
 
 
 
 
 
36
  pdf_loader_local
37
  )
38
 
39
+ # look for new retrievers that user created (to override existing ones if user chooses)
40
  import glob
41
 
42
  # os.environ['HUGGINGFACEHUB_API_TOKEN'] = 'your_api_key' # for using HuggingFace Inference API
 
605
  options=options
606
  )
607
 
608
+ if option == options[0]:
609
  st.subheader(f"Selected Option: {option}")
610
+ with st.form(key='upload_pdf_form'):
611
+
612
  uploaded_pdf = st.file_uploader("Upload a PDF")
613
  if uploaded_pdf:
614
  temp_file = "./temp.pdf"
615
  with open(temp_file, "wb") as file:
616
  file.write(uploaded_pdf.getvalue())
617
  pdf_filename, = uploaded_pdf.name
618
+ submit_upload_pdf = st.form_submit_button(label='Submit')
619
+
620
+ if option == options[1]:
621
+ st.subheader(f"Selected Option: {option}")
622
+ with st.form(key='upload_pdf_form'):
 
 
 
 
 
 
 
623
  n_search_results = st.number_input(
624
  "How many DuckDuckGo search results would you like to scrape?",
625
  0, 20,
 
629
  "Search Term",
630
  value = f"{country_scrape_upload} sustainability esg newest updated public policy document government",
631
  )
632
+ submit_scrape_web = st.form_submit_button(label='Submit')
633
 
634
+ if submit_upload_pdf | submit_scrape_web:
 
635
 
636
+ if submit_upload_pdf:
637
+ all_documents = pdf_loader_local(pdf_filename, country)
638
+
639
+ if submit_scrape_web:
640
+ all_links, df_links = duckduckgo_scrape(country, search_term, n_search_results)
641
+ all_documents = process_links_load_documents(all_links)
642
 
643
+ # vectorstore for this country will be stored in "chroma_db/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
644
+ # can be used to override existing vectorstore for this country in sidebar document configuration
645
+ setup_chromadb_vectorstore(all_documents, chunk_size, chunk_overlap, country)
646
 
647
+ # BM25 retriever for this country will be stored in "bm25/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}"
648
+ # can be used to override the existing BM25 retriever for this country in sidebar document configuration
649
+ setup_bm25_retriever(all_documents, chunk_size, chunk_overlap, country)