Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -273,12 +273,15 @@ def check_for_new_retrievers():
|
|
273 |
# check whether retrievers/vector stores created from the user's own uploaded PDF or newly scraped data are found
|
274 |
new_documents_chroma = glob.glob("chromadb/new*")
|
275 |
new_documents_bm25 = glob.glob("bm25/new*")
|
|
|
|
|
276 |
new_countries = []
|
277 |
-
|
278 |
# loop through new docs in chroma retrievers created by user scraping/pdf (if any)
|
279 |
try:
|
280 |
for doc in new_documents_chroma:
|
281 |
-
|
|
|
282 |
|
283 |
new_doc_country = doc.split('_')[1]
|
284 |
new_doc_chunk_size = doc.split('_')[3]
|
@@ -643,7 +646,7 @@ if page == "Scrape or Upload Own Docs":
|
|
643 |
# how user wishes to populate documents
|
644 |
options = [
|
645 |
"Upload Own PDF",
|
646 |
-
"Automatically Scrape Web Data using DuckDuckGo (
|
647 |
]
|
648 |
|
649 |
option = st.radio(
|
@@ -657,12 +660,12 @@ if page == "Scrape or Upload Own Docs":
|
|
657 |
|
658 |
# save new retrievers in local directory
|
659 |
def save_new_retrievers(all_documents, chunk_size, chunk_overlap, country_scrape_upload):
|
660 |
-
with st.spinner('Setting up new bm25 retrievers with documents,
|
661 |
# vectorstore for this country will be stored in "bm25/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}_"
|
662 |
# can be used to override existing vectorstore for this country in sidebar document configuration
|
663 |
setup_bm25_retriever(all_documents, chunk_size, chunk_overlap, country_scrape_upload)
|
664 |
|
665 |
-
with st.spinner('Setting up new chromadb vector stores with documents,
|
666 |
# vectorstore for this country will be stored in "chromadb/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}_"
|
667 |
# can be used to override existing vectorstore for this country in sidebar document configuration
|
668 |
setup_chromadb_vectorstore(hf_embeddings, all_documents, chunk_size, chunk_overlap, country_scrape_upload)
|
@@ -682,7 +685,7 @@ if page == "Scrape or Upload Own Docs":
|
|
682 |
with open(temp_file, "wb") as file:
|
683 |
file.write(uploaded_pdf.getvalue())
|
684 |
pdf_filename = uploaded_pdf.name
|
685 |
-
submit_upload_pdf = st.form_submit_button(label='Upload and Create Vector Store')
|
686 |
st.markdown(":blue[NOTE:] After you are done creating the vectore store, the country will appear under :blue[Countries to Override in the 'Document Config'] section of the left sidebar. Select the country to override it.")
|
687 |
|
688 |
|
|
|
273 |
# check whether retrievers/vector stores created from the user's own uploaded PDF or newly scraped data are found
|
274 |
new_documents_chroma = glob.glob("chromadb/new*")
|
275 |
new_documents_bm25 = glob.glob("bm25/new*")
|
276 |
+
new_documents_chroma = [os.path.split(doc)[-1] for doc in new_documents_chroma]
|
277 |
+
new_documents_bm25 = [os.path.split(doc)[-1] for doc in new_documents_bm25]
|
278 |
new_countries = []
|
279 |
+
|
280 |
# loop through new docs in chroma retrievers created by user scraping/pdf (if any)
|
281 |
try:
|
282 |
for doc in new_documents_chroma:
|
283 |
+
#print(doc)
|
284 |
+
if ((doc + ".pickle") in new_documents_bm25): # check that the doc also exists for bm25 retriever
|
285 |
|
286 |
new_doc_country = doc.split('_')[1]
|
287 |
new_doc_chunk_size = doc.split('_')[3]
|
|
|
646 |
# how user wishes to populate documents
|
647 |
options = [
|
648 |
"Upload Own PDF",
|
649 |
+
"Automatically Scrape Web Data using DuckDuckGo (may take more than 5 mins)"
|
650 |
]
|
651 |
|
652 |
option = st.radio(
|
|
|
660 |
|
661 |
# save new retrievers in local directory
|
662 |
def save_new_retrievers(all_documents, chunk_size, chunk_overlap, country_scrape_upload):
|
663 |
+
with st.spinner('Setting up new bm25 retrievers with documents, may take more than 5 mins...'):
|
664 |
# vectorstore for this country will be stored in "bm25/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}_"
|
665 |
# can be used to override existing vectorstore for this country in sidebar document configuration
|
666 |
setup_bm25_retriever(all_documents, chunk_size, chunk_overlap, country_scrape_upload)
|
667 |
|
668 |
+
with st.spinner('Setting up new chromadb vector stores with documents, may take more than 5 mins...'):
|
669 |
# vectorstore for this country will be stored in "chromadb/new_{country}_chunk_{chunk_size}_overlap_{chunk_overlap}_"
|
670 |
# can be used to override existing vectorstore for this country in sidebar document configuration
|
671 |
setup_chromadb_vectorstore(hf_embeddings, all_documents, chunk_size, chunk_overlap, country_scrape_upload)
|
|
|
685 |
with open(temp_file, "wb") as file:
|
686 |
file.write(uploaded_pdf.getvalue())
|
687 |
pdf_filename = uploaded_pdf.name
|
688 |
+
submit_upload_pdf = st.form_submit_button(label='Upload and Create Vector Store (Scroll down after clicking)')
|
689 |
st.markdown(":blue[NOTE:] After you are done creating the vectore store, the country will appear under :blue[Countries to Override in the 'Document Config'] section of the left sidebar. Select the country to override it.")
|
690 |
|
691 |
|