Spaces:

bohmian
/

esg_countries_chatbot

Sleeping

App Files Files Community

bohmian committed on Feb 14

Commit

eb5e46b

•

1 Parent(s): 14dde8a

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -10

app.py CHANGED Viewed

@@ -180,28 +180,28 @@ def get_embeddings():
 hf_embeddings = get_embeddings()
 if not os.path.exists("bm25.zip"):
-    with st.spinner(f'Downloading bm25 retriever for all chunk sizes and overlaps'):
         os.system("gdown https://drive.google.com/uc?id=1q-hNnyyBA8tKyF3vR69nkwCk9kJj7WHi")
 if not os.path.exists("chromadb.zip"):
-    with st.spinner(f'Downloading chromadb retrievers for all chunk sizes and overlaps'):
         os.system("gdown https://drive.google.com/uc?id=1zad6tgYm2o5M9E2dTLQqmm6GoI8kxNC3")
 if not os.path.exists("bm25/"):
-    with st.spinner(f'Unzipping bm25 retriever for all chunk sizes and overlaps'):
         os.system("unzip bm25.zip")
 if not os.path.exists("chromadb/"):
-    with st.spinner(f'Unzipping chromadb retrievers for all chunk sizes and overlaps'):
         os.system("unzip chromadb.zip")
-# Chromadb vector stores have already been pre-created for the countries above for each of the different chunk sizes and overlaps,
-# to save time when experimenting as the embeddings take a long time to generate.
-# The existing stores will be pulled using !wget above when app starts. When using the existing vector stores,
-# just need to change the name of the persist directory when selecting the different chunk sizes and overlaps.
-# Not in this notebook: Later in the main app if the user choose to scrape new data, or override with their own PDF, a new chromadb would be created.
 persist_directory = f"chromadb/chromadb_esg_countries_chunk_{st.session_state['chunk_size']}_overlap_{st.session_state['chunk_overlap']}"
 with st.spinner(f'Setting up pre-built chroma vector store'):
     chroma_db = Chroma(persist_directory=persist_directory,embedding_function=hf_embeddings)

 hf_embeddings = get_embeddings()
+# Chromadb vector stores have already been pre-created for the countries above for each of the different chunk sizes and overlaps, and zipped up,
+# to save time when experimenting as the embeddings take a long time to generate.
+# The existing stores will be pulled from Google Drive below when the app starts. When using the existing vector stores,
+# just need to change the name of the persist directory when selecting the different chunk sizes and overlaps.
+# Later in the main app, if the user chooses to scrape new data or override it with their own PDF, a new chromadb will be created.
+# This step will take some time
 if not os.path.exists("bm25.zip"):
+    with st.spinner(f'Downloading bm25 retriever for all chunk sizes and overlaps, will take some time'):
         os.system("gdown https://drive.google.com/uc?id=1q-hNnyyBA8tKyF3vR69nkwCk9kJj7WHi")
 if not os.path.exists("chromadb.zip"):
+    with st.spinner(f'Downloading chromadb retrievers for all chunk sizes and overlaps, will take some time'):
         os.system("gdown https://drive.google.com/uc?id=1zad6tgYm2o5M9E2dTLQqmm6GoI8kxNC3")
 if not os.path.exists("bm25/"):
+    with st.spinner(f'Unzipping bm25 retriever for all chunk sizes and overlaps, will take some time'):
         os.system("unzip bm25.zip")
 if not os.path.exists("chromadb/"):
+    with st.spinner(f'Unzipping chromadb retrievers for all chunk sizes and overlaps, will take some time'):
         os.system("unzip chromadb.zip")
 persist_directory = f"chromadb/chromadb_esg_countries_chunk_{st.session_state['chunk_size']}_overlap_{st.session_state['chunk_overlap']}"
 with st.spinner(f'Setting up pre-built chroma vector store'):
     chroma_db = Chroma(persist_directory=persist_directory,embedding_function=hf_embeddings)