bohmian committed
Commit
eb5e46b
1 Parent(s): 14dde8a

Update app.py

Files changed (1)
  app.py: +10 -10
app.py CHANGED
@@ -180,28 +180,28 @@ def get_embeddings():
 
 hf_embeddings = get_embeddings()
 
+# Chromadb vector stores have already been pre-created for the countries above for each of the different chunk sizes and overlaps, and zipped up,
+# to save time when experimenting, as the embeddings take a long time to generate.
+# The existing stores will be pulled from Google Drive above when the app starts. When using the existing vector stores,
+# we just need to change the name of the persist directory when selecting the different chunk sizes and overlaps.
+# Later in the main app, if the user chooses to scrape new data or override with their own PDF, a new chromadb will be created.
+# This step will take some time.
 if not os.path.exists("bm25.zip"):
-    with st.spinner(f'Downloading bm25 retriever for all chunk sizes and overlaps'):
+    with st.spinner(f'Downloading bm25 retriever for all chunk sizes and overlaps, will take some time'):
         os.system("gdown https://drive.google.com/uc?id=1q-hNnyyBA8tKyF3vR69nkwCk9kJj7WHi")
 
 if not os.path.exists("chromadb.zip"):
-    with st.spinner(f'Downloading chromadb retrievers for all chunk sizes and overlaps'):
+    with st.spinner(f'Downloading chromadb retrievers for all chunk sizes and overlaps, will take some time'):
         os.system("gdown https://drive.google.com/uc?id=1zad6tgYm2o5M9E2dTLQqmm6GoI8kxNC3")
 
 if not os.path.exists("bm25/"):
-    with st.spinner(f'Unzipping bm25 retriever for all chunk sizes and overlaps'):
+    with st.spinner(f'Unzipping bm25 retriever for all chunk sizes and overlaps, will take some time'):
         os.system("unzip bm25.zip")
 
 if not os.path.exists("chromadb/"):
-    with st.spinner(f'Unzipping chromadb retrievers for all chunk sizes and overlaps'):
+    with st.spinner(f'Unzipping chromadb retrievers for all chunk sizes and overlaps, will take some time'):
         os.system("unzip chromadb.zip")
 
-
-# Chromadb vector stores have already been pre-created for the countries above for each of the different chunk sizes and overlaps,
-# to save time when experimenting as the embeddings take a long time to generate.
-# The existing stores will be pulled using !wget above when app starts. When using the existing vector stores,
-# just need to change the name of the persist directory when selecting the different chunk sizes and overlaps.
-# Not in this notebook: Later in the main app if the user choose to scrape new data, or override with their own PDF, a new chromadb would be created.
 persist_directory = f"chromadb/chromadb_esg_countries_chunk_{st.session_state['chunk_size']}_overlap_{st.session_state['chunk_overlap']}"
 with st.spinner(f'Setting up pre-built chroma vector store'):
     chroma_db = Chroma(persist_directory=persist_directory, embedding_function=hf_embeddings)
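
The download-and-unzip block in this hunk shells out to the gdown and unzip binaries via os.system, so a missing binary or a failed download passes silently. Below is a minimal alternative sketch, assuming the same Drive file IDs and archive layout as above; it uses gdown's Python API and the standard-library zipfile module instead of shelling out, and the fetch_and_extract helper name is hypothetical, not part of app.py.

import os
import zipfile

import gdown  # same package the committed code invokes on the command line

# Drive file IDs and target folders copied from the hunk above.
ARCHIVES = {
    "bm25.zip": ("1q-hNnyyBA8tKyF3vR69nkwCk9kJj7WHi", "bm25/"),
    "chromadb.zip": ("1zad6tgYm2o5M9E2dTLQqmm6GoI8kxNC3", "chromadb/"),
}

def fetch_and_extract(zip_name, file_id, target_dir):
    # Download the archive only if it is not already present.
    if not os.path.exists(zip_name):
        gdown.download(f"https://drive.google.com/uc?id={file_id}", zip_name, quiet=False)
    # Extract only if the target folder is missing; the archives are assumed
    # to contain bm25/ and chromadb/ at the top level.
    if not os.path.exists(target_dir):
        with zipfile.ZipFile(zip_name) as zf:
            zf.extractall(".")

for zip_name, (file_id, target_dir) in ARCHIVES.items():
    fetch_and_extract(zip_name, file_id, target_dir)

In the app each call would sit inside the same st.spinner blocks shown in the diff, so the long-running download still surfaces a progress message to the user.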
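
The last three context lines of the hunk open the pre-built Chroma store whose persist directory encodes the chunk size and overlap held in st.session_state. A short usage sketch follows, with illustrative chunk settings and an illustrative query; it assumes Chroma and hf_embeddings are the objects already defined earlier in app.py, and relies only on the standard LangChain vector-store methods similarity_search and as_retriever.

# Chroma and hf_embeddings are already set up earlier in app.py.
# Illustrative values; in app.py these come from st.session_state.
chunk_size, chunk_overlap = 500, 50
persist_directory = f"chromadb/chromadb_esg_countries_chunk_{chunk_size}_overlap_{chunk_overlap}"

# Re-open the pre-built store; the same embedding model used to build the
# index must be passed again at query time.
chroma_db = Chroma(persist_directory=persist_directory, embedding_function=hf_embeddings)

# Direct similarity search against the pre-built store (illustrative query) ...
docs = chroma_db.similarity_search("What are this country's net-zero commitments?", k=4)

# ... or wrap the store as a retriever for use in a retrieval chain.
retriever = chroma_db.as_retriever(search_kwargs={"k": 4})

Because only persist_directory changes between chunk-size/overlap combinations, switching configurations is just a matter of pointing the constructor at a different pre-built directory, which is the point of the comment block added in this commit.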