Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -180,28 +180,28 @@ def get_embeddings():
|
|
180 |
|
181 |
hf_embeddings = get_embeddings()
|
182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
if not os.path.exists("bm25.zip"):
|
184 |
-
with st.spinner(f'Downloading bm25 retriever for all chunk sizes and overlaps'):
|
185 |
os.system("gdown https://drive.google.com/uc?id=1q-hNnyyBA8tKyF3vR69nkwCk9kJj7WHi")
|
186 |
|
187 |
if not os.path.exists("chromadb.zip"):
|
188 |
-
with st.spinner(f'Downloading chromadb retrievers for all chunk sizes and overlaps'):
|
189 |
os.system("gdown https://drive.google.com/uc?id=1zad6tgYm2o5M9E2dTLQqmm6GoI8kxNC3")
|
190 |
|
191 |
if not os.path.exists("bm25/"):
|
192 |
-
with st.spinner(f'Unzipping bm25 retriever for all chunk sizes and overlaps'):
|
193 |
os.system("unzip bm25.zip")
|
194 |
|
195 |
if not os.path.exists("chromadb/"):
|
196 |
-
with st.spinner(f'Unzipping chromadb retrievers for all chunk sizes and overlaps'):
|
197 |
os.system("unzip chromadb.zip")
|
198 |
|
199 |
-
|
200 |
-
# Chromadb vector stores have already been pre-created for the countries above for each of the different chunk sizes and overlaps,
|
201 |
-
# to save time when experimenting as the embeddings take a long time to generate.
|
202 |
-
# The existing stores will be pulled using !wget above when app starts. When using the existing vector stores,
|
203 |
-
# just need to change the name of the persist directory when selecting the different chunk sizes and overlaps.
|
204 |
-
# Not in this notebook: Later in the main app if the user choose to scrape new data, or override with their own PDF, a new chromadb would be created.
|
205 |
persist_directory = f"chromadb/chromadb_esg_countries_chunk_{st.session_state['chunk_size']}_overlap_{st.session_state['chunk_overlap']}"
|
206 |
with st.spinner(f'Setting up pre-built chroma vector store'):
|
207 |
chroma_db = Chroma(persist_directory=persist_directory,embedding_function=hf_embeddings)
|
|
|
180 |
|
181 |
hf_embeddings = get_embeddings()
|
182 |
|
183 |
+
# Chromadb vector stores have already been pre-created for the countries above for each of the different chunk sizes and overlaps, and zipped up,
|
184 |
+
# to save time when experimenting as the embeddings take a long time to generate.
|
185 |
+
# The existing stores will be pulled from Google Drive below when the app starts. When using the existing vector stores,
|
186 |
+
# only the name of the persist directory needs to change when selecting different chunk sizes and overlaps.
|
187 |
+
# Later in the main app, if the user chooses to scrape new data or override with their own PDF, a new chromadb will be created.
|
188 |
+
# This step will take some time
|
189 |
if not os.path.exists("bm25.zip"):
|
190 |
+
with st.spinner(f'Downloading bm25 retriever for all chunk sizes and overlaps, will take some time'):
|
191 |
os.system("gdown https://drive.google.com/uc?id=1q-hNnyyBA8tKyF3vR69nkwCk9kJj7WHi")
|
192 |
|
193 |
if not os.path.exists("chromadb.zip"):
|
194 |
+
with st.spinner(f'Downloading chromadb retrievers for all chunk sizes and overlaps, will take some time'):
|
195 |
os.system("gdown https://drive.google.com/uc?id=1zad6tgYm2o5M9E2dTLQqmm6GoI8kxNC3")
|
196 |
|
197 |
if not os.path.exists("bm25/"):
|
198 |
+
with st.spinner(f'Unzipping bm25 retriever for all chunk sizes and overlaps, will take some time'):
|
199 |
os.system("unzip bm25.zip")
|
200 |
|
201 |
if not os.path.exists("chromadb/"):
|
202 |
+
with st.spinner(f'Unzipping chromadb retrievers for all chunk sizes and overlaps, will take some time'):
|
203 |
os.system("unzip chromadb.zip")
|
204 |
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
persist_directory = f"chromadb/chromadb_esg_countries_chunk_{st.session_state['chunk_size']}_overlap_{st.session_state['chunk_overlap']}"
|
206 |
with st.spinner(f'Setting up pre-built chroma vector store'):
|
207 |
chroma_db = Chroma(persist_directory=persist_directory,embedding_function=hf_embeddings)
|