Spaces:

bohmian
/

esg_countries_chatbot

Sleeping

bohmian committed on Feb 14, 2024

Commit

2360df8

verified ·

1 Parent(s): dfe22da

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -155,6 +155,7 @@ countries = [
     "Germany",
     ]
 @st.cache_data # only going to get once
 def get_llm(temp = st.session_state['temperature'], tokens = st.session_state['max_new_tokens']):
     # This is an inference endpoint API from huggingface, the model is not run locally, it is run on huggingface
@@ -179,6 +180,23 @@ def get_embeddings():
 hf_embeddings = get_embeddings()
 # Chromadb vector stores have already been pre-created for the countries above for each of the different chunk sizes and overlaps,
 # to save time when experimenting as the embeddings take a long time to generate.
 # The existing stores will be pulled using !wget above when app starts. When using the existing vector stores,

     "Germany",
     ]
 @st.cache_data # only going to get once
 def get_llm(temp = st.session_state['temperature'], tokens = st.session_state['max_new_tokens']):
     # This is an inference endpoint API from huggingface, the model is not run locally, it is run on huggingface
 hf_embeddings = get_embeddings()
+if not os.path.exists("bm25.zip"):
+    with st.spinner(f'Downloading bm25 retriever for all chunk sizes and overlaps'):
+        os.system("gdown https://drive.google.com/uc?id=1q-hNnyyBA8tKyF3vR69nkwCk9kJj7WHi")
+if not os.path.exists("chromadb.zip"):
+    with st.spinner(f'Downloading chromadb retrievers for all chunk sizes and overlaps'):
+        os.system("gdown https://drive.google.com/uc?id=1zad6tgYm2o5M9E2dTLQqmm6GoI8kxNC3")
+if not os.path.exists("bm25/"):
+    with st.spinner(f'Unzipping bm25 retriever for all chunk sizes and overlaps'):
+        os.system("unzip bm25.zip")
+if not os.path.exists("chromadb/"):
+    with st.spinner(f'Unzipping chromadb retrievers for all chunk sizes and overlaps'):
+        os.system("unzip chromadb.zip")
 # Chromadb vector stores have already been pre-created for the countries above for each of the different chunk sizes and overlaps,
 # to save time when experimenting as the embeddings take a long time to generate.
 # The existing stores are downloaded with gdown and unzipped above when the app starts. When using the existing vector stores,