KatGaw committed on

Commit aa08d55 · 1 Parent(s): 81ab76b

adding new files

Files changed (1)
  1. app.py +29 -9
app.py CHANGED
@@ -88,16 +88,36 @@ for chunk in split_chunks:
     max_chunk_length = max(max_chunk_length, tiktoken_len(chunk.page_content))
 
 # Embeddings and Vector store
-qdrant_vectorstore = FAISS.from_documents(
-    split_chunks,
-    embedding_model,
-    location=":memory:",
-    collection_name="airbnb 10k filings",
-)
+# qdrant_vectorstore = FAISS.from_documents(
+#     split_chunks,
+#     embedding_model,
+#     location=":memory:",
+#     collection_name="airbnb 10k filings",
+# )
+
+if os.path.exists("./data/vectorstore"):
+    vectorstore = FAISS.load_local(
+        "./data/vectorstore",
+        embedding_model,
+        allow_dangerous_deserialization=True  # this is necessary to load the vectorstore from disk, as it's stored as a `.pkl` file
+    )
+    retriever = vectorstore.as_retriever()
+    print("Loaded Vectorstore")
+else:
+    print("Indexing Files")
+    os.makedirs("./data/vectorstore", exist_ok=True)
+    for i in range(0, len(split_chunks), 32):
+        if i == 0:
+            vectorstore = FAISS.from_documents(split_chunks[i:i+32], embedding_model)
+            continue
+        vectorstore.add_documents(split_chunks[i:i+32])
+    vectorstore.save_local("./data/vectorstore")
+
+
 print("Loaded Vectorstore")
 
 # Set up your retriever using LangChain
-qdrant_retriever = qdrant_vectorstore.as_retriever()
+retriever = vectorstore.as_retriever()
 
 @cl.on_chat_start
 async def init():
@@ -115,7 +135,7 @@ async def init():
         # INVOKE CHAIN WITH: {"question" : "<<SOME USER QUESTION>>"}
         # "question" : populated by getting the value of the "question" key
         # "context" : populated by getting the value of the "question" key and chaining it into the base_retriever
-        {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
+        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
         # "context" : is assigned to a RunnablePassthrough object (will not be called or considered in the next step)
         # by getting the value of the "context" key from the previous step
         | RunnablePassthrough.assign(context=itemgetter("context"))
@@ -127,7 +147,7 @@ async def init():
     # cl.user_session.set("retrieval_augmented_qa_chain", retrieval_augmented_qa_chain)
 
     # lcel_rag_chain = (
-    #     {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
+    #     {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
     #     | rag_prompt | openai_chat_model
     # )
     cl.user_session.set("lcel_rag_chain", lcel_rag_chain)
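
Taken together, the new lines replace the in-memory index with a load-or-build cache: if a FAISS index has already been persisted under ./data/vectorstore it is reloaded, otherwise the chunks are embedded in batches of 32 and the result is saved for the next startup. Below is a minimal standalone sketch of that pattern, assuming the langchain_community FAISS vectorstore, an OpenAI embeddings instance standing in for the app's embedding_model, and that split_chunks is the list of chunked Documents built earlier in app.py; the helper name load_or_build_vectorstore is illustrative, not from the commit.

import os

from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

INDEX_DIR = "./data/vectorstore"  # same on-disk location the commit uses
BATCH_SIZE = 32                   # the commit embeds 32 chunks per request

embedding_model = OpenAIEmbeddings()  # assumption: stands in for app.py's embedding model

def load_or_build_vectorstore(split_chunks):
    # Reuse the persisted index when it exists.
    if os.path.exists(INDEX_DIR):
        # load_local unpickles the saved docstore (a .pkl file), so
        # allow_dangerous_deserialization is required; only load files you trust.
        return FAISS.load_local(INDEX_DIR, embedding_model, allow_dangerous_deserialization=True)
    # Otherwise build the index incrementally so each embedding request stays small.
    vectorstore = FAISS.from_documents(split_chunks[:BATCH_SIZE], embedding_model)
    for i in range(BATCH_SIZE, len(split_chunks), BATCH_SIZE):
        vectorstore.add_documents(split_chunks[i:i + BATCH_SIZE])
    os.makedirs(INDEX_DIR, exist_ok=True)
    vectorstore.save_local(INDEX_DIR)
    return vectorstore

# split_chunks: the chunked Documents produced by the splitter earlier in app.py
retriever = load_or_build_vectorstore(split_chunks).as_retriever()

The retriever then plugs into the LCEL chain unchanged; the only chain-side edit in this commit is the rename from qdrant_retriever to retriever.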