KatGaw committed on

Commit aa08d55 · 1 Parent(s): 81ab76b

adding new files

Files changed (1)
  1. app.py +29 -9
app.py CHANGED
@@ -88,16 +88,36 @@ for chunk in split_chunks:
     max_chunk_length = max(max_chunk_length, tiktoken_len(chunk.page_content))
 
 # Embeddings and Vector store
-qdrant_vectorstore = FAISS.from_documents(
-    split_chunks,
-    embedding_model,
-    location=":memory:",
-    collection_name="airbnb 10k filings",
-)
+# qdrant_vectorstore = FAISS.from_documents(
+#     split_chunks,
+#     embedding_model,
+#     location=":memory:",
+#     collection_name="airbnb 10k filings",
+# )
+
+if os.path.exists("./data/vectorstore"):
+    vectorstore = FAISS.load_local(
+        "./data/vectorstore",
+        embedding_model,
+        allow_dangerous_deserialization=True  # this is necessary to load the vectorstore from disk, as it's stored as a `.pkl` file
+    )
+    retriever = vectorstore.as_retriever()
+    print("Loaded Vectorstore")
+else:
+    print("Indexing Files")
+    os.makedirs("./data/vectorstore", exist_ok=True)
+    for i in range(0, len(split_chunks), 32):
+        if i == 0:
+            vectorstore = FAISS.from_documents(split_chunks[i:i+32], embedding_model)
+            continue
+        vectorstore.add_documents(split_chunks[i:i+32])
+    vectorstore.save_local("./data/vectorstore")
+
+
 print("Loaded Vectorstore")
 
 # Set up your retriever using LangChain
-qdrant_retriever = qdrant_vectorstore.as_retriever()
+retriever = vectorstore.as_retriever()
 
 @cl.on_chat_start
 async def init():
@@ -115,7 +135,7 @@ async def init():
         # INVOKE CHAIN WITH: {"question" : "<<SOME USER QUESTION>>"}
         # "question" : populated by getting the value of the "question" key
         # "context" : populated by getting the value of the "question" key and chaining it into the base_retriever
-        {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
+        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
         # "context" : is assigned to a RunnablePassthrough object (will not be called or considered in the next step)
         # by getting the value of the "context" key from the previous step
         | RunnablePassthrough.assign(context=itemgetter("context"))
@@ -127,7 +147,7 @@ async def init():
     # cl.user_session.set("retrieval_augmented_qa_chain", retrieval_augmented_qa_chain)
 
     # lcel_rag_chain = (
-    #     {"context": itemgetter("question") | qdrant_retriever, "question": itemgetter("question")}
+    #     {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
     #     | rag_prompt | openai_chat_model
     # )
     cl.user_session.set("lcel_rag_chain", lcel_rag_chain)
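
Taken together, the new lines replace the in-memory index with a load-or-build cache: if a FAISS index has already been persisted under ./data/vectorstore it is reloaded, otherwise the chunks are embedded in batches of 32 and the result is saved for the next startup. Below is a minimal standalone sketch of that pattern, assuming the langchain_community FAISS vectorstore, an OpenAI embeddings instance standing in for the app's embedding_model, and that split_chunks is the list of chunked Documents built earlier in app.py; the helper name load_or_build_vectorstore is illustrative, not from the commit.

import os

from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

INDEX_DIR = "./data/vectorstore"  # same on-disk location the commit uses
BATCH_SIZE = 32                   # the commit embeds 32 chunks per request

embedding_model = OpenAIEmbeddings()  # assumption: stands in for app.py's embedding model

def load_or_build_vectorstore(split_chunks):
    # Reuse the persisted index when it exists.
    if os.path.exists(INDEX_DIR):
        # load_local unpickles the saved docstore (a .pkl file), so
        # allow_dangerous_deserialization is required; only load files you trust.
        return FAISS.load_local(INDEX_DIR, embedding_model, allow_dangerous_deserialization=True)
    # Otherwise build the index incrementally so each embedding request stays small.
    vectorstore = FAISS.from_documents(split_chunks[:BATCH_SIZE], embedding_model)
    for i in range(BATCH_SIZE, len(split_chunks), BATCH_SIZE):
        vectorstore.add_documents(split_chunks[i:i + BATCH_SIZE])
    os.makedirs(INDEX_DIR, exist_ok=True)
    vectorstore.save_local(INDEX_DIR)
    return vectorstore

# split_chunks: the chunked Documents produced by the splitter earlier in app.py
retriever = load_or_build_vectorstore(split_chunks).as_retriever()

The retriever then plugs into the LCEL chain unchanged; the only chain-side edit in this commit is the rename from qdrant_retriever to retriever.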