NavyDevilDoc committed on
Commit
e5ea137
·
verified ·
1 Parent(s): 6d20f65

Update src/rag_engine.py

Browse files

added a function that links the flattened context to the knowledge base

Files changed (1) hide show
  1. src/rag_engine.py +46 -1
src/rag_engine.py CHANGED
@@ -204,4 +204,49 @@ def reset_knowledge_base(username):
204
  if os.path.exists(user_db_path):
205
  shutil.rmtree(user_db_path)
206
  return True, "Database Reset."
207
- return False, "Database already empty."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  if os.path.exists(user_db_path):
205
  shutil.rmtree(user_db_path)
206
  return True, "Database Reset."
207
+ return False, "Database already empty."
208
+
209
def process_and_add_text(raw_text, source_name, username, strategy="paragraph"):
    """
    Directly index a raw text string into the user's vector DB.

    Useful for indexing content generated by the LLM (like flattened notes).

    Args:
        raw_text: The text to chunk and index. Empty/whitespace-only input
            is rejected without touching the DB.
        source_name: Stored in each chunk's metadata under "source".
        username: Selects the per-user Chroma directory under CHROMA_PATH.
        strategy: "paragraph", "token", or "page" — chooses the splitter and
            chunk sizing. Any other value fails with an explanatory message.

    Returns:
        tuple[bool, str]: (success flag, human-readable status message).
    """
    user_db_path = os.path.join(CHROMA_PATH, username)

    try:
        if not raw_text or not raw_text.strip():
            return False, "Content appears empty."

        # 1. CHUNK TEXT (reusing the standard sizing). A dispatch table
        # replaces three near-identical if/elif branches; lambdas defer
        # construction so only the selected splitter is built.
        splitter_factories = {
            "paragraph": lambda: RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100),
            "token": lambda: TokenTextSplitter(chunk_size=512, chunk_overlap=50),
            "page": lambda: RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200),
        }
        factory = splitter_factories.get(strategy)
        if factory is None:
            # Previously an unknown strategy fell through to the confusing
            # "No chunks created." — surface the real cause instead.
            return False, f"Unknown chunking strategy: {strategy}"
        chunks = factory().split_text(raw_text)

        # 2. CREATE DOCUMENTS
        # NOTE: "-flattened" is appended to the *strategy* metadata (not the
        # source name) so these chunks can be distinguished from chunks
        # indexed from the original document.
        docs = [
            Document(
                page_content=chunk,
                metadata={"source": source_name, "strategy": f"{strategy}-flattened"},
            )
            for chunk in chunks
        ]

        # 3. INDEX TO CHROMA
        if not docs:
            return False, "No chunks created."
        emb_fn = get_embedding_func()
        db = Chroma(persist_directory=user_db_path, embedding_function=emb_fn)
        db.add_documents(docs)
        return True, f"Successfully indexed {len(docs)} flattened chunks."

    except Exception as e:
        # Broad catch is deliberate: callers rely on the (success, message)
        # contract and surface the message in the UI rather than crashing.
        return False, f"Error processing text: {e}"