Spaces:
Sleeping
Sleeping
Update src/rag_engine.py
Browse files — added a function that links the flattened context to the knowledge base
- src/rag_engine.py +46 -1
src/rag_engine.py
CHANGED
|
@@ -204,4 +204,49 @@ def reset_knowledge_base(username):
|
|
| 204 |
if os.path.exists(user_db_path):
|
| 205 |
shutil.rmtree(user_db_path)
|
| 206 |
return True, "Database Reset."
|
| 207 |
-
return False, "Database already empty."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
if os.path.exists(user_db_path):
|
| 205 |
shutil.rmtree(user_db_path)
|
| 206 |
return True, "Database Reset."
|
| 207 |
+
return False, "Database already empty."
|
| 208 |
+
|
| 209 |
+
def process_and_add_text(raw_text, source_name, username, strategy="paragraph"):
    """
    Directly indexes a raw text string into the user's vector DB.
    Useful for indexing content generated by the LLM (like flattened notes).

    Args:
        raw_text: The text to chunk and index.
        source_name: Source label stored in each chunk's metadata.
        username: Owner of the per-user Chroma directory under CHROMA_PATH.
        strategy: Chunking strategy — "paragraph", "token", or "page".

    Returns:
        A (success: bool, message: str) tuple; this function never raises —
        failures are reported through the message.
    """
    user_db_path = os.path.join(CHROMA_PATH, username)

    try:
        if not raw_text or not raw_text.strip():
            return False, "Content appears empty."

        # 1. CHUNK TEXT (Reusing the standard logic)
        # Lambdas defer construction so only the chosen splitter is built.
        splitter_factories = {
            "paragraph": lambda: RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100),
            "token": lambda: TokenTextSplitter(chunk_size=512, chunk_overlap=50),
            "page": lambda: RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200),
        }
        factory = splitter_factories.get(strategy)
        if factory is None:
            # Previously an unknown strategy silently produced zero chunks and
            # a misleading "No chunks created." — surface the bad argument.
            return False, f"Unknown chunking strategy: {strategy!r}."
        chunks = factory().split_text(raw_text)

        # 2. CREATE DOCUMENTS
        # The strategy metadata is tagged with "-flattened" so these chunks can
        # be distinguished from chunks indexed from the original document.
        docs = [
            Document(
                page_content=chunk,
                metadata={"source": source_name, "strategy": f"{strategy}-flattened"}
            )
            for chunk in chunks
        ]

        # 3. INDEX TO CHROMA
        if docs:
            emb_fn = get_embedding_func()
            db = Chroma(persist_directory=user_db_path, embedding_function=emb_fn)
            db.add_documents(docs)
            return True, f"Successfully indexed {len(docs)} flattened chunks."
        else:
            return False, "No chunks created."

    except Exception as e:
        # Best-effort API surface: report the failure rather than propagate,
        # consistent with the (bool, message) contract used elsewhere.
        return False, f"Error processing text: {e}"