tasal9
/

Multilingual-ZamAI-Embeddings

@@ -1,44 +1,44 @@
-"""
-ZamAI Document Indexer
-This script helps add new documents to the embedding vector database.
-"""
-import os
-import argparse
-from llama_index.readers.file import SimpleDirectoryReader
-from setup import setup_embedding_model
-def index_documents(corpus_path, db_path=None):
-    """
-    Index documents from the specified corpus path into the vector database.
-    Args:
-        corpus_path: Path to the directory containing documents to index
-        db_path: Optional custom path for the ChromaDB database
-    """
-    if not os.path.exists(corpus_path):
-        print(f"Error: Directory {corpus_path} does not exist.")
-        return
-    if not os.listdir(corpus_path):
-        print(f"Error: No files found in {corpus_path}")
-        return
-    # Set up embedding model and components
-    db_path = db_path or "./models/embeddings/chroma_db"
-    embedding_components = setup_embedding_model(corpus_path=corpus_path, db_path=db_path)
-    print(f"Successfully indexed documents from {corpus_path}")
-    print(f"Vector database stored at {db_path}")
-    # Return the components if needed for further processing
-    return embedding_components
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Index documents for ZamAI embeddings")
-    parser.add_argument("--corpus", type=str, default="data/text_corpus/",
-                        help="Path to the directory containing documents to index")
-    parser.add_argument("--db", type=str, default=None,
-                        help="Path to store the ChromaDB database (optional)")
-    args = parser.parse_args()
-    index_documents(args.corpus, args.db)

+"""
+ZamAI Document Indexer
+This script helps add new documents to the embedding vector database.
+"""
+import os
+import argparse
+from llama_index.readers.file import SimpleDirectoryReader
+from setup import setup_embedding_model
+def index_documents(corpus_path, db_path=None):
+    """
+    Index documents from the specified corpus path into the vector database.
+
+    Validation failures are reported on stdout and the function returns
+    None before setup_embedding_model is ever called.
+
+    Args:
+        corpus_path: Path to the directory containing documents to index
+        db_path: Optional custom path for the ChromaDB database; when falsy,
+            "./models/embeddings/chroma_db" is used
+
+    Returns:
+        Whatever setup_embedding_model returns for the corpus, or None when
+        corpus_path is not a directory or contains no entries.
+    """
+    # isdir (rather than exists) also rejects a path that names a regular
+    # file, which would otherwise make os.listdir raise NotADirectoryError.
+    if not os.path.isdir(corpus_path):
+        print(f"Error: Directory {corpus_path} does not exist.")
+        return None
+    if not os.listdir(corpus_path):
+        print(f"Error: No files found in {corpus_path}")
+        return None
+    # Set up embedding model and components
+    db_path = db_path or "./models/embeddings/chroma_db"
+    embedding_components = setup_embedding_model(corpus_path=corpus_path, db_path=db_path)
+    print(f"Successfully indexed documents from {corpus_path}")
+    print(f"Vector database stored at {db_path}")
+    # Return the components if needed for further processing
+    return embedding_components
+if __name__ == "__main__":
+    # Command-line entry point: read the corpus and database locations
+    # from the arguments, then hand them to index_documents.
+    cli = argparse.ArgumentParser(description="Index documents for ZamAI embeddings")
+    cli.add_argument(
+        "--corpus",
+        default="data/text_corpus/",
+        type=str,
+        help="Path to the directory containing documents to index",
+    )
+    cli.add_argument(
+        "--db",
+        default=None,
+        type=str,
+        help="Path to store the ChromaDB database (optional)",
+    )
+    cli_args = cli.parse_args()
+    index_documents(corpus_path=cli_args.corpus, db_path=cli_args.db)