|
|
|
|
|
|
|
|
|
|
|
from llama_index.core import VectorStoreIndex, Document
|
|
|
from llama_index.core.node_parser import SentenceSplitter
|
|
|
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
|
|
import config
|
|
|
import data_processing
|
|
|
import os
|
|
|
|
|
|
def build_vector_store():
|
|
|
"""
|
|
|
Builds and saves a LlamaIndex vector store from the processed documents.
|
|
|
"""
|
|
|
|
|
|
all_docs = data_processing.load_and_process_all()
|
|
|
|
|
|
|
|
|
if not all_docs:
|
|
|
print("No documents were created. Exiting.")
|
|
|
return
|
|
|
|
|
|
|
|
|
llama_documents = all_docs
|
|
|
|
|
|
|
|
|
print(f"Loading embedding model: {config.EMBEDDING_MODEL_NAME}...")
|
|
|
embed_model = HuggingFaceEmbedding(model_name=config.EMBEDDING_MODEL_NAME)
|
|
|
|
|
|
|
|
|
print("Creating the LlamaIndex vector store...")
|
|
|
index = VectorStoreIndex.from_documents(
|
|
|
llama_documents,
|
|
|
embed_model=embed_model,
|
|
|
transformations=[SentenceSplitter(chunk_size=1000, chunk_overlap=150)]
|
|
|
)
|
|
|
|
|
|
|
|
|
print(f"Saving the vector store to: {config.LLAMA_INDEX_STORE_PATH}")
|
|
|
index.storage_context.persist(persist_dir=config.LLAMA_INDEX_STORE_PATH)
|
|
|
print("Vector store built and saved successfully.")
|
|
|
|
|
|
def main():
|
|
|
"""
|
|
|
Main function to build the knowledge base.
|
|
|
"""
|
|
|
|
|
|
if os.path.exists(config.LLAMA_INDEX_STORE_PATH):
|
|
|
print("Vector store already exists. Skipping build process.")
|
|
|
else:
|
|
|
build_vector_store()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
main()
|
|
|
|
|
|
|