"""Command-line script: split a dataset, index it with ``FaissDB``, save the index.

Both the splitter and the index are configured with the value of the
``OPENAI_EMBEDDINGS_MODEL`` environment variable.
"""

import argparse
import os

from dotenv import load_dotenv

from src.rag import CustomAgglomerativeSplitter, FaissDB

# Runs at module load, i.e. before main() looks up OPENAI_EMBEDDINGS_MODEL.
load_dotenv()


def main(path_to_dataset: str, path_to_index: str) -> None:
    """Split the dataset, build an index from the pieces and save it.

    Args:
        path_to_dataset: Location handed to
            ``CustomAgglomerativeSplitter.read_and_split``.
        path_to_index: Location handed to ``FaissDB.save_index``.
    """
    chunker = CustomAgglomerativeSplitter(
        emb_model=os.getenv("OPENAI_EMBEDDINGS_MODEL")
    )
    chunks = chunker.read_and_split(path_to_dataset)

    # The variable is looked up a second time here rather than reused, so the
    # index sees whatever value is set at this point.
    store = FaissDB(emb_model=os.getenv("OPENAI_EMBEDDINGS_MODEL"))
    store.init_index(chunks)
    store.save_index(path_to_index)


def _parse_cli() -> argparse.Namespace:
    """Parse the two required string options of this script."""
    cli_parser = argparse.ArgumentParser()
    for flag in ("--path_to_dataset", "--path_to_index"):
        cli_parser.add_argument(flag, type=str, required=True)
    return cli_parser.parse_args()


if __name__ == "__main__":
    cli = _parse_cli()
    main(cli.path_to_dataset, cli.path_to_index)