Spaces:
Sleeping
Sleeping
from src.rag import CustomAgglomerativeSplitter, FaissDB | |
import argparse | |
from dotenv import load_dotenv | |
import os | |
load_dotenv() | |
def main(path_to_dataset: str, path_to_index: str) -> None:
    """Split a dataset into documents, index them, and save the index.

    The embedding model name is read from the ``OPENAI_EMBEDDINGS_MODEL``
    environment variable and handed to both the splitter and the FAISS
    wrapper, so the two always use the same model.

    Args:
        path_to_dataset: Passed to ``CustomAgglomerativeSplitter.read_and_split``.
        path_to_index: Passed to ``FaissDB.save_index``.

    Raises:
        RuntimeError: If ``OPENAI_EMBEDDINGS_MODEL`` is unset or empty.
    """
    # Fail fast: os.getenv returns None for a missing variable, which would
    # otherwise be forwarded as the model name without any warning.
    emb_model = os.getenv("OPENAI_EMBEDDINGS_MODEL")
    if not emb_model:
        raise RuntimeError(
            "Environment variable OPENAI_EMBEDDINGS_MODEL is not set; "
            "define it in the environment or in the .env file."
        )

    splitter = CustomAgglomerativeSplitter(emb_model=emb_model)
    documents = splitter.read_and_split(path_to_dataset)

    faiss_db = FaissDB(emb_model=emb_model)
    faiss_db.init_index(documents)
    faiss_db.save_index(path_to_index)
if __name__ == "__main__":
    # Command-line entry point: both paths are mandatory string options.
    cli = argparse.ArgumentParser()
    for flag in ("--path_to_dataset", "--path_to_index"):
        cli.add_argument(flag, type=str, required=True)
    options = cli.parse_args()
    main(options.path_to_dataset, options.path_to_index)