"""Build and persist a Chroma vector store from the PDFs under ``docs/``.

Running this module executes the whole pipeline at import time:

1. load every document that ``PyPDFDirectoryLoader`` finds in ``docs/``;
2. cut the documents into overlapping chunks;
3. embed the chunks with ``OpenAIEmbeddings``;
4. write the resulting index to ``persist_directory`` (from ``constants``).
"""

from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

from constants import persist_directory

# Directory scanned for input documents.
SOURCE_DIR = "docs/"

# Chunking parameters handed to the splitter.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# Candidate split points, passed to the splitter in this order. The trailing
# empty string is the last entry of the list.
SEPARATORS = ["\n\n", "\n", ".", "!", ",", " ", ""]

# --- 1. Load -----------------------------------------------------------------
loader = PyPDFDirectoryLoader(SOURCE_DIR)
documents = loader.load()

# --- 2. Split ----------------------------------------------------------------
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    separators=SEPARATORS,
    keep_separator=True,
)
texts = text_splitter.split_documents(documents)

# --- 3. Embed and index ------------------------------------------------------
# NOTE(review): OpenAIEmbeddings() is constructed with no arguments, so any
# credentials must come from the environment/library defaults — confirm setup.
embedding = OpenAIEmbeddings()
vectordb = Chroma.from_documents(
    documents=texts,
    embedding=embedding,
    persist_directory=persist_directory,
)

# --- 4. Persist --------------------------------------------------------------
vectordb.persist()