from langchain.document_loaders import PyPDFLoader, DirectoryLoader from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.embeddings import SentenceTransformerEmbeddings from langchain.vectorstores import Chroma loader = DirectoryLoader('data', glob="./*.pdf", loader_cls=PyPDFLoader) documents = loader.load() text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200) texts = text_splitter.split_documents(documents) embeddings = SentenceTransformerEmbeddings(model_name="multi-qa-mpnet-base-dot-v1") persist_directory = "ipc_vector_data" db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)