open-notebook / loader.py
avinash
updated ui2
e792350
raw
history blame contribute delete
350 Bytes
# loader.py
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
def load_and_split_pdf(pdf_path):
loader = PyPDFLoader(pdf_path)
docs = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
return splitter.split_documents(docs)