Spaces:
Sleeping
Sleeping
from langchain_community.document_loaders import GutenbergLoader | |
# class GutenbergEpubLoader(UnstructuredEPubLoader): | |
# """Load Project Gutenberg EPub files, remove boilerplate text, and add metadata.""" | |
# def preprocess_documents(self, documents: list[Document]) -> list[Document]: | |
# """Remove boilerplate text and add metadata to the documents.""" | |
# for doc in tqdm(documents): | |
# doc.page_content = remove_boilerplate(doc.page_content) | |
# doc.metadata.update(get_metadata_from_text(doc.page_content)) | |
# return documents | |