Spaces:
Sleeping
Sleeping
File size: 605 Bytes
3b4f6eb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
from langchain_community.document_loaders import GutenbergLoader
# class GutenbergEpubLoader(UnstructuredEPubLoader):
# """Load Project Gutenberg EPub files, remove boilerplate text, and add metadata."""
# def preprocess_documents(self, documents: list[Document]) -> list[Document]:
# """Remove boilerplate text and add metadata to the documents."""
# for doc in tqdm(documents):
# doc.page_content = remove_boilerplate(doc.page_content)
# doc.metadata.update(get_metadata_from_text(doc.page_content))
# return documents
|