Spaces:
Runtime error
Runtime error
File size: 1,485 Bytes
58d33f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
from pathlib import Path
from langchain.document_loaders import (
PDFMinerLoader,
PyMuPDFLoader,
UnstructuredPDFLoader,
)
def test_unstructured_pdf_loader() -> None:
"""Test unstructured loader."""
file_path = Path(__file__).parent.parent / "examples/hello.pdf"
loader = UnstructuredPDFLoader(str(file_path))
docs = loader.load()
assert len(docs) == 1
def test_pdfminer_loader() -> None:
"""Test PDFMiner loader."""
file_path = Path(__file__).parent.parent / "examples/hello.pdf"
loader = PDFMinerLoader(str(file_path))
docs = loader.load()
assert len(docs) == 1
file_path = Path(__file__).parent.parent / "examples/layout-parser-paper.pdf"
loader = PDFMinerLoader(str(file_path))
docs = loader.load()
assert len(docs) == 1
def test_pymupdf_loader() -> None:
"""Test PyMuPDF loader."""
file_path = Path(__file__).parent.parent / "examples/hello.pdf"
loader = PyMuPDFLoader(str(file_path))
docs = loader.load()
assert len(docs) == 1
file_path = Path(__file__).parent.parent / "examples/layout-parser-paper.pdf"
loader = PyMuPDFLoader(str(file_path))
docs = loader.load()
assert len(docs) == 16
assert loader.web_path is None
web_path = "https://people.sc.fsu.edu/~jpeterson/hello_world.pdf"
loader = PyMuPDFLoader(web_path)
docs = loader.load()
assert loader.web_path == web_path
assert loader.file_path != web_path
assert len(docs) == 1
|