Spaces:
No application file
No application file
import pytest | |
from langchain.schema import Document | |
def test_load_data(loader, mocker):
    """Check that ``load_data`` returns one content/meta-data record per PDF page.

    ``PyPDFLoader`` is patched so no real PDF is read: its ``load_and_split``
    yields two fake pages. ``hashlib.sha256`` is patched as well so that the
    ``doc_id`` in the result (presumably derived from a sha256 hexdigest) is a
    known value.
    """
    mocked_pypdfloader = mocker.patch("embedchain.loaders.pdf_file.PyPDFLoader")
    mocked_pypdfloader.return_value.load_and_split.return_value = [
        Document(page_content="Page 0 Content", metadata={"source": "example.pdf", "page": 0}),
        Document(page_content="Page 1 Content", metadata={"source": "example.pdf", "page": 1}),
    ]

    # Patch the attribute on the ``hashlib`` module directly. Reaching it through
    # an unrelated loader module patches the very same module object, but makes
    # this test depend on that other module being importable.
    mock_sha256 = mocker.patch("hashlib.sha256")
    doc_id = "mocked_hash"
    mock_sha256.return_value.hexdigest.return_value = doc_id

    result = loader.load_data("dummy_url")

    # Compare by value: ``is`` on strings checks object identity, which would
    # break as soon as the loader copies or re-formats the digest.
    assert result["doc_id"] == doc_id
    # Each page keeps its own metadata and gains the requested URL under "url".
    assert result["data"] == [
        {"content": "Page 0 Content", "meta_data": {"source": "example.pdf", "page": 0, "url": "dummy_url"}},
        {"content": "Page 1 Content", "meta_data": {"source": "example.pdf", "page": 1, "url": "dummy_url"}},
    ]
def test_load_data_fails_to_find_data(loader, mocker):
    """Expect ``ValueError`` from ``load_data`` when the PDF backend yields no pages."""
    pdf_loader_cls = mocker.patch("embedchain.loaders.pdf_file.PyPDFLoader")
    # Simulate a document from which nothing could be extracted.
    pdf_loader_cls.return_value.load_and_split.return_value = []

    with pytest.raises(ValueError):
        loader.load_data("dummy_url")
@pytest.fixture
def loader():
    """Provide a fresh ``PdfFileLoader`` instance to each test that requests ``loader``.

    The tests in this module take ``loader`` as an argument, so this function
    has to be registered as a pytest fixture; without the decorator pytest
    cannot resolve the argument.
    """
    # Imported inside the fixture so the loader module is only loaded when a
    # test actually needs it, not at collection time.
    from embedchain.loaders.pdf_file import PdfFileLoader

    return PdfFileLoader()