Spaces:
Running
Running
| """ | |
| Unit tests for the file manager module. | |
| """ | |
| import io | |
| from typing import Any | |
| import pytest | |
| from slidedeckai.helpers import file_manager | |
class _FakePage:
    """Test double for a single PDF page.

    Holds a fixed string and hands it back from ``extract_text()``, which is
    the only page method the tests in this module rely on.
    """

    def __init__(self, text: str) -> None:
        """Remember *text* as this page's canned content."""
        self._text = text

    def extract_text(self) -> str:
        """Return the canned content supplied at construction time."""
        return self._text
class _FakePdf:
    """Test double for a parsed PDF: exposes only a ``pages`` list."""

    def __init__(self, pages_text: list[str]) -> None:
        """Wrap every string in *pages_text* in a ``_FakePage``, keeping order."""
        fake_pages = []
        for page_text in pages_text:
            fake_pages.append(_FakePage(page_text))
        self.pages = fake_pages
| def _make_fake_pdf_reader(pages_text: list[str]) -> Any: | |
| """Return a callable that behaves like PdfReader when called with a file. | |
| The returned object will have a .pages attribute with page objects that | |
| implement extract_text(). This lets tests avoid creating real PDF | |
| binaries and keeps tests deterministic. | |
| """ | |
| def _reader(_fileobj: Any) -> _FakePdf: | |
| return _FakePdf(pages_text) | |
| return _reader | |
def test_get_pdf_contents_single_page(monkeypatch: pytest.MonkeyPatch) -> None:
    """A ``(start, None)`` page range should yield the text of that one page."""
    # Swap the real PdfReader for a stub that serves one known page.
    stub_reader = _make_fake_pdf_reader(['Page one text'])
    monkeypatch.setattr(file_manager, 'PdfReader', stub_reader)

    # validate_page_range reports a single-page selection as (start, None);
    # pass that shape in directly to exercise the end=None handling.
    single_page_range = (1, None)
    extracted = file_manager.get_pdf_contents(
        pdf_file=io.BytesIO(b'pdf'),
        page_range=single_page_range,
    )

    assert extracted == 'Page one text'
def test_get_pdf_contents_multi_page_range(monkeypatch: pytest.MonkeyPatch) -> None:
    """A multi-page range should produce the pages' text joined in order."""
    stub_reader = _make_fake_pdf_reader(['First', 'Second', 'Third'])
    monkeypatch.setattr(file_manager, 'PdfReader', stub_reader)

    # Pages are 1-based and the range is inclusive: (1, 2) selects the first
    # two pages, so the third page's text must not appear in the output.
    first_two_pages = (1, 2)
    extracted = file_manager.get_pdf_contents(
        pdf_file=io.BytesIO(b'pdf'),
        page_range=first_two_pages,
    )

    assert extracted == 'FirstSecond'
@pytest.mark.parametrize(
    ('start', 'end', 'expected'),
    [
        # A multi-page request that lies inside the 3-page document is
        # returned unchanged.
        (1, 2, (1, 2)),
        # start == end collapses to a single page, reported as (start, None).
        (2, 2, (2, None)),
        # TODO(review): add out-of-range cases (start < 1, end > page count,
        # start > end) once the exact clamping rules of validate_page_range
        # are confirmed against its implementation.
    ],
)
def test_validate_page_range_various(
    monkeypatch: pytest.MonkeyPatch, start: int, end: int, expected: tuple[int, Any]
) -> None:
    """validate_page_range should normalize start/end for a 3-page PDF.

    Without the parametrize decorator pytest would treat ``start``, ``end``
    and ``expected`` as fixtures and fail with "fixture not found"; the
    decorator supplies them.

    Args:
        monkeypatch: pytest fixture used to replace ``file_manager.PdfReader``.
        start: Requested first page (1-based).
        end: Requested last page (1-based).
        expected: The ``(start, end)`` tuple validate_page_range must return;
            ``end`` is ``None`` when the range is a single page.
    """
    # The stub reader always reports exactly three pages.
    fake_texts = ['A', 'B', 'C']
    monkeypatch.setattr(
        file_manager, 'PdfReader', _make_fake_pdf_reader(fake_texts)
    )

    result = file_manager.validate_page_range(
        pdf_file=io.BytesIO(b'pdf'),
        start=start,
        end=end,
    )

    assert result == expected
def test_validate_page_range_two_page_return(monkeypatch: pytest.MonkeyPatch) -> None:
    """A valid multi-page range comes back as ``(start, end)`` with a real end.

    Counterpart to the single-page case: here ``end`` must not be ``None``.
    """
    # Four-page stub document, so pages 2..3 lie strictly inside it.
    stub_reader = _make_fake_pdf_reader(['A', 'B', 'C', 'D'])
    monkeypatch.setattr(file_manager, 'PdfReader', stub_reader)

    validated = file_manager.validate_page_range(
        pdf_file=io.BytesIO(b'pdf'),
        start=2,
        end=3,
    )

    # Nothing to clamp: the pair is expected back exactly as supplied.
    assert validated == (2, 3)
def test_get_pdf_contents_handles_empty_page_text(monkeypatch: pytest.MonkeyPatch) -> None:
    """Pages with no extractable text must not break concatenation.

    Empty strings contribute nothing, so only the middle page's text remains.
    """
    stub_reader = _make_fake_pdf_reader(['', 'Line two', ''])
    monkeypatch.setattr(file_manager, 'PdfReader', stub_reader)

    whole_document = (1, 3)
    extracted = file_manager.get_pdf_contents(
        pdf_file=io.BytesIO(b'pdf'),
        page_range=whole_document,
    )

    assert extracted == 'Line two'