File size: 4,112 Bytes
690eb5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""
Unit tests for the file manager module.
"""
import io
from typing import Any

import pytest

from slidedeckai.helpers import file_manager


class _FakePage:
    """Minimal stand-in for a PDF page that yields a fixed piece of text."""

    def __init__(self, text: str) -> None:
        # Stored as-is; extract_text() hands it back unchanged.
        self._canned_text = text

    def extract_text(self) -> str:
        """Return the text supplied when the page was created."""
        return self._canned_text


class _FakePdf:
    """Minimal stand-in for a PDF reader object exposing a ``pages`` list."""

    def __init__(self, pages_text: list[str]) -> None:
        # Build one fake page per supplied string, preserving the order.
        fake_pages = []
        for page_text in pages_text:
            fake_pages.append(_FakePage(page_text))
        self.pages = fake_pages


def _make_fake_pdf_reader(pages_text: list[str]) -> Any:
    """Build a callable that can stand in for PdfReader.

    The callable accepts a single argument (the file object, which it
    ignores) and returns an object whose .pages attribute holds one page per
    entry of ``pages_text``; every page implements extract_text(). Tests can
    therefore run without real PDF binaries and remain deterministic.
    """
    def _fake_reader(_fileobj: Any) -> _FakePdf:
        # The file object is intentionally unused; only the canned pages matter.
        fake_pdf = _FakePdf(pages_text)
        return fake_pdf

    return _fake_reader


def test_get_pdf_contents_single_page(monkeypatch: pytest.MonkeyPatch) -> None:
    """get_pdf_contents should yield the start page's text when the end of
    page_range is None.
    """
    pages = ['Page one text']
    fake_reader = _make_fake_pdf_reader(pages)
    monkeypatch.setattr(file_manager, 'PdfReader', fake_reader)

    # validate_page_range reports a single-page selection as (start, None);
    # mirror that contract here to exercise the end=None handling.
    single_page_range = (1, None)
    extracted = file_manager.get_pdf_contents(
        pdf_file=io.BytesIO(b'pdf'),
        page_range=single_page_range,
    )

    assert extracted == 'Page one text'


def test_get_pdf_contents_multi_page_range(monkeypatch: pytest.MonkeyPatch) -> None:
    """get_pdf_contents should join the text of every page inside the
    requested range.
    """
    pages = ['First', 'Second', 'Third']
    fake_reader = _make_fake_pdf_reader(pages)
    monkeypatch.setattr(file_manager, 'PdfReader', fake_reader)

    # Pages are requested with 1-based, inclusive bounds: (1, 2) is expected
    # to cover the first two pages and leave out the third.
    first_two_pages = (1, 2)
    extracted = file_manager.get_pdf_contents(
        pdf_file=io.BytesIO(b'pdf'),
        page_range=first_two_pages,
    )

    assert extracted == 'FirstSecond'


@pytest.mark.parametrize(
    ('start', 'end', 'expected'),
    [
        # start below 1 is raised to 1; end past the last page drops to 3
        (0, 5, (1, 3)),
        # identical start and end collapse to a single page (end is None)
        (2, 2, (2, None)),
        # start greater than end falls back to page 1, a single page here
        (10, 1, (1, None)),
        # end past the last page drops to the page count
        (1, 100, (1, 3)),
    ],
)
def test_validate_page_range_various(
    monkeypatch: pytest.MonkeyPatch, start: int, end: int, expected: tuple[int, Any]
) -> None:
    """validate_page_range should normalize out-of-bounds start/end values
    and report a single-page selection as (start, None).
    """
    # Every case runs against the same fake three-page document.
    three_pages = ['A', 'B', 'C']
    fake_reader = _make_fake_pdf_reader(three_pages)
    monkeypatch.setattr(file_manager, 'PdfReader', fake_reader)

    normalized = file_manager.validate_page_range(
        pdf_file=io.BytesIO(b'pdf'), start=start, end=end
    )

    assert normalized == expected


def test_validate_page_range_two_page_return(monkeypatch: pytest.MonkeyPatch) -> None:
    """A valid range covering more than one page should come back from
    validate_page_range as a (start, end) pair whose end is not None.
    """
    four_pages = ['A', 'B', 'C', 'D']
    fake_reader = _make_fake_pdf_reader(four_pages)
    monkeypatch.setattr(file_manager, 'PdfReader', fake_reader)

    # Pages 2..3 already lie inside the four-page document, so the pair is
    # expected back unchanged.
    normalized = file_manager.validate_page_range(
        pdf_file=io.BytesIO(b'pdf'), start=2, end=3
    )

    assert normalized == (2, 3)


def test_get_pdf_contents_handles_empty_page_text(monkeypatch: pytest.MonkeyPatch) -> None:
    """Pages whose extracted text is empty should not break get_pdf_contents;
    only the non-empty text appears in the result.
    """
    # The first and last pages yield empty strings.
    pages = ['', 'Line two', '']
    fake_reader = _make_fake_pdf_reader(pages)
    monkeypatch.setattr(file_manager, 'PdfReader', fake_reader)

    extracted = file_manager.get_pdf_contents(
        pdf_file=io.BytesIO(b'pdf'),
        page_range=(1, 3),
    )

    assert extracted == 'Line two'