Spaces:
Running
Running
| """Tests for bbox coordinate normalization. | |
| These tests cover the core bbox pipeline — the most critical part of the | |
| visual rendering. Every edge case matters because a broken bbox means | |
| misaligned overlays in the UI. | |
| """ | |
| import pytest | |
| from docling_core.types.doc.base import BoundingBox, CoordOrigin | |
| from infra.bbox import EMPTY_BBOX, to_topleft_list | |
| # --------------------------------------------------------------------------- | |
| # Standard conversions | |
| # --------------------------------------------------------------------------- | |
class TestToTopleftListStandard:
    """Happy-path conversions of well-formed bounding boxes."""

    def test_topleft_origin_unchanged(self):
        """A TOPLEFT bbox is expected back as the same [l, t, r, b] values."""
        source = BoundingBox(
            l=10,
            t=20,
            r=100,
            b=80,
            coord_origin=CoordOrigin.TOPLEFT,
        )
        expected = [10, 20, 100, 80]
        assert to_topleft_list(source, page_height=792.0) == expected

    def test_bottomleft_origin_converted(self):
        """A BOTTOMLEFT bbox is expected to have its y-values flipped."""
        source = BoundingBox(
            l=50,
            t=700,
            r=200,
            b=600,
            coord_origin=CoordOrigin.BOTTOMLEFT,
        )
        converted = to_topleft_list(source, page_height=792.0)
        # Expected y-values: 792 - 700 = 92 (top) and 792 - 600 = 192 (bottom).
        assert converted[0] == 50  # left edge is untouched
        assert converted[1] == pytest.approx(92.0)  # page height minus old top
        assert converted[2] == 200  # right edge is untouched
        assert converted[3] == pytest.approx(192.0)  # page height minus old bottom

    def test_result_has_positive_dimensions(self):
        """After conversion, right > left and bottom > top must both hold."""
        source = BoundingBox(
            l=10,
            t=500,
            r=300,
            b=100,
            coord_origin=CoordOrigin.BOTTOMLEFT,
        )
        x0, y0, x1, y1 = to_topleft_list(source, page_height=800.0)
        assert x1 > x0, "width should be positive"
        assert y1 > y0, "height should be positive"

    def test_full_page_bbox_bottomleft(self):
        """A page-sized BOTTOMLEFT bbox maps to [0, 0, width, height]."""
        whole_page = BoundingBox(
            l=0,
            t=792,
            r=612,
            b=0,
            coord_origin=CoordOrigin.BOTTOMLEFT,
        )
        expected = [0, 0, 612, 792]
        assert to_topleft_list(whole_page, page_height=792.0) == expected

    def test_full_page_bbox_topleft(self):
        """A page-sized TOPLEFT bbox maps to [0, 0, width, height]."""
        whole_page = BoundingBox(
            l=0,
            t=0,
            r=612,
            b=792,
            coord_origin=CoordOrigin.TOPLEFT,
        )
        expected = [0, 0, 612, 792]
        assert to_topleft_list(whole_page, page_height=792.0) == expected
| # --------------------------------------------------------------------------- | |
| # Page format variations | |
| # --------------------------------------------------------------------------- | |
class TestPageFormats:
    """Conversion checks for several page heights and orientations."""

    def test_a4_page(self):
        """A4 page (595.28 x 841.89 pt) with a BOTTOMLEFT bbox."""
        a4_height = 841.89
        source = BoundingBox(
            l=72,
            t=769.89,
            r=523.28,
            b=72,
            coord_origin=CoordOrigin.BOTTOMLEFT,
        )
        converted = to_topleft_list(source, page_height=a4_height)
        assert converted[0] == 72
        assert converted[1] == pytest.approx(a4_height - 769.89)  # roughly 72
        assert converted[2] == 523.28
        assert converted[3] == pytest.approx(a4_height - 72)  # roughly 769.89

    def test_a3_page(self):
        """A3 page (841.89 x 1190.55 pt) covered by one BOTTOMLEFT bbox."""
        a3_height = 1190.55
        whole_page = BoundingBox(
            l=0,
            t=1190.55,
            r=841.89,
            b=0,
            coord_origin=CoordOrigin.BOTTOMLEFT,
        )
        converted = to_topleft_list(whole_page, page_height=a3_height)
        assert converted == pytest.approx([0, 0, 841.89, 1190.55])

    def test_legal_page(self):
        """US Legal page (612 x 1008 pt) with a TOPLEFT bbox."""
        legal_height = 1008.0
        source = BoundingBox(
            l=50,
            t=50,
            r=562,
            b=958,
            coord_origin=CoordOrigin.TOPLEFT,
        )
        expected = [50, 50, 562, 958]
        assert to_topleft_list(source, page_height=legal_height) == expected

    def test_landscape_page(self):
        """Landscape page (width > height) with a BOTTOMLEFT bbox."""
        landscape_height = 612.0  # Letter, rotated to landscape
        source = BoundingBox(
            l=100,
            t=500,
            r=700,
            b=100,
            coord_origin=CoordOrigin.BOTTOMLEFT,
        )
        x0, y0, x1, y1 = to_topleft_list(source, page_height=landscape_height)
        assert x1 > x0
        assert y1 > y0
        assert y0 == pytest.approx(612.0 - 500.0)  # 112
        assert y1 == pytest.approx(612.0 - 100.0)  # 512
| # --------------------------------------------------------------------------- | |
| # Degenerate / edge-case bboxes | |
| # --------------------------------------------------------------------------- | |
class TestDegenerateBboxes:
    """Invalid or zero-area bboxes are expected to yield EMPTY_BBOX."""

    def test_zero_width_returns_empty(self):
        """l == r gives a zero-width box, which counts as degenerate."""
        flat = BoundingBox(
            l=100,
            t=20,
            r=100,
            b=80,
            coord_origin=CoordOrigin.TOPLEFT,
        )
        assert to_topleft_list(flat, page_height=792.0) == EMPTY_BBOX

    def test_zero_height_returns_empty(self):
        """t == b gives a zero-height box, which counts as degenerate."""
        flat = BoundingBox(
            l=10,
            t=50,
            r=100,
            b=50,
            coord_origin=CoordOrigin.TOPLEFT,
        )
        assert to_topleft_list(flat, page_height=792.0) == EMPTY_BBOX

    def test_inverted_lr_returns_empty(self):
        """l > r (x-axis inverted) counts as degenerate."""
        inverted = BoundingBox(
            l=200,
            t=20,
            r=100,
            b=80,
            coord_origin=CoordOrigin.TOPLEFT,
        )
        assert to_topleft_list(inverted, page_height=792.0) == EMPTY_BBOX

    def test_inverted_tb_topleft_returns_empty(self):
        """t > b on a TOPLEFT bbox (y-axis inverted) counts as degenerate."""
        inverted = BoundingBox(
            l=10,
            t=100,
            r=200,
            b=50,
            coord_origin=CoordOrigin.TOPLEFT,
        )
        assert to_topleft_list(inverted, page_height=792.0) == EMPTY_BBOX

    def test_point_bbox_returns_empty(self):
        """A single point (l == r and t == b) counts as degenerate."""
        point = BoundingBox(
            l=100,
            t=200,
            r=100,
            b=200,
            coord_origin=CoordOrigin.TOPLEFT,
        )
        assert to_topleft_list(point, page_height=792.0) == EMPTY_BBOX

    def test_empty_bbox_is_not_mutated(self):
        """Two calls give equal results held in distinct list objects."""
        flat = BoundingBox(
            l=100,
            t=20,
            r=100,
            b=80,
            coord_origin=CoordOrigin.TOPLEFT,
        )
        first = to_topleft_list(flat, page_height=792.0)
        second = to_topleft_list(flat, page_height=792.0)
        assert first == second
        assert first is not second  # each call must hand back its own list
| # --------------------------------------------------------------------------- | |
| # Precision and boundary values | |
| # --------------------------------------------------------------------------- | |
class TestPrecision:
    """Fractional coordinates and boxes touching the page boundaries."""

    def test_very_small_bbox(self):
        """A tiny but non-degenerate bbox (e.g. a period character)."""
        speck = BoundingBox(
            l=100.0,
            t=200.0,
            r=100.5,
            b=200.5,
            coord_origin=CoordOrigin.TOPLEFT,
        )
        expected = [100.0, 200.0, 100.5, 200.5]
        assert to_topleft_list(speck, page_height=792.0) == expected

    def test_fractional_coordinates(self):
        """Sub-point TOPLEFT coordinates come back approximately unchanged."""
        source = BoundingBox(
            l=72.34,
            t=145.67,
            r=540.12,
            b=200.89,
            coord_origin=CoordOrigin.TOPLEFT,
        )
        converted = to_topleft_list(source, page_height=842.0)
        assert converted == pytest.approx([72.34, 145.67, 540.12, 200.89])

    def test_bbox_at_page_origin(self):
        """A bbox anchored at (0, 0), the page's top-left corner, is valid."""
        corner = BoundingBox(
            l=0,
            t=0,
            r=50,
            b=30,
            coord_origin=CoordOrigin.TOPLEFT,
        )
        expected = [0, 0, 50, 30]
        assert to_topleft_list(corner, page_height=792.0) == expected

    def test_bbox_at_page_bottom_right(self):
        """A bbox ending exactly at the page's bottom-right corner is valid."""
        corner = BoundingBox(
            l=500,
            t=750,
            r=612,
            b=792,
            coord_origin=CoordOrigin.TOPLEFT,
        )
        expected = [500, 750, 612, 792]
        assert to_topleft_list(corner, page_height=792.0) == expected

    def test_bottomleft_near_page_edge(self):
        """A BOTTOMLEFT bbox with small y-values lands near the page bottom."""
        source = BoundingBox(
            l=50,
            t=30,
            r=200,
            b=10,
            coord_origin=CoordOrigin.BOTTOMLEFT,
        )
        converted = to_topleft_list(source, page_height=792.0)
        # Expected y-values: 792 - 30 = 762 (top) and 792 - 10 = 782 (bottom).
        assert converted[1] == pytest.approx(762.0)
        assert converted[3] == pytest.approx(782.0)