Spaces:
Runtime error
Runtime error
| from abc import ABC, abstractmethod | |
| from io import BytesIO | |
| from pathlib import Path | |
| from typing import Iterable, Optional, Set, Union | |
| from docling_core.types.doc import BoundingBox, Size | |
| from PIL import Image | |
| from docling.backend.abstract_backend import PaginatedDocumentBackend | |
| from docling.datamodel.base_models import Cell, InputFormat | |
| from docling.datamodel.document import InputDocument | |
class PdfPageBackend(ABC):
    """Abstract interface for one page of a PDF document.

    Every method decorated with ``@abstractmethod`` must be provided by a
    concrete backend; the class itself cannot be instantiated. Without the
    decorators the stubs would silently return ``None`` in violation of their
    declared return types.
    """

    @abstractmethod
    def get_text_in_rect(self, bbox: BoundingBox) -> str:
        """Return, as a string, the text associated with the region ``bbox``."""

    @abstractmethod
    def get_text_cells(self) -> Iterable[Cell]:
        """Return an iterable of the ``Cell`` objects for this page."""

    @abstractmethod
    def get_bitmap_rects(self, float: int = 1) -> Iterable[BoundingBox]:
        """Return an iterable of bounding boxes for the page's bitmap regions.

        NOTE(review): the parameter is literally named ``float`` (shadowing
        the builtin) and typed ``int``. It looks like a typo for
        ``scale: float = 1`` (compare ``get_page_image``) -- confirm before
        renaming; the name is kept here so keyword callers keep working.
        """

    @abstractmethod
    def get_page_image(
        self, scale: float = 1, cropbox: Optional[BoundingBox] = None
    ) -> Image.Image:
        """Return the page as a PIL image.

        Args:
            scale: Scaling factor for the returned image (defaults to 1).
            cropbox: Optional region to restrict the image to; ``None`` is
                the default. Presumably ``None`` means the whole page --
                confirm against the concrete backends.
        """

    @abstractmethod
    def get_size(self) -> Size:
        """Return the ``Size`` of this page."""

    @abstractmethod
    def is_valid(self) -> bool:
        """Return whether this page backend is usable."""

    def unload(self):
        """Hook for discarding page state; the base implementation is a no-op.

        Left concrete (not abstract) so subclasses that have nothing to
        clean up are not forced to override it.
        """
class PdfDocumentBackend(PaginatedDocumentBackend):
    """Base class for paginated document backends that operate on PDF data.

    Image input is also accepted by the constructor: it is converted to an
    in-memory PDF so the rest of the backend only ever deals with PDF bytes.
    """

    def __init__(self, in_doc: InputDocument, path_or_stream: Union[BytesIO, Path]):
        """Initialise the backend and normalise image input to PDF.

        Args:
            in_doc: The input document descriptor, forwarded to the parent.
            path_or_stream: Location or in-memory stream of the source file,
                forwarded to the parent.

        Raises:
            RuntimeError: If the input format is neither PDF nor IMAGE.
        """
        super().__init__(in_doc, path_or_stream)
        # ``self.input_format`` / ``self.path_or_stream`` are read below;
        # presumably they are set by the parent constructor -- confirm there.

        if self.input_format is InputFormat.PDF:
            return

        if self.input_format is not InputFormat.IMAGE:
            raise RuntimeError(
                f"Incompatible file format {self.input_format} was passed to a PdfDocumentBackend."
            )

        # Re-encode the image as a single PDF held in memory. The ``with``
        # block makes sure the image handle is released once the PDF has been
        # written instead of lingering until garbage collection.
        buf = BytesIO()
        with Image.open(self.path_or_stream) as img:
            img.save(buf, "PDF")
        buf.seek(0)
        self.path_or_stream = buf

    @abstractmethod
    def load_page(self, page_no: int) -> PdfPageBackend:
        """Return the ``PdfPageBackend`` for page ``page_no``."""

    @abstractmethod
    def page_count(self) -> int:
        """Return the number of pages in the document."""

    @classmethod
    def supported_formats(cls) -> Set[InputFormat]:
        """Return the set of input formats this backend declares support for.

        NOTE(review): ``__init__`` also accepts ``InputFormat.IMAGE`` (by
        converting it to PDF) while only PDF is reported here -- confirm
        whether that asymmetry is intentional.
        """
        return {InputFormat.PDF}

    @classmethod
    def supports_pagination(cls) -> bool:
        """Return ``True``: PDF documents are paginated."""
        return True