|
|
import logging |
|
|
|
|
|
from docling.backend.abstract_backend import ( |
|
|
AbstractDocumentBackend, |
|
|
DeclarativeDocumentBackend, |
|
|
) |
|
|
from docling.datamodel.base_models import ConversionStatus |
|
|
from docling.datamodel.document import ConversionResult |
|
|
from docling.datamodel.pipeline_options import PipelineOptions |
|
|
from docling.pipeline.base_pipeline import BasePipeline |
|
|
from docling.utils.profiling import ProfilingScope, TimeRecorder |
|
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
_log = logging.getLogger(__name__) |
|
|
|
|
|
|
|
|
class SimplePipeline(BasePipeline):
    """Pipeline that delegates the whole conversion to a declarative backend.

    The document is obtained by calling ``convert()`` on the input's backend,
    which must be a ``DeclarativeDocumentBackend``; no further processing
    steps are defined in this class.
    """

    def __init__(self, pipeline_options: PipelineOptions):
        """Forward ``pipeline_options`` unchanged to the base pipeline."""
        super().__init__(pipeline_options)

    def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
        """Populate ``conv_res.document`` from the input's backend.

        Args:
            conv_res: Conversion result whose ``input._backend`` performs the
                conversion.

        Returns:
            The same ``conv_res`` object, with ``document`` assigned.

        Raises:
            RuntimeError: If the backend is not a
                ``DeclarativeDocumentBackend``.
        """
        backend = conv_res.input._backend

        # Guard: only declarative backends expose the convert() call used here.
        if not isinstance(backend, DeclarativeDocumentBackend):
            reason = (
                f"The selected backend {type(backend).__name__} for {conv_res.input.file} is not a declarative backend. "
                "Can not convert this with simple pipeline. "
                "Please check your format configuration on DocumentConverter."
            )
            raise RuntimeError(reason)

        # The backend call runs inside a TimeRecorder labelled "doc_build"
        # with document-level profiling scope.
        with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
            conv_res.document = backend.convert()

        return conv_res

    def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
        """Return ``ConversionStatus.SUCCESS`` unconditionally.

        ``conv_res`` is accepted but not inspected by this implementation.
        """
        return ConversionStatus.SUCCESS

    @classmethod
    def get_default_options(cls) -> PipelineOptions:
        """Return a ``PipelineOptions`` instance built with no arguments."""
        return PipelineOptions()

    @classmethod
    def is_backend_supported(cls, backend: AbstractDocumentBackend):
        """Return True when ``backend`` is a ``DeclarativeDocumentBackend``."""
        return isinstance(backend, DeclarativeDocumentBackend)
|
|
|