Spaces:
Running on Zero
Running on Zero
| # --------------------------------------------------------------------------- | |
| # Force-upgrade transformers to >=5.1.0 before any other import. | |
| # | |
| # Why: PP-DocLayoutV3's custom model classes (PPDocLayoutV3ImageProcessor, | |
| # PPDocLayoutV3ForObjectDetection) were added to the transformers library in | |
| # version 5.1.0. docling-ibm-models caps transformers<5.0.0 (conservative | |
| # pinning), so pip resolves transformers ~4.x at build time. We upgrade it | |
| # here at runtime, before any docling/transformers import, so the correct | |
| # classes are available. docling-ibm-models' usage (AutoModel, pipeline API) | |
| # remains compatible with transformers 5.x. | |
| # --------------------------------------------------------------------------- | |
| import subprocess | |
| import sys | |
# Run pip through the current interpreter so the upgrade lands in the same
# environment this script is executing in. check=True makes a failed install
# raise CalledProcessError and abort startup.
_pip_upgrade_cmd = [
    sys.executable,
    "-m",
    "pip",
    "install",
    "transformers>=5.1.0",
    "--quiet",
]
subprocess.run(_pip_upgrade_cmd, check=True)
| # `spaces` MUST be imported before any package that touches CUDA (torch, | |
| # transformers, docling …). ZeroGPU intercepts the CUDA initialisation; if | |
| # anything else triggers it first the import raises RuntimeError. | |
| import spaces # noqa: E402 | |
| # --------------------------------------------------------------------------- | |
| # Plugin registration | |
| # --------------------------------------------------------------------------- | |
| # docling-pp-doc-layout requires Python >=3.12 on PyPI, but the code itself | |
| # is compatible with Python 3.10 (all annotations are guarded by | |
| # `from __future__ import annotations`). Instead of installing the package, | |
| # we bundle the source directly and register the model with docling's factory | |
| # by monkey-patching BaseFactory.load_from_plugins so that every new | |
| # LayoutFactory instance automatically includes PPDocLayoutV3Model. | |
| from docling.models.factories.base_factory import BaseFactory | |
| from docling.models.factories.layout_factory import LayoutFactory | |
| from docling_pp_doc_layout.model import PPDocLayoutV3Model | |
# Keep a handle on the unpatched loader so the wrapper below can delegate to it.
_orig_load = BaseFactory.load_from_plugins


def _load_with_pp_doc_layout(
    self, plugin_name=None, allow_external_plugins=False
):
    """Run the stock plugin loader, then register PPDocLayoutV3Model.

    The extra registration is applied only when ``self`` is a
    ``LayoutFactory``; every other factory sees the original behaviour.

    Args:
        plugin_name: Forwarded unchanged to the original loader.
        allow_external_plugins: Forwarded unchanged to the original loader.
    """
    _orig_load(
        self,
        plugin_name=plugin_name,
        allow_external_plugins=allow_external_plugins,
    )

    # Nothing more to do for factories that are not layout factories.
    if not isinstance(self, LayoutFactory):
        return

    try:
        self.register(
            PPDocLayoutV3Model,
            "docling-pp-doc-layout",
            "docling_pp_doc_layout.model",
        )
    except ValueError:
        # Deliberately ignored: treated as "already registered on a previous
        # factory creation".
        return


# Install the wrapper in place of the original loader for all factories.
BaseFactory.load_from_plugins = _load_with_pp_doc_layout
| # --------------------------------------------------------------------------- | |
| import gradio as gr | |
| from docling.datamodel.base_models import InputFormat | |
| from docling.document_converter import DocumentConverter, PdfFormatOption | |
| from docling.datamodel.pipeline_options import PdfPipelineOptions | |
| from docling_pp_doc_layout.options import PPDocLayoutV3Options | |
| # Global initialisation — pipeline is constructed lazily on the first | |
| # convert() call, which happens inside @spaces.GPU, so decide_device() | |
| # correctly resolves "cuda:0" when the H200 is allocated. | |
# Layout-stage settings for the bundled PP-DocLayoutV3 model.
_layout_options = PPDocLayoutV3Options(
    batch_size=2,
    confidence_threshold=0.5,
)
pipeline_options = PdfPipelineOptions(layout_options=_layout_options)

# Only PDF input receives a custom format option; it carries the pipeline
# options built above.
_pdf_format_option = PdfFormatOption(pipeline_options=pipeline_options)
converter = DocumentConverter(
    format_options={InputFormat.PDF: _pdf_format_option},
)
# The setup comment in this file expects convert() to run inside @spaces.GPU,
# and a ZeroGPU Space only gets a GPU for functions carrying this decorator.
@spaces.GPU
def infer_layout(file_path: str | None):
    """Convert an uploaded PDF and summarise the items of the parsed document.

    Args:
        file_path: Path of the uploaded PDF. ``None`` or an empty string means
            nothing was uploaded.

    Returns:
        A ``(payload, json_path)`` tuple.

        * On success, ``payload`` is a list of ``{"type", "content"}`` dicts,
          one per item yielded by ``result.document.iterate_items()``, and
          ``json_path`` is the path of a temporary ``.json`` file holding the
          same data (kept on disk so it can be offered as a download).
        * When no file was given, ``payload`` is ``{"error": ...}`` and
          ``json_path`` is ``None``.
        * When anything raises during conversion or serialisation, ``payload``
          is ``{"runtime_exception": <message>}`` and ``json_path`` is ``None``.
    """
    import json
    import tempfile

    if not file_path:
        return {"error": "No file uploaded"}, None

    try:
        result = converter.convert(file_path)
        structured_data = [
            {
                "type": type(item).__name__,
                # Items without a ``text`` attribute get a placeholder string.
                "content": getattr(item, "text", "No text mapping"),
            }
            for item, _level in result.document.iterate_items()
        ]

        # delete=False keeps the file after the handle is closed so it can be
        # served later; the ``with`` block guarantees the handle is closed
        # even if json.dump raises.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False, encoding="utf-8"
        ) as tmp:
            json.dump(structured_data, tmp, ensure_ascii=False, indent=2)

        return structured_data, tmp.name
    except Exception as e:
        # UI boundary: report the failure in the JSON panel instead of raising.
        return {"runtime_exception": str(e)}, None
# Intro text shown at the top of the page.
_intro_markdown = (
    "## Layout Detection Inference\n"
    "Upload a PDF to parse structural components through the "
    "PaddlePaddle PP-DocLayoutV3 model."
)

with gr.Blocks(title="PP-DocLayoutV3 Empirical Parser") as interface:
    gr.Markdown(_intro_markdown)

    # NOTE(review): the original indentation was lost; this assumes only the
    # file input and JSON view sit inside the Row — confirm against the
    # deployed layout.
    with gr.Row():
        pdf_input = gr.File(label="Source Document", file_types=[".pdf"])
        json_output = gr.JSON(label="Structured Extraction Matrix")

    # Hidden until a run produces a JSON file to download.
    download_btn = gr.DownloadButton(label="Download JSON", visible=False)
    execute_btn = gr.Button("Run Layout Detection")

    def run_and_reveal(file_path):
        """Run ``infer_layout`` and show the download button only on success.

        Returns the data for the JSON panel and a ``gr.DownloadButton`` update
        whose visibility depends on whether a JSON file path was produced.
        """
        data, path = infer_layout(file_path)
        has_download = path is not None
        return data, gr.DownloadButton(value=path, visible=has_download)

    execute_btn.click(
        fn=run_and_reveal,
        inputs=pdf_input,
        outputs=[json_output, download_btn],
    )

# Start the Gradio server only when executed as a script.
if __name__ == "__main__":
    interface.launch()