Spaces:

adelevett
/

docling_pp_layout_demo

Running on Zero

App Files Files Community

docling_pp_layout_demo / app.py

adelevett

Upload 2 files

8ac770e verified 2 days ago

raw

history blame contribute delete

5.18 kB

	# ---------------------------------------------------------------------------
	# Force-upgrade transformers to >=5.1.0 before any other import.
	#
	# Why: PP-DocLayoutV3's custom model classes (PPDocLayoutV3ImageProcessor,
	# PPDocLayoutV3ForObjectDetection) were added to the transformers library in
	# version 5.1.0. docling-ibm-models caps transformers<5.0.0 (conservative
	# pinning), so pip resolves transformers ~4.x at build time. We upgrade it
	# here at runtime, before any docling/transformers import, so the correct
	# classes are available. docling-ibm-models' usage (AutoModel, pipeline API)
	# remains compatible with transformers 5.x.
	# ---------------------------------------------------------------------------
	import subprocess
	import sys

	# Upgrade transformers inside the environment of the running interpreter.
	# A non-zero pip exit status raises CalledProcessError, which aborts startup
	# before any docling/transformers import is attempted.
	subprocess.check_call(
	    [sys.executable, "-m", "pip", "install", "transformers>=5.1.0", "--quiet"]
	)

	# `spaces` MUST be imported before any package that touches CUDA (torch,
	# transformers, docling …). ZeroGPU intercepts the CUDA initialisation; if
	# anything else triggers it first the import raises RuntimeError.
	import spaces # noqa: E402

	# ---------------------------------------------------------------------------
	# Plugin registration
	# ---------------------------------------------------------------------------
	# docling-pp-doc-layout requires Python >=3.12 on PyPI, but the code itself
	# is compatible with Python 3.10 (all annotations are guarded by
	# `from __future__ import annotations`). Instead of installing the package,
	# we bundle the source directly and register the model with docling's factory
	# by monkey-patching BaseFactory.load_from_plugins so that every new
	# LayoutFactory instance automatically includes PPDocLayoutV3Model.
	from docling.models.factories.base_factory import BaseFactory
	from docling.models.factories.layout_factory import LayoutFactory
	from docling_pp_doc_layout.model import PPDocLayoutV3Model

	# Keep a handle on the unpatched method so the wrapper can delegate to it.
	_orig_load = BaseFactory.load_from_plugins


	def _load_with_pp_doc_layout(
	    self, plugin_name=None, allow_external_plugins=False
	):
	    """Load plugins as usual, then register PPDocLayoutV3Model on layout factories.

	    Installed below as a replacement for ``BaseFactory.load_from_plugins``.
	    It first delegates to the original method, then, for ``LayoutFactory``
	    instances only, registers the bundled ``PPDocLayoutV3Model``.

	    Args:
	        self: The factory instance whose plugins are being loaded.
	        plugin_name: Forwarded unchanged to the original method.
	        allow_external_plugins: Forwarded unchanged to the original method.

	    Returns:
	        Whatever the original ``load_from_plugins`` returned, so the patch
	        stays transparent to callers that inspect the result.
	    """
	    result = _orig_load(
	        self,
	        plugin_name=plugin_name,
	        allow_external_plugins=allow_external_plugins,
	    )
	    if isinstance(self, LayoutFactory):
	        try:
	            self.register(
	                PPDocLayoutV3Model,
	                "docling-pp-doc-layout",
	                "docling_pp_doc_layout.model",
	            )
	        except ValueError:
	            # Deliberately best-effort: a ValueError here is treated as
	            # "already registered on a previous factory creation".
	            pass
	    return result


	# Install the wrapper on the base class so every factory goes through it.
	BaseFactory.load_from_plugins = _load_with_pp_doc_layout

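	# ---------------------------------------------------------------------------
	# Application: docling pipeline and Gradio UI
	# ---------------------------------------------------------------------------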
	import gradio as gr
	from docling.datamodel.base_models import InputFormat
	from docling.document_converter import DocumentConverter, PdfFormatOption
	from docling.datamodel.pipeline_options import PdfPipelineOptions
	from docling_pp_doc_layout.options import PPDocLayoutV3Options

	# Global initialisation — pipeline is constructed lazily on the first
	# convert() call, which happens inside @spaces.GPU, so decide_device()
	# correctly resolves "cuda:0" when the H200 is allocated.
	# Layout-stage settings handed to the PP-DocLayoutV3 model.
	_layout_options = PPDocLayoutV3Options(batch_size=2, confidence_threshold=0.5)
	pipeline_options = PdfPipelineOptions(layout_options=_layout_options)

	# Route PDF inputs through the pipeline options defined above.
	_pdf_format_option = PdfFormatOption(pipeline_options=pipeline_options)
	converter = DocumentConverter(
	    format_options={InputFormat.PDF: _pdf_format_option}
	)


	def _write_json_tempfile(records):
	    """Serialise *records* to a new ``.json`` temp file and return its path.

	    The file is created with ``delete=False`` so it outlives this call and
	    can be served as a download. If serialisation fails, the partially
	    written file is removed before the exception is re-raised.
	    """
	    import json
	    import os
	    import tempfile

	    tmp = tempfile.NamedTemporaryFile(
	        mode="w", suffix=".json", delete=False, encoding="utf-8"
	    )
	    try:
	        # The context manager closes the handle on success and on failure.
	        with tmp:
	            json.dump(records, tmp, ensure_ascii=False, indent=2)
	    except Exception:
	        os.unlink(tmp.name)  # do not leave a truncated file behind
	        raise
	    return tmp.name


	@spaces.GPU(duration=120)
	def infer_layout(file_path: str | None):
	    """Convert the uploaded document and list its items as JSON-ready dicts.

	    Args:
	        file_path: Path of the uploaded file, or ``None``/empty when nothing
	            was uploaded.

	    Returns:
	        A ``(payload, json_path)`` tuple. On success ``payload`` is a list of
	        ``{"type", "content"}`` dicts, one per item yielded by
	        ``result.document.iterate_items()``, and ``json_path`` is a temp file
	        holding the same data. When no file is given, or any exception is
	        raised, ``payload`` is a single error dict and ``json_path`` is
	        ``None``.
	    """
	    if not file_path:
	        return {"error": "No file uploaded"}, None
	    try:
	        result = converter.convert(file_path)
	        structured_data = [
	            {
	                "type": type(item).__name__,
	                # Items without a ``text`` attribute get a placeholder.
	                "content": getattr(item, "text", "No text mapping"),
	            }
	            for item, _level in result.document.iterate_items()
	        ]
	        # Write to a temp file so Gradio can serve it as a download.
	        return structured_data, _write_json_tempfile(structured_data)
	    except Exception as e:
	        # UI boundary: report the failure in the JSON pane instead of raising.
	        return {"runtime_exception": str(e)}, None


	with gr.Blocks(title="PP-DocLayoutV3 Empirical Parser") as interface:
	    gr.Markdown(
	        "## Layout Detection Inference\n"
	        "Upload a PDF to parse structural components through the "
	        "PaddlePaddle PP-DocLayoutV3 model."
	    )
	    # NOTE(review): the nesting below is reconstructed; the Row is assumed to
	    # hold only the upload and JSON panes, with both buttons beneath it.
	    with gr.Row():
	        pdf_input = gr.File(label="Source Document", file_types=[".pdf"])
	        json_output = gr.JSON(label="Structured Extraction Matrix")
	    # Hidden until a run has produced a file to download.
	    download_btn = gr.DownloadButton(label="Download JSON", visible=False)
	    execute_btn = gr.Button("Run Layout Detection")

	    def run_and_reveal(file_path):
	        """Run layout inference and show the download button on success.

	        Args:
	            file_path: Value of the file input; falsy when nothing is uploaded.

	        Returns:
	            The JSON payload for the output pane and a ``DownloadButton``
	            update that is visible only when a result file path exists.
	        """
	        if not file_path:
	            # Answer locally with the same payload infer_layout would give,
	            # without entering the @spaces.GPU-decorated call.
	            data, path = {"error": "No file uploaded"}, None
	        else:
	            data, path = infer_layout(file_path)
	        return data, gr.DownloadButton(value=path, visible=path is not None)

	    execute_btn.click(
	        fn=run_and_reveal,
	        inputs=pdf_input,
	        outputs=[json_output, download_btn],
	    )

	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	    interface.launch()