| | import gradio as gr |
| | import json |
| | import tempfile |
| | import os |
| | from typing import List, Optional, Literal, Tuple |
| | from PIL import Image |
| |
|
| | import spaces |
| | from pathlib import Path |
| | from visualizer import htrflow_visualizer |
| | from htrflow.volume.volume import Collection |
| | from htrflow.pipeline.pipeline import Pipeline |
| |
|
| |
|
# Closed sets of accepted values, expressed as types for function signatures.
FormatChoices = Literal[
    "letter_english",
    "letter_swedish",
    "spread_english",
    "spread_swedish",
]
FileChoices = Literal["txt", "alto", "page", "json"]

# The same values as plain lists, used as the choices of the Gradio dropdowns.
# Keep these in sync with the Literal types above.
FORMAT_CHOICES = ["letter_english", "letter_swedish", "spread_english", "spread_swedish"]
FILE_CHOICES = ["txt", "alto", "page", "json"]

# Output format used when the caller does not pick one.
DEFAULT_OUTPUT = "alto"
| |
|
# Model identifiers shared by several pipeline presets.
_REGION_MODEL = "Riksarkivet/yolov9-regions-1"
_LINE_MODEL = "Riksarkivet/yolov9-lines-within-regions-1"
_ENGLISH_TROCR = "microsoft/trocr-base-handwritten"
_SWEDISH_TROCR = "Riksarkivet/trocr-base-handwritten-hist-swe-2"


def _segmentation_step(model_id, batch_size):
    """Return a fresh "Segmentation" step entry using the yolo model type."""
    return {
        "step": "Segmentation",
        "settings": {
            "model": "yolo",
            "model_settings": {"model": model_id},
            "generation_settings": {"batch_size": batch_size},
        },
    }


def _text_recognition_step(model_id):
    """Return a fresh "TextRecognition" step entry using the TrOCR model type."""
    return {
        "step": "TextRecognition",
        "settings": {
            "model": "TrOCR",
            "model_settings": {"model": model_id},
            "generation_settings": {"batch_size": 16},
        },
    }


def _letter_steps(trocr_model):
    """Steps of a "letter_*" preset: line segmentation, recognition, ordering."""
    return [
        _segmentation_step(_LINE_MODEL, 8),
        _text_recognition_step(trocr_model),
        {"step": "OrderLines"},
    ]


def _spread_steps(trocr_model):
    """Steps of a "spread_*" preset: region and line segmentation, recognition, ordering."""
    return [
        _segmentation_step(_REGION_MODEL, 4),
        _segmentation_step(_LINE_MODEL, 8),
        _text_recognition_step(trocr_model),
        {"step": "ReadingOrderMarginalia", "settings": {"two_page": True}},
    ]


# Preset pipeline configurations, keyed by document type. The English and
# Swedish variants of a layout differ only in the text recognition model.
PIPELINE_CONFIGS = {
    "letter_english": {"steps": _letter_steps(_ENGLISH_TROCR)},
    "letter_swedish": {"steps": _letter_steps(_SWEDISH_TROCR)},
    "spread_english": {"steps": _spread_steps(_ENGLISH_TROCR)},
    "spread_swedish": {"steps": _spread_steps(_SWEDISH_TROCR)},
}
| |
|
| |
|
@spaces.GPU
def _process_htr_pipeline(
    image_path: str, document_type: FormatChoices, custom_settings: Optional[str] = None
) -> Collection:
    """Run an HTRflow pipeline on one image and return the processed collection.

    Args:
        image_path: Path of the image to process.
        document_type: Key into ``PIPELINE_CONFIGS`` selecting a preset
            configuration. Ignored when ``custom_settings`` is supplied.
        custom_settings: Optional JSON text holding a complete pipeline
            configuration. When non-blank it is used instead of the preset.

    Returns:
        Whatever ``Pipeline.run`` returns for a collection built from
        ``image_path``.

    Raises:
        ValueError: If no image is given, ``custom_settings`` is not valid
            JSON or is not a JSON object, or ``document_type`` is not a
            known preset.
        RuntimeError: If the pipeline raises while running; the original
            exception is attached as ``__cause__``.
    """
    if not image_path:
        raise ValueError("No image provided")

    # A whitespace-only settings field means "nothing entered", the same as
    # an empty one, so it falls through to the preset instead of failing as
    # invalid JSON.
    settings_text = (
        custom_settings.strip() if isinstance(custom_settings, str) else custom_settings
    )

    if settings_text:
        try:
            config = json.loads(settings_text)
        except json.JSONDecodeError as json_error:
            raise ValueError("Invalid JSON in custom_settings parameter") from json_error
        # The presets are all mappings; reject arrays/scalars here with a
        # clear message rather than letting the pipeline builder fail later.
        if not isinstance(config, dict):
            raise ValueError("custom_settings must be a JSON object")
    else:
        try:
            config = PIPELINE_CONFIGS[document_type]
        except KeyError:
            known = ", ".join(PIPELINE_CONFIGS)
            raise ValueError(
                f"Unknown document_type {document_type!r}; expected one of: {known}"
            ) from None

    collection = Collection([image_path])
    pipeline = Pipeline.from_config(config)

    try:
        return pipeline.run(collection)
    except Exception as pipeline_error:
        # Chain explicitly so the underlying traceback is kept.
        raise RuntimeError(
            f"Pipeline execution failed: {pipeline_error}"
        ) from pipeline_error
| |
|
| |
|
def htr_text(
    image_path: str,
    document_type: FormatChoices = "letter_swedish",
    custom_settings: Optional[str] = None,
) -> str:
    """Extract text from a handwritten document image using HTR.

    Args:
        image_path: Path of the image to transcribe.
        document_type: Name of the pipeline preset to run.
        custom_settings: Optional JSON pipeline configuration, forwarded
            unchanged to the pipeline runner.

    Returns:
        str: The extracted text. If any step raises, a message starting with
        "HTR text extraction failed:" is returned instead of raising.
    """
    try:
        return extract_text_from_collection(
            _process_htr_pipeline(image_path, document_type, custom_settings)
        )
    except Exception as error:
        # Failures are reported in-band as the returned string.
        return f"HTR text extraction failed: {error}"
| |
|
| |
|
def _first_exported_file(export_dir: Path) -> Optional[Path]:
    """Return the first file found by walking ``export_dir``, or None if there is none."""
    for root, _dirs, files in os.walk(export_dir):
        if files:
            return Path(root) / files[0]
    return None


def htrflow_file(
    image_path: str,
    document_type: FormatChoices = "letter_swedish",
    output_format: FileChoices = DEFAULT_OUTPUT,
    custom_settings: Optional[str] = None,
    server_name: str = "https://gabriel-htrflow-mcp.hf.space",
) -> Optional[str]:
    """Process HTR and return a formatted file for download.

    The image is run through the HTR pipeline, the result is saved with the
    requested serializer into a fresh temporary directory, and the exported
    file is renamed after the input image.

    Args:
        image_path: Path of the image to process.
        document_type: Name of the pipeline preset to run.
        output_format: Serializer name passed to the collection's ``save``;
            also used as the file extension when the exported file has none.
        custom_settings: Optional JSON pipeline configuration, forwarded
            unchanged to the pipeline runner.
        server_name: Not read by this function. NOTE(review): presumably kept
            so clients can build the download URL described below - confirm.

    Returns:
        Optional[str]: File path for direct download via gr.File
        (server_name/gradio_api/file=/tmp/gradio/{temp_folder}/{file_name}),
        or None if processing failed or nothing was exported. Failures are
        logged rather than raised.
    """
    # Imported locally so this function does not depend on a file-level import.
    import logging

    logger = logging.getLogger(__name__)

    try:
        original_filename = Path(image_path).stem or "output"

        processed_collection = _process_htr_pipeline(
            image_path, document_type, custom_settings
        )

        # The directory is intentionally not removed here: the returned path
        # has to stay valid after this function returns.
        temp_dir = Path(tempfile.mkdtemp())
        export_dir = temp_dir / output_format
        processed_collection.save(directory=str(export_dir), serializer=output_format)

        # Stop at the first exported file; walking further would rename a file
        # in every subdirectory to the same name.
        exported = _first_exported_file(export_dir)
        if exported is None:
            logger.warning("HTR export produced no file in %s", export_dir)
            return None

        suffix = exported.suffix or f".{output_format}"
        renamed = exported.with_name(f"{original_filename}{suffix}")
        exported.rename(renamed)
        return str(renamed)

    except Exception:
        # Best-effort API: keep returning None on failure, but leave a trace.
        logger.exception("HTR file export failed for %r", image_path)
        return None
| |
|
| |
|
def extract_text_from_collection(collection: Collection) -> str:
    """Join the text of every text-bearing node in the collection, one per line.

    Walks ``page.traverse()`` for each page in ``collection.pages`` in order
    and keeps nodes whose ``text`` attribute exists and is truthy.

    Returns:
        str: The collected texts separated by newlines; "" if there are none.
    """
    candidate_texts = (
        getattr(node, "text", None)
        for page in collection.pages
        for node in page.traverse()
    )
    return "\n".join(text for text in candidate_texts if text)
| |
|
| |
|
def create_htrflow_mcp_server():
    """Build the tabbed Gradio app with the text, file and visualizer interfaces.

    Returns:
        The ``gr.TabbedInterface`` combining the three interfaces; it is not
        launched here.
    """
    default_server = "https://gabriel-htrflow-mcp.hf.space"

    # Each factory returns a new component on every call, so no component
    # instance is shared between interfaces.
    def image_input(label):
        return gr.Image(type="filepath", label=label)

    def document_type_dropdown():
        return gr.Dropdown(
            choices=FORMAT_CHOICES, value="letter_swedish", label="Document Type"
        )

    def custom_settings_box():
        return gr.Textbox(
            label="Custom Settings (JSON)",
            placeholder="Optional custom pipeline settings",
            value="",
        )

    def server_name_box():
        return gr.Textbox(
            label="Server Name",
            value=default_server,
            placeholder="Server URL for download links",
        )

    text_tab = gr.Interface(
        fn=htr_text,
        inputs=[
            image_input("Upload Image or Enter URL"),
            document_type_dropdown(),
            custom_settings_box(),
        ],
        outputs=[gr.Textbox(label="Extracted Text", lines=10)],
        description="Extract plain text from handwritten documents using HTR",
        api_name="htr_text",
    )

    file_tab = gr.Interface(
        fn=htrflow_file,
        inputs=[
            image_input("Upload Image or Enter URL"),
            document_type_dropdown(),
            gr.Dropdown(
                choices=FILE_CHOICES, value=DEFAULT_OUTPUT, label="Output Format"
            ),
            custom_settings_box(),
            server_name_box(),
        ],
        outputs=[gr.File(label="Download HTR Output File")],
        description="Process handwritten text and get formatted file (ALTO XML, PAGE XML, JSON, or TXT)",
        api_name="htrflow_file",
    )

    visualizer_tab = gr.Interface(
        fn=htrflow_visualizer,
        inputs=[
            image_input("Upload Original Image"),
            gr.File(label="Upload ALTO/PAGE XML File"),
            server_name_box(),
        ],
        outputs=gr.File(label="Download Visualization Image"),
        description="Visualize HTR results by overlaying text regions and polygons on the original image",
        api_name="htrflow_visualizer",
    )

    return gr.TabbedInterface(
        [text_tab, file_tab, visualizer_tab],
        ["HTR Text", "HTR File", "HTR Visualizer"],
        title="HTRflow Handwritten Text Recognition",
    )
| |
|
| |
|
if __name__ == "__main__":
    # Script entry point: build the app and launch it with the MCP server
    # enabled, without a public share link and without debug mode.
    launch_options = {"mcp_server": True, "share": False, "debug": False}
    demo = create_htrflow_mcp_server()
    demo.launch(**launch_options)
| |
|