Spaces:
Runtime error
Runtime error
| import logging | |
| import warnings | |
| from enum import Enum | |
| from pathlib import Path | |
| from typing import Annotated, Optional | |
| import typer | |
| from rich.console import Console | |
| from rich.logging import RichHandler | |
| from docling.datamodel.settings import settings | |
| from docling.utils.model_downloader import download_models | |
# Keep the CLI output clean: ignore UserWarning from modules whose name matches
# "pydantic|torch" and FutureWarning from modules whose name matches "easyocr".
warnings.filterwarnings("ignore", category=UserWarning, module="pydantic|torch")
warnings.filterwarnings("ignore", category=FutureWarning, module="easyocr")

# Rich consoles: `console` writes to stdout, `err_console` writes to stderr.
console, err_console = Console(), Console(stderr=True)

# Typer application object that the commands of this module are attached to.
app = typer.Typer(
    name="Docling models helper",
    add_completion=False,
    no_args_is_help=True,
    pretty_exceptions_enable=False,
)
class _AvailableModels(str, Enum):
    """Names of the model groups that can be selected for download.

    Members are also plain strings; their values are the tokens accepted for
    the ``models`` argument of the ``download`` command. Declaration order is
    the iteration order, so it must stay stable.
    """

    LAYOUT = "layout"
    TABLEFORMER = "tableformer"
    CODE_FORMULA = "code_formula"
    PICTURE_CLASSIFIER = "picture_classifier"
    SMOLVLM = "smolvlm"
    EASYOCR = "easyocr"
# Register the function on `app`: a Typer instance with no registered command
# cannot be turned into a Click command by `typer.main.get_command(app)`.
@app.command("download")
def download(
    output_dir: Annotated[
        Path,
        typer.Option(
            ...,
            "-o",
            "--output-dir",
            help="The directory where all the models are downloaded.",
        ),
    ] = (settings.cache_dir / "models"),
    force: Annotated[
        bool, typer.Option(..., help="If true, the download will be forced")
    ] = False,
    models: Annotated[
        Optional[list[_AvailableModels]],
        typer.Argument(
            help="Models to download (default behavior: all will be downloaded)",
        ),
    ] = None,
    quiet: Annotated[
        bool,
        typer.Option(
            ...,
            "-q",
            "--quiet",
            help="No extra output is generated, the CLI prints only the directory with the cached models.",
        ),
    ] = False,
) -> None:
    """Download the selected Docling models into a local directory."""
    # NOTE: the docstring above is kept to one line on purpose, since Typer
    # shows a command's docstring as its help text.
    #
    # Parameters:
    #   output_dir: target directory handed to `download_models`; defaults to
    #       `settings.cache_dir / "models"`.
    #   force: forwarded unchanged to `download_models`.
    #   models: subset of `_AvailableModels` to fetch; `None` or an empty list
    #       selects every member.
    #   quiet: disables logging and progress output; only the resulting
    #       directory is echoed.
    if not quiet:
        # Route INFO-level log records through Rich, rendered in blue and
        # without the level/time columns.
        logging.basicConfig(
            level=logging.INFO,
            format="[blue]%(message)s[/blue]",
            datefmt="[%X]",
            handlers=[RichHandler(show_level=False, show_time=False, markup=True)],
        )

    # No explicit selection means "download everything".
    to_download = models or list(_AvailableModels)

    # `download_models` returns the directory it used; that value is reported.
    output_dir = download_models(
        output_dir=output_dir,
        force=force,
        progress=(not quiet),
        with_layout=_AvailableModels.LAYOUT in to_download,
        with_tableformer=_AvailableModels.TABLEFORMER in to_download,
        with_code_formula=_AvailableModels.CODE_FORMULA in to_download,
        with_picture_classifier=_AvailableModels.PICTURE_CLASSIFIER in to_download,
        with_smolvlm=_AvailableModels.SMOLVLM in to_download,
        with_easyocr=_AvailableModels.EASYOCR in to_download,
    )

    if quiet:
        # Machine-friendly mode: print the path and nothing else.
        typer.echo(output_dir)
    else:
        typer.secho(f"\nModels downloaded into: {output_dir}.", fg="green")

        console.print(
            "\n",
            "Docling can now be configured for running offline using the local artifacts.\n\n",
            "Using the CLI:",
            f"`docling --artifacts-path={output_dir} FILE`",
            "\n",
            "Using Python: see the documentation at <https://ds4sd.github.io/docling/usage>.",
        )
# Click command object built from the Typer app at import time.
# NOTE(review): presumably exported for tooling that expects a plain Click
# command rather than a Typer instance; confirm against the consumers.
click_app = typer.main.get_command(app)


if __name__ == "__main__":
    # Direct execution of this module runs the Typer application.
    app()