MikeTrizna's picture
Upload folder using huggingface_hub
f3270e6 verified
# Copyright (C) 2021-2025, Mindee.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
from pydantic import BaseModel, Field
class KIEIn(BaseModel):
    """Input parameters for KIE; every field is optional and has a default."""

    # Architecture names for the detection and recognition models.
    det_arch: str = Field("db_resnet50", examples=["db_resnet50"])
    reco_arch: str = Field("crnn_vgg16_bn", examples=["crnn_vgg16_bn"])

    # Boolean processing switches.
    assume_straight_pages: bool = Field(True, examples=[True])
    preserve_aspect_ratio: bool = Field(True, examples=[True])
    detect_orientation: bool = Field(False, examples=[False])
    detect_language: bool = Field(False, examples=[False])
    symmetric_pad: bool = Field(True, examples=[True])
    straighten_pages: bool = Field(False, examples=[False])

    # Integer sizes for detection / recognition (presumably batch sizes — confirm).
    det_bs: int = Field(2, examples=[2])
    reco_bs: int = Field(128, examples=[128])

    # Switches that turn off the page- and crop-orientation steps.
    disable_page_orientation: bool = Field(False, examples=[False])
    disable_crop_orientation: bool = Field(False, examples=[False])

    # Float thresholds, both defaulting to 0.1.
    bin_thresh: float = Field(0.1, examples=[0.1])
    box_thresh: float = Field(0.1, examples=[0.1])
# NOTE: BaseModel is already inherited through KIEIn; the explicit base is kept as declared.
class OCRIn(KIEIn, BaseModel):
    """Input parameters for OCR: all KIEIn fields plus line/block resolution settings."""

    # Whether words are grouped into lines (on by default).
    resolve_lines: bool = Field(True, examples=[True])
    # Whether lines are grouped into blocks (off by default).
    resolve_blocks: bool = Field(False, examples=[False])
    # Float setting for paragraph breaks; default 0.0035.
    paragraph_break: float = Field(0.0035, examples=[0.0035])
class RecognitionIn(BaseModel):
    """Input parameters for recognition; both fields are optional."""

    # Recognition architecture name.
    reco_arch: str = Field("crnn_vgg16_bn", examples=["crnn_vgg16_bn"])
    # Integer size for recognition (presumably the batch size — confirm).
    reco_bs: int = Field(128, examples=[128])
class DetectionIn(BaseModel):
    """Input parameters for detection; every field is optional and has a default."""

    # Detection architecture name.
    det_arch: str = Field("db_resnet50", examples=["db_resnet50"])

    # Boolean processing switches, all enabled by default.
    assume_straight_pages: bool = Field(True, examples=[True])
    preserve_aspect_ratio: bool = Field(True, examples=[True])
    symmetric_pad: bool = Field(True, examples=[True])

    # Integer size for detection (presumably the batch size — confirm).
    det_bs: int = Field(2, examples=[2])

    # Float thresholds, both defaulting to 0.1.
    bin_thresh: float = Field(0.1, examples=[0.1])
    box_thresh: float = Field(0.1, examples=[0.1])
class RecognitionOut(BaseModel):
    """Recognition result: a name, a string value and a confidence. All fields are required."""

    # Name attached to the result (the example is a file name).
    name: str = Field(
        ...,
        examples=["example.jpg"],
    )
    # String value of the result.
    value: str = Field(
        ...,
        examples=["Hello"],
    )
    # Confidence as a float.
    confidence: float = Field(
        ...,
        examples=[0.99],
    )
class DetectionOut(BaseModel):
    """Detection result: a name and a list of geometries. Both fields are required."""

    # Name attached to the result (the example is a file name).
    name: str = Field(..., examples=["example.jpg"])
    # Each geometry is a list of floats. Every entry of `examples` must itself
    # be a valid field value, i.e. a list of geometries, hence the extra
    # nesting level around the single sample geometry.
    geometries: list[list[float]] = Field(..., examples=[[[0.0, 0.0, 0.0, 0.0]]])
class OCRWord(BaseModel):
    """A single OCR word: value, geometry, scores and crop orientation. All fields are required."""

    # String value of the word.
    value: str = Field(
        ...,
        examples=["example"],
    )
    # Flat list of floats (four values in the example).
    geometry: list[float] = Field(
        ...,
        examples=[[0.0, 0.0, 0.0, 0.0]],
    )
    # Two separate float scores.
    objectness_score: float = Field(
        ...,
        examples=[0.99],
    )
    confidence: float = Field(
        ...,
        examples=[0.99],
    )
    # Free-form mapping; the example carries "value" and "confidence" keys.
    crop_orientation: dict[str, Any] = Field(
        ...,
        examples=[{"value": 0, "confidence": None}],
    )
class OCRLine(BaseModel):
    """An OCR line: its geometry, an objectness score and its words. All fields are required."""

    # Flat list of floats (four values in the example).
    geometry: list[float] = Field(..., examples=[[0.0, 0.0, 0.0, 0.0]])
    objectness_score: float = Field(..., examples=[0.99])
    # Each entry of `examples` must be a valid value for the field, so the
    # sample is a *list* of word objects rather than a bare word object.
    words: list[OCRWord] = Field(
        ...,
        examples=[
            [
                {
                    "value": "example",
                    "geometry": [0.0, 0.0, 0.0, 0.0],
                    "objectness_score": 0.99,
                    "confidence": 0.99,
                    "crop_orientation": {"value": 0, "confidence": None},
                }
            ]
        ],
    )
class OCRBlock(BaseModel):
    """An OCR block: its geometry, an objectness score and its lines. All fields are required."""

    # Flat list of floats (four values in the example).
    geometry: list[float] = Field(..., examples=[[0.0, 0.0, 0.0, 0.0]])
    objectness_score: float = Field(..., examples=[0.99])
    # Each entry of `examples` must be a valid value for the field, so the
    # sample is a *list* of line objects. The nested word sample includes
    # `objectness_score`, which OCRWord declares as required.
    lines: list[OCRLine] = Field(
        ...,
        examples=[
            [
                {
                    "geometry": [0.0, 0.0, 0.0, 0.0],
                    "objectness_score": 0.99,
                    "words": [
                        {
                            "value": "example",
                            "geometry": [0.0, 0.0, 0.0, 0.0],
                            "objectness_score": 0.99,
                            "confidence": 0.99,
                            "crop_orientation": {"value": 0, "confidence": None},
                        }
                    ],
                }
            ]
        ],
    )
class OCRPage(BaseModel):
    """An OCR page, holding a required list of blocks."""

    # Each entry of `examples` must be a valid value for the field, so the
    # sample is a *list* of block objects rather than a bare block object.
    blocks: list[OCRBlock] = Field(
        ...,
        examples=[
            [
                {
                    "geometry": [0.0, 0.0, 0.0, 0.0],
                    "objectness_score": 0.99,
                    "lines": [
                        {
                            "geometry": [0.0, 0.0, 0.0, 0.0],
                            "objectness_score": 0.99,
                            "words": [
                                {
                                    "value": "example",
                                    "geometry": [0.0, 0.0, 0.0, 0.0],
                                    "objectness_score": 0.99,
                                    "confidence": 0.99,
                                    "crop_orientation": {"value": 0, "confidence": None},
                                }
                            ],
                        }
                    ],
                }
            ]
        ],
    )
class OCROut(BaseModel):
    """OCR result: name, orientation, language, dimensions and pages. All fields are required."""

    # Name attached to the result (the example is a file name).
    name: str = Field(..., examples=["example.jpg"])
    # Mappings whose examples carry "value" and "confidence" keys.
    orientation: dict[str, float | None] = Field(..., examples=[{"value": 0.0, "confidence": 0.99}])
    language: dict[str, str | float | None] = Field(..., examples=[{"value": "en", "confidence": 0.99}])
    # Pair of integers.
    dimensions: tuple[int, int] = Field(..., examples=[(100, 100)])
    # Each entry of `examples` must be a valid value for the field: a list of
    # OCRPage objects. OCRPage's only field is `blocks`, so the sample wraps
    # the block sample in {"blocks": [...]} instead of exposing it directly.
    items: list[OCRPage] = Field(
        ...,
        examples=[
            [
                {
                    "blocks": [
                        {
                            "geometry": [0.0, 0.0, 0.0, 0.0],
                            "objectness_score": 0.99,
                            "lines": [
                                {
                                    "geometry": [0.0, 0.0, 0.0, 0.0],
                                    "objectness_score": 0.99,
                                    "words": [
                                        {
                                            "value": "example",
                                            "geometry": [0.0, 0.0, 0.0, 0.0],
                                            "objectness_score": 0.99,
                                            "confidence": 0.99,
                                            "crop_orientation": {"value": 0, "confidence": None},
                                        }
                                    ],
                                }
                            ],
                        }
                    ],
                }
            ]
        ],
    )
class KIEElement(BaseModel):
    """A KIE prediction group: a class name and the items under it. Both fields are required."""

    class_name: str = Field(..., examples=["example"])
    # Items are free-form dicts (not OCRWord models). Each entry of `examples`
    # must be a valid value for the field, so the sample is a *list* of item
    # dicts rather than a bare dict.
    items: list[dict[str, str | list[float] | float | dict[str, Any]]] = Field(
        ...,
        examples=[
            [
                {
                    "value": "example",
                    "geometry": [0.0, 0.0, 0.0, 0.0],
                    "objectness_score": 0.99,
                    "confidence": 0.99,
                    "crop_orientation": {"value": 0, "confidence": None},
                }
            ]
        ],
    )
class KIEOut(BaseModel):
    """KIE result: name, orientation, language, dimensions and predictions. All fields are required."""

    # Name attached to the result (the example is a file name).
    name: str = Field(
        ...,
        examples=["example.jpg"],
    )
    # Mappings whose examples carry "value" and "confidence" keys.
    orientation: dict[str, float | None] = Field(
        ...,
        examples=[{"value": 0.0, "confidence": 0.99}],
    )
    language: dict[str, str | float | None] = Field(
        ...,
        examples=[{"value": "en", "confidence": 0.99}],
    )
    # Pair of integers.
    dimensions: tuple[int, int] = Field(
        ...,
        examples=[(100, 100)],
    )
    # Required list of KIEElement objects; declared without Field metadata.
    predictions: list[KIEElement]