|
--- |
|
language: |
|
- en |
|
- da |
|
--- |
|
|
|
|
|
<p align="center"> |
|
<img src="https://doctr-static.mindee.com/models?id=v0.3.1/Logo_doctr.gif&src=0" width="60%"> |
|
</p> |
|
|
|
**Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch** |
|
|
|
## Task: recognition |
|
|
|
Built with [docTR](https://github.com/mindee/doctr). |
|
|
|
This is a text-recognition model trained with docTR's Danish vocabulary. It does a good job when you need to run OCR on Danish documents. |
|
|
|
### Example usage: |
|
|
|
```python |
|
|
|
from doctr.io import DocumentFile
from doctr.models import ocr_predictor, from_hub

# Recognition stage: the Danish CRNN model, loaded by its hub repository id.
reco_arch = from_hub('diversen/doctr-torch-crnn_vgg16_bn-danish-v1')
# Detection stage: selected by architecture name.
det_arch = "db_resnet50"

model = ocr_predictor(det_arch=det_arch, reco_arch=reco_arch, pretrained=True)
image = DocumentFile.from_images(['test.jpg'])

result = model(image)
result.show()

# Flatten the exported result of the first page into plain text:
# words are separated by a space, lines by "\n", and blocks by an extra "\n".
output = result.export()
text_str = ""

for block in output["pages"][0]["blocks"]:
    block_txt = ""
    for line in block["lines"]:
        line_txt = ""
        for word in line["words"]:
            line_txt += word["value"] + " "
        block_txt += line_txt + "\n"
    text_str += block_txt + "\n"

print(text_str)
|
``` |
|
|
|
### Run Configuration |
|
|
|
```json
{
  "arch": "crnn_vgg16_bn",
  "train_path": "train-data",
  "val_path": "validation-data",
  "train_samples": 1000,
  "val_samples": 20,
  "font": "FreeMono.ttf,FreeSans.ttf,FreeSerif.ttf",
  "min_chars": 1,
  "max_chars": 32,
  "name": "doctr-torch-crnn_vgg16_bn-danish-v1",
  "epochs": 1,
  "batch_size": 64,
  "device": 0,
  "input_size": 32,
  "lr": 0.001,
  "weight_decay": 0,
  "workers": 16,
  "resume": "crnn_vgg16_bn_20240317-095746.pt",
  "vocab": "danish",
  "test_only": false,
  "freeze_backbone": false,
  "show_samples": false,
  "wb": false,
  "push_to_hub": true,
  "pretrained": true,
  "sched": "cosine",
  "amp": false,
  "find_lr": false,
  "early_stop": false,
  "early_stop_epochs": 5,
  "early_stop_delta": 0.01
}
```
|
|