hmarzan committed on
Commit
d4fd597
1 Parent(s): 409307b

Initial version

Browse files
Files changed (3) hide show
  1. app.py +53 -0
  2. config.yaml +35 -0
  3. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from sconf import Config
3
+ from PIL import Image, ImageOps
4
+ from donut import DonutConfig, DonutModel
5
+ import warnings
6
+
7
+ warnings.filterwarnings("ignore")
8
+
9
+ from transformers import logging
10
+
11
+ logging.set_verbosity_warning()
12
+
# Task identifier and the decoder start token derived from it.
task_name = "matricula"
task_prompt = f"<s_{task_name}>"

# Inference settings are read from the YAML file shipped next to this script.
config = Config(default="./config.yaml")

# Build the Donut model from the checkpoint named in the config.
# The decoder's max_length is taken from `max_position_embeddings`
# rather than from the config's separate `max_length` entry.
model = DonutModel.from_pretrained(
    config.pretrained_model_name_or_path,
    input_size=config.input_size,
    max_length=config.max_position_embeddings,
    align_long_axis=config.align_long_axis,
    ignore_mismatched_sizes=True,
)
25
+
def predict_matricula(model, task_name, image):
    """Run Donut inference on a single image and return the first prediction.

    Args:
        model: A loaded ``DonutModel``. It is put in eval mode and moved to
            the GPU when CUDA is available, otherwise to the CPU.
        task_name: Task identifier; the decoder prompt is ``<s_{task_name}>``.
        image: PIL image to read, or ``None`` when no image was provided.

    Returns:
        The first element of the ``"predictions"`` list returned by
        ``model.inference``, or an empty dict when ``image`` is ``None``.
    """
    # Nothing to read (e.g. the UI was submitted without an upload): return an
    # empty result instead of crashing inside ``exif_transpose``.
    if image is None:
        return {}

    # Honour the EXIF orientation tag so rotated photos are read upright.
    image = ImageOps.exif_transpose(image)

    model.eval()
    if torch.cuda.is_available():
        # NOTE(review): upstream Donut's ``inference`` casts the image tensor
        # to fp16 on CUDA, so the weights must be fp16 as well to avoid a
        # dtype mismatch -- confirm against the installed donut-python.
        model.half()
        model.to("cuda")
    else:
        model.to("cpu")

    # Pure inference: do not record an autograd graph.
    with torch.no_grad():
        output = model.inference(image=image, prompt=f"<s_{task_name}>")
    return output["predictions"][0]
35
+
36
+
import gradio as gr

# Gradio front end: one image upload, one JSON output, one submit button.
with gr.Blocks() as demo:

    def fn_predict(image):
        """Click handler: run the module-level model on the uploaded image."""
        # Use the module's `task_name` instead of repeating the literal so the
        # prompt cannot drift from the configured task.
        return predict_matricula(model, task_name=task_name, image=image)

    gr.Markdown(
        """
        # Demo: Donut 🍩 for DR Matriculas
        Dominican Vehicle **Matriculas OCR** Infering
        """)
    with gr.Row():
        # `sources` is documented as a list of allowed input sources.
        input_image = gr.Image(
            label="Matricula", sources=["upload"], type="pil", show_label=True
        )
    with gr.Row():
        output_json = gr.JSON(label="Matricula JSON", show_label=True, value={})
    submit_btn = gr.Button("Submit")
    submit_btn.click(fn=fn_predict, inputs=[input_image], outputs=[output_json])

demo.launch(share=True)
config.yaml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ resume_from_checkpoint_path: None
2
+ result_path: 'trainer/result'
3
+ pretrained_model_name_or_path: 'marzanconsulting/donut-dr-matriculas-ocr'
4
+ dataset_name_or_paths:
5
+ - 'trainer/dataset'
6
+ task_start_tokens:
7
+ - '<s_matricula>'
8
+ sort_json_key: False
9
+ train_batch_sizes:
10
+ - 5
11
+ val_batch_sizes:
12
+ - 1
13
+ input_size:
14
+ - 960
15
+ - 1280
16
+ max_length: 868
17
+ max_position_embeddings: 868
18
+ align_long_axis: False
19
+ num_nodes: 1
20
+ seed: 2022
21
+ lr: 3e-05
22
+ warmup_steps: 300
23
+ num_training_samples_per_epoch: 800
24
+ max_epochs: 50
25
+ max_steps: -1
26
+ num_workers: 12
27
+ val_check_interval: 1.0
28
+ check_val_every_n_epoch: 3
29
+ gradient_clip_val: 1.0
30
+ verbose: True
31
+ model_dir: ''
32
+ tensorboard_dir: ''
33
+ checkpoint_dir:
34
+ exp_name: 'train_cord_matriculas'
35
+ exp_version: 'mmc_v1'
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ donut-python==1.0.9
2
+ torchvision==0.14.0
3
+ pytorch-lightning==1.6.4
4
+ tokenizers>=0.11,<=0.12.1
5
+ transformers==4.25.1
6
+ sentencepiece
7
+ timm==0.6.13
8
+ datasets[vision]