Theivaprakasham committed on
Commit
697b4a3
•
1 Parent(s): 56b4eea
Files changed (3) hide show
  1. app.py +111 -0
  2. packages.txt +6 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.system('pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu')
3
+
4
+ import gradio as gr
5
+ import numpy as np
6
+ from transformers import AutoModelForTokenClassification
7
+ from datasets.features import ClassLabel
8
+ from transformers import AutoProcessor
9
+ from datasets import Features, Sequence, ClassLabel, Value, Array2D, Array3D
10
+ import torch
11
+ from datasets import load_metric
12
+ from transformers import LayoutLMv3ForTokenClassification
13
+ from transformers.data.data_collator import default_data_collator
14
+
15
+
16
+ from transformers import AutoModelForTokenClassification
17
+ from datasets import load_dataset
18
+ from PIL import Image, ImageDraw, ImageFont
19
+
20
+
21
# The processor is created with OCR switched on, so callers hand it a raw image.
processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=True)
model = AutoModelForTokenClassification.from_pretrained("Theivaprakasham/layoutlmv3-finetuned-sroie")

# Write three receipts from the test split to disk; the demo lists them as examples.
dataset = load_dataset("darentang/sroie", split="test")
for example_number, dataset_index in enumerate((2, 1, 0), start=1):
    source_path = dataset[dataset_index]["image_path"]
    Image.open(source_path).convert("RGB").save(f"example{example_number}.png")

# Class index -> tag name, read from the dataset's ner_tags feature.
labels = dataset.features['ner_tags'].feature.names
id2label = dict(enumerate(labels))

# Colour used for the outline and caption of each predicted tag.
label2color = {
    "B-ADDRESS": 'blue',
    "B-COMPANY": 'red',
    "B-DATE": 'green',
    "B-TOTAL": 'violet',
    "I-ADDRESS": 'green',
    "I-COMPANY": 'blue',
    "I-DATE": 'red',
    "I-TOTAL": 'red',
    "O": 'orange',
}
45
+
46
def unnormalize_box(bbox, width, height):
    """Convert a box from the 0-1000 normalized grid to image coordinates.

    Args:
        bbox: Indexable of four numbers in the order the caller uses for
            ``(x0, y0, x1, y1)``, each expressed on a 0-1000 scale.
        width: Image width the x values are scaled to.
        height: Image height the y values are scaled to.

    Returns:
        A four-element list ``[x0, y0, x1, y1]`` of floats.
    """
    # x coordinates scale with the width, y coordinates with the height.
    axis_sizes = (width, height, width, height)
    return [size * (bbox[i] / 1000) for i, size in enumerate(axis_sizes)]
53
+
54
+
55
def iob_to_label(label):
    """Return the tag to display for a predicted label.

    The tag is passed through unchanged, prefix included: the colour table
    in this file is keyed by the full ``B-``/``I-`` tags, so no prefix is
    stripped here.
    """
    return label
57
+
58
+
59
+
60
def process_image(image):
    """Draw the model's predicted tags onto a document image.

    The image is encoded by the module-level ``processor``, classified
    token by token with the module-level ``model``, and every token that
    starts a word gets a coloured rectangle and a caption with its tag.

    Args:
        image: PIL image to annotate. It is drawn on in place.

    Returns:
        The same image object, now carrying the annotations.
    """
    width, height = image.size

    # Encode the image; the offsets are only needed to tell word starts from
    # continuation pieces, so they are removed before calling the model.
    encoding = processor(image, truncation=True, return_offsets_mapping=True, return_tensors="pt")
    offset_mapping = encoding.pop('offset_mapping')

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**encoding)

    # A single image is encoded, so pick batch element 0 explicitly instead of
    # relying on squeeze(), which would also drop any other size-1 dimension.
    predictions = outputs.logits.argmax(-1)[0].tolist()
    token_boxes = encoding.bbox[0].tolist()
    # A token whose character offset does not start at 0 continues a word.
    is_subword = (offset_mapping[0][:, 0] != 0).tolist()

    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()
    for prediction, box, subword in zip(predictions, token_boxes, is_subword):
        if subword:
            continue
        predicted_label = iob_to_label(id2label[prediction])
        color = label2color[predicted_label]
        pixel_box = unnormalize_box(box, width, height)
        draw.rectangle(pixel_box, outline=color)
        # Caption sits just above the box, nudged right of its left edge.
        draw.text((pixel_box[0] + 10, pixel_box[1] - 10), text=predicted_label, fill=color, font=font)

    return image
90
+
91
+
92
# Text shown on the demo page. The apostrophe and quotation marks below are
# real Unicode characters; they had been stored as mis-decoded byte sequences.
title = "Bill Information Extraction using LayoutLMv3 model"
description = "Bill Information Extraction - We use Microsoft’s LayoutLMv3 trained on SROIE Dataset to predict the Company Name, Address, Date, and Total Amount from Bills. To use it, simply upload an image or use the example image below. Results will show up in a few seconds."

article = "<b>References</b><br>[1] Y. Xu et al., “LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking.” 2022. <a href='https://arxiv.org/abs/2204.08387'>Paper Link</a><br>[2] <a href='https://github.com/NielsRogge/Transformers-Tutorials/tree/master/LayoutLMv3'>LayoutLMv3 training and inference</a>"

# Example inputs offered in the UI, one image path per row.
examples = [['example1.png'], ['example2.png'], ['example3.png']]

css = """.output_image, .input_image {height: 600px !important}"""

# PIL image in, annotated PIL image out.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.inputs.Image(type="pil"),
    outputs=gr.outputs.Image(type="pil", label="annotated image"),
    title=title,
    description=description,
    article=article,
    examples=examples,
    css=css,
    analytics_enabled=True,
    enable_queue=True,
)

iface.launch(inline=False, share=False, debug=False)
packages.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ffmpeg
2
+ libsm6
3
+ libxext6
4
+ libgl1
5
+ libgl1-mesa-glx
6
+ tesseract-ocr
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ git+https://github.com/huggingface/transformers.git
2
+ PyYAML==6.0
3
+ pytesseract==0.3.9
4
+ datasets==2.2.2
5
+ seqeval==1.2.2