kaydee committed
Commit
954ecdd
1 Parent(s): 9f66454

Upload 3 files

Files changed (3)
  1. app.py +23 -0
  2. extract_info.py +111 -0
  3. imgprocessing.py +262 -0
app.py ADDED
@@ -0,0 +1,23 @@
+ import warnings
+ warnings.filterwarnings("ignore")
+ import utils.extract_info as ei
+ import gradio as gr
+
+ title = "Receipt Information Extraction using the LayoutLMv3 Model"
+ description = "Receipt information extraction: Microsoft's LayoutLMv3, fine-tuned on the WildReceipt dataset, predicts the key and value fields on a receipt. Upload an image or use one of the example images below; results show up in a few seconds."
+
+ examples = [['Receipts/7f892b9b.jpeg'], ['Receipts/1f2e0222.jpeg'], ['Receipts/f9aa53c2.jpeg']]
+
+ css = """.output_image {height: 600px !important} .input_image {height: 600px !important}"""
+
+ iface = gr.Interface(fn=ei.main,
+                      inputs=gr.Image(),
+                      outputs=gr.Image(type="pil", label="annotated image"),
+                      title=title,
+                      description=description,
+                      examples=examples,
+                      css=css,
+                      analytics_enabled=True)
+
+ iface.queue()
+ iface.launch(inline=False, share=True, debug=False)
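
For quick local testing without the UI, the same entry point can be exercised directly. A minimal sketch, assuming the dependencies are installed and the script is run from the repo root so that utils/ and the Receipts/ examples resolve:

    # Hypothetical offline check: run the full pipeline on one bundled example
    # and save the annotated output, bypassing Gradio entirely.
    import numpy as np
    from PIL import Image
    import utils.extract_info as ei

    img = np.array(Image.open("Receipts/7f892b9b.jpeg").convert("RGB"))
    annotated = ei.main(img)   # PIL image with predicted key/value boxes drawn
    annotated.save("annotated.png")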
extract_info.py ADDED
@@ -0,0 +1,111 @@
+ import os
+ # Install CPU-only PyTorch at startup (a common workaround on hosted demos).
+ os.system('pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu')
+ import warnings
+ warnings.filterwarnings("ignore")
+ import numpy as np
+ from transformers import AutoModelForTokenClassification, AutoProcessor
+ from datasets import load_dataset
+ from PIL import Image, ImageDraw, ImageFont
+ import pytesseract
+ from utils.imgprocessing import processed_image
+
+ # Point pytesseract at the Tesseract binary on Windows; on Linux the
+ # system-wide install on PATH is picked up automatically.
+ if os.name == 'nt':
+     pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
+
+ processor = AutoProcessor.from_pretrained("kaydee/layoutlmv3-wildreceipt", apply_ocr=True)
+ model = AutoModelForTokenClassification.from_pretrained("kaydee/layoutlmv3-wildreceipt")
+
+ dataset = load_dataset("kaydee/wildreceipt", split="test")
+
+ labels = dataset.features['ner_tags'].feature.names
+ id2label = {idx: label for idx, label in enumerate(labels)}
+ # Keys are outlined in red, values in green; the grand total stands out in blue.
+ label2color = {
+     "Date_key": 'red',
+     "Date_value": 'green',
+     "Ignore": 'orange',
+     "Others": 'orange',
+     "Prod_item_key": 'red',
+     "Prod_item_value": 'green',
+     "Prod_price_key": 'red',
+     "Prod_price_value": 'green',
+     "Prod_quantity_key": 'red',
+     "Prod_quantity_value": 'green',
+     "Store_addr_key": 'red',
+     "Store_addr_value": 'green',
+     "Store_name_key": 'red',
+     "Store_name_value": 'green',
+     "Subtotal_key": 'red',
+     "Subtotal_value": 'green',
+     "Tax_key": 'red',
+     "Tax_value": 'green',
+     "Tel_key": 'red',
+     "Tel_value": 'green',
+     "Time_key": 'red',
+     "Time_value": 'green',
+     "Tips_key": 'red',
+     "Tips_value": 'green',
+     "Total_key": 'red',
+     "Total_value": 'blue'
+ }
+
+ def unnormalize_box(bbox, width, height):
+     # LayoutLMv3 boxes are normalized to a 0-1000 grid; map them back to pixels.
+     return [
+         width * (bbox[0] / 1000),
+         height * (bbox[1] / 1000),
+         width * (bbox[2] / 1000),
+         height * (bbox[3] / 1000),
+     ]
+
+ def iob_to_label(label):
+     # WildReceipt labels carry no IOB prefix, so this is the identity.
+     return label
+
+ def process_image(image):
+     width, height = image.size
+
+     # encode (the processor runs OCR internally because apply_ocr=True)
+     encoding = processor(image, truncation=True, return_offsets_mapping=True, return_tensors="pt")
+     offset_mapping = encoding.pop('offset_mapping')
+
+     # forward pass
+     outputs = model(**encoding)
+
+     # get predictions
+     predictions = outputs.logits.argmax(-1).squeeze().tolist()
+     token_boxes = encoding.bbox.squeeze().tolist()
+
+     # only keep non-subword predictions
+     is_subword = np.array(offset_mapping.squeeze().tolist())[:, 0] != 0
+     true_predictions = [id2label[pred] for idx, pred in enumerate(predictions) if not is_subword[idx]]
+     true_boxes = [unnormalize_box(box, width, height) for idx, box in enumerate(token_boxes) if not is_subword[idx]]
+
+     # draw predictions over the image
+     draw = ImageDraw.Draw(image)
+     font = ImageFont.load_default()
+     for prediction, box in zip(true_predictions, true_boxes):
+         predicted_label = iob_to_label(prediction)
+         draw.rectangle(box, outline=label2color[predicted_label])
+         draw.text((box[0] + 10, box[1] - 10), text=predicted_label, fill=label2color[predicted_label], font=font)
+
+     return image
+
+ def main(img):
+     image = processed_image(img)
+     # processed_image returns a single-channel (grayscale) array; stack it
+     # into three identical channels so the model sees an RGB image.
+     img_array = np.array(image)
+     img_array_3d = np.expand_dims(img_array, axis=2)
+     img_3d = np.repeat(img_array_3d, 3, axis=2)
+     img_out = Image.fromarray(np.uint8(img_3d))
+     return process_image(img_out)
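
A note on the geometry above: the LayoutLMv3 processor returns token bounding boxes normalized to a 0-1000 grid regardless of image size, which is why unnormalize_box divides by 1000 and multiplies by the pixel dimensions. A quick worked example with made-up numbers, for a 500x800 (width x height) image:

    # Hypothetical check of unnormalize_box: a [100, 200, 300, 400] box on the
    # 0-1000 grid maps back to pixel coordinates.
    unnormalize_box([100, 200, 300, 400], width=500, height=800)
    # -> [50.0, 160.0, 150.0, 320.0]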
imgprocessing.py ADDED
@@ -0,0 +1,262 @@
+ import cv2
+ import matplotlib.pyplot as plt
+ import numpy as np
+ from skimage.filters import threshold_local
+ import os
+ from PIL import Image
+ from rembg import remove
+
+ def opencv_resize(image, ratio):
+     width = int(image.shape[1] * ratio)
+     height = int(image.shape[0] * ratio)
+     return cv2.resize(image, (width, height), interpolation=cv2.INTER_AREA)
+
+ def plot_rgb(image):
+     plt.figure(figsize=(16, 10))
+     return plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+
+ def plot_gray(image):
+     plt.figure(figsize=(16, 10))
+     return plt.imshow(image, cmap='Greys_r')
+
+ # approximate the contour by a more primitive polygon shape
+ def approximate_contour(contour):
+     peri = cv2.arcLength(contour, True)
+     return cv2.approxPolyDP(contour, 0.032 * peri, True)
+
+ def get_receipt_contour(contours):
+     # loop over the contours (largest first)
+     for c in contours:
+         approx = approximate_contour(c)
+         # if the approximated contour has four points, assume it is the receipt's rectangle
+         if len(approx) == 4:
+             return approx
+     return None
+
+ def contour_to_rect(image, contour):
+     # the contour was found on a copy resized to 1000 px height, so scale back up
+     resize_ratio = 1000 / image.shape[0]
+     pts = contour.reshape(4, 2)
+     rect = np.zeros((4, 2), dtype="float32")
+     # the top-left point has the smallest coordinate sum,
+     # the bottom-right the largest
+     s = pts.sum(axis=1)
+     rect[0] = pts[np.argmin(s)]
+     rect[2] = pts[np.argmax(s)]
+     # the difference (y - x) is minimal at the top-right
+     # and maximal at the bottom-left
+     diff = np.diff(pts, axis=1)
+     rect[1] = pts[np.argmin(diff)]
+     rect[3] = pts[np.argmax(diff)]
+     return rect / resize_ratio
+
+ def warp_perspective(img, rect):
+     # unpack rectangle points: top left, top right, bottom right, bottom left
+     (tl, tr, br, bl) = rect
+     # compute the width of the new image
+     widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
+     widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
+     # compute the height of the new image
+     heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
+     heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
+     # take the maximum of the width and height values as the final dimensions
+     maxWidth = max(int(widthA), int(widthB))
+     maxHeight = max(int(heightA), int(heightB))
+     # destination points that map the receipt to a "scanned", top-down view
+     dst = np.array([
+         [0, 0],
+         [maxWidth - 1, 0],
+         [maxWidth - 1, maxHeight - 1],
+         [0, maxHeight - 1]], dtype="float32")
+     # calculate the perspective transform matrix and warp
+     M = cv2.getPerspectiveTransform(rect, dst)
+     return cv2.warpPerspective(img, M, (maxWidth, maxHeight))
+
+ def bw_scanner(image):
+     # local adaptive thresholding copes with uneven lighting across the receipt
+     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+     T = threshold_local(gray, 21, offset=5, method="gaussian")
+     return (gray > T).astype("uint8") * 255
+
+ def remove_bg(path):
+     image = cv2.imread(path)
+     return remove(image)
+
+ def scan_receipt(image):
+     # Shared pipeline: locate the receipt, warp it to a top-down view, binarize.
+     # rembg output carries an alpha channel; drop it before color conversions.
+     if image.ndim == 3 and image.shape[2] == 4:
+         image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
+
+     # Downscale: finding the receipt contour is more efficient on a small image
+     resize_ratio = 1000 / image.shape[0]
+     original = image.copy()
+     image = opencv_resize(image, resize_ratio)
+
+     # Convert to grayscale and suppress noise with Gaussian and median blurs
+     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+     blurred = cv2.GaussianBlur(gray, (5, 5), 1)
+     blurred = cv2.medianBlur(blurred, 7)
+
+     # Morphological cleanup to merge the receipt into a single bright blob
+     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 10))
+     erosion = cv2.erode(blurred, kernel, iterations=1)
+     rectKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50, 50))
+     rectKernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 20))
+     dilated = cv2.dilate(erosion, rectKernel)
+     closing = cv2.morphologyEx(dilated, cv2.MORPH_CLOSE, rectKernel2)
+
+     # Otsu threshold, then detect contours and keep the ten largest
+     thresh, blackAndWhiteImage = cv2.threshold(closing, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+     contours, hierarchy = cv2.findContours(blackAndWhiteImage, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+     image_with_contours = cv2.drawContours(image.copy(), contours, -1, (0, 255, 0), 3)   # diagnostic
+     largest_contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]
+     image_with_largest_contours = cv2.drawContours(image.copy(), largest_contours, -1, (0, 255, 0), 3)   # diagnostic
+
+     receipt_contour = get_receipt_contour(largest_contours)
+     if receipt_contour is None:
+         # no four-point contour found: fall back to binarizing the whole frame
+         return bw_scanner(original)
+     image_with_receipt_contour = cv2.drawContours(image.copy(), [receipt_contour], -1, (0, 255, 0), 2)   # diagnostic
+
+     # Warp the receipt region of the full-resolution image to a top-down view
+     scanned = warp_perspective(original.copy(), contour_to_rect(original, receipt_contour))
+
+     # --- diagnostic overlay kept from development (not used by the result) ---
+     temp_image = cv2.cvtColor(scanned.copy(), cv2.COLOR_BGR2RGB)
+     blurred = cv2.GaussianBlur(temp_image, (5, 5), 1)
+     blurred = cv2.medianBlur(blurred, 7)
+     erosion = cv2.erode(blurred, kernel, iterations=1)
+     dilated = cv2.dilate(erosion, rectKernel)
+     opening = cv2.morphologyEx(dilated, cv2.MORPH_OPEN, rectKernel2)
+     edged = cv2.Canny(opening, 30, 30, apertureSize=3)
+     rho = 1                # distance resolution of the Hough grid, in pixels
+     theta = np.pi / 600    # angular resolution of the Hough grid, in radians
+     threshold = 10         # minimum number of votes (intersections in a Hough cell)
+     min_line_length = 50   # minimum number of pixels making up a line
+     max_line_gap = 20      # maximum gap in pixels between connectable segments
+     line_image = np.copy(temp_image) * 0   # blank canvas to draw lines on
+     lines = cv2.HoughLinesP(edged, rho, theta, threshold, np.array([]),
+                             min_line_length, max_line_gap)
+     if lines is not None:   # HoughLinesP returns None when no lines are found
+         for line in lines:
+             for x1, y1, x2, y2 in line:
+                 cv2.line(line_image, (x1, y1), (x2, y2), (255, 255, 255), 20)
+                 # mark roughly horizontal segments green, vertical ones red
+                 if abs(y1 - y2) <= abs(x1 - x2):
+                     cv2.line(line_image, (x1, y1), (x2, y1), (0, 255, 0), 5)
+                 else:
+                     cv2.line(line_image, (x1, y1), (x1, y2), (0, 0, 255), 5)
+     lines_edges = cv2.addWeighted(temp_image, 0.8, line_image, 1, 0)
+
+     return bw_scanner(scanned)
+
+ # File-based variant used during local development: reads an image from disk,
+ # runs the shared pipeline, and saves the result.
+ def processed_result(filename):
+     name = os.path.basename(filename)
+     head, sep, tail = name.partition('.')
+     result = scan_receipt(remove_bg(filename))
+     output = Image.fromarray(result)
+     output.save("C:\\Users\\Amrit\\Btech_project\\Processed_img\\" + head + ".png")
+
+ # In-memory variant used by the app: takes the uploaded image as a NumPy array
+ # and returns the binarized scan.
+ def processed_image(img):
+     return scan_receipt(remove(img))
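
The corner-ordering trick in contour_to_rect deserves a note: for a roughly upright quadrilateral, x + y is smallest at the top-left corner and largest at the bottom-right, while y - x (what np.diff yields per row) is smallest at the top-right and largest at the bottom-left. A small sketch with made-up points:

    # Hypothetical corner set in arbitrary order; the sum/diff rules recover
    # top-left, bottom-right, top-right, and bottom-left respectively.
    import numpy as np
    pts = np.array([[195, 220], [10, 10], [8, 210], [200, 15]], dtype="float32")
    s = pts.sum(axis=1)                 # [415.  20. 218. 215.]
    d = np.diff(pts, axis=1).ravel()    # y - x: [  25.    0.  202. -185.]
    print(pts[np.argmin(s)])   # [10. 10.]   -> top-left
    print(pts[np.argmax(s)])   # [195. 220.] -> bottom-right
    print(pts[np.argmin(d)])   # [200.  15.] -> top-right
    print(pts[np.argmax(d)])   # [  8. 210.] -> bottom-left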