kaydee committed
Commit
954ecdd
1 Parent(s): 9f66454

Upload 3 files

Files changed (3)
  1. app.py +23 -0
  2. extract_info.py +111 -0
  3. imgprocessing.py +262 -0
app.py ADDED
@@ -0,0 +1,23 @@
+ import warnings
+ warnings.filterwarnings("ignore")
+ import utils.extract_info as ei
+ import gradio as gr
+
+ title = "Receipt Information Extraction using the LayoutLMv3 Model"
+ description = "Receipt information extraction: Microsoft's LayoutLMv3, fine-tuned on the WildReceipt dataset, predicts the key and value fields on a receipt. Upload an image or use one of the example images below; results show up in a few seconds."
+
+ examples = [['Receipts/7f892b9b.jpeg'], ['Receipts/1f2e0222.jpeg'], ['Receipts/f9aa53c2.jpeg']]
+
+ css = """.output_image {height: 600px !important} .input_image {height: 600px !important}"""
+
+ iface = gr.Interface(fn=ei.main,
+                      inputs=gr.Image(),
+                      outputs=gr.Image(type="pil", label="annotated image"),
+                      title=title,
+                      description=description,
+                      examples=examples,
+                      css=css,
+                      analytics_enabled=True)
+
+ iface.queue()
+ iface.launch(inline=False, share=True, debug=False)
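
For quick local testing without the UI, the same entry point can be exercised directly. A minimal sketch, assuming the dependencies are installed and the script is run from the repo root so that utils/ and the Receipts/ examples resolve:

    # Hypothetical offline check: run the full pipeline on one bundled example
    # and save the annotated output, bypassing Gradio entirely.
    import numpy as np
    from PIL import Image
    import utils.extract_info as ei

    img = np.array(Image.open("Receipts/7f892b9b.jpeg").convert("RGB"))
    annotated = ei.main(img)   # PIL image with predicted key/value boxes drawn
    annotated.save("annotated.png")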
extract_info.py ADDED
@@ -0,0 +1,111 @@
+ import os
+ # Install CPU-only PyTorch at startup (a common workaround on hosted demos).
+ os.system('pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu')
+ import warnings
+ warnings.filterwarnings("ignore")
+ import numpy as np
+ from transformers import AutoModelForTokenClassification, AutoProcessor
+ from datasets import load_dataset
+ from PIL import Image, ImageDraw, ImageFont
+ import pytesseract
+ from utils.imgprocessing import processed_image
+
+ # Point pytesseract at the Tesseract binary on Windows; on Linux the
+ # system-wide install on PATH is picked up automatically.
+ if os.name == 'nt':
+     pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
+
+ processor = AutoProcessor.from_pretrained("kaydee/layoutlmv3-wildreceipt", apply_ocr=True)
+ model = AutoModelForTokenClassification.from_pretrained("kaydee/layoutlmv3-wildreceipt")
+
+ dataset = load_dataset("kaydee/wildreceipt", split="test")
+
+ labels = dataset.features['ner_tags'].feature.names
+ id2label = {idx: label for idx, label in enumerate(labels)}
+ # Keys are outlined in red, values in green; the grand total stands out in blue.
+ label2color = {
+     "Date_key": 'red',
+     "Date_value": 'green',
+     "Ignore": 'orange',
+     "Others": 'orange',
+     "Prod_item_key": 'red',
+     "Prod_item_value": 'green',
+     "Prod_price_key": 'red',
+     "Prod_price_value": 'green',
+     "Prod_quantity_key": 'red',
+     "Prod_quantity_value": 'green',
+     "Store_addr_key": 'red',
+     "Store_addr_value": 'green',
+     "Store_name_key": 'red',
+     "Store_name_value": 'green',
+     "Subtotal_key": 'red',
+     "Subtotal_value": 'green',
+     "Tax_key": 'red',
+     "Tax_value": 'green',
+     "Tel_key": 'red',
+     "Tel_value": 'green',
+     "Time_key": 'red',
+     "Time_value": 'green',
+     "Tips_key": 'red',
+     "Tips_value": 'green',
+     "Total_key": 'red',
+     "Total_value": 'blue'
+ }
+
+ def unnormalize_box(bbox, width, height):
+     # LayoutLMv3 boxes are normalized to a 0-1000 grid; map them back to pixels.
+     return [
+         width * (bbox[0] / 1000),
+         height * (bbox[1] / 1000),
+         width * (bbox[2] / 1000),
+         height * (bbox[3] / 1000),
+     ]
+
+ def iob_to_label(label):
+     # WildReceipt labels carry no IOB prefix, so this is the identity.
+     return label
+
+ def process_image(image):
+     width, height = image.size
+
+     # encode (the processor runs OCR internally because apply_ocr=True)
+     encoding = processor(image, truncation=True, return_offsets_mapping=True, return_tensors="pt")
+     offset_mapping = encoding.pop('offset_mapping')
+
+     # forward pass
+     outputs = model(**encoding)
+
+     # get predictions
+     predictions = outputs.logits.argmax(-1).squeeze().tolist()
+     token_boxes = encoding.bbox.squeeze().tolist()
+
+     # only keep non-subword predictions
+     is_subword = np.array(offset_mapping.squeeze().tolist())[:, 0] != 0
+     true_predictions = [id2label[pred] for idx, pred in enumerate(predictions) if not is_subword[idx]]
+     true_boxes = [unnormalize_box(box, width, height) for idx, box in enumerate(token_boxes) if not is_subword[idx]]
+
+     # draw predictions over the image
+     draw = ImageDraw.Draw(image)
+     font = ImageFont.load_default()
+     for prediction, box in zip(true_predictions, true_boxes):
+         predicted_label = iob_to_label(prediction)
+         draw.rectangle(box, outline=label2color[predicted_label])
+         draw.text((box[0] + 10, box[1] - 10), text=predicted_label, fill=label2color[predicted_label], font=font)
+
+     return image
+
+ def main(img):
+     image = processed_image(img)
+     # processed_image returns a single-channel (grayscale) array; stack it
+     # into three identical channels so the model sees an RGB image.
+     img_array = np.array(image)
+     img_array_3d = np.expand_dims(img_array, axis=2)
+     img_3d = np.repeat(img_array_3d, 3, axis=2)
+     img_out = Image.fromarray(np.uint8(img_3d))
+     return process_image(img_out)
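
A note on the geometry above: the LayoutLMv3 processor returns token bounding boxes normalized to a 0-1000 grid regardless of image size, which is why unnormalize_box divides by 1000 and multiplies by the pixel dimensions. A quick worked example with made-up numbers, for a 500x800 (width x height) image:

    # Hypothetical check of unnormalize_box: a [100, 200, 300, 400] box on the
    # 0-1000 grid maps back to pixel coordinates.
    unnormalize_box([100, 200, 300, 400], width=500, height=800)
    # -> [50.0, 160.0, 150.0, 320.0]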
imgprocessing.py ADDED
@@ -0,0 +1,262 @@
+ import cv2
+ import matplotlib.pyplot as plt
+ import numpy as np
+ from skimage.filters import threshold_local
+ import os
+ from PIL import Image
+ from rembg import remove
+
+ def opencv_resize(image, ratio):
+     width = int(image.shape[1] * ratio)
+     height = int(image.shape[0] * ratio)
+     return cv2.resize(image, (width, height), interpolation=cv2.INTER_AREA)
+
+ def plot_rgb(image):
+     plt.figure(figsize=(16, 10))
+     return plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+
+ def plot_gray(image):
+     plt.figure(figsize=(16, 10))
+     return plt.imshow(image, cmap='Greys_r')
+
+ # approximate the contour by a more primitive polygon shape
+ def approximate_contour(contour):
+     peri = cv2.arcLength(contour, True)
+     return cv2.approxPolyDP(contour, 0.032 * peri, True)
+
+ def get_receipt_contour(contours):
+     # loop over the contours (largest first)
+     for c in contours:
+         approx = approximate_contour(c)
+         # if the approximated contour has four points, assume it is the receipt's rectangle
+         if len(approx) == 4:
+             return approx
+     return None
+
+ def contour_to_rect(image, contour):
+     # the contour was found on a copy resized to 1000 px height, so scale back up
+     resize_ratio = 1000 / image.shape[0]
+     pts = contour.reshape(4, 2)
+     rect = np.zeros((4, 2), dtype="float32")
+     # the top-left point has the smallest coordinate sum,
+     # the bottom-right the largest
+     s = pts.sum(axis=1)
+     rect[0] = pts[np.argmin(s)]
+     rect[2] = pts[np.argmax(s)]
+     # the difference (y - x) is minimal at the top-right
+     # and maximal at the bottom-left
+     diff = np.diff(pts, axis=1)
+     rect[1] = pts[np.argmin(diff)]
+     rect[3] = pts[np.argmax(diff)]
+     return rect / resize_ratio
+
+ def warp_perspective(img, rect):
+     # unpack rectangle points: top left, top right, bottom right, bottom left
+     (tl, tr, br, bl) = rect
+     # compute the width of the new image
+     widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
+     widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
+     # compute the height of the new image
+     heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
+     heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
+     # take the maximum of the width and height values as the final dimensions
+     maxWidth = max(int(widthA), int(widthB))
+     maxHeight = max(int(heightA), int(heightB))
+     # destination points that map the receipt to a "scanned", top-down view
+     dst = np.array([
+         [0, 0],
+         [maxWidth - 1, 0],
+         [maxWidth - 1, maxHeight - 1],
+         [0, maxHeight - 1]], dtype="float32")
+     # calculate the perspective transform matrix and warp
+     M = cv2.getPerspectiveTransform(rect, dst)
+     return cv2.warpPerspective(img, M, (maxWidth, maxHeight))
+
+ def bw_scanner(image):
+     # local adaptive thresholding copes with uneven lighting across the receipt
+     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+     T = threshold_local(gray, 21, offset=5, method="gaussian")
+     return (gray > T).astype("uint8") * 255
+
+ def remove_bg(path):
+     image = cv2.imread(path)
+     return remove(image)
+
+ def scan_receipt(image):
+     # Shared pipeline: locate the receipt, warp it to a top-down view, binarize.
+     # rembg output carries an alpha channel; drop it before color conversions.
+     if image.ndim == 3 and image.shape[2] == 4:
+         image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
+
+     # Downscale: finding the receipt contour is more efficient on a small image
+     resize_ratio = 1000 / image.shape[0]
+     original = image.copy()
+     image = opencv_resize(image, resize_ratio)
+
+     # Convert to grayscale and suppress noise with Gaussian and median blurs
+     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+     blurred = cv2.GaussianBlur(gray, (5, 5), 1)
+     blurred = cv2.medianBlur(blurred, 7)
+
+     # Morphological cleanup to merge the receipt into a single bright blob
+     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 10))
+     erosion = cv2.erode(blurred, kernel, iterations=1)
+     rectKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50, 50))
+     rectKernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 20))
+     dilated = cv2.dilate(erosion, rectKernel)
+     closing = cv2.morphologyEx(dilated, cv2.MORPH_CLOSE, rectKernel2)
+
+     # Otsu threshold, then detect contours and keep the ten largest
+     thresh, blackAndWhiteImage = cv2.threshold(closing, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+     contours, hierarchy = cv2.findContours(blackAndWhiteImage, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+     image_with_contours = cv2.drawContours(image.copy(), contours, -1, (0, 255, 0), 3)   # diagnostic
+     largest_contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]
+     image_with_largest_contours = cv2.drawContours(image.copy(), largest_contours, -1, (0, 255, 0), 3)   # diagnostic
+
+     receipt_contour = get_receipt_contour(largest_contours)
+     if receipt_contour is None:
+         # no four-point contour found: fall back to binarizing the whole frame
+         return bw_scanner(original)
+     image_with_receipt_contour = cv2.drawContours(image.copy(), [receipt_contour], -1, (0, 255, 0), 2)   # diagnostic
+
+     # Warp the receipt region of the full-resolution image to a top-down view
+     scanned = warp_perspective(original.copy(), contour_to_rect(original, receipt_contour))
+
+     # --- diagnostic overlay kept from development (not used by the result) ---
+     temp_image = cv2.cvtColor(scanned.copy(), cv2.COLOR_BGR2RGB)
+     blurred = cv2.GaussianBlur(temp_image, (5, 5), 1)
+     blurred = cv2.medianBlur(blurred, 7)
+     erosion = cv2.erode(blurred, kernel, iterations=1)
+     dilated = cv2.dilate(erosion, rectKernel)
+     opening = cv2.morphologyEx(dilated, cv2.MORPH_OPEN, rectKernel2)
+     edged = cv2.Canny(opening, 30, 30, apertureSize=3)
+     rho = 1                # distance resolution of the Hough grid, in pixels
+     theta = np.pi / 600    # angular resolution of the Hough grid, in radians
+     threshold = 10         # minimum number of votes (intersections in a Hough cell)
+     min_line_length = 50   # minimum number of pixels making up a line
+     max_line_gap = 20      # maximum gap in pixels between connectable segments
+     line_image = np.copy(temp_image) * 0   # blank canvas to draw lines on
+     lines = cv2.HoughLinesP(edged, rho, theta, threshold, np.array([]),
+                             min_line_length, max_line_gap)
+     if lines is not None:   # HoughLinesP returns None when no lines are found
+         for line in lines:
+             for x1, y1, x2, y2 in line:
+                 cv2.line(line_image, (x1, y1), (x2, y2), (255, 255, 255), 20)
+                 # mark roughly horizontal segments green, vertical ones red
+                 if abs(y1 - y2) <= abs(x1 - x2):
+                     cv2.line(line_image, (x1, y1), (x2, y1), (0, 255, 0), 5)
+                 else:
+                     cv2.line(line_image, (x1, y1), (x1, y2), (0, 0, 255), 5)
+     lines_edges = cv2.addWeighted(temp_image, 0.8, line_image, 1, 0)
+
+     return bw_scanner(scanned)
+
+ # File-based variant used during local development: reads an image from disk,
+ # runs the shared pipeline, and saves the result.
+ def processed_result(filename):
+     name = os.path.basename(filename)
+     head, sep, tail = name.partition('.')
+     result = scan_receipt(remove_bg(filename))
+     output = Image.fromarray(result)
+     output.save("C:\\Users\\Amrit\\Btech_project\\Processed_img\\" + head + ".png")
+
+ # In-memory variant used by the app: takes the uploaded image as a NumPy array
+ # and returns the binarized scan.
+ def processed_image(img):
+     return scan_receipt(remove(img))
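
The corner-ordering trick in contour_to_rect deserves a note: for a roughly upright quadrilateral, x + y is smallest at the top-left corner and largest at the bottom-right, while y - x (what np.diff yields per row) is smallest at the top-right and largest at the bottom-left. A small sketch with made-up points:

    # Hypothetical corner set in arbitrary order; the sum/diff rules recover
    # top-left, bottom-right, top-right, and bottom-left respectively.
    import numpy as np
    pts = np.array([[195, 220], [10, 10], [8, 210], [200, 15]], dtype="float32")
    s = pts.sum(axis=1)                 # [415.  20. 218. 215.]
    d = np.diff(pts, axis=1).ravel()    # y - x: [  25.    0.  202. -185.]
    print(pts[np.argmin(s)])   # [10. 10.]   -> top-left
    print(pts[np.argmax(s)])   # [195. 220.] -> bottom-right
    print(pts[np.argmin(d)])   # [200.  15.] -> top-right
    print(pts[np.argmax(d)])   # [  8. 210.] -> bottom-left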