Reciept-information-extraction / imgprocessing.py
kaydee's picture
Upload 3 files
954ecdd verified
raw
history blame
10.6 kB
import cv2
import matplotlib.pyplot as plt
import numpy as np
from skimage.filters import threshold_local
import os
from PIL import Image
from rembg import remove
def opencv_resize(image, ratio):
    """Scale ``image`` by ``ratio`` along both axes using area interpolation.

    The target width and height are the source dimensions multiplied by
    ``ratio`` and truncated to integers.
    """
    src_height, src_width = image.shape[0], image.shape[1]
    # cv2.resize expects the size as (width, height)
    target_size = (int(src_width * ratio), int(src_height * ratio))
    return cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)
def plot_rgb(image):
    """Display a BGR image in a fresh 16x10 matplotlib figure.

    The channels are converted from BGR to RGB before plotting; the value
    returned by ``plt.imshow`` is handed back to the caller.
    """
    figure_size = (16, 10)
    plt.figure(figsize=figure_size)
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return plt.imshow(rgb_image)
def plot_gray(image):
    """Display ``image`` in a fresh 16x10 figure using the reversed grey colormap.

    The value returned by ``plt.imshow`` is handed back to the caller.
    """
    figure_size = (16, 10)
    plt.figure(figsize=figure_size)
    handle = plt.imshow(image, cmap='Greys_r')
    return handle
def approximate_contour(contour):
    """Approximate a closed contour by a more primitive polygon shape.

    The tolerance handed to ``cv2.approxPolyDP`` is 3.2% of the closed
    contour's perimeter.
    """
    perimeter = cv2.arcLength(contour, True)
    tolerance = 0.032 * perimeter
    return cv2.approxPolyDP(contour, tolerance, True)
def get_receipt_contour(contours):
    """Return the first approximated contour that has exactly four points.

    Contours are examined in the order given and approximated one at a time;
    a four-point approximation is taken to be the receipt's rectangle.
    ``None`` is returned when no approximation has four points.
    """
    polygons = (approximate_contour(candidate) for candidate in contours)
    return next((polygon for polygon in polygons if len(polygon) == 4), None)
def contour_to_rect(image, contour):
    """Order a four-point contour as (tl, tr, br, bl) and rescale it.

    The ordered corners are divided by ``1000 / image.shape[0]``, which maps
    points found on a copy of ``image`` resized to a height of 1000 px back
    onto ``image`` itself.  Returns a (4, 2) float32 array.
    """
    scale = 1000 / image.shape[0]
    corners = contour.reshape(4, 2)

    # x + y is smallest at the top-left corner and largest at the bottom-right
    coord_sum = corners.sum(axis=1)
    # y - x is smallest at the top-right corner and largest at the bottom-left
    coord_diff = np.diff(corners, axis=1)

    ordered = np.array(
        [
            corners[np.argmin(coord_sum)],   # top-left
            corners[np.argmin(coord_diff)],  # top-right
            corners[np.argmax(coord_sum)],   # bottom-right
            corners[np.argmax(coord_diff)],  # bottom-left
        ],
        dtype="float32",
    )
    return ordered / scale
def wrap_perspective(img, rect):
    """Warp the quadrilateral ``rect`` of ``img`` into an upright rectangle.

    ``rect`` holds the corners in the order top-left, top-right,
    bottom-right, bottom-left.  The output width and height are the longer of
    each pair of opposite edges, truncated to whole pixels.
    """
    top_left, top_right, bottom_right, bottom_left = rect

    def edge_length(p, q):
        # Euclidean distance between two corner points
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # bottom and top edges decide the width; right and left edges the height
    out_width = max(
        int(edge_length(bottom_right, bottom_left)),
        int(edge_length(top_right, top_left)),
    )
    out_height = max(
        int(edge_length(top_right, bottom_right)),
        int(edge_length(top_left, bottom_left)),
    )

    # destination corners of the "scanned" view, in the same order as ``rect``
    target = np.array(
        [
            [0, 0],
            [out_width - 1, 0],
            [out_width - 1, out_height - 1],
            [0, out_height - 1],
        ],
        dtype="float32",
    )

    transform = cv2.getPerspectiveTransform(rect, target)
    return cv2.warpPerspective(img, transform, (out_width, out_height))
def bw_scanner(image):
    """Binarize a BGR image with a local Gaussian threshold.

    Pixels brighter than their local threshold (block size 21, offset 5)
    become 255 and all others become 0; the result is a uint8 array.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    local_threshold = threshold_local(grayscale, 21, offset=5, method="gaussian")
    foreground = grayscale > local_threshold
    return foreground.astype("uint8") * 255
def remove_bg(path):
    """Load the image at ``path`` and strip its background with rembg.

    Args:
        path: Location of the image file, read with ``cv2.imread``.

    Returns:
        Whatever ``rembg.remove`` returns for the loaded array.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read ``path``.
    """
    source = cv2.imread(path)
    if source is None:
        # cv2.imread reports failure by returning None rather than raising
        raise FileNotFoundError(f"Could not read image: {path!r}")
    return remove(source)
def processed_result(filename, output_dir=None):
    """Scan the receipt in ``filename`` and save the binarized result as a PNG.

    The image is read with ``cv2.imread`` and handed to ``processed_image``,
    which performs background removal, receipt detection, perspective
    correction and thresholding.  The result is written as ``<stem>.png``,
    where ``<stem>`` is the part of the file's base name before its first dot.

    Args:
        filename: Path of the input image.
        output_dir: Directory to write the PNG into.  When omitted, the
            legacy hard-coded location is used so existing callers keep
            their behaviour.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read ``filename``.
    """
    name = os.path.basename(filename)
    # Naming rule kept from the original: everything before the FIRST dot
    head, _, _ = name.partition('.')

    source = cv2.imread(filename)
    if source is None:
        # cv2.imread reports failure by returning None rather than raising
        raise FileNotFoundError(f"Could not read image: {filename!r}")

    # processed_image runs rembg.remove itself, so pass the raw image
    result = processed_image(source)

    if output_dir is None:
        # Legacy destination, reproduced exactly for backward compatibility
        target = "C:\\Users\\Amrit\\Btech_project\\Processed_img\\" + head + ".png"
    else:
        target = os.path.join(output_dir, head + ".png")
    Image.fromarray(result).save(target)
def processed_image(img):
    """Remove the background from ``img``, crop out the receipt and binarize it.

    Steps: background removal with ``rembg.remove``, receipt outline
    detection on a copy downscaled to a height of 1000 px, perspective
    correction on the full-size image, then local thresholding through
    ``bw_scanner``.

    Args:
        img: Image to process; passed directly to ``rembg.remove``
            (presumably a BGR array such as ``cv2.imread`` produces --
            TODO confirm against callers).

    Returns:
        The array returned by ``bw_scanner`` for the rectified receipt.

    Raises:
        ValueError: If none of the ten largest contours reduces to a
            four-point polygon.
    """
    full_size = remove(img)

    # Downscale: finding the receipt contour is more efficient on a small image
    resize_ratio = 1000 / full_size.shape[0]
    small = opencv_resize(full_size, resize_ratio)

    # Convert to grayscale and suppress noise before the morphology steps
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 1)
    blurred = cv2.medianBlur(blurred, 7)

    # Erode, then dilate and close with large kernels so the receipt becomes
    # one solid region for Otsu thresholding
    erode_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 10))
    dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50, 50))
    close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 20))
    eroded = cv2.erode(blurred, erode_kernel, iterations=1)
    dilated = cv2.dilate(eroded, dilate_kernel)
    closed = cv2.morphologyEx(dilated, cv2.MORPH_CLOSE, close_kernel)
    _, mask = cv2.threshold(closed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Contours are detected on the thresholded mask; only the ten largest
    # are considered as receipt candidates
    contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    largest_contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]
    receipt_contour = get_receipt_contour(largest_contours)
    if receipt_contour is None:
        raise ValueError("No four-point receipt contour found in image")

    # contour_to_rect maps the corners from the downscaled copy back to
    # full-size coordinates before the perspective warp
    scanned = wrap_perspective(full_size, contour_to_rect(full_size, receipt_contour))
    return bw_scanner(scanned)