"""
Detect words on the page.

Return an array of the words' bounding boxes.
"""
import numpy as np
import matplotlib.pyplot as plt
import cv2

from .helpers import *


def detection(image, join=False):
    """Detect the words' bounding boxes.
    Return: numpy array of bounding boxes [x, y, x+w, y+h]
    """
    # Preprocess: blur, per-channel edge detection and binarization, then a
    # morphological closing so the letters of one word merge into a single blob.
    blurred = cv2.GaussianBlur(image, (5, 5), 18)
    edge_img = _edge_detect(blurred)
    ret, edge_img = cv2.threshold(edge_img, 50, 255, cv2.THRESH_BINARY)
    bw_img = cv2.morphologyEx(edge_img, cv2.MORPH_CLOSE,
                              np.ones((15, 15), np.uint8))

    return _text_detect(bw_img, image, join)
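
# A hedged usage sketch for detection(): the input is expected to be a
# 3-channel uint8 page image (e.g. loaded with cv2.imread); the output is an
# N x 4 int64 array of [x1, y1, x2, y2] boxes in the original image scale.
# "page.jpg" below is a hypothetical path, not part of this repository.
#
#     image = cv2.imread("page.jpg")
#     boxes = detection(image, join=True)
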
def sort_words(boxes):
    """Sort boxes - (x, y, x+w, y+h) from left to right, top to bottom."""
    mean_height = sum([y2 - y1 for _, y1, _, y2 in boxes]) / len(boxes)

    # Sort primarily by the y coordinate: the structured view sorts the
    # int64 box array in place by its second field (f1 = y).
    boxes.view('i8,i8,i8,i8').sort(order=['f1'], axis=0)
    current_line = boxes[0][1]
    lines = []
    tmp_line = []

    # Group boxes into lines: a box starts a new line when its y coordinate
    # lies more than the mean word height below the current line.
    for box in boxes:
        if box[1] > current_line + mean_height:
            lines.append(tmp_line)
            tmp_line = [box]
            current_line = box[1]
            continue
        tmp_line.append(box)
    lines.append(tmp_line)

    # Sort each line from left to right by the x coordinate.
    for line in lines:
        line.sort(key=lambda box: box[0])

    return lines
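
# A hedged sketch of combining detection() and sort_words(): sort_words()
# relies on the structured 'i8,i8,i8,i8' view, so it expects the boxes as a
# contiguous N x 4 int64 numpy array - which is what detection() returns.
#
#     boxes = detection(image)
#     lines = sort_words(boxes)      # list of lines, each a list of boxes
#     for line in lines:
#         for x1, y1, x2, y2 in line:
#             word_img = image[y1:y2, x1:x2]
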
def _edge_detect(im):
    """Edge detection using the Sobel operator.

    The Sobel operator is applied to each image channel (RGB) separately and
    the per-channel edge maps are merged with a pixel-wise maximum.
    """
    return np.max(np.array([_sobel_detect(im[:,:, 0]),
                            _sobel_detect(im[:,:, 1]),
                            _sobel_detect(im[:,:, 2])]), axis=0)


def _sobel_detect(channel):
    """Sobel operator."""
    sobelX = cv2.Sobel(channel, cv2.CV_16S, 1, 0)
    sobelY = cv2.Sobel(channel, cv2.CV_16S, 0, 1)
    sobel = np.hypot(sobelX, sobelY)
    sobel[sobel > 255] = 255
    return np.uint8(sobel)
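
# A small, optional sketch (not used by the pipeline above): matplotlib is
# already imported, so the intermediate edge map can be inspected like this,
# assuming `image` is a 3-channel page image you have loaded yourself.
#
#     edges = _edge_detect(cv2.GaussianBlur(image, (5, 5), 18))
#     plt.imshow(edges, cmap='gray')
#     plt.show()
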
def union(a, b):
    """Union of two rectangles a, b given as [x, y, w, h]."""
    x = min(a[0], b[0])
    y = min(a[1], b[1])
    w = max(a[0]+a[2], b[0]+b[2]) - x
    h = max(a[1]+a[3], b[1]+b[3]) - y
    return [x, y, w, h]


def _intersect(a, b):
    """Return True if rectangles a, b given as [x, y, w, h] overlap or touch."""
    x = max(a[0], b[0])
    y = max(a[1], b[1])
    w = min(a[0]+a[2], b[0]+b[2]) - x
    h = min(a[1]+a[3], b[1]+b[3]) - y
    if w < 0 or h < 0:
        return False
    return True
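
# Worked example for the two helpers above, with rectangles as [x, y, w, h]:
#
#     union([0, 0, 10, 10], [5, 5, 10, 10])       -> [0, 0, 15, 15]
#     _intersect([0, 0, 10, 10], [5, 5, 10, 10])  -> True
#     _intersect([0, 0, 10, 10], [20, 20, 5, 5])  -> False
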
def _group_rectangles(rec):
    """Union intersecting rectangles.

    Args:
        rec - list of rectangles in form [x, y, w, h]
    Return:
        list of grouped rectangles
    """
    tested = [False for i in range(len(rec))]
    final = []
    i = 0
    while i < len(rec):
        if not tested[i]:
            j = i+1
            while j < len(rec):
                if not tested[j] and _intersect(rec[i], rec[j]):
                    # Merge and rescan the remaining rectangles, since the
                    # grown rectangle may now intersect boxes skipped before.
                    rec[i] = union(rec[i], rec[j])
                    tested[j] = True
                    j = i
                j += 1
            final += [rec[i]]
        i += 1

    return final
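
# Example: the first two rectangles overlap and are merged, the third stays
# as it is (note that the input list is modified in place):
#
#     _group_rectangles([[0, 0, 10, 10], [5, 5, 10, 10], [30, 30, 5, 5]])
#     -> [[0, 0, 15, 15], [30, 30, 5, 5]]
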
def _text_detect(img, image, join=False):
    """Text detection using contours."""
    small = resize(img, 2000)

    # Find contours; the two-value unpacking matches the OpenCV 2.x/4.x
    # return signature (contours, hierarchy).
    mask = np.zeros(small.shape, np.uint8)
    cnt, hierarchy = cv2.findContours(np.copy(small),
                                      cv2.RETR_CCOMP,
                                      cv2.CHAIN_APPROX_SIMPLE)

    index = 0
    boxes = []
    # Walk through all contours in the top level of the hierarchy
    while (index >= 0):
        x, y, w, h = cv2.boundingRect(cnt[index])
        cv2.drawContours(mask, cnt, index, (255, 255, 255), cv2.FILLED)
        maskROI = mask[y:y+h, x:x+w]

        # Ratio of white pixels to the area of the bounding rectangle
        r = cv2.countNonZero(maskROI) / (w * h)

        # Size and shape limits for text regions
        if (r > 0.1
                and 1600 > w > 10
                and 1600 > h > 10
                and h/w < 3
                and w/h < 10
                and (60 // h) * w < 1000):
            boxes += [[x, y, w, h]]

        index = hierarchy[0][index][0]

    if join:
        # Join intersecting rectangles into single words
        boxes = _group_rectangles(boxes)

    # Draw the rectangles and collect the boxes as [x1, y1, x2, y2]
    small = cv2.cvtColor(small, cv2.COLOR_GRAY2RGB)
    bounding_boxes = np.array([0, 0, 0, 0])
    for (x, y, w, h) in boxes:
        cv2.rectangle(small, (x, y), (x+w, y+h), (0, 255, 0), 2)
        bounding_boxes = np.vstack((bounding_boxes,
                                    np.array([x, y, x+w, y+h])))

    implt(small, t='Bounding rectangles')

    # Scale the coordinates back to the original image size
    boxes = bounding_boxes.dot(ratio(image, small.shape[0])).astype(np.int64)
    return boxes[1:]
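
# Optional compatibility sketch, not called by the functions in this module
# (the name _find_contours_compat is illustrative only): cv2.findContours
# returns (image, contours, hierarchy) in OpenCV 3.x but (contours, hierarchy)
# in OpenCV 2.x/4.x. If the two-value unpacking in _text_detect fails on your
# OpenCV version, a wrapper like this can be used instead.
def _find_contours_compat(img, mode, method):
    """Return (contours, hierarchy) regardless of the OpenCV version."""
    res = cv2.findContours(img, mode, method)
    return res[-2], res[-1]
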
def textDetectWatershed(thresh):
    """NOT IN USE - Text detection using watershed algorithm.
    Based on: http://docs.opencv.org/trunk/d3/db4/tutorial_py_watershed.html
    """
    # IMG is expected to come from the wildcard import of .helpers
    img = cv2.cvtColor(cv2.imread("data/textdet/%s.jpg" % IMG),
                       cv2.COLOR_BGR2RGB)
    img = resize(img, 3000)
    thresh = resize(thresh, 3000)

    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=3)

    # Sure background area
    sure_bg = cv2.dilate(opening, kernel, iterations=3)

    # Sure foreground area
    dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
    ret, sure_fg = cv2.threshold(dist_transform,
                                 0.01*dist_transform.max(), 255, 0)

    # Unknown region: neither sure background nor sure foreground
    sure_fg = np.uint8(sure_fg)
    unknown = cv2.subtract(sure_bg, sure_fg)

    # Marker labelling
    ret, markers = cv2.connectedComponents(sure_fg)

    # Add one to all labels so that sure background is not 0, but 1
    markers += 1

    # Mark the unknown region with zero
    markers[unknown == 255] = 0

    markers = cv2.watershed(img, markers)
    implt(markers, t='Markers')

    image = img.copy()
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    for mark in np.unique(markers):
        # Skip the zero label
        if mark == 0:
            continue

        # Draw the region on a mask and take its biggest contour
        mask = np.zeros(gray.shape, dtype="uint8")
        mask[markers == mark] = 255

        cnts = cv2.findContours(mask.copy(),
                                cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
        c = max(cnts, key=cv2.contourArea)

        # Bounding rectangle and the ratio of white pixels inside it
        x, y, w, h = cv2.boundingRect(c)
        cv2.drawContours(mask, [c], 0, (255, 255, 255), cv2.FILLED)
        maskROI = mask[y:y+h, x:x+w]
        r = cv2.countNonZero(maskROI) / (w * h)

        # Size limits for text regions
        if r > 0.2 and 2000 > w > 15 and 1500 > h > 15:
            cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 2)

    implt(image)
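

# A minimal, hedged smoke test. "page.jpg" is a hypothetical sample path and a
# readable image there is an assumption; run the module as a package module
# (e.g. `python -m <package>.words`) so the relative import of .helpers resolves.
if __name__ == "__main__":
    page = cv2.imread("page.jpg")                # hypothetical input page
    if page is None:
        raise SystemExit("Supply a real page image instead of 'page.jpg'.")

    word_boxes = detection(page, join=True)      # N x 4 array of [x1, y1, x2, y2]
    if word_boxes.ndim == 2 and len(word_boxes):
        text_lines = sort_words(word_boxes)
        print("Detected %d words on %d lines." % (len(word_boxes),
                                                  len(text_lines)))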