|
|
|
""" |
|
Include functions for normalizing images of words and letters |
|
Main functions: word_normalization, letter_normalization, image_standardization |
|
""" |
|
import numpy as np |
|
import cv2 |
|
import math |
|
|
|
from .helpers import * |
|
|
|
|
|
def image_standardization(image): |
|
"""Image standardization should result in same output |
|
as tf.image.per_image_standardization. |
|
""" |
|
return (image - np.mean(image)) / max(np.std(image), 1.0/math.sqrt(image.size)) |
|
|
|
|
|
def _crop_add_border(img, height, threshold=50, border=True, border_size=15): |
|
"""Crop and add border to word image of letter segmentation.""" |
|
|
|
|
|
ret, img = cv2.threshold(img, threshold, 255, cv2.THRESH_TOZERO) |
|
|
|
x0 = 0 |
|
y0 = 0 |
|
x1 = img.shape[1] |
|
y1 = img.shape[0] |
|
|
|
for i in range(img.shape[0]): |
|
if np.count_nonzero(img[i, :]) > 1: |
|
y0 = i |
|
break |
|
for i in reversed(range(img.shape[0])): |
|
if np.count_nonzero(img[i, :]) > 1: |
|
y1 = i+1 |
|
break |
|
for i in range(img.shape[1]): |
|
if np.count_nonzero(img[:, i]) > 1: |
|
x0 = i |
|
break |
|
for i in reversed(range(img.shape[1])): |
|
if np.count_nonzero(img[:, i]) > 1: |
|
x1 = i+1 |
|
break |
|
|
|
if height != 0: |
|
img = resize(img[y0:y1, x0:x1], height, True) |
|
else: |
|
img = img[y0:y1, x0:x1] |
|
|
|
if border: |
|
return cv2.copyMakeBorder(img, 0, 0, border_size, border_size, |
|
cv2.BORDER_CONSTANT, |
|
value=[0, 0, 0]) |
|
return img |
|
|
|
|
|
def _word_tilt(img, height, border=True, border_size=15): |
|
"""Detect the angle and tilt the image.""" |
|
edges = cv2.Canny(img, 50, 150, apertureSize = 3) |
|
lines = cv2.HoughLines(edges, 1, np.pi/180, 30) |
|
|
|
if lines is not None: |
|
meanAngle = 0 |
|
|
|
numLines = np.sum(1 for l in lines if l[0][1] < 0.7 or l[0][1] > 2.6) |
|
if numLines > 1: |
|
meanAngle = np.mean([l[0][1] for l in lines if l[0][1] < 0.7 or l[0][1] > 2.6]) |
|
|
|
|
|
if meanAngle != 0 and (meanAngle < 0.7 or meanAngle > 2.6): |
|
img = _tilt_by_angle(img, meanAngle, height) |
|
return _crop_add_border(img, height, 50, border, border_size) |
|
|
|
|
|
def _tilt_by_angle(img, angle, height): |
|
"""Tilt the image by given angle.""" |
|
dist = np.tan(angle) * height |
|
width = len(img[0]) |
|
sPoints = np.float32([[0,0], [0,height], [width,height], [width,0]]) |
|
|
|
|
|
|
|
if dist > 0: |
|
tPoints = np.float32([[0,0], |
|
[dist,height], |
|
[width+dist,height], |
|
[width,0]]) |
|
else: |
|
tPoints = np.float32([[-dist,0], |
|
[0,height], |
|
[width,height], |
|
[width-dist,0]]) |
|
|
|
M = cv2.getPerspectiveTransform(sPoints, tPoints) |
|
return cv2.warpPerspective(img, M, (int(width+abs(dist)), height)) |
|
|
|
|
|
def _sobel_detect(channel): |
|
"""The Sobel Operator.""" |
|
sobelX = cv2.Sobel(channel, cv2.CV_16S, 1, 0) |
|
sobelY = cv2.Sobel(channel, cv2.CV_16S, 0, 1) |
|
|
|
sobel = np.hypot(sobelX, sobelY) |
|
sobel[sobel > 255] = 255 |
|
return np.uint8(sobel) |
|
|
|
|
|
class HysterThresh: |
|
def __init__(self, img): |
|
img = 255 - img |
|
img = (img - np.min(img)) / (np.max(img) - np.min(img)) * 255 |
|
hist, bins = np.histogram(img.ravel(), 256, [0,256]) |
|
|
|
self.high = np.argmax(hist) + 65 |
|
self.low = np.argmax(hist) + 45 |
|
self.diff = 255 - self.high |
|
|
|
self.img = img |
|
self.im = np.zeros(img.shape, dtype=img.dtype) |
|
|
|
def get_image(self): |
|
self._hyster() |
|
return np.uint8(self.im) |
|
|
|
def _hyster_rec(self, r, c): |
|
h, w = self.img.shape |
|
for ri in range(r-1, r+2): |
|
for ci in range(c-1, c+2): |
|
if (h > ri >= 0 |
|
and w > ci >= 0 |
|
and self.im[ri, ci] == 0 |
|
and self.high > self.img[ri, ci] >= self.low): |
|
self.im[ri, ci] = self.img[ri, ci] + self.diff |
|
self._hyster_rec(ri, ci) |
|
|
|
def _hyster(self): |
|
r, c = self.img.shape |
|
for ri in range(r): |
|
for ci in range(c): |
|
if (self.img[ri, ci] >= self.high): |
|
self.im[ri, ci] = 255 |
|
self.img[ri, ci] = 255 |
|
self._hyster_rec(ri, ci) |
|
|
|
|
|
def _hyst_word_norm(image): |
|
"""Word normalization using hystheresis thresholding.""" |
|
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) |
|
|
|
img = cv2.bilateralFilter(gray, 10, 10, 30) |
|
return HysterThresh(img).get_image() |
|
|
|
|
|
def word_normalization(image, height, border=True, tilt=True, border_size=15, hyst_norm=False): |
|
""" Preprocess a word - resize, binarize, tilt world.""" |
|
image = resize(image, height, True) |
|
|
|
if hyst_norm: |
|
th = _hyst_word_norm(image) |
|
else: |
|
img = cv2.bilateralFilter(image, 10, 30, 30) |
|
gray = 255 - cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) |
|
norm = cv2.normalize(gray, None, 0, 255, cv2.NORM_MINMAX) |
|
ret,th = cv2.threshold(norm, 50, 255, cv2.THRESH_TOZERO) |
|
|
|
if tilt: |
|
return _word_tilt(th, height, border, border_size) |
|
return _crop_add_border(th, height=height, border=border, border_size=border_size) |
|
|
|
|
|
def _resize_letter(img, size = 56): |
|
"""Resize bigger side of the image to given size.""" |
|
if (img.shape[0] > img.shape[1]): |
|
rat = size / img.shape[0] |
|
return cv2.resize(img, (int(rat * img.shape[1]), size)) |
|
else: |
|
rat = size / img.shape[1] |
|
return cv2.resize(img, (size, int(rat * img.shape[0]))) |
|
return img |
|
|
|
|
|
def letter_normalization(image, is_thresh=True, dim=False): |
|
"""Preprocess a letter - crop, resize""" |
|
if is_thresh and image.shape[0] > 0 and image.shape[1] > 0: |
|
image = _crop_add_border(image, height=0, threshold=80, border=False) |
|
|
|
resized = image |
|
if image.shape[0] > 1 and image.shape[1] > 1: |
|
resized = _resize_letter(image) |
|
|
|
result = np.zeros((64, 64), np.uint8) |
|
offset = [0, 0] |
|
|
|
if image.shape[0] > image.shape[1]: |
|
offset = [int((result.shape[1] - resized.shape[1])/2), 4] |
|
else: |
|
offset = [4, int((result.shape[0] - resized.shape[0])/2)] |
|
|
|
result[offset[1]:offset[1] + resized.shape[0], |
|
offset[0]:offset[0] + resized.shape[1]] = resized |
|
|
|
if dim: |
|
return result, image.shape |
|
return result |
|
|