Spaces:

Parechan
/

lanpip

Runtime error

App Files Files Community

lanpip / test /ocr /normalization.py

Parechan

Upload 35 files

0c84ee8 verified about 1 year ago

raw

history blame contribute delete

6.83 kB

	# -*- coding: utf-8 -*-
	"""
	Functions for normalizing images of words and letters.
	Main functions: word_normalization, letter_normalization, image_standardization
	"""
	import numpy as np
	import cv2
	import math

	from .helpers import *


	def image_standardization(image):
	    """Shift an image to zero mean and scale it by its standard deviation.

	    Intended to give the same output as
	    tf.image.per_image_standardization: the divisor is the standard
	    deviation of the image, floored at 1/sqrt(number of elements) so a
	    uniform image is not divided by zero.
	    """
	    centered = image - np.mean(image)
	    # Lower bound on the divisor, derived from the element count.
	    min_stddev = 1.0 / math.sqrt(image.size)
	    adjusted_stddev = max(np.std(image), min_stddev)
	    return centered / adjusted_stddev


	def _crop_add_border(img, height, threshold=50, border=True, border_size=15):
	    """Trim the empty margins of a word/letter image and optionally pad it.

	    Pixels that are not above `threshold` are zeroed first. A row or
	    column counts as content when it holds more than one nonzero pixel;
	    the image is cropped to the span between the first and last such
	    row/column. When no row (or column) qualifies, the full extent along
	    that axis is kept.

	    If `height` is non-zero the crop is passed to `resize` with that
	    height. If `border` is true, `border_size` black pixels are added to
	    the left and right sides only.
	    """
	    # Clear small values
	    _, img = cv2.threshold(img, threshold, 255, cv2.THRESH_TOZERO)

	    def _content_span(counts, full_length):
	        # Indices whose row/column has more than one nonzero pixel.
	        dense = np.flatnonzero(counts > 1)
	        if dense.size == 0:
	            return 0, full_length
	        return dense[0], dense[-1] + 1

	    # Count nonzero pixels per row / per column (any extra axes are
	    # folded into the count, as whole-slice counting would do).
	    row_axes = tuple(axis for axis in range(img.ndim) if axis != 0)
	    col_axes = tuple(axis for axis in range(img.ndim) if axis != 1)
	    top, bottom = _content_span(np.count_nonzero(img, axis=row_axes),
	                                img.shape[0])
	    left, right = _content_span(np.count_nonzero(img, axis=col_axes),
	                                img.shape[1])

	    cropped = img[top:bottom, left:right]
	    if height != 0:
	        cropped = resize(cropped, height, True)

	    if not border:
	        return cropped
	    return cv2.copyMakeBorder(cropped, 0, 0, border_size, border_size,
	                              cv2.BORDER_CONSTANT,
	                              value=[0, 0, 0])


	def _word_tilt(img, height, border=True, border_size=15):
	    """Detect the slant angle of a word image and tilt the image to fix it.

	    Lines are detected with Canny edges followed by a Hough transform.
	    Only lines whose theta is below 0.7 rad or above 2.6 rad are
	    considered. When more than one such line exists and their mean theta
	    also lies in that range, the image is sheared by that mean angle.
	    The result (tilted or not) is always cropped, and bordered if
	    requested, by `_crop_add_border`.

	    Args:
	        img: Thresholded single-channel word image.
	        height: Image height, forwarded to the tilt and crop helpers.
	        border: Whether to add a left/right border after cropping.
	        border_size: Border width in pixels.

	    Returns:
	        The tilt-corrected, cropped image.
	    """
	    edges = cv2.Canny(img, 50, 150, apertureSize=3)
	    lines = cv2.HoughLines(edges, 1, np.pi / 180, 30)

	    if lines is not None:
	        # Each Hough line is indexed as line[0] == (rho, theta).
	        angles = [line[0][1] for line in lines
	                  if line[0][1] < 0.7 or line[0][1] > 2.6]

	        # Set min number of valid lines (try higher)
	        mean_angle = np.mean(angles) if len(angles) > 1 else 0

	        # Averaging thetas from both ends of the range can land outside
	        # it, so the mean itself is re-checked before tilting.
	        if mean_angle != 0 and (mean_angle < 0.7 or mean_angle > 2.6):
	            img = _tilt_by_angle(img, mean_angle, height)
	    return _crop_add_border(img, height, 50, border, border_size)


	def _tilt_by_angle(img, angle, height):
	    """Shear the image horizontally by the given angle.

	    The horizontal displacement between the top and bottom edges is
	    tan(angle) * height. The output is widened by the absolute value of
	    that displacement and keeps the given height.
	    """
	    shift = np.tan(angle) * height
	    width = len(img[0])
	    source = np.float32([[0, 0], [0, height], [width, height], [width, 0]])

	    # Shift is positive for angle < 0.7; negative for angle > 2.6.
	    # Either the bottom edge or the top edge is moved to the right, so
	    # the image always stays inside the widened canvas.
	    if shift > 0:
	        top_offset, bottom_offset = 0, shift
	    else:
	        top_offset, bottom_offset = -shift, 0

	    target = np.float32([[top_offset, 0],
	                         [bottom_offset, height],
	                         [width + bottom_offset, height],
	                         [width + top_offset, 0]])

	    transform = cv2.getPerspectiveTransform(source, target)
	    out_size = (int(width + abs(shift)), height)
	    return cv2.warpPerspective(img, transform, out_size)


	def _sobel_detect(channel):
	    """Return the Sobel gradient magnitude of a channel as uint8.

	    The x and y derivatives are computed as 16-bit signed images,
	    combined as sqrt(x^2 + y^2) and capped at 255 before the cast.
	    """
	    grad_x = cv2.Sobel(channel, cv2.CV_16S, 1, 0)
	    grad_y = cv2.Sobel(channel, cv2.CV_16S, 0, 1)
	    magnitude = np.hypot(grad_x, grad_y)
	    return np.uint8(np.minimum(magnitude, 255))


	class HysterThresh:
	    """Hysteresis thresholding of a single-channel image.

	    The input is inverted (255 - img) and stretched to the 0..255 range.
	    Two thresholds are placed above the most frequent intensity of the
	    stretched image: `high` = peak + 65 and `low` = peak + 45. Pixels at
	    or above `high` become 255 in the output. Pixels in [`low`, `high`)
	    are kept only when they are 8-connected, through other such pixels,
	    to a pixel at or above `high`; they are written as their value plus
	    `diff` (255 - `high`). All other pixels stay 0.
	    """

	    def __init__(self, img):
	        """Prepare the stretched image and the thresholds.

	        Args:
	            img: 2-D array of intensities (get_image unpacks the shape
	                as rows, columns).
	        """
	        inverted = 255 - img
	        lowest = np.min(inverted)
	        span = np.max(inverted) - lowest
	        if span == 0:
	            # Uniform image: avoid 0/0 (NaNs); nothing can exceed `high`,
	            # so the output is all zeros.
	            stretched = np.zeros(inverted.shape, dtype=np.float64)
	        else:
	            stretched = (inverted - lowest) / span * 255
	        hist, _ = np.histogram(stretched.ravel(), 256, [0, 256])

	        peak = np.argmax(hist)
	        self.high = peak + 65
	        self.low = peak + 45
	        self.diff = 255 - self.high

	        self.img = stretched
	        self.im = np.zeros(stretched.shape, dtype=stretched.dtype)

	    def get_image(self):
	        """Run the thresholding and return the result as a uint8 array."""
	        self._hyster()
	        return np.uint8(self.im)

	    def _hyster_rec(self, r, c):
	        """Mark every weak pixel connected to (r, c).

	        Uses an explicit stack instead of recursion, so long connected
	        regions cannot exceed the interpreter's recursion limit.
	        """
	        h, w = self.img.shape
	        pending = [(r, c)]
	        while pending:
	            row, col = pending.pop()
	            # 3x3 neighbourhood clipped to the image bounds.
	            for ri in range(max(row - 1, 0), min(row + 2, h)):
	                for ci in range(max(col - 1, 0), min(col + 2, w)):
	                    # im == 0 doubles as the "not visited yet" flag.
	                    if (self.im[ri, ci] == 0
	                            and self.high > self.img[ri, ci] >= self.low):
	                        self.im[ri, ci] = self.img[ri, ci] + self.diff
	                        pending.append((ri, ci))

	    def _hyster(self):
	        """Set strong pixels to 255 and grow them into adjacent weak ones."""
	        strong = self.img >= self.high
	        self.im[strong] = 255
	        self.img[strong] = 255
	        for ri, ci in np.argwhere(strong):
	            self._hyster_rec(ri, ci)


	def _hyst_word_norm(image):
	    """Normalize a word image using hysteresis thresholding.

	    The RGB image is converted to grayscale, smoothed with a bilateral
	    filter and passed through HysterThresh.
	    """
	    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
	    smoothed = cv2.bilateralFilter(grayscale, 10, 10, 30)
	    thresholder = HysterThresh(smoothed)
	    return thresholder.get_image()


	def word_normalization(image, height, border=True, tilt=True, border_size=15, hyst_norm=False):
	    """Preprocess a word image: resize, threshold, then tilt and crop.

	    The image is first resized to `height`. With `hyst_norm` it is
	    thresholded by `_hyst_word_norm`; otherwise it is smoothed, converted
	    to inverted grayscale, min-max normalized to 0..255 and values not
	    above 50 are zeroed. With `tilt` the result goes through
	    `_word_tilt` (which also crops); otherwise it is only cropped.
	    `border` and `border_size` are forwarded to the cropping step.
	    """
	    image = resize(image, height, True)

	    if not hyst_norm:
	        smoothed = cv2.bilateralFilter(image, 10, 30, 30)
	        inverted = 255 - cv2.cvtColor(smoothed, cv2.COLOR_RGB2GRAY)
	        stretched = cv2.normalize(inverted, None, 0, 255, cv2.NORM_MINMAX)
	        th = cv2.threshold(stretched, 50, 255, cv2.THRESH_TOZERO)[1]
	    else:
	        th = _hyst_word_norm(image)

	    if not tilt:
	        return _crop_add_border(th, height=height, border=border, border_size=border_size)
	    return _word_tilt(th, height, border, border_size)


	def _resize_letter(img, size=56):
	    """Resize the image so that its longer side equals `size`.

	    The shorter side is scaled by the same ratio and truncated to an
	    integer, but never below 1 pixel: a very elongated image would
	    otherwise yield a zero-sized target, which cv2.resize rejects.

	    Args:
	        img: Image array with at least two dimensions (rows, columns).
	        size: Target length of the longer side in pixels.

	    Returns:
	        The resized image.
	    """
	    rows, cols = img.shape[0], img.shape[1]
	    # cv2.resize takes the target size as (width, height).
	    if rows > cols:
	        ratio = size / rows
	        target = (max(1, int(ratio * cols)), size)
	    else:
	        ratio = size / cols
	        target = (size, max(1, int(ratio * rows)))
	    return cv2.resize(img, target)


	def letter_normalization(image, is_thresh=True, dim=False):
	    """Preprocess a letter image: crop, resize and place on a 64x64 canvas.

	    With `is_thresh` a non-empty image is first cropped by
	    `_crop_add_border` (threshold 80, no border). Images larger than one
	    pixel in both dimensions are then resized by `_resize_letter`. The
	    result is copied into a zeroed 64x64 uint8 canvas: 4 pixels from the
	    edge along the longer side and centred along the other.

	    With `dim` the function returns a tuple of the canvas and the shape
	    of the (cropped) image before resizing; otherwise only the canvas.
	    """
	    rows, cols = image.shape[0], image.shape[1]
	    if is_thresh and rows > 0 and cols > 0:
	        image = _crop_add_border(image, height=0, threshold=80, border=False)

	    letter = image
	    if image.shape[0] > 1 and image.shape[1] > 1:
	        letter = _resize_letter(image)

	    canvas = np.zeros((64, 64), np.uint8)
	    letter_rows, letter_cols = letter.shape[0], letter.shape[1]

	    # Calculate offset for smaller size
	    if image.shape[0] > image.shape[1]:
	        top = 4
	        left = int((canvas.shape[1] - letter_cols) / 2)
	    else:
	        top = int((canvas.shape[0] - letter_rows) / 2)
	        left = 4

	    # Replace zeros by image
	    canvas[top:top + letter_rows, left:left + letter_cols] = letter

	    return (canvas, image.shape) if dim else canvas