Spaces:

tomofi
/

NDLOCR

Build error

NDLOCR / src /separate_pages_ssd /ssd_tools /ssd_utils.py

Your Name

Add files

c9019cd about 2 years ago

No virus

10.6 kB

	"""Some utils for SSD."""

	import numpy as np
	import tensorflow as tf


	class BBoxUtility(object):
	    """Utility class to do some stuff with bounding boxes and priors.

	    # Arguments
	        num_classes: Number of classes including background.
	        priors: Priors and variances, numpy tensor of shape (num_priors, 8),
	            priors[i] = [xmin, ymin, xmax, ymax, varxc, varyc, varw, varh].
	        overlap_threshold: Threshold to assign box to a prior.
	        nms_thresh: Nms threshold.
	        top_k: Number of total bboxes to be kept per image after nms step.

	    # References
	        https://arxiv.org/abs/1512.02325
	    """

	    def __init__(self, num_classes, priors=None, overlap_threshold=0.5,
	                 nms_thresh=0.45, top_k=400):
	        self.num_classes = num_classes
	        self.priors = priors
	        self.num_priors = 0 if priors is None else len(priors)
	        self.overlap_threshold = overlap_threshold
	        self._nms_thresh = nms_thresh
	        self._top_k = top_k
	        # Inputs of the NMS op; fed per class in `detection_out`.
	        self.boxes = tf.placeholder(dtype='float32', shape=(None, 4))
	        self.scores = tf.placeholder(dtype='float32', shape=(None,))
	        self.nms = self._build_nms()
	        # The session is configured with zero GPU devices.
	        self.sess = tf.Session(config=tf.ConfigProto(device_count={'GPU': 0}))

	    def _build_nms(self):
	        """Create the NMS op from the current `top_k` / `nms_thresh`."""
	        return tf.image.non_max_suppression(self.boxes, self.scores,
	                                            self._top_k,
	                                            iou_threshold=self._nms_thresh)

	    @property
	    def nms_thresh(self):
	        """IoU threshold used by the NMS op."""
	        return self._nms_thresh

	    @nms_thresh.setter
	    def nms_thresh(self, value):
	        self._nms_thresh = value
	        # The threshold is baked into the op, so the op has to be rebuilt.
	        self.nms = self._build_nms()

	    @property
	    def top_k(self):
	        """Maximum number of boxes the NMS op selects."""
	        return self._top_k

	    @top_k.setter
	    def top_k(self, value):
	        self._top_k = value
	        # The output size is baked into the op, so the op has to be rebuilt.
	        self.nms = self._build_nms()

	    def iou(self, box):
	        """Compute intersection over union for the box with all priors.

	        # Arguments
	            box: Box, numpy tensor of shape (4,).

	        # Return
	            iou: Intersection over union,
	                numpy tensor of shape (num_priors).
	        """
	        priors = self.priors
	        # compute intersection
	        inter_upleft = np.maximum(priors[:, :2], box[:2])
	        inter_botright = np.minimum(priors[:, 2:4], box[2:4])
	        # Negative extents mean "no overlap"; clamp them to zero.
	        inter_wh = np.maximum(inter_botright - inter_upleft, 0)
	        inter = inter_wh[:, 0] * inter_wh[:, 1]
	        # compute union
	        area_pred = (box[2] - box[0]) * (box[3] - box[1])
	        area_gt = ((priors[:, 2] - priors[:, 0]) *
	                   (priors[:, 3] - priors[:, 1]))
	        union = area_pred + area_gt - inter
	        return inter / union

	    def encode_box(self, box, return_iou=True):
	        """Encode box for training, do it only for assigned priors.

	        Priors whose IoU with the box exceeds `overlap_threshold` are
	        assigned; if there is none, the single best-matching prior is
	        assigned instead. Rows of unassigned priors stay zero.

	        # Arguments
	            box: Box, numpy tensor of shape (4,).
	            return_iou: Whether to concat iou to encoded values.

	        # Return
	            encoded_box: Flattened tensor with encoded box,
	                numpy tensor of shape (num_priors * (4 + int(return_iou)),).
	        """
	        iou = self.iou(box)
	        encoded_box = np.zeros((self.num_priors, 4 + int(return_iou)))
	        assign_mask = iou > self.overlap_threshold
	        if not assign_mask.any():
	            assign_mask[iou.argmax()] = True
	        if return_iou:
	            encoded_box[assign_mask, -1] = iou[assign_mask]
	        assigned_priors = self.priors[assign_mask]
	        box_center = 0.5 * (box[:2] + box[2:4])
	        box_wh = box[2:4] - box[:2]
	        prior_center = 0.5 * (assigned_priors[:, :2] +
	                              assigned_priors[:, 2:4])
	        prior_wh = assigned_priors[:, 2:4] - assigned_priors[:, :2]
	        # we encode variance: centre offsets are relative to the prior size
	        # and scaled by the centre variances, sizes are log-ratios scaled by
	        # the size variances.
	        encoded_box[assign_mask, :2] = ((box_center - prior_center) /
	                                        prior_wh /
	                                        assigned_priors[:, -4:-2])
	        encoded_box[assign_mask, 2:4] = (np.log(box_wh / prior_wh) /
	                                         assigned_priors[:, -2:])
	        return encoded_box.ravel()

	    def assign_boxes(self, boxes):
	        """Assign boxes to priors for training.

	        # Arguments
	            boxes: Box, numpy tensor of shape (num_boxes, 4 + num_classes),
	                num_classes without background.

	        # Return
	            assignment: Tensor with assigned boxes,
	                numpy tensor of shape (num_priors, 4 + num_classes + 8),
	                priors in ground truth are fictitious,
	                assignment[:, -8] has 1 if prior should be penalized
	                    or in other words is assigned to some ground truth box,
	                assignment[:, -7:] are all 0. See loss for more details.
	        """
	        assignment = np.zeros((self.num_priors, 4 + self.num_classes + 8))
	        # Every prior starts out as background (column 4).
	        assignment[:, 4] = 1.0
	        if len(boxes) == 0:
	            return assignment
	        encoded_boxes = np.apply_along_axis(self.encode_box, 1, boxes[:, :4])
	        # -> (num_boxes, num_priors, 4 encoded values + iou)
	        encoded_boxes = encoded_boxes.reshape(-1, self.num_priors, 5)
	        # For each prior pick the ground-truth box with the highest IoU.
	        best_iou = encoded_boxes[:, :, -1].max(axis=0)
	        best_iou_idx = encoded_boxes[:, :, -1].argmax(axis=0)
	        best_iou_mask = best_iou > 0
	        best_iou_idx = best_iou_idx[best_iou_mask]
	        assign_num = len(best_iou_idx)
	        encoded_boxes = encoded_boxes[:, best_iou_mask, :]
	        assignment[best_iou_mask, :4] = encoded_boxes[best_iou_idx,
	                                                      np.arange(assign_num),
	                                                      :4]
	        assignment[best_iou_mask, 4] = 0
	        assignment[best_iou_mask, 5:-8] = boxes[best_iou_idx, 4:]
	        assignment[best_iou_mask, -8] = 1
	        return assignment

	    def decode_boxes(self, mbox_loc, mbox_priorbox, variances):
	        """Convert bboxes from local predictions to shifted priors.

	        # Arguments
	            mbox_loc: Numpy array of predicted locations.
	            mbox_priorbox: Numpy array of prior boxes.
	            variances: Numpy array of variances.

	        # Return
	            decode_bbox: Shifted priors, clipped to [0, 1],
	                columns are [xmin, ymin, xmax, ymax].
	        """
	        prior_width = mbox_priorbox[:, 2] - mbox_priorbox[:, 0]
	        prior_height = mbox_priorbox[:, 3] - mbox_priorbox[:, 1]
	        prior_center_x = 0.5 * (mbox_priorbox[:, 2] + mbox_priorbox[:, 0])
	        prior_center_y = 0.5 * (mbox_priorbox[:, 3] + mbox_priorbox[:, 1])
	        center_x = mbox_loc[:, 0] * prior_width * variances[:, 0]
	        center_x += prior_center_x
	        # The y offset is relative to the prior *height*: `encode_box`
	        # divides the centre offset by (width, height) per axis, so decoding
	        # must multiply by the matching dimension.
	        center_y = mbox_loc[:, 1] * prior_height * variances[:, 1]
	        center_y += prior_center_y
	        width = np.exp(mbox_loc[:, 2] * variances[:, 2]) * prior_width
	        height = np.exp(mbox_loc[:, 3] * variances[:, 3]) * prior_height
	        decode_bbox = np.stack((center_x - 0.5 * width,
	                                center_y - 0.5 * height,
	                                center_x + 0.5 * width,
	                                center_y + 0.5 * height), axis=-1)
	        return np.clip(decode_bbox, 0.0, 1.0)

	    def detection_out(self, predictions, background_label_id=0, keep_top_k=200,
	                      confidence_threshold=0.01):
	        """Do non maximum suppression (nms) on prediction results.

	        # Arguments
	            predictions: Numpy array of predicted values; the last axis is
	                laid out as [4 locations, class confidences, 4 prior box
	                coordinates, 4 variances].
	            background_label_id: Label of background class.
	            keep_top_k: Number of total bboxes to be kept per image
	                after nms step.
	            confidence_threshold: Only consider detections,
	                whose confidences are larger than a threshold.

	        # Return
	            results: List of predictions for every picture. An entry is an
	                empty list when nothing passed the threshold, otherwise a
	                numpy array sorted by descending confidence whose rows are
	                [label, confidence, xmin, ymin, xmax, ymax]
	        """
	        mbox_loc = predictions[:, :, :4]
	        variances = predictions[:, :, -4:]
	        mbox_priorbox = predictions[:, :, -8:-4]
	        mbox_conf = predictions[:, :, 4:-8]
	        results = []
	        for i in range(len(mbox_loc)):
	            decode_bbox = self.decode_boxes(mbox_loc[i],
	                                            mbox_priorbox[i], variances[i])
	            image_results = []
	            for c in range(self.num_classes):
	                if c == background_label_id:
	                    continue
	                c_confs = mbox_conf[i, :, c]
	                c_confs_m = c_confs > confidence_threshold
	                if not c_confs_m.any():
	                    continue
	                boxes_to_process = decode_bbox[c_confs_m]
	                confs_to_process = c_confs[c_confs_m]
	                feed_dict = {self.boxes: boxes_to_process,
	                             self.scores: confs_to_process}
	                idx = self.sess.run(self.nms, feed_dict=feed_dict)
	                good_boxes = boxes_to_process[idx]
	                confs = confs_to_process[idx][:, None]
	                labels = c * np.ones((len(idx), 1))
	                c_pred = np.concatenate((labels, confs, good_boxes),
	                                        axis=1)
	                image_results.extend(c_pred)
	            if len(image_results) > 0:
	                image_results = np.array(image_results)
	                # Highest confidence first, then keep the best `keep_top_k`.
	                order = np.argsort(image_results[:, 1])[::-1]
	                image_results = image_results[order][:keep_top_k]
	            results.append(image_results)
	        return results