CTRNetDemo

Runtime error

App Files Files Community

CTRNetDemo / rapid_ch_det /utils.py

widness

Duplicate from SWHL/CTRNetDemo

58a3f69 over 1 year ago

raw

history blame

15.8 kB

	"""
	# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	"""
	# -*- encoding: utf-8 -*-
	# @Author: SWHL
	# @Contact: liekkaskono@163.com
	import sys
	import warnings
	from pathlib import Path

	import cv2
	import numpy as np
	import pyclipper
	import six
	import yaml
	from onnxruntime import (GraphOptimizationLevel, InferenceSession,
	SessionOptions, get_available_providers, get_device)
	from shapely.geometry import Polygon

	root_dir = Path(__file__).resolve().parent.parent


	class OrtInferSession():
	    """Build and hold an ONNX Runtime ``InferenceSession`` described by a config dict.

	    The config is read for ``'use_cuda'`` and ``'model_path'``; when CUDA is
	    requested and usable, the optional ``'CUDAExecutionProvider'`` entry
	    supplies that provider's options.
	    """

	    def __init__(self, config):
	        """Create the session.

	        Args:
	            config: mapping with ``'use_cuda'`` and ``'model_path'`` entries.
	                ``'model_path'`` is joined onto the module-level ``root_dir``
	                and the resulting string is written back into ``config``.

	        Raises:
	            FileNotFoundError: the resolved model path does not exist.
	            FileExistsError: the resolved model path is not a regular file.
	        """
	        sess_opt = SessionOptions()
	        sess_opt.log_severity_level = 4
	        sess_opt.enable_cpu_mem_arena = False
	        sess_opt.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

	        cuda_ep = 'CUDAExecutionProvider'
	        cpu_ep = 'CPUExecutionProvider'
	        cpu_provider_options = {
	            "arena_extend_strategy": "kSameAsRequested",
	        }

	        EP_list = []
	        cuda_usable = (config['use_cuda']
	                       and get_device() == 'GPU'
	                       and cuda_ep in get_available_providers())
	        if cuda_usable:
	            # A config without explicit CUDA options falls back to the
	            # provider defaults instead of failing with KeyError.
	            EP_list = [(cuda_ep, config.get(cuda_ep, {}))]
	        # The CPU provider is always registered, after CUDA when present.
	        EP_list.append((cpu_ep, cpu_provider_options))

	        config['model_path'] = str(root_dir / config['model_path'])
	        self._verify_model(config['model_path'])
	        self.session = InferenceSession(config['model_path'],
	                                        sess_options=sess_opt,
	                                        providers=EP_list)

	        # CUDA was asked for but the created session is not using it.
	        if config['use_cuda'] and cuda_ep not in self.session.get_providers():
	            warnings.warn(f'{cuda_ep} is not available for current env, the inference part is automatically shifted to be executed under {cpu_ep}.\n'
	                          'Please ensure the installed onnxruntime-gpu version matches your cuda and cudnn version, '
	                          'you can check their relations from the official website: '
	                          'https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html',
	                          RuntimeWarning)

	    def get_input_name(self, input_idx=0):
	        """Return the name of the session input at position ``input_idx``."""
	        return self.session.get_inputs()[input_idx].name

	    def get_output_name(self, output_idx=0):
	        """Return the name of the session output at position ``output_idx``."""
	        return self.session.get_outputs()[output_idx].name

	    @staticmethod
	    def _verify_model(model_path):
	        """Check that ``model_path`` points at an existing regular file.

	        Raises:
	            FileNotFoundError: nothing exists at ``model_path``.
	            FileExistsError: the path exists but is not a file.
	        """
	        model_path = Path(model_path)
	        if not model_path.exists():
	            raise FileNotFoundError(f'{model_path} does not exist.')
	        if not model_path.is_file():
	            raise FileExistsError(f'{model_path} is not a file.')


	def read_yaml(yaml_path):
	    """Parse the YAML file at ``yaml_path`` and return the loaded object.

	    NOTE(review): ``yaml.Loader`` is PyYAML's full loader and can build
	    arbitrary Python objects, so only trusted files should be passed in.
	    ``yaml.safe_load`` would be the hardened alternative if the configs
	    never rely on custom tags -- confirm before switching.
	    """
	    with open(yaml_path, 'rb') as stream:
	        return yaml.load(stream, Loader=yaml.Loader)


	class DecodeImage():
	    """Decode the encoded image buffer stored under ``data['image']``."""

	    def __init__(self, img_mode='RGB', channel_first=False):
	        """
	        args:
	            img_mode(str): ``'RGB'`` reverses the decoded channel order,
	                ``'GRAY'`` produces a gray image replicated to 3 channels,
	                any other value keeps the decoder's output untouched
	            channel_first(bool): transpose the result to (c, h, w) when True
	        """
	        self.img_mode = img_mode
	        self.channel_first = channel_first

	    def __call__(self, data):
	        """
	        Decode ``data['image']`` in place.

	        args:
	            data(dict): must hold non-empty encoded bytes under ``'image'``
	        return:
	            the same dict with ``'image'`` replaced by the decoded array,
	            or None when OpenCV cannot decode the buffer
	        """
	        img = data['image']
	        if six.PY2:
	            assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage"
	        else:
	            assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage"

	        img = np.frombuffer(img, dtype='uint8')
	        img = cv2.imdecode(img, 1)
	        if img is None:
	            return None

	        if self.img_mode == 'GRAY':
	            # imdecode with flag 1 yields a 3-channel BGR image, which
	            # COLOR_GRAY2BGR rejects; collapse to one channel first.
	            if img.ndim == 3:
	                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
	        elif self.img_mode == 'RGB':
	            assert img.shape[2] == 3, f'invalid shape of image[{img.shape}]'
	            # Reverse the last axis: BGR -> RGB
	            img = img[:, :, ::-1]

	        if self.channel_first:
	            img = img.transpose((2, 0, 1))
	        data['image'] = img
	        return data


	class NormalizeImage():
	    """Scale an image, subtract a per-channel mean and divide by a per-channel std."""

	    def __init__(self, scale=None, mean=None, std=None, order='chw'):
	        """
	        args:
	            scale: multiplier applied first; a string is evaluated as a
	                Python expression, None means 1/255
	            mean: three per-channel means, None selects the built-in defaults
	            std: three per-channel stds, None selects the built-in defaults
	            order(str): 'chw' stores the statistics as (3, 1, 1), anything
	                else as (1, 1, 3)
	        """
	        # NOTE(review): eval() runs arbitrary code -- a string scale must
	        # only ever come from a trusted configuration.
	        if isinstance(scale, str):
	            scale = eval(scale)
	        if scale is None:
	            scale = 1.0 / 255.0
	        self.scale = np.float32(scale)

	        if mean is None:
	            mean = [0.485, 0.456, 0.406]
	        if std is None:
	            std = [0.229, 0.224, 0.225]

	        stat_shape = (1, 1, 3) if order != 'chw' else (3, 1, 1)
	        self.mean = np.array(mean).reshape(stat_shape).astype('float32')
	        self.std = np.array(std).reshape(stat_shape).astype('float32')

	    def __call__(self, data):
	        """Replace ``data['image']`` with ``(image * scale - mean) / std`` and return ``data``."""
	        scaled = np.array(data['image']).astype(np.float32) * self.scale
	        data['image'] = (scaled - self.mean) / self.std
	        return data


	class ToCHWImage():
	    """Move the last axis of ``data['image']`` to the front (hwc -> chw)."""

	    def __init__(self):
	        pass

	    def __call__(self, data):
	        """Store the axis-permuted image back under ``'image'`` and return ``data``."""
	        hwc = np.array(data['image'])
	        data['image'] = np.transpose(hwc, (2, 0, 1))
	        return data


	class KeepKeys():
	    """Project a data dict onto a fixed, ordered list of keys."""

	    def __init__(self, keep_keys):
	        self.keep_keys = keep_keys

	    def __call__(self, data):
	        """Return the values of ``data`` for each kept key, in ``keep_keys`` order."""
	        return [data[name] for name in self.keep_keys]


	class DetResizeForTest():
	    """Resize ``data['image']`` for the detector and record the applied ratios.

	    The strategy is picked from the constructor keywords:

	    * ``image_shape``  -> type 1, resize to exactly that (h, w)
	    * ``resize_long``  -> type 2, scale the longer side to that length and
	      round both sides up to a multiple of 128
	    * otherwise        -> type 0, limit one side to ``limit_side_len``
	      (``limit_type`` 'max' or 'min') and round both sides to a multiple of 32
	    """

	    def __init__(self, **kwargs):
	        super(DetResizeForTest, self).__init__()
	        self.resize_type = 0
	        if 'image_shape' in kwargs:
	            self.image_shape = kwargs['image_shape']
	            self.resize_type = 1
	        elif 'limit_side_len' in kwargs:
	            self.limit_side_len = kwargs.get('limit_side_len', 736)
	            self.limit_type = kwargs.get('limit_type', 'min')

	        # Evaluated independently of the branch above: ``resize_long``
	        # overrides the resize type even when ``image_shape`` was given.
	        if 'resize_long' in kwargs:
	            self.resize_type = 2
	            self.resize_long = kwargs.get('resize_long', 960)
	        else:
	            self.limit_side_len = kwargs.get('limit_side_len', 736)
	            self.limit_type = kwargs.get('limit_type', 'min')

	    def __call__(self, data):
	        """
	        Resize ``data['image']`` and add ``data['shape']``.

	        args:
	            data(dict): holds an image array under 'image'
	        return:
	            the same dict; 'shape' is np.array([src_h, src_w, ratio_h, ratio_w])
	        """
	        img = data['image']
	        src_h, src_w = img.shape[:2]

	        if self.resize_type == 0:
	            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
	        elif self.resize_type == 2:
	            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
	        else:
	            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
	        # NOTE(review): when type 0 yields a degenerate size, img and both
	        # ratios are None here and are stored as-is.
	        data['image'] = img
	        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
	        return data

	    def resize_image_type1(self, img):
	        """
	        resize image to the fixed ``self.image_shape`` (h, w)
	        args:
	            img(array): array with shape [h, w, c]
	        return(tuple):
	            img, [ratio_h, ratio_w]
	        """
	        resize_h, resize_w = self.image_shape
	        ori_h, ori_w = img.shape[:2]  # (h, w, c)
	        ratio_h = float(resize_h) / ori_h
	        ratio_w = float(resize_w) / ori_w
	        img = cv2.resize(img, (int(resize_w), int(resize_h)))
	        return img, [ratio_h, ratio_w]

	    def resize_image_type0(self, img):
	        """
	        resize image to a size multiple of 32 which is required by the network
	        args:
	            img(array): array with shape [h, w, c]
	        return(tuple):
	            img, [ratio_h, ratio_w]; (None, (None, None)) when the rounded
	            target size has a non-positive side
	        raises:
	            whatever cv2.resize raises; the sizes involved are printed first
	        """
	        limit_side_len = self.limit_side_len
	        h, w = img.shape[:2]

	        if self.limit_type == 'max':
	            # shrink only when the longer side exceeds the limit
	            longest = max(h, w)
	            ratio = float(limit_side_len) / longest if longest > limit_side_len else 1.
	        else:
	            # enlarge only when the shorter side is below the limit
	            shortest = min(h, w)
	            ratio = float(limit_side_len) / shortest if shortest < limit_side_len else 1.

	        resize_h = int(h * ratio)
	        resize_w = int(w * ratio)

	        # snap both sides to the nearest multiple of 32
	        resize_h = int(round(resize_h / 32) * 32)
	        resize_w = int(round(resize_w / 32) * 32)

	        if int(resize_w) <= 0 or int(resize_h) <= 0:
	            return None, (None, None)

	        try:
	            img = cv2.resize(img, (int(resize_w), int(resize_h)))
	        except Exception:
	            # Keep the diagnostic output, but surface the real error instead
	            # of ending the process with a "success" exit status.
	            print(img.shape, resize_w, resize_h)
	            raise
	        ratio_h = resize_h / float(h)
	        ratio_w = resize_w / float(w)
	        return img, [ratio_h, ratio_w]

	    def resize_image_type2(self, img):
	        """
	        scale the longer side to ``self.resize_long``, then round both sides
	        up to a multiple of 128
	        args:
	            img(array): array with shape [h, w, c]
	        return(tuple):
	            img, [ratio_h, ratio_w]
	        """
	        h, w = img.shape[:2]

	        # Fix the longer side
	        ratio = float(self.resize_long) / max(h, w)
	        resize_h = int(h * ratio)
	        resize_w = int(w * ratio)

	        max_stride = 128
	        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
	        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
	        img = cv2.resize(img, (int(resize_w), int(resize_h)))
	        ratio_h = resize_h / float(h)
	        ratio_w = resize_w / float(w)

	        return img, [ratio_h, ratio_w]


	def transform(data, ops=None):
	    """Feed ``data`` through each operator in ``ops`` in order.

	    Stops and returns None as soon as an operator returns None; with no
	    operators the input is returned unchanged.
	    """
	    pipeline = [] if ops is None else ops
	    result = data
	    for step in pipeline:
	        result = step(result)
	        if result is None:
	            break
	    return result


	def create_operators(op_param_dict):
	    """
	    Instantiate one operator per entry of ``op_param_dict``.

	    Each key is resolved to a callable by name and called with the entry's
	    value as keyword arguments (None means no arguments). The operators
	    are returned in the dict's iteration order.
	    """
	    # NOTE(review): eval() executes the key as Python code -- the mapping
	    # must come from a trusted configuration, never from user input.
	    built = []
	    for class_name, kwargs in op_param_dict.items():
	        factory = eval(class_name)
	        built.append(factory(**({} if kwargs is None else kwargs)))
	    return built


	def draw_text_det_res(dt_boxes, img_path):
	    """Read the image at ``img_path`` and outline every box in ``dt_boxes`` on it.

	    Each box is cast to int32 and reshaped to (-1, 2) points, then drawn as
	    a closed polyline with color (255, 255, 0) and thickness 2. Returns
	    whatever ``cv2.imread`` produced, with the outlines drawn on it.
	    """
	    canvas = cv2.imread(img_path)
	    for quad in dt_boxes:
	        vertices = np.array(quad).astype(np.int32).reshape(-1, 2)
	        cv2.polylines(canvas, [vertices], True,
	                      color=(255, 255, 0), thickness=2)
	    return canvas


	class DBPostProcess():
	    """Convert Differentiable Binarization (DB) output maps into text boxes."""

	    def __init__(self,
	                 thresh=0.3,
	                 box_thresh=0.7,
	                 max_candidates=1000,
	                 unclip_ratio=2.0,
	                 score_mode="fast",
	                 use_dilation=False):
	        """
	        args:
	            thresh: pixels of the prediction above this value form the bitmap
	            box_thresh: candidate boxes scoring below this are dropped
	            max_candidates: upper bound on contours examined per image
	            unclip_ratio: expansion factor used by ``unclip``
	            score_mode: "fast" scores the min-area box, anything else the contour
	            use_dilation: dilate the bitmap with a 2x2 kernel of ones first
	        """
	        self.thresh = thresh
	        self.box_thresh = box_thresh
	        self.max_candidates = max_candidates
	        self.unclip_ratio = unclip_ratio
	        self.min_size = 3
	        self.score_mode = score_mode
	        self.dilation_kernel = np.array([[1, 1], [1, 1]]) if use_dilation else None

	    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
	        '''
	        pred: score map the boxes are scored against
	        _bitmap: single 2-D map (H, W) whose values are binarized as {0, 1}
	        dest_width, dest_height: size the box coordinates are rescaled to
	        returns: (int16 array of boxes, list of scores)
	        '''
	        height, width = _bitmap.shape

	        found = cv2.findContours((_bitmap * 255).astype(np.uint8),
	                                 cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
	        # findContours returns either 3 or 2 values depending on the
	        # OpenCV version; the contours sit at a different position in each.
	        if len(found) == 3:
	            contours = found[1]
	        elif len(found) == 2:
	            contours = found[0]

	        candidate_count = min(len(contours), self.max_candidates)

	        boxes, scores = [], []
	        for idx in range(candidate_count):
	            contour = contours[idx]
	            points, short_side = self.get_mini_boxes(contour)
	            if short_side < self.min_size:
	                continue
	            points = np.array(points)

	            if self.score_mode == "fast":
	                score = self.box_score_fast(pred, points.reshape(-1, 2))
	            else:
	                score = self.box_score_slow(pred, contour)
	            if self.box_thresh > score:
	                continue

	            # Grow the box, then re-fit a minimum-area rectangle around it.
	            expanded = self.unclip(points).reshape(-1, 1, 2)
	            box, short_side = self.get_mini_boxes(expanded)
	            if short_side < self.min_size + 2:
	                continue
	            box = np.array(box)

	            # Rescale from bitmap coordinates to the destination size.
	            box[:, 0] = np.clip(
	                np.round(box[:, 0] / width * dest_width), 0, dest_width)
	            box[:, 1] = np.clip(
	                np.round(box[:, 1] / height * dest_height), 0, dest_height)
	            boxes.append(box.astype(np.int16))
	            scores.append(score)
	        return np.array(boxes, dtype=np.int16), scores

	    def unclip(self, box):
	        """Offset ``box`` outward by ``area * unclip_ratio / perimeter`` with rounded joins."""
	        poly = Polygon(box)
	        distance = poly.area * self.unclip_ratio / poly.length
	        offsetter = pyclipper.PyclipperOffset()
	        offsetter.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
	        return np.array(offsetter.Execute(distance))

	    def get_mini_boxes(self, contour):
	        """Return the four corners of the minimum-area rectangle and its shorter side.

	        The corners are sorted by x; within the two leftmost and the two
	        rightmost corners the one with the smaller y comes first, and the
	        result is ordered left-small-y, right-small-y, right-large-y,
	        left-large-y.
	        """
	        rect = cv2.minAreaRect(contour)
	        corners = sorted(list(cv2.boxPoints(rect)), key=lambda pt: pt[0])

	        if corners[1][1] > corners[0][1]:
	            first, fourth = 0, 1
	        else:
	            first, fourth = 1, 0
	        if corners[3][1] > corners[2][1]:
	            second, third = 2, 3
	        else:
	            second, third = 3, 2

	        box = [corners[first], corners[second], corners[third], corners[fourth]]
	        return box, min(rect[1])

	    def box_score_fast(self, bitmap, _box):
	        """Mean of ``bitmap`` inside the polygon ``_box``, computed on its bounding window."""
	        h, w = bitmap.shape[:2]
	        box = _box.copy()
	        xs, ys = box[:, 0], box[:, 1]
	        xmin = np.clip(np.floor(xs.min()).astype(np.int32), 0, w - 1)
	        xmax = np.clip(np.ceil(xs.max()).astype(np.int32), 0, w - 1)
	        ymin = np.clip(np.floor(ys.min()).astype(np.int32), 0, h - 1)
	        ymax = np.clip(np.ceil(ys.max()).astype(np.int32), 0, h - 1)

	        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
	        # Shift the polygon into the window's local coordinates.
	        box[:, 0] = box[:, 0] - xmin
	        box[:, 1] = box[:, 1] - ymin
	        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
	        window = bitmap[ymin:ymax + 1, xmin:xmax + 1]
	        return cv2.mean(window, mask)[0]

	    def box_score_slow(self, bitmap, contour):
	        '''
	        box_score_slow: use the mean score inside the contour polygon
	        as the box score
	        '''
	        h, w = bitmap.shape[:2]
	        pts = np.reshape(contour.copy(), (-1, 2))

	        xmin = np.clip(np.min(pts[:, 0]), 0, w - 1)
	        xmax = np.clip(np.max(pts[:, 0]), 0, w - 1)
	        ymin = np.clip(np.min(pts[:, 1]), 0, h - 1)
	        ymax = np.clip(np.max(pts[:, 1]), 0, h - 1)

	        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)

	        # Shift the polygon into the window's local coordinates.
	        pts[:, 0] = pts[:, 0] - xmin
	        pts[:, 1] = pts[:, 1] - ymin

	        cv2.fillPoly(mask, pts.reshape(1, -1, 2).astype(np.int32), 1)
	        window = bitmap[ymin:ymax + 1, xmin:xmax + 1]
	        return cv2.mean(window, mask)[0]

	    def __call__(self, pred, shape_list):
	        """
	        args:
	            pred: 4-D prediction; index 0 of the second axis is used
	            shape_list: per-image (src_h, src_w, ratio_h, ratio_w)
	        return:
	            list with one {'points': boxes} dict per batch item
	        """
	        pred = pred[:, 0, :, :]
	        segmentation = pred > self.thresh

	        boxes_batch = []
	        for batch_index in range(pred.shape[0]):
	            src_h, src_w, _, _ = shape_list[batch_index]
	            mask = segmentation[batch_index]
	            if self.dilation_kernel is not None:
	                mask = cv2.dilate(np.array(mask).astype(np.uint8),
	                                  self.dilation_kernel)
	            boxes, _ = self.boxes_from_bitmap(pred[batch_index], mask,
	                                              src_w, src_h)

	            boxes_batch.append({'points': boxes})
	        return boxes_batch