SWHL committed on
Commit 38a5dfd · 1 Parent(s): 809becd

First commit

.gitignore ADDED
@@ -0,0 +1,5 @@
+ .vscode
+
+ *.pyc
+
+ __pycache__/
app.py ADDED
@@ -0,0 +1,43 @@
+ # -*- encoding: utf-8 -*-
+ # @Author: SWHL
+ # @Contact: liekkaskono@163.com
+ import os
+ os.system('pip install -r requirements.txt')
+
+ import cv2
+ import gradio as gr
+
+ from ctrnet_infer import CTRNetInfer
+
+
+ def inference(img):
+     img_path = img.name
+     img = cv2.imread(img_path)
+     pred = ctrnet(img)
+     pred = cv2.cvtColor(pred, cv2.COLOR_BGR2RGB)
+     return pred
+
+
+ model_path = 'models/CTRNet_G.onnx'
+ ctrnet = CTRNetInfer(model_path)
+
+ title = 'CTRNet Demo'
+ description = '''This is the demo for the paper “Don't Forget Me: Accurate Background Recovery for Text Removal via Modeling Local-Global Context”. Github Repo: https://github.com/lcy0604/CTRNet'''
+ css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
+ examples = [['images/1.jpg'], ['images/2.jpg'], ['images/4.jpg']]
+
+ gr.Interface(
+     inference,
+     inputs=[
+         gr.inputs.Image(type='file', label='Input'),
+     ],
+     outputs=[
+         gr.outputs.Image(type='file', label='Output_image'),
+     ],
+     title=title,
+     description=description,
+     examples=examples,
+     css=css,
+     allow_flagging='never',
+     enable_queue=True
+ ).launch(debug=True, enable_queue=True)
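
Note: inference above only reads the uploaded file's .name attribute (Gradio's type='file' input hands it a tempfile-like object), so it can be exercised without the UI. A minimal sketch — the SimpleNamespace stand-in is purely illustrative, not something Gradio provides:

    # Hypothetical stand-in for the tempfile-like object Gradio passes
    # to inference() when the input uses type='file'; only .name is consumed.
    from types import SimpleNamespace

    fake_upload = SimpleNamespace(name='images/1.jpg')
    result = inference(fake_upload)  # RGB ndarray, ready for display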
ctrnet_infer.py ADDED
@@ -0,0 +1,201 @@
+ # -*- encoding: utf-8 -*-
+ # @Author: SWHL
+ # @Contact: liekkaskono@163.com
+ import copy
+ import time
+
+ import cv2
+ import numpy as np
+ import pyclipper
+ from onnxruntime import InferenceSession
+ from shapely.geometry import Polygon
+
+ from rapid_ch_det import TextDetector
+
+
+ class SimpleDataset():
+     def __call__(self, img: np.ndarray, bboxes: np.ndarray):
+         '''
+         bboxes: (N, 4, 2)
+         '''
+         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+         gt_instance = np.zeros(img.shape[:2], dtype='uint8')
+         for i in range(len(bboxes)):
+             cv2.drawContours(gt_instance, [bboxes[i]], -1, i + 1, -1)
+         gt_text = gt_instance.copy()
+         gt_text[gt_text > 0] = 1
+         gt_text = gt_text[None, None, ...].astype(np.float32)
+
+         canvas, shrink_mask, mask_ori = self.get_seg_map(img, bboxes)
+         soft_mask = canvas + mask_ori
+         index_mask = np.where(soft_mask > 1)
+         soft_mask[index_mask] = 1
+         soft_mask = soft_mask[None, None, ...].astype(np.float32)
+
+         img = np.transpose(img, (2, 0, 1)).astype(np.float32) / 255.0
+         img = img[None, ...]
+         structure_im = copy.deepcopy(img)
+         return img, structure_im, gt_text, soft_mask
+
+     def draw_border_map(self, polygon, canvas, mask_ori, mask):
+         polygon = np.array(polygon)
+         assert polygon.ndim == 2
+         assert polygon.shape[1] == 2
+
+         ### shrink box ###
+         polygon_shape = Polygon(polygon)
+         distance = polygon_shape.area * \
+             (1 - np.power(0.95, 2)) / polygon_shape.length
+         subject = [tuple(l) for l in polygon]
+         padding = pyclipper.PyclipperOffset()
+         padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+         padded_polygon = np.array(padding.Execute(-distance)[0])
+         cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)
+         ### shrink box ###
+
+         cv2.fillPoly(mask_ori, [polygon.astype(np.int32)], 1.0)
+
+         polygon = padded_polygon
+         polygon_shape = Polygon(padded_polygon)
+         distance = polygon_shape.area * \
+             (1 - np.power(0.4, 2)) / polygon_shape.length
+         subject = [tuple(l) for l in polygon]
+         padding = pyclipper.PyclipperOffset()
+         padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+         padded_polygon = np.array(padding.Execute(distance)[0])
+
+         xmin = padded_polygon[:, 0].min()
+         xmax = padded_polygon[:, 0].max()
+         ymin = padded_polygon[:, 1].min()
+         ymax = padded_polygon[:, 1].max()
+         width = xmax - xmin + 1
+         height = ymax - ymin + 1
+
+         polygon[:, 0] = polygon[:, 0] - xmin
+         polygon[:, 1] = polygon[:, 1] - ymin
+
+         xs = np.broadcast_to(
+             np.linspace(0, width - 1, num=width).reshape(1, width), (height, width))
+         ys = np.broadcast_to(
+             np.linspace(0, height - 1, num=height).reshape(height, 1), (height, width))
+
+         distance_map = np.zeros(
+             (polygon.shape[0], height, width), dtype=np.float32)
+         for i in range(polygon.shape[0]):
+             j = (i + 1) % polygon.shape[0]
+             absolute_distance = self.compute_distance(xs, ys, polygon[i], polygon[j])
+             distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
+         distance_map = distance_map.min(axis=0)
+
+         xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
+         xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
+         ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
+         ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
+         canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
+             1 - distance_map[
+                 ymin_valid-ymin:ymax_valid-ymax+height,
+                 xmin_valid-xmin:xmax_valid-xmax+width],
+             canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])
+
+     @staticmethod
+     def compute_distance(xs, ys, point_1, point_2):
+         '''
+         compute the distance from a point to a line segment
+         ys: coordinates in the first axis
+         xs: coordinates in the second axis
+         point_1, point_2: (x, y), the ends of the line segment
+         '''
+         square_distance_1 = np.square(
+             xs - point_1[0]) + np.square(ys - point_1[1])
+         square_distance_2 = np.square(
+             xs - point_2[0]) + np.square(ys - point_2[1])
+         square_distance = np.square(
+             point_1[0] - point_2[0]) + np.square(point_1[1] - point_2[1])
+
+         cosin = (square_distance - square_distance_1 - square_distance_2) / \
+             (2 * np.sqrt(square_distance_1 * square_distance_2) + 1e-50)
+         square_sin = 1 - np.square(cosin)
+         square_sin = np.nan_to_num(square_sin)
+         result = np.sqrt(square_distance_1 * square_distance_2 *
+                          square_sin / square_distance)
+
+         result[cosin < 0] = np.sqrt(np.fmin(
+             square_distance_1, square_distance_2))[cosin < 0]
+         return result
+
+     def get_seg_map(self, img, label):
+         canvas = np.zeros(img.shape[:2], dtype=np.float32)
+         mask = np.zeros(img.shape[:2], dtype=np.float32)
+         mask_ori = np.zeros(img.shape[:2], dtype=np.float32)
+         polygons = label
+
+         for i in range(len(polygons)):
+             self.draw_border_map(polygons[i], canvas, mask_ori, mask=mask)
+         return canvas, mask, mask_ori
+
+
+ class CTRNetInfer():
+     def __init__(self, model_path) -> None:
+         self.session = InferenceSession(model_path,
+                                         providers=['CPUExecutionProvider'])
+         self.dataset = SimpleDataset()
+         self.text_det = TextDetector()
+         self.input_shape = (512, 512)
+
+     def __call__(self, ori_img):
+         ori_img_shape = ori_img.shape[:2]
+         bboxes = self.text_det(ori_img)[0].astype(np.int64)
+
+         # resize the image to 512x512
+         resize_img = cv2.resize(ori_img, self.input_shape,
+                                 interpolation=cv2.INTER_LINEAR)
+         resize_bboxes = self.get_resized_points(bboxes,
+                                                 ori_img_shape,
+                                                 self.input_shape)
+
+         img, structure_im, gt_text, soft_mask = self.dataset(
+             resize_img, resize_bboxes)
+         input_dict = {
+             'input': img,
+             'gt_text': gt_text,
+             'soft_mask': soft_mask,
+             'structure_im': structure_im
+         }
+         prediction = self.session.run(None, input_dict)[3]
+
+         withMask_prediction = prediction * soft_mask + img * (1 - soft_mask)
+         withMask_prediction = np.transpose(withMask_prediction, (0, 2, 3, 1)) * 255
+         withMask_prediction = withMask_prediction.squeeze().astype(np.uint8)
+         withMask_prediction = cv2.cvtColor(withMask_prediction,
+                                            cv2.COLOR_BGR2RGB)
+         ori_pred = cv2.resize(withMask_prediction, ori_img_shape[::-1],
+                               interpolation=cv2.INTER_LINEAR)
+         return ori_pred
+
+     @staticmethod
+     def get_resized_points(cur_points, cur_shape, new_shape):
+         cur_points = np.array(cur_points)
+
+         # x coordinates scale with the width ratio, y with the height ratio
+         ratio_x = cur_shape[1] / new_shape[1]
+         ratio_y = cur_shape[0] / new_shape[0]
+         cur_points[:, :, 0] = cur_points[:, :, 0] / ratio_x
+         cur_points[:, :, 1] = cur_points[:, :, 1] / ratio_y
+         return cur_points.astype(np.int64)
+
+
+ if __name__ == '__main__':
+     model_path = 'CTRNet_G.onnx'
+     ctrnet = CTRNetInfer(model_path)
+
+     img_path = 'images/1.jpg'
+     ori_img = cv2.imread(img_path)
+
+     s = time.time()
+     pred = ctrnet(ori_img)
+     print(f'elapsed: {time.time() - s}')
+
+     cv2.imwrite('pred_result.jpg', pred)
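
Note: the final composition in CTRNetInfer.__call__ keeps original pixels wherever soft_mask is 0 and takes network output wherever it is 1. A toy illustration of that blend, with the NCHW shapes shrunk to 1x1x2x2 for readability:

    import numpy as np

    prediction = np.full((1, 1, 2, 2), 0.9, dtype=np.float32)  # network output
    img = np.full((1, 1, 2, 2), 0.2, dtype=np.float32)         # original pixels
    soft_mask = np.array([[[[1, 0], [0, 1]]]], dtype=np.float32)

    blended = prediction * soft_mask + img * (1 - soft_mask)
    print(blended.squeeze())  # [[0.9 0.2]
                              #  [0.2 0.9]]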
images/1.jpg ADDED
images/2.jpg ADDED
images/4.jpg ADDED
models/CTRNet_G.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:15d46cec531574c5afef5f27f287f0ccf62a911749089f7cfcbf760226a3eda8
+ size 842447752
rapid_ch_det/__init__.py ADDED
@@ -0,0 +1,4 @@
+ # -*- encoding: utf-8 -*-
+ # @Author: SWHL
+ # @Contact: liekkaskono@163.com
+ from .text_detect import TextDetector
rapid_ch_det/config.yaml ADDED
@@ -0,0 +1,29 @@
+ model_path: models/ch_PP-OCRv3_det_infer.onnx
+
+ use_cuda: false
+ CUDAExecutionProvider:
+   device_id: 0
+   arena_extend_strategy: kNextPowerOfTwo
+   cudnn_conv_algo_search: EXHAUSTIVE
+   do_copy_in_default_stream: true
+
+ pre_process:
+   DetResizeForTest:
+     limit_side_len: 736
+     limit_type: min
+   NormalizeImage:
+     std: [0.229, 0.224, 0.225]
+     mean: [0.485, 0.456, 0.406]
+     scale: 1./255.
+     order: hwc
+   ToCHWImage:
+   KeepKeys:
+     keep_keys: ['image', 'shape']
+
+ post_process:
+   thresh: 0.3
+   box_thresh: 0.5
+   max_candidates: 1000
+   unclip_ratio: 1.6
+   use_dilation: true
+   score_mode: "fast"
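
Note: this file is consumed by rapid_ch_det/text_detect.py: read_yaml loads it, create_operators instantiates each pre_process entry by class name, and the post_process block is unpacked into DBPostProcess. A minimal sketch of that flow, assuming the committed layout:

    from rapid_ch_det.utils import read_yaml, create_operators, DBPostProcess

    config = read_yaml('rapid_ch_det/config.yaml')
    pre_ops = create_operators(config['pre_process'])  # DetResizeForTest, NormalizeImage, ToCHWImage, KeepKeys
    post_op = DBPostProcess(**config['post_process'])  # thresh=0.3, box_thresh=0.5, ...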
rapid_ch_det/models/ch_PP-OCRv3_det_infer.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3439588c030faea393a54515f51e983d8e155b19a2e8aba7891934c1cf0de526
+ size 2432880
rapid_ch_det/text_detect.py ADDED
@@ -0,0 +1,134 @@
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # -*- encoding: utf-8 -*-
+ # @Author: SWHL
+ # @Contact: liekkaskono@163.com
+ import argparse
+ import time
+ from pathlib import Path
+
+ import cv2
+ import numpy as np
+
+ try:
+     from .utils import (DBPostProcess, create_operators,
+                         transform, read_yaml, OrtInferSession)
+ except ImportError:
+     from utils import (DBPostProcess, create_operators,
+                        transform, read_yaml, OrtInferSession)
+
+ root_dir = Path(__file__).resolve().parent
+
+
+ class TextDetector():
+     def __init__(self, config=str(root_dir / 'config.yaml')):
+         if isinstance(config, str):
+             config = read_yaml(config)
+         config['model_path'] = str(root_dir / config['model_path'])
+
+         self.preprocess_op = create_operators(config['pre_process'])
+         self.postprocess_op = DBPostProcess(**config['post_process'])
+
+         session_instance = OrtInferSession(config)
+         self.session = session_instance.session
+         self.input_name = session_instance.get_input_name()
+
+     def __call__(self, img):
+         if img is None:
+             raise ValueError('img is None')
+
+         ori_im_shape = img.shape[:2]
+
+         data = {'image': img}
+         data = transform(data, self.preprocess_op)
+         img, shape_list = data
+         if img is None:
+             return None, 0
+
+         img = np.expand_dims(img, axis=0).astype(np.float32)
+         shape_list = np.expand_dims(shape_list, axis=0)
+
+         starttime = time.time()
+         preds = self.session.run(None, {self.input_name: img})
+
+         post_result = self.postprocess_op(preds[0], shape_list)
+
+         dt_boxes = post_result[0]['points']
+         dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im_shape)
+         elapse = time.time() - starttime
+         return dt_boxes, elapse
+
+     def order_points_clockwise(self, pts):
+         """
+         reference from:
+         https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
+         sort the points based on their x-coordinates
+         """
+         xSorted = pts[np.argsort(pts[:, 0]), :]
+
+         # grab the left-most and right-most points from the sorted
+         # x-coordinate points
+         leftMost = xSorted[:2, :]
+         rightMost = xSorted[2:, :]
+
+         # now, sort the left-most coordinates according to their
+         # y-coordinates so we can grab the top-left and bottom-left
+         # points, respectively
+         leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
+         (tl, bl) = leftMost
+
+         rightMost = rightMost[np.argsort(rightMost[:, 1]), :]
+         (tr, br) = rightMost
+
+         rect = np.array([tl, tr, br, bl], dtype="float32")
+         return rect
+
+     def clip_det_res(self, points, img_height, img_width):
+         for pno in range(points.shape[0]):
+             points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
+             points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
+         return points
+
+     def filter_tag_det_res(self, dt_boxes, image_shape):
+         img_height, img_width = image_shape[:2]
+         dt_boxes_new = []
+         for box in dt_boxes:
+             box = self.order_points_clockwise(box)
+             box = self.clip_det_res(box, img_height, img_width)
+             rect_width = int(np.linalg.norm(box[0] - box[1]))
+             rect_height = int(np.linalg.norm(box[0] - box[3]))
+             if rect_width <= 3 or rect_height <= 3:
+                 continue
+             dt_boxes_new.append(box)
+         dt_boxes = np.array(dt_boxes_new)
+         return dt_boxes
+
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--config_path', type=str, default='config.yaml')
+     parser.add_argument('--image_path', type=str, default=None)
+     args = parser.parse_args()
+
+     config = read_yaml(args.config_path)
+
+     text_detector = TextDetector(config)
+
+     img = cv2.imread(args.image_path)
+     dt_boxes, elapse = text_detector(img)
+
+     from utils import draw_text_det_res
+     src_im = draw_text_det_res(dt_boxes, args.image_path)
+     cv2.imwrite('det_results.jpg', src_im)
+     print('The det_results.jpg has been saved in the current directory.')
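
Note: a quick illustration of order_points_clockwise above with a toy quadrilateral (arbitrary coordinates; constructing TextDetector() loads the committed ONNX model, so the model files must be present):

    import numpy as np

    det = TextDetector()  # uses rapid_ch_det/config.yaml by default
    pts = np.array([[10., 80.], [90., 10.], [10., 10.], [90., 80.]])
    print(det.order_points_clockwise(pts))
    # [[10. 10.]   top-left
    #  [90. 10.]   top-right
    #  [90. 80.]   bottom-right
    #  [10. 80.]]  bottom-left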
rapid_ch_det/utils.py ADDED
@@ -0,0 +1,461 @@
+ """
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """
+ # -*- encoding: utf-8 -*-
+ # @Author: SWHL
+ # @Contact: liekkaskono@163.com
+ import sys
+ import warnings
+ from pathlib import Path
+
+ import cv2
+ import numpy as np
+ import pyclipper
+ import six
+ import yaml
+ from onnxruntime import (GraphOptimizationLevel, InferenceSession,
+                          SessionOptions, get_available_providers, get_device)
+ from shapely.geometry import Polygon
+
+ root_dir = Path(__file__).resolve().parent.parent
+
+
+ class OrtInferSession():
+     def __init__(self, config):
+         sess_opt = SessionOptions()
+         sess_opt.log_severity_level = 4
+         sess_opt.enable_cpu_mem_arena = False
+         sess_opt.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
+
+         cuda_ep = 'CUDAExecutionProvider'
+         cpu_ep = 'CPUExecutionProvider'
+         cpu_provider_options = {
+             "arena_extend_strategy": "kSameAsRequested",
+         }
+
+         EP_list = []
+         if config['use_cuda'] and get_device() == 'GPU' \
+                 and cuda_ep in get_available_providers():
+             EP_list = [(cuda_ep, config[cuda_ep])]
+         EP_list.append((cpu_ep, cpu_provider_options))
+
+         config['model_path'] = str(root_dir / config['model_path'])
+         self._verify_model(config['model_path'])
+         self.session = InferenceSession(config['model_path'],
+                                         sess_options=sess_opt,
+                                         providers=EP_list)
+
+         if config['use_cuda'] and cuda_ep not in self.session.get_providers():
+             warnings.warn(f'{cuda_ep} is not available in the current env, so inference automatically falls back to {cpu_ep}.\n'
+                           'Please ensure the installed onnxruntime-gpu version matches your CUDA and cuDNN versions; '
+                           'you can check their relations on the official web site: '
+                           'https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html',
+                           RuntimeWarning)
+
+     def get_input_name(self, input_idx=0):
+         return self.session.get_inputs()[input_idx].name
+
+     def get_output_name(self, output_idx=0):
+         return self.session.get_outputs()[output_idx].name
+
+     @staticmethod
+     def _verify_model(model_path):
+         model_path = Path(model_path)
+         if not model_path.exists():
+             raise FileNotFoundError(f'{model_path} does not exist.')
+         if not model_path.is_file():
+             raise FileExistsError(f'{model_path} is not a file.')
+
+
+ def read_yaml(yaml_path):
+     with open(yaml_path, 'rb') as f:
+         data = yaml.load(f, Loader=yaml.Loader)
+     return data
+
+
+ class DecodeImage():
+     """decode image"""
+
+     def __init__(self, img_mode='RGB', channel_first=False):
+         self.img_mode = img_mode
+         self.channel_first = channel_first
+
+     def __call__(self, data):
+         img = data['image']
+         if six.PY2:
+             assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage"
+         else:
+             assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage"
+
+         img = np.frombuffer(img, dtype='uint8')
+         img = cv2.imdecode(img, 1)
+         if img is None:
+             return None
+
+         if self.img_mode == 'GRAY':
+             img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+         elif self.img_mode == 'RGB':
+             assert img.shape[2] == 3, f'invalid shape of image[{img.shape}]'
+             img = img[:, :, ::-1]
+
+         if self.channel_first:
+             img = img.transpose((2, 0, 1))
+         data['image'] = img
+         return data
+
+
+ class NormalizeImage():
+     """normalize image, e.g. subtract mean, divide by std"""
+
+     def __init__(self, scale=None, mean=None, std=None, order='chw'):
+         if isinstance(scale, str):
+             scale = eval(scale)
+         self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
+         mean = mean if mean is not None else [0.485, 0.456, 0.406]
+         std = std if std is not None else [0.229, 0.224, 0.225]
+
+         shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
+         self.mean = np.array(mean).reshape(shape).astype('float32')
+         self.std = np.array(std).reshape(shape).astype('float32')
+
+     def __call__(self, data):
+         img = np.array(data['image']).astype(np.float32)
+         data['image'] = (img * self.scale - self.mean) / self.std
+         return data
+
+
+ class ToCHWImage():
+     """convert hwc image to chw image"""
+     def __init__(self):
+         pass
+
+     def __call__(self, data):
+         img = np.array(data['image'])
+         data['image'] = img.transpose((2, 0, 1))
+         return data
+
+
+ class KeepKeys():
+     def __init__(self, keep_keys):
+         self.keep_keys = keep_keys
+
+     def __call__(self, data):
+         data_list = []
+         for key in self.keep_keys:
+             data_list.append(data[key])
+         return data_list
+
+
+ class DetResizeForTest():
+     def __init__(self, **kwargs):
+         super(DetResizeForTest, self).__init__()
+         self.resize_type = 0
+         if 'image_shape' in kwargs:
+             self.image_shape = kwargs['image_shape']
+             self.resize_type = 1
+         elif 'limit_side_len' in kwargs:
+             self.limit_side_len = kwargs.get('limit_side_len', 736)
+             self.limit_type = kwargs.get('limit_type', 'min')
+
+         if 'resize_long' in kwargs:
+             self.resize_type = 2
+             self.resize_long = kwargs.get('resize_long', 960)
+         else:
+             self.limit_side_len = kwargs.get('limit_side_len', 736)
+             self.limit_type = kwargs.get('limit_type', 'min')
+
+     def __call__(self, data):
+         img = data['image']
+         src_h, src_w = img.shape[:2]
+
+         if self.resize_type == 0:
+             img, [ratio_h, ratio_w] = self.resize_image_type0(img)
+         elif self.resize_type == 2:
+             img, [ratio_h, ratio_w] = self.resize_image_type2(img)
+         else:
+             img, [ratio_h, ratio_w] = self.resize_image_type1(img)
+         data['image'] = img
+         data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
+         return data
+
+     def resize_image_type1(self, img):
+         resize_h, resize_w = self.image_shape
+         ori_h, ori_w = img.shape[:2]  # (h, w, c)
+         ratio_h = float(resize_h) / ori_h
+         ratio_w = float(resize_w) / ori_w
+         img = cv2.resize(img, (int(resize_w), int(resize_h)))
+         return img, [ratio_h, ratio_w]
+
+     def resize_image_type0(self, img):
+         """
+         resize image to a size multiple of 32, which is required by the network
+         args:
+             img(array): array with shape [h, w, c]
+         return(tuple):
+             img, (ratio_h, ratio_w)
+         """
+         limit_side_len = self.limit_side_len
+         h, w = img.shape[:2]
+
+         # limit the max side
+         if self.limit_type == 'max':
+             if max(h, w) > limit_side_len:
+                 if h > w:
+                     ratio = float(limit_side_len) / h
+                 else:
+                     ratio = float(limit_side_len) / w
+             else:
+                 ratio = 1.
+         else:
+             if min(h, w) < limit_side_len:
+                 if h < w:
+                     ratio = float(limit_side_len) / h
+                 else:
+                     ratio = float(limit_side_len) / w
+             else:
+                 ratio = 1.
+         resize_h = int(h * ratio)
+         resize_w = int(w * ratio)
+
+         resize_h = int(round(resize_h / 32) * 32)
+         resize_w = int(round(resize_w / 32) * 32)
+
+         try:
+             if int(resize_w) <= 0 or int(resize_h) <= 0:
+                 return None, (None, None)
+             img = cv2.resize(img, (int(resize_w), int(resize_h)))
+         except cv2.error:
+             print(img.shape, resize_w, resize_h)
+             sys.exit(0)
+         ratio_h = resize_h / float(h)
+         ratio_w = resize_w / float(w)
+         return img, [ratio_h, ratio_w]
+
+     def resize_image_type2(self, img):
+         h, w = img.shape[:2]
+
+         resize_w = w
+         resize_h = h
+
+         # Fix the longer side
+         if resize_h > resize_w:
+             ratio = float(self.resize_long) / resize_h
+         else:
+             ratio = float(self.resize_long) / resize_w
+
+         resize_h = int(resize_h * ratio)
+         resize_w = int(resize_w * ratio)
+
+         max_stride = 128
+         resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
+         resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
+         img = cv2.resize(img, (int(resize_w), int(resize_h)))
+         ratio_h = resize_h / float(h)
+         ratio_w = resize_w / float(w)
+
+         return img, [ratio_h, ratio_w]
+
+
+ def transform(data, ops=None):
+     """transform"""
+     if ops is None:
+         ops = []
+
+     for op in ops:
+         data = op(data)
+         if data is None:
+             return None
+     return data
+
+
+ def create_operators(op_param_dict):
+     """
+     create operators based on the config
+     """
+     ops = []
+     for op_name, param in op_param_dict.items():
+         if param is None:
+             param = {}
+         op = eval(op_name)(**param)
+         ops.append(op)
+     return ops
+
+
+ def draw_text_det_res(dt_boxes, img_path):
+     src_im = cv2.imread(img_path)
+     for box in dt_boxes:
+         box = np.array(box).astype(np.int32).reshape(-1, 2)
+         cv2.polylines(src_im, [box], True,
+                       color=(255, 255, 0), thickness=2)
+     return src_im
+
+
+ class DBPostProcess():
+     """The post process for Differentiable Binarization (DB)."""
+
+     def __init__(self,
+                  thresh=0.3,
+                  box_thresh=0.7,
+                  max_candidates=1000,
+                  unclip_ratio=2.0,
+                  score_mode="fast",
+                  use_dilation=False):
+         self.thresh = thresh
+         self.box_thresh = box_thresh
+         self.max_candidates = max_candidates
+         self.unclip_ratio = unclip_ratio
+         self.min_size = 3
+         self.score_mode = score_mode
+
+         if use_dilation:
+             self.dilation_kernel = np.array([[1, 1], [1, 1]])
+         else:
+             self.dilation_kernel = None
+
+     def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
+         '''
+         _bitmap: single map with shape (1, H, W),
+             whose values are binarized as {0, 1}
+         '''
+         bitmap = _bitmap
+         height, width = bitmap.shape
+
+         outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
+                                 cv2.CHAIN_APPROX_SIMPLE)
+         if len(outs) == 3:
+             img, contours, _ = outs[0], outs[1], outs[2]
+         elif len(outs) == 2:
+             contours, _ = outs[0], outs[1]
+
+         num_contours = min(len(contours), self.max_candidates)
+
+         boxes = []
+         scores = []
+         for index in range(num_contours):
+             contour = contours[index]
+             points, sside = self.get_mini_boxes(contour)
+             if sside < self.min_size:
+                 continue
+             points = np.array(points)
+             if self.score_mode == "fast":
+                 score = self.box_score_fast(pred, points.reshape(-1, 2))
+             else:
+                 score = self.box_score_slow(pred, contour)
+             if self.box_thresh > score:
+                 continue
+
+             box = self.unclip(points).reshape(-1, 1, 2)
+             box, sside = self.get_mini_boxes(box)
+             if sside < self.min_size + 2:
+                 continue
+             box = np.array(box)
+
+             box[:, 0] = np.clip(
+                 np.round(box[:, 0] / width * dest_width), 0, dest_width)
+             box[:, 1] = np.clip(
+                 np.round(box[:, 1] / height * dest_height), 0, dest_height)
+             boxes.append(box.astype(np.int16))
+             scores.append(score)
+         return np.array(boxes, dtype=np.int16), scores
+
+     def unclip(self, box):
+         unclip_ratio = self.unclip_ratio
+         poly = Polygon(box)
+         distance = poly.area * unclip_ratio / poly.length
+         offset = pyclipper.PyclipperOffset()
+         offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+         expanded = np.array(offset.Execute(distance))
+         return expanded
+
+     def get_mini_boxes(self, contour):
+         bounding_box = cv2.minAreaRect(contour)
+         points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
+
+         index_1, index_2, index_3, index_4 = 0, 1, 2, 3
+         if points[1][1] > points[0][1]:
+             index_1 = 0
+             index_4 = 1
+         else:
+             index_1 = 1
+             index_4 = 0
+         if points[3][1] > points[2][1]:
+             index_2 = 2
+             index_3 = 3
+         else:
+             index_2 = 3
+             index_3 = 2
+
+         box = [
+             points[index_1], points[index_2], points[index_3], points[index_4]
+         ]
+         return box, min(bounding_box[1])
+
+     def box_score_fast(self, bitmap, _box):
+         h, w = bitmap.shape[:2]
+         box = _box.copy()
+         xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
+         xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
+         ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
+         ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)
+
+         mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
+         box[:, 0] = box[:, 0] - xmin
+         box[:, 1] = box[:, 1] - ymin
+         cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
+         return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
+
+     def box_score_slow(self, bitmap, contour):
+         '''
+         box_score_slow: use the polygon mean score as the box score
+         '''
+         h, w = bitmap.shape[:2]
+         contour = contour.copy()
+         contour = np.reshape(contour, (-1, 2))
+
+         xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
+         xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
+         ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
+         ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)
+
+         mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
+
+         contour[:, 0] = contour[:, 0] - xmin
+         contour[:, 1] = contour[:, 1] - ymin
+
+         cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1)
+         return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
+
+     def __call__(self, pred, shape_list):
+         pred = pred[:, 0, :, :]
+         segmentation = pred > self.thresh
+
+         boxes_batch = []
+         for batch_index in range(pred.shape[0]):
+             src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
+             if self.dilation_kernel is not None:
+                 mask = cv2.dilate(
+                     np.array(segmentation[batch_index]).astype(np.uint8),
+                     self.dilation_kernel)
+             else:
+                 mask = segmentation[batch_index]
+             boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask,
+                                                    src_w, src_h)
+
+             boxes_batch.append({'points': boxes})
+         return boxes_batch
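
Note: the preprocessing operators above can be sanity-checked standalone; a sketch with a synthetic image (values arbitrary). Since create_operators resolves class names inside this module, the same op dict that config.yaml encodes works directly:

    import numpy as np
    from rapid_ch_det.utils import create_operators, transform

    ops = create_operators({
        'DetResizeForTest': {'limit_side_len': 736, 'limit_type': 'min'},
        'NormalizeImage': {'std': [0.229, 0.224, 0.225],
                           'mean': [0.485, 0.456, 0.406],
                           'scale': '1./255.', 'order': 'hwc'},
        'ToCHWImage': None,
        'KeepKeys': {'keep_keys': ['image', 'shape']},
    })
    img, shape = transform({'image': np.zeros((100, 200, 3), dtype=np.uint8)}, ops)
    print(img.shape)  # (3, 736, 1472): min side scaled to 736, both sides rounded to a multiple of 32
    print(shape)      # [src_h, src_w, ratio_h, ratio_w] = [100, 200, 7.36, 7.36]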
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ numpy==1.21.6
+ onnxruntime>=1.10.0
+ opencv_python
+ pyclipper>=1.2.1
+ Shapely
+ six