DeepLearning101 commited on
Commit
fa6fa48
1 Parent(s): c93900f

Upload 39 files

Browse files
Files changed (39) hide show
  1. tools/__init__.py +14 -0
  2. tools/__pycache__/__init__.cpython-37.pyc +0 -0
  3. tools/__pycache__/__init__.cpython-38.pyc +0 -0
  4. tools/end2end/convert_ppocr_label.py +100 -0
  5. tools/end2end/draw_html.py +73 -0
  6. tools/end2end/eval_end2end.py +193 -0
  7. tools/end2end/readme.md +63 -0
  8. tools/eval.py +137 -0
  9. tools/export_center.py +77 -0
  10. tools/export_model.py +269 -0
  11. tools/infer/__pycache__/predict_cls.cpython-37.pyc +0 -0
  12. tools/infer/__pycache__/predict_cls.cpython-38.pyc +0 -0
  13. tools/infer/__pycache__/predict_det.cpython-37.pyc +0 -0
  14. tools/infer/__pycache__/predict_det.cpython-38.pyc +0 -0
  15. tools/infer/__pycache__/predict_rec.cpython-37.pyc +0 -0
  16. tools/infer/__pycache__/predict_rec.cpython-38.pyc +0 -0
  17. tools/infer/__pycache__/predict_system.cpython-37.pyc +0 -0
  18. tools/infer/__pycache__/predict_system.cpython-38.pyc +0 -0
  19. tools/infer/__pycache__/utility.cpython-37.pyc +0 -0
  20. tools/infer/__pycache__/utility.cpython-38.pyc +0 -0
  21. tools/infer/predict_cls.py +151 -0
  22. tools/infer/predict_det.py +353 -0
  23. tools/infer/predict_e2e.py +169 -0
  24. tools/infer/predict_rec.py +667 -0
  25. tools/infer/predict_sr.py +155 -0
  26. tools/infer/predict_system.py +262 -0
  27. tools/infer/utility.py +663 -0
  28. tools/infer_cls.py +85 -0
  29. tools/infer_det.py +134 -0
  30. tools/infer_e2e.py +174 -0
  31. tools/infer_kie.py +176 -0
  32. tools/infer_kie_token_ser.py +157 -0
  33. tools/infer_kie_token_ser_re.py +225 -0
  34. tools/infer_rec.py +188 -0
  35. tools/infer_sr.py +100 -0
  36. tools/infer_table.py +121 -0
  37. tools/program.py +702 -0
  38. tools/test_hubserving.py +157 -0
  39. tools/train.py +209 -0
tools/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2
+ # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
tools/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (107 Bytes). View file
 
tools/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (144 Bytes). View file
 
tools/end2end/convert_ppocr_label.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import numpy as np
15
+ import json
16
+ import os
17
+
18
+
19
def poly_to_string(poly):
    """Serialize polygon coordinates into one tab-separated string.

    Args:
        poly: array-like of coordinates (ndarray of any rank, or a plain
            list/tuple); it is flattened before joining.

    Returns:
        str: the coordinates joined by tab characters.
    """
    # np.ravel accepts plain lists as well as ndarrays of any rank; the
    # previous `len(poly.shape) > 1` check raised AttributeError on lists.
    return "\t".join(str(v) for v in np.ravel(poly))
25
+
26
+
27
def convert_label(label_dir, mode="gt", save_dir="./save_results/"):
    """Convert a PPOCR-format label file into per-image end-to-end files.

    Each input line is "<img_path>\t<json annotation>" (falling back to a
    space separator when no single tab split is possible).  One output file
    per image is written to ``save_dir``, named "<basename>.txt", with one
    text instance per line (fields tab-separated):

        gt mode:   x1 y1 ... x4 y4 <ignore_tag> <transcription>
        pred mode: x1 y1 ... x4 y4 <transcription>

    In gt mode a transcription of "###" is tagged as ignored (tag 1).
    Instances carrying a 'score' below 0.5 are dropped.

    Args:
        label_dir: path to the PPOCR label file.
        mode: "gt" for ground-truth output (with ignore tag); any other
            value produces prediction-style output.
        save_dir: directory the per-image files are written to (created if
            missing).

    Raises:
        ValueError: if ``label_dir`` does not exist.
    """
    if not os.path.exists(label_dir):
        raise ValueError(f"The file {label_dir} does not exist!")

    assert label_dir != save_dir, \
        "label_dir and save_dir must be different paths"

    # Close the label file deterministically instead of leaking the handle.
    with open(label_dir, 'r') as label_file:
        data = label_file.readlines()

    gt_dict = {}

    for line in data:
        tmp = line.split('\t')
        if len(tmp) != 2:
            # Fall back to space-separated "<img_path> <annotation>" lines
            # (replaces the previous bare try/except-assert control flow).
            tmp = line.strip().split(' ')

        gt_lists = []

        # Skip lines with an empty image path (e.g. trailing blank lines).
        if not tmp[0]:
            continue
        img_path = tmp[0]
        anno = json.loads(tmp[1])
        for dic in anno:
            txt = dic['transcription']
            # Drop low-confidence predictions.
            if 'score' in dic and float(dic['score']) < 0.5:
                continue
            # Normalize the ideographic space (U+3000) to an ASCII space.
            if u'\u3000' in txt:
                txt = txt.replace(u'\u3000', u' ')
            poly = np.array(dic['points']).flatten()
            # "###" marks an unreadable/ignored region in the ground truth.
            txt_tag = 1 if txt == "###" else 0
            if mode == "gt":
                gt_label = poly_to_string(poly) + "\t" + str(
                    txt_tag) + "\t" + txt + "\n"
            else:
                gt_label = poly_to_string(poly) + "\t" + txt + "\n"

            gt_lists.append(gt_label)

        gt_dict[img_path] = gt_lists

    os.makedirs(save_dir, exist_ok=True)

    for img_name in gt_dict.keys():
        save_name = img_name.split("/")[-1]
        save_file = os.path.join(save_dir, save_name + ".txt")
        with open(save_file, "w") as f:
            f.writelines(gt_dict[img_name])

    print("The convert label saved in {}".format(save_dir))
86
+
87
+
88
def parse_args():
    """Parse command-line arguments for the label-conversion script.

    Returns:
        argparse.Namespace with ``label_path``, ``save_folder`` and ``mode``.
    """
    import argparse
    parser = argparse.ArgumentParser(description="args")
    parser.add_argument("--label_path", type=str, required=True)
    parser.add_argument("--save_folder", type=str, required=True)
    # The option is typed str, so the default must be a string.  "pred"
    # preserves the previous effective behavior (the old `default=False`
    # compared unequal to "gt" and so selected the prediction branch).
    parser.add_argument("--mode", type=str, default="pred")
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = parse_args()
    convert_label(args.label_path, args.mode, args.save_folder)
tools/end2end/draw_html.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import argparse
17
+
18
+
19
def str2bool(v):
    """Interpret a string as a boolean flag ("true"/"t"/"1", any case)."""
    normalized = v.lower()
    return normalized in {"true", "t", "1"}
21
+
22
+
23
def init_args():
    """Build the argument parser for the debug-HTML generator."""
    parser = argparse.ArgumentParser()
    # (flag, type, default) triples for every supported option.
    for flag, kind, default in (("--image_dir", str, ""),
                                ("--save_html_path", str, "./default.html"),
                                ("--width", int, 640)):
        parser.add_argument(flag, type=kind, default=default)
    return parser
29
+
30
+
31
def parse_args():
    """Parse command-line arguments using the shared parser."""
    return init_args().parse_args()
34
+
35
+
36
def draw_debug_img(args):
    """Write an HTML page showing every image in ``args.image_dir``.

    Every file in the directory that does not end in "txt" becomes one
    table row containing an <img> tag of width ``args.width``.  The page
    is written to ``args.save_html_path``.

    Args:
        args: namespace with ``image_dir``, ``save_html_path`` and
            ``width`` attributes (see ``init_args``).
    """
    html_path = args.save_html_path

    with open(html_path, 'w') as html:
        html.write('<html>\n<body>\n')
        html.write('<table border="1">\n')
        html.write(
            "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />"
        )
        path = args.image_dir
        for filename in sorted(os.listdir(path)):
            if filename.endswith("txt"): continue
            # The image path
            base = "{}/{}".format(path, filename)
            html.write("<tr>\n")
            # NOTE(review): "(unknown)" looks like a placeholder where the
            # original wrote per-image metadata — confirm against upstream.
            html.write(f'<td> (unknown)\n GT')
            html.write(f'<td>GT\n<img src="{base}" width={args.width}></td>')

            html.write("</tr>\n")
        html.write('<style>\n')
        html.write('span {\n')
        html.write(' color: red;\n')
        html.write('}\n')
        html.write('</style>\n')
        html.write('</table>\n')
        # Close tags in proper nesting order; the original emitted
        # '</html>\n</body>\n', which is invalid HTML.
        html.write('</body>\n</html>\n')
    # Unused locals (err_cnt, image_list, enumerate index) were removed.
    print(f"The html file saved in {html_path}")
    return
67
+
68
+
69
+ if __name__ == "__main__":
70
+
71
+ args = parse_args()
72
+
73
+ draw_debug_img(args)
tools/end2end/eval_end2end.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import re
17
+ import sys
18
+ import shapely
19
+ from shapely.geometry import Polygon
20
+ import numpy as np
21
+ from collections import defaultdict
22
+ import operator
23
+ import editdistance
24
+
25
+
26
def strQ2B(ustring):
    """Convert full-width (CJK) characters in *ustring* to half-width.

    The ideographic space (U+3000) maps to an ASCII space; the full-width
    forms U+FF01..U+FF5E map to their ASCII counterparts by subtracting
    0xFEE0.  All other characters pass through unchanged.
    """
    out = []
    for ch in ustring:
        code = ord(ch)
        if code == 0x3000:          # ideographic space -> ASCII space
            code = 0x20
        elif 0xFF01 <= code <= 0xFF5E:  # full-width ASCII block
            code -= 0xFEE0
        out.append(chr(code))
    return "".join(out)
36
+
37
+
38
def polygon_from_str(polygon_points):
    """
    Create a shapely polygon object from gt or dt line.
    """
    # Eight flat coordinates -> four (x, y) vertices; convex_hull repairs
    # any self-intersecting vertex ordering.
    vertices = np.array(polygon_points).reshape(4, 2)
    return Polygon(vertices).convex_hull
45
+
46
+
47
def polygon_iou(poly1, poly2):
    """
    Intersection over union between two shapely polygons.

    Returns 0 when the polygons do not intersect or when shapely reports a
    topological error while computing the intersection.
    """
    if not poly1.intersects(
            poly2):  # this test is fast and can accelerate calculation
        iou = 0
    else:
        try:
            inter_area = poly1.intersection(poly2).area
            union_area = poly1.area + poly2.area - inter_area
            iou = float(inter_area) / union_area
        except shapely.geos.TopologicalError:
            # except Exception as e:
            #     print(e)
            # NOTE(review): shapely >= 2.0 removed the shapely.geos module,
            # so this except clause would itself raise AttributeError there
            # — confirm the pinned shapely version (<2.0).
            print('shapely.geos.TopologicalError occurred, iou set to 0')
            iou = 0
    return iou
65
+
66
+
67
def ed(str1, str2):
    """Return the Levenshtein edit distance between *str1* and *str2*."""
    return editdistance.eval(str1, str2)
69
+
70
+
71
def e2e_eval(gt_dir, res_dir, ignore_blank=False):
    """Compute end-to-end detection+recognition metrics and print them.

    Expects one gt file and one (optional) result file per image, both
    tab-separated: 8 polygon coordinates, then (gt only) an ignore flag,
    then an optional transcription.  Detections are matched to ground
    truths greedily by descending IoU (threshold 0.5); precision, recall,
    f-measure, character accuracy and average edit distances are printed.

    Args:
        gt_dir: directory of per-image ground-truth files.
        res_dir: directory of per-image prediction files (missing files
            count as zero detections for that image).
        ignore_blank: if True, strip spaces before comparing strings.
    """
    print('start testing...')
    iou_thresh = 0.5
    val_names = os.listdir(gt_dir)
    num_gt_chars = 0
    gt_count = 0
    dt_count = 0
    hit = 0
    ed_sum = 0

    for i, val_name in enumerate(val_names):
        with open(os.path.join(gt_dir, val_name), encoding='utf-8') as f:
            gt_lines = [o.strip() for o in f.readlines()]
        gts = []
        ignore_masks = []
        for line in gt_lines:
            parts = line.strip().split('\t')
            # ignore illegal data
            if len(parts) < 9:
                continue
            # NOTE(review): validation by assert disappears under `python -O`.
            assert (len(parts) < 11)
            # Normalize every gt entry to 8 coords + transcription (may be '').
            if len(parts) == 9:
                gts.append(parts[:8] + [''])
            else:
                gts.append(parts[:8] + [parts[-1]])

            ignore_masks.append(parts[8])

        val_path = os.path.join(res_dir, val_name)
        # A missing prediction file means no detections for this image.
        if not os.path.exists(val_path):
            dt_lines = []
        else:
            with open(val_path, encoding='utf-8') as f:
                dt_lines = [o.strip() for o in f.readlines()]
        dts = []
        for line in dt_lines:
            # print(line)
            parts = line.strip().split("\t")
            assert (len(parts) < 10), "line error: {}".format(line)
            # Normalize detections to 8 coords + transcription (may be '').
            if len(parts) == 8:
                dts.append(parts + [''])
            else:
                dts.append(parts)

        dt_match = [False] * len(dts)
        gt_match = [False] * len(gts)
        # Collect every (gt, dt) pair whose IoU clears the threshold.
        all_ious = defaultdict(tuple)
        for index_gt, gt in enumerate(gts):
            gt_coors = [float(gt_coor) for gt_coor in gt[0:8]]
            gt_poly = polygon_from_str(gt_coors)
            for index_dt, dt in enumerate(dts):
                dt_coors = [float(dt_coor) for dt_coor in dt[0:8]]
                dt_poly = polygon_from_str(dt_coors)
                iou = polygon_iou(dt_poly, gt_poly)
                if iou >= iou_thresh:
                    all_ious[(index_gt, index_dt)] = iou
        # Greedy one-to-one matching: highest IoU pairs claim first.
        sorted_ious = sorted(
            all_ious.items(), key=operator.itemgetter(1), reverse=True)
        sorted_gt_dt_pairs = [item[0] for item in sorted_ious]

        # matched gt and dt
        for gt_dt_pair in sorted_gt_dt_pairs:
            index_gt, index_dt = gt_dt_pair
            if gt_match[index_gt] == False and dt_match[index_dt] == False:
                gt_match[index_gt] = True
                dt_match[index_dt] = True
                if ignore_blank:
                    gt_str = strQ2B(gts[index_gt][8]).replace(" ", "")
                    dt_str = strQ2B(dts[index_dt][8]).replace(" ", "")
                else:
                    gt_str = strQ2B(gts[index_gt][8])
                    dt_str = strQ2B(dts[index_dt][8])
                # Only non-ignored gts ('0') contribute to the metrics; a dt
                # matched to an ignored gt is neither rewarded nor penalized.
                if ignore_masks[index_gt] == '0':
                    ed_sum += ed(gt_str, dt_str)
                    num_gt_chars += len(gt_str)
                    if gt_str == dt_str:
                        hit += 1
                    gt_count += 1
                    dt_count += 1

        # unmatched dt
        for tindex, dt_match_flag in enumerate(dt_match):
            if dt_match_flag == False:
                # Unmatched detection: full edit distance against empty gt.
                dt_str = dts[tindex][8]
                gt_str = ''
                ed_sum += ed(dt_str, gt_str)
                dt_count += 1

        # unmatched gt
        for tindex, gt_match_flag in enumerate(gt_match):
            if gt_match_flag == False and ignore_masks[tindex] == '0':
                # Missed non-ignored gt: full edit distance against empty dt.
                dt_str = ''
                gt_str = gts[tindex][8]
                ed_sum += ed(gt_str, dt_str)
                num_gt_chars += len(gt_str)
                gt_count += 1

    # eps guards the ratio denominators against zero counts.
    eps = 1e-9
    print('hit, dt_count, gt_count', hit, dt_count, gt_count)
    precision = hit / (dt_count + eps)
    recall = hit / (gt_count + eps)
    fmeasure = 2.0 * precision * recall / (precision + recall + eps)
    # NOTE(review): this divides by len(val_names) without eps and raises
    # ZeroDivisionError when gt_dir is empty — confirm intended.
    avg_edit_dist_img = ed_sum / len(val_names)
    avg_edit_dist_field = ed_sum / (gt_count + eps)
    character_acc = 1 - ed_sum / (num_gt_chars + eps)

    print('character_acc: %.2f' % (character_acc * 100) + "%")
    print('avg_edit_dist_field: %.2f' % (avg_edit_dist_field))
    print('avg_edit_dist_img: %.2f' % (avg_edit_dist_img))
    print('precision: %.2f' % (precision * 100) + "%")
    print('recall: %.2f' % (recall * 100) + "%")
    print('fmeasure: %.2f' % (fmeasure * 100) + "%")
183
+
184
+
185
if __name__ == '__main__':
    # Restore the usage check that was left commented out: exit with a
    # message instead of raising IndexError on missing arguments.
    if len(sys.argv) != 3:
        print("python3 ocr_e2e_eval.py gt_dir res_dir")
        sys.exit(-1)
    gt_folder = sys.argv[1]
    pred_folder = sys.argv[2]
    e2e_eval(gt_folder, pred_folder)
tools/end2end/readme.md ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # 简介
3
+
4
+ `tools/end2end`目录下存放了文本检测+文本识别pipeline串联预测的指标评测代码以及可视化工具。本节介绍文本检测+文本识别的端对端指标评估方式。
5
+
6
+
7
+ ## 端对端评测步骤
8
+
9
+ **步骤一:**
10
+
11
+ 运行`tools/infer/predict_system.py`,得到保存的结果:
12
+
13
+ ```
14
+ python3 tools/infer/predict_system.py --det_model_dir=./ch_PP-OCRv2_det_infer/ --rec_model_dir=./ch_PP-OCRv2_rec_infer/ --image_dir=./datasets/img_dir/ --draw_img_save_dir=./ch_PP-OCRv2_results/ --is_visualize=True
15
+ ```
16
+
17
+ 文本检测识别可视化图默认保存在`./ch_PP-OCRv2_results/`目录下,预测结果默认保存在`./ch_PP-OCRv2_results/system_results.txt`中,格式如下:
18
+ ```
19
+ all-sum-510/00224225.jpg [{"transcription": "超赞", "points": [[8.0, 48.0], [157.0, 44.0], [159.0, 115.0], [10.0, 119.0]], "score": "0.99396634"}, {"transcription": "中", "points": [[202.0, 152.0], [230.0, 152.0], [230.0, 163.0], [202.0, 163.0]], "score": "0.09310734"}, {"transcription": "58.0m", "points": [[196.0, 192.0], [444.0, 192.0], [444.0, 240.0], [196.0, 240.0]], "score": "0.44041982"}, {"transcription": "汽配", "points": [[55.0, 263.0], [95.0, 263.0], [95.0, 281.0], [55.0, 281.0]], "score": "0.9986651"}, {"transcription": "成总店", "points": [[120.0, 262.0], [176.0, 262.0], [176.0, 283.0], [120.0, 283.0]], "score": "0.9929402"}, {"transcription": "K", "points": [[237.0, 286.0], [311.0, 286.0], [311.0, 345.0], [237.0, 345.0]], "score": "0.6074794"}, {"transcription": "88:-8", "points": [[203.0, 405.0], [477.0, 414.0], [475.0, 459.0], [201.0, 450.0]], "score": "0.7106863"}]
20
+ ```
21
+
22
+
23
+ **步骤二:**
24
+
25
+ 将步骤一保存的数据转换为端对端评测需要的数据格式:
26
+
27
+ 修改 `tools/end2end/convert_ppocr_label.py`中的代码,convert_label函数中设置输入标签路径,Mode,保存标签路径等,对预测数据的GTlabel和预测结果的label格式进行转换。
28
+
29
+ ```
30
+ python3 tools/end2end/convert_ppocr_label.py --mode=gt --label_path=path/to/label_txt --save_folder=save_gt_label
31
+
32
+ python3 tools/end2end/convert_ppocr_label.py --mode=pred --label_path=path/to/pred_txt --save_folder=save_PPOCRV2_infer
33
+ ```
34
+
35
+ 得到如下结果:
36
+ ```
37
+ ├── ./save_gt_label/
38
+ ├── ./save_PPOCRV2_infer/
39
+ ```
40
+
41
+ **步骤三:**
42
+
43
+ 执行端对端评测,运行`tools/end2end/eval_end2end.py`计算端对端指标,运行方式如下:
44
+
45
+ ```
46
+ python3 tools/end2end/eval_end2end.py "gt_label_dir" "predict_label_dir"
47
+ ```
48
+
49
+ 比如:
50
+
51
+ ```
52
+ python3 tools/end2end/eval_end2end.py ./save_gt_label/ ./save_PPOCRV2_infer/
53
+ ```
54
+ 将得到如下结果,fmeasure为主要关注的指标:
55
+ ```
56
+ hit, dt_count, gt_count 1557 2693 3283
57
+ character_acc: 61.77%
58
+ avg_edit_dist_field: 3.08
59
+ avg_edit_dist_img: 51.82
60
+ precision: 57.82%
61
+ recall: 47.43%
62
+ fmeasure: 52.11%
63
+ ```
tools/eval.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import absolute_import
16
+ from __future__ import division
17
+ from __future__ import print_function
18
+
19
+ import os
20
+ import sys
21
+
22
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
23
+ sys.path.insert(0, __dir__)
24
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
25
+
26
+ import paddle
27
+ from ppocr.data import build_dataloader
28
+ from ppocr.modeling.architectures import build_model
29
+ from ppocr.postprocess import build_post_process
30
+ from ppocr.metrics import build_metric
31
+ from ppocr.utils.save_load import load_model
32
+ import tools.program as program
33
+
34
+
35
def main():
    """Evaluate the model described by the global ``config``.

    Relies on module-level globals (``config``, ``device``, ``logger``)
    assigned by ``program.preprocess()`` in the __main__ block.
    """
    global_config = config['Global']
    # build dataloader
    valid_dataloader = build_dataloader(config, 'Eval', device, logger)

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # build model
    # for rec algorithm: size the head's output to the charset length.
    if hasattr(post_process_class, 'character'):
        char_num = len(getattr(post_process_class, 'character'))
        if config['Architecture']["algorithm"] in ["Distillation",
                                                   ]:  # distillation model
            for key in config['Architecture']["Models"]:
                if config['Architecture']['Models'][key]['Head'][
                        'name'] == 'MultiHead':  # for multi head
                    out_channels_list = {}
                    if config['PostProcess'][
                            'name'] == 'DistillationSARLabelDecode':
                        # SAR decode reserves 2 special tokens.
                        char_num = char_num - 2
                    out_channels_list['CTCLabelDecode'] = char_num
                    out_channels_list['SARLabelDecode'] = char_num + 2
                    config['Architecture']['Models'][key]['Head'][
                        'out_channels_list'] = out_channels_list
                else:
                    config['Architecture']["Models"][key]["Head"][
                        'out_channels'] = char_num
        elif config['Architecture']['Head'][
                'name'] == 'MultiHead':  # for multi head
            out_channels_list = {}
            if config['PostProcess']['name'] == 'SARLabelDecode':
                char_num = char_num - 2
            out_channels_list['CTCLabelDecode'] = char_num
            out_channels_list['SARLabelDecode'] = char_num + 2
            config['Architecture']['Head'][
                'out_channels_list'] = out_channels_list
        else:  # base rec model
            config['Architecture']["Head"]['out_channels'] = char_num

    model = build_model(config['Architecture'])
    # Algorithms whose forward pass needs extra inputs besides the image.
    extra_input_models = [
        "SRN", "NRTR", "SAR", "SEED", "SVTR", "VisionLAN", "RobustScanner"
    ]
    extra_input = False
    if config['Architecture']['algorithm'] == 'Distillation':
        for key in config['Architecture']["Models"]:
            extra_input = extra_input or config['Architecture']['Models'][key][
                'algorithm'] in extra_input_models
    else:
        extra_input = config['Architecture']['algorithm'] in extra_input_models
    # model_type defaults to None when the config omits the key.
    if "model_type" in config['Architecture'].keys():
        if config['Architecture']['algorithm'] == 'CAN':
            model_type = 'can'
        else:
            model_type = config['Architecture']['model_type']
    else:
        model_type = None

    # build metric
    eval_class = build_metric(config['Metric'])
    # amp (automatic mixed precision) setup, mirroring train-time options.
    use_amp = config["Global"].get("use_amp", False)
    amp_level = config["Global"].get("amp_level", 'O2')
    amp_custom_black_list = config['Global'].get('amp_custom_black_list', [])
    if use_amp:
        AMP_RELATED_FLAGS_SETTING = {
            'FLAGS_cudnn_batchnorm_spatial_persistent': 1,
            'FLAGS_max_inplace_grad_add': 8,
        }
        paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
        scale_loss = config["Global"].get("scale_loss", 1.0)
        use_dynamic_loss_scaling = config["Global"].get(
            "use_dynamic_loss_scaling", False)
        scaler = paddle.amp.GradScaler(
            init_loss_scaling=scale_loss,
            use_dynamic_loss_scaling=use_dynamic_loss_scaling)
        if amp_level == "O2":
            model = paddle.amp.decorate(
                models=model, level=amp_level, master_weight=True)
    else:
        scaler = None

    # NOTE(review): this re-reads config['Architecture']["model_type"]
    # directly and raises KeyError when the key is absent, even though the
    # guarded ``model_type`` local above handles that case — confirm
    # whether ``model_type=model_type`` was intended.
    best_model_dict = load_model(
        config, model, model_type=config['Architecture']["model_type"])
    if len(best_model_dict):
        logger.info('metric in ckpt ***************')
        for k, v in best_model_dict.items():
            logger.info('{}:{}'.format(k, v))

    # start eval
    metric = program.eval(model, valid_dataloader, post_process_class,
                          eval_class, model_type, extra_input, scaler,
                          amp_level, amp_custom_black_list)
    logger.info('metric eval ***************')
    for k, v in metric.items():
        logger.info('{}:{}'.format(k, v))
133
+
134
+
135
if __name__ == '__main__':
    # preprocess() parses the CLI options and provides the module-level
    # globals (config, device, logger, vdl_writer) that main() reads.
    config, device, logger, vdl_writer = program.preprocess()
    main()
tools/export_center.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import absolute_import
16
+ from __future__ import division
17
+ from __future__ import print_function
18
+
19
+ import os
20
+ import sys
21
+ import pickle
22
+
23
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
24
+ sys.path.append(__dir__)
25
+ sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
26
+
27
+ from ppocr.data import build_dataloader
28
+ from ppocr.modeling.architectures import build_model
29
+ from ppocr.postprocess import build_post_process
30
+ from ppocr.utils.save_load import load_model
31
+ from ppocr.utils.utility import print_dict
32
+ import tools.program as program
33
+
34
+
35
def main():
    """Export per-character feature centers computed from training data.

    Routes the training dataset through the Eval dataloader, enables
    feature output on the recognition head, and pickles the resulting
    centers to ``train_center.pkl``.
    """
    global_config = config['Global']

    # Point the Eval dataloader at the *training* data so the centers are
    # computed from training samples.
    train_dataset = config['Train']['dataset']
    eval_dataset = config['Eval']['dataset']
    for field in ('name', 'data_dir', 'label_file_list'):
        eval_dataset[field] = train_dataset[field]
    eval_dataloader = build_dataloader(config, 'Eval', device, logger)

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # For rec models the head width must match the character-set size.
    if hasattr(post_process_class, 'character'):
        config['Architecture']["Head"]['out_channels'] = len(
            getattr(post_process_class, 'character'))

    # Ask the head to return features in addition to logits.
    config['Architecture']["Head"]["return_feats"] = True

    model = build_model(config['Architecture'])

    best_model_dict = load_model(config, model)
    if len(best_model_dict):
        logger.info('metric in ckpt ***************')
        for k, v in best_model_dict.items():
            logger.info('{}:{}'.format(k, v))

    # get features from train data
    char_center = program.get_center(model, eval_dataloader, post_process_class)

    # serialize to disk
    with open("train_center.pkl", 'wb') as f:
        pickle.dump(char_center, f)
    return
73
+
74
+
75
if __name__ == '__main__':
    # preprocess() parses the CLI options and provides the module-level
    # globals (config, device, logger, vdl_writer) that main() reads.
    config, device, logger, vdl_writer = program.preprocess()
    main()
tools/export_model.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import sys
17
+
18
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
19
+ sys.path.append(__dir__)
20
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "..")))
21
+
22
+ import argparse
23
+
24
+ import paddle
25
+ from paddle.jit import to_static
26
+
27
+ from ppocr.modeling.architectures import build_model
28
+ from ppocr.postprocess import build_post_process
29
+ from ppocr.utils.save_load import load_model
30
+ from ppocr.utils.logging import get_logger
31
+ from tools.program import load_config, merge_config, ArgsParser
32
+
33
+
34
+ def export_single_model(model,
35
+ arch_config,
36
+ save_path,
37
+ logger,
38
+ input_shape=None,
39
+ quanter=None):
40
+ if arch_config["algorithm"] == "SRN":
41
+ max_text_length = arch_config["Head"]["max_text_length"]
42
+ other_shape = [
43
+ paddle.static.InputSpec(
44
+ shape=[None, 1, 64, 256], dtype="float32"), [
45
+ paddle.static.InputSpec(
46
+ shape=[None, 256, 1],
47
+ dtype="int64"), paddle.static.InputSpec(
48
+ shape=[None, max_text_length, 1], dtype="int64"),
49
+ paddle.static.InputSpec(
50
+ shape=[None, 8, max_text_length, max_text_length],
51
+ dtype="int64"), paddle.static.InputSpec(
52
+ shape=[None, 8, max_text_length, max_text_length],
53
+ dtype="int64")
54
+ ]
55
+ ]
56
+ model = to_static(model, input_spec=other_shape)
57
+ elif arch_config["algorithm"] == "SAR":
58
+ other_shape = [
59
+ paddle.static.InputSpec(
60
+ shape=[None, 3, 48, 160], dtype="float32"),
61
+ [paddle.static.InputSpec(
62
+ shape=[None], dtype="float32")]
63
+ ]
64
+ model = to_static(model, input_spec=other_shape)
65
+ elif arch_config["algorithm"] == "SVTR":
66
+ if arch_config["Head"]["name"] == 'MultiHead':
67
+ other_shape = [
68
+ paddle.static.InputSpec(
69
+ shape=[None, 3, 48, -1], dtype="float32"),
70
+ ]
71
+ else:
72
+ other_shape = [
73
+ paddle.static.InputSpec(
74
+ shape=[None] + input_shape, dtype="float32"),
75
+ ]
76
+ model = to_static(model, input_spec=other_shape)
77
+ elif arch_config["algorithm"] == "PREN":
78
+ other_shape = [
79
+ paddle.static.InputSpec(
80
+ shape=[None, 3, 64, 256], dtype="float32"),
81
+ ]
82
+ model = to_static(model, input_spec=other_shape)
83
+ elif arch_config["model_type"] == "sr":
84
+ other_shape = [
85
+ paddle.static.InputSpec(
86
+ shape=[None, 3, 16, 64], dtype="float32")
87
+ ]
88
+ model = to_static(model, input_spec=other_shape)
89
+ elif arch_config["algorithm"] == "ViTSTR":
90
+ other_shape = [
91
+ paddle.static.InputSpec(
92
+ shape=[None, 1, 224, 224], dtype="float32"),
93
+ ]
94
+ model = to_static(model, input_spec=other_shape)
95
+ elif arch_config["algorithm"] == "ABINet":
96
+ other_shape = [
97
+ paddle.static.InputSpec(
98
+ shape=[None, 3, 32, 128], dtype="float32"),
99
+ ]
100
+ # print([None, 3, 32, 128])
101
+ model = to_static(model, input_spec=other_shape)
102
+ elif arch_config["algorithm"] in ["NRTR", "SPIN", 'RFL']:
103
+ other_shape = [
104
+ paddle.static.InputSpec(
105
+ shape=[None, 1, 32, 100], dtype="float32"),
106
+ ]
107
+ model = to_static(model, input_spec=other_shape)
108
+ elif arch_config["algorithm"] == "VisionLAN":
109
+ other_shape = [
110
+ paddle.static.InputSpec(
111
+ shape=[None, 3, 64, 256], dtype="float32"),
112
+ ]
113
+ model = to_static(model, input_spec=other_shape)
114
+ elif arch_config["algorithm"] == "RobustScanner":
115
+ max_text_length = arch_config["Head"]["max_text_length"]
116
+ other_shape = [
117
+ paddle.static.InputSpec(
118
+ shape=[None, 3, 48, 160], dtype="float32"), [
119
+ paddle.static.InputSpec(
120
+ shape=[None, ], dtype="float32"),
121
+ paddle.static.InputSpec(
122
+ shape=[None, max_text_length], dtype="int64")
123
+ ]
124
+ ]
125
+ model = to_static(model, input_spec=other_shape)
126
+ elif arch_config["algorithm"] == "CAN":
127
+ other_shape = [[
128
+ paddle.static.InputSpec(
129
+ shape=[None, 1, None, None],
130
+ dtype="float32"), paddle.static.InputSpec(
131
+ shape=[None, 1, None, None], dtype="float32"),
132
+ paddle.static.InputSpec(
133
+ shape=[None, arch_config['Head']['max_text_length']],
134
+ dtype="int64")
135
+ ]]
136
+ model = to_static(model, input_spec=other_shape)
137
+ elif arch_config["algorithm"] in ["LayoutLM", "LayoutLMv2", "LayoutXLM"]:
138
+ input_spec = [
139
+ paddle.static.InputSpec(
140
+ shape=[None, 512], dtype="int64"), # input_ids
141
+ paddle.static.InputSpec(
142
+ shape=[None, 512, 4], dtype="int64"), # bbox
143
+ paddle.static.InputSpec(
144
+ shape=[None, 512], dtype="int64"), # attention_mask
145
+ paddle.static.InputSpec(
146
+ shape=[None, 512], dtype="int64"), # token_type_ids
147
+ paddle.static.InputSpec(
148
+ shape=[None, 3, 224, 224], dtype="int64"), # image
149
+ ]
150
+ if 'Re' in arch_config['Backbone']['name']:
151
+ input_spec.extend([
152
+ paddle.static.InputSpec(
153
+ shape=[None, 512, 3], dtype="int64"), # entities
154
+ paddle.static.InputSpec(
155
+ shape=[None, None, 2], dtype="int64"), # relations
156
+ ])
157
+ if model.backbone.use_visual_backbone is False:
158
+ input_spec.pop(4)
159
+ model = to_static(model, input_spec=[input_spec])
160
+ else:
161
+ infer_shape = [3, -1, -1]
162
+ if arch_config["model_type"] == "rec":
163
+ infer_shape = [3, 32, -1] # for rec model, H must be 32
164
+ if "Transform" in arch_config and arch_config[
165
+ "Transform"] is not None and arch_config["Transform"][
166
+ "name"] == "TPS":
167
+ logger.info(
168
+ "When there is tps in the network, variable length input is not supported, and the input size needs to be the same as during training"
169
+ )
170
+ infer_shape[-1] = 100
171
+ elif arch_config["model_type"] == "table":
172
+ infer_shape = [3, 488, 488]
173
+ if arch_config["algorithm"] == "TableMaster":
174
+ infer_shape = [3, 480, 480]
175
+ if arch_config["algorithm"] == "SLANet":
176
+ infer_shape = [3, -1, -1]
177
+ model = to_static(
178
+ model,
179
+ input_spec=[
180
+ paddle.static.InputSpec(
181
+ shape=[None] + infer_shape, dtype="float32")
182
+ ])
183
+
184
+ if quanter is None:
185
+ paddle.jit.save(model, save_path)
186
+ else:
187
+ quanter.save_quantized_model(model, save_path)
188
+ logger.info("inference model is saved to {}".format(save_path))
189
+ return
190
+
191
+
192
def main():
    """Export a trained PaddleOCR model to an inference model.

    Loads the YAML config given on the command line, patches the head
    output channels from the post-process character dictionary (including
    distillation sub-models and MultiHead variants), builds and loads the
    trained weights, then calls ``export_single_model`` for each model.
    """
    FLAGS = ArgsParser().parse_args()
    config = merge_config(load_config(FLAGS.config), FLAGS.opt)
    logger = get_logger()

    # Build the post process first: recognition heads size their output
    # channels from its character dictionary.
    post_process_class = build_post_process(config["PostProcess"],
                                            config["Global"])

    arch = config["Architecture"]
    if hasattr(post_process_class, "character"):
        char_num = len(getattr(post_process_class, "character"))
        if arch["algorithm"] in ["Distillation", ]:  # distillation model
            for key in arch["Models"]:
                sub_head = arch["Models"][key]["Head"]
                if sub_head["name"] == 'MultiHead':  # multi head
                    if config['PostProcess'][
                            'name'] == 'DistillationSARLabelDecode':
                        char_num = char_num - 2
                    sub_head['out_channels_list'] = {
                        'CTCLabelDecode': char_num,
                        'SARLabelDecode': char_num + 2,
                    }
                else:
                    sub_head["out_channels"] = char_num
                # just one final tensor needs to be exported for inference
                arch["Models"][key]["return_all_feats"] = False
        elif arch['Head']['name'] == 'MultiHead':  # multi head
            char_num = len(getattr(post_process_class, 'character'))
            if config['PostProcess']['name'] == 'SARLabelDecode':
                char_num = char_num - 2
            arch['Head']['out_channels_list'] = {
                'CTCLabelDecode': char_num,
                'SARLabelDecode': char_num + 2,
            }
        else:  # base rec model
            arch["Head"]["out_channels"] = char_num

    # for sr algorithm: switch the transform into inference mode
    if arch["model_type"] == "sr":
        arch["Transform"]['infer_mode'] = True

    model = build_model(arch)
    load_model(config, model, model_type=arch["model_type"])
    model.eval()

    save_path = config["Global"]["save_inference_dir"]

    # SVTR (non-MultiHead) needs the eval resize shape as a fixed input spec
    if arch["algorithm"] == "SVTR" and arch["Head"]["name"] != 'MultiHead':
        input_shape = config["Eval"]["dataset"]["transforms"][-2][
            'SVTRRecResizeImg']['image_shape']
    else:
        input_shape = None

    if arch["algorithm"] in ["Distillation", ]:  # export every sub model
        sub_arch_configs = list(arch["Models"].values())
        for idx, sub_name in enumerate(model.model_name_list):
            sub_save_path = os.path.join(save_path, sub_name, "inference")
            export_single_model(model.model_list[idx], sub_arch_configs[idx],
                                sub_save_path, logger)
    else:
        export_single_model(
            model,
            arch,
            os.path.join(save_path, "inference"),
            logger,
            input_shape=input_shape)


if __name__ == "__main__":
    main()
tools/infer/__pycache__/predict_cls.cpython-37.pyc ADDED
Binary file (4.07 kB). View file
 
tools/infer/__pycache__/predict_cls.cpython-38.pyc ADDED
Binary file (4.11 kB). View file
 
tools/infer/__pycache__/predict_det.cpython-37.pyc ADDED
Binary file (8.48 kB). View file
 
tools/infer/__pycache__/predict_det.cpython-38.pyc ADDED
Binary file (8.61 kB). View file
 
tools/infer/__pycache__/predict_rec.cpython-37.pyc ADDED
Binary file (13.8 kB). View file
 
tools/infer/__pycache__/predict_rec.cpython-38.pyc ADDED
Binary file (13.8 kB). View file
 
tools/infer/__pycache__/predict_system.cpython-37.pyc ADDED
Binary file (7.04 kB). View file
 
tools/infer/__pycache__/predict_system.cpython-38.pyc ADDED
Binary file (7.13 kB). View file
 
tools/infer/__pycache__/utility.cpython-37.pyc ADDED
Binary file (17.4 kB). View file
 
tools/infer/__pycache__/utility.cpython-38.pyc ADDED
Binary file (17.4 kB). View file
 
tools/infer/predict_cls.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import os
15
+ import sys
16
+
17
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
18
+ sys.path.append(__dir__)
19
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..')))
20
+
21
+ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
22
+
23
+ import cv2
24
+ import copy
25
+ import numpy as np
26
+ import math
27
+ import time
28
+ import traceback
29
+
30
+ import tools.infer.utility as utility
31
+ from ppocr.postprocess import build_post_process
32
+ from ppocr.utils.logging import get_logger
33
+ from ppocr.utils.utility import get_image_file_list, check_and_read
34
+
35
+ logger = get_logger()
36
+
37
+
38
class TextClassifier(object):
    """Text-direction classifier.

    Predicts whether each cropped text image is upright or rotated by 180
    degrees and rotates the 180-degree crops back before recognition.
    """

    def __init__(self, args):
        self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")]
        self.cls_batch_num = args.cls_batch_num
        self.cls_thresh = args.cls_thresh
        self.postprocess_op = build_post_process({
            'name': 'ClsPostProcess',
            "label_list": args.label_list,
        })
        self.predictor, self.input_tensor, self.output_tensors, _ = \
            utility.create_predictor(args, 'cls', logger)
        self.use_onnx = args.use_onnx

    def resize_norm_img(self, img):
        """Resize to the classifier input shape (keeping aspect ratio),
        normalize to [-1, 1] and right-pad with zeros to the full width."""
        imgC, imgH, imgW = self.cls_image_shape
        h, w = img.shape[0], img.shape[1]
        ratio = w / float(h)
        # width after scaling height to imgH, capped at the model width
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            resized_w = int(math.ceil(imgH * ratio))
        resized = cv2.resize(img, (resized_w, imgH)).astype('float32')
        if imgC == 1:
            resized = resized / 255
            resized = resized[np.newaxis, :]
        else:
            resized = resized.transpose((2, 0, 1)) / 255
        resized -= 0.5
        resized /= 0.5
        padded = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padded[:, :, 0:resized_w] = resized
        return padded

    def __call__(self, img_list):
        """Classify a list of crops; returns (possibly rotated images,
        [label, score] per image in input order, total inference seconds)."""
        img_list = copy.deepcopy(img_list)
        img_num = len(img_list)
        # Sort by aspect ratio so each batch holds similarly shaped crops,
        # which speeds up the classification pass.
        ratios = [im.shape[1] / float(im.shape[0]) for im in img_list]
        indices = np.argsort(np.array(ratios))

        cls_res = [['', 0.0]] * img_num
        batch_num = self.cls_batch_num
        elapse = 0
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)
            max_wh_ratio = 0  # computed for parity; not consumed below
            starttime = time.time()
            for ino in range(beg_img_no, end_img_no):
                h, w = img_list[indices[ino]].shape[0:2]
                max_wh_ratio = max(max_wh_ratio, w * 1.0 / h)
            batch = [
                self.resize_norm_img(img_list[indices[ino]])[np.newaxis, :]
                for ino in range(beg_img_no, end_img_no)
            ]
            norm_img_batch = np.concatenate(batch).copy()

            if self.use_onnx:
                input_dict = {self.input_tensor.name: norm_img_batch}
                outputs = self.predictor.run(self.output_tensors, input_dict)
                prob_out = outputs[0]
            else:
                self.input_tensor.copy_from_cpu(norm_img_batch)
                self.predictor.run()
                prob_out = self.output_tensors[0].copy_to_cpu()
                self.predictor.try_shrink_memory()
            cls_result = self.postprocess_op(prob_out)
            elapse += time.time() - starttime
            for rno, (label, score) in enumerate(cls_result):
                orig_idx = indices[beg_img_no + rno]
                cls_res[orig_idx] = [label, score]
                # rotate confidently-detected upside-down crops back upright
                # (rotate code 1 — presumably cv2.ROTATE_180; confirm)
                if '180' in label and score > self.cls_thresh:
                    img_list[orig_idx] = cv2.rotate(img_list[orig_idx], 1)
        return img_list, cls_res, elapse
123
+
124
+
125
def main(args):
    """Run the direction classifier over every image found under
    ``args.image_dir`` and log one [label, score] result per image."""
    image_file_list = get_image_file_list(args.image_dir)
    text_classifier = TextClassifier(args)
    valid_image_file_list = []
    img_list = []
    for image_file in image_file_list:
        img, flag, _ = check_and_read(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        valid_image_file_list.append(image_file)
        img_list.append(img)
    try:
        img_list, cls_res, predict_time = text_classifier(img_list)
    except Exception as E:
        logger.info(traceback.format_exc())
        logger.info(E)
        exit()
    for image_file, res in zip(valid_image_file_list, cls_res):
        logger.info("Predicts of {}:{}".format(image_file, res))


if __name__ == "__main__":
    main(utility.parse_args())
tools/infer/predict_det.py ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import os
15
+ import sys
16
+
17
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
18
+ sys.path.append(__dir__)
19
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..')))
20
+
21
+ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
22
+
23
+ import cv2
24
+ import numpy as np
25
+ import time
26
+ import sys
27
+
28
+ import tools.infer.utility as utility
29
+ from ppocr.utils.logging import get_logger
30
+ from ppocr.utils.utility import get_image_file_list, check_and_read
31
+ from ppocr.data import create_operators, transform
32
+ from ppocr.postprocess import build_post_process
33
+ import json
34
+ logger = get_logger()
35
+
36
+
37
class TextDetector(object):
    """Text detection predictor.

    Wraps preprocessing, inference and post-processing for the supported
    detection algorithms (DB, DB++, EAST, SAST, PSE, FCE, CT).
    """

    def __init__(self, args):
        self.args = args
        self.det_algorithm = args.det_algorithm
        self.use_onnx = args.use_onnx
        # Default pipeline; individual algorithms below override the resize
        # and/or normalization steps.
        pre_process_list = [{
            'DetResizeForTest': {
                'limit_side_len': args.det_limit_side_len,
                'limit_type': args.det_limit_type,
            }
        }, {
            'NormalizeImage': {
                'std': [0.229, 0.224, 0.225],
                'mean': [0.485, 0.456, 0.406],
                'scale': '1./255.',
                'order': 'hwc'
            }
        }, {
            'ToCHWImage': None
        }, {
            'KeepKeys': {
                'keep_keys': ['image', 'shape']
            }
        }]

        if self.det_algorithm in ("DB", "DB++"):
            postprocess_params = {
                'name': 'DBPostProcess',
                'thresh': args.det_db_thresh,
                'box_thresh': args.det_db_box_thresh,
                'max_candidates': 1000,
                'unclip_ratio': args.det_db_unclip_ratio,
                'use_dilation': args.use_dilation,
                'score_mode': args.det_db_score_mode,
                'box_type': args.det_box_type,
            }
            if self.det_algorithm == "DB++":
                # DB++ was trained with its own normalization statistics
                pre_process_list[1] = {
                    'NormalizeImage': {
                        'std': [1.0, 1.0, 1.0],
                        'mean':
                        [0.48109378172549, 0.45752457890196, 0.40787054090196],
                        'scale': '1./255.',
                        'order': 'hwc'
                    }
                }
        elif self.det_algorithm == "EAST":
            postprocess_params = {
                'name': 'EASTPostProcess',
                'score_thresh': args.det_east_score_thresh,
                'cover_thresh': args.det_east_cover_thresh,
                'nms_thresh': args.det_east_nms_thresh,
            }
        elif self.det_algorithm == "SAST":
            pre_process_list[0] = {
                'DetResizeForTest': {
                    'resize_long': args.det_limit_side_len
                }
            }
            postprocess_params = {
                'name': 'SASTPostProcess',
                'score_thresh': args.det_sast_score_thresh,
                'nms_thresh': args.det_sast_nms_thresh,
            }
            if args.det_box_type == 'poly':
                postprocess_params["sample_pts_num"] = 6
                postprocess_params["expand_scale"] = 1.2
                postprocess_params["shrink_ratio_of_width"] = 0.2
            else:
                postprocess_params["sample_pts_num"] = 2
                postprocess_params["expand_scale"] = 1.0
                postprocess_params["shrink_ratio_of_width"] = 0.3
        elif self.det_algorithm == "PSE":
            postprocess_params = {
                'name': 'PSEPostProcess',
                'thresh': args.det_pse_thresh,
                'box_thresh': args.det_pse_box_thresh,
                'min_area': args.det_pse_min_area,
                'box_type': args.det_box_type,
                'scale': args.det_pse_scale,
            }
        elif self.det_algorithm == "FCE":
            pre_process_list[0] = {
                'DetResizeForTest': {
                    'rescale_img': [1080, 736]
                }
            }
            postprocess_params = {
                'name': 'FCEPostProcess',
                'scales': args.scales,
                'alpha': args.alpha,
                'beta': args.beta,
                'fourier_degree': args.fourier_degree,
                'box_type': args.det_box_type,
            }
        elif self.det_algorithm == "CT":
            pre_process_list[0] = {'ScaleAlignedShort': {'short_size': 640}}
            postprocess_params = {'name': 'CTPostProcess'}
        else:
            logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
            sys.exit(0)

        self.preprocess_op = create_operators(pre_process_list)
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors, self.config = \
            utility.create_predictor(args, 'det', logger)

        if self.use_onnx:
            # A fixed-shape ONNX graph forces a fixed-shape resize.
            img_h, img_w = self.input_tensor.shape[2:]
            if img_h is not None and img_w is not None and img_h > 0 and img_w > 0:
                pre_process_list[0] = {
                    'DetResizeForTest': {
                        'image_shape': [img_h, img_w]
                    }
                }
                self.preprocess_op = create_operators(pre_process_list)

        if args.benchmark:
            import auto_log
            pid = os.getpid()
            gpu_id = utility.get_infer_gpuid()
            self.autolog = auto_log.AutoLogger(
                model_name="det",
                model_precision=args.precision,
                batch_size=1,
                data_shape="dynamic",
                save_path=None,
                inference_config=self.config,
                pids=pid,
                process_name=None,
                gpu_ids=gpu_id if args.use_gpu else None,
                time_keys=[
                    'preprocess_time', 'inference_time', 'postprocess_time'
                ],
                warmup=2,
                logger=logger)

    def order_points_clockwise(self, pts):
        """Reorder 4 points to top-left, top-right, bottom-right, bottom-left.

        The top-left corner has the smallest x+y sum and the bottom-right the
        largest; the remaining two are split by the sign of y-x.
        """
        rect = np.zeros((4, 2), dtype="float32")
        sums = pts.sum(axis=1)
        rect[0] = pts[np.argmin(sums)]
        rect[2] = pts[np.argmax(sums)]
        remaining = np.delete(pts, (np.argmin(sums), np.argmax(sums)), axis=0)
        diffs = np.diff(np.array(remaining), axis=1)
        rect[1] = remaining[np.argmin(diffs)]
        rect[3] = remaining[np.argmax(diffs)]
        return rect

    def clip_det_res(self, points, img_height, img_width):
        """Clamp every vertex (in place) into the image bounds."""
        for idx in range(points.shape[0]):
            points[idx, 0] = int(min(max(points[idx, 0], 0), img_width - 1))
            points[idx, 1] = int(min(max(points[idx, 1], 0), img_height - 1))
        return points

    def filter_tag_det_res(self, dt_boxes, image_shape):
        """Order, clip and keep only quads larger than 3x3 pixels."""
        img_height, img_width = image_shape[0:2]
        kept = []
        for box in dt_boxes:
            if type(box) is list:
                box = np.array(box)
            box = self.order_points_clockwise(box)
            box = self.clip_det_res(box, img_height, img_width)
            rect_width = int(np.linalg.norm(box[0] - box[1]))
            rect_height = int(np.linalg.norm(box[0] - box[3]))
            if rect_width <= 3 or rect_height <= 3:
                continue
            kept.append(box)
        return np.array(kept)

    def filter_tag_det_res_only_clip(self, dt_boxes, image_shape):
        """Clip polygon vertices to the image without any size filtering."""
        img_height, img_width = image_shape[0:2]
        kept = []
        for box in dt_boxes:
            if type(box) is list:
                box = np.array(box)
            kept.append(self.clip_det_res(box, img_height, img_width))
        return np.array(kept)

    def __call__(self, img):
        """Detect text in one BGR image; returns (boxes, elapsed_seconds)."""
        ori_im = img.copy()
        data = {'image': img}

        st = time.time()

        if self.args.benchmark:
            self.autolog.times.start()

        data = transform(data, self.preprocess_op)
        img, shape_list = data
        if img is None:
            return None, 0
        img = np.expand_dims(img, axis=0)
        shape_list = np.expand_dims(shape_list, axis=0)
        img = img.copy()

        if self.args.benchmark:
            self.autolog.times.stamp()
        if self.use_onnx:
            input_dict = {self.input_tensor.name: img}
            outputs = self.predictor.run(self.output_tensors, input_dict)
        else:
            self.input_tensor.copy_from_cpu(img)
            self.predictor.run()
            outputs = [t.copy_to_cpu() for t in self.output_tensors]
        if self.args.benchmark:
            self.autolog.times.stamp()

        # Map the raw network outputs onto the keys the post-processor expects.
        if self.det_algorithm == "EAST":
            preds = {'f_geo': outputs[0], 'f_score': outputs[1]}
        elif self.det_algorithm == 'SAST':
            preds = {
                'f_border': outputs[0],
                'f_score': outputs[1],
                'f_tco': outputs[2],
                'f_tvo': outputs[3],
            }
        elif self.det_algorithm in ['DB', 'PSE', 'DB++']:
            preds = {'maps': outputs[0]}
        elif self.det_algorithm == 'FCE':
            preds = {
                'level_{}'.format(i): out
                for i, out in enumerate(outputs)
            }
        elif self.det_algorithm == "CT":
            preds = {'maps': outputs[0], 'score': outputs[1]}
        else:
            raise NotImplementedError

        post_result = self.postprocess_op(preds, shape_list)
        dt_boxes = post_result[0]['points']

        if self.args.det_box_type == 'poly':
            dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape)
        else:
            dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)

        if self.args.benchmark:
            self.autolog.times.end(stamp=True)
        et = time.time()
        return dt_boxes, et - st
283
+
284
+
285
if __name__ == "__main__":
    # CLI entry point: run text detection on every image (or PDF page) under
    # --image_dir, save visualizations and a det_results.txt of all boxes.
    args = utility.parse_args()
    image_file_list = get_image_file_list(args.image_dir)
    text_detector = TextDetector(args)
    total_time = 0
    draw_img_save_dir = args.draw_img_save_dir
    os.makedirs(draw_img_save_dir, exist_ok=True)

    if args.warmup:
        # warm the predictor up with a dummy image so timings are stable
        img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
        for i in range(2):
            res = text_detector(img)

    save_results = []
    for idx, image_file in enumerate(image_file_list):
        img, flag_gif, flag_pdf = check_and_read(image_file)
        if not flag_gif and not flag_pdf:
            img = cv2.imread(image_file)
        if not flag_pdf:
            if img is None:
                logger.debug("error in loading image:{}".format(image_file))
                continue
            imgs = [img]
        else:
            # PDFs yield a list of page images; 0 means "all pages"
            page_num = args.page_num
            if page_num > len(img) or page_num == 0:
                page_num = len(img)
            imgs = img[:page_num]
        for index, img in enumerate(imgs):
            st = time.time()
            dt_boxes, _ = text_detector(img)
            elapse = time.time() - st
            total_time += elapse
            # multi-page inputs get the page index appended to the key
            if len(imgs) > 1:
                save_pred = os.path.basename(image_file) + '_' + str(
                    index) + "\t" + str(
                        json.dumps([x.tolist() for x in dt_boxes])) + "\n"
            else:
                save_pred = os.path.basename(image_file) + "\t" + str(
                    json.dumps([x.tolist() for x in dt_boxes])) + "\n"
            save_results.append(save_pred)
            logger.info(save_pred)
            if len(imgs) > 1:
                logger.info("{}_{} The predict time of {}: {}".format(
                    idx, index, image_file, elapse))
            else:
                logger.info("{} The predict time of {}: {}".format(
                    idx, image_file, elapse))

            src_im = utility.draw_text_det_res(dt_boxes, img)

            if flag_gif:
                save_file = image_file[:-3] + "png"
            elif flag_pdf:
                save_file = image_file.replace('.pdf',
                                               '_' + str(index) + '.png')
            else:
                save_file = image_file
            img_path = os.path.join(
                draw_img_save_dir,
                "det_res_{}".format(os.path.basename(save_file)))
            cv2.imwrite(img_path, src_im)
            logger.info("The visualized image saved in {}".format(img_path))

    # Fix: the context manager closes the file; the original also called
    # f.close() inside the with-block, which was redundant.
    with open(os.path.join(draw_img_save_dir, "det_results.txt"), 'w') as f:
        f.writelines(save_results)
    if args.benchmark:
        text_detector.autolog.report()
tools/infer/predict_e2e.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import os
15
+ import sys
16
+
17
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
18
+ sys.path.append(__dir__)
19
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..')))
20
+
21
+ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
22
+
23
+ import cv2
24
+ import numpy as np
25
+ import time
26
+ import sys
27
+
28
+ import tools.infer.utility as utility
29
+ from ppocr.utils.logging import get_logger
30
+ from ppocr.utils.utility import get_image_file_list, check_and_read
31
+ from ppocr.data import create_operators, transform
32
+ from ppocr.postprocess import build_post_process
33
+
34
+ logger = get_logger()
35
+
36
+
37
class TextE2E(object):
    """End-to-end text spotting predictor.

    Runs a single network (PGNet) that both localizes text polygons and
    recognizes their content.
    """

    def __init__(self, args):
        self.args = args
        self.e2e_algorithm = args.e2e_algorithm
        self.use_onnx = args.use_onnx
        pre_process_list = [{
            'E2EResizeForTest': {}
        }, {
            'NormalizeImage': {
                'std': [0.229, 0.224, 0.225],
                'mean': [0.485, 0.456, 0.406],
                'scale': '1./255.',
                'order': 'hwc'
            }
        }, {
            'ToCHWImage': None
        }, {
            'KeepKeys': {
                'keep_keys': ['image', 'shape']
            }
        }]
        if self.e2e_algorithm == "PGNet":
            pre_process_list[0] = {
                'E2EResizeForTest': {
                    'max_side_len': args.e2e_limit_side_len,
                    'valid_set': 'totaltext'
                }
            }
            postprocess_params = {
                'name': 'PGPostProcess',
                'score_thresh': args.e2e_pgnet_score_thresh,
                'character_dict_path': args.e2e_char_dict_path,
                'valid_set': args.e2e_pgnet_valid_set,
                'mode': args.e2e_pgnet_mode,
            }
        else:
            logger.info("unknown e2e_algorithm:{}".format(self.e2e_algorithm))
            sys.exit(0)

        self.preprocess_op = create_operators(pre_process_list)
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors, _ = \
            utility.create_predictor(args, 'e2e', logger)

    def clip_det_res(self, points, img_height, img_width):
        """Clamp every polygon vertex (in place) into the image bounds."""
        for idx in range(points.shape[0]):
            points[idx, 0] = int(min(max(points[idx, 0], 0), img_width - 1))
            points[idx, 1] = int(min(max(points[idx, 1], 0), img_height - 1))
        return points

    def filter_tag_det_res_only_clip(self, dt_boxes, image_shape):
        """Clip all detected polygons to the source image; no filtering."""
        img_height, img_width = image_shape[0:2]
        clipped = [
            self.clip_det_res(box, img_height, img_width) for box in dt_boxes
        ]
        return np.array(clipped)

    def __call__(self, img):
        """Spot text in one image; returns (boxes, texts, elapsed_seconds)."""
        ori_im = img.copy()
        data = transform({'image': img}, self.preprocess_op)
        img, shape_list = data
        if img is None:
            return None, 0
        img = np.expand_dims(img, axis=0)
        shape_list = np.expand_dims(shape_list, axis=0)
        img = img.copy()
        starttime = time.time()

        if self.use_onnx:
            input_dict = {self.input_tensor.name: img}
            outputs = self.predictor.run(self.output_tensors, input_dict)
            preds = {
                'f_border': outputs[0],
                'f_char': outputs[1],
                'f_direction': outputs[2],
                'f_score': outputs[3],
            }
        else:
            self.input_tensor.copy_from_cpu(img)
            self.predictor.run()
            outputs = [t.copy_to_cpu() for t in self.output_tensors]
            if self.e2e_algorithm == 'PGNet':
                preds = {
                    'f_border': outputs[0],
                    'f_char': outputs[1],
                    'f_direction': outputs[2],
                    'f_score': outputs[3],
                }
            else:
                raise NotImplementedError
        post_result = self.postprocess_op(preds, shape_list)
        points, strs = post_result['points'], post_result['texts']
        dt_boxes = self.filter_tag_det_res_only_clip(points, ori_im.shape)
        elapse = time.time() - starttime
        return dt_boxes, strs, elapse
139
+
140
+
141
if __name__ == "__main__":
    # CLI entry point: run end-to-end text spotting on each image under
    # --image_dir, draw the results, and report an average prediction time.
    args = utility.parse_args()
    image_file_list = get_image_file_list(args.image_dir)
    text_detector = TextE2E(args)
    count = 0
    total_time = 0
    draw_img_save = "./inference_results"
    if not os.path.exists(draw_img_save):
        os.makedirs(draw_img_save)
    for image_file in image_file_list:
        img, flag, _ = check_and_read(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        points, strs, elapse = text_detector(img)
        # first image is excluded from the average (predictor warm-up)
        if count > 0:
            total_time += elapse
        count += 1
        logger.info("Predict time of {}: {}".format(image_file, elapse))
        src_im = utility.draw_e2e_res(points, strs, image_file)
        img_name_pure = os.path.split(image_file)[-1]
        img_path = os.path.join(draw_img_save,
                                "e2e_res_{}".format(img_name_pure))
        cv2.imwrite(img_path, src_im)
        logger.info("The visualized image saved in {}".format(img_path))
    if count > 1:
        logger.info("Avg Time: {}".format(total_time / (count - 1)))
tools/infer/predict_rec.py ADDED
@@ -0,0 +1,667 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import os
15
+ import sys
16
+ from PIL import Image
17
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
18
+ sys.path.append(__dir__)
19
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..')))
20
+
21
+ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
22
+
23
+ import cv2
24
+ import numpy as np
25
+ import math
26
+ import time
27
+ import traceback
28
+ import paddle
29
+
30
+ import tools.infer.utility as utility
31
+ from ppocr.postprocess import build_post_process
32
+ from ppocr.utils.logging import get_logger
33
+ from ppocr.utils.utility import get_image_file_list, check_and_read
34
+
35
+ logger = get_logger()
36
+
37
+
38
class TextRecognizer(object):
    """Paddle/ONNX inference wrapper for text-line recognition models."""

    def __init__(self, args):
        """Build the algorithm-specific post-process op and the predictor."""
        self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")]
        self.rec_batch_num = args.rec_batch_num
        self.rec_algorithm = args.rec_algorithm

        char_dict = args.rec_char_dict_path
        use_space = args.use_space_char

        # Algorithms whose decoder differs only by name (plus optional extra
        # keys); anything not handled below falls back to the CTC decoder.
        decoder_overrides = {
            "SRN": {'name': 'SRNLabelDecode'},
            "RARE": {'name': 'AttnLabelDecode'},
            "NRTR": {'name': 'NRTRLabelDecode'},
            "SAR": {'name': 'SARLabelDecode'},
            "VisionLAN": {'name': 'VLLabelDecode'},
            "ViTSTR": {'name': 'ViTSTRLabelDecode'},
            "ABINet": {'name': 'ABINetLabelDecode'},
            "SPIN": {'name': 'SPINLabelDecode'},
            "RobustScanner": {'name': 'SARLabelDecode', "rm_symbol": True},
        }
        if self.rec_algorithm in decoder_overrides:
            postprocess_params = dict(decoder_overrides[self.rec_algorithm])
            postprocess_params["character_dict_path"] = char_dict
            postprocess_params["use_space_char"] = use_space
        elif self.rec_algorithm == 'RFL':
            # RFL decodes without an external character dictionary.
            postprocess_params = {
                'name': 'RFLLabelDecode',
                "character_dict_path": None,
                "use_space_char": use_space
            }
        elif self.rec_algorithm == "PREN":
            postprocess_params = {'name': 'PRENLabelDecode'}
        elif self.rec_algorithm == "CAN":
            # CAN (formula recognition) may invert the gray input image.
            self.inverse = args.rec_image_inverse
            postprocess_params = {
                'name': 'CANLabelDecode',
                "character_dict_path": char_dict,
                "use_space_char": use_space
            }
        else:
            postprocess_params = {
                'name': 'CTCLabelDecode',
                "character_dict_path": char_dict,
                "use_space_char": use_space
            }
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors, self.config = \
            utility.create_predictor(args, 'rec', logger)
        self.benchmark = args.benchmark
        self.use_onnx = args.use_onnx
        if args.benchmark:
            import auto_log
            pid = os.getpid()
            gpu_id = utility.get_infer_gpuid()
            self.autolog = auto_log.AutoLogger(
                model_name="rec",
                model_precision=args.precision,
                batch_size=args.rec_batch_num,
                data_shape="dynamic",
                save_path=None,  # args.save_log_path
                inference_config=self.config,
                pids=pid,
                process_name=None,
                gpu_ids=gpu_id if args.use_gpu else None,
                time_keys=[
                    'preprocess_time', 'inference_time', 'postprocess_time'
                ],
                warmup=0,
                logger=logger)
142
+
143
+ def resize_norm_img(self, img, max_wh_ratio):
144
+ imgC, imgH, imgW = self.rec_image_shape
145
+ if self.rec_algorithm == 'NRTR' or self.rec_algorithm == 'ViTSTR':
146
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
147
+ # return padding_im
148
+ image_pil = Image.fromarray(np.uint8(img))
149
+ if self.rec_algorithm == 'ViTSTR':
150
+ img = image_pil.resize([imgW, imgH], Image.BICUBIC)
151
+ else:
152
+ img = image_pil.resize([imgW, imgH], Image.ANTIALIAS)
153
+ img = np.array(img)
154
+ norm_img = np.expand_dims(img, -1)
155
+ norm_img = norm_img.transpose((2, 0, 1))
156
+ if self.rec_algorithm == 'ViTSTR':
157
+ norm_img = norm_img.astype(np.float32) / 255.
158
+ else:
159
+ norm_img = norm_img.astype(np.float32) / 128. - 1.
160
+ return norm_img
161
+ elif self.rec_algorithm == 'RFL':
162
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
163
+ resized_image = cv2.resize(
164
+ img, (imgW, imgH), interpolation=cv2.INTER_CUBIC)
165
+ resized_image = resized_image.astype('float32')
166
+ resized_image = resized_image / 255
167
+ resized_image = resized_image[np.newaxis, :]
168
+ resized_image -= 0.5
169
+ resized_image /= 0.5
170
+ return resized_image
171
+
172
+ assert imgC == img.shape[2]
173
+ imgW = int((imgH * max_wh_ratio))
174
+ if self.use_onnx:
175
+ w = self.input_tensor.shape[3:][0]
176
+ if w is not None and w > 0:
177
+ imgW = w
178
+
179
+ h, w = img.shape[:2]
180
+ ratio = w / float(h)
181
+ if math.ceil(imgH * ratio) > imgW:
182
+ resized_w = imgW
183
+ else:
184
+ resized_w = int(math.ceil(imgH * ratio))
185
+ if self.rec_algorithm == 'RARE':
186
+ if resized_w > self.rec_image_shape[2]:
187
+ resized_w = self.rec_image_shape[2]
188
+ imgW = self.rec_image_shape[2]
189
+ resized_image = cv2.resize(img, (resized_w, imgH))
190
+ resized_image = resized_image.astype('float32')
191
+ resized_image = resized_image.transpose((2, 0, 1)) / 255
192
+ resized_image -= 0.5
193
+ resized_image /= 0.5
194
+ padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
195
+ padding_im[:, :, 0:resized_w] = resized_image
196
+ return padding_im
197
+
198
+ def resize_norm_img_vl(self, img, image_shape):
199
+
200
+ imgC, imgH, imgW = image_shape
201
+ img = img[:, :, ::-1] # bgr2rgb
202
+ resized_image = cv2.resize(
203
+ img, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
204
+ resized_image = resized_image.astype('float32')
205
+ resized_image = resized_image.transpose((2, 0, 1)) / 255
206
+ return resized_image
207
+
208
+ def resize_norm_img_srn(self, img, image_shape):
209
+ imgC, imgH, imgW = image_shape
210
+
211
+ img_black = np.zeros((imgH, imgW))
212
+ im_hei = img.shape[0]
213
+ im_wid = img.shape[1]
214
+
215
+ if im_wid <= im_hei * 1:
216
+ img_new = cv2.resize(img, (imgH * 1, imgH))
217
+ elif im_wid <= im_hei * 2:
218
+ img_new = cv2.resize(img, (imgH * 2, imgH))
219
+ elif im_wid <= im_hei * 3:
220
+ img_new = cv2.resize(img, (imgH * 3, imgH))
221
+ else:
222
+ img_new = cv2.resize(img, (imgW, imgH))
223
+
224
+ img_np = np.asarray(img_new)
225
+ img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)
226
+ img_black[:, 0:img_np.shape[1]] = img_np
227
+ img_black = img_black[:, :, np.newaxis]
228
+
229
+ row, col, c = img_black.shape
230
+ c = 1
231
+
232
+ return np.reshape(img_black, (c, row, col)).astype(np.float32)
233
+
234
+ def srn_other_inputs(self, image_shape, num_heads, max_text_length):
235
+
236
+ imgC, imgH, imgW = image_shape
237
+ feature_dim = int((imgH / 8) * (imgW / 8))
238
+
239
+ encoder_word_pos = np.array(range(0, feature_dim)).reshape(
240
+ (feature_dim, 1)).astype('int64')
241
+ gsrm_word_pos = np.array(range(0, max_text_length)).reshape(
242
+ (max_text_length, 1)).astype('int64')
243
+
244
+ gsrm_attn_bias_data = np.ones((1, max_text_length, max_text_length))
245
+ gsrm_slf_attn_bias1 = np.triu(gsrm_attn_bias_data, 1).reshape(
246
+ [-1, 1, max_text_length, max_text_length])
247
+ gsrm_slf_attn_bias1 = np.tile(
248
+ gsrm_slf_attn_bias1,
249
+ [1, num_heads, 1, 1]).astype('float32') * [-1e9]
250
+
251
+ gsrm_slf_attn_bias2 = np.tril(gsrm_attn_bias_data, -1).reshape(
252
+ [-1, 1, max_text_length, max_text_length])
253
+ gsrm_slf_attn_bias2 = np.tile(
254
+ gsrm_slf_attn_bias2,
255
+ [1, num_heads, 1, 1]).astype('float32') * [-1e9]
256
+
257
+ encoder_word_pos = encoder_word_pos[np.newaxis, :]
258
+ gsrm_word_pos = gsrm_word_pos[np.newaxis, :]
259
+
260
+ return [
261
+ encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1,
262
+ gsrm_slf_attn_bias2
263
+ ]
264
+
265
+ def process_image_srn(self, img, image_shape, num_heads, max_text_length):
266
+ norm_img = self.resize_norm_img_srn(img, image_shape)
267
+ norm_img = norm_img[np.newaxis, :]
268
+
269
+ [encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2] = \
270
+ self.srn_other_inputs(image_shape, num_heads, max_text_length)
271
+
272
+ gsrm_slf_attn_bias1 = gsrm_slf_attn_bias1.astype(np.float32)
273
+ gsrm_slf_attn_bias2 = gsrm_slf_attn_bias2.astype(np.float32)
274
+ encoder_word_pos = encoder_word_pos.astype(np.int64)
275
+ gsrm_word_pos = gsrm_word_pos.astype(np.int64)
276
+
277
+ return (norm_img, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1,
278
+ gsrm_slf_attn_bias2)
279
+
280
    def resize_norm_img_sar(self, img, image_shape,
                            width_downsample_ratio=0.25):
        # Preprocess for SAR/RobustScanner: keep aspect ratio, snap the width
        # to a multiple of 1/width_downsample_ratio, clamp to [imgW_min,
        # imgW_max], pad with -1, and report the valid-width fraction.
        imgC, imgH, imgW_min, imgW_max = image_shape
        h = img.shape[0]
        w = img.shape[1]
        valid_ratio = 1.0
        # make sure new_width is an integral multiple of width_divisor.
        width_divisor = int(1 / width_downsample_ratio)
        # resize
        ratio = w / float(h)
        resize_w = math.ceil(imgH * ratio)
        if resize_w % width_divisor != 0:
            resize_w = round(resize_w / width_divisor) * width_divisor
        if imgW_min is not None:
            resize_w = max(imgW_min, resize_w)
        if imgW_max is not None:
            # Fraction of the padded width that carries real image content.
            valid_ratio = min(1.0, 1.0 * resize_w / imgW_max)
            resize_w = min(imgW_max, resize_w)
        resized_image = cv2.resize(img, (resize_w, imgH))
        resized_image = resized_image.astype('float32')
        # norm
        if image_shape[0] == 1:
            # Single-channel model: scale to [0, 1] and add the channel axis.
            resized_image = resized_image / 255
            resized_image = resized_image[np.newaxis, :]
        else:
            resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        resize_shape = resized_image.shape
        # Pad on the right with -1 (the post-normalization value of black).
        padding_im = -1.0 * np.ones((imgC, imgH, imgW_max), dtype=np.float32)
        padding_im[:, :, 0:resize_w] = resized_image
        pad_shape = padding_im.shape

        return padding_im, resize_shape, pad_shape, valid_ratio
314
+
315
+ def resize_norm_img_spin(self, img):
316
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
317
+ # return padding_im
318
+ img = cv2.resize(img, tuple([100, 32]), cv2.INTER_CUBIC)
319
+ img = np.array(img, np.float32)
320
+ img = np.expand_dims(img, -1)
321
+ img = img.transpose((2, 0, 1))
322
+ mean = [127.5]
323
+ std = [127.5]
324
+ mean = np.array(mean, dtype=np.float32)
325
+ std = np.array(std, dtype=np.float32)
326
+ mean = np.float32(mean.reshape(1, -1))
327
+ stdinv = 1 / np.float32(std.reshape(1, -1))
328
+ img -= mean
329
+ img *= stdinv
330
+ return img
331
+
332
+ def resize_norm_img_svtr(self, img, image_shape):
333
+
334
+ imgC, imgH, imgW = image_shape
335
+ resized_image = cv2.resize(
336
+ img, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
337
+ resized_image = resized_image.astype('float32')
338
+ resized_image = resized_image.transpose((2, 0, 1)) / 255
339
+ resized_image -= 0.5
340
+ resized_image /= 0.5
341
+ return resized_image
342
+
343
+ def resize_norm_img_abinet(self, img, image_shape):
344
+
345
+ imgC, imgH, imgW = image_shape
346
+
347
+ resized_image = cv2.resize(
348
+ img, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
349
+ resized_image = resized_image.astype('float32')
350
+ resized_image = resized_image / 255.
351
+
352
+ mean = np.array([0.485, 0.456, 0.406])
353
+ std = np.array([0.229, 0.224, 0.225])
354
+ resized_image = (
355
+ resized_image - mean[None, None, ...]) / std[None, None, ...]
356
+ resized_image = resized_image.transpose((2, 0, 1))
357
+ resized_image = resized_image.astype('float32')
358
+
359
+ return resized_image
360
+
361
    def norm_img_can(self, img, image_shape):
        # Normalize one crop for the CAN formula-recognition model.
        # NOTE(review): `image_shape` is unused here — the target shape comes
        # from self.rec_image_shape; confirm whether the parameter is vestigial.

        img = cv2.cvtColor(
            img, cv2.COLOR_BGR2GRAY)  # CAN only predict gray scale image

        # Optionally invert so formulas are dark-on-light (set via
        # --rec_image_inverse; see __init__).
        if self.inverse:
            img = 255 - img

        if self.rec_image_shape[0] == 1:
            h, w = img.shape
            _, imgH, imgW = self.rec_image_shape
            # Pad small images with white (255) up to the model's minimum size.
            if h < imgH or w < imgW:
                padding_h = max(imgH - h, 0)
                padding_w = max(imgW - w, 0)
                img_padded = np.pad(img, ((0, padding_h), (0, padding_w)),
                                    'constant',
                                    constant_values=(255))
                img = img_padded

        img = np.expand_dims(img, 0) / 255.0  # h,w,c -> c,h,w
        img = img.astype('float32')

        return img
384
+
385
    def __call__(self, img_list):
        """Recognize a list of cropped text images.

        Crops are sorted by aspect ratio so each batch pads to a similar
        width, preprocessed per self.rec_algorithm, run through the Paddle
        (or ONNX) predictor, and decoded by self.postprocess_op.

        Returns:
            (rec_res, elapse): rec_res is a list of results aligned with the
            original input order; elapse is wall-clock seconds for the call.
        """
        img_num = len(img_list)
        # Calculate the aspect ratio of all text bars
        width_list = []
        for img in img_list:
            width_list.append(img.shape[1] / float(img.shape[0]))
        # Sorting can speed up the recognition process
        indices = np.argsort(np.array(width_list))
        rec_res = [['', 0.0]] * img_num
        batch_num = self.rec_batch_num
        st = time.time()
        if self.benchmark:
            self.autolog.times.start()
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)
            norm_img_batch = []
            # Algorithm-specific auxiliary inputs gathered next to the images.
            if self.rec_algorithm == "SRN":
                encoder_word_pos_list = []
                gsrm_word_pos_list = []
                gsrm_slf_attn_bias1_list = []
                gsrm_slf_attn_bias2_list = []
            if self.rec_algorithm == "SAR":
                valid_ratios = []
            imgC, imgH, imgW = self.rec_image_shape[:3]
            max_wh_ratio = imgW / imgH
            # The widest crop in the batch fixes the padded width for all.
            for ino in range(beg_img_no, end_img_no):
                h, w = img_list[indices[ino]].shape[0:2]
                wh_ratio = w * 1.0 / h
                max_wh_ratio = max(max_wh_ratio, wh_ratio)
            for ino in range(beg_img_no, end_img_no):
                if self.rec_algorithm == "SAR":
                    norm_img, _, _, valid_ratio = self.resize_norm_img_sar(
                        img_list[indices[ino]], self.rec_image_shape)
                    norm_img = norm_img[np.newaxis, :]
                    valid_ratio = np.expand_dims(valid_ratio, axis=0)
                    valid_ratios.append(valid_ratio)
                    norm_img_batch.append(norm_img)
                elif self.rec_algorithm == "SRN":
                    norm_img = self.process_image_srn(
                        img_list[indices[ino]], self.rec_image_shape, 8, 25)
                    encoder_word_pos_list.append(norm_img[1])
                    gsrm_word_pos_list.append(norm_img[2])
                    gsrm_slf_attn_bias1_list.append(norm_img[3])
                    gsrm_slf_attn_bias2_list.append(norm_img[4])
                    norm_img_batch.append(norm_img[0])
                elif self.rec_algorithm == "SVTR":
                    norm_img = self.resize_norm_img_svtr(img_list[indices[ino]],
                                                         self.rec_image_shape)
                    norm_img = norm_img[np.newaxis, :]
                    norm_img_batch.append(norm_img)
                elif self.rec_algorithm in ["VisionLAN", "PREN"]:
                    norm_img = self.resize_norm_img_vl(img_list[indices[ino]],
                                                       self.rec_image_shape)
                    norm_img = norm_img[np.newaxis, :]
                    norm_img_batch.append(norm_img)
                elif self.rec_algorithm == 'SPIN':
                    norm_img = self.resize_norm_img_spin(img_list[indices[ino]])
                    norm_img = norm_img[np.newaxis, :]
                    norm_img_batch.append(norm_img)
                elif self.rec_algorithm == "ABINet":
                    norm_img = self.resize_norm_img_abinet(
                        img_list[indices[ino]], self.rec_image_shape)
                    norm_img = norm_img[np.newaxis, :]
                    norm_img_batch.append(norm_img)
                elif self.rec_algorithm == "RobustScanner":
                    norm_img, _, _, valid_ratio = self.resize_norm_img_sar(
                        img_list[indices[ino]],
                        self.rec_image_shape,
                        width_downsample_ratio=0.25)
                    norm_img = norm_img[np.newaxis, :]
                    valid_ratio = np.expand_dims(valid_ratio, axis=0)
                    # NOTE(review): valid_ratios and word_positions_list are
                    # re-created on every iteration, so only the last crop's
                    # values survive when the batch holds more than one image
                    # — confirm intended behavior for rec_batch_num > 1.
                    valid_ratios = []
                    valid_ratios.append(valid_ratio)
                    norm_img_batch.append(norm_img)
                    word_positions_list = []
                    word_positions = np.array(range(0, 40)).astype('int64')
                    word_positions = np.expand_dims(word_positions, axis=0)
                    word_positions_list.append(word_positions)
                elif self.rec_algorithm == "CAN":
                    # NOTE(review): norm_img_can does not use its second
                    # argument; max_wh_ratio is passed but ignored there.
                    norm_img = self.norm_img_can(img_list[indices[ino]],
                                                 max_wh_ratio)
                    norm_img = norm_img[np.newaxis, :]
                    norm_img_batch.append(norm_img)
                    norm_image_mask = np.ones(norm_img.shape, dtype='float32')
                    word_label = np.ones([1, 36], dtype='int64')
                    # NOTE(review): these lists are also re-created per image,
                    # keeping only the last mask/label for batches > 1.
                    norm_img_mask_batch = []
                    word_label_list = []
                    norm_img_mask_batch.append(norm_image_mask)
                    word_label_list.append(word_label)
                else:
                    # Default CTC-style preprocessing, padded to max_wh_ratio.
                    norm_img = self.resize_norm_img(img_list[indices[ino]],
                                                    max_wh_ratio)
                    norm_img = norm_img[np.newaxis, :]
                    norm_img_batch.append(norm_img)
            norm_img_batch = np.concatenate(norm_img_batch)
            norm_img_batch = norm_img_batch.copy()
            if self.benchmark:
                self.autolog.times.stamp()

            if self.rec_algorithm == "SRN":
                encoder_word_pos_list = np.concatenate(encoder_word_pos_list)
                gsrm_word_pos_list = np.concatenate(gsrm_word_pos_list)
                gsrm_slf_attn_bias1_list = np.concatenate(
                    gsrm_slf_attn_bias1_list)
                gsrm_slf_attn_bias2_list = np.concatenate(
                    gsrm_slf_attn_bias2_list)

                inputs = [
                    norm_img_batch,
                    encoder_word_pos_list,
                    gsrm_word_pos_list,
                    gsrm_slf_attn_bias1_list,
                    gsrm_slf_attn_bias2_list,
                ]
                if self.use_onnx:
                    # NOTE(review): only the image tensor is fed on the ONNX
                    # path; the auxiliary SRN inputs above are not passed in.
                    input_dict = {}
                    input_dict[self.input_tensor.name] = norm_img_batch
                    outputs = self.predictor.run(self.output_tensors,
                                                 input_dict)
                    preds = {"predict": outputs[2]}
                else:
                    input_names = self.predictor.get_input_names()
                    for i in range(len(input_names)):
                        input_tensor = self.predictor.get_input_handle(
                            input_names[i])
                        input_tensor.copy_from_cpu(inputs[i])
                    self.predictor.run()
                    outputs = []
                    for output_tensor in self.output_tensors:
                        output = output_tensor.copy_to_cpu()
                        outputs.append(output)
                    if self.benchmark:
                        self.autolog.times.stamp()
                    preds = {"predict": outputs[2]}
            elif self.rec_algorithm == "SAR":
                valid_ratios = np.concatenate(valid_ratios)
                inputs = [
                    norm_img_batch,
                    np.array(
                        [valid_ratios], dtype=np.float32),
                ]
                if self.use_onnx:
                    # NOTE(review): valid_ratios are not fed on the ONNX path.
                    input_dict = {}
                    input_dict[self.input_tensor.name] = norm_img_batch
                    outputs = self.predictor.run(self.output_tensors,
                                                 input_dict)
                    preds = outputs[0]
                else:
                    input_names = self.predictor.get_input_names()
                    for i in range(len(input_names)):
                        input_tensor = self.predictor.get_input_handle(
                            input_names[i])
                        input_tensor.copy_from_cpu(inputs[i])
                    self.predictor.run()
                    outputs = []
                    for output_tensor in self.output_tensors:
                        output = output_tensor.copy_to_cpu()
                        outputs.append(output)
                    if self.benchmark:
                        self.autolog.times.stamp()
                    preds = outputs[0]
            elif self.rec_algorithm == "RobustScanner":
                valid_ratios = np.concatenate(valid_ratios)
                word_positions_list = np.concatenate(word_positions_list)
                inputs = [norm_img_batch, valid_ratios, word_positions_list]

                if self.use_onnx:
                    input_dict = {}
                    input_dict[self.input_tensor.name] = norm_img_batch
                    outputs = self.predictor.run(self.output_tensors,
                                                 input_dict)
                    preds = outputs[0]
                else:
                    input_names = self.predictor.get_input_names()
                    for i in range(len(input_names)):
                        input_tensor = self.predictor.get_input_handle(
                            input_names[i])
                        input_tensor.copy_from_cpu(inputs[i])
                    self.predictor.run()
                    outputs = []
                    for output_tensor in self.output_tensors:
                        output = output_tensor.copy_to_cpu()
                        outputs.append(output)
                    if self.benchmark:
                        self.autolog.times.stamp()
                    preds = outputs[0]
            elif self.rec_algorithm == "CAN":
                norm_img_mask_batch = np.concatenate(norm_img_mask_batch)
                word_label_list = np.concatenate(word_label_list)
                inputs = [norm_img_batch, norm_img_mask_batch, word_label_list]
                if self.use_onnx:
                    input_dict = {}
                    input_dict[self.input_tensor.name] = norm_img_batch
                    outputs = self.predictor.run(self.output_tensors,
                                                 input_dict)
                    preds = outputs
                else:
                    input_names = self.predictor.get_input_names()
                    input_tensor = []
                    for i in range(len(input_names)):
                        input_tensor_i = self.predictor.get_input_handle(
                            input_names[i])
                        input_tensor_i.copy_from_cpu(inputs[i])
                        input_tensor.append(input_tensor_i)
                    self.input_tensor = input_tensor
                    self.predictor.run()
                    outputs = []
                    for output_tensor in self.output_tensors:
                        output = output_tensor.copy_to_cpu()
                        outputs.append(output)
                    if self.benchmark:
                        self.autolog.times.stamp()
                    preds = outputs
            else:
                if self.use_onnx:
                    input_dict = {}
                    input_dict[self.input_tensor.name] = norm_img_batch
                    outputs = self.predictor.run(self.output_tensors,
                                                 input_dict)
                    preds = outputs[0]
                else:
                    self.input_tensor.copy_from_cpu(norm_img_batch)
                    self.predictor.run()
                    outputs = []
                    for output_tensor in self.output_tensors:
                        output = output_tensor.copy_to_cpu()
                        outputs.append(output)
                    if self.benchmark:
                        self.autolog.times.stamp()
                    if len(outputs) != 1:
                        preds = outputs
                    else:
                        preds = outputs[0]
            # Decode network output and scatter results back to the original
            # (pre-sort) positions.
            rec_result = self.postprocess_op(preds)
            for rno in range(len(rec_result)):
                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
            if self.benchmark:
                self.autolog.times.end(stamp=True)
        return rec_res, time.time() - st
625
+
626
+
627
def main(args):
    """CLI entry point: recognize every image under --image_dir and log results."""
    image_file_list = get_image_file_list(args.image_dir)
    text_recognizer = TextRecognizer(args)
    valid_image_file_list = []
    img_list = []

    logger.info(
        "In PP-OCRv3, rec_image_shape parameter defaults to '3, 48, 320', "
        "if you are using recognition model with PP-OCRv2 or an older version, please set --rec_image_shape='3,32,320"
    )
    # warmup 2 times
    if args.warmup:
        warm_img = np.random.uniform(0, 255, [48, 320, 3]).astype(np.uint8)
        for _ in range(2):
            text_recognizer([warm_img] * int(args.rec_batch_num))

    for image_file in image_file_list:
        img, flag, _ = check_and_read(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        valid_image_file_list.append(image_file)
        img_list.append(img)

    try:
        rec_res, _ = text_recognizer(img_list)
    except Exception as E:
        logger.info(traceback.format_exc())
        logger.info(E)
        exit()

    for ino, file_name in enumerate(valid_image_file_list):
        logger.info("Predicts of {}:{}".format(file_name, rec_res[ino]))
    if args.benchmark:
        text_recognizer.autolog.report()
664
+
665
+
666
if __name__ == "__main__":
    # Script entry point: parse CLI flags and run recognition.
    main(utility.parse_args())
tools/infer/predict_sr.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import os
15
+ import sys
16
+ from PIL import Image
17
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
18
+ sys.path.insert(0, __dir__)
19
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..')))
20
+
21
+ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
22
+
23
+ import cv2
24
+ import numpy as np
25
+ import math
26
+ import time
27
+ import traceback
28
+ import paddle
29
+
30
+ import tools.infer.utility as utility
31
+ from ppocr.postprocess import build_post_process
32
+ from ppocr.utils.logging import get_logger
33
+ from ppocr.utils.utility import get_image_file_list, check_and_read
34
+
35
+ logger = get_logger()
36
+
37
+
38
class TextSR(object):
    """Inference wrapper for the text super-resolution model."""

    def __init__(self, args):
        # (C, H, W) of the model output, parsed from --sr_image_shape.
        self.sr_image_shape = [int(v) for v in args.sr_image_shape.split(",")]
        self.sr_batch_num = args.sr_batch_num

        self.predictor, self.input_tensor, self.output_tensors, self.config = \
            utility.create_predictor(args, 'sr', logger)
        self.benchmark = args.benchmark
        # Optional auto_log benchmarking harness (imported lazily so the
        # dependency is only needed when --benchmark is set).
        if args.benchmark:
            import auto_log
            pid = os.getpid()
            gpu_id = utility.get_infer_gpuid()
            self.autolog = auto_log.AutoLogger(
                model_name="sr",
                model_precision=args.precision,
                batch_size=args.sr_batch_num,
                data_shape="dynamic",
                save_path=None,  #args.save_log_path,
                inference_config=self.config,
                pids=pid,
                process_name=None,
                gpu_ids=gpu_id if args.use_gpu else None,
                time_keys=[
                    'preprocess_time', 'inference_time', 'postprocess_time'
                ],
                warmup=0,
                logger=logger)
65
+
66
+ def resize_norm_img(self, img):
67
+ imgC, imgH, imgW = self.sr_image_shape
68
+ img = img.resize((imgW // 2, imgH // 2), Image.BICUBIC)
69
+ img_numpy = np.array(img).astype("float32")
70
+ img_numpy = img_numpy.transpose((2, 0, 1)) / 255
71
+ return img_numpy
72
+
73
+ def __call__(self, img_list):
74
+ img_num = len(img_list)
75
+ batch_num = self.sr_batch_num
76
+ st = time.time()
77
+ st = time.time()
78
+ all_result = [] * img_num
79
+ if self.benchmark:
80
+ self.autolog.times.start()
81
+ for beg_img_no in range(0, img_num, batch_num):
82
+ end_img_no = min(img_num, beg_img_no + batch_num)
83
+ norm_img_batch = []
84
+ imgC, imgH, imgW = self.sr_image_shape
85
+ for ino in range(beg_img_no, end_img_no):
86
+ norm_img = self.resize_norm_img(img_list[ino])
87
+ norm_img = norm_img[np.newaxis, :]
88
+ norm_img_batch.append(norm_img)
89
+
90
+ norm_img_batch = np.concatenate(norm_img_batch)
91
+ norm_img_batch = norm_img_batch.copy()
92
+ if self.benchmark:
93
+ self.autolog.times.stamp()
94
+ self.input_tensor.copy_from_cpu(norm_img_batch)
95
+ self.predictor.run()
96
+ outputs = []
97
+ for output_tensor in self.output_tensors:
98
+ output = output_tensor.copy_to_cpu()
99
+ outputs.append(output)
100
+ if len(outputs) != 1:
101
+ preds = outputs
102
+ else:
103
+ preds = outputs[0]
104
+ all_result.append(outputs)
105
+ if self.benchmark:
106
+ self.autolog.times.end(stamp=True)
107
+ return all_result, time.time() - st
108
+
109
+
110
def main(args):
    """CLI entry point: super-resolve every image under --image_dir and save
    the results as infer_result/sr_<name>."""
    image_file_list = get_image_file_list(args.image_dir)
    text_recognizer = TextSR(args)
    valid_image_file_list = []
    img_list = []

    # warmup 2 times
    if args.warmup:
        # TextSR.resize_norm_img calls PIL's Image.resize, so the random
        # warm-up array must be wrapped in a PIL image (a bare numpy array
        # would crash here).
        img = Image.fromarray(
            np.random.uniform(0, 255, [16, 64, 3]).astype(np.uint8))
        for i in range(2):
            res = text_recognizer([img] * int(args.sr_batch_num))

    for image_file in image_file_list:
        img, flag, _ = check_and_read(image_file)
        if not flag:
            img = Image.open(image_file).convert("RGB")
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        valid_image_file_list.append(image_file)
        img_list.append(img)
    try:
        preds, _ = text_recognizer(img_list)
        # cv2.imwrite does not create directories; make sure the output
        # folder exists before writing.
        os.makedirs("infer_result", exist_ok=True)
        for beg_no in range(len(preds)):
            sr_img = preds[beg_no][1]
            lr_img = preds[beg_no][0]
            for i in (range(sr_img.shape[0])):
                fm_sr = (sr_img[i] * 255).transpose(1, 2, 0).astype(np.uint8)
                fm_lr = (lr_img[i] * 255).transpose(1, 2, 0).astype(np.uint8)
                img_name_pure = os.path.split(valid_image_file_list[
                    beg_no * args.sr_batch_num + i])[-1]
                # Model output is RGB; flip to BGR for cv2.imwrite.
                cv2.imwrite("infer_result/sr_{}".format(img_name_pure),
                            fm_sr[:, :, ::-1])
                logger.info("The visualized image saved in infer_result/sr_{}".
                            format(img_name_pure))

    except Exception as E:
        logger.info(traceback.format_exc())
        logger.info(E)
        exit()
    if args.benchmark:
        text_recognizer.autolog.report()
152
+
153
+
154
if __name__ == "__main__":
    # Script entry point: parse CLI flags and run super-resolution.
    main(utility.parse_args())
tools/infer/predict_system.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import os
15
+ import sys
16
+ import subprocess
17
+
18
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
19
+ sys.path.append(__dir__)
20
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..')))
21
+
22
+ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
23
+
24
+ import cv2
25
+ import copy
26
+ import numpy as np
27
+ import json
28
+ import time
29
+ import logging
30
+ from PIL import Image
31
+ import tools.infer.utility as utility
32
+ import tools.infer.predict_rec as predict_rec
33
+ import tools.infer.predict_det as predict_det
34
+ import tools.infer.predict_cls as predict_cls
35
+ from ppocr.utils.utility import get_image_file_list, check_and_read
36
+ from ppocr.utils.logging import get_logger
37
+ from tools.infer.utility import draw_ocr_box_txt, get_rotate_crop_image, get_minarea_rect_crop
38
+ logger = get_logger()
39
+
40
+
41
class TextSystem(object):
    """End-to-end OCR pipeline: detection -> (optional) angle
    classification -> recognition, built from the CLI ``args`` produced
    by ``tools.infer.utility.parse_args``.
    """

    def __init__(self, args):
        if not args.show_log:
            logger.setLevel(logging.INFO)

        self.text_detector = predict_det.TextDetector(args)
        self.text_recognizer = predict_rec.TextRecognizer(args)
        self.use_angle_cls = args.use_angle_cls
        self.drop_score = args.drop_score
        if self.use_angle_cls:
            self.text_classifier = predict_cls.TextClassifier(args)

        self.args = args
        self.crop_image_res_index = 0

    def draw_crop_rec_res(self, output_dir, img_crop_list, rec_res):
        """Save every cropped text region to *output_dir* for debugging.

        File names carry a running index across calls so repeated
        invocations do not overwrite earlier crops.
        """
        os.makedirs(output_dir, exist_ok=True)
        bbox_num = len(img_crop_list)
        for bno in range(bbox_num):
            cv2.imwrite(
                os.path.join(output_dir,
                             f"mg_crop_{bno+self.crop_image_res_index}.jpg"),
                img_crop_list[bno])
            logger.debug(f"{bno}, {rec_res[bno]}")
        self.crop_image_res_index += bbox_num

    def __call__(self, img, cls=True):
        """Run the full pipeline on one image.

        Args:
            img: input image (numpy array, BGR as read by cv2).
            cls: whether to run the angle classifier (only effective when
                the system was created with ``use_angle_cls``).

        Returns:
            (boxes, rec_results, time_dict): detected boxes and
            (text, score) pairs whose score reaches ``drop_score``, plus
            per-stage timing information.
        """
        # BUGFIX: this dict used to be initialised with a misspelled 'csl'
        # key while the classifier branch below writes 'cls'.
        time_dict = {'det': 0, 'rec': 0, 'cls': 0, 'all': 0}
        start = time.time()
        ori_im = img.copy()
        dt_boxes, elapse = self.text_detector(img)
        time_dict['det'] = elapse
        logger.debug("dt_boxes num : {}, elapse : {}".format(
            len(dt_boxes), elapse))
        if dt_boxes is None:
            # BUGFIX: return three values so callers that unpack
            # (boxes, rec_res, time_dict) — e.g. main() below — do not
            # crash when detection yields nothing.
            time_dict['all'] = time.time() - start
            return None, None, time_dict
        img_crop_list = []

        dt_boxes = sorted_boxes(dt_boxes)

        for bno in range(len(dt_boxes)):
            tmp_box = copy.deepcopy(dt_boxes[bno])
            if self.args.det_box_type == "quad":
                img_crop = get_rotate_crop_image(ori_im, tmp_box)
            else:
                img_crop = get_minarea_rect_crop(ori_im, tmp_box)
            img_crop_list.append(img_crop)
        if self.use_angle_cls and cls:
            img_crop_list, angle_list, elapse = self.text_classifier(
                img_crop_list)
            time_dict['cls'] = elapse
            logger.debug("cls num : {}, elapse : {}".format(
                len(img_crop_list), elapse))

        rec_res, elapse = self.text_recognizer(img_crop_list)
        time_dict['rec'] = elapse
        logger.debug("rec_res num : {}, elapse : {}".format(
            len(rec_res), elapse))
        if self.args.save_crop_res:
            self.draw_crop_rec_res(self.args.crop_res_save_dir, img_crop_list,
                                   rec_res)
        # keep only results whose recognition confidence passes drop_score
        filter_boxes, filter_rec_res = [], []
        for box, rec_result in zip(dt_boxes, rec_res):
            text, score = rec_result
            if score >= self.drop_score:
                filter_boxes.append(box)
                filter_rec_res.append(rec_result)
        end = time.time()
        time_dict['all'] = end - start
        return filter_boxes, filter_rec_res, time_dict
111
+
112
+
113
def sorted_boxes(dt_boxes):
    """
    Sort text boxes in reading order: top to bottom, then left to right.

    Boxes whose first corners are within 10 px vertically are treated as
    the same line and ordered by x.
    args:
        dt_boxes(array): detected text boxes with shape [N, 4, 2]
    return:
        list of boxes (each [4, 2]) in reading order
    """
    box_count = dt_boxes.shape[0]
    boxes = sorted(dt_boxes, key=lambda box: (box[0][1], box[0][0]))

    # bubble each box leftward while it sits on the same line as its
    # predecessor but starts further left
    for i in range(box_count - 1):
        j = i
        while j >= 0:
            same_line = abs(boxes[j + 1][0][1] - boxes[j][0][1]) < 10
            if same_line and boxes[j + 1][0][0] < boxes[j][0][0]:
                boxes[j], boxes[j + 1] = boxes[j + 1], boxes[j]
                j -= 1
            else:
                break
    return boxes
135
+
136
+
137
def main(args):
    """Run the detection+classification+recognition pipeline over every
    image (or PDF page) under ``args.image_dir``, save visualizations and
    a ``system_results.txt`` file of per-image JSON predictions.
    """
    image_file_list = get_image_file_list(args.image_dir)
    # shard the file list when running as one of several worker processes
    image_file_list = image_file_list[args.process_id::args.total_process_num]
    text_sys = TextSystem(args)
    is_visualize = True
    font_path = args.vis_font_path
    drop_score = args.drop_score
    draw_img_save_dir = args.draw_img_save_dir
    os.makedirs(draw_img_save_dir, exist_ok=True)
    save_results = []

    logger.info(
        "In PP-OCRv3, rec_image_shape parameter defaults to '3, 48, 320', "
        "if you are using recognition model with PP-OCRv2 or an older version, please set --rec_image_shape='3,32,320"
    )

    # warm up 10 times
    if args.warmup:
        img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
        for i in range(10):
            res = text_sys(img)

    total_time = 0
    cpu_mem, gpu_mem, gpu_util = 0, 0, 0
    _st = time.time()
    count = 0
    for idx, image_file in enumerate(image_file_list):

        # check_and_read handles gif/pdf; anything else is read with cv2
        img, flag_gif, flag_pdf = check_and_read(image_file)
        if not flag_gif and not flag_pdf:
            img = cv2.imread(image_file)
        if not flag_pdf:
            if img is None:
                logger.debug("error in loading image:{}".format(image_file))
                continue
            imgs = [img]
        else:
            # a pdf yields a list of page images; page_num == 0 means all
            page_num = args.page_num
            if page_num > len(img) or page_num == 0:
                page_num = len(img)
            imgs = img[:page_num]
        for index, img in enumerate(imgs):
            starttime = time.time()
            dt_boxes, rec_res, time_dict = text_sys(img)
            elapse = time.time() - starttime
            total_time += elapse
            if len(imgs) > 1:
                logger.debug(
                    str(idx) + '_' + str(index) + " Predict time of %s: %.3fs"
                    % (image_file, elapse))
            else:
                logger.debug(
                    str(idx) + " Predict time of %s: %.3fs" % (image_file,
                                                               elapse))
            for text, score in rec_res:
                logger.debug("{}, {:.3f}".format(text, score))

            # serialize predictions as one JSON record per image/page
            res = [{
                "transcription": rec_res[i][0],
                "points": np.array(dt_boxes[i]).astype(np.int32).tolist(),
            } for i in range(len(dt_boxes))]
            if len(imgs) > 1:
                save_pred = os.path.basename(image_file) + '_' + str(
                    index) + "\t" + json.dumps(
                        res, ensure_ascii=False) + "\n"
            else:
                save_pred = os.path.basename(image_file) + "\t" + json.dumps(
                    res, ensure_ascii=False) + "\n"
            save_results.append(save_pred)

            if is_visualize:
                # cv2 gives BGR; PIL drawing helpers expect RGB
                image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                boxes = dt_boxes
                txts = [rec_res[i][0] for i in range(len(rec_res))]
                scores = [rec_res[i][1] for i in range(len(rec_res))]

                draw_img = draw_ocr_box_txt(
                    image,
                    boxes,
                    txts,
                    scores,
                    drop_score=drop_score,
                    font_path=font_path)
                if flag_gif:
                    save_file = image_file[:-3] + "png"
                elif flag_pdf:
                    save_file = image_file.replace('.pdf',
                                                   '_' + str(index) + '.png')
                else:
                    save_file = image_file
                # flip RGB back to BGR for cv2.imwrite
                cv2.imwrite(
                    os.path.join(draw_img_save_dir,
                                 os.path.basename(save_file)),
                    draw_img[:, :, ::-1])
                logger.debug("The visualized image saved in {}".format(
                    os.path.join(draw_img_save_dir, os.path.basename(
                        save_file))))

    logger.info("The predict total time is {}".format(time.time() - _st))
    if args.benchmark:
        text_sys.text_detector.autolog.report()
        text_sys.text_recognizer.autolog.report()

    with open(
            os.path.join(draw_img_save_dir, "system_results.txt"),
            'w',
            encoding='utf-8') as f:
        f.writelines(save_results)
245
+
246
+
247
# Script entry point. With --use_mp the work is fanned out to
# total_process_num child processes, each re-invoking this script with its
# own --process_id (and --use_mp=False so children do not recurse); each
# child then processes its shard of the file list. Otherwise run in-process.
if __name__ == "__main__":
    args = utility.parse_args()
    if args.use_mp:
        p_list = []
        total_process_num = args.total_process_num
        for process_id in range(total_process_num):
            cmd = [sys.executable, "-u"] + sys.argv + [
                "--process_id={}".format(process_id),
                "--use_mp={}".format(False)
            ]
            p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stdout)
            p_list.append(p)
        for p in p_list:
            p.wait()
    else:
        main(args)
tools/infer/utility.py ADDED
@@ -0,0 +1,663 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import argparse
16
+ import os
17
+ import sys
18
+ import platform
19
+ import cv2
20
+ import numpy as np
21
+ import paddle
22
+ from PIL import Image, ImageDraw, ImageFont
23
+ import math
24
+ from paddle import inference
25
+ import time
26
+ import random
27
+ from ppocr.utils.logging import get_logger
28
+
29
+
30
def str2bool(v):
    """Interpret a command-line string as a boolean flag.

    Only "true", "t" and "1" (case-insensitive) map to True; any other
    value maps to False.
    """
    normalized = v.lower()
    return normalized == "true" or normalized == "t" or normalized == "1"
32
+
33
+
34
def init_args():
    """Build the argparse parser shared by every tools/infer entry point.

    Groups: inference engine flags, detector (DB/EAST/SAST/PSE/FCE),
    recognizer, end-to-end (PGNet), angle classifier, super-resolution,
    output/visualization, and multi-process options.

    Returns:
        argparse.ArgumentParser: the fully configured (unparsed) parser.
    """
    parser = argparse.ArgumentParser()
    # params for prediction engine
    parser.add_argument("--use_gpu", type=str2bool, default=True)
    parser.add_argument("--use_xpu", type=str2bool, default=False)
    parser.add_argument("--use_npu", type=str2bool, default=False)
    parser.add_argument("--ir_optim", type=str2bool, default=True)
    parser.add_argument("--use_tensorrt", type=str2bool, default=False)
    parser.add_argument("--min_subgraph_size", type=int, default=15)
    parser.add_argument("--precision", type=str, default="fp32")
    parser.add_argument("--gpu_mem", type=int, default=500)
    parser.add_argument("--gpu_id", type=int, default=0)

    # params for text detector
    parser.add_argument("--image_dir", type=str)
    parser.add_argument("--page_num", type=int, default=0)
    parser.add_argument("--det_algorithm", type=str, default='DB')
    parser.add_argument("--det_model_dir", type=str)
    parser.add_argument("--det_limit_side_len", type=float, default=960)
    parser.add_argument("--det_limit_type", type=str, default='max')
    parser.add_argument("--det_box_type", type=str, default='quad')

    # DB parmas
    parser.add_argument("--det_db_thresh", type=float, default=0.3)
    parser.add_argument("--det_db_box_thresh", type=float, default=0.6)
    parser.add_argument("--det_db_unclip_ratio", type=float, default=1.5)
    parser.add_argument("--max_batch_size", type=int, default=10)
    parser.add_argument("--use_dilation", type=str2bool, default=False)
    parser.add_argument("--det_db_score_mode", type=str, default="fast")

    # EAST parmas
    parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
    parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
    parser.add_argument("--det_east_nms_thresh", type=float, default=0.2)

    # SAST parmas
    parser.add_argument("--det_sast_score_thresh", type=float, default=0.5)
    parser.add_argument("--det_sast_nms_thresh", type=float, default=0.2)

    # PSE parmas
    parser.add_argument("--det_pse_thresh", type=float, default=0)
    parser.add_argument("--det_pse_box_thresh", type=float, default=0.85)
    parser.add_argument("--det_pse_min_area", type=float, default=16)
    parser.add_argument("--det_pse_scale", type=int, default=1)

    # FCE parmas
    # NOTE(review): type=list makes argparse split a command-line value into
    # individual characters; only the default works as intended — confirm
    # before relying on these from the CLI.
    parser.add_argument("--scales", type=list, default=[8, 16, 32])
    parser.add_argument("--alpha", type=float, default=1.0)
    parser.add_argument("--beta", type=float, default=1.0)
    parser.add_argument("--fourier_degree", type=int, default=5)

    # params for text recognizer
    parser.add_argument("--rec_algorithm", type=str, default='SVTR_LCNet')
    parser.add_argument("--rec_model_dir", type=str)
    parser.add_argument("--rec_image_inverse", type=str2bool, default=True)
    parser.add_argument("--rec_image_shape", type=str, default="3, 48, 320")
    parser.add_argument("--rec_batch_num", type=int, default=6)
    parser.add_argument("--max_text_length", type=int, default=25)
    parser.add_argument(
        "--rec_char_dict_path",
        type=str,
        default="./ppocr/utils/ppocr_keys_v1.txt")
    parser.add_argument("--use_space_char", type=str2bool, default=True)
    parser.add_argument(
        "--vis_font_path", type=str, default="./doc/fonts/simfang.ttf")
    parser.add_argument("--drop_score", type=float, default=0.5)

    # params for e2e
    parser.add_argument("--e2e_algorithm", type=str, default='PGNet')
    parser.add_argument("--e2e_model_dir", type=str)
    parser.add_argument("--e2e_limit_side_len", type=float, default=768)
    parser.add_argument("--e2e_limit_type", type=str, default='max')

    # PGNet parmas
    parser.add_argument("--e2e_pgnet_score_thresh", type=float, default=0.5)
    parser.add_argument(
        "--e2e_char_dict_path", type=str, default="./ppocr/utils/ic15_dict.txt")
    parser.add_argument("--e2e_pgnet_valid_set", type=str, default='totaltext')
    parser.add_argument("--e2e_pgnet_mode", type=str, default='fast')

    # params for text classifier
    parser.add_argument("--use_angle_cls", type=str2bool, default=False)
    parser.add_argument("--cls_model_dir", type=str)
    parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
    # NOTE(review): same type=list caveat as --scales above.
    parser.add_argument("--label_list", type=list, default=['0', '180'])
    parser.add_argument("--cls_batch_num", type=int, default=6)
    parser.add_argument("--cls_thresh", type=float, default=0.9)

    parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
    parser.add_argument("--cpu_threads", type=int, default=10)
    parser.add_argument("--use_pdserving", type=str2bool, default=False)
    parser.add_argument("--warmup", type=str2bool, default=False)

    # SR parmas
    parser.add_argument("--sr_model_dir", type=str)
    parser.add_argument("--sr_image_shape", type=str, default="3, 32, 128")
    parser.add_argument("--sr_batch_num", type=int, default=1)

    # output / visualization
    parser.add_argument(
        "--draw_img_save_dir", type=str, default="./inference_results")
    parser.add_argument("--save_crop_res", type=str2bool, default=False)
    parser.add_argument("--crop_res_save_dir", type=str, default="./output")

    # multi-process
    parser.add_argument("--use_mp", type=str2bool, default=False)
    parser.add_argument("--total_process_num", type=int, default=1)
    parser.add_argument("--process_id", type=int, default=0)

    parser.add_argument("--benchmark", type=str2bool, default=False)
    parser.add_argument("--save_log_path", type=str, default="./log_output/")

    parser.add_argument("--show_log", type=str2bool, default=True)
    parser.add_argument("--use_onnx", type=str2bool, default=False)
    return parser
149
+
150
+
151
def parse_args():
    """Parse sys.argv with the shared inference parser from init_args()."""
    return init_args().parse_args()
154
+
155
+
156
def create_predictor(args, mode, logger):
    """Create an inference session for one pipeline stage.

    Args:
        args: parsed CLI namespace (see init_args); the *mode*-specific
            ``*_model_dir`` attribute selects the model directory.
        mode (str): one of 'det', 'cls', 'rec', 'table', 'ser', 're',
            'sr', 'layout'; anything else falls back to the e2e model.
        logger: logger for status/warning messages.

    Returns:
        With ``args.use_onnx``: (onnx_session, first_input, None, None).
        Otherwise: (paddle_predictor, input_tensor(s), output_tensors,
        config) — input is a list of handles for 'ser'/'re', a single
        handle for every other mode.
    """
    if mode == "det":
        model_dir = args.det_model_dir
    elif mode == 'cls':
        model_dir = args.cls_model_dir
    elif mode == 'rec':
        model_dir = args.rec_model_dir
    elif mode == 'table':
        model_dir = args.table_model_dir
    elif mode == 'ser':
        model_dir = args.ser_model_dir
    elif mode == 're':
        model_dir = args.re_model_dir
    elif mode == "sr":
        model_dir = args.sr_model_dir
    elif mode == 'layout':
        model_dir = args.layout_model_dir
    else:
        model_dir = args.e2e_model_dir

    if model_dir is None:
        logger.info("not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)
    if args.use_onnx:
        import onnxruntime as ort
        # for onnx, model_dir is the .onnx file itself
        model_file_path = model_dir
        if not os.path.exists(model_file_path):
            raise ValueError("not find model file path {}".format(
                model_file_path))
        sess = ort.InferenceSession(model_file_path)
        return sess, sess.get_inputs()[0], None, None

    else:
        # accept either model.pdmodel or inference.pdmodel naming
        file_names = ['model', 'inference']
        for file_name in file_names:
            model_file_path = '{}/{}.pdmodel'.format(model_dir, file_name)
            params_file_path = '{}/{}.pdiparams'.format(model_dir, file_name)
            if os.path.exists(model_file_path) and os.path.exists(
                    params_file_path):
                break
        if not os.path.exists(model_file_path):
            raise ValueError(
                "not find model.pdmodel or inference.pdmodel in {}".format(
                    model_dir))
        if not os.path.exists(params_file_path):
            raise ValueError(
                "not find model.pdiparams or inference.pdiparams in {}".format(
                    model_dir))

        config = inference.Config(model_file_path, params_file_path)

        # fp16 is only honored together with TensorRT; everything else
        # falls back to fp32
        if hasattr(args, 'precision'):
            if args.precision == "fp16" and args.use_tensorrt:
                precision = inference.PrecisionType.Half
            elif args.precision == "int8":
                precision = inference.PrecisionType.Int8
            else:
                precision = inference.PrecisionType.Float32
        else:
            precision = inference.PrecisionType.Float32

        if args.use_gpu:
            gpu_id = get_infer_gpuid()
            if gpu_id is None:
                logger.warning(
                    "GPU is not found in current device by nvidia-smi. Please check your device or ignore it if run on jetson."
                )
            config.enable_use_gpu(args.gpu_mem, args.gpu_id)
            if args.use_tensorrt:
                config.enable_tensorrt_engine(
                    workspace_size=1 << 30,
                    precision_mode=precision,
                    max_batch_size=args.max_batch_size,
                    min_subgraph_size=args.
                    min_subgraph_size,  # skip the minmum trt subgraph
                    use_calib_mode=False)

                # collect shape
                trt_shape_f = os.path.join(model_dir,
                                           f"{mode}_trt_dynamic_shape.txt")

                if not os.path.exists(trt_shape_f):
                    config.collect_shape_range_info(trt_shape_f)
                    logger.info(
                        f"collect dynamic shape info into : {trt_shape_f}")
                try:
                    config.enable_tuned_tensorrt_dynamic_shape(trt_shape_f,
                                                               True)
                except Exception as E:
                    logger.info(E)
                    logger.info("Please keep your paddlepaddle-gpu >= 2.3.0!")

        elif args.use_npu:
            config.enable_custom_device("npu")
        elif args.use_xpu:
            config.enable_xpu(10 * 1024 * 1024)
        else:
            config.disable_gpu()
            if args.enable_mkldnn:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
                if args.precision == "fp16":
                    config.enable_mkldnn_bfloat16()
            if hasattr(args, "cpu_threads"):
                config.set_cpu_math_library_num_threads(args.cpu_threads)
            else:
                # default cpu threads as 10
                config.set_cpu_math_library_num_threads(10)
        # enable memory optim
        config.enable_memory_optim()
        config.disable_glog_info()
        # these fusion passes are known to misbehave for these models
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
        config.delete_pass("matmul_transpose_reshape_fuse_pass")
        if mode == 're':
            config.delete_pass("simplify_with_basic_ops_pass")
        if mode == 'table':
            config.delete_pass("fc_fuse_pass")  # not supported for table
        config.switch_use_feed_fetch_ops(False)
        config.switch_ir_optim(True)

        # create predictor
        predictor = inference.create_predictor(config)
        input_names = predictor.get_input_names()
        if mode in ['ser', 're']:
            input_tensor = []
            for name in input_names:
                input_tensor.append(predictor.get_input_handle(name))
        else:
            # single-input models: keep the (last) input handle
            for name in input_names:
                input_tensor = predictor.get_input_handle(name)
        output_tensors = get_output_tensors(args, mode, predictor)
        return predictor, input_tensor, output_tensors, config
289
+
290
+
291
def get_output_tensors(args, mode, predictor):
    """Collect output handles for a predictor.

    For CTC-style recognizers (CRNN / SVTR_LCNet) the single softmax
    output is preferred when the model exposes it; in every other case
    all outputs are returned in declaration order.
    """
    names = predictor.get_output_names()
    if mode == "rec" and args.rec_algorithm in ["CRNN", "SVTR_LCNet"]:
        preferred = 'softmax_0.tmp_0'
        if preferred in names:
            return [predictor.get_output_handle(preferred)]
    return [predictor.get_output_handle(name) for name in names]
307
+
308
+
309
def get_infer_gpuid():
    """Return the first visible GPU/ROCm device id, or 0 when unspecified.

    On Windows, and when the relevant *_VISIBLE_DEVICES variable is unset
    or empty, falls back to device 0.
    """
    if platform.system() == "Windows":
        return 0

    if not paddle.fluid.core.is_compiled_with_rocm():
        env_name = "CUDA_VISIBLE_DEVICES"
    else:
        env_name = "HIP_VISIBLE_DEVICES"
    # Read the environment directly instead of shelling out to
    # ``env | grep``, which is slow and not portable.
    visible = os.environ.get(env_name)
    if not visible:
        return 0
    # BUGFIX: the original did int(gpu_id[0]) on the raw string, which
    # truncated multi-digit ids (e.g. "10" -> 1); parse the first
    # comma-separated id in full instead.
    return int(visible.split(",")[0])
324
+
325
+
326
def draw_e2e_res(dt_boxes, strs, img_path):
    """Draw end-to-end detection polygons and their transcriptions.

    Args:
        dt_boxes: iterable of polygon point arrays, each of shape (N, 2).
        strs: recognized text for each polygon (parallel to dt_boxes).
        img_path (str): path of the image to load and draw on.

    Returns:
        The annotated BGR image (numpy array).
    """
    src_im = cv2.imread(img_path)
    # loop variable renamed from `str`, which shadowed the builtin
    for box, txt in zip(dt_boxes, strs):
        box = box.astype(np.int32).reshape((-1, 1, 2))
        cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
        # label each polygon at its first vertex
        cv2.putText(
            src_im,
            txt,
            org=(int(box[0, 0, 0]), int(box[0, 0, 1])),
            fontFace=cv2.FONT_HERSHEY_COMPLEX,
            fontScale=0.7,
            color=(0, 255, 0),
            thickness=1)
    return src_im
340
+
341
+
342
def draw_text_det_res(dt_boxes, img):
    """Outline every detected text box on *img* in cyan and return it."""
    for quad in dt_boxes:
        pts = np.array(quad).astype(np.int32).reshape(-1, 2)
        cv2.polylines(img, [pts], True, color=(255, 255, 0), thickness=2)
    return img
347
+
348
+
349
def resize_img(img, input_size=600):
    """
    Resize *img* so that its longest side equals *input_size*,
    preserving the aspect ratio.
    """
    arr = np.array(img)
    longest_side = np.max(arr.shape[0:2])
    scale = float(input_size) / float(longest_side)
    return cv2.resize(arr, None, None, fx=scale, fy=scale)
359
+
360
+
361
def draw_ocr(image,
             boxes,
             txts=None,
             scores=None,
             drop_score=0.5,
             font_path="./doc/fonts/simfang.ttf"):
    """
    Visualize the results of OCR detection and recognition
    args:
        image(Image|array): RGB image
        boxes(list): boxes with shape(N, 4, 2)
        txts(list): the texts
        scores(list): txts corresponding scores
        drop_score(float): only scores greater than drop_threshold will be visualized
        font_path: the path of font which is used to draw text
    return(array):
        the visualized img (with a text panel concatenated on the right
        when txts is given)
    """
    if scores is None:
        scores = [1] * len(boxes)
    box_num = len(boxes)
    for i in range(box_num):
        # skip low-confidence and NaN-scored boxes
        if scores is not None and (scores[i] < drop_score or
                                   math.isnan(scores[i])):
            continue
        box = np.reshape(np.array(boxes[i]), [-1, 1, 2]).astype(np.int64)
        image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2)
    if txts is not None:
        # shrink the annotated image and append the rendered text panel
        img = np.array(resize_img(image, input_size=600))
        txt_img = text_visual(
            txts,
            scores,
            img_h=img.shape[0],
            img_w=600,
            threshold=drop_score,
            font_path=font_path)
        img = np.concatenate([np.array(img), np.array(txt_img)], axis=1)
        return img
    return image
400
+
401
+
402
def draw_ocr_box_txt(image,
                     boxes,
                     txts=None,
                     scores=None,
                     drop_score=0.5,
                     font_path="./doc/fonts/simfang.ttf"):
    """Render a side-by-side visualization: the input image with filled
    box overlays on the left, and the recognized text drawn inside the
    corresponding box shapes on a white canvas on the right.

    Args:
        image (PIL.Image): input image.
        boxes: text boxes, each a quadrilateral of 4 (x, y) points.
        txts: recognized text per box; when missing or mismatched in
            length, boxes are drawn without text.
        scores: per-box confidence; boxes below *drop_score* are skipped.
        drop_score (float): visualization threshold.
        font_path (str): TTF font used to render the text.

    Returns:
        numpy array of shape (h, 2*w, 3) with both halves concatenated.
    """
    h, w = image.height, image.width
    img_left = image.copy()
    img_right = np.ones((h, w, 3), dtype=np.uint8) * 255
    # fixed seed so box colors are reproducible across runs
    random.seed(0)

    draw_left = ImageDraw.Draw(img_left)
    if txts is None or len(txts) != len(boxes):
        txts = [None] * len(boxes)
    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        if scores is not None and scores[idx] < drop_score:
            continue
        color = (random.randint(0, 255), random.randint(0, 255),
                 random.randint(0, 255))
        draw_left.polygon(box, fill=color)
        img_right_text = draw_box_txt_fine((w, h), box, txt, font_path)
        pts = np.array(box, np.int32).reshape((-1, 1, 2))
        cv2.polylines(img_right_text, [pts], True, color, 1)
        # AND with the accumulated canvas so earlier text stays visible
        img_right = cv2.bitwise_and(img_right, img_right_text)
    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(Image.fromarray(img_right), (w, 0, w * 2, h))
    return np.array(img_show)
431
+
432
+
433
def draw_box_txt_fine(img_size, box, txt, font_path="./doc/fonts/simfang.ttf"):
    """Draw *txt* on an axis-aligned canvas sized like *box*, then
    perspective-warp it into the box position on a white image.

    Args:
        img_size: output image size as (width, height).
        box: quadrilateral of 4 (x, y) points, ordered starting from the
            top-left corner.
        txt: text to draw; may be empty/None for an empty box.
        font_path (str): TTF font used to render the text.

    Returns:
        numpy uint8 image of *img_size* with the warped text.
    """
    # edge lengths of the quad give the upright canvas dimensions
    box_height = int(
        math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][1])**2))
    box_width = int(
        math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][1])**2))

    if box_height > 2 * box_width and box_height > 30:
        # markedly vertical box: draw horizontally, then rotate into place
        img_text = Image.new('RGB', (box_height, box_width), (255, 255, 255))
        draw_text = ImageDraw.Draw(img_text)
        if txt:
            font = create_font(txt, (box_height, box_width), font_path)
            draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
        img_text = img_text.transpose(Image.ROTATE_270)
    else:
        img_text = Image.new('RGB', (box_width, box_height), (255, 255, 255))
        draw_text = ImageDraw.Draw(img_text)
        if txt:
            font = create_font(txt, (box_width, box_height), font_path)
            draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)

    # map the upright text canvas corners onto the box corners
    pts1 = np.float32(
        [[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]])
    pts2 = np.array(box, dtype=np.float32)
    M = cv2.getPerspectiveTransform(pts1, pts2)

    img_text = np.array(img_text, dtype=np.uint8)
    img_right_text = cv2.warpPerspective(
        img_text,
        M,
        img_size,
        flags=cv2.INTER_NEAREST,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255))
    return img_right_text
467
+
468
+
469
def create_font(txt, sz, font_path="./doc/fonts/simfang.ttf"):
    """Create a truetype font sized so *txt* fits in a box of size *sz*.

    Args:
        txt (str): text that must fit.
        sz: target box as (width, height); the font starts at ~99% of the
            box height and is shrunk if the rendered text is too wide.
        font_path (str): TTF font file.

    Returns:
        PIL.ImageFont.FreeTypeFont sized to fit.
    """
    font_size = int(sz[1] * 0.99)
    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
    # COMPAT: ImageFont.getsize was removed in Pillow 10; prefer
    # getlength and fall back for older Pillow versions.
    if hasattr(font, "getlength"):
        length = font.getlength(txt)
    else:
        length = font.getsize(txt)[0]
    if length > sz[0]:
        font_size = int(font_size * sz[0] / length)
        font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
    return font
477
+
478
+
479
def str_count(s):
    """
    Count the display width of *s* in units of full-width characters.

    ASCII letters, digits and whitespace together count as half a
    character each (total rounded up); every other character — CJK,
    punctuation, etc. — counts as one.
    args:
        s(string): the input of string
    return(int):
        the width in full-width character units
    """
    import string
    # the original also tracked separate zh/punctuation counters, but
    # they were never used — only the halfwidth total matters
    halfwidth = sum(1 for c in s
                    if c in string.ascii_letters or c.isdigit() or c.isspace())
    return len(s) - math.ceil(halfwidth / 2)
501
+
502
+
503
def text_visual(texts,
                scores,
                img_h=400,
                img_w=600,
                threshold=0.,
                font_path="./doc/simfang.ttf"):
    """
    create new blank img and draw txt on it
    args:
        texts(list): the text will be draw
        scores(list|None): corresponding score of each txt; entries below
            *threshold* (or NaN) are skipped
        img_h(int): the height of blank img
        img_w(int): the width of blank img
        font_path: the path of font which is used to draw text
    return(array):
        one page, or several pages concatenated horizontally when the
        text overflows a single page
    """
    if scores is not None:
        assert len(texts) == len(
            scores), "The number of txts and corresponding scores must match"

    def create_blank_img():
        # white page with a black 1-px column at the right edge as separator
        blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255
        blank_img[:, img_w - 1:] = 0
        blank_img = Image.fromarray(blank_img).convert("RGB")
        draw_txt = ImageDraw.Draw(blank_img)
        return blank_img, draw_txt

    blank_img, draw_txt = create_blank_img()

    font_size = 20
    txt_color = (0, 0, 0)
    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")

    # vertical distance between consecutive text lines
    gap = font_size + 5
    txt_img_list = []
    # count: current line on the page; index: 1-based number of the next
    # visible (non-skipped) text
    count, index = 1, 0
    for idx, txt in enumerate(texts):
        index += 1
        if scores[idx] < threshold or math.isnan(scores[idx]):
            index -= 1
            continue
        first_line = True
        # wrap lines that are wider than the page; only the first wrapped
        # line carries the "N:" prefix
        while str_count(txt) >= img_w // font_size - 4:
            tmp = txt
            txt = tmp[:img_w // font_size - 4]
            if first_line:
                new_txt = str(index) + ': ' + txt
                first_line = False
            else:
                new_txt = ' ' + txt
            draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
            txt = tmp[img_w // font_size - 4:]
            if count >= img_h // gap - 1:
                # page full: flush it and start a fresh blank page
                txt_img_list.append(np.array(blank_img))
                blank_img, draw_txt = create_blank_img()
                count = 0
            count += 1
        # last (or only) line also shows the score
        if first_line:
            new_txt = str(index) + ': ' + txt + ' ' + '%.3f' % (scores[idx])
        else:
            new_txt = " " + txt + " " + '%.3f' % (scores[idx])
        draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
        # whether add new blank img or not
        if count >= img_h // gap - 1 and idx + 1 < len(texts):
            txt_img_list.append(np.array(blank_img))
            blank_img, draw_txt = create_blank_img()
            count = 0
        count += 1
    txt_img_list.append(np.array(blank_img))
    if len(txt_img_list) == 1:
        blank_img = np.array(txt_img_list[0])
    else:
        blank_img = np.concatenate(txt_img_list, axis=1)
    return np.array(blank_img)
577
+
578
+
579
def base64_to_cv2(b64str):
    """Decode a base64-encoded image string into a BGR numpy image."""
    import base64
    raw = base64.b64decode(b64str.encode('utf8'))
    buf = np.frombuffer(raw, np.uint8)
    return cv2.imdecode(buf, cv2.IMREAD_COLOR)
585
+
586
+
587
def draw_boxes(image, boxes, scores=None, drop_score=0.5):
    """Draw each box whose score reaches *drop_score* in red on *image*."""
    if scores is None:
        scores = [1] * len(boxes)
    for box, score in zip(boxes, scores):
        if score < drop_score:
            continue
        pts = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64)
        image = cv2.polylines(np.array(image), [pts], True, (255, 0, 0), 2)
    return image
596
+
597
+
598
def get_rotate_crop_image(img, points):
    """Crop a quadrilateral text region via perspective transform.

    The four *points* (ordered from the top-left, 4x2 float array) are
    warped onto an axis-aligned rectangle whose sides match the quad's
    edge lengths; crops that come out much taller than wide
    (h/w >= 1.5) are rotated 90 degrees so the text lies horizontally.
    """
    # Legacy axis-aligned cropping implementation, kept for reference:
    '''
    img_height, img_width = img.shape[0:2]
    left = int(np.min(points[:, 0]))
    right = int(np.max(points[:, 0]))
    top = int(np.min(points[:, 1]))
    bottom = int(np.max(points[:, 1]))
    img_crop = img[top:bottom, left:right, :].copy()
    points[:, 0] = points[:, 0] - left
    points[:, 1] = points[:, 1] - top
    '''
    assert len(points) == 4, "shape of points must be 4*2"
    # output size from the longer of each pair of opposite edges
    img_crop_width = int(
        max(
            np.linalg.norm(points[0] - points[1]),
            np.linalg.norm(points[2] - points[3])))
    img_crop_height = int(
        max(
            np.linalg.norm(points[0] - points[3]),
            np.linalg.norm(points[1] - points[2])))
    pts_std = np.float32([[0, 0], [img_crop_width, 0],
                          [img_crop_width, img_crop_height],
                          [0, img_crop_height]])
    M = cv2.getPerspectiveTransform(points, pts_std)
    dst_img = cv2.warpPerspective(
        img,
        M, (img_crop_width, img_crop_height),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)
    dst_img_height, dst_img_width = dst_img.shape[0:2]
    if dst_img_height * 1.0 / dst_img_width >= 1.5:
        dst_img = np.rot90(dst_img)
    return dst_img
631
+
632
+
633
def get_minarea_rect_crop(img, points):
    """Crop the minimum-area bounding rectangle of a polygon.

    The rectangle corners from cv2.boxPoints are reordered to start at
    the top-left and proceed clockwise, then handed to
    get_rotate_crop_image for the perspective crop.
    """
    bounding_box = cv2.minAreaRect(np.array(points).astype(np.int32))
    # sort corners by x: points[0..1] are the two left-most,
    # points[2..3] the two right-most
    points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

    index_a, index_b, index_c, index_d = 0, 1, 2, 3
    # of the two left-most corners, the upper one is top-left (a),
    # the lower one bottom-left (d)
    if points[1][1] > points[0][1]:
        index_a = 0
        index_d = 1
    else:
        index_a = 1
        index_d = 0
    # of the two right-most corners, the upper one is top-right (b),
    # the lower one bottom-right (c)
    if points[3][1] > points[2][1]:
        index_b = 2
        index_c = 3
    else:
        index_b = 3
        index_c = 2

    box = [points[index_a], points[index_b], points[index_c], points[index_d]]
    crop_img = get_rotate_crop_image(img, np.array(box))
    return crop_img
654
+
655
+
656
def check_gpu(use_gpu):
    """Return *use_gpu*, forced to False when this paddle build lacks CUDA."""
    if not use_gpu:
        return use_gpu
    if paddle.is_compiled_with_cuda():
        return use_gpu
    return False
660
+
661
+
662
# This module is import-only; nothing runs when executed directly.
if __name__ == '__main__':
    pass
tools/infer_cls.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import absolute_import
16
+ from __future__ import division
17
+ from __future__ import print_function
18
+
19
+ import numpy as np
20
+
21
+ import os
22
+ import sys
23
+
24
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
25
+ sys.path.append(__dir__)
26
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
27
+
28
+ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
29
+
30
+ import paddle
31
+
32
+ from ppocr.data import create_operators, transform
33
+ from ppocr.modeling.architectures import build_model
34
+ from ppocr.postprocess import build_post_process
35
+ from ppocr.utils.save_load import load_model
36
+ from ppocr.utils.utility import get_image_file_list
37
+ import tools.program as program
38
+
39
+
40
def main():
    """Run text-direction classification on every image in Global.infer_img."""
    global_config = config['Global']

    # Post-processing turns raw logits into (label, score) results.
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # Build the network and restore its trained weights.
    model = build_model(config['Architecture'])
    load_model(config, model)

    # Reuse the Eval pipeline: drop label ops, keep only the image tensor,
    # and switch SSLRotateResize into test mode.
    transforms = []
    for op in config['Eval']['dataset']['transforms']:
        op_name = list(op)[0]
        if 'Label' in op_name:
            continue
        if op_name == 'KeepKeys':
            op[op_name]['keep_keys'] = ['image']
        elif op_name == "SSLRotateResize":
            op[op_name]["mode"] = "test"
        transforms.append(op)
    global_config['infer_mode'] = True
    ops = create_operators(transforms, global_config)

    model.eval()
    for file in get_image_file_list(config['Global']['infer_img']):
        logger.info("infer_img: {}".format(file))
        with open(file, 'rb') as f:
            img = f.read()
            data = {'image': img}
        batch = transform(data, ops)

        images = paddle.to_tensor(np.expand_dims(batch[0], axis=0))
        post_result = post_process_class(model(images))
        for rec_result in post_result:
            logger.info('\t result: {}'.format(rec_result))
    logger.info("success!")


if __name__ == '__main__':
    config, device, logger, vdl_writer = program.preprocess()
    main()
tools/infer_det.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import absolute_import
16
+ from __future__ import division
17
+ from __future__ import print_function
18
+
19
+ import numpy as np
20
+
21
+ import os
22
+ import sys
23
+
24
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
25
+ sys.path.append(__dir__)
26
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
27
+
28
+ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
29
+
30
+ import cv2
31
+ import json
32
+ import paddle
33
+
34
+ from ppocr.data import create_operators, transform
35
+ from ppocr.modeling.architectures import build_model
36
+ from ppocr.postprocess import build_post_process
37
+ from ppocr.utils.save_load import load_model
38
+ from ppocr.utils.utility import get_image_file_list
39
+ import tools.program as program
40
+
41
+
42
def draw_det_res(dt_boxes, config, img, img_name, save_path):
    """Draw detected text polygons on `img` and save the visualization.

    Args:
        dt_boxes: iterable of polygons, each an (N, 2) array-like of points.
        config: global config dict (unused; kept for interface compatibility).
        img: BGR image (numpy array); drawn on in place.
        img_name: source image path, used for the output file name.
        save_path: output directory, created if missing.
    """
    if len(dt_boxes) > 0:
        # cv2 is already imported at module level; the former function-local
        # `import cv2` was redundant and has been removed.
        src_im = img
        for box in dt_boxes:
            box = np.array(box).astype(np.int32).reshape((-1, 1, 2))
            cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
        # exist_ok avoids the race between the existence check and creation.
        os.makedirs(save_path, exist_ok=True)
        save_path = os.path.join(save_path, os.path.basename(img_name))
        cv2.imwrite(save_path, src_im)
        logger.info("The detected Image saved in {}".format(save_path))
54
+
55
+
56
@paddle.no_grad()
def main():
    """Run text detection on Global.infer_img, dump boxes and save drawings."""
    global_config = config['Global']

    # Build the detector and restore its trained weights.
    model = build_model(config['Architecture'])
    load_model(config, model)

    # Post-processing turns the raw probability maps into polygon boxes.
    post_process_class = build_post_process(config['PostProcess'])

    # Reuse the Eval transforms, dropping label ops; keep image + shape info
    # so boxes can be mapped back to the original image scale.
    transforms = []
    for op in config['Eval']['dataset']['transforms']:
        op_name = list(op)[0]
        if 'Label' in op_name:
            continue
        elif op_name == 'KeepKeys':
            op[op_name]['keep_keys'] = ['image', 'shape']
        transforms.append(op)

    ops = create_operators(transforms, global_config)

    save_res_path = config['Global']['save_res_path']
    if not os.path.exists(os.path.dirname(save_res_path)):
        os.makedirs(os.path.dirname(save_res_path))

    model.eval()
    with open(save_res_path, "wb") as fout:
        for file in get_image_file_list(config['Global']['infer_img']):
            logger.info("infer_img: {}".format(file))
            with open(file, 'rb') as f:
                img = f.read()
                data = {'image': img}
            batch = transform(data, ops)

            images = np.expand_dims(batch[0], axis=0)
            shape_list = np.expand_dims(batch[1], axis=0)
            images = paddle.to_tensor(images)
            preds = model(images)
            post_result = post_process_class(preds, shape_list)

            src_img = cv2.imread(file)

            dt_boxes_json = []
            # Multi-head algorithms return one result set per head, keyed by
            # head name; single-head ones return a plain list.
            if isinstance(post_result, dict):
                det_box_json = {}
                for k in post_result.keys():
                    boxes = post_result[k][0]['points']
                    dt_boxes_list = []
                    for box in boxes:
                        tmp_json = {"transcription": ""}
                        tmp_json['points'] = np.array(box).tolist()
                        dt_boxes_list.append(tmp_json)
                    det_box_json[k] = dt_boxes_list
                    save_det_path = os.path.dirname(config['Global'][
                        'save_res_path']) + "/det_results_{}/".format(k)
                    draw_det_res(boxes, config, src_img, file, save_det_path)
            else:
                boxes = post_result[0]['points']
                # write result (the duplicate `dt_boxes_json = []` that used
                # to sit here was redundant and has been removed)
                for box in boxes:
                    tmp_json = {"transcription": ""}
                    tmp_json['points'] = np.array(box).tolist()
                    dt_boxes_json.append(tmp_json)
                save_det_path = os.path.dirname(config['Global'][
                    'save_res_path']) + "/det_results/"
                draw_det_res(boxes, config, src_img, file, save_det_path)
            otstr = file + "\t" + json.dumps(dt_boxes_json) + "\n"
            fout.write(otstr.encode())

    logger.info("success!")


if __name__ == '__main__':
    config, device, logger, vdl_writer = program.preprocess()
    main()
tools/infer_e2e.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import absolute_import
16
+ from __future__ import division
17
+ from __future__ import print_function
18
+
19
+ import numpy as np
20
+
21
+ import os
22
+ import sys
23
+
24
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
25
+ sys.path.append(__dir__)
26
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
27
+
28
+ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
29
+
30
+ import cv2
31
+ import json
32
+ import paddle
33
+
34
+ from ppocr.data import create_operators, transform
35
+ from ppocr.modeling.architectures import build_model
36
+ from ppocr.postprocess import build_post_process
37
+ from ppocr.utils.save_load import load_model
38
+ from ppocr.utils.utility import get_image_file_list
39
+ import tools.program as program
40
+ from PIL import Image, ImageDraw, ImageFont
41
+ import math
42
+
43
+
44
def draw_e2e_res_for_chinese(image,
                             boxes,
                             txts,
                             config,
                             img_name,
                             font_path="./doc/simfang.ttf"):
    """Visualize Chinese e2e results: blended boxes left, text layout right.

    Args:
        image: PIL RGB image.
        boxes: detected polygons, each a sequence of (x, y) points.
        txts: recognized text for each polygon.
        config: global config dict; Global.save_res_path fixes the output dir.
        img_name: source image path, used for the output file name.
        font_path: TrueType font able to render CJK glyphs.
    """
    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new('RGB', (w, h), (255, 255, 255))

    import random

    # Fixed seed so each polygon gets the same color on every run.
    random.seed(0)
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)
    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        box = np.array(box)
        box = [tuple(x) for x in box]
        color = (random.randint(0, 255), random.randint(0, 255),
                 random.randint(0, 255))
        draw_left.polygon(box, fill=color)
        draw_right.polygon(box, outline=color)
        font = ImageFont.truetype(font_path, 15, encoding="utf-8")
        draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
    # Left half: source image blended with filled boxes; right half: text-only.
    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))

    save_e2e_path = os.path.dirname(config['Global'][
        'save_res_path']) + "/e2e_results/"
    if not os.path.exists(save_e2e_path):
        os.makedirs(save_e2e_path)
    save_path = os.path.join(save_e2e_path, os.path.basename(img_name))
    # PIL is RGB; cv2.imwrite expects BGR, hence the channel reversal.
    cv2.imwrite(save_path, np.array(img_show)[:, :, ::-1])
    logger.info("The e2e Image saved in {}".format(save_path))
80
+
81
+
82
def draw_e2e_res(dt_boxes, strs, config, img, img_name):
    """Draw end-to-end results (boxes plus recognized text) on `img` and save.

    Args:
        dt_boxes: detected polygons, each an (N, 2) numpy array.
        strs: recognized text per polygon, same length as `dt_boxes`.
        config: global config dict; Global.save_res_path fixes the output dir.
        img: BGR image (numpy array); drawn on in place.
        img_name: source image path, used for the output file name.
    """
    if len(dt_boxes) > 0:
        src_im = img
        # Loop variable renamed from `str` to `text`: it shadowed the builtin.
        for box, text in zip(dt_boxes, strs):
            box = box.astype(np.int32).reshape((-1, 1, 2))
            cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
            cv2.putText(
                src_im,
                text,
                org=(int(box[0, 0, 0]), int(box[0, 0, 1])),
                fontFace=cv2.FONT_HERSHEY_COMPLEX,
                fontScale=0.7,
                color=(0, 255, 0),
                thickness=1)
        save_det_path = os.path.dirname(config['Global'][
            'save_res_path']) + "/e2e_results/"
        # exist_ok avoids the race between the existence check and creation.
        os.makedirs(save_det_path, exist_ok=True)
        save_path = os.path.join(save_det_path, os.path.basename(img_name))
        cv2.imwrite(save_path, src_im)
        logger.info("The e2e Image saved in {}".format(save_path))
103
+
104
+
105
def main():
    """Run end-to-end (detection + recognition) inference on Global.infer_img,
    dump one JSON record per image and save visualizations."""
    global_config = config['Global']

    # build model
    model = build_model(config['Architecture'])

    load_model(config, model)

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # create data ops: reuse the Eval transforms, dropping label ops and
    # keeping only the image tensor plus its shape record.
    transforms = []
    for op in config['Eval']['dataset']['transforms']:
        op_name = list(op)[0]
        if 'Label' in op_name:
            continue
        elif op_name == 'KeepKeys':
            op[op_name]['keep_keys'] = ['image', 'shape']
        transforms.append(op)

    ops = create_operators(transforms, global_config)

    save_res_path = config['Global']['save_res_path']
    if not os.path.exists(os.path.dirname(save_res_path)):
        os.makedirs(os.path.dirname(save_res_path))

    model.eval()
    with open(save_res_path, "wb") as fout:
        for file in get_image_file_list(config['Global']['infer_img']):
            logger.info("infer_img: {}".format(file))
            with open(file, 'rb') as f:
                img = f.read()
                data = {'image': img}
            batch = transform(data, ops)
            images = np.expand_dims(batch[0], axis=0)
            shape_list = np.expand_dims(batch[1], axis=0)
            images = paddle.to_tensor(images)
            preds = model(images)
            post_result = post_process_class(preds, shape_list)
            points, strs = post_result['points'], post_result['texts']
            # write result
            # NOTE(review): the loop variable `str` shadows the builtin.
            dt_boxes_json = []
            for poly, str in zip(points, strs):
                tmp_json = {"transcription": str}
                tmp_json['points'] = poly.tolist()
                dt_boxes_json.append(tmp_json)
            otstr = file + "\t" + json.dumps(dt_boxes_json) + "\n"
            fout.write(otstr.encode())
            src_img = cv2.imread(file)
            # Pick the visualization able to render the predicted text:
            # cv2.putText for Latin, PIL with a CJK font for Chinese.
            if global_config['infer_visual_type'] == 'EN':
                draw_e2e_res(points, strs, config, src_img, file)
            elif global_config['infer_visual_type'] == 'CN':
                src_img = Image.fromarray(
                    cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB))
                draw_e2e_res_for_chinese(
                    src_img,
                    points,
                    strs,
                    config,
                    file,
                    font_path="./doc/fonts/simfang.ttf")

    logger.info("success!")


if __name__ == '__main__':
    config, device, logger, vdl_writer = program.preprocess()
    main()
tools/infer_kie.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import absolute_import
16
+ from __future__ import division
17
+ from __future__ import print_function
18
+
19
+ import numpy as np
20
+ import paddle.nn.functional as F
21
+
22
+ import os
23
+ import sys
24
+
25
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
26
+ sys.path.append(__dir__)
27
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
28
+
29
+ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
30
+
31
+ import cv2
32
+ import paddle
33
+
34
+ from ppocr.data import create_operators, transform
35
+ from ppocr.modeling.architectures import build_model
36
+ from ppocr.utils.save_load import load_model
37
+ import tools.program as program
38
+ import time
39
+
40
+
41
def read_class_list(filepath):
    """Read class names from `filepath`, one name per line.

    Returns a dict mapping the 0-based line index to the class name
    (trailing newline removed; other whitespace preserved).
    """
    with open(filepath, "r") as f:
        return {idx: line.strip("\n") for idx, line in enumerate(f)}
48
+
49
+
50
def draw_kie_result(batch, node, idx_to_cls, count):
    """Visualize KIE node classification: boxes on the left, labels right.

    Args:
        batch: transformed sample; batch[6] is the source image and
            batch[7] the text-line boxes as [x1, y1, x2, y2].
        node: per-box class scores, shape (num_boxes, num_classes).
        idx_to_cls: mapping from class index to class name.
        count: running image index, used for the output file name.
    """
    img = batch[6].copy()
    boxes = batch[7]
    h, w = img.shape[:2]
    pred_img = np.ones((h, w * 2, 3), dtype=np.uint8) * 255
    # Per-box argmax gives the predicted label and its confidence.
    max_value, max_idx = paddle.max(node, -1), paddle.argmax(node, -1)
    node_pred_label = max_idx.numpy().tolist()
    node_pred_score = max_value.numpy().tolist()

    for i, box in enumerate(boxes):
        # Predictions may cover fewer boxes than were detected.
        if i >= len(node_pred_label):
            break
        new_box = [[box[0], box[1]], [box[2], box[1]], [box[2], box[3]],
                   [box[0], box[3]]]
        Pts = np.array([new_box], np.int32)
        cv2.polylines(
            img, [Pts.reshape((-1, 1, 2))],
            True,
            color=(255, 255, 0),
            thickness=1)
        x_min = int(min([point[0] for point in new_box]))
        y_min = int(min([point[1] for point in new_box]))

        pred_label = node_pred_label[i]
        if pred_label in idx_to_cls:
            pred_label = idx_to_cls[pred_label]
        pred_score = '{:.2f}'.format(node_pred_score[i])
        text = pred_label + '(' + pred_score + ')'
        # The label panel is twice as wide as the image, hence x_min * 2.
        cv2.putText(pred_img, text, (x_min * 2, y_min),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
    # Side-by-side canvas: source image | double-width prediction panel.
    vis_img = np.ones((h, w * 3, 3), dtype=np.uint8) * 255
    vis_img[:, :w] = img
    vis_img[:, w:] = pred_img
    save_kie_path = os.path.dirname(config['Global'][
        'save_res_path']) + "/kie_results/"
    if not os.path.exists(save_kie_path):
        os.makedirs(save_kie_path)
    save_path = os.path.join(save_kie_path, str(count) + ".png")
    cv2.imwrite(save_path, vis_img)
    logger.info("The Kie Image saved in {}".format(save_path))
90
+
91
def write_kie_result(fout, node, data):
    """
    Write infer result to output file, sorted by the predict label of each line.
    The format keeps the same as the input with additional score attribute.

    Args:
        fout: open text file handle; one JSON line is appended per call.
        node: per-box class scores, shape (num_boxes, num_classes).
        data: input sample; data['label'] holds the JSON annotation list
            aligned with the prediction order.
    """
    import json
    label = data['label']
    annotations = json.loads(label)
    # Per-box argmax gives the predicted label and its confidence.
    max_value, max_idx = paddle.max(node, -1), paddle.argmax(node, -1)
    node_pred_label = max_idx.numpy().tolist()
    node_pred_score = max_value.numpy().tolist()
    res = []
    for i, label in enumerate(node_pred_label):
        pred_score = '{:.2f}'.format(node_pred_score[i])
        pred_res = {
            'label': label,
            'transcription': annotations[i]['transcription'],
            'score': pred_score,
            'points': annotations[i]['points'],
        }
        res.append(pred_res)
    res.sort(key=lambda x: x['label'])
    fout.writelines([json.dumps(res, ensure_ascii=False) + '\n'])
114
+
115
def main():
    """Run KIE inference over the label file listed in Global.infer_img,
    saving visualizations, JSON results, and a throughput figure."""
    global_config = config['Global']

    # build model
    model = build_model(config['Architecture'])
    load_model(config, model)

    # create data ops (labels are needed here, so no ops are filtered out)
    transforms = []
    for op in config['Eval']['dataset']['transforms']:
        transforms.append(op)

    data_dir = config['Eval']['dataset']['data_dir']

    ops = create_operators(transforms, global_config)

    save_res_path = config['Global']['save_res_path']
    class_path = config['Global']['class_path']
    idx_to_cls = read_class_list(class_path)
    os.makedirs(os.path.dirname(save_res_path), exist_ok=True)

    model.eval()

    # Timings before index `warmup_times` are excluded from the ips figure.
    warmup_times = 0
    count_t = []
    with open(save_res_path, "w") as fout:
        with open(config['Global']['infer_img'], "rb") as f:
            lines = f.readlines()
            for index, data_line in enumerate(lines):
                # NOTE(review): warmup_t is recorded but never used.
                if index == 10:
                    warmup_t = time.time()
                data_line = data_line.decode('utf-8')
                substr = data_line.strip("\n").split("\t")
                img_path, label = data_dir + "/" + substr[0], substr[1]
                data = {'img_path': img_path, 'label': label}
                with open(data['img_path'], 'rb') as f:
                    img = f.read()
                    data['image'] = img
                st = time.time()
                batch = transform(data, ops)
                batch_pred = [0] * len(batch)
                for i in range(len(batch)):
                    batch_pred[i] = paddle.to_tensor(
                        np.expand_dims(
                            batch[i], axis=0))
                # NOTE(review): st is reset here, so only the forward pass
                # (not the transform above) is timed.
                st = time.time()
                node, edge = model(batch_pred)
                node = F.softmax(node, -1)
                count_t.append(time.time() - st)
                draw_kie_result(batch, node, idx_to_cls, index)
                write_kie_result(fout, node, data)
    # Redundant: the with-statement above already closed fout.
    fout.close()
    logger.info("success!")
    logger.info("It took {} s for predict {} images.".format(
        np.sum(count_t), len(count_t)))
    ips = len(count_t[warmup_times:]) / np.sum(count_t[warmup_times:])
    logger.info("The ips is {} images/s".format(ips))


if __name__ == '__main__':
    config, device, logger, vdl_writer = program.preprocess()
    main()
tools/infer_kie_token_ser.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import absolute_import
16
+ from __future__ import division
17
+ from __future__ import print_function
18
+
19
+ import numpy as np
20
+
21
+ import os
22
+ import sys
23
+
24
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
25
+ sys.path.append(__dir__)
26
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
27
+
28
+ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
29
+ import cv2
30
+ import json
31
+ import paddle
32
+
33
+ from ppocr.data import create_operators, transform
34
+ from ppocr.modeling.architectures import build_model
35
+ from ppocr.postprocess import build_post_process
36
+ from ppocr.utils.save_load import load_model
37
+ from ppocr.utils.visual import draw_ser_results
38
+ from ppocr.utils.utility import get_image_file_list, load_vqa_bio_label_maps
39
+ import tools.program as program
40
+
41
+
42
def to_tensor(data):
    """Batch the fields of one sample, tensor-izing the numeric ones.

    Each field of `data` is wrapped in a singleton list; fields that are
    arrays, tensors, or plain numbers are converted to paddle Tensors
    (gaining a batch dimension), while other fields (e.g. lists of dicts)
    are kept as Python lists. Field order is preserved.
    """
    import numbers
    from collections import defaultdict
    data_dict = defaultdict(list)
    to_tensor_idxs = []

    for idx, v in enumerate(data):
        if isinstance(v, (np.ndarray, paddle.Tensor, numbers.Number)):
            if idx not in to_tensor_idxs:
                to_tensor_idxs.append(idx)
        data_dict[idx].append(v)
    for idx in to_tensor_idxs:
        data_dict[idx] = paddle.to_tensor(data_dict[idx])
    return list(data_dict.values())
56
+
57
+
58
class SerPredictor(object):
    """Semantic-entity-recognition (SER) predictor built from a config dict.

    Wraps model construction, weight loading, the PaddleOCR engine used for
    text detection/recognition, and the Eval data pipeline adapted for
    inference.
    """

    def __init__(self, config):
        global_config = config['Global']
        self.algorithm = config['Architecture']["algorithm"]

        # build post process
        self.post_process_class = build_post_process(config['PostProcess'],
                                                     global_config)

        # build model
        self.model = build_model(config['Architecture'])

        load_model(
            config, self.model, model_type=config['Architecture']["model_type"])

        # Imported lazily so importing this module does not require paddleocr.
        from paddleocr import PaddleOCR

        self.ocr_engine = PaddleOCR(
            use_angle_cls=False,
            show_log=False,
            rec_model_dir=global_config.get("kie_rec_model_dir", None),
            det_model_dir=global_config.get("kie_det_model_dir", None),
            use_gpu=global_config['use_gpu'])

        # create data ops: label ops receive the OCR engine so they can
        # produce pseudo labels at inference time; KeepKeys is widened to
        # everything the SER model and post-processing need.
        transforms = []
        for op in config['Eval']['dataset']['transforms']:
            op_name = list(op)[0]
            if 'Label' in op_name:
                op[op_name]['ocr_engine'] = self.ocr_engine
            elif op_name == 'KeepKeys':
                op[op_name]['keep_keys'] = [
                    'input_ids', 'bbox', 'attention_mask', 'token_type_ids',
                    'image', 'labels', 'segment_offset_id', 'ocr_info',
                    'entities'
                ]

            transforms.append(op)
        # Default to inference mode unless the config set it explicitly.
        if config["Global"].get("infer_mode", None) is None:
            global_config['infer_mode'] = True
        self.ops = create_operators(config['Eval']['dataset']['transforms'],
                                    global_config)
        self.model.eval()

    def __call__(self, data):
        """Run SER on one sample; `data` must contain 'img_path'.

        Returns (post_result, batch): decoded entities plus the raw model
        inputs, which the relation-extraction stage reuses.
        """
        with open(data["img_path"], 'rb') as f:
            img = f.read()
        data["image"] = img
        batch = transform(data, self.ops)
        batch = to_tensor(batch)
        preds = self.model(batch)

        post_result = self.post_process_class(
            preds, segment_offset_ids=batch[6], ocr_infos=batch[7])
        return post_result, batch
113
+
114
+
115
if __name__ == '__main__':
    config, device, logger, vdl_writer = program.preprocess()
    os.makedirs(config['Global']['save_res_path'], exist_ok=True)

    ser_engine = SerPredictor(config)

    # infer_mode == False: infer_img is a label file with image\tlabel lines
    # relative to the Eval data_dir; otherwise it is an image path or dir.
    if config["Global"].get("infer_mode", None) is False:
        data_dir = config['Eval']['dataset']['data_dir']
        with open(config['Global']['infer_img'], "rb") as f:
            infer_imgs = f.readlines()
    else:
        infer_imgs = get_image_file_list(config['Global']['infer_img'])

    with open(
            os.path.join(config['Global']['save_res_path'],
                         "infer_results.txt"),
            "w",
            encoding='utf-8') as fout:
        for idx, info in enumerate(infer_imgs):
            if config["Global"].get("infer_mode", None) is False:
                data_line = info.decode('utf-8')
                substr = data_line.strip("\n").split("\t")
                img_path = os.path.join(data_dir, substr[0])
                data = {'img_path': img_path, 'label': substr[1]}
            else:
                img_path = info
                data = {'img_path': img_path}

            save_img_path = os.path.join(
                config['Global']['save_res_path'],
                os.path.splitext(os.path.basename(img_path))[0] + "_ser.jpg")

            # Only the first batch element is used (batch size is 1 here).
            result, _ = ser_engine(data)
            result = result[0]
            fout.write(img_path + "\t" + json.dumps(
                {
                    "ocr_info": result,
                }, ensure_ascii=False) + "\n")
            img_res = draw_ser_results(img_path, result)
            cv2.imwrite(save_img_path, img_res)

            logger.info("process: [{}/{}], save result to {}".format(
                idx, len(infer_imgs), save_img_path))
tools/infer_kie_token_ser_re.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import absolute_import
16
+ from __future__ import division
17
+ from __future__ import print_function
18
+
19
+ import numpy as np
20
+
21
+ import os
22
+ import sys
23
+
24
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
25
+ sys.path.append(__dir__)
26
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
27
+
28
+ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
29
+ import cv2
30
+ import json
31
+ import paddle
32
+ import paddle.distributed as dist
33
+
34
+ from ppocr.data import create_operators, transform
35
+ from ppocr.modeling.architectures import build_model
36
+ from ppocr.postprocess import build_post_process
37
+ from ppocr.utils.save_load import load_model
38
+ from ppocr.utils.visual import draw_re_results
39
+ from ppocr.utils.logging import get_logger
40
+ from ppocr.utils.utility import get_image_file_list, load_vqa_bio_label_maps, print_dict
41
+ from tools.program import ArgsParser, load_config, merge_config
42
+ from tools.infer_kie_token_ser import SerPredictor
43
+
44
+
45
class ReArgsParser(ArgsParser):
    """ArgsParser extended with a second config (-c_ser / -o_ser) for the
    SER stage that feeds the relation-extraction model."""

    def __init__(self):
        super(ReArgsParser, self).__init__()
        self.add_argument(
            "-c_ser", "--config_ser", help="ser configuration file to use")
        self.add_argument(
            "-o_ser",
            "--opt_ser",
            nargs='+',
            help="set ser configuration options ")

    def parse_args(self, argv=None):
        # The SER config is mandatory; the overrides are parsed like -o.
        args = super(ReArgsParser, self).parse_args(argv)
        assert args.config_ser is not None, \
            "Please specify --config_ser=ser_configure_file_path."
        args.opt_ser = self._parse_opt(args.opt_ser)
        return args
62
+
63
+
64
def make_input(ser_inputs, ser_results):
    """Convert SER outputs into the packed entity/relation inputs expected
    by the relation-extraction (RE) model.

    Entities and relations are stored in fixed-size int64 arrays where
    row 0 holds the element counts and the payload starts at row 1;
    unused rows are filled with -1.

    Returns:
        (ser_inputs, entity_idx_dict_batch): RE model inputs plus, per batch
        element, a mapping from packed entity index back to the original
        SER result index.
    """
    entities_labels = {'HEADER': 0, 'QUESTION': 1, 'ANSWER': 2}
    batch_size, max_seq_len = ser_inputs[0].shape[:2]
    entities = ser_inputs[8][0]
    ser_results = ser_results[0]
    assert len(entities) == len(ser_results)

    # entities: keep only non-'O' predictions, remembering their source index.
    start = []
    end = []
    label = []
    entity_idx_dict = {}
    for i, (res, entity) in enumerate(zip(ser_results, entities)):
        if res['pred'] == 'O':
            continue
        entity_idx_dict[len(start)] = i
        start.append(entity['start'])
        end.append(entity['end'])
        label.append(entities_labels[res['pred']])

    entities = np.full([max_seq_len + 1, 3], fill_value=-1, dtype=np.int64)
    entities[0, 0] = len(start)
    entities[1:len(start) + 1, 0] = start
    entities[0, 1] = len(end)
    entities[1:len(end) + 1, 1] = end
    entities[0, 2] = len(label)
    entities[1:len(label) + 1, 2] = label

    # relations: every QUESTION (1) paired with every ANSWER (2) is a
    # candidate relation for the RE model to score.
    head = []
    tail = []
    for i in range(len(label)):
        for j in range(len(label)):
            if label[i] == 1 and label[j] == 2:
                head.append(i)
                tail.append(j)

    relations = np.full([len(head) + 1, 2], fill_value=-1, dtype=np.int64)
    relations[0, 0] = len(head)
    relations[1:len(head) + 1, 0] = head
    relations[0, 1] = len(tail)
    relations[1:len(tail) + 1, 1] = tail

    # Broadcast the single packed sample across the batch dimension.
    entities = np.expand_dims(entities, axis=0)
    entities = np.repeat(entities, batch_size, axis=0)
    relations = np.expand_dims(relations, axis=0)
    relations = np.repeat(relations, batch_size, axis=0)

    # remove ocr_info segment_offset_id and label in ser input
    if isinstance(ser_inputs[0], paddle.Tensor):
        entities = paddle.to_tensor(entities)
        relations = paddle.to_tensor(relations)
    ser_inputs = ser_inputs[:5] + [entities, relations]

    # Same sample repeated, so the same index mapping applies to each element.
    entity_idx_dict_batch = []
    for b in range(batch_size):
        entity_idx_dict_batch.append(entity_idx_dict)
    return ser_inputs, entity_idx_dict_batch
122
+
123
+
124
class SerRePredictor(object):
    """Two-stage KIE predictor: runs SER first, then relation extraction."""

    def __init__(self, config, ser_config):
        global_config = config['Global']
        # Propagate infer_mode so both stages read their input the same way.
        if "infer_mode" in global_config:
            ser_config["Global"]["infer_mode"] = global_config["infer_mode"]

        self.ser_engine = SerPredictor(ser_config)

        # init re model

        # build post process
        self.post_process_class = build_post_process(config['PostProcess'],
                                                     global_config)

        # build model
        self.model = build_model(config['Architecture'])

        load_model(
            config, self.model, model_type=config['Architecture']["model_type"])

        self.model.eval()

    def __call__(self, data):
        """Run SER then RE on one sample; `data` must contain 'img_path'."""
        ser_results, ser_inputs = self.ser_engine(data)
        re_input, entity_idx_dict_batch = make_input(ser_inputs, ser_results)
        # Backbones without a visual branch do not take the image tensor.
        if self.model.backbone.use_visual_backbone is False:
            re_input.pop(4)
        preds = self.model(re_input)
        post_result = self.post_process_class(
            preds,
            ser_results=ser_results,
            entity_idx_dict_batch=entity_idx_dict_batch)
        return post_result
157
+
158
+
159
def preprocess():
    """Parse the RE and SER configs, select the device, and set up logging.

    Returns:
        (config, ser_config, device, logger)
    """
    FLAGS = ReArgsParser().parse_args()
    config = load_config(FLAGS.config)
    config = merge_config(config, FLAGS.opt)

    ser_config = load_config(FLAGS.config_ser)
    ser_config = merge_config(ser_config, FLAGS.opt_ser)

    logger = get_logger()

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']

    device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu'
    device = paddle.set_device(device)

    logger.info('{} re config {}'.format('*' * 10, '*' * 10))
    print_dict(config, logger)
    logger.info('\n')
    logger.info('{} ser config {}'.format('*' * 10, '*' * 10))
    print_dict(ser_config, logger)
    logger.info('train with paddle {} and device {}'.format(paddle.__version__,
                                                            device))
    return config, ser_config, device, logger
183
+
184
+
185
if __name__ == '__main__':
    config, ser_config, device, logger = preprocess()
    os.makedirs(config['Global']['save_res_path'], exist_ok=True)

    ser_re_engine = SerRePredictor(config, ser_config)

    # infer_mode == False: infer_img is a label file with image\tlabel lines
    # relative to the Eval data_dir; otherwise it is an image path or dir.
    if config["Global"].get("infer_mode", None) is False:
        data_dir = config['Eval']['dataset']['data_dir']
        with open(config['Global']['infer_img'], "rb") as f:
            infer_imgs = f.readlines()
    else:
        infer_imgs = get_image_file_list(config['Global']['infer_img'])

    with open(
            os.path.join(config['Global']['save_res_path'],
                         "infer_results.txt"),
            "w",
            encoding='utf-8') as fout:
        for idx, info in enumerate(infer_imgs):
            if config["Global"].get("infer_mode", None) is False:
                data_line = info.decode('utf-8')
                substr = data_line.strip("\n").split("\t")
                img_path = os.path.join(data_dir, substr[0])
                data = {'img_path': img_path, 'label': substr[1]}
            else:
                img_path = info
                data = {'img_path': img_path}

            save_img_path = os.path.join(
                config['Global']['save_res_path'],
                os.path.splitext(os.path.basename(img_path))[0] + "_ser_re.jpg")

            # Only the first batch element is used (batch size is 1 here).
            result = ser_re_engine(data)
            result = result[0]
            fout.write(img_path + "\t" + json.dumps(
                result, ensure_ascii=False) + "\n")
            img_res = draw_re_results(img_path, result)
            cv2.imwrite(save_img_path, img_res)

            logger.info("process: [{}/{}], save result to {}".format(
                idx, len(infer_imgs), save_img_path))
tools/infer_rec.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import absolute_import
16
+ from __future__ import division
17
+ from __future__ import print_function
18
+
19
+ import numpy as np
20
+
21
+ import os
22
+ import sys
23
+ import json
24
+
25
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
26
+ sys.path.append(__dir__)
27
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
28
+
29
+ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
30
+
31
+ import paddle
32
+
33
+ from ppocr.data import create_operators, transform
34
+ from ppocr.modeling.architectures import build_model
35
+ from ppocr.postprocess import build_post_process
36
+ from ppocr.utils.save_load import load_model
37
+ from ppocr.utils.utility import get_image_file_list
38
+ import tools.program as program
39
+
40
+
41
def main():
    """Run text-recognition inference on the images under Global.infer_img.

    Reads ``config`` and ``logger`` from module globals (set by
    ``program.preprocess()`` in the ``__main__`` guard). Builds the
    post-process, patches head output channels from the charset size,
    builds/loads the model, then writes one "<img path>\\t<result>" line per
    image to Global.save_res_path.
    """
    global_config = config['Global']

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # build model: derive the head's output channels from the charset size
    if hasattr(post_process_class, 'character'):
        char_num = len(getattr(post_process_class, 'character'))
        if config['Architecture']["algorithm"] in ["Distillation",
                                                   ]:  # distillation model
            for key in config['Architecture']["Models"]:
                if config['Architecture']['Models'][key]['Head'][
                        'name'] == 'MultiHead':  # for multi head
                    out_channels_list = {}
                    # SAR decode reserves 2 extra symbols beyond the charset
                    if config['PostProcess'][
                            'name'] == 'DistillationSARLabelDecode':
                        char_num = char_num - 2
                    out_channels_list['CTCLabelDecode'] = char_num
                    out_channels_list['SARLabelDecode'] = char_num + 2
                    config['Architecture']['Models'][key]['Head'][
                        'out_channels_list'] = out_channels_list
                else:
                    config['Architecture']["Models"][key]["Head"][
                        'out_channels'] = char_num
        elif config['Architecture']['Head'][
                'name'] == 'MultiHead':  # for multi head loss
            out_channels_list = {}
            if config['PostProcess']['name'] == 'SARLabelDecode':
                char_num = char_num - 2
            out_channels_list['CTCLabelDecode'] = char_num
            out_channels_list['SARLabelDecode'] = char_num + 2
            config['Architecture']['Head'][
                'out_channels_list'] = out_channels_list
        else:  # base rec model
            config['Architecture']["Head"]['out_channels'] = char_num

    model = build_model(config['Architecture'])

    load_model(config, model)

    # create data ops: drop label ops, switch resize to inference mode and
    # keep only the tensors the chosen algorithm consumes
    transforms = []
    for op in config['Eval']['dataset']['transforms']:
        op_name = list(op)[0]
        if 'Label' in op_name:
            continue
        elif op_name in ['RecResizeImg']:
            op[op_name]['infer_mode'] = True
        elif op_name == 'KeepKeys':
            if config['Architecture']['algorithm'] == "SRN":
                op[op_name]['keep_keys'] = [
                    'image', 'encoder_word_pos', 'gsrm_word_pos',
                    'gsrm_slf_attn_bias1', 'gsrm_slf_attn_bias2'
                ]
            elif config['Architecture']['algorithm'] == "SAR":
                op[op_name]['keep_keys'] = ['image', 'valid_ratio']
            elif config['Architecture']['algorithm'] == "RobustScanner":
                op[op_name][
                    'keep_keys'] = ['image', 'valid_ratio', 'word_positons']
            else:
                op[op_name]['keep_keys'] = ['image']
        transforms.append(op)
    global_config['infer_mode'] = True
    ops = create_operators(transforms, global_config)

    save_res_path = config['Global'].get('save_res_path',
                                         "./output/rec/predicts_rec.txt")
    if not os.path.exists(os.path.dirname(save_res_path)):
        os.makedirs(os.path.dirname(save_res_path))

    model.eval()

    with open(save_res_path, "w") as fout:
        for file in get_image_file_list(config['Global']['infer_img']):
            logger.info("infer_img: {}".format(file))
            with open(file, 'rb') as f:
                img = f.read()
                data = {'image': img}
            batch = transform(data, ops)
            # assemble the algorithm-specific extra model inputs; the batch
            # layout matches the keep_keys chosen above
            if config['Architecture']['algorithm'] == "SRN":
                encoder_word_pos_list = np.expand_dims(batch[1], axis=0)
                gsrm_word_pos_list = np.expand_dims(batch[2], axis=0)
                gsrm_slf_attn_bias1_list = np.expand_dims(batch[3], axis=0)
                gsrm_slf_attn_bias2_list = np.expand_dims(batch[4], axis=0)

                others = [
                    paddle.to_tensor(encoder_word_pos_list),
                    paddle.to_tensor(gsrm_word_pos_list),
                    paddle.to_tensor(gsrm_slf_attn_bias1_list),
                    paddle.to_tensor(gsrm_slf_attn_bias2_list)
                ]
            if config['Architecture']['algorithm'] == "SAR":
                valid_ratio = np.expand_dims(batch[-1], axis=0)
                img_metas = [paddle.to_tensor(valid_ratio)]
            if config['Architecture']['algorithm'] == "RobustScanner":
                valid_ratio = np.expand_dims(batch[1], axis=0)
                word_positons = np.expand_dims(batch[2], axis=0)
                img_metas = [
                    paddle.to_tensor(valid_ratio),
                    paddle.to_tensor(word_positons),
                ]
            if config['Architecture']['algorithm'] == "CAN":
                # all-ones mask/label placeholders for inference
                image_mask = paddle.ones(
                    (np.expand_dims(
                        batch[0], axis=0).shape), dtype='float32')
                label = paddle.ones((1, 36), dtype='int64')
            images = np.expand_dims(batch[0], axis=0)
            images = paddle.to_tensor(images)
            if config['Architecture']['algorithm'] == "SRN":
                preds = model(images, others)
            elif config['Architecture']['algorithm'] == "SAR":
                preds = model(images, img_metas)
            elif config['Architecture']['algorithm'] == "RobustScanner":
                preds = model(images, img_metas)
            elif config['Architecture']['algorithm'] == "CAN":
                preds = model([images, image_mask, label])
            else:
                preds = model(images)
            post_result = post_process_class(preds)
            # format the decoded result; shape of post_result depends on the
            # post-process class
            info = None
            if isinstance(post_result, dict):
                rec_info = dict()
                for key in post_result:
                    if len(post_result[key][0]) >= 2:
                        rec_info[key] = {
                            "label": post_result[key][0][0],
                            "score": float(post_result[key][0][1]),
                        }
                info = json.dumps(rec_info, ensure_ascii=False)
            elif isinstance(post_result, list) and isinstance(post_result[0],
                                                              int):
                # for RFLearning CNT branch
                info = str(post_result[0])
            else:
                if len(post_result[0]) >= 2:
                    info = post_result[0][0] + "\t" + str(post_result[0][1])

            if info is not None:
                logger.info("\t result: {}".format(info))
                fout.write(file + "\t" + info + "\n")
    logger.info("success!")
184
+
185
+
186
if __name__ == '__main__':
    # preprocess() parses the yaml config, selects the device and sets up
    # logging; main() reads `config` and `logger` as module globals
    config, device, logger, vdl_writer = program.preprocess()
    main()
tools/infer_sr.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import absolute_import
16
+ from __future__ import division
17
+ from __future__ import print_function
18
+
19
+ import numpy as np
20
+
21
+ import os
22
+ import sys
23
+ import json
24
+ from PIL import Image
25
+ import cv2
26
+
27
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
28
+ sys.path.insert(0, __dir__)
29
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
30
+
31
+ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
32
+
33
+ import paddle
34
+
35
+ from ppocr.data import create_operators, transform
36
+ from ppocr.modeling.architectures import build_model
37
+ from ppocr.postprocess import build_post_process
38
+ from ppocr.utils.save_load import load_model
39
+ from ppocr.utils.utility import get_image_file_list
40
+ import tools.program as program
41
+
42
+
43
def main():
    """Run super-resolution inference on the images under Global.infer_img.

    Reads ``config`` and ``logger`` from module globals (set by
    ``program.preprocess()`` in the ``__main__`` guard). The upscaled
    images are written as ``sr_<name>`` into Global.save_visual
    (default ``"infer_result/"``).
    """
    global_config = config['Global']

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # sr transform
    config['Architecture']["Transform"]['infer_mode'] = True

    model = build_model(config['Architecture'])

    load_model(config, model)

    # create data ops: drop label ops, switch the resize op to inference
    # mode, and keep only the low-resolution image tensor
    transforms = []
    for op in config['Eval']['dataset']['transforms']:
        op_name = list(op)[0]
        if 'Label' in op_name:
            continue
        elif op_name in ['SRResize']:
            op[op_name]['infer_mode'] = True
        elif op_name == 'KeepKeys':
            op[op_name]['keep_keys'] = ['img_lr']
        transforms.append(op)
    global_config['infer_mode'] = True
    ops = create_operators(transforms, global_config)

    save_visual_path = config['Global'].get('save_visual', "infer_result/")
    # create the output directory itself; the previous
    # os.makedirs(os.path.dirname(...)) dropped the last path component
    # whenever the configured path had no trailing slash, so images were
    # written into a non-existent directory
    os.makedirs(save_visual_path, exist_ok=True)

    model.eval()
    for file in get_image_file_list(config['Global']['infer_img']):
        logger.info("infer_img: {}".format(file))
        img = Image.open(file).convert("RGB")
        data = {'image_lr': img}
        batch = transform(data, ops)
        images = np.expand_dims(batch[0], axis=0)
        images = paddle.to_tensor(images)

        preds = model(images)
        sr_img = preds["sr_img"][0]
        # CHW float output scaled to HWC uint8; [:, :, ::-1] converts
        # RGB -> BGR for cv2.imwrite
        fm_sr = (sr_img.numpy() * 255).transpose(1, 2, 0).astype(np.uint8)
        img_name_pure = os.path.split(file)[-1]
        cv2.imwrite("{}/sr_{}".format(save_visual_path, img_name_pure),
                    fm_sr[:, :, ::-1])
        # log the actual configured output location instead of the
        # previously hard-coded "infer_result/" path
        logger.info("The visualized image saved in {}/sr_{}".format(
            save_visual_path, img_name_pure))

    logger.info("success!")
96
+
97
+
98
if __name__ == '__main__':
    # preprocess() parses the yaml config, selects the device and sets up
    # logging; main() reads `config` and `logger` as module globals
    config, device, logger, vdl_writer = program.preprocess()
    main()
tools/infer_table.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import absolute_import
16
+ from __future__ import division
17
+ from __future__ import print_function
18
+
19
+ import numpy as np
20
+
21
+ import os
22
+ import sys
23
+ import json
24
+
25
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
26
+ sys.path.append(__dir__)
27
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
28
+
29
+ os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
30
+
31
+ import paddle
32
+ from paddle.jit import to_static
33
+
34
+ from ppocr.data import create_operators, transform
35
+ from ppocr.modeling.architectures import build_model
36
+ from ppocr.postprocess import build_post_process
37
+ from ppocr.utils.save_load import load_model
38
+ from ppocr.utils.utility import get_image_file_list
39
+ from ppocr.utils.visual import draw_rectangle
40
+ from tools.infer.utility import draw_boxes
41
+ import tools.program as program
42
+ import cv2
43
+
44
+
45
@paddle.no_grad()
def main(config, device, logger, vdl_writer):
    """Run table-structure inference on the images under Global.infer_img.

    For each image, predicts the table structure tokens and cell boxes,
    appends one "result: ..." line to <save_res_path>/infer.txt and writes
    a visualization image next to it.

    Args:
        config (dict): parsed yaml configuration.
        device: paddle device handle (unused here; kept for a uniform
            entry-point signature).
        logger: logger produced by program.preprocess().
        vdl_writer: VisualDL writer (unused here; kept for a uniform
            entry-point signature).
    """
    global_config = config['Global']

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # build model: derive output channels from the structure charset size
    if hasattr(post_process_class, 'character'):
        config['Architecture']["Head"]['out_channels'] = len(
            getattr(post_process_class, 'character'))

    model = build_model(config['Architecture'])

    load_model(config, model)

    # create data ops: drop encode ops and keep image + original shape
    transforms = []
    for op in config['Eval']['dataset']['transforms']:
        op_name = list(op)[0]
        if 'Encode' in op_name:
            continue
        if op_name == 'KeepKeys':
            op[op_name]['keep_keys'] = ['image', 'shape']
        transforms.append(op)

    global_config['infer_mode'] = True
    ops = create_operators(transforms, global_config)

    save_res_path = config['Global']['save_res_path']
    os.makedirs(save_res_path, exist_ok=True)

    model.eval()
    with open(
            os.path.join(save_res_path, 'infer.txt'), mode='w',
            encoding='utf-8') as f_w:
        for file in get_image_file_list(config['Global']['infer_img']):
            logger.info("infer_img: {}".format(file))
            with open(file, 'rb') as f:
                img = f.read()
                data = {'image': img}
            batch = transform(data, ops)
            images = np.expand_dims(batch[0], axis=0)
            shape_list = np.expand_dims(batch[1], axis=0)

            images = paddle.to_tensor(images)
            preds = model(images)
            post_result = post_process_class(preds, [shape_list])

            # single-image batch: take the first entry of each batch list
            structure_str_list = post_result['structure_batch_list'][0]
            bbox_list = post_result['bbox_batch_list'][0]
            structure_str_list = structure_str_list[0]
            structure_str_list = [
                '<html>', '<body>', '<table>'
            ] + structure_str_list + ['</table>', '</body>', '</html>']
            bbox_list_str = json.dumps(bbox_list.tolist())

            logger.info("result: {}, {}".format(structure_str_list,
                                                bbox_list_str))
            f_w.write("result: {}, {}\n".format(structure_str_list,
                                                bbox_list_str))

            # 4-value boxes are axis-aligned rectangles; anything else is
            # drawn as a polygon
            if len(bbox_list) > 0 and len(bbox_list[0]) == 4:
                img = draw_rectangle(file, bbox_list)
            else:
                img = draw_boxes(cv2.imread(file), bbox_list)
            cv2.imwrite(
                os.path.join(save_res_path, os.path.basename(file)), img)
            logger.info('save result to {}'.format(save_res_path))
    logger.info("success!")
117
+
118
+
119
if __name__ == '__main__':
    # preprocess() parses the yaml config, selects the device and sets up
    # logging; this script passes everything to main() explicitly
    config, device, logger, vdl_writer = program.preprocess()
    main(config, device, logger, vdl_writer)
tools/program.py ADDED
@@ -0,0 +1,702 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import absolute_import
16
+ from __future__ import division
17
+ from __future__ import print_function
18
+
19
+ import os
20
+ import sys
21
+ import platform
22
+ import yaml
23
+ import time
24
+ import datetime
25
+ import paddle
26
+ import paddle.distributed as dist
27
+ from tqdm import tqdm
28
+ import cv2
29
+ import numpy as np
30
+ from argparse import ArgumentParser, RawDescriptionHelpFormatter
31
+
32
+ from ppocr.utils.stats import TrainingStats
33
+ from ppocr.utils.save_load import save_model
34
+ from ppocr.utils.utility import print_dict, AverageMeter
35
+ from ppocr.utils.logging import get_logger
36
+ from ppocr.utils.loggers import VDLLogger, WandbLogger, Loggers
37
+ from ppocr.utils import profiler
38
+ from ppocr.data import build_dataloader
39
+
40
+
41
class ArgsParser(ArgumentParser):
    """Command-line parser for the training/inference scripts.

    Accepts a yaml config file (-c/--config), ad-hoc config overrides
    (-o/--opt, as ``key=value`` tokens) and profiler options
    (-p/--profiler_options).
    """

    def __init__(self):
        super(ArgsParser, self).__init__(
            formatter_class=RawDescriptionHelpFormatter)
        self.add_argument("-c", "--config", help="configuration file to use")
        self.add_argument(
            "-o", "--opt", nargs='+', help="set configuration options")
        self.add_argument(
            '-p',
            '--profiler_options',
            type=str,
            default=None,
            help='The option of profiler, which should be in format ' \
                '\"key1=value1;key2=value2;key3=value3\".'
        )

    def parse_args(self, argv=None):
        """Parse argv, require --config and convert -o entries to a dict."""
        args = super(ArgsParser, self).parse_args(argv)
        assert args.config is not None, \
            "Please specify --config=configure_file_path."
        args.opt = self._parse_opt(args.opt)
        return args

    def _parse_opt(self, opts):
        """Turn ``['k=v', ...]`` into ``{k: parsed_v}`` with yaml-typed values.

        Returns an empty dict when no overrides were given.
        """
        config = {}
        if not opts:
            return config
        for s in opts:
            s = s.strip()
            # split only on the first '=' so that values may themselves
            # contain '=' (previously such overrides raised ValueError)
            k, v = s.split('=', 1)
            config[k] = yaml.load(v, Loader=yaml.Loader)
        return config
73
+
74
+
75
def load_config(file_path):
    """
    Load config from yml/yaml file.
    Args:
        file_path (str): Path of the config file to be loaded.
    Returns: global config
    Raises:
        AssertionError: if the file extension is not .yml/.yaml.
    """
    _, ext = os.path.splitext(file_path)
    assert ext in ['.yml', '.yaml'], "only support yaml files for now"
    # context manager guarantees the file handle is closed (the previous
    # bare open() leaked the handle)
    with open(file_path, 'rb') as f:
        config = yaml.load(f, Loader=yaml.Loader)
    return config
86
+
87
+
88
def merge_config(config, opts):
    """
    Merge config into global config.
    Args:
        config (dict): Config to be merged.
        opts (dict): Overrides; a dotted key like ``A.B.C`` addresses the
            nested entry config['A']['B']['C'].
    Returns: global config (mutated in place)
    """
    for key, value in opts.items():
        if "." not in key:
            # plain key: merge dict values into an existing dict entry,
            # otherwise overwrite/insert
            if isinstance(value, dict) and key in config:
                config[key].update(value)
            else:
                config[key] = value
            continue
        # dotted key: walk down to the parent of the final component
        parts = key.split('.')
        assert (
            parts[0] in config
        ), "the sub_keys can only be one of global_config: {}, but get: " \
            "{}, please check your running command".format(
                config.keys(), parts[0])
        node = config[parts[0]]
        for part in parts[1:-1]:
            node = node[part]
        node[parts[-1]] = value
    return config
115
+
116
+
117
def check_device(use_gpu, use_xpu=False, use_npu=False, use_mlu=False):
    """
    Log error and exit when set use_gpu=true in paddlepaddle
    cpu version.

    Args:
        use_gpu (bool): whether the config requests CUDA.
        use_xpu (bool): whether the config requests an XPU device.
        use_npu (bool): whether the config requests an NPU device.
        use_mlu (bool): whether the config requests an MLU device.
    """
    err = "Config {} cannot be set as true while your paddle " \
          "is not compiled with {} ! \nPlease try: \n" \
          "\t1. Install paddlepaddle to run model on {} \n" \
          "\t2. Set {} as false in config file to run " \
          "model on CPU"

    try:
        if use_gpu and use_xpu:
            # fixed typo in the user-facing message ("ture" -> "true")
            print("use_xpu and use_gpu can not both be true.")
        if use_gpu and not paddle.is_compiled_with_cuda():
            print(err.format("use_gpu", "cuda", "gpu", "use_gpu"))
            sys.exit(1)
        if use_xpu and not paddle.device.is_compiled_with_xpu():
            print(err.format("use_xpu", "xpu", "xpu", "use_xpu"))
            sys.exit(1)
        if use_npu:
            # pick the NPU probe matching the installed paddle version
            if int(paddle.version.major) != 0 and int(
                    paddle.version.major) <= 2 and int(
                        paddle.version.minor) <= 4:
                if not paddle.device.is_compiled_with_npu():
                    print(err.format("use_npu", "npu", "npu", "use_npu"))
                    sys.exit(1)
            # is_compiled_with_npu() has been updated after paddle-2.4
            else:
                if not paddle.device.is_compiled_with_custom_device("npu"):
                    print(err.format("use_npu", "npu", "npu", "use_npu"))
                    sys.exit(1)
        if use_mlu and not paddle.device.is_compiled_with_mlu():
            print(err.format("use_mlu", "mlu", "mlu", "use_mlu"))
            sys.exit(1)
    except Exception as e:
        # best-effort check: tolerate probe APIs missing on some paddle
        # builds (SystemExit still propagates — it is not an Exception)
        pass
154
+
155
+
156
def to_float32(preds):
    """Recursively cast every paddle.Tensor inside *preds* to float32.

    dict/list containers are modified in place (nested containers are
    visited recursively); a bare tensor argument yields a new float32
    tensor. Anything else is returned unchanged.
    """
    if isinstance(preds, paddle.Tensor):
        return preds.astype(paddle.float32)
    if isinstance(preds, dict):
        indices = preds
    elif isinstance(preds, list):
        indices = range(len(preds))
    else:
        return preds
    for i in indices:
        item = preds[i]
        if isinstance(item, (dict, list)):
            preds[i] = to_float32(item)
        elif isinstance(item, paddle.Tensor):
            preds[i] = item.astype(paddle.float32)
    return preds
174
+
175
+
176
def train(config,
          train_dataloader,
          valid_dataloader,
          device,
          model,
          loss_class,
          optimizer,
          lr_scheduler,
          post_process_class,
          eval_class,
          pre_best_model_dict,
          logger,
          log_writer=None,
          scaler=None,
          amp_level='O2',
          amp_custom_black_list=[]):
    """Main training loop.

    Iterates ``train_dataloader`` for ``Global.epoch_num`` epochs, running
    the forward/backward pass (with optional AMP when ``scaler`` is given),
    optionally computing metrics during training, periodically evaluating
    on ``valid_dataloader`` and saving 'latest' / 'best_accuracy' /
    per-epoch checkpoints on rank 0.

    Args:
        config (dict): full parsed yaml configuration.
        train_dataloader / valid_dataloader: paddle dataloaders.
        device: paddle device handle (used when rebuilding the dataloader).
        model: model to train (modified in place).
        loss_class: callable returning a dict with at least key 'loss'.
        optimizer / lr_scheduler: paddle optimizer and LR schedule
            (lr_scheduler may be a plain float, in which case it is not
            stepped).
        post_process_class / eval_class: decoding and metric objects.
        pre_best_model_dict (dict): metrics/state restored from a
            checkpoint; may carry 'global_step' and 'start_epoch'.
        logger: logger instance.
        log_writer: optional metrics logger (VisualDL/W&B wrapper).
        scaler: optional paddle.amp.GradScaler; enables AMP when set.
        amp_level (str): AMP level passed to paddle.amp.auto_cast.
        amp_custom_black_list (list): ops excluded from AMP.
    """
    cal_metric_during_train = config['Global'].get('cal_metric_during_train',
                                                   False)
    calc_epoch_interval = config['Global'].get('calc_epoch_interval', 1)
    log_smooth_window = config['Global']['log_smooth_window']
    epoch_num = config['Global']['epoch_num']
    print_batch_step = config['Global']['print_batch_step']
    eval_batch_step = config['Global']['eval_batch_step']
    profiler_options = config['profiler_options']

    # resume the step counter when the checkpoint provided it
    global_step = 0
    if 'global_step' in pre_best_model_dict:
        global_step = pre_best_model_dict['global_step']
    start_eval_step = 0
    if type(eval_batch_step) == list and len(eval_batch_step) >= 2:
        start_eval_step = eval_batch_step[0]
        eval_batch_step = eval_batch_step[1]
        if len(valid_dataloader) == 0:
            logger.info(
                'No Images in eval dataset, evaluation during training ' \
                'will be disabled'
            )
            # effectively "never": no step count will ever reach this
            start_eval_step = 1e111
        logger.info(
            "During the training process, after the {}th iteration, " \
            "an evaluation is run every {} iterations".
            format(start_eval_step, eval_batch_step))
    save_epoch_step = config['Global']['save_epoch_step']
    save_model_dir = config['Global']['save_model_dir']
    if not os.path.exists(save_model_dir):
        os.makedirs(save_model_dir)
    main_indicator = eval_class.main_indicator
    best_model_dict = {main_indicator: 0}
    best_model_dict.update(pre_best_model_dict)
    train_stats = TrainingStats(log_smooth_window, ['lr'])
    model_average = False
    model.train()

    use_srn = config['Architecture']['algorithm'] == "SRN"
    # algorithms that need extra inputs beyond the image tensor
    extra_input_models = [
        "SRN", "NRTR", "SAR", "SEED", "SVTR", "SPIN", "VisionLAN",
        "RobustScanner", "RFL", 'DRRG'
    ]
    extra_input = False
    if config['Architecture']['algorithm'] == 'Distillation':
        for key in config['Architecture']["Models"]:
            extra_input = extra_input or config['Architecture']['Models'][key][
                'algorithm'] in extra_input_models
    else:
        extra_input = config['Architecture']['algorithm'] in extra_input_models
    try:
        model_type = config['Architecture']['model_type']
    except:
        model_type = None

    algorithm = config['Architecture']['algorithm']

    start_epoch = best_model_dict[
        'start_epoch'] if 'start_epoch' in best_model_dict else 1

    total_samples = 0
    train_reader_cost = 0.0
    train_batch_cost = 0.0
    reader_start = time.time()
    eta_meter = AverageMeter()

    # on Windows the last (possibly short) batch is skipped
    max_iter = len(train_dataloader) - 1 if platform.system(
    ) == "Windows" else len(train_dataloader)

    for epoch in range(start_epoch, epoch_num + 1):
        if train_dataloader.dataset.need_reset:
            # rebuild the dataloader (e.g. datasets that reshuffle their
            # sources per epoch), reseeded with the epoch number
            train_dataloader = build_dataloader(
                config, 'Train', device, logger, seed=epoch)
            max_iter = len(train_dataloader) - 1 if platform.system(
            ) == "Windows" else len(train_dataloader)

        for idx, batch in enumerate(train_dataloader):
            profiler.add_profiler_step(profiler_options)
            train_reader_cost += time.time() - reader_start
            if idx >= max_iter:
                break
            lr = optimizer.get_lr()
            images = batch[0]
            if use_srn:
                model_average = True
            # use amp
            if scaler:
                with paddle.amp.auto_cast(
                        level=amp_level,
                        custom_black_list=amp_custom_black_list):
                    if model_type == 'table' or extra_input:
                        preds = model(images, data=batch[1:])
                    elif model_type in ["kie"]:
                        preds = model(batch)
                    elif algorithm in ['CAN']:
                        preds = model(batch[:3])
                    else:
                        preds = model(images)
                preds = to_float32(preds)
                loss = loss_class(preds, batch)
                avg_loss = loss['loss']
                scaled_avg_loss = scaler.scale(avg_loss)
                scaled_avg_loss.backward()
                scaler.minimize(optimizer, scaled_avg_loss)
            else:
                if model_type == 'table' or extra_input:
                    preds = model(images, data=batch[1:])
                elif model_type in ["kie", 'sr']:
                    preds = model(batch)
                elif algorithm in ['CAN']:
                    preds = model(batch[:3])
                else:
                    preds = model(images)
                loss = loss_class(preds, batch)
                avg_loss = loss['loss']
                avg_loss.backward()
                optimizer.step()

            optimizer.clear_grad()

            if cal_metric_during_train and epoch % calc_epoch_interval == 0:  # only rec and cls need
                batch = [item.numpy() for item in batch]
                if model_type in ['kie', 'sr']:
                    eval_class(preds, batch)
                elif model_type in ['table']:
                    post_result = post_process_class(preds, batch)
                    eval_class(post_result, batch)
                elif algorithm in ['CAN']:
                    model_type = 'can'
                    eval_class(preds[0], batch[2:], epoch_reset=(idx == 0))
                else:
                    if config['Loss']['name'] in ['MultiLoss', 'MultiLoss_v2'
                                                  ]:  # for multi head loss
                        post_result = post_process_class(
                            preds['ctc'], batch[1])  # for CTC head out
                    elif config['Loss']['name'] in ['VLLoss']:
                        post_result = post_process_class(preds, batch[1],
                                                         batch[-1])
                    else:
                        post_result = post_process_class(preds, batch[1])
                    eval_class(post_result, batch)
                metric = eval_class.get_metric()
                train_stats.update(metric)

            train_batch_time = time.time() - reader_start
            train_batch_cost += train_batch_time
            eta_meter.update(train_batch_time)
            global_step += 1
            total_samples += len(images)

            if not isinstance(lr_scheduler, float):
                lr_scheduler.step()

            # logger and visualdl
            stats = {k: v.numpy().mean() for k, v in loss.items()}
            stats['lr'] = lr
            train_stats.update(stats)

            if log_writer is not None and dist.get_rank() == 0:
                log_writer.log_metrics(
                    metrics=train_stats.get(), prefix="TRAIN", step=global_step)

            if dist.get_rank() == 0 and (
                (global_step > 0 and global_step % print_batch_step == 0) or
                (idx >= len(train_dataloader) - 1)):
                logs = train_stats.log()

                eta_sec = ((epoch_num + 1 - epoch) * \
                    len(train_dataloader) - idx - 1) * eta_meter.avg
                eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec)))
                strs = 'epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: ' \
                       '{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, ' \
                       'ips: {:.5f} samples/s, eta: {}'.format(
                           epoch, epoch_num, global_step, logs,
                           train_reader_cost / print_batch_step,
                           train_batch_cost / print_batch_step,
                           total_samples / print_batch_step,
                           total_samples / train_batch_cost, eta_sec_format)
                logger.info(strs)

                total_samples = 0
                train_reader_cost = 0.0
                train_batch_cost = 0.0
            # eval
            if global_step > start_eval_step and \
                    (global_step - start_eval_step) % eval_batch_step == 0 \
                    and dist.get_rank() == 0:
                if model_average:
                    Model_Average = paddle.incubate.optimizer.ModelAverage(
                        0.15,
                        parameters=model.parameters(),
                        min_average_window=10000,
                        max_average_window=15625)
                    Model_Average.apply()
                cur_metric = eval(
                    model,
                    valid_dataloader,
                    post_process_class,
                    eval_class,
                    model_type,
                    extra_input=extra_input,
                    scaler=scaler,
                    amp_level=amp_level,
                    amp_custom_black_list=amp_custom_black_list)
                cur_metric_str = 'cur metric, {}'.format(', '.join(
                    ['{}: {}'.format(k, v) for k, v in cur_metric.items()]))
                logger.info(cur_metric_str)

                # logger metric
                if log_writer is not None:
                    log_writer.log_metrics(
                        metrics=cur_metric, prefix="EVAL", step=global_step)

                if cur_metric[main_indicator] >= best_model_dict[
                        main_indicator]:
                    best_model_dict.update(cur_metric)
                    best_model_dict['best_epoch'] = epoch
                    save_model(
                        model,
                        optimizer,
                        save_model_dir,
                        logger,
                        config,
                        is_best=True,
                        prefix='best_accuracy',
                        best_model_dict=best_model_dict,
                        epoch=epoch,
                        global_step=global_step)
                best_str = 'best metric, {}'.format(', '.join([
                    '{}: {}'.format(k, v) for k, v in best_model_dict.items()
                ]))
                logger.info(best_str)
                # logger best metric
                if log_writer is not None:
                    log_writer.log_metrics(
                        metrics={
                            "best_{}".format(main_indicator):
                            best_model_dict[main_indicator]
                        },
                        prefix="EVAL",
                        step=global_step)

                    log_writer.log_model(
                        is_best=True,
                        prefix="best_accuracy",
                        metadata=best_model_dict)

            reader_start = time.time()
        if dist.get_rank() == 0:
            save_model(
                model,
                optimizer,
                save_model_dir,
                logger,
                config,
                is_best=False,
                prefix='latest',
                best_model_dict=best_model_dict,
                epoch=epoch,
                global_step=global_step)

            if log_writer is not None:
                log_writer.log_model(is_best=False, prefix="latest")

        if dist.get_rank() == 0 and epoch > 0 and epoch % save_epoch_step == 0:
            save_model(
                model,
                optimizer,
                save_model_dir,
                logger,
                config,
                is_best=False,
                prefix='iter_epoch_{}'.format(epoch),
                best_model_dict=best_model_dict,
                epoch=epoch,
                global_step=global_step)
            if log_writer is not None:
                log_writer.log_model(
                    is_best=False, prefix='iter_epoch_{}'.format(epoch))

    best_str = 'best metric, {}'.format(', '.join(
        ['{}: {}'.format(k, v) for k, v in best_model_dict.items()]))
    logger.info(best_str)
    if dist.get_rank() == 0 and log_writer is not None:
        log_writer.close()
    return
477
+
478
+
479
def eval(model,
         valid_dataloader,
         post_process_class,
         eval_class,
         model_type=None,
         extra_input=False,
         scaler=None,
         amp_level='O2',
         amp_custom_black_list=None):
    """Evaluate `model` over `valid_dataloader` and return the metric dict.

    The model is switched to eval mode for the duration of the loop and
    restored to train mode before returning.

    Args:
        model: paddle model to evaluate.
        valid_dataloader: iterable of evaluation batches; batch[0] holds
            the input images.
        post_process_class: callable turning raw predictions into decoded
            results, or None when `eval_class` consumes raw predictions.
        eval_class: metric accumulator; called per batch, finalized via
            get_metric().
        model_type: architecture family ('table', 'kie', 'can', 'sr', ...);
            selects how the forward pass is invoked.
        extra_input: when True, the extra batch items are fed to the model.
        scaler: paddle AMP GradScaler; when set, inference runs under
            auto_cast and predictions are cast back with to_float32.
        amp_level: AMP optimization level used for auto_cast.
        amp_custom_black_list: ops forced to fp32 under AMP. Defaults to []
            (kept as None in the signature to avoid a mutable default).

    Returns:
        dict of metric values plus 'fps' (frames / inference seconds).
    """
    if amp_custom_black_list is None:
        amp_custom_black_list = []

    def _forward(images, batch):
        # Single dispatch point for the model call; this logic was
        # previously duplicated in the AMP and non-AMP branches.
        if model_type == 'table' or extra_input:
            return model(images, data=batch[1:])
        elif model_type in ["kie"]:
            return model(batch)
        elif model_type in ['can']:
            return model(batch[:3])
        elif model_type in ['sr']:
            return model(batch)
        else:
            return model(images)

    model.eval()
    with paddle.no_grad():
        total_frame = 0.0
        total_time = 0.0
        pbar = tqdm(
            total=len(valid_dataloader),
            desc='eval model:',
            position=0,
            leave=True)
        # NOTE(review): the last batch is skipped on Windows, presumably to
        # work around a dataloader issue on that platform — confirm.
        max_iter = len(valid_dataloader) - 1 if platform.system(
        ) == "Windows" else len(valid_dataloader)
        sum_images = 0
        for idx, batch in enumerate(valid_dataloader):
            if idx >= max_iter:
                break
            images = batch[0]
            start = time.time()

            # use amp
            if scaler:
                with paddle.amp.auto_cast(
                        level=amp_level,
                        custom_black_list=amp_custom_black_list):
                    preds = _forward(images, batch)
                preds = to_float32(preds)
            else:
                preds = _forward(images, batch)

            # Convert tensors to numpy so post-processing/metrics can use them.
            batch_numpy = []
            for item in batch:
                if isinstance(item, paddle.Tensor):
                    batch_numpy.append(item.numpy())
                else:
                    batch_numpy.append(item)
            # Obtain usable results from post-processing methods
            total_time += time.time() - start
            # Evaluate the results of the current batch
            if model_type in ['table', 'kie']:
                if post_process_class is None:
                    eval_class(preds, batch_numpy)
                else:
                    post_result = post_process_class(preds, batch_numpy)
                    eval_class(post_result, batch_numpy)
            elif model_type in ['sr']:
                eval_class(preds, batch_numpy)
            elif model_type in ['can']:
                eval_class(preds[0], batch_numpy[2:], epoch_reset=(idx == 0))
            else:
                post_result = post_process_class(preds, batch_numpy[1])
                eval_class(post_result, batch_numpy)

            pbar.update(1)
            total_frame += len(images)
            sum_images += 1
        # Get final metric,eg. acc or hmean
        metric = eval_class.get_metric()

    pbar.close()
    model.train()
    metric['fps'] = total_frame / total_time
    return metric
571
+
572
+
573
def update_center(char_center, post_result, preds):
    """Fold one batch of features into the per-character running means.

    `char_center` maps a character class index to [mean_feature, count].
    Only samples whose decoded first element matches the label contribute.

    Returns the (mutated) `char_center` dict.
    """
    results, labels = post_result
    feats, logits = preds
    class_ids = paddle.argmax(logits, axis=-1).numpy()
    feats = feats.numpy()

    for sample_idx in range(len(labels)):
        # Skip samples the model recognized incorrectly.
        if results[sample_idx][0] != labels[sample_idx][0]:
            continue
        sample_feat = feats[sample_idx]
        for step, char_idx in enumerate(class_ids[sample_idx]):
            if char_idx in char_center:
                mean, count = char_center[char_idx]
                # Incremental running mean of the feature vector.
                char_center[char_idx][0] = (mean * count + sample_feat[step]
                                            ) / (count + 1)
                char_center[char_idx][1] = count + 1
            else:
                char_center[char_idx] = [sample_feat[step], 1]
    return char_center
594
+
595
+
596
def get_center(model, eval_dataloader, post_process_class):
    """Compute the mean feature vector ("center") per character class.

    Runs `model` over `eval_dataloader`, decodes predictions with
    `post_process_class`, and averages the features of correctly
    recognized characters via `update_center`.

    Returns:
        dict mapping character class index -> mean feature vector.
    """
    pbar = tqdm(total=len(eval_dataloader), desc='get center:')
    # NOTE(review): the last batch is skipped on Windows, presumably to
    # work around a dataloader issue on that platform — confirm.
    max_iter = len(eval_dataloader) - 1 if platform.system(
    ) == "Windows" else len(eval_dataloader)
    char_center = dict()
    for idx, batch in enumerate(eval_dataloader):
        if idx >= max_iter:
            break
        images = batch[0]
        preds = model(images)

        batch = [item.numpy() for item in batch]
        # Obtain usable results from post-processing methods
        post_result = post_process_class(preds, batch[1])

        # update char_center with this batch's correct predictions
        char_center = update_center(char_center, post_result, preds)
        pbar.update(1)

    pbar.close()
    # Drop the counts, keeping only the mean feature per character.
    for key in char_center.keys():
        char_center[key] = char_center[key][0]
    return char_center
620
+
621
+
622
def preprocess(is_train=False):
    """Parse CLI args, load/merge the config, set up device and loggers.

    Args:
        is_train: when True, creates `Global.save_model_dir`, dumps the
            merged config there as config.yml and logs to train.log
            inside it; otherwise logging goes to the console only.

    Returns:
        tuple (config, device, logger, log_writer) where `log_writer`
        is a Loggers facade wrapping the enabled backends (VisualDL
        and/or W&B), or None when neither is enabled.
    """
    FLAGS = ArgsParser().parse_args()
    config = load_config(FLAGS.config)
    config = merge_config(config, FLAGS.opt)
    # Propagate profiler options from the command line into the config.
    profile_dic = {"profiler_options": FLAGS.profiler_options}
    config = merge_config(config, profile_dic)

    if is_train:
        # save_config: persist the fully merged config next to checkpoints.
        save_model_dir = config['Global']['save_model_dir']
        os.makedirs(save_model_dir, exist_ok=True)
        with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f:
            yaml.dump(
                dict(config), f, default_flow_style=False, sort_keys=False)
        log_file = '{}/train.log'.format(save_model_dir)
    else:
        log_file = None
    logger = get_logger(log_file=log_file)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global'].get('use_gpu', False)
    use_xpu = config['Global'].get('use_xpu', False)
    use_npu = config['Global'].get('use_npu', False)
    use_mlu = config['Global'].get('use_mlu', False)

    # Fail fast on an algorithm name this training entry does not support.
    alg = config['Architecture']['algorithm']
    assert alg in [
        'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN',
        'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE',
        'SEED', 'SDMGR', 'LayoutXLM', 'LayoutLM', 'LayoutLMv2', 'PREN', 'FCE',
        'SVTR', 'ViTSTR', 'ABINet', 'DB++', 'TableMaster', 'SPIN', 'VisionLAN',
        'Gestalt', 'SLANet', 'RobustScanner', 'CT', 'RFL', 'DRRG', 'CAN',
        'Telescope'
    ]

    # Pick the device string; env vars select the card id on XPU/NPU/MLU.
    if use_xpu:
        device = 'xpu:{0}'.format(os.getenv('FLAGS_selected_xpus', 0))
    elif use_npu:
        device = 'npu:{0}'.format(os.getenv('FLAGS_selected_npus', 0))
    elif use_mlu:
        device = 'mlu:{0}'.format(os.getenv('FLAGS_selected_mlus', 0))
    else:
        device = 'gpu:{}'.format(dist.ParallelEnv()
                                 .dev_id) if use_gpu else 'cpu'
    check_device(use_gpu, use_xpu, use_npu, use_mlu)

    device = paddle.set_device(device)

    config['Global']['distributed'] = dist.get_world_size() != 1

    loggers = []

    if 'use_visualdl' in config['Global'] and config['Global']['use_visualdl']:
        save_model_dir = config['Global']['save_model_dir']
        vdl_writer_path = '{}/vdl/'.format(save_model_dir)
        loggers.append(VDLLogger(vdl_writer_path))
    if ('use_wandb' in config['Global'] and
            config['Global']['use_wandb']) or 'wandb' in config:
        save_dir = config['Global']['save_model_dir']
        if "wandb" in config:
            wandb_params = config['wandb']
        else:
            wandb_params = dict()
        wandb_params.update({'save_dir': save_dir})
        loggers.append(WandbLogger(**wandb_params, config=config))
    print_dict(config, logger)

    # Wrap all enabled backends behind a single Loggers facade.
    if loggers:
        log_writer = Loggers(loggers)
    else:
        log_writer = None

    logger.info('train with paddle {} and device {}'.format(paddle.__version__,
                                                            device))
    return config, device, logger, log_writer
tools/test_hubserving.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import os
15
+ import sys
16
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
17
+ sys.path.append(__dir__)
18
+ sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
19
+
20
+ from ppocr.utils.logging import get_logger
21
+ logger = get_logger()
22
+
23
+ import cv2
24
+ import numpy as np
25
+ import time
26
+ from PIL import Image
27
+ from ppocr.utils.utility import get_image_file_list
28
+ from tools.infer.utility import draw_ocr, draw_boxes, str2bool
29
+ from ppstructure.utility import draw_structure_result
30
+ from ppstructure.predict_system import to_excel
31
+
32
+ import requests
33
+ import json
34
+ import base64
35
+
36
+
37
def cv2_to_base64(image):
    """Encode raw image bytes as a UTF-8 base64 string for the JSON payload."""
    encoded = base64.b64encode(image)
    return encoded.decode('utf8')
39
+
40
+
41
def draw_server_result(image_file, res):
    """Visualize hub-serving results on top of the source image.

    Returns an RGB numpy image (possibly unchanged when `res` is empty),
    or None when the result carries no box geometry (pure ocr_rec output).
    """
    bgr = cv2.imread(image_file)
    image = Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
    if not res:
        return np.array(image)
    keys = res[0].keys()
    if 'text_region' not in keys:  # for ocr_rec, draw function is invalid
        logger.info("draw function is invalid for ocr_rec!")
        return None
    if 'text' not in keys:  # for ocr_det
        logger.info("draw text boxes only!")
        boxes = np.array([item['text_region'] for item in res])
        return draw_boxes(image, boxes)
    # for ocr_system: boxes plus recognized text and confidence
    logger.info("draw boxes and texts!")
    boxes = np.array([item['text_region'] for item in res])
    texts = [item['text'] for item in res]
    scores = np.array([item['confidence'] for item in res])
    return draw_ocr(
        image, boxes, texts, scores, draw_txt=True, drop_score=0.5)
72
+
73
+
74
def save_structure_res(res, save_folder, image_file):
    """Persist structure-analysis results for one image.

    Under save_folder/<image basename>/: tables are exported to .xlsx,
    figure regions are cropped out of the original image and saved as
    .jpg, and all other regions' OCR results are appended to res.txt
    as one JSON object per line.
    """
    img = cv2.imread(image_file)
    excel_save_folder = os.path.join(save_folder, os.path.basename(image_file))
    os.makedirs(excel_save_folder, exist_ok=True)
    # save res
    with open(
            os.path.join(excel_save_folder, 'res.txt'), 'w',
            encoding='utf8') as f:
        for region in res:
            if region['type'] == 'Table':
                excel_path = os.path.join(excel_save_folder,
                                          '{}.xlsx'.format(region['bbox']))
                to_excel(region['res'], excel_path)
            elif region['type'] == 'Figure':
                # Crop the figure region out of the original image.
                # (A leftover debug print of the bbox was removed here.)
                x1, y1, x2, y2 = region['bbox']
                roi_img = img[y1:y2, x1:x2, :]
                img_path = os.path.join(excel_save_folder,
                                        '{}.jpg'.format(region['bbox']))
                cv2.imwrite(img_path, roi_img)
            else:
                for text_result in region['res']:
                    f.write('{}\n'.format(json.dumps(text_result)))
97
+
98
+
99
def main(args):
    """Send every image under args.image_dir to the hub-serving endpoint.

    Logs per-image latency, optionally visualizes/saves the returned
    results depending on the endpoint type encoded in args.server_url,
    and reports the average request time at the end.
    """
    image_file_list = get_image_file_list(args.image_dir)
    headers = {"Content-type": "application/json"}
    cnt = 0
    total_time = 0
    for image_file in image_file_list:
        # Read the raw bytes with a context manager so the handle is closed.
        with open(image_file, 'rb') as fp:
            img = fp.read()
        if not img:  # empty/unreadable file
            logger.info("error in loading image:{}".format(image_file))
            continue
        img_name = os.path.basename(image_file)
        # send http request
        starttime = time.time()
        data = {'images': [cv2_to_base64(img)]}
        r = requests.post(
            url=args.server_url, headers=headers, data=json.dumps(data))
        elapse = time.time() - starttime
        total_time += elapse
        logger.info("Predict time of %s: %.3fs" % (image_file, elapse))
        res = r.json()["results"][0]
        logger.info(res)

        if args.visualize:
            draw_img = None
            if 'structure_table' in args.server_url:
                to_excel(res['html'], './{}.xlsx'.format(img_name))
            elif 'structure_system' in args.server_url:
                save_structure_res(res['regions'], args.output, image_file)
            else:
                draw_img = draw_server_result(image_file, res)
            if draw_img is not None:
                if not os.path.exists(args.output):
                    os.makedirs(args.output)
                cv2.imwrite(
                    os.path.join(args.output, os.path.basename(image_file)),
                    draw_img[:, :, ::-1])
                logger.info("The visualized image saved in {}".format(
                    os.path.join(args.output, os.path.basename(image_file))))
        cnt += 1
        if cnt % 100 == 0:
            logger.info("{} processed".format(cnt))
    # Guard against ZeroDivisionError when no image was processed.
    if cnt > 0:
        logger.info("avg time cost: {}".format(float(total_time) / cnt))
142
+
143
+
144
def parse_args():
    """Build and parse the command-line arguments for hub-serving testing."""
    import argparse
    parser = argparse.ArgumentParser(description="args for hub serving")
    option_specs = [
        ("--server_url", dict(type=str, required=True)),
        ("--image_dir", dict(type=str, required=True)),
        ("--visualize", dict(type=str2bool, default=False)),
        ("--output", dict(type=str, default='./hubserving_result')),
    ]
    for flag, kwargs in option_specs:
        parser.add_argument(flag, **kwargs)
    return parser.parse_args()
153
+
154
+
155
# Script entry point: parse CLI args and run the hub-serving client.
if __name__ == '__main__':
    args = parse_args()
    main(args)
tools/train.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import absolute_import
16
+ from __future__ import division
17
+ from __future__ import print_function
18
+
19
+ import os
20
+ import sys
21
+
22
+ __dir__ = os.path.dirname(os.path.abspath(__file__))
23
+ sys.path.append(__dir__)
24
+ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
25
+
26
+ import yaml
27
+ import paddle
28
+ import paddle.distributed as dist
29
+
30
+ from ppocr.data import build_dataloader
31
+ from ppocr.modeling.architectures import build_model
32
+ from ppocr.losses import build_loss
33
+ from ppocr.optimizer import build_optimizer
34
+ from ppocr.postprocess import build_post_process
35
+ from ppocr.metrics import build_metric
36
+ from ppocr.utils.save_load import load_model
37
+ from ppocr.utils.utility import set_seed
38
+ from ppocr.modeling.architectures import apply_to_static
39
+ import tools.program as program
40
+
41
+ dist.get_world_size()
42
+
43
+
44
def main(config, device, logger, vdl_writer):
    """Build all training components from `config` and launch training.

    Sets up (in order): distributed env, train/eval dataloaders,
    post-processing, the model (with head channels derived from the
    charset for rec algorithms), loss, optimizer, metric, optional AMP,
    pretrained weights, and finally calls program.train(...).
    """
    # init dist environment
    if config['Global']['distributed']:
        dist.init_parallel_env()

    global_config = config['Global']

    # build dataloader
    train_dataloader = build_dataloader(config, 'Train', device, logger)
    if len(train_dataloader) == 0:
        logger.error(
            "No Images in train dataset, please ensure\n" +
            "\t1. The images num in the train label_file_list should be larger than or equal with batch size.\n"
            +
            "\t2. The annotation file and path in the configuration file are provided normally."
        )
        return

    if config['Eval']:
        valid_dataloader = build_dataloader(config, 'Eval', device, logger)
    else:
        valid_dataloader = None

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # build model
    # for rec algorithm: derive the head's output channels from the charset
    # size exposed by the post-processor. The SAR decode variants reserve
    # two extra symbols, hence the +/-2 adjustments below.
    if hasattr(post_process_class, 'character'):
        char_num = len(getattr(post_process_class, 'character'))
        if config['Architecture']["algorithm"] in ["Distillation",
                                                   ]:  # distillation model
            for key in config['Architecture']["Models"]:
                if config['Architecture']['Models'][key]['Head'][
                        'name'] == 'MultiHead':  # for multi head
                    if config['PostProcess'][
                            'name'] == 'DistillationSARLabelDecode':
                        char_num = char_num - 2
                    # update SARLoss params
                    assert list(config['Loss']['loss_config_list'][-1].keys())[
                        0] == 'DistillationSARLoss'
                    config['Loss']['loss_config_list'][-1][
                        'DistillationSARLoss']['ignore_index'] = char_num + 1
                    out_channels_list = {}
                    out_channels_list['CTCLabelDecode'] = char_num
                    out_channels_list['SARLabelDecode'] = char_num + 2
                    config['Architecture']['Models'][key]['Head'][
                        'out_channels_list'] = out_channels_list
                else:
                    config['Architecture']["Models"][key]["Head"][
                        'out_channels'] = char_num
        elif config['Architecture']['Head'][
                'name'] == 'MultiHead':  # for multi head
            if config['PostProcess']['name'] == 'SARLabelDecode':
                char_num = char_num - 2
            # update SARLoss params
            assert list(config['Loss']['loss_config_list'][1].keys())[
                0] == 'SARLoss'
            if config['Loss']['loss_config_list'][1]['SARLoss'] is None:
                config['Loss']['loss_config_list'][1]['SARLoss'] = {
                    'ignore_index': char_num + 1
                }
            else:
                config['Loss']['loss_config_list'][1]['SARLoss'][
                    'ignore_index'] = char_num + 1
            out_channels_list = {}
            out_channels_list['CTCLabelDecode'] = char_num
            out_channels_list['SARLabelDecode'] = char_num + 2
            config['Architecture']['Head'][
                'out_channels_list'] = out_channels_list
        else:  # base rec model
            config['Architecture']["Head"]['out_channels'] = char_num

        if config['PostProcess']['name'] == 'SARLabelDecode':  # for SAR model
            config['Loss']['ignore_index'] = char_num - 1

    model = build_model(config['Architecture'])

    use_sync_bn = config["Global"].get("use_sync_bn", False)
    if use_sync_bn:
        model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        logger.info('convert_sync_batchnorm')

    model = apply_to_static(model, config, logger)

    # build loss
    loss_class = build_loss(config['Loss'])

    # build optim
    optimizer, lr_scheduler = build_optimizer(
        config['Optimizer'],
        epochs=config['Global']['epoch_num'],
        step_each_epoch=len(train_dataloader),
        model=model)

    # build metric
    eval_class = build_metric(config['Metric'])

    logger.info('train dataloader has {} iters'.format(len(train_dataloader)))
    if valid_dataloader is not None:
        logger.info('valid dataloader has {} iters'.format(
            len(valid_dataloader)))

    # Optional automatic mixed precision setup; at O2 the model and
    # optimizer are decorated so parameters are kept as master fp32 weights.
    use_amp = config["Global"].get("use_amp", False)
    amp_level = config["Global"].get("amp_level", 'O2')
    amp_custom_black_list = config['Global'].get('amp_custom_black_list', [])
    if use_amp:
        AMP_RELATED_FLAGS_SETTING = {'FLAGS_max_inplace_grad_add': 8, }
        if paddle.is_compiled_with_cuda():
            AMP_RELATED_FLAGS_SETTING.update({
                'FLAGS_cudnn_batchnorm_spatial_persistent': 1
            })
        paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
        scale_loss = config["Global"].get("scale_loss", 1.0)
        use_dynamic_loss_scaling = config["Global"].get(
            "use_dynamic_loss_scaling", False)
        scaler = paddle.amp.GradScaler(
            init_loss_scaling=scale_loss,
            use_dynamic_loss_scaling=use_dynamic_loss_scaling)
        if amp_level == "O2":
            model, optimizer = paddle.amp.decorate(
                models=model,
                optimizers=optimizer,
                level=amp_level,
                master_weight=True)
    else:
        scaler = None

    # load pretrain model
    pre_best_model_dict = load_model(config, model, optimizer,
                                     config['Architecture']["model_type"])

    if config['Global']['distributed']:
        model = paddle.DataParallel(model)
    # start train
    program.train(config, train_dataloader, valid_dataloader, device, model,
                  loss_class, optimizer, lr_scheduler, post_process_class,
                  eval_class, pre_best_model_dict, logger, vdl_writer, scaler,
                  amp_level, amp_custom_black_list)
184
+
185
+
186
def test_reader(config, device, logger):
    """Smoke-test the training dataloader: iterate it and log per-batch timing."""
    import time
    loader = build_dataloader(config, 'Train', device, logger)
    tick = time.time()
    count = 0
    try:
        for data in loader():
            count += 1
            # Log every batch (count % 1 is always 0; kept for easy tuning).
            if count % 1 == 0:
                elapsed = time.time() - tick
                tick = time.time()
                logger.info("reader: {}, {}, {}".format(count,
                                                        len(data[0]), elapsed))
    except Exception as e:
        logger.info(e)
    logger.info("finish reader: {}, Success!".format(count))
202
+
203
+
204
# Script entry point: parse config/CLI, fix the random seed, then train.
if __name__ == '__main__':
    config, device, logger, vdl_writer = program.preprocess(is_train=True)
    # Fall back to a fixed seed for reproducibility when none is configured.
    seed = config['Global']['seed'] if 'seed' in config['Global'] else 1024
    set_seed(seed)
    main(config, device, logger, vdl_writer)
    # test_reader(config, device, logger)