# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
import ast
from PIL import Image, ImageDraw, ImageFont
import numpy as np
from tools.infer.utility import draw_ocr_box_txt, str2bool, init_args as infer_args


def init_args():
    """Build the argument parser for structure / KIE inference.

    Starts from the parser returned by ``tools.infer.utility.init_args`` and
    registers the additional output, table, layout, KIE, inference-mode and
    recovery options on it.

    Returns:
        The parser object with all options registered (not yet parsed).
    """
    parser = infer_args()

    # params for output
    parser.add_argument("--output", type=str, default='./output')

    # params for table structure
    parser.add_argument("--table_max_len", type=int, default=488)
    parser.add_argument("--table_algorithm", type=str, default='TableAttn')
    parser.add_argument("--table_model_dir", type=str)
    parser.add_argument(
        "--merge_no_span_structure", type=str2bool, default=True)
    parser.add_argument(
        "--table_char_dict_path",
        type=str,
        default="../ppocr/utils/dict/table_structure_dict_ch.txt")

    # params for layout
    parser.add_argument("--layout_model_dir", type=str)
    parser.add_argument(
        "--layout_dict_path",
        type=str,
        default="../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt")
    parser.add_argument(
        "--layout_score_threshold",
        type=float,
        default=0.5,
        help="Threshold of score.")
    parser.add_argument(
        "--layout_nms_threshold",
        type=float,
        default=0.5,
        help="Threshold of nms.")

    # params for kie
    parser.add_argument("--kie_algorithm", type=str, default='LayoutXLM')
    parser.add_argument("--ser_model_dir", type=str)
    parser.add_argument("--re_model_dir", type=str)
    parser.add_argument("--use_visual_backbone", type=str2bool, default=True)
    parser.add_argument(
        "--ser_dict_path",
        type=str,
        default="../train_data/XFUND/class_list_xfun.txt")
    # need to be None or tb-yx
    parser.add_argument("--ocr_order_method", type=str, default=None)

    # params for inference
    parser.add_argument(
        "--mode",
        type=str,
        choices=['structure', 'kie'],
        default='structure',
        help='structure and kie is supported')
    # NOTE: ``type=bool`` would turn every non-empty string (including
    # "False") into True; use str2bool like the other boolean flags.
    parser.add_argument(
        "--image_orientation",
        type=str2bool,
        default=False,
        help='Whether to enable image orientation recognition')
    parser.add_argument(
        "--layout",
        type=str2bool,
        default=True,
        help='Whether to enable layout analysis')
    parser.add_argument(
        "--table",
        type=str2bool,
        default=True,
        help='In the forward, whether the table area uses table recognition')
    parser.add_argument(
        "--ocr",
        type=str2bool,
        default=True,
        help='In the forward, whether the non-table area is recognition by ocr')

    # param for recovery
    parser.add_argument(
        "--recovery",
        type=str2bool,
        default=False,
        help='Whether to enable layout of recovery')
    parser.add_argument(
        "--use_pdf2docx_api",
        type=str2bool,
        default=False,
        help='Whether to use pdf2docx api')

    return parser


def parse_args():
    """Parse the command line with the parser built by ``init_args``."""
    parser = init_args()
    return parser.parse_args()


def _text_extent(font, text):
    """Return the (width, height) extent of ``text`` rendered with ``font``.

    ``ImageFont.getsize`` was removed in Pillow 10, so ``getbbox`` is used
    when the font object provides it. The right/bottom edges of the bounding
    box are measured from the drawing origin, which is the extent the old
    ``getsize`` call reported.
    """
    if hasattr(font, "getbbox"):
        _, _, right, bottom = font.getbbox(text)
        return right, bottom
    # Older Pillow releases without getbbox.
    return font.getsize(text)


def draw_structure_result(image, result, font_path):
    """Draw layout regions and their OCR results onto an image.

    For every region a rectangle is drawn around ``region['bbox']`` in a
    colour chosen at random per region type, and the type name is written on
    a filled label at the box's top-left corner. The text results of every
    non-table region are collected and handed to ``draw_ocr_box_txt``.

    Args:
        image: A PIL image, or a numpy array that is converted with
            ``Image.fromarray``.
        result: Iterable of region dicts with the keys ``'type'`` and
            ``'bbox'`` (indexed 0..3 as two corner points) and, for
            non-table regions, ``'res'``: an iterable of dicts with the keys
            ``'text_region'``, ``'text'`` and ``'confidence'``.
        font_path: Path of the TrueType font used for the labels; also
            forwarded to ``draw_ocr_box_txt``.

    Returns:
        Whatever ``draw_ocr_box_txt`` returns for the annotated image.
    """
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    boxes, txts, scores = [], [], []

    # Draw on a copy so the caller's image is left untouched.
    img_layout = image.copy()
    draw_layout = ImageDraw.Draw(img_layout)
    text_color = (255, 255, 255)
    text_background_color = (80, 127, 255)
    catid2color = {}
    font_size = 15
    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")

    for region in result:
        region_type = region['type']

        # One random colour per region type, reused for later regions.
        if region_type not in catid2color:
            catid2color[region_type] = (random.randint(0, 255),
                                        random.randint(0, 255),
                                        random.randint(0, 255))
        box_color = catid2color[region_type]

        box_layout = region['bbox']
        draw_layout.rectangle(
            [(box_layout[0], box_layout[1]), (box_layout[2], box_layout[3])],
            outline=box_color,
            width=3)

        # Filled label behind the region type name.
        text_w, text_h = _text_extent(font, region_type)
        draw_layout.rectangle(
            [(box_layout[0], box_layout[1]),
             (box_layout[0] + text_w, box_layout[1] + text_h)],
            fill=text_background_color)
        draw_layout.text(
            (box_layout[0], box_layout[1]),
            region_type,
            fill=text_color,
            font=font)

        # Table regions contribute no per-text boxes here.
        if region_type != 'table':
            for text_result in region['res']:
                boxes.append(np.array(text_result['text_region']))
                txts.append(text_result['text'])
                scores.append(text_result['confidence'])

    im_show = draw_ocr_box_txt(
        img_layout, boxes, txts, scores, font_path=font_path, drop_score=0)
    return im_show