# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import List, Optional, Tuple, Union

import numpy as np
from mmengine import track_parallel_progress
from scipy.io import loadmat

from mmocr.utils import is_type_list
from ..data_preparer import DATA_PARSERS
from .base import BaseParser


@DATA_PARSERS.register_module()
class SynthTextAnnParser(BaseParser):
    """SynthText Text Detection Annotation Parser.

    Args:
        split (str): The split of the dataset. It is usually set automatically
            and users do not need to set it manually in config file in most
            cases.
        nproc (int): Number of processes to process the data. Defaults to 1.
            It is usually set automatically and users do not need to set it
            manually in config file in most cases.
        separator (str): The separator between each element in a line.
            Defaults to ','.
        ignore (str): The text to be ignored. Defaults to '###'.
        format (str): The format of the annotation. Defaults to
            'x1,y1,x2,y2,x3,y3,x4,y4,trans'.
        encoding (str): The encoding of the annotation file. Defaults to
            'utf-8'.
        remove_strs (List[str], Optional): Used to remove redundant strings
            in the transcription. Defaults to None.
        mode (str, optional): The mode of the box converter. Supported modes
            are 'xywh' and 'xyxy'. Defaults to None.
    """

    def __init__(self,
                 split: str,
                 nproc: int,
                 separator: str = ',',
                 ignore: str = '###',
                 format: str = 'x1,y1,x2,y2,x3,y3,x4,y4,trans',
                 encoding: str = 'utf-8',
                 remove_strs: Optional[List[str]] = None,
                 mode: Optional[str] = None) -> None:
        self.sep = separator
        self.format = format
        self.encoding = encoding
        self.ignore = ignore
        self.mode = mode
        self.remove_strs = remove_strs
        super().__init__(split=split, nproc=nproc)

    def _trace_boundary(self, char_boxes: List[np.ndarray]) -> np.ndarray:
        """Trace the boundary points of text.

        Args:
            char_boxes (list[ndarray]): The char boxes for one text. Each
                element is a 4x2 ndarray.

        Returns:
            ndarray: The boundary point sets with size nx2.
        """
        assert is_type_list(char_boxes, np.ndarray)

        # from top left to top right
        p_top = [box[0:2] for box in char_boxes]
        # from bottom right to bottom left
        p_bottom = [
            char_boxes[idx][[2, 3], :]
            for idx in range(len(char_boxes) - 1, -1, -1)
        ]

        p = p_top + p_bottom
        boundary = np.concatenate(p).astype(int)

        return boundary

    def _match_bbox_char_str(self, bboxes: np.ndarray, char_bboxes: np.ndarray,
                             strs: np.ndarray
                             ) -> Tuple[List[np.ndarray], List[str]]:
        """Match the bboxes, char bboxes, and strs.

        Args:
            bboxes (ndarray): The text boxes of size (2, 4, num_box).
            char_bboxes (ndarray): The char boxes of size
                (2, 4, num_char_box).
            strs (ndarray): The strings of size (num_strs,).

        Returns:
            Tuple(List[ndarray], List[str]): Polygon & word list.
""" assert isinstance(bboxes, np.ndarray) assert isinstance(char_bboxes, np.ndarray) assert isinstance(strs, np.ndarray) # bboxes = bboxes.astype(np.int32) char_bboxes = char_bboxes.astype(np.int32) if len(char_bboxes.shape) == 2: char_bboxes = np.expand_dims(char_bboxes, axis=2) char_bboxes = np.transpose(char_bboxes, (2, 1, 0)) num_boxes = 1 if len(bboxes.shape) == 2 else bboxes.shape[-1] poly_charbox_list = [[] for _ in range(num_boxes)] words = [] for line in strs: words += line.split() words_len = [len(w) for w in words] words_end_inx = np.cumsum(words_len) start_inx = 0 for word_inx, end_inx in enumerate(words_end_inx): for char_inx in range(start_inx, end_inx): poly_charbox_list[word_inx].append(char_bboxes[char_inx]) start_inx = end_inx for box_inx in range(num_boxes): assert len(poly_charbox_list[box_inx]) > 0 poly_boundary_list = [] for item in poly_charbox_list: boundary = np.ndarray((0, 2)) if len(item) > 0: boundary = self._trace_boundary(item) poly_boundary_list.append(boundary) return poly_boundary_list, words def parse_files(self, img_paths: Union[List[str], str], ann_paths: Union[List[str], str]) -> List[Tuple]: """Convert annotations to MMOCR format. Args: img_paths (str or list[str]): the list of image paths or the directory of the images. ann_paths (str or list[str]): the list of annotation paths or the path of the annotation file which contains all the annotations. Returns: List[Tuple]: A list of a tuple of (image_path, instances). - img_path (str): The path of image file, which can be read directly by opencv. - instance: instance is a list of dict containing parsed annotations, which should contain the following keys: - 'poly' or 'box' (textdet or textspotting) - 'text' (textspotting or textrecog) - 'ignore' (all task) """ assert isinstance(ann_paths, str) gt = loadmat(ann_paths) self.img_dir = img_paths samples = track_parallel_progress( self.parse_file, list( zip(gt['imnames'][0], gt['wordBB'][0], gt['charBB'][0], gt['txt'][0])), nproc=self.nproc) return samples def parse_file(self, annotation: Tuple) -> Tuple: """Parse single annotation.""" img_file, wordBB, charBB, txt = annotation polys_list, word_list = self._match_bbox_char_str(wordBB, charBB, txt) instances = list() for poly, word in zip(polys_list, word_list): instances.append( dict(poly=poly.flatten().tolist(), text=word, ignore=False)) return osp.join(self.img_dir, img_file[0]), instances