# Copyright (c) OpenMMLab. All rights reserved. import warnings from typing import Dict, Sequence import mmengine from mmocr.utils import is_type_list def dump_ocr_data(image_infos: Sequence[Dict], out_json_name: str, task_name: str, **kwargs) -> Dict: """Dump the annotation in openmmlab style. Args: image_infos (list): List of image information dicts. Read the example section for the format illustration. out_json_name (str): Output json filename. task_name (str): Task name. Options are 'textdet', 'textrecog' and 'textspotter'. Examples: Here is the general structure of image_infos for textdet/textspotter tasks: .. code-block:: python [ # A list of dicts. Each dict stands for a single image. { "file_name": "1.jpg", "height": 100, "width": 200, "segm_file": "seg.txt" # (optional) path to segmap "anno_info": [ # a list of dicts. Each dict # stands for a single text instance. { "iscrowd": 0, # 0: don't ignore this instance # 1: ignore "category_id": 0, # Instance class id. Must be 0 # for OCR tasks to permanently # be mapped to 'text' category "bbox": [x, y, w, h], "segmentation": [x1, y1, x2, y2, ...], "text": "demo_text" # for textspotter only. } ] }, ] The input for textrecog task is much simpler: .. code-block:: python [ # A list of dicts. Each dict stands for a single image. { "file_name": "1.jpg", "anno_info": [ # a list of dicts. Each dict # stands for a single text instance. # However, in textrecog, usually each # image only has one text instance. { "text": "demo_text" } ] }, ] Returns: out_json(dict): The openmmlab-style annotation. """ task2dataset = { 'textspotter': 'TextSpotterDataset', 'textdet': 'TextDetDataset', 'textrecog': 'TextRecogDataset' } assert isinstance(image_infos, list) assert isinstance(out_json_name, str) assert task_name in task2dataset.keys() dataset_type = task2dataset[task_name] out_json = dict( metainfo=dict(dataset_type=dataset_type, task_name=task_name), data_list=list()) if task_name in ['textdet', 'textspotter']: out_json['metainfo']['category'] = [dict(id=0, name='text')] for image_info in image_infos: single_info = dict(instances=list()) single_info['img_path'] = image_info['file_name'] if task_name in ['textdet', 'textspotter']: single_info['height'] = image_info['height'] single_info['width'] = image_info['width'] if 'segm_file' in image_info: single_info['seg_map'] = image_info['segm_file'] anno_infos = image_info['anno_info'] for anno_info in anno_infos: instance = {} if task_name in ['textrecog', 'textspotter']: instance['text'] = anno_info['text'] if task_name in ['textdet', 'textspotter']: mask = anno_info['segmentation'] # TODO: remove this if-branch when all converters have been # verified if len(mask) == 1 and len(mask[0]) > 1: mask = mask[0] warnings.warn( 'Detected nested segmentation for a single' 'text instance, which should be a 1-d array now.' 'Please fix input accordingly.') instance['polygon'] = mask x, y, w, h = anno_info['bbox'] instance['bbox'] = [x, y, x + w, y + h] instance['bbox_label'] = anno_info['category_id'] instance['ignore'] = anno_info['iscrowd'] == 1 single_info['instances'].append(instance) out_json['data_list'].append(single_info) mmengine.dump(out_json, out_json_name, **kwargs) return out_json def recog_anno_to_imginfo( file_paths: Sequence[str], labels: Sequence[str], ) -> Sequence[Dict]: """Convert a list of file_paths and labels for recognition tasks into the format of image_infos acceptable by :func:`dump_ocr_data()`. It's meant to maintain compatibility with the legacy annotation format in MMOCR 0.x. In MMOCR 0.x, data converters for recognition usually converts the annotations into a list of file paths and a list of labels, which look like the following: .. code-block:: python file_paths = ['1.jpg', '2.jpg', ...] labels = ['aaa', 'bbb', ...] This utility merges them into a list of dictionaries parsable by :func:`dump_ocr_data()`: .. code-block:: python [ # A list of dicts. Each dict stands for a single image. { "file_name": "1.jpg", "anno_info": [ { "text": "aaa" } ] }, { "file_name": "2.jpg", "anno_info": [ { "text": "bbb" } ] }, ... ] Args: file_paths (list[str]): A list of file paths to images. labels (list[str]): A list of text labels. Returns: list[dict]: Annotations parsable by :func:`dump_ocr_data()`. """ assert is_type_list(file_paths, str) assert is_type_list(labels, str) assert len(file_paths) == len(labels) results = [] for i in range(len(file_paths)): result = dict( file_name=file_paths[i], anno_info=[dict(text=labels[i])]) results.append(result) return results