# MAERec-Gradio/mmocr/utils/data_converter_utils.py
# Mountchicken: "Upload 704 files" (commit 9bf4bd7)
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Dict, Sequence
import mmengine
from mmocr.utils import is_type_list
def dump_ocr_data(image_infos: Sequence[Dict], out_json_name: str,
                  task_name: str, **kwargs) -> Dict:
    """Dump the annotation in openmmlab style.

    Args:
        image_infos (list): List of image information dicts. Read the example
            section for the format illustration.
        out_json_name (str): Output json filename.
        task_name (str): Task name. Options are 'textdet', 'textrecog' and
            'textspotter'.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``mmengine.dump``.

    Examples:
        Here is the general structure of image_infos for textdet/textspotter
        tasks:

        .. code-block:: python

            [  # A list of dicts. Each dict stands for a single image.
                {
                    "file_name": "1.jpg",
                    "height": 100,
                    "width": 200,
                    "segm_file": "seg.txt",  # (optional) path to segmap
                    "anno_info": [  # a list of dicts. Each dict
                                    # stands for a single text instance.
                        {
                            "iscrowd": 0,  # 0: don't ignore this instance
                                           # 1: ignore
                            "category_id": 0,  # Instance class id. Must be 0
                                               # for OCR tasks to permanently
                                               # be mapped to 'text' category
                            "bbox": [x, y, w, h],
                            "segmentation": [x1, y1, x2, y2, ...],
                            "text": "demo_text"  # for textspotter only.
                        }
                    ]
                },
            ]

        The input for textrecog task is much simpler:

        .. code-block:: python

            [  # A list of dicts. Each dict stands for a single image.
                {
                    "file_name": "1.jpg",
                    "anno_info": [  # a list of dicts. Each dict
                                    # stands for a single text instance.
                                    # However, in textrecog, usually each
                                    # image only has one text instance.
                        {
                            "text": "demo_text"
                        }
                    ]
                },
            ]

    Returns:
        out_json(dict): The openmmlab-style annotation. The same dict is
        also written to ``out_json_name``.

    Raises:
        AssertionError: If ``image_infos`` is not a list, ``out_json_name``
            is not a str, or ``task_name`` is not a supported task.
        KeyError: If an image or annotation dict lacks a key required by
            the selected task.
    """
    task2dataset = {
        'textspotter': 'TextSpotterDataset',
        'textdet': 'TextDetDataset',
        'textrecog': 'TextRecogDataset'
    }

    assert isinstance(image_infos, list)
    assert isinstance(out_json_name, str)
    assert task_name in task2dataset

    # The task never changes while converting, so decide once which groups
    # of fields have to be emitted instead of re-testing for every instance.
    has_geometry = task_name in ('textdet', 'textspotter')
    has_text = task_name in ('textrecog', 'textspotter')

    out_json = dict(
        metainfo=dict(
            dataset_type=task2dataset[task_name], task_name=task_name),
        data_list=[])
    if has_geometry:
        out_json['metainfo']['category'] = [dict(id=0, name='text')]

    for image_info in image_infos:
        # Key insertion order is kept stable because it determines the
        # field order of the dumped file.
        single_info = dict(instances=[])
        single_info['img_path'] = image_info['file_name']
        if has_geometry:
            single_info['height'] = image_info['height']
            single_info['width'] = image_info['width']
        if 'segm_file' in image_info:
            single_info['seg_map'] = image_info['segm_file']

        for anno_info in image_info['anno_info']:
            instance = {}
            if has_text:
                instance['text'] = anno_info['text']
            if has_geometry:
                mask = anno_info['segmentation']
                # TODO: remove this if-branch when all converters have been
                # verified
                if len(mask) == 1 and len(mask[0]) > 1:
                    # Legacy converters wrapped the polygon in an extra
                    # list; unwrap it and tell the user to fix the input.
                    mask = mask[0]
                    warnings.warn(
                        'Detected nested segmentation for a single '
                        'text instance, which should be a 1-d array now. '
                        'Please fix input accordingly.')
                instance['polygon'] = mask
                # Input boxes are [x, y, w, h]; output boxes are
                # [x1, y1, x2, y2].
                x, y, w, h = anno_info['bbox']
                instance['bbox'] = [x, y, x + w, y + h]
                instance['bbox_label'] = anno_info['category_id']
                instance['ignore'] = anno_info['iscrowd'] == 1
            single_info['instances'].append(instance)
        out_json['data_list'].append(single_info)

    mmengine.dump(out_json, out_json_name, **kwargs)

    return out_json
def recog_anno_to_imginfo(
    file_paths: Sequence[str],
    labels: Sequence[str],
) -> Sequence[Dict]:
    """Pair up recognition file paths and labels as ``image_infos``.

    Legacy MMOCR 0.x recognition converters produce two parallel lists,
    one with image paths and one with the matching text labels:

    .. code-block:: python

        file_paths = ['1.jpg', '2.jpg', ...]
        labels = ['aaa', 'bbb', ...]

    This helper zips them into the list-of-dicts layout that
    :func:`dump_ocr_data()` accepts for the recognition task:

    .. code-block:: python

        [  # One dict per image.
            {
                "file_name": "1.jpg",
                "anno_info": [
                    {
                        "text": "aaa"
                    }
                ]
            },
            {
                "file_name": "2.jpg",
                "anno_info": [
                    {
                        "text": "bbb"
                    }
                ]
            },
            ...
        ]

    Args:
        file_paths (list[str]): A list of file paths to images.
        labels (list[str]): A list of text labels, aligned index-by-index
            with ``file_paths``.

    Returns:
        list[dict]: Annotations parsable by :func:`dump_ocr_data()`.
    """
    assert is_type_list(file_paths, str)
    assert is_type_list(labels, str)
    assert len(file_paths) == len(labels)

    # Lengths were checked above, so zip() pairs every path with its label.
    return [{
        'file_name': img_path,
        'anno_info': [{
            'text': text
        }]
    } for img_path, text in zip(file_paths, labels)]