Spaces:
Running
Running
File size: 6,516 Bytes
9bf4bd7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Dict, Sequence
import mmengine
from mmocr.utils import is_type_list
def dump_ocr_data(image_infos: Sequence[Dict], out_json_name: str,
                  task_name: str, **kwargs) -> Dict:
    """Dump the annotation in openmmlab style.

    The converted annotation is written to ``out_json_name`` through
    ``mmengine.dump`` and also returned to the caller.

    Args:
        image_infos (list): List of image information dicts. Read the example
            section for the format illustration.
        out_json_name (str): Output json filename.
        task_name (str): Task name. Options are 'textdet', 'textrecog' and
            'textspotter'.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``mmengine.dump``.

    Examples:
        Here is the general structure of image_infos for textdet/textspotter
        tasks:

        .. code-block:: python

            [  # A list of dicts. Each dict stands for a single image.
                {
                    "file_name": "1.jpg",
                    "height": 100,
                    "width": 200,
                    "segm_file": "seg.txt",  # (optional) path to segmap
                    "anno_info": [  # a list of dicts. Each dict
                                    # stands for a single text instance.
                        {
                            "iscrowd": 0,  # 0: don't ignore this instance
                                           # 1: ignore
                            "category_id": 0,  # Instance class id. Must be 0
                                               # for OCR tasks to permanently
                                               # be mapped to 'text' category
                            "bbox": [x, y, w, h],
                            "segmentation": [x1, y1, x2, y2, ...],
                            "text": "demo_text"  # for textspotter only.
                        }
                    ]
                },
            ]

        The input for textrecog task is much simpler:

        .. code-block:: python

            [  # A list of dicts. Each dict stands for a single image.
                {
                    "file_name": "1.jpg",
                    "anno_info": [  # a list of dicts. Each dict
                                    # stands for a single text instance.
                                    # However, in textrecog, usually each
                                    # image only has one text instance.
                        {
                            "text": "demo_text"
                        }
                    ]
                },
            ]

    Returns:
        out_json(dict): The openmmlab-style annotation.

    Raises:
        AssertionError: If ``image_infos`` is not a list, ``out_json_name``
            is not a str, or ``task_name`` is not one of the supported tasks.
        KeyError: If an image or instance dict lacks a key required by the
            selected task.
    """
    task2dataset = {
        'textspotter': 'TextSpotterDataset',
        'textdet': 'TextDetDataset',
        'textrecog': 'TextRecogDataset'
    }
    assert isinstance(image_infos, list)
    assert isinstance(out_json_name, str)
    assert task_name in task2dataset

    # The task decides which fields are read from the input: geometry for
    # detection-style tasks, transcription for recognition-style tasks.
    # 'textspotter' needs both.
    needs_geometry = task_name in ('textdet', 'textspotter')
    needs_text = task_name in ('textrecog', 'textspotter')

    out_json = dict(
        metainfo=dict(
            dataset_type=task2dataset[task_name], task_name=task_name),
        data_list=[])
    if needs_geometry:
        out_json['metainfo']['category'] = [dict(id=0, name='text')]

    for image_info in image_infos:
        single_info = dict(instances=[])
        single_info['img_path'] = image_info['file_name']
        if needs_geometry:
            single_info['height'] = image_info['height']
            single_info['width'] = image_info['width']
            if 'segm_file' in image_info:
                single_info['seg_map'] = image_info['segm_file']

        for anno_info in image_info['anno_info']:
            instance = {}
            if needs_text:
                instance['text'] = anno_info['text']
            if needs_geometry:
                mask = anno_info['segmentation']
                # TODO: remove this if-branch when all converters have been
                # verified
                if len(mask) == 1 and len(mask[0]) > 1:
                    # Legacy input wrapped the polygon in an extra list;
                    # unwrap it and tell the user to fix the converter.
                    mask = mask[0]
                    warnings.warn(
                        'Detected nested segmentation for a single '
                        'text instance, which should be a 1-d array now. '
                        'Please fix input accordingly.')
                instance['polygon'] = mask
                # Input bbox is [x, y, w, h]; output is [x1, y1, x2, y2].
                x, y, w, h = anno_info['bbox']
                instance['bbox'] = [x, y, x + w, y + h]
                instance['bbox_label'] = anno_info['category_id']
                instance['ignore'] = anno_info['iscrowd'] == 1
            single_info['instances'].append(instance)
        out_json['data_list'].append(single_info)

    mmengine.dump(out_json, out_json_name, **kwargs)
    return out_json
def recog_anno_to_imginfo(
    file_paths: Sequence[str],
    labels: Sequence[str],
) -> Sequence[Dict]:
    """Pair image paths with text labels as ``image_infos`` entries.

    The result is in the format accepted by :func:`dump_ocr_data()` for
    recognition tasks. It exists to maintain compatibility with the legacy
    annotation format in MMOCR 0.x, where recognition data converters
    usually produced two parallel lists:

    .. code-block:: python

        file_paths = ['1.jpg', '2.jpg', ...]
        labels = ['aaa', 'bbb', ...]

    This utility zips them into a list of dictionaries, one per image, each
    holding a single text instance:

    .. code-block:: python

        [  # A list of dicts. Each dict stands for a single image.
            {
                "file_name": "1.jpg",
                "anno_info": [
                    {
                        "text": "aaa"
                    }
                ]
            },
            {
                "file_name": "2.jpg",
                "anno_info": [
                    {
                        "text": "bbb"
                    }
                ]
            },
            ...
        ]

    Args:
        file_paths (list[str]): A list of file paths to images.
        labels (list[str]): A list of text labels, aligned index-by-index
            with ``file_paths``.

    Returns:
        list[dict]: Annotations parsable by :func:`dump_ocr_data()`.
    """
    assert is_type_list(file_paths, str)
    assert is_type_list(labels, str)
    assert len(file_paths) == len(labels)

    # The lengths are equal at this point, so zip pairs every path with its
    # label without dropping anything.
    return [{
        'file_name': path,
        'anno_info': [{
            'text': label
        }]
    } for path, label in zip(file_paths, labels)]
|