Mountchicken's picture
Upload 704 files
9bf4bd7
raw
history blame
6.15 kB
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import Dict, List, Tuple
import mmcv
from mmengine import mkdir_or_exist
from mmocr.registry import DATA_PACKERS
from mmocr.utils import bbox2poly, crop_img, poly2bbox, warp_img
from .base import BasePacker
@DATA_PACKERS.register_module()
class TextRecogPacker(BasePacker):
    """Text recognition packer.

    It packs the parsed annotation info into the following structure:

    .. code-block:: python

        {
            "metainfo":
                {
                    "dataset_type": "TextRecogDataset",
                    "task_name": "textrecog",
                },
            "data_list":
                [
                    {
                        "img_path": "textrecog_imgs/train/test_img.jpg",
                        "instances":
                            [
                                {
                                    "text": "GRAND"
                                }
                            ]
                    }
                ]
        }
    """

    def pack_instance(self, sample: Tuple) -> Dict:
        """Pack one (image, text) pair into a recognition instance.

        Args:
            sample (Tuple): A tuple of (img_name, text).

        Returns:
            Dict: The packed instance, holding a one-element ``instances``
            list with the text and an ``img_path`` expressed relative to
            ``self.data_root``.
        """
        img_path, transcription = sample
        return {
            'instances': [{'text': transcription}],
            # Stored relative to the dataset root rather than as given.
            'img_path': osp.relpath(img_path, self.data_root),
        }

    def add_meta(self, sample: List) -> Dict:
        """Wrap the packed samples together with the meta information.

        Args:
            sample (List): A list of samples of the dataset.

        Returns:
            Dict: A dict with a ``metainfo`` entry describing the dataset
            and a ``data_list`` entry holding ``sample`` as passed in.
        """
        metainfo = {
            'dataset_type': 'TextRecogDataset',
            'task_name': 'textrecog',
        }
        return {'metainfo': metainfo, 'data_list': sample}
@DATA_PACKERS.register_module()
class TextRecogCropPacker(TextRecogPacker):
    """Text recognition packer with image cropper.

    It packs the parsed annotation info and crops the word images out of
    the full-size ones. Every patch is written to
    ``<data_root>/textrecog_imgs/<split>/``.

    Args:
        crop_with_warp (bool): Whether to crop the text from the original
            image using opencv warpPerspective. Defaults to False.
        jitter (bool): (Applicable when crop_with_warp=True)
            Whether to jitter the box. Defaults to False.
        jitter_ratio_x (float): (Applicable when crop_with_warp=True)
            Horizontal jitter ratio relative to the height.
            Defaults to 0.0.
        jitter_ratio_y (float): (Applicable when crop_with_warp=True)
            Vertical jitter ratio relative to the height.
            Defaults to 0.0.
        long_edge_pad_ratio (float): (Applicable when crop_with_warp=False)
            The ratio of padding the long edge of the cropped image.
            Defaults to 0.0.
        short_edge_pad_ratio (float): (Applicable when crop_with_warp=False)
            The ratio of padding the short edge of the cropped image.
            Defaults to 0.0.
        **kwargs: Passed through unchanged to the parent constructor.
    """

    def __init__(self,
                 crop_with_warp: bool = False,
                 jitter: bool = False,
                 jitter_ratio_x: float = 0.0,
                 jitter_ratio_y: float = 0.0,
                 long_edge_pad_ratio: float = 0.0,
                 short_edge_pad_ratio: float = 0.0,
                 **kwargs):
        super().__init__(**kwargs)
        self.crop_with_warp = crop_with_warp
        self.jitter = jitter
        self.jrx = jitter_ratio_x
        self.jry = jitter_ratio_y
        self.lepr = long_edge_pad_ratio
        self.sepr = short_edge_pad_ratio
        # The cropper turns full-size textdet images into word patches,
        # which are stored in a dedicated folder below the data root.
        # NOTE(review): ``self.data_root`` and ``self.split`` are expected
        # to be set by the parent constructor - confirm against BasePacker.
        self.cropped_img_dir = 'textrecog_imgs'
        self.crop_save_path = osp.join(self.data_root, self.cropped_img_dir)
        mkdir_or_exist(self.crop_save_path)
        mkdir_or_exist(osp.join(self.crop_save_path, self.split))

    @staticmethod
    def _get_box(instance: Dict) -> List:
        """Return the instance's bounding box as a flat polygon list.

        A ``'box'`` entry takes precedence; otherwise the ``'poly'`` entry
        is reduced to its bounding box first.

        Raises:
            ValueError: If the instance has neither ``'box'`` nor
                ``'poly'``.
        """
        if 'box' in instance:
            return bbox2poly(instance['box']).tolist()
        if 'poly' in instance:
            return bbox2poly(poly2bbox(instance['poly'])).tolist()
        # Fail loudly here instead of handing ``None`` to the cropper.
        raise ValueError("instance must contain either 'box' or 'poly'")

    @staticmethod
    def _get_poly(instance: Dict) -> List:
        """Return the instance's polygon.

        A ``'poly'`` entry takes precedence and is returned as stored;
        otherwise the ``'box'`` entry is converted to a polygon list.

        Raises:
            ValueError: If the instance has neither ``'poly'`` nor
                ``'box'``.
        """
        if 'poly' in instance:
            return instance['poly']
        if 'box' in instance:
            return bbox2poly(instance['box']).tolist()
        # Fail loudly here instead of handing ``None`` to the warper.
        raise ValueError("instance must contain either 'box' or 'poly'")

    def pack_instance(self, sample: Tuple) -> List:
        """Crop the text patches out of one image and pack them.

        Each non-ignored instance is cropped (or warped, when
        ``crop_with_warp`` is set), written to
        ``<crop_save_path>/<split>/<image stem>_<index><ext>`` and packed
        as its own recognition sample. Instances whose patch has a zero
        height or width are skipped.

        Args:
            sample (Tuple): A tuple of (img_path, instances), where
                ``instances`` is an iterable of dicts carrying ``'text'``,
                an optional ``'ignore'`` flag and a ``'box'`` and/or
                ``'poly'`` entry.

        Returns:
            List: One packed dict per saved patch, each with a
            one-element ``instances`` list and an ``img_path`` relative
            to the data root.

        Raises:
            ValueError: If a non-ignored instance has neither ``'box'``
                nor ``'poly'``.
        """
        img_path, instances = sample
        img = mmcv.imread(img_path)
        # The source file's stem and extension are the same for every
        # patch, so split the name once instead of on each iteration.
        stem, ext = osp.splitext(osp.basename(img_path))

        data_list = []
        for i, instance in enumerate(instances):
            # A missing 'ignore' flag is treated as "do not ignore".
            if instance.get('ignore', False):
                continue
            if self.crop_with_warp:
                poly = self._get_poly(instance)
                patch = warp_img(img, poly, self.jitter, self.jrx, self.jry)
            else:
                box = self._get_box(instance)
                patch = crop_img(img, box, self.lepr, self.sepr)
            # Skip degenerate patches that have no pixels to save.
            if patch.shape[0] == 0 or patch.shape[1] == 0:
                continue
            text = instance['text']
            # The index is the position among all instances (ignored ones
            # included), so patch names stay stable per annotation.
            patch_name = f'{stem}_{i}{ext}'
            dst_path = osp.join(self.crop_save_path, self.split, patch_name)
            mmcv.imwrite(patch, dst_path)
            rec_instance = dict(
                instances=[dict(text=text)],
                img_path=osp.join(self.cropped_img_dir, self.split,
                                  patch_name))
            data_list.append(rec_instance)

        return data_list

    def add_meta(self, sample: List) -> Dict:
        """Flatten the per-image patch lists and add meta information.

        Args:
            sample (List): A list whose items are the per-image lists
                returned by :meth:`pack_instance`.

        Returns:
            Dict: A dict contains the meta information and the flattened
            samples.
        """
        # ``pack_instance`` returns a list of patches for every image, so
        # the nested lists are flattened into one list of samples first.
        sample = [item for sublist in sample for item in sublist]
        return super().add_meta(sample)