# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp | |
from typing import Dict, List, Tuple | |
import mmcv | |
from mmengine import mkdir_or_exist | |
from mmocr.registry import DATA_PACKERS | |
from mmocr.utils import bbox2poly, crop_img, poly2bbox, warp_img | |
from .base import BasePacker | |
# NOTE(review): DATA_PACKERS is imported by this file but no registration
# decorator is visible on this class - confirm whether it is registered
# elsewhere.
class TextRecogPacker(BasePacker):
    """Text recognition packer.

    Packs the parsed annotation info into the following structure:

    .. code-block:: python

        {
            "metainfo":
            {
                "dataset_type": "TextRecogDataset",
                "task_name": "textrecog",
            },
            "data_list":
            [
                {
                    "img_path": "textrecog_imgs/train/test_img.jpg",
                    "instances":
                    [
                        {
                            "text": "GRAND"
                        }
                    ]
                }
            ]
        }
    """

    def pack_instance(self, sample: Tuple) -> Dict:
        """Pack one (image, text) pair into a recognition instance.

        Args:
            sample (Tuple): A tuple of (img_name, text).

        Returns:
            Dict: The packed instance, holding a single-element
            ``instances`` list with the text and an ``img_path`` expressed
            relative to ``self.data_root``.
        """
        image_file, transcription = sample
        # Annotations store image paths relative to the dataset root.
        relative_path = osp.relpath(image_file, self.data_root)
        return {
            'instances': [{
                'text': transcription
            }],
            'img_path': relative_path,
        }

    def add_meta(self, sample: List) -> Dict:
        """Wrap the packed samples together with the dataset meta information.

        Args:
            sample (List): A list of samples of the dataset.

        Returns:
            Dict: A dict with the meta information under ``metainfo`` and
            the samples under ``data_list``.
        """
        metainfo = {
            'dataset_type': 'TextRecogDataset',
            'task_name': 'textrecog',
        }
        return {'metainfo': metainfo, 'data_list': sample}
# NOTE(review): DATA_PACKERS is imported by this file but no registration
# decorator is visible on this class - confirm whether it is registered
# elsewhere.
class TextRecogCropPacker(TextRecogPacker):
    """Text recognition packer with image cropper.

    It is used to pack the parsed annotation info and crop out the word
    images from the full-size ones.

    Args:
        crop_with_warp (bool): Whether to crop the text from the original
            image using opencv warpPerspective. Defaults to False.
        jitter (bool): (Applicable when crop_with_warp=True)
            Whether to jitter the box. Defaults to False.
        jitter_ratio_x (float): (Applicable when crop_with_warp=True)
            Horizontal jitter ratio relative to the height.
            Defaults to 0.0.
        jitter_ratio_y (float): (Applicable when crop_with_warp=True)
            Vertical jitter ratio relative to the height.
            Defaults to 0.0.
        long_edge_pad_ratio (float): (Applicable when crop_with_warp=False)
            The ratio of padding the long edge of the cropped image.
            Defaults to 0.0.
        short_edge_pad_ratio (float): (Applicable when crop_with_warp=False)
            The ratio of padding the short edge of the cropped image.
            Defaults to 0.0.
        **kwargs: Forwarded unchanged to the parent packer's constructor.
    """

    def __init__(self,
                 crop_with_warp: bool = False,
                 jitter: bool = False,
                 jitter_ratio_x: float = 0.0,
                 jitter_ratio_y: float = 0.0,
                 long_edge_pad_ratio: float = 0.0,
                 short_edge_pad_ratio: float = 0.0,
                 **kwargs):
        super().__init__(**kwargs)
        self.crop_with_warp = crop_with_warp
        self.jitter = jitter
        self.jrx = jitter_ratio_x
        self.jry = jitter_ratio_y
        self.lepr = long_edge_pad_ratio
        self.sepr = short_edge_pad_ratio
        # Cropped patches are written to <data_root>/textrecog_imgs/<split>;
        # make sure both directory levels exist up front.
        self.cropped_img_dir = 'textrecog_imgs'
        self.crop_save_path = osp.join(self.data_root, self.cropped_img_dir)
        mkdir_or_exist(self.crop_save_path)
        mkdir_or_exist(osp.join(self.crop_save_path, self.split))

    @staticmethod
    def _get_box(instance: Dict) -> List:
        """Return the box of an instance in polygon form for plain cropping.

        Prefers the ``box`` annotation; otherwise falls back to the bounding
        box of the ``poly`` annotation.

        Raises:
            ValueError: If the instance has neither ``box`` nor ``poly``.
        """
        if 'box' in instance:
            return bbox2poly(instance['box']).tolist()
        if 'poly' in instance:
            return bbox2poly(poly2bbox(instance['poly'])).tolist()
        raise ValueError(
            'Instance must contain either a "box" or a "poly" annotation')

    @staticmethod
    def _get_poly(instance: Dict) -> List:
        """Return the polygon of an instance for perspective warping.

        Prefers the ``poly`` annotation; otherwise converts the ``box``
        annotation to a polygon.

        Raises:
            ValueError: If the instance has neither ``poly`` nor ``box``.
        """
        if 'poly' in instance:
            return instance['poly']
        if 'box' in instance:
            return bbox2poly(instance['box']).tolist()
        raise ValueError(
            'Instance must contain either a "box" or a "poly" annotation')

    def pack_instance(self, sample: Tuple) -> List:
        """Crop the text patches out of an image and pack them.

        Each non-ignored instance is cropped (or warped when
        ``crop_with_warp`` is set), written to
        ``<crop_save_path>/<split>/<image stem>_<index><ext>``, and packed
        as a recognition instance.

        Args:
            sample (Tuple): A tuple of (img_path, instances), where each
                instance is a dict with ``ignore``, ``text`` and a ``box``
                and/or ``poly`` entry.

        Returns:
            List: The list of packed recognition instances, one per saved
            patch.

        Raises:
            ValueError: If a non-ignored instance has neither ``box`` nor
                ``poly``.
        """
        img_path, instances = sample
        img = mmcv.imread(img_path)
        # The stem and extension of the source image do not change across
        # instances, so split the name once instead of per patch.
        stem, ext = osp.splitext(osp.basename(img_path))

        data_list = []
        for i, instance in enumerate(instances):
            if instance['ignore']:
                continue
            if self.crop_with_warp:
                poly = self._get_poly(instance)
                patch = warp_img(img, poly, self.jitter, self.jrx, self.jry)
            else:
                box = self._get_box(instance)
                patch = crop_img(img, box, self.lepr, self.sepr)
            # Skip degenerate crops with zero height or width.
            if patch.shape[0] == 0 or patch.shape[1] == 0:
                continue
            text = instance['text']
            # The index is the position in the original instance list, so
            # skipped instances leave gaps in the numbering.
            patch_name = f'{stem}_{i}{ext}'
            dst_path = osp.join(self.crop_save_path, self.split, patch_name)
            mmcv.imwrite(patch, dst_path)
            rec_instance = dict(
                instances=[dict(text=text)],
                img_path=osp.join(self.cropped_img_dir, self.split,
                                  patch_name))
            data_list.append(rec_instance)

        return data_list

    def add_meta(self, sample: List) -> Dict:
        """Flatten the per-image patch lists and add meta information.

        Args:
            sample (List): A list whose items are the lists returned by
                ``pack_instance`` (one list of patches per image).

        Returns:
            Dict: A dict contains the meta information and the flattened
            samples.
        """
        # Since the TextRecogCropPacker packs all of the patches in a single
        # image into a list, we need to flatten the list.
        sample = [item for sublist in sample for item in sublist]
        return super().add_meta(sample)