Spaces:
Running
Running
# Copyright (c) OpenMMLab. All rights reserved. | |
from typing import List, Optional, Tuple | |
from mmocr.registry import DATA_PARSERS | |
from mmocr.utils import bbox2poly | |
from .base import BaseParser | |
# NOTE(review): ``DATA_PARSERS`` is imported by this file but no
# ``@DATA_PARSERS.register_module()`` decorator is visible on this class;
# confirm whether registration is expected to happen here.
class SROIETextDetAnnParser(BaseParser):
    """SROIE Txt Format Text Detection Annotation Parser.

    The original annotation format of this dataset is stored in txt files,
    which is formed as the following format:
        x1, y1, x2, y2, x3, y3, x4, y4, transcription

    Args:
        split (str): Forwarded unchanged to ``BaseParser.__init__``
            (presumably the dataset split being parsed; confirm against
            ``BaseParser``).
        separator (str): The separator between each element in a line.
            Defaults to ','.
        ignore (str): The text to be ignored. Defaults to '###'.
        format (str): The format of the annotation. Defaults to
            'x1,y1,x2,y2,x3,y3,x4,y4,trans'.
        encoding (str): The encoding of the annotation file. Defaults to
            'utf-8-sig'.
        nproc (int): The number of processes to parse the annotation.
            Defaults to 1.
        remove_strs (List[str], Optional): Used to remove redundant strings
            in the transcription. Defaults to None.
        mode (str, optional): The mode of the box converter. Supported modes
            are 'xywh' and 'xyxy'. Defaults to None, in which case the
            parsed coordinates are used as the polygon without conversion.
    """

    def __init__(self,
                 split: str,
                 separator: str = ',',
                 ignore: str = '###',
                 format: str = 'x1,y1,x2,y2,x3,y3,x4,y4,trans',
                 encoding: str = 'utf-8-sig',
                 nproc: int = 1,
                 remove_strs: Optional[List[str]] = None,
                 mode: Optional[str] = None) -> None:
        self.sep = separator
        self.format = format
        self.encoding = encoding
        self.ignore = ignore
        self.mode = mode
        self.remove_strs = remove_strs
        super().__init__(nproc=nproc, split=split)

    def parse_file(self, img_path: str, ann_path: str) -> Tuple:
        """Parse single annotation.

        Args:
            img_path (str): Path of the image; returned unchanged.
            ann_path (str): Path of the txt annotation file handed to
                ``self.loader``.

        Returns:
            Tuple: ``(img_path, instances)`` where ``instances`` is a list of
            dicts with keys ``poly`` (list of floats), ``text`` (the
            transcription) and ``ignore`` (True when the transcription
            equals ``self.ignore``).
        """
        import warnings

        instances = []
        try:
            # there might be some illegal symbols in the annotation
            # which cannot be parsed by loader
            for anno in self.loader(ann_path, self.sep, self.format,
                                    self.encoding):
                fields = list(anno.values())
                if self.remove_strs is not None:
                    for redundant in self.remove_strs:
                        for idx, field in enumerate(fields):
                            if redundant in field:
                                fields[idx] = field.replace(redundant, '')
                # Every field except the last is a coordinate; the last one
                # is the transcription.
                poly = [float(value) for value in fields[:-1]]
                if self.mode is not None:
                    poly = bbox2poly(poly, self.mode).tolist()
                text = fields[-1]
                instances.append(
                    dict(poly=poly, text=text, ignore=text == self.ignore))
        except Exception as exc:
            # Parsing stays best-effort: instances collected before the
            # failure are kept and the rest of the file is skipped. Surface
            # the problem instead of dropping annotations silently.
            warnings.warn(
                f'Stopped parsing {ann_path} after {len(instances)} '
                f'instance(s) due to {exc!r}')
        return img_path, instances