Spaces:
Running
Running
# Copyright (c) OpenMMLab. All rights reserved. | |
import json | |
from typing import Tuple | |
from mmocr.registry import DATA_PARSERS | |
from mmocr.utils import bbox2poly | |
from .base import BaseParser | |
class FUNSDTextDetAnnParser(BaseParser): | |
"""FUNSD Text Detection Annotation Parser. See | |
dataset_zoo/funsd/sample_anno.md for annotation example. | |
Args: | |
nproc (int): The number of processes to parse the annotation. Defaults | |
to 1. | |
""" | |
def parse_file(self, img_path: str, ann_path: str) -> Tuple: | |
"""Parse single annotation.""" | |
instances = list() | |
for poly, text, ignore in self.loader(ann_path): | |
instances.append(dict(poly=poly, text=text, ignore=ignore)) | |
return img_path, instances | |
def loader(self, file_path: str): | |
with open(file_path, 'r') as f: | |
data = json.load(f) | |
for form in data['form']: | |
for word in form['words']: | |
poly = bbox2poly(word['box']).tolist() | |
text = word['text'] | |
ignore = len(text) == 0 | |
yield poly, text, ignore | |