Spaces:
Runtime error
Runtime error
# Copyright (c) OpenMMLab. All rights reserved.
import json | |
from mmocr.datasets.builder import PARSERS | |
from mmocr.utils import StringStrip | |
class LineStrParser:
    """Parse string of one line in annotation file to dict format.

    Args:
        keys (list[str]): Keys in result dict.
        keys_idx (list[int]): Value index in sub-string list
            for each key above.
        separator (str): Separator to separate string to list of sub-string.
        **kwargs: Forwarded unchanged to ``StringStrip``; the resulting
            object is called on every raw line before it is split.
    """

    def __init__(self,
                 keys=['filename', 'text'],
                 keys_idx=[0, 1],
                 separator=' ',
                 **kwargs):
        assert isinstance(keys, list)
        assert isinstance(keys_idx, list)
        assert isinstance(separator, str)
        assert len(keys) > 0
        assert len(keys) == len(keys_idx)
        # Keep private copies: storing the argument itself would alias the
        # shared default list (and the caller's list), so an in-place
        # mutation of ``parser.keys`` would leak into every other instance.
        self.keys = list(keys)
        self.keys_idx = list(keys_idx)
        self.separator = separator
        self.strip_cls = StringStrip(**kwargs)

    def get_item(self, data_ret, index):
        """Return the parsed fields of one annotation line as a dict.

        Args:
            data_ret: Indexable collection of annotation lines that
                supports ``len()``.
            index (int): Requested position; it is wrapped modulo
                ``len(data_ret)`` before the lookup.

        Returns:
            dict: Maps each entry of ``self.keys`` to the sub-string found
            at the matching position of ``self.keys_idx``.

        Raises:
            Exception: If the split line has too few sub-strings for the
                largest configured index.
        """
        map_index = index % len(data_ret)
        stripped = self.strip_cls(data_ret[map_index])
        fields = stripped.split(self.separator)
        # Evaluate the bound once; it feeds both the check and the message.
        max_idx = max(self.keys_idx)
        if len(fields) <= max_idx:
            raise Exception(
                f'key index: {max_idx} out of range: {fields}')
        return {
            key: fields[idx]
            for key, idx in zip(self.keys, self.keys_idx)
        }
class LineJsonParser:
    """Parse json-string of one line in annotation file to dict format.

    Args:
        keys (list[str]): Keys in both json-string and result dict.
            Must be a non-empty list; the empty default is rejected by
            the assertion in ``__init__``.
    """

    def __init__(self, keys=[]):
        assert isinstance(keys, list)
        assert len(keys) > 0
        # Keep a private copy so that later mutation of the caller's list
        # cannot silently change which keys this parser requires.
        self.keys = list(keys)

    def get_item(self, data_ret, index):
        """Return the configured keys of one JSON annotation line.

        Args:
            data_ret: Indexable collection of JSON strings that supports
                ``len()``.
            index (int): Requested position; it is wrapped modulo
                ``len(data_ret)`` before the lookup.

        Returns:
            dict: The values of ``self.keys`` taken from the decoded line;
            any other members of the JSON object are dropped.

        Raises:
            Exception: If a configured key is absent from the decoded line.
        """
        map_index = index % len(data_ret)
        line_json_obj = json.loads(data_ret[map_index])
        line_info = {}
        for key in self.keys:
            if key not in line_json_obj:
                raise Exception(f'key {key} not in line json {line_json_obj}')
            line_info[key] = line_json_obj[key]
        return line_info