# MMOCR/tests/test_dataset/test_kie_dataset.py
# tomofi's picture
# Add application file
# 2366e36
# raw
# history blame
# 3.49 kB
# Copyright (c) OpenMMLab. All rights reserved.
import json
import math
import os.path as osp
import tempfile
import pytest
import torch
from mmocr.datasets.kie_dataset import KIEDataset
def _create_dummy_ann_file(ann_file):
    """Write a one-record line-JSON annotation file and return the record.

    Args:
        ann_file (str): Path of the file to create (or overwrite).

    Returns:
        dict: The annotation record that was serialized into ``ann_file``.
    """
    # One (text, 8-value box) pair per annotation; all of them get label 1.
    text_box_pairs = (
        ('store', [11.0, 0.0, 22.0, 0.0, 12.0, 12.0, 0.0, 12.0]),
        ('address', [23.0, 2.0, 31.0, 1.0, 24.0, 11.0, 16.0, 11.0]),
        ('price', [33.0, 2.0, 43.0, 2.0, 36.0, 12.0, 25.0, 12.0]),
        ('1.0', [46.0, 2.0, 61.0, 2.0, 53.0, 12.0, 39.0, 12.0]),
        ('google', [61.0, 2.0, 69.0, 2.0, 63.0, 12.0, 55.0, 12.0]),
    )
    record = {
        'file_name': 'sample1.png',
        'height': 200,
        'width': 200,
        'annotations': [
            {'text': text, 'box': box, 'label': 1}
            for text, box in text_box_pairs
        ],
    }

    # The file holds exactly one JSON document, terminated by a newline.
    with open(ann_file, 'w') as out:
        out.write(json.dumps(record) + '\n')

    return record
def _create_dummy_dict_file(dict_file):
    """Write a dummy character dictionary, one character per line.

    Args:
        dict_file (str): Path of the file to create (or overwrite).

    Returns:
        str: The characters written, concatenated in file order.
    """
    characters = '0123'
    # Each character is followed by its own newline, including the last one.
    payload = ''.join(symbol + '\n' for symbol in characters)
    with open(dict_file, 'w') as out:
        out.write(payload)
    return characters
def _create_dummy_loader():
    """Build the loader config dict handed to ``KIEDataset`` in this test.

    Returns:
        dict: A freshly built config with a ``HardDiskLoader`` type, a repeat
        count of 1 and a nested ``LineJsonParser`` parser config.
    """
    parser_cfg = {
        'type': 'LineJsonParser',
        'keys': ['file_name', 'height', 'width', 'annotations'],
    }
    return {
        'type': 'HardDiskLoader',
        'repeat': 1,
        'parser': parser_cfg,
    }
def test_kie_dataset():
    """Exercise ``KIEDataset`` on a single dummy annotation record.

    Covers construction from a line-JSON annotation file plus a character
    dictionary, ``prepare_train_img``, ``pre_pipeline``, ``_parse_anno_info``
    (accepted and rejected inputs) and ``evaluate``.
    """
    # The context manager guarantees the temporary files are removed even
    # when fixture creation or dataset construction raises.
    with tempfile.TemporaryDirectory() as tmp_dir_name:
        # create dummy data
        ann_file = osp.join(tmp_dir_name, 'fake_data.txt')
        ann_info1 = _create_dummy_ann_file(ann_file)

        dict_file = osp.join(tmp_dir_name, 'fake_dict.txt')
        _create_dummy_dict_file(dict_file)

        # test initialization
        loader = _create_dummy_loader()
        dataset = KIEDataset(ann_file, loader, dict_file, pipeline=[])

    # From here on the temporary directory no longer exists; everything
    # below is deliberately run against the already-constructed dataset.
    dataset.prepare_train_img(0)

    # test pre_pipeline
    img_ann_info = dataset.data_infos[0]
    img_info = {
        'filename': img_ann_info['file_name'],
        'height': img_ann_info['height'],
        'width': img_ann_info['width']
    }
    ann_info = dataset._parse_anno_info(img_ann_info['annotations'])
    pipeline_input = dict(img_info=img_info, ann_info=ann_info)
    dataset.pre_pipeline(pipeline_input)
    assert pipeline_input['img_prefix'] == dataset.img_prefix

    # test _parse_anno_info
    annos = ann_info1['annotations']
    # A single annotation dict (instead of the list of them) must be rejected.
    with pytest.raises(AssertionError):
        dataset._parse_anno_info(annos[0])
    # An annotation without 'label' is accepted ...
    tmp_annos = [{
        'text': 'store',
        'box': [11.0, 0.0, 22.0, 0.0, 12.0, 12.0, 0.0, 12.0]
    }]
    dataset._parse_anno_info(tmp_annos)
    # ... whereas one without 'box' must be rejected.
    tmp_annos = [{'text': 'store'}]
    with pytest.raises(AssertionError):
        dataset._parse_anno_info(tmp_annos)

    return_anno = dataset._parse_anno_info(annos)
    for key in ('bboxes', 'relations', 'texts', 'labels'):
        assert key in return_anno

    # test evaluation
    # A 5x5 score tensor of ones whose column 1 is raised to 100.
    # NOTE(review): presumably rows are nodes and columns are class scores,
    # so every node is predicted as label 1 -- confirm against
    # KIEDataset.evaluate.
    result = {'nodes': torch.full((5, 5), 1, dtype=torch.float)}
    result['nodes'][:, 1] = 100.
    # The same result dict is intentionally referenced five times.
    eval_inputs = [result for _ in range(5)]

    eval_res = dataset.evaluate(eval_inputs)
    assert math.isclose(eval_res['macro_f1'], 0.2, abs_tol=1e-4)
# Allow running this file directly as a script. The guard keeps the test from
# executing as an import side effect (e.g. while pytest collects the module),
# where it would run outside the test run and a second time.
if __name__ == '__main__':
    test_kie_dataset()