|
|
|
class Entity(object):
    """Plain record describing one entity.

    The constructor only stores its arguments on the instance; it performs
    no validation or conversion.  The meaning of each field is defined by
    the callers, not by this class.
    """

    def __init__(self, _id, _text, _mask, _interactive, _type, _start_idx, _end_idx, _image=None):
        """Store every argument unchanged as an attribute of the same name.

        Args:
            _id: Identifier of the entity.
            _text: Text associated with the entity.
            _mask: Mask associated with the entity (format not visible
                here -- TODO confirm against callers).
            _interactive: Interactivity flag/value (presumably a bool --
                verify against callers).
            _type: Type/category of the entity.
            _start_idx: Start index of the entity (presumably an offset into
                the source text -- TODO confirm units and inclusivity).
            _end_idx: End index of the entity (same caveat as ``_start_idx``).
            _image: Optional image attached to the entity; defaults to
                ``None``.
        """
        self.id = _id
        self.text = _text
        self.mask = _mask
        self.interactive = _interactive
        self.type = _type
        self.start_idx = _start_idx
        self.end_idx = _end_idx

        self.image = _image
|
|
|
def split_by_ordered_substrings(sentence, substrings):
    """Split *sentence* into pieces around the given substrings, in order.

    The substrings are searched for left to right: each one is looked up
    starting at the position where the previous match ended.  A substring
    that cannot be found from that position onward is skipped silently and
    does not move the search position.

    Args:
        sentence: The string to split.
        substrings: Iterable of strings expected to occur in ``sentence`` in
            the given order.

    Returns:
        A tuple ``(results, substring_indices)`` of two parallel lists.
        ``results`` holds the consecutive pieces of ``sentence`` (matched
        substrings and the non-empty text between/around them).
        ``substring_indices[k]`` is the position in ``substrings`` of the
        substring that produced ``results[k]``, or ``None`` when
        ``results[k]`` is filler text that matched no substring.
    """
    results = []
    substring_indices = []

    start_index = 0
    for i, substring in enumerate(substrings):
        # Search in place from the current position instead of slicing,
        # which would copy the remaining tail of the sentence every time.
        match_start = sentence.find(substring, start_index)
        if match_start == -1:
            # Not present after the current position: ignore this substring.
            continue

        # Text between the previous match and this one (only if non-empty).
        if match_start > start_index:
            results.append(sentence[start_index:match_start])
            substring_indices.append(None)

        results.append(substring)
        substring_indices.append(i)
        start_index = match_start + len(substring)

    # Whatever is left after the last match is trailing filler text.
    if start_index < len(sentence):
        results.append(sentence[start_index:])
        substring_indices.append(None)

    return results, substring_indices
|
|