Spaces:
Runtime error
Runtime error
File size: 1,340 Bytes
06924e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
def join_words(tokens):
    """Concatenate string pieces into one string, spacing adjacent words.

    A single space is inserted before a piece only when that piece is purely
    alphabetic AND the text built so far ends in an alphabetic character.
    In every other case (punctuation, digits, mixed pieces) the piece is
    appended directly to the preceding text.

    Args:
        tokens: Sequence of strings to join; may be empty.

    Returns:
        The joined string, or ``''`` when ``tokens`` is empty.
    """
    if not tokens:
        return ''

    completed_word = tokens[0]
    for token in tokens[1:]:
        # Slice ([-1:]) rather than index ([-1]): if nothing has been
        # accumulated yet (e.g. a leading '' piece) the slice is '' and
        # ''.isalpha() is False, instead of raising IndexError.
        if token.isalpha() and completed_word[-1:].isalpha():
            completed_word += ' ' + token
        else:
            completed_word += token
    return completed_word
def ent_merge(data_frame):
    """Merge per-token entity tags into contiguous labelled spans.

    Iterates ``data_frame['words']`` and ``data_frame['entities']`` in
    lockstep. A tag starting with ``"B-"`` opens a new entity whose label is
    the text after the prefix; following tags equal to ``"I-<label>"`` or
    ``"L-<label>"`` extend the open entity. Any other tag closes the open
    entity (if there is one) and the token is emitted on its own.
    NOTE(review): the prefixes look like a BILOU-style tagging scheme --
    confirm against whatever produces the ``entities`` column.

    Args:
        data_frame: Mapping-like object indexable by ``'words'`` and
            ``'entities'``, each yielding an iterable (tokens and their
            string tags respectively).

    Returns:
        A list in input order containing:
          * ``(text, label)`` tuples for merged entities, where ``text`` is
            built by ``join_words`` from the entity's tokens;
          * ``[token, tag[2:]]`` two-element lists for every token that is
            not part of a ``B-``/``I-``/``L-`` run (kept as lists to preserve
            the shape existing callers receive).
    """
    ent_result = []
    current_entity_tokens = []
    current_entity = None

    for token, tag in zip(data_frame['words'], data_frame["entities"]):
        if tag.startswith("B-"):
            # A new entity begins: emit the one in progress first.
            if current_entity is not None:
                ent_result.append((join_words(current_entity_tokens), current_entity))
            current_entity = tag[2:]
            current_entity_tokens = [token]
        elif current_entity is not None and tag in (
                "I-" + current_entity, "L-" + current_entity):
            # Continuation only counts while an entity with the same label is
            # open; guarding on current_entity (not on the token list, which
            # is never None) keeps a stray "I-None" tag from matching.
            current_entity_tokens.append(token)
        else:
            # Only flush when something is actually open; otherwise every
            # plain token would add an empty ('', None) entry to the output.
            if current_entity is not None:
                ent_result.append((join_words(current_entity_tokens), current_entity))
            ent_result.append([token, tag[2:]])
            current_entity_tokens = []
            current_entity = None

    # Emit an entity that runs to the end of the input.
    if current_entity is not None:
        ent_result.append((join_words(current_entity_tokens), current_entity))
    return ent_result
|