Spaces:
Runtime error
Runtime error
def join_words(tokens):
    """Concatenate tokens into one string, spacing only between words.

    A single space is inserted before a token only when that token is purely
    alphabetic and the text built so far ends in an alphabetic character.
    Every other token (digits, punctuation, mixed, empty) is appended
    directly with no separator.

    Args:
        tokens: Sequence of strings; may be empty.

    Returns:
        The joined string, or ``''`` when ``tokens`` is empty.
    """
    if not tokens:
        return ''

    completed_word = tokens[0]
    for token in tokens[1:]:
        # Slice with [-1:] rather than index with [-1]: when the text so far
        # is empty (e.g. the first token was ''), the slice yields '' whose
        # isalpha() is False, instead of raising IndexError.
        if token.isalpha() and completed_word[-1:].isalpha():
            completed_word += ' ' + token
        else:
            completed_word += token
    return completed_word
def ent_merge(data_frame):
    """Merge per-token entity tags into a list of text/label entries.

    Walks ``data_frame['words']`` and ``data_frame['entities']`` in lockstep
    (presumably BILOU/IOB-style tags -- confirm against the tagger used):

    * ``B-<label>`` closes any open entity and opens a new one.
    * ``I-<label>`` / ``L-<label>`` matching the open entity's label extend it.
    * Any other tag closes the open entity (if there is one) and emits the
      token on its own, labelled with the tag minus its first two characters.

    Args:
        data_frame: Mapping-like object with parallel ``'words'`` and
            ``'entities'`` sequences of strings.

    Returns:
        Entries in input order: ``(joined_text, label)`` tuples for merged
        entities and ``[token, label]`` lists for stand-alone tokens.
    """
    ent_result = []
    current_entity_tokens = []
    current_entity = None

    for token, tag in zip(data_frame['words'], data_frame["entities"]):
        if tag.startswith("B-"):
            if current_entity is not None:
                ent_result.append((join_words(current_entity_tokens), current_entity))
            current_entity = tag[2:]
            current_entity_tokens = [token]
        elif current_entity is not None and tag in (
                "I-" + current_entity, "L-" + current_entity):
            # Continuation is only meaningful while an entity is open.
            current_entity_tokens.append(token)
        else:
            # Flush only when an entity is actually open; otherwise an empty
            # ('', None) entry would be emitted for every stand-alone token.
            if current_entity is not None:
                ent_result.append((join_words(current_entity_tokens), current_entity))
            # NOTE: stand-alone tokens are emitted as lists while merged
            # entities are tuples; kept as-is so existing callers see the
            # same element types.
            ent_result.append([token, tag[2:]])
            current_entity_tokens = []
            current_entity = None

    # Flush an entity that runs to the end of the input.
    if current_entity is not None:
        ent_result.append((join_words(current_entity_tokens), current_entity))
    return ent_result