Spaces:

kandysh
/

NER_Tagger

Runtime error

File size: 1,340 Bytes

06924e0

def join_words(tokens):
    """Join a sequence of string tokens into a single display string.

    A single space is inserted between two neighbouring tokens only when the
    incoming token is purely alphabetic (``str.isalpha``) and the text built
    so far ends with an alphabetic character. In every other case (digits,
    punctuation, empty tokens) the token is glued on without a separator.

    Args:
        tokens: Indexable sequence of strings; may be empty.

    Returns:
        The joined string, or ``''`` when ``tokens`` is empty.
    """
    if not tokens:
        return ''

    first = tokens[0]
    pieces = [first]
    # Slicing (instead of indexing) yields '' for an empty token, and
    # ''.isalpha() is False, so an empty leading token cannot raise
    # IndexError the way ``completed_word[-1]`` did.
    last_char = first[-1:]
    for token in tokens[1:]:
        if token.isalpha() and last_char.isalpha():
            pieces.append(' ')
        pieces.append(token)
        # An empty token adds no characters, so the last character of the
        # accumulated text stays whatever it was before.
        if token:
            last_char = token[-1]
    return ''.join(pieces)


def ent_merge(data_frame):
    """Merge per-token entity tags into entity-level spans.

    Iterates ``data_frame['words']`` and ``data_frame['entities']`` in
    lockstep. A tag starting with ``"B-"`` opens a new entity whose label is
    the text after the prefix; following ``"I-<label>"`` / ``"L-<label>"``
    tags with the same label extend it. Any other tag closes the open entity
    (if there is one) and the token is emitted on its own, labelled with
    ``tag[2:]`` (an empty string for a one-character tag such as ``"O"``).

    Args:
        data_frame: Object indexable by ``'words'`` and ``'entities'``, each
            yielding an iterable of strings of matching length.

    Returns:
        A list in token order. Merged entities are ``(text, label)`` tuples
        built with ``join_words``; standalone tokens are ``[token, label]``
        lists.
    """
    ent_result = []
    current_entity_tokens = []
    current_entity = None
    for token, tag in zip(data_frame['words'], data_frame["entities"]):
        if tag.startswith("B-"):
            # A new entity begins: flush the one that was still open.
            if current_entity is not None:
                ent_result.append(
                    (join_words(current_entity_tokens), current_entity))
            current_entity = tag[2:]
            current_entity_tokens = [token]
        elif current_entity is not None and tag in (
                "I-" + current_entity, "L-" + current_entity):
            # Continuation of the open entity (same label only).
            current_entity_tokens.append(token)
        else:
            # Flush only when an entity is actually open; flushing
            # unconditionally would add a spurious ('', None) entry for
            # every token seen outside an entity.
            if current_entity is not None:
                ent_result.append(
                    (join_words(current_entity_tokens), current_entity))
            ent_result.append([token, tag[2:]])
            current_entity_tokens = []
            current_entity = None
    # Flush an entity that runs to the end of the input.
    if current_entity is not None:
        ent_result.append((join_words(current_entity_tokens), current_entity))
    return ent_result