NER_skripsi / format_entity.py
ki-ki13
change a bit
210b009
def format_entities(tokens, labels):
    """Group tokens and their B-/I- prefixed labels into entity spans.

    Consecutive tokens whose labels carry the same entity type (the part
    after the ``B-`` or ``I-`` prefix) are joined with single spaces into
    one entry. Every token whose label has neither prefix is emitted as
    its own entry with ``label`` set to ``None``, so no input token is
    lost and the original token order is preserved.

    Note: ``B-`` and ``I-`` are treated alike, so two adjacent tokens of
    the same type are merged even when the second one is tagged ``B-``.

    Args:
        tokens: Sequence of token strings.
        labels: Sequence of label strings, aligned with ``tokens``. The
            two sequences are paired with ``zip``, so surplus items in
            the longer one are ignored.

    Returns:
        A list of ``{"text": str, "label": str | None}`` dicts in input
        order. The list is empty when there are no tokens.
    """
    entities = []
    open_entity = None  # span currently being accumulated, if any

    for token, label in zip(tokens, labels):
        if label.startswith(("B-", "I-")):
            entity_type = label[2:]  # strip the 'B-' / 'I-' prefix
            if open_entity is not None and open_entity["label"] == entity_type:
                # Same type as the open span: extend it.
                open_entity["text"] += f" {token}"
                continue
            # Different type (or nothing open): close the old span and
            # start a new one with this token.
            if open_entity is not None:
                entities.append(open_entity)
            open_entity = {"text": token, "label": entity_type}
        else:
            # A plain token ends any open span, and is itself always kept;
            # previously it was dropped when it directly followed an entity.
            if open_entity is not None:
                entities.append(open_entity)
                open_entity = None
            entities.append({"text": token, "label": None})

    # Flush the trailing span, if any. Nothing is appended otherwise, so
    # the result never ends with an empty placeholder entry.
    if open_entity is not None:
        entities.append(open_entity)
    return entities