def format_entities(tokens, labels):
    """Group BIO-tagged tokens into a flat, ordered list of spans.

    Consecutive tokens belonging to the same entity are joined with a single
    space into one entry. Tokens whose label has no ``B-``/``I-`` prefix are
    emitted individually with ``"label": None`` so that every input token
    appears in the output exactly once, in order.

    Args:
        tokens: Iterable of token strings.
        labels: Iterable of label strings parallel to ``tokens`` (for example
            ``"B-PER"``, ``"I-PER"``, ``"O"``). Pairing uses ``zip``, so any
            surplus items in the longer iterable are ignored.

    Returns:
        A list of ``{"text": str, "label": str | None}`` dicts. ``label`` is
        the tag with its two-character prefix removed, or ``None`` for
        tokens outside any entity. Empty input yields an empty list.
    """
    entities = []
    # The span currently being built, or None when we are outside an entity.
    # A sentinel (rather than a truthiness test on the label) keeps a bare
    # "B-" tag, whose stripped label is "", from being mistaken for "no span".
    current = None

    for token, label in zip(tokens, labels):
        if label.startswith(("B-", "I-")):
            entity_label = label[2:]  # strip the 'B-' / 'I-' prefix

            # Only an I- tag with a matching label extends the open span.
            # A B- tag always begins a new span, so two adjacent entities of
            # the same type ("B-PER", "B-PER") stay separate.
            extends_open_span = (
                current is not None
                and label.startswith("I-")
                and current["label"] == entity_label
            )
            if extends_open_span:
                current["text"] += f" {token}"
                continue

            if current is not None:
                entities.append(current)
            # An I- tag with no matching open span is tolerated and simply
            # starts a new span, as the previous implementation did.
            current = {"text": token, "label": entity_label}
        else:
            # A token outside any entity closes the open span (if any) and
            # is itself always kept in the output.
            if current is not None:
                entities.append(current)
                current = None
            entities.append({"text": token, "label": None})

    # Flush a span that runs to the end of the input; add nothing otherwise.
    if current is not None:
        entities.append(current)

    return entities