def get_ner_spans_from_annotations(annotated_labels):
    """Flatten a per-entity-type annotation mapping into a list of span dicts.

    Args:
        annotated_labels: Mapping of entity type to a list of span dicts.
            Each span dict must contain a ``"label"`` key; its value is
            copied to ``"span_text"`` in the output (presumably the
            annotated text itself -- confirm against the annotation source).

    Returns:
        A list with one dict per input span. Each dict carries every key of
        the input span, except that ``"label"`` is replaced by the entity
        type and ``"span_text"`` holds the input span's original ``"label"``
        value. Spans appear grouped by entity type in mapping order, which
        is not necessarily their order in the text.

    Raises:
        KeyError: If a span dict has no ``"label"`` key.
    """
    return [
        {
            **span,
            "label": entity_type,
            "span_text": span["label"],
        }
        for entity_type, entity_spans in annotated_labels.items()
        for span in entity_spans
    ]


def get_highlight_spans_from_ner_spans(ner_spans, parent_text):
    """Split ``parent_text`` into plain segments and highlighted spans.

    Args:
        ner_spans: Span dicts with ``"start"`` and ``"end"`` (used as slice
            offsets into ``parent_text``), ``"span_text"`` and ``"label"``.
            They may be given in any order.
        parent_text: The text the span offsets refer to.

    Returns:
        A list alternating between plain-text segments (``str``) and
        ``(span_text, label)`` tuples, in text order. The segment before
        each span is always emitted, even when empty; the trailing segment
        is emitted only when text remains after the last span. If
        ``ner_spans`` is empty, ``[parent_text]`` is returned.

    Raises:
        KeyError: If a span dict lacks one of the required keys.
    """
    if not ner_spans:
        return [parent_text]

    # Spans built per entity type are not in text order; the gap slicing
    # below is only correct when spans are walked by ascending start offset.
    # ``sorted`` is stable and leaves the caller's list untouched.
    ordered_spans = sorted(ner_spans, key=lambda span: span["start"])

    output_list = []
    prev_span_end = 0
    for span in ordered_spans:
        # Plain text between the previous span and this one.
        output_list.append(parent_text[prev_span_end:span["start"]])
        output_list.append((span["span_text"], span["label"]))
        prev_span_end = span["end"]

    # Use ``<`` rather than ``!=`` so an end offset beyond the text does not
    # add a spurious empty trailing segment.
    if prev_span_end < len(parent_text):
        output_list.append(parent_text[prev_span_end:])
    return output_list