# Display color (hex) for each recognized entity group.
colors = {
    'GeneOrGeneProduct': '#aad4aa',  # Pastel green
    'DiseaseOrPhenotypicFeature': '#f8b400',  # Pastel orange
    'ChemicalEntity': '#a4c2f4',  # Pastel blue
    'OrganismTaxon': '#ffb6c1',  # Pastel pink
    'SequenceVariant': '#e2b0ff',  # Pastel purple
    'CellLine': '#ffcc99',  # Pastel peach
}


def annotate_sentence(sentence, predictions):
    """Split *sentence* into plain and annotated segments.

    Walks ``predictions`` in the order given and slices ``sentence`` by each
    prediction's ``start``/``end`` offsets. Text between predictions (and
    after the last one) is emitted as a plain string; predicted spans are
    emitted as ``(text, entity_group, color)`` tuples, unless their
    ``entity_group`` is the literal string ``'null'``, in which case the span
    is emitted as a plain string too. Empty slices are skipped.

    Args:
        sentence: The text to annotate.
        predictions: Iterable of mappings with ``'start'``, ``'end'`` and
            ``'entity_group'`` keys. Presumably sorted by ``start`` and
            non-overlapping -- the function does not check this.

    Returns:
        A list whose items are either ``str`` (unannotated text) or
        ``(text, entity_group, color)`` tuples.

    Raises:
        KeyError: If an ``entity_group`` other than ``'null'`` has no entry
            in ``colors``.
    """
    output = []
    i = 0  # Offset in `sentence` up to which text has already been emitted.

    for p in predictions:
        start, end, group = p['start'], p['end'], p['entity_group']

        # Plain text between the previous prediction and this one.
        leading = sentence[i:start]
        if leading:
            output.append(leading)

        # The predicted span itself.
        span = sentence[start:end]
        if span:
            if group != 'null':
                output.append((span, group, colors[group]))
            else:
                output.append(span)

        i = end

    # Plain text after the last prediction. Slice from `i` rather than the
    # loop variable so that an empty `predictions` (where the loop variable
    # is never bound) returns the whole sentence instead of raising.
    trailing = sentence[i:]
    if trailing:
        output.append(trailing)

    return output