Dagobert42's picture
small edits and comments
0c62ce7
# Display colour (hex RGB string) for each entity label that gets highlighted.
# Keys are the label strings looked up via a prediction's 'entity_group'.
colors = dict(
    GeneOrGeneProduct='#aad4aa',           # light green
    DiseaseOrPhenotypicFeature='#f8b400',  # orange
    ChemicalEntity='#a4c2f4',              # light blue
    OrganismTaxon='#ffb6c1',               # light pink
    SequenceVariant='#e2b0ff',             # light purple
    CellLine='#ffcc99',                    # peach
)
def annotate_sentence(sentence, predictions):
    """Split *sentence* into plain-text and annotated segments.

    Walks through ``predictions`` in the order given, emitting the text
    between consecutive predictions as plain strings and the predicted spans
    as annotation tuples.

    Args:
        sentence: The string the prediction offsets refer to.
        predictions: Iterable of mappings with ``'start'`` and ``'end'``
            (used as slice offsets into ``sentence``) and ``'entity_group'``
            (the label). Presumably sorted by ``'start'`` and
            non-overlapping -- the function does not check this; verify
            against the caller.

    Returns:
        A list whose items are either a ``str`` (unannotated text, or a span
        whose label is ``'null'``) or a ``(text, label, color)`` tuple, where
        ``color`` is looked up in the module-level ``colors`` mapping.
        Empty segments are skipped.

    Raises:
        KeyError: If a prediction lacks one of the expected keys, or its
            label is neither ``'null'`` nor a key of ``colors``.
    """
    output = []
    cursor = 0  # offset in `sentence` up to which text has been emitted
    for p in predictions:
        start, end = p['start'], p['end']

        # Plain text between the previous prediction and this one.
        leading = sentence[cursor:start]
        if leading:
            output.append(leading)

        # The predicted span itself; 'null' labels are emitted as plain text.
        span = sentence[start:end]
        if span:
            label = p['entity_group']
            if label != 'null':
                output.append((span, label, colors[label]))
            else:
                output.append(span)

        cursor = end

    # Trailing text after the last prediction. Slicing from `cursor` rather
    # than from the loop variable keeps this working when `predictions` is
    # empty (the loop variable would be unbound in that case).
    trailing = sentence[cursor:]
    if trailing:
        output.append(trailing)
    return output