from transformers import pipeline

# Two token-classification ("ner") pipelines built from different checkpoints.
# aggregation_strategy="simple" is requested so that results are reported per
# entity group; annotate_sentence() below reads the 'entity_group', 'word',
# 'start' and 'end' fields of each result.
baseline_classifier = pipeline(
    "ner",
    model="Dagobert42/biored-finetuned",
    aggregation_strategy="simple",
)
augmented_classifier = pipeline(
    "ner",
    model="Dagobert42/biored-augmented",
    aggregation_strategy="simple",
)

# Highlight color per entity group. Built once at import time instead of on
# every annotate_sentence() call.
_ENTITY_COLORS = {
    'null': '#bfbfbf',  # Pastel gray
    'GeneOrGeneProduct': '#aad4aa',  # Pastel green
    'DiseaseOrPhenotypicFeature': '#f8b400',  # Pastel orange
    'ChemicalEntity': '#a4c2f4',  # Pastel blue
    'OrganismTaxon': '#ffb6c1',  # Pastel pink
    'SequenceVariant': '#e2b0ff',  # Pastel purple
    'CellLine': '#ffcc99',  # Pastel peach
}


def annotate_sentence(sentence, predictions):
    """Split *sentence* into plain-text and highlighted entity segments.

    Args:
        sentence: The text the predictions refer to.
        predictions: Iterable of mappings, each with the keys ``'start'`` and
            ``'end'`` (character offsets into *sentence*), ``'word'`` and
            ``'entity_group'``.
            NOTE(review): the left-to-right walk assumes the predictions are
            ordered by ``'start'`` and do not overlap -- confirm with caller.

    Returns:
        A list whose items are either plain ``str`` fragments of *sentence*
        or ``(word, entity_group, color)`` tuples, in reading order. With no
        predictions the whole sentence is returned as one plain fragment
        (an empty sentence yields an empty list).
    """
    fallback_color = _ENTITY_COLORS['null']
    output = []
    cursor = 0  # offset of the first character not yet emitted

    for p in predictions:
        # Plain text between the previous entity and this one.
        gap = sentence[cursor:p['start']]
        if gap:
            output.append(gap)

        group = p['entity_group']
        # Labels missing from the palette are shown in the neutral gray
        # rather than aborting the whole annotation with a KeyError.
        output.append((p['word'], group, _ENTITY_COLORS.get(group, fallback_color)))
        cursor = p['end']

    # Remaining text after the last entity. Using the cursor instead of the
    # loop variable keeps this valid when there were no predictions at all.
    tail = sentence[cursor:]
    if tail:
        output.append(tail)
    return output