"""Predefined NER examples: a text plus its ground-truth entity annotations."""

from dataclasses import dataclass

from span_dataclass_converters import get_ner_spans_from_annotations


@dataclass
class PredefinedExample:
    """A sample text together with its ground-truth entity annotations.

    Attributes:
        text: The raw example text.
        gt_labels: Mapping from an entity-type name (e.g. ``"Disease"``) to a
            list of annotation dicts. In the examples defined in this module
            each annotation has ``start``/``end`` character offsets into
            ``text`` (end-exclusive) and a ``label`` holding the covered text.
    """

    text: str
    gt_labels: dict
    # ``gt_spans`` and ``predictions`` are not stored fields; both are derived
    # from ``gt_labels`` on every access by the properties below.

    @property
    def gt_spans(self) -> list:
        """Return the ground-truth spans ordered by ascending ``start``.

        The spans come from ``get_ner_spans_from_annotations``; each one must
        support ``span["start"]`` because that value is the sort key.
        NOTE(review): the exact span schema is defined by that helper, which
        is not visible here -- confirm against ``span_dataclass_converters``.
        """
        ordered = list(get_ner_spans_from_annotations(self.gt_labels))
        # In-place stable sort: spans sharing a start keep the helper's order.
        ordered.sort(key=lambda item: item["start"])
        return ordered

    @property
    def predictions(self) -> list:
        """Return a one-element list whose only item is ``gt_spans``.

        NOTE(review): presumably the ground truth stands in for a single
        model's predictions in these canned examples -- confirm with callers.
        """
        ground_truth = self.gt_spans
        return [ground_truth]


# Short single-sentence example with one disease and one drug mention.
small_example = PredefinedExample(
    text="The patient was diagnosed with bronchitis and was prescribed a mucolytic",
    gt_labels={
        "Disease": [
            {
                "start": 31,
                "end": 41,
                "label": "bronchitis",
            },
        ],
        "Drug": [
            {
                "start": 63,
                "end": 72,
                "label": "mucolytic",
            },
        ],
    },
)

# Longer three-sentence example covering diseases, drugs and symptoms.
# Offsets index into the single string formed by the adjacent literals below.
big_example = PredefinedExample(
    text=(
        "The patient was experiencing stomach pain and flu like symptoms for 3 days. "
        "Upon investigation, the chest xray revealed acute bronchitis disease. "
        "The patient was asked to take rest for a week and was prescribed a mucolytic along with paracetamol for body pains."
    ),
    gt_labels={
        "Disease": [
            {
                "start": 120,
                "end": 144,
                "label": "acute bronchitis disease",
            },
        ],
        "Drug": [
            {
                "start": 213,
                "end": 222,
                "label": "mucolytic",
            },
            {
                "start": 234,
                "end": 245,
                "label": "paracetamol",
            },
        ],
        "Symptoms": [
            {
                "start": 29,
                "end": 41,
                "label": "stomach pain",
            },
            {
                "start": 46,
                "end": 63,
                "label": "flu like symptoms",
            },
        ],
    },
)

# All predefined examples, shortest first.
EXAMPLES = [small_example, big_example]