#!/usr/bin/env python3 # load NERDA functionality from NERDA.models import NERDA ### # Load finetuned model, based on microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract ## def load_finetuned_model(): # model file finetuned_model = 'model/trained_ner_model.bin' # the IOB tagging scheme: words that are beginning of named entities # are tagged with 'B-' and words 'inside' (=continuations of) # named entities are tagged with 'I-'. tag_scheme = [ 'B-Patient', 'I-Patient', 'B-Intervention', 'I-Intervention', 'B-Control', 'I-Control', 'B-Outcome', 'I-Outcome' ] # outside text tag_outside = 'O' # base transformer model transformer_name = 'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract' # max length of abstract max_len = 512 # define model model = NERDA( tag_scheme = tag_scheme, tag_outside = tag_outside, max_len = max_len, transformer = transformer_name ) # load from file model.load_network_from_file( finetuned_model ) return( model ) # load finetuned model model = load_finetuned_model() # example text text = 'Long-term outcomes after repeat doses of antenatal corticosteroids. Previous trials have shown that repeat courses of antenatal corticosteroids improve some neonatal outcomes in preterm infants but reduce birth weight and increase the risk of intrauterine growth restriction. We report long-term follow-up results of children enrolled in a randomized trial comparing single and repeat courses of antenatal corticosteroids. Women at 23 through 31 weeks of gestation who remained pregnant 7 days after an initial course of corticosteroids were randomly assigned to weekly courses of betamethasone, consisting of 12 mg given intramuscularly and repeated once at 24 hours, or an identical-appearing placebo. ' # predict PICO labels pred = model.predict_text( text ) # print( pred[ 1 ] ) -> # # [ # ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], # ['O', 'O', 'O', 'O', 'O', 'B-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], # ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Intervention', 'O', 'B-Control', 'I-Control', 'I-Control', 'I-Intervention', 'I-Intervention', 'I-Intervention'], # ['B-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'O', 'O', 'O', 'O', 'B-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'O', 'O', 'I-Control', 'I-Control', 'I-Control'] # ]