#!/usr/bin/env python3 | |
import os

# load NERDA functionality
from NERDA.models import NERDA
### | |
# Load finetuned model, based on microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract | |
## | |
def load_finetuned_model(
    model_path='model/trained_ner_model.bin',
    transformer_name='microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract',
    max_len=512,
):
    """Build a NERDA PICO tagger and load fine-tuned weights into it.

    Called without arguments this behaves like the original hard-coded
    version; the parameters only exist so that other weight files or
    base models can be used without editing this function.

    Args:
        model_path: Path of the fine-tuned network weights file. A
            relative path is resolved against the current working
            directory.
        transformer_name: Name of the base transformer handed to NERDA.
            Presumably it has to be the model the weights were
            fine-tuned from -- confirm before changing it.
        max_len: Value passed to NERDA as ``max_len`` (the original
            comment calls it the max length of an abstract).

    Returns:
        The NERDA model object after ``load_network_from_file`` ran.

    Raises:
        FileNotFoundError: If ``model_path`` is not an existing file.
    """
    # Fail before NERDA is constructed, so a wrong path is reported
    # without first setting up the base transformer.
    if not os.path.isfile(model_path):
        raise FileNotFoundError(
            'fine-tuned model file not found: {!r}'.format(model_path)
        )

    # The IOB tagging scheme: words that begin a named entity are tagged
    # with 'B-' and words 'inside' (= continuations of) a named entity
    # are tagged with 'I-'.
    tag_scheme = [
        'B-Patient',
        'I-Patient',
        'B-Intervention',
        'I-Intervention',
        'B-Control',
        'I-Control',
        'B-Outcome',
        'I-Outcome',
    ]
    # Tag for text outside of any entity.
    tag_outside = 'O'

    # Define the model ...
    model = NERDA(
        tag_scheme=tag_scheme,
        tag_outside=tag_outside,
        max_len=max_len,
        transformer=transformer_name,
    )
    # ... and load the fine-tuned network weights from file.
    model.load_network_from_file(model_path)
    return model
# Example abstract, written as one literal per sentence; the adjacent
# literals are concatenated into a single string.
text = (
    'Long-term outcomes after repeat doses of antenatal corticosteroids. '
    'Previous trials have shown that repeat courses of antenatal corticosteroids improve some neonatal outcomes in preterm infants but reduce birth weight and increase the risk of intrauterine growth restriction. '
    'We report long-term follow-up results of children enrolled in a randomized trial comparing single and repeat courses of antenatal corticosteroids. '
    'Women at 23 through 31 weeks of gestation who remained pregnant 7 days after an initial course of corticosteroids were randomly assigned to weekly courses of betamethasone, consisting of 12 mg given intramuscularly and repeated once at 24 hours, or an identical-appearing placebo. '
)

# Set up the fine-tuned tagger.
model = load_finetuned_model()

# Predict the PICO labels for the example abstract.
pred = model.predict_text(text)
# Output of print( pred[ 1 ] ) for the example text (one tag list per sentence):
# | |
# [ | |
# ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], | |
# ['O', 'O', 'O', 'O', 'O', 'B-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], | |
# ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Intervention', 'O', 'B-Control', 'I-Control', 'I-Control', 'I-Intervention', 'I-Intervention', 'I-Intervention'], | |
# ['B-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'O', 'O', 'O', 'O', 'B-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'O', 'O', 'I-Control', 'I-Control', 'I-Control'] | |
# ] | |