pubmed-pico / example.py
wmotte's picture
Test results
2052ba5
#!/usr/bin/env python3
# load NERDA functionality
from NERDA.models import NERDA
###
# Load finetuned model, based on microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract
##
def load_finetuned_model(model_path='model/trained_ner_model.bin'):
    """Build the PICO tagger and load its fine-tuned network from disk.

    A NERDA model is configured on top of the
    microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract transformer with an
    IOB tag scheme for Patient, Intervention, Control and Outcome entities,
    and the network stored in ``model_path`` is then loaded into it.

    Args:
        model_path: Path of the fine-tuned network file. Defaults to
            'model/trained_ner_model.bin', so existing zero-argument
            callers behave as before.

    Returns:
        The NERDA instance after ``load_network_from_file`` has been called.
    """
    # The IOB tagging scheme: words that begin a named entity are tagged
    # with 'B-' and words 'inside' (= continuing) a named entity are
    # tagged with 'I-'.
    tag_scheme = [
        'B-Patient',
        'I-Patient',
        'B-Intervention',
        'I-Intervention',
        'B-Control',
        'I-Control',
        'B-Outcome',
        'I-Outcome',
    ]

    # Tag for text outside any named entity.
    tag_outside = 'O'

    # Base transformer model the fine-tuned network is built on.
    transformer_name = 'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract'

    # Max length of an abstract.
    max_len = 512

    # Define the model with the fixed configuration above; only the
    # location of the network file is left to the caller.
    model = NERDA(
        tag_scheme=tag_scheme,
        tag_outside=tag_outside,
        max_len=max_len,
        transformer=transformer_name,
    )

    # Load the fine-tuned network from file.
    model.load_network_from_file(model_path)

    return model
# Build the fine-tuned PICO tagger (default network file location).
model = load_finetuned_model()

# Example text to tag; adjacent literals are concatenated into one string,
# one literal per sentence, each keeping its trailing space.
text = (
    'Long-term outcomes after repeat doses of antenatal corticosteroids. '
    'Previous trials have shown that repeat courses of antenatal corticosteroids improve some neonatal outcomes in preterm infants but reduce birth weight and increase the risk of intrauterine growth restriction. '
    'We report long-term follow-up results of children enrolled in a randomized trial comparing single and repeat courses of antenatal corticosteroids. '
    'Women at 23 through 31 weeks of gestation who remained pregnant 7 days after an initial course of corticosteroids were randomly assigned to weekly courses of betamethasone, consisting of 12 mg given intramuscularly and repeated once at 24 hours, or an identical-appearing placebo. '
)

# Predict PICO labels for the example text.
pred = model.predict_text(text)
# print( pred[ 1 ] ) shows the predicted tags for the example text:
# one list per sentence, with one IOB tag per token ->
#
# [
# ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
# ['O', 'O', 'O', 'O', 'O', 'B-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
# ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Intervention', 'O', 'B-Control', 'I-Control', 'I-Control', 'I-Intervention', 'I-Intervention', 'I-Intervention'],
# ['B-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'I-Patient', 'O', 'O', 'O', 'O', 'B-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'I-Intervention', 'O', 'O', 'I-Control', 'I-Control', 'I-Control']
# ]