Kaelan committed on
Commit f5e3fa7
1 Parent(s): fb96d28

initial commit

app.py ADDED
@@ -0,0 +1,206 @@
+ import streamlit as st
+ from annotated_text import annotated_text
+ import pandas as pd
+ import yaml
+ import os
+ import ast
+
+ from src.negation import *
+ from src.app_utils import *
+ from src.inference import inference
+ from src.trainers import eval_spacy
+
+ #### Loading configuration and models ####
+
+ with open('./st_config.yaml', "r") as yamlfile:
+     args = yaml.load(yamlfile, Loader=yaml.FullLoader)
+
+ if os.path.exists(args['model_dir']):
+     model_names_dir = os.listdir(args['model_dir'])
+ else:
+     model_names_dir = []
+
+ model_names = model_names_dir + args['default_models'] if args['default_models'] is not None else model_names_dir
+
+ st.title('NER Visualizer')
+
+ ##################################
+ #### Sidebar (Choose Model) ######
+ ##################################
+ model_name = st.sidebar.selectbox("Select a model", options=model_names)
+ print(model_name)
+ if len(model_names) > 0:
+     models = load_models(model_names, args, model_names_dir)
+     print(models)
+     selected_model = models[model_name]
+     print(selected_model)
+
+ ##################################
+ #### Sidebar (Choose Example) ####
+ ##################################
+ st.sidebar.markdown('###')
+ if args['examples'] is not None:
+     chosen_note = st.sidebar.selectbox("Select an example text", options=args['examples'].keys())
+ else:
+     chosen_note = None
+
+ if chosen_note == "radiology_dataset":
+     # literal_eval turns the stringified entity lists in the csv back into Python lists
+     text_input = pd.read_csv("./eval_35.csv", converters={'entities': ast.literal_eval})
+     text_input = text_input.to_dict('records')
+
+ # set colors for each entity
+ if len(model_names) > 0:
+     ents_available = selected_model.get_pipe('ner').labels
+     print(ents_available)
+     ent_colors_map = dict(zip(ents_available, args['colors_palette'][:len(ents_available)]))
+
+ ##################
+ ### Text area ###
+ ##################
+ if chosen_note != "radiology_dataset":
+     text_input = st.text_area("Type notes in the box below",
+                               value=args['examples'][chosen_note] if args['examples'] is not None else '')
+     st.markdown("---")
+
+ ############################
+ ### Sidebar (Load Files) ###
+ ############################
+ st.sidebar.info('For csv & json files, name the text column to be inferred "text" and the annotated labels column "entities". Format json text as below.')
+ st.sidebar.json([{"text":"example","entities":[[5,6,"do"],[8,11,"dx"]]},{"text":"example2","entities":[[5,6,"do"],[8,11,"dx"]]}])
+ uploaded_file = st.sidebar.file_uploader("Upload a file", type=["csv","json","pdf", "txt"])
+ text_input = process_files(uploaded_file, text_input)
+
+ #################################
+ ### Sidebar (Select Entities) ###
+ #################################
+ selected_entities = st.sidebar.multiselect(
+     "Select the entities you want to view",
+     options=ents_available if len(model_names) > 0 else [],
+     default=ents_available if len(model_names) > 0 else [],
+ )
+
+ ##########################
+ ### Text Area (Slider) ###
+ ##########################
+ if (len(text_input) > 1) and isinstance(text_input, (list, dict)):
+     sample = st.slider('Select Example', min_value=1, max_value=len(text_input))
+ else:
+     sample = None
+
+ # Process documents to tokens
+ if len(model_names) > 0:
+     infer_input = text_input[sample-1]["text"] if sample is not None else text_input
+     doc = selected_model(infer_input)
+
+ textcol_negate, textcol_compare = st.columns([1, 1])
+
+ # checkbox for negation
+ negate = textcol_negate.checkbox('Check for Negation')
+
+ ###########################################
+ ### Checkbox for comparison with labels ###
+ ###########################################
+ if isinstance(text_input, (dict, list)):
+     if 'entities' in text_input[0].keys():
+         state_compare = False
+         compare = textcol_compare.checkbox('Compare between predictions and labels', disabled=state_compare)
+     else:
+         state_compare, compare = True, False
+ else:
+     state_compare, compare = True, False
+
+ ###############################
+ ### Processing for negation ###
+ ###############################
+ if negate:
+     neg_ent = {"ent_types": list(selected_model.get_pipe('ner').labels)}
+     neg = negation(selected_model, neg_ent)
+     doc = infer_negation(neg, selected_model, infer_input, doc)
+     selected_entities += ['NEG']
+     ent_colors_map.update({'NEG': '#C7C7C7'})
+
+ #################################
+ ### Processing for comparison ###
+ #################################
+ if compare and isinstance(text_input, (dict, list)):
+     infer_input = text_input[sample-1]
+     tokens_compare = process_text_compare(infer_input, selected_entities, colors=ent_colors_map)
+
+ tokens = process_text(doc, selected_entities, colors=ent_colors_map)
+
+ st.markdown('##')
+ # Display results
+ st.markdown('#### Predictions')
+ annotated_text(*tokens)
+
+ if compare and isinstance(text_input, (dict, list)):
+     st.markdown('#### Labels')
+     annotated_text(*tokens_compare)
+
+ st.markdown("---")
+ data = pd.DataFrame.from_dict([{'label': entity.label_, 'text': entity.text, 'start': entity.start, 'end': entity.end}
+                                for entity in doc.ents])
+ if data.shape[1] > 0:
+     st.table(data['label'].value_counts())
+     myexpander = st.expander('Details on text')
+     myexpander.table(data)
+
+ ####################################
+ #### Inference on whole dataset ####
+ ####################################
+ infer_whole_dataset = st.checkbox('Inference on whole dataset')
+ if isinstance(text_input, (dict, list)) and infer_whole_dataset:
+     texts = []
+     for text in text_input:
+         texts.append(text['text'])
+
+     st.markdown('### Prediction on whole dataset')
+     inference_data = inference(selected_model, texts)
+
+     ### Applying negation to the whole dataset
+     if negate:
+         neg_ent = {"ent_types": list(selected_model.get_pipe('ner').labels)}
+         neg = negation(selected_model, neg_ent)
+         docs = selected_model.pipe(texts, batch_size=8)
+
+         records = []
+         for no, doc in enumerate(docs):
+             doc = infer_negation(neg, selected_model, texts[no], doc)
+             if len(doc.ents) > 0:
+                 records.append([{'id': no+1, 'text': doc.text, 'span': entity.text,
+                                  'entity': entity.label_, 'start': entity.start, 'end': entity.end}
+                                 for entity in doc.ents])
+             else:
+                 records.append([{'id': no+1, 'text': doc.text, 'span': None,
+                                  'entity': None, 'start': None, 'end': None}])
+
+         inference_data = pd.DataFrame.from_dict(sum(records, [])).set_index(['text', 'id'])
+
+     st.download_button(
+         label="Download Prediction as CSV",
+         data=inference_data.to_csv().encode('utf-8'),
+         file_name='inference_data.csv',
+         mime='text/csv',
+     )
+     #########################################
+     ### Expander for dataframe and report ###
+     #########################################
+     report_expander = st.expander('Report on Evaluation Results')
+     results_metrics = eval_spacy(selected_model, text_input)
+     overall_score = pd.DataFrame.from_dict({'Type': ['Overall'], 'Precision': [results_metrics['ents_p']],
+                                             'Recall': [results_metrics['ents_r']],
+                                             'F1': [results_metrics['ents_f']]})
+     overall_score = overall_score.set_index('Type')
+     entities_score = pd.DataFrame.from_dict(results_metrics['ents_per_type']).T
+     entities_score = entities_score.rename(columns={'p': 'Precision', 'r': 'Recall', 'f': 'F1'})
+     report_expander.table(overall_score)
+     report_expander.table(entities_score)
+
+     df_expander = st.expander('Inference Table')
+     df_expander.write(inference_data.to_html(), unsafe_allow_html=True)
+     #df_expander.table(inference_data)
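For the file-upload path, a sketch of a record file matching the format documented in the sidebar (a hypothetical sample file, mirroring the st.sidebar.json example above):

import json

# what an uploadable .json file for this app could contain (illustrative)
records = [{"text": "example", "entities": [[5, 6, "do"], [8, 11, "dx"]]},
           {"text": "example2", "entities": [[5, 6, "do"], [8, 11, "dx"]]}]
with open("sample_upload.json", "w") as f:
    json.dump(records, f)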
eval_35.csv ADDED
@@ -0,0 +1,34 @@
+ text,entities
+ A 30 year old female patient with a past medical history of asthma morbid obesity BMI 39 5 and hypertension on an angiotensin converting enzyme ACE inhibitor presented with a 6 day history of fever Tmax 38 9°C cough and shortness of breath Laboratory studies were remarkable for lymphopenia 0 6×103 µL normal range 0 9×103 µL – 3 3×103 µL elevated serum creatinine 1 3 mg dL normal range 0 6 mg dL – 1 2 mg dL elevated aspartate aminotransferase 73 IU L normal range 13 IU L – 39 IU L elevated c reactive protein 8 6 mg dL normal range 0 – 1 mg dL elevated procalcitonin 2 39 ng mL normal 0 1 ng mL elevated interleukin 6 197 pg mL normal ≤ 5 pg mL elevated cardiac troponin I 142 ng L normal 15 ng L and mildly elevated d dimer 570 ng mL normal 500 ng mL She reported a history of contact with a COVID positive co worker and no recent travel Influenza A B RT PCR were negative She developed acute respiratory distress and was emergently intubated Prone portable PA chest X ray on second day of admission demonstrates persistent airspace opacities cardiomegaly and haziness of the cardiac borders Fig 3 ,"[[60, 66, 'DX'], [99, 111, 'DX'], [198, 203, 'DX'], [219, 224, 'DX'], [230, 249, 'DX'], [290, 301, 'DX'], [846, 851, 'DX'], [943, 969, 'DX'], [1070, 1099, 'EXIST_WORSEN'], [1101, 1113, 'EXIST_WORSEN'], [1118, 1149, 'EXIST_WORSEN']]"
+ A 29 year old immunocompromised female patient with a 3 day history of cough and fever Past medical history includes severe ulcerative colitis treated with Tofacitinib The patient was admitted to the hospital ward and discharged one week after admission with complete recovery Chest X ray Increase of parenchymal opacity in right lower lobe ,"[[71, 76, 'DX'], [81, 86, 'DX'], [261, 278, 'ABST_RECOVER'], [305, 344, 'EXIST_WORSEN']]"
+ 79 year old woman who presented with chest pain cough and fever for 3 days Coronavirus disease COVID 19 had recently been diagnosed in two of her household members Patient developed acute respiratory distress syndrome within subsequent few days and died 11 days after admission Courtesy of Song F Shanghai Public Health Clinical Center Shanghai China show ground glass opacification GGO on day 1 ,"[[37, 47, 'DX'], [49, 54, 'DX'], [60, 65, 'DX'], [78, 89, 'DX'], [99, 107, 'DX'], [188, 214, 'DX'], [370, 396, 'EXIST_WORSEN'], [398, 401, 'EXIST_WORSEN']]"
+ 79 year old woman who presented with chest pain cough and fever for 3 days Coronavirus disease COVID 19 had recently been diagnosed in two of her household members Patient developed acute respiratory distress syndrome within subsequent few days and died 11 days after admission Courtesy of Song F Shanghai Public Health Clinical Center Shanghai China obtained on day 4 show GGO has progressed to airspace consolidation ,"[[37, 47, 'DX'], [49, 54, 'DX'], [60, 65, 'DX'], [78, 89, 'DX'], [99, 107, 'DX'], [188, 214, 'DX'], [388, 391, 'EXIST_WORSEN'], [410, 432, 'EXIST_WORSEN']]"
+ History 73 year old male with aorta insufficiency and pacemaker was admitted to the hospital with fever and coughing after being in an area with Corona X findings day 1 normal findings day 4 bilateral consolidations intubated day 8 bilateral consolidation day 13 extubation PCR positive Follow up Extubated after 9 days of mechanical ventilation ,"[[98, 103, 'DX'], [108, 116, 'DX'], [195, 219, 'EXIST_WORSEN'], [238, 261, 'EXIST_WORSEN']]"
+ 50 year old man was sent to the fever clinic for fever chills cough fatigue and shortness of breath He reported the travel history of Wuhan from January 8 to 12 and the first symptoms appeared on January 14 the first day of onset manifested as mild chills and dry cough But the patient continued to work until going to the hospital on January 21 Figure 1 The patient underwent a chest radiograph and a pharyngeal swab in the hospital The chest radiograph showed multiple patchy images of both lungs Appendix p2 On January 22 the 9th day of onset He was immediately transferred to the isolation ward and oxygen was given through the mask for oxygen support Interferon alpha 2b aerosol inhalation of 5 million U bid and lopinavir ritonavir 500 mg bid Po were used as antiviral treatment and moxifloxacin 0 4 g qd ivgtt to prevent secondary infections Given its severe shortness of breath and hypoxemia methylprednisolone 80 mg bid Ivgtt was given to reduce lung inflammation The laboratory test results are listed in the appendix p4 After receiving medication the patient's body temperature dropped from 39 0 ° C to 36 4 ° C ,"[[32, 37, 'DX'], [49, 54, 'DX'], [56, 62, 'DX'], [64, 69, 'DX'], [84, 103, 'DX'], [258, 264, 'DX'], [269, 278, 'DX'], [476, 512, 'EXIST_WORSEN'], [899, 925, 'DX'], [930, 939, 'DX']]"
+ chest film normal on admission to hospital,"[[0, 20, 'ABST_RECOVER']]"
+ patient on mechanical ventilation with bilateral consolidations on the chest film,"[[34, 76, 'EXIST_WORSEN']]"
+ Chest film of a 83 year old male with mitral insufficiency pulmonary hypertension and atrial fibrillation with COVID 19 infection Ground glass opacification and consolidation in right upper lobe and left lower lobe arrows ,"[[60, 82, 'DX'], [112, 120, 'DX'], [132, 216, 'EXIST_WORSEN']]"
+ Within a few hours after presentation on the ER the patient became hypoxic and was treated with mechanical ventilation Later that day the patient was transferred to another hospital History 64 year old male with fever and coughing for 2 weeks after a skiing holiday with his family CT findings Widespread GGO in all lobes Crazy paving blue arrows Vascular enlargement black arrow Subpleural bands with retraction yellow arrows Consolidation and bronchiectasis posteriorly in the lower lobes CORADS 5 very high suspicion of COVID 19 PCR positive,"[[67, 74, 'DX'], [215, 220, 'DX'], [225, 233, 'DX'], [299, 326, 'EXIST_WORSEN'], [356, 376, 'EXIST_WORSEN'], [392, 424, 'EXIST_WORSEN'], [460, 505, 'EXIST_WORSEN'], [541, 549, 'DX'], [551, 563, 'DX']]"
+ 83 year old male with mitral insufficiency and pulmonary hypertension was diagnosed with COVID 19 infection The chest film shows consolidation in the right upper lobe green arrow and probably some consolidation in the left lower lobe The patient decided not to be treat with mechanical ventilation and died four days later ,"[[47, 69, 'DX'], [89, 97, 'DX'], [130, 167, 'EXIST_WORSEN'], [200, 236, 'EXIST_WORSEN']]"
+ Day 1 normal findings History 73 year old male with aorta insufficiency and pacemaker was admitted to the hospital with fever and coughing after being in an area with COVID 19 PCR positive Follow up extubated after 9 days of mechanical ventilation ,"[[55, 74, 'DX'], [123, 128, 'DX'], [133, 141, 'DX'], [170, 178, 'DX'], [180, 192, 'DX']]"
+ Day 4 bilateral consolidations intubated History 73 year old male with aorta insufficiency and pacemaker was admitted to the hospital with fever and coughing after being in an area with COVID 19 PCR positive Follow up extubated after 9 days of mechanical ventilation ,"[[7, 31, 'EXIST_WORSEN'], [74, 93, 'DX'], [142, 147, 'DX'], [152, 160, 'DX'], [189, 197, 'DX'], [199, 211, 'DX']]"
+ Day 8 bilateral consolidation History 73 year old male with aorta insufficiency and pacemaker was admitted to the hospital with fever and coughing after being in an area with COVID 19 PCR positive Follow up extubated after 9 days of mechanical ventilation ,"[[7, 30, 'EXIST_WORSEN'], [63, 82, 'DX'], [131, 136, 'DX'], [141, 149, 'DX'], [178, 186, 'DX'], [188, 200, 'DX']]"
+ Day 13 extubation History 73 year old male with aorta insufficiency and pacemaker was admitted to the hospital with fever and coughing after being in an area with COVID 19 PCR positive Follow up extubated after 9 days of mechanical ventilation ,"[[51, 70, 'DX'], [119, 124, 'DX'], [129, 137, 'DX'], [166, 174, 'DX'], [176, 188, 'DX']]"
+ 72 year old female came to the hospital with sore throat cough dyspnea anosmia and fever for 5 days Physical exam revealed no pathological findings Biochemistry showed lymphopenia decreased prothrombin activity c reactive protein increase and hypoxemia RT PCR was positive for COVID 19 No co morbidities or risk factors were communicated AP chest X Ray a reticular nodular pattern in both lungs mostly in the right one was observed In addition mild opacities in the superior middle and lower right lobes were depicted ,"[[45, 56, 'DX'], [58, 63, 'DX'], [65, 72, 'DX'], [74, 81, 'DX'], [86, 91, 'DX'], [173, 184, 'DX'], [250, 259, 'DX'], [285, 293, 'DX'], [366, 405, 'EXIST_WORSEN'], [459, 518, 'EXIST_WORSEN']]"
+ A 72 year old female patient with a history of ischaemic stroke ocular myasthenia arterial hyper tension and hypercholesterolaemia was admitted to the emergency department because of dyspnoea She reported having fever and cough for a week At admission her pulse oximeter saturation was 84 the tympanic temperature was 37 6 °C Laboratory findings revealed elevated C reactive protein 19 69 mg dL normal range 0 01 0 5 mg dL and mild lymphopenia 0 7X10 3 mm 3 normal range 1 0 4 0 X10 3 mm 3 The patient also underwent non contrast chest CT AP chest X ray obtained on the second day of admission demonstrated diffuse bilateral opacities tracheal cannula na sogastric tube internal jugular CVC,"[[186, 194, 'DX'], [216, 221, 'DX'], [226, 231, 'DX'], [444, 455, 'DX'], [624, 651, 'EXIST_WORSEN']]"
+ A 74 year old woman with history of hypertension and heart disease who had been discharged 10 days before knee prosthetic surgery was admitted with 4 day history of fever dry cough and dyspnoea She had not left home since discharge and no family member was affected Analysis revealed lymphopenia elevation of C reactive protein and a positive RT PCR The patient was admitted to the intensive care unit with a favourable course Chest X ray at admission showed diffuse reticular pattern with small opacities in both basal regions,"[[36, 48, 'DX'], [53, 66, 'DX'], [167, 172, 'DX'], [174, 183, 'DX'], [188, 196, 'DX'], [289, 300, 'DX'], [468, 536, 'EXIST_WORSEN']]"
+ A 74 year old woman with history of hypertension and heart disease who had been discharged 10 days before knee prosthetic surgery was admitted with 4 day history of fever dry cough and dyspnoea She had not left home since discharge and no family member was affected Analysis revealed lymphopenia elevation of C reactive protein and a positive RT PCR The patient was admitted to the intensive care unit with a favourable course Chest X ray on the second day showed diffuse reticular pattern and increased density in both lungs,"[[36, 48, 'DX'], [53, 66, 'DX'], [167, 172, 'DX'], [174, 183, 'DX'], [188, 196, 'DX'], [289, 300, 'DX'], [473, 534, 'EXIST_WORSEN']]"
+ A 74 year old woman with history of hypertension and heart disease who had been discharged 10 days before knee prosthetic surgery was admitted with 4 day history of fever dry cough and dyspnoea She had not left home since discharge and no family member was affected Analysis revealed lymphopenia elevation of C reactive protein and a positive RT PCR The patient was admitted to the intensive care unit with a favourable course Chest x ray on the eighth day showed improvement with decreased of high density and reticular pattern more evident in the upper left lobe ,"[[36, 48, 'DX'], [53, 66, 'DX'], [167, 172, 'DX'], [174, 183, 'DX'], [188, 196, 'DX'], [289, 300, 'DX'], [473, 574, 'ABST_RECOVER']]"
+ A sixty five year old woman presented to the emergency department with a 5 day history of nausea and diarrhoea and a 2 day onset of non productive cough and asthenia without fever Her husband had similar symptoms and both had no epidemiological context for COVID 19 infection She had type 2 diabetes mellitus arterial hypertension and chronic renal disease Both were positive on RT PCR test for COVID 19 Anteroposterior chest x ray of a patient infected with COVID 19 that shows consolidations,"[[90, 96, 'DX'], [101, 110, 'DX'], [132, 152, 'DX'], [157, 165, 'DX'], [175, 180, 'DX'], [260, 268, 'DX'], [401, 409, 'DX'], [466, 474, 'DX'], [480, 500, 'EXIST_WORSEN']]"
+ showing interstitial alveolar hypodiaphania of the middle basal field on the left and basal seat on the right which is associated with pleural veiling on the left ,"[[30, 81, 'EXIST_WORSEN'], [136, 163, 'EXIST_WORSEN']]"
+ Softened confluent densities with peripheral distribution with associated interstitial weft thickening No pleural effusion Thickening with frosted glass with peripheral distribution and associated thickening of the interlobular septa absence of pleural effusion and in the absence of significant ilo mediastinal lymphadenopathies characterize the TC pattern highly suggestive of CoViD 19 then found later with pharyngeal swab ,"[[9, 57, 'EXIST_WORSEN'], [74, 102, 'EXIST_WORSEN'], [104, 123, 'ABST_RECOVER'], [199, 235, 'EXIST_WORSEN'], [237, 264, 'ABST_RECOVER'], [276, 314, 'ABST_RECOVER'], [315, 332, 'EXIST_WORSEN'], [383, 391, 'DX']]"
+ posterior bilateral interstitial engagement at the base of the alveolar consolidation area with air bronchograms and moderate concomitant pleural effusion The X ray examination shows nuanced parenchymal thickening in the middle and lower field in the right hemithorax and in the middle field on the left ,"[[64, 86, 'EXIST_WORSEN'], [97, 113, 'EXIST_WORSEN'], [118, 155, 'EXIST_WORSEN'], [185, 215, 'EXIST_WORSEN']]"
+ Fever cough and shortness of breath on arrival patient saturation of oxygen was 75 There is peripheral patchy air space opacification seen in both lung lower zones with diffuse ground glass haze bilaterally This is the initial plain film raising suspicion of COVID 19 pneumonia RT PCR was sent which turned out to be positive The patient was referred to a COVID 19 dedicated center for further treatment ,"[[0, 5, 'DX'], [7, 12, 'DX'], [17, 36, 'DX'], [95, 160, 'EXIST_WORSEN'], [264, 272, 'DX'], [273, 282, 'DX'], [363, 371, 'DX']]"
+ Fever dry cough and dyspnea for few days Multiple peripheral opacifications throughout both lungs ,"[[0, 5, 'DX'], [7, 16, 'DX'], [21, 28, 'DX'], [44, 101, 'EXIST_WORSEN']]"
+ Moderate amount of mid zone airspace opacification in both mid zones with a peripheral predominance ,"[[19, 68, 'EXIST_WORSEN']]"
+ just stepped down from HDU New oxygen requirements Extensive bilateral airspace opacification in both lungs more pronounced on the right and with relative sparing of the left upper lobe The airspace opacification has a peripheral distribution No pleural effusions ,"[[63, 109, 'EXIST_WORSEN'], [195, 247, 'EXIST_WORSEN'], [250, 270, 'ABST_RECOVER']]"
+ ITU admission Endotracheal tube nasogastric tube and right internal jugular lines suitable sited Bilateral airspace opacification persists but it has partially regressed since the prior radiograph ,"[[101, 174, 'ABST_RECOVER']]"
+ Lines and tubes suitably sited Minor regression in the appearances of the lungs from the radiograph of 2 days earlier ,"[[33, 81, 'ABST_RECOVER']]"
+ increasing oxygen requirements Extubated Positive pressure ventilation mask in use Widespread bilateral airspace opacification in both lungs No longer is the distribution peripheral or sparing the apices No pleural effusions or lobar consolidation ,"[[88, 145, 'EXIST_WORSEN'], [213, 256, 'ABST_RECOVER']]"
+ Extubated since the prior radiograph Partial regression of the diffuse lungs changes however air bronchograms are now evident in both upper lobes ,"[[39, 78, 'ABST_RECOVER'], [96, 148, 'EXIST_WORSEN']]"
+ Remarkable improvement in appearances since the radiograph 4 days earlier The current appearances of the lungs are nearly normal and better than the day 1 admission appearances ,"[[0, 37, 'ABST_RECOVER'], [105, 132, 'ABST_RECOVER']]"
requirements.txt ADDED
@@ -0,0 +1,28 @@
+ streamlit==1.18.1
+ keras==2.11.0
+ matplotlib==3.5.0
+ nltk==3.8.1
+ numpy==1.24.2
+ pandas==1.5.3
+ plac==1.3.5
+ PyPDF2==3.0.1
+ scikit-learn==1.2.1
+ spacy==3.4.1
+ #spacy==3.5.0
+ spacy-transformers==1.1.2
+ #spacy-transformers==1.2.2
+ spacy-alignments==0.9.0
+ spacy-legacy==3.0.12
+ spacy-loggers==1.0.4
+ spacy-lookups-data==1.0.3
+ st-annotated-text
+ tensorflow==2.11.0
+ tensorflow-estimator==2.11.0
+ thinc==8.1.7
+ tokenizers
+ torch==1.11.0
+ tqdm==4.64.1
+ transformers
+ negspacy==1.0.3
+ https://huggingface.co/Kaelan/en_Radiology_ner_bc5cdr_md/resolve/main/en_Radiology_ner_bc5cdr_md-any-py3-none-any.whl
+ https://huggingface.co/Kaelan/en_ner_bc5cdr_md/resolve/main/en_ner_bc5cdr_md-any-py3-none-any.whl
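The two wheel URLs at the end package the fine-tuned spaCy pipelines so spacy.load can resolve them by name. A sketch of setting up the environment from Python (this just wraps the usual pip CLI invocation; the path is assumed relative to the repo root):

import subprocess
import sys

# equivalent to running `pip install -r requirements.txt` in this directory
subprocess.run([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"], check=True)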
src/__pycache__/app_utils.cpython-39.pyc ADDED
Binary file (5.02 kB).
 
src/__pycache__/inference.cpython-39.pyc ADDED
Binary file (1.16 kB).
 
src/__pycache__/model_utils.cpython-39.pyc ADDED
Binary file (2.79 kB).
 
src/__pycache__/negation.cpython-39.pyc ADDED
Binary file (2.51 kB).
 
src/__pycache__/trainers.cpython-39.pyc ADDED
Binary file (4.9 kB).
 
src/app_utils.py ADDED
@@ -0,0 +1,175 @@
+ import spacy
+ import streamlit as st
+ import pandas as pd
+
+ from PyPDF2 import PdfReader
+ from io import StringIO
+ import json
+ import warnings
+ import os
+ import ast
+
+ @st.cache(show_spinner=False, allow_output_mutation=True, suppress_st_warning=True)
+ #@st.cache_resource
+ def load_models(model_names: list, args: dict, model_names_dir: list) -> dict:
+     """
+     Check whether a model name refers to a fine-tuned model located in model_dir or
+     to a default model native to spaCy, and load it with the required method.
+
+     Parameters:
+     model_names: list of model names for inference
+     args: dict, configuration parameters
+     model_names_dir: list of fine-tuned models found in model_dir
+
+     Returns:
+     model_dict: a dictionary whose keys are the model names and whose values are the loaded models.
+     """
+     assert (model_names is not None) and (len(model_names) != 0), "No models available"
+
+     model_dict = {}
+     for model_name in model_names:
+         print(model_name)
+         # loading model from directory
+         if model_name in model_names_dir:
+             try:
+                 model_path = os.path.join(args['model_dir'], model_name)
+                 model = spacy.load(model_path)
+             except OSError:
+                 warnings.warn(f"Path to {model_name} not found")
+                 continue
+         else:
+             try:
+                 # load default models from spacy
+                 model = spacy.load(model_name)
+             except OSError:
+                 warnings.warn(f'Model: {model_name} not found')
+                 continue
+         model_dict.update({model_name: model})
+         print('Model loaded')
+     return model_dict
+
+ def process_text(doc, selected_entities: list, colors: dict) -> list:
+     """
+     Process the tokens of the doc returned by a spaCy model so that consecutive
+     tokens are grouped by their corresponding entities. This gives st-annotated-text
+     the tokens in the shape it expects for visualization.
+
+     Example: "Hi John, i am sick with cough and flu"
+     Entities: person, disease
+     Output: [(Hi)(John, 'person', blue)(i am sick)(cough, 'disease', red)(and)(flu, 'disease', red)]
+
+     Parameters:
+     doc : spaCy doc
+     selected_entities : list of entities
+     colors : dict mapping entity types to colors
+
+     Returns:
+     tokens: list of tuples
+     """
+     tokens = []
+     span = ''
+     p_ent = None
+     last = len(doc)
+     for no, token in enumerate(doc):
+         add_span = False
+         for ent in selected_entities:
+             if (token.ent_type_ == ent) and (ent in selected_entities):
+                 span += token.text + " "
+                 p_ent = ent
+                 add_span = True
+                 if no + 1 == last:
+                     tokens.append((span, ent, colors[ent], '#464646'))
+
+         if (add_span is False) and (len(span) > 1):
+             tokens.append((span, p_ent, colors[p_ent], '#464646'))
+             span = ''
+             p_ent = None
+         if add_span is False:
+             tokens.append(" " + token.text + " ")
+
+     return tokens
+
+ def process_text_compare(infer_input: dict, selected_entities: list, colors: dict) -> list:
+     """
+     Used when the user wants to compare the text annotations of the predictions
+     against the labels. Processes the tokens of an evaluation record so that they
+     are grouped by their corresponding entities, giving st-annotated-text the
+     tokens in the shape it expects for visualization.
+
+     Example: "Hi John, i am sick with cough and flu"
+     Entities: person, disease
+     Output: [(Hi)(John, 'person', blue)(i am sick)(cough, 'disease', red)(and)(flu, 'disease', red)]
+
+     Parameters:
+     infer_input : dict with 'text' and 'entities' keys
+     selected_entities : list of entities
+     colors : dict mapping entity types to colors
+
+     Returns:
+     tokens: list of tuples
+     """
+     tokens = []
+
+     start_ = 0
+     end_ = len(infer_input['text'])
+
+     for start, end, entities in infer_input['entities']:
+         if entities in selected_entities:
+             # get the span of words that matches the entities detected
+             span = infer_input['text'][start:end+1]
+             # get the span of words that doesn't match the entities
+             if start_ != start:
+                 b4_span = infer_input['text'][start_:start]
+                 tokens.append(" " + b4_span + " ")
+
+             tokens.append((span, entities, colors[entities], '#464646'))
+             start_ = end
+
+     if start_ <= end_:
+         span = infer_input['text'][start_:end_+1]
+         tokens.append(" " + span + " ")
+     return tokens
+
+
+ def process_files(uploaded_file, text_input):
+     """
+     The app allows uploading files of multiple types, at present
+     json, csv, pdf and txt.
+     This function detects what kind of file has been uploaded and processes
+     it accordingly.
+     If a file has been uploaded, its contents replace the existing text_input.
+
+     Parameters:
+     uploaded_file: UploadedFile, a subclass of BytesIO, and therefore "file-like".
+     text_input: str / dict / list
+
+     Return:
+     text_input: list / dict / str
+     """
+     if uploaded_file is not None:
+         if uploaded_file.name[-3:] == 'csv':
+             # literal_eval to turn a stringified list into an actual list obj
+             text_input = pd.read_csv(uploaded_file, converters={'entities': ast.literal_eval})
+             text_input = text_input.to_dict('records')
+
+         elif uploaded_file.name[-3:] == 'son':  # matches the tail of '.json'
+             text_input = json.load(uploaded_file)
+         else:
+             try:
+                 text_input = ""
+                 stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
+                 for line in stringio.readlines():
+                     text_input += line + "\n"
+                 #text_input = text_input.decode("utf-8", errors='strict')
+             except UnicodeDecodeError:
+                 # not plain text, so treat it as a pdf
+                 text_input = []
+                 reader = PdfReader(uploaded_file)
+                 count = len(reader.pages)
+
+                 # read all the pages of a pdf
+                 for i in range(count):
+                     pages = reader.pages[i]
+                     text_input.append(pages.extract_text())
+                 text_input = ''.join(text_input)
+
+     return text_input
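A minimal sketch of how process_text_compare is consumed (hypothetical record and colors; offsets are illustrative and follow this function's inclusive-end slicing):

record = {"text": "Hi John, i am sick with cough and flu",
          "entities": [[3, 6, "person"], [24, 28, "disease"], [34, 36, "disease"]]}
colors = {"person": "#2ce6f5", "disease": "#EC7063"}
tokens = process_text_compare(record, ["person", "disease"], colors)
# tokens alternates plain strings with (span, label, color, text_color) tuples,
# ready to be splatted into annotated_text(*tokens)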
src/inference.py ADDED
@@ -0,0 +1,33 @@
+ import spacy
+ import pandas as pd
+
+ def inference(model, texts: list, batch_size: int = 8):
+     """
+     Perform batch inference.
+
+     Parameters:
+     model: spaCy pipeline to run
+     texts: list of input texts
+     batch_size: batch size for the inference
+
+     Returns:
+     data: pandas.DataFrame with the output of the inference
+     """
+
+     docs = model.pipe(texts, batch_size=batch_size)
+
+     records = []
+     for no, doc in enumerate(docs):
+         if len(doc.ents) > 0:
+             records.append([{'id': no+1, 'text': doc.text, 'span': entity.text,
+                              'entity': entity.label_, 'start': entity.start, 'end': entity.end}
+                             for entity in doc.ents])
+         else:
+             records.append([{'id': no+1, 'text': doc.text, 'span': None,
+                              'entity': None, 'start': None, 'end': None}])
+
+     # flatten the per-doc record lists into one table
+     data = pd.DataFrame.from_dict(sum(records, [])).set_index(['text', 'id'])
+
+     return data
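A usage sketch for inference, assuming one of the default models pinned in requirements.txt is installed (the example texts are illustrative):

import spacy
from src.inference import inference

nlp = spacy.load("en_ner_bc5cdr_md")
df = inference(nlp, ["fever and dry cough for 3 days", "no pleural effusion"])
# df is indexed by (text, id), with one row per detected entity span
# (or a single all-None row when a text has no entities)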
src/model_utils.py ADDED
@@ -0,0 +1,111 @@
+ import json
+ from pathlib import Path
+
+ import spacy
+ from spacy.training import Example
+
+ def make_training_doc(nlp, data: list):
+     """
+     Convert data into spaCy Example objects that can be used for training.
+
+     parameters:
+     nlp: model
+     data: training data
+
+     returns:
+     training_data: list of spaCy Examples
+     """
+     training_data = []
+     for text, annotations in data:
+         doc = nlp.make_doc(text)
+         example = Example.from_dict(doc, annotations)
+         training_data.append(example)
+
+     return training_data
+
+
+ def load_model(model: str = None):
+     """
+     Load the model indicated by `model`.
+
+     parameters:
+     model: str, name of the model to load
+
+     returns:
+     nlp: spaCy model object
+     optimizer: the optimizer to be used in training
+     """
+     if model is not None:
+         nlp = spacy.load(model)  # load existing spaCy model
+         print("Loaded model '%s'" % model)
+         optimizer = nlp.resume_training()
+     else:
+         nlp = spacy.blank('en')  # create blank Language class
+         print("Created blank 'en' model")
+         optimizer = nlp.begin_training()
+
+     return nlp, optimizer
+
+
+ def save_model(model, output_dir: str):
+     """
+     Save the model to output_dir.
+
+     parameters:
+     model: spaCy model
+     output_dir: path
+     """
+     if output_dir is not None:
+         output_dir = Path(output_dir)
+         if not output_dir.exists():
+             output_dir.mkdir()
+         model.to_disk(output_dir)
+         print("Saved model to", output_dir)
+
+     return None
+
+
+ def load_data(args):
+     """
+     Load the training data and evaluation data as well as the entities dictionary.
+
+     parameters:
+     args: dict, configuration from the config file
+
+     returns:
+     train_dict, entities_dict, eval_dict
+     """
+
+     assert args['train_dir'] is not None, 'indicate path for training directory'
+
+     # Load the training data
+     with open(args['train_dir']) as f:
+         train_dict = json.load(f)
+     print('Loaded Training Data')
+
+     try:
+         entities_dict = train_dict[args['ent_key']]
+         print('Loaded Entities from Training Data')
+     except KeyError:
+         entities_dict = None
+         print('No classes for entities found in data loaded. Proceed to check in ent_dir')
+
+     # Load entities
+     if args['ent_dir'] is not None and entities_dict is None:
+         with open(args['ent_dir']) as f:
+             entities_dict = json.load(f)
+         entities_dict = entities_dict[args['ent_key']]
+         print('Loaded Entities from ent_dir')
+     elif args['ent_dir'] is None and entities_dict is None:
+         assert entities_dict is not None, 'No entities found from training_dir & ent_dir'
+
+     # Load eval data
+     if args['eval_dir'] is not None:
+         with open(args['eval_dir']) as f:
+             eval_dict = json.load(f)
+         print('Loaded Evaluating Data')
+     else:
+         return train_dict, entities_dict, None
+
+     return train_dict, entities_dict, eval_dict
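A round-trip sketch for these helpers (the training example and output directory are illustrative, not part of this commit):

from src.model_utils import load_model, make_training_doc, save_model

nlp, optimizer = load_model(None)  # blank 'en' pipeline and a fresh optimizer
examples = make_training_doc(nlp, [("fever and cough", {"entities": [(0, 5, "DX")]})])
save_model(nlp, "./models/demo")   # hypothetical output path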
src/negation.py ADDED
@@ -0,0 +1,78 @@
+ import spacy
+ from negspacy.negation import Negex
+ from spacy.matcher import PhraseMatcher
+ from spacy.tokens import Span
+
+ def negation(model, entities: dict):
+     """
+     Take the current model pipeline and add the negation component to it,
+     configured with the entity types to negate.
+
+     Parameters:
+     model: spaCy model
+     entities: dict, negex config, e.g. {"ent_types": [...]}
+
+     Returns:
+     model: spaCy model with negation added to the pipeline
+     """
+     if 'parser' in model.pipe_names:
+         model.remove_pipe('parser')
+     #nlp.add_pipe(nlp.create_pipe('sentencizer'))
+     if 'sentencizer' not in model.pipe_names:
+         model.add_pipe('sentencizer')
+     #negex = Negex(nlp)
+     if 'negex' not in model.pipe_names:
+         model.add_pipe('negex', config=entities)
+
+     return model
+
+ def infer_negation(neg_model, model, text: str, pred_doc):
+     """
+     Match the results from the negation model against the results from the model.
+     Replace the entity type of the spans or tokens in the prediction doc
+     that should be negated with entity type "NEG".
+
+     Parameters:
+     neg_model: spaCy negation model
+     model: spaCy model
+     text: text sample
+     pred_doc: prediction of the text sample from model
+
+     Returns:
+     pred_doc: spaCy doc with all entities that should be negated replaced with the "NEG" entity type
+     """
+     doc = neg_model(text)
+     results = {'ent': [], 'start': [], 'end': []}
+     for e in doc.ents:
+         if e._.negex:
+             results['ent'].append(e.text)
+             results['start'].append(e.start)
+             results['end'].append(e.end)
+     print('Negation: ', results)
+
+     patterns = [model.make_doc(text) for text in results['ent']]
+     matcher = PhraseMatcher(model.vocab)
+     matcher.add('NEG', patterns)
+
+     # match all the tokens or spans of text detected to be negated against the prediction doc.
+     matches = matcher(pred_doc)
+     seen_tokens = set()
+     new_entities = []
+     entities = pred_doc.ents
+
+     # get exact matches: not only the span or word must match but also the location
+     for match in results['start']:
+         for i, (match_id, start, end) in enumerate(matches):
+             if match == start:
+                 new_entities.append(Span(pred_doc, start, end, label=match_id))
+                 entities = [
+                     e for e in entities if not (e.start < end and e.end > start)
+                 ]
+                 seen_tokens.update(range(start, end))
+                 # consume this match so it cannot be reused for another span
+                 matches.pop(i)
+                 break
+     pred_doc.ents = tuple(entities) + tuple(new_entities)
+
+     return pred_doc
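A sketch of how app.py wires these two helpers together; negation mutates and returns the same pipeline object, so it is passed twice (model name assumed installed):

import spacy
from src.negation import negation, infer_negation

nlp = spacy.load("en_ner_bc5cdr_md")
nlp = negation(nlp, {"ent_types": list(nlp.get_pipe("ner").labels)})
text = "no fever but persistent cough"
doc = infer_negation(nlp, nlp, text, nlp(text))
# entities judged negated by negex are re-labelled "NEG" on the returned doc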
src/trainers.py ADDED
@@ -0,0 +1,168 @@
+ import spacy
+ from spacy.util import minibatch, compounding
+ from spacy.scorer import Scorer
+ from spacy.training import Example
+ from src.model_utils import *
+
+ import random
+ from tqdm import tqdm
+
+ def train_transformer(config: dict, train_data: list, components: list, iter: int,
+                       batch_size=compounding(4.0, 32.0, 1.001), entities: list = None, eval_data: list = None):
+     """
+     Finetune a transformer model or resume training from a fine-tuned model.
+
+     Parameters:
+     config: dict, configuration parameters
+     train_data: list containing the training data
+     components: list of components to be trained
+     iter: int, number of iterations to train
+     batch_size: batch size (or compounding schedule) used for training
+     entities: list of entities to be trained on for NER
+     eval_data: list containing the evaluation data
+
+     Returns:
+     nlp: spaCy transformer pipeline
+     all_losses: list of the losses at every iteration
+     """
+     if config['dir'] is not None:
+         nlp = spacy.load(config['dir'])
+         optimizer = nlp.resume_training()
+     else:
+         nlp = spacy.blank("en")  # empty English pipeline
+         nlp.add_pipe("transformer", config=config['config'])
+         for component in components:
+             nlp.add_pipe(component)
+
+         task = nlp.get_pipe(component)
+         if ('ner' in components) and (entities is not None):
+             for label in entities:
+                 task.add_label(label)
+
+         nlp.initialize()  # XXX don't forget this step!
+         optimizer = nlp.create_optimizer()
+
+     # convert data into training doc
+     train_data_doc = make_training_doc(nlp, train_data)
+
+     all_losses = []
+     for itn in tqdm(range(1, iter+1)):
+         print("Starting iteration " + str(itn))
+         random.shuffle(train_data)
+         losses = {}
+         # compounding(4.0, 32.0, 1.001)
+         batches = minibatch(train_data_doc, size=batch_size)
+         for batch in batches:
+             nlp.update(batch, sgd=optimizer, drop=0.2, losses=losses)
+
+         scores = eval_spacy(nlp, eval_data) if eval_data else eval_spacy(nlp, train_data)
+         print("epoch: {} Losses: {} Recall: {} Precision: {} F1: {}".
+               format(itn, str(losses), scores['ents_r'], scores['ents_p'], scores['ents_f']))
+
+         all_losses.append([losses[component] for component in components])
+
+     return nlp, all_losses
+
+
+ def train_spacy(model, train_data: list, components: list, iter: int,
+                 batch_size=compounding(4.0, 32.0, 1.001), entities: list = None, eval_data: list = None):
+     """
+     Finetune a spaCy model or resume training from a fine-tuned model.
+
+     Parameters:
+     model: str, name of spaCy model
+     train_data: list containing the training data
+     components: list of components to be trained
+     iter: int, number of iterations to train
+     batch_size: batch size (or compounding schedule) used for training
+     entities: list of entities to be trained on for NER
+     eval_data: list containing the evaluation data
+
+     Returns:
+     nlp: spaCy model
+     all_losses: list of the losses at every iteration
+     """
+
+     # get model and optimizer
+     if model is not None:
+         nlp, optimizer = load_model(model)  # load existing spaCy model / blank model
+
+     # convert data into training doc
+     train_data_doc = make_training_doc(nlp, train_data)
+
+     # create the built-in pipeline components and add them to the pipeline
+     # nlp.create_pipe works for built-ins that are registered with spaCy
+     for component in components:
+         if component not in nlp.pipe_names:
+             ner = nlp.create_pipe(component)
+             nlp.add_pipe(component, last=True)
+         else:
+             ner = nlp.get_pipe(component)
+
+         # add labels if component is NER
+         if (component == 'ner') and (entities is not None):
+             for ent in entities:
+                 ner.add_label(ent)
+
+     print(f'Entities in the model are: {nlp.get_pipe("ner").labels}')
+
+     # get names of other pipes to disable them during training
+     other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in components]
+     all_losses = []
+     with nlp.disable_pipes(*other_pipes):  # only train NER
+         for itn in tqdm(range(1, iter+1)):
+             print("Starting iteration " + str(itn))
+             random.shuffle(train_data)
+             losses = {}
+             batches = minibatch(train_data_doc, size=batch_size)
+             for batch in batches:
+                 nlp.update(list(batch),
+                            losses=losses,
+                            drop=0.1,
+                            sgd=optimizer)
+             scores = eval_spacy(nlp, eval_data) if eval_data else eval_spacy(nlp, train_data)
+             print("epoch: {} Losses: {} Recall: {} Precision: {} F1: {}".
+                   format(itn, str(losses), scores['ents_r'], scores['ents_p'], scores['ents_f']))
+
+             all_losses.append([losses[component] for component in components])
+
+     return nlp, all_losses
+
+ def eval_spacy(model, data):
+     """
+     Perform evaluation and scoring.
+
+     Parameters:
+     model: either a spaCy model or a spaCy transformer
+     data: evaluation data to score the model against
+
+     Returns:
+     scores: dict with the scores of the model
+     """
+     scorer = Scorer()
+     examples = []
+     try:
+         # accept spacy format json data
+         for input_, annot in data:
+             doc = model.make_doc(input_)
+             example = Example.from_dict(doc, annot)
+             example.predicted = model(str(example.text))
+             examples.append(example)
+         scores = scorer.score(examples)
+         return scores
+     except TypeError:
+         # accept alternative format json data
+         for row in data:
+             input_, annot = row.values()
+             doc = model.make_doc(input_)
+             example = Example.from_dict(doc, {'entities': annot})
+             example.predicted = model(str(example.text))
+             examples.append(example)
+         scores = scorer.score(examples)
+         return scores
+     except Exception as e:
+         print(e)
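A scoring sketch for eval_spacy on rows in the same "alternative format" the app loads from eval_35.csv (model name and offsets are illustrative):

import spacy
from src.trainers import eval_spacy

nlp = spacy.load("en_ner_bc5cdr_md")
rows = [{"text": "fever and cough", "entities": [[0, 5, "DX"], [10, 15, "DX"]]}]
scores = eval_spacy(nlp, rows)
print(scores["ents_p"], scores["ents_r"], scores["ents_f"])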
st_config.yaml ADDED
@@ -0,0 +1,23 @@
+ #"/app/models/push"
+ model_dir: ""
+ default_models: ['en_Radiology_ner_bc5cdr_md','en_ner_bc5cdr_md']
+ examples:
+   radiology_dataset: "eval_35.csv"
+   clinical note A:
+     "A 74 year old woman with history of hypertension and heart disease who had \
+     been discharged 10 days before knee prosthetic surgery was admitted with 4 day \
+     history of fever dry cough and dyspnoea. She had not left home since discharge \
+     and no family member was affected. Analysis revealed lymphopenia elevation of C \
+     reactive protein and a positive RT PCR. The patient was admitted to the intensive \
+     care unit with a favourable course. Chest X ray on the second day showed diffuse \
+     reticular pattern and increased density in both lungs"
+   Clinical note B:
+     "A 29 year old immunocompromised female patient with a 3 day history of cough and fever. \
+     Past medical history includes severe ulcerative colitis treated with Tofacitinib \
+     The patient was admitted to the hospital ward and discharged one week after admission \
+     with complete recovery. Chest X ray shows increase of parenchymal opacity in right lower lobe"
+   negation:
+     "I am still have cough however there is absence of fever. i do not have any headache nor \
+     stomachache but i do have SARS and COVID 19"
+
+ colors_palette: ["#99A3A4" ,"#bc4ed8","#FF5733","#54e96b","#2ce6f5","#f23fc4","#f3a53a",'#7FB3D5',"#EC7063","#F4D03F"]