ajit committed on
Commit
4a56525
1 Parent(s): 08984d9
Files changed (1) hide show
  1. app.py +76 -76
app.py CHANGED
@@ -21,19 +21,19 @@ SPECIFIC_TAG=":__entity__"
21
  def POS_get_model(model_name):
22
  val = SequenceTagger.load(model_name) # Load the model
23
  return val
24
-
25
  def getPos(s: Sentence):
26
  texts = []
27
  labels = []
28
  for t in s.tokens:
29
  for label in t.annotation_layers.keys():
30
  texts.append(t.text)
31
- labels.append(t.get_labels(label)[0].value)
32
  return texts, labels
33
-
34
  def getDictFromPOS(texts, labels):
35
  return [["dummy",t,l,"dummy","dummy" ] for t, l in zip(texts, labels)]
36
-
37
  def decode(tokenizer, pred_idx, top_clean):
38
  ignore_tokens = string.punctuation + '[PAD]'
39
  tokens = []
@@ -73,9 +73,9 @@ def get_bert_prediction(input_text,top_k):
73
  def load_pos_model():
74
  checkpoint = "flair/pos-english"
75
  return POS_get_model(checkpoint)
76
-
77
 
78
-
 
79
 
80
  def init_session_states():
81
  if 'top_k' not in st.session_state:
@@ -93,8 +93,8 @@ def init_session_states():
93
  if 'aggr' not in st.session_state:
94
  st.session_state['aggr'] = None
95
 
96
-
97
-
98
  def get_pos_arr(input_text,display_area):
99
  if (st.session_state['pos_model'] is None):
100
  display_area.text("Loading model 3 of 3.Loading POS model...")
@@ -104,37 +104,37 @@ def get_pos_arr(input_text,display_area):
104
  texts, labels = getPos(s)
105
  pos_results = getDictFromPOS(texts, labels)
106
  return pos_results
107
-
108
  def perform_inference(text,display_area):
109
-
110
  if (st.session_state['bio_model'] is None):
111
  display_area.text("Loading model 1 of 3. Bio model...")
112
  st.session_state['bio_model'] = bd.BatchInference("bio/desc_a100_config.json",'ajitrajasekharan/biomedical',False,False,DEFAULT_TOP_K,True,True, "bio/","bio/a100_labels.txt",False)
113
-
114
  if (st.session_state['phi_model'] is None):
115
  display_area.text("Loading model 2 of 3. PHI model...")
116
  st.session_state['phi_model'] = bd.BatchInference("bbc/desc_bbc_config.json",'bert-base-cased',False,False,DEFAULT_TOP_K,True,True, "bbc/","bbc/bbc_labels.txt",False)
117
-
118
  #Load POS model if needed and gets POS tags
119
  if (SPECIFIC_TAG not in text):
120
  pos_arr = get_pos_arr(text,display_area)
121
  else:
122
  pos_arr = None
123
-
124
  if (st.session_state['ner_bio'] is None):
125
  display_area.text("Initializing BIO module...")
126
  st.session_state['ner_bio'] = ner.UnsupNER("bio/ner_a100_config.json")
127
-
128
  if (st.session_state['ner_phi'] is None):
129
  display_area.text("Initializing PHI module...")
130
  st.session_state['ner_phi'] = ner.UnsupNER("bbc/ner_bbc_config.json")
131
-
132
  if (st.session_state['aggr'] is None):
133
  display_area.text("Initializing Aggregation modeule...")
134
  st.session_state['aggr'] = aggr.AggregateNER("./ensemble_config.json")
135
-
136
-
137
-
138
  display_area.text("Getting results from BIO model...")
139
  bio_descs = st.session_state['bio_model'].get_descriptors(text,pos_arr)
140
  display_area.text("Getting results from PHI model...")
@@ -142,65 +142,65 @@ def perform_inference(text,display_area):
142
  display_area.text("Aggregating BIO & PHI results...")
143
  bio_ner = st.session_state['ner_bio'].tag_sentence_service(text,bio_descs)
144
  phi_ner = st.session_state['ner_phi'].tag_sentence_service(text,phi_results)
145
-
146
  combined_arr = [json.loads(bio_ner),json.loads(phi_ner)]
147
 
148
  aggregate_results = st.session_state['aggr'].fetch_all(text,combined_arr)
149
  return aggregate_results
150
-
151
 
152
  sent_arr = [
153
- "Lou Gehrig who works for XCorp and lives in New York suffers from Parkinson's ",
154
- "Parkinson who works for XCorp and lives in New York suffers from Lou Gehrig's",
155
- "lou gehrig was diagnosed with Parkinson's ",
156
- "A eGFR below 60 indicates chronic kidney disease",
157
- "Overexpression of EGFR occurs across a wide range of different cancers",
158
- "Stanford called",
159
- "He was diagnosed with non small cell lung cancer",
160
- "I met my girl friends at the pub ",
161
- "I met my New York friends at the pub",
162
- "I met my XCorp friends at the pub",
163
- "I met my two friends at the pub",
164
- "Bio-Techne's genomic tools include advanced tissue-based in-situ hybridization assays sold under the ACD brand as well as a portfolio of assays for prostate cancer diagnosis ",
165
- "There are no treatment options specifically indicated for ACD and physicians must utilize agents approved for other dermatology conditions", "As ACD has been implicated in apoptosis-resistant glioblastoma (GBM), there is a high medical need for identifying novel ACD-inducing drugs ",
166
- "Located in the heart of Dublin , in the family home of acclaimed writer Oscar Wilde , ACD provides the perfect backdrop to inspire Irish (and Irish-at-heart) students to excel in business and the arts",
167
- "Patients treated with anticancer chemotherapy drugs ( ACD ) are vulnerable to infectious diseases due to immunosuppression and to the direct impact of ACD on their intestinal microbiota ",
168
- "In the LASOR trial , increasing daily imatinib dose from 400 to 600mg induced MMR at 12 and 24 months in 25% and 36% of the patients, respectively, who had suboptimal cytogenetic responses ",
169
- "The sky turned dark in advance of the storm that was coming from the east ",
170
- "She loves to watch Sunday afternoon football with her family ",
171
- "Paul Erdos died at 83 "
172
  ]
173
 
174
 
175
  sent_arr_masked = [
176
- "Lou Gehrig:__entity__ who works for XCorp:__entity__ and lives in New:__entity__ York:__entity__ suffers from Parkinson's:__entity__ ",
177
- "Parkinson:__entity__ who works for XCorp:__entity__ and lives in New:__entity__ York:__entity__ suffers from Lou Gehrig's:__entity__",
178
- "lou:__entity__ gehrig:__entity__ was diagnosed with Parkinson's:__entity__ ",
179
- "A eGFR:__entity__ below 60 indicates chronic kidney disease",
180
- "Overexpression of EGFR:__entity__ occurs across a wide range of different cancers",
181
- "Stanford:__entity__ called",
182
- "He was diagnosed with non:__entity__ small:__entity__ cell:__entity__ lung:__entity__ cancer:__entity__",
183
- "I met my girl:__entity__ friends at the pub ",
184
- "I met my New:__entity__ York:__entity__ friends at the pub",
185
- "I met my XCorp:__entity__ friends at the pub",
186
- "I met my two:__entity__ friends at the pub",
187
- "Bio-Techne's genomic tools include advanced tissue-based in-situ hybridization assays sold under the ACD:__entity__ brand as well as a portfolio of assays for prostate cancer diagnosis ",
188
- "There are no treatment options specifically indicated for ACD:__entity__ and physicians must utilize agents approved for other dermatology conditions",
189
- "As ACD:__entity__ has been implicated in apoptosis-resistant glioblastoma (GBM), there is a high medical need for identifying novel ACD-inducing drugs ",
190
- "Located in the heart of Dublin , in the family home of acclaimed writer Oscar Wilde , ACD:__entity__ provides the perfect backdrop to inspire Irish (and Irish-at-heart) students to excel in business and the arts",
191
- "Patients treated with anticancer chemotherapy drugs ( ACD:__entity__ ) are vulnerable to infectious diseases due to immunosuppression and to the direct impact of ACD on their intestinal microbiota ",
192
- "In the LASOR:__entity__ trial:__entity__ , increasing daily imatinib dose from 400 to 600mg induced MMR at 12 and 24 months in 25% and 36% of the patients, respectively, who had suboptimal cytogenetic responses ",
193
- "The sky turned dark:__entity__ in advance of the storm that was coming from the east ",
194
- "She loves to watch Sunday afternoon football:__entity__ with her family ",
195
- "Paul:__entity__ Erdos:__entity__ died at 83:__entity__ "
196
  ]
197
 
198
  def init_selectbox():
199
  return st.selectbox(
200
  'Choose any of the sentences in pull-down below',
201
  sent_arr,key='my_choice')
202
-
203
-
204
  def on_text_change():
205
  text = st.session_state.my_text
206
  print("in callback: " + text)
@@ -210,23 +210,23 @@ def main():
210
  try:
211
 
212
  init_session_states()
213
-
214
  st.markdown("<h3 style='text-align: center;'>Biomedical and PHI NER ensemble</h3>", unsafe_allow_html=True)
215
- st.markdown("<h4 style='text-align: center;'>Using pretrained models with <a href='https://ajitrajasekharan.github.io/2021/01/02/my-first-post.html'>no fine tuning</a></h3>", unsaf4_allow_html=True)
216
  #st.markdown("""
217
  #<h3 style="font-size:16px; color: #ff0000; text-align: center"><b>App under construction... (not in working condition yet)</b></h3>
218
  #""", unsafe_allow_html=True)
219
-
220
-
221
  st.markdown("""
222
  <p style="text-align:center;"><img src="https://ajitrajasekharan.github.io/images/1.png" width="700"></p>
223
  <br/>
224
  <br/>
225
  """, unsafe_allow_html=True)
226
-
227
  st.write("This app uses 3 models. Two Pretrained Bert models (**no fine tuning**) and a POS tagger")
228
-
229
-
230
  with st.form('my_form'):
231
  selected_sentence = init_selectbox()
232
  text_input = st.text_area(label='Type any sentence below',value="")
@@ -243,16 +243,16 @@ def main():
243
  with display_area.container():
244
  st.text(f"prediction took {time.time() - start:.2f}s")
245
  st.json(results)
246
-
247
-
248
-
249
-
250
 
251
  #input_text = st.text_area(
252
  # label="Type any sentence",
253
  # on_change=on_text_change,key='my_text'
254
  # )
255
-
256
  st.markdown("""
257
  <small style="font-size:16px; color: #7f7f7f; text-align: left"><br/><br/>Models used: <br/>(1) <a href='https://huggingface.co/ajitrajasekharan/biomedical' target='_blank'>Biomedical model</a> pretrained on Pubmed,Clinical trials and BookCorpus subset.<br/>(2) Bert-base-cased (for PHI entities - Person/location/organization etc.)<br/>(3) Flair POS tagger</small>
258
  #""", unsafe_allow_html=True)
@@ -264,8 +264,8 @@ def main():
264
  """, unsafe_allow_html=True)
265
 
266
  except Exception as e:
267
- print("Some error occurred in main")
268
- st.exception(e)
269
-
270
  if __name__ == "__main__":
271
  main()
 
21
  def POS_get_model(model_name):
22
  val = SequenceTagger.load(model_name) # Load the model
23
  return val
24
+
25
  def getPos(s: Sentence):
26
  texts = []
27
  labels = []
28
  for t in s.tokens:
29
  for label in t.annotation_layers.keys():
30
  texts.append(t.text)
31
+ labels.append(t.get_labels(label)[0].value)
32
  return texts, labels
33
+
34
  def getDictFromPOS(texts, labels):
35
  return [["dummy",t,l,"dummy","dummy" ] for t, l in zip(texts, labels)]
36
+
37
  def decode(tokenizer, pred_idx, top_clean):
38
  ignore_tokens = string.punctuation + '[PAD]'
39
  tokens = []
 
73
  def load_pos_model():
74
  checkpoint = "flair/pos-english"
75
  return POS_get_model(checkpoint)
 
76
 
77
+
78
+
79
 
80
  def init_session_states():
81
  if 'top_k' not in st.session_state:
 
93
  if 'aggr' not in st.session_state:
94
  st.session_state['aggr'] = None
95
 
96
+
97
+
98
  def get_pos_arr(input_text,display_area):
99
  if (st.session_state['pos_model'] is None):
100
  display_area.text("Loading model 3 of 3.Loading POS model...")
 
104
  texts, labels = getPos(s)
105
  pos_results = getDictFromPOS(texts, labels)
106
  return pos_results
107
+
108
  def perform_inference(text,display_area):
109
+
110
  if (st.session_state['bio_model'] is None):
111
  display_area.text("Loading model 1 of 3. Bio model...")
112
  st.session_state['bio_model'] = bd.BatchInference("bio/desc_a100_config.json",'ajitrajasekharan/biomedical',False,False,DEFAULT_TOP_K,True,True, "bio/","bio/a100_labels.txt",False)
113
+
114
  if (st.session_state['phi_model'] is None):
115
  display_area.text("Loading model 2 of 3. PHI model...")
116
  st.session_state['phi_model'] = bd.BatchInference("bbc/desc_bbc_config.json",'bert-base-cased',False,False,DEFAULT_TOP_K,True,True, "bbc/","bbc/bbc_labels.txt",False)
117
+
118
  #Load POS model if needed and gets POS tags
119
  if (SPECIFIC_TAG not in text):
120
  pos_arr = get_pos_arr(text,display_area)
121
  else:
122
  pos_arr = None
123
+
124
  if (st.session_state['ner_bio'] is None):
125
  display_area.text("Initializing BIO module...")
126
  st.session_state['ner_bio'] = ner.UnsupNER("bio/ner_a100_config.json")
127
+
128
  if (st.session_state['ner_phi'] is None):
129
  display_area.text("Initializing PHI module...")
130
  st.session_state['ner_phi'] = ner.UnsupNER("bbc/ner_bbc_config.json")
131
+
132
  if (st.session_state['aggr'] is None):
133
  display_area.text("Initializing Aggregation modeule...")
134
  st.session_state['aggr'] = aggr.AggregateNER("./ensemble_config.json")
135
+
136
+
137
+
138
  display_area.text("Getting results from BIO model...")
139
  bio_descs = st.session_state['bio_model'].get_descriptors(text,pos_arr)
140
  display_area.text("Getting results from PHI model...")
 
142
  display_area.text("Aggregating BIO & PHI results...")
143
  bio_ner = st.session_state['ner_bio'].tag_sentence_service(text,bio_descs)
144
  phi_ner = st.session_state['ner_phi'].tag_sentence_service(text,phi_results)
145
+
146
  combined_arr = [json.loads(bio_ner),json.loads(phi_ner)]
147
 
148
  aggregate_results = st.session_state['aggr'].fetch_all(text,combined_arr)
149
  return aggregate_results
150
+
151
 
152
  sent_arr = [
153
+ "Lou Gehrig who works for XCorp and lives in New York suffers from Parkinson's ",
154
+ "Parkinson who works for XCorp and lives in New York suffers from Lou Gehrig's",
155
+ "lou gehrig was diagnosed with Parkinson's ",
156
+ "A eGFR below 60 indicates chronic kidney disease",
157
+ "Overexpression of EGFR occurs across a wide range of different cancers",
158
+ "Stanford called",
159
+ "He was diagnosed with non small cell lung cancer",
160
+ "I met my girl friends at the pub ",
161
+ "I met my New York friends at the pub",
162
+ "I met my XCorp friends at the pub",
163
+ "I met my two friends at the pub",
164
+ "Bio-Techne's genomic tools include advanced tissue-based in-situ hybridization assays sold under the ACD brand as well as a portfolio of assays for prostate cancer diagnosis ",
165
+ "There are no treatment options specifically indicated for ACD and physicians must utilize agents approved for other dermatology conditions", "As ACD has been implicated in apoptosis-resistant glioblastoma (GBM), there is a high medical need for identifying novel ACD-inducing drugs ",
166
+ "Located in the heart of Dublin , in the family home of acclaimed writer Oscar Wilde , ACD provides the perfect backdrop to inspire Irish (and Irish-at-heart) students to excel in business and the arts",
167
+ "Patients treated with anticancer chemotherapy drugs ( ACD ) are vulnerable to infectious diseases due to immunosuppression and to the direct impact of ACD on their intestinal microbiota ",
168
+ "In the LASOR trial , increasing daily imatinib dose from 400 to 600mg induced MMR at 12 and 24 months in 25% and 36% of the patients, respectively, who had suboptimal cytogenetic responses ",
169
+ "The sky turned dark in advance of the storm that was coming from the east ",
170
+ "She loves to watch Sunday afternoon football with her family ",
171
+ "Paul Erdos died at 83 "
172
  ]
173
 
174
 
175
  sent_arr_masked = [
176
+ "Lou Gehrig:__entity__ who works for XCorp:__entity__ and lives in New:__entity__ York:__entity__ suffers from Parkinson's:__entity__ ",
177
+ "Parkinson:__entity__ who works for XCorp:__entity__ and lives in New:__entity__ York:__entity__ suffers from Lou Gehrig's:__entity__",
178
+ "lou:__entity__ gehrig:__entity__ was diagnosed with Parkinson's:__entity__ ",
179
+ "A eGFR:__entity__ below 60 indicates chronic kidney disease",
180
+ "Overexpression of EGFR:__entity__ occurs across a wide range of different cancers",
181
+ "Stanford:__entity__ called",
182
+ "He was diagnosed with non:__entity__ small:__entity__ cell:__entity__ lung:__entity__ cancer:__entity__",
183
+ "I met my girl:__entity__ friends at the pub ",
184
+ "I met my New:__entity__ York:__entity__ friends at the pub",
185
+ "I met my XCorp:__entity__ friends at the pub",
186
+ "I met my two:__entity__ friends at the pub",
187
+ "Bio-Techne's genomic tools include advanced tissue-based in-situ hybridization assays sold under the ACD:__entity__ brand as well as a portfolio of assays for prostate cancer diagnosis ",
188
+ "There are no treatment options specifically indicated for ACD:__entity__ and physicians must utilize agents approved for other dermatology conditions",
189
+ "As ACD:__entity__ has been implicated in apoptosis-resistant glioblastoma (GBM), there is a high medical need for identifying novel ACD-inducing drugs ",
190
+ "Located in the heart of Dublin , in the family home of acclaimed writer Oscar Wilde , ACD:__entity__ provides the perfect backdrop to inspire Irish (and Irish-at-heart) students to excel in business and the arts",
191
+ "Patients treated with anticancer chemotherapy drugs ( ACD:__entity__ ) are vulnerable to infectious diseases due to immunosuppression and to the direct impact of ACD on their intestinal microbiota ",
192
+ "In the LASOR:__entity__ trial:__entity__ , increasing daily imatinib dose from 400 to 600mg induced MMR at 12 and 24 months in 25% and 36% of the patients, respectively, who had suboptimal cytogenetic responses ",
193
+ "The sky turned dark:__entity__ in advance of the storm that was coming from the east ",
194
+ "She loves to watch Sunday afternoon football:__entity__ with her family ",
195
+ "Paul:__entity__ Erdos:__entity__ died at 83:__entity__ "
196
  ]
197
 
198
  def init_selectbox():
199
  return st.selectbox(
200
  'Choose any of the sentences in pull-down below',
201
  sent_arr,key='my_choice')
202
+
203
+
204
  def on_text_change():
205
  text = st.session_state.my_text
206
  print("in callback: " + text)
 
210
  try:
211
 
212
  init_session_states()
213
+
214
  st.markdown("<h3 style='text-align: center;'>Biomedical and PHI NER ensemble</h3>", unsafe_allow_html=True)
215
+ st.markdown("<h4 style='text-align: center;'>Using pretrained models with <a href='https://ajitrajasekharan.github.io/2021/01/02/my-first-post.html'>no fine tuning</a></h4>", unsafe_allow_html=True)
216
  #st.markdown("""
217
  #<h3 style="font-size:16px; color: #ff0000; text-align: center"><b>App under construction... (not in working condition yet)</b></h3>
218
  #""", unsafe_allow_html=True)
219
+
220
+
221
  st.markdown("""
222
  <p style="text-align:center;"><img src="https://ajitrajasekharan.github.io/images/1.png" width="700"></p>
223
  <br/>
224
  <br/>
225
  """, unsafe_allow_html=True)
226
+
227
  st.write("This app uses 3 models. Two Pretrained Bert models (**no fine tuning**) and a POS tagger")
228
+
229
+
230
  with st.form('my_form'):
231
  selected_sentence = init_selectbox()
232
  text_input = st.text_area(label='Type any sentence below',value="")
 
243
  with display_area.container():
244
  st.text(f"prediction took {time.time() - start:.2f}s")
245
  st.json(results)
246
+
247
+
248
+
249
+
250
 
251
  #input_text = st.text_area(
252
  # label="Type any sentence",
253
  # on_change=on_text_change,key='my_text'
254
  # )
255
+
256
  st.markdown("""
257
  <small style="font-size:16px; color: #7f7f7f; text-align: left"><br/><br/>Models used: <br/>(1) <a href='https://huggingface.co/ajitrajasekharan/biomedical' target='_blank'>Biomedical model</a> pretrained on Pubmed,Clinical trials and BookCorpus subset.<br/>(2) Bert-base-cased (for PHI entities - Person/location/organization etc.)<br/>(3) Flair POS tagger</small>
258
  #""", unsafe_allow_html=True)
 
264
  """, unsafe_allow_html=True)
265
 
266
  except Exception as e:
267
+ print("Some error occurred in main")
268
+ st.exception(e)
269
+
270
  if __name__ == "__main__":
271
  main()