Spaces:
Runtime error
Runtime error
ajit
committed on
Commit
•
4a56525
1
Parent(s):
08984d9
Cosmetic
Browse files
app.py
CHANGED
@@ -21,19 +21,19 @@ SPECIFIC_TAG=":__entity__"
|
|
21 |
def POS_get_model(model_name):
    """Load and return the sequence tagger identified by ``model_name``.

    Thin wrapper around ``SequenceTagger.load``; anything that call raises
    propagates unchanged to the caller.
    """
    return SequenceTagger.load(model_name)
|
24 |
-
|
25 |
def getPos(s: "Sentence"):
    """Collect token texts and labels from a tagged sentence.

    For every token in ``s.tokens`` and every annotation layer present on
    that token, the token text and the value of the first label of that
    layer are appended to two parallel lists.

    Args:
        s: Sentence-like object exposing ``tokens``; each token must expose
            ``text``, ``annotation_layers`` and ``get_labels(layer)``.

    Returns:
        A ``(texts, labels)`` tuple of equally long lists. A token with no
        annotation layers contributes nothing; a token with several layers
        contributes one entry per layer.
    """
    # The annotation is a forward reference so it is not evaluated when the
    # function is defined.
    texts = []
    labels = []
    for token in s.tokens:
        for layer in token.annotation_layers:
            texts.append(token.text)
            # Only the first label of each layer is used.
            labels.append(token.get_labels(layer)[0].value)
    return texts, labels
|
33 |
-
|
34 |
def getDictFromPOS(texts, labels):
    """Build one five-slot row per (text, label) pair.

    Args:
        texts: Iterable of token texts.
        labels: Iterable of labels, parallel to ``texts``.

    Returns:
        A list of ``["dummy", text, label, "dummy", "dummy"]`` lists. Despite
        the function name the result is a list of lists, not a dict. Pairing
        uses ``zip``, so it stops at the shorter of the two inputs.
    """
    return [
        ["dummy", text, label, "dummy", "dummy"]
        for text, label in zip(texts, labels)
    ]
|
36 |
-
|
37 |
def decode(tokenizer, pred_idx, top_clean):
|
38 |
ignore_tokens = string.punctuation + '[PAD]'
|
39 |
tokens = []
|
@@ -73,9 +73,9 @@ def get_bert_prediction(input_text,top_k):
|
|
73 |
def load_pos_model():
    """Load the POS tagger for the ``flair/pos-english`` checkpoint.

    Returns whatever ``POS_get_model`` returns for that checkpoint name.
    """
    checkpoint = "flair/pos-english"
    return POS_get_model(checkpoint)
|
76 |
-
|
77 |
|
78 |
-
|
|
|
79 |
|
80 |
def init_session_states():
|
81 |
if 'top_k' not in st.session_state:
|
@@ -93,8 +93,8 @@ def init_session_states():
|
|
93 |
if 'aggr' not in st.session_state:
|
94 |
st.session_state['aggr'] = None
|
95 |
|
96 |
-
|
97 |
-
|
98 |
def get_pos_arr(input_text,display_area):
|
99 |
if (st.session_state['pos_model'] is None):
|
100 |
display_area.text("Loading model 3 of 3.Loading POS model...")
|
@@ -104,37 +104,37 @@ def get_pos_arr(input_text,display_area):
|
|
104 |
texts, labels = getPos(s)
|
105 |
pos_results = getDictFromPOS(texts, labels)
|
106 |
return pos_results
|
107 |
-
|
108 |
def perform_inference(text,display_area):
|
109 |
-
|
110 |
if (st.session_state['bio_model'] is None):
|
111 |
display_area.text("Loading model 1 of 3. Bio model...")
|
112 |
st.session_state['bio_model'] = bd.BatchInference("bio/desc_a100_config.json",'ajitrajasekharan/biomedical',False,False,DEFAULT_TOP_K,True,True, "bio/","bio/a100_labels.txt",False)
|
113 |
-
|
114 |
if (st.session_state['phi_model'] is None):
|
115 |
display_area.text("Loading model 2 of 3. PHI model...")
|
116 |
st.session_state['phi_model'] = bd.BatchInference("bbc/desc_bbc_config.json",'bert-base-cased',False,False,DEFAULT_TOP_K,True,True, "bbc/","bbc/bbc_labels.txt",False)
|
117 |
-
|
118 |
#Load POS model if needed and gets POS tags
|
119 |
if (SPECIFIC_TAG not in text):
|
120 |
pos_arr = get_pos_arr(text,display_area)
|
121 |
else:
|
122 |
pos_arr = None
|
123 |
-
|
124 |
if (st.session_state['ner_bio'] is None):
|
125 |
display_area.text("Initializing BIO module...")
|
126 |
st.session_state['ner_bio'] = ner.UnsupNER("bio/ner_a100_config.json")
|
127 |
-
|
128 |
if (st.session_state['ner_phi'] is None):
|
129 |
display_area.text("Initializing PHI module...")
|
130 |
st.session_state['ner_phi'] = ner.UnsupNER("bbc/ner_bbc_config.json")
|
131 |
-
|
132 |
if (st.session_state['aggr'] is None):
|
133 |
display_area.text("Initializing Aggregation modeule...")
|
134 |
st.session_state['aggr'] = aggr.AggregateNER("./ensemble_config.json")
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
display_area.text("Getting results from BIO model...")
|
139 |
bio_descs = st.session_state['bio_model'].get_descriptors(text,pos_arr)
|
140 |
display_area.text("Getting results from PHI model...")
|
@@ -142,65 +142,65 @@ def perform_inference(text,display_area):
|
|
142 |
display_area.text("Aggregating BIO & PHI results...")
|
143 |
bio_ner = st.session_state['ner_bio'].tag_sentence_service(text,bio_descs)
|
144 |
phi_ner = st.session_state['ner_phi'].tag_sentence_service(text,phi_results)
|
145 |
-
|
146 |
combined_arr = [json.loads(bio_ner),json.loads(phi_ner)]
|
147 |
|
148 |
aggregate_results = st.session_state['aggr'].fetch_all(text,combined_arr)
|
149 |
return aggregate_results
|
150 |
-
|
151 |
|
152 |
# Sample sentences offered in the pull-down built by init_selectbox().
# One entry per line; trailing spaces inside the strings are preserved as-is.
sent_arr = [
    "Lou Gehrig who works for XCorp and lives in New York suffers from Parkinson's ",
    "Parkinson who works for XCorp and lives in New York suffers from Lou Gehrig's",
    "lou gehrig was diagnosed with Parkinson's ",
    "A eGFR below 60 indicates chronic kidney disease",
    "Overexpression of EGFR occurs across a wide range of different cancers",
    "Stanford called",
    "He was diagnosed with non small cell lung cancer",
    "I met my girl friends at the pub ",
    "I met my New York friends at the pub",
    "I met my XCorp friends at the pub",
    "I met my two friends at the pub",
    "Bio-Techne's genomic tools include advanced tissue-based in-situ hybridization assays sold under the ACD brand as well as a portfolio of assays for prostate cancer diagnosis ",
    "There are no treatment options specifically indicated for ACD and physicians must utilize agents approved for other dermatology conditions",
    "As ACD has been implicated in apoptosis-resistant glioblastoma (GBM), there is a high medical need for identifying novel ACD-inducing drugs ",
    "Located in the heart of Dublin , in the family home of acclaimed writer Oscar Wilde , ACD provides the perfect backdrop to inspire Irish (and Irish-at-heart) students to excel in business and the arts",
    "Patients treated with anticancer chemotherapy drugs ( ACD ) are vulnerable to infectious diseases due to immunosuppression and to the direct impact of ACD on their intestinal microbiota ",
    "In the LASOR trial , increasing daily imatinib dose from 400 to 600mg induced MMR at 12 and 24 months in 25% and 36% of the patients, respectively, who had suboptimal cytogenetic responses ",
    "The sky turned dark in advance of the storm that was coming from the east ",
    "She loves to watch Sunday afternoon football with her family ",
    "Paul Erdos died at 83 ",
]
|
173 |
|
174 |
|
175 |
# Variants of the sent_arr sentences (same order) in which selected terms
# carry the ":__entity__" suffix. Trailing spaces inside strings are preserved.
sent_arr_masked = [
    "Lou Gehrig:__entity__ who works for XCorp:__entity__ and lives in New:__entity__ York:__entity__ suffers from Parkinson's:__entity__ ",
    "Parkinson:__entity__ who works for XCorp:__entity__ and lives in New:__entity__ York:__entity__ suffers from Lou Gehrig's:__entity__",
    "lou:__entity__ gehrig:__entity__ was diagnosed with Parkinson's:__entity__ ",
    "A eGFR:__entity__ below 60 indicates chronic kidney disease",
    "Overexpression of EGFR:__entity__ occurs across a wide range of different cancers",
    "Stanford:__entity__ called",
    "He was diagnosed with non:__entity__ small:__entity__ cell:__entity__ lung:__entity__ cancer:__entity__",
    "I met my girl:__entity__ friends at the pub ",
    "I met my New:__entity__ York:__entity__ friends at the pub",
    "I met my XCorp:__entity__ friends at the pub",
    "I met my two:__entity__ friends at the pub",
    "Bio-Techne's genomic tools include advanced tissue-based in-situ hybridization assays sold under the ACD:__entity__ brand as well as a portfolio of assays for prostate cancer diagnosis ",
    "There are no treatment options specifically indicated for ACD:__entity__ and physicians must utilize agents approved for other dermatology conditions",
    "As ACD:__entity__ has been implicated in apoptosis-resistant glioblastoma (GBM), there is a high medical need for identifying novel ACD-inducing drugs ",
    "Located in the heart of Dublin , in the family home of acclaimed writer Oscar Wilde , ACD:__entity__ provides the perfect backdrop to inspire Irish (and Irish-at-heart) students to excel in business and the arts",
    "Patients treated with anticancer chemotherapy drugs ( ACD:__entity__ ) are vulnerable to infectious diseases due to immunosuppression and to the direct impact of ACD on their intestinal microbiota ",
    "In the LASOR:__entity__ trial:__entity__ , increasing daily imatinib dose from 400 to 600mg induced MMR at 12 and 24 months in 25% and 36% of the patients, respectively, who had suboptimal cytogenetic responses ",
    "The sky turned dark:__entity__ in advance of the storm that was coming from the east ",
    "She loves to watch Sunday afternoon football:__entity__ with her family ",
    "Paul:__entity__ Erdos:__entity__ died at 83:__entity__ ",
]
|
197 |
|
198 |
def init_selectbox():
    """Render the sample-sentence pull-down and return its current selection.

    The widget offers the entries of ``sent_arr`` and is registered under the
    key ``my_choice``.
    """
    return st.selectbox(
        'Choose any of the sentences in pull-down below',
        sent_arr,
        key='my_choice',
    )
|
202 |
-
|
203 |
-
|
204 |
def on_text_change():
|
205 |
text = st.session_state.my_text
|
206 |
print("in callback: " + text)
|
@@ -210,23 +210,23 @@ def main():
|
|
210 |
try:
|
211 |
|
212 |
init_session_states()
|
213 |
-
|
214 |
st.markdown("<h3 style='text-align: center;'>Biomedical and PHI NER ensemble</h3>", unsafe_allow_html=True)
|
215 |
-
st.markdown("<h4 style='text-align: center;'>Using pretrained models with <a href='https://ajitrajasekharan.github.io/2021/01/02/my-first-post.html'>no fine tuning</a></
|
216 |
#st.markdown("""
|
217 |
#<h3 style="font-size:16px; color: #ff0000; text-align: center"><b>App under construction... (not in working condition yet)</b></h3>
|
218 |
#""", unsafe_allow_html=True)
|
219 |
-
|
220 |
-
|
221 |
st.markdown("""
|
222 |
<p style="text-align:center;"><img src="https://ajitrajasekharan.github.io/images/1.png" width="700"></p>
|
223 |
<br/>
|
224 |
<br/>
|
225 |
""", unsafe_allow_html=True)
|
226 |
-
|
227 |
st.write("This app uses 3 models. Two Pretrained Bert models (**no fine tuning**) and a POS tagger")
|
228 |
-
|
229 |
-
|
230 |
with st.form('my_form'):
|
231 |
selected_sentence = init_selectbox()
|
232 |
text_input = st.text_area(label='Type any sentence below',value="")
|
@@ -243,16 +243,16 @@ def main():
|
|
243 |
with display_area.container():
|
244 |
st.text(f"prediction took {time.time() - start:.2f}s")
|
245 |
st.json(results)
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
|
251 |
#input_text = st.text_area(
|
252 |
# label="Type any sentence",
|
253 |
# on_change=on_text_change,key='my_text'
|
254 |
# )
|
255 |
-
|
256 |
st.markdown("""
|
257 |
<small style="font-size:16px; color: #7f7f7f; text-align: left"><br/><br/>Models used: <br/>(1) <a href='https://huggingface.co/ajitrajasekharan/biomedical' target='_blank'>Biomedical model</a> pretrained on Pubmed,Clinical trials and BookCorpus subset.<br/>(2) Bert-base-cased (for PHI entities - Person/location/organization etc.)<br/>(3) Flair POS tagger</small>
|
258 |
#""", unsafe_allow_html=True)
|
@@ -264,8 +264,8 @@ def main():
|
|
264 |
""", unsafe_allow_html=True)
|
265 |
|
266 |
except Exception as e:
|
267 |
-
print("Some error occurred in main")
|
268 |
-
st.exception(e)
|
269 |
-
|
270 |
if __name__ == "__main__":
|
271 |
main()
|
|
|
21 |
def POS_get_model(model_name):
    """Load and return the sequence tagger identified by ``model_name``.

    Thin wrapper around ``SequenceTagger.load``; anything that call raises
    propagates unchanged to the caller.
    """
    return SequenceTagger.load(model_name)
|
24 |
+
|
25 |
def getPos(s: "Sentence"):
    """Collect token texts and labels from a tagged sentence.

    For every token in ``s.tokens`` and every annotation layer present on
    that token, the token text and the value of the first label of that
    layer are appended to two parallel lists.

    Args:
        s: Sentence-like object exposing ``tokens``; each token must expose
            ``text``, ``annotation_layers`` and ``get_labels(layer)``.

    Returns:
        A ``(texts, labels)`` tuple of equally long lists. A token with no
        annotation layers contributes nothing; a token with several layers
        contributes one entry per layer.
    """
    # The annotation is a forward reference so it is not evaluated when the
    # function is defined.
    texts = []
    labels = []
    for token in s.tokens:
        for layer in token.annotation_layers:
            texts.append(token.text)
            # Only the first label of each layer is used.
            labels.append(token.get_labels(layer)[0].value)
    return texts, labels
|
33 |
+
|
34 |
def getDictFromPOS(texts, labels):
    """Build one five-slot row per (text, label) pair.

    Args:
        texts: Iterable of token texts.
        labels: Iterable of labels, parallel to ``texts``.

    Returns:
        A list of ``["dummy", text, label, "dummy", "dummy"]`` lists. Despite
        the function name the result is a list of lists, not a dict. Pairing
        uses ``zip``, so it stops at the shorter of the two inputs.
    """
    return [
        ["dummy", text, label, "dummy", "dummy"]
        for text, label in zip(texts, labels)
    ]
|
36 |
+
|
37 |
def decode(tokenizer, pred_idx, top_clean):
|
38 |
ignore_tokens = string.punctuation + '[PAD]'
|
39 |
tokens = []
|
|
|
73 |
def load_pos_model():
    """Load the POS tagger for the ``flair/pos-english`` checkpoint.

    Returns whatever ``POS_get_model`` returns for that checkpoint name.
    """
    checkpoint = "flair/pos-english"
    return POS_get_model(checkpoint)
|
|
|
76 |
|
77 |
+
|
78 |
+
|
79 |
|
80 |
def init_session_states():
|
81 |
if 'top_k' not in st.session_state:
|
|
|
93 |
if 'aggr' not in st.session_state:
|
94 |
st.session_state['aggr'] = None
|
95 |
|
96 |
+
|
97 |
+
|
98 |
def get_pos_arr(input_text,display_area):
|
99 |
if (st.session_state['pos_model'] is None):
|
100 |
display_area.text("Loading model 3 of 3.Loading POS model...")
|
|
|
104 |
texts, labels = getPos(s)
|
105 |
pos_results = getDictFromPOS(texts, labels)
|
106 |
return pos_results
|
107 |
+
|
108 |
def perform_inference(text,display_area):
|
109 |
+
|
110 |
if (st.session_state['bio_model'] is None):
|
111 |
display_area.text("Loading model 1 of 3. Bio model...")
|
112 |
st.session_state['bio_model'] = bd.BatchInference("bio/desc_a100_config.json",'ajitrajasekharan/biomedical',False,False,DEFAULT_TOP_K,True,True, "bio/","bio/a100_labels.txt",False)
|
113 |
+
|
114 |
if (st.session_state['phi_model'] is None):
|
115 |
display_area.text("Loading model 2 of 3. PHI model...")
|
116 |
st.session_state['phi_model'] = bd.BatchInference("bbc/desc_bbc_config.json",'bert-base-cased',False,False,DEFAULT_TOP_K,True,True, "bbc/","bbc/bbc_labels.txt",False)
|
117 |
+
|
118 |
#Load POS model if needed and gets POS tags
|
119 |
if (SPECIFIC_TAG not in text):
|
120 |
pos_arr = get_pos_arr(text,display_area)
|
121 |
else:
|
122 |
pos_arr = None
|
123 |
+
|
124 |
if (st.session_state['ner_bio'] is None):
|
125 |
display_area.text("Initializing BIO module...")
|
126 |
st.session_state['ner_bio'] = ner.UnsupNER("bio/ner_a100_config.json")
|
127 |
+
|
128 |
if (st.session_state['ner_phi'] is None):
|
129 |
display_area.text("Initializing PHI module...")
|
130 |
st.session_state['ner_phi'] = ner.UnsupNER("bbc/ner_bbc_config.json")
|
131 |
+
|
132 |
if (st.session_state['aggr'] is None):
|
133 |
display_area.text("Initializing Aggregation modeule...")
|
134 |
st.session_state['aggr'] = aggr.AggregateNER("./ensemble_config.json")
|
135 |
+
|
136 |
+
|
137 |
+
|
138 |
display_area.text("Getting results from BIO model...")
|
139 |
bio_descs = st.session_state['bio_model'].get_descriptors(text,pos_arr)
|
140 |
display_area.text("Getting results from PHI model...")
|
|
|
142 |
display_area.text("Aggregating BIO & PHI results...")
|
143 |
bio_ner = st.session_state['ner_bio'].tag_sentence_service(text,bio_descs)
|
144 |
phi_ner = st.session_state['ner_phi'].tag_sentence_service(text,phi_results)
|
145 |
+
|
146 |
combined_arr = [json.loads(bio_ner),json.loads(phi_ner)]
|
147 |
|
148 |
aggregate_results = st.session_state['aggr'].fetch_all(text,combined_arr)
|
149 |
return aggregate_results
|
150 |
+
|
151 |
|
152 |
# Sample sentences offered in the pull-down built by init_selectbox().
# One entry per line; trailing spaces inside the strings are preserved as-is.
sent_arr = [
    "Lou Gehrig who works for XCorp and lives in New York suffers from Parkinson's ",
    "Parkinson who works for XCorp and lives in New York suffers from Lou Gehrig's",
    "lou gehrig was diagnosed with Parkinson's ",
    "A eGFR below 60 indicates chronic kidney disease",
    "Overexpression of EGFR occurs across a wide range of different cancers",
    "Stanford called",
    "He was diagnosed with non small cell lung cancer",
    "I met my girl friends at the pub ",
    "I met my New York friends at the pub",
    "I met my XCorp friends at the pub",
    "I met my two friends at the pub",
    "Bio-Techne's genomic tools include advanced tissue-based in-situ hybridization assays sold under the ACD brand as well as a portfolio of assays for prostate cancer diagnosis ",
    "There are no treatment options specifically indicated for ACD and physicians must utilize agents approved for other dermatology conditions",
    "As ACD has been implicated in apoptosis-resistant glioblastoma (GBM), there is a high medical need for identifying novel ACD-inducing drugs ",
    "Located in the heart of Dublin , in the family home of acclaimed writer Oscar Wilde , ACD provides the perfect backdrop to inspire Irish (and Irish-at-heart) students to excel in business and the arts",
    "Patients treated with anticancer chemotherapy drugs ( ACD ) are vulnerable to infectious diseases due to immunosuppression and to the direct impact of ACD on their intestinal microbiota ",
    "In the LASOR trial , increasing daily imatinib dose from 400 to 600mg induced MMR at 12 and 24 months in 25% and 36% of the patients, respectively, who had suboptimal cytogenetic responses ",
    "The sky turned dark in advance of the storm that was coming from the east ",
    "She loves to watch Sunday afternoon football with her family ",
    "Paul Erdos died at 83 ",
]
|
173 |
|
174 |
|
175 |
# Variants of the sent_arr sentences (same order) in which selected terms
# carry the ":__entity__" suffix. Trailing spaces inside strings are preserved.
sent_arr_masked = [
    "Lou Gehrig:__entity__ who works for XCorp:__entity__ and lives in New:__entity__ York:__entity__ suffers from Parkinson's:__entity__ ",
    "Parkinson:__entity__ who works for XCorp:__entity__ and lives in New:__entity__ York:__entity__ suffers from Lou Gehrig's:__entity__",
    "lou:__entity__ gehrig:__entity__ was diagnosed with Parkinson's:__entity__ ",
    "A eGFR:__entity__ below 60 indicates chronic kidney disease",
    "Overexpression of EGFR:__entity__ occurs across a wide range of different cancers",
    "Stanford:__entity__ called",
    "He was diagnosed with non:__entity__ small:__entity__ cell:__entity__ lung:__entity__ cancer:__entity__",
    "I met my girl:__entity__ friends at the pub ",
    "I met my New:__entity__ York:__entity__ friends at the pub",
    "I met my XCorp:__entity__ friends at the pub",
    "I met my two:__entity__ friends at the pub",
    "Bio-Techne's genomic tools include advanced tissue-based in-situ hybridization assays sold under the ACD:__entity__ brand as well as a portfolio of assays for prostate cancer diagnosis ",
    "There are no treatment options specifically indicated for ACD:__entity__ and physicians must utilize agents approved for other dermatology conditions",
    "As ACD:__entity__ has been implicated in apoptosis-resistant glioblastoma (GBM), there is a high medical need for identifying novel ACD-inducing drugs ",
    "Located in the heart of Dublin , in the family home of acclaimed writer Oscar Wilde , ACD:__entity__ provides the perfect backdrop to inspire Irish (and Irish-at-heart) students to excel in business and the arts",
    "Patients treated with anticancer chemotherapy drugs ( ACD:__entity__ ) are vulnerable to infectious diseases due to immunosuppression and to the direct impact of ACD on their intestinal microbiota ",
    "In the LASOR:__entity__ trial:__entity__ , increasing daily imatinib dose from 400 to 600mg induced MMR at 12 and 24 months in 25% and 36% of the patients, respectively, who had suboptimal cytogenetic responses ",
    "The sky turned dark:__entity__ in advance of the storm that was coming from the east ",
    "She loves to watch Sunday afternoon football:__entity__ with her family ",
    "Paul:__entity__ Erdos:__entity__ died at 83:__entity__ ",
]
|
197 |
|
198 |
def init_selectbox():
    """Render the sample-sentence pull-down and return its current selection.

    The widget offers the entries of ``sent_arr`` and is registered under the
    key ``my_choice``.
    """
    return st.selectbox(
        'Choose any of the sentences in pull-down below',
        sent_arr,
        key='my_choice',
    )
|
202 |
+
|
203 |
+
|
204 |
def on_text_change():
|
205 |
text = st.session_state.my_text
|
206 |
print("in callback: " + text)
|
|
|
210 |
try:
|
211 |
|
212 |
init_session_states()
|
213 |
+
|
214 |
st.markdown("<h3 style='text-align: center;'>Biomedical and PHI NER ensemble</h3>", unsafe_allow_html=True)
|
215 |
+
st.markdown("<h4 style='text-align: center;'>Using pretrained models with <a href='https://ajitrajasekharan.github.io/2021/01/02/my-first-post.html'>no fine tuning</a></h4>", unsafe_allow_html=True)
|
216 |
#st.markdown("""
|
217 |
#<h3 style="font-size:16px; color: #ff0000; text-align: center"><b>App under construction... (not in working condition yet)</b></h3>
|
218 |
#""", unsafe_allow_html=True)
|
219 |
+
|
220 |
+
|
221 |
st.markdown("""
|
222 |
<p style="text-align:center;"><img src="https://ajitrajasekharan.github.io/images/1.png" width="700"></p>
|
223 |
<br/>
|
224 |
<br/>
|
225 |
""", unsafe_allow_html=True)
|
226 |
+
|
227 |
st.write("This app uses 3 models. Two Pretrained Bert models (**no fine tuning**) and a POS tagger")
|
228 |
+
|
229 |
+
|
230 |
with st.form('my_form'):
|
231 |
selected_sentence = init_selectbox()
|
232 |
text_input = st.text_area(label='Type any sentence below',value="")
|
|
|
243 |
with display_area.container():
|
244 |
st.text(f"prediction took {time.time() - start:.2f}s")
|
245 |
st.json(results)
|
246 |
+
|
247 |
+
|
248 |
+
|
249 |
+
|
250 |
|
251 |
#input_text = st.text_area(
|
252 |
# label="Type any sentence",
|
253 |
# on_change=on_text_change,key='my_text'
|
254 |
# )
|
255 |
+
|
256 |
st.markdown("""
|
257 |
<small style="font-size:16px; color: #7f7f7f; text-align: left"><br/><br/>Models used: <br/>(1) <a href='https://huggingface.co/ajitrajasekharan/biomedical' target='_blank'>Biomedical model</a> pretrained on Pubmed,Clinical trials and BookCorpus subset.<br/>(2) Bert-base-cased (for PHI entities - Person/location/organization etc.)<br/>(3) Flair POS tagger</small>
|
258 |
#""", unsafe_allow_html=True)
|
|
|
264 |
""", unsafe_allow_html=True)
|
265 |
|
266 |
except Exception as e:
|
267 |
+
print("Some error occurred in main")
|
268 |
+
st.exception(e)
|
269 |
+
|
270 |
if __name__ == "__main__":
|
271 |
main()
|