Spaces:
Running
Running
wzkariampuzha
committed on
Commit
•
77a72db
1
Parent(s):
44803cb
Update extract_abs.py
Browse files- extract_abs.py +24 -21
extract_abs.py
CHANGED
@@ -302,27 +302,30 @@ def streamlit_extraction(search_term:Union[int,str], maxResults:int, filtering:s
|
|
302 |
|
303 |
#Gather title+abstracts into a dictionary {pmid:abstract}
|
304 |
pmid_abs = classify_abs.search_getAbs(search_term_list, maxResults, filtering)
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
|
|
|
|
|
|
326 |
|
327 |
#Identical to search_term_extraction, except it returns a JSON object instead of a df
|
328 |
def API_extraction(search_term:Union[int,str], maxResults:int, filtering:str, #for abstract search
|
|
|
302 |
|
303 |
#Gather title+abstracts into a dictionary {pmid:abstract}
|
304 |
pmid_abs = classify_abs.search_getAbs(search_term_list, maxResults, filtering)
|
305 |
+
if len(pmid_abs)==0:
|
306 |
+
st.error('No results were gathered. Enter a new search term.')
|
307 |
+
else:
|
308 |
+
st.write("Gathered " +str(len(pmid_abs))+" PubMed IDs. Classifying and extracting epidemiology information...")
|
309 |
+
|
310 |
+
i = 0
|
311 |
+
my_bar = st.progress(i)
|
312 |
+
percent_at_step = 100/len(pmid_abs)
|
313 |
+
for pmid, abstract in pmid_abs.items():
|
314 |
+
epi_prob, isEpi = classify_abs.getTextPredictions(abstract, classify_model_vars)
|
315 |
+
if isEpi:
|
316 |
+
#Preprocessing Functions for Extraction
|
317 |
+
sentences = str2sents(abstract)
|
318 |
+
model_outputs = [NER_pipeline(sent) for sent in sentences]
|
319 |
+
extraction = parse_info(sentences, model_outputs, entity_classes, extract_diseases, GARD_dict, max_length)
|
320 |
+
if extraction:
|
321 |
+
extraction.update({'PMID':pmid, 'ABSTRACT':abstract, 'EPI_PROB':epi_prob, 'IsEpi':isEpi})
|
322 |
+
#Slow dataframe update
|
323 |
+
results = results.append(extraction, ignore_index=True)
|
324 |
+
i+=1
|
325 |
+
my_bar.progress(round(i*percent_at_step/100,1))
|
326 |
+
|
327 |
+
st.write(len(results),'abstracts classified as epidemiological.')
|
328 |
+
return results.sort_values('EPI_PROB', ascending=False)
|
329 |
|
330 |
#Identical to search_term_extraction, except it returns a JSON object instead of a df
|
331 |
def API_extraction(search_term:Union[int,str], maxResults:int, filtering:str, #for abstract search
|