Spaces:

ncats
/

EpiPipeline4RD

Running

App Files Files Community

wzkariampuzha committed on Mar 24, 2022

Commit

77a72db

•

1 Parent(s): 44803cb

Update extract_abs.py

Browse files

Files changed (1) hide show

extract_abs.py +24 -21

extract_abs.py CHANGED Viewed

@@ -302,27 +302,30 @@ def streamlit_extraction(search_term:Union[int,str], maxResults:int, filtering:s
     #Gather title+abstracts into a dictionary {pmid:abstract}
     pmid_abs = classify_abs.search_getAbs(search_term_list, maxResults, filtering)
-    st.write("Gathered " +str(len(pmid_abs))+" PubMed IDs. Classifying and extracting epidemiology information...")
-    i = 0
-    my_bar = st.progress(i)
-    percent_at_step = 100/len(pmid_abs)
-    for pmid, abstract in pmid_abs.items():
-        epi_prob, isEpi = classify_abs.getTextPredictions(abstract, classify_model_vars)
-        if isEpi:
-            #Preprocessing Functions for Extraction
-            sentences = str2sents(abstract)
-            model_outputs = [NER_pipeline(sent) for sent in sentences]
-            extraction = parse_info(sentences, model_outputs, entity_classes, extract_diseases, GARD_dict, max_length)
-            if extraction:
-                extraction.update({'PMID':pmid, 'ABSTRACT':abstract, 'EPI_PROB':epi_prob, 'IsEpi':isEpi})
-                #Slow dataframe update
-                results = results.append(extraction, ignore_index=True)
-        i+=1
-        my_bar.progress(round(i*percent_at_step/100,1))
-    st.write(len(results),'abstracts classified as epidemiological.')
-    return results.sort_values('EPI_PROB', ascending=False)
 #Identical to search_term_extraction, except it returns a JSON object instead of a df
 def API_extraction(search_term:Union[int,str], maxResults:int, filtering:str, #for abstract search

     #Gather title+abstracts into a dictionary {pmid:abstract}
     pmid_abs = classify_abs.search_getAbs(search_term_list, maxResults, filtering)
+    if len(pmid_abs)==0:
+        st.error('No results were gathered. Enter a new search term.')
+    else:
+        st.write("Gathered " +str(len(pmid_abs))+" PubMed IDs. Classifying and extracting epidemiology information...")
+        i = 0
+        my_bar = st.progress(i)
+        percent_at_step = 100/len(pmid_abs)
+        for pmid, abstract in pmid_abs.items():
+            epi_prob, isEpi = classify_abs.getTextPredictions(abstract, classify_model_vars)
+            if isEpi:
+                #Preprocessing Functions for Extraction
+                sentences = str2sents(abstract)
+                model_outputs = [NER_pipeline(sent) for sent in sentences]
+                extraction = parse_info(sentences, model_outputs, entity_classes, extract_diseases, GARD_dict, max_length)
+                if extraction:
+                    extraction.update({'PMID':pmid, 'ABSTRACT':abstract, 'EPI_PROB':epi_prob, 'IsEpi':isEpi})
+                    #Slow dataframe update
+                    results = results.append(extraction, ignore_index=True)
+            i+=1
+            my_bar.progress(round(i*percent_at_step/100,1))
+        st.write(len(results),'abstracts classified as epidemiological.')
+        return results.sort_values('EPI_PROB', ascending=False)
 #Identical to search_term_extraction, except it returns a JSON object instead of a df
 def API_extraction(search_term:Union[int,str], maxResults:int, filtering:str, #for abstract search