Spaces:
Running
Running
wzkariampuzha
committed on
Commit
•
b94c6e2
1
Parent(s):
ec290ed
Update extract_abs.py
Browse files
- extract_abs.py +6 -3
extract_abs.py
CHANGED
@@ -301,12 +301,13 @@ def streamlit_extraction(search_term:Union[int,str], maxResults:int, filtering:s
|
|
301 |
st.write("SEARCHING FOR: "+ str(search_term_list))
|
302 |
|
303 |
#Gather title+abstracts into a dictionary {pmid:abstract}
|
304 |
-
pmid_abs = classify_abs.streamlit_getAbs(search_term_list, maxResults, filtering)
|
305 |
if len(pmid_abs)==0:
|
306 |
st.error('No results were gathered. Enter a new search term.')
|
307 |
else:
|
308 |
st.write("Gathered " +str(len(pmid_abs))+" PubMed IDs. Classifying and extracting epidemiology information...")
|
309 |
-
|
|
|
310 |
i = 0
|
311 |
my_bar = st.progress(i)
|
312 |
percent_at_step = 100/len(pmid_abs)
|
@@ -321,11 +322,13 @@ def streamlit_extraction(search_term:Union[int,str], maxResults:int, filtering:s
|
|
321 |
extraction.update({'PMID':pmid, 'ABSTRACT':abstract, 'EPI_PROB':epi_prob, 'IsEpi':isEpi})
|
322 |
#Slow dataframe update
|
323 |
results = results.append(extraction, ignore_index=True)
|
|
|
324 |
i+=1
|
325 |
my_bar.progress(round(i*percent_at_step/100,1))
|
326 |
|
|
|
327 |
st.write(len(results),'abstracts classified as epidemiological.')
|
328 |
-
return results.sort_values('EPI_PROB', ascending=False)
|
329 |
|
330 |
#Identical to search_term_extraction, except it returns a JSON object instead of a df
|
331 |
def API_extraction(search_term:Union[int,str], maxResults:int, filtering:str, #for abstract search
|
|
|
301 |
st.write("SEARCHING FOR: "+ str(search_term_list))
|
302 |
|
303 |
#Gather title+abstracts into a dictionary {pmid:abstract}
|
304 |
+
pmid_abs, sankey_initial = classify_abs.streamlit_getAbs(search_term_list, maxResults, filtering)
|
305 |
if len(pmid_abs)==0:
|
306 |
st.error('No results were gathered. Enter a new search term.')
|
307 |
else:
|
308 |
st.write("Gathered " +str(len(pmid_abs))+" PubMed IDs. Classifying and extracting epidemiology information...")
|
309 |
+
gathered, relevant = sankey_initial
|
310 |
+
epidemiologic = 0
|
311 |
i = 0
|
312 |
my_bar = st.progress(i)
|
313 |
percent_at_step = 100/len(pmid_abs)
|
|
|
322 |
extraction.update({'PMID':pmid, 'ABSTRACT':abstract, 'EPI_PROB':epi_prob, 'IsEpi':isEpi})
|
323 |
#Slow dataframe update
|
324 |
results = results.append(extraction, ignore_index=True)
|
325 |
+
epidemiologic+=1
|
326 |
i+=1
|
327 |
my_bar.progress(round(i*percent_at_step/100,1))
|
328 |
|
329 |
+
sankey_data = (gathered, relevant,epidemiologic)
|
330 |
st.write(len(results),'abstracts classified as epidemiological.')
|
331 |
+
return results.sort_values('EPI_PROB', ascending=False), sankey_data
|
332 |
|
333 |
#Identical to search_term_extraction, except it returns a JSON object instead of a df
|
334 |
def API_extraction(search_term:Union[int,str], maxResults:int, filtering:str, #for abstract search
|