Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -68,7 +68,7 @@ class Journal:
|
|
68 |
def __repr__(self):
|
69 |
return f"Journal(name='{self.name}', bytes='{self.bytes}')"
|
70 |
|
71 |
-
llm = ChatOpenAI(temperature=0, model="gpt-
|
72 |
|
73 |
textex_chain = create_extraction_chain(textex_schema, llm)
|
74 |
tablex_chain = create_extraction_chain(tablex_schema, llm)
|
@@ -104,7 +104,7 @@ if uploaded_files:
|
|
104 |
if on_h:
|
105 |
chunk_size_h = st.selectbox(
|
106 |
'Tokens amounts per process :',
|
107 |
-
(
|
108 |
)
|
109 |
parseButtonH = st.button("Get Result", key='table_H')
|
110 |
|
@@ -116,7 +116,7 @@ if uploaded_files:
|
|
116 |
if on_v:
|
117 |
chunk_size_v = st.selectbox(
|
118 |
'Tokens amounts per process :',
|
119 |
-
(
|
120 |
)
|
121 |
parseButtonV = st.button("Get Result", key='table_V')
|
122 |
with col3:
|
@@ -127,7 +127,7 @@ if uploaded_files:
|
|
127 |
if on_t:
|
128 |
chunk_size_t = st.selectbox(
|
129 |
'Tokens amounts per process :',
|
130 |
-
(
|
131 |
)
|
132 |
parseButtonT = st.button("Get Result", key="no_Table")
|
133 |
|
@@ -161,7 +161,10 @@ if uploaded_files:
|
|
161 |
try:
|
162 |
df = pd.DataFrame(literal_eval(str(json.dumps(tablex_chain.run(inp)[0])).replace("\'", '\"')), index=[0]).fillna('')
|
163 |
except:
|
164 |
-
|
|
|
|
|
|
|
165 |
# df = pd.DataFrame(repair_json(tablex_chain.run(inp)[0]))
|
166 |
chunkdf.append(df)
|
167 |
|
@@ -203,7 +206,7 @@ if uploaded_files:
|
|
203 |
embeddings = OpenAIEmbeddings()
|
204 |
|
205 |
db = Chroma.from_documents(docs, embeddings)
|
206 |
-
llm_table = ChatOpenAI(model_name="gpt-
|
207 |
qa_chain = RetrievalQA.from_chain_type(llm_table, retriever=db.as_retriever())
|
208 |
|
209 |
# List of questions
|
@@ -232,6 +235,7 @@ if uploaded_files:
|
|
232 |
if output_list[0]['result'].split('\n')[i] != "":
|
233 |
try:
|
234 |
row = literal_eval(repair_json(output_list[0]['result'].split('\n')[i]))[0]
|
|
|
235 |
row = {**row, **{
|
236 |
'Title' : concat['title'][0],
|
237 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
@@ -397,26 +401,33 @@ if uploaded_files:
|
|
397 |
'Recommendation' : summary,
|
398 |
}
|
399 |
}
|
400 |
-
if
|
|
|
|
|
|
|
|
|
|
|
401 |
row.update({
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
|
|
|
|
420 |
else:
|
421 |
L.append(row)
|
422 |
except SyntaxError:
|
@@ -616,7 +627,17 @@ if uploaded_files:
|
|
616 |
chunkdf = []
|
617 |
for i, chunk in enumerate(text_chunk):
|
618 |
inp = chunk
|
619 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
620 |
chunkdf.append(df)
|
621 |
|
622 |
concat = pd.concat(chunkdf, axis=0).reset_index().drop('index', axis=1).fillna('')
|
@@ -687,11 +708,10 @@ if uploaded_files:
|
|
687 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
688 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
689 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
690 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
691 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
692 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
693 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
694 |
-
'Recommendation' : summary,
|
695 |
}}
|
696 |
if len(row['Genes'].strip().split(',')) > 1:
|
697 |
for g in row['Genes'].strip().split(','):
|
@@ -705,8 +725,8 @@ if uploaded_files:
|
|
705 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
706 |
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
707 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
708 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
709 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
710 |
'Recommendation' : summary,
|
711 |
})
|
712 |
else:
|
@@ -718,10 +738,10 @@ if uploaded_files:
|
|
718 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
719 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
720 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
721 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
722 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
723 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
724 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
725 |
'Recommendation' : summary,
|
726 |
}}
|
727 |
if len(row['Genes'].strip().split(',')) > 1:
|
@@ -734,10 +754,10 @@ if uploaded_files:
|
|
734 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
735 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
736 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
737 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
738 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
739 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
740 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
741 |
'Recommendation' : summary,
|
742 |
})
|
743 |
else:
|
@@ -750,10 +770,10 @@ if uploaded_files:
|
|
750 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
751 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
752 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
753 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
754 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
755 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
756 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
757 |
'Recommendation' : summary,
|
758 |
}
|
759 |
}
|
@@ -770,10 +790,10 @@ if uploaded_files:
|
|
770 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
771 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
772 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
773 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
774 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
775 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
776 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
777 |
'Recommendation' : summary,
|
778 |
}
|
779 |
}
|
@@ -793,10 +813,10 @@ if uploaded_files:
|
|
793 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
794 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
795 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
796 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
797 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
798 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
799 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
800 |
'Recommendation' : summary,
|
801 |
}}
|
802 |
if row['SNPs'] != "Not available":
|
@@ -813,10 +833,10 @@ if uploaded_files:
|
|
813 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
814 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
815 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
816 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
817 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
818 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
819 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
820 |
'Recommendation' : summary,
|
821 |
})
|
822 |
else:
|
@@ -828,10 +848,10 @@ if uploaded_files:
|
|
828 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
829 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
830 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
831 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
832 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
833 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
834 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
835 |
'Recommendation' : summary,
|
836 |
}}
|
837 |
if row['SNPs'] != "Not available":
|
@@ -848,10 +868,10 @@ if uploaded_files:
|
|
848 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
849 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
850 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
851 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
852 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
853 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
854 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
855 |
'Recommendation' : summary,
|
856 |
})
|
857 |
else:
|
@@ -864,10 +884,10 @@ if uploaded_files:
|
|
864 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
865 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
866 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
867 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
868 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
869 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
870 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
871 |
'Recommendation' : summary,
|
872 |
}
|
873 |
}
|
@@ -884,10 +904,10 @@ if uploaded_files:
|
|
884 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
885 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
886 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
887 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
888 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
889 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
890 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
891 |
'Recommendation' : summary,
|
892 |
}
|
893 |
}
|
@@ -907,10 +927,10 @@ if uploaded_files:
|
|
907 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
908 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
909 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
910 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
911 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
912 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
913 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
914 |
'Recommendation' : summary,
|
915 |
}
|
916 |
}
|
@@ -927,10 +947,10 @@ if uploaded_files:
|
|
927 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
928 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
929 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
930 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
931 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
932 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
933 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
934 |
'Recommendation' : summary,
|
935 |
}
|
936 |
}
|
@@ -948,10 +968,10 @@ if uploaded_files:
|
|
948 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
949 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
950 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
951 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
952 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
953 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
954 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
955 |
'Recommendation' : summary,
|
956 |
}
|
957 |
}
|
@@ -968,10 +988,10 @@ if uploaded_files:
|
|
968 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
969 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
970 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
971 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
972 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
973 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
974 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
975 |
'Recommendation' : summary,
|
976 |
}
|
977 |
}
|
@@ -1039,7 +1059,10 @@ if uploaded_files:
|
|
1039 |
time.sleep(0.1)
|
1040 |
st.write("☑ Generating Summary ...")
|
1041 |
|
1042 |
-
|
|
|
|
|
|
|
1043 |
for col in list(concat.columns):
|
1044 |
concat[col] = concat[col].apply(lambda x: x if x not in ['N/A', 'not mentioned', 'Not mentioned', 'Unknown'] else '')
|
1045 |
|
@@ -1096,7 +1119,7 @@ if uploaded_files:
|
|
1096 |
'Recommendation' : summary,
|
1097 |
})
|
1098 |
|
1099 |
-
csv = pd.concat([csv, pd.DataFrame(L)], ignore_index=True)
|
1100 |
status.update(label="Gene and SNPs succesfully collected.")
|
1101 |
st.dataframe(csv)
|
1102 |
with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
|
|
|
68 |
def __repr__(self):
|
69 |
return f"Journal(name='{self.name}', bytes='{self.bytes}')"
|
70 |
|
71 |
+
llm = ChatOpenAI(temperature=0, model="gpt-4-0125-preview")
|
72 |
|
73 |
textex_chain = create_extraction_chain(textex_schema, llm)
|
74 |
tablex_chain = create_extraction_chain(tablex_schema, llm)
|
|
|
104 |
if on_h:
|
105 |
chunk_size_h = st.selectbox(
|
106 |
'Tokens amounts per process :',
|
107 |
+
(120000, 96000, 64000, 32000), key='table_h'
|
108 |
)
|
109 |
parseButtonH = st.button("Get Result", key='table_H')
|
110 |
|
|
|
116 |
if on_v:
|
117 |
chunk_size_v = st.selectbox(
|
118 |
'Tokens amounts per process :',
|
119 |
+
(120000, 96000, 64000, 32000), key='table_v'
|
120 |
)
|
121 |
parseButtonV = st.button("Get Result", key='table_V')
|
122 |
with col3:
|
|
|
127 |
if on_t:
|
128 |
chunk_size_t = st.selectbox(
|
129 |
'Tokens amounts per process :',
|
130 |
+
(120000, 96000, 64000, 32000), key='no_table'
|
131 |
)
|
132 |
parseButtonT = st.button("Get Result", key="no_Table")
|
133 |
|
|
|
161 |
try:
|
162 |
df = pd.DataFrame(literal_eval(str(json.dumps(tablex_chain.run(inp)[0])).replace("\'", '\"')), index=[0]).fillna('')
|
163 |
except:
|
164 |
+
try:
|
165 |
+
df = pd.DataFrame(literal_eval(str(json.dumps(tablex_chain.run(inp)[0]) + ']').replace("\'", '\"')), index=[0]).fillna('')
|
166 |
+
except SyntaxError:
|
167 |
+
df = pd.DataFrame(literal_eval('[' + str(json.dumps(tablex_chain.run(inp)[0]) + ']').replace("\'", '\"')), index=[0]).fillna('')
|
168 |
# df = pd.DataFrame(repair_json(tablex_chain.run(inp)[0]))
|
169 |
chunkdf.append(df)
|
170 |
|
|
|
206 |
embeddings = OpenAIEmbeddings()
|
207 |
|
208 |
db = Chroma.from_documents(docs, embeddings)
|
209 |
+
llm_table = ChatOpenAI(model_name="gpt-4-0125-preview", temperature=0)
|
210 |
qa_chain = RetrievalQA.from_chain_type(llm_table, retriever=db.as_retriever())
|
211 |
|
212 |
# List of questions
|
|
|
235 |
if output_list[0]['result'].split('\n')[i] != "":
|
236 |
try:
|
237 |
row = literal_eval(repair_json(output_list[0]['result'].split('\n')[i]))[0]
|
238 |
+
st.write(row)
|
239 |
row = {**row, **{
|
240 |
'Title' : concat['title'][0],
|
241 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
|
|
401 |
'Recommendation' : summary,
|
402 |
}
|
403 |
}
|
404 |
+
if 'SNPs' in list(row.keys()):
|
405 |
+
if row['SNPs'] != "Not available":
|
406 |
+
row.update({
|
407 |
+
'SNPs' : "Not available"
|
408 |
+
})
|
409 |
+
else:
|
410 |
row.update({
|
411 |
+
'SNPs' : "Not available"
|
412 |
+
})
|
413 |
+
|
414 |
+
if 'Genes' in list(row.keys()):
|
415 |
+
if len(row['Genes'].strip().split(',')) > 1:
|
416 |
+
for g in row['Genes'].strip().split(','):
|
417 |
+
L.append({
|
418 |
+
'Title' : concat['title'][0],
|
419 |
+
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
420 |
+
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
421 |
+
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
422 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
423 |
+
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
424 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
425 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
426 |
+
'Recommendation' : summary,
|
427 |
+
'Genes' : g.strip().upper().replace('Unknown', ''),
|
428 |
+
"SNPs" : "Not available",
|
429 |
+
"Diseases" : ''.join(list(row['Diseases'].title() if row['Diseases'] not in ['T2D', 'T2DM', 'NAFLD', 'CVD'] else row['Diseases'])).replace('Unknown', '').replace('Unknown', '')
|
430 |
+
})
|
431 |
else:
|
432 |
L.append(row)
|
433 |
except SyntaxError:
|
|
|
627 |
chunkdf = []
|
628 |
for i, chunk in enumerate(text_chunk):
|
629 |
inp = chunk
|
630 |
+
# Assuming tablex_chain.run(inp)[0] returns a dictionary
|
631 |
+
original_dict = tablex_chain.run(inp)[0]
|
632 |
+
# Convert the dictionary to a JSON string
|
633 |
+
json_str = json.dumps(original_dict)
|
634 |
+
# Replace single quotes with double quotes in the JSON string
|
635 |
+
json_str_fixed = json_str.replace("'", '"')
|
636 |
+
# Use literal_eval to safely evaluate the JSON string as a Python dictionary
|
637 |
+
fixed_dict = literal_eval(json_str_fixed)
|
638 |
+
# Create a DataFrame from the fixed dictionary
|
639 |
+
df = pd.DataFrame(fixed_dict, index=[0]).fillna('')
|
640 |
+
# df = pd.DataFrame(literal_eval(str(json.dumps(tablex_chain.run(inp)[0])).replace("\'", '\"')), index=[0]).fillna('')
|
641 |
chunkdf.append(df)
|
642 |
|
643 |
concat = pd.concat(chunkdf, axis=0).reset_index().drop('index', axis=1).fillna('')
|
|
|
708 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
709 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
710 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
711 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
712 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
713 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
714 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
|
|
715 |
}}
|
716 |
if len(row['Genes'].strip().split(',')) > 1:
|
717 |
for g in row['Genes'].strip().split(','):
|
|
|
725 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
726 |
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
727 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
728 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
729 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
730 |
'Recommendation' : summary,
|
731 |
})
|
732 |
else:
|
|
|
738 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
739 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
740 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
741 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
742 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
743 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
744 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
745 |
'Recommendation' : summary,
|
746 |
}}
|
747 |
if len(row['Genes'].strip().split(',')) > 1:
|
|
|
754 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
755 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
756 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
757 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
758 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
759 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
760 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
761 |
'Recommendation' : summary,
|
762 |
})
|
763 |
else:
|
|
|
770 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
771 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
772 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
773 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
774 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
775 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
776 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
777 |
'Recommendation' : summary,
|
778 |
}
|
779 |
}
|
|
|
790 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
791 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
792 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
793 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
794 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
795 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
796 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
797 |
'Recommendation' : summary,
|
798 |
}
|
799 |
}
|
|
|
813 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
814 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
815 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
816 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
817 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
818 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
819 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
820 |
'Recommendation' : summary,
|
821 |
}}
|
822 |
if row['SNPs'] != "Not available":
|
|
|
833 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
834 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
835 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
836 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
837 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
838 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
839 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
840 |
'Recommendation' : summary,
|
841 |
})
|
842 |
else:
|
|
|
848 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
849 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
850 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
851 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
852 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
853 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
854 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
855 |
'Recommendation' : summary,
|
856 |
}}
|
857 |
if row['SNPs'] != "Not available":
|
|
|
868 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
869 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
870 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
871 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
872 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
873 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
874 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
875 |
'Recommendation' : summary,
|
876 |
})
|
877 |
else:
|
|
|
884 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
885 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
886 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
887 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
888 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
889 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
890 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
891 |
'Recommendation' : summary,
|
892 |
}
|
893 |
}
|
|
|
904 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
905 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
906 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
907 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
908 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
909 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
910 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
911 |
'Recommendation' : summary,
|
912 |
}
|
913 |
}
|
|
|
927 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
928 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
929 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
930 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
931 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
932 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
933 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
934 |
'Recommendation' : summary,
|
935 |
}
|
936 |
}
|
|
|
947 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
948 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
949 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
950 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
951 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
952 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
953 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
954 |
'Recommendation' : summary,
|
955 |
}
|
956 |
}
|
|
|
968 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
969 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
970 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
971 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
972 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
973 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
974 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
975 |
'Recommendation' : summary,
|
976 |
}
|
977 |
}
|
|
|
988 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
989 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
990 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
991 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
992 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
993 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
994 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
995 |
'Recommendation' : summary,
|
996 |
}
|
997 |
}
|
|
|
1059 |
time.sleep(0.1)
|
1060 |
st.write("☑ Generating Summary ...")
|
1061 |
|
1062 |
+
# Normalise the 'SNPs' column: keep only string values that start with 'rs'
# and blank out everything else. When the column is missing, create it empty
# so that later code can rely on it being present.
if 'SNPs' in list(concat.columns):
    # Guard on the type first: a cell that is not a string (e.g. NaN or None
    # for a row that had no SNP value) has no .startswith and would
    # otherwise abort the whole run with AttributeError.
    concat['SNPs'] = concat['SNPs'].apply(
        lambda x: x if isinstance(x, str) and x.startswith('rs') else ''
    )
else:
    concat['SNPs'] = ''

# Replace placeholder answers with an empty string in every column.
for col in list(concat.columns):
    concat[col] = concat[col].apply(
        lambda x: x if x not in ['N/A', 'not mentioned', 'Not mentioned', 'Unknown'] else ''
    )
|
1068 |
|
|
|
1119 |
'Recommendation' : summary,
|
1120 |
})
|
1121 |
|
1122 |
+
csv = pd.concat([csv, pd.DataFrame(L)], ignore_index=True).drop_duplicates(subset='Genes')
|
1123 |
status.update(label="Gene and SNPs succesfully collected.")
|
1124 |
st.dataframe(csv)
|
1125 |
with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
|