firqaaa committed on
Commit bf2428b
1 Parent(s): 7181652

Update app.py

Files changed (1)
  1. app.py +99 -76
app.py CHANGED
@@ -68,7 +68,7 @@ class Journal:
68
  def __repr__(self):
69
  return f"Journal(name='{self.name}', bytes='{self.bytes}')"
70
 
71
- llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-1106")
72
 
73
  textex_chain = create_extraction_chain(textex_schema, llm)
74
  tablex_chain = create_extraction_chain(tablex_schema, llm)
@@ -104,7 +104,7 @@ if uploaded_files:
104
  if on_h:
105
  chunk_size_h = st.selectbox(
106
  'Tokens amounts per process :',
107
- (15000, 12000, 10000, 8000, 5000), key='table_h'
108
  )
109
  parseButtonH = st.button("Get Result", key='table_H')
110
 
@@ -116,7 +116,7 @@ if uploaded_files:
116
  if on_v:
117
  chunk_size_v = st.selectbox(
118
  'Tokens amounts per process :',
119
- (15000, 12000, 10000, 8000, 5000), key='table_v'
120
  )
121
  parseButtonV = st.button("Get Result", key='table_V')
122
  with col3:
@@ -127,7 +127,7 @@ if uploaded_files:
127
  if on_t:
128
  chunk_size_t = st.selectbox(
129
  'Tokens amounts per process :',
130
- (15000, 12000, 10000, 8000, 5000), key='no_table'
131
  )
132
  parseButtonT = st.button("Get Result", key="no_Table")
133
 
@@ -161,7 +161,10 @@ if uploaded_files:
161
  try:
162
  df = pd.DataFrame(literal_eval(str(json.dumps(tablex_chain.run(inp)[0])).replace("\'", '\"')), index=[0]).fillna('')
163
  except:
164
- df = pd.DataFrame(literal_eval(str(json.dumps(tablex_chain.run(inp)[0]) + ']').replace("\'", '\"')), index=[0]).fillna('')
165
  # df = pd.DataFrame(repair_json(tablex_chain.run(inp)[0]))
166
  chunkdf.append(df)
167
 
@@ -203,7 +206,7 @@ if uploaded_files:
203
  embeddings = OpenAIEmbeddings()
204
 
205
  db = Chroma.from_documents(docs, embeddings)
206
- llm_table = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0)
207
  qa_chain = RetrievalQA.from_chain_type(llm_table, retriever=db.as_retriever())
208
 
209
  # List of questions
@@ -232,6 +235,7 @@ if uploaded_files:
232
  if output_list[0]['result'].split('\n')[i] != "":
233
  try:
234
  row = literal_eval(repair_json(output_list[0]['result'].split('\n')[i]))[0]
235
  row = {**row, **{
236
  'Title' : concat['title'][0],
237
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
@@ -397,26 +401,33 @@ if uploaded_files:
397
  'Recommendation' : summary,
398
  }
399
  }
400
- if row['SNPs'] != "Not available":
401
  row.update({
402
- 'SNPs' : "Not available"
403
- })
404
- if len(row['Genes'].strip().split(',')) > 1:
405
- for g in row['Genes'].strip().split(','):
406
- L.append({
407
- 'Title' : concat['title'][0],
408
- 'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
409
- 'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
410
- 'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
411
- 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
412
- 'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
413
- 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
414
- 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
415
- 'Recommendation' : summary,
416
- 'Genes' : g.strip().upper().replace('Unknown', ''),
417
- "SNPs" : "Not available",
418
- "Diseases" : ''.join(list(row['Diseases'].title() if row['Diseases'] not in ['T2D', 'T2DM', 'NAFLD', 'CVD'] else row['Diseases'])).replace('Unknown', '').replace('Unknown', '')
419
- })
420
  else:
421
  L.append(row)
422
  except SyntaxError:
@@ -616,7 +627,17 @@ if uploaded_files:
616
  chunkdf = []
617
  for i, chunk in enumerate(text_chunk):
618
  inp = chunk
619
- df = pd.DataFrame(literal_eval(str(json.dumps(tablex_chain.run(inp)[0])).replace("\'", '\"')), index=[0]).fillna('')
620
  chunkdf.append(df)
621
 
622
  concat = pd.concat(chunkdf, axis=0).reset_index().drop('index', axis=1).fillna('')
@@ -687,11 +708,10 @@ if uploaded_files:
687
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
688
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
689
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
690
- 'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
691
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
692
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
693
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
694
- 'Recommendation' : summary,
695
  }}
696
  if len(row['Genes'].strip().split(',')) > 1:
697
  for g in row['Genes'].strip().split(','):
@@ -705,8 +725,8 @@ if uploaded_files:
705
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
706
  'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
707
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
708
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
709
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
710
  'Recommendation' : summary,
711
  })
712
  else:
@@ -718,10 +738,10 @@ if uploaded_files:
718
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
719
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
720
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
721
- 'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
722
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
723
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
724
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
725
  'Recommendation' : summary,
726
  }}
727
  if len(row['Genes'].strip().split(',')) > 1:
@@ -734,10 +754,10 @@ if uploaded_files:
734
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
735
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
736
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
737
- 'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
738
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
739
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
740
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
741
  'Recommendation' : summary,
742
  })
743
  else:
@@ -750,10 +770,10 @@ if uploaded_files:
750
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
751
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
752
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
753
- 'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
754
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
755
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
756
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
757
  'Recommendation' : summary,
758
  }
759
  }
@@ -770,10 +790,10 @@ if uploaded_files:
770
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
771
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
772
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
773
- 'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
774
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
775
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
776
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
777
  'Recommendation' : summary,
778
  }
779
  }
@@ -793,10 +813,10 @@ if uploaded_files:
793
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
794
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
795
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
796
- 'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
797
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
798
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
799
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
800
  'Recommendation' : summary,
801
  }}
802
  if row['SNPs'] != "Not available":
@@ -813,10 +833,10 @@ if uploaded_files:
813
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
814
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
815
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
816
- 'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
817
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
818
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
819
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
820
  'Recommendation' : summary,
821
  })
822
  else:
@@ -828,10 +848,10 @@ if uploaded_files:
828
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
829
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
830
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
831
- 'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
832
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
833
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
834
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
835
  'Recommendation' : summary,
836
  }}
837
  if row['SNPs'] != "Not available":
@@ -848,10 +868,10 @@ if uploaded_files:
848
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
849
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
850
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
851
- 'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
852
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
853
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
854
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
855
  'Recommendation' : summary,
856
  })
857
  else:
@@ -864,10 +884,10 @@ if uploaded_files:
864
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
865
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
866
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
867
- 'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
868
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
869
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
870
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
871
  'Recommendation' : summary,
872
  }
873
  }
@@ -884,10 +904,10 @@ if uploaded_files:
884
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
885
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
886
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
887
- 'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
888
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
889
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
890
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
891
  'Recommendation' : summary,
892
  }
893
  }
@@ -907,10 +927,10 @@ if uploaded_files:
907
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
908
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
909
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
910
- 'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
911
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
912
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
913
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
914
  'Recommendation' : summary,
915
  }
916
  }
@@ -927,10 +947,10 @@ if uploaded_files:
927
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
928
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
929
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
930
- 'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
931
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
932
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
933
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
934
  'Recommendation' : summary,
935
  }
936
  }
@@ -948,10 +968,10 @@ if uploaded_files:
948
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
949
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
950
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
951
- 'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
952
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
953
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
954
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
955
  'Recommendation' : summary,
956
  }
957
  }
@@ -968,10 +988,10 @@ if uploaded_files:
968
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
969
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
970
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
971
- 'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
972
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
973
- 'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
974
- 'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
975
  'Recommendation' : summary,
976
  }
977
  }
@@ -1039,7 +1059,10 @@ if uploaded_files:
1039
  time.sleep(0.1)
1040
  st.write("☑ Generating Summary ...")
1041
 
1042
- concat['SNPs'] = concat['SNPs'].apply(lambda x: x if x.startswith('rs') else '')
1043
  for col in list(concat.columns):
1044
  concat[col] = concat[col].apply(lambda x: x if x not in ['N/A', 'not mentioned', 'Not mentioned', 'Unknown'] else '')
1045
 
@@ -1096,7 +1119,7 @@ if uploaded_files:
1096
  'Recommendation' : summary,
1097
  })
1098
 
1099
- csv = pd.concat([csv, pd.DataFrame(L)], ignore_index=True)
1100
  status.update(label="Gene and SNPs successfully collected.")
1101
  st.dataframe(csv)
1102
  with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
 
68
  def __repr__(self):
69
  return f"Journal(name='{self.name}', bytes='{self.bytes}')"
70
 
71
+ llm = ChatOpenAI(temperature=0, model="gpt-4-0125-preview")
72
 
73
  textex_chain = create_extraction_chain(textex_schema, llm)
74
  tablex_chain = create_extraction_chain(tablex_schema, llm)
 
104
  if on_h:
105
  chunk_size_h = st.selectbox(
106
  'Tokens amounts per process :',
107
+ (120000, 96000, 64000, 32000), key='table_h'
108
  )
109
  parseButtonH = st.button("Get Result", key='table_H')
110
 
 
116
  if on_v:
117
  chunk_size_v = st.selectbox(
118
  'Tokens amounts per process :',
119
+ (120000, 96000, 64000, 32000), key='table_v'
120
  )
121
  parseButtonV = st.button("Get Result", key='table_V')
122
  with col3:
 
127
  if on_t:
128
  chunk_size_t = st.selectbox(
129
  'Tokens amounts per process :',
130
+ (120000, 96000, 64000, 32000), key='no_table'
131
  )
132
  parseButtonT = st.button("Get Result", key="no_Table")
133
 
 
161
  try:
162
  df = pd.DataFrame(literal_eval(str(json.dumps(tablex_chain.run(inp)[0])).replace("\'", '\"')), index=[0]).fillna('')
163
  except:
164
+ try:
165
+ df = pd.DataFrame(literal_eval(str(json.dumps(tablex_chain.run(inp)[0]) + ']').replace("\'", '\"')), index=[0]).fillna('')
166
+ except SyntaxError:
167
+ df = pd.DataFrame(literal_eval('[' + str(json.dumps(tablex_chain.run(inp)[0]) + ']').replace("\'", '\"')), index=[0]).fillna('')
168
  # df = pd.DataFrame(repair_json(tablex_chain.run(inp)[0]))
169
  chunkdf.append(df)
170
 
 
206
  embeddings = OpenAIEmbeddings()
207
 
208
  db = Chroma.from_documents(docs, embeddings)
209
+ llm_table = ChatOpenAI(model_name="gpt-4-0125-preview", temperature=0)
210
  qa_chain = RetrievalQA.from_chain_type(llm_table, retriever=db.as_retriever())
211
 
212
  # List of questions
 
235
  if output_list[0]['result'].split('\n')[i] != "":
236
  try:
237
  row = literal_eval(repair_json(output_list[0]['result'].split('\n')[i]))[0]
238
+ st.write(row)
239
  row = {**row, **{
240
  'Title' : concat['title'][0],
241
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
 
401
  'Recommendation' : summary,
402
  }
403
  }
404
+ if 'SNPs' in list(row.keys()):
405
+ if row['SNPs'] != "Not available":
406
+ row.update({
407
+ 'SNPs' : "Not available"
408
+ })
409
+ else:
410
  row.update({
411
+ 'SNPs' : "Not available"
412
+ })
413
+
414
+ if 'Genes' in list(row.keys()):
415
+ if len(row['Genes'].strip().split(',')) > 1:
416
+ for g in row['Genes'].strip().split(','):
417
+ L.append({
418
+ 'Title' : concat['title'][0],
419
+ 'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
420
+ 'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
421
+ 'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
422
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
423
+ 'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
424
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
425
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
426
+ 'Recommendation' : summary,
427
+ 'Genes' : g.strip().upper().replace('Unknown', ''),
428
+ "SNPs" : "Not available",
429
+ "Diseases" : ''.join(list(row['Diseases'].title() if row['Diseases'] not in ['T2D', 'T2DM', 'NAFLD', 'CVD'] else row['Diseases'])).replace('Unknown', '').replace('Unknown', '')
430
+ })
431
  else:
432
  L.append(row)
433
  except SyntaxError:
 
627
  chunkdf = []
628
  for i, chunk in enumerate(text_chunk):
629
  inp = chunk
630
+ # Assuming tablex_chain.run(inp)[0] returns a dictionary
631
+ original_dict = tablex_chain.run(inp)[0]
632
+ # Convert the dictionary to a JSON string
633
+ json_str = json.dumps(original_dict)
634
+ # Replace single quotes with double quotes in the JSON string
635
+ json_str_fixed = json_str.replace("'", '"')
636
+ # Use literal_eval to safely evaluate the JSON string as a Python dictionary
637
+ fixed_dict = literal_eval(json_str_fixed)
638
+ # Create a DataFrame from the fixed dictionary
639
+ df = pd.DataFrame(fixed_dict, index=[0]).fillna('')
640
+ # df = pd.DataFrame(literal_eval(str(json.dumps(tablex_chain.run(inp)[0])).replace("\'", '\"')), index=[0]).fillna('')
641
  chunkdf.append(df)
642
 
643
  concat = pd.concat(chunkdf, axis=0).reset_index().drop('index', axis=1).fillna('')
 
708
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
709
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
710
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
711
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
712
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
713
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
714
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
715
  }}
716
  if len(row['Genes'].strip().split(',')) > 1:
717
  for g in row['Genes'].strip().split(','):
 
725
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
726
  'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
727
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
728
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
729
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
730
  'Recommendation' : summary,
731
  })
732
  else:
 
738
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
739
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
740
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
741
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
742
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
743
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
744
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
745
  'Recommendation' : summary,
746
  }}
747
  if len(row['Genes'].strip().split(',')) > 1:
 
754
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
755
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
756
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
757
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
758
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
759
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
760
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
761
  'Recommendation' : summary,
762
  })
763
  else:
 
770
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
771
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
772
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
773
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
774
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
775
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
776
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
777
  'Recommendation' : summary,
778
  }
779
  }
 
790
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
791
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
792
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
793
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
794
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
795
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
796
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
797
  'Recommendation' : summary,
798
  }
799
  }
 
813
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
814
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
815
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
816
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
817
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
818
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
819
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
820
  'Recommendation' : summary,
821
  }}
822
  if row['SNPs'] != "Not available":
 
833
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
834
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
835
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
836
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
837
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
838
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
839
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
840
  'Recommendation' : summary,
841
  })
842
  else:
 
848
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
849
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
850
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
851
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
852
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
853
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
854
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
855
  'Recommendation' : summary,
856
  }}
857
  if row['SNPs'] != "Not available":
 
868
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
869
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
870
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
871
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
872
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
873
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
874
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
875
  'Recommendation' : summary,
876
  })
877
  else:
 
884
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
885
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
886
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
887
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
888
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
889
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
890
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
891
  'Recommendation' : summary,
892
  }
893
  }
 
904
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
905
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
906
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
907
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
908
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
909
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
910
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
911
  'Recommendation' : summary,
912
  }
913
  }
 
927
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
928
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
929
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
930
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
931
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
932
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
933
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
934
  'Recommendation' : summary,
935
  }
936
  }
 
947
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
948
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
949
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
950
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
951
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
952
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
953
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
954
  'Recommendation' : summary,
955
  }
956
  }
 
968
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
969
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
970
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
971
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
972
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
973
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
974
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
975
  'Recommendation' : summary,
976
  }
977
  }
 
988
  'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
989
  'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
990
  'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
991
+ 'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
992
  'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
993
+ 'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
994
+ 'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
995
  'Recommendation' : summary,
996
  }
997
  }
 
1059
  time.sleep(0.1)
1060
  st.write("☑ Generating Summary ...")
1061
 
1062
+ if 'SNPs' in list(concat.columns):
1063
+ concat['SNPs'] = concat['SNPs'].apply(lambda x: x if x.startswith('rs') else '')
1064
+ else:
1065
+ concat['SNPs'] = ''
1066
  for col in list(concat.columns):
1067
  concat[col] = concat[col].apply(lambda x: x if x not in ['N/A', 'not mentioned', 'Not mentioned', 'Unknown'] else '')
1068
 
 
1119
  'Recommendation' : summary,
1120
  })
1121
 
1122
+ csv = pd.concat([csv, pd.DataFrame(L)], ignore_index=True).drop_duplicates(subset='Genes')
1123
  status.update(label="Gene and SNPs successfully collected.")
1124
  st.dataframe(csv)
1125
  with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer: