libokj committed on
Commit
73e18be
·
1 Parent(s): c2d0602

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -55
app.py CHANGED
@@ -403,12 +403,18 @@ def validate_columns(df, mandatory_cols):
403
 
404
 
405
  def process_target_fasta(sequence):
406
- # lines = sequence.strip().split("\n")
407
- # if lines[0].startswith(">"):
408
- # lines = lines[1:]
409
- # return ''.join(lines).split(">")[0]
410
- record = list(SeqIO.parse(io.StringIO(sequence), "fasta"))[0]
411
- return str(record.seq)
 
 
 
 
 
 
412
 
413
 
414
  def send_email(receiver, msg):
@@ -804,7 +810,8 @@ To predict interactions/binding affinities of a single target against a library
804
  HelpTip(
805
  "Enter (paste) a amino acid sequence below manually or upload a FASTA file."
806
  "If multiple entities are in the FASTA, only the first will be used."
807
- "Alternatively, enter a Uniprot ID or gene symbol with organism and click Query for the sequence."
 
808
  )
809
  with gr.Row():
810
  target_input_type = gr.Dropdown(
@@ -838,9 +845,9 @@ To predict interactions/binding affinities of a single target against a library
838
  # with gr.Row():
839
  # with gr.Column():
840
  example_fasta = gr.Button(value='Example: Human MAPK14', elem_id='example')
841
- # with gr.Column():
842
- # gr.File(label='Example FASTA file',
843
- # value='data/examples/MAPK14.fasta', interactive=False)
844
 
845
  with gr.Row():
846
  with gr.Column():
@@ -862,7 +869,7 @@ To predict interactions/binding affinities of a single target against a library
862
  with gr.Row():
863
  with gr.Column():
864
  HelpTip(
865
- "Select a preset compound library (e.g., DrugBank)."
866
  "Alternatively, upload a CSV file with a column named X1 containing compound SMILES, "
867
  "or use an SDF file."
868
  )
@@ -882,15 +889,18 @@ To predict interactions/binding affinities of a single target against a library
882
  "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
883
  "while affinity prediction directly estimates their binding strength measured using IC50."
884
  )
885
- drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()), label='Step 4. Select a Prediction Task',
 
886
  value='Compound-protein interaction')
887
 
888
  with gr.Row():
889
  with gr.Column():
890
- HelpTip("Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the target was trained."
891
- "Please refer to documentation for detailed benchamrk results."
892
- )
893
- drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 5. Select a Preset Model')
 
 
894
  screen_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
895
  with gr.Row():
896
  with gr.Column():
@@ -901,7 +911,7 @@ To predict interactions/binding affinities of a single target against a library
901
 
902
  with gr.Row(visible=True):
903
  with gr.Column():
904
- # drug_screen_clr_btn = gr.ClearButton(size='lg')
905
  drug_screen_btn = gr.Button(value='SCREEN', variant='primary', size='lg')
906
  # TODO Modify the pd df directly with df['X2'] = target
907
 
@@ -943,7 +953,7 @@ To predict interactions/binding affinities of a single compound against a librar
943
  with gr.Row():
944
  with gr.Column():
945
  HelpTip(
946
- "By default, models trained on all protein families (general) will be applied."
947
  "If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
948
  )
949
  target_identify_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
@@ -973,22 +983,26 @@ To predict interactions/binding affinities of a single compound against a librar
973
  "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
974
  "while affinity prediction directly estimates their binding strength measured using IC50."
975
  )
976
- target_identify_task = gr.Dropdown(list(TASK_MAP.keys()), label='Step 4. Select a Prediction Task',
 
977
  value='Compound-protein interaction')
978
 
979
  with gr.Row():
980
  with gr.Column():
981
- HelpTip("Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the compound was trained."
982
- "Please refer to documentation for detailed benchamrk results."
983
- )
984
- target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 5. Select a Preset Model')
 
 
985
  identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
986
 
987
  with gr.Row():
988
  with gr.Column():
989
  target_identify_email = gr.Textbox(
990
  label='Step 6. Email (Optional)',
991
- info="If an email is provided, a notification email will be sent to you when your job is completed."
 
992
  )
993
 
994
  with gr.Row(visible=True):
@@ -1007,45 +1021,69 @@ To predict interactions/binding affinities of a single compound against a librar
1007
  ''')
1008
  with gr.Blocks() as infer_block:
1009
  with gr.Column() as infer_page:
1010
- infer_type = gr.Dropdown(choices=['Upload a compound library and a target library',
1011
- 'Upload a CSV interaction pair dataset'],
1012
- value='Upload a compound library and a target library')
 
1013
  with gr.Column() as pair_upload:
1014
- gr.File(label="Example custom dataset",
1015
- value="data/examples/interaction_pair_inference.csv",
1016
- interactive=False)
1017
- with gr.Column():
 
1018
  infer_data_for_predict = gr.File(
1019
- label='Upload a custom dataset', file_count="single", type='filepath', visible=True)
1020
  with gr.Column() as pair_generate:
1021
  with gr.Row():
1022
- gr.File(label='Example SDF compound library',
1023
  value='data/examples/compound_library.sdf', interactive=False)
1024
- gr.File(label='Example FASTA target library',
1025
  value='data/examples/target_library.fasta', interactive=False)
1026
  with gr.Row():
1027
- gr.File(label='Example CSV compound library',
1028
  value='data/examples/compound_library.csv', interactive=False)
1029
- gr.File(label='Example CSV target library',
1030
  value='data/examples/target_library.csv', interactive=False)
1031
  with gr.Row():
1032
- infer_drug = gr.File(label='SDF/CSV file containing multiple compounds',
1033
  file_count="single", type='filepath')
1034
- infer_target = gr.File(label='FASTA/CSV file containing multiple targets',
1035
  file_count="single", type='filepath')
1036
 
1037
- with gr.Row(visible=True):
1038
- pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()), label='Task')
1039
- pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
1040
- pair_infer_target_family = gr.Dropdown(choices=['General'],
1041
- label='Target family',
1042
- value='General')
 
 
 
1043
 
1044
- # with gr.Row():
1045
- # pair_infer_email = gr.Textbox(
1046
- # label='Email (optional)',
1047
- # info="Your email will be used to send you notifications when your job finishes."
1048
- # )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1049
 
1050
  with gr.Row(visible=True):
1051
  # pair_infer_clr_btn = gr.ClearButton(size='lg')
@@ -1060,7 +1098,6 @@ To predict interactions/binding affinities of a single compound against a librar
1060
  with gr.Blocks() as report:
1061
  gr.Markdown('''
1062
  # <center>DeepSEQreen Chemical Property Report</center>
1063
- <center>
1064
  To compute chemical properties for the predictions of drug hit screening,
1065
  target protein identification, and interaction pair inference.
1066
 
@@ -1068,7 +1105,6 @@ To predict interactions/binding affinities of a single compound against a librar
1068
  your own dataset. The page shows only a preview report displaying at most 30 records
1069
  (with top predicted CPI/CPA if reporting results from a prediction job). For a full report, please
1070
  generate and download a raw data CSV or interactive table HTML file below.
1071
- </center>
1072
  ''')
1073
  with gr.Row():
1074
  file_for_report = gr.File(interactive=True, type='filepath')
@@ -1087,10 +1123,10 @@ To predict interactions/binding affinities of a single compound against a librar
1087
 
1088
  with gr.Row():
1089
  with gr.Column():
1090
- csv_generate = gr.Button(value='Generate raw data (CSV)', interactive=True, variant='primary')
1091
  csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
1092
  with gr.Column():
1093
- html_generate = gr.Button(value='Generate report (HTML)', interactive=True, variant='primary')
1094
  html_download_file = gr.File(label='Download report (HTML)', visible=False)
1095
 
1096
 
@@ -1188,7 +1224,7 @@ To predict interactions/binding affinities of a single compound against a librar
1188
  def example_fill(input_type):
1189
  return {target_id: 'Q16539',
1190
  target_gene: 'MAPK14',
1191
- target_organism: 'Human',
1192
  target_fasta: """
1193
  >sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
1194
  MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
@@ -1230,7 +1266,6 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1230
  & (benchmark_df['Scenario'] == scenario)
1231
  & (benchmark_df['all'] == False)]
1232
  row = filtered_df.loc[filtered_df[score].idxmax()]
1233
-
1234
  return gr.Dropdown(value=row['preset'],
1235
  info=f"Reason: {scenario} in the training dataset; we recommend the model "
1236
  f"with the best {score} ({float(row[score]):.3f}) "
@@ -1339,6 +1374,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1339
  screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
1340
  else:
1341
  screen_df = process_drug_library_upload(library_upload)
 
1342
  if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
1343
  raise gr.Error(f'The uploaded compound library has more records '
1344
  f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
@@ -1576,3 +1612,5 @@ if __name__ == "__main__":
1576
  demo.launch(
1577
  show_api=False,
1578
  )
 
 
 
403
 
404
 
405
  def process_target_fasta(sequence):
406
+ try:
407
+ if sequence:
408
+ # lines = sequence.strip().split("\n")
409
+ # if lines[0].startswith(">"):
410
+ # lines = lines[1:]
411
+ # return ''.join(lines).split(">")[0]
412
+ record = list(SeqIO.parse(io.StringIO(sequence), "fasta"))[0]
413
+ return str(record.seq)
414
+ else:
415
+ raise ValueError('Empty FASTA sequence.')
416
+ except Exception as e:
417
+ raise gr.Error(f'Failed to process FASTA due to error: {str(e)}')
418
 
419
 
420
  def send_email(receiver, msg):
 
810
  HelpTip(
811
  "Enter (paste) a amino acid sequence below manually or upload a FASTA file."
812
  "If multiple entities are in the FASTA, only the first will be used."
813
+ "Alternatively, enter a Uniprot ID or gene symbol with organism and click Query for "
814
+ "the sequence."
815
  )
816
  with gr.Row():
817
  target_input_type = gr.Dropdown(
 
845
  # with gr.Row():
846
  # with gr.Column():
847
  example_fasta = gr.Button(value='Example: Human MAPK14', elem_id='example')
848
+ # with gr.Column():
849
+ # gr.File(label='Example FASTA file',
850
+ # value='data/examples/MAPK14.fasta', interactive=False)
851
 
852
  with gr.Row():
853
  with gr.Column():
 
869
  with gr.Row():
870
  with gr.Column():
871
  HelpTip(
872
+ "Select a preset compound library (e.g., DrugBank)."
873
  "Alternatively, upload a CSV file with a column named X1 containing compound SMILES, "
874
  "or use an SDF file."
875
  )
 
889
  "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
890
  "while affinity prediction directly estimates their binding strength measured using IC50."
891
  )
892
+ drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
893
+ label='Step 4. Select a Prediction Task',
894
  value='Compound-protein interaction')
895
 
896
  with gr.Row():
897
  with gr.Column():
898
+ HelpTip(
899
+ "Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the target was trained."
900
+ "Please refer to documentation for detailed benchamrk results."
901
+ )
902
+ drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
903
+ label='Step 5. Select a Preset Model')
904
  screen_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
905
  with gr.Row():
906
  with gr.Column():
 
911
 
912
  with gr.Row(visible=True):
913
  with gr.Column():
914
+ # drug_screen_clr_btn = gr.ClearButton(size='lg')
915
  drug_screen_btn = gr.Button(value='SCREEN', variant='primary', size='lg')
916
  # TODO Modify the pd df directly with df['X2'] = target
917
 
 
953
  with gr.Row():
954
  with gr.Column():
955
  HelpTip(
956
+ "By default, models trained on all protein families (general) will be applied."
957
  "If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
958
  )
959
  target_identify_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
 
983
  "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
984
  "while affinity prediction directly estimates their binding strength measured using IC50."
985
  )
986
+ target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
987
+ label='Step 4. Select a Prediction Task',
988
  value='Compound-protein interaction')
989
 
990
  with gr.Row():
991
  with gr.Column():
992
+ HelpTip(
993
+ "Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the compound was trained."
994
+ "Please refer to documentation for detailed benchamrk results."
995
+ )
996
+ target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()),
997
+ label='Step 5. Select a Preset Model')
998
  identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
999
 
1000
  with gr.Row():
1001
  with gr.Column():
1002
  target_identify_email = gr.Textbox(
1003
  label='Step 6. Email (Optional)',
1004
+ info="If an email is provided, a notification email will be sent to you when your job "
1005
+ "is completed."
1006
  )
1007
 
1008
  with gr.Row(visible=True):
 
1021
  ''')
1022
  with gr.Blocks() as infer_block:
1023
  with gr.Column() as infer_page:
1024
+ infer_type = gr.Dropdown(choices=['Upload a CSV interaction pair dataset',
1025
+ 'Upload a compound library and a target library'],
1026
+ label='Step 1. Select Pair Input Type and Input',
1027
+ value='Upload a CSV interaction pair dataset')
1028
  with gr.Column() as pair_upload:
1029
+ with gr.Row():
1030
+ gr.File(label="Example custom dataset",
1031
+ value="data/examples/interaction_pair_inference.csv",
1032
+ interactive=False)
1033
+ with gr.Row():
1034
  infer_data_for_predict = gr.File(
1035
+ label='Upload a Custom Dataset', file_count="single", type='filepath', visible=True)
1036
  with gr.Column() as pair_generate:
1037
  with gr.Row():
1038
+ gr.File(label='Example SDF Compound Library',
1039
  value='data/examples/compound_library.sdf', interactive=False)
1040
+ gr.File(label='Example FASTA Target Library',
1041
  value='data/examples/target_library.fasta', interactive=False)
1042
  with gr.Row():
1043
+ gr.File(label='Example CSV Compound Library',
1044
  value='data/examples/compound_library.csv', interactive=False)
1045
+ gr.File(label='Example CSV Target Library',
1046
  value='data/examples/target_library.csv', interactive=False)
1047
  with gr.Row():
1048
+ infer_drug = gr.File(label='SDF/CSV File containing multiple compounds',
1049
  file_count="single", type='filepath')
1050
+ infer_target = gr.File(label='FASTA/CSV File containing multiple targets',
1051
  file_count="single", type='filepath')
1052
 
1053
+ with gr.Row():
1054
+ with gr.Column():
1055
+ HelpTip(
1056
+ "By default, models trained on all protein families (general) will be applied."
1057
+ "If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
1058
+ )
1059
+ pair_infer_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
1060
+ value='General',
1061
+ label='Step 2. Select Target Protein Family (Optional)')
1062
 
1063
+ with gr.Row():
1064
+ with gr.Column():
1065
+ HelpTip(
1066
+ "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
1067
+ "while affinity prediction directly estimates their binding strength measured using IC50."
1068
+ )
1069
+ pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()),
1070
+ label='Step 3. Select a Prediction Task',
1071
+ value='Compound-protein interaction')
1072
+
1073
+ with gr.Row():
1074
+ with gr.Column():
1075
+ HelpTip("Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and random splitting validation."
1076
+ "Please refer to documentation for detailed benchamrk results."
1077
+ )
1078
+ pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 4. Select a Preset Model')
1079
+ infer_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
1080
+
1081
+
1082
+ with gr.Row():
1083
+ pair_infer_email = gr.Textbox(
1084
+ label='Step 5. Email (Optional)',
1085
+ info="If an email is provided, a notification email will be sent to you when your job is completed."
1086
+ )
1087
 
1088
  with gr.Row(visible=True):
1089
  # pair_infer_clr_btn = gr.ClearButton(size='lg')
 
1098
  with gr.Blocks() as report:
1099
  gr.Markdown('''
1100
  # <center>DeepSEQreen Chemical Property Report</center>
 
1101
  To compute chemical properties for the predictions of drug hit screening,
1102
  target protein identification, and interaction pair inference.
1103
 
 
1105
  your own dataset. The page shows only a preview report displaying at most 30 records
1106
  (with top predicted CPI/CPA if reporting results from a prediction job). For a full report, please
1107
  generate and download a raw data CSV or interactive table HTML file below.
 
1108
  ''')
1109
  with gr.Row():
1110
  file_for_report = gr.File(interactive=True, type='filepath')
 
1123
 
1124
  with gr.Row():
1125
  with gr.Column():
1126
+ csv_generate = gr.Button(value='Generate raw data (CSV)', interactive=True)
1127
  csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
1128
  with gr.Column():
1129
+ html_generate = gr.Button(value='Generate report (HTML)', interactive=True)
1130
  html_download_file = gr.File(label='Download report (HTML)', visible=False)
1131
 
1132
 
 
1224
  def example_fill(input_type):
1225
  return {target_id: 'Q16539',
1226
  target_gene: 'MAPK14',
1227
+ target_organism: 'Homo sapiens',
1228
  target_fasta: """
1229
  >sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
1230
  MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
 
1266
  & (benchmark_df['Scenario'] == scenario)
1267
  & (benchmark_df['all'] == False)]
1268
  row = filtered_df.loc[filtered_df[score].idxmax()]
 
1269
  return gr.Dropdown(value=row['preset'],
1270
  info=f"Reason: {scenario} in the training dataset; we recommend the model "
1271
  f"with the best {score} ({float(row[score]):.3f}) "
 
1374
  screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
1375
  else:
1376
  screen_df = process_drug_library_upload(library_upload)
1377
+ print(screen_df.shape)
1378
  if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
1379
  raise gr.Error(f'The uploaded compound library has more records '
1380
  f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
 
1612
  demo.launch(
1613
  show_api=False,
1614
  )
1615
+
1616
+ #%%