libokj committed on
Commit
d554c49
·
1 Parent(s): 6f9fa67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -75
app.py CHANGED
@@ -60,7 +60,7 @@ SESSION.mount('https://', ADAPTER)
60
 
61
  UNIPROT_ENDPOINT = 'https://rest.uniprot.org/uniprotkb/{query}'
62
 
63
- CUSTOM_DATASET_MAX_LEN = 10000
64
 
65
  CSS = """
66
  .help-tip {
@@ -403,12 +403,12 @@ def validate_columns(df, mandatory_cols):
403
 
404
 
405
  def process_target_fasta(sequence):
406
- lines = sequence.strip().split("\n")
407
- if lines[0].startswith(">"):
408
- lines = lines[1:]
409
- return ''.join(lines).split(">")[0]
410
- # record = SeqIO.parse(io.StringIO(sequence), "fasta")[0]
411
- # return str(record.seq)
412
 
413
 
414
  def send_email(receiver, msg):
@@ -732,24 +732,24 @@ def drug_library_from_sdf(sdf_path):
732
 
733
  def process_target_library_upload(library_upload):
734
  if library_upload.endswith('.csv'):
735
- identify_df = pd.read_csv(library_upload)
736
  elif library_upload.endswith('.fasta'):
737
- identify_df = target_library_from_fasta(library_upload)
738
  else:
739
  raise gr.Error('Currently only CSV and FASTA files are supported as target libraries.')
740
- validate_columns(identify_df, ['X2'])
741
- return library_upload
742
 
743
 
744
  def process_drug_library_upload(library_upload):
745
  if library_upload.endswith('.csv'):
746
- screen_df = pd.read_csv(library_upload)
747
  elif library_upload.endswith('.sdf'):
748
- screen_df = drug_library_from_sdf(library_upload)
749
  else:
750
- raise gr.Error('Currently only CSV and SDF files are supported as compound libraries.')
751
- validate_columns(screen_df, ['X1'])
752
- return screen_df
753
 
754
 
755
  def target_library_from_fasta(fasta_path):
@@ -863,14 +863,15 @@ To predict interactions/binding affinities of a single target against a library
863
  with gr.Column():
864
  HelpTip(
865
  "Select a preset compound library (e.g., DrugBank)."
866
- "Alternatively, upload a CSV file with a column named X1 containing compound SMILES, or use an SDF file."
 
867
  )
868
  drug_library = gr.Dropdown(label='Step 3. Select or Upload a Compound Library',
869
  choices=list(DRUG_LIBRARY_MAP.keys()))
870
  with gr.Row():
871
- gr.File(label='Example SDF Compound Library',
872
  value='data/examples/compound_library.sdf', interactive=False)
873
- gr.File(label='Example CSV Compound Library',
874
  value='data/examples/compound_library.csv', interactive=False)
875
  drug_library_upload_btn = gr.UploadButton(
876
  label='Upload a custom library', variant='primary')
@@ -958,9 +959,9 @@ To predict interactions/binding affinities of a single compound against a librar
958
  target_library = gr.Dropdown(label='Step 3. Select or Upload a Target Library',
959
  choices=list(TARGET_LIBRARY_MAP.keys()))
960
  with gr.Row():
961
- gr.File(label='Example FASTA Target Library',
962
  value='data/examples/target_library.fasta', interactive=False)
963
- gr.File(label='Example CSV Target Library',
964
  value='data/examples/target_library.csv', interactive=False)
965
  target_library_upload_btn = gr.UploadButton(
966
  label='Upload a custom library', variant='primary')
@@ -1006,69 +1007,45 @@ To predict interactions/binding affinities of a single compound against a librar
1006
  ''')
1007
  with gr.Blocks() as infer_block:
1008
  with gr.Column() as infer_page:
1009
- infer_type = gr.Dropdown(choices=['Upload a CSV interaction pair dataset',
1010
- 'Upload a compound library and a target library'],
1011
- label='Step 1. Select Pair Input Type and Input',
1012
- value='Upload a CSV interaction pair dataset')
1013
  with gr.Column() as pair_upload:
1014
- with gr.Row():
1015
- gr.File(label="Example custom dataset",
1016
- value="data/examples/interaction_pair_inference.csv",
1017
- interactive=False)
1018
- with gr.Row():
1019
  infer_data_for_predict = gr.File(
1020
- label='Upload a Custom Dataset', file_count="single", type='filepath', visible=True)
1021
  with gr.Column() as pair_generate:
1022
  with gr.Row():
1023
- gr.File(label='Example SDF Compound Library',
1024
  value='data/examples/compound_library.sdf', interactive=False)
1025
- gr.File(label='Example FASTA Target Library',
1026
  value='data/examples/target_library.fasta', interactive=False)
1027
  with gr.Row():
1028
- gr.File(label='Example CSV Compound Library',
1029
  value='data/examples/compound_library.csv', interactive=False)
1030
- gr.File(label='Example CSV Target Library',
1031
  value='data/examples/target_library.csv', interactive=False)
1032
  with gr.Row():
1033
- infer_drug = gr.File(label='SDF/CSV File Containing Multiple Compounds',
1034
  file_count="single", type='filepath')
1035
- infer_target = gr.File(label='FASTA/CSV File Containing Multiple Targets',
1036
  file_count="single", type='filepath')
1037
 
1038
- with gr.Row():
1039
- with gr.Column():
1040
- HelpTip(
1041
- "By default, models trained on all protein families (general) will be applied."
1042
- "If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
1043
- )
1044
- pair_infer_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
1045
- value='General',
1046
- label='Step 2. Select Target Protein Family (Optional)')
1047
-
1048
- with gr.Row():
1049
- with gr.Column():
1050
- HelpTip(
1051
- "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
1052
- "while affinity prediction directly estimates their binding strength measured using IC50."
1053
- )
1054
- pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()),
1055
- label='Step 3. Select a Prediction Task',
1056
- value='Compound-protein interaction')
1057
-
1058
- with gr.Row():
1059
- with gr.Column():
1060
- HelpTip("Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and random splitting validation."
1061
- "Please refer to documentation for detailed benchamrk results."
1062
- )
1063
- pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 4. Select a Preset Model')
1064
- infer_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
1065
-
1066
 
1067
- with gr.Row():
1068
- pair_infer_email = gr.Textbox(
1069
- label='Step 5. Email (Optional)',
1070
- info="If an email is provided, a notification email will be sent to you when your job is completed."
1071
- )
1072
 
1073
  with gr.Row(visible=True):
1074
  # pair_infer_clr_btn = gr.ClearButton(size='lg')
@@ -1110,10 +1087,10 @@ To predict interactions/binding affinities of a single compound against a librar
1110
 
1111
  with gr.Row():
1112
  with gr.Column():
1113
- csv_generate = gr.Button(value='Generate raw data (CSV)', interactive=True)
1114
  csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
1115
  with gr.Column():
1116
- html_generate = gr.Button(value='Generate report (HTML)', interactive=True)
1117
  html_download_file = gr.File(label='Download report (HTML)', visible=False)
1118
 
1119
 
@@ -1362,7 +1339,6 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1362
  screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
1363
  else:
1364
  screen_df = process_drug_library_upload(library_upload)
1365
- print(screen_df.shape)
1366
  if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
1367
  raise gr.Error(f'The uploaded compound library has more records '
1368
  f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
@@ -1600,5 +1576,3 @@ if __name__ == "__main__":
1600
  demo.launch(
1601
  show_api=False,
1602
  )
1603
-
1604
- #%%
 
60
 
61
  UNIPROT_ENDPOINT = 'https://rest.uniprot.org/uniprotkb/{query}'
62
 
63
+ CUSTOM_DATASET_MAX_LEN = 10_000
64
 
65
  CSS = """
66
  .help-tip {
 
403
 
404
 
405
  def process_target_fasta(sequence):
406
+ # lines = sequence.strip().split("\n")
407
+ # if lines[0].startswith(">"):
408
+ # lines = lines[1:]
409
+ # return ''.join(lines).split(">")[0]
410
+ record = list(SeqIO.parse(io.StringIO(sequence), "fasta"))[0]
411
+ return str(record.seq)
412
 
413
 
414
  def send_email(receiver, msg):
 
732
 
733
  def process_target_library_upload(library_upload):
734
  if library_upload.endswith('.csv'):
735
+ df = pd.read_csv(library_upload)
736
  elif library_upload.endswith('.fasta'):
737
+ df = target_library_from_fasta(library_upload)
738
  else:
739
  raise gr.Error('Currently only CSV and FASTA files are supported as target libraries.')
740
+ validate_columns(df, ['X2'])
741
+ return df
742
 
743
 
744
  def process_drug_library_upload(library_upload):
745
  if library_upload.endswith('.csv'):
746
+ df = pd.read_csv(library_upload)
747
  elif library_upload.endswith('.sdf'):
748
+ df = drug_library_from_sdf(library_upload)
749
  else:
750
+ raise gr.Error('Currently only CSV and SDF files are supported as drug libraries.')
751
+ validate_columns(df, ['X1'])
752
+ return df
753
 
754
 
755
  def target_library_from_fasta(fasta_path):
 
863
  with gr.Column():
864
  HelpTip(
865
  "Select a preset compound library (e.g., DrugBank)."
866
+ "Alternatively, upload a CSV file with a column named X1 containing compound SMILES, "
867
+ "or use an SDF file."
868
  )
869
  drug_library = gr.Dropdown(label='Step 3. Select or Upload a Compound Library',
870
  choices=list(DRUG_LIBRARY_MAP.keys()))
871
  with gr.Row():
872
+ gr.File(label='Example SDF compound library',
873
  value='data/examples/compound_library.sdf', interactive=False)
874
+ gr.File(label='Example CSV compound library',
875
  value='data/examples/compound_library.csv', interactive=False)
876
  drug_library_upload_btn = gr.UploadButton(
877
  label='Upload a custom library', variant='primary')
 
959
  target_library = gr.Dropdown(label='Step 3. Select or Upload a Target Library',
960
  choices=list(TARGET_LIBRARY_MAP.keys()))
961
  with gr.Row():
962
+ gr.File(label='Example FASTA target library',
963
  value='data/examples/target_library.fasta', interactive=False)
964
+ gr.File(label='Example CSV target library',
965
  value='data/examples/target_library.csv', interactive=False)
966
  target_library_upload_btn = gr.UploadButton(
967
  label='Upload a custom library', variant='primary')
 
1007
  ''')
1008
  with gr.Blocks() as infer_block:
1009
  with gr.Column() as infer_page:
1010
+ infer_type = gr.Dropdown(choices=['Upload a compound library and a target library',
1011
+ 'Upload a CSV interaction pair dataset'],
1012
+ value='Upload a compound library and a target library')
 
1013
  with gr.Column() as pair_upload:
1014
+ gr.File(label="Example custom dataset",
1015
+ value="data/examples/interaction_pair_inference.csv",
1016
+ interactive=False)
1017
+ with gr.Column():
 
1018
  infer_data_for_predict = gr.File(
1019
+ label='Upload a custom dataset', file_count="single", type='filepath', visible=True)
1020
  with gr.Column() as pair_generate:
1021
  with gr.Row():
1022
+ gr.File(label='Example SDF compound library',
1023
  value='data/examples/compound_library.sdf', interactive=False)
1024
+ gr.File(label='Example FASTA target library',
1025
  value='data/examples/target_library.fasta', interactive=False)
1026
  with gr.Row():
1027
+ gr.File(label='Example CSV compound library',
1028
  value='data/examples/compound_library.csv', interactive=False)
1029
+ gr.File(label='Example CSV target library',
1030
  value='data/examples/target_library.csv', interactive=False)
1031
  with gr.Row():
1032
+ infer_drug = gr.File(label='SDF/CSV file containing multiple compounds',
1033
  file_count="single", type='filepath')
1034
+ infer_target = gr.File(label='FASTA/CSV file containing multiple targets',
1035
  file_count="single", type='filepath')
1036
 
1037
+ with gr.Row(visible=True):
1038
+ pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()), label='Task')
1039
+ pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
1040
+ pair_infer_target_family = gr.Dropdown(choices=['General'],
1041
+ label='Target family',
1042
+ value='General')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1043
 
1044
+ # with gr.Row():
1045
+ # pair_infer_email = gr.Textbox(
1046
+ # label='Email (optional)',
1047
+ # info="Your email will be used to send you notifications when your job finishes."
1048
+ # )
1049
 
1050
  with gr.Row(visible=True):
1051
  # pair_infer_clr_btn = gr.ClearButton(size='lg')
 
1087
 
1088
  with gr.Row():
1089
  with gr.Column():
1090
+ csv_generate = gr.Button(value='Generate raw data (CSV)', interactive=True, variant='primary')
1091
  csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
1092
  with gr.Column():
1093
+ html_generate = gr.Button(value='Generate report (HTML)', interactive=True, variant='primary')
1094
  html_download_file = gr.File(label='Download report (HTML)', visible=False)
1095
 
1096
 
 
1339
  screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
1340
  else:
1341
  screen_df = process_drug_library_upload(library_upload)
 
1342
  if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
1343
  raise gr.Error(f'The uploaded compound library has more records '
1344
  f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
 
1576
  demo.launch(
1577
  show_api=False,
1578
  )