libokj commited on
Commit
7148a25
·
1 Parent(s): c7bb63d

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +285 -228
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import hashlib
2
  import itertools
3
  import json
@@ -21,6 +22,7 @@ import hydra
21
  import pandas as pd
22
  import plotly.express as px
23
  import requests
 
24
  from rdkit.Chem.rdMolDescriptors import CalcNumRotatableBonds, CalcNumHeavyAtoms, CalcNumAtoms, CalcTPSA
25
  from requests.adapters import HTTPAdapter, Retry
26
  from rdkit import Chem
@@ -39,7 +41,7 @@ import sascorer
39
 
40
  ROOT = Path.cwd()
41
 
42
- DF_FOR_REPORT = pd.DataFrame()
43
 
44
  pd.set_option('display.float_format', '{:.3f}'.format)
45
  PandasTools.molRepresentation = 'svg'
@@ -146,7 +148,7 @@ CSS = """
146
  position: absolute;
147
  }
148
 
149
- #example {
150
  padding: 0;
151
  background: none;
152
  border: none;
@@ -171,47 +173,47 @@ class HelpTip:
171
  )
172
 
173
 
174
- def sa_score(row):
175
- return sascorer.calculateScore(row['Compound'])
176
 
177
 
178
- def mw(row):
179
- return Chem.Descriptors.MolWt(row['Compound'])
180
 
181
 
182
- def mr(row):
183
- return Crippen.MolMR(row['Compound'])
184
 
185
 
186
- def hbd(row):
187
- return Lipinski.NumHDonors(row['Compound'])
188
 
189
 
190
- def hba(row):
191
- return Lipinski.NumHAcceptors(row['Compound'])
192
 
193
 
194
- def logp(row):
195
- return Crippen.MolLogP(row['Compound'])
196
 
197
 
198
- def atom(row):
199
- return CalcNumAtoms(row['Compound'])
200
 
201
 
202
- def heavy_atom(row):
203
- return CalcNumHeavyAtoms(row['Compound'])
204
 
205
 
206
- def rotatable_bond(row):
207
- return CalcNumRotatableBonds((row['Compound']))
208
 
209
 
210
- def tpsa(row):
211
- return CalcTPSA((row['Compound']))
212
 
213
 
214
- def lipinski(row):
215
  """
216
  Lipinski's rules:
217
  Hydrogen bond donors <= 5
@@ -219,19 +221,19 @@ def lipinski(row):
219
  Molecular weight <= 500 daltons
220
  logP <= 5
221
  """
222
- if hbd(row) > 5:
223
  return False
224
- elif hba(row) > 10:
225
  return False
226
- elif mw(row) > 500:
227
  return False
228
- elif logp(row) > 5:
229
  return False
230
  else:
231
  return True
232
 
233
 
234
- def reos(row):
235
  """
236
  Rapid Elimination Of Swill filter:
237
  Molecular weight between 200 and 500
@@ -242,23 +244,23 @@ def reos(row):
242
  Rotatable bond count between 0 and 8
243
  Heavy atom count between 15 and 50
244
  """
245
- if not 200 < mw(row) < 500:
246
  return False
247
- elif not -5.0 < logp(row) < 5.0:
248
  return False
249
- elif not 0 < hbd(row) < 5:
250
  return False
251
- elif not 0 < hba(row) < 10:
252
  return False
253
- elif not 0 < rotatable_bond(row) < 8:
254
  return False
255
- elif not 15 < heavy_atom(row) < 50:
256
  return False
257
  else:
258
  return True
259
 
260
 
261
- def ghose(row):
262
  """
263
  Ghose drug like filter:
264
  Molecular weight between 160 and 480
@@ -266,34 +268,34 @@ def ghose(row):
266
  Atom count between 20 and 70
267
  Molar refractivity between 40 and 130
268
  """
269
- if not 160 < mw(row) < 480:
270
  return False
271
- elif not -0.4 < logp(row) < 5.6:
272
  return False
273
- elif not 20 < atom(row) < 70:
274
  return False
275
- elif not 40 < mr(row) < 130:
276
  return False
277
  else:
278
  return True
279
 
280
 
281
- def veber(row):
282
  """
283
  The Veber filter is a rule of thumb filter for orally active drugs described in
284
  Veber et al., J Med Chem. 2002; 45(12): 2615-23.:
285
  Rotatable bonds <= 10
286
  Topological polar surface area <= 140
287
  """
288
- if not rotatable_bond(row) <= 10:
289
  return False
290
- elif not tpsa(row) <= 140:
291
  return False
292
  else:
293
  return True
294
 
295
 
296
- def rule_of_three(row):
297
  """
298
  Rule of Three filter (Congreve et al., Drug Discov. Today. 8 (19): 876–7, (2003).):
299
  Molecular weight <= 300
@@ -302,15 +304,15 @@ def rule_of_three(row):
302
  H-bond acceptor count <= 3
303
  Rotatable bond count <= 3
304
  """
305
- if not mw(row) <= 300:
306
  return False
307
- elif not logp(row) <= 3:
308
  return False
309
- elif not hbd(row) <= 3:
310
  return False
311
- elif not hba(row) <= 3:
312
  return False
313
- elif not rotatable_bond(row) <= 3:
314
  return False
315
  else:
316
  return True
@@ -389,6 +391,9 @@ COLUMN_ALIASES = {
389
  'X2': 'Target FASTA',
390
  'ID1': 'Compound ID',
391
  'ID2': 'Target ID',
 
 
 
392
  }
393
 
394
 
@@ -421,7 +426,7 @@ def send_email(receiver, msg):
421
  pass
422
 
423
 
424
- def submit_predict(predict_filepath, task, preset, target_family, flag, progress=gr.Progress(track_tqdm=True)):
425
  if flag:
426
  try:
427
  job_id = flag
@@ -430,10 +435,10 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
430
  preset = PRESET_MAP[preset]
431
  target_family = TARGET_FAMILY_MAP[target_family]
432
  # email_hash = hashlib.sha256(email.encode()).hexdigest()
433
- COLUMN_ALIASES = COLUMN_ALIASES | {
434
- 'Y': 'Actual interaction probability' if task == 'binary' else 'Actual binding affinity',
435
- 'Y^': 'Predicted interaction probability' if task == 'binary' else 'Predicted binding affinity'
436
- }
437
 
438
  # target_family_list = [target_family]
439
  # for family in target_family_list:
@@ -451,20 +456,18 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
451
  predictions, _ = predict(cfg)
452
  predictions = [pd.DataFrame(prediction) for prediction in predictions]
453
  prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
 
454
 
455
  predictions_file = f'temp/{job_id}_predictions.csv'
456
- prediction_df.to_csv(predictions_file, index=False)
457
 
458
  return [predictions_file,
459
  False]
460
  except Exception as e:
461
  gr.Warning(f"Prediction job failed due to error: {str(e)}")
462
- return [None,
463
- False]
464
-
465
  else:
466
- return [None,
467
- False]
468
  #
469
  # except Exception as e:
470
  # raise gr.Error(str(e))
@@ -536,19 +539,19 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
536
 
537
 
538
  def update_df(file, progress=gr.Progress(track_tqdm=True)):
539
- global DF_FOR_REPORT
540
- if file is not None:
541
  df = pd.read_csv(file)
542
- if df['X1'].nunique() > 1:
543
- df['Scaffold SMILES'] = df['X1'].swifter.progress_bar(
544
- desc=f"Calculating scaffold...").apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)
545
- # Add a new column with RDKit molecule objects
546
- if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
547
- PandasTools.AddMoleculeColumnToFrame(df, smilesCol='X1', molCol='Compound',
548
- includeFingerprints=True)
549
- PandasTools.AddMoleculeColumnToFrame(df, smilesCol='Scaffold SMILES', molCol='Scaffold',
550
  includeFingerprints=True)
551
- DF_FOR_REPORT = df.copy()
 
 
552
 
553
  # pie_chart = None
554
  # value = None
@@ -563,30 +566,64 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
563
  # elif DF_FOR_REPORT['X2'].nunique() > 1 >= DF_FOR_REPORT['X1'].nunique():
564
  # pie_chart = create_pie_chart(DF_FOR_REPORT, category='Target family', value=value, top_k=100)
565
 
566
- return create_html_report(DF_FOR_REPORT), df # pie_chart
 
 
 
567
  else:
568
- return gr.HTML(), gr.Dataframe()
569
 
570
 
571
  def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
572
- df_html = df.copy()
573
- cols_left = ['ID1', 'ID2', 'Y', 'Y^', 'Compound', 'Scaffold', 'Scaffold SMILES', ]
 
574
  cols_right = ['X1', 'X2']
575
  cols_left = [col for col in cols_left if col in df_html.columns]
576
  cols_right = [col for col in cols_right if col in df_html.columns]
577
  df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
578
- df_html['X2'] = df_html['X2'].swifter.apply(wrap_text)
 
579
  df_html = df_html.sort_values(
580
- [col for col in ['Y', 'Y^', 'ID1', 'ID2', 'X1', 'X2'] if col in df.columns], ascending=False
581
- ).rename(columns=COLUMN_ALIASES)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582
  # PandasTools.RenderImagesInAllDataFrames(images=True)
583
- PandasTools.ChangeMoleculeRendering(df_html, renderer='image')
584
- # Return the DataFrame as HTML
585
- PandasTools.RenderImagesInAllDataFrames(images=True)
 
 
 
586
 
587
  if not file:
588
- styled_df = df_html.iloc[:51].style
589
- # styled_df = df.style.format("{:.2f}")
 
 
 
 
 
 
 
 
590
  colors = sns.color_palette('husl', len(df_html.columns))
591
  for i, col in enumerate(df_html.columns):
592
  if pd.api.types.is_numeric_dtype(df_html[col]):
@@ -597,13 +634,21 @@ def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
597
  import panel as pn
598
  from bokeh.resources import INLINE
599
  from bokeh.models import NumberFormatter, BooleanFormatter
600
- bokeh_formatters = {
601
- 'float': {'type': 'progress', 'legend': True},
602
- 'bool': BooleanFormatter(),
 
 
 
 
 
 
603
  }
 
 
604
  # html = df.to_html(file)
605
  # return html
606
- pn.widgets.Tabulator(df_html, formatters=bokeh_formatters).save(file, resources=INLINE)
607
 
608
 
609
  # def create_pie_chart(df, category, value, top_k):
@@ -657,16 +702,18 @@ def create_pie_chart(df, category, value, top_k):
657
  return fig
658
 
659
 
660
- def submit_report(score_list, filter_list, progress=gr.Progress(track_tqdm=True)):
661
- df = DF_FOR_REPORT.copy()
662
  try:
663
  for filter_name in filter_list:
664
- df[filter_name] = df.swifter.progress_bar(desc=f"Calculating {filter_name}").apply(
665
- FILTER_MAP[filter_name], axis=1)
 
666
 
667
  for score_name in score_list:
668
- df[score_name] = df.swifter.progress_bar(desc=f"Calculating {score_name}").apply(
669
- SCORE_MAP[score_name], axis=1)
 
670
 
671
  # pie_chart = None
672
  # value = None
@@ -681,11 +728,11 @@ def submit_report(score_list, filter_list, progress=gr.Progress(track_tqdm=True)
681
  # elif df['X2'].nunique() > 1 >= df['X1'].nunique():
682
  # pie_chart = create_pie_chart(df, category='Target family', value=value, top_k=100)
683
 
684
- return create_html_report(df), df # pie_chart
685
 
686
  except Exception as e:
687
- raise gr.Error(str(e))
688
-
689
 
690
  # def check_job_status(job_id):
691
  # job_lock = DATA_PATH / f"{job_id}.lock"
@@ -704,20 +751,23 @@ def submit_report(score_list, filter_list, progress=gr.Progress(track_tqdm=True)
704
 
705
 
706
  def wrap_text(text, line_length=60):
707
- wrapper = textwrap.TextWrapper(width=line_length)
708
- if text.startswith('>'):
709
- sections = text.split('>')
710
- wrapped_sections = []
711
- for section in sections:
712
- if not section:
713
- continue
714
- lines = section.split('\n')
715
- seq_header = lines[0]
716
- wrapped_seq = wrapper.fill(''.join(lines[1:]))
717
- wrapped_sections.append(f">{seq_header}\n{wrapped_seq}")
718
- return '\n'.join(wrapped_sections)
 
 
 
719
  else:
720
- return wrapper.fill(text)
721
 
722
 
723
  def unwrap_text(text):
@@ -834,17 +884,18 @@ To predict interactions/binding affinities of a single target against a library
834
  visible=False, interactive=True, scale=4, )
835
 
836
  with gr.Row():
837
- with gr.Column():
838
- target_upload_btn = gr.UploadButton(label='Upload a FASTA file', type='binary',
839
- visible=True, variant='primary',
840
- size='lg')
841
- target_query_btn = gr.Button(value='Query the sequence', variant='primary',
842
- visible=False)
843
-
 
 
844
  target_fasta = gr.Code(label='Input or Display FASTA', interactive=True, lines=5)
845
  # with gr.Row():
846
  # with gr.Column():
847
- example_fasta = gr.Button(value='Example: Human MAPK14', elem_id='example')
848
  # with gr.Column():
849
  # gr.File(label='Example FASTA file',
850
  # value='data/examples/MAPK14.fasta', interactive=False)
@@ -853,7 +904,8 @@ To predict interactions/binding affinities of a single target against a library
853
  with gr.Column():
854
  HelpTip(
855
  "Click Auto-detect to identify the protein family using sequence alignment. "
856
- "This optional step allows applying a family-specific model instead of a all-family model (general)."
 
857
  "Manually select general if the alignment results are unsatisfactory."
858
  )
859
  drug_screen_target_family = gr.Dropdown(
@@ -886,8 +938,10 @@ To predict interactions/binding affinities of a single target against a library
886
  with gr.Row():
887
  with gr.Column():
888
  HelpTip(
889
- "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
890
- "while affinity prediction directly estimates their binding strength measured using IC50."
 
 
891
  )
892
  drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
893
  label='Step 4. Select a Prediction Task',
@@ -896,7 +950,8 @@ To predict interactions/binding affinities of a single target against a library
896
  with gr.Row():
897
  with gr.Column():
898
  HelpTip(
899
- "Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the target was trained."
 
900
  "Please refer to documentation for detailed benchamrk results."
901
  )
902
  drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
@@ -906,7 +961,8 @@ To predict interactions/binding affinities of a single target against a library
906
  with gr.Column():
907
  drug_screen_email = gr.Textbox(
908
  label='Step 6. Email (Optional)',
909
- info="If an email is provided, a notification email will be sent to you when your job is completed."
 
910
  )
911
 
912
  with gr.Row(visible=True):
@@ -937,34 +993,39 @@ To predict interactions/binding affinities of a single compound against a librar
937
  HelpTip(
938
  "Enter (paste) a compound SMILES below manually or upload a SDF file."
939
  "If multiple entities are in the SDF, only the first will be used."
940
- "SMILES can be obtained by searching for the compound of interest in databases such as NCBI, PubChem and and ChEMBL."
 
941
  )
942
  compound_type = gr.Dropdown(
943
  label='Step 1. Select Compound Input Type and Input',
944
  choices=['SMILES', 'SDF'],
945
- info='Enter (paste) an SMILES string or upload an SDF file.',
946
  value='SMILES',
947
  interactive=True)
948
- compound_upload_btn = gr.UploadButton(label='Upload', variant='primary', type='binary')
 
949
 
950
  compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
951
- example_drug = gr.Button(value='Example: Aspirin', elem_id='example')
952
 
953
  with gr.Row():
954
  with gr.Column():
955
  HelpTip(
956
  "By default, models trained on all protein families (general) will be applied."
957
- "If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
 
958
  )
959
  target_identify_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
960
  value='General',
961
- label='Step 2. Select Target Protein Family (Optional)')
 
962
 
963
  with gr.Row():
964
  with gr.Column():
965
  HelpTip(
966
  "Select a preset target library (e.g., ChEMBL33_human_proteins)."
967
- "Alternatively, upload a CSV file with a column named X2 containing tareget protein sequences, or use an FASTA file."
 
968
  )
969
  target_library = gr.Dropdown(label='Step 3. Select or Upload a Target Library',
970
  choices=list(TARGET_LIBRARY_MAP.keys()))
@@ -980,8 +1041,10 @@ To predict interactions/binding affinities of a single compound against a librar
980
  with gr.Row():
981
  with gr.Column():
982
  HelpTip(
983
- "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
984
- "while affinity prediction directly estimates their binding strength measured using IC50."
 
 
985
  )
986
  target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
987
  label='Step 4. Select a Prediction Task',
@@ -990,11 +1053,12 @@ To predict interactions/binding affinities of a single compound against a librar
990
  with gr.Row():
991
  with gr.Column():
992
  HelpTip(
993
- "Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the compound was trained."
 
994
  "Please refer to documentation for detailed benchamrk results."
995
  )
996
- target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()),
997
- label='Step 5. Select a Preset Model')
998
  identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
999
 
1000
  with gr.Row():
@@ -1021,69 +1085,46 @@ To predict interactions/binding affinities of a single compound against a librar
1021
  ''')
1022
  with gr.Blocks() as infer_block:
1023
  with gr.Column() as infer_page:
1024
- infer_type = gr.Dropdown(choices=['Upload a CSV interaction pair dataset',
1025
- 'Upload a compound library and a target library'],
1026
- label='Step 1. Select Pair Input Type and Input',
1027
- value='Upload a CSV interaction pair dataset')
1028
  with gr.Column() as pair_upload:
1029
- with gr.Row():
1030
- gr.File(label="Example custom dataset",
1031
- value="data/examples/interaction_pair_inference.csv",
1032
- interactive=False)
1033
- with gr.Row():
1034
  infer_data_for_predict = gr.File(
1035
- label='Upload a Custom Dataset', file_count="single", type='filepath', visible=True)
1036
  with gr.Column() as pair_generate:
1037
  with gr.Row():
1038
- gr.File(label='Example SDF Compound Library',
1039
  value='data/examples/compound_library.sdf', interactive=False)
1040
- gr.File(label='Example FASTA Target Library',
1041
  value='data/examples/target_library.fasta', interactive=False)
1042
  with gr.Row():
1043
- gr.File(label='Example CSV Compound Library',
1044
  value='data/examples/compound_library.csv', interactive=False)
1045
- gr.File(label='Example CSV Target Library',
1046
  value='data/examples/target_library.csv', interactive=False)
1047
  with gr.Row():
1048
- infer_drug = gr.File(label='SDF/CSV File containing multiple compounds',
1049
  file_count="single", type='filepath')
1050
- infer_target = gr.File(label='FASTA/CSV File containing multiple targets',
1051
  file_count="single", type='filepath')
1052
 
1053
- with gr.Row():
1054
- with gr.Column():
1055
- HelpTip(
1056
- "By default, models trained on all protein families (general) will be applied."
1057
- "If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
1058
- )
1059
- pair_infer_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
1060
- value='General',
1061
- label='Step 2. Select Target Protein Family (Optional)')
1062
-
1063
- with gr.Row():
1064
- with gr.Column():
1065
- HelpTip(
1066
- "Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
1067
- "while affinity prediction directly estimates their binding strength measured using IC50."
1068
- )
1069
- pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()),
1070
- label='Step 3. Select a Prediction Task',
1071
- value='Compound-protein interaction')
1072
-
1073
- with gr.Row():
1074
- with gr.Column():
1075
- HelpTip("Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and random splitting validation."
1076
- "Please refer to documentation for detailed benchamrk results."
1077
- )
1078
- pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 4. Select a Preset Model')
1079
- infer_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
1080
-
1081
 
1082
- with gr.Row():
1083
- pair_infer_email = gr.Textbox(
1084
- label='Step 5. Email (Optional)',
1085
- info="If an email is provided, a notification email will be sent to you when your job is completed."
1086
- )
1087
 
1088
  with gr.Row(visible=True):
1089
  # pair_infer_clr_btn = gr.ClearButton(size='lg')
@@ -1098,23 +1139,28 @@ To predict interactions/binding affinities of a single compound against a librar
1098
  with gr.Blocks() as report:
1099
  gr.Markdown('''
1100
  # <center>DeepSEQreen Chemical Property Report</center>
 
1101
  To compute chemical properties for the predictions of drug hit screening,
1102
- target protein identification, and interaction pair inference.
 
 
 
 
1103
 
1104
- You may also upload
1105
- your own dataset. The page shows only a preview report displaying at most 30 records
1106
- (with top predicted CPI/CPA if reporting results from a prediction job). For a full report, please
1107
- generate and download a raw data CSV or interactive table HTML file below.
1108
  ''')
1109
  with gr.Row():
1110
  file_for_report = gr.File(interactive=True, type='filepath')
1111
- df_raw = gr.Dataframe(type="pandas", interactive=False, visible=False)
 
1112
  scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
1113
  filters = gr.CheckboxGroup(list(FILTER_MAP.keys()), label='Filters')
1114
 
1115
  with gr.Row():
1116
  # clear_btn = gr.ClearButton(size='lg')
1117
- analyze_btn = gr.Button('REPORT', variant='primary', size='lg')
1118
 
1119
  with gr.Row():
1120
  with gr.Column(scale=3):
@@ -1123,11 +1169,13 @@ To predict interactions/binding affinities of a single compound against a librar
1123
 
1124
  with gr.Row():
1125
  with gr.Column():
1126
- csv_generate = gr.Button(value='Generate raw data (CSV)', interactive=True)
1127
- csv_download_file = gr.File(label='Download raw data (CSV)', visible=False)
 
1128
  with gr.Column():
1129
- html_generate = gr.Button(value='Generate report (HTML)', interactive=True)
1130
- html_download_file = gr.File(label='Download report (HTML)', visible=False)
 
1131
 
1132
 
1133
  def target_input_type_select(input_type):
@@ -1224,7 +1272,7 @@ To predict interactions/binding affinities of a single compound against a librar
1224
  def example_fill(input_type):
1225
  return {target_id: 'Q16539',
1226
  target_gene: 'MAPK14',
1227
- target_organism: 'Homo sapiens',
1228
  target_fasta: """
1229
  >sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
1230
  MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
@@ -1236,9 +1284,10 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1236
  """}
1237
 
1238
 
1239
- example_fasta.click(fn=example_fill, inputs=target_input_type,
1240
- outputs=[target_id, target_gene, target_organism, target_fasta], show_progress=False)
1241
-
 
1242
 
1243
  def screen_recommend_model(fasta, family, task):
1244
  task = TASK_MAP[task]
@@ -1249,7 +1298,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1249
  train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
1250
  score = 'CI'
1251
 
1252
- if fasta not in train['X2']:
1253
  scenario = "Unseen target"
1254
  else:
1255
  scenario = "Seen target"
@@ -1266,6 +1315,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1266
  & (benchmark_df['Scenario'] == scenario)
1267
  & (benchmark_df['all'] == False)]
1268
  row = filtered_df.loc[filtered_df[score].idxmax()]
 
1269
  return gr.Dropdown(value=row['preset'],
1270
  info=f"Reason: {scenario} in the training dataset; we recommend the model "
1271
  f"with the best {score} ({float(row[score]):.3f}) "
@@ -1280,13 +1330,13 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1280
  def compound_input_type_select(input_type):
1281
  match input_type:
1282
  case 'SMILES':
1283
- return gr.Dropdown(info='Input an SMILES string or upload an SMI file')
1284
  case 'SDF':
1285
- return gr.Dropdown(info='Convert the first molecule in an SDF file to SMILES')
1286
 
1287
 
1288
  compound_type.select(fn=compound_input_type_select,
1289
- inputs=compound_type, outputs=compound_type, show_progress=False)
1290
 
1291
 
1292
  def compound_upload_process(input_type, input_upload):
@@ -1374,7 +1424,6 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1374
  screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
1375
  else:
1376
  screen_df = process_drug_library_upload(library_upload)
1377
- print(screen_df.shape)
1378
  if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
1379
  raise gr.Error(f'The uploaded compound library has more records '
1380
  f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
@@ -1517,7 +1566,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1517
  ).then(
1518
  fn=submit_predict,
1519
  inputs=[screen_data_for_predict, drug_screen_task, drug_screen_preset,
1520
- drug_screen_target_family, screen_flag], # , drug_screen_email],
1521
  outputs=[file_for_report, run_state]
1522
  ).then(
1523
  fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
@@ -1529,12 +1578,12 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1529
  inputs=[compound_smiles, target_library, target_library_upload, run_state], # , drug_screen_email],
1530
  outputs=[identify_data_for_predict, identify_flag, run_state]
1531
  ).then(
1532
- fn=lambda: [gr.Column(visible=False), gr.Markdown(visible=True), gr.Tabs(selected=3)],
1533
- outputs=[identify_page, identify_waiting, tabs]
1534
  ).then(
1535
  fn=submit_predict,
1536
  inputs=[identify_data_for_predict, target_identify_task, target_identify_preset,
1537
- target_identify_target_family, identify_flag], # , target_identify_email],
1538
  outputs=[file_for_report, run_state]
1539
  ).then(
1540
  fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
@@ -1551,45 +1600,55 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1551
  ).then(
1552
  fn=submit_predict,
1553
  inputs=[infer_data_for_predict, pair_infer_task, pair_infer_preset,
1554
- pair_infer_target_family, infer_flag], # , pair_infer_email],
1555
  outputs=[file_for_report, run_state]
1556
  ).then(
1557
- fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False)],
1558
- outputs=[infer_page, infer_waiting]
1559
  )
1560
 
1561
  # TODO background job from these 3 pipelines to update file_for_report
1562
 
1563
  file_for_report.change(fn=update_df, inputs=file_for_report, outputs=[
1564
  html_report,
1565
- df_raw,
 
 
1566
  # ranking_pie_chart
1567
  ])
1568
- analyze_btn.click(fn=submit_report, inputs=[scores, filters], outputs=[
1569
  html_report,
1570
- df_raw,
1571
  # ranking_pie_chart
1572
  ])
1573
 
1574
 
1575
- def create_csv_raw_file(df, file_report):
1576
- from datetime import datetime
1577
- now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
1578
- filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
1579
- df.drop(['Compound', 'Scaffold']).to_csv(filename, index=False)
1580
- return gr.File(filename, visible=True)
1581
 
 
 
 
 
1582
 
1583
  def create_html_report_file(df, file_report):
1584
- from datetime import datetime
1585
- now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
1586
- filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.html"
1587
- create_html_report(df, filename)
1588
- return gr.File(filename, visible=True)
1589
-
 
 
1590
 
1591
- csv_generate.click(fn=create_csv_raw_file, inputs=[df_raw, file_for_report], outputs=csv_download_file)
1592
- html_generate.click(fn=create_html_report_file, inputs=[df_raw, file_for_report], outputs=html_download_file)
 
 
 
1593
 
1594
  # screen_waiting.change(fn=check_job_status, inputs=run_state, outputs=[pair_waiting, tabs, file_for_report],
1595
  # every=5)
@@ -1612,5 +1671,3 @@ if __name__ == "__main__":
1612
  demo.launch(
1613
  show_api=False,
1614
  )
1615
-
1616
- #%%
 
1
+ from datetime import datetime
2
  import hashlib
3
  import itertools
4
  import json
 
22
  import pandas as pd
23
  import plotly.express as px
24
  import requests
25
+ from bokeh.models import HTMLTemplateFormatter, StringFormatter
26
  from rdkit.Chem.rdMolDescriptors import CalcNumRotatableBonds, CalcNumHeavyAtoms, CalcNumAtoms, CalcTPSA
27
  from requests.adapters import HTTPAdapter, Retry
28
  from rdkit import Chem
 
41
 
42
  ROOT = Path.cwd()
43
 
44
+ # DF_FOR_REPORT = pd.DataFrame()
45
 
46
  pd.set_option('display.float_format', '{:.3f}'.format)
47
  PandasTools.molRepresentation = 'svg'
 
148
  position: absolute;
149
  }
150
 
151
+ .example {
152
  padding: 0;
153
  background: none;
154
  border: none;
 
173
  )
174
 
175
 
176
+ def sa_score(mol):
177
+ return sascorer.calculateScore(mol)
178
 
179
 
180
+ def mw(mol):
181
+ return Chem.Descriptors.MolWt(mol)
182
 
183
 
184
+ def mr(mol):
185
+ return Crippen.MolMR(mol)
186
 
187
 
188
+ def hbd(mol):
189
+ return Lipinski.NumHDonors(mol)
190
 
191
 
192
+ def hba(mol):
193
+ return Lipinski.NumHAcceptors(mol)
194
 
195
 
196
+ def logp(mol):
197
+ return Crippen.MolLogP(mol)
198
 
199
 
200
+ def atom(mol):
201
+ return CalcNumAtoms(mol)
202
 
203
 
204
+ def heavy_atom(mol):
205
+ return CalcNumHeavyAtoms(mol)
206
 
207
 
208
+ def rotatable_bond(mol):
209
+ return CalcNumRotatableBonds((mol))
210
 
211
 
212
+ def tpsa(mol):
213
+ return CalcTPSA((mol))
214
 
215
 
216
+ def lipinski(mol):
217
  """
218
  Lipinski's rules:
219
  Hydrogen bond donors <= 5
 
221
  Molecular weight <= 500 daltons
222
  logP <= 5
223
  """
224
+ if hbd(mol) > 5:
225
  return False
226
+ elif hba(mol) > 10:
227
  return False
228
+ elif mw(mol) > 500:
229
  return False
230
+ elif logp(mol) > 5:
231
  return False
232
  else:
233
  return True
234
 
235
 
236
+ def reos(mol):
237
  """
238
  Rapid Elimination Of Swill filter:
239
  Molecular weight between 200 and 500
 
244
  Rotatable bond count between 0 and 8
245
  Heavy atom count between 15 and 50
246
  """
247
+ if not 200 < mw(mol) < 500:
248
  return False
249
+ elif not -5.0 < logp(mol) < 5.0:
250
  return False
251
+ elif not 0 < hbd(mol) < 5:
252
  return False
253
+ elif not 0 < hba(mol) < 10:
254
  return False
255
+ elif not 0 < rotatable_bond(mol) < 8:
256
  return False
257
+ elif not 15 < heavy_atom(mol) < 50:
258
  return False
259
  else:
260
  return True
261
 
262
 
263
+ def ghose(mol):
264
  """
265
  Ghose drug like filter:
266
  Molecular weight between 160 and 480
 
268
  Atom count between 20 and 70
269
  Molar refractivity between 40 and 130
270
  """
271
+ if not 160 < mw(mol) < 480:
272
  return False
273
+ elif not -0.4 < logp(mol) < 5.6:
274
  return False
275
+ elif not 20 < atom(mol) < 70:
276
  return False
277
+ elif not 40 < mr(mol) < 130:
278
  return False
279
  else:
280
  return True
281
 
282
 
283
+ def veber(mol):
284
  """
285
  The Veber filter is a rule of thumb filter for orally active drugs described in
286
  Veber et al., J Med Chem. 2002; 45(12): 2615-23.:
287
  Rotatable bonds <= 10
288
  Topological polar surface area <= 140
289
  """
290
+ if not rotatable_bond(mol) <= 10:
291
  return False
292
+ elif not tpsa(mol) <= 140:
293
  return False
294
  else:
295
  return True
296
 
297
 
298
+ def rule_of_three(mol):
299
  """
300
  Rule of Three filter (Congreve et al., Drug Discov. Today. 8 (19): 876–7, (2003).):
301
  Molecular weight <= 300
 
304
  H-bond acceptor count <= 3
305
  Rotatable bond count <= 3
306
  """
307
+ if not mw(mol) <= 300:
308
  return False
309
+ elif not logp(mol) <= 3:
310
  return False
311
+ elif not hbd(mol) <= 3:
312
  return False
313
+ elif not hba(mol) <= 3:
314
  return False
315
+ elif not rotatable_bond(mol) <= 3:
316
  return False
317
  else:
318
  return True
 
391
  'X2': 'Target FASTA',
392
  'ID1': 'Compound ID',
393
  'ID2': 'Target ID',
394
+ 'Y': 'Actual CPI/CPA',
395
+ 'Y^': 'Predicted CPI/CPA',
396
+ 'N': 'Original Index'
397
  }
398
 
399
 
 
426
  pass
427
 
428
 
429
+ def submit_predict(predict_filepath, task, preset, target_family, flag, state, progress=gr.Progress(track_tqdm=True)):
430
  if flag:
431
  try:
432
  job_id = flag
 
435
  preset = PRESET_MAP[preset]
436
  target_family = TARGET_FAMILY_MAP[target_family]
437
  # email_hash = hashlib.sha256(email.encode()).hexdigest()
438
+ COLUMN_ALIASES.update({
439
+ 'Y': 'Actual interaction probability' if task == 'DTI' else 'Actual binding affinity',
440
+ 'Y^': 'Predicted interaction probability' if task == 'DTI' else 'Predicted binding affinity'
441
+ })
442
 
443
  # target_family_list = [target_family]
444
  # for family in target_family_list:
 
456
  predictions, _ = predict(cfg)
457
  predictions = [pd.DataFrame(prediction) for prediction in predictions]
458
  prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
459
+ prediction_df.set_index('N', inplace=True)
460
 
461
  predictions_file = f'temp/{job_id}_predictions.csv'
462
+ prediction_df.to_csv(predictions_file)
463
 
464
  return [predictions_file,
465
  False]
466
  except Exception as e:
467
  gr.Warning(f"Prediction job failed due to error: {str(e)}")
468
+ return {run_state: False}
 
 
469
  else:
470
+ return {run_state: state}
 
471
  #
472
  # except Exception as e:
473
  # raise gr.Error(str(e))
 
539
 
540
 
541
  def update_df(file, progress=gr.Progress(track_tqdm=True)):
542
+ # global DF_FOR_REPORT
543
+ if Path(file).is_file():
544
  df = pd.read_csv(file)
545
+ # if df['X1'].nunique() > 1:
546
+ df['Scaffold SMILES'] = df['X1'].swifter.progress_bar(
547
+ desc=f"Calculating scaffold...").apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)
548
+ # Add a new column with RDKit molecule objects
549
+ if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
550
+ PandasTools.AddMoleculeColumnToFrame(df, smilesCol='X1', molCol='Compound',
 
 
551
  includeFingerprints=True)
552
+ PandasTools.AddMoleculeColumnToFrame(df, smilesCol='Scaffold SMILES', molCol='Scaffold',
553
+ includeFingerprints=True)
554
+ # DF_FOR_REPORT = df.copy()
555
 
556
  # pie_chart = None
557
  # value = None
 
566
  # elif DF_FOR_REPORT['X2'].nunique() > 1 >= DF_FOR_REPORT['X1'].nunique():
567
  # pie_chart = create_pie_chart(DF_FOR_REPORT, category='Target family', value=value, top_k=100)
568
 
569
+ return {html_report: create_html_report(df),
570
+ raw_df: df,
571
+ report_df: df.copy(),
572
+ analyze_btn: gr.Button(interactive=True)} # pie_chart
573
  else:
574
+ return {analyze_btn: gr.Button(interactive=False)}
575
 
576
 
577
  def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
578
+ df_html = df.copy(deep=True)
579
+
580
+ cols_left = ['ID1', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y', 'Y^',]
581
  cols_right = ['X1', 'X2']
582
  cols_left = [col for col in cols_left if col in df_html.columns]
583
  cols_right = [col for col in cols_right if col in df_html.columns]
584
  df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
585
+
586
+ ascending = True if COLUMN_ALIASES['Y^'] == 'Predicted binding affinity' else False
587
  df_html = df_html.sort_values(
588
+ [col for col in ['Y', 'Y^'] if col in df_html.columns], ascending=ascending
589
+ )
590
+
591
+ # # Remove repeated info for one-against-N tasks to save visual and physical space
592
+ # if df_html['X1'].nunique() <= 1:
593
+ # columns_to_clean = ['X1', 'ID1', 'Scaffold', 'Compound'] + list(FILTER_MAP.keys()) + list(SCORE_MAP.keys())
594
+ # for column in columns_to_clean:
595
+ # if column in df_html.columns:
596
+ # df_html.loc[1:, column] = pd.NA
597
+ #
598
+ # if df_html['X2'].nunique() <= 1:
599
+ # columns_to_clean = ['X2', 'ID2']
600
+ # for column in columns_to_clean:
601
+ # if column in df_html.columns:
602
+ # df_html.loc[1:, column] = pd.NA
603
+
604
+ if not file:
605
+ df_html = df_html.iloc[:31]
606
+
607
+ # PandasTools.ChangeMoleculeRendering(df_html, renderer='image')
608
  # PandasTools.RenderImagesInAllDataFrames(images=True)
609
+ df_html['Compound'] = df_html['Compound'].swifter.progress_bar(
610
+ 'Generating compound graph...').apply(lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
611
+ df_html['Scaffold'] = df_html['Scaffold'].swifter.progress_bar(
612
+ 'Generating scaffold graph...').apply(lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
613
+ df_html = df_html.rename(columns=COLUMN_ALIASES)
614
+ df_html.index.name = 'Index'
615
 
616
  if not file:
617
+ if 'Compound ID' in df_html.columns:
618
+ df_html.drop(['Compound SMILES'], axis=1, inplace=True)
619
+ if 'Target ID' in df_html.columns:
620
+ df_html.drop(['Target FASTA'], axis=1, inplace=True)
621
+ if 'Target FASTA' in df_html.columns:
622
+ df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
623
+ 'Processing FASTA...').apply(lambda x: wrap_text(x) if not pd.isna(x) else x)
624
+ df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
625
+ # num_formatters = {col: "{:.3f}" for col in df.select_dtypes('number').columns}
626
+ styled_df = df_html.style.format(precision=3)
627
  colors = sns.color_palette('husl', len(df_html.columns))
628
  for i, col in enumerate(df_html.columns):
629
  if pd.api.types.is_numeric_dtype(df_html[col]):
 
634
  import panel as pn
635
  from bokeh.resources import INLINE
636
  from bokeh.models import NumberFormatter, BooleanFormatter
637
+
638
+ bool_formatters = {col: BooleanFormatter() for col in df_html.select_dtypes(bool).columns}
639
+ num_formatters = {col: NumberFormatter(format='0.000') for col in df_html.select_dtypes('number').columns}
640
+ other_formatters = {
641
+ 'Predicted interaction probability': {'type': 'progress', 'max': 1.0, 'legend': True},
642
+ 'Actual interaction probability': {'type': 'progress', 'max': 1.0, 'legend': True},
643
+ 'Compound': HTMLTemplateFormatter(),
644
+ 'Scaffold': HTMLTemplateFormatter(),
645
+ 'Target FASTA': {'type': 'textarea', 'width': 60},
646
  }
647
+ formatters = {**bool_formatters, **num_formatters, **other_formatters}
648
+
649
  # html = df.to_html(file)
650
  # return html
651
+ pn.widgets.Tabulator(df_html, formatters=formatters).save(file, resources=INLINE)
652
 
653
 
654
  # def create_pie_chart(df, category, value, top_k):
 
702
  return fig
703
 
704
 
705
+ def submit_report(df, score_list, filter_list, progress=gr.Progress(track_tqdm=True)):
706
+ df_report = df.copy()
707
  try:
708
  for filter_name in filter_list:
709
+ df_report[filter_name] = df_report['Compound'].swifter.progress_bar(
710
+ desc=f"Calculating {filter_name}").apply(
711
+ lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x, axis=1)
712
 
713
  for score_name in score_list:
714
+ df_report[score_name] = df_report['Compound'].swifter.progress_bar(
715
+ desc=f"Calculating {score_name}").apply(
716
+ lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x, axis=1)
717
 
718
  # pie_chart = None
719
  # value = None
 
728
  # elif df['X2'].nunique() > 1 >= df['X1'].nunique():
729
  # pie_chart = create_pie_chart(df, category='Target family', value=value, top_k=100)
730
 
731
+ return create_html_report(df_report), df_report # pie_chart
732
 
733
  except Exception as e:
734
+ gr.Warning(f'Failed to report results due to error: {str(e)}')
735
+ return None, None
736
 
737
  # def check_job_status(job_id):
738
  # job_lock = DATA_PATH / f"{job_id}.lock"
 
751
 
752
 
753
  def wrap_text(text, line_length=60):
754
+ if isinstance(text, str):
755
+ wrapper = textwrap.TextWrapper(width=line_length)
756
+ if text.startswith('>'):
757
+ sections = text.split('>')
758
+ wrapped_sections = []
759
+ for section in sections:
760
+ if not section:
761
+ continue
762
+ lines = section.split('\n')
763
+ seq_header = lines[0]
764
+ wrapped_seq = wrapper.fill(''.join(lines[1:]))
765
+ wrapped_sections.append(f">{seq_header}\n{wrapped_seq}")
766
+ return '\n'.join(wrapped_sections)
767
+ else:
768
+ return wrapper.fill(text)
769
  else:
770
+ return text
771
 
772
 
773
  def unwrap_text(text):
 
884
  visible=False, interactive=True, scale=4, )
885
 
886
  with gr.Row():
887
+ target_upload_btn = gr.UploadButton(label='Upload a FASTA file', type='binary',
888
+ visible=True, variant='primary',
889
+ size='lg')
890
+ target_query_btn = gr.Button(value='Query the sequence', variant='primary',
891
+ visible=False)
892
+ # with gr.Row():
893
+ # example_uniprot = gr.Button(value='Example: Q16539', elem_classes='example', visible=False)
894
+ # example_gene = gr.Button(value='Example: MAPK14', elem_classes='example', visible=False)
895
+ example_fasta = gr.Button(value='Example: Human MAPK14', elem_classes='example')
896
  target_fasta = gr.Code(label='Input or Display FASTA', interactive=True, lines=5)
897
  # with gr.Row():
898
  # with gr.Column():
 
899
  # with gr.Column():
900
  # gr.File(label='Example FASTA file',
901
  # value='data/examples/MAPK14.fasta', interactive=False)
 
904
  with gr.Column():
905
  HelpTip(
906
  "Click Auto-detect to identify the protein family using sequence alignment. "
907
+ "This optional step allows applying a family-specific model instead of a all-family "
908
+ "model (general)."
909
  "Manually select general if the alignment results are unsatisfactory."
910
  )
911
  drug_screen_target_family = gr.Dropdown(
 
938
  with gr.Row():
939
  with gr.Column():
940
  HelpTip(
941
+ "Interaction prediction provides you binding probability score between the target of "
942
+ "interest and each compound in the library,"
943
+ "while affinity prediction directly estimates their binding strength measured using "
944
+ "IC50."
945
  )
946
  drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
947
  label='Step 4. Select a Prediction Task',
 
950
  with gr.Row():
951
  with gr.Column():
952
  HelpTip(
953
+ "Select your preferred model, or click Recommend for the best-performing model based "
954
+ "on the selected task, family, and whether the target was trained."
955
  "Please refer to documentation for detailed benchamrk results."
956
  )
957
  drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
 
961
  with gr.Column():
962
  drug_screen_email = gr.Textbox(
963
  label='Step 6. Email (Optional)',
964
+ info="If an email is provided, a notification email will be sent to you when your job "
965
+ "is completed."
966
  )
967
 
968
  with gr.Row(visible=True):
 
993
  HelpTip(
994
  "Enter (paste) a compound SMILES below manually or upload a SDF file."
995
  "If multiple entities are in the SDF, only the first will be used."
996
+ "SMILES can be obtained by searching for the compound of interest in databases such "
997
+ "as NCBI, PubChem and and ChEMBL."
998
  )
999
  compound_type = gr.Dropdown(
1000
  label='Step 1. Select Compound Input Type and Input',
1001
  choices=['SMILES', 'SDF'],
1002
+ info='Enter (paste) an SMILES string or upload an SDF file to convert to SMILES.',
1003
  value='SMILES',
1004
  interactive=True)
1005
+ compound_upload_btn = gr.UploadButton(label='Upload', variant='primary',
1006
+ type='binary', visible=False)
1007
 
1008
  compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
1009
+ example_drug = gr.Button(value='Example: Aspirin', elem_classes='example')
1010
 
1011
  with gr.Row():
1012
  with gr.Column():
1013
  HelpTip(
1014
  "By default, models trained on all protein families (general) will be applied."
1015
+ "If the proteins in the target library of interest all belong to the same protein "
1016
+ "family, manually selecting the family is supported."
1017
  )
1018
  target_identify_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
1019
  value='General',
1020
+ label='Step 2. Select Target Protein Family ('
1021
+ 'Optional)')
1022
 
1023
  with gr.Row():
1024
  with gr.Column():
1025
  HelpTip(
1026
  "Select a preset target library (e.g., ChEMBL33_human_proteins)."
1027
+ "Alternatively, upload a CSV file with a column named X2 containing target protein "
1028
+ "sequences, or use an FASTA file."
1029
  )
1030
  target_library = gr.Dropdown(label='Step 3. Select or Upload a Target Library',
1031
  choices=list(TARGET_LIBRARY_MAP.keys()))
 
1041
  with gr.Row():
1042
  with gr.Column():
1043
  HelpTip(
1044
+ "Interaction prediction provides you binding probability score between the target of "
1045
+ "interest and each compound in the library,"
1046
+ "while affinity prediction directly estimates their binding strength measured using "
1047
+ "IC50."
1048
  )
1049
  target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
1050
  label='Step 4. Select a Prediction Task',
 
1053
  with gr.Row():
1054
  with gr.Column():
1055
  HelpTip(
1056
+ "Select your preferred model, or click Recommend for the best-performing model based "
1057
+ "on the selected task, family, and whether the compound was trained."
1058
  "Please refer to documentation for detailed benchamrk results."
1059
  )
1060
+ target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 5. Select a '
1061
+ 'Preset Model')
1062
  identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
1063
 
1064
  with gr.Row():
 
1085
  ''')
1086
  with gr.Blocks() as infer_block:
1087
  with gr.Column() as infer_page:
1088
+ infer_type = gr.Dropdown(
1089
+ choices=['Upload a compound library and a target library',
1090
+ 'Upload a CSV interaction pair dataset'],
1091
+ value='Upload a compound library and a target library')
1092
  with gr.Column() as pair_upload:
1093
+ gr.File(label="Example custom dataset",
1094
+ value="data/examples/interaction_pair_inference.csv",
1095
+ interactive=False)
1096
+ with gr.Column():
 
1097
  infer_data_for_predict = gr.File(
1098
+ label='Upload a custom dataset', file_count="single", type='filepath', visible=True)
1099
  with gr.Column() as pair_generate:
1100
  with gr.Row():
1101
+ gr.File(label='Example SDF compound library',
1102
  value='data/examples/compound_library.sdf', interactive=False)
1103
+ gr.File(label='Example FASTA target library',
1104
  value='data/examples/target_library.fasta', interactive=False)
1105
  with gr.Row():
1106
+ gr.File(label='Example CSV compound library',
1107
  value='data/examples/compound_library.csv', interactive=False)
1108
+ gr.File(label='Example CSV target library',
1109
  value='data/examples/target_library.csv', interactive=False)
1110
  with gr.Row():
1111
+ infer_drug = gr.File(label='SDF/CSV file containing multiple compounds',
1112
  file_count="single", type='filepath')
1113
+ infer_target = gr.File(label='FASTA/CSV file containing multiple targets',
1114
  file_count="single", type='filepath')
1115
 
1116
+ with gr.Row(visible=True):
1117
+ pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()), label='Task')
1118
+ pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
1119
+ pair_infer_target_family = gr.Dropdown(choices=['General'],
1120
+ label='Target family',
1121
+ value='General')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1122
 
1123
+ # with gr.Row():
1124
+ # pair_infer_email = gr.Textbox(
1125
+ # label='Email (optional)',
1126
+ # info="Your email will be used to send you notifications when your job finishes."
1127
+ # )
1128
 
1129
  with gr.Row(visible=True):
1130
  # pair_infer_clr_btn = gr.ClearButton(size='lg')
 
1139
  with gr.Blocks() as report:
1140
  gr.Markdown('''
1141
  # <center>DeepSEQreen Chemical Property Report</center>
1142
+ <center>
1143
  To compute chemical properties for the predictions of drug hit screening,
1144
+ target protein identification, and interaction pair inference. You may also upload
1145
+ your own dataset.
1146
+
1147
+ The page shows only a preview report displaying at most 30 records
1148
+ (with top predicted CPI/CPA if reporting results from a prediction job).
1149
 
1150
+ For a full report, please
1151
+ generate and download a CSV or interactive HTML report below.
1152
+ </center>
 
1153
  ''')
1154
  with gr.Row():
1155
  file_for_report = gr.File(interactive=True, type='filepath')
1156
+ raw_df = gr.State(value=pd.DataFrame())
1157
+ report_df = gr.State(value=pd.DataFrame())
1158
  scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
1159
  filters = gr.CheckboxGroup(list(FILTER_MAP.keys()), label='Filters')
1160
 
1161
  with gr.Row():
1162
  # clear_btn = gr.ClearButton(size='lg')
1163
+ analyze_btn = gr.Button('REPORT', variant='primary', size='lg', interactive=False)
1164
 
1165
  with gr.Row():
1166
  with gr.Column(scale=3):
 
1169
 
1170
  with gr.Row():
1171
  with gr.Column():
1172
+ csv_generate = gr.Button(value='Generate CSV Report',
1173
+ interactive=True, variant='primary', visible=False)
1174
+ csv_download_file = gr.File(label='Download CSV Report', visible=False)
1175
  with gr.Column():
1176
+ html_generate = gr.Button(value='Generate HTML Report',
1177
+ interactive=True, variant='primary', visible=False)
1178
+ html_download_file = gr.File(label='Download HTML Report', visible=False)
1179
 
1180
 
1181
  def target_input_type_select(input_type):
 
1272
  def example_fill(input_type):
1273
  return {target_id: 'Q16539',
1274
  target_gene: 'MAPK14',
1275
+ target_organism: 'Human',
1276
  target_fasta: """
1277
  >sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
1278
  MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
 
1284
  """}
1285
 
1286
 
1287
+ example_fasta.click(fn=example_fill, inputs=target_input_type, outputs=[
1288
+ target_id, target_gene, target_organism, target_fasta], show_progress=False)
1289
+ # example_uniprot.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
1290
+ # example_gene.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
1291
 
1292
  def screen_recommend_model(fasta, family, task):
1293
  task = TASK_MAP[task]
 
1298
  train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
1299
  score = 'CI'
1300
 
1301
+ if not np.isin(process_target_fasta(fasta), train['X2']):
1302
  scenario = "Unseen target"
1303
  else:
1304
  scenario = "Seen target"
 
1315
  & (benchmark_df['Scenario'] == scenario)
1316
  & (benchmark_df['all'] == False)]
1317
  row = filtered_df.loc[filtered_df[score].idxmax()]
1318
+
1319
  return gr.Dropdown(value=row['preset'],
1320
  info=f"Reason: {scenario} in the training dataset; we recommend the model "
1321
  f"with the best {score} ({float(row[score]):.3f}) "
 
1330
  def compound_input_type_select(input_type):
1331
  match input_type:
1332
  case 'SMILES':
1333
+ return gr.Button(visible=False)
1334
  case 'SDF':
1335
+ return gr.Button(visible=True)
1336
 
1337
 
1338
  compound_type.select(fn=compound_input_type_select,
1339
+ inputs=compound_type, outputs=compound_upload_btn, show_progress=False)
1340
 
1341
 
1342
  def compound_upload_process(input_type, input_upload):
 
1424
  screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
1425
  else:
1426
  screen_df = process_drug_library_upload(library_upload)
 
1427
  if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
1428
  raise gr.Error(f'The uploaded compound library has more records '
1429
  f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
 
1566
  ).then(
1567
  fn=submit_predict,
1568
  inputs=[screen_data_for_predict, drug_screen_task, drug_screen_preset,
1569
+ drug_screen_target_family, screen_flag, run_state], # , drug_screen_email],
1570
  outputs=[file_for_report, run_state]
1571
  ).then(
1572
  fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
 
1578
  inputs=[compound_smiles, target_library, target_library_upload, run_state], # , drug_screen_email],
1579
  outputs=[identify_data_for_predict, identify_flag, run_state]
1580
  ).then(
1581
+ fn=lambda: [gr.Column(visible=False), gr.Markdown(visible=True)],
1582
+ outputs=[identify_page, identify_waiting]
1583
  ).then(
1584
  fn=submit_predict,
1585
  inputs=[identify_data_for_predict, target_identify_task, target_identify_preset,
1586
+ target_identify_target_family, identify_flag, run_state], # , target_identify_email],
1587
  outputs=[file_for_report, run_state]
1588
  ).then(
1589
  fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
 
1600
  ).then(
1601
  fn=submit_predict,
1602
  inputs=[infer_data_for_predict, pair_infer_task, pair_infer_preset,
1603
+ pair_infer_target_family, infer_flag, run_state], # , pair_infer_email],
1604
  outputs=[file_for_report, run_state]
1605
  ).then(
1606
+ fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
1607
+ outputs=[infer_page, infer_waiting, tabs]
1608
  )
1609
 
1610
  # TODO background job from these 3 pipelines to update file_for_report
1611
 
1612
  file_for_report.change(fn=update_df, inputs=file_for_report, outputs=[
1613
  html_report,
1614
+ raw_df,
1615
+ report_df,
1616
+ analyze_btn
1617
  # ranking_pie_chart
1618
  ])
1619
+ analyze_btn.click(fn=submit_report, inputs=[raw_df, scores, filters], outputs=[
1620
  html_report,
1621
+ report_df,
1622
  # ranking_pie_chart
1623
  ])
1624
 
1625
 
1626
def create_csv_report_file(df, file_report):
    """
    Write the report table to a timestamped CSV under ``reports/``.

    The 'Compound' and 'Scaffold' columns are dropped before writing, and the
    file name is derived from the stem of ``file_report.name``.

    Returns a pair of component updates: the CSV as a visible file and a
    hidden button. On any failure a warning is shown and ``(None, None)`` is
    returned.
    """
    try:
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        source_stem = Path(file_report.name).stem
        filename = f"reports/{source_stem}_DeepSEQreen_report_{timestamp}.csv"

        exportable = df.drop(labels=['Compound', 'Scaffold'], axis=1)
        exportable.to_csv(filename, index=False)

        download_update = gr.File(filename, visible=True)
        button_update = gr.Button(visible=False)
        return download_update, button_update
    except Exception as e:
        gr.Warning(f"Failed to generate CSV due to error: {str(e)}")
        return None, None
1636
 
1637
  def create_html_report_file(df, file_report):
1638
+ try:
1639
+ now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
1640
+ filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.html"
1641
+ create_html_report(df, filename)
1642
+ return gr.File(filename, visible=True), gr.Button(visible=False)
1643
+ except Exception as e:
1644
+ gr.Warning(f"Failed to generate HTML due to error: {str(e)}")
1645
+ return None, None
1646
 
1647
+ html_report.change(lambda: [gr.Button(visible=True)] * 2, outputs=[csv_generate, html_generate])
1648
+ csv_generate.click(fn=create_csv_report_file, inputs=[report_df, file_for_report],
1649
+ outputs=[csv_download_file, csv_generate])
1650
+ html_generate.click(fn=create_html_report_file, inputs=[report_df, file_for_report],
1651
+ outputs=[html_download_file, html_generate])
1652
 
1653
  # screen_waiting.change(fn=check_job_status, inputs=run_state, outputs=[pair_waiting, tabs, file_for_report],
1654
  # every=5)
 
1671
  demo.launch(
1672
  show_api=False,
1673
  )