Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Upload app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import hashlib
|
2 |
import itertools
|
3 |
import json
|
@@ -21,6 +22,7 @@ import hydra
|
|
21 |
import pandas as pd
|
22 |
import plotly.express as px
|
23 |
import requests
|
|
|
24 |
from rdkit.Chem.rdMolDescriptors import CalcNumRotatableBonds, CalcNumHeavyAtoms, CalcNumAtoms, CalcTPSA
|
25 |
from requests.adapters import HTTPAdapter, Retry
|
26 |
from rdkit import Chem
|
@@ -39,7 +41,7 @@ import sascorer
|
|
39 |
|
40 |
ROOT = Path.cwd()
|
41 |
|
42 |
-
DF_FOR_REPORT = pd.DataFrame()
|
43 |
|
44 |
pd.set_option('display.float_format', '{:.3f}'.format)
|
45 |
PandasTools.molRepresentation = 'svg'
|
@@ -146,7 +148,7 @@ CSS = """
|
|
146 |
position: absolute;
|
147 |
}
|
148 |
|
149 |
-
|
150 |
padding: 0;
|
151 |
background: none;
|
152 |
border: none;
|
@@ -171,47 +173,47 @@ class HelpTip:
|
|
171 |
)
|
172 |
|
173 |
|
174 |
-
def sa_score(
|
175 |
-
return sascorer.calculateScore(
|
176 |
|
177 |
|
178 |
-
def mw(
|
179 |
-
return Chem.Descriptors.MolWt(
|
180 |
|
181 |
|
182 |
-
def mr(
|
183 |
-
return Crippen.MolMR(
|
184 |
|
185 |
|
186 |
-
def hbd(
|
187 |
-
return Lipinski.NumHDonors(
|
188 |
|
189 |
|
190 |
-
def hba(
|
191 |
-
return Lipinski.NumHAcceptors(
|
192 |
|
193 |
|
194 |
-
def logp(
|
195 |
-
return Crippen.MolLogP(
|
196 |
|
197 |
|
198 |
-
def atom(
|
199 |
-
return CalcNumAtoms(
|
200 |
|
201 |
|
202 |
-
def heavy_atom(
|
203 |
-
return CalcNumHeavyAtoms(
|
204 |
|
205 |
|
206 |
-
def rotatable_bond(
|
207 |
-
return CalcNumRotatableBonds((
|
208 |
|
209 |
|
210 |
-
def tpsa(
|
211 |
-
return CalcTPSA((
|
212 |
|
213 |
|
214 |
-
def lipinski(
|
215 |
"""
|
216 |
Lipinski's rules:
|
217 |
Hydrogen bond donors <= 5
|
@@ -219,19 +221,19 @@ def lipinski(row):
|
|
219 |
Molecular weight <= 500 daltons
|
220 |
logP <= 5
|
221 |
"""
|
222 |
-
if hbd(
|
223 |
return False
|
224 |
-
elif hba(
|
225 |
return False
|
226 |
-
elif mw(
|
227 |
return False
|
228 |
-
elif logp(
|
229 |
return False
|
230 |
else:
|
231 |
return True
|
232 |
|
233 |
|
234 |
-
def reos(
|
235 |
"""
|
236 |
Rapid Elimination Of Swill filter:
|
237 |
Molecular weight between 200 and 500
|
@@ -242,23 +244,23 @@ def reos(row):
|
|
242 |
Rotatable bond count between 0 and 8
|
243 |
Heavy atom count between 15 and 50
|
244 |
"""
|
245 |
-
if not 200 < mw(
|
246 |
return False
|
247 |
-
elif not -5.0 < logp(
|
248 |
return False
|
249 |
-
elif not 0 < hbd(
|
250 |
return False
|
251 |
-
elif not 0 < hba(
|
252 |
return False
|
253 |
-
elif not 0 < rotatable_bond(
|
254 |
return False
|
255 |
-
elif not 15 < heavy_atom(
|
256 |
return False
|
257 |
else:
|
258 |
return True
|
259 |
|
260 |
|
261 |
-
def ghose(
|
262 |
"""
|
263 |
Ghose drug like filter:
|
264 |
Molecular weight between 160 and 480
|
@@ -266,34 +268,34 @@ def ghose(row):
|
|
266 |
Atom count between 20 and 70
|
267 |
Molar refractivity between 40 and 130
|
268 |
"""
|
269 |
-
if not 160 < mw(
|
270 |
return False
|
271 |
-
elif not -0.4 < logp(
|
272 |
return False
|
273 |
-
elif not 20 < atom(
|
274 |
return False
|
275 |
-
elif not 40 < mr(
|
276 |
return False
|
277 |
else:
|
278 |
return True
|
279 |
|
280 |
|
281 |
-
def veber(
|
282 |
"""
|
283 |
The Veber filter is a rule of thumb filter for orally active drugs described in
|
284 |
Veber et al., J Med Chem. 2002; 45(12): 2615-23.:
|
285 |
Rotatable bonds <= 10
|
286 |
Topological polar surface area <= 140
|
287 |
"""
|
288 |
-
if not rotatable_bond(
|
289 |
return False
|
290 |
-
elif not tpsa(
|
291 |
return False
|
292 |
else:
|
293 |
return True
|
294 |
|
295 |
|
296 |
-
def rule_of_three(
|
297 |
"""
|
298 |
Rule of Three filter (Congreve et al., Drug Discov. Today. 8 (19): 876–7, (2003).):
|
299 |
Molecular weight <= 300
|
@@ -302,15 +304,15 @@ def rule_of_three(row):
|
|
302 |
H-bond acceptor count <= 3
|
303 |
Rotatable bond count <= 3
|
304 |
"""
|
305 |
-
if not mw(
|
306 |
return False
|
307 |
-
elif not logp(
|
308 |
return False
|
309 |
-
elif not hbd(
|
310 |
return False
|
311 |
-
elif not hba(
|
312 |
return False
|
313 |
-
elif not rotatable_bond(
|
314 |
return False
|
315 |
else:
|
316 |
return True
|
@@ -389,6 +391,9 @@ COLUMN_ALIASES = {
|
|
389 |
'X2': 'Target FASTA',
|
390 |
'ID1': 'Compound ID',
|
391 |
'ID2': 'Target ID',
|
|
|
|
|
|
|
392 |
}
|
393 |
|
394 |
|
@@ -421,7 +426,7 @@ def send_email(receiver, msg):
|
|
421 |
pass
|
422 |
|
423 |
|
424 |
-
def submit_predict(predict_filepath, task, preset, target_family, flag, progress=gr.Progress(track_tqdm=True)):
|
425 |
if flag:
|
426 |
try:
|
427 |
job_id = flag
|
@@ -430,10 +435,10 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
|
|
430 |
preset = PRESET_MAP[preset]
|
431 |
target_family = TARGET_FAMILY_MAP[target_family]
|
432 |
# email_hash = hashlib.sha256(email.encode()).hexdigest()
|
433 |
-
COLUMN_ALIASES
|
434 |
-
'Y': 'Actual interaction probability' if task == '
|
435 |
-
'Y^': 'Predicted interaction probability' if task == '
|
436 |
-
}
|
437 |
|
438 |
# target_family_list = [target_family]
|
439 |
# for family in target_family_list:
|
@@ -451,20 +456,18 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
|
|
451 |
predictions, _ = predict(cfg)
|
452 |
predictions = [pd.DataFrame(prediction) for prediction in predictions]
|
453 |
prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
|
|
|
454 |
|
455 |
predictions_file = f'temp/{job_id}_predictions.csv'
|
456 |
-
prediction_df.to_csv(predictions_file
|
457 |
|
458 |
return [predictions_file,
|
459 |
False]
|
460 |
except Exception as e:
|
461 |
gr.Warning(f"Prediction job failed due to error: {str(e)}")
|
462 |
-
return
|
463 |
-
False]
|
464 |
-
|
465 |
else:
|
466 |
-
return
|
467 |
-
False]
|
468 |
#
|
469 |
# except Exception as e:
|
470 |
# raise gr.Error(str(e))
|
@@ -536,19 +539,19 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
|
|
536 |
|
537 |
|
538 |
def update_df(file, progress=gr.Progress(track_tqdm=True)):
|
539 |
-
global DF_FOR_REPORT
|
540 |
-
if file
|
541 |
df = pd.read_csv(file)
|
542 |
-
if df['X1'].nunique() > 1:
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
includeFingerprints=True)
|
549 |
-
PandasTools.AddMoleculeColumnToFrame(df, smilesCol='Scaffold SMILES', molCol='Scaffold',
|
550 |
includeFingerprints=True)
|
551 |
-
|
|
|
|
|
552 |
|
553 |
# pie_chart = None
|
554 |
# value = None
|
@@ -563,30 +566,64 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
|
|
563 |
# elif DF_FOR_REPORT['X2'].nunique() > 1 >= DF_FOR_REPORT['X1'].nunique():
|
564 |
# pie_chart = create_pie_chart(DF_FOR_REPORT, category='Target family', value=value, top_k=100)
|
565 |
|
566 |
-
return create_html_report(
|
|
|
|
|
|
|
567 |
else:
|
568 |
-
return
|
569 |
|
570 |
|
571 |
def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
|
572 |
-
df_html = df.copy()
|
573 |
-
|
|
|
574 |
cols_right = ['X1', 'X2']
|
575 |
cols_left = [col for col in cols_left if col in df_html.columns]
|
576 |
cols_right = [col for col in cols_right if col in df_html.columns]
|
577 |
df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
|
578 |
-
|
|
|
579 |
df_html = df_html.sort_values(
|
580 |
-
[col for col in ['Y', 'Y^'
|
581 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
582 |
# PandasTools.RenderImagesInAllDataFrames(images=True)
|
583 |
-
|
584 |
-
|
585 |
-
|
|
|
|
|
|
|
586 |
|
587 |
if not file:
|
588 |
-
|
589 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
590 |
colors = sns.color_palette('husl', len(df_html.columns))
|
591 |
for i, col in enumerate(df_html.columns):
|
592 |
if pd.api.types.is_numeric_dtype(df_html[col]):
|
@@ -597,13 +634,21 @@ def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
|
|
597 |
import panel as pn
|
598 |
from bokeh.resources import INLINE
|
599 |
from bokeh.models import NumberFormatter, BooleanFormatter
|
600 |
-
|
601 |
-
|
602 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
603 |
}
|
|
|
|
|
604 |
# html = df.to_html(file)
|
605 |
# return html
|
606 |
-
pn.widgets.Tabulator(df_html, formatters=
|
607 |
|
608 |
|
609 |
# def create_pie_chart(df, category, value, top_k):
|
@@ -657,16 +702,18 @@ def create_pie_chart(df, category, value, top_k):
|
|
657 |
return fig
|
658 |
|
659 |
|
660 |
-
def submit_report(score_list, filter_list, progress=gr.Progress(track_tqdm=True)):
|
661 |
-
|
662 |
try:
|
663 |
for filter_name in filter_list:
|
664 |
-
|
665 |
-
|
|
|
666 |
|
667 |
for score_name in score_list:
|
668 |
-
|
669 |
-
|
|
|
670 |
|
671 |
# pie_chart = None
|
672 |
# value = None
|
@@ -681,11 +728,11 @@ def submit_report(score_list, filter_list, progress=gr.Progress(track_tqdm=True)
|
|
681 |
# elif df['X2'].nunique() > 1 >= df['X1'].nunique():
|
682 |
# pie_chart = create_pie_chart(df, category='Target family', value=value, top_k=100)
|
683 |
|
684 |
-
return create_html_report(
|
685 |
|
686 |
except Exception as e:
|
687 |
-
|
688 |
-
|
689 |
|
690 |
# def check_job_status(job_id):
|
691 |
# job_lock = DATA_PATH / f"{job_id}.lock"
|
@@ -704,20 +751,23 @@ def submit_report(score_list, filter_list, progress=gr.Progress(track_tqdm=True)
|
|
704 |
|
705 |
|
706 |
def wrap_text(text, line_length=60):
|
707 |
-
|
708 |
-
|
709 |
-
|
710 |
-
|
711 |
-
|
712 |
-
|
713 |
-
|
714 |
-
|
715 |
-
|
716 |
-
|
717 |
-
|
718 |
-
|
|
|
|
|
|
|
719 |
else:
|
720 |
-
return
|
721 |
|
722 |
|
723 |
def unwrap_text(text):
|
@@ -834,17 +884,18 @@ To predict interactions/binding affinities of a single target against a library
|
|
834 |
visible=False, interactive=True, scale=4, )
|
835 |
|
836 |
with gr.Row():
|
837 |
-
|
838 |
-
|
839 |
-
|
840 |
-
|
841 |
-
|
842 |
-
|
843 |
-
|
|
|
|
|
844 |
target_fasta = gr.Code(label='Input or Display FASTA', interactive=True, lines=5)
|
845 |
# with gr.Row():
|
846 |
# with gr.Column():
|
847 |
-
example_fasta = gr.Button(value='Example: Human MAPK14', elem_id='example')
|
848 |
# with gr.Column():
|
849 |
# gr.File(label='Example FASTA file',
|
850 |
# value='data/examples/MAPK14.fasta', interactive=False)
|
@@ -853,7 +904,8 @@ To predict interactions/binding affinities of a single target against a library
|
|
853 |
with gr.Column():
|
854 |
HelpTip(
|
855 |
"Click Auto-detect to identify the protein family using sequence alignment. "
|
856 |
-
"This optional step allows applying a family-specific model instead of a all-family
|
|
|
857 |
"Manually select general if the alignment results are unsatisfactory."
|
858 |
)
|
859 |
drug_screen_target_family = gr.Dropdown(
|
@@ -886,8 +938,10 @@ To predict interactions/binding affinities of a single target against a library
|
|
886 |
with gr.Row():
|
887 |
with gr.Column():
|
888 |
HelpTip(
|
889 |
-
"Interaction prediction provides you binding probability score between the target of
|
890 |
-
"
|
|
|
|
|
891 |
)
|
892 |
drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
|
893 |
label='Step 4. Select a Prediction Task',
|
@@ -896,7 +950,8 @@ To predict interactions/binding affinities of a single target against a library
|
|
896 |
with gr.Row():
|
897 |
with gr.Column():
|
898 |
HelpTip(
|
899 |
-
"Select your preferred model, or click Recommend for the best-performing model based
|
|
|
900 |
"Please refer to documentation for detailed benchamrk results."
|
901 |
)
|
902 |
drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
|
@@ -906,7 +961,8 @@ To predict interactions/binding affinities of a single target against a library
|
|
906 |
with gr.Column():
|
907 |
drug_screen_email = gr.Textbox(
|
908 |
label='Step 6. Email (Optional)',
|
909 |
-
info="If an email is provided, a notification email will be sent to you when your job
|
|
|
910 |
)
|
911 |
|
912 |
with gr.Row(visible=True):
|
@@ -937,34 +993,39 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
937 |
HelpTip(
|
938 |
"Enter (paste) a compound SMILES below manually or upload a SDF file."
|
939 |
"If multiple entities are in the SDF, only the first will be used."
|
940 |
-
"SMILES can be obtained by searching for the compound of interest in databases such
|
|
|
941 |
)
|
942 |
compound_type = gr.Dropdown(
|
943 |
label='Step 1. Select Compound Input Type and Input',
|
944 |
choices=['SMILES', 'SDF'],
|
945 |
-
info='Enter (paste) an SMILES string or upload an SDF file.',
|
946 |
value='SMILES',
|
947 |
interactive=True)
|
948 |
-
compound_upload_btn = gr.UploadButton(label='Upload', variant='primary',
|
|
|
949 |
|
950 |
compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
|
951 |
-
example_drug = gr.Button(value='Example: Aspirin',
|
952 |
|
953 |
with gr.Row():
|
954 |
with gr.Column():
|
955 |
HelpTip(
|
956 |
"By default, models trained on all protein families (general) will be applied."
|
957 |
-
"If the proteins in the target library of interest all belong to the same protein
|
|
|
958 |
)
|
959 |
target_identify_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
|
960 |
value='General',
|
961 |
-
label='Step 2. Select Target Protein Family (
|
|
|
962 |
|
963 |
with gr.Row():
|
964 |
with gr.Column():
|
965 |
HelpTip(
|
966 |
"Select a preset target library (e.g., ChEMBL33_human_proteins)."
|
967 |
-
"Alternatively, upload a CSV file with a column named X2 containing
|
|
|
968 |
)
|
969 |
target_library = gr.Dropdown(label='Step 3. Select or Upload a Target Library',
|
970 |
choices=list(TARGET_LIBRARY_MAP.keys()))
|
@@ -980,8 +1041,10 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
980 |
with gr.Row():
|
981 |
with gr.Column():
|
982 |
HelpTip(
|
983 |
-
"Interaction prediction provides you binding probability score between the target of
|
984 |
-
"
|
|
|
|
|
985 |
)
|
986 |
target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
|
987 |
label='Step 4. Select a Prediction Task',
|
@@ -990,11 +1053,12 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
990 |
with gr.Row():
|
991 |
with gr.Column():
|
992 |
HelpTip(
|
993 |
-
"Select your preferred model, or click Recommend for the best-performing model based
|
|
|
994 |
"Please refer to documentation for detailed benchamrk results."
|
995 |
)
|
996 |
-
target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()),
|
997 |
-
|
998 |
identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
999 |
|
1000 |
with gr.Row():
|
@@ -1021,69 +1085,46 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
1021 |
''')
|
1022 |
with gr.Blocks() as infer_block:
|
1023 |
with gr.Column() as infer_page:
|
1024 |
-
infer_type = gr.Dropdown(
|
1025 |
-
|
1026 |
-
|
1027 |
-
|
1028 |
with gr.Column() as pair_upload:
|
1029 |
-
|
1030 |
-
|
1031 |
-
|
1032 |
-
|
1033 |
-
with gr.Row():
|
1034 |
infer_data_for_predict = gr.File(
|
1035 |
-
label='Upload a
|
1036 |
with gr.Column() as pair_generate:
|
1037 |
with gr.Row():
|
1038 |
-
gr.File(label='Example SDF
|
1039 |
value='data/examples/compound_library.sdf', interactive=False)
|
1040 |
-
gr.File(label='Example FASTA
|
1041 |
value='data/examples/target_library.fasta', interactive=False)
|
1042 |
with gr.Row():
|
1043 |
-
gr.File(label='Example CSV
|
1044 |
value='data/examples/compound_library.csv', interactive=False)
|
1045 |
-
gr.File(label='Example CSV
|
1046 |
value='data/examples/target_library.csv', interactive=False)
|
1047 |
with gr.Row():
|
1048 |
-
infer_drug = gr.File(label='SDF/CSV
|
1049 |
file_count="single", type='filepath')
|
1050 |
-
infer_target = gr.File(label='FASTA/CSV
|
1051 |
file_count="single", type='filepath')
|
1052 |
|
1053 |
-
with gr.Row():
|
1054 |
-
|
1055 |
-
|
1056 |
-
|
1057 |
-
|
1058 |
-
|
1059 |
-
pair_infer_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
|
1060 |
-
value='General',
|
1061 |
-
label='Step 2. Select Target Protein Family (Optional)')
|
1062 |
-
|
1063 |
-
with gr.Row():
|
1064 |
-
with gr.Column():
|
1065 |
-
HelpTip(
|
1066 |
-
"Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
|
1067 |
-
"while affinity prediction directly estimates their binding strength measured using IC50."
|
1068 |
-
)
|
1069 |
-
pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()),
|
1070 |
-
label='Step 3. Select a Prediction Task',
|
1071 |
-
value='Compound-protein interaction')
|
1072 |
-
|
1073 |
-
with gr.Row():
|
1074 |
-
with gr.Column():
|
1075 |
-
HelpTip("Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and random splitting validation."
|
1076 |
-
"Please refer to documentation for detailed benchamrk results."
|
1077 |
-
)
|
1078 |
-
pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 4. Select a Preset Model')
|
1079 |
-
infer_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
1080 |
-
|
1081 |
|
1082 |
-
with gr.Row():
|
1083 |
-
|
1084 |
-
|
1085 |
-
|
1086 |
-
|
1087 |
|
1088 |
with gr.Row(visible=True):
|
1089 |
# pair_infer_clr_btn = gr.ClearButton(size='lg')
|
@@ -1098,23 +1139,28 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
1098 |
with gr.Blocks() as report:
|
1099 |
gr.Markdown('''
|
1100 |
# <center>DeepSEQreen Chemical Property Report</center>
|
|
|
1101 |
To compute chemical properties for the predictions of drug hit screening,
|
1102 |
-
target protein identification, and interaction pair inference.
|
|
|
|
|
|
|
|
|
1103 |
|
1104 |
-
|
1105 |
-
|
1106 |
-
|
1107 |
-
generate and download a raw data CSV or interactive table HTML file below.
|
1108 |
''')
|
1109 |
with gr.Row():
|
1110 |
file_for_report = gr.File(interactive=True, type='filepath')
|
1111 |
-
|
|
|
1112 |
scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
|
1113 |
filters = gr.CheckboxGroup(list(FILTER_MAP.keys()), label='Filters')
|
1114 |
|
1115 |
with gr.Row():
|
1116 |
# clear_btn = gr.ClearButton(size='lg')
|
1117 |
-
analyze_btn = gr.Button('REPORT', variant='primary', size='lg')
|
1118 |
|
1119 |
with gr.Row():
|
1120 |
with gr.Column(scale=3):
|
@@ -1123,11 +1169,13 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
1123 |
|
1124 |
with gr.Row():
|
1125 |
with gr.Column():
|
1126 |
-
csv_generate = gr.Button(value='Generate
|
1127 |
-
|
|
|
1128 |
with gr.Column():
|
1129 |
-
html_generate = gr.Button(value='Generate
|
1130 |
-
|
|
|
1131 |
|
1132 |
|
1133 |
def target_input_type_select(input_type):
|
@@ -1224,7 +1272,7 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
1224 |
def example_fill(input_type):
|
1225 |
return {target_id: 'Q16539',
|
1226 |
target_gene: 'MAPK14',
|
1227 |
-
target_organism: '
|
1228 |
target_fasta: """
|
1229 |
>sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
|
1230 |
MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
|
@@ -1236,9 +1284,10 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
1236 |
"""}
|
1237 |
|
1238 |
|
1239 |
-
example_fasta.click(fn=example_fill, inputs=target_input_type,
|
1240 |
-
|
1241 |
-
|
|
|
1242 |
|
1243 |
def screen_recommend_model(fasta, family, task):
|
1244 |
task = TASK_MAP[task]
|
@@ -1249,7 +1298,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
1249 |
train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
|
1250 |
score = 'CI'
|
1251 |
|
1252 |
-
if
|
1253 |
scenario = "Unseen target"
|
1254 |
else:
|
1255 |
scenario = "Seen target"
|
@@ -1266,6 +1315,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
1266 |
& (benchmark_df['Scenario'] == scenario)
|
1267 |
& (benchmark_df['all'] == False)]
|
1268 |
row = filtered_df.loc[filtered_df[score].idxmax()]
|
|
|
1269 |
return gr.Dropdown(value=row['preset'],
|
1270 |
info=f"Reason: {scenario} in the training dataset; we recommend the model "
|
1271 |
f"with the best {score} ({float(row[score]):.3f}) "
|
@@ -1280,13 +1330,13 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
1280 |
def compound_input_type_select(input_type):
|
1281 |
match input_type:
|
1282 |
case 'SMILES':
|
1283 |
-
return gr.
|
1284 |
case 'SDF':
|
1285 |
-
return gr.
|
1286 |
|
1287 |
|
1288 |
compound_type.select(fn=compound_input_type_select,
|
1289 |
-
inputs=compound_type, outputs=
|
1290 |
|
1291 |
|
1292 |
def compound_upload_process(input_type, input_upload):
|
@@ -1374,7 +1424,6 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
1374 |
screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
|
1375 |
else:
|
1376 |
screen_df = process_drug_library_upload(library_upload)
|
1377 |
-
print(screen_df.shape)
|
1378 |
if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
|
1379 |
raise gr.Error(f'The uploaded compound library has more records '
|
1380 |
f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
|
@@ -1517,7 +1566,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
1517 |
).then(
|
1518 |
fn=submit_predict,
|
1519 |
inputs=[screen_data_for_predict, drug_screen_task, drug_screen_preset,
|
1520 |
-
drug_screen_target_family, screen_flag], # , drug_screen_email],
|
1521 |
outputs=[file_for_report, run_state]
|
1522 |
).then(
|
1523 |
fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
|
@@ -1529,12 +1578,12 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
1529 |
inputs=[compound_smiles, target_library, target_library_upload, run_state], # , drug_screen_email],
|
1530 |
outputs=[identify_data_for_predict, identify_flag, run_state]
|
1531 |
).then(
|
1532 |
-
fn=lambda: [gr.Column(visible=False), gr.Markdown(visible=True)
|
1533 |
-
outputs=[identify_page, identify_waiting
|
1534 |
).then(
|
1535 |
fn=submit_predict,
|
1536 |
inputs=[identify_data_for_predict, target_identify_task, target_identify_preset,
|
1537 |
-
target_identify_target_family, identify_flag], # , target_identify_email],
|
1538 |
outputs=[file_for_report, run_state]
|
1539 |
).then(
|
1540 |
fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
|
@@ -1551,45 +1600,55 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
1551 |
).then(
|
1552 |
fn=submit_predict,
|
1553 |
inputs=[infer_data_for_predict, pair_infer_task, pair_infer_preset,
|
1554 |
-
pair_infer_target_family, infer_flag], # , pair_infer_email],
|
1555 |
outputs=[file_for_report, run_state]
|
1556 |
).then(
|
1557 |
-
fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False)],
|
1558 |
-
outputs=[infer_page, infer_waiting]
|
1559 |
)
|
1560 |
|
1561 |
# TODO background job from these 3 pipelines to update file_for_report
|
1562 |
|
1563 |
file_for_report.change(fn=update_df, inputs=file_for_report, outputs=[
|
1564 |
html_report,
|
1565 |
-
|
|
|
|
|
1566 |
# ranking_pie_chart
|
1567 |
])
|
1568 |
-
analyze_btn.click(fn=submit_report, inputs=[scores, filters], outputs=[
|
1569 |
html_report,
|
1570 |
-
|
1571 |
# ranking_pie_chart
|
1572 |
])
|
1573 |
|
1574 |
|
1575 |
-
def
|
1576 |
-
|
1577 |
-
|
1578 |
-
|
1579 |
-
|
1580 |
-
return gr.File(filename, visible=True)
|
1581 |
|
|
|
|
|
|
|
|
|
1582 |
|
1583 |
def create_html_report_file(df, file_report):
|
1584 |
-
|
1585 |
-
|
1586 |
-
|
1587 |
-
|
1588 |
-
|
1589 |
-
|
|
|
|
|
1590 |
|
1591 |
-
|
1592 |
-
|
|
|
|
|
|
|
1593 |
|
1594 |
# screen_waiting.change(fn=check_job_status, inputs=run_state, outputs=[pair_waiting, tabs, file_for_report],
|
1595 |
# every=5)
|
@@ -1612,5 +1671,3 @@ if __name__ == "__main__":
|
|
1612 |
demo.launch(
|
1613 |
show_api=False,
|
1614 |
)
|
1615 |
-
|
1616 |
-
#%%
|
|
|
1 |
+
from datetime import datetime
|
2 |
import hashlib
|
3 |
import itertools
|
4 |
import json
|
|
|
22 |
import pandas as pd
|
23 |
import plotly.express as px
|
24 |
import requests
|
25 |
+
from bokeh.models import HTMLTemplateFormatter, StringFormatter
|
26 |
from rdkit.Chem.rdMolDescriptors import CalcNumRotatableBonds, CalcNumHeavyAtoms, CalcNumAtoms, CalcTPSA
|
27 |
from requests.adapters import HTTPAdapter, Retry
|
28 |
from rdkit import Chem
|
|
|
41 |
|
42 |
ROOT = Path.cwd()
|
43 |
|
44 |
+
# DF_FOR_REPORT = pd.DataFrame()
|
45 |
|
46 |
pd.set_option('display.float_format', '{:.3f}'.format)
|
47 |
PandasTools.molRepresentation = 'svg'
|
|
|
148 |
position: absolute;
|
149 |
}
|
150 |
|
151 |
+
.example {
|
152 |
padding: 0;
|
153 |
background: none;
|
154 |
border: none;
|
|
|
173 |
)
|
174 |
|
175 |
|
176 |
+
def sa_score(mol):
    """Score ``mol`` with ``sascorer.calculateScore`` and return the result.

    :param mol: molecule object accepted by ``sascorer.calculateScore``
        (presumably an RDKit ``Mol`` -- confirm against callers).
    :return: whatever ``sascorer.calculateScore`` returns for ``mol``.
    """
    score = sascorer.calculateScore(mol)
    return score
|
178 |
|
179 |
|
180 |
+
def mw(mol):
    """Return the molecular weight of ``mol``.

    Thin wrapper around ``Chem.Descriptors.MolWt`` so the drug-likeness
    filters can refer to the descriptor by a short name.
    """
    molecular_weight = Chem.Descriptors.MolWt(mol)
    return molecular_weight
|
182 |
|
183 |
|
184 |
+
def mr(mol):
    """Return the molar refractivity of ``mol`` as computed by ``Crippen.MolMR``."""
    molar_refractivity = Crippen.MolMR(mol)
    return molar_refractivity
|
186 |
|
187 |
|
188 |
+
def hbd(mol):
    """Return the hydrogen-bond donor count of ``mol`` via ``Lipinski.NumHDonors``."""
    donor_count = Lipinski.NumHDonors(mol)
    return donor_count
|
190 |
|
191 |
|
192 |
+
def hba(mol):
    """Return the hydrogen-bond acceptor count of ``mol`` via ``Lipinski.NumHAcceptors``."""
    acceptor_count = Lipinski.NumHAcceptors(mol)
    return acceptor_count
|
194 |
|
195 |
|
196 |
+
def logp(mol):
    """Return the logP estimate of ``mol`` as computed by ``Crippen.MolLogP``."""
    log_p = Crippen.MolLogP(mol)
    return log_p
|
198 |
|
199 |
|
200 |
+
def atom(mol):
    """Return the atom count of ``mol`` as reported by ``CalcNumAtoms``."""
    atom_count = CalcNumAtoms(mol)
    return atom_count
|
202 |
|
203 |
|
204 |
+
def heavy_atom(mol):
    """Return the heavy-atom count of ``mol`` as reported by ``CalcNumHeavyAtoms``."""
    heavy_atom_count = CalcNumHeavyAtoms(mol)
    return heavy_atom_count
|
206 |
|
207 |
|
208 |
+
def rotatable_bond(mol):
    """Return the rotatable-bond count of ``mol`` via ``CalcNumRotatableBonds``."""
    rotatable_count = CalcNumRotatableBonds(mol)
    return rotatable_count
|
210 |
|
211 |
|
212 |
+
def tpsa(mol):
    """Return the topological polar surface area of ``mol`` via ``CalcTPSA``."""
    polar_surface_area = CalcTPSA(mol)
    return polar_surface_area
|
214 |
|
215 |
|
216 |
+
def lipinski(mol):
    """Check ``mol`` against Lipinski's rules.

    The molecule passes when none of the following limits is exceeded:
        Hydrogen bond donors <= 5
        Hydrogen bond acceptors <= 10
        Molecular weight <= 500 daltons
        logP <= 5

    :param mol: molecule passed unchanged to the ``hbd``, ``hba``, ``mw``
        and ``logp`` descriptor helpers.
    :return: ``True`` if no limit is exceeded, ``False`` otherwise.
    """
    # ``or`` short-circuits, so descriptors are evaluated in the listed order
    # and evaluation stops at the first violated rule.
    violated = (
        hbd(mol) > 5
        or hba(mol) > 10
        or mw(mol) > 500
        or logp(mol) > 5
    )
    return not violated
|
234 |
|
235 |
|
236 |
+
def reos(mol):
    """Apply the Rapid Elimination Of Swill (REOS) filter to ``mol``.

    The molecule passes when every descriptor lies inside its range
    (both ends inclusive):
        Molecular weight between 200 and 500
        logP between -5.0 and 5.0
        H-bond donor count between 0 and 5
        H-bond acceptor count between 0 and 10
        Rotatable bond count between 0 and 8
        Heavy atom count between 15 and 50

    :param mol: molecule passed unchanged to the ``mw``, ``logp``, ``hbd``,
        ``hba``, ``rotatable_bond`` and ``heavy_atom`` descriptor helpers.
    :return: ``True`` if all ranges are satisfied, ``False`` otherwise.
    """
    # Bounds are inclusive on purpose: donor/acceptor/rotatable-bond counts
    # are never negative, so an exclusive lower bound of 0 would reject every
    # molecule with a zero count (e.g. fully rigid molecules or molecules
    # without H-bond donors), and an exclusive upper bound would reject the
    # documented limit values themselves.
    ranges = (
        (200, mw, 500),
        (-5.0, logp, 5.0),
        (0, hbd, 5),
        (0, hba, 10),
        (0, rotatable_bond, 8),
        (15, heavy_atom, 50),
    )
    # all() consumes the generator lazily: descriptors are computed in the
    # order above and evaluation stops at the first range that is violated.
    return all(low <= descriptor(mol) <= high for low, descriptor, high in ranges)
|
261 |
|
262 |
|
263 |
+
def ghose(mol):
    """Apply the Ghose drug-likeness filter to ``mol``.

    The molecule passes when every descriptor lies strictly inside its range:
        Molecular weight between 160 and 480
        logP between -0.4 and 5.6
        Atom count between 20 and 70
        Molar refractivity between 40 and 130

    NOTE(review): the comparisons are exclusive at both ends, so a value
    exactly on a limit (e.g. 20 atoms) fails -- confirm this is intended.

    :param mol: molecule passed unchanged to the ``mw``, ``logp``, ``atom``
        and ``mr`` descriptor helpers.
    :return: ``True`` if all ranges are satisfied, ``False`` otherwise.
    """
    limits = (
        (160, mw, 480),
        (-0.4, logp, 5.6),
        (20, atom, 70),
        (40, mr, 130),
    )
    # Lazy evaluation keeps the original order and stops at the first failure.
    return all(lower < descriptor(mol) < upper for lower, descriptor, upper in limits)
|
281 |
|
282 |
|
283 |
+
def veber(mol):
    """Apply the Veber filter to ``mol``.

    Rule-of-thumb filter for orally active drugs described in
    Veber et al., J Med Chem. 2002; 45(12): 2615-23.:
        Rotatable bonds <= 10
        Topological polar surface area <= 140

    :param mol: molecule passed unchanged to the ``rotatable_bond`` and
        ``tpsa`` descriptor helpers.
    :return: ``True`` if both limits are respected, ``False`` otherwise.
    """
    # The rotatable-bond check runs first; TPSA is only computed when it passes.
    flexible_enough = rotatable_bond(mol) <= 10
    if not flexible_enough:
        return False
    return bool(tpsa(mol) <= 140)
|
296 |
|
297 |
|
298 |
+
def rule_of_three(mol):
    """Apply the Rule of Three filter to ``mol``.

    Reference: Congreve et al., Drug Discov. Today. 8 (19): 876-7, (2003).
    The molecule passes when all of the following hold:
        Molecular weight <= 300
        logP <= 3
        H-bond donor count <= 3
        H-bond acceptor count <= 3
        Rotatable bond count <= 3

    :param mol: molecule passed unchanged to the ``mw``, ``logp``, ``hbd``,
        ``hba`` and ``rotatable_bond`` descriptor helpers.
    :return: ``True`` if every limit is respected, ``False`` otherwise.
    """
    ceilings = (
        (mw, 300),
        (logp, 3),
        (hbd, 3),
        (hba, 3),
        (rotatable_bond, 3),
    )
    # Evaluated lazily in the order above; stops at the first exceeded limit.
    return all(descriptor(mol) <= ceiling for descriptor, ceiling in ceilings)
|
|
|
391 |
'X2': 'Target FASTA',
|
392 |
'ID1': 'Compound ID',
|
393 |
'ID2': 'Target ID',
|
394 |
+
'Y': 'Actual CPI/CPA',
|
395 |
+
'Y^': 'Predicted CPI/CPA',
|
396 |
+
'N': 'Original Index'
|
397 |
}
|
398 |
|
399 |
|
|
|
426 |
pass
|
427 |
|
428 |
|
429 |
+
def submit_predict(predict_filepath, task, preset, target_family, flag, state, progress=gr.Progress(track_tqdm=True)):
|
430 |
if flag:
|
431 |
try:
|
432 |
job_id = flag
|
|
|
435 |
preset = PRESET_MAP[preset]
|
436 |
target_family = TARGET_FAMILY_MAP[target_family]
|
437 |
# email_hash = hashlib.sha256(email.encode()).hexdigest()
|
438 |
+
COLUMN_ALIASES.update({
|
439 |
+
'Y': 'Actual interaction probability' if task == 'DTI' else 'Actual binding affinity',
|
440 |
+
'Y^': 'Predicted interaction probability' if task == 'DTI' else 'Predicted binding affinity'
|
441 |
+
})
|
442 |
|
443 |
# target_family_list = [target_family]
|
444 |
# for family in target_family_list:
|
|
|
456 |
predictions, _ = predict(cfg)
|
457 |
predictions = [pd.DataFrame(prediction) for prediction in predictions]
|
458 |
prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
|
459 |
+
prediction_df.set_index('N', inplace=True)
|
460 |
|
461 |
predictions_file = f'temp/{job_id}_predictions.csv'
|
462 |
+
prediction_df.to_csv(predictions_file)
|
463 |
|
464 |
return [predictions_file,
|
465 |
False]
|
466 |
except Exception as e:
|
467 |
gr.Warning(f"Prediction job failed due to error: {str(e)}")
|
468 |
+
return {run_state: False}
|
|
|
|
|
469 |
else:
|
470 |
+
return {run_state: state}
|
|
|
471 |
#
|
472 |
# except Exception as e:
|
473 |
# raise gr.Error(str(e))
|
|
|
539 |
|
540 |
|
541 |
def update_df(file, progress=gr.Progress(track_tqdm=True)):
|
542 |
+
# global DF_FOR_REPORT
|
543 |
+
if Path(file).is_file():
|
544 |
df = pd.read_csv(file)
|
545 |
+
# if df['X1'].nunique() > 1:
|
546 |
+
df['Scaffold SMILES'] = df['X1'].swifter.progress_bar(
|
547 |
+
desc=f"Calculating scaffold...").apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)
|
548 |
+
# Add a new column with RDKit molecule objects
|
549 |
+
if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
|
550 |
+
PandasTools.AddMoleculeColumnToFrame(df, smilesCol='X1', molCol='Compound',
|
|
|
|
|
551 |
includeFingerprints=True)
|
552 |
+
PandasTools.AddMoleculeColumnToFrame(df, smilesCol='Scaffold SMILES', molCol='Scaffold',
|
553 |
+
includeFingerprints=True)
|
554 |
+
# DF_FOR_REPORT = df.copy()
|
555 |
|
556 |
# pie_chart = None
|
557 |
# value = None
|
|
|
566 |
# elif DF_FOR_REPORT['X2'].nunique() > 1 >= DF_FOR_REPORT['X1'].nunique():
|
567 |
# pie_chart = create_pie_chart(DF_FOR_REPORT, category='Target family', value=value, top_k=100)
|
568 |
|
569 |
+
return {html_report: create_html_report(df),
|
570 |
+
raw_df: df,
|
571 |
+
report_df: df.copy(),
|
572 |
+
analyze_btn: gr.Button(interactive=True)} # pie_chart
|
573 |
else:
|
574 |
+
return {analyze_btn: gr.Button(interactive=False)}
|
575 |
|
576 |
|
577 |
def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
|
578 |
+
df_html = df.copy(deep=True)
|
579 |
+
|
580 |
+
cols_left = ['ID1', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y', 'Y^',]
|
581 |
cols_right = ['X1', 'X2']
|
582 |
cols_left = [col for col in cols_left if col in df_html.columns]
|
583 |
cols_right = [col for col in cols_right if col in df_html.columns]
|
584 |
df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
|
585 |
+
|
586 |
+
ascending = True if COLUMN_ALIASES['Y^'] == 'Predicted binding affinity' else False
|
587 |
df_html = df_html.sort_values(
|
588 |
+
[col for col in ['Y', 'Y^'] if col in df_html.columns], ascending=ascending
|
589 |
+
)
|
590 |
+
|
591 |
+
# # Remove repeated info for one-against-N tasks to save visual and physical space
|
592 |
+
# if df_html['X1'].nunique() <= 1:
|
593 |
+
# columns_to_clean = ['X1', 'ID1', 'Scaffold', 'Compound'] + list(FILTER_MAP.keys()) + list(SCORE_MAP.keys())
|
594 |
+
# for column in columns_to_clean:
|
595 |
+
# if column in df_html.columns:
|
596 |
+
# df_html.loc[1:, column] = pd.NA
|
597 |
+
#
|
598 |
+
# if df_html['X2'].nunique() <= 1:
|
599 |
+
# columns_to_clean = ['X2', 'ID2']
|
600 |
+
# for column in columns_to_clean:
|
601 |
+
# if column in df_html.columns:
|
602 |
+
# df_html.loc[1:, column] = pd.NA
|
603 |
+
|
604 |
+
if not file:
|
605 |
+
df_html = df_html.iloc[:31]
|
606 |
+
|
607 |
+
# PandasTools.ChangeMoleculeRendering(df_html, renderer='image')
|
608 |
# PandasTools.RenderImagesInAllDataFrames(images=True)
|
609 |
+
df_html['Compound'] = df_html['Compound'].swifter.progress_bar(
|
610 |
+
'Generating compound graph...').apply(lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
611 |
+
df_html['Scaffold'] = df_html['Scaffold'].swifter.progress_bar(
|
612 |
+
'Generating scaffold graph...').apply(lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
613 |
+
df_html = df_html.rename(columns=COLUMN_ALIASES)
|
614 |
+
df_html.index.name = 'Index'
|
615 |
|
616 |
if not file:
|
617 |
+
if 'Compound ID' in df_html.columns:
|
618 |
+
df_html.drop(['Compound SMILES'], axis=1, inplace=True)
|
619 |
+
if 'Target ID' in df_html.columns:
|
620 |
+
df_html.drop(['Target FASTA'], axis=1, inplace=True)
|
621 |
+
if 'Target FASTA' in df_html.columns:
|
622 |
+
df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
|
623 |
+
'Processing FASTA...').apply(lambda x: wrap_text(x) if not pd.isna(x) else x)
|
624 |
+
df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
|
625 |
+
# num_formatters = {col: "{:.3f}" for col in df.select_dtypes('number').columns}
|
626 |
+
styled_df = df_html.style.format(precision=3)
|
627 |
colors = sns.color_palette('husl', len(df_html.columns))
|
628 |
for i, col in enumerate(df_html.columns):
|
629 |
if pd.api.types.is_numeric_dtype(df_html[col]):
|
|
|
634 |
import panel as pn
|
635 |
from bokeh.resources import INLINE
|
636 |
from bokeh.models import NumberFormatter, BooleanFormatter
|
637 |
+
|
638 |
+
bool_formatters = {col: BooleanFormatter() for col in df_html.select_dtypes(bool).columns}
|
639 |
+
num_formatters = {col: NumberFormatter(format='0.000') for col in df_html.select_dtypes('number').columns}
|
640 |
+
other_formatters = {
|
641 |
+
'Predicted interaction probability': {'type': 'progress', 'max': 1.0, 'legend': True},
|
642 |
+
'Actual interaction probability': {'type': 'progress', 'max': 1.0, 'legend': True},
|
643 |
+
'Compound': HTMLTemplateFormatter(),
|
644 |
+
'Scaffold': HTMLTemplateFormatter(),
|
645 |
+
'Target FASTA': {'type': 'textarea', 'width': 60},
|
646 |
}
|
647 |
+
formatters = {**bool_formatters, **num_formatters, **other_formatters}
|
648 |
+
|
649 |
# html = df.to_html(file)
|
650 |
# return html
|
651 |
+
pn.widgets.Tabulator(df_html, formatters=formatters).save(file, resources=INLINE)
|
652 |
|
653 |
|
654 |
# def create_pie_chart(df, category, value, top_k):
|
|
|
702 |
return fig
|
703 |
|
704 |
|
705 |
+
def submit_report(df, score_list, filter_list, progress=gr.Progress(track_tqdm=True)):
    """Add the selected score/filter columns to a copy of ``df`` and re-render the preview.

    Args:
        df: Data frame holding a ``Compound`` column of molecule objects.
        score_list: Names of scores to compute; each must be a key of ``SCORE_MAP``.
        filter_list: Names of filters to compute; each must be a key of ``FILTER_MAP``.
        progress: Gradio progress tracker (constructed with ``track_tqdm=True``).

    Returns:
        ``(html_preview, df_report)`` on success. On any error a Gradio warning
        is shown and ``(None, None)`` is returned.
    """
    df_report = df.copy()
    try:
        # NOTE: no ``axis`` argument here. ``Series.apply`` forwards unknown
        # keyword arguments to the applied function, so ``axis=1`` would reach
        # the single-argument lambda and raise TypeError.
        for filter_name in filter_list:
            filter_fn = FILTER_MAP[filter_name]
            df_report[filter_name] = df_report['Compound'].swifter.progress_bar(
                desc=f"Calculating {filter_name}").apply(
                lambda x: filter_fn(x) if not pd.isna(x) else x)

        for score_name in score_list:
            score_fn = SCORE_MAP[score_name]
            df_report[score_name] = df_report['Compound'].swifter.progress_bar(
                desc=f"Calculating {score_name}").apply(
                lambda x: score_fn(x) if not pd.isna(x) else x)

        # pie_chart = None
        # value = None
        # elif df['X2'].nunique() > 1 >= df['X1'].nunique():
        #     pie_chart = create_pie_chart(df, category='Target family', value=value, top_k=100)

        return create_html_report(df_report), df_report  # pie_chart

    except Exception as e:
        gr.Warning(f'Failed to report results due to error: {str(e)}')
        return None, None
|
736 |
|
737 |
# def check_job_status(job_id):
|
738 |
# job_lock = DATA_PATH / f"{job_id}.lock"
|
|
|
751 |
|
752 |
|
753 |
def wrap_text(text, line_length=60):
    """Hard-wrap a sequence or FASTA string to ``line_length`` characters per line.

    Text starting with ``'>'`` is treated as FASTA: every line that begins with
    ``'>'`` is kept verbatim as a record header, and the lines following it are
    joined and re-wrapped. Any other string is wrapped as a whole.

    Args:
        text: Value to wrap. Non-string values are returned unchanged.
        line_length: Maximum width of each wrapped line.

    Returns:
        The wrapped string, or ``text`` itself when it is not a string.
    """
    if not isinstance(text, str):
        return text

    wrapper = textwrap.TextWrapper(width=line_length)
    if not text.startswith('>'):
        return wrapper.fill(text)

    # Detect records by lines that START with '>' rather than splitting on
    # every '>' character, so headers containing '>' (e.g. "A->B") stay intact.
    records = []
    for line in text.split('\n'):
        if line.startswith('>'):
            records.append((line, []))
        elif records:
            records[-1][1].append(line)

    return '\n'.join(
        f"{header}\n{wrapper.fill(''.join(seq_lines))}" for header, seq_lines in records
    )
|
771 |
|
772 |
|
773 |
def unwrap_text(text):
|
|
|
884 |
visible=False, interactive=True, scale=4, )
|
885 |
|
886 |
with gr.Row():
|
887 |
+
target_upload_btn = gr.UploadButton(label='Upload a FASTA file', type='binary',
|
888 |
+
visible=True, variant='primary',
|
889 |
+
size='lg')
|
890 |
+
target_query_btn = gr.Button(value='Query the sequence', variant='primary',
|
891 |
+
visible=False)
|
892 |
+
# with gr.Row():
|
893 |
+
# example_uniprot = gr.Button(value='Example: Q16539', elem_classes='example', visible=False)
|
894 |
+
# example_gene = gr.Button(value='Example: MAPK14', elem_classes='example', visible=False)
|
895 |
+
example_fasta = gr.Button(value='Example: Human MAPK14', elem_classes='example')
|
896 |
target_fasta = gr.Code(label='Input or Display FASTA', interactive=True, lines=5)
|
897 |
# with gr.Row():
|
898 |
# with gr.Column():
|
|
|
899 |
# with gr.Column():
|
900 |
# gr.File(label='Example FASTA file',
|
901 |
# value='data/examples/MAPK14.fasta', interactive=False)
|
|
|
904 |
with gr.Column():
|
905 |
# Adjacent string literals are concatenated as-is, so every sentence except the
# last carries its own trailing space.
HelpTip(
    "Click Auto-detect to identify the protein family using sequence alignment. "
    "This optional step allows applying a family-specific model instead of an all-family "
    "model (general). "
    "Manually select general if the alignment results are unsatisfactory."
)
|
911 |
drug_screen_target_family = gr.Dropdown(
|
|
|
938 |
with gr.Row():
|
939 |
with gr.Column():
|
940 |
# Adjacent string literals are concatenated as-is, so a space is needed after
# the comma that ends the second fragment.
HelpTip(
    "Interaction prediction provides you binding probability score between the target of "
    "interest and each compound in the library, "
    "while affinity prediction directly estimates their binding strength measured using "
    "IC50."
)
|
946 |
drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
|
947 |
label='Step 4. Select a Prediction Task',
|
|
|
950 |
with gr.Row():
|
951 |
with gr.Column():
|
952 |
# Adjacent string literals are concatenated as-is, so every sentence except the
# last carries its own trailing space.
HelpTip(
    "Select your preferred model, or click Recommend for the best-performing model based "
    "on the selected task, family, and whether the target was trained. "
    "Please refer to documentation for detailed benchmark results."
)
|
957 |
drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
|
|
|
961 |
with gr.Column():
|
962 |
drug_screen_email = gr.Textbox(
|
963 |
label='Step 6. Email (Optional)',
|
964 |
+
info="If an email is provided, a notification email will be sent to you when your job "
|
965 |
+
"is completed."
|
966 |
)
|
967 |
|
968 |
with gr.Row(visible=True):
|
|
|
993 |
# Adjacent string literals are concatenated as-is, so every sentence except the
# last carries its own trailing space.
HelpTip(
    "Enter (paste) a compound SMILES below manually or upload an SDF file. "
    "If multiple entities are in the SDF, only the first will be used. "
    "SMILES can be obtained by searching for the compound of interest in databases such "
    "as NCBI, PubChem and ChEMBL."
)
|
999 |
compound_type = gr.Dropdown(
|
1000 |
label='Step 1. Select Compound Input Type and Input',
|
1001 |
choices=['SMILES', 'SDF'],
|
1002 |
+
info='Enter (paste) an SMILES string or upload an SDF file to convert to SMILES.',
|
1003 |
value='SMILES',
|
1004 |
interactive=True)
|
1005 |
+
compound_upload_btn = gr.UploadButton(label='Upload', variant='primary',
|
1006 |
+
type='binary', visible=False)
|
1007 |
|
1008 |
compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
|
1009 |
+
example_drug = gr.Button(value='Example: Aspirin', elem_classes='example')
|
1010 |
|
1011 |
with gr.Row():
|
1012 |
with gr.Column():
|
1013 |
# Adjacent string literals are concatenated as-is, so every sentence except the
# last carries its own trailing space.
HelpTip(
    "By default, models trained on all protein families (general) will be applied. "
    "If the proteins in the target library of interest all belong to the same protein "
    "family, manually selecting the family is supported."
)
|
1018 |
target_identify_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
|
1019 |
value='General',
|
1020 |
+
label='Step 2. Select Target Protein Family ('
|
1021 |
+
'Optional)')
|
1022 |
|
1023 |
with gr.Row():
|
1024 |
with gr.Column():
|
1025 |
# Adjacent string literals are concatenated as-is, so every sentence except the
# last carries its own trailing space.
HelpTip(
    "Select a preset target library (e.g., ChEMBL33_human_proteins). "
    "Alternatively, upload a CSV file with a column named X2 containing target protein "
    "sequences, or use a FASTA file."
)
|
1030 |
target_library = gr.Dropdown(label='Step 3. Select or Upload a Target Library',
|
1031 |
choices=list(TARGET_LIBRARY_MAP.keys()))
|
|
|
1041 |
with gr.Row():
|
1042 |
with gr.Column():
|
1043 |
HelpTip(
|
1044 |
+
"Interaction prediction provides you binding probability score between the target of "
|
1045 |
+
"interest and each compound in the library,"
|
1046 |
+
"while affinity prediction directly estimates their binding strength measured using "
|
1047 |
+
"IC50."
|
1048 |
)
|
1049 |
target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
|
1050 |
label='Step 4. Select a Prediction Task',
|
|
|
1053 |
with gr.Row():
|
1054 |
with gr.Column():
|
1055 |
# Adjacent string literals are concatenated as-is, so every sentence except the
# last carries its own trailing space.
HelpTip(
    "Select your preferred model, or click Recommend for the best-performing model based "
    "on the selected task, family, and whether the compound was trained. "
    "Please refer to documentation for detailed benchmark results."
)
|
1060 |
+
target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 5. Select a '
|
1061 |
+
'Preset Model')
|
1062 |
identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
1063 |
|
1064 |
with gr.Row():
|
|
|
1085 |
''')
|
1086 |
with gr.Blocks() as infer_block:
|
1087 |
with gr.Column() as infer_page:
|
1088 |
+
infer_type = gr.Dropdown(
|
1089 |
+
choices=['Upload a compound library and a target library',
|
1090 |
+
'Upload a CSV interaction pair dataset'],
|
1091 |
+
value='Upload a compound library and a target library')
|
1092 |
with gr.Column() as pair_upload:
|
1093 |
+
gr.File(label="Example custom dataset",
|
1094 |
+
value="data/examples/interaction_pair_inference.csv",
|
1095 |
+
interactive=False)
|
1096 |
+
with gr.Column():
|
|
|
1097 |
infer_data_for_predict = gr.File(
|
1098 |
+
label='Upload a custom dataset', file_count="single", type='filepath', visible=True)
|
1099 |
with gr.Column() as pair_generate:
|
1100 |
with gr.Row():
|
1101 |
+
gr.File(label='Example SDF compound library',
|
1102 |
value='data/examples/compound_library.sdf', interactive=False)
|
1103 |
+
gr.File(label='Example FASTA target library',
|
1104 |
value='data/examples/target_library.fasta', interactive=False)
|
1105 |
with gr.Row():
|
1106 |
+
gr.File(label='Example CSV compound library',
|
1107 |
value='data/examples/compound_library.csv', interactive=False)
|
1108 |
+
gr.File(label='Example CSV target library',
|
1109 |
value='data/examples/target_library.csv', interactive=False)
|
1110 |
with gr.Row():
|
1111 |
+
infer_drug = gr.File(label='SDF/CSV file containing multiple compounds',
|
1112 |
file_count="single", type='filepath')
|
1113 |
+
infer_target = gr.File(label='FASTA/CSV file containing multiple targets',
|
1114 |
file_count="single", type='filepath')
|
1115 |
|
1116 |
+
with gr.Row(visible=True):
|
1117 |
+
pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()), label='Task')
|
1118 |
+
pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
|
1119 |
+
pair_infer_target_family = gr.Dropdown(choices=['General'],
|
1120 |
+
label='Target family',
|
1121 |
+
value='General')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1122 |
|
1123 |
+
# with gr.Row():
|
1124 |
+
# pair_infer_email = gr.Textbox(
|
1125 |
+
# label='Email (optional)',
|
1126 |
+
# info="Your email will be used to send you notifications when your job finishes."
|
1127 |
+
# )
|
1128 |
|
1129 |
with gr.Row(visible=True):
|
1130 |
# pair_infer_clr_btn = gr.ClearButton(size='lg')
|
|
|
1139 |
with gr.Blocks() as report:
|
1140 |
gr.Markdown('''
|
1141 |
# <center>DeepSEQreen Chemical Property Report</center>
|
1142 |
+
<center>
|
1143 |
To compute chemical properties for the predictions of drug hit screening,
|
1144 |
+
target protein identification, and interaction pair inference. You may also upload
|
1145 |
+
your own dataset.
|
1146 |
+
|
1147 |
+
The page shows only a preview report displaying at most 30 records
|
1148 |
+
(with top predicted CPI/CPA if reporting results from a prediction job).
|
1149 |
|
1150 |
+
For a full report, please
|
1151 |
+
generate and download a CSV or interactive HTML report below.
|
1152 |
+
</center>
|
|
|
1153 |
''')
|
1154 |
with gr.Row():
|
1155 |
file_for_report = gr.File(interactive=True, type='filepath')
|
1156 |
+
raw_df = gr.State(value=pd.DataFrame())
|
1157 |
+
report_df = gr.State(value=pd.DataFrame())
|
1158 |
scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
|
1159 |
filters = gr.CheckboxGroup(list(FILTER_MAP.keys()), label='Filters')
|
1160 |
|
1161 |
with gr.Row():
|
1162 |
# clear_btn = gr.ClearButton(size='lg')
|
1163 |
+
analyze_btn = gr.Button('REPORT', variant='primary', size='lg', interactive=False)
|
1164 |
|
1165 |
with gr.Row():
|
1166 |
with gr.Column(scale=3):
|
|
|
1169 |
|
1170 |
with gr.Row():
|
1171 |
with gr.Column():
|
1172 |
+
csv_generate = gr.Button(value='Generate CSV Report',
|
1173 |
+
interactive=True, variant='primary', visible=False)
|
1174 |
+
csv_download_file = gr.File(label='Download CSV Report', visible=False)
|
1175 |
with gr.Column():
|
1176 |
+
html_generate = gr.Button(value='Generate HTML Report',
|
1177 |
+
interactive=True, variant='primary', visible=False)
|
1178 |
+
html_download_file = gr.File(label='Download HTML Report', visible=False)
|
1179 |
|
1180 |
|
1181 |
def target_input_type_select(input_type):
|
|
|
1272 |
def example_fill(input_type):
|
1273 |
return {target_id: 'Q16539',
|
1274 |
target_gene: 'MAPK14',
|
1275 |
+
target_organism: 'Human',
|
1276 |
target_fasta: """
|
1277 |
>sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
|
1278 |
MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
|
|
|
1284 |
"""}
|
1285 |
|
1286 |
|
1287 |
+
example_fasta.click(fn=example_fill, inputs=target_input_type, outputs=[
|
1288 |
+
target_id, target_gene, target_organism, target_fasta], show_progress=False)
|
1289 |
+
# example_uniprot.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
|
1290 |
+
# example_gene.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
|
1291 |
|
1292 |
def screen_recommend_model(fasta, family, task):
|
1293 |
task = TASK_MAP[task]
|
|
|
1298 |
train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
|
1299 |
score = 'CI'
|
1300 |
|
1301 |
+
if not np.isin(process_target_fasta(fasta), train['X2']):
|
1302 |
scenario = "Unseen target"
|
1303 |
else:
|
1304 |
scenario = "Seen target"
|
|
|
1315 |
& (benchmark_df['Scenario'] == scenario)
|
1316 |
& (benchmark_df['all'] == False)]
|
1317 |
row = filtered_df.loc[filtered_df[score].idxmax()]
|
1318 |
+
|
1319 |
return gr.Dropdown(value=row['preset'],
|
1320 |
info=f"Reason: {scenario} in the training dataset; we recommend the model "
|
1321 |
f"with the best {score} ({float(row[score]):.3f}) "
|
|
|
1330 |
def compound_input_type_select(input_type):
    """Return a visibility update for the compound upload button.

    The button is hidden for ``'SMILES'`` input and shown for ``'SDF'`` input;
    any other value yields ``None``.
    """
    if input_type == 'SMILES':
        return gr.Button(visible=False)
    if input_type == 'SDF':
        return gr.Button(visible=True)
    return None
|
1336 |
|
1337 |
|
1338 |
compound_type.select(fn=compound_input_type_select,
|
1339 |
+
inputs=compound_type, outputs=compound_upload_btn, show_progress=False)
|
1340 |
|
1341 |
|
1342 |
def compound_upload_process(input_type, input_upload):
|
|
|
1424 |
screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
|
1425 |
else:
|
1426 |
screen_df = process_drug_library_upload(library_upload)
|
|
|
1427 |
if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
|
1428 |
raise gr.Error(f'The uploaded compound library has more records '
|
1429 |
f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
|
|
|
1566 |
).then(
|
1567 |
fn=submit_predict,
|
1568 |
inputs=[screen_data_for_predict, drug_screen_task, drug_screen_preset,
|
1569 |
+
drug_screen_target_family, screen_flag, run_state], # , drug_screen_email],
|
1570 |
outputs=[file_for_report, run_state]
|
1571 |
).then(
|
1572 |
fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
|
|
|
1578 |
inputs=[compound_smiles, target_library, target_library_upload, run_state], # , drug_screen_email],
|
1579 |
outputs=[identify_data_for_predict, identify_flag, run_state]
|
1580 |
).then(
|
1581 |
+
fn=lambda: [gr.Column(visible=False), gr.Markdown(visible=True)],
|
1582 |
+
outputs=[identify_page, identify_waiting]
|
1583 |
).then(
|
1584 |
fn=submit_predict,
|
1585 |
inputs=[identify_data_for_predict, target_identify_task, target_identify_preset,
|
1586 |
+
target_identify_target_family, identify_flag, run_state], # , target_identify_email],
|
1587 |
outputs=[file_for_report, run_state]
|
1588 |
).then(
|
1589 |
fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
|
|
|
1600 |
).then(
|
1601 |
fn=submit_predict,
|
1602 |
inputs=[infer_data_for_predict, pair_infer_task, pair_infer_preset,
|
1603 |
+
pair_infer_target_family, infer_flag, run_state], # , pair_infer_email],
|
1604 |
outputs=[file_for_report, run_state]
|
1605 |
).then(
|
1606 |
+
fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
|
1607 |
+
outputs=[infer_page, infer_waiting, tabs]
|
1608 |
)
|
1609 |
|
1610 |
# TODO background job from these 3 pipelines to update file_for_report
|
1611 |
|
1612 |
file_for_report.change(fn=update_df, inputs=file_for_report, outputs=[
|
1613 |
html_report,
|
1614 |
+
raw_df,
|
1615 |
+
report_df,
|
1616 |
+
analyze_btn
|
1617 |
# ranking_pie_chart
|
1618 |
])
|
1619 |
+
analyze_btn.click(fn=submit_report, inputs=[raw_df, scores, filters], outputs=[
|
1620 |
html_report,
|
1621 |
+
report_df,
|
1622 |
# ranking_pie_chart
|
1623 |
])
|
1624 |
|
1625 |
|
1626 |
+
def create_csv_report_file(df, file_report):
    """Write the report data frame to a timestamped CSV under ``reports/``.

    Args:
        df: Report data frame; the ``Compound`` and ``Scaffold`` molecule
            columns are dropped before writing when present.
        file_report: The source file the report was built from, either a path
            string or an object exposing the path as ``.name``. Only its stem
            is used, to name the output file.

    Returns:
        ``(gr.File, gr.Button)`` updates that show the generated file and hide
        the generate button. On any error a Gradio warning is shown and
        ``(None, None)`` is returned.
    """
    try:
        now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        # The file component is declared with type='filepath', so the value may
        # be a plain string without a ``.name`` attribute; accept both forms.
        source_path = getattr(file_report, 'name', file_report)
        filename = f"reports/{Path(source_path).stem}_DeepSEQreen_report_{now}.csv"
        # Make sure the output directory exists before writing into it.
        Path(filename).parent.mkdir(parents=True, exist_ok=True)
        df.drop(labels=['Compound', 'Scaffold'], axis=1, errors='ignore').to_csv(filename, index=False)

        return gr.File(filename, visible=True), gr.Button(visible=False)
    except Exception as e:
        gr.Warning(f"Failed to generate CSV due to error: {str(e)}")
        return None, None
|
1636 |
|
1637 |
def create_html_report_file(df, file_report):
    """Render the report data frame to a timestamped HTML file under ``reports/``.

    Args:
        df: Report data frame passed on to ``create_html_report``.
        file_report: The source file the report was built from, either a path
            string or an object exposing the path as ``.name``. Only its stem
            is used, to name the output file.

    Returns:
        ``(gr.File, gr.Button)`` updates that show the generated file and hide
        the generate button. On any error a Gradio warning is shown and
        ``(None, None)`` is returned.
    """
    try:
        now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        # The file component is declared with type='filepath', so the value may
        # be a plain string without a ``.name`` attribute; accept both forms.
        source_path = getattr(file_report, 'name', file_report)
        filename = f"reports/{Path(source_path).stem}_DeepSEQreen_report_{now}.html"
        # Make sure the output directory exists before writing into it.
        Path(filename).parent.mkdir(parents=True, exist_ok=True)
        create_html_report(df, filename)
        return gr.File(filename, visible=True), gr.Button(visible=False)
    except Exception as e:
        gr.Warning(f"Failed to generate HTML due to error: {str(e)}")
        return None, None
|
1646 |
|
1647 |
+
html_report.change(lambda: [gr.Button(visible=True)] * 2, outputs=[csv_generate, html_generate])
|
1648 |
+
csv_generate.click(fn=create_csv_report_file, inputs=[report_df, file_for_report],
|
1649 |
+
outputs=[csv_download_file, csv_generate])
|
1650 |
+
html_generate.click(fn=create_html_report_file, inputs=[report_df, file_for_report],
|
1651 |
+
outputs=[html_download_file, html_generate])
|
1652 |
|
1653 |
# screen_waiting.change(fn=check_job_status, inputs=run_state, outputs=[pair_waiting, tabs, file_for_report],
|
1654 |
# every=5)
|
|
|
1671 |
demo.launch(
|
1672 |
show_api=False,
|
1673 |
)
|
|
|
|