Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import hashlib
|
| 2 |
import itertools
|
| 3 |
import json
|
|
@@ -21,6 +22,7 @@ import hydra
|
|
| 21 |
import pandas as pd
|
| 22 |
import plotly.express as px
|
| 23 |
import requests
|
|
|
|
| 24 |
from rdkit.Chem.rdMolDescriptors import CalcNumRotatableBonds, CalcNumHeavyAtoms, CalcNumAtoms, CalcTPSA
|
| 25 |
from requests.adapters import HTTPAdapter, Retry
|
| 26 |
from rdkit import Chem
|
|
@@ -39,7 +41,7 @@ import sascorer
|
|
| 39 |
|
| 40 |
ROOT = Path.cwd()
|
| 41 |
|
| 42 |
-
DF_FOR_REPORT = pd.DataFrame()
|
| 43 |
|
| 44 |
pd.set_option('display.float_format', '{:.3f}'.format)
|
| 45 |
PandasTools.molRepresentation = 'svg'
|
|
@@ -146,7 +148,7 @@ CSS = """
|
|
| 146 |
position: absolute;
|
| 147 |
}
|
| 148 |
|
| 149 |
-
|
| 150 |
padding: 0;
|
| 151 |
background: none;
|
| 152 |
border: none;
|
|
@@ -171,47 +173,47 @@ class HelpTip:
|
|
| 171 |
)
|
| 172 |
|
| 173 |
|
| 174 |
-
def sa_score(
|
| 175 |
-
return sascorer.calculateScore(
|
| 176 |
|
| 177 |
|
| 178 |
-
def mw(
|
| 179 |
-
return Chem.Descriptors.MolWt(
|
| 180 |
|
| 181 |
|
| 182 |
-
def mr(
|
| 183 |
-
return Crippen.MolMR(
|
| 184 |
|
| 185 |
|
| 186 |
-
def hbd(
|
| 187 |
-
return Lipinski.NumHDonors(
|
| 188 |
|
| 189 |
|
| 190 |
-
def hba(
|
| 191 |
-
return Lipinski.NumHAcceptors(
|
| 192 |
|
| 193 |
|
| 194 |
-
def logp(
|
| 195 |
-
return Crippen.MolLogP(
|
| 196 |
|
| 197 |
|
| 198 |
-
def atom(
|
| 199 |
-
return CalcNumAtoms(
|
| 200 |
|
| 201 |
|
| 202 |
-
def heavy_atom(
|
| 203 |
-
return CalcNumHeavyAtoms(
|
| 204 |
|
| 205 |
|
| 206 |
-
def rotatable_bond(
|
| 207 |
-
return CalcNumRotatableBonds((
|
| 208 |
|
| 209 |
|
| 210 |
-
def tpsa(
|
| 211 |
-
return CalcTPSA((
|
| 212 |
|
| 213 |
|
| 214 |
-
def lipinski(
|
| 215 |
"""
|
| 216 |
Lipinski's rules:
|
| 217 |
Hydrogen bond donors <= 5
|
|
@@ -219,19 +221,19 @@ def lipinski(row):
|
|
| 219 |
Molecular weight <= 500 daltons
|
| 220 |
logP <= 5
|
| 221 |
"""
|
| 222 |
-
if hbd(
|
| 223 |
return False
|
| 224 |
-
elif hba(
|
| 225 |
return False
|
| 226 |
-
elif mw(
|
| 227 |
return False
|
| 228 |
-
elif logp(
|
| 229 |
return False
|
| 230 |
else:
|
| 231 |
return True
|
| 232 |
|
| 233 |
|
| 234 |
-
def reos(
|
| 235 |
"""
|
| 236 |
Rapid Elimination Of Swill filter:
|
| 237 |
Molecular weight between 200 and 500
|
|
@@ -242,23 +244,23 @@ def reos(row):
|
|
| 242 |
Rotatable bond count between 0 and 8
|
| 243 |
Heavy atom count between 15 and 50
|
| 244 |
"""
|
| 245 |
-
if not 200 < mw(
|
| 246 |
return False
|
| 247 |
-
elif not -5.0 < logp(
|
| 248 |
return False
|
| 249 |
-
elif not 0 < hbd(
|
| 250 |
return False
|
| 251 |
-
elif not 0 < hba(
|
| 252 |
return False
|
| 253 |
-
elif not 0 < rotatable_bond(
|
| 254 |
return False
|
| 255 |
-
elif not 15 < heavy_atom(
|
| 256 |
return False
|
| 257 |
else:
|
| 258 |
return True
|
| 259 |
|
| 260 |
|
| 261 |
-
def ghose(
|
| 262 |
"""
|
| 263 |
Ghose drug like filter:
|
| 264 |
Molecular weight between 160 and 480
|
|
@@ -266,34 +268,34 @@ def ghose(row):
|
|
| 266 |
Atom count between 20 and 70
|
| 267 |
Molar refractivity between 40 and 130
|
| 268 |
"""
|
| 269 |
-
if not 160 < mw(
|
| 270 |
return False
|
| 271 |
-
elif not -0.4 < logp(
|
| 272 |
return False
|
| 273 |
-
elif not 20 < atom(
|
| 274 |
return False
|
| 275 |
-
elif not 40 < mr(
|
| 276 |
return False
|
| 277 |
else:
|
| 278 |
return True
|
| 279 |
|
| 280 |
|
| 281 |
-
def veber(
|
| 282 |
"""
|
| 283 |
The Veber filter is a rule of thumb filter for orally active drugs described in
|
| 284 |
Veber et al., J Med Chem. 2002; 45(12): 2615-23.:
|
| 285 |
Rotatable bonds <= 10
|
| 286 |
Topological polar surface area <= 140
|
| 287 |
"""
|
| 288 |
-
if not rotatable_bond(
|
| 289 |
return False
|
| 290 |
-
elif not tpsa(
|
| 291 |
return False
|
| 292 |
else:
|
| 293 |
return True
|
| 294 |
|
| 295 |
|
| 296 |
-
def rule_of_three(
|
| 297 |
"""
|
| 298 |
Rule of Three filter (Congreve et al., Drug Discov. Today. 8 (19): 876–7, (2003).):
|
| 299 |
Molecular weight <= 300
|
|
@@ -302,15 +304,15 @@ def rule_of_three(row):
|
|
| 302 |
H-bond acceptor count <= 3
|
| 303 |
Rotatable bond count <= 3
|
| 304 |
"""
|
| 305 |
-
if not mw(
|
| 306 |
return False
|
| 307 |
-
elif not logp(
|
| 308 |
return False
|
| 309 |
-
elif not hbd(
|
| 310 |
return False
|
| 311 |
-
elif not hba(
|
| 312 |
return False
|
| 313 |
-
elif not rotatable_bond(
|
| 314 |
return False
|
| 315 |
else:
|
| 316 |
return True
|
|
@@ -389,6 +391,9 @@ COLUMN_ALIASES = {
|
|
| 389 |
'X2': 'Target FASTA',
|
| 390 |
'ID1': 'Compound ID',
|
| 391 |
'ID2': 'Target ID',
|
|
|
|
|
|
|
|
|
|
| 392 |
}
|
| 393 |
|
| 394 |
|
|
@@ -421,7 +426,7 @@ def send_email(receiver, msg):
|
|
| 421 |
pass
|
| 422 |
|
| 423 |
|
| 424 |
-
def submit_predict(predict_filepath, task, preset, target_family, flag, progress=gr.Progress(track_tqdm=True)):
|
| 425 |
if flag:
|
| 426 |
try:
|
| 427 |
job_id = flag
|
|
@@ -430,10 +435,10 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
|
|
| 430 |
preset = PRESET_MAP[preset]
|
| 431 |
target_family = TARGET_FAMILY_MAP[target_family]
|
| 432 |
# email_hash = hashlib.sha256(email.encode()).hexdigest()
|
| 433 |
-
COLUMN_ALIASES
|
| 434 |
-
'Y': 'Actual interaction probability' if task == '
|
| 435 |
-
'Y^': 'Predicted interaction probability' if task == '
|
| 436 |
-
}
|
| 437 |
|
| 438 |
# target_family_list = [target_family]
|
| 439 |
# for family in target_family_list:
|
|
@@ -451,20 +456,18 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
|
|
| 451 |
predictions, _ = predict(cfg)
|
| 452 |
predictions = [pd.DataFrame(prediction) for prediction in predictions]
|
| 453 |
prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
|
|
|
|
| 454 |
|
| 455 |
predictions_file = f'temp/{job_id}_predictions.csv'
|
| 456 |
-
prediction_df.to_csv(predictions_file
|
| 457 |
|
| 458 |
return [predictions_file,
|
| 459 |
False]
|
| 460 |
except Exception as e:
|
| 461 |
gr.Warning(f"Prediction job failed due to error: {str(e)}")
|
| 462 |
-
return
|
| 463 |
-
False]
|
| 464 |
-
|
| 465 |
else:
|
| 466 |
-
return
|
| 467 |
-
False]
|
| 468 |
#
|
| 469 |
# except Exception as e:
|
| 470 |
# raise gr.Error(str(e))
|
|
@@ -536,19 +539,19 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, progress
|
|
| 536 |
|
| 537 |
|
| 538 |
def update_df(file, progress=gr.Progress(track_tqdm=True)):
|
| 539 |
-
global DF_FOR_REPORT
|
| 540 |
-
if file
|
| 541 |
df = pd.read_csv(file)
|
| 542 |
-
if df['X1'].nunique() > 1:
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
includeFingerprints=True)
|
| 549 |
-
PandasTools.AddMoleculeColumnToFrame(df, smilesCol='Scaffold SMILES', molCol='Scaffold',
|
| 550 |
includeFingerprints=True)
|
| 551 |
-
|
|
|
|
|
|
|
| 552 |
|
| 553 |
# pie_chart = None
|
| 554 |
# value = None
|
|
@@ -563,30 +566,64 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
|
|
| 563 |
# elif DF_FOR_REPORT['X2'].nunique() > 1 >= DF_FOR_REPORT['X1'].nunique():
|
| 564 |
# pie_chart = create_pie_chart(DF_FOR_REPORT, category='Target family', value=value, top_k=100)
|
| 565 |
|
| 566 |
-
return create_html_report(
|
|
|
|
|
|
|
|
|
|
| 567 |
else:
|
| 568 |
-
return
|
| 569 |
|
| 570 |
|
| 571 |
def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
|
| 572 |
-
df_html = df.copy()
|
| 573 |
-
|
|
|
|
| 574 |
cols_right = ['X1', 'X2']
|
| 575 |
cols_left = [col for col in cols_left if col in df_html.columns]
|
| 576 |
cols_right = [col for col in cols_right if col in df_html.columns]
|
| 577 |
df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
|
| 578 |
-
|
|
|
|
| 579 |
df_html = df_html.sort_values(
|
| 580 |
-
[col for col in ['Y', 'Y^'
|
| 581 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 582 |
# PandasTools.RenderImagesInAllDataFrames(images=True)
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
|
|
|
|
|
|
|
|
|
| 586 |
|
| 587 |
if not file:
|
| 588 |
-
|
| 589 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 590 |
colors = sns.color_palette('husl', len(df_html.columns))
|
| 591 |
for i, col in enumerate(df_html.columns):
|
| 592 |
if pd.api.types.is_numeric_dtype(df_html[col]):
|
|
@@ -597,13 +634,21 @@ def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
|
|
| 597 |
import panel as pn
|
| 598 |
from bokeh.resources import INLINE
|
| 599 |
from bokeh.models import NumberFormatter, BooleanFormatter
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 603 |
}
|
|
|
|
|
|
|
| 604 |
# html = df.to_html(file)
|
| 605 |
# return html
|
| 606 |
-
pn.widgets.Tabulator(df_html, formatters=
|
| 607 |
|
| 608 |
|
| 609 |
# def create_pie_chart(df, category, value, top_k):
|
|
@@ -657,16 +702,18 @@ def create_pie_chart(df, category, value, top_k):
|
|
| 657 |
return fig
|
| 658 |
|
| 659 |
|
| 660 |
-
def submit_report(score_list, filter_list, progress=gr.Progress(track_tqdm=True)):
|
| 661 |
-
|
| 662 |
try:
|
| 663 |
for filter_name in filter_list:
|
| 664 |
-
|
| 665 |
-
|
|
|
|
| 666 |
|
| 667 |
for score_name in score_list:
|
| 668 |
-
|
| 669 |
-
|
|
|
|
| 670 |
|
| 671 |
# pie_chart = None
|
| 672 |
# value = None
|
|
@@ -681,11 +728,11 @@ def submit_report(score_list, filter_list, progress=gr.Progress(track_tqdm=True)
|
|
| 681 |
# elif df['X2'].nunique() > 1 >= df['X1'].nunique():
|
| 682 |
# pie_chart = create_pie_chart(df, category='Target family', value=value, top_k=100)
|
| 683 |
|
| 684 |
-
return create_html_report(
|
| 685 |
|
| 686 |
except Exception as e:
|
| 687 |
-
|
| 688 |
-
|
| 689 |
|
| 690 |
# def check_job_status(job_id):
|
| 691 |
# job_lock = DATA_PATH / f"{job_id}.lock"
|
|
@@ -704,20 +751,23 @@ def submit_report(score_list, filter_list, progress=gr.Progress(track_tqdm=True)
|
|
| 704 |
|
| 705 |
|
| 706 |
def wrap_text(text, line_length=60):
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
|
|
|
|
|
|
|
|
|
| 719 |
else:
|
| 720 |
-
return
|
| 721 |
|
| 722 |
|
| 723 |
def unwrap_text(text):
|
|
@@ -834,17 +884,18 @@ To predict interactions/binding affinities of a single target against a library
|
|
| 834 |
visible=False, interactive=True, scale=4, )
|
| 835 |
|
| 836 |
with gr.Row():
|
| 837 |
-
|
| 838 |
-
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
|
| 842 |
-
|
| 843 |
-
|
|
|
|
|
|
|
| 844 |
target_fasta = gr.Code(label='Input or Display FASTA', interactive=True, lines=5)
|
| 845 |
# with gr.Row():
|
| 846 |
# with gr.Column():
|
| 847 |
-
example_fasta = gr.Button(value='Example: Human MAPK14', elem_id='example')
|
| 848 |
# with gr.Column():
|
| 849 |
# gr.File(label='Example FASTA file',
|
| 850 |
# value='data/examples/MAPK14.fasta', interactive=False)
|
|
@@ -853,7 +904,8 @@ To predict interactions/binding affinities of a single target against a library
|
|
| 853 |
with gr.Column():
|
| 854 |
HelpTip(
|
| 855 |
"Click Auto-detect to identify the protein family using sequence alignment. "
|
| 856 |
-
"This optional step allows applying a family-specific model instead of a all-family
|
|
|
|
| 857 |
"Manually select general if the alignment results are unsatisfactory."
|
| 858 |
)
|
| 859 |
drug_screen_target_family = gr.Dropdown(
|
|
@@ -886,8 +938,10 @@ To predict interactions/binding affinities of a single target against a library
|
|
| 886 |
with gr.Row():
|
| 887 |
with gr.Column():
|
| 888 |
HelpTip(
|
| 889 |
-
"Interaction prediction provides you binding probability score between the target of
|
| 890 |
-
"
|
|
|
|
|
|
|
| 891 |
)
|
| 892 |
drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
|
| 893 |
label='Step 4. Select a Prediction Task',
|
|
@@ -896,7 +950,8 @@ To predict interactions/binding affinities of a single target against a library
|
|
| 896 |
with gr.Row():
|
| 897 |
with gr.Column():
|
| 898 |
HelpTip(
|
| 899 |
-
"Select your preferred model, or click Recommend for the best-performing model based
|
|
|
|
| 900 |
"Please refer to documentation for detailed benchamrk results."
|
| 901 |
)
|
| 902 |
drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
|
|
@@ -906,7 +961,8 @@ To predict interactions/binding affinities of a single target against a library
|
|
| 906 |
with gr.Column():
|
| 907 |
drug_screen_email = gr.Textbox(
|
| 908 |
label='Step 6. Email (Optional)',
|
| 909 |
-
info="If an email is provided, a notification email will be sent to you when your job
|
|
|
|
| 910 |
)
|
| 911 |
|
| 912 |
with gr.Row(visible=True):
|
|
@@ -937,34 +993,39 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 937 |
HelpTip(
|
| 938 |
"Enter (paste) a compound SMILES below manually or upload a SDF file."
|
| 939 |
"If multiple entities are in the SDF, only the first will be used."
|
| 940 |
-
"SMILES can be obtained by searching for the compound of interest in databases such
|
|
|
|
| 941 |
)
|
| 942 |
compound_type = gr.Dropdown(
|
| 943 |
label='Step 1. Select Compound Input Type and Input',
|
| 944 |
choices=['SMILES', 'SDF'],
|
| 945 |
-
info='Enter (paste) an SMILES string or upload an SDF file.',
|
| 946 |
value='SMILES',
|
| 947 |
interactive=True)
|
| 948 |
-
compound_upload_btn = gr.UploadButton(label='Upload', variant='primary',
|
|
|
|
| 949 |
|
| 950 |
compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
|
| 951 |
-
example_drug = gr.Button(value='Example: Aspirin',
|
| 952 |
|
| 953 |
with gr.Row():
|
| 954 |
with gr.Column():
|
| 955 |
HelpTip(
|
| 956 |
"By default, models trained on all protein families (general) will be applied."
|
| 957 |
-
"If the proteins in the target library of interest all belong to the same protein
|
|
|
|
| 958 |
)
|
| 959 |
target_identify_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
|
| 960 |
value='General',
|
| 961 |
-
label='Step 2. Select Target Protein Family (
|
|
|
|
| 962 |
|
| 963 |
with gr.Row():
|
| 964 |
with gr.Column():
|
| 965 |
HelpTip(
|
| 966 |
"Select a preset target library (e.g., ChEMBL33_human_proteins)."
|
| 967 |
-
"Alternatively, upload a CSV file with a column named X2 containing
|
|
|
|
| 968 |
)
|
| 969 |
target_library = gr.Dropdown(label='Step 3. Select or Upload a Target Library',
|
| 970 |
choices=list(TARGET_LIBRARY_MAP.keys()))
|
|
@@ -980,8 +1041,10 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 980 |
with gr.Row():
|
| 981 |
with gr.Column():
|
| 982 |
HelpTip(
|
| 983 |
-
"Interaction prediction provides you binding probability score between the target of
|
| 984 |
-
"
|
|
|
|
|
|
|
| 985 |
)
|
| 986 |
target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
|
| 987 |
label='Step 4. Select a Prediction Task',
|
|
@@ -990,11 +1053,12 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 990 |
with gr.Row():
|
| 991 |
with gr.Column():
|
| 992 |
HelpTip(
|
| 993 |
-
"Select your preferred model, or click Recommend for the best-performing model based
|
|
|
|
| 994 |
"Please refer to documentation for detailed benchamrk results."
|
| 995 |
)
|
| 996 |
-
target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()),
|
| 997 |
-
|
| 998 |
identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
| 999 |
|
| 1000 |
with gr.Row():
|
|
@@ -1021,69 +1085,46 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 1021 |
''')
|
| 1022 |
with gr.Blocks() as infer_block:
|
| 1023 |
with gr.Column() as infer_page:
|
| 1024 |
-
infer_type = gr.Dropdown(
|
| 1025 |
-
|
| 1026 |
-
|
| 1027 |
-
|
| 1028 |
with gr.Column() as pair_upload:
|
| 1029 |
-
|
| 1030 |
-
|
| 1031 |
-
|
| 1032 |
-
|
| 1033 |
-
with gr.Row():
|
| 1034 |
infer_data_for_predict = gr.File(
|
| 1035 |
-
label='Upload a
|
| 1036 |
with gr.Column() as pair_generate:
|
| 1037 |
with gr.Row():
|
| 1038 |
-
gr.File(label='Example SDF
|
| 1039 |
value='data/examples/compound_library.sdf', interactive=False)
|
| 1040 |
-
gr.File(label='Example FASTA
|
| 1041 |
value='data/examples/target_library.fasta', interactive=False)
|
| 1042 |
with gr.Row():
|
| 1043 |
-
gr.File(label='Example CSV
|
| 1044 |
value='data/examples/compound_library.csv', interactive=False)
|
| 1045 |
-
gr.File(label='Example CSV
|
| 1046 |
value='data/examples/target_library.csv', interactive=False)
|
| 1047 |
with gr.Row():
|
| 1048 |
-
infer_drug = gr.File(label='SDF/CSV
|
| 1049 |
file_count="single", type='filepath')
|
| 1050 |
-
infer_target = gr.File(label='FASTA/CSV
|
| 1051 |
file_count="single", type='filepath')
|
| 1052 |
|
| 1053 |
-
with gr.Row():
|
| 1054 |
-
|
| 1055 |
-
|
| 1056 |
-
|
| 1057 |
-
|
| 1058 |
-
|
| 1059 |
-
pair_infer_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
|
| 1060 |
-
value='General',
|
| 1061 |
-
label='Step 2. Select Target Protein Family (Optional)')
|
| 1062 |
-
|
| 1063 |
-
with gr.Row():
|
| 1064 |
-
with gr.Column():
|
| 1065 |
-
HelpTip(
|
| 1066 |
-
"Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
|
| 1067 |
-
"while affinity prediction directly estimates their binding strength measured using IC50."
|
| 1068 |
-
)
|
| 1069 |
-
pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()),
|
| 1070 |
-
label='Step 3. Select a Prediction Task',
|
| 1071 |
-
value='Compound-protein interaction')
|
| 1072 |
-
|
| 1073 |
-
with gr.Row():
|
| 1074 |
-
with gr.Column():
|
| 1075 |
-
HelpTip("Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and random splitting validation."
|
| 1076 |
-
"Please refer to documentation for detailed benchamrk results."
|
| 1077 |
-
)
|
| 1078 |
-
pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 4. Select a Preset Model')
|
| 1079 |
-
infer_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
| 1080 |
-
|
| 1081 |
|
| 1082 |
-
with gr.Row():
|
| 1083 |
-
|
| 1084 |
-
|
| 1085 |
-
|
| 1086 |
-
|
| 1087 |
|
| 1088 |
with gr.Row(visible=True):
|
| 1089 |
# pair_infer_clr_btn = gr.ClearButton(size='lg')
|
|
@@ -1098,23 +1139,28 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 1098 |
with gr.Blocks() as report:
|
| 1099 |
gr.Markdown('''
|
| 1100 |
# <center>DeepSEQreen Chemical Property Report</center>
|
|
|
|
| 1101 |
To compute chemical properties for the predictions of drug hit screening,
|
| 1102 |
-
target protein identification, and interaction pair inference.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1103 |
|
| 1104 |
-
|
| 1105 |
-
|
| 1106 |
-
|
| 1107 |
-
generate and download a raw data CSV or interactive table HTML file below.
|
| 1108 |
''')
|
| 1109 |
with gr.Row():
|
| 1110 |
file_for_report = gr.File(interactive=True, type='filepath')
|
| 1111 |
-
|
|
|
|
| 1112 |
scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
|
| 1113 |
filters = gr.CheckboxGroup(list(FILTER_MAP.keys()), label='Filters')
|
| 1114 |
|
| 1115 |
with gr.Row():
|
| 1116 |
# clear_btn = gr.ClearButton(size='lg')
|
| 1117 |
-
analyze_btn = gr.Button('REPORT', variant='primary', size='lg')
|
| 1118 |
|
| 1119 |
with gr.Row():
|
| 1120 |
with gr.Column(scale=3):
|
|
@@ -1123,11 +1169,13 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 1123 |
|
| 1124 |
with gr.Row():
|
| 1125 |
with gr.Column():
|
| 1126 |
-
csv_generate = gr.Button(value='Generate
|
| 1127 |
-
|
|
|
|
| 1128 |
with gr.Column():
|
| 1129 |
-
html_generate = gr.Button(value='Generate
|
| 1130 |
-
|
|
|
|
| 1131 |
|
| 1132 |
|
| 1133 |
def target_input_type_select(input_type):
|
|
@@ -1224,7 +1272,7 @@ To predict interactions/binding affinities of a single compound against a librar
|
|
| 1224 |
def example_fill(input_type):
|
| 1225 |
return {target_id: 'Q16539',
|
| 1226 |
target_gene: 'MAPK14',
|
| 1227 |
-
target_organism: '
|
| 1228 |
target_fasta: """
|
| 1229 |
>sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
|
| 1230 |
MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
|
|
@@ -1236,9 +1284,10 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 1236 |
"""}
|
| 1237 |
|
| 1238 |
|
| 1239 |
-
example_fasta.click(fn=example_fill, inputs=target_input_type,
|
| 1240 |
-
|
| 1241 |
-
|
|
|
|
| 1242 |
|
| 1243 |
def screen_recommend_model(fasta, family, task):
|
| 1244 |
task = TASK_MAP[task]
|
|
@@ -1249,7 +1298,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 1249 |
train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
|
| 1250 |
score = 'CI'
|
| 1251 |
|
| 1252 |
-
if
|
| 1253 |
scenario = "Unseen target"
|
| 1254 |
else:
|
| 1255 |
scenario = "Seen target"
|
|
@@ -1266,6 +1315,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 1266 |
& (benchmark_df['Scenario'] == scenario)
|
| 1267 |
& (benchmark_df['all'] == False)]
|
| 1268 |
row = filtered_df.loc[filtered_df[score].idxmax()]
|
|
|
|
| 1269 |
return gr.Dropdown(value=row['preset'],
|
| 1270 |
info=f"Reason: {scenario} in the training dataset; we recommend the model "
|
| 1271 |
f"with the best {score} ({float(row[score]):.3f}) "
|
|
@@ -1280,13 +1330,13 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 1280 |
def compound_input_type_select(input_type):
|
| 1281 |
match input_type:
|
| 1282 |
case 'SMILES':
|
| 1283 |
-
return gr.
|
| 1284 |
case 'SDF':
|
| 1285 |
-
return gr.
|
| 1286 |
|
| 1287 |
|
| 1288 |
compound_type.select(fn=compound_input_type_select,
|
| 1289 |
-
inputs=compound_type, outputs=
|
| 1290 |
|
| 1291 |
|
| 1292 |
def compound_upload_process(input_type, input_upload):
|
|
@@ -1374,7 +1424,6 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 1374 |
screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
|
| 1375 |
else:
|
| 1376 |
screen_df = process_drug_library_upload(library_upload)
|
| 1377 |
-
print(screen_df.shape)
|
| 1378 |
if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
|
| 1379 |
raise gr.Error(f'The uploaded compound library has more records '
|
| 1380 |
f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
|
|
@@ -1517,7 +1566,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 1517 |
).then(
|
| 1518 |
fn=submit_predict,
|
| 1519 |
inputs=[screen_data_for_predict, drug_screen_task, drug_screen_preset,
|
| 1520 |
-
drug_screen_target_family, screen_flag], # , drug_screen_email],
|
| 1521 |
outputs=[file_for_report, run_state]
|
| 1522 |
).then(
|
| 1523 |
fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
|
|
@@ -1529,12 +1578,12 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 1529 |
inputs=[compound_smiles, target_library, target_library_upload, run_state], # , drug_screen_email],
|
| 1530 |
outputs=[identify_data_for_predict, identify_flag, run_state]
|
| 1531 |
).then(
|
| 1532 |
-
fn=lambda: [gr.Column(visible=False), gr.Markdown(visible=True)
|
| 1533 |
-
outputs=[identify_page, identify_waiting
|
| 1534 |
).then(
|
| 1535 |
fn=submit_predict,
|
| 1536 |
inputs=[identify_data_for_predict, target_identify_task, target_identify_preset,
|
| 1537 |
-
target_identify_target_family, identify_flag], # , target_identify_email],
|
| 1538 |
outputs=[file_for_report, run_state]
|
| 1539 |
).then(
|
| 1540 |
fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
|
|
@@ -1551,45 +1600,55 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 1551 |
).then(
|
| 1552 |
fn=submit_predict,
|
| 1553 |
inputs=[infer_data_for_predict, pair_infer_task, pair_infer_preset,
|
| 1554 |
-
pair_infer_target_family, infer_flag], # , pair_infer_email],
|
| 1555 |
outputs=[file_for_report, run_state]
|
| 1556 |
).then(
|
| 1557 |
-
fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False)],
|
| 1558 |
-
outputs=[infer_page, infer_waiting]
|
| 1559 |
)
|
| 1560 |
|
| 1561 |
# TODO background job from these 3 pipelines to update file_for_report
|
| 1562 |
|
| 1563 |
file_for_report.change(fn=update_df, inputs=file_for_report, outputs=[
|
| 1564 |
html_report,
|
| 1565 |
-
|
|
|
|
|
|
|
| 1566 |
# ranking_pie_chart
|
| 1567 |
])
|
| 1568 |
-
analyze_btn.click(fn=submit_report, inputs=[scores, filters], outputs=[
|
| 1569 |
html_report,
|
| 1570 |
-
|
| 1571 |
# ranking_pie_chart
|
| 1572 |
])
|
| 1573 |
|
| 1574 |
|
| 1575 |
-
def
|
| 1576 |
-
|
| 1577 |
-
|
| 1578 |
-
|
| 1579 |
-
|
| 1580 |
-
return gr.File(filename, visible=True)
|
| 1581 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1582 |
|
| 1583 |
def create_html_report_file(df, file_report):
|
| 1584 |
-
|
| 1585 |
-
|
| 1586 |
-
|
| 1587 |
-
|
| 1588 |
-
|
| 1589 |
-
|
|
|
|
|
|
|
| 1590 |
|
| 1591 |
-
|
| 1592 |
-
|
|
|
|
|
|
|
|
|
|
| 1593 |
|
| 1594 |
# screen_waiting.change(fn=check_job_status, inputs=run_state, outputs=[pair_waiting, tabs, file_for_report],
|
| 1595 |
# every=5)
|
|
@@ -1612,5 +1671,3 @@ if __name__ == "__main__":
|
|
| 1612 |
demo.launch(
|
| 1613 |
show_api=False,
|
| 1614 |
)
|
| 1615 |
-
|
| 1616 |
-
#%%
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
import hashlib
|
| 3 |
import itertools
|
| 4 |
import json
|
|
|
|
| 22 |
import pandas as pd
|
| 23 |
import plotly.express as px
|
| 24 |
import requests
|
| 25 |
+
from bokeh.models import HTMLTemplateFormatter, StringFormatter
|
| 26 |
from rdkit.Chem.rdMolDescriptors import CalcNumRotatableBonds, CalcNumHeavyAtoms, CalcNumAtoms, CalcTPSA
|
| 27 |
from requests.adapters import HTTPAdapter, Retry
|
| 28 |
from rdkit import Chem
|
|
|
|
| 41 |
|
| 42 |
ROOT = Path.cwd()
|
| 43 |
|
| 44 |
+
# DF_FOR_REPORT = pd.DataFrame()
|
| 45 |
|
| 46 |
pd.set_option('display.float_format', '{:.3f}'.format)
|
| 47 |
PandasTools.molRepresentation = 'svg'
|
|
|
|
| 148 |
position: absolute;
|
| 149 |
}
|
| 150 |
|
| 151 |
+
.example {
|
| 152 |
padding: 0;
|
| 153 |
background: none;
|
| 154 |
border: none;
|
|
|
|
| 173 |
)
|
| 174 |
|
| 175 |
|
| 176 |
+
def sa_score(mol):
    """Return the score that ``sascorer.calculateScore`` computes for ``mol``."""
    score = sascorer.calculateScore(mol)
    return score
|
| 178 |
|
| 179 |
|
| 180 |
+
def mw(mol):
    """Return the molecular weight of ``mol`` as given by ``Chem.Descriptors.MolWt``."""
    weight = Chem.Descriptors.MolWt(mol)
    return weight
|
| 182 |
|
| 183 |
|
| 184 |
+
def mr(mol):
    """Return the molar refractivity of ``mol`` as given by ``Crippen.MolMR``."""
    refractivity = Crippen.MolMR(mol)
    return refractivity
|
| 186 |
|
| 187 |
|
| 188 |
+
def hbd(mol):
    """Return the hydrogen-bond donor count of ``mol`` (``Lipinski.NumHDonors``)."""
    donors = Lipinski.NumHDonors(mol)
    return donors
|
| 190 |
|
| 191 |
|
| 192 |
+
def hba(mol):
    """Return the hydrogen-bond acceptor count of ``mol`` (``Lipinski.NumHAcceptors``)."""
    acceptors = Lipinski.NumHAcceptors(mol)
    return acceptors
|
| 194 |
|
| 195 |
|
| 196 |
+
def logp(mol):
    """Return the logP of ``mol`` as given by ``Crippen.MolLogP``."""
    value = Crippen.MolLogP(mol)
    return value
|
| 198 |
|
| 199 |
|
| 200 |
+
def atom(mol):
    """Return the atom count of ``mol`` as given by ``CalcNumAtoms``."""
    n_atoms = CalcNumAtoms(mol)
    return n_atoms
|
| 202 |
|
| 203 |
|
| 204 |
+
def heavy_atom(mol):
    """Return the heavy-atom count of ``mol`` as given by ``CalcNumHeavyAtoms``."""
    n_heavy = CalcNumHeavyAtoms(mol)
    return n_heavy
|
| 206 |
|
| 207 |
|
| 208 |
+
def rotatable_bond(mol):
    """Return the rotatable-bond count of ``mol`` (``CalcNumRotatableBonds``)."""
    n_rotatable = CalcNumRotatableBonds(mol)
    return n_rotatable
|
| 210 |
|
| 211 |
|
| 212 |
+
def tpsa(mol):
    """Return the topological polar surface area of ``mol`` (``CalcTPSA``)."""
    surface_area = CalcTPSA(mol)
    return surface_area
|
| 214 |
|
| 215 |
|
| 216 |
+
def lipinski(mol):
    """
    Lipinski's rules:
        Hydrogen bond donors <= 5
        Hydrogen bond acceptors <= 10
        Molecular weight <= 500 daltons
        logP <= 5

    Returns True when ``mol`` breaks none of the rules, False otherwise.
    """
    # Descriptors are evaluated lazily, in the order listed above; the first
    # violation short-circuits the remaining (more expensive) calls.
    violated = (
        hbd(mol) > 5
        or hba(mol) > 10
        or mw(mol) > 500
        or logp(mol) > 5
    )
    return not violated
|
| 234 |
|
| 235 |
|
| 236 |
+
def reos(mol):
    """
    Rapid Elimination Of Swill (REOS) filter.

    Returns True only when every property checked below lies inside its
    range, end points included; returns False at the first violation:
        Molecular weight between 200 and 500
        logP between -5.0 and 5.0
        H-bond donor count between 0 and 5
        H-bond acceptor count between 0 and 10
        Rotatable bond count between 0 and 8
        Heavy atom count between 15 and 50
    """
    # Bounds are inclusive on purpose: donor, acceptor and rotatable-bond
    # counts can never be negative, so a strict ``0 < count`` test would
    # reject every molecule whose count is exactly zero (e.g. a fully rigid
    # molecule), and a strict upper bound would reject the documented limit.
    if not 200 <= mw(mol) <= 500:
        return False
    elif not -5.0 <= logp(mol) <= 5.0:
        return False
    elif not 0 <= hbd(mol) <= 5:
        return False
    elif not 0 <= hba(mol) <= 10:
        return False
    elif not 0 <= rotatable_bond(mol) <= 8:
        return False
    elif not 15 <= heavy_atom(mol) <= 50:
        return False
    else:
        return True
|
| 261 |
|
| 262 |
|
| 263 |
+
def ghose(mol):
    """
    Ghose drug like filter.

    Returns True only when every property checked below lies inside its
    range, end points included; returns False at the first violation:
        Molecular weight between 160 and 480
        logP between -0.4 and 5.6
        Atom count between 20 and 70
        Molar refractivity between 40 and 130
    """
    # Inclusive bounds: a molecule sitting exactly on a documented limit
    # (e.g. 20 or 70 atoms) is inside the range, matching the ``<=`` style
    # used by the other rule-based filters in this file.
    if not 160 <= mw(mol) <= 480:
        return False
    elif not -0.4 <= logp(mol) <= 5.6:
        return False
    elif not 20 <= atom(mol) <= 70:
        return False
    elif not 40 <= mr(mol) <= 130:
        return False
    else:
        return True
|
| 281 |
|
| 282 |
|
| 283 |
+
def veber(mol):
    """
    The Veber filter is a rule of thumb filter for orally active drugs described in
    Veber et al., J Med Chem. 2002; 45(12): 2615-23.:
        Rotatable bonds <= 10
        Topological polar surface area <= 140

    Returns True when ``mol`` satisfies both limits, False otherwise.
    """
    # ``and`` keeps the original evaluation order: TPSA is only computed when
    # the rotatable-bond limit has already been met.
    if rotatable_bond(mol) <= 10 and tpsa(mol) <= 140:
        return True
    return False
|
| 296 |
|
| 297 |
|
| 298 |
+
def rule_of_three(mol):
    """
    Rule of Three filter (Congreve et al., Drug Discov. Today. 8 (19): 876–7, (2003).):
        Molecular weight <= 300
        logP <= 3
        H-bond donor count <= 3
        H-bond acceptor count <= 3
        Rotatable bond count <= 3

    Returns True when ``mol`` satisfies every limit, False otherwise.
    """
    # (descriptor, upper limit) pairs, checked in order; the loop stops at the
    # first descriptor that exceeds its limit.
    limits = (
        (mw, 300),
        (logp, 3),
        (hbd, 3),
        (hba, 3),
        (rotatable_bond, 3),
    )
    for descriptor, upper in limits:
        if not descriptor(mol) <= upper:
            return False
    return True
|
|
|
|
| 391 |
'X2': 'Target FASTA',
|
| 392 |
'ID1': 'Compound ID',
|
| 393 |
'ID2': 'Target ID',
|
| 394 |
+
'Y': 'Actual CPI/CPA',
|
| 395 |
+
'Y^': 'Predicted CPI/CPA',
|
| 396 |
+
'N': 'Original Index'
|
| 397 |
}
|
| 398 |
|
| 399 |
|
|
|
|
| 426 |
pass
|
| 427 |
|
| 428 |
|
| 429 |
+
def submit_predict(predict_filepath, task, preset, target_family, flag, state, progress=gr.Progress(track_tqdm=True)):
|
| 430 |
if flag:
|
| 431 |
try:
|
| 432 |
job_id = flag
|
|
|
|
| 435 |
preset = PRESET_MAP[preset]
|
| 436 |
target_family = TARGET_FAMILY_MAP[target_family]
|
| 437 |
# email_hash = hashlib.sha256(email.encode()).hexdigest()
|
| 438 |
+
COLUMN_ALIASES.update({
|
| 439 |
+
'Y': 'Actual interaction probability' if task == 'DTI' else 'Actual binding affinity',
|
| 440 |
+
'Y^': 'Predicted interaction probability' if task == 'DTI' else 'Predicted binding affinity'
|
| 441 |
+
})
|
| 442 |
|
| 443 |
# target_family_list = [target_family]
|
| 444 |
# for family in target_family_list:
|
|
|
|
| 456 |
predictions, _ = predict(cfg)
|
| 457 |
predictions = [pd.DataFrame(prediction) for prediction in predictions]
|
| 458 |
prediction_df = pd.concat([prediction_df, pd.concat(predictions, ignore_index=True)])
|
| 459 |
+
prediction_df.set_index('N', inplace=True)
|
| 460 |
|
| 461 |
predictions_file = f'temp/{job_id}_predictions.csv'
|
| 462 |
+
prediction_df.to_csv(predictions_file)
|
| 463 |
|
| 464 |
return [predictions_file,
|
| 465 |
False]
|
| 466 |
except Exception as e:
|
| 467 |
gr.Warning(f"Prediction job failed due to error: {str(e)}")
|
| 468 |
+
return {run_state: False}
|
|
|
|
|
|
|
| 469 |
else:
|
| 470 |
+
return {run_state: state}
|
|
|
|
| 471 |
#
|
| 472 |
# except Exception as e:
|
| 473 |
# raise gr.Error(str(e))
|
|
|
|
| 539 |
|
| 540 |
|
| 541 |
def update_df(file, progress=gr.Progress(track_tqdm=True)):
|
| 542 |
+
# global DF_FOR_REPORT
|
| 543 |
+
if Path(file).is_file():
|
| 544 |
df = pd.read_csv(file)
|
| 545 |
+
# if df['X1'].nunique() > 1:
|
| 546 |
+
df['Scaffold SMILES'] = df['X1'].swifter.progress_bar(
|
| 547 |
+
desc=f"Calculating scaffold...").apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)
|
| 548 |
+
# Add a new column with RDKit molecule objects
|
| 549 |
+
if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
|
| 550 |
+
PandasTools.AddMoleculeColumnToFrame(df, smilesCol='X1', molCol='Compound',
|
|
|
|
|
|
|
| 551 |
includeFingerprints=True)
|
| 552 |
+
PandasTools.AddMoleculeColumnToFrame(df, smilesCol='Scaffold SMILES', molCol='Scaffold',
|
| 553 |
+
includeFingerprints=True)
|
| 554 |
+
# DF_FOR_REPORT = df.copy()
|
| 555 |
|
| 556 |
# pie_chart = None
|
| 557 |
# value = None
|
|
|
|
| 566 |
# elif DF_FOR_REPORT['X2'].nunique() > 1 >= DF_FOR_REPORT['X1'].nunique():
|
| 567 |
# pie_chart = create_pie_chart(DF_FOR_REPORT, category='Target family', value=value, top_k=100)
|
| 568 |
|
| 569 |
+
return {html_report: create_html_report(df),
|
| 570 |
+
raw_df: df,
|
| 571 |
+
report_df: df.copy(),
|
| 572 |
+
analyze_btn: gr.Button(interactive=True)} # pie_chart
|
| 573 |
else:
|
| 574 |
+
return {analyze_btn: gr.Button(interactive=False)}
|
| 575 |
|
| 576 |
|
| 577 |
def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
|
| 578 |
+
df_html = df.copy(deep=True)
|
| 579 |
+
|
| 580 |
+
cols_left = ['ID1', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y', 'Y^',]
|
| 581 |
cols_right = ['X1', 'X2']
|
| 582 |
cols_left = [col for col in cols_left if col in df_html.columns]
|
| 583 |
cols_right = [col for col in cols_right if col in df_html.columns]
|
| 584 |
df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
|
| 585 |
+
|
| 586 |
+
ascending = True if COLUMN_ALIASES['Y^'] == 'Predicted binding affinity' else False
|
| 587 |
df_html = df_html.sort_values(
|
| 588 |
+
[col for col in ['Y', 'Y^'] if col in df_html.columns], ascending=ascending
|
| 589 |
+
)
|
| 590 |
+
|
| 591 |
+
# # Remove repeated info for one-against-N tasks to save visual and physical space
|
| 592 |
+
# if df_html['X1'].nunique() <= 1:
|
| 593 |
+
# columns_to_clean = ['X1', 'ID1', 'Scaffold', 'Compound'] + list(FILTER_MAP.keys()) + list(SCORE_MAP.keys())
|
| 594 |
+
# for column in columns_to_clean:
|
| 595 |
+
# if column in df_html.columns:
|
| 596 |
+
# df_html.loc[1:, column] = pd.NA
|
| 597 |
+
#
|
| 598 |
+
# if df_html['X2'].nunique() <= 1:
|
| 599 |
+
# columns_to_clean = ['X2', 'ID2']
|
| 600 |
+
# for column in columns_to_clean:
|
| 601 |
+
# if column in df_html.columns:
|
| 602 |
+
# df_html.loc[1:, column] = pd.NA
|
| 603 |
+
|
| 604 |
+
if not file:
|
| 605 |
+
df_html = df_html.iloc[:31]
|
| 606 |
+
|
| 607 |
+
# PandasTools.ChangeMoleculeRendering(df_html, renderer='image')
|
| 608 |
# PandasTools.RenderImagesInAllDataFrames(images=True)
|
| 609 |
+
df_html['Compound'] = df_html['Compound'].swifter.progress_bar(
|
| 610 |
+
'Generating compound graph...').apply(lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
| 611 |
+
df_html['Scaffold'] = df_html['Scaffold'].swifter.progress_bar(
|
| 612 |
+
'Generating scaffold graph...').apply(lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
| 613 |
+
df_html = df_html.rename(columns=COLUMN_ALIASES)
|
| 614 |
+
df_html.index.name = 'Index'
|
| 615 |
|
| 616 |
if not file:
|
| 617 |
+
if 'Compound ID' in df_html.columns:
|
| 618 |
+
df_html.drop(['Compound SMILES'], axis=1, inplace=True)
|
| 619 |
+
if 'Target ID' in df_html.columns:
|
| 620 |
+
df_html.drop(['Target FASTA'], axis=1, inplace=True)
|
| 621 |
+
if 'Target FASTA' in df_html.columns:
|
| 622 |
+
df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
|
| 623 |
+
'Processing FASTA...').apply(lambda x: wrap_text(x) if not pd.isna(x) else x)
|
| 624 |
+
df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
|
| 625 |
+
# num_formatters = {col: "{:.3f}" for col in df.select_dtypes('number').columns}
|
| 626 |
+
styled_df = df_html.style.format(precision=3)
|
| 627 |
colors = sns.color_palette('husl', len(df_html.columns))
|
| 628 |
for i, col in enumerate(df_html.columns):
|
| 629 |
if pd.api.types.is_numeric_dtype(df_html[col]):
|
|
|
|
| 634 |
import panel as pn
|
| 635 |
from bokeh.resources import INLINE
|
| 636 |
from bokeh.models import NumberFormatter, BooleanFormatter
|
| 637 |
+
|
| 638 |
+
bool_formatters = {col: BooleanFormatter() for col in df_html.select_dtypes(bool).columns}
|
| 639 |
+
num_formatters = {col: NumberFormatter(format='0.000') for col in df_html.select_dtypes('number').columns}
|
| 640 |
+
other_formatters = {
|
| 641 |
+
'Predicted interaction probability': {'type': 'progress', 'max': 1.0, 'legend': True},
|
| 642 |
+
'Actual interaction probability': {'type': 'progress', 'max': 1.0, 'legend': True},
|
| 643 |
+
'Compound': HTMLTemplateFormatter(),
|
| 644 |
+
'Scaffold': HTMLTemplateFormatter(),
|
| 645 |
+
'Target FASTA': {'type': 'textarea', 'width': 60},
|
| 646 |
}
|
| 647 |
+
formatters = {**bool_formatters, **num_formatters, **other_formatters}
|
| 648 |
+
|
| 649 |
# html = df.to_html(file)
|
| 650 |
# return html
|
| 651 |
+
pn.widgets.Tabulator(df_html, formatters=formatters).save(file, resources=INLINE)
|
| 652 |
|
| 653 |
|
| 654 |
# def create_pie_chart(df, category, value, top_k):
|
|
|
|
| 702 |
return fig
|
| 703 |
|
| 704 |
|
| 705 |
+
def submit_report(df, score_list, filter_list, progress=gr.Progress(track_tqdm=True)):
    """Add the selected filter and score columns to a copy of ``df`` and render the report.

    Args:
        df: DataFrame with a 'Compound' column; every non-missing entry of that column is
            passed to the selected FILTER_MAP / SCORE_MAP callables.
        score_list: Keys of SCORE_MAP naming the score columns to compute.
        filter_list: Keys of FILTER_MAP naming the filter columns to compute.
        progress: Gradio progress tracker; not referenced directly in the body.

    Returns:
        ``(create_html_report(df_report), df_report)`` on success, or ``(None, None)`` after
        emitting a ``gr.Warning`` when any step raises.
    """
    df_report = df.copy()

    def _add_property(name, func):
        # Series.apply forwards extra keyword arguments to the callable, so the former
        # ``axis=1`` argument made every call fail with a TypeError. It is dropped here.
        df_report[name] = df_report['Compound'].swifter.progress_bar(
            desc=f"Calculating {name}").apply(
            lambda mol: func(mol) if not pd.isna(mol) else mol)

    try:
        for filter_name in filter_list:
            _add_property(filter_name, FILTER_MAP[filter_name])

        for score_name in score_list:
            _add_property(score_name, SCORE_MAP[score_name])

        # pie_chart = None
        # value = None
        # elif df['X2'].nunique() > 1 >= df['X1'].nunique():
        # pie_chart = create_pie_chart(df, category='Target family', value=value, top_k=100)

        return create_html_report(df_report), df_report  # pie_chart

    except Exception as e:
        gr.Warning(f'Failed to report results due to error: {str(e)}')
        return None, None
|
| 736 |
|
| 737 |
# def check_job_status(job_id):
|
| 738 |
# job_lock = DATA_PATH / f"{job_id}.lock"
|
|
|
|
| 751 |
|
| 752 |
|
| 753 |
def wrap_text(text, line_length=60):
    """Wrap a plain sequence or FASTA text to ``line_length`` characters per line.

    Non-string input is returned unchanged. Text whose first non-whitespace character is
    ``>`` is treated as FASTA: every line starting with ``>`` opens a new record, the
    header line is kept verbatim, and the record's sequence lines are joined and re-wrapped.
    Any other string is wrapped as a single paragraph.

    Args:
        text: The value to wrap; only ``str`` values are processed.
        line_length: Maximum width of each wrapped line.

    Returns:
        The wrapped string, or ``text`` itself when it is not a string.
    """
    if not isinstance(text, str):
        return text

    wrapper = textwrap.TextWrapper(width=line_length)

    # Tolerate leading blank lines/whitespace before the first FASTA header.
    fasta = text.lstrip()
    if not fasta.startswith('>'):
        return wrapper.fill(text)

    # Split on header *lines* rather than on every '>' character, so a '>' inside a
    # header description (e.g. "A -> B") does not start a bogus record.
    records = []
    for line in fasta.splitlines():
        if line.startswith('>'):
            records.append((line[1:], []))
        else:
            records[-1][1].append(line)

    return '\n'.join(
        f">{header}\n{wrapper.fill(''.join(seq_lines))}" for header, seq_lines in records
    )
|
| 771 |
|
| 772 |
|
| 773 |
def unwrap_text(text):
|
|
|
|
| 884 |
visible=False, interactive=True, scale=4, )
|
| 885 |
|
| 886 |
with gr.Row():
|
| 887 |
+
target_upload_btn = gr.UploadButton(label='Upload a FASTA file', type='binary',
|
| 888 |
+
visible=True, variant='primary',
|
| 889 |
+
size='lg')
|
| 890 |
+
target_query_btn = gr.Button(value='Query the sequence', variant='primary',
|
| 891 |
+
visible=False)
|
| 892 |
+
# with gr.Row():
|
| 893 |
+
# example_uniprot = gr.Button(value='Example: Q16539', elem_classes='example', visible=False)
|
| 894 |
+
# example_gene = gr.Button(value='Example: MAPK14', elem_classes='example', visible=False)
|
| 895 |
+
example_fasta = gr.Button(value='Example: Human MAPK14', elem_classes='example')
|
| 896 |
target_fasta = gr.Code(label='Input or Display FASTA', interactive=True, lines=5)
|
| 897 |
# with gr.Row():
|
| 898 |
# with gr.Column():
|
|
|
|
| 899 |
# with gr.Column():
|
| 900 |
# gr.File(label='Example FASTA file',
|
| 901 |
# value='data/examples/MAPK14.fasta', interactive=False)
|
|
|
|
| 904 |
with gr.Column():
|
| 905 |
# Adjacent string literals are concatenated as-is, so each sentence must carry its own
# trailing space; otherwise the rendered tip reads "(general).Manually ...".
HelpTip(
    "Click Auto-detect to identify the protein family using sequence alignment. "
    "This optional step allows applying a family-specific model instead of an all-family "
    "model (general). "
    "Manually select general if the alignment results are unsatisfactory."
)
|
| 911 |
drug_screen_target_family = gr.Dropdown(
|
|
|
|
| 938 |
with gr.Row():
|
| 939 |
with gr.Column():
|
| 940 |
# Adjacent string literals are concatenated as-is; the fragment ending in "library,"
# needs a trailing space so the tip does not render as "library,while".
HelpTip(
    "Interaction prediction provides you binding probability score between the target of "
    "interest and each compound in the library, "
    "while affinity prediction directly estimates their binding strength measured using "
    "IC50."
)
|
| 946 |
drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
|
| 947 |
label='Step 4. Select a Prediction Task',
|
|
|
|
| 950 |
with gr.Row():
|
| 951 |
with gr.Column():
|
| 952 |
# Adjacent string literals are concatenated as-is, so the sentence break needs an
# explicit space; also fixes the "benchamrk" typo in the user-facing text.
HelpTip(
    "Select your preferred model, or click Recommend for the best-performing model based "
    "on the selected task, family, and whether the target was trained. "
    "Please refer to documentation for detailed benchmark results."
)
|
| 957 |
drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
|
|
|
|
| 961 |
with gr.Column():
|
| 962 |
drug_screen_email = gr.Textbox(
|
| 963 |
label='Step 6. Email (Optional)',
|
| 964 |
+
info="If an email is provided, a notification email will be sent to you when your job "
|
| 965 |
+
"is completed."
|
| 966 |
)
|
| 967 |
|
| 968 |
with gr.Row(visible=True):
|
|
|
|
| 993 |
# Adjacent string literals are concatenated as-is, so each sentence needs its own
# trailing space; also removes the duplicated "and" in the database list.
HelpTip(
    "Enter (paste) a compound SMILES below manually or upload a SDF file. "
    "If multiple entities are in the SDF, only the first will be used. "
    "SMILES can be obtained by searching for the compound of interest in databases such "
    "as NCBI, PubChem and ChEMBL."
)
|
| 999 |
compound_type = gr.Dropdown(
|
| 1000 |
label='Step 1. Select Compound Input Type and Input',
|
| 1001 |
choices=['SMILES', 'SDF'],
|
| 1002 |
+
info='Enter (paste) an SMILES string or upload an SDF file to convert to SMILES.',
|
| 1003 |
value='SMILES',
|
| 1004 |
interactive=True)
|
| 1005 |
+
compound_upload_btn = gr.UploadButton(label='Upload', variant='primary',
|
| 1006 |
+
type='binary', visible=False)
|
| 1007 |
|
| 1008 |
compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
|
| 1009 |
+
example_drug = gr.Button(value='Example: Aspirin', elem_classes='example')
|
| 1010 |
|
| 1011 |
with gr.Row():
|
| 1012 |
with gr.Column():
|
| 1013 |
# Adjacent string literals are concatenated as-is; add the missing space between the
# two sentences so the tip does not render as "applied.If ...".
HelpTip(
    "By default, models trained on all protein families (general) will be applied. "
    "If the proteins in the target library of interest all belong to the same protein "
    "family, manually selecting the family is supported."
)
|
| 1018 |
target_identify_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
|
| 1019 |
value='General',
|
| 1020 |
+
label='Step 2. Select Target Protein Family ('
|
| 1021 |
+
'Optional)')
|
| 1022 |
|
| 1023 |
with gr.Row():
|
| 1024 |
with gr.Column():
|
| 1025 |
# Adjacent string literals are concatenated as-is; add the missing space between the
# sentences and use "a FASTA file", matching the wording used elsewhere in the UI.
HelpTip(
    "Select a preset target library (e.g., ChEMBL33_human_proteins). "
    "Alternatively, upload a CSV file with a column named X2 containing target protein "
    "sequences, or use a FASTA file."
)
|
| 1030 |
target_library = gr.Dropdown(label='Step 3. Select or Upload a Target Library',
|
| 1031 |
choices=list(TARGET_LIBRARY_MAP.keys()))
|
|
|
|
| 1041 |
with gr.Row():
|
| 1042 |
with gr.Column():
|
| 1043 |
# Adjacent string literals are concatenated as-is; the fragment ending in "library,"
# needs a trailing space so the tip does not render as "library,while".
HelpTip(
    "Interaction prediction provides you binding probability score between the target of "
    "interest and each compound in the library, "
    "while affinity prediction directly estimates their binding strength measured using "
    "IC50."
)
|
| 1049 |
target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
|
| 1050 |
label='Step 4. Select a Prediction Task',
|
|
|
|
| 1053 |
with gr.Row():
|
| 1054 |
with gr.Column():
|
| 1055 |
# Adjacent string literals are concatenated as-is, so the sentence break needs an
# explicit space; also fixes the "benchamrk" typo in the user-facing text.
HelpTip(
    "Select your preferred model, or click Recommend for the best-performing model based "
    "on the selected task, family, and whether the compound was trained. "
    "Please refer to documentation for detailed benchmark results."
)
|
| 1060 |
+
target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 5. Select a '
|
| 1061 |
+
'Preset Model')
|
| 1062 |
identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
| 1063 |
|
| 1064 |
with gr.Row():
|
|
|
|
| 1085 |
''')
|
| 1086 |
with gr.Blocks() as infer_block:
|
| 1087 |
with gr.Column() as infer_page:
|
| 1088 |
+
infer_type = gr.Dropdown(
|
| 1089 |
+
choices=['Upload a compound library and a target library',
|
| 1090 |
+
'Upload a CSV interaction pair dataset'],
|
| 1091 |
+
value='Upload a compound library and a target library')
|
| 1092 |
with gr.Column() as pair_upload:
|
| 1093 |
+
gr.File(label="Example custom dataset",
|
| 1094 |
+
value="data/examples/interaction_pair_inference.csv",
|
| 1095 |
+
interactive=False)
|
| 1096 |
+
with gr.Column():
|
|
|
|
| 1097 |
infer_data_for_predict = gr.File(
|
| 1098 |
+
label='Upload a custom dataset', file_count="single", type='filepath', visible=True)
|
| 1099 |
with gr.Column() as pair_generate:
|
| 1100 |
with gr.Row():
|
| 1101 |
+
gr.File(label='Example SDF compound library',
|
| 1102 |
value='data/examples/compound_library.sdf', interactive=False)
|
| 1103 |
+
gr.File(label='Example FASTA target library',
|
| 1104 |
value='data/examples/target_library.fasta', interactive=False)
|
| 1105 |
with gr.Row():
|
| 1106 |
+
gr.File(label='Example CSV compound library',
|
| 1107 |
value='data/examples/compound_library.csv', interactive=False)
|
| 1108 |
+
gr.File(label='Example CSV target library',
|
| 1109 |
value='data/examples/target_library.csv', interactive=False)
|
| 1110 |
with gr.Row():
|
| 1111 |
+
infer_drug = gr.File(label='SDF/CSV file containing multiple compounds',
|
| 1112 |
file_count="single", type='filepath')
|
| 1113 |
+
infer_target = gr.File(label='FASTA/CSV file containing multiple targets',
|
| 1114 |
file_count="single", type='filepath')
|
| 1115 |
|
| 1116 |
+
with gr.Row(visible=True):
|
| 1117 |
+
pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()), label='Task')
|
| 1118 |
+
pair_infer_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
|
| 1119 |
+
pair_infer_target_family = gr.Dropdown(choices=['General'],
|
| 1120 |
+
label='Target family',
|
| 1121 |
+
value='General')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1122 |
|
| 1123 |
+
# with gr.Row():
|
| 1124 |
+
# pair_infer_email = gr.Textbox(
|
| 1125 |
+
# label='Email (optional)',
|
| 1126 |
+
# info="Your email will be used to send you notifications when your job finishes."
|
| 1127 |
+
# )
|
| 1128 |
|
| 1129 |
with gr.Row(visible=True):
|
| 1130 |
# pair_infer_clr_btn = gr.ClearButton(size='lg')
|
|
|
|
| 1139 |
with gr.Blocks() as report:
|
| 1140 |
gr.Markdown('''
|
| 1141 |
# <center>DeepSEQreen Chemical Property Report</center>
|
| 1142 |
+
<center>
|
| 1143 |
To compute chemical properties for the predictions of drug hit screening,
|
| 1144 |
+
target protein identification, and interaction pair inference. You may also upload
|
| 1145 |
+
your own dataset.
|
| 1146 |
+
|
| 1147 |
+
The page shows only a preview report displaying at most 30 records
|
| 1148 |
+
(with top predicted CPI/CPA if reporting results from a prediction job).
|
| 1149 |
|
| 1150 |
+
For a full report, please
|
| 1151 |
+
generate and download a CSV or interactive HTML report below.
|
| 1152 |
+
</center>
|
|
|
|
| 1153 |
''')
|
| 1154 |
with gr.Row():
|
| 1155 |
file_for_report = gr.File(interactive=True, type='filepath')
|
| 1156 |
+
raw_df = gr.State(value=pd.DataFrame())
|
| 1157 |
+
report_df = gr.State(value=pd.DataFrame())
|
| 1158 |
scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
|
| 1159 |
filters = gr.CheckboxGroup(list(FILTER_MAP.keys()), label='Filters')
|
| 1160 |
|
| 1161 |
with gr.Row():
|
| 1162 |
# clear_btn = gr.ClearButton(size='lg')
|
| 1163 |
+
analyze_btn = gr.Button('REPORT', variant='primary', size='lg', interactive=False)
|
| 1164 |
|
| 1165 |
with gr.Row():
|
| 1166 |
with gr.Column(scale=3):
|
|
|
|
| 1169 |
|
| 1170 |
with gr.Row():
|
| 1171 |
with gr.Column():
|
| 1172 |
+
csv_generate = gr.Button(value='Generate CSV Report',
|
| 1173 |
+
interactive=True, variant='primary', visible=False)
|
| 1174 |
+
csv_download_file = gr.File(label='Download CSV Report', visible=False)
|
| 1175 |
with gr.Column():
|
| 1176 |
+
html_generate = gr.Button(value='Generate HTML Report',
|
| 1177 |
+
interactive=True, variant='primary', visible=False)
|
| 1178 |
+
html_download_file = gr.File(label='Download HTML Report', visible=False)
|
| 1179 |
|
| 1180 |
|
| 1181 |
def target_input_type_select(input_type):
|
|
|
|
| 1272 |
def example_fill(input_type):
|
| 1273 |
return {target_id: 'Q16539',
|
| 1274 |
target_gene: 'MAPK14',
|
| 1275 |
+
target_organism: 'Human',
|
| 1276 |
target_fasta: """
|
| 1277 |
>sp|Q16539|MK14_HUMAN Mitogen-activated protein kinase 14 OS=Homo sapiens OX=9606 GN=MAPK14 PE=1 SV=3
|
| 1278 |
MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQ
|
|
|
|
| 1284 |
"""}
|
| 1285 |
|
| 1286 |
|
| 1287 |
+
example_fasta.click(fn=example_fill, inputs=target_input_type, outputs=[
|
| 1288 |
+
target_id, target_gene, target_organism, target_fasta], show_progress=False)
|
| 1289 |
+
# example_uniprot.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
|
| 1290 |
+
# example_gene.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
|
| 1291 |
|
| 1292 |
def screen_recommend_model(fasta, family, task):
|
| 1293 |
task = TASK_MAP[task]
|
|
|
|
| 1298 |
train = pd.read_csv('data/benchmarks/all_families_reduced_dta_train.csv')
|
| 1299 |
score = 'CI'
|
| 1300 |
|
| 1301 |
+
if not np.isin(process_target_fasta(fasta), train['X2']):
|
| 1302 |
scenario = "Unseen target"
|
| 1303 |
else:
|
| 1304 |
scenario = "Seen target"
|
|
|
|
| 1315 |
& (benchmark_df['Scenario'] == scenario)
|
| 1316 |
& (benchmark_df['all'] == False)]
|
| 1317 |
row = filtered_df.loc[filtered_df[score].idxmax()]
|
| 1318 |
+
|
| 1319 |
return gr.Dropdown(value=row['preset'],
|
| 1320 |
info=f"Reason: {scenario} in the training dataset; we recommend the model "
|
| 1321 |
f"with the best {score} ({float(row[score]):.3f}) "
|
|
|
|
| 1330 |
def compound_input_type_select(input_type):
    """Return a button update whose visibility depends on the compound input type.

    'SMILES' yields a hidden button and 'SDF' a visible one; any other value
    returns ``None``.
    """
    if input_type == 'SMILES':
        return gr.Button(visible=False)
    if input_type == 'SDF':
        return gr.Button(visible=True)
    return None
|
| 1336 |
|
| 1337 |
|
| 1338 |
compound_type.select(fn=compound_input_type_select,
|
| 1339 |
+
inputs=compound_type, outputs=compound_upload_btn, show_progress=False)
|
| 1340 |
|
| 1341 |
|
| 1342 |
def compound_upload_process(input_type, input_upload):
|
|
|
|
| 1424 |
screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
|
| 1425 |
else:
|
| 1426 |
screen_df = process_drug_library_upload(library_upload)
|
|
|
|
| 1427 |
# Reject libraries at or above the configured record limit. The limit is interpolated
# into the message; previously the f-string had no placeholder and printed the
# constant's name instead of its value.
if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
    raise gr.Error(f'The uploaded compound library has more records '
                   f'than the allowed maximum ({CUSTOM_DATASET_MAX_LEN}).')
|
|
|
|
| 1566 |
).then(
|
| 1567 |
fn=submit_predict,
|
| 1568 |
inputs=[screen_data_for_predict, drug_screen_task, drug_screen_preset,
|
| 1569 |
+
drug_screen_target_family, screen_flag, run_state], # , drug_screen_email],
|
| 1570 |
outputs=[file_for_report, run_state]
|
| 1571 |
).then(
|
| 1572 |
fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
|
|
|
|
| 1578 |
inputs=[compound_smiles, target_library, target_library_upload, run_state], # , drug_screen_email],
|
| 1579 |
outputs=[identify_data_for_predict, identify_flag, run_state]
|
| 1580 |
).then(
|
| 1581 |
+
fn=lambda: [gr.Column(visible=False), gr.Markdown(visible=True)],
|
| 1582 |
+
outputs=[identify_page, identify_waiting]
|
| 1583 |
).then(
|
| 1584 |
fn=submit_predict,
|
| 1585 |
inputs=[identify_data_for_predict, target_identify_task, target_identify_preset,
|
| 1586 |
+
target_identify_target_family, identify_flag, run_state], # , target_identify_email],
|
| 1587 |
outputs=[file_for_report, run_state]
|
| 1588 |
).then(
|
| 1589 |
fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
|
|
|
|
| 1600 |
).then(
|
| 1601 |
fn=submit_predict,
|
| 1602 |
inputs=[infer_data_for_predict, pair_infer_task, pair_infer_preset,
|
| 1603 |
+
pair_infer_target_family, infer_flag, run_state], # , pair_infer_email],
|
| 1604 |
outputs=[file_for_report, run_state]
|
| 1605 |
).then(
|
| 1606 |
+
fn=lambda: [gr.Column(visible=True), gr.Markdown(visible=False), gr.Tabs(selected=3)],
|
| 1607 |
+
outputs=[infer_page, infer_waiting, tabs]
|
| 1608 |
)
|
| 1609 |
|
| 1610 |
# TODO background job from these 3 pipelines to update file_for_report
|
| 1611 |
|
| 1612 |
# A new report file refreshes the preview, both DataFrame states and the REPORT button.
file_for_report.change(
    fn=update_df,
    inputs=file_for_report,
    outputs=[html_report, raw_df, report_df, analyze_btn],  # ranking_pie_chart
)
# REPORT recomputes the selected scores/filters from the raw frame.
analyze_btn.click(
    fn=submit_report,
    inputs=[raw_df, scores, filters],
    outputs=[html_report, report_df],  # ranking_pie_chart
)
|
| 1624 |
|
| 1625 |
|
| 1626 |
+
def create_csv_report_file(df, file_report):
    """Write ``df`` without its 'Compound' and 'Scaffold' columns to a timestamped CSV.

    The output path is ``reports/<stem of file_report.name>_DeepSEQreen_report_<time>.csv``.

    Returns:
        A visible ``gr.File`` for the CSV plus a hidden ``gr.Button`` on success, or
        ``(None, None)`` after emitting a ``gr.Warning`` when anything raises.
    """
    try:
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        source_stem = Path(file_report.name).stem
        filename = f"reports/{source_stem}_DeepSEQreen_report_{timestamp}.csv"

        exportable = df.drop(labels=['Compound', 'Scaffold'], axis=1)
        exportable.to_csv(filename, index=False)

        download = gr.File(filename, visible=True)
        return download, gr.Button(visible=False)
    except Exception as e:
        gr.Warning(f"Failed to generate CSV due to error: {str(e)}")
        return None, None
|
| 1636 |
|
| 1637 |
def create_html_report_file(df, file_report):
    """Render ``df`` to a timestamped HTML file through ``create_html_report``.

    The output path is ``reports/<stem of file_report.name>_DeepSEQreen_report_<time>.html``.

    Returns:
        A visible ``gr.File`` for the HTML plus a hidden ``gr.Button`` on success, or
        ``(None, None)`` after emitting a ``gr.Warning`` when anything raises.
    """
    try:
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        source_stem = Path(file_report.name).stem
        filename = f"reports/{source_stem}_DeepSEQreen_report_{timestamp}.html"

        create_html_report(df, filename)

        download = gr.File(filename, visible=True)
        return download, gr.Button(visible=False)
    except Exception as e:
        gr.Warning(f"Failed to generate HTML due to error: {str(e)}")
        return None, None
|
| 1646 |
|
| 1647 |
+
# A change of the preview report reveals both "Generate ... Report" buttons.
html_report.change(
    fn=lambda: [gr.Button(visible=True)] * 2,
    outputs=[csv_generate, html_generate],
)
# Each generate button writes its file from report_df and updates its own
# download component and button.
csv_generate.click(
    fn=create_csv_report_file,
    inputs=[report_df, file_for_report],
    outputs=[csv_download_file, csv_generate],
)
html_generate.click(
    fn=create_html_report_file,
    inputs=[report_df, file_for_report],
    outputs=[html_download_file, html_generate],
)
|
| 1652 |
|
| 1653 |
# screen_waiting.change(fn=check_job_status, inputs=run_state, outputs=[pair_waiting, tabs, file_for_report],
|
| 1654 |
# every=5)
|
|
|
|
| 1671 |
demo.launch(
|
| 1672 |
show_api=False,
|
| 1673 |
)
|
|
|
|
|
|