Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,6 +23,7 @@ from email_validator import validate_email, EmailNotValidError
|
|
| 23 |
import gradio as gr
|
| 24 |
import hydra
|
| 25 |
import pandas as pd
|
|
|
|
| 26 |
import requests
|
| 27 |
from requests.adapters import HTTPAdapter, Retry
|
| 28 |
from markdown import markdown
|
|
@@ -41,7 +42,7 @@ import panel as pn
|
|
| 41 |
from apscheduler.schedulers.background import BackgroundScheduler
|
| 42 |
from tinydb import TinyDB, Query
|
| 43 |
|
| 44 |
-
import swifter
|
| 45 |
from tqdm.auto import tqdm
|
| 46 |
|
| 47 |
from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
|
|
@@ -719,15 +720,12 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
|
|
| 719 |
gr.Warning("At least one of columns `X1` and `X2` must be in the uploaded dataset.")
|
| 720 |
return {analyze_btn: gr.Button(interactive=False)}
|
| 721 |
if 'X1' in df.columns:
|
| 722 |
-
df['Scaffold SMILES'] = df['X1'].
|
| 723 |
-
|
| 724 |
-
df['Scaffold'] = df['Scaffold SMILES'].swifter.progress_bar(
|
| 725 |
-
desc='Generating scaffold graphs...').apply(
|
| 726 |
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
| 727 |
# Add a new column with RDKit molecule objects
|
| 728 |
if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
|
| 729 |
-
df['Compound'] = df['X1'].swifter.progress_bar(
|
| 730 |
-
desc='Generating molecular graphs...').apply(
|
| 731 |
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
| 732 |
|
| 733 |
# DF_FOR_REPORT = df.copy()
|
|
@@ -806,19 +804,16 @@ def create_html_report(df, file=None, task=None, progress=gr.Progress(track_tqdm
|
|
| 806 |
elif 'Y^' in df_html.columns:
|
| 807 |
job = 'Interaction Pair Inference'
|
| 808 |
if 'Compound' in df_html.columns:
|
| 809 |
-
df_html['Compound'] = df_html['Compound'].swifter.progress_bar(
|
| 810 |
-
desc='Generating compound graph...').apply(
|
| 811 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
| 812 |
if 'Scaffold' in df_html.columns:
|
| 813 |
-
df_html['Scaffold'] = df_html['Scaffold'].swifter.progress_bar(
|
| 814 |
-
desc='Generating scaffold graph...').apply(
|
| 815 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
| 816 |
|
| 817 |
df_html.rename(columns=column_aliases, inplace=True)
|
| 818 |
df_html.index.name = 'Index'
|
| 819 |
if 'Target FASTA' in df_html.columns:
|
| 820 |
-
df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
|
| 821 |
-
desc='Processing FASTA...').apply(
|
| 822 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
| 823 |
|
| 824 |
num_cols = df_html.select_dtypes('number').columns
|
|
@@ -836,8 +831,7 @@ def create_html_report(df, file=None, task=None, progress=gr.Progress(track_tqdm
|
|
| 836 |
if 'Target ID' in df_html.columns:
|
| 837 |
df_html.drop(['Target FASTA'], axis=1, inplace=True)
|
| 838 |
if 'Target FASTA' in df_html.columns:
|
| 839 |
-
df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
|
| 840 |
-
desc='Processing FASTA...').apply(
|
| 841 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
| 842 |
if 'Scaffold SMILES' in df_html.columns:
|
| 843 |
df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
|
|
@@ -1091,13 +1085,11 @@ def submit_report(df, score_list, filter_list, task, progress=gr.Progress(track_
|
|
| 1091 |
df_report = df.copy()
|
| 1092 |
try:
|
| 1093 |
for filter_name in filter_list:
|
| 1094 |
-
df_report[filter_name] = df_report['Compound'].swifter.progress_bar(
|
| 1095 |
-
desc=f"Calculating {filter_name}").apply(
|
| 1096 |
lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)
|
| 1097 |
|
| 1098 |
for score_name in score_list:
|
| 1099 |
-
df_report[score_name] = df_report['Compound'].swifter.progress_bar(
|
| 1100 |
-
desc=f"Calculating {score_name}").apply(
|
| 1101 |
lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
|
| 1102 |
|
| 1103 |
# pie_chart = None
|
|
@@ -1713,8 +1705,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
| 1713 |
def align_score(query):
|
| 1714 |
return aligner.align(processed_fasta, query).score
|
| 1715 |
|
| 1716 |
-
alignment_df['score'] = alignment_df['X2'].swifter.progress_bar(
|
| 1717 |
-
desc="Detecting protein family of the target...").apply(align_score)
|
| 1718 |
row = alignment_df.loc[alignment_df['score'].idxmax()]
|
| 1719 |
return gr.Dropdown(value=row['protein_family'],
|
| 1720 |
info=f"Reason: Best BLASTP score ({row['score']}) "
|
|
@@ -2022,13 +2013,13 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 2022 |
infer_df = pd.read_csv(drug_target_pair_upload)
|
| 2023 |
validate_columns(infer_df, ['X1', 'X2'])
|
| 2024 |
|
| 2025 |
-
infer_df['X1_ERR'] = infer_df['X1'].swifter.apply(
|
| 2026 |
validate_seq_str, regex=SMILES_PAT)
|
| 2027 |
if not infer_df['X1_ERR'].isna().all():
|
| 2028 |
raise ValueError(
|
| 2029 |
f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")
|
| 2030 |
|
| 2031 |
-
infer_df['X2_ERR'] = infer_df['X2'].swifter.apply(
|
| 2032 |
validate_seq_str, regex=FASTA_PAT)
|
| 2033 |
if not infer_df['X2_ERR'].isna().all():
|
| 2034 |
raise ValueError(
|
|
@@ -2278,4 +2269,5 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
| 2278 |
|
| 2279 |
if __name__ == "__main__":
|
| 2280 |
hydra.initialize(version_base="1.3", config_path="configs", job_name="webserver_inference")
|
|
|
|
| 2281 |
demo.queue(default_concurrency_limit=None, max_size=10).launch(show_api=False)
|
|
|
|
| 23 |
import gradio as gr
|
| 24 |
import hydra
|
| 25 |
import pandas as pd
|
| 26 |
+
from pandarallel import pandarallel
|
| 27 |
import requests
|
| 28 |
from requests.adapters import HTTPAdapter, Retry
|
| 29 |
from markdown import markdown
|
|
|
|
| 42 |
from apscheduler.schedulers.background import BackgroundScheduler
|
| 43 |
from tinydb import TinyDB, Query
|
| 44 |
|
| 45 |
+
# import swifter
|
| 46 |
from tqdm.auto import tqdm
|
| 47 |
|
| 48 |
from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
|
|
|
|
| 720 |
gr.Warning("At least one of columns `X1` and `X2` must be in the uploaded dataset.")
|
| 721 |
return {analyze_btn: gr.Button(interactive=False)}
|
| 722 |
if 'X1' in df.columns:
|
| 723 |
+
df['Scaffold SMILES'] = df['X1'].parallel_apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)
|
| 724 |
+
df['Scaffold'] = df['Scaffold SMILES'].parallel_apply(
|
|
|
|
|
|
|
| 725 |
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
| 726 |
# Add a new column with RDKit molecule objects
|
| 727 |
if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
|
| 728 |
+
df['Compound'] = df['X1'].parallel_apply(
|
|
|
|
| 729 |
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
| 730 |
|
| 731 |
# DF_FOR_REPORT = df.copy()
|
|
|
|
| 804 |
elif 'Y^' in df_html.columns:
|
| 805 |
job = 'Interaction Pair Inference'
|
| 806 |
if 'Compound' in df_html.columns:
|
| 807 |
+
df_html['Compound'] = df_html['Compound'].parallel_apply(
|
|
|
|
| 808 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
| 809 |
if 'Scaffold' in df_html.columns:
|
| 810 |
+
df_html['Scaffold'] = df_html['Scaffold'].parallel_apply(
|
|
|
|
| 811 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
| 812 |
|
| 813 |
df_html.rename(columns=column_aliases, inplace=True)
|
| 814 |
df_html.index.name = 'Index'
|
| 815 |
if 'Target FASTA' in df_html.columns:
|
| 816 |
+
df_html['Target FASTA'] = df_html['Target FASTA'].parallel_apply(
|
|
|
|
| 817 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
| 818 |
|
| 819 |
num_cols = df_html.select_dtypes('number').columns
|
|
|
|
| 831 |
if 'Target ID' in df_html.columns:
|
| 832 |
df_html.drop(['Target FASTA'], axis=1, inplace=True)
|
| 833 |
if 'Target FASTA' in df_html.columns:
|
| 834 |
+
df_html['Target FASTA'] = df_html['Target FASTA'].parallel_apply(
|
|
|
|
| 835 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
| 836 |
if 'Scaffold SMILES' in df_html.columns:
|
| 837 |
df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
|
|
|
|
| 1085 |
df_report = df.copy()
|
| 1086 |
try:
|
| 1087 |
for filter_name in filter_list:
|
| 1088 |
+
df_report[filter_name] = df_report['Compound'].parallel_apply(
|
|
|
|
| 1089 |
lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)
|
| 1090 |
|
| 1091 |
for score_name in score_list:
|
| 1092 |
+
df_report[score_name] = df_report['Compound'].parallel_apply(
|
|
|
|
| 1093 |
lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
|
| 1094 |
|
| 1095 |
# pie_chart = None
|
|
|
|
| 1705 |
def align_score(query):
|
| 1706 |
return aligner.align(processed_fasta, query).score
|
| 1707 |
|
| 1708 |
+
alignment_df['score'] = alignment_df['X2'].parallel_apply(align_score)
|
|
|
|
| 1709 |
row = alignment_df.loc[alignment_df['score'].idxmax()]
|
| 1710 |
return gr.Dropdown(value=row['protein_family'],
|
| 1711 |
info=f"Reason: Best BLASTP score ({row['score']}) "
|
|
|
|
| 2013 |
infer_df = pd.read_csv(drug_target_pair_upload)
|
| 2014 |
validate_columns(infer_df, ['X1', 'X2'])
|
| 2015 |
|
| 2016 |
+
infer_df['X1_ERR'] = infer_df['X1'].parallel_apply(
|
| 2017 |
validate_seq_str, regex=SMILES_PAT)
|
| 2018 |
if not infer_df['X1_ERR'].isna().all():
|
| 2019 |
raise ValueError(
|
| 2020 |
f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")
|
| 2021 |
|
| 2022 |
+
infer_df['X2_ERR'] = infer_df['X2'].parallel_apply(
|
| 2023 |
validate_seq_str, regex=FASTA_PAT)
|
| 2024 |
if not infer_df['X2_ERR'].isna().all():
|
| 2025 |
raise ValueError(
|
|
|
|
| 2269 |
|
| 2270 |
if __name__ == "__main__":
|
| 2271 |
hydra.initialize(version_base="1.3", config_path="configs", job_name="webserver_inference")
|
| 2272 |
+
pandarallel.initialize(progress_bar=True)
|
| 2273 |
demo.queue(default_concurrency_limit=None, max_size=10).launch(show_api=False)
|