import gradio as gr
import plotly.graph_objects as go
import numpy as np
import pandas as pd
from model.model import DTIModel
# Tag embedded in every checkpoint path used by predict_and_plot (the
# bt_model_path directory and the XGBoost JSON file names).
# NOTE(review): looks like a DDMMYYYY_HHMM training timestamp - confirm.
dt_str = "14062024_0910"
def make_spider_plot(predictions, model_names, smiles_list):
    """Build a radar (spider) chart with one filled trace per molecule.

    Args:
        predictions: One sequence of scores per molecule, paired
            positionally with ``smiles_list``. Each sequence supplies the
            radial values plotted against ``model_names``.
        model_names: Labels placed around the angular axis.
        smiles_list: SMILES strings used as the legend name of each trace.

    Returns:
        A plotly ``Figure`` whose radial axis is fixed to the range [0, 1]
        and whose legend is shown.
    """
    radar = go.Figure()

    # zip() stops at the shorter input, so surplus rows or labels are ignored.
    for scores, label in zip(predictions, smiles_list):
        trace = go.Scatterpolar(
            r=scores,
            theta=model_names,
            fill='toself',
            name=label,
        )
        radar.add_trace(trace)

    radial_axis = dict(visible=True, range=[0, 1])
    radar.update_layout(polar=dict(radialaxis=radial_axis), showlegend=True)
    return radar
def _parse_smiles(smiles_input):
    """Split a multi-line text field into a list of non-empty SMILES strings."""
    # splitlines() copes with "\r\n" pastes; blank or whitespace-only lines
    # are dropped so they are never sent to the models as empty molecules.
    lines = (smiles_input or "").splitlines()
    return [line.strip() for line in lines if line.strip()]


def predict_and_plot(amino_acid_sequence, smiles_input, datasets):
    """Score every SMILES against one protein with each selected model.

    Args:
        amino_acid_sequence: Protein sequence text; surrounding whitespace
            is removed before it is handed to the models.
        smiles_input: Text with one SMILES string per line.
        datasets: Names of the models to run; each must be one of the keys
            of ``gbm_model_paths`` below.

    Returns:
        A ``(figure, dataframe)`` tuple: the radar chart produced by
        ``make_spider_plot`` and a table with a ``"SMILES"`` column followed
        by one column per selected model.

    Raises:
        gr.Error: If the sequence is empty, no SMILES were supplied, no
            model was selected, or an unknown model name was given.
    """
    gbm_model_paths = {
        "BindingDB": f"model/xgb_models/xgb_model_BindingDB_{dt_str}_bt_optimized_0.json",
        "BioSNAP": f"model/xgb_models/xgb_model_BIOSNAP_full_data_{dt_str}_bt_optimized_0.json",
        "DAVIS": f"model/xgb_models/xgb_model_DAVIS_{dt_str}_bt_optimized_0.json",
        "BarlowDTI XXL": f"model/xgb_models/{dt_str}_barlowdti_xxl_model.json",
    }

    sequence = (amino_acid_sequence or "").strip()
    smiles_list = _parse_smiles(smiles_input)
    # De-duplicate while keeping order: the ensemble dict below is keyed by
    # name, so repeated names would otherwise desynchronise the columns.
    selected = list(dict.fromkeys(datasets or []))

    # Validate before the (slow) model loading so the user gets fast,
    # readable feedback in the UI instead of a late stack trace.
    if not sequence:
        raise gr.Error("Please enter a protein sequence.")
    if not smiles_list:
        raise gr.Error("Please enter at least one SMILES string.")
    if not selected:
        raise gr.Error("Please select at least one model.")
    unknown = [name for name in selected if name not in gbm_model_paths]
    if unknown:
        raise gr.Error(f"Unknown model(s): {', '.join(unknown)}")

    # NOTE(review): models are constructed on every call; caching them is a
    # possible follow-up if DTIModel instances are safe to reuse - confirm.
    model_ensemble = {}
    for name in selected:
        print(f"Loading model {name}")
        model_ensemble[name] = DTIModel(
            bt_model_path=f"model/stash/{dt_str}",
            gbm_model_path=gbm_model_paths[name],
        )

    # Assumes predict() returns one score per SMILES - TODO confirm. Stacking
    # gives (models, molecules); the transpose yields one row per molecule.
    per_model = [
        model.predict(smiles_list, sequence)
        for model in model_ensemble.values()
    ]
    predictions = np.array(per_model).transpose().tolist()

    df = pd.DataFrame(predictions, columns=selected)
    df.insert(0, "SMILES", smiles_list)

    fig = make_spider_plot(predictions, selected, smiles_list)
    return fig, df
# Model choices offered by the checkbox group in the interface below. The
# selected names are passed to predict_and_plot as `datasets`, so every entry
# must match a key of its gbm_model_paths mapping.
dataset_names = [
"BarlowDTI XXL",
"BindingDB",
"BioSNAP",
"DAVIS",
]
# Heading passed to gr.Interface(title=...).
title = "Predict Drug-Target Interactions with BarlowDTI"
# Introductory text passed to gr.Interface(description=...).
description = """
Enter the amino acid sequence and SMILES to get interaction predictions visualized as a spider graph and in a table.
The values can be interpreted as the probability of interaction between the drug and the target (0 = no interaction, 1 = interaction).
Thank you for using BarlowDTI!
Note: Inference may take longer, you can upgrade to a paid GPU-enabled plan for faster inference.
"""
# Footer text passed to gr.Interface(article=...); contains Markdown links and
# a fenced BibTeX entry.
# NOTE(review): "et. al" should probably read "et al.", and "BarlowDTIXXL" is
# spelled "BarlowDTI XXL" elsewhere in this file - left untouched here because
# the string is user-facing runtime text.
article = """
This interface lets the scientific community use BarlowDTIXXL to predict drug-target interactions.
The model ensemble consists of four models trained on different datasets: our own curated and refined dataset based on
[Golts et. al](https://doi.org/10.48550/arXiv.2401.17174)
in combination with
[BindingDB](https://doi.org/10.1093/nar/gkl999),
[BioSNAP](https://snap.stanford.edu/index.html), and
[DAVIS](https://doi.org/10.1038/nbt.1990).
If you use our approach in your research, please cite our paper:
```
@misc{schuh2024barlowtwinsdeepneural,
title={Barlow Twins Deep Neural Network for Advanced 1D Drug-Target Interaction Prediction},
author={Maximilian G. Schuh and Davide Boldini and Stephan A. Sieber},
year={2024},
eprint={2408.00040},
archivePrefix={arXiv},
primaryClass={q-bio.BM},
url={https://arxiv.org/abs/2408.00040},
}
```
"""
# Visual theme for the interface: Gradio's Base theme with a violet primary
# hue and IBM Plex Sans as the preferred font, followed by generic fallbacks.
theme = gr.themes.Base(
primary_hue="violet",
font=[gr.themes.GoogleFont('IBM Plex Sans'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
)
# Wire the UI to predict_and_plot. The three input components map
# positionally to (amino_acid_sequence, smiles_input, datasets) and the two
# output components receive the returned (fig, df) pair.
iface = gr.Interface(
fn=predict_and_plot,
inputs=[
gr.Textbox(label="Protein Sequence", info="Just one sequence is allowed. Remove FASTA syntax (e.g. >ABC)."),
gr.Textbox(label="Molecule SMILES", info="One per line, multiple allowed."),
# Only "BarlowDTI XXL" is pre-selected; the other models are opt-in.
gr.CheckboxGroup(choices=dataset_names, label="Select Models for Prediction", value="BarlowDTI XXL")
],
outputs=[
gr.Plot(label="Predictions Visualization"),
gr.DataFrame(label="Predictions DataFrame"),
],
title=title,
description=description,
article=article,
theme=theme
)
# Start the app; share=True asks Gradio to also create a public share link.
# NOTE(review): there is no `if __name__ == "__main__"` guard, so merely
# importing this module starts the server - confirm that is intended.
iface.launch(share=True)