# Hugging Face Space (status when this page was captured: Sleeping).
# Runs the full strong baseline, including smina/vina docking,
# gnina rescoring, and an input conformational ensemble.
import argparse | |
import os | |
import shutil | |
import subprocess | |
import pandas as pd | |
from rdkit import Chem | |
from rdkit.Chem import AllChem, PandasTools, rdMolTransforms | |
import numpy as np | |
from moleculekit.molecule import Molecule | |
import time | |
import gradio as gr | |
from gradio_molecule3d import Molecule3D | |
def protonate_receptor_and_ligand(protein):
    """Add hydrogens to the receptor by running the external ``reduce`` tool.

    Despite the name, only the receptor file is processed here; nothing is
    done to the ligand in this function.

    Parameters
    ----------
    protein : str
        Path to the receptor file. It must contain ``.pdb`` so that the
        output path differs from the input path.

    Returns
    -------
    str
        Path of the file that was written: ``protein`` with ``.pdb``
        replaced by ``_H.pdb``.

    Raises
    ------
    ValueError
        If ``protein`` does not contain ``.pdb``. In that case the output
        path would equal the input path and opening it for writing would
        truncate the receptor before ``reduce`` could read it.
    """
    protein_out = protein.replace(".pdb", "_H.pdb")
    if protein_out == protein:
        raise ValueError(
            f"Receptor path {protein!r} does not contain '.pdb'; "
            "refusing to overwrite the input file."
        )
    with open(protein_out, "w") as f:
        # reduce prints the protonated structure on stdout; its diagnostics
        # on stderr are discarded.
        # NOTE(review): the exit status is not checked (as before) -- confirm
        # how reliable reduce's exit codes are before adding check=True.
        subprocess.run(
            ["reduce", "-BUILD", protein],
            stdout=f,
            stderr=subprocess.DEVNULL,
        )
    return protein_out
def generate_conformers(ligand, num_confs=8):
    """Build a 3D conformer ensemble for a SMILES string and save it as SDF.

    The molecule is parsed, explicit hydrogens are added, conformers are
    embedded with a fixed random seed, optimised with UFF, and every
    conformer is written as a separate record to ``ligand.sdf`` in the
    current working directory.

    Parameters
    ----------
    ligand : str
        SMILES string of the ligand.
    num_confs : int, optional
        Number of conformers requested from the embedding step (default 8).

    Raises
    ------
    ValueError
        If RDKit cannot parse ``ligand`` as SMILES.
    RuntimeError
        If the embedding step produces no conformer at all.
    """
    mol = Chem.MolFromSmiles(ligand)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None.
        raise ValueError(f"Could not parse ligand SMILES: {ligand!r}")

    # A molecule freshly parsed from SMILES carries no conformers, so there
    # is nothing to clear before embedding.
    mol = Chem.AddHs(mol)
    conf_ids = list(
        AllChem.EmbedMultipleConfs(mol, numConfs=num_confs, randomSeed=1)
    )
    if not conf_ids:
        # Fail here rather than hand an empty SDF file to the docking step.
        raise RuntimeError(
            f"Conformer embedding produced no conformers for {ligand!r}"
        )
    AllChem.UFFOptimizeMoleculeConfs(mol)

    with Chem.SDWriter("ligand.sdf") as writer:
        for cid in conf_ids:
            writer.write(mol, confId=cid)
def get_bb(points):
    """Return the axis-aligned bounding box of a set of points.

    Parameters
    ----------
    points : numpy.ndarray
        Coordinates indexed as ``points[:, axis]`` with axis 0, 1, 2 for
        x, y, z. Each column slice is reduced in full, so any trailing
        dimension is folded into the minimum / maximum as well.

    Returns
    -------
    boundingBox : list
        Two corner lists: ``[[xmin, ymin, zmin], [xmax, ymax, zmax]]``
    """
    axes = range(3)
    lower_corner = [np.min(points[:, axis]) for axis in axes]
    upper_corner = [np.max(points[:, axis]) for axis in axes]
    return [lower_corner, upper_corner]
def run_docking(protein, ligand):
    """Dock ``ligand.sdf`` against the protonated receptor with gnina.

    The search box is the bounding box of the receptor coordinates read from
    ``protein`` (blind docking over the whole protein). The poses gnina
    writes to ``ligand_output.sdf`` are then re-written to the same file
    ordered by decreasing ``CNNscore`` so that the best pose comes first.

    Parameters
    ----------
    protein : str
        Path to the receptor file. The receptor actually passed to gnina is
        this path with ``.pdb`` replaced by ``_H.pdb``.
    ligand : str
        Unused; the ligand conformers are read from ``ligand.sdf``. Kept so
        the call signature stays the same.

    Returns
    -------
    pandas.Series
        The ``CNNscore`` values as floats, in the same (descending) order as
        the poses in ``ligand_output.sdf``.

    Raises
    ------
    subprocess.CalledProcessError
        If gnina exits with a non-zero status. Without this check a failed
        run could silently pick up a ``ligand_output.sdf`` left over from an
        earlier call.
    """
    mol = Molecule(protein)
    # The receptor file handed to gnina keeps its original coordinates, so
    # the box must be centred on the receptor itself rather than on the
    # origin. Centring only the in-memory copy would leave the box misplaced.
    lower, upper = get_bb(mol.coords)
    center = [float(lo + hi) / 2 for lo, hi in zip(lower, upper)]
    size = [float(hi - lo) for lo, hi in zip(lower, upper)]

    command = [
        "gnina",
        "-r",
        protein.replace(".pdb", "_H.pdb"),
        "-l",
        "ligand.sdf",
        "-o",
        "ligand_output.sdf",
        "--center_x",
        str(center[0]),
        "--center_y",
        str(center[1]),
        "--center_z",
        str(center[2]),
        "--size_x",
        str(size[0]),
        "--size_y",
        str(size[1]),
        "--size_z",
        str(size[2]),
        "--scoring",
        "vina",
        "--exhaustiveness",
        "4",
        "--num_modes",
        "1",
        "--seed",
        "1",
    ]
    subprocess.run(command, check=True)

    # sort the poses from the multiple conformation runs, so overall best is first
    poses = PandasTools.LoadSDF("ligand_output.sdf")
    poses["CNNscore"] = poses["CNNscore"].astype(float)
    gnina_order = poses.sort_values("CNNscore", ascending=False).reset_index(drop=True)
    PandasTools.WriteSDF(
        gnina_order,
        "ligand_output.sdf",
        properties=list(poses.columns),
    )
    # Return the scores in the order the poses were written.
    return gnina_order["CNNscore"]
def predict(input_sequence, input_ligand, input_msa, input_protein):
    """Run the docking pipeline for one request from the UI.

    Steps: protonate the receptor, generate ligand conformers from the
    SMILES string, dock them, and report the scores and the wall-clock time.

    Parameters
    ----------
    input_sequence : str
        Protein sequence from the UI; not used by this pipeline.
    input_ligand : str
        Ligand SMILES string.
    input_msa : object
        MSA file from the UI; not used by this pipeline.
    input_protein : str
        Path to the receptor structure file.

    Returns
    -------
    tuple
        ``([input_protein, "ligand_output.sdf"], metrics, run_time)`` where
        ``metrics`` is ``{"cnn_score": [...]}`` holding plain floats and
        ``run_time`` is the elapsed time in seconds.
    """
    start_time = time.time()
    protonate_receptor_and_ligand(input_protein)
    generate_conformers(input_ligand)
    cnn_score = run_docking(input_protein, input_ligand)
    # Convert to built-in floats so the metrics dict holds only plain,
    # JSON-friendly values instead of a pandas object.
    metrics = {"cnn_score": [float(score) for score in cnn_score]}
    run_time = time.time() - start_time
    return [input_protein, "ligand_output.sdf"], metrics, run_time
# Representations for the 3D viewer: entry 0 is drawn as a cartoon and
# entry 1 as sticks (predict returns the protein first, then the ligand).
reps = [
    {"model": 0, "style": "cartoon", "color": "whiteCarbon"},
    {"model": 1, "style": "stick", "color": "greenCarbon"},
]

with gr.Blocks() as app:
    # Page header and a short description of the method.
    gr.Markdown("# Strong Docking Baseline")
    gr.Markdown("Using the strong docking baseline from inductive bio described in their [blog post](https://www.inductive.bio/blog/strong-baseline-for-alphafold-3-docking)")
    gr.Markdown("Note that in the original implementation the binding site is defined by the original ligand (redocking), here we use a bounding box of the protein for the docking (blind docking).")

    # Text inputs.
    with gr.Row():
        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")

    # File inputs.
    with gr.Row():
        input_msa = gr.File(label="Input Protein MSA (A3M)")
        input_protein = gr.File(label="Input protein monomer")

    # define any options here
    # for automated inference the default options are used
    # slider_option = gr.Slider(0,10, label="Slider Option")
    # checkbox_option = gr.Checkbox(label="Checkbox Option")
    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")

    btn = gr.Button("Run Inference")

    # Outputs: 3D view of the structures, score metrics, and runtime.
    out = Molecule3D(reps=reps)
    metrics = gr.JSON(label="Metrics")
    run_time = gr.Textbox(label="Runtime")

    btn.click(
        fn=predict,
        inputs=[input_sequence, input_ligand, input_msa, input_protein],
        outputs=[out, metrics, run_time],
    )

app.launch()