File size: 9,044 Bytes
d05f89f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
# -*- coding: utf-8 -*-
# The B3clf library computes the blood-brain barrier (BBB) permeability
# of organic molecules with resampling strategies.
#
# Copyright (C) 2021 The Ayers Lab
#
# This file is part of B3clf.
#
# B3clf is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# B3clf is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
#
# --
import warnings

import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
"""Convert SMILES to 3D and/or minimize the geometry from SDF with force field."""
def geometry_optimize(input_fname,
                      output_sdf,
                      steps_opt=10000,
                      # convergence=1.e-7,
                      tool="rdkit",
                      # optimization="cg",
                      force_field="MMFF94s",
                      smi_col=None,
                      sep=r"\s+|\t+"):
    """Generate 3D coordinates and run geometry optimization with force field.

    Parameters
    ----------
    input_fname : str
        Input molecule file name; forwarded unchanged to the selected backend.
    output_sdf : str
        Output SDF file name.
    steps_opt : int, optional
        Maximum number of optimization iterations. Default=10000.
    tool : str, optional
        Backend used for the optimization, matched case-insensitively. Only "rdkit" is
        implemented. Default="rdkit".
    force_field : str, optional
        Force field name forwarded to the backend. Default="MMFF94s".
    smi_col : optional
        Column holding the SMILES strings when the input is a text table. Default=None.
    sep : str, optional
        Regular expression used as column separator for text inputs. Default=r"\s+|\t+".

    Raises
    ------
    ValueError
        If `tool` is "openbabel" (not supported yet) or any other unknown tool.
    """
    # Normalize once so that every branch is matched case-insensitively; previously
    # only the "rdkit" comparison was, so "OpenBabel" produced the generic error.
    tool_name = tool.lower()

    if tool_name == "rdkit":
        # use RDKit to minimize the geometry
        minimize_with_rdkit(input_molfname=input_fname,
                            sdf_out=output_sdf,
                            maxIters=steps_opt,
                            force_field=force_field,
                            smi_col=smi_col,
                            sep=sep)
    elif tool_name == "openbabel":
        # The OpenBabel backend (which would additionally take `optimization` and
        # `convergence` arguments) is not wired in yet.
        raise ValueError("OpenBabel is not supported yet.")
    else:
        raise ValueError("{} not implemented yet.".format(tool))
def minimize_with_rdkit(input_molfname,
                        sdf_out,
                        smi_col=None,
                        mol_name_col=None,
                        maxIters=400,
                        force_field="MMFF94s",
                        sep=r"\s+"):
    """Add hydrogen for 3D coordinates and minimize the geometry with RdKit.

    Parameters
    ----------
    input_molfname : str
        Input molecule file name. Files ending in ".smi" or ".csv" are read as a
        header-less text table of SMILES (and optionally names); files ending in ".sdf"
        are read with the RDKit SDF supplier. The extension is matched case-insensitively.
    sdf_out : str
        Output SDF file name.
    smi_col : optional
        Column label of the SMILES strings in a multi-column table. When None, the first
        column is used. Default=None.
    mol_name_col : optional
        Column label of the molecule names in a multi-column table. When None, the last
        column is used. Default=None.
    maxIters : int, optional
        Maximum number of force field iterations. Default=400.
    force_field : str, optional
        Either "MMFF94s" or "uff". Default="MMFF94s".
    sep : str, optional
        Regular expression used as column separator for text tables. Default=r"\s+".

    Raises
    ------
    NotImplementedError
        If `force_field` is neither "MMFF94s" nor "uff".
    ValueError
        If the extension of `input_molfname` is not ".smi", ".csv" or ".sdf".
    """
    # Validate the arguments before touching any file so that a bad call does not leave
    # an empty or truncated output file behind.
    if force_field not in ("MMFF94s", "uff"):
        raise NotImplementedError("This method is not implemented yet.")

    # todo: support .txt files
    # todo: add support of more flexible separators
    fname_lower = input_molfname.lower()
    if fname_lower.endswith((".smi", ".csv")):
        mols = _load_mols_from_smiles_table(input_molfname, sep, smi_col, mol_name_col)
    elif fname_lower.endswith(".sdf"):
        mols = _load_mols_from_sdf(input_molfname)
    else:
        raise ValueError(
            "Unsupported input file {}; expected a .smi, .csv or .sdf file.".format(
                input_molfname))

    writer = Chem.SDWriter(sdf_out)
    try:
        for mol in mols:
            mol = Chem.AddHs(mol)
            if not _embed_3d(mol):
                # Without a conformer the force field calls would raise; skip this
                # molecule instead of aborting the whole batch.
                warnings.warn("Could not generate 3D coordinates for {}; "
                              "molecule skipped.".format(mol.GetProp("_Name")))
                continue
            _optimize_conformer(mol, force_field, maxIters)
            writer.write(mol)
    finally:
        # Always flush and release the output file, even when a molecule fails.
        writer.close()


def _load_mols_from_smiles_table(input_molfname, sep, smi_col, mol_name_col):
    """Read a header-less SMILES table and return the named RDKit molecules."""
    df_mol = pd.read_csv(input_molfname, sep=sep, engine="python", header=None)
    if df_mol.shape[1] == 1:
        # Case for only SMILES column: the SMILES string doubles as the name
        smile_list = df_mol.iloc[:, 0].to_list()
        mol_name_list = smile_list
    else:
        # Case for SMILES and MOL name columns
        smi_series = df_mol.iloc[:, 0] if smi_col is None else df_mol[smi_col]
        # todo: use name if column name is valid
        name_series = df_mol.iloc[:, -1] if mol_name_col is None else df_mol[mol_name_col]
        smile_list = smi_series.to_list()
        mol_name_list = name_series.to_list()

    mols = []
    for smi, mol_name in zip(smile_list, mol_name_list):
        # Blank cells are parsed as NaN (a float), which MolFromSmiles cannot handle.
        if not isinstance(smi, str):
            continue
        mol = Chem.MolFromSmiles(smi)
        # SMILES that RDKit cannot parse are silently dropped
        if mol is None:
            continue
        # SetProp only accepts strings: fall back to the SMILES for missing names and
        # stringify everything else (e.g. purely numeric identifiers).
        mol.SetProp("_Name", smi if pd.isna(mol_name) else str(mol_name))
        mols.append(mol)
    return mols


def _load_mols_from_sdf(input_molfname):
    """Read the molecules of an SDF file, naming unnamed ones by their SMILES."""
    suppl = Chem.SDMolSupplier(input_molfname,
                               sanitize=True,
                               removeHs=False,
                               strictParsing=True)
    # The supplier yields None for records it fails to parse; drop those.
    mols = [mol for mol in suppl if mol is not None]
    for mol in mols:
        if not mol.HasProp("_Name") or mol.GetProp("_Name") == "":
            mol.SetProp("_Name", Chem.MolToSmiles(mol))
    return mols


def _embed_3d(mol):
    """Embed 3D coordinates into `mol` in place; return True on success."""
    # EmbedMolecule returns the new conformer id, or -1 when embedding failed.
    if AllChem.EmbedMolecule(mol, randomSeed=999) != -1:
        return True
    # Second attempt starting from random coordinates.
    return AllChem.EmbedMolecule(mol, randomSeed=999, useRandomCoords=True) != -1


def _optimize_conformer(mol, force_field, maxIters):
    """Minimize `mol` in place with `force_field`, retrying or falling back as needed."""
    # Status codes returned by the RDKit optimizers:
    #  0 optimize converged
    # -1 can not set up force field
    #  1 more iterations required
    if force_field == "MMFF94s":
        # use MMFF~ force field if possible
        # taken from
        # https://open-babel.readthedocs.io/en/latest/Forcefields/mmff94.html
        # Some experiments and most theoretical calculations show significant pyramidal
        # "puckering" at nitrogens in isolated structures. The MMFF94s (static) variant has
        # slightly different out-of-plane bending and dihedral torsion parameters to
        # planarize certain types of delocalized trigonal N atoms, such as aromatic aniline.
        # This provides a better match to the time-average molecular geometry in solution or
        # crystal structures.
        #
        # If you are comparing force-field optimized molecules to crystal structure
        # geometries, we recommend using the MMFF94s variant for this reason. All other
        # parameters are identical. However, if you are performing "docking" simulations,
        # consideration of active solution conformations, or other types of computational
        # studies, we recommend using the MMFF94 variant, since one form or another of the N
        # geometry will predominate.
        mini_tag = AllChem.MMFFOptimizeMolecule(mol, force_field, maxIters=maxIters)
        if mini_tag == 1:
            AllChem.MMFFOptimizeMolecule(mol, force_field, maxIters=maxIters * 2)
        elif mini_tag == -1:
            # MMFF has no parameters for this molecule: fall back to UFF, honouring the
            # caller's iteration limit (mirrors the fallback of the UFF branch).
            AllChem.UFFOptimizeMolecule(mol, maxIters=maxIters)
    else:
        # use uff force field if possible
        mini_tag = AllChem.UFFOptimizeMolecule(mol, maxIters=maxIters)
        if mini_tag == 1:
            AllChem.UFFOptimizeMolecule(mol, maxIters=maxIters * 2)
        elif mini_tag == -1:
            AllChem.MMFFOptimizeMolecule(mol, "MMFF94s", maxIters=maxIters)
# todo: the implementation below does not yet support adding molecule names (such as SMILES strings)
# def minimize_with_openbabel(input_molfname,
# sdf_out,
# steps=10000,
# convergence=1.e-7,
# optimization="cg",
# force_field="GAFF",
# smi_col=None):
# """Minimize the geometries with openbabel.
#
# Parameters
# ----------
# input_molfname : str
# Input molecule file name.
# sdf_out : str
# Output molecule file name.
# steps : int, optional
# Specify the maximum number of steps. Default=10000.
# optimization : str, optional
# Use conjugate gradients ("cg") or steepest descent ("sd") algorithm for optimization.
# Default="cg".
# convergence : float, optional
# convergence threshold. Default=1.e-7.
# force_field : str, optional
# ForceField name including Generalized Amber Force Field (gaff), Ghemical Force Field
# (ghemical), MMFF94 Force Field (mmff94) and Universal Force Field (uff). Default="gaff".
# """
#
# # https://open-babel.readthedocs.io/en/latest/Command-line_tools/babel.html#forcefield-energy-and-minimization
# subprocess.Popen(["obabel", input_molfname, "-h", "-O", sdf_out,
# "--gen3d", "--minimize",
# "--n", str(steps), "--sd", optimization, "--crit",
# str(convergence), "--ff", force_field])
# print("Geometry optimization with OpenBabel is done.")
|