B3clf / b3clf /geometry_opt.py
legend1234's picture
Synced repo using 'sync_with_huggingface' Github Action
d05f89f
# -*- coding: utf-8 -*-
# The B3clf library computes the blood-brain barrier (BBB) permeability
# of organic molecules with resampling strategies.
#
# Copyright (C) 2021 The Ayers Lab
#
# This file is part of B3clf.
#
# B3clf is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# B3clf is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
#
# --
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
"""Convert SMILES to 3D and/or minimize the geometry from SDF with force field."""
def geometry_optimize(input_fname,
                      output_sdf,
                      steps_opt=10000,
                      tool="rdkit",
                      force_field="MMFF94s",
                      smi_col=None,
                      sep=r"\s+|\t+"):
    """Generate 3D coordinates and run geometry optimization with force field.

    Parameters
    ----------
    input_fname : str
        Input molecule file name; forwarded to the selected backend.
    output_sdf : str
        Output SDF file name.
    steps_opt : int, optional
        Maximum number of optimization iterations. Default=10000.
    tool : str, optional
        Backend used for the minimization, matched case-insensitively.
        Only "rdkit" is implemented. Default="rdkit".
    force_field : str, optional
        Force field name forwarded to the backend. Default="MMFF94s".
    smi_col : optional
        Column holding the SMILES strings in a delimited input file;
        forwarded to the backend. Default=None.
    sep : str, optional
        Regular expression used to split columns of a delimited input file.
        Default splits on runs of whitespace or tabs.

    Raises
    ------
    ValueError
        If `tool` is "openbabel" (not supported yet) or any unknown backend.
    """
    # Normalize once so every branch treats the tool name case-insensitively.
    backend = tool.lower()

    if backend == "rdkit":
        # use RDKit to minimize the geometry
        minimize_with_rdkit(input_molfname=input_fname,
                            sdf_out=output_sdf,
                            maxIters=steps_opt,
                            force_field=force_field,
                            smi_col=smi_col,
                            sep=sep)
    elif backend == "openbabel":
        # The OpenBabel backend is intentionally disabled for now.
        raise ValueError("OpenBabel is not supported yet.")
    else:
        raise ValueError("{} not implemented yet.".format(tool))
def _load_mols_from_table(input_molfname, smi_col, mol_name_col, sep):
    """Build named RDKit molecules from a delimited text file of SMILES strings."""
    # todo: support .txt files
    # todo: add support of more flexible separators
    df_mol = pd.read_csv(input_molfname, sep=sep, engine="python", header=None)

    if df_mol.shape[1] == 1:
        # Case for only SMILES column: the SMILES string doubles as the name.
        smile_list = df_mol.iloc[:, 0].to_list()
        mol_name_list = smile_list
    else:
        # Case for SMILES and MOL name columns
        smi_series = df_mol.iloc[:, 0] if smi_col is None else df_mol[smi_col]
        # todo: use name if column name is valid
        name_series = df_mol.iloc[:, -1] if mol_name_col is None else df_mol[mol_name_col]
        smile_list = smi_series.to_list()
        mol_name_list = name_series.to_list()

    mols = []
    for smi, name in zip(smile_list, mol_name_list):
        # Missing cells are parsed as NaN (a float); they cannot be SMILES.
        if not isinstance(smi, str):
            continue
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # Unparsable SMILES are skipped.
            continue
        # SetProp only accepts strings; fall back to the SMILES when the name
        # cell is empty and stringify numeric names.
        if pd.isna(name) or str(name) == "":
            name = smi
        mol.SetProp("_Name", str(name))
        mols.append(mol)
    return mols


def _load_mols_from_sdf(input_molfname):
    """Read molecules from an SDF file, naming unnamed records by their SMILES."""
    suppl = Chem.SDMolSupplier(input_molfname,
                               sanitize=True,
                               removeHs=False,
                               strictParsing=True)
    mols = []
    for mol in suppl:
        # The supplier yields None for records it could not parse.
        if mol is None:
            continue
        if not mol.HasProp("_Name") or mol.GetProp("_Name") == "":
            mol.SetProp("_Name", Chem.MolToSmiles(mol))
        mols.append(mol)
    return mols


def _embed_and_optimize(mol, force_field, maxIters):
    """Add hydrogens, embed 3D coordinates and minimize one molecule."""
    mol = Chem.AddHs(mol)

    # EmbedMolecule returns -1 when no conformer could be generated; retry from
    # random coordinates before giving up, because the optimizers below need a
    # conformer to work on.
    if AllChem.EmbedMolecule(mol, randomSeed=999) == -1:
        if AllChem.EmbedMolecule(mol, randomSeed=999, useRandomCoords=True) == -1:
            mol_name = mol.GetProp("_Name") if mol.HasProp("_Name") else ""
            raise ValueError(
                "Failed to generate 3D coordinates for molecule {!r}.".format(mol_name))

    # Optimizer return codes:
    #  0 optimize converged
    # -1 can not set up force field
    #  1 more iterations required
    if force_field == "MMFF94s":
        # MMFF94s (static variant) planarizes certain delocalized trigonal N
        # atoms, which gives a better match to time-averaged solution or crystal
        # geometries; see
        # https://open-babel.readthedocs.io/en/latest/Forcefields/mmff94.html
        status = AllChem.MMFFOptimizeMolecule(mol, force_field, maxIters=maxIters)
        if status == 1:
            # Not converged: run again with a doubled iteration budget.
            AllChem.MMFFOptimizeMolecule(mol, force_field, maxIters=maxIters * 2)
        elif status == -1:
            # MMFF could not be set up: fall back to UFF (fixed 400 iterations).
            AllChem.UFFOptimizeMolecule(mol, maxIters=400)
    else:
        # force_field == "uff" (validated by the caller)
        status = AllChem.UFFOptimizeMolecule(mol, maxIters=maxIters)
        if status == 1:
            # Not converged: run again with a doubled iteration budget.
            AllChem.UFFOptimizeMolecule(mol, maxIters=maxIters * 2)
        elif status == -1:
            # UFF could not be set up: fall back to MMFF94s.
            AllChem.MMFFOptimizeMolecule(mol, "MMFF94s", maxIters=maxIters)
    return mol


def minimize_with_rdkit(input_molfname,
                        sdf_out,
                        smi_col=None,
                        mol_name_col=None,
                        maxIters=400,
                        force_field="MMFF94s",
                        sep=r"\s+"):
    """Add hydrogen for 3D coordinates and minimize the geometry with RDKit.

    Parameters
    ----------
    input_molfname : str
        Input molecule file name. ".smi" and ".csv" files are read as
        header-less delimited tables of SMILES (and optional names); ".sdf"
        files are read with RDKit's SD supplier.
    sdf_out : str
        Output SDF file name.
    smi_col : optional
        Column label of the SMILES strings in a multi-column table. The first
        column is used when None. Default=None.
    mol_name_col : optional
        Column label of the molecule names in a multi-column table. The last
        column is used when None. Default=None.
    maxIters : int, optional
        Maximum number of optimization iterations. Default=400.
    force_field : str, optional
        Either "MMFF94s" or "uff". Default="MMFF94s".
    sep : str, optional
        Regular expression used to split table columns. Default=r"\s+".

    Raises
    ------
    NotImplementedError
        If `force_field` is neither "MMFF94s" nor "uff".
    ValueError
        If the input file extension is not supported, or if 3D coordinates
        cannot be generated for a molecule.
    """
    # Validate arguments before touching any file so bad input fails fast and
    # no partial output file is left behind.
    if force_field not in ("MMFF94s", "uff"):
        raise NotImplementedError("This method is not implemented yet.")

    # load molecules
    fname_lower = input_molfname.lower()
    if fname_lower.endswith((".smi", ".csv")):
        mols = _load_mols_from_table(input_molfname, smi_col, mol_name_col, sep)
    elif fname_lower.endswith(".sdf"):
        mols = _load_mols_from_sdf(input_molfname)
    else:
        raise ValueError(
            "Unsupported input file {!r}; expected a .smi, .csv or .sdf file.".format(
                input_molfname))

    writer = Chem.SDWriter(sdf_out)
    try:
        for mol in mols:
            writer.write(_embed_and_optimize(mol, force_field, maxIters))
    finally:
        # Always flush and release the output file, even if a molecule fails.
        writer.close()
# todo: the current implementation does not support adding molecule names (such as SMILES strings)
# def minimize_with_openbabel(input_molfname,
# sdf_out,
# steps=10000,
# convergence=1.e-7,
# optimization="cg",
# force_field="GAFF",
# smi_col=None):
# """Minimize the geometries with openbabel.
#
# Parameters
# ----------
# input_molfname : str
# Input molecule file name.
# sdf_out : str
# Output molecule file name.
# steps : int, optional
# Specify the maximum number of steps. Default=10000.
# optimization : str, optional
# Use conjugate gradients ("cg") or steepest descent ("sd") algorithm for optimization.
# Default="cg".
# convergence : float, optional
# convergence threshold. Default=1.e-7.
# force_field : str, optional
# ForceField name including Generalized Amber Force Field (gaff), Ghemical Force Field
# (ghemical), MMFF94 Force Field (mmff94) and Universal Force Field (uff). Default="gaff".
# """
#
# # https://open-babel.readthedocs.io/en/latest/Command-line_tools/babel.html#forcefield-energy-and-minimization
# subprocess.Popen(["obabel", input_molfname, "-h", "-O", sdf_out,
# "--gen3d", "--minimize",
# "--n", str(steps), "--sd", optimization, "--crit",
# str(convergence), "--ff", force_field])
# print("Geometry optimization with OpenBabel is done.")