# -*- coding: utf-8 -*- | |
# The B3clf library computes the blood-brain barrier (BBB) permeability | |
# of organic molecules with resampling strategies. | |
# | |
# Copyright (C) 2021 The Ayers Lab | |
# | |
# This file is part of B3clf. | |
# | |
# B3clf is free software; you can redistribute it and/or | |
# modify it under the terms of the GNU General Public License | |
# as published by the Free Software Foundation; either version 3 | |
# of the License, or (at your option) any later version. | |
# | |
# B3clf is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License | |
# along with this program; if not, see <http://www.gnu.org/licenses/> | |
# | |
# -- | |
import pandas as pd | |
from rdkit import Chem | |
from rdkit.Chem import AllChem | |
"""Convert SMILES to 3D and/or minimize the geometry from SDF with force field.""" | |
def geometry_optimize(input_fname,
                      output_sdf,
                      steps_opt=10000,
                      # convergence=1.e-7,
                      tool="rdkit",
                      # optimization="cg",
                      force_field="MMFF94s",
                      smi_col=None,
                      sep=r"\s+|\t+"):
    """Generate 3D coordinates and run geometry optimization with force field.

    Parameters
    ----------
    input_fname : str
        Input molecule file name; forwarded to the selected backend.
    output_sdf : str
        Output SDF file name; forwarded to the selected backend.
    steps_opt : int, optional
        Maximum number of optimization iterations handed to the backend. Default=10000.
    tool : str, optional
        Backend used for the minimization (matched case-insensitively). Only "rdkit" is
        available at the moment. Default="rdkit".
    force_field : str, optional
        Force field name handed to the backend. Default="MMFF94s".
    smi_col : optional
        Column holding the SMILES strings when the input is a text table; forwarded to the
        backend. Default=None.
    sep : str, optional
        Regular expression used to split the columns of a text input. The default splits on
        one or more whitespace characters (tabs included).

    Raises
    ------
    ValueError
        If ``tool`` is "openbabel" (not supported yet) or any other unknown backend.
    """
    # Normalize once so that every backend name is matched case-insensitively; previously
    # only the RDKit comparison was, and "OpenBabel" fell through to the generic error.
    backend = tool.lower()

    if backend == "rdkit":
        # use RDKit to embed and minimize the geometry
        minimize_with_rdkit(input_molfname=input_fname,
                            sdf_out=output_sdf,
                            maxIters=steps_opt,
                            force_field=force_field,
                            smi_col=smi_col,
                            sep=sep)
    elif backend == "openbabel":
        # todo: call minimize_with_openbabel (with optimization/convergence settings) once
        #  that backend is supported
        raise ValueError("OpenBabel is not supported yet.")
    else:
        raise ValueError("{} not implemented yet.".format(tool))
def minimize_with_rdkit(input_molfname,
                        sdf_out,
                        smi_col=None,
                        mol_name_col=None,
                        maxIters=400,
                        force_field="MMFF94s",
                        sep=r"\s+"):
    """Add hydrogen for 3D coordinates and minimize the geometry with RdKit.

    Parameters
    ----------
    input_molfname : str
        Input file name. Files ending in ".smi" or ".csv" are read as a header-less text
        table of SMILES (and optionally names); files ending in ".sdf" are read as SDF.
    sdf_out : str
        Output SDF file name receiving the minimized molecules.
    smi_col : optional
        Column label of the SMILES strings in a multi-column table. When None, the first
        column is used. Default=None.
    mol_name_col : optional
        Column label of the molecule names in a multi-column table. When None, the last
        column is used. Default=None.
    maxIters : int, optional
        Maximum number of force-field iterations. Default=400.
    force_field : str, optional
        Either "MMFF94s" or "uff". Default="MMFF94s".
    sep : str, optional
        Regular expression used to split the columns of a text table. The default splits on
        one or more whitespace characters.

    Raises
    ------
    NotImplementedError
        If ``force_field`` is neither "MMFF94s" nor "uff".
    ValueError
        If the input file extension is not supported, or if no 3D coordinates can be
        generated for a molecule.
    """
    # Validate before touching any file so that an unsupported request does not leave an
    # empty output file (or an unclosed writer) behind.
    if force_field not in ("MMFF94s", "uff"):
        raise NotImplementedError("This method is not implemented yet.")

    mols = _load_molecules(input_molfname,
                           smi_col=smi_col,
                           mol_name_col=mol_name_col,
                           sep=sep)

    writer = Chem.SDWriter(sdf_out)
    try:
        for mol in mols:
            writer.write(_embed_and_optimize(mol, force_field, maxIters))
    finally:
        # always flush and release the output file, even when a molecule fails
        writer.close()


def _load_molecules(input_molfname, smi_col, mol_name_col, sep):
    """Read the molecules of a SMILES table or an SDF file, each carrying a ``_Name``."""
    fname = input_molfname.lower()

    if fname.endswith((".smi", ".csv")):
        # todo: support .txt files
        # todo: add support of more flexible separators
        df_mol = pd.read_csv(input_molfname, sep=sep, engine="python", header=None)
        if df_mol.shape[1] == 1:
            # Case for only SMILES column: the SMILES doubles as the molecule name
            smile_list = df_mol.iloc[:, 0].to_list()
            mol_name_list = smile_list
        else:
            # Case for SMILES and MOL name columns
            smi_series = df_mol.iloc[:, 0] if smi_col is None else df_mol[smi_col]
            # todo: use name if column name is valid
            name_series = df_mol.iloc[:, -1] if mol_name_col is None else df_mol[mol_name_col]
            smile_list = smi_series.to_list()
            mol_name_list = name_series.to_list()

        mols = []
        for smi, mol_name in zip(smile_list, mol_name_list):
            # missing cells are parsed as NaN (a float) and cannot be a SMILES string
            if not isinstance(smi, str):
                continue
            mol = Chem.MolFromSmiles(smi)
            # SMILES that cannot be parsed are left out of the output
            if mol is None:
                continue
            # SetProp only accepts strings, but the parser may hand back numbers (e.g.
            # integer IDs) or NaN for an empty name; fall back to the SMILES in that case
            mol.SetProp("_Name", smi if pd.isna(mol_name) else str(mol_name))
            mols.append(mol)
        return mols

    if fname.endswith(".sdf"):
        suppl = Chem.SDMolSupplier(input_molfname,
                                   sanitize=True,
                                   removeHs=False,
                                   strictParsing=True)
        mols = []
        for mol in suppl:
            # the supplier yields None for records it cannot parse
            if mol is None:
                continue
            # name unnamed molecules after their SMILES string
            if not mol.HasProp("_Name") or mol.GetProp("_Name") == "":
                mol.SetProp("_Name", Chem.MolToSmiles(mol))
            mols.append(mol)
        return mols

    raise ValueError(
        "Unsupported input file {}; expected a .smi, .csv or .sdf file.".format(input_molfname))


def _embed_and_optimize(mol, force_field, maxIters):
    """Return ``mol`` with explicit hydrogens and an embedded, force-field minimized geometry."""
    mol = Chem.AddHs(mol)

    # EmbedMolecule returns -1 when no conformer could be generated; the optimizers below
    # would then fail on the missing conformer. Retry once from random coordinates before
    # giving up with an explicit error.
    if AllChem.EmbedMolecule(mol, randomSeed=999) == -1:
        if AllChem.EmbedMolecule(mol, randomSeed=999, useRandomCoords=True) == -1:
            mol_name = mol.GetProp("_Name") if mol.HasProp("_Name") else ""
            raise ValueError(
                "Failed to generate 3D coordinates for molecule {}.".format(mol_name))

    # Return codes of the RDKit optimizers:
    #    0 optimize converged
    #   -1 can not set up force field
    #    1 more iterations required
    if force_field == "MMFF94s":
        # use MMFF~ force field if possible
        # taken from
        # https://open-babel.readthedocs.io/en/latest/Forcefields/mmff94.html
        # Some experiments and most theoretical calculations show significant pyramidal
        # "puckering" at nitrogens in isolated structures. The MMFF94s (static) variant has
        # slightly different out-of-plane bending and dihedral torsion parameters to planarize
        # certain types of delocalized trigonal N atoms, such as aromatic aniline. This provides
        # a better match to the time-average molecular geometry in solution or crystal
        # structures.
        #
        # If you are comparing force-field optimized molecules to crystal structure geometries,
        # we recommend using the MMFF94s variant for this reason. All other parameters are
        # identical. However, if you are performing "docking" simulations, consideration of
        # active solution conformations, or other types of computational studies, we recommend
        # using the MMFF94 variant, since one form or another of the N geometry will
        # predominate.
        mini_tag = AllChem.MMFFOptimizeMolecule(mol, force_field, maxIters=maxIters)
        if mini_tag == 1:
            # not converged yet: continue with a doubled iteration budget
            AllChem.MMFFOptimizeMolecule(mol, force_field, maxIters=maxIters * 2)
        elif mini_tag == -1:
            # MMFF parameters are not available for this molecule: fall back to UFF with
            # the same iteration budget the caller asked for
            AllChem.UFFOptimizeMolecule(mol, maxIters=maxIters)
    elif force_field == "uff":
        # use uff force field if possible
        mini_tag = AllChem.UFFOptimizeMolecule(mol, maxIters=maxIters)
        if mini_tag == 1:
            # not converged yet: continue with a doubled iteration budget
            AllChem.UFFOptimizeMolecule(mol, maxIters=maxIters * 2)
        elif mini_tag == -1:
            # UFF could not be set up for this molecule: fall back to MMFF94s
            AllChem.MMFFOptimizeMolecule(mol, "MMFF94s", maxIters=maxIters)
    else:
        raise NotImplementedError("This method is not implemented yet.")

    return mol
# todo: the implementation below does not yet support adding molecule names (such as SMILES strings)
# def minimize_with_openbabel(input_molfname, | |
# sdf_out, | |
# steps=10000, | |
# convergence=1.e-7, | |
# optimization="cg", | |
# force_field="GAFF", | |
# smi_col=None): | |
# """Minimize the geometries with openbabel. | |
# | |
# Parameters | |
# ---------- | |
# input_molfname : str | |
# Input molecule file name.
# sdf_out : str | |
# Output molecule file name. | |
# steps : int, optional | |
# Specify the maximum number of steps. default=10000.
# optimization : str, optional | |
# Use conjugate gradients ("cg") or steepest descent ("sd") algorithm for optimization. | |
# Default="cg". | |
# convergence : float, optional | |
# convergence threshold. Default=1.e-7. | |
# force_field : str, optional | |
# ForceField name including Generalized Amber Force Field (gaff), Ghemical Force Field | |
# (ghemical), MMFF94 Force Field (mmff94) and Universal Force Field (uff). Default="gaff". | |
# """ | |
# | |
# # https://open-babel.readthedocs.io/en/latest/Command-line_tools/babel.html#forcefield-energy-and-minimization | |
# subprocess.Popen(["obabel", input_molfname, "-h", "-O", sdf_out, | |
# "--gen3d", "--minimize", | |
# "--n", str(steps), "--sd", optimization, "--crit", | |
# str(convergence), "--ff", force_field]) | |
# print("Geometry optimization with OpenBabel is done.") | |