# ReactSeq / e_smiles.py
# Hosting-page residue captured together with the file (not part of the code):
#   Oopstom's picture
#   Upload 313 files
#   c668e80 verified
#   raw | history | blame
#   118 kB
import numpy as np
from rdkit import Chem
import os
import sys
import copy
import re
from typing import List, Any
from indigo import *
indigo = Indigo()
import rdkit
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')
print('rdkit version:' + rdkit.__version__)
# RDKit bond types indexed by integer bond order; slot 0 stands for "no bond"
# and the last slot holds the aromatic bond type.
BOND_TYPES = [
    None,
    Chem.rdchem.BondType.SINGLE,
    Chem.rdchem.BondType.DOUBLE,
    Chem.rdchem.BondType.TRIPLE,
    Chem.rdchem.BondType.AROMATIC,
]

# Lookup from a floating-point bond order (the values produced by
# ``GetBondTypeAsDouble``) to the matching entry of ``BOND_TYPES``.
BOND_FLOAT_TO_TYPE = dict(zip((0.0, 1.0, 2.0, 3.0, 1.5), BOND_TYPES))
def get_bond_info(mol: Chem.Mol):
    """Collect order and index of every bond, keyed by atom-map pair.

    Parameters
    ----------
    mol: Chem.Mol
        Molecule whose bonds are inspected; ``None`` is accepted.

    Returns
    -------
    dict
        Maps ``(low_map, high_map)`` -- the atom-map numbers of the two bonded
        atoms in ascending order -- to ``[bond_order, bond_idx]``, where
        ``bond_order`` is the value of ``GetBondTypeAsDouble``. The dict is
        empty when ``mol`` is ``None``.
    """
    if mol is None:
        return {}

    def _map_pair(bond):
        # Order the two atom-map numbers so the key does not depend on the
        # direction in which the bond is stored.
        ends = [bond.GetBeginAtom().GetAtomMapNum(), bond.GetEndAtom().GetAtomMapNum()]
        ends.sort()
        return tuple(ends)

    return {
        _map_pair(bond): [bond.GetBondTypeAsDouble(), bond.GetIdx()]
        for bond in mol.GetBonds()
    }
def map_reac_and_frag(reac_mols: List[Chem.Mol], frag_mols: List[Chem.Mol]):
    """Pair every fragment with the reactant sharing the most atom maps.

    Parameters
    ----------
    reac_mols: List[Chem.Mol],
        List of reactant mols
    frag_mols: List[Chem.Mol],
        List of fragment mols

    Returns
    -------
    tuple
        ``(new_reac, new_frag)``: copies of the fragments in their original
        order and, at the same positions, copies of the reactants with the
        largest atom-map overlap. When the two lists differ in length the
        inputs are returned unchanged.
    """
    if len(reac_mols) != len(frag_mols):
        return reac_mols, frag_mols

    reac_map_sets = [
        {atom.GetAtomMapNum() for atom in mol.GetAtoms()} for mol in reac_mols
    ]
    frag_map_sets = [
        {atom.GetAtomMapNum() for atom in mol.GetAtoms()} for mol in frag_mols
    ]

    best_reac_pos = []
    for frag_set in frag_map_sets:
        shared = [len(frag_set.intersection(reac_set)) for reac_set in reac_map_sets]
        # ``index`` returns the first reactant reaching the maximum overlap.
        best_reac_pos.append(shared.index(max(shared)))

    new_frag = [Chem.Mol(mol) for mol in frag_mols]
    new_reac = [Chem.Mol(reac_mols[pos]) for pos in best_reac_pos]
    return new_reac, new_frag
def remove_s_H(frag_mol):
    """Remove every isolated hydrogen atom from a molecule.

    An atom is dropped when its atomic number is 1 and its degree is 0, i.e.
    it is a hydrogen without any bond.

    Parameters
    ----------
    frag_mol: Chem.Mol
        Molecule to clean. It is not modified.

    Returns
    -------
    Chem.Mol
        ``frag_mol`` itself when nothing had to be removed, otherwise a new
        molecule without the isolated hydrogens.
    """
    lone_h_idxs = [
        atom.GetIdx()
        for atom in frag_mol.GetAtoms()
        if atom.GetAtomicNum() == 1 and atom.GetDegree() == 0
    ]
    if not lone_h_idxs:
        return frag_mol

    edit_mol = Chem.RWMol(frag_mol)
    # Delete from the highest index downwards so that the indices still to be
    # removed are not shifted by an earlier removal.
    for idx in sorted(lone_h_idxs, reverse=True):
        edit_mol.RemoveAtom(idx)
    return edit_mol.GetMol()
def apply_edits_to_mol_change(mol, edits):
"""Apply bond-order edits to a copy of the molecular graph.
Parameters
----------
mol: Chem.Mol,
RDKit mol object; the edits are applied to an RWMol copy of it.
edits: Iterable[str],
Iterable of edits to apply. An edit is structured as a1:a2:b1:b2, where
a1, a2 are atom maps of participating atoms and b1, b2 are previous and
new bond orders.
NOTE(review): the inherited text said "When a2 = 0, we update the hydrogen
count", but no a2 = 0 special case is visible in this function.
Returns
-------
The edited molecule obtained from ``RWMol.GetMol()``.
"""
# Editable copy of the input; amap maps atom-map number -> atom index.
new_mol = Chem.RWMol(mol)
amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in new_mol.GetAtoms()}
for edit in edits:
x, y, prev_bo, new_bo = edit.split(":")
x, y = int(x), int(y)
new_bo = float(new_bo)
bond = new_mol.GetBondBetweenAtoms(amap[x],amap[y])
if new_bo > 0:
if bond is not None:
# An existing bond is dropped first so it can be re-added with the new order.
new_mol.RemoveBond(amap[x],amap[y])
new_mol.AddBond(amap[x],amap[y],BOND_FLOAT_TO_TYPE[new_bo])
# NOTE(review): whether the explicit-H update below is nested under
# `if new_bo > 0` cannot be told from this flattened copy -- confirm.
atom_x,atom_y = new_mol.GetAtomWithIdx(amap[x]),new_mol.GetAtomWithIdx(amap[y])
# Shift each endpoint's explicit-H count by (previous - new) bond order.
try:
atom_x.SetNumExplicitHs(int(atom_x.GetNumExplicitHs()+ float(prev_bo)-float(new_bo)))
except:
# Bare except: any failure of the update above resets the count to 0
# (presumably meant for a negative result -- confirm).
atom_x.SetNumExplicitHs(0)
try:
atom_y.SetNumExplicitHs(int(atom_y.GetNumExplicitHs()+ float(prev_bo)-float(new_bo)))
except:
# Same fallback for the second endpoint.
atom_y.SetNumExplicitHs(0)
pred_mol = new_mol.GetMol()
return pred_mol
def apply_edits_to_mol_break(mol, edits):
    """Break the bonds named by ``edits`` and return the resulting molecule.

    The input is first given explicit hydrogens (``Chem.AddHs``), kekulized and
    every atom is flagged as having no implicit hydrogens; the edits are then
    applied to an editable copy of that molecule.

    Parameters
    ----------
    mol: Chem.Mol,
        RDKit mol object
    edits: Iterable[str],
        Iterable of edits to apply. An edit is structured as a1:a2:b1:b2, where
        a1, a2 are atom maps of participating atoms and b1, b2 are previous and
        new bond orders. When a2 = 0, the bond between a1 and the first
        hydrogen neighbour found on a1 is removed; otherwise the bond between
        a1 and a2 is removed if it exists.

    Returns
    -------
    Chem.Mol
        The edited molecule after ``Chem.RemoveHs(..., sanitize=False)``.
    """
    hydrogenated = Chem.AddHs(mol)
    Chem.Kekulize(hydrogenated)
    for atom in hydrogenated.GetAtoms():
        atom.SetNoImplicit(True)

    editable = Chem.RWMol(hydrogenated)
    idx_of_map = {atom.GetAtomMapNum(): atom.GetIdx() for atom in editable.GetAtoms()}

    for edit in edits:
        first, second, prev_bo, new_bo = edit.split(":")
        first, second = int(first), int(second)
        new_bo = float(new_bo)

        if second == 0:
            # Neighbours are read from the hydrogenated molecule as it was
            # before any edit, not from the editable copy.
            centre = hydrogenated.GetAtomWithIdx(idx_of_map[first])
            hydrogen = next(
                (nbr for nbr in centre.GetNeighbors() if nbr.GetAtomicNum() == 1),
                None,
            )
            if hydrogen is not None:
                editable.RemoveBond(idx_of_map[first], hydrogen.GetIdx())
            continue

        if editable.GetBondBetweenAtoms(idx_of_map[first], idx_of_map[second]) is not None:
            editable.RemoveBond(idx_of_map[first], idx_of_map[second])

    return Chem.RemoveHs(editable.GetMol(), sanitize=False)
def find_reac_edit(frag_mols_1,reac_mols_1,core_edits):
"""Derive the edits that relate one reactant to its matching fragment.
NOTE(review): a function with the same name is defined again further down in
this file; in Python the later definition replaces this one at import time.
Parameters
----------
frag_mols_1: fragment molecule carrying atom-map numbers.
reac_mols_1: reactant molecule carrying atom-map numbers.
core_edits: list of "a1:a2:b1:b2" edit strings.
Returns
-------
List of edit strings in the same "a1:a2:b1:b2" layout. Entries whose second
field is 0 encode a hydrogen-count or formal-charge difference on the
attachment atom; the others name a bond between the attachment atom and an
atom that only occurs in the reactant.
NOTE(review): the nesting of the hydrogen/charge checks is not recoverable from
this flattened copy of the file -- confirm against the upstream source.
"""
reac_mol_map_num = [i.GetAtomMapNum() for i in reac_mols_1.GetAtoms()]
frag_mol_map_num = [i.GetAtomMapNum() for i in frag_mols_1.GetAtoms()]
# Atom maps present in the reactant but absent from the fragment.
lg_map_num = [i for i in reac_mol_map_num if i not in frag_mol_map_num]
attach_map_num = 0
reac_edit = []
# Also consider every core edit with its two atom maps swapped.
core_edits = core_edits + [':'.join([i.split(':')[1],i.split(':')[0],i.split(':')[2],i.split(':')[3]]) for i in core_edits]
for core_edit in core_edits:
core_edit_ = core_edit.split(':')
# The first atom of the edit becomes the attachment atom when it belongs to
# the fragment and the edit ends at order 0 or lowers the bond order.
if float(core_edit_[3]) == 0 and int(core_edit_[0]) in frag_mol_map_num:
attach_map_num = int(core_edit_[0])
elif float(core_edit_[2]) - float(core_edit_[3]) > 0 and int(core_edit_[0]) in frag_mol_map_num:
attach_map_num = int(core_edit_[0])
else:
continue
if str(attach_map_num) != '0' and str(attach_map_num) != core_edit_[0]:
continue
frag_mols_1_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in frag_mols_1.GetAtoms()}
reac_mols_1_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mols_1.GetAtoms()}
# Explicit-H counts and formal charges of the attachment atom on both sides.
frag_attach_H = frag_mols_1.GetAtomWithIdx(frag_mols_1_amap[attach_map_num]).GetNumExplicitHs()
reac_attach_H = reac_mols_1.GetAtomWithIdx(reac_mols_1_amap[attach_map_num]).GetNumExplicitHs()
frag_attach_charge = frag_mols_1.GetAtomWithIdx(frag_mols_1_amap[attach_map_num]).GetFormalCharge()
reac_attach_charge = reac_mols_1.GetAtomWithIdx(reac_mols_1_amap[attach_map_num]).GetFormalCharge()
if lg_map_num != []:
# The reactant has extra atoms: emit a break edit for each bond joining the
# attachment atom to one of them, attachment atom written first.
for bond in reac_mols_1.GetBonds():
EndMapNum = bond.GetEndAtom().GetAtomMapNum()
BeginMapNum = bond.GetBeginAtom().GetAtomMapNum()
if (BeginMapNum == attach_map_num) and (EndMapNum in lg_map_num):
reac_edit.append("{}:{}:{}:{}".format(BeginMapNum,EndMapNum,bond.GetBondTypeAsDouble(),0.0))
elif (EndMapNum == attach_map_num) and (BeginMapNum in lg_map_num):
reac_edit.append("{}:{}:{}:{}".format(EndMapNum,BeginMapNum,bond.GetBondTypeAsDouble(),0.0))
elif lg_map_num == []:
# No extra atoms: describe the difference through pseudo-edits on atom 0.
if Chem.MolToSmiles(reac_mols_1) == Chem.MolToSmiles(frag_mols_1):
reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,0.0))
# One or two additional explicit hydrogens on the reactant, same charge.
if (reac_attach_H - frag_attach_H) == 1 and (reac_attach_charge - frag_attach_charge) == 0:
reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,1.0,0.0))
if (reac_attach_H - frag_attach_H) == 2 and (reac_attach_charge - frag_attach_charge) == 0:
reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,2.0,0.0))
# Formal-charge difference of -1 / +1, recorded at most once each.
if (reac_attach_charge - frag_attach_charge) == -1:
if "{}:{}:{}:{}".format(attach_map_num,0,0.0,-1.0) not in reac_edit:
reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,-1.0))
if (reac_attach_charge - frag_attach_charge) == 1:
if "{}:{}:{}:{}".format(attach_map_num,0,0.0,1.0) not in reac_edit:
reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,1.0))
return reac_edit
def correct_mol_1(mol, is_nitrine_c):
    """Return a deep copy of ``mol`` with per-atom flags reset.

    On the copy every atom gets zero radical electrons, its aromatic flag
    cleared and its no-implicit flag cleared.

    Parameters
    ----------
    mol: RDKit molecule; it is left untouched.
    is_nitrine_c: when equal to ``True``, a neutral nitrogen whose bond orders
        sum to 4 and that has no aromatic (1.5) bond is given formal charge +1.

    Returns
    -------
    The modified copy.
    """
    fixed = copy.deepcopy(mol)
    for atom in fixed.GetAtoms():
        if is_nitrine_c == True and atom.GetAtomicNum() == 7:
            orders = [bond.GetBondTypeAsDouble() for bond in atom.GetBonds()]
            if sum(orders) == 4 and 1.5 not in orders and atom.GetFormalCharge() == 0:
                # adjust the charge of N
                atom.SetFormalCharge(1)
        atom.SetNumRadicalElectrons(0)
        atom.SetIsAromatic(False)
        atom.SetNoImplicit(False)
    return fixed
def correct_mol(mol_,keep_map):
"""Rebuild a kekulized copy of ``mol_`` through a MolBlock round trip.
The input is deep-copied; on the copy radical electrons, the no-implicit flag,
atom-map numbers and aromatic flags are cleared before the copy is written with
``Chem.MolToMolBlock(kekulize=True)`` and read back without sanitization.
Parameters
----------
mol_: RDKit molecule; only read here (its chiral tags are copied to the result).
keep_map: when truthy, the atom-map numbers recorded before the round trip are
written back by atom index.
Returns
-------
The molecule read back from the MolBlock.
NOTE(review): nesting is not recoverable from this flattened copy of the file --
in particular whether the chiral-tag loop runs only when ``keep_map`` is set.
"""
mol = copy.deepcopy(mol_)
atom_map_lis = []
idx_H_dic = {}
# Remember the atom-map numbers in atom order before they are cleared below.
for atom in mol.GetAtoms():
atom_map_lis.append(atom.GetAtomMapNum())
for atom in mol.GetAtoms():
if atom.GetAtomicNum() == 7 and sum([i.GetBondTypeAsDouble() for i in atom.GetBonds()]) == 4 and 1.5 not in [i.GetBondTypeAsDouble() for i in atom.GetBonds()] and atom.GetFormalCharge()==0: # adjust the charge of N
pass
elif atom.GetAtomicNum() == 15 and atom.GetExplicitValence() == 5 and 1.5 not in [i.GetBondTypeAsDouble() for i in atom.GetBonds()] and atom.GetFormalCharge()==0: # original comment: "adjust the charge of N"; this branch tests atomic number 15
# Record the explicit-H count so it can be restored after the round trip.
idx_H_dic[atom.GetIdx()] = atom.GetNumExplicitHs()
else:
pass
atom.SetNumRadicalElectrons(0)
atom.SetNoImplicit(False)
atom.SetAtomMapNum(0)
for atom in mol.GetAtoms():
atom.SetIsAromatic(False)
# Round trip through a MolBlock; hydrogens are kept and sanitization is skipped.
temp = Chem.MolToMolBlock(mol,kekulize = True)
mol = Chem.MolFromMolBlock(temp,removeHs = False,sanitize= False)
if keep_map:
for i in range(0,mol.GetNumAtoms()):
mol.GetAtomWithIdx(i).SetAtomMapNum(atom_map_lis[i])
if i in idx_H_dic.keys():
mol.GetAtomWithIdx(i).SetNoImplicit(True)
mol.GetAtomWithIdx(i).SetNumExplicitHs(idx_H_dic[i])
# Copy the chiral tags of the original molecule onto the rebuilt one by index.
for i in range(0,mol.GetNumAtoms()):
mol.GetAtomWithIdx(i).SetChiralTag(mol_.GetAtomWithIdx(i).GetChiralTag())
# The result is not used afterwards. NOTE(review): presumably kept for the
# stereo-perception side effect of the call -- confirm before removing.
n_Chirals = Chem.FindMolChiralCenters(mol)
return mol
def get_atom_map_chai_dic(mol):
    """Map the atom-map number of each chiral centre to its chirality label.

    The centres and labels are those reported by
    ``Chem.FindMolChiralCenters(mol)``.
    """
    return {
        mol.GetAtomWithIdx(atom_idx).GetAtomMapNum(): label
        for atom_idx, label in Chem.FindMolChiralCenters(mol)
    }
def get_atom_map_stereo_dic(mol):
    """Map each bond, keyed by its sorted atom-map pair, to ``bond.GetStereo()``."""
    idx_to_map = {}
    for atom in mol.GetAtoms():
        idx_to_map[atom.GetIdx()] = atom.GetAtomMapNum()

    stereo_by_pair = {}
    for bond in mol.GetBonds():
        pair = [idx_to_map[bond.GetBeginAtomIdx()], idx_to_map[bond.GetEndAtomIdx()]]
        pair.sort()
        stereo_by_pair[tuple(pair)] = bond.GetStereo()
    return stereo_by_pair
def cano_smiles_map(smiles):
    """Re-emit an atom-mapped SMILES in the atom order of its unmapped form.

    The SMILES is parsed without sanitization, its atom-map numbers are saved
    and cleared, and the molecule is written out non-canonically in Kekule form.
    That string is parsed again, the saved map numbers are put back by atom
    index, and the molecule is written out once more with the same options.
    """
    mol = Chem.MolFromSmiles(smiles, sanitize=False)
    saved_maps = []
    for atom in mol.GetAtoms():
        saved_maps.append(atom.GetAtomMapNum())
        atom.SetAtomMapNum(0)

    unmapped = Chem.MolToSmiles(mol, canonical=False, kekuleSmiles=True)
    reparsed = Chem.MolFromSmiles(unmapped, sanitize=False)
    for atom in reparsed.GetAtoms():
        atom.SetAtomMapNum(saved_maps[atom.GetIdx()])
    return Chem.MolToSmiles(reparsed, canonical=False, kekuleSmiles=True)
def get_stereo_edit_mine(reac_mol, prod_mol):
    """List double bonds whose stereo label differs between reactant and product.

    Side effect: the atom-map numbers of ``reac_mol`` are set to 0 in place
    before its atoms are ranked with ``Chem.CanonicalRankAtoms``.

    Parameters
    ----------
    reac_mol: reactant molecule with atom-map numbers (modified, see above).
    prod_mol: product molecule with atom-map numbers.

    Returns
    -------
    List of ``"a1:a2:0:s"`` strings where a1 < a2 are atom maps of the bond and
    ``s`` is ``'a'``, ``'e'`` or ``'z'`` for the reactant's STEREONONE, STEREOE
    or STEREOZ label (any other label is formatted as is).
    """
    reac_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()}
    prod_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}

    for atom in reac_mol.GetAtoms():
        atom.SetAtomMapNum(0)
    rank_of_idx = dict(enumerate(Chem.CanonicalRankAtoms(reac_mol, breakTies=False)))

    prod_stereo = {}
    for bond in prod_mol.GetBonds():
        key = tuple(sorted([prod_idx_to_map[bond.GetBeginAtomIdx()],
                            prod_idx_to_map[bond.GetEndAtomIdx()]]))
        prod_stereo[key] = bond.GetStereo()

    def _substituent_ranks(atom, partner):
        # Ranks of the neighbours other than the double-bond partner, padded
        # with 'H' placeholders up to two entries.
        ranks = [
            rank_of_idx[nbr.GetIdx()]
            for nbr in atom.GetNeighbors()
            if nbr.GetIdx() != partner.GetIdx()
        ]
        return ranks + ['H'] * (2 - len(ranks))

    reac_stereo = {}
    for bond in reac_mol.GetBonds():
        if bond.GetBondTypeAsDouble() != 2.0:
            continue
        begin_atom, end_atom = bond.GetBeginAtom(), bond.GetEndAtom()
        begin_ranks = _substituent_ranks(begin_atom, end_atom)
        end_ranks = _substituent_ranks(end_atom, begin_atom)
        # Keep the bond only when both ends carry distinguishable substituents.
        if len(begin_ranks) != len(set(begin_ranks)):
            continue
        if len(end_ranks) != len(set(end_ranks)):
            continue
        key = tuple(sorted([reac_idx_to_map[bond.GetBeginAtomIdx()],
                            reac_idx_to_map[bond.GetEndAtomIdx()]]))
        reac_stereo[key] = bond.GetStereo()

    stereo_edits = []
    for atom_pair, stereo in reac_stereo.items():
        if atom_pair not in prod_stereo.keys():
            continue
        if stereo == prod_stereo[atom_pair]:
            continue
        if stereo == Chem.rdchem.BondStereo.STEREONONE:
            stereo = 'a'
        elif stereo == Chem.rdchem.BondStereo.STEREOE:
            stereo = 'e'
        elif stereo == Chem.rdchem.BondStereo.STEREOZ:
            stereo = 'z'
        stereo_edits.append('{}:{}:{}:{}'.format(atom_pair[0], atom_pair[1], 0, stereo))
    return stereo_edits
def apply_stereo_change(prod_mol, stereo_edits):
    """Set double-bond stereo labels on a deep copy of ``prod_mol``.

    Parameters
    ----------
    prod_mol: molecule with atom-map numbers; it is not modified. Its atoms are
        expected to carry a ``_CIPRank`` property, which is read for the
        neighbours of each edited bond.
    stereo_edits: strings ``"a1:a2:...:s"`` whose last field is ``e``, ``z`` or
        ``a`` (STEREOE, STEREOZ, STEREOANY).

    Returns
    -------
    The edited copy.
    """
    idx_of_map = {atom.GetAtomMapNum(): atom.GetIdx() for atom in prod_mol.GetAtoms()}
    edited = copy.deepcopy(prod_mol)
    rank_source = copy.deepcopy(edited)

    def _other_neighbour_maps(centre_map, begin_map, end_map):
        # Atom maps of the neighbours of one bond end, excluding both bond ends.
        centre = edited.GetAtomWithIdx(idx_of_map[centre_map])
        maps = [nbr.GetAtomMapNum() for nbr in centre.GetNeighbors()]
        return [amap for amap in maps if amap not in [begin_map, end_map]]

    def _highest_ranked(neighbour_maps):
        # Last neighbour holding the largest _CIPRank (ties go to the later one).
        chosen = neighbour_maps[0]
        best_rank = 0
        for amap in neighbour_maps[:]:
            rank = int(rank_source.GetAtomWithIdx(idx_of_map[amap]).GetProp('_CIPRank'))
            if rank >= best_rank:
                chosen = amap
                best_rank = rank
        return chosen

    for stereo_edit in stereo_edits:
        fields = stereo_edit.split(':')
        begin_map = int(fields[0])
        end_map = int(fields[1])
        begin_nbrs = _other_neighbour_maps(begin_map, begin_map, end_map)
        end_nbrs = _other_neighbour_maps(end_map, begin_map, end_map)
        begin_ref = _highest_ranked(begin_nbrs)
        end_ref = _highest_ranked(end_nbrs)

        suffix = stereo_edit[-2:]
        if suffix == ':e' or suffix == ':z':
            bond = edited.GetBondBetweenAtoms(idx_of_map[begin_map], idx_of_map[end_map])
            if suffix == ':e':
                bond.SetStereo(Chem.rdchem.BondStereo.STEREOE)
            else:
                bond.SetStereo(Chem.rdchem.BondStereo.STEREOZ)
            # The reference atoms are retried in swapped order if the first
            # call fails for any reason.
            try:
                bond.SetStereoAtoms(idx_of_map[begin_ref], idx_of_map[end_ref])
            except:
                bond.SetStereoAtoms(idx_of_map[end_ref], idx_of_map[begin_ref])
        elif suffix == ':a':
            bond = edited.GetBondBetweenAtoms(idx_of_map[begin_map], idx_of_map[end_map])
            bond.SetStereo(Chem.rdchem.BondStereo.STEREOANY)
    return edited
def add_Cl(mol):
    """Attach a chlorine to the single S(=O)(=O) centre of ``mol``, if any.

    A sulfur qualifies when its bond orders are exactly one single and two
    double bonds and both double bonds go to oxygen. Only when exactly one such
    sulfur exists is a chlorine atom added, single-bonded to it and given the
    atom-map number ``max(existing maps) + 1``.

    Returns
    -------
    The new molecule when a chlorine was added, otherwise ``mol`` itself.
    """
    sulfonyl_idxs = []
    for atom in mol.GetAtoms():
        if atom.GetAtomicNum() != 16:
            continue
        if sorted([bond.GetBondTypeAsDouble() for bond in atom.GetBonds()]) != [1, 2, 2]:
            continue
        double_o_count = 0
        for nbr in atom.GetNeighbors():
            if nbr.GetAtomicNum() != 8:
                continue
            link = mol.GetBondBetweenAtoms(atom.GetIdx(), nbr.GetIdx())
            if link.GetBondTypeAsDouble() == 2:
                double_o_count += 1
        if double_o_count == 2:
            sulfonyl_idxs.append(atom.GetIdx())

    if len(sulfonyl_idxs) != 1:
        return mol

    map_nums = [atom.GetAtomMapNum() for atom in mol.GetAtoms()]
    cl_idx = len(map_nums)  # index the appended atom will receive
    editable = Chem.RWMol(mol)
    editable.AddAtom(Chem.Atom(17))
    editable.GetAtomWithIdx(cl_idx).SetAtomMapNum(max(map_nums) + 1)
    editable.AddBond(sulfonyl_idxs[0], cl_idx, BOND_FLOAT_TO_TYPE[1])
    return editable.GetMol()
def neu_sulf_charge(mol):
    """Neutralise O(-1) atoms whose only neighbour is a sulfur of explicit valence 4.

    The molecule is modified in place and also returned.
    """
    for atom in mol.GetAtoms():
        if atom.GetAtomicNum() != 8 or atom.GetFormalCharge() != -1:
            continue
        nbrs = atom.GetNeighbors()
        if len(nbrs) != 1:
            continue
        only_nbr = nbrs[0]
        if only_nbr.GetAtomicNum() == 16 and only_nbr.GetExplicitValence() == 4:
            atom.SetFormalCharge(0)
    return mol
def align_kekule_pairs(r: str, p: str) :
    """Aligns kekule pairs so unchanged bonds keep the same bond order in
    previously aromatic rings.

    Reactant atoms without an atom-map number are first given fresh numbers
    above the current maximum. Both molecules are kekulized; for every bond
    present in both whose pre-kekulization order agrees, the reactant bond is
    rewritten with the product's kekulized order when the two kekulized orders
    differ or the bond was aromatic (1.5) in the reactant.

    Parameters
    ----------
    r: str,
        SMILES string representing the reactants
    p: str,
        SMILES string representing the product

    Returns
    -------
    tuple
        ``(reactant_mol, product_mol)``, both kekulized.
    """
    reac_mol = Chem.MolFromSmiles(r)
    next_map = max([atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()])
    for atom in reac_mol.GetAtoms():
        if atom.GetAtomMapNum() == 0:
            next_map += 1
            atom.SetAtomMapNum(next_map)

    prod_mol = Chem.MolFromSmiles(p)
    prod_before = get_bond_info(prod_mol)
    Chem.Kekulize(prod_mol)
    prod_after = get_bond_info(prod_mol)

    reac_before = get_bond_info(reac_mol)
    Chem.Kekulize(reac_mol)
    reac_after = get_bond_info(reac_mol)

    to_rewrite = {}
    for pair in prod_after:
        if pair not in reac_after:
            continue
        if prod_before[pair][0] != reac_before[pair][0]:
            continue
        if reac_after[pair][0] == prod_after[pair][0] and reac_before[pair][0] != 1.5:
            continue
        reac_after[pair][0] = prod_after[pair][0]
        to_rewrite[pair] = reac_after[pair]

    editable = Chem.RWMol(reac_mol)
    idx_of_map = {atom.GetAtomMapNum(): atom.GetIdx() for atom in editable.GetAtoms()}
    for (map_a, map_b), (order, _bond_idx) in to_rewrite.items():
        idx_a, idx_b = idx_of_map[map_a], idx_of_map[map_b]
        editable.RemoveBond(idx_a, idx_b)
        editable.AddBond(idx_a, idx_b, BOND_FLOAT_TO_TYPE[order])
    return editable.GetMol(), prod_mol
def count_kekule_d(r, p):
    """Count shared bonds that agree when sanitized but differ when not.

    Both SMILES are parsed twice, with and without sanitization. A bond (keyed
    by atom-map pair) is counted when it exists in both sanitized molecules with
    the same order while its orders in the unsanitized molecules differ.
    """
    prod_s = get_bond_info(Chem.MolFromSmiles(p))
    prod_k = get_bond_info(Chem.MolFromSmiles(p, sanitize=False))
    reac_s = get_bond_info(Chem.MolFromSmiles(r))
    reac_k = get_bond_info(Chem.MolFromSmiles(r, sanitize=False))
    return sum(
        1
        for pair, info in reac_s.items()
        if pair in prod_s
        and info[0] == prod_s[pair][0]
        and reac_k[pair][0] != prod_k[pair][0]
    )
def get_kekule_aligned_r(r, p):
    """Pick, per reactant component, the Kekule form closest to the product.

    When ``count_kekule_d(r, p)`` is already 0 the input ``r`` is returned as
    is. Otherwise every '.'-separated component is expanded with
    ``Chem.ResonanceMolSupplier(..., Chem.KEKULE_ALL)`` and the structure with
    the lowest ``count_kekule_d`` against ``p`` is kept (the last one on ties;
    counts above 1000 are never selected). The chosen SMILES are joined with '.'.
    """
    if count_kekule_d(r, p) == 0:
        return r

    chosen = []
    for component in r.split('.'):
        best_count, best_smiles = 1000, ''
        resonance = Chem.ResonanceMolSupplier(Chem.MolFromSmiles(component), Chem.KEKULE_ALL)
        for pos in range(len(resonance)):
            candidate = Chem.MolToSmiles(resonance[pos], kekuleSmiles=True)
            mismatches = count_kekule_d(candidate, p)
            if mismatches <= best_count:
                best_count, best_smiles = mismatches, candidate
        chosen.append(best_smiles)
    return '.'.join(chosen)
def apply_edits_to_mol_connect(mol, edits):
    """Add the bonds described by ``edits`` to a copy of the molecule.

    Parameters
    ----------
    mol: Chem.Mol,
        RDKit mol object
    edits: Iterable[str],
        Iterable of edits to apply. An edit is structured as a1:a2:b1:b2, where
        a1, a2 are atom maps of the atoms to connect and b2 is the order of the
        bond to add; b1 is parsed but not used here.

    Returns
    -------
    Chem.Mol
        The molecule with the new bonds.
    """
    editable = Chem.RWMol(mol)
    idx_of_map = {}
    for atom in editable.GetAtoms():
        idx_of_map[atom.GetAtomMapNum()] = atom.GetIdx()

    for edit in edits:
        first, second, prev_bo, new_bo = edit.split(":")
        first, second = int(first), int(second)
        order = float(new_bo)
        editable.AddBond(idx_of_map[first], idx_of_map[second], BOND_FLOAT_TO_TYPE[order])
    return editable.GetMol()
def get_charge_edit_mine(reac_mol, prod_mol, core_edits):
    """List formal-charge differences between product and reactant atoms.

    Atoms taking part in a core edit that lowers a bond order (b1 - b2 > 0) are
    skipped, except that atom map 0 is never treated as such a site.

    Parameters
    ----------
    reac_mol, prod_mol: molecules with atom-map numbers.
    core_edits: list of "a1:a2:b1:b2" edit strings.

    Returns
    -------
    List of ``"amap:0:0:reactant_charge"`` strings, in product atom order.
    """
    skipped_sites = set()
    for core_edit in core_edits:
        first, second, old_order, new_order = core_edit.split(':')
        if float(old_order) - float(new_order) > 0:
            skipped_sites.update((int(first), int(second)))
    skipped_sites.discard(0)

    reac_charge = {atom.GetAtomMapNum(): atom.GetFormalCharge() for atom in reac_mol.GetAtoms()}
    prod_charge = {atom.GetAtomMapNum(): atom.GetFormalCharge() for atom in prod_mol.GetAtoms()}

    charge_edits = []
    for amap, charge in prod_charge.items():
        if amap not in reac_charge:
            continue
        if reac_charge[amap] == charge or amap in skipped_sites:
            continue
        charge_edits.append(f"{amap}:{0}:{0}:{reac_charge[amap]}")
    return charge_edits
def get_atom_map_charge_dic(mol):
    """Return ``{atom-map number: formal charge}`` for every atom of ``mol``."""
    charges = {}
    for atom in mol.GetAtoms():
        charge = atom.GetFormalCharge()
        charges[atom.GetAtomMapNum()] = charge
    return charges
def apply_charge_change(mol, charge_edits):
    """Set formal charges on ``mol`` in place from ``"amap:_:_:charge"`` edits.

    The first field selects the atom by atom-map number and the last field is
    parsed with ``int`` and used as the new formal charge; the two middle fields
    are ignored. The same ``mol`` object is returned.
    """
    idx_of_map = {}
    for atom in mol.GetAtoms():
        idx_of_map[atom.GetAtomMapNum()] = atom.GetIdx()

    for edit in charge_edits:
        target_map, _second, _prev_charge, new_charge = edit.split(":")
        target = mol.GetAtomWithIdx(idx_of_map[int(target_map)])
        target.SetFormalCharge(int(new_charge))
    return mol
def get_core_edit_mine(reac_mol, prod_mol):
    """Describe how the product's bonds and hydrogens differ from the reactants'.

    NOTE: a function with the same name is defined again further down in this
    file; in Python the later definition replaces this one at import time.

    Parameters
    ----------
    reac_mol, prod_mol: molecules with atom-map numbers.

    Returns
    -------
    List of ``"a1:a2:b1:b2"`` strings, with b1 the product-side and b2 the
    reactant-side bond order:
    * bonds present on both sides with different orders;
    * bonds only in the product (b2 = 0.0);
    * bonds only in the reactants whose two atoms both occur in the product
      (b1 = 0.0);
    * ``"amap:0:1.0:0.0"`` for product atoms that lost a reactant bond, gained
      no product-only bond, and whose total hydrogen count differs.
    """
    prod_bonds = get_bond_info(prod_mol)
    reac_bonds = get_bond_info(reac_mol)
    prod_maps = {atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}
    reac_idx_of_map = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mol.GetAtoms()}

    on_new_bond = set()   # atoms of bonds found only in the product
    on_lost_bond = set()  # atoms of bonds found only in the reactants
    core_edits = []

    for pair, prod_info in prod_bonds.items():
        prod_order = prod_info[0]
        if pair in reac_bonds:
            reac_order = reac_bonds[pair][0]
            if prod_order != reac_order:
                low, high = sorted(pair)
                core_edits.append(f"{low}:{high}:{prod_order}:{reac_order}")
        else:
            first, second = pair
            core_edits.append(f"{first}:{second}:{prod_order}:{0.0}")
            on_new_bond.update(pair)

    for pair, reac_info in reac_bonds.items():
        if pair in prod_bonds:
            continue
        on_lost_bond.update(pair)
        if pair[0] in prod_maps and pair[1] in prod_maps:
            low, high = sorted(pair)
            core_edits.append(f"{low}:{high}:{0.0}:{reac_info[0]}")

    for atom in prod_mol.GetAtoms():
        amap = atom.GetAtomMapNum()
        if amap in on_new_bond or amap not in on_lost_bond:
            continue
        prod_hs = atom.GetTotalNumHs()
        reac_hs = reac_mol.GetAtomWithIdx(reac_idx_of_map[amap]).GetTotalNumHs()
        if prod_hs != reac_hs:
            core_edits.append(f"{amap}:{0}:{1.0}:{0.0}")
    return core_edits
def get_chai_edit_mine(reac_mol, prod_mol):
    """List chirality labels that differ between reactant and product atoms.

    NOTE: this name is defined again further down in this file; in Python the
    last definition is the one that stays bound at import time.

    Chiral centres come from ``Chem.FindMolChiralCenters(..., includeUnassigned=True)``
    and are matched through atom-map numbers.

    Returns
    -------
    List of ``"amap:0:0:label"`` strings carrying the reactant's label: first the
    centres found on both sides with different labels, then the reactant-only
    centres whose atom occurs in the product and whose label is not ``'?'``.
    """
    reac_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()}
    prod_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}
    reac_centres = {
        reac_idx_to_map[centre[0]]: centre[1]
        for centre in Chem.FindMolChiralCenters(reac_mol, includeUnassigned=True)
    }
    prod_centres = {
        prod_idx_to_map[centre[0]]: centre[1]
        for centre in Chem.FindMolChiralCenters(prod_mol, includeUnassigned=True)
    }

    chai_edits = [
        f"{amap}:{0}:{0}:{reac_centres[amap]}"
        for amap, label in prod_centres.items()
        if amap in reac_centres and label != reac_centres[amap]
    ]
    prod_atom_maps = prod_idx_to_map.values()
    for amap, label in reac_centres.items():
        if amap in prod_centres or amap not in prod_atom_maps or label == '?':
            continue
        chai_edits.append(f"{amap}:{0}:{0}:{label}")
    return chai_edits
def get_chai_edit_mine(reac_mol, prod_mol):
    """List chirality labels that differ between reactant and product atoms.

    NOTE: this name is defined again further down in this file; in Python the
    last definition is the one that stays bound at import time.

    Chiral centres come from ``Chem.FindMolChiralCenters(..., includeUnassigned=True)``
    and are matched through atom-map numbers. This variant also reports
    reactant-only centres labelled ``'?'``.

    Returns
    -------
    List of ``"amap:0:0:label"`` strings carrying the reactant's label: first the
    centres found on both sides with different labels, then the reactant-only
    centres whose atom occurs in the product.
    """
    reac_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()}
    prod_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}

    reac_centres = {}
    for centre in Chem.FindMolChiralCenters(reac_mol, includeUnassigned=True):
        reac_centres[reac_idx_to_map[centre[0]]] = centre[1]
    prod_centres = {}
    for centre in Chem.FindMolChiralCenters(prod_mol, includeUnassigned=True):
        prod_centres[prod_idx_to_map[centre[0]]] = centre[1]

    chai_edits = []
    for amap, label in prod_centres.items():
        if amap not in reac_centres:
            continue
        reac_label = reac_centres[amap]
        if label != reac_label:
            chai_edits.append(f"{amap}:{0}:{0}:{reac_label}")

    prod_atom_maps = prod_idx_to_map.values()
    for amap, label in reac_centres.items():
        if amap not in prod_centres and amap in prod_atom_maps:
            chai_edits.append(f"{amap}:{0}:{0}:{label}")
    return chai_edits
def get_chai_edit_mine(reac_mol, prod_mol):
    """List chirality labels that differ between reactant and product atoms.

    Chiral centres come from ``Chem.FindMolChiralCenters(..., includeUnassigned=True)``
    and are matched through atom-map numbers. Earlier definitions of this name
    in the file are superseded by this one.

    Parameters
    ----------
    reac_mol, prod_mol: molecules with atom-map numbers.

    Returns
    -------
    List of ``"amap:0:0:label"`` strings carrying the reactant's label: first the
    centres found on both sides with different labels, then the reactant-only
    centres whose atom occurs in the product and whose label is not ``'?'``.
    """
    reac_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()}
    prod_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}

    reac_pairs = [
        (reac_idx_to_map[centre[0]], centre[1])
        for centre in Chem.FindMolChiralCenters(reac_mol, includeUnassigned=True)
    ]
    prod_pairs = [
        (prod_idx_to_map[centre[0]], centre[1])
        for centre in Chem.FindMolChiralCenters(prod_mol, includeUnassigned=True)
    ]
    reac_label_of = dict(reac_pairs)
    prod_label_of = dict(prod_pairs)

    chai_edits = []
    # Centres known on both sides whose labels disagree.
    for amap, prod_label in prod_label_of.items():
        if amap in reac_label_of and prod_label != reac_label_of[amap]:
            chai_edits.append(f"{amap}:{0}:{0}:{reac_label_of[amap]}")

    # Assigned reactant centres that are no longer centres in the product.
    prod_atom_maps = prod_idx_to_map.values()
    for amap, reac_label in reac_label_of.items():
        lost_in_product = amap not in prod_label_of and amap in prod_atom_maps
        if lost_in_product and reac_label != '?':
            chai_edits.append(f"{amap}:{0}:{0}:{reac_label}")
    return chai_edits
def get_lg_map_lis(frag_mols,reac_mols,core_edits,prod_mol):
"""Collect leaving-group descriptions for every fragment/reactant pair.
NOTE(review): a function with the same name is defined again further down in
this file; in Python the later definition replaces this one at import time.
Parameters
----------
frag_mols, reac_mols: sequences of molecules, walked pairwise with zip.
core_edits: list of "a1:a2:b1:b2" edit strings, passed to find_reac_edit.
prod_mol: product molecule; only its atom-map numbers are read.
Returns
-------
List of (lg_smiles, attach_map_nums) tuples. lg_smiles is either the marker
string '-1.0' / '1.0' (charge pseudo-edits) or a SMILES written from a piece of
the broken reactant that is not the fragment; attach_map_nums is a list of
atom-map numbers.
NOTE(review): the nesting of the statements is not recoverable from this
flattened copy of the file -- confirm against the upstream source.
"""
lg_map_lis = []
prod_map_num_lis = [i.GetAtomMapNum() for i in prod_mol.GetAtoms()]
for frag_mols_1,reac_mols_1 in zip(frag_mols[:],reac_mols[:]):
reac_edits = find_reac_edit(frag_mols_1,reac_mols_1,core_edits)
# Split the edits: group "a" holds pseudo-edits recognised by their string
# prefix/suffix, group "b" holds the remaining edits, which are applied below.
reac_edits_a = []
reac_edits_b = []
for reac_edit in reac_edits:
if reac_edit[:3] == '0:0':
reac_edits_a.append(reac_edit)
elif reac_edit[-7:] == '0.0:0.0':
reac_edits_a.append(reac_edit)
elif reac_edit[-10:] == '0:0.0:-1.0':
reac_edits_a.append(reac_edit)
elif reac_edit[-9:] == '0:0.0:1.0':
reac_edits_a.append(reac_edit)
else:
reac_edits_b.append(reac_edit)
# Charge pseudo-edits become ('-1.0' | '1.0', [atom maps found in the product]).
for reac_edit in reac_edits_a:
if reac_edit[:3] == '0:0':
pass
elif reac_edit[-7:] == '0.0:0.0':
pass
elif reac_edit[-10:] == '0:0.0:-1.0':
edit_map_num_lis = reac_edit.split(':')[:2]
attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis]
lg_smiles = '-1.0'
lg_map_lis.append((lg_smiles,attach_map_num_1))
elif reac_edit[-9:] == '0:0.0:1.0':
edit_map_num_lis = reac_edit.split(':')[:2]
attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis]
lg_smiles = '1.0'
lg_map_lis.append((lg_smiles,attach_map_num_1))
frag_1_map_num_lis = [i.GetAtomMapNum() for i in frag_mols_1.GetAtoms() if i.GetAtomMapNum() != 0]
# Break the group "b" bonds in the reactant and split the result into pieces.
reac_frag_mol = apply_edits_to_mol_break(reac_mols_1 , reac_edits_b)
reac_frag_mols = Chem.GetMolFrags(reac_frag_mol,asMols=True,sanitizeFrags = False)
reac_edit_added = []
for reac_frag_mol in reac_frag_mols[:]:
reac_frag_map_num_lis = [i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms() if i.GetAtomMapNum() != 0]
# A piece carrying exactly the fragment's atom maps is the fragment itself.
if set(reac_frag_map_num_lis) == set(frag_1_map_num_lis):
pass
else:
attach_map_num_1 = []
for reac_edit in reac_edits:
if reac_edit in reac_edit_added:
continue
else:
pass
b,e = int(reac_edit.split(':')[0]),int(reac_edit.split(':')[1])
if e in reac_frag_map_num_lis and b in frag_1_map_num_lis:
# Tag the atom of this piece that was bonded to the fragment by adding 500
# to its atom-map number.
for atom in reac_frag_mol.GetAtoms():
if atom.GetAtomMapNum() == int(e):
atom.SetAtomMapNum(500+atom.GetAtomMapNum())
break
else:
pass
reac_edit_added.append(reac_edit)
# Order of the collected fragment-side atom maps depends on whether the tagged
# atom holds the largest map number in the piece.
if len(attach_map_num_1) == 1:
if [str(attach_map_num_1[0]),str(atom.GetAtomMapNum()-500)] in [i.split(':')[:2] for i in reac_edits ]: # the attachment point on the previous synthon pairs with this leaving group's attachment point
if atom.GetAtomMapNum() == max([i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms()]):
attach_map_num_1 = [b] + attach_map_num_1
else:
attach_map_num_1.append(b)
else:
if atom.GetAtomMapNum() == max([i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms()]):
attach_map_num_1.append(b)
else:
attach_map_num_1 = [b] + attach_map_num_1
elif len(attach_map_num_1) == 0:
attach_map_num_1.append(b)
else:
pass
# Stop scanning edits once a piece whose first atom is hydrogen has one
# attachment atom.
if reac_frag_mol.GetAtomWithIdx(0).GetAtomicNum() == 1 and len(attach_map_num_1) == 1:
break
# Write the piece as Kekule SMILES, re-read it, and reduce the atom maps to
# 1 for tagged (>= 500) atoms and 0 for all others.
lg_smiles = Chem.MolToSmiles(reac_frag_mol,kekuleSmiles = True)
lg = Chem.MolFromSmiles(lg_smiles)
Chem.Kekulize(lg)
for atom in lg.GetAtoms():
if atom.GetAtomMapNum() >= 500:
atom.SetAtomMapNum(1)
pass
else:
atom.SetAtomMapNum(0)
lg_smiles = Chem.MolToSmiles(lg,canonical = False,kekuleSmiles = True)
if attach_map_num_1 != []:
lg_map_lis.append((lg_smiles,attach_map_num_1))
return lg_map_lis
def get_core_edit_mine(reac_mol, prod_mol):
    """Describe how the product's bonds and hydrogens differ from the reactants'.

    Earlier definitions of this name in the file are superseded by this one.

    Parameters
    ----------
    reac_mol, prod_mol: molecules with atom-map numbers. Every product atom
        that is examined must also occur in ``reac_mol`` (a ``KeyError`` is
        raised otherwise).

    Returns
    -------
    List of ``"a1:a2:b1:b2"`` strings, with a1 <= a2, b1 the product-side and
    b2 the reactant-side bond order, in this order:
    * per product bond: an order change, or a product-only bond (b2 = 0.0);
    * bonds only in the reactants whose two atoms both occur in the product
      (b1 = 0.0);
    * ``"amap:0:1.0:0.0"`` for product atoms that lost a reactant bond, gained
      no product-only bond, and whose total hydrogen count differs;
    * ``"amap:0:1.0:0.0"`` for product atoms not touched by any edit above whose
      degree is one lower than in the reactants.
    """
    prod_bonds = get_bond_info(prod_mol)
    reac_bonds = get_bond_info(reac_mol)
    prod_maps = {atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}
    reac_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mol.GetAtoms()}

    rxn_core_break = set()  # atoms of bonds found only in the product
    rxn_core_lack = set()   # atoms of bonds found only in the reactants
    rxn_core = set()        # every atom already covered by an edit
    core_edits = []

    for bond, prod_info in prod_bonds.items():
        prod_bo = prod_info[0]
        # Endpoints are always written in ascending order, for product-only
        # bonds as well as for order changes.
        a_start, a_end = sorted(bond)
        if bond not in reac_bonds:
            core_edits.append(f"{a_start}:{a_end}:{prod_bo}:{0.0}")
            rxn_core_break.update([a_start, a_end])
        elif prod_bo != reac_bonds[bond][0]:
            core_edits.append(f"{a_start}:{a_end}:{prod_bo}:{reac_bonds[bond][0]}")
        else:
            continue
        rxn_core.update([a_start, a_end])

    for bond, reac_info in reac_bonds.items():
        if bond in prod_bonds:
            continue
        amap1, amap2 = bond
        rxn_core_lack.update([amap1, amap2])
        if amap1 in prod_maps and amap2 in prod_maps:
            a_start, a_end = sorted([amap1, amap2])
            core_edits.append(f"{a_start}:{a_end}:{0.0}:{reac_info[0]}")
            rxn_core.update([a_start, a_end])

    # Hydrogen-count changes on atoms that only lost a bond.
    for atom in prod_mol.GetAtoms():
        amap_num = atom.GetAtomMapNum()
        if amap_num in rxn_core_break or amap_num not in rxn_core_lack:
            continue
        numHs_prod = atom.GetTotalNumHs()
        numHs_reac = reac_mol.GetAtomWithIdx(reac_amap[amap_num]).GetTotalNumHs()
        if numHs_prod != numHs_reac:
            core_edits.append(f"{amap_num}:{0}:{1.0}:{0.0}")
            rxn_core.add(amap_num)

    # Atoms untouched so far that have exactly one neighbour fewer in the product.
    for atom in prod_mol.GetAtoms():
        amap_num = atom.GetAtomMapNum()
        if amap_num in rxn_core:
            continue
        degree_prod = atom.GetDegree()
        degree_reac = reac_mol.GetAtomWithIdx(reac_amap[amap_num]).GetDegree()
        if degree_prod - degree_reac == -1:
            core_edits.append(f"{amap_num}:{0}:{1.0}:{0.0}")
            rxn_core.add(amap_num)
    return core_edits
def find_reac_edit(frag_mols_1,reac_mols_1,core_edits):
"""Derive the edits that relate one reactant to its matching fragment.
An earlier definition of this name in the file is superseded by this one; this
version additionally reports a formal-charge difference of +2.
Parameters
----------
frag_mols_1: fragment molecule carrying atom-map numbers.
reac_mols_1: reactant molecule carrying atom-map numbers.
core_edits: list of "a1:a2:b1:b2" edit strings.
Returns
-------
List of edit strings in the same "a1:a2:b1:b2" layout. Entries whose second
field is 0 encode a hydrogen-count or formal-charge difference on the
attachment atom; the others name a bond between the attachment atom and an
atom that only occurs in the reactant.
NOTE(review): the nesting of the hydrogen/charge checks is not recoverable from
this flattened copy of the file -- confirm against the upstream source.
"""
reac_mol_map_num = [i.GetAtomMapNum() for i in reac_mols_1.GetAtoms()]
frag_mol_map_num = [i.GetAtomMapNum() for i in frag_mols_1.GetAtoms()]
# Atom maps present in the reactant but absent from the fragment.
lg_map_num = [i for i in reac_mol_map_num if i not in frag_mol_map_num]
attach_map_num = 0
reac_edit = []
# Also consider every core edit with its two atom maps swapped.
core_edits = core_edits + [':'.join([i.split(':')[1],i.split(':')[0],i.split(':')[2],i.split(':')[3]]) for i in core_edits]
for core_edit in core_edits:
core_edit_ = core_edit.split(':')
# The first atom of the edit becomes the attachment atom when it belongs to
# the fragment and the edit ends at order 0 or lowers the bond order.
if float(core_edit_[3]) == 0 and int(core_edit_[0]) in frag_mol_map_num:
attach_map_num = int(core_edit_[0])
elif float(core_edit_[2]) - float(core_edit_[3]) > 0 and int(core_edit_[0]) in frag_mol_map_num:
attach_map_num = int(core_edit_[0])
else:
continue
if str(attach_map_num) != '0' and str(attach_map_num) != core_edit_[0]:
continue
frag_mols_1_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in frag_mols_1.GetAtoms()}
reac_mols_1_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mols_1.GetAtoms()}
# Explicit-H counts and formal charges of the attachment atom on both sides.
frag_attach_H = frag_mols_1.GetAtomWithIdx(frag_mols_1_amap[attach_map_num]).GetNumExplicitHs()
reac_attach_H = reac_mols_1.GetAtomWithIdx(reac_mols_1_amap[attach_map_num]).GetNumExplicitHs()
frag_attach_charge = frag_mols_1.GetAtomWithIdx(frag_mols_1_amap[attach_map_num]).GetFormalCharge()
reac_attach_charge = reac_mols_1.GetAtomWithIdx(reac_mols_1_amap[attach_map_num]).GetFormalCharge()
if lg_map_num != []:
# The reactant has extra atoms: emit a break edit for each bond joining the
# attachment atom to one of them, attachment atom written first.
for bond in reac_mols_1.GetBonds():
EndMapNum = bond.GetEndAtom().GetAtomMapNum()
BeginMapNum = bond.GetBeginAtom().GetAtomMapNum()
if (BeginMapNum == attach_map_num) and (EndMapNum in lg_map_num):
reac_edit.append("{}:{}:{}:{}".format(BeginMapNum,EndMapNum,bond.GetBondTypeAsDouble(),0.0))
elif (EndMapNum == attach_map_num) and (BeginMapNum in lg_map_num):
reac_edit.append("{}:{}:{}:{}".format(EndMapNum,BeginMapNum,bond.GetBondTypeAsDouble(),0.0))
elif lg_map_num == []:
# No extra atoms: describe the difference through pseudo-edits on atom 0.
if Chem.MolToSmiles(reac_mols_1) == Chem.MolToSmiles(frag_mols_1):
reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,0.0))
# One or two additional explicit hydrogens on the reactant, same charge.
if (reac_attach_H - frag_attach_H) == 1 and (reac_attach_charge - frag_attach_charge) == 0:
reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,1.0,0.0))
if (reac_attach_H - frag_attach_H) == 2 and (reac_attach_charge - frag_attach_charge) == 0:
reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,2.0,0.0))
# Formal-charge difference of -1 / +1 / +2, recorded at most once each.
if (reac_attach_charge - frag_attach_charge) == -1:
if "{}:{}:{}:{}".format(attach_map_num,0,0.0,-1.0) not in reac_edit:
reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,-1.0))
if (reac_attach_charge - frag_attach_charge) == 1:
if "{}:{}:{}:{}".format(attach_map_num,0,0.0,1.0) not in reac_edit:
reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,1.0))
if (reac_attach_charge - frag_attach_charge) == 2:
if "{}:{}:{}:{}".format(attach_map_num,0,0.0,2.0) not in reac_edit:
reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,2.0))
return reac_edit
def get_lg_map_lis(frag_mols,reac_mols,core_edits,prod_mol):
    """Derive the leaving groups that separate each synthon from its reactant.

    Every ``(fragment, reactant)`` pair is handed to ``find_reac_edit``.  The
    returned edit strings (four ``:``-separated fields) are sorted into two
    buckets purely by their textual prefix/suffix: no-op and charge edits on
    one side, everything else (treated as bonds to break) on the other.

    * Edits ending in ``0:0.0:-1.0``, ``0:0.0:1.0`` or ``0:0.0:2.0`` are
      recorded as the pseudo leaving groups ``'-1'``, ``'1'`` and ``'2'``.
    * The remaining edits are applied to the reactant with
      ``apply_edits_to_mol_break``.  Every resulting piece whose set of
      atom-map numbers differs from the synthon's is written out as a
      kekulized SMILES in which atoms that were bonded to the synthon carry
      map number 1 and all other atoms carry map number 0.

    Parameters
    ----------
    frag_mols, reac_mols:
        Molecules iterated pairwise with ``zip``.
    core_edits:
        Forwarded unchanged to ``find_reac_edit``.
    prod_mol:
        Molecule whose atom-map numbers decide which endpoint of a charge
        edit is reported as the attachment atom.

    Returns
    -------
    list
        ``(lg_smiles, attach_map_nums)`` tuples in discovery order.

    NOTE(review): the nesting below was reconstructed from a copy of this
    file whose indentation had been stripped - confirm against upstream.
    """
    lg_map_lis = []
    prod_map_num_lis = [i.GetAtomMapNum() for i in prod_mol.GetAtoms()]
    for frag_mols_1,reac_mols_1 in zip(frag_mols[:],reac_mols[:]):
        reac_edits = find_reac_edit(frag_mols_1,reac_mols_1,core_edits)
        reac_edits_a = []
        reac_edits_b = []
        # Bucket "a": no-op / charge edits.  Bucket "b": everything else.
        for reac_edit in reac_edits:
            if reac_edit[:3] == '0:0':
                reac_edits_a.append(reac_edit)
            elif reac_edit[-7:] == '0.0:0.0':
                reac_edits_a.append(reac_edit)
            elif reac_edit[-10:] == '0:0.0:-1.0':
                reac_edits_a.append(reac_edit)
            elif reac_edit[-9:] == '0:0.0:1.0':
                reac_edits_a.append(reac_edit)
            elif reac_edit[-9:] == '0:0.0:2.0':
                reac_edits_a.append(reac_edit)
            else:
                reac_edits_b.append(reac_edit)
        # Charge edits become pseudo leaving groups; the attachment atom is
        # whichever of the first two fields is a map number of the product.
        for reac_edit in reac_edits_a:
            if reac_edit[:3] == '0:0':
                pass
            elif reac_edit[-7:] == '0.0:0.0':
                pass
            elif reac_edit[-10:] == '0:0.0:-1.0':
                edit_map_num_lis = reac_edit.split(':')[:2]
                attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis]
                lg_smiles = '-1'
                lg_map_lis.append((lg_smiles,attach_map_num_1))
            elif reac_edit[-9:] == '0:0.0:1.0':
                edit_map_num_lis = reac_edit.split(':')[:2]
                attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis]
                lg_smiles = '1'
                lg_map_lis.append((lg_smiles,attach_map_num_1))
            elif reac_edit[-9:] == '0:0.0:2.0':
                edit_map_num_lis = reac_edit.split(':')[:2]
                attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis]
                lg_smiles = '2'
                lg_map_lis.append((lg_smiles,attach_map_num_1))
        # Break the bucket-"b" bonds in the reactant and inspect the pieces.
        frag_1_map_num_lis = [i.GetAtomMapNum() for i in frag_mols_1.GetAtoms() if i.GetAtomMapNum() != 0]
        reac_frag_mol = apply_edits_to_mol_break(reac_mols_1 , reac_edits_b)
        reac_frag_mols = Chem.GetMolFrags(reac_frag_mol,asMols=True,sanitizeFrags = False)
        # Edits already consumed by an earlier piece of this reactant.
        reac_edit_added = []
        for reac_frag_mol in reac_frag_mols[:]:
            reac_frag_map_num_lis = [i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms() if i.GetAtomMapNum() != 0]
            if set(reac_frag_map_num_lis) == set(frag_1_map_num_lis):
                # This piece is the synthon itself, not a leaving group.
                pass
            else:
                attach_map_num_1 = []
                for reac_edit in reac_edits:
                    if reac_edit in reac_edit_added:
                        continue
                    else:
                        pass
                    # b: first field of the edit, e: second field.
                    b,e = int(reac_edit.split(':')[0]),int(reac_edit.split(':')[1])
                    if e in reac_frag_map_num_lis and b in frag_1_map_num_lis:
                        # Tag the leaving-group atom by shifting its map number
                        # by 500; this relies on real map numbers staying below 500.
                        for atom in reac_frag_mol.GetAtoms():
                            if atom.GetAtomMapNum() == int(e):
                                atom.SetAtomMapNum(500+atom.GetAtomMapNum())
                                break
                            else:
                                pass
                        reac_edit_added.append(reac_edit)
                        # `atom` below is the variable left over from the loop
                        # above (the tagged atom when the loop ended via `break`).
                        if len(attach_map_num_1) == 1:
                            if [str(attach_map_num_1[0]),str(atom.GetAtomMapNum()-500)] in [i.split(':')[:2] for i in reac_edits ]: # the attachment point on the previous synthon pairs with this leaving group's attachment point
                                if atom.GetAtomMapNum() == max([i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms()]):
                                    attach_map_num_1 = [b] + attach_map_num_1
                                else:
                                    attach_map_num_1.append(b)
                            else:
                                if atom.GetAtomMapNum() == max([i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms()]):
                                    attach_map_num_1.append(b)
                                else:
                                    attach_map_num_1 = [b] + attach_map_num_1
                        elif len(attach_map_num_1) == 0:
                            attach_map_num_1.append(b)
                        else:
                            pass
                        # Stop after the first attachment when atom 0 of the
                        # piece is a hydrogen.
                        if reac_frag_mol.GetAtomWithIdx(0).GetAtomicNum() == 1 and len(attach_map_num_1) == 1:
                            break
                # Round-trip through SMILES, then collapse map numbers:
                # tagged (>= 500) atoms become 1, every other atom becomes 0.
                lg_smiles = Chem.MolToSmiles(reac_frag_mol,kekuleSmiles = True)
                lg = Chem.MolFromSmiles(lg_smiles)
                Chem.Kekulize(lg)
                for atom in lg.GetAtoms():
                    if atom.GetAtomMapNum() >= 500:
                        atom.SetAtomMapNum(1)
                        pass
                    else:
                        atom.SetAtomMapNum(0)
                lg_smiles = Chem.MolToSmiles(lg,canonical = False,kekuleSmiles = True)
                # Pieces that never matched an edit are dropped.
                if attach_map_num_1 != []:
                    lg_map_lis.append((lg_smiles,attach_map_num_1))
    return lg_map_lis
def get_chai_edit_mine(reac_mol, prod_mol):
    """List the chirality edits needed to go from product to reactant.

    Chiral centers are detected on map-free copies of both molecules (after a
    mol-block round trip) and re-keyed by the atom-map number of the original
    atom with the same index.

    Returns
    -------
    list of str
        Edits formatted ``"<map>:0:0:<reactant label>"``.  One edit is emitted
        for every center present in both molecules with different labels, and
        one for every reactant-only center whose map number occurs in the
        product and whose label is not ``'?'``.
    """
    def _centers_by_map(mol):
        # Map numbers are read from `mol` itself; detection runs on a copy.
        idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in mol.GetAtoms()}
        unmapped = copy.deepcopy(mol)
        for atom in unmapped.GetAtoms():
            atom.SetAtomMapNum(0)
        reparsed = Chem.MolFromMolBlock(Chem.MolToMolBlock(unmapped))
        found = Chem.FindMolChiralCenters(reparsed, includeUnassigned=True)
        return idx_to_map, {idx_to_map[center[0]]: center[1] for center in found}

    _, reac_centers = _centers_by_map(reac_mol)
    prod_idx_to_map, prod_centers = _centers_by_map(prod_mol)

    # Centers present on both sides whose label changed.
    chai_edits = [
        f"{amap_num}:0:0:{reac_centers[amap_num]}"
        for amap_num, prod_label in prod_centers.items()
        if amap_num in reac_centers and reac_centers[amap_num] != prod_label
    ]

    # Centers that only the reactant has, on atoms the product also contains.
    prod_map_nums = prod_idx_to_map.values()
    for amap_num, reac_label in reac_centers.items():
        if amap_num in prod_centers:
            continue
        if amap_num not in prod_map_nums:
            continue
        if reac_label == '?':
            continue
        chai_edits.append(f"{amap_num}:0:0:{reac_label}")
    return chai_edits
def get_original_chair_edit(p,b):
    """Build chirality edits that copy the product's labels onto ``b``.

    Parameters
    ----------
    p:
        Product SMILES (parsed with ``Chem.MolFromSmiles``).
    b:
        Molecule to compare against; it is deep-copied, never modified.

    Returns
    -------
    list of str
        ``"<atom index + 1>:0:0:<label>"`` for every chiral-center atom index
        of the product that is also a chiral-center atom index of ``b``.
        NOTE(review): presumably index + 1 equals the atom-map number after
        canonical mapping - confirm against the caller.
    """
    def _centers(mol):
        # Detection runs on a map-free molecule after a mol-block round trip.
        for atom in mol.GetAtoms():
            atom.SetAtomMapNum(0)
        reparsed = Chem.MolFromMolBlock(Chem.MolToMolBlock(mol))
        return dict(Chem.FindMolChiralCenters(reparsed, includeUnassigned=True))

    rebuilt_centers = _centers(copy.deepcopy(b))
    product_centers = _centers(Chem.MolFromSmiles(p))
    return [
        '{}:0:0:{}'.format(atom_idx + 1, label)
        for atom_idx, label in product_centers.items()
        if atom_idx in rebuilt_centers
    ]
def apply_chirality_change(prod_mol,chai_edits):
    """Return a copy of ``prod_mol`` with the given chirality edits applied.

    Parameters
    ----------
    prod_mol:
        Atom-mapped molecule; it is deep-copied and left untouched.
    chai_edits:
        Strings whose first ``:``-separated field is an atom-map number and
        whose last two characters are ``:R``, ``:S`` or ``:?``.  Edits with
        any other suffix are ignored.

    Returns
    -------
    The edited copy.

    Raises
    ------
    KeyError
        If an edit names a map number absent from ``prod_mol``, or if the
        edited atom is not reported as a chiral center afterwards.
    """
    p_amap_idx = {atom.GetAtomMapNum(): atom.GetIdx() for atom in prod_mol.GetAtoms()}
    prod_mol = copy.deepcopy(prod_mol)
    for chai_edit in chai_edits:
        amap = int(chai_edit.split(':')[0])
        suffix = chai_edit[-2:]
        if suffix in (':R', ':S'):
            atom = prod_mol.GetAtomWithIdx(p_amap_idx[amap])
            # Try one winding first, read back the resulting label, and flip
            # to the other winding if it is not the requested one.
            atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
            observed = get_chair_dict_without_atom_map(prod_mol)[atom.GetIdx()]
            if observed != suffix[1]:
                atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW)
        elif suffix == ':?':
            atom = prod_mol.GetAtomWithIdx(p_amap_idx[amap])
            atom.SetChiralTag(Chem.ChiralType.CHI_UNSPECIFIED)
    return prod_mol
def get_chair_dict_without_atom_map(temp_p):
    """Map atom index to chiral label for a map-free copy of ``temp_p``.

    The input is deep-copied, all atom-map numbers on the copy are cleared,
    and the copy is round-tripped through a mol block before
    ``Chem.FindMolChiralCenters`` (unassigned centers included) is run on it.
    """
    unmapped = copy.deepcopy(temp_p)
    for atom in unmapped.GetAtoms():
        atom.SetAtomMapNum(0)
    reparsed = Chem.MolFromMolBlock(Chem.MolToMolBlock(unmapped))
    centers = Chem.FindMolChiralCenters(reparsed, includeUnassigned=True)
    return dict(centers)
# Bond order used when re-attaching a leaving-group atom, keyed by that
# atom's (element symbol, total valence, formal charge).  Any state not
# listed here is attached with a single bond.
_LG_ATTACH_BOND_ORDER = {
    ('O', 0, 0): 2.0,
    ('S', 0, 0): 2.0,
    ('S', 2, 0): 2.0,
    ('S', 4, 0): 2.0,
    ('S', 1, 1): 2.0,
    ('P', 3, 0): 2.0,
    ('C', 2, 0): 2.0,
    ('N', 2, 1): 2.0,
    ('N', 1, 0): 2.0,
    ('N', 0, -1): 2.0,
    ('Se', 2, 0): 2.0,
    ('Si', 2, 0): 2.0,
    ('Mn', 5, 0): 2.0,
    ('Cr', 4, 0): 2.0,
    ('O', 1, 1): 2.0,
    ('N', 0, 0): 3.0,
    ('C', 1, 0): 3.0,
    ('C', 0, -1): 3.0,
}


def _lg_attach_bond_order(atom, is_multi_bond):
    """Return the bond order (1.0, 2.0 or 3.0) for re-attaching ``atom``."""
    symbol = atom.GetSymbol()
    if all(symbol != key[0] for key in _LG_ATTACH_BOND_ORDER):
        return 1.0
    state = (symbol, atom.GetTotalValence(), atom.GetFormalCharge())
    # One leaving-group atom bonded to several synthon atoms always uses
    # single bonds.
    if is_multi_bond:
        return 1.0
    return _LG_ATTACH_BOND_ORDER.get(state, 1.0)


def run_get_p_b_l(rxn_smi):
    """Encode a mapped reaction as product + edits and verify the round trip.

    The reaction is decomposed into core bond edits, chirality / stereo /
    charge edits, attachment edits and leaving groups.  The reactants are then
    rebuilt from the product and those edits, and the encoding is accepted
    only when the rebuilt SMILES equals the canonical, map-free reactant
    SMILES and the edits stay within the supported range.

    Parameters
    ----------
    rxn_smi:
        Reaction SMILES of the form ``"reactants>>product"``.

    Returns
    -------
    list or str
        ``[p, core_edits, chai_edits, stereo_edits, charge_edits,
        core_edits_add, lg_map_lis]`` on success.  Otherwise one of

        * ``'error type 3'`` - either side has 150 atoms or more;
        * ``'error type 1'`` - the rebuilt reactants differ from the input,
          or the edits fall outside the supported range;
        * ``'error type 2'`` - any exception was raised while processing.
    """
    try:
        r, p = rxn_smi.split(">>")
        if Chem.MolFromSmiles(p).GetNumAtoms() >= 150 or Chem.MolFromSmiles(r).GetNumAtoms() >= 150:
            print('error type 3')
            return 'error type 3'

        # ---- 1. canonical mapping and kekulized, aligned molecules --------
        r, p = cano_smiles_map(r), cano_smiles_map(p)
        reac_mol, prod_mol = align_kekule_pairs(r, p)
        reac_mol = Chem.MolFromSmiles(Chem.MolToSmiles(reac_mol, kekuleSmiles=True), sanitize=False)
        reac_smiles_temp = Chem.MolToSmiles(reac_mol, kekuleSmiles=True)
        reac_mol_temp = Chem.MolFromSmiles(reac_smiles_temp)
        if reac_mol_temp is None or Chem.MolToSmiles(reac_mol_temp) != Chem.MolToSmiles(Chem.MolFromSmiles(r)):
            # The aligned kekule form did not survive a SMILES round trip:
            # fall back to the alternative alignment helper.
            r_k = get_kekule_aligned_r(r, p)
            # The result is not used for branching; the call is kept so that
            # any exception it raises is still reported as before.
            count_kekule_d(r_k, p)
            reac_mol, prod_mol = Chem.MolFromSmiles(r_k), Chem.MolFromSmiles(p)
            Chem.Kekulize(reac_mol)
            Chem.Kekulize(prod_mol)

        # ---- 2. edits ------------------------------------------------------
        core_edits = get_core_edit_mine(reac_mol, prod_mol)
        # Attachment edits: third field 0 and second field non-zero.
        core_edits_add = [i for i in core_edits if (float(i.split(':')[2]) == 0) and (float(i.split(':')[1]) != 0)]
        core_edits = [i for i in core_edits if i not in core_edits_add]
        # Last field > 0 is a bond change, == 0 a bond break.
        edit_c = [i for i in core_edits if (float(i.split(':')[-1]) > 0)]
        edit_b = [i for i in core_edits if (float(i.split(':')[-1]) == 0)]
        chai_edits = get_chai_edit_mine(Chem.MolFromSmiles(r), Chem.MolFromSmiles(p))
        stereo_edits = get_stereo_edit_mine(Chem.MolFromSmiles(r), Chem.MolFromSmiles(p))
        charge_edits = get_charge_edit_mine(reac_mol, prod_mol, core_edits)
        # Stereo labels of the untouched product, keyed by atom-map number(s).
        o_p_Chiral_dic = get_atom_map_chai_dic(Chem.MolFromSmiles(p))
        o_p_Stereo_dic = get_atom_map_stereo_dic(Chem.MolFromSmiles(p))

        # ---- 3. synthons and leaving groups -------------------------------
        frag_mol = apply_edits_to_mol_break(prod_mol, edit_b)
        frag_mol = apply_edits_to_mol_change(frag_mol, edit_c)
        frag_mol = apply_edits_to_mol_connect(frag_mol, core_edits_add)
        frag_mol = remove_s_H(frag_mol)
        reac_mols = Chem.GetMolFrags(reac_mol, asMols=True, sanitizeFrags=False)
        frag_mols = Chem.GetMolFrags(frag_mol, asMols=True, sanitizeFrags=False)
        if len(reac_mols) != len(frag_mols):
            # Drop lone hydrogens that split off from the synthons.
            frag_mols = [piece for piece in frag_mols if Chem.MolToSmiles(piece) != '[H]']
        if len(reac_mols) != len(frag_mols):
            # Still mismatched: treat all synthons as one molecule.
            frag_mols = [frag_mol]
        if len(reac_mols) == len(frag_mols):
            reac_mols, frag_mols = map_reac_and_frag(reac_mols, frag_mols)
        else:
            print('error type 0')
        lg_map_lis_temp = get_lg_map_lis(frag_mols[:], reac_mols[:], core_edits, prod_mol)

        # Leaving groups with more than one attachment atom are
        # re-canonicalised and their attachment list is reordered to follow
        # the canonical atom ranks.
        lg_map_lis = []
        for lg, map_ in lg_map_lis_temp:
            lg, map_ = copy.deepcopy(lg), copy.deepcopy(map_)
            if lg.count(':') > 1:
                map_new = []
                lg = Chem.MolFromSmiles(lg)
                Chem.Kekulize(lg)
                for atom in lg.GetAtoms():
                    if atom.GetAtomMapNum() == 0:
                        map_new.append('*')
                    else:
                        map_new.append(map_.pop(0))
                lg_smiles = Chem.MolToSmiles(lg, kekuleSmiles=True)
                rank = list(Chem.CanonicalRankAtoms(lg, breakTies=False))
                map_new = sorted(map_new, key=lambda x: rank[map_new.index(x)])
                map_new = [i for i in map_new if i != '*']
                lg_map_lis.append((lg_smiles, map_new))
            else:
                lg_map_lis.append((lg, map_))

        # ---- 4. rebuild the reactants from synthons + leaving groups ------
        # Pseudo leaving groups that only shift a formal charge.  The bare
        # integers are what get_lg_map_lis emits; the float spellings are
        # accepted as well.
        charge_tokens = ('-1', '1', '2', '-1.0', '1.0', '2.0')
        total_mol = frag_mol
        for lg_smile, map_nums in lg_map_lis:
            if lg_smile not in charge_tokens:
                lg = Chem.MolFromSmiles(lg_smile)
                max_map = max(i.GetAtomMapNum() for i in total_mol.GetAtoms())
                count = 1
                # Attachment atoms (map number 1) are numbered first, so they
                # occupy max_map + 1, max_map + 2, ...
                for atom in lg.GetAtoms():
                    if atom.GetAtomMapNum() == 1:
                        atom.SetAtomMapNum(max_map + count)
                        count += 1
                for atom in lg.GetAtoms():
                    if atom.GetAtomMapNum() == 0:
                        atom.SetAtomMapNum(max_map + count)
                        count += 1
                total_mol = Chem.CombineMols(total_mol, lg)
                amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
                new_mol = Chem.RWMol(total_mol)
                # Presumably each ':' in the SMILES marks one mapped
                # attachment atom; when the counts differ, a single atom is
                # bonded to every synthon atom in map_nums.
                one_to_one = lg_smile.count(':') == len(map_nums)
                for idx, map_num in enumerate(map_nums):
                    lg_map = max_map + 1 + idx if one_to_one else max_map + 1
                    atom = total_mol.GetAtomWithIdx(amap[lg_map])
                    bond_float = _lg_attach_bond_order(atom, 0 if one_to_one else 1)
                    new_mol.AddBond(amap[map_num], amap[lg_map], BOND_FLOAT_TO_TYPE[bond_float])
                total_mol = new_mol.GetMol()
            else:
                map_num = map_nums[0]
                amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
                atom = total_mol.GetAtomWithIdx(amap[map_num])
                atom.SetNumRadicalElectrons(0)
                atom.SetFormalCharge(int(atom.GetFormalCharge() + float(lg_smile)))
        total_mol = correct_mol_1(total_mol, is_nitrine_c=True)
        b = correct_mol(total_mol, keep_map=True)

        # ---- 5. restore stereo that the rebuild changed by accident -------
        b_Chiral_dic = get_atom_map_chai_dic(b)
        b_Stereo_dic = get_atom_map_stereo_dic(b)
        dic_map_idx = {i.GetAtomMapNum(): i.GetIdx() for i in b.GetAtoms()}
        edited_chiral_maps = [int(i.split(':')[0]) for i in chai_edits]
        for b_map, chiral in b_Chiral_dic.items():
            if b_map not in o_p_Chiral_dic:
                continue
            if chiral != o_p_Chiral_dic[b_map] and b_map not in edited_chiral_maps:
                # Label differs from the product without an explicit edit:
                # flip the winding back.
                atom = b.GetAtomWithIdx(dic_map_idx[b_map])
                if atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CCW:
                    atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW)
                elif atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CW:
                    atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
        edited_stereo_pairs = [tuple(int(j) for j in i.split(':')[:2]) for i in stereo_edits]
        for b_map, stereo in b_Stereo_dic.items():
            if b_map not in o_p_Stereo_dic:
                continue
            if stereo != o_p_Stereo_dic[b_map] and b_map not in edited_stereo_pairs:
                bond = b.GetBondBetweenAtoms(dic_map_idx[b_map[0]], dic_map_idx[b_map[1]])
                bond.SetStereo(o_p_Stereo_dic[b_map])

        # ---- 6. apply the recorded charge / chirality / stereo edits ------
        b = apply_charge_change(b, charge_edits)
        if chai_edits == []:
            o_chai_edits = get_original_chair_edit(p, b)
            b = apply_chirality_change(b, o_chai_edits)
        else:
            b = apply_chirality_change(b, chai_edits)
        b = Chem.MolFromSmiles(Chem.MolToSmiles(b, canonical=False))
        b = apply_stereo_change(b, stereo_edits)
        for atom in b.GetAtoms():
            atom.SetAtomMapNum(0)
        for bond in b.GetBonds():
            if bond.GetStereo() == Chem.rdchem.BondStereo.STEREONONE:
                bond.SetStereo(Chem.rdchem.BondStereo.STEREOANY)
        pre_smiles = Chem.MolToSmiles(b)
        # Textual clean-up of the generated SMILES.
        # NOTE(review): the last replacement rewrites '/C=N\' as 'C=C', which
        # changes an element - looks unintended, confirm before relying on it.
        pre_smiles = pre_smiles.replace('[H]/C=C/','C=C').replace('[H]/C=C(\\','C=C(').replace('[H]/C=C(/','C=C(').replace('[MgH2]','[Mg]').replace('/C=N\\','C=C')
        pre_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(pre_smiles))

        # ---- 7. compare with the canonical, map-free reactants ------------
        reac_mol = Chem.MolFromSmiles(r)
        for atom in reac_mol.GetAtoms():
            atom.SetAtomMapNum(0)
        reac_mol_smiles = Chem.MolToSmiles(reac_mol)
        reac_mol_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(reac_mol_smiles))
        # The last three characters of an attachment edit are parsed as a float.
        added_orders = [float(i[-3:]) for i in core_edits_add]
        if added_orders == []:
            max_add = 0
        elif max(added_orders) == 1:
            max_add = 1
        else:
            max_add = 2
        # Only the final character of each charge edit is inspected.
        charges = [int(i[-1]) for i in charge_edits] + [0]
        if pre_smiles == reac_mol_smiles and len(core_edits_add) <= 1 and max_add <= 1 and max(charges) <= 1 and min(charges) >= -1:
            return [p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis]
        print(pre_smiles, reac_mol_smiles, chai_edits, stereo_edits)
        return 'error type 1'
    except Exception:
        # Any failure in the pipeline is reported as a generic error code.
        print('error type 2')
        return 'error type 2'
def run_get_p_b_l_forward(rxn_smi):
    """Encode a mapped reaction SMILES as a product plus edit lists.

    Parameters
    ----------
    rxn_smi:
        Reaction SMILES of the form ``"reactants>>product"``.

    Returns
    -------
    list or str
        ``[p, core_edits, chai_edits, stereo_edits, charge_edits,
        core_edits_add, lg_map_lis]`` on success; ``'error type 1'`` when
        either side has 150 atoms or more; ``'error type 2'`` when anything
        raises.
    """
    try:
        r, p = rxn_smi.split(">>")
        oversized = (
            Chem.MolFromSmiles(p).GetNumAtoms() >= 150
            or Chem.MolFromSmiles(r).GetNumAtoms() >= 150
        )
        if oversized:
            return 'error type 1'

        # Canonical atom mapping, then kekulized and mutually aligned mols.
        r = cano_smiles_map(r)
        p = cano_smiles_map(p)
        reac_mol, prod_mol = align_kekule_pairs(r, p)
        kekule_smiles = Chem.MolToSmiles(reac_mol, kekuleSmiles=True)
        reac_mol = Chem.MolFromSmiles(kekule_smiles, sanitize=False)
        reparsed = Chem.MolFromSmiles(Chem.MolToSmiles(reac_mol, kekuleSmiles=True))
        survives_round_trip = (
            reparsed is not None
            and Chem.MolToSmiles(reparsed) == Chem.MolToSmiles(Chem.MolFromSmiles(r))
        )
        if not survives_round_trip:
            # Fall back to the alternative alignment helper.
            r_k = get_kekule_aligned_r(r, p)
            # Evaluated as before; both outcomes lead to the same handling.
            count_kekule_d(r_k, p)
            reac_mol = Chem.MolFromSmiles(r_k)
            prod_mol = Chem.MolFromSmiles(p)
            Chem.Kekulize(reac_mol)
            Chem.Kekulize(prod_mol)

        # Core edits, split into attachment edits and the rest.
        all_core_edits = get_core_edit_mine(reac_mol, prod_mol)
        core_edits_add = [
            edit for edit in all_core_edits
            if float(edit.split(':')[2]) == 0 and float(edit.split(':')[1]) != 0
        ]
        core_edits = [edit for edit in all_core_edits if edit not in core_edits_add]
        # Last field > 0 is a bond change, == 0 a bond break.
        edit_c = [edit for edit in core_edits if float(edit.split(':')[-1]) > 0]
        edit_b = [edit for edit in core_edits if float(edit.split(':')[-1]) == 0]

        chai_edits = get_chai_edit_mine(Chem.MolFromSmiles(r), Chem.MolFromSmiles(p))
        stereo_edits = get_stereo_edit_mine(Chem.MolFromSmiles(r), Chem.MolFromSmiles(p))
        charge_edits = get_charge_edit_mine(reac_mol, prod_mol, core_edits)
        # Results are not used here; the calls are kept so that their
        # failures are still reported as an error.
        get_atom_map_chai_dic(Chem.MolFromSmiles(p))
        get_atom_map_stereo_dic(Chem.MolFromSmiles(p))

        # Synthons obtained by applying the core edits to the product.
        synthons = apply_edits_to_mol_break(prod_mol, edit_b)
        synthons = apply_edits_to_mol_change(synthons, edit_c)
        synthons = apply_edits_to_mol_connect(synthons, core_edits_add)
        synthons = remove_s_H(synthons)

        reac_mols = Chem.GetMolFrags(reac_mol, asMols=True, sanitizeFrags=False)
        frag_mols = Chem.GetMolFrags(synthons, asMols=True, sanitizeFrags=False)
        if len(reac_mols) != len(frag_mols):
            # Drop lone hydrogens.
            frag_mols = [piece for piece in frag_mols if Chem.MolToSmiles(piece) != '[H]']
        if len(reac_mols) != len(frag_mols):
            # Still mismatched: use the unsplit synthon molecule.
            frag_mols = [synthons]
        if len(reac_mols) == len(frag_mols):
            reac_mols, frag_mols = map_reac_and_frag(reac_mols, frag_mols)

        raw_leaving_groups = get_lg_map_lis(frag_mols[:], reac_mols[:], core_edits, prod_mol)

        # Leaving groups with more than one ':' are re-canonicalised and
        # their attachment list is reordered by canonical atom rank.
        lg_map_lis = []
        for raw_lg, raw_maps in raw_leaving_groups:
            lg_text = copy.deepcopy(raw_lg)
            attach_maps = copy.deepcopy(raw_maps)
            if lg_text.count(':') <= 1:
                lg_map_lis.append((lg_text, attach_maps))
                continue
            lg_mol = Chem.MolFromSmiles(lg_text)
            Chem.Kekulize(lg_mol)
            per_atom = []
            for atom in lg_mol.GetAtoms():
                if atom.GetAtomMapNum() == 0:
                    per_atom.append('*')
                else:
                    per_atom.append(attach_maps.pop(0))
            canonical_lg = Chem.MolToSmiles(lg_mol, kekuleSmiles=True)
            rank = list(Chem.CanonicalRankAtoms(lg_mol, breakTies=False))
            ordered = sorted(per_atom, key=lambda entry: rank[per_atom.index(entry)])
            lg_map_lis.append((canonical_lg, [entry for entry in ordered if entry != '*']))

        return [p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis]
    except:
        return 'error type 2'
def run_get_p_b_l_backward(p,core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add,lg_map_lis):
    """Rebuild the map-free reactant SMILES from a product and its edits.

    Parameters
    ----------
    p:
        Product SMILES (parsed with ``Chem.MolFromSmiles``).
    core_edits:
        ``:``-separated edit strings; the last field is parsed as a float
        (> 0: bond change, == 0: bond break).
    chai_edits, stereo_edits, charge_edits:
        Edits forwarded to ``apply_chirality_change``, ``apply_stereo_change``
        and ``apply_charge_change``.
    core_edits_add:
        Edits forwarded to ``apply_edits_to_mol_connect``.
    lg_map_lis:
        ``(lg_smile, map_nums)`` tuples.  ``lg_smile`` is either a leaving
        group SMILES or one of ``'-1'``, ``'1'``, ``'2'`` (a formal-charge
        shift on the atom mapped ``map_nums[0]``).

    Returns
    -------
    str
        SMILES produced by ``Chem.MolToSmiles`` after atom-map numbers have
        been cleared and a textual clean-up applied.
    """
    prod_mol = Chem.MolFromSmiles(p)
    # Attachment edits are applied separately, so drop them from the core list.
    core_edits = [i for i in core_edits if i not in core_edits_add]
    # Split by the last field of each edit: > 0 bond change, == 0 bond break.
    edit_c = [i for i in core_edits if (float(i.split(':')[-1]) > 0)]
    edit_b = [i for i in core_edits if (float(i.split(':')[-1]) == 0)]
    # Stereo labels of the untouched product, keyed by atom-map number(s).
    o_p_Chiral_dic = get_atom_map_chai_dic(Chem.MolFromSmiles(p))
    o_p_Stereo_dic = get_atom_map_stereo_dic(Chem.MolFromSmiles(p))
    # Product -> synthons.
    frag_mol = apply_edits_to_mol_break(prod_mol,edit_b)
    frag_mol = apply_edits_to_mol_change(frag_mol,edit_c)
    frag_mol = apply_edits_to_mol_connect(frag_mol, core_edits_add)
    frag_mol = remove_s_H(frag_mol)
    # Synthons + leaving groups -> reactants.
    total_mol = frag_mol
    for lg_smile,map_nums in lg_map_lis[:]:
        if lg_smile not in ['-1','1','2']:
            lg = Chem.MolFromSmiles(lg_smile)
            total_mol_map_num_lis = [i.GetAtomMapNum() for i in total_mol.GetAtoms()]
            max_map = max(total_mol_map_num_lis)
            count = 1
            # Atoms mapped 1 in the leaving group are renumbered first, so
            # they receive max_map + 1, max_map + 2, ...
            for atom in lg.GetAtoms():
                if atom.GetAtomMapNum() == 1:
                    atom.SetAtomMapNum(max_map + count)
                    count += 1
                else:
                    pass
            total_mol_map_num_lis = [i.GetAtomMapNum() for i in total_mol.GetAtoms()]
            max_map = max(total_mol_map_num_lis)
            # Remaining (unmapped) leaving-group atoms get the next numbers.
            for atom in lg.GetAtoms():
                if atom.GetAtomMapNum() == 0:
                    atom.SetAtomMapNum(max_map + count)
                    count += 1
                else:
                    pass
            total_mol = Chem.CombineMols(total_mol,lg)
            amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
            new_mol = Chem.RWMol(total_mol)
            is_multi_bond = 0
            for idx in range(len(map_nums)):
                map_num = map_nums[idx]
                # Presumably each ':' in the SMILES marks one mapped attachment
                # atom; when the counts match, attachment atom idx pairs with
                # map_nums[idx].  Otherwise the first attachment atom is bonded
                # to every entry of map_nums.
                if lg_smile.count(':') == len(map_nums):
                    lg_map = max_map + 1 + idx
                    atom = total_mol.GetAtomWithIdx(amap[lg_map])
                    is_multi_bond = 0
                else:
                    lg_map = max_map + 1
                    atom = total_mol.GetAtomWithIdx(amap[lg_map])
                    is_multi_bond= 1
                # Choose the bond order from the attachment atom's element,
                # total valence and formal charge; default is a single bond.
                if atom.GetSymbol() == 'O' and atom.GetTotalValence() == 0 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'S' and atom.GetTotalValence() in [0,2,4] and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'S' and atom.GetTotalValence() ==1 and atom.GetFormalCharge() == 1 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'P' and atom.GetTotalValence() == 3 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'C' and atom.GetTotalValence() == 2 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'N' and atom.GetTotalValence() == 2 and atom.GetFormalCharge() == 1 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'N' and atom.GetTotalValence() == 1 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'N' and atom.GetTotalValence() == 0 and atom.GetFormalCharge() == -1 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'Se' and atom.GetTotalValence() == 2 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'Si' and atom.GetTotalValence() == 2 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'Mn' and atom.GetTotalValence() == 5 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'Cr' and atom.GetTotalValence() == 4 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'O' and atom.GetTotalValence() == 1 and atom.GetFormalCharge() == 1 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'N' and atom.GetTotalValence() == 0 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 3.0
                elif atom.GetSymbol() == 'C' and atom.GetTotalValence() == 1 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 3.0
                elif atom.GetSymbol() == 'C' and atom.GetTotalValence() == 0 and atom.GetFormalCharge() == -1 and is_multi_bond == 0:
                    bond_float = 3.0
                else:
                    bond_float = 1.0
                new_mol.AddBond(amap[map_num],amap[lg_map],BOND_FLOAT_TO_TYPE[bond_float])
            total_mol = new_mol.GetMol()
        else:
            # Pseudo leaving group: shift the formal charge of one atom.
            map_num = map_nums[0]
            amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
            atom = total_mol.GetAtomWithIdx(amap[map_num])
            atom.SetNumRadicalElectrons(0)
            atom.SetFormalCharge(int(atom.GetFormalCharge()+float(lg_smile)))
    total_mol = correct_mol_1(total_mol,is_nitrine_c = True)
    b = correct_mol(total_mol,keep_map = True)
    b_Chiral_dic = get_atom_map_chai_dic(b)
    b_Stereo_dic = get_atom_map_stereo_dic(b)
    dic_map_idx = dict([(i.GetAtomMapNum(),i.GetIdx()) for i in b.GetAtoms()])
    # `act` is set when a winding is flipped but is not used afterwards.
    act = 0
    # Undo chirality changes that were not requested by an explicit edit:
    # flip the winding of atoms whose label differs from the product's.
    for b_map,Chiral in b_Chiral_dic.items():
        if b_map not in o_p_Chiral_dic.keys():
            pass
        elif b_map in o_p_Chiral_dic.keys() and b_Chiral_dic[b_map] != o_p_Chiral_dic[b_map] and b_map not in [int(i.split(':')[0]) for i in chai_edits]:
            act =1
            atom = b.GetAtomWithIdx(dic_map_idx[b_map])
            if atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CCW:
                atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW)
            elif atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CW:
                atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
    if act == 1:
        pass
    # Same for bond stereo: copy the product's value back unless an explicit
    # stereo edit names that atom pair.
    for b_map,Stereo in b_Stereo_dic.items():
        if b_map not in o_p_Stereo_dic.keys():
            pass
        elif b_map in o_p_Stereo_dic.keys() and Stereo != o_p_Stereo_dic[b_map] and b_map not in [tuple([int(i) for i in i.split(':')[:2]]) for i in stereo_edits]:
            bond = b.GetBondBetweenAtoms(dic_map_idx[b_map[0]],dic_map_idx[b_map[1]])
            bond.SetStereo(o_p_Stereo_dic[b_map])
    # Apply the recorded edits.
    b = apply_charge_change(b,charge_edits)
    if chai_edits == []:
        # No explicit chirality edits: re-impose the product's labels.
        o_chai_edits = get_original_chair_edit(p,b)
        b = apply_chirality_change(b,o_chai_edits)
    else:
        b = apply_chirality_change(b,chai_edits)
    b = Chem.MolFromSmiles(Chem.MolToSmiles(b,canonical = False))
    b = apply_stereo_change(b,stereo_edits)
    # Strip atom maps before writing the final SMILES.
    for atom in b.GetAtoms():
        atom.SetAtomMapNum(0)
    for bond in b.GetBonds():
        if bond.GetStereo() == Chem.rdchem.BondStereo.STEREONONE:
            bond.SetStereo(Chem.rdchem.BondStereo.STEREOANY)
        else:
            pass
    pre_smiles = Chem.MolToSmiles(b)
    # Textual clean-up of the generated SMILES.
    # NOTE(review): the last replacement rewrites '/C=N\' as 'C=C', which
    # changes an element - looks unintended, confirm.
    pre_smiles = pre_smiles.replace('[H]/C=C/','C=C').replace('[H]/C=C(\\','C=C(').replace('[H]/C=C(/','C=C(').replace('[MgH2]','[Mg]').replace('/C=N\\','C=C')
    pre_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(pre_smiles))
    return pre_smiles
def run_get_p_b_l_backward_with_mapping(p,core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add,lg_map_lis):
    """Rebuild the reactant SMILES from a product and its edits, keeping maps.

    Follows the same steps as ``run_get_p_b_l_backward`` but does not clear
    the atom-map numbers and does not apply the textual clean-up to the
    resulting SMILES.

    Parameters
    ----------
    p:
        Product SMILES (parsed with ``Chem.MolFromSmiles``).
    core_edits:
        ``:``-separated edit strings; the last field is parsed as a float
        (> 0: bond change, == 0: bond break).
    chai_edits, stereo_edits, charge_edits:
        Edits forwarded to ``apply_chirality_change``, ``apply_stereo_change``
        and ``apply_charge_change``.
    core_edits_add:
        Edits forwarded to ``apply_edits_to_mol_connect``.
    lg_map_lis:
        ``(lg_smile, map_nums)`` tuples.  ``lg_smile`` is either a leaving
        group SMILES or one of ``'-1'``, ``'1'``, ``'2'`` (a formal-charge
        shift on the atom mapped ``map_nums[0]``).

    Returns
    -------
    str
        SMILES produced by ``Chem.MolToSmiles`` with atom-map numbers intact.
    """
    prod_mol = Chem.MolFromSmiles(p)
    # Attachment edits are applied separately, so drop them from the core list.
    core_edits = [i for i in core_edits if i not in core_edits_add]
    # Split by the last field of each edit: > 0 bond change, == 0 bond break.
    edit_c = [i for i in core_edits if (float(i.split(':')[-1]) > 0)]
    edit_b = [i for i in core_edits if (float(i.split(':')[-1]) == 0)]
    # Stereo labels of the untouched product, keyed by atom-map number(s).
    o_p_Chiral_dic = get_atom_map_chai_dic(Chem.MolFromSmiles(p))
    o_p_Stereo_dic = get_atom_map_stereo_dic(Chem.MolFromSmiles(p))
    # Product -> synthons.
    frag_mol = apply_edits_to_mol_break(prod_mol,edit_b)
    frag_mol = apply_edits_to_mol_change(frag_mol,edit_c)
    frag_mol = apply_edits_to_mol_connect(frag_mol, core_edits_add)
    frag_mol = remove_s_H(frag_mol)
    # Synthons + leaving groups -> reactants.
    total_mol = frag_mol
    for lg_smile,map_nums in lg_map_lis[:]:
        if lg_smile not in ['-1','1','2']:
            lg = Chem.MolFromSmiles(lg_smile)
            total_mol_map_num_lis = [i.GetAtomMapNum() for i in total_mol.GetAtoms()]
            max_map = max(total_mol_map_num_lis)
            count = 1
            # Atoms mapped 1 in the leaving group are renumbered first, so
            # they receive max_map + 1, max_map + 2, ...
            for atom in lg.GetAtoms():
                if atom.GetAtomMapNum() == 1:
                    atom.SetAtomMapNum(max_map + count)
                    count += 1
                else:
                    pass
            total_mol_map_num_lis = [i.GetAtomMapNum() for i in total_mol.GetAtoms()]
            max_map = max(total_mol_map_num_lis)
            # Remaining (unmapped) leaving-group atoms get the next numbers.
            for atom in lg.GetAtoms():
                if atom.GetAtomMapNum() == 0:
                    atom.SetAtomMapNum(max_map + count)
                    count += 1
                else:
                    pass
            total_mol = Chem.CombineMols(total_mol,lg)
            amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
            new_mol = Chem.RWMol(total_mol)
            is_multi_bond = 0
            for idx in range(len(map_nums)):
                map_num = map_nums[idx]
                # Presumably each ':' in the SMILES marks one mapped attachment
                # atom; when the counts match, attachment atom idx pairs with
                # map_nums[idx].  Otherwise the first attachment atom is bonded
                # to every entry of map_nums.
                if lg_smile.count(':') == len(map_nums):
                    lg_map = max_map + 1 + idx
                    atom = total_mol.GetAtomWithIdx(amap[lg_map])
                    is_multi_bond = 0
                else:
                    lg_map = max_map + 1
                    atom = total_mol.GetAtomWithIdx(amap[lg_map])
                    is_multi_bond= 1
                # Choose the bond order from the attachment atom's element,
                # total valence and formal charge; default is a single bond.
                if atom.GetSymbol() == 'O' and atom.GetTotalValence() == 0 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'S' and atom.GetTotalValence() in [0,2,4] and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'S' and atom.GetTotalValence() ==1 and atom.GetFormalCharge() == 1 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'P' and atom.GetTotalValence() == 3 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'C' and atom.GetTotalValence() == 2 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'N' and atom.GetTotalValence() == 2 and atom.GetFormalCharge() == 1 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'N' and atom.GetTotalValence() == 1 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'N' and atom.GetTotalValence() == 0 and atom.GetFormalCharge() == -1 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'Se' and atom.GetTotalValence() == 2 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'Si' and atom.GetTotalValence() == 2 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'Mn' and atom.GetTotalValence() == 5 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'Cr' and atom.GetTotalValence() == 4 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'O' and atom.GetTotalValence() == 1 and atom.GetFormalCharge() == 1 and is_multi_bond == 0:
                    bond_float = 2.0
                elif atom.GetSymbol() == 'N' and atom.GetTotalValence() == 0 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 3.0
                elif atom.GetSymbol() == 'C' and atom.GetTotalValence() == 1 and atom.GetFormalCharge() == 0 and is_multi_bond == 0:
                    bond_float = 3.0
                elif atom.GetSymbol() == 'C' and atom.GetTotalValence() == 0 and atom.GetFormalCharge() == -1 and is_multi_bond == 0:
                    bond_float = 3.0
                else:
                    bond_float = 1.0
                new_mol.AddBond(amap[map_num],amap[lg_map],BOND_FLOAT_TO_TYPE[bond_float])
            total_mol = new_mol.GetMol()
        else:
            # Pseudo leaving group: shift the formal charge of one atom.
            map_num = map_nums[0]
            amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
            atom = total_mol.GetAtomWithIdx(amap[map_num])
            atom.SetNumRadicalElectrons(0)
            atom.SetFormalCharge(int(atom.GetFormalCharge()+float(lg_smile)))
    total_mol = correct_mol_1(total_mol,is_nitrine_c = True)
    b = correct_mol(total_mol,keep_map = True)
    b_Chiral_dic = get_atom_map_chai_dic(b)
    b_Stereo_dic = get_atom_map_stereo_dic(b)
    dic_map_idx = dict([(i.GetAtomMapNum(),i.GetIdx()) for i in b.GetAtoms()])
    # `act` is set when a winding is flipped but is not used afterwards.
    act = 0
    # Undo chirality changes that were not requested by an explicit edit:
    # flip the winding of atoms whose label differs from the product's.
    for b_map,Chiral in b_Chiral_dic.items():
        if b_map not in o_p_Chiral_dic.keys():
            pass
        elif b_map in o_p_Chiral_dic.keys() and b_Chiral_dic[b_map] != o_p_Chiral_dic[b_map] and b_map not in [int(i.split(':')[0]) for i in chai_edits]:
            act =1
            atom = b.GetAtomWithIdx(dic_map_idx[b_map])
            if atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CCW:
                atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW)
            elif atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CW:
                atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
    if act == 1:
        pass
    # Same for bond stereo: copy the product's value back unless an explicit
    # stereo edit names that atom pair.
    for b_map,Stereo in b_Stereo_dic.items():
        if b_map not in o_p_Stereo_dic.keys():
            pass
        elif b_map in o_p_Stereo_dic.keys() and Stereo != o_p_Stereo_dic[b_map] and b_map not in [tuple([int(i) for i in i.split(':')[:2]]) for i in stereo_edits]:
            bond = b.GetBondBetweenAtoms(dic_map_idx[b_map[0]],dic_map_idx[b_map[1]])
            bond.SetStereo(o_p_Stereo_dic[b_map])
    # Apply the recorded edits.
    b = apply_charge_change(b,charge_edits)
    if chai_edits == []:
        # No explicit chirality edits: re-impose the product's labels.
        o_chai_edits = get_original_chair_edit(p,b)
        b = apply_chirality_change(b,o_chai_edits)
    else:
        b = apply_chirality_change(b,chai_edits)
    b = Chem.MolFromSmiles(Chem.MolToSmiles(b,canonical = False))
    b = apply_stereo_change(b,stereo_edits)
    # Atom-map numbers are deliberately left in place here.
    for bond in b.GetBonds():
        if bond.GetStereo() == Chem.rdchem.BondStereo.STEREONONE:
            bond.SetStereo(Chem.rdchem.BondStereo.STEREOANY)
        else:
            pass
    pre_smiles = Chem.MolToSmiles(b)
    # No textual clean-up or re-canonicalisation is applied to the result.
    return pre_smiles
def run_get_p_b_l_check(rxn):
    """Encode a reaction and accept it only if it decodes back to itself.

    The reaction is encoded with ``run_get_p_b_l_forward``, decoded with
    ``run_get_p_b_l_backward``, and the decoded SMILES is compared with the
    canonical, map-free reactant SMILES.

    Parameters
    ----------
    rxn:
        Reaction SMILES of the form ``"reactants>>product"``.

    Returns
    -------
    tuple or str
        ``(p, core_edits, chai_edits, stereo_edits, charge_edits,
        core_edits_add, lg_map_lis)`` on success, otherwise

        * ``'error type 3'`` - encoding failed;
        * ``'error type 5'`` - decoding raised;
        * ``'error type 4'`` - the decoded reactants differ from the input,
          or the edits fall outside the supported range.
    """
    try:
        encoded = run_get_p_b_l_forward(rxn)
        # The forward pass reports failure by returning an error string.
        if isinstance(encoded, str):
            return 'error type 3'
        p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis = encoded
    except Exception:
        return 'error type 3'

    try:
        pre_smiles = run_get_p_b_l_backward(p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis)
    except Exception:
        # Decoding failures get their own code (5).
        return 'error type 5'

    # Canonical, map-free reactant SMILES to compare against.
    r = rxn.split('>>')[0]
    reac_mol = Chem.MolFromSmiles(r)
    for atom in reac_mol.GetAtoms():
        atom.SetAtomMapNum(0)
    reac_mol_smiles = Chem.MolToSmiles(reac_mol)
    reac_mol_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(reac_mol_smiles))

    # The last three characters of an attachment edit are parsed as a float.
    added_orders = [float(i[-3:]) for i in core_edits_add]
    if not added_orders:
        max_add = 0
    elif max(added_orders) == 1:
        max_add = 1
    else:
        max_add = 2
    # Only the final character of each charge edit is inspected.
    # NOTE(review): a sign or a multi-digit value would be lost here - confirm
    # the format produced by get_charge_edit_mine.
    charges = [int(i[-1]) for i in charge_edits] + [0]

    if pre_smiles == reac_mol_smiles and len(core_edits_add) <= 1 and max_add <= 1 and max(charges) <= 1 and min(charges) >= -1:
        return p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis
    return 'error type 4'
def get_atom_pair_bond_idx_dic(concise_smiles):
    """Map each bonded atom pair to the position of its bond line in Indigo's molfile.

    Parameters
    ----------
    concise_smiles: str
        SMILES string loaded by both Indigo and RDKit.

    Returns
    -------
    dict
        ``{(low_atom, high_atom): bond_position}`` where the atom numbers are
        the integers read from the bond line and ``bond_position`` counts the
        bond lines from 0.
    """
    mol_block_lines = indigo.loadMolecule(concise_smiles).molfile().split('\n')
    # RDKit is used only to count atoms and bonds.
    rd_mol = Chem.MolFromSmiles(concise_smiles, sanitize=False)
    n_atoms = len(rd_mol.GetAtoms())
    n_bonds = len(rd_mol.GetBonds())
    # Assumes four lines precede the atom block (V2000-style layout) and that
    # atom numbers sit in two 3-character columns -- confirm against Indigo output.
    first_bond_line = 4 + n_atoms
    pair_to_bond_idx = {}
    for bond_position, bond_line in enumerate(mol_block_lines[first_bond_line:first_bond_line + n_bonds]):
        begin_atom = int(bond_line[:3])
        end_atom = int(bond_line[3:6])
        if begin_atom <= end_atom:
            pair = (begin_atom, end_atom)
        else:
            pair = (end_atom, begin_atom)
        pair_to_bond_idx[pair] = bond_position
    return pair_to_bond_idx
def get_rm_token_lis(concise_smiles,detailed_smiles):
    """List, per character of ``detailed_smiles``, what is missing from ``concise_smiles``.

    Parameters
    ----------
    concise_smiles: str
        The shorter string.
    detailed_smiles: str
        The longer string, expected to be ``concise_smiles`` with extra
        characters inserted.

    Returns
    -------
    list or None
        One entry per character of ``detailed_smiles``: ``' '`` when the
        character is also present in ``concise_smiles``, otherwise the extra
        character itself. When the two strings cannot be aligned this way,
        ``'error'`` is printed and ``None`` is returned.
    """
    original_length = len(detailed_smiles)
    remaining = detailed_smiles
    removed = []
    pos = 0
    for _ in range(original_length):
        current = remaining[pos]
        if current == concise_smiles[pos]:
            # Shared character: keep it and move on.
            pos += 1
            removed.append(' ')
            continue
        # Extra character: record it and delete it from the working copy.
        removed.append(current)
        remaining = remaining[:pos] + remaining[pos + 1:]
    if remaining != concise_smiles or len(removed) != original_length:
        print('error')
        return None
    return removed
def get_bond_token_lis(detailed_smiles):
    """Flag the bond symbols of a SMILES string, character by character.

    Parameters
    ----------
    detailed_smiles: str
        SMILES string to scan.

    Returns
    -------
    list
        One entry per character: the character itself when it is one of
        ``- = # : / \\`` and is not immediately followed by ``]``, otherwise
        ``' '``.
    """
    bond_symbols = ('-', '=', '#', ':', '/', '\\')
    bond_token_lis = []
    for pos, char in enumerate(detailed_smiles):
        # Slicing yields '' at the end of the string, so a trailing bond
        # symbol no longer raises IndexError.
        following = detailed_smiles[pos + 1:pos + 2]
        # A symbol directly before ']' sits inside a bracket atom (e.g. the
        # charge in "[O-]") and is not counted as a bond.
        if char in bond_symbols and following != ']':
            bond_token_lis.append(char)
        else:
            bond_token_lis.append(' ')
    return bond_token_lis
def get_bond_token_idx_dic(bond_token_lis):
    """Map a running bond count to the number of tokens consumed so far.

    Parameters
    ----------
    bond_token_lis: list
        Per-character list in which ``' '`` means "not a bond symbol".

    Returns
    -------
    dict
        For every bond count reached while scanning, the 1-based position of
        the last token seen at that count. When each bond symbol is followed
        by a non-bond token, entry ``k`` equals the 0-based index of bond
        symbol number ``k`` (counting from 0).
    """
    count_to_position = {}
    bonds_seen = 0
    for position, token in enumerate(bond_token_lis, start=1):
        if token != ' ':
            bonds_seen += 1
        # Later tokens with the same bond count overwrite earlier ones.
        count_to_position[bonds_seen] = position
    return count_to_position
def rerank_special_bond(mol_block_indigo_lis,bond_idx):
    """Swap the first two 3-character fields of one bond line of a mol block.

    Parameters
    ----------
    mol_block_indigo_lis: list
        Mol block split into lines; modified in place.
    bond_idx: int
        Position of the bond line, counted from the first bond line.

    Returns
    -------
    list
        The same list object that was passed in.
    """
    mol = Chem.MolFromMolBlock('\n'.join(mol_block_indigo_lis), removeHs=False)
    # Bond lines start after four lines plus one line per atom.
    line_no = mol.GetNumAtoms() + 4 + bond_idx
    bond_line = mol_block_indigo_lis[line_no]
    first_field = bond_line[:3]
    second_field = bond_line[3:6]
    mol_block_indigo_lis[line_no] = second_field + first_field + bond_line[6:]
    return mol_block_indigo_lis
def get_caption_r(caption):
    """Replace every ``{...}`` group in ``caption`` by a numbered ``[<n>Au]`` placeholder.

    Parameters
    ----------
    caption: str
        Text that may contain brace-delimited groups.

    Returns
    -------
    tuple
        ``(caption_r, words)``: the text with placeholders numbered from 401
        upwards, and the list of replaced groups including their braces.
    """
    words = ['{' + inner + '}' for inner in re.findall(r'[{](.*?)[}]', caption)]
    caption_r = caption
    for number, word in enumerate(words, start=401):
        # Only the first remaining occurrence is replaced for each group.
        caption_r = caption_r.replace(word, '[{}Au]'.format(number), 1)
    return caption_r, words
def get_b_smiles_detailed_smiles(caption_r,smiles):
    """Pad a marked-up SMILES so it lines up with the all-bonds-explicit SMILES.

    Parameters
    ----------
    caption_r: str
        SMILES carrying edit marks in place of some bond symbols.
    smiles: str
        SMILES of the same molecule, parsed with RDKit without sanitization.

    Returns
    -------
    tuple
        ``(b_smiles, detailed_smiles)``; ``detailed_smiles`` is RDKit's
        non-canonical output with every bond explicit, and ``b_smiles`` is
        ``caption_r`` with the missing characters inserted. In both strings
        ``/`` and ``\\`` are followed by an extra ``-``.
    """
    edit_marks = ('!', '_', ';', '^', '&', '{', '}', '。', '《', '》')
    b_smiles = caption_r.replace('/', '/-').replace('\\', '\\-')
    # Drop the '-' again where it ended up in front of a '!' or '?'.
    b_smiles = b_smiles.replace('-!', '!').replace('-?', '?')
    mol_tmp = Chem.MolFromSmiles(smiles, sanitize=False)
    detailed_smiles = Chem.MolToSmiles(mol_tmp, canonical=False, allBondsExplicit=True)
    detailed_smiles = detailed_smiles.replace('/', '/-').replace('\\', '\\-')
    for pos, expected in enumerate(detailed_smiles):
        current = b_smiles[pos]
        if current == expected or current in edit_marks:
            # Same character, or an edit mark standing in for a bond symbol.
            continue
        # The character is absent from the marked string: insert it here.
        b_smiles = b_smiles[:pos] + expected + b_smiles[pos:]
    return b_smiles, detailed_smiles
def get_bond_dic(b_smiles,detailed_smiles):
    """Collect the characters of ``b_smiles`` that differ from ``detailed_smiles``.

    Parameters
    ----------
    b_smiles: str
        Marked-up SMILES, aligned character by character with ``detailed_smiles``.
    detailed_smiles: str
        Reference SMILES with explicit bond symbols.

    Returns
    -------
    dict
        ``{bond_count: differing_character}`` where ``bond_count`` is the
        number of ``- = # :`` symbols seen in the reference before the
        differing position.
    """
    # '-]' is collapsed to ']' in both strings before the comparison.
    marked = b_smiles.replace('-]', ']')
    reference = detailed_smiles.replace('-]', ']')
    bond_dic = {}
    bonds_seen = 0
    for ref_char, marked_char in zip(reference, marked):
        if marked_char != ref_char:
            bond_dic[bonds_seen] = marked_char
        if ref_char in ('-', '=', '#', ':'):
            bonds_seen += 1
    return bond_dic
def get_t_smiles(e_smiles,o_smiles):
    """Copy the isotope labels of a marked-up SMILES onto the original molecule.

    Parameters
    ----------
    e_smiles: str
        Marked-up SMILES; its edit marks are turned into ``-`` or ``=`` so
        RDKit can parse it.
    o_smiles: str
        Original SMILES; assumed to list its atoms in the same order as
        ``e_smiles`` -- verify against the caller.

    Returns
    -------
    str
        Non-canonical SMILES of ``o_smiles`` with atom maps removed and the
        non-zero isotopes of ``e_smiles`` applied atom by atom.
    """
    mark_to_bond = str.maketrans({
        '!': '-', '_': '-', ';': '-', '^': '-',
        '&': '=', '{': '=', '}': '=', '。': '=', '《': '=', '》': '=',
    })
    e_smiles_r = e_smiles.translate(mark_to_bond)
    mol_r = Chem.MolFromSmiles(e_smiles_r, sanitize=False)
    a = Chem.MolFromSmiles(o_smiles, sanitize=False)
    for atom in a.GetAtoms():
        atom.SetAtomMapNum(0)
    for atom in mol_r.GetAtoms():
        isotope = atom.GetIsotope()
        if isotope != 0:
            a.GetAtomWithIdx(atom.GetIdx()).SetIsotope(isotope)
    return Chem.MolToSmiles(a, canonical=False)
def get_b_smiles(p_b):
    """Encode the edits of ``p_b`` as a marked-up SMILES and verify the round trip.

    The encoding and decoding steps used to be written out inline here,
    statement for statement the same as ``get_b_smiles_forward`` and
    ``get_b_smiles_backward``; this function now calls those two instead.

    Parameters
    ----------
    p_b: sequence
        ``p_b[0]`` is the atom-mapped SMILES; ``p_b[1]`` to ``p_b[5]`` are
        ``core_edits``, ``chai_edits``, ``stereo_edits``, ``charge_edits``
        and ``core_edits_add``.

    Returns
    -------
    str
        The marked-up SMILES when decoding it reproduces all five edit lists
        (compared after sorting), otherwise ``'error'``.

    Notes
    -----
    The marked-up SMILES is appended to ``out_b_smiles_lis`` before the check.
    That name is not defined in this function; it is expected to exist at
    module level -- TODO confirm it is defined elsewhere in the file.
    """
    core_edits = p_b[1]
    chai_edits = p_b[2]
    stereo_edits = p_b[3]
    charge_edits = p_b[4]
    core_edits_add = p_b[5]

    caption = get_b_smiles_forward(p_b)
    out_b_smiles_lis.append(caption)

    core_edits_, chai_edits_, stereo_edits_, charge_edits_, core_edits_add_ = get_b_smiles_backward(caption, p_b[0])

    round_trip_ok = (
        sorted(core_edits_) == sorted(core_edits)
        and sorted(chai_edits_) == sorted(chai_edits)
        and sorted(stereo_edits_) == sorted(stereo_edits)
        and sorted(charge_edits_) == sorted(charge_edits)
        and sorted(core_edits_add_) == sorted(core_edits_add)
    )
    if not round_trip_ok:
        # Same diagnostics as before: decoded edits next to the expected ones.
        print(core_edits_, core_edits)
        print(chai_edits_, chai_edits)
        print(core_edits_add_, core_edits_add)
        return 'error'
    return caption
def get_b_smiles_forward(p_b):
    """Write the edits of ``p_b`` into its SMILES as isotope labels and bond marks.

    Parameters
    ----------
    p_b: sequence
        ``p_b[0]`` is the atom-mapped SMILES; ``p_b[1]`` to ``p_b[5]`` are
        ``core_edits``, ``chai_edits``, ``stereo_edits``, ``charge_edits``
        and ``core_edits_add``. Every edit is a ``':'``-separated string whose
        first two fields are atom map numbers and whose fourth field is the
        new value.

    Returns
    -------
    str
        Kekulized, non-canonical SMILES without atom maps in which atom-level
        edits are stored as isotope numbers and bond-level edits replace or
        precede the affected bond with one of ``! _ ; ^ & { } 。 《 》``.
    """
    o_smiles = p_b[0]
    core_edits = p_b[1]
    chai_edits = p_b[2]
    stereo_edits = p_b[3]
    charge_edits = p_b[4]
    core_edits_add = p_b[5]

    # ---- atom-level marks: atom map number -> isotope value ----
    atom_idx_mark_dic = {}

    def _add_weight(atom_map, weight):
        # Weights accumulate so one isotope number can carry several marks.
        atom_idx_mark_dic[atom_map] = atom_idx_mark_dic.get(atom_map, 0) + weight

    for edit in core_edits:
        fields = edit.split(':')
        b = int(fields[0])
        e = int(fields[1])
        new_b = fields[3]
        # An edit with a zero endpoint is stored on the other atom; only a
        # new value of '0.0' produces a mark (9).
        if min(b, e) == 0 and new_b == '0.0':
            atom_idx_mark_dic[max(b, e)] = 9

    chai_weights = {'R': 10, 'S': 20, '?': 30}
    for edit in chai_edits:
        fields = edit.split(':')
        if fields[3] in chai_weights:
            _add_weight(int(fields[0]), chai_weights[fields[3]])

    charge_weights = {'1': 200, '0': 400, '-1': 600}
    for edit in charge_edits:
        fields = edit.split(':')
        if fields[3] in charge_weights:
            _add_weight(int(fields[0]), charge_weights[fields[3]])

    for edit in core_edits_add:
        fields = edit.split(':')
        # Both endpoints of an added bond receive the 100 weight.
        _add_weight(int(fields[0]), 100)
        _add_weight(int(fields[1]), 100)

    a = Chem.MolFromSmiles(o_smiles, sanitize=False)
    for atom in a.GetAtoms():
        atom_map = atom.GetAtomMapNum()
        if atom_map in atom_idx_mark_dic:
            atom.SetIsotope(atom_idx_mark_dic[atom_map])
        atom.SetAtomMapNum(0)
    mol = copy.deepcopy(a)

    # ---- three renderings of the same molecule ----
    detailed_smiles = Chem.MolToSmiles(mol, canonical=False, allBondsExplicit=True, kekuleSmiles=True)
    concise_smiles = Chem.MolToSmiles(mol, canonical=False, kekuleSmiles=True)
    concise_smiles_no_chirality = Chem.MolToSmiles(mol, canonical=False, isomericSmiles=False, kekuleSmiles=True)

    atom_pair_bond_idx_dic = get_atom_pair_bond_idx_dic(concise_smiles_no_chirality)
    rm_token_lis = get_rm_token_lis(concise_smiles, detailed_smiles)
    bond_token_lis = get_bond_token_lis(detailed_smiles)
    bond_token_idx_dic = get_bond_token_idx_dic(bond_token_lis)

    # ---- bond-level marks: bond position -> mark character ----
    core_marks = {'0.0': '!', '1.0': '_', '2.0': ';', '3.0': '^'}
    stereo_only_marks = {'a': '&', 'e': '{', 'z': '}'}
    # Used when the bond already carries a core mark.
    stereo_on_core_marks = {'a': '。', 'e': '《', 'z': '》'}
    bond_idx_mark_dic = {}

    # NOTE(review): when an edit carries a value outside these tables, `mark`
    # keeps whatever the previous edit assigned (or is unbound on the very
    # first edit). This mirrors the long-standing behaviour.
    for edit in core_edits:
        fields = edit.split(':')
        b = int(fields[0])
        e = int(fields[1])
        new_b = fields[3]
        if min(b, e) == 0:
            continue
        bond_idx = atom_pair_bond_idx_dic[min(b, e), max(b, e)]
        if new_b in core_marks:
            mark = core_marks[new_b]
        bond_idx_mark_dic[bond_idx] = mark

    for edit in stereo_edits:
        fields = edit.split(':')
        b = int(fields[0])
        e = int(fields[1])
        new_b = fields[3]
        if min(b, e) == 0:
            continue
        bond_idx = atom_pair_bond_idx_dic[min(b, e), max(b, e)]
        if bond_idx in bond_idx_mark_dic:
            table = stereo_on_core_marks
        else:
            table = stereo_only_marks
        if new_b in table:
            mark = table[new_b]
        bond_idx_mark_dic[bond_idx] = mark

    for bond_idx, bond_mark in bond_idx_mark_dic.items():
        rm_token_lis[bond_token_idx_dic[bond_idx]] = bond_mark

    # ---- assemble: shared characters and marks are kept, other extras dropped ----
    all_marks = ('!', '_', ';', '^', '&', '{', '}', '。', '《', '》')
    new_smiles_lis = []
    for i, token in enumerate(rm_token_lis):
        if token == ' ':
            new_smiles_lis.append(detailed_smiles[i])
        elif token[-1] in all_marks:
            new_smiles_lis.append(token)
    return ''.join(new_smiles_lis)
def get_b_smiles_backward(caption_r,o_smiles):
    """Decode a marked-up SMILES back into its five edit lists.

    Parameters
    ----------
    caption_r: str
        Marked-up SMILES with isotope labels and bond marks.
    o_smiles: str
        SMILES of the unedited molecule.

    Returns
    -------
    tuple
        ``(core_edits_, chai_edits_, stereo_edits_, charge_edits_,
        core_edits_add_)``; each entry is a list of ``'a:b:old:new'`` strings
        whose atom numbers are 1-based atom positions.
    """
    t_smiles = get_t_smiles(caption_r, o_smiles)
    b_smiles, detailed_smiles = get_b_smiles_detailed_smiles(caption_r, t_smiles)
    bond_dic = get_bond_dic(b_smiles, detailed_smiles)

    # Invert the pair -> bond position map so marks can be traced back to atoms.
    atom_pair_bond_idx = {}
    for atom_pair, bond_idx in get_atom_pair_bond_idx_dic(o_smiles).items():
        atom_pair_bond_idx[bond_idx] = atom_pair

    mol = Chem.MolFromSmiles(t_smiles)
    Chem.Kekulize(mol)

    core_edits_ = []
    chai_edits_ = []
    stereo_edits_ = []
    charge_edits_ = []
    core_edits_add_ = []

    # Marks that change the bond order, and marks that set a stereo code.
    # '。', '《' and '》' appear in both tables.
    new_order_of_mark = {'!': '0.0', '_': '1.0', ';': '2.0', '^': '3.0',
                         '。': '2.0', '《': '2.0', '》': '2.0'}
    stereo_of_mark = {'&': 'a', '{': 'e', '}': 'z',
                      '。': 'a', '《': 'e', '》': 'z'}

    for bond_idx, mark in bond_dic.items():
        b, e = atom_pair_bond_idx[bond_idx]
        o_bond = mol.GetBondBetweenAtoms(b - 1, e - 1).GetBondTypeAsDouble()
        if mark in new_order_of_mark:
            core_edits_.append('{}:{}:{}:{}'.format(b, e, o_bond, new_order_of_mark[mark]))
        if mark in stereo_of_mark:
            stereo_edits_.append('{}:{}:{}:{}'.format(b, e, 0, stereo_of_mark[mark]))

    chai_of_tens = {1: 'R', 2: 'S', 3: '?'}
    charge_of_hundreds = {2: 1, 3: 1, 4: 0, 5: 0, 6: -1, 7: -1}
    core_edits_add_atom_lis = []
    for atom in mol.GetAtoms():
        # Split the isotope number into hundreds / tens / units digits.
        hundreds, rest = divmod(atom.GetIsotope(), 100)
        tens, units = divmod(rest, 10)
        atom_no = atom.GetIdx() + 1
        if units == 9:
            core_edits_.append('{}:{}:{}:{}'.format(atom_no, 0, '1.0', '0.0'))
        if tens in chai_of_tens:
            chai_edits_.append('{}:{}:{}:{}'.format(atom_no, 0, '0', chai_of_tens[tens]))
        if hundreds in charge_of_hundreds:
            charge_edits_.append('{}:{}:{}:{}'.format(atom_no, 0, '0', charge_of_hundreds[hundreds]))
        # An odd hundreds digit flags an endpoint of an added bond.
        if hundreds % 2 == 1:
            core_edits_add_atom_lis.append(atom_no)

    if core_edits_add_atom_lis != []:
        # Only the first two flagged atoms are paired into an added bond.
        core_edits_add_.append('{}:{}:{}:{}'.format(core_edits_add_atom_lis[0], core_edits_add_atom_lis[1], '0.0', '1.0'))
    return core_edits_, chai_edits_, stereo_edits_, charge_edits_, core_edits_add_
def get_b_smiles_check(p_b):
    """Encode the edits of ``p_b`` and confirm that decoding gives them back.

    Parameters
    ----------
    p_b: sequence
        Exactly seven items: the SMILES, ``core_edits``, ``chai_edits``,
        ``stereo_edits``, ``charge_edits``, ``core_edits_add`` and
        ``lg_map_lis``.

    Returns
    -------
    str
        The marked-up SMILES from ``get_b_smiles_forward`` when the decoded
        edit lists equal the originals after sorting, otherwise ``'error'``.
    """
    p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis = p_b
    b_smiles = get_b_smiles_forward(p_b)
    decoded = get_b_smiles_backward(b_smiles, p_b[0])
    core_edits_, chai_edits_, stereo_edits_, charge_edits_, core_edits_add_ = decoded
    expected = (core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add)
    round_trip_ok = all(
        sorted(got) == sorted(want) for got, want in zip(decoded, expected)
    )
    if round_trip_ok:
        return b_smiles
    # Print the decoded lists next to the expected ones for inspection.
    print(core_edits_, core_edits)
    print(chai_edits_, chai_edits)
    print(core_edits_add_, core_edits_add)
    return 'error'
import re
def replacenth(string, sub, wanted, n):
    """Replace the ``n``-th occurrence of ``sub`` in ``string`` by ``wanted``.

    Parameters
    ----------
    string: str
        Text to search.
    sub: str
        Literal substring to look for (not a regular expression).
    wanted: str
        Replacement text.
    n: int
        1-based index among the non-overlapping occurrences.

    Returns
    -------
    str
        ``string`` with that single occurrence replaced.

    Raises
    ------
    IndexError
        If ``string`` has fewer than ``n`` occurrences of ``sub``.
    """
    # Escape ``sub`` so regex metacharacters are matched literally, in line
    # with the literal replacement below.
    starts = [m.start() for m in re.finditer(re.escape(sub), string)]
    where = starts[n - 1]
    return string[:where] + wanted + string[where + len(sub):]
def cano_smiles_map(smiles):
    """Rewrite a mapped SMILES through RDKit while keeping its atom map numbers.

    Parameters
    ----------
    smiles: str
        Atom-mapped SMILES, parsed without sanitization.

    Returns
    -------
    str
        Kekulized, non-canonical SMILES written after the map numbers were
        stripped, re-parsed, and given the original map numbers back by atom
        index.
    """
    mol = Chem.MolFromSmiles(smiles, sanitize=False)
    atom_map_lis = [atom.GetAtomMapNum() for atom in mol.GetAtoms()]
    for atom in mol.GetAtoms():
        atom.SetAtomMapNum(0)
    unmapped = Chem.MolToSmiles(mol, canonical=False, kekuleSmiles=True)

    # Assumes the re-parsed molecule keeps the original atom order -- TODO confirm.
    mol = Chem.MolFromSmiles(unmapped, sanitize=False)
    for atom in mol.GetAtoms():
        atom.SetAtomMapNum(atom_map_lis[atom.GetIdx()])
    return Chem.MolToSmiles(mol, canonical=False, kekuleSmiles=True)
def get_lg_forward(core_edits,lg_map):
    """Group leaving-group strings by the atom they attach to.

    Parameters
    ----------
    core_edits: list
        ``'a:b:old:new'`` strings; the atoms of every edit with
        ``old > new`` become attachment points (0 is ignored).
    lg_map: iterable
        ``(lg, map_lis)`` pairs, where ``map_lis`` lists the attachment atoms
        of the group ``lg``.

    Returns
    -------
    list
        One sorted tuple of group strings per attachment atom, in ascending
        atom order. Groups attached to more than one distinct atom are stored
        under each of those atoms with a trailing ``*``.
    """
    touched = set()
    for core_edit in core_edits:
        fields = core_edit.split(':')
        if float(fields[2]) - float(fields[3]) > 0:
            touched.add(int(fields[0]))
            touched.add(int(fields[1]))
    touched.discard(0)
    attach_idx = sorted(touched)

    lg_lis = [()] * len(attach_idx)

    def _attach(map_, group):
        slot = attach_idx.index(map_)
        lg_lis[slot] = lg_lis[slot] + (group,)

    for lg, map_lis in lg_map:
        if len(map_lis) == 1 or len(set(map_lis)) == 1:
            # Single attachment atom, possibly listed several times.
            _attach(map_lis[0], lg)
            continue
        colon_count = lg.count(':')
        if colon_count == 1:
            for map_ in map_lis:
                _attach(map_, lg + "*")
        elif colon_count == 2:
            # Turn one of the two ':1' labels into ':2'; which one depends on
            # the order of the two attachment atoms.
            if map_lis[0] < map_lis[1]:
                lg = replacenth(lg, ':1', ':2', 2)
            else:
                lg = lg.replace(':1', ':2', 1)
            for map_ in map_lis:
                _attach(map_, lg + "*")
        else:
            print('error')
    return [tuple(sorted(groups)) for groups in lg_lis]
def get_lg_backward(core_edits_,lg_lis):
    """Rebuild ``(lg, map_lis)`` pairs from per-atom leaving-group tuples.

    Parameters
    ----------
    core_edits_: list
        ``'a:b:old:new'`` strings; the atoms of every edit with
        ``old > new`` are the attachment points (0 is ignored).
    lg_lis: list
        One tuple of group strings per attachment atom, in ascending atom
        order.

    Returns
    -------
    list
        ``(lg, map_lis)`` pairs: first the groups without ``*``, then one
        pair per distinct starred group with the ``*`` removed, ``:2``
        rewritten to ``:1`` and the attachment atoms collected in a list.
    """
    touched = set()
    for core_edit in core_edits_:
        fields = core_edit.split(':')
        if float(fields[2]) - float(fields[3]) > 0:
            touched.add(int(fields[0]))
            touched.add(int(fields[1]))
    touched.discard(0)
    attach_idx = sorted(touched)

    plain_groups = []
    starred_maps = {}
    for id_, groups in zip(attach_idx, lg_lis):
        for lg in groups:
            if '*' in lg:
                # Starred groups are shared between atoms: gather their atoms.
                starred_maps.setdefault(lg, []).append(id_)
                continue
            colon_count = lg.count(':')
            # A group with several ':' labels repeats its atom once per label.
            maps = [id_] * colon_count if colon_count > 1 else [id_]
            plain_groups.append((lg, maps))

    lg_map_new = plain_groups
    for starred, maps in starred_maps.items():
        bare = starred.replace('*', '')
        if ':2' not in starred:
            lg_map_new.append((bare, maps))
            continue
        # When ':2' comes before ':1' in the string, the atom order is flipped.
        if starred.index(':1') > starred.index(':2'):
            maps.reverse()
        lg_map_new.append((bare.replace(':2', ':1'), maps))
    return lg_map_new
# Every combination of a hundreds weight (Greek letters), a tens weight
# ('r', 's', '?') and a units weight ('~'), paired with the decimal string of
# the summed weights.
_symbol_code_pairs = [
    (units_sym + tens_sym + hundreds_sym, str(hundreds + tens + units))
    for hundreds, hundreds_sym in zip([0, 200, 400, 600, 100, 300, 500, 700],
                                      ['', 'α', 'β', 'γ', 'δ', 'αδ', 'βδ', 'γδ'])
    for tens, tens_sym in zip([0, 10, 20, 30], ['', 'r', 's', '?'])
    for units, units_sym in zip([0, 9], ['', '~'])
]
# symbol -> number, longest symbols first so that replacing in dictionary
# order handles longer symbols before their shorter prefixes.
dic_str_to_num = {
    symbol: number
    for length in (4, 3, 2, 1)
    for symbol, number in _symbol_code_pairs
    if len(symbol) == length
}
# number -> symbol, longest numbers first; the empty symbol (number '0') is
# left out.
dic_num_to_str = {
    number: symbol
    for length in (3, 2, 1)
    for symbol, number in _symbol_code_pairs
    if len(number) == length and len(symbol) != 0
}
def iso_to_symbo(txt,dic_num_to_str):
    """Replace numeric bracket prefixes by their symbols and expand combined bond marks.

    Parameters
    ----------
    txt: str
        Text to rewrite.
    dic_num_to_str: dict
        ``{number_string: symbol}``; replacements run in dictionary order.

    Returns
    -------
    str
        ``txt`` with every ``'[' + number`` replaced by ``'[' + symbol`` and
        ``。``, ``》``, ``《`` replaced by ``;&``, ``;}``, ``;{``.
    """
    for number, symbol in dic_num_to_str.items():
        txt = txt.replace('[' + number, '[' + symbol)
    for combined, expanded in (('。', ';&'), ('》', ';}'), ('《', ';{')):
        txt = txt.replace(combined, expanded)
    return txt
def symbo_to_iso(txt,dic_str_to_num):
    """Replace symbolic bracket prefixes by their numbers and merge two-character bond marks.

    Parameters
    ----------
    txt: str
        Text to rewrite.
    dic_str_to_num: dict
        ``{symbol: number_string}``; replacements run in dictionary order.

    Returns
    -------
    str
        ``txt`` with every ``'[' + symbol`` replaced by ``'[' + number`` and
        ``;&``, ``;}``, ``;{`` replaced by ``。``, ``》``, ``《``.
    """
    for symbol, number in dic_str_to_num.items():
        txt = txt.replace('[' + symbol, '[' + number)
    for expanded, combined in ((';&', '。'), (';}', '》'), (';{', '《')):
        txt = txt.replace(expanded, combined)
    return txt
def merge_smiles_only(text):
    """Turn an E-SMILES string into the SMILES rebuilt from its edits.

    Parameters
    ----------
    text: str
        ``'<smiles>>>><marked smiles><lg,...><lg,...>...'`` in symbolic form.

    Returns
    -------
    str
        The value returned by ``run_get_p_b_l_backward`` for the decoded edits.
    """
    text = symbo_to_iso(text, dic_str_to_num)
    parts = text.split('>>>')
    o_smiles = parts[0]
    # The marked SMILES ends where the first '<...>' group begins.
    b_smiles = parts[1].split('<')[0]
    # One tuple per '<...>' group; an empty group becomes an empty tuple.
    lg_lis = [
        tuple(group.split(',')) if group != '' else tuple()
        for group in re.findall(r"[<](.*?)[>]", text)
    ]
    core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add = get_b_smiles_backward(b_smiles, o_smiles)
    lg_map_lis = get_lg_backward(core_edits, lg_lis)
    # Number the atoms 1..N in parse order before handing over to the rebuild step.
    p = Chem.MolFromSmiles(o_smiles, sanitize=False)
    for atom in p.GetAtoms():
        atom.SetAtomMapNum(atom.GetIdx() + 1)
    p = Chem.MolToSmiles(p)
    return run_get_p_b_l_backward(p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis)
def merge_smiles_with_mapping_only(text):
    """Turn an E-SMILES string into the result of the mapping-aware rebuild step.

    Parameters
    ----------
    text: str
        ``'<smiles>>>><marked smiles><lg,...><lg,...>...'`` in symbolic form.

    Returns
    -------
    str
        The value returned by ``run_get_p_b_l_backward_with_mapping`` for the
        decoded edits.
    """
    text = symbo_to_iso(text, dic_str_to_num)
    parts = text.split('>>>')
    o_smiles = parts[0]
    # The marked SMILES ends where the first '<...>' group begins.
    b_smiles = parts[1].split('<')[0]
    # One tuple per '<...>' group; an empty group becomes an empty tuple.
    lg_lis = [
        tuple(group.split(',')) if group != '' else tuple()
        for group in re.findall(r"[<](.*?)[>]", text)
    ]
    core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add = get_b_smiles_backward(b_smiles, o_smiles)
    lg_map_lis = get_lg_backward(core_edits, lg_lis)
    # Number the atoms 1..N in parse order before handing over to the rebuild step.
    p = Chem.MolFromSmiles(o_smiles, sanitize=False)
    for atom in p.GetAtoms():
        atom.SetAtomMapNum(atom.GetIdx() + 1)
    p = Chem.MolToSmiles(p)
    return run_get_p_b_l_backward_with_mapping(p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis)
def merge_smiles(text):
    """Best-effort wrapper around ``merge_smiles_only``.

    Parameters
    ----------
    text: str
        E-SMILES string.

    Returns
    -------
    str
        The result of ``merge_smiles_only(text)``, or ``""`` when it raises.
    """
    try:
        return merge_smiles_only(text)
    except Exception:
        # Any decoding failure maps to an empty string. KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return ""
def merge_smiles_with_mapping(text):
    """Best-effort wrapper around ``merge_smiles_with_mapping_only``.

    Parameters
    ----------
    text: str
        E-SMILES string.

    Returns
    -------
    str
        The result of ``merge_smiles_with_mapping_only(text)``, or ``""``
        when it raises.
    """
    try:
        return merge_smiles_with_mapping_only(text)
    except Exception:
        # Any decoding failure maps to an empty string. KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return ""
def get_e_smiles(rxn):
    """Build the E-SMILES string of a reaction without the reactant round-trip check.

    Parameters
    ----------
    rxn: str
        Reaction SMILES passed to ``run_get_p_b_l_forward``.

    Returns
    -------
    str
        ``'<smiles>>>><marked smiles><lg,...>...'`` in symbolic form. The
        marked part is whatever ``get_b_smiles_check`` returns, which is the
        literal ``'error'`` when its own check fails.
    """
    p_b = run_get_p_b_l_forward(rxn)
    b_smiles = get_b_smiles_check(p_b)
    lg_lis = get_lg_forward(p_b[1], p_b[6])

    # Left-hand side: the SMILES in p_b[0] without atom maps, in its original atom order.
    product = Chem.MolFromSmiles(p_b[0], sanitize=False)
    for atom in product.GetAtoms():
        atom.SetAtomMapNum(0)
    product_smiles = Chem.MolToSmiles(product, canonical=False)

    # One '<...>' group per attachment atom, groups separated by commas.
    lg_text = ''.join('<{}>'.format(','.join(groups)) for groups in lg_lis)
    txt = product_smiles + '>>>' + b_smiles + lg_text
    return iso_to_symbo(txt, dic_num_to_str)
def get_e_smiles_with_check(rxn):
    """Build the E-SMILES string of a reaction after the reactant round-trip check.

    Parameters
    ----------
    rxn: str
        Reaction SMILES passed to ``run_get_p_b_l_check``.

    Returns
    -------
    str
        ``'<smiles>>>><marked smiles><lg,...>...'`` in symbolic form. The
        marked part is whatever ``get_b_smiles_check`` returns, which is the
        literal ``'error'`` when its own check fails.

    Raises
    ------
    ValueError
        If ``run_get_p_b_l_check`` reports a failure (it then returns an
        ``'error type N'`` string instead of the edit tuple).
    """
    p_b = run_get_p_b_l_check(rxn)
    if isinstance(p_b, str):
        # Previously the error string reached tuple unpacking and surfaced as
        # an opaque "too many values to unpack" ValueError. The exception type
        # is unchanged, but the message now names the failed check.
        raise ValueError('reaction failed the edit check: {}'.format(p_b))
    b_smiles = get_b_smiles_check(p_b)
    lg_lis = get_lg_forward(p_b[1], p_b[6])

    # Left-hand side: the SMILES in p_b[0] without atom maps, in its original atom order.
    product = Chem.MolFromSmiles(p_b[0], sanitize=False)
    for atom in product.GetAtoms():
        atom.SetAtomMapNum(0)
    product_smiles = Chem.MolToSmiles(product, canonical=False)

    # One '<...>' group per attachment atom, groups separated by commas.
    lg_text = ''.join('<{}>'.format(','.join(groups)) for groups in lg_lis)
    txt = product_smiles + '>>>' + b_smiles + lg_text
    return iso_to_symbo(txt, dic_num_to_str)
def get_edit_from_e_smiles(text):
    """Decode an E-SMILES string into its edit lists and leaving-group map.

    Parameters
    ----------
    text: str
        ``'<smiles>>>><marked smiles><lg,...><lg,...>...'`` in symbolic form.

    Returns
    -------
    tuple
        ``(core_edits, chai_edits, stereo_edits, charge_edits,
        core_edits_add, lg_map_lis)``.
    """
    text = symbo_to_iso(text, dic_str_to_num)
    parts = text.split('>>>')
    o_smiles = parts[0]
    # The marked SMILES ends where the first '<...>' group begins.
    b_smiles = parts[1].split('<')[0]
    # One tuple per '<...>' group; an empty group becomes an empty tuple.
    lg_lis = [
        tuple(group.split(',')) if group != '' else tuple()
        for group in re.findall(r"[<](.*?)[>]", text)
    ]
    core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add = get_b_smiles_backward(b_smiles, o_smiles)
    lg_map_lis = get_lg_backward(core_edits, lg_lis)
    return core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis