def get_bond_info(mol: Chem.Mol):
    """Map every bond of ``mol`` to its bond order and bond index.

    Parameters
    ----------
    mol: Chem.Mol
        Molecule whose bonds are inspected. ``None`` is accepted.

    Returns
    -------
    dict
        Keys are 2-tuples holding the atom-map numbers of the two bonded
        atoms in ascending order; values are two-element lists
        ``[bond order as float, bond index]``. An empty dict is returned
        when ``mol`` is ``None``.
    """
    if mol is None:
        return {}

    def _describe(bond):
        # Key on atom-map numbers (not atom indices) so the same bond can be
        # looked up in another molecule that shares the mapping.
        ends = (bond.GetBeginAtom().GetAtomMapNum(),
                bond.GetEndAtom().GetAtomMapNum())
        return tuple(sorted(ends)), [bond.GetBondTypeAsDouble(), bond.GetIdx()]

    return dict(_describe(bond) for bond in mol.GetBonds())
def remove_s_H(frag_mol):
    """Return ``frag_mol`` without its isolated hydrogen atoms.

    An atom is dropped when its atomic number is 1 and its degree is 0,
    i.e. a hydrogen that is bonded to nothing.

    Parameters
    ----------
    frag_mol: Chem.Mol
        Molecule to clean up.

    Returns
    -------
    Chem.Mol
        ``frag_mol`` itself when it holds no isolated hydrogen, otherwise a
        new molecule with those atoms removed.
    """
    lone_h_indices = [
        atom.GetIdx()
        for atom in frag_mol.GetAtoms()
        if atom.GetAtomicNum() == 1 and atom.GetDegree() == 0
    ]
    if not lone_h_indices:
        return frag_mol

    edit_mol = Chem.RWMol(frag_mol)
    # Remove from the highest index down so the indices still waiting in the
    # list are not shifted by an earlier removal.
    for idx in sorted(lone_h_indices, reverse=True):
        edit_mol.RemoveAtom(idx)
    return edit_mol.GetMol()
def apply_edits_to_mol_break(mol, edits):
    """Remove the bonds named by ``edits`` from a hydrogen-expanded copy of ``mol``.

    The molecule is first passed through ``Chem.AddHs`` and ``Chem.Kekulize``
    and every atom is flagged ``NoImplicit``; the edits are then applied to an
    ``RWMol`` built from that molecule.

    Parameters
    ----------
    mol: Chem.Mol,
        RDKit mol object
    edits: Iterable[str],
        Iterable of edits to apply. An edit is structured as a1:a2:b1:b2,
        where a1 and a2 are atom-map numbers. b1 is not used and b2 is only
        parsed as a float. When a2 = 0, the bond from a1 to its first
        hydrogen neighbour is removed; otherwise the a1-a2 bond is removed
        if it exists.

    Returns
    -------
    Chem.Mol
        The edited molecule after ``Chem.RemoveHs(..., sanitize=False)``.
    """
    mol = Chem.AddHs(mol)
    Chem.Kekulize(mol)
    for atom in mol.GetAtoms():
        atom.SetNoImplicit(True)

    new_mol = Chem.RWMol(mol)
    amap = {}
    for atom in new_mol.GetAtoms():
        amap[atom.GetAtomMapNum()] = atom.GetIdx()

    for edit in edits:
        first, second, _prev_bo, new_bo = edit.split(":")
        x = int(first)
        y = int(second)
        float(new_bo)  # parsed only so that a malformed edit still raises

        x_idx = amap[x]
        if y != 0:
            y_idx = amap[y]
            if new_mol.GetBondBetweenAtoms(x_idx, y_idx) is not None:
                new_mol.RemoveBond(x_idx, y_idx)
            continue

        # Neighbours are read from ``mol`` (the hydrogen-expanded molecule
        # that is never edited), not from ``new_mol``, so the hydrogen picked
        # here does not depend on edits applied earlier in this loop.
        hydrogen = next(
            (nbr for nbr in mol.GetAtomWithIdx(x_idx).GetNeighbors()
             if nbr.GetAtomicNum() == 1),
            None,
        )
        if hydrogen is not None:
            new_mol.RemoveBond(x_idx, hydrogen.GetIdx())

    return Chem.RemoveHs(new_mol.GetMol(), sanitize=False)
def correct_mol_1(mol, is_nitrine_c):
    """Return a deep copy of ``mol`` with per-atom flags reset.

    On the copy, every atom gets zero radical electrons, its aromatic flag
    cleared and ``NoImplicit`` set to ``False``. The input molecule is not
    modified.

    Parameters
    ----------
    mol: Chem.Mol
        Molecule to copy and adjust.
    is_nitrine_c: bool
        When equal to ``True``, a neutral nitrogen whose bond orders sum to 4
        and which has no aromatic (1.5) bond is given a formal charge of +1.

    Returns
    -------
    Chem.Mol
        The adjusted copy.
    """
    adjusted = copy.deepcopy(mol)
    for atom in adjusted.GetAtoms():
        # Adjust the charge of N.
        if is_nitrine_c == True and atom.GetAtomicNum() == 7:  # noqa: E712
            orders = [bond.GetBondTypeAsDouble() for bond in atom.GetBonds()]
            if sum(orders) == 4 and 1.5 not in orders and atom.GetFormalCharge() == 0:
                atom.SetFormalCharge(1)

        atom.SetNumRadicalElectrons(0)
        atom.SetIsAromatic(False)
        atom.SetNoImplicit(False)

    return adjusted
def cano_smiles_map(smiles):
    """Rewrite an atom-mapped SMILES as a non-canonical Kekule SMILES.

    The string is parsed without sanitization, the atom-map numbers are
    stripped, the molecule is written and re-parsed, and the saved map
    numbers are then put back by atom index before the final write.

    NOTE(review): restoring by index assumes the write/parse round trip
    keeps the atom order -- confirm against RDKit behaviour.

    Parameters
    ----------
    smiles: str
        Input SMILES, possibly carrying atom-map numbers.

    Returns
    -------
    str
        SMILES written with ``canonical=False`` and ``kekuleSmiles=True``.
    """
    parsed = Chem.MolFromSmiles(smiles, sanitize=False)

    saved_maps = []
    for atom in parsed.GetAtoms():
        saved_maps.append(atom.GetAtomMapNum())
        atom.SetAtomMapNum(0)

    unmapped = Chem.MolToSmiles(parsed, canonical=False, kekuleSmiles=True)
    reparsed = Chem.MolFromSmiles(unmapped, sanitize=False)
    for atom in reparsed.GetAtoms():
        atom.SetAtomMapNum(saved_maps[atom.GetIdx()])

    return Chem.MolToSmiles(reparsed, canonical=False, kekuleSmiles=True)
reac_map_a[bond.GetBeginAtomIdx()],reac_map_a[bond.GetEndAtomIdx()] + r_stereo_dic[tuple(sorted([b_map,e_map]))] = bond.GetStereo() + else: + pass + + stereo_edits = [] + for atom_pair,stereo in r_stereo_dic.items(): + if atom_pair in p_stereo_dic.keys() and stereo != p_stereo_dic[atom_pair]: + if stereo == Chem.rdchem.BondStereo.STEREONONE: + stereo = 'a' + elif stereo == Chem.rdchem.BondStereo.STEREOE: + stereo = 'e' + elif stereo == Chem.rdchem.BondStereo.STEREOZ: + stereo = 'z' + stereo_edits.append('{}:{}:{}:{}'.format(atom_pair[0],atom_pair[1],0,stereo)) + return stereo_edits + + + +def apply_stereo_change(prod_mol,stereo_edits): + p_amap_idx = {atom.GetAtomMapNum(): atom.GetIdx() for atom in prod_mol.GetAtoms()} + + prod_mol = copy.deepcopy(prod_mol) + + + prod_mol_t = copy.deepcopy(prod_mol) + + for stereo_edit in stereo_edits: + + b_map = int(stereo_edit.split(':')[0]) + e_map = int(stereo_edit.split(':')[1]) + + b_n = prod_mol.GetAtomWithIdx(p_amap_idx[b_map]).GetNeighbors() + b_n = [i.GetAtomMapNum() for i in b_n] + b_n = [i for i in b_n if i not in [b_map,e_map]] + + e_n = prod_mol.GetAtomWithIdx(p_amap_idx[e_map]).GetNeighbors() + e_n = [i.GetAtomMapNum() for i in e_n] + e_n = [i for i in e_n if i not in [b_map,e_map]] + + + + f_b_n = b_n[0] + m_cip_rank = 0 + for i in b_n[:]: + + c_cip_rank = int(prod_mol_t.GetAtomWithIdx(p_amap_idx[i]).GetProp('_CIPRank')) + + if c_cip_rank >= m_cip_rank: + f_b_n = i + m_cip_rank = c_cip_rank + + + f_e_n = e_n[0] + m_cip_rank = 0 + for i in e_n[:]: + + c_cip_rank = int(prod_mol_t.GetAtomWithIdx(p_amap_idx[i]).GetProp('_CIPRank')) + + if c_cip_rank >= m_cip_rank: + f_e_n = i + m_cip_rank = c_cip_rank + + + if stereo_edit[-2:] == ':e': + + bond = prod_mol.GetBondBetweenAtoms(p_amap_idx[b_map],p_amap_idx[e_map]) + bond.SetStereo(Chem.rdchem.BondStereo.STEREOE) + + + try: + bond.SetStereoAtoms(p_amap_idx[f_b_n],p_amap_idx[f_e_n]) + except: + bond.SetStereoAtoms(p_amap_idx[f_e_n],p_amap_idx[f_b_n]) + + + + + if 
def neu_sulf_charge(mol):
    """Neutralise O(-1) atoms whose only neighbour is a sulfur of explicit valence 4.

    The molecule is modified in place.

    Parameters
    ----------
    mol: Chem.Mol
        Molecule to adjust.

    Returns
    -------
    Chem.Mol
        The same ``mol`` object that was passed in.
    """
    for atom in mol.GetAtoms():
        if atom.GetAtomicNum() != 8 or atom.GetFormalCharge() != -1:
            continue

        neighbours = atom.GetNeighbors()
        if len(neighbours) != 1:
            continue

        partner = neighbours[0]
        if partner.GetAtomicNum() == 16 and partner.GetExplicitValence() == 4:
            atom.SetFormalCharge(0)

    return mol
def count_kekule_d(r, p):
    """Count bonds whose as-written orders disagree although the sanitized orders agree.

    Each SMILES is parsed twice: once with default sanitization and once
    with ``sanitize=False`` (presumably keeping the bond orders exactly as
    written -- confirm against RDKit). A bond, keyed by its atom-map pair,
    is counted when it exists in both sanitized molecules with the same
    bond order but the unsanitized orders differ.

    Parameters
    ----------
    r: str
        SMILES string of the reactants.
    p: str
        SMILES string of the product.

    Returns
    -------
    int
        Number of such bonds.
    """
    prod_sanitized = get_bond_info(Chem.MolFromSmiles(p))
    prod_raw = get_bond_info(Chem.MolFromSmiles(p, sanitize=False))
    reac_sanitized = get_bond_info(Chem.MolFromSmiles(r))
    reac_raw = get_bond_info(Chem.MolFromSmiles(r, sanitize=False))

    return sum(
        1
        for pair in reac_sanitized
        if pair in prod_sanitized
        and reac_sanitized[pair][0] == prod_sanitized[pair][0]
        and reac_raw[pair][0] != prod_raw[pair][0]
    )
def get_charge_edit_mine(reac_mol, prod_mol, core_edits):
    """List the formal-charge edits that turn product charges into reactant charges.

    Parameters
    ----------
    reac_mol: Chem.Mol
        Reactant molecule with atom-map numbers.
    prod_mol: Chem.Mol
        Product molecule with atom-map numbers.
    core_edits: Iterable[str]
        Edits structured as a1:a2:b1:b2. Atoms taking part in an edit with
        b1 - b2 > 0 are excluded from the result (map number 0 is never
        treated as such an atom).

    Returns
    -------
    List[str]
        One ``"{atom_map}:0:0:{reactant charge}"`` string per product atom
        that also appears in the reactants, carries a different formal
        charge there, and is not excluded. The order follows the product
        atoms.
    """
    excluded = set()
    for core_edit in core_edits:
        first, second, old_bo, new_bo = core_edit.split(':')
        if float(old_bo) - float(new_bo) > 0:
            excluded.update((int(first), int(second)))
    excluded.discard(0)

    reac_charges = {
        atom.GetAtomMapNum(): atom.GetFormalCharge() for atom in reac_mol.GetAtoms()
    }
    prod_charges = {
        atom.GetAtomMapNum(): atom.GetFormalCharge() for atom in prod_mol.GetAtoms()
    }

    return [
        f"{atom_map}:{0}:{0}:{reac_charges[atom_map]}"
        for atom_map, prod_charge in prod_charges.items()
        if atom_map in reac_charges
        and reac_charges[atom_map] != prod_charge
        and atom_map not in excluded
    ]
def apply_charge_change(mol, charge_edits):
    """Set formal charges on ``mol`` according to ``charge_edits``.

    The molecule is modified in place.

    Parameters
    ----------
    mol: Chem.Mol
        Molecule whose atoms carry atom-map numbers.
    charge_edits: Iterable[str]
        Edits structured as a1:a2:c1:c2. Only a1 (atom-map number of the
        atom to change) and c2 (new formal charge) are used. c2 may be
        written as an integer (``"-1"``) or as a whole-number float
        (``"-1.0"``), the form used by other edit strings in this file.

    Returns
    -------
    Chem.Mol
        The same ``mol`` object that was passed in.

    Raises
    ------
    ValueError
        If an edit does not have four fields or c2 is not a whole number.
    KeyError
        If a1 is not an atom-map number present in ``mol``.
    """
    amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in mol.GetAtoms()}
    for edit in charge_edits:
        x, _y, _prev_charge, new_charge = edit.split(":")
        atom = mol.GetAtomWithIdx(amap[int(x)])

        # Parse through float so "1.0" is accepted, but refuse to truncate
        # a fractional (or non-finite) value silently.
        charge = float(new_charge)
        if not charge.is_integer():
            raise ValueError(
                f"formal charge must be a whole number, got {new_charge!r} in edit {edit!r}"
            )
        atom.SetFormalCharge(int(charge))
    return mol
numHs_prod = atom.GetTotalNumHs() + numHs_reac = reac_mol.GetAtomWithIdx(reac_amap[amap_num]).GetTotalNumHs() + if numHs_prod != numHs_reac: + edit = f"{amap_num}:{0}:{1.0}:{0.0}" + core_edits.append(edit) + rxn_core.add(amap_num) + + + return core_edits + + + +def get_chai_edit_mine(reac_mol, prod_mol): + reac_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()} + prod_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()} + + reac_ChiralCenters = [] + for ChiralCenters in Chem.FindMolChiralCenters(reac_mol,includeUnassigned=True): + reac_ChiralCenters.append((reac_map_a[ChiralCenters[0]],ChiralCenters[1])) + + prod_ChiralCenters = [] + for ChiralCenters in Chem.FindMolChiralCenters(prod_mol,includeUnassigned=True): + prod_ChiralCenters.append((prod_map_a[ChiralCenters[0]],ChiralCenters[1])) + + dict_reac_ChiralCenters = dict(reac_ChiralCenters) + dict_prod_ChiralCenters = dict(prod_ChiralCenters) + + + chai_edits = [] + + for amap_num,chiral in dict_prod_ChiralCenters.items(): + if amap_num in dict_reac_ChiralCenters.keys(): + if chiral != dict_reac_ChiralCenters[amap_num]: + edit = f"{amap_num}:{0}:{0}:{dict_reac_ChiralCenters[amap_num]}" + chai_edits.append(edit) + else: + pass + + for amap_num,chiral in dict_reac_ChiralCenters.items(): + if (amap_num not in dict_prod_ChiralCenters.keys()) and (amap_num in prod_map_a.values()) and chiral != '?': + edit = f"{amap_num}:{0}:{0}:{chiral}" + chai_edits.append(edit) + + return chai_edits + + + + + +def get_chai_edit_mine(reac_mol, prod_mol): + reac_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()} + prod_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()} + + reac_ChiralCenters = [] + for ChiralCenters in Chem.FindMolChiralCenters(reac_mol,includeUnassigned=True): + reac_ChiralCenters.append((reac_map_a[ChiralCenters[0]],ChiralCenters[1])) + + prod_ChiralCenters = [] + for ChiralCenters in 
def get_chai_edit_mine(reac_mol, prod_mol):
    """List chirality edits that carry the reactant labels onto product atoms.

    Chiral centres are collected with
    ``Chem.FindMolChiralCenters(..., includeUnassigned=True)`` for both
    molecules and keyed by atom-map number.

    NOTE(review): this name is defined more than once in the file; the
    last definition is the one in effect after import.

    Parameters
    ----------
    reac_mol: Chem.Mol
        Reactant molecule with atom-map numbers.
    prod_mol: Chem.Mol
        Product molecule with atom-map numbers.

    Returns
    -------
    List[str]
        ``"{atom_map}:0:0:{reactant label}"`` strings. First come the
        centres found in both molecules whose labels differ; then the
        centres found only in the reactants whose atom is present in the
        product and whose label is not ``'?'`` (presumably RDKit's marker
        for an unassigned centre -- confirm).
    """
    reac_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()}
    prod_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}

    reac_centers = {
        reac_idx_to_map[center[0]]: center[1]
        for center in Chem.FindMolChiralCenters(reac_mol, includeUnassigned=True)
    }
    prod_centers = {
        prod_idx_to_map[center[0]]: center[1]
        for center in Chem.FindMolChiralCenters(prod_mol, includeUnassigned=True)
    }
    prod_atom_maps = set(prod_idx_to_map.values())

    # Centres present on both sides but labelled differently.
    chai_edits = [
        f"{amap_num}:{0}:{0}:{reac_centers[amap_num]}"
        for amap_num, prod_label in prod_centers.items()
        if amap_num in reac_centers and prod_label != reac_centers[amap_num]
    ]

    # Centres that exist only in the reactants, on atoms the product keeps.
    chai_edits.extend(
        f"{amap_num}:{0}:{0}:{reac_label}"
        for amap_num, reac_label in reac_centers.items()
        if amap_num not in prod_centers
        and amap_num in prod_atom_maps
        and reac_label != '?'
    )

    return chai_edits
def get_core_edit_mine(reac_mol, prod_mol):
    """Derive the core edits that relate the product to the reactants.

    Bonds are compared through ``get_bond_info`` (keyed by atom-map pair).
    Edits are strings structured as a1:a2:b1:b2, where b1 is the bond order
    in the product and b2 the bond order in the reactants.

    NOTE(review): this name is defined more than once in the file; the
    last definition is the one in effect after import.

    Parameters
    ----------
    reac_mol: Chem.Mol
        Reactant molecule with atom-map numbers.
    prod_mol: Chem.Mol
        Product molecule with atom-map numbers.

    Returns
    -------
    List[str]
        In order: (1) product bonds whose order differs in the reactants,
        or that are absent from the reactants (b2 = 0.0); (2) reactant
        bonds absent from the product whose two atoms both occur in the
        product (b1 = 0.0); (3) ``"{amap}:0:1.0:0.0"`` for product atoms
        that lost a reactant bond, are not part of a product-only bond, and
        whose total hydrogen count differs from the reactant atom; (4) the
        same string for atoms not yet touched whose degree in the product
        is one less than in the reactants.

    Raises
    ------
    KeyError
        If a product atom examined in steps (3)/(4) has an atom-map number
        that does not occur in ``reac_mol``.
    """
    prod_bonds = get_bond_info(prod_mol)
    reac_bonds = get_bond_info(reac_mol)

    prod_atom_maps = {atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}
    reac_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mol.GetAtoms()}

    touched = set()          # atoms already covered by some edit
    in_new_bond = set()      # atoms of bonds found only in the product
    lost_bond = set()        # atoms of bonds found only in the reactants
    core_edits = []

    for pair, prod_info in prod_bonds.items():
        prod_bo = prod_info[0]
        if pair not in reac_bonds:
            first, second = pair
            core_edits.append(f"{first}:{second}:{prod_bo}:{0.0}")
            touched.update(pair)
            in_new_bond.update(pair)
        else:
            reac_bo = reac_bonds[pair][0]
            if prod_bo != reac_bo:
                low, high = sorted(pair)
                core_edits.append(f"{low}:{high}:{prod_bo}:{reac_bo}")
                touched.update((low, high))

    for pair, reac_info in reac_bonds.items():
        if pair in prod_bonds:
            continue
        lost_bond.update(pair)
        if all(amap in prod_atom_maps for amap in pair):
            low, high = sorted(pair)
            core_edits.append(f"{low}:{high}:{0.0}:{reac_info[0]}")
            touched.update((low, high))

    # Hydrogen-count changes on atoms that lost a bond to a leaving group.
    for atom in prod_mol.GetAtoms():
        amap_num = atom.GetAtomMapNum()
        if amap_num in in_new_bond or amap_num not in lost_bond:
            continue
        reac_atom = reac_mol.GetAtomWithIdx(reac_amap[amap_num])
        if atom.GetTotalNumHs() != reac_atom.GetTotalNumHs():
            core_edits.append(f"{amap_num}:{0}:{1.0}:{0.0}")
            touched.add(amap_num)

    # Remaining atoms that have exactly one neighbour fewer in the product.
    for atom in prod_mol.GetAtoms():
        amap_num = atom.GetAtomMapNum()
        if amap_num in touched:
            continue
        reac_atom = reac_mol.GetAtomWithIdx(reac_amap[amap_num])
        if atom.GetDegree() - reac_atom.GetDegree() == -1:
            core_edits.append(f"{amap_num}:{0}:{1.0}:{0.0}")
            touched.add(amap_num)

    return core_edits
reac_mol_map_num if i not in frag_mol_map_num] + attach_map_num = 0 + + reac_edit = [] + + core_edits = core_edits + [':'.join([i.split(':')[1],i.split(':')[0],i.split(':')[2],i.split(':')[3]]) for i in core_edits] + + + for core_edit in core_edits: + core_edit_ = core_edit.split(':') + + if float(core_edit_[3]) == 0 and int(core_edit_[0]) in frag_mol_map_num: + attach_map_num = int(core_edit_[0]) + elif float(core_edit_[2]) - float(core_edit_[3]) > 0 and int(core_edit_[0]) in frag_mol_map_num: + attach_map_num = int(core_edit_[0]) + + + else: + continue + + if str(attach_map_num) != '0' and str(attach_map_num) != core_edit_[0]: + continue + + + frag_mols_1_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in frag_mols_1.GetAtoms()} + reac_mols_1_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mols_1.GetAtoms()} + + frag_attach_H = frag_mols_1.GetAtomWithIdx(frag_mols_1_amap[attach_map_num]).GetNumExplicitHs() + reac_attach_H = reac_mols_1.GetAtomWithIdx(reac_mols_1_amap[attach_map_num]).GetNumExplicitHs() + + frag_attach_charge = frag_mols_1.GetAtomWithIdx(frag_mols_1_amap[attach_map_num]).GetFormalCharge() + reac_attach_charge = reac_mols_1.GetAtomWithIdx(reac_mols_1_amap[attach_map_num]).GetFormalCharge() + + + if lg_map_num != []: + for bond in reac_mols_1.GetBonds(): + EndMapNum = bond.GetEndAtom().GetAtomMapNum() + BeginMapNum = bond.GetBeginAtom().GetAtomMapNum() + if (BeginMapNum == attach_map_num) and (EndMapNum in lg_map_num): + reac_edit.append("{}:{}:{}:{}".format(BeginMapNum,EndMapNum,bond.GetBondTypeAsDouble(),0.0)) + elif (EndMapNum == attach_map_num) and (BeginMapNum in lg_map_num): + reac_edit.append("{}:{}:{}:{}".format(EndMapNum,BeginMapNum,bond.GetBondTypeAsDouble(),0.0)) + + + + elif lg_map_num == []: + + + if Chem.MolToSmiles(reac_mols_1) == Chem.MolToSmiles(frag_mols_1): + reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,0.0)) + if (reac_attach_H - frag_attach_H) == 1 and (reac_attach_charge - frag_attach_charge) == 
0: + reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,1.0,0.0)) + if (reac_attach_H - frag_attach_H) == 2 and (reac_attach_charge - frag_attach_charge) == 0: + reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,2.0,0.0)) + + if (reac_attach_charge - frag_attach_charge) == -1: + if "{}:{}:{}:{}".format(attach_map_num,0,0.0,-1.0) not in reac_edit: + reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,-1.0)) + + if (reac_attach_charge - frag_attach_charge) == 1: + if "{}:{}:{}:{}".format(attach_map_num,0,0.0,1.0) not in reac_edit: + reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,1.0)) + + + if (reac_attach_charge - frag_attach_charge) == 2: + if "{}:{}:{}:{}".format(attach_map_num,0,0.0,2.0) not in reac_edit: + reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,2.0)) + + + + return reac_edit + + + + +def get_lg_map_lis(frag_mols,reac_mols,core_edits,prod_mol): + + lg_map_lis = [] + prod_map_num_lis = [i.GetAtomMapNum() for i in prod_mol.GetAtoms()] + + for frag_mols_1,reac_mols_1 in zip(frag_mols[:],reac_mols[:]): + reac_edits = find_reac_edit(frag_mols_1,reac_mols_1,core_edits) + + + reac_edits_a = [] + reac_edits_b = [] + for reac_edit in reac_edits: + if reac_edit[:3] == '0:0': + reac_edits_a.append(reac_edit) + elif reac_edit[-7:] == '0.0:0.0': + reac_edits_a.append(reac_edit) + elif reac_edit[-10:] == '0:0.0:-1.0': + reac_edits_a.append(reac_edit) + elif reac_edit[-9:] == '0:0.0:1.0': + reac_edits_a.append(reac_edit) + + elif reac_edit[-9:] == '0:0.0:2.0': + reac_edits_a.append(reac_edit) + + else: + reac_edits_b.append(reac_edit) + + + for reac_edit in reac_edits_a: + if reac_edit[:3] == '0:0': + pass + elif reac_edit[-7:] == '0.0:0.0': + pass + elif reac_edit[-10:] == '0:0.0:-1.0': + edit_map_num_lis = reac_edit.split(':')[:2] + attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis] + lg_smiles = '-1' + lg_map_lis.append((lg_smiles,attach_map_num_1)) + elif reac_edit[-9:] == '0:0.0:1.0': + 
edit_map_num_lis = reac_edit.split(':')[:2] + attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis] + lg_smiles = '1' + lg_map_lis.append((lg_smiles,attach_map_num_1)) + + elif reac_edit[-9:] == '0:0.0:2.0': + edit_map_num_lis = reac_edit.split(':')[:2] + attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis] + lg_smiles = '2' + lg_map_lis.append((lg_smiles,attach_map_num_1)) + + + frag_1_map_num_lis = [i.GetAtomMapNum() for i in frag_mols_1.GetAtoms() if i.GetAtomMapNum() != 0] + reac_frag_mol = apply_edits_to_mol_break(reac_mols_1 , reac_edits_b) + reac_frag_mols = Chem.GetMolFrags(reac_frag_mol,asMols=True,sanitizeFrags = False) + + + reac_edit_added = [] + for reac_frag_mol in reac_frag_mols[:]: + + reac_frag_map_num_lis = [i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms() if i.GetAtomMapNum() != 0] + + if set(reac_frag_map_num_lis) == set(frag_1_map_num_lis): + pass + else: + attach_map_num_1 = [] + for reac_edit in reac_edits: + if reac_edit in reac_edit_added: + continue + else: + pass + + + b,e = int(reac_edit.split(':')[0]),int(reac_edit.split(':')[1]) + if e in reac_frag_map_num_lis and b in frag_1_map_num_lis: + + for atom in reac_frag_mol.GetAtoms(): + if atom.GetAtomMapNum() == int(e): + atom.SetAtomMapNum(500+atom.GetAtomMapNum()) + break + else: + pass + reac_edit_added.append(reac_edit) + + + + if len(attach_map_num_1) == 1: + + if [str(attach_map_num_1[0]),str(atom.GetAtomMapNum()-500)] in [i.split(':')[:2] for i in reac_edits ]: #上一个合成子上的连接点和本离去基团的连接点配对 + if atom.GetAtomMapNum() == max([i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms()]): + attach_map_num_1 = [b] + attach_map_num_1 + else: + attach_map_num_1.append(b) + else: + if atom.GetAtomMapNum() == max([i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms()]): + attach_map_num_1.append(b) + else: + attach_map_num_1 = [b] + attach_map_num_1 + elif len(attach_map_num_1) == 0: + attach_map_num_1.append(b) + + + else: + pass + + if 
def get_chai_edit_mine(reac_mol, prod_mol):
    """Derive chirality edits by comparing reactant and product stereocentres.

    Parameters
    ----------
    reac_mol: Chem.Mol,
        Atom-mapped reactant molecule
    prod_mol: Chem.Mol,
        Atom-mapped product molecule

    Returns
    -------
    List[str]
        Edits of the form ``"<atom map>:0:0:<label>"`` where ``<label>`` is
        the reactant-side label reported by ``Chem.FindMolChiralCenters``.
        An edit is produced when both sides list the atom as a centre with
        different labels, or when only the reactant lists it, the atom map
        also occurs in the product, and the reactant label is not ``'?'``.
    """
    reac_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()}
    prod_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}

    # Labels are computed on map-free copies (see the helper) and then keyed
    # by atom map so that the two molecules can be compared atom by atom.
    reac_labels = {
        reac_idx_to_map[idx]: label
        for idx, label in get_chair_dict_without_atom_map(reac_mol).items()
    }
    prod_labels = {
        prod_idx_to_map[idx]: label
        for idx, label in get_chair_dict_without_atom_map(prod_mol).items()
    }

    # Built once: the original scanned dict.values() on every iteration.
    prod_maps = set(prod_idx_to_map.values())

    chai_edits = []

    # Centres present on both sides whose labels disagree.
    for amap_num, label in prod_labels.items():
        if amap_num in reac_labels and label != reac_labels[amap_num]:
            chai_edits.append(f"{amap_num}:0:0:{reac_labels[amap_num]}")

    # Centres that only the reactant reports, for atoms the product contains.
    for amap_num, label in reac_labels.items():
        if amap_num not in prod_labels and amap_num in prod_maps and label != '?':
            chai_edits.append(f"{amap_num}:0:0:{label}")

    return chai_edits


def get_original_chair_edit(p, b):
    """Build chirality edits that copy the product's labels onto ``b``.

    Parameters
    ----------
    p: str,
        Product SMILES
    b: Chem.Mol,
        Molecule whose stereocentres should follow the product

    Returns
    -------
    List[str]
        One ``"<idx + 1>:0:0:<label>"`` edit for every atom index that
        ``Chem.FindMolChiralCenters`` reports for both ``p`` and ``b``; the
        label is the one found in ``p``.
    """
    b_centres = get_chair_dict_without_atom_map(b)
    p_centres = get_chair_dict_without_atom_map(Chem.MolFromSmiles(p))
    # NOTE(review): the first field is the atom index + 1, which presumably
    # relies on atom maps being numbered idx + 1 upstream - confirm.
    return [
        '{}:0:0:{}'.format(idx + 1, label)
        for idx, label in p_centres.items()
        if idx in b_centres
    ]


def apply_chirality_change(prod_mol, chai_edits):
    """Apply chirality edits to a copy of ``prod_mol``.

    Parameters
    ----------
    prod_mol: Chem.Mol,
        Atom-mapped molecule; it is deep-copied, not modified
    chai_edits: Iterable[str],
        Edits whose first ``:``-separated field is an atom map and whose last
        two characters are ``:R``, ``:S`` or ``:?``. Other suffixes are
        ignored.

    Returns
    -------
    Chem.Mol
        The edited copy.

    Raises
    ------
    KeyError
        If an ``R``/``S`` edit targets an atom map missing from ``prod_mol``
        or an atom that is not reported as a chiral centre after tagging.
    """
    p_amap_idx = {atom.GetAtomMapNum(): atom.GetIdx() for atom in prod_mol.GetAtoms()}
    prod_mol = copy.deepcopy(prod_mol)

    for chai_edit in chai_edits:
        amap = int(chai_edit.split(':')[0])
        suffix = chai_edit[-2:]

        if suffix in (':R', ':S'):
            atom = prod_mol.GetAtomWithIdx(p_amap_idx[amap])
            # Try one winding first; if the resulting label is not the
            # requested one, the opposite winding is used instead.
            atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
            labels = get_chair_dict_without_atom_map(prod_mol)
            if labels[atom.GetIdx()] != suffix[1]:
                atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW)
        elif suffix == ':?':
            atom = prod_mol.GetAtomWithIdx(p_amap_idx[amap])
            atom.SetChiralTag(Chem.ChiralType.CHI_UNSPECIFIED)

    return prod_mol


def get_chair_dict_without_atom_map(temp_p):
    """Return ``{atom index: label}`` for the chiral centres of ``temp_p``.

    The labels come from ``Chem.FindMolChiralCenters`` (unassigned centres
    included) evaluated on a deep copy whose atom maps are cleared and which
    is round-tripped through a MolBlock; ``temp_p`` itself is left untouched.
    """
    stripped = copy.deepcopy(temp_p)
    for atom in stripped.GetAtoms():
        atom.SetAtomMapNum(0)
    round_tripped = Chem.MolFromMolBlock(Chem.MolToMolBlock(stripped))
    return dict(Chem.FindMolChiralCenters(round_tripped, includeUnassigned=True))
# (element symbol, total valence, formal charge) of a leaving-group attachment
# atom -> order of the bond used to re-attach it. Anything else gets 1.0.
_RGPBL_ATTACH_BOND_ORDERS = {
    ('O', 0, 0): 2.0,
    ('S', 0, 0): 2.0,
    ('S', 2, 0): 2.0,
    ('S', 4, 0): 2.0,
    ('S', 1, 1): 2.0,
    ('P', 3, 0): 2.0,
    ('C', 2, 0): 2.0,
    ('N', 2, 1): 2.0,
    ('N', 1, 0): 2.0,
    ('N', 0, -1): 2.0,
    ('Se', 2, 0): 2.0,
    ('Si', 2, 0): 2.0,
    ('Mn', 5, 0): 2.0,
    ('Cr', 4, 0): 2.0,
    ('O', 1, 1): 2.0,
    ('N', 0, 0): 3.0,
    ('C', 1, 0): 3.0,
    ('C', 0, -1): 3.0,
}
_RGPBL_ATTACH_SYMBOLS = frozenset(key[0] for key in _RGPBL_ATTACH_BOND_ORDERS)

# Leaving-group entries that are pure formal-charge changes. get_lg_map_lis
# emits the integer spellings; the float spellings are what this function
# used to test for and are still accepted.
_RGPBL_CHARGE_TOKENS = ('-1', '1', '2', '-1.0', '1.0', '2.0')


def _rgpbl_attach_bond_order(atom, is_multi_bond):
    """Return the bond order (1.0, 2.0 or 3.0) for attaching ``atom``."""
    symbol = atom.GetSymbol()
    if symbol not in _RGPBL_ATTACH_SYMBOLS:
        return 1.0
    # Queried before the multi-bond test, as the original elif ladder did.
    total_valence = atom.GetTotalValence()
    if is_multi_bond:
        return 1.0
    key = (symbol, total_valence, atom.GetFormalCharge())
    return _RGPBL_ATTACH_BOND_ORDERS.get(key, 1.0)


def run_get_p_b_l(rxn_smi):
    """Extract the edit annotation of a mapped reaction and verify it.

    The edits are computed from reactants and product, replayed on the
    product, and the rebuilt SMILES is compared with the map-free reactants.

    Parameters
    ----------
    rxn_smi: str,
        Atom-mapped reaction SMILES of the form ``reactants>>product``

    Returns
    -------
    list or str
        ``[p, core_edits, chai_edits, stereo_edits, charge_edits,
        core_edits_add, lg_map_lis]`` when the replay reproduces the
        reactants and the edit limits hold; otherwise ``'error type 3'``
        (either side has 150 atoms or more), ``'error type 1'`` (mismatch or
        limits exceeded) or ``'error type 2'`` (any exception).
    """
    try:
        r, p = rxn_smi.split(">>")

        if Chem.MolFromSmiles(p).GetNumAtoms() >= 150 or Chem.MolFromSmiles(r).GetNumAtoms() >= 150:
            print('error type 3')
            return 'error type 3'

        r, p = cano_smiles_map(r), cano_smiles_map(p)

        reac_mol, prod_mol = align_kekule_pairs(r, p)
        reac_mol = Chem.MolFromSmiles(Chem.MolToSmiles(reac_mol, kekuleSmiles=True), sanitize=False)

        reac_smiles_temp = Chem.MolToSmiles(reac_mol, kekuleSmiles=True)
        reac_mol_temp = Chem.MolFromSmiles(reac_smiles_temp)

        aligned_ok = (
            reac_mol_temp is not None
            and Chem.MolToSmiles(reac_mol_temp) == Chem.MolToSmiles(Chem.MolFromSmiles(r))
        )
        if not aligned_ok:
            r_k = get_kekule_aligned_r(r, p)
            # The original branched on count_kekule_d(r_k, p) == 0 but both
            # branches were identical; the call is retained so that an
            # exception raised by it is still reported as 'error type 2'.
            count_kekule_d(r_k, p)
            reac_mol, prod_mol = Chem.MolFromSmiles(r_k), Chem.MolFromSmiles(p)
            Chem.Kekulize(reac_mol)
            Chem.Kekulize(prod_mol)

        # Fix: this assignment was missing, so core_edits was read before
        # assignment and every call ended in the 'error type 2' handler.
        core_edits = get_core_edit_mine(reac_mol, prod_mol)
        core_edits_add = [
            i for i in core_edits
            if (float(i.split(':')[2]) == 0) and (float(i.split(':')[1]) != 0)
        ]
        core_edits = [i for i in core_edits if i not in core_edits_add]

        edit_c = [i for i in core_edits if float(i.split(':')[-1]) > 0]
        edit_b = [i for i in core_edits if float(i.split(':')[-1]) == 0]

        chai_edits = get_chai_edit_mine(Chem.MolFromSmiles(r), Chem.MolFromSmiles(p))
        stereo_edits = get_stereo_edit_mine(Chem.MolFromSmiles(r), Chem.MolFromSmiles(p))
        charge_edits = get_charge_edit_mine(reac_mol, prod_mol, core_edits)

        o_p_Chiral_dic = get_atom_map_chai_dic(Chem.MolFromSmiles(p))
        o_p_Stereo_dic = get_atom_map_stereo_dic(Chem.MolFromSmiles(p))

        frag_mol = apply_edits_to_mol_break(prod_mol, edit_b)
        frag_mol = apply_edits_to_mol_change(frag_mol, edit_c)
        frag_mol = apply_edits_to_mol_connect(frag_mol, core_edits_add)
        frag_mol = remove_s_H(frag_mol)

        reac_mols = Chem.GetMolFrags(reac_mol, asMols=True, sanitizeFrags=False)
        frag_mols = Chem.GetMolFrags(frag_mol, asMols=True, sanitizeFrags=False)

        # Try to reconcile the fragment count with the reactant count: first
        # drop lone hydrogens, then fall back to the unsplit molecule.
        if len(reac_mols) != len(frag_mols):
            frag_mols = [frag for frag in frag_mols if Chem.MolToSmiles(frag) != '[H]']
        if len(reac_mols) != len(frag_mols):
            frag_mols = [frag_mol]

        if len(reac_mols) == len(frag_mols):
            reac_mols, frag_mols = map_reac_and_frag(reac_mols, frag_mols)
        else:
            print('error type 0')

        lg_map_lis_temp = get_lg_map_lis(frag_mols[:], reac_mols[:], core_edits, prod_mol)

        # Canonicalise leaving groups with more than one attachment atom and
        # reorder their attachment maps by canonical atom rank.
        lg_map_lis = []
        for lg, map_ in lg_map_lis_temp:
            lg, map_ = copy.deepcopy(lg), copy.deepcopy(map_)
            map_new = []
            if lg.count(':') > 1:
                lg = Chem.MolFromSmiles(lg)
                Chem.Kekulize(lg)
                for atom in lg.GetAtoms():
                    if atom.GetAtomMapNum() == 0:
                        map_new.append('*')
                    else:
                        map_new.append(map_.pop(0))

                lg_smiles = Chem.MolToSmiles(lg, kekuleSmiles=True)
                rank = list(Chem.CanonicalRankAtoms(lg, breakTies=False))
                map_new = sorted(map_new, key=lambda x: rank[map_new.index(x)])
                map_new = [i for i in map_new if i != '*']
                lg_map_lis.append((lg_smiles, map_new))
            else:
                lg_map_lis.append((lg, map_))

        total_mol = frag_mol

        for lg_smile, map_nums in lg_map_lis[:]:
            # Fix: get_lg_map_lis encodes charge changes as '-1'/'1'/'2';
            # testing only for '-1.0'/'1.0'/'2.0' sent them to the SMILES
            # parser, which returned None and aborted the run.
            if lg_smile in _RGPBL_CHARGE_TOKENS:
                map_num = map_nums[0]
                amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
                atom = total_mol.GetAtomWithIdx(amap[map_num])
                atom.SetNumRadicalElectrons(0)
                atom.SetFormalCharge(int(atom.GetFormalCharge() + float(lg_smile)))
                continue

            lg = Chem.MolFromSmiles(lg_smile)
            max_map = max([i.GetAtomMapNum() for i in total_mol.GetAtoms()])

            # Attachment atoms (map 1) receive the first fresh map numbers,
            # the remaining leaving-group atoms (map 0) the following ones.
            count = 1
            for marker in (1, 0):
                for atom in lg.GetAtoms():
                    if atom.GetAtomMapNum() == marker:
                        atom.SetAtomMapNum(max_map + count)
                        count += 1

            total_mol = Chem.CombineMols(total_mol, lg)
            amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
            new_mol = Chem.RWMol(total_mol)

            # One attachment atom per anchor, or a single attachment atom
            # shared by all anchors (then only single bonds are formed).
            one_atom_per_anchor = lg_smile.count(':') == len(map_nums)
            for idx, map_num in enumerate(map_nums):
                lg_map = max_map + 1 + idx if one_atom_per_anchor else max_map + 1
                atom = total_mol.GetAtomWithIdx(amap[lg_map])
                bond_float = _rgpbl_attach_bond_order(atom, not one_atom_per_anchor)
                new_mol.AddBond(amap[map_num], amap[lg_map], BOND_FLOAT_TO_TYPE[bond_float])
            total_mol = new_mol.GetMol()

        total_mol = correct_mol_1(total_mol, is_nitrine_c=True)
        b = correct_mol(total_mol, keep_map=True)

        b_Chiral_dic = get_atom_map_chai_dic(b)
        b_Stereo_dic = get_atom_map_stereo_dic(b)
        dic_map_idx = {i.GetAtomMapNum(): i.GetIdx() for i in b.GetAtoms()}

        # Atoms whose label drifted from the product although no chirality
        # edit mentions them get their winding flipped back.
        for b_map, chiral in b_Chiral_dic.items():
            if b_map not in o_p_Chiral_dic:
                continue
            if chiral == o_p_Chiral_dic[b_map]:
                continue
            if b_map in [int(i.split(':')[0]) for i in chai_edits]:
                continue
            atom = b.GetAtomWithIdx(dic_map_idx[b_map])
            if atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CCW:
                atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW)
            elif atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CW:
                atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW)

        # Same idea for double-bond stereo: restore the product's value on
        # bonds that no stereo edit mentions.
        for b_map, stereo in b_Stereo_dic.items():
            if b_map not in o_p_Stereo_dic:
                continue
            if stereo == o_p_Stereo_dic[b_map]:
                continue
            if b_map in [tuple(int(x) for x in e.split(':')[:2]) for e in stereo_edits]:
                continue
            bond = b.GetBondBetweenAtoms(dic_map_idx[b_map[0]], dic_map_idx[b_map[1]])
            bond.SetStereo(o_p_Stereo_dic[b_map])

        b = apply_charge_change(b, charge_edits)

        if chai_edits == []:
            b = apply_chirality_change(b, get_original_chair_edit(p, b))
        else:
            b = apply_chirality_change(b, chai_edits)

        b = Chem.MolFromSmiles(Chem.MolToSmiles(b, canonical=False))
        b = apply_stereo_change(b, stereo_edits)

        for atom in b.GetAtoms():
            atom.SetAtomMapNum(0)

        for bond in b.GetBonds():
            if bond.GetStereo() == Chem.rdchem.BondStereo.STEREONONE:
                bond.SetStereo(Chem.rdchem.BondStereo.STEREOANY)

        pre_smiles = Chem.MolToSmiles(b)
        # NOTE(review): the last replacement rewrites '/C=N\\' as 'C=C',
        # i.e. it changes N to C - confirm this is intended.
        pre_smiles = pre_smiles.replace('[H]/C=C/','C=C').replace('[H]/C=C(\\','C=C(').replace('[H]/C=C(/','C=C(').replace('[MgH2]','[Mg]').replace('/C=N\\','C=C')
        pre_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(pre_smiles))

        reac_mol = Chem.MolFromSmiles(r)
        for atom in reac_mol.GetAtoms():
            atom.SetAtomMapNum(0)
        reac_mol_smiles = Chem.MolToSmiles(reac_mol)
        reac_mol_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(reac_mol_smiles))

        added_orders = [float(i[-3:]) for i in core_edits_add]
        if not added_orders:
            max_add = 0
        elif max(added_orders) == 1:
            max_add = 1
        else:
            max_add = 2

        # NOTE(review): int(i[-1]) reads only the final character of each
        # charge edit, so a sign can never be seen here - confirm the format.
        charges = [int(i[-1]) for i in charge_edits] + [0]

        if (pre_smiles == reac_mol_smiles and len(core_edits_add) <= 1
                and max_add <= 1 and max(charges) <= 1 and min(charges) >= -1):
            return [p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis]

        print(pre_smiles, reac_mol_smiles, chai_edits, stereo_edits)
        return 'error type 1'

    except Exception:
        # Best-effort contract: any failure is reported as an error code.
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        print('error type 2')
        return 'error type 2'
r, p = rxn_smi.split(">>") + + if Chem.MolFromSmiles(p).GetNumAtoms() >= 150 or Chem.MolFromSmiles(r).GetNumAtoms() >= 150: + + return 'error type 1' + else: + pass + + + r,p = cano_smiles_map(r),cano_smiles_map(p) + reac_mol, prod_mol = align_kekule_pairs(r, p) + reac_mol = Chem.MolFromSmiles(Chem.MolToSmiles(reac_mol,kekuleSmiles = True),sanitize = False) + + + reac_smiles_temp = Chem.MolToSmiles(reac_mol,kekuleSmiles = True) + reac_mol_temp = Chem.MolFromSmiles(reac_smiles_temp) + + if reac_mol_temp != None and Chem.MolToSmiles(reac_mol_temp) == Chem.MolToSmiles(Chem.MolFromSmiles(r)): + pass + else: + r_k = get_kekule_aligned_r(r,p) + if count_kekule_d(r_k,p) == 0: + reac_mol, prod_mol = Chem.MolFromSmiles(r_k),Chem.MolFromSmiles(p) + Chem.Kekulize(reac_mol) + Chem.Kekulize(prod_mol) + else: + reac_mol, prod_mol = Chem.MolFromSmiles(r_k),Chem.MolFromSmiles(p) + Chem.Kekulize(reac_mol) + Chem.Kekulize(prod_mol) + + + + + core_edits= get_core_edit_mine(reac_mol,prod_mol) + core_edits_add = [i for i in core_edits if (float(i.split(':')[2]) == 0) and (float(i.split(':')[1]) != 0)] + core_edits = [i for i in core_edits if i not in core_edits_add] + + + edit_c = [i for i in core_edits if (float(i.split(':')[-1]) > 0)] + edit_b = [i for i in core_edits if (float(i.split(':')[-1]) == 0)] + + chai_edits = get_chai_edit_mine(Chem.MolFromSmiles(r), Chem.MolFromSmiles(p)) + stereo_edits = get_stereo_edit_mine(Chem.MolFromSmiles(r), Chem.MolFromSmiles(p)) + charge_edits = get_charge_edit_mine(reac_mol, prod_mol,core_edits) + + + o_p_Chiral_dic = get_atom_map_chai_dic(Chem.MolFromSmiles(p)) + o_p_Stereo_dic = get_atom_map_stereo_dic(Chem.MolFromSmiles(p)) + + + frag_mol = apply_edits_to_mol_break(prod_mol,edit_b) + frag_mol = apply_edits_to_mol_change(frag_mol,edit_c) + + frag_mol = apply_edits_to_mol_connect(frag_mol, core_edits_add) + frag_mol = remove_s_H(frag_mol) + + + reac_mols = Chem.GetMolFrags(reac_mol,asMols=True,sanitizeFrags = False) + frag_mols = 
# Attachment-atom states, as (symbol, total valence, formal charge), that are
# re-attached with a double or a triple bond; every other state is single.
_BWD_DOUBLE_STATES = (
    ('O', 0, 0), ('O', 1, 1),
    ('S', 0, 0), ('S', 2, 0), ('S', 4, 0), ('S', 1, 1),
    ('P', 3, 0),
    ('C', 2, 0),
    ('N', 2, 1), ('N', 1, 0), ('N', 0, -1),
    ('Se', 2, 0), ('Si', 2, 0), ('Mn', 5, 0), ('Cr', 4, 0),
)
_BWD_TRIPLE_STATES = (('N', 0, 0), ('C', 1, 0), ('C', 0, -1))
_BWD_BOND_ORDER = dict(
    [(state, 2.0) for state in _BWD_DOUBLE_STATES]
    + [(state, 3.0) for state in _BWD_TRIPLE_STATES]
)
_BWD_ELEMENTS = frozenset(state[0] for state in _BWD_BOND_ORDER)

# Textual clean-ups applied, in this order, to the rebuilt SMILES.
_BWD_SMILES_REPLACEMENTS = (
    ('[H]/C=C/', 'C=C'),
    ('[H]/C=C(\\', 'C=C('),
    ('[H]/C=C(/', 'C=C('),
    ('[MgH2]', '[Mg]'),
    ('/C=N\\', 'C=C'),
)


def _bwd_attach_bond_order(lg_atom, shared_attachment):
    """Bond order (as float) used to connect ``lg_atom`` to its anchor."""
    element = lg_atom.GetSymbol()
    if element not in _BWD_ELEMENTS:
        return 1.0
    valence = lg_atom.GetTotalValence()
    if shared_attachment:
        return 1.0
    return _BWD_BOND_ORDER.get((element, valence, lg_atom.GetFormalCharge()), 1.0)


def run_get_p_b_l_backward(p,core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add,lg_map_lis):
    """Replay an edit annotation on a product and return the rebuilt SMILES.

    Parameters
    ----------
    p: str,
        Atom-mapped product SMILES
    core_edits: List[str],
        Bond/hydrogen edits ``a1:a2:b1:b2``; entries also present in
        ``core_edits_add`` are handled as connections instead
    chai_edits, stereo_edits, charge_edits: List[str],
        Chirality, double-bond stereo and charge edits
    core_edits_add: List[str],
        Edits passed to ``apply_edits_to_mol_connect``
    lg_map_lis: List[Tuple[str, List[int]]],
        Leaving groups as ``(smiles, attachment atom maps)``; the SMILES
        ``'-1'``, ``'1'`` and ``'2'`` denote a formal-charge change on the
        first listed atom instead of a fragment

    Returns
    -------
    str
        Map-free SMILES of the reassembled molecule.
    """
    product = Chem.MolFromSmiles(p)

    remaining_edits = [e for e in core_edits if e not in core_edits_add]
    change_edits = [e for e in remaining_edits if float(e.split(':')[-1]) > 0]
    break_edits = [e for e in remaining_edits if float(e.split(':')[-1]) == 0]

    # Stereo information of the untouched product, used as reference below.
    ref_chirality = get_atom_map_chai_dic(Chem.MolFromSmiles(p))
    ref_stereo = get_atom_map_stereo_dic(Chem.MolFromSmiles(p))

    assembled = apply_edits_to_mol_break(product, break_edits)
    assembled = apply_edits_to_mol_change(assembled, change_edits)
    assembled = apply_edits_to_mol_connect(assembled, core_edits_add)
    assembled = remove_s_H(assembled)

    for lg_smile, map_nums in lg_map_lis[:]:
        if lg_smile in ('-1', '1', '2'):
            # Pure charge change on the first listed atom.
            idx_of = {a.GetAtomMapNum(): a.GetIdx() for a in assembled.GetAtoms()}
            target = assembled.GetAtomWithIdx(idx_of[map_nums[0]])
            target.SetNumRadicalElectrons(0)
            target.SetFormalCharge(int(target.GetFormalCharge() + float(lg_smile)))
            continue

        lg = Chem.MolFromSmiles(lg_smile)
        max_map = max(a.GetAtomMapNum() for a in assembled.GetAtoms())

        # Number attachment atoms (map 1) first, then all other atoms (map 0),
        # continuing after the largest map already in use.
        next_offset = 1
        for marker in (1, 0):
            for lg_atom in lg.GetAtoms():
                if lg_atom.GetAtomMapNum() == marker:
                    lg_atom.SetAtomMapNum(max_map + next_offset)
                    next_offset += 1

        assembled = Chem.CombineMols(assembled, lg)
        idx_of = {a.GetAtomMapNum(): a.GetIdx() for a in assembled.GetAtoms()}
        editable = Chem.RWMol(assembled)

        one_atom_per_anchor = lg_smile.count(':') == len(map_nums)
        for position, anchor_map in enumerate(map_nums):
            if one_atom_per_anchor:
                lg_map = max_map + 1 + position
            else:
                lg_map = max_map + 1
            lg_atom = assembled.GetAtomWithIdx(idx_of[lg_map])
            order = _bwd_attach_bond_order(lg_atom, not one_atom_per_anchor)
            editable.AddBond(idx_of[anchor_map], idx_of[lg_map], BOND_FLOAT_TO_TYPE[order])
        assembled = editable.GetMol()

    assembled = correct_mol_1(assembled, is_nitrine_c = True)
    b = correct_mol(assembled, keep_map = True)

    b_chirality = get_atom_map_chai_dic(b)
    b_stereo = get_atom_map_stereo_dic(b)
    idx_by_map = {a.GetAtomMapNum(): a.GetIdx() for a in b.GetAtoms()}

    # Flip the winding of centres that differ from the product although no
    # chirality edit targets them.
    for amap, label in b_chirality.items():
        if amap not in ref_chirality:
            continue
        if label == ref_chirality[amap]:
            continue
        if amap in [int(e.split(':')[0]) for e in chai_edits]:
            continue
        centre = b.GetAtomWithIdx(idx_by_map[amap])
        if centre.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CCW:
            centre.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW)
        elif centre.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CW:
            centre.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW)

    # Restore the product's stereo on bonds that no stereo edit targets.
    for pair, stereo in b_stereo.items():
        if pair not in ref_stereo:
            continue
        if stereo == ref_stereo[pair]:
            continue
        if pair in [tuple([int(x) for x in e.split(':')[:2]]) for e in stereo_edits]:
            continue
        bond = b.GetBondBetweenAtoms(idx_by_map[pair[0]], idx_by_map[pair[1]])
        bond.SetStereo(ref_stereo[pair])

    b = apply_charge_change(b, charge_edits)

    if chai_edits == []:
        b = apply_chirality_change(b, get_original_chair_edit(p, b))
    else:
        b = apply_chirality_change(b, chai_edits)

    b = Chem.MolFromSmiles(Chem.MolToSmiles(b, canonical = False))
    b = apply_stereo_change(b, stereo_edits)

    for atom in b.GetAtoms():
        atom.SetAtomMapNum(0)

    for bond in b.GetBonds():
        if bond.GetStereo() == Chem.rdchem.BondStereo.STEREONONE:
            bond.SetStereo(Chem.rdchem.BondStereo.STEREOANY)

    pre_smiles = Chem.MolToSmiles(b)
    for old, new in _BWD_SMILES_REPLACEMENTS:
        pre_smiles = pre_smiles.replace(old, new)
    return Chem.MolToSmiles(Chem.MolFromSmiles(pre_smiles))
Chem.MolFromSmiles(p) + + core_edits = [i for i in core_edits if i not in core_edits_add] + edit_c = [i for i in core_edits if (float(i.split(':')[-1]) > 0)] + edit_b = [i for i in core_edits if (float(i.split(':')[-1]) == 0)] + + + o_p_Chiral_dic = get_atom_map_chai_dic(Chem.MolFromSmiles(p)) # + o_p_Stereo_dic = get_atom_map_stereo_dic(Chem.MolFromSmiles(p)) + + + frag_mol = apply_edits_to_mol_break(prod_mol,edit_b) + frag_mol = apply_edits_to_mol_change(frag_mol,edit_c) + + frag_mol = apply_edits_to_mol_connect(frag_mol, core_edits_add) + frag_mol = remove_s_H(frag_mol) + + + + total_mol = frag_mol + + + for lg_smile,map_nums in lg_map_lis[:]: + + if lg_smile not in ['-1','1','2']: + + lg = Chem.MolFromSmiles(lg_smile) + + total_mol_map_num_lis = [i.GetAtomMapNum() for i in total_mol.GetAtoms()] + max_map = max(total_mol_map_num_lis) + count = 1 + for atom in lg.GetAtoms(): + if atom.GetAtomMapNum() == 1: + atom.SetAtomMapNum(max_map + count) + count += 1 + else: + pass + + total_mol_map_num_lis = [i.GetAtomMapNum() for i in total_mol.GetAtoms()] + max_map = max(total_mol_map_num_lis) + + for atom in lg.GetAtoms(): + if atom.GetAtomMapNum() == 0: + atom.SetAtomMapNum(max_map + count) + count += 1 + else: + pass + + total_mol = Chem.CombineMols(total_mol,lg) + + amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()} + new_mol = Chem.RWMol(total_mol) + + is_multi_bond = 0 + + for idx in range(len(map_nums)): + map_num = map_nums[idx] + if lg_smile.count(':') == len(map_nums): + lg_map = max_map + 1 + idx + atom = total_mol.GetAtomWithIdx(amap[lg_map]) + is_multi_bond = 0 + else: + lg_map = max_map + 1 + atom = total_mol.GetAtomWithIdx(amap[lg_map]) + is_multi_bond= 1 + + + if atom.GetSymbol() == 'O' and atom.GetTotalValence() == 0 and atom.GetFormalCharge() == 0 and is_multi_bond == 0: + bond_float = 2.0 + elif atom.GetSymbol() == 'S' and atom.GetTotalValence() in [0,2,4] and atom.GetFormalCharge() == 0 and is_multi_bond == 0: + bond_float = 
2.0 + elif atom.GetSymbol() == 'S' and atom.GetTotalValence() ==1 and atom.GetFormalCharge() == 1 and is_multi_bond == 0: + bond_float = 2.0 + elif atom.GetSymbol() == 'P' and atom.GetTotalValence() == 3 and atom.GetFormalCharge() == 0 and is_multi_bond == 0: + bond_float = 2.0 + elif atom.GetSymbol() == 'C' and atom.GetTotalValence() == 2 and atom.GetFormalCharge() == 0 and is_multi_bond == 0: + bond_float = 2.0 + elif atom.GetSymbol() == 'N' and atom.GetTotalValence() == 2 and atom.GetFormalCharge() == 1 and is_multi_bond == 0: + bond_float = 2.0 + elif atom.GetSymbol() == 'N' and atom.GetTotalValence() == 1 and atom.GetFormalCharge() == 0 and is_multi_bond == 0: + bond_float = 2.0 + elif atom.GetSymbol() == 'N' and atom.GetTotalValence() == 0 and atom.GetFormalCharge() == -1 and is_multi_bond == 0: + bond_float = 2.0 + elif atom.GetSymbol() == 'Se' and atom.GetTotalValence() == 2 and atom.GetFormalCharge() == 0 and is_multi_bond == 0: + bond_float = 2.0 + elif atom.GetSymbol() == 'Si' and atom.GetTotalValence() == 2 and atom.GetFormalCharge() == 0 and is_multi_bond == 0: + bond_float = 2.0 + elif atom.GetSymbol() == 'Mn' and atom.GetTotalValence() == 5 and atom.GetFormalCharge() == 0 and is_multi_bond == 0: + bond_float = 2.0 + elif atom.GetSymbol() == 'Cr' and atom.GetTotalValence() == 4 and atom.GetFormalCharge() == 0 and is_multi_bond == 0: + bond_float = 2.0 + elif atom.GetSymbol() == 'O' and atom.GetTotalValence() == 1 and atom.GetFormalCharge() == 1 and is_multi_bond == 0: + bond_float = 2.0 + + + elif atom.GetSymbol() == 'N' and atom.GetTotalValence() == 0 and atom.GetFormalCharge() == 0 and is_multi_bond == 0: + bond_float = 3.0 + elif atom.GetSymbol() == 'C' and atom.GetTotalValence() == 1 and atom.GetFormalCharge() == 0 and is_multi_bond == 0: + bond_float = 3.0 + elif atom.GetSymbol() == 'C' and atom.GetTotalValence() == 0 and atom.GetFormalCharge() == -1 and is_multi_bond == 0: + bond_float = 3.0 + else: + + bond_float = 1.0 + + + 
new_mol.AddBond(amap[map_num],amap[lg_map],BOND_FLOAT_TO_TYPE[bond_float]) + total_mol = new_mol.GetMol() + + else: + + map_num = map_nums[0] + + amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()} + atom = total_mol.GetAtomWithIdx(amap[map_num]) + atom.SetNumRadicalElectrons(0) + atom.SetFormalCharge(int(atom.GetFormalCharge()+float(lg_smile))) + + + total_mol = correct_mol_1(total_mol,is_nitrine_c = True) + + b = correct_mol(total_mol,keep_map = True) + + b_Chiral_dic = get_atom_map_chai_dic(b) + b_Stereo_dic = get_atom_map_stereo_dic(b) + + dic_map_idx = dict([(i.GetAtomMapNum(),i.GetIdx()) for i in b.GetAtoms()]) + + act = 0 + for b_map,Chiral in b_Chiral_dic.items(): + if b_map not in o_p_Chiral_dic.keys(): + pass + elif b_map in o_p_Chiral_dic.keys() and b_Chiral_dic[b_map] != o_p_Chiral_dic[b_map] and b_map not in [int(i.split(':')[0]) for i in chai_edits]: + act =1 + atom = b.GetAtomWithIdx(dic_map_idx[b_map]) + + if atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CCW: + atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW) + elif atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CW: + atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW) + + if act == 1: + pass + + + + + for b_map,Stereo in b_Stereo_dic.items(): + if b_map not in o_p_Stereo_dic.keys(): + pass + elif b_map in o_p_Stereo_dic.keys() and Stereo != o_p_Stereo_dic[b_map] and b_map not in [tuple([int(i) for i in i.split(':')[:2]]) for i in stereo_edits]: + bond = b.GetBondBetweenAtoms(dic_map_idx[b_map[0]],dic_map_idx[b_map[1]]) + + bond.SetStereo(o_p_Stereo_dic[b_map]) + + b = apply_charge_change(b,charge_edits) + + if chai_edits == []: + o_chai_edits = get_original_chair_edit(p,b) + + + b = apply_chirality_change(b,o_chai_edits) + + else: + b = apply_chirality_change(b,chai_edits) + + + b = Chem.MolFromSmiles(Chem.MolToSmiles(b,canonical = False)) + b = apply_stereo_change(b,stereo_edits) + + +# for atom in b.GetAtoms(): +# atom.SetAtomMapNum(0) + + + for 
def run_get_p_b_l_check(rxn):
    """Extract the edits of a reaction and verify that they round-trip.

    The reaction is decomposed with ``run_get_p_b_l_forward`` and the
    reactants are rebuilt from the result with ``run_get_p_b_l_backward``.
    The decomposition is accepted only when the rebuilt SMILES equals the
    canonical, unmapped reactant SMILES and the edits stay within what the
    encoding can represent (at most one added bond, which must be a single
    bond, and charges within -1..1).

    Parameters
    ----------
    rxn: str
        Reaction SMILES; the part before the first ``>>`` is the reactant side.

    Returns
    -------
    tuple or str
        ``(p, core_edits, chai_edits, stereo_edits, charge_edits,
        core_edits_add, lg_map_lis)`` on success, otherwise one of the strings
        ``'error type 3'`` (forward step raised), ``'error type 5'``
        (backward step raised) or ``'error type 4'`` (check failed).
    """
    try:
        (p, core_edits, chai_edits, stereo_edits, charge_edits,
         core_edits_add, lg_map_lis) = run_get_p_b_l_forward(rxn)
    except Exception:  # was a bare except, which also swallowed KeyboardInterrupt
        return 'error type 3'

    try:
        pre_smiles = run_get_p_b_l_backward(
            p, core_edits, chai_edits, stereo_edits,
            charge_edits, core_edits_add, lg_map_lis)
    except Exception:
        return 'error type 5'

    reac_mol = Chem.MolFromSmiles(rxn.split('>>')[0])
    if reac_mol is None:
        # An unparsable reactant side can never match the reconstruction.
        return 'error type 4'
    for atom in reac_mol.GetAtoms():
        atom.SetAtomMapNum(0)
    # Write, re-parse and write again so the reference string is canonical
    # for the unmapped molecule.
    reac_mol_smiles = Chem.MolToSmiles(reac_mol)
    reac_mol_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(reac_mol_smiles))

    # The last ':'-separated field of an edit is its new value.
    added_orders = [float(edit.split(':')[-1]) for edit in core_edits_add]
    added_orders_ok = not added_orders or max(added_orders) == 1

    # Parse the whole field so that the sign of '-1' is kept; the trailing 0
    # keeps max()/min() defined when there are no charge edits.
    charges = [int(edit.split(':')[-1]) for edit in charge_edits] + [0]

    if (pre_smiles == reac_mol_smiles
            and len(core_edits_add) <= 1
            and added_orders_ok
            and max(charges) <= 1
            and min(charges) >= -1):
        return p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis
    return 'error type 4'


def get_atom_pair_bond_idx_dic(concise_smiles):
    """Map each bonded atom pair to the position of its bond line in Indigo's molfile.

    The SMILES is loaded with Indigo and written as a molfile; RDKit is used
    only to count atoms and bonds so the bond lines can be sliced out of the
    text (they are taken to start after 4 header lines plus one line per atom).

    Parameters
    ----------
    concise_smiles: str
        SMILES string of the molecule.

    Returns
    -------
    dict
        ``{(low_atom, high_atom): bond_idx}`` where the atom numbers are the
        integers read from the first two 3-character columns of each bond
        line and ``bond_idx`` is the 0-based position of that line.
    """
    mol_block_lis = indigo.loadMolecule(concise_smiles).molfile().split('\n')

    mol = Chem.MolFromSmiles(concise_smiles, sanitize=False)
    atom_num = len(mol.GetAtoms())
    bond_num = len(mol.GetBonds())

    first_bond_line = 4 + atom_num
    bond_line_lis = mol_block_lis[first_bond_line:first_bond_line + bond_num]

    atom_pair_bond_idx_dic = {}
    for bond_idx, bond_line in enumerate(bond_line_lis):
        s_atom = int(bond_line[:3])
        e_atom = int(bond_line[3:6])
        atom_pair_bond_idx_dic[(min(s_atom, e_atom), max(s_atom, e_atom))] = bond_idx
    return atom_pair_bond_idx_dic
def get_rm_token_lis(concise_smiles, detailed_smiles):
    """Align the detailed SMILES with the concise one, character by character.

    Parameters
    ----------
    concise_smiles: str
        SMILES expected to be ``detailed_smiles`` with some characters removed.
    detailed_smiles: str
        SMILES containing extra characters (e.g. explicit bond symbols).

    Returns
    -------
    list or None
        One entry per character of ``detailed_smiles``: ``' '`` when the
        character is also present in ``concise_smiles``, otherwise the extra
        character itself. When the two strings cannot be aligned, ``'error'``
        is printed and ``None`` is returned.
    """
    rm_token_lis = []
    matched = 0
    concise_length = len(concise_smiles)
    # Single pass with two cursors. The previous version re-sliced the string
    # on every mismatch and raised IndexError once the concise string was
    # exhausted (e.g. an extra character at the very end).
    for char in detailed_smiles:
        if matched < concise_length and char == concise_smiles[matched]:
            rm_token_lis.append(' ')
            matched += 1
        else:
            rm_token_lis.append(char)

    if matched == concise_length:
        return rm_token_lis
    print('error')
    return None


def get_bond_token_lis(detailed_smiles):
    """Mark which characters of a SMILES string are treated as bond symbols.

    A character counts as a bond symbol when it is one of ``- = # : / \\``
    and is not immediately followed by ``]`` (which excludes the charge sign
    of atoms such as ``[O-]``).

    NOTE(review): a ``-`` or ``:`` inside a bracket atom that is not directly
    followed by ``]`` (e.g. ``[O-2]``) is still reported as a bond symbol;
    this is kept as is because other functions in the file count symbols in
    a comparable way.

    Returns
    -------
    list
        One entry per character: the bond symbol itself, or ``' '``.
    """
    bond_chars = ('-', '=', '#', ':', '/', '\\')
    bond_token_lis = []
    for position, char in enumerate(detailed_smiles):
        # Slicing yields '' past the end, so a trailing bond character no
        # longer raises IndexError.
        next_char = detailed_smiles[position + 1:position + 2]
        if char in bond_chars and next_char != ']':
            bond_token_lis.append(char)
        else:
            bond_token_lis.append(' ')
    return bond_token_lis


def get_bond_token_idx_dic(bond_token_lis):
    """Relate running bond counts to token positions.

    Walks the list produced by ``get_bond_token_lis`` and, for each count of
    bond tokens seen so far, records the 1-based position of the last token
    visited while that count was current. For a count ``k`` that is followed
    by another bond token this is the 0-based index of that next bond token,
    e.g. ``[' ', '-', ' ', '=', ' ']`` gives ``{0: 1, 1: 3, 2: 5}``.

    Returns
    -------
    dict
        ``{bond_count: token_position}``.
    """
    bond_token_idx_dic = {}
    bonds_seen = 0
    for position, token in enumerate(bond_token_lis, start=1):
        if token != ' ':
            bonds_seen += 1
        bond_token_idx_dic[bonds_seen] = position
    return bond_token_idx_dic


def rerank_special_bond(mol_block_indigo_lis, bond_idx):
    """Swap the two atom columns of one bond line of a molfile, in place.

    Parameters
    ----------
    mol_block_indigo_lis: list of str
        Lines of a molfile; the list is modified and also returned.
    bond_idx: int
        0-based index of the bond line, counted after the 4 header lines and
        the atom lines.

    Returns
    -------
    list of str
        The same list object with the selected line rewritten.
    """
    # RDKit is used only to learn how many atom lines precede the bond block.
    mol = Chem.MolFromMolBlock('\n'.join(mol_block_indigo_lis), removeHs=False)
    row = mol.GetNumAtoms() + 4 + bond_idx
    line = mol_block_indigo_lis[row]
    first_atom, second_atom, rest = line[:3], line[3:6], line[6:]
    mol_block_indigo_lis[row] = second_atom + first_atom + rest
    return mol_block_indigo_lis


def get_caption_r(caption):
    """Replace every ``{...}`` group of a caption with a numbered gold placeholder.

    The groups are replaced from left to right by ``[401Au]``, ``[402Au]``, ...

    Returns
    -------
    tuple
        ``(caption_r, words)`` where ``words`` lists the replaced groups,
        braces included, in order of appearance.
    """
    words = ['{' + inner + '}' for inner in re.findall(r'[{](.*?)[}]', caption)]
    caption_r = caption
    for number, word in enumerate(words, start=401):
        # Only the first remaining occurrence is replaced so that repeated
        # groups receive distinct placeholders.
        caption_r = caption_r.replace(word, '[{}Au]'.format(number), 1)
    return caption_r, words
def get_b_smiles_detailed_smiles(caption_r, smiles):
    """Bring an edit-annotated SMILES and its fully explicit SMILES into register.

    Parameters
    ----------
    caption_r: str
        SMILES in which some bonds carry edit marks.
    smiles: str
        SMILES of the same molecule, parsed without sanitization.

    Returns
    -------
    tuple
        ``(b_smiles, detailed_smiles)``. ``detailed_smiles`` is the RDKit
        output with all bonds explicit, and ``b_smiles`` is ``caption_r``
        with the characters it was missing inserted, so the two strings line
        up position by position except where ``b_smiles`` holds an edit mark.
    """
    edit_marks = ('!', '_', ';', '^', '&', '{', '}', '。', '《', '》')

    # Directional bonds are written as two characters ('/-', '\\-') in both
    # strings; a mark directly after the inserted '-' replaces it.
    b_smiles = caption_r.replace('/', '/-').replace('\\', '\\-')
    b_smiles = b_smiles.replace('-!', '!').replace('-?', '?')

    mol_tmp = Chem.MolFromSmiles(smiles, sanitize=False)
    detailed_smiles = Chem.MolToSmiles(mol_tmp, canonical=False, allBondsExplicit=True)
    detailed_smiles = detailed_smiles.replace('/', '/-').replace('\\', '\\-')

    for i in range(len(detailed_smiles)):
        if detailed_smiles[i] == b_smiles[i]:
            continue
        if b_smiles[i] not in edit_marks:
            # The explicit character is absent from b_smiles: insert it.
            b_smiles = b_smiles[:i] + detailed_smiles[i] + b_smiles[i:]

    return b_smiles, detailed_smiles


def get_bond_dic(b_smiles, detailed_smiles):
    """Collect the edit marks of an aligned SMILES pair, keyed by bond count.

    Both strings first have ``'-]'`` collapsed to ``']'``. They are then read
    in parallel; wherever they differ, the character of ``b_smiles`` is
    stored under the number of ``- = # :`` characters seen so far in
    ``detailed_smiles``.

    Returns
    -------
    dict
        ``{bond_count: mark}``.
    """
    b_smiles = b_smiles.replace('-]', ']')
    detailed_smiles = detailed_smiles.replace('-]', ']')

    bond_dic = {}
    bonds_seen = 0
    for detailed_char, marked_char in zip(detailed_smiles, b_smiles):
        if detailed_char != marked_char:
            bond_dic[bonds_seen] = marked_char
        if detailed_char in ('-', '=', '#', ':'):
            bonds_seen += 1
    return bond_dic


def get_t_smiles(e_smiles, o_smiles):
    """Copy the isotope labels of an edit-annotated SMILES onto the original molecule.

    The edit marks in ``e_smiles`` are replaced by plain bond symbols so that
    RDKit can parse it; every non-zero isotope found there is set on the atom
    with the same index in ``o_smiles``, whose atom map numbers are cleared.

    Returns
    -------
    str
        Non-canonical SMILES of the original molecule carrying the isotopes.
    """
    # One-pass equivalent of the chained replace() calls: each mark maps to
    # a single plain bond character.
    mark_to_bond = str.maketrans({
        '!': '-', '_': '-', ';': '-', '^': '-',
        '&': '=', '{': '=', '}': '=', '。': '=', '《': '=', '》': '=',
    })
    mol_r = Chem.MolFromSmiles(e_smiles.translate(mark_to_bond), sanitize=False)
    a = Chem.MolFromSmiles(o_smiles, sanitize=False)

    for atom in a.GetAtoms():
        atom.SetAtomMapNum(0)

    for atom in mol_r.GetAtoms():
        if atom.GetIsotope() != 0:
            a.GetAtomWithIdx(atom.GetIdx()).SetIsotope(atom.GetIsotope())

    return Chem.MolToSmiles(a, canonical=False)


def get_b_smiles(p_b):
    """Encode the edits of ``p_b`` as an annotated SMILES and verify the decoding.

    The body used to be a verbatim copy of ``get_b_smiles_forward`` followed
    by a verbatim copy of ``get_b_smiles_backward``; it now calls those two
    functions so the encoding logic exists in one place only.

    Parameters
    ----------
    p_b: sequence
        ``(o_smiles, core_edits, chai_edits, stereo_edits, charge_edits,
        core_edits_add, ...)``; only the first six items are read.

    Returns
    -------
    str
        The annotated SMILES, or ``'error'`` when decoding it does not give
        back the same edits (the mismatching lists are printed).
    """
    o_smiles = p_b[0]
    core_edits = p_b[1]
    chai_edits = p_b[2]
    stereo_edits = p_b[3]
    charge_edits = p_b[4]
    core_edits_add = p_b[5]

    caption = get_b_smiles_forward(p_b)
    # NOTE(review): out_b_smiles_lis is not defined in the visible part of the
    # file; the append is kept because the original performed it.
    out_b_smiles_lis.append(caption)

    (core_edits_, chai_edits_, stereo_edits_,
     charge_edits_, core_edits_add_) = get_b_smiles_backward(caption, o_smiles)

    decoded_vs_expected = (
        (core_edits_, core_edits),
        (chai_edits_, chai_edits),
        (stereo_edits_, stereo_edits),
        (charge_edits_, charge_edits),
        (core_edits_add_, core_edits_add),
    )
    if any(sorted(decoded) != sorted(expected) for decoded, expected in decoded_vs_expected):
        print(core_edits_, core_edits)
        print(chai_edits_, chai_edits)
        print(core_edits_add_, core_edits_add)
        return 'error'
    return caption
# Isotope-code contributions used to tag atoms (they are summed per atom).
_FWD_CHIRALITY_CODES = {'R': 10, 'S': 20, '?': 30}
_FWD_CHARGE_CODES = {'1': 200, '0': 400, '-1': 600}
_FWD_ADDED_BOND_CODE = 100
_FWD_HYDROGEN_LOSS_CODE = 9

# Characters that replace a bond symbol to describe an edit of that bond.
_FWD_CORE_MARKS = {'0.0': '!', '1.0': '_', '2.0': ';', '3.0': '^'}
_FWD_STEREO_MARKS = {'a': '&', 'e': '{', 'z': '}'}
# Used when the bond already carries a bond-order mark.
_FWD_STEREO_ON_EDITED_BOND_MARKS = {'a': '。', 'e': '《', 'z': '》'}
_FWD_ALL_MARKS = frozenset('!_;^&{}。《》')


def _fwd_mark_for(table, value, edit):
    """Return the mark for ``value``, raising ValueError for unsupported values."""
    try:
        return table[value]
    except KeyError:
        raise ValueError(
            'unsupported value {!r} in edit {!r}'.format(value, edit)) from None


def _fwd_atom_isotope_codes(core_edits, chai_edits, charge_edits, core_edits_add):
    """Build ``{atom_map: isotope_code}`` from the atom-level edits."""
    codes = {}

    def add(atom_map, amount):
        codes[atom_map] = codes.get(atom_map, 0) + amount

    for edit in core_edits:
        fields = edit.split(':')
        b, e, new_b = int(fields[0]), int(fields[1]), fields[3]
        # A zero atom map marks a hydrogen-count edit on the other atom.
        if min(b, e) == 0 and new_b == '0.0':
            codes[max(b, e)] = _FWD_HYDROGEN_LOSS_CODE

    for edit in chai_edits:
        fields = edit.split(':')
        amount = _FWD_CHIRALITY_CODES.get(fields[3])
        if amount is not None:
            add(int(fields[0]), amount)

    for edit in charge_edits:
        fields = edit.split(':')
        amount = _FWD_CHARGE_CODES.get(fields[3])
        if amount is not None:
            add(int(fields[0]), amount)

    for edit in core_edits_add:
        fields = edit.split(':')
        add(int(fields[0]), _FWD_ADDED_BOND_CODE)
        add(int(fields[1]), _FWD_ADDED_BOND_CODE)

    return codes


def _fwd_bond_marks(core_edits, stereo_edits, atom_pair_bond_idx_dic):
    """Build ``{bond_idx: mark}`` from the bond-level edits."""
    marks = {}

    for edit in core_edits:
        fields = edit.split(':')
        b, e, new_b = int(fields[0]), int(fields[1]), fields[3]
        if min(b, e) == 0:
            continue  # hydrogen-count edits are encoded on the atom
        bond_idx = atom_pair_bond_idx_dic[min(b, e), max(b, e)]
        marks[bond_idx] = _fwd_mark_for(_FWD_CORE_MARKS, new_b, edit)

    for edit in stereo_edits:
        fields = edit.split(':')
        b, e, new_b = int(fields[0]), int(fields[1]), fields[3]
        if min(b, e) == 0:
            continue
        bond_idx = atom_pair_bond_idx_dic[min(b, e), max(b, e)]
        table = (_FWD_STEREO_ON_EDITED_BOND_MARKS if bond_idx in marks
                 else _FWD_STEREO_MARKS)
        marks[bond_idx] = _fwd_mark_for(table, new_b, edit)

    return marks


def get_b_smiles_forward(p_b):
    """Encode reaction edits as an annotated SMILES of the product.

    Atom-level edits are stored in the isotope field of the atom: 9 for a
    lost hydrogen, 10/20/30 for chirality R/S/?, 200/400/600 for a charge of
    1/0/-1, and 100 for each end of an added bond (contributions are summed).
    Bond-level edits replace the bond symbol: ``! _ ; ^`` for a new bond
    order of 0/1/2/3, ``& { }`` for stereo a/e/z, and ``。 《 》`` for stereo
    a/e/z on a bond that also has a bond-order mark.

    An edit value outside these tables now raises ``ValueError``; previously
    the mark of the preceding edit was silently reused (or
    ``UnboundLocalError`` was raised on the first edit).

    Parameters
    ----------
    p_b: sequence
        ``(o_smiles, core_edits, chai_edits, stereo_edits, charge_edits,
        core_edits_add, ...)``; only the first six items are read. Edits are
        ``a1:a2:old:new`` strings addressed by atom map number.

    Returns
    -------
    str
        Kekulized, non-canonical SMILES with the edits embedded.

    Raises
    ------
    ValueError
        If a bond-order or stereo edit carries an unsupported new value.
    """
    o_smiles = p_b[0]
    core_edits = p_b[1]
    chai_edits = p_b[2]
    stereo_edits = p_b[3]
    charge_edits = p_b[4]
    core_edits_add = p_b[5]

    atom_idx_mark_dic = _fwd_atom_isotope_codes(
        core_edits, chai_edits, charge_edits, core_edits_add)

    a = Chem.MolFromSmiles(o_smiles, sanitize=False)
    for atom in a.GetAtoms():
        atom_map = atom.GetAtomMapNum()
        if atom_map in atom_idx_mark_dic:
            atom.SetIsotope(atom_idx_mark_dic[atom_map])
        atom.SetAtomMapNum(0)

    mol = copy.deepcopy(a)

    detailed_smiles = Chem.MolToSmiles(
        mol, canonical=False, allBondsExplicit=True, kekuleSmiles=True)
    concise_smiles = Chem.MolToSmiles(mol, canonical=False, kekuleSmiles=True)
    concise_smiles_no_chirality = Chem.MolToSmiles(
        mol, canonical=False, isomericSmiles=False, kekuleSmiles=True)

    atom_pair_bond_idx_dic = get_atom_pair_bond_idx_dic(concise_smiles_no_chirality)
    rm_token_lis = get_rm_token_lis(concise_smiles, detailed_smiles)
    bond_token_idx_dic = get_bond_token_idx_dic(get_bond_token_lis(detailed_smiles))

    bond_idx_mark_dic = _fwd_bond_marks(core_edits, stereo_edits, atom_pair_bond_idx_dic)

    # Put each mark at the position of its bond symbol in the detailed SMILES.
    for bond_idx, mark in bond_idx_mark_dic.items():
        rm_token_lis[bond_token_idx_dic[bond_idx]] = mark

    # Keep characters shared with the concise SMILES and the marks; explicit
    # bond symbols that carry no mark are dropped again.
    new_smiles_lis = []
    for position, token in enumerate(rm_token_lis):
        if token == ' ':
            new_smiles_lis.append(detailed_smiles[position])
        elif token[-1] in _FWD_ALL_MARKS:
            new_smiles_lis.append(token)

    return ''.join(new_smiles_lis)
def get_b_smiles_backward(caption_r, o_smiles):
    """Decode an annotated SMILES back into lists of edits.

    Bond marks are read by aligning ``caption_r`` with the fully explicit
    SMILES; atom-level edits are read from the digits of each atom's isotope
    (ones digit 9: lost hydrogen; tens digit 1/2/3: chirality R/S/?; hundreds
    digit 2-3/4-5/6-7: charge 1/0/-1; odd hundreds digit: end of an added
    bond). Atoms are addressed by their index plus one.

    Parameters
    ----------
    caption_r: str
        Annotated SMILES carrying bond marks and isotope codes.
    o_smiles: str
        SMILES of the molecule the annotation refers to.

    Returns
    -------
    tuple
        ``(core_edits, chai_edits, stereo_edits, charge_edits,
        core_edits_add)``, each a list of ``a1:a2:old:new`` strings.
    """
    # mark -> (new bond order or None, stereo code or None)
    mark_table = {
        '!': ('0.0', None),
        '_': ('1.0', None),
        ';': ('2.0', None),
        '^': ('3.0', None),
        '&': (None, 'a'),
        '{': (None, 'e'),
        '}': (None, 'z'),
        '。': ('2.0', 'a'),
        '《': ('2.0', 'e'),
        '》': ('2.0', 'z'),
    }
    chirality_by_tens = {1: 'R', 2: 'S', 3: '?'}
    # hundreds digit 2-3 -> 1, 4-5 -> 0, 6-7 -> -1 (keyed by hundreds // 2)
    charge_by_half_hundreds = {1: 1, 2: 0, 3: -1}

    t_smiles = get_t_smiles(caption_r, o_smiles)
    b_smiles, detailed_smiles = get_b_smiles_detailed_smiles(caption_r, t_smiles)
    bond_dic = get_bond_dic(b_smiles, detailed_smiles)

    # Invert the pair -> bond index mapping so bonds can be looked up by index.
    atom_pair_bond_idx = {
        bond_idx: atom_pair
        for atom_pair, bond_idx in get_atom_pair_bond_idx_dic(o_smiles).items()
    }

    mol = Chem.MolFromSmiles(t_smiles)
    Chem.Kekulize(mol)

    core_edits_ = []
    chai_edits_ = []
    stereo_edits_ = []
    charge_edits_ = []
    core_edits_add_ = []

    for bond_idx, mark in bond_dic.items():
        b, e = atom_pair_bond_idx[bond_idx]
        o_bond = mol.GetBondBetweenAtoms(b - 1, e - 1).GetBondTypeAsDouble()
        n_bond, stereo = mark_table.get(mark, (None, None))
        if n_bond is not None:
            core_edits_.append('{}:{}:{}:{}'.format(b, e, o_bond, n_bond))
        if stereo is not None:
            stereo_edits_.append('{}:{}:{}:{}'.format(b, e, 0, stereo))

    core_edits_add_atom_lis = []
    for atom in mol.GetAtoms():
        hundreds, remainder = divmod(atom.GetIsotope(), 100)
        tens, ones = divmod(remainder, 10)
        atom_number = atom.GetIdx() + 1

        if ones == 9:
            core_edits_.append('{}:{}:{}:{}'.format(atom_number, 0, '1.0', '0.0'))

        chirality = chirality_by_tens.get(tens)
        if chirality is not None:
            chai_edits_.append('{}:{}:{}:{}'.format(atom_number, 0, '0', chirality))

        charge = charge_by_half_hundreds.get(hundreds // 2)
        if charge is not None:
            charge_edits_.append('{}:{}:{}:{}'.format(atom_number, 0, '0', charge))

        if hundreds % 2 == 1:
            core_edits_add_atom_lis.append(atom_number)

    if core_edits_add_atom_lis != []:
        # Only the first two flagged atoms are used; they form the added bond.
        core_edits_add_.append('{}:{}:{}:{}'.format(
            core_edits_add_atom_lis[0], core_edits_add_atom_lis[1], '0.0', '1.0'))

    return core_edits_, chai_edits_, stereo_edits_, charge_edits_, core_edits_add_
def get_b_smiles_check(p_b):
    """Encode the edits of ``p_b`` and confirm that decoding returns them.

    Parameters
    ----------
    p_b: sequence of length 7
        ``(p, core_edits, chai_edits, stereo_edits, charge_edits,
        core_edits_add, lg_map_lis)``.

    Returns
    -------
    str
        The annotated SMILES from ``get_b_smiles_forward``, or ``'error'``
        when ``get_b_smiles_backward`` does not reproduce the edit lists
        (order-insensitively); the mismatching lists are printed.
    """
    p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis = p_b
    b_smiles = get_b_smiles_forward(p_b)
    (core_edits_, chai_edits_, stereo_edits_,
     charge_edits_, core_edits_add_) = get_b_smiles_backward(b_smiles, p_b[0])

    decoded_vs_expected = (
        (core_edits_, core_edits),
        (chai_edits_, chai_edits),
        (stereo_edits_, stereo_edits),
        (charge_edits_, charge_edits),
        (core_edits_add_, core_edits_add),
    )
    if any(sorted(decoded) != sorted(expected) for decoded, expected in decoded_vs_expected):
        print(core_edits_, core_edits)
        print(chai_edits_, chai_edits)
        print(core_edits_add_, core_edits_add)
        return 'error'
    return b_smiles


import re


def replacenth(string, sub, wanted, n):
    """Replace the ``n``-th occurrence of the literal ``sub`` with ``wanted``.

    ``sub`` used to be located as a regular expression but replaced as a
    literal string, so substrings containing regex metacharacters were
    located at the wrong position or failed to compile. It is now escaped,
    which makes both steps act on the same literal text.

    Parameters
    ----------
    string: str
        Text to edit.
    sub: str
        Literal substring to look for.
    wanted: str
        Replacement text.
    n: int
        1-based occurrence number.

    Returns
    -------
    str
        ``string`` with that single occurrence replaced.

    Raises
    ------
    IndexError
        If ``string`` contains fewer than ``n`` occurrences of ``sub``.
    """
    starts = [match.start() for match in re.finditer(re.escape(sub), string)]
    where = starts[n - 1]
    return string[:where] + string[where:].replace(sub, wanted, 1)


def cano_smiles_map(smiles):
    """Rewrite a mapped SMILES in the atom order RDKit produces without the maps.

    The atom map numbers are remembered and cleared, the molecule is written
    as a kekulized non-canonical SMILES and parsed again, and the remembered
    numbers are put back by atom index before the final SMILES is written.

    Returns
    -------
    str
        Kekulized, non-canonical SMILES with the atom map numbers restored.
    """
    mol = Chem.MolFromSmiles(smiles, sanitize=False)
    atom_map_lis = [atom.GetAtomMapNum() for atom in mol.GetAtoms()]
    for atom in mol.GetAtoms():
        atom.SetAtomMapNum(0)

    unmapped_smiles = Chem.MolToSmiles(mol, canonical=False, kekuleSmiles=True)

    mol = Chem.MolFromSmiles(unmapped_smiles, sanitize=False)
    for atom in mol.GetAtoms():
        atom.SetAtomMapNum(atom_map_lis[atom.GetIdx()])
    return Chem.MolToSmiles(mol, canonical=False, kekuleSmiles=True)
atom_map_lis = [] + mol = Chem.MolFromSmiles(smiles,sanitize = False) + for atom in mol.GetAtoms(): + atom_map_lis.append(atom.GetAtomMapNum()) + atom.SetAtomMapNum(0) + smiles = Chem.MolToSmiles(mol,canonical = False,kekuleSmiles=True) + mol = Chem.MolFromSmiles(smiles,sanitize = False) + for atom in mol.GetAtoms(): + atom.SetAtomMapNum(atom_map_lis[atom.GetIdx()]) + smiles = Chem.MolToSmiles(mol,canonical = False,kekuleSmiles=True) + return smiles + + +def get_lg_forward(core_edits,lg_map): + + attach_idx = [] + for core_edit in core_edits: + core_edit = core_edit.split(':') + if float(core_edit[2])-float(core_edit[3]) > 0: + attach_idx.append(int(core_edit[0])) + attach_idx.append(int(core_edit[1])) + + attach_idx = sorted(list(set(attach_idx))) + attach_idx = [i for i in attach_idx if i != 0] + lg_lis = [()]*len(attach_idx) + + for lg,map_lis in lg_map: + + if len(map_lis) == 1: + map_ = map_lis[0] + id_ = attach_idx.index(map_) + lg_lis[id_] = tuple(list(lg_lis[id_]) +[lg]) + + elif len(map_lis) != 1 and len(set(map_lis)) == 1: + map_ = map_lis[0] + id_ = attach_idx.index(map_) + lg_lis[id_] = tuple(list(lg_lis[id_]) +[lg]) + elif len(map_lis) != 1 and len(set(map_lis)) != 1 and lg.count(':') == 1: + for map_ in map_lis: + id_ = attach_idx.index(map_) + lg_lis[id_] = tuple(list(lg_lis[id_]) +[lg + "*"]) + elif len(map_lis) != 1 and len(set(map_lis)) != 1 and lg.count(':') == 2: + + if map_lis[0] 0: + attach_idx.append(int(core_edit[0])) + attach_idx.append(int(core_edit[1])) + + attach_idx = [i for i in attach_idx if i != 0] + attach_idx = sorted(list(set(attach_idx))) + + lg_map_new = [] + for id_,lg_ in zip(attach_idx,lg_lis): + for lg in list(lg_): + if lg.count(':') > 1: + + lg_map_new.append((lg,[id_]*lg.count(':'))) + else: + + lg_map_new.append((lg,[id_])) + + + dic_t = {} + for i,j in lg_map_new: + if '*' in i: + dic_t.setdefault(i,[]).append(j[0]) + else: + pass + + + lg_map_new_k =[] + for i,j in lg_map_new: + if '*' not in i: + 
# Lookup tables between the numeric isotope codes written into bracket atoms
# and their symbolic spelling. A code is the sum of one hundreds part
# ('α' 200, 'β' 400, 'γ' 600, 'δ' 100 and their combinations), one tens part
# ('r' 10, 's' 20, '?' 30) and one ones part ('~' 9); the symbol is spelled
# ones + tens + hundreds.

# Symbol -> number, inserted from the longest symbol to the shortest so that
# symbo_to_iso substitutes longer symbols before their prefixes.
dic_str_to_num = {}
for l in range(4, 0, -1):
    for a, i in zip([0, 200, 400, 600, 100, 300, 500, 700],
                    ['', 'α', 'β', 'γ', 'δ', 'αδ', 'βδ', 'γδ']):
        for b, j in zip([0, 10, 20, 30], ['', 'r', 's', '?']):
            for c, k in zip([0, 9], ['', '~']):
                if len(k + j + i) == l:
                    dic_str_to_num[k + j + i] = str(a + b + c)


# Number -> symbol, inserted from the longest number to the shortest; the
# empty symbol (code 0) is left out.
dic_num_to_str = {}
for l in range(3, 0, -1):
    for a, i in zip([0, 200, 400, 600, 100, 300, 500, 700],
                    ['', 'α', 'β', 'γ', 'δ', 'αδ', 'βδ', 'γδ']):
        for b, j in zip([0, 10, 20, 30], ['', 'r', 's', '?']):
            for c, k in zip([0, 9], ['', '~']):
                if len(str(a + b + c)) == l and len(k + j + i) != 0:
                    dic_num_to_str[str(a + b + c)] = k + j + i


def iso_to_symbo(txt, dic_num_to_str):
    """Spell the numeric codes of ``txt`` with symbols.

    Every ``'[' + number`` found in ``dic_num_to_str`` is replaced by
    ``'[' + symbol`` in the dictionary's iteration order, and the combined
    marks ``。 》 《`` are expanded to ``;& ;} ;{``.
    """
    for number, symbol in dic_num_to_str.items():
        txt = txt.replace('[' + number, '[' + symbol)
    for combined, expanded in (('。', ';&'), ('》', ';}'), ('《', ';{')):
        txt = txt.replace(combined, expanded)
    return txt


def symbo_to_iso(txt, dic_str_to_num):
    """Inverse of ``iso_to_symbo``: turn symbols back into numeric codes.

    Every ``'[' + symbol`` found in ``dic_str_to_num`` is replaced by
    ``'[' + number`` in the dictionary's iteration order, and ``;& ;} ;{``
    are collapsed to ``。 》 《``.

    NOTE(review): the substitution is purely textual, so a bracket atom whose
    content starts with ``r``, ``s`` or ``?`` for another reason (presumably
    e.g. aromatic ``[se]``) would be rewritten too; verify against the inputs.
    """
    for symbol, number in dic_str_to_num.items():
        txt = txt.replace('[' + symbol, '[' + number)
    for expanded, combined in ((';&', '。'), (';}', '》'), (';{', '《')):
        txt = txt.replace(expanded, combined)
    return txt


def merge_smiles_only(text):
    """Rebuild the reactant SMILES from an e-SMILES string.

    ``text`` is laid out as ``product>>>annotated_product<lg,...><lg,...>``.
    The edits are decoded from the annotated product, the ``<...>`` groups
    are mapped onto their attachment atoms, and the edits are applied to the
    product, whose atoms are numbered by index plus one.

    Returns
    -------
    str
        The SMILES returned by ``run_get_p_b_l_backward``.
    """
    text = symbo_to_iso(text, dic_str_to_num)
    sections = text.split('>>>')
    o_smiles = sections[0]
    b_smiles = sections[1].split('<')[0]

    # One tuple of leaving groups per '<...>' group; '<>' gives an empty tuple.
    lg_lis = [
        tuple() if group == '' else tuple(group.split(','))
        for group in re.findall(r"[<](.*?)[>]", text)
    ]

    core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add = \
        get_b_smiles_backward(b_smiles, o_smiles)
    lg_map_lis = get_lg_backward(core_edits, lg_lis)

    product = Chem.MolFromSmiles(o_smiles, sanitize=False)
    for atom in product.GetAtoms():
        atom.SetAtomMapNum(atom.GetIdx() + 1)
    p = Chem.MolToSmiles(product)

    return run_get_p_b_l_backward(
        p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis)
def merge_smiles_with_mapping_only(text):
    """Rebuild the reactant SMILES from an e-SMILES string, keeping atom maps.

    ``text`` is laid out as ``product>>>annotated_product<lg,...><lg,...>``.
    Works like ``merge_smiles_only`` but applies the edits with
    ``run_get_p_b_l_backward_with_mapping``.

    Returns
    -------
    str
        The SMILES returned by ``run_get_p_b_l_backward_with_mapping``.
    """
    text = symbo_to_iso(text, dic_str_to_num)
    sections = text.split('>>>')
    o_smiles = sections[0]
    b_smiles = sections[1].split('<')[0]

    # One tuple of leaving groups per '<...>' group; '<>' gives an empty tuple.
    lg_lis = [
        tuple() if group == '' else tuple(group.split(','))
        for group in re.findall(r"[<](.*?)[>]", text)
    ]

    core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add = \
        get_b_smiles_backward(b_smiles, o_smiles)
    lg_map_lis = get_lg_backward(core_edits, lg_lis)

    product = Chem.MolFromSmiles(o_smiles, sanitize=False)
    for atom in product.GetAtoms():
        atom.SetAtomMapNum(atom.GetIdx() + 1)
    p = Chem.MolToSmiles(product)

    return run_get_p_b_l_backward_with_mapping(
        p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis)


def merge_smiles(text):
    """Best-effort ``merge_smiles_only``: return ``""`` when decoding fails."""
    try:
        return merge_smiles_only(text)
    except Exception:  # was a bare except, which also swallowed KeyboardInterrupt
        return ""


def merge_smiles_with_mapping(text):
    """Best-effort ``merge_smiles_with_mapping_only``: return ``""`` on failure."""
    try:
        return merge_smiles_with_mapping_only(text)
    except Exception:  # was a bare except, which also swallowed KeyboardInterrupt
        return ""


def _assemble_e_smiles(p_b):
    """Format an edit tuple as ``product>>>annotated_product<lg,...>...``."""
    b_smiles = get_b_smiles_check(p_b)
    # NOTE(review): get_b_smiles_check returns the string 'error' when its
    # round-trip fails; as before, that string is embedded in the output.
    lg_lis = get_lg_forward(p_b[1], p_b[6])

    product = Chem.MolFromSmiles(p_b[0], sanitize=False)
    for atom in product.GetAtoms():
        atom.SetAtomMapNum(0)
    product_smiles = Chem.MolToSmiles(product, canonical=False)

    lg_part = ''.join('<{}>'.format(','.join(group)) for group in lg_lis)
    return iso_to_symbo(product_smiles + '>>>' + b_smiles + lg_part, dic_num_to_str)


def get_e_smiles(rxn):
    """Convert a reaction SMILES into its e-SMILES string.

    The edits come from ``run_get_p_b_l_forward``; no reconstruction check
    of the reactants is performed.
    """
    return _assemble_e_smiles(run_get_p_b_l_forward(rxn))


def get_e_smiles_with_check(rxn):
    """Convert a reaction SMILES into its e-SMILES string, with verification.

    The edits come from ``run_get_p_b_l_check``, which also verifies that
    they rebuild the reactants.

    Raises
    ------
    ValueError
        If ``run_get_p_b_l_check`` reports an error. This was already the
        exception type raised, by accident, when the error string was
        unpacked further down; it now carries the error code.
    """
    p_b = run_get_p_b_l_check(rxn)
    if isinstance(p_b, str):
        raise ValueError('reaction failed the edit round-trip check: ' + p_b)
    return _assemble_e_smiles(p_b)
def get_edit_from_e_smiles(text):
    """Decode an e-SMILES string into its edit lists without applying them.

    ``text`` is laid out as ``product>>>annotated_product<lg,...><lg,...>``.

    Returns
    -------
    tuple
        ``(core_edits, chai_edits, stereo_edits, charge_edits,
        core_edits_add, lg_map_lis)``; the first five come from
        ``get_b_smiles_backward`` and the last from ``get_lg_backward``.
    """
    text = symbo_to_iso(text, dic_str_to_num)
    sections = text.split('>>>')
    o_smiles = sections[0]
    b_smiles = sections[1].split('<')[0]

    # One tuple of leaving groups per '<...>' group; '<>' gives an empty tuple.
    lg_lis = [
        tuple() if group == '' else tuple(group.split(','))
        for group in re.findall(r"[<](.*?)[>]", text)
    ]

    edits = get_b_smiles_backward(b_smiles, o_smiles)
    core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add = edits
    lg_map_lis = get_lg_backward(core_edits, lg_lis)

    return core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis