| import warnings |
| import numpy as np |
| from loguru import logger |
| from sklearn.ensemble import RandomForestRegressor |
| from rdkit.Chem import Descriptors, rdMolDescriptors |
| import joblib |
| from rdkit import Chem, rdBase, DataStructs |
| from rdkit.Chem import AllChem |
| from typing import List |
|
|
|
|
def fingerprints_from_mol(molecule, radius=3, size=2048, hashed=False):
    """
    Build a Morgan (ECFP-style) fingerprint of one molecule as a NumPy row.

    Args:
        molecule: RDKit molecule to fingerprint.
        radius: Morgan radius forwarded to RDKit.
        size: fingerprint length, forwarded to RDKit as ``nBits``.
        hashed: if true, ``AllChem.GetHashedMorganFingerprint`` is used;
            otherwise ``AllChem.GetMorganFingerprintAsBitVect``.

    Returns:
        The fingerprint as a 2-D NumPy array with a single row.
    """
    builder = (
        AllChem.GetHashedMorganFingerprint
        if hashed
        else AllChem.GetMorganFingerprintAsBitVect
    )
    fingerprint = builder(molecule, radius, nBits=size)

    # One-element placeholder handed to RDKit to fill in.
    # NOTE(review): this relies on ConvertToNumpyArray resizing the target
    # array to the fingerprint length -- confirm against the RDKit docs.
    row = np.zeros((1,))
    DataStructs.ConvertToNumpyArray(fingerprint, row)
    return row.reshape(1, -1)
|
|
|
|
def fingerprints_from_smiles(smiles: List, size=2048):
    """
    Fingerprint a collection of SMILES strings, flagging unparsable ones.

    Args:
        smiles: SMILES strings to convert.
        size: fingerprint width, forwarded to ``fingerprints_from_mol``.

    Returns:
        A ``(fps, valid_mask)`` pair. ``fps`` stacks one row per input along
        axis 0; inputs for which no fingerprint is computed contribute an
        all-zero row of width ``size``, and empty input yields a
        ``(0, size)`` array. ``valid_mask`` holds 1 where
        ``Chem.MolFromSmiles`` returned a molecule and 0 where it returned
        ``None``.
    """
    rows, valid_mask = [], []
    for smi in smiles:
        parsed = Chem.MolFromSmiles(smi)
        valid_mask.append(0 if parsed is None else 1)
        if parsed:
            rows.append(fingerprints_from_mol(parsed, size=size))
        else:
            # Placeholder row keeps the output aligned with the input order.
            rows.append(np.zeros((1, size)))

    if len(rows) == 0:
        return np.zeros((0, size)), valid_mask
    return np.concatenate(rows, axis=0), valid_mask
|
|
|
|
def getMolDescriptors(mol, missingVal=0):
    """
    Calculate the full list of descriptors for a molecule.

    Every ``(name, function)`` entry of ``Descriptors._descList`` is
    evaluated first, followed by three extra descriptors computed with
    ``rdMolDescriptors``: Lipinski hydrogen-bond donors, Lipinski
    hydrogen-bond acceptors and rotatable bonds.

    Args:
        mol: molecule passed to each descriptor function.
        missingVal: value recorded when a descriptor function raises.

    Returns:
        A ``(values, names)`` pair of equally long lists, in evaluation
        order.
    """
    extra_descriptors = (
        ('hydrogen-bond donors', rdMolDescriptors.CalcNumLipinskiHBD),
        ('hydrogen-bond acceptors', rdMolDescriptors.CalcNumLipinskiHBA),
        ('rotatable bonds', rdMolDescriptors.CalcNumRotatableBonds),
    )

    values, names = [], []
    for name, func in (*Descriptors._descList, *extra_descriptors):
        try:
            value = func(mol)
        except Exception:
            # Best effort: a failing descriptor becomes ``missingVal``.
            # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt
            # and SystemExit propagate instead of being swallowed.
            value = missingVal
        values.append(value)
        names.append(name)
    return values, names
|
|
|
|
def get_pep_dps_from_smi(smi):
    """
    Compute the descriptor vector for a single SMILES string.

    Args:
        smi: SMILES string to convert.

    Returns:
        NumPy array of the values returned by ``getMolDescriptors``. When no
        molecule could be built, ``None`` is passed to ``getMolDescriptors``
        and a failure message is printed.
    """
    try:
        mol = Chem.MolFromSmiles(smi)
    except Exception:
        # Input that RDKit rejects outright; treated like a failed parse.
        mol = None

    # MolFromSmiles signals an unparsable SMILES by returning None rather
    # than raising, so the failure has to be reported here and not only in
    # the except branch.
    if mol is None:
        print(f"convert smi {smi} to molecule failed!")

    dps, _ = getMolDescriptors(mol)
    return np.array(dps)
|
|
|
|
def get_pep_dps(smi_list):
    """
    Compute descriptor vectors for a list of SMILES strings.

    Args:
        smi_list: sequence of SMILES strings.

    Returns:
        NumPy array with one row per SMILES (built by
        ``get_pep_dps_from_smi``). For empty input, a ``(0, n_descriptors)``
        array of zeros.
    """
    if len(smi_list) == 0:
        # Derive the width from the descriptor list itself instead of a
        # hard-coded count: the number of entries depends on the installed
        # RDKit, and the empty result must match the non-empty one.
        _, names = getMolDescriptors(None)
        return np.zeros((0, len(names)))
    return np.array([get_pep_dps_from_smi(smi) for smi in smi_list])
|
|
|
|
|
|
def check_smi_validity(smiles: list):
    """
    Keep only the SMILES entries for which RDKit yields a molecule.

    Args:
        smiles: candidate SMILES strings; falsy entries are never parsed.

    Returns:
        A ``(valid_smi, valid_idx)`` pair: the accepted strings and their
        positions in ``smiles``, both in input order.
    """
    kept_smiles, kept_positions = [], []
    for position, candidate in enumerate(smiles):
        try:
            parsed = Chem.MolFromSmiles(candidate) if candidate else None
            accepted = bool(parsed)
        except Exception:
            # Best effort: an entry that raises is skipped like an invalid one.
            continue
        if accepted:
            kept_smiles.append(candidate)
            kept_positions.append(position)
    return kept_smiles, kept_positions