Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,447 Bytes
84bfd88 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import rdkit
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from typing import *
import numpy as np
from rdkit import RDLogger
# Import-time side effect: disable RDKit logging for every channel matching
# "rdApp.*", so importing this module changes RDKit log output process-wide.
RDLogger.DisableLog("rdApp.*")
def try_or_none(func, *args, **kwargs):
    """
    Calls ``func(*args, **kwargs)`` and returns its result, or None on failure

    Only ``Exception`` subclasses are suppressed. ``KeyboardInterrupt``,
    ``SystemExit`` and other ``BaseException``-only signals propagate, so a
    long batch of guarded calls can still be interrupted or shut down.

    Note that a None result is ambiguous: it is returned both when ``func``
    raised and when ``func`` itself returned None.
    """
    try:
        return func(*args, **kwargs)
    except Exception:
        # Deliberate best-effort: any ordinary error maps to None.
        return None
def get_smiles(mols: List[rdkit.Chem.rdchem.Mol]) -> List[str]:
    """
    Converts every rdkit molecule in `mols` to its SMILES string

    The output list has one entry per input molecule, in the same order.
    """
    return list(map(Chem.MolToSmiles, mols))
def get_mols(smiles: List[str]) -> List[rdkit.Chem.rdchem.Mol]:
    """
    Parses every SMILES string in `smiles` into an rdkit molecule

    The output list has one entry per input string, in the same order.

    NOTE(review): Chem.MolFromSmiles is understood to return None for a
    string it cannot parse, in which case the result contains None entries
    rather than raising - confirm against the RDKit documentation.
    """
    return list(map(Chem.MolFromSmiles, smiles))
def get_fp(
    mols: List[rdkit.Chem.rdchem.Mol],
    radius: int = 2,
    nBits: int = 1024,
    useFeatures: bool = False,
):
    """
    Builds one Morgan bit-vector fingerprint per rdkit molecule (ECFP/FCFP)

    `radius`, `nBits` and `useFeatures` are forwarded unchanged to
    AllChem.GetMorganFingerprintAsBitVect for every molecule.

    Returns a 1-D numpy array of dtype=object with len(mols) entries, where
    entry i holds the fingerprint computed for mols[i].
    """
    # Options are identical for every molecule, so assemble them once.
    morgan_options = {
        "radius": radius,
        "nBits": nBits,
        "useFeatures": useFeatures,
    }
    fingerprints = np.empty(len(mols), dtype=object)
    # Fill slot by slot so each fingerprint object is stored as a single
    # element of the object array.
    for position, molecule in enumerate(mols):
        fingerprints[position] = AllChem.GetMorganFingerprintAsBitVect(
            molecule, **morgan_options
        )
    return fingerprints
def store_fp(fps: List, nBits: int = 1024):
    """
    Stores list of RDKIT fingerprint vectors in a dense numpy array

    Allocates a float32 array of shape (len(fps), nBits) and fills row i
    from fps[i] with DataStructs.ConvertToNumpyArray. An empty `fps` yields
    an array of shape (0, nBits).

    NOTE(review): assumes every fingerprint has exactly `nBits` bits, i.e.
    the same nBits used to create it - confirm how ConvertToNumpyArray
    treats a length mismatch.
    """
    array = np.empty((len(fps), nBits), dtype=np.float32)
    # Iterating a 2-D array yields row views, so each conversion writes
    # straight into `array`; zip pairs fingerprints with rows without
    # manual indexing.
    for fp, row in zip(fps, array):
        DataStructs.ConvertToNumpyArray(fp, row)
    return array
|