File size: 1,447 Bytes
84bfd88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import rdkit
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from typing import *
import numpy as np
from rdkit import RDLogger

RDLogger.DisableLog("rdApp.*")


def try_or_none(func, *args, **kwargs):
    """
    Calls func(*args, **kwargs) and returns its result, or None if the call
    raises an Exception
    """
    try:
        return func(*args, **kwargs)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate instead of being silently turned into None
        return None


def get_smiles(mols: List[rdkit.Chem.rdchem.Mol]) -> List[str]:
    """
    Converts each rdkit molecule in the list to its smiles string,
    preserving the input order
    """
    return list(map(Chem.MolToSmiles, mols))


def get_mols(smiles: List[str]) -> List[rdkit.Chem.rdchem.Mol]:
    """
    Parses each smiles string in the list into an rdkit molecule,
    preserving the input order
    """
    parsed = []
    for smi in smiles:
        parsed.append(Chem.MolFromSmiles(smi))
    return parsed


def get_fp(
    mols: List[rdkit.Chem.rdchem.Mol],
    radius: int = 2,
    nBits: int = 1024,
    useFeatures: bool = False,
):
    """
    Builds one Morgan bit-vector fingerprint (ECFP/FCFP) per RDKIT mol and
    returns them in a 1-D numpy object array of the same length as mols
    """

    # Same keyword options are forwarded for every molecule
    fp_options = dict(radius=radius, nBits=nBits, useFeatures=useFeatures)

    # Object array is filled one slot at a time, so each slot holds the
    # fingerprint object returned by RDKit
    fingerprints = np.empty(len(mols), dtype=object)
    for idx, molecule in enumerate(mols):
        fingerprints[idx] = AllChem.GetMorganFingerprintAsBitVect(
            molecule, **fp_options
        )

    return fingerprints


def store_fp(fps: List, nBits: int = 1024):
    """
    Copies a list of RDKIT fingerprint vectors into a float32 numpy array
    of shape (len(fps), nBits), one fingerprint per row
    """

    n_rows = len(fps)
    matrix = np.empty((n_rows, nBits), dtype=np.float32)

    # Each row of the matrix is passed to RDKit as the destination buffer
    for idx, row in enumerate(matrix):
        DataStructs.ConvertToNumpyArray(fps[idx], row)

    return matrix