Spaces:

saicharan2804
/

molgenevalmetric

Sleeping

App Files Files Community

saicharan2804 commited on Apr 12

Commit

e9ff6dc

•

1 Parent(s): b81235b

Major change, please work

Browse files

Files changed (2) hide show

molgenevalmetric.py +1 -136
requirements.txt +1 -0

molgenevalmetric.py CHANGED Viewed

@@ -38,142 +38,7 @@ from fcd_torch import FCD
 # from SCScore import SCScorer
-'''
-This is a standalone, importable SCScorer model. It does not have tensorflow as a
-dependency and is a more attractive option for deployment. The calculations are
-fast enough that there is no real reason to use GPUs (via tf) instead of CPUs (via np)
-'''
-import numpy as np
-import time
-import rdkit.Chem as Chem
-import rdkit.Chem.AllChem as AllChem
-import json
-import gzip
-import six
-import os
-project_root = os.path.dirname(os.path.dirname(__file__))
-score_scale = 5.0
-min_separation = 0.25
-FP_len = 1024
-FP_rad = 2
-def sigmoid(x):
-  return 1 / (1 + np.exp(-x))
-class SCScorer():
-    def __init__(self, score_scale=score_scale):
-        self.vars = []
-        self.score_scale = score_scale
-        self._restored = False
-    def restore(self, weight_path=os.path.join('model.ckpt-10654.as_numpy.json.gz'), FP_rad=FP_rad, FP_len=FP_len):
-        self.FP_len = FP_len; self.FP_rad = FP_rad
-        self._load_vars(weight_path)
-        # print('Restored variables from {}'.format(weight_path))
-        if 'uint8' in weight_path or 'counts' in weight_path:
-            def mol_to_fp(self, mol):
-                if mol is None:
-                    return np.array((self.FP_len,), dtype=np.uint8)
-                fp = AllChem.GetMorganFingerprint(mol, self.FP_rad, useChirality=True) # uitnsparsevect
-                fp_folded = np.zeros((self.FP_len,), dtype=np.uint8)
-                for k, v in six.iteritems(fp.GetNonzeroElements()):
-                    fp_folded[k % self.FP_len] += v
-                return np.array(fp_folded)
-        else:
-            def mol_to_fp(self, mol):
-                if mol is None:
-                    return np.zeros((self.FP_len,), dtype=np.float32)
-                return np.array(AllChem.GetMorganFingerprintAsBitVect(mol, self.FP_rad, nBits=self.FP_len,
-                    useChirality=True), dtype=np.bool_)
-        self.mol_to_fp = mol_to_fp
-        self._restored = True
-        return self
-    def smi_to_fp(self, smi):
-        if not smi:
-            return np.zeros((self.FP_len,), dtype=np.float32)
-        return self.mol_to_fp(self, Chem.MolFromSmiles(smi))
-    def apply(self, x):
-        if not self._restored:
-            raise ValueError('Must restore model weights!')
-        # Each pair of vars is a weight and bias term
-        for i in range(0, len(self.vars), 2):
-            last_layer = (i == len(self.vars)-2)
-            W = self.vars[i]
-            b = self.vars[i+1]
-            x = np.matmul(x, W) + b
-            if not last_layer:
-                x = x * (x > 0) # ReLU
-        x = 1 + (score_scale - 1) * sigmoid(x)
-        return x
-    def get_score_from_smi(self, smi='', v=False):
-        if not smi:
-            return ('', 0.)
-        fp = np.array((self.smi_to_fp(smi)), dtype=np.float32)
-        if sum(fp) == 0:
-            if v: print('Could not get fingerprint?')
-            cur_score = 0.
-        else:
-            # Run
-            cur_score = self.apply(fp)
-            if v: print('Score: {}'.format(cur_score))
-        mol = Chem.MolFromSmiles(smi)
-        if mol:
-            smi = Chem.MolToSmiles(mol, isomericSmiles=True, kekuleSmiles=True)
-        else:
-            smi = ''
-        return (smi, cur_score)
-    def get_avg_score(self, smis):
-        """
-        Compute the average score for a list of SMILES strings.
-        Args:
-            smis (list of str): A list of SMILES strings.
-        Returns:
-            float: The average score of the given SMILES strings.
-        """
-        if not smis:  # Check if the list is empty
-            return 0.0
-        total_score = 0.0
-        valid_smiles_count = 0
-        for smi in smis:
-            _, score = self.get_score_from_smi(smi)
-            if score > 0:  # Assuming only positive scores are valid
-                total_score += score
-                valid_smiles_count += 1
-        # Avoid division by zero
-        if valid_smiles_count == 0:
-            return 0.0
-        else:
-            return total_score / valid_smiles_count
-    def _load_vars(self, weight_path):
-        if weight_path.endswith('pickle'):
-            import pickle
-            with open(weight_path, 'rb') as fid:
-                self.vars = pickle.load(fid)
-                self.vars = [x.tolist() for x in self.vars]
-        elif weight_path.endswith('json.gz'):
-            with gzip.GzipFile(weight_path, 'r') as fin:    # 4. gzip
-                json_bytes = fin.read()                      # 3. bytes (i.e. UTF-8)
-                json_str = json_bytes.decode('utf-8')            # 2. string (i.e. JSON)
-                self.vars = json.loads(json_str)
-                self.vars = [np.array(x) for x in self.vars]
 def get_mol(smiles_or_mol):
     """

 # from SCScore import SCScorer
+from myscscore.SCScore import SCScorer
 def get_mol(smiles_or_mol):
     """

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
 git+https://github.com/huggingface/evaluate@main
 numpy
 pandas
 scipy

 git+https://github.com/huggingface/evaluate@main
+git+https://github.com/saicharan2804/myscscore
 numpy
 pandas
 scipy