saicharan2804 committed on
Commit
e9ff6dc
1 Parent(s): b81235b

Major change, please work

Browse files
Files changed (2) hide show
  1. molgenevalmetric.py +1 -136
  2. requirements.txt +1 -0
molgenevalmetric.py CHANGED
@@ -38,142 +38,7 @@ from fcd_torch import FCD
38
 
39
  # from SCScore import SCScorer
40
 
41
- '''
42
- This is a standalone, importable SCScorer model. It does not have tensorflow as a
43
- dependency and is a more attractive option for deployment. The calculations are
44
- fast enough that there is no real reason to use GPUs (via tf) instead of CPUs (via np)
45
- '''
46
-
47
- import numpy as np
48
- import time
49
- import rdkit.Chem as Chem
50
- import rdkit.Chem.AllChem as AllChem
51
- import json
52
- import gzip
53
- import six
54
-
55
- import os
56
- project_root = os.path.dirname(os.path.dirname(__file__))
57
-
58
- score_scale = 5.0
59
- min_separation = 0.25
60
-
61
- FP_len = 1024
62
- FP_rad = 2
63
-
64
- def sigmoid(x):
65
- return 1 / (1 + np.exp(-x))
66
-
67
- class SCScorer():
68
- def __init__(self, score_scale=score_scale):
69
- self.vars = []
70
- self.score_scale = score_scale
71
- self._restored = False
72
-
73
- def restore(self, weight_path=os.path.join('model.ckpt-10654.as_numpy.json.gz'), FP_rad=FP_rad, FP_len=FP_len):
74
- self.FP_len = FP_len; self.FP_rad = FP_rad
75
- self._load_vars(weight_path)
76
- # print('Restored variables from {}'.format(weight_path))
77
-
78
- if 'uint8' in weight_path or 'counts' in weight_path:
79
- def mol_to_fp(self, mol):
80
- if mol is None:
81
- return np.array((self.FP_len,), dtype=np.uint8)
82
- fp = AllChem.GetMorganFingerprint(mol, self.FP_rad, useChirality=True) # uitnsparsevect
83
- fp_folded = np.zeros((self.FP_len,), dtype=np.uint8)
84
- for k, v in six.iteritems(fp.GetNonzeroElements()):
85
- fp_folded[k % self.FP_len] += v
86
- return np.array(fp_folded)
87
- else:
88
- def mol_to_fp(self, mol):
89
- if mol is None:
90
- return np.zeros((self.FP_len,), dtype=np.float32)
91
- return np.array(AllChem.GetMorganFingerprintAsBitVect(mol, self.FP_rad, nBits=self.FP_len,
92
- useChirality=True), dtype=np.bool_)
93
- self.mol_to_fp = mol_to_fp
94
-
95
- self._restored = True
96
- return self
97
-
98
- def smi_to_fp(self, smi):
99
- if not smi:
100
- return np.zeros((self.FP_len,), dtype=np.float32)
101
- return self.mol_to_fp(self, Chem.MolFromSmiles(smi))
102
-
103
- def apply(self, x):
104
- if not self._restored:
105
- raise ValueError('Must restore model weights!')
106
- # Each pair of vars is a weight and bias term
107
- for i in range(0, len(self.vars), 2):
108
- last_layer = (i == len(self.vars)-2)
109
- W = self.vars[i]
110
- b = self.vars[i+1]
111
- x = np.matmul(x, W) + b
112
- if not last_layer:
113
- x = x * (x > 0) # ReLU
114
- x = 1 + (score_scale - 1) * sigmoid(x)
115
- return x
116
-
117
- def get_score_from_smi(self, smi='', v=False):
118
- if not smi:
119
- return ('', 0.)
120
- fp = np.array((self.smi_to_fp(smi)), dtype=np.float32)
121
- if sum(fp) == 0:
122
- if v: print('Could not get fingerprint?')
123
- cur_score = 0.
124
- else:
125
- # Run
126
- cur_score = self.apply(fp)
127
- if v: print('Score: {}'.format(cur_score))
128
- mol = Chem.MolFromSmiles(smi)
129
- if mol:
130
- smi = Chem.MolToSmiles(mol, isomericSmiles=True, kekuleSmiles=True)
131
- else:
132
- smi = ''
133
- return (smi, cur_score)
134
-
135
- def get_avg_score(self, smis):
136
- """
137
- Compute the average score for a list of SMILES strings.
138
-
139
- Args:
140
- smis (list of str): A list of SMILES strings.
141
-
142
- Returns:
143
- float: The average score of the given SMILES strings.
144
- """
145
- if not smis: # Check if the list is empty
146
- return 0.0
147
-
148
- total_score = 0.0
149
- valid_smiles_count = 0
150
-
151
- for smi in smis:
152
- _, score = self.get_score_from_smi(smi)
153
- if score > 0: # Assuming only positive scores are valid
154
- total_score += score
155
- valid_smiles_count += 1
156
-
157
- # Avoid division by zero
158
- if valid_smiles_count == 0:
159
- return 0.0
160
- else:
161
- return total_score / valid_smiles_count
162
-
163
- def _load_vars(self, weight_path):
164
- if weight_path.endswith('pickle'):
165
- import pickle
166
- with open(weight_path, 'rb') as fid:
167
- self.vars = pickle.load(fid)
168
- self.vars = [x.tolist() for x in self.vars]
169
- elif weight_path.endswith('json.gz'):
170
- with gzip.GzipFile(weight_path, 'r') as fin: # 4. gzip
171
- json_bytes = fin.read() # 3. bytes (i.e. UTF-8)
172
- json_str = json_bytes.decode('utf-8') # 2. string (i.e. JSON)
173
- self.vars = json.loads(json_str)
174
- self.vars = [np.array(x) for x in self.vars]
175
-
176
-
177
 
178
  def get_mol(smiles_or_mol):
179
  """
 
38
 
39
  # from SCScore import SCScorer
40
 
41
+ from myscscore.SCScore import SCScorer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  def get_mol(smiles_or_mol):
44
  """
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  git+https://github.com/huggingface/evaluate@main
 
2
  numpy
3
  pandas
4
  scipy
 
1
  git+https://github.com/huggingface/evaluate@main
2
+ git+https://github.com/saicharan2804/myscscore
3
  numpy
4
  pandas
5
  scipy