Atomu2014 committed on
Commit
3b77f6e
·
1 Parent(s): e81c73d

remove vina

Browse files
app.py CHANGED
@@ -77,12 +77,10 @@ class NpEncoder(json.JSONEncoder):
77
  return super(NpEncoder, self).default(obj)
78
 
79
 
80
- def evaluate(value: str, out_fn: str):
81
- protein_path, ligand_path = load(value)
82
-
83
- metrics = Metrics(protein_path, ligand_path, out_fn).evaluate()
84
-
85
- return json.dumps(metrics, indent=4, cls=NpEncoder)
86
 
87
 
88
 
@@ -104,9 +102,9 @@ with gr.Blocks() as demo:
104
  btn3 = gr.Button('visualize')
105
  btn3.click(show, inputs=[dropdown, candidates], outputs=[gen_complex])
106
 
107
- metrics = gr.Textbox(label='metrics')
108
- btn4 = gr.Button('evaluate')
109
- btn4.click(evaluate, inputs=[dropdown, candidates], outputs=[metrics])
110
 
111
  if __name__ == '__main__':
112
  demo.launch(share=True)
 
77
  return super(NpEncoder, self).default(obj)
78
 
79
 
80
+ # def evaluate(value: str, out_fn: str):
81
+ # protein_path, ligand_path = load(value)
82
+ # metrics = Metrics(protein_path, ligand_path, out_fn).evaluate()
83
+ # return json.dumps(metrics, indent=4, cls=NpEncoder)
 
 
84
 
85
 
86
 
 
102
  btn3 = gr.Button('visualize')
103
  btn3.click(show, inputs=[dropdown, candidates], outputs=[gen_complex])
104
 
105
+ # metrics = gr.Textbox(label='metrics')
106
+ # btn4 = gr.Button('evaluate')
107
+ # btn4.click(evaluate, inputs=[dropdown, candidates], outputs=[metrics])
108
 
109
  if __name__ == '__main__':
110
  demo.launch(share=True)
core/datasets/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
  import torch
2
  from torch.utils.data import Subset
3
- from core.datasets.pl_pair_dataset import PocketLigandPairDataset, PocketLigandPairDatasetFeaturized, PocketLigandGeneratedPairDataset
4
  from core.datasets.pdbbind import PDBBindDataset
5
 
6
 
 
1
  import torch
2
  from torch.utils.data import Subset
3
+ # from core.datasets.pl_pair_dataset import PocketLigandPairDataset, PocketLigandPairDatasetFeaturized, PocketLigandGeneratedPairDataset
4
  from core.datasets.pdbbind import PDBBindDataset
5
 
6
 
core/datasets/pl_pair_dataset.py CHANGED
@@ -1,452 +1,452 @@
1
- import os
2
- import pickle
3
- import lmdb
4
- from torch.utils.data import Dataset
5
- from tqdm.auto import tqdm
6
- import sys
7
- from time import time
8
-
9
- import torch
10
- from torch_geometric.transforms import Compose
11
-
12
- from core.datasets.utils import PDBProtein, parse_sdf_file, ATOM_FAMILIES_ID
13
- from core.datasets.pl_data import ProteinLigandData, torchify_dict
14
-
15
- import core.utils.transforms as trans
16
-
17
-
18
- class DBReader:
19
- def __init__(self, path) -> None:
20
- self.path = path
21
- self.db = None
22
- self.keys = None
23
-
24
- def _connect_db(self):
25
- """
26
- Establish read-only database connection
27
- """
28
- assert self.db is None, 'A connection has already been opened.'
29
- self.db = lmdb.open(
30
- self.path,
31
- map_size=10*(1024*1024*1024), # 10GB
32
- create=False,
33
- subdir=False,
34
- readonly=True,
35
- lock=False,
36
- readahead=False,
37
- meminit=False,
38
- )
39
- with self.db.begin() as txn:
40
- self.keys = list(txn.cursor().iternext(values=False))
41
-
42
- def _close_db(self):
43
- self.db.close()
44
- self.db = None
45
- self.keys = None
46
-
47
- def __del__(self):
48
- if self.db is not None:
49
- self._close_db()
50
-
51
- def __len__(self):
52
- if self.db is None:
53
- self._connect_db()
54
- return len(self.keys)
55
-
56
- def __getitem__(self, idx):
57
- if self.db is None:
58
- self._connect_db()
59
- key = self.keys[idx]
60
- data = pickle.loads(self.db.begin().get(key))
61
- data = ProteinLigandData(**data)
62
- data.id = idx
63
- assert data.protein_pos.size(0) > 0
64
- return data
65
 
66
 
67
- class PocketLigandPairDataset(Dataset):
68
 
69
- def __init__(self, raw_path, transform=None, version='final'):
70
- super().__init__()
71
- self.raw_path = raw_path.rstrip('/')
72
- self.index_path = os.path.join(self.raw_path, 'index.pkl')
73
- self.processed_path = os.path.join(os.path.dirname(self.raw_path),
74
- os.path.basename(self.raw_path) + f'_processed_{version}.lmdb')
75
- self.transform = transform
76
- self.reader = DBReader(self.processed_path)
77
 
78
- if not os.path.exists(self.processed_path):
79
- print(f'{self.processed_path} does not exist, begin processing data')
80
- self._process()
81
 
82
- def _process(self):
83
- db = lmdb.open(
84
- self.processed_path,
85
- map_size=10*(1024*1024*1024), # 10GB
86
- create=True,
87
- subdir=False,
88
- readonly=False, # Writable
89
- )
90
- with open(self.index_path, 'rb') as f:
91
- index = pickle.load(f)
92
-
93
- num_skipped = 0
94
- with db.begin(write=True, buffers=True) as txn:
95
- for i, (pocket_fn, ligand_fn, *_) in enumerate(tqdm(index)):
96
- if pocket_fn is None: continue
97
- try:
98
- # data_prefix = '/data/work/jiaqi/binding_affinity'
99
- data_prefix = self.raw_path
100
- pocket_dict = PDBProtein(os.path.join(data_prefix, pocket_fn)).to_dict_atom()
101
- ligand_dict = parse_sdf_file(os.path.join(data_prefix, ligand_fn))
102
- data = ProteinLigandData.from_protein_ligand_dicts(
103
- protein_dict=torchify_dict(pocket_dict),
104
- ligand_dict=torchify_dict(ligand_dict),
105
- )
106
- data.protein_filename = pocket_fn
107
- data.ligand_filename = ligand_fn
108
- data = data.to_dict() # avoid torch_geometric version issue
109
- txn.put(
110
- key=str(i).encode(),
111
- value=pickle.dumps(data)
112
- )
113
- except:
114
- num_skipped += 1
115
- print('Skipping (%d) %s' % (num_skipped, ligand_fn, ))
116
- continue
117
- db.close()
118
 
119
- def __len__(self):
120
- return len(self.reader)
121
-
122
- def __getitem__(self, idx):
123
- data = self.reader[idx]
124
- if self.transform is not None:
125
- data = self.transform(data)
126
- return data
127
-
128
-
129
- class PocketLigandGeneratedPairDataset(Dataset):
130
-
131
- def __init__(self, raw_path, transform=None, version='4-decompdiff'):
132
- super().__init__()
133
- self.raw_path = raw_path.rstrip('/')
134
- self.generated_path = os.path.join('/sharefs/share/sbdd_data/all_results', f'{version}_docked_pose_checked.pt')
135
- self.processed_path = os.path.join(os.path.dirname(self.raw_path),
136
- os.path.basename(self.raw_path) + f'_processed_{version}.lmdb')
137
- self.transform = transform
138
- self.reader = DBReader(self.processed_path)
139
-
140
- if not os.path.exists(self.processed_path):
141
- print(f'{self.processed_path} does not exist, begin processing data')
142
- self._process()
143
 
144
- def _process(self):
145
- db = lmdb.open(
146
- self.processed_path,
147
- map_size=10*(1024*1024*1024), # 10GB
148
- create=True,
149
- subdir=False,
150
- readonly=False, # Writable
151
- )
152
- with open(self.generated_path, 'rb') as f:
153
- results = torch.load(f)
154
-
155
- num_skipped = 0
156
- with db.begin(write=True, buffers=True) as txn:
157
- idx = -1
158
- for i, res in tqdm(enumerate(results), total=len(results)):
159
- if isinstance(res, dict):
160
- res = [res]
161
- for r in res:
162
- idx += 1
163
- mol = r["mol"]
164
- ligand_fn = r["ligand_filename"]
165
- pocket_fn = os.path.join(
166
- os.path.dirname(ligand_fn),
167
- os.path.basename(ligand_fn)[:-4] + '_pocket10.pdb'
168
- )
169
-
170
- if pocket_fn is None: continue
171
- try:
172
- data_prefix = self.raw_path
173
- pocket_dict = PDBProtein(os.path.join(data_prefix, pocket_fn)).to_dict_atom()
174
- ligand_dict = parse_sdf_file(mol)
175
- # ligand_dict = parse_sdf_file(os.path.join(data_prefix, ligand_fn))
176
- data = ProteinLigandData.from_protein_ligand_dicts(
177
- protein_dict=torchify_dict(pocket_dict),
178
- ligand_dict=torchify_dict(ligand_dict),
179
- )
180
- data.protein_filename = pocket_fn
181
- data.ligand_filename = ligand_fn
182
- data = data.to_dict() # avoid torch_geometric version issue
183
- txn.put(
184
- key=str(idx).encode(),
185
- value=pickle.dumps(data)
186
- )
187
- except Exception as e:
188
- num_skipped += 1
189
- print('Skipping (%d) %s' % (num_skipped, ligand_fn, ), e)
190
- continue
191
- db.close()
192
 
193
- def __len__(self):
194
- return len(self.reader)
195
-
196
- def __getitem__(self, idx):
197
- data = self.reader[idx]
198
- if self.transform is not None:
199
- data = self.transform(data)
200
- return data
201
-
202
-
203
- class PocketLigandPairDatasetFromComplex(Dataset):
204
- def __init__(self, raw_path, transform=None, version='final', radius=10.0):
205
- super().__init__()
206
- self.raw_path = raw_path.rstrip('/')
207
- self.index_path = os.path.join(self.raw_path, 'index.pkl')
208
- base_name = os.path.basename(self.raw_path)
209
- if 'pocket' in base_name:
210
- self.processed_path = os.path.join(os.path.dirname(self.raw_path),
211
- os.path.basename(self.raw_path) + f'_processed_{version}.lmdb')
212
- else:
213
- self.processed_path = os.path.join(os.path.dirname(self.raw_path),
214
- os.path.basename(self.raw_path) + f'_pocket{radius}_processed_{version}.lmdb')
215
- self.transform = transform
216
- self.reader = DBReader(self.processed_path)
217
-
218
- self.radius = radius
219
-
220
- if not os.path.exists(self.processed_path):
221
- print(f'{self.processed_path} does not exist, begin processing data')
222
- self._process()
223
-
224
- def _process(self):
225
- db = lmdb.open(
226
- self.processed_path,
227
- map_size=10*(1024*1024*1024), # 50GB
228
- create=True,
229
- subdir=False,
230
- readonly=False, # Writable
231
- max_readers=256,
232
- )
233
- with open(self.index_path, 'rb') as f:
234
- index = pickle.load(f)
235
-
236
- print('Processing data...', 'index', self.index_path, index[0])
237
-
238
- num_skipped = 0
239
- with db.begin(write=True, buffers=True) as txn:
240
- for i, (pocket_fn, ligand_fn, *_) in enumerate(tqdm(index)):
241
- if pocket_fn is None: continue
242
- try:
243
- data_prefix = self.raw_path
244
- # clip pocket
245
- ligand_dict = parse_sdf_file(os.path.join(data_prefix, ligand_fn))
246
- protein = PDBProtein(os.path.join(data_prefix, pocket_fn))
247
- selected = protein.query_residues_ligand(ligand_dict, self.radius)
248
- pdb_block_pocket = protein.residues_to_pdb_block(selected)
249
- pocket_dict = PDBProtein(pdb_block_pocket).to_dict_atom()
250
-
251
- # pocket_dict = PDBProtein(os.path.join(data_prefix, pocket_fn)).to_dict_atom()
252
- # ligand_dict = parse_sdf_file(os.path.join(data_prefix, ligand_fn))
253
 
254
- data = ProteinLigandData.from_protein_ligand_dicts(
255
- protein_dict=torchify_dict(pocket_dict),
256
- ligand_dict=torchify_dict(ligand_dict),
257
- )
258
- data.protein_filename = pocket_fn
259
- data.ligand_filename = ligand_fn
260
- data = data.to_dict() # avoid torch_geometric version issue
261
- txn.put(
262
- key=str(i).encode(),
263
- value=pickle.dumps(data)
264
- )
265
- except Exception as e:
266
- num_skipped += 1
267
- print('Skipping (%d) %s' % (num_skipped, ligand_fn, ), e)
268
- with open('skipped.txt', 'a') as f:
269
- f.write('Skip %s due to %s\n' % (ligand_fn, e))
270
- continue
271
- db.close()
272
-
273
- def __len__(self):
274
- return len(self.reader)
275
-
276
- def __getitem__(self, idx):
277
- data = self.reader[idx]
278
- if self.transform is not None:
279
- data = self.transform(data)
280
- return data
281
 
282
 
283
- class PocketLigandPairDatasetFeaturized(Dataset):
284
- def __init__(self, raw_path, ligand_atom_mode, version='simple'):
285
- """
286
- in simple version, only these features are saved for better IO:
287
- protein_pos, protein_atom_feature, protein_element,
288
- ligand_pos, ligand_atom_feature_full, ligand_element
289
- """
290
- self.raw_path = raw_path
291
- self.ligand_atom_mode = ligand_atom_mode
292
- self.version = version
293
-
294
- if version == 'simple':
295
- self.features_to_save = [
296
- 'protein_pos', 'protein_atom_feature', 'protein_element',
297
- 'ligand_pos', 'ligand_atom_feature_full', 'ligand_element',
298
- 'protein_filename', 'ligand_filename',
299
- ]
300
- else:
301
- raise NotImplementedError
302
-
303
- self.transformed_path = os.path.join(
304
- os.path.dirname(self.raw_path), os.path.basename(self.raw_path) +
305
- f'_{ligand_atom_mode}_transformed_{version}.pt'
306
- )
307
- if not os.path.exists(self.transformed_path):
308
- print(f'{self.transformed_path} does not exist, begin transforming data')
309
- self._transform()
310
- else:
311
- print(f'reading data from {self.transformed_path}...')
312
- tic = time()
313
- tr_data = torch.load(self.transformed_path)
314
- toc = time()
315
- print(f'{toc - tic} elapsed')
316
- self.train_data, self.test_data = tr_data['train'], tr_data['test']
317
- self.protein_atom_feature_dim = tr_data['protein_atom_feature_dim']
318
- self.ligand_atom_feature_dim = tr_data['ligand_atom_feature_dim']
319
 
320
- def _transform(self):
321
- raw_dataset = PocketLigandPairDataset(self.raw_path, None, 'final')
322
-
323
- split_path = os.path.join(
324
- os.path.dirname(self.raw_path), 'crossdocked_pocket10_pose_split.pt',
325
- )
326
- split = torch.load(split_path)
327
- train_ids, test_ids = split['train'], split['test']
328
- print(f'train_size: {len(train_ids)}, test_size: {len(test_ids)}')
329
-
330
- protein_featurizer = trans.FeaturizeProteinAtom()
331
- ligand_featurizer = trans.FeaturizeLigandAtom(self.ligand_atom_mode)
332
- transform_list = [
333
- protein_featurizer,
334
- ligand_featurizer,
335
- # trans.FeaturizeLigandBond(),
336
- ]
337
- transform = Compose(transform_list)
338
- self.protein_atom_feature_dim = protein_featurizer.feature_dim
339
- self.ligand_atom_feature_dim = ligand_featurizer.feature_dim
340
-
341
- def _transform_subset(ids):
342
- data_list = []
343
-
344
- for idx in tqdm(ids):
345
- data = raw_dataset[idx]
346
- data = transform(data)
347
- tr_data = {}
348
- for k in self.features_to_save:
349
- tr_data[k] = getattr(data, k)
350
- tr_data['id'] = idx
351
- tr_data = ProteinLigandData(**tr_data)
352
- data_list.append(tr_data)
353
- return data_list
354
-
355
- self.train_data = _transform_subset(train_ids)
356
- print(f'train_size: {len(self.train_data)}, {sys.getsizeof(self.train_data)}')
357
- self.test_data = _transform_subset(test_ids)
358
- print(f'test_size: {len(self.test_data)}, {sys.getsizeof(self.test_data)}')
359
- torch.save({
360
- 'train': self.train_data, 'test': self.test_data,
361
- 'protein_atom_feature_dim': self.protein_atom_feature_dim,
362
- 'ligand_atom_feature_dim': self.ligand_atom_feature_dim,
363
- }, self.transformed_path)
364
-
365
-
366
- if __name__ == '__main__':
367
- # original dataset
368
- dataset = PocketLigandPairDataset('./data/crossdocked_v1.1_rmsd1.0_pocket10')
369
- print(len(dataset), dataset[0])
370
-
371
- ############################################################
372
-
373
- # test DecompDiffDataset
374
- # dataset = PocketLigandGeneratedPairDataset('/sharefs/share/sbdd_data/crossdocked_pocket10')
375
- # print(len(dataset), dataset[0])
376
-
377
- ############################################################
378
-
379
- # test featurized dataset (GPU accelerated)
380
- # path = '/sharefs/share/sbdd_data/crossdocked_v1.1_rmsd1.0_pocket10'
381
- # ligand_atom_mode = 'add_aromatic'
382
-
383
- # dataset = PocketLigandPairDatasetFeaturized(path, ligand_atom_mode)
384
- # train_data, test_data = dataset.train_data, dataset.test_data
385
- # print(f'train_size: {len(train_data)}, {sys.getsizeof(train_data)}')
386
- # print(f'test_size: {len(test_data)}, {sys.getsizeof(test_data)}')
387
- # print(test_data[0], sys.getsizeof(test_data[0]))
388
-
389
- ############################################################
390
-
391
- # test featurization
392
- # find all atom types
393
- # atom_types = {(1, False): 0}
394
-
395
- # dataset = PocketLigandPairDataset(path, transform)
396
- # for i in tqdm(range(len(dataset))):
397
- # data = dataset[i]
398
- # element_list = data.ligand_element
399
- # hybridization_list = data.ligand_hybridization
400
- # aromatic_list = [v[trans.AROMATIC_FEAT_MAP_IDX] for v in data.ligand_atom_feature]
401
-
402
- # types = [(e, a) for e, h, a in zip(element_list, hybridization_list, aromatic_list)]
403
- # for t in types:
404
- # t = (t[0].item(), bool(t[1].item()))
405
- # if t not in atom_types:
406
- # atom_types[t] = 0
407
- # atom_types[t] += 1
408
-
409
- # idx = 0
410
- # for k in sorted(atom_types.keys()):
411
- # print(f'{k}: {idx}, # {atom_types[k]}')
412
- # idx += 1
413
-
414
- ############################################################
415
 
416
- # count atom types
417
- # type_counter, aromatic_counter, full_counter = {}, {}, {}
418
- # for i, data in enumerate(tqdm(dataset)):
419
- # element_list = data.ligand_element
420
- # hybridization_list = data.ligand_hybridization
421
- # aromatic_list = [v[trans.AROMATIC_FEAT_MAP_IDX] for v in data.ligand_atom_feature]
422
- # flag = False
423
-
424
- # for atom_type in element_list:
425
- # atom_type = int(atom_type.item())
426
- # if atom_type not in type_counter:
427
- # type_counter[atom_type] = 0
428
- # type_counter[atom_type] += 1
429
-
430
- # for e, a in zip(element_list, aromatic_list):
431
- # e = int(e.item())
432
- # a = bool(a.item())
433
- # key = (e, a)
434
- # if key not in aromatic_counter:
435
- # aromatic_counter[key] = 0
436
- # aromatic_counter[key] += 1
437
-
438
- # if key not in trans.MAP_ATOM_TYPE_AROMATIC_TO_INDEX:
439
- # flag = True
440
-
441
- # for e, h, a in zip(element_list, hybridization_list, aromatic_list):
442
- # e = int(e.item())
443
- # a = bool(a.item())
444
- # key = (e, h, a)
445
- # if key not in full_counter:
446
- # full_counter[key] = 0
447
- # full_counter[key] += 1
448
 
449
- # print('type_counter', type_counter)
450
- # print('aromatic_counter', aromatic_counter)
451
- # print('full_counter', full_counter)
452
 
 
1
+ # import os
2
+ # import pickle
3
+ # import lmdb
4
+ # from torch.utils.data import Dataset
5
+ # from tqdm.auto import tqdm
6
+ # import sys
7
+ # from time import time
8
+
9
+ # import torch
10
+ # from torch_geometric.transforms import Compose
11
+
12
+ # from core.datasets.utils import PDBProtein, parse_sdf_file, ATOM_FAMILIES_ID
13
+ # from core.datasets.pl_data import ProteinLigandData, torchify_dict
14
+
15
+ # import core.utils.transforms as trans
16
+
17
+
18
+ # class DBReader:
19
+ # def __init__(self, path) -> None:
20
+ # self.path = path
21
+ # self.db = None
22
+ # self.keys = None
23
+
24
+ # def _connect_db(self):
25
+ # """
26
+ # Establish read-only database connection
27
+ # """
28
+ # assert self.db is None, 'A connection has already been opened.'
29
+ # self.db = lmdb.open(
30
+ # self.path,
31
+ # map_size=10*(1024*1024*1024), # 10GB
32
+ # create=False,
33
+ # subdir=False,
34
+ # readonly=True,
35
+ # lock=False,
36
+ # readahead=False,
37
+ # meminit=False,
38
+ # )
39
+ # with self.db.begin() as txn:
40
+ # self.keys = list(txn.cursor().iternext(values=False))
41
+
42
+ # def _close_db(self):
43
+ # self.db.close()
44
+ # self.db = None
45
+ # self.keys = None
46
+
47
+ # def __del__(self):
48
+ # if self.db is not None:
49
+ # self._close_db()
50
+
51
+ # def __len__(self):
52
+ # if self.db is None:
53
+ # self._connect_db()
54
+ # return len(self.keys)
55
+
56
+ # def __getitem__(self, idx):
57
+ # if self.db is None:
58
+ # self._connect_db()
59
+ # key = self.keys[idx]
60
+ # data = pickle.loads(self.db.begin().get(key))
61
+ # data = ProteinLigandData(**data)
62
+ # data.id = idx
63
+ # assert data.protein_pos.size(0) > 0
64
+ # return data
65
 
66
 
67
+ # class PocketLigandPairDataset(Dataset):
68
 
69
+ # def __init__(self, raw_path, transform=None, version='final'):
70
+ # super().__init__()
71
+ # self.raw_path = raw_path.rstrip('/')
72
+ # self.index_path = os.path.join(self.raw_path, 'index.pkl')
73
+ # self.processed_path = os.path.join(os.path.dirname(self.raw_path),
74
+ # os.path.basename(self.raw_path) + f'_processed_{version}.lmdb')
75
+ # self.transform = transform
76
+ # self.reader = DBReader(self.processed_path)
77
 
78
+ # if not os.path.exists(self.processed_path):
79
+ # print(f'{self.processed_path} does not exist, begin processing data')
80
+ # self._process()
81
 
82
+ # def _process(self):
83
+ # db = lmdb.open(
84
+ # self.processed_path,
85
+ # map_size=10*(1024*1024*1024), # 10GB
86
+ # create=True,
87
+ # subdir=False,
88
+ # readonly=False, # Writable
89
+ # )
90
+ # with open(self.index_path, 'rb') as f:
91
+ # index = pickle.load(f)
92
+
93
+ # num_skipped = 0
94
+ # with db.begin(write=True, buffers=True) as txn:
95
+ # for i, (pocket_fn, ligand_fn, *_) in enumerate(tqdm(index)):
96
+ # if pocket_fn is None: continue
97
+ # try:
98
+ # # data_prefix = '/data/work/jiaqi/binding_affinity'
99
+ # data_prefix = self.raw_path
100
+ # pocket_dict = PDBProtein(os.path.join(data_prefix, pocket_fn)).to_dict_atom()
101
+ # ligand_dict = parse_sdf_file(os.path.join(data_prefix, ligand_fn))
102
+ # data = ProteinLigandData.from_protein_ligand_dicts(
103
+ # protein_dict=torchify_dict(pocket_dict),
104
+ # ligand_dict=torchify_dict(ligand_dict),
105
+ # )
106
+ # data.protein_filename = pocket_fn
107
+ # data.ligand_filename = ligand_fn
108
+ # data = data.to_dict() # avoid torch_geometric version issue
109
+ # txn.put(
110
+ # key=str(i).encode(),
111
+ # value=pickle.dumps(data)
112
+ # )
113
+ # except:
114
+ # num_skipped += 1
115
+ # print('Skipping (%d) %s' % (num_skipped, ligand_fn, ))
116
+ # continue
117
+ # db.close()
118
 
119
+ # def __len__(self):
120
+ # return len(self.reader)
121
+
122
+ # def __getitem__(self, idx):
123
+ # data = self.reader[idx]
124
+ # if self.transform is not None:
125
+ # data = self.transform(data)
126
+ # return data
127
+
128
+
129
+ # class PocketLigandGeneratedPairDataset(Dataset):
130
+
131
+ # def __init__(self, raw_path, transform=None, version='4-decompdiff'):
132
+ # super().__init__()
133
+ # self.raw_path = raw_path.rstrip('/')
134
+ # self.generated_path = os.path.join('/sharefs/share/sbdd_data/all_results', f'{version}_docked_pose_checked.pt')
135
+ # self.processed_path = os.path.join(os.path.dirname(self.raw_path),
136
+ # os.path.basename(self.raw_path) + f'_processed_{version}.lmdb')
137
+ # self.transform = transform
138
+ # self.reader = DBReader(self.processed_path)
139
+
140
+ # if not os.path.exists(self.processed_path):
141
+ # print(f'{self.processed_path} does not exist, begin processing data')
142
+ # self._process()
143
 
144
+ # def _process(self):
145
+ # db = lmdb.open(
146
+ # self.processed_path,
147
+ # map_size=10*(1024*1024*1024), # 10GB
148
+ # create=True,
149
+ # subdir=False,
150
+ # readonly=False, # Writable
151
+ # )
152
+ # with open(self.generated_path, 'rb') as f:
153
+ # results = torch.load(f)
154
+
155
+ # num_skipped = 0
156
+ # with db.begin(write=True, buffers=True) as txn:
157
+ # idx = -1
158
+ # for i, res in tqdm(enumerate(results), total=len(results)):
159
+ # if isinstance(res, dict):
160
+ # res = [res]
161
+ # for r in res:
162
+ # idx += 1
163
+ # mol = r["mol"]
164
+ # ligand_fn = r["ligand_filename"]
165
+ # pocket_fn = os.path.join(
166
+ # os.path.dirname(ligand_fn),
167
+ # os.path.basename(ligand_fn)[:-4] + '_pocket10.pdb'
168
+ # )
169
+
170
+ # if pocket_fn is None: continue
171
+ # try:
172
+ # data_prefix = self.raw_path
173
+ # pocket_dict = PDBProtein(os.path.join(data_prefix, pocket_fn)).to_dict_atom()
174
+ # ligand_dict = parse_sdf_file(mol)
175
+ # # ligand_dict = parse_sdf_file(os.path.join(data_prefix, ligand_fn))
176
+ # data = ProteinLigandData.from_protein_ligand_dicts(
177
+ # protein_dict=torchify_dict(pocket_dict),
178
+ # ligand_dict=torchify_dict(ligand_dict),
179
+ # )
180
+ # data.protein_filename = pocket_fn
181
+ # data.ligand_filename = ligand_fn
182
+ # data = data.to_dict() # avoid torch_geometric version issue
183
+ # txn.put(
184
+ # key=str(idx).encode(),
185
+ # value=pickle.dumps(data)
186
+ # )
187
+ # except Exception as e:
188
+ # num_skipped += 1
189
+ # print('Skipping (%d) %s' % (num_skipped, ligand_fn, ), e)
190
+ # continue
191
+ # db.close()
192
 
193
+ # def __len__(self):
194
+ # return len(self.reader)
195
+
196
+ # def __getitem__(self, idx):
197
+ # data = self.reader[idx]
198
+ # if self.transform is not None:
199
+ # data = self.transform(data)
200
+ # return data
201
+
202
+
203
+ # class PocketLigandPairDatasetFromComplex(Dataset):
204
+ # def __init__(self, raw_path, transform=None, version='final', radius=10.0):
205
+ # super().__init__()
206
+ # self.raw_path = raw_path.rstrip('/')
207
+ # self.index_path = os.path.join(self.raw_path, 'index.pkl')
208
+ # base_name = os.path.basename(self.raw_path)
209
+ # if 'pocket' in base_name:
210
+ # self.processed_path = os.path.join(os.path.dirname(self.raw_path),
211
+ # os.path.basename(self.raw_path) + f'_processed_{version}.lmdb')
212
+ # else:
213
+ # self.processed_path = os.path.join(os.path.dirname(self.raw_path),
214
+ # os.path.basename(self.raw_path) + f'_pocket{radius}_processed_{version}.lmdb')
215
+ # self.transform = transform
216
+ # self.reader = DBReader(self.processed_path)
217
+
218
+ # self.radius = radius
219
+
220
+ # if not os.path.exists(self.processed_path):
221
+ # print(f'{self.processed_path} does not exist, begin processing data')
222
+ # self._process()
223
+
224
+ # def _process(self):
225
+ # db = lmdb.open(
226
+ # self.processed_path,
227
+ # map_size=10*(1024*1024*1024), # 50GB
228
+ # create=True,
229
+ # subdir=False,
230
+ # readonly=False, # Writable
231
+ # max_readers=256,
232
+ # )
233
+ # with open(self.index_path, 'rb') as f:
234
+ # index = pickle.load(f)
235
+
236
+ # print('Processing data...', 'index', self.index_path, index[0])
237
+
238
+ # num_skipped = 0
239
+ # with db.begin(write=True, buffers=True) as txn:
240
+ # for i, (pocket_fn, ligand_fn, *_) in enumerate(tqdm(index)):
241
+ # if pocket_fn is None: continue
242
+ # try:
243
+ # data_prefix = self.raw_path
244
+ # # clip pocket
245
+ # ligand_dict = parse_sdf_file(os.path.join(data_prefix, ligand_fn))
246
+ # protein = PDBProtein(os.path.join(data_prefix, pocket_fn))
247
+ # selected = protein.query_residues_ligand(ligand_dict, self.radius)
248
+ # pdb_block_pocket = protein.residues_to_pdb_block(selected)
249
+ # pocket_dict = PDBProtein(pdb_block_pocket).to_dict_atom()
250
+
251
+ # # pocket_dict = PDBProtein(os.path.join(data_prefix, pocket_fn)).to_dict_atom()
252
+ # # ligand_dict = parse_sdf_file(os.path.join(data_prefix, ligand_fn))
253
 
254
+ # data = ProteinLigandData.from_protein_ligand_dicts(
255
+ # protein_dict=torchify_dict(pocket_dict),
256
+ # ligand_dict=torchify_dict(ligand_dict),
257
+ # )
258
+ # data.protein_filename = pocket_fn
259
+ # data.ligand_filename = ligand_fn
260
+ # data = data.to_dict() # avoid torch_geometric version issue
261
+ # txn.put(
262
+ # key=str(i).encode(),
263
+ # value=pickle.dumps(data)
264
+ # )
265
+ # except Exception as e:
266
+ # num_skipped += 1
267
+ # print('Skipping (%d) %s' % (num_skipped, ligand_fn, ), e)
268
+ # with open('skipped.txt', 'a') as f:
269
+ # f.write('Skip %s due to %s\n' % (ligand_fn, e))
270
+ # continue
271
+ # db.close()
272
+
273
+ # def __len__(self):
274
+ # return len(self.reader)
275
+
276
+ # def __getitem__(self, idx):
277
+ # data = self.reader[idx]
278
+ # if self.transform is not None:
279
+ # data = self.transform(data)
280
+ # return data
281
 
282
 
283
+ # class PocketLigandPairDatasetFeaturized(Dataset):
284
+ # def __init__(self, raw_path, ligand_atom_mode, version='simple'):
285
+ # """
286
+ # in simple version, only these features are saved for better IO:
287
+ # protein_pos, protein_atom_feature, protein_element,
288
+ # ligand_pos, ligand_atom_feature_full, ligand_element
289
+ # """
290
+ # self.raw_path = raw_path
291
+ # self.ligand_atom_mode = ligand_atom_mode
292
+ # self.version = version
293
+
294
+ # if version == 'simple':
295
+ # self.features_to_save = [
296
+ # 'protein_pos', 'protein_atom_feature', 'protein_element',
297
+ # 'ligand_pos', 'ligand_atom_feature_full', 'ligand_element',
298
+ # 'protein_filename', 'ligand_filename',
299
+ # ]
300
+ # else:
301
+ # raise NotImplementedError
302
+
303
+ # self.transformed_path = os.path.join(
304
+ # os.path.dirname(self.raw_path), os.path.basename(self.raw_path) +
305
+ # f'_{ligand_atom_mode}_transformed_{version}.pt'
306
+ # )
307
+ # if not os.path.exists(self.transformed_path):
308
+ # print(f'{self.transformed_path} does not exist, begin transforming data')
309
+ # self._transform()
310
+ # else:
311
+ # print(f'reading data from {self.transformed_path}...')
312
+ # tic = time()
313
+ # tr_data = torch.load(self.transformed_path)
314
+ # toc = time()
315
+ # print(f'{toc - tic} elapsed')
316
+ # self.train_data, self.test_data = tr_data['train'], tr_data['test']
317
+ # self.protein_atom_feature_dim = tr_data['protein_atom_feature_dim']
318
+ # self.ligand_atom_feature_dim = tr_data['ligand_atom_feature_dim']
319
 
320
+ # def _transform(self):
321
+ # raw_dataset = PocketLigandPairDataset(self.raw_path, None, 'final')
322
+
323
+ # split_path = os.path.join(
324
+ # os.path.dirname(self.raw_path), 'crossdocked_pocket10_pose_split.pt',
325
+ # )
326
+ # split = torch.load(split_path)
327
+ # train_ids, test_ids = split['train'], split['test']
328
+ # print(f'train_size: {len(train_ids)}, test_size: {len(test_ids)}')
329
+
330
+ # protein_featurizer = trans.FeaturizeProteinAtom()
331
+ # ligand_featurizer = trans.FeaturizeLigandAtom(self.ligand_atom_mode)
332
+ # transform_list = [
333
+ # protein_featurizer,
334
+ # ligand_featurizer,
335
+ # # trans.FeaturizeLigandBond(),
336
+ # ]
337
+ # transform = Compose(transform_list)
338
+ # self.protein_atom_feature_dim = protein_featurizer.feature_dim
339
+ # self.ligand_atom_feature_dim = ligand_featurizer.feature_dim
340
+
341
+ # def _transform_subset(ids):
342
+ # data_list = []
343
+
344
+ # for idx in tqdm(ids):
345
+ # data = raw_dataset[idx]
346
+ # data = transform(data)
347
+ # tr_data = {}
348
+ # for k in self.features_to_save:
349
+ # tr_data[k] = getattr(data, k)
350
+ # tr_data['id'] = idx
351
+ # tr_data = ProteinLigandData(**tr_data)
352
+ # data_list.append(tr_data)
353
+ # return data_list
354
+
355
+ # self.train_data = _transform_subset(train_ids)
356
+ # print(f'train_size: {len(self.train_data)}, {sys.getsizeof(self.train_data)}')
357
+ # self.test_data = _transform_subset(test_ids)
358
+ # print(f'test_size: {len(self.test_data)}, {sys.getsizeof(self.test_data)}')
359
+ # torch.save({
360
+ # 'train': self.train_data, 'test': self.test_data,
361
+ # 'protein_atom_feature_dim': self.protein_atom_feature_dim,
362
+ # 'ligand_atom_feature_dim': self.ligand_atom_feature_dim,
363
+ # }, self.transformed_path)
364
+
365
+
366
+ # if __name__ == '__main__':
367
+ # # original dataset
368
+ # dataset = PocketLigandPairDataset('./data/crossdocked_v1.1_rmsd1.0_pocket10')
369
+ # print(len(dataset), dataset[0])
370
+
371
+ # ############################################################
372
+
373
+ # # test DecompDiffDataset
374
+ # # dataset = PocketLigandGeneratedPairDataset('/sharefs/share/sbdd_data/crossdocked_pocket10')
375
+ # # print(len(dataset), dataset[0])
376
+
377
+ # ############################################################
378
+
379
+ # # test featurized dataset (GPU accelerated)
380
+ # # path = '/sharefs/share/sbdd_data/crossdocked_v1.1_rmsd1.0_pocket10'
381
+ # # ligand_atom_mode = 'add_aromatic'
382
+
383
+ # # dataset = PocketLigandPairDatasetFeaturized(path, ligand_atom_mode)
384
+ # # train_data, test_data = dataset.train_data, dataset.test_data
385
+ # # print(f'train_size: {len(train_data)}, {sys.getsizeof(train_data)}')
386
+ # # print(f'test_size: {len(test_data)}, {sys.getsizeof(test_data)}')
387
+ # # print(test_data[0], sys.getsizeof(test_data[0]))
388
+
389
+ # ############################################################
390
+
391
+ # # test featurization
392
+ # # find all atom types
393
+ # # atom_types = {(1, False): 0}
394
+
395
+ # # dataset = PocketLigandPairDataset(path, transform)
396
+ # # for i in tqdm(range(len(dataset))):
397
+ # # data = dataset[i]
398
+ # # element_list = data.ligand_element
399
+ # # hybridization_list = data.ligand_hybridization
400
+ # # aromatic_list = [v[trans.AROMATIC_FEAT_MAP_IDX] for v in data.ligand_atom_feature]
401
+
402
+ # # types = [(e, a) for e, h, a in zip(element_list, hybridization_list, aromatic_list)]
403
+ # # for t in types:
404
+ # # t = (t[0].item(), bool(t[1].item()))
405
+ # # if t not in atom_types:
406
+ # # atom_types[t] = 0
407
+ # # atom_types[t] += 1
408
+
409
+ # # idx = 0
410
+ # # for k in sorted(atom_types.keys()):
411
+ # # print(f'{k}: {idx}, # {atom_types[k]}')
412
+ # # idx += 1
413
+
414
+ # ############################################################
415
 
416
+ # # count atom types
417
+ # # type_counter, aromatic_counter, full_counter = {}, {}, {}
418
+ # # for i, data in enumerate(tqdm(dataset)):
419
+ # # element_list = data.ligand_element
420
+ # # hybridization_list = data.ligand_hybridization
421
+ # # aromatic_list = [v[trans.AROMATIC_FEAT_MAP_IDX] for v in data.ligand_atom_feature]
422
+ # # flag = False
423
+
424
+ # # for atom_type in element_list:
425
+ # # atom_type = int(atom_type.item())
426
+ # # if atom_type not in type_counter:
427
+ # # type_counter[atom_type] = 0
428
+ # # type_counter[atom_type] += 1
429
+
430
+ # # for e, a in zip(element_list, aromatic_list):
431
+ # # e = int(e.item())
432
+ # # a = bool(a.item())
433
+ # # key = (e, a)
434
+ # # if key not in aromatic_counter:
435
+ # # aromatic_counter[key] = 0
436
+ # # aromatic_counter[key] += 1
437
+
438
+ # # if key not in trans.MAP_ATOM_TYPE_AROMATIC_TO_INDEX:
439
+ # # flag = True
440
+
441
+ # # for e, h, a in zip(element_list, hybridization_list, aromatic_list):
442
+ # # e = int(e.item())
443
+ # # a = bool(a.item())
444
+ # # key = (e, h, a)
445
+ # # if key not in full_counter:
446
+ # # full_counter[key] = 0
447
+ # # full_counter[key] += 1
448
 
449
+ # # print('type_counter', type_counter)
450
+ # # print('aromatic_counter', aromatic_counter)
451
+ # # print('full_counter', full_counter)
452
 
core/evaluation/docking_qvina.py CHANGED
@@ -1,193 +1,193 @@
1
- import os
2
- import subprocess
3
- import random
4
- import string
5
- from easydict import EasyDict
6
- from rdkit import Chem
7
- from rdkit.Chem.rdForceFieldHelpers import UFFOptimizeMolecule
8
-
9
-
10
- def get_random_id(length=30):
11
- letters = string.ascii_lowercase
12
- return ''.join(random.choice(letters) for i in range(length))
13
-
14
-
15
- def load_pdb(path):
16
- with open(path, 'r') as f:
17
- return f.read()
18
-
19
-
20
- def parse_qvina_outputs(docked_sdf_path):
21
- suppl = Chem.SDMolSupplier(docked_sdf_path)
22
- results = []
23
- for i, mol in enumerate(suppl):
24
- if mol is None:
25
- continue
26
- line = mol.GetProp('REMARK').splitlines()[0].split()[2:]
27
- results.append(EasyDict({
28
- 'rdmol': mol,
29
- 'mode_id': i,
30
- 'affinity': float(line[0]),
31
- 'rmsd_lb': float(line[1]),
32
- 'rmsd_ub': float(line[2]),
33
- }))
34
-
35
- return results
36
-
37
-
38
- class BaseDockingTask(object):
39
-
40
- def __init__(self, pdb_block, ligand_rdmol):
41
- super().__init__()
42
- self.pdb_block = pdb_block
43
- self.ligand_rdmol = ligand_rdmol
44
-
45
- def run(self):
46
- raise NotImplementedError()
47
-
48
- def get_results(self):
49
- raise NotImplementedError()
50
-
51
-
52
- class QVinaDockingTask(BaseDockingTask):
53
-
54
- @classmethod
55
- def from_generated_mol(cls, ligand_rdmol, ligand_filename, protein_root='./data/crossdocked', **kwargs):
56
- # load original pdb
57
- protein_fn = os.path.join(
58
- os.path.dirname(ligand_filename),
59
- os.path.basename(ligand_filename)[:10] + '.pdb' # PDBId_Chain_rec.pdb
60
- )
61
- protein_path = os.path.join(protein_root, protein_fn)
62
- with open(protein_path, 'r') as f:
63
- pdb_block = f.read()
64
- return cls(pdb_block, ligand_rdmol, **kwargs)
65
-
66
- @classmethod
67
- def from_original_data(cls, data, ligand_root='./data/crossdocked_pocket10', protein_root='./data/crossdocked',
68
- **kwargs):
69
- protein_fn = os.path.join(
70
- os.path.dirname(data.ligand_filename),
71
- os.path.basename(data.ligand_filename)[:10] + '.pdb'
72
- )
73
- protein_path = os.path.join(protein_root, protein_fn)
74
- with open(protein_path, 'r') as f:
75
- pdb_block = f.read()
76
-
77
- ligand_path = os.path.join(ligand_root, data.ligand_filename)
78
- ligand_rdmol = next(iter(Chem.SDMolSupplier(ligand_path)))
79
- return cls(pdb_block, ligand_rdmol, **kwargs)
80
-
81
- def __init__(self, pdb_block, ligand_rdmol, conda_env='adt', tmp_dir='./tmp', use_uff=True, center=None,
82
- size_factor=1., pos=None):
83
- super().__init__(pdb_block, ligand_rdmol)
84
- self.conda_env = conda_env
85
- self.tmp_dir = os.path.realpath(tmp_dir)
86
- os.makedirs(tmp_dir, exist_ok=True)
87
-
88
- self.task_id = get_random_id()
89
- self.receptor_id = self.task_id + '_receptor'
90
- self.ligand_id = self.task_id + '_ligand'
91
-
92
- self.receptor_path = os.path.join(self.tmp_dir, self.receptor_id + '.pdb')
93
- self.ligand_path = os.path.join(self.tmp_dir, self.ligand_id + '.sdf')
94
-
95
- with open(self.receptor_path, 'w') as f:
96
- f.write(pdb_block)
97
-
98
- ligand_rdmol = Chem.AddHs(ligand_rdmol, addCoords=True)
99
- if use_uff:
100
- UFFOptimizeMolecule(ligand_rdmol)
101
- # print('after uff smiles: ', Chem.MolToSmiles(ligand_rdmol))
102
- sdf_writer = Chem.SDWriter(self.ligand_path)
103
- sdf_writer.write(ligand_rdmol)
104
- sdf_writer.close()
105
- self.ligand_rdmol = ligand_rdmol
106
-
107
- pos = ligand_rdmol.GetConformer(0).GetPositions()
108
- if center is None:
109
- self.center = (pos.max(0) + pos.min(0)) / 2
110
- else:
111
- self.center = center
112
-
113
- if size_factor is None:
114
- self.size_x, self.size_y, self.size_z = 20, 20, 20
115
- else:
116
- self.size_x, self.size_y, self.size_z = (pos.max(0) - pos.min(0)) * size_factor
117
-
118
- self.proc = None
119
- self.results = None
120
- self.output = None
121
- self.error_output = None
122
- self.docked_sdf_path = None
123
-
124
- def run(self, exhaustiveness=16):
125
- commands = """
126
- eval "$(conda shell.bash hook)"
127
- conda activate {env}
128
- cd {tmp}
129
- # Prepare receptor (PDB->PDBQT)
130
- prepare_receptor4.py -r {receptor_id}.pdb
131
- # Prepare ligand
132
- obabel {ligand_id}.sdf -O{ligand_id}.pdbqt
133
- qvina2 \
134
- --receptor {receptor_id}.pdbqt \
135
- --ligand {ligand_id}.pdbqt \
136
- --center_x {center_x:.4f} \
137
- --center_y {center_y:.4f} \
138
- --center_z {center_z:.4f} \
139
- --size_x {size_x} --size_y {size_y} --size_z {size_z} \
140
- --exhaustiveness {exhaust}
141
- obabel {ligand_id}_out.pdbqt -O{ligand_id}_out.sdf -h
142
- """.format(
143
- receptor_id=self.receptor_id,
144
- ligand_id=self.ligand_id,
145
- env=self.conda_env,
146
- tmp=self.tmp_dir,
147
- exhaust=exhaustiveness,
148
- center_x=self.center[0],
149
- center_y=self.center[1],
150
- center_z=self.center[2],
151
- size_x=self.size_x,
152
- size_y=self.size_y,
153
- size_z=self.size_z
154
- )
155
-
156
- self.docked_sdf_path = os.path.join(self.tmp_dir, '%s_out.sdf' % self.ligand_id)
157
-
158
- self.proc = subprocess.Popen(
159
- '/bin/bash',
160
- shell=False,
161
- stdin=subprocess.PIPE,
162
- stdout=subprocess.PIPE,
163
- stderr=subprocess.PIPE
164
- )
165
-
166
- self.proc.stdin.write(commands.encode('utf-8'))
167
- self.proc.stdin.close()
168
-
169
- # return commands
170
-
171
- def run_sync(self):
172
- self.run()
173
- while self.get_results() is None:
174
- pass
175
- results = self.get_results()
176
- print('Best affinity:', results[0]['affinity'])
177
- return results
178
-
179
- def get_results(self):
180
- if self.proc is None: # Not started
181
- return None
182
- elif self.proc.poll() is None: # In progress
183
- return None
184
- else:
185
- if self.output is None:
186
- self.output = self.proc.stdout.readlines()
187
- self.error_output = self.proc.stderr.readlines()
188
- try:
189
- self.results = parse_qvina_outputs(self.docked_sdf_path)
190
- except Exception as e:
191
- print('[Error] Vina output error: %s' % self.docked_sdf_path, e)
192
- return []
193
- return self.results
 
1
+ # import os
2
+ # import subprocess
3
+ # import random
4
+ # import string
5
+ # from easydict import EasyDict
6
+ # from rdkit import Chem
7
+ # from rdkit.Chem.rdForceFieldHelpers import UFFOptimizeMolecule
8
+
9
+
10
+ # def get_random_id(length=30):
11
+ # letters = string.ascii_lowercase
12
+ # return ''.join(random.choice(letters) for i in range(length))
13
+
14
+
15
+ # def load_pdb(path):
16
+ # with open(path, 'r') as f:
17
+ # return f.read()
18
+
19
+
20
+ # def parse_qvina_outputs(docked_sdf_path):
21
+ # suppl = Chem.SDMolSupplier(docked_sdf_path)
22
+ # results = []
23
+ # for i, mol in enumerate(suppl):
24
+ # if mol is None:
25
+ # continue
26
+ # line = mol.GetProp('REMARK').splitlines()[0].split()[2:]
27
+ # results.append(EasyDict({
28
+ # 'rdmol': mol,
29
+ # 'mode_id': i,
30
+ # 'affinity': float(line[0]),
31
+ # 'rmsd_lb': float(line[1]),
32
+ # 'rmsd_ub': float(line[2]),
33
+ # }))
34
+
35
+ # return results
36
+
37
+
38
+ # class BaseDockingTask(object):
39
+
40
+ # def __init__(self, pdb_block, ligand_rdmol):
41
+ # super().__init__()
42
+ # self.pdb_block = pdb_block
43
+ # self.ligand_rdmol = ligand_rdmol
44
+
45
+ # def run(self):
46
+ # raise NotImplementedError()
47
+
48
+ # def get_results(self):
49
+ # raise NotImplementedError()
50
+
51
+
52
+ # class QVinaDockingTask(BaseDockingTask):
53
+
54
+ # @classmethod
55
+ # def from_generated_mol(cls, ligand_rdmol, ligand_filename, protein_root='./data/crossdocked', **kwargs):
56
+ # # load original pdb
57
+ # protein_fn = os.path.join(
58
+ # os.path.dirname(ligand_filename),
59
+ # os.path.basename(ligand_filename)[:10] + '.pdb' # PDBId_Chain_rec.pdb
60
+ # )
61
+ # protein_path = os.path.join(protein_root, protein_fn)
62
+ # with open(protein_path, 'r') as f:
63
+ # pdb_block = f.read()
64
+ # return cls(pdb_block, ligand_rdmol, **kwargs)
65
+
66
+ # @classmethod
67
+ # def from_original_data(cls, data, ligand_root='./data/crossdocked_pocket10', protein_root='./data/crossdocked',
68
+ # **kwargs):
69
+ # protein_fn = os.path.join(
70
+ # os.path.dirname(data.ligand_filename),
71
+ # os.path.basename(data.ligand_filename)[:10] + '.pdb'
72
+ # )
73
+ # protein_path = os.path.join(protein_root, protein_fn)
74
+ # with open(protein_path, 'r') as f:
75
+ # pdb_block = f.read()
76
+
77
+ # ligand_path = os.path.join(ligand_root, data.ligand_filename)
78
+ # ligand_rdmol = next(iter(Chem.SDMolSupplier(ligand_path)))
79
+ # return cls(pdb_block, ligand_rdmol, **kwargs)
80
+
81
+ # def __init__(self, pdb_block, ligand_rdmol, conda_env='adt', tmp_dir='./tmp', use_uff=True, center=None,
82
+ # size_factor=1., pos=None):
83
+ # super().__init__(pdb_block, ligand_rdmol)
84
+ # self.conda_env = conda_env
85
+ # self.tmp_dir = os.path.realpath(tmp_dir)
86
+ # os.makedirs(tmp_dir, exist_ok=True)
87
+
88
+ # self.task_id = get_random_id()
89
+ # self.receptor_id = self.task_id + '_receptor'
90
+ # self.ligand_id = self.task_id + '_ligand'
91
+
92
+ # self.receptor_path = os.path.join(self.tmp_dir, self.receptor_id + '.pdb')
93
+ # self.ligand_path = os.path.join(self.tmp_dir, self.ligand_id + '.sdf')
94
+
95
+ # with open(self.receptor_path, 'w') as f:
96
+ # f.write(pdb_block)
97
+
98
+ # ligand_rdmol = Chem.AddHs(ligand_rdmol, addCoords=True)
99
+ # if use_uff:
100
+ # UFFOptimizeMolecule(ligand_rdmol)
101
+ # # print('after uff smiles: ', Chem.MolToSmiles(ligand_rdmol))
102
+ # sdf_writer = Chem.SDWriter(self.ligand_path)
103
+ # sdf_writer.write(ligand_rdmol)
104
+ # sdf_writer.close()
105
+ # self.ligand_rdmol = ligand_rdmol
106
+
107
+ # pos = ligand_rdmol.GetConformer(0).GetPositions()
108
+ # if center is None:
109
+ # self.center = (pos.max(0) + pos.min(0)) / 2
110
+ # else:
111
+ # self.center = center
112
+
113
+ # if size_factor is None:
114
+ # self.size_x, self.size_y, self.size_z = 20, 20, 20
115
+ # else:
116
+ # self.size_x, self.size_y, self.size_z = (pos.max(0) - pos.min(0)) * size_factor
117
+
118
+ # self.proc = None
119
+ # self.results = None
120
+ # self.output = None
121
+ # self.error_output = None
122
+ # self.docked_sdf_path = None
123
+
124
+ # def run(self, exhaustiveness=16):
125
+ # commands = """
126
+ # eval "$(conda shell.bash hook)"
127
+ # conda activate {env}
128
+ # cd {tmp}
129
+ # # Prepare receptor (PDB->PDBQT)
130
+ # prepare_receptor4.py -r {receptor_id}.pdb
131
+ # # Prepare ligand
132
+ # obabel {ligand_id}.sdf -O{ligand_id}.pdbqt
133
+ # qvina2 \
134
+ # --receptor {receptor_id}.pdbqt \
135
+ # --ligand {ligand_id}.pdbqt \
136
+ # --center_x {center_x:.4f} \
137
+ # --center_y {center_y:.4f} \
138
+ # --center_z {center_z:.4f} \
139
+ # --size_x {size_x} --size_y {size_y} --size_z {size_z} \
140
+ # --exhaustiveness {exhaust}
141
+ # obabel {ligand_id}_out.pdbqt -O{ligand_id}_out.sdf -h
142
+ # """.format(
143
+ # receptor_id=self.receptor_id,
144
+ # ligand_id=self.ligand_id,
145
+ # env=self.conda_env,
146
+ # tmp=self.tmp_dir,
147
+ # exhaust=exhaustiveness,
148
+ # center_x=self.center[0],
149
+ # center_y=self.center[1],
150
+ # center_z=self.center[2],
151
+ # size_x=self.size_x,
152
+ # size_y=self.size_y,
153
+ # size_z=self.size_z
154
+ # )
155
+
156
+ # self.docked_sdf_path = os.path.join(self.tmp_dir, '%s_out.sdf' % self.ligand_id)
157
+
158
+ # self.proc = subprocess.Popen(
159
+ # '/bin/bash',
160
+ # shell=False,
161
+ # stdin=subprocess.PIPE,
162
+ # stdout=subprocess.PIPE,
163
+ # stderr=subprocess.PIPE
164
+ # )
165
+
166
+ # self.proc.stdin.write(commands.encode('utf-8'))
167
+ # self.proc.stdin.close()
168
+
169
+ # # return commands
170
+
171
+ # def run_sync(self):
172
+ # self.run()
173
+ # while self.get_results() is None:
174
+ # pass
175
+ # results = self.get_results()
176
+ # print('Best affinity:', results[0]['affinity'])
177
+ # return results
178
+
179
+ # def get_results(self):
180
+ # if self.proc is None: # Not started
181
+ # return None
182
+ # elif self.proc.poll() is None: # In progress
183
+ # return None
184
+ # else:
185
+ # if self.output is None:
186
+ # self.output = self.proc.stdout.readlines()
187
+ # self.error_output = self.proc.stderr.readlines()
188
+ # try:
189
+ # self.results = parse_qvina_outputs(self.docked_sdf_path)
190
+ # except Exception as e:
191
+ # print('[Error] Vina output error: %s' % self.docked_sdf_path, e)
192
+ # return []
193
+ # return self.results
core/evaluation/docking_vina.py CHANGED
@@ -1,269 +1,269 @@
1
- from openbabel import pybel
2
- from meeko import MoleculePreparation
3
- from meeko import obutils
4
- from vina import Vina
5
- import subprocess
6
- import rdkit.Chem as Chem
7
- from rdkit.Chem import AllChem
8
- import tempfile
9
- import AutoDockTools
10
- import os
11
- import contextlib
12
- from posecheck import PoseCheck
13
-
14
- from core.evaluation.docking_qvina import get_random_id, BaseDockingTask
15
-
16
-
17
- def suppress_stdout(func):
18
- def wrapper(*a, **ka):
19
- with open(os.devnull, 'w') as devnull:
20
- with contextlib.redirect_stdout(devnull):
21
- return func(*a, **ka)
22
- return wrapper
23
-
24
-
25
- class PrepLig(object):
26
- def __init__(self, input_mol, mol_format):
27
- if mol_format == 'smi':
28
- self.ob_mol = pybel.readstring('smi', input_mol)
29
- elif mol_format == 'sdf':
30
- self.ob_mol = next(pybel.readfile(mol_format, input_mol))
31
- else:
32
- raise ValueError(f'mol_format {mol_format} not supported')
33
 
34
- def addH(self, polaronly=False, correctforph=True, PH=7):
35
- self.ob_mol.OBMol.AddHydrogens(polaronly, correctforph, PH)
36
- obutils.writeMolecule(self.ob_mol.OBMol, 'tmp_h.sdf')
37
-
38
- def gen_conf(self):
39
- sdf_block = self.ob_mol.write('sdf')
40
- rdkit_mol = Chem.MolFromMolBlock(sdf_block, removeHs=False)
41
- AllChem.EmbedMolecule(rdkit_mol, Chem.rdDistGeom.ETKDGv3())
42
- self.ob_mol = pybel.readstring('sdf', Chem.MolToMolBlock(rdkit_mol))
43
- obutils.writeMolecule(self.ob_mol.OBMol, 'conf_h.sdf')
44
-
45
- @suppress_stdout
46
- def get_pdbqt(self, lig_pdbqt=None):
47
- preparator = MoleculePreparation()
48
- preparator.prepare(self.ob_mol.OBMol)
49
- if lig_pdbqt is not None:
50
- preparator.write_pdbqt_file(lig_pdbqt)
51
- return
52
- else:
53
- return preparator.write_pdbqt_string()
54
 
55
 
56
- class PrepProt(object):
57
- def __init__(self, pdb_file):
58
- self.prot = pdb_file
59
 
60
- def del_water(self, dry_pdb_file): # optional
61
- with open(self.prot) as f:
62
- lines = [l for l in f.readlines() if l.startswith('ATOM') or l.startswith('HETATM')]
63
- dry_lines = [l for l in lines if not 'HOH' in l]
64
 
65
- with open(dry_pdb_file, 'w') as f:
66
- f.write(''.join(dry_lines))
67
- self.prot = dry_pdb_file
68
 
69
- def addH(self, prot_pqr): # call pdb2pqr
70
- self.prot_pqr = prot_pqr
71
- subprocess.Popen(['pdb2pqr30','--ff=AMBER',self.prot, self.prot_pqr],
72
- stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL).communicate()
73
 
74
- def get_pdbqt(self, prot_pdbqt):
75
- prepare_receptor = os.path.join(AutoDockTools.__path__[0], 'Utilities24/prepare_receptor4.py')
76
- subprocess.Popen(['python3', prepare_receptor, '-r', self.prot_pqr, '-o', prot_pdbqt],
77
- stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL).communicate()
78
 
79
 
80
- class VinaDock(object):
81
- def __init__(self, lig_pdbqt, prot_pdbqt):
82
- self.lig_pdbqt = lig_pdbqt
83
- self.prot_pdbqt = prot_pdbqt
84
 
85
- def _max_min_pdb(self, pdb, buffer):
86
- with open(pdb, 'r') as f:
87
- lines = [l for l in f.readlines() if l.startswith('ATOM') or l.startswith('HEATATM')]
88
- xs = [float(l[31:39]) for l in lines]
89
- ys = [float(l[39:47]) for l in lines]
90
- zs = [float(l[47:55]) for l in lines]
91
- print(max(xs), min(xs))
92
- print(max(ys), min(ys))
93
- print(max(zs), min(zs))
94
- pocket_center = [(max(xs) + min(xs))/2, (max(ys) + min(ys))/2, (max(zs) + min(zs))/2]
95
- box_size = [(max(xs) - min(xs)) + buffer, (max(ys) - min(ys)) + buffer, (max(zs) - min(zs)) + buffer]
96
- return pocket_center, box_size
97
 
98
- def get_box(self, ref=None, buffer=0):
99
- '''
100
- ref: reference pdb to define pocket.
101
- buffer: buffer size to add
102
-
103
- if ref is not None:
104
- get the max and min on x, y, z axis in ref pdb and add buffer to each dimension
105
- else:
106
- use the entire protein to define pocket
107
- '''
108
- if ref is None:
109
- ref = self.prot_pdbqt
110
- self.pocket_center, self.box_size = self._max_min_pdb(ref, buffer)
111
- print(self.pocket_center, self.box_size)
112
-
113
- def dock(self, score_func='vina', seed=0, mode='dock', exhaustiveness=8, save_pose=False, **kwargs): # seed=0 mean random seed
114
- v = Vina(sf_name=score_func, seed=seed, verbosity=0, **kwargs)
115
- v.set_receptor(self.prot_pdbqt)
116
- v.set_ligand_from_file(self.lig_pdbqt)
117
- v.compute_vina_maps(center=self.pocket_center, box_size=self.box_size)
118
- if mode == 'score_only':
119
- score = v.score()[0]
120
- elif mode == 'minimize':
121
- score = v.optimize()[0]
122
- elif mode == 'dock':
123
- v.dock(exhaustiveness=exhaustiveness, n_poses=1)
124
- score = v.energies(n_poses=1)[0][0]
125
- else:
126
- raise ValueError
127
 
128
- if not save_pose:
129
- return score
130
- else:
131
- if mode == 'score_only':
132
- pose = None
133
- elif mode == 'minimize':
134
- tmp = tempfile.NamedTemporaryFile()
135
- with open(tmp.name, 'w') as f:
136
- v.write_pose(tmp.name, overwrite=True)
137
- with open(tmp.name, 'r') as f:
138
- pose = f.read()
139
 
140
- elif mode == 'dock':
141
- pose = v.poses(n_poses=1)
142
- else:
143
- raise ValueError
144
- return score, pose
145
-
146
-
147
- class VinaDockingTask(BaseDockingTask):
148
-
149
- @classmethod
150
- def from_original_data(cls, data, ligand_root='./data/crossdocked_pocket10', protein_root='./data/crossdocked',
151
- **kwargs):
152
- protein_fn = os.path.join(
153
- os.path.dirname(data.ligand_filename),
154
- os.path.basename(data.ligand_filename)[:10] + '.pdb'
155
- )
156
- protein_path = os.path.join(protein_root, protein_fn)
157
-
158
- ligand_path = os.path.join(ligand_root, data.ligand_filename)
159
- ligand_rdmol = next(iter(Chem.SDMolSupplier(ligand_path)))
160
- return cls(protein_path, ligand_rdmol, **kwargs)
161
-
162
- @classmethod
163
- def from_generated_mol(cls, ligand_rdmol, protein_filename, **kwargs):
164
- # load original pdb
165
- # TODO: make it more general and compatible with sample_for_pocket
166
- # protein_fn = os.path.join(
167
- # os.path.dirname(ligand_filename),
168
- # os.path.basename(ligand_filename)[:10] + '.pdb' if 'molecule' not in ligand_filename # PDBId_Chain_rec.pdb
169
- # else os.path.basename(ligand_filename).replace('_molecule', '_protein').replace('.sdf', '.pdb')
170
- # )
171
- # protein_path = os.path.join(protein_root, protein_fn)
172
- protein_path = protein_filename
173
- return cls(protein_path, ligand_rdmol, **kwargs)
174
-
175
- def __init__(self, protein_path, ligand_rdmol, tmp_dir='./tmp', center=None,
176
- size_factor=1., buffer=5.0, pos=None):
177
- super().__init__(protein_path, ligand_rdmol)
178
- # self.conda_env = conda_env
179
- self.tmp_dir = os.path.realpath(tmp_dir)
180
- os.makedirs(tmp_dir, exist_ok=True)
181
-
182
- self.task_id = get_random_id()
183
- self.receptor_id = self.task_id + '_receptor'
184
- self.ligand_id = self.task_id + '_ligand'
185
-
186
- self.receptor_path = protein_path
187
- self.ligand_path = os.path.join(self.tmp_dir, self.ligand_id + '.sdf')
188
-
189
- self.recon_ligand_mol = ligand_rdmol
190
- ligand_rdmol = Chem.AddHs(ligand_rdmol, addCoords=True)
191
-
192
- sdf_writer = Chem.SDWriter(self.ligand_path)
193
- sdf_writer.write(ligand_rdmol)
194
- sdf_writer.close()
195
- self.ligand_rdmol = ligand_rdmol
196
-
197
- pos = ligand_rdmol.GetConformer(0).GetPositions()
198
- # if pos is None:
199
- # raise ValueError('pos is None')
200
- if center is None:
201
- self.center = (pos.max(0) + pos.min(0)) / 2
202
- else:
203
- self.center = center
204
-
205
- if size_factor is None:
206
- self.size_x, self.size_y, self.size_z = 20, 20, 20
207
- else:
208
- self.size_x, self.size_y, self.size_z = (pos.max(0) - pos.min(0)) * size_factor + buffer
209
-
210
- self.proc = None
211
- self.results = None
212
- self.output = None
213
- self.error_output = None
214
- self.docked_sdf_path = None
215
-
216
- def run(self, mode='dock', exhaustiveness=8, **kwargs):
217
- ligand_pdbqt = self.ligand_path[:-4] + '.pdbqt'
218
- protein_pqr = self.receptor_path[:-4] + '.pqr'
219
- protein_pdbqt = self.receptor_path[:-4] + '.pdbqt'
220
-
221
- lig = PrepLig(self.ligand_path, 'sdf')
222
- lig.get_pdbqt(ligand_pdbqt)
223
-
224
- prot = PrepProt(self.receptor_path)
225
- if not os.path.exists(protein_pqr):
226
- prot.addH(protein_pqr)
227
- if not os.path.exists(protein_pdbqt):
228
- prot.get_pdbqt(protein_pdbqt)
229
-
230
- dock = VinaDock(ligand_pdbqt, protein_pdbqt)
231
- dock.pocket_center, dock.box_size = self.center, [self.size_x, self.size_y, self.size_z]
232
- score, pose = dock.dock(score_func='vina', mode=mode, exhaustiveness=exhaustiveness, save_pose=True, **kwargs)
233
- return [{'affinity': score, 'pose': pose}]
234
 
235
- @suppress_stdout
236
- def run_pose_check(self):
237
- pc = PoseCheck()
238
- pc.load_protein_from_pdb(self.receptor_path)
239
- # pc.load_ligands_from_sdf(self.ligand_path)
240
- pc.load_ligands_from_mols([self.ligand_rdmol])
241
- clashes = pc.calculate_clashes()
242
- strain = pc.calculate_strain_energy()
243
- return {'clashes': clashes[0], 'strain': strain[0]}
244
-
245
-
246
- # if __name__ == '__main__':
247
- # lig_pdbqt = 'data/lig.pdbqt'
248
- # mol_file = 'data/1a4k_ligand.sdf'
249
- # a = PrepLig(mol_file, 'sdf')
250
- # # mol_file = 'CC(=C)C(=O)OCCN(C)C'
251
- # # a = PrepLig(mol_file, 'smi')
252
- # a.addH()
253
- # a.gen_conf()
254
- # a.get_pdbqt(lig_pdbqt)
255
- #
256
- # prot_file = 'data/1a4k_protein_chainAB.pdb'
257
- # prot_dry = 'data/protein_dry.pdb'
258
- # prot_pqr = 'data/protein.pqr'
259
- # prot_pdbqt = 'data/protein.pdbqt'
260
- # b = PrepProt(prot_file)
261
- # b.del_water(prot_dry)
262
- # b.addH(prot_pqr)
263
- # b.get_pdbqt(prot_pdbqt)
264
- #
265
- # dock = VinaDock(lig_pdbqt, prot_pdbqt)
266
- # dock.get_box()
267
- # dock.dock()
268
 
269
 
 
1
+ # from openbabel import pybel
2
+ # from meeko import MoleculePreparation
3
+ # from meeko import obutils
4
+ # from vina import Vina
5
+ # import subprocess
6
+ # import rdkit.Chem as Chem
7
+ # from rdkit.Chem import AllChem
8
+ # import tempfile
9
+ # import AutoDockTools
10
+ # import os
11
+ # import contextlib
12
+ # from posecheck import PoseCheck
13
+
14
+ # from core.evaluation.docking_qvina import get_random_id, BaseDockingTask
15
+
16
+
17
+ # def suppress_stdout(func):
18
+ # def wrapper(*a, **ka):
19
+ # with open(os.devnull, 'w') as devnull:
20
+ # with contextlib.redirect_stdout(devnull):
21
+ # return func(*a, **ka)
22
+ # return wrapper
23
+
24
+
25
+ # class PrepLig(object):
26
+ # def __init__(self, input_mol, mol_format):
27
+ # if mol_format == 'smi':
28
+ # self.ob_mol = pybel.readstring('smi', input_mol)
29
+ # elif mol_format == 'sdf':
30
+ # self.ob_mol = next(pybel.readfile(mol_format, input_mol))
31
+ # else:
32
+ # raise ValueError(f'mol_format {mol_format} not supported')
33
 
34
+ # def addH(self, polaronly=False, correctforph=True, PH=7):
35
+ # self.ob_mol.OBMol.AddHydrogens(polaronly, correctforph, PH)
36
+ # obutils.writeMolecule(self.ob_mol.OBMol, 'tmp_h.sdf')
37
+
38
+ # def gen_conf(self):
39
+ # sdf_block = self.ob_mol.write('sdf')
40
+ # rdkit_mol = Chem.MolFromMolBlock(sdf_block, removeHs=False)
41
+ # AllChem.EmbedMolecule(rdkit_mol, Chem.rdDistGeom.ETKDGv3())
42
+ # self.ob_mol = pybel.readstring('sdf', Chem.MolToMolBlock(rdkit_mol))
43
+ # obutils.writeMolecule(self.ob_mol.OBMol, 'conf_h.sdf')
44
+
45
+ # @suppress_stdout
46
+ # def get_pdbqt(self, lig_pdbqt=None):
47
+ # preparator = MoleculePreparation()
48
+ # preparator.prepare(self.ob_mol.OBMol)
49
+ # if lig_pdbqt is not None:
50
+ # preparator.write_pdbqt_file(lig_pdbqt)
51
+ # return
52
+ # else:
53
+ # return preparator.write_pdbqt_string()
54
 
55
 
56
+ # class PrepProt(object):
57
+ # def __init__(self, pdb_file):
58
+ # self.prot = pdb_file
59
 
60
+ # def del_water(self, dry_pdb_file): # optional
61
+ # with open(self.prot) as f:
62
+ # lines = [l for l in f.readlines() if l.startswith('ATOM') or l.startswith('HETATM')]
63
+ # dry_lines = [l for l in lines if not 'HOH' in l]
64
 
65
+ # with open(dry_pdb_file, 'w') as f:
66
+ # f.write(''.join(dry_lines))
67
+ # self.prot = dry_pdb_file
68
 
69
+ # def addH(self, prot_pqr): # call pdb2pqr
70
+ # self.prot_pqr = prot_pqr
71
+ # subprocess.Popen(['pdb2pqr30','--ff=AMBER',self.prot, self.prot_pqr],
72
+ # stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL).communicate()
73
 
74
+ # def get_pdbqt(self, prot_pdbqt):
75
+ # prepare_receptor = os.path.join(AutoDockTools.__path__[0], 'Utilities24/prepare_receptor4.py')
76
+ # subprocess.Popen(['python3', prepare_receptor, '-r', self.prot_pqr, '-o', prot_pdbqt],
77
+ # stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL).communicate()
78
 
79
 
80
+ # class VinaDock(object):
81
+ # def __init__(self, lig_pdbqt, prot_pdbqt):
82
+ # self.lig_pdbqt = lig_pdbqt
83
+ # self.prot_pdbqt = prot_pdbqt
84
 
85
+ # def _max_min_pdb(self, pdb, buffer):
86
+ # with open(pdb, 'r') as f:
87
+ # lines = [l for l in f.readlines() if l.startswith('ATOM') or l.startswith('HEATATM')]
88
+ # xs = [float(l[31:39]) for l in lines]
89
+ # ys = [float(l[39:47]) for l in lines]
90
+ # zs = [float(l[47:55]) for l in lines]
91
+ # print(max(xs), min(xs))
92
+ # print(max(ys), min(ys))
93
+ # print(max(zs), min(zs))
94
+ # pocket_center = [(max(xs) + min(xs))/2, (max(ys) + min(ys))/2, (max(zs) + min(zs))/2]
95
+ # box_size = [(max(xs) - min(xs)) + buffer, (max(ys) - min(ys)) + buffer, (max(zs) - min(zs)) + buffer]
96
+ # return pocket_center, box_size
97
 
98
+ # def get_box(self, ref=None, buffer=0):
99
+ # '''
100
+ # ref: reference pdb to define pocket.
101
+ # buffer: buffer size to add
102
+
103
+ # if ref is not None:
104
+ # get the max and min on x, y, z axis in ref pdb and add buffer to each dimension
105
+ # else:
106
+ # use the entire protein to define pocket
107
+ # '''
108
+ # if ref is None:
109
+ # ref = self.prot_pdbqt
110
+ # self.pocket_center, self.box_size = self._max_min_pdb(ref, buffer)
111
+ # print(self.pocket_center, self.box_size)
112
+
113
+ # def dock(self, score_func='vina', seed=0, mode='dock', exhaustiveness=8, save_pose=False, **kwargs): # seed=0 mean random seed
114
+ # v = Vina(sf_name=score_func, seed=seed, verbosity=0, **kwargs)
115
+ # v.set_receptor(self.prot_pdbqt)
116
+ # v.set_ligand_from_file(self.lig_pdbqt)
117
+ # v.compute_vina_maps(center=self.pocket_center, box_size=self.box_size)
118
+ # if mode == 'score_only':
119
+ # score = v.score()[0]
120
+ # elif mode == 'minimize':
121
+ # score = v.optimize()[0]
122
+ # elif mode == 'dock':
123
+ # v.dock(exhaustiveness=exhaustiveness, n_poses=1)
124
+ # score = v.energies(n_poses=1)[0][0]
125
+ # else:
126
+ # raise ValueError
127
 
128
+ # if not save_pose:
129
+ # return score
130
+ # else:
131
+ # if mode == 'score_only':
132
+ # pose = None
133
+ # elif mode == 'minimize':
134
+ # tmp = tempfile.NamedTemporaryFile()
135
+ # with open(tmp.name, 'w') as f:
136
+ # v.write_pose(tmp.name, overwrite=True)
137
+ # with open(tmp.name, 'r') as f:
138
+ # pose = f.read()
139
 
140
+ # elif mode == 'dock':
141
+ # pose = v.poses(n_poses=1)
142
+ # else:
143
+ # raise ValueError
144
+ # return score, pose
145
+
146
+
147
+ # class VinaDockingTask(BaseDockingTask):
148
+
149
+ # @classmethod
150
+ # def from_original_data(cls, data, ligand_root='./data/crossdocked_pocket10', protein_root='./data/crossdocked',
151
+ # **kwargs):
152
+ # protein_fn = os.path.join(
153
+ # os.path.dirname(data.ligand_filename),
154
+ # os.path.basename(data.ligand_filename)[:10] + '.pdb'
155
+ # )
156
+ # protein_path = os.path.join(protein_root, protein_fn)
157
+
158
+ # ligand_path = os.path.join(ligand_root, data.ligand_filename)
159
+ # ligand_rdmol = next(iter(Chem.SDMolSupplier(ligand_path)))
160
+ # return cls(protein_path, ligand_rdmol, **kwargs)
161
+
162
+ # @classmethod
163
+ # def from_generated_mol(cls, ligand_rdmol, protein_filename, **kwargs):
164
+ # # load original pdb
165
+ # # TODO: make it more general and compatible with sample_for_pocket
166
+ # # protein_fn = os.path.join(
167
+ # # os.path.dirname(ligand_filename),
168
+ # # os.path.basename(ligand_filename)[:10] + '.pdb' if 'molecule' not in ligand_filename # PDBId_Chain_rec.pdb
169
+ # # else os.path.basename(ligand_filename).replace('_molecule', '_protein').replace('.sdf', '.pdb')
170
+ # # )
171
+ # # protein_path = os.path.join(protein_root, protein_fn)
172
+ # protein_path = protein_filename
173
+ # return cls(protein_path, ligand_rdmol, **kwargs)
174
+
175
+ # def __init__(self, protein_path, ligand_rdmol, tmp_dir='./tmp', center=None,
176
+ # size_factor=1., buffer=5.0, pos=None):
177
+ # super().__init__(protein_path, ligand_rdmol)
178
+ # # self.conda_env = conda_env
179
+ # self.tmp_dir = os.path.realpath(tmp_dir)
180
+ # os.makedirs(tmp_dir, exist_ok=True)
181
+
182
+ # self.task_id = get_random_id()
183
+ # self.receptor_id = self.task_id + '_receptor'
184
+ # self.ligand_id = self.task_id + '_ligand'
185
+
186
+ # self.receptor_path = protein_path
187
+ # self.ligand_path = os.path.join(self.tmp_dir, self.ligand_id + '.sdf')
188
+
189
+ # self.recon_ligand_mol = ligand_rdmol
190
+ # ligand_rdmol = Chem.AddHs(ligand_rdmol, addCoords=True)
191
+
192
+ # sdf_writer = Chem.SDWriter(self.ligand_path)
193
+ # sdf_writer.write(ligand_rdmol)
194
+ # sdf_writer.close()
195
+ # self.ligand_rdmol = ligand_rdmol
196
+
197
+ # pos = ligand_rdmol.GetConformer(0).GetPositions()
198
+ # # if pos is None:
199
+ # # raise ValueError('pos is None')
200
+ # if center is None:
201
+ # self.center = (pos.max(0) + pos.min(0)) / 2
202
+ # else:
203
+ # self.center = center
204
+
205
+ # if size_factor is None:
206
+ # self.size_x, self.size_y, self.size_z = 20, 20, 20
207
+ # else:
208
+ # self.size_x, self.size_y, self.size_z = (pos.max(0) - pos.min(0)) * size_factor + buffer
209
+
210
+ # self.proc = None
211
+ # self.results = None
212
+ # self.output = None
213
+ # self.error_output = None
214
+ # self.docked_sdf_path = None
215
+
216
+ # def run(self, mode='dock', exhaustiveness=8, **kwargs):
217
+ # ligand_pdbqt = self.ligand_path[:-4] + '.pdbqt'
218
+ # protein_pqr = self.receptor_path[:-4] + '.pqr'
219
+ # protein_pdbqt = self.receptor_path[:-4] + '.pdbqt'
220
+
221
+ # lig = PrepLig(self.ligand_path, 'sdf')
222
+ # lig.get_pdbqt(ligand_pdbqt)
223
+
224
+ # prot = PrepProt(self.receptor_path)
225
+ # if not os.path.exists(protein_pqr):
226
+ # prot.addH(protein_pqr)
227
+ # if not os.path.exists(protein_pdbqt):
228
+ # prot.get_pdbqt(protein_pdbqt)
229
+
230
+ # dock = VinaDock(ligand_pdbqt, protein_pdbqt)
231
+ # dock.pocket_center, dock.box_size = self.center, [self.size_x, self.size_y, self.size_z]
232
+ # score, pose = dock.dock(score_func='vina', mode=mode, exhaustiveness=exhaustiveness, save_pose=True, **kwargs)
233
+ # return [{'affinity': score, 'pose': pose}]
234
 
235
+ # @suppress_stdout
236
+ # def run_pose_check(self):
237
+ # pc = PoseCheck()
238
+ # pc.load_protein_from_pdb(self.receptor_path)
239
+ # # pc.load_ligands_from_sdf(self.ligand_path)
240
+ # pc.load_ligands_from_mols([self.ligand_rdmol])
241
+ # clashes = pc.calculate_clashes()
242
+ # strain = pc.calculate_strain_energy()
243
+ # return {'clashes': clashes[0], 'strain': strain[0]}
244
+
245
+
246
+ # # if __name__ == '__main__':
247
+ # # lig_pdbqt = 'data/lig.pdbqt'
248
+ # # mol_file = 'data/1a4k_ligand.sdf'
249
+ # # a = PrepLig(mol_file, 'sdf')
250
+ # # # mol_file = 'CC(=C)C(=O)OCCN(C)C'
251
+ # # # a = PrepLig(mol_file, 'smi')
252
+ # # a.addH()
253
+ # # a.gen_conf()
254
+ # # a.get_pdbqt(lig_pdbqt)
255
+ # #
256
+ # # prot_file = 'data/1a4k_protein_chainAB.pdb'
257
+ # # prot_dry = 'data/protein_dry.pdb'
258
+ # # prot_pqr = 'data/protein.pqr'
259
+ # # prot_pdbqt = 'data/protein.pdbqt'
260
+ # # b = PrepProt(prot_file)
261
+ # # b.del_water(prot_dry)
262
+ # # b.addH(prot_pqr)
263
+ # # b.get_pdbqt(prot_pdbqt)
264
+ # #
265
+ # # dock = VinaDock(lig_pdbqt, prot_pdbqt)
266
+ # # dock.get_box()
267
+ # # dock.dock()
268
 
269
 
core/evaluation/metrics.py CHANGED
@@ -8,8 +8,8 @@ from core.evaluation.utils import (
8
  convert_atomcloud_to_mol_smiles,
9
  mol2smiles,
10
  )
11
- from core.evaluation.docking_qvina import QVinaDockingTask
12
- from core.evaluation.docking_vina import VinaDockingTask
13
  from typing import List, Dict, Tuple
14
  from tqdm import tqdm
15
  import numpy as np
 
8
  convert_atomcloud_to_mol_smiles,
9
  mol2smiles,
10
  )
11
+ # from core.evaluation.docking_qvina import QVinaDockingTask
12
+ # from core.evaluation.docking_vina import VinaDockingTask
13
  from typing import List, Dict, Tuple
14
  from tqdm import tqdm
15
  import numpy as np
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
  absl_py==2.1.0
2
- AutoDockTools_py3==1.5.7.post1+10.g9d37a13
3
  easydict==1.13
4
  fire==0.6.0
5
  gradio==4.36.1
@@ -7,14 +6,11 @@ gradio_molecule3d==0.0.5
7
  imageio==2.34.1
8
  lmdb==1.4.1
9
  matplotlib==3.4.3
10
- meeko==0.1.dev3
11
  numpy==1.23.1
12
  openbabel==3.1.1.1
13
  overrides==7.7.0
14
  Pillow==9.2.0
15
  Pillow==10.3.0
16
- posecheck==1.1
17
- posecheck.egg==info
18
  py3Dmol==2.1.0
19
  pytorch_lightning==2.0.8
20
  PyYAML==6.0.1
@@ -26,4 +22,3 @@ torch_geometric==2.1.0.post1
26
  torch_scatter==2.0.9
27
  torchdiffeq==0.2.3
28
  tqdm==4.64.0
29
- vina==1.2.2
 
1
  absl_py==2.1.0
 
2
  easydict==1.13
3
  fire==0.6.0
4
  gradio==4.36.1
 
6
  imageio==2.34.1
7
  lmdb==1.4.1
8
  matplotlib==3.4.3
 
9
  numpy==1.23.1
10
  openbabel==3.1.1.1
11
  overrides==7.7.0
12
  Pillow==9.2.0
13
  Pillow==10.3.0
 
 
14
  py3Dmol==2.1.0
15
  pytorch_lightning==2.0.8
16
  PyYAML==6.0.1
 
22
  torch_scatter==2.0.9
23
  torchdiffeq==0.2.3
24
  tqdm==4.64.0
 
sample_for_pocket.py CHANGED
@@ -30,11 +30,11 @@ from pytorch_lightning import seed_everything
30
  # from absl import logging
31
  # import glob
32
 
33
- from core.evaluation.utils import scoring_func
34
- from core.evaluation.docking_vina import VinaDockingTask
35
- from posecheck import PoseCheck
36
- import numpy as np
37
- from rdkit import Chem
38
 
39
 
40
  def get_dataloader_from_pdb(cfg):
 
30
  # from absl import logging
31
  # import glob
32
 
33
+ # from core.evaluation.utils import scoring_func
34
+ # from core.evaluation.docking_vina import VinaDockingTask
35
+ # from posecheck import PoseCheck
36
+ # import numpy as np
37
+ # from rdkit import Chem
38
 
39
 
40
  def get_dataloader_from_pdb(cfg):