# Spaces: Running on A10G
import torch
from rdkit import Chem
# Default torch dtypes shared across the project: one floating-point dtype
# and one (8-bit) integer dtype.
TORCH_FLOAT = torch.float32
TORCH_INT = torch.int8
# #################################################################################### #
# ####################################### ZINC ####################################### #
# #################################################################################### #

# Atom symbol -> index used for one-hot encoding.
ATOM2IDX = {'C': 0, 'O': 1, 'N': 2, 'F': 3, 'S': 4, 'Cl': 5, 'Br': 6, 'I': 7}
# Inverse lookup (index -> atom symbol); derived from ATOM2IDX so the two
# mappings cannot drift apart.
IDX2ATOM = {idx: atom for atom, idx in ATOM2IDX.items()}

# Atomic numbers (Z) keyed by atom symbol.
CHARGES = {'C': 6, 'O': 8, 'N': 7, 'F': 9, 'S': 16, 'Cl': 17, 'Br': 35, 'I': 53}

# Number of one-hot atom types.
NUMBER_OF_ATOM_TYPES = len(ATOM2IDX)
# #################################################################################### #
# ####################################### GEOM ####################################### #
# #################################################################################### #

# Atom symbol -> index used for one-hot encoding.
GEOM_ATOM2IDX = {'C': 0, 'O': 1, 'N': 2, 'F': 3, 'S': 4, 'Cl': 5, 'Br': 6, 'I': 7, 'P': 8}
# Inverse lookup (index -> atom symbol); derived from GEOM_ATOM2IDX so the two
# mappings cannot drift apart.
GEOM_IDX2ATOM = {idx: atom for atom, idx in GEOM_ATOM2IDX.items()}

# Atomic numbers (Z) keyed by atom symbol.
GEOM_CHARGES = {'C': 6, 'O': 8, 'N': 7, 'F': 9, 'S': 16, 'Cl': 17, 'Br': 35, 'I': 53, 'P': 15}

# Number of one-hot atom types.
GEOM_NUMBER_OF_ATOM_TYPES = len(GEOM_ATOM2IDX)
# Dataset keys, grouped by how they are handled.
# NOTE(review): the grouping semantics below are inferred from the constant
# names only -- confirm against the collate/dataset code.

# Keys presumably kept as plain Python lists.
DATA_LIST_ATTRS = {
    'uuid',
    'name',
    'fragments_smi',
    'linker_smi',
    'num_atoms',
}

# Keys presumably padded to a common length.
DATA_ATTRS_TO_PAD = {
    'positions',
    'one_hot',
    'charges',
    'anchors',
    'fragment_mask',
    'linker_mask',
    'pocket_mask',
    'fragment_only_mask',
}

# Keys presumably given an extra trailing dimension. Every key here is also
# listed in DATA_ATTRS_TO_PAD.
DATA_ATTRS_TO_ADD_LAST_DIM = {
    'charges',
    'anchors',
    'fragment_mask',
    'linker_mask',
    'pocket_mask',
    'fragment_only_mask',
}
# Distribution of linker size in train data (linker size -> count).
# The insertion order is deliberately left exactly as it was (not sorted),
# in case any caller depends on the dict's iteration order.
LINKER_SIZE_DIST = {
    4: 85540,
    3: 113928,
    6: 70946,
    7: 30408,
    5: 77671,
    9: 5177,
    10: 1214,
    8: 12712,
    11: 158,
    12: 7,
}
# Bond lengths from:
# http://www.wiredchemist.com/chemistry/data/bond_energies_lengths.html
# And:
# http://chemistry-reference.com/tables/Bond%20Lengths%20and%20Enthalpies.pdf
#
# BONDS_1[a][b] is the reference length for an a-b bond; the table is
# symmetric (BONDS_1[a][b] == BONDS_1[b][a] for every listed pair).
# NOTE(review): this table is presumably for single bonds, with values in
# picometres (e.g. C-C = 154) -- confirm against the cited sources.
BONDS_1 = {
    'H': {
        'H': 74, 'C': 109, 'N': 101, 'O': 96, 'F': 92,
        'B': 119, 'Si': 148, 'P': 144, 'As': 152, 'S': 134,
        'Cl': 127, 'Br': 141, 'I': 161,
    },
    'C': {
        'H': 109, 'C': 154, 'N': 147, 'O': 143, 'F': 135,
        'Si': 185, 'P': 184, 'S': 182, 'Cl': 177, 'Br': 194,
        'I': 214,
    },
    'N': {
        'H': 101, 'C': 147, 'N': 145, 'O': 140, 'F': 136,
        'Cl': 175, 'Br': 214, 'S': 168, 'I': 222, 'P': 177,
    },
    'O': {
        'H': 96, 'C': 143, 'N': 140, 'O': 148, 'F': 142,
        'Br': 172, 'S': 151, 'P': 163, 'Si': 163, 'Cl': 164,
        'I': 194,
    },
    'F': {
        'H': 92, 'C': 135, 'N': 136, 'O': 142, 'F': 142,
        'S': 158, 'Si': 160, 'Cl': 166, 'Br': 178, 'P': 156,
        'I': 187,
    },
    'B': {
        'H': 119, 'Cl': 175,
    },
    'Si': {
        'Si': 233, 'H': 148, 'C': 185, 'O': 163, 'S': 200,
        'F': 160, 'Cl': 202, 'Br': 215, 'I': 243,
    },
    'Cl': {
        'Cl': 199, 'H': 127, 'C': 177, 'N': 175, 'O': 164,
        'P': 203, 'S': 207, 'B': 175, 'Si': 202, 'F': 166,
        'Br': 214,
    },
    'S': {
        'H': 134, 'C': 182, 'N': 168, 'O': 151, 'S': 204,
        'F': 158, 'Cl': 207, 'Br': 225, 'Si': 200, 'P': 210,
        'I': 234,
    },
    'Br': {
        'Br': 228, 'H': 141, 'C': 194, 'O': 172, 'N': 214,
        'Si': 215, 'S': 225, 'F': 178, 'Cl': 214, 'P': 222,
    },
    'P': {
        'P': 221, 'H': 144, 'C': 184, 'O': 163, 'Cl': 203,
        'S': 210, 'F': 156, 'N': 177, 'Br': 222,
    },
    'I': {
        'H': 161, 'C': 214, 'Si': 243, 'N': 222, 'O': 194,
        'S': 234, 'F': 187, 'I': 266,
    },
    'As': {
        'H': 152,
    },
}
# Reference bond lengths with the same layout as the other bond tables
# (BONDS_2[a][b]); presumably for double bonds -- confirm against usage.
# NOTE(review): this table is not symmetric -- 'C' lists 'S': 160 but 'S'
# has no 'C' entry, so a lookup keyed as BONDS_2['S']['C'] fails while
# BONDS_2['C']['S'] succeeds. Values are left untouched here; confirm
# whether the missing entry is intentional before adding it.
BONDS_2 = {
    'C': {'C': 134, 'N': 129, 'O': 120, 'S': 160},
    'N': {'C': 129, 'N': 125, 'O': 121},
    'O': {'C': 120, 'N': 121, 'O': 121, 'P': 150},
    'P': {'O': 150, 'S': 186},
    'S': {'P': 186},
}
# Reference bond lengths with the same layout as the other bond tables
# (BONDS_3[a][b]); presumably for triple bonds -- confirm against usage.
# The table is symmetric for every listed pair.
BONDS_3 = {
    'C': {'C': 120, 'N': 116, 'O': 113},
    'N': {'C': 116, 'N': 110},
    'O': {'C': 113},
}
# RDKit bond types addressed by integer index. Index 0 holds None
# (presumably meaning "no bond" -- confirm against usage).
BOND_DICT = [
    None,
    Chem.rdchem.BondType.SINGLE,
    Chem.rdchem.BondType.DOUBLE,
    Chem.rdchem.BondType.TRIPLE,
    Chem.rdchem.BondType.AROMATIC,
]

# Inverse lookup (RDKit bond type -> index), derived from BOND_DICT so the
# two cannot drift apart. The None placeholder at index 0 is skipped, giving
# SINGLE: 1, DOUBLE: 2, TRIPLE: 3, AROMATIC: 4.
BOND2IDX = {
    bond_type: idx
    for idx, bond_type in enumerate(BOND_DICT)
    if bond_type is not None
}
# Allowed bond counts per element symbol. A value is either a single int or
# a list of ints when more than one count is accepted (P, Hg, Bi), so
# consumers must handle both shapes.
# NOTE(review): presumably the valence used for validity checks -- confirm
# against usage.
ALLOWED_BONDS = {
    'H': 1,
    'C': 4,
    'N': 3,
    'O': 2,
    'F': 1,
    'B': 3,
    'Al': 3,
    'Si': 4,
    'P': [3, 5],
    'S': 4,
    'Cl': 1,
    'As': 3,
    'Br': 1,
    'I': 1,
    'Hg': [1, 2],
    'Bi': [3, 5],
}
# Three margin values.
# NOTE(review): presumably tolerances added to the BONDS_1 / BONDS_2 /
# BONDS_3 reference lengths when classifying bonds -- confirm against usage.
MARGINS_EDM = [10, 5, 2]

# Nine color specifiers 'C0' .. 'C8'.
# NOTE(review): these look like Matplotlib "CN" color-cycle names, one per
# atom type -- confirm against the plotting code.
COLORS = [f'C{i}' for i in range(9)]

# Nine identical radii, matching the length of COLORS. A uniform value of
# 0.3 was used previously and had been left commented out.
RADII = [0.77] * 9
# Linker sizes for the ZINC training set, addressed by class id
# (list position -> linker size).
ZINC_TRAIN_LINKER_ID2SIZE = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

# Inverse lookup: linker size -> class id.
ZINC_TRAIN_LINKER_SIZE2ID = {
    size: idx
    for idx, size in enumerate(ZINC_TRAIN_LINKER_ID2SIZE)
}

# One weight per entry of ZINC_TRAIN_LINKER_ID2SIZE (same length).
# NOTE(review): presumably per-class loss weights aligned by class id --
# confirm against the training code.
ZINC_TRAIN_LINKER_SIZE_WEIGHTS = [
    3.47347831e-01,
    4.63079100e-01,
    5.12370917e-01,
    5.62392614e-01,
    1.30294388e+00,
    3.24247801e+00,
    8.12391184e+00,
    3.45634358e+01,
    2.72428571e+02,
    6.26585714e+03,
]
# Linker sizes for the GEOM training set, addressed by class id
# (list position -> linker size). Sizes 3..32 are contiguous; 36, 38 and 41
# are the only larger sizes listed.
GEOM_TRAIN_LINKER_ID2SIZE = [*range(3, 33), 36, 38, 41]

# Inverse lookup: linker size -> class id.
GEOM_TRAIN_LINKER_SIZE2ID = {
    size: idx
    for idx, size in enumerate(GEOM_TRAIN_LINKER_ID2SIZE)
}

# One weight per entry of GEOM_TRAIN_LINKER_ID2SIZE (same length).
# NOTE(review): presumably per-class loss weights aligned by class id --
# confirm against the training code.
GEOM_TRAIN_LINKER_SIZE_WEIGHTS = [
    1.07790681e+00, 4.54693604e-01, 3.62575713e-01, 3.75199484e-01,
    3.67812588e-01, 3.92388528e-01, 3.83421054e-01, 4.26924670e-01,
    4.92768040e-01, 4.99761944e-01, 4.92342726e-01, 5.71456905e-01,
    7.30631393e-01, 8.45412928e-01, 9.97252243e-01, 1.25423985e+00,
    1.57316129e+00, 2.19902962e+00, 3.22640431e+00, 4.25481066e+00,
    6.34749573e+00, 9.00676236e+00, 1.43084017e+01, 2.25763173e+01,
    3.36867096e+01, 9.50713805e+01, 2.08693274e+02, 2.51659537e+02,
    7.77856749e+02, 8.55642424e+03, 8.55642424e+03, 4.27821212e+03,
    4.27821212e+03,
]