vumichien committed on
Commit
c1f4589
1 Parent(s): 772fe98

Create new file

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import from_pretrained_keras
2
+ import gradio as gr
3
+ import ast
4
+ import pandas as pd
5
+ import numpy as np
6
+ import tensorflow as tf
7
+ from rdkit import Chem, RDLogger
8
+ from rdkit.Chem import BondType
9
+ from rdkit.Chem.Draw import MolsToGridImage
10
+
11
# Turn off RDKit's "rdApp.*" log channels.
RDLogger.DisableLog("rdApp.*")

# Config
# Atom symbols known to the model; a symbol's list position is its index.
SMILE_CHARSET = ["C", "B", "F", "I", "H", "O", "N", "S", "P", "Cl", "Br"]
MAX_MOLSIZE = 109

# Two-way lookup between bond names / RDKit bond types and integer indices:
# name -> index for the string keys, index -> BondType for the integer keys.
bond_mapping = {
    "SINGLE": 0,
    "DOUBLE": 1,
    "TRIPLE": 2,
    "AROMATIC": 3,
    0: BondType.SINGLE,
    1: BondType.DOUBLE,
    2: BondType.TRIPLE,
    3: BondType.AROMATIC,
}

# Two-way lookup between atom symbols and integer indices.
SMILE_to_index = {symbol: idx for idx, symbol in enumerate(SMILE_CHARSET)}
index_to_SMILE = {idx: symbol for idx, symbol in enumerate(SMILE_CHARSET)}
atom_mapping = {**SMILE_to_index, **index_to_SMILE}

NUM_ATOMS = 120  # Maximum number of atoms
ATOM_DIM = 11  # Number of atom types
BOND_DIM = 4 + 1  # Number of bond types
LATENT_DIM = 435  # Size of the latent space
30
+
31
def graph_to_molecule(graph):
    """Convert a one-hot molecular graph into a sanitized RDKit molecule.

    Args:
        graph: A pair ``(adjacency, features)``. ``adjacency`` is indexed as
            ``[bond_type, atom_i, atom_j]`` and ``features`` as
            ``[atom, atom_type]``; both are expected to be NumPy arrays.

    Returns:
        The editable RDKit molecule (``Chem.RWMol``) when sanitization
        succeeds, otherwise ``None``.
    """
    adjacency, features = graph

    # Keep only positions that hold a real atom type (anything but the last
    # atom channel) and that take part in at least one bond outside the last
    # bond channel.
    atom_types = np.argmax(features, axis=1)
    is_real_atom = atom_types != ATOM_DIM - 1
    is_bonded = np.sum(adjacency[:-1], axis=(0, 1)) != 0
    kept = np.where(is_real_atom & is_bonded)[0]

    kept_atom_types = atom_types[kept]
    adjacency = adjacency[:, kept, :][:, :, kept]

    # RWMol is the editable molecule type; atoms are appended in kept order so
    # their indices line up with the filtered adjacency tensor.
    molecule = Chem.RWMol()
    for type_idx in kept_atom_types:
        molecule.AddAtom(Chem.Atom(atom_mapping[type_idx]))

    # The adjacency tensor is symmetric, so only its upper triangle is read.
    # Self-loops and entries in the last bond channel are not real bonds.
    upper_triangle_hits = np.where(np.triu(adjacency) == 1)
    for bond_idx, first, second in zip(*upper_triangle_hits):
        if first != second and bond_idx != BOND_DIM - 1:
            molecule.AddBond(int(first), int(second), bond_mapping[bond_idx])

    # Be strict: any sanitization failure rejects the molecule. See
    # https://www.rdkit.org/docs/RDKit_Book.html#molecular-sanitization
    status = Chem.SanitizeMol(molecule, catchErrors=True)
    return None if status != Chem.SanitizeFlags.SANITIZE_NONE else molecule
68
+
69
# Load the pretrained Keras model once, at import time; `inference` calls
# `model.predict` on it for every request.
model = from_pretrained_keras(
    "keras-io/drug-molecule-generation-with-VAE",
)
70
+
71
+
72
def inference(num_mol, num_samples=1000):
    """Sample molecules from the model and render them as a grid image.

    Random latent vectors are decoded by ``model`` into adjacency and feature
    tensors, converted to RDKit molecules, and the first ``num_mol`` valid
    molecules (in sample order) are drawn into a single image.

    Args:
        num_mol: Maximum number of molecules to draw. Coerced to ``int``
            because UI sliders may deliver the value as a float.
        num_samples: Number of latent vectors to decode. Fewer than
            ``num_mol`` molecules are drawn if too few samples are valid.

    Returns:
        The path of the written image, ``'img.png'``. The file is overwritten
        on every call.
    """
    # Slicing/counting needs a real integer, not e.g. 40.0.
    num_mol = int(num_mol)

    z = tf.random.normal((num_samples, LATENT_DIM))
    reconstruction_adjacency, reconstruction_features = model.predict(z)

    # obtain one-hot encoded adjacency tensor
    adjacency = tf.argmax(reconstruction_adjacency, axis=1)
    adjacency = tf.one_hot(adjacency, depth=BOND_DIM, axis=1)
    # Remove potential self-loops from adjacency
    adjacency = tf.linalg.set_diag(adjacency, tf.zeros(tf.shape(adjacency)[:-1]))

    # obtain one-hot encoded feature tensor
    features = tf.argmax(reconstruction_features, axis=2)
    features = tf.one_hot(features, depth=ATOM_DIM, axis=2)

    # Convert to NumPy once instead of once per sample.
    adjacency = adjacency.numpy()
    features = features.numpy()

    # Only the first `num_mol` valid molecules are drawn, so stop converting
    # graphs as soon as enough have been collected.
    valid_molecules = []
    for sample_adjacency, sample_features in zip(adjacency, features):
        if len(valid_molecules) >= num_mol:
            break
        molecule = graph_to_molecule([sample_adjacency, sample_features])
        if molecule is not None:
            valid_molecules.append(molecule)

    MolsToGridImage(
        valid_molecules, molsPerRow=5, subImgSize=(260, 160)
    ).save("img.png")
    return 'img.png'
88
+
89
# Build the web UI: one slider selecting how many molecules to draw, wired to
# `inference`, whose returned image path is shown as the output. Then start
# serving it.
gr.Interface(
    fn=inference,
    inputs=[
        gr.inputs.Slider(
            20,
            100,
            step=20,
            default=40,
            label='Number of Molecular Graphs',
        ),
    ],
    outputs="image",
    title="Generating Drug Molecule with VAE",
    description="Implementing a Convolutional Variational AutoEncoder (VAE) for Drug Discovery 🔬",
    article=(
        'Author: <a href="https://huggingface.co/vumichien">Vu Minh Chien</a>. '
        'Based on the keras example from '
        '<a href="https://keras.io/examples/generative/molecule_generation/">Victor Basu</a>'
    ),
).launch(enable_queue=True, debug=True)