Spaces:
Build error
Build error
Create new file
Browse files
app.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from huggingface_hub import from_pretrained_keras
|
2 |
+
import gradio as gr
|
3 |
+
import ast
|
4 |
+
import pandas as pd
|
5 |
+
import numpy as np
|
6 |
+
import tensorflow as tf
|
7 |
+
from rdkit import Chem, RDLogger
|
8 |
+
from rdkit.Chem import BondType
|
9 |
+
from rdkit.Chem.Draw import MolsToGridImage
|
10 |
+
|
11 |
+
# Turn off RDKit's log output for every "rdApp.*" channel.
RDLogger.DisableLog("rdApp.*")

# Config
# Atom symbols known to the app; a symbol's list position is its atom-type
# index in the one-hot feature tensor.
SMILE_CHARSET = ["C", "B", "F", "I", "H", "O", "N", "S", "P", "Cl", "Br"]

# Two-way bond lookup: bond-type name -> channel index, and
# channel index -> RDKit BondType.
bond_mapping = {
    "SINGLE": 0,
    "DOUBLE": 1,
    "TRIPLE": 2,
    "AROMATIC": 3,
    0: BondType.SINGLE,
    1: BondType.DOUBLE,
    2: BondType.TRIPLE,
    3: BondType.AROMATIC,
}

MAX_MOLSIZE = 109

# Two-way atom lookup: symbol -> index and index -> symbol, merged into
# a single dict so either direction can be resolved through `atom_mapping`.
SMILE_to_index = {symbol: index for index, symbol in enumerate(SMILE_CHARSET)}
index_to_SMILE = dict(enumerate(SMILE_CHARSET))
atom_mapping = {**SMILE_to_index, **index_to_SMILE}

NUM_ATOMS = 120  # Maximum number of atoms
# Number of atom types; derived from the charset so the two cannot drift
# apart (evaluates to 11, the value previously hard-coded here).
ATOM_DIM = len(SMILE_CHARSET)
BOND_DIM = 4 + 1  # Number of bond types (4 real types + 1 extra channel)
LATENT_DIM = 435  # Size of the latent space
|
30 |
+
|
31 |
+
def graph_to_molecule(graph):
    """Build an RDKit molecule from a one-hot ``(adjacency, features)`` graph.

    Args:
        graph: A two-item sequence ``(adjacency, features)``. ``adjacency``
            is indexed as ``[bond_type, atom_i, atom_j]`` and ``features``
            as ``[atom, atom_type]``; both are expected to be NumPy arrays.

    Returns:
        The sanitized ``Chem.RWMol``, or ``None`` when RDKit sanitization
        reports any failure.

    NOTE(review): atoms whose predicted type is index ``ATOM_DIM - 1`` are
    discarded as "no atom" even though that index also maps to a symbol in
    ``atom_mapping`` -- confirm this matches how the model was trained.
    """
    bond_tensor, atom_tensor = graph

    # Predicted atom-type index for every atom slot.
    atom_types = np.argmax(atom_tensor, axis=1)

    # Keep a slot only if it is not the "no atom" class and it takes part in
    # at least one bond outside the last (non-bond) channel.
    is_atom = atom_types != ATOM_DIM - 1
    is_bonded = np.sum(bond_tensor[:-1], axis=(0, 1)) != 0
    kept = np.where(is_atom & is_bonded)[0]
    atom_types = atom_types[kept]
    bond_tensor = bond_tensor[:, kept, :][:, :, kept]

    # RWMol is the editable molecule type; add one atom per kept slot.
    mol = Chem.RWMol()
    for type_idx in atom_types:
        mol.AddAtom(Chem.Atom(atom_mapping[type_idx]))

    # The adjacency tensor is symmetric, so only its upper triangle is read.
    # Self-loops and entries in the last (non-bond) channel are ignored.
    for bond_idx, src, dst in zip(*np.nonzero(np.triu(bond_tensor) == 1)):
        if src != dst and bond_idx != BOND_DIM - 1:
            mol.AddBond(int(src), int(dst), bond_mapping[bond_idx])

    # Strict policy: any sanitization failure rejects the molecule. See
    # https://www.rdkit.org/docs/RDKit_Book.html#molecular-sanitization
    status = Chem.SanitizeMol(mol, catchErrors=True)
    return mol if status == Chem.SanitizeFlags.SANITIZE_NONE else None
|
68 |
+
|
69 |
+
# Load the pretrained Keras model from the Hugging Face Hub once, at import
# time, so every call to `inference` reuses the same instance.
# NOTE(review): presumably this is the VAE decoder, since `inference` feeds it
# latent vectors and unpacks (adjacency, features) outputs -- confirm.
model = from_pretrained_keras("keras-io/drug-molecule-generation-with-VAE")
|
70 |
+
|
71 |
+
|
72 |
+
def inference(num_mol, num_samples=1000):
    """Sample molecules from the pretrained model and save them as a grid image.

    Draws ``num_samples`` latent vectors from a standard normal distribution,
    runs them through ``model``, converts each predicted graph with
    ``graph_to_molecule``, and draws the first ``num_mol`` molecules that
    passed sanitization into ``img.png``.

    Args:
        num_mol: Maximum number of molecules to draw. Coerced to ``int``
            because a UI slider may deliver a float, which is not a valid
            slice bound.
        num_samples: Number of latent vectors to sample. Defaults to 1000,
            the value that was previously hard-coded.

    Returns:
        The path ``'img.png'`` of the saved grid image.
    """
    num_mol = int(num_mol)
    num_samples = int(num_samples)

    z = tf.random.normal((num_samples, LATENT_DIM))
    reconstruction_adjacency, reconstruction_features = model.predict(z)

    # One-hot encode the adjacency tensor along the bond-type axis (axis 1).
    adjacency = tf.argmax(reconstruction_adjacency, axis=1)
    adjacency = tf.one_hot(adjacency, depth=BOND_DIM, axis=1)
    # Remove potential self-loops by zeroing the diagonal of each matrix.
    adjacency = tf.linalg.set_diag(adjacency, tf.zeros(tf.shape(adjacency)[:-1]))

    # One-hot encode the feature tensor along the atom-type axis (axis 2).
    features = tf.argmax(reconstruction_features, axis=2)
    features = tf.one_hot(features, depth=ATOM_DIM, axis=2)

    # Convert sample by sample; graph_to_molecule returns None for graphs
    # that fail sanitization, and those are dropped below.
    molecules = [
        graph_to_molecule([sample_adjacency.numpy(), sample_features.numpy()])
        for sample_adjacency, sample_features in zip(adjacency, features)
    ]
    valid_molecules = [mol for mol in molecules if mol is not None][:num_mol]

    MolsToGridImage(
        valid_molecules, molsPerRow=5, subImgSize=(260, 160)
    ).save("img.png")
    return 'img.png'
|
88 |
+
|
89 |
+
# Slider that chooses how many generated molecules to draw
# (20 to 100 in steps of 20, starting at 40).
_molecule_count_slider = gr.inputs.Slider(
    20, 100, label='Number of Molecular Graphs', step=20, default=40
)

# Web UI: one slider in, one image out, backed by `inference`.
_demo = gr.Interface(
    fn=inference,
    inputs=[_molecule_count_slider],
    outputs="image",
    title="Generating Drug Molecule with VAE",
    description="Implementing a Convolutional Variational AutoEncoder (VAE) for Drug Discovery 🔬",
    article="Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>. Based on the keras example from <a href=\"https://keras.io/examples/generative/molecule_generation/\">Victor Basu</a>",
)

# Start serving; runs at import time, as the original single expression did.
_demo.launch(enable_queue=True, debug=True)
|