from huggingface_hub import from_pretrained_keras
import gradio as gr
from rdkit import Chem, RDLogger
from rdkit.Chem.Draw import MolsToGridImage
import numpy as np
import tensorflow as tf
from tensorflow import keras

# Config
# Both mappings are bidirectional: symbol/name -> index and index -> RDKit value.
atom_mapping = {
    "C": 0,
    0: "C",
    "N": 1,
    1: "N",
    "O": 2,
    2: "O",
    "F": 3,
    3: "F",
}

bond_mapping = {
    "SINGLE": 0,
    0: Chem.BondType.SINGLE,
    "DOUBLE": 1,
    1: Chem.BondType.DOUBLE,
    "TRIPLE": 2,
    2: Chem.BondType.TRIPLE,
    "AROMATIC": 3,
    3: Chem.BondType.AROMATIC,
}

NUM_ATOMS = 9  # Maximum number of atoms
ATOM_DIM = 4 + 1  # Number of atom types (last index is treated as "no atom")
BOND_DIM = 4 + 1  # Number of bond types (last index is treated as "no bond")
LATENT_DIM = 64  # Size of the latent space

RDLogger.DisableLog("rdApp.*")


def graph_to_molecule(graph):
    """Convert a one-hot (adjacency, features) graph into an RDKit molecule.

    Args:
        graph: A pair ``(adjacency, features)`` of NumPy arrays. ``adjacency``
            is indexed as ``[bond_type, atom_i, atom_j]`` and ``features`` as
            ``[atom, atom_type]``.

    Returns:
        The sanitized ``Chem.RWMol``, or ``None`` if sanitization fails.
    """
    # Unpack graph
    adjacency, features = graph

    # RWMol is a molecule object intended to be edited
    molecule = Chem.RWMol()

    # Remove "no atoms" & atoms with no bonds
    keep_idx = np.where(
        (np.argmax(features, axis=1) != ATOM_DIM - 1)
        & (np.sum(adjacency[:-1], axis=(0, 1)) != 0)
    )[0]
    features = features[keep_idx]
    adjacency = adjacency[:, keep_idx, :][:, :, keep_idx]

    # Add atoms to molecule
    for atom_type_idx in np.argmax(features, axis=1):
        atom = Chem.Atom(atom_mapping[atom_type_idx])
        _ = molecule.AddAtom(atom)

    # Add bonds between atoms in molecule; based on the upper triangles
    # of the [symmetric] adjacency tensor
    (bonds_ij, atoms_i, atoms_j) = np.where(np.triu(adjacency) == 1)
    for (bond_ij, atom_i, atom_j) in zip(bonds_ij, atoms_i, atoms_j):
        # Skip self-loops and entries in the "no bond" channel
        if atom_i == atom_j or bond_ij == BOND_DIM - 1:
            continue
        bond_type = bond_mapping[bond_ij]
        molecule.AddBond(int(atom_i), int(atom_j), bond_type)

    # Sanitize the molecule; for more information on sanitization, see
    # https://www.rdkit.org/docs/RDKit_Book.html#molecular-sanitization
    flag = Chem.SanitizeMol(molecule, catchErrors=True)
    # Let's be strict. If sanitization fails, return None
    if flag != Chem.SanitizeFlags.SANITIZE_NONE:
        return None

    return molecule


generator = from_pretrained_keras("keras-io/wgan-molecular-graphs")


def predict(num_mol):
    """Generate molecules with the pretrained generator and render a grid image.

    Twice as many latent samples as requested are drawn; graphs that fail
    conversion are dropped and at most ``num_mol`` of the remaining molecules
    are drawn.

    Args:
        num_mol: Number of molecules to display. Coerced to ``int`` because
            it is used as a tensor dimension and as a slice bound.

    Returns:
        The path of the PNG file the grid image was saved to (``'img.png'``).
    """
    # UI sliders may deliver floats; shapes and slices require integers.
    num_mol = int(num_mol)
    samples = num_mol * 2

    z = tf.random.normal((samples, LATENT_DIM))
    graph = generator.predict(z)

    # obtain one-hot encoded adjacency tensor
    adjacency = tf.argmax(graph[0], axis=1)
    adjacency = tf.one_hot(adjacency, depth=BOND_DIM, axis=1)
    # Remove potential self-loops from adjacency
    adjacency = tf.linalg.set_diag(adjacency, tf.zeros(tf.shape(adjacency)[:-1]))

    # obtain one-hot encoded feature tensor
    features = tf.argmax(graph[1], axis=2)
    features = tf.one_hot(features, depth=ATOM_DIM, axis=2)

    # Convert each tensor to NumPy once, then walk the per-sample pairs.
    molecules = [
        graph_to_molecule([sample_adjacency, sample_features])
        for sample_adjacency, sample_features in zip(
            adjacency.numpy(), features.numpy()
        )
    ]

    MolsToGridImage(
        [m for m in molecules if m is not None][:num_mol],
        molsPerRow=5,
        subImgSize=(150, 150),
        returnPNG=False,
    ).save("img.png")
    return 'img.png'


gr.Interface(
    fn=predict,
    title="Generating molecular graphs by WGAN-GP",
    description = "WGAN-GP with R-GCN for the generation of small molecular graphs 🔬",
    inputs=[
        gr.inputs.Slider(5, 50, label='Number of Molecular Graphs', step=5, default=10),
    ],
    outputs="image",
    article = "Author: Vu Minh Chien. Based on the keras example from Alexander Kensert",
).launch(enable_queue=True)