Spaces:

GT4SD
/

moler

Running

App Files Files Community

jannisborn committed on Jan 7, 2023

Commit

895a807

•

1 Parent(s): 2543308

wip: initial version

Browse files

Files changed (9) hide show

.gitignore +1 -0
LICENSE +21 -0
README.md +8 -7
app.py +79 -0
model_cards/article.md +23 -0
model_cards/description.md +7 -0
model_cards/examples.csv +5 -0
requirements.txt +28 -0
utils.py +49 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ __pycache__/

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2022 Generative Toolkit 4 Scientific Discovery
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,13 +1,14 @@
 ---
-title: Gt4sd Moler
-emoji: 👁
-colorFrom: indigo
-colorTo: green
 sdk: gradio
-sdk_version: 3.16.0
 app_file: app.py
 pinned: false
-license: mit
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: MoLeR
+emoji: 💡
+colorFrom: green
+colorTo: blue
 sdk: gradio
+sdk_version: 3.9.1
 app_file: app.py
 pinned: false
+python_version: 3.8.13
+pypi_version: 20.2.4
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import logging
+import pathlib
+import gradio as gr
+import pandas as pd
+from gt4sd.algorithms.generation.moler import MoLeR, MoLeRDefaultGenerator
+from gt4sd.algorithms.registry import ApplicationsRegistry
+from utils import draw_grid_generate
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
+TITLE = "MoLeR"
+def run_inference(
+    algorithm_version: str,
+    scaffolds: str,
+    beam_size: int,
+    number_of_samples: int,
+    seed: int,
+):
+    config = MoLeRDefaultGenerator(
+        algorithm_version=algorithm_version,
+        scaffolds=scaffolds,
+        beam_size=beam_size,
+        num_samples=4,
+        seed=seed,
+    )
+    model = MoLeR(configuration=config)
+    samples = list(model.sample(number_of_samples))
+    draw_grid_generate(samples)
+if __name__ == "__main__":
+    # Preparation (retrieve all available algorithms)
+    all_algos = ApplicationsRegistry.list_available()
+    algos = [
+        x["algorithm_version"]
+        for x in list(filter(lambda x: TITLE in x["algorithm_name"], all_algos))
+    ]
+    # Load metadata
+    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")
+    examples = pd.read_csv(metadata_root.joinpath("examples.csv"), header=None).fillna(
+        ""
+    )
+    with open(metadata_root.joinpath("article.md"), "r") as f:
+        article = f.read()
+    with open(metadata_root.joinpath("description.md"), "r") as f:
+        description = f.read()
+    demo = gr.Interface(
+        fn=run_inference,
+        title=TITLE,
+        inputs=[
+            gr.Dropdown(algos, label="Algorithm version", value="v0"),
+            gr.Textbox(
+                label="Scaffolds",
+                placeholder="CC(C#C)N(C)C(=O)NC1=CC=C(Cl)C=C1",
+                lines=1,
+            ),
+            gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Beam_size"),
+            gr.Slider(
+                minimum=1, maximum=50, value=10, label="Number of samples", step=1
+            ),
+            gr.Number(value=42, label="Seed", precision=0),
+        ],
+        outputs=gr.HTML(label="Output"),
+        article=article,
+        description=description,
+        examples=examples.values.tolist(),
+    )
+    demo.launch(debug=True, show_error=True)

model_cards/article.md ADDED Viewed

	@@ -0,0 +1,23 @@

+# Model card -- MoLeR
+## Parameters
+### Algorithm Version:
+Which model checkpoint to use (trained on different datasets).
+### Number of samples
+How many samples should be generated (between 1 and 50).
+## Citation
+```bib
+@inproceedings{maziarz2021learning,
+  author={Krzysztof Maziarz and Henry Richard Jackson{-}Flux and Pashmina Cameron and
+    Finton Sirockin and Nadine Schneider and Nikolaus Stiefl and Marwin H. S. Segler and Marc Brockschmidt},
+  title     = {Learning to Extend Molecular Scaffolds with Structural Motifs},
+  booktitle = {The Tenth International Conference on Learning Representations, {ICLR}},
+  year      = {2022}
+}
+```

model_cards/description.md ADDED Viewed

	@@ -0,0 +1,7 @@

+### Concurrent sequence regression and generation for molecular language modeling
+The RT is a multitask Transformer that reformulates regression as a conditional sequence modeling task.
+This yields a dichotomous language model that seamlessly integrates property prediction with property-driven conditional generation. For details see the [arXiv preprint](https://arxiv.org/abs/2202.01338), the [development code](https://github.com/IBM/regression-transformer) and the [GT4SD endpoint](https://github.com/GT4SD/gt4sd-core) for inference.
+Each `algorithm_version` refers to one trained model. Each model can be used for **two tasks**, either to *predict* one (or multiple) properties of a molecule or to *generate* a molecule (given a seed molecule and a property constraint).

model_cards/examples.csv ADDED Viewed

	@@ -0,0 +1,5 @@

+v0,,1,4,0
+v0,CC(=O)NC1=NC2=CC(OCC3=CC=CN(CC4=CC=C(Cl)C=C4)C3=O)=CC=C2N1,1,10,0
+v0,C12C=CC=NN1C(C#CC1=C(C)C=CC3C(NC4=CC(C(F)(F)F)=CC=C4)=NOC1=3)=CN=2,3,5,5

requirements.txt ADDED Viewed

	@@ -0,0 +1,28 @@

+-f https://download.pytorch.org/whl/cpu/torch_stable.html
+-f https://data.pyg.org/whl/torch-1.12.1+cpu.html
+# pip==20.2.4
+torch==1.12.1
+torch-scatter
+torch-spline-conv
+torch-sparse
+torch-geometric
+torchvision==0.13.1
+torchaudio==0.12.1
+gt4sd>=1.0.0
+molgx>=0.22.0a1
+molecule_generation
+nglview
+PyTDC==0.3.7
+gradio==3.12.0
+markdown-it-py>=2.1.0
+mols2grid>=0.2.0
+pandas>=1.0.0
+terminator @ git+https://github.com/IBM/regression-transformer@gt4sd
+guacamol_baselines @ git+https://github.com/GT4SD/guacamol_baselines.git@v0.0.2
+moses @ git+https://github.com/GT4SD/moses.git@v0.1.0
+paccmann_chemistry @ git+https://github.com/PaccMann/paccmann_chemistry@0.0.4
+paccmann_generator @ git+https://github.com/PaccMann/paccmann_generator@0.0.2
+paccmann_gp @ git+https://github.com/PaccMann/paccmann_gp@0.1.1
+paccmann_omics @ git+https://github.com/PaccMann/paccmann_omics@0.0.1.1
+paccmann_predictor @ git+https://github.com/PaccMann/paccmann_predictor@sarscov2
+reinvent_models @ git+https://github.com/GT4SD/reinvent_models@v0.0.1

utils.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import json
+import logging
+import os
+from collections import defaultdict
+from typing import Dict, List, Tuple
+import mols2grid
+import pandas as pd
+from rdkit import Chem
+from terminator.selfies import decoder
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
+def draw_grid_generate(
+    samples: List[Tuple[str]],
+    domain: str = "molecules",
+    n_cols: int = 5,
+    size=(140, 200),
+) -> str:
+    """
+    Uses mols2grid to draw a HTML grid for the generated molecules
+    Args:
+        samples: The generated samples.
+        n_cols: Number of columns in grid. Defaults to 5.
+        size: Size of molecule in grid. Defaults to (140, 200).
+    Returns:
+        HTML to display
+    """
+    result = defaultdict(list)
+    result.update(
+        {"SMILES": samples, "Name": [f"sample_{i}" for i in range(len(samples))]}
+    )
+    result_df = pd.DataFrame(result)
+    print('RESTULT', result_df)
+    obj = mols2grid.display(
+        result_df,
+        tooltip=list(result.keys()),
+        height=1100,
+        n_cols=n_cols,
+        name="Results",
+        size=size,
+    )
+    return obj.data