"""Gradio demo for the multitask text-and-chemistry T5 models exposed via GT4SD.

Running this file as a script builds a ``gr.Interface`` around
:func:`run_inference` and launches it. The page's description, article and
example rows are read from the ``model_cards`` directory next to this file.
"""

import functools
import logging
import pathlib

import gradio as gr
import pandas as pd
from gt4sd.algorithms.generation.hugging_face import (
    HuggingFaceSeq2SeqGenerator,
    HuggingFaceGenerationAlgorithm,
)
from transformers import AutoTokenizer

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# Maps each supported task name to the instruction text that is prepended to
# the user's prompt. The keys double as the task choices shown in the UI.
task2prefix = {
    "forward": "Predict the product of the following reaction: ",
    "retrosynthesis": "Predict the reaction that produces the following product: ",
    "paragraph to actions": "Which actions are described in the following paragraph: ",
    "molecular captioning": "Caption the following smile: ",
    "text-conditional de novo generation": "Write in SMILES the described molecule: ",
}


@functools.lru_cache(maxsize=1)
def _load_tokenizer():
    """Return the tokenizer used to look up the EOS and pad token strings.

    The result is cached so the tokenizer is loaded once per process instead
    of once per inference request.
    """
    # NOTE(review): "t5-small" is used regardless of the selected model;
    # presumably every offered checkpoint shares its special tokens - confirm.
    return AutoTokenizer.from_pretrained("t5-small")


def run_inference(
    model_name_or_path: str,
    task: str,
    prompt: str,
    num_beams: int,
) -> str:
    """Generate one sample for ``prompt`` and return it as cleaned-up text.

    Args:
        model_name_or_path: Passed to the generator configuration as
            ``algorithm_version``.
        task: A key of ``task2prefix``; selects the instruction prefix.
        prompt: The user-supplied text that follows the instruction prefix.
        num_beams: Forwarded unchanged to the generator configuration.

    Returns:
        The first generated sample with any echo of ``prefix + prompt``
        removed, cut at the first EOS token, stripped of pad tokens and of
        surrounding whitespace.

    Raises:
        KeyError: If ``task`` is not a key of ``task2prefix``.
    """
    instruction = task2prefix[task]

    config = HuggingFaceSeq2SeqGenerator(
        algorithm_version=model_name_or_path,
        prefix=instruction,
        prompt=prompt,
        num_beams=num_beams,
    )
    model = HuggingFaceGenerationAlgorithm(config)
    tokenizer = _load_tokenizer()

    # Only a single sample is requested, so take the first generated item.
    text = next(iter(model.sample(1)))

    # Drop the echoed input, then everything from the first EOS token on,
    # then any pad tokens that remain.
    text = text.replace(instruction + prompt, "")
    text = text.split(tokenizer.eos_token)[0]
    text = text.replace(tokenizer.pad_token, "")
    return text.strip()


if __name__ == "__main__":
    models = [
        "multitask-text-and-chemistry-t5-small-standard",
        "multitask-text-and-chemistry-t5-small-augm",
        "multitask-text-and-chemistry-t5-base-standard",
        "multitask-text-and-chemistry-t5-base-augm",
    ]

    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")

    # Example rows for the interface; empty cells become empty strings.
    examples = pd.read_csv(
        metadata_root.joinpath("examples.csv"), header=None
    ).fillna("")
    print("Examples: ", examples.values.tolist())

    # Read the markdown with an explicit encoding so decoding does not depend
    # on the platform's locale default.
    article = metadata_root.joinpath("article.md").read_text(encoding="utf-8")
    description = metadata_root.joinpath("description.md").read_text(
        encoding="utf-8"
    )

    demo = gr.Interface(
        fn=run_inference,
        title="Multitask Text and Chemistry T5",
        inputs=[
            gr.Dropdown(
                models,
                label="Language model",
                value="multitask-text-and-chemistry-t5-small-augm",
            ),
            gr.Radio(
                # Derived from task2prefix so the UI can never offer a task
                # that run_inference has no prefix for.
                choices=list(task2prefix),
                label="Task",
                value="paragraph to actions",
            ),
            gr.Textbox(
                label="Text prompt",
                placeholder="I'm a stochastic parrot.",
                lines=1,
            ),
            gr.Slider(minimum=1, maximum=50, value=10, label="num_beams", step=1),
        ],
        outputs=gr.Textbox(label="Output"),
        article=article,
        description=description,
        examples=examples.values.tolist(),
    )
    demo.launch(debug=True, show_error=True)