# jannisborn's picture
# update
# 321305d unverified
import logging
import pathlib
import gradio as gr
import numpy as np
import pandas as pd
from gt4sd.properties.proteins import PROTEIN_PROPERTY_PREDICTOR_FACTORY
from utils import draw_grid_predict
# Module-level logger. A NullHandler is attached so that this module emits
# nothing by itself unless the application configures logging.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# Lowercase property names for which `main` forwards the `amide` option to
# the predictor configuration.
AMIDE_FNS = [
    "protein_weight",
    "charge",
    "charge_density",
    "isoelectric_point",
]

# Lowercase property names for which `main` forwards the `ph` option to the
# predictor configuration.
PH_FNS = [
    "charge",
    "charge_density",
    "isoelectric_point",
]
def main(property: str, seq: str, seq_file: str, amide: bool, ph: float):
    """Predict one protein property for a single sequence or a file of sequences.

    Exactly one of ``seq`` and ``seq_file`` must be provided.

    Args:
        property: Name of the property. It is lowercased and used as the key
            into ``PROTEIN_PROPERTY_PREDICTOR_FACTORY``.
        seq: A single protein sequence. An empty string (or ``None``) means
            "not provided".
        seq_file: Tab-separated file without a header row; the first column
            is read as one sequence per row. May be a path (``str`` or
            ``pathlib`` path) or an object exposing the path through a
            ``.name`` attribute. ``None`` means "not provided".
        amide: Forwarded as ``amide`` to the predictor configuration, only
            for properties listed in ``AMIDE_FNS``.
        ph: Forwarded as ``ph`` to the predictor configuration, only for
            properties listed in ``PH_FNS``.

    Returns:
        Whatever ``draw_grid_predict`` returns for the sequences and their
        predicted values (rounded to two decimals).

    Raises:
        ValueError: If both ``seq`` and ``seq_file`` are given, or neither.
        KeyError: If the lowercased ``property`` is not a key of the factory
            (assuming the factory is dict-like).
    """
    # Validate the inputs first so that a bad request fails with a clear
    # message before any predictor is instantiated.
    has_seq = bool(seq)
    has_file = seq_file is not None
    if has_seq and has_file:
        raise ValueError("Pass either seq or seq_file, not both.")
    if not has_seq and not has_file:
        raise ValueError("Pass either seq or seq_file.")

    prop_name = property.lower()
    algo, config = PROTEIN_PROPERTY_PREDICTOR_FACTORY[prop_name]

    # Pass hyperparameters if applicable
    kwargs = {}
    if prop_name in AMIDE_FNS:
        kwargs["amide"] = amide
    if prop_name in PH_FNS:
        kwargs["ph"] = ph
    model = algo(config(**kwargs))

    # Read and parse data
    if has_seq:
        seqs = [seq]
    else:
        # Accept a plain path as well as a file-like wrapper that carries
        # its path in `.name`.
        if isinstance(seq_file, (str, pathlib.PurePath)):
            path = seq_file
        else:
            path = seq_file.name
        seqs = pd.read_csv(path, header=None, sep="\t")[0].tolist()

    props = np.array([model(s) for s in seqs]).round(2)

    # Expand to 2D array if needed (one column per property)
    if props.ndim == 1:
        props = np.expand_dims(props, -1)

    return draw_grid_predict(seqs, props, property_names=[property], domain="Proteins")
if __name__ == "__main__":
    # Names offered in the property dropdown: the factory keys in reverse
    # order, capitalized for display (`main` lowercases them again).
    properties = [
        name.capitalize()
        for name in list(PROTEIN_PROPERTY_PREDICTOR_FACTORY.keys())[::-1]
    ]

    # Static texts and example inputs live next to this script.
    metadata_root = pathlib.Path(__file__).parent / "model_cards"
    article = (metadata_root / "article.md").read_text()
    description = (metadata_root / "description.md").read_text()

    # Each example row follows the argument order of `main`:
    # property, seq, seq_file, amide, ph.
    examples = [
        ["Aliphaticity", "", str(metadata_root / "examples.smi"), False, 7],
        ["Isoelectric_point", "KFLIYQMECSTMIFGL", None, False, 7],
        ["Charge", "KFLIYQMECSTMIFGL", None, True, 12],
    ]

    # Input widgets, in the same order as the parameters of `main`.
    input_components = [
        gr.Dropdown(properties, label="Property", value="Instability"),
        gr.Textbox(
            label="Single Protein sequence",
            placeholder="KFLIYQMECSTMIFGL",
            lines=1,
        ),
        gr.File(file_types=[".smi"], label="One AAS per line"),
        gr.Radio(choices=[True, False], label="Amide", value=True),
        gr.Slider(minimum=0, maximum=14, value=7, label="pH"),
    ]

    demo = gr.Interface(
        fn=main,
        title="Protein properties",
        inputs=input_components,
        outputs=gr.HTML(label="Output"),
        article=article,
        description=description,
        examples=examples,
    )
    demo.launch(debug=True, show_error=True)