jannisborn's picture
update
ef5346f unverified
raw
history blame
No virus
2.78 kB
import logging
import pathlib
import gradio as gr
import numpy as np
import pandas as pd
from gt4sd.properties.proteins import PROTEIN_PROPERTY_PREDICTOR_FACTORY
from utils import draw_grid_predict
# Module-level logger. The NullHandler is a no-op handler, so this module
# emits nothing unless the embedding application configures logging itself.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# Property names (lower-case factory keys) whose predictor configuration
# is given the `amide` flag.
AMIDE_FNS = [
    "protein_weight",
    "charge",
    "charge_density",
    "isoelectric_point",
]

# Property names (lower-case factory keys) whose predictor configuration
# is given the `ph` value.
PH_FNS = [
    "charge",
    "charge_density",
    "isoelectric_point",
]
def _load_sequences(seq, seq_file):
    """Return the sequences to evaluate, taken from exactly one input source."""
    seq = seq.strip() if seq else ""
    has_seq = seq != ""
    has_file = seq_file is not None

    if has_seq and has_file:
        raise ValueError("Pass either seq or seq_file, not both.")
    if not has_seq and not has_file:
        raise ValueError("Pass either seq or seq_file.")
    if has_seq:
        return [seq]

    # The upload may arrive as a path (str / Path) or as a file-like object
    # exposing its location via `.name`. Paths are checked first because
    # `Path.name` would only yield the basename.
    if isinstance(seq_file, (str, pathlib.PurePath)):
        path = seq_file
    else:
        path = seq_file.name
    # One sequence per line; only the first tab-separated column is used.
    return pd.read_csv(path, header=None, sep="\t")[0].tolist()


def main(property: str, seq: str, seq_file: str, amide: bool, ph: float):
    """Predict one protein property for a single sequence or a file of sequences.

    Args:
        property: Name of the property; it is lower-cased and looked up in
            ``PROTEIN_PROPERTY_PREDICTOR_FACTORY``.
        seq: A single protein sequence, or an empty string when a file is used.
        seq_file: Uploaded file with one sequence per line (only the first
            tab-separated column is read), or ``None`` when ``seq`` is used.
            Accepts a path or an object with a ``.name`` attribute.
        amide: Forwarded to the predictor configuration when the property is
            listed in ``AMIDE_FNS``.
        ph: Forwarded to the predictor configuration when the property is
            listed in ``PH_FNS``.

    Returns:
        Whatever ``draw_grid_predict`` returns for the sequences and their
        predicted values (presumably HTML, since the interface renders it in
        an HTML output -- confirm against ``utils``).

    Raises:
        ValueError: If both ``seq`` and ``seq_file`` are given, or neither.
        KeyError: If ``property`` is not a key of the predictor factory.
    """
    # Validate and read the input first so bad requests fail with a clear
    # message before any model is constructed.
    seqs = _load_sequences(seq, seq_file)

    prop_name = property.lower()
    algo, config = PROTEIN_PROPERTY_PREDICTOR_FACTORY[prop_name]

    # Pass hyperparameters only to the predictors that accept them
    kwargs = {}
    if prop_name in AMIDE_FNS:
        kwargs["amide"] = amide
    if prop_name in PH_FNS:
        kwargs["ph"] = ph
    model = algo(config(**kwargs))

    props = np.array([model(s) for s in seqs]).round(2)

    # Expand to a 2D (n_sequences, 1) array if the predictions are 1D
    if props.ndim == 1:
        props = np.expand_dims(props, -1)

    return draw_grid_predict(seqs, props, property_names=[property], domain="Proteins")
if __name__ == "__main__":
    # Dropdown entries: every key of the predictor factory, in reversed
    # order, capitalized for display (main() lower-cases them again).
    properties = [
        name.capitalize()
        for name in list(PROTEIN_PROPERTY_PREDICTOR_FACTORY.keys())[::-1]
    ]

    # Static assets live in the `model_cards` directory next to this file
    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")

    # Each example row follows the order of `inputs` below:
    # property, single sequence, sequence file, amide, pH
    examples = [
        ["Aliphaticity", "", metadata_root.joinpath("examples.smi"), False, 7],
        ["Isoelectric_point", "KFLIYQMECSTMIFGL", None, False, 7],
        ["Charge", "KFLIYQMECSTMIFGL", None, True, 12],
    ]

    # Read the markdown with an explicit encoding so the result does not
    # depend on the locale of the host running the app.
    article = metadata_root.joinpath("article.md").read_text(encoding="utf-8")
    description = metadata_root.joinpath("description.md").read_text(
        encoding="utf-8"
    )

    demo = gr.Interface(
        fn=main,
        title="Protein properties",
        inputs=[
            gr.Dropdown(properties, label="Property", value="Instability"),
            gr.Textbox(
                label="Single Protein sequence",
                placeholder="KFLIYQMECSTMIFGL",
                lines=1,
            ),
            gr.File(file_types=[".smi"], label="One AAS per line"),
            gr.Radio(choices=[True, False], label="Amide", value=True),
            gr.Slider(minimum=0, maximum=14, value=7, label="pH"),
        ],
        outputs=gr.HTML(label="Output"),
        article=article,
        description=description,
        examples=examples,
    )
    demo.launch(debug=True, show_error=True)