import gradio as gr import numpy as np import os import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt from transformers import pipeline as pl import numpy as np import matplotlib.pyplot as plt import sys print(os.getcwd()) if "/home/user/app/alphafold" not in sys.path: sys.path.append("/home/user/app/alphafold") from alphafold.common import protein from alphafold.data import pipeline from alphafold.data import templates from alphafold.model import data from alphafold.model import config from alphafold.model import model def update_seqs(choice): return gr.Textbox.update(choice) def mk_mock_template(query_sequence): """create blank template""" ln = len(query_sequence) output_templates_sequence = "-" * ln templates_all_atom_positions = np.zeros( (ln, templates.residue_constants.atom_type_num, 3) ) templates_all_atom_masks = np.zeros((ln, templates.residue_constants.atom_type_num)) templates_aatype = templates.residue_constants.sequence_to_onehot( output_templates_sequence, templates.residue_constants.HHBLITS_AA_TO_ID ) template_features = { "template_all_atom_positions": templates_all_atom_positions[None], "template_all_atom_masks": templates_all_atom_masks[None], "template_aatype": np.array(templates_aatype)[None], "template_domain_names": [f"none".encode()], } return template_features def predict_structure(prefix, feature_dict, model_runners, random_seed=0): """Predicts structure using AlphaFold for the given sequence.""" # Run the models. plddts = {} for model_name, model_runner in model_runners.items(): processed_feature_dict = model_runner.process_features( feature_dict, random_seed=random_seed ) prediction_result = model_runner.predict(processed_feature_dict) b_factors = ( prediction_result["plddt"][:, None] * prediction_result["structure_module"]["final_atom_mask"] ) unrelaxed_protein = protein.from_prediction( processed_feature_dict, prediction_result, b_factors ) unrelaxed_pdb_path = f"{prefix}_unrelaxed_{model_name}.pdb" plddts[model_name] = prediction_result["plddt"] print(f"{model_name} {plddts[model_name].mean()}") with open(unrelaxed_pdb_path, "w") as f: f.write(protein.to_pdb(unrelaxed_protein)) return plddts def run_protgpt2(startsequence, length): protgpt2 = pl("text-generation", model="nferruz/ProtGPT2") sequences = protgpt2( startsequence, max_length=length, do_sample=True, top_k=950, repetition_penalty=1.2, num_return_sequences=5, eos_token_id=0, ) return sequences def run_alphafold(startsequence): model_runners = {} models = ["model_1"] # ,"model_2","model_3","model_4","model_5"] for model_name in models: model_config = config.model_config(model_name) model_config.data.eval.num_ensemble = 1 model_params = data.get_model_haiku_params(model_name=model_name, data_dir=".") model_runner = model.RunModel(model_config, model_params) model_runners[model_name] = model_runner query_sequence = startsequence.replace("\n", "") feature_dict = { **pipeline.make_sequence_features( sequence=query_sequence, description="none", num_res=len(query_sequence) ), **pipeline.make_msa_features( msas=[[query_sequence]], deletion_matrices=[[[0] * len(query_sequence)]] ), **mk_mock_template(query_sequence), } plddts = predict_structure("test", feature_dict, model_runners) return plddts["model_1"] def update_protGPT2(inp, length): startsequence = inp seqlen = length generated_seqs = run_protgpt2(startsequence, seqlen) gen_seqs = [x["generated_text"] for x in generated_seqs] print(gen_seqs) return gr.Radio.update(gen_seqs) def update(inp): print("Running AF on", inp) startsequence = inp plddts = run_alphafold(startsequence) print(plddts) x = np.arange(10) plt.style.use(["seaborn-ticks", "seaborn-talk"]) fig = plt.figure() ax = fig.add_subplot(111) ax.plot(plddts) ax.set_ylabel("predicted LDDT") ax.set_xlabel("positions") ax.set_title("pLDDT") return ( molecule( f"test_unrelaxed_model_1.pdb", ), fig, f"{np.mean(plddts):.1f} ± {np.std(plddts):.1f}", ) def read_mol(molpath): with open(molpath, "r") as fp: lines = fp.readlines() mol = "" for l in lines: mol += l return mol def molecule(pdb): mol = read_mol(pdb) x = ( """