File size: 6,399 Bytes
02970c0
 
db541e4
02970c0
4d5beeb
f73076c
4d5beeb
02970c0
2ec65d5
8505e9d
 
db541e4
4d5beeb
 
c72f5fe
7f5c48e
 
 
8505e9d
7f5c48e
 
 
 
 
 
 
 
 
 
 
 
 
 
8505e9d
7f5c48e
4d5beeb
7f5c48e
affd796
2ec65d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2466cb5
7f5c48e
 
 
 
 
 
 
 
 
 
2466cb5
 
 
7f5c48e
2466cb5
 
c26a162
7f5c48e
2466cb5
7f5c48e
 
 
 
 
2466cb5
 
 
7f5c48e
2466cb5
 
c26a162
02970c0
4d5beeb
 
 
1082445
b6b61f6
a046ca2
4d5beeb
7f5c48e
 
 
 
4d5beeb
 
 
 
 
 
 
 
 
2466cb5
a046ca2
4d5beeb
7f5c48e
 
 
 
d92a3e6
4d5beeb
 
 
affd796
 
 
2f14da2
 
 
 
f73076c
 
 
 
4d5beeb
2f14da2
 
affd796
4d5beeb
 
 
db541e4
affd796
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import re
from pathlib import Path
import gradio as gr

from evodiff.pretrained import OA_DM_38M, D3PM_UNIFORM_38M, MSA_OA_DM_MAXSUB
from evodiff.generate import generate_oaardm, generate_d3pm
from evodiff.generate_msa import generate_query_oadm_msa_simple

import py3Dmol
from colabfold.download import download_alphafold_params
from colabfold.batch import run

def a3m_file(file):
    """Return the fixed MSA filename ``"tmp.a3m"``.

    Args:
        file: Ignored; the same name is returned for any input.

    Returns:
        The literal string ``"tmp.a3m"``.
    """
    placeholder_name = "tmp.a3m"
    return placeholder_name

def predict_protein(sequence):
    """Predict a structure for ``sequence`` with ColabFold and return the PDB path.

    Calls ``download_alphafold_params`` for the ``alphafold2_ptm`` model with the
    current directory as the data directory, then calls ColabFold's ``run`` for a
    single query using one model, one recycle, no templates and no relaxation.

    Args:
        sequence: Sequence to fold; passed to ColabFold as the only query.

    Returns:
        Relative path (as a string) of the rank-1 unrelaxed PDB file inside the
        ``evodiff_protein`` result directory. The path is built from the job
        name and model type; this function does not check that the file exists.
    """
    job_name = "evodiff_protein"
    result_dir = "evodiff_protein"
    model_type = "alphafold2_ptm"
    data_dir = Path(".")

    download_alphafold_params(model_type, data_dir)

    # Only the files written to ``result_dir`` are used afterwards, so the
    # value returned by run() is intentionally not kept.
    run(
        queries=[(job_name, sequence, None)],
        result_dir=result_dir,
        use_templates=False,
        num_relax=0,
        msa_mode="mmseqs2_uniref_env",
        model_type=model_type,
        num_models=1,
        num_recycles=1,
        model_order=[1],
        is_complex=False,
        data_dir=data_dir,
        keep_existing_results=False,
        rank_by="auto",
        stop_at_score=float(100),
        zip_results=False,
        user_agent="colabfold/google-colab-main",
    )

    # NOTE(review): this filename pattern (rank_001 / model_1 / seed_000) is
    # assumed to match what ColabFold writes for the settings above — confirm
    # if the ColabFold version changes.
    return f"{result_dir}/{job_name}_unrelaxed_rank_001_{model_type}_model_1_seed_000.pdb"

def display_pdb(path_to_pdb):
    """Build an ``<iframe>`` HTML snippet showing a PDB file in a py3Dmol viewer.

    SOURCE: https://huggingface.co/spaces/merle/PROTEIN_GENERATOR/blob/main/app.py

    Args:
        path_to_pdb: Path of the PDB file to read and display.

    Returns:
        An HTML string: an iframe whose ``srcdoc`` attribute holds a small page
        with the 500x500 viewer centred inside it.
    """
    # Context manager so the file handle is closed as soon as it has been read.
    with open(path_to_pdb, "r") as pdb_file:
        pdb = pdb_file.read()

    view = py3Dmol.view(width=500, height=500)
    view.addModel(pdb, "pdb")
    # Cartoon coloured by the 'b' property on a 0..1 'roygb' gradient.
    # NOTE(review): ColabFold PDBs presumably store pLDDT on a 0-100 scale in
    # that column — confirm the 0..1 range is what is wanted here.
    view.setStyle(
        {'model': -1},
        {"cartoon": {'colorscheme': {'prop': 'b', 'gradient': 'roygb', 'min': 0, 'max': 1}}},
    )
    view.zoomTo()

    # The srcdoc attribute below is delimited by single quotes, so every single
    # quote in the generated viewer HTML is swapped for a double quote.
    output = view._make_html().replace("'", '"')
    page = f"""<!DOCTYPE html><html><center> {output} </center></html>"""  # do not use ' in this input

    return f"""<iframe height="500px" width="100%"  name="result" allow="midi; geolocation; microphone; camera;
                            display-capture; encrypted-media;" sandbox="allow-modals allow-forms
                            allow-scripts allow-same-origin allow-popups
                            allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
                            allowpaymentrequest="" frameborder="0" srcdoc='{page}'></iframe>"""

'''
    return f"""<iframe  style="width: 100%; height:700px" name="result" allow="midi; geolocation; microphone; camera; 
                            display-capture; encrypted-media;" sandbox="allow-modals allow-forms 
                            allow-scripts allow-same-origin allow-popups 
                            allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" 
                            allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
'''

def make_uncond_seq(seq_len, model_type, pred_structure):
    """Generate one sequence unconditionally with the selected EvoDiff model.

    Args:
        seq_len: Requested sequence length. Converted with ``int()`` so a
            float delivered by a UI slider is accepted.
        model_type: Either ``"EvoDiff-Seq-OADM 38M"`` or
            ``"EvoDiff-D3PM-Uniform 38M"``.
        pred_structure: When truthy, also run ``predict_protein`` on the
            generated sequence and render the result with ``display_pdb``.

    Returns:
        A ``(generated_sequence, molhtml)`` tuple. ``molhtml`` is the viewer
        HTML when ``pred_structure`` is truthy, otherwise ``None``.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    oadm_name = "EvoDiff-Seq-OADM 38M"
    d3pm_name = "EvoDiff-D3PM-Uniform 38M"

    # Reject unknown models before loading anything, so the caller gets a clear
    # error instead of an unbound-variable failure further down.
    if model_type not in (oadm_name, d3pm_name):
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected {oadm_name!r} or {d3pm_name!r}."
        )

    seq_len = int(seq_len)

    if model_type == oadm_name:
        model, collater, tokenizer, scheme = OA_DM_38M()
        _, generated_sequence = generate_oaardm(
            model, tokenizer, seq_len, batch_size=1, device='cpu'
        )
    else:
        # return_all=True additionally yields the timestep count and the
        # Q_bar / Q objects that generate_d3pm takes as arguments.
        model, collater, tokenizer, scheme, timestep, Q_bar, Q = D3PM_UNIFORM_38M(return_all=True)
        _, generated_sequence = generate_d3pm(
            model, tokenizer, Q, Q_bar, timestep, seq_len, batch_size=1, device='cpu'
        )

    if not pred_structure:
        return generated_sequence, None

    path_to_pdb = predict_protein(generated_sequence)
    molhtml = display_pdb(path_to_pdb)
    return generated_sequence, molhtml

def make_cond_seq(seq_len, msa_file, model_type, pred_structure):
    """Generate one sequence conditioned on an MSA with the EvoDiff-MSA model.

    Args:
        seq_len: Requested sequence length. Converted with ``int()`` so a
            float delivered by a UI slider is accepted.
        msa_file: The uploaded MSA. Either an object exposing the file path as
            ``.name`` or the path itself.
        model_type: Must be ``"EvoDiff-MSA"``.
        pred_structure: When truthy, also run ``predict_protein`` on the
            generated sequence and render the result with ``display_pdb``.

    Returns:
        A ``(generated_sequence, molhtml)`` tuple. ``molhtml`` is the viewer
        HTML when ``pred_structure`` is truthy, otherwise ``None``.

    Raises:
        ValueError: If ``model_type`` is not supported or no MSA file was given.
    """
    msa_model_name = "EvoDiff-MSA"

    # Validate up front so bad input produces a clear error rather than an
    # unbound-variable or attribute failure further down.
    if model_type != msa_model_name:
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected {msa_model_name!r}."
        )
    if msa_file is None:
        raise ValueError("An MSA file (.a3m) is required for conditional generation.")

    seq_len = int(seq_len)
    # Accept both an upload object carrying the path in ``.name`` and a bare path.
    msa_path = getattr(msa_file, "name", msa_file)

    model, collater, tokenizer, scheme = MSA_OA_DM_MAXSUB()
    _, generated_sequence = generate_query_oadm_msa_simple(
        msa_path,
        model,
        tokenizer,
        n_sequences=64,
        seq_length=seq_len,
        device='cpu',
        selection_type='random',
    )

    if not pred_structure:
        return generated_sequence, None

    path_to_pdb = predict_protein(generated_sequence)
    molhtml = display_pdb(path_to_pdb)
    return generated_sequence, molhtml

# Unconditional-generation interface: slider, model dropdown and checkbox map
# positionally onto make_uncond_seq(seq_len, model_type, pred_structure).
usg_app = gr.Interface(
    fn=make_uncond_seq,
    inputs=[
        gr.Slider(10, 100, step=1, label="Sequence Length"),
        gr.Dropdown(
            ["EvoDiff-Seq-OADM 38M", "EvoDiff-D3PM-Uniform 38M"],
            value="EvoDiff-Seq-OADM 38M",
            type="value",
            label="Model",
        ),
        # Created hidden and unchecked.
        gr.Checkbox(value=False, label="Predict Structure?", visible=False),
    ],
    # First output is the sequence text, second the (optional) viewer HTML.
    outputs=["text", gr.HTML()],
    title="Unconditional sequence generation",
    description="Generate a sequence with `EvoDiff-Seq-OADM 38M` (smaller/faster) or `EvoDiff-D3PM-Uniform 38M` (larger/slower) models.",
)

# Conditional-generation interface: slider, file upload, model dropdown and
# checkbox map positionally onto
# make_cond_seq(seq_len, msa_file, model_type, pred_structure).
csg_app = gr.Interface(
    fn=make_cond_seq,
    inputs=[
        # step=1 keeps the length on whole numbers, matching the slider of the
        # unconditional interface.
        gr.Slider(10, 100, step=1, label="Sequence Length"),
        # Extension filters are written with a leading dot.
        gr.File(file_types=[".a3m"], label="MSA File"),
        gr.Dropdown(["EvoDiff-MSA"], value="EvoDiff-MSA", type="value", label="Model"),
        # Created hidden and unchecked.
        gr.Checkbox(value=False, label="Predict Structure?", visible=False),
    ],
    # First output is the sequence text, second the (optional) viewer HTML.
    outputs=["text", gr.HTML()],
    # examples=[["https://github.com/microsoft/evodiff/raw/main/examples/example_files/bfd_uniclust_hits.a3m"]],
    title="Conditional sequence generation",
    description="Evolutionary guided sequence generation with the `EvoDiff-MSA` model.",
)


# Top-level page: a header row with credits, then a row holding both
# generation interfaces as tabs.
with gr.Blocks() as edapp:
    with gr.Row():
        gr.Markdown(
            """
            # EvoDiff
            ## Generation of protein sequences and evolutionary alignments via discrete diffusion models

            Created By: Microsoft Research [Sarah Alamdari, Nitya Thakkar, Rianne van den Berg, Alex X. Lu, Nicolo Fusi, Ava P. Amini, and Kevin K. Yang]
            
            Spaces App By: [Colby T. Ford](https://github.com/colbyford)
            """
        )
    with gr.Row():
        gr.TabbedInterface([usg_app, csg_app], ["Unconditional sequence generation", "Conditional generation"])



# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    edapp.launch()