File size: 4,235 Bytes
2388920
 
 
 
 
 
 
2348010
 
 
 
2388920
 
 
47e3cde
035577c
 
2293b93
035577c
47e3cde
035577c
1eff7fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2293b93
47e3cde
 
 
2293b93
035577c
2508b3c
 
7c30300
2508b3c
 
47e3cde
 
 
2508b3c
 
035577c
 
23ee17c
2388920
 
2293b93
4d512f9
 
2293b93
47e3cde
1eff7fb
47e3cde
2508b3c
47e3cde
035577c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
"""
foldingdiff implements a diffusion model for generating protein structures. Inspired by the biological folding process,
we perform diffusion on the angles between amino acid residues rather than the absolute 3D coordinates of each residue.
By effectively treating each residue as its own reference frame, we shift the equivariance constraints into the
representation space itself; this allows us to use a vanilla transformer model as our model. Here, we provide a simple
online interface for generating single backbones with a given length, starting from a given random seed. 

Tips for generating proteins:
* The maximum sequence length this model has been trained on is 128 residues. The shorter a sequence is, the more likely it will be "designable" (see our preprint).
* FoldingDiff does *not* generate the amino acid sequence for its structures, it simply fills the structure with Glycine residues; use a tool like ESM-IF1 to generate amino acids corresponding to generated structure.

See our preprint at https://arxiv.org/abs/2209.15611 and our full codebase at https://github.com/microsoft/foldingdiff
"""

import html
import json
import os

import gradio as gr
import torch

from foldingdiff import sampling
from foldingdiff import angles_and_coords as ac

def read_mol(molpath: str) -> str:
    """Return the complete text of the file at ``molpath``.

    Args:
        molpath: Path of the text file to read (used here for PDB files).

    Returns:
        The file contents as a single string; an empty file yields ``""``.

    Raises:
        OSError: If the file cannot be opened.
    """
    # One read() yields exactly the string the previous
    # readlines()-then-concatenate loop built, without the repeated string
    # copies that the "+=" accumulation performed.
    with open(molpath, "r") as fp:
        return fp.read()

def molecule(input_pdb: str) -> str:
    """Get the string to view the given pdb in 3dmol.js

    Reads the PDB file at ``input_pdb`` and returns an ``<iframe>`` element
    whose ``srcdoc`` is a self-contained page: it loads 3Dmol.js from its CDN
    and draws the structure as sticks on a black background.

    Args:
        input_pdb: Path to the PDB file to display.

    Returns:
        The iframe markup as a string.

    Raises:
        OSError: If the PDB file cannot be opened.
    """
    mol = read_mol(input_pdb)

    # Embed the PDB text as a JSON string literal (which is also a valid
    # JavaScript string literal) rather than pasting it raw into a template
    # literal: backticks, backslashes or "${" in the file can then no longer
    # corrupt the script. "</" is escaped as well so the text can never close
    # the surrounding <script> element early.
    pdb_literal = json.dumps(mol).replace("</", "<\\/")

    # The script below uses plain DOM APIs only (no "$"), because the page
    # loads nothing but 3Dmol.js and must not rely on jQuery being bundled
    # with it.
    page = (
        """<!DOCTYPE html>
        <html>
        <head>
    <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
    <style>
    body{
        font-family:sans-serif
    }
    .mol-container {
    width: 100%;
    height: 600px;
    position: relative;
    }
    .mol-container select{
        background-image:None;
    }
    </style>
    <script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>
    </head>
    <body>
    <div id="container" class="mol-container"></div>

            <script>
               let pdb = """
        + pdb_literal
        + """;

               document.addEventListener("DOMContentLoaded", function () {
                let element = document.getElementById("container");
                let config = { backgroundColor: "black" };
                let viewer = $3Dmol.createViewer(element, config);
                viewer.addModel(pdb, "pdb");
                viewer.getModel(0).setStyle({}, { stick: { colorscheme:"whiteCarbon" } });
                viewer.zoomTo();
                viewer.render();
                viewer.zoom(0.8, 2000);
              });
        </script>
        </body></html>"""
    )

    # Escape &, <, >, " and ' so the page survives being placed inside the
    # quoted srcdoc attribute; the browser decodes the entities back to the
    # original markup when it parses the attribute value.
    srcdoc = html.escape(page, quote=True)

    return f"""<iframe style="width: 100%; height: 600px" name="result" allow="midi; geolocation; microphone; camera; 
    display-capture; encrypted-media;" sandbox="allow-modals allow-forms 
    allow-scripts allow-same-origin allow-popups 
    allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" 
    allowpaymentrequest="" frameborder="0" srcdoc='{srcdoc}'></iframe>"""

def sample_at_length(l:int, seed:int):
    """
    Generate one structure of the requested length and prepare it for display.

    Seeds torch's random number generator with ``seed``, samples from the
    "wukevin/foldingdiff_cath" model, and writes the result to
    ``output/generated.pdb`` under the current working directory (the same
    file is overwritten on every call).

    Returns a 2-tuple: the HTML viewer markup for the PDB file, and the path
    of the PDB file itself.
    """
    torch.manual_seed(seed)
    length = int(l)

    # Sample the angles; the sweep (length, length + 1) with n=1 is used to
    # request a single sample at exactly this length, and the last returned
    # item is taken.
    samples = sampling.sample_simple(
        "wukevin/foldingdiff_cath",
        n=1,
        sweep_lengths=(length, length + 1),
    )
    angles = samples.pop()

    # Build out the structure in 3D coordinates and save it as a PDB file
    out_dir = os.path.join(os.getcwd(), "output")
    os.makedirs(out_dir, exist_ok=True)
    target_path = os.path.join(out_dir, "generated.pdb")
    pdb_file = ac.create_new_chain_nerf(target_path, angles)

    viewer_markup = molecule(pdb_file)
    return viewer_markup, pdb_file

# Gradio UI: two numeric inputs (backbone length, random seed) are passed to
# sample_at_length, whose two return values fill the HTML viewer and the
# downloadable file listed under ``outputs``. The module docstring doubles as
# the description shown on the page.
interface = gr.Interface(
    title="foldingdiff - protein backbone structure generation with diffusion models",
    description=__doc__,
    fn=sample_at_length,
    inputs=[
        gr.Number(
            label="Protein backbone length to generate",
            value=85,
            precision=0,
            show_label=True,
        ),
        gr.Number(
            label="Random seed",
            value=123,
            precision=0,
            show_label=True,
        ),
    ],
    outputs=[
        gr.HTML(),
        gr.File(label="Generated structure in PDB format (cartesian coordinates)"),
        # NOTE: a gr.Dataframe showing the generated angles was previously
        # sketched here as a third output; it is not enabled, and
        # sample_at_length returns only two values.
    ],
)

# Start the web server as soon as this module is executed.
interface.launch()