foldingdiff / app.py
wukevin's picture
Update app.py
7c30300
"""
foldingdiff implements a diffusion model for generating protein structures. Inspired by the biological folding process,
we perform diffusion on the angles between amino acid residues rather than the absolute 3D coordinates of each residue.
By effectively treating each residue as its own reference frame, we shift the equivariance constraints into the
representation space itself; this allows us to use a vanilla transformer model as our model. Here, we provide a simple
online interface for generating single backbones with a given length, starting from a given random seed.
Tips for generating proteins:
* The maximum sequence length this model has been trained on is 128 residues. The shorter a sequence is, the more likely it will be "designable" (see our preprint).
* FoldingDiff does *not* generate the amino acid sequence for its structures; it simply fills the structure with Glycine residues. Use a tool like ESM-IF1 to generate amino acids corresponding to the generated structure.
See our preprint at https://arxiv.org/abs/2209.15611 and our full codebase at https://github.com/microsoft/foldingdiff
"""
import os
import gradio as gr
import torch
from foldingdiff import sampling
from foldingdiff import angles_and_coords as ac
def read_mol(molpath: str) -> str:
    """Return the full text of the file at ``molpath`` as a single string.

    Args:
        molpath: Path of the text file to load (in this file, a PDB file).

    Returns:
        The file contents, exactly as text-mode ``open`` yields them.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(molpath, "r") as fp:
        # read() yields the same text as joining readlines(), without building
        # a list of lines and re-concatenating them one by one.
        return fp.read()
def molecule(input_pdb: str) -> str:
    """Build the iframe markup that displays a PDB file with 3Dmol.js.

    The file at ``input_pdb`` is loaded with ``read_mol`` and pasted into a
    standalone HTML page, which is then placed in the ``srcdoc`` attribute of
    an ``<iframe>``.

    Args:
        input_pdb: Path of the PDB file to display.

    Returns:
        The ``<iframe>`` element, as a string, containing the viewer page.
    """
    pdb_text = read_mol(input_pdb)

    # Everything up to the opening backtick of the JS template literal that
    # will hold the PDB text.
    page_head = """<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
<style>
body{
font-family:sans-serif
}
.mol-container {
width: 100%;
height: 600px;
position: relative;
}
.mol-container select{
background-image:None;
}
</style>
<script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>
</head>
<body>
<div id="container" class="mol-container"></div>
<script>
let pdb = `"""

    # Closing backtick plus the viewer set-up script.
    # NOTE(review): the script calls jQuery's `$`, but the page only loads
    # 3Dmol-min.js -- confirm that bundle still provides jQuery.
    page_tail = """`
$(document).ready(function () {
let element = $("#container");
let config = { backgroundColor: "black" };
let viewer = $3Dmol.createViewer(element, config);
viewer.addModel(pdb, "pdb");
viewer.getModel(0).setStyle({}, { stick: { colorscheme:"whiteCarbon" } });
viewer.zoomTo();
viewer.render();
viewer.zoom(0.8, 2000);
})
</script>
</body></html>"""

    # NOTE(review): the PDB text is inserted verbatim, so a backtick or a
    # single quote in the file would terminate the template literal or the
    # single-quoted srcdoc attribute below.
    page = page_head + pdb_text + page_tail

    iframe_open = """<iframe style="width: 100%; height: 600px" name="result" allow="midi; geolocation; microphone; camera;
display-capture; encrypted-media;" sandbox="allow-modals allow-forms
allow-scripts allow-same-origin allow-popups
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
allowpaymentrequest="" frameborder="0" srcdoc='"""
    iframe_close = "'></iframe>"
    return iframe_open + page + iframe_close
def sample_at_length(l: int, seed: int):
    """
    Generate one structure of the requested length and prepare it for display.

    Args:
        l: Length to sample at; converted with ``int()`` before use.
        seed: Value handed to ``torch.manual_seed`` before sampling.

    Returns:
        A pair ``(html, pdb_path)``: the viewer markup produced by
        ``molecule`` and the value returned by ``ac.create_new_chain_nerf``
        (treated here as the path of the written PDB file).
    """
    # Seed torch before any sampling happens.
    torch.manual_seed(seed)
    n_residues = int(l)

    # Sample the angles: one sample (n=1) over the length window
    # (n_residues, n_residues + 1); pop() takes that sample off the result.
    samples = sampling.sample_simple(
        "wukevin/foldingdiff_cath",
        n=1,
        sweep_lengths=(n_residues, n_residues + 1),
    )
    angles = samples.pop()

    # Build the structure out in 3D coordinates and write it as a PDB file
    # under ./output, always using the same file name.
    out_dir = os.path.join(os.getcwd(), "output")
    os.makedirs(out_dir, exist_ok=True)
    target_path = os.path.join(out_dir, "generated.pdb")
    written_pdb = ac.create_new_chain_nerf(target_path, angles)

    return molecule(written_pdb), written_pdb
# Gradio UI wiring: two whole-number inputs (precision=0) are passed to
# sample_at_length, whose two return values fill the HTML and File outputs.
_length_input = gr.Number(
    value=85,
    label="Protein backbone length to generate",
    show_label=True,
    precision=0,
)
_seed_input = gr.Number(
    value=123,
    label="Random seed",
    show_label=True,
    precision=0,
)
_output_components = [
    gr.HTML(),
    gr.File(label="Generated structure in PDB format (cartesian coordinates)"),
    # gr.Dataframe(label="Generated angles defining structure", max_rows=8),
]

interface = gr.Interface(
    fn=sample_at_length,
    title="foldingdiff - protein backbone structure generation with diffusion models",
    # The module docstring doubles as the description passed to the interface.
    description=__doc__,
    inputs=[_length_input, _seed_input],
    outputs=_output_components,
)

# Launches as soon as this module is executed.
interface.launch()