""" | |
https://github.com/ProteinDesignLab/protpardelle | |
License: MIT | |
Author: Alex Chu | |
Neural network modules. Many of these are adapted from open source modules. | |
""" | |
from typing import List, Sequence, Optional

from einops import rearrange, reduce, repeat
from einops.layers.torch import Rearrange
import numpy as np
from rotary_embedding_torch import RotaryEmbedding
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoTokenizer, EsmModel

from core import protein_mpnn
from core import residue_constants
from core import utils

########################################
# Adapted from https://github.com/ermongroup/ddim


def downsample(x):
    return nn.functional.avg_pool2d(x, 2, 2, ceil_mode=True)


def upsample_coords(x, shape):
    new_l, new_w = shape
    return nn.functional.interpolate(x, size=(new_l, new_w), mode="nearest")

# Adapted from https://github.com/aqlaboratory/openfold | |
def permute_final_dims(tensor: torch.Tensor, inds: List[int]): | |
zero_index = -1 * len(inds) | |
first_inds = list(range(len(tensor.shape[:zero_index]))) | |
return tensor.contiguous().permute(first_inds + [zero_index + i for i in inds]) | |
def lddt( | |
all_atom_pred_pos: torch.Tensor, | |
all_atom_positions: torch.Tensor, | |
all_atom_mask: torch.Tensor, | |
cutoff: float = 15.0, | |
eps: float = 1e-10, | |
per_residue: bool = True, | |
) -> torch.Tensor: | |
n = all_atom_mask.shape[-2] | |
dmat_true = torch.sqrt( | |
eps | |
+ torch.sum( | |
(all_atom_positions[..., None, :] - all_atom_positions[..., None, :, :]) | |
** 2, | |
dim=-1, | |
) | |
) | |
dmat_pred = torch.sqrt( | |
eps | |
+ torch.sum( | |
(all_atom_pred_pos[..., None, :] - all_atom_pred_pos[..., None, :, :]) ** 2, | |
dim=-1, | |
) | |
) | |
dists_to_score = ( | |
(dmat_true < cutoff) | |
* all_atom_mask | |
* permute_final_dims(all_atom_mask, (1, 0)) | |
* (1.0 - torch.eye(n, device=all_atom_mask.device)) | |
) | |
dist_l1 = torch.abs(dmat_true - dmat_pred) | |
score = ( | |
(dist_l1 < 0.5).type(dist_l1.dtype) | |
+ (dist_l1 < 1.0).type(dist_l1.dtype) | |
+ (dist_l1 < 2.0).type(dist_l1.dtype) | |
+ (dist_l1 < 4.0).type(dist_l1.dtype) | |
) | |
score = score * 0.25 | |
dims = (-1,) if per_residue else (-2, -1) | |
norm = 1.0 / (eps + torch.sum(dists_to_score, dim=dims)) | |
score = norm * (eps + torch.sum(dists_to_score * score, dim=dims)) | |
return score | |
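
# Illustrative usage sketch (not part of the original file): score a predicted
# point set against a reference, with a per-point mask of shape (..., n, 1).
#
#   pred = torch.randn(2, 50, 3)                 # (batch, points, xyz)
#   true = pred + 0.1 * torch.randn_like(pred)
#   mask = torch.ones(2, 50, 1)
#   per_res = lddt(pred, true, mask)             # (2, 50), values in [0, 1]
#   global_score = lddt(pred, true, mask, per_residue=False)  # (2,)
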
class RelativePositionalEncoding(nn.Module):
    def __init__(self, attn_dim=8, max_rel_idx=32):
        super().__init__()
        self.max_rel_idx = max_rel_idx
        self.n_rel_pos = 2 * self.max_rel_idx + 1
        self.linear = nn.Linear(self.n_rel_pos, attn_dim)

    def forward(self, residue_index):
        d_ij = residue_index[..., None] - residue_index[..., None, :]
        v_bins = torch.arange(self.n_rel_pos).to(d_ij.device) - self.max_rel_idx
        idxs = (d_ij[..., None] - v_bins[None, None]).abs().argmin(-1)
        p_ij = nn.functional.one_hot(idxs, num_classes=self.n_rel_pos)
        embeddings = self.linear(p_ij.float())
        return embeddings

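
# Illustrative usage sketch (not part of the original file): clipped relative
# position features, later used as an attention bias when
# position_embedding_type="relative".
#
#   relpos = RelativePositionalEncoding(attn_dim=8, max_rel_idx=32)
#   residue_index = torch.arange(16)[None].expand(2, -1)   # (2, 16)
#   feats = relpos(residue_index)                           # (2, 16, 16, 8)
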
########################################
# Adapted from https://github.com/NVlabs/edm


class Noise_Embedding(nn.Module):
    def __init__(self, num_channels, max_positions=10000, endpoint=False):
        super().__init__()
        self.num_channels = num_channels
        self.max_positions = max_positions
        self.endpoint = endpoint

    def forward(self, x):
        freqs = torch.arange(
            start=0, end=self.num_channels // 2, dtype=torch.float32, device=x.device
        )
        freqs = freqs / (self.num_channels // 2 - (1 if self.endpoint else 0))
        freqs = (1 / self.max_positions) ** freqs
        x = x.outer(freqs.to(x.dtype))
        x = torch.cat([x.cos(), x.sin()], dim=1)
        return x

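
# Illustrative usage sketch (not part of the original file): sinusoidal features
# for a batch of scalar noise levels; the input must be 1D because of x.outer().
#
#   emb = Noise_Embedding(num_channels=128)
#   feats = emb(torch.rand(4))                   # (4, 128)
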
########################################
# Adapted from github.com/lucidrains
# https://github.com/lucidrains/denoising-diffusion-pytorch
# https://github.com/lucidrains/recurrent-interface-network-pytorch


def exists(x):
    return x is not None


def default(val, d):
    if exists(val):
        return val
    return d() if callable(d) else d


def posemb_sincos_1d(patches, temperature=10000, residue_index=None):
    _, n, dim, device, dtype = *patches.shape, patches.device, patches.dtype
    n = torch.arange(n, device=device) if residue_index is None else residue_index
    assert (dim % 2) == 0, "feature dimension must be multiple of 2 for sincos emb"
    omega = torch.arange(dim // 2, device=device) / (dim // 2 - 1)
    omega = 1.0 / (temperature**omega)
    n = n[..., None] * omega
    pe = torch.cat((n.sin(), n.cos()), dim=-1)
    return pe.type(dtype)

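
# Illustrative usage sketch (not part of the original file): fixed 1D sincos
# positions added to a (batch, length, dim) token tensor.
#
#   tokens = torch.randn(2, 16, 64)
#   tokens = tokens + posemb_sincos_1d(tokens)   # pe is (16, 64), broadcast over batch
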
class LayerNorm(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.gamma = nn.Parameter(torch.ones(dim))
        self.register_buffer("beta", torch.zeros(dim))

    def forward(self, x):
        return F.layer_norm(x, x.shape[-1:], self.gamma, self.beta)

class NoiseConditioningBlock(nn.Module):
    def __init__(self, n_in_channel, n_out_channel):
        super().__init__()
        self.block = nn.Sequential(
            Noise_Embedding(n_in_channel),
            nn.Linear(n_in_channel, n_out_channel),
            nn.SiLU(),
            nn.Linear(n_out_channel, n_out_channel),
            Rearrange("b d -> b 1 d"),
        )

    def forward(self, noise_level):
        return self.block(noise_level)

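
# Illustrative usage sketch (not part of the original file): produce the
# (batch, 1, time_cond_dim) conditioning tensor consumed by the blocks below.
#
#   cond_block = NoiseConditioningBlock(n_in_channel=128, n_out_channel=512)
#   time_cond = cond_block(torch.rand(4))        # (4, 1, 512)
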
class TimeCondResnetBlock(nn.Module):
    def __init__(
        self, nic, noc, cond_nc, conv_layer=nn.Conv2d, dropout=0.1, n_norm_in_groups=4
    ):
        super().__init__()
        self.block1 = nn.Sequential(
            nn.GroupNorm(num_groups=nic // n_norm_in_groups, num_channels=nic),
            nn.SiLU(),
            conv_layer(nic, noc, 3, 1, 1),
        )
        self.cond_proj = nn.Linear(cond_nc, noc * 2)
        self.mid_norm = nn.GroupNorm(num_groups=noc // 4, num_channels=noc)
        self.dropout = dropout if dropout is None else nn.Dropout(dropout)
        self.block2 = nn.Sequential(
            nn.GroupNorm(num_groups=noc // 4, num_channels=noc),
            nn.SiLU(),
            conv_layer(noc, noc, 3, 1, 1),
        )
        self.mismatch = False
        if nic != noc:
            self.mismatch = True
            self.conv_match = conv_layer(nic, noc, 1, 1, 0)

    def forward(self, x, time=None):
        h = self.block1(x)
        if time is not None:
            h = self.mid_norm(h)
            scale, shift = self.cond_proj(time).chunk(2, dim=-1)
            h = (h * (utils.expand(scale, h) + 1)) + utils.expand(shift, h)
        if self.dropout is not None:
            h = self.dropout(h)
        h = self.block2(h)
        if self.mismatch:
            x = self.conv_match(x)
        return x + h

class TimeCondAttention(nn.Module):
    def __init__(
        self,
        dim,
        dim_context=None,
        heads=4,
        dim_head=32,
        norm=False,
        norm_context=False,
        time_cond_dim=None,
        attn_bias_dim=None,
        rotary_embedding_module=None,
    ):
        super().__init__()
        hidden_dim = dim_head * heads
        dim_context = default(dim_context, dim)

        self.time_cond = None
        if exists(time_cond_dim):
            self.time_cond = nn.Sequential(nn.SiLU(), nn.Linear(time_cond_dim, dim * 2))
            nn.init.zeros_(self.time_cond[-1].weight)
            nn.init.zeros_(self.time_cond[-1].bias)

        self.scale = dim_head**-0.5
        self.heads = heads

        self.norm = LayerNorm(dim) if norm else nn.Identity()
        self.norm_context = LayerNorm(dim_context) if norm_context else nn.Identity()

        self.attn_bias_proj = None
        if attn_bias_dim is not None:
            self.attn_bias_proj = nn.Sequential(
                Rearrange("b a i j -> b i j a"),
                nn.Linear(attn_bias_dim, heads),
                Rearrange("b i j a -> b a i j"),
            )

        self.to_q = nn.Linear(dim, hidden_dim, bias=False)
        self.to_kv = nn.Linear(dim_context, hidden_dim * 2, bias=False)
        self.to_out = nn.Linear(hidden_dim, dim, bias=False)
        nn.init.zeros_(self.to_out.weight)

        self.use_rope = False
        if rotary_embedding_module is not None:
            self.use_rope = True
            self.rope = rotary_embedding_module

    def forward(self, x, context=None, time=None, attn_bias=None, seq_mask=None):
        # attn_bias is b, c, i, j
        h = self.heads
        has_context = exists(context)
        context = default(context, x)

        if x.shape[-1] != self.norm.gamma.shape[-1]:
            print(context.shape, x.shape, self.norm.gamma.shape)
        x = self.norm(x)

        if exists(time):
            scale, shift = self.time_cond(time).chunk(2, dim=-1)
            x = (x * (scale + 1)) + shift

        if has_context:
            context = self.norm_context(context)

        if seq_mask is not None:
            x = x * seq_mask[..., None]

        qkv = (self.to_q(x), *self.to_kv(context).chunk(2, dim=-1))
        q, k, v = map(lambda t: rearrange(t, "b n (h d) -> b h n d", h=h), qkv)
        q = q * self.scale

        if self.use_rope:
            q = self.rope.rotate_queries_or_keys(q)
            k = self.rope.rotate_queries_or_keys(k)

        sim = torch.einsum("b h i d, b h j d -> b h i j", q, k)
        if attn_bias is not None:
            if self.attn_bias_proj is not None:
                attn_bias = self.attn_bias_proj(attn_bias)
            sim += attn_bias
        if seq_mask is not None:
            attn_mask = torch.einsum("b i, b j -> b i j", seq_mask, seq_mask)[:, None]
            sim -= (1 - attn_mask) * 1e6
        attn = sim.softmax(dim=-1)

        out = torch.einsum("b h i j, b h j d -> b h i d", attn, v)
        out = rearrange(out, "b h n d -> b n (h d)")
        out = self.to_out(out)
        if seq_mask is not None:
            out = out * seq_mask[..., None]
        return out

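
# Illustrative usage sketch (not part of the original file): residual
# self-attention over (batch, length, dim) tokens with noise conditioning.
#
#   attn = TimeCondAttention(dim=256, heads=4, dim_head=32, norm=True, time_cond_dim=512)
#   x = torch.randn(2, 16, 256)
#   t = torch.randn(2, 1, 512)                   # e.g. from NoiseConditioningBlock
#   x = x + attn(x, time=t)                      # (2, 16, 256)
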
class TimeCondFeedForward(nn.Module):
    def __init__(self, dim, mult=4, dim_out=None, time_cond_dim=None, dropout=0.1):
        super().__init__()
        if dim_out is None:
            dim_out = dim
        self.norm = LayerNorm(dim)

        self.time_cond = None
        self.dropout = None
        inner_dim = int(dim * mult)

        if exists(time_cond_dim):
            self.time_cond = nn.Sequential(
                nn.SiLU(),
                nn.Linear(time_cond_dim, inner_dim * 2),
            )
            nn.init.zeros_(self.time_cond[-1].weight)
            nn.init.zeros_(self.time_cond[-1].bias)

        self.linear_in = nn.Linear(dim, inner_dim)
        self.nonlinearity = nn.SiLU()
        if dropout is not None:
            self.dropout = nn.Dropout(dropout)
        self.linear_out = nn.Linear(inner_dim, dim_out)
        nn.init.zeros_(self.linear_out.weight)
        nn.init.zeros_(self.linear_out.bias)

    def forward(self, x, time=None):
        x = self.norm(x)
        x = self.linear_in(x)
        x = self.nonlinearity(x)

        if exists(time):
            scale, shift = self.time_cond(time).chunk(2, dim=-1)
            x = (x * (scale + 1)) + shift

        if exists(self.dropout):
            x = self.dropout(x)
        return self.linear_out(x)

class TimeCondTransformer(nn.Module):
    def __init__(
        self,
        dim,
        depth,
        heads,
        dim_head,
        time_cond_dim,
        attn_bias_dim=None,
        mlp_inner_dim_mult=4,
        position_embedding_type: str = "rotary",
    ):
        super().__init__()

        self.rope = None
        self.pos_emb_type = position_embedding_type
        if position_embedding_type == "rotary":
            self.rope = RotaryEmbedding(dim=32)
        elif position_embedding_type == "relative":
            self.relpos = nn.Sequential(
                RelativePositionalEncoding(attn_dim=heads),
                Rearrange("b i j d -> b d i j"),
            )

        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(
                nn.ModuleList(
                    [
                        TimeCondAttention(
                            dim,
                            heads=heads,
                            dim_head=dim_head,
                            norm=True,
                            time_cond_dim=time_cond_dim,
                            attn_bias_dim=attn_bias_dim,
                            rotary_embedding_module=self.rope,
                        ),
                        TimeCondFeedForward(
                            dim, mlp_inner_dim_mult, time_cond_dim=time_cond_dim
                        ),
                    ]
                )
            )

    def forward(
        self,
        x,
        time=None,
        attn_bias=None,
        context=None,
        seq_mask=None,
        residue_index=None,
    ):
        if self.pos_emb_type == "absolute":
            pos_emb = posemb_sincos_1d(x)
            x = x + pos_emb
        elif self.pos_emb_type == "absolute_residx":
            assert residue_index is not None
            pos_emb = posemb_sincos_1d(x, residue_index=residue_index)
            x = x + pos_emb
        elif self.pos_emb_type == "relative":
            assert residue_index is not None
            pos_emb = self.relpos(residue_index)
            attn_bias = pos_emb if attn_bias is None else attn_bias + pos_emb

        if seq_mask is not None:
            x = x * seq_mask[..., None]

        for i, (attn, ff) in enumerate(self.layers):
            x = x + attn(
                x, context=context, time=time, attn_bias=attn_bias, seq_mask=seq_mask
            )
            x = x + ff(x, time=time)
            if seq_mask is not None:
                x = x * seq_mask[..., None]

        return x

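
# Illustrative usage sketch (not part of the original file): a small
# rotary-position transformer trunk with a sequence mask.
#
#   trunk = TimeCondTransformer(dim=256, depth=2, heads=4, dim_head=32, time_cond_dim=512)
#   x = torch.randn(2, 16, 256)
#   t = torch.randn(2, 1, 512)
#   mask = torch.ones(2, 16)
#   out = trunk(x, time=t, seq_mask=mask)        # (2, 16, 256)
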
class TimeCondUViT(nn.Module):
    def __init__(
        self,
        *,
        seq_len: int,
        dim: int,
        patch_size: int = 1,
        depth: int = 6,
        heads: int = 8,
        dim_head: int = 32,
        n_filt_per_layer: List[int] = [],
        n_blocks_per_layer: int = 2,
        n_atoms: int = 37,
        channels_per_atom: int = 6,
        attn_bias_dim: int = None,
        time_cond_dim: int = None,
        conv_skip_connection: bool = False,
        position_embedding_type: str = "rotary",
    ):
        super().__init__()

        # Initialize configuration params
        if time_cond_dim is None:
            time_cond_dim = dim * 4
        self.position_embedding_type = position_embedding_type
        channels = channels_per_atom
        self.n_conv_layers = n_conv_layers = len(n_filt_per_layer)
        if n_conv_layers > 0:
            post_conv_filt = n_filt_per_layer[-1]
        self.conv_skip_connection = conv_skip_connection and n_conv_layers == 1
        transformer_seq_len = seq_len // (2**n_conv_layers)
        assert transformer_seq_len % patch_size == 0
        num_patches = transformer_seq_len // patch_size
        dim_a = post_conv_atom_dim = max(1, n_atoms // (2 ** (n_conv_layers - 1)))
        if n_conv_layers == 0:
            patch_dim = patch_size * n_atoms * channels_per_atom
            patch_dim_out = patch_size * n_atoms * 3
            dim_a = n_atoms
        elif conv_skip_connection and n_conv_layers == 1:
            patch_dim = patch_size * (channels + post_conv_filt) * post_conv_atom_dim
            patch_dim_out = patch_size * post_conv_filt * post_conv_atom_dim
        elif n_conv_layers > 0:
            patch_dim = patch_dim_out = patch_size * post_conv_filt * post_conv_atom_dim

        # Make downsampling conv
        # Downsamples n-1 times where n is n_conv_layers
        down_conv = []
        block_in = channels
        for i, nf in enumerate(n_filt_per_layer):
            block_out = nf
            layer = []
            for j in range(n_blocks_per_layer):
                n_groups = 2 if i == 0 and j == 0 else 4
                layer.append(
                    TimeCondResnetBlock(
                        block_in, block_out, time_cond_dim, n_norm_in_groups=n_groups
                    )
                )
                block_in = block_out
            down_conv.append(nn.ModuleList(layer))
        self.down_conv = nn.ModuleList(down_conv)

        # Make transformer
        self.to_patch_embedding = nn.Sequential(
            Rearrange("b c (n p) a -> b n (p c a)", p=patch_size),
            nn.Linear(patch_dim, dim),
            LayerNorm(dim),
        )
        self.transformer = TimeCondTransformer(
            dim,
            depth,
            heads,
            dim_head,
            time_cond_dim,
            attn_bias_dim=attn_bias_dim,
            position_embedding_type=position_embedding_type,
        )
        self.from_patch = nn.Sequential(
            LayerNorm(dim),
            nn.Linear(dim, patch_dim_out),
            Rearrange("b n (p c a) -> b c (n p) a", p=patch_size, a=dim_a),
        )
        nn.init.zeros_(self.from_patch[-2].weight)
        nn.init.zeros_(self.from_patch[-2].bias)

        # Make upsampling conv
        up_conv = []
        for i, nf in enumerate(reversed(n_filt_per_layer)):
            skip_in = nf
            block_out = nf
            layer = []
            for j in range(n_blocks_per_layer):
                layer.append(
                    TimeCondResnetBlock(block_in + skip_in, block_out, time_cond_dim)
                )
                block_in = block_out
            up_conv.append(nn.ModuleList(layer))
        self.up_conv = nn.ModuleList(up_conv)

        # Conv out
        if n_conv_layers > 0:
            self.conv_out = nn.Sequential(
                nn.GroupNorm(num_groups=block_out // 4, num_channels=block_out),
                nn.SiLU(),
                nn.Conv2d(block_out, channels // 2, 3, 1, 1),
            )
    def forward(
        self, coords, time_cond, pair_bias=None, seq_mask=None, residue_index=None
    ):
        if self.n_conv_layers > 0:  # pad up to even dims
            coords = F.pad(coords, (0, 0, 0, 0, 0, 1, 0, 0))

        x = rearr_coords = rearrange(coords, "b n a c -> b c n a")

        hiddens = []
        for i, layer in enumerate(self.down_conv):
            for block in layer:
                x = block(x, time=time_cond)
                hiddens.append(x)
            if i != self.n_conv_layers - 1:
                x = downsample(x)

        if self.conv_skip_connection:
            x = torch.cat([x, rearr_coords], 1)

        x = self.to_patch_embedding(x)
        # if self.position_embedding_type == 'absolute':
        #     pos_emb = posemb_sincos_1d(x)
        #     x = x + pos_emb
        if seq_mask is not None and x.shape[1] == seq_mask.shape[1]:
            x *= seq_mask[..., None]
        x = self.transformer(
            x,
            time=time_cond,
            attn_bias=pair_bias,
            seq_mask=seq_mask,
            residue_index=residue_index,
        )
        x = self.from_patch(x)

        for i, layer in enumerate(self.up_conv):
            for block in layer:
                x = torch.cat([x, hiddens.pop()], 1)
                x = block(x, time=time_cond)
            if i != self.n_conv_layers - 1:
                x = upsample_coords(x, hiddens[-1].shape[2:])

        if self.n_conv_layers > 0:
            x = self.conv_out(x)
            x = x[..., :-1, :]  # drop even-dims padding

        x = rearrange(x, "b c n a -> b n a c")
        return x

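
# Illustrative usage sketch (not part of the original file): the conv-free
# configuration (n_filt_per_layer=[]) maps (batch, seq_len, n_atoms,
# channels_per_atom) inputs to (batch, seq_len, n_atoms, 3) coordinates.
#
#   model = TimeCondUViT(seq_len=64, dim=256, depth=2, heads=4)
#   cond = NoiseConditioningBlock(128, 256 * 4)  # time_cond_dim defaults to dim * 4
#   coords = torch.randn(2, 64, 37, 6)
#   t = cond(torch.rand(2))                      # (2, 1, 1024)
#   out = model(coords, t, seq_mask=torch.ones(2, 64))  # (2, 64, 37, 3)
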
########################################


class LinearWarmupCosineDecay(torch.optim.lr_scheduler._LRScheduler):
    def __init__(
        self,
        optimizer,
        max_lr,
        warmup_steps=1000,
        decay_steps=int(1e6),
        min_lr=1e-6,
        **kwargs,
    ):
        self.max_lr = max_lr
        self.min_lr = min_lr
        self.warmup_steps = warmup_steps
        self.decay_steps = decay_steps
        self.total_steps = warmup_steps + decay_steps
        super(LinearWarmupCosineDecay, self).__init__(optimizer, **kwargs)

    def get_lr(self):
        # TODO double check for off-by-one errors
        if self.last_epoch < self.warmup_steps:
            curr_lr = self.last_epoch / self.warmup_steps * self.max_lr
            return [curr_lr for group in self.optimizer.param_groups]
        elif self.last_epoch < self.total_steps:
            time = (self.last_epoch - self.warmup_steps) / self.decay_steps * np.pi
            curr_lr = self.min_lr + (self.max_lr - self.min_lr) * 0.5 * (
                1 + np.cos(time)
            )
            return [curr_lr for group in self.optimizer.param_groups]
        else:
            return [self.min_lr for group in self.optimizer.param_groups]

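
# Illustrative usage sketch (not part of the original file; `params` and
# `n_steps` are placeholders): step the scheduler once per optimizer step.
# The lr ramps linearly to max_lr over warmup_steps, then follows a cosine
# decay down to min_lr over decay_steps.
#
#   opt = torch.optim.Adam(params, lr=1e-4)
#   sched = LinearWarmupCosineDecay(opt, max_lr=1e-4, warmup_steps=1000, decay_steps=100_000)
#   for _ in range(n_steps):
#       opt.step()
#       sched.step()
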
class NoiseConditionalProteinMPNN(nn.Module):
    def __init__(
        self,
        n_channel=128,
        n_layers=3,
        n_neighbors=32,
        time_cond_dim=None,
        vocab_size=21,
        input_S_is_embeddings=False,
    ):
        super().__init__()
        self.n_channel = n_channel
        self.n_layers = n_layers
        self.n_neighbors = n_neighbors
        self.time_cond_dim = time_cond_dim
        self.vocab_size = vocab_size
        self.bb_idxs_if_atom37 = [
            residue_constants.atom_order[a] for a in ["N", "CA", "C", "O"]
        ]

        self.mpnn = protein_mpnn.ProteinMPNN(
            num_letters=vocab_size,
            node_features=n_channel,
            edge_features=n_channel,
            hidden_dim=n_channel,
            num_encoder_layers=n_layers,
            num_decoder_layers=n_layers,
            vocab=vocab_size,
            k_neighbors=n_neighbors,
            augment_eps=0.0,
            dropout=0.1,
            ca_only=False,
            time_cond_dim=time_cond_dim,
            input_S_is_embeddings=input_S_is_embeddings,
        )

    def forward(
        self, denoised_coords, noisy_aatype, seq_mask, residue_index, time_cond
    ):
        if denoised_coords.shape[-2] == 37:
            denoised_coords = denoised_coords[:, :, self.bb_idxs_if_atom37]

        node_embs, encoder_embs = self.mpnn(
            X=denoised_coords,
            S=noisy_aatype,
            mask=seq_mask,
            chain_M=seq_mask,
            residue_idx=residue_index,
            chain_encoding_all=seq_mask,
            randn=None,
            use_input_decoding_order=False,
            decoding_order=None,
            causal_mask=False,
            time_cond=time_cond,
            return_node_embs=True,
        )
        return node_embs, encoder_embs