import torch
from torch import nn


class PositionalEncodingsFixed(nn.Module):
    """Fixed (non-learned) 2D sinusoidal positional encodings."""

    def __init__(self, emb_dim, temperature=10000):
        super().__init__()

        self.emb_dim = emb_dim
        self.temperature = temperature

    def _1d_pos_enc(self, mask, dim):
        # Frequencies temperature^(2 * floor(i / 2) / emb_dim) for half of the embedding.
        temp = torch.arange(self.emb_dim // 2).float().to(mask.device)
        temp = self.temperature ** (2 * (temp.div(2, rounding_mode='floor')) / self.emb_dim)

        # Positions along `dim` are cumulative counts of non-masked entries;
        # even channels get a sine, odd channels a cosine, then interleave.
        enc = (~mask).cumsum(dim).float().unsqueeze(-1) / temp
        enc = torch.stack([
            enc[..., 0::2].sin(), enc[..., 1::2].cos()
        ], dim=-1).flatten(-2)

        return enc

    def forward(self, bs, h, w, device):
        # An all-False mask marks every spatial location as valid.
        mask = torch.zeros(bs, h, w, dtype=torch.bool, requires_grad=False, device=device)
        x = self._1d_pos_enc(mask, dim=2)  # horizontal positions
        y = self._1d_pos_enc(mask, dim=1)  # vertical positions

        # Concatenate the two axes channel-wise and reshape to [bs, emb_dim, h, w].
        return torch.cat([y, x], dim=3).permute(0, 3, 1, 2)
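

if __name__ == "__main__":
    # Minimal usage sketch (not part of the original module): the batch size,
    # spatial size, and emb_dim below are illustrative. emb_dim should be
    # divisible by 4 so the sine/cosine halves interleave evenly per axis.
    pos_enc = PositionalEncodingsFixed(emb_dim=256)
    encodings = pos_enc(bs=2, h=16, w=16, device=torch.device('cpu'))
    print(encodings.shape)  # torch.Size([2, 256, 16, 16])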