Spaces:
Build error
Build error
File size: 2,984 Bytes
711b041 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import numpy as np
import torch
import torch.nn as nn
import torch.nn.init as init
class Linear(nn.Module):
    """Fully connected layer with Xavier-normal weights and a zero bias.

    Thin wrapper around ``nn.Linear`` that only changes the initialization.

    Args:
        in_features: Size of each input sample.
        out_features: Size of each output sample.
        bias: When ``False`` the wrapped layer has no additive bias.
    """

    def __init__(self, in_features, out_features, bias=True):
        super(Linear, self).__init__()
        self.linear = nn.Linear(in_features, out_features, bias=bias)
        init.xavier_normal_(self.linear.weight)
        # With bias=False, nn.Linear leaves ``bias`` as None; zero-initializing
        # it unconditionally would raise, so only touch it when it exists.
        if self.linear.bias is not None:
            init.zeros_(self.linear.bias)

    def forward(self, inputs):
        """Apply the wrapped affine transformation to ``inputs``."""
        return self.linear(inputs)
class ScaledDotProductAttention(nn.Module):
    """Scaled dot-product attention: ``dropout(softmax(q k^T / sqrt(d_k))) v``.

    Args:
        d_k: Size of the last dimension of the queries/keys; its square root
            divides the raw scores.
        dropout: Dropout probability applied to the attention weights.
    """

    def __init__(self, d_k, dropout=.1):
        super(ScaledDotProductAttention, self).__init__()
        self.scale_factor = np.sqrt(d_k)
        self.softmax = nn.Softmax(dim=-1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, q, k, v, attn_mask=None):
        """Attend from ``q`` over ``k``/``v``.

        Shapes (as annotated by the original author):
            q: [b_size x n_heads x len_q x d_k]
            k: [b_size x n_heads x len_k x d_k]
            v: [b_size x n_heads x len_v x d_v], with len_k == len_v
            attn_mask: same size as the score tensor,
                [b_size x n_heads x len_q x len_k]; selected positions are
                filled with -1e9 before the softmax.

        Returns:
            ``(context, attn)`` where context is
            [b_size x n_heads x len_q x d_v] and attn is the (dropped-out)
            attention weights [b_size x n_heads x len_q x len_k].
        """
        keys_t = k.transpose(-1, -2)
        logits = torch.matmul(q, keys_t) / self.scale_factor

        if attn_mask is not None:
            assert attn_mask.size() == logits.size()
            # In-place fill is safe: ``logits`` was freshly created above.
            logits.masked_fill_(attn_mask, -1e9)

        weights = self.softmax(logits)
        attn = self.dropout(weights)
        return torch.matmul(attn, v), attn
class LayerNormalization(nn.Module):
    """Layer normalization over the last dimension with learned scale/shift.

    Computes ``gamma * (z - mean) / (std + eps) + beta`` where ``mean`` and
    ``std`` are taken along the last axis. ``std`` is ``Tensor.std`` with its
    default settings and ``eps`` is added to the standard deviation itself
    (not to the variance).

    Args:
        d_hid: Length of the ``gamma`` and ``beta`` parameter vectors.
        eps: Small constant added to the standard deviation.
    """

    def __init__(self, d_hid, eps=1e-6):
        super(LayerNormalization, self).__init__()
        self.gamma = nn.Parameter(torch.ones(d_hid))   # multiplicative scale
        self.beta = nn.Parameter(torch.zeros(d_hid))   # additive shift
        self.eps = eps

    def forward(self, z):
        """Return ``z`` normalized along its last dimension."""
        centered = z - z.mean(dim=-1, keepdim=True)
        spread = z.std(dim=-1, keepdim=True) + self.eps
        normalized = centered / spread
        return self.gamma * normalized + self.beta
class PosEncoding(nn.Module):
    """Fixed sinusoidal positional encoding served through an embedding table.

    Row 0 of the table is all zeros and is used for padding; row ``p + 1``
    holds the encoding of position ``p``. Even columns hold
    ``sin(pos / 10000^(2*(j//2)/d_word_vec))`` and odd columns the matching
    ``cos``. The table is excluded from gradient computation.

    Args:
        max_seq_len: Number of positions stored in the table.
        d_word_vec: Width of each positional vector.
    """

    def __init__(self, max_seq_len, d_word_vec):
        super(PosEncoding, self).__init__()
        # Angle table of shape (max_seq_len, d_word_vec), built by broadcasting
        # a column of positions against a row of per-column frequencies.
        positions = np.arange(max_seq_len, dtype=np.float64)[:, None]
        exponents = 2.0 * (np.arange(d_word_vec) // 2) / d_word_vec
        pos_enc = positions / np.power(10000, exponents)
        pos_enc[:, 0::2] = np.sin(pos_enc[:, 0::2])
        pos_enc[:, 1::2] = np.cos(pos_enc[:, 1::2])

        # additional single row for PAD idx
        pad_row = np.zeros([1, d_word_vec])
        pos_enc = np.concatenate([pad_row, pos_enc]).astype(np.float32)

        self.pos_enc = nn.Embedding(max_seq_len + 1, d_word_vec)
        # fix positional encoding: exclude weight from grad computation
        self.pos_enc.weight = nn.Parameter(torch.from_numpy(pos_enc), requires_grad=False)
        # forward() pads every row of position indices to this fixed width.
        self.max_len = int(max_seq_len/10)

    def forward(self, input_len):
        """Look up positional encodings for a batch of sequence lengths.

        Args:
            input_len: 1-D integer tensor (or sequence of ints) holding one
                length per batch element.

        Returns:
            Tensor of shape ``[batch, self.max_len, d_word_vec]``: positions
            ``1..n`` for each length ``n``, followed by padding rows (index 0).
        """
        max_len = self.max_len
        # Work with plain Python ints rather than 0-dim tensors.
        if torch.is_tensor(input_len):
            lengths = input_len.tolist()
        else:
            lengths = list(input_len)
        # NOTE: a length above max_len gets no padding (repeating a list a
        # negative number of times yields an empty list), so such a row is
        # longer than max_len.
        rows = [list(range(1, n + 1)) + [0] * (max_len - n) for n in lengths]
        # Build the indices on the table's own device so the lookup never
        # mixes devices.
        input_pos = torch.tensor(rows, dtype=torch.long,
                                 device=self.pos_enc.weight.device)
        return self.pos_enc(input_pos)
|