# LCM/utils/nn/modules/attention.py
import torch
import torch.nn as nn
from torch.nn.parameter import Parameter
class BiAAttention(nn.Module):
'''
Bi-Affine attention layer.
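
    For each label l, forward() scores every decoder/encoder pair as

        energy[b, l, i, j] = d_i^T U_l e_j + w_d^(l) . d_i + w_e^(l) . e_j + b_l

    where d_i and e_j are the i-th decoder and j-th encoder vectors; the
    bi-affine term d_i^T U_l e_j is dropped when biaffine=False.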
'''
def __init__(self, input_size_encoder, input_size_decoder, num_labels, biaffine=True, **kwargs):
'''
Args:
input_size_encoder: int
the dimension of the encoder input.
input_size_decoder: int
the dimension of the decoder input.
num_labels: int
                the number of labels of the CRF layer.
            biaffine: bool
                whether to apply the bi-affine parameter.
**kwargs:
'''
super(BiAAttention, self).__init__()
self.input_size_encoder = input_size_encoder
self.input_size_decoder = input_size_decoder
self.num_labels = num_labels
self.biaffine = biaffine
self.W_d = Parameter(torch.Tensor(self.num_labels, self.input_size_decoder))
self.W_e = Parameter(torch.Tensor(self.num_labels, self.input_size_encoder))
self.b = Parameter(torch.Tensor(self.num_labels, 1, 1))
if self.biaffine:
self.U = Parameter(torch.Tensor(self.num_labels, self.input_size_decoder, self.input_size_encoder))
else:
self.register_parameter('U', None)
self.reset_parameters()
def reset_parameters(self):
nn.init.xavier_uniform_(self.W_d)
nn.init.xavier_uniform_(self.W_e)
nn.init.constant_(self.b, 0.)
if self.biaffine:
nn.init.xavier_uniform_(self.U)
def forward(self, input_d, input_e, mask_d=None, mask_e=None):
'''
Args:
input_d: Tensor
the decoder input tensor with shape = [batch_size, length_decoder, input_size]
input_e: Tensor
                the encoder input tensor with shape = [batch_size, length_encoder, input_size]
mask_d: Tensor or None
the mask tensor for decoder with shape = [batch_size, length_decoder]
mask_e: Tensor or None
the mask tensor for encoder with shape = [batch_size, length_encoder]
        Returns: Tensor
            the energy tensor with shape = [batch_size, num_labels, length_decoder, length_encoder]
'''
        assert input_d.size(0) == input_e.size(0), 'batch sizes of encoder and decoder are required to be equal.'
batch_size, length_decoder, _ = input_d.size()
_, length_encoder, _ = input_e.size()
        # compute decoder part: [num_labels, input_size_decoder] * [batch_size, input_size_decoder, length_decoder]
        # the output shape is [batch_size, num_labels, length_decoder, 1] after unsqueeze
        out_d = torch.matmul(self.W_d, input_d.transpose(1, 2)).unsqueeze(3)
        # compute encoder part: [num_labels, input_size_encoder] * [batch_size, input_size_encoder, length_encoder]
        # the output shape is [batch_size, num_labels, 1, length_encoder] after unsqueeze
        out_e = torch.matmul(self.W_e, input_e.transpose(1, 2)).unsqueeze(2)
        # final output shape: [batch_size, num_labels, length_decoder, length_encoder]
if self.biaffine:
# compute bi-affine part
# [batch_size, 1, length_decoder, input_size_decoder] * [num_labels, input_size_decoder, input_size_encoder]
# output shape [batch_size, num_label, length_decoder, input_size_encoder]
output = torch.matmul(input_d.unsqueeze(1), self.U)
# [batch_size, num_label, length_decoder, input_size_encoder] * [batch_size, 1, input_size_encoder, length_encoder]
# output shape [batch_size, num_label, length_decoder, length_encoder]
output = torch.matmul(output, input_e.unsqueeze(1).transpose(2, 3))
output = output + out_d + out_e + self.b
else:
            output = out_d + out_e + self.b
        if mask_d is not None:
            output = output * mask_d.unsqueeze(1).unsqueeze(3)
        if mask_e is not None:
            output = output * mask_e.unsqueeze(1).unsqueeze(2)
return output
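
# Usage sketch (illustrative sizes only; a non-authoritative example):
#     attn = BiAAttention(input_size_encoder=48, input_size_decoder=64, num_labels=3)
#     d = torch.randn(2, 5, 64)            # [batch_size, length_decoder, 64]
#     e = torch.randn(2, 7, 48)            # [batch_size, length_encoder, 48]
#     mask_d, mask_e = torch.ones(2, 5), torch.ones(2, 7)
#     attn(d, e, mask_d, mask_e).shape     # torch.Size([2, 3, 5, 7])
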
class ConcatAttention(nn.Module):
'''
Concatenate attention layer.
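
    The forward pass scores every decoder/encoder pair with the additive
    (concat) attention form

        energy[b, l, i, j] = v_l . tanh(W_d^T d_i + W_e^T e_j + b)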
'''
# TODO test it!
def __init__(self, input_size_encoder, input_size_decoder, hidden_size, num_labels, **kwargs):
'''
Args:
input_size_encoder: int
the dimension of the encoder input.
input_size_decoder: int
the dimension of the decoder input.
hidden_size: int
the dimension of the hidden.
            num_labels: int
                the number of labels of the CRF layer.
            **kwargs:
'''
super(ConcatAttention, self).__init__()
self.input_size_encoder = input_size_encoder
self.input_size_decoder = input_size_decoder
self.hidden_size = hidden_size
self.num_labels = num_labels
self.W_d = Parameter(torch.Tensor(self.input_size_decoder, self.hidden_size))
self.W_e = Parameter(torch.Tensor(self.input_size_encoder, self.hidden_size))
self.b = Parameter(torch.Tensor(self.hidden_size))
self.v = Parameter(torch.Tensor(self.hidden_size, self.num_labels))
self.reset_parameters()
def reset_parameters(self):
        nn.init.xavier_uniform_(self.W_d)
        nn.init.xavier_uniform_(self.W_e)
        nn.init.xavier_uniform_(self.v)
        nn.init.constant_(self.b, 0.)
def forward(self, input_d, input_e, mask_d=None, mask_e=None):
'''
Args:
input_d: Tensor
the decoder input tensor with shape = [batch_size, length_decoder, input_size]
input_e: Tensor
                the encoder input tensor with shape = [batch_size, length_encoder, input_size]
mask_d: Tensor or None
the mask tensor for decoder with shape = [batch_size, length_decoder]
mask_e: Tensor or None
the mask tensor for encoder with shape = [batch_size, length_encoder]
        Returns: Tensor
            the energy tensor with shape = [batch_size, num_labels, length_decoder, length_encoder]
'''
        assert input_d.size(0) == input_e.size(0), 'batch sizes of encoder and decoder are required to be equal.'
batch_size, length_decoder, _ = input_d.size()
_, length_encoder, _ = input_e.size()
# compute decoder part: [batch_size, length_decoder, input_size_decoder] * [input_size_decoder, hidden_size]
# the output shape is [batch_size, length_decoder, hidden_size]
# then --> [batch_size, 1, length_decoder, hidden_size]
out_d = torch.matmul(input_d, self.W_d).unsqueeze(1)
        # compute encoder part: [batch_size, length_encoder, input_size_encoder] * [input_size_encoder, hidden_size]
# the output shape is [batch_size, length_encoder, hidden_size]
# then --> [batch_size, length_encoder, 1, hidden_size]
out_e = torch.matmul(input_e, self.W_e).unsqueeze(2)
# add them together [batch_size, length_encoder, length_decoder, hidden_size]
out = torch.tanh(out_d + out_e + self.b)
        # product with v
        # [batch_size, length_encoder, length_decoder, hidden_size] * [hidden_size, num_labels]
        # --> [batch_size, length_encoder, length_decoder, num_labels]
        # then transpose --> [batch_size, num_labels, length_decoder, length_encoder]
        output = torch.matmul(out, self.v).transpose(1, 3)
        if mask_d is not None:
            output = output * mask_d.unsqueeze(1).unsqueeze(3)
        if mask_e is not None:
            output = output * mask_e.unsqueeze(1).unsqueeze(2)
        return output
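

if __name__ == '__main__':
    # Minimal smoke test (a sketch, not part of the original module;
    # all sizes below are illustrative assumptions).
    d = torch.randn(2, 5, 64)   # [batch_size, length_decoder, input_size_decoder]
    e = torch.randn(2, 7, 48)   # [batch_size, length_encoder, input_size_encoder]
    biaff = BiAAttention(input_size_encoder=48, input_size_decoder=64, num_labels=3)
    concat = ConcatAttention(input_size_encoder=48, input_size_decoder=64, hidden_size=32, num_labels=3)
    print(biaff(d, e).shape)    # torch.Size([2, 3, 5, 7])
    print(concat(d, e).shape)   # torch.Size([2, 3, 5, 7])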