import keras

from models.attention import Attention, AttentionTrain

class Decoder(keras.layers.Layer):
    """
    Decoder layer of a Transformer model architecture.

    This layer implements the decoder block of the Transformer: a self-attention
    sub-layer followed by a position-wise feed-forward sub-layer, each wrapped with
    dropout, a residual connection, and layer normalization. It is responsible for
    generating the output sequence based on the encoded input sequence and
    previously generated output tokens.

    Parameters:
        dropout_rate (float): Dropout rate applied to the outputs of each sub-layer. Default is 0.2.
        num_heads (int): Number of attention heads. Default is 32.
        head_dims (int): Dimensionality of each attention head. Default is 40.
        fc_dim_factor (int): Expansion factor for the first feed-forward layer. Default is 5.
        input_len (int): Length of the input sequence. Default is 64.
    """
    def __init__(self, dropout_rate=0.2, num_heads=32, head_dims=40, fc_dim_factor=5, input_len=64):
        """
        Initializes the Decoder layer.

        Args:
            dropout_rate (float): Dropout rate applied to the outputs of each sub-layer. Default is 0.2.
            num_heads (int): Number of attention heads. Default is 32.
            head_dims (int): Dimensionality of each attention head. Default is 40.
            fc_dim_factor (int): Expansion factor for the first feed-forward layer. Default is 5.
            input_len (int): Length of the input sequence. Default is 64.
        """
        super().__init__()
        # Layer Normalization for the first (attention) sub-layer
        self.norm1 = keras.layers.LayerNormalization(epsilon=1e-9)
        # Layer Normalization for the second (feed-forward) sub-layer
        self.norm2 = keras.layers.LayerNormalization(epsilon=1e-9)
        # Self-attention mechanism
        self.attn = AttentionTrain(head_dims=head_dims, num_heads=num_heads, dropout=dropout_rate, input_len=input_len)
        # Dense layer for the first feed-forward sub-layer
        self.fc1 = keras.layers.Dense(num_heads * head_dims * fc_dim_factor, activation='gelu')
        # Dense layer for the second feed-forward sub-layer
        self.fc2 = keras.layers.Dense(num_heads * head_dims, activation='gelu')
        # Dropout layers
        self.dropout1 = keras.layers.Dropout(dropout_rate)
        self.dropout2 = keras.layers.Dropout(dropout_rate)
        # Record all constructor arguments (e.g. for configuration/serialization)
        self._config = {
            'dropout_rate': dropout_rate,
            'num_heads': num_heads,
            'head_dims': head_dims,
            'fc_dim_factor': fc_dim_factor,
            'input_len': input_len,
        }

    def call(self, inputs):
        # Self-attention sub-layer: attention, dropout, residual connection, layer norm
        attn_out = self.attn(inputs)
        attn_out = self.dropout1(attn_out)
        out1 = self.norm1(attn_out + inputs)
        # Feed-forward sub-layer: two dense layers, dropout, residual connection, layer norm
        ffn_out = self.fc2(self.fc1(out1))
        ffn_out = self.dropout2(ffn_out)
        return self.norm2(ffn_out + out1)
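

# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal smoke test for the Decoder layer. It assumes that
# models.attention.AttentionTrain accepts the keyword arguments used above and
# returns a tensor of the same shape as its input, i.e.
# (batch, input_len, num_heads * head_dims). The batch size and the default
# hyperparameters below are hypothetical, not values mandated by the model.
if __name__ == "__main__":
    import numpy as np

    decoder = Decoder(dropout_rate=0.2, num_heads=32, head_dims=40,
                      fc_dim_factor=5, input_len=64)
    # Dummy batch: (batch_size=2, sequence_length=64, model_dim=32 * 40)
    dummy = np.random.normal(size=(2, 64, 32 * 40)).astype("float32")
    out = decoder(dummy)
    print(out.shape)  # expected: (2, 64, 1280)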