nicholasKluge committed on
Commit eea3ce3
1 Parent(s): 9c144a7

Upload 5 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+eng-por.txt filter=lfs diff=lfs merge=lfs -text
eng-por.txt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5650439f87f33bc1278d4e29e0e9ed84bec84c60dacde83ccf5526d549932fe3
+size 24609459
english_vocabulary.txt ADDED
The diff for this file is too large to render. See raw diff
 
keras_transformer_blocks.py ADDED
@@ -0,0 +1,198 @@
+import tensorflow as tf
+from tensorflow import keras
+from keras import layers
+
+class TransformerEncoder(layers.Layer):
+    """
+    The TransformerEncoder class is a custom Keras layer that implements a
+    single transformer encoder block. The block consists of a multi-head
+    self-attention layer followed by a feedforward neural network, with a
+    residual connection and layer normalization applied after each sub-layer.
+
+    The class takes the following arguments:
+
+    embed_dim: an integer specifying the dimensionality of the embedding space.
+    dense_dim: an integer specifying the number of units in the feedforward neural network.
+    num_heads: an integer specifying the number of attention heads to use.
+
+    The call method is the main computation performed by the layer. It takes
+    an input tensor and an optional mask tensor indicating which inputs to
+    consider in the attention calculation, and returns the output tensor of
+    the transformer encoder block.
+
+    The get_config method returns a dictionary of configuration information for
+    the layer, including the embed_dim, num_heads, and dense_dim parameters.
+    """
+    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
+        super().__init__(**kwargs)
+        self.embed_dim = embed_dim
+        self.dense_dim = dense_dim
+        self.num_heads = num_heads
+        self.attention = layers.MultiHeadAttention(
+            num_heads=num_heads, key_dim=embed_dim)
+        self.dense_proj = keras.Sequential(
+            [layers.Dense(dense_dim, activation="relu"),
+             layers.Dense(embed_dim),]
+        )
+        self.layernorm_1 = layers.LayerNormalization()
+        self.layernorm_2 = layers.LayerNormalization()
+
+    def call(self, inputs, mask=None):
+        if mask is not None:
+            mask = mask[:, tf.newaxis, :]
+        attention_output = self.attention(
+            inputs, inputs, attention_mask=mask)
+        proj_input = self.layernorm_1(inputs + attention_output)
+        proj_output = self.dense_proj(proj_input)
+        return self.layernorm_2(proj_input + proj_output)
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            "embed_dim": self.embed_dim,
+            "num_heads": self.num_heads,
+            "dense_dim": self.dense_dim,
+        })
+        return config
+
+class TransformerDecoder(layers.Layer):
+    """
+    A Transformer decoder layer that attends over the input
+    sequence and the encoder outputs.
+
+    Args:
+        embed_dim (int): Dimension of the input embeddings.
+        dense_dim (int): Dimension of the dense layer in the feedforward sublayer.
+        num_heads (int): Number of attention heads in each multi-head attention layer.
+
+    Attributes:
+        attention_1 (MultiHeadAttention): First multi-head attention layer.
+        attention_2 (MultiHeadAttention): Second multi-head attention layer.
+        dense_proj (Sequential): Feedforward sublayer consisting of two dense layers.
+        layernorm_1 (LayerNormalization): Layer normalization applied
+            after the first attention layer.
+        layernorm_2 (LayerNormalization): Layer normalization applied
+            after the second attention layer.
+        layernorm_3 (LayerNormalization): Layer normalization applied
+            after the feedforward sublayer.
+        supports_masking (bool): Whether the layer supports masking.
+
+    Methods:
+        get_config(): Returns a dictionary with the configuration of the layer.
+        get_causal_attention_mask(inputs): Returns a 3D tensor with a
+            causal mask for the given input sequence.
+        call(inputs, encoder_outputs, mask=None): Computes the output of
+            the layer for the given inputs and encoder outputs.
+    """
+    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
+        super().__init__(**kwargs)
+        self.embed_dim = embed_dim
+        self.dense_dim = dense_dim
+        self.num_heads = num_heads
+        self.attention_1 = layers.MultiHeadAttention(
+            num_heads=num_heads, key_dim=embed_dim)
+        self.attention_2 = layers.MultiHeadAttention(
+            num_heads=num_heads, key_dim=embed_dim)
+        self.dense_proj = keras.Sequential(
+            [layers.Dense(dense_dim, activation="relu"),
+             layers.Dense(embed_dim),]
+        )
+        self.layernorm_1 = layers.LayerNormalization()
+        self.layernorm_2 = layers.LayerNormalization()
+        self.layernorm_3 = layers.LayerNormalization()
+        self.supports_masking = True
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            "embed_dim": self.embed_dim,
+            "num_heads": self.num_heads,
+            "dense_dim": self.dense_dim,
+        })
+        return config
+
+    def get_causal_attention_mask(self, inputs):
+        input_shape = tf.shape(inputs)
+        batch_size, sequence_length = input_shape[0], input_shape[1]
+        i = tf.range(sequence_length)[:, tf.newaxis]
+        j = tf.range(sequence_length)
+        mask = tf.cast(i >= j, dtype="int32")
+        mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))
+        mult = tf.concat(
+            [tf.expand_dims(batch_size, -1),
+             tf.constant([1, 1], dtype=tf.int32)], axis=0)
+        return tf.tile(mask, mult)
+
+    def call(self, inputs, encoder_outputs, mask=None):
+        causal_mask = self.get_causal_attention_mask(inputs)
+        padding_mask = None  # default when the layer is called without a padding mask
+        if mask is not None:
+            padding_mask = tf.cast(
+                mask[:, tf.newaxis, :], dtype="int32")
+            padding_mask = tf.minimum(padding_mask, causal_mask)
+        attention_output_1 = self.attention_1(
+            query=inputs,
+            value=inputs,
+            key=inputs,
+            attention_mask=causal_mask)
+        attention_output_1 = self.layernorm_1(inputs + attention_output_1)
+        attention_output_2 = self.attention_2(
+            query=attention_output_1,
+            value=encoder_outputs,
+            key=encoder_outputs,
+            attention_mask=padding_mask,
+        )
+        attention_output_2 = self.layernorm_2(
+            attention_output_1 + attention_output_2)
+        proj_output = self.dense_proj(attention_output_2)
+        return self.layernorm_3(attention_output_2 + proj_output)
+
+class PositionalEmbedding(layers.Layer):
+    """
+    The PositionalEmbedding layer creates an embedding layer that combines
+    token embeddings and positional embeddings for input sequences.
+
+    The class takes the following arguments:
+
+    sequence_length: An integer representing the maximum length of the input sequence.
+    input_dim: An integer representing the size of the input vocabulary.
+    output_dim: An integer representing the size of the embedding vectors.
+
+    The call(self, inputs) method takes an input tensor, computes the positions
+    for the input sequence, and returns the sum of the token embeddings and
+    positional embeddings.
+
+    The compute_mask(self, inputs, mask=None) method returns a mask tensor
+    computed from the input tensor (non-zero entries are treated as real tokens).
+
+    The get_config(self) method returns a dictionary containing the configuration
+    of the layer.
+    """
+    def __init__(self, sequence_length, input_dim, output_dim, **kwargs):
+        super().__init__(**kwargs)
+        self.token_embeddings = layers.Embedding(
+            input_dim=input_dim, output_dim=output_dim)
+        self.position_embeddings = layers.Embedding(
+            input_dim=sequence_length, output_dim=output_dim)
+        self.sequence_length = sequence_length
+        self.input_dim = input_dim
+        self.output_dim = output_dim
+
+    def call(self, inputs):
+        length = tf.shape(inputs)[-1]
+        positions = tf.range(start=0, limit=length, delta=1)
+        embedded_tokens = self.token_embeddings(inputs)
+        embedded_positions = self.position_embeddings(positions)
+        return embedded_tokens + embedded_positions
+
+    def compute_mask(self, inputs, mask=None):
+        return tf.math.not_equal(inputs, 0)
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            "output_dim": self.output_dim,
+            "sequence_length": self.sequence_length,
+            "input_dim": self.input_dim,
+        })
+        return config
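
For orientation, the sketch below shows how these three layers are typically composed into an encoder-decoder translation model with the Keras functional API. It is only a sketch: the module name matches the uploaded keras_transformer_blocks.py, but the hyperparameters and the input names ("english", "portuguese") are illustrative placeholders; the commit does not record the settings actually used to train transformer_eng_por.h5.

from tensorflow import keras
from keras import layers
from keras_transformer_blocks import (PositionalEmbedding,
                                      TransformerEncoder,
                                      TransformerDecoder)

# Placeholder hyperparameters; the values used for transformer_eng_por.h5
# are not recorded in this commit.
vocab_size = 20000
sequence_length = 20
embed_dim = 256
dense_dim = 2048
num_heads = 8

# Encoder branch: source-language token ids -> contextual representations.
encoder_inputs = keras.Input(shape=(None,), dtype="int64", name="english")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs)
encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(x)

# Decoder branch: target-language token ids plus encoder outputs ->
# per-position probability distribution over the target vocabulary.
decoder_inputs = keras.Input(shape=(None,), dtype="int64", name="portuguese")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)
x = TransformerDecoder(embed_dim, dense_dim, num_heads)(x, encoder_outputs)
decoder_outputs = layers.Dense(vocab_size, activation="softmax")(x)

transformer = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
transformer.compile(optimizer="rmsprop",
                    loss="sparse_categorical_crossentropy",
                    metrics=["accuracy"])

Because PositionalEmbedding implements compute_mask, the padding mask is propagated automatically to the encoder's self-attention and to the decoder's cross-attention.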
portuguese_vocabulary.txt ADDED
The diff for this file is too large to render. See raw diff
 
transformer_eng_por.h5 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab8bba1f36f70f1d5bf80c722d06ee22747642c9bcd6d3ee4ab753b3108d85eb
+size 190618344
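
Since all three custom layers implement get_config, the uploaded checkpoint can in principle be reloaded by registering them through custom_objects. A minimal sketch, assuming transformer_eng_por.h5 was saved with the standard Keras Model.save and that keras_transformer_blocks.py is importable from the working directory:

from tensorflow import keras
from keras_transformer_blocks import (PositionalEmbedding,
                                      TransformerEncoder,
                                      TransformerDecoder)

# custom_objects lets load_model rebuild the custom layers from the
# get_config() dictionaries stored in the HDF5 file.
transformer = keras.models.load_model(
    "transformer_eng_por.h5",
    custom_objects={
        "PositionalEmbedding": PositionalEmbedding,
        "TransformerEncoder": TransformerEncoder,
        "TransformerDecoder": TransformerDecoder,
    },
)
transformer.summary()

If the checkpoint was saved without optimizer state, passing compile=False to load_model avoids the compilation step.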