SergioMtz committed on
Commit
3dedab3
·
1 Parent(s): 7ae4fc5

Create new file

Files changed (1)
  1. Model.py +192 -0
Model.py ADDED
@@ -0,0 +1,192 @@
+ import tensorflow as tf
+ from tensorflow.keras.layers import Dense, Dropout, Embedding, LayerNormalization, Layer, Flatten
+ from tensorflow.keras.models import Model
+ import numpy as np
+
+
+ class PositionalEncoder(Layer):
+     def __init__(self, name = "Positional_Encoder"):
+         super(PositionalEncoder, self).__init__(name = name)
+
+     def get_angles(self, pos, i, d_model): # pos: (seq_length, 1) i: (1, d_model)
+         angles = 1 / np.power(10000., (2*(i//2)) / np.float32(d_model))
+         return pos * angles # (seq_length, d_model)
+
+     def call(self, inputs):
+         seq_length = inputs.shape.as_list()[-2] # assumes a statically known sequence length
+         d_model = inputs.shape.as_list()[-1]
+         angles = self.get_angles(np.arange(seq_length)[:, np.newaxis],
+                                  np.arange(d_model)[np.newaxis, :],
+                                  d_model)
+         angles[:, 0::2] = np.sin(angles[:, 0::2]) # sine on even indices
+         angles[:, 1::2] = np.cos(angles[:, 1::2]) # cosine on odd indices
+         pos_encoding = angles[np.newaxis, ...]
+         return inputs + tf.cast(pos_encoding, tf.float32)
+
+ class ScaledDotProductAttention(Layer):
+     def __init__(self, name = "Attention"):
+         super(ScaledDotProductAttention, self).__init__(name = name)
+
+     def call(self, queries, keys, values, mask):
+         product = tf.matmul(queries, keys, transpose_b = True)
+
+         keys_dim = tf.cast(tf.shape(keys)[-1], dtype = tf.float32)
+         scaled_product = product / tf.math.sqrt(keys_dim)
+
+         if mask is not None:
+             scaled_product += (mask * -1e9) # mask out padded positions before the softmax
+
+         attention = tf.matmul(tf.nn.softmax(scaled_product, axis = -1), values)
+
+         return attention
+
+ class MultiHeadAttention(Layer):
+     def __init__(self, nb_proj, name = "Multi_Head_Attention"):
+         super(MultiHeadAttention, self).__init__(name = name)
+         self.nb_proj = nb_proj
+
+     def build(self, input_shape):
+         self.d_model = input_shape[-1]
+         assert self.d_model % self.nb_proj == 0
+
+         self.d_proj = self.d_model // self.nb_proj
+
+         self.Query_Dense = Dense(units = self.d_model)
+         self.Key_Dense = Dense(units = self.d_model)
+         self.Value_Dense = Dense(units = self.d_model)
+
+         self.Final_Dense = Dense(units = self.d_model)
+
+         self.Attention = ScaledDotProductAttention()
+
+     def split_proj(self, inputs, batch_size): # inputs: (batch_size, seq_length, d_model)
+         shape = (batch_size,
+                  -1,
+                  self.nb_proj,
+                  self.d_proj)
+         splitted_inputs = tf.reshape(inputs, shape = shape) # (batch_size, seq_length, nb_proj, d_proj)
+         return tf.transpose(splitted_inputs, perm = [0, 2, 1, 3]) # (batch_size, nb_proj, seq_length, d_proj)
+
+     def call(self, queries, keys, values, mask):
+         batch_size = tf.shape(queries)[0]
+
+         queries = self.Query_Dense(queries)
+         keys = self.Key_Dense(keys)
+         values = self.Value_Dense(values)
+
+         queries = self.split_proj(queries, batch_size)
+         keys = self.split_proj(keys, batch_size)
+         values = self.split_proj(values, batch_size)
+
+         attention = self.Attention(queries, keys, values, mask)
+
+         attention = tf.transpose(attention, perm = [0, 2, 1, 3]) # (batch_size, seq_length, nb_proj, d_proj)
+
+         concat_attention = tf.reshape(attention, shape = (batch_size, -1, self.d_model))
+
+         outputs = self.Final_Dense(concat_attention)
+
+         return outputs
+
+ class EncoderLayer(Layer):
+     def __init__(self, FFN_units, nb_proj, dropout_rate, name = "Encoder_Layer"):
+         super(EncoderLayer, self).__init__(name = name)
+         self.FFN_units = FFN_units
+         self.nb_proj = nb_proj
+         self.dropout_rate = dropout_rate
+
+     def build(self, input_shape):
+         self.d_model = input_shape[-1]
+
+         self.multi_head_attention = MultiHeadAttention(self.nb_proj)
+         self.dropout_1 = Dropout(rate = self.dropout_rate)
+         self.norm_1 = LayerNormalization(epsilon = 1e-6)
+
+         self.Dense_1 = Dense(units = self.FFN_units, activation = "relu")
+         self.Dense_2 = Dense(units = self.d_model)
+         self.dropout_2 = Dropout(rate = self.dropout_rate)
+         self.norm_2 = LayerNormalization(epsilon = 1e-6)
+
+     def call(self, inputs, mask, training):
+         attention = self.multi_head_attention(inputs,
+                                               inputs,
+                                               inputs,
+                                               mask)
+         attention = self.dropout_1(attention, training = training)
+         attention = self.norm_1(attention + inputs) # residual connection around self-attention
+
+         outputs = self.Dense_1(attention)
+         outputs = self.Dense_2(outputs)
+         outputs = self.dropout_2(outputs, training = training)
+         outputs = self.norm_2(outputs + attention) # residual connection around the feed-forward block
+
+         return outputs
+
+ class Encoder(Layer):
+     def __init__(self, nb_layers, FFN_units,
+                  nb_proj, dropout_rate,
+                  vocab_size, d_model,
+                  name = "Encoder"):
+         super(Encoder, self).__init__(name = name)
+         self.nb_layers = nb_layers
+         self.d_model = d_model
+
+         self.embedding = Embedding(vocab_size, d_model)
+         self.pos_encoder = PositionalEncoder()
+         self.dropout = Dropout(rate = dropout_rate)
+         self.enc_layers = [EncoderLayer(FFN_units,
+                                         nb_proj,
+                                         dropout_rate)
+                            for _ in range(nb_layers)]
+
+     def call(self, inputs, mask, training):
+         outputs = self.embedding(inputs)
+         outputs *= tf.math.sqrt(tf.cast(self.d_model, tf.float32)) # scale embeddings before adding positional encodings
+         outputs = self.pos_encoder(outputs)
+         outputs = self.dropout(outputs, training = training)
+
+         for i in range(self.nb_layers):
+             outputs = self.enc_layers[i](outputs, mask, training)
+
+         return outputs
+
+ class Transformer(Model):
+     def __init__(self,
+                  vocab_size_enc,
+                  vocab_size_dec,
+                  d_model,
+                  nb_layers,
+                  FFN_units,
+                  nb_proj,
+                  dropout_rate,
+                  name = "Transformer"):
+         super(Transformer, self).__init__(name = name)
+
+         self.encoder = Encoder(nb_layers,
+                                FFN_units,
+                                nb_proj,
+                                dropout_rate,
+                                vocab_size_enc,
+                                d_model)
+
+         self.Flatten = Flatten()
+         self.Last_Dense = Dense(units = vocab_size_dec, activation = "sigmoid", name = "Linear_Output")
+
+     def create_padding_mask(self, seq): # seq: (batch_size, seq_length)
+         mask = tf.cast(tf.equal(seq, 0), dtype = tf.float32)
+         return mask[:, tf.newaxis, tf.newaxis, :]
+
+     def create_look_ahead_mask(self, seq):
+         seq_len = tf.shape(seq)[1]
+         look_ahead_mask = 1 - tf.linalg.band_part(tf.ones(shape = (seq_len, seq_len)), -1, 0)
+         return look_ahead_mask
+
+     def call(self, enc_inputs, training):
+         enc_mask = self.create_padding_mask(enc_inputs)
+
+         enc_outputs = self.encoder(enc_inputs, enc_mask, training)
+
+         enc_outputs = self.Flatten(enc_outputs)
+         outputs = self.Last_Dense(enc_outputs)
+
+         return outputs
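
For context, a minimal usage sketch of the model defined in this file (not part of the commit; the hyperparameter values, the import path, and the dummy batch are illustrative assumptions):

import tensorflow as tf
from Model import Transformer  # assumes Model.py is importable from the working directory

# Hypothetical hyperparameters; d_model must be divisible by nb_proj.
model = Transformer(vocab_size_enc = 8192,
                    vocab_size_dec = 2,      # size of the sigmoid output layer
                    d_model = 128,
                    nb_layers = 2,
                    FFN_units = 512,
                    nb_proj = 8,
                    dropout_rate = 0.1)

# Dummy batch of token ids; 0 is reserved for padding by create_padding_mask.
enc_inputs = tf.random.uniform((4, 32), minval = 1, maxval = 8192, dtype = tf.int32)
outputs = model(enc_inputs, training = False)  # shape: (4, vocab_size_dec)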