rbgo committed on
Commit
7e9669a
1 Parent(s): 41b5476
.gitattributes CHANGED
@@ -25,3 +25,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zstandard filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ train_tokenizer_objects.pickle filter=lfs diff=lfs merge=lfs -text
+ valid_tokenizer_objects.pickle filter=lfs diff=lfs merge=lfs -text
+ transformer_weights.h5 filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,6 +1,6 @@
  ---
  title: Eng Ass Former
- emoji: 💩
+ emoji: 🤖
  colorFrom: red
  colorTo: pink
  sdk: streamlit
app.py ADDED
@@ -0,0 +1,46 @@
+ import streamlit as st
+ import model
+ import inference
+
+
+ with st.spinner('Your TransFormer is on the way...'):
+     if 'transformer' not in st.session_state:
+         transformer,tokenizer_ass,tokenizer_en,MAX_LENGTH = model.prepare_model()
+
+         st.session_state['transformer'] = transformer
+         st.session_state['tokenizer_ass'] = tokenizer_ass
+         st.session_state['tokenizer_en'] = tokenizer_en
+         st.session_state['MAX_LENGTH'] = MAX_LENGTH
+
+ def show_information():
+     st.header('Translate Assamese with Transformer!🤖')
+
+ def select_text():
+     option = st.selectbox(
+         'Select these suggested Assamese Sentences',
+         ('মানুহে সদায় ইজনে সিজনক সহায় কৰিব লাগিব',
+          'আমি সদায় আমাৰ মাক সন্মান কৰিব লাগিব',
+          'আপুনি আপোনাৰ সপোন প্ৰাপ্ত নকৰালৈকে সদায় কঠোৰ আৰু কঠোৰ পৰিশ্ৰম কৰিব লাগিব'))
+
+     st.write('You have selected suggested text')
+
+     title = st.text_input('Assamese Text Input', option)
+     # st.write('Your Assamese Text', title)
+
+     return title
+
+
+
+ def main():
+     st.title('📚Assamese to English Translator🤖')
+     show_information()
+     text = select_text()
+     if st.button('Translate'):
+         result = inference.translate_main(st.session_state['transformer'], text, st.session_state['tokenizer_ass'],
+                                           st.session_state['tokenizer_en'], st.session_state['MAX_LENGTH'])
+
+         st.caption('Your Assamese translated text')
+         st.text(result)
+
+ if __name__ == "__main__":
+     main()
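A note on the pattern in app.py: Streamlit reruns the whole script on every widget interaction, so the expensive `model.prepare_model()` call is guarded behind `st.session_state` and runs only once per browser session. A minimal, self-contained sketch of that caching pattern, with a hypothetical `load_model()` standing in for the real loader:

```python
import time

import streamlit as st


def load_model():
    # Hypothetical stand-in for model.prepare_model(); pretend this is slow.
    time.sleep(2)
    return "dummy-transformer"


# On reruns the key is already present, so the slow load is skipped.
if "model" not in st.session_state:
    with st.spinner("Loading model..."):
        st.session_state["model"] = load_model()

st.write("Model ready:", st.session_state["model"])
```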
inference.py ADDED
@@ -0,0 +1,84 @@
+ import os
+ os.add_dll_directory("C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2/bin")
+ import tensorflow as tf
+
+ def create_padding_mask(seq):
+     seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
+     # add extra dimensions to add the padding
+     # to the attention logits.
+     return seq[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)
+
+ def create_look_ahead_mask(size):
+     mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)
+     return mask  # (seq_len, seq_len)
+
+ def create_masks(inp, tar):
+     # Encoder padding mask
+     enc_padding_mask = create_padding_mask(inp)
+
+     # Used in the 2nd attention block in the decoder.
+     # This padding mask is used to mask the encoder outputs.
+     dec_padding_mask = create_padding_mask(inp)
+
+     # Used in the 1st attention block in the decoder.
+     # It is used to pad and mask future tokens in the input received by
+     # the decoder.
+     look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
+     dec_target_padding_mask = create_padding_mask(tar)
+     combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)
+
+     return enc_padding_mask, combined_mask, dec_padding_mask
+
+ def translate_main(transformer, inp_sentence, tokenizer_ass, tokenizer_en, MAX_LENGTH):
+     def evaluate(inp_sentence):
+         start_token = [tokenizer_ass.vocab_size]
+         end_token = [tokenizer_ass.vocab_size + 1]
+
+         # the input sentence is Assamese, hence adding the Assamese start and end token
+         inp_sentence = start_token + tokenizer_ass.encode(inp_sentence) + end_token
+         encoder_input = tf.expand_dims(inp_sentence, 0)
+
+         # as the target is English, the first word to the transformer should be the
+         # English start token.
+         decoder_input = [tokenizer_en.vocab_size]
+         output = tf.expand_dims(decoder_input, 0)
+
+         for i in range(MAX_LENGTH):
+             enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
+                 encoder_input, output)
+
+             # predictions.shape == (batch_size, seq_len, vocab_size)
+             predictions, attention_weights = transformer(encoder_input,
+                                                          output,
+                                                          False,
+                                                          enc_padding_mask,
+                                                          combined_mask,
+                                                          dec_padding_mask)
+
+             # select the last word from the seq_len dimension
+             predictions = predictions[:, -1:, :]  # (batch_size, 1, vocab_size)
+
+             predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)
+
+             # return the result if the predicted_id is equal to the end token
+             if predicted_id == tokenizer_en.vocab_size + 1:
+                 return tf.squeeze(output, axis=0), attention_weights
+
+             # concatenate the predicted_id to the output which is given to the decoder
+             # as its input.
+             output = tf.concat([output, predicted_id], axis=-1)
+
+         return tf.squeeze(output, axis=0), attention_weights
+
+     def translate(sentence):
+         result, attention_weights = evaluate(sentence)
+
+         predicted_sentence = tokenizer_en.decode([i for i in result
+                                                   if i < tokenizer_en.vocab_size])
+
+         # print('Input: {}'.format(sentence))
+         # print('Predicted translation: {}'.format(predicted_sentence))
+         return predicted_sentence
+
+     result = translate(inp_sentence)
+     return result
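The masking helpers in inference.py are easy to sanity-check on a toy sequence. A minimal standalone sketch (assuming only TensorFlow 2.x) that reproduces `create_padding_mask` and `create_look_ahead_mask` and shows how `create_masks` combines them for the decoder's first attention block:

```python
import tensorflow as tf


def create_padding_mask(seq):
    # 1.0 wherever the token id is 0 (padding), broadcastable over attention logits.
    seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
    return seq[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)


def create_look_ahead_mask(size):
    # Strictly upper-triangular 1s: position i may not attend to positions > i.
    return 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)


# Toy batch of one target sequence with two trailing pad tokens.
tar = tf.constant([[7, 5, 3, 0, 0]])

look_ahead = create_look_ahead_mask(tf.shape(tar)[1])  # (5, 5)
target_pad = create_padding_mask(tar)                  # (1, 1, 1, 5)
combined = tf.maximum(target_pad, look_ahead)          # masks future AND pad tokens

print(look_ahead.numpy())
print(combined.numpy()[0, 0])
```

Positions marked 1 receive a large negative bias (`mask * -1e9`) before the softmax in `scaled_dot_product_attention`, so the model attends neither to padding nor to future tokens.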
model.py ADDED
@@ -0,0 +1,522 @@
+ import os
+ os.add_dll_directory("C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2/bin")
+ import tensorflow as tf
+ import tensorflow_datasets as tfds
+ import pandas as pd
+ import numpy as np
+ import time
+ import re
+ import pickle
+
+
+ def pickle_load():
+     with open("tokenizer/train_tokenizer_objects.pickle", 'rb') as f:
+         data = pickle.load(f)
+     train_ass = data['input_tensor']
+     train_eng = data['target_tensor']
+     train = data['train']
+
+     return train, train_ass, train_eng
+
+
+ def prepare_datasets():
+
+     train, train_ass, train_eng = pickle_load()
+
+     def encode(lang1, lang2):
+         lang1 = [tokenizer_ass.vocab_size] + tokenizer_ass.encode(
+             lang1.numpy()) + [tokenizer_ass.vocab_size + 1]
+
+         lang2 = [tokenizer_en.vocab_size] + tokenizer_en.encode(
+             lang2.numpy()) + [tokenizer_en.vocab_size + 1]
+
+         return lang1, lang2
+
+     def filter_max_length(x, y, max_length=40):
+         return tf.logical_and(tf.size(x) <= max_length,
+                               tf.size(y) <= max_length)
+
+     def tf_encode(row):
+         result_ass, result_en = tf.py_function(encode, [row[1], row[0]], [tf.int64, tf.int64])
+         result_ass.set_shape([None])
+         result_en.set_shape([None])
+
+         return result_ass, result_en
+
+     train_ = tf.data.Dataset.from_tensor_slices(train)
+
+     en = tf.data.Dataset.from_tensor_slices(train_eng.to_list())
+     ass = tf.data.Dataset.from_tensor_slices(train_ass.to_list())
+
+     tokenizer_en = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(
+         (e.numpy() for e in en), target_vocab_size=2**13)
+
+     tokenizer_ass = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(
+         (a.numpy() for a in ass), target_vocab_size=2**13)
+
+     input_vocab_size = tokenizer_ass.vocab_size + 2
+     target_vocab_size = tokenizer_en.vocab_size + 2
+
+
+     BUFFER_SIZE = 20000
+     BATCH_SIZE = 64
+     MAX_LENGTH = 40
+
+     train_dataset = train_.map(tf_encode)
+     train_dataset = train_dataset.filter(filter_max_length)
+     # cache the dataset to memory to get a speedup while reading from it.
+     train_dataset = train_dataset.cache()
+     train_dataset = train_dataset.shuffle(BUFFER_SIZE).padded_batch(BATCH_SIZE)
+     train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)
+
+     return train_dataset, tokenizer_en, tokenizer_ass
+
+
+ def get_angles(pos, i, d_model):
+     angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
+     return pos * angle_rates
+
+
+
+ def positional_encoding(position, d_model):
+     angle_rads = get_angles(np.arange(position)[:, np.newaxis],
+                             np.arange(d_model)[np.newaxis, :],
+                             d_model)
+
+     # apply sin to even indices in the array; 2i
+     angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
+
+     # apply cos to odd indices in the array; 2i+1
+     angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
+
+     pos_encoding = angle_rads[np.newaxis, ...]
+
+     return tf.cast(pos_encoding, dtype=tf.float32)
+
+
+ # Masking
+
+ '''Mask all the pad tokens in the batch of sequences.
+ It ensures that the model does not treat padding as input.
+ The mask indicates where pad value 0 is present: it outputs a 1 at those locations, and a 0 otherwise.
+ '''
+
+ def create_padding_mask(seq):
+     seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
+
+     # add extra dimensions to add the padding
+     # to the attention logits.
+     return seq[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)
+
+ # Look-ahead mask
+
+ """The look-ahead mask is used to mask the future tokens in a sequence.
+ In other words, the mask indicates which entries should not be used.
+ """
+ def create_look_ahead_mask(size):
+     mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)
+     return mask  # (seq_len, seq_len)
+
+ def scaled_dot_product_attention(q, k, v, mask):
+     """Calculate the attention weights.
+     q, k, v must have matching leading dimensions.
+     k, v must have matching penultimate dimension, i.e.: seq_len_k = seq_len_v.
+     The mask has different shapes depending on its type (padding or look ahead)
+     but it must be broadcastable for addition.
+
+     Args:
+       q: query shape == (..., seq_len_q, depth)
+       k: key shape == (..., seq_len_k, depth)
+       v: value shape == (..., seq_len_v, depth_v)
+       mask: Float tensor with shape broadcastable
+             to (..., seq_len_q, seq_len_k). Defaults to None.
+
+     Returns:
+       output, attention_weights
+     """
+
+     matmul_qk = tf.matmul(q, k, transpose_b=True)  # (..., seq_len_q, seq_len_k)
+
+     # scale matmul_qk
+     dk = tf.cast(tf.shape(k)[-1], tf.float32)
+     scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)
+
+     # add the mask to the scaled tensor.
+     if mask is not None:
+         scaled_attention_logits += (mask * -1e9)
+
+     # softmax is normalized on the last axis (seq_len_k) so that the scores
+     # add up to 1.
+     attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)  # (..., seq_len_q, seq_len_k)
+
+     output = tf.matmul(attention_weights, v)  # (..., seq_len_q, depth_v)
+
+     return output, attention_weights
+
+
+ class MultiHeadAttention(tf.keras.layers.Layer):
+     def __init__(self, d_model, num_heads):
+         super(MultiHeadAttention, self).__init__()
+         self.num_heads = num_heads
+         self.d_model = d_model
+
+         assert d_model % self.num_heads == 0
+
+         self.depth = d_model // self.num_heads
+
+         self.wq = tf.keras.layers.Dense(d_model)
+         self.wk = tf.keras.layers.Dense(d_model)
+         self.wv = tf.keras.layers.Dense(d_model)
+
+         self.dense = tf.keras.layers.Dense(d_model)
+
+     def split_heads(self, x, batch_size):
+         """Split the last dimension into (num_heads, depth).
+         Transpose the result such that the shape is (batch_size, num_heads, seq_len, depth)
+         """
+         x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
+         return tf.transpose(x, perm=[0, 2, 1, 3])
+
+     def call(self, v, k, q, mask):
+         batch_size = tf.shape(q)[0]
+
+         q = self.wq(q)  # (batch_size, seq_len, d_model)
+         k = self.wk(k)  # (batch_size, seq_len, d_model)
+         v = self.wv(v)  # (batch_size, seq_len, d_model)
+
+         q = self.split_heads(q, batch_size)  # (batch_size, num_heads, seq_len_q, depth)
+         k = self.split_heads(k, batch_size)  # (batch_size, num_heads, seq_len_k, depth)
+         v = self.split_heads(v, batch_size)  # (batch_size, num_heads, seq_len_v, depth)
+
+         # scaled_attention.shape == (batch_size, num_heads, seq_len_q, depth)
+         # attention_weights.shape == (batch_size, num_heads, seq_len_q, seq_len_k)
+         scaled_attention, attention_weights = scaled_dot_product_attention(
+             q, k, v, mask)
+
+         scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])  # (batch_size, seq_len_q, num_heads, depth)
+
+         concat_attention = tf.reshape(scaled_attention,
+                                       (batch_size, -1, self.d_model))  # (batch_size, seq_len_q, d_model)
+
+         output = self.dense(concat_attention)  # (batch_size, seq_len_q, d_model)
+
+         return output, attention_weights
+
+ # dff is the number of activation units in the feed-forward sublayer
+ def point_wise_feed_forward_network(d_model, dff):
+     return tf.keras.Sequential([
+         tf.keras.layers.Dense(dff, activation='relu'),  # (batch_size, seq_len, dff)
+         tf.keras.layers.Dense(d_model)  # (batch_size, seq_len, d_model)
+     ])
+
+ class EncoderLayer(tf.keras.layers.Layer):
+     def __init__(self, d_model, num_heads, dff, rate=0.1):
+         super(EncoderLayer, self).__init__()
+
+         self.mha = MultiHeadAttention(d_model, num_heads)
+         self.ffn = point_wise_feed_forward_network(d_model, dff)
+
+         self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+         self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+
+         self.dropout1 = tf.keras.layers.Dropout(rate)
+         self.dropout2 = tf.keras.layers.Dropout(rate)
+
+     def call(self, x, training, mask):
+
+         attn_output, _ = self.mha(x, x, x, mask)  # (batch_size, input_seq_len, d_model)
+         attn_output = self.dropout1(attn_output, training=training)
+         out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)
+
+         ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
+         ffn_output = self.dropout2(ffn_output, training=training)
+         out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)
+
+         return out2
+
+ class DecoderLayer(tf.keras.layers.Layer):
+     def __init__(self, d_model, num_heads, dff, rate=0.1):
+         super(DecoderLayer, self).__init__()
+
+         self.mha1 = MultiHeadAttention(d_model, num_heads)
+         self.mha2 = MultiHeadAttention(d_model, num_heads)
+
+         self.ffn = point_wise_feed_forward_network(d_model, dff)
+
+         self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+         self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+         self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+
+         self.dropout1 = tf.keras.layers.Dropout(rate)
+         self.dropout2 = tf.keras.layers.Dropout(rate)
+         self.dropout3 = tf.keras.layers.Dropout(rate)
+
+
+     def call(self, x, enc_output, training,
+              look_ahead_mask, padding_mask):
+         # enc_output.shape == (batch_size, input_seq_len, d_model)
+
+         attn1, attn_weights_block1 = self.mha1(x, x, x, look_ahead_mask)  # (batch_size, target_seq_len, d_model)
+         attn1 = self.dropout1(attn1, training=training)
+         out1 = self.layernorm1(attn1 + x)
+
+         attn2, attn_weights_block2 = self.mha2(
+             enc_output, enc_output, out1, padding_mask)  # (batch_size, target_seq_len, d_model)
+         attn2 = self.dropout2(attn2, training=training)
+         out2 = self.layernorm2(attn2 + out1)  # (batch_size, target_seq_len, d_model)
+
+         ffn_output = self.ffn(out2)  # (batch_size, target_seq_len, d_model)
+         ffn_output = self.dropout3(ffn_output, training=training)
+         out3 = self.layernorm3(ffn_output + out2)  # (batch_size, target_seq_len, d_model)
+
+         return out3, attn_weights_block1, attn_weights_block2
+
+ class Encoder(tf.keras.layers.Layer):
+     def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
+                  maximum_position_encoding, rate=0.1):
+         super(Encoder, self).__init__()
+
+         self.d_model = d_model
+         self.num_layers = num_layers
+
+         self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
+         self.pos_encoding = positional_encoding(maximum_position_encoding,
+                                                 self.d_model)
+
+         self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate)
+                            for _ in range(num_layers)]
+
+         self.dropout = tf.keras.layers.Dropout(rate)
+
+     def call(self, x, training, mask):
+
+         seq_len = tf.shape(x)[1]
+
+         # adding embedding and position encoding.
+         x = self.embedding(x)  # (batch_size, input_seq_len, d_model)
+         x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
+         x += self.pos_encoding[:, :seq_len, :]
+
+         x = self.dropout(x, training=training)
+
+         for i in range(self.num_layers):
+             x = self.enc_layers[i](x, training, mask)
+
+         return x  # (batch_size, input_seq_len, d_model)
+
+
+ class Decoder(tf.keras.layers.Layer):
+     def __init__(self, num_layers, d_model, num_heads, dff, target_vocab_size,
+                  maximum_position_encoding, rate=0.1):
+         super(Decoder, self).__init__()
+
+         self.d_model = d_model
+         self.num_layers = num_layers
+
+         self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
+         self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)
+
+         self.dec_layers = [DecoderLayer(d_model, num_heads, dff, rate)
+                            for _ in range(num_layers)]
+         self.dropout = tf.keras.layers.Dropout(rate)
+
+     def call(self, x, enc_output, training,
+              look_ahead_mask, padding_mask):
+
+         seq_len = tf.shape(x)[1]
+         attention_weights = {}
+
+         x = self.embedding(x)  # (batch_size, target_seq_len, d_model)
+         x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
+         x += self.pos_encoding[:, :seq_len, :]
+
+         x = self.dropout(x, training=training)
+
+         for i in range(self.num_layers):
+             x, block1, block2 = self.dec_layers[i](x, enc_output, training,
+                                                    look_ahead_mask, padding_mask)
+
+             attention_weights['decoder_layer{}_block1'.format(i + 1)] = block1
+             attention_weights['decoder_layer{}_block2'.format(i + 1)] = block2
+
+         # x.shape == (batch_size, target_seq_len, d_model)
+         return x, attention_weights
+
+ class Transformer(tf.keras.Model):
+     def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
+                  target_vocab_size, pe_input, pe_target, rate=0.1):
+         super(Transformer, self).__init__()
+
+         self.encoder = Encoder(num_layers, d_model, num_heads, dff,
+                                input_vocab_size, pe_input, rate)
+
+         self.decoder = Decoder(num_layers, d_model, num_heads, dff,
+                                target_vocab_size, pe_target, rate)
+
+         self.final_layer = tf.keras.layers.Dense(target_vocab_size)
+
+     def call(self, inp, tar, training, enc_padding_mask,
+              look_ahead_mask, dec_padding_mask):
+
+         enc_output = self.encoder(inp, training, enc_padding_mask)  # (batch_size, inp_seq_len, d_model)
+
+         # dec_output.shape == (batch_size, tar_seq_len, d_model)
+         dec_output, attention_weights = self.decoder(
+             tar, enc_output, training, look_ahead_mask, dec_padding_mask)
+
+         final_output = self.final_layer(dec_output)  # (batch_size, tar_seq_len, target_vocab_size)
+
+         return final_output, attention_weights
+
+ class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
+     def __init__(self, d_model, warmup_steps=4000):
+         super(CustomSchedule, self).__init__()
+
+         self.d_model = d_model
+         self.d_model = tf.cast(self.d_model, tf.float32)
+
+         self.warmup_steps = warmup_steps
+
+     def __call__(self, step):
+         arg1 = tf.math.rsqrt(step)
+         arg2 = step * (self.warmup_steps ** -1.5)
+
+         return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)
+
+ def create_masks(inp, tar):
+     # Encoder padding mask
+     enc_padding_mask = create_padding_mask(inp)
+
+     # Used in the 2nd attention block in the decoder.
+     # This padding mask is used to mask the encoder outputs.
+     dec_padding_mask = create_padding_mask(inp)
+
+     # Used in the 1st attention block in the decoder.
+     # It is used to pad and mask future tokens in the input received by
+     # the decoder.
+     look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
+     dec_target_padding_mask = create_padding_mask(tar)
+     combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)
+
+     return enc_padding_mask, combined_mask, dec_padding_mask
+
+ def prepare_model():
+     train_dataset, tokenizer_en, tokenizer_ass = prepare_datasets()
+     num_layers = 4
+     d_model = 128
+     dff = 512
+     num_heads = 8
+
+     input_vocab_size = tokenizer_ass.vocab_size + 2
+     target_vocab_size = tokenizer_en.vocab_size + 2
+     dropout_rate = 0.1
+     learning_rate = CustomSchedule(d_model)
+
+     optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98,
+                                          epsilon=1e-7)
+
+     temp_learning_rate_schedule = CustomSchedule(d_model)
+
+     loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
+         from_logits=True, reduction='none')
+
+     def loss_function(real, pred):
+         mask = tf.math.logical_not(tf.math.equal(real, 0))
+         loss_ = loss_object(real, pred)
+
+         mask = tf.cast(mask, dtype=loss_.dtype)
+         loss_ *= mask
+
+         return tf.reduce_sum(loss_) / tf.reduce_sum(mask)
+
+     train_loss = tf.keras.metrics.Mean(name='train_loss')
+     train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
+         name='train_accuracy')
+
+     transformer = Transformer(num_layers, d_model, num_heads, dff,
+                               input_vocab_size, target_vocab_size,
+                               pe_input=input_vocab_size,
+                               pe_target=target_vocab_size,
+                               rate=dropout_rate)
+
+     checkpoint_path = r"C:\Huggingface\Eng-Ass-Former\checkpoints"
+
+     ckpt = tf.train.Checkpoint(transformer=transformer,
+                                optimizer=optimizer)
+
+     ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)
+
+     # if a checkpoint exists, restore the latest checkpoint.
+     if ckpt_manager.latest_checkpoint:
+         ckpt.restore(ckpt_manager.latest_checkpoint)
+         print('Latest checkpoint restored!!')
+
+     EPOCHS = 1
+     # The @tf.function trace-compiles train_step into a TF graph for faster
+     # execution. The function specializes to the precise shape of the argument
+     # tensors. To avoid re-tracing due to the variable sequence lengths or variable
+     # batch sizes (the last batch is smaller), use input_signature to specify
+     # more generic shapes.
+
+     train_step_signature = [
+         tf.TensorSpec(shape=(None, None), dtype=tf.int64),
+         tf.TensorSpec(shape=(None, None), dtype=tf.int64),
+     ]
+
+     @tf.function(input_signature=train_step_signature)
+     def train_step(inp, tar):
+         tar_inp = tar[:, :-1]
+         tar_real = tar[:, 1:]
+
+         enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)
+
+         with tf.GradientTape() as tape:
+             predictions, _ = transformer(inp, tar_inp,
+                                          True,
+                                          enc_padding_mask,
+                                          combined_mask,
+                                          dec_padding_mask)
+             loss = loss_function(tar_real, predictions)
+
+         gradients = tape.gradient(loss, transformer.trainable_variables)
+         optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))
+
+         train_loss(loss)
+         train_accuracy(tar_real, predictions)
+
+     print("STARTING THE TRAINING PROCESS!")
+     for epoch in range(EPOCHS):
+         start = time.time()
+         train_loss.reset_states()
+         train_accuracy.reset_states()
+
+         # inp -> Assamese, tar -> English
+         for (batch, (inp, tar)) in enumerate(train_dataset):
+             train_step(inp, tar)
+             if batch % 50 == 0:
+                 print('Epoch {} Batch {} Loss {:.4f} Accuracy {:.4f}'.format(
+                     epoch + 1, batch, train_loss.result(), train_accuracy.result()))
+             break
+
+         if (epoch + 1) % 5 == 0:
+             ckpt_save_path = ckpt_manager.save()
+             print('Saving checkpoint for epoch {} at {}'.format(epoch + 1,
+                                                                 ckpt_save_path))
+
+         print('Epoch {} Loss {:.4f} Accuracy {:.4f}'.format(epoch + 1,
+                                                             train_loss.result(),
+                                                             train_accuracy.result()))
+
+         print('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))
+
+
+     transformer.load_weights('weights/transformer_weights.h5')
+
+     print("Weight Loaded")
+     return transformer, tokenizer_ass, tokenizer_en, 40
+
+ # if __name__ == "__main__":
+ #     prepare_model_params()
+ #     print("DONE")
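`CustomSchedule` in model.py is the warmup schedule from "Attention Is All You Need": the learning rate is `d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)`, rising linearly for the first `warmup_steps` steps and then decaying as the inverse square root of the step. A small standalone sketch (assuming only TensorFlow 2.x; the explicit float cast of `step` is added here so the sketch runs on plain integers) that evaluates it at a few steps:

```python
import tensorflow as tf


class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Transformer warmup schedule: linear warmup, then inverse-sqrt decay."""

    def __init__(self, d_model, warmup_steps=4000):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        arg1 = tf.math.rsqrt(step)                 # decay branch
        arg2 = step * (self.warmup_steps ** -1.5)  # warmup branch
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)


schedule = CustomSchedule(d_model=128)  # same d_model as prepare_model()
for step in [1, 100, 4000, 40000]:
    print(step, float(schedule(step)))
# The rate peaks at step == warmup_steps (4000) and decays as 1/sqrt(step) afterwards.
```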
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ keras==2.8.0
+ Keras-Preprocessing==1.1.2
+ nltk==3.7
+ numpy==1.21.3
+ pandas==1.4.1
+ pickleshare==0.7.5
+ regex==2022.3.15
+ scikit-learn==1.0.2
+ scipy==1.8.0
+ streamlit==1.7.0
+ tensorflow==2.8.0
+ tensorflow-datasets==4.0.1
+
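An optional, purely standard-library sketch for checking that the key pins above are actually installed in the running environment (useful when debugging the Space build; the exact set of packages checked here is just an illustrative subset):

```python
from importlib.metadata import version, PackageNotFoundError

pins = {
    "streamlit": "1.7.0",
    "tensorflow": "2.8.0",
    "tensorflow-datasets": "4.0.1",
}

for name, wanted in pins.items():
    try:
        got = version(name)  # reads the installed distribution metadata
    except PackageNotFoundError:
        got = "not installed"
    print(f"{name}: pinned {wanted}, found {got}")
```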
tokenizer/train_tokenizer_objects.pickle ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:832911b2b7dcc722c2b7596be08ba99e814aaacd031fea173d10fda9df5d7ba1
+ size 29437531
tokenizer/valid_tokenizer_objects.pickle ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7b0fa0fb957513bb3a5e5bc9e9bacd2a9eea933bb26b41380251c0f87f4380a1
+ size 7395243
weights/transformer_weights.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:56a682c81530d7666e75d1df4ecb9c82f29c3e905f4a59e31211b2c6ae8a42ff
+ size 19690032