LOHJC committed
Commit 19574d8 (1 parent: 8ffee80)

add application files

Files changed (4)
  1. app.py +98 -0
  2. dependency.py +316 -0
  3. requirements.txt +4 -0
  4. styles.css +4 -0
app.py ADDED
@@ -0,0 +1,98 @@
+ import gradio as gr
+ import tensorflow as tf
+ import dependency
+
+ # Loading the full saved model is environment-dependent, so the weights are restored from a checkpoint below instead.
+ # MODEL = "cn_to_en_transformer.keras"
+ # transformer = tf.keras.models.load_model(MODEL)
+
+ EMBEDDING_DEPTH = dependency.EMBEDDING_DEPTH
+ MAX_TOKENIZE_LENGTH = dependency.MAX_TOKENIZE_LENGTH
+ tokenizer_cn = dependency.tokenizer_cn
+ tokenizer_en = dependency.tokenizer_en
+
+ num_layers = 1
+ d_model = EMBEDDING_DEPTH
+ dff = MAX_TOKENIZE_LENGTH
+ num_heads = 8
+ dropout_rate = 0.1
+
+ # Create a new model instance.
+ transformer = dependency.Transformer(num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff,
+                                      input_vocab_size=tokenizer_cn.vocab_size, target_vocab_size=tokenizer_en.vocab_size, dropout_rate=dropout_rate)
+ transformer.load_weights('./checkpoints/cn_to_en_transformer_checkpoint')
+
+ def preprocess(text):
+     text = tf.constant(tokenizer_cn.encode(text, add_special_tokens=True))[tf.newaxis]
+     return text
+
+ def inference(text):
+     start_end = tokenizer_en.encode("", add_special_tokens=True)
+     start = tf.constant(start_end[0], dtype=tf.int64)[tf.newaxis]
+     end = tf.constant(start_end[1], dtype=tf.int64)[tf.newaxis]
+
+     # A `tf.TensorArray` is required here (instead of a Python list) so that the
+     # dynamic loop can be traced by `tf.function`.
+     output_array = tf.TensorArray(dtype=tf.int64, size=0, dynamic_size=True)
+     output_array = output_array.write(0, start)
+
+     for i in tf.range(MAX_TOKENIZE_LENGTH):
+         output = tf.transpose(output_array.stack())
+         predictions = transformer([text, output], training=False)
+
+         # Select the last token from the `seq_len` dimension.
+         predictions = predictions[:, -1:, :]  # Shape `(batch_size, 1, vocab_size)`.
+
+         predicted_id = tf.argmax(predictions, axis=-1)
+
+         # Concatenate the `predicted_id` to the output, which is given to the
+         # decoder as its input.
+         output_array = output_array.write(i + 1, predicted_id[0])
+
+         if predicted_id == end:
+             break
+
+     text = tf.transpose(output_array.stack())
+     return text
+
+ def postprocess(text):
+     text = tokenizer_en.decode(text[0], skip_special_tokens=True)
+     return text
+
+ def translate(text):
+     if text.strip() == "":
+         return ""
+     text = preprocess(text)
+     text = inference(text)
+     return postprocess(text)
+
+ DESCRIPTION = ""
+ DESCRIPTION += "<h1>中英翻译器</h1>"
+ DESCRIPTION += "<h1>Chinese to English translator</h1>"
+ DESCRIPTION += "<p>This translator is built with a transformer implemented from scratch</p>"
+ DESCRIPTION += "<p>This is just a demonstration of how a transformer can be used; the translation is not 100% accurate</p>"
+ DESCRIPTION += "<ul><li><a href=\"https://medium.com/@jiachiewloh/nlp-chinese-to-english-translation-by-using-transformer-6503c1f4a139\">Article</a></li>"
+ DESCRIPTION += "<li><a href=\"https://www.kaggle.com/code/jclohjc/cn-en-translation-using-transformer\">Code</a></li></ul>"
+
+ with gr.Blocks(css="styles.css") as demo:
+     gr.HTML(DESCRIPTION)
+
+     # The input and output textboxes
+     with gr.Row():
+         input_text = gr.Text(label="中文 (Chinese)",
+                              info="请输入您想翻译的句子 (Please enter the text to be translated)")
+         output_text = gr.Text(label="English (英文)",
+                               info="Here is the translated text (这是翻译后的句子)")
+
+     with gr.Row():
+         gr.Button("Translate").click(fn=translate, inputs=input_text, outputs=output_text)
+         gr.ClearButton().add([input_text, output_text])
+
+     # Examples
+     gr.Examples(examples=[["祝您有个美好的一天", "Have a nice day"], ["早上好,很高兴见到你", "Good Morning, nice to meet you"],
+                           ["你叫什么名字", "What is your name"], ["我喜欢爬山", "I like climbing"], ["我爱你", "I love you"]],
+                 inputs=[input_text, output_text],
+                 outputs=[output_text])
+
+ demo.launch()
+
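The pipeline in app.py is: `preprocess` tokenizes the Chinese input with the BERT tokenizer and adds a batch dimension, `inference` decodes greedily one token at a time until the English [SEP] token appears, and `postprocess` turns the ids back into text. A minimal sketch of what the preprocessing step produces (not part of this commit; it only assumes `tensorflow` and `transformers` are installed):

import tensorflow as tf
from transformers import BertTokenizer

# Same Chinese tokenizer that dependency.py loads.
tokenizer_cn = BertTokenizer.from_pretrained("bert-base-chinese")

# Mirrors app.py's preprocess(): encode with special tokens and add a batch axis.
ids = tf.constant(tokenizer_cn.encode("我爱你", add_special_tokens=True))[tf.newaxis]
print(ids.shape)  # (1, 5): [CLS], 我, 爱, 你, [SEP]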
dependency.py ADDED
@@ -0,0 +1,316 @@
+
+ import tensorflow as tf
+ import numpy as np
+
+ from transformers import BertTokenizer
+ tokenizer_en = BertTokenizer.from_pretrained("bert-base-cased")
+ tokenizer_cn = BertTokenizer.from_pretrained("bert-base-chinese")
+ MAX_TOKENIZE_LENGTH = 128
+ EMBEDDING_DEPTH = 256
+
+ def positional_encoding(length, depth):
+     depth = depth / 2
+     positions = np.arange(length)[:, np.newaxis]      # (seq, 1)
+     depths = np.arange(depth)[np.newaxis, :] / depth  # (1, depth)
+
+     angle_rates = 1 / (10000**depths)                 # (1, depth)
+     angle_rads = positions * angle_rates              # (pos, depth)
+
+     pos_encoding = np.concatenate(
+         [np.sin(angle_rads), np.cos(angle_rads)],
+         axis=-1)
+     return tf.cast(pos_encoding, dtype=tf.float32)
+
+ class PositionalEmbedding(tf.keras.layers.Layer):
+     def __init__(self, vocab_size, d_model):
+         super().__init__()
+         self.d_model = d_model
+         self.embedding = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=d_model, mask_zero=True)
+         self.pos_encoding = positional_encoding(length=MAX_TOKENIZE_LENGTH, depth=d_model)
+
+     def compute_mask(self, *args, **kwargs):
+         return self.embedding.compute_mask(*args, **kwargs)
+
+     def call(self, x):
+         length = tf.shape(x)[1]
+         x = self.embedding(x)
+         # This factor sets the relative scale of the embedding and positional_encoding.
+         x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
+         x = x + self.pos_encoding[tf.newaxis, :length, :]
+         return x
+
+ class BaseAttention(tf.keras.layers.Layer):
+     def __init__(self, **kwargs):
+         super().__init__()
+         self.mha = tf.keras.layers.MultiHeadAttention(**kwargs)
+         self.layernorm = tf.keras.layers.LayerNormalization()
+         self.add = tf.keras.layers.Add()
+
+ class CrossAttention(BaseAttention):
+     def call(self, x, context):  # x = query, context = key/value pairs
+         attn_output, attn_scores = self.mha(
+             query=x,
+             key=context,
+             value=context,
+             return_attention_scores=True)
+
+         # Cache the attention scores for plotting later.
+         self.last_attn_scores = attn_scores
+
+         x = self.add([x, attn_output])
+         x = self.layernorm(x)
+
+         return x
+
+ class GlobalSelfAttention(BaseAttention):
+     def call(self, x):
+         attn_output = self.mha(
+             query=x,
+             value=x,
+             key=x)
+         x = self.add([x, attn_output])
+         x = self.layernorm(x)
+         return x
+
+ class CausalSelfAttention(BaseAttention):
+     def call(self, x):
+         attn_output = self.mha(
+             query=x,
+             value=x,
+             key=x,
+             use_causal_mask=True)
+         x = self.add([x, attn_output])
+         x = self.layernorm(x)
+         return x
+
+ class FeedForward(tf.keras.layers.Layer):
+     def __init__(self, d_model, dff, dropout_rate=0.1):
+         super().__init__()
+         self.seq = tf.keras.Sequential([
+             tf.keras.layers.Dense(dff, activation='relu'),
+             tf.keras.layers.Dense(d_model),
+             tf.keras.layers.Dropout(dropout_rate)
+         ])
+         self.add = tf.keras.layers.Add()
+         self.layer_norm = tf.keras.layers.LayerNormalization()
+
+     def call(self, x):
+         x = self.add([x, self.seq(x)])
+         x = self.layer_norm(x)
+         return x
+
+ class EncoderLayer(tf.keras.layers.Layer):
+     def __init__(self, *, d_model, num_heads, dff, dropout_rate=0.1):
+         super().__init__()
+
+         self.self_attention = GlobalSelfAttention(
+             num_heads=num_heads,
+             key_dim=d_model,
+             dropout=dropout_rate)
+
+         self.ffn = FeedForward(d_model, dff)
+
+     def call(self, x):
+         x = self.self_attention(x)
+         x = self.ffn(x)
+         return x
+
+ class DecoderLayer(tf.keras.layers.Layer):
+     def __init__(self,
+                  *,
+                  d_model,
+                  num_heads,
+                  dff,
+                  dropout_rate=0.1):
+         super(DecoderLayer, self).__init__()
+
+         self.causal_self_attention = CausalSelfAttention(
+             num_heads=num_heads,
+             key_dim=d_model,
+             dropout=dropout_rate)
+
+         self.cross_attention = CrossAttention(
+             num_heads=num_heads,
+             key_dim=d_model,
+             dropout=dropout_rate)
+
+         self.ffn = FeedForward(d_model, dff)
+
+     def call(self, x, context):
+         x = self.causal_self_attention(x=x)
+         x = self.cross_attention(x=x, context=context)
+
+         # Cache the last attention scores for plotting later.
+         self.last_attn_scores = self.cross_attention.last_attn_scores
+
+         x = self.ffn(x)  # Shape `(batch_size, seq_len, d_model)`.
+         return x
+
+ class Encoder(tf.keras.layers.Layer):
+     def __init__(self, *, num_layers, d_model, num_heads,
+                  dff, vocab_size, dropout_rate=0.1):
+         super().__init__()
+
+         self.d_model = d_model
+         self.num_layers = num_layers
+
+         self.pos_embedding = PositionalEmbedding(
+             vocab_size=vocab_size, d_model=d_model)
+
+         self.enc_layers = [
+             EncoderLayer(d_model=d_model,
+                          num_heads=num_heads,
+                          dff=dff,
+                          dropout_rate=dropout_rate)
+             for _ in range(num_layers)]
+         self.dropout = tf.keras.layers.Dropout(dropout_rate)
+
+     def call(self, x):
+         # `x` is token-IDs shape: (batch, seq_len)
+         x = self.pos_embedding(x)  # Shape `(batch_size, seq_len, d_model)`.
+
+         # Add dropout.
+         x = self.dropout(x)
+
+         for i in range(self.num_layers):
+             x = self.enc_layers[i](x)
+
+         return x  # Shape `(batch_size, seq_len, d_model)`.
+
+ class Decoder(tf.keras.layers.Layer):
+     def __init__(self, *, num_layers, d_model, num_heads, dff, vocab_size,
+                  dropout_rate=0.1):
+         super(Decoder, self).__init__()
+
+         self.d_model = d_model
+         self.num_layers = num_layers
+
+         self.pos_embedding = PositionalEmbedding(vocab_size=vocab_size,
+                                                  d_model=d_model)
+         self.dropout = tf.keras.layers.Dropout(dropout_rate)
+         self.dec_layers = [
+             DecoderLayer(d_model=d_model, num_heads=num_heads,
+                          dff=dff, dropout_rate=dropout_rate)
+             for _ in range(num_layers)]
+
+         self.last_attn_scores = None
+
+     def call(self, x, context):
+         # `x` is token-IDs shape (batch, target_seq_len)
+         x = self.pos_embedding(x)  # (batch_size, target_seq_len, d_model)
+
+         x = self.dropout(x)
+
+         for i in range(self.num_layers):
+             x = self.dec_layers[i](x, context)
+
+         self.last_attn_scores = self.dec_layers[-1].last_attn_scores
+
+         # The shape of x is (batch_size, target_seq_len, d_model).
+         return x
+
+ # @tf.keras.saving.register_keras_serializable()
+ class Transformer(tf.keras.Model):
+     def __init__(self, *, num_layers, d_model, num_heads, dff,
+                  input_vocab_size, target_vocab_size, dropout_rate=0.1):
+         super().__init__()
+         self.encoder = Encoder(num_layers=num_layers, d_model=d_model,
+                                num_heads=num_heads, dff=dff,
+                                vocab_size=input_vocab_size,
+                                dropout_rate=dropout_rate)
+
+         self.decoder = Decoder(num_layers=num_layers, d_model=d_model,
+                                num_heads=num_heads, dff=dff,
+                                vocab_size=target_vocab_size,
+                                dropout_rate=dropout_rate)
+
+         self.final_layer = tf.keras.layers.Dense(target_vocab_size)
+
+     def call(self, inputs):
+         # To use a Keras model with `.fit` you must pass all your inputs in the
+         # first argument.
+         context, x = inputs
+
+         context = self.encoder(context)  # (batch_size, context_len, d_model)
+
+         x = self.decoder(x, context)  # (batch_size, target_len, d_model)
+
+         # Final linear layer output.
+         logits = self.final_layer(x)  # (batch_size, target_len, target_vocab_size)
+
+         try:
+             # Drop the keras mask, so it doesn't scale the losses/metrics.
+             # b/250038731
+             del logits._keras_mask
+         except AttributeError:
+             pass
+
+         # Return the final output logits.
+         return logits
+
+ # @tf.keras.saving.register_keras_serializable()
+ # class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
+ #     def __init__(self, d_model, warmup_steps=4000):
+ #         super().__init__()
+
+ #         self.d_model = d_model
+ #         self.d_model = tf.cast(self.d_model, tf.float32)
+
+ #         self.warmup_steps = warmup_steps
+
+ #     def __call__(self, step):
+ #         step = tf.cast(step, dtype=tf.float32)
+ #         arg1 = tf.math.rsqrt(step)
+ #         arg2 = step * (self.warmup_steps ** -1.5)
+
+ #         return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)
+
+ #     def get_config(self):
+ #         return {
+ #             'd_model': int(self.d_model),
+ #             'warmup_steps': int(self.warmup_steps)
+ #         }
+
+ # # learning_rate = CustomSchedule(EMBEDDING_DEPTH)
+
+ # # @tf.keras.saving.register_keras_serializable()
+ # class CustomAdam(tf.keras.optimizers.Adam):
+ #     def __init__(self, custom_param, **kwargs):
+ #         super(CustomAdam, self).__init__(**kwargs)
+ #         self.custom_param = custom_param  # this is the learning rate (custom schedule)
+
+ #     def get_config(self):
+ #         config = super(CustomAdam, self).get_config()
+ #         config.update({
+ #             'custom_param': self.custom_param
+ #         })
+ #         return config
+
+ # # optimizer = CustomAdam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)
+
+ # # @tf.keras.saving.register_keras_serializable()
+ # def masked_loss(label, pred):
+ #     mask = label != 0
+ #     loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
+ #         from_logits=True, reduction='none')
+ #     loss = loss_object(label, pred)
+
+ #     mask = tf.cast(mask, dtype=loss.dtype)
+ #     loss *= mask
+
+ #     loss = tf.reduce_sum(loss) / tf.reduce_sum(mask)
+ #     return loss
+
+ # # @tf.keras.saving.register_keras_serializable()
+ # def masked_accuracy(label, pred):
+ #     pred = tf.argmax(pred, axis=2)
+ #     label = tf.cast(label, pred.dtype)
+ #     match = label == pred
+
+ #     mask = label != 0
+
+ #     match = match & mask
+
+ #     match = tf.cast(match, dtype=tf.float32)
+ #     mask = tf.cast(mask, dtype=tf.float32)
+ #     return tf.reduce_sum(match) / tf.reduce_sum(mask)
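In dependency.py the encoder consumes Chinese token ids, the decoder consumes English token ids plus the encoder output, and the final Dense layer emits one logit per target-vocabulary entry. A minimal shape-check sketch (not part of this commit; the dummy ids are assumptions, the single-layer configuration mirrors app.py, and importing dependency downloads both BERT tokenizers):

import tensorflow as tf
import dependency

# Same hyperparameters that app.py uses when restoring the checkpoint.
model = dependency.Transformer(
    num_layers=1,
    d_model=dependency.EMBEDDING_DEPTH,
    num_heads=8,
    dff=dependency.MAX_TOKENIZE_LENGTH,
    input_vocab_size=dependency.tokenizer_cn.vocab_size,
    target_vocab_size=dependency.tokenizer_en.vocab_size)

# Dummy token ids (non-zero so the mask_zero=True embedding does not mask everything).
src = tf.ones((1, 10), dtype=tf.int64)  # Chinese side
tgt = tf.ones((1, 4), dtype=tf.int64)   # English side

logits = model([src, tgt], training=False)
print(logits.shape)  # (1, 4, target_vocab_size)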
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ tensorflow
+ numpy
+ matplotlib
+ transformers
styles.css ADDED
@@ -0,0 +1,4 @@
+
+ h1, p, ul {
+     text-align: center;
+ }