LOHJC committed
Commit • 19574d8
Parent(s): 8ffee80
add application files
- app.py +98 -0
- dependency.py +316 -0
- requirements.txt +4 -0
- styles.css +4 -0
app.py
ADDED
@@ -0,0 +1,98 @@
import gradio as gr
import tensorflow as tf
import dependency

# Loading the saved .keras model is environment-dependent, so the trained weights are restored instead.
# MODEL = "cn_to_en_transformer.keras"
# transformer = tf.keras.models.load_model(MODEL)

EMBEDDING_DEPTH = dependency.EMBEDDING_DEPTH
MAX_TOKENIZE_LENGTH = dependency.MAX_TOKENIZE_LENGTH
tokenizer_cn = dependency.tokenizer_cn
tokenizer_en = dependency.tokenizer_en

num_layers = 1
d_model = EMBEDDING_DEPTH
dff = MAX_TOKENIZE_LENGTH
num_heads = 8
dropout_rate = 0.1

# Create a new model instance and restore the trained weights.
transformer = dependency.Transformer(num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff,
                                     input_vocab_size=tokenizer_cn.vocab_size, target_vocab_size=tokenizer_en.vocab_size,
                                     dropout_rate=dropout_rate)
transformer.load_weights('./checkpoints/cn_to_en_transformer_checkpoint')

def preprocess(text):
    text = tf.constant(tokenizer_cn.encode(text, add_special_tokens=True))[tf.newaxis]
    return text

def inference(text):
    start_end = tokenizer_en.encode("", add_special_tokens=True)
    start = tf.constant(start_end[0], dtype=tf.int64)[tf.newaxis]
    end = tf.constant(start_end[1], dtype=tf.int64)[tf.newaxis]

    # A `tf.TensorArray` is required here (instead of a Python list), so that the
    # dynamic loop can be traced by `tf.function`.
    output_array = tf.TensorArray(dtype=tf.int64, size=0, dynamic_size=True)
    output_array = output_array.write(0, start)

    for i in tf.range(MAX_TOKENIZE_LENGTH):
        output = tf.transpose(output_array.stack())
        predictions = transformer([text, output], training=False)

        # Select the last token from the `seq_len` dimension.
        predictions = predictions[:, -1:, :]  # Shape `(batch_size, 1, vocab_size)`.

        predicted_id = tf.argmax(predictions, axis=-1)

        # Concatenate the `predicted_id` to the output which is given to the
        # decoder as its input.
        output_array = output_array.write(i + 1, predicted_id[0])

        if predicted_id == end:
            break

    text = tf.transpose(output_array.stack())
    return text

def postprocess(text):
    text = tokenizer_en.decode(text[0], skip_special_tokens=True)
    return text

def translate(text):
    if text.strip() == "":
        return ""
    text = preprocess(text)
    text = inference(text)
    return postprocess(text)

DESCRIPTION = ""
DESCRIPTION += "<h1>中英翻译器</h1>"
DESCRIPTION += "<h1>Chinese to English translator</h1>"
DESCRIPTION += "<p>This translator is built with a transformer implemented from scratch.</p>"
DESCRIPTION += "<p>This is only a demonstration of the transformer; the translation is not 100% correct.</p>"
DESCRIPTION += "<ul><li><a href=\"https://medium.com/@jiachiewloh/nlp-chinese-to-english-translation-by-using-transformer-6503c1f4a139\">Article</a></li>"
DESCRIPTION += "<li><a href=\"https://www.kaggle.com/code/jclohjc/cn-en-translation-using-transformer\">Code</a></li></ul>"

with gr.Blocks(css="styles.css") as demo:
    gr.HTML(DESCRIPTION)

    # The input and output text boxes.
    with gr.Row():
        input_text = gr.Text(label="中文 (Chinese)",
                             info="请输入您想翻译的句子 (Please enter the text to be translated)")
        output_text = gr.Text(label="English (英文)",
                              info="Here is the translated text (这是翻译后的句子)")

    with gr.Row():
        gr.Button("Translate").click(fn=translate, inputs=input_text, outputs=output_text)
        gr.ClearButton().add([input_text, output_text])

    # Examples
    gr.Examples(examples=[["祝您有个美好的一天", "Have a nice day"], ["早上好,很高兴见到你", "Good morning, nice to meet you"],
                          ["你叫什么名字", "What is your name"], ["我喜欢爬山", "I like climbing"], ["我爱你", "I love you"]],
                inputs=[input_text, output_text],
                outputs=[output_text])

demo.launch()
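
Note: the greedy decoding loop in inference() relies on tokenizer_en.encode("", add_special_tokens=True) returning exactly two ids, the [CLS] and [SEP] markers, which are then used as the decoder's start and end tokens. A minimal sketch (not part of the commit) to verify that assumption; it needs only the tokenizers and no trained weights:

from transformers import BertTokenizer

tokenizer_en = BertTokenizer.from_pretrained("bert-base-cased")
tokenizer_cn = BertTokenizer.from_pretrained("bert-base-chinese")

# Encoding an empty string with special tokens yields only the [CLS] and [SEP] ids,
# which app.py uses as the start/end markers of the decoder sequence.
start_id, end_id = tokenizer_en.encode("", add_special_tokens=True)
print(start_id, end_id)                                         # e.g. 101 102
print(tokenizer_en.convert_ids_to_tokens([start_id, end_id]))   # ['[CLS]', '[SEP]']

# The Chinese side is tokenized roughly per character by bert-base-chinese.
print(tokenizer_cn.encode("我爱你", add_special_tokens=True))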
dependency.py
ADDED
@@ -0,0 +1,316 @@
import tensorflow as tf
import numpy as np

from transformers import BertTokenizer
tokenizer_en = BertTokenizer.from_pretrained("bert-base-cased")
tokenizer_cn = BertTokenizer.from_pretrained("bert-base-chinese")
MAX_TOKENIZE_LENGTH = 128
EMBEDDING_DEPTH = 256

def positional_encoding(length, depth):
    depth = depth / 2
    positions = np.arange(length)[:, np.newaxis]      # (seq, 1)
    depths = np.arange(depth)[np.newaxis, :] / depth  # (1, depth)

    angle_rates = 1 / (10000**depths)                 # (1, depth)
    angle_rads = positions * angle_rates              # (pos, depth)

    pos_encoding = np.concatenate(
        [np.sin(angle_rads), np.cos(angle_rads)],
        axis=-1)
    return tf.cast(pos_encoding, dtype=tf.float32)

class PositionalEmbedding(tf.keras.layers.Layer):
    def __init__(self, vocab_size, d_model):
        super().__init__()
        self.d_model = d_model
        self.embedding = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=d_model, mask_zero=True)
        self.pos_encoding = positional_encoding(length=MAX_TOKENIZE_LENGTH, depth=d_model)

    def compute_mask(self, *args, **kwargs):
        return self.embedding.compute_mask(*args, **kwargs)

    def call(self, x):
        length = tf.shape(x)[1]
        x = self.embedding(x)
        # This factor sets the relative scale of the embedding and positional_encoding.
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x = x + self.pos_encoding[tf.newaxis, :length, :]
        return x

class BaseAttention(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__()
        self.mha = tf.keras.layers.MultiHeadAttention(**kwargs)
        self.layernorm = tf.keras.layers.LayerNormalization()
        self.add = tf.keras.layers.Add()

class CrossAttention(BaseAttention):
    def call(self, x, context):  # x = query, context = key/value pairs
        attn_output, attn_scores = self.mha(
            query=x,
            key=context,
            value=context,
            return_attention_scores=True)

        # Cache the attention scores for plotting later.
        self.last_attn_scores = attn_scores

        x = self.add([x, attn_output])
        x = self.layernorm(x)

        return x

class GlobalSelfAttention(BaseAttention):
    def call(self, x):
        attn_output = self.mha(
            query=x,
            value=x,
            key=x)
        x = self.add([x, attn_output])
        x = self.layernorm(x)
        return x

class CausalSelfAttention(BaseAttention):
    def call(self, x):
        attn_output = self.mha(
            query=x,
            value=x,
            key=x,
            use_causal_mask=True)
        x = self.add([x, attn_output])
        x = self.layernorm(x)
        return x

class FeedForward(tf.keras.layers.Layer):
    def __init__(self, d_model, dff, dropout_rate=0.1):
        super().__init__()
        self.seq = tf.keras.Sequential([
            tf.keras.layers.Dense(dff, activation='relu'),
            tf.keras.layers.Dense(d_model),
            tf.keras.layers.Dropout(dropout_rate)
        ])
        self.add = tf.keras.layers.Add()
        self.layer_norm = tf.keras.layers.LayerNormalization()

    def call(self, x):
        x = self.add([x, self.seq(x)])
        x = self.layer_norm(x)
        return x

class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, *, d_model, num_heads, dff, dropout_rate=0.1):
        super().__init__()

        self.self_attention = GlobalSelfAttention(
            num_heads=num_heads,
            key_dim=d_model,
            dropout=dropout_rate)

        self.ffn = FeedForward(d_model, dff)

    def call(self, x):
        x = self.self_attention(x)
        x = self.ffn(x)
        return x

class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self,
                 *,
                 d_model,
                 num_heads,
                 dff,
                 dropout_rate=0.1):
        super(DecoderLayer, self).__init__()

        self.causal_self_attention = CausalSelfAttention(
            num_heads=num_heads,
            key_dim=d_model,
            dropout=dropout_rate)

        self.cross_attention = CrossAttention(
            num_heads=num_heads,
            key_dim=d_model,
            dropout=dropout_rate)

        self.ffn = FeedForward(d_model, dff)

    def call(self, x, context):
        x = self.causal_self_attention(x=x)
        x = self.cross_attention(x=x, context=context)

        # Cache the last attention scores for plotting later.
        self.last_attn_scores = self.cross_attention.last_attn_scores

        x = self.ffn(x)  # Shape `(batch_size, seq_len, d_model)`.
        return x

class Encoder(tf.keras.layers.Layer):
    def __init__(self, *, num_layers, d_model, num_heads,
                 dff, vocab_size, dropout_rate=0.1):
        super().__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        self.pos_embedding = PositionalEmbedding(
            vocab_size=vocab_size, d_model=d_model)

        self.enc_layers = [
            EncoderLayer(d_model=d_model,
                         num_heads=num_heads,
                         dff=dff,
                         dropout_rate=dropout_rate)
            for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, x):
        # `x` is token-IDs shape: (batch, seq_len)
        x = self.pos_embedding(x)  # Shape `(batch_size, seq_len, d_model)`.

        # Add dropout.
        x = self.dropout(x)

        for i in range(self.num_layers):
            x = self.enc_layers[i](x)

        return x  # Shape `(batch_size, seq_len, d_model)`.

class Decoder(tf.keras.layers.Layer):
    def __init__(self, *, num_layers, d_model, num_heads, dff, vocab_size,
                 dropout_rate=0.1):
        super(Decoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        self.pos_embedding = PositionalEmbedding(vocab_size=vocab_size,
                                                 d_model=d_model)
        self.dropout = tf.keras.layers.Dropout(dropout_rate)
        self.dec_layers = [
            DecoderLayer(d_model=d_model, num_heads=num_heads,
                         dff=dff, dropout_rate=dropout_rate)
            for _ in range(num_layers)]

        self.last_attn_scores = None

    def call(self, x, context):
        # `x` is token-IDs shape (batch, target_seq_len)
        x = self.pos_embedding(x)  # (batch_size, target_seq_len, d_model)

        x = self.dropout(x)

        for i in range(self.num_layers):
            x = self.dec_layers[i](x, context)

        self.last_attn_scores = self.dec_layers[-1].last_attn_scores

        # The shape of x is (batch_size, target_seq_len, d_model).
        return x

# @tf.keras.saving.register_keras_serializable()
class Transformer(tf.keras.Model):
    def __init__(self, *, num_layers, d_model, num_heads, dff,
                 input_vocab_size, target_vocab_size, dropout_rate=0.1):
        super().__init__()
        self.encoder = Encoder(num_layers=num_layers, d_model=d_model,
                               num_heads=num_heads, dff=dff,
                               vocab_size=input_vocab_size,
                               dropout_rate=dropout_rate)

        self.decoder = Decoder(num_layers=num_layers, d_model=d_model,
                               num_heads=num_heads, dff=dff,
                               vocab_size=target_vocab_size,
                               dropout_rate=dropout_rate)

        self.final_layer = tf.keras.layers.Dense(target_vocab_size)

    def call(self, inputs):
        # To use a Keras model with `.fit` you must pass all your inputs in the
        # first argument.
        context, x = inputs

        context = self.encoder(context)  # (batch_size, context_len, d_model)

        x = self.decoder(x, context)  # (batch_size, target_len, d_model)

        # Final linear layer output.
        logits = self.final_layer(x)  # (batch_size, target_len, target_vocab_size)

        try:
            # Drop the keras mask, so it doesn't scale the losses/metrics.
            # b/250038731
            del logits._keras_mask
        except AttributeError:
            pass

        # Return the final output logits.
        return logits

# @tf.keras.saving.register_keras_serializable()
# class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
#     def __init__(self, d_model, warmup_steps=4000):
#         super().__init__()

#         self.d_model = d_model
#         self.d_model = tf.cast(self.d_model, tf.float32)

#         self.warmup_steps = warmup_steps

#     def __call__(self, step):
#         step = tf.cast(step, dtype=tf.float32)
#         arg1 = tf.math.rsqrt(step)
#         arg2 = step * (self.warmup_steps ** -1.5)

#         return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

#     def get_config(self):
#         return {
#             'd_model': int(self.d_model),
#             'warmup_steps': int(self.warmup_steps)
#         }

# # learning_rate = CustomSchedule(EMBEDDING_DEPTH)

# # @tf.keras.saving.register_keras_serializable()
# class CustomAdam(tf.keras.optimizers.Adam):
#     def __init__(self, custom_param, **kwargs):
#         super(CustomAdam, self).__init__(**kwargs)
#         self.custom_param = custom_param  # This is the learning rate (custom schedule).

#     def get_config(self):
#         config = super(CustomAdam, self).get_config()
#         config.update({
#             'custom_param': self.custom_param
#         })
#         return config

# # optimizer = CustomAdam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

# # @tf.keras.saving.register_keras_serializable()
# def masked_loss(label, pred):
#     mask = label != 0
#     loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
#         from_logits=True, reduction='none')
#     loss = loss_object(label, pred)

#     mask = tf.cast(mask, dtype=loss.dtype)
#     loss *= mask

#     loss = tf.reduce_sum(loss) / tf.reduce_sum(mask)
#     return loss

# # @tf.keras.saving.register_keras_serializable()
# def masked_accuracy(label, pred):
#     pred = tf.argmax(pred, axis=2)
#     label = tf.cast(label, pred.dtype)
#     match = label == pred

#     mask = label != 0

#     match = match & mask

#     match = tf.cast(match, dtype=tf.float32)
#     mask = tf.cast(mask, dtype=tf.float32)
#     return tf.reduce_sum(match) / tf.reduce_sum(mask)
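
Note: as a quick sanity check of the architecture above (not part of the commit), the Transformer can be instantiated with the same hyperparameters app.py uses and fed random token ids; the output should be per-position logits over the target vocabulary. A minimal sketch, assuming dependency.py is importable and the tokenizers download successfully:

import tensorflow as tf
import dependency

# Build an untrained model with app.py's hyperparameters; no checkpoint needed for a shape check.
transformer = dependency.Transformer(
    num_layers=1,
    d_model=dependency.EMBEDDING_DEPTH,
    num_heads=8,
    dff=dependency.MAX_TOKENIZE_LENGTH,
    input_vocab_size=dependency.tokenizer_cn.vocab_size,
    target_vocab_size=dependency.tokenizer_en.vocab_size,
    dropout_rate=0.1)

# Random token ids standing in for a tokenized Chinese source and English target prefix.
cn = tf.random.uniform((2, 10), minval=1, maxval=100, dtype=tf.int64)  # (batch, src_len)
en = tf.random.uniform((2, 7), minval=1, maxval=100, dtype=tf.int64)   # (batch, tgt_len)

logits = transformer([cn, en], training=False)
print(logits.shape)  # (2, 7, tokenizer_en.vocab_size): per-position logits over the English vocabulary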
requirements.txt
ADDED
@@ -0,0 +1,4 @@
tensorflow
numpy
matplotlib
transformers
styles.css
ADDED
@@ -0,0 +1,4 @@
h1, p, ul {
    text-align: center;
}