guillermoruiz
committed on
Commit
•
d32f7f0
1
Parent(s):
81dd73b
Upload TFBilma
Browse files
- config.json +6 -4
- configuration_bilma.py +13 -5
- modeling_bilma.py +52 -19
- tf_model.h5 +1 -1
config.json
CHANGED
@@ -1,17 +1,19 @@
|
|
1 |
{
|
2 |
-
"
|
|
|
|
|
|
|
3 |
"auto_map": {
|
4 |
"AutoConfig": "configuration_bilma.BilmaConfig",
|
5 |
-
"TFAutoModel": "modeling_bilma.TFBilma"
|
6 |
-
"TFAutoModelForMaskedLM": "modeling_bilma.TFBilma"
|
7 |
},
|
8 |
"hidden_dropout_prob": 0.1,
|
9 |
"hidden_size": 512,
|
10 |
-
"include_head": null,
|
11 |
"include_top": true,
|
12 |
"model_type": "bilma",
|
13 |
"num_attention_heads": 4,
|
14 |
"num_hidden_layers": 2,
|
|
|
15 |
"seq_max_length": 280,
|
16 |
"transformers_version": "4.30.2",
|
17 |
"vocab_size": 29025,
|
|
|
1 |
{
|
2 |
+
"add_head": null,
|
3 |
+
"architectures": [
|
4 |
+
"Bilma"
|
5 |
+
],
|
6 |
"auto_map": {
|
7 |
"AutoConfig": "configuration_bilma.BilmaConfig",
|
8 |
+
"TFAutoModel": "modeling_bilma.TFBilma"
|
|
|
9 |
},
|
10 |
"hidden_dropout_prob": 0.1,
|
11 |
"hidden_size": 512,
|
|
|
12 |
"include_top": true,
|
13 |
"model_type": "bilma",
|
14 |
"num_attention_heads": 4,
|
15 |
"num_hidden_layers": 2,
|
16 |
+
"pooling": null,
|
17 |
"seq_max_length": 280,
|
18 |
"transformers_version": "4.30.2",
|
19 |
"vocab_size": 29025,
|
configuration_bilma.py
CHANGED
@@ -7,7 +7,8 @@ class BilmaConfig(PretrainedConfig):
|
|
7 |
self,
|
8 |
weights="MX",
|
9 |
include_top = True,
|
10 |
-
|
|
|
11 |
num_attention_heads: int = 4,
|
12 |
num_hidden_layers: int = 2,
|
13 |
seq_max_length: int = 280,
|
@@ -17,14 +18,20 @@ class BilmaConfig(PretrainedConfig):
|
|
17 |
**kwargs,
|
18 |
):
|
19 |
countries = ["MX"]
|
|
|
20 |
if weights not in countries:
|
21 |
raise ValueError(f"`weights` must be one of {countries}, got {weights}.")
|
22 |
-
if
|
23 |
-
raise ValueError(f"To
|
|
|
|
|
|
|
|
|
24 |
if weights is not None:
|
25 |
self.weights = weights
|
26 |
self.include_top = include_top
|
27 |
-
self.
|
|
|
28 |
self.num_attention_heads = 4
|
29 |
self.num_hidden_layers = 2
|
30 |
self.seq_max_length = 280
|
@@ -36,7 +43,8 @@ class BilmaConfig(PretrainedConfig):
|
|
36 |
|
37 |
self.weights = weights
|
38 |
self.include_top = include_top
|
39 |
-
self.
|
|
|
40 |
self.num_attention_heads = num_attention_heads
|
41 |
self.num_hidden_layers = num_hidden_layers
|
42 |
self.seq_max_length = seq_max_length
|
|
|
7 |
self,
|
8 |
weights="MX",
|
9 |
include_top = True,
|
10 |
+
add_head = None,
|
11 |
+
pooling = None,
|
12 |
num_attention_heads: int = 4,
|
13 |
num_hidden_layers: int = 2,
|
14 |
seq_max_length: int = 280,
|
|
|
18 |
**kwargs,
|
19 |
):
|
20 |
countries = ["MX"]
|
21 |
+
poolings = ["mean", "cls", "max"]
|
22 |
if weights not in countries:
|
23 |
raise ValueError(f"`weights` must be one of {countries}, got {weights}.")
|
24 |
+
if add_head is not None and include_top == True:
|
25 |
+
raise ValueError(f"To add a head, 'include_top' must be False")
|
26 |
+
if pooling is not None and include_top == True:
|
27 |
+
raise ValueError(f"To specify a pooling, 'include_top' must be False")
|
28 |
+
if pooling is not None and pooling not in poolings:
|
29 |
+
raise ValueError(f"`pooling` must be one of {poolings}, got {pooling}.")
|
30 |
if weights is not None:
|
31 |
self.weights = weights
|
32 |
self.include_top = include_top
|
33 |
+
self.add_head = add_head
|
34 |
+
self.pooling = pooling
|
35 |
self.num_attention_heads = 4
|
36 |
self.num_hidden_layers = 2
|
37 |
self.seq_max_length = 280
|
|
|
43 |
|
44 |
self.weights = weights
|
45 |
self.include_top = include_top
|
46 |
+
self.add_head = add_head
|
47 |
+
self.pooling = pooling
|
48 |
self.num_attention_heads = num_attention_heads
|
49 |
self.num_hidden_layers = num_hidden_layers
|
50 |
self.seq_max_length = seq_max_length
|
modeling_bilma.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
-
from transformers import TFPreTrainedModel, PreTrainedTokenizer
|
|
|
2 |
from tensorflow.keras.models import Model, load_model, Sequential
|
3 |
from tensorflow.keras.layers import Layer, Dense, concatenate, Input, add, Dropout, LayerNormalization, MultiHeadAttention, Embedding
|
4 |
import tensorflow as tf
|
@@ -9,7 +10,7 @@ from typing import Dict
|
|
9 |
import re
|
10 |
import unicodedata
|
11 |
|
12 |
-
from
|
13 |
|
14 |
# copied from preprocessing.py
|
15 |
BLANK = ' '
|
@@ -38,7 +39,7 @@ class TFBilma(TFPreTrainedModel):
|
|
38 |
def __init__(self, config):
|
39 |
self.seq_max_length = config.seq_max_length
|
40 |
self.include_top = config.include_top
|
41 |
-
self.
|
42 |
super().__init__(config)
|
43 |
|
44 |
self.model = bilma(num_enc=config.num_hidden_layers,
|
@@ -49,7 +50,8 @@ class TFBilma(TFPreTrainedModel):
|
|
49 |
vocab_size=config.vocab_size,
|
50 |
rate=config.hidden_dropout_prob,
|
51 |
include_top = config.include_top,
|
52 |
-
|
|
|
53 |
|
54 |
@property
|
55 |
def dummy_inputs(self) -> Dict[str, tf.Tensor]:
|
@@ -72,19 +74,26 @@ class TFBilma(TFPreTrainedModel):
|
|
72 |
|
73 |
|
74 |
def call(self, inputs):
|
75 |
-
if isinstance(inputs, Dict):
|
76 |
ins = tf.cast(inputs["input_ids"], tf.float32)
|
77 |
else:
|
78 |
ins = inputs
|
79 |
if self.include_top:
|
80 |
output = {"logits":self.model(ins)}
|
81 |
else:
|
82 |
-
if self.
|
83 |
output = {"last_hidden_state":self.model(ins)}
|
84 |
else:
|
85 |
-
output = {"
|
86 |
return output
|
87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
# copied from bilma_model.py
|
89 |
# --------------------------
|
90 |
|
@@ -115,18 +124,38 @@ def accuracy_function(ignore_id=0):
|
|
115 |
|
116 |
def mean_vectors(inputs, enc_vectors, max_length):
|
117 |
p = tf.where(inputs == 3)
|
118 |
-
count, _ = inputs.shape
|
119 |
pos = tf.transpose(p)[1]
|
120 |
C = tf.sequence_mask(pos, maxlen=max_length, dtype=tf.float32)
|
121 |
-
#C = tf.ragged.constant([[1]*i for i in pos.numpy()], dtype=tf.float32)
|
122 |
-
#C = C.to_tensor(0, shape=(count, max_length))
|
123 |
C = tf.reshape(C, (-1, max_length, 1))
|
124 |
S = tf.reduce_sum(enc_vectors * C, 1)
|
125 |
x = S / tf.expand_dims(tf.cast(pos, tf.float32), (1))
|
126 |
return x
|
127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
-
|
|
|
130 |
capt_inputs_ids = Input(shape=(max_length, ), name='input_ids')
|
131 |
capt_embedding = Embedding(vocab_size, embed_dim, mask_zero=False, name="bilma/embedding")
|
132 |
capt_inputs = capt_embedding(capt_inputs_ids)
|
@@ -136,14 +165,20 @@ def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, voca
|
|
136 |
if include_top:
|
137 |
fin_output = Dense(vocab_size, use_bias=True, name="bilma/dense_final")(enc_output)
|
138 |
else:
|
139 |
-
|
140 |
-
|
141 |
-
else:
|
142 |
-
x = enc_output
|
143 |
x = mean_vectors(capt_inputs_ids, x, max_length)
|
144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
x = Dense(m, use_bias=True, activation="relu", name=f"bilma/dense_ex_{i}")(x)
|
146 |
-
fin_output =
|
147 |
|
148 |
caption_model = Model(inputs=capt_inputs_ids, outputs=fin_output, name="bilma_model")
|
149 |
return caption_model
|
@@ -160,7 +195,6 @@ def load(model_file):
|
|
160 |
#
|
161 |
# Copied from transformer_text.py
|
162 |
# -------------------------------
|
163 |
-
|
164 |
class EncoderBlock(Layer):
|
165 |
def __init__(self, layer_num, patch_dim, num_heads, ff_dim, rate=0.1, **kwargs):
|
166 |
super(EncoderBlock, self).__init__(**kwargs)
|
@@ -242,7 +276,6 @@ class DecoderBlock(Layer):
|
|
242 |
|
243 |
return final_output, attn_output1, attn_encoder
|
244 |
|
245 |
-
|
246 |
class Encoder(Layer):
|
247 |
def __init__(self, n, embed_dim, max_length, num_heads, ff_dim, rate=0.1, **kwargs):
|
248 |
super(Encoder, self).__init__(**kwargs)
|
|
|
1 |
+
from transformers import TFPreTrainedModel, PreTrainedTokenizer, BatchEncoding
|
2 |
+
|
3 |
from tensorflow.keras.models import Model, load_model, Sequential
|
4 |
from tensorflow.keras.layers import Layer, Dense, concatenate, Input, add, Dropout, LayerNormalization, MultiHeadAttention, Embedding
|
5 |
import tensorflow as tf
|
|
|
10 |
import re
|
11 |
import unicodedata
|
12 |
|
13 |
+
from configuration_bilma import BilmaConfig
|
14 |
|
15 |
# copied from preprocessing.py
|
16 |
BLANK = ' '
|
|
|
39 |
def __init__(self, config):
|
40 |
self.seq_max_length = config.seq_max_length
|
41 |
self.include_top = config.include_top
|
42 |
+
self.add_head = config.add_head
|
43 |
super().__init__(config)
|
44 |
|
45 |
self.model = bilma(num_enc=config.num_hidden_layers,
|
|
|
50 |
vocab_size=config.vocab_size,
|
51 |
rate=config.hidden_dropout_prob,
|
52 |
include_top = config.include_top,
|
53 |
+
add_head = config.add_head,
|
54 |
+
pooling = config.pooling)
|
55 |
|
56 |
@property
|
57 |
def dummy_inputs(self) -> Dict[str, tf.Tensor]:
|
|
|
74 |
|
75 |
|
76 |
def call(self, inputs):
|
77 |
+
if isinstance(inputs, Dict) or isinstance(inputs, BatchEncoding):
|
78 |
ins = tf.cast(inputs["input_ids"], tf.float32)
|
79 |
else:
|
80 |
ins = inputs
|
81 |
if self.include_top:
|
82 |
output = {"logits":self.model(ins)}
|
83 |
else:
|
84 |
+
if self.add_head is None:
|
85 |
output = {"last_hidden_state":self.model(ins)}
|
86 |
else:
|
87 |
+
output = {"label":self.model(ins)}
|
88 |
return output
|
89 |
|
90 |
+
def get_loss_function():
|
91 |
+
return loss_funtion()
|
92 |
+
|
93 |
+
def get_acc_function():
|
94 |
+
return accuracy_function()
|
95 |
+
|
96 |
+
|
97 |
# copied from bilma_model.py
|
98 |
# --------------------------
|
99 |
|
|
|
124 |
|
125 |
def mean_vectors(inputs, enc_vectors, max_length):
    """Average the encoder vectors located before token id 3 in each row.

    Args:
        inputs: token-id tensor; every entry equal to 3 is located with
            ``tf.where``. Assumes exactly one such entry per row so the
            resulting mask lines up with the batch axis -- TODO confirm.
        enc_vectors: encoder output, multiplied by a mask of shape
            ``(-1, max_length, 1)`` (presumably ``(batch, max_length, dim)``).
        max_length: sequence length used to build the mask.

    Returns:
        The sum over axis 1 of the masked vectors divided by the column
        index of token 3 (i.e. the number of positions kept by the mask).
    """
    # Column index of each entry of `inputs` that equals 3.
    marker_cols = tf.where(inputs == 3)[:, 1]
    kept_count = tf.cast(marker_cols, tf.float32)

    # 1.0 for positions strictly before the marker, 0.0 from the marker on.
    keep = tf.sequence_mask(marker_cols, maxlen=max_length, dtype=tf.float32)
    keep = tf.reshape(keep, (-1, max_length, 1))

    summed = tf.reduce_sum(enc_vectors * keep, 1)
    return summed / tf.expand_dims(kept_count, 1)
|
133 |
|
134 |
+
def mean_diff_vectors(inputs, enc_vectors, max_length):
    """Average deviation of the masked encoder vectors from their mean.

    Args:
        inputs: token-id tensor; every entry equal to 3 is located with
            ``tf.where``. Assumes exactly one such entry per row so the
            mask lines up with the batch axis -- TODO confirm.
        enc_vectors: encoder output, multiplied by a mask of shape
            ``(-1, max_length, 1)`` (presumably ``(batch, max_length, dim)``).
        max_length: sequence length used to build the mask.

    Returns:
        ``sum(mu - vecs, axis=1) / pos`` where ``vecs`` are the masked
        vectors, ``mu`` their per-row mean and ``pos`` the column index of
        token 3.
    """
    p = tf.where(inputs == 3)
    pos = tf.transpose(p)[1]
    # Number of positions kept by the mask, as a (rows, 1) float column.
    count = tf.expand_dims(tf.cast(pos, tf.float32), 1)

    C = tf.sequence_mask(pos, maxlen=max_length, dtype=tf.float32)
    C = tf.reshape(C, (-1, max_length, 1))
    vecs = enc_vectors * C

    mu = tf.reduce_sum(vecs, 1) / count
    # `mu` is rank 2 while `vecs` is rank 3. Without an explicit sequence
    # axis, broadcasting lines up mu's batch axis with vecs' sequence axis,
    # which only works when the batch size is 1.
    mu = tf.expand_dims(mu, 1)

    # NOTE(review): positions zeroed by the mask still contribute `mu` to
    # this sum because `vecs` is 0 there -- confirm that this is intended.
    x = tf.reduce_sum(mu - vecs, 1) / count
    return x
|
144 |
+
|
145 |
+
def max_vectors(inputs, enc_vectors, max_length):
    """Feature-wise maximum of the encoder vectors located before token id 3.

    Args:
        inputs: token-id tensor; every entry equal to 3 is located with
            ``tf.where``. Assumes exactly one such entry per row so the
            mask lines up with the batch axis -- TODO confirm.
        enc_vectors: encoder output, masked with a tensor of shape
            ``(-1, max_length, 1)`` (presumably ``(batch, max_length, dim)``).
        max_length: sequence length used to build the mask.

    Returns:
        The maximum over axis 1 taken only across the positions kept by
        the mask. A row whose marker sits at column 0 has no kept position
        and yields the dtype's minimum value.
    """
    p = tf.where(inputs == 3)
    pos = tf.transpose(p)[1]

    # True for positions strictly before the marker.
    valid = tf.sequence_mask(pos, maxlen=max_length)
    valid = tf.reshape(valid, (-1, max_length, 1))

    # Multiplying by a 0/1 mask would leave zeros at masked positions, and
    # those zeros win the max whenever every valid value is negative.
    # Substitute the lowest representable value so padding can never win.
    masked = tf.where(valid, enc_vectors, enc_vectors.dtype.min)
    return tf.reduce_max(masked, 1)
|
152 |
+
|
153 |
+
def cls_vectors(inputs, enc_vectors, max_length):
    """Return the encoder vector at sequence position 0 of every row.

    ``inputs`` and ``max_length`` are not used; they are accepted so the
    signature matches the other pooling helpers.
    """
    # Integer indexing drops axis 1 directly, like slicing 0:1 and squeezing.
    return enc_vectors[:, 0, :]
|
156 |
|
157 |
+
|
158 |
+
def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1, include_top=True, add_head=None, pooling=None):
|
159 |
capt_inputs_ids = Input(shape=(max_length, ), name='input_ids')
|
160 |
capt_embedding = Embedding(vocab_size, embed_dim, mask_zero=False, name="bilma/embedding")
|
161 |
capt_inputs = capt_embedding(capt_inputs_ids)
|
|
|
165 |
if include_top:
|
166 |
fin_output = Dense(vocab_size, use_bias=True, name="bilma/dense_final")(enc_output)
|
167 |
else:
|
168 |
+
x = enc_output
|
169 |
+
if pooling == "mean":
|
|
|
|
|
170 |
x = mean_vectors(capt_inputs_ids, x, max_length)
|
171 |
+
elif pooling == "cls":
|
172 |
+
x = cls_vectors(capt_inputs_ids, x, max_length)
|
173 |
+
elif pooling == "max":
|
174 |
+
x = max_vectors(capt_inputs_ids, x, max_length)
|
175 |
+
|
176 |
+
if add_head is None:
|
177 |
+
fin_output = x
|
178 |
+
else:
|
179 |
+
for i, m in enumerate(add_head[:-1]):
|
180 |
x = Dense(m, use_bias=True, activation="relu", name=f"bilma/dense_ex_{i}")(x)
|
181 |
+
fin_output = Dense(add_head[-1], use_bias=True, activation="softmax", name=f"bilma/dense_ex_final")(x)
|
182 |
|
183 |
caption_model = Model(inputs=capt_inputs_ids, outputs=fin_output, name="bilma_model")
|
184 |
return caption_model
|
|
|
195 |
#
|
196 |
# Copied from transformer_text.py
|
197 |
# -------------------------------
|
|
|
198 |
class EncoderBlock(Layer):
|
199 |
def __init__(self, layer_num, patch_dim, num_heads, ff_dim, rate=0.1, **kwargs):
|
200 |
super(EncoderBlock, self).__init__(**kwargs)
|
|
|
276 |
|
277 |
return final_output, attn_output1, attn_encoder
|
278 |
|
|
|
279 |
class Encoder(Layer):
|
280 |
def __init__(self, n, embed_dim, max_length, num_heads, ff_dim, rate=0.1, **kwargs):
|
281 |
super(Encoder, self).__init__(**kwargs)
|
tf_model.h5
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 156875820
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2cc8b04b7a93e6fa9eb46a7a30d89f2e97e4b8ac52da1c0e35239ded8a29482c
|
3 |
size 156875820
|