guillermoruiz committed
Commit d32f7f0
1 Parent(s): 81dd73b

Upload TFBilma

Files changed (4):
  1. config.json +6 -4
  2. configuration_bilma.py +13 -5
  3. modeling_bilma.py +52 -19
  4. tf_model.h5 +1 -1
config.json CHANGED
@@ -1,17 +1,19 @@
 {
-  "_name_or_path": "bilma_MX",
+  "add_head": null,
+  "architectures": [
+    "Bilma"
+  ],
   "auto_map": {
     "AutoConfig": "configuration_bilma.BilmaConfig",
-    "TFAutoModel": "modeling_bilma.TFBilma",
-    "TFAutoModelForMaskedLM": "modeling_bilma.TFBilma"
+    "TFAutoModel": "modeling_bilma.TFBilma"
   },
   "hidden_dropout_prob": 0.1,
   "hidden_size": 512,
-  "include_head": null,
   "include_top": true,
   "model_type": "bilma",
   "num_attention_heads": 4,
   "num_hidden_layers": 2,
+  "pooling": null,
   "seq_max_length": 280,
   "transformers_version": "4.30.2",
   "vocab_size": 29025,
configuration_bilma.py CHANGED
@@ -7,7 +7,8 @@ class BilmaConfig(PretrainedConfig):
         self,
         weights="MX",
         include_top = True,
-        include_head = None,
+        add_head = None,
+        pooling = None,
         num_attention_heads: int = 4,
         num_hidden_layers: int = 2,
         seq_max_length: int = 280,
@@ -17,14 +18,20 @@ class BilmaConfig(PretrainedConfig):
         **kwargs,
     ):
         countries = ["MX"]
+        poolings = ["mean", "cls", "max"]
         if weights not in countries:
             raise ValueError(f"`weights` must be one of {countries}, got {weights}.")
-        if include_head is not None and include_top == True:
-            raise ValueError(f"To include a head, 'include_top' must be False")
+        if add_head is not None and include_top == True:
+            raise ValueError(f"To add a head, 'include_top' must be False")
+        if pooling is not None and include_top == True:
+            raise ValueError(f"To specify a pooling, 'include_top' must be False")
+        if pooling is not None and pooling not in poolings:
+            raise ValueError(f"`pooling` must be one of {poolings}, got {pooling}.")
         if weights is not None:
             self.weights = weights
             self.include_top = include_top
-            self.include_head = include_head
+            self.add_head = add_head
+            self.pooling = pooling
             self.num_attention_heads = 4
             self.num_hidden_layers = 2
             self.seq_max_length = 280
@@ -36,7 +43,8 @@ class BilmaConfig(PretrainedConfig):
 
         self.weights = weights
         self.include_top = include_top
-        self.include_head = include_head
+        self.add_head = add_head
+        self.pooling = pooling
         self.num_attention_heads = num_attention_heads
         self.num_hidden_layers = num_hidden_layers
         self.seq_max_length = seq_max_length
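
The new constructor arguments only apply when the language-model top is dropped: add_head (a list of dense-layer sizes) and pooling (one of "mean", "cls", "max") each raise a ValueError if combined with include_top=True, and an unknown pooling name is rejected. A hedged sketch of the intended usage, assuming configuration_bilma.py is importable locally and using illustrative layer sizes:

# Sketch only: exercising the validation added in this commit.
from configuration_bilma import BilmaConfig

# No LM top, mean pooling over token vectors, then a small illustrative head.
cfg = BilmaConfig(weights="MX", include_top=False, pooling="mean", add_head=[64, 3])

# Conflicting options are rejected:
try:
    BilmaConfig(weights="MX", include_top=True, add_head=[64, 3])
except ValueError as err:
    print(err)  # To add a head, 'include_top' must be False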
modeling_bilma.py CHANGED
@@ -1,4 +1,5 @@
-from transformers import TFPreTrainedModel, PreTrainedTokenizer
+from transformers import TFPreTrainedModel, PreTrainedTokenizer, BatchEncoding
+
 from tensorflow.keras.models import Model, load_model, Sequential
 from tensorflow.keras.layers import Layer, Dense, concatenate, Input, add, Dropout, LayerNormalization, MultiHeadAttention, Embedding
 import tensorflow as tf
@@ -9,7 +10,7 @@ from typing import Dict
 import re
 import unicodedata
 
-from .configuration_bilma import BilmaConfig
+from configuration_bilma import BilmaConfig
 
 # copied from preprocessing.py
 BLANK = ' '
@@ -38,7 +39,7 @@ class TFBilma(TFPreTrainedModel):
     def __init__(self, config):
         self.seq_max_length = config.seq_max_length
         self.include_top = config.include_top
-        self.include_head = config.include_head
+        self.add_head = config.add_head
         super().__init__(config)
 
         self.model = bilma(num_enc=config.num_hidden_layers,
@@ -49,7 +50,8 @@
                            vocab_size=config.vocab_size,
                            rate=config.hidden_dropout_prob,
                            include_top = config.include_top,
-                           include_head = config.include_head)
+                           add_head = config.add_head,
+                           pooling = config.pooling)
 
     @property
     def dummy_inputs(self) -> Dict[str, tf.Tensor]:
@@ -72,19 +74,26 @@
 
 
     def call(self, inputs):
-        if isinstance(inputs, Dict):
+        if isinstance(inputs, Dict) or isinstance(inputs, BatchEncoding):
             ins = tf.cast(inputs["input_ids"], tf.float32)
         else:
             ins = inputs
         if self.include_top:
             output = {"logits":self.model(ins)}
         else:
-            if self.include_head is None:
+            if self.add_head is None:
                 output = {"last_hidden_state":self.model(ins)}
             else:
-                output = {"logits":self.model(ins)}
+                output = {"label":self.model(ins)}
         return output
 
+    def get_loss_function():
+        return loss_funtion()
+
+    def get_acc_function():
+        return accuracy_function()
+
+
 # copied from bilma_model.py
 # --------------------------
 
@@ -115,18 +124,38 @@ def accuracy_function(ignore_id=0):
 
 def mean_vectors(inputs, enc_vectors, max_length):
     p = tf.where(inputs == 3)
-    count, _ = inputs.shape
     pos = tf.transpose(p)[1]
     C = tf.sequence_mask(pos, maxlen=max_length, dtype=tf.float32)
-    #C = tf.ragged.constant([[1]*i for i in pos.numpy()], dtype=tf.float32)
-    #C = C.to_tensor(0, shape=(count, max_length))
     C = tf.reshape(C, (-1, max_length, 1))
     S = tf.reduce_sum(enc_vectors * C, 1)
     x = S / tf.expand_dims(tf.cast(pos, tf.float32), (1))
     return x
 
+def mean_diff_vectors(inputs, enc_vectors, max_length):
+    p = tf.where(inputs == 3)
+    pos = tf.transpose(p)[1]
+    C = tf.sequence_mask(pos, maxlen=max_length, dtype=tf.float32)
+    C = tf.reshape(C, (-1, max_length, 1))
+    vecs = enc_vectors * C
+    S = tf.reduce_sum(vecs, 1)
+    mu = S / tf.expand_dims(tf.cast(pos, tf.float32), (1))
+    x = tf.reduce_sum(mu - vecs, 1) / tf.expand_dims(tf.cast(pos, tf.float32), (1))
+    return x
+
+def max_vectors(inputs, enc_vectors, max_length):
+    p = tf.where(inputs == 3)
+    pos = tf.transpose(p)[1]
+    C = tf.sequence_mask(pos, maxlen=max_length, dtype=tf.float32)
+    C = tf.reshape(C, (-1, max_length, 1))
+    x = tf.reduce_max(enc_vectors * C, 1)
+    return x
+
+def cls_vectors(inputs, enc_vectors, max_length):
+    x = tf.squeeze(enc_vectors[:, 0:1, :], axis=1)
+    return x
 
-def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1, include_top=True, include_head=None):
+
+def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1, include_top=True, add_head=None, pooling=None):
     capt_inputs_ids = Input(shape=(max_length, ), name='input_ids')
     capt_embedding = Embedding(vocab_size, embed_dim, mask_zero=False, name="bilma/embedding")
     capt_inputs = capt_embedding(capt_inputs_ids)
@@ -136,14 +165,20 @@ def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1, include_top=True, include_head=None):
     if include_top:
         fin_output = Dense(vocab_size, use_bias=True, name="bilma/dense_final")(enc_output)
     else:
-        if include_head is None:
-            fin_output = enc_output
-        else:
-            x = enc_output
+        x = enc_output
+        if pooling == "mean":
             x = mean_vectors(capt_inputs_ids, x, max_length)
-            for i, m in enumerate(include_head[:-1]):
+        elif pooling == "cls":
+            x = cls_vectors(capt_inputs_ids, x, max_length)
+        elif pooling == "max":
+            x = max_vectors(capt_inputs_ids, x, max_length)
+
+        if add_head is None:
+            fin_output = x
+        else:
+            for i, m in enumerate(add_head[:-1]):
                 x = Dense(m, use_bias=True, activation="relu", name=f"bilma/dense_ex_{i}")(x)
-            fin_output = [Dense(include_head[-1], use_bias=True, activation="softmax", name=f"bilma/dense_ex_final")(x), enc_output]
+            fin_output = Dense(add_head[-1], use_bias=True, activation="softmax", name=f"bilma/dense_ex_final")(x)
 
     caption_model = Model(inputs=capt_inputs_ids, outputs=fin_output, name="bilma_model")
     return caption_model
@@ -160,7 +195,6 @@ def load(model_file):
 #
 # Copied from transformer_text.py
 # -------------------------------
-
 class EncoderBlock(Layer):
     def __init__(self, layer_num, patch_dim, num_heads, ff_dim, rate=0.1, **kwargs):
         super(EncoderBlock, self).__init__(**kwargs)
@@ -242,7 +276,6 @@ class DecoderBlock(Layer):
 
     return final_output, attn_output1, attn_encoder
 
-
 class Encoder(Layer):
     def __init__(self, n, embed_dim, max_length, num_heads, ff_dim, rate=0.1, **kwargs):
         super(Encoder, self).__init__(**kwargs)
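
With these changes, TFBilma.call accepts a plain tensor, a dict, or a tokenizer BatchEncoding, and returns a dict keyed by "logits" (with the LM top), "last_hidden_state" (no top, no head), or "label" (pooled output through the added head); the pooling function ("mean", "cls", or "max") is applied before any extra Dense layers. A hedged usage sketch; the hub repo id is a placeholder and the tokenizer is not part of this diff.

# Sketch only: loading the TF model through auto_map and calling it on dummy ids.
import tensorflow as tf
from transformers import TFAutoModel

model = TFAutoModel.from_pretrained(
    "guillermoruiz/bilma_MX",   # placeholder hub id
    trust_remote_code=True,     # imports modeling_bilma.TFBilma via auto_map
)

# call() casts "input_ids" to float32 internally, so integer ids are fine here.
batch = {"input_ids": tf.zeros((1, model.config.seq_max_length), dtype=tf.int32)}
outputs = model(batch)
print(outputs.keys())  # dict_keys(['logits']) when include_top=True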
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:53217c655c71bcc7ab238879925eb57c52f8a2d170554a8ca059c0cadd490c2a
+oid sha256:2cc8b04b7a93e6fa9eb46a7a30d89f2e97e4b8ac52da1c0e35239ded8a29482c
 size 156875820