guillermoruiz committed
Commit 4309e8e · 1 parent: 4b45dbc

Upload TFBilma

Files changed (4):
  1. config.json +1 -0
  2. configuration_bilma.py +4 -1
  3. modeling_bilma.py +8 -4
  4. tf_model.h5 +1 -1
config.json CHANGED
@@ -9,6 +9,7 @@
   },
   "hidden_dropout_prob": 0.1,
   "hidden_size": 512,
+  "include_top": true,
   "model_type": "bilma",
   "num_attention_heads": 4,
   "num_hidden_layers": 2,
configuration_bilma.py CHANGED
@@ -6,6 +6,7 @@ class BilmaConfig(PretrainedConfig):
     def __init__(
         self,
         weights="spanish",
+        include_top=True,
         num_attention_heads: int = 4,
         num_hidden_layers: int = 2,
         seq_max_length: int = 280,
@@ -15,9 +16,10 @@ class BilmaConfig(PretrainedConfig):
         **kwargs,
     ):
         if weights not in ["spanish", ""]:
-            raise ValueError(f"`weights` must be 'spanish' or '', got {weights}.")
+            raise ValueError(f"`weights` must be 'spanish', got {weights}.")
         if weights == "spanish":
             self.weights = weights
+            self.include_top = include_top
             self.num_attention_heads = 4
             self.num_hidden_layers = 2
             self.seq_max_length = 280
@@ -28,6 +30,7 @@ class BilmaConfig(PretrainedConfig):
             return
 
         self.weights = weights
+        self.include_top = include_top
         self.num_attention_heads = num_attention_heads
         self.num_hidden_layers = num_hidden_layers
         self.seq_max_length = seq_max_length
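
The flag is stored on both branches of __init__, so it is honored whether the pretrained Spanish defaults are used or the hyperparameters are given explicitly. A minimal usage sketch with the classes defined in this repository (direct instantiation of TFBilma is assumed here, rather than going through from_pretrained):

    from configuration_bilma import BilmaConfig
    from modeling_bilma import TFBilma

    # include_top=False keeps the encoder output instead of the vocabulary projection.
    config = BilmaConfig(weights="spanish", include_top=False)
    model = TFBilma(config)
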
modeling_bilma.py CHANGED
@@ -9,7 +9,7 @@ from typing import Dict
 import re
 import unicodedata
 
-from .configuration_bilma import BilmaConfig
+from configuration_bilma import BilmaConfig
 
 # copied from preprocessing.py
 BLANK = ' '
@@ -49,7 +49,8 @@ class TFBilma(TFPreTrainedModel):
                           num_heads=config.num_attention_heads,
                           ff_dim=config.hidden_size,
                           vocab_size=config.vocab_size,
-                          rate=config.hidden_dropout_prob)
+                          rate=config.hidden_dropout_prob,
+                          include_top=config.include_top)
 
     @property
     def dummy_inputs(self) -> Dict[str, tf.Tensor]:
@@ -486,14 +487,17 @@ def accuracy_function(ignore_id=0):
         return tf.math.divide_no_nan(tf.reduce_sum(accuracies), tf.reduce_sum(mask))
     return acc_mlm
 
-def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1):
+def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1, include_top=True):
     capt_inputs_ids = Input(shape=(max_length, ), name='input_ids')
     capt_embedding = Embedding(vocab_size, embed_dim, mask_zero=False, name="bilma/embedding")
     capt_inputs = capt_embedding(capt_inputs_ids)
 
     enc = Encoder(num_enc, embed_dim, max_length, num_heads, ff_dim, rate=rate, name="bilma/encoder")
     enc_output = enc(capt_inputs)
-    fin_output = Dense(vocab_size, use_bias=True, name="bilma/dense_final")(enc_output)
+    if include_top:
+        fin_output = Dense(vocab_size, use_bias=True, name="bilma/dense_final")(enc_output)
+    else:
+        fin_output = enc_output
 
     caption_model = Model(inputs=capt_inputs_ids, outputs=[fin_output], name="bilma_model")
     return caption_model
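
With the new argument, bilma() either ends in the bilma/dense_final projection or returns the encoder output directly. A small sketch using the function's own default arguments; it assumes the Keras helpers defined earlier in modeling_bilma.py (e.g. Encoder) are importable, and the shapes in the comments assume the encoder preserves the embedding width, as the Dense head suggests:

    from modeling_bilma import bilma

    lm_model  = bilma(include_top=True)   # output (None, 50, 9739): max_length x vocab_size logits
    enc_model = bilma(include_top=False)  # output (None, 50, 300):  max_length x embed_dim hidden states

    lm_model.summary()
    enc_model.summary()
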
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae6a05836d990960fb2c4a9fa4616d18e0c396c2e075f85cc98d1ab8aecdc7a0
+oid sha256:6d31e357973be9bf86a3676237280b3ffe852ac994efd62d6eb67e06e36cd039
 size 156564220