guillermoruiz committed
Commit b9233ad
1 Parent(s): 2a371ee

Upload Bilma

Files changed (4):
  1. README.md +1 -1
  2. config.json +4 -2
  3. modeling_bilma.py +13 -13
  4. tf_model.h5 +2 -2
README.md CHANGED
@@ -11,7 +11,7 @@ probably proofread and complete it, then remove this comment. -->
 
 # bilma
 
-This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
+This model was trained from scratch on an unknown dataset.
 It achieves the following results on the evaluation set:
 
 
config.json CHANGED
@@ -1,14 +1,16 @@
 {
+  "_name_or_path": "w",
   "architectures": [
-    "TFAutoModel"
+    "lma"
   ],
   "auto_map": {
     "AutoConfig": "configuration_bilma.BilmaConfig",
-    "TFAutoModel": "modeling_bilma.Bilma"
+    "TFAutoModelForMaskedLM": "modeling_bilma.Bilma"
   },
   "drop_rate": 0.1,
   "embedding_dim": 512,
   "model_type": "bilma",
+  "name": "xxx",
   "num_attention_heads": 4,
   "num_encoders": 2,
   "seq_max_length": 280,
modeling_bilma.py CHANGED
@@ -9,7 +9,7 @@ from typing import Dict
 import re
 import unicodedata
 
-from .configuration_bilma import BilmaConfig
+from configuration_bilma import BilmaConfig
 
 # copied from preprocessing.py
 BLANK = ' '
@@ -33,6 +33,7 @@ SYMBOLS = set(";:,.@\\-\"/" + SYMBOLS_)
 class Bilma(TFPreTrainedModel):
     config_class = BilmaConfig
     main_input_name = "capt_input"
+    base_model_prefix = "bilma"
 
     def __init__(self, config):
         self.seq_max_length = config.seq_max_length
@@ -49,9 +50,7 @@ class Bilma(TFPreTrainedModel):
                            ff_dim=config.embedding_dim,
                            vocab_size=config.vocab_size,
                            rate=config.drop_rate)
-
-        #self.call(np.zeros((1, config.seq_max_length)))
-
+
     @property
     def dummy_inputs(self) -> Dict[str, tf.Tensor]:
 
@@ -83,8 +82,9 @@
 # -------------------------------
 
 class EncoderBlock(Layer):
-    def __init__(self, patch_dim, num_heads, ff_dim, rate=0.1, **kwargs):
+    def __init__(self, layer_num, patch_dim, num_heads, ff_dim, rate=0.1, **kwargs):
         super(EncoderBlock, self).__init__(**kwargs)
+        self.ln = layer_num
         self.p_d = patch_dim
         self.n_h = num_heads
         self.f_d = ff_dim
@@ -94,8 +94,8 @@ class EncoderBlock(Layer):
         self.ffn = Sequential(
             #[Conv1D(ff_dim, kernel_size=1, activation=tf.nn.gelu),
             # Conv1D(patch_dim, kernel_size=1),]
-            [Dense(ff_dim, activation=tf.nn.gelu),
-             Dense(patch_dim),]
+            [Dense(ff_dim, activation=tf.nn.gelu, name=f"bilma/dense1_{layer_num}"),
+             Dense(patch_dim, name=f"bilma/dense2_{layer_num}")]
         )
         #self.layernorm0 = LayerNormalization(epsilon=1e-6)
         self.layernorm1 = LayerNormalization(epsilon=1e-6)
@@ -105,7 +105,7 @@
 
     def get_config(self):
         config = super(EncoderBlock, self).get_config()
-        config.update({"patch_dim":self.p_d, "num_heads":self.n_h, "ff_dim":self.f_d, "rate":self.rate})
+        config.update({"layer_num":self.ln, "patch_dim":self.p_d, "num_heads":self.n_h, "ff_dim":self.f_d, "rate":self.rate})
         return config
 
     def call(self, inputs, training=False):
@@ -172,7 +172,7 @@ class Encoder(Layer):
         self.n_h = num_heads
         self.f_d = ff_dim
         self.rate = rate
-        self._layers = [EncoderBlock(embed_dim, num_heads, ff_dim, rate=0.1) for _ in range(n)]
+        self._layers = [EncoderBlock(i, embed_dim, num_heads, ff_dim, rate=0.1) for i in range(n)]
         self.pe = positional_encoding(self.max_length, self.embed_dim)
 
     def get_config(self):
@@ -485,14 +485,14 @@ def accuracy_function(ignore_id=0):
 
 def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1):
     capt_inputs_ids = Input(shape=(max_length, ), name='capt_input')
-    capt_embedding = Embedding(vocab_size, embed_dim, mask_zero=False)
+    capt_embedding = Embedding(vocab_size, embed_dim, mask_zero=False, name="bilma/embedding")
     capt_inputs = capt_embedding(capt_inputs_ids)
 
-    enc = Encoder(num_enc, embed_dim, max_length, num_heads, ff_dim, rate=rate)
+    enc = Encoder(num_enc, embed_dim, max_length, num_heads, ff_dim, rate=rate, name="bilma/encoder")
     enc_output = enc(capt_inputs)
-    fin_output = Dense(vocab_size, use_bias=True)(enc_output)
+    fin_output = Dense(vocab_size, use_bias=True, name="bilma/dense_final")(enc_output)
 
-    caption_model = Model(inputs=capt_inputs_ids, outputs=[fin_output])
+    caption_model = Model(inputs=capt_inputs_ids, outputs=[fin_output], name="bilma_model")
     return caption_model
 
 def load(model_file):
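
Note on the modeling changes: every layer the bilma() builder creates is now explicitly named (bilma/embedding, bilma/encoder, bilma/dense_final), and EncoderBlock takes a layer_num argument so the two Dense layers inside each block get unique names, presumably to keep weight names stable and unambiguous in the HDF5 checkpoint. Because layer_num is now a required __init__ argument, it must also round-trip through get_config(). A minimal sketch of that round-trip, assuming modeling_bilma.py and its TensorFlow dependencies are importable:

from modeling_bilma import EncoderBlock

# Build one block; Keras recreates layers as EncoderBlock(**config),
# so every required __init__ argument must appear in get_config().
block = EncoderBlock(0, patch_dim=512, num_heads=4, ff_dim=512, rate=0.1)
cfg = block.get_config()

# Without the matching get_config() update in this commit, from_config()
# would raise a TypeError for the missing required layer_num argument.
clone = EncoderBlock.from_config(cfg)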
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f932984cd1b53af396b362f3b882736143583d47f4c86f356e7ae359b6bcba7c
-size 156561684
+oid sha256:f8e658b722954addfd4fe9af9d4daaa2386fd98f7838d3c763bd6e7f03c1ed79
+size 156562964
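
Note: these tf_model.h5 entries are Git LFS pointers, not the weights themselves; "oid" is the SHA-256 digest of the actual file and "size" its byte count. A minimal sketch for verifying a downloaded copy against the new pointer (the local path is an assumption):

import hashlib
import os

path = "tf_model.h5"  # assumed local path to the resolved (downloaded) file
with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()

# Should print the oid and size from the pointer above:
# f8e658b722954addfd4fe9af9d4daaa2386fd98f7838d3c763bd6e7f03c1ed79 156562964
print(digest, os.path.getsize(path))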