guillermoruiz committed on
Commit
88153b4
1 Parent(s): 21d7546

Upload TFBilma

Files changed (4)
  1. config.json +4 -4
  2. configuration_bilma.py +1 -1
  3. modeling_bilma.py +4 -4
  4. tf_model.h5 +2 -2
config.json CHANGED
@@ -1,15 +1,15 @@
 {
-  "_name_or_path": "guillermoruiz/bilma",
+  "_name_or_path": "bilma",
   "architectures": [
-    "AutoModel"
+    "Bilma"
   ],
   "auto_map": {
     "AutoConfig": "configuration_bilma.BilmaConfig",
-    "TFAutoModel": "modeling_bilma.Bilma"
+    "TFAutoModel": "modeling_bilma.TFBilma"
   },
   "hidden_dropout_prob": 0.1,
   "hidden_size": 512,
-  "model_type": "Bilma",
+  "model_type": "bilma",
   "num_attention_heads": 4,
   "num_hidden_layers": 2,
   "seq_max_length": 280,
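Taken together, the config.json edits repoint the auto classes at the renamed TFBilma wrapper and lowercase the model_type. A minimal loading sketch under that assumption (not part of the commit; it relies only on the auto_map entries shown above and on trusting remote code):

# Hedged usage sketch, not the repository's code.
from transformers import AutoConfig, TFAutoModel

config = AutoConfig.from_pretrained("guillermoruiz/bilma", trust_remote_code=True)
model = TFAutoModel.from_pretrained("guillermoruiz/bilma", trust_remote_code=True)
print(type(model).__name__)  # expected to be TFBilma, per the updated auto_map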
configuration_bilma.py CHANGED
@@ -1,7 +1,7 @@
 from transformers import PretrainedConfig
 
 class BilmaConfig(PretrainedConfig):
-    model_type = "Bilma"
+    model_type = "bilma"
 
     def __init__(
         self,
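The lowercase model_type now matches the "model_type" key written to config.json, which the auto classes require. A hedged sketch of registering the pair locally (assuming configuration_bilma.py and modeling_bilma.py have been downloaded next to the script; this is optional, since the hub flow above relies on auto_map instead):

# Hedged sketch, not shipped with the commit.
from transformers import AutoConfig, TFAutoModel
from configuration_bilma import BilmaConfig
from modeling_bilma import TFBilma

AutoConfig.register("bilma", BilmaConfig)   # key must equal BilmaConfig.model_type
TFAutoModel.register(BilmaConfig, TFBilma)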
modeling_bilma.py CHANGED
@@ -90,7 +90,7 @@ class EncoderBlock(Layer):
         self.f_d = ff_dim
         self.rate = rate
 
-        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=patch_dim)
+        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=patch_dim, name=f"bilma/MHA_{layer_num}")
         self.ffn = Sequential(
             #[Conv1D(ff_dim, kernel_size=1, activation=tf.nn.gelu),
             # Conv1D(patch_dim, kernel_size=1),]
@@ -98,8 +98,8 @@ class EncoderBlock(Layer):
              Dense(patch_dim, name=f"bilma/dense2_{layer_num}")]
         )
         #self.layernorm0 = LayerNormalization(epsilon=1e-6)
-        self.layernorm1 = LayerNormalization(epsilon=1e-6)
-        self.layernorm2 = LayerNormalization(epsilon=1e-6)
+        self.layernorm1 = LayerNormalization(epsilon=1e-6, name=f"ln1_{layer_num}")
+        self.layernorm2 = LayerNormalization(epsilon=1e-6, name=f"ln2_{layer_num}")
         self.dropout1 = Dropout(rate)
         self.dropout2 = Dropout(rate)
 
@@ -172,7 +172,7 @@ class Encoder(Layer):
         self.n_h = num_heads
         self.f_d = ff_dim
         self.rate = rate
-        self._layers = [EncoderBlock(i, embed_dim, num_heads, ff_dim, rate=0.1) for i in range(n)]
+        self._layers = [EncoderBlock(i, embed_dim, num_heads, ff_dim, rate=0.1, name=f"enc_block_{i}") for i in range(n)]
         self.pe = positional_encoding(self.max_length, self.embed_dim)
 
     def get_config(self):
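These edits give every sublayer an explicit, layer-indexed name instead of Keras' auto-generated ones ("multi_head_attention_3", "layer_normalization_7", ...), which keeps the weight paths stored in tf_model.h5 stable across rebuilds and matches the refreshed checkpoint below. A standalone sketch of the same pattern (simplified names, not the repository's classes; the diff itself prefixes names with "bilma/"):

# Hedged sketch illustrating explicit sublayer naming; not from modeling_bilma.py.
import tensorflow as tf
from tensorflow.keras.layers import Layer, LayerNormalization, MultiHeadAttention

class TinyBlock(Layer):
    def __init__(self, layer_num, dim, heads, **kwargs):
        super().__init__(**kwargs)
        # Layer-indexed names are fixed by the constructor, not by creation order.
        self.att = MultiHeadAttention(num_heads=heads, key_dim=dim, name=f"mha_{layer_num}")
        self.ln1 = LayerNormalization(epsilon=1e-6, name=f"ln1_{layer_num}")

    def call(self, x):
        return self.ln1(x + self.att(x, x))

blk = TinyBlock(0, dim=8, heads=2, name="enc_block_0")
blk(tf.zeros((1, 4, 8)))           # build the sublayers with a dummy input
print(blk.att.name, blk.ln1.name)  # -> mha_0 ln1_0, deterministic across runs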
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3fa26aa874976519680daa037613b66a5edc0021c69039e2b4e01f20e499f690
-size 156562964
+oid sha256:2b66af189fde956eb4a944a6473178c837e1e3616230fc6049a11ed1c1b38379
+size 156564220