guillermoruiz committed commit 93ed73c
Parent(s): 0970c0f

Upload TFBilma

Files changed:
- config.json +6 -6
- configuration_bilma.py +10 -10
- modeling_bilma.py +6 -6
- tf_model.h5 +1 -1
config.json CHANGED
@@ -1,17 +1,17 @@
 {
   "_name_or_path": "w",
   "architectures": [
-    "
+    "Bilma"
   ],
   "auto_map": {
     "AutoConfig": "configuration_bilma.BilmaConfig",
-    "TFAutoModel": "modeling_bilma.
+    "TFAutoModel": "modeling_bilma.TFBilma"
   },
-  "
-  "
-  "model_type": "
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 512,
+  "model_type": "TFBilma",
   "num_attention_heads": 4,
-  "
+  "num_hidden_layers": 2,
   "seq_max_length": 280,
   "transformers_version": "4.30.2",
   "vocab_size": 28949,
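The auto_map block above is what makes the custom architecture discoverable: AutoConfig resolves to BilmaConfig in configuration_bilma.py and TFAutoModel to the TFBilma class in modeling_bilma.py. A minimal loading sketch follows; the repo id "guillermoruiz/bilma" is an assumption (the commit page does not show it), and trust_remote_code is needed because both classes live in this repo rather than in transformers itself.

from transformers import AutoConfig, TFAutoModel

# Repo id assumed for illustration; substitute the real one.
REPO = "guillermoruiz/bilma"

# trust_remote_code=True lets Transformers import configuration_bilma.py and
# modeling_bilma.py from the hub repo, as wired up by "auto_map" above.
config = AutoConfig.from_pretrained(REPO, trust_remote_code=True)
model = TFAutoModel.from_pretrained(REPO, trust_remote_code=True)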
configuration_bilma.py CHANGED
@@ -1,17 +1,17 @@
 from transformers import PretrainedConfig
 
 class BilmaConfig(PretrainedConfig):
-    model_type = "
+    model_type = "TFBilma"
 
     def __init__(
         self,
         weights="spanish",
         num_attention_heads: int = 4,
-
+        num_hidden_layers: int = 2,
         seq_max_length: int = 280,
-
+        hidden_size: int = 512,
         vocab_size: int = 28949,
-
+        hidden_dropout_prob: float = 0.1,
         **kwargs,
     ):
         if weights not in ["spanish", ""]:
@@ -19,19 +19,19 @@ class BilmaConfig(PretrainedConfig):
         if weights == "spanish":
             self.weights = weights
             self.num_attention_heads = 4
-            self.
+            self.num_hidden_layers = 2
             self.seq_max_length = 280
-            self.
+            self.hidden_size = 512
             self.vocab_size = 28949
-            self.
+            self.hidden_dropout_prob = 0.1
             super().__init__(**kwargs)
             return
 
         self.weights = weights
         self.num_attention_heads = num_attention_heads
-        self.
+        self.num_hidden_layers = num_hidden_layers
         self.seq_max_length = seq_max_length
-        self.
+        self.hidden_size = hidden_size
         self.vocab_size = vocab_size
-        self.
+        self.hidden_dropout_prob = hidden_dropout_prob
         super().__init__(**kwargs)
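The renamed constructor arguments now mirror the standard Transformers names (num_hidden_layers, hidden_size, hidden_dropout_prob), and the "spanish" branch pins the pretrained hyperparameters regardless of what the caller passes. A short sketch of both paths, using only names defined in this file:

from configuration_bilma import BilmaConfig

# Preset path: weights="spanish" hard-codes the published hyperparameters.
cfg = BilmaConfig(weights="spanish")
assert cfg.num_hidden_layers == 2 and cfg.hidden_size == 512

# Custom path: weights="" keeps the caller's values.
cfg = BilmaConfig(weights="", num_hidden_layers=4, hidden_size=256,
                  hidden_dropout_prob=0.2)
assert cfg.hidden_size == 256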
modeling_bilma.py CHANGED
@@ -30,10 +30,10 @@ SYMBOLS = set(";:,.@\\-\"/" + SYMBOLS_)
 # ------------------
 
 
-class Bilma(TFPreTrainedModel):
+class TFBilma(TFPreTrainedModel):
     config_class = BilmaConfig
     main_input_name = "capt_input"
-    base_model_prefix = "bilma"
+    #base_model_prefix = "bilma"
 
     def __init__(self, config):
         self.seq_max_length = config.seq_max_length
@@ -43,13 +43,13 @@ class Bilma(TFPreTrainedModel):
         # model_file = str((my_resources / "bilma_dataset_small_epoch-1_part-60.h5").joinpath())
         # self.model = bm.load(model_file)
         #else:
-        self.model = bilma(num_enc=config.
-                           embed_dim=config.
+        self.model = bilma(num_enc=config.num_hidden_layers,
+                           embed_dim=config.hidden_size,
                            max_length=config.seq_max_length,
                            num_heads=config.num_attention_heads,
-                           ff_dim=config.
+                           ff_dim=config.hidden_size,
                            vocab_size=config.vocab_size,
-                           rate=config.
+                           rate=config.hidden_dropout_prob)
 
     @property
     def dummy_inputs(self) -> Dict[str, tf.Tensor]:
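With the rename, TFBilma reads the standard config names and forwards them to the underlying bilma() Keras builder (num_hidden_layers -> num_enc, hidden_size -> embed_dim and ff_dim, hidden_dropout_prob -> rate). Since main_input_name is "capt_input" and dummy_inputs returns a Dict[str, tf.Tensor] keyed that way, a smoke-test sketch, assuming model is a TFBilma instance loaded as in the config.json example above:

# dummy_inputs is defined on the class (its body lies below the shown hunk),
# so its output can be fed straight back through the model.
outputs = model(model.dummy_inputs)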
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e5e4fc365e4ff80eba3e9363ebe96b53093ae73fa5a8e48b8e31b43da3a2c6e3
 size 156562964
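tf_model.h5 is tracked by Git LFS, so the repo stores only this three-line pointer (spec version, sha256 oid of the payload, byte size); the 156 MB weights live in LFS storage, and the new oid reflects the re-uploaded file. A hedged sketch for fetching the real file, again assuming the repo id:

from huggingface_hub import hf_hub_download

# Resolves the LFS pointer and downloads the actual 156,562,964-byte weights,
# verified against the sha256 oid. Repo id is assumed for illustration.
path = hf_hub_download("guillermoruiz/bilma", "tf_model.h5")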