KurtMica committed on
Commit
933e371
1 Parent(s): 456edc7

Model files.

Browse files
.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.th filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - allennlp
4
+ ---
5
+
6
+ # TODO: Fill this model card
best.th ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebd204298a4831976599eae5c8e54c5289ab3c89e6894b66fbb26309b3a9d2c8
3
+ size 507197049
config.json ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_reader": {
3
+ "type": "multitask",
4
+ "readers": {
5
+ "ud": {
6
+ "type": "universal_dependencies",
7
+ "token_indexers": {
8
+ "transformer": {
9
+ "type": "pretrained_transformer_mismatched",
10
+ "max_length": 512,
11
+ "model_name": "MLRS/BERTu"
12
+ }
13
+ }
14
+ }
15
+ }
16
+ },
17
+ "model": {
18
+ "type": "multitask",
19
+ "arg_name_mapping": {
20
+ "backbone": {
21
+ "tokens": "text",
22
+ "words": "text"
23
+ }
24
+ },
25
+ "backbone": {
26
+ "type": "embedder_and_mask",
27
+ "text_field_embedder": {
28
+ "token_embedders": {
29
+ "transformer": {
30
+ "type": "pretrained_transformer_mismatched_with_dropout",
31
+ "last_layer_only": false,
32
+ "layer_dropout": 0.1,
33
+ "max_length": 512,
34
+ "model_name": "MLRS/BERTu",
35
+ "tokenizer_kwargs": {},
36
+ "train_parameters": true
37
+ }
38
+ }
39
+ }
40
+ },
41
+ "heads": {
42
+ "ud": {
43
+ "type": "biaffine_parser",
44
+ "arc_representation_dim": 100,
45
+ "dropout": 0.3,
46
+ "encoder": {
47
+ "type": "pass_through",
48
+ "input_dim": 768
49
+ },
50
+ "initializer": {
51
+ "regexes": [
52
+ [
53
+ ".*projection.*weight",
54
+ {
55
+ "type": "xavier_uniform"
56
+ }
57
+ ],
58
+ [
59
+ ".*projection.*bias",
60
+ {
61
+ "type": "zero"
62
+ }
63
+ ],
64
+ [
65
+ ".*tag_bilinear.*weight",
66
+ {
67
+ "type": "xavier_uniform"
68
+ }
69
+ ],
70
+ [
71
+ ".*tag_bilinear.*bias",
72
+ {
73
+ "type": "zero"
74
+ }
75
+ ],
76
+ [
77
+ ".*weight_ih.*",
78
+ {
79
+ "type": "xavier_uniform"
80
+ }
81
+ ],
82
+ [
83
+ ".*weight_hh.*",
84
+ {
85
+ "type": "orthogonal"
86
+ }
87
+ ],
88
+ [
89
+ ".*bias_ih.*",
90
+ {
91
+ "type": "zero"
92
+ }
93
+ ],
94
+ [
95
+ ".*bias_hh.*",
96
+ {
97
+ "type": "lstm_hidden_bias"
98
+ }
99
+ ]
100
+ ]
101
+ },
102
+ "input_dropout": 0.3,
103
+ "tag_representation_dim": 100,
104
+ "use_mst_decoding_for_validation": true
105
+ }
106
+ }
107
+ },
108
+ "train_data_path": {
109
+ "ud": "ud-treebanks-v2.8/UD_Maltese-MUDT/mt_mudt-ud-train.conllu"
110
+ },
111
+ "validation_data_path": {
112
+ "ud": "ud-treebanks-v2.8/UD_Maltese-MUDT/mt_mudt-ud-dev.conllu"
113
+ },
114
+ "trainer": {
115
+ "callbacks": [
116
+ {
117
+ "tensorboard_writer": {
118
+ "should_log_learning_rate": true,
119
+ "should_log_parameter_statistics": true
120
+ },
121
+ "type": "tensorboard"
122
+ }
123
+ ],
124
+ "cuda_device": 0,
125
+ "grad_norm": 5,
126
+ "learning_rate_scheduler": {
127
+ "type": "ulmfit_sqrt",
128
+ "affected_group_count": 2,
129
+ "decay_factor": 0.05,
130
+ "discriminative_fine_tuning": true,
131
+ "factor": 5,
132
+ "gradual_unfreezing": true,
133
+ "model_size": 1,
134
+ "start_step": 9,
135
+ "warmup_steps": 9
136
+ },
137
+ "num_epochs": 200,
138
+ "optimizer": {
139
+ "type": "huggingface_adamw",
140
+ "betas": [
141
+ 0.9,
142
+ 0.999
143
+ ],
144
+ "correct_bias": false,
145
+ "lr": 0.0005,
146
+ "parameter_groups": [
147
+ [
148
+ [
149
+ "text_field_embedder.*transformer_model.embeddings.*_embeddings.*",
150
+ "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).weight"
151
+ ],
152
+ {}
153
+ ],
154
+ [
155
+ [
156
+ "text_field_embedder.*transformer_model.embeddings.LayerNorm.*",
157
+ "text_field_embedder.*transformer_model.encoder.*.output.LayerNorm.*",
158
+ "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).bias",
159
+ "text_field_embedder.*transformer_model.pooler.dense.bias"
160
+ ],
161
+ {
162
+ "weight_decay": 0
163
+ }
164
+ ],
165
+ [
166
+ [
167
+ "text_field_embedder.*._scalar_mix.*",
168
+ "text_field_embedder.*transformer_model.pooler.dense.weight",
169
+ "_head_sentinel",
170
+ "head_arc_feedforward._linear_layers.*.weight",
171
+ "child_arc_feedforward._linear_layers.*.weight",
172
+ "head_tag_feedforward._linear_layers.*.weight",
173
+ "child_tag_feedforward._linear_layers.*.weight",
174
+ "arc_attention._weight_matrix",
175
+ "tag_bilinear.weight",
176
+ "tag_projection_layer._module.weight",
177
+ "crf",
178
+ "linear.weight",
179
+ "tagger_linear.weight"
180
+ ],
181
+ {}
182
+ ],
183
+ [
184
+ [
185
+ "head_arc_feedforward._linear_layers.*.bias",
186
+ "child_arc_feedforward._linear_layers.*.bias",
187
+ "head_tag_feedforward._linear_layers.*.bias",
188
+ "child_tag_feedforward._linear_layers.*.bias",
189
+ "arc_attention._bias",
190
+ "tag_bilinear.bias",
191
+ "tag_projection_layer._module.bias",
192
+ "linear.bias",
193
+ "tagger_linear.bias"
194
+ ],
195
+ {
196
+ "weight_decay": 0
197
+ }
198
+ ]
199
+ ],
200
+ "weight_decay": 0.01
201
+ },
202
+ "patience": 20,
203
+ "validation_metric": [
204
+ "+ud_LAS"
205
+ ]
206
+ },
207
+ "data_loader": {
208
+ "type": "multitask",
209
+ "scheduler": {
210
+ "type": "unbalanced_homogeneous_roundrobin",
211
+ "batch_size": 128,
212
+ "dataset_sizes": {
213
+ "ud": 1123
214
+ }
215
+ },
216
+ "shuffle": true
217
+ },
218
+ "numpy_seed": 2460,
219
+ "pytorch_seed": 246,
220
+ "random_seed": 24601,
221
+ "validation_data_loader": {
222
+ "type": "multitask",
223
+ "scheduler": {
224
+ "type": "homogeneous_roundrobin",
225
+ "batch_size": 128
226
+ },
227
+ "shuffle": true
228
+ }
229
+ }
log/train/events.out.tfevents ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e8739db30b40b47551cb2d54384117cb4bd03e4383dabba4c75a036c980abcc
3
+ size 1625837
log/validation/events.out.tfevents ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb9bcc47355c8d01911bdbc830a909c902c40b8c2fda6c1097366b4dd7ed1318
3
+ size 17262
metrics.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_epoch": 59,
3
+ "peak_worker_0_memory_MB": 4387.515625,
4
+ "peak_gpu_0_memory_MB": 15878.68505859375,
5
+ "training_duration": "0:09:54.768662",
6
+ "training_start_epoch": 0,
7
+ "training_epochs": 78,
8
+ "epoch": 78,
9
+ "training_ud_UAS": 0.9929822815050766,
10
+ "training_ud_LAS": 0.9908421262193908,
11
+ "training_ud_UEM": 0.9073909171861086,
12
+ "training_ud_LEM": 0.8797862867319679,
13
+ "training_loss": 0.029289920917815633,
14
+ "training_worker_0_memory_MB": 4387.515625,
15
+ "training_gpu_0_memory_MB": 15878.68505859375,
16
+ "validation_ud_UAS": 0.9297203929612436,
17
+ "validation_ud_LAS": 0.8946345676346756,
18
+ "validation_ud_UEM": 0.3903002309468822,
19
+ "validation_ud_LEM": 0.23325635103926096,
20
+ "validation_loss": 1.351550132036209,
21
+ "best_validation_ud_UAS": 0.9311238259743064,
22
+ "best_validation_ud_LAS": 0.8967936953470798,
23
+ "best_validation_ud_UEM": 0.4018475750577367,
24
+ "best_validation_ud_LEM": 0.2471131639722864,
25
+ "best_validation_loss": 1.1966833174228668
26
+ }
vocabulary/.lock ADDED
File without changes
vocabulary/head_tags.txt ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ punct
2
+ det
3
+ case
4
+ mark
5
+ case:det
6
+ obl
7
+ amod
8
+ nsubj
9
+ root
10
+ nmod:poss
11
+ conj
12
+ obj
13
+ cc
14
+ advmod
15
+ acl
16
+ xcomp
17
+ nmod
18
+ advcl
19
+ ccomp
20
+ cop
21
+ flat:name
22
+ aux
23
+ advmod:neg
24
+ aux:part
25
+ discourse
26
+ list
27
+ obl:arg
28
+ nummod
29
+ flat
30
+ compound
31
+ nsubj:pass
32
+ appos
33
+ aux:neg
34
+ parataxis
35
+ fixed
36
+ aux:pass
37
+ obl:agent
38
+ iobj
39
+ vocative
40
+ csubj
41
+ expl
42
+ dislocated
43
+ cop:expl
44
+ goeswith
45
+ dep
46
+ orphan
47
+ reparandum
vocabulary/non_padded_namespaces.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *labels
2
+ *tags
vocabulary/pos.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @@UNKNOWN@@
2
+ NOUN
3
+ VERB
4
+ ADP
5
+ PUNCT
6
+ DET
7
+ SCONJ
8
+ ADJ
9
+ PRON
10
+ PROPN
11
+ ADV
12
+ AUX
13
+ CCONJ
14
+ NUM
15
+ X
16
+ PART
17
+ SYM
18
+ INTJ