KurtMica committed on
Commit
73136b2
1 Parent(s): a72f1aa

Model files.

Browse files
.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.th filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - allennlp
4
+ ---
5
+
6
+ # TODO: Fill this model card
best.th ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00963fc2cb35f01730934d9d8a119e4d7849a7803bc68a217fc7fea6b2b2a82e
3
+ size 504043215
config.json ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_reader": {
3
+ "type": "multitask",
4
+ "readers": {
5
+ "sentiment": {
6
+ "type": "sentiment_analysis",
7
+ "token_indexers": {
8
+ "transformer": {
9
+ "type": "pretrained_transformer_mismatched",
10
+ "max_length": 512,
11
+ "model_name": "MLRS/BERTu"
12
+ }
13
+ }
14
+ }
15
+ }
16
+ },
17
+ "model": {
18
+ "type": "multitask",
19
+ "arg_name_mapping": {
20
+ "backbone": {
21
+ "tokens": "text",
22
+ "words": "text"
23
+ }
24
+ },
25
+ "backbone": {
26
+ "type": "embedder_and_mask",
27
+ "text_field_embedder": {
28
+ "token_embedders": {
29
+ "transformer": {
30
+ "type": "pretrained_transformer_mismatched_with_dropout",
31
+ "last_layer_only": false,
32
+ "layer_dropout": 0.1,
33
+ "max_length": 512,
34
+ "model_name": "MLRS/BERTu",
35
+ "tokenizer_kwargs": {},
36
+ "train_parameters": true
37
+ }
38
+ }
39
+ }
40
+ },
41
+ "heads": {
42
+ "sentiment": {
43
+ "type": "linear_classifier",
44
+ "dropout": 0.5,
45
+ "encoder": {
46
+ "type": "pass_through",
47
+ "input_dim": 768
48
+ },
49
+ "initializer": {
50
+ "regexes": [
51
+ [
52
+ ".*projection.*weight",
53
+ {
54
+ "type": "xavier_uniform"
55
+ }
56
+ ],
57
+ [
58
+ ".*projection.*bias",
59
+ {
60
+ "type": "zero"
61
+ }
62
+ ],
63
+ [
64
+ ".*tag_bilinear.*weight",
65
+ {
66
+ "type": "xavier_uniform"
67
+ }
68
+ ],
69
+ [
70
+ ".*tag_bilinear.*bias",
71
+ {
72
+ "type": "zero"
73
+ }
74
+ ],
75
+ [
76
+ ".*weight_ih.*",
77
+ {
78
+ "type": "xavier_uniform"
79
+ }
80
+ ],
81
+ [
82
+ ".*weight_hh.*",
83
+ {
84
+ "type": "orthogonal"
85
+ }
86
+ ],
87
+ [
88
+ ".*bias_ih.*",
89
+ {
90
+ "type": "zero"
91
+ }
92
+ ],
93
+ [
94
+ ".*bias_hh.*",
95
+ {
96
+ "type": "lstm_hidden_bias"
97
+ }
98
+ ]
99
+ ]
100
+ }
101
+ }
102
+ }
103
+ },
104
+ "train_data_path": {
105
+ "sentiment": "sentiment/mt/train.csv"
106
+ },
107
+ "validation_data_path": {
108
+ "sentiment": "sentiment/mt/dev.csv"
109
+ },
110
+ "trainer": {
111
+ "callbacks": [
112
+ {
113
+ "tensorboard_writer": {
114
+ "should_log_learning_rate": true,
115
+ "should_log_parameter_statistics": true
116
+ },
117
+ "type": "tensorboard"
118
+ }
119
+ ],
120
+ "cuda_device": 0,
121
+ "grad_norm": 5,
122
+ "learning_rate_scheduler": {
123
+ "type": "ulmfit_sqrt",
124
+ "affected_group_count": 2,
125
+ "decay_factor": 0.05,
126
+ "discriminative_fine_tuning": true,
127
+ "factor": 5,
128
+ "gradual_unfreezing": true,
129
+ "model_size": 1,
130
+ "start_step": 19,
131
+ "warmup_steps": 19
132
+ },
133
+ "num_epochs": 200,
134
+ "optimizer": {
135
+ "type": "huggingface_adamw",
136
+ "betas": [
137
+ 0.9,
138
+ 0.999
139
+ ],
140
+ "correct_bias": false,
141
+ "lr": 0.0001,
142
+ "parameter_groups": [
143
+ [
144
+ [
145
+ "text_field_embedder.*transformer_model.embeddings.*_embeddings.*",
146
+ "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).weight"
147
+ ],
148
+ {}
149
+ ],
150
+ [
151
+ [
152
+ "text_field_embedder.*transformer_model.embeddings.LayerNorm.*",
153
+ "text_field_embedder.*transformer_model.encoder.*.output.LayerNorm.*",
154
+ "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).bias",
155
+ "text_field_embedder.*transformer_model.pooler.dense.bias"
156
+ ],
157
+ {
158
+ "weight_decay": 0
159
+ }
160
+ ],
161
+ [
162
+ [
163
+ "text_field_embedder.*._scalar_mix.*",
164
+ "text_field_embedder.*transformer_model.pooler.dense.weight",
165
+ "_head_sentinel",
166
+ "head_arc_feedforward._linear_layers.*.weight",
167
+ "child_arc_feedforward._linear_layers.*.weight",
168
+ "head_tag_feedforward._linear_layers.*.weight",
169
+ "child_tag_feedforward._linear_layers.*.weight",
170
+ "arc_attention._weight_matrix",
171
+ "tag_bilinear.weight",
172
+ "tag_projection_layer._module.weight",
173
+ "crf",
174
+ "linear.weight",
175
+ "tagger_linear.weight"
176
+ ],
177
+ {}
178
+ ],
179
+ [
180
+ [
181
+ "head_arc_feedforward._linear_layers.*.bias",
182
+ "child_arc_feedforward._linear_layers.*.bias",
183
+ "head_tag_feedforward._linear_layers.*.bias",
184
+ "child_tag_feedforward._linear_layers.*.bias",
185
+ "arc_attention._bias",
186
+ "tag_bilinear.bias",
187
+ "tag_projection_layer._module.bias",
188
+ "linear.bias",
189
+ "tagger_linear.bias"
190
+ ],
191
+ {
192
+ "weight_decay": 0
193
+ }
194
+ ]
195
+ ],
196
+ "weight_decay": 0.01
197
+ },
198
+ "patience": 20,
199
+ "validation_metric": [
200
+ "+sentiment_fscore"
201
+ ]
202
+ },
203
+ "data_loader": {
204
+ "type": "multitask",
205
+ "scheduler": {
206
+ "type": "unbalanced_homogeneous_roundrobin",
207
+ "batch_size": 8,
208
+ "dataset_sizes": {
209
+ "sentiment": 595
210
+ }
211
+ },
212
+ "shuffle": true
213
+ },
214
+ "distributed": {
215
+ "cuda_devices": [
216
+ 0,
217
+ 1,
218
+ 2,
219
+ 3
220
+ ]
221
+ },
222
+ "numpy_seed": 1537,
223
+ "pytorch_seed": 153,
224
+ "random_seed": 15370,
225
+ "validation_data_loader": {
226
+ "type": "multitask",
227
+ "scheduler": {
228
+ "type": "homogeneous_roundrobin",
229
+ "batch_size": 8
230
+ },
231
+ "shuffle": true
232
+ }
233
+ }
log/train/events.out.tfevents ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10d2d2aea4e0e0e377895d855a49d7a520bdbe2ccc6f93620a3f388607596a77
3
+ size 1086229
log/validation/events.out.tfevents ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:991af68639bfd44bac316bb77fa3d318334efb87cb11818701f87a658b8f1e0c
3
+ size 4801
metrics.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_epoch": 3,
3
+ "peak_worker_0_memory_MB": 4755.15625,
4
+ "peak_worker_1_memory_MB": 4183.89453125,
5
+ "peak_worker_2_memory_MB": 4178.01171875,
6
+ "peak_worker_3_memory_MB": 4177.8125,
7
+ "peak_gpu_0_memory_MB": 5976.16162109375,
8
+ "peak_gpu_1_memory_MB": 2261.0439453125,
9
+ "peak_gpu_2_memory_MB": 2281.87451171875,
10
+ "peak_gpu_3_memory_MB": 2118.19384765625,
11
+ "training_duration": "0:01:56.202622",
12
+ "training_start_epoch": 0,
13
+ "training_epochs": 22,
14
+ "epoch": 22,
15
+ "training_sentiment_precision": 1.0,
16
+ "training_sentiment_recall": 1.0,
17
+ "training_sentiment_fscore": 1.0,
18
+ "training_loss": 0.00010353123108966668,
19
+ "training_worker_0_memory_MB": 4755.15625,
20
+ "training_worker_1_memory_MB": 4183.875,
21
+ "training_worker_2_memory_MB": 4177.984375,
22
+ "training_worker_3_memory_MB": 4177.7890625,
23
+ "training_gpu_0_memory_MB": 5976.16162109375,
24
+ "training_gpu_1_memory_MB": 2261.0439453125,
25
+ "training_gpu_2_memory_MB": 2281.87451171875,
26
+ "training_gpu_3_memory_MB": 2118.19384765625,
27
+ "validation_sentiment_precision": 0.9013409614562988,
28
+ "validation_sentiment_recall": 0.8870074152946472,
29
+ "validation_sentiment_fscore": 0.8934837579727173,
30
+ "validation_loss": 0.32014626264572144,
31
+ "best_validation_sentiment_precision": 0.9113408327102661,
32
+ "best_validation_sentiment_recall": 0.9042487740516663,
33
+ "best_validation_sentiment_fscore": 0.9076230525970459,
34
+ "best_validation_loss": 0.22802153353889784
35
+ }
vocabulary/.lock ADDED
File without changes
vocabulary/non_padded_namespaces.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *labels
2
+ *tags