speech-timer / training.log
2021-11-14 01:12:49,647 ----------------------------------------------------------------------------------------------------
2021-11-14 01:12:49,648 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): CamembertModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(32005, 768, padding_idx=1)
        (position_embeddings): Embedding(514, 768, padding_idx=1)
        (token_type_embeddings): Embedding(1, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (1): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (2): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (3): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (4): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (5): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (6): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (7): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (8): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (9): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (10): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (11): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
      (pooler): RobertaPooler(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (activation): Tanh()
      )
    )
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (embedding2nn): Linear(in_features=1536, out_features=1536, bias=True)
  (linear): Linear(in_features=1536, out_features=16, bias=True)
  (beta): 1.0
  (weights): None
  (weight_tensor) None
)"
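The printout above is the PyTorch repr of a Flair SequenceTagger built on CamemBERT: twelve 768-wide transformer layers, no LSTM and no CRF module, and a final 1536-to-16 projection (so the tag dictionary holds 16 entries and each token is represented by two concatenated 768-dim vectors). A minimal reconstruction of such a model, assuming Flair's standard API of this era; the checkpoint name, corpus path/format, and pooling setting are guesses consistent with the shapes above, not taken from the log:

    from flair.datasets import ColumnCorpus
    from flair.embeddings import TransformerWordEmbeddings
    from flair.models import SequenceTagger

    # Hypothetical corpus location and column format; the log only reports split sizes.
    corpus = ColumnCorpus("data/speech_timer", {0: "text", 1: "ner"})
    tag_dictionary = corpus.make_tag_dictionary(tag_type="ner")  # 16 entries, per the final linear layer

    embeddings = TransformerWordEmbeddings(
        "camembert-base",               # assumption: matches the 12-layer, 768-dim, 32005-vocab weights above
        subtoken_pooling="first_last",  # assumption: one way to obtain the 1536-dim token vectors above
    )
    tagger = SequenceTagger(
        hidden_size=256,                # unused when use_rnn=False, but required by the signature
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type="ner",
        use_rnn=False,                  # no LSTM appears in the printout
        use_crf=False,                  # no CRF transition matrix appears in the printout
    )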
2021-11-14 01:12:49,649 ----------------------------------------------------------------------------------------------------
2021-11-14 01:12:49,649 Corpus: "Corpus: 56700 train + 6300 dev + 7000 test sentences"
2021-11-14 01:12:49,650 ----------------------------------------------------------------------------------------------------
2021-11-14 01:12:49,650 Parameters:
2021-11-14 01:12:49,650  - learning_rate: "5e-05"
2021-11-14 01:12:49,651  - mini_batch_size: "64"
2021-11-14 01:12:49,651  - patience: "3"
2021-11-14 01:12:49,652  - anneal_factor: "0.5"
2021-11-14 01:12:49,652  - max_epochs: "8"
2021-11-14 01:12:49,653  - shuffle: "True"
2021-11-14 01:12:49,653  - train_with_dev: "False"
2021-11-14 01:12:49,654  - batch_growth_annealing: "False"
2021-11-14 01:12:49,654 ----------------------------------------------------------------------------------------------------
2021-11-14 01:12:49,655 Model training base path: "training/flair_ner/14112021_011130"
2021-11-14 01:12:49,656 ----------------------------------------------------------------------------------------------------
2021-11-14 01:12:49,656 Device: cuda
2021-11-14 01:12:49,657 ----------------------------------------------------------------------------------------------------
2021-11-14 01:12:49,657 Embeddings storage mode: cpu
2021-11-14 01:12:49,659 ----------------------------------------------------------------------------------------------------
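The parameters above map one-to-one onto Flair's ModelTrainer.train() call. A minimal sketch of how this run was likely launched, assuming the tagger and corpus from the previous sketch:

    from flair.trainers import ModelTrainer

    trainer = ModelTrainer(tagger, corpus)
    trainer.train(
        "training/flair_ner/14112021_011130",  # base path reported above
        learning_rate=5e-5,
        mini_batch_size=64,
        max_epochs=8,
        patience=3,
        anneal_factor=0.5,
        shuffle=True,
        train_with_dev=False,
        batch_growth_annealing=False,
        embeddings_storage_mode="cpu",  # matches "Embeddings storage mode: cpu"
    )

As a consistency check, 56700 training sentences at a mini-batch size of 64 gives ceil(56700 / 64) = 886 iterations per epoch, exactly the iteration count in the lines that follow.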
2021-11-14 01:13:08,832 epoch 1 - iter 88/886 - loss 0.98596606 - samples/sec: 293.89 - lr: 0.000050
2021-11-14 01:13:28,224 epoch 1 - iter 176/886 - loss 0.56674940 - samples/sec: 290.55 - lr: 0.000050
2021-11-14 01:13:46,801 epoch 1 - iter 264/886 - loss 0.42609266 - samples/sec: 303.28 - lr: 0.000050
2021-11-14 01:14:05,497 epoch 1 - iter 352/886 - loss 0.35537700 - samples/sec: 301.36 - lr: 0.000050
2021-11-14 01:14:24,349 epoch 1 - iter 440/886 - loss 0.31377922 - samples/sec: 298.86 - lr: 0.000050
2021-11-14 01:14:43,031 epoch 1 - iter 528/886 - loss 0.28429453 - samples/sec: 301.58 - lr: 0.000050
2021-11-14 01:15:02,142 epoch 1 - iter 616/886 - loss 0.27880202 - samples/sec: 294.85 - lr: 0.000050
2021-11-14 01:15:20,814 epoch 1 - iter 704/886 - loss 0.26046120 - samples/sec: 301.80 - lr: 0.000050
2021-11-14 01:15:39,918 epoch 1 - iter 792/886 - loss 0.24399388 - samples/sec: 295.02 - lr: 0.000050
2021-11-14 01:15:58,554 epoch 1 - iter 880/886 - loss 0.23065481 - samples/sec: 302.35 - lr: 0.000050
2021-11-14 01:15:59,827 ----------------------------------------------------------------------------------------------------
2021-11-14 01:15:59,828 EPOCH 1 done: loss 0.2298 - lr 0.0000500
2021-11-14 01:16:14,731 DEV : loss 0.0016565551050007343 - f1-score (micro avg)  0.9988
2021-11-14 01:16:14,821 BAD EPOCHS (no improvement): 0
2021-11-14 01:16:14,821 saving best model
2021-11-14 01:16:15,220 ----------------------------------------------------------------------------------------------------
2021-11-14 01:16:34,035 epoch 2 - iter 88/886 - loss 0.11443562 - samples/sec: 299.51 - lr: 0.000050
2021-11-14 01:16:52,711 epoch 2 - iter 176/886 - loss 0.11391112 - samples/sec: 301.72 - lr: 0.000050
2021-11-14 01:17:11,410 epoch 2 - iter 264/886 - loss 0.11275449 - samples/sec: 301.34 - lr: 0.000050
2021-11-14 01:17:30,059 epoch 2 - iter 352/886 - loss 0.11148830 - samples/sec: 302.14 - lr: 0.000050
2021-11-14 01:17:48,869 epoch 2 - iter 440/886 - loss 0.11192871 - samples/sec: 299.56 - lr: 0.000050
2021-11-14 01:18:07,635 epoch 2 - iter 528/886 - loss 0.11243003 - samples/sec: 300.27 - lr: 0.000050
2021-11-14 01:18:27,756 epoch 2 - iter 616/886 - loss 0.11202302 - samples/sec: 280.03 - lr: 0.000050
2021-11-14 01:18:46,477 epoch 2 - iter 704/886 - loss 0.11150461 - samples/sec: 301.00 - lr: 0.000050
2021-11-14 01:19:05,152 epoch 2 - iter 792/886 - loss 0.11090826 - samples/sec: 301.81 - lr: 0.000050
2021-11-14 01:19:23,958 epoch 2 - iter 880/886 - loss 0.11109339 - samples/sec: 299.71 - lr: 0.000050
2021-11-14 01:19:25,234 ----------------------------------------------------------------------------------------------------
2021-11-14 01:19:25,234 EPOCH 2 done: loss 0.1110 - lr 0.0000500
2021-11-14 01:19:41,637 DEV : loss 0.0011662252945825458 - f1-score (micro avg)  0.9987
2021-11-14 01:19:41,739 BAD EPOCHS (no improvement): 1
2021-11-14 01:19:41,742 ----------------------------------------------------------------------------------------------------
2021-11-14 01:20:00,648 epoch 3 - iter 88/886 - loss 0.11136958 - samples/sec: 298.07 - lr: 0.000050
2021-11-14 01:20:19,564 epoch 3 - iter 176/886 - loss 0.11280468 - samples/sec: 297.97 - lr: 0.000050
2021-11-14 01:20:38,568 epoch 3 - iter 264/886 - loss 0.11045104 - samples/sec: 296.60 - lr: 0.000050
2021-11-14 01:20:57,435 epoch 3 - iter 352/886 - loss 0.10911278 - samples/sec: 298.75 - lr: 0.000050
2021-11-14 01:21:16,245 epoch 3 - iter 440/886 - loss 0.10930290 - samples/sec: 299.56 - lr: 0.000050
2021-11-14 01:21:35,246 epoch 3 - iter 528/886 - loss 0.10928782 - samples/sec: 296.54 - lr: 0.000050
2021-11-14 01:21:54,644 epoch 3 - iter 616/886 - loss 0.10980571 - samples/sec: 290.50 - lr: 0.000050
2021-11-14 01:22:13,526 epoch 3 - iter 704/886 - loss 0.10986299 - samples/sec: 298.42 - lr: 0.000050
2021-11-14 01:22:32,408 epoch 3 - iter 792/886 - loss 0.11021279 - samples/sec: 298.42 - lr: 0.000050
2021-11-14 01:22:51,317 epoch 3 - iter 880/886 - loss 0.11010333 - samples/sec: 297.99 - lr: 0.000050
2021-11-14 01:22:52,607 ----------------------------------------------------------------------------------------------------
2021-11-14 01:22:52,608 EPOCH 3 done: loss 0.1101 - lr 0.0000500
2021-11-14 01:23:10,750 DEV : loss 0.0018373305210843682 - f1-score (micro avg)  0.9977
2021-11-14 01:23:10,838 BAD EPOCHS (no improvement): 2
2021-11-14 01:23:10,839 ----------------------------------------------------------------------------------------------------
2021-11-14 01:23:30,566 epoch 4 - iter 88/886 - loss 0.10992709 - samples/sec: 285.68 - lr: 0.000050
2021-11-14 01:23:50,362 epoch 4 - iter 176/886 - loss 0.10809355 - samples/sec: 284.67 - lr: 0.000050
2021-11-14 01:24:10,080 epoch 4 - iter 264/886 - loss 0.10844173 - samples/sec: 285.87 - lr: 0.000050
2021-11-14 01:24:30,946 epoch 4 - iter 352/886 - loss 0.10836201 - samples/sec: 270.06 - lr: 0.000050
2021-11-14 01:24:51,474 epoch 4 - iter 440/886 - loss 0.10794139 - samples/sec: 274.51 - lr: 0.000050
2021-11-14 01:25:12,388 epoch 4 - iter 528/886 - loss 0.10878776 - samples/sec: 269.43 - lr: 0.000050
2021-11-14 01:25:33,189 epoch 4 - iter 616/886 - loss 0.10894668 - samples/sec: 270.92 - lr: 0.000050
2021-11-14 01:25:54,237 epoch 4 - iter 704/886 - loss 0.10934898 - samples/sec: 267.79 - lr: 0.000050
2021-11-14 01:26:15,172 epoch 4 - iter 792/886 - loss 0.10987029 - samples/sec: 269.18 - lr: 0.000050
2021-11-14 01:26:35,568 epoch 4 - iter 880/886 - loss 0.10994285 - samples/sec: 276.35 - lr: 0.000050
2021-11-14 01:26:36,958 ----------------------------------------------------------------------------------------------------
2021-11-14 01:26:36,959 EPOCH 4 done: loss 0.1099 - lr 0.0000500
2021-11-14 01:26:56,814 DEV : loss 0.0014131164643913507 - f1-score (micro avg)  0.999
2021-11-14 01:26:56,904 BAD EPOCHS (no improvement): 0
2021-11-14 01:26:56,907 saving best model
2021-11-14 01:26:57,746 ----------------------------------------------------------------------------------------------------
2021-11-14 01:27:17,983 epoch 5 - iter 88/886 - loss 0.10864585 - samples/sec: 278.47 - lr: 0.000050
2021-11-14 01:27:37,584 epoch 5 - iter 176/886 - loss 0.10902201 - samples/sec: 287.48 - lr: 0.000050
2021-11-14 01:27:57,285 epoch 5 - iter 264/886 - loss 0.10824347 - samples/sec: 286.02 - lr: 0.000050
2021-11-14 01:28:16,752 epoch 5 - iter 352/886 - loss 0.10819784 - samples/sec: 289.50 - lr: 0.000050
2021-11-14 01:28:35,991 epoch 5 - iter 440/886 - loss 0.10806523 - samples/sec: 292.89 - lr: 0.000050
2021-11-14 01:28:55,004 epoch 5 - iter 528/886 - loss 0.10874710 - samples/sec: 296.35 - lr: 0.000050
2021-11-14 01:29:14,287 epoch 5 - iter 616/886 - loss 0.10819233 - samples/sec: 292.22 - lr: 0.000050
2021-11-14 01:29:33,882 epoch 5 - iter 704/886 - loss 0.10856081 - samples/sec: 287.57 - lr: 0.000050
2021-11-14 01:29:53,701 epoch 5 - iter 792/886 - loss 0.10878005 - samples/sec: 284.31 - lr: 0.000050
2021-11-14 01:30:13,249 epoch 5 - iter 880/886 - loss 0.10877142 - samples/sec: 288.26 - lr: 0.000050
2021-11-14 01:30:14,542 ----------------------------------------------------------------------------------------------------
2021-11-14 01:30:14,543 EPOCH 5 done: loss 0.1088 - lr 0.0000500
2021-11-14 01:30:32,668 DEV : loss 0.0017454695189371705 - f1-score (micro avg)  0.9993
2021-11-14 01:30:32,754 BAD EPOCHS (no improvement): 0
2021-11-14 01:30:32,757 saving best model
2021-11-14 01:30:33,509 ----------------------------------------------------------------------------------------------------
2021-11-14 01:30:52,836 epoch 6 - iter 88/886 - loss 0.10524382 - samples/sec: 291.60 - lr: 0.000050
2021-11-14 01:31:12,126 epoch 6 - iter 176/886 - loss 0.10690102 - samples/sec: 292.11 - lr: 0.000050
2021-11-14 01:31:31,803 epoch 6 - iter 264/886 - loss 0.10714116 - samples/sec: 286.38 - lr: 0.000050
2021-11-14 01:31:51,724 epoch 6 - iter 352/886 - loss 0.10771656 - samples/sec: 282.86 - lr: 0.000050
2021-11-14 01:32:11,047 epoch 6 - iter 440/886 - loss 0.10879216 - samples/sec: 291.61 - lr: 0.000050
2021-11-14 01:32:30,353 epoch 6 - iter 528/886 - loss 0.10867079 - samples/sec: 291.88 - lr: 0.000050
2021-11-14 01:32:49,795 epoch 6 - iter 616/886 - loss 0.10904316 - samples/sec: 289.82 - lr: 0.000050
2021-11-14 01:33:09,113 epoch 6 - iter 704/886 - loss 0.10898605 - samples/sec: 291.70 - lr: 0.000050
2021-11-14 01:33:28,312 epoch 6 - iter 792/886 - loss 0.10895071 - samples/sec: 293.49 - lr: 0.000050
2021-11-14 01:33:48,207 epoch 6 - iter 880/886 - loss 0.10936169 - samples/sec: 283.23 - lr: 0.000050
2021-11-14 01:33:49,618 ----------------------------------------------------------------------------------------------------
2021-11-14 01:33:49,619 EPOCH 6 done: loss 0.1094 - lr 0.0000500
2021-11-14 01:34:08,307 DEV : loss 0.0012574659194797277 - f1-score (micro avg)  0.9991
2021-11-14 01:34:08,393 BAD EPOCHS (no improvement): 1
2021-11-14 01:34:08,396 ----------------------------------------------------------------------------------------------------
2021-11-14 01:34:28,456 epoch 7 - iter 88/886 - loss 0.10772567 - samples/sec: 280.95 - lr: 0.000050
2021-11-14 01:34:48,077 epoch 7 - iter 176/886 - loss 0.10831423 - samples/sec: 287.18 - lr: 0.000050
2021-11-14 01:35:07,762 epoch 7 - iter 264/886 - loss 0.10889045 - samples/sec: 286.25 - lr: 0.000050
2021-11-14 01:35:27,543 epoch 7 - iter 352/886 - loss 0.10923627 - samples/sec: 284.87 - lr: 0.000050
2021-11-14 01:35:47,152 epoch 7 - iter 440/886 - loss 0.10891691 - samples/sec: 287.36 - lr: 0.000050
2021-11-14 01:36:06,760 epoch 7 - iter 528/886 - loss 0.10886164 - samples/sec: 287.38 - lr: 0.000050
2021-11-14 01:36:26,264 epoch 7 - iter 616/886 - loss 0.10925453 - samples/sec: 288.92 - lr: 0.000050
2021-11-14 01:36:45,846 epoch 7 - iter 704/886 - loss 0.10944528 - samples/sec: 287.78 - lr: 0.000050
2021-11-14 01:37:05,161 epoch 7 - iter 792/886 - loss 0.10963480 - samples/sec: 291.83 - lr: 0.000050
2021-11-14 01:37:25,344 epoch 7 - iter 880/886 - loss 0.10941620 - samples/sec: 279.19 - lr: 0.000050
2021-11-14 01:37:26,675 ----------------------------------------------------------------------------------------------------
2021-11-14 01:37:26,676 EPOCH 7 done: loss 0.1093 - lr 0.0000500
2021-11-14 01:37:46,332 DEV : loss 0.0008941686828620732 - f1-score (micro avg)  0.9994
2021-11-14 01:37:46,425 BAD EPOCHS (no improvement): 0
2021-11-14 01:37:46,428 saving best model
2021-11-14 01:37:47,268 ----------------------------------------------------------------------------------------------------
2021-11-14 01:38:06,968 epoch 8 - iter 88/886 - loss 0.10842313 - samples/sec: 286.09 - lr: 0.000050
2021-11-14 01:38:26,508 epoch 8 - iter 176/886 - loss 0.10686590 - samples/sec: 288.47 - lr: 0.000050
2021-11-14 01:38:45,880 epoch 8 - iter 264/886 - loss 0.10866318 - samples/sec: 290.87 - lr: 0.000050
2021-11-14 01:39:05,447 epoch 8 - iter 352/886 - loss 0.10886654 - samples/sec: 287.98 - lr: 0.000050
2021-11-14 01:39:25,039 epoch 8 - iter 440/886 - loss 0.10893653 - samples/sec: 287.62 - lr: 0.000050
2021-11-14 01:39:44,508 epoch 8 - iter 528/886 - loss 0.10845487 - samples/sec: 289.43 - lr: 0.000050
2021-11-14 01:40:04,009 epoch 8 - iter 616/886 - loss 0.10849658 - samples/sec: 288.96 - lr: 0.000050
2021-11-14 01:40:23,270 epoch 8 - iter 704/886 - loss 0.10852857 - samples/sec: 292.55 - lr: 0.000050
2021-11-14 01:40:42,423 epoch 8 - iter 792/886 - loss 0.10825218 - samples/sec: 294.21 - lr: 0.000050
2021-11-14 01:41:01,605 epoch 8 - iter 880/886 - loss 0.10839605 - samples/sec: 293.76 - lr: 0.000050
2021-11-14 01:41:02,928 ----------------------------------------------------------------------------------------------------
2021-11-14 01:41:02,929 EPOCH 8 done: loss 0.1084 - lr 0.0000500
2021-11-14 01:41:22,401 DEV : loss 0.0013162429677322507 - f1-score (micro avg)  0.9994
2021-11-14 01:41:22,539 BAD EPOCHS (no improvement): 1
2021-11-14 01:41:23,014 ----------------------------------------------------------------------------------------------------
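Across all eight epochs the BAD EPOCHS counter peaks at 2, below the patience of 3, so the learning rate is never annealed and every iteration line reports lr 0.000050. For illustration only, the annealing rule behaves like torch's ReduceLROnPlateau; this is not Flair's exact internal class, just an equivalent mechanism:

    import torch

    param = torch.zeros(1, requires_grad=True)     # stand-in parameter
    optimizer = torch.optim.SGD([param], lr=5e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="max", factor=0.5, patience=3  # anneal_factor and patience from this run
    )
    for dev_f1 in [0.9988, 0.9987, 0.9977, 0.9990]:  # dev scores from epochs 1-4 above
        scheduler.step(dev_f1)  # the lr would halve only after enough consecutive non-improving epochs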
2021-11-14 01:41:23,015 loading file training/flair_ner/14112021_011130/best-model.pt
2021-11-14 01:41:42,464 0.9996	0.9996	0.9996	0.9996
2021-11-14 01:41:42,465 
Results:
- F-score (micro) 0.9996
- F-score (macro) 0.9994
- Accuracy 0.9996

By class:
                 precision    recall  f1-score   support

      nb_rounds     1.0000    0.9988    0.9994      6894
 duration_wt_sd     1.0000    1.0000    1.0000      3288
duration_br_min     0.9982    1.0000    0.9991      3251
duration_wt_min     1.0000    1.0000    1.0000      2677
 duration_br_sd     0.9995    1.0000    0.9998      2080
 duration_wt_hr     1.0000    1.0000    1.0000      1050
 duration_br_hr     0.9957    1.0000    0.9978       230

      micro avg     0.9996    0.9996    0.9996     19470
      macro avg     0.9990    0.9998    0.9994     19470
   weighted avg     0.9996    0.9996    0.9996     19470
    samples avg     0.9996    0.9996    0.9996     19470

2021-11-14 01:41:42,466 ----------------------------------------------------------------------------------------------------
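The final block evaluates the reloaded best model on the 7000-sentence test split: micro-F1 0.9996 over 19470 gold spans, with duration_br_hr (support 230) the weakest class at 0.9978. Using the saved model afterwards takes a few lines; a minimal sketch, assuming Flair's standard load/predict API (the example sentence is hypothetical, invented to match the French timer-style tag set):

    from flair.data import Sentence
    from flair.models import SequenceTagger

    tagger = SequenceTagger.load("training/flair_ner/14112021_011130/best-model.pt")
    sentence = Sentence("5 rounds de 3 minutes avec 1 minute de repos")  # hypothetical input
    tagger.predict(sentence)
    for span in sentence.get_spans("ner"):
        print(span)  # each detected span with its predicted tag and confidence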