flair-icdar-fr / training.log
stefan-it's picture
Upload ./training.log with huggingface_hub
07e79d1
2023-10-24 16:55:43,926 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,927 Model: "SequenceTagger(
(embeddings): TransformerWordEmbeddings(
(model): BertModel(
(embeddings): BertEmbeddings(
(word_embeddings): Embedding(64001, 768)
(position_embeddings): Embedding(512, 768)
(token_type_embeddings): Embedding(2, 768)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(encoder): BertEncoder(
(layer): ModuleList(
(0): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(1): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(2): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(3): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(4): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(5): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(6): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(7): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(8): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(9): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(10): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(11): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
)
)
(pooler): BertPooler(
(dense): Linear(in_features=768, out_features=768, bias=True)
(activation): Tanh()
)
)
)
(locked_dropout): LockedDropout(p=0.5)
(linear): Linear(in_features=768, out_features=13, bias=True)
(loss_function): CrossEntropyLoss()
)"
2023-10-24 16:55:43,927 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,927 MultiCorpus: 7936 train + 992 dev + 992 test sentences
- NER_ICDAR_EUROPEANA Corpus: 7936 train + 992 dev + 992 test sentences - /home/ubuntu/.flair/datasets/ner_icdar_europeana/fr
2023-10-24 16:55:43,927 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,927 Train: 7936 sentences
2023-10-24 16:55:43,927 (train_with_dev=False, train_with_test=False)
2023-10-24 16:55:43,928 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,928 Training Params:
2023-10-24 16:55:43,928 - learning_rate: "3e-05"
2023-10-24 16:55:43,928 - mini_batch_size: "4"
2023-10-24 16:55:43,928 - max_epochs: "10"
2023-10-24 16:55:43,928 - shuffle: "True"
2023-10-24 16:55:43,928 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,928 Plugins:
2023-10-24 16:55:43,928 - TensorboardLogger
2023-10-24 16:55:43,928 - LinearScheduler | warmup_fraction: '0.1'
2023-10-24 16:55:43,928 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,928 Final evaluation on model from best epoch (best-model.pt)
2023-10-24 16:55:43,928 - metric: "('micro avg', 'f1-score')"
2023-10-24 16:55:43,928 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,928 Computation:
2023-10-24 16:55:43,928 - compute on device: cuda:0
2023-10-24 16:55:43,928 - embedding storage: none
2023-10-24 16:55:43,928 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,928 Model training base path: "hmbench-icdar/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-3"
2023-10-24 16:55:43,928 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,928 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,928 Logging anything other than scalars to TensorBoard is currently not supported.
2023-10-24 16:55:56,135 epoch 1 - iter 198/1984 - loss 1.38943938 - time (sec): 12.21 - samples/sec: 1429.33 - lr: 0.000003 - momentum: 0.000000
2023-10-24 16:56:08,160 epoch 1 - iter 396/1984 - loss 0.87338015 - time (sec): 24.23 - samples/sec: 1389.55 - lr: 0.000006 - momentum: 0.000000
2023-10-24 16:56:20,110 epoch 1 - iter 594/1984 - loss 0.65982021 - time (sec): 36.18 - samples/sec: 1357.07 - lr: 0.000009 - momentum: 0.000000
2023-10-24 16:56:32,204 epoch 1 - iter 792/1984 - loss 0.53649000 - time (sec): 48.28 - samples/sec: 1359.51 - lr: 0.000012 - momentum: 0.000000
2023-10-24 16:56:44,187 epoch 1 - iter 990/1984 - loss 0.46413907 - time (sec): 60.26 - samples/sec: 1349.64 - lr: 0.000015 - momentum: 0.000000
2023-10-24 16:56:56,148 epoch 1 - iter 1188/1984 - loss 0.41329533 - time (sec): 72.22 - samples/sec: 1344.79 - lr: 0.000018 - momentum: 0.000000
2023-10-24 16:57:08,421 epoch 1 - iter 1386/1984 - loss 0.36950313 - time (sec): 84.49 - samples/sec: 1347.81 - lr: 0.000021 - momentum: 0.000000
2023-10-24 16:57:20,569 epoch 1 - iter 1584/1984 - loss 0.33972792 - time (sec): 96.64 - samples/sec: 1348.12 - lr: 0.000024 - momentum: 0.000000
2023-10-24 16:57:32,795 epoch 1 - iter 1782/1984 - loss 0.31934420 - time (sec): 108.87 - samples/sec: 1353.02 - lr: 0.000027 - momentum: 0.000000
2023-10-24 16:57:44,981 epoch 1 - iter 1980/1984 - loss 0.30139055 - time (sec): 121.05 - samples/sec: 1351.60 - lr: 0.000030 - momentum: 0.000000
2023-10-24 16:57:45,234 ----------------------------------------------------------------------------------------------------
2023-10-24 16:57:45,234 EPOCH 1 done: loss 0.3010 - lr: 0.000030
2023-10-24 16:57:48,301 DEV : loss 0.08988756686449051 - f1-score (micro avg) 0.7331
2023-10-24 16:57:48,316 saving best model
2023-10-24 16:57:48,785 ----------------------------------------------------------------------------------------------------
2023-10-24 16:58:00,799 epoch 2 - iter 198/1984 - loss 0.11015239 - time (sec): 12.01 - samples/sec: 1357.08 - lr: 0.000030 - momentum: 0.000000
2023-10-24 16:58:12,903 epoch 2 - iter 396/1984 - loss 0.11165230 - time (sec): 24.12 - samples/sec: 1347.03 - lr: 0.000029 - momentum: 0.000000
2023-10-24 16:58:24,975 epoch 2 - iter 594/1984 - loss 0.11377525 - time (sec): 36.19 - samples/sec: 1350.17 - lr: 0.000029 - momentum: 0.000000
2023-10-24 16:58:37,241 epoch 2 - iter 792/1984 - loss 0.11506086 - time (sec): 48.46 - samples/sec: 1353.84 - lr: 0.000029 - momentum: 0.000000
2023-10-24 16:58:49,374 epoch 2 - iter 990/1984 - loss 0.11305507 - time (sec): 60.59 - samples/sec: 1360.09 - lr: 0.000028 - momentum: 0.000000
2023-10-24 16:59:01,455 epoch 2 - iter 1188/1984 - loss 0.11198699 - time (sec): 72.67 - samples/sec: 1358.52 - lr: 0.000028 - momentum: 0.000000
2023-10-24 16:59:13,628 epoch 2 - iter 1386/1984 - loss 0.11098977 - time (sec): 84.84 - samples/sec: 1361.77 - lr: 0.000028 - momentum: 0.000000
2023-10-24 16:59:25,721 epoch 2 - iter 1584/1984 - loss 0.10937736 - time (sec): 96.94 - samples/sec: 1353.64 - lr: 0.000027 - momentum: 0.000000
2023-10-24 16:59:37,810 epoch 2 - iter 1782/1984 - loss 0.11058550 - time (sec): 109.02 - samples/sec: 1350.06 - lr: 0.000027 - momentum: 0.000000
2023-10-24 16:59:50,060 epoch 2 - iter 1980/1984 - loss 0.11259310 - time (sec): 121.27 - samples/sec: 1350.04 - lr: 0.000027 - momentum: 0.000000
2023-10-24 16:59:50,293 ----------------------------------------------------------------------------------------------------
2023-10-24 16:59:50,293 EPOCH 2 done: loss 0.1125 - lr: 0.000027
2023-10-24 16:59:53,708 DEV : loss 0.09264685958623886 - f1-score (micro avg) 0.7178
2023-10-24 16:59:53,723 ----------------------------------------------------------------------------------------------------
2023-10-24 17:00:05,764 epoch 3 - iter 198/1984 - loss 0.07194458 - time (sec): 12.04 - samples/sec: 1344.06 - lr: 0.000026 - momentum: 0.000000
2023-10-24 17:00:17,986 epoch 3 - iter 396/1984 - loss 0.07937385 - time (sec): 24.26 - samples/sec: 1357.81 - lr: 0.000026 - momentum: 0.000000
2023-10-24 17:00:30,051 epoch 3 - iter 594/1984 - loss 0.08581189 - time (sec): 36.33 - samples/sec: 1345.29 - lr: 0.000026 - momentum: 0.000000
2023-10-24 17:00:42,040 epoch 3 - iter 792/1984 - loss 0.08360831 - time (sec): 48.32 - samples/sec: 1340.89 - lr: 0.000025 - momentum: 0.000000
2023-10-24 17:00:54,457 epoch 3 - iter 990/1984 - loss 0.08135214 - time (sec): 60.73 - samples/sec: 1356.85 - lr: 0.000025 - momentum: 0.000000
2023-10-24 17:01:06,626 epoch 3 - iter 1188/1984 - loss 0.08288010 - time (sec): 72.90 - samples/sec: 1355.03 - lr: 0.000025 - momentum: 0.000000
2023-10-24 17:01:18,609 epoch 3 - iter 1386/1984 - loss 0.08319010 - time (sec): 84.89 - samples/sec: 1351.84 - lr: 0.000024 - momentum: 0.000000
2023-10-24 17:01:30,806 epoch 3 - iter 1584/1984 - loss 0.08252110 - time (sec): 97.08 - samples/sec: 1352.96 - lr: 0.000024 - momentum: 0.000000
2023-10-24 17:01:42,960 epoch 3 - iter 1782/1984 - loss 0.08183765 - time (sec): 109.24 - samples/sec: 1353.28 - lr: 0.000024 - momentum: 0.000000
2023-10-24 17:01:54,990 epoch 3 - iter 1980/1984 - loss 0.08232756 - time (sec): 121.27 - samples/sec: 1350.29 - lr: 0.000023 - momentum: 0.000000
2023-10-24 17:01:55,227 ----------------------------------------------------------------------------------------------------
2023-10-24 17:01:55,228 EPOCH 3 done: loss 0.0823 - lr: 0.000023
2023-10-24 17:01:58,337 DEV : loss 0.12425895780324936 - f1-score (micro avg) 0.748
2023-10-24 17:01:58,353 saving best model
2023-10-24 17:01:58,959 ----------------------------------------------------------------------------------------------------
2023-10-24 17:02:10,932 epoch 4 - iter 198/1984 - loss 0.05660289 - time (sec): 11.97 - samples/sec: 1320.40 - lr: 0.000023 - momentum: 0.000000
2023-10-24 17:02:23,327 epoch 4 - iter 396/1984 - loss 0.06137809 - time (sec): 24.37 - samples/sec: 1345.54 - lr: 0.000023 - momentum: 0.000000
2023-10-24 17:02:35,490 epoch 4 - iter 594/1984 - loss 0.05994790 - time (sec): 36.53 - samples/sec: 1347.81 - lr: 0.000022 - momentum: 0.000000
2023-10-24 17:02:47,476 epoch 4 - iter 792/1984 - loss 0.06293485 - time (sec): 48.52 - samples/sec: 1342.62 - lr: 0.000022 - momentum: 0.000000
2023-10-24 17:02:59,577 epoch 4 - iter 990/1984 - loss 0.06173995 - time (sec): 60.62 - samples/sec: 1346.36 - lr: 0.000022 - momentum: 0.000000
2023-10-24 17:03:11,344 epoch 4 - iter 1188/1984 - loss 0.06025572 - time (sec): 72.38 - samples/sec: 1331.03 - lr: 0.000021 - momentum: 0.000000
2023-10-24 17:03:23,543 epoch 4 - iter 1386/1984 - loss 0.06154385 - time (sec): 84.58 - samples/sec: 1341.57 - lr: 0.000021 - momentum: 0.000000
2023-10-24 17:03:35,705 epoch 4 - iter 1584/1984 - loss 0.06060378 - time (sec): 96.74 - samples/sec: 1340.90 - lr: 0.000021 - momentum: 0.000000
2023-10-24 17:03:48,022 epoch 4 - iter 1782/1984 - loss 0.06089033 - time (sec): 109.06 - samples/sec: 1341.13 - lr: 0.000020 - momentum: 0.000000
2023-10-24 17:04:00,486 epoch 4 - iter 1980/1984 - loss 0.06041258 - time (sec): 121.53 - samples/sec: 1346.67 - lr: 0.000020 - momentum: 0.000000
2023-10-24 17:04:00,727 ----------------------------------------------------------------------------------------------------
2023-10-24 17:04:00,727 EPOCH 4 done: loss 0.0603 - lr: 0.000020
2023-10-24 17:04:03,851 DEV : loss 0.17354941368103027 - f1-score (micro avg) 0.7452
2023-10-24 17:04:03,866 ----------------------------------------------------------------------------------------------------
2023-10-24 17:04:16,120 epoch 5 - iter 198/1984 - loss 0.03818146 - time (sec): 12.25 - samples/sec: 1386.56 - lr: 0.000020 - momentum: 0.000000
2023-10-24 17:04:28,224 epoch 5 - iter 396/1984 - loss 0.04086454 - time (sec): 24.36 - samples/sec: 1355.49 - lr: 0.000019 - momentum: 0.000000
2023-10-24 17:04:40,492 epoch 5 - iter 594/1984 - loss 0.04353732 - time (sec): 36.62 - samples/sec: 1353.93 - lr: 0.000019 - momentum: 0.000000
2023-10-24 17:04:52,589 epoch 5 - iter 792/1984 - loss 0.04314748 - time (sec): 48.72 - samples/sec: 1339.17 - lr: 0.000019 - momentum: 0.000000
2023-10-24 17:05:04,686 epoch 5 - iter 990/1984 - loss 0.04590035 - time (sec): 60.82 - samples/sec: 1347.08 - lr: 0.000018 - momentum: 0.000000
2023-10-24 17:05:16,672 epoch 5 - iter 1188/1984 - loss 0.04501377 - time (sec): 72.81 - samples/sec: 1343.70 - lr: 0.000018 - momentum: 0.000000
2023-10-24 17:05:28,989 epoch 5 - iter 1386/1984 - loss 0.04487474 - time (sec): 85.12 - samples/sec: 1348.08 - lr: 0.000018 - momentum: 0.000000
2023-10-24 17:05:41,166 epoch 5 - iter 1584/1984 - loss 0.04660773 - time (sec): 97.30 - samples/sec: 1347.44 - lr: 0.000017 - momentum: 0.000000
2023-10-24 17:05:53,109 epoch 5 - iter 1782/1984 - loss 0.04691609 - time (sec): 109.24 - samples/sec: 1345.32 - lr: 0.000017 - momentum: 0.000000
2023-10-24 17:06:05,294 epoch 5 - iter 1980/1984 - loss 0.04565686 - time (sec): 121.43 - samples/sec: 1347.64 - lr: 0.000017 - momentum: 0.000000
2023-10-24 17:06:05,541 ----------------------------------------------------------------------------------------------------
2023-10-24 17:06:05,541 EPOCH 5 done: loss 0.0458 - lr: 0.000017
2023-10-24 17:06:08,662 DEV : loss 0.21091219782829285 - f1-score (micro avg) 0.7348
2023-10-24 17:06:08,678 ----------------------------------------------------------------------------------------------------
2023-10-24 17:06:20,950 epoch 6 - iter 198/1984 - loss 0.03634734 - time (sec): 12.27 - samples/sec: 1323.11 - lr: 0.000016 - momentum: 0.000000
2023-10-24 17:06:33,123 epoch 6 - iter 396/1984 - loss 0.03594311 - time (sec): 24.44 - samples/sec: 1350.94 - lr: 0.000016 - momentum: 0.000000
2023-10-24 17:06:45,222 epoch 6 - iter 594/1984 - loss 0.03443057 - time (sec): 36.54 - samples/sec: 1356.66 - lr: 0.000016 - momentum: 0.000000
2023-10-24 17:06:57,225 epoch 6 - iter 792/1984 - loss 0.03374808 - time (sec): 48.55 - samples/sec: 1356.48 - lr: 0.000015 - momentum: 0.000000
2023-10-24 17:07:09,782 epoch 6 - iter 990/1984 - loss 0.03467893 - time (sec): 61.10 - samples/sec: 1352.93 - lr: 0.000015 - momentum: 0.000000
2023-10-24 17:07:21,887 epoch 6 - iter 1188/1984 - loss 0.03450278 - time (sec): 73.21 - samples/sec: 1347.25 - lr: 0.000015 - momentum: 0.000000
2023-10-24 17:07:33,957 epoch 6 - iter 1386/1984 - loss 0.03433692 - time (sec): 85.28 - samples/sec: 1341.65 - lr: 0.000014 - momentum: 0.000000
2023-10-24 17:07:46,058 epoch 6 - iter 1584/1984 - loss 0.03334634 - time (sec): 97.38 - samples/sec: 1342.51 - lr: 0.000014 - momentum: 0.000000
2023-10-24 17:07:58,090 epoch 6 - iter 1782/1984 - loss 0.03396585 - time (sec): 109.41 - samples/sec: 1336.77 - lr: 0.000014 - momentum: 0.000000
2023-10-24 17:08:10,180 epoch 6 - iter 1980/1984 - loss 0.03474326 - time (sec): 121.50 - samples/sec: 1347.16 - lr: 0.000013 - momentum: 0.000000
2023-10-24 17:08:10,422 ----------------------------------------------------------------------------------------------------
2023-10-24 17:08:10,422 EPOCH 6 done: loss 0.0347 - lr: 0.000013
2023-10-24 17:08:13,549 DEV : loss 0.1887310892343521 - f1-score (micro avg) 0.7538
2023-10-24 17:08:13,565 saving best model
2023-10-24 17:08:14,156 ----------------------------------------------------------------------------------------------------
2023-10-24 17:08:26,505 epoch 7 - iter 198/1984 - loss 0.02827359 - time (sec): 12.35 - samples/sec: 1359.01 - lr: 0.000013 - momentum: 0.000000
2023-10-24 17:08:38,484 epoch 7 - iter 396/1984 - loss 0.02865567 - time (sec): 24.33 - samples/sec: 1334.37 - lr: 0.000013 - momentum: 0.000000
2023-10-24 17:08:50,648 epoch 7 - iter 594/1984 - loss 0.02475648 - time (sec): 36.49 - samples/sec: 1335.51 - lr: 0.000012 - momentum: 0.000000
2023-10-24 17:09:02,801 epoch 7 - iter 792/1984 - loss 0.02504839 - time (sec): 48.64 - samples/sec: 1324.60 - lr: 0.000012 - momentum: 0.000000
2023-10-24 17:09:14,867 epoch 7 - iter 990/1984 - loss 0.02489200 - time (sec): 60.71 - samples/sec: 1323.82 - lr: 0.000012 - momentum: 0.000000
2023-10-24 17:09:27,252 epoch 7 - iter 1188/1984 - loss 0.02447758 - time (sec): 73.10 - samples/sec: 1338.12 - lr: 0.000011 - momentum: 0.000000
2023-10-24 17:09:39,473 epoch 7 - iter 1386/1984 - loss 0.02467051 - time (sec): 85.32 - samples/sec: 1344.90 - lr: 0.000011 - momentum: 0.000000
2023-10-24 17:09:51,517 epoch 7 - iter 1584/1984 - loss 0.02483831 - time (sec): 97.36 - samples/sec: 1345.72 - lr: 0.000011 - momentum: 0.000000
2023-10-24 17:10:03,564 epoch 7 - iter 1782/1984 - loss 0.02499026 - time (sec): 109.41 - samples/sec: 1346.91 - lr: 0.000010 - momentum: 0.000000
2023-10-24 17:10:15,630 epoch 7 - iter 1980/1984 - loss 0.02535832 - time (sec): 121.47 - samples/sec: 1346.06 - lr: 0.000010 - momentum: 0.000000
2023-10-24 17:10:15,884 ----------------------------------------------------------------------------------------------------
2023-10-24 17:10:15,884 EPOCH 7 done: loss 0.0253 - lr: 0.000010
2023-10-24 17:10:19,005 DEV : loss 0.2231946587562561 - f1-score (micro avg) 0.7507
2023-10-24 17:10:19,021 ----------------------------------------------------------------------------------------------------
2023-10-24 17:10:31,676 epoch 8 - iter 198/1984 - loss 0.01362913 - time (sec): 12.65 - samples/sec: 1371.16 - lr: 0.000010 - momentum: 0.000000
2023-10-24 17:10:43,986 epoch 8 - iter 396/1984 - loss 0.01453365 - time (sec): 24.96 - samples/sec: 1368.42 - lr: 0.000009 - momentum: 0.000000
2023-10-24 17:10:55,962 epoch 8 - iter 594/1984 - loss 0.01371635 - time (sec): 36.94 - samples/sec: 1345.54 - lr: 0.000009 - momentum: 0.000000
2023-10-24 17:11:08,188 epoch 8 - iter 792/1984 - loss 0.01363124 - time (sec): 49.17 - samples/sec: 1338.60 - lr: 0.000009 - momentum: 0.000000
2023-10-24 17:11:20,225 epoch 8 - iter 990/1984 - loss 0.01455714 - time (sec): 61.20 - samples/sec: 1334.80 - lr: 0.000008 - momentum: 0.000000
2023-10-24 17:11:32,426 epoch 8 - iter 1188/1984 - loss 0.01600626 - time (sec): 73.40 - samples/sec: 1346.37 - lr: 0.000008 - momentum: 0.000000
2023-10-24 17:11:44,542 epoch 8 - iter 1386/1984 - loss 0.01607764 - time (sec): 85.52 - samples/sec: 1348.15 - lr: 0.000008 - momentum: 0.000000
2023-10-24 17:11:56,395 epoch 8 - iter 1584/1984 - loss 0.01596874 - time (sec): 97.37 - samples/sec: 1339.42 - lr: 0.000007 - momentum: 0.000000
2023-10-24 17:12:08,622 epoch 8 - iter 1782/1984 - loss 0.01600345 - time (sec): 109.60 - samples/sec: 1340.49 - lr: 0.000007 - momentum: 0.000000
2023-10-24 17:12:20,768 epoch 8 - iter 1980/1984 - loss 0.01670679 - time (sec): 121.75 - samples/sec: 1344.01 - lr: 0.000007 - momentum: 0.000000
2023-10-24 17:12:21,005 ----------------------------------------------------------------------------------------------------
2023-10-24 17:12:21,005 EPOCH 8 done: loss 0.0167 - lr: 0.000007
2023-10-24 17:12:24,126 DEV : loss 0.2260325700044632 - f1-score (micro avg) 0.7547
2023-10-24 17:12:24,142 saving best model
2023-10-24 17:12:24,734 ----------------------------------------------------------------------------------------------------
2023-10-24 17:12:36,802 epoch 9 - iter 198/1984 - loss 0.01167945 - time (sec): 12.07 - samples/sec: 1314.60 - lr: 0.000006 - momentum: 0.000000
2023-10-24 17:12:48,856 epoch 9 - iter 396/1984 - loss 0.01028989 - time (sec): 24.12 - samples/sec: 1310.79 - lr: 0.000006 - momentum: 0.000000
2023-10-24 17:13:00,869 epoch 9 - iter 594/1984 - loss 0.01244219 - time (sec): 36.13 - samples/sec: 1307.66 - lr: 0.000006 - momentum: 0.000000
2023-10-24 17:13:13,465 epoch 9 - iter 792/1984 - loss 0.01152707 - time (sec): 48.73 - samples/sec: 1326.87 - lr: 0.000005 - momentum: 0.000000
2023-10-24 17:13:25,772 epoch 9 - iter 990/1984 - loss 0.01079042 - time (sec): 61.04 - samples/sec: 1339.77 - lr: 0.000005 - momentum: 0.000000
2023-10-24 17:13:38,011 epoch 9 - iter 1188/1984 - loss 0.01084398 - time (sec): 73.28 - samples/sec: 1342.98 - lr: 0.000005 - momentum: 0.000000
2023-10-24 17:13:50,001 epoch 9 - iter 1386/1984 - loss 0.01095687 - time (sec): 85.27 - samples/sec: 1342.23 - lr: 0.000004 - momentum: 0.000000
2023-10-24 17:14:02,064 epoch 9 - iter 1584/1984 - loss 0.01043359 - time (sec): 97.33 - samples/sec: 1342.13 - lr: 0.000004 - momentum: 0.000000
2023-10-24 17:14:14,071 epoch 9 - iter 1782/1984 - loss 0.01070738 - time (sec): 109.34 - samples/sec: 1343.09 - lr: 0.000004 - momentum: 0.000000
2023-10-24 17:14:26,147 epoch 9 - iter 1980/1984 - loss 0.01099379 - time (sec): 121.41 - samples/sec: 1348.31 - lr: 0.000003 - momentum: 0.000000
2023-10-24 17:14:26,381 ----------------------------------------------------------------------------------------------------
2023-10-24 17:14:26,381 EPOCH 9 done: loss 0.0110 - lr: 0.000003
2023-10-24 17:14:29,825 DEV : loss 0.24125918745994568 - f1-score (micro avg) 0.7639
2023-10-24 17:14:29,841 saving best model
2023-10-24 17:14:30,454 ----------------------------------------------------------------------------------------------------
2023-10-24 17:14:42,411 epoch 10 - iter 198/1984 - loss 0.00345145 - time (sec): 11.96 - samples/sec: 1355.66 - lr: 0.000003 - momentum: 0.000000
2023-10-24 17:14:54,503 epoch 10 - iter 396/1984 - loss 0.00398821 - time (sec): 24.05 - samples/sec: 1344.86 - lr: 0.000003 - momentum: 0.000000
2023-10-24 17:15:06,668 epoch 10 - iter 594/1984 - loss 0.00606419 - time (sec): 36.21 - samples/sec: 1355.76 - lr: 0.000002 - momentum: 0.000000
2023-10-24 17:15:18,803 epoch 10 - iter 792/1984 - loss 0.00655537 - time (sec): 48.35 - samples/sec: 1368.69 - lr: 0.000002 - momentum: 0.000000
2023-10-24 17:15:30,830 epoch 10 - iter 990/1984 - loss 0.00666700 - time (sec): 60.38 - samples/sec: 1363.80 - lr: 0.000002 - momentum: 0.000000
2023-10-24 17:15:42,958 epoch 10 - iter 1188/1984 - loss 0.00648338 - time (sec): 72.50 - samples/sec: 1355.06 - lr: 0.000001 - momentum: 0.000000
2023-10-24 17:15:55,093 epoch 10 - iter 1386/1984 - loss 0.00695280 - time (sec): 84.64 - samples/sec: 1354.00 - lr: 0.000001 - momentum: 0.000000
2023-10-24 17:16:07,044 epoch 10 - iter 1584/1984 - loss 0.00694583 - time (sec): 96.59 - samples/sec: 1348.02 - lr: 0.000001 - momentum: 0.000000
2023-10-24 17:16:19,281 epoch 10 - iter 1782/1984 - loss 0.00731465 - time (sec): 108.83 - samples/sec: 1348.88 - lr: 0.000000 - momentum: 0.000000
2023-10-24 17:16:31,552 epoch 10 - iter 1980/1984 - loss 0.00719445 - time (sec): 121.10 - samples/sec: 1351.26 - lr: 0.000000 - momentum: 0.000000
2023-10-24 17:16:31,799 ----------------------------------------------------------------------------------------------------
2023-10-24 17:16:31,799 EPOCH 10 done: loss 0.0072 - lr: 0.000000
2023-10-24 17:16:34,920 DEV : loss 0.24395139515399933 - f1-score (micro avg) 0.7747
2023-10-24 17:16:34,936 saving best model
2023-10-24 17:16:35,995 ----------------------------------------------------------------------------------------------------
2023-10-24 17:16:35,996 Loading model from best epoch ...
2023-10-24 17:16:37,468 SequenceTagger predicts: Dictionary with 13 tags: O, S-PER, B-PER, E-PER, I-PER, S-LOC, B-LOC, E-LOC, I-LOC, S-ORG, B-ORG, E-ORG, I-ORG
2023-10-24 17:16:40,539
Results:
- F-score (micro) 0.7847
- F-score (macro) 0.7007
- Accuracy 0.6667
By class:
precision recall f1-score support
LOC 0.8338 0.8580 0.8457 655
PER 0.6923 0.8072 0.7453 223
ORG 0.5800 0.4567 0.5110 127
micro avg 0.7737 0.7960 0.7847 1005
macro avg 0.7020 0.7073 0.7007 1005
weighted avg 0.7704 0.7960 0.7812 1005
2023-10-24 17:16:40,539 ----------------------------------------------------------------------------------------------------