2023-10-24 16:55:43,926 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,927 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(64001, 768)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
              (intermediate_act_fn): GELUActivation()
            )
            (output): BertOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
      (pooler): BertPooler(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (activation): Tanh()
      )
    )
  )
  (locked_dropout): LockedDropout(p=0.5)
  (linear): Linear(in_features=768, out_features=13, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2023-10-24 16:55:43,927 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,927 MultiCorpus: 7936 train + 992 dev + 992 test sentences
 - NER_ICDAR_EUROPEANA Corpus: 7936 train + 992 dev + 992 test sentences - /home/ubuntu/.flair/datasets/ner_icdar_europeana/fr
2023-10-24 16:55:43,927 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,927 Train: 7936 sentences
2023-10-24 16:55:43,927 (train_with_dev=False, train_with_test=False)
2023-10-24 16:55:43,928 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,928 Training Params:
2023-10-24 16:55:43,928 - learning_rate: "3e-05"
2023-10-24 16:55:43,928 - mini_batch_size: "4"
2023-10-24 16:55:43,928 - max_epochs: "10"
2023-10-24 16:55:43,928 - shuffle: "True"
2023-10-24 16:55:43,928 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,928 Plugins:
2023-10-24 16:55:43,928 - TensorboardLogger
2023-10-24 16:55:43,928 - LinearScheduler | warmup_fraction: '0.1'
2023-10-24 16:55:43,928 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,928 Final evaluation on model from best epoch (best-model.pt)
2023-10-24 16:55:43,928 - metric: "('micro avg', 'f1-score')"
2023-10-24 16:55:43,928 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,928 Computation:
2023-10-24 16:55:43,928 - compute on device: cuda:0
2023-10-24 16:55:43,928 - embedding storage: none
2023-10-24 16:55:43,928 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,928 Model training base path: "hmbench-icdar/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-3"
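Editor's note: the parameters logged above (learning rate 3e-05, mini-batch size 4, 10 epochs, linear schedule with 10% warmup) correspond to a standard Flair fine-tuning run. A hedged sketch follows, reusing the corpus and tagger from the earlier example; the TensorBoard plugin wiring is omitted and the exact keyword names may vary between Flair versions.

    from flair.trainers import ModelTrainer

    trainer = ModelTrainer(tagger, corpus)

    trainer.fine_tune(
        "hmbench-icdar/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-3",
        learning_rate=3e-05,
        mini_batch_size=4,
        max_epochs=10,
        shuffle=True,
        # fine_tune() attaches a linear learning-rate schedule; a 10% warmup
        # fraction, as logged above, is its usual default.
    )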
2023-10-24 16:55:43,928 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,928 ----------------------------------------------------------------------------------------------------
2023-10-24 16:55:43,928 Logging anything other than scalars to TensorBoard is currently not supported.
2023-10-24 16:55:56,135 epoch 1 - iter 198/1984 - loss 1.38943938 - time (sec): 12.21 - samples/sec: 1429.33 - lr: 0.000003 - momentum: 0.000000
2023-10-24 16:56:08,160 epoch 1 - iter 396/1984 - loss 0.87338015 - time (sec): 24.23 - samples/sec: 1389.55 - lr: 0.000006 - momentum: 0.000000
2023-10-24 16:56:20,110 epoch 1 - iter 594/1984 - loss 0.65982021 - time (sec): 36.18 - samples/sec: 1357.07 - lr: 0.000009 - momentum: 0.000000
2023-10-24 16:56:32,204 epoch 1 - iter 792/1984 - loss 0.53649000 - time (sec): 48.28 - samples/sec: 1359.51 - lr: 0.000012 - momentum: 0.000000
2023-10-24 16:56:44,187 epoch 1 - iter 990/1984 - loss 0.46413907 - time (sec): 60.26 - samples/sec: 1349.64 - lr: 0.000015 - momentum: 0.000000
2023-10-24 16:56:56,148 epoch 1 - iter 1188/1984 - loss 0.41329533 - time (sec): 72.22 - samples/sec: 1344.79 - lr: 0.000018 - momentum: 0.000000
2023-10-24 16:57:08,421 epoch 1 - iter 1386/1984 - loss 0.36950313 - time (sec): 84.49 - samples/sec: 1347.81 - lr: 0.000021 - momentum: 0.000000
2023-10-24 16:57:20,569 epoch 1 - iter 1584/1984 - loss 0.33972792 - time (sec): 96.64 - samples/sec: 1348.12 - lr: 0.000024 - momentum: 0.000000
2023-10-24 16:57:32,795 epoch 1 - iter 1782/1984 - loss 0.31934420 - time (sec): 108.87 - samples/sec: 1353.02 - lr: 0.000027 - momentum: 0.000000
2023-10-24 16:57:44,981 epoch 1 - iter 1980/1984 - loss 0.30139055 - time (sec): 121.05 - samples/sec: 1351.60 - lr: 0.000030 - momentum: 0.000000
2023-10-24 16:57:45,234 ----------------------------------------------------------------------------------------------------
2023-10-24 16:57:45,234 EPOCH 1 done: loss 0.3010 - lr: 0.000030
2023-10-24 16:57:48,301 DEV : loss 0.08988756686449051 - f1-score (micro avg) 0.7331
2023-10-24 16:57:48,316 saving best model
2023-10-24 16:57:48,785 ----------------------------------------------------------------------------------------------------
2023-10-24 16:58:00,799 epoch 2 - iter 198/1984 - loss 0.11015239 - time (sec): 12.01 - samples/sec: 1357.08 - lr: 0.000030 - momentum: 0.000000
2023-10-24 16:58:12,903 epoch 2 - iter 396/1984 - loss 0.11165230 - time (sec): 24.12 - samples/sec: 1347.03 - lr: 0.000029 - momentum: 0.000000
2023-10-24 16:58:24,975 epoch 2 - iter 594/1984 - loss 0.11377525 - time (sec): 36.19 - samples/sec: 1350.17 - lr: 0.000029 - momentum: 0.000000
2023-10-24 16:58:37,241 epoch 2 - iter 792/1984 - loss 0.11506086 - time (sec): 48.46 - samples/sec: 1353.84 - lr: 0.000029 - momentum: 0.000000
2023-10-24 16:58:49,374 epoch 2 - iter 990/1984 - loss 0.11305507 - time (sec): 60.59 - samples/sec: 1360.09 - lr: 0.000028 - momentum: 0.000000
2023-10-24 16:59:01,455 epoch 2 - iter 1188/1984 - loss 0.11198699 - time (sec): 72.67 - samples/sec: 1358.52 - lr: 0.000028 - momentum: 0.000000
2023-10-24 16:59:13,628 epoch 2 - iter 1386/1984 - loss 0.11098977 - time (sec): 84.84 - samples/sec: 1361.77 - lr: 0.000028 - momentum: 0.000000
2023-10-24 16:59:25,721 epoch 2 - iter 1584/1984 - loss 0.10937736 - time (sec): 96.94 - samples/sec: 1353.64 - lr: 0.000027 - momentum: 0.000000
2023-10-24 16:59:37,810 epoch 2 - iter 1782/1984 - loss 0.11058550 - time (sec): 109.02 - samples/sec: 1350.06 - lr: 0.000027 - momentum: 0.000000
2023-10-24 16:59:50,060 epoch 2 - iter 1980/1984 - loss 0.11259310 - time (sec): 121.27 - samples/sec: 1350.04 - lr: 0.000027 - momentum: 0.000000
2023-10-24 16:59:50,293 ----------------------------------------------------------------------------------------------------
2023-10-24 16:59:50,293 EPOCH 2 done: loss 0.1125 - lr: 0.000027
2023-10-24 16:59:53,708 DEV : loss 0.09264685958623886 - f1-score (micro avg) 0.7178
2023-10-24 16:59:53,723 ----------------------------------------------------------------------------------------------------
2023-10-24 17:00:05,764 epoch 3 - iter 198/1984 - loss 0.07194458 - time (sec): 12.04 - samples/sec: 1344.06 - lr: 0.000026 - momentum: 0.000000
2023-10-24 17:00:17,986 epoch 3 - iter 396/1984 - loss 0.07937385 - time (sec): 24.26 - samples/sec: 1357.81 - lr: 0.000026 - momentum: 0.000000
2023-10-24 17:00:30,051 epoch 3 - iter 594/1984 - loss 0.08581189 - time (sec): 36.33 - samples/sec: 1345.29 - lr: 0.000026 - momentum: 0.000000
2023-10-24 17:00:42,040 epoch 3 - iter 792/1984 - loss 0.08360831 - time (sec): 48.32 - samples/sec: 1340.89 - lr: 0.000025 - momentum: 0.000000
2023-10-24 17:00:54,457 epoch 3 - iter 990/1984 - loss 0.08135214 - time (sec): 60.73 - samples/sec: 1356.85 - lr: 0.000025 - momentum: 0.000000
2023-10-24 17:01:06,626 epoch 3 - iter 1188/1984 - loss 0.08288010 - time (sec): 72.90 - samples/sec: 1355.03 - lr: 0.000025 - momentum: 0.000000
2023-10-24 17:01:18,609 epoch 3 - iter 1386/1984 - loss 0.08319010 - time (sec): 84.89 - samples/sec: 1351.84 - lr: 0.000024 - momentum: 0.000000
2023-10-24 17:01:30,806 epoch 3 - iter 1584/1984 - loss 0.08252110 - time (sec): 97.08 - samples/sec: 1352.96 - lr: 0.000024 - momentum: 0.000000
2023-10-24 17:01:42,960 epoch 3 - iter 1782/1984 - loss 0.08183765 - time (sec): 109.24 - samples/sec: 1353.28 - lr: 0.000024 - momentum: 0.000000
2023-10-24 17:01:54,990 epoch 3 - iter 1980/1984 - loss 0.08232756 - time (sec): 121.27 - samples/sec: 1350.29 - lr: 0.000023 - momentum: 0.000000
2023-10-24 17:01:55,227 ----------------------------------------------------------------------------------------------------
2023-10-24 17:01:55,228 EPOCH 3 done: loss 0.0823 - lr: 0.000023
2023-10-24 17:01:58,337 DEV : loss 0.12425895780324936 - f1-score (micro avg) 0.748
2023-10-24 17:01:58,353 saving best model
2023-10-24 17:01:58,959 ----------------------------------------------------------------------------------------------------
2023-10-24 17:02:10,932 epoch 4 - iter 198/1984 - loss 0.05660289 - time (sec): 11.97 - samples/sec: 1320.40 - lr: 0.000023 - momentum: 0.000000
2023-10-24 17:02:23,327 epoch 4 - iter 396/1984 - loss 0.06137809 - time (sec): 24.37 - samples/sec: 1345.54 - lr: 0.000023 - momentum: 0.000000
2023-10-24 17:02:35,490 epoch 4 - iter 594/1984 - loss 0.05994790 - time (sec): 36.53 - samples/sec: 1347.81 - lr: 0.000022 - momentum: 0.000000
2023-10-24 17:02:47,476 epoch 4 - iter 792/1984 - loss 0.06293485 - time (sec): 48.52 - samples/sec: 1342.62 - lr: 0.000022 - momentum: 0.000000
2023-10-24 17:02:59,577 epoch 4 - iter 990/1984 - loss 0.06173995 - time (sec): 60.62 - samples/sec: 1346.36 - lr: 0.000022 - momentum: 0.000000
2023-10-24 17:03:11,344 epoch 4 - iter 1188/1984 - loss 0.06025572 - time (sec): 72.38 - samples/sec: 1331.03 - lr: 0.000021 - momentum: 0.000000
2023-10-24 17:03:23,543 epoch 4 - iter 1386/1984 - loss 0.06154385 - time (sec): 84.58 - samples/sec: 1341.57 - lr: 0.000021 - momentum: 0.000000
2023-10-24 17:03:35,705 epoch 4 - iter 1584/1984 - loss 0.06060378 - time (sec): 96.74 - samples/sec: 1340.90 - lr: 0.000021 - momentum: 0.000000
2023-10-24 17:03:48,022 epoch 4 - iter 1782/1984 - loss 0.06089033 - time (sec): 109.06 - samples/sec: 1341.13 - lr: 0.000020 - momentum: 0.000000
2023-10-24 17:04:00,486 epoch 4 - iter 1980/1984 - loss 0.06041258 - time (sec): 121.53 - samples/sec: 1346.67 - lr: 0.000020 - momentum: 0.000000
2023-10-24 17:04:00,727 ----------------------------------------------------------------------------------------------------
2023-10-24 17:04:00,727 EPOCH 4 done: loss 0.0603 - lr: 0.000020
2023-10-24 17:04:03,851 DEV : loss 0.17354941368103027 - f1-score (micro avg) 0.7452
2023-10-24 17:04:03,866 ----------------------------------------------------------------------------------------------------
2023-10-24 17:04:16,120 epoch 5 - iter 198/1984 - loss 0.03818146 - time (sec): 12.25 - samples/sec: 1386.56 - lr: 0.000020 - momentum: 0.000000
2023-10-24 17:04:28,224 epoch 5 - iter 396/1984 - loss 0.04086454 - time (sec): 24.36 - samples/sec: 1355.49 - lr: 0.000019 - momentum: 0.000000
2023-10-24 17:04:40,492 epoch 5 - iter 594/1984 - loss 0.04353732 - time (sec): 36.62 - samples/sec: 1353.93 - lr: 0.000019 - momentum: 0.000000
2023-10-24 17:04:52,589 epoch 5 - iter 792/1984 - loss 0.04314748 - time (sec): 48.72 - samples/sec: 1339.17 - lr: 0.000019 - momentum: 0.000000
2023-10-24 17:05:04,686 epoch 5 - iter 990/1984 - loss 0.04590035 - time (sec): 60.82 - samples/sec: 1347.08 - lr: 0.000018 - momentum: 0.000000
2023-10-24 17:05:16,672 epoch 5 - iter 1188/1984 - loss 0.04501377 - time (sec): 72.81 - samples/sec: 1343.70 - lr: 0.000018 - momentum: 0.000000
2023-10-24 17:05:28,989 epoch 5 - iter 1386/1984 - loss 0.04487474 - time (sec): 85.12 - samples/sec: 1348.08 - lr: 0.000018 - momentum: 0.000000
2023-10-24 17:05:41,166 epoch 5 - iter 1584/1984 - loss 0.04660773 - time (sec): 97.30 - samples/sec: 1347.44 - lr: 0.000017 - momentum: 0.000000
2023-10-24 17:05:53,109 epoch 5 - iter 1782/1984 - loss 0.04691609 - time (sec): 109.24 - samples/sec: 1345.32 - lr: 0.000017 - momentum: 0.000000
2023-10-24 17:06:05,294 epoch 5 - iter 1980/1984 - loss 0.04565686 - time (sec): 121.43 - samples/sec: 1347.64 - lr: 0.000017 - momentum: 0.000000
2023-10-24 17:06:05,541 ----------------------------------------------------------------------------------------------------
2023-10-24 17:06:05,541 EPOCH 5 done: loss 0.0458 - lr: 0.000017
2023-10-24 17:06:08,662 DEV : loss 0.21091219782829285 - f1-score (micro avg) 0.7348
2023-10-24 17:06:08,678 ----------------------------------------------------------------------------------------------------
2023-10-24 17:06:20,950 epoch 6 - iter 198/1984 - loss 0.03634734 - time (sec): 12.27 - samples/sec: 1323.11 - lr: 0.000016 - momentum: 0.000000
2023-10-24 17:06:33,123 epoch 6 - iter 396/1984 - loss 0.03594311 - time (sec): 24.44 - samples/sec: 1350.94 - lr: 0.000016 - momentum: 0.000000
2023-10-24 17:06:45,222 epoch 6 - iter 594/1984 - loss 0.03443057 - time (sec): 36.54 - samples/sec: 1356.66 - lr: 0.000016 - momentum: 0.000000
2023-10-24 17:06:57,225 epoch 6 - iter 792/1984 - loss 0.03374808 - time (sec): 48.55 - samples/sec: 1356.48 - lr: 0.000015 - momentum: 0.000000
2023-10-24 17:07:09,782 epoch 6 - iter 990/1984 - loss 0.03467893 - time (sec): 61.10 - samples/sec: 1352.93 - lr: 0.000015 - momentum: 0.000000
2023-10-24 17:07:21,887 epoch 6 - iter 1188/1984 - loss 0.03450278 - time (sec): 73.21 - samples/sec: 1347.25 - lr: 0.000015 - momentum: 0.000000
2023-10-24 17:07:33,957 epoch 6 - iter 1386/1984 - loss 0.03433692 - time (sec): 85.28 - samples/sec: 1341.65 - lr: 0.000014 - momentum: 0.000000
2023-10-24 17:07:46,058 epoch 6 - iter 1584/1984 - loss 0.03334634 - time (sec): 97.38 - samples/sec: 1342.51 - lr: 0.000014 - momentum: 0.000000
2023-10-24 17:07:58,090 epoch 6 - iter 1782/1984 - loss 0.03396585 - time (sec): 109.41 - samples/sec: 1336.77 - lr: 0.000014 - momentum: 0.000000
2023-10-24 17:08:10,180 epoch 6 - iter 1980/1984 - loss 0.03474326 - time (sec): 121.50 - samples/sec: 1347.16 - lr: 0.000013 - momentum: 0.000000
2023-10-24 17:08:10,422 ----------------------------------------------------------------------------------------------------
2023-10-24 17:08:10,422 EPOCH 6 done: loss 0.0347 - lr: 0.000013
2023-10-24 17:08:13,549 DEV : loss 0.1887310892343521 - f1-score (micro avg) 0.7538
2023-10-24 17:08:13,565 saving best model
2023-10-24 17:08:14,156 ----------------------------------------------------------------------------------------------------
2023-10-24 17:08:26,505 epoch 7 - iter 198/1984 - loss 0.02827359 - time (sec): 12.35 - samples/sec: 1359.01 - lr: 0.000013 - momentum: 0.000000
2023-10-24 17:08:38,484 epoch 7 - iter 396/1984 - loss 0.02865567 - time (sec): 24.33 - samples/sec: 1334.37 - lr: 0.000013 - momentum: 0.000000
2023-10-24 17:08:50,648 epoch 7 - iter 594/1984 - loss 0.02475648 - time (sec): 36.49 - samples/sec: 1335.51 - lr: 0.000012 - momentum: 0.000000
2023-10-24 17:09:02,801 epoch 7 - iter 792/1984 - loss 0.02504839 - time (sec): 48.64 - samples/sec: 1324.60 - lr: 0.000012 - momentum: 0.000000
2023-10-24 17:09:14,867 epoch 7 - iter 990/1984 - loss 0.02489200 - time (sec): 60.71 - samples/sec: 1323.82 - lr: 0.000012 - momentum: 0.000000
2023-10-24 17:09:27,252 epoch 7 - iter 1188/1984 - loss 0.02447758 - time (sec): 73.10 - samples/sec: 1338.12 - lr: 0.000011 - momentum: 0.000000
2023-10-24 17:09:39,473 epoch 7 - iter 1386/1984 - loss 0.02467051 - time (sec): 85.32 - samples/sec: 1344.90 - lr: 0.000011 - momentum: 0.000000
2023-10-24 17:09:51,517 epoch 7 - iter 1584/1984 - loss 0.02483831 - time (sec): 97.36 - samples/sec: 1345.72 - lr: 0.000011 - momentum: 0.000000
2023-10-24 17:10:03,564 epoch 7 - iter 1782/1984 - loss 0.02499026 - time (sec): 109.41 - samples/sec: 1346.91 - lr: 0.000010 - momentum: 0.000000
2023-10-24 17:10:15,630 epoch 7 - iter 1980/1984 - loss 0.02535832 - time (sec): 121.47 - samples/sec: 1346.06 - lr: 0.000010 - momentum: 0.000000
2023-10-24 17:10:15,884 ----------------------------------------------------------------------------------------------------
2023-10-24 17:10:15,884 EPOCH 7 done: loss 0.0253 - lr: 0.000010
2023-10-24 17:10:19,005 DEV : loss 0.2231946587562561 - f1-score (micro avg) 0.7507
2023-10-24 17:10:19,021 ----------------------------------------------------------------------------------------------------
2023-10-24 17:10:31,676 epoch 8 - iter 198/1984 - loss 0.01362913 - time (sec): 12.65 - samples/sec: 1371.16 - lr: 0.000010 - momentum: 0.000000
2023-10-24 17:10:43,986 epoch 8 - iter 396/1984 - loss 0.01453365 - time (sec): 24.96 - samples/sec: 1368.42 - lr: 0.000009 - momentum: 0.000000
2023-10-24 17:10:55,962 epoch 8 - iter 594/1984 - loss 0.01371635 - time (sec): 36.94 - samples/sec: 1345.54 - lr: 0.000009 - momentum: 0.000000
2023-10-24 17:11:08,188 epoch 8 - iter 792/1984 - loss 0.01363124 - time (sec): 49.17 - samples/sec: 1338.60 - lr: 0.000009 - momentum: 0.000000
2023-10-24 17:11:20,225 epoch 8 - iter 990/1984 - loss 0.01455714 - time (sec): 61.20 - samples/sec: 1334.80 - lr: 0.000008 - momentum: 0.000000
2023-10-24 17:11:32,426 epoch 8 - iter 1188/1984 - loss 0.01600626 - time (sec): 73.40 - samples/sec: 1346.37 - lr: 0.000008 - momentum: 0.000000
2023-10-24 17:11:44,542 epoch 8 - iter 1386/1984 - loss 0.01607764 - time (sec): 85.52 - samples/sec: 1348.15 - lr: 0.000008 - momentum: 0.000000
2023-10-24 17:11:56,395 epoch 8 - iter 1584/1984 - loss 0.01596874 - time (sec): 97.37 - samples/sec: 1339.42 - lr: 0.000007 - momentum: 0.000000
2023-10-24 17:12:08,622 epoch 8 - iter 1782/1984 - loss 0.01600345 - time (sec): 109.60 - samples/sec: 1340.49 - lr: 0.000007 - momentum: 0.000000
2023-10-24 17:12:20,768 epoch 8 - iter 1980/1984 - loss 0.01670679 - time (sec): 121.75 - samples/sec: 1344.01 - lr: 0.000007 - momentum: 0.000000
2023-10-24 17:12:21,005 ----------------------------------------------------------------------------------------------------
2023-10-24 17:12:21,005 EPOCH 8 done: loss 0.0167 - lr: 0.000007
2023-10-24 17:12:24,126 DEV : loss 0.2260325700044632 - f1-score (micro avg) 0.7547
2023-10-24 17:12:24,142 saving best model
2023-10-24 17:12:24,734 ----------------------------------------------------------------------------------------------------
2023-10-24 17:12:36,802 epoch 9 - iter 198/1984 - loss 0.01167945 - time (sec): 12.07 - samples/sec: 1314.60 - lr: 0.000006 - momentum: 0.000000
2023-10-24 17:12:48,856 epoch 9 - iter 396/1984 - loss 0.01028989 - time (sec): 24.12 - samples/sec: 1310.79 - lr: 0.000006 - momentum: 0.000000
2023-10-24 17:13:00,869 epoch 9 - iter 594/1984 - loss 0.01244219 - time (sec): 36.13 - samples/sec: 1307.66 - lr: 0.000006 - momentum: 0.000000
2023-10-24 17:13:13,465 epoch 9 - iter 792/1984 - loss 0.01152707 - time (sec): 48.73 - samples/sec: 1326.87 - lr: 0.000005 - momentum: 0.000000
2023-10-24 17:13:25,772 epoch 9 - iter 990/1984 - loss 0.01079042 - time (sec): 61.04 - samples/sec: 1339.77 - lr: 0.000005 - momentum: 0.000000
2023-10-24 17:13:38,011 epoch 9 - iter 1188/1984 - loss 0.01084398 - time (sec): 73.28 - samples/sec: 1342.98 - lr: 0.000005 - momentum: 0.000000
2023-10-24 17:13:50,001 epoch 9 - iter 1386/1984 - loss 0.01095687 - time (sec): 85.27 - samples/sec: 1342.23 - lr: 0.000004 - momentum: 0.000000
2023-10-24 17:14:02,064 epoch 9 - iter 1584/1984 - loss 0.01043359 - time (sec): 97.33 - samples/sec: 1342.13 - lr: 0.000004 - momentum: 0.000000
2023-10-24 17:14:14,071 epoch 9 - iter 1782/1984 - loss 0.01070738 - time (sec): 109.34 - samples/sec: 1343.09 - lr: 0.000004 - momentum: 0.000000
2023-10-24 17:14:26,147 epoch 9 - iter 1980/1984 - loss 0.01099379 - time (sec): 121.41 - samples/sec: 1348.31 - lr: 0.000003 - momentum: 0.000000
2023-10-24 17:14:26,381 ----------------------------------------------------------------------------------------------------
2023-10-24 17:14:26,381 EPOCH 9 done: loss 0.0110 - lr: 0.000003
2023-10-24 17:14:29,825 DEV : loss 0.24125918745994568 - f1-score (micro avg) 0.7639
2023-10-24 17:14:29,841 saving best model
2023-10-24 17:14:30,454 ----------------------------------------------------------------------------------------------------
2023-10-24 17:14:42,411 epoch 10 - iter 198/1984 - loss 0.00345145 - time (sec): 11.96 - samples/sec: 1355.66 - lr: 0.000003 - momentum: 0.000000
2023-10-24 17:14:54,503 epoch 10 - iter 396/1984 - loss 0.00398821 - time (sec): 24.05 - samples/sec: 1344.86 - lr: 0.000003 - momentum: 0.000000
2023-10-24 17:15:06,668 epoch 10 - iter 594/1984 - loss 0.00606419 - time (sec): 36.21 - samples/sec: 1355.76 - lr: 0.000002 - momentum: 0.000000
2023-10-24 17:15:18,803 epoch 10 - iter 792/1984 - loss 0.00655537 - time (sec): 48.35 - samples/sec: 1368.69 - lr: 0.000002 - momentum: 0.000000
2023-10-24 17:15:30,830 epoch 10 - iter 990/1984 - loss 0.00666700 - time (sec): 60.38 - samples/sec: 1363.80 - lr: 0.000002 - momentum: 0.000000
2023-10-24 17:15:42,958 epoch 10 - iter 1188/1984 - loss 0.00648338 - time (sec): 72.50 - samples/sec: 1355.06 - lr: 0.000001 - momentum: 0.000000
2023-10-24 17:15:55,093 epoch 10 - iter 1386/1984 - loss 0.00695280 - time (sec): 84.64 - samples/sec: 1354.00 - lr: 0.000001 - momentum: 0.000000
2023-10-24 17:16:07,044 epoch 10 - iter 1584/1984 - loss 0.00694583 - time (sec): 96.59 - samples/sec: 1348.02 - lr: 0.000001 - momentum: 0.000000
2023-10-24 17:16:19,281 epoch 10 - iter 1782/1984 - loss 0.00731465 - time (sec): 108.83 - samples/sec: 1348.88 - lr: 0.000000 - momentum: 0.000000
2023-10-24 17:16:31,552 epoch 10 - iter 1980/1984 - loss 0.00719445 - time (sec): 121.10 - samples/sec: 1351.26 - lr: 0.000000 - momentum: 0.000000
2023-10-24 17:16:31,799 ----------------------------------------------------------------------------------------------------
2023-10-24 17:16:31,799 EPOCH 10 done: loss 0.0072 - lr: 0.000000
2023-10-24 17:16:34,920 DEV : loss 0.24395139515399933 - f1-score (micro avg) 0.7747
2023-10-24 17:16:34,936 saving best model
2023-10-24 17:16:35,995 ----------------------------------------------------------------------------------------------------
2023-10-24 17:16:35,996 Loading model from best epoch ...
2023-10-24 17:16:37,468 SequenceTagger predicts: Dictionary with 13 tags: O, S-PER, B-PER, E-PER, I-PER, S-LOC, B-LOC, E-LOC, I-LOC, S-ORG, B-ORG, E-ORG, I-ORG
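Editor's note: the tag dictionary above is a BIOES scheme over the PER, LOC and ORG entity types. A best-model.pt checkpoint such as the one loaded here can be applied to new text roughly as follows (a hedged sketch; the example sentence is purely illustrative):

    from flair.data import Sentence
    from flair.models import SequenceTagger

    tagger = SequenceTagger.load(
        "hmbench-icdar/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-3/best-model.pt"
    )

    sentence = Sentence("Victor Hugo est né à Besançon .")  # illustrative input
    tagger.predict(sentence)

    for span in sentence.get_spans("ner"):
        print(span.text, span.get_label("ner").value, span.get_label("ner").score)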
2023-10-24 17:16:40,539
Results:
- F-score (micro) 0.7847
- F-score (macro) 0.7007
- Accuracy 0.6667

By class:
              precision    recall  f1-score   support

         LOC     0.8338    0.8580    0.8457       655
         PER     0.6923    0.8072    0.7453       223
         ORG     0.5800    0.4567    0.5110       127

   micro avg     0.7737    0.7960    0.7847      1005
   macro avg     0.7020    0.7073    0.7007      1005
weighted avg     0.7704    0.7960    0.7812      1005
2023-10-24 17:16:40,539 ----------------------------------------------------------------------------------------------------
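Editor's note: the per-class report above is the kind of output Flair's evaluate call produces on the test split. A hedged sketch, assuming the corpus and loaded tagger from the earlier examples:

    result = tagger.evaluate(
        corpus.test,
        gold_label_type="ner",
        mini_batch_size=4,
    )
    print(result.main_score)        # micro-avg F1, 0.7847 in the log above
    print(result.detailed_results)  # per-class precision/recall/F1 table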