2023-10-24 21:54:39,743 ----------------------------------------------------------------------------------------------------
2023-10-24 21:54:39,744 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(64001, 768)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
              (intermediate_act_fn): GELUActivation()
            )
            (output): BertOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
      (pooler): BertPooler(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (activation): Tanh()
      )
    )
  )
  (locked_dropout): LockedDropout(p=0.5)
  (linear): Linear(in_features=768, out_features=13, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2023-10-24 21:54:39,744 ----------------------------------------------------------------------------------------------------
2023-10-24 21:54:39,744 MultiCorpus: 5777 train + 722 dev + 723 test sentences
 - NER_ICDAR_EUROPEANA Corpus: 5777 train + 722 dev + 723 test sentences - /home/ubuntu/.flair/datasets/ner_icdar_europeana/nl
2023-10-24 21:54:39,744 ----------------------------------------------------------------------------------------------------
2023-10-24 21:54:39,744 Train:  5777 sentences
2023-10-24 21:54:39,745         (train_with_dev=False, train_with_test=False)
2023-10-24 21:54:39,745 ----------------------------------------------------------------------------------------------------
2023-10-24 21:54:39,745 Training Params:
2023-10-24 21:54:39,745  - learning_rate: "3e-05"
2023-10-24 21:54:39,745  - mini_batch_size: "4"
2023-10-24 21:54:39,745  - max_epochs: "10"
2023-10-24 21:54:39,745  - shuffle: "True"
2023-10-24 21:54:39,745 ----------------------------------------------------------------------------------------------------
2023-10-24 21:54:39,745 Plugins:
2023-10-24 21:54:39,745  - TensorboardLogger
2023-10-24 21:54:39,745  - LinearScheduler | warmup_fraction: '0.1'
2023-10-24 21:54:39,745 ----------------------------------------------------------------------------------------------------
2023-10-24 21:54:39,745 Final evaluation on model from best epoch (best-model.pt)
2023-10-24 21:54:39,745  - metric: "('micro avg', 'f1-score')"
2023-10-24 21:54:39,745 ----------------------------------------------------------------------------------------------------
2023-10-24 21:54:39,745 Computation:
2023-10-24 21:54:39,745  - compute on device: cuda:0
2023-10-24 21:54:39,745  - embedding storage: none
2023-10-24 21:54:39,745 ----------------------------------------------------------------------------------------------------
2023-10-24 21:54:39,745 Model training base path: "hmbench-icdar/nl-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-1"
2023-10-24 21:54:39,745 ----------------------------------------------------------------------------------------------------
2023-10-24 21:54:39,745 ----------------------------------------------------------------------------------------------------
2023-10-24 21:54:39,745 Logging anything other than scalars to TensorBoard is currently not supported.
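For reference, a minimal Flair sketch that would assemble a comparable run. The corpus class, model name, hyperparameters, and base path are taken from the log above; the keyword arguments are assumptions inferred from the base-path naming (e.g. that "wsFalse" maps to use_context=False) and may differ from the original training script or across Flair versions.

```python
# Minimal sketch of a comparable fine-tuning run (assumptions noted inline).
from flair.datasets import NER_ICDAR_EUROPEANA
from flair.embeddings import TransformerWordEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

# Dutch ICDAR Europeana NER corpus (5777 train / 722 dev / 723 test sentences).
corpus = NER_ICDAR_EUROPEANA(language="nl")
label_dict = corpus.make_label_dictionary(label_type="ner")

# Historic multilingual BERT; "poolingfirst" and "layers-1" in the base path
# suggest first-subtoken pooling over the last layer, fine-tuned end to end.
embeddings = TransformerWordEmbeddings(
    model="dbmdz/bert-base-historic-multilingual-64k-td-cased",
    layers="-1",
    subtoken_pooling="first",
    fine_tune=True,
    use_context=False,  # assumption: "wsFalse" means no document context
)

# No RNN and no CRF on top, matching the printed model: embeddings ->
# LockedDropout(0.5) -> Linear(768, 13) -> CrossEntropyLoss.
tagger = SequenceTagger(
    embeddings=embeddings,
    tag_dictionary=label_dict,
    tag_type="ner",
    use_rnn=False,
    use_crf=False,
    reproject_embeddings=False,
)

trainer = ModelTrainer(tagger, corpus)
# fine_tune uses AdamW with a linear warmup/decay schedule by default, which
# matches the LinearScheduler plugin (warmup_fraction 0.1) logged above.
trainer.fine_tune(
    "hmbench-icdar/nl-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-1",
    learning_rate=3e-5,
    mini_batch_size=4,
    max_epochs=10,
)
```

With the default BIOES tag format and three entity types (PER, LOC, ORG), the label dictionary comes out to the 13 tags reported at the end of the log.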
2023-10-24 21:54:50,587 epoch 1 - iter 144/1445 - loss 1.81376766 - time (sec): 10.84 - samples/sec: 1631.80 - lr: 0.000003 - momentum: 0.000000
2023-10-24 21:55:00,870 epoch 1 - iter 288/1445 - loss 1.04845476 - time (sec): 21.12 - samples/sec: 1667.24 - lr: 0.000006 - momentum: 0.000000
2023-10-24 21:55:11,485 epoch 1 - iter 432/1445 - loss 0.76140912 - time (sec): 31.74 - samples/sec: 1705.82 - lr: 0.000009 - momentum: 0.000000
2023-10-24 21:55:21,493 epoch 1 - iter 576/1445 - loss 0.62525491 - time (sec): 41.75 - samples/sec: 1689.38 - lr: 0.000012 - momentum: 0.000000
2023-10-24 21:55:31,497 epoch 1 - iter 720/1445 - loss 0.53447084 - time (sec): 51.75 - samples/sec: 1683.91 - lr: 0.000015 - momentum: 0.000000
2023-10-24 21:55:41,710 epoch 1 - iter 864/1445 - loss 0.47506408 - time (sec): 61.96 - samples/sec: 1681.67 - lr: 0.000018 - momentum: 0.000000
2023-10-24 21:55:51,795 epoch 1 - iter 1008/1445 - loss 0.43026392 - time (sec): 72.05 - samples/sec: 1676.91 - lr: 0.000021 - momentum: 0.000000
2023-10-24 21:56:02,312 epoch 1 - iter 1152/1445 - loss 0.39358120 - time (sec): 82.57 - samples/sec: 1681.72 - lr: 0.000024 - momentum: 0.000000
2023-10-24 21:56:12,669 epoch 1 - iter 1296/1445 - loss 0.36332737 - time (sec): 92.92 - samples/sec: 1690.57 - lr: 0.000027 - momentum: 0.000000
2023-10-24 21:56:23,299 epoch 1 - iter 1440/1445 - loss 0.33854273 - time (sec): 103.55 - samples/sec: 1697.27 - lr: 0.000030 - momentum: 0.000000
2023-10-24 21:56:23,610 ----------------------------------------------------------------------------------------------------
2023-10-24 21:56:23,610 EPOCH 1 done: loss 0.3381 - lr: 0.000030
2023-10-24 21:56:26,809 DEV : loss 0.16900984942913055 - f1-score (micro avg) 0.3766
2023-10-24 21:56:26,820 saving best model
2023-10-24 21:56:27,384 ----------------------------------------------------------------------------------------------------
2023-10-24 21:56:37,658 epoch 2 - iter 144/1445 - loss 0.11826824 - time (sec): 10.27 - samples/sec: 1659.46 - lr: 0.000030 - momentum: 0.000000
2023-10-24 21:56:47,685 epoch 2 - iter 288/1445 - loss 0.11630277 - time (sec): 20.30 - samples/sec: 1646.32 - lr: 0.000029 - momentum: 0.000000
2023-10-24 21:56:57,995 epoch 2 - iter 432/1445 - loss 0.11271448 - time (sec): 30.61 - samples/sec: 1653.63 - lr: 0.000029 - momentum: 0.000000
2023-10-24 21:57:08,734 epoch 2 - iter 576/1445 - loss 0.10875524 - time (sec): 41.35 - samples/sec: 1675.21 - lr: 0.000029 - momentum: 0.000000
2023-10-24 21:57:19,614 epoch 2 - iter 720/1445 - loss 0.10283488 - time (sec): 52.23 - samples/sec: 1694.77 - lr: 0.000028 - momentum: 0.000000
2023-10-24 21:57:30,574 epoch 2 - iter 864/1445 - loss 0.10133854 - time (sec): 63.19 - samples/sec: 1698.08 - lr: 0.000028 - momentum: 0.000000
2023-10-24 21:57:40,866 epoch 2 - iter 1008/1445 - loss 0.10017031 - time (sec): 73.48 - samples/sec: 1693.71 - lr: 0.000028 - momentum: 0.000000
2023-10-24 21:57:50,757 epoch 2 - iter 1152/1445 - loss 0.10315088 - time (sec): 83.37 - samples/sec: 1682.40 - lr: 0.000027 - momentum: 0.000000
2023-10-24 21:58:01,154 epoch 2 - iter 1296/1445 - loss 0.10336237 - time (sec): 93.77 - samples/sec: 1680.38 - lr: 0.000027 - momentum: 0.000000
2023-10-24 21:58:11,679 epoch 2 - iter 1440/1445 - loss 0.10342005 - time (sec): 104.29 - samples/sec: 1683.55 - lr: 0.000027 - momentum: 0.000000
2023-10-24 21:58:12,004 ----------------------------------------------------------------------------------------------------
2023-10-24 21:58:12,004 EPOCH 2 done: loss 0.1035 - lr: 0.000027
2023-10-24 21:58:15,677 DEV : loss 0.1034025326371193 - f1-score (micro avg) 0.8052
2023-10-24 21:58:15,689 saving best model
2023-10-24 21:58:16,491 ----------------------------------------------------------------------------------------------------
2023-10-24 21:58:26,986 epoch 3 - iter 144/1445 - loss 0.07272831 - time (sec): 10.49 - samples/sec: 1666.02 - lr: 0.000026 - momentum: 0.000000
2023-10-24 21:58:37,393 epoch 3 - iter 288/1445 - loss 0.06637558 - time (sec): 20.90 - samples/sec: 1672.71 - lr: 0.000026 - momentum: 0.000000
2023-10-24 21:58:47,702 epoch 3 - iter 432/1445 - loss 0.07131592 - time (sec): 31.21 - samples/sec: 1674.66 - lr: 0.000026 - momentum: 0.000000
2023-10-24 21:58:58,388 epoch 3 - iter 576/1445 - loss 0.06974866 - time (sec): 41.90 - samples/sec: 1681.94 - lr: 0.000025 - momentum: 0.000000
2023-10-24 21:59:08,950 epoch 3 - iter 720/1445 - loss 0.06896857 - time (sec): 52.46 - samples/sec: 1681.63 - lr: 0.000025 - momentum: 0.000000
2023-10-24 21:59:19,705 epoch 3 - iter 864/1445 - loss 0.07063252 - time (sec): 63.21 - samples/sec: 1693.15 - lr: 0.000025 - momentum: 0.000000
2023-10-24 21:59:30,022 epoch 3 - iter 1008/1445 - loss 0.07103855 - time (sec): 73.53 - samples/sec: 1678.89 - lr: 0.000024 - momentum: 0.000000
2023-10-24 21:59:40,341 epoch 3 - iter 1152/1445 - loss 0.07115340 - time (sec): 83.85 - samples/sec: 1671.02 - lr: 0.000024 - momentum: 0.000000
2023-10-24 21:59:50,899 epoch 3 - iter 1296/1445 - loss 0.07080450 - time (sec): 94.41 - samples/sec: 1671.77 - lr: 0.000024 - momentum: 0.000000
2023-10-24 22:00:01,593 epoch 3 - iter 1440/1445 - loss 0.07048554 - time (sec): 105.10 - samples/sec: 1673.54 - lr: 0.000023 - momentum: 0.000000
2023-10-24 22:00:01,884 ----------------------------------------------------------------------------------------------------
2023-10-24 22:00:01,884 EPOCH 3 done: loss 0.0706 - lr: 0.000023
2023-10-24 22:00:05,309 DEV : loss 0.10592877864837646 - f1-score (micro avg) 0.8065
2023-10-24 22:00:05,321 saving best model
2023-10-24 22:00:06,098 ----------------------------------------------------------------------------------------------------
2023-10-24 22:00:16,449 epoch 4 - iter 144/1445 - loss 0.04420143 - time (sec): 10.35 - samples/sec: 1690.62 - lr: 0.000023 - momentum: 0.000000
2023-10-24 22:00:26,906 epoch 4 - iter 288/1445 - loss 0.05211392 - time (sec): 20.81 - samples/sec: 1669.44 - lr: 0.000023 - momentum: 0.000000
2023-10-24 22:00:37,283 epoch 4 - iter 432/1445 - loss 0.05533820 - time (sec): 31.18 - samples/sec: 1626.59 - lr: 0.000022 - momentum: 0.000000
2023-10-24 22:00:47,604 epoch 4 - iter 576/1445 - loss 0.05568201 - time (sec): 41.51 - samples/sec: 1620.21 - lr: 0.000022 - momentum: 0.000000
2023-10-24 22:00:58,327 epoch 4 - iter 720/1445 - loss 0.05465541 - time (sec): 52.23 - samples/sec: 1643.67 - lr: 0.000022 - momentum: 0.000000
2023-10-24 22:01:08,980 epoch 4 - iter 864/1445 - loss 0.05576727 - time (sec): 62.88 - samples/sec: 1654.90 - lr: 0.000021 - momentum: 0.000000
2023-10-24 22:01:19,883 epoch 4 - iter 1008/1445 - loss 0.05481408 - time (sec): 73.78 - samples/sec: 1660.38 - lr: 0.000021 - momentum: 0.000000
2023-10-24 22:01:30,417 epoch 4 - iter 1152/1445 - loss 0.05383623 - time (sec): 84.32 - samples/sec: 1665.80 - lr: 0.000021 - momentum: 0.000000
2023-10-24 22:01:40,977 epoch 4 - iter 1296/1445 - loss 0.05292807 - time (sec): 94.88 - samples/sec: 1665.78 - lr: 0.000020 - momentum: 0.000000
2023-10-24 22:01:51,448 epoch 4 - iter 1440/1445 - loss 0.05208862 - time (sec): 105.35 - samples/sec: 1668.68 - lr: 0.000020 - momentum: 0.000000
2023-10-24 22:01:51,753 ----------------------------------------------------------------------------------------------------
2023-10-24 22:01:51,753 EPOCH 4 done: loss 0.0521 - lr: 0.000020
2023-10-24 22:01:55,170 DEV : loss 0.1245899349451065 - f1-score (micro avg) 0.8002
2023-10-24 22:01:55,181 ----------------------------------------------------------------------------------------------------
2023-10-24 22:02:05,922 epoch 5 - iter 144/1445 - loss 0.03735872 - time (sec): 10.74 - samples/sec: 1703.86 - lr: 0.000020 - momentum: 0.000000
2023-10-24 22:02:16,659 epoch 5 - iter 288/1445 - loss 0.04258998 - time (sec): 21.48 - samples/sec: 1666.21 - lr: 0.000019 - momentum: 0.000000
2023-10-24 22:02:27,208 epoch 5 - iter 432/1445 - loss 0.03756029 - time (sec): 32.03 - samples/sec: 1666.14 - lr: 0.000019 - momentum: 0.000000
2023-10-24 22:02:38,230 epoch 5 - iter 576/1445 - loss 0.03855021 - time (sec): 43.05 - samples/sec: 1678.82 - lr: 0.000019 - momentum: 0.000000
2023-10-24 22:02:48,549 epoch 5 - iter 720/1445 - loss 0.04032539 - time (sec): 53.37 - samples/sec: 1676.33 - lr: 0.000018 - momentum: 0.000000
2023-10-24 22:02:59,250 epoch 5 - iter 864/1445 - loss 0.04038091 - time (sec): 64.07 - samples/sec: 1680.42 - lr: 0.000018 - momentum: 0.000000
2023-10-24 22:03:09,256 epoch 5 - iter 1008/1445 - loss 0.04015816 - time (sec): 74.07 - samples/sec: 1667.89 - lr: 0.000018 - momentum: 0.000000
2023-10-24 22:03:19,744 epoch 5 - iter 1152/1445 - loss 0.03895989 - time (sec): 84.56 - samples/sec: 1672.97 - lr: 0.000017 - momentum: 0.000000
2023-10-24 22:03:30,067 epoch 5 - iter 1296/1445 - loss 0.03887108 - time (sec): 94.89 - samples/sec: 1664.78 - lr: 0.000017 - momentum: 0.000000
2023-10-24 22:03:40,569 epoch 5 - iter 1440/1445 - loss 0.03840375 - time (sec): 105.39 - samples/sec: 1664.80 - lr: 0.000017 - momentum: 0.000000
2023-10-24 22:03:40,994 ----------------------------------------------------------------------------------------------------
2023-10-24 22:03:40,995 EPOCH 5 done: loss 0.0384 - lr: 0.000017
2023-10-24 22:03:44,691 DEV : loss 0.133424773812294 - f1-score (micro avg) 0.8276
2023-10-24 22:03:44,702 saving best model
2023-10-24 22:03:45,401 ----------------------------------------------------------------------------------------------------
2023-10-24 22:03:55,990 epoch 6 - iter 144/1445 - loss 0.01932972 - time (sec): 10.59 - samples/sec: 1618.75 - lr: 0.000016 - momentum: 0.000000
2023-10-24 22:04:06,468 epoch 6 - iter 288/1445 - loss 0.02346905 - time (sec): 21.07 - samples/sec: 1631.06 - lr: 0.000016 - momentum: 0.000000
2023-10-24 22:04:17,422 epoch 6 - iter 432/1445 - loss 0.02853611 - time (sec): 32.02 - samples/sec: 1665.15 - lr: 0.000016 - momentum: 0.000000
2023-10-24 22:04:27,879 epoch 6 - iter 576/1445 - loss 0.02850934 - time (sec): 42.48 - samples/sec: 1652.38 - lr: 0.000015 - momentum: 0.000000
2023-10-24 22:04:38,329 epoch 6 - iter 720/1445 - loss 0.02770765 - time (sec): 52.93 - samples/sec: 1650.35 - lr: 0.000015 - momentum: 0.000000
2023-10-24 22:04:48,976 epoch 6 - iter 864/1445 - loss 0.02850972 - time (sec): 63.57 - samples/sec: 1656.23 - lr: 0.000015 - momentum: 0.000000
2023-10-24 22:04:59,427 epoch 6 - iter 1008/1445 - loss 0.02784045 - time (sec): 74.02 - samples/sec: 1666.26 - lr: 0.000014 - momentum: 0.000000
2023-10-24 22:05:09,931 epoch 6 - iter 1152/1445 - loss 0.02748521 - time (sec): 84.53 - samples/sec: 1666.18 - lr: 0.000014 - momentum: 0.000000
2023-10-24 22:05:20,372 epoch 6 - iter 1296/1445 - loss 0.02764222 - time (sec): 94.97 - samples/sec: 1669.46 - lr: 0.000014 - momentum: 0.000000
2023-10-24 22:05:30,719 epoch 6 - iter 1440/1445 - loss 0.02849307 - time (sec): 105.32 - samples/sec: 1668.04 - lr: 0.000013 - momentum: 0.000000
2023-10-24 22:05:31,053 ----------------------------------------------------------------------------------------------------
2023-10-24 22:05:31,054 EPOCH 6 done: loss 0.0284 - lr: 0.000013
2023-10-24 22:05:34,476 DEV : loss 0.1365528404712677 - f1-score (micro avg) 0.8267
2023-10-24 22:05:34,487 ----------------------------------------------------------------------------------------------------
2023-10-24 22:05:44,971 epoch 7 - iter 144/1445 - loss 0.01391007 - time (sec): 10.48 - samples/sec: 1706.87 - lr: 0.000013 - momentum: 0.000000
2023-10-24 22:05:55,671 epoch 7 - iter 288/1445 - loss 0.01968379 - time (sec): 21.18 - samples/sec: 1669.56 - lr: 0.000013 - momentum: 0.000000
2023-10-24 22:06:06,319 epoch 7 - iter 432/1445 - loss 0.01910457 - time (sec): 31.83 - samples/sec: 1653.60 - lr: 0.000012 - momentum: 0.000000
2023-10-24 22:06:16,913 epoch 7 - iter 576/1445 - loss 0.02249157 - time (sec): 42.43 - samples/sec: 1670.82 - lr: 0.000012 - momentum: 0.000000
2023-10-24 22:06:27,746 epoch 7 - iter 720/1445 - loss 0.02204607 - time (sec): 53.26 - samples/sec: 1673.30 - lr: 0.000012 - momentum: 0.000000
2023-10-24 22:06:38,023 epoch 7 - iter 864/1445 - loss 0.02216207 - time (sec): 63.53 - samples/sec: 1658.27 - lr: 0.000011 - momentum: 0.000000
2023-10-24 22:06:48,444 epoch 7 - iter 1008/1445 - loss 0.02155640 - time (sec): 73.96 - samples/sec: 1654.13 - lr: 0.000011 - momentum: 0.000000
2023-10-24 22:06:58,974 epoch 7 - iter 1152/1445 - loss 0.02155786 - time (sec): 84.49 - samples/sec: 1655.26 - lr: 0.000011 - momentum: 0.000000
2023-10-24 22:07:09,670 epoch 7 - iter 1296/1445 - loss 0.02076335 - time (sec): 95.18 - samples/sec: 1660.34 - lr: 0.000010 - momentum: 0.000000
2023-10-24 22:07:20,216 epoch 7 - iter 1440/1445 - loss 0.02036066 - time (sec): 105.73 - samples/sec: 1660.35 - lr: 0.000010 - momentum: 0.000000
2023-10-24 22:07:20,622 ----------------------------------------------------------------------------------------------------
2023-10-24 22:07:20,623 EPOCH 7 done: loss 0.0204 - lr: 0.000010
2023-10-24 22:07:24,044 DEV : loss 0.1544482260942459 - f1-score (micro avg) 0.8467
2023-10-24 22:07:24,056 saving best model
2023-10-24 22:07:24,752 ----------------------------------------------------------------------------------------------------
2023-10-24 22:07:35,303 epoch 8 - iter 144/1445 - loss 0.00603330 - time (sec): 10.55 - samples/sec: 1672.68 - lr: 0.000010 - momentum: 0.000000
2023-10-24 22:07:46,131 epoch 8 - iter 288/1445 - loss 0.01060664 - time (sec): 21.38 - samples/sec: 1658.98 - lr: 0.000009 - momentum: 0.000000
2023-10-24 22:07:56,453 epoch 8 - iter 432/1445 - loss 0.01037218 - time (sec): 31.70 - samples/sec: 1674.02 - lr: 0.000009 - momentum: 0.000000
2023-10-24 22:08:07,689 epoch 8 - iter 576/1445 - loss 0.01144562 - time (sec): 42.94 - samples/sec: 1704.63 - lr: 0.000009 - momentum: 0.000000
2023-10-24 22:08:18,129 epoch 8 - iter 720/1445 - loss 0.01120662 - time (sec): 53.38 - samples/sec: 1689.53 - lr: 0.000008 - momentum: 0.000000
2023-10-24 22:08:28,593 epoch 8 - iter 864/1445 - loss 0.01145175 - time (sec): 63.84 - samples/sec: 1687.16 - lr: 0.000008 - momentum: 0.000000
2023-10-24 22:08:39,169 epoch 8 - iter 1008/1445 - loss 0.01216960 - time (sec): 74.42 - samples/sec: 1680.24 - lr: 0.000008 - momentum: 0.000000
2023-10-24 22:08:49,133 epoch 8 - iter 1152/1445 - loss 0.01273434 - time (sec): 84.38 - samples/sec: 1661.94 - lr: 0.000007 - momentum: 0.000000
2023-10-24 22:08:59,424 epoch 8 - iter 1296/1445 - loss 0.01238139 - time (sec): 94.67 - samples/sec: 1660.14 - lr: 0.000007 - momentum: 0.000000
2023-10-24 22:09:10,169 epoch 8 - iter 1440/1445 - loss 0.01332204 - time (sec): 105.42 - samples/sec: 1664.86 - lr: 0.000007 - momentum: 0.000000
2023-10-24 22:09:10,600 ----------------------------------------------------------------------------------------------------
2023-10-24 22:09:10,600 EPOCH 8 done: loss 0.0133 - lr: 0.000007
2023-10-24 22:09:14,309 DEV : loss 0.17273983359336853 - f1-score (micro avg) 0.821
2023-10-24 22:09:14,320 ----------------------------------------------------------------------------------------------------
2023-10-24 22:09:25,157 epoch 9 - iter 144/1445 - loss 0.00588071 - time (sec): 10.84 - samples/sec: 1729.19 - lr: 0.000006 - momentum: 0.000000
2023-10-24 22:09:35,274 epoch 9 - iter 288/1445 - loss 0.00736073 - time (sec): 20.95 - samples/sec: 1673.31 - lr: 0.000006 - momentum: 0.000000
2023-10-24 22:09:46,262 epoch 9 - iter 432/1445 - loss 0.00797289 - time (sec): 31.94 - samples/sec: 1676.64 - lr: 0.000006 - momentum: 0.000000
2023-10-24 22:09:56,805 epoch 9 - iter 576/1445 - loss 0.01074603 - time (sec): 42.48 - samples/sec: 1671.91 - lr: 0.000005 - momentum: 0.000000
2023-10-24 22:10:07,293 epoch 9 - iter 720/1445 - loss 0.01010368 - time (sec): 52.97 - samples/sec: 1667.34 - lr: 0.000005 - momentum: 0.000000
2023-10-24 22:10:17,826 epoch 9 - iter 864/1445 - loss 0.00940377 - time (sec): 63.50 - samples/sec: 1671.76 - lr: 0.000005 - momentum: 0.000000
2023-10-24 22:10:28,471 epoch 9 - iter 1008/1445 - loss 0.00943012 - time (sec): 74.15 - samples/sec: 1671.65 - lr: 0.000004 - momentum: 0.000000
2023-10-24 22:10:38,826 epoch 9 - iter 1152/1445 - loss 0.00936929 - time (sec): 84.51 - samples/sec: 1669.72 - lr: 0.000004 - momentum: 0.000000
2023-10-24 22:10:49,280 epoch 9 - iter 1296/1445 - loss 0.00923211 - time (sec): 94.96 - samples/sec: 1668.88 - lr: 0.000004 - momentum: 0.000000
2023-10-24 22:10:59,885 epoch 9 - iter 1440/1445 - loss 0.00932590 - time (sec): 105.56 - samples/sec: 1665.64 - lr: 0.000003 - momentum: 0.000000
2023-10-24 22:11:00,186 ----------------------------------------------------------------------------------------------------
2023-10-24 22:11:00,186 EPOCH 9 done: loss 0.0093 - lr: 0.000003
2023-10-24 22:11:03,616 DEV : loss 0.18522778153419495 - f1-score (micro avg) 0.8267
2023-10-24 22:11:03,628 ----------------------------------------------------------------------------------------------------
2023-10-24 22:11:14,197 epoch 10 - iter 144/1445 - loss 0.00628755 - time (sec): 10.57 - samples/sec: 1651.50 - lr: 0.000003 - momentum: 0.000000
2023-10-24 22:11:24,926 epoch 10 - iter 288/1445 - loss 0.00707012 - time (sec): 21.30 - samples/sec: 1667.33 - lr: 0.000003 - momentum: 0.000000
2023-10-24 22:11:35,710 epoch 10 - iter 432/1445 - loss 0.00618521 - time (sec): 32.08 - samples/sec: 1697.14 - lr: 0.000002 - momentum: 0.000000
2023-10-24 22:11:46,638 epoch 10 - iter 576/1445 - loss 0.00651463 - time (sec): 43.01 - samples/sec: 1692.68 - lr: 0.000002 - momentum: 0.000000
2023-10-24 22:11:56,990 epoch 10 - iter 720/1445 - loss 0.00589889 - time (sec): 53.36 - samples/sec: 1678.07 - lr: 0.000002 - momentum: 0.000000
2023-10-24 22:12:07,571 epoch 10 - iter 864/1445 - loss 0.00603718 - time (sec): 63.94 - samples/sec: 1669.95 - lr: 0.000001 - momentum: 0.000000
2023-10-24 22:12:18,192 epoch 10 - iter 1008/1445 - loss 0.00651159 - time (sec): 74.56 - samples/sec: 1664.75 - lr: 0.000001 - momentum: 0.000000
2023-10-24 22:12:28,603 epoch 10 - iter 1152/1445 - loss 0.00639162 - time (sec): 84.97 - samples/sec: 1665.64 - lr: 0.000001 - momentum: 0.000000
2023-10-24 22:12:39,228 epoch 10 - iter 1296/1445 - loss 0.00623833 - time (sec): 95.60 - samples/sec: 1659.76 - lr: 0.000000 - momentum: 0.000000
2023-10-24 22:12:49,561 epoch 10 - iter 1440/1445 - loss 0.00626178 - time (sec): 105.93 - samples/sec: 1659.74 - lr: 0.000000 - momentum: 0.000000
2023-10-24 22:12:49,857 ----------------------------------------------------------------------------------------------------
2023-10-24 22:12:49,858 EPOCH 10 done: loss 0.0062 - lr: 0.000000
2023-10-24 22:12:53,288 DEV : loss 0.18949156999588013 - f1-score (micro avg) 0.831
2023-10-24 22:12:53,858 ----------------------------------------------------------------------------------------------------
2023-10-24 22:12:53,859 Loading model from best epoch ...
2023-10-24 22:12:55,820 SequenceTagger predicts: Dictionary with 13 tags: O, S-LOC, B-LOC, E-LOC, I-LOC, S-PER, B-PER, E-PER, I-PER, S-ORG, B-ORG, E-ORG, I-ORG
2023-10-24 22:12:59,365 Results:
- F-score (micro) 0.7981
- F-score (macro) 0.676
- Accuracy 0.6764

By class:
              precision    recall  f1-score   support

         PER     0.8382    0.7739    0.8047       482
         LOC     0.9044    0.8057    0.8522       458
         ORG     0.4182    0.3333    0.3710        69

   micro avg     0.8425    0.7582    0.7981      1009
   macro avg     0.7203    0.6376    0.6760      1009
weighted avg     0.8395    0.7582    0.7966      1009

2023-10-24 22:12:59,365 ----------------------------------------------------------------------------------------------------
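The best checkpoint (epoch 7, dev micro-F1 0.8467) is what the final evaluation above was run on. A minimal sketch for using it at inference time; the path comes from the training base path in this log, and the Dutch example sentence is made up:

```python
from flair.data import Sentence
from flair.models import SequenceTagger

# Load the checkpoint written as best-model.pt under the training base path.
tagger = SequenceTagger.load(
    "hmbench-icdar/nl-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-1/best-model.pt"
)

# Tag a (made-up) Dutch sentence and print the predicted PER/LOC/ORG spans.
sentence = Sentence("Willem Barentsz vertrok uit Amsterdam .")
tagger.predict(sentence)
for span in sentence.get_spans("ner"):
    print(span)
```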