2023-10-24 23:21:40,285 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:21:40,286 Model: "SequenceTagger( (embeddings): TransformerWordEmbeddings( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(64001, 768) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (locked_dropout): LockedDropout(p=0.5) (linear): Linear(in_features=768, out_features=13, bias=True) (loss_function): CrossEntropyLoss() )" 2023-10-24 23:21:40,286 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:21:40,286 MultiCorpus: 5777 train + 722 dev + 723 test sentences - NER_ICDAR_EUROPEANA Corpus: 5777 train + 722 dev + 723 test sentences - /home/ubuntu/.flair/datasets/ner_icdar_europeana/nl 2023-10-24 23:21:40,287 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:21:40,287 Train: 5777 sentences 2023-10-24 23:21:40,287 (train_with_dev=False, train_with_test=False) 2023-10-24 23:21:40,287 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:21:40,287 Training Params: 2023-10-24 23:21:40,287 - learning_rate: "5e-05" 2023-10-24 23:21:40,287 - mini_batch_size: "4" 2023-10-24 23:21:40,287 - max_epochs: "10" 2023-10-24 23:21:40,287 - shuffle: "True" 2023-10-24 23:21:40,287 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:21:40,287 Plugins: 2023-10-24 23:21:40,287 - TensorboardLogger 2023-10-24 23:21:40,287 - LinearScheduler | warmup_fraction: '0.1' 2023-10-24 23:21:40,287 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:21:40,287 Final evaluation on model from best epoch (best-model.pt) 2023-10-24 23:21:40,287 - metric: "('micro avg', 'f1-score')" 2023-10-24 23:21:40,287 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:21:40,287 Computation: 2023-10-24 23:21:40,287 - compute on device: cuda:0 2023-10-24 23:21:40,287 - embedding storage: none 2023-10-24 23:21:40,287 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:21:40,287 Model training base path: "hmbench-icdar/nl-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-2" 2023-10-24 23:21:40,287 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:21:40,287 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:21:40,287 Logging anything other than scalars to TensorBoard is currently not supported. 2023-10-24 23:21:50,481 epoch 1 - iter 144/1445 - loss 1.25687448 - time (sec): 10.19 - samples/sec: 1654.23 - lr: 0.000005 - momentum: 0.000000 2023-10-24 23:22:00,854 epoch 1 - iter 288/1445 - loss 0.74150684 - time (sec): 20.57 - samples/sec: 1652.68 - lr: 0.000010 - momentum: 0.000000 2023-10-24 23:22:11,799 epoch 1 - iter 432/1445 - loss 0.55051956 - time (sec): 31.51 - samples/sec: 1686.01 - lr: 0.000015 - momentum: 0.000000 2023-10-24 23:22:22,479 epoch 1 - iter 576/1445 - loss 0.45307796 - time (sec): 42.19 - samples/sec: 1680.61 - lr: 0.000020 - momentum: 0.000000 2023-10-24 23:22:33,064 epoch 1 - iter 720/1445 - loss 0.39513176 - time (sec): 52.78 - samples/sec: 1679.07 - lr: 0.000025 - momentum: 0.000000 2023-10-24 23:22:43,519 epoch 1 - iter 864/1445 - loss 0.35898856 - time (sec): 63.23 - samples/sec: 1670.38 - lr: 0.000030 - momentum: 0.000000 2023-10-24 23:22:54,299 epoch 1 - iter 1008/1445 - loss 0.32689338 - time (sec): 74.01 - samples/sec: 1679.95 - lr: 0.000035 - momentum: 0.000000 2023-10-24 23:23:04,536 epoch 1 - iter 1152/1445 - loss 0.30459934 - time (sec): 84.25 - samples/sec: 1675.60 - lr: 0.000040 - momentum: 0.000000 2023-10-24 23:23:14,906 epoch 1 - iter 1296/1445 - loss 0.28615854 - time (sec): 94.62 - samples/sec: 1674.07 - lr: 0.000045 - momentum: 0.000000 2023-10-24 23:23:25,291 epoch 1 - iter 1440/1445 - loss 0.26993534 - time (sec): 105.00 - samples/sec: 1673.08 - lr: 0.000050 - momentum: 0.000000 2023-10-24 23:23:25,621 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:23:25,621 EPOCH 1 done: loss 0.2693 - lr: 0.000050 2023-10-24 23:23:28,916 DEV : loss 0.13464847207069397 - f1-score (micro avg) 0.4989 2023-10-24 23:23:28,928 saving best model 2023-10-24 23:23:29,405 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:23:39,846 epoch 2 - iter 144/1445 - loss 0.11663024 - time (sec): 10.44 - samples/sec: 1666.93 - lr: 0.000049 - momentum: 0.000000 2023-10-24 23:23:50,257 epoch 2 - iter 288/1445 - loss 0.11374738 - time (sec): 20.85 - samples/sec: 1623.79 - lr: 0.000049 - momentum: 0.000000 2023-10-24 23:24:01,219 epoch 2 - iter 432/1445 - loss 0.11436629 - time (sec): 31.81 - samples/sec: 1681.21 - lr: 0.000048 - momentum: 0.000000 2023-10-24 23:24:12,002 epoch 2 - iter 576/1445 - loss 0.10968049 - time (sec): 42.60 - samples/sec: 1680.12 - lr: 0.000048 - momentum: 0.000000 2023-10-24 23:24:22,453 epoch 2 - iter 720/1445 - loss 0.11276602 - time (sec): 53.05 - samples/sec: 1675.23 - lr: 0.000047 - momentum: 0.000000 2023-10-24 23:24:32,693 epoch 2 - iter 864/1445 - loss 0.12224799 - time (sec): 63.29 - samples/sec: 1663.20 - lr: 0.000047 - momentum: 0.000000 2023-10-24 23:24:43,046 epoch 2 - iter 1008/1445 - loss 0.12806467 - time (sec): 73.64 - samples/sec: 1657.02 - lr: 0.000046 - momentum: 0.000000 2023-10-24 23:24:53,742 epoch 2 - iter 1152/1445 - loss 0.12619685 - time (sec): 84.34 - samples/sec: 1664.66 - lr: 0.000046 - momentum: 0.000000 2023-10-24 23:25:04,256 epoch 2 - iter 1296/1445 - loss 0.12638505 - time (sec): 94.85 - samples/sec: 1663.82 - lr: 0.000045 - momentum: 0.000000 2023-10-24 23:25:14,881 epoch 2 - iter 1440/1445 - loss 0.12835770 - time (sec): 105.48 - samples/sec: 1666.64 - lr: 0.000044 - momentum: 0.000000 2023-10-24 23:25:15,196 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:25:15,197 EPOCH 2 done: loss 0.1283 - lr: 0.000044 2023-10-24 23:25:18,912 DEV : loss 0.14913305640220642 - f1-score (micro avg) 0.5666 2023-10-24 23:25:18,924 saving best model 2023-10-24 23:25:19,520 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:25:30,192 epoch 3 - iter 144/1445 - loss 0.09556737 - time (sec): 10.67 - samples/sec: 1676.80 - lr: 0.000044 - momentum: 0.000000 2023-10-24 23:25:40,576 epoch 3 - iter 288/1445 - loss 0.09151055 - time (sec): 21.06 - samples/sec: 1686.19 - lr: 0.000043 - momentum: 0.000000 2023-10-24 23:25:51,424 epoch 3 - iter 432/1445 - loss 0.09590453 - time (sec): 31.90 - samples/sec: 1698.93 - lr: 0.000043 - momentum: 0.000000 2023-10-24 23:26:02,002 epoch 3 - iter 576/1445 - loss 0.09325284 - time (sec): 42.48 - samples/sec: 1699.52 - lr: 0.000042 - momentum: 0.000000 2023-10-24 23:26:12,235 epoch 3 - iter 720/1445 - loss 0.09294158 - time (sec): 52.71 - samples/sec: 1690.27 - lr: 0.000042 - momentum: 0.000000 2023-10-24 23:26:22,689 epoch 3 - iter 864/1445 - loss 0.09217848 - time (sec): 63.17 - samples/sec: 1678.39 - lr: 0.000041 - momentum: 0.000000 2023-10-24 23:26:32,980 epoch 3 - iter 1008/1445 - loss 0.09182787 - time (sec): 73.46 - samples/sec: 1668.27 - lr: 0.000041 - momentum: 0.000000 2023-10-24 23:26:43,805 epoch 3 - iter 1152/1445 - loss 0.09081107 - time (sec): 84.28 - samples/sec: 1665.95 - lr: 0.000040 - momentum: 0.000000 2023-10-24 23:26:54,149 epoch 3 - iter 1296/1445 - loss 0.08982439 - time (sec): 94.63 - samples/sec: 1666.10 - lr: 0.000039 - momentum: 0.000000 2023-10-24 23:27:04,923 epoch 3 - iter 1440/1445 - loss 0.08774452 - time (sec): 105.40 - samples/sec: 1666.91 - lr: 0.000039 - momentum: 0.000000 2023-10-24 23:27:05,285 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:27:05,286 EPOCH 3 done: loss 0.0876 - lr: 0.000039 2023-10-24 23:27:08,720 DEV : loss 0.09554920345544815 - f1-score (micro avg) 0.7987 2023-10-24 23:27:08,732 saving best model 2023-10-24 23:27:09,323 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:27:19,724 epoch 4 - iter 144/1445 - loss 0.05617425 - time (sec): 10.40 - samples/sec: 1681.39 - lr: 0.000038 - momentum: 0.000000 2023-10-24 23:27:30,780 epoch 4 - iter 288/1445 - loss 0.05608470 - time (sec): 21.46 - samples/sec: 1634.93 - lr: 0.000038 - momentum: 0.000000 2023-10-24 23:27:41,462 epoch 4 - iter 432/1445 - loss 0.05672814 - time (sec): 32.14 - samples/sec: 1662.75 - lr: 0.000037 - momentum: 0.000000 2023-10-24 23:27:52,225 epoch 4 - iter 576/1445 - loss 0.05849627 - time (sec): 42.90 - samples/sec: 1671.54 - lr: 0.000037 - momentum: 0.000000 2023-10-24 23:28:02,934 epoch 4 - iter 720/1445 - loss 0.05648828 - time (sec): 53.61 - samples/sec: 1672.20 - lr: 0.000036 - momentum: 0.000000 2023-10-24 23:28:13,606 epoch 4 - iter 864/1445 - loss 0.05791966 - time (sec): 64.28 - samples/sec: 1674.82 - lr: 0.000036 - momentum: 0.000000 2023-10-24 23:28:23,554 epoch 4 - iter 1008/1445 - loss 0.05982341 - time (sec): 74.23 - samples/sec: 1665.23 - lr: 0.000035 - momentum: 0.000000 2023-10-24 23:28:33,833 epoch 4 - iter 1152/1445 - loss 0.06036417 - time (sec): 84.51 - samples/sec: 1661.07 - lr: 0.000034 - momentum: 0.000000 2023-10-24 23:28:44,524 epoch 4 - iter 1296/1445 - loss 0.06076328 - time (sec): 95.20 - samples/sec: 1659.37 - lr: 0.000034 - momentum: 0.000000 2023-10-24 23:28:55,064 epoch 4 - iter 1440/1445 - loss 0.06071194 - time (sec): 105.74 - samples/sec: 1662.20 - lr: 0.000033 - momentum: 0.000000 2023-10-24 23:28:55,379 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:28:55,379 EPOCH 4 done: loss 0.0607 - lr: 0.000033 2023-10-24 23:28:58,805 DEV : loss 0.14087821543216705 - f1-score (micro avg) 0.7709 2023-10-24 23:28:58,817 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:29:09,059 epoch 5 - iter 144/1445 - loss 0.03779963 - time (sec): 10.24 - samples/sec: 1583.68 - lr: 0.000033 - momentum: 0.000000 2023-10-24 23:29:19,818 epoch 5 - iter 288/1445 - loss 0.05292038 - time (sec): 21.00 - samples/sec: 1627.18 - lr: 0.000032 - momentum: 0.000000 2023-10-24 23:29:29,864 epoch 5 - iter 432/1445 - loss 0.04769913 - time (sec): 31.05 - samples/sec: 1618.22 - lr: 0.000032 - momentum: 0.000000 2023-10-24 23:29:40,463 epoch 5 - iter 576/1445 - loss 0.04620324 - time (sec): 41.65 - samples/sec: 1625.30 - lr: 0.000031 - momentum: 0.000000 2023-10-24 23:29:51,150 epoch 5 - iter 720/1445 - loss 0.04549958 - time (sec): 52.33 - samples/sec: 1637.80 - lr: 0.000031 - momentum: 0.000000 2023-10-24 23:30:01,787 epoch 5 - iter 864/1445 - loss 0.04525116 - time (sec): 62.97 - samples/sec: 1646.50 - lr: 0.000030 - momentum: 0.000000 2023-10-24 23:30:12,838 epoch 5 - iter 1008/1445 - loss 0.04663334 - time (sec): 74.02 - samples/sec: 1653.57 - lr: 0.000029 - momentum: 0.000000 2023-10-24 23:30:23,025 epoch 5 - iter 1152/1445 - loss 0.04587806 - time (sec): 84.21 - samples/sec: 1652.03 - lr: 0.000029 - momentum: 0.000000 2023-10-24 23:30:33,618 epoch 5 - iter 1296/1445 - loss 0.04699800 - time (sec): 94.80 - samples/sec: 1656.41 - lr: 0.000028 - momentum: 0.000000 2023-10-24 23:30:44,514 epoch 5 - iter 1440/1445 - loss 0.04628123 - time (sec): 105.70 - samples/sec: 1663.62 - lr: 0.000028 - momentum: 0.000000 2023-10-24 23:30:44,822 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:30:44,823 EPOCH 5 done: loss 0.0463 - lr: 0.000028 2023-10-24 23:30:48,549 DEV : loss 0.19031141698360443 - f1-score (micro avg) 0.7835 2023-10-24 23:30:48,561 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:30:59,126 epoch 6 - iter 144/1445 - loss 0.03075064 - time (sec): 10.56 - samples/sec: 1638.27 - lr: 0.000027 - momentum: 0.000000 2023-10-24 23:31:09,574 epoch 6 - iter 288/1445 - loss 0.02796459 - time (sec): 21.01 - samples/sec: 1663.44 - lr: 0.000027 - momentum: 0.000000 2023-10-24 23:31:19,916 epoch 6 - iter 432/1445 - loss 0.02789741 - time (sec): 31.35 - samples/sec: 1657.60 - lr: 0.000026 - momentum: 0.000000 2023-10-24 23:31:30,428 epoch 6 - iter 576/1445 - loss 0.02733910 - time (sec): 41.87 - samples/sec: 1666.41 - lr: 0.000026 - momentum: 0.000000 2023-10-24 23:31:41,324 epoch 6 - iter 720/1445 - loss 0.02846954 - time (sec): 52.76 - samples/sec: 1676.89 - lr: 0.000025 - momentum: 0.000000 2023-10-24 23:31:51,905 epoch 6 - iter 864/1445 - loss 0.03196300 - time (sec): 63.34 - samples/sec: 1676.82 - lr: 0.000024 - momentum: 0.000000 2023-10-24 23:32:02,373 epoch 6 - iter 1008/1445 - loss 0.03158090 - time (sec): 73.81 - samples/sec: 1678.18 - lr: 0.000024 - momentum: 0.000000 2023-10-24 23:32:12,706 epoch 6 - iter 1152/1445 - loss 0.03159776 - time (sec): 84.14 - samples/sec: 1669.57 - lr: 0.000023 - momentum: 0.000000 2023-10-24 23:32:23,320 epoch 6 - iter 1296/1445 - loss 0.03120304 - time (sec): 94.76 - samples/sec: 1669.75 - lr: 0.000023 - momentum: 0.000000 2023-10-24 23:32:33,943 epoch 6 - iter 1440/1445 - loss 0.03155238 - time (sec): 105.38 - samples/sec: 1662.26 - lr: 0.000022 - momentum: 0.000000 2023-10-24 23:32:34,425 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:32:34,426 EPOCH 6 done: loss 0.0314 - lr: 0.000022 2023-10-24 23:32:37,842 DEV : loss 0.2146977335214615 - f1-score (micro avg) 0.766 2023-10-24 23:32:37,854 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:32:48,058 epoch 7 - iter 144/1445 - loss 0.02441531 - time (sec): 10.20 - samples/sec: 1624.67 - lr: 0.000022 - momentum: 0.000000 2023-10-24 23:32:58,154 epoch 7 - iter 288/1445 - loss 0.01969335 - time (sec): 20.30 - samples/sec: 1604.97 - lr: 0.000021 - momentum: 0.000000 2023-10-24 23:33:09,274 epoch 7 - iter 432/1445 - loss 0.02274451 - time (sec): 31.42 - samples/sec: 1645.46 - lr: 0.000021 - momentum: 0.000000 2023-10-24 23:33:19,975 epoch 7 - iter 576/1445 - loss 0.02260029 - time (sec): 42.12 - samples/sec: 1633.49 - lr: 0.000020 - momentum: 0.000000 2023-10-24 23:33:30,556 epoch 7 - iter 720/1445 - loss 0.02066555 - time (sec): 52.70 - samples/sec: 1653.84 - lr: 0.000019 - momentum: 0.000000 2023-10-24 23:33:41,236 epoch 7 - iter 864/1445 - loss 0.02077775 - time (sec): 63.38 - samples/sec: 1654.21 - lr: 0.000019 - momentum: 0.000000 2023-10-24 23:33:51,553 epoch 7 - iter 1008/1445 - loss 0.02058252 - time (sec): 73.70 - samples/sec: 1646.87 - lr: 0.000018 - momentum: 0.000000 2023-10-24 23:34:02,072 epoch 7 - iter 1152/1445 - loss 0.02160228 - time (sec): 84.22 - samples/sec: 1648.44 - lr: 0.000018 - momentum: 0.000000 2023-10-24 23:34:12,751 epoch 7 - iter 1296/1445 - loss 0.02180055 - time (sec): 94.90 - samples/sec: 1651.68 - lr: 0.000017 - momentum: 0.000000 2023-10-24 23:34:23,741 epoch 7 - iter 1440/1445 - loss 0.02146058 - time (sec): 105.89 - samples/sec: 1660.10 - lr: 0.000017 - momentum: 0.000000 2023-10-24 23:34:24,083 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:34:24,083 EPOCH 7 done: loss 0.0215 - lr: 0.000017 2023-10-24 23:34:27,511 DEV : loss 0.18124361336231232 - f1-score (micro avg) 0.7818 2023-10-24 23:34:27,523 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:34:37,876 epoch 8 - iter 144/1445 - loss 0.01881265 - time (sec): 10.35 - samples/sec: 1675.43 - lr: 0.000016 - momentum: 0.000000 2023-10-24 23:34:48,384 epoch 8 - iter 288/1445 - loss 0.02283118 - time (sec): 20.86 - samples/sec: 1668.74 - lr: 0.000016 - momentum: 0.000000 2023-10-24 23:34:58,579 epoch 8 - iter 432/1445 - loss 0.02220532 - time (sec): 31.05 - samples/sec: 1643.19 - lr: 0.000015 - momentum: 0.000000 2023-10-24 23:35:09,315 epoch 8 - iter 576/1445 - loss 0.01976420 - time (sec): 41.79 - samples/sec: 1664.66 - lr: 0.000014 - momentum: 0.000000 2023-10-24 23:35:19,753 epoch 8 - iter 720/1445 - loss 0.01917837 - time (sec): 52.23 - samples/sec: 1659.44 - lr: 0.000014 - momentum: 0.000000 2023-10-24 23:35:30,497 epoch 8 - iter 864/1445 - loss 0.01842880 - time (sec): 62.97 - samples/sec: 1652.58 - lr: 0.000013 - momentum: 0.000000 2023-10-24 23:35:41,106 epoch 8 - iter 1008/1445 - loss 0.01678623 - time (sec): 73.58 - samples/sec: 1656.73 - lr: 0.000013 - momentum: 0.000000 2023-10-24 23:35:51,530 epoch 8 - iter 1152/1445 - loss 0.01618883 - time (sec): 84.01 - samples/sec: 1653.43 - lr: 0.000012 - momentum: 0.000000 2023-10-24 23:36:02,329 epoch 8 - iter 1296/1445 - loss 0.01598200 - time (sec): 94.81 - samples/sec: 1656.02 - lr: 0.000012 - momentum: 0.000000 2023-10-24 23:36:13,079 epoch 8 - iter 1440/1445 - loss 0.01527453 - time (sec): 105.55 - samples/sec: 1661.63 - lr: 0.000011 - momentum: 0.000000 2023-10-24 23:36:13,481 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:36:13,481 EPOCH 8 done: loss 0.0153 - lr: 0.000011 2023-10-24 23:36:17,208 DEV : loss 0.19522705674171448 - f1-score (micro avg) 0.8037 2023-10-24 23:36:17,220 saving best model 2023-10-24 23:36:17,807 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:36:28,281 epoch 9 - iter 144/1445 - loss 0.00487168 - time (sec): 10.47 - samples/sec: 1668.77 - lr: 0.000011 - momentum: 0.000000 2023-10-24 23:36:38,986 epoch 9 - iter 288/1445 - loss 0.00375882 - time (sec): 21.18 - samples/sec: 1659.68 - lr: 0.000010 - momentum: 0.000000 2023-10-24 23:36:49,311 epoch 9 - iter 432/1445 - loss 0.00501835 - time (sec): 31.50 - samples/sec: 1662.22 - lr: 0.000009 - momentum: 0.000000 2023-10-24 23:36:59,963 epoch 9 - iter 576/1445 - loss 0.00620410 - time (sec): 42.16 - samples/sec: 1674.46 - lr: 0.000009 - momentum: 0.000000 2023-10-24 23:37:10,341 epoch 9 - iter 720/1445 - loss 0.00619216 - time (sec): 52.53 - samples/sec: 1673.29 - lr: 0.000008 - momentum: 0.000000 2023-10-24 23:37:21,076 epoch 9 - iter 864/1445 - loss 0.00765207 - time (sec): 63.27 - samples/sec: 1670.95 - lr: 0.000008 - momentum: 0.000000 2023-10-24 23:37:31,604 epoch 9 - iter 1008/1445 - loss 0.00810858 - time (sec): 73.80 - samples/sec: 1674.78 - lr: 0.000007 - momentum: 0.000000 2023-10-24 23:37:42,611 epoch 9 - iter 1152/1445 - loss 0.00861122 - time (sec): 84.80 - samples/sec: 1674.31 - lr: 0.000007 - momentum: 0.000000 2023-10-24 23:37:52,574 epoch 9 - iter 1296/1445 - loss 0.00885029 - time (sec): 94.77 - samples/sec: 1668.72 - lr: 0.000006 - momentum: 0.000000 2023-10-24 23:38:03,199 epoch 9 - iter 1440/1445 - loss 0.00850372 - time (sec): 105.39 - samples/sec: 1667.11 - lr: 0.000006 - momentum: 0.000000 2023-10-24 23:38:03,529 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:38:03,530 EPOCH 9 done: loss 0.0085 - lr: 0.000006 2023-10-24 23:38:06,954 DEV : loss 0.21074502170085907 - f1-score (micro avg) 0.8142 2023-10-24 23:38:06,966 saving best model 2023-10-24 23:38:07,559 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:38:18,127 epoch 10 - iter 144/1445 - loss 0.00645055 - time (sec): 10.57 - samples/sec: 1631.27 - lr: 0.000005 - momentum: 0.000000 2023-10-24 23:38:28,580 epoch 10 - iter 288/1445 - loss 0.00490595 - time (sec): 21.02 - samples/sec: 1663.02 - lr: 0.000004 - momentum: 0.000000 2023-10-24 23:38:39,577 epoch 10 - iter 432/1445 - loss 0.00556624 - time (sec): 32.02 - samples/sec: 1654.94 - lr: 0.000004 - momentum: 0.000000 2023-10-24 23:38:50,128 epoch 10 - iter 576/1445 - loss 0.00492256 - time (sec): 42.57 - samples/sec: 1664.16 - lr: 0.000003 - momentum: 0.000000 2023-10-24 23:39:00,748 epoch 10 - iter 720/1445 - loss 0.00507698 - time (sec): 53.19 - samples/sec: 1666.96 - lr: 0.000003 - momentum: 0.000000 2023-10-24 23:39:11,086 epoch 10 - iter 864/1445 - loss 0.00484983 - time (sec): 63.53 - samples/sec: 1664.20 - lr: 0.000002 - momentum: 0.000000 2023-10-24 23:39:21,382 epoch 10 - iter 1008/1445 - loss 0.00494508 - time (sec): 73.82 - samples/sec: 1664.17 - lr: 0.000002 - momentum: 0.000000 2023-10-24 23:39:32,014 epoch 10 - iter 1152/1445 - loss 0.00513985 - time (sec): 84.45 - samples/sec: 1662.77 - lr: 0.000001 - momentum: 0.000000 2023-10-24 23:39:42,971 epoch 10 - iter 1296/1445 - loss 0.00514301 - time (sec): 95.41 - samples/sec: 1668.08 - lr: 0.000001 - momentum: 0.000000 2023-10-24 23:39:53,162 epoch 10 - iter 1440/1445 - loss 0.00522215 - time (sec): 105.60 - samples/sec: 1664.03 - lr: 0.000000 - momentum: 0.000000 2023-10-24 23:39:53,489 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:39:53,489 EPOCH 10 done: loss 0.0053 - lr: 0.000000 2023-10-24 23:39:56,913 DEV : loss 0.21832986176013947 - f1-score (micro avg) 0.8061 2023-10-24 23:39:57,402 ---------------------------------------------------------------------------------------------------- 2023-10-24 23:39:57,403 Loading model from best epoch ... 2023-10-24 23:39:59,204 SequenceTagger predicts: Dictionary with 13 tags: O, S-LOC, B-LOC, E-LOC, I-LOC, S-PER, B-PER, E-PER, I-PER, S-ORG, B-ORG, E-ORG, I-ORG 2023-10-24 23:40:02,743 Results: - F-score (micro) 0.7996 - F-score (macro) 0.694 - Accuracy 0.6756 By class: precision recall f1-score support PER 0.8510 0.7822 0.8151 482 LOC 0.9008 0.7729 0.8320 458 ORG 0.5435 0.3623 0.4348 69 micro avg 0.8571 0.7493 0.7996 1009 macro avg 0.7651 0.6391 0.6940 1009 weighted avg 0.8526 0.7493 0.7968 1009 2023-10-24 23:40:02,743 ----------------------------------------------------------------------------------------------------