2022-05-09 23:40:59,402 ----------------------------------------------------------------------------------------------------
2022-05-09 23:40:59,404 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(28996, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
              (intermediate_act_fn): GELUActivation()
            )
            (output): BertOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
      (pooler): BertPooler(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (activation): Tanh()
      )
    )
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (linear): Linear(in_features=768, out_features=17, bias=True)
  (loss_function): CrossEntropyLoss()
)"
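The architecture above is a plain linear-head tagger: a 12-layer, 768-dimensional BERT encoder (the 28996-token vocabulary matches bert-base-cased), word and locked dropout, and a Linear(768, 17) projection with no RNN and no CRF. The 17 outputs are consistent with four entity types in BIOES encoding plus O. A minimal sketch of how such a model is typically assembled in Flair; the corpus choice and constructor defaults are assumptions inferred from the log, not taken from the original script:

    from flair.datasets import CONLL_03
    from flair.embeddings import TransformerWordEmbeddings
    from flair.models import SequenceTagger

    # assumed corpus: the 14987/3466/3684 sentence counts logged below
    # match the English CoNLL-03 splits
    corpus = CONLL_03()
    label_dict = corpus.make_label_dictionary(label_type="ner")

    # bert-base-cased has the 28996-token vocabulary shown in the repr
    embeddings = TransformerWordEmbeddings("bert-base-cased", fine_tune=True)

    # linear classification head only, matching the repr (no RNN, no CRF)
    tagger = SequenceTagger(
        hidden_size=256,  # ignored when use_rnn=False
        embeddings=embeddings,
        tag_dictionary=label_dict,
        tag_type="ner",
        use_rnn=False,
        use_crf=False,
    )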
2022-05-09 23:40:59,408 ----------------------------------------------------------------------------------------------------
2022-05-09 23:40:59,408 Corpus: "Corpus: 14987 train + 3466 dev + 3684 test sentences"
2022-05-09 23:40:59,408 ----------------------------------------------------------------------------------------------------
2022-05-09 23:40:59,408 Parameters:
2022-05-09 23:40:59,408  - learning_rate: "0.000050"
2022-05-09 23:40:59,408  - mini_batch_size: "16"
2022-05-09 23:40:59,408  - patience: "3"
2022-05-09 23:40:59,409  - anneal_factor: "0.5"
2022-05-09 23:40:59,409  - max_epochs: "10"
2022-05-09 23:40:59,409  - shuffle: "True"
2022-05-09 23:40:59,409  - train_with_dev: "False"
2022-05-09 23:40:59,409  - batch_growth_annealing: "False"
2022-05-09 23:40:59,409 ----------------------------------------------------------------------------------------------------
2022-05-09 23:40:59,409 Model training base path: "resources\taggers\ner"
2022-05-09 23:40:59,409 ----------------------------------------------------------------------------------------------------
2022-05-09 23:40:59,409 Device: cuda:0
2022-05-09 23:40:59,410 ----------------------------------------------------------------------------------------------------
2022-05-09 23:40:59,410 Embeddings storage mode: none
2022-05-09 23:40:59,410 ----------------------------------------------------------------------------------------------------
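The parameters block matches Flair's fine-tuning path: peak learning rate 5e-5, mini-batches of 16, ten epochs, and (as the per-iteration lr column below shows) a linear warmup/decay schedule rather than plateau annealing, which leaves patience and anneal_factor effectively unused. Continuing the sketch above, a hedged approximation of the call that produces this kind of run; exact method names and defaults vary with the Flair version:

    from flair.trainers import ModelTrainer

    trainer = ModelTrainer(tagger, corpus)

    # fine_tune uses AdamW with linear warmup to the peak learning rate,
    # then linear decay towards zero over the remaining steps
    trainer.fine_tune(
        "resources/taggers/ner",
        learning_rate=5e-5,
        mini_batch_size=16,
        max_epochs=10,
    )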
2022-05-09 23:41:15,820 epoch 1 - iter 93/937 - loss 2.04152065 - samples/sec: 90.73 - lr: 0.000005
2022-05-09 23:41:31,406 epoch 1 - iter 186/937 - loss 1.48569545 - samples/sec: 95.52 - lr: 0.000010
2022-05-09 23:41:46,603 epoch 1 - iter 279/937 - loss 1.18645416 - samples/sec: 97.92 - lr: 0.000015
2022-05-09 23:42:01,525 epoch 1 - iter 372/937 - loss 1.01481547 - samples/sec: 99.74 - lr: 0.000020
2022-05-09 23:42:16,869 epoch 1 - iter 465/937 - loss 0.86894115 - samples/sec: 97.01 - lr: 0.000025
2022-05-09 23:42:32,505 epoch 1 - iter 558/937 - loss 0.75848951 - samples/sec: 95.21 - lr: 0.000030
2022-05-09 23:42:48,889 epoch 1 - iter 651/937 - loss 0.68004440 - samples/sec: 90.87 - lr: 0.000035
2022-05-09 23:43:05,305 epoch 1 - iter 744/937 - loss 0.62468227 - samples/sec: 90.67 - lr: 0.000040
2022-05-09 23:43:22,552 epoch 1 - iter 837/937 - loss 0.57575609 - samples/sec: 86.33 - lr: 0.000045
2022-05-09 23:43:40,505 epoch 1 - iter 930/937 - loss 0.53467358 - samples/sec: 82.91 - lr: 0.000050
2022-05-09 23:43:41,669 ----------------------------------------------------------------------------------------------------
2022-05-09 23:43:41,670 EPOCH 1 done: loss 0.5328 - lr 0.000050
2022-05-09 23:44:01,944 Evaluating as a multi-label problem: False
2022-05-09 23:44:01,998 DEV : loss 0.08702843636274338 - f1-score (micro avg) 0.9042
2022-05-09 23:44:02,088 BAD EPOCHS (no improvement): 4
2022-05-09 23:44:02,089 ----------------------------------------------------------------------------------------------------
2022-05-09 23:44:19,412 epoch 2 - iter 93/937 - loss 0.21171218 - samples/sec: 85.94 - lr: 0.000049
2022-05-09 23:44:39,339 epoch 2 - iter 186/937 - loss 0.20667256 - samples/sec: 74.71 - lr: 0.000049
2022-05-09 23:44:57,325 epoch 2 - iter 279/937 - loss 0.20359662 - samples/sec: 82.76 - lr: 0.000048
2022-05-09 23:45:15,903 epoch 2 - iter 372/937 - loss 0.20181902 - samples/sec: 80.11 - lr: 0.000048
2022-05-09 23:45:33,625 epoch 2 - iter 465/937 - loss 0.20239195 - samples/sec: 84.00 - lr: 0.000047
2022-05-09 23:45:51,983 epoch 2 - iter 558/937 - loss 0.20029145 - samples/sec: 81.07 - lr: 0.000047
2022-05-09 23:46:10,178 epoch 2 - iter 651/937 - loss 0.19802516 - samples/sec: 81.82 - lr: 0.000046
2022-05-09 23:46:27,567 epoch 2 - iter 744/937 - loss 0.19751023 - samples/sec: 85.60 - lr: 0.000046
2022-05-09 23:46:46,030 epoch 2 - iter 837/937 - loss 0.19578745 - samples/sec: 80.62 - lr: 0.000045
2022-05-09 23:47:03,838 epoch 2 - iter 930/937 - loss 0.19400286 - samples/sec: 83.60 - lr: 0.000044
2022-05-09 23:47:05,067 ----------------------------------------------------------------------------------------------------
2022-05-09 23:47:05,067 EPOCH 2 done: loss 0.1938 - lr 0.000044
2022-05-09 23:47:24,009 Evaluating as a multi-label problem: False
2022-05-09 23:47:24,058 DEV : loss 0.06405811011791229 - f1-score (micro avg) 0.9361
2022-05-09 23:47:24,143 BAD EPOCHS (no improvement): 4
2022-05-09 23:47:24,144 ----------------------------------------------------------------------------------------------------
2022-05-09 23:47:43,087 epoch 3 - iter 93/937 - loss 0.17145472 - samples/sec: 78.59 - lr: 0.000044
2022-05-09 23:48:02,729 epoch 3 - iter 186/937 - loss 0.16975910 - samples/sec: 75.78 - lr: 0.000043
2022-05-09 23:48:22,058 epoch 3 - iter 279/937 - loss 0.16698979 - samples/sec: 77.00 - lr: 0.000043
2022-05-09 23:48:42,011 epoch 3 - iter 372/937 - loss 0.16408423 - samples/sec: 74.60 - lr: 0.000042
2022-05-09 23:49:02,832 epoch 3 - iter 465/937 - loss 0.16405058 - samples/sec: 71.49 - lr: 0.000042
2022-05-09 23:49:24,164 epoch 3 - iter 558/937 - loss 0.16308247 - samples/sec: 69.79 - lr: 0.000041
2022-05-09 23:49:44,385 epoch 3 - iter 651/937 - loss 0.16211092 - samples/sec: 73.61 - lr: 0.000041
2022-05-09 23:50:05,176 epoch 3 - iter 744/937 - loss 0.16230919 - samples/sec: 71.59 - lr: 0.000040
2022-05-09 23:50:24,259 epoch 3 - iter 837/937 - loss 0.16223568 - samples/sec: 78.01 - lr: 0.000039
2022-05-09 23:50:42,702 epoch 3 - iter 930/937 - loss 0.16166223 - samples/sec: 80.71 - lr: 0.000039
2022-05-09 23:50:43,928 ----------------------------------------------------------------------------------------------------
2022-05-09 23:50:43,928 EPOCH 3 done: loss 0.1620 - lr 0.000039
2022-05-09 23:51:01,357 Evaluating as a multi-label problem: False
2022-05-09 23:51:01,410 DEV : loss 0.06513667851686478 - f1-score (micro avg) 0.9462
2022-05-09 23:51:01,494 BAD EPOCHS (no improvement): 4
2022-05-09 23:51:01,495 ----------------------------------------------------------------------------------------------------
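The lr column makes the schedule visible: it climbs from 0.000005 to the 0.000050 peak over epoch 1 (roughly the first 10% of the 9370 total steps) and then decays linearly towards zero by the end of epoch 10. A small sketch of that schedule, assuming a 10% warmup fraction and the 937 batches per epoch shown in the log:

    # a minimal sketch: linear warmup over the first 10% of steps,
    # then linear decay to zero (consistent with the lr values above)
    def scheduled_lr(step, total_steps=937 * 10, peak_lr=5e-5, warmup_frac=0.1):
        warmup_steps = int(total_steps * warmup_frac)
        if step < warmup_steps:
            return peak_lr * step / warmup_steps
        return peak_lr * (total_steps - step) / (total_steps - warmup_steps)

    # e.g. scheduled_lr(93) ~ 0.000005 and scheduled_lr(930) ~ 0.000050,
    # matching the epoch 1 log lines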
2022-05-09 23:51:19,373 epoch 4 - iter 93/937 - loss 0.14617156 - samples/sec: 83.28 - lr: 0.000038
2022-05-09 23:51:39,862 epoch 4 - iter 186/937 - loss 0.15318927 - samples/sec: 72.64 - lr: 0.000038
2022-05-09 23:51:58,633 epoch 4 - iter 279/937 - loss 0.15311397 - samples/sec: 79.31 - lr: 0.000037
2022-05-09 23:52:17,782 epoch 4 - iter 372/937 - loss 0.15237270 - samples/sec: 77.73 - lr: 0.000037
2022-05-09 23:52:37,756 epoch 4 - iter 465/937 - loss 0.15252893 - samples/sec: 74.51 - lr: 0.000036
2022-05-09 23:52:57,040 epoch 4 - iter 558/937 - loss 0.15296964 - samples/sec: 77.19 - lr: 0.000036
2022-05-09 23:53:17,120 epoch 4 - iter 651/937 - loss 0.15177070 - samples/sec: 74.12 - lr: 0.000035
2022-05-09 23:53:36,789 epoch 4 - iter 744/937 - loss 0.15212670 - samples/sec: 75.67 - lr: 0.000034
2022-05-09 23:53:55,789 epoch 4 - iter 837/937 - loss 0.15188826 - samples/sec: 78.35 - lr: 0.000034
2022-05-09 23:54:15,078 epoch 4 - iter 930/937 - loss 0.15158585 - samples/sec: 77.16 - lr: 0.000033
2022-05-09 23:54:16,427 ----------------------------------------------------------------------------------------------------
2022-05-09 23:54:16,428 EPOCH 4 done: loss 0.1514 - lr 0.000033
2022-05-09 23:54:37,613 Evaluating as a multi-label problem: False
2022-05-09 23:54:37,666 DEV : loss 0.0851067453622818 - f1-score (micro avg) 0.9445
2022-05-09 23:54:37,758 BAD EPOCHS (no improvement): 4
2022-05-09 23:54:37,759 ----------------------------------------------------------------------------------------------------
2022-05-09 23:54:57,548 epoch 5 - iter 93/937 - loss 0.13786995 - samples/sec: 75.23 - lr: 0.000033
2022-05-09 23:55:17,232 epoch 5 - iter 186/937 - loss 0.14230070 - samples/sec: 75.62 - lr: 0.000032
2022-05-09 23:55:36,628 epoch 5 - iter 279/937 - loss 0.14258916 - samples/sec: 76.74 - lr: 0.000032
2022-05-09 23:55:56,340 epoch 5 - iter 372/937 - loss 0.14284130 - samples/sec: 75.52 - lr: 0.000031
2022-05-09 23:56:15,854 epoch 5 - iter 465/937 - loss 0.14169986 - samples/sec: 76.27 - lr: 0.000031
2022-05-09 23:56:34,410 epoch 5 - iter 558/937 - loss 0.14100332 - samples/sec: 80.21 - lr: 0.000030
2022-05-09 23:56:53,730 epoch 5 - iter 651/937 - loss 0.14139534 - samples/sec: 77.04 - lr: 0.000029
2022-05-09 23:57:12,846 epoch 5 - iter 744/937 - loss 0.14072810 - samples/sec: 77.88 - lr: 0.000029
2022-05-09 23:57:32,509 epoch 5 - iter 837/937 - loss 0.13972343 - samples/sec: 75.72 - lr: 0.000028
2022-05-09 23:57:51,218 epoch 5 - iter 930/937 - loss 0.14088149 - samples/sec: 79.56 - lr: 0.000028
2022-05-09 23:57:52,684 ----------------------------------------------------------------------------------------------------
2022-05-09 23:57:52,685 EPOCH 5 done: loss 0.1408 - lr 0.000028
2022-05-09 23:58:11,005 Evaluating as a multi-label problem: False
2022-05-09 23:58:11,060 DEV : loss 0.07939312607049942 - f1-score (micro avg) 0.9502
2022-05-09 23:58:11,147 BAD EPOCHS (no improvement): 4
2022-05-09 23:58:11,148 ----------------------------------------------------------------------------------------------------
2022-05-09 23:58:29,830 epoch 6 - iter 93/937 - loss 0.13587072 - samples/sec: 79.69 - lr: 0.000027
2022-05-09 23:58:48,422 epoch 6 - iter 186/937 - loss 0.13733201 - samples/sec: 80.06 - lr: 0.000027
2022-05-09 23:59:06,303 epoch 6 - iter 279/937 - loss 0.14061270 - samples/sec: 83.23 - lr: 0.000026
2022-05-09 23:59:24,586 epoch 6 - iter 372/937 - loss 0.13957657 - samples/sec: 81.44 - lr: 0.000026
2022-05-09 23:59:43,413 epoch 6 - iter 465/937 - loss 0.13980319 - samples/sec: 79.05 - lr: 0.000025
2022-05-10 00:00:01,871 epoch 6 - iter 558/937 - loss 0.13997926 - samples/sec: 80.63 - lr: 0.000024
2022-05-10 00:00:19,776 epoch 6 - iter 651/937 - loss 0.13934109 - samples/sec: 83.13 - lr: 0.000024
2022-05-10 00:00:38,921 epoch 6 - iter 744/937 - loss 0.13935470 - samples/sec: 77.75 - lr: 0.000023
2022-05-10 00:00:57,515 epoch 6 - iter 837/937 - loss 0.13944998 - samples/sec: 80.07 - lr: 0.000023
2022-05-10 00:01:15,467 epoch 6 - iter 930/937 - loss 0.13962343 - samples/sec: 82.92 - lr: 0.000022
2022-05-10 00:01:16,715 ----------------------------------------------------------------------------------------------------
2022-05-10 00:01:16,715 EPOCH 6 done: loss 0.1396 - lr 0.000022
2022-05-10 00:01:40,529 Evaluating as a multi-label problem: False
2022-05-10 00:01:40,579 DEV : loss 0.08579559624195099 - f1-score (micro avg) 0.9497
2022-05-10 00:01:40,666 BAD EPOCHS (no improvement): 4
2022-05-10 00:01:40,667 ----------------------------------------------------------------------------------------------------
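Each "DEV : ... f1-score (micro avg)" value is a span-level micro F1: a predicted entity counts as correct only if both its boundaries and its label match the gold annotation, and by this point the dev score has essentially plateaued around 0.95. A toy sketch of the metric, assuming entities are represented as (start, end, label) tuples; real evaluation code also handles tokenization details this ignores:

    def micro_f1(gold: set, pred: set) -> float:
        # exact span-and-label matches only
        tp = len(gold & pred)
        precision = tp / len(pred) if pred else 0.0
        recall = tp / len(gold) if gold else 0.0
        if precision + recall == 0.0:
            return 0.0
        return 2 * precision * recall / (precision + recall)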
2022-05-10 00:01:59,831 epoch 7 - iter 93/937 - loss 0.13534539 - samples/sec: 77.69 - lr: 0.000022
2022-05-10 00:02:18,246 epoch 7 - iter 186/937 - loss 0.13551684 - samples/sec: 80.83 - lr: 0.000021
2022-05-10 00:02:36,156 epoch 7 - iter 279/937 - loss 0.13584534 - samples/sec: 83.13 - lr: 0.000021
2022-05-10 00:02:55,093 epoch 7 - iter 372/937 - loss 0.13345388 - samples/sec: 78.60 - lr: 0.000020
2022-05-10 00:03:13,968 epoch 7 - iter 465/937 - loss 0.13357006 - samples/sec: 78.85 - lr: 0.000019
2022-05-10 00:03:33,833 epoch 7 - iter 558/937 - loss 0.13346607 - samples/sec: 74.94 - lr: 0.000019
2022-05-10 00:03:52,609 epoch 7 - iter 651/937 - loss 0.13318798 - samples/sec: 79.29 - lr: 0.000018
2022-05-10 00:04:11,143 epoch 7 - iter 744/937 - loss 0.13297235 - samples/sec: 80.32 - lr: 0.000018
2022-05-10 00:04:29,324 epoch 7 - iter 837/937 - loss 0.13294986 - samples/sec: 81.87 - lr: 0.000017
2022-05-10 00:04:48,227 epoch 7 - iter 930/937 - loss 0.13304211 - samples/sec: 78.74 - lr: 0.000017
2022-05-10 00:04:49,540 ----------------------------------------------------------------------------------------------------
2022-05-10 00:04:49,540 EPOCH 7 done: loss 0.1331 - lr 0.000017
2022-05-10 00:05:07,897 Evaluating as a multi-label problem: False
2022-05-10 00:05:07,956 DEV : loss 0.09259101003408432 - f1-score (micro avg) 0.9515
2022-05-10 00:05:08,048 BAD EPOCHS (no improvement): 4
2022-05-10 00:05:08,049 ----------------------------------------------------------------------------------------------------
2022-05-10 00:05:26,187 epoch 8 - iter 93/937 - loss 0.13287977 - samples/sec: 82.08 - lr: 0.000016
2022-05-10 00:05:46,292 epoch 8 - iter 186/937 - loss 0.13409706 - samples/sec: 74.04 - lr: 0.000016
2022-05-10 00:06:04,623 epoch 8 - iter 279/937 - loss 0.13270913 - samples/sec: 81.19 - lr: 0.000015
2022-05-10 00:06:23,601 epoch 8 - iter 372/937 - loss 0.13243728 - samples/sec: 78.43 - lr: 0.000014
2022-05-10 00:06:42,643 epoch 8 - iter 465/937 - loss 0.13287784 - samples/sec: 78.17 - lr: 0.000014
2022-05-10 00:07:02,185 epoch 8 - iter 558/937 - loss 0.13373988 - samples/sec: 76.17 - lr: 0.000013
2022-05-10 00:07:20,122 epoch 8 - iter 651/937 - loss 0.13402409 - samples/sec: 82.98 - lr: 0.000013
2022-05-10 00:07:39,327 epoch 8 - iter 744/937 - loss 0.13327101 - samples/sec: 77.50 - lr: 0.000012
2022-05-10 00:07:57,782 epoch 8 - iter 837/937 - loss 0.13355020 - samples/sec: 80.65 - lr: 0.000012
2022-05-10 00:08:16,804 epoch 8 - iter 930/937 - loss 0.13294805 - samples/sec: 78.25 - lr: 0.000011
2022-05-10 00:08:18,099 ----------------------------------------------------------------------------------------------------
2022-05-10 00:08:18,099 EPOCH 8 done: loss 0.1327 - lr 0.000011
2022-05-10 00:08:36,160 Evaluating as a multi-label problem: False
2022-05-10 00:08:36,214 DEV : loss 0.09469996392726898 - f1-score (micro avg) 0.9505
2022-05-10 00:08:36,300 BAD EPOCHS (no improvement): 4
2022-05-10 00:08:36,301 ----------------------------------------------------------------------------------------------------
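From epoch 4 onwards the pattern is the usual mild overfitting: training loss still edges down while dev loss rises and dev F1 stays flat around 0.95. A throwaway sketch for extracting that trend from a log like this one; the regexes assume the exact line format shown above, and training.log is the filename Flair conventionally writes into the base path, so both are assumptions to verify:

    import re

    epoch_re = re.compile(r"EPOCH (\d+) done: loss ([\d.]+)")
    dev_re = re.compile(r"DEV : loss ([\d.]+) - f1-score \(micro avg\)\s+([\d.]+)")

    with open(r"resources\taggers\ner\training.log", encoding="utf-8") as f:
        text = f.read()

    train_loss = [(int(e), float(l)) for e, l in epoch_re.findall(text)]
    dev_curve = [(float(l), float(f)) for l, f in dev_re.findall(text)]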
2022-05-10 00:08:54,628 epoch 9 - iter 93/937 - loss 0.13256573 - samples/sec: 81.23 - lr: 0.000011
2022-05-10 00:09:13,253 epoch 9 - iter 186/937 - loss 0.13218317 - samples/sec: 79.94 - lr: 0.000010
2022-05-10 00:09:31,322 epoch 9 - iter 279/937 - loss 0.13240640 - samples/sec: 82.40 - lr: 0.000009
2022-05-10 00:09:49,199 epoch 9 - iter 372/937 - loss 0.13118429 - samples/sec: 83.28 - lr: 0.000009
2022-05-10 00:10:06,958 epoch 9 - iter 465/937 - loss 0.13128632 - samples/sec: 83.83 - lr: 0.000008
2022-05-10 00:10:25,134 epoch 9 - iter 558/937 - loss 0.12936261 - samples/sec: 81.90 - lr: 0.000008
2022-05-10 00:10:43,680 epoch 9 - iter 651/937 - loss 0.12973987 - samples/sec: 80.27 - lr: 0.000007
2022-05-10 00:11:01,678 epoch 9 - iter 744/937 - loss 0.12968500 - samples/sec: 82.71 - lr: 0.000007
2022-05-10 00:11:19,484 epoch 9 - iter 837/937 - loss 0.12985020 - samples/sec: 83.59 - lr: 0.000006
2022-05-10 00:11:37,340 epoch 9 - iter 930/937 - loss 0.12947938 - samples/sec: 83.36 - lr: 0.000006
2022-05-10 00:11:38,689 ----------------------------------------------------------------------------------------------------
2022-05-10 00:11:38,689 EPOCH 9 done: loss 0.1294 - lr 0.000006
2022-05-10 00:11:56,867 Evaluating as a multi-label problem: False
2022-05-10 00:11:56,918 DEV : loss 0.09501232951879501 - f1-score (micro avg) 0.9504
2022-05-10 00:11:57,003 BAD EPOCHS (no improvement): 4
2022-05-10 00:11:57,004 ----------------------------------------------------------------------------------------------------
2022-05-10 00:12:15,701 epoch 10 - iter 93/937 - loss 0.12882436 - samples/sec: 79.62 - lr: 0.000005
2022-05-10 00:12:34,784 epoch 10 - iter 186/937 - loss 0.12932802 - samples/sec: 78.02 - lr: 0.000004
2022-05-10 00:12:53,563 epoch 10 - iter 279/937 - loss 0.12935565 - samples/sec: 79.27 - lr: 0.000004
2022-05-10 00:13:12,428 epoch 10 - iter 372/937 - loss 0.13016513 - samples/sec: 78.91 - lr: 0.000003
2022-05-10 00:13:31,484 epoch 10 - iter 465/937 - loss 0.13001423 - samples/sec: 78.12 - lr: 0.000003
2022-05-10 00:13:50,860 epoch 10 - iter 558/937 - loss 0.12967414 - samples/sec: 76.82 - lr: 0.000002
2022-05-10 00:14:10,036 epoch 10 - iter 651/937 - loss 0.13044245 - samples/sec: 77.61 - lr: 0.000002
2022-05-10 00:14:29,046 epoch 10 - iter 744/937 - loss 0.13049319 - samples/sec: 78.30 - lr: 0.000001
2022-05-10 00:14:47,934 epoch 10 - iter 837/937 - loss 0.12970693 - samples/sec: 78.83 - lr: 0.000001
2022-05-10 00:15:06,881 epoch 10 - iter 930/937 - loss 0.12987301 - samples/sec: 78.57 - lr: 0.000000
2022-05-10 00:15:08,384 ----------------------------------------------------------------------------------------------------
2022-05-10 00:15:08,384 EPOCH 10 done: loss 0.1298 - lr 0.000000
2022-05-10 00:15:27,169 Evaluating as a multi-label problem: False
2022-05-10 00:15:27,221 DEV : loss 0.09416753053665161 - f1-score (micro avg) 0.9513
2022-05-10 00:15:27,303 BAD EPOCHS (no improvement): 4
2022-05-10 00:15:28,112 ----------------------------------------------------------------------------------------------------
2022-05-10 00:15:28,113 Testing using last state of model ...
2022-05-10 00:15:47,035 Evaluating as a multi-label problem: False
2022-05-10 00:15:47,087 0.9117 0.9212 0.9164 0.879
2022-05-10 00:15:47,087
Results:
- F-score (micro) 0.9164
- F-score (macro) 0.9024
- Accuracy 0.879

By class:
              precision    recall  f1-score   support

         ORG     0.8893    0.9097    0.8994      1661
         LOC     0.9301    0.9335    0.9318      1668
         PER     0.9699    0.9579    0.9639      1617
        MISC     0.7951    0.8348    0.8145       702

   micro avg     0.9117    0.9212    0.9164      5648
   macro avg     0.8961    0.9090    0.9024      5648
weighted avg     0.9127    0.9212    0.9169      5648

2022-05-10 00:15:47,088 ----------------------------------------------------------------------------------------------------
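Since this run tests "using last state of model", the reported scores (test micro F1 0.9164 over 5648 gold entities; macro F1 is the unweighted mean of the four per-class F1 values) come from the final checkpoint rather than the best dev epoch. A hedged usage sketch for the trained tagger; final-model.pt is Flair's conventional filename for the last state, so verify it against the actual contents of resources\taggers\ner:

    from flair.data import Sentence
    from flair.models import SequenceTagger

    # load the last-state checkpoint saved under the training base path
    tagger = SequenceTagger.load("resources/taggers/ner/final-model.pt")

    sentence = Sentence("George Washington went to Washington.")
    tagger.predict(sentence)
    for span in sentence.get_spans("ner"):
        print(span)  # entity spans with PER/LOC/ORG/MISC labels and scores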