|
2022-05-09 23:40:59,402 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:40:59,404 Model: "SequenceTagger( |
|
(embeddings): TransformerWordEmbeddings( |
|
(model): BertModel( |
|
(embeddings): BertEmbeddings( |
|
(word_embeddings): Embedding(28996, 768, padding_idx=0) |
|
(position_embeddings): Embedding(512, 768) |
|
(token_type_embeddings): Embedding(2, 768) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(encoder): BertEncoder( |
|
(layer): ModuleList( |
|
(0): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(1): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(2): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(3): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(4): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(5): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(6): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(7): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(8): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(9): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(10): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(11): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
) |
|
) |
|
(pooler): BertPooler( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(activation): Tanh() |
|
) |
|
) |
|
) |
|
(word_dropout): WordDropout(p=0.05) |
|
(locked_dropout): LockedDropout(p=0.5) |
|
(linear): Linear(in_features=768, out_features=17, bias=True) |
|
(loss_function): CrossEntropyLoss() |
|
)" |
|
2022-05-09 23:40:59,408 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:40:59,408 Corpus: "Corpus: 14987 train + 3466 dev + 3684 test sentences" |
|
2022-05-09 23:40:59,408 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:40:59,408 Parameters: |
|
2022-05-09 23:40:59,408 - learning_rate: "0.000050" |
|
2022-05-09 23:40:59,408 - mini_batch_size: "16" |
|
2022-05-09 23:40:59,408 - patience: "3" |
|
2022-05-09 23:40:59,409 - anneal_factor: "0.5" |
|
2022-05-09 23:40:59,409 - max_epochs: "10" |
|
2022-05-09 23:40:59,409 - shuffle: "True" |
|
2022-05-09 23:40:59,409 - train_with_dev: "False" |
|
2022-05-09 23:40:59,409 - batch_growth_annealing: "False" |
|
2022-05-09 23:40:59,409 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:40:59,409 Model training base path: "resources\taggers\ner" |
|
2022-05-09 23:40:59,409 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:40:59,409 Device: cuda:0 |
|
2022-05-09 23:40:59,410 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:40:59,410 Embeddings storage mode: none |
|
2022-05-09 23:40:59,410 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:41:15,820 epoch 1 - iter 93/937 - loss 2.04152065 - samples/sec: 90.73 - lr: 0.000005 |
|
2022-05-09 23:41:31,406 epoch 1 - iter 186/937 - loss 1.48569545 - samples/sec: 95.52 - lr: 0.000010 |
|
2022-05-09 23:41:46,603 epoch 1 - iter 279/937 - loss 1.18645416 - samples/sec: 97.92 - lr: 0.000015 |
|
2022-05-09 23:42:01,525 epoch 1 - iter 372/937 - loss 1.01481547 - samples/sec: 99.74 - lr: 0.000020 |
|
2022-05-09 23:42:16,869 epoch 1 - iter 465/937 - loss 0.86894115 - samples/sec: 97.01 - lr: 0.000025 |
|
2022-05-09 23:42:32,505 epoch 1 - iter 558/937 - loss 0.75848951 - samples/sec: 95.21 - lr: 0.000030 |
|
2022-05-09 23:42:48,889 epoch 1 - iter 651/937 - loss 0.68004440 - samples/sec: 90.87 - lr: 0.000035 |
|
2022-05-09 23:43:05,305 epoch 1 - iter 744/937 - loss 0.62468227 - samples/sec: 90.67 - lr: 0.000040 |
|
2022-05-09 23:43:22,552 epoch 1 - iter 837/937 - loss 0.57575609 - samples/sec: 86.33 - lr: 0.000045 |
|
2022-05-09 23:43:40,505 epoch 1 - iter 930/937 - loss 0.53467358 - samples/sec: 82.91 - lr: 0.000050 |
|
2022-05-09 23:43:41,669 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:43:41,670 EPOCH 1 done: loss 0.5328 - lr 0.000050 |
|
2022-05-09 23:44:01,944 Evaluating as a multi-label problem: False |
|
2022-05-09 23:44:01,998 DEV : loss 0.08702843636274338 - f1-score (micro avg) 0.9042 |
|
2022-05-09 23:44:02,088 BAD EPOCHS (no improvement): 4 |
|
2022-05-09 23:44:02,089 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:44:19,412 epoch 2 - iter 93/937 - loss 0.21171218 - samples/sec: 85.94 - lr: 0.000049 |
|
2022-05-09 23:44:39,339 epoch 2 - iter 186/937 - loss 0.20667256 - samples/sec: 74.71 - lr: 0.000049 |
|
2022-05-09 23:44:57,325 epoch 2 - iter 279/937 - loss 0.20359662 - samples/sec: 82.76 - lr: 0.000048 |
|
2022-05-09 23:45:15,903 epoch 2 - iter 372/937 - loss 0.20181902 - samples/sec: 80.11 - lr: 0.000048 |
|
2022-05-09 23:45:33,625 epoch 2 - iter 465/937 - loss 0.20239195 - samples/sec: 84.00 - lr: 0.000047 |
|
2022-05-09 23:45:51,983 epoch 2 - iter 558/937 - loss 0.20029145 - samples/sec: 81.07 - lr: 0.000047 |
|
2022-05-09 23:46:10,178 epoch 2 - iter 651/937 - loss 0.19802516 - samples/sec: 81.82 - lr: 0.000046 |
|
2022-05-09 23:46:27,567 epoch 2 - iter 744/937 - loss 0.19751023 - samples/sec: 85.60 - lr: 0.000046 |
|
2022-05-09 23:46:46,030 epoch 2 - iter 837/937 - loss 0.19578745 - samples/sec: 80.62 - lr: 0.000045 |
|
2022-05-09 23:47:03,838 epoch 2 - iter 930/937 - loss 0.19400286 - samples/sec: 83.60 - lr: 0.000044 |
|
2022-05-09 23:47:05,067 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:47:05,067 EPOCH 2 done: loss 0.1938 - lr 0.000044 |
|
2022-05-09 23:47:24,009 Evaluating as a multi-label problem: False |
|
2022-05-09 23:47:24,058 DEV : loss 0.06405811011791229 - f1-score (micro avg) 0.9361 |
|
2022-05-09 23:47:24,143 BAD EPOCHS (no improvement): 4 |
|
2022-05-09 23:47:24,144 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:47:43,087 epoch 3 - iter 93/937 - loss 0.17145472 - samples/sec: 78.59 - lr: 0.000044 |
|
2022-05-09 23:48:02,729 epoch 3 - iter 186/937 - loss 0.16975910 - samples/sec: 75.78 - lr: 0.000043 |
|
2022-05-09 23:48:22,058 epoch 3 - iter 279/937 - loss 0.16698979 - samples/sec: 77.00 - lr: 0.000043 |
|
2022-05-09 23:48:42,011 epoch 3 - iter 372/937 - loss 0.16408423 - samples/sec: 74.60 - lr: 0.000042 |
|
2022-05-09 23:49:02,832 epoch 3 - iter 465/937 - loss 0.16405058 - samples/sec: 71.49 - lr: 0.000042 |
|
2022-05-09 23:49:24,164 epoch 3 - iter 558/937 - loss 0.16308247 - samples/sec: 69.79 - lr: 0.000041 |
|
2022-05-09 23:49:44,385 epoch 3 - iter 651/937 - loss 0.16211092 - samples/sec: 73.61 - lr: 0.000041 |
|
2022-05-09 23:50:05,176 epoch 3 - iter 744/937 - loss 0.16230919 - samples/sec: 71.59 - lr: 0.000040 |
|
2022-05-09 23:50:24,259 epoch 3 - iter 837/937 - loss 0.16223568 - samples/sec: 78.01 - lr: 0.000039 |
|
2022-05-09 23:50:42,702 epoch 3 - iter 930/937 - loss 0.16166223 - samples/sec: 80.71 - lr: 0.000039 |
|
2022-05-09 23:50:43,928 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:50:43,928 EPOCH 3 done: loss 0.1620 - lr 0.000039 |
|
2022-05-09 23:51:01,357 Evaluating as a multi-label problem: False |
|
2022-05-09 23:51:01,410 DEV : loss 0.06513667851686478 - f1-score (micro avg) 0.9462 |
|
2022-05-09 23:51:01,494 BAD EPOCHS (no improvement): 4 |
|
2022-05-09 23:51:01,495 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:51:19,373 epoch 4 - iter 93/937 - loss 0.14617156 - samples/sec: 83.28 - lr: 0.000038 |
|
2022-05-09 23:51:39,862 epoch 4 - iter 186/937 - loss 0.15318927 - samples/sec: 72.64 - lr: 0.000038 |
|
2022-05-09 23:51:58,633 epoch 4 - iter 279/937 - loss 0.15311397 - samples/sec: 79.31 - lr: 0.000037 |
|
2022-05-09 23:52:17,782 epoch 4 - iter 372/937 - loss 0.15237270 - samples/sec: 77.73 - lr: 0.000037 |
|
2022-05-09 23:52:37,756 epoch 4 - iter 465/937 - loss 0.15252893 - samples/sec: 74.51 - lr: 0.000036 |
|
2022-05-09 23:52:57,040 epoch 4 - iter 558/937 - loss 0.15296964 - samples/sec: 77.19 - lr: 0.000036 |
|
2022-05-09 23:53:17,120 epoch 4 - iter 651/937 - loss 0.15177070 - samples/sec: 74.12 - lr: 0.000035 |
|
2022-05-09 23:53:36,789 epoch 4 - iter 744/937 - loss 0.15212670 - samples/sec: 75.67 - lr: 0.000034 |
|
2022-05-09 23:53:55,789 epoch 4 - iter 837/937 - loss 0.15188826 - samples/sec: 78.35 - lr: 0.000034 |
|
2022-05-09 23:54:15,078 epoch 4 - iter 930/937 - loss 0.15158585 - samples/sec: 77.16 - lr: 0.000033 |
|
2022-05-09 23:54:16,427 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:54:16,428 EPOCH 4 done: loss 0.1514 - lr 0.000033 |
|
2022-05-09 23:54:37,613 Evaluating as a multi-label problem: False |
|
2022-05-09 23:54:37,666 DEV : loss 0.0851067453622818 - f1-score (micro avg) 0.9445 |
|
2022-05-09 23:54:37,758 BAD EPOCHS (no improvement): 4 |
|
2022-05-09 23:54:37,759 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:54:57,548 epoch 5 - iter 93/937 - loss 0.13786995 - samples/sec: 75.23 - lr: 0.000033 |
|
2022-05-09 23:55:17,232 epoch 5 - iter 186/937 - loss 0.14230070 - samples/sec: 75.62 - lr: 0.000032 |
|
2022-05-09 23:55:36,628 epoch 5 - iter 279/937 - loss 0.14258916 - samples/sec: 76.74 - lr: 0.000032 |
|
2022-05-09 23:55:56,340 epoch 5 - iter 372/937 - loss 0.14284130 - samples/sec: 75.52 - lr: 0.000031 |
|
2022-05-09 23:56:15,854 epoch 5 - iter 465/937 - loss 0.14169986 - samples/sec: 76.27 - lr: 0.000031 |
|
2022-05-09 23:56:34,410 epoch 5 - iter 558/937 - loss 0.14100332 - samples/sec: 80.21 - lr: 0.000030 |
|
2022-05-09 23:56:53,730 epoch 5 - iter 651/937 - loss 0.14139534 - samples/sec: 77.04 - lr: 0.000029 |
|
2022-05-09 23:57:12,846 epoch 5 - iter 744/937 - loss 0.14072810 - samples/sec: 77.88 - lr: 0.000029 |
|
2022-05-09 23:57:32,509 epoch 5 - iter 837/937 - loss 0.13972343 - samples/sec: 75.72 - lr: 0.000028 |
|
2022-05-09 23:57:51,218 epoch 5 - iter 930/937 - loss 0.14088149 - samples/sec: 79.56 - lr: 0.000028 |
|
2022-05-09 23:57:52,684 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:57:52,685 EPOCH 5 done: loss 0.1408 - lr 0.000028 |
|
2022-05-09 23:58:11,005 Evaluating as a multi-label problem: False |
|
2022-05-09 23:58:11,060 DEV : loss 0.07939312607049942 - f1-score (micro avg) 0.9502 |
|
2022-05-09 23:58:11,147 BAD EPOCHS (no improvement): 4 |
|
2022-05-09 23:58:11,148 ---------------------------------------------------------------------------------------------------- |
|
2022-05-09 23:58:29,830 epoch 6 - iter 93/937 - loss 0.13587072 - samples/sec: 79.69 - lr: 0.000027 |
|
2022-05-09 23:58:48,422 epoch 6 - iter 186/937 - loss 0.13733201 - samples/sec: 80.06 - lr: 0.000027 |
|
2022-05-09 23:59:06,303 epoch 6 - iter 279/937 - loss 0.14061270 - samples/sec: 83.23 - lr: 0.000026 |
|
2022-05-09 23:59:24,586 epoch 6 - iter 372/937 - loss 0.13957657 - samples/sec: 81.44 - lr: 0.000026 |
|
2022-05-09 23:59:43,413 epoch 6 - iter 465/937 - loss 0.13980319 - samples/sec: 79.05 - lr: 0.000025 |
|
2022-05-10 00:00:01,871 epoch 6 - iter 558/937 - loss 0.13997926 - samples/sec: 80.63 - lr: 0.000024 |
|
2022-05-10 00:00:19,776 epoch 6 - iter 651/937 - loss 0.13934109 - samples/sec: 83.13 - lr: 0.000024 |
|
2022-05-10 00:00:38,921 epoch 6 - iter 744/937 - loss 0.13935470 - samples/sec: 77.75 - lr: 0.000023 |
|
2022-05-10 00:00:57,515 epoch 6 - iter 837/937 - loss 0.13944998 - samples/sec: 80.07 - lr: 0.000023 |
|
2022-05-10 00:01:15,467 epoch 6 - iter 930/937 - loss 0.13962343 - samples/sec: 82.92 - lr: 0.000022 |
|
2022-05-10 00:01:16,715 ---------------------------------------------------------------------------------------------------- |
|
2022-05-10 00:01:16,715 EPOCH 6 done: loss 0.1396 - lr 0.000022 |
|
2022-05-10 00:01:40,529 Evaluating as a multi-label problem: False |
|
2022-05-10 00:01:40,579 DEV : loss 0.08579559624195099 - f1-score (micro avg) 0.9497 |
|
2022-05-10 00:01:40,666 BAD EPOCHS (no improvement): 4 |
|
2022-05-10 00:01:40,667 ---------------------------------------------------------------------------------------------------- |
|
2022-05-10 00:01:59,831 epoch 7 - iter 93/937 - loss 0.13534539 - samples/sec: 77.69 - lr: 0.000022 |
|
2022-05-10 00:02:18,246 epoch 7 - iter 186/937 - loss 0.13551684 - samples/sec: 80.83 - lr: 0.000021 |
|
2022-05-10 00:02:36,156 epoch 7 - iter 279/937 - loss 0.13584534 - samples/sec: 83.13 - lr: 0.000021 |
|
2022-05-10 00:02:55,093 epoch 7 - iter 372/937 - loss 0.13345388 - samples/sec: 78.60 - lr: 0.000020 |
|
2022-05-10 00:03:13,968 epoch 7 - iter 465/937 - loss 0.13357006 - samples/sec: 78.85 - lr: 0.000019 |
|
2022-05-10 00:03:33,833 epoch 7 - iter 558/937 - loss 0.13346607 - samples/sec: 74.94 - lr: 0.000019 |
|
2022-05-10 00:03:52,609 epoch 7 - iter 651/937 - loss 0.13318798 - samples/sec: 79.29 - lr: 0.000018 |
|
2022-05-10 00:04:11,143 epoch 7 - iter 744/937 - loss 0.13297235 - samples/sec: 80.32 - lr: 0.000018 |
|
2022-05-10 00:04:29,324 epoch 7 - iter 837/937 - loss 0.13294986 - samples/sec: 81.87 - lr: 0.000017 |
|
2022-05-10 00:04:48,227 epoch 7 - iter 930/937 - loss 0.13304211 - samples/sec: 78.74 - lr: 0.000017 |
|
2022-05-10 00:04:49,540 ---------------------------------------------------------------------------------------------------- |
|
2022-05-10 00:04:49,540 EPOCH 7 done: loss 0.1331 - lr 0.000017 |
|
2022-05-10 00:05:07,897 Evaluating as a multi-label problem: False |
|
2022-05-10 00:05:07,956 DEV : loss 0.09259101003408432 - f1-score (micro avg) 0.9515 |
|
2022-05-10 00:05:08,048 BAD EPOCHS (no improvement): 4 |
|
2022-05-10 00:05:08,049 ---------------------------------------------------------------------------------------------------- |
|
2022-05-10 00:05:26,187 epoch 8 - iter 93/937 - loss 0.13287977 - samples/sec: 82.08 - lr: 0.000016 |
|
2022-05-10 00:05:46,292 epoch 8 - iter 186/937 - loss 0.13409706 - samples/sec: 74.04 - lr: 0.000016 |
|
2022-05-10 00:06:04,623 epoch 8 - iter 279/937 - loss 0.13270913 - samples/sec: 81.19 - lr: 0.000015 |
|
2022-05-10 00:06:23,601 epoch 8 - iter 372/937 - loss 0.13243728 - samples/sec: 78.43 - lr: 0.000014 |
|
2022-05-10 00:06:42,643 epoch 8 - iter 465/937 - loss 0.13287784 - samples/sec: 78.17 - lr: 0.000014 |
|
2022-05-10 00:07:02,185 epoch 8 - iter 558/937 - loss 0.13373988 - samples/sec: 76.17 - lr: 0.000013 |
|
2022-05-10 00:07:20,122 epoch 8 - iter 651/937 - loss 0.13402409 - samples/sec: 82.98 - lr: 0.000013 |
|
2022-05-10 00:07:39,327 epoch 8 - iter 744/937 - loss 0.13327101 - samples/sec: 77.50 - lr: 0.000012 |
|
2022-05-10 00:07:57,782 epoch 8 - iter 837/937 - loss 0.13355020 - samples/sec: 80.65 - lr: 0.000012 |
|
2022-05-10 00:08:16,804 epoch 8 - iter 930/937 - loss 0.13294805 - samples/sec: 78.25 - lr: 0.000011 |
|
2022-05-10 00:08:18,099 ---------------------------------------------------------------------------------------------------- |
|
2022-05-10 00:08:18,099 EPOCH 8 done: loss 0.1327 - lr 0.000011 |
|
2022-05-10 00:08:36,160 Evaluating as a multi-label problem: False |
|
2022-05-10 00:08:36,214 DEV : loss 0.09469996392726898 - f1-score (micro avg) 0.9505 |
|
2022-05-10 00:08:36,300 BAD EPOCHS (no improvement): 4 |
|
2022-05-10 00:08:36,301 ---------------------------------------------------------------------------------------------------- |
|
2022-05-10 00:08:54,628 epoch 9 - iter 93/937 - loss 0.13256573 - samples/sec: 81.23 - lr: 0.000011 |
|
2022-05-10 00:09:13,253 epoch 9 - iter 186/937 - loss 0.13218317 - samples/sec: 79.94 - lr: 0.000010 |
|
2022-05-10 00:09:31,322 epoch 9 - iter 279/937 - loss 0.13240640 - samples/sec: 82.40 - lr: 0.000009 |
|
2022-05-10 00:09:49,199 epoch 9 - iter 372/937 - loss 0.13118429 - samples/sec: 83.28 - lr: 0.000009 |
|
2022-05-10 00:10:06,958 epoch 9 - iter 465/937 - loss 0.13128632 - samples/sec: 83.83 - lr: 0.000008 |
|
2022-05-10 00:10:25,134 epoch 9 - iter 558/937 - loss 0.12936261 - samples/sec: 81.90 - lr: 0.000008 |
|
2022-05-10 00:10:43,680 epoch 9 - iter 651/937 - loss 0.12973987 - samples/sec: 80.27 - lr: 0.000007 |
|
2022-05-10 00:11:01,678 epoch 9 - iter 744/937 - loss 0.12968500 - samples/sec: 82.71 - lr: 0.000007 |
|
2022-05-10 00:11:19,484 epoch 9 - iter 837/937 - loss 0.12985020 - samples/sec: 83.59 - lr: 0.000006 |
|
2022-05-10 00:11:37,340 epoch 9 - iter 930/937 - loss 0.12947938 - samples/sec: 83.36 - lr: 0.000006 |
|
2022-05-10 00:11:38,689 ---------------------------------------------------------------------------------------------------- |
|
2022-05-10 00:11:38,689 EPOCH 9 done: loss 0.1294 - lr 0.000006 |
|
2022-05-10 00:11:56,867 Evaluating as a multi-label problem: False |
|
2022-05-10 00:11:56,918 DEV : loss 0.09501232951879501 - f1-score (micro avg) 0.9504 |
|
2022-05-10 00:11:57,003 BAD EPOCHS (no improvement): 4 |
|
2022-05-10 00:11:57,004 ---------------------------------------------------------------------------------------------------- |
|
2022-05-10 00:12:15,701 epoch 10 - iter 93/937 - loss 0.12882436 - samples/sec: 79.62 - lr: 0.000005 |
|
2022-05-10 00:12:34,784 epoch 10 - iter 186/937 - loss 0.12932802 - samples/sec: 78.02 - lr: 0.000004 |
|
2022-05-10 00:12:53,563 epoch 10 - iter 279/937 - loss 0.12935565 - samples/sec: 79.27 - lr: 0.000004 |
|
2022-05-10 00:13:12,428 epoch 10 - iter 372/937 - loss 0.13016513 - samples/sec: 78.91 - lr: 0.000003 |
|
2022-05-10 00:13:31,484 epoch 10 - iter 465/937 - loss 0.13001423 - samples/sec: 78.12 - lr: 0.000003 |
|
2022-05-10 00:13:50,860 epoch 10 - iter 558/937 - loss 0.12967414 - samples/sec: 76.82 - lr: 0.000002 |
|
2022-05-10 00:14:10,036 epoch 10 - iter 651/937 - loss 0.13044245 - samples/sec: 77.61 - lr: 0.000002 |
|
2022-05-10 00:14:29,046 epoch 10 - iter 744/937 - loss 0.13049319 - samples/sec: 78.30 - lr: 0.000001 |
|
2022-05-10 00:14:47,934 epoch 10 - iter 837/937 - loss 0.12970693 - samples/sec: 78.83 - lr: 0.000001 |
|
2022-05-10 00:15:06,881 epoch 10 - iter 930/937 - loss 0.12987301 - samples/sec: 78.57 - lr: 0.000000 |
|
2022-05-10 00:15:08,384 ---------------------------------------------------------------------------------------------------- |
|
2022-05-10 00:15:08,384 EPOCH 10 done: loss 0.1298 - lr 0.000000 |
|
2022-05-10 00:15:27,169 Evaluating as a multi-label problem: False |
|
2022-05-10 00:15:27,221 DEV : loss 0.09416753053665161 - f1-score (micro avg) 0.9513 |
|
2022-05-10 00:15:27,303 BAD EPOCHS (no improvement): 4 |
|
2022-05-10 00:15:28,112 ---------------------------------------------------------------------------------------------------- |
|
2022-05-10 00:15:28,113 Testing using last state of model ... |
|
2022-05-10 00:15:47,035 Evaluating as a multi-label problem: False |
|
2022-05-10 00:15:47,087 0.9117 0.9212 0.9164 0.879 |
|
2022-05-10 00:15:47,087 |
|
Results: |
|
- F-score (micro) 0.9164 |
|
- F-score (macro) 0.9024 |
|
- Accuracy 0.879 |
|
|
|
By class: |
|
precision recall f1-score support |
|
|
|
ORG 0.8893 0.9097 0.8994 1661 |
|
LOC 0.9301 0.9335 0.9318 1668 |
|
PER 0.9699 0.9579 0.9639 1617 |
|
MISC 0.7951 0.8348 0.8145 702 |
|
|
|
micro avg 0.9117 0.9212 0.9164 5648 |
|
macro avg 0.8961 0.9090 0.9024 5648 |
|
weighted avg 0.9127 0.9212 0.9169 5648 |
|
|
|
2022-05-10 00:15:47,088 ---------------------------------------------------------------------------------------------------- |
|
|