2023-10-25 15:38:47,008 ----------------------------------------------------------------------------------------------------
2023-10-25 15:38:47,009 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(64001, 768)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
              (intermediate_act_fn): GELUActivation()
            )
            (output): BertOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
      (pooler): BertPooler(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (activation): Tanh()
      )
    )
  )
  (locked_dropout): LockedDropout(p=0.5)
  (linear): Linear(in_features=768, out_features=13, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2023-10-25 15:38:47,009 ----------------------------------------------------------------------------------------------------
2023-10-25 15:38:47,009 MultiCorpus: 14465 train + 1392 dev + 2432 test sentences
 - NER_HIPE_2022 Corpus: 14465 train + 1392 dev + 2432 test sentences - /home/ubuntu/.flair/datasets/ner_hipe_2022/v2.1/letemps/fr/with_doc_seperator
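For reference, the setup logged above can be reproduced in a few lines of Flair. This is a minimal sketch assuming a Flair 0.13-style API; the NER_HIPE_2022 arguments are inferred from the cache path above, and the embedding/tagger options are read off the model dump and the base path logged further below, so treat every argument as an assumption rather than the exact training script.

```python
# Sketch only: reconstructing the logged corpus and tagger (Flair ~0.13 assumed).
from flair.datasets import NER_HIPE_2022
from flair.embeddings import TransformerWordEmbeddings
from flair.models import SequenceTagger

# Inferred from ".../ner_hipe_2022/v2.1/letemps/fr/with_doc_seperator" (assumption)
corpus = NER_HIPE_2022(dataset_name="letemps", language="fr",
                       add_document_separator=True)
label_dict = corpus.make_label_dictionary(label_type="ner")

embeddings = TransformerWordEmbeddings(
    model="dbmdz/bert-base-historic-multilingual-64k-td-cased",  # 64k vocab -> Embedding(64001, 768)
    layers="-1",               # "layers-1" in the base path: last transformer layer only
    subtoken_pooling="first",  # "poolingfirst": first sub-token represents the word
    fine_tune=True,
)
tagger = SequenceTagger(
    hidden_size=256,           # inert here: use_rnn=False, so no LSTM is created
    embeddings=embeddings,
    tag_dictionary=label_dict,
    tag_type="ner",
    use_crf=False,             # "crfFalse": plain Linear(768 -> 13) + CrossEntropyLoss head
    use_rnn=False,
    reproject_embeddings=False,
)
```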
2023-10-25 15:38:47,009 ----------------------------------------------------------------------------------------------------
2023-10-25 15:38:47,009 Train: 14465 sentences
2023-10-25 15:38:47,009 (train_with_dev=False, train_with_test=False)
2023-10-25 15:38:47,009 ----------------------------------------------------------------------------------------------------
2023-10-25 15:38:47,009 Training Params:
2023-10-25 15:38:47,009 - learning_rate: "3e-05"
2023-10-25 15:38:47,009 - mini_batch_size: "4"
2023-10-25 15:38:47,009 - max_epochs: "10"
2023-10-25 15:38:47,009 - shuffle: "True"
2023-10-25 15:38:47,009 ----------------------------------------------------------------------------------------------------
2023-10-25 15:38:47,009 Plugins:
2023-10-25 15:38:47,009 - TensorboardLogger
2023-10-25 15:38:47,009 - LinearScheduler | warmup_fraction: '0.1'
2023-10-25 15:38:47,009 ----------------------------------------------------------------------------------------------------
2023-10-25 15:38:47,009 Final evaluation on model from best epoch (best-model.pt)
2023-10-25 15:38:47,009 - metric: "('micro avg', 'f1-score')"
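A fine-tuning call consistent with the parameters, plugins, and selection metric listed above might look as follows. This is a hedged sketch, not the command that produced this log: the warmup_fraction and main_evaluation_metric keywords and the TensorboardLogger import path are assumptions based on recent Flair versions.

```python
# Sketch only: fine-tuning with the logged hyperparameters (Flair ~0.13 assumed).
from flair.trainers import ModelTrainer
from flair.trainers.plugins import TensorboardLogger  # import path is an assumption

trainer = ModelTrainer(tagger, corpus)
trainer.fine_tune(
    "hmbench-letemps/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased"
    "-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-4",
    learning_rate=3e-5,
    mini_batch_size=4,
    max_epochs=10,
    shuffle=True,
    warmup_fraction=0.1,  # wires up the LinearScheduler plugin logged above
    main_evaluation_metric=("micro avg", "f1-score"),  # selects best-model.pt on dev micro-F1
    plugins=[TensorboardLogger()],
)
```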
2023-10-25 15:38:47,009 ----------------------------------------------------------------------------------------------------
2023-10-25 15:38:47,009 Computation:
2023-10-25 15:38:47,009 - compute on device: cuda:0
2023-10-25 15:38:47,009 - embedding storage: none
2023-10-25 15:38:47,009 ----------------------------------------------------------------------------------------------------
2023-10-25 15:38:47,009 Model training base path: "hmbench-letemps/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-4"
2023-10-25 15:38:47,009 ----------------------------------------------------------------------------------------------------
2023-10-25 15:38:47,009 ----------------------------------------------------------------------------------------------------
2023-10-25 15:38:47,009 Logging anything other than scalars to TensorBoard is currently not supported.
2023-10-25 15:39:09,553 epoch 1 - iter 361/3617 - loss 1.19886281 - time (sec): 22.54 - samples/sec: 1661.18 - lr: 0.000003 - momentum: 0.000000
2023-10-25 15:39:32,433 epoch 1 - iter 722/3617 - loss 0.68831441 - time (sec): 45.42 - samples/sec: 1682.42 - lr: 0.000006 - momentum: 0.000000
2023-10-25 15:39:54,936 epoch 1 - iter 1083/3617 - loss 0.51170947 - time (sec): 67.93 - samples/sec: 1667.26 - lr: 0.000009 - momentum: 0.000000
2023-10-25 15:40:17,679 epoch 1 - iter 1444/3617 - loss 0.41162390 - time (sec): 90.67 - samples/sec: 1675.46 - lr: 0.000012 - momentum: 0.000000
2023-10-25 15:40:40,401 epoch 1 - iter 1805/3617 - loss 0.35326768 - time (sec): 113.39 - samples/sec: 1674.11 - lr: 0.000015 - momentum: 0.000000
2023-10-25 15:41:03,110 epoch 1 - iter 2166/3617 - loss 0.31329417 - time (sec): 136.10 - samples/sec: 1682.14 - lr: 0.000018 - momentum: 0.000000
2023-10-25 15:41:25,626 epoch 1 - iter 2527/3617 - loss 0.28535492 - time (sec): 158.62 - samples/sec: 1680.30 - lr: 0.000021 - momentum: 0.000000
2023-10-25 15:41:48,351 epoch 1 - iter 2888/3617 - loss 0.26499215 - time (sec): 181.34 - samples/sec: 1682.04 - lr: 0.000024 - momentum: 0.000000
2023-10-25 15:42:10,950 epoch 1 - iter 3249/3617 - loss 0.24775587 - time (sec): 203.94 - samples/sec: 1678.70 - lr: 0.000027 - momentum: 0.000000
2023-10-25 15:42:33,298 epoch 1 - iter 3610/3617 - loss 0.23447570 - time (sec): 226.29 - samples/sec: 1675.45 - lr: 0.000030 - momentum: 0.000000
2023-10-25 15:42:33,748 ----------------------------------------------------------------------------------------------------
2023-10-25 15:42:33,748 EPOCH 1 done: loss 0.2341 - lr: 0.000030
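The lr column traces the LinearScheduler: with warmup_fraction 0.1 over 10 epochs of 3617 mini-batches each, the rate climbs linearly to its 3e-05 peak exactly at the end of epoch 1, then decays linearly to zero by the end of epoch 10. A small illustrative function (not Flair's implementation) reproduces the logged values:

```python
# Illustration of the triangular LR schedule visible in the log (not Flair's code).
def linear_warmup_decay_lr(step: int, total_steps: int,
                           peak_lr: float = 3e-5, warmup_fraction: float = 0.1) -> float:
    warmup_steps = int(total_steps * warmup_fraction)
    if step < warmup_steps:
        return peak_lr * step / warmup_steps                              # linear warmup
    return peak_lr * (total_steps - step) / (total_steps - warmup_steps)  # linear decay

total = 3617 * 10
print(linear_warmup_decay_lr(3617, total))      # ~3.0e-05: peak at the end of epoch 1
print(linear_warmup_decay_lr(2 * 3617, total))  # ~2.7e-05, matching the epoch-2 lines below
```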
2023-10-25 15:42:38,729 DEV : loss 0.12141559273004532 - f1-score (micro avg) 0.6425
2023-10-25 15:42:38,752 saving best model
2023-10-25 15:42:39,301 ----------------------------------------------------------------------------------------------------
2023-10-25 15:43:02,256 epoch 2 - iter 361/3617 - loss 0.10229911 - time (sec): 22.95 - samples/sec: 1700.22 - lr: 0.000030 - momentum: 0.000000
2023-10-25 15:43:24,847 epoch 2 - iter 722/3617 - loss 0.10119859 - time (sec): 45.55 - samples/sec: 1678.69 - lr: 0.000029 - momentum: 0.000000
2023-10-25 15:43:47,602 epoch 2 - iter 1083/3617 - loss 0.10115542 - time (sec): 68.30 - samples/sec: 1671.77 - lr: 0.000029 - momentum: 0.000000
2023-10-25 15:44:10,203 epoch 2 - iter 1444/3617 - loss 0.10048881 - time (sec): 90.90 - samples/sec: 1676.55 - lr: 0.000029 - momentum: 0.000000
2023-10-25 15:44:32,710 epoch 2 - iter 1805/3617 - loss 0.09969364 - time (sec): 113.41 - samples/sec: 1664.78 - lr: 0.000028 - momentum: 0.000000
2023-10-25 15:44:55,757 epoch 2 - iter 2166/3617 - loss 0.09899570 - time (sec): 136.45 - samples/sec: 1680.19 - lr: 0.000028 - momentum: 0.000000
2023-10-25 15:45:18,336 epoch 2 - iter 2527/3617 - loss 0.09812338 - time (sec): 159.03 - samples/sec: 1675.61 - lr: 0.000028 - momentum: 0.000000
2023-10-25 15:45:40,866 epoch 2 - iter 2888/3617 - loss 0.09881509 - time (sec): 181.56 - samples/sec: 1674.82 - lr: 0.000027 - momentum: 0.000000
2023-10-25 15:46:03,446 epoch 2 - iter 3249/3617 - loss 0.09818580 - time (sec): 204.14 - samples/sec: 1678.00 - lr: 0.000027 - momentum: 0.000000
2023-10-25 15:46:26,000 epoch 2 - iter 3610/3617 - loss 0.09912199 - time (sec): 226.70 - samples/sec: 1673.03 - lr: 0.000027 - momentum: 0.000000
2023-10-25 15:46:26,427 ----------------------------------------------------------------------------------------------------
2023-10-25 15:46:26,427 EPOCH 2 done: loss 0.0991 - lr: 0.000027
2023-10-25 15:46:31,155 DEV : loss 0.10703670233488083 - f1-score (micro avg) 0.5748
2023-10-25 15:46:31,178 ----------------------------------------------------------------------------------------------------
2023-10-25 15:46:54,123 epoch 3 - iter 361/3617 - loss 0.06451220 - time (sec): 22.94 - samples/sec: 1635.94 - lr: 0.000026 - momentum: 0.000000
2023-10-25 15:47:17,018 epoch 3 - iter 722/3617 - loss 0.07071297 - time (sec): 45.84 - samples/sec: 1661.24 - lr: 0.000026 - momentum: 0.000000
2023-10-25 15:47:39,779 epoch 3 - iter 1083/3617 - loss 0.07369259 - time (sec): 68.60 - samples/sec: 1671.64 - lr: 0.000026 - momentum: 0.000000
2023-10-25 15:48:02,401 epoch 3 - iter 1444/3617 - loss 0.07364717 - time (sec): 91.22 - samples/sec: 1660.50 - lr: 0.000025 - momentum: 0.000000
2023-10-25 15:48:25,138 epoch 3 - iter 1805/3617 - loss 0.07368861 - time (sec): 113.96 - samples/sec: 1661.84 - lr: 0.000025 - momentum: 0.000000
2023-10-25 15:48:47,723 epoch 3 - iter 2166/3617 - loss 0.07186830 - time (sec): 136.54 - samples/sec: 1669.15 - lr: 0.000025 - momentum: 0.000000
2023-10-25 15:49:10,464 epoch 3 - iter 2527/3617 - loss 0.07251865 - time (sec): 159.28 - samples/sec: 1672.41 - lr: 0.000024 - momentum: 0.000000
2023-10-25 15:49:32,837 epoch 3 - iter 2888/3617 - loss 0.07304301 - time (sec): 181.66 - samples/sec: 1667.81 - lr: 0.000024 - momentum: 0.000000
2023-10-25 15:49:55,910 epoch 3 - iter 3249/3617 - loss 0.07314916 - time (sec): 204.73 - samples/sec: 1669.74 - lr: 0.000024 - momentum: 0.000000
2023-10-25 15:50:18,464 epoch 3 - iter 3610/3617 - loss 0.07317717 - time (sec): 227.28 - samples/sec: 1667.96 - lr: 0.000023 - momentum: 0.000000
2023-10-25 15:50:18,929 ----------------------------------------------------------------------------------------------------
2023-10-25 15:50:18,929 EPOCH 3 done: loss 0.0731 - lr: 0.000023
2023-10-25 15:50:23,703 DEV : loss 0.22103023529052734 - f1-score (micro avg) 0.6461
2023-10-25 15:50:23,726 saving best model
2023-10-25 15:50:24,448 ----------------------------------------------------------------------------------------------------
2023-10-25 15:50:47,337 epoch 4 - iter 361/3617 - loss 0.04349179 - time (sec): 22.89 - samples/sec: 1687.96 - lr: 0.000023 - momentum: 0.000000
2023-10-25 15:51:09,914 epoch 4 - iter 722/3617 - loss 0.04765068 - time (sec): 45.47 - samples/sec: 1697.83 - lr: 0.000023 - momentum: 0.000000
2023-10-25 15:51:32,736 epoch 4 - iter 1083/3617 - loss 0.04598577 - time (sec): 68.29 - samples/sec: 1696.69 - lr: 0.000022 - momentum: 0.000000
2023-10-25 15:51:55,386 epoch 4 - iter 1444/3617 - loss 0.04854533 - time (sec): 90.94 - samples/sec: 1670.10 - lr: 0.000022 - momentum: 0.000000
2023-10-25 15:52:17,964 epoch 4 - iter 1805/3617 - loss 0.05057207 - time (sec): 113.51 - samples/sec: 1665.68 - lr: 0.000022 - momentum: 0.000000
2023-10-25 15:52:40,951 epoch 4 - iter 2166/3617 - loss 0.05016728 - time (sec): 136.50 - samples/sec: 1678.27 - lr: 0.000021 - momentum: 0.000000
2023-10-25 15:53:03,727 epoch 4 - iter 2527/3617 - loss 0.05059513 - time (sec): 159.28 - samples/sec: 1678.58 - lr: 0.000021 - momentum: 0.000000
2023-10-25 15:53:26,318 epoch 4 - iter 2888/3617 - loss 0.05293486 - time (sec): 181.87 - samples/sec: 1675.87 - lr: 0.000021 - momentum: 0.000000
2023-10-25 15:53:49,399 epoch 4 - iter 3249/3617 - loss 0.05285239 - time (sec): 204.95 - samples/sec: 1669.80 - lr: 0.000020 - momentum: 0.000000
2023-10-25 15:54:11,936 epoch 4 - iter 3610/3617 - loss 0.05260123 - time (sec): 227.49 - samples/sec: 1666.60 - lr: 0.000020 - momentum: 0.000000
2023-10-25 15:54:12,392 ----------------------------------------------------------------------------------------------------
2023-10-25 15:54:12,392 EPOCH 4 done: loss 0.0525 - lr: 0.000020
2023-10-25 15:54:17,149 DEV : loss 0.24151772260665894 - f1-score (micro avg) 0.6262
2023-10-25 15:54:17,172 ----------------------------------------------------------------------------------------------------
2023-10-25 15:54:39,932 epoch 5 - iter 361/3617 - loss 0.02889314 - time (sec): 22.76 - samples/sec: 1633.70 - lr: 0.000020 - momentum: 0.000000
2023-10-25 15:55:02,383 epoch 5 - iter 722/3617 - loss 0.02845671 - time (sec): 45.21 - samples/sec: 1640.80 - lr: 0.000019 - momentum: 0.000000
2023-10-25 15:55:25,074 epoch 5 - iter 1083/3617 - loss 0.02905149 - time (sec): 67.90 - samples/sec: 1652.84 - lr: 0.000019 - momentum: 0.000000
2023-10-25 15:55:47,562 epoch 5 - iter 1444/3617 - loss 0.03106635 - time (sec): 90.39 - samples/sec: 1657.09 - lr: 0.000019 - momentum: 0.000000
2023-10-25 15:56:10,156 epoch 5 - iter 1805/3617 - loss 0.03395971 - time (sec): 112.98 - samples/sec: 1662.83 - lr: 0.000018 - momentum: 0.000000
2023-10-25 15:56:32,681 epoch 5 - iter 2166/3617 - loss 0.03439912 - time (sec): 135.51 - samples/sec: 1657.17 - lr: 0.000018 - momentum: 0.000000
2023-10-25 15:56:55,301 epoch 5 - iter 2527/3617 - loss 0.03516551 - time (sec): 158.13 - samples/sec: 1655.70 - lr: 0.000018 - momentum: 0.000000
2023-10-25 15:57:18,303 epoch 5 - iter 2888/3617 - loss 0.03540794 - time (sec): 181.13 - samples/sec: 1670.73 - lr: 0.000017 - momentum: 0.000000
2023-10-25 15:57:40,862 epoch 5 - iter 3249/3617 - loss 0.03667999 - time (sec): 203.69 - samples/sec: 1666.12 - lr: 0.000017 - momentum: 0.000000
2023-10-25 15:58:03,741 epoch 5 - iter 3610/3617 - loss 0.03647650 - time (sec): 226.57 - samples/sec: 1674.18 - lr: 0.000017 - momentum: 0.000000
2023-10-25 15:58:04,146 ----------------------------------------------------------------------------------------------------
2023-10-25 15:58:04,146 EPOCH 5 done: loss 0.0365 - lr: 0.000017
2023-10-25 15:58:09,429 DEV : loss 0.27911558747291565 - f1-score (micro avg) 0.6411
2023-10-25 15:58:09,452 ----------------------------------------------------------------------------------------------------
2023-10-25 15:58:32,153 epoch 6 - iter 361/3617 - loss 0.01844611 - time (sec): 22.70 - samples/sec: 1684.49 - lr: 0.000016 - momentum: 0.000000
2023-10-25 15:58:54,997 epoch 6 - iter 722/3617 - loss 0.01909398 - time (sec): 45.54 - samples/sec: 1660.35 - lr: 0.000016 - momentum: 0.000000
2023-10-25 15:59:18,007 epoch 6 - iter 1083/3617 - loss 0.02262065 - time (sec): 68.55 - samples/sec: 1690.87 - lr: 0.000016 - momentum: 0.000000
2023-10-25 15:59:40,435 epoch 6 - iter 1444/3617 - loss 0.02337790 - time (sec): 90.98 - samples/sec: 1680.22 - lr: 0.000015 - momentum: 0.000000
2023-10-25 16:00:03,256 epoch 6 - iter 1805/3617 - loss 0.02294877 - time (sec): 113.80 - samples/sec: 1686.69 - lr: 0.000015 - momentum: 0.000000
2023-10-25 16:00:25,717 epoch 6 - iter 2166/3617 - loss 0.02260980 - time (sec): 136.26 - samples/sec: 1685.27 - lr: 0.000015 - momentum: 0.000000
2023-10-25 16:00:48,516 epoch 6 - iter 2527/3617 - loss 0.02245400 - time (sec): 159.06 - samples/sec: 1682.99 - lr: 0.000014 - momentum: 0.000000
2023-10-25 16:01:11,169 epoch 6 - iter 2888/3617 - loss 0.02342671 - time (sec): 181.72 - samples/sec: 1678.37 - lr: 0.000014 - momentum: 0.000000
2023-10-25 16:01:33,582 epoch 6 - iter 3249/3617 - loss 0.02358711 - time (sec): 204.13 - samples/sec: 1671.21 - lr: 0.000014 - momentum: 0.000000
2023-10-25 16:01:56,285 epoch 6 - iter 3610/3617 - loss 0.02404475 - time (sec): 226.83 - samples/sec: 1671.46 - lr: 0.000013 - momentum: 0.000000
2023-10-25 16:01:56,730 ----------------------------------------------------------------------------------------------------
2023-10-25 16:01:56,730 EPOCH 6 done: loss 0.0240 - lr: 0.000013
2023-10-25 16:02:02,029 DEV : loss 0.30914661288261414 - f1-score (micro avg) 0.6277
2023-10-25 16:02:02,052 ----------------------------------------------------------------------------------------------------
2023-10-25 16:02:24,697 epoch 7 - iter 361/3617 - loss 0.01518405 - time (sec): 22.64 - samples/sec: 1682.07 - lr: 0.000013 - momentum: 0.000000
2023-10-25 16:02:47,369 epoch 7 - iter 722/3617 - loss 0.01696119 - time (sec): 45.32 - samples/sec: 1686.25 - lr: 0.000013 - momentum: 0.000000
2023-10-25 16:03:10,138 epoch 7 - iter 1083/3617 - loss 0.01811277 - time (sec): 68.08 - samples/sec: 1679.52 - lr: 0.000012 - momentum: 0.000000
2023-10-25 16:03:33,026 epoch 7 - iter 1444/3617 - loss 0.01753427 - time (sec): 90.97 - samples/sec: 1687.97 - lr: 0.000012 - momentum: 0.000000
2023-10-25 16:03:55,497 epoch 7 - iter 1805/3617 - loss 0.01779560 - time (sec): 113.44 - samples/sec: 1678.37 - lr: 0.000012 - momentum: 0.000000
2023-10-25 16:04:18,296 epoch 7 - iter 2166/3617 - loss 0.01660255 - time (sec): 136.24 - samples/sec: 1684.12 - lr: 0.000011 - momentum: 0.000000
2023-10-25 16:04:40,932 epoch 7 - iter 2527/3617 - loss 0.01698618 - time (sec): 158.88 - samples/sec: 1684.01 - lr: 0.000011 - momentum: 0.000000
2023-10-25 16:05:03,601 epoch 7 - iter 2888/3617 - loss 0.01728988 - time (sec): 181.55 - samples/sec: 1677.49 - lr: 0.000011 - momentum: 0.000000
2023-10-25 16:05:26,391 epoch 7 - iter 3249/3617 - loss 0.01721398 - time (sec): 204.34 - samples/sec: 1672.88 - lr: 0.000010 - momentum: 0.000000
2023-10-25 16:05:48,951 epoch 7 - iter 3610/3617 - loss 0.01693395 - time (sec): 226.90 - samples/sec: 1671.22 - lr: 0.000010 - momentum: 0.000000
2023-10-25 16:05:49,404 ----------------------------------------------------------------------------------------------------
2023-10-25 16:05:49,404 EPOCH 7 done: loss 0.0169 - lr: 0.000010
2023-10-25 16:05:54,703 DEV : loss 0.35005614161491394 - f1-score (micro avg) 0.6476
2023-10-25 16:05:54,726 saving best model
2023-10-25 16:05:55,436 ----------------------------------------------------------------------------------------------------
2023-10-25 16:06:18,101 epoch 8 - iter 361/3617 - loss 0.01402887 - time (sec): 22.66 - samples/sec: 1711.01 - lr: 0.000010 - momentum: 0.000000
2023-10-25 16:06:40,840 epoch 8 - iter 722/3617 - loss 0.01326071 - time (sec): 45.40 - samples/sec: 1682.72 - lr: 0.000009 - momentum: 0.000000
2023-10-25 16:07:03,485 epoch 8 - iter 1083/3617 - loss 0.01183986 - time (sec): 68.05 - samples/sec: 1685.05 - lr: 0.000009 - momentum: 0.000000
2023-10-25 16:07:26,172 epoch 8 - iter 1444/3617 - loss 0.01108027 - time (sec): 90.74 - samples/sec: 1677.66 - lr: 0.000009 - momentum: 0.000000
2023-10-25 16:07:48,816 epoch 8 - iter 1805/3617 - loss 0.01123144 - time (sec): 113.38 - samples/sec: 1672.07 - lr: 0.000008 - momentum: 0.000000
2023-10-25 16:08:11,441 epoch 8 - iter 2166/3617 - loss 0.01079378 - time (sec): 136.00 - samples/sec: 1672.45 - lr: 0.000008 - momentum: 0.000000
2023-10-25 16:08:34,072 epoch 8 - iter 2527/3617 - loss 0.01078423 - time (sec): 158.63 - samples/sec: 1670.37 - lr: 0.000008 - momentum: 0.000000
2023-10-25 16:08:56,585 epoch 8 - iter 2888/3617 - loss 0.01037040 - time (sec): 181.15 - samples/sec: 1664.64 - lr: 0.000007 - momentum: 0.000000
2023-10-25 16:09:19,603 epoch 8 - iter 3249/3617 - loss 0.01027725 - time (sec): 204.17 - samples/sec: 1671.72 - lr: 0.000007 - momentum: 0.000000
2023-10-25 16:09:42,381 epoch 8 - iter 3610/3617 - loss 0.01022367 - time (sec): 226.94 - samples/sec: 1671.16 - lr: 0.000007 - momentum: 0.000000
2023-10-25 16:09:42,798 ----------------------------------------------------------------------------------------------------
2023-10-25 16:09:42,799 EPOCH 8 done: loss 0.0102 - lr: 0.000007
2023-10-25 16:09:47,567 DEV : loss 0.3698480725288391 - f1-score (micro avg) 0.6525
2023-10-25 16:09:47,591 saving best model
2023-10-25 16:09:48,302 ----------------------------------------------------------------------------------------------------
2023-10-25 16:10:11,572 epoch 9 - iter 361/3617 - loss 0.00763808 - time (sec): 23.27 - samples/sec: 1670.63 - lr: 0.000006 - momentum: 0.000000
2023-10-25 16:10:34,042 epoch 9 - iter 722/3617 - loss 0.00979704 - time (sec): 45.74 - samples/sec: 1646.34 - lr: 0.000006 - momentum: 0.000000
2023-10-25 16:10:56,797 epoch 9 - iter 1083/3617 - loss 0.00817557 - time (sec): 68.49 - samples/sec: 1662.63 - lr: 0.000006 - momentum: 0.000000
2023-10-25 16:11:19,697 epoch 9 - iter 1444/3617 - loss 0.00803821 - time (sec): 91.39 - samples/sec: 1664.55 - lr: 0.000005 - momentum: 0.000000
2023-10-25 16:11:42,445 epoch 9 - iter 1805/3617 - loss 0.00799518 - time (sec): 114.14 - samples/sec: 1674.93 - lr: 0.000005 - momentum: 0.000000
2023-10-25 16:12:04,911 epoch 9 - iter 2166/3617 - loss 0.00724524 - time (sec): 136.61 - samples/sec: 1664.75 - lr: 0.000005 - momentum: 0.000000
2023-10-25 16:12:27,624 epoch 9 - iter 2527/3617 - loss 0.00774410 - time (sec): 159.32 - samples/sec: 1659.46 - lr: 0.000004 - momentum: 0.000000
2023-10-25 16:12:50,435 epoch 9 - iter 2888/3617 - loss 0.00794723 - time (sec): 182.13 - samples/sec: 1665.12 - lr: 0.000004 - momentum: 0.000000
2023-10-25 16:13:13,176 epoch 9 - iter 3249/3617 - loss 0.00789576 - time (sec): 204.87 - samples/sec: 1665.74 - lr: 0.000004 - momentum: 0.000000
2023-10-25 16:13:35,910 epoch 9 - iter 3610/3617 - loss 0.00792795 - time (sec): 227.61 - samples/sec: 1666.40 - lr: 0.000003 - momentum: 0.000000
2023-10-25 16:13:36,335 ----------------------------------------------------------------------------------------------------
2023-10-25 16:13:36,335 EPOCH 9 done: loss 0.0080 - lr: 0.000003
2023-10-25 16:13:41,094 DEV : loss 0.3735716640949249 - f1-score (micro avg) 0.6539
2023-10-25 16:13:41,117 saving best model
2023-10-25 16:13:41,777 ----------------------------------------------------------------------------------------------------
2023-10-25 16:14:04,798 epoch 10 - iter 361/3617 - loss 0.00287944 - time (sec): 23.02 - samples/sec: 1742.64 - lr: 0.000003 - momentum: 0.000000
2023-10-25 16:14:27,220 epoch 10 - iter 722/3617 - loss 0.00450426 - time (sec): 45.44 - samples/sec: 1693.31 - lr: 0.000003 - momentum: 0.000000
2023-10-25 16:14:49,716 epoch 10 - iter 1083/3617 - loss 0.00489107 - time (sec): 67.94 - samples/sec: 1682.68 - lr: 0.000002 - momentum: 0.000000
2023-10-25 16:15:12,220 epoch 10 - iter 1444/3617 - loss 0.00465774 - time (sec): 90.44 - samples/sec: 1677.15 - lr: 0.000002 - momentum: 0.000000
2023-10-25 16:15:34,869 epoch 10 - iter 1805/3617 - loss 0.00452385 - time (sec): 113.09 - samples/sec: 1671.53 - lr: 0.000002 - momentum: 0.000000
2023-10-25 16:15:57,676 epoch 10 - iter 2166/3617 - loss 0.00459334 - time (sec): 135.90 - samples/sec: 1678.53 - lr: 0.000001 - momentum: 0.000000
2023-10-25 16:16:20,440 epoch 10 - iter 2527/3617 - loss 0.00459891 - time (sec): 158.66 - samples/sec: 1677.78 - lr: 0.000001 - momentum: 0.000000
2023-10-25 16:16:43,162 epoch 10 - iter 2888/3617 - loss 0.00448095 - time (sec): 181.38 - samples/sec: 1681.58 - lr: 0.000001 - momentum: 0.000000
2023-10-25 16:17:06,066 epoch 10 - iter 3249/3617 - loss 0.00453443 - time (sec): 204.29 - samples/sec: 1671.10 - lr: 0.000000 - momentum: 0.000000
2023-10-25 16:17:28,567 epoch 10 - iter 3610/3617 - loss 0.00449270 - time (sec): 226.79 - samples/sec: 1672.45 - lr: 0.000000 - momentum: 0.000000
2023-10-25 16:17:28,984 ----------------------------------------------------------------------------------------------------
2023-10-25 16:17:28,985 EPOCH 10 done: loss 0.0045 - lr: 0.000000
2023-10-25 16:17:33,739 DEV : loss 0.4017893970012665 - f1-score (micro avg) 0.6536
2023-10-25 16:17:34,312 ----------------------------------------------------------------------------------------------------
2023-10-25 16:17:34,312 Loading model from best epoch ...
2023-10-25 16:17:36,078 SequenceTagger predicts: Dictionary with 13 tags: O, S-loc, B-loc, E-loc, I-loc, S-pers, B-pers, E-pers, I-pers, S-org, B-org, E-org, I-org
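The 13-tag dictionary is a BIOES encoding of three span types (loc, pers, org) plus O. A minimal inference sketch with the saved checkpoint follows; the French sentence is an invented example, not taken from the corpus:

```python
# Sketch: inference with the saved best model (standard Flair API).
from flair.data import Sentence
from flair.models import SequenceTagger

tagger = SequenceTagger.load(
    "hmbench-letemps/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased"
    "-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-4/best-model.pt"
)
sentence = Sentence("Le Temps est publié à Genève.")  # hypothetical input
tagger.predict(sentence)
for span in sentence.get_spans("ner"):
    # BIOES tags (e.g. B-loc ... E-loc) are decoded into typed spans here
    print(span.text, span.get_label("ner").value, span.get_label("ner").score)
```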
2023-10-25 16:17:41,702
Results:
- F-score (micro) 0.6735
- F-score (macro) 0.5491
- Accuracy 0.5216

By class:
              precision    recall  f1-score   support

         loc     0.6490    0.7885    0.7120       591
        pers     0.5944    0.7759    0.6731       357
         org     0.3721    0.2025    0.2623        79

   micro avg     0.6186    0.7390    0.6735      1027
   macro avg     0.5385    0.5890    0.5491      1027
weighted avg     0.6087    0.7390    0.6639      1027
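As a quick arithmetic check: the micro row pools all 1027 gold spans before averaging, while the macro row is the unweighted mean of the three class F1 scores.

```python
# Verifying the averaged rows above: F1 = 2*P*R / (P + R).
p, r = 0.6186, 0.7390
print(round(2 * p * r / (p + r), 4))             # 0.6735 -> micro avg f1-score
print(round((0.7120 + 0.6731 + 0.2623) / 3, 4))  # 0.5491 -> macro avg f1-score
```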
2023-10-25 16:17:41,702 ----------------------------------------------------------------------------------------------------