|
2023-10-25 17:25:12,830 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:25:12,831 Model: "SequenceTagger( |
|
(embeddings): TransformerWordEmbeddings( |
|
(model): BertModel( |
|
(embeddings): BertEmbeddings( |
|
(word_embeddings): Embedding(64001, 768) |
|
(position_embeddings): Embedding(512, 768) |
|
(token_type_embeddings): Embedding(2, 768) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(encoder): BertEncoder( |
|
(layer): ModuleList( |
|
(0): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(1): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(2): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(3): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(4): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(5): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(6): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(7): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(8): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(9): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(10): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(11): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
) |
|
) |
|
(pooler): BertPooler( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(activation): Tanh() |
|
) |
|
) |
|
) |
|
(locked_dropout): LockedDropout(p=0.5) |
|
(linear): Linear(in_features=768, out_features=13, bias=True) |
|
(loss_function): CrossEntropyLoss() |
|
)" |
|
2023-10-25 17:25:12,831 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:25:12,832 MultiCorpus: 14465 train + 1392 dev + 2432 test sentences |
|
- NER_HIPE_2022 Corpus: 14465 train + 1392 dev + 2432 test sentences - /home/ubuntu/.flair/datasets/ner_hipe_2022/v2.1/letemps/fr/with_doc_seperator |
|
2023-10-25 17:25:12,832 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:25:12,832 Train: 14465 sentences |
|
2023-10-25 17:25:12,832 (train_with_dev=False, train_with_test=False) |
|
2023-10-25 17:25:12,832 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:25:12,832 Training Params: |
|
2023-10-25 17:25:12,832 - learning_rate: "5e-05" |
|
2023-10-25 17:25:12,832 - mini_batch_size: "8" |
|
2023-10-25 17:25:12,832 - max_epochs: "10" |
|
2023-10-25 17:25:12,832 - shuffle: "True" |
|
2023-10-25 17:25:12,832 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:25:12,832 Plugins: |
|
2023-10-25 17:25:12,832 - TensorboardLogger |
|
2023-10-25 17:25:12,832 - LinearScheduler | warmup_fraction: '0.1' |
|
2023-10-25 17:25:12,832 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:25:12,832 Final evaluation on model from best epoch (best-model.pt) |
|
2023-10-25 17:25:12,832 - metric: "('micro avg', 'f1-score')" |
|
2023-10-25 17:25:12,832 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:25:12,832 Computation: |
|
2023-10-25 17:25:12,832 - compute on device: cuda:0 |
|
2023-10-25 17:25:12,832 - embedding storage: none |
|
2023-10-25 17:25:12,832 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:25:12,832 Model training base path: "hmbench-letemps/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs8-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-5" |
|
2023-10-25 17:25:12,832 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:25:12,832 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:25:12,832 Logging anything other than scalars to TensorBoard is currently not supported. |
|
2023-10-25 17:25:28,883 epoch 1 - iter 180/1809 - loss 1.08867170 - time (sec): 16.05 - samples/sec: 2407.65 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-25 17:25:44,521 epoch 1 - iter 360/1809 - loss 0.65004327 - time (sec): 31.69 - samples/sec: 2416.98 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-25 17:26:00,346 epoch 1 - iter 540/1809 - loss 0.48753569 - time (sec): 47.51 - samples/sec: 2407.41 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-25 17:26:15,761 epoch 1 - iter 720/1809 - loss 0.39766425 - time (sec): 62.93 - samples/sec: 2421.92 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-25 17:26:31,459 epoch 1 - iter 900/1809 - loss 0.34169738 - time (sec): 78.63 - samples/sec: 2408.69 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-25 17:26:47,241 epoch 1 - iter 1080/1809 - loss 0.30530543 - time (sec): 94.41 - samples/sec: 2400.17 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-25 17:27:03,504 epoch 1 - iter 1260/1809 - loss 0.27539034 - time (sec): 110.67 - samples/sec: 2397.75 - lr: 0.000035 - momentum: 0.000000 |
|
2023-10-25 17:27:19,006 epoch 1 - iter 1440/1809 - loss 0.25487372 - time (sec): 126.17 - samples/sec: 2394.96 - lr: 0.000040 - momentum: 0.000000 |
|
2023-10-25 17:27:35,047 epoch 1 - iter 1620/1809 - loss 0.23807391 - time (sec): 142.21 - samples/sec: 2389.53 - lr: 0.000045 - momentum: 0.000000 |
|
2023-10-25 17:27:51,142 epoch 1 - iter 1800/1809 - loss 0.22438103 - time (sec): 158.31 - samples/sec: 2389.71 - lr: 0.000050 - momentum: 0.000000 |
|
2023-10-25 17:27:51,843 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:27:51,843 EPOCH 1 done: loss 0.2237 - lr: 0.000050 |
|
2023-10-25 17:27:56,354 DEV : loss 0.11131972819566727 - f1-score (micro avg) 0.5998 |
|
2023-10-25 17:27:56,377 saving best model |
|
2023-10-25 17:27:56,928 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:28:12,813 epoch 2 - iter 180/1809 - loss 0.08088650 - time (sec): 15.88 - samples/sec: 2358.09 - lr: 0.000049 - momentum: 0.000000 |
|
2023-10-25 17:28:28,596 epoch 2 - iter 360/1809 - loss 0.08543964 - time (sec): 31.67 - samples/sec: 2357.98 - lr: 0.000049 - momentum: 0.000000 |
|
2023-10-25 17:28:44,265 epoch 2 - iter 540/1809 - loss 0.08544855 - time (sec): 47.34 - samples/sec: 2378.99 - lr: 0.000048 - momentum: 0.000000 |
|
2023-10-25 17:28:59,984 epoch 2 - iter 720/1809 - loss 0.08766095 - time (sec): 63.06 - samples/sec: 2384.45 - lr: 0.000048 - momentum: 0.000000 |
|
2023-10-25 17:29:16,164 epoch 2 - iter 900/1809 - loss 0.08687484 - time (sec): 79.23 - samples/sec: 2399.84 - lr: 0.000047 - momentum: 0.000000 |
|
2023-10-25 17:29:31,913 epoch 2 - iter 1080/1809 - loss 0.08737062 - time (sec): 94.98 - samples/sec: 2401.58 - lr: 0.000047 - momentum: 0.000000 |
|
2023-10-25 17:29:47,549 epoch 2 - iter 1260/1809 - loss 0.08575598 - time (sec): 110.62 - samples/sec: 2399.66 - lr: 0.000046 - momentum: 0.000000 |
|
2023-10-25 17:30:03,603 epoch 2 - iter 1440/1809 - loss 0.08550450 - time (sec): 126.67 - samples/sec: 2395.66 - lr: 0.000046 - momentum: 0.000000 |
|
2023-10-25 17:30:19,491 epoch 2 - iter 1620/1809 - loss 0.08602069 - time (sec): 142.56 - samples/sec: 2392.68 - lr: 0.000045 - momentum: 0.000000 |
|
2023-10-25 17:30:35,371 epoch 2 - iter 1800/1809 - loss 0.08584415 - time (sec): 158.44 - samples/sec: 2389.42 - lr: 0.000044 - momentum: 0.000000 |
|
2023-10-25 17:30:36,066 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:30:36,066 EPOCH 2 done: loss 0.0859 - lr: 0.000044 |
|
2023-10-25 17:30:41,320 DEV : loss 0.14367857575416565 - f1-score (micro avg) 0.621 |
|
2023-10-25 17:30:41,343 saving best model |
|
2023-10-25 17:30:42,046 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:30:57,826 epoch 3 - iter 180/1809 - loss 0.06105910 - time (sec): 15.78 - samples/sec: 2444.97 - lr: 0.000044 - momentum: 0.000000 |
|
2023-10-25 17:31:14,553 epoch 3 - iter 360/1809 - loss 0.06291567 - time (sec): 32.51 - samples/sec: 2454.20 - lr: 0.000043 - momentum: 0.000000 |
|
2023-10-25 17:31:30,567 epoch 3 - iter 540/1809 - loss 0.05939069 - time (sec): 48.52 - samples/sec: 2434.77 - lr: 0.000043 - momentum: 0.000000 |
|
2023-10-25 17:31:46,426 epoch 3 - iter 720/1809 - loss 0.06189798 - time (sec): 64.38 - samples/sec: 2420.88 - lr: 0.000042 - momentum: 0.000000 |
|
2023-10-25 17:32:02,072 epoch 3 - iter 900/1809 - loss 0.06226376 - time (sec): 80.02 - samples/sec: 2408.22 - lr: 0.000042 - momentum: 0.000000 |
|
2023-10-25 17:32:17,628 epoch 3 - iter 1080/1809 - loss 0.06229431 - time (sec): 95.58 - samples/sec: 2403.38 - lr: 0.000041 - momentum: 0.000000 |
|
2023-10-25 17:32:33,396 epoch 3 - iter 1260/1809 - loss 0.06309156 - time (sec): 111.35 - samples/sec: 2401.39 - lr: 0.000041 - momentum: 0.000000 |
|
2023-10-25 17:32:49,252 epoch 3 - iter 1440/1809 - loss 0.06334011 - time (sec): 127.20 - samples/sec: 2389.92 - lr: 0.000040 - momentum: 0.000000 |
|
2023-10-25 17:33:05,170 epoch 3 - iter 1620/1809 - loss 0.06299930 - time (sec): 143.12 - samples/sec: 2384.04 - lr: 0.000039 - momentum: 0.000000 |
|
2023-10-25 17:33:20,601 epoch 3 - iter 1800/1809 - loss 0.06268902 - time (sec): 158.55 - samples/sec: 2385.99 - lr: 0.000039 - momentum: 0.000000 |
|
2023-10-25 17:33:21,442 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:33:21,442 EPOCH 3 done: loss 0.0626 - lr: 0.000039 |
|
2023-10-25 17:33:26,209 DEV : loss 0.16750071942806244 - f1-score (micro avg) 0.6271 |
|
2023-10-25 17:33:26,232 saving best model |
|
2023-10-25 17:33:27,448 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:33:43,590 epoch 4 - iter 180/1809 - loss 0.04454260 - time (sec): 16.14 - samples/sec: 2370.51 - lr: 0.000038 - momentum: 0.000000 |
|
2023-10-25 17:33:59,289 epoch 4 - iter 360/1809 - loss 0.04376843 - time (sec): 31.84 - samples/sec: 2384.31 - lr: 0.000038 - momentum: 0.000000 |
|
2023-10-25 17:34:15,317 epoch 4 - iter 540/1809 - loss 0.04208892 - time (sec): 47.87 - samples/sec: 2401.85 - lr: 0.000037 - momentum: 0.000000 |
|
2023-10-25 17:34:31,148 epoch 4 - iter 720/1809 - loss 0.04219370 - time (sec): 63.70 - samples/sec: 2412.85 - lr: 0.000037 - momentum: 0.000000 |
|
2023-10-25 17:34:46,949 epoch 4 - iter 900/1809 - loss 0.04300031 - time (sec): 79.50 - samples/sec: 2412.83 - lr: 0.000036 - momentum: 0.000000 |
|
2023-10-25 17:35:02,757 epoch 4 - iter 1080/1809 - loss 0.04422396 - time (sec): 95.31 - samples/sec: 2397.75 - lr: 0.000036 - momentum: 0.000000 |
|
2023-10-25 17:35:18,399 epoch 4 - iter 1260/1809 - loss 0.04338815 - time (sec): 110.95 - samples/sec: 2391.32 - lr: 0.000035 - momentum: 0.000000 |
|
2023-10-25 17:35:34,237 epoch 4 - iter 1440/1809 - loss 0.04365612 - time (sec): 126.79 - samples/sec: 2388.41 - lr: 0.000034 - momentum: 0.000000 |
|
2023-10-25 17:35:49,818 epoch 4 - iter 1620/1809 - loss 0.04436943 - time (sec): 142.37 - samples/sec: 2386.02 - lr: 0.000034 - momentum: 0.000000 |
|
2023-10-25 17:36:05,768 epoch 4 - iter 1800/1809 - loss 0.04471372 - time (sec): 158.32 - samples/sec: 2388.70 - lr: 0.000033 - momentum: 0.000000 |
|
2023-10-25 17:36:06,615 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:36:06,615 EPOCH 4 done: loss 0.0448 - lr: 0.000033 |
|
2023-10-25 17:36:11,377 DEV : loss 0.21365126967430115 - f1-score (micro avg) 0.6119 |
|
2023-10-25 17:36:11,400 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:36:27,342 epoch 5 - iter 180/1809 - loss 0.02797797 - time (sec): 15.94 - samples/sec: 2446.04 - lr: 0.000033 - momentum: 0.000000 |
|
2023-10-25 17:36:43,491 epoch 5 - iter 360/1809 - loss 0.02861473 - time (sec): 32.09 - samples/sec: 2410.53 - lr: 0.000032 - momentum: 0.000000 |
|
2023-10-25 17:36:59,285 epoch 5 - iter 540/1809 - loss 0.02970454 - time (sec): 47.88 - samples/sec: 2406.82 - lr: 0.000032 - momentum: 0.000000 |
|
2023-10-25 17:37:14,595 epoch 5 - iter 720/1809 - loss 0.03017154 - time (sec): 63.19 - samples/sec: 2383.00 - lr: 0.000031 - momentum: 0.000000 |
|
2023-10-25 17:37:30,823 epoch 5 - iter 900/1809 - loss 0.02993842 - time (sec): 79.42 - samples/sec: 2390.07 - lr: 0.000031 - momentum: 0.000000 |
|
2023-10-25 17:37:47,071 epoch 5 - iter 1080/1809 - loss 0.03036229 - time (sec): 95.67 - samples/sec: 2389.07 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-25 17:38:02,683 epoch 5 - iter 1260/1809 - loss 0.03032083 - time (sec): 111.28 - samples/sec: 2380.89 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-25 17:38:18,242 epoch 5 - iter 1440/1809 - loss 0.03017265 - time (sec): 126.84 - samples/sec: 2381.62 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-25 17:38:34,319 epoch 5 - iter 1620/1809 - loss 0.03017571 - time (sec): 142.92 - samples/sec: 2368.45 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-25 17:38:50,611 epoch 5 - iter 1800/1809 - loss 0.02982099 - time (sec): 159.21 - samples/sec: 2376.76 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-25 17:38:51,342 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:38:51,342 EPOCH 5 done: loss 0.0299 - lr: 0.000028 |
|
2023-10-25 17:38:56,115 DEV : loss 0.2786865830421448 - f1-score (micro avg) 0.6472 |
|
2023-10-25 17:38:56,139 saving best model |
|
2023-10-25 17:38:56,860 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:39:12,505 epoch 6 - iter 180/1809 - loss 0.01698871 - time (sec): 15.64 - samples/sec: 2353.61 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-25 17:39:28,137 epoch 6 - iter 360/1809 - loss 0.01883337 - time (sec): 31.28 - samples/sec: 2382.83 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-25 17:39:44,244 epoch 6 - iter 540/1809 - loss 0.02197274 - time (sec): 47.38 - samples/sec: 2391.47 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-25 17:39:59,902 epoch 6 - iter 720/1809 - loss 0.02290012 - time (sec): 63.04 - samples/sec: 2395.80 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-25 17:40:16,174 epoch 6 - iter 900/1809 - loss 0.02254507 - time (sec): 79.31 - samples/sec: 2378.20 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-25 17:40:31,791 epoch 6 - iter 1080/1809 - loss 0.02233300 - time (sec): 94.93 - samples/sec: 2375.99 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-25 17:40:47,376 epoch 6 - iter 1260/1809 - loss 0.02223982 - time (sec): 110.51 - samples/sec: 2371.40 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-25 17:41:03,399 epoch 6 - iter 1440/1809 - loss 0.02230768 - time (sec): 126.54 - samples/sec: 2379.75 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-25 17:41:19,391 epoch 6 - iter 1620/1809 - loss 0.02169893 - time (sec): 142.53 - samples/sec: 2382.79 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-25 17:41:35,589 epoch 6 - iter 1800/1809 - loss 0.02156452 - time (sec): 158.73 - samples/sec: 2382.82 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-25 17:41:36,334 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:41:36,334 EPOCH 6 done: loss 0.0216 - lr: 0.000022 |
|
2023-10-25 17:41:41,614 DEV : loss 0.36510854959487915 - f1-score (micro avg) 0.6298 |
|
2023-10-25 17:41:41,637 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:41:57,608 epoch 7 - iter 180/1809 - loss 0.02022287 - time (sec): 15.97 - samples/sec: 2400.72 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-25 17:42:13,475 epoch 7 - iter 360/1809 - loss 0.02033749 - time (sec): 31.84 - samples/sec: 2400.51 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-25 17:42:29,613 epoch 7 - iter 540/1809 - loss 0.02001745 - time (sec): 47.98 - samples/sec: 2407.10 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-25 17:42:45,451 epoch 7 - iter 720/1809 - loss 0.01848045 - time (sec): 63.81 - samples/sec: 2415.31 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-25 17:43:00,702 epoch 7 - iter 900/1809 - loss 0.01690827 - time (sec): 79.06 - samples/sec: 2403.90 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-25 17:43:16,683 epoch 7 - iter 1080/1809 - loss 0.01639987 - time (sec): 95.05 - samples/sec: 2384.14 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-25 17:43:32,462 epoch 7 - iter 1260/1809 - loss 0.01694441 - time (sec): 110.82 - samples/sec: 2375.78 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-25 17:43:48,583 epoch 7 - iter 1440/1809 - loss 0.01604881 - time (sec): 126.95 - samples/sec: 2373.46 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-25 17:44:04,373 epoch 7 - iter 1620/1809 - loss 0.01596925 - time (sec): 142.74 - samples/sec: 2380.20 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-25 17:44:20,312 epoch 7 - iter 1800/1809 - loss 0.01571324 - time (sec): 158.67 - samples/sec: 2384.33 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-25 17:44:21,026 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:44:21,026 EPOCH 7 done: loss 0.0157 - lr: 0.000017 |
|
2023-10-25 17:44:26,345 DEV : loss 0.3604075312614441 - f1-score (micro avg) 0.6238 |
|
2023-10-25 17:44:26,368 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:44:42,092 epoch 8 - iter 180/1809 - loss 0.00997516 - time (sec): 15.72 - samples/sec: 2339.27 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-25 17:44:57,918 epoch 8 - iter 360/1809 - loss 0.01105999 - time (sec): 31.55 - samples/sec: 2358.40 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-25 17:45:13,718 epoch 8 - iter 540/1809 - loss 0.01121835 - time (sec): 47.35 - samples/sec: 2355.37 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-25 17:45:29,687 epoch 8 - iter 720/1809 - loss 0.01185460 - time (sec): 63.32 - samples/sec: 2359.55 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-25 17:45:45,573 epoch 8 - iter 900/1809 - loss 0.01093216 - time (sec): 79.20 - samples/sec: 2365.07 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-25 17:46:01,155 epoch 8 - iter 1080/1809 - loss 0.01026454 - time (sec): 94.79 - samples/sec: 2366.17 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-25 17:46:16,917 epoch 8 - iter 1260/1809 - loss 0.00974729 - time (sec): 110.55 - samples/sec: 2370.48 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-25 17:46:32,383 epoch 8 - iter 1440/1809 - loss 0.00942034 - time (sec): 126.01 - samples/sec: 2370.26 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-25 17:46:48,687 epoch 8 - iter 1620/1809 - loss 0.01013871 - time (sec): 142.32 - samples/sec: 2373.81 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-25 17:47:05,389 epoch 8 - iter 1800/1809 - loss 0.01001323 - time (sec): 159.02 - samples/sec: 2378.22 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-25 17:47:06,147 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:47:06,147 EPOCH 8 done: loss 0.0101 - lr: 0.000011 |
|
2023-10-25 17:47:11,459 DEV : loss 0.3776438534259796 - f1-score (micro avg) 0.649 |
|
2023-10-25 17:47:11,482 saving best model |
|
2023-10-25 17:47:12,162 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:47:28,015 epoch 9 - iter 180/1809 - loss 0.00806189 - time (sec): 15.85 - samples/sec: 2396.31 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-25 17:47:43,744 epoch 9 - iter 360/1809 - loss 0.00786294 - time (sec): 31.58 - samples/sec: 2414.19 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-25 17:47:59,740 epoch 9 - iter 540/1809 - loss 0.00779681 - time (sec): 47.58 - samples/sec: 2404.36 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-25 17:48:15,611 epoch 9 - iter 720/1809 - loss 0.00824623 - time (sec): 63.45 - samples/sec: 2391.40 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-25 17:48:31,872 epoch 9 - iter 900/1809 - loss 0.00749187 - time (sec): 79.71 - samples/sec: 2379.55 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-25 17:48:47,559 epoch 9 - iter 1080/1809 - loss 0.00729148 - time (sec): 95.40 - samples/sec: 2381.02 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-25 17:49:03,417 epoch 9 - iter 1260/1809 - loss 0.00706207 - time (sec): 111.25 - samples/sec: 2374.16 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-25 17:49:19,068 epoch 9 - iter 1440/1809 - loss 0.00684409 - time (sec): 126.90 - samples/sec: 2373.51 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-25 17:49:34,617 epoch 9 - iter 1620/1809 - loss 0.00687108 - time (sec): 142.45 - samples/sec: 2378.28 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-25 17:49:50,728 epoch 9 - iter 1800/1809 - loss 0.00658565 - time (sec): 158.56 - samples/sec: 2381.77 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-25 17:49:51,725 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:49:51,725 EPOCH 9 done: loss 0.0067 - lr: 0.000006 |
|
2023-10-25 17:49:56,514 DEV : loss 0.3927135467529297 - f1-score (micro avg) 0.6399 |
|
2023-10-25 17:49:56,538 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:50:12,657 epoch 10 - iter 180/1809 - loss 0.00422793 - time (sec): 16.12 - samples/sec: 2350.66 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-25 17:50:28,618 epoch 10 - iter 360/1809 - loss 0.00339541 - time (sec): 32.08 - samples/sec: 2385.59 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-25 17:50:44,793 epoch 10 - iter 540/1809 - loss 0.00334334 - time (sec): 48.25 - samples/sec: 2373.68 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-25 17:51:00,624 epoch 10 - iter 720/1809 - loss 0.00349222 - time (sec): 64.09 - samples/sec: 2368.92 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-25 17:51:16,681 epoch 10 - iter 900/1809 - loss 0.00336080 - time (sec): 80.14 - samples/sec: 2376.57 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-25 17:51:32,495 epoch 10 - iter 1080/1809 - loss 0.00320549 - time (sec): 95.96 - samples/sec: 2379.97 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-25 17:51:48,151 epoch 10 - iter 1260/1809 - loss 0.00327382 - time (sec): 111.61 - samples/sec: 2376.20 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-25 17:52:04,075 epoch 10 - iter 1440/1809 - loss 0.00335080 - time (sec): 127.54 - samples/sec: 2378.32 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-25 17:52:20,274 epoch 10 - iter 1620/1809 - loss 0.00332977 - time (sec): 143.74 - samples/sec: 2380.65 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-25 17:52:35,875 epoch 10 - iter 1800/1809 - loss 0.00380800 - time (sec): 159.34 - samples/sec: 2375.32 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-25 17:52:36,617 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:52:36,617 EPOCH 10 done: loss 0.0038 - lr: 0.000000 |
|
2023-10-25 17:52:41,399 DEV : loss 0.4173244535923004 - f1-score (micro avg) 0.6413 |
|
2023-10-25 17:52:41,977 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 17:52:41,978 Loading model from best epoch ... |
|
2023-10-25 17:52:43,747 SequenceTagger predicts: Dictionary with 13 tags: O, S-loc, B-loc, E-loc, I-loc, S-pers, B-pers, E-pers, I-pers, S-org, B-org, E-org, I-org |
|
2023-10-25 17:52:50,008 |
|
Results: |
|
- F-score (micro) 0.6409 |
|
- F-score (macro) 0.4973 |
|
- Accuracy 0.4862 |
|
|
|
By class: |
|
precision recall f1-score support |
|
|
|
loc 0.6403 0.7259 0.6804 591 |
|
pers 0.5885 0.7451 0.6576 357 |
|
org 0.1961 0.1266 0.1538 79 |
|
|
|
micro avg 0.6010 0.6865 0.6409 1027 |
|
macro avg 0.4750 0.5325 0.4973 1027 |
|
weighted avg 0.5881 0.6865 0.6320 1027 |
|
|
|
2023-10-25 17:52:50,008 ---------------------------------------------------------------------------------------------------- |
|
|