|
2023-10-24 10:16:40,868 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:16:40,869 Model: "SequenceTagger( |
|
(embeddings): TransformerWordEmbeddings( |
|
(model): BertModel( |
|
(embeddings): BertEmbeddings( |
|
(word_embeddings): Embedding(64001, 768) |
|
(position_embeddings): Embedding(512, 768) |
|
(token_type_embeddings): Embedding(2, 768) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(encoder): BertEncoder( |
|
(layer): ModuleList( |
|
(0): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(1): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(2): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(3): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(4): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(5): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(6): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(7): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(8): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(9): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(10): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(11): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
) |
|
) |
|
(pooler): BertPooler( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(activation): Tanh() |
|
) |
|
) |
|
) |
|
(locked_dropout): LockedDropout(p=0.5) |
|
(linear): Linear(in_features=768, out_features=21, bias=True) |
|
(loss_function): CrossEntropyLoss() |
|
)" |
|
2023-10-24 10:16:40,870 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:16:40,870 MultiCorpus: 5901 train + 1287 dev + 1505 test sentences |
|
- NER_HIPE_2022 Corpus: 5901 train + 1287 dev + 1505 test sentences - /home/ubuntu/.flair/datasets/ner_hipe_2022/v2.1/hipe2020/fr/with_doc_seperator |
|
2023-10-24 10:16:40,870 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:16:40,870 Train: 5901 sentences |
|
2023-10-24 10:16:40,870 (train_with_dev=False, train_with_test=False) |
|
2023-10-24 10:16:40,870 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:16:40,870 Training Params: |
|
2023-10-24 10:16:40,870 - learning_rate: "5e-05" |
|
2023-10-24 10:16:40,870 - mini_batch_size: "8" |
|
2023-10-24 10:16:40,870 - max_epochs: "10" |
|
2023-10-24 10:16:40,870 - shuffle: "True" |
|
2023-10-24 10:16:40,870 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:16:40,870 Plugins: |
|
2023-10-24 10:16:40,870 - TensorboardLogger |
|
2023-10-24 10:16:40,870 - LinearScheduler | warmup_fraction: '0.1' |
|
2023-10-24 10:16:40,870 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:16:40,870 Final evaluation on model from best epoch (best-model.pt) |
|
2023-10-24 10:16:40,870 - metric: "('micro avg', 'f1-score')" |
|
2023-10-24 10:16:40,870 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:16:40,870 Computation: |
|
2023-10-24 10:16:40,870 - compute on device: cuda:0 |
|
2023-10-24 10:16:40,870 - embedding storage: none |
|
2023-10-24 10:16:40,870 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:16:40,870 Model training base path: "hmbench-hipe2020/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs8-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-2" |
|
2023-10-24 10:16:40,870 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:16:40,870 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:16:40,871 Logging anything other than scalars to TensorBoard is currently not supported. |
|
2023-10-24 10:16:47,112 epoch 1 - iter 73/738 - loss 2.05518661 - time (sec): 6.24 - samples/sec: 2475.91 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-24 10:16:53,715 epoch 1 - iter 146/738 - loss 1.27480619 - time (sec): 12.84 - samples/sec: 2422.55 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-24 10:17:00,735 epoch 1 - iter 219/738 - loss 0.95281861 - time (sec): 19.86 - samples/sec: 2400.73 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-24 10:17:07,500 epoch 1 - iter 292/738 - loss 0.78846141 - time (sec): 26.63 - samples/sec: 2379.15 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-24 10:17:14,445 epoch 1 - iter 365/738 - loss 0.67734295 - time (sec): 33.57 - samples/sec: 2378.22 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-24 10:17:21,035 epoch 1 - iter 438/738 - loss 0.60199999 - time (sec): 40.16 - samples/sec: 2364.63 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-24 10:17:28,483 epoch 1 - iter 511/738 - loss 0.53810835 - time (sec): 47.61 - samples/sec: 2358.75 - lr: 0.000035 - momentum: 0.000000 |
|
2023-10-24 10:17:35,174 epoch 1 - iter 584/738 - loss 0.49259482 - time (sec): 54.30 - samples/sec: 2360.28 - lr: 0.000039 - momentum: 0.000000 |
|
2023-10-24 10:17:42,764 epoch 1 - iter 657/738 - loss 0.45528197 - time (sec): 61.89 - samples/sec: 2362.49 - lr: 0.000044 - momentum: 0.000000 |
|
2023-10-24 10:17:50,629 epoch 1 - iter 730/738 - loss 0.42243012 - time (sec): 69.76 - samples/sec: 2359.29 - lr: 0.000049 - momentum: 0.000000 |
|
2023-10-24 10:17:51,371 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:17:51,372 EPOCH 1 done: loss 0.4193 - lr: 0.000049 |
|
2023-10-24 10:17:57,620 DEV : loss 0.10763996839523315 - f1-score (micro avg) 0.7463 |
|
2023-10-24 10:17:57,641 saving best model |
|
2023-10-24 10:17:58,193 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:18:05,129 epoch 2 - iter 73/738 - loss 0.13463893 - time (sec): 6.94 - samples/sec: 2336.83 - lr: 0.000049 - momentum: 0.000000 |
|
2023-10-24 10:18:12,442 epoch 2 - iter 146/738 - loss 0.12009300 - time (sec): 14.25 - samples/sec: 2343.70 - lr: 0.000049 - momentum: 0.000000 |
|
2023-10-24 10:18:19,081 epoch 2 - iter 219/738 - loss 0.11875354 - time (sec): 20.89 - samples/sec: 2340.04 - lr: 0.000048 - momentum: 0.000000 |
|
2023-10-24 10:18:26,548 epoch 2 - iter 292/738 - loss 0.11942559 - time (sec): 28.35 - samples/sec: 2317.92 - lr: 0.000048 - momentum: 0.000000 |
|
2023-10-24 10:18:33,585 epoch 2 - iter 365/738 - loss 0.11674533 - time (sec): 35.39 - samples/sec: 2342.79 - lr: 0.000047 - momentum: 0.000000 |
|
2023-10-24 10:18:40,281 epoch 2 - iter 438/738 - loss 0.11287412 - time (sec): 42.09 - samples/sec: 2348.10 - lr: 0.000047 - momentum: 0.000000 |
|
2023-10-24 10:18:47,326 epoch 2 - iter 511/738 - loss 0.11427507 - time (sec): 49.13 - samples/sec: 2336.47 - lr: 0.000046 - momentum: 0.000000 |
|
2023-10-24 10:18:54,785 epoch 2 - iter 584/738 - loss 0.11523486 - time (sec): 56.59 - samples/sec: 2347.70 - lr: 0.000046 - momentum: 0.000000 |
|
2023-10-24 10:19:01,751 epoch 2 - iter 657/738 - loss 0.11496530 - time (sec): 63.56 - samples/sec: 2343.31 - lr: 0.000045 - momentum: 0.000000 |
|
2023-10-24 10:19:08,307 epoch 2 - iter 730/738 - loss 0.11411985 - time (sec): 70.11 - samples/sec: 2353.71 - lr: 0.000045 - momentum: 0.000000 |
|
2023-10-24 10:19:08,940 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:19:08,941 EPOCH 2 done: loss 0.1138 - lr: 0.000045 |
|
2023-10-24 10:19:17,441 DEV : loss 0.11076617985963821 - f1-score (micro avg) 0.7851 |
|
2023-10-24 10:19:17,462 saving best model |
|
2023-10-24 10:19:18,255 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:19:24,971 epoch 3 - iter 73/738 - loss 0.05987983 - time (sec): 6.72 - samples/sec: 2393.11 - lr: 0.000044 - momentum: 0.000000 |
|
2023-10-24 10:19:31,657 epoch 3 - iter 146/738 - loss 0.06764843 - time (sec): 13.40 - samples/sec: 2398.88 - lr: 0.000043 - momentum: 0.000000 |
|
2023-10-24 10:19:38,524 epoch 3 - iter 219/738 - loss 0.06887826 - time (sec): 20.27 - samples/sec: 2351.53 - lr: 0.000043 - momentum: 0.000000 |
|
2023-10-24 10:19:46,201 epoch 3 - iter 292/738 - loss 0.07228092 - time (sec): 27.94 - samples/sec: 2364.60 - lr: 0.000042 - momentum: 0.000000 |
|
2023-10-24 10:19:53,373 epoch 3 - iter 365/738 - loss 0.07130705 - time (sec): 35.12 - samples/sec: 2369.74 - lr: 0.000042 - momentum: 0.000000 |
|
2023-10-24 10:19:59,932 epoch 3 - iter 438/738 - loss 0.06965638 - time (sec): 41.68 - samples/sec: 2377.66 - lr: 0.000041 - momentum: 0.000000 |
|
2023-10-24 10:20:06,395 epoch 3 - iter 511/738 - loss 0.06872064 - time (sec): 48.14 - samples/sec: 2384.01 - lr: 0.000041 - momentum: 0.000000 |
|
2023-10-24 10:20:14,101 epoch 3 - iter 584/738 - loss 0.06719052 - time (sec): 55.84 - samples/sec: 2374.07 - lr: 0.000040 - momentum: 0.000000 |
|
2023-10-24 10:20:20,652 epoch 3 - iter 657/738 - loss 0.06794882 - time (sec): 62.40 - samples/sec: 2376.69 - lr: 0.000040 - momentum: 0.000000 |
|
2023-10-24 10:20:27,945 epoch 3 - iter 730/738 - loss 0.06844055 - time (sec): 69.69 - samples/sec: 2363.20 - lr: 0.000039 - momentum: 0.000000 |
|
2023-10-24 10:20:28,635 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:20:28,635 EPOCH 3 done: loss 0.0684 - lr: 0.000039 |
|
2023-10-24 10:20:37,100 DEV : loss 0.13374151289463043 - f1-score (micro avg) 0.7813 |
|
2023-10-24 10:20:37,121 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:20:43,861 epoch 4 - iter 73/738 - loss 0.04708492 - time (sec): 6.74 - samples/sec: 2325.31 - lr: 0.000038 - momentum: 0.000000 |
|
2023-10-24 10:20:51,552 epoch 4 - iter 146/738 - loss 0.05032776 - time (sec): 14.43 - samples/sec: 2321.82 - lr: 0.000038 - momentum: 0.000000 |
|
2023-10-24 10:20:58,693 epoch 4 - iter 219/738 - loss 0.04834384 - time (sec): 21.57 - samples/sec: 2411.34 - lr: 0.000037 - momentum: 0.000000 |
|
2023-10-24 10:21:05,816 epoch 4 - iter 292/738 - loss 0.04616541 - time (sec): 28.69 - samples/sec: 2385.18 - lr: 0.000037 - momentum: 0.000000 |
|
2023-10-24 10:21:12,304 epoch 4 - iter 365/738 - loss 0.04734719 - time (sec): 35.18 - samples/sec: 2392.13 - lr: 0.000036 - momentum: 0.000000 |
|
2023-10-24 10:21:19,399 epoch 4 - iter 438/738 - loss 0.04801740 - time (sec): 42.28 - samples/sec: 2388.28 - lr: 0.000036 - momentum: 0.000000 |
|
2023-10-24 10:21:26,371 epoch 4 - iter 511/738 - loss 0.04688569 - time (sec): 49.25 - samples/sec: 2367.48 - lr: 0.000035 - momentum: 0.000000 |
|
2023-10-24 10:21:33,223 epoch 4 - iter 584/738 - loss 0.04658140 - time (sec): 56.10 - samples/sec: 2365.19 - lr: 0.000035 - momentum: 0.000000 |
|
2023-10-24 10:21:40,427 epoch 4 - iter 657/738 - loss 0.04744532 - time (sec): 63.31 - samples/sec: 2354.70 - lr: 0.000034 - momentum: 0.000000 |
|
2023-10-24 10:21:47,079 epoch 4 - iter 730/738 - loss 0.04829040 - time (sec): 69.96 - samples/sec: 2352.93 - lr: 0.000033 - momentum: 0.000000 |
|
2023-10-24 10:21:47,813 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:21:47,813 EPOCH 4 done: loss 0.0487 - lr: 0.000033 |
|
2023-10-24 10:21:56,318 DEV : loss 0.1580280065536499 - f1-score (micro avg) 0.8111 |
|
2023-10-24 10:21:56,339 saving best model |
|
2023-10-24 10:21:57,038 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:22:04,206 epoch 5 - iter 73/738 - loss 0.03718835 - time (sec): 7.17 - samples/sec: 2255.33 - lr: 0.000033 - momentum: 0.000000 |
|
2023-10-24 10:22:11,651 epoch 5 - iter 146/738 - loss 0.02979801 - time (sec): 14.61 - samples/sec: 2336.65 - lr: 0.000032 - momentum: 0.000000 |
|
2023-10-24 10:22:18,226 epoch 5 - iter 219/738 - loss 0.03123392 - time (sec): 21.19 - samples/sec: 2368.82 - lr: 0.000032 - momentum: 0.000000 |
|
2023-10-24 10:22:25,266 epoch 5 - iter 292/738 - loss 0.02977012 - time (sec): 28.23 - samples/sec: 2370.26 - lr: 0.000031 - momentum: 0.000000 |
|
2023-10-24 10:22:31,997 epoch 5 - iter 365/738 - loss 0.03159583 - time (sec): 34.96 - samples/sec: 2386.77 - lr: 0.000031 - momentum: 0.000000 |
|
2023-10-24 10:22:39,512 epoch 5 - iter 438/738 - loss 0.03205785 - time (sec): 42.47 - samples/sec: 2389.52 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-24 10:22:46,104 epoch 5 - iter 511/738 - loss 0.03374145 - time (sec): 49.07 - samples/sec: 2376.11 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-24 10:22:52,975 epoch 5 - iter 584/738 - loss 0.03380636 - time (sec): 55.94 - samples/sec: 2364.44 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-24 10:22:59,808 epoch 5 - iter 657/738 - loss 0.03302627 - time (sec): 62.77 - samples/sec: 2354.68 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-24 10:23:07,303 epoch 5 - iter 730/738 - loss 0.03329040 - time (sec): 70.26 - samples/sec: 2342.67 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-24 10:23:07,984 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:23:07,985 EPOCH 5 done: loss 0.0331 - lr: 0.000028 |
|
2023-10-24 10:23:16,524 DEV : loss 0.19062422215938568 - f1-score (micro avg) 0.8127 |
|
2023-10-24 10:23:16,546 saving best model |
|
2023-10-24 10:23:17,249 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:23:25,450 epoch 6 - iter 73/738 - loss 0.02091842 - time (sec): 8.20 - samples/sec: 2434.02 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-24 10:23:31,681 epoch 6 - iter 146/738 - loss 0.01994648 - time (sec): 14.43 - samples/sec: 2414.68 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-24 10:23:39,358 epoch 6 - iter 219/738 - loss 0.02411263 - time (sec): 22.11 - samples/sec: 2343.15 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-24 10:23:45,795 epoch 6 - iter 292/738 - loss 0.02457377 - time (sec): 28.55 - samples/sec: 2333.74 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-24 10:23:52,528 epoch 6 - iter 365/738 - loss 0.02515097 - time (sec): 35.28 - samples/sec: 2353.54 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-24 10:23:59,567 epoch 6 - iter 438/738 - loss 0.02385974 - time (sec): 42.32 - samples/sec: 2356.57 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-24 10:24:06,218 epoch 6 - iter 511/738 - loss 0.02515094 - time (sec): 48.97 - samples/sec: 2361.10 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-24 10:24:12,835 epoch 6 - iter 584/738 - loss 0.02435233 - time (sec): 55.59 - samples/sec: 2358.56 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-24 10:24:19,177 epoch 6 - iter 657/738 - loss 0.02324233 - time (sec): 61.93 - samples/sec: 2358.28 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-24 10:24:26,619 epoch 6 - iter 730/738 - loss 0.02387630 - time (sec): 69.37 - samples/sec: 2364.77 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-24 10:24:27,676 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:24:27,676 EPOCH 6 done: loss 0.0239 - lr: 0.000022 |
|
2023-10-24 10:24:36,170 DEV : loss 0.2060367912054062 - f1-score (micro avg) 0.8198 |
|
2023-10-24 10:24:36,191 saving best model |
|
2023-10-24 10:24:36,892 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:24:43,743 epoch 7 - iter 73/738 - loss 0.02341470 - time (sec): 6.85 - samples/sec: 2416.06 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-24 10:24:50,783 epoch 7 - iter 146/738 - loss 0.01709146 - time (sec): 13.89 - samples/sec: 2382.65 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-24 10:24:58,184 epoch 7 - iter 219/738 - loss 0.01713074 - time (sec): 21.29 - samples/sec: 2372.46 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-24 10:25:05,352 epoch 7 - iter 292/738 - loss 0.01770969 - time (sec): 28.46 - samples/sec: 2351.56 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-24 10:25:12,272 epoch 7 - iter 365/738 - loss 0.01778214 - time (sec): 35.38 - samples/sec: 2342.03 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-24 10:25:19,415 epoch 7 - iter 438/738 - loss 0.01827725 - time (sec): 42.52 - samples/sec: 2329.98 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-24 10:25:27,293 epoch 7 - iter 511/738 - loss 0.01829362 - time (sec): 50.40 - samples/sec: 2319.74 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-24 10:25:33,772 epoch 7 - iter 584/738 - loss 0.01803134 - time (sec): 56.88 - samples/sec: 2314.60 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-24 10:25:40,028 epoch 7 - iter 657/738 - loss 0.01746771 - time (sec): 63.14 - samples/sec: 2332.70 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-24 10:25:47,583 epoch 7 - iter 730/738 - loss 0.01760240 - time (sec): 70.69 - samples/sec: 2333.02 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-24 10:25:48,215 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:25:48,215 EPOCH 7 done: loss 0.0176 - lr: 0.000017 |
|
2023-10-24 10:25:56,764 DEV : loss 0.20993147790431976 - f1-score (micro avg) 0.8337 |
|
2023-10-24 10:25:56,786 saving best model |
|
2023-10-24 10:25:57,492 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:26:04,446 epoch 8 - iter 73/738 - loss 0.00680709 - time (sec): 6.95 - samples/sec: 2286.14 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-24 10:26:11,526 epoch 8 - iter 146/738 - loss 0.00996759 - time (sec): 14.03 - samples/sec: 2337.43 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-24 10:26:19,078 epoch 8 - iter 219/738 - loss 0.00832750 - time (sec): 21.58 - samples/sec: 2309.20 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-24 10:26:25,988 epoch 8 - iter 292/738 - loss 0.00975386 - time (sec): 28.50 - samples/sec: 2347.39 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-24 10:26:33,555 epoch 8 - iter 365/738 - loss 0.01134821 - time (sec): 36.06 - samples/sec: 2369.33 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-24 10:26:40,285 epoch 8 - iter 438/738 - loss 0.01035983 - time (sec): 42.79 - samples/sec: 2353.37 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-24 10:26:47,426 epoch 8 - iter 511/738 - loss 0.01020561 - time (sec): 49.93 - samples/sec: 2353.12 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-24 10:26:54,330 epoch 8 - iter 584/738 - loss 0.01099361 - time (sec): 56.84 - samples/sec: 2341.22 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-24 10:27:01,235 epoch 8 - iter 657/738 - loss 0.01086690 - time (sec): 63.74 - samples/sec: 2345.74 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-24 10:27:07,621 epoch 8 - iter 730/738 - loss 0.01091282 - time (sec): 70.13 - samples/sec: 2350.24 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-24 10:27:08,380 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:27:08,380 EPOCH 8 done: loss 0.0110 - lr: 0.000011 |
|
2023-10-24 10:27:16,921 DEV : loss 0.22084267437458038 - f1-score (micro avg) 0.8289 |
|
2023-10-24 10:27:16,943 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:27:23,612 epoch 9 - iter 73/738 - loss 0.00392491 - time (sec): 6.67 - samples/sec: 2348.97 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-24 10:27:30,655 epoch 9 - iter 146/738 - loss 0.00565212 - time (sec): 13.71 - samples/sec: 2325.82 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-24 10:27:37,232 epoch 9 - iter 219/738 - loss 0.00863430 - time (sec): 20.29 - samples/sec: 2340.99 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-24 10:27:44,391 epoch 9 - iter 292/738 - loss 0.00652702 - time (sec): 27.45 - samples/sec: 2357.34 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-24 10:27:51,468 epoch 9 - iter 365/738 - loss 0.00611000 - time (sec): 34.52 - samples/sec: 2336.89 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-24 10:27:57,940 epoch 9 - iter 438/738 - loss 0.00682427 - time (sec): 41.00 - samples/sec: 2344.35 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-24 10:28:04,361 epoch 9 - iter 511/738 - loss 0.00667386 - time (sec): 47.42 - samples/sec: 2341.90 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-24 10:28:11,950 epoch 9 - iter 584/738 - loss 0.00602754 - time (sec): 55.01 - samples/sec: 2348.28 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-24 10:28:19,399 epoch 9 - iter 657/738 - loss 0.00614002 - time (sec): 62.46 - samples/sec: 2358.54 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-24 10:28:27,019 epoch 9 - iter 730/738 - loss 0.00579260 - time (sec): 70.08 - samples/sec: 2353.84 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-24 10:28:27,659 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:28:27,660 EPOCH 9 done: loss 0.0057 - lr: 0.000006 |
|
2023-10-24 10:28:36,193 DEV : loss 0.23757678270339966 - f1-score (micro avg) 0.8268 |
|
2023-10-24 10:28:36,214 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:28:43,879 epoch 10 - iter 73/738 - loss 0.00141152 - time (sec): 7.66 - samples/sec: 2250.50 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-24 10:28:50,591 epoch 10 - iter 146/738 - loss 0.00131568 - time (sec): 14.38 - samples/sec: 2311.69 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-24 10:28:57,262 epoch 10 - iter 219/738 - loss 0.00226691 - time (sec): 21.05 - samples/sec: 2306.18 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-24 10:29:04,438 epoch 10 - iter 292/738 - loss 0.00252578 - time (sec): 28.22 - samples/sec: 2315.22 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-24 10:29:11,870 epoch 10 - iter 365/738 - loss 0.00337101 - time (sec): 35.65 - samples/sec: 2355.34 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-24 10:29:18,816 epoch 10 - iter 438/738 - loss 0.00392772 - time (sec): 42.60 - samples/sec: 2350.20 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-24 10:29:26,183 epoch 10 - iter 511/738 - loss 0.00412300 - time (sec): 49.97 - samples/sec: 2353.44 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-24 10:29:33,369 epoch 10 - iter 584/738 - loss 0.00445029 - time (sec): 57.15 - samples/sec: 2349.77 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-24 10:29:39,730 epoch 10 - iter 657/738 - loss 0.00448197 - time (sec): 63.51 - samples/sec: 2350.36 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-24 10:29:46,578 epoch 10 - iter 730/738 - loss 0.00472168 - time (sec): 70.36 - samples/sec: 2343.02 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-24 10:29:47,263 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:29:47,264 EPOCH 10 done: loss 0.0047 - lr: 0.000000 |
|
2023-10-24 10:29:55,807 DEV : loss 0.24125587940216064 - f1-score (micro avg) 0.8294 |
|
2023-10-24 10:29:56,387 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 10:29:56,388 Loading model from best epoch ... |
|
2023-10-24 10:29:58,274 SequenceTagger predicts: Dictionary with 21 tags: O, S-loc, B-loc, E-loc, I-loc, S-pers, B-pers, E-pers, I-pers, S-org, B-org, E-org, I-org, S-time, B-time, E-time, I-time, S-prod, B-prod, E-prod, I-prod |
|
2023-10-24 10:30:04,557 |
|
Results: |
|
- F-score (micro) 0.7809 |
|
- F-score (macro) 0.6755 |
|
- Accuracy 0.663 |
|
|
|
By class: |
|
precision recall f1-score support |
|
|
|
loc 0.8286 0.8788 0.8529 858 |
|
pers 0.7242 0.7970 0.7589 537 |
|
org 0.5448 0.5530 0.5489 132 |
|
time 0.5147 0.6481 0.5738 54 |
|
prod 0.7059 0.5902 0.6429 61 |
|
|
|
micro avg 0.7560 0.8076 0.7809 1642 |
|
macro avg 0.6636 0.6934 0.6755 1642 |
|
weighted avg 0.7567 0.8076 0.7807 1642 |
|
|
|
2023-10-24 10:30:04,557 ---------------------------------------------------------------------------------------------------- |
|
|