|
2023-10-24 12:23:04,212 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:23:04,213 Model: "SequenceTagger( |
|
(embeddings): TransformerWordEmbeddings( |
|
(model): BertModel( |
|
(embeddings): BertEmbeddings( |
|
(word_embeddings): Embedding(64001, 768) |
|
(position_embeddings): Embedding(512, 768) |
|
(token_type_embeddings): Embedding(2, 768) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(encoder): BertEncoder( |
|
(layer): ModuleList( |
|
(0): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(1): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(2): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(3): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(4): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(5): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(6): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(7): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(8): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(9): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(10): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(11): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
) |
|
) |
|
(pooler): BertPooler( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(activation): Tanh() |
|
) |
|
) |
|
) |
|
(locked_dropout): LockedDropout(p=0.5) |
|
(linear): Linear(in_features=768, out_features=21, bias=True) |
|
(loss_function): CrossEntropyLoss() |
|
)" |
|
2023-10-24 12:23:04,213 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:23:04,214 MultiCorpus: 5901 train + 1287 dev + 1505 test sentences |
|
- NER_HIPE_2022 Corpus: 5901 train + 1287 dev + 1505 test sentences - /home/ubuntu/.flair/datasets/ner_hipe_2022/v2.1/hipe2020/fr/with_doc_seperator |
|
2023-10-24 12:23:04,214 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:23:04,214 Train: 5901 sentences |
|
2023-10-24 12:23:04,214 (train_with_dev=False, train_with_test=False) |
|
2023-10-24 12:23:04,214 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:23:04,214 Training Params: |
|
2023-10-24 12:23:04,214 - learning_rate: "5e-05" |
|
2023-10-24 12:23:04,214 - mini_batch_size: "8" |
|
2023-10-24 12:23:04,214 - max_epochs: "10" |
|
2023-10-24 12:23:04,214 - shuffle: "True" |
|
2023-10-24 12:23:04,214 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:23:04,214 Plugins: |
|
2023-10-24 12:23:04,214 - TensorboardLogger |
|
2023-10-24 12:23:04,214 - LinearScheduler | warmup_fraction: '0.1' |
|
2023-10-24 12:23:04,214 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:23:04,214 Final evaluation on model from best epoch (best-model.pt) |
|
2023-10-24 12:23:04,214 - metric: "('micro avg', 'f1-score')" |
|
2023-10-24 12:23:04,214 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:23:04,214 Computation: |
|
2023-10-24 12:23:04,214 - compute on device: cuda:0 |
|
2023-10-24 12:23:04,214 - embedding storage: none |
|
2023-10-24 12:23:04,214 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:23:04,214 Model training base path: "hmbench-hipe2020/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs8-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-4" |
|
2023-10-24 12:23:04,214 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:23:04,214 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:23:04,214 Logging anything other than scalars to TensorBoard is currently not supported. |
|
2023-10-24 12:23:10,901 epoch 1 - iter 73/738 - loss 2.25556870 - time (sec): 6.69 - samples/sec: 2318.83 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-24 12:23:18,456 epoch 1 - iter 146/738 - loss 1.32458759 - time (sec): 14.24 - samples/sec: 2358.45 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-24 12:23:25,593 epoch 1 - iter 219/738 - loss 1.01075058 - time (sec): 21.38 - samples/sec: 2367.02 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-24 12:23:32,606 epoch 1 - iter 292/738 - loss 0.83445723 - time (sec): 28.39 - samples/sec: 2343.02 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-24 12:23:39,151 epoch 1 - iter 365/738 - loss 0.72880147 - time (sec): 34.94 - samples/sec: 2346.86 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-24 12:23:46,732 epoch 1 - iter 438/738 - loss 0.63053944 - time (sec): 42.52 - samples/sec: 2358.95 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-24 12:23:53,499 epoch 1 - iter 511/738 - loss 0.56698661 - time (sec): 49.28 - samples/sec: 2371.01 - lr: 0.000035 - momentum: 0.000000 |
|
2023-10-24 12:24:00,652 epoch 1 - iter 584/738 - loss 0.51817214 - time (sec): 56.44 - samples/sec: 2377.30 - lr: 0.000039 - momentum: 0.000000 |
|
2023-10-24 12:24:07,209 epoch 1 - iter 657/738 - loss 0.48234951 - time (sec): 62.99 - samples/sec: 2369.99 - lr: 0.000044 - momentum: 0.000000 |
|
2023-10-24 12:24:13,975 epoch 1 - iter 730/738 - loss 0.45297912 - time (sec): 69.76 - samples/sec: 2367.11 - lr: 0.000049 - momentum: 0.000000 |
|
2023-10-24 12:24:14,570 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:24:14,571 EPOCH 1 done: loss 0.4506 - lr: 0.000049 |
|
2023-10-24 12:24:20,500 DEV : loss 0.11068389564752579 - f1-score (micro avg) 0.7249 |
|
2023-10-24 12:24:20,521 saving best model |
|
2023-10-24 12:24:21,077 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:24:27,228 epoch 2 - iter 73/738 - loss 0.11850381 - time (sec): 6.15 - samples/sec: 2439.37 - lr: 0.000049 - momentum: 0.000000 |
|
2023-10-24 12:24:34,287 epoch 2 - iter 146/738 - loss 0.12699373 - time (sec): 13.21 - samples/sec: 2354.09 - lr: 0.000049 - momentum: 0.000000 |
|
2023-10-24 12:24:41,528 epoch 2 - iter 219/738 - loss 0.11903104 - time (sec): 20.45 - samples/sec: 2333.96 - lr: 0.000048 - momentum: 0.000000 |
|
2023-10-24 12:24:48,726 epoch 2 - iter 292/738 - loss 0.11407540 - time (sec): 27.65 - samples/sec: 2334.41 - lr: 0.000048 - momentum: 0.000000 |
|
2023-10-24 12:24:56,438 epoch 2 - iter 365/738 - loss 0.11505869 - time (sec): 35.36 - samples/sec: 2333.00 - lr: 0.000047 - momentum: 0.000000 |
|
2023-10-24 12:25:04,375 epoch 2 - iter 438/738 - loss 0.11550463 - time (sec): 43.30 - samples/sec: 2339.16 - lr: 0.000047 - momentum: 0.000000 |
|
2023-10-24 12:25:11,227 epoch 2 - iter 511/738 - loss 0.11320576 - time (sec): 50.15 - samples/sec: 2352.34 - lr: 0.000046 - momentum: 0.000000 |
|
2023-10-24 12:25:17,791 epoch 2 - iter 584/738 - loss 0.11271620 - time (sec): 56.71 - samples/sec: 2338.51 - lr: 0.000046 - momentum: 0.000000 |
|
2023-10-24 12:25:24,632 epoch 2 - iter 657/738 - loss 0.11215704 - time (sec): 63.55 - samples/sec: 2344.70 - lr: 0.000045 - momentum: 0.000000 |
|
2023-10-24 12:25:31,326 epoch 2 - iter 730/738 - loss 0.11238531 - time (sec): 70.25 - samples/sec: 2345.92 - lr: 0.000045 - momentum: 0.000000 |
|
2023-10-24 12:25:32,012 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:25:32,012 EPOCH 2 done: loss 0.1122 - lr: 0.000045 |
|
2023-10-24 12:25:40,500 DEV : loss 0.11246663331985474 - f1-score (micro avg) 0.7944 |
|
2023-10-24 12:25:40,521 saving best model |
|
2023-10-24 12:25:41,224 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:25:49,422 epoch 3 - iter 73/738 - loss 0.07078854 - time (sec): 8.20 - samples/sec: 2245.39 - lr: 0.000044 - momentum: 0.000000 |
|
2023-10-24 12:25:56,519 epoch 3 - iter 146/738 - loss 0.06814029 - time (sec): 15.29 - samples/sec: 2271.00 - lr: 0.000043 - momentum: 0.000000 |
|
2023-10-24 12:26:03,588 epoch 3 - iter 219/738 - loss 0.07010610 - time (sec): 22.36 - samples/sec: 2294.46 - lr: 0.000043 - momentum: 0.000000 |
|
2023-10-24 12:26:11,167 epoch 3 - iter 292/738 - loss 0.07189901 - time (sec): 29.94 - samples/sec: 2337.78 - lr: 0.000042 - momentum: 0.000000 |
|
2023-10-24 12:26:17,598 epoch 3 - iter 365/738 - loss 0.07031058 - time (sec): 36.37 - samples/sec: 2349.67 - lr: 0.000042 - momentum: 0.000000 |
|
2023-10-24 12:26:24,143 epoch 3 - iter 438/738 - loss 0.07025265 - time (sec): 42.92 - samples/sec: 2351.45 - lr: 0.000041 - momentum: 0.000000 |
|
2023-10-24 12:26:31,058 epoch 3 - iter 511/738 - loss 0.06950700 - time (sec): 49.83 - samples/sec: 2354.27 - lr: 0.000041 - momentum: 0.000000 |
|
2023-10-24 12:26:38,632 epoch 3 - iter 584/738 - loss 0.06836676 - time (sec): 57.41 - samples/sec: 2339.94 - lr: 0.000040 - momentum: 0.000000 |
|
2023-10-24 12:26:44,892 epoch 3 - iter 657/738 - loss 0.06787050 - time (sec): 63.67 - samples/sec: 2340.10 - lr: 0.000040 - momentum: 0.000000 |
|
2023-10-24 12:26:51,851 epoch 3 - iter 730/738 - loss 0.06782298 - time (sec): 70.63 - samples/sec: 2334.17 - lr: 0.000039 - momentum: 0.000000 |
|
2023-10-24 12:26:52,516 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:26:52,517 EPOCH 3 done: loss 0.0681 - lr: 0.000039 |
|
2023-10-24 12:27:00,992 DEV : loss 0.12488321214914322 - f1-score (micro avg) 0.7849 |
|
2023-10-24 12:27:01,014 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:27:07,997 epoch 4 - iter 73/738 - loss 0.03529092 - time (sec): 6.98 - samples/sec: 2331.51 - lr: 0.000038 - momentum: 0.000000 |
|
2023-10-24 12:27:14,897 epoch 4 - iter 146/738 - loss 0.04400146 - time (sec): 13.88 - samples/sec: 2384.00 - lr: 0.000038 - momentum: 0.000000 |
|
2023-10-24 12:27:22,033 epoch 4 - iter 219/738 - loss 0.04320250 - time (sec): 21.02 - samples/sec: 2395.32 - lr: 0.000037 - momentum: 0.000000 |
|
2023-10-24 12:27:29,819 epoch 4 - iter 292/738 - loss 0.04596927 - time (sec): 28.80 - samples/sec: 2388.43 - lr: 0.000037 - momentum: 0.000000 |
|
2023-10-24 12:27:36,622 epoch 4 - iter 365/738 - loss 0.04951583 - time (sec): 35.61 - samples/sec: 2373.30 - lr: 0.000036 - momentum: 0.000000 |
|
2023-10-24 12:27:43,467 epoch 4 - iter 438/738 - loss 0.04961283 - time (sec): 42.45 - samples/sec: 2372.58 - lr: 0.000036 - momentum: 0.000000 |
|
2023-10-24 12:27:50,802 epoch 4 - iter 511/738 - loss 0.05020400 - time (sec): 49.79 - samples/sec: 2360.02 - lr: 0.000035 - momentum: 0.000000 |
|
2023-10-24 12:27:57,605 epoch 4 - iter 584/738 - loss 0.04984050 - time (sec): 56.59 - samples/sec: 2351.48 - lr: 0.000035 - momentum: 0.000000 |
|
2023-10-24 12:28:03,941 epoch 4 - iter 657/738 - loss 0.04943057 - time (sec): 62.93 - samples/sec: 2362.15 - lr: 0.000034 - momentum: 0.000000 |
|
2023-10-24 12:28:11,245 epoch 4 - iter 730/738 - loss 0.04969244 - time (sec): 70.23 - samples/sec: 2347.66 - lr: 0.000033 - momentum: 0.000000 |
|
2023-10-24 12:28:11,862 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:28:11,863 EPOCH 4 done: loss 0.0496 - lr: 0.000033 |
|
2023-10-24 12:28:20,370 DEV : loss 0.1528576910495758 - f1-score (micro avg) 0.8346 |
|
2023-10-24 12:28:20,391 saving best model |
|
2023-10-24 12:28:21,188 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:28:27,267 epoch 5 - iter 73/738 - loss 0.02360388 - time (sec): 6.08 - samples/sec: 2375.89 - lr: 0.000033 - momentum: 0.000000 |
|
2023-10-24 12:28:34,364 epoch 5 - iter 146/738 - loss 0.02865947 - time (sec): 13.18 - samples/sec: 2330.91 - lr: 0.000032 - momentum: 0.000000 |
|
2023-10-24 12:28:41,461 epoch 5 - iter 219/738 - loss 0.03521711 - time (sec): 20.27 - samples/sec: 2358.28 - lr: 0.000032 - momentum: 0.000000 |
|
2023-10-24 12:28:49,550 epoch 5 - iter 292/738 - loss 0.03531793 - time (sec): 28.36 - samples/sec: 2335.68 - lr: 0.000031 - momentum: 0.000000 |
|
2023-10-24 12:28:56,399 epoch 5 - iter 365/738 - loss 0.03374791 - time (sec): 35.21 - samples/sec: 2346.52 - lr: 0.000031 - momentum: 0.000000 |
|
2023-10-24 12:29:03,408 epoch 5 - iter 438/738 - loss 0.03393183 - time (sec): 42.22 - samples/sec: 2341.88 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-24 12:29:10,073 epoch 5 - iter 511/738 - loss 0.03423198 - time (sec): 48.88 - samples/sec: 2343.57 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-24 12:29:16,646 epoch 5 - iter 584/738 - loss 0.03510926 - time (sec): 55.46 - samples/sec: 2349.24 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-24 12:29:23,922 epoch 5 - iter 657/738 - loss 0.03369695 - time (sec): 62.73 - samples/sec: 2355.86 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-24 12:29:30,929 epoch 5 - iter 730/738 - loss 0.03362935 - time (sec): 69.74 - samples/sec: 2361.93 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-24 12:29:31,620 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:29:31,620 EPOCH 5 done: loss 0.0337 - lr: 0.000028 |
|
2023-10-24 12:29:40,123 DEV : loss 0.20074297487735748 - f1-score (micro avg) 0.8205 |
|
2023-10-24 12:29:40,144 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:29:46,357 epoch 6 - iter 73/738 - loss 0.02542708 - time (sec): 6.21 - samples/sec: 2417.52 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-24 12:29:53,136 epoch 6 - iter 146/738 - loss 0.02568352 - time (sec): 12.99 - samples/sec: 2409.24 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-24 12:30:00,416 epoch 6 - iter 219/738 - loss 0.02443887 - time (sec): 20.27 - samples/sec: 2390.55 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-24 12:30:07,727 epoch 6 - iter 292/738 - loss 0.02588078 - time (sec): 27.58 - samples/sec: 2405.48 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-24 12:30:14,464 epoch 6 - iter 365/738 - loss 0.02687911 - time (sec): 34.32 - samples/sec: 2383.09 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-24 12:30:21,772 epoch 6 - iter 438/738 - loss 0.02737354 - time (sec): 41.63 - samples/sec: 2388.00 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-24 12:30:28,310 epoch 6 - iter 511/738 - loss 0.02655544 - time (sec): 48.16 - samples/sec: 2384.58 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-24 12:30:35,756 epoch 6 - iter 584/738 - loss 0.02609074 - time (sec): 55.61 - samples/sec: 2379.16 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-24 12:30:42,681 epoch 6 - iter 657/738 - loss 0.02521831 - time (sec): 62.54 - samples/sec: 2371.16 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-24 12:30:49,679 epoch 6 - iter 730/738 - loss 0.02484775 - time (sec): 69.53 - samples/sec: 2373.88 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-24 12:30:50,284 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:30:50,285 EPOCH 6 done: loss 0.0251 - lr: 0.000022 |
|
2023-10-24 12:30:58,828 DEV : loss 0.19787970185279846 - f1-score (micro avg) 0.8193 |
|
2023-10-24 12:30:58,849 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:31:05,898 epoch 7 - iter 73/738 - loss 0.02178662 - time (sec): 7.05 - samples/sec: 2269.53 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-24 12:31:12,506 epoch 7 - iter 146/738 - loss 0.01701664 - time (sec): 13.66 - samples/sec: 2341.36 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-24 12:31:19,699 epoch 7 - iter 219/738 - loss 0.01455939 - time (sec): 20.85 - samples/sec: 2319.67 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-24 12:31:28,146 epoch 7 - iter 292/738 - loss 0.01477413 - time (sec): 29.30 - samples/sec: 2328.96 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-24 12:31:34,631 epoch 7 - iter 365/738 - loss 0.01628691 - time (sec): 35.78 - samples/sec: 2325.31 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-24 12:31:42,064 epoch 7 - iter 438/738 - loss 0.01719358 - time (sec): 43.21 - samples/sec: 2348.09 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-24 12:31:48,886 epoch 7 - iter 511/738 - loss 0.01677537 - time (sec): 50.04 - samples/sec: 2355.69 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-24 12:31:55,545 epoch 7 - iter 584/738 - loss 0.01632985 - time (sec): 56.70 - samples/sec: 2347.07 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-24 12:32:03,008 epoch 7 - iter 657/738 - loss 0.01601922 - time (sec): 64.16 - samples/sec: 2343.68 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-24 12:32:09,324 epoch 7 - iter 730/738 - loss 0.01649089 - time (sec): 70.47 - samples/sec: 2339.45 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-24 12:32:09,953 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:32:09,953 EPOCH 7 done: loss 0.0163 - lr: 0.000017 |
|
2023-10-24 12:32:18,482 DEV : loss 0.2052779197692871 - f1-score (micro avg) 0.8136 |
|
2023-10-24 12:32:18,503 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:32:25,024 epoch 8 - iter 73/738 - loss 0.01014088 - time (sec): 6.52 - samples/sec: 2316.92 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-24 12:32:31,985 epoch 8 - iter 146/738 - loss 0.01091345 - time (sec): 13.48 - samples/sec: 2355.44 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-24 12:32:39,044 epoch 8 - iter 219/738 - loss 0.00836325 - time (sec): 20.54 - samples/sec: 2324.74 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-24 12:32:46,254 epoch 8 - iter 292/738 - loss 0.00855304 - time (sec): 27.75 - samples/sec: 2329.75 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-24 12:32:52,553 epoch 8 - iter 365/738 - loss 0.00936106 - time (sec): 34.05 - samples/sec: 2347.44 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-24 12:32:59,322 epoch 8 - iter 438/738 - loss 0.01052962 - time (sec): 40.82 - samples/sec: 2352.29 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-24 12:33:05,965 epoch 8 - iter 511/738 - loss 0.01078551 - time (sec): 47.46 - samples/sec: 2356.96 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-24 12:33:12,809 epoch 8 - iter 584/738 - loss 0.01123143 - time (sec): 54.30 - samples/sec: 2349.41 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-24 12:33:21,301 epoch 8 - iter 657/738 - loss 0.01091470 - time (sec): 62.80 - samples/sec: 2361.75 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-24 12:33:28,538 epoch 8 - iter 730/738 - loss 0.01089738 - time (sec): 70.03 - samples/sec: 2350.54 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-24 12:33:29,390 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:33:29,390 EPOCH 8 done: loss 0.0108 - lr: 0.000011 |
|
2023-10-24 12:33:37,920 DEV : loss 0.22938336431980133 - f1-score (micro avg) 0.8183 |
|
2023-10-24 12:33:37,942 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:33:44,466 epoch 9 - iter 73/738 - loss 0.00599525 - time (sec): 6.52 - samples/sec: 2417.47 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-24 12:33:51,210 epoch 9 - iter 146/738 - loss 0.00485341 - time (sec): 13.27 - samples/sec: 2348.42 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-24 12:33:58,351 epoch 9 - iter 219/738 - loss 0.00589012 - time (sec): 20.41 - samples/sec: 2373.20 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-24 12:34:05,189 epoch 9 - iter 292/738 - loss 0.00614522 - time (sec): 27.25 - samples/sec: 2368.45 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-24 12:34:12,513 epoch 9 - iter 365/738 - loss 0.00734451 - time (sec): 34.57 - samples/sec: 2360.35 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-24 12:34:19,446 epoch 9 - iter 438/738 - loss 0.00635353 - time (sec): 41.50 - samples/sec: 2345.89 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-24 12:34:26,356 epoch 9 - iter 511/738 - loss 0.00658119 - time (sec): 48.41 - samples/sec: 2333.87 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-24 12:34:33,137 epoch 9 - iter 584/738 - loss 0.00637887 - time (sec): 55.19 - samples/sec: 2339.84 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-24 12:34:39,566 epoch 9 - iter 657/738 - loss 0.00597823 - time (sec): 61.62 - samples/sec: 2345.82 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-24 12:34:48,630 epoch 9 - iter 730/738 - loss 0.00624891 - time (sec): 70.69 - samples/sec: 2327.39 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-24 12:34:49,359 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:34:49,359 EPOCH 9 done: loss 0.0063 - lr: 0.000006 |
|
2023-10-24 12:34:57,887 DEV : loss 0.233811616897583 - f1-score (micro avg) 0.8223 |
|
2023-10-24 12:34:57,908 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:35:04,659 epoch 10 - iter 73/738 - loss 0.00175446 - time (sec): 6.75 - samples/sec: 2269.95 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-24 12:35:12,018 epoch 10 - iter 146/738 - loss 0.00290935 - time (sec): 14.11 - samples/sec: 2356.44 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-24 12:35:19,796 epoch 10 - iter 219/738 - loss 0.00267648 - time (sec): 21.89 - samples/sec: 2356.72 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-24 12:35:26,318 epoch 10 - iter 292/738 - loss 0.00318567 - time (sec): 28.41 - samples/sec: 2342.03 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-24 12:35:33,934 epoch 10 - iter 365/738 - loss 0.00328863 - time (sec): 36.02 - samples/sec: 2329.75 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-24 12:35:40,825 epoch 10 - iter 438/738 - loss 0.00317695 - time (sec): 42.92 - samples/sec: 2325.64 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-24 12:35:48,379 epoch 10 - iter 511/738 - loss 0.00329836 - time (sec): 50.47 - samples/sec: 2333.67 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-24 12:35:55,542 epoch 10 - iter 584/738 - loss 0.00413409 - time (sec): 57.63 - samples/sec: 2327.06 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-24 12:36:01,814 epoch 10 - iter 657/738 - loss 0.00384205 - time (sec): 63.91 - samples/sec: 2341.52 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-24 12:36:08,283 epoch 10 - iter 730/738 - loss 0.00381734 - time (sec): 70.37 - samples/sec: 2341.51 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-24 12:36:08,969 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:36:08,969 EPOCH 10 done: loss 0.0039 - lr: 0.000000 |
|
2023-10-24 12:36:17,503 DEV : loss 0.23587684333324432 - f1-score (micro avg) 0.824 |
|
2023-10-24 12:36:18,085 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 12:36:18,086 Loading model from best epoch ... |
|
2023-10-24 12:36:19,970 SequenceTagger predicts: Dictionary with 21 tags: O, S-loc, B-loc, E-loc, I-loc, S-pers, B-pers, E-pers, I-pers, S-org, B-org, E-org, I-org, S-time, B-time, E-time, I-time, S-prod, B-prod, E-prod, I-prod |
|
2023-10-24 12:36:26,293 |
|
Results: |
|
- F-score (micro) 0.7954 |
|
- F-score (macro) 0.6961 |
|
- Accuracy 0.6842 |
|
|
|
By class: |
|
precision recall f1-score support |
|
|
|
loc 0.8505 0.8753 0.8627 858 |
|
pers 0.7553 0.7989 0.7765 537 |
|
org 0.5785 0.5303 0.5534 132 |
|
time 0.5484 0.6296 0.5862 54 |
|
prod 0.7547 0.6557 0.7018 61 |
|
|
|
micro avg 0.7848 0.8063 0.7954 1642 |
|
macro avg 0.6975 0.6980 0.6961 1642 |
|
weighted avg 0.7840 0.8063 0.7946 1642 |
|
|
|
2023-10-24 12:36:26,293 ---------------------------------------------------------------------------------------------------- |
|
|