2023-04-05 12:41:26,937 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:41:26,943 Model: "SequenceTagger( (embeddings): TransformerWordEmbeddings( (model): BertModel( (embeddings): BertEmbeddings( (word_embeddings): Embedding(105880, 768) (position_embeddings): Embedding(512, 768) (token_type_embeddings): Embedding(2, 768) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): BertEncoder( (layer): ModuleList( (0): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (1): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (2): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (3): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (4): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (5): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (6): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (7): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (8): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (9): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (10): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (11): BertLayer( (attention): BertAttention( (self): BertSelfAttention( (query): Linear(in_features=768, out_features=768, bias=True) (key): Linear(in_features=768, out_features=768, bias=True) (value): Linear(in_features=768, out_features=768, bias=True) (dropout): Dropout(p=0.1, inplace=False) ) (output): BertSelfOutput( (dense): Linear(in_features=768, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (intermediate): BertIntermediate( (dense): Linear(in_features=768, out_features=3072, bias=True) (intermediate_act_fn): GELUActivation() ) (output): BertOutput( (dense): Linear(in_features=3072, out_features=768, bias=True) (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (pooler): BertPooler( (dense): Linear(in_features=768, out_features=768, bias=True) (activation): Tanh() ) ) ) (word_dropout): WordDropout(p=0.05) (locked_dropout): LockedDropout(p=0.5) (embedding2nn): Linear(in_features=768, out_features=768, bias=True) (rnn): LSTM(768, 256, batch_first=True, bidirectional=True) (linear): Linear(in_features=512, out_features=11, bias=True) (loss_function): ViterbiLoss() (crf): CRF() )" 2023-04-05 12:41:26,945 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:41:26,947 Corpus: "Corpus: 2253 train + 250 dev + 441 test sentences" 2023-04-05 12:41:26,948 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:41:26,948 Parameters: 2023-04-05 12:41:26,949 - learning_rate: "0.100000" 2023-04-05 12:41:26,950 - mini_batch_size: "32" 2023-04-05 12:41:26,951 - patience: "3" 2023-04-05 12:41:26,951 - anneal_factor: "0.5" 2023-04-05 12:41:26,953 - max_epochs: "40" 2023-04-05 12:41:26,954 - shuffle: "True" 2023-04-05 12:41:26,955 - train_with_dev: "False" 2023-04-05 12:41:26,955 - batch_growth_annealing: "False" 2023-04-05 12:41:26,958 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:41:26,958 Model training base path: "resources\taggers\addressNER" 2023-04-05 12:41:26,959 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:41:26,960 Device: cuda:0 2023-04-05 12:41:26,961 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:41:26,962 Embeddings storage mode: none 2023-04-05 12:41:26,962 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:41:30,223 epoch 1 - iter 7/71 - loss 2.28721713 - time (sec): 3.26 - samples/sec: 178.21 - lr: 0.100000 2023-04-05 12:41:33,767 epoch 1 - iter 14/71 - loss 1.92622932 - time (sec): 6.80 - samples/sec: 171.67 - lr: 0.100000 2023-04-05 12:41:37,177 epoch 1 - iter 21/71 - loss 1.73813053 - time (sec): 10.21 - samples/sec: 168.49 - lr: 0.100000 2023-04-05 12:41:40,953 epoch 1 - iter 28/71 - loss 1.61830648 - time (sec): 13.99 - samples/sec: 166.33 - lr: 0.100000 2023-04-05 12:41:44,268 epoch 1 - iter 35/71 - loss 1.53214432 - time (sec): 17.30 - samples/sec: 168.39 - lr: 0.100000 2023-04-05 12:41:48,032 epoch 1 - iter 42/71 - loss 1.45733425 - time (sec): 21.07 - samples/sec: 167.12 - lr: 0.100000 2023-04-05 12:41:51,701 epoch 1 - iter 49/71 - loss 1.39384449 - time (sec): 24.74 - samples/sec: 166.14 - lr: 0.100000 2023-04-05 12:41:55,370 epoch 1 - iter 56/71 - loss 1.34539887 - time (sec): 28.41 - samples/sec: 164.71 - lr: 0.100000 2023-04-05 12:41:59,094 epoch 1 - iter 63/71 - loss 1.30608958 - time (sec): 32.13 - samples/sec: 163.55 - lr: 0.100000 2023-04-05 12:42:02,838 epoch 1 - iter 70/71 - loss 1.26400603 - time (sec): 35.87 - samples/sec: 163.93 - lr: 0.100000 2023-04-05 12:42:03,627 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:42:03,628 EPOCH 1 done: loss 1.2625 - lr 0.100000 2023-04-05 12:42:34,974 Evaluating as a multi-label problem: False 2023-04-05 12:42:34,994 TRAIN : loss 0.861785352230072 - f1-score (micro avg) 0.7261 2023-04-05 12:42:38,596 Evaluating as a multi-label problem: False 2023-04-05 12:42:38,610 DEV : loss 0.9671951532363892 - f1-score (micro avg) 0.6729 2023-04-05 12:42:38,617 BAD EPOCHS (no improvement): 0 2023-04-05 12:42:38,618 saving best model 2023-04-05 12:42:39,745 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:42:42,695 epoch 2 - iter 7/71 - loss 0.92334886 - time (sec): 2.95 - samples/sec: 202.11 - lr: 0.100000 2023-04-05 12:42:46,576 epoch 2 - iter 14/71 - loss 0.87175200 - time (sec): 6.83 - samples/sec: 177.16 - lr: 0.100000 2023-04-05 12:42:50,257 epoch 2 - iter 21/71 - loss 0.81372023 - time (sec): 10.51 - samples/sec: 171.63 - lr: 0.100000 2023-04-05 12:42:53,892 epoch 2 - iter 28/71 - loss 0.78629231 - time (sec): 14.15 - samples/sec: 168.38 - lr: 0.100000 2023-04-05 12:42:57,549 epoch 2 - iter 35/71 - loss 0.76274145 - time (sec): 17.80 - samples/sec: 166.10 - lr: 0.100000 2023-04-05 12:43:01,159 epoch 2 - iter 42/71 - loss 0.73990095 - time (sec): 21.41 - samples/sec: 163.64 - lr: 0.100000 2023-04-05 12:43:04,701 epoch 2 - iter 49/71 - loss 0.71935682 - time (sec): 24.96 - samples/sec: 164.09 - lr: 0.100000 2023-04-05 12:43:08,315 epoch 2 - iter 56/71 - loss 0.69784735 - time (sec): 28.57 - samples/sec: 163.32 - lr: 0.100000 2023-04-05 12:43:11,830 epoch 2 - iter 63/71 - loss 0.68374551 - time (sec): 32.08 - samples/sec: 164.13 - lr: 0.100000 2023-04-05 12:43:15,403 epoch 2 - iter 70/71 - loss 0.68318206 - time (sec): 35.66 - samples/sec: 165.13 - lr: 0.100000 2023-04-05 12:43:16,197 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:43:16,199 EPOCH 2 done: loss 0.6825 - lr 0.100000 2023-04-05 12:43:47,058 Evaluating as a multi-label problem: False 2023-04-05 12:43:47,079 TRAIN : loss 0.5168215036392212 - f1-score (micro avg) 0.7765 2023-04-05 12:43:50,605 Evaluating as a multi-label problem: False 2023-04-05 12:43:50,618 DEV : loss 0.6507071852684021 - f1-score (micro avg) 0.7109 2023-04-05 12:43:50,623 BAD EPOCHS (no improvement): 0 2023-04-05 12:43:50,624 saving best model 2023-04-05 12:43:51,833 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:43:54,803 epoch 3 - iter 7/71 - loss 0.54805596 - time (sec): 2.97 - samples/sec: 196.06 - lr: 0.100000 2023-04-05 12:43:58,496 epoch 3 - iter 14/71 - loss 0.52917100 - time (sec): 6.66 - samples/sec: 178.19 - lr: 0.100000 2023-04-05 12:44:02,157 epoch 3 - iter 21/71 - loss 0.53172923 - time (sec): 10.32 - samples/sec: 173.12 - lr: 0.100000 2023-04-05 12:44:05,748 epoch 3 - iter 28/71 - loss 0.53976229 - time (sec): 13.91 - samples/sec: 168.90 - lr: 0.100000 2023-04-05 12:44:09,391 epoch 3 - iter 35/71 - loss 0.54268651 - time (sec): 17.56 - samples/sec: 167.34 - lr: 0.100000 2023-04-05 12:44:13,039 epoch 3 - iter 42/71 - loss 0.53676146 - time (sec): 21.20 - samples/sec: 165.96 - lr: 0.100000 2023-04-05 12:44:16,753 epoch 3 - iter 49/71 - loss 0.52184996 - time (sec): 24.92 - samples/sec: 163.25 - lr: 0.100000 2023-04-05 12:44:20,426 epoch 3 - iter 56/71 - loss 0.51321120 - time (sec): 28.59 - samples/sec: 162.14 - lr: 0.100000 2023-04-05 12:44:24,224 epoch 3 - iter 63/71 - loss 0.50438788 - time (sec): 32.39 - samples/sec: 161.35 - lr: 0.100000 2023-04-05 12:44:27,884 epoch 3 - iter 70/71 - loss 0.51802873 - time (sec): 36.05 - samples/sec: 163.16 - lr: 0.100000 2023-04-05 12:44:28,729 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:44:28,730 EPOCH 3 done: loss 0.5184 - lr 0.100000 2023-04-05 12:44:59,957 Evaluating as a multi-label problem: False 2023-04-05 12:44:59,978 TRAIN : loss 0.3893236517906189 - f1-score (micro avg) 0.8265 2023-04-05 12:45:03,682 Evaluating as a multi-label problem: False 2023-04-05 12:45:03,692 DEV : loss 0.4997367858886719 - f1-score (micro avg) 0.7814 2023-04-05 12:45:03,700 BAD EPOCHS (no improvement): 0 2023-04-05 12:45:03,702 saving best model 2023-04-05 12:45:04,923 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:45:07,851 epoch 4 - iter 7/71 - loss 0.40043816 - time (sec): 2.92 - samples/sec: 198.64 - lr: 0.100000 2023-04-05 12:45:11,521 epoch 4 - iter 14/71 - loss 0.43328716 - time (sec): 6.60 - samples/sec: 179.83 - lr: 0.100000 2023-04-05 12:45:15,244 epoch 4 - iter 21/71 - loss 0.44871122 - time (sec): 10.32 - samples/sec: 173.97 - lr: 0.100000 2023-04-05 12:45:18,967 epoch 4 - iter 28/71 - loss 0.44610987 - time (sec): 14.04 - samples/sec: 167.08 - lr: 0.100000 2023-04-05 12:45:22,672 epoch 4 - iter 35/71 - loss 0.46221491 - time (sec): 17.75 - samples/sec: 167.81 - lr: 0.100000 2023-04-05 12:45:26,360 epoch 4 - iter 42/71 - loss 0.43613944 - time (sec): 21.43 - samples/sec: 166.04 - lr: 0.100000 2023-04-05 12:45:30,026 epoch 4 - iter 49/71 - loss 0.43457310 - time (sec): 25.10 - samples/sec: 163.95 - lr: 0.100000 2023-04-05 12:45:33,722 epoch 4 - iter 56/71 - loss 0.42344792 - time (sec): 28.80 - samples/sec: 163.63 - lr: 0.100000 2023-04-05 12:45:37,362 epoch 4 - iter 63/71 - loss 0.41981578 - time (sec): 32.44 - samples/sec: 163.15 - lr: 0.100000 2023-04-05 12:45:41,136 epoch 4 - iter 70/71 - loss 0.41432375 - time (sec): 36.21 - samples/sec: 162.52 - lr: 0.100000 2023-04-05 12:45:41,950 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:45:41,951 EPOCH 4 done: loss 0.4140 - lr 0.100000 2023-04-05 12:46:11,773 Evaluating as a multi-label problem: False 2023-04-05 12:46:11,790 TRAIN : loss 0.3005659878253937 - f1-score (micro avg) 0.8763 2023-04-05 12:46:15,334 Evaluating as a multi-label problem: False 2023-04-05 12:46:15,345 DEV : loss 0.3956667482852936 - f1-score (micro avg) 0.8253 2023-04-05 12:46:15,352 BAD EPOCHS (no improvement): 0 2023-04-05 12:46:15,353 saving best model 2023-04-05 12:46:16,744 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:46:19,771 epoch 5 - iter 7/71 - loss 0.33104508 - time (sec): 3.03 - samples/sec: 191.03 - lr: 0.100000 2023-04-05 12:46:23,484 epoch 5 - iter 14/71 - loss 0.37656699 - time (sec): 6.74 - samples/sec: 178.21 - lr: 0.100000 2023-04-05 12:46:27,097 epoch 5 - iter 21/71 - loss 0.37195919 - time (sec): 10.35 - samples/sec: 172.91 - lr: 0.100000 2023-04-05 12:46:30,706 epoch 5 - iter 28/71 - loss 0.37480153 - time (sec): 13.96 - samples/sec: 168.97 - lr: 0.100000 2023-04-05 12:46:34,278 epoch 5 - iter 35/71 - loss 0.38207478 - time (sec): 17.53 - samples/sec: 169.28 - lr: 0.100000 2023-04-05 12:46:38,037 epoch 5 - iter 42/71 - loss 0.37466200 - time (sec): 21.29 - samples/sec: 166.22 - lr: 0.100000 2023-04-05 12:46:41,693 epoch 5 - iter 49/71 - loss 0.37114011 - time (sec): 24.95 - samples/sec: 165.58 - lr: 0.100000 2023-04-05 12:46:45,303 epoch 5 - iter 56/71 - loss 0.37649024 - time (sec): 28.56 - samples/sec: 165.56 - lr: 0.100000 2023-04-05 12:46:49,121 epoch 5 - iter 63/71 - loss 0.37565114 - time (sec): 32.38 - samples/sec: 164.82 - lr: 0.100000 2023-04-05 12:46:52,825 epoch 5 - iter 70/71 - loss 0.36875926 - time (sec): 36.08 - samples/sec: 163.00 - lr: 0.100000 2023-04-05 12:46:53,641 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:46:53,642 EPOCH 5 done: loss 0.3682 - lr 0.100000 2023-04-05 12:47:23,884 Evaluating as a multi-label problem: False 2023-04-05 12:47:23,900 TRAIN : loss 0.24057592451572418 - f1-score (micro avg) 0.8969 2023-04-05 12:47:27,486 Evaluating as a multi-label problem: False 2023-04-05 12:47:27,497 DEV : loss 0.3635919690132141 - f1-score (micro avg) 0.808 2023-04-05 12:47:27,503 BAD EPOCHS (no improvement): 1 2023-04-05 12:47:27,509 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:47:30,597 epoch 6 - iter 7/71 - loss 0.29060467 - time (sec): 3.09 - samples/sec: 191.74 - lr: 0.100000 2023-04-05 12:47:34,411 epoch 6 - iter 14/71 - loss 0.33442431 - time (sec): 6.90 - samples/sec: 175.32 - lr: 0.100000 2023-04-05 12:47:38,195 epoch 6 - iter 21/71 - loss 0.33081293 - time (sec): 10.69 - samples/sec: 169.02 - lr: 0.100000 2023-04-05 12:47:41,883 epoch 6 - iter 28/71 - loss 0.31355855 - time (sec): 14.37 - samples/sec: 164.05 - lr: 0.100000 2023-04-05 12:47:45,626 epoch 6 - iter 35/71 - loss 0.30769625 - time (sec): 18.12 - samples/sec: 161.79 - lr: 0.100000 2023-04-05 12:47:49,335 epoch 6 - iter 42/71 - loss 0.30399027 - time (sec): 21.83 - samples/sec: 160.77 - lr: 0.100000 2023-04-05 12:47:53,093 epoch 6 - iter 49/71 - loss 0.30522447 - time (sec): 25.58 - samples/sec: 161.32 - lr: 0.100000 2023-04-05 12:47:56,821 epoch 6 - iter 56/71 - loss 0.30943381 - time (sec): 29.31 - samples/sec: 160.49 - lr: 0.100000 2023-04-05 12:48:00,614 epoch 6 - iter 63/71 - loss 0.30689469 - time (sec): 33.10 - samples/sec: 159.74 - lr: 0.100000 2023-04-05 12:48:04,217 epoch 6 - iter 70/71 - loss 0.31271804 - time (sec): 36.71 - samples/sec: 160.30 - lr: 0.100000 2023-04-05 12:48:05,056 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:48:05,057 EPOCH 6 done: loss 0.3120 - lr 0.100000 2023-04-05 12:48:35,310 Evaluating as a multi-label problem: False 2023-04-05 12:48:35,330 TRAIN : loss 0.2524365782737732 - f1-score (micro avg) 0.8698 2023-04-05 12:48:38,882 Evaluating as a multi-label problem: False 2023-04-05 12:48:38,893 DEV : loss 0.3847086429595947 - f1-score (micro avg) 0.7949 2023-04-05 12:48:38,897 BAD EPOCHS (no improvement): 2 2023-04-05 12:48:38,900 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:48:41,937 epoch 7 - iter 7/71 - loss 0.37316342 - time (sec): 3.04 - samples/sec: 194.56 - lr: 0.100000 2023-04-05 12:48:45,658 epoch 7 - iter 14/71 - loss 0.30328593 - time (sec): 6.76 - samples/sec: 173.42 - lr: 0.100000 2023-04-05 12:48:49,422 epoch 7 - iter 21/71 - loss 0.29911219 - time (sec): 10.52 - samples/sec: 168.42 - lr: 0.100000 2023-04-05 12:48:53,079 epoch 7 - iter 28/71 - loss 0.29218439 - time (sec): 14.18 - samples/sec: 167.57 - lr: 0.100000 2023-04-05 12:48:56,921 epoch 7 - iter 35/71 - loss 0.29301233 - time (sec): 18.02 - samples/sec: 164.19 - lr: 0.100000 2023-04-05 12:49:00,741 epoch 7 - iter 42/71 - loss 0.29024371 - time (sec): 21.84 - samples/sec: 162.13 - lr: 0.100000 2023-04-05 12:49:04,470 epoch 7 - iter 49/71 - loss 0.29491898 - time (sec): 25.57 - samples/sec: 161.04 - lr: 0.100000 2023-04-05 12:49:08,148 epoch 7 - iter 56/71 - loss 0.29485370 - time (sec): 29.25 - samples/sec: 161.27 - lr: 0.100000 2023-04-05 12:49:11,798 epoch 7 - iter 63/71 - loss 0.29048646 - time (sec): 32.90 - samples/sec: 161.74 - lr: 0.100000 2023-04-05 12:49:15,495 epoch 7 - iter 70/71 - loss 0.28277861 - time (sec): 36.60 - samples/sec: 160.65 - lr: 0.100000 2023-04-05 12:49:16,301 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:49:16,302 EPOCH 7 done: loss 0.2833 - lr 0.100000 2023-04-05 12:49:46,527 Evaluating as a multi-label problem: False 2023-04-05 12:49:46,546 TRAIN : loss 0.21568651497364044 - f1-score (micro avg) 0.8845 2023-04-05 12:49:50,126 Evaluating as a multi-label problem: False 2023-04-05 12:49:50,138 DEV : loss 0.3467901051044464 - f1-score (micro avg) 0.8182 2023-04-05 12:49:50,144 BAD EPOCHS (no improvement): 3 2023-04-05 12:49:50,145 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:49:53,171 epoch 8 - iter 7/71 - loss 0.25938026 - time (sec): 3.03 - samples/sec: 191.70 - lr: 0.100000 2023-04-05 12:49:56,942 epoch 8 - iter 14/71 - loss 0.25775377 - time (sec): 6.80 - samples/sec: 173.18 - lr: 0.100000 2023-04-05 12:50:00,558 epoch 8 - iter 21/71 - loss 0.25357329 - time (sec): 10.41 - samples/sec: 168.64 - lr: 0.100000 2023-04-05 12:50:04,296 epoch 8 - iter 28/71 - loss 0.24824351 - time (sec): 14.15 - samples/sec: 164.02 - lr: 0.100000 2023-04-05 12:50:08,001 epoch 8 - iter 35/71 - loss 0.25876964 - time (sec): 17.86 - samples/sec: 162.19 - lr: 0.100000 2023-04-05 12:50:11,740 epoch 8 - iter 42/71 - loss 0.25788824 - time (sec): 21.60 - samples/sec: 162.58 - lr: 0.100000 2023-04-05 12:50:15,342 epoch 8 - iter 49/71 - loss 0.25424281 - time (sec): 25.20 - samples/sec: 162.72 - lr: 0.100000 2023-04-05 12:50:19,188 epoch 8 - iter 56/71 - loss 0.25450229 - time (sec): 29.04 - samples/sec: 161.73 - lr: 0.100000 2023-04-05 12:50:23,093 epoch 8 - iter 63/71 - loss 0.25746436 - time (sec): 32.95 - samples/sec: 160.01 - lr: 0.100000 2023-04-05 12:50:27,329 epoch 8 - iter 70/71 - loss 0.25933626 - time (sec): 37.18 - samples/sec: 158.05 - lr: 0.100000 2023-04-05 12:50:28,163 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:50:28,165 EPOCH 8 done: loss 0.2589 - lr 0.100000 2023-04-05 12:50:59,296 Evaluating as a multi-label problem: False 2023-04-05 12:50:59,312 TRAIN : loss 0.19819645583629608 - f1-score (micro avg) 0.8943 2023-04-05 12:51:02,965 Evaluating as a multi-label problem: False 2023-04-05 12:51:02,980 DEV : loss 0.33811214566230774 - f1-score (micro avg) 0.8118 2023-04-05 12:51:02,988 Epoch 8: reducing learning rate of group 0 to 5.0000e-02. 2023-04-05 12:51:02,989 BAD EPOCHS (no improvement): 4 2023-04-05 12:51:02,990 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:51:06,070 epoch 9 - iter 7/71 - loss 0.23812631 - time (sec): 3.08 - samples/sec: 194.17 - lr: 0.050000 2023-04-05 12:51:09,867 epoch 9 - iter 14/71 - loss 0.19886070 - time (sec): 6.88 - samples/sec: 169.72 - lr: 0.050000 2023-04-05 12:51:13,813 epoch 9 - iter 21/71 - loss 0.22626942 - time (sec): 10.82 - samples/sec: 161.52 - lr: 0.050000 2023-04-05 12:51:17,892 epoch 9 - iter 28/71 - loss 0.22291349 - time (sec): 14.90 - samples/sec: 158.04 - lr: 0.050000 2023-04-05 12:51:21,762 epoch 9 - iter 35/71 - loss 0.22330927 - time (sec): 18.77 - samples/sec: 155.93 - lr: 0.050000 2023-04-05 12:51:25,462 epoch 9 - iter 42/71 - loss 0.22661256 - time (sec): 22.47 - samples/sec: 158.69 - lr: 0.050000 2023-04-05 12:51:29,109 epoch 9 - iter 49/71 - loss 0.22209246 - time (sec): 26.12 - samples/sec: 158.89 - lr: 0.050000 2023-04-05 12:51:32,660 epoch 9 - iter 56/71 - loss 0.21543228 - time (sec): 29.67 - samples/sec: 158.89 - lr: 0.050000 2023-04-05 12:51:36,421 epoch 9 - iter 63/71 - loss 0.22191567 - time (sec): 33.43 - samples/sec: 158.57 - lr: 0.050000 2023-04-05 12:51:40,056 epoch 9 - iter 70/71 - loss 0.22014300 - time (sec): 37.06 - samples/sec: 158.83 - lr: 0.050000 2023-04-05 12:51:40,907 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:51:40,908 EPOCH 9 done: loss 0.2208 - lr 0.050000 2023-04-05 12:52:11,299 Evaluating as a multi-label problem: False 2023-04-05 12:52:11,317 TRAIN : loss 0.14378328621387482 - f1-score (micro avg) 0.9285 2023-04-05 12:52:14,963 Evaluating as a multi-label problem: False 2023-04-05 12:52:14,976 DEV : loss 0.2700260877609253 - f1-score (micro avg) 0.8339 2023-04-05 12:52:14,981 BAD EPOCHS (no improvement): 0 2023-04-05 12:52:14,983 saving best model 2023-04-05 12:52:16,467 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:52:19,567 epoch 10 - iter 7/71 - loss 0.17365492 - time (sec): 3.10 - samples/sec: 178.18 - lr: 0.050000 2023-04-05 12:52:23,341 epoch 10 - iter 14/71 - loss 0.18889650 - time (sec): 6.87 - samples/sec: 165.16 - lr: 0.050000 2023-04-05 12:52:27,018 epoch 10 - iter 21/71 - loss 0.20267585 - time (sec): 10.55 - samples/sec: 165.89 - lr: 0.050000 2023-04-05 12:52:30,734 epoch 10 - iter 28/71 - loss 0.19907475 - time (sec): 14.26 - samples/sec: 163.63 - lr: 0.050000 2023-04-05 12:52:34,373 epoch 10 - iter 35/71 - loss 0.19695937 - time (sec): 17.90 - samples/sec: 162.87 - lr: 0.050000 2023-04-05 12:52:38,176 epoch 10 - iter 42/71 - loss 0.20787867 - time (sec): 21.71 - samples/sec: 163.27 - lr: 0.050000 2023-04-05 12:52:41,917 epoch 10 - iter 49/71 - loss 0.21020299 - time (sec): 25.45 - samples/sec: 162.92 - lr: 0.050000 2023-04-05 12:52:45,576 epoch 10 - iter 56/71 - loss 0.21217935 - time (sec): 29.11 - samples/sec: 162.53 - lr: 0.050000 2023-04-05 12:52:49,330 epoch 10 - iter 63/71 - loss 0.20993717 - time (sec): 32.86 - samples/sec: 161.37 - lr: 0.050000 2023-04-05 12:52:53,093 epoch 10 - iter 70/71 - loss 0.20975981 - time (sec): 36.62 - samples/sec: 160.74 - lr: 0.050000 2023-04-05 12:52:53,956 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:52:53,957 EPOCH 10 done: loss 0.2100 - lr 0.050000 2023-04-05 12:53:24,907 Evaluating as a multi-label problem: False 2023-04-05 12:53:24,923 TRAIN : loss 0.1295960396528244 - f1-score (micro avg) 0.936 2023-04-05 12:53:28,577 Evaluating as a multi-label problem: False 2023-04-05 12:53:28,590 DEV : loss 0.2524380087852478 - f1-score (micro avg) 0.8561 2023-04-05 12:53:28,596 BAD EPOCHS (no improvement): 0 2023-04-05 12:53:28,601 saving best model 2023-04-05 12:53:29,758 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:53:32,976 epoch 11 - iter 7/71 - loss 0.23687125 - time (sec): 3.22 - samples/sec: 196.25 - lr: 0.050000 2023-04-05 12:53:36,734 epoch 11 - iter 14/71 - loss 0.23011958 - time (sec): 6.97 - samples/sec: 175.94 - lr: 0.050000 2023-04-05 12:53:40,400 epoch 11 - iter 21/71 - loss 0.20729973 - time (sec): 10.64 - samples/sec: 170.31 - lr: 0.050000 2023-04-05 12:53:44,055 epoch 11 - iter 28/71 - loss 0.20154173 - time (sec): 14.29 - samples/sec: 166.99 - lr: 0.050000 2023-04-05 12:53:47,926 epoch 11 - iter 35/71 - loss 0.20303865 - time (sec): 18.17 - samples/sec: 164.55 - lr: 0.050000 2023-04-05 12:53:51,746 epoch 11 - iter 42/71 - loss 0.19656997 - time (sec): 21.99 - samples/sec: 162.47 - lr: 0.050000 2023-04-05 12:53:55,488 epoch 11 - iter 49/71 - loss 0.20078721 - time (sec): 25.73 - samples/sec: 160.57 - lr: 0.050000 2023-04-05 12:53:59,182 epoch 11 - iter 56/71 - loss 0.19665885 - time (sec): 29.42 - samples/sec: 160.26 - lr: 0.050000 2023-04-05 12:54:03,005 epoch 11 - iter 63/71 - loss 0.19670097 - time (sec): 33.24 - samples/sec: 158.79 - lr: 0.050000 2023-04-05 12:54:06,832 epoch 11 - iter 70/71 - loss 0.19415408 - time (sec): 37.07 - samples/sec: 158.61 - lr: 0.050000 2023-04-05 12:54:07,718 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:54:07,719 EPOCH 11 done: loss 0.1942 - lr 0.050000 2023-04-05 12:54:38,401 Evaluating as a multi-label problem: False 2023-04-05 12:54:38,416 TRAIN : loss 0.1299719512462616 - f1-score (micro avg) 0.9345 2023-04-05 12:54:42,043 Evaluating as a multi-label problem: False 2023-04-05 12:54:42,055 DEV : loss 0.2761968970298767 - f1-score (micro avg) 0.8452 2023-04-05 12:54:42,061 BAD EPOCHS (no improvement): 1 2023-04-05 12:54:42,062 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:54:45,126 epoch 12 - iter 7/71 - loss 0.18339264 - time (sec): 3.06 - samples/sec: 187.09 - lr: 0.050000 2023-04-05 12:54:48,926 epoch 12 - iter 14/71 - loss 0.19237624 - time (sec): 6.86 - samples/sec: 173.27 - lr: 0.050000 2023-04-05 12:54:52,633 epoch 12 - iter 21/71 - loss 0.19432209 - time (sec): 10.57 - samples/sec: 166.42 - lr: 0.050000 2023-04-05 12:54:56,545 epoch 12 - iter 28/71 - loss 0.20200765 - time (sec): 14.48 - samples/sec: 162.90 - lr: 0.050000 2023-04-05 12:55:00,495 epoch 12 - iter 35/71 - loss 0.19446487 - time (sec): 18.43 - samples/sec: 160.60 - lr: 0.050000 2023-04-05 12:55:04,256 epoch 12 - iter 42/71 - loss 0.19910943 - time (sec): 22.19 - samples/sec: 160.01 - lr: 0.050000 2023-04-05 12:55:08,062 epoch 12 - iter 49/71 - loss 0.19637866 - time (sec): 26.00 - samples/sec: 158.66 - lr: 0.050000 2023-04-05 12:55:11,765 epoch 12 - iter 56/71 - loss 0.19106381 - time (sec): 29.70 - samples/sec: 158.17 - lr: 0.050000 2023-04-05 12:55:15,677 epoch 12 - iter 63/71 - loss 0.19328764 - time (sec): 33.61 - samples/sec: 157.71 - lr: 0.050000 2023-04-05 12:55:19,610 epoch 12 - iter 70/71 - loss 0.18986505 - time (sec): 37.55 - samples/sec: 156.77 - lr: 0.050000 2023-04-05 12:55:20,532 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:55:20,534 EPOCH 12 done: loss 0.1892 - lr 0.050000 2023-04-05 12:55:51,520 Evaluating as a multi-label problem: False 2023-04-05 12:55:51,541 TRAIN : loss 0.11139164865016937 - f1-score (micro avg) 0.9468 2023-04-05 12:55:55,099 Evaluating as a multi-label problem: False 2023-04-05 12:55:55,107 DEV : loss 0.24872702360153198 - f1-score (micro avg) 0.8656 2023-04-05 12:55:55,114 BAD EPOCHS (no improvement): 0 2023-04-05 12:55:55,116 saving best model 2023-04-05 12:55:56,261 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:55:59,215 epoch 13 - iter 7/71 - loss 0.13416757 - time (sec): 2.95 - samples/sec: 191.70 - lr: 0.050000 2023-04-05 12:56:02,877 epoch 13 - iter 14/71 - loss 0.14492713 - time (sec): 6.61 - samples/sec: 173.40 - lr: 0.050000 2023-04-05 12:56:06,537 epoch 13 - iter 21/71 - loss 0.15026220 - time (sec): 10.27 - samples/sec: 165.55 - lr: 0.050000 2023-04-05 12:56:10,099 epoch 13 - iter 28/71 - loss 0.14928537 - time (sec): 13.84 - samples/sec: 166.95 - lr: 0.050000 2023-04-05 12:56:13,686 epoch 13 - iter 35/71 - loss 0.14769742 - time (sec): 17.42 - samples/sec: 165.57 - lr: 0.050000 2023-04-05 12:56:17,287 epoch 13 - iter 42/71 - loss 0.15688659 - time (sec): 21.02 - samples/sec: 164.09 - lr: 0.050000 2023-04-05 12:56:20,996 epoch 13 - iter 49/71 - loss 0.16690754 - time (sec): 24.73 - samples/sec: 164.31 - lr: 0.050000 2023-04-05 12:56:24,678 epoch 13 - iter 56/71 - loss 0.17086305 - time (sec): 28.42 - samples/sec: 164.45 - lr: 0.050000 2023-04-05 12:56:28,394 epoch 13 - iter 63/71 - loss 0.17395421 - time (sec): 32.13 - samples/sec: 164.13 - lr: 0.050000 2023-04-05 12:56:32,037 epoch 13 - iter 70/71 - loss 0.17863766 - time (sec): 35.77 - samples/sec: 164.47 - lr: 0.050000 2023-04-05 12:56:32,895 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:56:32,895 EPOCH 13 done: loss 0.1786 - lr 0.050000 2023-04-05 12:57:02,811 Evaluating as a multi-label problem: False 2023-04-05 12:57:02,831 TRAIN : loss 0.11495152860879898 - f1-score (micro avg) 0.9423 2023-04-05 12:57:06,419 Evaluating as a multi-label problem: False 2023-04-05 12:57:06,429 DEV : loss 0.2527526021003723 - f1-score (micro avg) 0.8571 2023-04-05 12:57:06,433 BAD EPOCHS (no improvement): 1 2023-04-05 12:57:06,435 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:57:09,526 epoch 14 - iter 7/71 - loss 0.19849680 - time (sec): 3.09 - samples/sec: 198.95 - lr: 0.050000 2023-04-05 12:57:13,202 epoch 14 - iter 14/71 - loss 0.18692846 - time (sec): 6.77 - samples/sec: 173.93 - lr: 0.050000 2023-04-05 12:57:16,931 epoch 14 - iter 21/71 - loss 0.16410048 - time (sec): 10.50 - samples/sec: 165.11 - lr: 0.050000 2023-04-05 12:57:20,568 epoch 14 - iter 28/71 - loss 0.17096232 - time (sec): 14.13 - samples/sec: 164.73 - lr: 0.050000 2023-04-05 12:57:24,320 epoch 14 - iter 35/71 - loss 0.17130471 - time (sec): 17.88 - samples/sec: 164.61 - lr: 0.050000 2023-04-05 12:57:28,263 epoch 14 - iter 42/71 - loss 0.17276982 - time (sec): 21.83 - samples/sec: 162.55 - lr: 0.050000 2023-04-05 12:57:31,855 epoch 14 - iter 49/71 - loss 0.17053371 - time (sec): 25.42 - samples/sec: 161.88 - lr: 0.050000 2023-04-05 12:57:35,454 epoch 14 - iter 56/71 - loss 0.17009003 - time (sec): 29.02 - samples/sec: 161.72 - lr: 0.050000 2023-04-05 12:57:39,094 epoch 14 - iter 63/71 - loss 0.16765044 - time (sec): 32.66 - samples/sec: 161.28 - lr: 0.050000 2023-04-05 12:57:42,812 epoch 14 - iter 70/71 - loss 0.17106035 - time (sec): 36.38 - samples/sec: 161.78 - lr: 0.050000 2023-04-05 12:57:43,636 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:57:43,637 EPOCH 14 done: loss 0.1709 - lr 0.050000 2023-04-05 12:58:13,469 Evaluating as a multi-label problem: False 2023-04-05 12:58:13,486 TRAIN : loss 0.09600471705198288 - f1-score (micro avg) 0.9574 2023-04-05 12:58:17,049 Evaluating as a multi-label problem: False 2023-04-05 12:58:17,060 DEV : loss 0.2339187115430832 - f1-score (micro avg) 0.8799 2023-04-05 12:58:17,066 BAD EPOCHS (no improvement): 0 2023-04-05 12:58:17,068 saving best model 2023-04-05 12:58:18,074 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:58:21,099 epoch 15 - iter 7/71 - loss 0.18573106 - time (sec): 3.02 - samples/sec: 195.41 - lr: 0.050000 2023-04-05 12:58:24,823 epoch 15 - iter 14/71 - loss 0.16845787 - time (sec): 6.75 - samples/sec: 173.09 - lr: 0.050000 2023-04-05 12:58:28,520 epoch 15 - iter 21/71 - loss 0.17292932 - time (sec): 10.45 - samples/sec: 168.59 - lr: 0.050000 2023-04-05 12:58:32,285 epoch 15 - iter 28/71 - loss 0.16194007 - time (sec): 14.21 - samples/sec: 164.53 - lr: 0.050000 2023-04-05 12:58:35,962 epoch 15 - iter 35/71 - loss 0.16402796 - time (sec): 17.89 - samples/sec: 163.30 - lr: 0.050000 2023-04-05 12:58:39,707 epoch 15 - iter 42/71 - loss 0.16481449 - time (sec): 21.63 - samples/sec: 163.18 - lr: 0.050000 2023-04-05 12:58:43,272 epoch 15 - iter 49/71 - loss 0.16905415 - time (sec): 25.20 - samples/sec: 163.51 - lr: 0.050000 2023-04-05 12:58:46,892 epoch 15 - iter 56/71 - loss 0.17389761 - time (sec): 28.82 - samples/sec: 162.99 - lr: 0.050000 2023-04-05 12:58:50,539 epoch 15 - iter 63/71 - loss 0.18006799 - time (sec): 32.46 - samples/sec: 163.29 - lr: 0.050000 2023-04-05 12:58:54,178 epoch 15 - iter 70/71 - loss 0.17852127 - time (sec): 36.10 - samples/sec: 162.76 - lr: 0.050000 2023-04-05 12:58:54,989 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:58:54,990 EPOCH 15 done: loss 0.1795 - lr 0.050000 2023-04-05 12:59:25,120 Evaluating as a multi-label problem: False 2023-04-05 12:59:25,142 TRAIN : loss 0.10160095989704132 - f1-score (micro avg) 0.9519 2023-04-05 12:59:28,704 Evaluating as a multi-label problem: False 2023-04-05 12:59:28,715 DEV : loss 0.22547538578510284 - f1-score (micro avg) 0.8848 2023-04-05 12:59:28,722 BAD EPOCHS (no improvement): 0 2023-04-05 12:59:28,724 saving best model 2023-04-05 12:59:29,886 ---------------------------------------------------------------------------------------------------- 2023-04-05 12:59:32,863 epoch 16 - iter 7/71 - loss 0.18803312 - time (sec): 2.98 - samples/sec: 201.30 - lr: 0.050000 2023-04-05 12:59:36,639 epoch 16 - iter 14/71 - loss 0.17472744 - time (sec): 6.75 - samples/sec: 176.98 - lr: 0.050000 2023-04-05 12:59:40,239 epoch 16 - iter 21/71 - loss 0.17674174 - time (sec): 10.35 - samples/sec: 173.11 - lr: 0.050000 2023-04-05 12:59:43,953 epoch 16 - iter 28/71 - loss 0.18068979 - time (sec): 14.07 - samples/sec: 170.13 - lr: 0.050000 2023-04-05 12:59:47,620 epoch 16 - iter 35/71 - loss 0.18222164 - time (sec): 17.73 - samples/sec: 168.22 - lr: 0.050000 2023-04-05 12:59:51,255 epoch 16 - iter 42/71 - loss 0.18013268 - time (sec): 21.37 - samples/sec: 167.36 - lr: 0.050000 2023-04-05 12:59:54,891 epoch 16 - iter 49/71 - loss 0.17236708 - time (sec): 25.00 - samples/sec: 166.02 - lr: 0.050000 2023-04-05 12:59:58,571 epoch 16 - iter 56/71 - loss 0.17486601 - time (sec): 28.68 - samples/sec: 164.55 - lr: 0.050000 2023-04-05 13:00:02,189 epoch 16 - iter 63/71 - loss 0.17192697 - time (sec): 32.30 - samples/sec: 164.58 - lr: 0.050000 2023-04-05 13:00:05,957 epoch 16 - iter 70/71 - loss 0.16671679 - time (sec): 36.07 - samples/sec: 163.16 - lr: 0.050000 2023-04-05 13:00:06,789 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:00:06,790 EPOCH 16 done: loss 0.1660 - lr 0.050000 2023-04-05 13:00:36,903 Evaluating as a multi-label problem: False 2023-04-05 13:00:36,919 TRAIN : loss 0.09528940916061401 - f1-score (micro avg) 0.9534 2023-04-05 13:00:40,480 Evaluating as a multi-label problem: False 2023-04-05 13:00:40,495 DEV : loss 0.2436159998178482 - f1-score (micro avg) 0.8731 2023-04-05 13:00:40,500 BAD EPOCHS (no improvement): 1 2023-04-05 13:00:40,503 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:00:43,573 epoch 17 - iter 7/71 - loss 0.16280295 - time (sec): 3.07 - samples/sec: 187.36 - lr: 0.050000 2023-04-05 13:00:47,316 epoch 17 - iter 14/71 - loss 0.16491960 - time (sec): 6.81 - samples/sec: 171.74 - lr: 0.050000 2023-04-05 13:00:51,066 epoch 17 - iter 21/71 - loss 0.16969325 - time (sec): 10.56 - samples/sec: 171.45 - lr: 0.050000 2023-04-05 13:00:54,815 epoch 17 - iter 28/71 - loss 0.16213035 - time (sec): 14.31 - samples/sec: 166.86 - lr: 0.050000 2023-04-05 13:00:58,536 epoch 17 - iter 35/71 - loss 0.16020484 - time (sec): 18.03 - samples/sec: 165.03 - lr: 0.050000 2023-04-05 13:01:02,186 epoch 17 - iter 42/71 - loss 0.16493772 - time (sec): 21.68 - samples/sec: 162.94 - lr: 0.050000 2023-04-05 13:01:05,927 epoch 17 - iter 49/71 - loss 0.16161421 - time (sec): 25.42 - samples/sec: 162.80 - lr: 0.050000 2023-04-05 13:01:09,546 epoch 17 - iter 56/71 - loss 0.16247358 - time (sec): 29.04 - samples/sec: 163.49 - lr: 0.050000 2023-04-05 13:01:13,292 epoch 17 - iter 63/71 - loss 0.16001124 - time (sec): 32.79 - samples/sec: 161.34 - lr: 0.050000 2023-04-05 13:01:17,024 epoch 17 - iter 70/71 - loss 0.16184123 - time (sec): 36.52 - samples/sec: 161.03 - lr: 0.050000 2023-04-05 13:01:17,872 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:01:17,873 EPOCH 17 done: loss 0.1613 - lr 0.050000 2023-04-05 13:01:48,038 Evaluating as a multi-label problem: False 2023-04-05 13:01:48,055 TRAIN : loss 0.09658616781234741 - f1-score (micro avg) 0.9508 2023-04-05 13:01:51,710 Evaluating as a multi-label problem: False 2023-04-05 13:01:51,723 DEV : loss 0.25625115633010864 - f1-score (micro avg) 0.8651 2023-04-05 13:01:51,730 BAD EPOCHS (no improvement): 2 2023-04-05 13:01:51,731 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:01:54,950 epoch 18 - iter 7/71 - loss 0.14699763 - time (sec): 3.22 - samples/sec: 178.08 - lr: 0.050000 2023-04-05 13:01:58,906 epoch 18 - iter 14/71 - loss 0.11720383 - time (sec): 7.17 - samples/sec: 156.81 - lr: 0.050000 2023-04-05 13:02:02,544 epoch 18 - iter 21/71 - loss 0.12927531 - time (sec): 10.81 - samples/sec: 160.28 - lr: 0.050000 2023-04-05 13:02:06,291 epoch 18 - iter 28/71 - loss 0.13428711 - time (sec): 14.56 - samples/sec: 159.42 - lr: 0.050000 2023-04-05 13:02:10,110 epoch 18 - iter 35/71 - loss 0.13574585 - time (sec): 18.38 - samples/sec: 157.52 - lr: 0.050000 2023-04-05 13:02:13,768 epoch 18 - iter 42/71 - loss 0.13144409 - time (sec): 22.04 - samples/sec: 158.47 - lr: 0.050000 2023-04-05 13:02:17,521 epoch 18 - iter 49/71 - loss 0.13997386 - time (sec): 25.79 - samples/sec: 158.95 - lr: 0.050000 2023-04-05 13:02:21,181 epoch 18 - iter 56/71 - loss 0.15194990 - time (sec): 29.45 - samples/sec: 159.70 - lr: 0.050000 2023-04-05 13:02:24,954 epoch 18 - iter 63/71 - loss 0.15543297 - time (sec): 33.22 - samples/sec: 159.95 - lr: 0.050000 2023-04-05 13:02:28,617 epoch 18 - iter 70/71 - loss 0.15181266 - time (sec): 36.89 - samples/sec: 159.47 - lr: 0.050000 2023-04-05 13:02:29,438 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:02:29,438 EPOCH 18 done: loss 0.1521 - lr 0.050000 2023-04-05 13:03:00,408 Evaluating as a multi-label problem: False 2023-04-05 13:03:00,426 TRAIN : loss 0.07957068085670471 - f1-score (micro avg) 0.9605 2023-04-05 13:03:04,203 Evaluating as a multi-label problem: False 2023-04-05 13:03:04,214 DEV : loss 0.24449127912521362 - f1-score (micro avg) 0.8678 2023-04-05 13:03:04,220 BAD EPOCHS (no improvement): 3 2023-04-05 13:03:04,221 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:03:07,272 epoch 19 - iter 7/71 - loss 0.14268361 - time (sec): 3.05 - samples/sec: 192.50 - lr: 0.050000 2023-04-05 13:03:11,108 epoch 19 - iter 14/71 - loss 0.11705529 - time (sec): 6.89 - samples/sec: 171.38 - lr: 0.050000 2023-04-05 13:03:14,913 epoch 19 - iter 21/71 - loss 0.13677063 - time (sec): 10.69 - samples/sec: 166.87 - lr: 0.050000 2023-04-05 13:03:18,612 epoch 19 - iter 28/71 - loss 0.14036431 - time (sec): 14.39 - samples/sec: 164.42 - lr: 0.050000 2023-04-05 13:03:22,346 epoch 19 - iter 35/71 - loss 0.14069950 - time (sec): 18.12 - samples/sec: 162.99 - lr: 0.050000 2023-04-05 13:03:26,137 epoch 19 - iter 42/71 - loss 0.14416925 - time (sec): 21.91 - samples/sec: 161.81 - lr: 0.050000 2023-04-05 13:03:29,727 epoch 19 - iter 49/71 - loss 0.14774011 - time (sec): 25.50 - samples/sec: 162.05 - lr: 0.050000 2023-04-05 13:03:33,433 epoch 19 - iter 56/71 - loss 0.14539107 - time (sec): 29.21 - samples/sec: 161.11 - lr: 0.050000 2023-04-05 13:03:37,072 epoch 19 - iter 63/71 - loss 0.14446964 - time (sec): 32.85 - samples/sec: 160.61 - lr: 0.050000 2023-04-05 13:03:40,670 epoch 19 - iter 70/71 - loss 0.14622801 - time (sec): 36.45 - samples/sec: 161.22 - lr: 0.050000 2023-04-05 13:03:41,501 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:03:41,502 EPOCH 19 done: loss 0.1476 - lr 0.050000 2023-04-05 13:04:11,752 Evaluating as a multi-label problem: False 2023-04-05 13:04:11,772 TRAIN : loss 0.08018826693296432 - f1-score (micro avg) 0.9626 2023-04-05 13:04:15,357 Evaluating as a multi-label problem: False 2023-04-05 13:04:15,368 DEV : loss 0.2329457700252533 - f1-score (micro avg) 0.8603 2023-04-05 13:04:15,373 Epoch 19: reducing learning rate of group 0 to 2.5000e-02. 2023-04-05 13:04:15,374 BAD EPOCHS (no improvement): 4 2023-04-05 13:04:15,376 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:04:18,529 epoch 20 - iter 7/71 - loss 0.15115543 - time (sec): 3.15 - samples/sec: 187.15 - lr: 0.025000 2023-04-05 13:04:22,232 epoch 20 - iter 14/71 - loss 0.14745089 - time (sec): 6.86 - samples/sec: 170.06 - lr: 0.025000 2023-04-05 13:04:25,979 epoch 20 - iter 21/71 - loss 0.13852706 - time (sec): 10.60 - samples/sec: 162.88 - lr: 0.025000 2023-04-05 13:04:29,734 epoch 20 - iter 28/71 - loss 0.13696755 - time (sec): 14.36 - samples/sec: 161.09 - lr: 0.025000 2023-04-05 13:04:33,339 epoch 20 - iter 35/71 - loss 0.13835721 - time (sec): 17.96 - samples/sec: 162.89 - lr: 0.025000 2023-04-05 13:04:36,924 epoch 20 - iter 42/71 - loss 0.13924535 - time (sec): 21.55 - samples/sec: 163.31 - lr: 0.025000 2023-04-05 13:04:40,601 epoch 20 - iter 49/71 - loss 0.13776399 - time (sec): 25.22 - samples/sec: 163.13 - lr: 0.025000 2023-04-05 13:04:44,100 epoch 20 - iter 56/71 - loss 0.13600369 - time (sec): 28.72 - samples/sec: 163.35 - lr: 0.025000 2023-04-05 13:04:47,772 epoch 20 - iter 63/71 - loss 0.13546503 - time (sec): 32.40 - samples/sec: 163.72 - lr: 0.025000 2023-04-05 13:04:51,285 epoch 20 - iter 70/71 - loss 0.13381809 - time (sec): 35.91 - samples/sec: 163.86 - lr: 0.025000 2023-04-05 13:04:52,110 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:04:52,111 EPOCH 20 done: loss 0.1342 - lr 0.025000 2023-04-05 13:05:21,762 Evaluating as a multi-label problem: False 2023-04-05 13:05:21,781 TRAIN : loss 0.06673520058393478 - f1-score (micro avg) 0.9674 2023-04-05 13:05:25,271 Evaluating as a multi-label problem: False 2023-04-05 13:05:25,281 DEV : loss 0.22465617954730988 - f1-score (micro avg) 0.8848 2023-04-05 13:05:25,286 BAD EPOCHS (no improvement): 0 2023-04-05 13:05:25,287 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:05:28,197 epoch 21 - iter 7/71 - loss 0.14072648 - time (sec): 2.91 - samples/sec: 198.61 - lr: 0.025000 2023-04-05 13:05:31,972 epoch 21 - iter 14/71 - loss 0.12496798 - time (sec): 6.68 - samples/sec: 176.52 - lr: 0.025000 2023-04-05 13:05:35,617 epoch 21 - iter 21/71 - loss 0.12397679 - time (sec): 10.33 - samples/sec: 172.03 - lr: 0.025000 2023-04-05 13:05:39,209 epoch 21 - iter 28/71 - loss 0.12982706 - time (sec): 13.92 - samples/sec: 169.16 - lr: 0.025000 2023-04-05 13:05:42,824 epoch 21 - iter 35/71 - loss 0.12812647 - time (sec): 17.54 - samples/sec: 164.51 - lr: 0.025000 2023-04-05 13:05:46,347 epoch 21 - iter 42/71 - loss 0.12693924 - time (sec): 21.06 - samples/sec: 163.67 - lr: 0.025000 2023-04-05 13:05:49,905 epoch 21 - iter 49/71 - loss 0.12797163 - time (sec): 24.62 - samples/sec: 165.08 - lr: 0.025000 2023-04-05 13:05:53,577 epoch 21 - iter 56/71 - loss 0.13077894 - time (sec): 28.29 - samples/sec: 164.40 - lr: 0.025000 2023-04-05 13:05:57,191 epoch 21 - iter 63/71 - loss 0.13144443 - time (sec): 31.90 - samples/sec: 163.71 - lr: 0.025000 2023-04-05 13:06:00,722 epoch 21 - iter 70/71 - loss 0.13238450 - time (sec): 35.43 - samples/sec: 166.14 - lr: 0.025000 2023-04-05 13:06:01,424 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:06:01,425 EPOCH 21 done: loss 0.1318 - lr 0.025000 2023-04-05 13:06:30,981 Evaluating as a multi-label problem: False 2023-04-05 13:06:30,997 TRAIN : loss 0.06548392027616501 - f1-score (micro avg) 0.9687 2023-04-05 13:06:34,514 Evaluating as a multi-label problem: False 2023-04-05 13:06:34,524 DEV : loss 0.22177472710609436 - f1-score (micro avg) 0.8868 2023-04-05 13:06:34,529 BAD EPOCHS (no improvement): 0 2023-04-05 13:06:34,531 saving best model 2023-04-05 13:06:36,118 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:06:39,070 epoch 22 - iter 7/71 - loss 0.10843301 - time (sec): 2.95 - samples/sec: 193.84 - lr: 0.025000 2023-04-05 13:06:42,690 epoch 22 - iter 14/71 - loss 0.12542959 - time (sec): 6.57 - samples/sec: 174.26 - lr: 0.025000 2023-04-05 13:06:46,379 epoch 22 - iter 21/71 - loss 0.11852796 - time (sec): 10.26 - samples/sec: 165.88 - lr: 0.025000 2023-04-05 13:06:50,145 epoch 22 - iter 28/71 - loss 0.11984452 - time (sec): 14.03 - samples/sec: 165.33 - lr: 0.025000 2023-04-05 13:06:53,619 epoch 22 - iter 35/71 - loss 0.12448799 - time (sec): 17.50 - samples/sec: 166.46 - lr: 0.025000 2023-04-05 13:06:57,251 epoch 22 - iter 42/71 - loss 0.12873390 - time (sec): 21.13 - samples/sec: 165.81 - lr: 0.025000 2023-04-05 13:07:00,920 epoch 22 - iter 49/71 - loss 0.12402659 - time (sec): 24.80 - samples/sec: 164.83 - lr: 0.025000 2023-04-05 13:07:04,547 epoch 22 - iter 56/71 - loss 0.12855204 - time (sec): 28.43 - samples/sec: 165.01 - lr: 0.025000 2023-04-05 13:07:08,144 epoch 22 - iter 63/71 - loss 0.12747032 - time (sec): 32.02 - samples/sec: 165.53 - lr: 0.025000 2023-04-05 13:07:11,868 epoch 22 - iter 70/71 - loss 0.12699574 - time (sec): 35.75 - samples/sec: 164.37 - lr: 0.025000 2023-04-05 13:07:12,670 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:07:12,671 EPOCH 22 done: loss 0.1266 - lr 0.025000 2023-04-05 13:07:42,659 Evaluating as a multi-label problem: False 2023-04-05 13:07:42,680 TRAIN : loss 0.06098590046167374 - f1-score (micro avg) 0.971 2023-04-05 13:07:46,246 Evaluating as a multi-label problem: False 2023-04-05 13:07:46,256 DEV : loss 0.21829380095005035 - f1-score (micro avg) 0.8881 2023-04-05 13:07:46,263 BAD EPOCHS (no improvement): 0 2023-04-05 13:07:46,264 saving best model 2023-04-05 13:07:47,600 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:07:50,798 epoch 23 - iter 7/71 - loss 0.15008917 - time (sec): 3.20 - samples/sec: 187.07 - lr: 0.025000 2023-04-05 13:07:54,819 epoch 23 - iter 14/71 - loss 0.14280265 - time (sec): 7.22 - samples/sec: 162.93 - lr: 0.025000 2023-04-05 13:07:58,438 epoch 23 - iter 21/71 - loss 0.13304625 - time (sec): 10.84 - samples/sec: 160.38 - lr: 0.025000 2023-04-05 13:08:02,077 epoch 23 - iter 28/71 - loss 0.12460526 - time (sec): 14.48 - samples/sec: 161.23 - lr: 0.025000 2023-04-05 13:08:05,787 epoch 23 - iter 35/71 - loss 0.11730006 - time (sec): 18.19 - samples/sec: 161.67 - lr: 0.025000 2023-04-05 13:08:09,415 epoch 23 - iter 42/71 - loss 0.11912754 - time (sec): 21.81 - samples/sec: 161.64 - lr: 0.025000 2023-04-05 13:08:13,061 epoch 23 - iter 49/71 - loss 0.12580741 - time (sec): 25.46 - samples/sec: 161.24 - lr: 0.025000 2023-04-05 13:08:16,757 epoch 23 - iter 56/71 - loss 0.12476487 - time (sec): 29.16 - samples/sec: 161.24 - lr: 0.025000 2023-04-05 13:08:20,720 epoch 23 - iter 63/71 - loss 0.12545874 - time (sec): 33.12 - samples/sec: 160.03 - lr: 0.025000 2023-04-05 13:08:24,409 epoch 23 - iter 70/71 - loss 0.12915321 - time (sec): 36.81 - samples/sec: 159.80 - lr: 0.025000 2023-04-05 13:08:25,252 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:08:25,253 EPOCH 23 done: loss 0.1288 - lr 0.025000 2023-04-05 13:08:55,761 Evaluating as a multi-label problem: False 2023-04-05 13:08:55,783 TRAIN : loss 0.05855342745780945 - f1-score (micro avg) 0.9743 2023-04-05 13:08:59,415 Evaluating as a multi-label problem: False 2023-04-05 13:08:59,424 DEV : loss 0.21743902564048767 - f1-score (micro avg) 0.8901 2023-04-05 13:08:59,429 BAD EPOCHS (no improvement): 0 2023-04-05 13:08:59,431 saving best model 2023-04-05 13:09:00,565 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:09:03,643 epoch 24 - iter 7/71 - loss 0.09932632 - time (sec): 3.08 - samples/sec: 193.76 - lr: 0.025000 2023-04-05 13:09:07,439 epoch 24 - iter 14/71 - loss 0.11461498 - time (sec): 6.87 - samples/sec: 173.88 - lr: 0.025000 2023-04-05 13:09:11,176 epoch 24 - iter 21/71 - loss 0.12509619 - time (sec): 10.61 - samples/sec: 169.85 - lr: 0.025000 2023-04-05 13:09:15,006 epoch 24 - iter 28/71 - loss 0.12628469 - time (sec): 14.44 - samples/sec: 164.13 - lr: 0.025000 2023-04-05 13:09:18,758 epoch 24 - iter 35/71 - loss 0.12660201 - time (sec): 18.19 - samples/sec: 162.82 - lr: 0.025000 2023-04-05 13:09:22,402 epoch 24 - iter 42/71 - loss 0.13016555 - time (sec): 21.84 - samples/sec: 163.50 - lr: 0.025000 2023-04-05 13:09:26,014 epoch 24 - iter 49/71 - loss 0.12703872 - time (sec): 25.45 - samples/sec: 163.55 - lr: 0.025000 2023-04-05 13:09:29,654 epoch 24 - iter 56/71 - loss 0.12356562 - time (sec): 29.09 - samples/sec: 162.13 - lr: 0.025000 2023-04-05 13:09:33,309 epoch 24 - iter 63/71 - loss 0.12854118 - time (sec): 32.74 - samples/sec: 162.21 - lr: 0.025000 2023-04-05 13:09:37,052 epoch 24 - iter 70/71 - loss 0.12844792 - time (sec): 36.49 - samples/sec: 161.19 - lr: 0.025000 2023-04-05 13:09:37,848 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:09:37,849 EPOCH 24 done: loss 0.1292 - lr 0.025000 2023-04-05 13:10:08,561 Evaluating as a multi-label problem: False 2023-04-05 13:10:08,579 TRAIN : loss 0.05762539058923721 - f1-score (micro avg) 0.9705 2023-04-05 13:10:12,126 Evaluating as a multi-label problem: False 2023-04-05 13:10:12,136 DEV : loss 0.21672436594963074 - f1-score (micro avg) 0.8831 2023-04-05 13:10:12,142 BAD EPOCHS (no improvement): 1 2023-04-05 13:10:12,143 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:10:15,262 epoch 25 - iter 7/71 - loss 0.10395902 - time (sec): 3.12 - samples/sec: 192.28 - lr: 0.025000 2023-04-05 13:10:19,012 epoch 25 - iter 14/71 - loss 0.11430174 - time (sec): 6.87 - samples/sec: 175.34 - lr: 0.025000 2023-04-05 13:10:22,832 epoch 25 - iter 21/71 - loss 0.11574001 - time (sec): 10.69 - samples/sec: 168.99 - lr: 0.025000 2023-04-05 13:10:26,537 epoch 25 - iter 28/71 - loss 0.12338041 - time (sec): 14.39 - samples/sec: 165.38 - lr: 0.025000 2023-04-05 13:10:30,355 epoch 25 - iter 35/71 - loss 0.12176332 - time (sec): 18.21 - samples/sec: 161.95 - lr: 0.025000 2023-04-05 13:10:34,020 epoch 25 - iter 42/71 - loss 0.11983290 - time (sec): 21.87 - samples/sec: 161.01 - lr: 0.025000 2023-04-05 13:10:37,749 epoch 25 - iter 49/71 - loss 0.11602293 - time (sec): 25.60 - samples/sec: 159.00 - lr: 0.025000 2023-04-05 13:10:41,445 epoch 25 - iter 56/71 - loss 0.11461970 - time (sec): 29.30 - samples/sec: 158.94 - lr: 0.025000 2023-04-05 13:10:45,185 epoch 25 - iter 63/71 - loss 0.11841378 - time (sec): 33.04 - samples/sec: 159.87 - lr: 0.025000 2023-04-05 13:10:48,840 epoch 25 - iter 70/71 - loss 0.12186183 - time (sec): 36.70 - samples/sec: 160.21 - lr: 0.025000 2023-04-05 13:10:49,657 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:10:49,659 EPOCH 25 done: loss 0.1226 - lr 0.025000 2023-04-05 13:11:20,148 Evaluating as a multi-label problem: False 2023-04-05 13:11:20,167 TRAIN : loss 0.05489451438188553 - f1-score (micro avg) 0.9758 2023-04-05 13:11:23,819 Evaluating as a multi-label problem: False 2023-04-05 13:11:23,833 DEV : loss 0.20767748355865479 - f1-score (micro avg) 0.8959 2023-04-05 13:11:23,837 BAD EPOCHS (no improvement): 0 2023-04-05 13:11:23,840 saving best model 2023-04-05 13:11:25,356 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:11:28,428 epoch 26 - iter 7/71 - loss 0.14960103 - time (sec): 3.07 - samples/sec: 195.35 - lr: 0.025000 2023-04-05 13:11:32,086 epoch 26 - iter 14/71 - loss 0.13459322 - time (sec): 6.73 - samples/sec: 176.72 - lr: 0.025000 2023-04-05 13:11:35,752 epoch 26 - iter 21/71 - loss 0.14211700 - time (sec): 10.39 - samples/sec: 168.54 - lr: 0.025000 2023-04-05 13:11:39,370 epoch 26 - iter 28/71 - loss 0.13695156 - time (sec): 14.01 - samples/sec: 166.77 - lr: 0.025000 2023-04-05 13:11:43,047 epoch 26 - iter 35/71 - loss 0.13044682 - time (sec): 17.69 - samples/sec: 164.66 - lr: 0.025000 2023-04-05 13:11:46,632 epoch 26 - iter 42/71 - loss 0.12209705 - time (sec): 21.27 - samples/sec: 162.92 - lr: 0.025000 2023-04-05 13:11:50,270 epoch 26 - iter 49/71 - loss 0.12045467 - time (sec): 24.91 - samples/sec: 163.36 - lr: 0.025000 2023-04-05 13:11:53,914 epoch 26 - iter 56/71 - loss 0.12313593 - time (sec): 28.56 - samples/sec: 163.85 - lr: 0.025000 2023-04-05 13:11:57,552 epoch 26 - iter 63/71 - loss 0.12808456 - time (sec): 32.20 - samples/sec: 164.06 - lr: 0.025000 2023-04-05 13:12:01,370 epoch 26 - iter 70/71 - loss 0.13030809 - time (sec): 36.01 - samples/sec: 163.25 - lr: 0.025000 2023-04-05 13:12:02,206 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:12:02,207 EPOCH 26 done: loss 0.1298 - lr 0.025000 2023-04-05 13:12:32,718 Evaluating as a multi-label problem: False 2023-04-05 13:12:32,738 TRAIN : loss 0.053497862070798874 - f1-score (micro avg) 0.9731 2023-04-05 13:12:36,375 Evaluating as a multi-label problem: False 2023-04-05 13:12:36,388 DEV : loss 0.21015514433383942 - f1-score (micro avg) 0.8782 2023-04-05 13:12:36,393 BAD EPOCHS (no improvement): 1 2023-04-05 13:12:36,395 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:12:39,594 epoch 27 - iter 7/71 - loss 0.14380847 - time (sec): 3.20 - samples/sec: 190.03 - lr: 0.025000 2023-04-05 13:12:43,352 epoch 27 - iter 14/71 - loss 0.14111434 - time (sec): 6.96 - samples/sec: 173.92 - lr: 0.025000 2023-04-05 13:12:47,121 epoch 27 - iter 21/71 - loss 0.12589741 - time (sec): 10.73 - samples/sec: 168.18 - lr: 0.025000 2023-04-05 13:12:51,064 epoch 27 - iter 28/71 - loss 0.12663826 - time (sec): 14.67 - samples/sec: 163.67 - lr: 0.025000 2023-04-05 13:12:54,988 epoch 27 - iter 35/71 - loss 0.12494789 - time (sec): 18.59 - samples/sec: 160.22 - lr: 0.025000 2023-04-05 13:12:58,720 epoch 27 - iter 42/71 - loss 0.12722136 - time (sec): 22.33 - samples/sec: 158.56 - lr: 0.025000 2023-04-05 13:13:02,624 epoch 27 - iter 49/71 - loss 0.12614770 - time (sec): 26.23 - samples/sec: 157.34 - lr: 0.025000 2023-04-05 13:13:06,387 epoch 27 - iter 56/71 - loss 0.12524578 - time (sec): 29.99 - samples/sec: 156.77 - lr: 0.025000 2023-04-05 13:13:10,205 epoch 27 - iter 63/71 - loss 0.12175985 - time (sec): 33.81 - samples/sec: 156.61 - lr: 0.025000 2023-04-05 13:13:14,135 epoch 27 - iter 70/71 - loss 0.12627498 - time (sec): 37.74 - samples/sec: 155.75 - lr: 0.025000 2023-04-05 13:13:15,072 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:13:15,073 EPOCH 27 done: loss 0.1258 - lr 0.025000 2023-04-05 13:13:46,132 Evaluating as a multi-label problem: False 2023-04-05 13:13:46,148 TRAIN : loss 0.05426767095923424 - f1-score (micro avg) 0.9714 2023-04-05 13:13:49,886 Evaluating as a multi-label problem: False 2023-04-05 13:13:49,900 DEV : loss 0.21591384708881378 - f1-score (micro avg) 0.8922 2023-04-05 13:13:49,905 BAD EPOCHS (no improvement): 2 2023-04-05 13:13:49,907 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:13:53,123 epoch 28 - iter 7/71 - loss 0.14167046 - time (sec): 3.21 - samples/sec: 183.52 - lr: 0.025000 2023-04-05 13:13:56,811 epoch 28 - iter 14/71 - loss 0.12129582 - time (sec): 6.90 - samples/sec: 174.86 - lr: 0.025000 2023-04-05 13:14:00,650 epoch 28 - iter 21/71 - loss 0.11742410 - time (sec): 10.74 - samples/sec: 170.64 - lr: 0.025000 2023-04-05 13:14:04,744 epoch 28 - iter 28/71 - loss 0.11524606 - time (sec): 14.84 - samples/sec: 163.59 - lr: 0.025000 2023-04-05 13:14:08,572 epoch 28 - iter 35/71 - loss 0.11837341 - time (sec): 18.66 - samples/sec: 161.11 - lr: 0.025000 2023-04-05 13:14:12,510 epoch 28 - iter 42/71 - loss 0.11728808 - time (sec): 22.60 - samples/sec: 159.06 - lr: 0.025000 2023-04-05 13:14:16,455 epoch 28 - iter 49/71 - loss 0.11521758 - time (sec): 26.55 - samples/sec: 154.90 - lr: 0.025000 2023-04-05 13:14:20,473 epoch 28 - iter 56/71 - loss 0.11345810 - time (sec): 30.57 - samples/sec: 152.56 - lr: 0.025000 2023-04-05 13:14:24,545 epoch 28 - iter 63/71 - loss 0.11271746 - time (sec): 34.64 - samples/sec: 152.47 - lr: 0.025000 2023-04-05 13:14:28,525 epoch 28 - iter 70/71 - loss 0.10973027 - time (sec): 38.62 - samples/sec: 152.11 - lr: 0.025000 2023-04-05 13:14:29,417 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:14:29,418 EPOCH 28 done: loss 0.1105 - lr 0.025000 2023-04-05 13:15:01,734 Evaluating as a multi-label problem: False 2023-04-05 13:15:01,755 TRAIN : loss 0.05138213932514191 - f1-score (micro avg) 0.9737 2023-04-05 13:15:05,435 Evaluating as a multi-label problem: False 2023-04-05 13:15:05,449 DEV : loss 0.20661891996860504 - f1-score (micro avg) 0.9 2023-04-05 13:15:05,454 BAD EPOCHS (no improvement): 0 2023-04-05 13:15:05,456 saving best model 2023-04-05 13:15:06,556 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:15:09,647 epoch 29 - iter 7/71 - loss 0.09828948 - time (sec): 3.09 - samples/sec: 191.35 - lr: 0.025000 2023-04-05 13:15:13,489 epoch 29 - iter 14/71 - loss 0.09758705 - time (sec): 6.93 - samples/sec: 169.24 - lr: 0.025000 2023-04-05 13:15:17,592 epoch 29 - iter 21/71 - loss 0.10076913 - time (sec): 11.03 - samples/sec: 159.06 - lr: 0.025000 2023-04-05 13:15:21,325 epoch 29 - iter 28/71 - loss 0.10712337 - time (sec): 14.77 - samples/sec: 159.01 - lr: 0.025000 2023-04-05 13:15:25,125 epoch 29 - iter 35/71 - loss 0.10798193 - time (sec): 18.57 - samples/sec: 159.10 - lr: 0.025000 2023-04-05 13:15:28,807 epoch 29 - iter 42/71 - loss 0.10437713 - time (sec): 22.25 - samples/sec: 159.56 - lr: 0.025000 2023-04-05 13:15:32,624 epoch 29 - iter 49/71 - loss 0.10833207 - time (sec): 26.07 - samples/sec: 158.25 - lr: 0.025000 2023-04-05 13:15:36,363 epoch 29 - iter 56/71 - loss 0.10749686 - time (sec): 29.80 - samples/sec: 158.10 - lr: 0.025000 2023-04-05 13:15:40,194 epoch 29 - iter 63/71 - loss 0.10740193 - time (sec): 33.64 - samples/sec: 157.39 - lr: 0.025000 2023-04-05 13:15:43,973 epoch 29 - iter 70/71 - loss 0.10763063 - time (sec): 37.41 - samples/sec: 157.24 - lr: 0.025000 2023-04-05 13:15:44,840 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:15:44,841 EPOCH 29 done: loss 0.1075 - lr 0.025000 2023-04-05 13:16:16,518 Evaluating as a multi-label problem: False 2023-04-05 13:16:16,538 TRAIN : loss 0.04754020646214485 - f1-score (micro avg) 0.9807 2023-04-05 13:16:20,428 Evaluating as a multi-label problem: False 2023-04-05 13:16:20,441 DEV : loss 0.20554056763648987 - f1-score (micro avg) 0.8831 2023-04-05 13:16:20,447 BAD EPOCHS (no improvement): 1 2023-04-05 13:16:20,449 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:16:23,644 epoch 30 - iter 7/71 - loss 0.12214543 - time (sec): 3.19 - samples/sec: 190.64 - lr: 0.025000 2023-04-05 13:16:27,504 epoch 30 - iter 14/71 - loss 0.10450958 - time (sec): 7.05 - samples/sec: 169.69 - lr: 0.025000 2023-04-05 13:16:31,376 epoch 30 - iter 21/71 - loss 0.10887640 - time (sec): 10.93 - samples/sec: 163.00 - lr: 0.025000 2023-04-05 13:16:35,226 epoch 30 - iter 28/71 - loss 0.10731823 - time (sec): 14.78 - samples/sec: 160.73 - lr: 0.025000 2023-04-05 13:16:39,102 epoch 30 - iter 35/71 - loss 0.10467509 - time (sec): 18.65 - samples/sec: 158.64 - lr: 0.025000 2023-04-05 13:16:43,120 epoch 30 - iter 42/71 - loss 0.11094270 - time (sec): 22.67 - samples/sec: 156.95 - lr: 0.025000 2023-04-05 13:16:46,982 epoch 30 - iter 49/71 - loss 0.10634089 - time (sec): 26.53 - samples/sec: 157.10 - lr: 0.025000 2023-04-05 13:16:51,073 epoch 30 - iter 56/71 - loss 0.10685095 - time (sec): 30.62 - samples/sec: 154.23 - lr: 0.025000 2023-04-05 13:16:55,184 epoch 30 - iter 63/71 - loss 0.11220058 - time (sec): 34.73 - samples/sec: 152.73 - lr: 0.025000 2023-04-05 13:16:59,355 epoch 30 - iter 70/71 - loss 0.11420165 - time (sec): 38.91 - samples/sec: 151.32 - lr: 0.025000 2023-04-05 13:17:00,290 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:17:00,291 EPOCH 30 done: loss 0.1138 - lr 0.025000 2023-04-05 13:17:33,935 Evaluating as a multi-label problem: False 2023-04-05 13:17:33,955 TRAIN : loss 0.049051374197006226 - f1-score (micro avg) 0.9765 2023-04-05 13:17:37,925 Evaluating as a multi-label problem: False 2023-04-05 13:17:37,937 DEV : loss 0.21063490211963654 - f1-score (micro avg) 0.8989 2023-04-05 13:17:37,943 BAD EPOCHS (no improvement): 2 2023-04-05 13:17:37,944 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:17:41,197 epoch 31 - iter 7/71 - loss 0.12085872 - time (sec): 3.25 - samples/sec: 182.37 - lr: 0.025000 2023-04-05 13:17:44,963 epoch 31 - iter 14/71 - loss 0.12129786 - time (sec): 7.02 - samples/sec: 164.16 - lr: 0.025000 2023-04-05 13:17:48,794 epoch 31 - iter 21/71 - loss 0.11010256 - time (sec): 10.85 - samples/sec: 160.39 - lr: 0.025000 2023-04-05 13:17:52,660 epoch 31 - iter 28/71 - loss 0.11869391 - time (sec): 14.71 - samples/sec: 157.67 - lr: 0.025000 2023-04-05 13:17:56,463 epoch 31 - iter 35/71 - loss 0.11978621 - time (sec): 18.52 - samples/sec: 157.63 - lr: 0.025000 2023-04-05 13:18:00,669 epoch 31 - iter 42/71 - loss 0.12158608 - time (sec): 22.72 - samples/sec: 153.76 - lr: 0.025000 2023-04-05 13:18:04,928 epoch 31 - iter 49/71 - loss 0.12205383 - time (sec): 26.98 - samples/sec: 151.10 - lr: 0.025000 2023-04-05 13:18:08,749 epoch 31 - iter 56/71 - loss 0.12407999 - time (sec): 30.80 - samples/sec: 152.35 - lr: 0.025000 2023-04-05 13:18:12,993 epoch 31 - iter 63/71 - loss 0.12182435 - time (sec): 35.05 - samples/sec: 151.14 - lr: 0.025000 2023-04-05 13:18:17,045 epoch 31 - iter 70/71 - loss 0.11982468 - time (sec): 39.10 - samples/sec: 150.39 - lr: 0.025000 2023-04-05 13:18:17,906 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:18:17,907 EPOCH 31 done: loss 0.1199 - lr 0.025000 2023-04-05 13:18:50,399 Evaluating as a multi-label problem: False 2023-04-05 13:18:50,415 TRAIN : loss 0.04879188537597656 - f1-score (micro avg) 0.9767 2023-04-05 13:18:54,298 Evaluating as a multi-label problem: False 2023-04-05 13:18:54,311 DEV : loss 0.21518820524215698 - f1-score (micro avg) 0.8856 2023-04-05 13:18:54,319 BAD EPOCHS (no improvement): 3 2023-04-05 13:18:54,320 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:18:57,672 epoch 32 - iter 7/71 - loss 0.09686100 - time (sec): 3.35 - samples/sec: 182.98 - lr: 0.025000 2023-04-05 13:19:01,687 epoch 32 - iter 14/71 - loss 0.11264203 - time (sec): 7.36 - samples/sec: 163.07 - lr: 0.025000 2023-04-05 13:19:05,453 epoch 32 - iter 21/71 - loss 0.11587441 - time (sec): 11.13 - samples/sec: 160.90 - lr: 0.025000 2023-04-05 13:19:09,495 epoch 32 - iter 28/71 - loss 0.11953699 - time (sec): 15.17 - samples/sec: 154.68 - lr: 0.025000 2023-04-05 13:19:13,613 epoch 32 - iter 35/71 - loss 0.11792902 - time (sec): 19.29 - samples/sec: 151.52 - lr: 0.025000 2023-04-05 13:19:17,553 epoch 32 - iter 42/71 - loss 0.11791943 - time (sec): 23.23 - samples/sec: 151.60 - lr: 0.025000 2023-04-05 13:19:21,512 epoch 32 - iter 49/71 - loss 0.11984092 - time (sec): 27.19 - samples/sec: 151.85 - lr: 0.025000 2023-04-05 13:19:25,273 epoch 32 - iter 56/71 - loss 0.12028672 - time (sec): 30.95 - samples/sec: 152.37 - lr: 0.025000 2023-04-05 13:19:29,003 epoch 32 - iter 63/71 - loss 0.11505568 - time (sec): 34.68 - samples/sec: 153.28 - lr: 0.025000 2023-04-05 13:19:32,783 epoch 32 - iter 70/71 - loss 0.11360413 - time (sec): 38.46 - samples/sec: 152.91 - lr: 0.025000 2023-04-05 13:19:33,667 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:19:33,668 EPOCH 32 done: loss 0.1136 - lr 0.025000 2023-04-05 13:20:05,299 Evaluating as a multi-label problem: False 2023-04-05 13:20:05,317 TRAIN : loss 0.04602975398302078 - f1-score (micro avg) 0.9764 2023-04-05 13:20:08,959 Evaluating as a multi-label problem: False 2023-04-05 13:20:08,971 DEV : loss 0.2196006327867508 - f1-score (micro avg) 0.8831 2023-04-05 13:20:08,977 Epoch 32: reducing learning rate of group 0 to 1.2500e-02. 2023-04-05 13:20:08,978 BAD EPOCHS (no improvement): 4 2023-04-05 13:20:08,980 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:20:12,222 epoch 33 - iter 7/71 - loss 0.11581283 - time (sec): 3.24 - samples/sec: 177.97 - lr: 0.012500 2023-04-05 13:20:16,008 epoch 33 - iter 14/71 - loss 0.11978144 - time (sec): 7.03 - samples/sec: 169.61 - lr: 0.012500 2023-04-05 13:20:19,936 epoch 33 - iter 21/71 - loss 0.11537349 - time (sec): 10.96 - samples/sec: 159.65 - lr: 0.012500 2023-04-05 13:20:23,770 epoch 33 - iter 28/71 - loss 0.10736190 - time (sec): 14.79 - samples/sec: 160.05 - lr: 0.012500 2023-04-05 13:20:27,878 epoch 33 - iter 35/71 - loss 0.11021785 - time (sec): 18.90 - samples/sec: 156.10 - lr: 0.012500 2023-04-05 13:20:31,966 epoch 33 - iter 42/71 - loss 0.10713205 - time (sec): 22.99 - samples/sec: 152.88 - lr: 0.012500 2023-04-05 13:20:35,718 epoch 33 - iter 49/71 - loss 0.10934260 - time (sec): 26.74 - samples/sec: 153.53 - lr: 0.012500 2023-04-05 13:20:39,467 epoch 33 - iter 56/71 - loss 0.10654440 - time (sec): 30.49 - samples/sec: 153.51 - lr: 0.012500 2023-04-05 13:20:43,388 epoch 33 - iter 63/71 - loss 0.10765648 - time (sec): 34.41 - samples/sec: 153.34 - lr: 0.012500 2023-04-05 13:20:47,348 epoch 33 - iter 70/71 - loss 0.11051452 - time (sec): 38.37 - samples/sec: 153.15 - lr: 0.012500 2023-04-05 13:20:48,204 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:20:48,205 EPOCH 33 done: loss 0.1101 - lr 0.012500 2023-04-05 13:21:21,733 Evaluating as a multi-label problem: False 2023-04-05 13:21:21,755 TRAIN : loss 0.04103841260075569 - f1-score (micro avg) 0.9818 2023-04-05 13:21:25,443 Evaluating as a multi-label problem: False 2023-04-05 13:21:25,453 DEV : loss 0.19789864122867584 - f1-score (micro avg) 0.8848 2023-04-05 13:21:25,460 BAD EPOCHS (no improvement): 1 2023-04-05 13:21:25,462 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:21:28,952 epoch 34 - iter 7/71 - loss 0.11239772 - time (sec): 3.49 - samples/sec: 171.41 - lr: 0.012500 2023-04-05 13:21:33,108 epoch 34 - iter 14/71 - loss 0.09980787 - time (sec): 7.64 - samples/sec: 155.67 - lr: 0.012500 2023-04-05 13:21:37,298 epoch 34 - iter 21/71 - loss 0.09221232 - time (sec): 11.84 - samples/sec: 150.74 - lr: 0.012500 2023-04-05 13:21:41,356 epoch 34 - iter 28/71 - loss 0.09881858 - time (sec): 15.89 - samples/sec: 148.74 - lr: 0.012500 2023-04-05 13:21:45,307 epoch 34 - iter 35/71 - loss 0.10014088 - time (sec): 19.84 - samples/sec: 147.75 - lr: 0.012500 2023-04-05 13:21:49,356 epoch 34 - iter 42/71 - loss 0.09659220 - time (sec): 23.89 - samples/sec: 147.12 - lr: 0.012500 2023-04-05 13:21:53,323 epoch 34 - iter 49/71 - loss 0.09546800 - time (sec): 27.86 - samples/sec: 147.34 - lr: 0.012500 2023-04-05 13:21:57,491 epoch 34 - iter 56/71 - loss 0.09803050 - time (sec): 32.03 - samples/sec: 146.53 - lr: 0.012500 2023-04-05 13:22:01,642 epoch 34 - iter 63/71 - loss 0.10373384 - time (sec): 36.18 - samples/sec: 147.24 - lr: 0.012500 2023-04-05 13:22:05,766 epoch 34 - iter 70/71 - loss 0.10094599 - time (sec): 40.30 - samples/sec: 145.94 - lr: 0.012500 2023-04-05 13:22:06,689 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:22:06,690 EPOCH 34 done: loss 0.1011 - lr 0.012500 2023-04-05 13:22:39,510 Evaluating as a multi-label problem: False 2023-04-05 13:22:39,527 TRAIN : loss 0.04190446436405182 - f1-score (micro avg) 0.9799 2023-04-05 13:22:43,323 Evaluating as a multi-label problem: False 2023-04-05 13:22:43,335 DEV : loss 0.20444391667842865 - f1-score (micro avg) 0.8951 2023-04-05 13:22:43,341 BAD EPOCHS (no improvement): 2 2023-04-05 13:22:43,342 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:22:46,687 epoch 35 - iter 7/71 - loss 0.12740153 - time (sec): 3.34 - samples/sec: 179.39 - lr: 0.012500 2023-04-05 13:22:50,605 epoch 35 - iter 14/71 - loss 0.09989261 - time (sec): 7.26 - samples/sec: 162.61 - lr: 0.012500 2023-04-05 13:22:54,708 epoch 35 - iter 21/71 - loss 0.09380928 - time (sec): 11.37 - samples/sec: 158.03 - lr: 0.012500 2023-04-05 13:22:58,737 epoch 35 - iter 28/71 - loss 0.10283196 - time (sec): 15.39 - samples/sec: 154.02 - lr: 0.012500 2023-04-05 13:23:02,706 epoch 35 - iter 35/71 - loss 0.10197240 - time (sec): 19.36 - samples/sec: 153.54 - lr: 0.012500 2023-04-05 13:23:06,476 epoch 35 - iter 42/71 - loss 0.09712460 - time (sec): 23.13 - samples/sec: 152.98 - lr: 0.012500 2023-04-05 13:23:10,261 epoch 35 - iter 49/71 - loss 0.09975067 - time (sec): 26.92 - samples/sec: 152.68 - lr: 0.012500 2023-04-05 13:23:14,485 epoch 35 - iter 56/71 - loss 0.09870986 - time (sec): 31.14 - samples/sec: 150.82 - lr: 0.012500 2023-04-05 13:23:18,733 epoch 35 - iter 63/71 - loss 0.10022879 - time (sec): 35.39 - samples/sec: 149.50 - lr: 0.012500 2023-04-05 13:23:22,647 epoch 35 - iter 70/71 - loss 0.10151940 - time (sec): 39.30 - samples/sec: 149.76 - lr: 0.012500 2023-04-05 13:23:23,519 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:23:23,520 EPOCH 35 done: loss 0.1017 - lr 0.012500 2023-04-05 13:23:56,251 Evaluating as a multi-label problem: False 2023-04-05 13:23:56,267 TRAIN : loss 0.03953753411769867 - f1-score (micro avg) 0.9826 2023-04-05 13:24:00,070 Evaluating as a multi-label problem: False 2023-04-05 13:24:00,083 DEV : loss 0.20463646948337555 - f1-score (micro avg) 0.8939 2023-04-05 13:24:00,089 BAD EPOCHS (no improvement): 3 2023-04-05 13:24:00,091 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:24:03,512 epoch 36 - iter 7/71 - loss 0.09733479 - time (sec): 3.42 - samples/sec: 169.82 - lr: 0.012500 2023-04-05 13:24:07,519 epoch 36 - iter 14/71 - loss 0.09929472 - time (sec): 7.43 - samples/sec: 155.63 - lr: 0.012500 2023-04-05 13:24:11,244 epoch 36 - iter 21/71 - loss 0.09236051 - time (sec): 11.15 - samples/sec: 156.73 - lr: 0.012500 2023-04-05 13:24:14,927 epoch 36 - iter 28/71 - loss 0.09717889 - time (sec): 14.84 - samples/sec: 158.54 - lr: 0.012500 2023-04-05 13:24:18,720 epoch 36 - iter 35/71 - loss 0.09814890 - time (sec): 18.63 - samples/sec: 157.23 - lr: 0.012500 2023-04-05 13:24:22,810 epoch 36 - iter 42/71 - loss 0.09793933 - time (sec): 22.72 - samples/sec: 154.50 - lr: 0.012500 2023-04-05 13:24:26,643 epoch 36 - iter 49/71 - loss 0.09698078 - time (sec): 26.55 - samples/sec: 154.15 - lr: 0.012500 2023-04-05 13:24:30,395 epoch 36 - iter 56/71 - loss 0.09628057 - time (sec): 30.30 - samples/sec: 155.19 - lr: 0.012500 2023-04-05 13:24:34,471 epoch 36 - iter 63/71 - loss 0.09878813 - time (sec): 34.38 - samples/sec: 154.22 - lr: 0.012500 2023-04-05 13:24:38,425 epoch 36 - iter 70/71 - loss 0.10012306 - time (sec): 38.33 - samples/sec: 153.20 - lr: 0.012500 2023-04-05 13:24:39,322 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:24:39,323 EPOCH 36 done: loss 0.1011 - lr 0.012500 2023-04-05 13:25:11,618 Evaluating as a multi-label problem: False 2023-04-05 13:25:11,634 TRAIN : loss 0.03854582458734512 - f1-score (micro avg) 0.9824 2023-04-05 13:25:15,735 Evaluating as a multi-label problem: False 2023-04-05 13:25:15,747 DEV : loss 0.20157837867736816 - f1-score (micro avg) 0.8885 2023-04-05 13:25:15,751 Epoch 36: reducing learning rate of group 0 to 6.2500e-03. 2023-04-05 13:25:15,752 BAD EPOCHS (no improvement): 4 2023-04-05 13:25:15,754 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:25:19,364 epoch 37 - iter 7/71 - loss 0.10420912 - time (sec): 3.61 - samples/sec: 165.95 - lr: 0.006250 2023-04-05 13:25:23,503 epoch 37 - iter 14/71 - loss 0.09970839 - time (sec): 7.75 - samples/sec: 153.57 - lr: 0.006250 2023-04-05 13:25:27,746 epoch 37 - iter 21/71 - loss 0.09589796 - time (sec): 11.99 - samples/sec: 149.02 - lr: 0.006250 2023-04-05 13:25:31,896 epoch 37 - iter 28/71 - loss 0.09015041 - time (sec): 16.14 - samples/sec: 145.40 - lr: 0.006250 2023-04-05 13:25:35,703 epoch 37 - iter 35/71 - loss 0.09060108 - time (sec): 19.95 - samples/sec: 145.42 - lr: 0.006250 2023-04-05 13:25:39,605 epoch 37 - iter 42/71 - loss 0.08912907 - time (sec): 23.85 - samples/sec: 145.99 - lr: 0.006250 2023-04-05 13:25:43,386 epoch 37 - iter 49/71 - loss 0.09042087 - time (sec): 27.63 - samples/sec: 148.27 - lr: 0.006250 2023-04-05 13:25:47,332 epoch 37 - iter 56/71 - loss 0.08986784 - time (sec): 31.58 - samples/sec: 148.59 - lr: 0.006250 2023-04-05 13:25:51,296 epoch 37 - iter 63/71 - loss 0.09147423 - time (sec): 35.54 - samples/sec: 148.87 - lr: 0.006250 2023-04-05 13:25:55,383 epoch 37 - iter 70/71 - loss 0.09523919 - time (sec): 39.63 - samples/sec: 148.30 - lr: 0.006250 2023-04-05 13:25:56,305 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:25:56,307 EPOCH 37 done: loss 0.0947 - lr 0.006250 2023-04-05 13:26:28,852 Evaluating as a multi-label problem: False 2023-04-05 13:26:28,871 TRAIN : loss 0.03739665448665619 - f1-score (micro avg) 0.9842 2023-04-05 13:26:32,646 Evaluating as a multi-label problem: False 2023-04-05 13:26:32,656 DEV : loss 0.20396985113620758 - f1-score (micro avg) 0.8972 2023-04-05 13:26:32,663 BAD EPOCHS (no improvement): 1 2023-04-05 13:26:32,664 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:26:35,733 epoch 38 - iter 7/71 - loss 0.05286205 - time (sec): 3.07 - samples/sec: 184.49 - lr: 0.006250 2023-04-05 13:26:39,585 epoch 38 - iter 14/71 - loss 0.07231140 - time (sec): 6.92 - samples/sec: 168.21 - lr: 0.006250 2023-04-05 13:26:43,417 epoch 38 - iter 21/71 - loss 0.07400087 - time (sec): 10.75 - samples/sec: 163.88 - lr: 0.006250 2023-04-05 13:26:47,164 epoch 38 - iter 28/71 - loss 0.08128836 - time (sec): 14.50 - samples/sec: 161.87 - lr: 0.006250 2023-04-05 13:26:50,941 epoch 38 - iter 35/71 - loss 0.08052234 - time (sec): 18.28 - samples/sec: 161.08 - lr: 0.006250 2023-04-05 13:26:54,710 epoch 38 - iter 42/71 - loss 0.08768679 - time (sec): 22.04 - samples/sec: 158.99 - lr: 0.006250 2023-04-05 13:26:58,468 epoch 38 - iter 49/71 - loss 0.08866679 - time (sec): 25.80 - samples/sec: 159.44 - lr: 0.006250 2023-04-05 13:27:02,174 epoch 38 - iter 56/71 - loss 0.09098401 - time (sec): 29.51 - samples/sec: 159.68 - lr: 0.006250 2023-04-05 13:27:06,037 epoch 38 - iter 63/71 - loss 0.09004818 - time (sec): 33.37 - samples/sec: 158.34 - lr: 0.006250 2023-04-05 13:27:09,739 epoch 38 - iter 70/71 - loss 0.09029994 - time (sec): 37.07 - samples/sec: 158.63 - lr: 0.006250 2023-04-05 13:27:10,609 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:27:10,610 EPOCH 38 done: loss 0.0905 - lr 0.006250 2023-04-05 13:27:41,464 Evaluating as a multi-label problem: False 2023-04-05 13:27:41,484 TRAIN : loss 0.03672816604375839 - f1-score (micro avg) 0.984 2023-04-05 13:27:45,153 Evaluating as a multi-label problem: False 2023-04-05 13:27:45,162 DEV : loss 0.1981133222579956 - f1-score (micro avg) 0.9026 2023-04-05 13:27:45,168 BAD EPOCHS (no improvement): 0 2023-04-05 13:27:45,170 saving best model 2023-04-05 13:27:46,871 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:27:50,071 epoch 39 - iter 7/71 - loss 0.09828755 - time (sec): 3.20 - samples/sec: 178.58 - lr: 0.006250 2023-04-05 13:27:53,862 epoch 39 - iter 14/71 - loss 0.08947759 - time (sec): 6.99 - samples/sec: 166.98 - lr: 0.006250 2023-04-05 13:27:57,588 epoch 39 - iter 21/71 - loss 0.09770654 - time (sec): 10.71 - samples/sec: 164.63 - lr: 0.006250 2023-04-05 13:28:01,143 epoch 39 - iter 28/71 - loss 0.09769007 - time (sec): 14.27 - samples/sec: 162.79 - lr: 0.006250 2023-04-05 13:28:04,815 epoch 39 - iter 35/71 - loss 0.09508572 - time (sec): 17.94 - samples/sec: 161.85 - lr: 0.006250 2023-04-05 13:28:08,491 epoch 39 - iter 42/71 - loss 0.09602471 - time (sec): 21.62 - samples/sec: 160.52 - lr: 0.006250 2023-04-05 13:28:12,205 epoch 39 - iter 49/71 - loss 0.09330580 - time (sec): 25.33 - samples/sec: 158.85 - lr: 0.006250 2023-04-05 13:28:15,838 epoch 39 - iter 56/71 - loss 0.09344274 - time (sec): 28.96 - samples/sec: 160.30 - lr: 0.006250 2023-04-05 13:28:19,582 epoch 39 - iter 63/71 - loss 0.09424575 - time (sec): 32.71 - samples/sec: 161.12 - lr: 0.006250 2023-04-05 13:28:23,266 epoch 39 - iter 70/71 - loss 0.09570928 - time (sec): 36.39 - samples/sec: 161.54 - lr: 0.006250 2023-04-05 13:28:24,136 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:28:24,137 EPOCH 39 done: loss 0.0955 - lr 0.006250 2023-04-05 13:28:55,230 Evaluating as a multi-label problem: False 2023-04-05 13:28:55,254 TRAIN : loss 0.03638274222612381 - f1-score (micro avg) 0.984 2023-04-05 13:28:59,307 Evaluating as a multi-label problem: False 2023-04-05 13:28:59,322 DEV : loss 0.20241594314575195 - f1-score (micro avg) 0.8935 2023-04-05 13:28:59,327 BAD EPOCHS (no improvement): 1 2023-04-05 13:28:59,329 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:29:02,529 epoch 40 - iter 7/71 - loss 0.10842314 - time (sec): 3.20 - samples/sec: 172.19 - lr: 0.006250 2023-04-05 13:29:06,624 epoch 40 - iter 14/71 - loss 0.10954390 - time (sec): 7.29 - samples/sec: 154.36 - lr: 0.006250 2023-04-05 13:29:10,512 epoch 40 - iter 21/71 - loss 0.11209631 - time (sec): 11.18 - samples/sec: 155.68 - lr: 0.006250 2023-04-05 13:29:14,381 epoch 40 - iter 28/71 - loss 0.10212289 - time (sec): 15.05 - samples/sec: 153.20 - lr: 0.006250 2023-04-05 13:29:18,226 epoch 40 - iter 35/71 - loss 0.09686748 - time (sec): 18.90 - samples/sec: 154.84 - lr: 0.006250 2023-04-05 13:29:22,011 epoch 40 - iter 42/71 - loss 0.09428059 - time (sec): 22.68 - samples/sec: 155.41 - lr: 0.006250 2023-04-05 13:29:25,738 epoch 40 - iter 49/71 - loss 0.09207549 - time (sec): 26.41 - samples/sec: 156.04 - lr: 0.006250 2023-04-05 13:29:29,548 epoch 40 - iter 56/71 - loss 0.09320325 - time (sec): 30.22 - samples/sec: 156.72 - lr: 0.006250 2023-04-05 13:29:33,362 epoch 40 - iter 63/71 - loss 0.09517989 - time (sec): 34.03 - samples/sec: 155.61 - lr: 0.006250 2023-04-05 13:29:37,104 epoch 40 - iter 70/71 - loss 0.09762175 - time (sec): 37.78 - samples/sec: 155.84 - lr: 0.006250 2023-04-05 13:29:37,944 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:29:37,944 EPOCH 40 done: loss 0.0976 - lr 0.006250 2023-04-05 13:30:09,218 Evaluating as a multi-label problem: False 2023-04-05 13:30:09,236 TRAIN : loss 0.03600259870290756 - f1-score (micro avg) 0.9838 2023-04-05 13:30:13,253 Evaluating as a multi-label problem: False 2023-04-05 13:30:13,263 DEV : loss 0.20378927886486053 - f1-score (micro avg) 0.8939 2023-04-05 13:30:13,269 BAD EPOCHS (no improvement): 2 2023-04-05 13:30:14,367 ---------------------------------------------------------------------------------------------------- 2023-04-05 13:30:18,413 SequenceTagger predicts: Dictionary with 11 tags: O, S-PERSON, B-PERSON, E-PERSON, I-PERSON, S-ORG, B-ORG, E-ORG, I-ORG, , 2023-04-05 13:30:24,734 Evaluating as a multi-label problem: False 2023-04-05 13:30:24,746 0.9125 0.905 0.9087 0.8456 2023-04-05 13:30:24,747 Results: - F-score (micro) 0.9087 - F-score (macro) 0.8766 - Accuracy 0.8456 By class: precision recall f1-score support PERSON 0.9391 0.9443 0.9417 359 ORG 0.8319 0.7920 0.8115 125 micro avg 0.9125 0.9050 0.9087 484 macro avg 0.8855 0.8681 0.8766 484 weighted avg 0.9114 0.9050 0.9080 484 2023-04-05 13:30:24,748 ----------------------------------------------------------------------------------------------------