diff --git "a/training.log" "b/training.log" new file mode 100644--- /dev/null +++ "b/training.log" @@ -0,0 +1,1579 @@ +2023-05-15 21:27:39,581 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:39,581 Model: "SequenceTagger( + (embeddings): StackedEmbeddings( + (list_embedding_0): WordEmbeddings( + 'de' + (embedding): Embedding(1000000, 300) + ) + (list_embedding_1): FlairEmbeddings( + (lm): LanguageModel( + (drop): Dropout(p=0.25, inplace=False) + (encoder): Embedding(275, 100) + (rnn): LSTM(100, 2048) + ) + ) + (list_embedding_2): FlairEmbeddings( + (lm): LanguageModel( + (drop): Dropout(p=0.25, inplace=False) + (encoder): Embedding(275, 100) + (rnn): LSTM(100, 2048) + ) + ) + ) + (word_dropout): WordDropout(p=0.05) + (locked_dropout): LockedDropout(p=0.5) + (embedding2nn): Linear(in_features=4396, out_features=4396, bias=True) + (rnn): LSTM(4396, 256, batch_first=True, bidirectional=True) + (linear): Linear(in_features=512, out_features=71, bias=True) + (loss_function): ViterbiLoss() + (crf): CRF() +)" +2023-05-15 21:27:39,581 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:39,581 Corpus: 420 train + 500 dev + 506 test sentences +2023-05-15 21:27:39,581 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:39,581 Train: 420 sentences +2023-05-15 21:27:39,581 (train_with_dev=False, train_with_test=False) +2023-05-15 21:27:39,581 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:39,581 Training Params: +2023-05-15 21:27:39,581 - learning_rate: "0.1" +2023-05-15 21:27:39,581 - mini_batch_size: "4" +2023-05-15 21:27:39,581 - max_epochs: "150" +2023-05-15 21:27:39,581 - shuffle: "True" +2023-05-15 21:27:39,581 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:39,581 Plugins: +2023-05-15 21:27:39,581 - AnnealOnPlateau | patience: '3', anneal_factor: '0.5', min_learning_rate: '0.0001' +2023-05-15 21:27:39,581 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:39,581 Final evaluation on model from best epoch (best-model.pt) +2023-05-15 21:27:39,581 - metric: "('micro avg', 'accuracy')" +2023-05-15 21:27:39,581 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:39,581 Computation: +2023-05-15 21:27:39,581 - compute on device: cuda:0 +2023-05-15 21:27:39,581 - embedding storage: cpu +2023-05-15 21:27:39,581 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:39,581 Model training base path: "pos-twitter-german-bs4-4" +2023-05-15 21:27:39,581 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:39,581 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:40,533 epoch 1 - iter 10/105 - loss 4.13732332 - time (sec): 0.95 - samples/sec: 579.84 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:41,490 epoch 1 - iter 20/105 - loss 3.80546391 - time (sec): 1.91 - samples/sec: 605.84 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:42,512 epoch 1 - iter 30/105 - loss 3.50840566 - time (sec): 2.93 - samples/sec: 597.21 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:43,510 epoch 1 - iter 40/105 - loss 3.26294657 - time (sec): 3.93 - samples/sec: 616.23 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:44,492 epoch 1 - iter 50/105 - loss 3.05152618 - time (sec): 4.91 - samples/sec: 620.65 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:45,477 epoch 1 - iter 60/105 - loss 2.92978663 - time (sec): 5.90 - samples/sec: 610.17 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:46,428 epoch 1 - iter 70/105 - loss 2.80236063 - time (sec): 6.85 - samples/sec: 608.67 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:47,452 epoch 1 - iter 80/105 - loss 2.66861768 - time (sec): 7.87 - samples/sec: 609.51 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:48,403 epoch 1 - iter 90/105 - loss 2.56291144 - time (sec): 8.82 - samples/sec: 611.03 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:49,333 epoch 1 - iter 100/105 - loss 2.48654514 - time (sec): 9.75 - samples/sec: 607.29 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:49,822 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:49,822 EPOCH 1 done: loss 2.4446 - lr: 0.100000 +2023-05-15 21:27:51,262 DEV : loss 1.257121205329895 - accuracy (micro avg) 0.6784 +2023-05-15 21:27:51,274 - 0 epochs without improvement +2023-05-15 21:27:51,274 saving best model +2023-05-15 21:27:52,427 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:52,588 epoch 2 - iter 10/105 - loss 1.50705724 - time (sec): 0.16 - samples/sec: 3730.85 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:52,740 epoch 2 - iter 20/105 - loss 1.42392433 - time (sec): 0.31 - samples/sec: 3666.19 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:52,895 epoch 2 - iter 30/105 - loss 1.32810853 - time (sec): 0.47 - samples/sec: 3740.19 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:53,018 epoch 2 - iter 40/105 - loss 1.26945619 - time (sec): 0.59 - samples/sec: 3883.22 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:53,142 epoch 2 - iter 50/105 - loss 1.25974974 - time (sec): 0.71 - samples/sec: 4067.80 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:53,272 epoch 2 - iter 60/105 - loss 1.24602583 - time (sec): 0.84 - samples/sec: 4120.93 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:53,394 epoch 2 - iter 70/105 - loss 1.26179294 - time (sec): 0.97 - samples/sec: 4184.40 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:53,517 epoch 2 - iter 80/105 - loss 1.22697354 - time (sec): 1.09 - samples/sec: 4240.10 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:53,646 epoch 2 - iter 90/105 - loss 1.20106764 - time (sec): 1.22 - samples/sec: 4334.29 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:53,772 epoch 2 - iter 100/105 - loss 1.17538832 - time (sec): 1.34 - samples/sec: 4373.10 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:53,839 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:53,839 EPOCH 2 done: loss 1.1693 - lr: 0.100000 +2023-05-15 21:27:54,478 DEV : loss 0.7305335402488708 - accuracy (micro avg) 0.8135 +2023-05-15 21:27:54,490 - 0 epochs without improvement +2023-05-15 21:27:54,490 saving best model +2023-05-15 21:27:56,027 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:56,188 epoch 3 - iter 10/105 - loss 1.03272509 - time (sec): 0.16 - samples/sec: 3494.67 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:56,358 epoch 3 - iter 20/105 - loss 0.97097376 - time (sec): 0.33 - samples/sec: 3768.79 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:56,509 epoch 3 - iter 30/105 - loss 0.95689355 - time (sec): 0.48 - samples/sec: 3821.29 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:56,667 epoch 3 - iter 40/105 - loss 0.93238795 - time (sec): 0.64 - samples/sec: 3918.58 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:56,810 epoch 3 - iter 50/105 - loss 0.91059667 - time (sec): 0.78 - samples/sec: 3897.35 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:56,947 epoch 3 - iter 60/105 - loss 0.92685783 - time (sec): 0.92 - samples/sec: 3814.35 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:57,084 epoch 3 - iter 70/105 - loss 0.91279862 - time (sec): 1.06 - samples/sec: 3916.54 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:57,212 epoch 3 - iter 80/105 - loss 0.89905473 - time (sec): 1.18 - samples/sec: 4005.24 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:57,333 epoch 3 - iter 90/105 - loss 0.87855579 - time (sec): 1.31 - samples/sec: 4077.26 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:57,458 epoch 3 - iter 100/105 - loss 0.86400929 - time (sec): 1.43 - samples/sec: 4140.61 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:27:57,525 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:57,526 EPOCH 3 done: loss 0.8662 - lr: 0.100000 +2023-05-15 21:27:58,166 DEV : loss 0.6227904558181763 - accuracy (micro avg) 0.8302 +2023-05-15 21:27:58,178 - 0 epochs without improvement +2023-05-15 21:27:58,178 saving best model +2023-05-15 21:27:59,690 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:27:59,852 epoch 4 - iter 10/105 - loss 0.70690933 - time (sec): 0.16 - samples/sec: 3773.47 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:00,017 epoch 4 - iter 20/105 - loss 0.67778427 - time (sec): 0.33 - samples/sec: 3685.11 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:00,162 epoch 4 - iter 30/105 - loss 0.72460377 - time (sec): 0.47 - samples/sec: 3659.52 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:00,291 epoch 4 - iter 40/105 - loss 0.74667061 - time (sec): 0.60 - samples/sec: 3901.05 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:00,413 epoch 4 - iter 50/105 - loss 0.72456310 - time (sec): 0.72 - samples/sec: 4004.33 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:00,544 epoch 4 - iter 60/105 - loss 0.72599638 - time (sec): 0.85 - samples/sec: 4146.27 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:00,677 epoch 4 - iter 70/105 - loss 0.70078356 - time (sec): 0.99 - samples/sec: 4201.67 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:00,799 epoch 4 - iter 80/105 - loss 0.70384380 - time (sec): 1.11 - samples/sec: 4253.46 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:00,927 epoch 4 - iter 90/105 - loss 0.69472975 - time (sec): 1.24 - samples/sec: 4321.83 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:01,053 epoch 4 - iter 100/105 - loss 0.69011472 - time (sec): 1.36 - samples/sec: 4356.35 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:01,117 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:01,117 EPOCH 4 done: loss 0.6919 - lr: 0.100000 +2023-05-15 21:28:02,126 DEV : loss 0.4872177243232727 - accuracy (micro avg) 0.8758 +2023-05-15 21:28:02,138 - 0 epochs without improvement +2023-05-15 21:28:02,138 saving best model +2023-05-15 21:28:03,629 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:03,791 epoch 5 - iter 10/105 - loss 0.61365618 - time (sec): 0.16 - samples/sec: 3683.44 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:03,957 epoch 5 - iter 20/105 - loss 0.59538471 - time (sec): 0.33 - samples/sec: 3731.82 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:04,125 epoch 5 - iter 30/105 - loss 0.57676758 - time (sec): 0.50 - samples/sec: 3761.28 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:04,258 epoch 5 - iter 40/105 - loss 0.57542241 - time (sec): 0.63 - samples/sec: 3923.23 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:04,384 epoch 5 - iter 50/105 - loss 0.56359304 - time (sec): 0.76 - samples/sec: 4025.06 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:04,518 epoch 5 - iter 60/105 - loss 0.56716628 - time (sec): 0.89 - samples/sec: 4121.01 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:04,641 epoch 5 - iter 70/105 - loss 0.57142354 - time (sec): 1.01 - samples/sec: 4161.49 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:04,771 epoch 5 - iter 80/105 - loss 0.57661931 - time (sec): 1.14 - samples/sec: 4218.39 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:04,899 epoch 5 - iter 90/105 - loss 0.58855084 - time (sec): 1.27 - samples/sec: 4241.27 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:05,023 epoch 5 - iter 100/105 - loss 0.58498679 - time (sec): 1.39 - samples/sec: 4268.28 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:05,085 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:05,086 EPOCH 5 done: loss 0.5872 - lr: 0.100000 +2023-05-15 21:28:05,762 DEV : loss 0.4932672083377838 - accuracy (micro avg) 0.8797 +2023-05-15 21:28:05,774 - 0 epochs without improvement +2023-05-15 21:28:05,774 saving best model +2023-05-15 21:28:07,277 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:07,441 epoch 6 - iter 10/105 - loss 0.53339850 - time (sec): 0.16 - samples/sec: 3641.02 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:07,607 epoch 6 - iter 20/105 - loss 0.50043912 - time (sec): 0.33 - samples/sec: 3706.63 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:07,730 epoch 6 - iter 30/105 - loss 0.53429793 - time (sec): 0.45 - samples/sec: 4028.08 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:07,864 epoch 6 - iter 40/105 - loss 0.52955419 - time (sec): 0.59 - samples/sec: 4164.53 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:07,990 epoch 6 - iter 50/105 - loss 0.50279857 - time (sec): 0.71 - samples/sec: 4250.14 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:08,121 epoch 6 - iter 60/105 - loss 0.49586015 - time (sec): 0.84 - samples/sec: 4329.78 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:08,255 epoch 6 - iter 70/105 - loss 0.50257764 - time (sec): 0.98 - samples/sec: 4351.11 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:08,378 epoch 6 - iter 80/105 - loss 0.51000387 - time (sec): 1.10 - samples/sec: 4362.95 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:08,512 epoch 6 - iter 90/105 - loss 0.51874780 - time (sec): 1.23 - samples/sec: 4366.77 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:08,635 epoch 6 - iter 100/105 - loss 0.52402548 - time (sec): 1.36 - samples/sec: 4361.17 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:08,703 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:08,703 EPOCH 6 done: loss 0.5268 - lr: 0.100000 +2023-05-15 21:28:09,375 DEV : loss 0.4175605773925781 - accuracy (micro avg) 0.8935 +2023-05-15 21:28:09,387 - 0 epochs without improvement +2023-05-15 21:28:09,387 saving best model +2023-05-15 21:28:10,889 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:11,057 epoch 7 - iter 10/105 - loss 0.42810660 - time (sec): 0.17 - samples/sec: 3461.94 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:11,219 epoch 7 - iter 20/105 - loss 0.50242569 - time (sec): 0.33 - samples/sec: 3551.85 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:11,385 epoch 7 - iter 30/105 - loss 0.47000735 - time (sec): 0.50 - samples/sec: 3508.06 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:11,547 epoch 7 - iter 40/105 - loss 0.46886899 - time (sec): 0.66 - samples/sec: 3537.36 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:11,706 epoch 7 - iter 50/105 - loss 0.46468136 - time (sec): 0.82 - samples/sec: 3480.86 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:11,868 epoch 7 - iter 60/105 - loss 0.44673748 - time (sec): 0.98 - samples/sec: 3517.24 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:12,042 epoch 7 - iter 70/105 - loss 0.46160434 - time (sec): 1.15 - samples/sec: 3541.95 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:12,201 epoch 7 - iter 80/105 - loss 0.46348997 - time (sec): 1.31 - samples/sec: 3550.57 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:12,364 epoch 7 - iter 90/105 - loss 0.46535512 - time (sec): 1.47 - samples/sec: 3597.04 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:12,522 epoch 7 - iter 100/105 - loss 0.46283468 - time (sec): 1.63 - samples/sec: 3614.16 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:12,602 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:12,602 EPOCH 7 done: loss 0.4622 - lr: 0.100000 +2023-05-15 21:28:13,404 DEV : loss 0.3824714422225952 - accuracy (micro avg) 0.9053 +2023-05-15 21:28:13,416 - 0 epochs without improvement +2023-05-15 21:28:13,416 saving best model +2023-05-15 21:28:14,931 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:15,098 epoch 8 - iter 10/105 - loss 0.52925490 - time (sec): 0.17 - samples/sec: 3717.22 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:15,255 epoch 8 - iter 20/105 - loss 0.45622410 - time (sec): 0.32 - samples/sec: 3607.22 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:15,408 epoch 8 - iter 30/105 - loss 0.43188253 - time (sec): 0.48 - samples/sec: 3586.79 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:15,550 epoch 8 - iter 40/105 - loss 0.45116733 - time (sec): 0.62 - samples/sec: 3625.84 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:15,679 epoch 8 - iter 50/105 - loss 0.47200060 - time (sec): 0.75 - samples/sec: 3805.42 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:15,808 epoch 8 - iter 60/105 - loss 0.47657265 - time (sec): 0.88 - samples/sec: 3985.65 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:15,936 epoch 8 - iter 70/105 - loss 0.46800503 - time (sec): 1.00 - samples/sec: 4075.81 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:16,063 epoch 8 - iter 80/105 - loss 0.45493913 - time (sec): 1.13 - samples/sec: 4110.98 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:16,196 epoch 8 - iter 90/105 - loss 0.44503478 - time (sec): 1.26 - samples/sec: 4186.41 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:16,330 epoch 8 - iter 100/105 - loss 0.42980643 - time (sec): 1.40 - samples/sec: 4242.37 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:16,395 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:16,395 EPOCH 8 done: loss 0.4262 - lr: 0.100000 +2023-05-15 21:28:17,065 DEV : loss 0.4457044303417206 - accuracy (micro avg) 0.8953 +2023-05-15 21:28:17,078 - 1 epochs without improvement +2023-05-15 21:28:17,078 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:17,232 epoch 9 - iter 10/105 - loss 0.43019313 - time (sec): 0.15 - samples/sec: 3666.67 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:17,391 epoch 9 - iter 20/105 - loss 0.42917599 - time (sec): 0.31 - samples/sec: 3602.17 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:17,560 epoch 9 - iter 30/105 - loss 0.43572778 - time (sec): 0.48 - samples/sec: 3684.15 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:17,724 epoch 9 - iter 40/105 - loss 0.42423583 - time (sec): 0.65 - samples/sec: 3740.41 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:17,885 epoch 9 - iter 50/105 - loss 0.40547926 - time (sec): 0.81 - samples/sec: 3749.20 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:18,043 epoch 9 - iter 60/105 - loss 0.40662170 - time (sec): 0.96 - samples/sec: 3707.22 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:18,201 epoch 9 - iter 70/105 - loss 0.41125568 - time (sec): 1.12 - samples/sec: 3713.64 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:18,355 epoch 9 - iter 80/105 - loss 0.40591552 - time (sec): 1.28 - samples/sec: 3672.13 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:18,520 epoch 9 - iter 90/105 - loss 0.40433275 - time (sec): 1.44 - samples/sec: 3672.57 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:18,677 epoch 9 - iter 100/105 - loss 0.39568312 - time (sec): 1.60 - samples/sec: 3692.30 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:18,760 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:18,760 EPOCH 9 done: loss 0.3993 - lr: 0.100000 +2023-05-15 21:28:19,433 DEV : loss 0.4055745005607605 - accuracy (micro avg) 0.9067 +2023-05-15 21:28:19,445 - 0 epochs without improvement +2023-05-15 21:28:19,445 saving best model +2023-05-15 21:28:20,938 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:21,109 epoch 10 - iter 10/105 - loss 0.29233026 - time (sec): 0.17 - samples/sec: 3313.66 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:21,290 epoch 10 - iter 20/105 - loss 0.32887121 - time (sec): 0.35 - samples/sec: 3504.13 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:21,455 epoch 10 - iter 30/105 - loss 0.31719053 - time (sec): 0.52 - samples/sec: 3589.17 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:21,617 epoch 10 - iter 40/105 - loss 0.32200724 - time (sec): 0.68 - samples/sec: 3565.44 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:21,773 epoch 10 - iter 50/105 - loss 0.33617648 - time (sec): 0.83 - samples/sec: 3622.10 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:21,940 epoch 10 - iter 60/105 - loss 0.34266434 - time (sec): 1.00 - samples/sec: 3641.11 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:22,103 epoch 10 - iter 70/105 - loss 0.34127674 - time (sec): 1.16 - samples/sec: 3617.73 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:22,249 epoch 10 - iter 80/105 - loss 0.34237331 - time (sec): 1.31 - samples/sec: 3665.35 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:22,404 epoch 10 - iter 90/105 - loss 0.34708513 - time (sec): 1.46 - samples/sec: 3646.44 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:22,554 epoch 10 - iter 100/105 - loss 0.34402692 - time (sec): 1.62 - samples/sec: 3635.62 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:22,644 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:22,644 EPOCH 10 done: loss 0.3448 - lr: 0.100000 +2023-05-15 21:28:23,321 DEV : loss 0.3692632019519806 - accuracy (micro avg) 0.9107 +2023-05-15 21:28:23,333 - 0 epochs without improvement +2023-05-15 21:28:23,333 saving best model +2023-05-15 21:28:24,834 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:25,004 epoch 11 - iter 10/105 - loss 0.34743618 - time (sec): 0.17 - samples/sec: 3556.10 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:25,163 epoch 11 - iter 20/105 - loss 0.34974343 - time (sec): 0.33 - samples/sec: 3539.54 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:25,326 epoch 11 - iter 30/105 - loss 0.32850942 - time (sec): 0.49 - samples/sec: 3729.38 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:25,480 epoch 11 - iter 40/105 - loss 0.31337183 - time (sec): 0.65 - samples/sec: 3812.20 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:25,608 epoch 11 - iter 50/105 - loss 0.30750245 - time (sec): 0.77 - samples/sec: 3931.40 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:25,726 epoch 11 - iter 60/105 - loss 0.31700868 - time (sec): 0.89 - samples/sec: 3986.15 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:25,855 epoch 11 - iter 70/105 - loss 0.32409246 - time (sec): 1.02 - samples/sec: 4094.85 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:25,976 epoch 11 - iter 80/105 - loss 0.32874190 - time (sec): 1.14 - samples/sec: 4126.07 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:26,105 epoch 11 - iter 90/105 - loss 0.34002788 - time (sec): 1.27 - samples/sec: 4201.13 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:26,233 epoch 11 - iter 100/105 - loss 0.33955505 - time (sec): 1.40 - samples/sec: 4249.07 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:26,299 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:26,299 EPOCH 11 done: loss 0.3348 - lr: 0.100000 +2023-05-15 21:28:27,102 DEV : loss 0.37659206986427307 - accuracy (micro avg) 0.9107 +2023-05-15 21:28:27,114 - 1 epochs without improvement +2023-05-15 21:28:27,114 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:27,266 epoch 12 - iter 10/105 - loss 0.28334459 - time (sec): 0.15 - samples/sec: 3790.16 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:27,430 epoch 12 - iter 20/105 - loss 0.26780325 - time (sec): 0.32 - samples/sec: 3825.09 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:27,584 epoch 12 - iter 30/105 - loss 0.29387770 - time (sec): 0.47 - samples/sec: 3834.13 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:27,735 epoch 12 - iter 40/105 - loss 0.30666119 - time (sec): 0.62 - samples/sec: 3791.10 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:27,889 epoch 12 - iter 50/105 - loss 0.30770345 - time (sec): 0.77 - samples/sec: 3792.88 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:28,058 epoch 12 - iter 60/105 - loss 0.31630196 - time (sec): 0.94 - samples/sec: 3808.64 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:28,212 epoch 12 - iter 70/105 - loss 0.32114589 - time (sec): 1.10 - samples/sec: 3793.90 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:28,373 epoch 12 - iter 80/105 - loss 0.32696461 - time (sec): 1.26 - samples/sec: 3737.04 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:28,541 epoch 12 - iter 90/105 - loss 0.32321903 - time (sec): 1.43 - samples/sec: 3732.76 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:28,704 epoch 12 - iter 100/105 - loss 0.32507218 - time (sec): 1.59 - samples/sec: 3733.42 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:28,789 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:28,789 EPOCH 12 done: loss 0.3271 - lr: 0.100000 +2023-05-15 21:28:29,460 DEV : loss 0.3771950602531433 - accuracy (micro avg) 0.9105 +2023-05-15 21:28:29,472 - 2 epochs without improvement +2023-05-15 21:28:29,472 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:29,633 epoch 13 - iter 10/105 - loss 0.25558085 - time (sec): 0.16 - samples/sec: 3839.70 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:29,804 epoch 13 - iter 20/105 - loss 0.28810978 - time (sec): 0.33 - samples/sec: 3857.76 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:29,949 epoch 13 - iter 30/105 - loss 0.28257029 - time (sec): 0.48 - samples/sec: 3848.17 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:30,114 epoch 13 - iter 40/105 - loss 0.26605552 - time (sec): 0.64 - samples/sec: 3808.50 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:30,272 epoch 13 - iter 50/105 - loss 0.26701675 - time (sec): 0.80 - samples/sec: 3777.93 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:30,428 epoch 13 - iter 60/105 - loss 0.27406176 - time (sec): 0.96 - samples/sec: 3764.19 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:30,578 epoch 13 - iter 70/105 - loss 0.28434073 - time (sec): 1.11 - samples/sec: 3737.53 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:30,721 epoch 13 - iter 80/105 - loss 0.28311288 - time (sec): 1.25 - samples/sec: 3787.01 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:30,855 epoch 13 - iter 90/105 - loss 0.28752102 - time (sec): 1.38 - samples/sec: 3884.81 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:30,979 epoch 13 - iter 100/105 - loss 0.28659536 - time (sec): 1.51 - samples/sec: 3909.75 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:31,048 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:31,048 EPOCH 13 done: loss 0.2864 - lr: 0.100000 +2023-05-15 21:28:31,726 DEV : loss 0.3924044966697693 - accuracy (micro avg) 0.9078 +2023-05-15 21:28:31,738 - 3 epochs without improvement +2023-05-15 21:28:31,738 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:31,896 epoch 14 - iter 10/105 - loss 0.26458731 - time (sec): 0.16 - samples/sec: 4180.82 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:32,059 epoch 14 - iter 20/105 - loss 0.25633094 - time (sec): 0.32 - samples/sec: 3916.83 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:32,213 epoch 14 - iter 30/105 - loss 0.29500461 - time (sec): 0.47 - samples/sec: 3861.29 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:32,365 epoch 14 - iter 40/105 - loss 0.29691722 - time (sec): 0.63 - samples/sec: 3870.16 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:32,521 epoch 14 - iter 50/105 - loss 0.29505478 - time (sec): 0.78 - samples/sec: 3847.72 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:32,652 epoch 14 - iter 60/105 - loss 0.29782093 - time (sec): 0.91 - samples/sec: 3959.37 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:32,783 epoch 14 - iter 70/105 - loss 0.30191361 - time (sec): 1.04 - samples/sec: 4012.08 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:32,914 epoch 14 - iter 80/105 - loss 0.29182818 - time (sec): 1.17 - samples/sec: 4042.65 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:33,043 epoch 14 - iter 90/105 - loss 0.29334758 - time (sec): 1.30 - samples/sec: 4090.84 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:33,173 epoch 14 - iter 100/105 - loss 0.29052290 - time (sec): 1.43 - samples/sec: 4130.13 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:33,245 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:33,246 EPOCH 14 done: loss 0.2933 - lr: 0.100000 +2023-05-15 21:28:34,072 DEV : loss 0.3501710295677185 - accuracy (micro avg) 0.9157 +2023-05-15 21:28:34,085 - 0 epochs without improvement +2023-05-15 21:28:34,085 saving best model +2023-05-15 21:28:35,588 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:35,755 epoch 15 - iter 10/105 - loss 0.26030079 - time (sec): 0.17 - samples/sec: 3653.21 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:35,920 epoch 15 - iter 20/105 - loss 0.24296167 - time (sec): 0.33 - samples/sec: 3614.18 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:36,077 epoch 15 - iter 30/105 - loss 0.22735185 - time (sec): 0.49 - samples/sec: 3687.43 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:36,240 epoch 15 - iter 40/105 - loss 0.24004719 - time (sec): 0.65 - samples/sec: 3695.51 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:36,395 epoch 15 - iter 50/105 - loss 0.24872295 - time (sec): 0.81 - samples/sec: 3757.80 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:36,533 epoch 15 - iter 60/105 - loss 0.24078150 - time (sec): 0.94 - samples/sec: 3875.50 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:36,660 epoch 15 - iter 70/105 - loss 0.23893907 - time (sec): 1.07 - samples/sec: 3967.13 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:36,783 epoch 15 - iter 80/105 - loss 0.23832398 - time (sec): 1.19 - samples/sec: 4028.86 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:36,907 epoch 15 - iter 90/105 - loss 0.24174430 - time (sec): 1.32 - samples/sec: 4089.50 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:37,031 epoch 15 - iter 100/105 - loss 0.24719004 - time (sec): 1.44 - samples/sec: 4123.74 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:37,094 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:37,094 EPOCH 15 done: loss 0.2450 - lr: 0.100000 +2023-05-15 21:28:37,764 DEV : loss 0.36203423142433167 - accuracy (micro avg) 0.9168 +2023-05-15 21:28:37,776 - 0 epochs without improvement +2023-05-15 21:28:37,777 saving best model +2023-05-15 21:28:39,292 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:39,460 epoch 16 - iter 10/105 - loss 0.28751220 - time (sec): 0.17 - samples/sec: 3624.96 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:39,636 epoch 16 - iter 20/105 - loss 0.26365962 - time (sec): 0.34 - samples/sec: 3574.88 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:39,800 epoch 16 - iter 30/105 - loss 0.26388788 - time (sec): 0.51 - samples/sec: 3657.97 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:39,958 epoch 16 - iter 40/105 - loss 0.25961111 - time (sec): 0.67 - samples/sec: 3606.68 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:40,117 epoch 16 - iter 50/105 - loss 0.25009359 - time (sec): 0.82 - samples/sec: 3602.32 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:40,268 epoch 16 - iter 60/105 - loss 0.24935196 - time (sec): 0.98 - samples/sec: 3631.04 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:40,433 epoch 16 - iter 70/105 - loss 0.25583697 - time (sec): 1.14 - samples/sec: 3660.28 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:40,592 epoch 16 - iter 80/105 - loss 0.25461234 - time (sec): 1.30 - samples/sec: 3649.21 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:40,749 epoch 16 - iter 90/105 - loss 0.25699326 - time (sec): 1.46 - samples/sec: 3649.05 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:40,915 epoch 16 - iter 100/105 - loss 0.25579934 - time (sec): 1.62 - samples/sec: 3648.15 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:40,993 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:40,993 EPOCH 16 done: loss 0.2513 - lr: 0.100000 +2023-05-15 21:28:41,665 DEV : loss 0.37685805559158325 - accuracy (micro avg) 0.9161 +2023-05-15 21:28:41,677 - 1 epochs without improvement +2023-05-15 21:28:41,678 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:41,839 epoch 17 - iter 10/105 - loss 0.25334569 - time (sec): 0.16 - samples/sec: 3850.19 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:41,981 epoch 17 - iter 20/105 - loss 0.24333967 - time (sec): 0.30 - samples/sec: 3621.39 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:42,132 epoch 17 - iter 30/105 - loss 0.23545959 - time (sec): 0.45 - samples/sec: 3627.00 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:42,285 epoch 17 - iter 40/105 - loss 0.23830042 - time (sec): 0.61 - samples/sec: 3707.61 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:42,413 epoch 17 - iter 50/105 - loss 0.24765555 - time (sec): 0.74 - samples/sec: 3912.11 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:42,538 epoch 17 - iter 60/105 - loss 0.24520233 - time (sec): 0.86 - samples/sec: 4023.16 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:42,668 epoch 17 - iter 70/105 - loss 0.24207840 - time (sec): 0.99 - samples/sec: 4189.92 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:42,797 epoch 17 - iter 80/105 - loss 0.23634265 - time (sec): 1.12 - samples/sec: 4230.99 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:42,925 epoch 17 - iter 90/105 - loss 0.23920227 - time (sec): 1.25 - samples/sec: 4271.72 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:43,059 epoch 17 - iter 100/105 - loss 0.23783792 - time (sec): 1.38 - samples/sec: 4332.38 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:43,119 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:43,119 EPOCH 17 done: loss 0.2414 - lr: 0.100000 +2023-05-15 21:28:43,790 DEV : loss 0.37420299649238586 - accuracy (micro avg) 0.9168 +2023-05-15 21:28:43,802 - 2 epochs without improvement +2023-05-15 21:28:43,802 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:43,959 epoch 18 - iter 10/105 - loss 0.24061614 - time (sec): 0.16 - samples/sec: 4009.23 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:44,125 epoch 18 - iter 20/105 - loss 0.21762997 - time (sec): 0.32 - samples/sec: 3788.05 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:44,286 epoch 18 - iter 30/105 - loss 0.21340251 - time (sec): 0.48 - samples/sec: 3721.72 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:44,458 epoch 18 - iter 40/105 - loss 0.20776087 - time (sec): 0.66 - samples/sec: 3772.39 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:44,611 epoch 18 - iter 50/105 - loss 0.21267624 - time (sec): 0.81 - samples/sec: 3704.43 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:44,774 epoch 18 - iter 60/105 - loss 0.22464270 - time (sec): 0.97 - samples/sec: 3733.26 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:44,937 epoch 18 - iter 70/105 - loss 0.23068581 - time (sec): 1.13 - samples/sec: 3712.31 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:45,074 epoch 18 - iter 80/105 - loss 0.23007149 - time (sec): 1.27 - samples/sec: 3804.07 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:45,198 epoch 18 - iter 90/105 - loss 0.23322089 - time (sec): 1.40 - samples/sec: 3876.73 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:45,319 epoch 18 - iter 100/105 - loss 0.23723561 - time (sec): 1.52 - samples/sec: 3933.25 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:45,382 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:45,382 EPOCH 18 done: loss 0.2391 - lr: 0.100000 +2023-05-15 21:28:46,199 DEV : loss 0.3612535893917084 - accuracy (micro avg) 0.9186 +2023-05-15 21:28:46,211 - 0 epochs without improvement +2023-05-15 21:28:46,211 saving best model +2023-05-15 21:28:47,711 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:47,875 epoch 19 - iter 10/105 - loss 0.22788334 - time (sec): 0.16 - samples/sec: 3354.10 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:48,045 epoch 19 - iter 20/105 - loss 0.21366377 - time (sec): 0.33 - samples/sec: 3320.64 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:48,208 epoch 19 - iter 30/105 - loss 0.20840273 - time (sec): 0.50 - samples/sec: 3421.34 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:48,372 epoch 19 - iter 40/105 - loss 0.20833268 - time (sec): 0.66 - samples/sec: 3520.90 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:48,527 epoch 19 - iter 50/105 - loss 0.20487635 - time (sec): 0.82 - samples/sec: 3632.63 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:48,684 epoch 19 - iter 60/105 - loss 0.21416832 - time (sec): 0.97 - samples/sec: 3691.67 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:48,810 epoch 19 - iter 70/105 - loss 0.20718833 - time (sec): 1.10 - samples/sec: 3836.58 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:48,936 epoch 19 - iter 80/105 - loss 0.20743279 - time (sec): 1.22 - samples/sec: 3893.28 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:49,067 epoch 19 - iter 90/105 - loss 0.21436857 - time (sec): 1.36 - samples/sec: 3980.06 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:49,192 epoch 19 - iter 100/105 - loss 0.21507106 - time (sec): 1.48 - samples/sec: 4031.59 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:49,260 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:49,260 EPOCH 19 done: loss 0.2162 - lr: 0.100000 +2023-05-15 21:28:49,929 DEV : loss 0.38264331221580505 - accuracy (micro avg) 0.9151 +2023-05-15 21:28:49,941 - 1 epochs without improvement +2023-05-15 21:28:49,941 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:50,098 epoch 20 - iter 10/105 - loss 0.18692402 - time (sec): 0.16 - samples/sec: 3641.69 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:50,243 epoch 20 - iter 20/105 - loss 0.18505662 - time (sec): 0.30 - samples/sec: 3892.05 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:50,374 epoch 20 - iter 30/105 - loss 0.18862972 - time (sec): 0.43 - samples/sec: 4193.31 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:50,498 epoch 20 - iter 40/105 - loss 0.19222678 - time (sec): 0.56 - samples/sec: 4259.67 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:50,621 epoch 20 - iter 50/105 - loss 0.19376221 - time (sec): 0.68 - samples/sec: 4310.40 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:50,755 epoch 20 - iter 60/105 - loss 0.20313456 - time (sec): 0.81 - samples/sec: 4423.94 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:50,892 epoch 20 - iter 70/105 - loss 0.21258358 - time (sec): 0.95 - samples/sec: 4528.01 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:51,018 epoch 20 - iter 80/105 - loss 0.21183394 - time (sec): 1.08 - samples/sec: 4531.58 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:51,138 epoch 20 - iter 90/105 - loss 0.20992423 - time (sec): 1.20 - samples/sec: 4477.17 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:51,269 epoch 20 - iter 100/105 - loss 0.21406864 - time (sec): 1.33 - samples/sec: 4483.06 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:51,331 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:51,331 EPOCH 20 done: loss 0.2133 - lr: 0.100000 +2023-05-15 21:28:52,001 DEV : loss 0.37900087237358093 - accuracy (micro avg) 0.9165 +2023-05-15 21:28:52,014 - 2 epochs without improvement +2023-05-15 21:28:52,014 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:52,173 epoch 21 - iter 10/105 - loss 0.21559606 - time (sec): 0.16 - samples/sec: 3872.09 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:52,337 epoch 21 - iter 20/105 - loss 0.20114522 - time (sec): 0.32 - samples/sec: 4092.61 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:52,468 epoch 21 - iter 30/105 - loss 0.18528392 - time (sec): 0.45 - samples/sec: 4232.20 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:52,593 epoch 21 - iter 40/105 - loss 0.18452745 - time (sec): 0.58 - samples/sec: 4270.84 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:52,719 epoch 21 - iter 50/105 - loss 0.18896888 - time (sec): 0.71 - samples/sec: 4306.03 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:52,844 epoch 21 - iter 60/105 - loss 0.19499164 - time (sec): 0.83 - samples/sec: 4292.80 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:52,973 epoch 21 - iter 70/105 - loss 0.18755836 - time (sec): 0.96 - samples/sec: 4357.24 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:53,100 epoch 21 - iter 80/105 - loss 0.19307926 - time (sec): 1.09 - samples/sec: 4405.82 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:53,226 epoch 21 - iter 90/105 - loss 0.19528471 - time (sec): 1.21 - samples/sec: 4401.00 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:53,353 epoch 21 - iter 100/105 - loss 0.19506996 - time (sec): 1.34 - samples/sec: 4414.11 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:53,419 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:53,419 EPOCH 21 done: loss 0.1992 - lr: 0.100000 +2023-05-15 21:28:54,225 DEV : loss 0.38944578170776367 - accuracy (micro avg) 0.9169 +2023-05-15 21:28:54,237 - 3 epochs without improvement +2023-05-15 21:28:54,237 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:54,390 epoch 22 - iter 10/105 - loss 0.20993071 - time (sec): 0.15 - samples/sec: 4066.32 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:54,543 epoch 22 - iter 20/105 - loss 0.18478512 - time (sec): 0.31 - samples/sec: 3890.44 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:54,700 epoch 22 - iter 30/105 - loss 0.19947340 - time (sec): 0.46 - samples/sec: 3819.13 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:54,859 epoch 22 - iter 40/105 - loss 0.19935648 - time (sec): 0.62 - samples/sec: 3886.01 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:54,988 epoch 22 - iter 50/105 - loss 0.20181282 - time (sec): 0.75 - samples/sec: 3961.08 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:55,122 epoch 22 - iter 60/105 - loss 0.20980325 - time (sec): 0.88 - samples/sec: 4090.00 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:55,252 epoch 22 - iter 70/105 - loss 0.20766586 - time (sec): 1.01 - samples/sec: 4117.60 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:55,377 epoch 22 - iter 80/105 - loss 0.20463919 - time (sec): 1.14 - samples/sec: 4147.86 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:55,503 epoch 22 - iter 90/105 - loss 0.20755944 - time (sec): 1.27 - samples/sec: 4188.49 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:55,637 epoch 22 - iter 100/105 - loss 0.20476708 - time (sec): 1.40 - samples/sec: 4243.65 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:55,703 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:55,703 EPOCH 22 done: loss 0.2058 - lr: 0.100000 +2023-05-15 21:28:56,376 DEV : loss 0.3886520266532898 - accuracy (micro avg) 0.9206 +2023-05-15 21:28:56,388 - 0 epochs without improvement +2023-05-15 21:28:56,388 saving best model +2023-05-15 21:28:57,893 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:58,068 epoch 23 - iter 10/105 - loss 0.19153291 - time (sec): 0.17 - samples/sec: 3812.49 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:58,231 epoch 23 - iter 20/105 - loss 0.16076549 - time (sec): 0.34 - samples/sec: 3733.08 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:58,391 epoch 23 - iter 30/105 - loss 0.17099648 - time (sec): 0.50 - samples/sec: 3819.67 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:58,545 epoch 23 - iter 40/105 - loss 0.17438607 - time (sec): 0.65 - samples/sec: 3778.95 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:58,700 epoch 23 - iter 50/105 - loss 0.17411032 - time (sec): 0.81 - samples/sec: 3782.28 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:58,828 epoch 23 - iter 60/105 - loss 0.17241215 - time (sec): 0.93 - samples/sec: 3890.01 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:58,960 epoch 23 - iter 70/105 - loss 0.18217880 - time (sec): 1.07 - samples/sec: 3984.89 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:59,088 epoch 23 - iter 80/105 - loss 0.18175217 - time (sec): 1.19 - samples/sec: 4030.69 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:59,212 epoch 23 - iter 90/105 - loss 0.18610610 - time (sec): 1.32 - samples/sec: 4069.10 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:59,344 epoch 23 - iter 100/105 - loss 0.18463815 - time (sec): 1.45 - samples/sec: 4099.98 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:28:59,410 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:28:59,410 EPOCH 23 done: loss 0.1822 - lr: 0.100000 +2023-05-15 21:29:00,084 DEV : loss 0.36885181069374084 - accuracy (micro avg) 0.9197 +2023-05-15 21:29:00,097 - 1 epochs without improvement +2023-05-15 21:29:00,097 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:00,254 epoch 24 - iter 10/105 - loss 0.16730327 - time (sec): 0.16 - samples/sec: 3740.77 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:00,427 epoch 24 - iter 20/105 - loss 0.16385379 - time (sec): 0.33 - samples/sec: 3664.66 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:00,584 epoch 24 - iter 30/105 - loss 0.16301537 - time (sec): 0.49 - samples/sec: 3636.25 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:00,747 epoch 24 - iter 40/105 - loss 0.17770530 - time (sec): 0.65 - samples/sec: 3696.84 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:00,874 epoch 24 - iter 50/105 - loss 0.16391138 - time (sec): 0.78 - samples/sec: 3792.58 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:01,005 epoch 24 - iter 60/105 - loss 0.17546450 - time (sec): 0.91 - samples/sec: 3931.20 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:01,128 epoch 24 - iter 70/105 - loss 0.17562979 - time (sec): 1.03 - samples/sec: 4042.74 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:01,255 epoch 24 - iter 80/105 - loss 0.17835859 - time (sec): 1.16 - samples/sec: 4076.50 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:01,383 epoch 24 - iter 90/105 - loss 0.17677116 - time (sec): 1.29 - samples/sec: 4133.64 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:01,513 epoch 24 - iter 100/105 - loss 0.17858467 - time (sec): 1.42 - samples/sec: 4189.34 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:01,579 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:01,579 EPOCH 24 done: loss 0.1767 - lr: 0.100000 +2023-05-15 21:29:02,389 DEV : loss 0.37519118189811707 - accuracy (micro avg) 0.9212 +2023-05-15 21:29:02,401 - 0 epochs without improvement +2023-05-15 21:29:02,401 saving best model +2023-05-15 21:29:03,937 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:04,110 epoch 25 - iter 10/105 - loss 0.11805303 - time (sec): 0.17 - samples/sec: 3739.77 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:04,278 epoch 25 - iter 20/105 - loss 0.14216948 - time (sec): 0.34 - samples/sec: 3772.14 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:04,439 epoch 25 - iter 30/105 - loss 0.15601505 - time (sec): 0.50 - samples/sec: 3784.47 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:04,602 epoch 25 - iter 40/105 - loss 0.17032954 - time (sec): 0.67 - samples/sec: 3779.74 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:04,765 epoch 25 - iter 50/105 - loss 0.17585301 - time (sec): 0.83 - samples/sec: 3753.25 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:04,916 epoch 25 - iter 60/105 - loss 0.18142867 - time (sec): 0.98 - samples/sec: 3716.42 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:05,071 epoch 25 - iter 70/105 - loss 0.18596428 - time (sec): 1.13 - samples/sec: 3687.15 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:05,233 epoch 25 - iter 80/105 - loss 0.19091255 - time (sec): 1.30 - samples/sec: 3662.01 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:05,391 epoch 25 - iter 90/105 - loss 0.19683841 - time (sec): 1.45 - samples/sec: 3663.45 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:05,556 epoch 25 - iter 100/105 - loss 0.19210034 - time (sec): 1.62 - samples/sec: 3665.75 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:05,630 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:05,630 EPOCH 25 done: loss 0.1916 - lr: 0.100000 +2023-05-15 21:29:06,302 DEV : loss 0.4038028419017792 - accuracy (micro avg) 0.919 +2023-05-15 21:29:06,314 - 1 epochs without improvement +2023-05-15 21:29:06,314 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:06,463 epoch 26 - iter 10/105 - loss 0.23133541 - time (sec): 0.15 - samples/sec: 3868.62 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:06,622 epoch 26 - iter 20/105 - loss 0.22254807 - time (sec): 0.31 - samples/sec: 3844.98 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:06,788 epoch 26 - iter 30/105 - loss 0.20389609 - time (sec): 0.47 - samples/sec: 3715.95 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:06,949 epoch 26 - iter 40/105 - loss 0.19783433 - time (sec): 0.63 - samples/sec: 3742.26 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:07,112 epoch 26 - iter 50/105 - loss 0.19137730 - time (sec): 0.80 - samples/sec: 3719.80 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:07,266 epoch 26 - iter 60/105 - loss 0.18776910 - time (sec): 0.95 - samples/sec: 3715.59 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:07,425 epoch 26 - iter 70/105 - loss 0.18005685 - time (sec): 1.11 - samples/sec: 3739.70 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:07,584 epoch 26 - iter 80/105 - loss 0.17298422 - time (sec): 1.27 - samples/sec: 3768.56 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:07,708 epoch 26 - iter 90/105 - loss 0.18011843 - time (sec): 1.39 - samples/sec: 3809.45 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:07,840 epoch 26 - iter 100/105 - loss 0.18028208 - time (sec): 1.53 - samples/sec: 3873.21 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:07,909 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:07,909 EPOCH 26 done: loss 0.1815 - lr: 0.100000 +2023-05-15 21:29:08,581 DEV : loss 0.3833042085170746 - accuracy (micro avg) 0.9165 +2023-05-15 21:29:08,593 - 2 epochs without improvement +2023-05-15 21:29:08,593 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:08,749 epoch 27 - iter 10/105 - loss 0.14937348 - time (sec): 0.16 - samples/sec: 3789.39 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:08,918 epoch 27 - iter 20/105 - loss 0.16552775 - time (sec): 0.32 - samples/sec: 3704.49 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:09,091 epoch 27 - iter 30/105 - loss 0.16062877 - time (sec): 0.50 - samples/sec: 3704.97 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:09,252 epoch 27 - iter 40/105 - loss 0.17210393 - time (sec): 0.66 - samples/sec: 3731.70 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:09,382 epoch 27 - iter 50/105 - loss 0.17608282 - time (sec): 0.79 - samples/sec: 3950.03 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:09,499 epoch 27 - iter 60/105 - loss 0.17527874 - time (sec): 0.91 - samples/sec: 3986.92 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:09,624 epoch 27 - iter 70/105 - loss 0.16902907 - time (sec): 1.03 - samples/sec: 4045.54 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:09,746 epoch 27 - iter 80/105 - loss 0.17736055 - time (sec): 1.15 - samples/sec: 4100.51 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:09,874 epoch 27 - iter 90/105 - loss 0.17949764 - time (sec): 1.28 - samples/sec: 4159.39 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:10,005 epoch 27 - iter 100/105 - loss 0.17488097 - time (sec): 1.41 - samples/sec: 4200.05 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:10,070 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:10,070 EPOCH 27 done: loss 0.1752 - lr: 0.100000 +2023-05-15 21:29:10,742 DEV : loss 0.40154093503952026 - accuracy (micro avg) 0.9191 +2023-05-15 21:29:10,755 - 3 epochs without improvement +2023-05-15 21:29:10,755 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:10,916 epoch 28 - iter 10/105 - loss 0.14716557 - time (sec): 0.16 - samples/sec: 4006.33 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:11,074 epoch 28 - iter 20/105 - loss 0.14562331 - time (sec): 0.32 - samples/sec: 3926.32 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:11,231 epoch 28 - iter 30/105 - loss 0.13775937 - time (sec): 0.48 - samples/sec: 3861.22 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:11,388 epoch 28 - iter 40/105 - loss 0.14650236 - time (sec): 0.63 - samples/sec: 3868.72 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:11,552 epoch 28 - iter 50/105 - loss 0.15787049 - time (sec): 0.80 - samples/sec: 3829.39 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:11,706 epoch 28 - iter 60/105 - loss 0.16049621 - time (sec): 0.95 - samples/sec: 3837.66 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:11,861 epoch 28 - iter 70/105 - loss 0.16097042 - time (sec): 1.11 - samples/sec: 3780.49 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:12,020 epoch 28 - iter 80/105 - loss 0.15843466 - time (sec): 1.26 - samples/sec: 3762.19 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:12,174 epoch 28 - iter 90/105 - loss 0.16246614 - time (sec): 1.42 - samples/sec: 3759.14 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:12,330 epoch 28 - iter 100/105 - loss 0.16244747 - time (sec): 1.57 - samples/sec: 3744.50 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:12,421 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:12,421 EPOCH 28 done: loss 0.1658 - lr: 0.100000 +2023-05-15 21:29:13,226 DEV : loss 0.3700896203517914 - accuracy (micro avg) 0.9233 +2023-05-15 21:29:13,238 - 0 epochs without improvement +2023-05-15 21:29:13,238 saving best model +2023-05-15 21:29:14,742 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:14,915 epoch 29 - iter 10/105 - loss 0.19017553 - time (sec): 0.17 - samples/sec: 3425.98 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:15,078 epoch 29 - iter 20/105 - loss 0.17749714 - time (sec): 0.34 - samples/sec: 3402.37 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:15,229 epoch 29 - iter 30/105 - loss 0.16616767 - time (sec): 0.49 - samples/sec: 3573.19 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:15,365 epoch 29 - iter 40/105 - loss 0.16236268 - time (sec): 0.62 - samples/sec: 3704.61 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:15,485 epoch 29 - iter 50/105 - loss 0.16865431 - time (sec): 0.74 - samples/sec: 3801.47 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:15,618 epoch 29 - iter 60/105 - loss 0.17369814 - time (sec): 0.88 - samples/sec: 3945.68 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:15,747 epoch 29 - iter 70/105 - loss 0.17680836 - time (sec): 1.00 - samples/sec: 4061.10 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:15,878 epoch 29 - iter 80/105 - loss 0.17509046 - time (sec): 1.14 - samples/sec: 4114.98 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:16,007 epoch 29 - iter 90/105 - loss 0.17525190 - time (sec): 1.26 - samples/sec: 4165.68 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:16,140 epoch 29 - iter 100/105 - loss 0.17246774 - time (sec): 1.40 - samples/sec: 4236.10 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:16,207 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:16,207 EPOCH 29 done: loss 0.1727 - lr: 0.100000 +2023-05-15 21:29:16,879 DEV : loss 0.4101061522960663 - accuracy (micro avg) 0.9205 +2023-05-15 21:29:16,891 - 1 epochs without improvement +2023-05-15 21:29:16,891 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:17,040 epoch 30 - iter 10/105 - loss 0.12426246 - time (sec): 0.15 - samples/sec: 3641.65 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:17,194 epoch 30 - iter 20/105 - loss 0.17071040 - time (sec): 0.30 - samples/sec: 3819.79 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:17,358 epoch 30 - iter 30/105 - loss 0.16425334 - time (sec): 0.47 - samples/sec: 3694.92 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:17,520 epoch 30 - iter 40/105 - loss 0.16927551 - time (sec): 0.63 - samples/sec: 3729.25 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:17,650 epoch 30 - iter 50/105 - loss 0.16612028 - time (sec): 0.76 - samples/sec: 3912.22 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:17,782 epoch 30 - iter 60/105 - loss 0.16233095 - time (sec): 0.89 - samples/sec: 3977.99 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:17,907 epoch 30 - iter 70/105 - loss 0.16040288 - time (sec): 1.02 - samples/sec: 4047.22 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:18,035 epoch 30 - iter 80/105 - loss 0.16245743 - time (sec): 1.14 - samples/sec: 4098.75 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:18,159 epoch 30 - iter 90/105 - loss 0.16439274 - time (sec): 1.27 - samples/sec: 4129.95 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:18,300 epoch 30 - iter 100/105 - loss 0.16738507 - time (sec): 1.41 - samples/sec: 4203.40 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:18,367 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:18,367 EPOCH 30 done: loss 0.1668 - lr: 0.100000 +2023-05-15 21:29:19,046 DEV : loss 0.40395256876945496 - accuracy (micro avg) 0.9234 +2023-05-15 21:29:19,058 - 0 epochs without improvement +2023-05-15 21:29:19,059 saving best model +2023-05-15 21:29:20,549 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:20,721 epoch 31 - iter 10/105 - loss 0.17948015 - time (sec): 0.17 - samples/sec: 3359.44 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:20,888 epoch 31 - iter 20/105 - loss 0.16896526 - time (sec): 0.34 - samples/sec: 3340.57 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:21,043 epoch 31 - iter 30/105 - loss 0.15090088 - time (sec): 0.49 - samples/sec: 3470.71 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:21,195 epoch 31 - iter 40/105 - loss 0.14980059 - time (sec): 0.65 - samples/sec: 3531.56 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:21,349 epoch 31 - iter 50/105 - loss 0.15409275 - time (sec): 0.80 - samples/sec: 3568.19 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:21,510 epoch 31 - iter 60/105 - loss 0.16090804 - time (sec): 0.96 - samples/sec: 3606.67 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:21,674 epoch 31 - iter 70/105 - loss 0.17302166 - time (sec): 1.12 - samples/sec: 3636.50 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:21,827 epoch 31 - iter 80/105 - loss 0.17423631 - time (sec): 1.28 - samples/sec: 3660.33 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:21,986 epoch 31 - iter 90/105 - loss 0.16943651 - time (sec): 1.44 - samples/sec: 3668.25 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:22,152 epoch 31 - iter 100/105 - loss 0.17237235 - time (sec): 1.60 - samples/sec: 3678.88 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:22,241 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:22,241 EPOCH 31 done: loss 0.1689 - lr: 0.100000 +2023-05-15 21:29:23,046 DEV : loss 0.4038907289505005 - accuracy (micro avg) 0.9215 +2023-05-15 21:29:23,058 - 1 epochs without improvement +2023-05-15 21:29:23,058 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:23,221 epoch 32 - iter 10/105 - loss 0.13153856 - time (sec): 0.16 - samples/sec: 3930.46 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:23,386 epoch 32 - iter 20/105 - loss 0.12559417 - time (sec): 0.33 - samples/sec: 3898.98 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:23,540 epoch 32 - iter 30/105 - loss 0.13186378 - time (sec): 0.48 - samples/sec: 3882.91 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:23,706 epoch 32 - iter 40/105 - loss 0.14738308 - time (sec): 0.65 - samples/sec: 3803.48 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:23,860 epoch 32 - iter 50/105 - loss 0.14542930 - time (sec): 0.80 - samples/sec: 3822.17 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:24,013 epoch 32 - iter 60/105 - loss 0.14034148 - time (sec): 0.95 - samples/sec: 3772.52 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:24,162 epoch 32 - iter 70/105 - loss 0.14608369 - time (sec): 1.10 - samples/sec: 3736.89 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:24,322 epoch 32 - iter 80/105 - loss 0.14451093 - time (sec): 1.26 - samples/sec: 3765.69 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:24,452 epoch 32 - iter 90/105 - loss 0.14694589 - time (sec): 1.39 - samples/sec: 3835.94 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:24,578 epoch 32 - iter 100/105 - loss 0.14257599 - time (sec): 1.52 - samples/sec: 3899.13 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:24,645 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:24,645 EPOCH 32 done: loss 0.1465 - lr: 0.100000 +2023-05-15 21:29:25,317 DEV : loss 0.4025160074234009 - accuracy (micro avg) 0.9226 +2023-05-15 21:29:25,329 - 2 epochs without improvement +2023-05-15 21:29:25,330 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:25,489 epoch 33 - iter 10/105 - loss 0.11692645 - time (sec): 0.16 - samples/sec: 3837.74 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:25,661 epoch 33 - iter 20/105 - loss 0.11261481 - time (sec): 0.33 - samples/sec: 3799.27 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:25,818 epoch 33 - iter 30/105 - loss 0.12293677 - time (sec): 0.49 - samples/sec: 3747.77 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:25,963 epoch 33 - iter 40/105 - loss 0.12200511 - time (sec): 0.63 - samples/sec: 3718.00 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:26,124 epoch 33 - iter 50/105 - loss 0.13014301 - time (sec): 0.79 - samples/sec: 3746.61 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:26,275 epoch 33 - iter 60/105 - loss 0.13305045 - time (sec): 0.95 - samples/sec: 3739.00 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:26,432 epoch 33 - iter 70/105 - loss 0.14157664 - time (sec): 1.10 - samples/sec: 3697.61 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:26,561 epoch 33 - iter 80/105 - loss 0.14296348 - time (sec): 1.23 - samples/sec: 3733.29 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:26,693 epoch 33 - iter 90/105 - loss 0.14671581 - time (sec): 1.36 - samples/sec: 3850.84 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:26,823 epoch 33 - iter 100/105 - loss 0.14655293 - time (sec): 1.49 - samples/sec: 3942.58 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:26,890 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:26,890 EPOCH 33 done: loss 0.1479 - lr: 0.100000 +2023-05-15 21:29:27,561 DEV : loss 0.3954732418060303 - accuracy (micro avg) 0.9216 +2023-05-15 21:29:27,574 - 3 epochs without improvement +2023-05-15 21:29:27,574 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:27,737 epoch 34 - iter 10/105 - loss 0.18187757 - time (sec): 0.16 - samples/sec: 3964.56 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:27,885 epoch 34 - iter 20/105 - loss 0.15943059 - time (sec): 0.31 - samples/sec: 3973.64 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:28,012 epoch 34 - iter 30/105 - loss 0.18407409 - time (sec): 0.44 - samples/sec: 4058.33 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:28,142 epoch 34 - iter 40/105 - loss 0.17524787 - time (sec): 0.57 - samples/sec: 4222.45 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:28,274 epoch 34 - iter 50/105 - loss 0.17493872 - time (sec): 0.70 - samples/sec: 4284.73 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:28,402 epoch 34 - iter 60/105 - loss 0.16409113 - time (sec): 0.83 - samples/sec: 4326.04 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:28,533 epoch 34 - iter 70/105 - loss 0.16252810 - time (sec): 0.96 - samples/sec: 4330.98 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:28,657 epoch 34 - iter 80/105 - loss 0.16419135 - time (sec): 1.08 - samples/sec: 4339.85 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:28,784 epoch 34 - iter 90/105 - loss 0.15812127 - time (sec): 1.21 - samples/sec: 4404.06 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:28,909 epoch 34 - iter 100/105 - loss 0.15864944 - time (sec): 1.33 - samples/sec: 4432.72 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:28,976 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:28,976 EPOCH 34 done: loss 0.1557 - lr: 0.100000 +2023-05-15 21:29:29,783 DEV : loss 0.40327951312065125 - accuracy (micro avg) 0.9237 +2023-05-15 21:29:29,795 - 0 epochs without improvement +2023-05-15 21:29:29,795 saving best model +2023-05-15 21:29:31,337 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:31,498 epoch 35 - iter 10/105 - loss 0.11365990 - time (sec): 0.16 - samples/sec: 3319.82 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:31,666 epoch 35 - iter 20/105 - loss 0.15195260 - time (sec): 0.33 - samples/sec: 3488.78 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:31,838 epoch 35 - iter 30/105 - loss 0.15559549 - time (sec): 0.50 - samples/sec: 3580.59 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:31,993 epoch 35 - iter 40/105 - loss 0.14416862 - time (sec): 0.66 - samples/sec: 3531.45 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:32,155 epoch 35 - iter 50/105 - loss 0.15059920 - time (sec): 0.82 - samples/sec: 3596.12 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:32,320 epoch 35 - iter 60/105 - loss 0.15134831 - time (sec): 0.98 - samples/sec: 3625.10 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:32,479 epoch 35 - iter 70/105 - loss 0.16104896 - time (sec): 1.14 - samples/sec: 3645.90 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:32,641 epoch 35 - iter 80/105 - loss 0.16009531 - time (sec): 1.30 - samples/sec: 3666.99 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:32,792 epoch 35 - iter 90/105 - loss 0.15922036 - time (sec): 1.45 - samples/sec: 3666.51 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:32,948 epoch 35 - iter 100/105 - loss 0.16487008 - time (sec): 1.61 - samples/sec: 3675.78 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:33,017 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:33,017 EPOCH 35 done: loss 0.1689 - lr: 0.100000 +2023-05-15 21:29:33,692 DEV : loss 0.39580753445625305 - accuracy (micro avg) 0.9242 +2023-05-15 21:29:33,704 - 0 epochs without improvement +2023-05-15 21:29:33,704 saving best model +2023-05-15 21:29:35,238 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:35,407 epoch 36 - iter 10/105 - loss 0.12681475 - time (sec): 0.17 - samples/sec: 3601.05 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:35,573 epoch 36 - iter 20/105 - loss 0.13123437 - time (sec): 0.34 - samples/sec: 3565.71 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:35,738 epoch 36 - iter 30/105 - loss 0.12768796 - time (sec): 0.50 - samples/sec: 3642.53 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:35,899 epoch 36 - iter 40/105 - loss 0.12212410 - time (sec): 0.66 - samples/sec: 3679.92 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:36,048 epoch 36 - iter 50/105 - loss 0.12578326 - time (sec): 0.81 - samples/sec: 3698.25 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:36,209 epoch 36 - iter 60/105 - loss 0.12588040 - time (sec): 0.97 - samples/sec: 3697.77 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:36,364 epoch 36 - iter 70/105 - loss 0.13398106 - time (sec): 1.13 - samples/sec: 3700.07 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:36,525 epoch 36 - iter 80/105 - loss 0.13156100 - time (sec): 1.29 - samples/sec: 3718.51 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:36,671 epoch 36 - iter 90/105 - loss 0.13473477 - time (sec): 1.43 - samples/sec: 3739.14 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:36,800 epoch 36 - iter 100/105 - loss 0.13714964 - time (sec): 1.56 - samples/sec: 3799.85 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:36,866 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:36,866 EPOCH 36 done: loss 0.1372 - lr: 0.100000 +2023-05-15 21:29:37,544 DEV : loss 0.4013025164604187 - accuracy (micro avg) 0.9229 +2023-05-15 21:29:37,556 - 1 epochs without improvement +2023-05-15 21:29:37,556 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:37,717 epoch 37 - iter 10/105 - loss 0.17773883 - time (sec): 0.16 - samples/sec: 3833.71 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:37,876 epoch 37 - iter 20/105 - loss 0.13500703 - time (sec): 0.32 - samples/sec: 3767.94 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:38,032 epoch 37 - iter 30/105 - loss 0.13968275 - time (sec): 0.48 - samples/sec: 3886.64 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:38,193 epoch 37 - iter 40/105 - loss 0.13029297 - time (sec): 0.64 - samples/sec: 3799.45 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:38,344 epoch 37 - iter 50/105 - loss 0.13293094 - time (sec): 0.79 - samples/sec: 3762.98 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:38,502 epoch 37 - iter 60/105 - loss 0.13691739 - time (sec): 0.95 - samples/sec: 3754.09 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:38,646 epoch 37 - iter 70/105 - loss 0.13779774 - time (sec): 1.09 - samples/sec: 3831.76 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:38,779 epoch 37 - iter 80/105 - loss 0.13771613 - time (sec): 1.22 - samples/sec: 3888.86 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:38,908 epoch 37 - iter 90/105 - loss 0.13710010 - time (sec): 1.35 - samples/sec: 3953.89 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:39,036 epoch 37 - iter 100/105 - loss 0.14205918 - time (sec): 1.48 - samples/sec: 3993.58 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:39,103 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:39,103 EPOCH 37 done: loss 0.1438 - lr: 0.100000 +2023-05-15 21:29:39,810 DEV : loss 0.415070116519928 - accuracy (micro avg) 0.9219 +2023-05-15 21:29:39,822 - 2 epochs without improvement +2023-05-15 21:29:39,822 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:39,994 epoch 38 - iter 10/105 - loss 0.20516581 - time (sec): 0.17 - samples/sec: 3954.67 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:40,149 epoch 38 - iter 20/105 - loss 0.18937984 - time (sec): 0.33 - samples/sec: 3794.82 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:40,309 epoch 38 - iter 30/105 - loss 0.15848597 - time (sec): 0.49 - samples/sec: 3844.54 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:40,456 epoch 38 - iter 40/105 - loss 0.16197244 - time (sec): 0.63 - samples/sec: 3741.04 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:40,622 epoch 38 - iter 50/105 - loss 0.16457943 - time (sec): 0.80 - samples/sec: 3794.73 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:40,781 epoch 38 - iter 60/105 - loss 0.16114683 - time (sec): 0.96 - samples/sec: 3756.04 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:40,938 epoch 38 - iter 70/105 - loss 0.16400070 - time (sec): 1.12 - samples/sec: 3776.19 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:41,085 epoch 38 - iter 80/105 - loss 0.16291289 - time (sec): 1.26 - samples/sec: 3786.55 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:41,217 epoch 38 - iter 90/105 - loss 0.15458326 - time (sec): 1.40 - samples/sec: 3846.21 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:41,342 epoch 38 - iter 100/105 - loss 0.15470577 - time (sec): 1.52 - samples/sec: 3906.01 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:41,404 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:41,404 EPOCH 38 done: loss 0.1544 - lr: 0.100000 +2023-05-15 21:29:42,219 DEV : loss 0.41072356700897217 - accuracy (micro avg) 0.9237 +2023-05-15 21:29:42,231 - 3 epochs without improvement +2023-05-15 21:29:42,231 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:42,389 epoch 39 - iter 10/105 - loss 0.12626244 - time (sec): 0.16 - samples/sec: 3574.30 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:42,519 epoch 39 - iter 20/105 - loss 0.10437065 - time (sec): 0.29 - samples/sec: 3972.66 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:42,644 epoch 39 - iter 30/105 - loss 0.11959350 - time (sec): 0.41 - samples/sec: 4001.79 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:42,780 epoch 39 - iter 40/105 - loss 0.11522957 - time (sec): 0.55 - samples/sec: 4191.23 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:42,913 epoch 39 - iter 50/105 - loss 0.11583460 - time (sec): 0.68 - samples/sec: 4244.33 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:43,039 epoch 39 - iter 60/105 - loss 0.12341357 - time (sec): 0.81 - samples/sec: 4253.17 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:43,166 epoch 39 - iter 70/105 - loss 0.12221358 - time (sec): 0.93 - samples/sec: 4329.07 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:43,297 epoch 39 - iter 80/105 - loss 0.12490269 - time (sec): 1.07 - samples/sec: 4363.97 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:43,434 epoch 39 - iter 90/105 - loss 0.12776413 - time (sec): 1.20 - samples/sec: 4432.09 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:43,562 epoch 39 - iter 100/105 - loss 0.12640114 - time (sec): 1.33 - samples/sec: 4439.21 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:43,632 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:43,632 EPOCH 39 done: loss 0.1244 - lr: 0.100000 +2023-05-15 21:29:44,318 DEV : loss 0.41140687465667725 - accuracy (micro avg) 0.9249 +2023-05-15 21:29:44,330 - 0 epochs without improvement +2023-05-15 21:29:44,330 saving best model +2023-05-15 21:29:45,827 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:45,986 epoch 40 - iter 10/105 - loss 0.14795394 - time (sec): 0.16 - samples/sec: 3761.38 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:46,147 epoch 40 - iter 20/105 - loss 0.11385624 - time (sec): 0.32 - samples/sec: 3628.90 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:46,280 epoch 40 - iter 30/105 - loss 0.10842964 - time (sec): 0.45 - samples/sec: 3770.91 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:46,419 epoch 40 - iter 40/105 - loss 0.11264527 - time (sec): 0.59 - samples/sec: 4036.28 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:46,543 epoch 40 - iter 50/105 - loss 0.11175295 - time (sec): 0.72 - samples/sec: 4150.69 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:46,666 epoch 40 - iter 60/105 - loss 0.11581112 - time (sec): 0.84 - samples/sec: 4231.95 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:46,796 epoch 40 - iter 70/105 - loss 0.11738182 - time (sec): 0.97 - samples/sec: 4282.03 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:46,925 epoch 40 - iter 80/105 - loss 0.12190069 - time (sec): 1.10 - samples/sec: 4315.15 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:47,050 epoch 40 - iter 90/105 - loss 0.12535250 - time (sec): 1.22 - samples/sec: 4350.73 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:47,180 epoch 40 - iter 100/105 - loss 0.12320186 - time (sec): 1.35 - samples/sec: 4363.43 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:47,251 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:47,251 EPOCH 40 done: loss 0.1229 - lr: 0.100000 +2023-05-15 21:29:47,931 DEV : loss 0.4322431683540344 - accuracy (micro avg) 0.9233 +2023-05-15 21:29:47,945 - 1 epochs without improvement +2023-05-15 21:29:47,945 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:48,108 epoch 41 - iter 10/105 - loss 0.13569982 - time (sec): 0.16 - samples/sec: 3739.71 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:48,255 epoch 41 - iter 20/105 - loss 0.12269332 - time (sec): 0.31 - samples/sec: 3765.33 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:48,414 epoch 41 - iter 30/105 - loss 0.12385531 - time (sec): 0.47 - samples/sec: 3683.30 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:48,576 epoch 41 - iter 40/105 - loss 0.12539752 - time (sec): 0.63 - samples/sec: 3765.40 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:48,742 epoch 41 - iter 50/105 - loss 0.13098185 - time (sec): 0.80 - samples/sec: 3827.90 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:48,907 epoch 41 - iter 60/105 - loss 0.13122920 - time (sec): 0.96 - samples/sec: 3754.83 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:49,070 epoch 41 - iter 70/105 - loss 0.13069313 - time (sec): 1.12 - samples/sec: 3760.26 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:49,202 epoch 41 - iter 80/105 - loss 0.12619702 - time (sec): 1.26 - samples/sec: 3872.70 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:49,332 epoch 41 - iter 90/105 - loss 0.12526959 - time (sec): 1.39 - samples/sec: 3931.93 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:49,457 epoch 41 - iter 100/105 - loss 0.12524086 - time (sec): 1.51 - samples/sec: 3954.19 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:49,519 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:49,519 EPOCH 41 done: loss 0.1237 - lr: 0.100000 +2023-05-15 21:29:50,333 DEV : loss 0.42178237438201904 - accuracy (micro avg) 0.9234 +2023-05-15 21:29:50,345 - 2 epochs without improvement +2023-05-15 21:29:50,345 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:50,508 epoch 42 - iter 10/105 - loss 0.13757911 - time (sec): 0.16 - samples/sec: 3642.86 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:50,675 epoch 42 - iter 20/105 - loss 0.12492803 - time (sec): 0.33 - samples/sec: 3727.46 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:50,835 epoch 42 - iter 30/105 - loss 0.12348396 - time (sec): 0.49 - samples/sec: 3722.72 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:50,989 epoch 42 - iter 40/105 - loss 0.12797827 - time (sec): 0.64 - samples/sec: 3679.45 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:51,152 epoch 42 - iter 50/105 - loss 0.12548280 - time (sec): 0.81 - samples/sec: 3696.99 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:51,309 epoch 42 - iter 60/105 - loss 0.11989623 - time (sec): 0.96 - samples/sec: 3749.20 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:51,473 epoch 42 - iter 70/105 - loss 0.12276712 - time (sec): 1.13 - samples/sec: 3762.06 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:51,627 epoch 42 - iter 80/105 - loss 0.12305123 - time (sec): 1.28 - samples/sec: 3763.51 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:51,780 epoch 42 - iter 90/105 - loss 0.11996811 - time (sec): 1.43 - samples/sec: 3766.57 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:51,934 epoch 42 - iter 100/105 - loss 0.12027167 - time (sec): 1.59 - samples/sec: 3740.47 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:52,014 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:52,014 EPOCH 42 done: loss 0.1211 - lr: 0.100000 +2023-05-15 21:29:52,690 DEV : loss 0.42137962579727173 - accuracy (micro avg) 0.9237 +2023-05-15 21:29:52,704 - 3 epochs without improvement +2023-05-15 21:29:52,704 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:52,871 epoch 43 - iter 10/105 - loss 0.10634964 - time (sec): 0.17 - samples/sec: 3955.16 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:53,030 epoch 43 - iter 20/105 - loss 0.13513222 - time (sec): 0.33 - samples/sec: 3909.74 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:53,182 epoch 43 - iter 30/105 - loss 0.13494545 - time (sec): 0.48 - samples/sec: 3942.48 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:53,341 epoch 43 - iter 40/105 - loss 0.12992074 - time (sec): 0.64 - samples/sec: 3855.31 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:53,492 epoch 43 - iter 50/105 - loss 0.13240206 - time (sec): 0.79 - samples/sec: 3849.44 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:53,658 epoch 43 - iter 60/105 - loss 0.13833264 - time (sec): 0.95 - samples/sec: 3793.30 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:53,816 epoch 43 - iter 70/105 - loss 0.13098679 - time (sec): 1.11 - samples/sec: 3794.22 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:53,977 epoch 43 - iter 80/105 - loss 0.12497234 - time (sec): 1.27 - samples/sec: 3781.76 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:54,136 epoch 43 - iter 90/105 - loss 0.12201816 - time (sec): 1.43 - samples/sec: 3748.20 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:54,300 epoch 43 - iter 100/105 - loss 0.12714844 - time (sec): 1.60 - samples/sec: 3725.40 - lr: 0.100000 - momentum: 0.000000 +2023-05-15 21:29:54,377 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:54,377 EPOCH 43 done: loss 0.1301 - lr: 0.100000 +2023-05-15 21:29:55,050 DEV : loss 0.4403139650821686 - accuracy (micro avg) 0.9215 +2023-05-15 21:29:55,061 - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.05] +2023-05-15 21:29:55,062 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:55,215 epoch 44 - iter 10/105 - loss 0.11070459 - time (sec): 0.15 - samples/sec: 3645.21 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:55,376 epoch 44 - iter 20/105 - loss 0.12333639 - time (sec): 0.31 - samples/sec: 3957.75 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:55,540 epoch 44 - iter 30/105 - loss 0.12356758 - time (sec): 0.48 - samples/sec: 3879.69 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:55,688 epoch 44 - iter 40/105 - loss 0.12342539 - time (sec): 0.63 - samples/sec: 3764.11 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:55,844 epoch 44 - iter 50/105 - loss 0.12214429 - time (sec): 0.78 - samples/sec: 3710.79 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:56,007 epoch 44 - iter 60/105 - loss 0.12046220 - time (sec): 0.95 - samples/sec: 3746.26 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:56,169 epoch 44 - iter 70/105 - loss 0.12088169 - time (sec): 1.11 - samples/sec: 3733.92 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:56,304 epoch 44 - iter 80/105 - loss 0.11876220 - time (sec): 1.24 - samples/sec: 3789.32 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:56,435 epoch 44 - iter 90/105 - loss 0.12117199 - time (sec): 1.37 - samples/sec: 3869.54 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:56,560 epoch 44 - iter 100/105 - loss 0.11797416 - time (sec): 1.50 - samples/sec: 3946.47 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:56,632 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:56,632 EPOCH 44 done: loss 0.1203 - lr: 0.050000 +2023-05-15 21:29:57,437 DEV : loss 0.4078696370124817 - accuracy (micro avg) 0.9234 +2023-05-15 21:29:57,450 - 1 epochs without improvement +2023-05-15 21:29:57,450 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:57,598 epoch 45 - iter 10/105 - loss 0.12240043 - time (sec): 0.15 - samples/sec: 3917.08 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:57,760 epoch 45 - iter 20/105 - loss 0.12615430 - time (sec): 0.31 - samples/sec: 3888.29 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:57,924 epoch 45 - iter 30/105 - loss 0.11850485 - time (sec): 0.47 - samples/sec: 3763.81 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:58,082 epoch 45 - iter 40/105 - loss 0.11245046 - time (sec): 0.63 - samples/sec: 3766.75 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:58,241 epoch 45 - iter 50/105 - loss 0.11174340 - time (sec): 0.79 - samples/sec: 3729.85 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:58,403 epoch 45 - iter 60/105 - loss 0.11224976 - time (sec): 0.95 - samples/sec: 3692.49 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:58,563 epoch 45 - iter 70/105 - loss 0.11477408 - time (sec): 1.11 - samples/sec: 3693.73 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:58,732 epoch 45 - iter 80/105 - loss 0.11335217 - time (sec): 1.28 - samples/sec: 3710.14 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:58,870 epoch 45 - iter 90/105 - loss 0.11603745 - time (sec): 1.42 - samples/sec: 3801.20 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:58,997 epoch 45 - iter 100/105 - loss 0.11783449 - time (sec): 1.55 - samples/sec: 3827.74 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:29:59,067 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:29:59,067 EPOCH 45 done: loss 0.1205 - lr: 0.050000 +2023-05-15 21:29:59,738 DEV : loss 0.42651140689849854 - accuracy (micro avg) 0.9262 +2023-05-15 21:29:59,750 - 0 epochs without improvement +2023-05-15 21:29:59,750 saving best model +2023-05-15 21:30:01,303 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:01,462 epoch 46 - iter 10/105 - loss 0.07757579 - time (sec): 0.16 - samples/sec: 3575.33 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:01,635 epoch 46 - iter 20/105 - loss 0.10217992 - time (sec): 0.33 - samples/sec: 3588.55 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:01,786 epoch 46 - iter 30/105 - loss 0.08863273 - time (sec): 0.48 - samples/sec: 3626.34 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:01,938 epoch 46 - iter 40/105 - loss 0.09988256 - time (sec): 0.64 - samples/sec: 3916.20 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:02,068 epoch 46 - iter 50/105 - loss 0.10114883 - time (sec): 0.77 - samples/sec: 3999.98 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:02,194 epoch 46 - iter 60/105 - loss 0.09889308 - time (sec): 0.89 - samples/sec: 4105.93 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:02,314 epoch 46 - iter 70/105 - loss 0.10302309 - time (sec): 1.01 - samples/sec: 4154.63 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:02,447 epoch 46 - iter 80/105 - loss 0.10177534 - time (sec): 1.14 - samples/sec: 4239.99 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:02,574 epoch 46 - iter 90/105 - loss 0.10155713 - time (sec): 1.27 - samples/sec: 4239.72 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:02,706 epoch 46 - iter 100/105 - loss 0.10475806 - time (sec): 1.40 - samples/sec: 4257.72 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:02,771 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:02,771 EPOCH 46 done: loss 0.1047 - lr: 0.050000 +2023-05-15 21:30:03,442 DEV : loss 0.42641904950141907 - accuracy (micro avg) 0.9244 +2023-05-15 21:30:03,455 - 1 epochs without improvement +2023-05-15 21:30:03,455 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:03,604 epoch 47 - iter 10/105 - loss 0.12040184 - time (sec): 0.15 - samples/sec: 3787.05 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:03,767 epoch 47 - iter 20/105 - loss 0.10311603 - time (sec): 0.31 - samples/sec: 3748.26 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:03,924 epoch 47 - iter 30/105 - loss 0.09732495 - time (sec): 0.47 - samples/sec: 3790.45 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:04,059 epoch 47 - iter 40/105 - loss 0.10650195 - time (sec): 0.60 - samples/sec: 3867.01 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:04,187 epoch 47 - iter 50/105 - loss 0.10210912 - time (sec): 0.73 - samples/sec: 3995.66 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:04,316 epoch 47 - iter 60/105 - loss 0.10302949 - time (sec): 0.86 - samples/sec: 4087.62 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:04,443 epoch 47 - iter 70/105 - loss 0.09700798 - time (sec): 0.99 - samples/sec: 4161.11 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:04,568 epoch 47 - iter 80/105 - loss 0.09692983 - time (sec): 1.11 - samples/sec: 4210.37 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:04,691 epoch 47 - iter 90/105 - loss 0.09707526 - time (sec): 1.24 - samples/sec: 4230.86 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:04,826 epoch 47 - iter 100/105 - loss 0.09855310 - time (sec): 1.37 - samples/sec: 4304.02 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:04,895 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:04,895 EPOCH 47 done: loss 0.0997 - lr: 0.050000 +2023-05-15 21:30:05,699 DEV : loss 0.4337489902973175 - accuracy (micro avg) 0.9248 +2023-05-15 21:30:05,711 - 2 epochs without improvement +2023-05-15 21:30:05,711 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:05,868 epoch 48 - iter 10/105 - loss 0.09515060 - time (sec): 0.16 - samples/sec: 3527.84 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:06,012 epoch 48 - iter 20/105 - loss 0.09041357 - time (sec): 0.30 - samples/sec: 3751.47 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:06,172 epoch 48 - iter 30/105 - loss 0.08203553 - time (sec): 0.46 - samples/sec: 3713.72 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:06,342 epoch 48 - iter 40/105 - loss 0.08376034 - time (sec): 0.63 - samples/sec: 3837.15 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:06,504 epoch 48 - iter 50/105 - loss 0.08316523 - time (sec): 0.79 - samples/sec: 3744.48 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:06,639 epoch 48 - iter 60/105 - loss 0.08656137 - time (sec): 0.93 - samples/sec: 3854.43 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:06,771 epoch 48 - iter 70/105 - loss 0.09194897 - time (sec): 1.06 - samples/sec: 3914.28 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:06,899 epoch 48 - iter 80/105 - loss 0.09788336 - time (sec): 1.19 - samples/sec: 3988.64 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:07,031 epoch 48 - iter 90/105 - loss 0.10283371 - time (sec): 1.32 - samples/sec: 4077.49 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:07,151 epoch 48 - iter 100/105 - loss 0.10118860 - time (sec): 1.44 - samples/sec: 4109.14 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:07,217 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:07,217 EPOCH 48 done: loss 0.1001 - lr: 0.050000 +2023-05-15 21:30:07,889 DEV : loss 0.43896815180778503 - accuracy (micro avg) 0.9234 +2023-05-15 21:30:07,901 - 3 epochs without improvement +2023-05-15 21:30:07,901 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:08,064 epoch 49 - iter 10/105 - loss 0.10410754 - time (sec): 0.16 - samples/sec: 3487.74 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:08,225 epoch 49 - iter 20/105 - loss 0.08994063 - time (sec): 0.32 - samples/sec: 3541.51 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:08,386 epoch 49 - iter 30/105 - loss 0.09238845 - time (sec): 0.49 - samples/sec: 3686.53 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:08,541 epoch 49 - iter 40/105 - loss 0.09027927 - time (sec): 0.64 - samples/sec: 3669.78 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:08,707 epoch 49 - iter 50/105 - loss 0.09236148 - time (sec): 0.81 - samples/sec: 3593.40 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:08,872 epoch 49 - iter 60/105 - loss 0.09347906 - time (sec): 0.97 - samples/sec: 3589.49 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:09,042 epoch 49 - iter 70/105 - loss 0.09866856 - time (sec): 1.14 - samples/sec: 3585.54 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:09,198 epoch 49 - iter 80/105 - loss 0.10140349 - time (sec): 1.30 - samples/sec: 3582.01 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:09,334 epoch 49 - iter 90/105 - loss 0.10317846 - time (sec): 1.43 - samples/sec: 3683.47 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:09,469 epoch 49 - iter 100/105 - loss 0.10346212 - time (sec): 1.57 - samples/sec: 3800.14 - lr: 0.050000 - momentum: 0.000000 +2023-05-15 21:30:09,529 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:09,529 EPOCH 49 done: loss 0.1022 - lr: 0.050000 +2023-05-15 21:30:10,202 DEV : loss 0.4417934715747833 - accuracy (micro avg) 0.9247 +2023-05-15 21:30:10,214 - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.025] +2023-05-15 21:30:10,214 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:10,374 epoch 50 - iter 10/105 - loss 0.10155014 - time (sec): 0.16 - samples/sec: 3410.51 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:10,528 epoch 50 - iter 20/105 - loss 0.09421830 - time (sec): 0.31 - samples/sec: 3582.91 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:10,683 epoch 50 - iter 30/105 - loss 0.08456918 - time (sec): 0.47 - samples/sec: 3597.25 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:10,838 epoch 50 - iter 40/105 - loss 0.08748446 - time (sec): 0.62 - samples/sec: 3703.35 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:11,003 epoch 50 - iter 50/105 - loss 0.08878558 - time (sec): 0.79 - samples/sec: 3695.58 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:11,158 epoch 50 - iter 60/105 - loss 0.08631747 - time (sec): 0.94 - samples/sec: 3650.32 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:11,322 epoch 50 - iter 70/105 - loss 0.08712266 - time (sec): 1.11 - samples/sec: 3653.78 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:11,489 epoch 50 - iter 80/105 - loss 0.08719100 - time (sec): 1.27 - samples/sec: 3674.25 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:11,646 epoch 50 - iter 90/105 - loss 0.08340004 - time (sec): 1.43 - samples/sec: 3690.59 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:11,800 epoch 50 - iter 100/105 - loss 0.08787648 - time (sec): 1.59 - samples/sec: 3705.52 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:11,888 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:11,888 EPOCH 50 done: loss 0.0871 - lr: 0.025000 +2023-05-15 21:30:12,560 DEV : loss 0.43998345732688904 - accuracy (micro avg) 0.9248 +2023-05-15 21:30:12,572 - 1 epochs without improvement +2023-05-15 21:30:12,572 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:12,714 epoch 51 - iter 10/105 - loss 0.08250246 - time (sec): 0.14 - samples/sec: 3380.14 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:12,885 epoch 51 - iter 20/105 - loss 0.07379753 - time (sec): 0.31 - samples/sec: 3591.82 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:13,052 epoch 51 - iter 30/105 - loss 0.08802722 - time (sec): 0.48 - samples/sec: 3742.11 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:13,207 epoch 51 - iter 40/105 - loss 0.09066879 - time (sec): 0.63 - samples/sec: 3831.15 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:13,357 epoch 51 - iter 50/105 - loss 0.08766548 - time (sec): 0.78 - samples/sec: 3935.49 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:13,486 epoch 51 - iter 60/105 - loss 0.08422949 - time (sec): 0.91 - samples/sec: 4058.85 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:13,616 epoch 51 - iter 70/105 - loss 0.08933460 - time (sec): 1.04 - samples/sec: 4091.37 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:13,740 epoch 51 - iter 80/105 - loss 0.09256634 - time (sec): 1.17 - samples/sec: 4130.07 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:13,871 epoch 51 - iter 90/105 - loss 0.09040581 - time (sec): 1.30 - samples/sec: 4153.59 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:13,996 epoch 51 - iter 100/105 - loss 0.08850502 - time (sec): 1.42 - samples/sec: 4178.50 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:14,060 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:14,060 EPOCH 51 done: loss 0.0889 - lr: 0.025000 +2023-05-15 21:30:14,868 DEV : loss 0.4459802806377411 - accuracy (micro avg) 0.9266 +2023-05-15 21:30:14,880 - 0 epochs without improvement +2023-05-15 21:30:14,880 saving best model +2023-05-15 21:30:16,376 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:16,538 epoch 52 - iter 10/105 - loss 0.08790156 - time (sec): 0.16 - samples/sec: 3551.43 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:16,709 epoch 52 - iter 20/105 - loss 0.09278011 - time (sec): 0.33 - samples/sec: 3493.27 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:16,874 epoch 52 - iter 30/105 - loss 0.09091522 - time (sec): 0.50 - samples/sec: 3689.32 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:17,041 epoch 52 - iter 40/105 - loss 0.08797693 - time (sec): 0.66 - samples/sec: 3717.58 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:17,199 epoch 52 - iter 50/105 - loss 0.09140983 - time (sec): 0.82 - samples/sec: 3722.50 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:17,362 epoch 52 - iter 60/105 - loss 0.08783638 - time (sec): 0.99 - samples/sec: 3691.63 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:17,521 epoch 52 - iter 70/105 - loss 0.09044234 - time (sec): 1.14 - samples/sec: 3683.92 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:17,683 epoch 52 - iter 80/105 - loss 0.09242811 - time (sec): 1.31 - samples/sec: 3702.29 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:17,837 epoch 52 - iter 90/105 - loss 0.09060020 - time (sec): 1.46 - samples/sec: 3683.11 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:17,993 epoch 52 - iter 100/105 - loss 0.09166655 - time (sec): 1.62 - samples/sec: 3671.75 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:18,075 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:18,075 EPOCH 52 done: loss 0.0923 - lr: 0.025000 +2023-05-15 21:30:18,746 DEV : loss 0.44730234146118164 - accuracy (micro avg) 0.9249 +2023-05-15 21:30:18,758 - 1 epochs without improvement +2023-05-15 21:30:18,758 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:18,905 epoch 53 - iter 10/105 - loss 0.05499141 - time (sec): 0.15 - samples/sec: 3610.68 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:19,062 epoch 53 - iter 20/105 - loss 0.07059527 - time (sec): 0.30 - samples/sec: 3575.99 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:19,223 epoch 53 - iter 30/105 - loss 0.08327849 - time (sec): 0.46 - samples/sec: 3646.81 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:19,371 epoch 53 - iter 40/105 - loss 0.08524775 - time (sec): 0.61 - samples/sec: 3626.90 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:19,531 epoch 53 - iter 50/105 - loss 0.08712003 - time (sec): 0.77 - samples/sec: 3684.46 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:19,703 epoch 53 - iter 60/105 - loss 0.09188637 - time (sec): 0.94 - samples/sec: 3692.12 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:19,858 epoch 53 - iter 70/105 - loss 0.09597465 - time (sec): 1.10 - samples/sec: 3664.92 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:20,023 epoch 53 - iter 80/105 - loss 0.09462198 - time (sec): 1.26 - samples/sec: 3699.23 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:20,155 epoch 53 - iter 90/105 - loss 0.09487981 - time (sec): 1.40 - samples/sec: 3804.17 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:20,285 epoch 53 - iter 100/105 - loss 0.09549078 - time (sec): 1.53 - samples/sec: 3868.82 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:20,356 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:20,356 EPOCH 53 done: loss 0.0979 - lr: 0.025000 +2023-05-15 21:30:21,036 DEV : loss 0.4442404806613922 - accuracy (micro avg) 0.9267 +2023-05-15 21:30:21,048 - 0 epochs without improvement +2023-05-15 21:30:21,048 saving best model +2023-05-15 21:30:22,517 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:22,691 epoch 54 - iter 10/105 - loss 0.08495571 - time (sec): 0.17 - samples/sec: 3491.90 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:22,854 epoch 54 - iter 20/105 - loss 0.08501973 - time (sec): 0.34 - samples/sec: 3554.35 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:23,021 epoch 54 - iter 30/105 - loss 0.09743852 - time (sec): 0.50 - samples/sec: 3672.88 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:23,172 epoch 54 - iter 40/105 - loss 0.09451822 - time (sec): 0.65 - samples/sec: 3701.29 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:23,309 epoch 54 - iter 50/105 - loss 0.08993559 - time (sec): 0.79 - samples/sec: 3787.10 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:23,443 epoch 54 - iter 60/105 - loss 0.08867751 - time (sec): 0.93 - samples/sec: 3908.89 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:23,567 epoch 54 - iter 70/105 - loss 0.08874145 - time (sec): 1.05 - samples/sec: 3967.35 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:23,694 epoch 54 - iter 80/105 - loss 0.08686806 - time (sec): 1.18 - samples/sec: 4038.98 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:23,825 epoch 54 - iter 90/105 - loss 0.09092987 - time (sec): 1.31 - samples/sec: 4116.77 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:23,946 epoch 54 - iter 100/105 - loss 0.08850743 - time (sec): 1.43 - samples/sec: 4145.41 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:24,013 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:24,013 EPOCH 54 done: loss 0.0894 - lr: 0.025000 +2023-05-15 21:30:24,822 DEV : loss 0.44815778732299805 - accuracy (micro avg) 0.9267 +2023-05-15 21:30:24,834 - 1 epochs without improvement +2023-05-15 21:30:24,835 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:24,998 epoch 55 - iter 10/105 - loss 0.11340921 - time (sec): 0.16 - samples/sec: 3674.32 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:25,161 epoch 55 - iter 20/105 - loss 0.10073815 - time (sec): 0.33 - samples/sec: 3971.60 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:25,306 epoch 55 - iter 30/105 - loss 0.10311474 - time (sec): 0.47 - samples/sec: 3810.26 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:25,472 epoch 55 - iter 40/105 - loss 0.09085139 - time (sec): 0.64 - samples/sec: 3794.66 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:25,628 epoch 55 - iter 50/105 - loss 0.10506698 - time (sec): 0.79 - samples/sec: 3785.43 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:25,784 epoch 55 - iter 60/105 - loss 0.10309018 - time (sec): 0.95 - samples/sec: 3817.32 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:25,935 epoch 55 - iter 70/105 - loss 0.10555289 - time (sec): 1.10 - samples/sec: 3784.28 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:26,092 epoch 55 - iter 80/105 - loss 0.10515112 - time (sec): 1.26 - samples/sec: 3806.57 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:26,237 epoch 55 - iter 90/105 - loss 0.10222695 - time (sec): 1.40 - samples/sec: 3807.05 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:26,390 epoch 55 - iter 100/105 - loss 0.09704806 - time (sec): 1.56 - samples/sec: 3812.01 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:26,471 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:26,472 EPOCH 55 done: loss 0.0994 - lr: 0.025000 +2023-05-15 21:30:27,160 DEV : loss 0.4603947103023529 - accuracy (micro avg) 0.9273 +2023-05-15 21:30:27,172 - 0 epochs without improvement +2023-05-15 21:30:27,172 saving best model +2023-05-15 21:30:28,679 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:28,850 epoch 56 - iter 10/105 - loss 0.08967255 - time (sec): 0.17 - samples/sec: 3319.85 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:29,019 epoch 56 - iter 20/105 - loss 0.06883085 - time (sec): 0.34 - samples/sec: 3569.10 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:29,172 epoch 56 - iter 30/105 - loss 0.07577760 - time (sec): 0.49 - samples/sec: 3659.54 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:29,318 epoch 56 - iter 40/105 - loss 0.08986062 - time (sec): 0.64 - samples/sec: 3724.69 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:29,476 epoch 56 - iter 50/105 - loss 0.08512969 - time (sec): 0.80 - samples/sec: 3717.46 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:29,634 epoch 56 - iter 60/105 - loss 0.09044687 - time (sec): 0.95 - samples/sec: 3722.60 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:29,786 epoch 56 - iter 70/105 - loss 0.09543241 - time (sec): 1.11 - samples/sec: 3771.58 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:29,950 epoch 56 - iter 80/105 - loss 0.09562096 - time (sec): 1.27 - samples/sec: 3734.26 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:30,107 epoch 56 - iter 90/105 - loss 0.09317804 - time (sec): 1.43 - samples/sec: 3722.42 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:30,255 epoch 56 - iter 100/105 - loss 0.09849251 - time (sec): 1.58 - samples/sec: 3748.79 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:30,337 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:30,337 EPOCH 56 done: loss 0.0992 - lr: 0.025000 +2023-05-15 21:30:31,005 DEV : loss 0.45674797892570496 - accuracy (micro avg) 0.9253 +2023-05-15 21:30:31,018 - 1 epochs without improvement +2023-05-15 21:30:31,018 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:31,172 epoch 57 - iter 10/105 - loss 0.02979771 - time (sec): 0.15 - samples/sec: 3689.67 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:31,326 epoch 57 - iter 20/105 - loss 0.06583308 - time (sec): 0.31 - samples/sec: 3619.17 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:31,490 epoch 57 - iter 30/105 - loss 0.07248027 - time (sec): 0.47 - samples/sec: 3718.69 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:31,635 epoch 57 - iter 40/105 - loss 0.06894876 - time (sec): 0.62 - samples/sec: 3819.61 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:31,759 epoch 57 - iter 50/105 - loss 0.06672600 - time (sec): 0.74 - samples/sec: 3965.13 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:31,887 epoch 57 - iter 60/105 - loss 0.07004378 - time (sec): 0.87 - samples/sec: 4066.85 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:32,012 epoch 57 - iter 70/105 - loss 0.07138190 - time (sec): 0.99 - samples/sec: 4149.22 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:32,140 epoch 57 - iter 80/105 - loss 0.07725337 - time (sec): 1.12 - samples/sec: 4193.19 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:32,274 epoch 57 - iter 90/105 - loss 0.07986006 - time (sec): 1.26 - samples/sec: 4260.96 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:32,401 epoch 57 - iter 100/105 - loss 0.08470070 - time (sec): 1.38 - samples/sec: 4325.36 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:32,469 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:32,469 EPOCH 57 done: loss 0.0849 - lr: 0.025000 +2023-05-15 21:30:33,267 DEV : loss 0.4544132947921753 - accuracy (micro avg) 0.9276 +2023-05-15 21:30:33,279 - 0 epochs without improvement +2023-05-15 21:30:33,280 saving best model +2023-05-15 21:30:34,740 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:34,910 epoch 58 - iter 10/105 - loss 0.09034536 - time (sec): 0.17 - samples/sec: 3468.88 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:35,063 epoch 58 - iter 20/105 - loss 0.11065231 - time (sec): 0.32 - samples/sec: 3467.45 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:35,224 epoch 58 - iter 30/105 - loss 0.10656638 - time (sec): 0.48 - samples/sec: 3512.14 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:35,385 epoch 58 - iter 40/105 - loss 0.09482706 - time (sec): 0.64 - samples/sec: 3652.50 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:35,550 epoch 58 - iter 50/105 - loss 0.08608497 - time (sec): 0.81 - samples/sec: 3591.58 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:35,701 epoch 58 - iter 60/105 - loss 0.08756389 - time (sec): 0.96 - samples/sec: 3609.97 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:35,829 epoch 58 - iter 70/105 - loss 0.09090279 - time (sec): 1.09 - samples/sec: 3715.99 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:35,955 epoch 58 - iter 80/105 - loss 0.08859676 - time (sec): 1.21 - samples/sec: 3827.45 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:36,086 epoch 58 - iter 90/105 - loss 0.08564067 - time (sec): 1.35 - samples/sec: 3907.50 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:36,220 epoch 58 - iter 100/105 - loss 0.08297622 - time (sec): 1.48 - samples/sec: 3985.55 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:36,290 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:36,291 EPOCH 58 done: loss 0.0848 - lr: 0.025000 +2023-05-15 21:30:36,963 DEV : loss 0.4505839943885803 - accuracy (micro avg) 0.9269 +2023-05-15 21:30:36,976 - 1 epochs without improvement +2023-05-15 21:30:36,976 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:37,141 epoch 59 - iter 10/105 - loss 0.09168406 - time (sec): 0.16 - samples/sec: 3648.56 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:37,304 epoch 59 - iter 20/105 - loss 0.09114142 - time (sec): 0.33 - samples/sec: 3729.72 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:37,467 epoch 59 - iter 30/105 - loss 0.08044970 - time (sec): 0.49 - samples/sec: 3824.77 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:37,612 epoch 59 - iter 40/105 - loss 0.08763567 - time (sec): 0.64 - samples/sec: 3884.63 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:37,740 epoch 59 - iter 50/105 - loss 0.08274090 - time (sec): 0.76 - samples/sec: 3976.87 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:37,869 epoch 59 - iter 60/105 - loss 0.08555697 - time (sec): 0.89 - samples/sec: 4118.47 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:37,989 epoch 59 - iter 70/105 - loss 0.08258492 - time (sec): 1.01 - samples/sec: 4109.92 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:38,112 epoch 59 - iter 80/105 - loss 0.08036995 - time (sec): 1.14 - samples/sec: 4127.41 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:38,242 epoch 59 - iter 90/105 - loss 0.08190745 - time (sec): 1.27 - samples/sec: 4182.97 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:38,371 epoch 59 - iter 100/105 - loss 0.08489307 - time (sec): 1.40 - samples/sec: 4245.97 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:38,437 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:38,437 EPOCH 59 done: loss 0.0831 - lr: 0.025000 +2023-05-15 21:30:39,126 DEV : loss 0.449642151594162 - accuracy (micro avg) 0.9287 +2023-05-15 21:30:39,139 - 0 epochs without improvement +2023-05-15 21:30:39,139 saving best model +2023-05-15 21:30:40,634 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:40,810 epoch 60 - iter 10/105 - loss 0.07294341 - time (sec): 0.18 - samples/sec: 3747.11 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:40,975 epoch 60 - iter 20/105 - loss 0.07297189 - time (sec): 0.34 - samples/sec: 3538.07 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:41,132 epoch 60 - iter 30/105 - loss 0.08514902 - time (sec): 0.50 - samples/sec: 3549.28 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:41,259 epoch 60 - iter 40/105 - loss 0.09686086 - time (sec): 0.62 - samples/sec: 3697.61 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:41,391 epoch 60 - iter 50/105 - loss 0.09001737 - time (sec): 0.76 - samples/sec: 3913.06 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:41,509 epoch 60 - iter 60/105 - loss 0.08812331 - time (sec): 0.87 - samples/sec: 3958.10 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:41,640 epoch 60 - iter 70/105 - loss 0.08630214 - time (sec): 1.01 - samples/sec: 4044.86 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:41,771 epoch 60 - iter 80/105 - loss 0.08846802 - time (sec): 1.14 - samples/sec: 4130.01 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:41,904 epoch 60 - iter 90/105 - loss 0.08861147 - time (sec): 1.27 - samples/sec: 4195.05 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:42,036 epoch 60 - iter 100/105 - loss 0.08885317 - time (sec): 1.40 - samples/sec: 4207.76 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:42,108 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:42,108 EPOCH 60 done: loss 0.0874 - lr: 0.025000 +2023-05-15 21:30:42,795 DEV : loss 0.4487907290458679 - accuracy (micro avg) 0.9277 +2023-05-15 21:30:42,808 - 1 epochs without improvement +2023-05-15 21:30:42,808 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:42,973 epoch 61 - iter 10/105 - loss 0.08015160 - time (sec): 0.16 - samples/sec: 3824.57 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:43,123 epoch 61 - iter 20/105 - loss 0.06057929 - time (sec): 0.31 - samples/sec: 3784.72 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:43,275 epoch 61 - iter 30/105 - loss 0.06926931 - time (sec): 0.47 - samples/sec: 3624.66 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:43,427 epoch 61 - iter 40/105 - loss 0.07085607 - time (sec): 0.62 - samples/sec: 3670.65 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:43,588 epoch 61 - iter 50/105 - loss 0.07059675 - time (sec): 0.78 - samples/sec: 3724.09 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:43,723 epoch 61 - iter 60/105 - loss 0.07058916 - time (sec): 0.91 - samples/sec: 3895.12 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:43,845 epoch 61 - iter 70/105 - loss 0.07125948 - time (sec): 1.04 - samples/sec: 3951.28 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:43,978 epoch 61 - iter 80/105 - loss 0.07479688 - time (sec): 1.17 - samples/sec: 4037.05 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:44,111 epoch 61 - iter 90/105 - loss 0.07313734 - time (sec): 1.30 - samples/sec: 4092.27 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:44,241 epoch 61 - iter 100/105 - loss 0.07799588 - time (sec): 1.43 - samples/sec: 4126.79 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:44,312 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:44,312 EPOCH 61 done: loss 0.0760 - lr: 0.025000 +2023-05-15 21:30:45,140 DEV : loss 0.4585801362991333 - accuracy (micro avg) 0.927 +2023-05-15 21:30:45,153 - 2 epochs without improvement +2023-05-15 21:30:45,153 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:45,308 epoch 62 - iter 10/105 - loss 0.09004574 - time (sec): 0.15 - samples/sec: 3944.17 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:45,466 epoch 62 - iter 20/105 - loss 0.07939139 - time (sec): 0.31 - samples/sec: 3844.98 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:45,595 epoch 62 - iter 30/105 - loss 0.06903933 - time (sec): 0.44 - samples/sec: 4024.22 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:45,726 epoch 62 - iter 40/105 - loss 0.07174227 - time (sec): 0.57 - samples/sec: 4081.11 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:45,861 epoch 62 - iter 50/105 - loss 0.07977434 - time (sec): 0.71 - samples/sec: 4136.79 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:45,992 epoch 62 - iter 60/105 - loss 0.08341777 - time (sec): 0.84 - samples/sec: 4264.15 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:46,125 epoch 62 - iter 70/105 - loss 0.08811842 - time (sec): 0.97 - samples/sec: 4249.91 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:46,252 epoch 62 - iter 80/105 - loss 0.08805974 - time (sec): 1.10 - samples/sec: 4258.69 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:46,386 epoch 62 - iter 90/105 - loss 0.08797355 - time (sec): 1.23 - samples/sec: 4307.63 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:46,521 epoch 62 - iter 100/105 - loss 0.08905213 - time (sec): 1.37 - samples/sec: 4343.29 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:46,587 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:46,587 EPOCH 62 done: loss 0.0876 - lr: 0.025000 +2023-05-15 21:30:47,277 DEV : loss 0.45762529969215393 - accuracy (micro avg) 0.9273 +2023-05-15 21:30:47,289 - 3 epochs without improvement +2023-05-15 21:30:47,290 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:47,455 epoch 63 - iter 10/105 - loss 0.13866874 - time (sec): 0.17 - samples/sec: 3764.67 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:47,616 epoch 63 - iter 20/105 - loss 0.11774307 - time (sec): 0.33 - samples/sec: 3679.75 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:47,773 epoch 63 - iter 30/105 - loss 0.10617288 - time (sec): 0.48 - samples/sec: 3634.33 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:47,903 epoch 63 - iter 40/105 - loss 0.10151569 - time (sec): 0.61 - samples/sec: 3826.73 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:48,036 epoch 63 - iter 50/105 - loss 0.10331057 - time (sec): 0.75 - samples/sec: 3977.39 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:48,162 epoch 63 - iter 60/105 - loss 0.09765511 - time (sec): 0.87 - samples/sec: 4060.06 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:48,297 epoch 63 - iter 70/105 - loss 0.09812690 - time (sec): 1.01 - samples/sec: 4107.99 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:48,422 epoch 63 - iter 80/105 - loss 0.10264448 - time (sec): 1.13 - samples/sec: 4126.25 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:48,555 epoch 63 - iter 90/105 - loss 0.09810914 - time (sec): 1.27 - samples/sec: 4178.06 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:48,688 epoch 63 - iter 100/105 - loss 0.09693463 - time (sec): 1.40 - samples/sec: 4235.89 - lr: 0.025000 - momentum: 0.000000 +2023-05-15 21:30:48,757 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:48,757 EPOCH 63 done: loss 0.0968 - lr: 0.025000 +2023-05-15 21:30:49,446 DEV : loss 0.4547846019268036 - accuracy (micro avg) 0.9287 +2023-05-15 21:30:49,459 - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0125] +2023-05-15 21:30:49,459 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:49,621 epoch 64 - iter 10/105 - loss 0.10850674 - time (sec): 0.16 - samples/sec: 3610.44 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:49,789 epoch 64 - iter 20/105 - loss 0.09252579 - time (sec): 0.33 - samples/sec: 3587.76 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:49,937 epoch 64 - iter 30/105 - loss 0.08403065 - time (sec): 0.48 - samples/sec: 3635.53 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:50,095 epoch 64 - iter 40/105 - loss 0.08705889 - time (sec): 0.64 - samples/sec: 3693.26 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:50,249 epoch 64 - iter 50/105 - loss 0.08250025 - time (sec): 0.79 - samples/sec: 3689.38 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:50,380 epoch 64 - iter 60/105 - loss 0.07915800 - time (sec): 0.92 - samples/sec: 3822.31 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:50,505 epoch 64 - iter 70/105 - loss 0.08280372 - time (sec): 1.05 - samples/sec: 3914.92 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:50,632 epoch 64 - iter 80/105 - loss 0.08043726 - time (sec): 1.17 - samples/sec: 4009.72 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:50,760 epoch 64 - iter 90/105 - loss 0.07739270 - time (sec): 1.30 - samples/sec: 4072.08 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:50,892 epoch 64 - iter 100/105 - loss 0.07765963 - time (sec): 1.43 - samples/sec: 4135.64 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:50,958 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:50,958 EPOCH 64 done: loss 0.0806 - lr: 0.012500 +2023-05-15 21:30:51,770 DEV : loss 0.4491409659385681 - accuracy (micro avg) 0.927 +2023-05-15 21:30:51,782 - 1 epochs without improvement +2023-05-15 21:30:51,782 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:51,936 epoch 65 - iter 10/105 - loss 0.09353496 - time (sec): 0.15 - samples/sec: 3789.70 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:52,097 epoch 65 - iter 20/105 - loss 0.08407092 - time (sec): 0.31 - samples/sec: 3859.28 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:52,230 epoch 65 - iter 30/105 - loss 0.07108789 - time (sec): 0.45 - samples/sec: 3987.09 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:52,358 epoch 65 - iter 40/105 - loss 0.07327740 - time (sec): 0.58 - samples/sec: 4156.74 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:52,485 epoch 65 - iter 50/105 - loss 0.08257736 - time (sec): 0.70 - samples/sec: 4220.56 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:52,609 epoch 65 - iter 60/105 - loss 0.08262387 - time (sec): 0.83 - samples/sec: 4243.88 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:52,737 epoch 65 - iter 70/105 - loss 0.07754773 - time (sec): 0.95 - samples/sec: 4326.77 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:52,864 epoch 65 - iter 80/105 - loss 0.07909466 - time (sec): 1.08 - samples/sec: 4357.30 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:53,001 epoch 65 - iter 90/105 - loss 0.07937441 - time (sec): 1.22 - samples/sec: 4386.93 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:53,128 epoch 65 - iter 100/105 - loss 0.07883681 - time (sec): 1.35 - samples/sec: 4390.93 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:53,196 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:53,196 EPOCH 65 done: loss 0.0776 - lr: 0.012500 +2023-05-15 21:30:53,867 DEV : loss 0.45759317278862 - accuracy (micro avg) 0.9269 +2023-05-15 21:30:53,879 - 2 epochs without improvement +2023-05-15 21:30:53,880 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:54,039 epoch 66 - iter 10/105 - loss 0.08663228 - time (sec): 0.16 - samples/sec: 3779.37 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:54,208 epoch 66 - iter 20/105 - loss 0.08645371 - time (sec): 0.33 - samples/sec: 3807.32 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:54,361 epoch 66 - iter 30/105 - loss 0.08114543 - time (sec): 0.48 - samples/sec: 3776.59 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:54,513 epoch 66 - iter 40/105 - loss 0.08233481 - time (sec): 0.63 - samples/sec: 3703.76 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:54,669 epoch 66 - iter 50/105 - loss 0.08185727 - time (sec): 0.79 - samples/sec: 3738.31 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:54,800 epoch 66 - iter 60/105 - loss 0.08319666 - time (sec): 0.92 - samples/sec: 3843.32 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:54,930 epoch 66 - iter 70/105 - loss 0.08390522 - time (sec): 1.05 - samples/sec: 3922.32 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:55,059 epoch 66 - iter 80/105 - loss 0.08176695 - time (sec): 1.18 - samples/sec: 4003.55 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:55,193 epoch 66 - iter 90/105 - loss 0.07874792 - time (sec): 1.31 - samples/sec: 4082.74 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:55,320 epoch 66 - iter 100/105 - loss 0.07989898 - time (sec): 1.44 - samples/sec: 4126.29 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:55,385 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:55,385 EPOCH 66 done: loss 0.0814 - lr: 0.012500 +2023-05-15 21:30:56,058 DEV : loss 0.4588263928890228 - accuracy (micro avg) 0.9269 +2023-05-15 21:30:56,071 - 3 epochs without improvement +2023-05-15 21:30:56,071 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:56,230 epoch 67 - iter 10/105 - loss 0.06845870 - time (sec): 0.16 - samples/sec: 3905.46 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:56,386 epoch 67 - iter 20/105 - loss 0.08727215 - time (sec): 0.32 - samples/sec: 3764.82 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:56,539 epoch 67 - iter 30/105 - loss 0.07657684 - time (sec): 0.47 - samples/sec: 3771.44 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:56,679 epoch 67 - iter 40/105 - loss 0.08364560 - time (sec): 0.61 - samples/sec: 3889.19 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:56,810 epoch 67 - iter 50/105 - loss 0.07990151 - time (sec): 0.74 - samples/sec: 4035.15 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:56,939 epoch 67 - iter 60/105 - loss 0.07751244 - time (sec): 0.87 - samples/sec: 4144.07 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:57,061 epoch 67 - iter 70/105 - loss 0.07772931 - time (sec): 0.99 - samples/sec: 4169.81 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:57,188 epoch 67 - iter 80/105 - loss 0.07758064 - time (sec): 1.12 - samples/sec: 4225.12 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:57,317 epoch 67 - iter 90/105 - loss 0.07968912 - time (sec): 1.25 - samples/sec: 4258.92 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:57,456 epoch 67 - iter 100/105 - loss 0.08729335 - time (sec): 1.38 - samples/sec: 4313.57 - lr: 0.012500 - momentum: 0.000000 +2023-05-15 21:30:57,519 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:57,519 EPOCH 67 done: loss 0.0866 - lr: 0.012500 +2023-05-15 21:30:58,192 DEV : loss 0.45442140102386475 - accuracy (micro avg) 0.9276 +2023-05-15 21:30:58,204 - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00625] +2023-05-15 21:30:58,204 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:58,360 epoch 68 - iter 10/105 - loss 0.10963916 - time (sec): 0.16 - samples/sec: 3625.19 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:30:58,512 epoch 68 - iter 20/105 - loss 0.07913487 - time (sec): 0.31 - samples/sec: 3774.24 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:30:58,671 epoch 68 - iter 30/105 - loss 0.07409651 - time (sec): 0.47 - samples/sec: 3819.83 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:30:58,818 epoch 68 - iter 40/105 - loss 0.07495369 - time (sec): 0.61 - samples/sec: 3801.11 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:30:58,964 epoch 68 - iter 50/105 - loss 0.07448298 - time (sec): 0.76 - samples/sec: 3813.46 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:30:59,116 epoch 68 - iter 60/105 - loss 0.07435783 - time (sec): 0.91 - samples/sec: 3849.08 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:30:59,248 epoch 68 - iter 70/105 - loss 0.07258293 - time (sec): 1.04 - samples/sec: 3964.16 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:30:59,374 epoch 68 - iter 80/105 - loss 0.07064422 - time (sec): 1.17 - samples/sec: 4006.50 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:30:59,507 epoch 68 - iter 90/105 - loss 0.06948889 - time (sec): 1.30 - samples/sec: 4037.94 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:30:59,644 epoch 68 - iter 100/105 - loss 0.06781102 - time (sec): 1.44 - samples/sec: 4113.60 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:30:59,713 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:30:59,713 EPOCH 68 done: loss 0.0686 - lr: 0.006250 +2023-05-15 21:31:00,522 DEV : loss 0.4559873044490814 - accuracy (micro avg) 0.9287 +2023-05-15 21:31:00,535 - 1 epochs without improvement +2023-05-15 21:31:00,535 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:00,692 epoch 69 - iter 10/105 - loss 0.08944258 - time (sec): 0.16 - samples/sec: 4206.96 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:00,846 epoch 69 - iter 20/105 - loss 0.10336843 - time (sec): 0.31 - samples/sec: 3939.59 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:00,983 epoch 69 - iter 30/105 - loss 0.09478803 - time (sec): 0.45 - samples/sec: 4098.96 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:01,111 epoch 69 - iter 40/105 - loss 0.09967452 - time (sec): 0.58 - samples/sec: 4125.35 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:01,240 epoch 69 - iter 50/105 - loss 0.09868047 - time (sec): 0.71 - samples/sec: 4244.02 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:01,372 epoch 69 - iter 60/105 - loss 0.09570703 - time (sec): 0.84 - samples/sec: 4329.31 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:01,501 epoch 69 - iter 70/105 - loss 0.08997756 - time (sec): 0.97 - samples/sec: 4394.20 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:01,630 epoch 69 - iter 80/105 - loss 0.08595509 - time (sec): 1.09 - samples/sec: 4411.81 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:01,750 epoch 69 - iter 90/105 - loss 0.08389853 - time (sec): 1.21 - samples/sec: 4403.53 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:01,879 epoch 69 - iter 100/105 - loss 0.08742698 - time (sec): 1.34 - samples/sec: 4413.41 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:01,943 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:01,943 EPOCH 69 done: loss 0.0885 - lr: 0.006250 +2023-05-15 21:31:02,617 DEV : loss 0.45404523611068726 - accuracy (micro avg) 0.9284 +2023-05-15 21:31:02,630 - 2 epochs without improvement +2023-05-15 21:31:02,630 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:02,786 epoch 70 - iter 10/105 - loss 0.05790097 - time (sec): 0.16 - samples/sec: 3862.98 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:02,951 epoch 70 - iter 20/105 - loss 0.05650145 - time (sec): 0.32 - samples/sec: 3842.87 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:03,114 epoch 70 - iter 30/105 - loss 0.06989721 - time (sec): 0.48 - samples/sec: 3892.36 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:03,266 epoch 70 - iter 40/105 - loss 0.07572799 - time (sec): 0.64 - samples/sec: 3841.64 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:03,411 epoch 70 - iter 50/105 - loss 0.07726074 - time (sec): 0.78 - samples/sec: 3841.64 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:03,540 epoch 70 - iter 60/105 - loss 0.07660886 - time (sec): 0.91 - samples/sec: 3979.72 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:03,666 epoch 70 - iter 70/105 - loss 0.07545937 - time (sec): 1.04 - samples/sec: 4038.69 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:03,792 epoch 70 - iter 80/105 - loss 0.08047299 - time (sec): 1.16 - samples/sec: 4091.16 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:03,921 epoch 70 - iter 90/105 - loss 0.08212963 - time (sec): 1.29 - samples/sec: 4152.37 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:04,047 epoch 70 - iter 100/105 - loss 0.08173741 - time (sec): 1.42 - samples/sec: 4160.89 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:04,118 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:04,118 EPOCH 70 done: loss 0.0821 - lr: 0.006250 +2023-05-15 21:31:04,789 DEV : loss 0.4548334777355194 - accuracy (micro avg) 0.9288 +2023-05-15 21:31:04,802 - 0 epochs without improvement +2023-05-15 21:31:04,802 saving best model +2023-05-15 21:31:06,278 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:06,446 epoch 71 - iter 10/105 - loss 0.05925519 - time (sec): 0.17 - samples/sec: 3561.96 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:06,627 epoch 71 - iter 20/105 - loss 0.08341835 - time (sec): 0.35 - samples/sec: 3688.93 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:06,786 epoch 71 - iter 30/105 - loss 0.08201720 - time (sec): 0.51 - samples/sec: 3799.69 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:06,940 epoch 71 - iter 40/105 - loss 0.08307218 - time (sec): 0.66 - samples/sec: 3764.18 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:07,107 epoch 71 - iter 50/105 - loss 0.07868981 - time (sec): 0.83 - samples/sec: 3729.79 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:07,253 epoch 71 - iter 60/105 - loss 0.07536770 - time (sec): 0.97 - samples/sec: 3724.63 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:07,415 epoch 71 - iter 70/105 - loss 0.07608214 - time (sec): 1.14 - samples/sec: 3728.62 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:07,563 epoch 71 - iter 80/105 - loss 0.07578846 - time (sec): 1.28 - samples/sec: 3705.30 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:07,717 epoch 71 - iter 90/105 - loss 0.07440881 - time (sec): 1.44 - samples/sec: 3724.39 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:07,841 epoch 71 - iter 100/105 - loss 0.07231427 - time (sec): 1.56 - samples/sec: 3793.05 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:07,908 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:07,908 EPOCH 71 done: loss 0.0727 - lr: 0.006250 +2023-05-15 21:31:08,710 DEV : loss 0.4581780433654785 - accuracy (micro avg) 0.9285 +2023-05-15 21:31:08,722 - 1 epochs without improvement +2023-05-15 21:31:08,722 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:08,883 epoch 72 - iter 10/105 - loss 0.05354000 - time (sec): 0.16 - samples/sec: 3907.77 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:09,033 epoch 72 - iter 20/105 - loss 0.06611641 - time (sec): 0.31 - samples/sec: 3858.84 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:09,189 epoch 72 - iter 30/105 - loss 0.09281519 - time (sec): 0.47 - samples/sec: 3720.04 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:09,316 epoch 72 - iter 40/105 - loss 0.09454611 - time (sec): 0.59 - samples/sec: 3910.86 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:09,445 epoch 72 - iter 50/105 - loss 0.09008855 - time (sec): 0.72 - samples/sec: 4117.39 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:09,577 epoch 72 - iter 60/105 - loss 0.09084776 - time (sec): 0.86 - samples/sec: 4184.40 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:09,707 epoch 72 - iter 70/105 - loss 0.08655022 - time (sec): 0.98 - samples/sec: 4259.24 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:09,833 epoch 72 - iter 80/105 - loss 0.08556694 - time (sec): 1.11 - samples/sec: 4252.43 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:09,963 epoch 72 - iter 90/105 - loss 0.08736164 - time (sec): 1.24 - samples/sec: 4288.88 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:10,088 epoch 72 - iter 100/105 - loss 0.08868731 - time (sec): 1.37 - samples/sec: 4315.20 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:10,159 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:10,159 EPOCH 72 done: loss 0.0863 - lr: 0.006250 +2023-05-15 21:31:10,829 DEV : loss 0.45512455701828003 - accuracy (micro avg) 0.9277 +2023-05-15 21:31:10,842 - 2 epochs without improvement +2023-05-15 21:31:10,842 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:11,014 epoch 73 - iter 10/105 - loss 0.05618360 - time (sec): 0.17 - samples/sec: 4063.87 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:11,176 epoch 73 - iter 20/105 - loss 0.06240535 - time (sec): 0.33 - samples/sec: 4017.13 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:11,317 epoch 73 - iter 30/105 - loss 0.05767794 - time (sec): 0.47 - samples/sec: 4047.38 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:11,442 epoch 73 - iter 40/105 - loss 0.06854809 - time (sec): 0.60 - samples/sec: 4149.84 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:11,565 epoch 73 - iter 50/105 - loss 0.06718055 - time (sec): 0.72 - samples/sec: 4220.82 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:11,693 epoch 73 - iter 60/105 - loss 0.06832022 - time (sec): 0.85 - samples/sec: 4251.94 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:11,825 epoch 73 - iter 70/105 - loss 0.07204937 - time (sec): 0.98 - samples/sec: 4323.67 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:11,948 epoch 73 - iter 80/105 - loss 0.07038789 - time (sec): 1.11 - samples/sec: 4367.21 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:12,068 epoch 73 - iter 90/105 - loss 0.07173083 - time (sec): 1.23 - samples/sec: 4385.62 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:12,197 epoch 73 - iter 100/105 - loss 0.07347684 - time (sec): 1.36 - samples/sec: 4375.11 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:12,264 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:12,264 EPOCH 73 done: loss 0.0732 - lr: 0.006250 +2023-05-15 21:31:12,939 DEV : loss 0.45252254605293274 - accuracy (micro avg) 0.9277 +2023-05-15 21:31:12,951 - 3 epochs without improvement +2023-05-15 21:31:12,951 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:13,116 epoch 74 - iter 10/105 - loss 0.02994434 - time (sec): 0.16 - samples/sec: 3313.18 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:13,280 epoch 74 - iter 20/105 - loss 0.04619729 - time (sec): 0.33 - samples/sec: 3407.00 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:13,433 epoch 74 - iter 30/105 - loss 0.05692570 - time (sec): 0.48 - samples/sec: 3506.18 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:13,587 epoch 74 - iter 40/105 - loss 0.06489997 - time (sec): 0.64 - samples/sec: 3678.64 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:13,741 epoch 74 - iter 50/105 - loss 0.06906347 - time (sec): 0.79 - samples/sec: 3707.45 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:13,873 epoch 74 - iter 60/105 - loss 0.07814785 - time (sec): 0.92 - samples/sec: 3834.11 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:14,001 epoch 74 - iter 70/105 - loss 0.07762192 - time (sec): 1.05 - samples/sec: 3913.42 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:14,137 epoch 74 - iter 80/105 - loss 0.07806681 - time (sec): 1.19 - samples/sec: 4023.56 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:14,270 epoch 74 - iter 90/105 - loss 0.07972765 - time (sec): 1.32 - samples/sec: 4095.73 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:14,393 epoch 74 - iter 100/105 - loss 0.08040769 - time (sec): 1.44 - samples/sec: 4103.49 - lr: 0.006250 - momentum: 0.000000 +2023-05-15 21:31:14,464 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:14,464 EPOCH 74 done: loss 0.0806 - lr: 0.006250 +2023-05-15 21:31:15,266 DEV : loss 0.452541708946228 - accuracy (micro avg) 0.9277 +2023-05-15 21:31:15,278 - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.003125] +2023-05-15 21:31:15,278 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:15,438 epoch 75 - iter 10/105 - loss 0.05128156 - time (sec): 0.16 - samples/sec: 3811.40 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:15,588 epoch 75 - iter 20/105 - loss 0.05844909 - time (sec): 0.31 - samples/sec: 3738.54 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:15,719 epoch 75 - iter 30/105 - loss 0.06017984 - time (sec): 0.44 - samples/sec: 4060.59 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:15,842 epoch 75 - iter 40/105 - loss 0.05969354 - time (sec): 0.56 - samples/sec: 4217.26 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:15,971 epoch 75 - iter 50/105 - loss 0.07116576 - time (sec): 0.69 - samples/sec: 4355.69 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:16,100 epoch 75 - iter 60/105 - loss 0.07862299 - time (sec): 0.82 - samples/sec: 4368.47 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:16,222 epoch 75 - iter 70/105 - loss 0.07367731 - time (sec): 0.94 - samples/sec: 4354.25 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:16,352 epoch 75 - iter 80/105 - loss 0.07187146 - time (sec): 1.07 - samples/sec: 4375.41 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:16,485 epoch 75 - iter 90/105 - loss 0.06910262 - time (sec): 1.21 - samples/sec: 4380.93 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:16,617 epoch 75 - iter 100/105 - loss 0.06635376 - time (sec): 1.34 - samples/sec: 4421.90 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:16,687 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:16,687 EPOCH 75 done: loss 0.0684 - lr: 0.003125 +2023-05-15 21:31:17,359 DEV : loss 0.45416298508644104 - accuracy (micro avg) 0.9281 +2023-05-15 21:31:17,372 - 1 epochs without improvement +2023-05-15 21:31:17,372 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:17,523 epoch 76 - iter 10/105 - loss 0.05918599 - time (sec): 0.15 - samples/sec: 3442.17 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:17,671 epoch 76 - iter 20/105 - loss 0.07186231 - time (sec): 0.30 - samples/sec: 3511.07 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:17,834 epoch 76 - iter 30/105 - loss 0.07661525 - time (sec): 0.46 - samples/sec: 3563.78 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:17,995 epoch 76 - iter 40/105 - loss 0.09041049 - time (sec): 0.62 - samples/sec: 3605.29 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:18,152 epoch 76 - iter 50/105 - loss 0.08889893 - time (sec): 0.78 - samples/sec: 3700.76 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:18,281 epoch 76 - iter 60/105 - loss 0.08529008 - time (sec): 0.91 - samples/sec: 3901.02 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:18,408 epoch 76 - iter 70/105 - loss 0.08495219 - time (sec): 1.04 - samples/sec: 3976.59 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:18,540 epoch 76 - iter 80/105 - loss 0.08500861 - time (sec): 1.17 - samples/sec: 4053.75 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:18,675 epoch 76 - iter 90/105 - loss 0.08691115 - time (sec): 1.30 - samples/sec: 4117.47 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:18,805 epoch 76 - iter 100/105 - loss 0.08528640 - time (sec): 1.43 - samples/sec: 4157.67 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:18,870 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:18,870 EPOCH 76 done: loss 0.0831 - lr: 0.003125 +2023-05-15 21:31:19,545 DEV : loss 0.4525325298309326 - accuracy (micro avg) 0.9284 +2023-05-15 21:31:19,558 - 2 epochs without improvement +2023-05-15 21:31:19,558 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:19,708 epoch 77 - iter 10/105 - loss 0.05640535 - time (sec): 0.15 - samples/sec: 3978.59 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:19,860 epoch 77 - iter 20/105 - loss 0.07251769 - time (sec): 0.30 - samples/sec: 3821.23 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:20,007 epoch 77 - iter 30/105 - loss 0.05696852 - time (sec): 0.45 - samples/sec: 3803.28 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:20,167 epoch 77 - iter 40/105 - loss 0.06423022 - time (sec): 0.61 - samples/sec: 3810.51 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:20,329 epoch 77 - iter 50/105 - loss 0.06844753 - time (sec): 0.77 - samples/sec: 3821.47 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:20,487 epoch 77 - iter 60/105 - loss 0.07693215 - time (sec): 0.93 - samples/sec: 3874.91 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:20,652 epoch 77 - iter 70/105 - loss 0.07380444 - time (sec): 1.09 - samples/sec: 3829.16 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:20,804 epoch 77 - iter 80/105 - loss 0.07705450 - time (sec): 1.25 - samples/sec: 3827.57 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:20,970 epoch 77 - iter 90/105 - loss 0.07583402 - time (sec): 1.41 - samples/sec: 3800.11 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:21,125 epoch 77 - iter 100/105 - loss 0.07763649 - time (sec): 1.57 - samples/sec: 3779.50 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:21,203 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:21,203 EPOCH 77 done: loss 0.0758 - lr: 0.003125 +2023-05-15 21:31:21,874 DEV : loss 0.4528382122516632 - accuracy (micro avg) 0.9281 +2023-05-15 21:31:21,887 - 3 epochs without improvement +2023-05-15 21:31:21,887 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:22,038 epoch 78 - iter 10/105 - loss 0.04706085 - time (sec): 0.15 - samples/sec: 3537.22 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:22,197 epoch 78 - iter 20/105 - loss 0.06569906 - time (sec): 0.31 - samples/sec: 3743.21 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:22,343 epoch 78 - iter 30/105 - loss 0.07837444 - time (sec): 0.46 - samples/sec: 3690.38 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:22,496 epoch 78 - iter 40/105 - loss 0.07520407 - time (sec): 0.61 - samples/sec: 3718.03 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:22,654 epoch 78 - iter 50/105 - loss 0.07961023 - time (sec): 0.77 - samples/sec: 3764.60 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:22,780 epoch 78 - iter 60/105 - loss 0.08524695 - time (sec): 0.89 - samples/sec: 3911.94 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:22,912 epoch 78 - iter 70/105 - loss 0.08718972 - time (sec): 1.03 - samples/sec: 4034.00 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:23,037 epoch 78 - iter 80/105 - loss 0.08715969 - time (sec): 1.15 - samples/sec: 4124.13 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:23,158 epoch 78 - iter 90/105 - loss 0.08897209 - time (sec): 1.27 - samples/sec: 4149.10 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:23,297 epoch 78 - iter 100/105 - loss 0.09199450 - time (sec): 1.41 - samples/sec: 4206.78 - lr: 0.003125 - momentum: 0.000000 +2023-05-15 21:31:23,363 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:23,363 EPOCH 78 done: loss 0.0927 - lr: 0.003125 +2023-05-15 21:31:24,176 DEV : loss 0.45365533232688904 - accuracy (micro avg) 0.9284 +2023-05-15 21:31:24,188 - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0015625] +2023-05-15 21:31:24,189 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:24,350 epoch 79 - iter 10/105 - loss 0.09398302 - time (sec): 0.16 - samples/sec: 3712.77 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:24,504 epoch 79 - iter 20/105 - loss 0.07434979 - time (sec): 0.32 - samples/sec: 3928.85 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:24,666 epoch 79 - iter 30/105 - loss 0.08088376 - time (sec): 0.48 - samples/sec: 3863.00 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:24,821 epoch 79 - iter 40/105 - loss 0.08310857 - time (sec): 0.63 - samples/sec: 3812.20 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:24,979 epoch 79 - iter 50/105 - loss 0.07883039 - time (sec): 0.79 - samples/sec: 3788.24 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:25,132 epoch 79 - iter 60/105 - loss 0.08249951 - time (sec): 0.94 - samples/sec: 3807.23 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:25,293 epoch 79 - iter 70/105 - loss 0.08213901 - time (sec): 1.10 - samples/sec: 3808.40 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:25,450 epoch 79 - iter 80/105 - loss 0.07941353 - time (sec): 1.26 - samples/sec: 3763.70 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:25,609 epoch 79 - iter 90/105 - loss 0.07842659 - time (sec): 1.42 - samples/sec: 3803.21 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:25,758 epoch 79 - iter 100/105 - loss 0.08078698 - time (sec): 1.57 - samples/sec: 3769.21 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:25,838 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:25,838 EPOCH 79 done: loss 0.0814 - lr: 0.001563 +2023-05-15 21:31:26,512 DEV : loss 0.45469412207603455 - accuracy (micro avg) 0.9287 +2023-05-15 21:31:26,525 - 1 epochs without improvement +2023-05-15 21:31:26,525 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:26,674 epoch 80 - iter 10/105 - loss 0.08687815 - time (sec): 0.15 - samples/sec: 3496.86 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:26,832 epoch 80 - iter 20/105 - loss 0.08161370 - time (sec): 0.31 - samples/sec: 3707.59 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:26,963 epoch 80 - iter 30/105 - loss 0.08213428 - time (sec): 0.44 - samples/sec: 4044.88 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:27,091 epoch 80 - iter 40/105 - loss 0.07656094 - time (sec): 0.57 - samples/sec: 4128.56 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:27,225 epoch 80 - iter 50/105 - loss 0.07999229 - time (sec): 0.70 - samples/sec: 4259.62 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:27,356 epoch 80 - iter 60/105 - loss 0.08157347 - time (sec): 0.83 - samples/sec: 4344.42 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:27,482 epoch 80 - iter 70/105 - loss 0.08251190 - time (sec): 0.96 - samples/sec: 4392.56 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:27,608 epoch 80 - iter 80/105 - loss 0.08859252 - time (sec): 1.08 - samples/sec: 4404.76 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:27,732 epoch 80 - iter 90/105 - loss 0.08405423 - time (sec): 1.21 - samples/sec: 4405.96 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:27,865 epoch 80 - iter 100/105 - loss 0.08321391 - time (sec): 1.34 - samples/sec: 4431.27 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:27,928 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:27,928 EPOCH 80 done: loss 0.0847 - lr: 0.001563 +2023-05-15 21:31:28,600 DEV : loss 0.45543888211250305 - accuracy (micro avg) 0.9281 +2023-05-15 21:31:28,612 - 2 epochs without improvement +2023-05-15 21:31:28,612 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:28,780 epoch 81 - iter 10/105 - loss 0.09104405 - time (sec): 0.17 - samples/sec: 3878.73 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:28,939 epoch 81 - iter 20/105 - loss 0.09779127 - time (sec): 0.33 - samples/sec: 3779.11 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:29,068 epoch 81 - iter 30/105 - loss 0.08391870 - time (sec): 0.46 - samples/sec: 3956.76 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:29,198 epoch 81 - iter 40/105 - loss 0.08020966 - time (sec): 0.59 - samples/sec: 4117.64 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:29,324 epoch 81 - iter 50/105 - loss 0.07895226 - time (sec): 0.71 - samples/sec: 4247.92 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:29,454 epoch 81 - iter 60/105 - loss 0.07539052 - time (sec): 0.84 - samples/sec: 4255.05 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:29,574 epoch 81 - iter 70/105 - loss 0.07256490 - time (sec): 0.96 - samples/sec: 4249.92 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:29,707 epoch 81 - iter 80/105 - loss 0.07126569 - time (sec): 1.10 - samples/sec: 4375.21 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:29,832 epoch 81 - iter 90/105 - loss 0.07536926 - time (sec): 1.22 - samples/sec: 4392.35 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:29,958 epoch 81 - iter 100/105 - loss 0.07462281 - time (sec): 1.35 - samples/sec: 4402.65 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:30,028 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:30,028 EPOCH 81 done: loss 0.0738 - lr: 0.001563 +2023-05-15 21:31:30,834 DEV : loss 0.45619630813598633 - accuracy (micro avg) 0.9281 +2023-05-15 21:31:30,846 - 3 epochs without improvement +2023-05-15 21:31:30,846 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:31,008 epoch 82 - iter 10/105 - loss 0.12525001 - time (sec): 0.16 - samples/sec: 3724.13 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:31,171 epoch 82 - iter 20/105 - loss 0.11689806 - time (sec): 0.32 - samples/sec: 3620.25 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:31,299 epoch 82 - iter 30/105 - loss 0.10251425 - time (sec): 0.45 - samples/sec: 3802.14 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:31,419 epoch 82 - iter 40/105 - loss 0.09642245 - time (sec): 0.57 - samples/sec: 3950.44 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:31,549 epoch 82 - iter 50/105 - loss 0.09190527 - time (sec): 0.70 - samples/sec: 4075.66 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:31,674 epoch 82 - iter 60/105 - loss 0.09405102 - time (sec): 0.83 - samples/sec: 4142.18 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:31,804 epoch 82 - iter 70/105 - loss 0.08565949 - time (sec): 0.96 - samples/sec: 4217.21 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:31,929 epoch 82 - iter 80/105 - loss 0.08256859 - time (sec): 1.08 - samples/sec: 4213.75 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:32,059 epoch 82 - iter 90/105 - loss 0.08123900 - time (sec): 1.21 - samples/sec: 4286.50 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:32,194 epoch 82 - iter 100/105 - loss 0.07714071 - time (sec): 1.35 - samples/sec: 4364.26 - lr: 0.001563 - momentum: 0.000000 +2023-05-15 21:31:32,267 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:32,267 EPOCH 82 done: loss 0.0819 - lr: 0.001563 +2023-05-15 21:31:32,940 DEV : loss 0.45649954676628113 - accuracy (micro avg) 0.9282 +2023-05-15 21:31:32,953 - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00078125] +2023-05-15 21:31:32,953 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:33,111 epoch 83 - iter 10/105 - loss 0.06625568 - time (sec): 0.16 - samples/sec: 3815.44 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:33,267 epoch 83 - iter 20/105 - loss 0.06653766 - time (sec): 0.31 - samples/sec: 3872.72 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:33,428 epoch 83 - iter 30/105 - loss 0.07237229 - time (sec): 0.47 - samples/sec: 3838.24 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:33,580 epoch 83 - iter 40/105 - loss 0.06671547 - time (sec): 0.63 - samples/sec: 3708.51 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:33,735 epoch 83 - iter 50/105 - loss 0.06586352 - time (sec): 0.78 - samples/sec: 3696.60 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:33,890 epoch 83 - iter 60/105 - loss 0.06541506 - time (sec): 0.94 - samples/sec: 3706.22 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:34,059 epoch 83 - iter 70/105 - loss 0.06899117 - time (sec): 1.11 - samples/sec: 3742.73 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:34,217 epoch 83 - iter 80/105 - loss 0.06740104 - time (sec): 1.26 - samples/sec: 3707.03 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:34,373 epoch 83 - iter 90/105 - loss 0.06348514 - time (sec): 1.42 - samples/sec: 3712.48 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:34,514 epoch 83 - iter 100/105 - loss 0.06313693 - time (sec): 1.56 - samples/sec: 3794.15 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:34,582 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:34,582 EPOCH 83 done: loss 0.0646 - lr: 0.000781 +2023-05-15 21:31:35,256 DEV : loss 0.4569226801395416 - accuracy (micro avg) 0.9282 +2023-05-15 21:31:35,268 - 1 epochs without improvement +2023-05-15 21:31:35,268 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:35,425 epoch 84 - iter 10/105 - loss 0.05634396 - time (sec): 0.16 - samples/sec: 3795.45 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:35,589 epoch 84 - iter 20/105 - loss 0.05880624 - time (sec): 0.32 - samples/sec: 3615.23 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:35,746 epoch 84 - iter 30/105 - loss 0.05553765 - time (sec): 0.48 - samples/sec: 3651.73 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:35,898 epoch 84 - iter 40/105 - loss 0.05421690 - time (sec): 0.63 - samples/sec: 3608.96 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:36,051 epoch 84 - iter 50/105 - loss 0.06769443 - time (sec): 0.78 - samples/sec: 3651.20 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:36,183 epoch 84 - iter 60/105 - loss 0.06904524 - time (sec): 0.91 - samples/sec: 3815.75 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:36,307 epoch 84 - iter 70/105 - loss 0.06988047 - time (sec): 1.04 - samples/sec: 3923.91 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:36,438 epoch 84 - iter 80/105 - loss 0.06880722 - time (sec): 1.17 - samples/sec: 4015.36 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:36,567 epoch 84 - iter 90/105 - loss 0.06763182 - time (sec): 1.30 - samples/sec: 4079.92 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:36,694 epoch 84 - iter 100/105 - loss 0.06676447 - time (sec): 1.43 - samples/sec: 4148.14 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:36,762 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:36,762 EPOCH 84 done: loss 0.0678 - lr: 0.000781 +2023-05-15 21:31:37,575 DEV : loss 0.4565540850162506 - accuracy (micro avg) 0.9281 +2023-05-15 21:31:37,587 - 2 epochs without improvement +2023-05-15 21:31:37,587 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:37,757 epoch 85 - iter 10/105 - loss 0.06256486 - time (sec): 0.17 - samples/sec: 3677.85 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:37,914 epoch 85 - iter 20/105 - loss 0.07293128 - time (sec): 0.33 - samples/sec: 3592.56 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:38,066 epoch 85 - iter 30/105 - loss 0.06617007 - time (sec): 0.48 - samples/sec: 3619.65 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:38,222 epoch 85 - iter 40/105 - loss 0.07255578 - time (sec): 0.63 - samples/sec: 3677.53 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:38,384 epoch 85 - iter 50/105 - loss 0.06989969 - time (sec): 0.80 - samples/sec: 3776.48 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:38,530 epoch 85 - iter 60/105 - loss 0.06554395 - time (sec): 0.94 - samples/sec: 3740.88 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:38,688 epoch 85 - iter 70/105 - loss 0.06226723 - time (sec): 1.10 - samples/sec: 3780.25 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:38,847 epoch 85 - iter 80/105 - loss 0.06101403 - time (sec): 1.26 - samples/sec: 3820.26 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:39,005 epoch 85 - iter 90/105 - loss 0.06516340 - time (sec): 1.42 - samples/sec: 3797.73 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:39,159 epoch 85 - iter 100/105 - loss 0.06652169 - time (sec): 1.57 - samples/sec: 3761.44 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:39,231 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:39,231 EPOCH 85 done: loss 0.0670 - lr: 0.000781 +2023-05-15 21:31:39,903 DEV : loss 0.45625191926956177 - accuracy (micro avg) 0.9284 +2023-05-15 21:31:39,916 - 3 epochs without improvement +2023-05-15 21:31:39,916 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:40,078 epoch 86 - iter 10/105 - loss 0.04161077 - time (sec): 0.16 - samples/sec: 3363.93 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:40,251 epoch 86 - iter 20/105 - loss 0.05284345 - time (sec): 0.33 - samples/sec: 3447.78 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:40,405 epoch 86 - iter 30/105 - loss 0.05720211 - time (sec): 0.49 - samples/sec: 3544.07 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:40,556 epoch 86 - iter 40/105 - loss 0.05855859 - time (sec): 0.64 - samples/sec: 3620.52 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:40,723 epoch 86 - iter 50/105 - loss 0.06731156 - time (sec): 0.81 - samples/sec: 3633.73 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:40,892 epoch 86 - iter 60/105 - loss 0.06409584 - time (sec): 0.98 - samples/sec: 3624.43 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:41,050 epoch 86 - iter 70/105 - loss 0.06448159 - time (sec): 1.13 - samples/sec: 3610.54 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:41,201 epoch 86 - iter 80/105 - loss 0.06684483 - time (sec): 1.28 - samples/sec: 3617.48 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:41,373 epoch 86 - iter 90/105 - loss 0.07065451 - time (sec): 1.46 - samples/sec: 3609.49 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:41,539 epoch 86 - iter 100/105 - loss 0.07105019 - time (sec): 1.62 - samples/sec: 3651.39 - lr: 0.000781 - momentum: 0.000000 +2023-05-15 21:31:41,623 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:41,623 EPOCH 86 done: loss 0.0707 - lr: 0.000781 +2023-05-15 21:31:42,293 DEV : loss 0.4558914303779602 - accuracy (micro avg) 0.9285 +2023-05-15 21:31:42,305 - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.000390625] +2023-05-15 21:31:42,305 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:42,466 epoch 87 - iter 10/105 - loss 0.04982021 - time (sec): 0.16 - samples/sec: 3886.65 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:42,623 epoch 87 - iter 20/105 - loss 0.08063016 - time (sec): 0.32 - samples/sec: 3594.55 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:42,767 epoch 87 - iter 30/105 - loss 0.07532039 - time (sec): 0.46 - samples/sec: 3603.82 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:42,937 epoch 87 - iter 40/105 - loss 0.07826631 - time (sec): 0.63 - samples/sec: 3606.43 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:43,105 epoch 87 - iter 50/105 - loss 0.07813172 - time (sec): 0.80 - samples/sec: 3646.58 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:43,268 epoch 87 - iter 60/105 - loss 0.08340842 - time (sec): 0.96 - samples/sec: 3654.19 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:43,423 epoch 87 - iter 70/105 - loss 0.07727099 - time (sec): 1.12 - samples/sec: 3714.27 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:43,575 epoch 87 - iter 80/105 - loss 0.07791657 - time (sec): 1.27 - samples/sec: 3727.90 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:43,735 epoch 87 - iter 90/105 - loss 0.07602732 - time (sec): 1.43 - samples/sec: 3703.95 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:43,871 epoch 87 - iter 100/105 - loss 0.07647325 - time (sec): 1.57 - samples/sec: 3788.99 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:43,937 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:43,937 EPOCH 87 done: loss 0.0766 - lr: 0.000391 +2023-05-15 21:31:44,741 DEV : loss 0.45574912428855896 - accuracy (micro avg) 0.9282 +2023-05-15 21:31:44,753 - 1 epochs without improvement +2023-05-15 21:31:44,754 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:44,913 epoch 88 - iter 10/105 - loss 0.07622109 - time (sec): 0.16 - samples/sec: 3928.45 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:45,073 epoch 88 - iter 20/105 - loss 0.08673939 - time (sec): 0.32 - samples/sec: 3721.81 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:45,241 epoch 88 - iter 30/105 - loss 0.09478922 - time (sec): 0.49 - samples/sec: 3719.78 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:45,385 epoch 88 - iter 40/105 - loss 0.09433885 - time (sec): 0.63 - samples/sec: 3828.89 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:45,515 epoch 88 - iter 50/105 - loss 0.09348278 - time (sec): 0.76 - samples/sec: 3975.06 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:45,641 epoch 88 - iter 60/105 - loss 0.08843286 - time (sec): 0.89 - samples/sec: 4030.94 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:45,767 epoch 88 - iter 70/105 - loss 0.08531614 - time (sec): 1.01 - samples/sec: 4080.20 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:45,901 epoch 88 - iter 80/105 - loss 0.08199163 - time (sec): 1.15 - samples/sec: 4150.58 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:46,028 epoch 88 - iter 90/105 - loss 0.08255142 - time (sec): 1.27 - samples/sec: 4176.91 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:46,154 epoch 88 - iter 100/105 - loss 0.07994260 - time (sec): 1.40 - samples/sec: 4233.18 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:46,223 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:46,223 EPOCH 88 done: loss 0.0802 - lr: 0.000391 +2023-05-15 21:31:46,894 DEV : loss 0.4561077356338501 - accuracy (micro avg) 0.9282 +2023-05-15 21:31:46,906 - 2 epochs without improvement +2023-05-15 21:31:46,906 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:47,070 epoch 89 - iter 10/105 - loss 0.07080775 - time (sec): 0.16 - samples/sec: 3693.68 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:47,223 epoch 89 - iter 20/105 - loss 0.07455936 - time (sec): 0.32 - samples/sec: 3613.52 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:47,394 epoch 89 - iter 30/105 - loss 0.07381745 - time (sec): 0.49 - samples/sec: 3669.90 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:47,523 epoch 89 - iter 40/105 - loss 0.08314483 - time (sec): 0.62 - samples/sec: 3816.89 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:47,649 epoch 89 - iter 50/105 - loss 0.07774133 - time (sec): 0.74 - samples/sec: 4036.84 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:47,777 epoch 89 - iter 60/105 - loss 0.07910999 - time (sec): 0.87 - samples/sec: 4118.80 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:47,901 epoch 89 - iter 70/105 - loss 0.07932378 - time (sec): 0.99 - samples/sec: 4202.82 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:48,033 epoch 89 - iter 80/105 - loss 0.07679682 - time (sec): 1.13 - samples/sec: 4251.01 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:48,164 epoch 89 - iter 90/105 - loss 0.07742812 - time (sec): 1.26 - samples/sec: 4295.76 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:48,289 epoch 89 - iter 100/105 - loss 0.07785464 - time (sec): 1.38 - samples/sec: 4305.90 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:48,357 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:48,357 EPOCH 89 done: loss 0.0786 - lr: 0.000391 +2023-05-15 21:31:49,028 DEV : loss 0.45609503984451294 - accuracy (micro avg) 0.9282 +2023-05-15 21:31:49,040 - 3 epochs without improvement +2023-05-15 21:31:49,040 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:49,204 epoch 90 - iter 10/105 - loss 0.05868137 - time (sec): 0.16 - samples/sec: 3826.04 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:49,349 epoch 90 - iter 20/105 - loss 0.09696209 - time (sec): 0.31 - samples/sec: 3764.51 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:49,507 epoch 90 - iter 30/105 - loss 0.09615798 - time (sec): 0.47 - samples/sec: 3705.81 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:49,667 epoch 90 - iter 40/105 - loss 0.08374592 - time (sec): 0.63 - samples/sec: 3715.04 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:49,823 epoch 90 - iter 50/105 - loss 0.07703084 - time (sec): 0.78 - samples/sec: 3662.11 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:49,978 epoch 90 - iter 60/105 - loss 0.08614811 - time (sec): 0.94 - samples/sec: 3732.03 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:50,131 epoch 90 - iter 70/105 - loss 0.09328989 - time (sec): 1.09 - samples/sec: 3688.87 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:50,280 epoch 90 - iter 80/105 - loss 0.08829302 - time (sec): 1.24 - samples/sec: 3726.86 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:50,418 epoch 90 - iter 90/105 - loss 0.08347571 - time (sec): 1.38 - samples/sec: 3848.06 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:50,553 epoch 90 - iter 100/105 - loss 0.08183453 - time (sec): 1.51 - samples/sec: 3907.42 - lr: 0.000391 - momentum: 0.000000 +2023-05-15 21:31:50,620 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:50,620 EPOCH 90 done: loss 0.0803 - lr: 0.000391 +2023-05-15 21:31:51,291 DEV : loss 0.4561174511909485 - accuracy (micro avg) 0.9282 +2023-05-15 21:31:51,304 - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0001953125] +2023-05-15 21:31:51,304 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:51,463 epoch 91 - iter 10/105 - loss 0.10163174 - time (sec): 0.16 - samples/sec: 3736.41 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:51,612 epoch 91 - iter 20/105 - loss 0.08917681 - time (sec): 0.31 - samples/sec: 3612.29 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:51,768 epoch 91 - iter 30/105 - loss 0.07233568 - time (sec): 0.46 - samples/sec: 3660.52 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:51,923 epoch 91 - iter 40/105 - loss 0.07080886 - time (sec): 0.62 - samples/sec: 3707.63 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:52,087 epoch 91 - iter 50/105 - loss 0.06833288 - time (sec): 0.78 - samples/sec: 3758.81 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:52,237 epoch 91 - iter 60/105 - loss 0.06462794 - time (sec): 0.93 - samples/sec: 3804.06 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:52,391 epoch 91 - iter 70/105 - loss 0.06432342 - time (sec): 1.09 - samples/sec: 3788.99 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:52,524 epoch 91 - iter 80/105 - loss 0.06133304 - time (sec): 1.22 - samples/sec: 3898.58 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:52,648 epoch 91 - iter 90/105 - loss 0.06374294 - time (sec): 1.34 - samples/sec: 3970.37 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:52,775 epoch 91 - iter 100/105 - loss 0.06553967 - time (sec): 1.47 - samples/sec: 4014.74 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:52,842 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:52,842 EPOCH 91 done: loss 0.0674 - lr: 0.000195 +2023-05-15 21:31:53,652 DEV : loss 0.4560907185077667 - accuracy (micro avg) 0.9282 +2023-05-15 21:31:53,664 - 1 epochs without improvement +2023-05-15 21:31:53,664 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:53,830 epoch 92 - iter 10/105 - loss 0.09787210 - time (sec): 0.17 - samples/sec: 3797.34 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:53,989 epoch 92 - iter 20/105 - loss 0.08095827 - time (sec): 0.33 - samples/sec: 3682.45 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:54,141 epoch 92 - iter 30/105 - loss 0.07818815 - time (sec): 0.48 - samples/sec: 3688.05 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:54,293 epoch 92 - iter 40/105 - loss 0.07613048 - time (sec): 0.63 - samples/sec: 3823.41 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:54,420 epoch 92 - iter 50/105 - loss 0.07859168 - time (sec): 0.76 - samples/sec: 3881.09 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:54,550 epoch 92 - iter 60/105 - loss 0.07787762 - time (sec): 0.89 - samples/sec: 3994.22 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:54,676 epoch 92 - iter 70/105 - loss 0.08210023 - time (sec): 1.01 - samples/sec: 4065.12 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:54,807 epoch 92 - iter 80/105 - loss 0.08262567 - time (sec): 1.14 - samples/sec: 4158.56 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:54,932 epoch 92 - iter 90/105 - loss 0.08175313 - time (sec): 1.27 - samples/sec: 4191.78 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:55,059 epoch 92 - iter 100/105 - loss 0.08208601 - time (sec): 1.39 - samples/sec: 4205.72 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:55,130 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:55,131 EPOCH 92 done: loss 0.0814 - lr: 0.000195 +2023-05-15 21:31:55,801 DEV : loss 0.45620062947273254 - accuracy (micro avg) 0.9282 +2023-05-15 21:31:55,813 - 2 epochs without improvement +2023-05-15 21:31:55,813 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:55,966 epoch 93 - iter 10/105 - loss 0.08338736 - time (sec): 0.15 - samples/sec: 3324.33 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:56,116 epoch 93 - iter 20/105 - loss 0.09952859 - time (sec): 0.30 - samples/sec: 3364.67 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:56,272 epoch 93 - iter 30/105 - loss 0.09269119 - time (sec): 0.46 - samples/sec: 3448.47 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:56,428 epoch 93 - iter 40/105 - loss 0.08263464 - time (sec): 0.61 - samples/sec: 3554.14 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:56,563 epoch 93 - iter 50/105 - loss 0.07815594 - time (sec): 0.75 - samples/sec: 3720.25 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:56,687 epoch 93 - iter 60/105 - loss 0.07279853 - time (sec): 0.87 - samples/sec: 3844.56 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:56,813 epoch 93 - iter 70/105 - loss 0.06974049 - time (sec): 1.00 - samples/sec: 3966.49 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:56,940 epoch 93 - iter 80/105 - loss 0.07093432 - time (sec): 1.13 - samples/sec: 4056.96 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:57,079 epoch 93 - iter 90/105 - loss 0.06798311 - time (sec): 1.27 - samples/sec: 4180.91 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:57,209 epoch 93 - iter 100/105 - loss 0.06784165 - time (sec): 1.40 - samples/sec: 4266.46 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:57,274 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:57,274 EPOCH 93 done: loss 0.0690 - lr: 0.000195 +2023-05-15 21:31:57,945 DEV : loss 0.45623788237571716 - accuracy (micro avg) 0.9281 +2023-05-15 21:31:57,957 - 3 epochs without improvement +2023-05-15 21:31:57,957 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:58,125 epoch 94 - iter 10/105 - loss 0.08754979 - time (sec): 0.17 - samples/sec: 3932.26 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:58,285 epoch 94 - iter 20/105 - loss 0.08029897 - time (sec): 0.33 - samples/sec: 3869.40 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:58,460 epoch 94 - iter 30/105 - loss 0.08021386 - time (sec): 0.50 - samples/sec: 3876.31 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:58,622 epoch 94 - iter 40/105 - loss 0.08244415 - time (sec): 0.66 - samples/sec: 3822.84 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:58,780 epoch 94 - iter 50/105 - loss 0.08622380 - time (sec): 0.82 - samples/sec: 3797.24 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:58,933 epoch 94 - iter 60/105 - loss 0.08088922 - time (sec): 0.98 - samples/sec: 3785.69 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:59,077 epoch 94 - iter 70/105 - loss 0.07714372 - time (sec): 1.12 - samples/sec: 3802.97 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:59,229 epoch 94 - iter 80/105 - loss 0.07837018 - time (sec): 1.27 - samples/sec: 3800.98 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:59,382 epoch 94 - iter 90/105 - loss 0.07799857 - time (sec): 1.42 - samples/sec: 3792.89 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:59,534 epoch 94 - iter 100/105 - loss 0.07581181 - time (sec): 1.58 - samples/sec: 3812.30 - lr: 0.000195 - momentum: 0.000000 +2023-05-15 21:31:59,598 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:31:59,598 EPOCH 94 done: loss 0.0771 - lr: 0.000195 +2023-05-15 21:32:00,407 DEV : loss 0.45624786615371704 - accuracy (micro avg) 0.9282 +2023-05-15 21:32:00,419 - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [9.765625e-05] +2023-05-15 21:32:00,419 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:32:00,419 learning rate too small - quitting training! +2023-05-15 21:32:00,419 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:32:00,419 Saving model ... +2023-05-15 21:32:01,555 Done. +2023-05-15 21:32:01,555 ---------------------------------------------------------------------------------------------------- +2023-05-15 21:32:01,555 Loading model from best epoch ... +2023-05-15 21:32:03,450 SequenceTagger predicts: Dictionary with 71 tags: NN, $., NE, ADV, APPR, ART, VVFIN, PPER, ADDRESS, $(, ADJA, URL, VAFIN, ADJD, $,, HASH, KON, CARD, VVINF, APPRART, VVPP, EMO, VMFIN, PIS, PTKNEG, PDS, KOUS, PPOSAT, PTKVZ, PIAT, PRF, XYB, ITJ, PWAV, FM, PROAV, PWS, XY, PRELS, VAINF, VVIMP, PDAT, KOKOM, PTKZU, PTKANT, PAUSE, VVFIN_ES, PTKA, VVIZU, NINFL +2023-05-15 21:32:05,113 +Results: +- F-score (micro) 0.9316 +- F-score (macro) 0.6573 +- Accuracy 0.9316 + +By class: + precision recall f1-score support + + NN 0.9181 0.9339 0.9260 1165 + $. 0.9812 0.9946 0.9879 736 + ADV 0.9162 0.9307 0.9234 505 + NE 0.8333 0.8385 0.8359 483 + APPR 0.9582 0.9750 0.9665 400 + ART 0.9667 0.9915 0.9789 351 + VVFIN 0.9453 0.9333 0.9393 315 + PPER 0.9542 0.9927 0.9731 273 + $( 0.9808 0.9623 0.9714 265 + ADDRESS 0.9274 0.9914 0.9583 232 + VAFIN 0.9775 0.9864 0.9819 220 + URL 0.9910 1.0000 0.9955 220 + ADJA 0.9312 0.9355 0.9333 217 + ADJD 0.9149 0.7818 0.8431 220 + $, 1.0000 1.0000 1.0000 198 + HASH 0.9184 0.9507 0.9343 142 + KON 0.9568 0.9433 0.9500 141 + VVINF 0.8125 0.9100 0.8585 100 + CARD 0.9623 0.9808 0.9714 104 + VVPP 0.8725 0.9368 0.9036 95 + APPRART 1.0000 1.0000 1.0000 97 + EMO 0.8812 0.9780 0.9271 91 + VMFIN 0.8481 0.9710 0.9054 69 + PIS 0.8714 0.9104 0.8905 67 + PDS 0.9385 0.8971 0.9173 68 + PTKNEG 1.0000 1.0000 1.0000 58 + PPOSAT 1.0000 0.9636 0.9815 55 + KOUS 0.9375 0.9184 0.9278 49 + PTKVZ 0.7778 0.7143 0.7447 49 + PIAT 0.9000 0.8780 0.8889 41 + ITJ 0.7000 0.5526 0.6176 38 + PWAV 0.9062 0.9667 0.9355 30 + PROAV 0.7931 0.7667 0.7797 30 + PRF 0.9545 0.6562 0.7778 32 + XYB 0.9600 0.8889 0.9231 27 + PWS 1.0000 0.8148 0.8980 27 + VAINF 1.0000 0.9524 0.9756 21 + PDAT 0.9048 0.9500 0.9268 20 + FM 1.0000 0.6400 0.7805 25 + XY 0.7778 0.2500 0.3784 28 + PTKANT 0.8947 0.9444 0.9189 18 + PTKZU 0.9286 0.9286 0.9286 14 + PRELS 0.5714 0.7273 0.6400 11 + KOKOM 0.8333 0.8333 0.8333 12 + VVFIN_ES 1.0000 0.7000 0.8235 10 + PAUSE 0.4000 0.2500 0.3077 8 + VVIMP 0.7500 0.3333 0.4615 9 + PTKA 0.5000 0.1429 0.2222 7 + PWAT 1.0000 1.0000 1.0000 4 + PTKREZ 0.6667 0.5000 0.5714 4 + VVIZU 0.0000 0.0000 0.0000 4 + PTKPAU 0.5000 1.0000 0.6667 1 + NINFL 0.0000 0.0000 0.0000 3 + VAFIN_ES 0.0000 0.0000 0.0000 2 + VMINF 0.0000 0.0000 0.0000 2 + KOUS_ES 0.0000 0.0000 0.0000 1 + VAPP 0.0000 0.0000 0.0000 1 + XYU 0.0000 0.0000 0.0000 1 + PIS_PPER 0.0000 0.0000 0.0000 1 + KOUI 0.0000 0.0000 0.0000 1 + VAFIN_PPER 0.0000 0.0000 0.0000 1 + TRUNC 0.0000 0.0000 0.0000 1 + VVFIN_DU 0.0000 0.0000 0.0000 1 + PTKONO 0.0000 0.0000 0.0000 1 + PTKQU 0.0000 0.0000 0.0000 1 + PTK 0.0000 0.0000 0.0000 0 + + accuracy 0.9316 7423 + macro avg 0.6805 0.6515 0.6573 7423 +weighted avg 0.9284 0.9316 0.9285 7423 + +2023-05-15 21:32:05,113 ----------------------------------------------------------------------------------------------------