################################### TRAIN_CONFIG ################################### dataset_dir: ./Audio_XenoCanto labels_list: ./xeno_labels.csv model_name: BirdAST_SeqPool_GroupKFold backbone_name: MIT/ast-finetuned-audioset-10-10-0.4593 n_classes: 728 audio_sr: 16000 segment_length: 10 fft_window: 0.025 hop_window_length: 0.01 n_mels: 128 low_cut: 1000 high_cut: 8000 top_db: 100 batch_size: 16 num_workers: 0 n_splits: 5 log_dir: ./training_logs max_lr: 1e-05 epochs: 15 weight_decay: 0.01 lr_final_div: 1000 amp: True grad_accum_steps: 1 max_grad_norm: 10000000.0 print_epoch_freq: 1 print_freq: 500 random_seed: 2046 copy: )> ################################################################################ Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving. Epoch 1 [0/559] | Train Loss: 0.3882 Grad: 59133.6680 LR: 4.0003e-07 | Elapse: 4.16s Epoch 1 [500/559] | Train Loss: 0.1746 Grad: 10785.3711 LR: 6.6576e-06 | Elapse: 135.75s Epoch 1 [558/559] | Train Loss: 0.1640 Grad: 28186.2715 LR: 7.6104e-06 | Elapse: 150.05s Epoch 1 [0/140] | Valid Loss: 0.0902 | Elapse: 0.51s Epoch 1 [139/140] | Valid Loss: 0.1603 | Elapse: 35.22s Epoch 1 - Train Loss: 0.1640 - Valid Loss: 0.5077 - Elapsed Time: 203.18s - Epoch 1: Best model found with loss = 0.5077. Epoch 2 [0/559] | Train Loss: 0.3939 Grad: 62395.7148 LR: 7.6259e-06 | Elapse: 0.26s Epoch 2 [500/559] | Train Loss: 0.1721 Grad: 14891.8457 LR: 9.9786e-06 | Elapse: 126.82s Epoch 2 [558/559] | Train Loss: 0.1611 Grad: 39606.0000 LR: 9.9660e-06 | Elapse: 140.96s Epoch 2 [0/140] | Valid Loss: 0.0903 | Elapse: 0.26s Epoch 2 [139/140] | Valid Loss: 0.1686 | Elapse: 33.93s Epoch 2 - Train Loss: 0.1611 - Valid Loss: 0.5473 - Elapsed Time: 193.32s - Epoch 2: Best model found with loss = 0.5473. Epoch 3 [0/559] | Train Loss: 0.3536 Grad: 109698.6953 LR: 9.9657e-06 | Elapse: 0.27s Epoch 3 [500/559] | Train Loss: 0.1534 Grad: 24832.7734 LR: 9.7377e-06 | Elapse: 126.60s Epoch 3 [558/559] | Train Loss: 0.1428 Grad: 44304.6953 LR: 9.6978e-06 | Elapse: 140.57s Epoch 3 [0/140] | Valid Loss: 0.0862 | Elapse: 0.24s Epoch 3 [139/140] | Valid Loss: 0.1822 | Elapse: 33.67s Epoch 3 - Train Loss: 0.1428 - Valid Loss: 0.5807 - Elapsed Time: 192.39s - Epoch 3: Best model found with loss = 0.5807. Epoch 4 [0/559] | Train Loss: 0.2503 Grad: 159404.0781 LR: 9.6970e-06 | Elapse: 0.34s Epoch 4 [500/559] | Train Loss: 0.1188 Grad: 29168.4512 LR: 9.2415e-06 | Elapse: 126.05s Epoch 4 [558/559] | Train Loss: 0.1104 Grad: 43132.2422 LR: 9.1763e-06 | Elapse: 139.80s Epoch 4 [0/140] | Valid Loss: 0.0805 | Elapse: 0.24s Epoch 4 [139/140] | Valid Loss: 0.1919 | Elapse: 33.40s Epoch 4 - Train Loss: 0.1104 - Valid Loss: 0.6077 - Elapsed Time: 191.29s - Epoch 4: Best model found with loss = 0.6077. Epoch 5 [0/559] | Train Loss: 0.1533 Grad: 181679.0000 LR: 9.1752e-06 | Elapse: 0.26s Epoch 5 [500/559] | Train Loss: 0.0870 Grad: 28622.9746 LR: 8.5166e-06 | Elapse: 129.63s Epoch 5 [558/559] | Train Loss: 0.0807 Grad: 41173.3281 LR: 8.4298e-06 | Elapse: 143.63s Epoch 5 [0/140] | Valid Loss: 0.0762 | Elapse: 0.25s Epoch 5 [139/140] | Valid Loss: 0.2010 | Elapse: 33.42s Epoch 5 - Train Loss: 0.0807 - Valid Loss: 0.6217 - Elapsed Time: 194.94s - Epoch 5: Best model found with loss = 0.6217. Epoch 6 [0/559] | Train Loss: 0.0727 Grad: 145376.2812 LR: 8.4282e-06 | Elapse: 0.26s Epoch 6 [500/559] | Train Loss: 0.0636 Grad: 26931.7656 LR: 7.6021e-06 | Elapse: 125.84s Epoch 6 [558/559] | Train Loss: 0.0591 Grad: 40624.6094 LR: 7.4983e-06 | Elapse: 139.94s Epoch 6 [0/140] | Valid Loss: 0.0718 | Elapse: 0.25s Epoch 6 [139/140] | Valid Loss: 0.2069 | Elapse: 33.78s Epoch 6 - Train Loss: 0.0591 - Valid Loss: 0.6402 - Elapsed Time: 191.76s - Epoch 6: Best model found with loss = 0.6402. Epoch 7 [0/559] | Train Loss: 0.0291 Grad: 75963.4531 LR: 7.4965e-06 | Elapse: 0.26s Epoch 7 [500/559] | Train Loss: 0.0454 Grad: 25041.2656 LR: 6.5474e-06 | Elapse: 125.73s Epoch 7 [558/559] | Train Loss: 0.0425 Grad: 39131.4258 LR: 6.4322e-06 | Elapse: 139.58s Epoch 7 [0/140] | Valid Loss: 0.0673 | Elapse: 0.25s Epoch 7 [139/140] | Valid Loss: 0.2128 | Elapse: 33.91s Epoch 7 - Train Loss: 0.0425 - Valid Loss: 0.6534 - Elapsed Time: 191.57s - Epoch 7: Best model found with loss = 0.6534. Epoch 8 [0/559] | Train Loss: 0.0193 Grad: 55918.3516 LR: 6.4302e-06 | Elapse: 0.26s Epoch 8 [500/559] | Train Loss: 0.0340 Grad: 25086.2227 LR: 5.4093e-06 | Elapse: 125.92s Epoch 8 [558/559] | Train Loss: 0.0320 Grad: 36927.0547 LR: 5.2888e-06 | Elapse: 139.95s Epoch 8 [0/140] | Valid Loss: 0.0642 | Elapse: 0.25s Epoch 8 [139/140] | Valid Loss: 0.2160 | Elapse: 33.58s Epoch 8 - Train Loss: 0.0320 - Valid Loss: 0.6616 - Elapsed Time: 191.96s - Epoch 8: Best model found with loss = 0.6616. Epoch 9 [0/559] | Train Loss: 0.0134 Grad: 38488.9297 LR: 5.2868e-06 | Elapse: 0.26s Epoch 9 [500/559] | Train Loss: 0.0270 Grad: 23536.4590 LR: 4.2491e-06 | Elapse: 125.70s Epoch 9 [558/559] | Train Loss: 0.0254 Grad: 34283.2383 LR: 4.1299e-06 | Elapse: 139.31s Epoch 9 [0/140] | Valid Loss: 0.0611 | Elapse: 0.24s Epoch 9 [139/140] | Valid Loss: 0.2193 | Elapse: 33.43s Epoch 9 - Train Loss: 0.0254 - Valid Loss: 0.6674 - Elapsed Time: 191.60s - Epoch 9: Best model found with loss = 0.6674. Epoch 10 [0/559] | Train Loss: 0.0113 Grad: 33469.5664 LR: 4.1279e-06 | Elapse: 0.25s Epoch 10 [500/559] | Train Loss: 0.0229 Grad: 24498.5020 LR: 3.1294e-06 | Elapse: 125.69s Epoch 10 [558/559] | Train Loss: 0.0217 Grad: 29899.7441 LR: 3.0180e-06 | Elapse: 139.69s Epoch 10 [0/140] | Valid Loss: 0.0562 | Elapse: 0.24s Epoch 10 [139/140] | Valid Loss: 0.2224 | Elapse: 33.32s Epoch 10 - Train Loss: 0.0217 - Valid Loss: 0.6696 - Elapsed Time: 191.04s - Epoch 10: Best model found with loss = 0.6696. Epoch 11 [0/559] | Train Loss: 0.0110 Grad: 33819.4648 LR: 3.0161e-06 | Elapse: 0.26s Epoch 11 [500/559] | Train Loss: 0.0206 Grad: 18584.9199 LR: 2.1105e-06 | Elapse: 125.26s Epoch 11 [558/559] | Train Loss: 0.0196 Grad: 29235.0020 LR: 2.0129e-06 | Elapse: 139.49s Epoch 11 [0/140] | Valid Loss: 0.0517 | Elapse: 0.26s Epoch 11 [139/140] | Valid Loss: 0.2231 | Elapse: 33.66s Epoch 11 - Train Loss: 0.0196 - Valid Loss: 0.6783 - Elapsed Time: 191.75s - Epoch 11: Best model found with loss = 0.6783. Epoch 12 [0/559] | Train Loss: 0.0097 Grad: 28240.1406 LR: 2.0112e-06 | Elapse: 0.25s Epoch 12 [500/559] | Train Loss: 0.0200 Grad: 18508.3789 LR: 1.2475e-06 | Elapse: 125.82s Epoch 12 [558/559] | Train Loss: 0.0190 Grad: 28309.4277 LR: 1.1688e-06 | Elapse: 139.62s Epoch 12 [0/140] | Valid Loss: 0.0508 | Elapse: 0.25s Epoch 12 [139/140] | Valid Loss: 0.2262 | Elapse: 33.76s Epoch 12 - Train Loss: 0.0190 - Valid Loss: 0.6871 - Elapsed Time: 191.67s - Epoch 12: Best model found with loss = 0.6871. Epoch 13 [0/559] | Train Loss: 0.0115 Grad: 45351.9102 LR: 1.1675e-06 | Elapse: 0.26s Epoch 13 [500/559] | Train Loss: 0.0211 Grad: 23046.5820 LR: 5.8671e-07 | Elapse: 126.02s Epoch 13 [558/559] | Train Loss: 0.0201 Grad: 33189.5586 LR: 5.3128e-07 | Elapse: 140.10s Epoch 13 [0/140] | Valid Loss: 0.0486 | Elapse: 0.24s Epoch 13 [139/140] | Valid Loss: 0.2286 | Elapse: 33.58s Epoch 13 - Train Loss: 0.0201 - Valid Loss: 0.6849 - Elapsed Time: 191.60s Epoch 14 [0/559] | Train Loss: 0.0320 Grad: 162004.3594 LR: 5.3035e-07 | Elapse: 0.26s Epoch 14 [500/559] | Train Loss: 0.0297 Grad: 50295.4297 LR: 1.6390e-07 | Elapse: 126.44s Epoch 14 [558/559] | Train Loss: 0.0279 Grad: 24119.4727 LR: 1.3469e-07 | Elapse: 140.38s Epoch 14 [0/140] | Valid Loss: 0.0653 | Elapse: 0.24s Epoch 14 [139/140] | Valid Loss: 0.2349 | Elapse: 37.45s Epoch 14 - Train Loss: 0.0279 - Valid Loss: 0.6842 - Elapsed Time: 195.84s Epoch 15 [0/559] | Train Loss: 0.0254 Grad: 138480.9531 LR: 1.3421e-07 | Elapse: 0.26s Epoch 15 [500/559] | Train Loss: 0.0333 Grad: 35250.3398 LR: 1.8075e-09 | Elapse: 126.61s Epoch 15 [558/559] | Train Loss: 0.0310 Grad: 22504.0352 LR: 4.0043e-10 | Elapse: 140.76s Epoch 15 [0/140] | Valid Loss: 0.0645 | Elapse: 0.25s Epoch 15 [139/140] | Valid Loss: 0.2340 | Elapse: 33.76s Epoch 15 - Train Loss: 0.0310 - Valid Loss: 0.6936 - Elapsed Time: 192.85s - Epoch 15: Best model found with loss = 0.6936. Fold 0 | Time: 48.76min | Overall Evaluation Loss: 0.6936 Epoch 1 [0/559] | Train Loss: 0.3883 Grad: 58472.6211 LR: 4.0003e-07 | Elapse: 0.28s Epoch 1 [500/559] | Train Loss: 0.1752 Grad: 533.1627 LR: 6.6576e-06 | Elapse: 125.80s Epoch 1 [558/559] | Train Loss: 0.1658 Grad: 21144.0176 LR: 7.6104e-06 | Elapse: 140.06s Epoch 1 [0/140] | Valid Loss: 0.2178 | Elapse: 0.25s Epoch 1 [139/140] | Valid Loss: 0.1547 | Elapse: 33.57s Epoch 1 - Train Loss: 0.1658 - Valid Loss: 0.5293 - Elapsed Time: 192.75s - Epoch 1: Best model found with loss = 0.5293. Epoch 2 [0/559] | Train Loss: 0.3810 Grad: 56734.7383 LR: 7.6259e-06 | Elapse: 0.25s Epoch 2 [500/559] | Train Loss: 0.1724 Grad: 687.1262 LR: 9.9786e-06 | Elapse: 125.61s Epoch 2 [558/559] | Train Loss: 0.1627 Grad: 28590.4316 LR: 9.9660e-06 | Elapse: 139.84s Epoch 2 [0/140] | Valid Loss: 0.2403 | Elapse: 0.24s Epoch 2 [139/140] | Valid Loss: 0.1600 | Elapse: 33.61s Epoch 2 - Train Loss: 0.1627 - Valid Loss: 0.5705 - Elapsed Time: 192.10s - Epoch 2: Best model found with loss = 0.5705. Epoch 3 [0/559] | Train Loss: 0.3289 Grad: 88720.4609 LR: 9.9657e-06 | Elapse: 0.25s Epoch 3 [500/559] | Train Loss: 0.1545 Grad: 1277.2761 LR: 9.7377e-06 | Elapse: 124.81s Epoch 3 [558/559] | Train Loss: 0.1453 Grad: 42685.6367 LR: 9.6978e-06 | Elapse: 138.68s Epoch 3 [0/140] | Valid Loss: 0.2729 | Elapse: 0.24s Epoch 3 [139/140] | Valid Loss: 0.1710 | Elapse: 33.70s Epoch 3 - Train Loss: 0.1453 - Valid Loss: 0.5893 - Elapsed Time: 191.07s - Epoch 3: Best model found with loss = 0.5893. Epoch 4 [0/559] | Train Loss: 0.2254 Grad: 135318.4531 LR: 9.6970e-06 | Elapse: 0.26s Epoch 4 [500/559] | Train Loss: 0.1200 Grad: 2176.0022 LR: 9.2415e-06 | Elapse: 125.77s Epoch 4 [558/559] | Train Loss: 0.1128 Grad: 46790.1719 LR: 9.1763e-06 | Elapse: 139.61s Epoch 4 [0/140] | Valid Loss: 0.3063 | Elapse: 0.25s Epoch 4 [139/140] | Valid Loss: 0.1791 | Elapse: 33.68s Epoch 4 - Train Loss: 0.1128 - Valid Loss: 0.6110 - Elapsed Time: 191.98s - Epoch 4: Best model found with loss = 0.6110. Epoch 5 [0/559] | Train Loss: 0.1175 Grad: 150119.5781 LR: 9.1752e-06 | Elapse: 0.25s Epoch 5 [500/559] | Train Loss: 0.0875 Grad: 2357.2097 LR: 8.5166e-06 | Elapse: 125.26s Epoch 5 [558/559] | Train Loss: 0.0826 Grad: 45106.0508 LR: 8.4298e-06 | Elapse: 138.68s Epoch 5 [0/140] | Valid Loss: 0.3306 | Elapse: 0.24s Epoch 5 [139/140] | Valid Loss: 0.1818 | Elapse: 33.45s Epoch 5 - Train Loss: 0.0826 - Valid Loss: 0.6356 - Elapsed Time: 190.85s - Epoch 5: Best model found with loss = 0.6356. Epoch 6 [0/559] | Train Loss: 0.0824 Grad: 174138.9375 LR: 8.4282e-06 | Elapse: 0.25s Epoch 6 [500/559] | Train Loss: 0.0645 Grad: 2415.5796 LR: 7.6021e-06 | Elapse: 125.34s Epoch 6 [558/559] | Train Loss: 0.0609 Grad: 34928.2852 LR: 7.4983e-06 | Elapse: 139.51s Epoch 6 [0/140] | Valid Loss: 0.3380 | Elapse: 0.25s Epoch 6 [139/140] | Valid Loss: 0.1884 | Elapse: 33.46s Epoch 6 - Train Loss: 0.0609 - Valid Loss: 0.6562 - Elapsed Time: 191.72s - Epoch 6: Best model found with loss = 0.6562. Epoch 7 [0/559] | Train Loss: 0.0366 Grad: 120182.0000 LR: 7.4965e-06 | Elapse: 0.25s Epoch 7 [500/559] | Train Loss: 0.0466 Grad: 2352.1565 LR: 6.5474e-06 | Elapse: 125.26s Epoch 7 [558/559] | Train Loss: 0.0443 Grad: 34154.0625 LR: 6.4322e-06 | Elapse: 138.76s Epoch 7 [0/140] | Valid Loss: 0.3391 | Elapse: 0.25s Epoch 7 [139/140] | Valid Loss: 0.1881 | Elapse: 33.47s Epoch 7 - Train Loss: 0.0443 - Valid Loss: 0.6690 - Elapsed Time: 190.93s - Epoch 7: Best model found with loss = 0.6690. Epoch 8 [0/559] | Train Loss: 0.0122 Grad: 36752.5703 LR: 6.4302e-06 | Elapse: 0.25s Epoch 8 [500/559] | Train Loss: 0.0347 Grad: 2524.4080 LR: 5.4093e-06 | Elapse: 125.76s Epoch 8 [558/559] | Train Loss: 0.0332 Grad: 32212.2832 LR: 5.2888e-06 | Elapse: 139.62s Epoch 8 [0/140] | Valid Loss: 0.3415 | Elapse: 0.24s Epoch 8 [139/140] | Valid Loss: 0.1895 | Elapse: 33.64s Epoch 8 - Train Loss: 0.0332 - Valid Loss: 0.6803 - Elapsed Time: 191.95s - Epoch 8: Best model found with loss = 0.6803. Epoch 9 [0/559] | Train Loss: 0.0104 Grad: 32936.9766 LR: 5.2868e-06 | Elapse: 0.27s Epoch 9 [500/559] | Train Loss: 0.0279 Grad: 2847.9451 LR: 4.2491e-06 | Elapse: 125.51s Epoch 9 [558/559] | Train Loss: 0.0269 Grad: 28955.7949 LR: 4.1299e-06 | Elapse: 139.89s Epoch 9 [0/140] | Valid Loss: 0.3439 | Elapse: 0.25s Epoch 9 [139/140] | Valid Loss: 0.1957 | Elapse: 34.14s Epoch 9 - Train Loss: 0.0269 - Valid Loss: 0.6768 - Elapsed Time: 192.91s Epoch 10 [0/559] | Train Loss: 0.0104 Grad: 42780.7773 LR: 4.1279e-06 | Elapse: 0.26s Epoch 10 [500/559] | Train Loss: 0.0238 Grad: 2901.0112 LR: 3.1294e-06 | Elapse: 125.79s Epoch 10 [558/559] | Train Loss: 0.0230 Grad: 36174.7969 LR: 3.0180e-06 | Elapse: 139.76s Epoch 10 [0/140] | Valid Loss: 0.3682 | Elapse: 0.25s Epoch 10 [139/140] | Valid Loss: 0.1996 | Elapse: 33.61s Epoch 10 - Train Loss: 0.0230 - Valid Loss: 0.6766 - Elapsed Time: 192.09s Epoch 11 [0/559] | Train Loss: 0.0138 Grad: 106932.0312 LR: 3.0161e-06 | Elapse: 0.25s Epoch 11 [500/559] | Train Loss: 0.0221 Grad: 2955.0090 LR: 2.1105e-06 | Elapse: 125.59s Epoch 11 [558/559] | Train Loss: 0.0213 Grad: 35371.1094 LR: 2.0129e-06 | Elapse: 139.91s Epoch 11 [0/140] | Valid Loss: 0.3881 | Elapse: 0.25s Epoch 11 [139/140] | Valid Loss: 0.2018 | Elapse: 33.85s Epoch 11 - Train Loss: 0.0213 - Valid Loss: 0.6778 - Elapsed Time: 192.62s Epoch 12 [0/559] | Train Loss: 0.0082 Grad: 31339.0059 LR: 2.0112e-06 | Elapse: 0.26s Epoch 12 [500/559] | Train Loss: 0.0219 Grad: 3061.0505 LR: 1.2475e-06 | Elapse: 126.89s Epoch 12 [558/559] | Train Loss: 0.0214 Grad: 49892.2852 LR: 1.1688e-06 | Elapse: 141.08s Epoch 12 [0/140] | Valid Loss: 0.4046 | Elapse: 0.26s Epoch 12 [139/140] | Valid Loss: 0.2060 | Elapse: 33.79s Epoch 12 - Train Loss: 0.0214 - Valid Loss: 0.6766 - Elapsed Time: 193.88s Epoch 13 [0/559] | Train Loss: 0.0138 Grad: 68523.7422 LR: 1.1675e-06 | Elapse: 0.26s Epoch 13 [500/559] | Train Loss: 0.0243 Grad: 3324.7493 LR: 5.8671e-07 | Elapse: 126.07s Epoch 13 [558/559] | Train Loss: 0.0233 Grad: 31661.7578 LR: 5.3128e-07 | Elapse: 140.40s Epoch 13 [0/140] | Valid Loss: 0.4038 | Elapse: 0.26s Epoch 13 [139/140] | Valid Loss: 0.2110 | Elapse: 33.80s Epoch 13 - Train Loss: 0.0233 - Valid Loss: 0.6846 - Elapsed Time: 192.83s - Epoch 13: Best model found with loss = 0.6846. Epoch 14 [0/559] | Train Loss: 0.0085 Grad: 27442.8672 LR: 5.3035e-07 | Elapse: 0.26s Epoch 14 [500/559] | Train Loss: 0.0312 Grad: 2730.3948 LR: 1.6390e-07 | Elapse: 125.83s Epoch 14 [558/559] | Train Loss: 0.0304 Grad: 39547.0664 LR: 1.3469e-07 | Elapse: 139.96s Epoch 14 [0/140] | Valid Loss: 0.4071 | Elapse: 0.24s Epoch 14 [139/140] | Valid Loss: 0.1993 | Elapse: 33.71s Epoch 14 - Train Loss: 0.0304 - Valid Loss: 0.6752 - Elapsed Time: 192.36s Epoch 15 [0/559] | Train Loss: 0.0339 Grad: 233240.2188 LR: 1.3421e-07 | Elapse: 0.25s Epoch 15 [500/559] | Train Loss: 0.0446 Grad: 3607.2859 LR: 1.8075e-09 | Elapse: 125.32s Epoch 15 [558/559] | Train Loss: 0.0415 Grad: 23472.7012 LR: 4.0043e-10 | Elapse: 139.40s Epoch 15 [0/140] | Valid Loss: 0.3857 | Elapse: 0.25s Epoch 15 [139/140] | Valid Loss: 0.2055 | Elapse: 33.72s Epoch 15 - Train Loss: 0.0415 - Valid Loss: 0.7002 - Elapsed Time: 192.17s - Epoch 15: Best model found with loss = 0.7002. Fold 1 | Time: 48.74min | Overall Evaluation Loss: 0.6015 Epoch 1 [0/559] | Train Loss: 0.3812 Grad: 63666.2773 LR: 4.0003e-07 | Elapse: 0.25s Epoch 1 [500/559] | Train Loss: 0.1732 Grad: 581.3810 LR: 6.6576e-06 | Elapse: 125.81s Epoch 1 [558/559] | Train Loss: 0.1633 Grad: 732.4777 LR: 7.6104e-06 | Elapse: 139.98s Epoch 1 [0/140] | Valid Loss: 0.0022 | Elapse: 0.24s Epoch 1 [139/140] | Valid Loss: 0.1633 | Elapse: 33.73s Epoch 1 - Train Loss: 0.1633 - Valid Loss: 0.5166 - Elapsed Time: 191.88s - Epoch 1: Best model found with loss = 0.5166. Epoch 2 [0/559] | Train Loss: 0.3838 Grad: 57708.2617 LR: 7.6259e-06 | Elapse: 0.26s Epoch 2 [500/559] | Train Loss: 0.1705 Grad: 918.3583 LR: 9.9786e-06 | Elapse: 126.00s Epoch 2 [558/559] | Train Loss: 0.1604 Grad: 1212.6509 LR: 9.9660e-06 | Elapse: 140.22s Epoch 2 [0/140] | Valid Loss: 0.0026 | Elapse: 0.25s Epoch 2 [139/140] | Valid Loss: 0.1712 | Elapse: 33.55s Epoch 2 - Train Loss: 0.1604 - Valid Loss: 0.5430 - Elapsed Time: 192.26s - Epoch 2: Best model found with loss = 0.5430. Epoch 3 [0/559] | Train Loss: 0.3401 Grad: 87677.3906 LR: 9.9657e-06 | Elapse: 0.26s Epoch 3 [500/559] | Train Loss: 0.1533 Grad: 1757.4867 LR: 9.7377e-06 | Elapse: 125.95s Epoch 3 [558/559] | Train Loss: 0.1437 Grad: 1956.4470 LR: 9.6978e-06 | Elapse: 140.17s Epoch 3 [0/140] | Valid Loss: 0.0032 | Elapse: 0.25s Epoch 3 [139/140] | Valid Loss: 0.1864 | Elapse: 34.00s Epoch 3 - Train Loss: 0.1437 - Valid Loss: 0.5779 - Elapsed Time: 192.76s - Epoch 3: Best model found with loss = 0.5779. Epoch 4 [0/559] | Train Loss: 0.2400 Grad: 140489.8125 LR: 9.6970e-06 | Elapse: 0.29s Epoch 4 [500/559] | Train Loss: 0.1184 Grad: 2406.1462 LR: 9.2415e-06 | Elapse: 125.33s Epoch 4 [558/559] | Train Loss: 0.1107 Grad: 2661.6245 LR: 9.1763e-06 | Elapse: 139.51s Epoch 4 [0/140] | Valid Loss: 0.0036 | Elapse: 0.24s Epoch 4 [139/140] | Valid Loss: 0.1974 | Elapse: 33.96s Epoch 4 - Train Loss: 0.1107 - Valid Loss: 0.6101 - Elapsed Time: 192.02s - Epoch 4: Best model found with loss = 0.6101. Epoch 5 [0/559] | Train Loss: 0.1249 Grad: 170391.6250 LR: 9.1752e-06 | Elapse: 0.26s Epoch 5 [500/559] | Train Loss: 0.0822 Grad: 2784.2063 LR: 8.5166e-06 | Elapse: 130.16s Epoch 5 [558/559] | Train Loss: 0.0770 Grad: 2933.9009 LR: 8.4298e-06 | Elapse: 144.47s Epoch 5 [0/140] | Valid Loss: 0.0039 | Elapse: 0.25s Epoch 5 [139/140] | Valid Loss: 0.2053 | Elapse: 33.90s Epoch 5 - Train Loss: 0.0770 - Valid Loss: 0.6497 - Elapsed Time: 196.74s - Epoch 5: Best model found with loss = 0.6497. Epoch 6 [0/559] | Train Loss: 0.0599 Grad: 137885.3594 LR: 8.4282e-06 | Elapse: 0.26s Epoch 6 [500/559] | Train Loss: 0.0572 Grad: 2973.3008 LR: 7.6021e-06 | Elapse: 126.88s Epoch 6 [558/559] | Train Loss: 0.0537 Grad: 3382.1863 LR: 7.4983e-06 | Elapse: 141.28s Epoch 6 [0/140] | Valid Loss: 0.0040 | Elapse: 0.24s Epoch 6 [139/140] | Valid Loss: 0.2106 | Elapse: 34.33s Epoch 6 - Train Loss: 0.0537 - Valid Loss: 0.6735 - Elapsed Time: 194.60s - Epoch 6: Best model found with loss = 0.6735. Epoch 7 [0/559] | Train Loss: 0.0362 Grad: 118999.2891 LR: 7.4965e-06 | Elapse: 0.27s Epoch 7 [500/559] | Train Loss: 0.0399 Grad: 2944.9485 LR: 6.5474e-06 | Elapse: 127.59s Epoch 7 [558/559] | Train Loss: 0.0375 Grad: 3746.2769 LR: 6.4322e-06 | Elapse: 141.92s Epoch 7 [0/140] | Valid Loss: 0.0040 | Elapse: 0.25s Epoch 7 [139/140] | Valid Loss: 0.2130 | Elapse: 36.41s Epoch 7 - Train Loss: 0.0375 - Valid Loss: 0.6873 - Elapsed Time: 196.90s - Epoch 7: Best model found with loss = 0.6873. Epoch 8 [0/559] | Train Loss: 0.0127 Grad: 36778.2344 LR: 6.4302e-06 | Elapse: 0.27s Epoch 8 [500/559] | Train Loss: 0.0289 Grad: 3099.9973 LR: 5.4093e-06 | Elapse: 126.40s Epoch 8 [558/559] | Train Loss: 0.0274 Grad: 3744.7815 LR: 5.2888e-06 | Elapse: 140.66s Epoch 8 [0/140] | Valid Loss: 0.0039 | Elapse: 0.25s Epoch 8 [139/140] | Valid Loss: 0.2149 | Elapse: 33.77s Epoch 8 - Train Loss: 0.0274 - Valid Loss: 0.6968 - Elapsed Time: 192.69s - Epoch 8: Best model found with loss = 0.6968. Epoch 9 [0/559] | Train Loss: 0.0107 Grad: 30835.6641 LR: 5.2868e-06 | Elapse: 0.27s Epoch 9 [500/559] | Train Loss: 0.0227 Grad: 3168.0400 LR: 4.2491e-06 | Elapse: 125.79s Epoch 9 [558/559] | Train Loss: 0.0216 Grad: 3586.9297 LR: 4.1299e-06 | Elapse: 139.93s Epoch 9 [0/140] | Valid Loss: 0.0037 | Elapse: 0.25s Epoch 9 [139/140] | Valid Loss: 0.2169 | Elapse: 33.72s Epoch 9 - Train Loss: 0.0216 - Valid Loss: 0.7033 - Elapsed Time: 192.30s - Epoch 9: Best model found with loss = 0.7033. Epoch 10 [0/559] | Train Loss: 0.0108 Grad: 38345.1289 LR: 4.1279e-06 | Elapse: 0.27s Epoch 10 [500/559] | Train Loss: 0.0190 Grad: 3129.7153 LR: 3.1294e-06 | Elapse: 126.03s Epoch 10 [558/559] | Train Loss: 0.0182 Grad: 3530.1790 LR: 3.0180e-06 | Elapse: 140.02s Epoch 10 [0/140] | Valid Loss: 0.0035 | Elapse: 0.25s Epoch 10 [139/140] | Valid Loss: 0.2181 | Elapse: 33.88s Epoch 10 - Train Loss: 0.0182 - Valid Loss: 0.7128 - Elapsed Time: 193.01s - Epoch 10: Best model found with loss = 0.7128. Epoch 11 [0/559] | Train Loss: 0.0090 Grad: 26533.8750 LR: 3.0161e-06 | Elapse: 0.26s Epoch 11 [500/559] | Train Loss: 0.0169 Grad: 3294.2278 LR: 2.1105e-06 | Elapse: 125.86s Epoch 11 [558/559] | Train Loss: 0.0162 Grad: 3385.0498 LR: 2.0129e-06 | Elapse: 140.18s Epoch 11 [0/140] | Valid Loss: 0.0034 | Elapse: 0.25s Epoch 11 [139/140] | Valid Loss: 0.2210 | Elapse: 34.25s Epoch 11 - Train Loss: 0.0162 - Valid Loss: 0.7142 - Elapsed Time: 193.09s - Epoch 11: Best model found with loss = 0.7142. Epoch 12 [0/559] | Train Loss: 0.0087 Grad: 26727.4258 LR: 2.0112e-06 | Elapse: 0.27s Epoch 12 [500/559] | Train Loss: 0.0160 Grad: 3698.4797 LR: 1.2475e-06 | Elapse: 127.23s Epoch 12 [558/559] | Train Loss: 0.0154 Grad: 3166.7349 LR: 1.1688e-06 | Elapse: 141.57s Epoch 12 [0/140] | Valid Loss: 0.0033 | Elapse: 0.25s Epoch 12 [139/140] | Valid Loss: 0.2227 | Elapse: 33.83s Epoch 12 - Train Loss: 0.0154 - Valid Loss: 0.7182 - Elapsed Time: 193.83s - Epoch 12: Best model found with loss = 0.7182. Epoch 13 [0/559] | Train Loss: 0.0087 Grad: 31494.2891 LR: 1.1675e-06 | Elapse: 0.26s Epoch 13 [500/559] | Train Loss: 0.0165 Grad: 3536.4666 LR: 5.8671e-07 | Elapse: 126.05s Epoch 13 [558/559] | Train Loss: 0.0160 Grad: 3760.2610 LR: 5.3128e-07 | Elapse: 140.15s Epoch 13 [0/140] | Valid Loss: 0.0036 | Elapse: 0.24s Epoch 13 [139/140] | Valid Loss: 0.2264 | Elapse: 34.03s Epoch 13 - Train Loss: 0.0160 - Valid Loss: 0.7131 - Elapsed Time: 192.70s Epoch 14 [0/559] | Train Loss: 0.0283 Grad: 178969.0156 LR: 5.3035e-07 | Elapse: 0.26s Epoch 14 [500/559] | Train Loss: 0.0234 Grad: 3923.5754 LR: 1.6390e-07 | Elapse: 126.74s Epoch 14 [558/559] | Train Loss: 0.0221 Grad: 4402.1353 LR: 1.3469e-07 | Elapse: 140.99s Epoch 14 [0/140] | Valid Loss: 0.0041 | Elapse: 0.25s Epoch 14 [139/140] | Valid Loss: 0.2295 | Elapse: 34.08s Epoch 14 - Train Loss: 0.0221 - Valid Loss: 0.7279 - Elapsed Time: 194.47s - Epoch 14: Best model found with loss = 0.7279. Epoch 15 [0/559] | Train Loss: 0.0141 Grad: 78785.1016 LR: 1.3421e-07 | Elapse: 0.27s Epoch 15 [500/559] | Train Loss: 0.0303 Grad: 4075.6143 LR: 1.8075e-09 | Elapse: 126.08s Epoch 15 [558/559] | Train Loss: 0.0283 Grad: 3996.2625 LR: 4.0043e-10 | Elapse: 140.29s Epoch 15 [0/140] | Valid Loss: 0.0043 | Elapse: 0.24s Epoch 15 [139/140] | Valid Loss: 0.2293 | Elapse: 33.68s Epoch 15 - Train Loss: 0.0283 - Valid Loss: 0.7332 - Elapsed Time: 192.40s - Epoch 15: Best model found with loss = 0.7332. Fold 2 | Time: 49.36min | Overall Evaluation Loss: 0.5567 Epoch 1 [0/559] | Train Loss: 0.4033 Grad: 60917.2539 LR: 4.0003e-07 | Elapse: 0.26s Epoch 1 [500/559] | Train Loss: 0.1715 Grad: 14635.6260 LR: 6.6576e-06 | Elapse: 126.84s Epoch 1 [558/559] | Train Loss: 0.1611 Grad: 23629.8457 LR: 7.6104e-06 | Elapse: 141.22s Epoch 1 [0/140] | Valid Loss: 0.0018 | Elapse: 0.24s Epoch 1 [139/140] | Valid Loss: 0.1737 | Elapse: 34.04s Epoch 1 - Train Loss: 0.1611 - Valid Loss: 0.5356 - Elapsed Time: 195.75s - Epoch 1: Best model found with loss = 0.5356. Epoch 2 [0/559] | Train Loss: 0.4044 Grad: 60479.0547 LR: 7.6259e-06 | Elapse: 0.27s Epoch 2 [500/559] | Train Loss: 0.1690 Grad: 17688.2090 LR: 9.9786e-06 | Elapse: 126.92s Epoch 2 [558/559] | Train Loss: 0.1583 Grad: 29924.3945 LR: 9.9660e-06 | Elapse: 141.46s Epoch 2 [0/140] | Valid Loss: 0.0022 | Elapse: 0.25s Epoch 2 [139/140] | Valid Loss: 0.1835 | Elapse: 34.17s Epoch 2 - Train Loss: 0.1583 - Valid Loss: 0.5568 - Elapsed Time: 196.09s - Epoch 2: Best model found with loss = 0.5568. Epoch 3 [0/559] | Train Loss: 0.3917 Grad: 83466.0781 LR: 9.9657e-06 | Elapse: 0.26s Epoch 3 [500/559] | Train Loss: 0.1508 Grad: 23610.6426 LR: 9.7377e-06 | Elapse: 125.85s Epoch 3 [558/559] | Train Loss: 0.1409 Grad: 36061.9453 LR: 9.6978e-06 | Elapse: 140.01s Epoch 3 [0/140] | Valid Loss: 0.0026 | Elapse: 0.24s Epoch 3 [139/140] | Valid Loss: 0.1992 | Elapse: 33.93s Epoch 3 - Train Loss: 0.1409 - Valid Loss: 0.5892 - Elapsed Time: 194.61s - Epoch 3: Best model found with loss = 0.5892. Epoch 4 [0/559] | Train Loss: 0.3327 Grad: 118424.0703 LR: 9.6970e-06 | Elapse: 0.26s Epoch 4 [500/559] | Train Loss: 0.1170 Grad: 25682.8535 LR: 9.2415e-06 | Elapse: 125.23s Epoch 4 [558/559] | Train Loss: 0.1092 Grad: 36628.4961 LR: 9.1763e-06 | Elapse: 139.25s Epoch 4 [0/140] | Valid Loss: 0.0030 | Elapse: 0.24s Epoch 4 [139/140] | Valid Loss: 0.2104 | Elapse: 33.40s Epoch 4 - Train Loss: 0.1092 - Valid Loss: 0.6228 - Elapsed Time: 193.43s - Epoch 4: Best model found with loss = 0.6228. Epoch 5 [0/559] | Train Loss: 0.2381 Grad: 145616.0312 LR: 9.1752e-06 | Elapse: 0.28s Epoch 5 [500/559] | Train Loss: 0.0863 Grad: 30112.3164 LR: 8.5166e-06 | Elapse: 126.16s Epoch 5 [558/559] | Train Loss: 0.0806 Grad: 33182.0938 LR: 8.4298e-06 | Elapse: 140.46s Epoch 5 [0/140] | Valid Loss: 0.0033 | Elapse: 0.25s Epoch 5 [139/140] | Valid Loss: 0.2205 | Elapse: 33.92s Epoch 5 - Train Loss: 0.0806 - Valid Loss: 0.6424 - Elapsed Time: 195.22s - Epoch 5: Best model found with loss = 0.6424. Epoch 6 [0/559] | Train Loss: 0.1447 Grad: 156314.4531 LR: 8.4282e-06 | Elapse: 0.26s Epoch 6 [500/559] | Train Loss: 0.0629 Grad: 28787.6660 LR: 7.6021e-06 | Elapse: 126.09s Epoch 6 [558/559] | Train Loss: 0.0588 Grad: 28876.8848 LR: 7.4983e-06 | Elapse: 140.45s Epoch 6 [0/140] | Valid Loss: 0.0032 | Elapse: 0.25s Epoch 6 [139/140] | Valid Loss: 0.2265 | Elapse: 33.92s Epoch 6 - Train Loss: 0.0588 - Valid Loss: 0.6557 - Elapsed Time: 194.86s - Epoch 6: Best model found with loss = 0.6557. Epoch 7 [0/559] | Train Loss: 0.0764 Grad: 137550.7500 LR: 7.4965e-06 | Elapse: 0.26s Epoch 7 [500/559] | Train Loss: 0.0459 Grad: 30014.1816 LR: 6.5474e-06 | Elapse: 127.04s Epoch 7 [558/559] | Train Loss: 0.0431 Grad: 27206.9277 LR: 6.4322e-06 | Elapse: 141.47s Epoch 7 [0/140] | Valid Loss: 0.0032 | Elapse: 0.25s Epoch 7 [139/140] | Valid Loss: 0.2320 | Elapse: 33.86s Epoch 7 - Train Loss: 0.0431 - Valid Loss: 0.6641 - Elapsed Time: 195.97s - Epoch 7: Best model found with loss = 0.6641. Epoch 8 [0/559] | Train Loss: 0.0332 Grad: 80237.3750 LR: 6.4302e-06 | Elapse: 0.26s Epoch 8 [500/559] | Train Loss: 0.0343 Grad: 29795.3418 LR: 5.4093e-06 | Elapse: 125.89s Epoch 8 [558/559] | Train Loss: 0.0324 Grad: 25865.8398 LR: 5.2888e-06 | Elapse: 140.15s Epoch 8 [0/140] | Valid Loss: 0.0031 | Elapse: 0.24s Epoch 8 [139/140] | Valid Loss: 0.2335 | Elapse: 33.68s Epoch 8 - Train Loss: 0.0324 - Valid Loss: 0.6735 - Elapsed Time: 194.54s - Epoch 8: Best model found with loss = 0.6735. Epoch 9 [0/559] | Train Loss: 0.0222 Grad: 66607.2344 LR: 5.2868e-06 | Elapse: 0.30s Epoch 9 [500/559] | Train Loss: 0.0273 Grad: 40628.5742 LR: 4.2491e-06 | Elapse: 125.85s Epoch 9 [558/559] | Train Loss: 0.0259 Grad: 23364.4590 LR: 4.1299e-06 | Elapse: 140.20s Epoch 9 [0/140] | Valid Loss: 0.0032 | Elapse: 0.25s Epoch 9 [139/140] | Valid Loss: 0.2337 | Elapse: 34.37s Epoch 9 - Train Loss: 0.0259 - Valid Loss: 0.6869 - Elapsed Time: 195.15s - Epoch 9: Best model found with loss = 0.6869. Epoch 10 [0/559] | Train Loss: 0.0155 Grad: 45010.6641 LR: 4.1279e-06 | Elapse: 0.25s Epoch 10 [500/559] | Train Loss: 0.0230 Grad: 26767.3535 LR: 3.1294e-06 | Elapse: 125.55s Epoch 10 [558/559] | Train Loss: 0.0220 Grad: 23471.8066 LR: 3.0180e-06 | Elapse: 139.66s Epoch 10 [0/140] | Valid Loss: 0.0033 | Elapse: 0.24s Epoch 10 [139/140] | Valid Loss: 0.2343 | Elapse: 33.98s Epoch 10 - Train Loss: 0.0220 - Valid Loss: 0.6924 - Elapsed Time: 194.54s - Epoch 10: Best model found with loss = 0.6924. Epoch 11 [0/559] | Train Loss: 0.0167 Grad: 65534.1953 LR: 3.0161e-06 | Elapse: 0.26s Epoch 11 [500/559] | Train Loss: 0.0210 Grad: 44248.9766 LR: 2.1105e-06 | Elapse: 126.21s Epoch 11 [558/559] | Train Loss: 0.0201 Grad: 20471.0703 LR: 2.0129e-06 | Elapse: 140.31s Epoch 11 [0/140] | Valid Loss: 0.0031 | Elapse: 0.25s Epoch 11 [139/140] | Valid Loss: 0.2365 | Elapse: 33.85s Epoch 11 - Train Loss: 0.0201 - Valid Loss: 0.6935 - Elapsed Time: 194.91s - Epoch 11: Best model found with loss = 0.6935. Epoch 12 [0/559] | Train Loss: 0.0125 Grad: 44372.0859 LR: 2.0112e-06 | Elapse: 0.25s Epoch 12 [500/559] | Train Loss: 0.0211 Grad: 41723.9102 LR: 1.2475e-06 | Elapse: 126.32s Epoch 12 [558/559] | Train Loss: 0.0202 Grad: 18374.7188 LR: 1.1688e-06 | Elapse: 140.69s Epoch 12 [0/140] | Valid Loss: 0.0031 | Elapse: 0.27s Epoch 12 [139/140] | Valid Loss: 0.2389 | Elapse: 33.82s Epoch 12 - Train Loss: 0.0202 - Valid Loss: 0.6999 - Elapsed Time: 195.00s - Epoch 12: Best model found with loss = 0.6999. Epoch 13 [0/559] | Train Loss: 0.0164 Grad: 101302.9453 LR: 1.1675e-06 | Elapse: 0.26s Epoch 13 [500/559] | Train Loss: 0.0232 Grad: 44267.0078 LR: 5.8671e-07 | Elapse: 126.35s Epoch 13 [558/559] | Train Loss: 0.0225 Grad: 28713.8242 LR: 5.3128e-07 | Elapse: 140.21s Epoch 13 [0/140] | Valid Loss: 0.0030 | Elapse: 0.25s Epoch 13 [139/140] | Valid Loss: 0.2456 | Elapse: 34.02s Epoch 13 - Train Loss: 0.0225 - Valid Loss: 0.6834 - Elapsed Time: 195.04s Epoch 14 [0/559] | Train Loss: 0.0553 Grad: 297918.5312 LR: 5.3035e-07 | Elapse: 0.26s Epoch 14 [500/559] | Train Loss: 0.0332 Grad: 29470.8398 LR: 1.6390e-07 | Elapse: 126.10s Epoch 14 [558/559] | Train Loss: 0.0309 Grad: 18943.3828 LR: 1.3469e-07 | Elapse: 140.19s Epoch 14 [0/140] | Valid Loss: 0.0040 | Elapse: 0.25s Epoch 14 [139/140] | Valid Loss: 0.2481 | Elapse: 34.27s Epoch 14 - Train Loss: 0.0309 - Valid Loss: 0.7021 - Elapsed Time: 195.17s - Epoch 14: Best model found with loss = 0.7021. Epoch 15 [0/559] | Train Loss: 0.0169 Grad: 69102.5625 LR: 1.3421e-07 | Elapse: 0.26s Epoch 15 [500/559] | Train Loss: 0.0318 Grad: 25327.1172 LR: 1.8075e-09 | Elapse: 126.69s Epoch 15 [558/559] | Train Loss: 0.0296 Grad: 16122.5898 LR: 4.0043e-10 | Elapse: 140.98s Epoch 15 [0/140] | Valid Loss: 0.0040 | Elapse: 0.25s Epoch 15 [139/140] | Valid Loss: 0.2482 | Elapse: 34.14s Epoch 15 - Train Loss: 0.0296 - Valid Loss: 0.7040 - Elapsed Time: 195.68s - Epoch 15: Best model found with loss = 0.7040. Fold 3 | Time: 50.03min | Overall Evaluation Loss: 0.5080 Epoch 1 [0/559] | Train Loss: 0.0049 Grad: 1555.6669 LR: 4.0003e-07 | Elapse: 0.26s Epoch 1 [500/559] | Train Loss: 0.1728 Grad: 240.1252 LR: 6.6576e-06 | Elapse: 127.19s Epoch 1 [558/559] | Train Loss: 0.1627 Grad: 21768.7949 LR: 7.6104e-06 | Elapse: 141.47s Epoch 1 [0/140] | Valid Loss: 0.4064 | Elapse: 0.24s Epoch 1 [139/140] | Valid Loss: 0.1669 | Elapse: 34.05s Epoch 1 - Train Loss: 0.1627 - Valid Loss: 0.5143 - Elapsed Time: 195.51s - Epoch 1: Best model found with loss = 0.5143. Epoch 2 [0/559] | Train Loss: 0.0048 Grad: 1321.4033 LR: 7.6259e-06 | Elapse: 0.27s Epoch 2 [500/559] | Train Loss: 0.1700 Grad: 331.7207 LR: 9.9786e-06 | Elapse: 131.08s Epoch 2 [558/559] | Train Loss: 0.1598 Grad: 26886.9473 LR: 9.9660e-06 | Elapse: 145.39s Epoch 2 [0/140] | Valid Loss: 0.4371 | Elapse: 0.25s Epoch 2 [139/140] | Valid Loss: 0.1746 | Elapse: 34.33s Epoch 2 - Train Loss: 0.1598 - Valid Loss: 0.5597 - Elapsed Time: 199.80s - Epoch 2: Best model found with loss = 0.5597. Epoch 3 [0/559] | Train Loss: 0.0053 Grad: 1975.0654 LR: 9.9657e-06 | Elapse: 0.26s Epoch 3 [500/559] | Train Loss: 0.1505 Grad: 535.0563 LR: 9.7377e-06 | Elapse: 126.83s Epoch 3 [558/559] | Train Loss: 0.1411 Grad: 37974.1758 LR: 9.6978e-06 | Elapse: 140.74s Epoch 3 [0/140] | Valid Loss: 0.4865 | Elapse: 0.24s Epoch 3 [139/140] | Valid Loss: 0.1881 | Elapse: 34.37s Epoch 3 - Train Loss: 0.1411 - Valid Loss: 0.6046 - Elapsed Time: 194.98s - Epoch 3: Best model found with loss = 0.6046. Epoch 4 [0/559] | Train Loss: 0.0064 Grad: 3478.8313 LR: 9.6970e-06 | Elapse: 0.27s Epoch 4 [500/559] | Train Loss: 0.1155 Grad: 707.9515 LR: 9.2415e-06 | Elapse: 126.66s Epoch 4 [558/559] | Train Loss: 0.1082 Grad: 44904.4180 LR: 9.1763e-06 | Elapse: 140.97s Epoch 4 [0/140] | Valid Loss: 0.5202 | Elapse: 0.25s Epoch 4 [139/140] | Valid Loss: 0.1988 | Elapse: 34.23s Epoch 4 - Train Loss: 0.1082 - Valid Loss: 0.6361 - Elapsed Time: 195.29s - Epoch 4: Best model found with loss = 0.6361. Epoch 5 [0/559] | Train Loss: 0.0067 Grad: 4604.8989 LR: 9.1752e-06 | Elapse: 0.26s Epoch 5 [500/559] | Train Loss: 0.0831 Grad: 757.2225 LR: 8.5166e-06 | Elapse: 126.59s Epoch 5 [558/559] | Train Loss: 0.0779 Grad: 48539.0977 LR: 8.4298e-06 | Elapse: 140.80s Epoch 5 [0/140] | Valid Loss: 0.5466 | Elapse: 0.24s Epoch 5 [139/140] | Valid Loss: 0.2059 | Elapse: 33.96s Epoch 5 - Train Loss: 0.0779 - Valid Loss: 0.6536 - Elapsed Time: 194.90s - Epoch 5: Best model found with loss = 0.6536. Epoch 6 [0/559] | Train Loss: 0.0065 Grad: 5073.3496 LR: 8.4282e-06 | Elapse: 0.25s Epoch 6 [500/559] | Train Loss: 0.0585 Grad: 859.3474 LR: 7.6021e-06 | Elapse: 126.51s Epoch 6 [558/559] | Train Loss: 0.0550 Grad: 45421.2344 LR: 7.4983e-06 | Elapse: 140.76s Epoch 6 [0/140] | Valid Loss: 0.5603 | Elapse: 0.24s Epoch 6 [139/140] | Valid Loss: 0.2109 | Elapse: 33.62s Epoch 6 - Train Loss: 0.0550 - Valid Loss: 0.6647 - Elapsed Time: 194.39s - Epoch 6: Best model found with loss = 0.6647. Epoch 7 [0/559] | Train Loss: 0.0057 Grad: 4945.8242 LR: 7.4965e-06 | Elapse: 0.26s Epoch 7 [500/559] | Train Loss: 0.0414 Grad: 901.8848 LR: 6.5474e-06 | Elapse: 125.32s Epoch 7 [558/559] | Train Loss: 0.0391 Grad: 42609.3008 LR: 6.4322e-06 | Elapse: 139.33s Epoch 7 [0/140] | Valid Loss: 0.5715 | Elapse: 0.26s Epoch 7 [139/140] | Valid Loss: 0.2130 | Elapse: 33.73s Epoch 7 - Train Loss: 0.0391 - Valid Loss: 0.6863 - Elapsed Time: 193.06s - Epoch 7: Best model found with loss = 0.6863. Epoch 8 [0/559] | Train Loss: 0.0053 Grad: 4962.4922 LR: 6.4302e-06 | Elapse: 0.26s Epoch 8 [500/559] | Train Loss: 0.0315 Grad: 922.6987 LR: 5.4093e-06 | Elapse: 130.24s Epoch 8 [558/559] | Train Loss: 0.0299 Grad: 38646.7812 LR: 5.2888e-06 | Elapse: 143.91s Epoch 8 [0/140] | Valid Loss: 0.5756 | Elapse: 0.24s Epoch 8 [139/140] | Valid Loss: 0.2145 | Elapse: 33.65s Epoch 8 - Train Loss: 0.0299 - Valid Loss: 0.6948 - Elapsed Time: 197.73s - Epoch 8: Best model found with loss = 0.6948. Epoch 9 [0/559] | Train Loss: 0.0048 Grad: 4831.4072 LR: 5.2868e-06 | Elapse: 0.26s Epoch 9 [500/559] | Train Loss: 0.0254 Grad: 949.3217 LR: 4.2491e-06 | Elapse: 125.47s Epoch 9 [558/559] | Train Loss: 0.0242 Grad: 34935.5742 LR: 4.1299e-06 | Elapse: 139.63s Epoch 9 [0/140] | Valid Loss: 0.5847 | Elapse: 0.24s Epoch 9 [139/140] | Valid Loss: 0.2157 | Elapse: 33.78s Epoch 9 - Train Loss: 0.0242 - Valid Loss: 0.7015 - Elapsed Time: 193.70s - Epoch 9: Best model found with loss = 0.7015. Epoch 10 [0/559] | Train Loss: 0.0044 Grad: 4695.4775 LR: 4.1279e-06 | Elapse: 0.26s Epoch 10 [500/559] | Train Loss: 0.0218 Grad: 985.4415 LR: 3.1294e-06 | Elapse: 125.73s Epoch 10 [558/559] | Train Loss: 0.0209 Grad: 30812.2129 LR: 3.0180e-06 | Elapse: 139.94s Epoch 10 [0/140] | Valid Loss: 0.5970 | Elapse: 0.24s Epoch 10 [139/140] | Valid Loss: 0.2170 | Elapse: 33.73s Epoch 10 - Train Loss: 0.0209 - Valid Loss: 0.7077 - Elapsed Time: 193.81s - Epoch 10: Best model found with loss = 0.7077. Epoch 11 [0/559] | Train Loss: 0.0042 Grad: 4457.5732 LR: 3.0161e-06 | Elapse: 0.26s Epoch 11 [500/559] | Train Loss: 0.0197 Grad: 998.7606 LR: 2.1105e-06 | Elapse: 125.42s Epoch 11 [558/559] | Train Loss: 0.0189 Grad: 28108.0117 LR: 2.0129e-06 | Elapse: 139.67s Epoch 11 [0/140] | Valid Loss: 0.6144 | Elapse: 0.24s Epoch 11 [139/140] | Valid Loss: 0.2198 | Elapse: 33.66s Epoch 11 - Train Loss: 0.0189 - Valid Loss: 0.7122 - Elapsed Time: 193.37s - Epoch 11: Best model found with loss = 0.7122. Epoch 12 [0/559] | Train Loss: 0.0044 Grad: 4598.8530 LR: 2.0112e-06 | Elapse: 0.25s Epoch 12 [500/559] | Train Loss: 0.0187 Grad: 1023.3531 LR: 1.2475e-06 | Elapse: 126.20s Epoch 12 [558/559] | Train Loss: 0.0180 Grad: 30015.6699 LR: 1.1688e-06 | Elapse: 140.24s Epoch 12 [0/140] | Valid Loss: 0.6243 | Elapse: 0.24s Epoch 12 [139/140] | Valid Loss: 0.2217 | Elapse: 33.71s Epoch 12 - Train Loss: 0.0180 - Valid Loss: 0.7160 - Elapsed Time: 194.13s - Epoch 12: Best model found with loss = 0.7160. Epoch 13 [0/559] | Train Loss: 0.0045 Grad: 4811.9868 LR: 1.1675e-06 | Elapse: 0.26s Epoch 13 [500/559] | Train Loss: 0.0198 Grad: 962.6007 LR: 5.8671e-07 | Elapse: 127.01s Epoch 13 [558/559] | Train Loss: 0.0191 Grad: 36723.7148 LR: 5.3128e-07 | Elapse: 141.41s Epoch 13 [0/140] | Valid Loss: 0.6340 | Elapse: 0.25s Epoch 13 [139/140] | Valid Loss: 0.2238 | Elapse: 33.85s Epoch 13 - Train Loss: 0.0191 - Valid Loss: 0.7230 - Elapsed Time: 195.50s - Epoch 13: Best model found with loss = 0.7230. Epoch 14 [0/559] | Train Loss: 0.0044 Grad: 4792.7627 LR: 5.3035e-07 | Elapse: 0.26s Epoch 14 [500/559] | Train Loss: 0.0257 Grad: 1015.6837 LR: 1.6390e-07 | Elapse: 126.09s Epoch 14 [558/559] | Train Loss: 0.0248 Grad: 29382.1445 LR: 1.3469e-07 | Elapse: 140.39s Epoch 14 [0/140] | Valid Loss: 0.6257 | Elapse: 0.24s Epoch 14 [139/140] | Valid Loss: 0.2225 | Elapse: 33.86s Epoch 14 - Train Loss: 0.0248 - Valid Loss: 0.7225 - Elapsed Time: 194.32s Epoch 15 [0/559] | Train Loss: 0.0051 Grad: 5505.9766 LR: 1.3421e-07 | Elapse: 0.25s Epoch 15 [500/559] | Train Loss: 0.0354 Grad: 1217.5887 LR: 1.8075e-09 | Elapse: 125.52s Epoch 15 [558/559] | Train Loss: 0.0330 Grad: 28119.0625 LR: 4.0043e-10 | Elapse: 139.46s Epoch 15 [0/140] | Valid Loss: 0.6553 | Elapse: 0.24s Epoch 15 [139/140] | Valid Loss: 0.2268 | Elapse: 33.73s Epoch 15 - Train Loss: 0.0330 - Valid Loss: 0.7335 - Elapsed Time: 193.33s - Epoch 15: Best model found with loss = 0.7335. Fold 4 | Time: 50.19min | Overall Evaluation Loss: 0.4696