diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,77890 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 11.527111111111111, + "global_step": 64840, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9997037037037034e-05, + "loss": 4.7865, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 4.999407407407407e-05, + "loss": 4.1868, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.999111111111111e-05, + "loss": 3.8968, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 4.998814814814815e-05, + "loss": 3.7514, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.998518518518518e-05, + "loss": 3.6851, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 4.998222222222222e-05, + "loss": 3.5718, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 4.997925925925926e-05, + "loss": 3.5858, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 4.99762962962963e-05, + "loss": 3.484, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 4.997333333333333e-05, + "loss": 3.5216, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 4.997037037037037e-05, + "loss": 3.4322, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 4.996740740740741e-05, + "loss": 3.3487, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 4.996444444444445e-05, + "loss": 3.3356, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 4.996148148148148e-05, + "loss": 3.2931, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 4.995851851851852e-05, + "loss": 3.4107, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.995555555555556e-05, + "loss": 3.3909, + "step": 75 + }, + { + "epoch": 0.01, + "learning_rate": 4.9952592592592596e-05, + "loss": 3.2811, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 4.994962962962963e-05, + "loss": 3.288, + "step": 85 + }, + { + "epoch": 0.02, + "learning_rate": 4.994666666666667e-05, + "loss": 3.2796, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943703703703706e-05, + "loss": 3.2775, + "step": 95 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940740740740745e-05, + "loss": 3.2935, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 4.993777777777778e-05, + "loss": 3.2768, + "step": 105 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934814814814816e-05, + "loss": 3.3402, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 4.9931851851851855e-05, + "loss": 3.178, + "step": 115 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928888888888893e-05, + "loss": 3.13, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925925925925926e-05, + "loss": 3.1009, + "step": 125 + }, + { + "epoch": 0.02, + "learning_rate": 4.9922962962962964e-05, + "loss": 3.2681, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 4.992e-05, + "loss": 3.0927, + "step": 135 + }, + { + "epoch": 0.02, + "learning_rate": 4.991703703703704e-05, + "loss": 3.1378, + "step": 140 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914074074074074e-05, + "loss": 3.0502, + "step": 145 + }, + { + "epoch": 0.03, + "learning_rate": 4.991111111111111e-05, + "loss": 3.1192, + "step": 150 + }, + { + "epoch": 0.03, + "learning_rate": 4.990814814814815e-05, + "loss": 3.1041, + "step": 155 + }, + { + "epoch": 0.03, + "learning_rate": 4.990518518518519e-05, + "loss": 3.0226, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 4.990222222222222e-05, + "loss": 3.1867, + "step": 165 + }, + { + "epoch": 0.03, + "learning_rate": 4.989925925925926e-05, + "loss": 3.0374, + "step": 170 + }, + { + "epoch": 0.03, + "learning_rate": 4.9896296296296293e-05, + "loss": 3.0775, + "step": 175 + }, + { + "epoch": 0.03, + "learning_rate": 4.989333333333334e-05, + "loss": 3.0081, + "step": 180 + }, + { + "epoch": 0.03, + "learning_rate": 4.989037037037037e-05, + "loss": 3.0772, + "step": 185 + }, + { + "epoch": 0.03, + "learning_rate": 4.988740740740741e-05, + "loss": 3.1312, + "step": 190 + }, + { + "epoch": 0.03, + "learning_rate": 4.988444444444444e-05, + "loss": 2.8459, + "step": 195 + }, + { + "epoch": 0.04, + "learning_rate": 4.988148148148149e-05, + "loss": 3.1078, + "step": 200 + }, + { + "epoch": 0.04, + "learning_rate": 4.987851851851852e-05, + "loss": 3.1878, + "step": 205 + }, + { + "epoch": 0.04, + "learning_rate": 4.987555555555556e-05, + "loss": 3.0411, + "step": 210 + }, + { + "epoch": 0.04, + "learning_rate": 4.987259259259259e-05, + "loss": 2.9985, + "step": 215 + }, + { + "epoch": 0.04, + "learning_rate": 4.9869629629629636e-05, + "loss": 3.0428, + "step": 220 + }, + { + "epoch": 0.04, + "learning_rate": 4.986666666666667e-05, + "loss": 3.0759, + "step": 225 + }, + { + "epoch": 0.04, + "learning_rate": 4.986370370370371e-05, + "loss": 2.9847, + "step": 230 + }, + { + "epoch": 0.04, + "learning_rate": 4.986074074074074e-05, + "loss": 3.0635, + "step": 235 + }, + { + "epoch": 0.04, + "learning_rate": 4.985777777777778e-05, + "loss": 3.0223, + "step": 240 + }, + { + "epoch": 0.04, + "learning_rate": 4.9854814814814817e-05, + "loss": 3.0469, + "step": 245 + }, + { + "epoch": 0.04, + "learning_rate": 4.9851851851851855e-05, + "loss": 3.105, + "step": 250 + }, + { + "epoch": 0.05, + "learning_rate": 4.984888888888889e-05, + "loss": 3.0966, + "step": 255 + }, + { + "epoch": 0.05, + "learning_rate": 4.9845925925925926e-05, + "loss": 3.0835, + "step": 260 + }, + { + "epoch": 0.05, + "learning_rate": 4.9842962962962965e-05, + "loss": 3.0889, + "step": 265 + }, + { + "epoch": 0.05, + "learning_rate": 4.9840000000000004e-05, + "loss": 2.9536, + "step": 270 + }, + { + "epoch": 0.05, + "learning_rate": 4.9837037037037036e-05, + "loss": 3.1165, + "step": 275 + }, + { + "epoch": 0.05, + "learning_rate": 4.9834074074074075e-05, + "loss": 2.9886, + "step": 280 + }, + { + "epoch": 0.05, + "learning_rate": 4.9831111111111114e-05, + "loss": 3.1402, + "step": 285 + }, + { + "epoch": 0.05, + "learning_rate": 4.982814814814815e-05, + "loss": 3.0506, + "step": 290 + }, + { + "epoch": 0.05, + "learning_rate": 4.9825185185185184e-05, + "loss": 3.0962, + "step": 295 + }, + { + "epoch": 0.05, + "learning_rate": 4.982222222222222e-05, + "loss": 2.9656, + "step": 300 + }, + { + "epoch": 0.05, + "learning_rate": 4.981925925925926e-05, + "loss": 3.018, + "step": 305 + }, + { + "epoch": 0.06, + "learning_rate": 4.98162962962963e-05, + "loss": 2.9314, + "step": 310 + }, + { + "epoch": 0.06, + "learning_rate": 4.981333333333333e-05, + "loss": 3.0717, + "step": 315 + }, + { + "epoch": 0.06, + "learning_rate": 4.981037037037037e-05, + "loss": 3.0789, + "step": 320 + }, + { + "epoch": 0.06, + "learning_rate": 4.980740740740741e-05, + "loss": 3.0124, + "step": 325 + }, + { + "epoch": 0.06, + "learning_rate": 4.980444444444445e-05, + "loss": 2.937, + "step": 330 + }, + { + "epoch": 0.06, + "learning_rate": 4.980148148148148e-05, + "loss": 2.8069, + "step": 335 + }, + { + "epoch": 0.06, + "learning_rate": 4.979851851851852e-05, + "loss": 2.832, + "step": 340 + }, + { + "epoch": 0.06, + "learning_rate": 4.979555555555556e-05, + "loss": 3.0651, + "step": 345 + }, + { + "epoch": 0.06, + "learning_rate": 4.97925925925926e-05, + "loss": 3.015, + "step": 350 + }, + { + "epoch": 0.06, + "learning_rate": 4.978962962962963e-05, + "loss": 2.921, + "step": 355 + }, + { + "epoch": 0.06, + "learning_rate": 4.978666666666667e-05, + "loss": 2.946, + "step": 360 + }, + { + "epoch": 0.06, + "learning_rate": 4.978370370370371e-05, + "loss": 2.8124, + "step": 365 + }, + { + "epoch": 0.07, + "learning_rate": 4.9780740740740746e-05, + "loss": 2.9505, + "step": 370 + }, + { + "epoch": 0.07, + "learning_rate": 4.977777777777778e-05, + "loss": 2.8616, + "step": 375 + }, + { + "epoch": 0.07, + "learning_rate": 4.977481481481482e-05, + "loss": 2.8996, + "step": 380 + }, + { + "epoch": 0.07, + "learning_rate": 4.9771851851851856e-05, + "loss": 2.8545, + "step": 385 + }, + { + "epoch": 0.07, + "learning_rate": 4.9768888888888895e-05, + "loss": 3.0045, + "step": 390 + }, + { + "epoch": 0.07, + "learning_rate": 4.976592592592593e-05, + "loss": 2.8424, + "step": 395 + }, + { + "epoch": 0.07, + "learning_rate": 4.9762962962962966e-05, + "loss": 2.8732, + "step": 400 + }, + { + "epoch": 0.07, + "learning_rate": 4.976e-05, + "loss": 2.7691, + "step": 405 + }, + { + "epoch": 0.07, + "learning_rate": 4.9757037037037044e-05, + "loss": 2.7451, + "step": 410 + }, + { + "epoch": 0.07, + "learning_rate": 4.9754074074074076e-05, + "loss": 2.8599, + "step": 415 + }, + { + "epoch": 0.07, + "learning_rate": 4.9751111111111114e-05, + "loss": 2.7836, + "step": 420 + }, + { + "epoch": 0.08, + "learning_rate": 4.9748148148148146e-05, + "loss": 2.7931, + "step": 425 + }, + { + "epoch": 0.08, + "learning_rate": 4.974518518518519e-05, + "loss": 2.9787, + "step": 430 + }, + { + "epoch": 0.08, + "learning_rate": 4.9742222222222224e-05, + "loss": 2.9116, + "step": 435 + }, + { + "epoch": 0.08, + "learning_rate": 4.973925925925926e-05, + "loss": 2.761, + "step": 440 + }, + { + "epoch": 0.08, + "learning_rate": 4.9736296296296295e-05, + "loss": 2.8602, + "step": 445 + }, + { + "epoch": 0.08, + "learning_rate": 4.973333333333334e-05, + "loss": 2.9382, + "step": 450 + }, + { + "epoch": 0.08, + "learning_rate": 4.973037037037037e-05, + "loss": 2.8331, + "step": 455 + }, + { + "epoch": 0.08, + "learning_rate": 4.972740740740741e-05, + "loss": 2.8605, + "step": 460 + }, + { + "epoch": 0.08, + "learning_rate": 4.9724444444444443e-05, + "loss": 2.9158, + "step": 465 + }, + { + "epoch": 0.08, + "learning_rate": 4.972148148148148e-05, + "loss": 2.8463, + "step": 470 + }, + { + "epoch": 0.08, + "learning_rate": 4.971851851851852e-05, + "loss": 2.8619, + "step": 475 + }, + { + "epoch": 0.09, + "learning_rate": 4.971555555555556e-05, + "loss": 2.8954, + "step": 480 + }, + { + "epoch": 0.09, + "learning_rate": 4.971259259259259e-05, + "loss": 2.8213, + "step": 485 + }, + { + "epoch": 0.09, + "learning_rate": 4.970962962962963e-05, + "loss": 2.9462, + "step": 490 + }, + { + "epoch": 0.09, + "learning_rate": 4.970666666666667e-05, + "loss": 2.8364, + "step": 495 + }, + { + "epoch": 0.09, + "learning_rate": 4.970370370370371e-05, + "loss": 2.8938, + "step": 500 + }, + { + "epoch": 0.09, + "learning_rate": 4.970074074074074e-05, + "loss": 2.763, + "step": 505 + }, + { + "epoch": 0.09, + "learning_rate": 4.969777777777778e-05, + "loss": 2.9042, + "step": 510 + }, + { + "epoch": 0.09, + "learning_rate": 4.969481481481482e-05, + "loss": 2.7493, + "step": 515 + }, + { + "epoch": 0.09, + "learning_rate": 4.969185185185186e-05, + "loss": 2.9707, + "step": 520 + }, + { + "epoch": 0.09, + "learning_rate": 4.968888888888889e-05, + "loss": 2.8097, + "step": 525 + }, + { + "epoch": 0.09, + "learning_rate": 4.968592592592593e-05, + "loss": 2.9142, + "step": 530 + }, + { + "epoch": 0.1, + "learning_rate": 4.968296296296297e-05, + "loss": 2.8978, + "step": 535 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680000000000005e-05, + "loss": 2.8335, + "step": 540 + }, + { + "epoch": 0.1, + "learning_rate": 4.967703703703704e-05, + "loss": 2.8292, + "step": 545 + }, + { + "epoch": 0.1, + "learning_rate": 4.9674074074074076e-05, + "loss": 2.8864, + "step": 550 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671111111111115e-05, + "loss": 2.7604, + "step": 555 + }, + { + "epoch": 0.1, + "learning_rate": 4.9668148148148154e-05, + "loss": 2.8053, + "step": 560 + }, + { + "epoch": 0.1, + "learning_rate": 4.9665185185185186e-05, + "loss": 2.8569, + "step": 565 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662222222222225e-05, + "loss": 2.8642, + "step": 570 + }, + { + "epoch": 0.1, + "learning_rate": 4.9659259259259264e-05, + "loss": 2.7163, + "step": 575 + }, + { + "epoch": 0.1, + "learning_rate": 4.96562962962963e-05, + "loss": 2.7935, + "step": 580 + }, + { + "epoch": 0.1, + "learning_rate": 4.9653333333333335e-05, + "loss": 2.8108, + "step": 585 + }, + { + "epoch": 0.1, + "learning_rate": 4.965037037037037e-05, + "loss": 2.8263, + "step": 590 + }, + { + "epoch": 0.11, + "learning_rate": 4.964740740740741e-05, + "loss": 2.8264, + "step": 595 + }, + { + "epoch": 0.11, + "learning_rate": 4.964444444444445e-05, + "loss": 2.7342, + "step": 600 + }, + { + "epoch": 0.11, + "learning_rate": 4.964148148148148e-05, + "loss": 2.8448, + "step": 605 + }, + { + "epoch": 0.11, + "learning_rate": 4.963851851851852e-05, + "loss": 2.791, + "step": 610 + }, + { + "epoch": 0.11, + "learning_rate": 4.963555555555556e-05, + "loss": 2.8689, + "step": 615 + }, + { + "epoch": 0.11, + "learning_rate": 4.96325925925926e-05, + "loss": 2.6708, + "step": 620 + }, + { + "epoch": 0.11, + "learning_rate": 4.962962962962963e-05, + "loss": 2.6226, + "step": 625 + }, + { + "epoch": 0.11, + "learning_rate": 4.962666666666667e-05, + "loss": 2.9335, + "step": 630 + }, + { + "epoch": 0.11, + "learning_rate": 4.96237037037037e-05, + "loss": 2.8356, + "step": 635 + }, + { + "epoch": 0.11, + "learning_rate": 4.962074074074075e-05, + "loss": 2.7369, + "step": 640 + }, + { + "epoch": 0.11, + "learning_rate": 4.961777777777778e-05, + "loss": 2.6848, + "step": 645 + }, + { + "epoch": 0.12, + "learning_rate": 4.961481481481482e-05, + "loss": 2.7344, + "step": 650 + }, + { + "epoch": 0.12, + "learning_rate": 4.961185185185185e-05, + "loss": 2.8622, + "step": 655 + }, + { + "epoch": 0.12, + "learning_rate": 4.9608888888888897e-05, + "loss": 2.7942, + "step": 660 + }, + { + "epoch": 0.12, + "learning_rate": 4.960592592592593e-05, + "loss": 2.6875, + "step": 665 + }, + { + "epoch": 0.12, + "learning_rate": 4.960296296296297e-05, + "loss": 2.7193, + "step": 670 + }, + { + "epoch": 0.12, + "learning_rate": 4.96e-05, + "loss": 2.7213, + "step": 675 + }, + { + "epoch": 0.12, + "learning_rate": 4.9597037037037045e-05, + "loss": 2.755, + "step": 680 + }, + { + "epoch": 0.12, + "learning_rate": 4.959407407407408e-05, + "loss": 2.8535, + "step": 685 + }, + { + "epoch": 0.12, + "learning_rate": 4.9591111111111116e-05, + "loss": 2.7465, + "step": 690 + }, + { + "epoch": 0.12, + "learning_rate": 4.958814814814815e-05, + "loss": 2.8645, + "step": 695 + }, + { + "epoch": 0.12, + "learning_rate": 4.958518518518519e-05, + "loss": 2.8154, + "step": 700 + }, + { + "epoch": 0.13, + "learning_rate": 4.9582222222222226e-05, + "loss": 2.8004, + "step": 705 + }, + { + "epoch": 0.13, + "learning_rate": 4.9579259259259264e-05, + "loss": 2.786, + "step": 710 + }, + { + "epoch": 0.13, + "learning_rate": 4.9576296296296296e-05, + "loss": 2.6265, + "step": 715 + }, + { + "epoch": 0.13, + "learning_rate": 4.9573333333333335e-05, + "loss": 2.8455, + "step": 720 + }, + { + "epoch": 0.13, + "learning_rate": 4.9570370370370374e-05, + "loss": 2.6883, + "step": 725 + }, + { + "epoch": 0.13, + "learning_rate": 4.956740740740741e-05, + "loss": 2.8027, + "step": 730 + }, + { + "epoch": 0.13, + "learning_rate": 4.9564444444444445e-05, + "loss": 2.7642, + "step": 735 + }, + { + "epoch": 0.13, + "learning_rate": 4.9561481481481484e-05, + "loss": 2.7519, + "step": 740 + }, + { + "epoch": 0.13, + "learning_rate": 4.955851851851852e-05, + "loss": 2.6603, + "step": 745 + }, + { + "epoch": 0.13, + "learning_rate": 4.955555555555556e-05, + "loss": 2.7165, + "step": 750 + }, + { + "epoch": 0.13, + "learning_rate": 4.9552592592592593e-05, + "loss": 2.8298, + "step": 755 + }, + { + "epoch": 0.14, + "learning_rate": 4.954962962962963e-05, + "loss": 2.8128, + "step": 760 + }, + { + "epoch": 0.14, + "learning_rate": 4.954666666666667e-05, + "loss": 2.7578, + "step": 765 + }, + { + "epoch": 0.14, + "learning_rate": 4.954370370370371e-05, + "loss": 2.7739, + "step": 770 + }, + { + "epoch": 0.14, + "learning_rate": 4.954074074074074e-05, + "loss": 2.642, + "step": 775 + }, + { + "epoch": 0.14, + "learning_rate": 4.9537777777777774e-05, + "loss": 2.7469, + "step": 780 + }, + { + "epoch": 0.14, + "learning_rate": 4.953481481481482e-05, + "loss": 2.8179, + "step": 785 + }, + { + "epoch": 0.14, + "learning_rate": 4.953185185185185e-05, + "loss": 2.6917, + "step": 790 + }, + { + "epoch": 0.14, + "learning_rate": 4.952888888888889e-05, + "loss": 2.6922, + "step": 795 + }, + { + "epoch": 0.14, + "learning_rate": 4.952592592592592e-05, + "loss": 2.7306, + "step": 800 + }, + { + "epoch": 0.14, + "learning_rate": 4.952296296296297e-05, + "loss": 2.7722, + "step": 805 + }, + { + "epoch": 0.14, + "learning_rate": 4.952e-05, + "loss": 2.8362, + "step": 810 + }, + { + "epoch": 0.14, + "learning_rate": 4.951703703703704e-05, + "loss": 2.7178, + "step": 815 + }, + { + "epoch": 0.15, + "learning_rate": 4.951407407407407e-05, + "loss": 2.7951, + "step": 820 + }, + { + "epoch": 0.15, + "learning_rate": 4.951111111111112e-05, + "loss": 2.6786, + "step": 825 + }, + { + "epoch": 0.15, + "learning_rate": 4.950814814814815e-05, + "loss": 2.8212, + "step": 830 + }, + { + "epoch": 0.15, + "learning_rate": 4.950518518518519e-05, + "loss": 2.5981, + "step": 835 + }, + { + "epoch": 0.15, + "learning_rate": 4.950222222222222e-05, + "loss": 2.8504, + "step": 840 + }, + { + "epoch": 0.15, + "learning_rate": 4.9499259259259265e-05, + "loss": 2.7314, + "step": 845 + }, + { + "epoch": 0.15, + "learning_rate": 4.94962962962963e-05, + "loss": 2.8478, + "step": 850 + }, + { + "epoch": 0.15, + "learning_rate": 4.9493333333333336e-05, + "loss": 2.8575, + "step": 855 + }, + { + "epoch": 0.15, + "learning_rate": 4.949037037037037e-05, + "loss": 2.7991, + "step": 860 + }, + { + "epoch": 0.15, + "learning_rate": 4.948740740740741e-05, + "loss": 2.7548, + "step": 865 + }, + { + "epoch": 0.15, + "learning_rate": 4.9484444444444446e-05, + "loss": 2.7882, + "step": 870 + }, + { + "epoch": 0.16, + "learning_rate": 4.9481481481481485e-05, + "loss": 2.6827, + "step": 875 + }, + { + "epoch": 0.16, + "learning_rate": 4.947851851851852e-05, + "loss": 2.6902, + "step": 880 + }, + { + "epoch": 0.16, + "learning_rate": 4.9475555555555555e-05, + "loss": 2.7625, + "step": 885 + }, + { + "epoch": 0.16, + "learning_rate": 4.9472592592592594e-05, + "loss": 2.6774, + "step": 890 + }, + { + "epoch": 0.16, + "learning_rate": 4.946962962962963e-05, + "loss": 2.7684, + "step": 895 + }, + { + "epoch": 0.16, + "learning_rate": 4.9466666666666665e-05, + "loss": 2.7199, + "step": 900 + }, + { + "epoch": 0.16, + "learning_rate": 4.9463703703703704e-05, + "loss": 2.6763, + "step": 905 + }, + { + "epoch": 0.16, + "learning_rate": 4.946074074074074e-05, + "loss": 2.7746, + "step": 910 + }, + { + "epoch": 0.16, + "learning_rate": 4.945777777777778e-05, + "loss": 2.7869, + "step": 915 + }, + { + "epoch": 0.16, + "learning_rate": 4.9454814814814814e-05, + "loss": 2.7418, + "step": 920 + }, + { + "epoch": 0.16, + "learning_rate": 4.945185185185185e-05, + "loss": 2.8152, + "step": 925 + }, + { + "epoch": 0.17, + "learning_rate": 4.944888888888889e-05, + "loss": 2.7189, + "step": 930 + }, + { + "epoch": 0.17, + "learning_rate": 4.944592592592593e-05, + "loss": 2.8392, + "step": 935 + }, + { + "epoch": 0.17, + "learning_rate": 4.944296296296296e-05, + "loss": 2.7173, + "step": 940 + }, + { + "epoch": 0.17, + "learning_rate": 4.944e-05, + "loss": 2.7719, + "step": 945 + }, + { + "epoch": 0.17, + "learning_rate": 4.943703703703704e-05, + "loss": 2.7683, + "step": 950 + }, + { + "epoch": 0.17, + "learning_rate": 4.943407407407408e-05, + "loss": 2.8069, + "step": 955 + }, + { + "epoch": 0.17, + "learning_rate": 4.943111111111111e-05, + "loss": 2.7951, + "step": 960 + }, + { + "epoch": 0.17, + "learning_rate": 4.942814814814815e-05, + "loss": 2.7212, + "step": 965 + }, + { + "epoch": 0.17, + "learning_rate": 4.942518518518519e-05, + "loss": 2.8281, + "step": 970 + }, + { + "epoch": 0.17, + "learning_rate": 4.942222222222223e-05, + "loss": 2.9378, + "step": 975 + }, + { + "epoch": 0.17, + "learning_rate": 4.941925925925926e-05, + "loss": 2.7481, + "step": 980 + }, + { + "epoch": 0.18, + "learning_rate": 4.94162962962963e-05, + "loss": 2.8349, + "step": 985 + }, + { + "epoch": 0.18, + "learning_rate": 4.941333333333334e-05, + "loss": 2.6851, + "step": 990 + }, + { + "epoch": 0.18, + "learning_rate": 4.9410370370370376e-05, + "loss": 2.7403, + "step": 995 + }, + { + "epoch": 0.18, + "learning_rate": 4.940740740740741e-05, + "loss": 2.7812, + "step": 1000 + }, + { + "epoch": 0.18, + "learning_rate": 4.9404444444444447e-05, + "loss": 2.6925, + "step": 1005 + }, + { + "epoch": 0.18, + "learning_rate": 4.940148148148148e-05, + "loss": 2.7102, + "step": 1010 + }, + { + "epoch": 0.18, + "learning_rate": 4.9398518518518524e-05, + "loss": 2.6798, + "step": 1015 + }, + { + "epoch": 0.18, + "learning_rate": 4.9395555555555556e-05, + "loss": 2.7171, + "step": 1020 + }, + { + "epoch": 0.18, + "learning_rate": 4.9392592592592595e-05, + "loss": 2.709, + "step": 1025 + }, + { + "epoch": 0.18, + "learning_rate": 4.938962962962963e-05, + "loss": 2.7912, + "step": 1030 + }, + { + "epoch": 0.18, + "learning_rate": 4.938666666666667e-05, + "loss": 2.6622, + "step": 1035 + }, + { + "epoch": 0.18, + "learning_rate": 4.9383703703703705e-05, + "loss": 2.7107, + "step": 1040 + }, + { + "epoch": 0.19, + "learning_rate": 4.9380740740740744e-05, + "loss": 2.7652, + "step": 1045 + }, + { + "epoch": 0.19, + "learning_rate": 4.9377777777777776e-05, + "loss": 2.7961, + "step": 1050 + }, + { + "epoch": 0.19, + "learning_rate": 4.937481481481482e-05, + "loss": 2.5857, + "step": 1055 + }, + { + "epoch": 0.19, + "learning_rate": 4.937185185185185e-05, + "loss": 2.5863, + "step": 1060 + }, + { + "epoch": 0.19, + "learning_rate": 4.936888888888889e-05, + "loss": 2.6799, + "step": 1065 + }, + { + "epoch": 0.19, + "learning_rate": 4.9365925925925924e-05, + "loss": 2.6358, + "step": 1070 + }, + { + "epoch": 0.19, + "learning_rate": 4.936296296296297e-05, + "loss": 2.768, + "step": 1075 + }, + { + "epoch": 0.19, + "learning_rate": 4.936e-05, + "loss": 2.886, + "step": 1080 + }, + { + "epoch": 0.19, + "learning_rate": 4.935703703703704e-05, + "loss": 2.5491, + "step": 1085 + }, + { + "epoch": 0.19, + "learning_rate": 4.935407407407407e-05, + "loss": 2.7212, + "step": 1090 + }, + { + "epoch": 0.19, + "learning_rate": 4.935111111111111e-05, + "loss": 2.7452, + "step": 1095 + }, + { + "epoch": 0.2, + "learning_rate": 4.934814814814815e-05, + "loss": 2.6479, + "step": 1100 + }, + { + "epoch": 0.2, + "learning_rate": 4.934518518518519e-05, + "loss": 2.7943, + "step": 1105 + }, + { + "epoch": 0.2, + "learning_rate": 4.934222222222222e-05, + "loss": 2.6701, + "step": 1110 + }, + { + "epoch": 0.2, + "learning_rate": 4.933925925925926e-05, + "loss": 2.6863, + "step": 1115 + }, + { + "epoch": 0.2, + "learning_rate": 4.93362962962963e-05, + "loss": 2.6787, + "step": 1120 + }, + { + "epoch": 0.2, + "learning_rate": 4.933333333333334e-05, + "loss": 2.771, + "step": 1125 + }, + { + "epoch": 0.2, + "learning_rate": 4.933037037037037e-05, + "loss": 2.5961, + "step": 1130 + }, + { + "epoch": 0.2, + "learning_rate": 4.932740740740741e-05, + "loss": 2.7753, + "step": 1135 + }, + { + "epoch": 0.2, + "learning_rate": 4.932444444444445e-05, + "loss": 2.5641, + "step": 1140 + }, + { + "epoch": 0.2, + "learning_rate": 4.9321481481481486e-05, + "loss": 2.685, + "step": 1145 + }, + { + "epoch": 0.2, + "learning_rate": 4.931851851851852e-05, + "loss": 2.7166, + "step": 1150 + }, + { + "epoch": 0.21, + "learning_rate": 4.931555555555556e-05, + "loss": 2.746, + "step": 1155 + }, + { + "epoch": 0.21, + "learning_rate": 4.9312592592592596e-05, + "loss": 2.6803, + "step": 1160 + }, + { + "epoch": 0.21, + "learning_rate": 4.9309629629629635e-05, + "loss": 2.6282, + "step": 1165 + }, + { + "epoch": 0.21, + "learning_rate": 4.930666666666667e-05, + "loss": 2.6186, + "step": 1170 + }, + { + "epoch": 0.21, + "learning_rate": 4.9303703703703705e-05, + "loss": 2.7369, + "step": 1175 + }, + { + "epoch": 0.21, + "learning_rate": 4.9300740740740744e-05, + "loss": 2.6946, + "step": 1180 + }, + { + "epoch": 0.21, + "learning_rate": 4.929777777777778e-05, + "loss": 2.6654, + "step": 1185 + }, + { + "epoch": 0.21, + "learning_rate": 4.9294814814814815e-05, + "loss": 2.6052, + "step": 1190 + }, + { + "epoch": 0.21, + "learning_rate": 4.9291851851851854e-05, + "loss": 2.7831, + "step": 1195 + }, + { + "epoch": 0.21, + "learning_rate": 4.928888888888889e-05, + "loss": 2.7227, + "step": 1200 + }, + { + "epoch": 0.21, + "learning_rate": 4.928592592592593e-05, + "loss": 2.7201, + "step": 1205 + }, + { + "epoch": 0.22, + "learning_rate": 4.9282962962962964e-05, + "loss": 2.6648, + "step": 1210 + }, + { + "epoch": 0.22, + "learning_rate": 4.928e-05, + "loss": 2.6865, + "step": 1215 + }, + { + "epoch": 0.22, + "learning_rate": 4.927703703703704e-05, + "loss": 2.664, + "step": 1220 + }, + { + "epoch": 0.22, + "learning_rate": 4.927407407407408e-05, + "loss": 2.5916, + "step": 1225 + }, + { + "epoch": 0.22, + "learning_rate": 4.927111111111111e-05, + "loss": 2.6063, + "step": 1230 + }, + { + "epoch": 0.22, + "learning_rate": 4.926814814814815e-05, + "loss": 2.696, + "step": 1235 + }, + { + "epoch": 0.22, + "learning_rate": 4.926518518518518e-05, + "loss": 2.7361, + "step": 1240 + }, + { + "epoch": 0.22, + "learning_rate": 4.926222222222223e-05, + "loss": 2.6426, + "step": 1245 + }, + { + "epoch": 0.22, + "learning_rate": 4.925925925925926e-05, + "loss": 2.6998, + "step": 1250 + }, + { + "epoch": 0.22, + "learning_rate": 4.92562962962963e-05, + "loss": 2.7315, + "step": 1255 + }, + { + "epoch": 0.22, + "learning_rate": 4.925333333333333e-05, + "loss": 2.6739, + "step": 1260 + }, + { + "epoch": 0.22, + "learning_rate": 4.925037037037038e-05, + "loss": 2.6776, + "step": 1265 + }, + { + "epoch": 0.23, + "learning_rate": 4.924740740740741e-05, + "loss": 2.7521, + "step": 1270 + }, + { + "epoch": 0.23, + "learning_rate": 4.924444444444445e-05, + "loss": 2.6265, + "step": 1275 + }, + { + "epoch": 0.23, + "learning_rate": 4.924148148148148e-05, + "loss": 2.774, + "step": 1280 + }, + { + "epoch": 0.23, + "learning_rate": 4.9238518518518526e-05, + "loss": 2.6793, + "step": 1285 + }, + { + "epoch": 0.23, + "learning_rate": 4.923555555555556e-05, + "loss": 2.6703, + "step": 1290 + }, + { + "epoch": 0.23, + "learning_rate": 4.9232592592592597e-05, + "loss": 2.6451, + "step": 1295 + }, + { + "epoch": 0.23, + "learning_rate": 4.922962962962963e-05, + "loss": 2.6367, + "step": 1300 + }, + { + "epoch": 0.23, + "learning_rate": 4.9226666666666674e-05, + "loss": 2.5895, + "step": 1305 + }, + { + "epoch": 0.23, + "learning_rate": 4.9223703703703706e-05, + "loss": 2.5834, + "step": 1310 + }, + { + "epoch": 0.23, + "learning_rate": 4.9220740740740745e-05, + "loss": 2.7616, + "step": 1315 + }, + { + "epoch": 0.23, + "learning_rate": 4.921777777777778e-05, + "loss": 2.6617, + "step": 1320 + }, + { + "epoch": 0.24, + "learning_rate": 4.9214814814814816e-05, + "loss": 2.7151, + "step": 1325 + }, + { + "epoch": 0.24, + "learning_rate": 4.9211851851851855e-05, + "loss": 2.6578, + "step": 1330 + }, + { + "epoch": 0.24, + "learning_rate": 4.9208888888888894e-05, + "loss": 2.5476, + "step": 1335 + }, + { + "epoch": 0.24, + "learning_rate": 4.9205925925925926e-05, + "loss": 2.7617, + "step": 1340 + }, + { + "epoch": 0.24, + "learning_rate": 4.9202962962962964e-05, + "loss": 2.5194, + "step": 1345 + }, + { + "epoch": 0.24, + "learning_rate": 4.92e-05, + "loss": 2.6243, + "step": 1350 + }, + { + "epoch": 0.24, + "learning_rate": 4.919703703703704e-05, + "loss": 2.542, + "step": 1355 + }, + { + "epoch": 0.24, + "learning_rate": 4.9194074074074074e-05, + "loss": 2.5248, + "step": 1360 + }, + { + "epoch": 0.24, + "learning_rate": 4.919111111111111e-05, + "loss": 2.5072, + "step": 1365 + }, + { + "epoch": 0.24, + "learning_rate": 4.918814814814815e-05, + "loss": 2.6479, + "step": 1370 + }, + { + "epoch": 0.24, + "learning_rate": 4.918518518518519e-05, + "loss": 2.6212, + "step": 1375 + }, + { + "epoch": 0.25, + "learning_rate": 4.918222222222222e-05, + "loss": 2.6752, + "step": 1380 + }, + { + "epoch": 0.25, + "learning_rate": 4.917925925925926e-05, + "loss": 2.6133, + "step": 1385 + }, + { + "epoch": 0.25, + "learning_rate": 4.91762962962963e-05, + "loss": 2.5788, + "step": 1390 + }, + { + "epoch": 0.25, + "learning_rate": 4.917333333333334e-05, + "loss": 2.5973, + "step": 1395 + }, + { + "epoch": 0.25, + "learning_rate": 4.917037037037037e-05, + "loss": 2.6873, + "step": 1400 + }, + { + "epoch": 0.25, + "learning_rate": 4.916740740740741e-05, + "loss": 2.5677, + "step": 1405 + }, + { + "epoch": 0.25, + "learning_rate": 4.916444444444445e-05, + "loss": 2.6275, + "step": 1410 + }, + { + "epoch": 0.25, + "learning_rate": 4.916148148148149e-05, + "loss": 2.6584, + "step": 1415 + }, + { + "epoch": 0.25, + "learning_rate": 4.915851851851852e-05, + "loss": 2.5752, + "step": 1420 + }, + { + "epoch": 0.25, + "learning_rate": 4.915555555555556e-05, + "loss": 2.5627, + "step": 1425 + }, + { + "epoch": 0.25, + "learning_rate": 4.91525925925926e-05, + "loss": 2.5348, + "step": 1430 + }, + { + "epoch": 0.26, + "learning_rate": 4.9149629629629636e-05, + "loss": 2.7585, + "step": 1435 + }, + { + "epoch": 0.26, + "learning_rate": 4.914666666666667e-05, + "loss": 2.5969, + "step": 1440 + }, + { + "epoch": 0.26, + "learning_rate": 4.914370370370371e-05, + "loss": 2.6326, + "step": 1445 + }, + { + "epoch": 0.26, + "learning_rate": 4.9140740740740746e-05, + "loss": 2.5628, + "step": 1450 + }, + { + "epoch": 0.26, + "learning_rate": 4.9137777777777785e-05, + "loss": 2.6056, + "step": 1455 + }, + { + "epoch": 0.26, + "learning_rate": 4.913481481481482e-05, + "loss": 2.6705, + "step": 1460 + }, + { + "epoch": 0.26, + "learning_rate": 4.9131851851851856e-05, + "loss": 2.6942, + "step": 1465 + }, + { + "epoch": 0.26, + "learning_rate": 4.912888888888889e-05, + "loss": 2.6487, + "step": 1470 + }, + { + "epoch": 0.26, + "learning_rate": 4.912592592592593e-05, + "loss": 2.563, + "step": 1475 + }, + { + "epoch": 0.26, + "learning_rate": 4.9122962962962965e-05, + "loss": 2.7142, + "step": 1480 + }, + { + "epoch": 0.26, + "learning_rate": 4.9120000000000004e-05, + "loss": 2.703, + "step": 1485 + }, + { + "epoch": 0.26, + "learning_rate": 4.9117037037037036e-05, + "loss": 2.5956, + "step": 1490 + }, + { + "epoch": 0.27, + "learning_rate": 4.911407407407408e-05, + "loss": 2.6318, + "step": 1495 + }, + { + "epoch": 0.27, + "learning_rate": 4.9111111111111114e-05, + "loss": 2.5268, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 4.910814814814815e-05, + "loss": 2.6568, + "step": 1505 + }, + { + "epoch": 0.27, + "learning_rate": 4.9105185185185185e-05, + "loss": 2.6327, + "step": 1510 + }, + { + "epoch": 0.27, + "learning_rate": 4.910222222222223e-05, + "loss": 2.6212, + "step": 1515 + }, + { + "epoch": 0.27, + "learning_rate": 4.909925925925926e-05, + "loss": 2.6328, + "step": 1520 + }, + { + "epoch": 0.27, + "learning_rate": 4.90962962962963e-05, + "loss": 2.5819, + "step": 1525 + }, + { + "epoch": 0.27, + "learning_rate": 4.909333333333333e-05, + "loss": 2.7559, + "step": 1530 + }, + { + "epoch": 0.27, + "learning_rate": 4.909037037037038e-05, + "loss": 2.6101, + "step": 1535 + }, + { + "epoch": 0.27, + "learning_rate": 4.908740740740741e-05, + "loss": 2.522, + "step": 1540 + }, + { + "epoch": 0.27, + "learning_rate": 4.908444444444445e-05, + "loss": 2.6315, + "step": 1545 + }, + { + "epoch": 0.28, + "learning_rate": 4.908148148148148e-05, + "loss": 2.613, + "step": 1550 + }, + { + "epoch": 0.28, + "learning_rate": 4.907851851851852e-05, + "loss": 2.6335, + "step": 1555 + }, + { + "epoch": 0.28, + "learning_rate": 4.907555555555556e-05, + "loss": 2.5468, + "step": 1560 + }, + { + "epoch": 0.28, + "learning_rate": 4.907259259259259e-05, + "loss": 2.7266, + "step": 1565 + }, + { + "epoch": 0.28, + "learning_rate": 4.906962962962963e-05, + "loss": 2.6311, + "step": 1570 + }, + { + "epoch": 0.28, + "learning_rate": 4.906666666666667e-05, + "loss": 2.5856, + "step": 1575 + }, + { + "epoch": 0.28, + "learning_rate": 4.906370370370371e-05, + "loss": 2.6343, + "step": 1580 + }, + { + "epoch": 0.28, + "learning_rate": 4.906074074074074e-05, + "loss": 2.6214, + "step": 1585 + }, + { + "epoch": 0.28, + "learning_rate": 4.905777777777778e-05, + "loss": 2.6503, + "step": 1590 + }, + { + "epoch": 0.28, + "learning_rate": 4.905481481481482e-05, + "loss": 2.6477, + "step": 1595 + }, + { + "epoch": 0.28, + "learning_rate": 4.9051851851851856e-05, + "loss": 2.6671, + "step": 1600 + }, + { + "epoch": 0.29, + "learning_rate": 4.904888888888889e-05, + "loss": 2.6796, + "step": 1605 + }, + { + "epoch": 0.29, + "learning_rate": 4.904592592592593e-05, + "loss": 2.4912, + "step": 1610 + }, + { + "epoch": 0.29, + "learning_rate": 4.9042962962962966e-05, + "loss": 2.5484, + "step": 1615 + }, + { + "epoch": 0.29, + "learning_rate": 4.9040000000000005e-05, + "loss": 2.7285, + "step": 1620 + }, + { + "epoch": 0.29, + "learning_rate": 4.903703703703704e-05, + "loss": 2.5839, + "step": 1625 + }, + { + "epoch": 0.29, + "learning_rate": 4.9034074074074076e-05, + "loss": 2.6494, + "step": 1630 + }, + { + "epoch": 0.29, + "learning_rate": 4.903111111111111e-05, + "loss": 2.6646, + "step": 1635 + }, + { + "epoch": 0.29, + "learning_rate": 4.902814814814815e-05, + "loss": 2.7042, + "step": 1640 + }, + { + "epoch": 0.29, + "learning_rate": 4.9025185185185185e-05, + "loss": 2.5844, + "step": 1645 + }, + { + "epoch": 0.29, + "learning_rate": 4.9022222222222224e-05, + "loss": 2.5931, + "step": 1650 + }, + { + "epoch": 0.29, + "learning_rate": 4.9019259259259256e-05, + "loss": 2.538, + "step": 1655 + }, + { + "epoch": 0.3, + "learning_rate": 4.90162962962963e-05, + "loss": 2.6606, + "step": 1660 + }, + { + "epoch": 0.3, + "learning_rate": 4.9013333333333334e-05, + "loss": 2.5271, + "step": 1665 + }, + { + "epoch": 0.3, + "learning_rate": 4.901037037037037e-05, + "loss": 2.6466, + "step": 1670 + }, + { + "epoch": 0.3, + "learning_rate": 4.9007407407407405e-05, + "loss": 2.5798, + "step": 1675 + }, + { + "epoch": 0.3, + "learning_rate": 4.900444444444445e-05, + "loss": 2.5151, + "step": 1680 + }, + { + "epoch": 0.3, + "learning_rate": 4.900148148148148e-05, + "loss": 2.5624, + "step": 1685 + }, + { + "epoch": 0.3, + "learning_rate": 4.899851851851852e-05, + "loss": 2.6799, + "step": 1690 + }, + { + "epoch": 0.3, + "learning_rate": 4.899555555555555e-05, + "loss": 2.6345, + "step": 1695 + }, + { + "epoch": 0.3, + "learning_rate": 4.89925925925926e-05, + "loss": 2.5328, + "step": 1700 + }, + { + "epoch": 0.3, + "learning_rate": 4.898962962962963e-05, + "loss": 2.5292, + "step": 1705 + }, + { + "epoch": 0.3, + "learning_rate": 4.898666666666667e-05, + "loss": 2.7206, + "step": 1710 + }, + { + "epoch": 0.3, + "learning_rate": 4.89837037037037e-05, + "loss": 2.5661, + "step": 1715 + }, + { + "epoch": 0.31, + "learning_rate": 4.898074074074074e-05, + "loss": 2.6318, + "step": 1720 + }, + { + "epoch": 0.31, + "learning_rate": 4.897777777777778e-05, + "loss": 2.5574, + "step": 1725 + }, + { + "epoch": 0.31, + "learning_rate": 4.897481481481482e-05, + "loss": 2.541, + "step": 1730 + }, + { + "epoch": 0.31, + "learning_rate": 4.897185185185185e-05, + "loss": 2.5739, + "step": 1735 + }, + { + "epoch": 0.31, + "learning_rate": 4.896888888888889e-05, + "loss": 2.6641, + "step": 1740 + }, + { + "epoch": 0.31, + "learning_rate": 4.896592592592593e-05, + "loss": 2.543, + "step": 1745 + }, + { + "epoch": 0.31, + "learning_rate": 4.896296296296297e-05, + "loss": 2.7357, + "step": 1750 + }, + { + "epoch": 0.31, + "learning_rate": 4.896e-05, + "loss": 2.4066, + "step": 1755 + }, + { + "epoch": 0.31, + "learning_rate": 4.895703703703704e-05, + "loss": 2.5714, + "step": 1760 + }, + { + "epoch": 0.31, + "learning_rate": 4.8954074074074076e-05, + "loss": 2.6102, + "step": 1765 + }, + { + "epoch": 0.31, + "learning_rate": 4.8951111111111115e-05, + "loss": 2.6916, + "step": 1770 + }, + { + "epoch": 0.32, + "learning_rate": 4.894814814814815e-05, + "loss": 2.5464, + "step": 1775 + }, + { + "epoch": 0.32, + "learning_rate": 4.8945185185185186e-05, + "loss": 2.4816, + "step": 1780 + }, + { + "epoch": 0.32, + "learning_rate": 4.8942222222222225e-05, + "loss": 2.5859, + "step": 1785 + }, + { + "epoch": 0.32, + "learning_rate": 4.8939259259259264e-05, + "loss": 2.6595, + "step": 1790 + }, + { + "epoch": 0.32, + "learning_rate": 4.8936296296296296e-05, + "loss": 2.5294, + "step": 1795 + }, + { + "epoch": 0.32, + "learning_rate": 4.8933333333333335e-05, + "loss": 2.6837, + "step": 1800 + }, + { + "epoch": 0.32, + "learning_rate": 4.8930370370370373e-05, + "loss": 2.6714, + "step": 1805 + }, + { + "epoch": 0.32, + "learning_rate": 4.892740740740741e-05, + "loss": 2.5805, + "step": 1810 + }, + { + "epoch": 0.32, + "learning_rate": 4.8924444444444444e-05, + "loss": 2.682, + "step": 1815 + }, + { + "epoch": 0.32, + "learning_rate": 4.892148148148148e-05, + "loss": 2.6585, + "step": 1820 + }, + { + "epoch": 0.32, + "learning_rate": 4.891851851851852e-05, + "loss": 2.5288, + "step": 1825 + }, + { + "epoch": 0.33, + "learning_rate": 4.891555555555556e-05, + "loss": 2.46, + "step": 1830 + }, + { + "epoch": 0.33, + "learning_rate": 4.891259259259259e-05, + "loss": 2.5487, + "step": 1835 + }, + { + "epoch": 0.33, + "learning_rate": 4.890962962962963e-05, + "loss": 2.5528, + "step": 1840 + }, + { + "epoch": 0.33, + "learning_rate": 4.890666666666667e-05, + "loss": 2.7303, + "step": 1845 + }, + { + "epoch": 0.33, + "learning_rate": 4.890370370370371e-05, + "loss": 2.6666, + "step": 1850 + }, + { + "epoch": 0.33, + "learning_rate": 4.890074074074074e-05, + "loss": 2.5087, + "step": 1855 + }, + { + "epoch": 0.33, + "learning_rate": 4.889777777777778e-05, + "loss": 2.5803, + "step": 1860 + }, + { + "epoch": 0.33, + "learning_rate": 4.889481481481481e-05, + "loss": 2.6161, + "step": 1865 + }, + { + "epoch": 0.33, + "learning_rate": 4.889185185185186e-05, + "loss": 2.5119, + "step": 1870 + }, + { + "epoch": 0.33, + "learning_rate": 4.888888888888889e-05, + "loss": 2.5282, + "step": 1875 + }, + { + "epoch": 0.33, + "learning_rate": 4.888592592592593e-05, + "loss": 2.5958, + "step": 1880 + }, + { + "epoch": 0.34, + "learning_rate": 4.888296296296296e-05, + "loss": 2.3793, + "step": 1885 + }, + { + "epoch": 0.34, + "learning_rate": 4.8880000000000006e-05, + "loss": 2.6117, + "step": 1890 + }, + { + "epoch": 0.34, + "learning_rate": 4.887703703703704e-05, + "loss": 2.6272, + "step": 1895 + }, + { + "epoch": 0.34, + "learning_rate": 4.887407407407408e-05, + "loss": 2.5193, + "step": 1900 + }, + { + "epoch": 0.34, + "learning_rate": 4.887111111111111e-05, + "loss": 2.5786, + "step": 1905 + }, + { + "epoch": 0.34, + "learning_rate": 4.8868148148148155e-05, + "loss": 2.5585, + "step": 1910 + }, + { + "epoch": 0.34, + "learning_rate": 4.886518518518519e-05, + "loss": 2.5675, + "step": 1915 + }, + { + "epoch": 0.34, + "learning_rate": 4.8862222222222226e-05, + "loss": 2.6321, + "step": 1920 + }, + { + "epoch": 0.34, + "learning_rate": 4.885925925925926e-05, + "loss": 2.5583, + "step": 1925 + }, + { + "epoch": 0.34, + "learning_rate": 4.88562962962963e-05, + "loss": 2.6734, + "step": 1930 + }, + { + "epoch": 0.34, + "learning_rate": 4.8853333333333335e-05, + "loss": 2.515, + "step": 1935 + }, + { + "epoch": 0.34, + "learning_rate": 4.8850370370370374e-05, + "loss": 2.5814, + "step": 1940 + }, + { + "epoch": 0.35, + "learning_rate": 4.8847407407407406e-05, + "loss": 2.5155, + "step": 1945 + }, + { + "epoch": 0.35, + "learning_rate": 4.8844444444444445e-05, + "loss": 2.5582, + "step": 1950 + }, + { + "epoch": 0.35, + "learning_rate": 4.8841481481481484e-05, + "loss": 2.4236, + "step": 1955 + }, + { + "epoch": 0.35, + "learning_rate": 4.883851851851852e-05, + "loss": 2.5744, + "step": 1960 + }, + { + "epoch": 0.35, + "learning_rate": 4.8835555555555555e-05, + "loss": 2.6236, + "step": 1965 + }, + { + "epoch": 0.35, + "learning_rate": 4.8832592592592594e-05, + "loss": 2.5213, + "step": 1970 + }, + { + "epoch": 0.35, + "learning_rate": 4.882962962962963e-05, + "loss": 2.4923, + "step": 1975 + }, + { + "epoch": 0.35, + "learning_rate": 4.882666666666667e-05, + "loss": 2.6818, + "step": 1980 + }, + { + "epoch": 0.35, + "learning_rate": 4.88237037037037e-05, + "loss": 2.5147, + "step": 1985 + }, + { + "epoch": 0.35, + "learning_rate": 4.882074074074074e-05, + "loss": 2.6033, + "step": 1990 + }, + { + "epoch": 0.35, + "learning_rate": 4.881777777777778e-05, + "loss": 2.5704, + "step": 1995 + }, + { + "epoch": 0.36, + "learning_rate": 4.881481481481482e-05, + "loss": 2.649, + "step": 2000 + }, + { + "epoch": 0.36, + "learning_rate": 4.881185185185185e-05, + "loss": 2.584, + "step": 2005 + }, + { + "epoch": 0.36, + "learning_rate": 4.880888888888889e-05, + "loss": 2.5052, + "step": 2010 + }, + { + "epoch": 0.36, + "learning_rate": 4.880592592592593e-05, + "loss": 2.6428, + "step": 2015 + }, + { + "epoch": 0.36, + "learning_rate": 4.880296296296297e-05, + "loss": 2.7282, + "step": 2020 + }, + { + "epoch": 0.36, + "learning_rate": 4.88e-05, + "loss": 2.5777, + "step": 2025 + }, + { + "epoch": 0.36, + "learning_rate": 4.879703703703704e-05, + "loss": 2.6538, + "step": 2030 + }, + { + "epoch": 0.36, + "learning_rate": 4.879407407407408e-05, + "loss": 2.647, + "step": 2035 + }, + { + "epoch": 0.36, + "learning_rate": 4.879111111111112e-05, + "loss": 2.6602, + "step": 2040 + }, + { + "epoch": 0.36, + "learning_rate": 4.878814814814815e-05, + "loss": 2.5872, + "step": 2045 + }, + { + "epoch": 0.36, + "learning_rate": 4.878518518518519e-05, + "loss": 2.6631, + "step": 2050 + }, + { + "epoch": 0.37, + "learning_rate": 4.8782222222222226e-05, + "loss": 2.6267, + "step": 2055 + }, + { + "epoch": 0.37, + "learning_rate": 4.8779259259259265e-05, + "loss": 2.5958, + "step": 2060 + }, + { + "epoch": 0.37, + "learning_rate": 4.87762962962963e-05, + "loss": 2.6223, + "step": 2065 + }, + { + "epoch": 0.37, + "learning_rate": 4.8773333333333336e-05, + "loss": 2.6239, + "step": 2070 + }, + { + "epoch": 0.37, + "learning_rate": 4.8770370370370375e-05, + "loss": 2.55, + "step": 2075 + }, + { + "epoch": 0.37, + "learning_rate": 4.8767407407407414e-05, + "loss": 2.5085, + "step": 2080 + }, + { + "epoch": 0.37, + "learning_rate": 4.8764444444444446e-05, + "loss": 2.626, + "step": 2085 + }, + { + "epoch": 0.37, + "learning_rate": 4.8761481481481485e-05, + "loss": 2.398, + "step": 2090 + }, + { + "epoch": 0.37, + "learning_rate": 4.875851851851852e-05, + "loss": 2.5995, + "step": 2095 + }, + { + "epoch": 0.37, + "learning_rate": 4.875555555555556e-05, + "loss": 2.4188, + "step": 2100 + }, + { + "epoch": 0.37, + "learning_rate": 4.8752592592592594e-05, + "loss": 2.6131, + "step": 2105 + }, + { + "epoch": 0.38, + "learning_rate": 4.874962962962963e-05, + "loss": 2.6237, + "step": 2110 + }, + { + "epoch": 0.38, + "learning_rate": 4.8746666666666665e-05, + "loss": 2.3157, + "step": 2115 + }, + { + "epoch": 0.38, + "learning_rate": 4.874370370370371e-05, + "loss": 2.555, + "step": 2120 + }, + { + "epoch": 0.38, + "learning_rate": 4.874074074074074e-05, + "loss": 2.7092, + "step": 2125 + }, + { + "epoch": 0.38, + "learning_rate": 4.873777777777778e-05, + "loss": 2.435, + "step": 2130 + }, + { + "epoch": 0.38, + "learning_rate": 4.8734814814814814e-05, + "loss": 2.6015, + "step": 2135 + }, + { + "epoch": 0.38, + "learning_rate": 4.873185185185186e-05, + "loss": 2.5091, + "step": 2140 + }, + { + "epoch": 0.38, + "learning_rate": 4.872888888888889e-05, + "loss": 2.5709, + "step": 2145 + }, + { + "epoch": 0.38, + "learning_rate": 4.872592592592593e-05, + "loss": 2.5891, + "step": 2150 + }, + { + "epoch": 0.38, + "learning_rate": 4.872296296296296e-05, + "loss": 2.5093, + "step": 2155 + }, + { + "epoch": 0.38, + "learning_rate": 4.872000000000001e-05, + "loss": 2.6391, + "step": 2160 + }, + { + "epoch": 0.38, + "learning_rate": 4.871703703703704e-05, + "loss": 2.6089, + "step": 2165 + }, + { + "epoch": 0.39, + "learning_rate": 4.871407407407408e-05, + "loss": 2.6249, + "step": 2170 + }, + { + "epoch": 0.39, + "learning_rate": 4.871111111111111e-05, + "loss": 2.5036, + "step": 2175 + }, + { + "epoch": 0.39, + "learning_rate": 4.870814814814815e-05, + "loss": 2.536, + "step": 2180 + }, + { + "epoch": 0.39, + "learning_rate": 4.870518518518519e-05, + "loss": 2.6776, + "step": 2185 + }, + { + "epoch": 0.39, + "learning_rate": 4.870222222222223e-05, + "loss": 2.5875, + "step": 2190 + }, + { + "epoch": 0.39, + "learning_rate": 4.869925925925926e-05, + "loss": 2.6116, + "step": 2195 + }, + { + "epoch": 0.39, + "learning_rate": 4.86962962962963e-05, + "loss": 2.4586, + "step": 2200 + }, + { + "epoch": 0.39, + "learning_rate": 4.869333333333334e-05, + "loss": 2.5113, + "step": 2205 + }, + { + "epoch": 0.39, + "learning_rate": 4.8690370370370376e-05, + "loss": 2.4781, + "step": 2210 + }, + { + "epoch": 0.39, + "learning_rate": 4.868740740740741e-05, + "loss": 2.4665, + "step": 2215 + }, + { + "epoch": 0.39, + "learning_rate": 4.868444444444445e-05, + "loss": 2.6852, + "step": 2220 + }, + { + "epoch": 0.4, + "learning_rate": 4.8681481481481485e-05, + "loss": 2.5498, + "step": 2225 + }, + { + "epoch": 0.4, + "learning_rate": 4.8678518518518524e-05, + "loss": 2.5123, + "step": 2230 + }, + { + "epoch": 0.4, + "learning_rate": 4.8675555555555556e-05, + "loss": 2.4898, + "step": 2235 + }, + { + "epoch": 0.4, + "learning_rate": 4.8672592592592595e-05, + "loss": 2.5807, + "step": 2240 + }, + { + "epoch": 0.4, + "learning_rate": 4.8669629629629634e-05, + "loss": 2.4785, + "step": 2245 + }, + { + "epoch": 0.4, + "learning_rate": 4.866666666666667e-05, + "loss": 2.5469, + "step": 2250 + }, + { + "epoch": 0.4, + "learning_rate": 4.8663703703703705e-05, + "loss": 2.6372, + "step": 2255 + }, + { + "epoch": 0.4, + "learning_rate": 4.8660740740740744e-05, + "loss": 2.566, + "step": 2260 + }, + { + "epoch": 0.4, + "learning_rate": 4.865777777777778e-05, + "loss": 2.5668, + "step": 2265 + }, + { + "epoch": 0.4, + "learning_rate": 4.865481481481482e-05, + "loss": 2.5924, + "step": 2270 + }, + { + "epoch": 0.4, + "learning_rate": 4.865185185185185e-05, + "loss": 2.5431, + "step": 2275 + }, + { + "epoch": 0.41, + "learning_rate": 4.864888888888889e-05, + "loss": 2.4167, + "step": 2280 + }, + { + "epoch": 0.41, + "learning_rate": 4.864592592592593e-05, + "loss": 2.6605, + "step": 2285 + }, + { + "epoch": 0.41, + "learning_rate": 4.864296296296297e-05, + "loss": 2.4347, + "step": 2290 + }, + { + "epoch": 0.41, + "learning_rate": 4.864e-05, + "loss": 2.7029, + "step": 2295 + }, + { + "epoch": 0.41, + "learning_rate": 4.863703703703704e-05, + "loss": 2.6861, + "step": 2300 + }, + { + "epoch": 0.41, + "learning_rate": 4.863407407407408e-05, + "loss": 2.5973, + "step": 2305 + }, + { + "epoch": 0.41, + "learning_rate": 4.863111111111112e-05, + "loss": 2.5206, + "step": 2310 + }, + { + "epoch": 0.41, + "learning_rate": 4.862814814814815e-05, + "loss": 2.5048, + "step": 2315 + }, + { + "epoch": 0.41, + "learning_rate": 4.862518518518519e-05, + "loss": 2.5822, + "step": 2320 + }, + { + "epoch": 0.41, + "learning_rate": 4.862222222222222e-05, + "loss": 2.4442, + "step": 2325 + }, + { + "epoch": 0.41, + "learning_rate": 4.861925925925926e-05, + "loss": 2.6353, + "step": 2330 + }, + { + "epoch": 0.42, + "learning_rate": 4.86162962962963e-05, + "loss": 2.5626, + "step": 2335 + }, + { + "epoch": 0.42, + "learning_rate": 4.861333333333333e-05, + "loss": 2.6273, + "step": 2340 + }, + { + "epoch": 0.42, + "learning_rate": 4.861037037037037e-05, + "loss": 2.5765, + "step": 2345 + }, + { + "epoch": 0.42, + "learning_rate": 4.860740740740741e-05, + "loss": 2.5701, + "step": 2350 + }, + { + "epoch": 0.42, + "learning_rate": 4.860444444444445e-05, + "loss": 2.6121, + "step": 2355 + }, + { + "epoch": 0.42, + "learning_rate": 4.860148148148148e-05, + "loss": 2.5889, + "step": 2360 + }, + { + "epoch": 0.42, + "learning_rate": 4.859851851851852e-05, + "loss": 2.4516, + "step": 2365 + }, + { + "epoch": 0.42, + "learning_rate": 4.859555555555556e-05, + "loss": 2.5998, + "step": 2370 + }, + { + "epoch": 0.42, + "learning_rate": 4.8592592592592596e-05, + "loss": 2.5884, + "step": 2375 + }, + { + "epoch": 0.42, + "learning_rate": 4.858962962962963e-05, + "loss": 2.4922, + "step": 2380 + }, + { + "epoch": 0.42, + "learning_rate": 4.858666666666667e-05, + "loss": 2.6406, + "step": 2385 + }, + { + "epoch": 0.42, + "learning_rate": 4.8583703703703706e-05, + "loss": 2.5813, + "step": 2390 + }, + { + "epoch": 0.43, + "learning_rate": 4.8580740740740744e-05, + "loss": 2.5106, + "step": 2395 + }, + { + "epoch": 0.43, + "learning_rate": 4.8577777777777776e-05, + "loss": 2.7063, + "step": 2400 + }, + { + "epoch": 0.43, + "learning_rate": 4.8574814814814815e-05, + "loss": 2.56, + "step": 2405 + }, + { + "epoch": 0.43, + "learning_rate": 4.8571851851851854e-05, + "loss": 2.6228, + "step": 2410 + }, + { + "epoch": 0.43, + "learning_rate": 4.856888888888889e-05, + "loss": 2.4815, + "step": 2415 + }, + { + "epoch": 0.43, + "learning_rate": 4.8565925925925925e-05, + "loss": 2.5204, + "step": 2420 + }, + { + "epoch": 0.43, + "learning_rate": 4.8562962962962964e-05, + "loss": 2.5944, + "step": 2425 + }, + { + "epoch": 0.43, + "learning_rate": 4.856e-05, + "loss": 2.5486, + "step": 2430 + }, + { + "epoch": 0.43, + "learning_rate": 4.855703703703704e-05, + "loss": 2.5694, + "step": 2435 + }, + { + "epoch": 0.43, + "learning_rate": 4.8554074074074073e-05, + "loss": 2.4542, + "step": 2440 + }, + { + "epoch": 0.43, + "learning_rate": 4.855111111111111e-05, + "loss": 2.6608, + "step": 2445 + }, + { + "epoch": 0.44, + "learning_rate": 4.854814814814815e-05, + "loss": 2.5552, + "step": 2450 + }, + { + "epoch": 0.44, + "learning_rate": 4.854518518518519e-05, + "loss": 2.4182, + "step": 2455 + }, + { + "epoch": 0.44, + "learning_rate": 4.854222222222222e-05, + "loss": 2.7799, + "step": 2460 + }, + { + "epoch": 0.44, + "learning_rate": 4.853925925925926e-05, + "loss": 2.5867, + "step": 2465 + }, + { + "epoch": 0.44, + "learning_rate": 4.85362962962963e-05, + "loss": 2.4412, + "step": 2470 + }, + { + "epoch": 0.44, + "learning_rate": 4.853333333333334e-05, + "loss": 2.547, + "step": 2475 + }, + { + "epoch": 0.44, + "learning_rate": 4.853037037037037e-05, + "loss": 2.474, + "step": 2480 + }, + { + "epoch": 0.44, + "learning_rate": 4.852740740740741e-05, + "loss": 2.4617, + "step": 2485 + }, + { + "epoch": 0.44, + "learning_rate": 4.852444444444444e-05, + "loss": 2.4407, + "step": 2490 + }, + { + "epoch": 0.44, + "learning_rate": 4.852148148148149e-05, + "loss": 2.4253, + "step": 2495 + }, + { + "epoch": 0.44, + "learning_rate": 4.851851851851852e-05, + "loss": 2.4985, + "step": 2500 + }, + { + "epoch": 0.45, + "learning_rate": 4.851555555555556e-05, + "loss": 2.6185, + "step": 2505 + }, + { + "epoch": 0.45, + "learning_rate": 4.851259259259259e-05, + "loss": 2.4942, + "step": 2510 + }, + { + "epoch": 0.45, + "learning_rate": 4.8509629629629636e-05, + "loss": 2.5665, + "step": 2515 + }, + { + "epoch": 0.45, + "learning_rate": 4.850666666666667e-05, + "loss": 2.5831, + "step": 2520 + }, + { + "epoch": 0.45, + "learning_rate": 4.8503703703703706e-05, + "loss": 2.5389, + "step": 2525 + }, + { + "epoch": 0.45, + "learning_rate": 4.850074074074074e-05, + "loss": 2.5858, + "step": 2530 + }, + { + "epoch": 0.45, + "learning_rate": 4.8497777777777784e-05, + "loss": 2.6014, + "step": 2535 + }, + { + "epoch": 0.45, + "learning_rate": 4.8494814814814816e-05, + "loss": 2.5959, + "step": 2540 + }, + { + "epoch": 0.45, + "learning_rate": 4.8491851851851855e-05, + "loss": 2.4836, + "step": 2545 + }, + { + "epoch": 0.45, + "learning_rate": 4.848888888888889e-05, + "loss": 2.5167, + "step": 2550 + }, + { + "epoch": 0.45, + "learning_rate": 4.8485925925925926e-05, + "loss": 2.4104, + "step": 2555 + }, + { + "epoch": 0.46, + "learning_rate": 4.8482962962962965e-05, + "loss": 2.697, + "step": 2560 + }, + { + "epoch": 0.46, + "learning_rate": 4.8480000000000003e-05, + "loss": 2.5179, + "step": 2565 + }, + { + "epoch": 0.46, + "learning_rate": 4.8477037037037035e-05, + "loss": 2.5655, + "step": 2570 + }, + { + "epoch": 0.46, + "learning_rate": 4.8474074074074074e-05, + "loss": 2.7141, + "step": 2575 + }, + { + "epoch": 0.46, + "learning_rate": 4.847111111111111e-05, + "loss": 2.5745, + "step": 2580 + }, + { + "epoch": 0.46, + "learning_rate": 4.846814814814815e-05, + "loss": 2.5983, + "step": 2585 + }, + { + "epoch": 0.46, + "learning_rate": 4.8465185185185184e-05, + "loss": 2.5184, + "step": 2590 + }, + { + "epoch": 0.46, + "learning_rate": 4.846222222222222e-05, + "loss": 2.5778, + "step": 2595 + }, + { + "epoch": 0.46, + "learning_rate": 4.845925925925926e-05, + "loss": 2.5206, + "step": 2600 + }, + { + "epoch": 0.46, + "learning_rate": 4.84562962962963e-05, + "loss": 2.4711, + "step": 2605 + }, + { + "epoch": 0.46, + "learning_rate": 4.845333333333333e-05, + "loss": 2.4716, + "step": 2610 + }, + { + "epoch": 0.46, + "learning_rate": 4.845037037037037e-05, + "loss": 2.4848, + "step": 2615 + }, + { + "epoch": 0.47, + "learning_rate": 4.844740740740741e-05, + "loss": 2.654, + "step": 2620 + }, + { + "epoch": 0.47, + "learning_rate": 4.844444444444445e-05, + "loss": 2.5274, + "step": 2625 + }, + { + "epoch": 0.47, + "learning_rate": 4.844148148148148e-05, + "loss": 2.4585, + "step": 2630 + }, + { + "epoch": 0.47, + "learning_rate": 4.843851851851852e-05, + "loss": 2.5476, + "step": 2635 + }, + { + "epoch": 0.47, + "learning_rate": 4.843555555555556e-05, + "loss": 2.393, + "step": 2640 + }, + { + "epoch": 0.47, + "learning_rate": 4.84325925925926e-05, + "loss": 2.4644, + "step": 2645 + }, + { + "epoch": 0.47, + "learning_rate": 4.842962962962963e-05, + "loss": 2.6004, + "step": 2650 + }, + { + "epoch": 0.47, + "learning_rate": 4.842666666666667e-05, + "loss": 2.4683, + "step": 2655 + }, + { + "epoch": 0.47, + "learning_rate": 4.842370370370371e-05, + "loss": 2.5443, + "step": 2660 + }, + { + "epoch": 0.47, + "learning_rate": 4.8420740740740746e-05, + "loss": 2.5539, + "step": 2665 + }, + { + "epoch": 0.47, + "learning_rate": 4.841777777777778e-05, + "loss": 2.5629, + "step": 2670 + }, + { + "epoch": 0.48, + "learning_rate": 4.841481481481482e-05, + "loss": 2.5614, + "step": 2675 + }, + { + "epoch": 0.48, + "learning_rate": 4.8411851851851856e-05, + "loss": 2.569, + "step": 2680 + }, + { + "epoch": 0.48, + "learning_rate": 4.8408888888888894e-05, + "loss": 2.4735, + "step": 2685 + }, + { + "epoch": 0.48, + "learning_rate": 4.8405925925925927e-05, + "loss": 2.6352, + "step": 2690 + }, + { + "epoch": 0.48, + "learning_rate": 4.8402962962962965e-05, + "loss": 2.4947, + "step": 2695 + }, + { + "epoch": 0.48, + "learning_rate": 4.8400000000000004e-05, + "loss": 2.6446, + "step": 2700 + }, + { + "epoch": 0.48, + "learning_rate": 4.839703703703704e-05, + "loss": 2.4544, + "step": 2705 + }, + { + "epoch": 0.48, + "learning_rate": 4.8394074074074075e-05, + "loss": 2.5697, + "step": 2710 + }, + { + "epoch": 0.48, + "learning_rate": 4.8391111111111114e-05, + "loss": 2.4467, + "step": 2715 + }, + { + "epoch": 0.48, + "learning_rate": 4.8388148148148146e-05, + "loss": 2.5275, + "step": 2720 + }, + { + "epoch": 0.48, + "learning_rate": 4.838518518518519e-05, + "loss": 2.5394, + "step": 2725 + }, + { + "epoch": 0.49, + "learning_rate": 4.8382222222222224e-05, + "loss": 2.4864, + "step": 2730 + }, + { + "epoch": 0.49, + "learning_rate": 4.837925925925926e-05, + "loss": 2.5427, + "step": 2735 + }, + { + "epoch": 0.49, + "learning_rate": 4.8376296296296294e-05, + "loss": 2.5405, + "step": 2740 + }, + { + "epoch": 0.49, + "learning_rate": 4.837333333333334e-05, + "loss": 2.4612, + "step": 2745 + }, + { + "epoch": 0.49, + "learning_rate": 4.837037037037037e-05, + "loss": 2.4184, + "step": 2750 + }, + { + "epoch": 0.49, + "learning_rate": 4.836740740740741e-05, + "loss": 2.5487, + "step": 2755 + }, + { + "epoch": 0.49, + "learning_rate": 4.836444444444444e-05, + "loss": 2.4494, + "step": 2760 + }, + { + "epoch": 0.49, + "learning_rate": 4.836148148148149e-05, + "loss": 2.5665, + "step": 2765 + }, + { + "epoch": 0.49, + "learning_rate": 4.835851851851852e-05, + "loss": 2.5655, + "step": 2770 + }, + { + "epoch": 0.49, + "learning_rate": 4.835555555555556e-05, + "loss": 2.5634, + "step": 2775 + }, + { + "epoch": 0.49, + "learning_rate": 4.835259259259259e-05, + "loss": 2.6474, + "step": 2780 + }, + { + "epoch": 0.5, + "learning_rate": 4.834962962962963e-05, + "loss": 2.5541, + "step": 2785 + }, + { + "epoch": 0.5, + "learning_rate": 4.834666666666667e-05, + "loss": 2.5239, + "step": 2790 + }, + { + "epoch": 0.5, + "learning_rate": 4.834370370370371e-05, + "loss": 2.3692, + "step": 2795 + }, + { + "epoch": 0.5, + "learning_rate": 4.834074074074074e-05, + "loss": 2.4673, + "step": 2800 + }, + { + "epoch": 0.5, + "learning_rate": 4.833777777777778e-05, + "loss": 2.5017, + "step": 2805 + }, + { + "epoch": 0.5, + "learning_rate": 4.833481481481482e-05, + "loss": 2.5517, + "step": 2810 + }, + { + "epoch": 0.5, + "learning_rate": 4.8331851851851856e-05, + "loss": 2.5868, + "step": 2815 + }, + { + "epoch": 0.5, + "learning_rate": 4.832888888888889e-05, + "loss": 2.5741, + "step": 2820 + }, + { + "epoch": 0.5, + "learning_rate": 4.832592592592593e-05, + "loss": 2.6339, + "step": 2825 + }, + { + "epoch": 0.5, + "learning_rate": 4.8322962962962966e-05, + "loss": 2.5491, + "step": 2830 + }, + { + "epoch": 0.5, + "learning_rate": 4.8320000000000005e-05, + "loss": 2.5256, + "step": 2835 + }, + { + "epoch": 0.5, + "learning_rate": 4.831703703703704e-05, + "loss": 2.5913, + "step": 2840 + }, + { + "epoch": 0.51, + "learning_rate": 4.8314074074074076e-05, + "loss": 2.5661, + "step": 2845 + }, + { + "epoch": 0.51, + "learning_rate": 4.8311111111111115e-05, + "loss": 2.3706, + "step": 2850 + }, + { + "epoch": 0.51, + "learning_rate": 4.8308148148148153e-05, + "loss": 2.5229, + "step": 2855 + }, + { + "epoch": 0.51, + "learning_rate": 4.8305185185185185e-05, + "loss": 2.4967, + "step": 2860 + }, + { + "epoch": 0.51, + "learning_rate": 4.8302222222222224e-05, + "loss": 2.3973, + "step": 2865 + }, + { + "epoch": 0.51, + "learning_rate": 4.829925925925926e-05, + "loss": 2.6179, + "step": 2870 + }, + { + "epoch": 0.51, + "learning_rate": 4.82962962962963e-05, + "loss": 2.3491, + "step": 2875 + }, + { + "epoch": 0.51, + "learning_rate": 4.8293333333333334e-05, + "loss": 2.62, + "step": 2880 + }, + { + "epoch": 0.51, + "learning_rate": 4.829037037037037e-05, + "loss": 2.5978, + "step": 2885 + }, + { + "epoch": 0.51, + "learning_rate": 4.828740740740741e-05, + "loss": 2.4692, + "step": 2890 + }, + { + "epoch": 0.51, + "learning_rate": 4.828444444444445e-05, + "loss": 2.5157, + "step": 2895 + }, + { + "epoch": 0.52, + "learning_rate": 4.828148148148148e-05, + "loss": 2.4219, + "step": 2900 + }, + { + "epoch": 0.52, + "learning_rate": 4.827851851851852e-05, + "loss": 2.6283, + "step": 2905 + }, + { + "epoch": 0.52, + "learning_rate": 4.827555555555556e-05, + "loss": 2.5258, + "step": 2910 + }, + { + "epoch": 0.52, + "learning_rate": 4.82725925925926e-05, + "loss": 2.5934, + "step": 2915 + }, + { + "epoch": 0.52, + "learning_rate": 4.826962962962963e-05, + "loss": 2.5742, + "step": 2920 + }, + { + "epoch": 0.52, + "learning_rate": 4.826666666666667e-05, + "loss": 2.6123, + "step": 2925 + }, + { + "epoch": 0.52, + "learning_rate": 4.826370370370371e-05, + "loss": 2.5939, + "step": 2930 + }, + { + "epoch": 0.52, + "learning_rate": 4.826074074074075e-05, + "loss": 2.49, + "step": 2935 + }, + { + "epoch": 0.52, + "learning_rate": 4.825777777777778e-05, + "loss": 2.3457, + "step": 2940 + }, + { + "epoch": 0.52, + "learning_rate": 4.825481481481482e-05, + "loss": 2.4655, + "step": 2945 + }, + { + "epoch": 0.52, + "learning_rate": 4.825185185185185e-05, + "loss": 2.5718, + "step": 2950 + }, + { + "epoch": 0.53, + "learning_rate": 4.8248888888888896e-05, + "loss": 2.4498, + "step": 2955 + }, + { + "epoch": 0.53, + "learning_rate": 4.824592592592593e-05, + "loss": 2.5131, + "step": 2960 + }, + { + "epoch": 0.53, + "learning_rate": 4.824296296296297e-05, + "loss": 2.3037, + "step": 2965 + }, + { + "epoch": 0.53, + "learning_rate": 4.824e-05, + "loss": 2.5468, + "step": 2970 + }, + { + "epoch": 0.53, + "learning_rate": 4.8237037037037045e-05, + "loss": 2.5251, + "step": 2975 + }, + { + "epoch": 0.53, + "learning_rate": 4.8234074074074077e-05, + "loss": 2.5881, + "step": 2980 + }, + { + "epoch": 0.53, + "learning_rate": 4.8231111111111115e-05, + "loss": 2.478, + "step": 2985 + }, + { + "epoch": 0.53, + "learning_rate": 4.822814814814815e-05, + "loss": 2.5001, + "step": 2990 + }, + { + "epoch": 0.53, + "learning_rate": 4.822518518518519e-05, + "loss": 2.5638, + "step": 2995 + }, + { + "epoch": 0.53, + "learning_rate": 4.8222222222222225e-05, + "loss": 2.4833, + "step": 3000 + }, + { + "epoch": 0.53, + "learning_rate": 4.8219259259259264e-05, + "loss": 2.482, + "step": 3005 + }, + { + "epoch": 0.54, + "learning_rate": 4.8216296296296296e-05, + "loss": 2.5302, + "step": 3010 + }, + { + "epoch": 0.54, + "learning_rate": 4.8213333333333335e-05, + "loss": 2.5613, + "step": 3015 + }, + { + "epoch": 0.54, + "learning_rate": 4.8210370370370374e-05, + "loss": 2.527, + "step": 3020 + }, + { + "epoch": 0.54, + "learning_rate": 4.820740740740741e-05, + "loss": 2.5483, + "step": 3025 + }, + { + "epoch": 0.54, + "learning_rate": 4.8204444444444444e-05, + "loss": 2.5656, + "step": 3030 + }, + { + "epoch": 0.54, + "learning_rate": 4.820148148148148e-05, + "loss": 2.5175, + "step": 3035 + }, + { + "epoch": 0.54, + "learning_rate": 4.819851851851852e-05, + "loss": 2.4651, + "step": 3040 + }, + { + "epoch": 0.54, + "learning_rate": 4.819555555555556e-05, + "loss": 2.5214, + "step": 3045 + }, + { + "epoch": 0.54, + "learning_rate": 4.819259259259259e-05, + "loss": 2.5157, + "step": 3050 + }, + { + "epoch": 0.54, + "learning_rate": 4.818962962962963e-05, + "loss": 2.5873, + "step": 3055 + }, + { + "epoch": 0.54, + "learning_rate": 4.818666666666667e-05, + "loss": 2.573, + "step": 3060 + }, + { + "epoch": 0.54, + "learning_rate": 4.818370370370371e-05, + "loss": 2.5124, + "step": 3065 + }, + { + "epoch": 0.55, + "learning_rate": 4.818074074074074e-05, + "loss": 2.4542, + "step": 3070 + }, + { + "epoch": 0.55, + "learning_rate": 4.817777777777778e-05, + "loss": 2.5165, + "step": 3075 + }, + { + "epoch": 0.55, + "learning_rate": 4.817481481481482e-05, + "loss": 2.4215, + "step": 3080 + }, + { + "epoch": 0.55, + "learning_rate": 4.817185185185186e-05, + "loss": 2.5509, + "step": 3085 + }, + { + "epoch": 0.55, + "learning_rate": 4.816888888888889e-05, + "loss": 2.4778, + "step": 3090 + }, + { + "epoch": 0.55, + "learning_rate": 4.816592592592593e-05, + "loss": 2.4294, + "step": 3095 + }, + { + "epoch": 0.55, + "learning_rate": 4.816296296296297e-05, + "loss": 2.5117, + "step": 3100 + }, + { + "epoch": 0.55, + "learning_rate": 4.816e-05, + "loss": 2.6064, + "step": 3105 + }, + { + "epoch": 0.55, + "learning_rate": 4.815703703703704e-05, + "loss": 2.3965, + "step": 3110 + }, + { + "epoch": 0.55, + "learning_rate": 4.815407407407407e-05, + "loss": 2.634, + "step": 3115 + }, + { + "epoch": 0.55, + "learning_rate": 4.8151111111111116e-05, + "loss": 2.5048, + "step": 3120 + }, + { + "epoch": 0.56, + "learning_rate": 4.814814814814815e-05, + "loss": 2.4657, + "step": 3125 + }, + { + "epoch": 0.56, + "learning_rate": 4.814518518518519e-05, + "loss": 2.5087, + "step": 3130 + }, + { + "epoch": 0.56, + "learning_rate": 4.814222222222222e-05, + "loss": 2.4964, + "step": 3135 + }, + { + "epoch": 0.56, + "learning_rate": 4.8139259259259265e-05, + "loss": 2.5269, + "step": 3140 + }, + { + "epoch": 0.56, + "learning_rate": 4.81362962962963e-05, + "loss": 2.4399, + "step": 3145 + }, + { + "epoch": 0.56, + "learning_rate": 4.8133333333333336e-05, + "loss": 2.5173, + "step": 3150 + }, + { + "epoch": 0.56, + "learning_rate": 4.813037037037037e-05, + "loss": 2.5539, + "step": 3155 + }, + { + "epoch": 0.56, + "learning_rate": 4.812740740740741e-05, + "loss": 2.5669, + "step": 3160 + }, + { + "epoch": 0.56, + "learning_rate": 4.8124444444444445e-05, + "loss": 2.3558, + "step": 3165 + }, + { + "epoch": 0.56, + "learning_rate": 4.8121481481481484e-05, + "loss": 2.604, + "step": 3170 + }, + { + "epoch": 0.56, + "learning_rate": 4.8118518518518516e-05, + "loss": 2.366, + "step": 3175 + }, + { + "epoch": 0.57, + "learning_rate": 4.8115555555555555e-05, + "loss": 2.584, + "step": 3180 + }, + { + "epoch": 0.57, + "learning_rate": 4.8112592592592594e-05, + "loss": 2.6561, + "step": 3185 + }, + { + "epoch": 0.57, + "learning_rate": 4.810962962962963e-05, + "loss": 2.4862, + "step": 3190 + }, + { + "epoch": 0.57, + "learning_rate": 4.8106666666666665e-05, + "loss": 2.5852, + "step": 3195 + }, + { + "epoch": 0.57, + "learning_rate": 4.8103703703703703e-05, + "loss": 2.5441, + "step": 3200 + }, + { + "epoch": 0.57, + "learning_rate": 4.810074074074074e-05, + "loss": 2.6158, + "step": 3205 + }, + { + "epoch": 0.57, + "learning_rate": 4.809777777777778e-05, + "loss": 2.464, + "step": 3210 + }, + { + "epoch": 0.57, + "learning_rate": 4.809481481481481e-05, + "loss": 2.5, + "step": 3215 + }, + { + "epoch": 0.57, + "learning_rate": 4.809185185185185e-05, + "loss": 2.5206, + "step": 3220 + }, + { + "epoch": 0.57, + "learning_rate": 4.808888888888889e-05, + "loss": 2.5357, + "step": 3225 + }, + { + "epoch": 0.57, + "learning_rate": 4.808592592592593e-05, + "loss": 2.4031, + "step": 3230 + }, + { + "epoch": 0.58, + "learning_rate": 4.808296296296296e-05, + "loss": 2.531, + "step": 3235 + }, + { + "epoch": 0.58, + "learning_rate": 4.808e-05, + "loss": 2.4502, + "step": 3240 + }, + { + "epoch": 0.58, + "learning_rate": 4.807703703703704e-05, + "loss": 2.5183, + "step": 3245 + }, + { + "epoch": 0.58, + "learning_rate": 4.807407407407408e-05, + "loss": 2.4353, + "step": 3250 + }, + { + "epoch": 0.58, + "learning_rate": 4.807111111111111e-05, + "loss": 2.4385, + "step": 3255 + }, + { + "epoch": 0.58, + "learning_rate": 4.806814814814815e-05, + "loss": 2.3361, + "step": 3260 + }, + { + "epoch": 0.58, + "learning_rate": 4.806518518518519e-05, + "loss": 2.4356, + "step": 3265 + }, + { + "epoch": 0.58, + "learning_rate": 4.8062222222222227e-05, + "loss": 2.4506, + "step": 3270 + }, + { + "epoch": 0.58, + "learning_rate": 4.805925925925926e-05, + "loss": 2.4999, + "step": 3275 + }, + { + "epoch": 0.58, + "learning_rate": 4.80562962962963e-05, + "loss": 2.5567, + "step": 3280 + }, + { + "epoch": 0.58, + "learning_rate": 4.8053333333333336e-05, + "loss": 2.5722, + "step": 3285 + }, + { + "epoch": 0.58, + "learning_rate": 4.8050370370370375e-05, + "loss": 2.4391, + "step": 3290 + }, + { + "epoch": 0.59, + "learning_rate": 4.804740740740741e-05, + "loss": 2.4832, + "step": 3295 + }, + { + "epoch": 0.59, + "learning_rate": 4.8044444444444446e-05, + "loss": 2.457, + "step": 3300 + }, + { + "epoch": 0.59, + "learning_rate": 4.8041481481481485e-05, + "loss": 2.4073, + "step": 3305 + }, + { + "epoch": 0.59, + "learning_rate": 4.8038518518518524e-05, + "loss": 2.6618, + "step": 3310 + }, + { + "epoch": 0.59, + "learning_rate": 4.8035555555555556e-05, + "loss": 2.5364, + "step": 3315 + }, + { + "epoch": 0.59, + "learning_rate": 4.8032592592592595e-05, + "loss": 2.6259, + "step": 3320 + }, + { + "epoch": 0.59, + "learning_rate": 4.802962962962963e-05, + "loss": 2.5101, + "step": 3325 + }, + { + "epoch": 0.59, + "learning_rate": 4.802666666666667e-05, + "loss": 2.5214, + "step": 3330 + }, + { + "epoch": 0.59, + "learning_rate": 4.8023703703703704e-05, + "loss": 2.622, + "step": 3335 + }, + { + "epoch": 0.59, + "learning_rate": 4.802074074074074e-05, + "loss": 2.6139, + "step": 3340 + }, + { + "epoch": 0.59, + "learning_rate": 4.8017777777777775e-05, + "loss": 2.4534, + "step": 3345 + }, + { + "epoch": 0.6, + "learning_rate": 4.801481481481482e-05, + "loss": 2.5654, + "step": 3350 + }, + { + "epoch": 0.6, + "learning_rate": 4.801185185185185e-05, + "loss": 2.5499, + "step": 3355 + }, + { + "epoch": 0.6, + "learning_rate": 4.800888888888889e-05, + "loss": 2.4748, + "step": 3360 + }, + { + "epoch": 0.6, + "learning_rate": 4.8005925925925924e-05, + "loss": 2.5733, + "step": 3365 + }, + { + "epoch": 0.6, + "learning_rate": 4.800296296296297e-05, + "loss": 2.4487, + "step": 3370 + }, + { + "epoch": 0.6, + "learning_rate": 4.8e-05, + "loss": 2.4621, + "step": 3375 + }, + { + "epoch": 0.6, + "learning_rate": 4.799703703703704e-05, + "loss": 2.5535, + "step": 3380 + }, + { + "epoch": 0.6, + "learning_rate": 4.799407407407407e-05, + "loss": 2.4086, + "step": 3385 + }, + { + "epoch": 0.6, + "learning_rate": 4.799111111111112e-05, + "loss": 2.5602, + "step": 3390 + }, + { + "epoch": 0.6, + "learning_rate": 4.798814814814815e-05, + "loss": 2.4961, + "step": 3395 + }, + { + "epoch": 0.6, + "learning_rate": 4.798518518518519e-05, + "loss": 2.471, + "step": 3400 + }, + { + "epoch": 0.61, + "learning_rate": 4.798222222222222e-05, + "loss": 2.3396, + "step": 3405 + }, + { + "epoch": 0.61, + "learning_rate": 4.797925925925926e-05, + "loss": 2.4646, + "step": 3410 + }, + { + "epoch": 0.61, + "learning_rate": 4.79762962962963e-05, + "loss": 2.449, + "step": 3415 + }, + { + "epoch": 0.61, + "learning_rate": 4.797333333333334e-05, + "loss": 2.5317, + "step": 3420 + }, + { + "epoch": 0.61, + "learning_rate": 4.797037037037037e-05, + "loss": 2.5041, + "step": 3425 + }, + { + "epoch": 0.61, + "learning_rate": 4.796740740740741e-05, + "loss": 2.4711, + "step": 3430 + }, + { + "epoch": 0.61, + "learning_rate": 4.796444444444445e-05, + "loss": 2.4812, + "step": 3435 + }, + { + "epoch": 0.61, + "learning_rate": 4.7961481481481486e-05, + "loss": 2.4339, + "step": 3440 + }, + { + "epoch": 0.61, + "learning_rate": 4.795851851851852e-05, + "loss": 2.4439, + "step": 3445 + }, + { + "epoch": 0.61, + "learning_rate": 4.7955555555555556e-05, + "loss": 2.4874, + "step": 3450 + }, + { + "epoch": 0.61, + "learning_rate": 4.7952592592592595e-05, + "loss": 2.5121, + "step": 3455 + }, + { + "epoch": 0.62, + "learning_rate": 4.7949629629629634e-05, + "loss": 2.6459, + "step": 3460 + }, + { + "epoch": 0.62, + "learning_rate": 4.7946666666666666e-05, + "loss": 2.6186, + "step": 3465 + }, + { + "epoch": 0.62, + "learning_rate": 4.7943703703703705e-05, + "loss": 2.3716, + "step": 3470 + }, + { + "epoch": 0.62, + "learning_rate": 4.7940740740740744e-05, + "loss": 2.507, + "step": 3475 + }, + { + "epoch": 0.62, + "learning_rate": 4.793777777777778e-05, + "loss": 2.3918, + "step": 3480 + }, + { + "epoch": 0.62, + "learning_rate": 4.7934814814814815e-05, + "loss": 2.3803, + "step": 3485 + }, + { + "epoch": 0.62, + "learning_rate": 4.7931851851851853e-05, + "loss": 2.4445, + "step": 3490 + }, + { + "epoch": 0.62, + "learning_rate": 4.792888888888889e-05, + "loss": 2.4617, + "step": 3495 + }, + { + "epoch": 0.62, + "learning_rate": 4.792592592592593e-05, + "loss": 2.5057, + "step": 3500 + }, + { + "epoch": 0.62, + "learning_rate": 4.792296296296296e-05, + "loss": 2.5112, + "step": 3505 + }, + { + "epoch": 0.62, + "learning_rate": 4.792e-05, + "loss": 2.4289, + "step": 3510 + }, + { + "epoch": 0.62, + "learning_rate": 4.791703703703704e-05, + "loss": 2.5129, + "step": 3515 + }, + { + "epoch": 0.63, + "learning_rate": 4.791407407407408e-05, + "loss": 2.4718, + "step": 3520 + }, + { + "epoch": 0.63, + "learning_rate": 4.791111111111111e-05, + "loss": 2.4584, + "step": 3525 + }, + { + "epoch": 0.63, + "learning_rate": 4.790814814814815e-05, + "loss": 2.4819, + "step": 3530 + }, + { + "epoch": 0.63, + "learning_rate": 4.790518518518519e-05, + "loss": 2.5925, + "step": 3535 + }, + { + "epoch": 0.63, + "learning_rate": 4.790222222222223e-05, + "loss": 2.4326, + "step": 3540 + }, + { + "epoch": 0.63, + "learning_rate": 4.789925925925926e-05, + "loss": 2.4423, + "step": 3545 + }, + { + "epoch": 0.63, + "learning_rate": 4.78962962962963e-05, + "loss": 2.5685, + "step": 3550 + }, + { + "epoch": 0.63, + "learning_rate": 4.789333333333334e-05, + "loss": 2.4943, + "step": 3555 + }, + { + "epoch": 0.63, + "learning_rate": 4.789037037037038e-05, + "loss": 2.6227, + "step": 3560 + }, + { + "epoch": 0.63, + "learning_rate": 4.788740740740741e-05, + "loss": 2.373, + "step": 3565 + }, + { + "epoch": 0.63, + "learning_rate": 4.788444444444445e-05, + "loss": 2.3402, + "step": 3570 + }, + { + "epoch": 0.64, + "learning_rate": 4.788148148148148e-05, + "loss": 2.5824, + "step": 3575 + }, + { + "epoch": 0.64, + "learning_rate": 4.7878518518518525e-05, + "loss": 2.4371, + "step": 3580 + }, + { + "epoch": 0.64, + "learning_rate": 4.787555555555556e-05, + "loss": 2.3421, + "step": 3585 + }, + { + "epoch": 0.64, + "learning_rate": 4.7872592592592596e-05, + "loss": 2.333, + "step": 3590 + }, + { + "epoch": 0.64, + "learning_rate": 4.786962962962963e-05, + "loss": 2.4595, + "step": 3595 + }, + { + "epoch": 0.64, + "learning_rate": 4.7866666666666674e-05, + "loss": 2.4442, + "step": 3600 + }, + { + "epoch": 0.64, + "learning_rate": 4.7863703703703706e-05, + "loss": 2.3995, + "step": 3605 + }, + { + "epoch": 0.64, + "learning_rate": 4.7860740740740745e-05, + "loss": 2.4855, + "step": 3610 + }, + { + "epoch": 0.64, + "learning_rate": 4.7857777777777777e-05, + "loss": 2.5998, + "step": 3615 + }, + { + "epoch": 0.64, + "learning_rate": 4.785481481481482e-05, + "loss": 2.4413, + "step": 3620 + }, + { + "epoch": 0.64, + "learning_rate": 4.7851851851851854e-05, + "loss": 2.5083, + "step": 3625 + }, + { + "epoch": 0.65, + "learning_rate": 4.784888888888889e-05, + "loss": 2.4819, + "step": 3630 + }, + { + "epoch": 0.65, + "learning_rate": 4.7845925925925925e-05, + "loss": 2.5796, + "step": 3635 + }, + { + "epoch": 0.65, + "learning_rate": 4.7842962962962964e-05, + "loss": 2.458, + "step": 3640 + }, + { + "epoch": 0.65, + "learning_rate": 4.784e-05, + "loss": 2.5684, + "step": 3645 + }, + { + "epoch": 0.65, + "learning_rate": 4.783703703703704e-05, + "loss": 2.3808, + "step": 3650 + }, + { + "epoch": 0.65, + "learning_rate": 4.7834074074074074e-05, + "loss": 2.4314, + "step": 3655 + }, + { + "epoch": 0.65, + "learning_rate": 4.783111111111111e-05, + "loss": 2.2501, + "step": 3660 + }, + { + "epoch": 0.65, + "learning_rate": 4.782814814814815e-05, + "loss": 2.5552, + "step": 3665 + }, + { + "epoch": 0.65, + "learning_rate": 4.782518518518519e-05, + "loss": 2.506, + "step": 3670 + }, + { + "epoch": 0.65, + "learning_rate": 4.782222222222222e-05, + "loss": 2.5063, + "step": 3675 + }, + { + "epoch": 0.65, + "learning_rate": 4.781925925925926e-05, + "loss": 2.4438, + "step": 3680 + }, + { + "epoch": 0.66, + "learning_rate": 4.78162962962963e-05, + "loss": 2.4133, + "step": 3685 + }, + { + "epoch": 0.66, + "learning_rate": 4.781333333333334e-05, + "loss": 2.5003, + "step": 3690 + }, + { + "epoch": 0.66, + "learning_rate": 4.781037037037037e-05, + "loss": 2.4661, + "step": 3695 + }, + { + "epoch": 0.66, + "learning_rate": 4.780740740740741e-05, + "loss": 2.4338, + "step": 3700 + }, + { + "epoch": 0.66, + "learning_rate": 4.780444444444445e-05, + "loss": 2.6581, + "step": 3705 + }, + { + "epoch": 0.66, + "learning_rate": 4.780148148148149e-05, + "loss": 2.5527, + "step": 3710 + }, + { + "epoch": 0.66, + "learning_rate": 4.779851851851852e-05, + "loss": 2.4698, + "step": 3715 + }, + { + "epoch": 0.66, + "learning_rate": 4.779555555555556e-05, + "loss": 2.4999, + "step": 3720 + }, + { + "epoch": 0.66, + "learning_rate": 4.77925925925926e-05, + "loss": 2.5276, + "step": 3725 + }, + { + "epoch": 0.66, + "learning_rate": 4.7789629629629636e-05, + "loss": 2.4254, + "step": 3730 + }, + { + "epoch": 0.66, + "learning_rate": 4.778666666666667e-05, + "loss": 2.5416, + "step": 3735 + }, + { + "epoch": 0.66, + "learning_rate": 4.7783703703703706e-05, + "loss": 2.3298, + "step": 3740 + }, + { + "epoch": 0.67, + "learning_rate": 4.7780740740740745e-05, + "loss": 2.5605, + "step": 3745 + }, + { + "epoch": 0.67, + "learning_rate": 4.7777777777777784e-05, + "loss": 2.5469, + "step": 3750 + }, + { + "epoch": 0.67, + "learning_rate": 4.7774814814814816e-05, + "loss": 2.5102, + "step": 3755 + }, + { + "epoch": 0.67, + "learning_rate": 4.7771851851851855e-05, + "loss": 2.4916, + "step": 3760 + }, + { + "epoch": 0.67, + "learning_rate": 4.7768888888888894e-05, + "loss": 2.4455, + "step": 3765 + }, + { + "epoch": 0.67, + "learning_rate": 4.776592592592593e-05, + "loss": 2.4469, + "step": 3770 + }, + { + "epoch": 0.67, + "learning_rate": 4.7762962962962965e-05, + "loss": 2.5556, + "step": 3775 + }, + { + "epoch": 0.67, + "learning_rate": 4.7760000000000004e-05, + "loss": 2.498, + "step": 3780 + }, + { + "epoch": 0.67, + "learning_rate": 4.775703703703704e-05, + "loss": 2.4771, + "step": 3785 + }, + { + "epoch": 0.67, + "learning_rate": 4.775407407407408e-05, + "loss": 2.4732, + "step": 3790 + }, + { + "epoch": 0.67, + "learning_rate": 4.775111111111111e-05, + "loss": 2.4418, + "step": 3795 + }, + { + "epoch": 0.68, + "learning_rate": 4.774814814814815e-05, + "loss": 2.3981, + "step": 3800 + }, + { + "epoch": 0.68, + "learning_rate": 4.7745185185185184e-05, + "loss": 2.4527, + "step": 3805 + }, + { + "epoch": 0.68, + "learning_rate": 4.774222222222223e-05, + "loss": 2.5408, + "step": 3810 + }, + { + "epoch": 0.68, + "learning_rate": 4.773925925925926e-05, + "loss": 2.372, + "step": 3815 + }, + { + "epoch": 0.68, + "learning_rate": 4.77362962962963e-05, + "loss": 2.3876, + "step": 3820 + }, + { + "epoch": 0.68, + "learning_rate": 4.773333333333333e-05, + "loss": 2.3631, + "step": 3825 + }, + { + "epoch": 0.68, + "learning_rate": 4.773037037037038e-05, + "loss": 2.3707, + "step": 3830 + }, + { + "epoch": 0.68, + "learning_rate": 4.772740740740741e-05, + "loss": 2.3632, + "step": 3835 + }, + { + "epoch": 0.68, + "learning_rate": 4.772444444444445e-05, + "loss": 2.4309, + "step": 3840 + }, + { + "epoch": 0.68, + "learning_rate": 4.772148148148148e-05, + "loss": 2.4249, + "step": 3845 + }, + { + "epoch": 0.68, + "learning_rate": 4.771851851851853e-05, + "loss": 2.5575, + "step": 3850 + }, + { + "epoch": 0.69, + "learning_rate": 4.771555555555556e-05, + "loss": 2.4422, + "step": 3855 + }, + { + "epoch": 0.69, + "learning_rate": 4.77125925925926e-05, + "loss": 2.4859, + "step": 3860 + }, + { + "epoch": 0.69, + "learning_rate": 4.770962962962963e-05, + "loss": 2.4379, + "step": 3865 + }, + { + "epoch": 0.69, + "learning_rate": 4.770666666666667e-05, + "loss": 2.5409, + "step": 3870 + }, + { + "epoch": 0.69, + "learning_rate": 4.770370370370371e-05, + "loss": 2.4814, + "step": 3875 + }, + { + "epoch": 0.69, + "learning_rate": 4.770074074074074e-05, + "loss": 2.5844, + "step": 3880 + }, + { + "epoch": 0.69, + "learning_rate": 4.769777777777778e-05, + "loss": 2.49, + "step": 3885 + }, + { + "epoch": 0.69, + "learning_rate": 4.769481481481482e-05, + "loss": 2.4241, + "step": 3890 + }, + { + "epoch": 0.69, + "learning_rate": 4.7691851851851856e-05, + "loss": 2.4844, + "step": 3895 + }, + { + "epoch": 0.69, + "learning_rate": 4.768888888888889e-05, + "loss": 2.4101, + "step": 3900 + }, + { + "epoch": 0.69, + "learning_rate": 4.768592592592593e-05, + "loss": 2.4722, + "step": 3905 + }, + { + "epoch": 0.7, + "learning_rate": 4.7682962962962965e-05, + "loss": 2.3843, + "step": 3910 + }, + { + "epoch": 0.7, + "learning_rate": 4.7680000000000004e-05, + "loss": 2.3913, + "step": 3915 + }, + { + "epoch": 0.7, + "learning_rate": 4.7677037037037036e-05, + "loss": 2.6144, + "step": 3920 + }, + { + "epoch": 0.7, + "learning_rate": 4.7674074074074075e-05, + "loss": 2.5613, + "step": 3925 + }, + { + "epoch": 0.7, + "learning_rate": 4.7671111111111114e-05, + "loss": 2.4305, + "step": 3930 + }, + { + "epoch": 0.7, + "learning_rate": 4.766814814814815e-05, + "loss": 2.4208, + "step": 3935 + }, + { + "epoch": 0.7, + "learning_rate": 4.7665185185185185e-05, + "loss": 2.4655, + "step": 3940 + }, + { + "epoch": 0.7, + "learning_rate": 4.7662222222222224e-05, + "loss": 2.3634, + "step": 3945 + }, + { + "epoch": 0.7, + "learning_rate": 4.7659259259259256e-05, + "loss": 2.4413, + "step": 3950 + }, + { + "epoch": 0.7, + "learning_rate": 4.76562962962963e-05, + "loss": 2.5993, + "step": 3955 + }, + { + "epoch": 0.7, + "learning_rate": 4.765333333333333e-05, + "loss": 2.4721, + "step": 3960 + }, + { + "epoch": 0.7, + "learning_rate": 4.765037037037037e-05, + "loss": 2.5053, + "step": 3965 + }, + { + "epoch": 0.71, + "learning_rate": 4.7647407407407404e-05, + "loss": 2.5659, + "step": 3970 + }, + { + "epoch": 0.71, + "learning_rate": 4.764444444444445e-05, + "loss": 2.4318, + "step": 3975 + }, + { + "epoch": 0.71, + "learning_rate": 4.764148148148148e-05, + "loss": 2.4853, + "step": 3980 + }, + { + "epoch": 0.71, + "learning_rate": 4.763851851851852e-05, + "loss": 2.3437, + "step": 3985 + }, + { + "epoch": 0.71, + "learning_rate": 4.763555555555555e-05, + "loss": 2.4864, + "step": 3990 + }, + { + "epoch": 0.71, + "learning_rate": 4.76325925925926e-05, + "loss": 2.3076, + "step": 3995 + }, + { + "epoch": 0.71, + "learning_rate": 4.762962962962963e-05, + "loss": 2.5107, + "step": 4000 + }, + { + "epoch": 0.71, + "learning_rate": 4.762666666666667e-05, + "loss": 2.3943, + "step": 4005 + }, + { + "epoch": 0.71, + "learning_rate": 4.76237037037037e-05, + "loss": 2.5734, + "step": 4010 + }, + { + "epoch": 0.71, + "learning_rate": 4.762074074074075e-05, + "loss": 2.5178, + "step": 4015 + }, + { + "epoch": 0.71, + "learning_rate": 4.761777777777778e-05, + "loss": 2.3443, + "step": 4020 + }, + { + "epoch": 0.72, + "learning_rate": 4.761481481481482e-05, + "loss": 2.4504, + "step": 4025 + }, + { + "epoch": 0.72, + "learning_rate": 4.761244444444445e-05, + "loss": 2.5104, + "step": 4030 + }, + { + "epoch": 0.72, + "learning_rate": 4.760948148148148e-05, + "loss": 2.5564, + "step": 4035 + }, + { + "epoch": 0.72, + "learning_rate": 4.7606518518518525e-05, + "loss": 2.4881, + "step": 4040 + }, + { + "epoch": 0.72, + "learning_rate": 4.760355555555556e-05, + "loss": 2.4379, + "step": 4045 + }, + { + "epoch": 0.72, + "learning_rate": 4.7600592592592596e-05, + "loss": 2.5927, + "step": 4050 + }, + { + "epoch": 0.72, + "learning_rate": 4.759762962962963e-05, + "loss": 2.434, + "step": 4055 + }, + { + "epoch": 0.72, + "learning_rate": 4.7594666666666674e-05, + "loss": 2.4309, + "step": 4060 + }, + { + "epoch": 0.72, + "learning_rate": 4.7591703703703706e-05, + "loss": 2.4294, + "step": 4065 + }, + { + "epoch": 0.72, + "learning_rate": 4.7588740740740744e-05, + "loss": 2.4577, + "step": 4070 + }, + { + "epoch": 0.72, + "learning_rate": 4.7585777777777776e-05, + "loss": 2.3474, + "step": 4075 + }, + { + "epoch": 0.73, + "learning_rate": 4.758281481481482e-05, + "loss": 2.4289, + "step": 4080 + }, + { + "epoch": 0.73, + "learning_rate": 4.7579851851851854e-05, + "loss": 2.5407, + "step": 4085 + }, + { + "epoch": 0.73, + "learning_rate": 4.757688888888889e-05, + "loss": 2.3306, + "step": 4090 + }, + { + "epoch": 0.73, + "learning_rate": 4.7573925925925925e-05, + "loss": 2.6044, + "step": 4095 + }, + { + "epoch": 0.73, + "learning_rate": 4.7570962962962964e-05, + "loss": 2.3461, + "step": 4100 + }, + { + "epoch": 0.73, + "learning_rate": 4.7568e-05, + "loss": 2.4643, + "step": 4105 + }, + { + "epoch": 0.73, + "learning_rate": 4.756503703703704e-05, + "loss": 2.4313, + "step": 4110 + }, + { + "epoch": 0.73, + "learning_rate": 4.7562074074074074e-05, + "loss": 2.3669, + "step": 4115 + }, + { + "epoch": 0.73, + "learning_rate": 4.755911111111111e-05, + "loss": 2.5477, + "step": 4120 + }, + { + "epoch": 0.73, + "learning_rate": 4.755614814814815e-05, + "loss": 2.512, + "step": 4125 + }, + { + "epoch": 0.73, + "learning_rate": 4.755318518518519e-05, + "loss": 2.342, + "step": 4130 + }, + { + "epoch": 0.74, + "learning_rate": 4.755022222222222e-05, + "loss": 2.6974, + "step": 4135 + }, + { + "epoch": 0.74, + "learning_rate": 4.754725925925926e-05, + "loss": 2.3741, + "step": 4140 + }, + { + "epoch": 0.74, + "learning_rate": 4.75442962962963e-05, + "loss": 2.4483, + "step": 4145 + }, + { + "epoch": 0.74, + "learning_rate": 4.754133333333334e-05, + "loss": 2.514, + "step": 4150 + }, + { + "epoch": 0.74, + "learning_rate": 4.753837037037037e-05, + "loss": 2.4418, + "step": 4155 + }, + { + "epoch": 0.74, + "learning_rate": 4.753540740740741e-05, + "loss": 2.5164, + "step": 4160 + }, + { + "epoch": 0.74, + "learning_rate": 4.753244444444445e-05, + "loss": 2.3921, + "step": 4165 + }, + { + "epoch": 0.74, + "learning_rate": 4.752948148148149e-05, + "loss": 2.5978, + "step": 4170 + }, + { + "epoch": 0.74, + "learning_rate": 4.752651851851852e-05, + "loss": 2.2988, + "step": 4175 + }, + { + "epoch": 0.74, + "learning_rate": 4.752355555555556e-05, + "loss": 2.5154, + "step": 4180 + }, + { + "epoch": 0.74, + "learning_rate": 4.75205925925926e-05, + "loss": 2.4891, + "step": 4185 + }, + { + "epoch": 0.74, + "learning_rate": 4.7517629629629636e-05, + "loss": 2.3978, + "step": 4190 + }, + { + "epoch": 0.75, + "learning_rate": 4.751466666666667e-05, + "loss": 2.5693, + "step": 4195 + }, + { + "epoch": 0.75, + "learning_rate": 4.7511703703703706e-05, + "loss": 2.5582, + "step": 4200 + }, + { + "epoch": 0.75, + "learning_rate": 4.7508740740740745e-05, + "loss": 2.5992, + "step": 4205 + }, + { + "epoch": 0.75, + "learning_rate": 4.7505777777777784e-05, + "loss": 2.5169, + "step": 4210 + }, + { + "epoch": 0.75, + "learning_rate": 4.7502814814814816e-05, + "loss": 2.4072, + "step": 4215 + }, + { + "epoch": 0.75, + "learning_rate": 4.7499851851851855e-05, + "loss": 2.6555, + "step": 4220 + }, + { + "epoch": 0.75, + "learning_rate": 4.7496888888888894e-05, + "loss": 2.4927, + "step": 4225 + }, + { + "epoch": 0.75, + "learning_rate": 4.749392592592593e-05, + "loss": 2.4171, + "step": 4230 + }, + { + "epoch": 0.75, + "learning_rate": 4.7490962962962965e-05, + "loss": 2.5243, + "step": 4235 + }, + { + "epoch": 0.75, + "learning_rate": 4.7488000000000003e-05, + "loss": 2.5331, + "step": 4240 + }, + { + "epoch": 0.75, + "learning_rate": 4.748503703703704e-05, + "loss": 2.5703, + "step": 4245 + }, + { + "epoch": 0.76, + "learning_rate": 4.748207407407408e-05, + "loss": 2.5743, + "step": 4250 + }, + { + "epoch": 0.76, + "learning_rate": 4.747911111111111e-05, + "loss": 2.2319, + "step": 4255 + }, + { + "epoch": 0.76, + "learning_rate": 4.747614814814815e-05, + "loss": 2.4138, + "step": 4260 + }, + { + "epoch": 0.76, + "learning_rate": 4.7473185185185184e-05, + "loss": 2.2671, + "step": 4265 + }, + { + "epoch": 0.76, + "learning_rate": 4.747022222222223e-05, + "loss": 2.551, + "step": 4270 + }, + { + "epoch": 0.76, + "learning_rate": 4.746725925925926e-05, + "loss": 2.3967, + "step": 4275 + }, + { + "epoch": 0.76, + "learning_rate": 4.74642962962963e-05, + "loss": 2.5029, + "step": 4280 + }, + { + "epoch": 0.76, + "learning_rate": 4.746133333333333e-05, + "loss": 2.5147, + "step": 4285 + }, + { + "epoch": 0.76, + "learning_rate": 4.745837037037038e-05, + "loss": 2.5134, + "step": 4290 + }, + { + "epoch": 0.76, + "learning_rate": 4.745540740740741e-05, + "loss": 2.5816, + "step": 4295 + }, + { + "epoch": 0.76, + "learning_rate": 4.745244444444445e-05, + "loss": 2.423, + "step": 4300 + }, + { + "epoch": 0.77, + "learning_rate": 4.744948148148148e-05, + "loss": 2.4048, + "step": 4305 + }, + { + "epoch": 0.77, + "learning_rate": 4.7446518518518527e-05, + "loss": 2.4497, + "step": 4310 + }, + { + "epoch": 0.77, + "learning_rate": 4.744355555555556e-05, + "loss": 2.5144, + "step": 4315 + }, + { + "epoch": 0.77, + "learning_rate": 4.74405925925926e-05, + "loss": 2.4839, + "step": 4320 + }, + { + "epoch": 0.77, + "learning_rate": 4.743762962962963e-05, + "loss": 2.4039, + "step": 4325 + }, + { + "epoch": 0.77, + "learning_rate": 4.743466666666667e-05, + "loss": 2.5328, + "step": 4330 + }, + { + "epoch": 0.77, + "learning_rate": 4.743170370370371e-05, + "loss": 2.4651, + "step": 4335 + }, + { + "epoch": 0.77, + "learning_rate": 4.742874074074074e-05, + "loss": 2.4951, + "step": 4340 + }, + { + "epoch": 0.77, + "learning_rate": 4.742577777777778e-05, + "loss": 2.3968, + "step": 4345 + }, + { + "epoch": 0.77, + "learning_rate": 4.742281481481482e-05, + "loss": 2.4837, + "step": 4350 + }, + { + "epoch": 0.77, + "learning_rate": 4.7419851851851856e-05, + "loss": 2.3474, + "step": 4355 + }, + { + "epoch": 0.78, + "learning_rate": 4.741688888888889e-05, + "loss": 2.4129, + "step": 4360 + }, + { + "epoch": 0.78, + "learning_rate": 4.7413925925925927e-05, + "loss": 2.54, + "step": 4365 + }, + { + "epoch": 0.78, + "learning_rate": 4.7410962962962965e-05, + "loss": 2.3878, + "step": 4370 + }, + { + "epoch": 0.78, + "learning_rate": 4.7408000000000004e-05, + "loss": 2.444, + "step": 4375 + }, + { + "epoch": 0.78, + "learning_rate": 4.7405037037037036e-05, + "loss": 2.4356, + "step": 4380 + }, + { + "epoch": 0.78, + "learning_rate": 4.7402074074074075e-05, + "loss": 2.5181, + "step": 4385 + }, + { + "epoch": 0.78, + "learning_rate": 4.7399111111111114e-05, + "loss": 2.465, + "step": 4390 + }, + { + "epoch": 0.78, + "learning_rate": 4.739614814814815e-05, + "loss": 2.4831, + "step": 4395 + }, + { + "epoch": 0.78, + "learning_rate": 4.7393185185185185e-05, + "loss": 2.5451, + "step": 4400 + }, + { + "epoch": 0.78, + "learning_rate": 4.7390222222222224e-05, + "loss": 2.4309, + "step": 4405 + }, + { + "epoch": 0.78, + "learning_rate": 4.738725925925926e-05, + "loss": 2.3851, + "step": 4410 + }, + { + "epoch": 0.78, + "learning_rate": 4.73842962962963e-05, + "loss": 2.2616, + "step": 4415 + }, + { + "epoch": 0.79, + "learning_rate": 4.738133333333333e-05, + "loss": 2.3663, + "step": 4420 + }, + { + "epoch": 0.79, + "learning_rate": 4.737837037037037e-05, + "loss": 2.448, + "step": 4425 + }, + { + "epoch": 0.79, + "learning_rate": 4.7375407407407404e-05, + "loss": 2.4346, + "step": 4430 + }, + { + "epoch": 0.79, + "learning_rate": 4.737244444444445e-05, + "loss": 2.3232, + "step": 4435 + }, + { + "epoch": 0.79, + "learning_rate": 4.736948148148148e-05, + "loss": 2.3551, + "step": 4440 + }, + { + "epoch": 0.79, + "learning_rate": 4.736651851851852e-05, + "loss": 2.4111, + "step": 4445 + }, + { + "epoch": 0.79, + "learning_rate": 4.736355555555555e-05, + "loss": 2.5738, + "step": 4450 + }, + { + "epoch": 0.79, + "learning_rate": 4.73605925925926e-05, + "loss": 2.5933, + "step": 4455 + }, + { + "epoch": 0.79, + "learning_rate": 4.735762962962963e-05, + "loss": 2.4173, + "step": 4460 + }, + { + "epoch": 0.79, + "learning_rate": 4.735466666666667e-05, + "loss": 2.5098, + "step": 4465 + }, + { + "epoch": 0.79, + "learning_rate": 4.73517037037037e-05, + "loss": 2.4746, + "step": 4470 + }, + { + "epoch": 0.8, + "learning_rate": 4.734874074074075e-05, + "loss": 2.4557, + "step": 4475 + }, + { + "epoch": 0.8, + "learning_rate": 4.734577777777778e-05, + "loss": 2.564, + "step": 4480 + }, + { + "epoch": 0.8, + "learning_rate": 4.734281481481482e-05, + "loss": 2.5229, + "step": 4485 + }, + { + "epoch": 0.8, + "learning_rate": 4.733985185185185e-05, + "loss": 2.545, + "step": 4490 + }, + { + "epoch": 0.8, + "learning_rate": 4.733688888888889e-05, + "loss": 2.4309, + "step": 4495 + }, + { + "epoch": 0.8, + "learning_rate": 4.733392592592593e-05, + "loss": 2.406, + "step": 4500 + }, + { + "epoch": 0.8, + "learning_rate": 4.7330962962962966e-05, + "loss": 2.4441, + "step": 4505 + }, + { + "epoch": 0.8, + "learning_rate": 4.7328e-05, + "loss": 2.4585, + "step": 4510 + }, + { + "epoch": 0.8, + "learning_rate": 4.732503703703704e-05, + "loss": 2.4851, + "step": 4515 + }, + { + "epoch": 0.8, + "learning_rate": 4.7322074074074076e-05, + "loss": 2.4068, + "step": 4520 + }, + { + "epoch": 0.8, + "learning_rate": 4.7319111111111115e-05, + "loss": 2.4573, + "step": 4525 + }, + { + "epoch": 0.81, + "learning_rate": 4.731614814814815e-05, + "loss": 2.3281, + "step": 4530 + }, + { + "epoch": 0.81, + "learning_rate": 4.7313185185185185e-05, + "loss": 2.4596, + "step": 4535 + }, + { + "epoch": 0.81, + "learning_rate": 4.7310222222222224e-05, + "loss": 2.3728, + "step": 4540 + }, + { + "epoch": 0.81, + "learning_rate": 4.730725925925926e-05, + "loss": 2.4934, + "step": 4545 + }, + { + "epoch": 0.81, + "learning_rate": 4.7304296296296295e-05, + "loss": 2.4534, + "step": 4550 + }, + { + "epoch": 0.81, + "learning_rate": 4.7301333333333334e-05, + "loss": 2.4465, + "step": 4555 + }, + { + "epoch": 0.81, + "learning_rate": 4.729837037037037e-05, + "loss": 2.5807, + "step": 4560 + }, + { + "epoch": 0.81, + "learning_rate": 4.729540740740741e-05, + "loss": 2.467, + "step": 4565 + }, + { + "epoch": 0.81, + "learning_rate": 4.7292444444444444e-05, + "loss": 2.5156, + "step": 4570 + }, + { + "epoch": 0.81, + "learning_rate": 4.728948148148148e-05, + "loss": 2.4047, + "step": 4575 + }, + { + "epoch": 0.81, + "learning_rate": 4.728651851851852e-05, + "loss": 2.4475, + "step": 4580 + }, + { + "epoch": 0.82, + "learning_rate": 4.728355555555556e-05, + "loss": 2.4209, + "step": 4585 + }, + { + "epoch": 0.82, + "learning_rate": 4.728059259259259e-05, + "loss": 2.4052, + "step": 4590 + }, + { + "epoch": 0.82, + "learning_rate": 4.727762962962963e-05, + "loss": 2.498, + "step": 4595 + }, + { + "epoch": 0.82, + "learning_rate": 4.727466666666667e-05, + "loss": 2.2793, + "step": 4600 + }, + { + "epoch": 0.82, + "learning_rate": 4.727170370370371e-05, + "loss": 2.5308, + "step": 4605 + }, + { + "epoch": 0.82, + "learning_rate": 4.726874074074074e-05, + "loss": 2.4118, + "step": 4610 + }, + { + "epoch": 0.82, + "learning_rate": 4.726577777777778e-05, + "loss": 2.3792, + "step": 4615 + }, + { + "epoch": 0.82, + "learning_rate": 4.726281481481482e-05, + "loss": 2.4621, + "step": 4620 + }, + { + "epoch": 0.82, + "learning_rate": 4.725985185185186e-05, + "loss": 2.5024, + "step": 4625 + }, + { + "epoch": 0.82, + "learning_rate": 4.725688888888889e-05, + "loss": 2.3679, + "step": 4630 + }, + { + "epoch": 0.82, + "learning_rate": 4.725392592592593e-05, + "loss": 2.1793, + "step": 4635 + }, + { + "epoch": 0.82, + "learning_rate": 4.725096296296297e-05, + "loss": 2.4323, + "step": 4640 + }, + { + "epoch": 0.83, + "learning_rate": 4.7248000000000006e-05, + "loss": 2.4791, + "step": 4645 + }, + { + "epoch": 0.83, + "learning_rate": 4.724503703703704e-05, + "loss": 2.3062, + "step": 4650 + }, + { + "epoch": 0.83, + "learning_rate": 4.7242074074074077e-05, + "loss": 2.4608, + "step": 4655 + }, + { + "epoch": 0.83, + "learning_rate": 4.723911111111111e-05, + "loss": 2.4645, + "step": 4660 + }, + { + "epoch": 0.83, + "learning_rate": 4.7236148148148154e-05, + "loss": 2.4516, + "step": 4665 + }, + { + "epoch": 0.83, + "learning_rate": 4.7233185185185186e-05, + "loss": 2.323, + "step": 4670 + }, + { + "epoch": 0.83, + "learning_rate": 4.7230222222222225e-05, + "loss": 2.4933, + "step": 4675 + }, + { + "epoch": 0.83, + "learning_rate": 4.722725925925926e-05, + "loss": 2.4274, + "step": 4680 + }, + { + "epoch": 0.83, + "learning_rate": 4.72242962962963e-05, + "loss": 2.4182, + "step": 4685 + }, + { + "epoch": 0.83, + "learning_rate": 4.7221333333333335e-05, + "loss": 2.3939, + "step": 4690 + }, + { + "epoch": 0.83, + "learning_rate": 4.7218370370370374e-05, + "loss": 2.5969, + "step": 4695 + }, + { + "epoch": 0.84, + "learning_rate": 4.7215407407407406e-05, + "loss": 2.4727, + "step": 4700 + }, + { + "epoch": 0.84, + "learning_rate": 4.721244444444445e-05, + "loss": 2.5203, + "step": 4705 + }, + { + "epoch": 0.84, + "learning_rate": 4.720948148148148e-05, + "loss": 2.4477, + "step": 4710 + }, + { + "epoch": 0.84, + "learning_rate": 4.720651851851852e-05, + "loss": 2.5023, + "step": 4715 + }, + { + "epoch": 0.84, + "learning_rate": 4.7203555555555554e-05, + "loss": 2.5075, + "step": 4720 + }, + { + "epoch": 0.84, + "learning_rate": 4.720059259259259e-05, + "loss": 2.4749, + "step": 4725 + }, + { + "epoch": 0.84, + "learning_rate": 4.719762962962963e-05, + "loss": 2.4227, + "step": 4730 + }, + { + "epoch": 0.84, + "learning_rate": 4.719466666666667e-05, + "loss": 2.3535, + "step": 4735 + }, + { + "epoch": 0.84, + "learning_rate": 4.71917037037037e-05, + "loss": 2.5053, + "step": 4740 + }, + { + "epoch": 0.84, + "learning_rate": 4.718874074074074e-05, + "loss": 2.2861, + "step": 4745 + }, + { + "epoch": 0.84, + "learning_rate": 4.718577777777778e-05, + "loss": 2.435, + "step": 4750 + }, + { + "epoch": 0.85, + "learning_rate": 4.718281481481482e-05, + "loss": 2.3364, + "step": 4755 + }, + { + "epoch": 0.85, + "learning_rate": 4.717985185185185e-05, + "loss": 2.444, + "step": 4760 + }, + { + "epoch": 0.85, + "learning_rate": 4.717688888888889e-05, + "loss": 2.4585, + "step": 4765 + }, + { + "epoch": 0.85, + "learning_rate": 4.717392592592593e-05, + "loss": 2.3898, + "step": 4770 + }, + { + "epoch": 0.85, + "learning_rate": 4.717096296296297e-05, + "loss": 2.4509, + "step": 4775 + }, + { + "epoch": 0.85, + "learning_rate": 4.7168e-05, + "loss": 2.4245, + "step": 4780 + }, + { + "epoch": 0.85, + "learning_rate": 4.716503703703704e-05, + "loss": 2.5839, + "step": 4785 + }, + { + "epoch": 0.85, + "learning_rate": 4.716207407407408e-05, + "loss": 2.3871, + "step": 4790 + }, + { + "epoch": 0.85, + "learning_rate": 4.7159111111111116e-05, + "loss": 2.2953, + "step": 4795 + }, + { + "epoch": 0.85, + "learning_rate": 4.715614814814815e-05, + "loss": 2.3996, + "step": 4800 + }, + { + "epoch": 0.85, + "learning_rate": 4.715318518518519e-05, + "loss": 2.3379, + "step": 4805 + }, + { + "epoch": 0.86, + "learning_rate": 4.7150222222222226e-05, + "loss": 2.4048, + "step": 4810 + }, + { + "epoch": 0.86, + "learning_rate": 4.7147259259259265e-05, + "loss": 2.3484, + "step": 4815 + }, + { + "epoch": 0.86, + "learning_rate": 4.71442962962963e-05, + "loss": 2.5595, + "step": 4820 + }, + { + "epoch": 0.86, + "learning_rate": 4.7141333333333336e-05, + "loss": 2.4604, + "step": 4825 + }, + { + "epoch": 0.86, + "learning_rate": 4.7138370370370374e-05, + "loss": 2.4274, + "step": 4830 + }, + { + "epoch": 0.86, + "learning_rate": 4.713540740740741e-05, + "loss": 2.3699, + "step": 4835 + }, + { + "epoch": 0.86, + "learning_rate": 4.7132444444444445e-05, + "loss": 2.3518, + "step": 4840 + }, + { + "epoch": 0.86, + "learning_rate": 4.7129481481481484e-05, + "loss": 2.4632, + "step": 4845 + }, + { + "epoch": 0.86, + "learning_rate": 4.712651851851852e-05, + "loss": 2.3874, + "step": 4850 + }, + { + "epoch": 0.86, + "learning_rate": 4.712355555555556e-05, + "loss": 2.3997, + "step": 4855 + }, + { + "epoch": 0.86, + "learning_rate": 4.7120592592592594e-05, + "loss": 2.5184, + "step": 4860 + }, + { + "epoch": 0.86, + "learning_rate": 4.711762962962963e-05, + "loss": 2.4227, + "step": 4865 + }, + { + "epoch": 0.87, + "learning_rate": 4.711466666666667e-05, + "loss": 2.4983, + "step": 4870 + }, + { + "epoch": 0.87, + "learning_rate": 4.711170370370371e-05, + "loss": 2.4304, + "step": 4875 + }, + { + "epoch": 0.87, + "learning_rate": 4.710874074074074e-05, + "loss": 2.4887, + "step": 4880 + }, + { + "epoch": 0.87, + "learning_rate": 4.710577777777778e-05, + "loss": 2.5103, + "step": 4885 + }, + { + "epoch": 0.87, + "learning_rate": 4.710281481481481e-05, + "loss": 2.3891, + "step": 4890 + }, + { + "epoch": 0.87, + "learning_rate": 4.709985185185186e-05, + "loss": 2.4116, + "step": 4895 + }, + { + "epoch": 0.87, + "learning_rate": 4.709688888888889e-05, + "loss": 2.4161, + "step": 4900 + }, + { + "epoch": 0.87, + "learning_rate": 4.709392592592593e-05, + "loss": 2.581, + "step": 4905 + }, + { + "epoch": 0.87, + "learning_rate": 4.709096296296296e-05, + "loss": 2.3946, + "step": 4910 + }, + { + "epoch": 0.87, + "learning_rate": 4.708800000000001e-05, + "loss": 2.3597, + "step": 4915 + }, + { + "epoch": 0.87, + "learning_rate": 4.708503703703704e-05, + "loss": 2.3345, + "step": 4920 + }, + { + "epoch": 0.88, + "learning_rate": 4.708207407407408e-05, + "loss": 2.3779, + "step": 4925 + }, + { + "epoch": 0.88, + "learning_rate": 4.707911111111111e-05, + "loss": 2.4762, + "step": 4930 + }, + { + "epoch": 0.88, + "learning_rate": 4.7076148148148156e-05, + "loss": 2.4625, + "step": 4935 + }, + { + "epoch": 0.88, + "learning_rate": 4.707318518518519e-05, + "loss": 2.4128, + "step": 4940 + }, + { + "epoch": 0.88, + "learning_rate": 4.707022222222223e-05, + "loss": 2.4835, + "step": 4945 + }, + { + "epoch": 0.88, + "learning_rate": 4.706725925925926e-05, + "loss": 2.3698, + "step": 4950 + }, + { + "epoch": 0.88, + "learning_rate": 4.70642962962963e-05, + "loss": 2.4273, + "step": 4955 + }, + { + "epoch": 0.88, + "learning_rate": 4.7061333333333336e-05, + "loss": 2.7041, + "step": 4960 + }, + { + "epoch": 0.88, + "learning_rate": 4.7058370370370375e-05, + "loss": 2.3832, + "step": 4965 + }, + { + "epoch": 0.88, + "learning_rate": 4.705540740740741e-05, + "loss": 2.371, + "step": 4970 + }, + { + "epoch": 0.88, + "learning_rate": 4.7052444444444446e-05, + "loss": 2.3734, + "step": 4975 + }, + { + "epoch": 0.89, + "learning_rate": 4.7049481481481485e-05, + "loss": 2.414, + "step": 4980 + }, + { + "epoch": 0.89, + "learning_rate": 4.7046518518518524e-05, + "loss": 2.5053, + "step": 4985 + }, + { + "epoch": 0.89, + "learning_rate": 4.7043555555555556e-05, + "loss": 2.4202, + "step": 4990 + }, + { + "epoch": 0.89, + "learning_rate": 4.7040592592592595e-05, + "loss": 2.3025, + "step": 4995 + }, + { + "epoch": 0.89, + "learning_rate": 4.703762962962963e-05, + "loss": 2.4485, + "step": 5000 + }, + { + "epoch": 0.89, + "learning_rate": 4.703466666666667e-05, + "loss": 2.4395, + "step": 5005 + }, + { + "epoch": 0.89, + "learning_rate": 4.7031703703703704e-05, + "loss": 2.4186, + "step": 5010 + }, + { + "epoch": 0.89, + "learning_rate": 4.702874074074074e-05, + "loss": 2.4557, + "step": 5015 + }, + { + "epoch": 0.89, + "learning_rate": 4.702577777777778e-05, + "loss": 2.5432, + "step": 5020 + }, + { + "epoch": 0.89, + "learning_rate": 4.702281481481482e-05, + "loss": 2.3154, + "step": 5025 + }, + { + "epoch": 0.89, + "learning_rate": 4.701985185185185e-05, + "loss": 2.4949, + "step": 5030 + }, + { + "epoch": 0.9, + "learning_rate": 4.701688888888889e-05, + "loss": 2.3995, + "step": 5035 + }, + { + "epoch": 0.9, + "learning_rate": 4.701392592592593e-05, + "loss": 2.4746, + "step": 5040 + }, + { + "epoch": 0.9, + "learning_rate": 4.701096296296297e-05, + "loss": 2.4757, + "step": 5045 + }, + { + "epoch": 0.9, + "learning_rate": 4.7008e-05, + "loss": 2.4692, + "step": 5050 + }, + { + "epoch": 0.9, + "learning_rate": 4.700503703703704e-05, + "loss": 2.3452, + "step": 5055 + }, + { + "epoch": 0.9, + "learning_rate": 4.700207407407408e-05, + "loss": 2.5242, + "step": 5060 + }, + { + "epoch": 0.9, + "learning_rate": 4.699911111111112e-05, + "loss": 2.4604, + "step": 5065 + }, + { + "epoch": 0.9, + "learning_rate": 4.699614814814815e-05, + "loss": 2.3832, + "step": 5070 + }, + { + "epoch": 0.9, + "learning_rate": 4.699318518518519e-05, + "loss": 2.3376, + "step": 5075 + }, + { + "epoch": 0.9, + "learning_rate": 4.699022222222223e-05, + "loss": 2.4679, + "step": 5080 + }, + { + "epoch": 0.9, + "learning_rate": 4.6987259259259266e-05, + "loss": 2.3359, + "step": 5085 + }, + { + "epoch": 0.9, + "learning_rate": 4.69842962962963e-05, + "loss": 2.3866, + "step": 5090 + }, + { + "epoch": 0.91, + "learning_rate": 4.698133333333334e-05, + "loss": 2.3574, + "step": 5095 + }, + { + "epoch": 0.91, + "learning_rate": 4.6978370370370376e-05, + "loss": 2.4682, + "step": 5100 + }, + { + "epoch": 0.91, + "learning_rate": 4.6975407407407415e-05, + "loss": 2.4025, + "step": 5105 + }, + { + "epoch": 0.91, + "learning_rate": 4.697244444444445e-05, + "loss": 2.2214, + "step": 5110 + }, + { + "epoch": 0.91, + "learning_rate": 4.696948148148148e-05, + "loss": 2.4172, + "step": 5115 + }, + { + "epoch": 0.91, + "learning_rate": 4.696651851851852e-05, + "loss": 2.3537, + "step": 5120 + }, + { + "epoch": 0.91, + "learning_rate": 4.6963555555555556e-05, + "loss": 2.4709, + "step": 5125 + }, + { + "epoch": 0.91, + "learning_rate": 4.6960592592592595e-05, + "loss": 2.2421, + "step": 5130 + }, + { + "epoch": 0.91, + "learning_rate": 4.695762962962963e-05, + "loss": 2.4343, + "step": 5135 + }, + { + "epoch": 0.91, + "learning_rate": 4.6954666666666666e-05, + "loss": 2.4245, + "step": 5140 + }, + { + "epoch": 0.91, + "learning_rate": 4.6951703703703705e-05, + "loss": 2.3552, + "step": 5145 + }, + { + "epoch": 0.92, + "learning_rate": 4.6948740740740744e-05, + "loss": 2.4264, + "step": 5150 + }, + { + "epoch": 0.92, + "learning_rate": 4.6945777777777776e-05, + "loss": 2.3034, + "step": 5155 + }, + { + "epoch": 0.92, + "learning_rate": 4.6942814814814815e-05, + "loss": 2.4818, + "step": 5160 + }, + { + "epoch": 0.92, + "learning_rate": 4.6939851851851853e-05, + "loss": 2.4698, + "step": 5165 + }, + { + "epoch": 0.92, + "learning_rate": 4.693688888888889e-05, + "loss": 2.3307, + "step": 5170 + }, + { + "epoch": 0.92, + "learning_rate": 4.6933925925925924e-05, + "loss": 2.4316, + "step": 5175 + }, + { + "epoch": 0.92, + "learning_rate": 4.693096296296296e-05, + "loss": 2.4789, + "step": 5180 + }, + { + "epoch": 0.92, + "learning_rate": 4.6928e-05, + "loss": 2.4746, + "step": 5185 + }, + { + "epoch": 0.92, + "learning_rate": 4.692503703703704e-05, + "loss": 2.3931, + "step": 5190 + }, + { + "epoch": 0.92, + "learning_rate": 4.692207407407407e-05, + "loss": 2.3844, + "step": 5195 + }, + { + "epoch": 0.92, + "learning_rate": 4.691911111111111e-05, + "loss": 2.4915, + "step": 5200 + }, + { + "epoch": 0.93, + "learning_rate": 4.691614814814815e-05, + "loss": 2.3076, + "step": 5205 + }, + { + "epoch": 0.93, + "learning_rate": 4.691318518518519e-05, + "loss": 2.508, + "step": 5210 + }, + { + "epoch": 0.93, + "learning_rate": 4.691022222222222e-05, + "loss": 2.369, + "step": 5215 + }, + { + "epoch": 0.93, + "learning_rate": 4.690725925925926e-05, + "loss": 2.3622, + "step": 5220 + }, + { + "epoch": 0.93, + "learning_rate": 4.69042962962963e-05, + "loss": 2.5399, + "step": 5225 + }, + { + "epoch": 0.93, + "learning_rate": 4.690133333333334e-05, + "loss": 2.3469, + "step": 5230 + }, + { + "epoch": 0.93, + "learning_rate": 4.689837037037037e-05, + "loss": 2.3712, + "step": 5235 + }, + { + "epoch": 0.93, + "learning_rate": 4.689540740740741e-05, + "loss": 2.4749, + "step": 5240 + }, + { + "epoch": 0.93, + "learning_rate": 4.689244444444445e-05, + "loss": 2.4072, + "step": 5245 + }, + { + "epoch": 0.93, + "learning_rate": 4.6889481481481486e-05, + "loss": 2.327, + "step": 5250 + }, + { + "epoch": 0.93, + "learning_rate": 4.688651851851852e-05, + "loss": 2.4527, + "step": 5255 + }, + { + "epoch": 0.94, + "learning_rate": 4.688355555555556e-05, + "loss": 2.3353, + "step": 5260 + }, + { + "epoch": 0.94, + "learning_rate": 4.688059259259259e-05, + "loss": 2.3977, + "step": 5265 + }, + { + "epoch": 0.94, + "learning_rate": 4.6877629629629635e-05, + "loss": 2.5054, + "step": 5270 + }, + { + "epoch": 0.94, + "learning_rate": 4.687466666666667e-05, + "loss": 2.4468, + "step": 5275 + }, + { + "epoch": 0.94, + "learning_rate": 4.6871703703703706e-05, + "loss": 2.3576, + "step": 5280 + }, + { + "epoch": 0.94, + "learning_rate": 4.686874074074074e-05, + "loss": 2.3727, + "step": 5285 + }, + { + "epoch": 0.94, + "learning_rate": 4.686577777777778e-05, + "loss": 2.3217, + "step": 5290 + }, + { + "epoch": 0.94, + "learning_rate": 4.6862814814814815e-05, + "loss": 2.3375, + "step": 5295 + }, + { + "epoch": 0.94, + "learning_rate": 4.6859851851851854e-05, + "loss": 2.3502, + "step": 5300 + }, + { + "epoch": 0.94, + "learning_rate": 4.6856888888888886e-05, + "loss": 2.4503, + "step": 5305 + }, + { + "epoch": 0.94, + "learning_rate": 4.685392592592593e-05, + "loss": 2.3362, + "step": 5310 + }, + { + "epoch": 0.94, + "learning_rate": 4.6850962962962964e-05, + "loss": 2.4725, + "step": 5315 + }, + { + "epoch": 0.95, + "learning_rate": 4.6848e-05, + "loss": 2.5309, + "step": 5320 + }, + { + "epoch": 0.95, + "learning_rate": 4.6845037037037035e-05, + "loss": 2.3586, + "step": 5325 + }, + { + "epoch": 0.95, + "learning_rate": 4.684207407407408e-05, + "loss": 2.3396, + "step": 5330 + }, + { + "epoch": 0.95, + "learning_rate": 4.683911111111111e-05, + "loss": 2.4974, + "step": 5335 + }, + { + "epoch": 0.95, + "learning_rate": 4.683614814814815e-05, + "loss": 2.3944, + "step": 5340 + }, + { + "epoch": 0.95, + "learning_rate": 4.683318518518518e-05, + "loss": 2.3822, + "step": 5345 + }, + { + "epoch": 0.95, + "learning_rate": 4.683022222222222e-05, + "loss": 2.536, + "step": 5350 + }, + { + "epoch": 0.95, + "learning_rate": 4.682725925925926e-05, + "loss": 2.4728, + "step": 5355 + }, + { + "epoch": 0.95, + "learning_rate": 4.68242962962963e-05, + "loss": 2.5047, + "step": 5360 + }, + { + "epoch": 0.95, + "learning_rate": 4.682133333333333e-05, + "loss": 2.3307, + "step": 5365 + }, + { + "epoch": 0.95, + "learning_rate": 4.681837037037037e-05, + "loss": 2.3908, + "step": 5370 + }, + { + "epoch": 0.96, + "learning_rate": 4.681540740740741e-05, + "loss": 2.4032, + "step": 5375 + }, + { + "epoch": 0.96, + "learning_rate": 4.681244444444445e-05, + "loss": 2.3613, + "step": 5380 + }, + { + "epoch": 0.96, + "learning_rate": 4.680948148148148e-05, + "loss": 2.4218, + "step": 5385 + }, + { + "epoch": 0.96, + "learning_rate": 4.680651851851852e-05, + "loss": 2.2862, + "step": 5390 + }, + { + "epoch": 0.96, + "learning_rate": 4.680355555555556e-05, + "loss": 2.2378, + "step": 5395 + }, + { + "epoch": 0.96, + "learning_rate": 4.68005925925926e-05, + "loss": 2.4057, + "step": 5400 + }, + { + "epoch": 0.96, + "learning_rate": 4.679762962962963e-05, + "loss": 2.3884, + "step": 5405 + }, + { + "epoch": 0.96, + "learning_rate": 4.679466666666667e-05, + "loss": 2.5152, + "step": 5410 + }, + { + "epoch": 0.96, + "learning_rate": 4.6791703703703706e-05, + "loss": 2.4994, + "step": 5415 + }, + { + "epoch": 0.96, + "learning_rate": 4.6788740740740745e-05, + "loss": 2.4554, + "step": 5420 + }, + { + "epoch": 0.96, + "learning_rate": 4.678577777777778e-05, + "loss": 2.5549, + "step": 5425 + }, + { + "epoch": 0.97, + "learning_rate": 4.6782814814814816e-05, + "loss": 2.4477, + "step": 5430 + }, + { + "epoch": 0.97, + "learning_rate": 4.6779851851851855e-05, + "loss": 2.401, + "step": 5435 + }, + { + "epoch": 0.97, + "learning_rate": 4.6776888888888894e-05, + "loss": 2.4176, + "step": 5440 + }, + { + "epoch": 0.97, + "learning_rate": 4.6773925925925926e-05, + "loss": 2.539, + "step": 5445 + }, + { + "epoch": 0.97, + "learning_rate": 4.6770962962962965e-05, + "loss": 2.44, + "step": 5450 + }, + { + "epoch": 0.97, + "learning_rate": 4.6768000000000004e-05, + "loss": 2.4583, + "step": 5455 + }, + { + "epoch": 0.97, + "learning_rate": 4.676503703703704e-05, + "loss": 2.294, + "step": 5460 + }, + { + "epoch": 0.97, + "learning_rate": 4.6762074074074074e-05, + "loss": 2.3093, + "step": 5465 + }, + { + "epoch": 0.97, + "learning_rate": 4.675911111111111e-05, + "loss": 2.2936, + "step": 5470 + }, + { + "epoch": 0.97, + "learning_rate": 4.675614814814815e-05, + "loss": 2.4079, + "step": 5475 + }, + { + "epoch": 0.97, + "learning_rate": 4.675318518518519e-05, + "loss": 2.4775, + "step": 5480 + }, + { + "epoch": 0.98, + "learning_rate": 4.675022222222222e-05, + "loss": 2.3401, + "step": 5485 + }, + { + "epoch": 0.98, + "learning_rate": 4.674725925925926e-05, + "loss": 2.3485, + "step": 5490 + }, + { + "epoch": 0.98, + "learning_rate": 4.6744296296296294e-05, + "loss": 2.2853, + "step": 5495 + }, + { + "epoch": 0.98, + "learning_rate": 4.674133333333334e-05, + "loss": 2.3732, + "step": 5500 + }, + { + "epoch": 0.98, + "learning_rate": 4.673837037037037e-05, + "loss": 2.4763, + "step": 5505 + }, + { + "epoch": 0.98, + "learning_rate": 4.673540740740741e-05, + "loss": 2.3342, + "step": 5510 + }, + { + "epoch": 0.98, + "learning_rate": 4.673244444444444e-05, + "loss": 2.3536, + "step": 5515 + }, + { + "epoch": 0.98, + "learning_rate": 4.672948148148149e-05, + "loss": 2.4202, + "step": 5520 + }, + { + "epoch": 0.98, + "learning_rate": 4.672651851851852e-05, + "loss": 2.4043, + "step": 5525 + }, + { + "epoch": 0.98, + "learning_rate": 4.672355555555556e-05, + "loss": 2.4102, + "step": 5530 + }, + { + "epoch": 0.98, + "learning_rate": 4.672059259259259e-05, + "loss": 2.3792, + "step": 5535 + }, + { + "epoch": 0.98, + "learning_rate": 4.6717629629629636e-05, + "loss": 2.3936, + "step": 5540 + }, + { + "epoch": 0.99, + "learning_rate": 4.671466666666667e-05, + "loss": 2.5566, + "step": 5545 + }, + { + "epoch": 0.99, + "learning_rate": 4.671170370370371e-05, + "loss": 2.4846, + "step": 5550 + }, + { + "epoch": 0.99, + "learning_rate": 4.670874074074074e-05, + "loss": 2.4058, + "step": 5555 + }, + { + "epoch": 0.99, + "learning_rate": 4.6705777777777785e-05, + "loss": 2.5478, + "step": 5560 + }, + { + "epoch": 0.99, + "learning_rate": 4.670281481481482e-05, + "loss": 2.3242, + "step": 5565 + }, + { + "epoch": 0.99, + "learning_rate": 4.6699851851851856e-05, + "loss": 2.4553, + "step": 5570 + }, + { + "epoch": 0.99, + "learning_rate": 4.669688888888889e-05, + "loss": 2.4632, + "step": 5575 + }, + { + "epoch": 0.99, + "learning_rate": 4.669392592592593e-05, + "loss": 2.4154, + "step": 5580 + }, + { + "epoch": 0.99, + "learning_rate": 4.6690962962962965e-05, + "loss": 2.4006, + "step": 5585 + }, + { + "epoch": 0.99, + "learning_rate": 4.6688000000000004e-05, + "loss": 2.4437, + "step": 5590 + }, + { + "epoch": 0.99, + "learning_rate": 4.6685037037037036e-05, + "loss": 2.4406, + "step": 5595 + }, + { + "epoch": 1.0, + "learning_rate": 4.6682074074074075e-05, + "loss": 2.315, + "step": 5600 + }, + { + "epoch": 1.0, + "learning_rate": 4.6679111111111114e-05, + "loss": 2.4427, + "step": 5605 + }, + { + "epoch": 1.0, + "learning_rate": 4.667614814814815e-05, + "loss": 2.3768, + "step": 5610 + }, + { + "epoch": 1.0, + "learning_rate": 4.6673185185185185e-05, + "loss": 2.4391, + "step": 5615 + }, + { + "epoch": 1.0, + "learning_rate": 4.6670222222222224e-05, + "loss": 2.5285, + "step": 5620 + }, + { + "epoch": 1.0, + "learning_rate": 4.666725925925926e-05, + "loss": 2.4759, + "step": 5625 + }, + { + "epoch": 1.0, + "learning_rate": 4.66642962962963e-05, + "loss": 2.4138, + "step": 5630 + }, + { + "epoch": 1.0, + "learning_rate": 4.666133333333333e-05, + "loss": 2.3682, + "step": 5635 + }, + { + "epoch": 1.0, + "learning_rate": 4.665837037037037e-05, + "loss": 2.2183, + "step": 5640 + }, + { + "epoch": 1.0, + "learning_rate": 4.665540740740741e-05, + "loss": 2.2696, + "step": 5645 + }, + { + "epoch": 1.0, + "learning_rate": 4.665244444444445e-05, + "loss": 2.2216, + "step": 5650 + }, + { + "epoch": 1.01, + "learning_rate": 4.664948148148148e-05, + "loss": 2.2421, + "step": 5655 + }, + { + "epoch": 1.01, + "learning_rate": 4.664651851851852e-05, + "loss": 2.2556, + "step": 5660 + }, + { + "epoch": 1.01, + "learning_rate": 4.664355555555556e-05, + "loss": 2.4091, + "step": 5665 + }, + { + "epoch": 1.01, + "learning_rate": 4.66405925925926e-05, + "loss": 2.302, + "step": 5670 + }, + { + "epoch": 1.01, + "learning_rate": 4.663762962962963e-05, + "loss": 2.2694, + "step": 5675 + }, + { + "epoch": 1.01, + "learning_rate": 4.663466666666667e-05, + "loss": 2.1418, + "step": 5680 + }, + { + "epoch": 1.01, + "learning_rate": 4.663170370370371e-05, + "loss": 2.3009, + "step": 5685 + }, + { + "epoch": 1.01, + "learning_rate": 4.662874074074075e-05, + "loss": 2.2199, + "step": 5690 + }, + { + "epoch": 1.01, + "learning_rate": 4.662577777777778e-05, + "loss": 2.1113, + "step": 5695 + }, + { + "epoch": 1.01, + "learning_rate": 4.662281481481482e-05, + "loss": 2.1406, + "step": 5700 + }, + { + "epoch": 1.01, + "learning_rate": 4.6619851851851857e-05, + "loss": 2.3233, + "step": 5705 + }, + { + "epoch": 1.02, + "learning_rate": 4.6616888888888895e-05, + "loss": 2.2806, + "step": 5710 + }, + { + "epoch": 1.02, + "learning_rate": 4.661392592592593e-05, + "loss": 2.241, + "step": 5715 + }, + { + "epoch": 1.02, + "learning_rate": 4.6610962962962966e-05, + "loss": 2.2779, + "step": 5720 + }, + { + "epoch": 1.02, + "learning_rate": 4.6608e-05, + "loss": 2.3018, + "step": 5725 + }, + { + "epoch": 1.02, + "learning_rate": 4.6605037037037044e-05, + "loss": 2.2499, + "step": 5730 + }, + { + "epoch": 1.02, + "learning_rate": 4.6602074074074076e-05, + "loss": 2.3864, + "step": 5735 + }, + { + "epoch": 1.02, + "learning_rate": 4.6599111111111115e-05, + "loss": 2.321, + "step": 5740 + }, + { + "epoch": 1.02, + "learning_rate": 4.659614814814815e-05, + "loss": 2.0841, + "step": 5745 + }, + { + "epoch": 1.02, + "learning_rate": 4.659318518518519e-05, + "loss": 2.3812, + "step": 5750 + }, + { + "epoch": 1.02, + "learning_rate": 4.6590222222222224e-05, + "loss": 2.3174, + "step": 5755 + }, + { + "epoch": 1.02, + "learning_rate": 4.658725925925926e-05, + "loss": 2.2251, + "step": 5760 + }, + { + "epoch": 1.02, + "learning_rate": 4.6584296296296295e-05, + "loss": 2.2463, + "step": 5765 + }, + { + "epoch": 1.03, + "learning_rate": 4.658133333333334e-05, + "loss": 2.1952, + "step": 5770 + }, + { + "epoch": 1.03, + "learning_rate": 4.657837037037037e-05, + "loss": 2.3955, + "step": 5775 + }, + { + "epoch": 1.03, + "learning_rate": 4.657540740740741e-05, + "loss": 2.1974, + "step": 5780 + }, + { + "epoch": 1.03, + "learning_rate": 4.6572444444444444e-05, + "loss": 2.2574, + "step": 5785 + }, + { + "epoch": 1.03, + "learning_rate": 4.656948148148149e-05, + "loss": 2.3413, + "step": 5790 + }, + { + "epoch": 1.03, + "learning_rate": 4.656651851851852e-05, + "loss": 2.2958, + "step": 5795 + }, + { + "epoch": 1.03, + "learning_rate": 4.656355555555556e-05, + "loss": 2.2036, + "step": 5800 + }, + { + "epoch": 1.03, + "learning_rate": 4.656059259259259e-05, + "loss": 2.3152, + "step": 5805 + }, + { + "epoch": 1.03, + "learning_rate": 4.655762962962963e-05, + "loss": 2.1738, + "step": 5810 + }, + { + "epoch": 1.03, + "learning_rate": 4.655466666666667e-05, + "loss": 2.2618, + "step": 5815 + }, + { + "epoch": 1.03, + "learning_rate": 4.655170370370371e-05, + "loss": 2.2715, + "step": 5820 + }, + { + "epoch": 1.04, + "learning_rate": 4.654874074074074e-05, + "loss": 2.4127, + "step": 5825 + }, + { + "epoch": 1.04, + "learning_rate": 4.654577777777778e-05, + "loss": 2.3517, + "step": 5830 + }, + { + "epoch": 1.04, + "learning_rate": 4.654281481481482e-05, + "loss": 2.2022, + "step": 5835 + }, + { + "epoch": 1.04, + "learning_rate": 4.653985185185186e-05, + "loss": 2.4314, + "step": 5840 + }, + { + "epoch": 1.04, + "learning_rate": 4.653688888888889e-05, + "loss": 2.1655, + "step": 5845 + }, + { + "epoch": 1.04, + "learning_rate": 4.653392592592593e-05, + "loss": 2.3021, + "step": 5850 + }, + { + "epoch": 1.04, + "learning_rate": 4.653096296296297e-05, + "loss": 2.4702, + "step": 5855 + }, + { + "epoch": 1.04, + "learning_rate": 4.6528000000000006e-05, + "loss": 2.1836, + "step": 5860 + }, + { + "epoch": 1.04, + "learning_rate": 4.652503703703704e-05, + "loss": 2.2377, + "step": 5865 + }, + { + "epoch": 1.04, + "learning_rate": 4.652207407407408e-05, + "loss": 2.3459, + "step": 5870 + }, + { + "epoch": 1.04, + "learning_rate": 4.6519111111111116e-05, + "loss": 2.2778, + "step": 5875 + }, + { + "epoch": 1.05, + "learning_rate": 4.6516148148148154e-05, + "loss": 2.2239, + "step": 5880 + }, + { + "epoch": 1.05, + "learning_rate": 4.6513185185185186e-05, + "loss": 2.3235, + "step": 5885 + }, + { + "epoch": 1.05, + "learning_rate": 4.651022222222222e-05, + "loss": 2.2257, + "step": 5890 + }, + { + "epoch": 1.05, + "learning_rate": 4.6507259259259264e-05, + "loss": 2.1825, + "step": 5895 + }, + { + "epoch": 1.05, + "learning_rate": 4.6504296296296296e-05, + "loss": 2.2704, + "step": 5900 + }, + { + "epoch": 1.05, + "learning_rate": 4.6501333333333335e-05, + "loss": 2.2846, + "step": 5905 + }, + { + "epoch": 1.05, + "learning_rate": 4.649837037037037e-05, + "loss": 2.353, + "step": 5910 + }, + { + "epoch": 1.05, + "learning_rate": 4.649540740740741e-05, + "loss": 2.3631, + "step": 5915 + }, + { + "epoch": 1.05, + "learning_rate": 4.6492444444444445e-05, + "loss": 2.3174, + "step": 5920 + }, + { + "epoch": 1.05, + "learning_rate": 4.6489481481481483e-05, + "loss": 2.1961, + "step": 5925 + }, + { + "epoch": 1.05, + "learning_rate": 4.6486518518518515e-05, + "loss": 2.2129, + "step": 5930 + }, + { + "epoch": 1.06, + "learning_rate": 4.648355555555556e-05, + "loss": 2.1397, + "step": 5935 + }, + { + "epoch": 1.06, + "learning_rate": 4.648059259259259e-05, + "loss": 2.2563, + "step": 5940 + }, + { + "epoch": 1.06, + "learning_rate": 4.647762962962963e-05, + "loss": 2.2357, + "step": 5945 + }, + { + "epoch": 1.06, + "learning_rate": 4.6474666666666664e-05, + "loss": 2.3086, + "step": 5950 + }, + { + "epoch": 1.06, + "learning_rate": 4.647170370370371e-05, + "loss": 2.3182, + "step": 5955 + }, + { + "epoch": 1.06, + "learning_rate": 4.646874074074074e-05, + "loss": 2.2669, + "step": 5960 + }, + { + "epoch": 1.06, + "learning_rate": 4.646577777777778e-05, + "loss": 2.3287, + "step": 5965 + }, + { + "epoch": 1.06, + "learning_rate": 4.646281481481481e-05, + "loss": 2.3493, + "step": 5970 + }, + { + "epoch": 1.06, + "learning_rate": 4.645985185185185e-05, + "loss": 2.3483, + "step": 5975 + }, + { + "epoch": 1.06, + "learning_rate": 4.645688888888889e-05, + "loss": 2.1827, + "step": 5980 + }, + { + "epoch": 1.06, + "learning_rate": 4.645392592592593e-05, + "loss": 2.3309, + "step": 5985 + }, + { + "epoch": 1.06, + "learning_rate": 4.645096296296296e-05, + "loss": 2.2168, + "step": 5990 + }, + { + "epoch": 1.07, + "learning_rate": 4.6448e-05, + "loss": 2.4169, + "step": 5995 + }, + { + "epoch": 1.07, + "learning_rate": 4.644503703703704e-05, + "loss": 2.2138, + "step": 6000 + }, + { + "epoch": 1.07, + "learning_rate": 4.644207407407408e-05, + "loss": 2.2737, + "step": 6005 + }, + { + "epoch": 1.07, + "learning_rate": 4.643911111111111e-05, + "loss": 2.3454, + "step": 6010 + }, + { + "epoch": 1.07, + "learning_rate": 4.643614814814815e-05, + "loss": 2.2545, + "step": 6015 + }, + { + "epoch": 1.07, + "learning_rate": 4.643318518518519e-05, + "loss": 2.2839, + "step": 6020 + }, + { + "epoch": 1.07, + "learning_rate": 4.6430222222222226e-05, + "loss": 2.1243, + "step": 6025 + }, + { + "epoch": 1.07, + "learning_rate": 4.642725925925926e-05, + "loss": 2.3119, + "step": 6030 + }, + { + "epoch": 1.07, + "learning_rate": 4.64242962962963e-05, + "loss": 2.3298, + "step": 6035 + }, + { + "epoch": 1.07, + "learning_rate": 4.6421333333333336e-05, + "loss": 2.1839, + "step": 6040 + }, + { + "epoch": 1.07, + "learning_rate": 4.6418370370370374e-05, + "loss": 2.3518, + "step": 6045 + }, + { + "epoch": 1.08, + "learning_rate": 4.6416000000000004e-05, + "loss": 2.2573, + "step": 6050 + }, + { + "epoch": 1.08, + "learning_rate": 4.6413037037037036e-05, + "loss": 2.322, + "step": 6055 + }, + { + "epoch": 1.08, + "learning_rate": 4.6410074074074075e-05, + "loss": 2.3098, + "step": 6060 + }, + { + "epoch": 1.08, + "learning_rate": 4.6407111111111114e-05, + "loss": 2.2697, + "step": 6065 + }, + { + "epoch": 1.08, + "learning_rate": 4.640414814814815e-05, + "loss": 2.2009, + "step": 6070 + }, + { + "epoch": 1.08, + "learning_rate": 4.6401185185185185e-05, + "loss": 2.2627, + "step": 6075 + }, + { + "epoch": 1.08, + "learning_rate": 4.6398222222222224e-05, + "loss": 2.4355, + "step": 6080 + }, + { + "epoch": 1.08, + "learning_rate": 4.639525925925926e-05, + "loss": 2.3458, + "step": 6085 + }, + { + "epoch": 1.08, + "learning_rate": 4.63922962962963e-05, + "loss": 2.3021, + "step": 6090 + }, + { + "epoch": 1.08, + "learning_rate": 4.638933333333333e-05, + "loss": 2.3315, + "step": 6095 + }, + { + "epoch": 1.08, + "learning_rate": 4.638637037037037e-05, + "loss": 2.3391, + "step": 6100 + }, + { + "epoch": 1.09, + "learning_rate": 4.638340740740741e-05, + "loss": 2.2762, + "step": 6105 + }, + { + "epoch": 1.09, + "learning_rate": 4.638044444444445e-05, + "loss": 2.3654, + "step": 6110 + }, + { + "epoch": 1.09, + "learning_rate": 4.637748148148148e-05, + "loss": 2.2251, + "step": 6115 + }, + { + "epoch": 1.09, + "learning_rate": 4.637451851851852e-05, + "loss": 2.3115, + "step": 6120 + }, + { + "epoch": 1.09, + "learning_rate": 4.637155555555556e-05, + "loss": 2.1566, + "step": 6125 + }, + { + "epoch": 1.09, + "learning_rate": 4.63685925925926e-05, + "loss": 2.1691, + "step": 6130 + }, + { + "epoch": 1.09, + "learning_rate": 4.636562962962963e-05, + "loss": 2.1287, + "step": 6135 + }, + { + "epoch": 1.09, + "learning_rate": 4.636266666666667e-05, + "loss": 2.2591, + "step": 6140 + }, + { + "epoch": 1.09, + "learning_rate": 4.635970370370371e-05, + "loss": 2.204, + "step": 6145 + }, + { + "epoch": 1.09, + "learning_rate": 4.635674074074075e-05, + "loss": 2.208, + "step": 6150 + }, + { + "epoch": 1.09, + "learning_rate": 4.635377777777778e-05, + "loss": 2.2976, + "step": 6155 + }, + { + "epoch": 1.1, + "learning_rate": 4.635081481481482e-05, + "loss": 2.2446, + "step": 6160 + }, + { + "epoch": 1.1, + "learning_rate": 4.6347851851851856e-05, + "loss": 2.1389, + "step": 6165 + }, + { + "epoch": 1.1, + "learning_rate": 4.6344888888888895e-05, + "loss": 2.2893, + "step": 6170 + }, + { + "epoch": 1.1, + "learning_rate": 4.634192592592593e-05, + "loss": 2.2795, + "step": 6175 + }, + { + "epoch": 1.1, + "learning_rate": 4.6338962962962966e-05, + "loss": 2.2333, + "step": 6180 + }, + { + "epoch": 1.1, + "learning_rate": 4.6336000000000005e-05, + "loss": 2.1521, + "step": 6185 + }, + { + "epoch": 1.1, + "learning_rate": 4.6333037037037044e-05, + "loss": 2.3204, + "step": 6190 + }, + { + "epoch": 1.1, + "learning_rate": 4.6330074074074076e-05, + "loss": 2.2406, + "step": 6195 + }, + { + "epoch": 1.1, + "learning_rate": 4.6327111111111115e-05, + "loss": 2.3181, + "step": 6200 + }, + { + "epoch": 1.1, + "learning_rate": 4.632414814814815e-05, + "loss": 2.3463, + "step": 6205 + }, + { + "epoch": 1.1, + "learning_rate": 4.632118518518519e-05, + "loss": 2.4011, + "step": 6210 + }, + { + "epoch": 1.1, + "learning_rate": 4.6318222222222224e-05, + "loss": 2.2294, + "step": 6215 + }, + { + "epoch": 1.11, + "learning_rate": 4.631525925925926e-05, + "loss": 2.2613, + "step": 6220 + }, + { + "epoch": 1.11, + "learning_rate": 4.6312296296296295e-05, + "loss": 2.3396, + "step": 6225 + }, + { + "epoch": 1.11, + "learning_rate": 4.630933333333334e-05, + "loss": 2.2936, + "step": 6230 + }, + { + "epoch": 1.11, + "learning_rate": 4.630637037037037e-05, + "loss": 2.2127, + "step": 6235 + }, + { + "epoch": 1.11, + "learning_rate": 4.630340740740741e-05, + "loss": 2.2135, + "step": 6240 + }, + { + "epoch": 1.11, + "learning_rate": 4.6300444444444444e-05, + "loss": 2.2102, + "step": 6245 + }, + { + "epoch": 1.11, + "learning_rate": 4.629748148148149e-05, + "loss": 2.2245, + "step": 6250 + }, + { + "epoch": 1.11, + "learning_rate": 4.629451851851852e-05, + "loss": 2.2822, + "step": 6255 + }, + { + "epoch": 1.11, + "learning_rate": 4.629155555555556e-05, + "loss": 2.2077, + "step": 6260 + }, + { + "epoch": 1.11, + "learning_rate": 4.628859259259259e-05, + "loss": 2.3078, + "step": 6265 + }, + { + "epoch": 1.11, + "learning_rate": 4.628562962962963e-05, + "loss": 2.1341, + "step": 6270 + }, + { + "epoch": 1.12, + "learning_rate": 4.628266666666667e-05, + "loss": 2.2508, + "step": 6275 + }, + { + "epoch": 1.12, + "learning_rate": 4.627970370370371e-05, + "loss": 2.2801, + "step": 6280 + }, + { + "epoch": 1.12, + "learning_rate": 4.627674074074074e-05, + "loss": 2.2972, + "step": 6285 + }, + { + "epoch": 1.12, + "learning_rate": 4.627377777777778e-05, + "loss": 2.3508, + "step": 6290 + }, + { + "epoch": 1.12, + "learning_rate": 4.627081481481482e-05, + "loss": 2.2515, + "step": 6295 + }, + { + "epoch": 1.12, + "learning_rate": 4.626785185185186e-05, + "loss": 2.2735, + "step": 6300 + }, + { + "epoch": 1.12, + "learning_rate": 4.626488888888889e-05, + "loss": 2.287, + "step": 6305 + }, + { + "epoch": 1.12, + "learning_rate": 4.626192592592593e-05, + "loss": 2.1915, + "step": 6310 + }, + { + "epoch": 1.12, + "learning_rate": 4.625896296296297e-05, + "loss": 2.3413, + "step": 6315 + }, + { + "epoch": 1.12, + "learning_rate": 4.6256000000000006e-05, + "loss": 2.1968, + "step": 6320 + }, + { + "epoch": 1.12, + "learning_rate": 4.625303703703704e-05, + "loss": 2.2362, + "step": 6325 + }, + { + "epoch": 1.13, + "learning_rate": 4.6250074074074077e-05, + "loss": 2.214, + "step": 6330 + }, + { + "epoch": 1.13, + "learning_rate": 4.6247111111111115e-05, + "loss": 2.1207, + "step": 6335 + }, + { + "epoch": 1.13, + "learning_rate": 4.6244148148148154e-05, + "loss": 2.25, + "step": 6340 + }, + { + "epoch": 1.13, + "learning_rate": 4.6241185185185186e-05, + "loss": 2.3796, + "step": 6345 + }, + { + "epoch": 1.13, + "learning_rate": 4.623822222222222e-05, + "loss": 2.3142, + "step": 6350 + }, + { + "epoch": 1.13, + "learning_rate": 4.6235259259259264e-05, + "loss": 2.2502, + "step": 6355 + }, + { + "epoch": 1.13, + "learning_rate": 4.6232296296296296e-05, + "loss": 2.2969, + "step": 6360 + }, + { + "epoch": 1.13, + "learning_rate": 4.6229333333333335e-05, + "loss": 2.1859, + "step": 6365 + }, + { + "epoch": 1.13, + "learning_rate": 4.622637037037037e-05, + "loss": 2.3306, + "step": 6370 + }, + { + "epoch": 1.13, + "learning_rate": 4.622340740740741e-05, + "loss": 2.2796, + "step": 6375 + }, + { + "epoch": 1.13, + "learning_rate": 4.6220444444444444e-05, + "loss": 2.2645, + "step": 6380 + }, + { + "epoch": 1.14, + "learning_rate": 4.621748148148148e-05, + "loss": 2.2048, + "step": 6385 + }, + { + "epoch": 1.14, + "learning_rate": 4.6214518518518515e-05, + "loss": 2.3097, + "step": 6390 + }, + { + "epoch": 1.14, + "learning_rate": 4.621155555555556e-05, + "loss": 2.2794, + "step": 6395 + }, + { + "epoch": 1.14, + "learning_rate": 4.620859259259259e-05, + "loss": 2.2973, + "step": 6400 + }, + { + "epoch": 1.14, + "learning_rate": 4.620562962962963e-05, + "loss": 2.2797, + "step": 6405 + }, + { + "epoch": 1.14, + "learning_rate": 4.6202666666666664e-05, + "loss": 2.2649, + "step": 6410 + }, + { + "epoch": 1.14, + "learning_rate": 4.619970370370371e-05, + "loss": 2.27, + "step": 6415 + }, + { + "epoch": 1.14, + "learning_rate": 4.619674074074074e-05, + "loss": 2.1985, + "step": 6420 + }, + { + "epoch": 1.14, + "learning_rate": 4.619377777777778e-05, + "loss": 2.3526, + "step": 6425 + }, + { + "epoch": 1.14, + "learning_rate": 4.619081481481481e-05, + "loss": 2.2487, + "step": 6430 + }, + { + "epoch": 1.14, + "learning_rate": 4.618785185185185e-05, + "loss": 2.251, + "step": 6435 + }, + { + "epoch": 1.14, + "learning_rate": 4.618488888888889e-05, + "loss": 2.2163, + "step": 6440 + }, + { + "epoch": 1.15, + "learning_rate": 4.618192592592593e-05, + "loss": 2.3746, + "step": 6445 + }, + { + "epoch": 1.15, + "learning_rate": 4.617896296296296e-05, + "loss": 2.2269, + "step": 6450 + }, + { + "epoch": 1.15, + "learning_rate": 4.6176e-05, + "loss": 2.2837, + "step": 6455 + }, + { + "epoch": 1.15, + "learning_rate": 4.617303703703704e-05, + "loss": 2.2553, + "step": 6460 + }, + { + "epoch": 1.15, + "learning_rate": 4.617007407407408e-05, + "loss": 2.2647, + "step": 6465 + }, + { + "epoch": 1.15, + "learning_rate": 4.616711111111111e-05, + "loss": 2.2199, + "step": 6470 + }, + { + "epoch": 1.15, + "learning_rate": 4.616414814814815e-05, + "loss": 2.2478, + "step": 6475 + }, + { + "epoch": 1.15, + "learning_rate": 4.616118518518519e-05, + "loss": 2.183, + "step": 6480 + }, + { + "epoch": 1.15, + "learning_rate": 4.6158222222222226e-05, + "loss": 2.2811, + "step": 6485 + }, + { + "epoch": 1.15, + "learning_rate": 4.615525925925926e-05, + "loss": 2.2932, + "step": 6490 + }, + { + "epoch": 1.15, + "learning_rate": 4.61522962962963e-05, + "loss": 2.2424, + "step": 6495 + }, + { + "epoch": 1.16, + "learning_rate": 4.6149333333333336e-05, + "loss": 2.3681, + "step": 6500 + }, + { + "epoch": 1.16, + "learning_rate": 4.6146370370370374e-05, + "loss": 2.287, + "step": 6505 + }, + { + "epoch": 1.16, + "learning_rate": 4.6143407407407406e-05, + "loss": 2.17, + "step": 6510 + }, + { + "epoch": 1.16, + "learning_rate": 4.6140444444444445e-05, + "loss": 2.3137, + "step": 6515 + }, + { + "epoch": 1.16, + "learning_rate": 4.6137481481481484e-05, + "loss": 2.282, + "step": 6520 + }, + { + "epoch": 1.16, + "learning_rate": 4.613451851851852e-05, + "loss": 2.3593, + "step": 6525 + }, + { + "epoch": 1.16, + "learning_rate": 4.6131555555555555e-05, + "loss": 2.2093, + "step": 6530 + }, + { + "epoch": 1.16, + "learning_rate": 4.6128592592592594e-05, + "loss": 2.4281, + "step": 6535 + }, + { + "epoch": 1.16, + "learning_rate": 4.612562962962963e-05, + "loss": 2.2629, + "step": 6540 + }, + { + "epoch": 1.16, + "learning_rate": 4.612266666666667e-05, + "loss": 2.2337, + "step": 6545 + }, + { + "epoch": 1.16, + "learning_rate": 4.6119703703703703e-05, + "loss": 2.3051, + "step": 6550 + }, + { + "epoch": 1.17, + "learning_rate": 4.611674074074074e-05, + "loss": 2.2408, + "step": 6555 + }, + { + "epoch": 1.17, + "learning_rate": 4.611377777777778e-05, + "loss": 2.3408, + "step": 6560 + }, + { + "epoch": 1.17, + "learning_rate": 4.611081481481482e-05, + "loss": 2.0711, + "step": 6565 + }, + { + "epoch": 1.17, + "learning_rate": 4.610785185185185e-05, + "loss": 2.3387, + "step": 6570 + }, + { + "epoch": 1.17, + "learning_rate": 4.610488888888889e-05, + "loss": 2.2371, + "step": 6575 + }, + { + "epoch": 1.17, + "learning_rate": 4.610192592592592e-05, + "loss": 2.3484, + "step": 6580 + }, + { + "epoch": 1.17, + "learning_rate": 4.609896296296297e-05, + "loss": 2.345, + "step": 6585 + }, + { + "epoch": 1.17, + "learning_rate": 4.6096e-05, + "loss": 2.1856, + "step": 6590 + }, + { + "epoch": 1.17, + "learning_rate": 4.609303703703704e-05, + "loss": 2.2727, + "step": 6595 + }, + { + "epoch": 1.17, + "learning_rate": 4.609007407407407e-05, + "loss": 2.2317, + "step": 6600 + }, + { + "epoch": 1.17, + "learning_rate": 4.608711111111112e-05, + "loss": 2.229, + "step": 6605 + }, + { + "epoch": 1.18, + "learning_rate": 4.608414814814815e-05, + "loss": 2.3958, + "step": 6610 + }, + { + "epoch": 1.18, + "learning_rate": 4.608118518518519e-05, + "loss": 2.247, + "step": 6615 + }, + { + "epoch": 1.18, + "learning_rate": 4.607822222222222e-05, + "loss": 2.2758, + "step": 6620 + }, + { + "epoch": 1.18, + "learning_rate": 4.6075259259259265e-05, + "loss": 2.2069, + "step": 6625 + }, + { + "epoch": 1.18, + "learning_rate": 4.60722962962963e-05, + "loss": 2.3289, + "step": 6630 + }, + { + "epoch": 1.18, + "learning_rate": 4.6069333333333336e-05, + "loss": 2.3894, + "step": 6635 + }, + { + "epoch": 1.18, + "learning_rate": 4.6066962962962966e-05, + "loss": 2.1465, + "step": 6640 + }, + { + "epoch": 1.18, + "learning_rate": 4.6064000000000005e-05, + "loss": 2.0789, + "step": 6645 + }, + { + "epoch": 1.18, + "learning_rate": 4.6061037037037044e-05, + "loss": 2.3641, + "step": 6650 + }, + { + "epoch": 1.18, + "learning_rate": 4.6058074074074076e-05, + "loss": 2.2511, + "step": 6655 + }, + { + "epoch": 1.18, + "learning_rate": 4.6055111111111115e-05, + "loss": 2.2238, + "step": 6660 + }, + { + "epoch": 1.18, + "learning_rate": 4.6052148148148147e-05, + "loss": 2.1556, + "step": 6665 + }, + { + "epoch": 1.19, + "learning_rate": 4.604918518518519e-05, + "loss": 2.2347, + "step": 6670 + }, + { + "epoch": 1.19, + "learning_rate": 4.6046222222222224e-05, + "loss": 2.2219, + "step": 6675 + }, + { + "epoch": 1.19, + "learning_rate": 4.604325925925926e-05, + "loss": 2.291, + "step": 6680 + }, + { + "epoch": 1.19, + "learning_rate": 4.6040296296296295e-05, + "loss": 2.3262, + "step": 6685 + }, + { + "epoch": 1.19, + "learning_rate": 4.603733333333334e-05, + "loss": 2.4022, + "step": 6690 + }, + { + "epoch": 1.19, + "learning_rate": 4.603437037037037e-05, + "loss": 2.2977, + "step": 6695 + }, + { + "epoch": 1.19, + "learning_rate": 4.603140740740741e-05, + "loss": 2.1813, + "step": 6700 + }, + { + "epoch": 1.19, + "learning_rate": 4.6028444444444444e-05, + "loss": 2.3603, + "step": 6705 + }, + { + "epoch": 1.19, + "learning_rate": 4.602548148148149e-05, + "loss": 2.3723, + "step": 6710 + }, + { + "epoch": 1.19, + "learning_rate": 4.602251851851852e-05, + "loss": 2.4143, + "step": 6715 + }, + { + "epoch": 1.19, + "learning_rate": 4.601955555555556e-05, + "loss": 2.3454, + "step": 6720 + }, + { + "epoch": 1.2, + "learning_rate": 4.601659259259259e-05, + "loss": 2.248, + "step": 6725 + }, + { + "epoch": 1.2, + "learning_rate": 4.601362962962963e-05, + "loss": 2.3829, + "step": 6730 + }, + { + "epoch": 1.2, + "learning_rate": 4.601066666666667e-05, + "loss": 2.1406, + "step": 6735 + }, + { + "epoch": 1.2, + "learning_rate": 4.600770370370371e-05, + "loss": 2.2528, + "step": 6740 + }, + { + "epoch": 1.2, + "learning_rate": 4.600474074074074e-05, + "loss": 2.2589, + "step": 6745 + }, + { + "epoch": 1.2, + "learning_rate": 4.600177777777778e-05, + "loss": 2.2276, + "step": 6750 + }, + { + "epoch": 1.2, + "learning_rate": 4.599881481481482e-05, + "loss": 2.2346, + "step": 6755 + }, + { + "epoch": 1.2, + "learning_rate": 4.599585185185186e-05, + "loss": 2.23, + "step": 6760 + }, + { + "epoch": 1.2, + "learning_rate": 4.599288888888889e-05, + "loss": 2.3288, + "step": 6765 + }, + { + "epoch": 1.2, + "learning_rate": 4.598992592592593e-05, + "loss": 2.3982, + "step": 6770 + }, + { + "epoch": 1.2, + "learning_rate": 4.598696296296297e-05, + "loss": 2.2338, + "step": 6775 + }, + { + "epoch": 1.21, + "learning_rate": 4.5984000000000006e-05, + "loss": 2.2898, + "step": 6780 + }, + { + "epoch": 1.21, + "learning_rate": 4.598103703703704e-05, + "loss": 2.2546, + "step": 6785 + }, + { + "epoch": 1.21, + "learning_rate": 4.5978074074074076e-05, + "loss": 2.3104, + "step": 6790 + }, + { + "epoch": 1.21, + "learning_rate": 4.5975111111111115e-05, + "loss": 2.2883, + "step": 6795 + }, + { + "epoch": 1.21, + "learning_rate": 4.5972148148148154e-05, + "loss": 2.1726, + "step": 6800 + }, + { + "epoch": 1.21, + "learning_rate": 4.5969185185185186e-05, + "loss": 2.2614, + "step": 6805 + }, + { + "epoch": 1.21, + "learning_rate": 4.5966222222222225e-05, + "loss": 2.3599, + "step": 6810 + }, + { + "epoch": 1.21, + "learning_rate": 4.5963259259259264e-05, + "loss": 2.3086, + "step": 6815 + }, + { + "epoch": 1.21, + "learning_rate": 4.5960296296296296e-05, + "loss": 2.2824, + "step": 6820 + }, + { + "epoch": 1.21, + "learning_rate": 4.5957333333333335e-05, + "loss": 2.3178, + "step": 6825 + }, + { + "epoch": 1.21, + "learning_rate": 4.595437037037037e-05, + "loss": 2.2981, + "step": 6830 + }, + { + "epoch": 1.22, + "learning_rate": 4.595140740740741e-05, + "loss": 2.3144, + "step": 6835 + }, + { + "epoch": 1.22, + "learning_rate": 4.5948444444444444e-05, + "loss": 2.3403, + "step": 6840 + }, + { + "epoch": 1.22, + "learning_rate": 4.594548148148148e-05, + "loss": 2.2833, + "step": 6845 + }, + { + "epoch": 1.22, + "learning_rate": 4.5942518518518515e-05, + "loss": 2.3044, + "step": 6850 + }, + { + "epoch": 1.22, + "learning_rate": 4.593955555555556e-05, + "loss": 2.2435, + "step": 6855 + }, + { + "epoch": 1.22, + "learning_rate": 4.593659259259259e-05, + "loss": 2.1663, + "step": 6860 + }, + { + "epoch": 1.22, + "learning_rate": 4.593362962962963e-05, + "loss": 2.2313, + "step": 6865 + }, + { + "epoch": 1.22, + "learning_rate": 4.5930666666666664e-05, + "loss": 2.1784, + "step": 6870 + }, + { + "epoch": 1.22, + "learning_rate": 4.592770370370371e-05, + "loss": 2.1213, + "step": 6875 + }, + { + "epoch": 1.22, + "learning_rate": 4.592474074074074e-05, + "loss": 2.404, + "step": 6880 + }, + { + "epoch": 1.22, + "learning_rate": 4.592177777777778e-05, + "loss": 2.2114, + "step": 6885 + }, + { + "epoch": 1.22, + "learning_rate": 4.591881481481481e-05, + "loss": 2.2593, + "step": 6890 + }, + { + "epoch": 1.23, + "learning_rate": 4.591585185185185e-05, + "loss": 2.3533, + "step": 6895 + }, + { + "epoch": 1.23, + "learning_rate": 4.591288888888889e-05, + "loss": 2.3332, + "step": 6900 + }, + { + "epoch": 1.23, + "learning_rate": 4.590992592592593e-05, + "loss": 2.2287, + "step": 6905 + }, + { + "epoch": 1.23, + "learning_rate": 4.590696296296296e-05, + "loss": 2.1564, + "step": 6910 + }, + { + "epoch": 1.23, + "learning_rate": 4.5904e-05, + "loss": 2.3331, + "step": 6915 + }, + { + "epoch": 1.23, + "learning_rate": 4.590103703703704e-05, + "loss": 2.3208, + "step": 6920 + }, + { + "epoch": 1.23, + "learning_rate": 4.589807407407408e-05, + "loss": 2.2578, + "step": 6925 + }, + { + "epoch": 1.23, + "learning_rate": 4.589511111111111e-05, + "loss": 2.1348, + "step": 6930 + }, + { + "epoch": 1.23, + "learning_rate": 4.589214814814815e-05, + "loss": 2.3644, + "step": 6935 + }, + { + "epoch": 1.23, + "learning_rate": 4.588918518518519e-05, + "loss": 2.2255, + "step": 6940 + }, + { + "epoch": 1.23, + "learning_rate": 4.5886222222222226e-05, + "loss": 2.283, + "step": 6945 + }, + { + "epoch": 1.24, + "learning_rate": 4.588325925925926e-05, + "loss": 2.2609, + "step": 6950 + }, + { + "epoch": 1.24, + "learning_rate": 4.58802962962963e-05, + "loss": 2.2377, + "step": 6955 + }, + { + "epoch": 1.24, + "learning_rate": 4.5877333333333335e-05, + "loss": 2.2686, + "step": 6960 + }, + { + "epoch": 1.24, + "learning_rate": 4.5874370370370374e-05, + "loss": 2.3053, + "step": 6965 + }, + { + "epoch": 1.24, + "learning_rate": 4.5871407407407406e-05, + "loss": 2.2412, + "step": 6970 + }, + { + "epoch": 1.24, + "learning_rate": 4.5868444444444445e-05, + "loss": 2.2785, + "step": 6975 + }, + { + "epoch": 1.24, + "learning_rate": 4.5865481481481484e-05, + "loss": 2.2444, + "step": 6980 + }, + { + "epoch": 1.24, + "learning_rate": 4.586251851851852e-05, + "loss": 2.1793, + "step": 6985 + }, + { + "epoch": 1.24, + "learning_rate": 4.5859555555555555e-05, + "loss": 2.2404, + "step": 6990 + }, + { + "epoch": 1.24, + "learning_rate": 4.5856592592592594e-05, + "loss": 2.198, + "step": 6995 + }, + { + "epoch": 1.24, + "learning_rate": 4.585362962962963e-05, + "loss": 2.1602, + "step": 7000 + }, + { + "epoch": 1.25, + "learning_rate": 4.585066666666667e-05, + "loss": 2.2226, + "step": 7005 + }, + { + "epoch": 1.25, + "learning_rate": 4.58477037037037e-05, + "loss": 2.3, + "step": 7010 + }, + { + "epoch": 1.25, + "learning_rate": 4.584474074074074e-05, + "loss": 2.1955, + "step": 7015 + }, + { + "epoch": 1.25, + "learning_rate": 4.584177777777778e-05, + "loss": 2.2604, + "step": 7020 + }, + { + "epoch": 1.25, + "learning_rate": 4.583881481481482e-05, + "loss": 2.3292, + "step": 7025 + }, + { + "epoch": 1.25, + "learning_rate": 4.583585185185185e-05, + "loss": 2.3145, + "step": 7030 + }, + { + "epoch": 1.25, + "learning_rate": 4.583288888888889e-05, + "loss": 2.2457, + "step": 7035 + }, + { + "epoch": 1.25, + "learning_rate": 4.582992592592593e-05, + "loss": 2.3012, + "step": 7040 + }, + { + "epoch": 1.25, + "learning_rate": 4.582696296296297e-05, + "loss": 2.2856, + "step": 7045 + }, + { + "epoch": 1.25, + "learning_rate": 4.5824e-05, + "loss": 2.27, + "step": 7050 + }, + { + "epoch": 1.25, + "learning_rate": 4.582103703703704e-05, + "loss": 2.2098, + "step": 7055 + }, + { + "epoch": 1.26, + "learning_rate": 4.581807407407407e-05, + "loss": 2.3036, + "step": 7060 + }, + { + "epoch": 1.26, + "learning_rate": 4.581511111111112e-05, + "loss": 2.2739, + "step": 7065 + }, + { + "epoch": 1.26, + "learning_rate": 4.581214814814815e-05, + "loss": 2.2305, + "step": 7070 + }, + { + "epoch": 1.26, + "learning_rate": 4.580918518518519e-05, + "loss": 2.1138, + "step": 7075 + }, + { + "epoch": 1.26, + "learning_rate": 4.580622222222222e-05, + "loss": 2.3126, + "step": 7080 + }, + { + "epoch": 1.26, + "learning_rate": 4.5803259259259265e-05, + "loss": 2.3705, + "step": 7085 + }, + { + "epoch": 1.26, + "learning_rate": 4.58002962962963e-05, + "loss": 2.3436, + "step": 7090 + }, + { + "epoch": 1.26, + "learning_rate": 4.5797333333333336e-05, + "loss": 2.2504, + "step": 7095 + }, + { + "epoch": 1.26, + "learning_rate": 4.579437037037037e-05, + "loss": 2.2131, + "step": 7100 + }, + { + "epoch": 1.26, + "learning_rate": 4.5791407407407414e-05, + "loss": 2.2504, + "step": 7105 + }, + { + "epoch": 1.26, + "learning_rate": 4.5788444444444446e-05, + "loss": 2.2675, + "step": 7110 + }, + { + "epoch": 1.26, + "learning_rate": 4.5785481481481485e-05, + "loss": 2.3812, + "step": 7115 + }, + { + "epoch": 1.27, + "learning_rate": 4.578251851851852e-05, + "loss": 2.243, + "step": 7120 + }, + { + "epoch": 1.27, + "learning_rate": 4.5779555555555556e-05, + "loss": 2.2187, + "step": 7125 + }, + { + "epoch": 1.27, + "learning_rate": 4.5776592592592594e-05, + "loss": 2.2334, + "step": 7130 + }, + { + "epoch": 1.27, + "learning_rate": 4.577362962962963e-05, + "loss": 2.2459, + "step": 7135 + }, + { + "epoch": 1.27, + "learning_rate": 4.5770666666666665e-05, + "loss": 2.3018, + "step": 7140 + }, + { + "epoch": 1.27, + "learning_rate": 4.5767703703703704e-05, + "loss": 2.3779, + "step": 7145 + }, + { + "epoch": 1.27, + "learning_rate": 4.576474074074074e-05, + "loss": 2.2553, + "step": 7150 + }, + { + "epoch": 1.27, + "learning_rate": 4.576177777777778e-05, + "loss": 2.2515, + "step": 7155 + }, + { + "epoch": 1.27, + "learning_rate": 4.5758814814814814e-05, + "loss": 2.1698, + "step": 7160 + }, + { + "epoch": 1.27, + "learning_rate": 4.575585185185185e-05, + "loss": 2.2536, + "step": 7165 + }, + { + "epoch": 1.27, + "learning_rate": 4.575288888888889e-05, + "loss": 2.3004, + "step": 7170 + }, + { + "epoch": 1.28, + "learning_rate": 4.574992592592593e-05, + "loss": 2.2575, + "step": 7175 + }, + { + "epoch": 1.28, + "learning_rate": 4.574696296296296e-05, + "loss": 2.3674, + "step": 7180 + }, + { + "epoch": 1.28, + "learning_rate": 4.5744e-05, + "loss": 2.2229, + "step": 7185 + }, + { + "epoch": 1.28, + "learning_rate": 4.574103703703704e-05, + "loss": 2.1819, + "step": 7190 + }, + { + "epoch": 1.28, + "learning_rate": 4.573807407407408e-05, + "loss": 2.3481, + "step": 7195 + }, + { + "epoch": 1.28, + "learning_rate": 4.573511111111111e-05, + "loss": 2.183, + "step": 7200 + }, + { + "epoch": 1.28, + "learning_rate": 4.573214814814815e-05, + "loss": 2.2309, + "step": 7205 + }, + { + "epoch": 1.28, + "learning_rate": 4.572918518518519e-05, + "loss": 2.181, + "step": 7210 + }, + { + "epoch": 1.28, + "learning_rate": 4.572622222222223e-05, + "loss": 2.1743, + "step": 7215 + }, + { + "epoch": 1.28, + "learning_rate": 4.572325925925926e-05, + "loss": 2.2209, + "step": 7220 + }, + { + "epoch": 1.28, + "learning_rate": 4.57202962962963e-05, + "loss": 2.2276, + "step": 7225 + }, + { + "epoch": 1.29, + "learning_rate": 4.571733333333334e-05, + "loss": 2.2843, + "step": 7230 + }, + { + "epoch": 1.29, + "learning_rate": 4.5714370370370376e-05, + "loss": 2.2599, + "step": 7235 + }, + { + "epoch": 1.29, + "learning_rate": 4.571140740740741e-05, + "loss": 2.2667, + "step": 7240 + }, + { + "epoch": 1.29, + "learning_rate": 4.570844444444445e-05, + "loss": 2.2352, + "step": 7245 + }, + { + "epoch": 1.29, + "learning_rate": 4.5705481481481485e-05, + "loss": 2.2337, + "step": 7250 + }, + { + "epoch": 1.29, + "learning_rate": 4.5702518518518524e-05, + "loss": 2.2564, + "step": 7255 + }, + { + "epoch": 1.29, + "learning_rate": 4.5699555555555556e-05, + "loss": 2.2869, + "step": 7260 + }, + { + "epoch": 1.29, + "learning_rate": 4.5696592592592595e-05, + "loss": 2.3509, + "step": 7265 + }, + { + "epoch": 1.29, + "learning_rate": 4.5693629629629634e-05, + "loss": 2.3603, + "step": 7270 + }, + { + "epoch": 1.29, + "learning_rate": 4.569066666666667e-05, + "loss": 2.2507, + "step": 7275 + }, + { + "epoch": 1.29, + "learning_rate": 4.5687703703703705e-05, + "loss": 2.3355, + "step": 7280 + }, + { + "epoch": 1.3, + "learning_rate": 4.5684740740740744e-05, + "loss": 2.2302, + "step": 7285 + }, + { + "epoch": 1.3, + "learning_rate": 4.5681777777777776e-05, + "loss": 2.1399, + "step": 7290 + }, + { + "epoch": 1.3, + "learning_rate": 4.567881481481482e-05, + "loss": 2.1978, + "step": 7295 + }, + { + "epoch": 1.3, + "learning_rate": 4.567585185185185e-05, + "loss": 2.3155, + "step": 7300 + }, + { + "epoch": 1.3, + "learning_rate": 4.567288888888889e-05, + "loss": 2.2613, + "step": 7305 + }, + { + "epoch": 1.3, + "learning_rate": 4.5669925925925924e-05, + "loss": 2.2463, + "step": 7310 + }, + { + "epoch": 1.3, + "learning_rate": 4.566696296296297e-05, + "loss": 2.3753, + "step": 7315 + }, + { + "epoch": 1.3, + "learning_rate": 4.5664e-05, + "loss": 2.3009, + "step": 7320 + }, + { + "epoch": 1.3, + "learning_rate": 4.566103703703704e-05, + "loss": 2.2551, + "step": 7325 + }, + { + "epoch": 1.3, + "learning_rate": 4.565807407407407e-05, + "loss": 2.1589, + "step": 7330 + }, + { + "epoch": 1.3, + "learning_rate": 4.565511111111112e-05, + "loss": 2.2831, + "step": 7335 + }, + { + "epoch": 1.3, + "learning_rate": 4.565214814814815e-05, + "loss": 2.2871, + "step": 7340 + }, + { + "epoch": 1.31, + "learning_rate": 4.564918518518519e-05, + "loss": 2.2423, + "step": 7345 + }, + { + "epoch": 1.31, + "learning_rate": 4.564622222222222e-05, + "loss": 2.2549, + "step": 7350 + }, + { + "epoch": 1.31, + "learning_rate": 4.564325925925926e-05, + "loss": 2.3964, + "step": 7355 + }, + { + "epoch": 1.31, + "learning_rate": 4.56402962962963e-05, + "loss": 2.2096, + "step": 7360 + }, + { + "epoch": 1.31, + "learning_rate": 4.563733333333334e-05, + "loss": 2.3593, + "step": 7365 + }, + { + "epoch": 1.31, + "learning_rate": 4.563437037037037e-05, + "loss": 2.186, + "step": 7370 + }, + { + "epoch": 1.31, + "learning_rate": 4.563140740740741e-05, + "loss": 2.2026, + "step": 7375 + }, + { + "epoch": 1.31, + "learning_rate": 4.562844444444445e-05, + "loss": 2.3176, + "step": 7380 + }, + { + "epoch": 1.31, + "learning_rate": 4.5625481481481486e-05, + "loss": 2.2689, + "step": 7385 + }, + { + "epoch": 1.31, + "learning_rate": 4.562251851851852e-05, + "loss": 2.2994, + "step": 7390 + }, + { + "epoch": 1.31, + "learning_rate": 4.561955555555556e-05, + "loss": 2.32, + "step": 7395 + }, + { + "epoch": 1.32, + "learning_rate": 4.5616592592592596e-05, + "loss": 2.2417, + "step": 7400 + }, + { + "epoch": 1.32, + "learning_rate": 4.5613629629629635e-05, + "loss": 2.1608, + "step": 7405 + }, + { + "epoch": 1.32, + "learning_rate": 4.561066666666667e-05, + "loss": 2.2433, + "step": 7410 + }, + { + "epoch": 1.32, + "learning_rate": 4.5607703703703706e-05, + "loss": 2.2954, + "step": 7415 + }, + { + "epoch": 1.32, + "learning_rate": 4.5604740740740744e-05, + "loss": 2.2712, + "step": 7420 + }, + { + "epoch": 1.32, + "learning_rate": 4.560177777777778e-05, + "loss": 2.2872, + "step": 7425 + }, + { + "epoch": 1.32, + "learning_rate": 4.5598814814814815e-05, + "loss": 2.3773, + "step": 7430 + }, + { + "epoch": 1.32, + "learning_rate": 4.5595851851851854e-05, + "loss": 2.2847, + "step": 7435 + }, + { + "epoch": 1.32, + "learning_rate": 4.559288888888889e-05, + "loss": 2.1243, + "step": 7440 + }, + { + "epoch": 1.32, + "learning_rate": 4.558992592592593e-05, + "loss": 2.2983, + "step": 7445 + }, + { + "epoch": 1.32, + "learning_rate": 4.5586962962962964e-05, + "loss": 2.2257, + "step": 7450 + }, + { + "epoch": 1.33, + "learning_rate": 4.5584e-05, + "loss": 2.3544, + "step": 7455 + }, + { + "epoch": 1.33, + "learning_rate": 4.558103703703704e-05, + "loss": 2.1814, + "step": 7460 + }, + { + "epoch": 1.33, + "learning_rate": 4.557807407407408e-05, + "loss": 2.2129, + "step": 7465 + }, + { + "epoch": 1.33, + "learning_rate": 4.557511111111111e-05, + "loss": 2.2303, + "step": 7470 + }, + { + "epoch": 1.33, + "learning_rate": 4.557214814814815e-05, + "loss": 2.3359, + "step": 7475 + }, + { + "epoch": 1.33, + "learning_rate": 4.556918518518519e-05, + "loss": 2.2404, + "step": 7480 + }, + { + "epoch": 1.33, + "learning_rate": 4.556622222222223e-05, + "loss": 2.3884, + "step": 7485 + }, + { + "epoch": 1.33, + "learning_rate": 4.556325925925926e-05, + "loss": 2.2906, + "step": 7490 + }, + { + "epoch": 1.33, + "learning_rate": 4.55602962962963e-05, + "loss": 2.2593, + "step": 7495 + }, + { + "epoch": 1.33, + "learning_rate": 4.555733333333334e-05, + "loss": 2.2034, + "step": 7500 + }, + { + "epoch": 1.33, + "learning_rate": 4.555437037037038e-05, + "loss": 2.383, + "step": 7505 + }, + { + "epoch": 1.34, + "learning_rate": 4.555140740740741e-05, + "loss": 2.2004, + "step": 7510 + }, + { + "epoch": 1.34, + "learning_rate": 4.554844444444445e-05, + "loss": 2.3607, + "step": 7515 + }, + { + "epoch": 1.34, + "learning_rate": 4.554548148148148e-05, + "loss": 2.1642, + "step": 7520 + }, + { + "epoch": 1.34, + "learning_rate": 4.5542518518518526e-05, + "loss": 2.1457, + "step": 7525 + }, + { + "epoch": 1.34, + "learning_rate": 4.553955555555556e-05, + "loss": 2.2545, + "step": 7530 + }, + { + "epoch": 1.34, + "learning_rate": 4.55365925925926e-05, + "loss": 2.2655, + "step": 7535 + }, + { + "epoch": 1.34, + "learning_rate": 4.553362962962963e-05, + "loss": 2.1745, + "step": 7540 + }, + { + "epoch": 1.34, + "learning_rate": 4.5530666666666674e-05, + "loss": 2.3619, + "step": 7545 + }, + { + "epoch": 1.34, + "learning_rate": 4.5527703703703706e-05, + "loss": 2.3244, + "step": 7550 + }, + { + "epoch": 1.34, + "learning_rate": 4.5524740740740745e-05, + "loss": 2.421, + "step": 7555 + }, + { + "epoch": 1.34, + "learning_rate": 4.552177777777778e-05, + "loss": 2.1832, + "step": 7560 + }, + { + "epoch": 1.34, + "learning_rate": 4.551881481481482e-05, + "loss": 2.2433, + "step": 7565 + }, + { + "epoch": 1.35, + "learning_rate": 4.5515851851851855e-05, + "loss": 2.3146, + "step": 7570 + }, + { + "epoch": 1.35, + "learning_rate": 4.5512888888888894e-05, + "loss": 2.2164, + "step": 7575 + }, + { + "epoch": 1.35, + "learning_rate": 4.5509925925925926e-05, + "loss": 2.13, + "step": 7580 + }, + { + "epoch": 1.35, + "learning_rate": 4.5506962962962965e-05, + "loss": 2.2406, + "step": 7585 + }, + { + "epoch": 1.35, + "learning_rate": 4.5504000000000003e-05, + "loss": 2.1664, + "step": 7590 + }, + { + "epoch": 1.35, + "learning_rate": 4.5501037037037035e-05, + "loss": 2.2063, + "step": 7595 + }, + { + "epoch": 1.35, + "learning_rate": 4.5498074074074074e-05, + "loss": 2.1979, + "step": 7600 + }, + { + "epoch": 1.35, + "learning_rate": 4.549511111111111e-05, + "loss": 2.2476, + "step": 7605 + }, + { + "epoch": 1.35, + "learning_rate": 4.549214814814815e-05, + "loss": 2.3224, + "step": 7610 + }, + { + "epoch": 1.35, + "learning_rate": 4.5489185185185184e-05, + "loss": 2.1408, + "step": 7615 + }, + { + "epoch": 1.35, + "learning_rate": 4.548622222222222e-05, + "loss": 2.2592, + "step": 7620 + }, + { + "epoch": 1.36, + "learning_rate": 4.548325925925926e-05, + "loss": 2.1739, + "step": 7625 + }, + { + "epoch": 1.36, + "learning_rate": 4.54802962962963e-05, + "loss": 2.2779, + "step": 7630 + }, + { + "epoch": 1.36, + "learning_rate": 4.547733333333333e-05, + "loss": 2.1724, + "step": 7635 + }, + { + "epoch": 1.36, + "learning_rate": 4.547437037037037e-05, + "loss": 2.1572, + "step": 7640 + }, + { + "epoch": 1.36, + "learning_rate": 4.547140740740741e-05, + "loss": 2.2909, + "step": 7645 + }, + { + "epoch": 1.36, + "learning_rate": 4.546844444444445e-05, + "loss": 2.3665, + "step": 7650 + }, + { + "epoch": 1.36, + "learning_rate": 4.546548148148148e-05, + "loss": 2.2536, + "step": 7655 + }, + { + "epoch": 1.36, + "learning_rate": 4.546251851851852e-05, + "loss": 2.2624, + "step": 7660 + }, + { + "epoch": 1.36, + "learning_rate": 4.545955555555555e-05, + "loss": 2.2469, + "step": 7665 + }, + { + "epoch": 1.36, + "learning_rate": 4.54565925925926e-05, + "loss": 2.2626, + "step": 7670 + }, + { + "epoch": 1.36, + "learning_rate": 4.545362962962963e-05, + "loss": 2.2693, + "step": 7675 + }, + { + "epoch": 1.37, + "learning_rate": 4.545066666666667e-05, + "loss": 2.0831, + "step": 7680 + }, + { + "epoch": 1.37, + "learning_rate": 4.54477037037037e-05, + "loss": 2.3036, + "step": 7685 + }, + { + "epoch": 1.37, + "learning_rate": 4.5444740740740746e-05, + "loss": 2.2692, + "step": 7690 + }, + { + "epoch": 1.37, + "learning_rate": 4.544177777777778e-05, + "loss": 2.212, + "step": 7695 + }, + { + "epoch": 1.37, + "learning_rate": 4.543881481481482e-05, + "loss": 2.2737, + "step": 7700 + }, + { + "epoch": 1.37, + "learning_rate": 4.543585185185185e-05, + "loss": 2.294, + "step": 7705 + }, + { + "epoch": 1.37, + "learning_rate": 4.5432888888888895e-05, + "loss": 2.2214, + "step": 7710 + }, + { + "epoch": 1.37, + "learning_rate": 4.5429925925925927e-05, + "loss": 2.2875, + "step": 7715 + }, + { + "epoch": 1.37, + "learning_rate": 4.5426962962962965e-05, + "loss": 2.2726, + "step": 7720 + }, + { + "epoch": 1.37, + "learning_rate": 4.5424e-05, + "loss": 2.1564, + "step": 7725 + }, + { + "epoch": 1.37, + "learning_rate": 4.542103703703704e-05, + "loss": 2.3015, + "step": 7730 + }, + { + "epoch": 1.38, + "learning_rate": 4.5418074074074075e-05, + "loss": 2.2133, + "step": 7735 + }, + { + "epoch": 1.38, + "learning_rate": 4.5415111111111114e-05, + "loss": 2.1914, + "step": 7740 + }, + { + "epoch": 1.38, + "learning_rate": 4.5412148148148146e-05, + "loss": 2.1841, + "step": 7745 + }, + { + "epoch": 1.38, + "learning_rate": 4.5409185185185185e-05, + "loss": 2.2633, + "step": 7750 + }, + { + "epoch": 1.38, + "learning_rate": 4.5406222222222224e-05, + "loss": 2.2805, + "step": 7755 + }, + { + "epoch": 1.38, + "learning_rate": 4.540325925925926e-05, + "loss": 2.1288, + "step": 7760 + }, + { + "epoch": 1.38, + "learning_rate": 4.5400296296296294e-05, + "loss": 2.1955, + "step": 7765 + }, + { + "epoch": 1.38, + "learning_rate": 4.539733333333333e-05, + "loss": 2.2045, + "step": 7770 + }, + { + "epoch": 1.38, + "learning_rate": 4.539437037037037e-05, + "loss": 2.1914, + "step": 7775 + }, + { + "epoch": 1.38, + "learning_rate": 4.539140740740741e-05, + "loss": 2.2926, + "step": 7780 + }, + { + "epoch": 1.38, + "learning_rate": 4.538844444444444e-05, + "loss": 2.2439, + "step": 7785 + }, + { + "epoch": 1.38, + "learning_rate": 4.538548148148148e-05, + "loss": 2.338, + "step": 7790 + }, + { + "epoch": 1.39, + "learning_rate": 4.538251851851852e-05, + "loss": 2.3185, + "step": 7795 + }, + { + "epoch": 1.39, + "learning_rate": 4.537955555555556e-05, + "loss": 2.2419, + "step": 7800 + }, + { + "epoch": 1.39, + "learning_rate": 4.537659259259259e-05, + "loss": 2.1946, + "step": 7805 + }, + { + "epoch": 1.39, + "learning_rate": 4.537362962962963e-05, + "loss": 2.1165, + "step": 7810 + }, + { + "epoch": 1.39, + "learning_rate": 4.537066666666667e-05, + "loss": 2.352, + "step": 7815 + }, + { + "epoch": 1.39, + "learning_rate": 4.536770370370371e-05, + "loss": 2.1178, + "step": 7820 + }, + { + "epoch": 1.39, + "learning_rate": 4.536474074074074e-05, + "loss": 2.2348, + "step": 7825 + }, + { + "epoch": 1.39, + "learning_rate": 4.536177777777778e-05, + "loss": 2.2033, + "step": 7830 + }, + { + "epoch": 1.39, + "learning_rate": 4.535881481481482e-05, + "loss": 2.3011, + "step": 7835 + }, + { + "epoch": 1.39, + "learning_rate": 4.5355851851851856e-05, + "loss": 2.2968, + "step": 7840 + }, + { + "epoch": 1.39, + "learning_rate": 4.535288888888889e-05, + "loss": 2.2689, + "step": 7845 + }, + { + "epoch": 1.4, + "learning_rate": 4.534992592592593e-05, + "loss": 2.24, + "step": 7850 + }, + { + "epoch": 1.4, + "learning_rate": 4.5346962962962966e-05, + "loss": 2.2188, + "step": 7855 + }, + { + "epoch": 1.4, + "learning_rate": 4.5344000000000005e-05, + "loss": 2.1723, + "step": 7860 + }, + { + "epoch": 1.4, + "learning_rate": 4.534103703703704e-05, + "loss": 2.2588, + "step": 7865 + }, + { + "epoch": 1.4, + "learning_rate": 4.5338074074074076e-05, + "loss": 2.2011, + "step": 7870 + }, + { + "epoch": 1.4, + "learning_rate": 4.5335111111111115e-05, + "loss": 2.343, + "step": 7875 + }, + { + "epoch": 1.4, + "learning_rate": 4.5332148148148153e-05, + "loss": 2.2202, + "step": 7880 + }, + { + "epoch": 1.4, + "learning_rate": 4.5329185185185186e-05, + "loss": 2.3572, + "step": 7885 + }, + { + "epoch": 1.4, + "learning_rate": 4.5326222222222224e-05, + "loss": 2.1689, + "step": 7890 + }, + { + "epoch": 1.4, + "learning_rate": 4.5323259259259256e-05, + "loss": 2.248, + "step": 7895 + }, + { + "epoch": 1.4, + "learning_rate": 4.53202962962963e-05, + "loss": 2.2938, + "step": 7900 + }, + { + "epoch": 1.41, + "learning_rate": 4.5317333333333334e-05, + "loss": 2.3777, + "step": 7905 + }, + { + "epoch": 1.41, + "learning_rate": 4.531437037037037e-05, + "loss": 2.192, + "step": 7910 + }, + { + "epoch": 1.41, + "learning_rate": 4.5311407407407405e-05, + "loss": 2.0929, + "step": 7915 + }, + { + "epoch": 1.41, + "learning_rate": 4.530844444444445e-05, + "loss": 2.3745, + "step": 7920 + }, + { + "epoch": 1.41, + "learning_rate": 4.530548148148148e-05, + "loss": 2.2037, + "step": 7925 + }, + { + "epoch": 1.41, + "learning_rate": 4.530251851851852e-05, + "loss": 2.2358, + "step": 7930 + }, + { + "epoch": 1.41, + "learning_rate": 4.5299555555555553e-05, + "loss": 2.2324, + "step": 7935 + }, + { + "epoch": 1.41, + "learning_rate": 4.52965925925926e-05, + "loss": 2.3352, + "step": 7940 + }, + { + "epoch": 1.41, + "learning_rate": 4.529362962962963e-05, + "loss": 2.277, + "step": 7945 + }, + { + "epoch": 1.41, + "learning_rate": 4.529066666666667e-05, + "loss": 2.3493, + "step": 7950 + }, + { + "epoch": 1.41, + "learning_rate": 4.52877037037037e-05, + "loss": 2.1944, + "step": 7955 + }, + { + "epoch": 1.42, + "learning_rate": 4.528474074074075e-05, + "loss": 2.3066, + "step": 7960 + }, + { + "epoch": 1.42, + "learning_rate": 4.528177777777778e-05, + "loss": 2.2026, + "step": 7965 + }, + { + "epoch": 1.42, + "learning_rate": 4.527881481481482e-05, + "loss": 2.1915, + "step": 7970 + }, + { + "epoch": 1.42, + "learning_rate": 4.527585185185185e-05, + "loss": 2.1089, + "step": 7975 + }, + { + "epoch": 1.42, + "learning_rate": 4.527288888888889e-05, + "loss": 2.0901, + "step": 7980 + }, + { + "epoch": 1.42, + "learning_rate": 4.526992592592593e-05, + "loss": 2.1283, + "step": 7985 + }, + { + "epoch": 1.42, + "learning_rate": 4.526696296296297e-05, + "loss": 2.328, + "step": 7990 + }, + { + "epoch": 1.42, + "learning_rate": 4.5264e-05, + "loss": 2.2328, + "step": 7995 + }, + { + "epoch": 1.42, + "learning_rate": 4.526103703703704e-05, + "loss": 2.2545, + "step": 8000 + }, + { + "epoch": 1.42, + "learning_rate": 4.5258074074074077e-05, + "loss": 2.1648, + "step": 8005 + }, + { + "epoch": 1.42, + "learning_rate": 4.5255111111111115e-05, + "loss": 2.2129, + "step": 8010 + }, + { + "epoch": 1.42, + "learning_rate": 4.525214814814815e-05, + "loss": 2.2575, + "step": 8015 + }, + { + "epoch": 1.43, + "learning_rate": 4.5249185185185186e-05, + "loss": 2.2243, + "step": 8020 + }, + { + "epoch": 1.43, + "learning_rate": 4.5246222222222225e-05, + "loss": 2.2518, + "step": 8025 + }, + { + "epoch": 1.43, + "learning_rate": 4.5243259259259264e-05, + "loss": 2.3025, + "step": 8030 + }, + { + "epoch": 1.43, + "learning_rate": 4.5240296296296296e-05, + "loss": 2.2745, + "step": 8035 + }, + { + "epoch": 1.43, + "learning_rate": 4.5237333333333335e-05, + "loss": 2.1712, + "step": 8040 + }, + { + "epoch": 1.43, + "learning_rate": 4.5234370370370374e-05, + "loss": 2.2009, + "step": 8045 + }, + { + "epoch": 1.43, + "learning_rate": 4.523140740740741e-05, + "loss": 2.2782, + "step": 8050 + }, + { + "epoch": 1.43, + "learning_rate": 4.5228444444444444e-05, + "loss": 2.17, + "step": 8055 + }, + { + "epoch": 1.43, + "learning_rate": 4.522548148148148e-05, + "loss": 2.2484, + "step": 8060 + }, + { + "epoch": 1.43, + "learning_rate": 4.522251851851852e-05, + "loss": 2.1069, + "step": 8065 + }, + { + "epoch": 1.43, + "learning_rate": 4.521955555555556e-05, + "loss": 2.029, + "step": 8070 + }, + { + "epoch": 1.44, + "learning_rate": 4.521659259259259e-05, + "loss": 2.1971, + "step": 8075 + }, + { + "epoch": 1.44, + "learning_rate": 4.521362962962963e-05, + "loss": 2.2024, + "step": 8080 + }, + { + "epoch": 1.44, + "learning_rate": 4.521066666666667e-05, + "loss": 2.2824, + "step": 8085 + }, + { + "epoch": 1.44, + "learning_rate": 4.520770370370371e-05, + "loss": 2.2766, + "step": 8090 + }, + { + "epoch": 1.44, + "learning_rate": 4.520474074074074e-05, + "loss": 2.2885, + "step": 8095 + }, + { + "epoch": 1.44, + "learning_rate": 4.520177777777778e-05, + "loss": 2.2258, + "step": 8100 + }, + { + "epoch": 1.44, + "learning_rate": 4.519881481481482e-05, + "loss": 2.1843, + "step": 8105 + }, + { + "epoch": 1.44, + "learning_rate": 4.519585185185186e-05, + "loss": 2.262, + "step": 8110 + }, + { + "epoch": 1.44, + "learning_rate": 4.519288888888889e-05, + "loss": 2.3421, + "step": 8115 + }, + { + "epoch": 1.44, + "learning_rate": 4.518992592592593e-05, + "loss": 2.2514, + "step": 8120 + }, + { + "epoch": 1.44, + "learning_rate": 4.518696296296296e-05, + "loss": 2.2268, + "step": 8125 + }, + { + "epoch": 1.45, + "learning_rate": 4.5184000000000006e-05, + "loss": 2.2646, + "step": 8130 + }, + { + "epoch": 1.45, + "learning_rate": 4.518103703703704e-05, + "loss": 2.3616, + "step": 8135 + }, + { + "epoch": 1.45, + "learning_rate": 4.517807407407408e-05, + "loss": 2.234, + "step": 8140 + }, + { + "epoch": 1.45, + "learning_rate": 4.517511111111111e-05, + "loss": 2.2045, + "step": 8145 + }, + { + "epoch": 1.45, + "learning_rate": 4.5172148148148155e-05, + "loss": 2.2334, + "step": 8150 + }, + { + "epoch": 1.45, + "learning_rate": 4.516918518518519e-05, + "loss": 2.2639, + "step": 8155 + }, + { + "epoch": 1.45, + "learning_rate": 4.5166222222222226e-05, + "loss": 2.2197, + "step": 8160 + }, + { + "epoch": 1.45, + "learning_rate": 4.516325925925926e-05, + "loss": 2.2616, + "step": 8165 + }, + { + "epoch": 1.45, + "learning_rate": 4.5160296296296304e-05, + "loss": 2.3805, + "step": 8170 + }, + { + "epoch": 1.45, + "learning_rate": 4.5157333333333336e-05, + "loss": 2.2324, + "step": 8175 + }, + { + "epoch": 1.45, + "learning_rate": 4.5154370370370374e-05, + "loss": 2.3746, + "step": 8180 + }, + { + "epoch": 1.46, + "learning_rate": 4.5151407407407406e-05, + "loss": 2.2441, + "step": 8185 + }, + { + "epoch": 1.46, + "learning_rate": 4.514844444444445e-05, + "loss": 2.0798, + "step": 8190 + }, + { + "epoch": 1.46, + "learning_rate": 4.5145481481481484e-05, + "loss": 2.2471, + "step": 8195 + }, + { + "epoch": 1.46, + "learning_rate": 4.514251851851852e-05, + "loss": 2.3012, + "step": 8200 + }, + { + "epoch": 1.46, + "learning_rate": 4.5139555555555555e-05, + "loss": 2.2261, + "step": 8205 + }, + { + "epoch": 1.46, + "learning_rate": 4.5136592592592594e-05, + "loss": 2.2868, + "step": 8210 + }, + { + "epoch": 1.46, + "learning_rate": 4.513362962962963e-05, + "loss": 2.1971, + "step": 8215 + }, + { + "epoch": 1.46, + "learning_rate": 4.513066666666667e-05, + "loss": 2.2844, + "step": 8220 + }, + { + "epoch": 1.46, + "learning_rate": 4.5127703703703703e-05, + "loss": 2.3089, + "step": 8225 + }, + { + "epoch": 1.46, + "learning_rate": 4.512474074074074e-05, + "loss": 2.265, + "step": 8230 + }, + { + "epoch": 1.46, + "learning_rate": 4.512177777777778e-05, + "loss": 2.3346, + "step": 8235 + }, + { + "epoch": 1.46, + "learning_rate": 4.511881481481482e-05, + "loss": 2.1878, + "step": 8240 + }, + { + "epoch": 1.47, + "learning_rate": 4.511585185185185e-05, + "loss": 2.2268, + "step": 8245 + }, + { + "epoch": 1.47, + "learning_rate": 4.511288888888889e-05, + "loss": 2.1943, + "step": 8250 + }, + { + "epoch": 1.47, + "learning_rate": 4.510992592592593e-05, + "loss": 2.2257, + "step": 8255 + }, + { + "epoch": 1.47, + "learning_rate": 4.510696296296297e-05, + "loss": 2.3799, + "step": 8260 + }, + { + "epoch": 1.47, + "learning_rate": 4.5104e-05, + "loss": 2.3179, + "step": 8265 + }, + { + "epoch": 1.47, + "learning_rate": 4.510103703703704e-05, + "loss": 2.2771, + "step": 8270 + }, + { + "epoch": 1.47, + "learning_rate": 4.509807407407408e-05, + "loss": 2.279, + "step": 8275 + }, + { + "epoch": 1.47, + "learning_rate": 4.509511111111112e-05, + "loss": 2.3063, + "step": 8280 + }, + { + "epoch": 1.47, + "learning_rate": 4.509214814814815e-05, + "loss": 2.3726, + "step": 8285 + }, + { + "epoch": 1.47, + "learning_rate": 4.508918518518519e-05, + "loss": 2.2917, + "step": 8290 + }, + { + "epoch": 1.47, + "learning_rate": 4.508622222222223e-05, + "loss": 2.3271, + "step": 8295 + }, + { + "epoch": 1.48, + "learning_rate": 4.5083259259259265e-05, + "loss": 2.2034, + "step": 8300 + }, + { + "epoch": 1.48, + "learning_rate": 4.50802962962963e-05, + "loss": 2.2116, + "step": 8305 + }, + { + "epoch": 1.48, + "learning_rate": 4.5077333333333336e-05, + "loss": 2.2149, + "step": 8310 + }, + { + "epoch": 1.48, + "learning_rate": 4.5074370370370375e-05, + "loss": 2.2908, + "step": 8315 + }, + { + "epoch": 1.48, + "learning_rate": 4.5071407407407414e-05, + "loss": 2.171, + "step": 8320 + }, + { + "epoch": 1.48, + "learning_rate": 4.5068444444444446e-05, + "loss": 2.2796, + "step": 8325 + }, + { + "epoch": 1.48, + "learning_rate": 4.5065481481481485e-05, + "loss": 2.1624, + "step": 8330 + }, + { + "epoch": 1.48, + "learning_rate": 4.5062518518518524e-05, + "loss": 2.3399, + "step": 8335 + }, + { + "epoch": 1.48, + "learning_rate": 4.505955555555556e-05, + "loss": 2.1823, + "step": 8340 + }, + { + "epoch": 1.48, + "learning_rate": 4.5056592592592595e-05, + "loss": 2.2171, + "step": 8345 + }, + { + "epoch": 1.48, + "learning_rate": 4.5054222222222224e-05, + "loss": 2.3208, + "step": 8350 + }, + { + "epoch": 1.49, + "learning_rate": 4.505125925925926e-05, + "loss": 2.2498, + "step": 8355 + }, + { + "epoch": 1.49, + "learning_rate": 4.50482962962963e-05, + "loss": 2.3605, + "step": 8360 + }, + { + "epoch": 1.49, + "learning_rate": 4.5045333333333334e-05, + "loss": 2.2842, + "step": 8365 + }, + { + "epoch": 1.49, + "learning_rate": 4.504237037037037e-05, + "loss": 2.0507, + "step": 8370 + }, + { + "epoch": 1.49, + "learning_rate": 4.5039407407407405e-05, + "loss": 2.4088, + "step": 8375 + }, + { + "epoch": 1.49, + "learning_rate": 4.503644444444445e-05, + "loss": 2.1591, + "step": 8380 + }, + { + "epoch": 1.49, + "learning_rate": 4.503348148148148e-05, + "loss": 2.4022, + "step": 8385 + }, + { + "epoch": 1.49, + "learning_rate": 4.503051851851852e-05, + "loss": 2.3104, + "step": 8390 + }, + { + "epoch": 1.49, + "learning_rate": 4.502755555555555e-05, + "loss": 2.1783, + "step": 8395 + }, + { + "epoch": 1.49, + "learning_rate": 4.50245925925926e-05, + "loss": 2.2255, + "step": 8400 + }, + { + "epoch": 1.49, + "learning_rate": 4.502162962962963e-05, + "loss": 2.3139, + "step": 8405 + }, + { + "epoch": 1.5, + "learning_rate": 4.501866666666667e-05, + "loss": 2.3446, + "step": 8410 + }, + { + "epoch": 1.5, + "learning_rate": 4.50157037037037e-05, + "loss": 2.2208, + "step": 8415 + }, + { + "epoch": 1.5, + "learning_rate": 4.501274074074075e-05, + "loss": 2.2788, + "step": 8420 + }, + { + "epoch": 1.5, + "learning_rate": 4.500977777777778e-05, + "loss": 2.2625, + "step": 8425 + }, + { + "epoch": 1.5, + "learning_rate": 4.500681481481482e-05, + "loss": 2.2027, + "step": 8430 + }, + { + "epoch": 1.5, + "learning_rate": 4.500385185185185e-05, + "loss": 2.2051, + "step": 8435 + }, + { + "epoch": 1.5, + "learning_rate": 4.500088888888889e-05, + "loss": 2.251, + "step": 8440 + }, + { + "epoch": 1.5, + "learning_rate": 4.499792592592593e-05, + "loss": 2.2352, + "step": 8445 + }, + { + "epoch": 1.5, + "learning_rate": 4.499496296296297e-05, + "loss": 2.1112, + "step": 8450 + }, + { + "epoch": 1.5, + "learning_rate": 4.4992e-05, + "loss": 2.3001, + "step": 8455 + }, + { + "epoch": 1.5, + "learning_rate": 4.498903703703704e-05, + "loss": 2.3309, + "step": 8460 + }, + { + "epoch": 1.5, + "learning_rate": 4.4986074074074076e-05, + "loss": 2.4054, + "step": 8465 + }, + { + "epoch": 1.51, + "learning_rate": 4.4983111111111115e-05, + "loss": 2.361, + "step": 8470 + }, + { + "epoch": 1.51, + "learning_rate": 4.498014814814815e-05, + "loss": 2.3468, + "step": 8475 + }, + { + "epoch": 1.51, + "learning_rate": 4.4977185185185186e-05, + "loss": 2.1889, + "step": 8480 + }, + { + "epoch": 1.51, + "learning_rate": 4.4974222222222225e-05, + "loss": 2.2536, + "step": 8485 + }, + { + "epoch": 1.51, + "learning_rate": 4.4971259259259264e-05, + "loss": 2.2048, + "step": 8490 + }, + { + "epoch": 1.51, + "learning_rate": 4.4968296296296296e-05, + "loss": 2.1627, + "step": 8495 + }, + { + "epoch": 1.51, + "learning_rate": 4.4965333333333335e-05, + "loss": 2.1465, + "step": 8500 + }, + { + "epoch": 1.51, + "learning_rate": 4.4962370370370374e-05, + "loss": 2.2, + "step": 8505 + }, + { + "epoch": 1.51, + "learning_rate": 4.495940740740741e-05, + "loss": 2.2766, + "step": 8510 + }, + { + "epoch": 1.51, + "learning_rate": 4.4956444444444444e-05, + "loss": 2.2126, + "step": 8515 + }, + { + "epoch": 1.51, + "learning_rate": 4.495348148148148e-05, + "loss": 2.2234, + "step": 8520 + }, + { + "epoch": 1.52, + "learning_rate": 4.495051851851852e-05, + "loss": 2.1466, + "step": 8525 + }, + { + "epoch": 1.52, + "learning_rate": 4.494755555555556e-05, + "loss": 2.2597, + "step": 8530 + }, + { + "epoch": 1.52, + "learning_rate": 4.494459259259259e-05, + "loss": 2.1913, + "step": 8535 + }, + { + "epoch": 1.52, + "learning_rate": 4.494162962962963e-05, + "loss": 2.344, + "step": 8540 + }, + { + "epoch": 1.52, + "learning_rate": 4.493866666666667e-05, + "loss": 2.3772, + "step": 8545 + }, + { + "epoch": 1.52, + "learning_rate": 4.493570370370371e-05, + "loss": 2.2795, + "step": 8550 + }, + { + "epoch": 1.52, + "learning_rate": 4.493274074074074e-05, + "loss": 2.285, + "step": 8555 + }, + { + "epoch": 1.52, + "learning_rate": 4.492977777777778e-05, + "loss": 2.1795, + "step": 8560 + }, + { + "epoch": 1.52, + "learning_rate": 4.492681481481482e-05, + "loss": 2.2825, + "step": 8565 + }, + { + "epoch": 1.52, + "learning_rate": 4.492385185185186e-05, + "loss": 2.2854, + "step": 8570 + }, + { + "epoch": 1.52, + "learning_rate": 4.492088888888889e-05, + "loss": 2.1471, + "step": 8575 + }, + { + "epoch": 1.53, + "learning_rate": 4.491792592592593e-05, + "loss": 2.1668, + "step": 8580 + }, + { + "epoch": 1.53, + "learning_rate": 4.491496296296297e-05, + "loss": 2.1977, + "step": 8585 + }, + { + "epoch": 1.53, + "learning_rate": 4.4912000000000006e-05, + "loss": 2.2191, + "step": 8590 + }, + { + "epoch": 1.53, + "learning_rate": 4.490903703703704e-05, + "loss": 2.2372, + "step": 8595 + }, + { + "epoch": 1.53, + "learning_rate": 4.490607407407408e-05, + "loss": 2.3301, + "step": 8600 + }, + { + "epoch": 1.53, + "learning_rate": 4.490311111111111e-05, + "loss": 2.1774, + "step": 8605 + }, + { + "epoch": 1.53, + "learning_rate": 4.4900148148148155e-05, + "loss": 2.3402, + "step": 8610 + }, + { + "epoch": 1.53, + "learning_rate": 4.489718518518519e-05, + "loss": 2.0965, + "step": 8615 + }, + { + "epoch": 1.53, + "learning_rate": 4.4894222222222226e-05, + "loss": 2.2759, + "step": 8620 + }, + { + "epoch": 1.53, + "learning_rate": 4.489125925925926e-05, + "loss": 2.1717, + "step": 8625 + }, + { + "epoch": 1.53, + "learning_rate": 4.4888296296296303e-05, + "loss": 2.3114, + "step": 8630 + }, + { + "epoch": 1.54, + "learning_rate": 4.4885333333333335e-05, + "loss": 2.0952, + "step": 8635 + }, + { + "epoch": 1.54, + "learning_rate": 4.4882370370370374e-05, + "loss": 2.2115, + "step": 8640 + }, + { + "epoch": 1.54, + "learning_rate": 4.4879407407407406e-05, + "loss": 2.3288, + "step": 8645 + }, + { + "epoch": 1.54, + "learning_rate": 4.487644444444445e-05, + "loss": 2.0221, + "step": 8650 + }, + { + "epoch": 1.54, + "learning_rate": 4.4873481481481484e-05, + "loss": 2.1448, + "step": 8655 + }, + { + "epoch": 1.54, + "learning_rate": 4.487051851851852e-05, + "loss": 2.29, + "step": 8660 + }, + { + "epoch": 1.54, + "learning_rate": 4.4867555555555555e-05, + "loss": 2.24, + "step": 8665 + }, + { + "epoch": 1.54, + "learning_rate": 4.4864592592592594e-05, + "loss": 2.1958, + "step": 8670 + }, + { + "epoch": 1.54, + "learning_rate": 4.486162962962963e-05, + "loss": 2.2127, + "step": 8675 + }, + { + "epoch": 1.54, + "learning_rate": 4.485866666666667e-05, + "loss": 2.2432, + "step": 8680 + }, + { + "epoch": 1.54, + "learning_rate": 4.48557037037037e-05, + "loss": 2.2858, + "step": 8685 + }, + { + "epoch": 1.54, + "learning_rate": 4.485274074074074e-05, + "loss": 2.2226, + "step": 8690 + }, + { + "epoch": 1.55, + "learning_rate": 4.484977777777778e-05, + "loss": 2.1858, + "step": 8695 + }, + { + "epoch": 1.55, + "learning_rate": 4.484681481481482e-05, + "loss": 2.3423, + "step": 8700 + }, + { + "epoch": 1.55, + "learning_rate": 4.484385185185185e-05, + "loss": 2.2348, + "step": 8705 + }, + { + "epoch": 1.55, + "learning_rate": 4.484088888888889e-05, + "loss": 2.1366, + "step": 8710 + }, + { + "epoch": 1.55, + "learning_rate": 4.483792592592593e-05, + "loss": 2.2243, + "step": 8715 + }, + { + "epoch": 1.55, + "learning_rate": 4.483496296296297e-05, + "loss": 2.3124, + "step": 8720 + }, + { + "epoch": 1.55, + "learning_rate": 4.4832e-05, + "loss": 2.1294, + "step": 8725 + }, + { + "epoch": 1.55, + "learning_rate": 4.482903703703704e-05, + "loss": 2.2274, + "step": 8730 + }, + { + "epoch": 1.55, + "learning_rate": 4.482607407407408e-05, + "loss": 2.1691, + "step": 8735 + }, + { + "epoch": 1.55, + "learning_rate": 4.482311111111112e-05, + "loss": 2.2492, + "step": 8740 + }, + { + "epoch": 1.55, + "learning_rate": 4.482014814814815e-05, + "loss": 2.161, + "step": 8745 + }, + { + "epoch": 1.56, + "learning_rate": 4.481718518518519e-05, + "loss": 2.2844, + "step": 8750 + }, + { + "epoch": 1.56, + "learning_rate": 4.4814222222222227e-05, + "loss": 2.2789, + "step": 8755 + }, + { + "epoch": 1.56, + "learning_rate": 4.4811259259259265e-05, + "loss": 2.3224, + "step": 8760 + }, + { + "epoch": 1.56, + "learning_rate": 4.48082962962963e-05, + "loss": 2.2029, + "step": 8765 + }, + { + "epoch": 1.56, + "learning_rate": 4.4805333333333336e-05, + "loss": 2.2366, + "step": 8770 + }, + { + "epoch": 1.56, + "learning_rate": 4.4802370370370375e-05, + "loss": 2.1433, + "step": 8775 + }, + { + "epoch": 1.56, + "learning_rate": 4.4799407407407414e-05, + "loss": 2.3118, + "step": 8780 + }, + { + "epoch": 1.56, + "learning_rate": 4.4796444444444446e-05, + "loss": 2.3444, + "step": 8785 + }, + { + "epoch": 1.56, + "learning_rate": 4.4793481481481485e-05, + "loss": 2.1308, + "step": 8790 + }, + { + "epoch": 1.56, + "learning_rate": 4.4790518518518524e-05, + "loss": 2.327, + "step": 8795 + }, + { + "epoch": 1.56, + "learning_rate": 4.478755555555556e-05, + "loss": 2.1839, + "step": 8800 + }, + { + "epoch": 1.57, + "learning_rate": 4.4784592592592594e-05, + "loss": 2.2696, + "step": 8805 + }, + { + "epoch": 1.57, + "learning_rate": 4.478162962962963e-05, + "loss": 2.0645, + "step": 8810 + }, + { + "epoch": 1.57, + "learning_rate": 4.477866666666667e-05, + "loss": 2.1307, + "step": 8815 + }, + { + "epoch": 1.57, + "learning_rate": 4.477570370370371e-05, + "loss": 2.1694, + "step": 8820 + }, + { + "epoch": 1.57, + "learning_rate": 4.477274074074074e-05, + "loss": 2.1367, + "step": 8825 + }, + { + "epoch": 1.57, + "learning_rate": 4.4769777777777775e-05, + "loss": 2.3855, + "step": 8830 + }, + { + "epoch": 1.57, + "learning_rate": 4.4766814814814814e-05, + "loss": 2.2773, + "step": 8835 + }, + { + "epoch": 1.57, + "learning_rate": 4.476385185185185e-05, + "loss": 2.1735, + "step": 8840 + }, + { + "epoch": 1.57, + "learning_rate": 4.476088888888889e-05, + "loss": 2.3049, + "step": 8845 + }, + { + "epoch": 1.57, + "learning_rate": 4.4757925925925923e-05, + "loss": 2.2075, + "step": 8850 + }, + { + "epoch": 1.57, + "learning_rate": 4.475496296296296e-05, + "loss": 2.0926, + "step": 8855 + }, + { + "epoch": 1.58, + "learning_rate": 4.4752e-05, + "loss": 2.241, + "step": 8860 + }, + { + "epoch": 1.58, + "learning_rate": 4.474903703703704e-05, + "loss": 2.2096, + "step": 8865 + }, + { + "epoch": 1.58, + "learning_rate": 4.474607407407407e-05, + "loss": 2.0937, + "step": 8870 + }, + { + "epoch": 1.58, + "learning_rate": 4.474311111111111e-05, + "loss": 2.0851, + "step": 8875 + }, + { + "epoch": 1.58, + "learning_rate": 4.474014814814815e-05, + "loss": 2.2944, + "step": 8880 + }, + { + "epoch": 1.58, + "learning_rate": 4.473718518518519e-05, + "loss": 2.312, + "step": 8885 + }, + { + "epoch": 1.58, + "learning_rate": 4.473422222222222e-05, + "loss": 2.3338, + "step": 8890 + }, + { + "epoch": 1.58, + "learning_rate": 4.473125925925926e-05, + "loss": 2.2477, + "step": 8895 + }, + { + "epoch": 1.58, + "learning_rate": 4.47282962962963e-05, + "loss": 2.227, + "step": 8900 + }, + { + "epoch": 1.58, + "learning_rate": 4.472533333333334e-05, + "loss": 2.1642, + "step": 8905 + }, + { + "epoch": 1.58, + "learning_rate": 4.472237037037037e-05, + "loss": 2.1958, + "step": 8910 + }, + { + "epoch": 1.58, + "learning_rate": 4.471940740740741e-05, + "loss": 2.0787, + "step": 8915 + }, + { + "epoch": 1.59, + "learning_rate": 4.471644444444445e-05, + "loss": 2.1999, + "step": 8920 + }, + { + "epoch": 1.59, + "learning_rate": 4.4713481481481486e-05, + "loss": 2.1504, + "step": 8925 + }, + { + "epoch": 1.59, + "learning_rate": 4.471051851851852e-05, + "loss": 2.2471, + "step": 8930 + }, + { + "epoch": 1.59, + "learning_rate": 4.4707555555555556e-05, + "loss": 2.2049, + "step": 8935 + }, + { + "epoch": 1.59, + "learning_rate": 4.4704592592592595e-05, + "loss": 2.3103, + "step": 8940 + }, + { + "epoch": 1.59, + "learning_rate": 4.4701629629629634e-05, + "loss": 2.2926, + "step": 8945 + }, + { + "epoch": 1.59, + "learning_rate": 4.4698666666666666e-05, + "loss": 2.2791, + "step": 8950 + }, + { + "epoch": 1.59, + "learning_rate": 4.4695703703703705e-05, + "loss": 2.2122, + "step": 8955 + }, + { + "epoch": 1.59, + "learning_rate": 4.4692740740740744e-05, + "loss": 2.0151, + "step": 8960 + }, + { + "epoch": 1.59, + "learning_rate": 4.468977777777778e-05, + "loss": 2.2277, + "step": 8965 + }, + { + "epoch": 1.59, + "learning_rate": 4.4686814814814815e-05, + "loss": 2.1302, + "step": 8970 + }, + { + "epoch": 1.6, + "learning_rate": 4.4683851851851853e-05, + "loss": 2.2825, + "step": 8975 + }, + { + "epoch": 1.6, + "learning_rate": 4.4680888888888885e-05, + "loss": 2.243, + "step": 8980 + }, + { + "epoch": 1.6, + "learning_rate": 4.467792592592593e-05, + "loss": 2.2525, + "step": 8985 + }, + { + "epoch": 1.6, + "learning_rate": 4.467496296296296e-05, + "loss": 2.1146, + "step": 8990 + }, + { + "epoch": 1.6, + "learning_rate": 4.4672e-05, + "loss": 2.1477, + "step": 8995 + }, + { + "epoch": 1.6, + "learning_rate": 4.4669037037037034e-05, + "loss": 2.1913, + "step": 9000 + }, + { + "epoch": 1.6, + "learning_rate": 4.466607407407408e-05, + "loss": 2.3038, + "step": 9005 + }, + { + "epoch": 1.6, + "learning_rate": 4.466311111111111e-05, + "loss": 2.3616, + "step": 9010 + }, + { + "epoch": 1.6, + "learning_rate": 4.466014814814815e-05, + "loss": 2.2636, + "step": 9015 + }, + { + "epoch": 1.6, + "learning_rate": 4.465718518518518e-05, + "loss": 2.1322, + "step": 9020 + }, + { + "epoch": 1.6, + "learning_rate": 4.465422222222223e-05, + "loss": 2.2828, + "step": 9025 + }, + { + "epoch": 1.61, + "learning_rate": 4.465125925925926e-05, + "loss": 2.2577, + "step": 9030 + }, + { + "epoch": 1.61, + "learning_rate": 4.46482962962963e-05, + "loss": 2.2734, + "step": 9035 + }, + { + "epoch": 1.61, + "learning_rate": 4.464533333333333e-05, + "loss": 2.0607, + "step": 9040 + }, + { + "epoch": 1.61, + "learning_rate": 4.4642370370370377e-05, + "loss": 2.242, + "step": 9045 + }, + { + "epoch": 1.61, + "learning_rate": 4.463940740740741e-05, + "loss": 2.3204, + "step": 9050 + }, + { + "epoch": 1.61, + "learning_rate": 4.463644444444445e-05, + "loss": 2.3941, + "step": 9055 + }, + { + "epoch": 1.61, + "learning_rate": 4.463348148148148e-05, + "loss": 2.2281, + "step": 9060 + }, + { + "epoch": 1.61, + "learning_rate": 4.463051851851852e-05, + "loss": 2.2483, + "step": 9065 + }, + { + "epoch": 1.61, + "learning_rate": 4.462755555555556e-05, + "loss": 2.1758, + "step": 9070 + }, + { + "epoch": 1.61, + "learning_rate": 4.4624592592592596e-05, + "loss": 2.1309, + "step": 9075 + }, + { + "epoch": 1.61, + "learning_rate": 4.462162962962963e-05, + "loss": 2.2749, + "step": 9080 + }, + { + "epoch": 1.62, + "learning_rate": 4.461866666666667e-05, + "loss": 2.3262, + "step": 9085 + }, + { + "epoch": 1.62, + "learning_rate": 4.4615703703703706e-05, + "loss": 2.3108, + "step": 9090 + }, + { + "epoch": 1.62, + "learning_rate": 4.4612740740740744e-05, + "loss": 2.3365, + "step": 9095 + }, + { + "epoch": 1.62, + "learning_rate": 4.4609777777777777e-05, + "loss": 2.2091, + "step": 9100 + }, + { + "epoch": 1.62, + "learning_rate": 4.4606814814814815e-05, + "loss": 2.2945, + "step": 9105 + }, + { + "epoch": 1.62, + "learning_rate": 4.4603851851851854e-05, + "loss": 2.2508, + "step": 9110 + }, + { + "epoch": 1.62, + "learning_rate": 4.460088888888889e-05, + "loss": 2.3548, + "step": 9115 + }, + { + "epoch": 1.62, + "learning_rate": 4.4597925925925925e-05, + "loss": 2.1927, + "step": 9120 + }, + { + "epoch": 1.62, + "learning_rate": 4.4594962962962964e-05, + "loss": 2.0419, + "step": 9125 + }, + { + "epoch": 1.62, + "learning_rate": 4.4592e-05, + "loss": 2.2701, + "step": 9130 + }, + { + "epoch": 1.62, + "learning_rate": 4.458903703703704e-05, + "loss": 2.3501, + "step": 9135 + }, + { + "epoch": 1.62, + "learning_rate": 4.4586074074074074e-05, + "loss": 2.1802, + "step": 9140 + }, + { + "epoch": 1.63, + "learning_rate": 4.458311111111111e-05, + "loss": 2.2476, + "step": 9145 + }, + { + "epoch": 1.63, + "learning_rate": 4.458014814814815e-05, + "loss": 2.2158, + "step": 9150 + }, + { + "epoch": 1.63, + "learning_rate": 4.457718518518519e-05, + "loss": 2.3661, + "step": 9155 + }, + { + "epoch": 1.63, + "learning_rate": 4.457422222222222e-05, + "loss": 2.2258, + "step": 9160 + }, + { + "epoch": 1.63, + "learning_rate": 4.457125925925926e-05, + "loss": 2.2263, + "step": 9165 + }, + { + "epoch": 1.63, + "learning_rate": 4.45682962962963e-05, + "loss": 2.274, + "step": 9170 + }, + { + "epoch": 1.63, + "learning_rate": 4.456533333333334e-05, + "loss": 2.293, + "step": 9175 + }, + { + "epoch": 1.63, + "learning_rate": 4.456237037037037e-05, + "loss": 2.272, + "step": 9180 + }, + { + "epoch": 1.63, + "learning_rate": 4.455940740740741e-05, + "loss": 2.1729, + "step": 9185 + }, + { + "epoch": 1.63, + "learning_rate": 4.455644444444445e-05, + "loss": 2.2191, + "step": 9190 + }, + { + "epoch": 1.63, + "learning_rate": 4.455348148148149e-05, + "loss": 2.1812, + "step": 9195 + }, + { + "epoch": 1.64, + "learning_rate": 4.455051851851852e-05, + "loss": 2.1767, + "step": 9200 + }, + { + "epoch": 1.64, + "learning_rate": 4.454755555555556e-05, + "loss": 2.358, + "step": 9205 + }, + { + "epoch": 1.64, + "learning_rate": 4.454459259259259e-05, + "loss": 2.2129, + "step": 9210 + }, + { + "epoch": 1.64, + "learning_rate": 4.4541629629629636e-05, + "loss": 2.2722, + "step": 9215 + }, + { + "epoch": 1.64, + "learning_rate": 4.453866666666667e-05, + "loss": 2.305, + "step": 9220 + }, + { + "epoch": 1.64, + "learning_rate": 4.4535703703703706e-05, + "loss": 2.1439, + "step": 9225 + }, + { + "epoch": 1.64, + "learning_rate": 4.453274074074074e-05, + "loss": 2.1352, + "step": 9230 + }, + { + "epoch": 1.64, + "learning_rate": 4.4529777777777784e-05, + "loss": 2.2203, + "step": 9235 + }, + { + "epoch": 1.64, + "learning_rate": 4.4526814814814816e-05, + "loss": 2.3053, + "step": 9240 + }, + { + "epoch": 1.64, + "learning_rate": 4.4523851851851855e-05, + "loss": 2.1684, + "step": 9245 + }, + { + "epoch": 1.64, + "learning_rate": 4.452088888888889e-05, + "loss": 2.1803, + "step": 9250 + }, + { + "epoch": 1.65, + "learning_rate": 4.451792592592593e-05, + "loss": 2.1994, + "step": 9255 + }, + { + "epoch": 1.65, + "learning_rate": 4.4514962962962965e-05, + "loss": 2.1723, + "step": 9260 + }, + { + "epoch": 1.65, + "learning_rate": 4.4512000000000003e-05, + "loss": 2.291, + "step": 9265 + }, + { + "epoch": 1.65, + "learning_rate": 4.4509037037037035e-05, + "loss": 2.2842, + "step": 9270 + }, + { + "epoch": 1.65, + "learning_rate": 4.450607407407408e-05, + "loss": 2.2466, + "step": 9275 + }, + { + "epoch": 1.65, + "learning_rate": 4.450311111111111e-05, + "loss": 2.1951, + "step": 9280 + }, + { + "epoch": 1.65, + "learning_rate": 4.450014814814815e-05, + "loss": 2.2368, + "step": 9285 + }, + { + "epoch": 1.65, + "learning_rate": 4.4497185185185184e-05, + "loss": 2.2186, + "step": 9290 + }, + { + "epoch": 1.65, + "learning_rate": 4.449422222222222e-05, + "loss": 2.2374, + "step": 9295 + }, + { + "epoch": 1.65, + "learning_rate": 4.449125925925926e-05, + "loss": 2.1349, + "step": 9300 + }, + { + "epoch": 1.65, + "learning_rate": 4.44882962962963e-05, + "loss": 2.1731, + "step": 9305 + }, + { + "epoch": 1.66, + "learning_rate": 4.448533333333333e-05, + "loss": 2.1091, + "step": 9310 + }, + { + "epoch": 1.66, + "learning_rate": 4.448237037037037e-05, + "loss": 2.2432, + "step": 9315 + }, + { + "epoch": 1.66, + "learning_rate": 4.447940740740741e-05, + "loss": 2.1939, + "step": 9320 + }, + { + "epoch": 1.66, + "learning_rate": 4.447644444444445e-05, + "loss": 2.2088, + "step": 9325 + }, + { + "epoch": 1.66, + "learning_rate": 4.447348148148148e-05, + "loss": 2.2673, + "step": 9330 + }, + { + "epoch": 1.66, + "learning_rate": 4.447051851851852e-05, + "loss": 2.1189, + "step": 9335 + }, + { + "epoch": 1.66, + "learning_rate": 4.446755555555556e-05, + "loss": 2.2517, + "step": 9340 + }, + { + "epoch": 1.66, + "learning_rate": 4.44645925925926e-05, + "loss": 2.3645, + "step": 9345 + }, + { + "epoch": 1.66, + "learning_rate": 4.446162962962963e-05, + "loss": 2.2288, + "step": 9350 + }, + { + "epoch": 1.66, + "learning_rate": 4.445866666666667e-05, + "loss": 2.3341, + "step": 9355 + }, + { + "epoch": 1.66, + "learning_rate": 4.445570370370371e-05, + "loss": 2.3481, + "step": 9360 + }, + { + "epoch": 1.66, + "learning_rate": 4.4452740740740746e-05, + "loss": 2.2295, + "step": 9365 + }, + { + "epoch": 1.67, + "learning_rate": 4.444977777777778e-05, + "loss": 2.1133, + "step": 9370 + }, + { + "epoch": 1.67, + "learning_rate": 4.444681481481482e-05, + "loss": 2.0986, + "step": 9375 + }, + { + "epoch": 1.67, + "learning_rate": 4.4443851851851856e-05, + "loss": 2.1724, + "step": 9380 + }, + { + "epoch": 1.67, + "learning_rate": 4.4440888888888895e-05, + "loss": 2.1093, + "step": 9385 + }, + { + "epoch": 1.67, + "learning_rate": 4.4437925925925927e-05, + "loss": 2.2069, + "step": 9390 + }, + { + "epoch": 1.67, + "learning_rate": 4.4434962962962965e-05, + "loss": 2.2396, + "step": 9395 + }, + { + "epoch": 1.67, + "learning_rate": 4.4432000000000004e-05, + "loss": 2.1892, + "step": 9400 + }, + { + "epoch": 1.67, + "learning_rate": 4.442903703703704e-05, + "loss": 2.2442, + "step": 9405 + }, + { + "epoch": 1.67, + "learning_rate": 4.4426074074074075e-05, + "loss": 2.1985, + "step": 9410 + }, + { + "epoch": 1.67, + "learning_rate": 4.4423111111111114e-05, + "loss": 2.2573, + "step": 9415 + }, + { + "epoch": 1.67, + "learning_rate": 4.442014814814815e-05, + "loss": 2.2509, + "step": 9420 + }, + { + "epoch": 1.68, + "learning_rate": 4.441718518518519e-05, + "loss": 2.3678, + "step": 9425 + }, + { + "epoch": 1.68, + "learning_rate": 4.4414222222222224e-05, + "loss": 2.2194, + "step": 9430 + }, + { + "epoch": 1.68, + "learning_rate": 4.441125925925926e-05, + "loss": 2.1909, + "step": 9435 + }, + { + "epoch": 1.68, + "learning_rate": 4.4408296296296294e-05, + "loss": 2.3339, + "step": 9440 + }, + { + "epoch": 1.68, + "learning_rate": 4.440533333333334e-05, + "loss": 2.1494, + "step": 9445 + }, + { + "epoch": 1.68, + "learning_rate": 4.440237037037037e-05, + "loss": 2.2311, + "step": 9450 + }, + { + "epoch": 1.68, + "learning_rate": 4.439940740740741e-05, + "loss": 2.0915, + "step": 9455 + }, + { + "epoch": 1.68, + "learning_rate": 4.439644444444444e-05, + "loss": 2.2459, + "step": 9460 + }, + { + "epoch": 1.68, + "learning_rate": 4.439348148148149e-05, + "loss": 2.2793, + "step": 9465 + }, + { + "epoch": 1.68, + "learning_rate": 4.439051851851852e-05, + "loss": 2.2388, + "step": 9470 + }, + { + "epoch": 1.68, + "learning_rate": 4.438755555555556e-05, + "loss": 2.2024, + "step": 9475 + }, + { + "epoch": 1.69, + "learning_rate": 4.438459259259259e-05, + "loss": 2.3477, + "step": 9480 + }, + { + "epoch": 1.69, + "learning_rate": 4.438162962962964e-05, + "loss": 2.1572, + "step": 9485 + }, + { + "epoch": 1.69, + "learning_rate": 4.437866666666667e-05, + "loss": 2.2055, + "step": 9490 + }, + { + "epoch": 1.69, + "learning_rate": 4.437570370370371e-05, + "loss": 2.1077, + "step": 9495 + }, + { + "epoch": 1.69, + "learning_rate": 4.437274074074074e-05, + "loss": 2.3007, + "step": 9500 + }, + { + "epoch": 1.69, + "learning_rate": 4.4369777777777786e-05, + "loss": 2.2256, + "step": 9505 + }, + { + "epoch": 1.69, + "learning_rate": 4.436681481481482e-05, + "loss": 2.2885, + "step": 9510 + }, + { + "epoch": 1.69, + "learning_rate": 4.4363851851851856e-05, + "loss": 2.2536, + "step": 9515 + }, + { + "epoch": 1.69, + "learning_rate": 4.436088888888889e-05, + "loss": 2.3899, + "step": 9520 + }, + { + "epoch": 1.69, + "learning_rate": 4.435792592592593e-05, + "loss": 2.22, + "step": 9525 + }, + { + "epoch": 1.69, + "learning_rate": 4.4354962962962966e-05, + "loss": 2.246, + "step": 9530 + }, + { + "epoch": 1.7, + "learning_rate": 4.4352000000000005e-05, + "loss": 2.2212, + "step": 9535 + }, + { + "epoch": 1.7, + "learning_rate": 4.434903703703704e-05, + "loss": 2.1832, + "step": 9540 + }, + { + "epoch": 1.7, + "learning_rate": 4.4346074074074076e-05, + "loss": 2.33, + "step": 9545 + }, + { + "epoch": 1.7, + "learning_rate": 4.4343111111111115e-05, + "loss": 2.4236, + "step": 9550 + }, + { + "epoch": 1.7, + "learning_rate": 4.4340148148148153e-05, + "loss": 2.2868, + "step": 9555 + }, + { + "epoch": 1.7, + "learning_rate": 4.4337185185185186e-05, + "loss": 2.1766, + "step": 9560 + }, + { + "epoch": 1.7, + "learning_rate": 4.4334222222222224e-05, + "loss": 2.3585, + "step": 9565 + }, + { + "epoch": 1.7, + "learning_rate": 4.433125925925926e-05, + "loss": 2.2546, + "step": 9570 + }, + { + "epoch": 1.7, + "learning_rate": 4.43282962962963e-05, + "loss": 2.346, + "step": 9575 + }, + { + "epoch": 1.7, + "learning_rate": 4.4325333333333334e-05, + "loss": 2.1642, + "step": 9580 + }, + { + "epoch": 1.7, + "learning_rate": 4.432237037037037e-05, + "loss": 2.115, + "step": 9585 + }, + { + "epoch": 1.7, + "learning_rate": 4.431940740740741e-05, + "loss": 2.2371, + "step": 9590 + }, + { + "epoch": 1.71, + "learning_rate": 4.431644444444445e-05, + "loss": 2.2612, + "step": 9595 + }, + { + "epoch": 1.71, + "learning_rate": 4.431348148148148e-05, + "loss": 2.2129, + "step": 9600 + }, + { + "epoch": 1.71, + "learning_rate": 4.4310518518518515e-05, + "loss": 2.1105, + "step": 9605 + }, + { + "epoch": 1.71, + "learning_rate": 4.430755555555556e-05, + "loss": 2.1092, + "step": 9610 + }, + { + "epoch": 1.71, + "learning_rate": 4.430459259259259e-05, + "loss": 2.309, + "step": 9615 + }, + { + "epoch": 1.71, + "learning_rate": 4.430162962962963e-05, + "loss": 2.2111, + "step": 9620 + }, + { + "epoch": 1.71, + "learning_rate": 4.429866666666666e-05, + "loss": 2.3027, + "step": 9625 + }, + { + "epoch": 1.71, + "learning_rate": 4.429570370370371e-05, + "loss": 2.347, + "step": 9630 + }, + { + "epoch": 1.71, + "learning_rate": 4.429274074074074e-05, + "loss": 2.308, + "step": 9635 + }, + { + "epoch": 1.71, + "learning_rate": 4.428977777777778e-05, + "loss": 2.3214, + "step": 9640 + }, + { + "epoch": 1.71, + "learning_rate": 4.428681481481481e-05, + "loss": 2.3536, + "step": 9645 + }, + { + "epoch": 1.72, + "learning_rate": 4.428385185185186e-05, + "loss": 2.3462, + "step": 9650 + }, + { + "epoch": 1.72, + "learning_rate": 4.428088888888889e-05, + "loss": 2.2424, + "step": 9655 + }, + { + "epoch": 1.72, + "learning_rate": 4.427792592592593e-05, + "loss": 2.2741, + "step": 9660 + }, + { + "epoch": 1.72, + "learning_rate": 4.427496296296296e-05, + "loss": 2.2121, + "step": 9665 + }, + { + "epoch": 1.72, + "learning_rate": 4.4272000000000006e-05, + "loss": 2.2787, + "step": 9670 + }, + { + "epoch": 1.72, + "learning_rate": 4.426903703703704e-05, + "loss": 2.2397, + "step": 9675 + }, + { + "epoch": 1.72, + "learning_rate": 4.4266074074074077e-05, + "loss": 2.3025, + "step": 9680 + }, + { + "epoch": 1.72, + "learning_rate": 4.426311111111111e-05, + "loss": 2.2162, + "step": 9685 + }, + { + "epoch": 1.72, + "learning_rate": 4.426014814814815e-05, + "loss": 2.1911, + "step": 9690 + }, + { + "epoch": 1.72, + "learning_rate": 4.4257185185185186e-05, + "loss": 2.2748, + "step": 9695 + }, + { + "epoch": 1.72, + "learning_rate": 4.4254222222222225e-05, + "loss": 2.1526, + "step": 9700 + }, + { + "epoch": 1.73, + "learning_rate": 4.425125925925926e-05, + "loss": 2.2678, + "step": 9705 + }, + { + "epoch": 1.73, + "learning_rate": 4.4248296296296296e-05, + "loss": 2.268, + "step": 9710 + }, + { + "epoch": 1.73, + "learning_rate": 4.4245333333333335e-05, + "loss": 2.2567, + "step": 9715 + }, + { + "epoch": 1.73, + "learning_rate": 4.4242370370370374e-05, + "loss": 2.2341, + "step": 9720 + }, + { + "epoch": 1.73, + "learning_rate": 4.4239407407407406e-05, + "loss": 2.2499, + "step": 9725 + }, + { + "epoch": 1.73, + "learning_rate": 4.4236444444444445e-05, + "loss": 2.2566, + "step": 9730 + }, + { + "epoch": 1.73, + "learning_rate": 4.423348148148148e-05, + "loss": 2.281, + "step": 9735 + }, + { + "epoch": 1.73, + "learning_rate": 4.423051851851852e-05, + "loss": 2.1753, + "step": 9740 + }, + { + "epoch": 1.73, + "learning_rate": 4.4227555555555554e-05, + "loss": 2.292, + "step": 9745 + }, + { + "epoch": 1.73, + "learning_rate": 4.422459259259259e-05, + "loss": 2.1486, + "step": 9750 + }, + { + "epoch": 1.73, + "learning_rate": 4.422162962962963e-05, + "loss": 2.1281, + "step": 9755 + }, + { + "epoch": 1.74, + "learning_rate": 4.421866666666667e-05, + "loss": 2.2281, + "step": 9760 + }, + { + "epoch": 1.74, + "learning_rate": 4.42157037037037e-05, + "loss": 2.2341, + "step": 9765 + }, + { + "epoch": 1.74, + "learning_rate": 4.421274074074074e-05, + "loss": 2.1116, + "step": 9770 + }, + { + "epoch": 1.74, + "learning_rate": 4.420977777777778e-05, + "loss": 2.1859, + "step": 9775 + }, + { + "epoch": 1.74, + "learning_rate": 4.420681481481482e-05, + "loss": 2.2123, + "step": 9780 + }, + { + "epoch": 1.74, + "learning_rate": 4.420385185185185e-05, + "loss": 2.1039, + "step": 9785 + }, + { + "epoch": 1.74, + "learning_rate": 4.420088888888889e-05, + "loss": 2.3223, + "step": 9790 + }, + { + "epoch": 1.74, + "learning_rate": 4.419792592592593e-05, + "loss": 2.2018, + "step": 9795 + }, + { + "epoch": 1.74, + "learning_rate": 4.419496296296297e-05, + "loss": 2.1534, + "step": 9800 + }, + { + "epoch": 1.74, + "learning_rate": 4.4192e-05, + "loss": 2.1754, + "step": 9805 + }, + { + "epoch": 1.74, + "learning_rate": 4.418903703703704e-05, + "loss": 2.1769, + "step": 9810 + }, + { + "epoch": 1.74, + "learning_rate": 4.418607407407408e-05, + "loss": 2.1825, + "step": 9815 + }, + { + "epoch": 1.75, + "learning_rate": 4.4183111111111116e-05, + "loss": 2.065, + "step": 9820 + }, + { + "epoch": 1.75, + "learning_rate": 4.418014814814815e-05, + "loss": 2.1738, + "step": 9825 + }, + { + "epoch": 1.75, + "learning_rate": 4.417718518518519e-05, + "loss": 2.1448, + "step": 9830 + }, + { + "epoch": 1.75, + "learning_rate": 4.417422222222222e-05, + "loss": 2.1492, + "step": 9835 + }, + { + "epoch": 1.75, + "learning_rate": 4.4171259259259265e-05, + "loss": 2.298, + "step": 9840 + }, + { + "epoch": 1.75, + "learning_rate": 4.41682962962963e-05, + "loss": 2.1479, + "step": 9845 + }, + { + "epoch": 1.75, + "learning_rate": 4.4165333333333336e-05, + "loss": 2.2852, + "step": 9850 + }, + { + "epoch": 1.75, + "learning_rate": 4.416237037037037e-05, + "loss": 2.0958, + "step": 9855 + }, + { + "epoch": 1.75, + "learning_rate": 4.415940740740741e-05, + "loss": 2.276, + "step": 9860 + }, + { + "epoch": 1.75, + "learning_rate": 4.4156444444444445e-05, + "loss": 2.1492, + "step": 9865 + }, + { + "epoch": 1.75, + "learning_rate": 4.4153481481481484e-05, + "loss": 2.3101, + "step": 9870 + }, + { + "epoch": 1.76, + "learning_rate": 4.4150518518518516e-05, + "loss": 2.2756, + "step": 9875 + }, + { + "epoch": 1.76, + "learning_rate": 4.414755555555556e-05, + "loss": 2.3224, + "step": 9880 + }, + { + "epoch": 1.76, + "learning_rate": 4.4144592592592594e-05, + "loss": 2.2795, + "step": 9885 + }, + { + "epoch": 1.76, + "learning_rate": 4.414162962962963e-05, + "loss": 2.1955, + "step": 9890 + }, + { + "epoch": 1.76, + "learning_rate": 4.4138666666666665e-05, + "loss": 2.2822, + "step": 9895 + }, + { + "epoch": 1.76, + "learning_rate": 4.413570370370371e-05, + "loss": 2.2868, + "step": 9900 + }, + { + "epoch": 1.76, + "learning_rate": 4.413274074074074e-05, + "loss": 2.1663, + "step": 9905 + }, + { + "epoch": 1.76, + "learning_rate": 4.412977777777778e-05, + "loss": 2.3776, + "step": 9910 + }, + { + "epoch": 1.76, + "learning_rate": 4.412681481481481e-05, + "loss": 2.276, + "step": 9915 + }, + { + "epoch": 1.76, + "learning_rate": 4.412385185185185e-05, + "loss": 2.1808, + "step": 9920 + }, + { + "epoch": 1.76, + "learning_rate": 4.412088888888889e-05, + "loss": 2.1493, + "step": 9925 + }, + { + "epoch": 1.77, + "learning_rate": 4.411792592592593e-05, + "loss": 2.1838, + "step": 9930 + }, + { + "epoch": 1.77, + "learning_rate": 4.411496296296296e-05, + "loss": 2.2366, + "step": 9935 + }, + { + "epoch": 1.77, + "learning_rate": 4.4112e-05, + "loss": 2.228, + "step": 9940 + }, + { + "epoch": 1.77, + "learning_rate": 4.410903703703704e-05, + "loss": 2.3133, + "step": 9945 + }, + { + "epoch": 1.77, + "learning_rate": 4.410607407407408e-05, + "loss": 2.1735, + "step": 9950 + }, + { + "epoch": 1.77, + "learning_rate": 4.410311111111111e-05, + "loss": 2.3266, + "step": 9955 + }, + { + "epoch": 1.77, + "learning_rate": 4.410014814814815e-05, + "loss": 2.2632, + "step": 9960 + }, + { + "epoch": 1.77, + "learning_rate": 4.409718518518519e-05, + "loss": 2.164, + "step": 9965 + }, + { + "epoch": 1.77, + "learning_rate": 4.409422222222223e-05, + "loss": 2.2565, + "step": 9970 + }, + { + "epoch": 1.77, + "learning_rate": 4.409125925925926e-05, + "loss": 2.0973, + "step": 9975 + }, + { + "epoch": 1.77, + "learning_rate": 4.40882962962963e-05, + "loss": 2.2179, + "step": 9980 + }, + { + "epoch": 1.78, + "learning_rate": 4.4085333333333336e-05, + "loss": 2.1716, + "step": 9985 + }, + { + "epoch": 1.78, + "learning_rate": 4.4082370370370375e-05, + "loss": 2.061, + "step": 9990 + }, + { + "epoch": 1.78, + "learning_rate": 4.407940740740741e-05, + "loss": 2.0537, + "step": 9995 + }, + { + "epoch": 1.78, + "learning_rate": 4.4076444444444446e-05, + "loss": 2.1978, + "step": 10000 + }, + { + "epoch": 1.78, + "eval_loss": 2.0439772605895996, + "eval_rouge2_fmeasure": 0.1667, + "eval_rouge2_precision": 0.2047, + "eval_rouge2_recall": 0.1491, + "eval_runtime": 34261.036, + "eval_samples_per_second": 0.146, + "eval_steps_per_second": 0.073, + "step": 10000 + }, + { + "epoch": 1.78, + "learning_rate": 4.4073481481481485e-05, + "loss": 2.31, + "step": 10005 + }, + { + "epoch": 1.78, + "learning_rate": 4.4070518518518524e-05, + "loss": 2.2195, + "step": 10010 + }, + { + "epoch": 1.78, + "learning_rate": 4.4067555555555556e-05, + "loss": 2.3401, + "step": 10015 + }, + { + "epoch": 1.78, + "learning_rate": 4.4064592592592595e-05, + "loss": 2.2386, + "step": 10020 + }, + { + "epoch": 1.78, + "learning_rate": 4.406162962962963e-05, + "loss": 1.9536, + "step": 10025 + }, + { + "epoch": 1.78, + "learning_rate": 4.405866666666667e-05, + "loss": 2.2169, + "step": 10030 + }, + { + "epoch": 1.78, + "learning_rate": 4.4055703703703704e-05, + "loss": 2.286, + "step": 10035 + }, + { + "epoch": 1.78, + "learning_rate": 4.405274074074074e-05, + "loss": 2.2227, + "step": 10040 + }, + { + "epoch": 1.79, + "learning_rate": 4.404977777777778e-05, + "loss": 2.1438, + "step": 10045 + }, + { + "epoch": 1.79, + "learning_rate": 4.404681481481482e-05, + "loss": 2.2947, + "step": 10050 + }, + { + "epoch": 1.79, + "learning_rate": 4.404385185185185e-05, + "loss": 2.2414, + "step": 10055 + }, + { + "epoch": 1.79, + "learning_rate": 4.404088888888889e-05, + "loss": 2.1051, + "step": 10060 + }, + { + "epoch": 1.79, + "learning_rate": 4.4037925925925924e-05, + "loss": 2.2663, + "step": 10065 + }, + { + "epoch": 1.79, + "learning_rate": 4.403496296296297e-05, + "loss": 2.1975, + "step": 10070 + }, + { + "epoch": 1.79, + "learning_rate": 4.4032e-05, + "loss": 2.2077, + "step": 10075 + }, + { + "epoch": 1.79, + "learning_rate": 4.402903703703704e-05, + "loss": 2.2922, + "step": 10080 + }, + { + "epoch": 1.79, + "learning_rate": 4.402607407407407e-05, + "loss": 2.1373, + "step": 10085 + }, + { + "epoch": 1.79, + "learning_rate": 4.402311111111112e-05, + "loss": 2.2895, + "step": 10090 + }, + { + "epoch": 1.79, + "learning_rate": 4.402014814814815e-05, + "loss": 2.1627, + "step": 10095 + }, + { + "epoch": 1.8, + "learning_rate": 4.401718518518519e-05, + "loss": 2.1977, + "step": 10100 + }, + { + "epoch": 1.8, + "learning_rate": 4.401422222222222e-05, + "loss": 2.2384, + "step": 10105 + }, + { + "epoch": 1.8, + "learning_rate": 4.4011259259259266e-05, + "loss": 2.1434, + "step": 10110 + }, + { + "epoch": 1.8, + "learning_rate": 4.40082962962963e-05, + "loss": 2.1962, + "step": 10115 + }, + { + "epoch": 1.8, + "learning_rate": 4.400533333333334e-05, + "loss": 2.2206, + "step": 10120 + }, + { + "epoch": 1.8, + "learning_rate": 4.400237037037037e-05, + "loss": 2.1588, + "step": 10125 + }, + { + "epoch": 1.8, + "learning_rate": 4.3999407407407415e-05, + "loss": 2.1711, + "step": 10130 + }, + { + "epoch": 1.8, + "learning_rate": 4.399644444444445e-05, + "loss": 2.202, + "step": 10135 + }, + { + "epoch": 1.8, + "learning_rate": 4.3993481481481486e-05, + "loss": 2.2683, + "step": 10140 + }, + { + "epoch": 1.8, + "learning_rate": 4.399051851851852e-05, + "loss": 2.3309, + "step": 10145 + }, + { + "epoch": 1.8, + "learning_rate": 4.3987555555555556e-05, + "loss": 2.1929, + "step": 10150 + }, + { + "epoch": 1.81, + "learning_rate": 4.3984592592592595e-05, + "loss": 2.2509, + "step": 10155 + }, + { + "epoch": 1.81, + "learning_rate": 4.3981629629629634e-05, + "loss": 2.226, + "step": 10160 + }, + { + "epoch": 1.81, + "learning_rate": 4.3978666666666666e-05, + "loss": 2.1327, + "step": 10165 + }, + { + "epoch": 1.81, + "learning_rate": 4.3975703703703705e-05, + "loss": 2.3077, + "step": 10170 + }, + { + "epoch": 1.81, + "learning_rate": 4.3972740740740744e-05, + "loss": 2.2088, + "step": 10175 + }, + { + "epoch": 1.81, + "learning_rate": 4.396977777777778e-05, + "loss": 2.2068, + "step": 10180 + }, + { + "epoch": 1.81, + "learning_rate": 4.3966814814814815e-05, + "loss": 2.2497, + "step": 10185 + }, + { + "epoch": 1.81, + "learning_rate": 4.3963851851851854e-05, + "loss": 2.268, + "step": 10190 + }, + { + "epoch": 1.81, + "learning_rate": 4.396088888888889e-05, + "loss": 2.2018, + "step": 10195 + }, + { + "epoch": 1.81, + "learning_rate": 4.395792592592593e-05, + "loss": 2.2625, + "step": 10200 + }, + { + "epoch": 1.81, + "learning_rate": 4.395496296296296e-05, + "loss": 2.2081, + "step": 10205 + }, + { + "epoch": 1.82, + "learning_rate": 4.3952e-05, + "loss": 2.1937, + "step": 10210 + }, + { + "epoch": 1.82, + "learning_rate": 4.394903703703704e-05, + "loss": 2.2145, + "step": 10215 + }, + { + "epoch": 1.82, + "learning_rate": 4.394607407407408e-05, + "loss": 2.2441, + "step": 10220 + }, + { + "epoch": 1.82, + "learning_rate": 4.394311111111111e-05, + "loss": 2.29, + "step": 10225 + }, + { + "epoch": 1.82, + "learning_rate": 4.394014814814815e-05, + "loss": 2.0711, + "step": 10230 + }, + { + "epoch": 1.82, + "learning_rate": 4.393718518518519e-05, + "loss": 2.2448, + "step": 10235 + }, + { + "epoch": 1.82, + "learning_rate": 4.393422222222223e-05, + "loss": 2.0398, + "step": 10240 + }, + { + "epoch": 1.82, + "learning_rate": 4.393125925925926e-05, + "loss": 2.1704, + "step": 10245 + }, + { + "epoch": 1.82, + "learning_rate": 4.39282962962963e-05, + "loss": 2.1463, + "step": 10250 + }, + { + "epoch": 1.82, + "learning_rate": 4.392533333333334e-05, + "loss": 2.153, + "step": 10255 + }, + { + "epoch": 1.82, + "learning_rate": 4.392237037037038e-05, + "loss": 2.2118, + "step": 10260 + }, + { + "epoch": 1.82, + "learning_rate": 4.391940740740741e-05, + "loss": 2.245, + "step": 10265 + }, + { + "epoch": 1.83, + "learning_rate": 4.391644444444445e-05, + "loss": 2.2596, + "step": 10270 + }, + { + "epoch": 1.83, + "learning_rate": 4.3913481481481486e-05, + "loss": 2.2641, + "step": 10275 + }, + { + "epoch": 1.83, + "learning_rate": 4.3910518518518525e-05, + "loss": 2.0554, + "step": 10280 + }, + { + "epoch": 1.83, + "learning_rate": 4.390755555555556e-05, + "loss": 2.2307, + "step": 10285 + }, + { + "epoch": 1.83, + "learning_rate": 4.3904592592592596e-05, + "loss": 2.2338, + "step": 10290 + }, + { + "epoch": 1.83, + "learning_rate": 4.390162962962963e-05, + "loss": 2.2213, + "step": 10295 + }, + { + "epoch": 1.83, + "learning_rate": 4.3898666666666674e-05, + "loss": 2.2879, + "step": 10300 + }, + { + "epoch": 1.83, + "learning_rate": 4.3895703703703706e-05, + "loss": 2.3313, + "step": 10305 + }, + { + "epoch": 1.83, + "learning_rate": 4.3892740740740745e-05, + "loss": 2.1257, + "step": 10310 + }, + { + "epoch": 1.83, + "learning_rate": 4.388977777777778e-05, + "loss": 2.3254, + "step": 10315 + }, + { + "epoch": 1.83, + "learning_rate": 4.388681481481482e-05, + "loss": 2.3248, + "step": 10320 + }, + { + "epoch": 1.84, + "learning_rate": 4.3883851851851854e-05, + "loss": 2.3274, + "step": 10325 + }, + { + "epoch": 1.84, + "learning_rate": 4.388088888888889e-05, + "loss": 2.2155, + "step": 10330 + }, + { + "epoch": 1.84, + "learning_rate": 4.3877925925925925e-05, + "loss": 2.1047, + "step": 10335 + }, + { + "epoch": 1.84, + "learning_rate": 4.387496296296297e-05, + "loss": 2.2408, + "step": 10340 + }, + { + "epoch": 1.84, + "learning_rate": 4.3872e-05, + "loss": 2.2722, + "step": 10345 + }, + { + "epoch": 1.84, + "learning_rate": 4.386903703703704e-05, + "loss": 2.191, + "step": 10350 + }, + { + "epoch": 1.84, + "learning_rate": 4.3866074074074074e-05, + "loss": 2.2256, + "step": 10355 + }, + { + "epoch": 1.84, + "learning_rate": 4.386311111111112e-05, + "loss": 2.3582, + "step": 10360 + }, + { + "epoch": 1.84, + "learning_rate": 4.386014814814815e-05, + "loss": 2.2479, + "step": 10365 + }, + { + "epoch": 1.84, + "learning_rate": 4.385718518518519e-05, + "loss": 2.0827, + "step": 10370 + }, + { + "epoch": 1.84, + "learning_rate": 4.385422222222222e-05, + "loss": 2.1383, + "step": 10375 + }, + { + "epoch": 1.85, + "learning_rate": 4.385125925925926e-05, + "loss": 2.2418, + "step": 10380 + }, + { + "epoch": 1.85, + "learning_rate": 4.38482962962963e-05, + "loss": 2.2962, + "step": 10385 + }, + { + "epoch": 1.85, + "learning_rate": 4.384533333333333e-05, + "loss": 2.2133, + "step": 10390 + }, + { + "epoch": 1.85, + "learning_rate": 4.384237037037037e-05, + "loss": 2.2497, + "step": 10395 + }, + { + "epoch": 1.85, + "learning_rate": 4.383940740740741e-05, + "loss": 2.1867, + "step": 10400 + }, + { + "epoch": 1.85, + "learning_rate": 4.383644444444445e-05, + "loss": 2.2258, + "step": 10405 + }, + { + "epoch": 1.85, + "learning_rate": 4.383348148148148e-05, + "loss": 2.0529, + "step": 10410 + }, + { + "epoch": 1.85, + "learning_rate": 4.383051851851852e-05, + "loss": 2.2255, + "step": 10415 + }, + { + "epoch": 1.85, + "learning_rate": 4.382755555555556e-05, + "loss": 2.1513, + "step": 10420 + }, + { + "epoch": 1.85, + "learning_rate": 4.38245925925926e-05, + "loss": 2.2955, + "step": 10425 + }, + { + "epoch": 1.85, + "learning_rate": 4.382162962962963e-05, + "loss": 2.1093, + "step": 10430 + }, + { + "epoch": 1.86, + "learning_rate": 4.381866666666667e-05, + "loss": 2.2788, + "step": 10435 + }, + { + "epoch": 1.86, + "learning_rate": 4.3815703703703707e-05, + "loss": 2.3515, + "step": 10440 + }, + { + "epoch": 1.86, + "learning_rate": 4.3812740740740745e-05, + "loss": 2.1513, + "step": 10445 + }, + { + "epoch": 1.86, + "learning_rate": 4.380977777777778e-05, + "loss": 2.2426, + "step": 10450 + }, + { + "epoch": 1.86, + "learning_rate": 4.3806814814814816e-05, + "loss": 2.1481, + "step": 10455 + }, + { + "epoch": 1.86, + "learning_rate": 4.380385185185185e-05, + "loss": 2.2667, + "step": 10460 + }, + { + "epoch": 1.86, + "learning_rate": 4.3800888888888894e-05, + "loss": 2.2408, + "step": 10465 + }, + { + "epoch": 1.86, + "learning_rate": 4.3797925925925926e-05, + "loss": 2.2682, + "step": 10470 + }, + { + "epoch": 1.86, + "learning_rate": 4.3794962962962965e-05, + "loss": 2.2294, + "step": 10475 + }, + { + "epoch": 1.86, + "learning_rate": 4.3792e-05, + "loss": 2.2253, + "step": 10480 + }, + { + "epoch": 1.86, + "learning_rate": 4.378903703703704e-05, + "loss": 2.2201, + "step": 10485 + }, + { + "epoch": 1.86, + "learning_rate": 4.3786074074074074e-05, + "loss": 2.203, + "step": 10490 + }, + { + "epoch": 1.87, + "learning_rate": 4.378311111111111e-05, + "loss": 2.2442, + "step": 10495 + }, + { + "epoch": 1.87, + "learning_rate": 4.3780148148148145e-05, + "loss": 2.1727, + "step": 10500 + }, + { + "epoch": 1.87, + "learning_rate": 4.377718518518519e-05, + "loss": 2.3798, + "step": 10505 + }, + { + "epoch": 1.87, + "learning_rate": 4.377422222222222e-05, + "loss": 2.0875, + "step": 10510 + }, + { + "epoch": 1.87, + "learning_rate": 4.377125925925926e-05, + "loss": 2.0281, + "step": 10515 + }, + { + "epoch": 1.87, + "learning_rate": 4.3768296296296294e-05, + "loss": 2.1237, + "step": 10520 + }, + { + "epoch": 1.87, + "learning_rate": 4.376533333333333e-05, + "loss": 2.1481, + "step": 10525 + }, + { + "epoch": 1.87, + "learning_rate": 4.376237037037037e-05, + "loss": 2.2084, + "step": 10530 + }, + { + "epoch": 1.87, + "learning_rate": 4.375940740740741e-05, + "loss": 2.1602, + "step": 10535 + }, + { + "epoch": 1.87, + "learning_rate": 4.375644444444444e-05, + "loss": 2.1144, + "step": 10540 + }, + { + "epoch": 1.87, + "learning_rate": 4.375348148148148e-05, + "loss": 2.2005, + "step": 10545 + }, + { + "epoch": 1.88, + "learning_rate": 4.375051851851852e-05, + "loss": 2.2358, + "step": 10550 + }, + { + "epoch": 1.88, + "learning_rate": 4.374755555555556e-05, + "loss": 2.13, + "step": 10555 + }, + { + "epoch": 1.88, + "learning_rate": 4.374459259259259e-05, + "loss": 2.3751, + "step": 10560 + }, + { + "epoch": 1.88, + "learning_rate": 4.374162962962963e-05, + "loss": 2.2317, + "step": 10565 + }, + { + "epoch": 1.88, + "learning_rate": 4.373866666666667e-05, + "loss": 2.2669, + "step": 10570 + }, + { + "epoch": 1.88, + "learning_rate": 4.373570370370371e-05, + "loss": 2.1224, + "step": 10575 + }, + { + "epoch": 1.88, + "learning_rate": 4.373274074074074e-05, + "loss": 2.2104, + "step": 10580 + }, + { + "epoch": 1.88, + "learning_rate": 4.372977777777778e-05, + "loss": 2.2166, + "step": 10585 + }, + { + "epoch": 1.88, + "learning_rate": 4.372681481481482e-05, + "loss": 2.1757, + "step": 10590 + }, + { + "epoch": 1.88, + "learning_rate": 4.3723851851851856e-05, + "loss": 2.2154, + "step": 10595 + }, + { + "epoch": 1.88, + "learning_rate": 4.372088888888889e-05, + "loss": 2.3143, + "step": 10600 + }, + { + "epoch": 1.89, + "learning_rate": 4.371792592592593e-05, + "loss": 2.3835, + "step": 10605 + }, + { + "epoch": 1.89, + "learning_rate": 4.3714962962962966e-05, + "loss": 2.1537, + "step": 10610 + }, + { + "epoch": 1.89, + "learning_rate": 4.3712000000000004e-05, + "loss": 2.2768, + "step": 10615 + }, + { + "epoch": 1.89, + "learning_rate": 4.3709037037037036e-05, + "loss": 2.1266, + "step": 10620 + }, + { + "epoch": 1.89, + "learning_rate": 4.3706074074074075e-05, + "loss": 2.1756, + "step": 10625 + }, + { + "epoch": 1.89, + "learning_rate": 4.3703111111111114e-05, + "loss": 2.2417, + "step": 10630 + }, + { + "epoch": 1.89, + "learning_rate": 4.370014814814815e-05, + "loss": 2.17, + "step": 10635 + }, + { + "epoch": 1.89, + "learning_rate": 4.3697185185185185e-05, + "loss": 2.2912, + "step": 10640 + }, + { + "epoch": 1.89, + "learning_rate": 4.3694222222222224e-05, + "loss": 2.2549, + "step": 10645 + }, + { + "epoch": 1.89, + "learning_rate": 4.369125925925926e-05, + "loss": 2.0107, + "step": 10650 + }, + { + "epoch": 1.89, + "learning_rate": 4.36882962962963e-05, + "loss": 2.3948, + "step": 10655 + }, + { + "epoch": 1.9, + "learning_rate": 4.3685333333333333e-05, + "loss": 2.2989, + "step": 10660 + }, + { + "epoch": 1.9, + "learning_rate": 4.368237037037037e-05, + "loss": 2.2562, + "step": 10665 + }, + { + "epoch": 1.9, + "learning_rate": 4.367940740740741e-05, + "loss": 2.1361, + "step": 10670 + }, + { + "epoch": 1.9, + "learning_rate": 4.367644444444445e-05, + "loss": 2.2095, + "step": 10675 + }, + { + "epoch": 1.9, + "learning_rate": 4.367348148148148e-05, + "loss": 2.2919, + "step": 10680 + }, + { + "epoch": 1.9, + "learning_rate": 4.367051851851852e-05, + "loss": 2.1821, + "step": 10685 + }, + { + "epoch": 1.9, + "learning_rate": 4.366755555555555e-05, + "loss": 2.2549, + "step": 10690 + }, + { + "epoch": 1.9, + "learning_rate": 4.36645925925926e-05, + "loss": 2.3804, + "step": 10695 + }, + { + "epoch": 1.9, + "learning_rate": 4.366162962962963e-05, + "loss": 2.3303, + "step": 10700 + }, + { + "epoch": 1.9, + "learning_rate": 4.365866666666667e-05, + "loss": 2.111, + "step": 10705 + }, + { + "epoch": 1.9, + "learning_rate": 4.36557037037037e-05, + "loss": 2.1768, + "step": 10710 + }, + { + "epoch": 1.9, + "learning_rate": 4.365274074074075e-05, + "loss": 2.0743, + "step": 10715 + }, + { + "epoch": 1.91, + "learning_rate": 4.364977777777778e-05, + "loss": 1.9922, + "step": 10720 + }, + { + "epoch": 1.91, + "learning_rate": 4.364681481481482e-05, + "loss": 2.1965, + "step": 10725 + }, + { + "epoch": 1.91, + "learning_rate": 4.364385185185185e-05, + "loss": 2.314, + "step": 10730 + }, + { + "epoch": 1.91, + "learning_rate": 4.3640888888888895e-05, + "loss": 2.2277, + "step": 10735 + }, + { + "epoch": 1.91, + "learning_rate": 4.363792592592593e-05, + "loss": 2.2343, + "step": 10740 + }, + { + "epoch": 1.91, + "learning_rate": 4.3634962962962966e-05, + "loss": 2.0696, + "step": 10745 + }, + { + "epoch": 1.91, + "learning_rate": 4.3632e-05, + "loss": 2.1953, + "step": 10750 + }, + { + "epoch": 1.91, + "learning_rate": 4.362903703703704e-05, + "loss": 2.2502, + "step": 10755 + }, + { + "epoch": 1.91, + "learning_rate": 4.3626074074074076e-05, + "loss": 2.1502, + "step": 10760 + }, + { + "epoch": 1.91, + "learning_rate": 4.3623111111111115e-05, + "loss": 2.1763, + "step": 10765 + }, + { + "epoch": 1.91, + "learning_rate": 4.362014814814815e-05, + "loss": 2.1015, + "step": 10770 + }, + { + "epoch": 1.92, + "learning_rate": 4.3617185185185186e-05, + "loss": 2.2108, + "step": 10775 + }, + { + "epoch": 1.92, + "learning_rate": 4.3614222222222224e-05, + "loss": 2.266, + "step": 10780 + }, + { + "epoch": 1.92, + "learning_rate": 4.361125925925926e-05, + "loss": 2.3031, + "step": 10785 + }, + { + "epoch": 1.92, + "learning_rate": 4.3608296296296295e-05, + "loss": 2.2477, + "step": 10790 + }, + { + "epoch": 1.92, + "learning_rate": 4.3605333333333334e-05, + "loss": 2.2424, + "step": 10795 + }, + { + "epoch": 1.92, + "learning_rate": 4.360237037037037e-05, + "loss": 2.1454, + "step": 10800 + }, + { + "epoch": 1.92, + "learning_rate": 4.359940740740741e-05, + "loss": 2.1509, + "step": 10805 + }, + { + "epoch": 1.92, + "learning_rate": 4.3596444444444444e-05, + "loss": 2.1308, + "step": 10810 + }, + { + "epoch": 1.92, + "learning_rate": 4.359348148148148e-05, + "loss": 2.2903, + "step": 10815 + }, + { + "epoch": 1.92, + "learning_rate": 4.359051851851852e-05, + "loss": 2.1762, + "step": 10820 + }, + { + "epoch": 1.92, + "learning_rate": 4.358755555555556e-05, + "loss": 2.1965, + "step": 10825 + }, + { + "epoch": 1.93, + "learning_rate": 4.358459259259259e-05, + "loss": 2.2008, + "step": 10830 + }, + { + "epoch": 1.93, + "learning_rate": 4.358162962962963e-05, + "loss": 2.3109, + "step": 10835 + }, + { + "epoch": 1.93, + "learning_rate": 4.357866666666667e-05, + "loss": 2.0748, + "step": 10840 + }, + { + "epoch": 1.93, + "learning_rate": 4.357570370370371e-05, + "loss": 2.085, + "step": 10845 + }, + { + "epoch": 1.93, + "learning_rate": 4.357274074074074e-05, + "loss": 2.206, + "step": 10850 + }, + { + "epoch": 1.93, + "learning_rate": 4.356977777777778e-05, + "loss": 2.0983, + "step": 10855 + }, + { + "epoch": 1.93, + "learning_rate": 4.356681481481482e-05, + "loss": 2.1558, + "step": 10860 + }, + { + "epoch": 1.93, + "learning_rate": 4.356385185185186e-05, + "loss": 2.1374, + "step": 10865 + }, + { + "epoch": 1.93, + "learning_rate": 4.356088888888889e-05, + "loss": 2.2158, + "step": 10870 + }, + { + "epoch": 1.93, + "learning_rate": 4.355792592592593e-05, + "loss": 2.1164, + "step": 10875 + }, + { + "epoch": 1.93, + "learning_rate": 4.355496296296297e-05, + "loss": 2.1895, + "step": 10880 + }, + { + "epoch": 1.94, + "learning_rate": 4.3552000000000006e-05, + "loss": 2.2881, + "step": 10885 + }, + { + "epoch": 1.94, + "learning_rate": 4.354903703703704e-05, + "loss": 2.272, + "step": 10890 + }, + { + "epoch": 1.94, + "learning_rate": 4.354607407407408e-05, + "loss": 2.0539, + "step": 10895 + }, + { + "epoch": 1.94, + "learning_rate": 4.3543111111111116e-05, + "loss": 2.304, + "step": 10900 + }, + { + "epoch": 1.94, + "learning_rate": 4.3540148148148154e-05, + "loss": 2.1847, + "step": 10905 + }, + { + "epoch": 1.94, + "learning_rate": 4.3537185185185186e-05, + "loss": 2.1715, + "step": 10910 + }, + { + "epoch": 1.94, + "learning_rate": 4.3534222222222225e-05, + "loss": 2.1479, + "step": 10915 + }, + { + "epoch": 1.94, + "learning_rate": 4.353125925925926e-05, + "loss": 2.3192, + "step": 10920 + }, + { + "epoch": 1.94, + "learning_rate": 4.35282962962963e-05, + "loss": 2.2156, + "step": 10925 + }, + { + "epoch": 1.94, + "learning_rate": 4.3525333333333335e-05, + "loss": 2.2539, + "step": 10930 + }, + { + "epoch": 1.94, + "learning_rate": 4.3522370370370374e-05, + "loss": 2.1328, + "step": 10935 + }, + { + "epoch": 1.94, + "learning_rate": 4.3519407407407406e-05, + "loss": 2.029, + "step": 10940 + }, + { + "epoch": 1.95, + "learning_rate": 4.351644444444445e-05, + "loss": 2.0718, + "step": 10945 + }, + { + "epoch": 1.95, + "learning_rate": 4.3513481481481483e-05, + "loss": 2.2239, + "step": 10950 + }, + { + "epoch": 1.95, + "learning_rate": 4.351051851851852e-05, + "loss": 2.2558, + "step": 10955 + }, + { + "epoch": 1.95, + "learning_rate": 4.3507555555555554e-05, + "loss": 2.3063, + "step": 10960 + }, + { + "epoch": 1.95, + "learning_rate": 4.35045925925926e-05, + "loss": 2.2471, + "step": 10965 + }, + { + "epoch": 1.95, + "learning_rate": 4.350162962962963e-05, + "loss": 2.231, + "step": 10970 + }, + { + "epoch": 1.95, + "learning_rate": 4.349866666666667e-05, + "loss": 2.3588, + "step": 10975 + }, + { + "epoch": 1.95, + "learning_rate": 4.34957037037037e-05, + "loss": 2.0912, + "step": 10980 + }, + { + "epoch": 1.95, + "learning_rate": 4.349274074074074e-05, + "loss": 2.2122, + "step": 10985 + }, + { + "epoch": 1.95, + "learning_rate": 4.348977777777778e-05, + "loss": 2.3138, + "step": 10990 + }, + { + "epoch": 1.95, + "learning_rate": 4.348681481481482e-05, + "loss": 2.2255, + "step": 10995 + }, + { + "epoch": 1.96, + "learning_rate": 4.348385185185185e-05, + "loss": 2.3409, + "step": 11000 + }, + { + "epoch": 1.96, + "learning_rate": 4.348088888888889e-05, + "loss": 2.3846, + "step": 11005 + }, + { + "epoch": 1.96, + "learning_rate": 4.347792592592593e-05, + "loss": 2.1558, + "step": 11010 + }, + { + "epoch": 1.96, + "learning_rate": 4.347496296296297e-05, + "loss": 2.296, + "step": 11015 + }, + { + "epoch": 1.96, + "learning_rate": 4.3472e-05, + "loss": 2.2548, + "step": 11020 + }, + { + "epoch": 1.96, + "learning_rate": 4.346903703703704e-05, + "loss": 2.0732, + "step": 11025 + }, + { + "epoch": 1.96, + "learning_rate": 4.346607407407408e-05, + "loss": 2.1148, + "step": 11030 + }, + { + "epoch": 1.96, + "learning_rate": 4.3463111111111116e-05, + "loss": 2.2872, + "step": 11035 + }, + { + "epoch": 1.96, + "learning_rate": 4.346014814814815e-05, + "loss": 2.1739, + "step": 11040 + }, + { + "epoch": 1.96, + "learning_rate": 4.345718518518519e-05, + "loss": 2.2737, + "step": 11045 + }, + { + "epoch": 1.96, + "learning_rate": 4.3454222222222226e-05, + "loss": 2.2568, + "step": 11050 + }, + { + "epoch": 1.97, + "learning_rate": 4.3451259259259265e-05, + "loss": 2.2822, + "step": 11055 + }, + { + "epoch": 1.97, + "learning_rate": 4.34482962962963e-05, + "loss": 2.173, + "step": 11060 + }, + { + "epoch": 1.97, + "learning_rate": 4.3445333333333336e-05, + "loss": 2.2033, + "step": 11065 + }, + { + "epoch": 1.97, + "learning_rate": 4.3442370370370375e-05, + "loss": 2.316, + "step": 11070 + }, + { + "epoch": 1.97, + "learning_rate": 4.343940740740741e-05, + "loss": 2.2675, + "step": 11075 + }, + { + "epoch": 1.97, + "learning_rate": 4.3436444444444445e-05, + "loss": 2.2084, + "step": 11080 + }, + { + "epoch": 1.97, + "learning_rate": 4.3433481481481484e-05, + "loss": 2.1887, + "step": 11085 + }, + { + "epoch": 1.97, + "learning_rate": 4.343051851851852e-05, + "loss": 2.2739, + "step": 11090 + }, + { + "epoch": 1.97, + "learning_rate": 4.342755555555556e-05, + "loss": 2.2122, + "step": 11095 + }, + { + "epoch": 1.97, + "learning_rate": 4.3424592592592594e-05, + "loss": 2.2178, + "step": 11100 + }, + { + "epoch": 1.97, + "learning_rate": 4.342162962962963e-05, + "loss": 2.1574, + "step": 11105 + }, + { + "epoch": 1.98, + "learning_rate": 4.341866666666667e-05, + "loss": 2.2147, + "step": 11110 + }, + { + "epoch": 1.98, + "learning_rate": 4.341570370370371e-05, + "loss": 2.1124, + "step": 11115 + }, + { + "epoch": 1.98, + "learning_rate": 4.341274074074074e-05, + "loss": 2.1672, + "step": 11120 + }, + { + "epoch": 1.98, + "learning_rate": 4.340977777777778e-05, + "loss": 2.2424, + "step": 11125 + }, + { + "epoch": 1.98, + "learning_rate": 4.340681481481482e-05, + "loss": 2.2263, + "step": 11130 + }, + { + "epoch": 1.98, + "learning_rate": 4.340385185185186e-05, + "loss": 2.1193, + "step": 11135 + }, + { + "epoch": 1.98, + "learning_rate": 4.340088888888889e-05, + "loss": 2.2944, + "step": 11140 + }, + { + "epoch": 1.98, + "learning_rate": 4.339792592592593e-05, + "loss": 2.2848, + "step": 11145 + }, + { + "epoch": 1.98, + "learning_rate": 4.339496296296296e-05, + "loss": 2.3274, + "step": 11150 + }, + { + "epoch": 1.98, + "learning_rate": 4.3392e-05, + "loss": 2.2382, + "step": 11155 + }, + { + "epoch": 1.98, + "learning_rate": 4.338903703703704e-05, + "loss": 2.1853, + "step": 11160 + }, + { + "epoch": 1.98, + "learning_rate": 4.338607407407407e-05, + "loss": 2.1775, + "step": 11165 + }, + { + "epoch": 1.99, + "learning_rate": 4.338311111111111e-05, + "loss": 2.2032, + "step": 11170 + }, + { + "epoch": 1.99, + "learning_rate": 4.338014814814815e-05, + "loss": 2.1721, + "step": 11175 + }, + { + "epoch": 1.99, + "learning_rate": 4.337718518518519e-05, + "loss": 2.239, + "step": 11180 + }, + { + "epoch": 1.99, + "learning_rate": 4.337422222222222e-05, + "loss": 2.335, + "step": 11185 + }, + { + "epoch": 1.99, + "learning_rate": 4.337125925925926e-05, + "loss": 2.2095, + "step": 11190 + }, + { + "epoch": 1.99, + "learning_rate": 4.33682962962963e-05, + "loss": 2.3582, + "step": 11195 + }, + { + "epoch": 1.99, + "learning_rate": 4.3365333333333336e-05, + "loss": 2.2294, + "step": 11200 + }, + { + "epoch": 1.99, + "learning_rate": 4.336237037037037e-05, + "loss": 2.1753, + "step": 11205 + }, + { + "epoch": 1.99, + "learning_rate": 4.335940740740741e-05, + "loss": 2.3546, + "step": 11210 + }, + { + "epoch": 1.99, + "learning_rate": 4.3356444444444446e-05, + "loss": 2.2901, + "step": 11215 + }, + { + "epoch": 1.99, + "learning_rate": 4.3353481481481485e-05, + "loss": 2.1788, + "step": 11220 + }, + { + "epoch": 2.0, + "learning_rate": 4.335051851851852e-05, + "loss": 2.2092, + "step": 11225 + }, + { + "epoch": 2.0, + "learning_rate": 4.3347555555555556e-05, + "loss": 2.0376, + "step": 11230 + }, + { + "epoch": 2.0, + "learning_rate": 4.3344592592592595e-05, + "loss": 2.1752, + "step": 11235 + }, + { + "epoch": 2.0, + "learning_rate": 4.3341629629629633e-05, + "loss": 2.1688, + "step": 11240 + }, + { + "epoch": 2.0, + "learning_rate": 4.3338666666666666e-05, + "loss": 2.2212, + "step": 11245 + }, + { + "epoch": 2.0, + "learning_rate": 4.3335703703703704e-05, + "loss": 2.2072, + "step": 11250 + }, + { + "epoch": 2.0, + "learning_rate": 4.333274074074074e-05, + "loss": 2.0503, + "step": 11255 + }, + { + "epoch": 2.0, + "learning_rate": 4.332977777777778e-05, + "loss": 1.8864, + "step": 11260 + }, + { + "epoch": 2.0, + "learning_rate": 4.3326814814814814e-05, + "loss": 1.9188, + "step": 11265 + }, + { + "epoch": 2.0, + "learning_rate": 4.332385185185185e-05, + "loss": 2.1106, + "step": 11270 + }, + { + "epoch": 2.0, + "learning_rate": 4.332088888888889e-05, + "loss": 2.105, + "step": 11275 + }, + { + "epoch": 2.01, + "learning_rate": 4.331792592592593e-05, + "loss": 2.08, + "step": 11280 + }, + { + "epoch": 2.01, + "learning_rate": 4.331496296296296e-05, + "loss": 2.0219, + "step": 11285 + }, + { + "epoch": 2.01, + "learning_rate": 4.3312e-05, + "loss": 2.1002, + "step": 11290 + }, + { + "epoch": 2.01, + "learning_rate": 4.330903703703704e-05, + "loss": 2.1142, + "step": 11295 + }, + { + "epoch": 2.01, + "learning_rate": 4.330607407407408e-05, + "loss": 2.1429, + "step": 11300 + }, + { + "epoch": 2.01, + "learning_rate": 4.330311111111111e-05, + "loss": 1.9445, + "step": 11305 + }, + { + "epoch": 2.01, + "learning_rate": 4.330014814814815e-05, + "loss": 1.9515, + "step": 11310 + }, + { + "epoch": 2.01, + "learning_rate": 4.329718518518518e-05, + "loss": 2.2021, + "step": 11315 + }, + { + "epoch": 2.01, + "learning_rate": 4.329422222222223e-05, + "loss": 2.0118, + "step": 11320 + }, + { + "epoch": 2.01, + "learning_rate": 4.329125925925926e-05, + "loss": 2.0163, + "step": 11325 + }, + { + "epoch": 2.01, + "learning_rate": 4.32882962962963e-05, + "loss": 2.0078, + "step": 11330 + }, + { + "epoch": 2.02, + "learning_rate": 4.328533333333333e-05, + "loss": 2.0841, + "step": 11335 + }, + { + "epoch": 2.02, + "learning_rate": 4.3282370370370376e-05, + "loss": 2.0808, + "step": 11340 + }, + { + "epoch": 2.02, + "learning_rate": 4.327940740740741e-05, + "loss": 2.0091, + "step": 11345 + }, + { + "epoch": 2.02, + "learning_rate": 4.327644444444445e-05, + "loss": 2.1778, + "step": 11350 + }, + { + "epoch": 2.02, + "learning_rate": 4.327348148148148e-05, + "loss": 2.0817, + "step": 11355 + }, + { + "epoch": 2.02, + "learning_rate": 4.3270518518518525e-05, + "loss": 2.1222, + "step": 11360 + }, + { + "epoch": 2.02, + "learning_rate": 4.3267555555555557e-05, + "loss": 2.156, + "step": 11365 + }, + { + "epoch": 2.02, + "learning_rate": 4.3264592592592595e-05, + "loss": 2.0951, + "step": 11370 + }, + { + "epoch": 2.02, + "learning_rate": 4.326162962962963e-05, + "loss": 2.0884, + "step": 11375 + }, + { + "epoch": 2.02, + "learning_rate": 4.3258666666666666e-05, + "loss": 2.1757, + "step": 11380 + }, + { + "epoch": 2.02, + "learning_rate": 4.3255703703703705e-05, + "loss": 1.9914, + "step": 11385 + }, + { + "epoch": 2.02, + "learning_rate": 4.3252740740740744e-05, + "loss": 2.1065, + "step": 11390 + }, + { + "epoch": 2.03, + "learning_rate": 4.3249777777777776e-05, + "loss": 2.1114, + "step": 11395 + }, + { + "epoch": 2.03, + "learning_rate": 4.3246814814814815e-05, + "loss": 2.0487, + "step": 11400 + }, + { + "epoch": 2.03, + "learning_rate": 4.3243851851851854e-05, + "loss": 2.0324, + "step": 11405 + }, + { + "epoch": 2.03, + "learning_rate": 4.324088888888889e-05, + "loss": 2.0874, + "step": 11410 + }, + { + "epoch": 2.03, + "learning_rate": 4.3237925925925924e-05, + "loss": 1.9281, + "step": 11415 + }, + { + "epoch": 2.03, + "learning_rate": 4.323496296296296e-05, + "loss": 2.002, + "step": 11420 + }, + { + "epoch": 2.03, + "learning_rate": 4.3232e-05, + "loss": 1.9988, + "step": 11425 + }, + { + "epoch": 2.03, + "learning_rate": 4.322903703703704e-05, + "loss": 2.0591, + "step": 11430 + }, + { + "epoch": 2.03, + "learning_rate": 4.322607407407407e-05, + "loss": 2.2524, + "step": 11435 + }, + { + "epoch": 2.03, + "learning_rate": 4.322311111111111e-05, + "loss": 2.1542, + "step": 11440 + }, + { + "epoch": 2.03, + "learning_rate": 4.322014814814815e-05, + "loss": 2.1468, + "step": 11445 + }, + { + "epoch": 2.04, + "learning_rate": 4.321718518518519e-05, + "loss": 2.1932, + "step": 11450 + }, + { + "epoch": 2.04, + "learning_rate": 4.321422222222222e-05, + "loss": 1.9458, + "step": 11455 + }, + { + "epoch": 2.04, + "learning_rate": 4.321125925925926e-05, + "loss": 2.178, + "step": 11460 + }, + { + "epoch": 2.04, + "learning_rate": 4.32082962962963e-05, + "loss": 2.0837, + "step": 11465 + }, + { + "epoch": 2.04, + "learning_rate": 4.320533333333334e-05, + "loss": 2.0219, + "step": 11470 + }, + { + "epoch": 2.04, + "learning_rate": 4.320237037037037e-05, + "loss": 2.0984, + "step": 11475 + }, + { + "epoch": 2.04, + "learning_rate": 4.319940740740741e-05, + "loss": 2.0556, + "step": 11480 + }, + { + "epoch": 2.04, + "learning_rate": 4.319644444444445e-05, + "loss": 2.0566, + "step": 11485 + }, + { + "epoch": 2.04, + "learning_rate": 4.3193481481481487e-05, + "loss": 2.1313, + "step": 11490 + }, + { + "epoch": 2.04, + "learning_rate": 4.319051851851852e-05, + "loss": 2.1925, + "step": 11495 + }, + { + "epoch": 2.04, + "learning_rate": 4.318755555555556e-05, + "loss": 2.0852, + "step": 11500 + }, + { + "epoch": 2.05, + "learning_rate": 4.3184592592592596e-05, + "loss": 2.1343, + "step": 11505 + }, + { + "epoch": 2.05, + "learning_rate": 4.3181629629629635e-05, + "loss": 2.0709, + "step": 11510 + }, + { + "epoch": 2.05, + "learning_rate": 4.317866666666667e-05, + "loss": 2.0256, + "step": 11515 + }, + { + "epoch": 2.05, + "learning_rate": 4.3175703703703706e-05, + "loss": 2.0816, + "step": 11520 + }, + { + "epoch": 2.05, + "learning_rate": 4.3172740740740745e-05, + "loss": 2.1531, + "step": 11525 + }, + { + "epoch": 2.05, + "learning_rate": 4.3169777777777784e-05, + "loss": 2.1405, + "step": 11530 + }, + { + "epoch": 2.05, + "learning_rate": 4.3166814814814816e-05, + "loss": 2.0763, + "step": 11535 + }, + { + "epoch": 2.05, + "learning_rate": 4.3163851851851854e-05, + "loss": 2.1567, + "step": 11540 + }, + { + "epoch": 2.05, + "learning_rate": 4.3160888888888886e-05, + "loss": 2.0646, + "step": 11545 + }, + { + "epoch": 2.05, + "learning_rate": 4.315792592592593e-05, + "loss": 2.0905, + "step": 11550 + }, + { + "epoch": 2.05, + "learning_rate": 4.3154962962962964e-05, + "loss": 2.1443, + "step": 11555 + }, + { + "epoch": 2.06, + "learning_rate": 4.3152e-05, + "loss": 2.2026, + "step": 11560 + }, + { + "epoch": 2.06, + "learning_rate": 4.3149037037037035e-05, + "loss": 2.1737, + "step": 11565 + }, + { + "epoch": 2.06, + "learning_rate": 4.314607407407408e-05, + "loss": 2.0629, + "step": 11570 + }, + { + "epoch": 2.06, + "learning_rate": 4.314311111111111e-05, + "loss": 2.1468, + "step": 11575 + }, + { + "epoch": 2.06, + "learning_rate": 4.314014814814815e-05, + "loss": 2.0219, + "step": 11580 + }, + { + "epoch": 2.06, + "learning_rate": 4.3137185185185183e-05, + "loss": 2.117, + "step": 11585 + }, + { + "epoch": 2.06, + "learning_rate": 4.313422222222223e-05, + "loss": 2.0584, + "step": 11590 + }, + { + "epoch": 2.06, + "learning_rate": 4.313125925925926e-05, + "loss": 2.0323, + "step": 11595 + }, + { + "epoch": 2.06, + "learning_rate": 4.31282962962963e-05, + "loss": 2.0484, + "step": 11600 + }, + { + "epoch": 2.06, + "learning_rate": 4.312533333333333e-05, + "loss": 2.1403, + "step": 11605 + }, + { + "epoch": 2.06, + "learning_rate": 4.312237037037037e-05, + "loss": 2.0259, + "step": 11610 + }, + { + "epoch": 2.06, + "learning_rate": 4.311940740740741e-05, + "loss": 2.0897, + "step": 11615 + }, + { + "epoch": 2.07, + "learning_rate": 4.311644444444445e-05, + "loss": 2.2139, + "step": 11620 + }, + { + "epoch": 2.07, + "learning_rate": 4.311348148148148e-05, + "loss": 2.1341, + "step": 11625 + }, + { + "epoch": 2.07, + "learning_rate": 4.311051851851852e-05, + "loss": 2.0687, + "step": 11630 + }, + { + "epoch": 2.07, + "learning_rate": 4.310755555555556e-05, + "loss": 2.028, + "step": 11635 + }, + { + "epoch": 2.07, + "learning_rate": 4.31045925925926e-05, + "loss": 2.1242, + "step": 11640 + }, + { + "epoch": 2.07, + "learning_rate": 4.310162962962963e-05, + "loss": 2.0842, + "step": 11645 + }, + { + "epoch": 2.07, + "learning_rate": 4.309866666666667e-05, + "loss": 2.1029, + "step": 11650 + }, + { + "epoch": 2.07, + "learning_rate": 4.309570370370371e-05, + "loss": 2.1054, + "step": 11655 + }, + { + "epoch": 2.07, + "learning_rate": 4.3092740740740745e-05, + "loss": 2.0464, + "step": 11660 + }, + { + "epoch": 2.07, + "learning_rate": 4.308977777777778e-05, + "loss": 2.1943, + "step": 11665 + }, + { + "epoch": 2.07, + "learning_rate": 4.3086814814814816e-05, + "loss": 2.08, + "step": 11670 + }, + { + "epoch": 2.08, + "learning_rate": 4.3083851851851855e-05, + "loss": 2.1416, + "step": 11675 + }, + { + "epoch": 2.08, + "learning_rate": 4.3080888888888894e-05, + "loss": 2.0655, + "step": 11680 + }, + { + "epoch": 2.08, + "learning_rate": 4.3077925925925926e-05, + "loss": 2.0228, + "step": 11685 + }, + { + "epoch": 2.08, + "learning_rate": 4.3074962962962965e-05, + "loss": 2.0516, + "step": 11690 + }, + { + "epoch": 2.08, + "learning_rate": 4.3072000000000004e-05, + "loss": 2.0225, + "step": 11695 + }, + { + "epoch": 2.08, + "learning_rate": 4.306903703703704e-05, + "loss": 2.0841, + "step": 11700 + }, + { + "epoch": 2.08, + "learning_rate": 4.3066074074074075e-05, + "loss": 1.9781, + "step": 11705 + }, + { + "epoch": 2.08, + "learning_rate": 4.306311111111111e-05, + "loss": 1.9715, + "step": 11710 + }, + { + "epoch": 2.08, + "learning_rate": 4.306014814814815e-05, + "loss": 2.0729, + "step": 11715 + }, + { + "epoch": 2.08, + "learning_rate": 4.305718518518519e-05, + "loss": 2.0646, + "step": 11720 + }, + { + "epoch": 2.08, + "learning_rate": 4.305422222222222e-05, + "loss": 2.2353, + "step": 11725 + }, + { + "epoch": 2.09, + "learning_rate": 4.305125925925926e-05, + "loss": 2.0217, + "step": 11730 + }, + { + "epoch": 2.09, + "learning_rate": 4.30482962962963e-05, + "loss": 2.0151, + "step": 11735 + }, + { + "epoch": 2.09, + "learning_rate": 4.304533333333334e-05, + "loss": 2.1185, + "step": 11740 + }, + { + "epoch": 2.09, + "learning_rate": 4.304237037037037e-05, + "loss": 1.9818, + "step": 11745 + }, + { + "epoch": 2.09, + "learning_rate": 4.303940740740741e-05, + "loss": 2.0689, + "step": 11750 + }, + { + "epoch": 2.09, + "learning_rate": 4.303644444444445e-05, + "loss": 1.9555, + "step": 11755 + }, + { + "epoch": 2.09, + "learning_rate": 4.303348148148149e-05, + "loss": 2.1376, + "step": 11760 + }, + { + "epoch": 2.09, + "learning_rate": 4.303051851851852e-05, + "loss": 1.9713, + "step": 11765 + }, + { + "epoch": 2.09, + "learning_rate": 4.302755555555556e-05, + "loss": 2.082, + "step": 11770 + }, + { + "epoch": 2.09, + "learning_rate": 4.302459259259259e-05, + "loss": 2.056, + "step": 11775 + }, + { + "epoch": 2.09, + "learning_rate": 4.3021629629629637e-05, + "loss": 2.2239, + "step": 11780 + }, + { + "epoch": 2.1, + "learning_rate": 4.301866666666667e-05, + "loss": 2.1189, + "step": 11785 + }, + { + "epoch": 2.1, + "learning_rate": 4.301570370370371e-05, + "loss": 2.1475, + "step": 11790 + }, + { + "epoch": 2.1, + "learning_rate": 4.301274074074074e-05, + "loss": 2.1659, + "step": 11795 + }, + { + "epoch": 2.1, + "learning_rate": 4.3009777777777785e-05, + "loss": 1.982, + "step": 11800 + }, + { + "epoch": 2.1, + "learning_rate": 4.300681481481482e-05, + "loss": 2.2411, + "step": 11805 + }, + { + "epoch": 2.1, + "learning_rate": 4.3003851851851856e-05, + "loss": 2.1064, + "step": 11810 + }, + { + "epoch": 2.1, + "learning_rate": 4.300088888888889e-05, + "loss": 2.0269, + "step": 11815 + }, + { + "epoch": 2.1, + "learning_rate": 4.2997925925925934e-05, + "loss": 2.292, + "step": 11820 + }, + { + "epoch": 2.1, + "learning_rate": 4.2994962962962966e-05, + "loss": 1.9721, + "step": 11825 + }, + { + "epoch": 2.1, + "learning_rate": 4.2992000000000004e-05, + "loss": 2.0659, + "step": 11830 + }, + { + "epoch": 2.1, + "learning_rate": 4.2989037037037036e-05, + "loss": 1.9607, + "step": 11835 + }, + { + "epoch": 2.1, + "learning_rate": 4.2986074074074075e-05, + "loss": 2.1082, + "step": 11840 + }, + { + "epoch": 2.11, + "learning_rate": 4.2983111111111114e-05, + "loss": 2.0569, + "step": 11845 + }, + { + "epoch": 2.11, + "learning_rate": 4.298014814814815e-05, + "loss": 2.266, + "step": 11850 + }, + { + "epoch": 2.11, + "learning_rate": 4.2977185185185185e-05, + "loss": 2.117, + "step": 11855 + }, + { + "epoch": 2.11, + "learning_rate": 4.2974222222222224e-05, + "loss": 2.0836, + "step": 11860 + }, + { + "epoch": 2.11, + "learning_rate": 4.297125925925926e-05, + "loss": 2.057, + "step": 11865 + }, + { + "epoch": 2.11, + "learning_rate": 4.29682962962963e-05, + "loss": 2.0447, + "step": 11870 + }, + { + "epoch": 2.11, + "learning_rate": 4.2965333333333334e-05, + "loss": 2.163, + "step": 11875 + }, + { + "epoch": 2.11, + "learning_rate": 4.296237037037037e-05, + "loss": 2.0831, + "step": 11880 + }, + { + "epoch": 2.11, + "learning_rate": 4.295940740740741e-05, + "loss": 2.1052, + "step": 11885 + }, + { + "epoch": 2.11, + "learning_rate": 4.295644444444445e-05, + "loss": 2.0914, + "step": 11890 + }, + { + "epoch": 2.11, + "learning_rate": 4.295348148148148e-05, + "loss": 2.0496, + "step": 11895 + }, + { + "epoch": 2.12, + "learning_rate": 4.295051851851852e-05, + "loss": 2.0744, + "step": 11900 + }, + { + "epoch": 2.12, + "learning_rate": 4.294755555555556e-05, + "loss": 2.2027, + "step": 11905 + }, + { + "epoch": 2.12, + "learning_rate": 4.29445925925926e-05, + "loss": 2.0048, + "step": 11910 + }, + { + "epoch": 2.12, + "learning_rate": 4.294162962962963e-05, + "loss": 2.0773, + "step": 11915 + }, + { + "epoch": 2.12, + "learning_rate": 4.293866666666667e-05, + "loss": 2.0863, + "step": 11920 + }, + { + "epoch": 2.12, + "learning_rate": 4.293570370370371e-05, + "loss": 1.9061, + "step": 11925 + }, + { + "epoch": 2.12, + "learning_rate": 4.293274074074074e-05, + "loss": 2.0572, + "step": 11930 + }, + { + "epoch": 2.12, + "learning_rate": 4.292977777777778e-05, + "loss": 2.1648, + "step": 11935 + }, + { + "epoch": 2.12, + "learning_rate": 4.292681481481481e-05, + "loss": 2.0614, + "step": 11940 + }, + { + "epoch": 2.12, + "learning_rate": 4.292385185185186e-05, + "loss": 1.9902, + "step": 11945 + }, + { + "epoch": 2.12, + "learning_rate": 4.292088888888889e-05, + "loss": 2.1492, + "step": 11950 + }, + { + "epoch": 2.13, + "learning_rate": 4.291792592592593e-05, + "loss": 1.9929, + "step": 11955 + }, + { + "epoch": 2.13, + "learning_rate": 4.291496296296296e-05, + "loss": 2.0434, + "step": 11960 + }, + { + "epoch": 2.13, + "learning_rate": 4.2912000000000005e-05, + "loss": 2.0688, + "step": 11965 + }, + { + "epoch": 2.13, + "learning_rate": 4.290903703703704e-05, + "loss": 2.0072, + "step": 11970 + }, + { + "epoch": 2.13, + "learning_rate": 4.2906074074074076e-05, + "loss": 2.0436, + "step": 11975 + }, + { + "epoch": 2.13, + "learning_rate": 4.290311111111111e-05, + "loss": 2.2308, + "step": 11980 + }, + { + "epoch": 2.13, + "learning_rate": 4.2900148148148154e-05, + "loss": 2.0936, + "step": 11985 + }, + { + "epoch": 2.13, + "learning_rate": 4.2897185185185186e-05, + "loss": 2.1122, + "step": 11990 + }, + { + "epoch": 2.13, + "learning_rate": 4.2894222222222225e-05, + "loss": 2.13, + "step": 11995 + }, + { + "epoch": 2.13, + "learning_rate": 4.289125925925926e-05, + "loss": 2.1069, + "step": 12000 + }, + { + "epoch": 2.13, + "learning_rate": 4.2888296296296295e-05, + "loss": 2.1227, + "step": 12005 + }, + { + "epoch": 2.14, + "learning_rate": 4.2885333333333334e-05, + "loss": 2.134, + "step": 12010 + }, + { + "epoch": 2.14, + "learning_rate": 4.288237037037037e-05, + "loss": 2.0549, + "step": 12015 + }, + { + "epoch": 2.14, + "learning_rate": 4.2879407407407405e-05, + "loss": 2.1679, + "step": 12020 + }, + { + "epoch": 2.14, + "learning_rate": 4.2876444444444444e-05, + "loss": 2.1453, + "step": 12025 + }, + { + "epoch": 2.14, + "learning_rate": 4.287348148148148e-05, + "loss": 1.9335, + "step": 12030 + }, + { + "epoch": 2.14, + "learning_rate": 4.287051851851852e-05, + "loss": 1.9928, + "step": 12035 + }, + { + "epoch": 2.14, + "learning_rate": 4.2867555555555554e-05, + "loss": 2.0942, + "step": 12040 + }, + { + "epoch": 2.14, + "learning_rate": 4.286459259259259e-05, + "loss": 2.0688, + "step": 12045 + }, + { + "epoch": 2.14, + "learning_rate": 4.286162962962963e-05, + "loss": 2.0388, + "step": 12050 + }, + { + "epoch": 2.14, + "learning_rate": 4.285866666666667e-05, + "loss": 2.1614, + "step": 12055 + }, + { + "epoch": 2.14, + "learning_rate": 4.28557037037037e-05, + "loss": 2.0595, + "step": 12060 + }, + { + "epoch": 2.14, + "learning_rate": 4.285274074074074e-05, + "loss": 2.0144, + "step": 12065 + }, + { + "epoch": 2.15, + "learning_rate": 4.284977777777778e-05, + "loss": 2.2383, + "step": 12070 + }, + { + "epoch": 2.15, + "learning_rate": 4.284681481481482e-05, + "loss": 1.941, + "step": 12075 + }, + { + "epoch": 2.15, + "learning_rate": 4.284385185185185e-05, + "loss": 2.121, + "step": 12080 + }, + { + "epoch": 2.15, + "learning_rate": 4.284088888888889e-05, + "loss": 2.0096, + "step": 12085 + }, + { + "epoch": 2.15, + "learning_rate": 4.283792592592593e-05, + "loss": 2.0464, + "step": 12090 + }, + { + "epoch": 2.15, + "learning_rate": 4.283496296296297e-05, + "loss": 2.0906, + "step": 12095 + }, + { + "epoch": 2.15, + "learning_rate": 4.2832e-05, + "loss": 2.0343, + "step": 12100 + }, + { + "epoch": 2.15, + "learning_rate": 4.282903703703704e-05, + "loss": 2.0857, + "step": 12105 + }, + { + "epoch": 2.15, + "learning_rate": 4.282607407407408e-05, + "loss": 2.1547, + "step": 12110 + }, + { + "epoch": 2.15, + "learning_rate": 4.2823111111111116e-05, + "loss": 2.161, + "step": 12115 + }, + { + "epoch": 2.15, + "learning_rate": 4.282014814814815e-05, + "loss": 2.1047, + "step": 12120 + }, + { + "epoch": 2.16, + "learning_rate": 4.2817185185185187e-05, + "loss": 2.1955, + "step": 12125 + }, + { + "epoch": 2.16, + "learning_rate": 4.2814222222222225e-05, + "loss": 2.069, + "step": 12130 + }, + { + "epoch": 2.16, + "learning_rate": 4.2811259259259264e-05, + "loss": 2.0278, + "step": 12135 + }, + { + "epoch": 2.16, + "learning_rate": 4.2808296296296296e-05, + "loss": 1.9195, + "step": 12140 + }, + { + "epoch": 2.16, + "learning_rate": 4.2805333333333335e-05, + "loss": 1.9381, + "step": 12145 + }, + { + "epoch": 2.16, + "learning_rate": 4.280237037037037e-05, + "loss": 2.1714, + "step": 12150 + }, + { + "epoch": 2.16, + "learning_rate": 4.279940740740741e-05, + "loss": 2.099, + "step": 12155 + }, + { + "epoch": 2.16, + "learning_rate": 4.2796444444444445e-05, + "loss": 2.0179, + "step": 12160 + }, + { + "epoch": 2.16, + "learning_rate": 4.2793481481481484e-05, + "loss": 2.0875, + "step": 12165 + }, + { + "epoch": 2.16, + "learning_rate": 4.2790518518518516e-05, + "loss": 2.1294, + "step": 12170 + }, + { + "epoch": 2.16, + "learning_rate": 4.278755555555556e-05, + "loss": 2.0409, + "step": 12175 + }, + { + "epoch": 2.17, + "learning_rate": 4.278459259259259e-05, + "loss": 2.0467, + "step": 12180 + }, + { + "epoch": 2.17, + "learning_rate": 4.278162962962963e-05, + "loss": 2.0565, + "step": 12185 + }, + { + "epoch": 2.17, + "learning_rate": 4.2778666666666664e-05, + "loss": 2.0773, + "step": 12190 + }, + { + "epoch": 2.17, + "learning_rate": 4.277570370370371e-05, + "loss": 2.0711, + "step": 12195 + }, + { + "epoch": 2.17, + "learning_rate": 4.277274074074074e-05, + "loss": 2.0719, + "step": 12200 + }, + { + "epoch": 2.17, + "learning_rate": 4.276977777777778e-05, + "loss": 1.9875, + "step": 12205 + }, + { + "epoch": 2.17, + "learning_rate": 4.276681481481481e-05, + "loss": 2.0617, + "step": 12210 + }, + { + "epoch": 2.17, + "learning_rate": 4.276385185185186e-05, + "loss": 1.9577, + "step": 12215 + }, + { + "epoch": 2.17, + "learning_rate": 4.276088888888889e-05, + "loss": 2.0032, + "step": 12220 + }, + { + "epoch": 2.17, + "learning_rate": 4.275792592592593e-05, + "loss": 2.0531, + "step": 12225 + }, + { + "epoch": 2.17, + "learning_rate": 4.275496296296296e-05, + "loss": 2.0201, + "step": 12230 + }, + { + "epoch": 2.18, + "learning_rate": 4.2752e-05, + "loss": 2.2203, + "step": 12235 + }, + { + "epoch": 2.18, + "learning_rate": 4.274903703703704e-05, + "loss": 2.1142, + "step": 12240 + }, + { + "epoch": 2.18, + "learning_rate": 4.274607407407408e-05, + "loss": 2.145, + "step": 12245 + }, + { + "epoch": 2.18, + "learning_rate": 4.274311111111111e-05, + "loss": 2.0741, + "step": 12250 + }, + { + "epoch": 2.18, + "learning_rate": 4.274014814814815e-05, + "loss": 2.1207, + "step": 12255 + }, + { + "epoch": 2.18, + "learning_rate": 4.273718518518519e-05, + "loss": 2.0753, + "step": 12260 + }, + { + "epoch": 2.18, + "learning_rate": 4.2734222222222226e-05, + "loss": 2.0867, + "step": 12265 + }, + { + "epoch": 2.18, + "learning_rate": 4.273125925925926e-05, + "loss": 2.1496, + "step": 12270 + }, + { + "epoch": 2.18, + "learning_rate": 4.27282962962963e-05, + "loss": 2.1154, + "step": 12275 + }, + { + "epoch": 2.18, + "learning_rate": 4.2725333333333336e-05, + "loss": 2.1221, + "step": 12280 + }, + { + "epoch": 2.18, + "learning_rate": 4.2722370370370375e-05, + "loss": 2.0638, + "step": 12285 + }, + { + "epoch": 2.18, + "learning_rate": 4.271940740740741e-05, + "loss": 2.0265, + "step": 12290 + }, + { + "epoch": 2.19, + "learning_rate": 4.2716444444444446e-05, + "loss": 2.1495, + "step": 12295 + }, + { + "epoch": 2.19, + "learning_rate": 4.2713481481481484e-05, + "loss": 1.947, + "step": 12300 + }, + { + "epoch": 2.19, + "learning_rate": 4.271051851851852e-05, + "loss": 2.1065, + "step": 12305 + }, + { + "epoch": 2.19, + "learning_rate": 4.2707555555555555e-05, + "loss": 2.0491, + "step": 12310 + }, + { + "epoch": 2.19, + "learning_rate": 4.2704592592592594e-05, + "loss": 2.1002, + "step": 12315 + }, + { + "epoch": 2.19, + "learning_rate": 4.270162962962963e-05, + "loss": 2.1485, + "step": 12320 + }, + { + "epoch": 2.19, + "learning_rate": 4.269866666666667e-05, + "loss": 1.931, + "step": 12325 + }, + { + "epoch": 2.19, + "learning_rate": 4.2695703703703704e-05, + "loss": 2.0992, + "step": 12330 + }, + { + "epoch": 2.19, + "learning_rate": 4.269274074074074e-05, + "loss": 2.005, + "step": 12335 + }, + { + "epoch": 2.19, + "learning_rate": 4.268977777777778e-05, + "loss": 2.1412, + "step": 12340 + }, + { + "epoch": 2.19, + "learning_rate": 4.268681481481482e-05, + "loss": 2.0787, + "step": 12345 + }, + { + "epoch": 2.2, + "learning_rate": 4.268385185185185e-05, + "loss": 2.1672, + "step": 12350 + }, + { + "epoch": 2.2, + "learning_rate": 4.268088888888889e-05, + "loss": 2.0586, + "step": 12355 + }, + { + "epoch": 2.2, + "learning_rate": 4.267792592592593e-05, + "loss": 2.1221, + "step": 12360 + }, + { + "epoch": 2.2, + "learning_rate": 4.267496296296297e-05, + "loss": 2.0877, + "step": 12365 + }, + { + "epoch": 2.2, + "learning_rate": 4.2672e-05, + "loss": 2.1301, + "step": 12370 + }, + { + "epoch": 2.2, + "learning_rate": 4.266903703703704e-05, + "loss": 1.9854, + "step": 12375 + }, + { + "epoch": 2.2, + "learning_rate": 4.266607407407407e-05, + "loss": 1.9394, + "step": 12380 + }, + { + "epoch": 2.2, + "learning_rate": 4.266311111111112e-05, + "loss": 2.1236, + "step": 12385 + }, + { + "epoch": 2.2, + "learning_rate": 4.266014814814815e-05, + "loss": 2.0078, + "step": 12390 + }, + { + "epoch": 2.2, + "learning_rate": 4.265718518518519e-05, + "loss": 2.1959, + "step": 12395 + }, + { + "epoch": 2.2, + "learning_rate": 4.265422222222222e-05, + "loss": 1.9486, + "step": 12400 + }, + { + "epoch": 2.21, + "learning_rate": 4.2651259259259266e-05, + "loss": 2.1441, + "step": 12405 + }, + { + "epoch": 2.21, + "learning_rate": 4.26482962962963e-05, + "loss": 2.1024, + "step": 12410 + }, + { + "epoch": 2.21, + "learning_rate": 4.2645333333333337e-05, + "loss": 2.0716, + "step": 12415 + }, + { + "epoch": 2.21, + "learning_rate": 4.264237037037037e-05, + "loss": 1.9934, + "step": 12420 + }, + { + "epoch": 2.21, + "learning_rate": 4.2639407407407414e-05, + "loss": 2.0877, + "step": 12425 + }, + { + "epoch": 2.21, + "learning_rate": 4.2636444444444446e-05, + "loss": 2.0199, + "step": 12430 + }, + { + "epoch": 2.21, + "learning_rate": 4.2633481481481485e-05, + "loss": 2.1148, + "step": 12435 + }, + { + "epoch": 2.21, + "learning_rate": 4.263051851851852e-05, + "loss": 1.9948, + "step": 12440 + }, + { + "epoch": 2.21, + "learning_rate": 4.262755555555556e-05, + "loss": 2.2159, + "step": 12445 + }, + { + "epoch": 2.21, + "learning_rate": 4.2624592592592595e-05, + "loss": 2.1202, + "step": 12450 + }, + { + "epoch": 2.21, + "learning_rate": 4.2621629629629634e-05, + "loss": 2.1747, + "step": 12455 + }, + { + "epoch": 2.22, + "learning_rate": 4.2618666666666666e-05, + "loss": 2.0513, + "step": 12460 + }, + { + "epoch": 2.22, + "learning_rate": 4.2615703703703704e-05, + "loss": 2.0114, + "step": 12465 + }, + { + "epoch": 2.22, + "learning_rate": 4.261274074074074e-05, + "loss": 2.1077, + "step": 12470 + }, + { + "epoch": 2.22, + "learning_rate": 4.260977777777778e-05, + "loss": 2.0394, + "step": 12475 + }, + { + "epoch": 2.22, + "learning_rate": 4.2606814814814814e-05, + "loss": 1.997, + "step": 12480 + }, + { + "epoch": 2.22, + "learning_rate": 4.260385185185185e-05, + "loss": 2.022, + "step": 12485 + }, + { + "epoch": 2.22, + "learning_rate": 4.260088888888889e-05, + "loss": 2.0506, + "step": 12490 + }, + { + "epoch": 2.22, + "learning_rate": 4.259792592592593e-05, + "loss": 2.0418, + "step": 12495 + }, + { + "epoch": 2.22, + "learning_rate": 4.259496296296296e-05, + "loss": 2.1871, + "step": 12500 + }, + { + "epoch": 2.22, + "learning_rate": 4.2592e-05, + "loss": 2.1347, + "step": 12505 + }, + { + "epoch": 2.22, + "learning_rate": 4.258903703703704e-05, + "loss": 1.8785, + "step": 12510 + }, + { + "epoch": 2.22, + "learning_rate": 4.258607407407408e-05, + "loss": 2.0166, + "step": 12515 + }, + { + "epoch": 2.23, + "learning_rate": 4.258311111111111e-05, + "loss": 2.1569, + "step": 12520 + }, + { + "epoch": 2.23, + "learning_rate": 4.258014814814815e-05, + "loss": 2.0294, + "step": 12525 + }, + { + "epoch": 2.23, + "learning_rate": 4.257718518518519e-05, + "loss": 2.0627, + "step": 12530 + }, + { + "epoch": 2.23, + "learning_rate": 4.257422222222223e-05, + "loss": 2.0042, + "step": 12535 + }, + { + "epoch": 2.23, + "learning_rate": 4.257125925925926e-05, + "loss": 2.0411, + "step": 12540 + }, + { + "epoch": 2.23, + "learning_rate": 4.25682962962963e-05, + "loss": 2.1107, + "step": 12545 + }, + { + "epoch": 2.23, + "learning_rate": 4.256533333333334e-05, + "loss": 2.0758, + "step": 12550 + }, + { + "epoch": 2.23, + "learning_rate": 4.2562370370370376e-05, + "loss": 2.2247, + "step": 12555 + }, + { + "epoch": 2.23, + "learning_rate": 4.255940740740741e-05, + "loss": 1.9923, + "step": 12560 + }, + { + "epoch": 2.23, + "learning_rate": 4.255644444444445e-05, + "loss": 2.0701, + "step": 12565 + }, + { + "epoch": 2.23, + "learning_rate": 4.2553481481481486e-05, + "loss": 2.0196, + "step": 12570 + }, + { + "epoch": 2.24, + "learning_rate": 4.2550518518518525e-05, + "loss": 2.1428, + "step": 12575 + }, + { + "epoch": 2.24, + "learning_rate": 4.254755555555556e-05, + "loss": 2.0243, + "step": 12580 + }, + { + "epoch": 2.24, + "learning_rate": 4.2544592592592596e-05, + "loss": 2.1269, + "step": 12585 + }, + { + "epoch": 2.24, + "learning_rate": 4.2541629629629634e-05, + "loss": 2.0801, + "step": 12590 + }, + { + "epoch": 2.24, + "learning_rate": 4.253866666666667e-05, + "loss": 2.0329, + "step": 12595 + }, + { + "epoch": 2.24, + "learning_rate": 4.2535703703703705e-05, + "loss": 2.1521, + "step": 12600 + }, + { + "epoch": 2.24, + "learning_rate": 4.2532740740740744e-05, + "loss": 2.0046, + "step": 12605 + }, + { + "epoch": 2.24, + "learning_rate": 4.252977777777778e-05, + "loss": 1.9771, + "step": 12610 + }, + { + "epoch": 2.24, + "learning_rate": 4.252681481481482e-05, + "loss": 2.0754, + "step": 12615 + }, + { + "epoch": 2.24, + "learning_rate": 4.2523851851851854e-05, + "loss": 2.111, + "step": 12620 + }, + { + "epoch": 2.24, + "learning_rate": 4.252088888888889e-05, + "loss": 1.9362, + "step": 12625 + }, + { + "epoch": 2.25, + "learning_rate": 4.2517925925925925e-05, + "loss": 1.9751, + "step": 12630 + }, + { + "epoch": 2.25, + "learning_rate": 4.251496296296297e-05, + "loss": 1.8958, + "step": 12635 + }, + { + "epoch": 2.25, + "learning_rate": 4.2512e-05, + "loss": 2.045, + "step": 12640 + }, + { + "epoch": 2.25, + "learning_rate": 4.250903703703704e-05, + "loss": 1.9897, + "step": 12645 + }, + { + "epoch": 2.25, + "learning_rate": 4.250607407407407e-05, + "loss": 1.9933, + "step": 12650 + }, + { + "epoch": 2.25, + "learning_rate": 4.250311111111112e-05, + "loss": 2.1026, + "step": 12655 + }, + { + "epoch": 2.25, + "learning_rate": 4.250014814814815e-05, + "loss": 2.1265, + "step": 12660 + }, + { + "epoch": 2.25, + "learning_rate": 4.249718518518519e-05, + "loss": 1.9803, + "step": 12665 + }, + { + "epoch": 2.25, + "learning_rate": 4.249422222222222e-05, + "loss": 2.1717, + "step": 12670 + }, + { + "epoch": 2.25, + "learning_rate": 4.249125925925927e-05, + "loss": 2.1805, + "step": 12675 + }, + { + "epoch": 2.25, + "learning_rate": 4.24882962962963e-05, + "loss": 2.0657, + "step": 12680 + }, + { + "epoch": 2.26, + "learning_rate": 4.248533333333334e-05, + "loss": 2.0095, + "step": 12685 + }, + { + "epoch": 2.26, + "learning_rate": 4.248237037037037e-05, + "loss": 2.1996, + "step": 12690 + }, + { + "epoch": 2.26, + "learning_rate": 4.247940740740741e-05, + "loss": 2.1165, + "step": 12695 + }, + { + "epoch": 2.26, + "learning_rate": 4.247644444444445e-05, + "loss": 2.1073, + "step": 12700 + }, + { + "epoch": 2.26, + "learning_rate": 4.247348148148148e-05, + "loss": 1.894, + "step": 12705 + }, + { + "epoch": 2.26, + "learning_rate": 4.247051851851852e-05, + "loss": 2.2013, + "step": 12710 + }, + { + "epoch": 2.26, + "learning_rate": 4.246755555555556e-05, + "loss": 2.0435, + "step": 12715 + }, + { + "epoch": 2.26, + "learning_rate": 4.2464592592592596e-05, + "loss": 2.2429, + "step": 12720 + }, + { + "epoch": 2.26, + "learning_rate": 4.246162962962963e-05, + "loss": 2.0785, + "step": 12725 + }, + { + "epoch": 2.26, + "learning_rate": 4.245866666666667e-05, + "loss": 2.0471, + "step": 12730 + }, + { + "epoch": 2.26, + "learning_rate": 4.2455703703703706e-05, + "loss": 1.8792, + "step": 12735 + }, + { + "epoch": 2.26, + "learning_rate": 4.2452740740740745e-05, + "loss": 2.0019, + "step": 12740 + }, + { + "epoch": 2.27, + "learning_rate": 4.244977777777778e-05, + "loss": 2.1038, + "step": 12745 + }, + { + "epoch": 2.27, + "learning_rate": 4.2446814814814816e-05, + "loss": 1.9694, + "step": 12750 + }, + { + "epoch": 2.27, + "learning_rate": 4.2443851851851855e-05, + "loss": 2.0658, + "step": 12755 + }, + { + "epoch": 2.27, + "learning_rate": 4.244088888888889e-05, + "loss": 2.1507, + "step": 12760 + }, + { + "epoch": 2.27, + "learning_rate": 4.2437925925925925e-05, + "loss": 2.1381, + "step": 12765 + }, + { + "epoch": 2.27, + "learning_rate": 4.2434962962962964e-05, + "loss": 2.1264, + "step": 12770 + }, + { + "epoch": 2.27, + "learning_rate": 4.2431999999999996e-05, + "loss": 2.0483, + "step": 12775 + }, + { + "epoch": 2.27, + "learning_rate": 4.242903703703704e-05, + "loss": 2.1985, + "step": 12780 + }, + { + "epoch": 2.27, + "learning_rate": 4.2426074074074074e-05, + "loss": 2.1627, + "step": 12785 + }, + { + "epoch": 2.27, + "learning_rate": 4.242311111111111e-05, + "loss": 2.0331, + "step": 12790 + }, + { + "epoch": 2.27, + "learning_rate": 4.2420148148148145e-05, + "loss": 2.0645, + "step": 12795 + }, + { + "epoch": 2.28, + "learning_rate": 4.241718518518519e-05, + "loss": 2.0922, + "step": 12800 + }, + { + "epoch": 2.28, + "learning_rate": 4.241422222222222e-05, + "loss": 1.9673, + "step": 12805 + }, + { + "epoch": 2.28, + "learning_rate": 4.241125925925926e-05, + "loss": 1.9481, + "step": 12810 + }, + { + "epoch": 2.28, + "learning_rate": 4.240829629629629e-05, + "loss": 2.0728, + "step": 12815 + }, + { + "epoch": 2.28, + "learning_rate": 4.240533333333334e-05, + "loss": 2.001, + "step": 12820 + }, + { + "epoch": 2.28, + "learning_rate": 4.240237037037037e-05, + "loss": 2.2028, + "step": 12825 + }, + { + "epoch": 2.28, + "learning_rate": 4.239940740740741e-05, + "loss": 2.0098, + "step": 12830 + }, + { + "epoch": 2.28, + "learning_rate": 4.239644444444444e-05, + "loss": 1.9691, + "step": 12835 + }, + { + "epoch": 2.28, + "learning_rate": 4.239348148148149e-05, + "loss": 1.9647, + "step": 12840 + }, + { + "epoch": 2.28, + "learning_rate": 4.239051851851852e-05, + "loss": 2.016, + "step": 12845 + }, + { + "epoch": 2.28, + "learning_rate": 4.238755555555556e-05, + "loss": 2.0025, + "step": 12850 + }, + { + "epoch": 2.29, + "learning_rate": 4.238459259259259e-05, + "loss": 2.042, + "step": 12855 + }, + { + "epoch": 2.29, + "learning_rate": 4.238162962962963e-05, + "loss": 2.0047, + "step": 12860 + }, + { + "epoch": 2.29, + "learning_rate": 4.237866666666667e-05, + "loss": 2.1752, + "step": 12865 + }, + { + "epoch": 2.29, + "learning_rate": 4.237570370370371e-05, + "loss": 2.0031, + "step": 12870 + }, + { + "epoch": 2.29, + "learning_rate": 4.237274074074074e-05, + "loss": 2.0408, + "step": 12875 + }, + { + "epoch": 2.29, + "learning_rate": 4.236977777777778e-05, + "loss": 2.1455, + "step": 12880 + }, + { + "epoch": 2.29, + "learning_rate": 4.2366814814814816e-05, + "loss": 2.1129, + "step": 12885 + }, + { + "epoch": 2.29, + "learning_rate": 4.2363851851851855e-05, + "loss": 2.1005, + "step": 12890 + }, + { + "epoch": 2.29, + "learning_rate": 4.236088888888889e-05, + "loss": 1.9708, + "step": 12895 + }, + { + "epoch": 2.29, + "learning_rate": 4.2357925925925926e-05, + "loss": 2.1866, + "step": 12900 + }, + { + "epoch": 2.29, + "learning_rate": 4.2354962962962965e-05, + "loss": 1.9787, + "step": 12905 + }, + { + "epoch": 2.3, + "learning_rate": 4.2352000000000004e-05, + "loss": 2.133, + "step": 12910 + }, + { + "epoch": 2.3, + "learning_rate": 4.2349037037037036e-05, + "loss": 2.1486, + "step": 12915 + }, + { + "epoch": 2.3, + "learning_rate": 4.2346074074074075e-05, + "loss": 2.0984, + "step": 12920 + }, + { + "epoch": 2.3, + "learning_rate": 4.2343111111111113e-05, + "loss": 2.0902, + "step": 12925 + }, + { + "epoch": 2.3, + "learning_rate": 4.234014814814815e-05, + "loss": 2.0028, + "step": 12930 + }, + { + "epoch": 2.3, + "learning_rate": 4.2337185185185184e-05, + "loss": 2.0839, + "step": 12935 + }, + { + "epoch": 2.3, + "learning_rate": 4.233422222222222e-05, + "loss": 2.0416, + "step": 12940 + }, + { + "epoch": 2.3, + "learning_rate": 4.233125925925926e-05, + "loss": 2.0289, + "step": 12945 + }, + { + "epoch": 2.3, + "learning_rate": 4.23282962962963e-05, + "loss": 2.061, + "step": 12950 + }, + { + "epoch": 2.3, + "learning_rate": 4.232533333333333e-05, + "loss": 2.0404, + "step": 12955 + }, + { + "epoch": 2.3, + "learning_rate": 4.232237037037037e-05, + "loss": 2.0707, + "step": 12960 + }, + { + "epoch": 2.3, + "learning_rate": 4.231940740740741e-05, + "loss": 2.1211, + "step": 12965 + }, + { + "epoch": 2.31, + "learning_rate": 4.231644444444445e-05, + "loss": 2.1527, + "step": 12970 + }, + { + "epoch": 2.31, + "learning_rate": 4.231348148148148e-05, + "loss": 2.0934, + "step": 12975 + }, + { + "epoch": 2.31, + "learning_rate": 4.231051851851852e-05, + "loss": 2.0904, + "step": 12980 + }, + { + "epoch": 2.31, + "learning_rate": 4.230755555555556e-05, + "loss": 1.9881, + "step": 12985 + }, + { + "epoch": 2.31, + "learning_rate": 4.23045925925926e-05, + "loss": 2.1066, + "step": 12990 + }, + { + "epoch": 2.31, + "learning_rate": 4.230162962962963e-05, + "loss": 2.0826, + "step": 12995 + }, + { + "epoch": 2.31, + "learning_rate": 4.229866666666667e-05, + "loss": 2.1241, + "step": 13000 + }, + { + "epoch": 2.31, + "learning_rate": 4.22957037037037e-05, + "loss": 2.2448, + "step": 13005 + }, + { + "epoch": 2.31, + "learning_rate": 4.2292740740740746e-05, + "loss": 2.0667, + "step": 13010 + }, + { + "epoch": 2.31, + "learning_rate": 4.228977777777778e-05, + "loss": 1.9399, + "step": 13015 + }, + { + "epoch": 2.31, + "learning_rate": 4.228681481481482e-05, + "loss": 2.2072, + "step": 13020 + }, + { + "epoch": 2.32, + "learning_rate": 4.228385185185185e-05, + "loss": 2.1444, + "step": 13025 + }, + { + "epoch": 2.32, + "learning_rate": 4.2280888888888895e-05, + "loss": 2.0552, + "step": 13030 + }, + { + "epoch": 2.32, + "learning_rate": 4.227792592592593e-05, + "loss": 2.0112, + "step": 13035 + }, + { + "epoch": 2.32, + "learning_rate": 4.2274962962962966e-05, + "loss": 2.1551, + "step": 13040 + }, + { + "epoch": 2.32, + "learning_rate": 4.2272e-05, + "loss": 2.1172, + "step": 13045 + }, + { + "epoch": 2.32, + "learning_rate": 4.226903703703704e-05, + "loss": 2.1369, + "step": 13050 + }, + { + "epoch": 2.32, + "learning_rate": 4.2266074074074075e-05, + "loss": 2.0632, + "step": 13055 + }, + { + "epoch": 2.32, + "learning_rate": 4.2263111111111114e-05, + "loss": 2.2058, + "step": 13060 + }, + { + "epoch": 2.32, + "learning_rate": 4.2260148148148146e-05, + "loss": 2.1546, + "step": 13065 + }, + { + "epoch": 2.32, + "learning_rate": 4.225718518518519e-05, + "loss": 2.1509, + "step": 13070 + }, + { + "epoch": 2.32, + "learning_rate": 4.2254222222222224e-05, + "loss": 2.0703, + "step": 13075 + }, + { + "epoch": 2.33, + "learning_rate": 4.225125925925926e-05, + "loss": 2.1884, + "step": 13080 + }, + { + "epoch": 2.33, + "learning_rate": 4.2248296296296295e-05, + "loss": 2.1389, + "step": 13085 + }, + { + "epoch": 2.33, + "learning_rate": 4.2245333333333334e-05, + "loss": 2.124, + "step": 13090 + }, + { + "epoch": 2.33, + "learning_rate": 4.224237037037037e-05, + "loss": 2.1039, + "step": 13095 + }, + { + "epoch": 2.33, + "learning_rate": 4.223940740740741e-05, + "loss": 2.0245, + "step": 13100 + }, + { + "epoch": 2.33, + "learning_rate": 4.223644444444444e-05, + "loss": 2.1605, + "step": 13105 + }, + { + "epoch": 2.33, + "learning_rate": 4.223348148148148e-05, + "loss": 1.9646, + "step": 13110 + }, + { + "epoch": 2.33, + "learning_rate": 4.223051851851852e-05, + "loss": 2.1198, + "step": 13115 + }, + { + "epoch": 2.33, + "learning_rate": 4.222755555555556e-05, + "loss": 2.0129, + "step": 13120 + }, + { + "epoch": 2.33, + "learning_rate": 4.222459259259259e-05, + "loss": 2.1546, + "step": 13125 + }, + { + "epoch": 2.33, + "learning_rate": 4.222162962962963e-05, + "loss": 2.0764, + "step": 13130 + }, + { + "epoch": 2.34, + "learning_rate": 4.221866666666667e-05, + "loss": 2.049, + "step": 13135 + }, + { + "epoch": 2.34, + "learning_rate": 4.221570370370371e-05, + "loss": 2.1184, + "step": 13140 + }, + { + "epoch": 2.34, + "learning_rate": 4.221274074074074e-05, + "loss": 2.0669, + "step": 13145 + }, + { + "epoch": 2.34, + "learning_rate": 4.220977777777778e-05, + "loss": 2.1905, + "step": 13150 + }, + { + "epoch": 2.34, + "learning_rate": 4.220681481481482e-05, + "loss": 2.2068, + "step": 13155 + }, + { + "epoch": 2.34, + "learning_rate": 4.220385185185186e-05, + "loss": 2.1641, + "step": 13160 + }, + { + "epoch": 2.34, + "learning_rate": 4.220088888888889e-05, + "loss": 1.8973, + "step": 13165 + }, + { + "epoch": 2.34, + "learning_rate": 4.219792592592593e-05, + "loss": 1.9805, + "step": 13170 + }, + { + "epoch": 2.34, + "learning_rate": 4.2194962962962967e-05, + "loss": 2.1717, + "step": 13175 + }, + { + "epoch": 2.34, + "learning_rate": 4.2192000000000005e-05, + "loss": 2.0219, + "step": 13180 + }, + { + "epoch": 2.34, + "learning_rate": 4.218903703703704e-05, + "loss": 2.1896, + "step": 13185 + }, + { + "epoch": 2.34, + "learning_rate": 4.2186074074074076e-05, + "loss": 2.1088, + "step": 13190 + }, + { + "epoch": 2.35, + "learning_rate": 4.2183111111111115e-05, + "loss": 2.0254, + "step": 13195 + }, + { + "epoch": 2.35, + "learning_rate": 4.2180148148148154e-05, + "loss": 2.1033, + "step": 13200 + }, + { + "epoch": 2.35, + "learning_rate": 4.2177185185185186e-05, + "loss": 1.9094, + "step": 13205 + }, + { + "epoch": 2.35, + "learning_rate": 4.2174222222222225e-05, + "loss": 2.1021, + "step": 13210 + }, + { + "epoch": 2.35, + "learning_rate": 4.2171259259259264e-05, + "loss": 2.1423, + "step": 13215 + }, + { + "epoch": 2.35, + "learning_rate": 4.21682962962963e-05, + "loss": 2.0943, + "step": 13220 + }, + { + "epoch": 2.35, + "learning_rate": 4.2165333333333334e-05, + "loss": 2.0726, + "step": 13225 + }, + { + "epoch": 2.35, + "learning_rate": 4.216237037037037e-05, + "loss": 2.0218, + "step": 13230 + }, + { + "epoch": 2.35, + "learning_rate": 4.2159407407407405e-05, + "loss": 2.215, + "step": 13235 + }, + { + "epoch": 2.35, + "learning_rate": 4.215644444444445e-05, + "loss": 2.0669, + "step": 13240 + }, + { + "epoch": 2.35, + "learning_rate": 4.215348148148148e-05, + "loss": 2.1599, + "step": 13245 + }, + { + "epoch": 2.36, + "learning_rate": 4.215051851851852e-05, + "loss": 2.0902, + "step": 13250 + }, + { + "epoch": 2.36, + "learning_rate": 4.2147555555555554e-05, + "loss": 1.9893, + "step": 13255 + }, + { + "epoch": 2.36, + "learning_rate": 4.21445925925926e-05, + "loss": 2.0965, + "step": 13260 + }, + { + "epoch": 2.36, + "learning_rate": 4.214162962962963e-05, + "loss": 1.9664, + "step": 13265 + }, + { + "epoch": 2.36, + "learning_rate": 4.213866666666667e-05, + "loss": 2.0104, + "step": 13270 + }, + { + "epoch": 2.36, + "learning_rate": 4.21357037037037e-05, + "loss": 2.0935, + "step": 13275 + }, + { + "epoch": 2.36, + "learning_rate": 4.213274074074075e-05, + "loss": 2.1998, + "step": 13280 + }, + { + "epoch": 2.36, + "learning_rate": 4.212977777777778e-05, + "loss": 2.1149, + "step": 13285 + }, + { + "epoch": 2.36, + "learning_rate": 4.212681481481482e-05, + "loss": 2.149, + "step": 13290 + }, + { + "epoch": 2.36, + "learning_rate": 4.212385185185185e-05, + "loss": 2.0638, + "step": 13295 + }, + { + "epoch": 2.36, + "learning_rate": 4.2120888888888896e-05, + "loss": 1.9766, + "step": 13300 + }, + { + "epoch": 2.37, + "learning_rate": 4.211792592592593e-05, + "loss": 2.0908, + "step": 13305 + }, + { + "epoch": 2.37, + "learning_rate": 4.211496296296297e-05, + "loss": 2.1838, + "step": 13310 + }, + { + "epoch": 2.37, + "learning_rate": 4.2112e-05, + "loss": 2.131, + "step": 13315 + }, + { + "epoch": 2.37, + "learning_rate": 4.210903703703704e-05, + "loss": 2.1748, + "step": 13320 + }, + { + "epoch": 2.37, + "learning_rate": 4.210607407407408e-05, + "loss": 2.072, + "step": 13325 + }, + { + "epoch": 2.37, + "learning_rate": 4.2103111111111116e-05, + "loss": 2.1037, + "step": 13330 + }, + { + "epoch": 2.37, + "learning_rate": 4.210014814814815e-05, + "loss": 2.057, + "step": 13335 + }, + { + "epoch": 2.37, + "learning_rate": 4.209718518518519e-05, + "loss": 2.0885, + "step": 13340 + }, + { + "epoch": 2.37, + "learning_rate": 4.2094222222222225e-05, + "loss": 2.1631, + "step": 13345 + }, + { + "epoch": 2.37, + "learning_rate": 4.2091259259259264e-05, + "loss": 2.1436, + "step": 13350 + }, + { + "epoch": 2.37, + "learning_rate": 4.2088296296296296e-05, + "loss": 2.1534, + "step": 13355 + }, + { + "epoch": 2.38, + "learning_rate": 4.2085333333333335e-05, + "loss": 2.041, + "step": 13360 + }, + { + "epoch": 2.38, + "learning_rate": 4.2082370370370374e-05, + "loss": 2.0053, + "step": 13365 + }, + { + "epoch": 2.38, + "learning_rate": 4.207940740740741e-05, + "loss": 1.9723, + "step": 13370 + }, + { + "epoch": 2.38, + "learning_rate": 4.2076444444444445e-05, + "loss": 2.1651, + "step": 13375 + }, + { + "epoch": 2.38, + "learning_rate": 4.2073481481481484e-05, + "loss": 2.0194, + "step": 13380 + }, + { + "epoch": 2.38, + "learning_rate": 4.207051851851852e-05, + "loss": 1.9925, + "step": 13385 + }, + { + "epoch": 2.38, + "learning_rate": 4.206755555555556e-05, + "loss": 2.2107, + "step": 13390 + }, + { + "epoch": 2.38, + "learning_rate": 4.206459259259259e-05, + "loss": 2.0535, + "step": 13395 + }, + { + "epoch": 2.38, + "learning_rate": 4.206162962962963e-05, + "loss": 2.163, + "step": 13400 + }, + { + "epoch": 2.38, + "learning_rate": 4.205866666666667e-05, + "loss": 2.1978, + "step": 13405 + }, + { + "epoch": 2.38, + "learning_rate": 4.205570370370371e-05, + "loss": 2.0739, + "step": 13410 + }, + { + "epoch": 2.38, + "learning_rate": 4.205274074074074e-05, + "loss": 2.0485, + "step": 13415 + }, + { + "epoch": 2.39, + "learning_rate": 4.204977777777778e-05, + "loss": 2.1549, + "step": 13420 + }, + { + "epoch": 2.39, + "learning_rate": 4.204681481481482e-05, + "loss": 2.1377, + "step": 13425 + }, + { + "epoch": 2.39, + "learning_rate": 4.204385185185186e-05, + "loss": 2.0472, + "step": 13430 + }, + { + "epoch": 2.39, + "learning_rate": 4.204088888888889e-05, + "loss": 2.1706, + "step": 13435 + }, + { + "epoch": 2.39, + "learning_rate": 4.203792592592593e-05, + "loss": 2.0735, + "step": 13440 + }, + { + "epoch": 2.39, + "learning_rate": 4.203496296296297e-05, + "loss": 2.0858, + "step": 13445 + }, + { + "epoch": 2.39, + "learning_rate": 4.203200000000001e-05, + "loss": 2.0049, + "step": 13450 + }, + { + "epoch": 2.39, + "learning_rate": 4.202903703703704e-05, + "loss": 2.0622, + "step": 13455 + }, + { + "epoch": 2.39, + "learning_rate": 4.202607407407408e-05, + "loss": 2.0394, + "step": 13460 + }, + { + "epoch": 2.39, + "learning_rate": 4.202311111111111e-05, + "loss": 2.091, + "step": 13465 + }, + { + "epoch": 2.39, + "learning_rate": 4.2020148148148155e-05, + "loss": 2.0626, + "step": 13470 + }, + { + "epoch": 2.4, + "learning_rate": 4.201718518518519e-05, + "loss": 2.0693, + "step": 13475 + }, + { + "epoch": 2.4, + "learning_rate": 4.201422222222222e-05, + "loss": 2.1128, + "step": 13480 + }, + { + "epoch": 2.4, + "learning_rate": 4.201125925925926e-05, + "loss": 2.1237, + "step": 13485 + }, + { + "epoch": 2.4, + "learning_rate": 4.20082962962963e-05, + "loss": 2.1342, + "step": 13490 + }, + { + "epoch": 2.4, + "learning_rate": 4.2005333333333336e-05, + "loss": 2.0532, + "step": 13495 + }, + { + "epoch": 2.4, + "learning_rate": 4.200237037037037e-05, + "loss": 2.0842, + "step": 13500 + }, + { + "epoch": 2.4, + "learning_rate": 4.199940740740741e-05, + "loss": 2.0855, + "step": 13505 + }, + { + "epoch": 2.4, + "learning_rate": 4.1996444444444446e-05, + "loss": 2.1203, + "step": 13510 + }, + { + "epoch": 2.4, + "learning_rate": 4.1993481481481484e-05, + "loss": 2.0449, + "step": 13515 + }, + { + "epoch": 2.4, + "learning_rate": 4.1990518518518516e-05, + "loss": 2.1576, + "step": 13520 + }, + { + "epoch": 2.4, + "learning_rate": 4.1987555555555555e-05, + "loss": 2.0758, + "step": 13525 + }, + { + "epoch": 2.41, + "learning_rate": 4.1984592592592594e-05, + "loss": 2.1485, + "step": 13530 + }, + { + "epoch": 2.41, + "learning_rate": 4.198162962962963e-05, + "loss": 2.0653, + "step": 13535 + }, + { + "epoch": 2.41, + "learning_rate": 4.1978666666666665e-05, + "loss": 2.0219, + "step": 13540 + }, + { + "epoch": 2.41, + "learning_rate": 4.1975703703703704e-05, + "loss": 2.2063, + "step": 13545 + }, + { + "epoch": 2.41, + "learning_rate": 4.197274074074074e-05, + "loss": 2.1276, + "step": 13550 + }, + { + "epoch": 2.41, + "learning_rate": 4.196977777777778e-05, + "loss": 2.0958, + "step": 13555 + }, + { + "epoch": 2.41, + "learning_rate": 4.1966814814814814e-05, + "loss": 2.2182, + "step": 13560 + }, + { + "epoch": 2.41, + "learning_rate": 4.196385185185185e-05, + "loss": 2.0937, + "step": 13565 + }, + { + "epoch": 2.41, + "learning_rate": 4.196088888888889e-05, + "loss": 1.9599, + "step": 13570 + }, + { + "epoch": 2.41, + "learning_rate": 4.195792592592593e-05, + "loss": 2.0422, + "step": 13575 + }, + { + "epoch": 2.41, + "learning_rate": 4.195496296296296e-05, + "loss": 2.0227, + "step": 13580 + }, + { + "epoch": 2.42, + "learning_rate": 4.1952e-05, + "loss": 1.9983, + "step": 13585 + }, + { + "epoch": 2.42, + "learning_rate": 4.194903703703704e-05, + "loss": 2.1914, + "step": 13590 + }, + { + "epoch": 2.42, + "learning_rate": 4.194607407407408e-05, + "loss": 2.0946, + "step": 13595 + }, + { + "epoch": 2.42, + "learning_rate": 4.194311111111111e-05, + "loss": 2.0182, + "step": 13600 + }, + { + "epoch": 2.42, + "learning_rate": 4.194014814814815e-05, + "loss": 2.0037, + "step": 13605 + }, + { + "epoch": 2.42, + "learning_rate": 4.193718518518519e-05, + "loss": 2.0642, + "step": 13610 + }, + { + "epoch": 2.42, + "learning_rate": 4.193422222222223e-05, + "loss": 2.1569, + "step": 13615 + }, + { + "epoch": 2.42, + "learning_rate": 4.193125925925926e-05, + "loss": 1.9669, + "step": 13620 + }, + { + "epoch": 2.42, + "learning_rate": 4.19282962962963e-05, + "loss": 2.0406, + "step": 13625 + }, + { + "epoch": 2.42, + "learning_rate": 4.192533333333333e-05, + "loss": 2.3094, + "step": 13630 + }, + { + "epoch": 2.42, + "learning_rate": 4.1922370370370376e-05, + "loss": 2.1298, + "step": 13635 + }, + { + "epoch": 2.42, + "learning_rate": 4.191940740740741e-05, + "loss": 2.1142, + "step": 13640 + }, + { + "epoch": 2.43, + "learning_rate": 4.1916444444444446e-05, + "loss": 1.9428, + "step": 13645 + }, + { + "epoch": 2.43, + "learning_rate": 4.191348148148148e-05, + "loss": 2.0835, + "step": 13650 + }, + { + "epoch": 2.43, + "learning_rate": 4.1910518518518524e-05, + "loss": 2.0534, + "step": 13655 + }, + { + "epoch": 2.43, + "learning_rate": 4.1907555555555556e-05, + "loss": 2.0673, + "step": 13660 + }, + { + "epoch": 2.43, + "learning_rate": 4.1904592592592595e-05, + "loss": 2.1224, + "step": 13665 + }, + { + "epoch": 2.43, + "learning_rate": 4.190162962962963e-05, + "loss": 2.1028, + "step": 13670 + }, + { + "epoch": 2.43, + "learning_rate": 4.189866666666667e-05, + "loss": 2.173, + "step": 13675 + }, + { + "epoch": 2.43, + "learning_rate": 4.1895703703703705e-05, + "loss": 2.1125, + "step": 13680 + }, + { + "epoch": 2.43, + "learning_rate": 4.1892740740740743e-05, + "loss": 2.0076, + "step": 13685 + }, + { + "epoch": 2.43, + "learning_rate": 4.1889777777777775e-05, + "loss": 2.2083, + "step": 13690 + }, + { + "epoch": 2.43, + "learning_rate": 4.1886814814814814e-05, + "loss": 1.9489, + "step": 13695 + }, + { + "epoch": 2.44, + "learning_rate": 4.188385185185185e-05, + "loss": 2.1595, + "step": 13700 + }, + { + "epoch": 2.44, + "learning_rate": 4.188088888888889e-05, + "loss": 2.0408, + "step": 13705 + }, + { + "epoch": 2.44, + "learning_rate": 4.1877925925925924e-05, + "loss": 2.0923, + "step": 13710 + }, + { + "epoch": 2.44, + "learning_rate": 4.187496296296296e-05, + "loss": 2.2375, + "step": 13715 + }, + { + "epoch": 2.44, + "learning_rate": 4.1872e-05, + "loss": 1.9604, + "step": 13720 + }, + { + "epoch": 2.44, + "learning_rate": 4.186903703703704e-05, + "loss": 1.9845, + "step": 13725 + }, + { + "epoch": 2.44, + "learning_rate": 4.186607407407407e-05, + "loss": 2.0058, + "step": 13730 + }, + { + "epoch": 2.44, + "learning_rate": 4.186311111111111e-05, + "loss": 2.075, + "step": 13735 + }, + { + "epoch": 2.44, + "learning_rate": 4.186014814814815e-05, + "loss": 2.0699, + "step": 13740 + }, + { + "epoch": 2.44, + "learning_rate": 4.185718518518519e-05, + "loss": 2.0277, + "step": 13745 + }, + { + "epoch": 2.44, + "learning_rate": 4.185422222222222e-05, + "loss": 2.2192, + "step": 13750 + }, + { + "epoch": 2.45, + "learning_rate": 4.185125925925926e-05, + "loss": 2.0455, + "step": 13755 + }, + { + "epoch": 2.45, + "learning_rate": 4.18482962962963e-05, + "loss": 2.069, + "step": 13760 + }, + { + "epoch": 2.45, + "learning_rate": 4.184533333333334e-05, + "loss": 2.0551, + "step": 13765 + }, + { + "epoch": 2.45, + "learning_rate": 4.184237037037037e-05, + "loss": 2.1023, + "step": 13770 + }, + { + "epoch": 2.45, + "learning_rate": 4.183940740740741e-05, + "loss": 2.1192, + "step": 13775 + }, + { + "epoch": 2.45, + "learning_rate": 4.183644444444445e-05, + "loss": 2.1519, + "step": 13780 + }, + { + "epoch": 2.45, + "learning_rate": 4.1833481481481486e-05, + "loss": 2.1216, + "step": 13785 + }, + { + "epoch": 2.45, + "learning_rate": 4.183051851851852e-05, + "loss": 2.0152, + "step": 13790 + }, + { + "epoch": 2.45, + "learning_rate": 4.182755555555556e-05, + "loss": 2.2032, + "step": 13795 + }, + { + "epoch": 2.45, + "learning_rate": 4.1824592592592596e-05, + "loss": 2.208, + "step": 13800 + }, + { + "epoch": 2.45, + "learning_rate": 4.1821629629629634e-05, + "loss": 2.0739, + "step": 13805 + }, + { + "epoch": 2.46, + "learning_rate": 4.1818666666666667e-05, + "loss": 2.0978, + "step": 13810 + }, + { + "epoch": 2.46, + "learning_rate": 4.1815703703703705e-05, + "loss": 2.1084, + "step": 13815 + }, + { + "epoch": 2.46, + "learning_rate": 4.1812740740740744e-05, + "loss": 1.9446, + "step": 13820 + }, + { + "epoch": 2.46, + "learning_rate": 4.180977777777778e-05, + "loss": 2.1051, + "step": 13825 + }, + { + "epoch": 2.46, + "learning_rate": 4.1806814814814815e-05, + "loss": 2.0322, + "step": 13830 + }, + { + "epoch": 2.46, + "learning_rate": 4.1803851851851854e-05, + "loss": 2.0215, + "step": 13835 + }, + { + "epoch": 2.46, + "learning_rate": 4.180088888888889e-05, + "loss": 2.1087, + "step": 13840 + }, + { + "epoch": 2.46, + "learning_rate": 4.179792592592593e-05, + "loss": 2.1563, + "step": 13845 + }, + { + "epoch": 2.46, + "learning_rate": 4.1794962962962964e-05, + "loss": 2.0637, + "step": 13850 + }, + { + "epoch": 2.46, + "learning_rate": 4.1792e-05, + "loss": 2.1086, + "step": 13855 + }, + { + "epoch": 2.46, + "learning_rate": 4.1789037037037034e-05, + "loss": 2.0967, + "step": 13860 + }, + { + "epoch": 2.46, + "learning_rate": 4.178607407407408e-05, + "loss": 2.1491, + "step": 13865 + }, + { + "epoch": 2.47, + "learning_rate": 4.178311111111111e-05, + "loss": 2.2069, + "step": 13870 + }, + { + "epoch": 2.47, + "learning_rate": 4.178014814814815e-05, + "loss": 2.1009, + "step": 13875 + }, + { + "epoch": 2.47, + "learning_rate": 4.177718518518518e-05, + "loss": 2.0797, + "step": 13880 + }, + { + "epoch": 2.47, + "learning_rate": 4.177422222222223e-05, + "loss": 2.0598, + "step": 13885 + }, + { + "epoch": 2.47, + "learning_rate": 4.177125925925926e-05, + "loss": 2.0526, + "step": 13890 + }, + { + "epoch": 2.47, + "learning_rate": 4.17682962962963e-05, + "loss": 2.1235, + "step": 13895 + }, + { + "epoch": 2.47, + "learning_rate": 4.176533333333333e-05, + "loss": 2.2266, + "step": 13900 + }, + { + "epoch": 2.47, + "learning_rate": 4.176237037037038e-05, + "loss": 2.1926, + "step": 13905 + }, + { + "epoch": 2.47, + "learning_rate": 4.175940740740741e-05, + "loss": 2.1444, + "step": 13910 + }, + { + "epoch": 2.47, + "learning_rate": 4.175644444444445e-05, + "loss": 2.0799, + "step": 13915 + }, + { + "epoch": 2.47, + "learning_rate": 4.175348148148148e-05, + "loss": 2.212, + "step": 13920 + }, + { + "epoch": 2.48, + "learning_rate": 4.175051851851852e-05, + "loss": 2.1169, + "step": 13925 + }, + { + "epoch": 2.48, + "learning_rate": 4.174755555555556e-05, + "loss": 2.0192, + "step": 13930 + }, + { + "epoch": 2.48, + "learning_rate": 4.1744592592592596e-05, + "loss": 2.0212, + "step": 13935 + }, + { + "epoch": 2.48, + "learning_rate": 4.174162962962963e-05, + "loss": 2.0407, + "step": 13940 + }, + { + "epoch": 2.48, + "learning_rate": 4.173866666666667e-05, + "loss": 1.9824, + "step": 13945 + }, + { + "epoch": 2.48, + "learning_rate": 4.1735703703703706e-05, + "loss": 2.0493, + "step": 13950 + }, + { + "epoch": 2.48, + "learning_rate": 4.1732740740740745e-05, + "loss": 2.1002, + "step": 13955 + }, + { + "epoch": 2.48, + "learning_rate": 4.172977777777778e-05, + "loss": 2.009, + "step": 13960 + }, + { + "epoch": 2.48, + "learning_rate": 4.1726814814814816e-05, + "loss": 2.0869, + "step": 13965 + }, + { + "epoch": 2.48, + "learning_rate": 4.1723851851851855e-05, + "loss": 2.2347, + "step": 13970 + }, + { + "epoch": 2.48, + "learning_rate": 4.1720888888888893e-05, + "loss": 1.9343, + "step": 13975 + }, + { + "epoch": 2.49, + "learning_rate": 4.1717925925925925e-05, + "loss": 2.0489, + "step": 13980 + }, + { + "epoch": 2.49, + "learning_rate": 4.1714962962962964e-05, + "loss": 2.2685, + "step": 13985 + }, + { + "epoch": 2.49, + "learning_rate": 4.1712e-05, + "loss": 2.0465, + "step": 13990 + }, + { + "epoch": 2.49, + "learning_rate": 4.170903703703704e-05, + "loss": 2.0893, + "step": 13995 + }, + { + "epoch": 2.49, + "learning_rate": 4.1706074074074074e-05, + "loss": 2.1137, + "step": 14000 + }, + { + "epoch": 2.49, + "learning_rate": 4.170311111111111e-05, + "loss": 2.053, + "step": 14005 + }, + { + "epoch": 2.49, + "learning_rate": 4.170014814814815e-05, + "loss": 2.2186, + "step": 14010 + }, + { + "epoch": 2.49, + "learning_rate": 4.169718518518519e-05, + "loss": 2.0172, + "step": 14015 + }, + { + "epoch": 2.49, + "learning_rate": 4.169422222222222e-05, + "loss": 2.0425, + "step": 14020 + }, + { + "epoch": 2.49, + "learning_rate": 4.169125925925926e-05, + "loss": 2.0626, + "step": 14025 + }, + { + "epoch": 2.49, + "learning_rate": 4.16882962962963e-05, + "loss": 1.9898, + "step": 14030 + }, + { + "epoch": 2.5, + "learning_rate": 4.168533333333334e-05, + "loss": 2.1142, + "step": 14035 + }, + { + "epoch": 2.5, + "learning_rate": 4.168237037037037e-05, + "loss": 2.0087, + "step": 14040 + }, + { + "epoch": 2.5, + "learning_rate": 4.167940740740741e-05, + "loss": 1.8753, + "step": 14045 + }, + { + "epoch": 2.5, + "learning_rate": 4.167644444444445e-05, + "loss": 2.0239, + "step": 14050 + }, + { + "epoch": 2.5, + "learning_rate": 4.167348148148149e-05, + "loss": 2.2092, + "step": 14055 + }, + { + "epoch": 2.5, + "learning_rate": 4.167051851851852e-05, + "loss": 2.0851, + "step": 14060 + }, + { + "epoch": 2.5, + "learning_rate": 4.166755555555556e-05, + "loss": 2.0621, + "step": 14065 + }, + { + "epoch": 2.5, + "learning_rate": 4.16645925925926e-05, + "loss": 1.9728, + "step": 14070 + }, + { + "epoch": 2.5, + "learning_rate": 4.1661629629629636e-05, + "loss": 2.0479, + "step": 14075 + }, + { + "epoch": 2.5, + "learning_rate": 4.165866666666667e-05, + "loss": 2.0899, + "step": 14080 + }, + { + "epoch": 2.5, + "learning_rate": 4.165570370370371e-05, + "loss": 2.1002, + "step": 14085 + }, + { + "epoch": 2.5, + "learning_rate": 4.165274074074074e-05, + "loss": 2.2045, + "step": 14090 + }, + { + "epoch": 2.51, + "learning_rate": 4.1649777777777785e-05, + "loss": 2.0023, + "step": 14095 + }, + { + "epoch": 2.51, + "learning_rate": 4.1646814814814817e-05, + "loss": 2.0798, + "step": 14100 + }, + { + "epoch": 2.51, + "learning_rate": 4.1643851851851855e-05, + "loss": 2.1715, + "step": 14105 + }, + { + "epoch": 2.51, + "learning_rate": 4.164088888888889e-05, + "loss": 2.1451, + "step": 14110 + }, + { + "epoch": 2.51, + "learning_rate": 4.163792592592593e-05, + "loss": 2.1356, + "step": 14115 + }, + { + "epoch": 2.51, + "learning_rate": 4.1634962962962965e-05, + "loss": 1.9964, + "step": 14120 + }, + { + "epoch": 2.51, + "learning_rate": 4.1632000000000004e-05, + "loss": 2.1082, + "step": 14125 + }, + { + "epoch": 2.51, + "learning_rate": 4.1629037037037036e-05, + "loss": 2.1252, + "step": 14130 + }, + { + "epoch": 2.51, + "learning_rate": 4.162607407407408e-05, + "loss": 2.1712, + "step": 14135 + }, + { + "epoch": 2.51, + "learning_rate": 4.1623111111111114e-05, + "loss": 2.1854, + "step": 14140 + }, + { + "epoch": 2.51, + "learning_rate": 4.162014814814815e-05, + "loss": 2.0532, + "step": 14145 + }, + { + "epoch": 2.52, + "learning_rate": 4.1617185185185184e-05, + "loss": 2.1789, + "step": 14150 + }, + { + "epoch": 2.52, + "learning_rate": 4.161422222222222e-05, + "loss": 2.0444, + "step": 14155 + }, + { + "epoch": 2.52, + "learning_rate": 4.161125925925926e-05, + "loss": 1.981, + "step": 14160 + }, + { + "epoch": 2.52, + "learning_rate": 4.16082962962963e-05, + "loss": 2.1252, + "step": 14165 + }, + { + "epoch": 2.52, + "learning_rate": 4.160533333333333e-05, + "loss": 1.9818, + "step": 14170 + }, + { + "epoch": 2.52, + "learning_rate": 4.160237037037037e-05, + "loss": 2.0711, + "step": 14175 + }, + { + "epoch": 2.52, + "learning_rate": 4.159940740740741e-05, + "loss": 2.0776, + "step": 14180 + }, + { + "epoch": 2.52, + "learning_rate": 4.159644444444445e-05, + "loss": 2.158, + "step": 14185 + }, + { + "epoch": 2.52, + "learning_rate": 4.159348148148148e-05, + "loss": 2.1075, + "step": 14190 + }, + { + "epoch": 2.52, + "learning_rate": 4.159051851851852e-05, + "loss": 2.0279, + "step": 14195 + }, + { + "epoch": 2.52, + "learning_rate": 4.158755555555556e-05, + "loss": 1.9855, + "step": 14200 + }, + { + "epoch": 2.53, + "learning_rate": 4.15845925925926e-05, + "loss": 1.9874, + "step": 14205 + }, + { + "epoch": 2.53, + "learning_rate": 4.158162962962963e-05, + "loss": 2.1113, + "step": 14210 + }, + { + "epoch": 2.53, + "learning_rate": 4.157866666666667e-05, + "loss": 2.0573, + "step": 14215 + }, + { + "epoch": 2.53, + "learning_rate": 4.157570370370371e-05, + "loss": 2.0298, + "step": 14220 + }, + { + "epoch": 2.53, + "learning_rate": 4.1572740740740746e-05, + "loss": 2.1508, + "step": 14225 + }, + { + "epoch": 2.53, + "learning_rate": 4.156977777777778e-05, + "loss": 2.182, + "step": 14230 + }, + { + "epoch": 2.53, + "learning_rate": 4.156681481481482e-05, + "loss": 2.085, + "step": 14235 + }, + { + "epoch": 2.53, + "learning_rate": 4.1563851851851856e-05, + "loss": 2.144, + "step": 14240 + }, + { + "epoch": 2.53, + "learning_rate": 4.1560888888888895e-05, + "loss": 2.1203, + "step": 14245 + }, + { + "epoch": 2.53, + "learning_rate": 4.155792592592593e-05, + "loss": 2.0401, + "step": 14250 + }, + { + "epoch": 2.53, + "learning_rate": 4.155496296296296e-05, + "loss": 2.0923, + "step": 14255 + }, + { + "epoch": 2.54, + "learning_rate": 4.1552000000000005e-05, + "loss": 2.1821, + "step": 14260 + }, + { + "epoch": 2.54, + "learning_rate": 4.154903703703704e-05, + "loss": 2.0245, + "step": 14265 + }, + { + "epoch": 2.54, + "learning_rate": 4.1546074074074076e-05, + "loss": 2.0459, + "step": 14270 + }, + { + "epoch": 2.54, + "learning_rate": 4.154311111111111e-05, + "loss": 2.0527, + "step": 14275 + }, + { + "epoch": 2.54, + "learning_rate": 4.154014814814815e-05, + "loss": 2.0416, + "step": 14280 + }, + { + "epoch": 2.54, + "learning_rate": 4.1537185185185185e-05, + "loss": 1.9352, + "step": 14285 + }, + { + "epoch": 2.54, + "learning_rate": 4.1534222222222224e-05, + "loss": 2.0967, + "step": 14290 + }, + { + "epoch": 2.54, + "learning_rate": 4.1531259259259256e-05, + "loss": 2.1743, + "step": 14295 + }, + { + "epoch": 2.54, + "learning_rate": 4.15282962962963e-05, + "loss": 2.0947, + "step": 14300 + }, + { + "epoch": 2.54, + "learning_rate": 4.1525333333333334e-05, + "loss": 2.2204, + "step": 14305 + }, + { + "epoch": 2.54, + "learning_rate": 4.152237037037037e-05, + "loss": 2.0692, + "step": 14310 + }, + { + "epoch": 2.54, + "learning_rate": 4.1519407407407405e-05, + "loss": 2.002, + "step": 14315 + }, + { + "epoch": 2.55, + "learning_rate": 4.1516444444444443e-05, + "loss": 2.1378, + "step": 14320 + }, + { + "epoch": 2.55, + "learning_rate": 4.151348148148148e-05, + "loss": 2.0584, + "step": 14325 + }, + { + "epoch": 2.55, + "learning_rate": 4.151051851851852e-05, + "loss": 2.0266, + "step": 14330 + }, + { + "epoch": 2.55, + "learning_rate": 4.150755555555555e-05, + "loss": 2.2201, + "step": 14335 + }, + { + "epoch": 2.55, + "learning_rate": 4.150459259259259e-05, + "loss": 2.0901, + "step": 14340 + }, + { + "epoch": 2.55, + "learning_rate": 4.150162962962963e-05, + "loss": 2.085, + "step": 14345 + }, + { + "epoch": 2.55, + "learning_rate": 4.149866666666667e-05, + "loss": 2.2857, + "step": 14350 + }, + { + "epoch": 2.55, + "learning_rate": 4.14957037037037e-05, + "loss": 2.0769, + "step": 14355 + }, + { + "epoch": 2.55, + "learning_rate": 4.149274074074074e-05, + "loss": 2.1179, + "step": 14360 + }, + { + "epoch": 2.55, + "learning_rate": 4.148977777777778e-05, + "loss": 2.1101, + "step": 14365 + }, + { + "epoch": 2.55, + "learning_rate": 4.148681481481482e-05, + "loss": 2.1943, + "step": 14370 + }, + { + "epoch": 2.56, + "learning_rate": 4.148385185185185e-05, + "loss": 2.1204, + "step": 14375 + }, + { + "epoch": 2.56, + "learning_rate": 4.148088888888889e-05, + "loss": 1.9206, + "step": 14380 + }, + { + "epoch": 2.56, + "learning_rate": 4.147792592592593e-05, + "loss": 2.1541, + "step": 14385 + }, + { + "epoch": 2.56, + "learning_rate": 4.147496296296297e-05, + "loss": 2.0315, + "step": 14390 + }, + { + "epoch": 2.56, + "learning_rate": 4.1472e-05, + "loss": 1.9857, + "step": 14395 + }, + { + "epoch": 2.56, + "learning_rate": 4.146903703703704e-05, + "loss": 2.1676, + "step": 14400 + }, + { + "epoch": 2.56, + "learning_rate": 4.1466074074074076e-05, + "loss": 2.032, + "step": 14405 + }, + { + "epoch": 2.56, + "learning_rate": 4.1463111111111115e-05, + "loss": 2.1304, + "step": 14410 + }, + { + "epoch": 2.56, + "learning_rate": 4.146014814814815e-05, + "loss": 2.111, + "step": 14415 + }, + { + "epoch": 2.56, + "learning_rate": 4.1457185185185186e-05, + "loss": 1.9829, + "step": 14420 + }, + { + "epoch": 2.56, + "learning_rate": 4.1454222222222225e-05, + "loss": 2.045, + "step": 14425 + }, + { + "epoch": 2.57, + "learning_rate": 4.1451259259259264e-05, + "loss": 2.0173, + "step": 14430 + }, + { + "epoch": 2.57, + "learning_rate": 4.1448296296296296e-05, + "loss": 2.0057, + "step": 14435 + }, + { + "epoch": 2.57, + "learning_rate": 4.1445333333333335e-05, + "loss": 2.1537, + "step": 14440 + }, + { + "epoch": 2.57, + "learning_rate": 4.144237037037037e-05, + "loss": 2.0826, + "step": 14445 + }, + { + "epoch": 2.57, + "learning_rate": 4.143940740740741e-05, + "loss": 2.1167, + "step": 14450 + }, + { + "epoch": 2.57, + "learning_rate": 4.1436444444444444e-05, + "loss": 2.1742, + "step": 14455 + }, + { + "epoch": 2.57, + "learning_rate": 4.143348148148148e-05, + "loss": 2.1215, + "step": 14460 + }, + { + "epoch": 2.57, + "learning_rate": 4.143051851851852e-05, + "loss": 2.1289, + "step": 14465 + }, + { + "epoch": 2.57, + "learning_rate": 4.142755555555556e-05, + "loss": 2.126, + "step": 14470 + }, + { + "epoch": 2.57, + "learning_rate": 4.142459259259259e-05, + "loss": 2.0364, + "step": 14475 + }, + { + "epoch": 2.57, + "learning_rate": 4.142162962962963e-05, + "loss": 1.9591, + "step": 14480 + }, + { + "epoch": 2.58, + "learning_rate": 4.1418666666666664e-05, + "loss": 2.1057, + "step": 14485 + }, + { + "epoch": 2.58, + "learning_rate": 4.141570370370371e-05, + "loss": 2.02, + "step": 14490 + }, + { + "epoch": 2.58, + "learning_rate": 4.141274074074074e-05, + "loss": 2.0651, + "step": 14495 + }, + { + "epoch": 2.58, + "learning_rate": 4.140977777777778e-05, + "loss": 2.0719, + "step": 14500 + }, + { + "epoch": 2.58, + "learning_rate": 4.140681481481481e-05, + "loss": 2.1098, + "step": 14505 + }, + { + "epoch": 2.58, + "learning_rate": 4.140385185185186e-05, + "loss": 2.1732, + "step": 14510 + }, + { + "epoch": 2.58, + "learning_rate": 4.140088888888889e-05, + "loss": 1.8967, + "step": 14515 + }, + { + "epoch": 2.58, + "learning_rate": 4.139792592592593e-05, + "loss": 2.1395, + "step": 14520 + }, + { + "epoch": 2.58, + "learning_rate": 4.139496296296296e-05, + "loss": 2.094, + "step": 14525 + }, + { + "epoch": 2.58, + "learning_rate": 4.1392000000000006e-05, + "loss": 2.0397, + "step": 14530 + }, + { + "epoch": 2.58, + "learning_rate": 4.138903703703704e-05, + "loss": 2.1939, + "step": 14535 + }, + { + "epoch": 2.58, + "learning_rate": 4.138607407407408e-05, + "loss": 2.1498, + "step": 14540 + }, + { + "epoch": 2.59, + "learning_rate": 4.138311111111111e-05, + "loss": 2.125, + "step": 14545 + }, + { + "epoch": 2.59, + "learning_rate": 4.138014814814815e-05, + "loss": 2.0582, + "step": 14550 + }, + { + "epoch": 2.59, + "learning_rate": 4.137718518518519e-05, + "loss": 2.0127, + "step": 14555 + }, + { + "epoch": 2.59, + "learning_rate": 4.1374222222222226e-05, + "loss": 2.0991, + "step": 14560 + }, + { + "epoch": 2.59, + "learning_rate": 4.137125925925926e-05, + "loss": 2.0907, + "step": 14565 + }, + { + "epoch": 2.59, + "learning_rate": 4.1368296296296296e-05, + "loss": 2.0903, + "step": 14570 + }, + { + "epoch": 2.59, + "learning_rate": 4.1365333333333335e-05, + "loss": 2.0422, + "step": 14575 + }, + { + "epoch": 2.59, + "learning_rate": 4.1362370370370374e-05, + "loss": 2.0719, + "step": 14580 + }, + { + "epoch": 2.59, + "learning_rate": 4.1359407407407406e-05, + "loss": 2.0992, + "step": 14585 + }, + { + "epoch": 2.59, + "learning_rate": 4.1356444444444445e-05, + "loss": 2.1733, + "step": 14590 + }, + { + "epoch": 2.59, + "learning_rate": 4.1353481481481484e-05, + "loss": 2.0139, + "step": 14595 + }, + { + "epoch": 2.6, + "learning_rate": 4.135051851851852e-05, + "loss": 2.0392, + "step": 14600 + }, + { + "epoch": 2.6, + "learning_rate": 4.1347555555555555e-05, + "loss": 2.0644, + "step": 14605 + }, + { + "epoch": 2.6, + "learning_rate": 4.1344592592592593e-05, + "loss": 2.0225, + "step": 14610 + }, + { + "epoch": 2.6, + "learning_rate": 4.134162962962963e-05, + "loss": 1.9885, + "step": 14615 + }, + { + "epoch": 2.6, + "learning_rate": 4.133866666666667e-05, + "loss": 2.0296, + "step": 14620 + }, + { + "epoch": 2.6, + "learning_rate": 4.13357037037037e-05, + "loss": 1.9292, + "step": 14625 + }, + { + "epoch": 2.6, + "learning_rate": 4.133274074074074e-05, + "loss": 2.0361, + "step": 14630 + }, + { + "epoch": 2.6, + "learning_rate": 4.132977777777778e-05, + "loss": 1.9927, + "step": 14635 + }, + { + "epoch": 2.6, + "learning_rate": 4.132681481481482e-05, + "loss": 2.1377, + "step": 14640 + }, + { + "epoch": 2.6, + "learning_rate": 4.132385185185185e-05, + "loss": 1.9959, + "step": 14645 + }, + { + "epoch": 2.6, + "learning_rate": 4.132088888888889e-05, + "loss": 2.1521, + "step": 14650 + }, + { + "epoch": 2.61, + "learning_rate": 4.131792592592593e-05, + "loss": 2.106, + "step": 14655 + }, + { + "epoch": 2.61, + "learning_rate": 4.131496296296297e-05, + "loss": 2.0922, + "step": 14660 + }, + { + "epoch": 2.61, + "learning_rate": 4.1312e-05, + "loss": 2.1658, + "step": 14665 + }, + { + "epoch": 2.61, + "learning_rate": 4.130903703703704e-05, + "loss": 2.1335, + "step": 14670 + }, + { + "epoch": 2.61, + "learning_rate": 4.130607407407408e-05, + "loss": 2.04, + "step": 14675 + }, + { + "epoch": 2.61, + "learning_rate": 4.130311111111112e-05, + "loss": 2.1213, + "step": 14680 + }, + { + "epoch": 2.61, + "learning_rate": 4.130014814814815e-05, + "loss": 2.078, + "step": 14685 + }, + { + "epoch": 2.61, + "learning_rate": 4.129718518518519e-05, + "loss": 2.1521, + "step": 14690 + }, + { + "epoch": 2.61, + "learning_rate": 4.1294222222222226e-05, + "loss": 2.1108, + "step": 14695 + }, + { + "epoch": 2.61, + "learning_rate": 4.1291259259259265e-05, + "loss": 1.9757, + "step": 14700 + }, + { + "epoch": 2.61, + "learning_rate": 4.12882962962963e-05, + "loss": 2.1106, + "step": 14705 + }, + { + "epoch": 2.62, + "learning_rate": 4.1285333333333336e-05, + "loss": 2.1667, + "step": 14710 + }, + { + "epoch": 2.62, + "learning_rate": 4.128237037037037e-05, + "loss": 2.0904, + "step": 14715 + }, + { + "epoch": 2.62, + "learning_rate": 4.1279407407407414e-05, + "loss": 1.9551, + "step": 14720 + }, + { + "epoch": 2.62, + "learning_rate": 4.1276444444444446e-05, + "loss": 2.0572, + "step": 14725 + }, + { + "epoch": 2.62, + "learning_rate": 4.1273481481481485e-05, + "loss": 2.1257, + "step": 14730 + }, + { + "epoch": 2.62, + "learning_rate": 4.1270518518518517e-05, + "loss": 2.0819, + "step": 14735 + }, + { + "epoch": 2.62, + "learning_rate": 4.126755555555556e-05, + "loss": 2.0816, + "step": 14740 + }, + { + "epoch": 2.62, + "learning_rate": 4.1265185185185185e-05, + "loss": 2.0429, + "step": 14745 + }, + { + "epoch": 2.62, + "learning_rate": 4.1262222222222224e-05, + "loss": 2.0619, + "step": 14750 + }, + { + "epoch": 2.62, + "learning_rate": 4.1259259259259256e-05, + "loss": 2.0228, + "step": 14755 + }, + { + "epoch": 2.62, + "learning_rate": 4.12562962962963e-05, + "loss": 2.1033, + "step": 14760 + }, + { + "epoch": 2.62, + "learning_rate": 4.1253333333333334e-05, + "loss": 2.1752, + "step": 14765 + }, + { + "epoch": 2.63, + "learning_rate": 4.125037037037037e-05, + "loss": 2.1219, + "step": 14770 + }, + { + "epoch": 2.63, + "learning_rate": 4.1247407407407405e-05, + "loss": 1.9767, + "step": 14775 + }, + { + "epoch": 2.63, + "learning_rate": 4.124444444444444e-05, + "loss": 2.1594, + "step": 14780 + }, + { + "epoch": 2.63, + "learning_rate": 4.124148148148148e-05, + "loss": 1.9246, + "step": 14785 + }, + { + "epoch": 2.63, + "learning_rate": 4.123851851851852e-05, + "loss": 2.097, + "step": 14790 + }, + { + "epoch": 2.63, + "learning_rate": 4.123555555555555e-05, + "loss": 1.9906, + "step": 14795 + }, + { + "epoch": 2.63, + "learning_rate": 4.123259259259259e-05, + "loss": 2.0414, + "step": 14800 + }, + { + "epoch": 2.63, + "learning_rate": 4.122962962962963e-05, + "loss": 2.1419, + "step": 14805 + }, + { + "epoch": 2.63, + "learning_rate": 4.122666666666667e-05, + "loss": 1.9506, + "step": 14810 + }, + { + "epoch": 2.63, + "learning_rate": 4.12237037037037e-05, + "loss": 2.0683, + "step": 14815 + }, + { + "epoch": 2.63, + "learning_rate": 4.122074074074074e-05, + "loss": 2.2329, + "step": 14820 + }, + { + "epoch": 2.64, + "learning_rate": 4.121777777777778e-05, + "loss": 2.1431, + "step": 14825 + }, + { + "epoch": 2.64, + "learning_rate": 4.121481481481482e-05, + "loss": 2.121, + "step": 14830 + }, + { + "epoch": 2.64, + "learning_rate": 4.121185185185185e-05, + "loss": 2.0335, + "step": 14835 + }, + { + "epoch": 2.64, + "learning_rate": 4.120888888888889e-05, + "loss": 1.9796, + "step": 14840 + }, + { + "epoch": 2.64, + "learning_rate": 4.120592592592593e-05, + "loss": 2.0391, + "step": 14845 + }, + { + "epoch": 2.64, + "learning_rate": 4.1202962962962967e-05, + "loss": 2.1091, + "step": 14850 + }, + { + "epoch": 2.64, + "learning_rate": 4.12e-05, + "loss": 2.0152, + "step": 14855 + }, + { + "epoch": 2.64, + "learning_rate": 4.119703703703704e-05, + "loss": 1.9969, + "step": 14860 + }, + { + "epoch": 2.64, + "learning_rate": 4.1194074074074076e-05, + "loss": 2.1258, + "step": 14865 + }, + { + "epoch": 2.64, + "learning_rate": 4.1191111111111115e-05, + "loss": 1.9615, + "step": 14870 + }, + { + "epoch": 2.64, + "learning_rate": 4.118814814814815e-05, + "loss": 2.0152, + "step": 14875 + }, + { + "epoch": 2.65, + "learning_rate": 4.1185185185185186e-05, + "loss": 2.017, + "step": 14880 + }, + { + "epoch": 2.65, + "learning_rate": 4.1182814814814816e-05, + "loss": 2.0579, + "step": 14885 + }, + { + "epoch": 2.65, + "learning_rate": 4.1179851851851854e-05, + "loss": 2.1387, + "step": 14890 + }, + { + "epoch": 2.65, + "learning_rate": 4.117688888888889e-05, + "loss": 2.2143, + "step": 14895 + }, + { + "epoch": 2.65, + "learning_rate": 4.1173925925925925e-05, + "loss": 2.035, + "step": 14900 + }, + { + "epoch": 2.65, + "learning_rate": 4.1170962962962964e-05, + "loss": 2.1267, + "step": 14905 + }, + { + "epoch": 2.65, + "learning_rate": 4.1168e-05, + "loss": 2.2027, + "step": 14910 + }, + { + "epoch": 2.65, + "learning_rate": 4.116503703703704e-05, + "loss": 2.1033, + "step": 14915 + }, + { + "epoch": 2.65, + "learning_rate": 4.1162074074074074e-05, + "loss": 2.2719, + "step": 14920 + }, + { + "epoch": 2.65, + "learning_rate": 4.115911111111111e-05, + "loss": 2.0827, + "step": 14925 + }, + { + "epoch": 2.65, + "learning_rate": 4.115614814814815e-05, + "loss": 2.1438, + "step": 14930 + }, + { + "epoch": 2.66, + "learning_rate": 4.115318518518519e-05, + "loss": 2.0304, + "step": 14935 + }, + { + "epoch": 2.66, + "learning_rate": 4.115022222222222e-05, + "loss": 2.0755, + "step": 14940 + }, + { + "epoch": 2.66, + "learning_rate": 4.114725925925926e-05, + "loss": 2.123, + "step": 14945 + }, + { + "epoch": 2.66, + "learning_rate": 4.11442962962963e-05, + "loss": 2.1398, + "step": 14950 + }, + { + "epoch": 2.66, + "learning_rate": 4.114133333333334e-05, + "loss": 2.0213, + "step": 14955 + }, + { + "epoch": 2.66, + "learning_rate": 4.113837037037037e-05, + "loss": 2.0681, + "step": 14960 + }, + { + "epoch": 2.66, + "learning_rate": 4.113540740740741e-05, + "loss": 2.0369, + "step": 14965 + }, + { + "epoch": 2.66, + "learning_rate": 4.113244444444445e-05, + "loss": 2.0586, + "step": 14970 + }, + { + "epoch": 2.66, + "learning_rate": 4.112948148148149e-05, + "loss": 2.0624, + "step": 14975 + }, + { + "epoch": 2.66, + "learning_rate": 4.112651851851852e-05, + "loss": 2.1034, + "step": 14980 + }, + { + "epoch": 2.66, + "learning_rate": 4.112355555555556e-05, + "loss": 2.078, + "step": 14985 + }, + { + "epoch": 2.66, + "learning_rate": 4.11205925925926e-05, + "loss": 2.0662, + "step": 14990 + }, + { + "epoch": 2.67, + "learning_rate": 4.1117629629629636e-05, + "loss": 2.0414, + "step": 14995 + }, + { + "epoch": 2.67, + "learning_rate": 4.111466666666667e-05, + "loss": 2.0381, + "step": 15000 + }, + { + "epoch": 2.67, + "learning_rate": 4.111170370370371e-05, + "loss": 2.1513, + "step": 15005 + }, + { + "epoch": 2.67, + "learning_rate": 4.1108740740740745e-05, + "loss": 1.9433, + "step": 15010 + }, + { + "epoch": 2.67, + "learning_rate": 4.1105777777777784e-05, + "loss": 2.0806, + "step": 15015 + }, + { + "epoch": 2.67, + "learning_rate": 4.1102814814814816e-05, + "loss": 2.0476, + "step": 15020 + }, + { + "epoch": 2.67, + "learning_rate": 4.1099851851851855e-05, + "loss": 2.0201, + "step": 15025 + }, + { + "epoch": 2.67, + "learning_rate": 4.109688888888889e-05, + "loss": 2.1419, + "step": 15030 + }, + { + "epoch": 2.67, + "learning_rate": 4.109392592592593e-05, + "loss": 1.9521, + "step": 15035 + }, + { + "epoch": 2.67, + "learning_rate": 4.1090962962962965e-05, + "loss": 1.9679, + "step": 15040 + }, + { + "epoch": 2.67, + "learning_rate": 4.1088000000000004e-05, + "loss": 2.1852, + "step": 15045 + }, + { + "epoch": 2.68, + "learning_rate": 4.1085037037037036e-05, + "loss": 2.0972, + "step": 15050 + }, + { + "epoch": 2.68, + "learning_rate": 4.108207407407408e-05, + "loss": 2.0936, + "step": 15055 + }, + { + "epoch": 2.68, + "learning_rate": 4.107911111111111e-05, + "loss": 1.9501, + "step": 15060 + }, + { + "epoch": 2.68, + "learning_rate": 4.107614814814815e-05, + "loss": 2.1829, + "step": 15065 + }, + { + "epoch": 2.68, + "learning_rate": 4.1073185185185184e-05, + "loss": 1.9812, + "step": 15070 + }, + { + "epoch": 2.68, + "learning_rate": 4.107022222222223e-05, + "loss": 2.163, + "step": 15075 + }, + { + "epoch": 2.68, + "learning_rate": 4.106725925925926e-05, + "loss": 1.9699, + "step": 15080 + }, + { + "epoch": 2.68, + "learning_rate": 4.10642962962963e-05, + "loss": 1.9943, + "step": 15085 + }, + { + "epoch": 2.68, + "learning_rate": 4.106133333333333e-05, + "loss": 2.0932, + "step": 15090 + }, + { + "epoch": 2.68, + "learning_rate": 4.105837037037037e-05, + "loss": 2.0264, + "step": 15095 + }, + { + "epoch": 2.68, + "learning_rate": 4.105540740740741e-05, + "loss": 2.0437, + "step": 15100 + }, + { + "epoch": 2.69, + "learning_rate": 4.105244444444445e-05, + "loss": 2.0824, + "step": 15105 + }, + { + "epoch": 2.69, + "learning_rate": 4.104948148148148e-05, + "loss": 2.1427, + "step": 15110 + }, + { + "epoch": 2.69, + "learning_rate": 4.104651851851852e-05, + "loss": 1.9665, + "step": 15115 + }, + { + "epoch": 2.69, + "learning_rate": 4.104355555555556e-05, + "loss": 2.1389, + "step": 15120 + }, + { + "epoch": 2.69, + "learning_rate": 4.10405925925926e-05, + "loss": 2.2224, + "step": 15125 + }, + { + "epoch": 2.69, + "learning_rate": 4.103762962962963e-05, + "loss": 2.0985, + "step": 15130 + }, + { + "epoch": 2.69, + "learning_rate": 4.103466666666667e-05, + "loss": 1.9811, + "step": 15135 + }, + { + "epoch": 2.69, + "learning_rate": 4.103170370370371e-05, + "loss": 2.1304, + "step": 15140 + }, + { + "epoch": 2.69, + "learning_rate": 4.1028740740740746e-05, + "loss": 2.0762, + "step": 15145 + }, + { + "epoch": 2.69, + "learning_rate": 4.102577777777778e-05, + "loss": 2.1825, + "step": 15150 + }, + { + "epoch": 2.69, + "learning_rate": 4.102281481481482e-05, + "loss": 2.0223, + "step": 15155 + }, + { + "epoch": 2.7, + "learning_rate": 4.1019851851851856e-05, + "loss": 2.0827, + "step": 15160 + }, + { + "epoch": 2.7, + "learning_rate": 4.1016888888888895e-05, + "loss": 2.0286, + "step": 15165 + }, + { + "epoch": 2.7, + "learning_rate": 4.101392592592593e-05, + "loss": 2.0494, + "step": 15170 + }, + { + "epoch": 2.7, + "learning_rate": 4.1010962962962966e-05, + "loss": 2.1453, + "step": 15175 + }, + { + "epoch": 2.7, + "learning_rate": 4.1008000000000004e-05, + "loss": 2.179, + "step": 15180 + }, + { + "epoch": 2.7, + "learning_rate": 4.1005037037037037e-05, + "loss": 1.9324, + "step": 15185 + }, + { + "epoch": 2.7, + "learning_rate": 4.1002074074074075e-05, + "loss": 2.1469, + "step": 15190 + }, + { + "epoch": 2.7, + "learning_rate": 4.099911111111111e-05, + "loss": 2.1273, + "step": 15195 + }, + { + "epoch": 2.7, + "learning_rate": 4.099614814814815e-05, + "loss": 2.0263, + "step": 15200 + }, + { + "epoch": 2.7, + "learning_rate": 4.0993185185185185e-05, + "loss": 2.0346, + "step": 15205 + }, + { + "epoch": 2.7, + "learning_rate": 4.0990222222222224e-05, + "loss": 2.1658, + "step": 15210 + }, + { + "epoch": 2.7, + "learning_rate": 4.0987259259259256e-05, + "loss": 2.1182, + "step": 15215 + }, + { + "epoch": 2.71, + "learning_rate": 4.09842962962963e-05, + "loss": 2.0952, + "step": 15220 + }, + { + "epoch": 2.71, + "learning_rate": 4.0981333333333334e-05, + "loss": 2.0503, + "step": 15225 + }, + { + "epoch": 2.71, + "learning_rate": 4.097837037037037e-05, + "loss": 2.2561, + "step": 15230 + }, + { + "epoch": 2.71, + "learning_rate": 4.0975407407407404e-05, + "loss": 2.0875, + "step": 15235 + }, + { + "epoch": 2.71, + "learning_rate": 4.097244444444445e-05, + "loss": 2.0434, + "step": 15240 + }, + { + "epoch": 2.71, + "learning_rate": 4.096948148148148e-05, + "loss": 2.1228, + "step": 15245 + }, + { + "epoch": 2.71, + "learning_rate": 4.096651851851852e-05, + "loss": 2.0239, + "step": 15250 + }, + { + "epoch": 2.71, + "learning_rate": 4.096355555555555e-05, + "loss": 2.23, + "step": 15255 + }, + { + "epoch": 2.71, + "learning_rate": 4.096059259259259e-05, + "loss": 2.1564, + "step": 15260 + }, + { + "epoch": 2.71, + "learning_rate": 4.095762962962963e-05, + "loss": 1.9969, + "step": 15265 + }, + { + "epoch": 2.71, + "learning_rate": 4.095466666666667e-05, + "loss": 2.1184, + "step": 15270 + }, + { + "epoch": 2.72, + "learning_rate": 4.09517037037037e-05, + "loss": 1.9873, + "step": 15275 + }, + { + "epoch": 2.72, + "learning_rate": 4.094874074074074e-05, + "loss": 2.07, + "step": 15280 + }, + { + "epoch": 2.72, + "learning_rate": 4.094577777777778e-05, + "loss": 2.1316, + "step": 15285 + }, + { + "epoch": 2.72, + "learning_rate": 4.094281481481482e-05, + "loss": 1.9668, + "step": 15290 + }, + { + "epoch": 2.72, + "learning_rate": 4.093985185185185e-05, + "loss": 2.0465, + "step": 15295 + }, + { + "epoch": 2.72, + "learning_rate": 4.093688888888889e-05, + "loss": 2.1029, + "step": 15300 + }, + { + "epoch": 2.72, + "learning_rate": 4.093392592592593e-05, + "loss": 2.1664, + "step": 15305 + }, + { + "epoch": 2.72, + "learning_rate": 4.0930962962962966e-05, + "loss": 2.0025, + "step": 15310 + }, + { + "epoch": 2.72, + "learning_rate": 4.0928e-05, + "loss": 2.0515, + "step": 15315 + }, + { + "epoch": 2.72, + "learning_rate": 4.092503703703704e-05, + "loss": 2.1744, + "step": 15320 + }, + { + "epoch": 2.72, + "learning_rate": 4.0922074074074076e-05, + "loss": 2.0385, + "step": 15325 + }, + { + "epoch": 2.73, + "learning_rate": 4.0919111111111115e-05, + "loss": 2.1291, + "step": 15330 + }, + { + "epoch": 2.73, + "learning_rate": 4.091614814814815e-05, + "loss": 2.0378, + "step": 15335 + }, + { + "epoch": 2.73, + "learning_rate": 4.0913185185185186e-05, + "loss": 2.2602, + "step": 15340 + }, + { + "epoch": 2.73, + "learning_rate": 4.0910222222222225e-05, + "loss": 2.1103, + "step": 15345 + }, + { + "epoch": 2.73, + "learning_rate": 4.0907259259259263e-05, + "loss": 2.027, + "step": 15350 + }, + { + "epoch": 2.73, + "learning_rate": 4.0904296296296295e-05, + "loss": 2.1953, + "step": 15355 + }, + { + "epoch": 2.73, + "learning_rate": 4.0901333333333334e-05, + "loss": 2.042, + "step": 15360 + }, + { + "epoch": 2.73, + "learning_rate": 4.089837037037037e-05, + "loss": 2.0208, + "step": 15365 + }, + { + "epoch": 2.73, + "learning_rate": 4.089540740740741e-05, + "loss": 2.1944, + "step": 15370 + }, + { + "epoch": 2.73, + "learning_rate": 4.0892444444444444e-05, + "loss": 2.14, + "step": 15375 + }, + { + "epoch": 2.73, + "learning_rate": 4.088948148148148e-05, + "loss": 2.1005, + "step": 15380 + }, + { + "epoch": 2.74, + "learning_rate": 4.088651851851852e-05, + "loss": 2.1982, + "step": 15385 + }, + { + "epoch": 2.74, + "learning_rate": 4.088355555555556e-05, + "loss": 2.1287, + "step": 15390 + }, + { + "epoch": 2.74, + "learning_rate": 4.088059259259259e-05, + "loss": 2.1458, + "step": 15395 + }, + { + "epoch": 2.74, + "learning_rate": 4.087762962962963e-05, + "loss": 2.0599, + "step": 15400 + }, + { + "epoch": 2.74, + "learning_rate": 4.087466666666666e-05, + "loss": 2.0354, + "step": 15405 + }, + { + "epoch": 2.74, + "learning_rate": 4.087170370370371e-05, + "loss": 1.9656, + "step": 15410 + }, + { + "epoch": 2.74, + "learning_rate": 4.086874074074074e-05, + "loss": 2.1182, + "step": 15415 + }, + { + "epoch": 2.74, + "learning_rate": 4.086577777777778e-05, + "loss": 2.0102, + "step": 15420 + }, + { + "epoch": 2.74, + "learning_rate": 4.086281481481481e-05, + "loss": 2.2395, + "step": 15425 + }, + { + "epoch": 2.74, + "learning_rate": 4.085985185185186e-05, + "loss": 1.9445, + "step": 15430 + }, + { + "epoch": 2.74, + "learning_rate": 4.085688888888889e-05, + "loss": 2.1119, + "step": 15435 + }, + { + "epoch": 2.74, + "learning_rate": 4.085392592592593e-05, + "loss": 2.1529, + "step": 15440 + }, + { + "epoch": 2.75, + "learning_rate": 4.085096296296296e-05, + "loss": 2.0068, + "step": 15445 + }, + { + "epoch": 2.75, + "learning_rate": 4.0848000000000006e-05, + "loss": 2.211, + "step": 15450 + }, + { + "epoch": 2.75, + "learning_rate": 4.084503703703704e-05, + "loss": 2.0549, + "step": 15455 + }, + { + "epoch": 2.75, + "learning_rate": 4.084207407407408e-05, + "loss": 2.1113, + "step": 15460 + }, + { + "epoch": 2.75, + "learning_rate": 4.083911111111111e-05, + "loss": 2.0522, + "step": 15465 + }, + { + "epoch": 2.75, + "learning_rate": 4.0836148148148155e-05, + "loss": 2.0833, + "step": 15470 + }, + { + "epoch": 2.75, + "learning_rate": 4.0833185185185187e-05, + "loss": 2.185, + "step": 15475 + }, + { + "epoch": 2.75, + "learning_rate": 4.0830222222222225e-05, + "loss": 2.0761, + "step": 15480 + }, + { + "epoch": 2.75, + "learning_rate": 4.082725925925926e-05, + "loss": 2.0456, + "step": 15485 + }, + { + "epoch": 2.75, + "learning_rate": 4.0824296296296296e-05, + "loss": 2.0464, + "step": 15490 + }, + { + "epoch": 2.75, + "learning_rate": 4.0821333333333335e-05, + "loss": 2.0996, + "step": 15495 + }, + { + "epoch": 2.76, + "learning_rate": 4.0818370370370374e-05, + "loss": 1.9248, + "step": 15500 + }, + { + "epoch": 2.76, + "learning_rate": 4.0815407407407406e-05, + "loss": 1.8979, + "step": 15505 + }, + { + "epoch": 2.76, + "learning_rate": 4.0812444444444445e-05, + "loss": 2.0678, + "step": 15510 + }, + { + "epoch": 2.76, + "learning_rate": 4.0809481481481484e-05, + "loss": 2.2282, + "step": 15515 + }, + { + "epoch": 2.76, + "learning_rate": 4.080651851851852e-05, + "loss": 2.1183, + "step": 15520 + }, + { + "epoch": 2.76, + "learning_rate": 4.0803555555555554e-05, + "loss": 1.929, + "step": 15525 + }, + { + "epoch": 2.76, + "learning_rate": 4.080059259259259e-05, + "loss": 2.119, + "step": 15530 + }, + { + "epoch": 2.76, + "learning_rate": 4.079822222222223e-05, + "loss": 2.1638, + "step": 15535 + }, + { + "epoch": 2.76, + "learning_rate": 4.079525925925926e-05, + "loss": 2.1155, + "step": 15540 + }, + { + "epoch": 2.76, + "learning_rate": 4.07922962962963e-05, + "loss": 2.0963, + "step": 15545 + }, + { + "epoch": 2.76, + "learning_rate": 4.078933333333333e-05, + "loss": 2.1174, + "step": 15550 + }, + { + "epoch": 2.77, + "learning_rate": 4.078637037037037e-05, + "loss": 2.2391, + "step": 15555 + }, + { + "epoch": 2.77, + "learning_rate": 4.078340740740741e-05, + "loss": 2.0453, + "step": 15560 + }, + { + "epoch": 2.77, + "learning_rate": 4.078044444444445e-05, + "loss": 1.9399, + "step": 15565 + }, + { + "epoch": 2.77, + "learning_rate": 4.077748148148148e-05, + "loss": 2.0565, + "step": 15570 + }, + { + "epoch": 2.77, + "learning_rate": 4.077451851851852e-05, + "loss": 2.0234, + "step": 15575 + }, + { + "epoch": 2.77, + "learning_rate": 4.077155555555556e-05, + "loss": 2.1458, + "step": 15580 + }, + { + "epoch": 2.77, + "learning_rate": 4.07685925925926e-05, + "loss": 2.0724, + "step": 15585 + }, + { + "epoch": 2.77, + "learning_rate": 4.076562962962963e-05, + "loss": 2.0227, + "step": 15590 + }, + { + "epoch": 2.77, + "learning_rate": 4.076266666666667e-05, + "loss": 2.1013, + "step": 15595 + }, + { + "epoch": 2.77, + "learning_rate": 4.075970370370371e-05, + "loss": 2.0211, + "step": 15600 + }, + { + "epoch": 2.77, + "learning_rate": 4.0756740740740746e-05, + "loss": 2.1538, + "step": 15605 + }, + { + "epoch": 2.78, + "learning_rate": 4.075377777777778e-05, + "loss": 2.0437, + "step": 15610 + }, + { + "epoch": 2.78, + "learning_rate": 4.075081481481482e-05, + "loss": 2.0719, + "step": 15615 + }, + { + "epoch": 2.78, + "learning_rate": 4.0747851851851856e-05, + "loss": 2.1299, + "step": 15620 + }, + { + "epoch": 2.78, + "learning_rate": 4.0744888888888895e-05, + "loss": 2.1515, + "step": 15625 + }, + { + "epoch": 2.78, + "learning_rate": 4.074192592592593e-05, + "loss": 2.2022, + "step": 15630 + }, + { + "epoch": 2.78, + "learning_rate": 4.0738962962962966e-05, + "loss": 2.2535, + "step": 15635 + }, + { + "epoch": 2.78, + "learning_rate": 4.0736000000000004e-05, + "loss": 1.9738, + "step": 15640 + }, + { + "epoch": 2.78, + "learning_rate": 4.0733037037037036e-05, + "loss": 2.1485, + "step": 15645 + }, + { + "epoch": 2.78, + "learning_rate": 4.0730074074074075e-05, + "loss": 2.0765, + "step": 15650 + }, + { + "epoch": 2.78, + "learning_rate": 4.072711111111111e-05, + "loss": 2.272, + "step": 15655 + }, + { + "epoch": 2.78, + "learning_rate": 4.072414814814815e-05, + "loss": 2.1975, + "step": 15660 + }, + { + "epoch": 2.78, + "learning_rate": 4.0721185185185185e-05, + "loss": 2.2159, + "step": 15665 + }, + { + "epoch": 2.79, + "learning_rate": 4.0718222222222224e-05, + "loss": 2.0837, + "step": 15670 + }, + { + "epoch": 2.79, + "learning_rate": 4.0715259259259256e-05, + "loss": 2.0685, + "step": 15675 + }, + { + "epoch": 2.79, + "learning_rate": 4.07122962962963e-05, + "loss": 2.1558, + "step": 15680 + }, + { + "epoch": 2.79, + "learning_rate": 4.0709333333333333e-05, + "loss": 2.139, + "step": 15685 + }, + { + "epoch": 2.79, + "learning_rate": 4.070637037037037e-05, + "loss": 2.0986, + "step": 15690 + }, + { + "epoch": 2.79, + "learning_rate": 4.0703407407407404e-05, + "loss": 2.1454, + "step": 15695 + }, + { + "epoch": 2.79, + "learning_rate": 4.070044444444445e-05, + "loss": 1.9436, + "step": 15700 + }, + { + "epoch": 2.79, + "learning_rate": 4.069748148148148e-05, + "loss": 1.9881, + "step": 15705 + }, + { + "epoch": 2.79, + "learning_rate": 4.069451851851852e-05, + "loss": 2.0824, + "step": 15710 + }, + { + "epoch": 2.79, + "learning_rate": 4.069155555555555e-05, + "loss": 2.1291, + "step": 15715 + }, + { + "epoch": 2.79, + "learning_rate": 4.068859259259259e-05, + "loss": 2.1314, + "step": 15720 + }, + { + "epoch": 2.8, + "learning_rate": 4.068562962962963e-05, + "loss": 2.0585, + "step": 15725 + }, + { + "epoch": 2.8, + "learning_rate": 4.068266666666667e-05, + "loss": 2.0556, + "step": 15730 + }, + { + "epoch": 2.8, + "learning_rate": 4.06797037037037e-05, + "loss": 2.1239, + "step": 15735 + }, + { + "epoch": 2.8, + "learning_rate": 4.067674074074074e-05, + "loss": 2.0041, + "step": 15740 + }, + { + "epoch": 2.8, + "learning_rate": 4.067377777777778e-05, + "loss": 2.0906, + "step": 15745 + }, + { + "epoch": 2.8, + "learning_rate": 4.067081481481482e-05, + "loss": 1.9624, + "step": 15750 + }, + { + "epoch": 2.8, + "learning_rate": 4.066785185185185e-05, + "loss": 2.0015, + "step": 15755 + }, + { + "epoch": 2.8, + "learning_rate": 4.066488888888889e-05, + "loss": 2.0668, + "step": 15760 + }, + { + "epoch": 2.8, + "learning_rate": 4.066192592592593e-05, + "loss": 2.0416, + "step": 15765 + }, + { + "epoch": 2.8, + "learning_rate": 4.0658962962962966e-05, + "loss": 2.101, + "step": 15770 + }, + { + "epoch": 2.8, + "learning_rate": 4.0656e-05, + "loss": 2.0801, + "step": 15775 + }, + { + "epoch": 2.81, + "learning_rate": 4.065303703703704e-05, + "loss": 2.0539, + "step": 15780 + }, + { + "epoch": 2.81, + "learning_rate": 4.0650074074074076e-05, + "loss": 1.981, + "step": 15785 + }, + { + "epoch": 2.81, + "learning_rate": 4.0647111111111115e-05, + "loss": 2.0143, + "step": 15790 + }, + { + "epoch": 2.81, + "learning_rate": 4.064414814814815e-05, + "loss": 2.0821, + "step": 15795 + }, + { + "epoch": 2.81, + "learning_rate": 4.0641185185185186e-05, + "loss": 2.056, + "step": 15800 + }, + { + "epoch": 2.81, + "learning_rate": 4.0638222222222225e-05, + "loss": 2.1024, + "step": 15805 + }, + { + "epoch": 2.81, + "learning_rate": 4.063525925925926e-05, + "loss": 2.0003, + "step": 15810 + }, + { + "epoch": 2.81, + "learning_rate": 4.0632296296296295e-05, + "loss": 2.0825, + "step": 15815 + }, + { + "epoch": 2.81, + "learning_rate": 4.0629333333333334e-05, + "loss": 1.991, + "step": 15820 + }, + { + "epoch": 2.81, + "learning_rate": 4.062637037037037e-05, + "loss": 2.2716, + "step": 15825 + }, + { + "epoch": 2.81, + "learning_rate": 4.062340740740741e-05, + "loss": 2.0591, + "step": 15830 + }, + { + "epoch": 2.82, + "learning_rate": 4.0620444444444444e-05, + "loss": 2.2245, + "step": 15835 + }, + { + "epoch": 2.82, + "learning_rate": 4.061748148148148e-05, + "loss": 1.9089, + "step": 15840 + }, + { + "epoch": 2.82, + "learning_rate": 4.061451851851852e-05, + "loss": 2.0742, + "step": 15845 + }, + { + "epoch": 2.82, + "learning_rate": 4.061155555555556e-05, + "loss": 2.1801, + "step": 15850 + }, + { + "epoch": 2.82, + "learning_rate": 4.060859259259259e-05, + "loss": 2.0128, + "step": 15855 + }, + { + "epoch": 2.82, + "learning_rate": 4.060562962962963e-05, + "loss": 2.1646, + "step": 15860 + }, + { + "epoch": 2.82, + "learning_rate": 4.060266666666666e-05, + "loss": 2.0965, + "step": 15865 + }, + { + "epoch": 2.82, + "learning_rate": 4.059970370370371e-05, + "loss": 2.0245, + "step": 15870 + }, + { + "epoch": 2.82, + "learning_rate": 4.059674074074074e-05, + "loss": 2.1484, + "step": 15875 + }, + { + "epoch": 2.82, + "learning_rate": 4.059377777777778e-05, + "loss": 2.0386, + "step": 15880 + }, + { + "epoch": 2.82, + "learning_rate": 4.059081481481481e-05, + "loss": 2.0924, + "step": 15885 + }, + { + "epoch": 2.82, + "learning_rate": 4.058785185185186e-05, + "loss": 1.9741, + "step": 15890 + }, + { + "epoch": 2.83, + "learning_rate": 4.058488888888889e-05, + "loss": 2.049, + "step": 15895 + }, + { + "epoch": 2.83, + "learning_rate": 4.058192592592593e-05, + "loss": 2.1096, + "step": 15900 + }, + { + "epoch": 2.83, + "learning_rate": 4.057896296296296e-05, + "loss": 2.0821, + "step": 15905 + }, + { + "epoch": 2.83, + "learning_rate": 4.0576000000000006e-05, + "loss": 1.9492, + "step": 15910 + }, + { + "epoch": 2.83, + "learning_rate": 4.057303703703704e-05, + "loss": 2.0418, + "step": 15915 + }, + { + "epoch": 2.83, + "learning_rate": 4.057007407407408e-05, + "loss": 2.0218, + "step": 15920 + }, + { + "epoch": 2.83, + "learning_rate": 4.056711111111111e-05, + "loss": 2.0541, + "step": 15925 + }, + { + "epoch": 2.83, + "learning_rate": 4.0564148148148154e-05, + "loss": 2.0128, + "step": 15930 + }, + { + "epoch": 2.83, + "learning_rate": 4.0561185185185186e-05, + "loss": 2.0768, + "step": 15935 + }, + { + "epoch": 2.83, + "learning_rate": 4.0558222222222225e-05, + "loss": 2.0105, + "step": 15940 + }, + { + "epoch": 2.83, + "learning_rate": 4.055525925925926e-05, + "loss": 2.0679, + "step": 15945 + }, + { + "epoch": 2.84, + "learning_rate": 4.0552296296296296e-05, + "loss": 2.0675, + "step": 15950 + }, + { + "epoch": 2.84, + "learning_rate": 4.0549333333333335e-05, + "loss": 2.1774, + "step": 15955 + }, + { + "epoch": 2.84, + "learning_rate": 4.0546370370370374e-05, + "loss": 2.1544, + "step": 15960 + }, + { + "epoch": 2.84, + "learning_rate": 4.0543407407407406e-05, + "loss": 2.0531, + "step": 15965 + }, + { + "epoch": 2.84, + "learning_rate": 4.0540444444444445e-05, + "loss": 2.0487, + "step": 15970 + }, + { + "epoch": 2.84, + "learning_rate": 4.0537481481481483e-05, + "loss": 2.1262, + "step": 15975 + }, + { + "epoch": 2.84, + "learning_rate": 4.053451851851852e-05, + "loss": 2.1444, + "step": 15980 + }, + { + "epoch": 2.84, + "learning_rate": 4.0531555555555554e-05, + "loss": 2.1414, + "step": 15985 + }, + { + "epoch": 2.84, + "learning_rate": 4.052859259259259e-05, + "loss": 2.0467, + "step": 15990 + }, + { + "epoch": 2.84, + "learning_rate": 4.052562962962963e-05, + "loss": 1.9632, + "step": 15995 + }, + { + "epoch": 2.84, + "learning_rate": 4.052266666666667e-05, + "loss": 1.9612, + "step": 16000 + }, + { + "epoch": 2.85, + "learning_rate": 4.05197037037037e-05, + "loss": 1.8637, + "step": 16005 + }, + { + "epoch": 2.85, + "learning_rate": 4.051674074074074e-05, + "loss": 2.0664, + "step": 16010 + }, + { + "epoch": 2.85, + "learning_rate": 4.051377777777778e-05, + "loss": 2.1201, + "step": 16015 + }, + { + "epoch": 2.85, + "learning_rate": 4.051081481481482e-05, + "loss": 2.068, + "step": 16020 + }, + { + "epoch": 2.85, + "learning_rate": 4.050785185185185e-05, + "loss": 2.1233, + "step": 16025 + }, + { + "epoch": 2.85, + "learning_rate": 4.050488888888889e-05, + "loss": 2.1708, + "step": 16030 + }, + { + "epoch": 2.85, + "learning_rate": 4.050192592592593e-05, + "loss": 1.8747, + "step": 16035 + }, + { + "epoch": 2.85, + "learning_rate": 4.049896296296297e-05, + "loss": 2.1514, + "step": 16040 + }, + { + "epoch": 2.85, + "learning_rate": 4.0496e-05, + "loss": 2.0846, + "step": 16045 + }, + { + "epoch": 2.85, + "learning_rate": 4.049303703703704e-05, + "loss": 2.0597, + "step": 16050 + }, + { + "epoch": 2.85, + "learning_rate": 4.049007407407408e-05, + "loss": 2.0686, + "step": 16055 + }, + { + "epoch": 2.86, + "learning_rate": 4.0487111111111116e-05, + "loss": 2.1085, + "step": 16060 + }, + { + "epoch": 2.86, + "learning_rate": 4.048414814814815e-05, + "loss": 2.1299, + "step": 16065 + }, + { + "epoch": 2.86, + "learning_rate": 4.048118518518519e-05, + "loss": 2.092, + "step": 16070 + }, + { + "epoch": 2.86, + "learning_rate": 4.0478222222222226e-05, + "loss": 2.0897, + "step": 16075 + }, + { + "epoch": 2.86, + "learning_rate": 4.0475259259259265e-05, + "loss": 2.0561, + "step": 16080 + }, + { + "epoch": 2.86, + "learning_rate": 4.04722962962963e-05, + "loss": 2.1786, + "step": 16085 + }, + { + "epoch": 2.86, + "learning_rate": 4.0469333333333336e-05, + "loss": 2.2351, + "step": 16090 + }, + { + "epoch": 2.86, + "learning_rate": 4.046637037037037e-05, + "loss": 2.1283, + "step": 16095 + }, + { + "epoch": 2.86, + "learning_rate": 4.046340740740741e-05, + "loss": 2.0544, + "step": 16100 + }, + { + "epoch": 2.86, + "learning_rate": 4.0460444444444445e-05, + "loss": 2.0658, + "step": 16105 + }, + { + "epoch": 2.86, + "learning_rate": 4.0457481481481484e-05, + "loss": 2.1455, + "step": 16110 + }, + { + "epoch": 2.86, + "learning_rate": 4.0454518518518516e-05, + "loss": 2.0699, + "step": 16115 + }, + { + "epoch": 2.87, + "learning_rate": 4.045155555555556e-05, + "loss": 2.083, + "step": 16120 + }, + { + "epoch": 2.87, + "learning_rate": 4.0448592592592594e-05, + "loss": 2.1269, + "step": 16125 + }, + { + "epoch": 2.87, + "learning_rate": 4.044562962962963e-05, + "loss": 2.0783, + "step": 16130 + }, + { + "epoch": 2.87, + "learning_rate": 4.0442666666666665e-05, + "loss": 2.1505, + "step": 16135 + }, + { + "epoch": 2.87, + "learning_rate": 4.043970370370371e-05, + "loss": 2.0506, + "step": 16140 + }, + { + "epoch": 2.87, + "learning_rate": 4.043674074074074e-05, + "loss": 2.085, + "step": 16145 + }, + { + "epoch": 2.87, + "learning_rate": 4.043377777777778e-05, + "loss": 2.0843, + "step": 16150 + }, + { + "epoch": 2.87, + "learning_rate": 4.043081481481481e-05, + "loss": 2.1628, + "step": 16155 + }, + { + "epoch": 2.87, + "learning_rate": 4.042785185185186e-05, + "loss": 2.0469, + "step": 16160 + }, + { + "epoch": 2.87, + "learning_rate": 4.042488888888889e-05, + "loss": 2.0351, + "step": 16165 + }, + { + "epoch": 2.87, + "learning_rate": 4.042192592592593e-05, + "loss": 1.9571, + "step": 16170 + }, + { + "epoch": 2.88, + "learning_rate": 4.041896296296296e-05, + "loss": 2.0478, + "step": 16175 + }, + { + "epoch": 2.88, + "learning_rate": 4.0416e-05, + "loss": 2.0621, + "step": 16180 + }, + { + "epoch": 2.88, + "learning_rate": 4.041303703703704e-05, + "loss": 2.0006, + "step": 16185 + }, + { + "epoch": 2.88, + "learning_rate": 4.041007407407408e-05, + "loss": 2.1921, + "step": 16190 + }, + { + "epoch": 2.88, + "learning_rate": 4.040711111111111e-05, + "loss": 2.2011, + "step": 16195 + }, + { + "epoch": 2.88, + "learning_rate": 4.040414814814815e-05, + "loss": 2.1462, + "step": 16200 + }, + { + "epoch": 2.88, + "learning_rate": 4.040118518518519e-05, + "loss": 2.0548, + "step": 16205 + }, + { + "epoch": 2.88, + "learning_rate": 4.039822222222223e-05, + "loss": 2.0789, + "step": 16210 + }, + { + "epoch": 2.88, + "learning_rate": 4.039525925925926e-05, + "loss": 2.1233, + "step": 16215 + }, + { + "epoch": 2.88, + "learning_rate": 4.03922962962963e-05, + "loss": 2.1864, + "step": 16220 + }, + { + "epoch": 2.88, + "learning_rate": 4.0389333333333336e-05, + "loss": 2.0643, + "step": 16225 + }, + { + "epoch": 2.89, + "learning_rate": 4.0386370370370375e-05, + "loss": 2.1472, + "step": 16230 + }, + { + "epoch": 2.89, + "learning_rate": 4.038340740740741e-05, + "loss": 1.9254, + "step": 16235 + }, + { + "epoch": 2.89, + "learning_rate": 4.0380444444444446e-05, + "loss": 2.0204, + "step": 16240 + }, + { + "epoch": 2.89, + "learning_rate": 4.0377481481481485e-05, + "loss": 1.9877, + "step": 16245 + }, + { + "epoch": 2.89, + "learning_rate": 4.0374518518518524e-05, + "loss": 2.0185, + "step": 16250 + }, + { + "epoch": 2.89, + "learning_rate": 4.0371555555555556e-05, + "loss": 2.0657, + "step": 16255 + }, + { + "epoch": 2.89, + "learning_rate": 4.0368592592592595e-05, + "loss": 1.9766, + "step": 16260 + }, + { + "epoch": 2.89, + "learning_rate": 4.0365629629629634e-05, + "loss": 1.9813, + "step": 16265 + }, + { + "epoch": 2.89, + "learning_rate": 4.036266666666667e-05, + "loss": 2.1265, + "step": 16270 + }, + { + "epoch": 2.89, + "learning_rate": 4.0359703703703704e-05, + "loss": 2.1267, + "step": 16275 + }, + { + "epoch": 2.89, + "learning_rate": 4.035674074074074e-05, + "loss": 2.0729, + "step": 16280 + }, + { + "epoch": 2.9, + "learning_rate": 4.035377777777778e-05, + "loss": 2.0036, + "step": 16285 + }, + { + "epoch": 2.9, + "learning_rate": 4.035081481481482e-05, + "loss": 2.1389, + "step": 16290 + }, + { + "epoch": 2.9, + "learning_rate": 4.034785185185185e-05, + "loss": 2.0893, + "step": 16295 + }, + { + "epoch": 2.9, + "learning_rate": 4.034488888888889e-05, + "loss": 2.2427, + "step": 16300 + }, + { + "epoch": 2.9, + "learning_rate": 4.034192592592593e-05, + "loss": 2.1558, + "step": 16305 + }, + { + "epoch": 2.9, + "learning_rate": 4.033896296296297e-05, + "loss": 2.0431, + "step": 16310 + }, + { + "epoch": 2.9, + "learning_rate": 4.0336e-05, + "loss": 2.1553, + "step": 16315 + }, + { + "epoch": 2.9, + "learning_rate": 4.033303703703704e-05, + "loss": 2.0283, + "step": 16320 + }, + { + "epoch": 2.9, + "learning_rate": 4.033007407407408e-05, + "loss": 2.1257, + "step": 16325 + }, + { + "epoch": 2.9, + "learning_rate": 4.032711111111112e-05, + "loss": 2.1653, + "step": 16330 + }, + { + "epoch": 2.9, + "learning_rate": 4.032414814814815e-05, + "loss": 2.0811, + "step": 16335 + }, + { + "epoch": 2.9, + "learning_rate": 4.032118518518519e-05, + "loss": 2.0757, + "step": 16340 + }, + { + "epoch": 2.91, + "learning_rate": 4.031822222222222e-05, + "loss": 2.0354, + "step": 16345 + }, + { + "epoch": 2.91, + "learning_rate": 4.0315259259259266e-05, + "loss": 2.1375, + "step": 16350 + }, + { + "epoch": 2.91, + "learning_rate": 4.03122962962963e-05, + "loss": 2.0141, + "step": 16355 + }, + { + "epoch": 2.91, + "learning_rate": 4.030933333333334e-05, + "loss": 2.0928, + "step": 16360 + }, + { + "epoch": 2.91, + "learning_rate": 4.030637037037037e-05, + "loss": 2.0547, + "step": 16365 + }, + { + "epoch": 2.91, + "learning_rate": 4.0303407407407415e-05, + "loss": 2.0986, + "step": 16370 + }, + { + "epoch": 2.91, + "learning_rate": 4.030044444444445e-05, + "loss": 2.09, + "step": 16375 + }, + { + "epoch": 2.91, + "learning_rate": 4.0297481481481486e-05, + "loss": 2.1528, + "step": 16380 + }, + { + "epoch": 2.91, + "learning_rate": 4.029451851851852e-05, + "loss": 2.1105, + "step": 16385 + }, + { + "epoch": 2.91, + "learning_rate": 4.0291555555555563e-05, + "loss": 2.1813, + "step": 16390 + }, + { + "epoch": 2.91, + "learning_rate": 4.0288592592592595e-05, + "loss": 2.0998, + "step": 16395 + }, + { + "epoch": 2.92, + "learning_rate": 4.0285629629629634e-05, + "loss": 1.9956, + "step": 16400 + }, + { + "epoch": 2.92, + "learning_rate": 4.0282666666666666e-05, + "loss": 2.1837, + "step": 16405 + }, + { + "epoch": 2.92, + "learning_rate": 4.0279703703703705e-05, + "loss": 2.015, + "step": 16410 + }, + { + "epoch": 2.92, + "learning_rate": 4.0276740740740744e-05, + "loss": 2.1512, + "step": 16415 + }, + { + "epoch": 2.92, + "learning_rate": 4.0273777777777776e-05, + "loss": 1.9866, + "step": 16420 + }, + { + "epoch": 2.92, + "learning_rate": 4.0270814814814815e-05, + "loss": 2.0019, + "step": 16425 + }, + { + "epoch": 2.92, + "learning_rate": 4.0267851851851854e-05, + "loss": 2.0177, + "step": 16430 + }, + { + "epoch": 2.92, + "learning_rate": 4.026488888888889e-05, + "loss": 2.0448, + "step": 16435 + }, + { + "epoch": 2.92, + "learning_rate": 4.0261925925925925e-05, + "loss": 2.2202, + "step": 16440 + }, + { + "epoch": 2.92, + "learning_rate": 4.025896296296296e-05, + "loss": 1.9852, + "step": 16445 + }, + { + "epoch": 2.92, + "learning_rate": 4.0256e-05, + "loss": 1.8883, + "step": 16450 + }, + { + "epoch": 2.93, + "learning_rate": 4.025303703703704e-05, + "loss": 1.9739, + "step": 16455 + }, + { + "epoch": 2.93, + "learning_rate": 4.025007407407407e-05, + "loss": 2.116, + "step": 16460 + }, + { + "epoch": 2.93, + "learning_rate": 4.024711111111111e-05, + "loss": 2.0663, + "step": 16465 + }, + { + "epoch": 2.93, + "learning_rate": 4.024414814814815e-05, + "loss": 1.9692, + "step": 16470 + }, + { + "epoch": 2.93, + "learning_rate": 4.024118518518519e-05, + "loss": 1.9749, + "step": 16475 + }, + { + "epoch": 2.93, + "learning_rate": 4.023822222222222e-05, + "loss": 1.9819, + "step": 16480 + }, + { + "epoch": 2.93, + "learning_rate": 4.023525925925926e-05, + "loss": 2.0088, + "step": 16485 + }, + { + "epoch": 2.93, + "learning_rate": 4.023229629629629e-05, + "loss": 2.1145, + "step": 16490 + }, + { + "epoch": 2.93, + "learning_rate": 4.022933333333334e-05, + "loss": 2.0532, + "step": 16495 + }, + { + "epoch": 2.93, + "learning_rate": 4.022637037037037e-05, + "loss": 2.0437, + "step": 16500 + }, + { + "epoch": 2.93, + "learning_rate": 4.022340740740741e-05, + "loss": 2.0047, + "step": 16505 + }, + { + "epoch": 2.94, + "learning_rate": 4.022044444444444e-05, + "loss": 2.1703, + "step": 16510 + }, + { + "epoch": 2.94, + "learning_rate": 4.0217481481481487e-05, + "loss": 2.1873, + "step": 16515 + }, + { + "epoch": 2.94, + "learning_rate": 4.021451851851852e-05, + "loss": 2.0383, + "step": 16520 + }, + { + "epoch": 2.94, + "learning_rate": 4.021155555555556e-05, + "loss": 2.0219, + "step": 16525 + }, + { + "epoch": 2.94, + "learning_rate": 4.020859259259259e-05, + "loss": 2.0059, + "step": 16530 + }, + { + "epoch": 2.94, + "learning_rate": 4.0205629629629635e-05, + "loss": 2.0807, + "step": 16535 + }, + { + "epoch": 2.94, + "learning_rate": 4.020266666666667e-05, + "loss": 2.129, + "step": 16540 + }, + { + "epoch": 2.94, + "learning_rate": 4.0199703703703706e-05, + "loss": 2.2186, + "step": 16545 + }, + { + "epoch": 2.94, + "learning_rate": 4.019674074074074e-05, + "loss": 2.1436, + "step": 16550 + }, + { + "epoch": 2.94, + "learning_rate": 4.0193777777777784e-05, + "loss": 2.16, + "step": 16555 + }, + { + "epoch": 2.94, + "learning_rate": 4.0190814814814816e-05, + "loss": 2.1898, + "step": 16560 + }, + { + "epoch": 2.94, + "learning_rate": 4.0187851851851854e-05, + "loss": 2.1175, + "step": 16565 + }, + { + "epoch": 2.95, + "learning_rate": 4.0184888888888886e-05, + "loss": 1.99, + "step": 16570 + }, + { + "epoch": 2.95, + "learning_rate": 4.0181925925925925e-05, + "loss": 2.023, + "step": 16575 + }, + { + "epoch": 2.95, + "learning_rate": 4.0178962962962964e-05, + "loss": 2.2951, + "step": 16580 + }, + { + "epoch": 2.95, + "learning_rate": 4.0176e-05, + "loss": 2.1258, + "step": 16585 + }, + { + "epoch": 2.95, + "learning_rate": 4.0173037037037035e-05, + "loss": 1.9182, + "step": 16590 + }, + { + "epoch": 2.95, + "learning_rate": 4.0170074074074074e-05, + "loss": 2.1291, + "step": 16595 + }, + { + "epoch": 2.95, + "learning_rate": 4.016711111111111e-05, + "loss": 2.0255, + "step": 16600 + }, + { + "epoch": 2.95, + "learning_rate": 4.016414814814815e-05, + "loss": 2.0234, + "step": 16605 + }, + { + "epoch": 2.95, + "learning_rate": 4.0161185185185184e-05, + "loss": 2.1274, + "step": 16610 + }, + { + "epoch": 2.95, + "learning_rate": 4.015822222222222e-05, + "loss": 2.0428, + "step": 16615 + }, + { + "epoch": 2.95, + "learning_rate": 4.015525925925926e-05, + "loss": 2.164, + "step": 16620 + }, + { + "epoch": 2.96, + "learning_rate": 4.01522962962963e-05, + "loss": 1.9993, + "step": 16625 + }, + { + "epoch": 2.96, + "learning_rate": 4.014933333333333e-05, + "loss": 2.0875, + "step": 16630 + }, + { + "epoch": 2.96, + "learning_rate": 4.014637037037037e-05, + "loss": 2.0258, + "step": 16635 + }, + { + "epoch": 2.96, + "learning_rate": 4.014340740740741e-05, + "loss": 2.099, + "step": 16640 + }, + { + "epoch": 2.96, + "learning_rate": 4.014044444444445e-05, + "loss": 2.0906, + "step": 16645 + }, + { + "epoch": 2.96, + "learning_rate": 4.013748148148148e-05, + "loss": 1.9489, + "step": 16650 + }, + { + "epoch": 2.96, + "learning_rate": 4.013451851851852e-05, + "loss": 2.1682, + "step": 16655 + }, + { + "epoch": 2.96, + "learning_rate": 4.013155555555556e-05, + "loss": 2.0158, + "step": 16660 + }, + { + "epoch": 2.96, + "learning_rate": 4.01285925925926e-05, + "loss": 2.0507, + "step": 16665 + }, + { + "epoch": 2.96, + "learning_rate": 4.012562962962963e-05, + "loss": 1.9166, + "step": 16670 + }, + { + "epoch": 2.96, + "learning_rate": 4.012266666666667e-05, + "loss": 2.155, + "step": 16675 + }, + { + "epoch": 2.97, + "learning_rate": 4.011970370370371e-05, + "loss": 2.107, + "step": 16680 + }, + { + "epoch": 2.97, + "learning_rate": 4.0116740740740746e-05, + "loss": 2.0062, + "step": 16685 + }, + { + "epoch": 2.97, + "learning_rate": 4.011377777777778e-05, + "loss": 2.1848, + "step": 16690 + }, + { + "epoch": 2.97, + "learning_rate": 4.0110814814814816e-05, + "loss": 2.0409, + "step": 16695 + }, + { + "epoch": 2.97, + "learning_rate": 4.0107851851851855e-05, + "loss": 2.1192, + "step": 16700 + }, + { + "epoch": 2.97, + "learning_rate": 4.0104888888888894e-05, + "loss": 2.07, + "step": 16705 + }, + { + "epoch": 2.97, + "learning_rate": 4.0101925925925926e-05, + "loss": 2.1515, + "step": 16710 + }, + { + "epoch": 2.97, + "learning_rate": 4.0098962962962965e-05, + "loss": 2.0634, + "step": 16715 + }, + { + "epoch": 2.97, + "learning_rate": 4.0096e-05, + "loss": 2.1046, + "step": 16720 + }, + { + "epoch": 2.97, + "learning_rate": 4.009303703703704e-05, + "loss": 2.1359, + "step": 16725 + }, + { + "epoch": 2.97, + "learning_rate": 4.0090074074074075e-05, + "loss": 2.0086, + "step": 16730 + }, + { + "epoch": 2.98, + "learning_rate": 4.0087111111111113e-05, + "loss": 2.0031, + "step": 16735 + }, + { + "epoch": 2.98, + "learning_rate": 4.0084148148148145e-05, + "loss": 2.0127, + "step": 16740 + }, + { + "epoch": 2.98, + "learning_rate": 4.008118518518519e-05, + "loss": 2.0139, + "step": 16745 + }, + { + "epoch": 2.98, + "learning_rate": 4.007822222222222e-05, + "loss": 2.0884, + "step": 16750 + }, + { + "epoch": 2.98, + "learning_rate": 4.007525925925926e-05, + "loss": 1.9543, + "step": 16755 + }, + { + "epoch": 2.98, + "learning_rate": 4.0072296296296294e-05, + "loss": 1.9322, + "step": 16760 + }, + { + "epoch": 2.98, + "learning_rate": 4.006933333333334e-05, + "loss": 2.1212, + "step": 16765 + }, + { + "epoch": 2.98, + "learning_rate": 4.006637037037037e-05, + "loss": 2.0169, + "step": 16770 + }, + { + "epoch": 2.98, + "learning_rate": 4.006340740740741e-05, + "loss": 2.0677, + "step": 16775 + }, + { + "epoch": 2.98, + "learning_rate": 4.006044444444444e-05, + "loss": 1.9593, + "step": 16780 + }, + { + "epoch": 2.98, + "learning_rate": 4.005748148148149e-05, + "loss": 2.1301, + "step": 16785 + }, + { + "epoch": 2.98, + "learning_rate": 4.005451851851852e-05, + "loss": 2.0452, + "step": 16790 + }, + { + "epoch": 2.99, + "learning_rate": 4.005155555555556e-05, + "loss": 2.159, + "step": 16795 + }, + { + "epoch": 2.99, + "learning_rate": 4.004859259259259e-05, + "loss": 1.9793, + "step": 16800 + }, + { + "epoch": 2.99, + "learning_rate": 4.004562962962963e-05, + "loss": 1.9773, + "step": 16805 + }, + { + "epoch": 2.99, + "learning_rate": 4.004266666666667e-05, + "loss": 2.0183, + "step": 16810 + }, + { + "epoch": 2.99, + "learning_rate": 4.003970370370371e-05, + "loss": 2.0693, + "step": 16815 + }, + { + "epoch": 2.99, + "learning_rate": 4.003674074074074e-05, + "loss": 2.1873, + "step": 16820 + }, + { + "epoch": 2.99, + "learning_rate": 4.003377777777778e-05, + "loss": 2.1756, + "step": 16825 + }, + { + "epoch": 2.99, + "learning_rate": 4.003081481481482e-05, + "loss": 2.1226, + "step": 16830 + }, + { + "epoch": 2.99, + "learning_rate": 4.0027851851851856e-05, + "loss": 2.115, + "step": 16835 + }, + { + "epoch": 2.99, + "learning_rate": 4.002488888888889e-05, + "loss": 1.9618, + "step": 16840 + }, + { + "epoch": 2.99, + "learning_rate": 4.002192592592593e-05, + "loss": 2.2295, + "step": 16845 + }, + { + "epoch": 3.0, + "learning_rate": 4.0018962962962966e-05, + "loss": 2.1335, + "step": 16850 + }, + { + "epoch": 3.0, + "learning_rate": 4.0016000000000004e-05, + "loss": 2.0626, + "step": 16855 + }, + { + "epoch": 3.0, + "learning_rate": 4.0013037037037037e-05, + "loss": 2.2053, + "step": 16860 + }, + { + "epoch": 3.0, + "learning_rate": 4.0010074074074075e-05, + "loss": 2.0684, + "step": 16865 + }, + { + "epoch": 3.0, + "learning_rate": 4.0007111111111114e-05, + "loss": 2.1864, + "step": 16870 + }, + { + "epoch": 3.0, + "learning_rate": 4.000414814814815e-05, + "loss": 2.0419, + "step": 16875 + }, + { + "epoch": 3.0, + "learning_rate": 4.0001185185185185e-05, + "loss": 1.804, + "step": 16880 + }, + { + "epoch": 3.0, + "learning_rate": 3.9998222222222224e-05, + "loss": 1.885, + "step": 16885 + }, + { + "epoch": 3.0, + "learning_rate": 3.999525925925926e-05, + "loss": 2.0829, + "step": 16890 + }, + { + "epoch": 3.0, + "learning_rate": 3.99922962962963e-05, + "loss": 1.9241, + "step": 16895 + }, + { + "epoch": 3.0, + "learning_rate": 3.9989333333333334e-05, + "loss": 1.9544, + "step": 16900 + }, + { + "epoch": 3.01, + "learning_rate": 3.998637037037037e-05, + "loss": 1.9004, + "step": 16905 + }, + { + "epoch": 3.01, + "learning_rate": 3.998340740740741e-05, + "loss": 1.921, + "step": 16910 + }, + { + "epoch": 3.01, + "learning_rate": 3.998044444444445e-05, + "loss": 1.9571, + "step": 16915 + }, + { + "epoch": 3.01, + "learning_rate": 3.997748148148148e-05, + "loss": 2.0394, + "step": 16920 + }, + { + "epoch": 3.01, + "learning_rate": 3.997451851851852e-05, + "loss": 1.8959, + "step": 16925 + }, + { + "epoch": 3.01, + "learning_rate": 3.997155555555556e-05, + "loss": 1.8576, + "step": 16930 + }, + { + "epoch": 3.01, + "learning_rate": 3.99685925925926e-05, + "loss": 1.9712, + "step": 16935 + }, + { + "epoch": 3.01, + "learning_rate": 3.996562962962963e-05, + "loss": 2.0038, + "step": 16940 + }, + { + "epoch": 3.01, + "learning_rate": 3.996266666666667e-05, + "loss": 1.9133, + "step": 16945 + }, + { + "epoch": 3.01, + "learning_rate": 3.99597037037037e-05, + "loss": 2.0039, + "step": 16950 + }, + { + "epoch": 3.01, + "learning_rate": 3.995674074074075e-05, + "loss": 1.9632, + "step": 16955 + }, + { + "epoch": 3.02, + "learning_rate": 3.995377777777778e-05, + "loss": 1.921, + "step": 16960 + }, + { + "epoch": 3.02, + "learning_rate": 3.995081481481482e-05, + "loss": 1.9543, + "step": 16965 + }, + { + "epoch": 3.02, + "learning_rate": 3.994785185185185e-05, + "loss": 2.0106, + "step": 16970 + }, + { + "epoch": 3.02, + "learning_rate": 3.9944888888888896e-05, + "loss": 1.9748, + "step": 16975 + }, + { + "epoch": 3.02, + "learning_rate": 3.994192592592593e-05, + "loss": 1.8221, + "step": 16980 + }, + { + "epoch": 3.02, + "learning_rate": 3.9938962962962966e-05, + "loss": 2.0455, + "step": 16985 + }, + { + "epoch": 3.02, + "learning_rate": 3.9936e-05, + "loss": 1.9707, + "step": 16990 + }, + { + "epoch": 3.02, + "learning_rate": 3.9933037037037044e-05, + "loss": 2.0332, + "step": 16995 + }, + { + "epoch": 3.02, + "learning_rate": 3.9930074074074076e-05, + "loss": 1.9653, + "step": 17000 + }, + { + "epoch": 3.02, + "learning_rate": 3.9927111111111115e-05, + "loss": 1.9639, + "step": 17005 + }, + { + "epoch": 3.02, + "learning_rate": 3.992414814814815e-05, + "loss": 1.9335, + "step": 17010 + }, + { + "epoch": 3.02, + "learning_rate": 3.992118518518519e-05, + "loss": 1.8478, + "step": 17015 + }, + { + "epoch": 3.03, + "learning_rate": 3.9918222222222225e-05, + "loss": 1.9567, + "step": 17020 + }, + { + "epoch": 3.03, + "learning_rate": 3.9915259259259263e-05, + "loss": 1.9285, + "step": 17025 + }, + { + "epoch": 3.03, + "learning_rate": 3.9912296296296295e-05, + "loss": 1.9318, + "step": 17030 + }, + { + "epoch": 3.03, + "learning_rate": 3.9909333333333334e-05, + "loss": 1.9002, + "step": 17035 + }, + { + "epoch": 3.03, + "learning_rate": 3.990637037037037e-05, + "loss": 1.9526, + "step": 17040 + }, + { + "epoch": 3.03, + "learning_rate": 3.990340740740741e-05, + "loss": 2.0249, + "step": 17045 + }, + { + "epoch": 3.03, + "learning_rate": 3.9900444444444444e-05, + "loss": 1.9359, + "step": 17050 + }, + { + "epoch": 3.03, + "learning_rate": 3.989748148148148e-05, + "loss": 1.9477, + "step": 17055 + }, + { + "epoch": 3.03, + "learning_rate": 3.989451851851852e-05, + "loss": 2.0777, + "step": 17060 + }, + { + "epoch": 3.03, + "learning_rate": 3.989155555555556e-05, + "loss": 1.9016, + "step": 17065 + }, + { + "epoch": 3.03, + "learning_rate": 3.988859259259259e-05, + "loss": 2.0511, + "step": 17070 + }, + { + "epoch": 3.04, + "learning_rate": 3.988562962962963e-05, + "loss": 2.016, + "step": 17075 + }, + { + "epoch": 3.04, + "learning_rate": 3.988266666666667e-05, + "loss": 1.9095, + "step": 17080 + }, + { + "epoch": 3.04, + "learning_rate": 3.987970370370371e-05, + "loss": 1.8658, + "step": 17085 + }, + { + "epoch": 3.04, + "learning_rate": 3.987674074074074e-05, + "loss": 2.0141, + "step": 17090 + }, + { + "epoch": 3.04, + "learning_rate": 3.987377777777778e-05, + "loss": 2.0966, + "step": 17095 + }, + { + "epoch": 3.04, + "learning_rate": 3.987081481481482e-05, + "loss": 1.9256, + "step": 17100 + }, + { + "epoch": 3.04, + "learning_rate": 3.986785185185186e-05, + "loss": 1.937, + "step": 17105 + }, + { + "epoch": 3.04, + "learning_rate": 3.986488888888889e-05, + "loss": 1.9478, + "step": 17110 + }, + { + "epoch": 3.04, + "learning_rate": 3.986192592592593e-05, + "loss": 2.0359, + "step": 17115 + }, + { + "epoch": 3.04, + "learning_rate": 3.985896296296297e-05, + "loss": 1.9003, + "step": 17120 + }, + { + "epoch": 3.04, + "learning_rate": 3.9856000000000006e-05, + "loss": 1.853, + "step": 17125 + }, + { + "epoch": 3.05, + "learning_rate": 3.985303703703704e-05, + "loss": 1.975, + "step": 17130 + }, + { + "epoch": 3.05, + "learning_rate": 3.985007407407408e-05, + "loss": 2.0627, + "step": 17135 + }, + { + "epoch": 3.05, + "learning_rate": 3.9847111111111116e-05, + "loss": 1.867, + "step": 17140 + }, + { + "epoch": 3.05, + "learning_rate": 3.9844148148148155e-05, + "loss": 1.8958, + "step": 17145 + }, + { + "epoch": 3.05, + "learning_rate": 3.9841185185185187e-05, + "loss": 1.8877, + "step": 17150 + }, + { + "epoch": 3.05, + "learning_rate": 3.9838222222222225e-05, + "loss": 1.9654, + "step": 17155 + }, + { + "epoch": 3.05, + "learning_rate": 3.9835259259259264e-05, + "loss": 1.9467, + "step": 17160 + }, + { + "epoch": 3.05, + "learning_rate": 3.98322962962963e-05, + "loss": 1.9552, + "step": 17165 + }, + { + "epoch": 3.05, + "learning_rate": 3.9829333333333335e-05, + "loss": 1.9617, + "step": 17170 + }, + { + "epoch": 3.05, + "learning_rate": 3.9826370370370374e-05, + "loss": 1.8018, + "step": 17175 + }, + { + "epoch": 3.05, + "learning_rate": 3.9823407407407406e-05, + "loss": 1.9213, + "step": 17180 + }, + { + "epoch": 3.06, + "learning_rate": 3.982044444444445e-05, + "loss": 1.9823, + "step": 17185 + }, + { + "epoch": 3.06, + "learning_rate": 3.9817481481481484e-05, + "loss": 1.9261, + "step": 17190 + }, + { + "epoch": 3.06, + "learning_rate": 3.9814518518518516e-05, + "loss": 1.9203, + "step": 17195 + }, + { + "epoch": 3.06, + "learning_rate": 3.9811555555555554e-05, + "loss": 1.884, + "step": 17200 + }, + { + "epoch": 3.06, + "learning_rate": 3.980859259259259e-05, + "loss": 1.7776, + "step": 17205 + }, + { + "epoch": 3.06, + "learning_rate": 3.980562962962963e-05, + "loss": 1.9108, + "step": 17210 + }, + { + "epoch": 3.06, + "learning_rate": 3.9802666666666664e-05, + "loss": 2.0373, + "step": 17215 + }, + { + "epoch": 3.06, + "learning_rate": 3.97997037037037e-05, + "loss": 1.9052, + "step": 17220 + }, + { + "epoch": 3.06, + "learning_rate": 3.979674074074074e-05, + "loss": 1.871, + "step": 17225 + }, + { + "epoch": 3.06, + "learning_rate": 3.979377777777778e-05, + "loss": 2.0622, + "step": 17230 + }, + { + "epoch": 3.06, + "learning_rate": 3.979081481481481e-05, + "loss": 2.038, + "step": 17235 + }, + { + "epoch": 3.06, + "learning_rate": 3.978785185185185e-05, + "loss": 1.9585, + "step": 17240 + }, + { + "epoch": 3.07, + "learning_rate": 3.978488888888889e-05, + "loss": 1.8964, + "step": 17245 + }, + { + "epoch": 3.07, + "learning_rate": 3.978192592592593e-05, + "loss": 2.0654, + "step": 17250 + }, + { + "epoch": 3.07, + "learning_rate": 3.977896296296296e-05, + "loss": 1.9225, + "step": 17255 + }, + { + "epoch": 3.07, + "learning_rate": 3.9776e-05, + "loss": 2.111, + "step": 17260 + }, + { + "epoch": 3.07, + "learning_rate": 3.977303703703704e-05, + "loss": 1.9737, + "step": 17265 + }, + { + "epoch": 3.07, + "learning_rate": 3.977007407407408e-05, + "loss": 1.9816, + "step": 17270 + }, + { + "epoch": 3.07, + "learning_rate": 3.976711111111111e-05, + "loss": 2.0066, + "step": 17275 + }, + { + "epoch": 3.07, + "learning_rate": 3.976414814814815e-05, + "loss": 1.844, + "step": 17280 + }, + { + "epoch": 3.07, + "learning_rate": 3.976118518518519e-05, + "loss": 1.9811, + "step": 17285 + }, + { + "epoch": 3.07, + "learning_rate": 3.9758222222222226e-05, + "loss": 1.9123, + "step": 17290 + }, + { + "epoch": 3.07, + "learning_rate": 3.975525925925926e-05, + "loss": 1.9633, + "step": 17295 + }, + { + "epoch": 3.08, + "learning_rate": 3.97522962962963e-05, + "loss": 1.9782, + "step": 17300 + }, + { + "epoch": 3.08, + "learning_rate": 3.9749333333333336e-05, + "loss": 1.9844, + "step": 17305 + }, + { + "epoch": 3.08, + "learning_rate": 3.9746370370370375e-05, + "loss": 1.8245, + "step": 17310 + }, + { + "epoch": 3.08, + "learning_rate": 3.974340740740741e-05, + "loss": 1.9811, + "step": 17315 + }, + { + "epoch": 3.08, + "learning_rate": 3.9740444444444446e-05, + "loss": 2.0318, + "step": 17320 + }, + { + "epoch": 3.08, + "learning_rate": 3.9737481481481484e-05, + "loss": 1.7933, + "step": 17325 + }, + { + "epoch": 3.08, + "learning_rate": 3.973451851851852e-05, + "loss": 1.9597, + "step": 17330 + }, + { + "epoch": 3.08, + "learning_rate": 3.9731555555555555e-05, + "loss": 1.9063, + "step": 17335 + }, + { + "epoch": 3.08, + "learning_rate": 3.9728592592592594e-05, + "loss": 1.912, + "step": 17340 + }, + { + "epoch": 3.08, + "learning_rate": 3.9725629629629626e-05, + "loss": 1.9685, + "step": 17345 + }, + { + "epoch": 3.08, + "learning_rate": 3.972266666666667e-05, + "loss": 1.9292, + "step": 17350 + }, + { + "epoch": 3.09, + "learning_rate": 3.9719703703703704e-05, + "loss": 1.9578, + "step": 17355 + }, + { + "epoch": 3.09, + "learning_rate": 3.971674074074074e-05, + "loss": 1.9085, + "step": 17360 + }, + { + "epoch": 3.09, + "learning_rate": 3.9713777777777775e-05, + "loss": 1.8732, + "step": 17365 + }, + { + "epoch": 3.09, + "learning_rate": 3.971081481481482e-05, + "loss": 1.9264, + "step": 17370 + }, + { + "epoch": 3.09, + "learning_rate": 3.970785185185185e-05, + "loss": 2.0191, + "step": 17375 + }, + { + "epoch": 3.09, + "learning_rate": 3.970488888888889e-05, + "loss": 2.0001, + "step": 17380 + }, + { + "epoch": 3.09, + "learning_rate": 3.970192592592592e-05, + "loss": 1.9532, + "step": 17385 + }, + { + "epoch": 3.09, + "learning_rate": 3.969896296296297e-05, + "loss": 1.891, + "step": 17390 + }, + { + "epoch": 3.09, + "learning_rate": 3.9696e-05, + "loss": 1.9123, + "step": 17395 + }, + { + "epoch": 3.09, + "learning_rate": 3.969303703703704e-05, + "loss": 2.018, + "step": 17400 + }, + { + "epoch": 3.09, + "learning_rate": 3.969007407407407e-05, + "loss": 1.9841, + "step": 17405 + }, + { + "epoch": 3.1, + "learning_rate": 3.968711111111111e-05, + "loss": 2.0667, + "step": 17410 + }, + { + "epoch": 3.1, + "learning_rate": 3.968414814814815e-05, + "loss": 2.1308, + "step": 17415 + }, + { + "epoch": 3.1, + "learning_rate": 3.968118518518519e-05, + "loss": 2.0163, + "step": 17420 + }, + { + "epoch": 3.1, + "learning_rate": 3.967822222222222e-05, + "loss": 1.8905, + "step": 17425 + }, + { + "epoch": 3.1, + "learning_rate": 3.967525925925926e-05, + "loss": 1.9742, + "step": 17430 + }, + { + "epoch": 3.1, + "learning_rate": 3.96722962962963e-05, + "loss": 1.7909, + "step": 17435 + }, + { + "epoch": 3.1, + "learning_rate": 3.9669333333333337e-05, + "loss": 1.9992, + "step": 17440 + }, + { + "epoch": 3.1, + "learning_rate": 3.966637037037037e-05, + "loss": 2.0302, + "step": 17445 + }, + { + "epoch": 3.1, + "learning_rate": 3.966340740740741e-05, + "loss": 1.8409, + "step": 17450 + }, + { + "epoch": 3.1, + "learning_rate": 3.9660444444444446e-05, + "loss": 2.0036, + "step": 17455 + }, + { + "epoch": 3.1, + "learning_rate": 3.9657481481481485e-05, + "loss": 1.851, + "step": 17460 + }, + { + "epoch": 3.1, + "learning_rate": 3.965451851851852e-05, + "loss": 1.9512, + "step": 17465 + }, + { + "epoch": 3.11, + "learning_rate": 3.9651555555555556e-05, + "loss": 1.9695, + "step": 17470 + }, + { + "epoch": 3.11, + "learning_rate": 3.9648592592592595e-05, + "loss": 1.9616, + "step": 17475 + }, + { + "epoch": 3.11, + "learning_rate": 3.9645629629629634e-05, + "loss": 1.9008, + "step": 17480 + }, + { + "epoch": 3.11, + "learning_rate": 3.9642666666666666e-05, + "loss": 2.038, + "step": 17485 + }, + { + "epoch": 3.11, + "learning_rate": 3.9639703703703705e-05, + "loss": 1.9402, + "step": 17490 + }, + { + "epoch": 3.11, + "learning_rate": 3.963674074074074e-05, + "loss": 1.9161, + "step": 17495 + }, + { + "epoch": 3.11, + "learning_rate": 3.963377777777778e-05, + "loss": 1.9672, + "step": 17500 + }, + { + "epoch": 3.11, + "learning_rate": 3.9630814814814814e-05, + "loss": 1.9482, + "step": 17505 + }, + { + "epoch": 3.11, + "learning_rate": 3.962785185185185e-05, + "loss": 1.9604, + "step": 17510 + }, + { + "epoch": 3.11, + "learning_rate": 3.962488888888889e-05, + "loss": 2.0035, + "step": 17515 + }, + { + "epoch": 3.11, + "learning_rate": 3.962192592592593e-05, + "loss": 1.9753, + "step": 17520 + }, + { + "epoch": 3.12, + "learning_rate": 3.961896296296296e-05, + "loss": 1.9415, + "step": 17525 + }, + { + "epoch": 3.12, + "learning_rate": 3.9616e-05, + "loss": 1.7871, + "step": 17530 + }, + { + "epoch": 3.12, + "learning_rate": 3.961303703703704e-05, + "loss": 1.9716, + "step": 17535 + }, + { + "epoch": 3.12, + "learning_rate": 3.961007407407408e-05, + "loss": 1.965, + "step": 17540 + }, + { + "epoch": 3.12, + "learning_rate": 3.960711111111111e-05, + "loss": 2.0166, + "step": 17545 + }, + { + "epoch": 3.12, + "learning_rate": 3.960414814814815e-05, + "loss": 1.9227, + "step": 17550 + }, + { + "epoch": 3.12, + "learning_rate": 3.960118518518519e-05, + "loss": 2.0942, + "step": 17555 + }, + { + "epoch": 3.12, + "learning_rate": 3.959822222222223e-05, + "loss": 2.0826, + "step": 17560 + }, + { + "epoch": 3.12, + "learning_rate": 3.959525925925926e-05, + "loss": 1.9108, + "step": 17565 + }, + { + "epoch": 3.12, + "learning_rate": 3.95922962962963e-05, + "loss": 1.9744, + "step": 17570 + }, + { + "epoch": 3.12, + "learning_rate": 3.958933333333333e-05, + "loss": 1.9758, + "step": 17575 + }, + { + "epoch": 3.13, + "learning_rate": 3.9586370370370376e-05, + "loss": 1.977, + "step": 17580 + }, + { + "epoch": 3.13, + "learning_rate": 3.958340740740741e-05, + "loss": 1.9413, + "step": 17585 + }, + { + "epoch": 3.13, + "learning_rate": 3.958044444444445e-05, + "loss": 1.9017, + "step": 17590 + }, + { + "epoch": 3.13, + "learning_rate": 3.957748148148148e-05, + "loss": 1.9962, + "step": 17595 + }, + { + "epoch": 3.13, + "learning_rate": 3.9574518518518525e-05, + "loss": 1.8783, + "step": 17600 + }, + { + "epoch": 3.13, + "learning_rate": 3.957155555555556e-05, + "loss": 1.9721, + "step": 17605 + }, + { + "epoch": 3.13, + "learning_rate": 3.9568592592592596e-05, + "loss": 1.9635, + "step": 17610 + }, + { + "epoch": 3.13, + "learning_rate": 3.956562962962963e-05, + "loss": 1.918, + "step": 17615 + }, + { + "epoch": 3.13, + "learning_rate": 3.956266666666667e-05, + "loss": 1.8462, + "step": 17620 + }, + { + "epoch": 3.13, + "learning_rate": 3.9559703703703705e-05, + "loss": 1.8161, + "step": 17625 + }, + { + "epoch": 3.13, + "learning_rate": 3.9556740740740744e-05, + "loss": 2.0073, + "step": 17630 + }, + { + "epoch": 3.14, + "learning_rate": 3.9553777777777776e-05, + "loss": 1.8845, + "step": 17635 + }, + { + "epoch": 3.14, + "learning_rate": 3.9550814814814815e-05, + "loss": 1.9536, + "step": 17640 + }, + { + "epoch": 3.14, + "learning_rate": 3.9547851851851854e-05, + "loss": 1.9986, + "step": 17645 + }, + { + "epoch": 3.14, + "learning_rate": 3.9545481481481483e-05, + "loss": 1.8102, + "step": 17650 + }, + { + "epoch": 3.14, + "learning_rate": 3.954251851851852e-05, + "loss": 2.0962, + "step": 17655 + }, + { + "epoch": 3.14, + "learning_rate": 3.9539555555555554e-05, + "loss": 1.98, + "step": 17660 + }, + { + "epoch": 3.14, + "learning_rate": 3.953659259259259e-05, + "loss": 1.877, + "step": 17665 + }, + { + "epoch": 3.14, + "learning_rate": 3.953362962962963e-05, + "loss": 1.8747, + "step": 17670 + }, + { + "epoch": 3.14, + "learning_rate": 3.9530666666666664e-05, + "loss": 1.9352, + "step": 17675 + }, + { + "epoch": 3.14, + "learning_rate": 3.95277037037037e-05, + "loss": 1.9904, + "step": 17680 + }, + { + "epoch": 3.14, + "learning_rate": 3.952474074074074e-05, + "loss": 1.9266, + "step": 17685 + }, + { + "epoch": 3.14, + "learning_rate": 3.952177777777778e-05, + "loss": 1.9778, + "step": 17690 + }, + { + "epoch": 3.15, + "learning_rate": 3.951881481481481e-05, + "loss": 1.931, + "step": 17695 + }, + { + "epoch": 3.15, + "learning_rate": 3.951585185185185e-05, + "loss": 2.0493, + "step": 17700 + }, + { + "epoch": 3.15, + "learning_rate": 3.951288888888889e-05, + "loss": 1.9475, + "step": 17705 + }, + { + "epoch": 3.15, + "learning_rate": 3.950992592592593e-05, + "loss": 1.9939, + "step": 17710 + }, + { + "epoch": 3.15, + "learning_rate": 3.950696296296296e-05, + "loss": 1.9004, + "step": 17715 + }, + { + "epoch": 3.15, + "learning_rate": 3.9504e-05, + "loss": 1.9634, + "step": 17720 + }, + { + "epoch": 3.15, + "learning_rate": 3.950103703703704e-05, + "loss": 1.9425, + "step": 17725 + }, + { + "epoch": 3.15, + "learning_rate": 3.949807407407408e-05, + "loss": 2.0999, + "step": 17730 + }, + { + "epoch": 3.15, + "learning_rate": 3.949511111111111e-05, + "loss": 1.8698, + "step": 17735 + }, + { + "epoch": 3.15, + "learning_rate": 3.949214814814815e-05, + "loss": 2.0053, + "step": 17740 + }, + { + "epoch": 3.15, + "learning_rate": 3.948918518518519e-05, + "loss": 1.8978, + "step": 17745 + }, + { + "epoch": 3.16, + "learning_rate": 3.9486222222222226e-05, + "loss": 1.9193, + "step": 17750 + }, + { + "epoch": 3.16, + "learning_rate": 3.948325925925926e-05, + "loss": 1.9145, + "step": 17755 + }, + { + "epoch": 3.16, + "learning_rate": 3.94802962962963e-05, + "loss": 1.883, + "step": 17760 + }, + { + "epoch": 3.16, + "learning_rate": 3.9477333333333336e-05, + "loss": 1.9842, + "step": 17765 + }, + { + "epoch": 3.16, + "learning_rate": 3.9474370370370375e-05, + "loss": 1.8734, + "step": 17770 + }, + { + "epoch": 3.16, + "learning_rate": 3.9471407407407407e-05, + "loss": 1.9567, + "step": 17775 + }, + { + "epoch": 3.16, + "learning_rate": 3.9468444444444445e-05, + "loss": 1.9424, + "step": 17780 + }, + { + "epoch": 3.16, + "learning_rate": 3.9465481481481484e-05, + "loss": 1.8471, + "step": 17785 + }, + { + "epoch": 3.16, + "learning_rate": 3.946251851851852e-05, + "loss": 1.9656, + "step": 17790 + }, + { + "epoch": 3.16, + "learning_rate": 3.9459555555555555e-05, + "loss": 2.0159, + "step": 17795 + }, + { + "epoch": 3.16, + "learning_rate": 3.9456592592592594e-05, + "loss": 1.8562, + "step": 17800 + }, + { + "epoch": 3.17, + "learning_rate": 3.9453629629629626e-05, + "loss": 1.9071, + "step": 17805 + }, + { + "epoch": 3.17, + "learning_rate": 3.945066666666667e-05, + "loss": 1.9102, + "step": 17810 + }, + { + "epoch": 3.17, + "learning_rate": 3.9447703703703704e-05, + "loss": 1.972, + "step": 17815 + }, + { + "epoch": 3.17, + "learning_rate": 3.944474074074074e-05, + "loss": 2.011, + "step": 17820 + }, + { + "epoch": 3.17, + "learning_rate": 3.9441777777777774e-05, + "loss": 1.9088, + "step": 17825 + }, + { + "epoch": 3.17, + "learning_rate": 3.943881481481482e-05, + "loss": 1.9605, + "step": 17830 + }, + { + "epoch": 3.17, + "learning_rate": 3.943585185185185e-05, + "loss": 1.9324, + "step": 17835 + }, + { + "epoch": 3.17, + "learning_rate": 3.943288888888889e-05, + "loss": 1.9872, + "step": 17840 + }, + { + "epoch": 3.17, + "learning_rate": 3.942992592592592e-05, + "loss": 1.8406, + "step": 17845 + }, + { + "epoch": 3.17, + "learning_rate": 3.942696296296297e-05, + "loss": 1.8601, + "step": 17850 + }, + { + "epoch": 3.17, + "learning_rate": 3.9424e-05, + "loss": 1.9761, + "step": 17855 + }, + { + "epoch": 3.18, + "learning_rate": 3.942103703703704e-05, + "loss": 1.9923, + "step": 17860 + }, + { + "epoch": 3.18, + "learning_rate": 3.941807407407407e-05, + "loss": 1.8996, + "step": 17865 + }, + { + "epoch": 3.18, + "learning_rate": 3.941511111111112e-05, + "loss": 1.9434, + "step": 17870 + }, + { + "epoch": 3.18, + "learning_rate": 3.941214814814815e-05, + "loss": 1.9303, + "step": 17875 + }, + { + "epoch": 3.18, + "learning_rate": 3.940918518518519e-05, + "loss": 1.9858, + "step": 17880 + }, + { + "epoch": 3.18, + "learning_rate": 3.940622222222222e-05, + "loss": 1.8389, + "step": 17885 + }, + { + "epoch": 3.18, + "learning_rate": 3.940325925925926e-05, + "loss": 1.9791, + "step": 17890 + }, + { + "epoch": 3.18, + "learning_rate": 3.94002962962963e-05, + "loss": 1.9623, + "step": 17895 + }, + { + "epoch": 3.18, + "learning_rate": 3.9397333333333337e-05, + "loss": 1.992, + "step": 17900 + }, + { + "epoch": 3.18, + "learning_rate": 3.939437037037037e-05, + "loss": 1.8295, + "step": 17905 + }, + { + "epoch": 3.18, + "learning_rate": 3.939140740740741e-05, + "loss": 2.0538, + "step": 17910 + }, + { + "epoch": 3.18, + "learning_rate": 3.9388444444444446e-05, + "loss": 1.8538, + "step": 17915 + }, + { + "epoch": 3.19, + "learning_rate": 3.9385481481481485e-05, + "loss": 1.9485, + "step": 17920 + }, + { + "epoch": 3.19, + "learning_rate": 3.938251851851852e-05, + "loss": 1.8919, + "step": 17925 + }, + { + "epoch": 3.19, + "learning_rate": 3.9379555555555556e-05, + "loss": 1.9121, + "step": 17930 + }, + { + "epoch": 3.19, + "learning_rate": 3.9376592592592595e-05, + "loss": 1.8518, + "step": 17935 + }, + { + "epoch": 3.19, + "learning_rate": 3.9373629629629634e-05, + "loss": 1.894, + "step": 17940 + }, + { + "epoch": 3.19, + "learning_rate": 3.9370666666666666e-05, + "loss": 1.8834, + "step": 17945 + }, + { + "epoch": 3.19, + "learning_rate": 3.9367703703703704e-05, + "loss": 2.1225, + "step": 17950 + }, + { + "epoch": 3.19, + "learning_rate": 3.936474074074074e-05, + "loss": 2.0327, + "step": 17955 + }, + { + "epoch": 3.19, + "learning_rate": 3.936177777777778e-05, + "loss": 1.9276, + "step": 17960 + }, + { + "epoch": 3.19, + "learning_rate": 3.9358814814814814e-05, + "loss": 1.9549, + "step": 17965 + }, + { + "epoch": 3.19, + "learning_rate": 3.935585185185185e-05, + "loss": 1.9528, + "step": 17970 + }, + { + "epoch": 3.2, + "learning_rate": 3.935288888888889e-05, + "loss": 1.8547, + "step": 17975 + }, + { + "epoch": 3.2, + "learning_rate": 3.934992592592593e-05, + "loss": 1.8724, + "step": 17980 + }, + { + "epoch": 3.2, + "learning_rate": 3.934696296296296e-05, + "loss": 1.9825, + "step": 17985 + }, + { + "epoch": 3.2, + "learning_rate": 3.9344e-05, + "loss": 1.9112, + "step": 17990 + }, + { + "epoch": 3.2, + "learning_rate": 3.934103703703704e-05, + "loss": 2.068, + "step": 17995 + }, + { + "epoch": 3.2, + "learning_rate": 3.933807407407408e-05, + "loss": 1.9957, + "step": 18000 + }, + { + "epoch": 3.2, + "learning_rate": 3.933511111111111e-05, + "loss": 1.8978, + "step": 18005 + }, + { + "epoch": 3.2, + "learning_rate": 3.933214814814815e-05, + "loss": 1.9631, + "step": 18010 + }, + { + "epoch": 3.2, + "learning_rate": 3.932918518518519e-05, + "loss": 1.9016, + "step": 18015 + }, + { + "epoch": 3.2, + "learning_rate": 3.932622222222223e-05, + "loss": 2.1178, + "step": 18020 + }, + { + "epoch": 3.2, + "learning_rate": 3.932325925925926e-05, + "loss": 1.992, + "step": 18025 + }, + { + "epoch": 3.21, + "learning_rate": 3.93202962962963e-05, + "loss": 2.0326, + "step": 18030 + }, + { + "epoch": 3.21, + "learning_rate": 3.931733333333333e-05, + "loss": 1.8142, + "step": 18035 + }, + { + "epoch": 3.21, + "learning_rate": 3.9314370370370376e-05, + "loss": 1.9359, + "step": 18040 + }, + { + "epoch": 3.21, + "learning_rate": 3.931140740740741e-05, + "loss": 2.0913, + "step": 18045 + }, + { + "epoch": 3.21, + "learning_rate": 3.930844444444445e-05, + "loss": 1.997, + "step": 18050 + }, + { + "epoch": 3.21, + "learning_rate": 3.930548148148148e-05, + "loss": 1.9322, + "step": 18055 + }, + { + "epoch": 3.21, + "learning_rate": 3.9302518518518525e-05, + "loss": 1.9886, + "step": 18060 + }, + { + "epoch": 3.21, + "learning_rate": 3.929955555555556e-05, + "loss": 2.0672, + "step": 18065 + }, + { + "epoch": 3.21, + "learning_rate": 3.9296592592592595e-05, + "loss": 1.9143, + "step": 18070 + }, + { + "epoch": 3.21, + "learning_rate": 3.929362962962963e-05, + "loss": 2.1223, + "step": 18075 + }, + { + "epoch": 3.21, + "learning_rate": 3.929066666666667e-05, + "loss": 1.9278, + "step": 18080 + }, + { + "epoch": 3.22, + "learning_rate": 3.9287703703703705e-05, + "loss": 2.0303, + "step": 18085 + }, + { + "epoch": 3.22, + "learning_rate": 3.9284740740740744e-05, + "loss": 2.0346, + "step": 18090 + }, + { + "epoch": 3.22, + "learning_rate": 3.9281777777777776e-05, + "loss": 1.9867, + "step": 18095 + }, + { + "epoch": 3.22, + "learning_rate": 3.927881481481482e-05, + "loss": 2.0778, + "step": 18100 + }, + { + "epoch": 3.22, + "learning_rate": 3.9275851851851854e-05, + "loss": 1.8423, + "step": 18105 + }, + { + "epoch": 3.22, + "learning_rate": 3.927288888888889e-05, + "loss": 1.9294, + "step": 18110 + }, + { + "epoch": 3.22, + "learning_rate": 3.9269925925925925e-05, + "loss": 2.0413, + "step": 18115 + }, + { + "epoch": 3.22, + "learning_rate": 3.926696296296296e-05, + "loss": 1.9332, + "step": 18120 + }, + { + "epoch": 3.22, + "learning_rate": 3.9264e-05, + "loss": 1.9159, + "step": 18125 + }, + { + "epoch": 3.22, + "learning_rate": 3.926103703703704e-05, + "loss": 1.9301, + "step": 18130 + }, + { + "epoch": 3.22, + "learning_rate": 3.925807407407407e-05, + "loss": 2.0759, + "step": 18135 + }, + { + "epoch": 3.22, + "learning_rate": 3.925511111111111e-05, + "loss": 2.0338, + "step": 18140 + }, + { + "epoch": 3.23, + "learning_rate": 3.925214814814815e-05, + "loss": 2.0315, + "step": 18145 + }, + { + "epoch": 3.23, + "learning_rate": 3.924918518518519e-05, + "loss": 1.937, + "step": 18150 + }, + { + "epoch": 3.23, + "learning_rate": 3.924622222222222e-05, + "loss": 1.9797, + "step": 18155 + }, + { + "epoch": 3.23, + "learning_rate": 3.924325925925926e-05, + "loss": 1.8933, + "step": 18160 + }, + { + "epoch": 3.23, + "learning_rate": 3.92402962962963e-05, + "loss": 1.9699, + "step": 18165 + }, + { + "epoch": 3.23, + "learning_rate": 3.923733333333334e-05, + "loss": 1.9557, + "step": 18170 + }, + { + "epoch": 3.23, + "learning_rate": 3.923437037037037e-05, + "loss": 2.0219, + "step": 18175 + }, + { + "epoch": 3.23, + "learning_rate": 3.923140740740741e-05, + "loss": 2.0288, + "step": 18180 + }, + { + "epoch": 3.23, + "learning_rate": 3.922844444444445e-05, + "loss": 1.908, + "step": 18185 + }, + { + "epoch": 3.23, + "learning_rate": 3.9225481481481487e-05, + "loss": 1.9916, + "step": 18190 + }, + { + "epoch": 3.23, + "learning_rate": 3.922251851851852e-05, + "loss": 1.9366, + "step": 18195 + }, + { + "epoch": 3.24, + "learning_rate": 3.921955555555556e-05, + "loss": 1.9199, + "step": 18200 + }, + { + "epoch": 3.24, + "learning_rate": 3.9216592592592596e-05, + "loss": 1.9575, + "step": 18205 + }, + { + "epoch": 3.24, + "learning_rate": 3.9213629629629635e-05, + "loss": 1.944, + "step": 18210 + }, + { + "epoch": 3.24, + "learning_rate": 3.921066666666667e-05, + "loss": 1.8943, + "step": 18215 + }, + { + "epoch": 3.24, + "learning_rate": 3.9207703703703706e-05, + "loss": 1.9914, + "step": 18220 + }, + { + "epoch": 3.24, + "learning_rate": 3.9204740740740745e-05, + "loss": 2.0343, + "step": 18225 + }, + { + "epoch": 3.24, + "learning_rate": 3.9201777777777784e-05, + "loss": 1.9615, + "step": 18230 + }, + { + "epoch": 3.24, + "learning_rate": 3.9198814814814816e-05, + "loss": 2.0135, + "step": 18235 + }, + { + "epoch": 3.24, + "learning_rate": 3.9195851851851854e-05, + "loss": 1.9468, + "step": 18240 + }, + { + "epoch": 3.24, + "learning_rate": 3.919288888888889e-05, + "loss": 2.0036, + "step": 18245 + }, + { + "epoch": 3.24, + "learning_rate": 3.918992592592593e-05, + "loss": 1.9763, + "step": 18250 + }, + { + "epoch": 3.25, + "learning_rate": 3.9186962962962964e-05, + "loss": 2.0044, + "step": 18255 + }, + { + "epoch": 3.25, + "learning_rate": 3.9184e-05, + "loss": 2.0888, + "step": 18260 + }, + { + "epoch": 3.25, + "learning_rate": 3.9181037037037035e-05, + "loss": 1.9449, + "step": 18265 + }, + { + "epoch": 3.25, + "learning_rate": 3.917807407407408e-05, + "loss": 1.8848, + "step": 18270 + }, + { + "epoch": 3.25, + "learning_rate": 3.917511111111111e-05, + "loss": 2.011, + "step": 18275 + }, + { + "epoch": 3.25, + "learning_rate": 3.917214814814815e-05, + "loss": 1.9649, + "step": 18280 + }, + { + "epoch": 3.25, + "learning_rate": 3.9169185185185184e-05, + "loss": 1.9269, + "step": 18285 + }, + { + "epoch": 3.25, + "learning_rate": 3.916622222222223e-05, + "loss": 1.9048, + "step": 18290 + }, + { + "epoch": 3.25, + "learning_rate": 3.916325925925926e-05, + "loss": 2.0052, + "step": 18295 + }, + { + "epoch": 3.25, + "learning_rate": 3.91602962962963e-05, + "loss": 2.0078, + "step": 18300 + }, + { + "epoch": 3.25, + "learning_rate": 3.915733333333333e-05, + "loss": 2.0223, + "step": 18305 + }, + { + "epoch": 3.26, + "learning_rate": 3.915437037037038e-05, + "loss": 1.9351, + "step": 18310 + }, + { + "epoch": 3.26, + "learning_rate": 3.915140740740741e-05, + "loss": 1.9307, + "step": 18315 + }, + { + "epoch": 3.26, + "learning_rate": 3.914844444444445e-05, + "loss": 1.9483, + "step": 18320 + }, + { + "epoch": 3.26, + "learning_rate": 3.914548148148148e-05, + "loss": 1.9641, + "step": 18325 + }, + { + "epoch": 3.26, + "learning_rate": 3.9142518518518526e-05, + "loss": 2.0143, + "step": 18330 + }, + { + "epoch": 3.26, + "learning_rate": 3.913955555555556e-05, + "loss": 2.0224, + "step": 18335 + }, + { + "epoch": 3.26, + "learning_rate": 3.91365925925926e-05, + "loss": 1.9595, + "step": 18340 + }, + { + "epoch": 3.26, + "learning_rate": 3.913362962962963e-05, + "loss": 1.9511, + "step": 18345 + }, + { + "epoch": 3.26, + "learning_rate": 3.913066666666667e-05, + "loss": 1.836, + "step": 18350 + }, + { + "epoch": 3.26, + "learning_rate": 3.912770370370371e-05, + "loss": 1.9559, + "step": 18355 + }, + { + "epoch": 3.26, + "learning_rate": 3.9124740740740746e-05, + "loss": 1.8311, + "step": 18360 + }, + { + "epoch": 3.26, + "learning_rate": 3.912177777777778e-05, + "loss": 1.9132, + "step": 18365 + }, + { + "epoch": 3.27, + "learning_rate": 3.9118814814814816e-05, + "loss": 1.9175, + "step": 18370 + }, + { + "epoch": 3.27, + "learning_rate": 3.9115851851851855e-05, + "loss": 1.9778, + "step": 18375 + }, + { + "epoch": 3.27, + "learning_rate": 3.9112888888888894e-05, + "loss": 1.9395, + "step": 18380 + }, + { + "epoch": 3.27, + "learning_rate": 3.9109925925925926e-05, + "loss": 1.9238, + "step": 18385 + }, + { + "epoch": 3.27, + "learning_rate": 3.9106962962962965e-05, + "loss": 1.9268, + "step": 18390 + }, + { + "epoch": 3.27, + "learning_rate": 3.9104000000000004e-05, + "loss": 1.9828, + "step": 18395 + }, + { + "epoch": 3.27, + "learning_rate": 3.910103703703704e-05, + "loss": 1.8026, + "step": 18400 + }, + { + "epoch": 3.27, + "learning_rate": 3.9098074074074075e-05, + "loss": 1.9459, + "step": 18405 + }, + { + "epoch": 3.27, + "learning_rate": 3.9095111111111113e-05, + "loss": 2.0032, + "step": 18410 + }, + { + "epoch": 3.27, + "learning_rate": 3.909214814814815e-05, + "loss": 1.8915, + "step": 18415 + }, + { + "epoch": 3.27, + "learning_rate": 3.908918518518519e-05, + "loss": 1.9367, + "step": 18420 + }, + { + "epoch": 3.28, + "learning_rate": 3.908622222222222e-05, + "loss": 1.928, + "step": 18425 + }, + { + "epoch": 3.28, + "learning_rate": 3.908325925925926e-05, + "loss": 1.9992, + "step": 18430 + }, + { + "epoch": 3.28, + "learning_rate": 3.90802962962963e-05, + "loss": 2.0017, + "step": 18435 + }, + { + "epoch": 3.28, + "learning_rate": 3.907733333333333e-05, + "loss": 1.9322, + "step": 18440 + }, + { + "epoch": 3.28, + "learning_rate": 3.907437037037037e-05, + "loss": 1.9007, + "step": 18445 + }, + { + "epoch": 3.28, + "learning_rate": 3.9071407407407404e-05, + "loss": 1.8458, + "step": 18450 + }, + { + "epoch": 3.28, + "learning_rate": 3.906844444444445e-05, + "loss": 1.9432, + "step": 18455 + }, + { + "epoch": 3.28, + "learning_rate": 3.906548148148148e-05, + "loss": 1.9397, + "step": 18460 + }, + { + "epoch": 3.28, + "learning_rate": 3.906251851851852e-05, + "loss": 1.8468, + "step": 18465 + }, + { + "epoch": 3.28, + "learning_rate": 3.905955555555555e-05, + "loss": 1.9747, + "step": 18470 + }, + { + "epoch": 3.28, + "learning_rate": 3.90565925925926e-05, + "loss": 1.9483, + "step": 18475 + }, + { + "epoch": 3.29, + "learning_rate": 3.905362962962963e-05, + "loss": 2.1552, + "step": 18480 + }, + { + "epoch": 3.29, + "learning_rate": 3.905066666666667e-05, + "loss": 1.9504, + "step": 18485 + }, + { + "epoch": 3.29, + "learning_rate": 3.90477037037037e-05, + "loss": 2.0795, + "step": 18490 + }, + { + "epoch": 3.29, + "learning_rate": 3.904474074074074e-05, + "loss": 1.8503, + "step": 18495 + }, + { + "epoch": 3.29, + "learning_rate": 3.904177777777778e-05, + "loss": 1.9521, + "step": 18500 + }, + { + "epoch": 3.29, + "learning_rate": 3.903881481481482e-05, + "loss": 2.004, + "step": 18505 + }, + { + "epoch": 3.29, + "learning_rate": 3.903585185185185e-05, + "loss": 2.0594, + "step": 18510 + }, + { + "epoch": 3.29, + "learning_rate": 3.903288888888889e-05, + "loss": 1.8791, + "step": 18515 + }, + { + "epoch": 3.29, + "learning_rate": 3.902992592592593e-05, + "loss": 1.9404, + "step": 18520 + }, + { + "epoch": 3.29, + "learning_rate": 3.9026962962962966e-05, + "loss": 1.8909, + "step": 18525 + }, + { + "epoch": 3.29, + "learning_rate": 3.9024e-05, + "loss": 2.0197, + "step": 18530 + }, + { + "epoch": 3.3, + "learning_rate": 3.9021037037037037e-05, + "loss": 1.8873, + "step": 18535 + }, + { + "epoch": 3.3, + "learning_rate": 3.9018074074074075e-05, + "loss": 1.97, + "step": 18540 + }, + { + "epoch": 3.3, + "learning_rate": 3.9015111111111114e-05, + "loss": 1.9168, + "step": 18545 + }, + { + "epoch": 3.3, + "learning_rate": 3.9012148148148146e-05, + "loss": 1.945, + "step": 18550 + }, + { + "epoch": 3.3, + "learning_rate": 3.9009185185185185e-05, + "loss": 1.9144, + "step": 18555 + }, + { + "epoch": 3.3, + "learning_rate": 3.9006222222222224e-05, + "loss": 1.9031, + "step": 18560 + }, + { + "epoch": 3.3, + "learning_rate": 3.900325925925926e-05, + "loss": 1.768, + "step": 18565 + }, + { + "epoch": 3.3, + "learning_rate": 3.9000296296296295e-05, + "loss": 1.95, + "step": 18570 + }, + { + "epoch": 3.3, + "learning_rate": 3.8997333333333334e-05, + "loss": 2.0491, + "step": 18575 + }, + { + "epoch": 3.3, + "learning_rate": 3.899437037037037e-05, + "loss": 1.919, + "step": 18580 + }, + { + "epoch": 3.3, + "learning_rate": 3.899140740740741e-05, + "loss": 1.8325, + "step": 18585 + }, + { + "epoch": 3.3, + "learning_rate": 3.898844444444444e-05, + "loss": 2.0896, + "step": 18590 + }, + { + "epoch": 3.31, + "learning_rate": 3.898548148148148e-05, + "loss": 1.9612, + "step": 18595 + }, + { + "epoch": 3.31, + "learning_rate": 3.898251851851852e-05, + "loss": 1.9357, + "step": 18600 + }, + { + "epoch": 3.31, + "learning_rate": 3.897955555555556e-05, + "loss": 1.8241, + "step": 18605 + }, + { + "epoch": 3.31, + "learning_rate": 3.897659259259259e-05, + "loss": 1.9956, + "step": 18610 + }, + { + "epoch": 3.31, + "learning_rate": 3.897362962962963e-05, + "loss": 2.0243, + "step": 18615 + }, + { + "epoch": 3.31, + "learning_rate": 3.897066666666667e-05, + "loss": 1.908, + "step": 18620 + }, + { + "epoch": 3.31, + "learning_rate": 3.896770370370371e-05, + "loss": 1.8279, + "step": 18625 + }, + { + "epoch": 3.31, + "learning_rate": 3.896474074074074e-05, + "loss": 1.9317, + "step": 18630 + }, + { + "epoch": 3.31, + "learning_rate": 3.896177777777778e-05, + "loss": 2.0109, + "step": 18635 + }, + { + "epoch": 3.31, + "learning_rate": 3.895881481481482e-05, + "loss": 1.9514, + "step": 18640 + }, + { + "epoch": 3.31, + "learning_rate": 3.895585185185186e-05, + "loss": 1.8832, + "step": 18645 + }, + { + "epoch": 3.32, + "learning_rate": 3.895288888888889e-05, + "loss": 1.9389, + "step": 18650 + }, + { + "epoch": 3.32, + "learning_rate": 3.894992592592593e-05, + "loss": 1.9391, + "step": 18655 + }, + { + "epoch": 3.32, + "learning_rate": 3.894696296296296e-05, + "loss": 2.0754, + "step": 18660 + }, + { + "epoch": 3.32, + "learning_rate": 3.8944000000000005e-05, + "loss": 1.9924, + "step": 18665 + }, + { + "epoch": 3.32, + "learning_rate": 3.894103703703704e-05, + "loss": 1.9044, + "step": 18670 + }, + { + "epoch": 3.32, + "learning_rate": 3.8938074074074076e-05, + "loss": 2.0272, + "step": 18675 + }, + { + "epoch": 3.32, + "learning_rate": 3.893511111111111e-05, + "loss": 1.8972, + "step": 18680 + }, + { + "epoch": 3.32, + "learning_rate": 3.8932148148148154e-05, + "loss": 1.8988, + "step": 18685 + }, + { + "epoch": 3.32, + "learning_rate": 3.8929185185185186e-05, + "loss": 1.9523, + "step": 18690 + }, + { + "epoch": 3.32, + "learning_rate": 3.8926222222222225e-05, + "loss": 1.9462, + "step": 18695 + }, + { + "epoch": 3.32, + "learning_rate": 3.892325925925926e-05, + "loss": 1.9631, + "step": 18700 + }, + { + "epoch": 3.33, + "learning_rate": 3.89202962962963e-05, + "loss": 2.0508, + "step": 18705 + }, + { + "epoch": 3.33, + "learning_rate": 3.8917333333333334e-05, + "loss": 1.9613, + "step": 18710 + }, + { + "epoch": 3.33, + "learning_rate": 3.891437037037037e-05, + "loss": 2.0406, + "step": 18715 + }, + { + "epoch": 3.33, + "learning_rate": 3.8911407407407405e-05, + "loss": 1.933, + "step": 18720 + }, + { + "epoch": 3.33, + "learning_rate": 3.8908444444444444e-05, + "loss": 1.9617, + "step": 18725 + }, + { + "epoch": 3.33, + "learning_rate": 3.890548148148148e-05, + "loss": 1.9192, + "step": 18730 + }, + { + "epoch": 3.33, + "learning_rate": 3.890251851851852e-05, + "loss": 2.0134, + "step": 18735 + }, + { + "epoch": 3.33, + "learning_rate": 3.8899555555555554e-05, + "loss": 1.9253, + "step": 18740 + }, + { + "epoch": 3.33, + "learning_rate": 3.889659259259259e-05, + "loss": 1.9259, + "step": 18745 + }, + { + "epoch": 3.33, + "learning_rate": 3.889362962962963e-05, + "loss": 1.8575, + "step": 18750 + }, + { + "epoch": 3.33, + "learning_rate": 3.889066666666667e-05, + "loss": 1.8478, + "step": 18755 + }, + { + "epoch": 3.34, + "learning_rate": 3.88877037037037e-05, + "loss": 1.9355, + "step": 18760 + }, + { + "epoch": 3.34, + "learning_rate": 3.888474074074074e-05, + "loss": 1.8894, + "step": 18765 + }, + { + "epoch": 3.34, + "learning_rate": 3.888177777777778e-05, + "loss": 2.1438, + "step": 18770 + }, + { + "epoch": 3.34, + "learning_rate": 3.887881481481482e-05, + "loss": 1.9795, + "step": 18775 + }, + { + "epoch": 3.34, + "learning_rate": 3.887585185185185e-05, + "loss": 1.93, + "step": 18780 + }, + { + "epoch": 3.34, + "learning_rate": 3.887288888888889e-05, + "loss": 1.9843, + "step": 18785 + }, + { + "epoch": 3.34, + "learning_rate": 3.886992592592593e-05, + "loss": 2.0063, + "step": 18790 + }, + { + "epoch": 3.34, + "learning_rate": 3.886696296296297e-05, + "loss": 1.9405, + "step": 18795 + }, + { + "epoch": 3.34, + "learning_rate": 3.8864e-05, + "loss": 2.1478, + "step": 18800 + }, + { + "epoch": 3.34, + "learning_rate": 3.886103703703704e-05, + "loss": 1.8635, + "step": 18805 + }, + { + "epoch": 3.34, + "learning_rate": 3.885807407407408e-05, + "loss": 1.9148, + "step": 18810 + }, + { + "epoch": 3.34, + "learning_rate": 3.8855111111111116e-05, + "loss": 2.0098, + "step": 18815 + }, + { + "epoch": 3.35, + "learning_rate": 3.885214814814815e-05, + "loss": 1.9239, + "step": 18820 + }, + { + "epoch": 3.35, + "learning_rate": 3.8849185185185187e-05, + "loss": 1.8639, + "step": 18825 + }, + { + "epoch": 3.35, + "learning_rate": 3.8846222222222225e-05, + "loss": 1.8961, + "step": 18830 + }, + { + "epoch": 3.35, + "learning_rate": 3.8843259259259264e-05, + "loss": 2.0071, + "step": 18835 + }, + { + "epoch": 3.35, + "learning_rate": 3.8840296296296296e-05, + "loss": 1.9794, + "step": 18840 + }, + { + "epoch": 3.35, + "learning_rate": 3.8837333333333335e-05, + "loss": 1.8483, + "step": 18845 + }, + { + "epoch": 3.35, + "learning_rate": 3.8834370370370374e-05, + "loss": 2.0045, + "step": 18850 + }, + { + "epoch": 3.35, + "learning_rate": 3.883140740740741e-05, + "loss": 1.9456, + "step": 18855 + }, + { + "epoch": 3.35, + "learning_rate": 3.8828444444444445e-05, + "loss": 1.826, + "step": 18860 + }, + { + "epoch": 3.35, + "learning_rate": 3.8825481481481484e-05, + "loss": 2.0796, + "step": 18865 + }, + { + "epoch": 3.35, + "learning_rate": 3.882251851851852e-05, + "loss": 2.0733, + "step": 18870 + }, + { + "epoch": 3.36, + "learning_rate": 3.881955555555556e-05, + "loss": 1.9467, + "step": 18875 + }, + { + "epoch": 3.36, + "learning_rate": 3.881659259259259e-05, + "loss": 1.9626, + "step": 18880 + }, + { + "epoch": 3.36, + "learning_rate": 3.881362962962963e-05, + "loss": 1.8671, + "step": 18885 + }, + { + "epoch": 3.36, + "learning_rate": 3.8810666666666664e-05, + "loss": 1.9425, + "step": 18890 + }, + { + "epoch": 3.36, + "learning_rate": 3.880770370370371e-05, + "loss": 1.9938, + "step": 18895 + }, + { + "epoch": 3.36, + "learning_rate": 3.880474074074074e-05, + "loss": 2.0454, + "step": 18900 + }, + { + "epoch": 3.36, + "learning_rate": 3.880177777777778e-05, + "loss": 1.8612, + "step": 18905 + }, + { + "epoch": 3.36, + "learning_rate": 3.879881481481481e-05, + "loss": 1.9771, + "step": 18910 + }, + { + "epoch": 3.36, + "learning_rate": 3.879585185185186e-05, + "loss": 1.915, + "step": 18915 + }, + { + "epoch": 3.36, + "learning_rate": 3.879288888888889e-05, + "loss": 1.9095, + "step": 18920 + }, + { + "epoch": 3.36, + "learning_rate": 3.878992592592593e-05, + "loss": 2.052, + "step": 18925 + }, + { + "epoch": 3.37, + "learning_rate": 3.878696296296296e-05, + "loss": 1.9379, + "step": 18930 + }, + { + "epoch": 3.37, + "learning_rate": 3.878400000000001e-05, + "loss": 2.0275, + "step": 18935 + }, + { + "epoch": 3.37, + "learning_rate": 3.878103703703704e-05, + "loss": 1.9561, + "step": 18940 + }, + { + "epoch": 3.37, + "learning_rate": 3.877807407407408e-05, + "loss": 1.9538, + "step": 18945 + }, + { + "epoch": 3.37, + "learning_rate": 3.877511111111111e-05, + "loss": 2.0729, + "step": 18950 + }, + { + "epoch": 3.37, + "learning_rate": 3.877214814814815e-05, + "loss": 1.8977, + "step": 18955 + }, + { + "epoch": 3.37, + "learning_rate": 3.876918518518519e-05, + "loss": 2.0689, + "step": 18960 + }, + { + "epoch": 3.37, + "learning_rate": 3.8766222222222226e-05, + "loss": 1.9456, + "step": 18965 + }, + { + "epoch": 3.37, + "learning_rate": 3.876325925925926e-05, + "loss": 1.9149, + "step": 18970 + }, + { + "epoch": 3.37, + "learning_rate": 3.87602962962963e-05, + "loss": 2.0262, + "step": 18975 + }, + { + "epoch": 3.37, + "learning_rate": 3.8757333333333336e-05, + "loss": 1.9573, + "step": 18980 + }, + { + "epoch": 3.38, + "learning_rate": 3.8754370370370375e-05, + "loss": 1.9729, + "step": 18985 + }, + { + "epoch": 3.38, + "learning_rate": 3.875140740740741e-05, + "loss": 1.9602, + "step": 18990 + }, + { + "epoch": 3.38, + "learning_rate": 3.8748444444444446e-05, + "loss": 2.0735, + "step": 18995 + }, + { + "epoch": 3.38, + "learning_rate": 3.8745481481481484e-05, + "loss": 2.0241, + "step": 19000 + }, + { + "epoch": 3.38, + "learning_rate": 3.874251851851852e-05, + "loss": 2.0277, + "step": 19005 + }, + { + "epoch": 3.38, + "learning_rate": 3.8739555555555555e-05, + "loss": 1.9859, + "step": 19010 + }, + { + "epoch": 3.38, + "learning_rate": 3.8736592592592594e-05, + "loss": 2.0162, + "step": 19015 + }, + { + "epoch": 3.38, + "learning_rate": 3.873362962962963e-05, + "loss": 2.0295, + "step": 19020 + }, + { + "epoch": 3.38, + "learning_rate": 3.873066666666667e-05, + "loss": 1.8969, + "step": 19025 + }, + { + "epoch": 3.38, + "learning_rate": 3.8727703703703704e-05, + "loss": 1.9722, + "step": 19030 + }, + { + "epoch": 3.38, + "learning_rate": 3.872474074074074e-05, + "loss": 1.9406, + "step": 19035 + }, + { + "epoch": 3.38, + "learning_rate": 3.872177777777778e-05, + "loss": 1.8335, + "step": 19040 + }, + { + "epoch": 3.39, + "learning_rate": 3.871881481481482e-05, + "loss": 1.9987, + "step": 19045 + }, + { + "epoch": 3.39, + "learning_rate": 3.871585185185185e-05, + "loss": 1.959, + "step": 19050 + }, + { + "epoch": 3.39, + "learning_rate": 3.871288888888889e-05, + "loss": 1.8879, + "step": 19055 + }, + { + "epoch": 3.39, + "learning_rate": 3.870992592592593e-05, + "loss": 1.9152, + "step": 19060 + }, + { + "epoch": 3.39, + "learning_rate": 3.870696296296297e-05, + "loss": 2.0334, + "step": 19065 + }, + { + "epoch": 3.39, + "learning_rate": 3.8704e-05, + "loss": 1.9067, + "step": 19070 + }, + { + "epoch": 3.39, + "learning_rate": 3.870103703703704e-05, + "loss": 1.9722, + "step": 19075 + }, + { + "epoch": 3.39, + "learning_rate": 3.869807407407408e-05, + "loss": 2.0338, + "step": 19080 + }, + { + "epoch": 3.39, + "learning_rate": 3.869511111111112e-05, + "loss": 1.8726, + "step": 19085 + }, + { + "epoch": 3.39, + "learning_rate": 3.869214814814815e-05, + "loss": 2.0983, + "step": 19090 + }, + { + "epoch": 3.39, + "learning_rate": 3.868918518518519e-05, + "loss": 1.9237, + "step": 19095 + }, + { + "epoch": 3.4, + "learning_rate": 3.868622222222223e-05, + "loss": 1.93, + "step": 19100 + }, + { + "epoch": 3.4, + "learning_rate": 3.8683259259259266e-05, + "loss": 1.9989, + "step": 19105 + }, + { + "epoch": 3.4, + "learning_rate": 3.86802962962963e-05, + "loss": 2.0115, + "step": 19110 + }, + { + "epoch": 3.4, + "learning_rate": 3.867733333333334e-05, + "loss": 2.0316, + "step": 19115 + }, + { + "epoch": 3.4, + "learning_rate": 3.867437037037037e-05, + "loss": 1.9183, + "step": 19120 + }, + { + "epoch": 3.4, + "learning_rate": 3.8671407407407414e-05, + "loss": 2.0088, + "step": 19125 + }, + { + "epoch": 3.4, + "learning_rate": 3.8668444444444446e-05, + "loss": 2.0003, + "step": 19130 + }, + { + "epoch": 3.4, + "learning_rate": 3.8665481481481485e-05, + "loss": 1.8878, + "step": 19135 + }, + { + "epoch": 3.4, + "learning_rate": 3.866251851851852e-05, + "loss": 2.0486, + "step": 19140 + }, + { + "epoch": 3.4, + "learning_rate": 3.865955555555556e-05, + "loss": 2.0623, + "step": 19145 + }, + { + "epoch": 3.4, + "learning_rate": 3.8656592592592595e-05, + "loss": 2.0215, + "step": 19150 + }, + { + "epoch": 3.41, + "learning_rate": 3.8653629629629634e-05, + "loss": 1.9206, + "step": 19155 + }, + { + "epoch": 3.41, + "learning_rate": 3.8650666666666666e-05, + "loss": 1.9868, + "step": 19160 + }, + { + "epoch": 3.41, + "learning_rate": 3.864770370370371e-05, + "loss": 1.983, + "step": 19165 + }, + { + "epoch": 3.41, + "learning_rate": 3.864474074074074e-05, + "loss": 1.9487, + "step": 19170 + }, + { + "epoch": 3.41, + "learning_rate": 3.864177777777778e-05, + "loss": 1.8834, + "step": 19175 + }, + { + "epoch": 3.41, + "learning_rate": 3.8638814814814814e-05, + "loss": 1.9915, + "step": 19180 + }, + { + "epoch": 3.41, + "learning_rate": 3.863585185185185e-05, + "loss": 1.9366, + "step": 19185 + }, + { + "epoch": 3.41, + "learning_rate": 3.863288888888889e-05, + "loss": 1.9728, + "step": 19190 + }, + { + "epoch": 3.41, + "learning_rate": 3.862992592592593e-05, + "loss": 1.8977, + "step": 19195 + }, + { + "epoch": 3.41, + "learning_rate": 3.862696296296296e-05, + "loss": 1.9914, + "step": 19200 + }, + { + "epoch": 3.41, + "learning_rate": 3.8624e-05, + "loss": 1.9226, + "step": 19205 + }, + { + "epoch": 3.42, + "learning_rate": 3.862103703703704e-05, + "loss": 1.9111, + "step": 19210 + }, + { + "epoch": 3.42, + "learning_rate": 3.861807407407407e-05, + "loss": 2.0225, + "step": 19215 + }, + { + "epoch": 3.42, + "learning_rate": 3.861511111111111e-05, + "loss": 1.9123, + "step": 19220 + }, + { + "epoch": 3.42, + "learning_rate": 3.861214814814815e-05, + "loss": 1.9542, + "step": 19225 + }, + { + "epoch": 3.42, + "learning_rate": 3.860918518518519e-05, + "loss": 1.9121, + "step": 19230 + }, + { + "epoch": 3.42, + "learning_rate": 3.860622222222222e-05, + "loss": 1.7409, + "step": 19235 + }, + { + "epoch": 3.42, + "learning_rate": 3.860325925925926e-05, + "loss": 2.1915, + "step": 19240 + }, + { + "epoch": 3.42, + "learning_rate": 3.86002962962963e-05, + "loss": 1.7424, + "step": 19245 + }, + { + "epoch": 3.42, + "learning_rate": 3.859733333333334e-05, + "loss": 2.0072, + "step": 19250 + }, + { + "epoch": 3.42, + "learning_rate": 3.859437037037037e-05, + "loss": 1.8385, + "step": 19255 + }, + { + "epoch": 3.42, + "learning_rate": 3.859140740740741e-05, + "loss": 2.0189, + "step": 19260 + }, + { + "epoch": 3.42, + "learning_rate": 3.858844444444445e-05, + "loss": 1.8994, + "step": 19265 + }, + { + "epoch": 3.43, + "learning_rate": 3.8585481481481486e-05, + "loss": 1.9649, + "step": 19270 + }, + { + "epoch": 3.43, + "learning_rate": 3.858251851851852e-05, + "loss": 2.0742, + "step": 19275 + }, + { + "epoch": 3.43, + "learning_rate": 3.857955555555556e-05, + "loss": 1.8545, + "step": 19280 + }, + { + "epoch": 3.43, + "learning_rate": 3.857659259259259e-05, + "loss": 1.9987, + "step": 19285 + }, + { + "epoch": 3.43, + "learning_rate": 3.8573629629629634e-05, + "loss": 1.963, + "step": 19290 + }, + { + "epoch": 3.43, + "learning_rate": 3.8570666666666666e-05, + "loss": 1.9324, + "step": 19295 + }, + { + "epoch": 3.43, + "learning_rate": 3.8567703703703705e-05, + "loss": 1.7869, + "step": 19300 + }, + { + "epoch": 3.43, + "learning_rate": 3.856474074074074e-05, + "loss": 1.9619, + "step": 19305 + }, + { + "epoch": 3.43, + "learning_rate": 3.856177777777778e-05, + "loss": 2.0605, + "step": 19310 + }, + { + "epoch": 3.43, + "learning_rate": 3.8558814814814815e-05, + "loss": 2.0127, + "step": 19315 + }, + { + "epoch": 3.43, + "learning_rate": 3.8555851851851854e-05, + "loss": 1.9554, + "step": 19320 + }, + { + "epoch": 3.44, + "learning_rate": 3.8552888888888886e-05, + "loss": 1.9327, + "step": 19325 + }, + { + "epoch": 3.44, + "learning_rate": 3.854992592592593e-05, + "loss": 1.9179, + "step": 19330 + }, + { + "epoch": 3.44, + "learning_rate": 3.8546962962962963e-05, + "loss": 2.0802, + "step": 19335 + }, + { + "epoch": 3.44, + "learning_rate": 3.8544e-05, + "loss": 2.0724, + "step": 19340 + }, + { + "epoch": 3.44, + "learning_rate": 3.8541037037037034e-05, + "loss": 2.0894, + "step": 19345 + }, + { + "epoch": 3.44, + "learning_rate": 3.853807407407407e-05, + "loss": 2.0267, + "step": 19350 + }, + { + "epoch": 3.44, + "learning_rate": 3.853511111111111e-05, + "loss": 1.964, + "step": 19355 + }, + { + "epoch": 3.44, + "learning_rate": 3.853214814814815e-05, + "loss": 2.045, + "step": 19360 + }, + { + "epoch": 3.44, + "learning_rate": 3.852918518518518e-05, + "loss": 1.8906, + "step": 19365 + }, + { + "epoch": 3.44, + "learning_rate": 3.852622222222222e-05, + "loss": 2.0693, + "step": 19370 + }, + { + "epoch": 3.44, + "learning_rate": 3.852325925925926e-05, + "loss": 1.8995, + "step": 19375 + }, + { + "epoch": 3.45, + "learning_rate": 3.85202962962963e-05, + "loss": 2.1533, + "step": 19380 + }, + { + "epoch": 3.45, + "learning_rate": 3.851733333333333e-05, + "loss": 1.8457, + "step": 19385 + }, + { + "epoch": 3.45, + "learning_rate": 3.851437037037037e-05, + "loss": 2.0033, + "step": 19390 + }, + { + "epoch": 3.45, + "learning_rate": 3.851140740740741e-05, + "loss": 1.9044, + "step": 19395 + }, + { + "epoch": 3.45, + "learning_rate": 3.850844444444445e-05, + "loss": 1.8425, + "step": 19400 + }, + { + "epoch": 3.45, + "learning_rate": 3.850548148148148e-05, + "loss": 1.8263, + "step": 19405 + }, + { + "epoch": 3.45, + "learning_rate": 3.850251851851852e-05, + "loss": 1.9774, + "step": 19410 + }, + { + "epoch": 3.45, + "learning_rate": 3.849955555555556e-05, + "loss": 1.9672, + "step": 19415 + }, + { + "epoch": 3.45, + "learning_rate": 3.8496592592592596e-05, + "loss": 1.7683, + "step": 19420 + }, + { + "epoch": 3.45, + "learning_rate": 3.849362962962963e-05, + "loss": 2.0004, + "step": 19425 + }, + { + "epoch": 3.45, + "learning_rate": 3.849066666666667e-05, + "loss": 2.1091, + "step": 19430 + }, + { + "epoch": 3.46, + "learning_rate": 3.8487703703703706e-05, + "loss": 1.8965, + "step": 19435 + }, + { + "epoch": 3.46, + "learning_rate": 3.8484740740740745e-05, + "loss": 1.8952, + "step": 19440 + }, + { + "epoch": 3.46, + "learning_rate": 3.848177777777778e-05, + "loss": 1.8952, + "step": 19445 + }, + { + "epoch": 3.46, + "learning_rate": 3.8478814814814816e-05, + "loss": 1.9744, + "step": 19450 + }, + { + "epoch": 3.46, + "learning_rate": 3.8475851851851855e-05, + "loss": 2.0103, + "step": 19455 + }, + { + "epoch": 3.46, + "learning_rate": 3.847288888888889e-05, + "loss": 1.992, + "step": 19460 + }, + { + "epoch": 3.46, + "learning_rate": 3.8469925925925925e-05, + "loss": 1.9147, + "step": 19465 + }, + { + "epoch": 3.46, + "learning_rate": 3.8466962962962964e-05, + "loss": 2.0468, + "step": 19470 + }, + { + "epoch": 3.46, + "learning_rate": 3.8464e-05, + "loss": 1.9042, + "step": 19475 + }, + { + "epoch": 3.46, + "learning_rate": 3.846103703703704e-05, + "loss": 1.942, + "step": 19480 + }, + { + "epoch": 3.46, + "learning_rate": 3.8458074074074074e-05, + "loss": 1.8422, + "step": 19485 + }, + { + "epoch": 3.46, + "learning_rate": 3.845511111111111e-05, + "loss": 1.9813, + "step": 19490 + }, + { + "epoch": 3.47, + "learning_rate": 3.845214814814815e-05, + "loss": 2.0436, + "step": 19495 + }, + { + "epoch": 3.47, + "learning_rate": 3.844918518518519e-05, + "loss": 1.9923, + "step": 19500 + }, + { + "epoch": 3.47, + "learning_rate": 3.844622222222222e-05, + "loss": 1.7841, + "step": 19505 + }, + { + "epoch": 3.47, + "learning_rate": 3.844325925925926e-05, + "loss": 1.8681, + "step": 19510 + }, + { + "epoch": 3.47, + "learning_rate": 3.844029629629629e-05, + "loss": 1.9571, + "step": 19515 + }, + { + "epoch": 3.47, + "learning_rate": 3.843733333333334e-05, + "loss": 1.9107, + "step": 19520 + }, + { + "epoch": 3.47, + "learning_rate": 3.843437037037037e-05, + "loss": 1.9601, + "step": 19525 + }, + { + "epoch": 3.47, + "learning_rate": 3.843140740740741e-05, + "loss": 2.0451, + "step": 19530 + }, + { + "epoch": 3.47, + "learning_rate": 3.842844444444444e-05, + "loss": 1.9737, + "step": 19535 + }, + { + "epoch": 3.47, + "learning_rate": 3.842548148148149e-05, + "loss": 1.9557, + "step": 19540 + }, + { + "epoch": 3.47, + "learning_rate": 3.842251851851852e-05, + "loss": 2.0266, + "step": 19545 + }, + { + "epoch": 3.48, + "learning_rate": 3.841955555555556e-05, + "loss": 1.7994, + "step": 19550 + }, + { + "epoch": 3.48, + "learning_rate": 3.841659259259259e-05, + "loss": 1.8544, + "step": 19555 + }, + { + "epoch": 3.48, + "learning_rate": 3.8413629629629636e-05, + "loss": 2.1024, + "step": 19560 + }, + { + "epoch": 3.48, + "learning_rate": 3.841066666666667e-05, + "loss": 1.9766, + "step": 19565 + }, + { + "epoch": 3.48, + "learning_rate": 3.840770370370371e-05, + "loss": 1.9036, + "step": 19570 + }, + { + "epoch": 3.48, + "learning_rate": 3.840474074074074e-05, + "loss": 2.0583, + "step": 19575 + }, + { + "epoch": 3.48, + "learning_rate": 3.840177777777778e-05, + "loss": 1.9418, + "step": 19580 + }, + { + "epoch": 3.48, + "learning_rate": 3.8398814814814817e-05, + "loss": 2.0346, + "step": 19585 + }, + { + "epoch": 3.48, + "learning_rate": 3.8395851851851855e-05, + "loss": 1.989, + "step": 19590 + }, + { + "epoch": 3.48, + "learning_rate": 3.839288888888889e-05, + "loss": 2.02, + "step": 19595 + }, + { + "epoch": 3.48, + "learning_rate": 3.8389925925925926e-05, + "loss": 1.7909, + "step": 19600 + }, + { + "epoch": 3.49, + "learning_rate": 3.8386962962962965e-05, + "loss": 2.0601, + "step": 19605 + }, + { + "epoch": 3.49, + "learning_rate": 3.8384000000000004e-05, + "loss": 1.9828, + "step": 19610 + }, + { + "epoch": 3.49, + "learning_rate": 3.8381037037037036e-05, + "loss": 2.0091, + "step": 19615 + }, + { + "epoch": 3.49, + "learning_rate": 3.8378074074074075e-05, + "loss": 1.9604, + "step": 19620 + }, + { + "epoch": 3.49, + "learning_rate": 3.8375111111111114e-05, + "loss": 2.0798, + "step": 19625 + }, + { + "epoch": 3.49, + "learning_rate": 3.837214814814815e-05, + "loss": 2.0536, + "step": 19630 + }, + { + "epoch": 3.49, + "learning_rate": 3.8369185185185184e-05, + "loss": 1.846, + "step": 19635 + }, + { + "epoch": 3.49, + "learning_rate": 3.836622222222222e-05, + "loss": 1.957, + "step": 19640 + }, + { + "epoch": 3.49, + "learning_rate": 3.836325925925926e-05, + "loss": 1.9611, + "step": 19645 + }, + { + "epoch": 3.49, + "learning_rate": 3.83602962962963e-05, + "loss": 1.9307, + "step": 19650 + }, + { + "epoch": 3.49, + "learning_rate": 3.835733333333333e-05, + "loss": 1.8096, + "step": 19655 + }, + { + "epoch": 3.5, + "learning_rate": 3.835437037037037e-05, + "loss": 1.883, + "step": 19660 + }, + { + "epoch": 3.5, + "learning_rate": 3.835140740740741e-05, + "loss": 1.9386, + "step": 19665 + }, + { + "epoch": 3.5, + "learning_rate": 3.834844444444445e-05, + "loss": 1.9331, + "step": 19670 + }, + { + "epoch": 3.5, + "learning_rate": 3.834548148148148e-05, + "loss": 1.9057, + "step": 19675 + }, + { + "epoch": 3.5, + "learning_rate": 3.834251851851852e-05, + "loss": 1.8606, + "step": 19680 + }, + { + "epoch": 3.5, + "learning_rate": 3.833955555555556e-05, + "loss": 2.0291, + "step": 19685 + }, + { + "epoch": 3.5, + "learning_rate": 3.83365925925926e-05, + "loss": 1.9986, + "step": 19690 + }, + { + "epoch": 3.5, + "learning_rate": 3.833362962962963e-05, + "loss": 1.918, + "step": 19695 + }, + { + "epoch": 3.5, + "learning_rate": 3.833066666666667e-05, + "loss": 2.0606, + "step": 19700 + }, + { + "epoch": 3.5, + "learning_rate": 3.832770370370371e-05, + "loss": 2.0654, + "step": 19705 + }, + { + "epoch": 3.5, + "learning_rate": 3.8324740740740746e-05, + "loss": 1.9007, + "step": 19710 + }, + { + "epoch": 3.5, + "learning_rate": 3.832177777777778e-05, + "loss": 1.8761, + "step": 19715 + }, + { + "epoch": 3.51, + "learning_rate": 3.831881481481482e-05, + "loss": 2.009, + "step": 19720 + }, + { + "epoch": 3.51, + "learning_rate": 3.8315851851851856e-05, + "loss": 1.8596, + "step": 19725 + }, + { + "epoch": 3.51, + "learning_rate": 3.8312888888888895e-05, + "loss": 1.9878, + "step": 19730 + }, + { + "epoch": 3.51, + "learning_rate": 3.830992592592593e-05, + "loss": 1.9412, + "step": 19735 + }, + { + "epoch": 3.51, + "learning_rate": 3.8306962962962966e-05, + "loss": 1.9634, + "step": 19740 + }, + { + "epoch": 3.51, + "learning_rate": 3.8304e-05, + "loss": 2.0041, + "step": 19745 + }, + { + "epoch": 3.51, + "learning_rate": 3.8301037037037043e-05, + "loss": 1.9637, + "step": 19750 + }, + { + "epoch": 3.51, + "learning_rate": 3.8298074074074075e-05, + "loss": 1.9728, + "step": 19755 + }, + { + "epoch": 3.51, + "learning_rate": 3.8295111111111114e-05, + "loss": 2.0181, + "step": 19760 + }, + { + "epoch": 3.51, + "learning_rate": 3.8292148148148146e-05, + "loss": 1.9003, + "step": 19765 + }, + { + "epoch": 3.51, + "learning_rate": 3.828918518518519e-05, + "loss": 1.9066, + "step": 19770 + }, + { + "epoch": 3.52, + "learning_rate": 3.8286222222222224e-05, + "loss": 2.0553, + "step": 19775 + }, + { + "epoch": 3.52, + "learning_rate": 3.828325925925926e-05, + "loss": 1.9709, + "step": 19780 + }, + { + "epoch": 3.52, + "learning_rate": 3.8280296296296295e-05, + "loss": 1.9395, + "step": 19785 + }, + { + "epoch": 3.52, + "learning_rate": 3.827733333333334e-05, + "loss": 1.9761, + "step": 19790 + }, + { + "epoch": 3.52, + "learning_rate": 3.827437037037037e-05, + "loss": 2.0571, + "step": 19795 + }, + { + "epoch": 3.52, + "learning_rate": 3.827140740740741e-05, + "loss": 1.9006, + "step": 19800 + }, + { + "epoch": 3.52, + "learning_rate": 3.826844444444444e-05, + "loss": 2.0519, + "step": 19805 + }, + { + "epoch": 3.52, + "learning_rate": 3.826548148148148e-05, + "loss": 2.0014, + "step": 19810 + }, + { + "epoch": 3.52, + "learning_rate": 3.826251851851852e-05, + "loss": 1.9408, + "step": 19815 + }, + { + "epoch": 3.52, + "learning_rate": 3.825955555555556e-05, + "loss": 2.0233, + "step": 19820 + }, + { + "epoch": 3.52, + "learning_rate": 3.825659259259259e-05, + "loss": 2.0875, + "step": 19825 + }, + { + "epoch": 3.53, + "learning_rate": 3.825362962962963e-05, + "loss": 1.8947, + "step": 19830 + }, + { + "epoch": 3.53, + "learning_rate": 3.825066666666667e-05, + "loss": 1.9824, + "step": 19835 + }, + { + "epoch": 3.53, + "learning_rate": 3.824770370370371e-05, + "loss": 1.9553, + "step": 19840 + }, + { + "epoch": 3.53, + "learning_rate": 3.824474074074074e-05, + "loss": 1.9245, + "step": 19845 + }, + { + "epoch": 3.53, + "learning_rate": 3.824177777777778e-05, + "loss": 2.0935, + "step": 19850 + }, + { + "epoch": 3.53, + "learning_rate": 3.823881481481482e-05, + "loss": 1.8683, + "step": 19855 + }, + { + "epoch": 3.53, + "learning_rate": 3.823585185185186e-05, + "loss": 1.9085, + "step": 19860 + }, + { + "epoch": 3.53, + "learning_rate": 3.823288888888889e-05, + "loss": 2.0078, + "step": 19865 + }, + { + "epoch": 3.53, + "learning_rate": 3.822992592592593e-05, + "loss": 1.9467, + "step": 19870 + }, + { + "epoch": 3.53, + "learning_rate": 3.8226962962962967e-05, + "loss": 1.929, + "step": 19875 + }, + { + "epoch": 3.53, + "learning_rate": 3.8224000000000005e-05, + "loss": 1.9232, + "step": 19880 + }, + { + "epoch": 3.54, + "learning_rate": 3.822103703703704e-05, + "loss": 1.9001, + "step": 19885 + }, + { + "epoch": 3.54, + "learning_rate": 3.8218074074074076e-05, + "loss": 1.7978, + "step": 19890 + }, + { + "epoch": 3.54, + "learning_rate": 3.8215111111111115e-05, + "loss": 2.0708, + "step": 19895 + }, + { + "epoch": 3.54, + "learning_rate": 3.8212148148148154e-05, + "loss": 2.0007, + "step": 19900 + }, + { + "epoch": 3.54, + "learning_rate": 3.8209185185185186e-05, + "loss": 2.1008, + "step": 19905 + }, + { + "epoch": 3.54, + "learning_rate": 3.8206222222222225e-05, + "loss": 1.8995, + "step": 19910 + }, + { + "epoch": 3.54, + "learning_rate": 3.8203259259259264e-05, + "loss": 1.9984, + "step": 19915 + }, + { + "epoch": 3.54, + "learning_rate": 3.82002962962963e-05, + "loss": 2.049, + "step": 19920 + }, + { + "epoch": 3.54, + "learning_rate": 3.8197333333333334e-05, + "loss": 2.0505, + "step": 19925 + }, + { + "epoch": 3.54, + "learning_rate": 3.819437037037037e-05, + "loss": 1.9941, + "step": 19930 + }, + { + "epoch": 3.54, + "learning_rate": 3.819140740740741e-05, + "loss": 1.9888, + "step": 19935 + }, + { + "epoch": 3.54, + "learning_rate": 3.818844444444445e-05, + "loss": 1.9068, + "step": 19940 + }, + { + "epoch": 3.55, + "learning_rate": 3.818548148148148e-05, + "loss": 1.8618, + "step": 19945 + }, + { + "epoch": 3.55, + "learning_rate": 3.818251851851852e-05, + "loss": 1.9384, + "step": 19950 + }, + { + "epoch": 3.55, + "learning_rate": 3.817955555555556e-05, + "loss": 1.9812, + "step": 19955 + }, + { + "epoch": 3.55, + "learning_rate": 3.81765925925926e-05, + "loss": 1.9673, + "step": 19960 + }, + { + "epoch": 3.55, + "learning_rate": 3.817362962962963e-05, + "loss": 1.8537, + "step": 19965 + }, + { + "epoch": 3.55, + "learning_rate": 3.817066666666667e-05, + "loss": 2.0594, + "step": 19970 + }, + { + "epoch": 3.55, + "learning_rate": 3.81677037037037e-05, + "loss": 1.9228, + "step": 19975 + }, + { + "epoch": 3.55, + "learning_rate": 3.816474074074074e-05, + "loss": 1.9924, + "step": 19980 + }, + { + "epoch": 3.55, + "learning_rate": 3.816177777777778e-05, + "loss": 2.013, + "step": 19985 + }, + { + "epoch": 3.55, + "learning_rate": 3.815881481481481e-05, + "loss": 1.9676, + "step": 19990 + }, + { + "epoch": 3.55, + "learning_rate": 3.815585185185185e-05, + "loss": 1.8215, + "step": 19995 + }, + { + "epoch": 3.56, + "learning_rate": 3.815288888888889e-05, + "loss": 1.9639, + "step": 20000 + }, + { + "epoch": 3.56, + "eval_loss": 1.8181427717208862, + "eval_rouge2_fmeasure": 0.1771, + "eval_rouge2_precision": 0.2125, + "eval_rouge2_recall": 0.1606, + "eval_runtime": 36846.5383, + "eval_samples_per_second": 0.136, + "eval_steps_per_second": 0.068, + "step": 20000 + }, + { + "epoch": 3.56, + "learning_rate": 3.814992592592593e-05, + "loss": 1.9242, + "step": 20005 + }, + { + "epoch": 3.56, + "learning_rate": 3.814696296296296e-05, + "loss": 1.8884, + "step": 20010 + }, + { + "epoch": 3.56, + "learning_rate": 3.8144e-05, + "loss": 1.8602, + "step": 20015 + }, + { + "epoch": 3.56, + "learning_rate": 3.814103703703704e-05, + "loss": 2.146, + "step": 20020 + }, + { + "epoch": 3.56, + "learning_rate": 3.813807407407408e-05, + "loss": 1.914, + "step": 20025 + }, + { + "epoch": 3.56, + "learning_rate": 3.813511111111111e-05, + "loss": 1.9613, + "step": 20030 + }, + { + "epoch": 3.56, + "learning_rate": 3.813214814814815e-05, + "loss": 1.8841, + "step": 20035 + }, + { + "epoch": 3.56, + "learning_rate": 3.812918518518519e-05, + "loss": 1.9845, + "step": 20040 + }, + { + "epoch": 3.56, + "learning_rate": 3.8126222222222226e-05, + "loss": 2.0209, + "step": 20045 + }, + { + "epoch": 3.56, + "learning_rate": 3.812325925925926e-05, + "loss": 1.9506, + "step": 20050 + }, + { + "epoch": 3.57, + "learning_rate": 3.8120296296296296e-05, + "loss": 1.9947, + "step": 20055 + }, + { + "epoch": 3.57, + "learning_rate": 3.8117333333333335e-05, + "loss": 1.917, + "step": 20060 + }, + { + "epoch": 3.57, + "learning_rate": 3.8114370370370374e-05, + "loss": 1.986, + "step": 20065 + }, + { + "epoch": 3.57, + "learning_rate": 3.8111407407407406e-05, + "loss": 2.004, + "step": 20070 + }, + { + "epoch": 3.57, + "learning_rate": 3.8108444444444445e-05, + "loss": 1.9111, + "step": 20075 + }, + { + "epoch": 3.57, + "learning_rate": 3.8105481481481484e-05, + "loss": 1.8988, + "step": 20080 + }, + { + "epoch": 3.57, + "learning_rate": 3.810251851851852e-05, + "loss": 1.9325, + "step": 20085 + }, + { + "epoch": 3.57, + "learning_rate": 3.8099555555555555e-05, + "loss": 1.9578, + "step": 20090 + }, + { + "epoch": 3.57, + "learning_rate": 3.8096592592592593e-05, + "loss": 1.9725, + "step": 20095 + }, + { + "epoch": 3.57, + "learning_rate": 3.809362962962963e-05, + "loss": 1.9984, + "step": 20100 + }, + { + "epoch": 3.57, + "learning_rate": 3.809066666666667e-05, + "loss": 1.9189, + "step": 20105 + }, + { + "epoch": 3.58, + "learning_rate": 3.80877037037037e-05, + "loss": 2.0368, + "step": 20110 + }, + { + "epoch": 3.58, + "learning_rate": 3.808474074074074e-05, + "loss": 1.9537, + "step": 20115 + }, + { + "epoch": 3.58, + "learning_rate": 3.8081777777777774e-05, + "loss": 2.0385, + "step": 20120 + }, + { + "epoch": 3.58, + "learning_rate": 3.807881481481482e-05, + "loss": 1.9464, + "step": 20125 + }, + { + "epoch": 3.58, + "learning_rate": 3.807585185185185e-05, + "loss": 1.928, + "step": 20130 + }, + { + "epoch": 3.58, + "learning_rate": 3.807288888888889e-05, + "loss": 2.0644, + "step": 20135 + }, + { + "epoch": 3.58, + "learning_rate": 3.806992592592592e-05, + "loss": 2.0066, + "step": 20140 + }, + { + "epoch": 3.58, + "learning_rate": 3.806696296296297e-05, + "loss": 1.9024, + "step": 20145 + }, + { + "epoch": 3.58, + "learning_rate": 3.8064e-05, + "loss": 2.1443, + "step": 20150 + }, + { + "epoch": 3.58, + "learning_rate": 3.806103703703704e-05, + "loss": 1.9592, + "step": 20155 + }, + { + "epoch": 3.58, + "learning_rate": 3.805807407407407e-05, + "loss": 1.9597, + "step": 20160 + }, + { + "epoch": 3.58, + "learning_rate": 3.8055111111111117e-05, + "loss": 1.9378, + "step": 20165 + }, + { + "epoch": 3.59, + "learning_rate": 3.805214814814815e-05, + "loss": 2.0411, + "step": 20170 + }, + { + "epoch": 3.59, + "learning_rate": 3.804918518518519e-05, + "loss": 2.0039, + "step": 20175 + }, + { + "epoch": 3.59, + "learning_rate": 3.804622222222222e-05, + "loss": 2.0573, + "step": 20180 + }, + { + "epoch": 3.59, + "learning_rate": 3.8043259259259265e-05, + "loss": 1.9441, + "step": 20185 + }, + { + "epoch": 3.59, + "learning_rate": 3.80402962962963e-05, + "loss": 1.9339, + "step": 20190 + }, + { + "epoch": 3.59, + "learning_rate": 3.8037333333333336e-05, + "loss": 2.0266, + "step": 20195 + }, + { + "epoch": 3.59, + "learning_rate": 3.803437037037037e-05, + "loss": 1.9893, + "step": 20200 + }, + { + "epoch": 3.59, + "learning_rate": 3.803140740740741e-05, + "loss": 2.0178, + "step": 20205 + }, + { + "epoch": 3.59, + "learning_rate": 3.8028444444444446e-05, + "loss": 1.9329, + "step": 20210 + }, + { + "epoch": 3.59, + "learning_rate": 3.8025481481481484e-05, + "loss": 1.994, + "step": 20215 + }, + { + "epoch": 3.59, + "learning_rate": 3.8022518518518517e-05, + "loss": 1.9525, + "step": 20220 + }, + { + "epoch": 3.6, + "learning_rate": 3.8019555555555555e-05, + "loss": 1.7788, + "step": 20225 + }, + { + "epoch": 3.6, + "learning_rate": 3.8016592592592594e-05, + "loss": 2.0706, + "step": 20230 + }, + { + "epoch": 3.6, + "learning_rate": 3.801362962962963e-05, + "loss": 1.7772, + "step": 20235 + }, + { + "epoch": 3.6, + "learning_rate": 3.8010666666666665e-05, + "loss": 1.9923, + "step": 20240 + }, + { + "epoch": 3.6, + "learning_rate": 3.8007703703703704e-05, + "loss": 1.9539, + "step": 20245 + }, + { + "epoch": 3.6, + "learning_rate": 3.800474074074074e-05, + "loss": 1.9335, + "step": 20250 + }, + { + "epoch": 3.6, + "learning_rate": 3.800177777777778e-05, + "loss": 2.0497, + "step": 20255 + }, + { + "epoch": 3.6, + "learning_rate": 3.7998814814814814e-05, + "loss": 1.9357, + "step": 20260 + }, + { + "epoch": 3.6, + "learning_rate": 3.799585185185185e-05, + "loss": 2.0328, + "step": 20265 + }, + { + "epoch": 3.6, + "learning_rate": 3.799288888888889e-05, + "loss": 1.9598, + "step": 20270 + }, + { + "epoch": 3.6, + "learning_rate": 3.798992592592593e-05, + "loss": 1.936, + "step": 20275 + }, + { + "epoch": 3.61, + "learning_rate": 3.798696296296296e-05, + "loss": 2.0266, + "step": 20280 + }, + { + "epoch": 3.61, + "learning_rate": 3.7984e-05, + "loss": 2.0422, + "step": 20285 + }, + { + "epoch": 3.61, + "learning_rate": 3.798103703703704e-05, + "loss": 1.9786, + "step": 20290 + }, + { + "epoch": 3.61, + "learning_rate": 3.797807407407408e-05, + "loss": 2.0306, + "step": 20295 + }, + { + "epoch": 3.61, + "learning_rate": 3.797511111111111e-05, + "loss": 2.0431, + "step": 20300 + }, + { + "epoch": 3.61, + "learning_rate": 3.797214814814815e-05, + "loss": 1.9746, + "step": 20305 + }, + { + "epoch": 3.61, + "learning_rate": 3.796918518518519e-05, + "loss": 1.9534, + "step": 20310 + }, + { + "epoch": 3.61, + "learning_rate": 3.796622222222223e-05, + "loss": 1.9085, + "step": 20315 + }, + { + "epoch": 3.61, + "learning_rate": 3.796325925925926e-05, + "loss": 2.0734, + "step": 20320 + }, + { + "epoch": 3.61, + "learning_rate": 3.79602962962963e-05, + "loss": 1.8987, + "step": 20325 + }, + { + "epoch": 3.61, + "learning_rate": 3.795733333333334e-05, + "loss": 1.9868, + "step": 20330 + }, + { + "epoch": 3.62, + "learning_rate": 3.7954370370370376e-05, + "loss": 1.9497, + "step": 20335 + }, + { + "epoch": 3.62, + "learning_rate": 3.795140740740741e-05, + "loss": 2.0128, + "step": 20340 + }, + { + "epoch": 3.62, + "learning_rate": 3.7948444444444446e-05, + "loss": 1.9626, + "step": 20345 + }, + { + "epoch": 3.62, + "learning_rate": 3.794548148148148e-05, + "loss": 1.9887, + "step": 20350 + }, + { + "epoch": 3.62, + "learning_rate": 3.7942518518518524e-05, + "loss": 2.0096, + "step": 20355 + }, + { + "epoch": 3.62, + "learning_rate": 3.7939555555555556e-05, + "loss": 1.9029, + "step": 20360 + }, + { + "epoch": 3.62, + "learning_rate": 3.7936592592592595e-05, + "loss": 1.9932, + "step": 20365 + }, + { + "epoch": 3.62, + "learning_rate": 3.793362962962963e-05, + "loss": 1.9596, + "step": 20370 + }, + { + "epoch": 3.62, + "learning_rate": 3.793066666666667e-05, + "loss": 1.9879, + "step": 20375 + }, + { + "epoch": 3.62, + "learning_rate": 3.7927703703703705e-05, + "loss": 1.8336, + "step": 20380 + }, + { + "epoch": 3.62, + "learning_rate": 3.7924740740740743e-05, + "loss": 1.9932, + "step": 20385 + }, + { + "epoch": 3.62, + "learning_rate": 3.7921777777777775e-05, + "loss": 1.9208, + "step": 20390 + }, + { + "epoch": 3.63, + "learning_rate": 3.791881481481482e-05, + "loss": 1.8098, + "step": 20395 + }, + { + "epoch": 3.63, + "learning_rate": 3.791585185185185e-05, + "loss": 1.9656, + "step": 20400 + }, + { + "epoch": 3.63, + "learning_rate": 3.791288888888889e-05, + "loss": 1.9073, + "step": 20405 + }, + { + "epoch": 3.63, + "learning_rate": 3.7909925925925924e-05, + "loss": 2.0418, + "step": 20410 + }, + { + "epoch": 3.63, + "learning_rate": 3.790696296296297e-05, + "loss": 2.0456, + "step": 20415 + }, + { + "epoch": 3.63, + "learning_rate": 3.7904e-05, + "loss": 1.9665, + "step": 20420 + }, + { + "epoch": 3.63, + "learning_rate": 3.790103703703704e-05, + "loss": 1.8169, + "step": 20425 + }, + { + "epoch": 3.63, + "learning_rate": 3.789807407407407e-05, + "loss": 1.9226, + "step": 20430 + }, + { + "epoch": 3.63, + "learning_rate": 3.789511111111111e-05, + "loss": 2.0569, + "step": 20435 + }, + { + "epoch": 3.63, + "learning_rate": 3.789214814814815e-05, + "loss": 1.9005, + "step": 20440 + }, + { + "epoch": 3.63, + "learning_rate": 3.788918518518519e-05, + "loss": 2.1218, + "step": 20445 + }, + { + "epoch": 3.64, + "learning_rate": 3.788622222222222e-05, + "loss": 1.886, + "step": 20450 + }, + { + "epoch": 3.64, + "learning_rate": 3.788325925925926e-05, + "loss": 2.0779, + "step": 20455 + }, + { + "epoch": 3.64, + "learning_rate": 3.78802962962963e-05, + "loss": 2.0185, + "step": 20460 + }, + { + "epoch": 3.64, + "learning_rate": 3.787733333333334e-05, + "loss": 1.9625, + "step": 20465 + }, + { + "epoch": 3.64, + "learning_rate": 3.787437037037037e-05, + "loss": 1.9478, + "step": 20470 + }, + { + "epoch": 3.64, + "learning_rate": 3.787140740740741e-05, + "loss": 2.0012, + "step": 20475 + }, + { + "epoch": 3.64, + "learning_rate": 3.786844444444445e-05, + "loss": 2.0288, + "step": 20480 + }, + { + "epoch": 3.64, + "learning_rate": 3.7865481481481486e-05, + "loss": 1.9601, + "step": 20485 + }, + { + "epoch": 3.64, + "learning_rate": 3.786251851851852e-05, + "loss": 2.0511, + "step": 20490 + }, + { + "epoch": 3.64, + "learning_rate": 3.785955555555556e-05, + "loss": 1.9344, + "step": 20495 + }, + { + "epoch": 3.64, + "learning_rate": 3.7856592592592596e-05, + "loss": 1.9684, + "step": 20500 + }, + { + "epoch": 3.65, + "learning_rate": 3.7853629629629635e-05, + "loss": 1.8574, + "step": 20505 + }, + { + "epoch": 3.65, + "learning_rate": 3.7850666666666667e-05, + "loss": 1.9739, + "step": 20510 + }, + { + "epoch": 3.65, + "learning_rate": 3.7847703703703705e-05, + "loss": 1.9307, + "step": 20515 + }, + { + "epoch": 3.65, + "learning_rate": 3.7844740740740744e-05, + "loss": 1.9406, + "step": 20520 + }, + { + "epoch": 3.65, + "learning_rate": 3.784177777777778e-05, + "loss": 1.8881, + "step": 20525 + }, + { + "epoch": 3.65, + "learning_rate": 3.7838814814814815e-05, + "loss": 1.9621, + "step": 20530 + }, + { + "epoch": 3.65, + "learning_rate": 3.7835851851851854e-05, + "loss": 1.84, + "step": 20535 + }, + { + "epoch": 3.65, + "learning_rate": 3.783288888888889e-05, + "loss": 1.8347, + "step": 20540 + }, + { + "epoch": 3.65, + "learning_rate": 3.782992592592593e-05, + "loss": 1.8915, + "step": 20545 + }, + { + "epoch": 3.65, + "learning_rate": 3.7826962962962964e-05, + "loss": 1.8863, + "step": 20550 + }, + { + "epoch": 3.65, + "learning_rate": 3.7824e-05, + "loss": 1.9382, + "step": 20555 + }, + { + "epoch": 3.66, + "learning_rate": 3.782103703703704e-05, + "loss": 1.8947, + "step": 20560 + }, + { + "epoch": 3.66, + "learning_rate": 3.781807407407408e-05, + "loss": 1.9358, + "step": 20565 + }, + { + "epoch": 3.66, + "learning_rate": 3.781511111111111e-05, + "loss": 1.9603, + "step": 20570 + }, + { + "epoch": 3.66, + "learning_rate": 3.781214814814815e-05, + "loss": 1.8794, + "step": 20575 + }, + { + "epoch": 3.66, + "learning_rate": 3.780918518518519e-05, + "loss": 1.9687, + "step": 20580 + }, + { + "epoch": 3.66, + "learning_rate": 3.780622222222223e-05, + "loss": 1.9004, + "step": 20585 + }, + { + "epoch": 3.66, + "learning_rate": 3.780325925925926e-05, + "loss": 2.0997, + "step": 20590 + }, + { + "epoch": 3.66, + "learning_rate": 3.78002962962963e-05, + "loss": 1.9507, + "step": 20595 + }, + { + "epoch": 3.66, + "learning_rate": 3.779733333333333e-05, + "loss": 1.9861, + "step": 20600 + }, + { + "epoch": 3.66, + "learning_rate": 3.779437037037038e-05, + "loss": 2.0139, + "step": 20605 + }, + { + "epoch": 3.66, + "learning_rate": 3.779140740740741e-05, + "loss": 2.0509, + "step": 20610 + }, + { + "epoch": 3.66, + "learning_rate": 3.778844444444445e-05, + "loss": 1.9274, + "step": 20615 + }, + { + "epoch": 3.67, + "learning_rate": 3.778548148148148e-05, + "loss": 2.0703, + "step": 20620 + }, + { + "epoch": 3.67, + "learning_rate": 3.7782518518518526e-05, + "loss": 2.0063, + "step": 20625 + }, + { + "epoch": 3.67, + "learning_rate": 3.777955555555556e-05, + "loss": 1.9817, + "step": 20630 + }, + { + "epoch": 3.67, + "learning_rate": 3.7776592592592596e-05, + "loss": 2.1252, + "step": 20635 + }, + { + "epoch": 3.67, + "learning_rate": 3.777362962962963e-05, + "loss": 1.9124, + "step": 20640 + }, + { + "epoch": 3.67, + "learning_rate": 3.7770666666666674e-05, + "loss": 1.94, + "step": 20645 + }, + { + "epoch": 3.67, + "learning_rate": 3.7767703703703706e-05, + "loss": 2.0087, + "step": 20650 + }, + { + "epoch": 3.67, + "learning_rate": 3.7764740740740745e-05, + "loss": 2.0185, + "step": 20655 + }, + { + "epoch": 3.67, + "learning_rate": 3.776177777777778e-05, + "loss": 1.941, + "step": 20660 + }, + { + "epoch": 3.67, + "learning_rate": 3.7758814814814816e-05, + "loss": 1.8871, + "step": 20665 + }, + { + "epoch": 3.67, + "learning_rate": 3.7755851851851855e-05, + "loss": 2.1713, + "step": 20670 + }, + { + "epoch": 3.68, + "learning_rate": 3.7752888888888893e-05, + "loss": 2.1213, + "step": 20675 + }, + { + "epoch": 3.68, + "learning_rate": 3.7749925925925926e-05, + "loss": 1.8814, + "step": 20680 + }, + { + "epoch": 3.68, + "learning_rate": 3.7746962962962964e-05, + "loss": 1.9309, + "step": 20685 + }, + { + "epoch": 3.68, + "learning_rate": 3.7744e-05, + "loss": 1.9567, + "step": 20690 + }, + { + "epoch": 3.68, + "learning_rate": 3.774103703703704e-05, + "loss": 1.9551, + "step": 20695 + }, + { + "epoch": 3.68, + "learning_rate": 3.7738074074074074e-05, + "loss": 1.9598, + "step": 20700 + }, + { + "epoch": 3.68, + "learning_rate": 3.773511111111111e-05, + "loss": 1.8591, + "step": 20705 + }, + { + "epoch": 3.68, + "learning_rate": 3.773214814814815e-05, + "loss": 1.9494, + "step": 20710 + }, + { + "epoch": 3.68, + "learning_rate": 3.772918518518519e-05, + "loss": 1.9957, + "step": 20715 + }, + { + "epoch": 3.68, + "learning_rate": 3.772622222222222e-05, + "loss": 1.96, + "step": 20720 + }, + { + "epoch": 3.68, + "learning_rate": 3.772325925925926e-05, + "loss": 1.8708, + "step": 20725 + }, + { + "epoch": 3.69, + "learning_rate": 3.77202962962963e-05, + "loss": 2.0776, + "step": 20730 + }, + { + "epoch": 3.69, + "learning_rate": 3.771733333333334e-05, + "loss": 1.8553, + "step": 20735 + }, + { + "epoch": 3.69, + "learning_rate": 3.771437037037037e-05, + "loss": 1.9028, + "step": 20740 + }, + { + "epoch": 3.69, + "learning_rate": 3.771140740740741e-05, + "loss": 2.0337, + "step": 20745 + }, + { + "epoch": 3.69, + "learning_rate": 3.770844444444445e-05, + "loss": 1.9958, + "step": 20750 + }, + { + "epoch": 3.69, + "learning_rate": 3.770548148148148e-05, + "loss": 1.8491, + "step": 20755 + }, + { + "epoch": 3.69, + "learning_rate": 3.770251851851852e-05, + "loss": 1.9595, + "step": 20760 + }, + { + "epoch": 3.69, + "learning_rate": 3.769955555555555e-05, + "loss": 1.9609, + "step": 20765 + }, + { + "epoch": 3.69, + "learning_rate": 3.76965925925926e-05, + "loss": 1.9981, + "step": 20770 + }, + { + "epoch": 3.69, + "learning_rate": 3.769362962962963e-05, + "loss": 1.9176, + "step": 20775 + }, + { + "epoch": 3.69, + "learning_rate": 3.769066666666667e-05, + "loss": 1.9321, + "step": 20780 + }, + { + "epoch": 3.7, + "learning_rate": 3.76877037037037e-05, + "loss": 1.9501, + "step": 20785 + }, + { + "epoch": 3.7, + "learning_rate": 3.7684740740740746e-05, + "loss": 1.881, + "step": 20790 + }, + { + "epoch": 3.7, + "learning_rate": 3.768177777777778e-05, + "loss": 1.9772, + "step": 20795 + }, + { + "epoch": 3.7, + "learning_rate": 3.767881481481482e-05, + "loss": 1.9585, + "step": 20800 + }, + { + "epoch": 3.7, + "learning_rate": 3.767585185185185e-05, + "loss": 1.8707, + "step": 20805 + }, + { + "epoch": 3.7, + "learning_rate": 3.7672888888888894e-05, + "loss": 1.8278, + "step": 20810 + }, + { + "epoch": 3.7, + "learning_rate": 3.7669925925925926e-05, + "loss": 2.0021, + "step": 20815 + }, + { + "epoch": 3.7, + "learning_rate": 3.7666962962962965e-05, + "loss": 1.9674, + "step": 20820 + }, + { + "epoch": 3.7, + "learning_rate": 3.7664e-05, + "loss": 2.0349, + "step": 20825 + }, + { + "epoch": 3.7, + "learning_rate": 3.7661037037037036e-05, + "loss": 1.9602, + "step": 20830 + }, + { + "epoch": 3.7, + "learning_rate": 3.7658074074074075e-05, + "loss": 2.0821, + "step": 20835 + }, + { + "epoch": 3.7, + "learning_rate": 3.7655111111111114e-05, + "loss": 1.977, + "step": 20840 + }, + { + "epoch": 3.71, + "learning_rate": 3.7652148148148146e-05, + "loss": 1.9405, + "step": 20845 + }, + { + "epoch": 3.71, + "learning_rate": 3.7649185185185185e-05, + "loss": 1.8417, + "step": 20850 + }, + { + "epoch": 3.71, + "learning_rate": 3.764622222222222e-05, + "loss": 1.9594, + "step": 20855 + }, + { + "epoch": 3.71, + "learning_rate": 3.764325925925926e-05, + "loss": 2.0869, + "step": 20860 + }, + { + "epoch": 3.71, + "learning_rate": 3.7640296296296294e-05, + "loss": 2.0832, + "step": 20865 + }, + { + "epoch": 3.71, + "learning_rate": 3.763733333333333e-05, + "loss": 1.972, + "step": 20870 + }, + { + "epoch": 3.71, + "learning_rate": 3.763437037037037e-05, + "loss": 1.9647, + "step": 20875 + }, + { + "epoch": 3.71, + "learning_rate": 3.763140740740741e-05, + "loss": 2.0283, + "step": 20880 + }, + { + "epoch": 3.71, + "learning_rate": 3.762844444444444e-05, + "loss": 1.8196, + "step": 20885 + }, + { + "epoch": 3.71, + "learning_rate": 3.762548148148148e-05, + "loss": 1.9829, + "step": 20890 + }, + { + "epoch": 3.71, + "learning_rate": 3.762251851851852e-05, + "loss": 1.8277, + "step": 20895 + }, + { + "epoch": 3.72, + "learning_rate": 3.761955555555556e-05, + "loss": 1.9101, + "step": 20900 + }, + { + "epoch": 3.72, + "learning_rate": 3.761659259259259e-05, + "loss": 2.0292, + "step": 20905 + }, + { + "epoch": 3.72, + "learning_rate": 3.761362962962963e-05, + "loss": 1.9849, + "step": 20910 + }, + { + "epoch": 3.72, + "learning_rate": 3.761066666666667e-05, + "loss": 1.8762, + "step": 20915 + }, + { + "epoch": 3.72, + "learning_rate": 3.760770370370371e-05, + "loss": 1.9212, + "step": 20920 + }, + { + "epoch": 3.72, + "learning_rate": 3.760474074074074e-05, + "loss": 1.9331, + "step": 20925 + }, + { + "epoch": 3.72, + "learning_rate": 3.760177777777778e-05, + "loss": 1.9854, + "step": 20930 + }, + { + "epoch": 3.72, + "learning_rate": 3.759881481481482e-05, + "loss": 1.9306, + "step": 20935 + }, + { + "epoch": 3.72, + "learning_rate": 3.7595851851851856e-05, + "loss": 2.0732, + "step": 20940 + }, + { + "epoch": 3.72, + "learning_rate": 3.759288888888889e-05, + "loss": 1.9525, + "step": 20945 + }, + { + "epoch": 3.72, + "learning_rate": 3.758992592592593e-05, + "loss": 1.9798, + "step": 20950 + }, + { + "epoch": 3.73, + "learning_rate": 3.7586962962962966e-05, + "loss": 2.0048, + "step": 20955 + }, + { + "epoch": 3.73, + "learning_rate": 3.7584000000000005e-05, + "loss": 2.086, + "step": 20960 + }, + { + "epoch": 3.73, + "learning_rate": 3.758103703703704e-05, + "loss": 2.0286, + "step": 20965 + }, + { + "epoch": 3.73, + "learning_rate": 3.7578074074074076e-05, + "loss": 1.9974, + "step": 20970 + }, + { + "epoch": 3.73, + "learning_rate": 3.757511111111111e-05, + "loss": 2.0797, + "step": 20975 + }, + { + "epoch": 3.73, + "learning_rate": 3.757214814814815e-05, + "loss": 1.9855, + "step": 20980 + }, + { + "epoch": 3.73, + "learning_rate": 3.7569185185185185e-05, + "loss": 1.9892, + "step": 20985 + }, + { + "epoch": 3.73, + "learning_rate": 3.7566222222222224e-05, + "loss": 1.9774, + "step": 20990 + }, + { + "epoch": 3.73, + "learning_rate": 3.7563259259259256e-05, + "loss": 2.0364, + "step": 20995 + }, + { + "epoch": 3.73, + "learning_rate": 3.75602962962963e-05, + "loss": 1.9657, + "step": 21000 + }, + { + "epoch": 3.73, + "learning_rate": 3.7557333333333334e-05, + "loss": 1.8665, + "step": 21005 + }, + { + "epoch": 3.74, + "learning_rate": 3.755437037037037e-05, + "loss": 1.879, + "step": 21010 + }, + { + "epoch": 3.74, + "learning_rate": 3.7551407407407405e-05, + "loss": 1.9235, + "step": 21015 + }, + { + "epoch": 3.74, + "learning_rate": 3.754844444444445e-05, + "loss": 2.0489, + "step": 21020 + }, + { + "epoch": 3.74, + "learning_rate": 3.754548148148148e-05, + "loss": 2.0077, + "step": 21025 + }, + { + "epoch": 3.74, + "learning_rate": 3.754251851851852e-05, + "loss": 1.8935, + "step": 21030 + }, + { + "epoch": 3.74, + "learning_rate": 3.753955555555555e-05, + "loss": 1.9394, + "step": 21035 + }, + { + "epoch": 3.74, + "learning_rate": 3.75365925925926e-05, + "loss": 1.8245, + "step": 21040 + }, + { + "epoch": 3.74, + "learning_rate": 3.753362962962963e-05, + "loss": 1.9497, + "step": 21045 + }, + { + "epoch": 3.74, + "learning_rate": 3.753066666666667e-05, + "loss": 1.8306, + "step": 21050 + }, + { + "epoch": 3.74, + "learning_rate": 3.75277037037037e-05, + "loss": 1.9738, + "step": 21055 + }, + { + "epoch": 3.74, + "learning_rate": 3.752474074074074e-05, + "loss": 2.0362, + "step": 21060 + }, + { + "epoch": 3.74, + "learning_rate": 3.752177777777778e-05, + "loss": 1.9915, + "step": 21065 + }, + { + "epoch": 3.75, + "learning_rate": 3.751881481481482e-05, + "loss": 2.0905, + "step": 21070 + }, + { + "epoch": 3.75, + "learning_rate": 3.751585185185185e-05, + "loss": 1.9069, + "step": 21075 + }, + { + "epoch": 3.75, + "learning_rate": 3.751288888888889e-05, + "loss": 1.8922, + "step": 21080 + }, + { + "epoch": 3.75, + "learning_rate": 3.750992592592593e-05, + "loss": 1.764, + "step": 21085 + }, + { + "epoch": 3.75, + "learning_rate": 3.750696296296297e-05, + "loss": 1.8953, + "step": 21090 + }, + { + "epoch": 3.75, + "learning_rate": 3.7504e-05, + "loss": 2.0851, + "step": 21095 + }, + { + "epoch": 3.75, + "learning_rate": 3.750103703703704e-05, + "loss": 2.0103, + "step": 21100 + }, + { + "epoch": 3.75, + "learning_rate": 3.7498074074074076e-05, + "loss": 1.9683, + "step": 21105 + }, + { + "epoch": 3.75, + "learning_rate": 3.7495111111111115e-05, + "loss": 1.9679, + "step": 21110 + }, + { + "epoch": 3.75, + "learning_rate": 3.749214814814815e-05, + "loss": 1.9317, + "step": 21115 + }, + { + "epoch": 3.75, + "learning_rate": 3.7489185185185186e-05, + "loss": 1.933, + "step": 21120 + }, + { + "epoch": 3.76, + "learning_rate": 3.7486222222222225e-05, + "loss": 2.0687, + "step": 21125 + }, + { + "epoch": 3.76, + "learning_rate": 3.7483259259259264e-05, + "loss": 2.0281, + "step": 21130 + }, + { + "epoch": 3.76, + "learning_rate": 3.7480296296296296e-05, + "loss": 2.0827, + "step": 21135 + }, + { + "epoch": 3.76, + "learning_rate": 3.7477333333333335e-05, + "loss": 1.8444, + "step": 21140 + }, + { + "epoch": 3.76, + "learning_rate": 3.747437037037037e-05, + "loss": 2.0821, + "step": 21145 + }, + { + "epoch": 3.76, + "learning_rate": 3.747140740740741e-05, + "loss": 1.8199, + "step": 21150 + }, + { + "epoch": 3.76, + "learning_rate": 3.7468444444444444e-05, + "loss": 1.8965, + "step": 21155 + }, + { + "epoch": 3.76, + "learning_rate": 3.746548148148148e-05, + "loss": 1.9435, + "step": 21160 + }, + { + "epoch": 3.76, + "learning_rate": 3.746251851851852e-05, + "loss": 2.0255, + "step": 21165 + }, + { + "epoch": 3.76, + "learning_rate": 3.745955555555556e-05, + "loss": 1.9893, + "step": 21170 + }, + { + "epoch": 3.76, + "learning_rate": 3.745659259259259e-05, + "loss": 1.847, + "step": 21175 + }, + { + "epoch": 3.77, + "learning_rate": 3.745362962962963e-05, + "loss": 2.0298, + "step": 21180 + }, + { + "epoch": 3.77, + "learning_rate": 3.745066666666667e-05, + "loss": 1.891, + "step": 21185 + }, + { + "epoch": 3.77, + "learning_rate": 3.744770370370371e-05, + "loss": 1.8723, + "step": 21190 + }, + { + "epoch": 3.77, + "learning_rate": 3.744474074074074e-05, + "loss": 1.9551, + "step": 21195 + }, + { + "epoch": 3.77, + "learning_rate": 3.744177777777778e-05, + "loss": 1.9063, + "step": 21200 + }, + { + "epoch": 3.77, + "learning_rate": 3.743881481481481e-05, + "loss": 2.0195, + "step": 21205 + }, + { + "epoch": 3.77, + "learning_rate": 3.743585185185186e-05, + "loss": 1.9634, + "step": 21210 + }, + { + "epoch": 3.77, + "learning_rate": 3.743288888888889e-05, + "loss": 2.0447, + "step": 21215 + }, + { + "epoch": 3.77, + "learning_rate": 3.742992592592593e-05, + "loss": 2.0322, + "step": 21220 + }, + { + "epoch": 3.77, + "learning_rate": 3.742696296296296e-05, + "loss": 2.0058, + "step": 21225 + }, + { + "epoch": 3.77, + "learning_rate": 3.7424000000000006e-05, + "loss": 2.0062, + "step": 21230 + }, + { + "epoch": 3.78, + "learning_rate": 3.742103703703704e-05, + "loss": 2.0324, + "step": 21235 + }, + { + "epoch": 3.78, + "learning_rate": 3.741807407407408e-05, + "loss": 1.9296, + "step": 21240 + }, + { + "epoch": 3.78, + "learning_rate": 3.741511111111111e-05, + "loss": 1.9292, + "step": 21245 + }, + { + "epoch": 3.78, + "learning_rate": 3.7412148148148155e-05, + "loss": 1.9179, + "step": 21250 + }, + { + "epoch": 3.78, + "learning_rate": 3.740918518518519e-05, + "loss": 2.0071, + "step": 21255 + }, + { + "epoch": 3.78, + "learning_rate": 3.7406222222222226e-05, + "loss": 1.9377, + "step": 21260 + }, + { + "epoch": 3.78, + "learning_rate": 3.740325925925926e-05, + "loss": 2.0115, + "step": 21265 + }, + { + "epoch": 3.78, + "learning_rate": 3.74002962962963e-05, + "loss": 2.0358, + "step": 21270 + }, + { + "epoch": 3.78, + "learning_rate": 3.7397333333333335e-05, + "loss": 1.9841, + "step": 21275 + }, + { + "epoch": 3.78, + "learning_rate": 3.7394370370370374e-05, + "loss": 2.0212, + "step": 21280 + }, + { + "epoch": 3.78, + "learning_rate": 3.7391407407407406e-05, + "loss": 1.9864, + "step": 21285 + }, + { + "epoch": 3.78, + "learning_rate": 3.7388444444444445e-05, + "loss": 1.9788, + "step": 21290 + }, + { + "epoch": 3.79, + "learning_rate": 3.7385481481481484e-05, + "loss": 1.9798, + "step": 21295 + }, + { + "epoch": 3.79, + "learning_rate": 3.738251851851852e-05, + "loss": 1.9088, + "step": 21300 + }, + { + "epoch": 3.79, + "learning_rate": 3.7379555555555555e-05, + "loss": 1.8635, + "step": 21305 + }, + { + "epoch": 3.79, + "learning_rate": 3.7376592592592594e-05, + "loss": 1.925, + "step": 21310 + }, + { + "epoch": 3.79, + "learning_rate": 3.737362962962963e-05, + "loss": 2.0639, + "step": 21315 + }, + { + "epoch": 3.79, + "learning_rate": 3.737066666666667e-05, + "loss": 1.9953, + "step": 21320 + }, + { + "epoch": 3.79, + "learning_rate": 3.73677037037037e-05, + "loss": 1.9843, + "step": 21325 + }, + { + "epoch": 3.79, + "learning_rate": 3.736474074074074e-05, + "loss": 1.9552, + "step": 21330 + }, + { + "epoch": 3.79, + "learning_rate": 3.736177777777778e-05, + "loss": 1.9286, + "step": 21335 + }, + { + "epoch": 3.79, + "learning_rate": 3.735881481481482e-05, + "loss": 1.8823, + "step": 21340 + }, + { + "epoch": 3.79, + "learning_rate": 3.735585185185185e-05, + "loss": 2.0201, + "step": 21345 + }, + { + "epoch": 3.8, + "learning_rate": 3.735288888888889e-05, + "loss": 1.9328, + "step": 21350 + }, + { + "epoch": 3.8, + "learning_rate": 3.734992592592593e-05, + "loss": 1.9121, + "step": 21355 + }, + { + "epoch": 3.8, + "learning_rate": 3.734696296296297e-05, + "loss": 2.0631, + "step": 21360 + }, + { + "epoch": 3.8, + "learning_rate": 3.7344e-05, + "loss": 1.8997, + "step": 21365 + }, + { + "epoch": 3.8, + "learning_rate": 3.734103703703704e-05, + "loss": 2.0884, + "step": 21370 + }, + { + "epoch": 3.8, + "learning_rate": 3.733807407407408e-05, + "loss": 1.8873, + "step": 21375 + }, + { + "epoch": 3.8, + "learning_rate": 3.733511111111112e-05, + "loss": 1.9064, + "step": 21380 + }, + { + "epoch": 3.8, + "learning_rate": 3.733214814814815e-05, + "loss": 1.9223, + "step": 21385 + }, + { + "epoch": 3.8, + "learning_rate": 3.732918518518519e-05, + "loss": 2.0491, + "step": 21390 + }, + { + "epoch": 3.8, + "learning_rate": 3.7326222222222226e-05, + "loss": 2.0175, + "step": 21395 + }, + { + "epoch": 3.8, + "learning_rate": 3.7323259259259265e-05, + "loss": 1.9188, + "step": 21400 + }, + { + "epoch": 3.81, + "learning_rate": 3.73202962962963e-05, + "loss": 1.9284, + "step": 21405 + }, + { + "epoch": 3.81, + "learning_rate": 3.7317333333333336e-05, + "loss": 1.7904, + "step": 21410 + }, + { + "epoch": 3.81, + "learning_rate": 3.7314370370370375e-05, + "loss": 1.9965, + "step": 21415 + }, + { + "epoch": 3.81, + "learning_rate": 3.7311407407407414e-05, + "loss": 1.9632, + "step": 21420 + }, + { + "epoch": 3.81, + "learning_rate": 3.7308444444444446e-05, + "loss": 1.9321, + "step": 21425 + }, + { + "epoch": 3.81, + "learning_rate": 3.7305481481481485e-05, + "loss": 1.9903, + "step": 21430 + }, + { + "epoch": 3.81, + "learning_rate": 3.730251851851852e-05, + "loss": 2.0091, + "step": 21435 + }, + { + "epoch": 3.81, + "learning_rate": 3.729955555555556e-05, + "loss": 2.0615, + "step": 21440 + }, + { + "epoch": 3.81, + "learning_rate": 3.7296592592592594e-05, + "loss": 1.7409, + "step": 21445 + }, + { + "epoch": 3.81, + "learning_rate": 3.729362962962963e-05, + "loss": 1.9476, + "step": 21450 + }, + { + "epoch": 3.81, + "learning_rate": 3.7290666666666665e-05, + "loss": 1.7534, + "step": 21455 + }, + { + "epoch": 3.82, + "learning_rate": 3.728770370370371e-05, + "loss": 2.0288, + "step": 21460 + }, + { + "epoch": 3.82, + "learning_rate": 3.728474074074074e-05, + "loss": 1.8851, + "step": 21465 + }, + { + "epoch": 3.82, + "learning_rate": 3.728177777777778e-05, + "loss": 2.092, + "step": 21470 + }, + { + "epoch": 3.82, + "learning_rate": 3.7278814814814814e-05, + "loss": 2.0597, + "step": 21475 + }, + { + "epoch": 3.82, + "learning_rate": 3.727585185185186e-05, + "loss": 1.9215, + "step": 21480 + }, + { + "epoch": 3.82, + "learning_rate": 3.727288888888889e-05, + "loss": 1.8503, + "step": 21485 + }, + { + "epoch": 3.82, + "learning_rate": 3.726992592592593e-05, + "loss": 1.9161, + "step": 21490 + }, + { + "epoch": 3.82, + "learning_rate": 3.726696296296296e-05, + "loss": 2.0338, + "step": 21495 + }, + { + "epoch": 3.82, + "learning_rate": 3.726400000000001e-05, + "loss": 1.9394, + "step": 21500 + }, + { + "epoch": 3.82, + "learning_rate": 3.726103703703704e-05, + "loss": 1.997, + "step": 21505 + }, + { + "epoch": 3.82, + "learning_rate": 3.725807407407408e-05, + "loss": 1.9885, + "step": 21510 + }, + { + "epoch": 3.82, + "learning_rate": 3.725511111111111e-05, + "loss": 1.893, + "step": 21515 + }, + { + "epoch": 3.83, + "learning_rate": 3.725214814814815e-05, + "loss": 1.9817, + "step": 21520 + }, + { + "epoch": 3.83, + "learning_rate": 3.724918518518519e-05, + "loss": 1.9804, + "step": 21525 + }, + { + "epoch": 3.83, + "learning_rate": 3.724622222222222e-05, + "loss": 1.962, + "step": 21530 + }, + { + "epoch": 3.83, + "learning_rate": 3.724325925925926e-05, + "loss": 1.9211, + "step": 21535 + }, + { + "epoch": 3.83, + "learning_rate": 3.72402962962963e-05, + "loss": 2.0399, + "step": 21540 + }, + { + "epoch": 3.83, + "learning_rate": 3.723733333333334e-05, + "loss": 2.0076, + "step": 21545 + }, + { + "epoch": 3.83, + "learning_rate": 3.723437037037037e-05, + "loss": 1.9601, + "step": 21550 + }, + { + "epoch": 3.83, + "learning_rate": 3.723140740740741e-05, + "loss": 1.9727, + "step": 21555 + }, + { + "epoch": 3.83, + "learning_rate": 3.7228444444444447e-05, + "loss": 1.9021, + "step": 21560 + }, + { + "epoch": 3.83, + "learning_rate": 3.7225481481481485e-05, + "loss": 1.9097, + "step": 21565 + }, + { + "epoch": 3.83, + "learning_rate": 3.722251851851852e-05, + "loss": 2.0889, + "step": 21570 + }, + { + "epoch": 3.84, + "learning_rate": 3.7219555555555556e-05, + "loss": 2.0256, + "step": 21575 + }, + { + "epoch": 3.84, + "learning_rate": 3.7216592592592595e-05, + "loss": 1.9004, + "step": 21580 + }, + { + "epoch": 3.84, + "learning_rate": 3.7213629629629634e-05, + "loss": 1.9035, + "step": 21585 + }, + { + "epoch": 3.84, + "learning_rate": 3.7210666666666666e-05, + "loss": 2.0012, + "step": 21590 + }, + { + "epoch": 3.84, + "learning_rate": 3.7207703703703705e-05, + "loss": 1.8954, + "step": 21595 + }, + { + "epoch": 3.84, + "learning_rate": 3.720474074074074e-05, + "loss": 1.8018, + "step": 21600 + }, + { + "epoch": 3.84, + "learning_rate": 3.720177777777778e-05, + "loss": 1.8887, + "step": 21605 + }, + { + "epoch": 3.84, + "learning_rate": 3.7198814814814814e-05, + "loss": 1.8725, + "step": 21610 + }, + { + "epoch": 3.84, + "learning_rate": 3.719585185185185e-05, + "loss": 2.031, + "step": 21615 + }, + { + "epoch": 3.84, + "learning_rate": 3.7192888888888885e-05, + "loss": 1.9748, + "step": 21620 + }, + { + "epoch": 3.84, + "learning_rate": 3.718992592592593e-05, + "loss": 1.9256, + "step": 21625 + }, + { + "epoch": 3.85, + "learning_rate": 3.718696296296296e-05, + "loss": 1.9378, + "step": 21630 + }, + { + "epoch": 3.85, + "learning_rate": 3.7184e-05, + "loss": 1.9179, + "step": 21635 + }, + { + "epoch": 3.85, + "learning_rate": 3.7181037037037034e-05, + "loss": 1.9444, + "step": 21640 + }, + { + "epoch": 3.85, + "learning_rate": 3.717807407407408e-05, + "loss": 1.9535, + "step": 21645 + }, + { + "epoch": 3.85, + "learning_rate": 3.717511111111111e-05, + "loss": 1.9446, + "step": 21650 + }, + { + "epoch": 3.85, + "learning_rate": 3.717214814814815e-05, + "loss": 1.7618, + "step": 21655 + }, + { + "epoch": 3.85, + "learning_rate": 3.716918518518518e-05, + "loss": 1.8998, + "step": 21660 + }, + { + "epoch": 3.85, + "learning_rate": 3.716622222222222e-05, + "loss": 2.0236, + "step": 21665 + }, + { + "epoch": 3.85, + "learning_rate": 3.716325925925926e-05, + "loss": 1.8581, + "step": 21670 + }, + { + "epoch": 3.85, + "learning_rate": 3.71602962962963e-05, + "loss": 1.9523, + "step": 21675 + }, + { + "epoch": 3.85, + "learning_rate": 3.715733333333333e-05, + "loss": 1.9489, + "step": 21680 + }, + { + "epoch": 3.86, + "learning_rate": 3.715437037037037e-05, + "loss": 2.0347, + "step": 21685 + }, + { + "epoch": 3.86, + "learning_rate": 3.715140740740741e-05, + "loss": 2.0028, + "step": 21690 + }, + { + "epoch": 3.86, + "learning_rate": 3.714844444444445e-05, + "loss": 1.8828, + "step": 21695 + }, + { + "epoch": 3.86, + "learning_rate": 3.714548148148148e-05, + "loss": 1.8815, + "step": 21700 + }, + { + "epoch": 3.86, + "learning_rate": 3.714251851851852e-05, + "loss": 1.9018, + "step": 21705 + }, + { + "epoch": 3.86, + "learning_rate": 3.713955555555556e-05, + "loss": 2.0293, + "step": 21710 + }, + { + "epoch": 3.86, + "learning_rate": 3.7136592592592596e-05, + "loss": 2.035, + "step": 21715 + }, + { + "epoch": 3.86, + "learning_rate": 3.713362962962963e-05, + "loss": 1.9395, + "step": 21720 + }, + { + "epoch": 3.86, + "learning_rate": 3.713066666666667e-05, + "loss": 2.0302, + "step": 21725 + }, + { + "epoch": 3.86, + "learning_rate": 3.7127703703703706e-05, + "loss": 1.9714, + "step": 21730 + }, + { + "epoch": 3.86, + "learning_rate": 3.7124740740740744e-05, + "loss": 1.8971, + "step": 21735 + }, + { + "epoch": 3.86, + "learning_rate": 3.7121777777777776e-05, + "loss": 2.0585, + "step": 21740 + }, + { + "epoch": 3.87, + "learning_rate": 3.7118814814814815e-05, + "loss": 1.925, + "step": 21745 + }, + { + "epoch": 3.87, + "learning_rate": 3.7115851851851854e-05, + "loss": 1.9618, + "step": 21750 + }, + { + "epoch": 3.87, + "learning_rate": 3.711288888888889e-05, + "loss": 1.966, + "step": 21755 + }, + { + "epoch": 3.87, + "learning_rate": 3.7109925925925925e-05, + "loss": 2.0209, + "step": 21760 + }, + { + "epoch": 3.87, + "learning_rate": 3.7106962962962964e-05, + "loss": 1.8004, + "step": 21765 + }, + { + "epoch": 3.87, + "learning_rate": 3.7104e-05, + "loss": 2.0392, + "step": 21770 + }, + { + "epoch": 3.87, + "learning_rate": 3.710103703703704e-05, + "loss": 1.9251, + "step": 21775 + }, + { + "epoch": 3.87, + "learning_rate": 3.7098074074074073e-05, + "loss": 1.9758, + "step": 21780 + }, + { + "epoch": 3.87, + "learning_rate": 3.709511111111111e-05, + "loss": 1.8781, + "step": 21785 + }, + { + "epoch": 3.87, + "learning_rate": 3.709214814814815e-05, + "loss": 1.95, + "step": 21790 + }, + { + "epoch": 3.87, + "learning_rate": 3.708918518518519e-05, + "loss": 1.9053, + "step": 21795 + }, + { + "epoch": 3.88, + "learning_rate": 3.708622222222222e-05, + "loss": 1.8633, + "step": 21800 + }, + { + "epoch": 3.88, + "learning_rate": 3.708325925925926e-05, + "loss": 1.8152, + "step": 21805 + }, + { + "epoch": 3.88, + "learning_rate": 3.70802962962963e-05, + "loss": 1.9397, + "step": 21810 + }, + { + "epoch": 3.88, + "learning_rate": 3.707733333333334e-05, + "loss": 1.9326, + "step": 21815 + }, + { + "epoch": 3.88, + "learning_rate": 3.707437037037037e-05, + "loss": 2.0459, + "step": 21820 + }, + { + "epoch": 3.88, + "learning_rate": 3.707140740740741e-05, + "loss": 2.101, + "step": 21825 + }, + { + "epoch": 3.88, + "learning_rate": 3.706844444444444e-05, + "loss": 1.8689, + "step": 21830 + }, + { + "epoch": 3.88, + "learning_rate": 3.706548148148149e-05, + "loss": 1.9857, + "step": 21835 + }, + { + "epoch": 3.88, + "learning_rate": 3.706251851851852e-05, + "loss": 1.8677, + "step": 21840 + }, + { + "epoch": 3.88, + "learning_rate": 3.705955555555556e-05, + "loss": 1.8501, + "step": 21845 + }, + { + "epoch": 3.88, + "learning_rate": 3.705659259259259e-05, + "loss": 1.9851, + "step": 21850 + }, + { + "epoch": 3.89, + "learning_rate": 3.7053629629629635e-05, + "loss": 2.0446, + "step": 21855 + }, + { + "epoch": 3.89, + "learning_rate": 3.705066666666667e-05, + "loss": 2.0297, + "step": 21860 + }, + { + "epoch": 3.89, + "learning_rate": 3.7047703703703706e-05, + "loss": 1.9441, + "step": 21865 + }, + { + "epoch": 3.89, + "learning_rate": 3.704474074074074e-05, + "loss": 1.951, + "step": 21870 + }, + { + "epoch": 3.89, + "learning_rate": 3.7041777777777784e-05, + "loss": 1.9539, + "step": 21875 + }, + { + "epoch": 3.89, + "learning_rate": 3.7038814814814816e-05, + "loss": 1.8907, + "step": 21880 + }, + { + "epoch": 3.89, + "learning_rate": 3.7035851851851855e-05, + "loss": 2.052, + "step": 21885 + }, + { + "epoch": 3.89, + "learning_rate": 3.703288888888889e-05, + "loss": 1.9617, + "step": 21890 + }, + { + "epoch": 3.89, + "learning_rate": 3.7029925925925926e-05, + "loss": 1.9081, + "step": 21895 + }, + { + "epoch": 3.89, + "learning_rate": 3.7026962962962964e-05, + "loss": 1.9695, + "step": 21900 + }, + { + "epoch": 3.89, + "learning_rate": 3.7024e-05, + "loss": 1.8691, + "step": 21905 + }, + { + "epoch": 3.9, + "learning_rate": 3.7021037037037035e-05, + "loss": 1.9065, + "step": 21910 + }, + { + "epoch": 3.9, + "learning_rate": 3.7018074074074074e-05, + "loss": 1.9233, + "step": 21915 + }, + { + "epoch": 3.9, + "learning_rate": 3.701511111111111e-05, + "loss": 1.912, + "step": 21920 + }, + { + "epoch": 3.9, + "learning_rate": 3.701214814814815e-05, + "loss": 1.9988, + "step": 21925 + }, + { + "epoch": 3.9, + "learning_rate": 3.7009185185185184e-05, + "loss": 1.9526, + "step": 21930 + }, + { + "epoch": 3.9, + "learning_rate": 3.700622222222222e-05, + "loss": 2.0439, + "step": 21935 + }, + { + "epoch": 3.9, + "learning_rate": 3.700325925925926e-05, + "loss": 2.021, + "step": 21940 + }, + { + "epoch": 3.9, + "learning_rate": 3.70002962962963e-05, + "loss": 2.0019, + "step": 21945 + }, + { + "epoch": 3.9, + "learning_rate": 3.699733333333333e-05, + "loss": 1.9089, + "step": 21950 + }, + { + "epoch": 3.9, + "learning_rate": 3.699437037037037e-05, + "loss": 1.9837, + "step": 21955 + }, + { + "epoch": 3.9, + "learning_rate": 3.699140740740741e-05, + "loss": 1.9148, + "step": 21960 + }, + { + "epoch": 3.9, + "learning_rate": 3.698844444444445e-05, + "loss": 1.967, + "step": 21965 + }, + { + "epoch": 3.91, + "learning_rate": 3.698548148148148e-05, + "loss": 1.9087, + "step": 21970 + }, + { + "epoch": 3.91, + "learning_rate": 3.698251851851852e-05, + "loss": 1.9309, + "step": 21975 + }, + { + "epoch": 3.91, + "learning_rate": 3.697955555555556e-05, + "loss": 1.9613, + "step": 21980 + }, + { + "epoch": 3.91, + "learning_rate": 3.69765925925926e-05, + "loss": 1.881, + "step": 21985 + }, + { + "epoch": 3.91, + "learning_rate": 3.697362962962963e-05, + "loss": 1.9466, + "step": 21990 + }, + { + "epoch": 3.91, + "learning_rate": 3.697066666666667e-05, + "loss": 1.9503, + "step": 21995 + }, + { + "epoch": 3.91, + "learning_rate": 3.696770370370371e-05, + "loss": 2.1085, + "step": 22000 + }, + { + "epoch": 3.91, + "learning_rate": 3.6964740740740746e-05, + "loss": 1.9026, + "step": 22005 + }, + { + "epoch": 3.91, + "learning_rate": 3.696177777777778e-05, + "loss": 2.0555, + "step": 22010 + }, + { + "epoch": 3.91, + "learning_rate": 3.695881481481482e-05, + "loss": 1.8716, + "step": 22015 + }, + { + "epoch": 3.91, + "learning_rate": 3.6955851851851856e-05, + "loss": 1.9142, + "step": 22020 + }, + { + "epoch": 3.92, + "learning_rate": 3.6952888888888894e-05, + "loss": 1.9085, + "step": 22025 + }, + { + "epoch": 3.92, + "learning_rate": 3.6949925925925926e-05, + "loss": 2.0172, + "step": 22030 + }, + { + "epoch": 3.92, + "learning_rate": 3.6946962962962965e-05, + "loss": 2.0271, + "step": 22035 + }, + { + "epoch": 3.92, + "learning_rate": 3.6944000000000004e-05, + "loss": 1.9035, + "step": 22040 + }, + { + "epoch": 3.92, + "learning_rate": 3.694103703703704e-05, + "loss": 1.8703, + "step": 22045 + }, + { + "epoch": 3.92, + "learning_rate": 3.6938074074074075e-05, + "loss": 2.0103, + "step": 22050 + }, + { + "epoch": 3.92, + "learning_rate": 3.6935111111111114e-05, + "loss": 1.9128, + "step": 22055 + }, + { + "epoch": 3.92, + "learning_rate": 3.6932148148148146e-05, + "loss": 1.9627, + "step": 22060 + }, + { + "epoch": 3.92, + "learning_rate": 3.692918518518519e-05, + "loss": 1.973, + "step": 22065 + }, + { + "epoch": 3.92, + "learning_rate": 3.6926222222222223e-05, + "loss": 2.063, + "step": 22070 + }, + { + "epoch": 3.92, + "learning_rate": 3.692325925925926e-05, + "loss": 2.0007, + "step": 22075 + }, + { + "epoch": 3.93, + "learning_rate": 3.6920296296296294e-05, + "loss": 2.0242, + "step": 22080 + }, + { + "epoch": 3.93, + "learning_rate": 3.691733333333334e-05, + "loss": 2.0554, + "step": 22085 + }, + { + "epoch": 3.93, + "learning_rate": 3.691437037037037e-05, + "loss": 2.0196, + "step": 22090 + }, + { + "epoch": 3.93, + "learning_rate": 3.691140740740741e-05, + "loss": 2.1185, + "step": 22095 + }, + { + "epoch": 3.93, + "learning_rate": 3.690844444444444e-05, + "loss": 1.8731, + "step": 22100 + }, + { + "epoch": 3.93, + "learning_rate": 3.690548148148149e-05, + "loss": 1.9425, + "step": 22105 + }, + { + "epoch": 3.93, + "learning_rate": 3.690251851851852e-05, + "loss": 1.8755, + "step": 22110 + }, + { + "epoch": 3.93, + "learning_rate": 3.689955555555556e-05, + "loss": 2.0374, + "step": 22115 + }, + { + "epoch": 3.93, + "learning_rate": 3.689659259259259e-05, + "loss": 1.9534, + "step": 22120 + }, + { + "epoch": 3.93, + "learning_rate": 3.689362962962963e-05, + "loss": 2.0297, + "step": 22125 + }, + { + "epoch": 3.93, + "learning_rate": 3.689066666666667e-05, + "loss": 2.0105, + "step": 22130 + }, + { + "epoch": 3.94, + "learning_rate": 3.688770370370371e-05, + "loss": 1.8696, + "step": 22135 + }, + { + "epoch": 3.94, + "learning_rate": 3.688474074074074e-05, + "loss": 1.8431, + "step": 22140 + }, + { + "epoch": 3.94, + "learning_rate": 3.688177777777778e-05, + "loss": 1.9253, + "step": 22145 + }, + { + "epoch": 3.94, + "learning_rate": 3.687881481481482e-05, + "loss": 1.8729, + "step": 22150 + }, + { + "epoch": 3.94, + "learning_rate": 3.6875851851851856e-05, + "loss": 1.9633, + "step": 22155 + }, + { + "epoch": 3.94, + "learning_rate": 3.687288888888889e-05, + "loss": 1.8941, + "step": 22160 + }, + { + "epoch": 3.94, + "learning_rate": 3.686992592592593e-05, + "loss": 1.8762, + "step": 22165 + }, + { + "epoch": 3.94, + "learning_rate": 3.6866962962962966e-05, + "loss": 1.999, + "step": 22170 + }, + { + "epoch": 3.94, + "learning_rate": 3.6864000000000005e-05, + "loss": 1.879, + "step": 22175 + }, + { + "epoch": 3.94, + "learning_rate": 3.686103703703704e-05, + "loss": 1.9317, + "step": 22180 + }, + { + "epoch": 3.94, + "learning_rate": 3.6858074074074076e-05, + "loss": 1.8732, + "step": 22185 + }, + { + "epoch": 3.94, + "learning_rate": 3.6855111111111115e-05, + "loss": 1.9577, + "step": 22190 + }, + { + "epoch": 3.95, + "learning_rate": 3.685214814814815e-05, + "loss": 1.9193, + "step": 22195 + }, + { + "epoch": 3.95, + "learning_rate": 3.6849185185185185e-05, + "loss": 2.0029, + "step": 22200 + }, + { + "epoch": 3.95, + "learning_rate": 3.6846222222222224e-05, + "loss": 1.9735, + "step": 22205 + }, + { + "epoch": 3.95, + "learning_rate": 3.684325925925926e-05, + "loss": 1.999, + "step": 22210 + }, + { + "epoch": 3.95, + "learning_rate": 3.68402962962963e-05, + "loss": 2.0577, + "step": 22215 + }, + { + "epoch": 3.95, + "learning_rate": 3.6837333333333334e-05, + "loss": 2.0269, + "step": 22220 + }, + { + "epoch": 3.95, + "learning_rate": 3.6834962962962964e-05, + "loss": 2.0091, + "step": 22225 + }, + { + "epoch": 3.95, + "learning_rate": 3.6832e-05, + "loss": 2.0179, + "step": 22230 + }, + { + "epoch": 3.95, + "learning_rate": 3.682903703703704e-05, + "loss": 1.9184, + "step": 22235 + }, + { + "epoch": 3.95, + "learning_rate": 3.682607407407407e-05, + "loss": 1.9849, + "step": 22240 + }, + { + "epoch": 3.95, + "learning_rate": 3.682311111111111e-05, + "loss": 2.0387, + "step": 22245 + }, + { + "epoch": 3.96, + "learning_rate": 3.682014814814815e-05, + "loss": 1.9514, + "step": 22250 + }, + { + "epoch": 3.96, + "learning_rate": 3.681718518518519e-05, + "loss": 1.9759, + "step": 22255 + }, + { + "epoch": 3.96, + "learning_rate": 3.681422222222222e-05, + "loss": 2.0762, + "step": 22260 + }, + { + "epoch": 3.96, + "learning_rate": 3.681125925925926e-05, + "loss": 1.9457, + "step": 22265 + }, + { + "epoch": 3.96, + "learning_rate": 3.68082962962963e-05, + "loss": 2.1658, + "step": 22270 + }, + { + "epoch": 3.96, + "learning_rate": 3.680533333333334e-05, + "loss": 2.0151, + "step": 22275 + }, + { + "epoch": 3.96, + "learning_rate": 3.680237037037037e-05, + "loss": 2.0719, + "step": 22280 + }, + { + "epoch": 3.96, + "learning_rate": 3.679940740740741e-05, + "loss": 1.9233, + "step": 22285 + }, + { + "epoch": 3.96, + "learning_rate": 3.679644444444444e-05, + "loss": 1.894, + "step": 22290 + }, + { + "epoch": 3.96, + "learning_rate": 3.679348148148149e-05, + "loss": 1.9605, + "step": 22295 + }, + { + "epoch": 3.96, + "learning_rate": 3.679051851851852e-05, + "loss": 2.0719, + "step": 22300 + }, + { + "epoch": 3.97, + "learning_rate": 3.678755555555556e-05, + "loss": 1.9266, + "step": 22305 + }, + { + "epoch": 3.97, + "learning_rate": 3.678459259259259e-05, + "loss": 1.9575, + "step": 22310 + }, + { + "epoch": 3.97, + "learning_rate": 3.6781629629629635e-05, + "loss": 1.912, + "step": 22315 + }, + { + "epoch": 3.97, + "learning_rate": 3.677866666666667e-05, + "loss": 1.8704, + "step": 22320 + }, + { + "epoch": 3.97, + "learning_rate": 3.6775703703703706e-05, + "loss": 1.914, + "step": 22325 + }, + { + "epoch": 3.97, + "learning_rate": 3.677274074074074e-05, + "loss": 1.8973, + "step": 22330 + }, + { + "epoch": 3.97, + "learning_rate": 3.6769777777777784e-05, + "loss": 2.0735, + "step": 22335 + }, + { + "epoch": 3.97, + "learning_rate": 3.6766814814814816e-05, + "loss": 1.9187, + "step": 22340 + }, + { + "epoch": 3.97, + "learning_rate": 3.6763851851851855e-05, + "loss": 2.0702, + "step": 22345 + }, + { + "epoch": 3.97, + "learning_rate": 3.676088888888889e-05, + "loss": 1.928, + "step": 22350 + }, + { + "epoch": 3.97, + "learning_rate": 3.675792592592593e-05, + "loss": 1.7717, + "step": 22355 + }, + { + "epoch": 3.98, + "learning_rate": 3.6754962962962964e-05, + "loss": 2.1978, + "step": 22360 + }, + { + "epoch": 3.98, + "learning_rate": 3.6752e-05, + "loss": 2.0037, + "step": 22365 + }, + { + "epoch": 3.98, + "learning_rate": 3.6749037037037035e-05, + "loss": 1.9325, + "step": 22370 + }, + { + "epoch": 3.98, + "learning_rate": 3.6746074074074074e-05, + "loss": 2.0281, + "step": 22375 + }, + { + "epoch": 3.98, + "learning_rate": 3.674311111111111e-05, + "loss": 2.0673, + "step": 22380 + }, + { + "epoch": 3.98, + "learning_rate": 3.674014814814815e-05, + "loss": 1.8333, + "step": 22385 + }, + { + "epoch": 3.98, + "learning_rate": 3.6737185185185184e-05, + "loss": 1.9408, + "step": 22390 + }, + { + "epoch": 3.98, + "learning_rate": 3.673422222222222e-05, + "loss": 1.9469, + "step": 22395 + }, + { + "epoch": 3.98, + "learning_rate": 3.673125925925926e-05, + "loss": 2.0528, + "step": 22400 + }, + { + "epoch": 3.98, + "learning_rate": 3.67282962962963e-05, + "loss": 1.9336, + "step": 22405 + }, + { + "epoch": 3.98, + "learning_rate": 3.672533333333333e-05, + "loss": 1.9752, + "step": 22410 + }, + { + "epoch": 3.98, + "learning_rate": 3.672237037037037e-05, + "loss": 2.0135, + "step": 22415 + }, + { + "epoch": 3.99, + "learning_rate": 3.671940740740741e-05, + "loss": 2.026, + "step": 22420 + }, + { + "epoch": 3.99, + "learning_rate": 3.671644444444445e-05, + "loss": 2.0687, + "step": 22425 + }, + { + "epoch": 3.99, + "learning_rate": 3.671348148148148e-05, + "loss": 2.016, + "step": 22430 + }, + { + "epoch": 3.99, + "learning_rate": 3.671051851851852e-05, + "loss": 2.0342, + "step": 22435 + }, + { + "epoch": 3.99, + "learning_rate": 3.670755555555556e-05, + "loss": 1.8784, + "step": 22440 + }, + { + "epoch": 3.99, + "learning_rate": 3.67045925925926e-05, + "loss": 1.8306, + "step": 22445 + }, + { + "epoch": 3.99, + "learning_rate": 3.670162962962963e-05, + "loss": 1.9235, + "step": 22450 + }, + { + "epoch": 3.99, + "learning_rate": 3.669866666666667e-05, + "loss": 2.011, + "step": 22455 + }, + { + "epoch": 3.99, + "learning_rate": 3.669570370370371e-05, + "loss": 1.9513, + "step": 22460 + }, + { + "epoch": 3.99, + "learning_rate": 3.6692740740740746e-05, + "loss": 2.0292, + "step": 22465 + }, + { + "epoch": 3.99, + "learning_rate": 3.668977777777778e-05, + "loss": 1.7799, + "step": 22470 + }, + { + "epoch": 4.0, + "learning_rate": 3.668681481481482e-05, + "loss": 1.912, + "step": 22475 + }, + { + "epoch": 4.0, + "learning_rate": 3.6683851851851855e-05, + "loss": 1.9549, + "step": 22480 + }, + { + "epoch": 4.0, + "learning_rate": 3.6680888888888894e-05, + "loss": 2.0827, + "step": 22485 + }, + { + "epoch": 4.0, + "learning_rate": 3.6677925925925926e-05, + "loss": 1.9584, + "step": 22490 + }, + { + "epoch": 4.0, + "learning_rate": 3.6674962962962965e-05, + "loss": 1.946, + "step": 22495 + }, + { + "epoch": 4.0, + "learning_rate": 3.6672000000000004e-05, + "loss": 1.9209, + "step": 22500 + }, + { + "epoch": 4.0, + "learning_rate": 3.666903703703704e-05, + "loss": 1.7846, + "step": 22505 + }, + { + "epoch": 4.0, + "learning_rate": 3.6666074074074075e-05, + "loss": 1.9142, + "step": 22510 + }, + { + "epoch": 4.0, + "learning_rate": 3.6663111111111114e-05, + "loss": 1.6891, + "step": 22515 + }, + { + "epoch": 4.0, + "learning_rate": 3.6660148148148146e-05, + "loss": 1.7633, + "step": 22520 + }, + { + "epoch": 4.0, + "learning_rate": 3.665718518518519e-05, + "loss": 1.7224, + "step": 22525 + }, + { + "epoch": 4.01, + "learning_rate": 3.665422222222222e-05, + "loss": 1.8495, + "step": 22530 + }, + { + "epoch": 4.01, + "learning_rate": 3.665125925925926e-05, + "loss": 1.7849, + "step": 22535 + }, + { + "epoch": 4.01, + "learning_rate": 3.6648296296296294e-05, + "loss": 1.784, + "step": 22540 + }, + { + "epoch": 4.01, + "learning_rate": 3.664533333333334e-05, + "loss": 1.8489, + "step": 22545 + }, + { + "epoch": 4.01, + "learning_rate": 3.664237037037037e-05, + "loss": 1.8199, + "step": 22550 + }, + { + "epoch": 4.01, + "learning_rate": 3.663940740740741e-05, + "loss": 1.8022, + "step": 22555 + }, + { + "epoch": 4.01, + "learning_rate": 3.663644444444444e-05, + "loss": 1.8269, + "step": 22560 + }, + { + "epoch": 4.01, + "learning_rate": 3.663348148148149e-05, + "loss": 1.875, + "step": 22565 + }, + { + "epoch": 4.01, + "learning_rate": 3.663051851851852e-05, + "loss": 1.8675, + "step": 22570 + }, + { + "epoch": 4.01, + "learning_rate": 3.662755555555556e-05, + "loss": 1.7677, + "step": 22575 + }, + { + "epoch": 4.01, + "learning_rate": 3.662459259259259e-05, + "loss": 1.6193, + "step": 22580 + }, + { + "epoch": 4.02, + "learning_rate": 3.662162962962964e-05, + "loss": 1.7538, + "step": 22585 + }, + { + "epoch": 4.02, + "learning_rate": 3.661866666666667e-05, + "loss": 1.9258, + "step": 22590 + }, + { + "epoch": 4.02, + "learning_rate": 3.661570370370371e-05, + "loss": 1.8371, + "step": 22595 + }, + { + "epoch": 4.02, + "learning_rate": 3.661274074074074e-05, + "loss": 1.7307, + "step": 22600 + }, + { + "epoch": 4.02, + "learning_rate": 3.660977777777778e-05, + "loss": 1.8381, + "step": 22605 + }, + { + "epoch": 4.02, + "learning_rate": 3.660681481481482e-05, + "loss": 1.777, + "step": 22610 + }, + { + "epoch": 4.02, + "learning_rate": 3.6603851851851856e-05, + "loss": 1.8284, + "step": 22615 + }, + { + "epoch": 4.02, + "learning_rate": 3.660088888888889e-05, + "loss": 1.7321, + "step": 22620 + }, + { + "epoch": 4.02, + "learning_rate": 3.659792592592593e-05, + "loss": 1.7577, + "step": 22625 + }, + { + "epoch": 4.02, + "learning_rate": 3.6594962962962966e-05, + "loss": 1.8464, + "step": 22630 + }, + { + "epoch": 4.02, + "learning_rate": 3.6592000000000005e-05, + "loss": 1.8259, + "step": 22635 + }, + { + "epoch": 4.02, + "learning_rate": 3.658903703703704e-05, + "loss": 1.7938, + "step": 22640 + }, + { + "epoch": 4.03, + "learning_rate": 3.6586074074074076e-05, + "loss": 1.8788, + "step": 22645 + }, + { + "epoch": 4.03, + "learning_rate": 3.6583111111111114e-05, + "loss": 1.8338, + "step": 22650 + }, + { + "epoch": 4.03, + "learning_rate": 3.658014814814815e-05, + "loss": 1.8972, + "step": 22655 + }, + { + "epoch": 4.03, + "learning_rate": 3.6577185185185185e-05, + "loss": 1.6231, + "step": 22660 + }, + { + "epoch": 4.03, + "learning_rate": 3.6574222222222224e-05, + "loss": 1.9287, + "step": 22665 + }, + { + "epoch": 4.03, + "learning_rate": 3.657125925925926e-05, + "loss": 1.8256, + "step": 22670 + }, + { + "epoch": 4.03, + "learning_rate": 3.65682962962963e-05, + "loss": 1.7844, + "step": 22675 + }, + { + "epoch": 4.03, + "learning_rate": 3.6565333333333334e-05, + "loss": 1.8454, + "step": 22680 + }, + { + "epoch": 4.03, + "learning_rate": 3.656237037037037e-05, + "loss": 1.806, + "step": 22685 + }, + { + "epoch": 4.03, + "learning_rate": 3.655940740740741e-05, + "loss": 1.7871, + "step": 22690 + }, + { + "epoch": 4.03, + "learning_rate": 3.655644444444445e-05, + "loss": 1.7787, + "step": 22695 + }, + { + "epoch": 4.04, + "learning_rate": 3.655348148148148e-05, + "loss": 1.8291, + "step": 22700 + }, + { + "epoch": 4.04, + "learning_rate": 3.655051851851852e-05, + "loss": 1.8615, + "step": 22705 + }, + { + "epoch": 4.04, + "learning_rate": 3.654755555555556e-05, + "loss": 1.8746, + "step": 22710 + }, + { + "epoch": 4.04, + "learning_rate": 3.65445925925926e-05, + "loss": 1.664, + "step": 22715 + }, + { + "epoch": 4.04, + "learning_rate": 3.654162962962963e-05, + "loss": 1.87, + "step": 22720 + }, + { + "epoch": 4.04, + "learning_rate": 3.653866666666667e-05, + "loss": 1.9176, + "step": 22725 + }, + { + "epoch": 4.04, + "learning_rate": 3.653570370370371e-05, + "loss": 1.8991, + "step": 22730 + }, + { + "epoch": 4.04, + "learning_rate": 3.653274074074075e-05, + "loss": 1.8384, + "step": 22735 + }, + { + "epoch": 4.04, + "learning_rate": 3.652977777777778e-05, + "loss": 1.8157, + "step": 22740 + }, + { + "epoch": 4.04, + "learning_rate": 3.652681481481482e-05, + "loss": 1.8445, + "step": 22745 + }, + { + "epoch": 4.04, + "learning_rate": 3.652385185185185e-05, + "loss": 1.8351, + "step": 22750 + }, + { + "epoch": 4.05, + "learning_rate": 3.6520888888888896e-05, + "loss": 1.7864, + "step": 22755 + }, + { + "epoch": 4.05, + "learning_rate": 3.651792592592593e-05, + "loss": 1.7783, + "step": 22760 + }, + { + "epoch": 4.05, + "learning_rate": 3.651496296296297e-05, + "loss": 1.8957, + "step": 22765 + }, + { + "epoch": 4.05, + "learning_rate": 3.6512e-05, + "loss": 1.9294, + "step": 22770 + }, + { + "epoch": 4.05, + "learning_rate": 3.650903703703704e-05, + "loss": 1.8531, + "step": 22775 + }, + { + "epoch": 4.05, + "learning_rate": 3.6506074074074076e-05, + "loss": 1.9054, + "step": 22780 + }, + { + "epoch": 4.05, + "learning_rate": 3.650311111111111e-05, + "loss": 1.7755, + "step": 22785 + }, + { + "epoch": 4.05, + "learning_rate": 3.650014814814815e-05, + "loss": 1.8592, + "step": 22790 + }, + { + "epoch": 4.05, + "learning_rate": 3.6497185185185186e-05, + "loss": 1.8501, + "step": 22795 + }, + { + "epoch": 4.05, + "learning_rate": 3.6494222222222225e-05, + "loss": 1.7843, + "step": 22800 + }, + { + "epoch": 4.05, + "learning_rate": 3.649125925925926e-05, + "loss": 1.7665, + "step": 22805 + }, + { + "epoch": 4.06, + "learning_rate": 3.6488296296296296e-05, + "loss": 1.9824, + "step": 22810 + }, + { + "epoch": 4.06, + "learning_rate": 3.6485333333333335e-05, + "loss": 1.8467, + "step": 22815 + }, + { + "epoch": 4.06, + "learning_rate": 3.6482370370370373e-05, + "loss": 1.8848, + "step": 22820 + }, + { + "epoch": 4.06, + "learning_rate": 3.6479407407407405e-05, + "loss": 1.7562, + "step": 22825 + }, + { + "epoch": 4.06, + "learning_rate": 3.6476444444444444e-05, + "loss": 1.934, + "step": 22830 + }, + { + "epoch": 4.06, + "learning_rate": 3.647348148148148e-05, + "loss": 1.8489, + "step": 22835 + }, + { + "epoch": 4.06, + "learning_rate": 3.647051851851852e-05, + "loss": 1.8435, + "step": 22840 + }, + { + "epoch": 4.06, + "learning_rate": 3.6467555555555554e-05, + "loss": 1.6824, + "step": 22845 + }, + { + "epoch": 4.06, + "learning_rate": 3.646459259259259e-05, + "loss": 1.8905, + "step": 22850 + }, + { + "epoch": 4.06, + "learning_rate": 3.646162962962963e-05, + "loss": 1.812, + "step": 22855 + }, + { + "epoch": 4.06, + "learning_rate": 3.645866666666667e-05, + "loss": 1.8359, + "step": 22860 + }, + { + "epoch": 4.06, + "learning_rate": 3.64557037037037e-05, + "loss": 1.8117, + "step": 22865 + }, + { + "epoch": 4.07, + "learning_rate": 3.645274074074074e-05, + "loss": 1.7905, + "step": 22870 + }, + { + "epoch": 4.07, + "learning_rate": 3.644977777777778e-05, + "loss": 1.9334, + "step": 22875 + }, + { + "epoch": 4.07, + "learning_rate": 3.644681481481482e-05, + "loss": 1.7589, + "step": 22880 + }, + { + "epoch": 4.07, + "learning_rate": 3.644385185185185e-05, + "loss": 1.9271, + "step": 22885 + }, + { + "epoch": 4.07, + "learning_rate": 3.644088888888889e-05, + "loss": 1.7317, + "step": 22890 + }, + { + "epoch": 4.07, + "learning_rate": 3.643792592592593e-05, + "loss": 1.8193, + "step": 22895 + }, + { + "epoch": 4.07, + "learning_rate": 3.643496296296297e-05, + "loss": 1.8843, + "step": 22900 + }, + { + "epoch": 4.07, + "learning_rate": 3.6432e-05, + "loss": 1.8023, + "step": 22905 + }, + { + "epoch": 4.07, + "learning_rate": 3.642903703703704e-05, + "loss": 1.837, + "step": 22910 + }, + { + "epoch": 4.07, + "learning_rate": 3.642607407407407e-05, + "loss": 1.7608, + "step": 22915 + }, + { + "epoch": 4.07, + "learning_rate": 3.6423111111111116e-05, + "loss": 1.9327, + "step": 22920 + }, + { + "epoch": 4.08, + "learning_rate": 3.642014814814815e-05, + "loss": 1.8393, + "step": 22925 + }, + { + "epoch": 4.08, + "learning_rate": 3.641718518518519e-05, + "loss": 1.8862, + "step": 22930 + }, + { + "epoch": 4.08, + "learning_rate": 3.641422222222222e-05, + "loss": 1.7127, + "step": 22935 + }, + { + "epoch": 4.08, + "learning_rate": 3.6411259259259264e-05, + "loss": 1.9046, + "step": 22940 + }, + { + "epoch": 4.08, + "learning_rate": 3.6408296296296297e-05, + "loss": 1.8927, + "step": 22945 + }, + { + "epoch": 4.08, + "learning_rate": 3.6405333333333335e-05, + "loss": 1.9441, + "step": 22950 + }, + { + "epoch": 4.08, + "learning_rate": 3.640237037037037e-05, + "loss": 1.8953, + "step": 22955 + }, + { + "epoch": 4.08, + "learning_rate": 3.639940740740741e-05, + "loss": 1.7568, + "step": 22960 + }, + { + "epoch": 4.08, + "learning_rate": 3.6396444444444445e-05, + "loss": 1.7743, + "step": 22965 + }, + { + "epoch": 4.08, + "learning_rate": 3.6393481481481484e-05, + "loss": 1.8492, + "step": 22970 + }, + { + "epoch": 4.08, + "learning_rate": 3.6390518518518516e-05, + "loss": 1.8732, + "step": 22975 + }, + { + "epoch": 4.09, + "learning_rate": 3.6387555555555555e-05, + "loss": 1.8879, + "step": 22980 + }, + { + "epoch": 4.09, + "learning_rate": 3.6384592592592594e-05, + "loss": 1.7628, + "step": 22985 + }, + { + "epoch": 4.09, + "learning_rate": 3.638162962962963e-05, + "loss": 1.8158, + "step": 22990 + }, + { + "epoch": 4.09, + "learning_rate": 3.6378666666666664e-05, + "loss": 1.8153, + "step": 22995 + }, + { + "epoch": 4.09, + "learning_rate": 3.63757037037037e-05, + "loss": 1.8163, + "step": 23000 + }, + { + "epoch": 4.09, + "learning_rate": 3.637274074074074e-05, + "loss": 1.8222, + "step": 23005 + }, + { + "epoch": 4.09, + "learning_rate": 3.636977777777778e-05, + "loss": 1.9337, + "step": 23010 + }, + { + "epoch": 4.09, + "learning_rate": 3.636681481481481e-05, + "loss": 1.9534, + "step": 23015 + }, + { + "epoch": 4.09, + "learning_rate": 3.636385185185185e-05, + "loss": 1.8636, + "step": 23020 + }, + { + "epoch": 4.09, + "learning_rate": 3.636088888888889e-05, + "loss": 1.8016, + "step": 23025 + }, + { + "epoch": 4.09, + "learning_rate": 3.635792592592593e-05, + "loss": 1.8423, + "step": 23030 + }, + { + "epoch": 4.1, + "learning_rate": 3.635496296296296e-05, + "loss": 1.6686, + "step": 23035 + }, + { + "epoch": 4.1, + "learning_rate": 3.6352e-05, + "loss": 1.7344, + "step": 23040 + }, + { + "epoch": 4.1, + "learning_rate": 3.634903703703704e-05, + "loss": 1.9504, + "step": 23045 + }, + { + "epoch": 4.1, + "learning_rate": 3.634607407407408e-05, + "loss": 1.8448, + "step": 23050 + }, + { + "epoch": 4.1, + "learning_rate": 3.634311111111111e-05, + "loss": 1.8852, + "step": 23055 + }, + { + "epoch": 4.1, + "learning_rate": 3.634014814814815e-05, + "loss": 1.8572, + "step": 23060 + }, + { + "epoch": 4.1, + "learning_rate": 3.633718518518519e-05, + "loss": 2.0033, + "step": 23065 + }, + { + "epoch": 4.1, + "learning_rate": 3.6334222222222226e-05, + "loss": 1.8127, + "step": 23070 + }, + { + "epoch": 4.1, + "learning_rate": 3.633125925925926e-05, + "loss": 1.8652, + "step": 23075 + }, + { + "epoch": 4.1, + "learning_rate": 3.63282962962963e-05, + "loss": 1.926, + "step": 23080 + }, + { + "epoch": 4.1, + "learning_rate": 3.6325333333333336e-05, + "loss": 1.8205, + "step": 23085 + }, + { + "epoch": 4.1, + "learning_rate": 3.6322370370370375e-05, + "loss": 1.9406, + "step": 23090 + }, + { + "epoch": 4.11, + "learning_rate": 3.631940740740741e-05, + "loss": 1.7483, + "step": 23095 + }, + { + "epoch": 4.11, + "learning_rate": 3.6316444444444446e-05, + "loss": 1.7942, + "step": 23100 + }, + { + "epoch": 4.11, + "learning_rate": 3.6313481481481485e-05, + "loss": 1.9152, + "step": 23105 + }, + { + "epoch": 4.11, + "learning_rate": 3.6310518518518523e-05, + "loss": 1.7856, + "step": 23110 + }, + { + "epoch": 4.11, + "learning_rate": 3.6307555555555555e-05, + "loss": 1.8829, + "step": 23115 + }, + { + "epoch": 4.11, + "learning_rate": 3.6304592592592594e-05, + "loss": 1.9182, + "step": 23120 + }, + { + "epoch": 4.11, + "learning_rate": 3.630162962962963e-05, + "loss": 1.9467, + "step": 23125 + }, + { + "epoch": 4.11, + "learning_rate": 3.629866666666667e-05, + "loss": 1.8157, + "step": 23130 + }, + { + "epoch": 4.11, + "learning_rate": 3.6295703703703704e-05, + "loss": 1.9435, + "step": 23135 + }, + { + "epoch": 4.11, + "learning_rate": 3.629274074074074e-05, + "loss": 1.9298, + "step": 23140 + }, + { + "epoch": 4.11, + "learning_rate": 3.6289777777777775e-05, + "loss": 1.82, + "step": 23145 + }, + { + "epoch": 4.12, + "learning_rate": 3.628681481481482e-05, + "loss": 1.7396, + "step": 23150 + }, + { + "epoch": 4.12, + "learning_rate": 3.628385185185185e-05, + "loss": 1.8248, + "step": 23155 + }, + { + "epoch": 4.12, + "learning_rate": 3.628088888888889e-05, + "loss": 1.8267, + "step": 23160 + }, + { + "epoch": 4.12, + "learning_rate": 3.627792592592592e-05, + "loss": 1.8337, + "step": 23165 + }, + { + "epoch": 4.12, + "learning_rate": 3.627496296296297e-05, + "loss": 1.9368, + "step": 23170 + }, + { + "epoch": 4.12, + "learning_rate": 3.6272e-05, + "loss": 1.9274, + "step": 23175 + }, + { + "epoch": 4.12, + "learning_rate": 3.626903703703704e-05, + "loss": 1.8147, + "step": 23180 + }, + { + "epoch": 4.12, + "learning_rate": 3.626607407407407e-05, + "loss": 1.8806, + "step": 23185 + }, + { + "epoch": 4.12, + "learning_rate": 3.626311111111112e-05, + "loss": 1.8838, + "step": 23190 + }, + { + "epoch": 4.12, + "learning_rate": 3.626014814814815e-05, + "loss": 1.8962, + "step": 23195 + }, + { + "epoch": 4.12, + "learning_rate": 3.625718518518519e-05, + "loss": 1.9163, + "step": 23200 + }, + { + "epoch": 4.13, + "learning_rate": 3.625422222222222e-05, + "loss": 1.8, + "step": 23205 + }, + { + "epoch": 4.13, + "learning_rate": 3.625125925925926e-05, + "loss": 1.8729, + "step": 23210 + }, + { + "epoch": 4.13, + "learning_rate": 3.62482962962963e-05, + "loss": 1.7963, + "step": 23215 + }, + { + "epoch": 4.13, + "learning_rate": 3.624533333333334e-05, + "loss": 1.9343, + "step": 23220 + }, + { + "epoch": 4.13, + "learning_rate": 3.624237037037037e-05, + "loss": 1.8489, + "step": 23225 + }, + { + "epoch": 4.13, + "learning_rate": 3.623940740740741e-05, + "loss": 1.8235, + "step": 23230 + }, + { + "epoch": 4.13, + "learning_rate": 3.6236444444444447e-05, + "loss": 2.0098, + "step": 23235 + }, + { + "epoch": 4.13, + "learning_rate": 3.6233481481481485e-05, + "loss": 1.9015, + "step": 23240 + }, + { + "epoch": 4.13, + "learning_rate": 3.623051851851852e-05, + "loss": 1.9523, + "step": 23245 + }, + { + "epoch": 4.13, + "learning_rate": 3.6227555555555556e-05, + "loss": 1.8714, + "step": 23250 + }, + { + "epoch": 4.13, + "learning_rate": 3.6224592592592595e-05, + "loss": 1.788, + "step": 23255 + }, + { + "epoch": 4.14, + "learning_rate": 3.6221629629629634e-05, + "loss": 1.8974, + "step": 23260 + }, + { + "epoch": 4.14, + "learning_rate": 3.6218666666666666e-05, + "loss": 1.8034, + "step": 23265 + }, + { + "epoch": 4.14, + "learning_rate": 3.6215703703703705e-05, + "loss": 1.7945, + "step": 23270 + }, + { + "epoch": 4.14, + "learning_rate": 3.6212740740740744e-05, + "loss": 1.9759, + "step": 23275 + }, + { + "epoch": 4.14, + "learning_rate": 3.620977777777778e-05, + "loss": 1.8251, + "step": 23280 + }, + { + "epoch": 4.14, + "learning_rate": 3.6206814814814814e-05, + "loss": 1.754, + "step": 23285 + }, + { + "epoch": 4.14, + "learning_rate": 3.620385185185185e-05, + "loss": 1.8727, + "step": 23290 + }, + { + "epoch": 4.14, + "learning_rate": 3.620088888888889e-05, + "loss": 1.8385, + "step": 23295 + }, + { + "epoch": 4.14, + "learning_rate": 3.619792592592593e-05, + "loss": 1.9065, + "step": 23300 + }, + { + "epoch": 4.14, + "learning_rate": 3.619496296296296e-05, + "loss": 1.9687, + "step": 23305 + }, + { + "epoch": 4.14, + "learning_rate": 3.6192e-05, + "loss": 1.8316, + "step": 23310 + }, + { + "epoch": 4.14, + "learning_rate": 3.618903703703704e-05, + "loss": 1.9082, + "step": 23315 + }, + { + "epoch": 4.15, + "learning_rate": 3.618607407407408e-05, + "loss": 1.6368, + "step": 23320 + }, + { + "epoch": 4.15, + "learning_rate": 3.618311111111111e-05, + "loss": 1.7399, + "step": 23325 + }, + { + "epoch": 4.15, + "learning_rate": 3.618014814814815e-05, + "loss": 1.8213, + "step": 23330 + }, + { + "epoch": 4.15, + "learning_rate": 3.617718518518519e-05, + "loss": 1.7772, + "step": 23335 + }, + { + "epoch": 4.15, + "learning_rate": 3.617422222222223e-05, + "loss": 1.8351, + "step": 23340 + }, + { + "epoch": 4.15, + "learning_rate": 3.617125925925926e-05, + "loss": 1.8276, + "step": 23345 + }, + { + "epoch": 4.15, + "learning_rate": 3.61682962962963e-05, + "loss": 1.7526, + "step": 23350 + }, + { + "epoch": 4.15, + "learning_rate": 3.616533333333334e-05, + "loss": 1.905, + "step": 23355 + }, + { + "epoch": 4.15, + "learning_rate": 3.6162370370370376e-05, + "loss": 1.9396, + "step": 23360 + }, + { + "epoch": 4.15, + "learning_rate": 3.615940740740741e-05, + "loss": 1.8504, + "step": 23365 + }, + { + "epoch": 4.15, + "learning_rate": 3.615644444444445e-05, + "loss": 1.7356, + "step": 23370 + }, + { + "epoch": 4.16, + "learning_rate": 3.615348148148148e-05, + "loss": 1.8353, + "step": 23375 + }, + { + "epoch": 4.16, + "learning_rate": 3.6150518518518525e-05, + "loss": 1.7861, + "step": 23380 + }, + { + "epoch": 4.16, + "learning_rate": 3.614755555555556e-05, + "loss": 1.7629, + "step": 23385 + }, + { + "epoch": 4.16, + "learning_rate": 3.6144592592592596e-05, + "loss": 2.0237, + "step": 23390 + }, + { + "epoch": 4.16, + "learning_rate": 3.614162962962963e-05, + "loss": 1.8321, + "step": 23395 + }, + { + "epoch": 4.16, + "learning_rate": 3.6138666666666673e-05, + "loss": 1.9313, + "step": 23400 + }, + { + "epoch": 4.16, + "learning_rate": 3.6135703703703706e-05, + "loss": 1.9016, + "step": 23405 + }, + { + "epoch": 4.16, + "learning_rate": 3.6132740740740744e-05, + "loss": 1.6967, + "step": 23410 + }, + { + "epoch": 4.16, + "learning_rate": 3.6129777777777776e-05, + "loss": 1.7836, + "step": 23415 + }, + { + "epoch": 4.16, + "learning_rate": 3.612681481481482e-05, + "loss": 1.8029, + "step": 23420 + }, + { + "epoch": 4.16, + "learning_rate": 3.6123851851851854e-05, + "loss": 1.8984, + "step": 23425 + }, + { + "epoch": 4.17, + "learning_rate": 3.612088888888889e-05, + "loss": 1.7787, + "step": 23430 + }, + { + "epoch": 4.17, + "learning_rate": 3.6117925925925925e-05, + "loss": 1.6863, + "step": 23435 + }, + { + "epoch": 4.17, + "learning_rate": 3.6114962962962964e-05, + "loss": 1.8302, + "step": 23440 + }, + { + "epoch": 4.17, + "learning_rate": 3.6112e-05, + "loss": 1.8805, + "step": 23445 + }, + { + "epoch": 4.17, + "learning_rate": 3.610903703703704e-05, + "loss": 1.6919, + "step": 23450 + }, + { + "epoch": 4.17, + "learning_rate": 3.6106074074074073e-05, + "loss": 1.8433, + "step": 23455 + }, + { + "epoch": 4.17, + "learning_rate": 3.610311111111111e-05, + "loss": 1.8331, + "step": 23460 + }, + { + "epoch": 4.17, + "learning_rate": 3.610014814814815e-05, + "loss": 1.8952, + "step": 23465 + }, + { + "epoch": 4.17, + "learning_rate": 3.609718518518519e-05, + "loss": 1.861, + "step": 23470 + }, + { + "epoch": 4.17, + "learning_rate": 3.609422222222222e-05, + "loss": 1.9349, + "step": 23475 + }, + { + "epoch": 4.17, + "learning_rate": 3.609125925925926e-05, + "loss": 1.8969, + "step": 23480 + }, + { + "epoch": 4.18, + "learning_rate": 3.60882962962963e-05, + "loss": 1.9112, + "step": 23485 + }, + { + "epoch": 4.18, + "learning_rate": 3.608533333333334e-05, + "loss": 1.9284, + "step": 23490 + }, + { + "epoch": 4.18, + "learning_rate": 3.608237037037037e-05, + "loss": 1.8119, + "step": 23495 + }, + { + "epoch": 4.18, + "learning_rate": 3.607940740740741e-05, + "loss": 1.899, + "step": 23500 + }, + { + "epoch": 4.18, + "learning_rate": 3.607644444444445e-05, + "loss": 1.796, + "step": 23505 + }, + { + "epoch": 4.18, + "learning_rate": 3.607348148148149e-05, + "loss": 1.8193, + "step": 23510 + }, + { + "epoch": 4.18, + "learning_rate": 3.607051851851852e-05, + "loss": 1.9024, + "step": 23515 + }, + { + "epoch": 4.18, + "learning_rate": 3.606755555555556e-05, + "loss": 1.8121, + "step": 23520 + }, + { + "epoch": 4.18, + "learning_rate": 3.6064592592592597e-05, + "loss": 1.8408, + "step": 23525 + }, + { + "epoch": 4.18, + "learning_rate": 3.6061629629629635e-05, + "loss": 1.9334, + "step": 23530 + }, + { + "epoch": 4.18, + "learning_rate": 3.605866666666667e-05, + "loss": 1.879, + "step": 23535 + }, + { + "epoch": 4.18, + "learning_rate": 3.6055703703703706e-05, + "loss": 1.9985, + "step": 23540 + }, + { + "epoch": 4.19, + "learning_rate": 3.6052740740740745e-05, + "loss": 1.9182, + "step": 23545 + }, + { + "epoch": 4.19, + "learning_rate": 3.604977777777778e-05, + "loss": 1.9913, + "step": 23550 + }, + { + "epoch": 4.19, + "learning_rate": 3.6046814814814816e-05, + "loss": 1.8068, + "step": 23555 + }, + { + "epoch": 4.19, + "learning_rate": 3.604385185185185e-05, + "loss": 1.7911, + "step": 23560 + }, + { + "epoch": 4.19, + "learning_rate": 3.6040888888888894e-05, + "loss": 1.7813, + "step": 23565 + }, + { + "epoch": 4.19, + "learning_rate": 3.6037925925925926e-05, + "loss": 1.8827, + "step": 23570 + }, + { + "epoch": 4.19, + "learning_rate": 3.6034962962962964e-05, + "loss": 1.7944, + "step": 23575 + }, + { + "epoch": 4.19, + "learning_rate": 3.6031999999999997e-05, + "loss": 1.8562, + "step": 23580 + }, + { + "epoch": 4.19, + "learning_rate": 3.602903703703704e-05, + "loss": 1.8451, + "step": 23585 + }, + { + "epoch": 4.19, + "learning_rate": 3.6026074074074074e-05, + "loss": 1.8021, + "step": 23590 + }, + { + "epoch": 4.19, + "learning_rate": 3.602311111111111e-05, + "loss": 1.866, + "step": 23595 + }, + { + "epoch": 4.2, + "learning_rate": 3.6020148148148145e-05, + "loss": 1.934, + "step": 23600 + }, + { + "epoch": 4.2, + "learning_rate": 3.6017185185185184e-05, + "loss": 1.9609, + "step": 23605 + }, + { + "epoch": 4.2, + "learning_rate": 3.601422222222222e-05, + "loss": 1.8684, + "step": 23610 + }, + { + "epoch": 4.2, + "learning_rate": 3.601125925925926e-05, + "loss": 1.9892, + "step": 23615 + }, + { + "epoch": 4.2, + "learning_rate": 3.6008296296296294e-05, + "loss": 1.8274, + "step": 23620 + }, + { + "epoch": 4.2, + "learning_rate": 3.600533333333333e-05, + "loss": 1.7849, + "step": 23625 + }, + { + "epoch": 4.2, + "learning_rate": 3.600237037037037e-05, + "loss": 1.872, + "step": 23630 + }, + { + "epoch": 4.2, + "learning_rate": 3.599940740740741e-05, + "loss": 1.9701, + "step": 23635 + }, + { + "epoch": 4.2, + "learning_rate": 3.599644444444444e-05, + "loss": 1.8229, + "step": 23640 + }, + { + "epoch": 4.2, + "learning_rate": 3.599348148148148e-05, + "loss": 1.8587, + "step": 23645 + }, + { + "epoch": 4.2, + "learning_rate": 3.599051851851852e-05, + "loss": 1.8681, + "step": 23650 + }, + { + "epoch": 4.21, + "learning_rate": 3.598755555555556e-05, + "loss": 1.8745, + "step": 23655 + }, + { + "epoch": 4.21, + "learning_rate": 3.598459259259259e-05, + "loss": 1.8708, + "step": 23660 + }, + { + "epoch": 4.21, + "learning_rate": 3.598162962962963e-05, + "loss": 1.7522, + "step": 23665 + }, + { + "epoch": 4.21, + "learning_rate": 3.597866666666667e-05, + "loss": 1.8429, + "step": 23670 + }, + { + "epoch": 4.21, + "learning_rate": 3.597570370370371e-05, + "loss": 1.8165, + "step": 23675 + }, + { + "epoch": 4.21, + "learning_rate": 3.597274074074074e-05, + "loss": 1.9381, + "step": 23680 + }, + { + "epoch": 4.21, + "learning_rate": 3.596977777777778e-05, + "loss": 1.7509, + "step": 23685 + }, + { + "epoch": 4.21, + "learning_rate": 3.596681481481482e-05, + "loss": 1.8546, + "step": 23690 + }, + { + "epoch": 4.21, + "learning_rate": 3.5963851851851856e-05, + "loss": 1.9515, + "step": 23695 + }, + { + "epoch": 4.21, + "learning_rate": 3.596088888888889e-05, + "loss": 1.7258, + "step": 23700 + }, + { + "epoch": 4.21, + "learning_rate": 3.5957925925925926e-05, + "loss": 1.8776, + "step": 23705 + }, + { + "epoch": 4.22, + "learning_rate": 3.5954962962962965e-05, + "loss": 1.9044, + "step": 23710 + }, + { + "epoch": 4.22, + "learning_rate": 3.5952000000000004e-05, + "loss": 1.8833, + "step": 23715 + }, + { + "epoch": 4.22, + "learning_rate": 3.5949037037037036e-05, + "loss": 1.8041, + "step": 23720 + }, + { + "epoch": 4.22, + "learning_rate": 3.5946074074074075e-05, + "loss": 1.8098, + "step": 23725 + }, + { + "epoch": 4.22, + "learning_rate": 3.5943111111111114e-05, + "loss": 1.8424, + "step": 23730 + }, + { + "epoch": 4.22, + "learning_rate": 3.594014814814815e-05, + "loss": 1.838, + "step": 23735 + }, + { + "epoch": 4.22, + "learning_rate": 3.5937185185185185e-05, + "loss": 1.8764, + "step": 23740 + }, + { + "epoch": 4.22, + "learning_rate": 3.5934222222222223e-05, + "loss": 1.8344, + "step": 23745 + }, + { + "epoch": 4.22, + "learning_rate": 3.593125925925926e-05, + "loss": 1.8099, + "step": 23750 + }, + { + "epoch": 4.22, + "learning_rate": 3.59282962962963e-05, + "loss": 1.9737, + "step": 23755 + }, + { + "epoch": 4.22, + "learning_rate": 3.592533333333333e-05, + "loss": 1.9193, + "step": 23760 + }, + { + "epoch": 4.22, + "learning_rate": 3.592237037037037e-05, + "loss": 1.7806, + "step": 23765 + }, + { + "epoch": 4.23, + "learning_rate": 3.5919407407407404e-05, + "loss": 1.8442, + "step": 23770 + }, + { + "epoch": 4.23, + "learning_rate": 3.591644444444445e-05, + "loss": 1.8861, + "step": 23775 + }, + { + "epoch": 4.23, + "learning_rate": 3.591348148148148e-05, + "loss": 1.8891, + "step": 23780 + }, + { + "epoch": 4.23, + "learning_rate": 3.591051851851852e-05, + "loss": 1.9153, + "step": 23785 + }, + { + "epoch": 4.23, + "learning_rate": 3.590755555555555e-05, + "loss": 1.8506, + "step": 23790 + }, + { + "epoch": 4.23, + "learning_rate": 3.59045925925926e-05, + "loss": 1.8424, + "step": 23795 + }, + { + "epoch": 4.23, + "learning_rate": 3.590162962962963e-05, + "loss": 1.9049, + "step": 23800 + }, + { + "epoch": 4.23, + "learning_rate": 3.589866666666667e-05, + "loss": 1.873, + "step": 23805 + }, + { + "epoch": 4.23, + "learning_rate": 3.58957037037037e-05, + "loss": 1.9368, + "step": 23810 + }, + { + "epoch": 4.23, + "learning_rate": 3.589274074074075e-05, + "loss": 1.7873, + "step": 23815 + }, + { + "epoch": 4.23, + "learning_rate": 3.588977777777778e-05, + "loss": 1.7702, + "step": 23820 + }, + { + "epoch": 4.24, + "learning_rate": 3.588681481481482e-05, + "loss": 1.7825, + "step": 23825 + }, + { + "epoch": 4.24, + "learning_rate": 3.588385185185185e-05, + "loss": 1.9163, + "step": 23830 + }, + { + "epoch": 4.24, + "learning_rate": 3.588088888888889e-05, + "loss": 1.7773, + "step": 23835 + }, + { + "epoch": 4.24, + "learning_rate": 3.587792592592593e-05, + "loss": 1.8207, + "step": 23840 + }, + { + "epoch": 4.24, + "learning_rate": 3.5874962962962966e-05, + "loss": 1.7184, + "step": 23845 + }, + { + "epoch": 4.24, + "learning_rate": 3.5872e-05, + "loss": 1.7885, + "step": 23850 + }, + { + "epoch": 4.24, + "learning_rate": 3.586903703703704e-05, + "loss": 1.9153, + "step": 23855 + }, + { + "epoch": 4.24, + "learning_rate": 3.5866074074074076e-05, + "loss": 1.8651, + "step": 23860 + }, + { + "epoch": 4.24, + "learning_rate": 3.5863111111111115e-05, + "loss": 1.8613, + "step": 23865 + }, + { + "epoch": 4.24, + "learning_rate": 3.5860148148148147e-05, + "loss": 1.9139, + "step": 23870 + }, + { + "epoch": 4.24, + "learning_rate": 3.5857185185185185e-05, + "loss": 1.7422, + "step": 23875 + }, + { + "epoch": 4.25, + "learning_rate": 3.5854222222222224e-05, + "loss": 1.8131, + "step": 23880 + }, + { + "epoch": 4.25, + "learning_rate": 3.585125925925926e-05, + "loss": 1.7602, + "step": 23885 + }, + { + "epoch": 4.25, + "learning_rate": 3.5848296296296295e-05, + "loss": 1.7918, + "step": 23890 + }, + { + "epoch": 4.25, + "learning_rate": 3.5845333333333334e-05, + "loss": 1.7561, + "step": 23895 + }, + { + "epoch": 4.25, + "learning_rate": 3.584237037037037e-05, + "loss": 1.9091, + "step": 23900 + }, + { + "epoch": 4.25, + "learning_rate": 3.583940740740741e-05, + "loss": 1.8036, + "step": 23905 + }, + { + "epoch": 4.25, + "learning_rate": 3.5836444444444444e-05, + "loss": 1.7554, + "step": 23910 + }, + { + "epoch": 4.25, + "learning_rate": 3.583348148148148e-05, + "loss": 1.7602, + "step": 23915 + }, + { + "epoch": 4.25, + "learning_rate": 3.583051851851852e-05, + "loss": 1.7864, + "step": 23920 + }, + { + "epoch": 4.25, + "learning_rate": 3.582755555555556e-05, + "loss": 1.9324, + "step": 23925 + }, + { + "epoch": 4.25, + "learning_rate": 3.582459259259259e-05, + "loss": 1.8254, + "step": 23930 + }, + { + "epoch": 4.26, + "learning_rate": 3.582162962962963e-05, + "loss": 1.8523, + "step": 23935 + }, + { + "epoch": 4.26, + "learning_rate": 3.581866666666667e-05, + "loss": 1.7055, + "step": 23940 + }, + { + "epoch": 4.26, + "learning_rate": 3.581570370370371e-05, + "loss": 1.8094, + "step": 23945 + }, + { + "epoch": 4.26, + "learning_rate": 3.581274074074074e-05, + "loss": 1.776, + "step": 23950 + }, + { + "epoch": 4.26, + "learning_rate": 3.580977777777778e-05, + "loss": 1.7461, + "step": 23955 + }, + { + "epoch": 4.26, + "learning_rate": 3.580681481481482e-05, + "loss": 1.9923, + "step": 23960 + }, + { + "epoch": 4.26, + "learning_rate": 3.580385185185186e-05, + "loss": 1.8857, + "step": 23965 + }, + { + "epoch": 4.26, + "learning_rate": 3.580088888888889e-05, + "loss": 1.7751, + "step": 23970 + }, + { + "epoch": 4.26, + "learning_rate": 3.579792592592593e-05, + "loss": 1.8931, + "step": 23975 + }, + { + "epoch": 4.26, + "learning_rate": 3.579496296296297e-05, + "loss": 1.839, + "step": 23980 + }, + { + "epoch": 4.26, + "learning_rate": 3.5792000000000006e-05, + "loss": 1.9552, + "step": 23985 + }, + { + "epoch": 4.26, + "learning_rate": 3.578903703703704e-05, + "loss": 1.7468, + "step": 23990 + }, + { + "epoch": 4.27, + "learning_rate": 3.5786074074074076e-05, + "loss": 1.8894, + "step": 23995 + }, + { + "epoch": 4.27, + "learning_rate": 3.578311111111111e-05, + "loss": 1.9389, + "step": 24000 + }, + { + "epoch": 4.27, + "learning_rate": 3.5780148148148154e-05, + "loss": 1.7804, + "step": 24005 + }, + { + "epoch": 4.27, + "learning_rate": 3.5777185185185186e-05, + "loss": 1.927, + "step": 24010 + }, + { + "epoch": 4.27, + "learning_rate": 3.5774222222222225e-05, + "loss": 1.8599, + "step": 24015 + }, + { + "epoch": 4.27, + "learning_rate": 3.577125925925926e-05, + "loss": 1.7356, + "step": 24020 + }, + { + "epoch": 4.27, + "learning_rate": 3.57682962962963e-05, + "loss": 1.8256, + "step": 24025 + }, + { + "epoch": 4.27, + "learning_rate": 3.5765333333333335e-05, + "loss": 1.9001, + "step": 24030 + }, + { + "epoch": 4.27, + "learning_rate": 3.5762370370370374e-05, + "loss": 1.9931, + "step": 24035 + }, + { + "epoch": 4.27, + "learning_rate": 3.5759407407407406e-05, + "loss": 1.721, + "step": 24040 + }, + { + "epoch": 4.27, + "learning_rate": 3.575644444444445e-05, + "loss": 1.8939, + "step": 24045 + }, + { + "epoch": 4.28, + "learning_rate": 3.575348148148148e-05, + "loss": 1.8057, + "step": 24050 + }, + { + "epoch": 4.28, + "learning_rate": 3.575051851851852e-05, + "loss": 1.8283, + "step": 24055 + }, + { + "epoch": 4.28, + "learning_rate": 3.5747555555555554e-05, + "loss": 1.7916, + "step": 24060 + }, + { + "epoch": 4.28, + "learning_rate": 3.574459259259259e-05, + "loss": 1.8491, + "step": 24065 + }, + { + "epoch": 4.28, + "learning_rate": 3.574162962962963e-05, + "loss": 1.7988, + "step": 24070 + }, + { + "epoch": 4.28, + "learning_rate": 3.573866666666667e-05, + "loss": 2.0095, + "step": 24075 + }, + { + "epoch": 4.28, + "learning_rate": 3.57357037037037e-05, + "loss": 2.0253, + "step": 24080 + }, + { + "epoch": 4.28, + "learning_rate": 3.573274074074074e-05, + "loss": 1.8337, + "step": 24085 + }, + { + "epoch": 4.28, + "learning_rate": 3.572977777777778e-05, + "loss": 1.9918, + "step": 24090 + }, + { + "epoch": 4.28, + "learning_rate": 3.572681481481482e-05, + "loss": 1.9256, + "step": 24095 + }, + { + "epoch": 4.28, + "learning_rate": 3.572385185185185e-05, + "loss": 1.9391, + "step": 24100 + }, + { + "epoch": 4.29, + "learning_rate": 3.572088888888889e-05, + "loss": 1.9269, + "step": 24105 + }, + { + "epoch": 4.29, + "learning_rate": 3.571792592592593e-05, + "loss": 1.9112, + "step": 24110 + }, + { + "epoch": 4.29, + "learning_rate": 3.571496296296297e-05, + "loss": 1.9586, + "step": 24115 + }, + { + "epoch": 4.29, + "learning_rate": 3.5712e-05, + "loss": 1.7859, + "step": 24120 + }, + { + "epoch": 4.29, + "learning_rate": 3.570903703703704e-05, + "loss": 1.7673, + "step": 24125 + }, + { + "epoch": 4.29, + "learning_rate": 3.570607407407408e-05, + "loss": 1.8856, + "step": 24130 + }, + { + "epoch": 4.29, + "learning_rate": 3.5703111111111116e-05, + "loss": 1.9015, + "step": 24135 + }, + { + "epoch": 4.29, + "learning_rate": 3.570014814814815e-05, + "loss": 1.8433, + "step": 24140 + }, + { + "epoch": 4.29, + "learning_rate": 3.569718518518519e-05, + "loss": 1.9985, + "step": 24145 + }, + { + "epoch": 4.29, + "learning_rate": 3.5694222222222226e-05, + "loss": 1.9534, + "step": 24150 + }, + { + "epoch": 4.29, + "learning_rate": 3.5691259259259265e-05, + "loss": 1.8793, + "step": 24155 + }, + { + "epoch": 4.3, + "learning_rate": 3.56882962962963e-05, + "loss": 1.7488, + "step": 24160 + }, + { + "epoch": 4.3, + "learning_rate": 3.5685333333333335e-05, + "loss": 1.8579, + "step": 24165 + }, + { + "epoch": 4.3, + "learning_rate": 3.5682370370370374e-05, + "loss": 1.8537, + "step": 24170 + }, + { + "epoch": 4.3, + "learning_rate": 3.567940740740741e-05, + "loss": 1.807, + "step": 24175 + }, + { + "epoch": 4.3, + "learning_rate": 3.5676444444444445e-05, + "loss": 1.6736, + "step": 24180 + }, + { + "epoch": 4.3, + "learning_rate": 3.5673481481481484e-05, + "loss": 1.9001, + "step": 24185 + }, + { + "epoch": 4.3, + "learning_rate": 3.567051851851852e-05, + "loss": 1.8104, + "step": 24190 + }, + { + "epoch": 4.3, + "learning_rate": 3.566755555555556e-05, + "loss": 1.9599, + "step": 24195 + }, + { + "epoch": 4.3, + "learning_rate": 3.5664592592592594e-05, + "loss": 1.8312, + "step": 24200 + }, + { + "epoch": 4.3, + "learning_rate": 3.566162962962963e-05, + "loss": 1.8425, + "step": 24205 + }, + { + "epoch": 4.3, + "learning_rate": 3.565866666666667e-05, + "loss": 1.853, + "step": 24210 + }, + { + "epoch": 4.3, + "learning_rate": 3.565570370370371e-05, + "loss": 1.7619, + "step": 24215 + }, + { + "epoch": 4.31, + "learning_rate": 3.565274074074074e-05, + "loss": 1.9584, + "step": 24220 + }, + { + "epoch": 4.31, + "learning_rate": 3.564977777777778e-05, + "loss": 1.8885, + "step": 24225 + }, + { + "epoch": 4.31, + "learning_rate": 3.564681481481481e-05, + "loss": 1.8371, + "step": 24230 + }, + { + "epoch": 4.31, + "learning_rate": 3.564385185185186e-05, + "loss": 1.9163, + "step": 24235 + }, + { + "epoch": 4.31, + "learning_rate": 3.564088888888889e-05, + "loss": 1.8696, + "step": 24240 + }, + { + "epoch": 4.31, + "learning_rate": 3.563792592592593e-05, + "loss": 1.7983, + "step": 24245 + }, + { + "epoch": 4.31, + "learning_rate": 3.563496296296296e-05, + "loss": 1.8355, + "step": 24250 + }, + { + "epoch": 4.31, + "learning_rate": 3.563200000000001e-05, + "loss": 1.8772, + "step": 24255 + }, + { + "epoch": 4.31, + "learning_rate": 3.562903703703704e-05, + "loss": 1.8363, + "step": 24260 + }, + { + "epoch": 4.31, + "learning_rate": 3.562607407407408e-05, + "loss": 1.8263, + "step": 24265 + }, + { + "epoch": 4.31, + "learning_rate": 3.562311111111111e-05, + "loss": 1.9177, + "step": 24270 + }, + { + "epoch": 4.32, + "learning_rate": 3.5620148148148156e-05, + "loss": 1.856, + "step": 24275 + }, + { + "epoch": 4.32, + "learning_rate": 3.561718518518519e-05, + "loss": 1.7943, + "step": 24280 + }, + { + "epoch": 4.32, + "learning_rate": 3.5614222222222227e-05, + "loss": 1.8397, + "step": 24285 + }, + { + "epoch": 4.32, + "learning_rate": 3.561125925925926e-05, + "loss": 1.7794, + "step": 24290 + }, + { + "epoch": 4.32, + "learning_rate": 3.56082962962963e-05, + "loss": 1.9025, + "step": 24295 + }, + { + "epoch": 4.32, + "learning_rate": 3.5605333333333336e-05, + "loss": 1.9026, + "step": 24300 + }, + { + "epoch": 4.32, + "learning_rate": 3.5602370370370375e-05, + "loss": 1.837, + "step": 24305 + }, + { + "epoch": 4.32, + "learning_rate": 3.559940740740741e-05, + "loss": 1.733, + "step": 24310 + }, + { + "epoch": 4.32, + "learning_rate": 3.5596444444444446e-05, + "loss": 1.889, + "step": 24315 + }, + { + "epoch": 4.32, + "learning_rate": 3.5593481481481485e-05, + "loss": 1.8402, + "step": 24320 + }, + { + "epoch": 4.32, + "learning_rate": 3.559051851851852e-05, + "loss": 1.9087, + "step": 24325 + }, + { + "epoch": 4.33, + "learning_rate": 3.5587555555555556e-05, + "loss": 1.8417, + "step": 24330 + }, + { + "epoch": 4.33, + "learning_rate": 3.5584592592592594e-05, + "loss": 1.9214, + "step": 24335 + }, + { + "epoch": 4.33, + "learning_rate": 3.558162962962963e-05, + "loss": 1.9137, + "step": 24340 + }, + { + "epoch": 4.33, + "learning_rate": 3.5578666666666665e-05, + "loss": 1.9687, + "step": 24345 + }, + { + "epoch": 4.33, + "learning_rate": 3.5575703703703704e-05, + "loss": 1.8883, + "step": 24350 + }, + { + "epoch": 4.33, + "learning_rate": 3.557274074074074e-05, + "loss": 1.8356, + "step": 24355 + }, + { + "epoch": 4.33, + "learning_rate": 3.556977777777778e-05, + "loss": 1.9336, + "step": 24360 + }, + { + "epoch": 4.33, + "learning_rate": 3.5566814814814814e-05, + "loss": 1.8689, + "step": 24365 + }, + { + "epoch": 4.33, + "learning_rate": 3.556385185185185e-05, + "loss": 1.809, + "step": 24370 + }, + { + "epoch": 4.33, + "learning_rate": 3.5560888888888885e-05, + "loss": 1.8831, + "step": 24375 + }, + { + "epoch": 4.33, + "learning_rate": 3.555792592592593e-05, + "loss": 1.8179, + "step": 24380 + }, + { + "epoch": 4.34, + "learning_rate": 3.555496296296296e-05, + "loss": 1.7819, + "step": 24385 + }, + { + "epoch": 4.34, + "learning_rate": 3.5552e-05, + "loss": 1.8614, + "step": 24390 + }, + { + "epoch": 4.34, + "learning_rate": 3.554903703703703e-05, + "loss": 1.8621, + "step": 24395 + }, + { + "epoch": 4.34, + "learning_rate": 3.554607407407408e-05, + "loss": 1.8766, + "step": 24400 + }, + { + "epoch": 4.34, + "learning_rate": 3.554311111111111e-05, + "loss": 2.0427, + "step": 24405 + }, + { + "epoch": 4.34, + "learning_rate": 3.554014814814815e-05, + "loss": 1.8638, + "step": 24410 + }, + { + "epoch": 4.34, + "learning_rate": 3.553777777777778e-05, + "loss": 1.8285, + "step": 24415 + }, + { + "epoch": 4.34, + "learning_rate": 3.553481481481482e-05, + "loss": 1.9399, + "step": 24420 + }, + { + "epoch": 4.34, + "learning_rate": 3.553185185185186e-05, + "loss": 1.8146, + "step": 24425 + }, + { + "epoch": 4.34, + "learning_rate": 3.552888888888889e-05, + "loss": 1.9894, + "step": 24430 + }, + { + "epoch": 4.34, + "learning_rate": 3.552592592592593e-05, + "loss": 1.727, + "step": 24435 + }, + { + "epoch": 4.34, + "learning_rate": 3.552296296296297e-05, + "loss": 1.8678, + "step": 24440 + }, + { + "epoch": 4.35, + "learning_rate": 3.5520000000000006e-05, + "loss": 1.7672, + "step": 24445 + }, + { + "epoch": 4.35, + "learning_rate": 3.551703703703704e-05, + "loss": 1.8745, + "step": 24450 + }, + { + "epoch": 4.35, + "learning_rate": 3.5514074074074076e-05, + "loss": 1.9035, + "step": 24455 + }, + { + "epoch": 4.35, + "learning_rate": 3.551111111111111e-05, + "loss": 1.8315, + "step": 24460 + }, + { + "epoch": 4.35, + "learning_rate": 3.5508148148148154e-05, + "loss": 1.7804, + "step": 24465 + }, + { + "epoch": 4.35, + "learning_rate": 3.5505185185185186e-05, + "loss": 1.8774, + "step": 24470 + }, + { + "epoch": 4.35, + "learning_rate": 3.5502222222222225e-05, + "loss": 1.8165, + "step": 24475 + }, + { + "epoch": 4.35, + "learning_rate": 3.549925925925926e-05, + "loss": 1.8997, + "step": 24480 + }, + { + "epoch": 4.35, + "learning_rate": 3.54962962962963e-05, + "loss": 1.896, + "step": 24485 + }, + { + "epoch": 4.35, + "learning_rate": 3.5493333333333335e-05, + "loss": 1.9262, + "step": 24490 + }, + { + "epoch": 4.35, + "learning_rate": 3.5490370370370373e-05, + "loss": 1.9757, + "step": 24495 + }, + { + "epoch": 4.36, + "learning_rate": 3.5487407407407405e-05, + "loss": 1.8168, + "step": 24500 + }, + { + "epoch": 4.36, + "learning_rate": 3.548444444444445e-05, + "loss": 1.9046, + "step": 24505 + }, + { + "epoch": 4.36, + "learning_rate": 3.548148148148148e-05, + "loss": 1.761, + "step": 24510 + }, + { + "epoch": 4.36, + "learning_rate": 3.547851851851852e-05, + "loss": 1.944, + "step": 24515 + }, + { + "epoch": 4.36, + "learning_rate": 3.5475555555555554e-05, + "loss": 1.9702, + "step": 24520 + }, + { + "epoch": 4.36, + "learning_rate": 3.547259259259259e-05, + "loss": 1.7807, + "step": 24525 + }, + { + "epoch": 4.36, + "learning_rate": 3.546962962962963e-05, + "loss": 1.8418, + "step": 24530 + }, + { + "epoch": 4.36, + "learning_rate": 3.546666666666667e-05, + "loss": 1.7804, + "step": 24535 + }, + { + "epoch": 4.36, + "learning_rate": 3.54637037037037e-05, + "loss": 1.8409, + "step": 24540 + }, + { + "epoch": 4.36, + "learning_rate": 3.546074074074074e-05, + "loss": 1.8596, + "step": 24545 + }, + { + "epoch": 4.36, + "learning_rate": 3.545777777777778e-05, + "loss": 1.8124, + "step": 24550 + }, + { + "epoch": 4.37, + "learning_rate": 3.545481481481482e-05, + "loss": 1.8125, + "step": 24555 + }, + { + "epoch": 4.37, + "learning_rate": 3.545185185185185e-05, + "loss": 1.8544, + "step": 24560 + }, + { + "epoch": 4.37, + "learning_rate": 3.544888888888889e-05, + "loss": 1.8986, + "step": 24565 + }, + { + "epoch": 4.37, + "learning_rate": 3.544592592592593e-05, + "loss": 1.8437, + "step": 24570 + }, + { + "epoch": 4.37, + "learning_rate": 3.544296296296297e-05, + "loss": 1.7703, + "step": 24575 + }, + { + "epoch": 4.37, + "learning_rate": 3.544e-05, + "loss": 1.9501, + "step": 24580 + }, + { + "epoch": 4.37, + "learning_rate": 3.543703703703704e-05, + "loss": 1.7988, + "step": 24585 + }, + { + "epoch": 4.37, + "learning_rate": 3.543407407407408e-05, + "loss": 1.83, + "step": 24590 + }, + { + "epoch": 4.37, + "learning_rate": 3.5431111111111116e-05, + "loss": 1.882, + "step": 24595 + }, + { + "epoch": 4.37, + "learning_rate": 3.542814814814815e-05, + "loss": 1.7785, + "step": 24600 + }, + { + "epoch": 4.37, + "learning_rate": 3.542518518518519e-05, + "loss": 1.862, + "step": 24605 + }, + { + "epoch": 4.38, + "learning_rate": 3.5422222222222226e-05, + "loss": 2.0215, + "step": 24610 + }, + { + "epoch": 4.38, + "learning_rate": 3.5419259259259264e-05, + "loss": 1.7936, + "step": 24615 + }, + { + "epoch": 4.38, + "learning_rate": 3.5416296296296297e-05, + "loss": 2.037, + "step": 24620 + }, + { + "epoch": 4.38, + "learning_rate": 3.5413333333333335e-05, + "loss": 1.841, + "step": 24625 + }, + { + "epoch": 4.38, + "learning_rate": 3.5410370370370374e-05, + "loss": 1.693, + "step": 24630 + }, + { + "epoch": 4.38, + "learning_rate": 3.540740740740741e-05, + "loss": 1.851, + "step": 24635 + }, + { + "epoch": 4.38, + "learning_rate": 3.5404444444444445e-05, + "loss": 1.8352, + "step": 24640 + }, + { + "epoch": 4.38, + "learning_rate": 3.5401481481481484e-05, + "loss": 1.9591, + "step": 24645 + }, + { + "epoch": 4.38, + "learning_rate": 3.539851851851852e-05, + "loss": 1.7345, + "step": 24650 + }, + { + "epoch": 4.38, + "learning_rate": 3.539555555555556e-05, + "loss": 1.7922, + "step": 24655 + }, + { + "epoch": 4.38, + "learning_rate": 3.5392592592592594e-05, + "loss": 1.8394, + "step": 24660 + }, + { + "epoch": 4.38, + "learning_rate": 3.538962962962963e-05, + "loss": 1.7019, + "step": 24665 + }, + { + "epoch": 4.39, + "learning_rate": 3.538666666666667e-05, + "loss": 1.8921, + "step": 24670 + }, + { + "epoch": 4.39, + "learning_rate": 3.538370370370371e-05, + "loss": 1.9013, + "step": 24675 + }, + { + "epoch": 4.39, + "learning_rate": 3.538074074074074e-05, + "loss": 1.8075, + "step": 24680 + }, + { + "epoch": 4.39, + "learning_rate": 3.537777777777778e-05, + "loss": 1.9012, + "step": 24685 + }, + { + "epoch": 4.39, + "learning_rate": 3.537481481481481e-05, + "loss": 1.9693, + "step": 24690 + }, + { + "epoch": 4.39, + "learning_rate": 3.537185185185186e-05, + "loss": 1.9659, + "step": 24695 + }, + { + "epoch": 4.39, + "learning_rate": 3.536888888888889e-05, + "loss": 1.8073, + "step": 24700 + }, + { + "epoch": 4.39, + "learning_rate": 3.536592592592593e-05, + "loss": 1.8324, + "step": 24705 + }, + { + "epoch": 4.39, + "learning_rate": 3.536296296296296e-05, + "loss": 1.7607, + "step": 24710 + }, + { + "epoch": 4.39, + "learning_rate": 3.536000000000001e-05, + "loss": 1.8187, + "step": 24715 + }, + { + "epoch": 4.39, + "learning_rate": 3.535703703703704e-05, + "loss": 1.8858, + "step": 24720 + }, + { + "epoch": 4.4, + "learning_rate": 3.535407407407408e-05, + "loss": 1.8929, + "step": 24725 + }, + { + "epoch": 4.4, + "learning_rate": 3.535111111111111e-05, + "loss": 1.793, + "step": 24730 + }, + { + "epoch": 4.4, + "learning_rate": 3.5348148148148156e-05, + "loss": 1.8317, + "step": 24735 + }, + { + "epoch": 4.4, + "learning_rate": 3.534518518518519e-05, + "loss": 1.8341, + "step": 24740 + }, + { + "epoch": 4.4, + "learning_rate": 3.5342222222222226e-05, + "loss": 1.9416, + "step": 24745 + }, + { + "epoch": 4.4, + "learning_rate": 3.533925925925926e-05, + "loss": 1.7878, + "step": 24750 + }, + { + "epoch": 4.4, + "learning_rate": 3.53362962962963e-05, + "loss": 1.8886, + "step": 24755 + }, + { + "epoch": 4.4, + "learning_rate": 3.5333333333333336e-05, + "loss": 1.863, + "step": 24760 + }, + { + "epoch": 4.4, + "learning_rate": 3.5330370370370375e-05, + "loss": 1.926, + "step": 24765 + }, + { + "epoch": 4.4, + "learning_rate": 3.532740740740741e-05, + "loss": 1.9467, + "step": 24770 + }, + { + "epoch": 4.4, + "learning_rate": 3.5324444444444446e-05, + "loss": 1.886, + "step": 24775 + }, + { + "epoch": 4.41, + "learning_rate": 3.5321481481481485e-05, + "loss": 1.9764, + "step": 24780 + }, + { + "epoch": 4.41, + "learning_rate": 3.531851851851852e-05, + "loss": 1.7238, + "step": 24785 + }, + { + "epoch": 4.41, + "learning_rate": 3.5315555555555555e-05, + "loss": 1.7047, + "step": 24790 + }, + { + "epoch": 4.41, + "learning_rate": 3.5312592592592594e-05, + "loss": 1.8313, + "step": 24795 + }, + { + "epoch": 4.41, + "learning_rate": 3.530962962962963e-05, + "loss": 1.835, + "step": 24800 + }, + { + "epoch": 4.41, + "learning_rate": 3.5306666666666665e-05, + "loss": 2.0512, + "step": 24805 + }, + { + "epoch": 4.41, + "learning_rate": 3.5303703703703704e-05, + "loss": 1.8739, + "step": 24810 + }, + { + "epoch": 4.41, + "learning_rate": 3.530074074074074e-05, + "loss": 1.8078, + "step": 24815 + }, + { + "epoch": 4.41, + "learning_rate": 3.529777777777778e-05, + "loss": 1.8331, + "step": 24820 + }, + { + "epoch": 4.41, + "learning_rate": 3.5294814814814814e-05, + "loss": 1.8031, + "step": 24825 + }, + { + "epoch": 4.41, + "learning_rate": 3.529185185185185e-05, + "loss": 1.8622, + "step": 24830 + }, + { + "epoch": 4.42, + "learning_rate": 3.528888888888889e-05, + "loss": 1.942, + "step": 24835 + }, + { + "epoch": 4.42, + "learning_rate": 3.528592592592593e-05, + "loss": 1.8436, + "step": 24840 + }, + { + "epoch": 4.42, + "learning_rate": 3.528296296296296e-05, + "loss": 1.9, + "step": 24845 + }, + { + "epoch": 4.42, + "learning_rate": 3.528e-05, + "loss": 1.8748, + "step": 24850 + }, + { + "epoch": 4.42, + "learning_rate": 3.527703703703703e-05, + "loss": 1.8249, + "step": 24855 + }, + { + "epoch": 4.42, + "learning_rate": 3.527407407407408e-05, + "loss": 1.8156, + "step": 24860 + }, + { + "epoch": 4.42, + "learning_rate": 3.527111111111111e-05, + "loss": 1.8637, + "step": 24865 + }, + { + "epoch": 4.42, + "learning_rate": 3.526814814814815e-05, + "loss": 1.742, + "step": 24870 + }, + { + "epoch": 4.42, + "learning_rate": 3.526518518518518e-05, + "loss": 1.8675, + "step": 24875 + }, + { + "epoch": 4.42, + "learning_rate": 3.526222222222223e-05, + "loss": 1.8444, + "step": 24880 + }, + { + "epoch": 4.42, + "learning_rate": 3.525925925925926e-05, + "loss": 1.8077, + "step": 24885 + }, + { + "epoch": 4.42, + "learning_rate": 3.52562962962963e-05, + "loss": 1.8205, + "step": 24890 + }, + { + "epoch": 4.43, + "learning_rate": 3.525333333333333e-05, + "loss": 1.8491, + "step": 24895 + }, + { + "epoch": 4.43, + "learning_rate": 3.5250370370370376e-05, + "loss": 1.8633, + "step": 24900 + }, + { + "epoch": 4.43, + "learning_rate": 3.524740740740741e-05, + "loss": 2.0354, + "step": 24905 + }, + { + "epoch": 4.43, + "learning_rate": 3.5244444444444447e-05, + "loss": 1.9558, + "step": 24910 + }, + { + "epoch": 4.43, + "learning_rate": 3.524148148148148e-05, + "loss": 1.9479, + "step": 24915 + }, + { + "epoch": 4.43, + "learning_rate": 3.523851851851852e-05, + "loss": 1.9407, + "step": 24920 + }, + { + "epoch": 4.43, + "learning_rate": 3.5235555555555556e-05, + "loss": 1.7419, + "step": 24925 + }, + { + "epoch": 4.43, + "learning_rate": 3.5232592592592595e-05, + "loss": 1.8207, + "step": 24930 + }, + { + "epoch": 4.43, + "learning_rate": 3.522962962962963e-05, + "loss": 1.8884, + "step": 24935 + }, + { + "epoch": 4.43, + "learning_rate": 3.5226666666666666e-05, + "loss": 1.8133, + "step": 24940 + }, + { + "epoch": 4.43, + "learning_rate": 3.5223703703703705e-05, + "loss": 1.9704, + "step": 24945 + }, + { + "epoch": 4.44, + "learning_rate": 3.5220740740740744e-05, + "loss": 1.8793, + "step": 24950 + }, + { + "epoch": 4.44, + "learning_rate": 3.5217777777777776e-05, + "loss": 1.8465, + "step": 24955 + }, + { + "epoch": 4.44, + "learning_rate": 3.5214814814814814e-05, + "loss": 1.89, + "step": 24960 + }, + { + "epoch": 4.44, + "learning_rate": 3.521185185185185e-05, + "loss": 1.8096, + "step": 24965 + }, + { + "epoch": 4.44, + "learning_rate": 3.520888888888889e-05, + "loss": 1.919, + "step": 24970 + }, + { + "epoch": 4.44, + "learning_rate": 3.5205925925925924e-05, + "loss": 1.9718, + "step": 24975 + }, + { + "epoch": 4.44, + "learning_rate": 3.520296296296296e-05, + "loss": 1.7884, + "step": 24980 + }, + { + "epoch": 4.44, + "learning_rate": 3.52e-05, + "loss": 1.7779, + "step": 24985 + }, + { + "epoch": 4.44, + "learning_rate": 3.519703703703704e-05, + "loss": 1.8378, + "step": 24990 + }, + { + "epoch": 4.44, + "learning_rate": 3.519407407407407e-05, + "loss": 1.8966, + "step": 24995 + }, + { + "epoch": 4.44, + "learning_rate": 3.519111111111111e-05, + "loss": 1.8504, + "step": 25000 + }, + { + "epoch": 4.45, + "learning_rate": 3.518814814814815e-05, + "loss": 1.9341, + "step": 25005 + }, + { + "epoch": 4.45, + "learning_rate": 3.518518518518519e-05, + "loss": 1.901, + "step": 25010 + }, + { + "epoch": 4.45, + "learning_rate": 3.518222222222222e-05, + "loss": 1.9542, + "step": 25015 + }, + { + "epoch": 4.45, + "learning_rate": 3.517925925925926e-05, + "loss": 1.9702, + "step": 25020 + }, + { + "epoch": 4.45, + "learning_rate": 3.51762962962963e-05, + "loss": 1.8339, + "step": 25025 + }, + { + "epoch": 4.45, + "learning_rate": 3.517333333333334e-05, + "loss": 1.9315, + "step": 25030 + }, + { + "epoch": 4.45, + "learning_rate": 3.517037037037037e-05, + "loss": 1.7635, + "step": 25035 + }, + { + "epoch": 4.45, + "learning_rate": 3.516740740740741e-05, + "loss": 1.8099, + "step": 25040 + }, + { + "epoch": 4.45, + "learning_rate": 3.516444444444445e-05, + "loss": 1.8775, + "step": 25045 + }, + { + "epoch": 4.45, + "learning_rate": 3.5161481481481486e-05, + "loss": 1.8339, + "step": 25050 + }, + { + "epoch": 4.45, + "learning_rate": 3.515851851851852e-05, + "loss": 1.8749, + "step": 25055 + }, + { + "epoch": 4.46, + "learning_rate": 3.515555555555556e-05, + "loss": 1.8664, + "step": 25060 + }, + { + "epoch": 4.46, + "learning_rate": 3.5152592592592596e-05, + "loss": 1.863, + "step": 25065 + }, + { + "epoch": 4.46, + "learning_rate": 3.5149629629629635e-05, + "loss": 1.9182, + "step": 25070 + }, + { + "epoch": 4.46, + "learning_rate": 3.514666666666667e-05, + "loss": 1.9756, + "step": 25075 + }, + { + "epoch": 4.46, + "learning_rate": 3.5143703703703706e-05, + "loss": 1.8123, + "step": 25080 + }, + { + "epoch": 4.46, + "learning_rate": 3.514074074074074e-05, + "loss": 1.9715, + "step": 25085 + }, + { + "epoch": 4.46, + "learning_rate": 3.513777777777778e-05, + "loss": 1.945, + "step": 25090 + }, + { + "epoch": 4.46, + "learning_rate": 3.5134814814814815e-05, + "loss": 1.8166, + "step": 25095 + }, + { + "epoch": 4.46, + "learning_rate": 3.5131851851851854e-05, + "loss": 1.9568, + "step": 25100 + }, + { + "epoch": 4.46, + "learning_rate": 3.5128888888888886e-05, + "loss": 1.9139, + "step": 25105 + }, + { + "epoch": 4.46, + "learning_rate": 3.512592592592593e-05, + "loss": 1.7323, + "step": 25110 + }, + { + "epoch": 4.46, + "learning_rate": 3.5122962962962964e-05, + "loss": 1.841, + "step": 25115 + }, + { + "epoch": 4.47, + "learning_rate": 3.512e-05, + "loss": 1.796, + "step": 25120 + }, + { + "epoch": 4.47, + "learning_rate": 3.5117037037037035e-05, + "loss": 1.9153, + "step": 25125 + }, + { + "epoch": 4.47, + "learning_rate": 3.511407407407408e-05, + "loss": 1.931, + "step": 25130 + }, + { + "epoch": 4.47, + "learning_rate": 3.511111111111111e-05, + "loss": 1.9465, + "step": 25135 + }, + { + "epoch": 4.47, + "learning_rate": 3.510814814814815e-05, + "loss": 1.956, + "step": 25140 + }, + { + "epoch": 4.47, + "learning_rate": 3.510518518518518e-05, + "loss": 1.8571, + "step": 25145 + }, + { + "epoch": 4.47, + "learning_rate": 3.510222222222222e-05, + "loss": 1.8822, + "step": 25150 + }, + { + "epoch": 4.47, + "learning_rate": 3.509925925925926e-05, + "loss": 1.8245, + "step": 25155 + }, + { + "epoch": 4.47, + "learning_rate": 3.50962962962963e-05, + "loss": 1.7957, + "step": 25160 + }, + { + "epoch": 4.47, + "learning_rate": 3.509333333333333e-05, + "loss": 1.7799, + "step": 25165 + }, + { + "epoch": 4.47, + "learning_rate": 3.509037037037037e-05, + "loss": 1.9367, + "step": 25170 + }, + { + "epoch": 4.48, + "learning_rate": 3.508740740740741e-05, + "loss": 1.8052, + "step": 25175 + }, + { + "epoch": 4.48, + "learning_rate": 3.508444444444445e-05, + "loss": 1.7642, + "step": 25180 + }, + { + "epoch": 4.48, + "learning_rate": 3.508148148148148e-05, + "loss": 1.8024, + "step": 25185 + }, + { + "epoch": 4.48, + "learning_rate": 3.507851851851852e-05, + "loss": 1.8394, + "step": 25190 + }, + { + "epoch": 4.48, + "learning_rate": 3.507555555555556e-05, + "loss": 1.9051, + "step": 25195 + }, + { + "epoch": 4.48, + "learning_rate": 3.5072592592592597e-05, + "loss": 1.7754, + "step": 25200 + }, + { + "epoch": 4.48, + "learning_rate": 3.506962962962963e-05, + "loss": 1.8441, + "step": 25205 + }, + { + "epoch": 4.48, + "learning_rate": 3.506666666666667e-05, + "loss": 1.8541, + "step": 25210 + }, + { + "epoch": 4.48, + "learning_rate": 3.5063703703703706e-05, + "loss": 1.7804, + "step": 25215 + }, + { + "epoch": 4.48, + "learning_rate": 3.5060740740740745e-05, + "loss": 1.9259, + "step": 25220 + }, + { + "epoch": 4.48, + "learning_rate": 3.505777777777778e-05, + "loss": 1.9042, + "step": 25225 + }, + { + "epoch": 4.49, + "learning_rate": 3.5054814814814816e-05, + "loss": 1.7943, + "step": 25230 + }, + { + "epoch": 4.49, + "learning_rate": 3.5051851851851855e-05, + "loss": 1.7717, + "step": 25235 + }, + { + "epoch": 4.49, + "learning_rate": 3.5048888888888894e-05, + "loss": 1.7454, + "step": 25240 + }, + { + "epoch": 4.49, + "learning_rate": 3.5045925925925926e-05, + "loss": 1.8993, + "step": 25245 + }, + { + "epoch": 4.49, + "learning_rate": 3.5042962962962965e-05, + "loss": 1.8667, + "step": 25250 + }, + { + "epoch": 4.49, + "learning_rate": 3.504e-05, + "loss": 1.7998, + "step": 25255 + }, + { + "epoch": 4.49, + "learning_rate": 3.503703703703704e-05, + "loss": 1.9124, + "step": 25260 + }, + { + "epoch": 4.49, + "learning_rate": 3.5034074074074074e-05, + "loss": 1.9739, + "step": 25265 + }, + { + "epoch": 4.49, + "learning_rate": 3.503111111111111e-05, + "loss": 1.8649, + "step": 25270 + }, + { + "epoch": 4.49, + "learning_rate": 3.502814814814815e-05, + "loss": 1.9104, + "step": 25275 + }, + { + "epoch": 4.49, + "learning_rate": 3.502518518518519e-05, + "loss": 1.899, + "step": 25280 + }, + { + "epoch": 4.5, + "learning_rate": 3.502222222222222e-05, + "loss": 1.7956, + "step": 25285 + }, + { + "epoch": 4.5, + "learning_rate": 3.501925925925926e-05, + "loss": 1.8665, + "step": 25290 + }, + { + "epoch": 4.5, + "learning_rate": 3.50162962962963e-05, + "loss": 1.8491, + "step": 25295 + }, + { + "epoch": 4.5, + "learning_rate": 3.501333333333334e-05, + "loss": 1.8148, + "step": 25300 + }, + { + "epoch": 4.5, + "learning_rate": 3.501037037037037e-05, + "loss": 1.7376, + "step": 25305 + }, + { + "epoch": 4.5, + "learning_rate": 3.500740740740741e-05, + "loss": 1.876, + "step": 25310 + }, + { + "epoch": 4.5, + "learning_rate": 3.500444444444444e-05, + "loss": 1.6976, + "step": 25315 + }, + { + "epoch": 4.5, + "learning_rate": 3.500148148148149e-05, + "loss": 1.8688, + "step": 25320 + }, + { + "epoch": 4.5, + "learning_rate": 3.499851851851852e-05, + "loss": 1.9759, + "step": 25325 + }, + { + "epoch": 4.5, + "learning_rate": 3.499555555555556e-05, + "loss": 1.7414, + "step": 25330 + }, + { + "epoch": 4.5, + "learning_rate": 3.499259259259259e-05, + "loss": 1.8782, + "step": 25335 + }, + { + "epoch": 4.5, + "learning_rate": 3.4989629629629636e-05, + "loss": 1.8686, + "step": 25340 + }, + { + "epoch": 4.51, + "learning_rate": 3.498666666666667e-05, + "loss": 1.8583, + "step": 25345 + }, + { + "epoch": 4.51, + "learning_rate": 3.498370370370371e-05, + "loss": 1.9514, + "step": 25350 + }, + { + "epoch": 4.51, + "learning_rate": 3.498074074074074e-05, + "loss": 1.8586, + "step": 25355 + }, + { + "epoch": 4.51, + "learning_rate": 3.4977777777777785e-05, + "loss": 1.7982, + "step": 25360 + }, + { + "epoch": 4.51, + "learning_rate": 3.497481481481482e-05, + "loss": 1.9058, + "step": 25365 + }, + { + "epoch": 4.51, + "learning_rate": 3.4971851851851856e-05, + "loss": 1.8518, + "step": 25370 + }, + { + "epoch": 4.51, + "learning_rate": 3.496888888888889e-05, + "loss": 1.9264, + "step": 25375 + }, + { + "epoch": 4.51, + "learning_rate": 3.4965925925925926e-05, + "loss": 1.9888, + "step": 25380 + }, + { + "epoch": 4.51, + "learning_rate": 3.4962962962962965e-05, + "loss": 1.9091, + "step": 25385 + }, + { + "epoch": 4.51, + "learning_rate": 3.4960000000000004e-05, + "loss": 1.8697, + "step": 25390 + }, + { + "epoch": 4.51, + "learning_rate": 3.4957037037037036e-05, + "loss": 1.9636, + "step": 25395 + }, + { + "epoch": 4.52, + "learning_rate": 3.4954074074074075e-05, + "loss": 1.7825, + "step": 25400 + }, + { + "epoch": 4.52, + "learning_rate": 3.4951111111111114e-05, + "loss": 1.7781, + "step": 25405 + }, + { + "epoch": 4.52, + "learning_rate": 3.494814814814815e-05, + "loss": 1.8524, + "step": 25410 + }, + { + "epoch": 4.52, + "learning_rate": 3.4945185185185185e-05, + "loss": 1.8798, + "step": 25415 + }, + { + "epoch": 4.52, + "learning_rate": 3.4942222222222223e-05, + "loss": 1.7901, + "step": 25420 + }, + { + "epoch": 4.52, + "learning_rate": 3.493925925925926e-05, + "loss": 1.8895, + "step": 25425 + }, + { + "epoch": 4.52, + "learning_rate": 3.49362962962963e-05, + "loss": 1.8999, + "step": 25430 + }, + { + "epoch": 4.52, + "learning_rate": 3.493333333333333e-05, + "loss": 1.7997, + "step": 25435 + }, + { + "epoch": 4.52, + "learning_rate": 3.493037037037037e-05, + "loss": 1.9247, + "step": 25440 + }, + { + "epoch": 4.52, + "learning_rate": 3.492740740740741e-05, + "loss": 1.7978, + "step": 25445 + }, + { + "epoch": 4.52, + "learning_rate": 3.492444444444445e-05, + "loss": 1.9353, + "step": 25450 + }, + { + "epoch": 4.53, + "learning_rate": 3.492148148148148e-05, + "loss": 1.9127, + "step": 25455 + }, + { + "epoch": 4.53, + "learning_rate": 3.491851851851852e-05, + "loss": 1.9625, + "step": 25460 + }, + { + "epoch": 4.53, + "learning_rate": 3.491555555555556e-05, + "loss": 1.8562, + "step": 25465 + }, + { + "epoch": 4.53, + "learning_rate": 3.49125925925926e-05, + "loss": 1.7948, + "step": 25470 + }, + { + "epoch": 4.53, + "learning_rate": 3.490962962962963e-05, + "loss": 1.9687, + "step": 25475 + }, + { + "epoch": 4.53, + "learning_rate": 3.490666666666667e-05, + "loss": 1.883, + "step": 25480 + }, + { + "epoch": 4.53, + "learning_rate": 3.490370370370371e-05, + "loss": 1.8448, + "step": 25485 + }, + { + "epoch": 4.53, + "learning_rate": 3.490074074074075e-05, + "loss": 1.8739, + "step": 25490 + }, + { + "epoch": 4.53, + "learning_rate": 3.489777777777778e-05, + "loss": 1.8471, + "step": 25495 + }, + { + "epoch": 4.53, + "learning_rate": 3.489481481481482e-05, + "loss": 1.94, + "step": 25500 + }, + { + "epoch": 4.53, + "learning_rate": 3.4891851851851856e-05, + "loss": 1.8707, + "step": 25505 + }, + { + "epoch": 4.54, + "learning_rate": 3.4888888888888895e-05, + "loss": 1.9176, + "step": 25510 + }, + { + "epoch": 4.54, + "learning_rate": 3.488592592592593e-05, + "loss": 1.9169, + "step": 25515 + }, + { + "epoch": 4.54, + "learning_rate": 3.4882962962962966e-05, + "loss": 1.8741, + "step": 25520 + }, + { + "epoch": 4.54, + "learning_rate": 3.4880000000000005e-05, + "loss": 1.7539, + "step": 25525 + }, + { + "epoch": 4.54, + "learning_rate": 3.4877037037037044e-05, + "loss": 1.9299, + "step": 25530 + }, + { + "epoch": 4.54, + "learning_rate": 3.4874074074074076e-05, + "loss": 1.8688, + "step": 25535 + }, + { + "epoch": 4.54, + "learning_rate": 3.4871111111111115e-05, + "loss": 1.8637, + "step": 25540 + }, + { + "epoch": 4.54, + "learning_rate": 3.4868148148148147e-05, + "loss": 1.8973, + "step": 25545 + }, + { + "epoch": 4.54, + "learning_rate": 3.486518518518519e-05, + "loss": 1.9224, + "step": 25550 + }, + { + "epoch": 4.54, + "learning_rate": 3.4862222222222224e-05, + "loss": 1.903, + "step": 25555 + }, + { + "epoch": 4.54, + "learning_rate": 3.4859259259259256e-05, + "loss": 1.7937, + "step": 25560 + }, + { + "epoch": 4.54, + "learning_rate": 3.4856296296296295e-05, + "loss": 1.7909, + "step": 25565 + }, + { + "epoch": 4.55, + "learning_rate": 3.4853333333333334e-05, + "loss": 1.8075, + "step": 25570 + }, + { + "epoch": 4.55, + "learning_rate": 3.485037037037037e-05, + "loss": 1.9302, + "step": 25575 + }, + { + "epoch": 4.55, + "learning_rate": 3.4847407407407405e-05, + "loss": 1.8582, + "step": 25580 + }, + { + "epoch": 4.55, + "learning_rate": 3.4844444444444444e-05, + "loss": 1.8397, + "step": 25585 + }, + { + "epoch": 4.55, + "learning_rate": 3.484148148148148e-05, + "loss": 1.9057, + "step": 25590 + }, + { + "epoch": 4.55, + "learning_rate": 3.483851851851852e-05, + "loss": 1.8557, + "step": 25595 + }, + { + "epoch": 4.55, + "learning_rate": 3.483555555555555e-05, + "loss": 1.9313, + "step": 25600 + }, + { + "epoch": 4.55, + "learning_rate": 3.483259259259259e-05, + "loss": 1.9711, + "step": 25605 + }, + { + "epoch": 4.55, + "learning_rate": 3.482962962962963e-05, + "loss": 1.9934, + "step": 25610 + }, + { + "epoch": 4.55, + "learning_rate": 3.482666666666667e-05, + "loss": 1.8104, + "step": 25615 + }, + { + "epoch": 4.55, + "learning_rate": 3.48237037037037e-05, + "loss": 1.7785, + "step": 25620 + }, + { + "epoch": 4.56, + "learning_rate": 3.482074074074074e-05, + "loss": 1.9117, + "step": 25625 + }, + { + "epoch": 4.56, + "learning_rate": 3.481777777777778e-05, + "loss": 1.7903, + "step": 25630 + }, + { + "epoch": 4.56, + "learning_rate": 3.481481481481482e-05, + "loss": 1.8321, + "step": 25635 + }, + { + "epoch": 4.56, + "learning_rate": 3.481185185185185e-05, + "loss": 1.9943, + "step": 25640 + }, + { + "epoch": 4.56, + "learning_rate": 3.480888888888889e-05, + "loss": 1.8763, + "step": 25645 + }, + { + "epoch": 4.56, + "learning_rate": 3.480592592592593e-05, + "loss": 1.7984, + "step": 25650 + }, + { + "epoch": 4.56, + "learning_rate": 3.480296296296297e-05, + "loss": 1.903, + "step": 25655 + }, + { + "epoch": 4.56, + "learning_rate": 3.48e-05, + "loss": 1.9589, + "step": 25660 + }, + { + "epoch": 4.56, + "learning_rate": 3.479703703703704e-05, + "loss": 1.9972, + "step": 25665 + }, + { + "epoch": 4.56, + "learning_rate": 3.4794074074074076e-05, + "loss": 1.8235, + "step": 25670 + }, + { + "epoch": 4.56, + "learning_rate": 3.4791111111111115e-05, + "loss": 2.0638, + "step": 25675 + }, + { + "epoch": 4.57, + "learning_rate": 3.4788740740740745e-05, + "loss": 1.8662, + "step": 25680 + }, + { + "epoch": 4.57, + "learning_rate": 3.478577777777778e-05, + "loss": 1.8202, + "step": 25685 + }, + { + "epoch": 4.57, + "learning_rate": 3.4782814814814816e-05, + "loss": 1.7592, + "step": 25690 + }, + { + "epoch": 4.57, + "learning_rate": 3.4779851851851855e-05, + "loss": 1.8934, + "step": 25695 + }, + { + "epoch": 4.57, + "learning_rate": 3.4776888888888894e-05, + "loss": 1.8565, + "step": 25700 + }, + { + "epoch": 4.57, + "learning_rate": 3.4773925925925926e-05, + "loss": 1.6515, + "step": 25705 + }, + { + "epoch": 4.57, + "learning_rate": 3.4770962962962964e-05, + "loss": 1.8836, + "step": 25710 + }, + { + "epoch": 4.57, + "learning_rate": 3.4768e-05, + "loss": 1.8516, + "step": 25715 + }, + { + "epoch": 4.57, + "learning_rate": 3.476503703703704e-05, + "loss": 1.8772, + "step": 25720 + }, + { + "epoch": 4.57, + "learning_rate": 3.4762074074074074e-05, + "loss": 1.8507, + "step": 25725 + }, + { + "epoch": 4.57, + "learning_rate": 3.475911111111111e-05, + "loss": 1.7597, + "step": 25730 + }, + { + "epoch": 4.58, + "learning_rate": 3.475614814814815e-05, + "loss": 1.9444, + "step": 25735 + }, + { + "epoch": 4.58, + "learning_rate": 3.475318518518519e-05, + "loss": 1.7096, + "step": 25740 + }, + { + "epoch": 4.58, + "learning_rate": 3.475022222222222e-05, + "loss": 1.8264, + "step": 25745 + }, + { + "epoch": 4.58, + "learning_rate": 3.474725925925926e-05, + "loss": 1.8842, + "step": 25750 + }, + { + "epoch": 4.58, + "learning_rate": 3.47442962962963e-05, + "loss": 1.983, + "step": 25755 + }, + { + "epoch": 4.58, + "learning_rate": 3.474133333333334e-05, + "loss": 1.7881, + "step": 25760 + }, + { + "epoch": 4.58, + "learning_rate": 3.473837037037037e-05, + "loss": 1.8182, + "step": 25765 + }, + { + "epoch": 4.58, + "learning_rate": 3.473540740740741e-05, + "loss": 1.9144, + "step": 25770 + }, + { + "epoch": 4.58, + "learning_rate": 3.473244444444444e-05, + "loss": 1.8981, + "step": 25775 + }, + { + "epoch": 4.58, + "learning_rate": 3.472948148148149e-05, + "loss": 2.0234, + "step": 25780 + }, + { + "epoch": 4.58, + "learning_rate": 3.472651851851852e-05, + "loss": 1.8354, + "step": 25785 + }, + { + "epoch": 4.58, + "learning_rate": 3.472355555555556e-05, + "loss": 2.0328, + "step": 25790 + }, + { + "epoch": 4.59, + "learning_rate": 3.472059259259259e-05, + "loss": 1.9785, + "step": 25795 + }, + { + "epoch": 4.59, + "learning_rate": 3.4717629629629636e-05, + "loss": 1.857, + "step": 25800 + }, + { + "epoch": 4.59, + "learning_rate": 3.471466666666667e-05, + "loss": 2.021, + "step": 25805 + }, + { + "epoch": 4.59, + "learning_rate": 3.471170370370371e-05, + "loss": 1.9032, + "step": 25810 + }, + { + "epoch": 4.59, + "learning_rate": 3.470874074074074e-05, + "loss": 1.9665, + "step": 25815 + }, + { + "epoch": 4.59, + "learning_rate": 3.4705777777777785e-05, + "loss": 1.8891, + "step": 25820 + }, + { + "epoch": 4.59, + "learning_rate": 3.470281481481482e-05, + "loss": 1.8862, + "step": 25825 + }, + { + "epoch": 4.59, + "learning_rate": 3.4699851851851855e-05, + "loss": 1.907, + "step": 25830 + }, + { + "epoch": 4.59, + "learning_rate": 3.469688888888889e-05, + "loss": 1.7811, + "step": 25835 + }, + { + "epoch": 4.59, + "learning_rate": 3.4693925925925926e-05, + "loss": 1.8134, + "step": 25840 + }, + { + "epoch": 4.59, + "learning_rate": 3.4690962962962965e-05, + "loss": 1.803, + "step": 25845 + }, + { + "epoch": 4.6, + "learning_rate": 3.4688000000000004e-05, + "loss": 1.8969, + "step": 25850 + }, + { + "epoch": 4.6, + "learning_rate": 3.4685037037037036e-05, + "loss": 1.8, + "step": 25855 + }, + { + "epoch": 4.6, + "learning_rate": 3.4682074074074075e-05, + "loss": 1.9012, + "step": 25860 + }, + { + "epoch": 4.6, + "learning_rate": 3.4679111111111114e-05, + "loss": 1.843, + "step": 25865 + }, + { + "epoch": 4.6, + "learning_rate": 3.467614814814815e-05, + "loss": 1.8891, + "step": 25870 + }, + { + "epoch": 4.6, + "learning_rate": 3.4673185185185185e-05, + "loss": 1.8011, + "step": 25875 + }, + { + "epoch": 4.6, + "learning_rate": 3.467022222222222e-05, + "loss": 1.7436, + "step": 25880 + }, + { + "epoch": 4.6, + "learning_rate": 3.466725925925926e-05, + "loss": 1.9957, + "step": 25885 + }, + { + "epoch": 4.6, + "learning_rate": 3.46642962962963e-05, + "loss": 1.8581, + "step": 25890 + }, + { + "epoch": 4.6, + "learning_rate": 3.466133333333333e-05, + "loss": 1.8335, + "step": 25895 + }, + { + "epoch": 4.6, + "learning_rate": 3.465837037037037e-05, + "loss": 1.9573, + "step": 25900 + }, + { + "epoch": 4.61, + "learning_rate": 3.465540740740741e-05, + "loss": 1.8825, + "step": 25905 + }, + { + "epoch": 4.61, + "learning_rate": 3.465244444444445e-05, + "loss": 1.9169, + "step": 25910 + }, + { + "epoch": 4.61, + "learning_rate": 3.464948148148148e-05, + "loss": 1.7855, + "step": 25915 + }, + { + "epoch": 4.61, + "learning_rate": 3.464651851851852e-05, + "loss": 1.8015, + "step": 25920 + }, + { + "epoch": 4.61, + "learning_rate": 3.464355555555556e-05, + "loss": 1.9257, + "step": 25925 + }, + { + "epoch": 4.61, + "learning_rate": 3.46405925925926e-05, + "loss": 1.8334, + "step": 25930 + }, + { + "epoch": 4.61, + "learning_rate": 3.463762962962963e-05, + "loss": 1.8031, + "step": 25935 + }, + { + "epoch": 4.61, + "learning_rate": 3.463466666666667e-05, + "loss": 1.8825, + "step": 25940 + }, + { + "epoch": 4.61, + "learning_rate": 3.463170370370371e-05, + "loss": 1.7468, + "step": 25945 + }, + { + "epoch": 4.61, + "learning_rate": 3.4628740740740747e-05, + "loss": 1.8302, + "step": 25950 + }, + { + "epoch": 4.61, + "learning_rate": 3.462577777777778e-05, + "loss": 1.6789, + "step": 25955 + }, + { + "epoch": 4.62, + "learning_rate": 3.462281481481482e-05, + "loss": 1.7938, + "step": 25960 + }, + { + "epoch": 4.62, + "learning_rate": 3.4619851851851856e-05, + "loss": 1.9825, + "step": 25965 + }, + { + "epoch": 4.62, + "learning_rate": 3.4616888888888895e-05, + "loss": 2.0325, + "step": 25970 + }, + { + "epoch": 4.62, + "learning_rate": 3.461392592592593e-05, + "loss": 1.8721, + "step": 25975 + }, + { + "epoch": 4.62, + "learning_rate": 3.4610962962962966e-05, + "loss": 1.856, + "step": 25980 + }, + { + "epoch": 4.62, + "learning_rate": 3.4608000000000005e-05, + "loss": 1.8112, + "step": 25985 + }, + { + "epoch": 4.62, + "learning_rate": 3.4605037037037044e-05, + "loss": 1.8795, + "step": 25990 + }, + { + "epoch": 4.62, + "learning_rate": 3.4602074074074076e-05, + "loss": 1.9922, + "step": 25995 + }, + { + "epoch": 4.62, + "learning_rate": 3.4599111111111114e-05, + "loss": 1.9082, + "step": 26000 + }, + { + "epoch": 4.62, + "learning_rate": 3.4596148148148146e-05, + "loss": 1.7745, + "step": 26005 + }, + { + "epoch": 4.62, + "learning_rate": 3.459318518518519e-05, + "loss": 1.7269, + "step": 26010 + }, + { + "epoch": 4.62, + "learning_rate": 3.4590222222222224e-05, + "loss": 1.8373, + "step": 26015 + }, + { + "epoch": 4.63, + "learning_rate": 3.458725925925926e-05, + "loss": 1.9266, + "step": 26020 + }, + { + "epoch": 4.63, + "learning_rate": 3.4584296296296295e-05, + "loss": 1.8215, + "step": 26025 + }, + { + "epoch": 4.63, + "learning_rate": 3.4581333333333334e-05, + "loss": 1.8742, + "step": 26030 + }, + { + "epoch": 4.63, + "learning_rate": 3.457837037037037e-05, + "loss": 1.8247, + "step": 26035 + }, + { + "epoch": 4.63, + "learning_rate": 3.4575407407407405e-05, + "loss": 1.9136, + "step": 26040 + }, + { + "epoch": 4.63, + "learning_rate": 3.4572444444444444e-05, + "loss": 1.8285, + "step": 26045 + }, + { + "epoch": 4.63, + "learning_rate": 3.456948148148148e-05, + "loss": 1.8065, + "step": 26050 + }, + { + "epoch": 4.63, + "learning_rate": 3.456651851851852e-05, + "loss": 1.8805, + "step": 26055 + }, + { + "epoch": 4.63, + "learning_rate": 3.456355555555555e-05, + "loss": 1.8095, + "step": 26060 + }, + { + "epoch": 4.63, + "learning_rate": 3.456059259259259e-05, + "loss": 1.7353, + "step": 26065 + }, + { + "epoch": 4.63, + "learning_rate": 3.455762962962963e-05, + "loss": 1.9693, + "step": 26070 + }, + { + "epoch": 4.64, + "learning_rate": 3.455466666666667e-05, + "loss": 1.8052, + "step": 26075 + }, + { + "epoch": 4.64, + "learning_rate": 3.45517037037037e-05, + "loss": 1.8169, + "step": 26080 + }, + { + "epoch": 4.64, + "learning_rate": 3.454874074074074e-05, + "loss": 1.9702, + "step": 26085 + }, + { + "epoch": 4.64, + "learning_rate": 3.454577777777778e-05, + "loss": 1.7609, + "step": 26090 + }, + { + "epoch": 4.64, + "learning_rate": 3.454281481481482e-05, + "loss": 1.8585, + "step": 26095 + }, + { + "epoch": 4.64, + "learning_rate": 3.453985185185185e-05, + "loss": 2.0404, + "step": 26100 + }, + { + "epoch": 4.64, + "learning_rate": 3.453688888888889e-05, + "loss": 1.8763, + "step": 26105 + }, + { + "epoch": 4.64, + "learning_rate": 3.453392592592593e-05, + "loss": 1.9617, + "step": 26110 + }, + { + "epoch": 4.64, + "learning_rate": 3.453096296296297e-05, + "loss": 1.9698, + "step": 26115 + }, + { + "epoch": 4.64, + "learning_rate": 3.4528e-05, + "loss": 1.8195, + "step": 26120 + }, + { + "epoch": 4.64, + "learning_rate": 3.452503703703704e-05, + "loss": 1.9747, + "step": 26125 + }, + { + "epoch": 4.65, + "learning_rate": 3.4522074074074076e-05, + "loss": 1.8068, + "step": 26130 + }, + { + "epoch": 4.65, + "learning_rate": 3.4519111111111115e-05, + "loss": 1.921, + "step": 26135 + }, + { + "epoch": 4.65, + "learning_rate": 3.451614814814815e-05, + "loss": 1.7984, + "step": 26140 + }, + { + "epoch": 4.65, + "learning_rate": 3.4513185185185186e-05, + "loss": 1.7852, + "step": 26145 + }, + { + "epoch": 4.65, + "learning_rate": 3.4510222222222225e-05, + "loss": 1.8887, + "step": 26150 + }, + { + "epoch": 4.65, + "learning_rate": 3.4507259259259264e-05, + "loss": 1.9726, + "step": 26155 + }, + { + "epoch": 4.65, + "learning_rate": 3.4504296296296296e-05, + "loss": 1.823, + "step": 26160 + }, + { + "epoch": 4.65, + "learning_rate": 3.4501333333333335e-05, + "loss": 1.8372, + "step": 26165 + }, + { + "epoch": 4.65, + "learning_rate": 3.449837037037037e-05, + "loss": 1.8759, + "step": 26170 + }, + { + "epoch": 4.65, + "learning_rate": 3.449540740740741e-05, + "loss": 1.7618, + "step": 26175 + }, + { + "epoch": 4.65, + "learning_rate": 3.4492444444444444e-05, + "loss": 1.7007, + "step": 26180 + }, + { + "epoch": 4.66, + "learning_rate": 3.448948148148148e-05, + "loss": 1.9698, + "step": 26185 + }, + { + "epoch": 4.66, + "learning_rate": 3.4486518518518515e-05, + "loss": 1.9448, + "step": 26190 + }, + { + "epoch": 4.66, + "learning_rate": 3.448355555555556e-05, + "loss": 1.9044, + "step": 26195 + }, + { + "epoch": 4.66, + "learning_rate": 3.448059259259259e-05, + "loss": 1.8554, + "step": 26200 + }, + { + "epoch": 4.66, + "learning_rate": 3.447762962962963e-05, + "loss": 1.8383, + "step": 26205 + }, + { + "epoch": 4.66, + "learning_rate": 3.4474666666666664e-05, + "loss": 1.795, + "step": 26210 + }, + { + "epoch": 4.66, + "learning_rate": 3.447170370370371e-05, + "loss": 1.9453, + "step": 26215 + }, + { + "epoch": 4.66, + "learning_rate": 3.446874074074074e-05, + "loss": 1.8724, + "step": 26220 + }, + { + "epoch": 4.66, + "learning_rate": 3.446577777777778e-05, + "loss": 2.0048, + "step": 26225 + }, + { + "epoch": 4.66, + "learning_rate": 3.446281481481481e-05, + "loss": 1.8792, + "step": 26230 + }, + { + "epoch": 4.66, + "learning_rate": 3.445985185185185e-05, + "loss": 1.8202, + "step": 26235 + }, + { + "epoch": 4.66, + "learning_rate": 3.445688888888889e-05, + "loss": 1.991, + "step": 26240 + }, + { + "epoch": 4.67, + "learning_rate": 3.445392592592593e-05, + "loss": 1.8573, + "step": 26245 + }, + { + "epoch": 4.67, + "learning_rate": 3.445096296296296e-05, + "loss": 1.8389, + "step": 26250 + }, + { + "epoch": 4.67, + "learning_rate": 3.4448e-05, + "loss": 1.859, + "step": 26255 + }, + { + "epoch": 4.67, + "learning_rate": 3.444503703703704e-05, + "loss": 1.88, + "step": 26260 + }, + { + "epoch": 4.67, + "learning_rate": 3.444207407407408e-05, + "loss": 1.8078, + "step": 26265 + }, + { + "epoch": 4.67, + "learning_rate": 3.443911111111111e-05, + "loss": 1.92, + "step": 26270 + }, + { + "epoch": 4.67, + "learning_rate": 3.443614814814815e-05, + "loss": 1.9742, + "step": 26275 + }, + { + "epoch": 4.67, + "learning_rate": 3.443318518518519e-05, + "loss": 1.977, + "step": 26280 + }, + { + "epoch": 4.67, + "learning_rate": 3.4430222222222226e-05, + "loss": 1.8195, + "step": 26285 + }, + { + "epoch": 4.67, + "learning_rate": 3.442725925925926e-05, + "loss": 2.011, + "step": 26290 + }, + { + "epoch": 4.67, + "learning_rate": 3.4424296296296297e-05, + "loss": 1.8426, + "step": 26295 + }, + { + "epoch": 4.68, + "learning_rate": 3.4421333333333335e-05, + "loss": 1.7877, + "step": 26300 + }, + { + "epoch": 4.68, + "learning_rate": 3.4418370370370374e-05, + "loss": 1.8484, + "step": 26305 + }, + { + "epoch": 4.68, + "learning_rate": 3.4415407407407406e-05, + "loss": 1.8728, + "step": 26310 + }, + { + "epoch": 4.68, + "learning_rate": 3.4412444444444445e-05, + "loss": 1.9012, + "step": 26315 + }, + { + "epoch": 4.68, + "learning_rate": 3.4409481481481484e-05, + "loss": 1.8064, + "step": 26320 + }, + { + "epoch": 4.68, + "learning_rate": 3.440651851851852e-05, + "loss": 1.7642, + "step": 26325 + }, + { + "epoch": 4.68, + "learning_rate": 3.4403555555555555e-05, + "loss": 1.8113, + "step": 26330 + }, + { + "epoch": 4.68, + "learning_rate": 3.4400592592592594e-05, + "loss": 1.7067, + "step": 26335 + }, + { + "epoch": 4.68, + "learning_rate": 3.439762962962963e-05, + "loss": 1.9396, + "step": 26340 + }, + { + "epoch": 4.68, + "learning_rate": 3.439466666666667e-05, + "loss": 1.8967, + "step": 26345 + }, + { + "epoch": 4.68, + "learning_rate": 3.43917037037037e-05, + "loss": 1.8276, + "step": 26350 + }, + { + "epoch": 4.69, + "learning_rate": 3.438874074074074e-05, + "loss": 1.7048, + "step": 26355 + }, + { + "epoch": 4.69, + "learning_rate": 3.438577777777778e-05, + "loss": 1.7739, + "step": 26360 + }, + { + "epoch": 4.69, + "learning_rate": 3.438281481481482e-05, + "loss": 1.9196, + "step": 26365 + }, + { + "epoch": 4.69, + "learning_rate": 3.437985185185185e-05, + "loss": 1.9543, + "step": 26370 + }, + { + "epoch": 4.69, + "learning_rate": 3.437688888888889e-05, + "loss": 1.7592, + "step": 26375 + }, + { + "epoch": 4.69, + "learning_rate": 3.437392592592593e-05, + "loss": 1.8271, + "step": 26380 + }, + { + "epoch": 4.69, + "learning_rate": 3.437096296296297e-05, + "loss": 1.7922, + "step": 26385 + }, + { + "epoch": 4.69, + "learning_rate": 3.4368e-05, + "loss": 1.867, + "step": 26390 + }, + { + "epoch": 4.69, + "learning_rate": 3.436503703703704e-05, + "loss": 1.8377, + "step": 26395 + }, + { + "epoch": 4.69, + "learning_rate": 3.436207407407407e-05, + "loss": 1.9104, + "step": 26400 + }, + { + "epoch": 4.69, + "learning_rate": 3.435911111111112e-05, + "loss": 1.8955, + "step": 26405 + }, + { + "epoch": 4.7, + "learning_rate": 3.435614814814815e-05, + "loss": 1.7656, + "step": 26410 + }, + { + "epoch": 4.7, + "learning_rate": 3.435318518518519e-05, + "loss": 1.8218, + "step": 26415 + }, + { + "epoch": 4.7, + "learning_rate": 3.435022222222222e-05, + "loss": 1.7283, + "step": 26420 + }, + { + "epoch": 4.7, + "learning_rate": 3.4347259259259265e-05, + "loss": 1.855, + "step": 26425 + }, + { + "epoch": 4.7, + "learning_rate": 3.43442962962963e-05, + "loss": 1.8763, + "step": 26430 + }, + { + "epoch": 4.7, + "learning_rate": 3.4341333333333336e-05, + "loss": 1.8621, + "step": 26435 + }, + { + "epoch": 4.7, + "learning_rate": 3.433837037037037e-05, + "loss": 1.7818, + "step": 26440 + }, + { + "epoch": 4.7, + "learning_rate": 3.4335407407407414e-05, + "loss": 1.8186, + "step": 26445 + }, + { + "epoch": 4.7, + "learning_rate": 3.4332444444444446e-05, + "loss": 1.838, + "step": 26450 + }, + { + "epoch": 4.7, + "learning_rate": 3.4329481481481485e-05, + "loss": 1.8215, + "step": 26455 + }, + { + "epoch": 4.7, + "learning_rate": 3.432651851851852e-05, + "loss": 1.7418, + "step": 26460 + }, + { + "epoch": 4.7, + "learning_rate": 3.4323555555555556e-05, + "loss": 1.8272, + "step": 26465 + }, + { + "epoch": 4.71, + "learning_rate": 3.4320592592592594e-05, + "loss": 1.8945, + "step": 26470 + }, + { + "epoch": 4.71, + "learning_rate": 3.431762962962963e-05, + "loss": 1.7323, + "step": 26475 + }, + { + "epoch": 4.71, + "learning_rate": 3.4314666666666665e-05, + "loss": 1.7787, + "step": 26480 + }, + { + "epoch": 4.71, + "learning_rate": 3.4311703703703704e-05, + "loss": 1.8821, + "step": 26485 + }, + { + "epoch": 4.71, + "learning_rate": 3.430874074074074e-05, + "loss": 1.9165, + "step": 26490 + }, + { + "epoch": 4.71, + "learning_rate": 3.430577777777778e-05, + "loss": 1.8994, + "step": 26495 + }, + { + "epoch": 4.71, + "learning_rate": 3.4302814814814814e-05, + "loss": 1.8609, + "step": 26500 + }, + { + "epoch": 4.71, + "learning_rate": 3.429985185185185e-05, + "loss": 1.8666, + "step": 26505 + }, + { + "epoch": 4.71, + "learning_rate": 3.429688888888889e-05, + "loss": 1.9216, + "step": 26510 + }, + { + "epoch": 4.71, + "learning_rate": 3.429392592592593e-05, + "loss": 1.842, + "step": 26515 + }, + { + "epoch": 4.71, + "learning_rate": 3.429096296296296e-05, + "loss": 1.8254, + "step": 26520 + }, + { + "epoch": 4.72, + "learning_rate": 3.4288e-05, + "loss": 1.7813, + "step": 26525 + }, + { + "epoch": 4.72, + "learning_rate": 3.428503703703704e-05, + "loss": 1.8412, + "step": 26530 + }, + { + "epoch": 4.72, + "learning_rate": 3.428207407407408e-05, + "loss": 1.8566, + "step": 26535 + }, + { + "epoch": 4.72, + "learning_rate": 3.427911111111111e-05, + "loss": 1.8186, + "step": 26540 + }, + { + "epoch": 4.72, + "learning_rate": 3.427614814814815e-05, + "loss": 1.8927, + "step": 26545 + }, + { + "epoch": 4.72, + "learning_rate": 3.427318518518519e-05, + "loss": 1.8554, + "step": 26550 + }, + { + "epoch": 4.72, + "learning_rate": 3.427022222222223e-05, + "loss": 1.9565, + "step": 26555 + }, + { + "epoch": 4.72, + "learning_rate": 3.426725925925926e-05, + "loss": 1.844, + "step": 26560 + }, + { + "epoch": 4.72, + "learning_rate": 3.42642962962963e-05, + "loss": 1.9028, + "step": 26565 + }, + { + "epoch": 4.72, + "learning_rate": 3.426133333333334e-05, + "loss": 1.8434, + "step": 26570 + }, + { + "epoch": 4.72, + "learning_rate": 3.4258370370370376e-05, + "loss": 1.808, + "step": 26575 + }, + { + "epoch": 4.73, + "learning_rate": 3.425540740740741e-05, + "loss": 2.0082, + "step": 26580 + }, + { + "epoch": 4.73, + "learning_rate": 3.4252444444444447e-05, + "loss": 1.7569, + "step": 26585 + }, + { + "epoch": 4.73, + "learning_rate": 3.4249481481481485e-05, + "loss": 1.9194, + "step": 26590 + }, + { + "epoch": 4.73, + "learning_rate": 3.4246518518518524e-05, + "loss": 1.8615, + "step": 26595 + }, + { + "epoch": 4.73, + "learning_rate": 3.4243555555555556e-05, + "loss": 1.8587, + "step": 26600 + }, + { + "epoch": 4.73, + "learning_rate": 3.4240592592592595e-05, + "loss": 1.8431, + "step": 26605 + }, + { + "epoch": 4.73, + "learning_rate": 3.4237629629629634e-05, + "loss": 1.9205, + "step": 26610 + }, + { + "epoch": 4.73, + "learning_rate": 3.423466666666667e-05, + "loss": 1.8673, + "step": 26615 + }, + { + "epoch": 4.73, + "learning_rate": 3.4231703703703705e-05, + "loss": 1.7975, + "step": 26620 + }, + { + "epoch": 4.73, + "learning_rate": 3.4228740740740744e-05, + "loss": 1.8357, + "step": 26625 + }, + { + "epoch": 4.73, + "learning_rate": 3.4225777777777776e-05, + "loss": 2.0048, + "step": 26630 + }, + { + "epoch": 4.74, + "learning_rate": 3.422281481481482e-05, + "loss": 1.9088, + "step": 26635 + }, + { + "epoch": 4.74, + "learning_rate": 3.421985185185185e-05, + "loss": 1.8599, + "step": 26640 + }, + { + "epoch": 4.74, + "learning_rate": 3.421688888888889e-05, + "loss": 1.878, + "step": 26645 + }, + { + "epoch": 4.74, + "learning_rate": 3.4213925925925924e-05, + "loss": 1.9824, + "step": 26650 + }, + { + "epoch": 4.74, + "learning_rate": 3.421096296296297e-05, + "loss": 1.7353, + "step": 26655 + }, + { + "epoch": 4.74, + "learning_rate": 3.4208e-05, + "loss": 1.8436, + "step": 26660 + }, + { + "epoch": 4.74, + "learning_rate": 3.420503703703704e-05, + "loss": 1.8544, + "step": 26665 + }, + { + "epoch": 4.74, + "learning_rate": 3.420207407407407e-05, + "loss": 1.9418, + "step": 26670 + }, + { + "epoch": 4.74, + "learning_rate": 3.419911111111112e-05, + "loss": 1.8483, + "step": 26675 + }, + { + "epoch": 4.74, + "learning_rate": 3.419614814814815e-05, + "loss": 1.8695, + "step": 26680 + }, + { + "epoch": 4.74, + "learning_rate": 3.419318518518519e-05, + "loss": 1.8626, + "step": 26685 + }, + { + "epoch": 4.74, + "learning_rate": 3.419022222222222e-05, + "loss": 1.9012, + "step": 26690 + }, + { + "epoch": 4.75, + "learning_rate": 3.418725925925926e-05, + "loss": 2.0329, + "step": 26695 + }, + { + "epoch": 4.75, + "learning_rate": 3.41842962962963e-05, + "loss": 1.8436, + "step": 26700 + }, + { + "epoch": 4.75, + "learning_rate": 3.418133333333334e-05, + "loss": 1.8494, + "step": 26705 + }, + { + "epoch": 4.75, + "learning_rate": 3.417837037037037e-05, + "loss": 1.8044, + "step": 26710 + }, + { + "epoch": 4.75, + "learning_rate": 3.417540740740741e-05, + "loss": 1.8807, + "step": 26715 + }, + { + "epoch": 4.75, + "learning_rate": 3.417244444444445e-05, + "loss": 1.8754, + "step": 26720 + }, + { + "epoch": 4.75, + "learning_rate": 3.4169481481481486e-05, + "loss": 1.8899, + "step": 26725 + }, + { + "epoch": 4.75, + "learning_rate": 3.416651851851852e-05, + "loss": 1.7409, + "step": 26730 + }, + { + "epoch": 4.75, + "learning_rate": 3.416355555555556e-05, + "loss": 1.9378, + "step": 26735 + }, + { + "epoch": 4.75, + "learning_rate": 3.4160592592592596e-05, + "loss": 1.8474, + "step": 26740 + }, + { + "epoch": 4.75, + "learning_rate": 3.4157629629629635e-05, + "loss": 1.837, + "step": 26745 + }, + { + "epoch": 4.76, + "learning_rate": 3.415466666666667e-05, + "loss": 2.0005, + "step": 26750 + }, + { + "epoch": 4.76, + "learning_rate": 3.4151703703703706e-05, + "loss": 1.9536, + "step": 26755 + }, + { + "epoch": 4.76, + "learning_rate": 3.4148740740740744e-05, + "loss": 1.7999, + "step": 26760 + }, + { + "epoch": 4.76, + "learning_rate": 3.414577777777778e-05, + "loss": 1.9324, + "step": 26765 + }, + { + "epoch": 4.76, + "learning_rate": 3.4142814814814815e-05, + "loss": 1.8026, + "step": 26770 + }, + { + "epoch": 4.76, + "learning_rate": 3.4139851851851854e-05, + "loss": 1.7909, + "step": 26775 + }, + { + "epoch": 4.76, + "learning_rate": 3.413688888888889e-05, + "loss": 1.9094, + "step": 26780 + }, + { + "epoch": 4.76, + "learning_rate": 3.413392592592593e-05, + "loss": 1.9107, + "step": 26785 + }, + { + "epoch": 4.76, + "learning_rate": 3.4130962962962964e-05, + "loss": 1.8998, + "step": 26790 + }, + { + "epoch": 4.76, + "learning_rate": 3.4127999999999996e-05, + "loss": 1.8793, + "step": 26795 + }, + { + "epoch": 4.76, + "learning_rate": 3.412503703703704e-05, + "loss": 1.8062, + "step": 26800 + }, + { + "epoch": 4.77, + "learning_rate": 3.4122074074074073e-05, + "loss": 1.8257, + "step": 26805 + }, + { + "epoch": 4.77, + "learning_rate": 3.411911111111111e-05, + "loss": 1.9068, + "step": 26810 + }, + { + "epoch": 4.77, + "learning_rate": 3.4116148148148144e-05, + "loss": 1.833, + "step": 26815 + }, + { + "epoch": 4.77, + "learning_rate": 3.411318518518519e-05, + "loss": 1.9013, + "step": 26820 + }, + { + "epoch": 4.77, + "learning_rate": 3.411022222222222e-05, + "loss": 1.9421, + "step": 26825 + }, + { + "epoch": 4.77, + "learning_rate": 3.410725925925926e-05, + "loss": 1.7704, + "step": 26830 + }, + { + "epoch": 4.77, + "learning_rate": 3.410429629629629e-05, + "loss": 1.8683, + "step": 26835 + }, + { + "epoch": 4.77, + "learning_rate": 3.410133333333334e-05, + "loss": 2.0467, + "step": 26840 + }, + { + "epoch": 4.77, + "learning_rate": 3.409837037037037e-05, + "loss": 1.979, + "step": 26845 + }, + { + "epoch": 4.77, + "learning_rate": 3.409540740740741e-05, + "loss": 1.77, + "step": 26850 + }, + { + "epoch": 4.77, + "learning_rate": 3.409244444444444e-05, + "loss": 1.8678, + "step": 26855 + }, + { + "epoch": 4.78, + "learning_rate": 3.408948148148148e-05, + "loss": 1.8441, + "step": 26860 + }, + { + "epoch": 4.78, + "learning_rate": 3.408651851851852e-05, + "loss": 1.7521, + "step": 26865 + }, + { + "epoch": 4.78, + "learning_rate": 3.408355555555556e-05, + "loss": 1.8018, + "step": 26870 + }, + { + "epoch": 4.78, + "learning_rate": 3.408059259259259e-05, + "loss": 1.8317, + "step": 26875 + }, + { + "epoch": 4.78, + "learning_rate": 3.407762962962963e-05, + "loss": 1.8124, + "step": 26880 + }, + { + "epoch": 4.78, + "learning_rate": 3.407466666666667e-05, + "loss": 1.8533, + "step": 26885 + }, + { + "epoch": 4.78, + "learning_rate": 3.4071703703703706e-05, + "loss": 1.8812, + "step": 26890 + }, + { + "epoch": 4.78, + "learning_rate": 3.406874074074074e-05, + "loss": 1.9247, + "step": 26895 + }, + { + "epoch": 4.78, + "learning_rate": 3.406577777777778e-05, + "loss": 1.7035, + "step": 26900 + }, + { + "epoch": 4.78, + "learning_rate": 3.4062814814814816e-05, + "loss": 1.8284, + "step": 26905 + }, + { + "epoch": 4.78, + "learning_rate": 3.4059851851851855e-05, + "loss": 2.0453, + "step": 26910 + }, + { + "epoch": 4.78, + "learning_rate": 3.405688888888889e-05, + "loss": 2.0362, + "step": 26915 + }, + { + "epoch": 4.79, + "learning_rate": 3.4053925925925926e-05, + "loss": 1.9043, + "step": 26920 + }, + { + "epoch": 4.79, + "learning_rate": 3.4050962962962965e-05, + "loss": 1.833, + "step": 26925 + }, + { + "epoch": 4.79, + "learning_rate": 3.4048e-05, + "loss": 1.8868, + "step": 26930 + }, + { + "epoch": 4.79, + "learning_rate": 3.4045037037037035e-05, + "loss": 1.8316, + "step": 26935 + }, + { + "epoch": 4.79, + "learning_rate": 3.4042074074074074e-05, + "loss": 1.9401, + "step": 26940 + }, + { + "epoch": 4.79, + "learning_rate": 3.403911111111111e-05, + "loss": 1.7539, + "step": 26945 + }, + { + "epoch": 4.79, + "learning_rate": 3.403614814814815e-05, + "loss": 1.8662, + "step": 26950 + }, + { + "epoch": 4.79, + "learning_rate": 3.4033185185185184e-05, + "loss": 1.8887, + "step": 26955 + }, + { + "epoch": 4.79, + "learning_rate": 3.403022222222222e-05, + "loss": 1.9022, + "step": 26960 + }, + { + "epoch": 4.79, + "learning_rate": 3.402725925925926e-05, + "loss": 1.8331, + "step": 26965 + }, + { + "epoch": 4.79, + "learning_rate": 3.40242962962963e-05, + "loss": 1.9568, + "step": 26970 + }, + { + "epoch": 4.8, + "learning_rate": 3.402133333333333e-05, + "loss": 1.8852, + "step": 26975 + }, + { + "epoch": 4.8, + "learning_rate": 3.401837037037037e-05, + "loss": 1.7597, + "step": 26980 + }, + { + "epoch": 4.8, + "learning_rate": 3.401540740740741e-05, + "loss": 1.8942, + "step": 26985 + }, + { + "epoch": 4.8, + "learning_rate": 3.401244444444445e-05, + "loss": 1.8976, + "step": 26990 + }, + { + "epoch": 4.8, + "learning_rate": 3.400948148148148e-05, + "loss": 1.8534, + "step": 26995 + }, + { + "epoch": 4.8, + "learning_rate": 3.400651851851852e-05, + "loss": 1.8821, + "step": 27000 + }, + { + "epoch": 4.8, + "learning_rate": 3.400355555555555e-05, + "loss": 1.8543, + "step": 27005 + }, + { + "epoch": 4.8, + "learning_rate": 3.40005925925926e-05, + "loss": 1.9833, + "step": 27010 + }, + { + "epoch": 4.8, + "learning_rate": 3.399762962962963e-05, + "loss": 1.823, + "step": 27015 + }, + { + "epoch": 4.8, + "learning_rate": 3.399466666666667e-05, + "loss": 1.8767, + "step": 27020 + }, + { + "epoch": 4.8, + "learning_rate": 3.39917037037037e-05, + "loss": 1.769, + "step": 27025 + }, + { + "epoch": 4.81, + "learning_rate": 3.3988740740740746e-05, + "loss": 1.8142, + "step": 27030 + }, + { + "epoch": 4.81, + "learning_rate": 3.398577777777778e-05, + "loss": 1.9023, + "step": 27035 + }, + { + "epoch": 4.81, + "learning_rate": 3.398281481481482e-05, + "loss": 1.8469, + "step": 27040 + }, + { + "epoch": 4.81, + "learning_rate": 3.397985185185185e-05, + "loss": 1.994, + "step": 27045 + }, + { + "epoch": 4.81, + "learning_rate": 3.3976888888888894e-05, + "loss": 1.8669, + "step": 27050 + }, + { + "epoch": 4.81, + "learning_rate": 3.3973925925925926e-05, + "loss": 1.86, + "step": 27055 + }, + { + "epoch": 4.81, + "learning_rate": 3.3970962962962965e-05, + "loss": 1.7664, + "step": 27060 + }, + { + "epoch": 4.81, + "learning_rate": 3.3968e-05, + "loss": 1.9338, + "step": 27065 + }, + { + "epoch": 4.81, + "learning_rate": 3.396503703703704e-05, + "loss": 1.8918, + "step": 27070 + }, + { + "epoch": 4.81, + "learning_rate": 3.3962074074074075e-05, + "loss": 1.934, + "step": 27075 + }, + { + "epoch": 4.81, + "learning_rate": 3.3959111111111114e-05, + "loss": 1.9046, + "step": 27080 + }, + { + "epoch": 4.82, + "learning_rate": 3.3956148148148146e-05, + "loss": 1.9348, + "step": 27085 + }, + { + "epoch": 4.82, + "learning_rate": 3.3953185185185185e-05, + "loss": 1.9185, + "step": 27090 + }, + { + "epoch": 4.82, + "learning_rate": 3.3950222222222223e-05, + "loss": 1.8127, + "step": 27095 + }, + { + "epoch": 4.82, + "learning_rate": 3.394725925925926e-05, + "loss": 1.9062, + "step": 27100 + }, + { + "epoch": 4.82, + "learning_rate": 3.3944296296296294e-05, + "loss": 1.7339, + "step": 27105 + }, + { + "epoch": 4.82, + "learning_rate": 3.394133333333333e-05, + "loss": 1.7953, + "step": 27110 + }, + { + "epoch": 4.82, + "learning_rate": 3.393837037037037e-05, + "loss": 1.8504, + "step": 27115 + }, + { + "epoch": 4.82, + "learning_rate": 3.393540740740741e-05, + "loss": 1.8805, + "step": 27120 + }, + { + "epoch": 4.82, + "learning_rate": 3.393244444444444e-05, + "loss": 1.954, + "step": 27125 + }, + { + "epoch": 4.82, + "learning_rate": 3.392948148148148e-05, + "loss": 1.8441, + "step": 27130 + }, + { + "epoch": 4.82, + "learning_rate": 3.392651851851852e-05, + "loss": 1.8299, + "step": 27135 + }, + { + "epoch": 4.82, + "learning_rate": 3.392355555555556e-05, + "loss": 1.9614, + "step": 27140 + }, + { + "epoch": 4.83, + "learning_rate": 3.392059259259259e-05, + "loss": 1.9247, + "step": 27145 + }, + { + "epoch": 4.83, + "learning_rate": 3.391762962962963e-05, + "loss": 1.8856, + "step": 27150 + }, + { + "epoch": 4.83, + "learning_rate": 3.391466666666667e-05, + "loss": 1.8852, + "step": 27155 + }, + { + "epoch": 4.83, + "learning_rate": 3.391170370370371e-05, + "loss": 1.8622, + "step": 27160 + }, + { + "epoch": 4.83, + "learning_rate": 3.390874074074074e-05, + "loss": 1.8509, + "step": 27165 + }, + { + "epoch": 4.83, + "learning_rate": 3.390577777777778e-05, + "loss": 1.8655, + "step": 27170 + }, + { + "epoch": 4.83, + "learning_rate": 3.390281481481482e-05, + "loss": 1.8659, + "step": 27175 + }, + { + "epoch": 4.83, + "learning_rate": 3.3899851851851856e-05, + "loss": 1.9569, + "step": 27180 + }, + { + "epoch": 4.83, + "learning_rate": 3.389688888888889e-05, + "loss": 1.8286, + "step": 27185 + }, + { + "epoch": 4.83, + "learning_rate": 3.389392592592593e-05, + "loss": 1.7826, + "step": 27190 + }, + { + "epoch": 4.83, + "learning_rate": 3.3890962962962966e-05, + "loss": 1.8569, + "step": 27195 + }, + { + "epoch": 4.84, + "learning_rate": 3.3888000000000005e-05, + "loss": 1.9217, + "step": 27200 + }, + { + "epoch": 4.84, + "learning_rate": 3.388503703703704e-05, + "loss": 1.8555, + "step": 27205 + }, + { + "epoch": 4.84, + "learning_rate": 3.3882074074074076e-05, + "loss": 1.9139, + "step": 27210 + }, + { + "epoch": 4.84, + "learning_rate": 3.3879111111111115e-05, + "loss": 1.8997, + "step": 27215 + }, + { + "epoch": 4.84, + "learning_rate": 3.387614814814815e-05, + "loss": 1.8336, + "step": 27220 + }, + { + "epoch": 4.84, + "learning_rate": 3.3873185185185185e-05, + "loss": 1.9609, + "step": 27225 + }, + { + "epoch": 4.84, + "learning_rate": 3.3870222222222224e-05, + "loss": 1.8004, + "step": 27230 + }, + { + "epoch": 4.84, + "learning_rate": 3.386725925925926e-05, + "loss": 1.8281, + "step": 27235 + }, + { + "epoch": 4.84, + "learning_rate": 3.38642962962963e-05, + "loss": 1.8662, + "step": 27240 + }, + { + "epoch": 4.84, + "learning_rate": 3.3861333333333334e-05, + "loss": 1.81, + "step": 27245 + }, + { + "epoch": 4.84, + "learning_rate": 3.385837037037037e-05, + "loss": 1.8333, + "step": 27250 + }, + { + "epoch": 4.85, + "learning_rate": 3.3855407407407405e-05, + "loss": 1.7969, + "step": 27255 + }, + { + "epoch": 4.85, + "learning_rate": 3.385244444444445e-05, + "loss": 1.7766, + "step": 27260 + }, + { + "epoch": 4.85, + "learning_rate": 3.384948148148148e-05, + "loss": 1.8643, + "step": 27265 + }, + { + "epoch": 4.85, + "learning_rate": 3.384651851851852e-05, + "loss": 1.7737, + "step": 27270 + }, + { + "epoch": 4.85, + "learning_rate": 3.384355555555555e-05, + "loss": 1.8939, + "step": 27275 + }, + { + "epoch": 4.85, + "learning_rate": 3.38405925925926e-05, + "loss": 1.8586, + "step": 27280 + }, + { + "epoch": 4.85, + "learning_rate": 3.383762962962963e-05, + "loss": 1.8959, + "step": 27285 + }, + { + "epoch": 4.85, + "learning_rate": 3.383466666666667e-05, + "loss": 1.8894, + "step": 27290 + }, + { + "epoch": 4.85, + "learning_rate": 3.38317037037037e-05, + "loss": 1.8193, + "step": 27295 + }, + { + "epoch": 4.85, + "learning_rate": 3.382874074074075e-05, + "loss": 1.8419, + "step": 27300 + }, + { + "epoch": 4.85, + "learning_rate": 3.382577777777778e-05, + "loss": 1.9208, + "step": 27305 + }, + { + "epoch": 4.86, + "learning_rate": 3.382281481481482e-05, + "loss": 1.8542, + "step": 27310 + }, + { + "epoch": 4.86, + "learning_rate": 3.381985185185185e-05, + "loss": 1.8298, + "step": 27315 + }, + { + "epoch": 4.86, + "learning_rate": 3.381688888888889e-05, + "loss": 1.894, + "step": 27320 + }, + { + "epoch": 4.86, + "learning_rate": 3.381392592592593e-05, + "loss": 1.8889, + "step": 27325 + }, + { + "epoch": 4.86, + "learning_rate": 3.381096296296297e-05, + "loss": 1.7362, + "step": 27330 + }, + { + "epoch": 4.86, + "learning_rate": 3.3808e-05, + "loss": 1.9045, + "step": 27335 + }, + { + "epoch": 4.86, + "learning_rate": 3.380503703703704e-05, + "loss": 1.9487, + "step": 27340 + }, + { + "epoch": 4.86, + "learning_rate": 3.3802074074074077e-05, + "loss": 1.7598, + "step": 27345 + }, + { + "epoch": 4.86, + "learning_rate": 3.3799111111111115e-05, + "loss": 1.9082, + "step": 27350 + }, + { + "epoch": 4.86, + "learning_rate": 3.379614814814815e-05, + "loss": 1.8697, + "step": 27355 + }, + { + "epoch": 4.86, + "learning_rate": 3.3793185185185186e-05, + "loss": 1.829, + "step": 27360 + }, + { + "epoch": 4.86, + "learning_rate": 3.3790222222222225e-05, + "loss": 1.7843, + "step": 27365 + }, + { + "epoch": 4.87, + "learning_rate": 3.3787259259259264e-05, + "loss": 1.8943, + "step": 27370 + }, + { + "epoch": 4.87, + "learning_rate": 3.3784296296296296e-05, + "loss": 1.7697, + "step": 27375 + }, + { + "epoch": 4.87, + "learning_rate": 3.3781333333333335e-05, + "loss": 1.9667, + "step": 27380 + }, + { + "epoch": 4.87, + "learning_rate": 3.3778370370370374e-05, + "loss": 1.9397, + "step": 27385 + }, + { + "epoch": 4.87, + "learning_rate": 3.377540740740741e-05, + "loss": 1.8692, + "step": 27390 + }, + { + "epoch": 4.87, + "learning_rate": 3.3772444444444444e-05, + "loss": 1.7121, + "step": 27395 + }, + { + "epoch": 4.87, + "learning_rate": 3.376948148148148e-05, + "loss": 1.7247, + "step": 27400 + }, + { + "epoch": 4.87, + "learning_rate": 3.376651851851852e-05, + "loss": 1.8749, + "step": 27405 + }, + { + "epoch": 4.87, + "learning_rate": 3.376355555555556e-05, + "loss": 1.9242, + "step": 27410 + }, + { + "epoch": 4.87, + "learning_rate": 3.376059259259259e-05, + "loss": 1.8277, + "step": 27415 + }, + { + "epoch": 4.87, + "learning_rate": 3.375762962962963e-05, + "loss": 1.8604, + "step": 27420 + }, + { + "epoch": 4.88, + "learning_rate": 3.375466666666667e-05, + "loss": 1.7147, + "step": 27425 + }, + { + "epoch": 4.88, + "learning_rate": 3.375170370370371e-05, + "loss": 1.7693, + "step": 27430 + }, + { + "epoch": 4.88, + "learning_rate": 3.374874074074074e-05, + "loss": 1.8308, + "step": 27435 + }, + { + "epoch": 4.88, + "learning_rate": 3.374577777777778e-05, + "loss": 1.9276, + "step": 27440 + }, + { + "epoch": 4.88, + "learning_rate": 3.374281481481482e-05, + "loss": 1.8627, + "step": 27445 + }, + { + "epoch": 4.88, + "learning_rate": 3.373985185185186e-05, + "loss": 1.8388, + "step": 27450 + }, + { + "epoch": 4.88, + "learning_rate": 3.373688888888889e-05, + "loss": 1.917, + "step": 27455 + }, + { + "epoch": 4.88, + "learning_rate": 3.373392592592593e-05, + "loss": 1.9011, + "step": 27460 + }, + { + "epoch": 4.88, + "learning_rate": 3.373096296296297e-05, + "loss": 1.8276, + "step": 27465 + }, + { + "epoch": 4.88, + "learning_rate": 3.3728000000000006e-05, + "loss": 1.8683, + "step": 27470 + }, + { + "epoch": 4.88, + "learning_rate": 3.372503703703704e-05, + "loss": 1.986, + "step": 27475 + }, + { + "epoch": 4.89, + "learning_rate": 3.372207407407408e-05, + "loss": 1.851, + "step": 27480 + }, + { + "epoch": 4.89, + "learning_rate": 3.371911111111111e-05, + "loss": 1.8596, + "step": 27485 + }, + { + "epoch": 4.89, + "learning_rate": 3.3716148148148155e-05, + "loss": 1.8237, + "step": 27490 + }, + { + "epoch": 4.89, + "learning_rate": 3.371318518518519e-05, + "loss": 1.7493, + "step": 27495 + }, + { + "epoch": 4.89, + "learning_rate": 3.3710222222222226e-05, + "loss": 1.8453, + "step": 27500 + }, + { + "epoch": 4.89, + "learning_rate": 3.370725925925926e-05, + "loss": 1.9803, + "step": 27505 + }, + { + "epoch": 4.89, + "learning_rate": 3.3704296296296303e-05, + "loss": 1.8217, + "step": 27510 + }, + { + "epoch": 4.89, + "learning_rate": 3.3701333333333335e-05, + "loss": 1.7674, + "step": 27515 + }, + { + "epoch": 4.89, + "learning_rate": 3.3698370370370374e-05, + "loss": 1.9307, + "step": 27520 + }, + { + "epoch": 4.89, + "learning_rate": 3.3695407407407406e-05, + "loss": 1.7862, + "step": 27525 + }, + { + "epoch": 4.89, + "learning_rate": 3.369244444444445e-05, + "loss": 1.8071, + "step": 27530 + }, + { + "epoch": 4.9, + "learning_rate": 3.3689481481481484e-05, + "loss": 1.9153, + "step": 27535 + }, + { + "epoch": 4.9, + "learning_rate": 3.368651851851852e-05, + "loss": 1.9317, + "step": 27540 + }, + { + "epoch": 4.9, + "learning_rate": 3.3683555555555555e-05, + "loss": 1.8791, + "step": 27545 + }, + { + "epoch": 4.9, + "learning_rate": 3.3680592592592594e-05, + "loss": 1.7685, + "step": 27550 + }, + { + "epoch": 4.9, + "learning_rate": 3.367762962962963e-05, + "loss": 1.8431, + "step": 27555 + }, + { + "epoch": 4.9, + "learning_rate": 3.367466666666667e-05, + "loss": 1.7817, + "step": 27560 + }, + { + "epoch": 4.9, + "learning_rate": 3.36717037037037e-05, + "loss": 1.8976, + "step": 27565 + }, + { + "epoch": 4.9, + "learning_rate": 3.366874074074074e-05, + "loss": 1.7995, + "step": 27570 + }, + { + "epoch": 4.9, + "learning_rate": 3.366577777777778e-05, + "loss": 1.8784, + "step": 27575 + }, + { + "epoch": 4.9, + "learning_rate": 3.366281481481481e-05, + "loss": 1.8824, + "step": 27580 + }, + { + "epoch": 4.9, + "learning_rate": 3.365985185185185e-05, + "loss": 1.8377, + "step": 27585 + }, + { + "epoch": 4.9, + "learning_rate": 3.365688888888889e-05, + "loss": 1.9117, + "step": 27590 + }, + { + "epoch": 4.91, + "learning_rate": 3.365392592592593e-05, + "loss": 1.8219, + "step": 27595 + }, + { + "epoch": 4.91, + "learning_rate": 3.365096296296296e-05, + "loss": 1.8808, + "step": 27600 + }, + { + "epoch": 4.91, + "learning_rate": 3.3648e-05, + "loss": 1.975, + "step": 27605 + }, + { + "epoch": 4.91, + "learning_rate": 3.364503703703704e-05, + "loss": 1.9522, + "step": 27610 + }, + { + "epoch": 4.91, + "learning_rate": 3.364207407407408e-05, + "loss": 1.8265, + "step": 27615 + }, + { + "epoch": 4.91, + "learning_rate": 3.363911111111111e-05, + "loss": 1.8612, + "step": 27620 + }, + { + "epoch": 4.91, + "learning_rate": 3.363614814814815e-05, + "loss": 1.8166, + "step": 27625 + }, + { + "epoch": 4.91, + "learning_rate": 3.363318518518518e-05, + "loss": 1.9841, + "step": 27630 + }, + { + "epoch": 4.91, + "learning_rate": 3.3630222222222227e-05, + "loss": 1.865, + "step": 27635 + }, + { + "epoch": 4.91, + "learning_rate": 3.362725925925926e-05, + "loss": 1.8827, + "step": 27640 + }, + { + "epoch": 4.91, + "learning_rate": 3.36242962962963e-05, + "loss": 1.8036, + "step": 27645 + }, + { + "epoch": 4.92, + "learning_rate": 3.362133333333333e-05, + "loss": 1.8639, + "step": 27650 + }, + { + "epoch": 4.92, + "learning_rate": 3.3618370370370375e-05, + "loss": 1.9511, + "step": 27655 + }, + { + "epoch": 4.92, + "learning_rate": 3.361540740740741e-05, + "loss": 1.7709, + "step": 27660 + }, + { + "epoch": 4.92, + "learning_rate": 3.3612444444444446e-05, + "loss": 1.9322, + "step": 27665 + }, + { + "epoch": 4.92, + "learning_rate": 3.360948148148148e-05, + "loss": 1.9328, + "step": 27670 + }, + { + "epoch": 4.92, + "learning_rate": 3.3606518518518524e-05, + "loss": 1.8236, + "step": 27675 + }, + { + "epoch": 4.92, + "learning_rate": 3.3603555555555556e-05, + "loss": 1.8497, + "step": 27680 + }, + { + "epoch": 4.92, + "learning_rate": 3.3600592592592594e-05, + "loss": 1.9246, + "step": 27685 + }, + { + "epoch": 4.92, + "learning_rate": 3.3597629629629626e-05, + "loss": 1.7647, + "step": 27690 + }, + { + "epoch": 4.92, + "learning_rate": 3.359466666666667e-05, + "loss": 1.9265, + "step": 27695 + }, + { + "epoch": 4.92, + "learning_rate": 3.3591703703703704e-05, + "loss": 1.9041, + "step": 27700 + }, + { + "epoch": 4.93, + "learning_rate": 3.358874074074074e-05, + "loss": 1.9264, + "step": 27705 + }, + { + "epoch": 4.93, + "learning_rate": 3.3585777777777775e-05, + "loss": 1.9545, + "step": 27710 + }, + { + "epoch": 4.93, + "learning_rate": 3.3582814814814814e-05, + "loss": 1.8655, + "step": 27715 + }, + { + "epoch": 4.93, + "learning_rate": 3.357985185185185e-05, + "loss": 1.8238, + "step": 27720 + }, + { + "epoch": 4.93, + "learning_rate": 3.357688888888889e-05, + "loss": 1.7974, + "step": 27725 + }, + { + "epoch": 4.93, + "learning_rate": 3.3573925925925924e-05, + "loss": 1.9911, + "step": 27730 + }, + { + "epoch": 4.93, + "learning_rate": 3.357096296296296e-05, + "loss": 1.8882, + "step": 27735 + }, + { + "epoch": 4.93, + "learning_rate": 3.3568e-05, + "loss": 1.8791, + "step": 27740 + }, + { + "epoch": 4.93, + "learning_rate": 3.356503703703704e-05, + "loss": 1.7675, + "step": 27745 + }, + { + "epoch": 4.93, + "learning_rate": 3.356207407407407e-05, + "loss": 1.9004, + "step": 27750 + }, + { + "epoch": 4.93, + "learning_rate": 3.355911111111111e-05, + "loss": 1.6786, + "step": 27755 + }, + { + "epoch": 4.94, + "learning_rate": 3.355614814814815e-05, + "loss": 1.9158, + "step": 27760 + }, + { + "epoch": 4.94, + "learning_rate": 3.355318518518519e-05, + "loss": 1.9172, + "step": 27765 + }, + { + "epoch": 4.94, + "learning_rate": 3.355022222222222e-05, + "loss": 1.903, + "step": 27770 + }, + { + "epoch": 4.94, + "learning_rate": 3.354725925925926e-05, + "loss": 1.8954, + "step": 27775 + }, + { + "epoch": 4.94, + "learning_rate": 3.35442962962963e-05, + "loss": 1.8498, + "step": 27780 + }, + { + "epoch": 4.94, + "learning_rate": 3.354133333333334e-05, + "loss": 1.7382, + "step": 27785 + }, + { + "epoch": 4.94, + "learning_rate": 3.353837037037037e-05, + "loss": 1.8835, + "step": 27790 + }, + { + "epoch": 4.94, + "learning_rate": 3.353540740740741e-05, + "loss": 1.7171, + "step": 27795 + }, + { + "epoch": 4.94, + "learning_rate": 3.353244444444445e-05, + "loss": 1.8001, + "step": 27800 + }, + { + "epoch": 4.94, + "learning_rate": 3.3529481481481486e-05, + "loss": 1.8424, + "step": 27805 + }, + { + "epoch": 4.94, + "learning_rate": 3.352651851851852e-05, + "loss": 1.9593, + "step": 27810 + }, + { + "epoch": 4.94, + "learning_rate": 3.3523555555555556e-05, + "loss": 1.8342, + "step": 27815 + }, + { + "epoch": 4.95, + "learning_rate": 3.3520592592592595e-05, + "loss": 1.827, + "step": 27820 + }, + { + "epoch": 4.95, + "learning_rate": 3.3517629629629634e-05, + "loss": 1.9964, + "step": 27825 + }, + { + "epoch": 4.95, + "learning_rate": 3.3514666666666666e-05, + "loss": 1.803, + "step": 27830 + }, + { + "epoch": 4.95, + "learning_rate": 3.3511703703703705e-05, + "loss": 1.9362, + "step": 27835 + }, + { + "epoch": 4.95, + "learning_rate": 3.3508740740740744e-05, + "loss": 1.8935, + "step": 27840 + }, + { + "epoch": 4.95, + "learning_rate": 3.350577777777778e-05, + "loss": 1.8737, + "step": 27845 + }, + { + "epoch": 4.95, + "learning_rate": 3.3502814814814815e-05, + "loss": 1.8276, + "step": 27850 + }, + { + "epoch": 4.95, + "learning_rate": 3.3499851851851853e-05, + "loss": 1.7986, + "step": 27855 + }, + { + "epoch": 4.95, + "learning_rate": 3.3496888888888885e-05, + "loss": 1.7834, + "step": 27860 + }, + { + "epoch": 4.95, + "learning_rate": 3.349392592592593e-05, + "loss": 1.8473, + "step": 27865 + }, + { + "epoch": 4.95, + "learning_rate": 3.349096296296296e-05, + "loss": 1.756, + "step": 27870 + }, + { + "epoch": 4.96, + "learning_rate": 3.3488e-05, + "loss": 1.9552, + "step": 27875 + }, + { + "epoch": 4.96, + "learning_rate": 3.3485037037037034e-05, + "loss": 1.8291, + "step": 27880 + }, + { + "epoch": 4.96, + "learning_rate": 3.348207407407408e-05, + "loss": 1.8651, + "step": 27885 + }, + { + "epoch": 4.96, + "learning_rate": 3.347911111111111e-05, + "loss": 1.7846, + "step": 27890 + }, + { + "epoch": 4.96, + "learning_rate": 3.347614814814815e-05, + "loss": 1.6562, + "step": 27895 + }, + { + "epoch": 4.96, + "learning_rate": 3.347318518518518e-05, + "loss": 1.9841, + "step": 27900 + }, + { + "epoch": 4.96, + "learning_rate": 3.347022222222223e-05, + "loss": 1.8572, + "step": 27905 + }, + { + "epoch": 4.96, + "learning_rate": 3.346725925925926e-05, + "loss": 1.885, + "step": 27910 + }, + { + "epoch": 4.96, + "learning_rate": 3.34642962962963e-05, + "loss": 1.9045, + "step": 27915 + }, + { + "epoch": 4.96, + "learning_rate": 3.346133333333333e-05, + "loss": 1.8631, + "step": 27920 + }, + { + "epoch": 4.96, + "learning_rate": 3.3458370370370377e-05, + "loss": 1.7185, + "step": 27925 + }, + { + "epoch": 4.97, + "learning_rate": 3.345540740740741e-05, + "loss": 1.9752, + "step": 27930 + }, + { + "epoch": 4.97, + "learning_rate": 3.345244444444445e-05, + "loss": 1.8501, + "step": 27935 + }, + { + "epoch": 4.97, + "learning_rate": 3.344948148148148e-05, + "loss": 1.9054, + "step": 27940 + }, + { + "epoch": 4.97, + "learning_rate": 3.344651851851852e-05, + "loss": 1.8105, + "step": 27945 + }, + { + "epoch": 4.97, + "learning_rate": 3.344355555555556e-05, + "loss": 1.687, + "step": 27950 + }, + { + "epoch": 4.97, + "learning_rate": 3.3440592592592596e-05, + "loss": 1.8352, + "step": 27955 + }, + { + "epoch": 4.97, + "learning_rate": 3.343762962962963e-05, + "loss": 1.8621, + "step": 27960 + }, + { + "epoch": 4.97, + "learning_rate": 3.343466666666667e-05, + "loss": 1.9309, + "step": 27965 + }, + { + "epoch": 4.97, + "learning_rate": 3.3431703703703706e-05, + "loss": 1.9289, + "step": 27970 + }, + { + "epoch": 4.97, + "learning_rate": 3.3428740740740744e-05, + "loss": 1.7891, + "step": 27975 + }, + { + "epoch": 4.97, + "learning_rate": 3.3425777777777777e-05, + "loss": 1.9466, + "step": 27980 + }, + { + "epoch": 4.98, + "learning_rate": 3.3422814814814815e-05, + "loss": 1.9184, + "step": 27985 + }, + { + "epoch": 4.98, + "learning_rate": 3.3419851851851854e-05, + "loss": 1.9668, + "step": 27990 + }, + { + "epoch": 4.98, + "learning_rate": 3.341688888888889e-05, + "loss": 1.9515, + "step": 27995 + }, + { + "epoch": 4.98, + "learning_rate": 3.3413925925925925e-05, + "loss": 1.8021, + "step": 28000 + }, + { + "epoch": 4.98, + "learning_rate": 3.3410962962962964e-05, + "loss": 1.8978, + "step": 28005 + }, + { + "epoch": 4.98, + "learning_rate": 3.3408e-05, + "loss": 1.7877, + "step": 28010 + }, + { + "epoch": 4.98, + "learning_rate": 3.340503703703704e-05, + "loss": 1.8504, + "step": 28015 + }, + { + "epoch": 4.98, + "learning_rate": 3.3402074074074074e-05, + "loss": 1.8294, + "step": 28020 + }, + { + "epoch": 4.98, + "learning_rate": 3.339911111111111e-05, + "loss": 1.928, + "step": 28025 + }, + { + "epoch": 4.98, + "learning_rate": 3.339614814814815e-05, + "loss": 1.8477, + "step": 28030 + }, + { + "epoch": 4.98, + "learning_rate": 3.339318518518519e-05, + "loss": 1.9393, + "step": 28035 + }, + { + "epoch": 4.98, + "learning_rate": 3.339022222222222e-05, + "loss": 1.9513, + "step": 28040 + }, + { + "epoch": 4.99, + "learning_rate": 3.338725925925926e-05, + "loss": 1.8603, + "step": 28045 + }, + { + "epoch": 4.99, + "learning_rate": 3.33842962962963e-05, + "loss": 1.8251, + "step": 28050 + }, + { + "epoch": 4.99, + "learning_rate": 3.338133333333334e-05, + "loss": 1.8296, + "step": 28055 + }, + { + "epoch": 4.99, + "learning_rate": 3.337837037037037e-05, + "loss": 1.7772, + "step": 28060 + }, + { + "epoch": 4.99, + "learning_rate": 3.337540740740741e-05, + "loss": 1.9128, + "step": 28065 + }, + { + "epoch": 4.99, + "learning_rate": 3.337244444444445e-05, + "loss": 1.8094, + "step": 28070 + }, + { + "epoch": 4.99, + "learning_rate": 3.336948148148149e-05, + "loss": 2.0236, + "step": 28075 + }, + { + "epoch": 4.99, + "learning_rate": 3.336651851851852e-05, + "loss": 1.9778, + "step": 28080 + }, + { + "epoch": 4.99, + "learning_rate": 3.336355555555556e-05, + "loss": 1.7681, + "step": 28085 + }, + { + "epoch": 4.99, + "learning_rate": 3.336059259259259e-05, + "loss": 1.8195, + "step": 28090 + }, + { + "epoch": 4.99, + "learning_rate": 3.3357629629629636e-05, + "loss": 1.7828, + "step": 28095 + }, + { + "epoch": 5.0, + "learning_rate": 3.335466666666667e-05, + "loss": 1.902, + "step": 28100 + }, + { + "epoch": 5.0, + "learning_rate": 3.3351703703703706e-05, + "loss": 1.9494, + "step": 28105 + }, + { + "epoch": 5.0, + "learning_rate": 3.334874074074074e-05, + "loss": 1.8356, + "step": 28110 + }, + { + "epoch": 5.0, + "learning_rate": 3.3345777777777784e-05, + "loss": 1.9337, + "step": 28115 + }, + { + "epoch": 5.0, + "learning_rate": 3.3342814814814816e-05, + "loss": 1.829, + "step": 28120 + }, + { + "epoch": 5.0, + "learning_rate": 3.3339851851851855e-05, + "loss": 1.8372, + "step": 28125 + }, + { + "epoch": 5.0, + "learning_rate": 3.333688888888889e-05, + "loss": 1.7224, + "step": 28130 + }, + { + "epoch": 5.0, + "learning_rate": 3.333392592592593e-05, + "loss": 1.6538, + "step": 28135 + }, + { + "epoch": 5.0, + "learning_rate": 3.3330962962962965e-05, + "loss": 1.5821, + "step": 28140 + }, + { + "epoch": 5.0, + "learning_rate": 3.3328000000000003e-05, + "loss": 1.6281, + "step": 28145 + }, + { + "epoch": 5.0, + "learning_rate": 3.3325037037037036e-05, + "loss": 1.7006, + "step": 28150 + }, + { + "epoch": 5.01, + "learning_rate": 3.332207407407408e-05, + "loss": 1.782, + "step": 28155 + }, + { + "epoch": 5.01, + "learning_rate": 3.331911111111111e-05, + "loss": 1.7907, + "step": 28160 + }, + { + "epoch": 5.01, + "learning_rate": 3.331614814814815e-05, + "loss": 1.7706, + "step": 28165 + }, + { + "epoch": 5.01, + "learning_rate": 3.3313185185185184e-05, + "loss": 1.7231, + "step": 28170 + }, + { + "epoch": 5.01, + "learning_rate": 3.331022222222222e-05, + "loss": 1.8103, + "step": 28175 + }, + { + "epoch": 5.01, + "learning_rate": 3.330725925925926e-05, + "loss": 1.7197, + "step": 28180 + }, + { + "epoch": 5.01, + "learning_rate": 3.33042962962963e-05, + "loss": 1.6846, + "step": 28185 + }, + { + "epoch": 5.01, + "learning_rate": 3.330133333333333e-05, + "loss": 1.8474, + "step": 28190 + }, + { + "epoch": 5.01, + "learning_rate": 3.329837037037037e-05, + "loss": 1.7646, + "step": 28195 + }, + { + "epoch": 5.01, + "learning_rate": 3.329540740740741e-05, + "loss": 1.6594, + "step": 28200 + }, + { + "epoch": 5.01, + "learning_rate": 3.329244444444445e-05, + "loss": 1.7398, + "step": 28205 + }, + { + "epoch": 5.02, + "learning_rate": 3.328948148148148e-05, + "loss": 1.8003, + "step": 28210 + }, + { + "epoch": 5.02, + "learning_rate": 3.328651851851852e-05, + "loss": 1.7009, + "step": 28215 + }, + { + "epoch": 5.02, + "learning_rate": 3.328355555555556e-05, + "loss": 1.5595, + "step": 28220 + }, + { + "epoch": 5.02, + "learning_rate": 3.32805925925926e-05, + "loss": 1.6855, + "step": 28225 + }, + { + "epoch": 5.02, + "learning_rate": 3.327762962962963e-05, + "loss": 1.6388, + "step": 28230 + }, + { + "epoch": 5.02, + "learning_rate": 3.327466666666667e-05, + "loss": 1.7706, + "step": 28235 + }, + { + "epoch": 5.02, + "learning_rate": 3.327170370370371e-05, + "loss": 1.8324, + "step": 28240 + }, + { + "epoch": 5.02, + "learning_rate": 3.3268740740740746e-05, + "loss": 1.898, + "step": 28245 + }, + { + "epoch": 5.02, + "learning_rate": 3.326577777777778e-05, + "loss": 1.8067, + "step": 28250 + }, + { + "epoch": 5.02, + "learning_rate": 3.326281481481482e-05, + "loss": 1.6449, + "step": 28255 + }, + { + "epoch": 5.02, + "learning_rate": 3.3259851851851856e-05, + "loss": 1.8342, + "step": 28260 + }, + { + "epoch": 5.02, + "learning_rate": 3.3256888888888895e-05, + "loss": 1.7177, + "step": 28265 + }, + { + "epoch": 5.03, + "learning_rate": 3.3253925925925927e-05, + "loss": 1.6746, + "step": 28270 + }, + { + "epoch": 5.03, + "learning_rate": 3.3250962962962965e-05, + "loss": 1.7529, + "step": 28275 + }, + { + "epoch": 5.03, + "learning_rate": 3.3248000000000004e-05, + "loss": 1.6488, + "step": 28280 + }, + { + "epoch": 5.03, + "learning_rate": 3.324503703703704e-05, + "loss": 1.6773, + "step": 28285 + }, + { + "epoch": 5.03, + "learning_rate": 3.3242074074074075e-05, + "loss": 1.742, + "step": 28290 + }, + { + "epoch": 5.03, + "learning_rate": 3.3239111111111114e-05, + "loss": 1.7488, + "step": 28295 + }, + { + "epoch": 5.03, + "learning_rate": 3.323614814814815e-05, + "loss": 1.7793, + "step": 28300 + }, + { + "epoch": 5.03, + "learning_rate": 3.323318518518519e-05, + "loss": 1.7276, + "step": 28305 + }, + { + "epoch": 5.03, + "learning_rate": 3.3230222222222224e-05, + "loss": 1.7337, + "step": 28310 + }, + { + "epoch": 5.03, + "learning_rate": 3.322725925925926e-05, + "loss": 1.6659, + "step": 28315 + }, + { + "epoch": 5.03, + "learning_rate": 3.3224296296296294e-05, + "loss": 1.8224, + "step": 28320 + }, + { + "epoch": 5.04, + "learning_rate": 3.322133333333334e-05, + "loss": 1.6992, + "step": 28325 + }, + { + "epoch": 5.04, + "learning_rate": 3.321837037037037e-05, + "loss": 1.7376, + "step": 28330 + }, + { + "epoch": 5.04, + "learning_rate": 3.321540740740741e-05, + "loss": 1.6674, + "step": 28335 + }, + { + "epoch": 5.04, + "learning_rate": 3.321244444444444e-05, + "loss": 1.7425, + "step": 28340 + }, + { + "epoch": 5.04, + "learning_rate": 3.320948148148148e-05, + "loss": 1.7201, + "step": 28345 + }, + { + "epoch": 5.04, + "learning_rate": 3.320651851851852e-05, + "loss": 1.7534, + "step": 28350 + }, + { + "epoch": 5.04, + "learning_rate": 3.320355555555555e-05, + "loss": 1.7864, + "step": 28355 + }, + { + "epoch": 5.04, + "learning_rate": 3.320059259259259e-05, + "loss": 1.7196, + "step": 28360 + }, + { + "epoch": 5.04, + "learning_rate": 3.319762962962963e-05, + "loss": 1.7655, + "step": 28365 + }, + { + "epoch": 5.04, + "learning_rate": 3.319466666666667e-05, + "loss": 1.6174, + "step": 28370 + }, + { + "epoch": 5.04, + "learning_rate": 3.31917037037037e-05, + "loss": 1.7523, + "step": 28375 + }, + { + "epoch": 5.05, + "learning_rate": 3.318874074074074e-05, + "loss": 1.7561, + "step": 28380 + }, + { + "epoch": 5.05, + "learning_rate": 3.318577777777778e-05, + "loss": 1.7134, + "step": 28385 + }, + { + "epoch": 5.05, + "learning_rate": 3.318281481481482e-05, + "loss": 1.7702, + "step": 28390 + }, + { + "epoch": 5.05, + "learning_rate": 3.317985185185185e-05, + "loss": 1.7156, + "step": 28395 + }, + { + "epoch": 5.05, + "learning_rate": 3.317688888888889e-05, + "loss": 1.7965, + "step": 28400 + }, + { + "epoch": 5.05, + "learning_rate": 3.317392592592593e-05, + "loss": 1.7134, + "step": 28405 + }, + { + "epoch": 5.05, + "learning_rate": 3.3170962962962966e-05, + "loss": 1.8202, + "step": 28410 + }, + { + "epoch": 5.05, + "learning_rate": 3.3168e-05, + "loss": 1.6275, + "step": 28415 + }, + { + "epoch": 5.05, + "learning_rate": 3.316503703703704e-05, + "loss": 1.7416, + "step": 28420 + }, + { + "epoch": 5.05, + "learning_rate": 3.3162074074074076e-05, + "loss": 1.744, + "step": 28425 + }, + { + "epoch": 5.05, + "learning_rate": 3.3159111111111115e-05, + "loss": 1.8075, + "step": 28430 + }, + { + "epoch": 5.06, + "learning_rate": 3.315614814814815e-05, + "loss": 1.6238, + "step": 28435 + }, + { + "epoch": 5.06, + "learning_rate": 3.3153185185185186e-05, + "loss": 1.7057, + "step": 28440 + }, + { + "epoch": 5.06, + "learning_rate": 3.3150222222222224e-05, + "loss": 1.7718, + "step": 28445 + }, + { + "epoch": 5.06, + "learning_rate": 3.314725925925926e-05, + "loss": 1.7203, + "step": 28450 + }, + { + "epoch": 5.06, + "learning_rate": 3.3144296296296295e-05, + "loss": 1.7554, + "step": 28455 + }, + { + "epoch": 5.06, + "learning_rate": 3.3141333333333334e-05, + "loss": 1.8198, + "step": 28460 + }, + { + "epoch": 5.06, + "learning_rate": 3.313837037037037e-05, + "loss": 1.8542, + "step": 28465 + }, + { + "epoch": 5.06, + "learning_rate": 3.313540740740741e-05, + "loss": 1.8241, + "step": 28470 + }, + { + "epoch": 5.06, + "learning_rate": 3.3132444444444444e-05, + "loss": 1.8091, + "step": 28475 + }, + { + "epoch": 5.06, + "learning_rate": 3.312948148148148e-05, + "loss": 1.7722, + "step": 28480 + }, + { + "epoch": 5.06, + "learning_rate": 3.3126518518518515e-05, + "loss": 1.776, + "step": 28485 + }, + { + "epoch": 5.06, + "learning_rate": 3.312355555555556e-05, + "loss": 1.7729, + "step": 28490 + }, + { + "epoch": 5.07, + "learning_rate": 3.312059259259259e-05, + "loss": 1.7194, + "step": 28495 + }, + { + "epoch": 5.07, + "learning_rate": 3.311762962962963e-05, + "loss": 1.8304, + "step": 28500 + }, + { + "epoch": 5.07, + "learning_rate": 3.311466666666666e-05, + "loss": 1.7313, + "step": 28505 + }, + { + "epoch": 5.07, + "learning_rate": 3.311170370370371e-05, + "loss": 1.7868, + "step": 28510 + }, + { + "epoch": 5.07, + "learning_rate": 3.310874074074074e-05, + "loss": 1.5741, + "step": 28515 + }, + { + "epoch": 5.07, + "learning_rate": 3.310577777777778e-05, + "loss": 1.6619, + "step": 28520 + }, + { + "epoch": 5.07, + "learning_rate": 3.310281481481481e-05, + "loss": 1.7637, + "step": 28525 + }, + { + "epoch": 5.07, + "learning_rate": 3.309985185185186e-05, + "loss": 1.8242, + "step": 28530 + }, + { + "epoch": 5.07, + "learning_rate": 3.309688888888889e-05, + "loss": 1.7894, + "step": 28535 + }, + { + "epoch": 5.07, + "learning_rate": 3.309392592592593e-05, + "loss": 1.7902, + "step": 28540 + }, + { + "epoch": 5.07, + "learning_rate": 3.309096296296296e-05, + "loss": 1.7513, + "step": 28545 + }, + { + "epoch": 5.08, + "learning_rate": 3.3088e-05, + "loss": 1.7672, + "step": 28550 + }, + { + "epoch": 5.08, + "learning_rate": 3.308503703703704e-05, + "loss": 1.7964, + "step": 28555 + }, + { + "epoch": 5.08, + "learning_rate": 3.308207407407408e-05, + "loss": 1.6526, + "step": 28560 + }, + { + "epoch": 5.08, + "learning_rate": 3.307911111111111e-05, + "loss": 1.7298, + "step": 28565 + }, + { + "epoch": 5.08, + "learning_rate": 3.307614814814815e-05, + "loss": 1.7679, + "step": 28570 + }, + { + "epoch": 5.08, + "learning_rate": 3.3073185185185186e-05, + "loss": 1.7826, + "step": 28575 + }, + { + "epoch": 5.08, + "learning_rate": 3.3070222222222225e-05, + "loss": 1.7892, + "step": 28580 + }, + { + "epoch": 5.08, + "learning_rate": 3.306725925925926e-05, + "loss": 1.6299, + "step": 28585 + }, + { + "epoch": 5.08, + "learning_rate": 3.3064296296296296e-05, + "loss": 1.7188, + "step": 28590 + }, + { + "epoch": 5.08, + "learning_rate": 3.3061333333333335e-05, + "loss": 1.6656, + "step": 28595 + }, + { + "epoch": 5.08, + "learning_rate": 3.3058370370370374e-05, + "loss": 1.8235, + "step": 28600 + }, + { + "epoch": 5.09, + "learning_rate": 3.3055407407407406e-05, + "loss": 1.6673, + "step": 28605 + }, + { + "epoch": 5.09, + "learning_rate": 3.3052444444444445e-05, + "loss": 1.8927, + "step": 28610 + }, + { + "epoch": 5.09, + "learning_rate": 3.304948148148148e-05, + "loss": 1.7311, + "step": 28615 + }, + { + "epoch": 5.09, + "learning_rate": 3.304651851851852e-05, + "loss": 1.7159, + "step": 28620 + }, + { + "epoch": 5.09, + "learning_rate": 3.3043555555555554e-05, + "loss": 1.8549, + "step": 28625 + }, + { + "epoch": 5.09, + "learning_rate": 3.304059259259259e-05, + "loss": 1.6865, + "step": 28630 + }, + { + "epoch": 5.09, + "learning_rate": 3.303762962962963e-05, + "loss": 1.7174, + "step": 28635 + }, + { + "epoch": 5.09, + "learning_rate": 3.303466666666667e-05, + "loss": 1.6539, + "step": 28640 + }, + { + "epoch": 5.09, + "learning_rate": 3.30317037037037e-05, + "loss": 1.8361, + "step": 28645 + }, + { + "epoch": 5.09, + "learning_rate": 3.302874074074074e-05, + "loss": 1.6395, + "step": 28650 + }, + { + "epoch": 5.09, + "learning_rate": 3.302577777777778e-05, + "loss": 1.6991, + "step": 28655 + }, + { + "epoch": 5.1, + "learning_rate": 3.302281481481482e-05, + "loss": 1.7888, + "step": 28660 + }, + { + "epoch": 5.1, + "learning_rate": 3.301985185185185e-05, + "loss": 1.6486, + "step": 28665 + }, + { + "epoch": 5.1, + "learning_rate": 3.301688888888889e-05, + "loss": 1.827, + "step": 28670 + }, + { + "epoch": 5.1, + "learning_rate": 3.301392592592593e-05, + "loss": 1.8384, + "step": 28675 + }, + { + "epoch": 5.1, + "learning_rate": 3.301096296296297e-05, + "loss": 1.8186, + "step": 28680 + }, + { + "epoch": 5.1, + "learning_rate": 3.3008e-05, + "loss": 1.7747, + "step": 28685 + }, + { + "epoch": 5.1, + "learning_rate": 3.300503703703704e-05, + "loss": 1.7714, + "step": 28690 + }, + { + "epoch": 5.1, + "learning_rate": 3.300207407407408e-05, + "loss": 1.6875, + "step": 28695 + }, + { + "epoch": 5.1, + "learning_rate": 3.2999111111111116e-05, + "loss": 1.72, + "step": 28700 + }, + { + "epoch": 5.1, + "learning_rate": 3.299614814814815e-05, + "loss": 1.7074, + "step": 28705 + }, + { + "epoch": 5.1, + "learning_rate": 3.299318518518519e-05, + "loss": 1.6847, + "step": 28710 + }, + { + "epoch": 5.1, + "learning_rate": 3.299022222222222e-05, + "loss": 1.7436, + "step": 28715 + }, + { + "epoch": 5.11, + "learning_rate": 3.2987259259259265e-05, + "loss": 2.0024, + "step": 28720 + }, + { + "epoch": 5.11, + "learning_rate": 3.29842962962963e-05, + "loss": 1.6868, + "step": 28725 + }, + { + "epoch": 5.11, + "learning_rate": 3.2981333333333336e-05, + "loss": 1.648, + "step": 28730 + }, + { + "epoch": 5.11, + "learning_rate": 3.297837037037037e-05, + "loss": 1.7467, + "step": 28735 + }, + { + "epoch": 5.11, + "learning_rate": 3.297540740740741e-05, + "loss": 1.7308, + "step": 28740 + }, + { + "epoch": 5.11, + "learning_rate": 3.2972444444444445e-05, + "loss": 1.6924, + "step": 28745 + }, + { + "epoch": 5.11, + "learning_rate": 3.2969481481481484e-05, + "loss": 1.773, + "step": 28750 + }, + { + "epoch": 5.11, + "learning_rate": 3.2966518518518516e-05, + "loss": 1.739, + "step": 28755 + }, + { + "epoch": 5.11, + "learning_rate": 3.296355555555556e-05, + "loss": 1.8963, + "step": 28760 + }, + { + "epoch": 5.11, + "learning_rate": 3.2960592592592594e-05, + "loss": 1.7628, + "step": 28765 + }, + { + "epoch": 5.11, + "learning_rate": 3.295762962962963e-05, + "loss": 1.7016, + "step": 28770 + }, + { + "epoch": 5.12, + "learning_rate": 3.2954666666666665e-05, + "loss": 1.764, + "step": 28775 + }, + { + "epoch": 5.12, + "learning_rate": 3.2951703703703703e-05, + "loss": 1.6707, + "step": 28780 + }, + { + "epoch": 5.12, + "learning_rate": 3.294874074074074e-05, + "loss": 1.8278, + "step": 28785 + }, + { + "epoch": 5.12, + "learning_rate": 3.294577777777778e-05, + "loss": 1.6476, + "step": 28790 + }, + { + "epoch": 5.12, + "learning_rate": 3.294281481481481e-05, + "loss": 1.7054, + "step": 28795 + }, + { + "epoch": 5.12, + "learning_rate": 3.293985185185185e-05, + "loss": 1.8137, + "step": 28800 + }, + { + "epoch": 5.12, + "learning_rate": 3.293688888888889e-05, + "loss": 1.6914, + "step": 28805 + }, + { + "epoch": 5.12, + "learning_rate": 3.293392592592593e-05, + "loss": 1.9189, + "step": 28810 + }, + { + "epoch": 5.12, + "learning_rate": 3.293096296296296e-05, + "loss": 1.7599, + "step": 28815 + }, + { + "epoch": 5.12, + "learning_rate": 3.2928e-05, + "loss": 1.7007, + "step": 28820 + }, + { + "epoch": 5.12, + "learning_rate": 3.292503703703704e-05, + "loss": 1.8185, + "step": 28825 + }, + { + "epoch": 5.13, + "learning_rate": 3.292207407407408e-05, + "loss": 1.8087, + "step": 28830 + }, + { + "epoch": 5.13, + "learning_rate": 3.291911111111111e-05, + "loss": 2.0938, + "step": 28835 + }, + { + "epoch": 5.13, + "learning_rate": 3.291614814814815e-05, + "loss": 1.6221, + "step": 28840 + }, + { + "epoch": 5.13, + "learning_rate": 3.291318518518519e-05, + "loss": 1.7422, + "step": 28845 + }, + { + "epoch": 5.13, + "learning_rate": 3.291022222222223e-05, + "loss": 1.8171, + "step": 28850 + }, + { + "epoch": 5.13, + "learning_rate": 3.290725925925926e-05, + "loss": 1.8412, + "step": 28855 + }, + { + "epoch": 5.13, + "learning_rate": 3.29042962962963e-05, + "loss": 1.762, + "step": 28860 + }, + { + "epoch": 5.13, + "learning_rate": 3.2901333333333336e-05, + "loss": 1.6639, + "step": 28865 + }, + { + "epoch": 5.13, + "learning_rate": 3.2898370370370375e-05, + "loss": 1.7134, + "step": 28870 + }, + { + "epoch": 5.13, + "learning_rate": 3.289540740740741e-05, + "loss": 1.8787, + "step": 28875 + }, + { + "epoch": 5.13, + "learning_rate": 3.2892444444444446e-05, + "loss": 1.7999, + "step": 28880 + }, + { + "epoch": 5.14, + "learning_rate": 3.2889481481481485e-05, + "loss": 1.8612, + "step": 28885 + }, + { + "epoch": 5.14, + "learning_rate": 3.2886518518518524e-05, + "loss": 1.7203, + "step": 28890 + }, + { + "epoch": 5.14, + "learning_rate": 3.2883555555555556e-05, + "loss": 1.6524, + "step": 28895 + }, + { + "epoch": 5.14, + "learning_rate": 3.2880592592592595e-05, + "loss": 1.7538, + "step": 28900 + }, + { + "epoch": 5.14, + "learning_rate": 3.287762962962963e-05, + "loss": 1.7661, + "step": 28905 + }, + { + "epoch": 5.14, + "learning_rate": 3.287466666666667e-05, + "loss": 1.7724, + "step": 28910 + }, + { + "epoch": 5.14, + "learning_rate": 3.2871703703703704e-05, + "loss": 1.7852, + "step": 28915 + }, + { + "epoch": 5.14, + "learning_rate": 3.286874074074074e-05, + "loss": 1.817, + "step": 28920 + }, + { + "epoch": 5.14, + "learning_rate": 3.286577777777778e-05, + "loss": 1.7295, + "step": 28925 + }, + { + "epoch": 5.14, + "learning_rate": 3.286281481481482e-05, + "loss": 1.7138, + "step": 28930 + }, + { + "epoch": 5.14, + "learning_rate": 3.285985185185185e-05, + "loss": 1.8018, + "step": 28935 + }, + { + "epoch": 5.14, + "learning_rate": 3.285688888888889e-05, + "loss": 1.8042, + "step": 28940 + }, + { + "epoch": 5.15, + "learning_rate": 3.2853925925925924e-05, + "loss": 1.709, + "step": 28945 + }, + { + "epoch": 5.15, + "learning_rate": 3.285096296296297e-05, + "loss": 1.7333, + "step": 28950 + }, + { + "epoch": 5.15, + "learning_rate": 3.2848e-05, + "loss": 1.8335, + "step": 28955 + }, + { + "epoch": 5.15, + "learning_rate": 3.284503703703704e-05, + "loss": 1.7092, + "step": 28960 + }, + { + "epoch": 5.15, + "learning_rate": 3.284207407407407e-05, + "loss": 1.8359, + "step": 28965 + }, + { + "epoch": 5.15, + "learning_rate": 3.283911111111112e-05, + "loss": 1.7295, + "step": 28970 + }, + { + "epoch": 5.15, + "learning_rate": 3.283614814814815e-05, + "loss": 1.7539, + "step": 28975 + }, + { + "epoch": 5.15, + "learning_rate": 3.283318518518519e-05, + "loss": 1.6472, + "step": 28980 + }, + { + "epoch": 5.15, + "learning_rate": 3.283022222222222e-05, + "loss": 1.8571, + "step": 28985 + }, + { + "epoch": 5.15, + "learning_rate": 3.2827259259259266e-05, + "loss": 1.8143, + "step": 28990 + }, + { + "epoch": 5.15, + "learning_rate": 3.28242962962963e-05, + "loss": 1.6736, + "step": 28995 + }, + { + "epoch": 5.16, + "learning_rate": 3.282133333333334e-05, + "loss": 1.6895, + "step": 29000 + }, + { + "epoch": 5.16, + "learning_rate": 3.281837037037037e-05, + "loss": 1.7298, + "step": 29005 + }, + { + "epoch": 5.16, + "learning_rate": 3.281540740740741e-05, + "loss": 1.7215, + "step": 29010 + }, + { + "epoch": 5.16, + "learning_rate": 3.281244444444445e-05, + "loss": 1.7344, + "step": 29015 + }, + { + "epoch": 5.16, + "learning_rate": 3.2809481481481486e-05, + "loss": 1.7413, + "step": 29020 + }, + { + "epoch": 5.16, + "learning_rate": 3.280651851851852e-05, + "loss": 1.7335, + "step": 29025 + }, + { + "epoch": 5.16, + "learning_rate": 3.2803555555555557e-05, + "loss": 1.6782, + "step": 29030 + }, + { + "epoch": 5.16, + "learning_rate": 3.2800592592592595e-05, + "loss": 1.8319, + "step": 29035 + }, + { + "epoch": 5.16, + "learning_rate": 3.2797629629629634e-05, + "loss": 1.8867, + "step": 29040 + }, + { + "epoch": 5.16, + "learning_rate": 3.2794666666666666e-05, + "loss": 1.8898, + "step": 29045 + }, + { + "epoch": 5.16, + "learning_rate": 3.2791703703703705e-05, + "loss": 1.6628, + "step": 29050 + }, + { + "epoch": 5.17, + "learning_rate": 3.2788740740740744e-05, + "loss": 1.6802, + "step": 29055 + }, + { + "epoch": 5.17, + "learning_rate": 3.278577777777778e-05, + "loss": 1.8035, + "step": 29060 + }, + { + "epoch": 5.17, + "learning_rate": 3.2782814814814815e-05, + "loss": 1.7856, + "step": 29065 + }, + { + "epoch": 5.17, + "learning_rate": 3.2779851851851854e-05, + "loss": 1.8685, + "step": 29070 + }, + { + "epoch": 5.17, + "learning_rate": 3.277688888888889e-05, + "loss": 1.6967, + "step": 29075 + }, + { + "epoch": 5.17, + "learning_rate": 3.277392592592593e-05, + "loss": 1.7145, + "step": 29080 + }, + { + "epoch": 5.17, + "learning_rate": 3.277096296296296e-05, + "loss": 1.7407, + "step": 29085 + }, + { + "epoch": 5.17, + "learning_rate": 3.2768e-05, + "loss": 1.8242, + "step": 29090 + }, + { + "epoch": 5.17, + "learning_rate": 3.276503703703704e-05, + "loss": 1.8387, + "step": 29095 + }, + { + "epoch": 5.17, + "learning_rate": 3.276207407407408e-05, + "loss": 1.7806, + "step": 29100 + }, + { + "epoch": 5.17, + "learning_rate": 3.275911111111111e-05, + "loss": 1.8899, + "step": 29105 + }, + { + "epoch": 5.18, + "learning_rate": 3.275614814814815e-05, + "loss": 1.7004, + "step": 29110 + }, + { + "epoch": 5.18, + "learning_rate": 3.275318518518519e-05, + "loss": 1.6837, + "step": 29115 + }, + { + "epoch": 5.18, + "learning_rate": 3.275022222222222e-05, + "loss": 1.7514, + "step": 29120 + }, + { + "epoch": 5.18, + "learning_rate": 3.274725925925926e-05, + "loss": 1.7557, + "step": 29125 + }, + { + "epoch": 5.18, + "learning_rate": 3.274429629629629e-05, + "loss": 1.7695, + "step": 29130 + }, + { + "epoch": 5.18, + "learning_rate": 3.274133333333334e-05, + "loss": 1.6763, + "step": 29135 + }, + { + "epoch": 5.18, + "learning_rate": 3.273837037037037e-05, + "loss": 1.8425, + "step": 29140 + }, + { + "epoch": 5.18, + "learning_rate": 3.273540740740741e-05, + "loss": 1.802, + "step": 29145 + }, + { + "epoch": 5.18, + "learning_rate": 3.273244444444444e-05, + "loss": 1.7162, + "step": 29150 + }, + { + "epoch": 5.18, + "learning_rate": 3.2729481481481486e-05, + "loss": 1.825, + "step": 29155 + }, + { + "epoch": 5.18, + "learning_rate": 3.272651851851852e-05, + "loss": 1.6796, + "step": 29160 + }, + { + "epoch": 5.18, + "learning_rate": 3.272355555555556e-05, + "loss": 1.755, + "step": 29165 + }, + { + "epoch": 5.19, + "learning_rate": 3.272059259259259e-05, + "loss": 1.7298, + "step": 29170 + }, + { + "epoch": 5.19, + "learning_rate": 3.271762962962963e-05, + "loss": 1.9278, + "step": 29175 + }, + { + "epoch": 5.19, + "learning_rate": 3.271466666666667e-05, + "loss": 1.821, + "step": 29180 + }, + { + "epoch": 5.19, + "learning_rate": 3.2711703703703706e-05, + "loss": 1.846, + "step": 29185 + }, + { + "epoch": 5.19, + "learning_rate": 3.270874074074074e-05, + "loss": 1.6878, + "step": 29190 + }, + { + "epoch": 5.19, + "learning_rate": 3.270577777777778e-05, + "loss": 1.7034, + "step": 29195 + }, + { + "epoch": 5.19, + "learning_rate": 3.2702814814814815e-05, + "loss": 1.6669, + "step": 29200 + }, + { + "epoch": 5.19, + "learning_rate": 3.2699851851851854e-05, + "loss": 1.8904, + "step": 29205 + }, + { + "epoch": 5.19, + "learning_rate": 3.2696888888888886e-05, + "loss": 1.679, + "step": 29210 + }, + { + "epoch": 5.19, + "learning_rate": 3.2693925925925925e-05, + "loss": 1.7235, + "step": 29215 + }, + { + "epoch": 5.19, + "learning_rate": 3.2690962962962964e-05, + "loss": 1.7319, + "step": 29220 + }, + { + "epoch": 5.2, + "learning_rate": 3.2688e-05, + "loss": 1.579, + "step": 29225 + }, + { + "epoch": 5.2, + "learning_rate": 3.2685037037037035e-05, + "loss": 1.8418, + "step": 29230 + }, + { + "epoch": 5.2, + "learning_rate": 3.2682074074074074e-05, + "loss": 1.657, + "step": 29235 + }, + { + "epoch": 5.2, + "learning_rate": 3.267911111111111e-05, + "loss": 1.8681, + "step": 29240 + }, + { + "epoch": 5.2, + "learning_rate": 3.267614814814815e-05, + "loss": 1.7843, + "step": 29245 + }, + { + "epoch": 5.2, + "learning_rate": 3.267318518518518e-05, + "loss": 1.744, + "step": 29250 + }, + { + "epoch": 5.2, + "learning_rate": 3.267022222222222e-05, + "loss": 1.8594, + "step": 29255 + }, + { + "epoch": 5.2, + "learning_rate": 3.266725925925926e-05, + "loss": 1.8169, + "step": 29260 + }, + { + "epoch": 5.2, + "learning_rate": 3.26642962962963e-05, + "loss": 1.7859, + "step": 29265 + }, + { + "epoch": 5.2, + "learning_rate": 3.266133333333333e-05, + "loss": 1.6908, + "step": 29270 + }, + { + "epoch": 5.2, + "learning_rate": 3.265837037037037e-05, + "loss": 1.815, + "step": 29275 + }, + { + "epoch": 5.21, + "learning_rate": 3.265540740740741e-05, + "loss": 1.7012, + "step": 29280 + }, + { + "epoch": 5.21, + "learning_rate": 3.265244444444445e-05, + "loss": 1.7931, + "step": 29285 + }, + { + "epoch": 5.21, + "learning_rate": 3.264948148148148e-05, + "loss": 1.761, + "step": 29290 + }, + { + "epoch": 5.21, + "learning_rate": 3.264651851851852e-05, + "loss": 1.7149, + "step": 29295 + }, + { + "epoch": 5.21, + "learning_rate": 3.264355555555556e-05, + "loss": 1.8547, + "step": 29300 + }, + { + "epoch": 5.21, + "learning_rate": 3.26405925925926e-05, + "loss": 1.8682, + "step": 29305 + }, + { + "epoch": 5.21, + "learning_rate": 3.263762962962963e-05, + "loss": 1.7597, + "step": 29310 + }, + { + "epoch": 5.21, + "learning_rate": 3.263466666666667e-05, + "loss": 1.7386, + "step": 29315 + }, + { + "epoch": 5.21, + "learning_rate": 3.2631703703703707e-05, + "loss": 1.7125, + "step": 29320 + }, + { + "epoch": 5.21, + "learning_rate": 3.2628740740740745e-05, + "loss": 1.7047, + "step": 29325 + }, + { + "epoch": 5.21, + "learning_rate": 3.262577777777778e-05, + "loss": 1.8127, + "step": 29330 + }, + { + "epoch": 5.22, + "learning_rate": 3.2622814814814816e-05, + "loss": 1.6905, + "step": 29335 + }, + { + "epoch": 5.22, + "learning_rate": 3.261985185185185e-05, + "loss": 1.8548, + "step": 29340 + }, + { + "epoch": 5.22, + "learning_rate": 3.2616888888888894e-05, + "loss": 1.7607, + "step": 29345 + }, + { + "epoch": 5.22, + "learning_rate": 3.2613925925925926e-05, + "loss": 1.797, + "step": 29350 + }, + { + "epoch": 5.22, + "learning_rate": 3.2610962962962965e-05, + "loss": 1.661, + "step": 29355 + }, + { + "epoch": 5.22, + "learning_rate": 3.2608e-05, + "loss": 1.7261, + "step": 29360 + }, + { + "epoch": 5.22, + "learning_rate": 3.260503703703704e-05, + "loss": 1.8225, + "step": 29365 + }, + { + "epoch": 5.22, + "learning_rate": 3.2602074074074074e-05, + "loss": 1.6582, + "step": 29370 + }, + { + "epoch": 5.22, + "learning_rate": 3.259911111111111e-05, + "loss": 1.8001, + "step": 29375 + }, + { + "epoch": 5.22, + "learning_rate": 3.2596148148148145e-05, + "loss": 1.7668, + "step": 29380 + }, + { + "epoch": 5.22, + "learning_rate": 3.259318518518519e-05, + "loss": 1.7052, + "step": 29385 + }, + { + "epoch": 5.22, + "learning_rate": 3.259022222222222e-05, + "loss": 1.8592, + "step": 29390 + }, + { + "epoch": 5.23, + "learning_rate": 3.258725925925926e-05, + "loss": 1.7894, + "step": 29395 + }, + { + "epoch": 5.23, + "learning_rate": 3.2584296296296294e-05, + "loss": 1.7153, + "step": 29400 + }, + { + "epoch": 5.23, + "learning_rate": 3.258133333333333e-05, + "loss": 1.7068, + "step": 29405 + }, + { + "epoch": 5.23, + "learning_rate": 3.257837037037037e-05, + "loss": 1.7645, + "step": 29410 + }, + { + "epoch": 5.23, + "learning_rate": 3.257540740740741e-05, + "loss": 1.7908, + "step": 29415 + }, + { + "epoch": 5.23, + "learning_rate": 3.257244444444444e-05, + "loss": 1.8108, + "step": 29420 + }, + { + "epoch": 5.23, + "learning_rate": 3.256948148148148e-05, + "loss": 1.7633, + "step": 29425 + }, + { + "epoch": 5.23, + "learning_rate": 3.256651851851852e-05, + "loss": 1.8961, + "step": 29430 + }, + { + "epoch": 5.23, + "learning_rate": 3.256355555555556e-05, + "loss": 1.8886, + "step": 29435 + }, + { + "epoch": 5.23, + "learning_rate": 3.256059259259259e-05, + "loss": 1.7095, + "step": 29440 + }, + { + "epoch": 5.23, + "learning_rate": 3.255762962962963e-05, + "loss": 1.5951, + "step": 29445 + }, + { + "epoch": 5.24, + "learning_rate": 3.255466666666667e-05, + "loss": 1.7643, + "step": 29450 + }, + { + "epoch": 5.24, + "learning_rate": 3.255170370370371e-05, + "loss": 1.629, + "step": 29455 + }, + { + "epoch": 5.24, + "learning_rate": 3.254874074074074e-05, + "loss": 1.8029, + "step": 29460 + }, + { + "epoch": 5.24, + "learning_rate": 3.254577777777778e-05, + "loss": 1.5463, + "step": 29465 + }, + { + "epoch": 5.24, + "learning_rate": 3.254281481481482e-05, + "loss": 1.6309, + "step": 29470 + }, + { + "epoch": 5.24, + "learning_rate": 3.2539851851851856e-05, + "loss": 1.821, + "step": 29475 + }, + { + "epoch": 5.24, + "learning_rate": 3.253688888888889e-05, + "loss": 1.6447, + "step": 29480 + }, + { + "epoch": 5.24, + "learning_rate": 3.253392592592593e-05, + "loss": 1.7735, + "step": 29485 + }, + { + "epoch": 5.24, + "learning_rate": 3.2530962962962966e-05, + "loss": 1.8529, + "step": 29490 + }, + { + "epoch": 5.24, + "learning_rate": 3.2528000000000004e-05, + "loss": 1.7299, + "step": 29495 + }, + { + "epoch": 5.24, + "learning_rate": 3.2525037037037036e-05, + "loss": 1.9048, + "step": 29500 + }, + { + "epoch": 5.25, + "learning_rate": 3.2522074074074075e-05, + "loss": 1.6739, + "step": 29505 + }, + { + "epoch": 5.25, + "learning_rate": 3.2519111111111114e-05, + "loss": 1.7896, + "step": 29510 + }, + { + "epoch": 5.25, + "learning_rate": 3.251614814814815e-05, + "loss": 1.7819, + "step": 29515 + }, + { + "epoch": 5.25, + "learning_rate": 3.2513185185185185e-05, + "loss": 1.774, + "step": 29520 + }, + { + "epoch": 5.25, + "learning_rate": 3.2510222222222224e-05, + "loss": 1.6446, + "step": 29525 + }, + { + "epoch": 5.25, + "learning_rate": 3.250725925925926e-05, + "loss": 1.7756, + "step": 29530 + }, + { + "epoch": 5.25, + "learning_rate": 3.25042962962963e-05, + "loss": 1.7319, + "step": 29535 + }, + { + "epoch": 5.25, + "learning_rate": 3.2501333333333333e-05, + "loss": 1.9038, + "step": 29540 + }, + { + "epoch": 5.25, + "learning_rate": 3.249837037037037e-05, + "loss": 1.7563, + "step": 29545 + }, + { + "epoch": 5.25, + "learning_rate": 3.249540740740741e-05, + "loss": 1.8106, + "step": 29550 + }, + { + "epoch": 5.25, + "learning_rate": 3.249244444444445e-05, + "loss": 1.8337, + "step": 29555 + }, + { + "epoch": 5.26, + "learning_rate": 3.248948148148148e-05, + "loss": 1.8299, + "step": 29560 + }, + { + "epoch": 5.26, + "learning_rate": 3.248651851851852e-05, + "loss": 1.7799, + "step": 29565 + }, + { + "epoch": 5.26, + "learning_rate": 3.248355555555555e-05, + "loss": 1.7407, + "step": 29570 + }, + { + "epoch": 5.26, + "learning_rate": 3.24805925925926e-05, + "loss": 1.7248, + "step": 29575 + }, + { + "epoch": 5.26, + "learning_rate": 3.247762962962963e-05, + "loss": 1.8263, + "step": 29580 + }, + { + "epoch": 5.26, + "learning_rate": 3.247466666666667e-05, + "loss": 1.7576, + "step": 29585 + }, + { + "epoch": 5.26, + "learning_rate": 3.24717037037037e-05, + "loss": 1.8666, + "step": 29590 + }, + { + "epoch": 5.26, + "learning_rate": 3.246874074074075e-05, + "loss": 1.725, + "step": 29595 + }, + { + "epoch": 5.26, + "learning_rate": 3.246577777777778e-05, + "loss": 1.6978, + "step": 29600 + }, + { + "epoch": 5.26, + "learning_rate": 3.246281481481482e-05, + "loss": 1.6997, + "step": 29605 + }, + { + "epoch": 5.26, + "learning_rate": 3.245985185185185e-05, + "loss": 1.7745, + "step": 29610 + }, + { + "epoch": 5.26, + "learning_rate": 3.2457481481481486e-05, + "loss": 1.7577, + "step": 29615 + }, + { + "epoch": 5.27, + "learning_rate": 3.245451851851852e-05, + "loss": 1.8617, + "step": 29620 + }, + { + "epoch": 5.27, + "learning_rate": 3.245155555555556e-05, + "loss": 1.8229, + "step": 29625 + }, + { + "epoch": 5.27, + "learning_rate": 3.244859259259259e-05, + "loss": 1.7525, + "step": 29630 + }, + { + "epoch": 5.27, + "learning_rate": 3.244562962962963e-05, + "loss": 1.8071, + "step": 29635 + }, + { + "epoch": 5.27, + "learning_rate": 3.244266666666667e-05, + "loss": 1.9536, + "step": 29640 + }, + { + "epoch": 5.27, + "learning_rate": 3.2439703703703706e-05, + "loss": 1.7349, + "step": 29645 + }, + { + "epoch": 5.27, + "learning_rate": 3.243674074074074e-05, + "loss": 1.7494, + "step": 29650 + }, + { + "epoch": 5.27, + "learning_rate": 3.2433777777777777e-05, + "loss": 1.7886, + "step": 29655 + }, + { + "epoch": 5.27, + "learning_rate": 3.2430814814814815e-05, + "loss": 1.7791, + "step": 29660 + }, + { + "epoch": 5.27, + "learning_rate": 3.2427851851851854e-05, + "loss": 1.7325, + "step": 29665 + }, + { + "epoch": 5.27, + "learning_rate": 3.2424888888888886e-05, + "loss": 1.8129, + "step": 29670 + }, + { + "epoch": 5.28, + "learning_rate": 3.2421925925925925e-05, + "loss": 1.8459, + "step": 29675 + }, + { + "epoch": 5.28, + "learning_rate": 3.2418962962962964e-05, + "loss": 1.8665, + "step": 29680 + }, + { + "epoch": 5.28, + "learning_rate": 3.2416e-05, + "loss": 1.8058, + "step": 29685 + }, + { + "epoch": 5.28, + "learning_rate": 3.2413037037037035e-05, + "loss": 1.8305, + "step": 29690 + }, + { + "epoch": 5.28, + "learning_rate": 3.2410074074074074e-05, + "loss": 1.8658, + "step": 29695 + }, + { + "epoch": 5.28, + "learning_rate": 3.240711111111111e-05, + "loss": 1.8145, + "step": 29700 + }, + { + "epoch": 5.28, + "learning_rate": 3.240414814814815e-05, + "loss": 1.9193, + "step": 29705 + }, + { + "epoch": 5.28, + "learning_rate": 3.240118518518518e-05, + "loss": 1.8542, + "step": 29710 + }, + { + "epoch": 5.28, + "learning_rate": 3.239822222222222e-05, + "loss": 1.9819, + "step": 29715 + }, + { + "epoch": 5.28, + "learning_rate": 3.239525925925926e-05, + "loss": 1.7057, + "step": 29720 + }, + { + "epoch": 5.28, + "learning_rate": 3.23922962962963e-05, + "loss": 1.7611, + "step": 29725 + }, + { + "epoch": 5.29, + "learning_rate": 3.238933333333333e-05, + "loss": 1.7125, + "step": 29730 + }, + { + "epoch": 5.29, + "learning_rate": 3.238637037037037e-05, + "loss": 1.7924, + "step": 29735 + }, + { + "epoch": 5.29, + "learning_rate": 3.238340740740741e-05, + "loss": 1.6958, + "step": 29740 + }, + { + "epoch": 5.29, + "learning_rate": 3.238044444444445e-05, + "loss": 1.7958, + "step": 29745 + }, + { + "epoch": 5.29, + "learning_rate": 3.237748148148148e-05, + "loss": 1.7368, + "step": 29750 + }, + { + "epoch": 5.29, + "learning_rate": 3.237451851851852e-05, + "loss": 1.777, + "step": 29755 + }, + { + "epoch": 5.29, + "learning_rate": 3.237155555555556e-05, + "loss": 1.7095, + "step": 29760 + }, + { + "epoch": 5.29, + "learning_rate": 3.23685925925926e-05, + "loss": 1.6856, + "step": 29765 + }, + { + "epoch": 5.29, + "learning_rate": 3.236562962962963e-05, + "loss": 1.7666, + "step": 29770 + }, + { + "epoch": 5.29, + "learning_rate": 3.236266666666667e-05, + "loss": 1.8457, + "step": 29775 + }, + { + "epoch": 5.29, + "learning_rate": 3.2359703703703706e-05, + "loss": 1.7137, + "step": 29780 + }, + { + "epoch": 5.3, + "learning_rate": 3.2356740740740745e-05, + "loss": 1.6178, + "step": 29785 + }, + { + "epoch": 5.3, + "learning_rate": 3.235377777777778e-05, + "loss": 1.8272, + "step": 29790 + }, + { + "epoch": 5.3, + "learning_rate": 3.2350814814814816e-05, + "loss": 1.8348, + "step": 29795 + }, + { + "epoch": 5.3, + "learning_rate": 3.234785185185185e-05, + "loss": 1.7047, + "step": 29800 + }, + { + "epoch": 5.3, + "learning_rate": 3.2344888888888894e-05, + "loss": 1.8682, + "step": 29805 + }, + { + "epoch": 5.3, + "learning_rate": 3.2341925925925926e-05, + "loss": 1.7249, + "step": 29810 + }, + { + "epoch": 5.3, + "learning_rate": 3.2338962962962965e-05, + "loss": 1.7136, + "step": 29815 + }, + { + "epoch": 5.3, + "learning_rate": 3.2336e-05, + "loss": 1.7797, + "step": 29820 + }, + { + "epoch": 5.3, + "learning_rate": 3.233303703703704e-05, + "loss": 1.7417, + "step": 29825 + }, + { + "epoch": 5.3, + "learning_rate": 3.2330074074074074e-05, + "loss": 1.7154, + "step": 29830 + }, + { + "epoch": 5.3, + "learning_rate": 3.232711111111111e-05, + "loss": 1.8332, + "step": 29835 + }, + { + "epoch": 5.3, + "learning_rate": 3.2324148148148145e-05, + "loss": 1.8463, + "step": 29840 + }, + { + "epoch": 5.31, + "learning_rate": 3.232118518518519e-05, + "loss": 1.7096, + "step": 29845 + }, + { + "epoch": 5.31, + "learning_rate": 3.231822222222222e-05, + "loss": 1.8184, + "step": 29850 + }, + { + "epoch": 5.31, + "learning_rate": 3.231525925925926e-05, + "loss": 1.6954, + "step": 29855 + }, + { + "epoch": 5.31, + "learning_rate": 3.2312296296296294e-05, + "loss": 1.6259, + "step": 29860 + }, + { + "epoch": 5.31, + "learning_rate": 3.230933333333333e-05, + "loss": 1.759, + "step": 29865 + }, + { + "epoch": 5.31, + "learning_rate": 3.230637037037037e-05, + "loss": 1.8674, + "step": 29870 + }, + { + "epoch": 5.31, + "learning_rate": 3.230340740740741e-05, + "loss": 1.7582, + "step": 29875 + }, + { + "epoch": 5.31, + "learning_rate": 3.230044444444444e-05, + "loss": 1.682, + "step": 29880 + }, + { + "epoch": 5.31, + "learning_rate": 3.229748148148148e-05, + "loss": 1.856, + "step": 29885 + }, + { + "epoch": 5.31, + "learning_rate": 3.229451851851852e-05, + "loss": 1.7464, + "step": 29890 + }, + { + "epoch": 5.31, + "learning_rate": 3.229155555555556e-05, + "loss": 1.8445, + "step": 29895 + }, + { + "epoch": 5.32, + "learning_rate": 3.228859259259259e-05, + "loss": 1.7165, + "step": 29900 + }, + { + "epoch": 5.32, + "learning_rate": 3.228562962962963e-05, + "loss": 1.806, + "step": 29905 + }, + { + "epoch": 5.32, + "learning_rate": 3.228266666666667e-05, + "loss": 1.736, + "step": 29910 + }, + { + "epoch": 5.32, + "learning_rate": 3.227970370370371e-05, + "loss": 1.7156, + "step": 29915 + }, + { + "epoch": 5.32, + "learning_rate": 3.227674074074074e-05, + "loss": 1.7369, + "step": 29920 + }, + { + "epoch": 5.32, + "learning_rate": 3.227377777777778e-05, + "loss": 1.7532, + "step": 29925 + }, + { + "epoch": 5.32, + "learning_rate": 3.227081481481482e-05, + "loss": 1.7148, + "step": 29930 + }, + { + "epoch": 5.32, + "learning_rate": 3.2267851851851856e-05, + "loss": 1.6292, + "step": 29935 + }, + { + "epoch": 5.32, + "learning_rate": 3.226488888888889e-05, + "loss": 1.7505, + "step": 29940 + }, + { + "epoch": 5.32, + "learning_rate": 3.2261925925925927e-05, + "loss": 1.9294, + "step": 29945 + }, + { + "epoch": 5.32, + "learning_rate": 3.2258962962962965e-05, + "loss": 1.772, + "step": 29950 + }, + { + "epoch": 5.33, + "learning_rate": 3.2256000000000004e-05, + "loss": 1.8099, + "step": 29955 + }, + { + "epoch": 5.33, + "learning_rate": 3.2253037037037036e-05, + "loss": 1.7111, + "step": 29960 + }, + { + "epoch": 5.33, + "learning_rate": 3.2250074074074075e-05, + "loss": 1.8077, + "step": 29965 + }, + { + "epoch": 5.33, + "learning_rate": 3.2247111111111114e-05, + "loss": 1.7618, + "step": 29970 + }, + { + "epoch": 5.33, + "learning_rate": 3.224414814814815e-05, + "loss": 1.8128, + "step": 29975 + }, + { + "epoch": 5.33, + "learning_rate": 3.2241185185185185e-05, + "loss": 1.7034, + "step": 29980 + }, + { + "epoch": 5.33, + "learning_rate": 3.2238222222222224e-05, + "loss": 1.8062, + "step": 29985 + }, + { + "epoch": 5.33, + "learning_rate": 3.223525925925926e-05, + "loss": 1.7719, + "step": 29990 + }, + { + "epoch": 5.33, + "learning_rate": 3.22322962962963e-05, + "loss": 1.7577, + "step": 29995 + }, + { + "epoch": 5.33, + "learning_rate": 3.222933333333333e-05, + "loss": 1.6706, + "step": 30000 + }, + { + "epoch": 5.33, + "eval_loss": 1.6660683155059814, + "eval_rouge2_fmeasure": 0.1858, + "eval_rouge2_precision": 0.212, + "eval_rouge2_recall": 0.1754, + "eval_runtime": 40154.3823, + "eval_samples_per_second": 0.125, + "eval_steps_per_second": 0.062, + "step": 30000 + }, + { + "epoch": 5.33, + "learning_rate": 3.222637037037037e-05, + "loss": 1.7674, + "step": 30005 + }, + { + "epoch": 5.34, + "learning_rate": 3.222340740740741e-05, + "loss": 1.7299, + "step": 30010 + }, + { + "epoch": 5.34, + "learning_rate": 3.222044444444445e-05, + "loss": 1.7153, + "step": 30015 + }, + { + "epoch": 5.34, + "learning_rate": 3.221748148148148e-05, + "loss": 1.814, + "step": 30020 + }, + { + "epoch": 5.34, + "learning_rate": 3.221451851851852e-05, + "loss": 1.7316, + "step": 30025 + }, + { + "epoch": 5.34, + "learning_rate": 3.221155555555555e-05, + "loss": 1.7726, + "step": 30030 + }, + { + "epoch": 5.34, + "learning_rate": 3.22085925925926e-05, + "loss": 1.6476, + "step": 30035 + }, + { + "epoch": 5.34, + "learning_rate": 3.220562962962963e-05, + "loss": 1.6778, + "step": 30040 + }, + { + "epoch": 5.34, + "learning_rate": 3.220266666666667e-05, + "loss": 1.8191, + "step": 30045 + }, + { + "epoch": 5.34, + "learning_rate": 3.21997037037037e-05, + "loss": 1.7427, + "step": 30050 + }, + { + "epoch": 5.34, + "learning_rate": 3.219674074074075e-05, + "loss": 1.8601, + "step": 30055 + }, + { + "epoch": 5.34, + "learning_rate": 3.219377777777778e-05, + "loss": 1.6535, + "step": 30060 + }, + { + "epoch": 5.34, + "learning_rate": 3.219081481481482e-05, + "loss": 1.8686, + "step": 30065 + }, + { + "epoch": 5.35, + "learning_rate": 3.218785185185185e-05, + "loss": 1.7833, + "step": 30070 + }, + { + "epoch": 5.35, + "learning_rate": 3.2184888888888895e-05, + "loss": 1.6951, + "step": 30075 + }, + { + "epoch": 5.35, + "learning_rate": 3.218192592592593e-05, + "loss": 1.6929, + "step": 30080 + }, + { + "epoch": 5.35, + "learning_rate": 3.2178962962962966e-05, + "loss": 1.7882, + "step": 30085 + }, + { + "epoch": 5.35, + "learning_rate": 3.2176e-05, + "loss": 1.7619, + "step": 30090 + }, + { + "epoch": 5.35, + "learning_rate": 3.217303703703704e-05, + "loss": 1.6784, + "step": 30095 + }, + { + "epoch": 5.35, + "learning_rate": 3.2170074074074076e-05, + "loss": 1.7072, + "step": 30100 + }, + { + "epoch": 5.35, + "learning_rate": 3.2167111111111115e-05, + "loss": 1.9223, + "step": 30105 + }, + { + "epoch": 5.35, + "learning_rate": 3.216414814814815e-05, + "loss": 1.5836, + "step": 30110 + }, + { + "epoch": 5.35, + "learning_rate": 3.2161185185185186e-05, + "loss": 1.7562, + "step": 30115 + }, + { + "epoch": 5.35, + "learning_rate": 3.2158222222222224e-05, + "loss": 1.7978, + "step": 30120 + }, + { + "epoch": 5.36, + "learning_rate": 3.215525925925926e-05, + "loss": 1.8594, + "step": 30125 + }, + { + "epoch": 5.36, + "learning_rate": 3.2152296296296295e-05, + "loss": 1.777, + "step": 30130 + }, + { + "epoch": 5.36, + "learning_rate": 3.2149333333333334e-05, + "loss": 1.8381, + "step": 30135 + }, + { + "epoch": 5.36, + "learning_rate": 3.214637037037037e-05, + "loss": 1.6528, + "step": 30140 + }, + { + "epoch": 5.36, + "learning_rate": 3.214340740740741e-05, + "loss": 1.7207, + "step": 30145 + }, + { + "epoch": 5.36, + "learning_rate": 3.2140444444444444e-05, + "loss": 1.8969, + "step": 30150 + }, + { + "epoch": 5.36, + "learning_rate": 3.213748148148148e-05, + "loss": 1.7894, + "step": 30155 + }, + { + "epoch": 5.36, + "learning_rate": 3.213451851851852e-05, + "loss": 1.7771, + "step": 30160 + }, + { + "epoch": 5.36, + "learning_rate": 3.213155555555556e-05, + "loss": 1.6601, + "step": 30165 + }, + { + "epoch": 5.36, + "learning_rate": 3.212859259259259e-05, + "loss": 1.7616, + "step": 30170 + }, + { + "epoch": 5.36, + "learning_rate": 3.212562962962963e-05, + "loss": 1.9105, + "step": 30175 + }, + { + "epoch": 5.37, + "learning_rate": 3.212266666666667e-05, + "loss": 1.8659, + "step": 30180 + }, + { + "epoch": 5.37, + "learning_rate": 3.211970370370371e-05, + "loss": 1.7576, + "step": 30185 + }, + { + "epoch": 5.37, + "learning_rate": 3.211674074074074e-05, + "loss": 1.7892, + "step": 30190 + }, + { + "epoch": 5.37, + "learning_rate": 3.211377777777778e-05, + "loss": 1.6566, + "step": 30195 + }, + { + "epoch": 5.37, + "learning_rate": 3.211081481481482e-05, + "loss": 1.8304, + "step": 30200 + }, + { + "epoch": 5.37, + "learning_rate": 3.210785185185186e-05, + "loss": 1.7859, + "step": 30205 + }, + { + "epoch": 5.37, + "learning_rate": 3.210488888888889e-05, + "loss": 1.8121, + "step": 30210 + }, + { + "epoch": 5.37, + "learning_rate": 3.210192592592593e-05, + "loss": 1.764, + "step": 30215 + }, + { + "epoch": 5.37, + "learning_rate": 3.209896296296297e-05, + "loss": 1.7455, + "step": 30220 + }, + { + "epoch": 5.37, + "learning_rate": 3.2096000000000006e-05, + "loss": 1.7184, + "step": 30225 + }, + { + "epoch": 5.37, + "learning_rate": 3.209303703703704e-05, + "loss": 1.8512, + "step": 30230 + }, + { + "epoch": 5.38, + "learning_rate": 3.209007407407408e-05, + "loss": 1.7478, + "step": 30235 + }, + { + "epoch": 5.38, + "learning_rate": 3.2087111111111115e-05, + "loss": 1.8058, + "step": 30240 + }, + { + "epoch": 5.38, + "learning_rate": 3.2084148148148154e-05, + "loss": 1.6179, + "step": 30245 + }, + { + "epoch": 5.38, + "learning_rate": 3.2081185185185186e-05, + "loss": 1.6414, + "step": 30250 + }, + { + "epoch": 5.38, + "learning_rate": 3.2078222222222225e-05, + "loss": 1.745, + "step": 30255 + }, + { + "epoch": 5.38, + "learning_rate": 3.207525925925926e-05, + "loss": 1.6527, + "step": 30260 + }, + { + "epoch": 5.38, + "learning_rate": 3.20722962962963e-05, + "loss": 1.7201, + "step": 30265 + }, + { + "epoch": 5.38, + "learning_rate": 3.2069333333333335e-05, + "loss": 1.7982, + "step": 30270 + }, + { + "epoch": 5.38, + "learning_rate": 3.2066370370370374e-05, + "loss": 1.7573, + "step": 30275 + }, + { + "epoch": 5.38, + "learning_rate": 3.2063407407407406e-05, + "loss": 1.8131, + "step": 30280 + }, + { + "epoch": 5.38, + "learning_rate": 3.206044444444445e-05, + "loss": 1.7896, + "step": 30285 + }, + { + "epoch": 5.38, + "learning_rate": 3.205748148148148e-05, + "loss": 1.7856, + "step": 30290 + }, + { + "epoch": 5.39, + "learning_rate": 3.205451851851852e-05, + "loss": 1.8124, + "step": 30295 + }, + { + "epoch": 5.39, + "learning_rate": 3.2051555555555554e-05, + "loss": 1.9074, + "step": 30300 + }, + { + "epoch": 5.39, + "learning_rate": 3.20485925925926e-05, + "loss": 1.8047, + "step": 30305 + }, + { + "epoch": 5.39, + "learning_rate": 3.204562962962963e-05, + "loss": 1.6993, + "step": 30310 + }, + { + "epoch": 5.39, + "learning_rate": 3.204266666666667e-05, + "loss": 1.694, + "step": 30315 + }, + { + "epoch": 5.39, + "learning_rate": 3.20397037037037e-05, + "loss": 1.695, + "step": 30320 + }, + { + "epoch": 5.39, + "learning_rate": 3.203674074074075e-05, + "loss": 1.7901, + "step": 30325 + }, + { + "epoch": 5.39, + "learning_rate": 3.203377777777778e-05, + "loss": 1.8551, + "step": 30330 + }, + { + "epoch": 5.39, + "learning_rate": 3.203081481481482e-05, + "loss": 1.9053, + "step": 30335 + }, + { + "epoch": 5.39, + "learning_rate": 3.202785185185185e-05, + "loss": 1.8342, + "step": 30340 + }, + { + "epoch": 5.39, + "learning_rate": 3.202488888888889e-05, + "loss": 1.8835, + "step": 30345 + }, + { + "epoch": 5.4, + "learning_rate": 3.202192592592593e-05, + "loss": 1.7006, + "step": 30350 + }, + { + "epoch": 5.4, + "learning_rate": 3.201896296296297e-05, + "loss": 1.8549, + "step": 30355 + }, + { + "epoch": 5.4, + "learning_rate": 3.2016e-05, + "loss": 1.7715, + "step": 30360 + }, + { + "epoch": 5.4, + "learning_rate": 3.201303703703704e-05, + "loss": 1.7962, + "step": 30365 + }, + { + "epoch": 5.4, + "learning_rate": 3.201007407407408e-05, + "loss": 1.7828, + "step": 30370 + }, + { + "epoch": 5.4, + "learning_rate": 3.200711111111111e-05, + "loss": 1.7275, + "step": 30375 + }, + { + "epoch": 5.4, + "learning_rate": 3.200414814814815e-05, + "loss": 1.7349, + "step": 30380 + }, + { + "epoch": 5.4, + "learning_rate": 3.200118518518519e-05, + "loss": 1.7595, + "step": 30385 + }, + { + "epoch": 5.4, + "learning_rate": 3.1998222222222226e-05, + "loss": 1.8099, + "step": 30390 + }, + { + "epoch": 5.4, + "learning_rate": 3.199525925925926e-05, + "loss": 1.8064, + "step": 30395 + }, + { + "epoch": 5.4, + "learning_rate": 3.19922962962963e-05, + "loss": 1.8302, + "step": 30400 + }, + { + "epoch": 5.41, + "learning_rate": 3.1989333333333336e-05, + "loss": 1.6796, + "step": 30405 + }, + { + "epoch": 5.41, + "learning_rate": 3.1986370370370374e-05, + "loss": 1.7358, + "step": 30410 + }, + { + "epoch": 5.41, + "learning_rate": 3.1983407407407406e-05, + "loss": 1.8466, + "step": 30415 + }, + { + "epoch": 5.41, + "learning_rate": 3.1980444444444445e-05, + "loss": 1.9074, + "step": 30420 + }, + { + "epoch": 5.41, + "learning_rate": 3.197748148148148e-05, + "loss": 1.8118, + "step": 30425 + }, + { + "epoch": 5.41, + "learning_rate": 3.197451851851852e-05, + "loss": 1.6472, + "step": 30430 + }, + { + "epoch": 5.41, + "learning_rate": 3.1971555555555555e-05, + "loss": 1.7588, + "step": 30435 + }, + { + "epoch": 5.41, + "learning_rate": 3.1968592592592594e-05, + "loss": 1.8209, + "step": 30440 + }, + { + "epoch": 5.41, + "learning_rate": 3.1965629629629626e-05, + "loss": 1.8238, + "step": 30445 + }, + { + "epoch": 5.41, + "learning_rate": 3.196266666666667e-05, + "loss": 1.7621, + "step": 30450 + }, + { + "epoch": 5.41, + "learning_rate": 3.1959703703703703e-05, + "loss": 1.8016, + "step": 30455 + }, + { + "epoch": 5.42, + "learning_rate": 3.195674074074074e-05, + "loss": 1.6624, + "step": 30460 + }, + { + "epoch": 5.42, + "learning_rate": 3.1953777777777774e-05, + "loss": 1.7246, + "step": 30465 + }, + { + "epoch": 5.42, + "learning_rate": 3.195081481481482e-05, + "loss": 1.7926, + "step": 30470 + }, + { + "epoch": 5.42, + "learning_rate": 3.194785185185185e-05, + "loss": 1.7122, + "step": 30475 + }, + { + "epoch": 5.42, + "learning_rate": 3.194488888888889e-05, + "loss": 1.742, + "step": 30480 + }, + { + "epoch": 5.42, + "learning_rate": 3.194192592592592e-05, + "loss": 1.6996, + "step": 30485 + }, + { + "epoch": 5.42, + "learning_rate": 3.193896296296296e-05, + "loss": 1.8148, + "step": 30490 + }, + { + "epoch": 5.42, + "learning_rate": 3.1936e-05, + "loss": 1.7629, + "step": 30495 + }, + { + "epoch": 5.42, + "learning_rate": 3.193303703703704e-05, + "loss": 1.6853, + "step": 30500 + }, + { + "epoch": 5.42, + "learning_rate": 3.193007407407407e-05, + "loss": 1.8395, + "step": 30505 + }, + { + "epoch": 5.42, + "learning_rate": 3.192711111111111e-05, + "loss": 1.8557, + "step": 30510 + }, + { + "epoch": 5.42, + "learning_rate": 3.192414814814815e-05, + "loss": 1.7458, + "step": 30515 + }, + { + "epoch": 5.43, + "learning_rate": 3.192118518518519e-05, + "loss": 1.8935, + "step": 30520 + }, + { + "epoch": 5.43, + "learning_rate": 3.191822222222222e-05, + "loss": 1.6818, + "step": 30525 + }, + { + "epoch": 5.43, + "learning_rate": 3.191525925925926e-05, + "loss": 1.9193, + "step": 30530 + }, + { + "epoch": 5.43, + "learning_rate": 3.19122962962963e-05, + "loss": 1.7242, + "step": 30535 + }, + { + "epoch": 5.43, + "learning_rate": 3.1909333333333336e-05, + "loss": 1.7063, + "step": 30540 + }, + { + "epoch": 5.43, + "learning_rate": 3.190637037037037e-05, + "loss": 1.725, + "step": 30545 + }, + { + "epoch": 5.43, + "learning_rate": 3.190340740740741e-05, + "loss": 1.7652, + "step": 30550 + }, + { + "epoch": 5.43, + "learning_rate": 3.1900444444444446e-05, + "loss": 1.821, + "step": 30555 + }, + { + "epoch": 5.43, + "learning_rate": 3.1897481481481485e-05, + "loss": 1.8678, + "step": 30560 + }, + { + "epoch": 5.43, + "learning_rate": 3.189451851851852e-05, + "loss": 1.6798, + "step": 30565 + }, + { + "epoch": 5.43, + "learning_rate": 3.1891555555555556e-05, + "loss": 1.7284, + "step": 30570 + }, + { + "epoch": 5.44, + "learning_rate": 3.1888592592592595e-05, + "loss": 1.7557, + "step": 30575 + }, + { + "epoch": 5.44, + "learning_rate": 3.1885629629629633e-05, + "loss": 1.7892, + "step": 30580 + }, + { + "epoch": 5.44, + "learning_rate": 3.1882666666666665e-05, + "loss": 1.7837, + "step": 30585 + }, + { + "epoch": 5.44, + "learning_rate": 3.1879703703703704e-05, + "loss": 1.7782, + "step": 30590 + }, + { + "epoch": 5.44, + "learning_rate": 3.187674074074074e-05, + "loss": 1.8331, + "step": 30595 + }, + { + "epoch": 5.44, + "learning_rate": 3.187377777777778e-05, + "loss": 1.842, + "step": 30600 + }, + { + "epoch": 5.44, + "learning_rate": 3.1870814814814814e-05, + "loss": 1.7656, + "step": 30605 + }, + { + "epoch": 5.44, + "learning_rate": 3.186785185185185e-05, + "loss": 1.828, + "step": 30610 + }, + { + "epoch": 5.44, + "learning_rate": 3.186488888888889e-05, + "loss": 1.8331, + "step": 30615 + }, + { + "epoch": 5.44, + "learning_rate": 3.186192592592593e-05, + "loss": 1.7244, + "step": 30620 + }, + { + "epoch": 5.44, + "learning_rate": 3.185896296296296e-05, + "loss": 1.8241, + "step": 30625 + }, + { + "epoch": 5.45, + "learning_rate": 3.1856e-05, + "loss": 1.7786, + "step": 30630 + }, + { + "epoch": 5.45, + "learning_rate": 3.185303703703704e-05, + "loss": 1.6925, + "step": 30635 + }, + { + "epoch": 5.45, + "learning_rate": 3.185007407407408e-05, + "loss": 1.7422, + "step": 30640 + }, + { + "epoch": 5.45, + "learning_rate": 3.184711111111111e-05, + "loss": 1.7634, + "step": 30645 + }, + { + "epoch": 5.45, + "learning_rate": 3.184414814814815e-05, + "loss": 1.6899, + "step": 30650 + }, + { + "epoch": 5.45, + "learning_rate": 3.184118518518518e-05, + "loss": 1.9626, + "step": 30655 + }, + { + "epoch": 5.45, + "learning_rate": 3.183822222222223e-05, + "loss": 1.6883, + "step": 30660 + }, + { + "epoch": 5.45, + "learning_rate": 3.183525925925926e-05, + "loss": 1.7763, + "step": 30665 + }, + { + "epoch": 5.45, + "learning_rate": 3.18322962962963e-05, + "loss": 1.8264, + "step": 30670 + }, + { + "epoch": 5.45, + "learning_rate": 3.182933333333333e-05, + "loss": 1.7097, + "step": 30675 + }, + { + "epoch": 5.45, + "learning_rate": 3.1826370370370376e-05, + "loss": 1.7946, + "step": 30680 + }, + { + "epoch": 5.46, + "learning_rate": 3.182340740740741e-05, + "loss": 1.7882, + "step": 30685 + }, + { + "epoch": 5.46, + "learning_rate": 3.182044444444445e-05, + "loss": 1.788, + "step": 30690 + }, + { + "epoch": 5.46, + "learning_rate": 3.181748148148148e-05, + "loss": 1.8282, + "step": 30695 + }, + { + "epoch": 5.46, + "learning_rate": 3.1814518518518524e-05, + "loss": 1.8171, + "step": 30700 + }, + { + "epoch": 5.46, + "learning_rate": 3.1811555555555557e-05, + "loss": 1.8122, + "step": 30705 + }, + { + "epoch": 5.46, + "learning_rate": 3.1808592592592595e-05, + "loss": 1.8627, + "step": 30710 + }, + { + "epoch": 5.46, + "learning_rate": 3.180562962962963e-05, + "loss": 1.6132, + "step": 30715 + }, + { + "epoch": 5.46, + "learning_rate": 3.1802666666666666e-05, + "loss": 1.8072, + "step": 30720 + }, + { + "epoch": 5.46, + "learning_rate": 3.1799703703703705e-05, + "loss": 1.7519, + "step": 30725 + }, + { + "epoch": 5.46, + "learning_rate": 3.1796740740740744e-05, + "loss": 1.8369, + "step": 30730 + }, + { + "epoch": 5.46, + "learning_rate": 3.1793777777777776e-05, + "loss": 1.7252, + "step": 30735 + }, + { + "epoch": 5.46, + "learning_rate": 3.1790814814814815e-05, + "loss": 1.725, + "step": 30740 + }, + { + "epoch": 5.47, + "learning_rate": 3.1787851851851854e-05, + "loss": 1.7855, + "step": 30745 + }, + { + "epoch": 5.47, + "learning_rate": 3.178488888888889e-05, + "loss": 1.8013, + "step": 30750 + }, + { + "epoch": 5.47, + "learning_rate": 3.1781925925925924e-05, + "loss": 1.6371, + "step": 30755 + }, + { + "epoch": 5.47, + "learning_rate": 3.177896296296296e-05, + "loss": 1.9044, + "step": 30760 + }, + { + "epoch": 5.47, + "learning_rate": 3.1776e-05, + "loss": 1.7048, + "step": 30765 + }, + { + "epoch": 5.47, + "learning_rate": 3.177303703703704e-05, + "loss": 1.8366, + "step": 30770 + }, + { + "epoch": 5.47, + "learning_rate": 3.177007407407407e-05, + "loss": 1.7882, + "step": 30775 + }, + { + "epoch": 5.47, + "learning_rate": 3.176711111111111e-05, + "loss": 1.7665, + "step": 30780 + }, + { + "epoch": 5.47, + "learning_rate": 3.176414814814815e-05, + "loss": 1.8495, + "step": 30785 + }, + { + "epoch": 5.47, + "learning_rate": 3.176118518518519e-05, + "loss": 1.8446, + "step": 30790 + }, + { + "epoch": 5.47, + "learning_rate": 3.175822222222222e-05, + "loss": 1.6982, + "step": 30795 + }, + { + "epoch": 5.48, + "learning_rate": 3.175525925925926e-05, + "loss": 1.7092, + "step": 30800 + }, + { + "epoch": 5.48, + "learning_rate": 3.17522962962963e-05, + "loss": 1.7658, + "step": 30805 + }, + { + "epoch": 5.48, + "learning_rate": 3.174933333333334e-05, + "loss": 1.7615, + "step": 30810 + }, + { + "epoch": 5.48, + "learning_rate": 3.174637037037037e-05, + "loss": 1.7814, + "step": 30815 + }, + { + "epoch": 5.48, + "learning_rate": 3.174340740740741e-05, + "loss": 1.7661, + "step": 30820 + }, + { + "epoch": 5.48, + "learning_rate": 3.174044444444445e-05, + "loss": 1.8778, + "step": 30825 + }, + { + "epoch": 5.48, + "learning_rate": 3.1737481481481486e-05, + "loss": 1.6932, + "step": 30830 + }, + { + "epoch": 5.48, + "learning_rate": 3.173451851851852e-05, + "loss": 1.7178, + "step": 30835 + }, + { + "epoch": 5.48, + "learning_rate": 3.173155555555556e-05, + "loss": 1.7099, + "step": 30840 + }, + { + "epoch": 5.48, + "learning_rate": 3.1728592592592596e-05, + "loss": 1.9026, + "step": 30845 + }, + { + "epoch": 5.48, + "learning_rate": 3.1725629629629635e-05, + "loss": 1.7083, + "step": 30850 + }, + { + "epoch": 5.49, + "learning_rate": 3.172266666666667e-05, + "loss": 1.7845, + "step": 30855 + }, + { + "epoch": 5.49, + "learning_rate": 3.1719703703703706e-05, + "loss": 1.7638, + "step": 30860 + }, + { + "epoch": 5.49, + "learning_rate": 3.1716740740740745e-05, + "loss": 1.7684, + "step": 30865 + }, + { + "epoch": 5.49, + "learning_rate": 3.1713777777777783e-05, + "loss": 1.6788, + "step": 30870 + }, + { + "epoch": 5.49, + "learning_rate": 3.1710814814814815e-05, + "loss": 1.867, + "step": 30875 + }, + { + "epoch": 5.49, + "learning_rate": 3.1707851851851854e-05, + "loss": 1.7258, + "step": 30880 + }, + { + "epoch": 5.49, + "learning_rate": 3.1704888888888886e-05, + "loss": 1.7607, + "step": 30885 + }, + { + "epoch": 5.49, + "learning_rate": 3.170192592592593e-05, + "loss": 1.6763, + "step": 30890 + }, + { + "epoch": 5.49, + "learning_rate": 3.1698962962962964e-05, + "loss": 1.7192, + "step": 30895 + }, + { + "epoch": 5.49, + "learning_rate": 3.1696e-05, + "loss": 1.7689, + "step": 30900 + }, + { + "epoch": 5.49, + "learning_rate": 3.1693037037037035e-05, + "loss": 1.7727, + "step": 30905 + }, + { + "epoch": 5.5, + "learning_rate": 3.169007407407408e-05, + "loss": 1.7891, + "step": 30910 + }, + { + "epoch": 5.5, + "learning_rate": 3.168711111111111e-05, + "loss": 1.821, + "step": 30915 + }, + { + "epoch": 5.5, + "learning_rate": 3.168414814814815e-05, + "loss": 1.6491, + "step": 30920 + }, + { + "epoch": 5.5, + "learning_rate": 3.168118518518518e-05, + "loss": 1.8538, + "step": 30925 + }, + { + "epoch": 5.5, + "learning_rate": 3.167822222222223e-05, + "loss": 1.8045, + "step": 30930 + }, + { + "epoch": 5.5, + "learning_rate": 3.167525925925926e-05, + "loss": 1.7913, + "step": 30935 + }, + { + "epoch": 5.5, + "learning_rate": 3.16722962962963e-05, + "loss": 1.7949, + "step": 30940 + }, + { + "epoch": 5.5, + "learning_rate": 3.166933333333333e-05, + "loss": 1.7217, + "step": 30945 + }, + { + "epoch": 5.5, + "learning_rate": 3.166637037037037e-05, + "loss": 1.975, + "step": 30950 + }, + { + "epoch": 5.5, + "learning_rate": 3.166340740740741e-05, + "loss": 1.6561, + "step": 30955 + }, + { + "epoch": 5.5, + "learning_rate": 3.166044444444445e-05, + "loss": 1.9199, + "step": 30960 + }, + { + "epoch": 5.5, + "learning_rate": 3.165748148148148e-05, + "loss": 1.7394, + "step": 30965 + }, + { + "epoch": 5.51, + "learning_rate": 3.165451851851852e-05, + "loss": 1.8672, + "step": 30970 + }, + { + "epoch": 5.51, + "learning_rate": 3.165155555555556e-05, + "loss": 1.6846, + "step": 30975 + }, + { + "epoch": 5.51, + "learning_rate": 3.16485925925926e-05, + "loss": 1.7104, + "step": 30980 + }, + { + "epoch": 5.51, + "learning_rate": 3.164562962962963e-05, + "loss": 1.7971, + "step": 30985 + }, + { + "epoch": 5.51, + "learning_rate": 3.164266666666667e-05, + "loss": 1.6928, + "step": 30990 + }, + { + "epoch": 5.51, + "learning_rate": 3.1639703703703707e-05, + "loss": 1.7726, + "step": 30995 + }, + { + "epoch": 5.51, + "learning_rate": 3.1636740740740745e-05, + "loss": 1.7499, + "step": 31000 + }, + { + "epoch": 5.51, + "learning_rate": 3.163377777777778e-05, + "loss": 1.8686, + "step": 31005 + }, + { + "epoch": 5.51, + "learning_rate": 3.1630814814814816e-05, + "loss": 1.7976, + "step": 31010 + }, + { + "epoch": 5.51, + "learning_rate": 3.1627851851851855e-05, + "loss": 1.8947, + "step": 31015 + }, + { + "epoch": 5.51, + "learning_rate": 3.1624888888888894e-05, + "loss": 1.838, + "step": 31020 + }, + { + "epoch": 5.52, + "learning_rate": 3.1621925925925926e-05, + "loss": 1.7313, + "step": 31025 + }, + { + "epoch": 5.52, + "learning_rate": 3.1618962962962965e-05, + "loss": 1.8511, + "step": 31030 + }, + { + "epoch": 5.52, + "learning_rate": 3.1616000000000004e-05, + "loss": 1.6926, + "step": 31035 + }, + { + "epoch": 5.52, + "learning_rate": 3.161303703703704e-05, + "loss": 1.8433, + "step": 31040 + }, + { + "epoch": 5.52, + "learning_rate": 3.1610666666666665e-05, + "loss": 1.7327, + "step": 31045 + }, + { + "epoch": 5.52, + "learning_rate": 3.1607703703703704e-05, + "loss": 1.7641, + "step": 31050 + }, + { + "epoch": 5.52, + "learning_rate": 3.160474074074074e-05, + "loss": 1.8921, + "step": 31055 + }, + { + "epoch": 5.52, + "learning_rate": 3.160177777777778e-05, + "loss": 1.8823, + "step": 31060 + }, + { + "epoch": 5.52, + "learning_rate": 3.1598814814814814e-05, + "loss": 1.8443, + "step": 31065 + }, + { + "epoch": 5.52, + "learning_rate": 3.159585185185185e-05, + "loss": 1.8559, + "step": 31070 + }, + { + "epoch": 5.52, + "learning_rate": 3.159288888888889e-05, + "loss": 1.7812, + "step": 31075 + }, + { + "epoch": 5.53, + "learning_rate": 3.158992592592593e-05, + "loss": 1.78, + "step": 31080 + }, + { + "epoch": 5.53, + "learning_rate": 3.158696296296296e-05, + "loss": 1.694, + "step": 31085 + }, + { + "epoch": 5.53, + "learning_rate": 3.1584e-05, + "loss": 1.7249, + "step": 31090 + }, + { + "epoch": 5.53, + "learning_rate": 3.158103703703704e-05, + "loss": 1.8089, + "step": 31095 + }, + { + "epoch": 5.53, + "learning_rate": 3.157807407407408e-05, + "loss": 1.815, + "step": 31100 + }, + { + "epoch": 5.53, + "learning_rate": 3.157511111111111e-05, + "loss": 1.7734, + "step": 31105 + }, + { + "epoch": 5.53, + "learning_rate": 3.157214814814815e-05, + "loss": 1.6808, + "step": 31110 + }, + { + "epoch": 5.53, + "learning_rate": 3.156918518518518e-05, + "loss": 1.7346, + "step": 31115 + }, + { + "epoch": 5.53, + "learning_rate": 3.156622222222223e-05, + "loss": 1.8647, + "step": 31120 + }, + { + "epoch": 5.53, + "learning_rate": 3.156325925925926e-05, + "loss": 1.6936, + "step": 31125 + }, + { + "epoch": 5.53, + "learning_rate": 3.15602962962963e-05, + "loss": 1.6982, + "step": 31130 + }, + { + "epoch": 5.54, + "learning_rate": 3.155733333333333e-05, + "loss": 1.8464, + "step": 31135 + }, + { + "epoch": 5.54, + "learning_rate": 3.1554370370370376e-05, + "loss": 1.5864, + "step": 31140 + }, + { + "epoch": 5.54, + "learning_rate": 3.155140740740741e-05, + "loss": 1.7326, + "step": 31145 + }, + { + "epoch": 5.54, + "learning_rate": 3.154844444444445e-05, + "loss": 1.7698, + "step": 31150 + }, + { + "epoch": 5.54, + "learning_rate": 3.154548148148148e-05, + "loss": 1.7541, + "step": 31155 + }, + { + "epoch": 5.54, + "learning_rate": 3.1542518518518524e-05, + "loss": 1.7638, + "step": 31160 + }, + { + "epoch": 5.54, + "learning_rate": 3.1539555555555556e-05, + "loss": 1.8659, + "step": 31165 + }, + { + "epoch": 5.54, + "learning_rate": 3.1536592592592595e-05, + "loss": 1.7968, + "step": 31170 + }, + { + "epoch": 5.54, + "learning_rate": 3.153362962962963e-05, + "loss": 1.7329, + "step": 31175 + }, + { + "epoch": 5.54, + "learning_rate": 3.1530666666666666e-05, + "loss": 1.7213, + "step": 31180 + }, + { + "epoch": 5.54, + "learning_rate": 3.1527703703703705e-05, + "loss": 1.7661, + "step": 31185 + }, + { + "epoch": 5.54, + "learning_rate": 3.1524740740740744e-05, + "loss": 1.7155, + "step": 31190 + }, + { + "epoch": 5.55, + "learning_rate": 3.1521777777777776e-05, + "loss": 1.8216, + "step": 31195 + }, + { + "epoch": 5.55, + "learning_rate": 3.1518814814814815e-05, + "loss": 1.701, + "step": 31200 + }, + { + "epoch": 5.55, + "learning_rate": 3.1515851851851853e-05, + "loss": 1.8173, + "step": 31205 + }, + { + "epoch": 5.55, + "learning_rate": 3.151288888888889e-05, + "loss": 1.7842, + "step": 31210 + }, + { + "epoch": 5.55, + "learning_rate": 3.1509925925925924e-05, + "loss": 1.6522, + "step": 31215 + }, + { + "epoch": 5.55, + "learning_rate": 3.150696296296296e-05, + "loss": 1.6995, + "step": 31220 + }, + { + "epoch": 5.55, + "learning_rate": 3.1504e-05, + "loss": 1.6889, + "step": 31225 + }, + { + "epoch": 5.55, + "learning_rate": 3.150103703703704e-05, + "loss": 1.7295, + "step": 31230 + }, + { + "epoch": 5.55, + "learning_rate": 3.149807407407407e-05, + "loss": 1.7438, + "step": 31235 + }, + { + "epoch": 5.55, + "learning_rate": 3.149511111111111e-05, + "loss": 1.8172, + "step": 31240 + }, + { + "epoch": 5.55, + "learning_rate": 3.149214814814815e-05, + "loss": 1.858, + "step": 31245 + }, + { + "epoch": 5.56, + "learning_rate": 3.148918518518519e-05, + "loss": 1.8805, + "step": 31250 + }, + { + "epoch": 5.56, + "learning_rate": 3.148622222222222e-05, + "loss": 1.773, + "step": 31255 + }, + { + "epoch": 5.56, + "learning_rate": 3.148325925925926e-05, + "loss": 1.662, + "step": 31260 + }, + { + "epoch": 5.56, + "learning_rate": 3.14802962962963e-05, + "loss": 1.71, + "step": 31265 + }, + { + "epoch": 5.56, + "learning_rate": 3.147733333333334e-05, + "loss": 1.7215, + "step": 31270 + }, + { + "epoch": 5.56, + "learning_rate": 3.147437037037037e-05, + "loss": 1.683, + "step": 31275 + }, + { + "epoch": 5.56, + "learning_rate": 3.147140740740741e-05, + "loss": 1.7385, + "step": 31280 + }, + { + "epoch": 5.56, + "learning_rate": 3.146844444444445e-05, + "loss": 1.8453, + "step": 31285 + }, + { + "epoch": 5.56, + "learning_rate": 3.1465481481481486e-05, + "loss": 1.8511, + "step": 31290 + }, + { + "epoch": 5.56, + "learning_rate": 3.146251851851852e-05, + "loss": 1.743, + "step": 31295 + }, + { + "epoch": 5.56, + "learning_rate": 3.145955555555556e-05, + "loss": 1.7316, + "step": 31300 + }, + { + "epoch": 5.57, + "learning_rate": 3.1456592592592596e-05, + "loss": 1.7029, + "step": 31305 + }, + { + "epoch": 5.57, + "learning_rate": 3.1453629629629635e-05, + "loss": 1.9747, + "step": 31310 + }, + { + "epoch": 5.57, + "learning_rate": 3.145066666666667e-05, + "loss": 1.7667, + "step": 31315 + }, + { + "epoch": 5.57, + "learning_rate": 3.1447703703703706e-05, + "loss": 1.6564, + "step": 31320 + }, + { + "epoch": 5.57, + "learning_rate": 3.1444740740740745e-05, + "loss": 1.7722, + "step": 31325 + }, + { + "epoch": 5.57, + "learning_rate": 3.144177777777778e-05, + "loss": 1.7273, + "step": 31330 + }, + { + "epoch": 5.57, + "learning_rate": 3.1438814814814815e-05, + "loss": 1.7702, + "step": 31335 + }, + { + "epoch": 5.57, + "learning_rate": 3.1435851851851854e-05, + "loss": 1.7738, + "step": 31340 + }, + { + "epoch": 5.57, + "learning_rate": 3.1432888888888886e-05, + "loss": 1.8663, + "step": 31345 + }, + { + "epoch": 5.57, + "learning_rate": 3.142992592592593e-05, + "loss": 1.7274, + "step": 31350 + }, + { + "epoch": 5.57, + "learning_rate": 3.1426962962962964e-05, + "loss": 1.8418, + "step": 31355 + }, + { + "epoch": 5.58, + "learning_rate": 3.1424e-05, + "loss": 1.8298, + "step": 31360 + }, + { + "epoch": 5.58, + "learning_rate": 3.1421037037037035e-05, + "loss": 1.9037, + "step": 31365 + }, + { + "epoch": 5.58, + "learning_rate": 3.141807407407408e-05, + "loss": 1.7335, + "step": 31370 + }, + { + "epoch": 5.58, + "learning_rate": 3.141511111111111e-05, + "loss": 1.6693, + "step": 31375 + }, + { + "epoch": 5.58, + "learning_rate": 3.141214814814815e-05, + "loss": 1.79, + "step": 31380 + }, + { + "epoch": 5.58, + "learning_rate": 3.140918518518518e-05, + "loss": 1.7368, + "step": 31385 + }, + { + "epoch": 5.58, + "learning_rate": 3.140622222222223e-05, + "loss": 1.7137, + "step": 31390 + }, + { + "epoch": 5.58, + "learning_rate": 3.140325925925926e-05, + "loss": 1.8864, + "step": 31395 + }, + { + "epoch": 5.58, + "learning_rate": 3.14002962962963e-05, + "loss": 1.7851, + "step": 31400 + }, + { + "epoch": 5.58, + "learning_rate": 3.139733333333333e-05, + "loss": 1.7961, + "step": 31405 + }, + { + "epoch": 5.58, + "learning_rate": 3.139437037037037e-05, + "loss": 1.7735, + "step": 31410 + }, + { + "epoch": 5.58, + "learning_rate": 3.139140740740741e-05, + "loss": 1.7106, + "step": 31415 + }, + { + "epoch": 5.59, + "learning_rate": 3.138844444444445e-05, + "loss": 1.7121, + "step": 31420 + }, + { + "epoch": 5.59, + "learning_rate": 3.138548148148148e-05, + "loss": 1.8201, + "step": 31425 + }, + { + "epoch": 5.59, + "learning_rate": 3.138251851851852e-05, + "loss": 1.7026, + "step": 31430 + }, + { + "epoch": 5.59, + "learning_rate": 3.137955555555556e-05, + "loss": 1.7519, + "step": 31435 + }, + { + "epoch": 5.59, + "learning_rate": 3.13765925925926e-05, + "loss": 1.7989, + "step": 31440 + }, + { + "epoch": 5.59, + "learning_rate": 3.137362962962963e-05, + "loss": 1.7369, + "step": 31445 + }, + { + "epoch": 5.59, + "learning_rate": 3.137066666666667e-05, + "loss": 1.7181, + "step": 31450 + }, + { + "epoch": 5.59, + "learning_rate": 3.1367703703703706e-05, + "loss": 1.7591, + "step": 31455 + }, + { + "epoch": 5.59, + "learning_rate": 3.1364740740740745e-05, + "loss": 1.8078, + "step": 31460 + }, + { + "epoch": 5.59, + "learning_rate": 3.136177777777778e-05, + "loss": 1.6645, + "step": 31465 + }, + { + "epoch": 5.59, + "learning_rate": 3.1358814814814816e-05, + "loss": 1.7519, + "step": 31470 + }, + { + "epoch": 5.6, + "learning_rate": 3.1355851851851855e-05, + "loss": 1.7399, + "step": 31475 + }, + { + "epoch": 5.6, + "learning_rate": 3.1352888888888894e-05, + "loss": 1.8531, + "step": 31480 + }, + { + "epoch": 5.6, + "learning_rate": 3.1349925925925926e-05, + "loss": 1.8759, + "step": 31485 + }, + { + "epoch": 5.6, + "learning_rate": 3.1346962962962965e-05, + "loss": 1.7434, + "step": 31490 + }, + { + "epoch": 5.6, + "learning_rate": 3.1344000000000003e-05, + "loss": 1.7048, + "step": 31495 + }, + { + "epoch": 5.6, + "learning_rate": 3.134103703703704e-05, + "loss": 1.7197, + "step": 31500 + }, + { + "epoch": 5.6, + "learning_rate": 3.1338074074074074e-05, + "loss": 1.7388, + "step": 31505 + }, + { + "epoch": 5.6, + "learning_rate": 3.133511111111111e-05, + "loss": 1.6485, + "step": 31510 + }, + { + "epoch": 5.6, + "learning_rate": 3.133214814814815e-05, + "loss": 1.7801, + "step": 31515 + }, + { + "epoch": 5.6, + "learning_rate": 3.132918518518519e-05, + "loss": 1.7048, + "step": 31520 + }, + { + "epoch": 5.6, + "learning_rate": 3.132622222222222e-05, + "loss": 1.7085, + "step": 31525 + }, + { + "epoch": 5.61, + "learning_rate": 3.132325925925926e-05, + "loss": 1.7465, + "step": 31530 + }, + { + "epoch": 5.61, + "learning_rate": 3.13202962962963e-05, + "loss": 1.6885, + "step": 31535 + }, + { + "epoch": 5.61, + "learning_rate": 3.131733333333334e-05, + "loss": 1.7921, + "step": 31540 + }, + { + "epoch": 5.61, + "learning_rate": 3.131437037037037e-05, + "loss": 1.7291, + "step": 31545 + }, + { + "epoch": 5.61, + "learning_rate": 3.131140740740741e-05, + "loss": 1.7648, + "step": 31550 + }, + { + "epoch": 5.61, + "learning_rate": 3.130844444444445e-05, + "loss": 1.8669, + "step": 31555 + }, + { + "epoch": 5.61, + "learning_rate": 3.130548148148149e-05, + "loss": 1.82, + "step": 31560 + }, + { + "epoch": 5.61, + "learning_rate": 3.130251851851852e-05, + "loss": 1.8781, + "step": 31565 + }, + { + "epoch": 5.61, + "learning_rate": 3.129955555555556e-05, + "loss": 1.7801, + "step": 31570 + }, + { + "epoch": 5.61, + "learning_rate": 3.129659259259259e-05, + "loss": 1.8379, + "step": 31575 + }, + { + "epoch": 5.61, + "learning_rate": 3.1293629629629636e-05, + "loss": 1.8033, + "step": 31580 + }, + { + "epoch": 5.62, + "learning_rate": 3.129066666666667e-05, + "loss": 1.684, + "step": 31585 + }, + { + "epoch": 5.62, + "learning_rate": 3.128770370370371e-05, + "loss": 1.8527, + "step": 31590 + }, + { + "epoch": 5.62, + "learning_rate": 3.128474074074074e-05, + "loss": 1.8069, + "step": 31595 + }, + { + "epoch": 5.62, + "learning_rate": 3.128177777777778e-05, + "loss": 1.847, + "step": 31600 + }, + { + "epoch": 5.62, + "learning_rate": 3.127881481481482e-05, + "loss": 1.7497, + "step": 31605 + }, + { + "epoch": 5.62, + "learning_rate": 3.127585185185185e-05, + "loss": 1.7918, + "step": 31610 + }, + { + "epoch": 5.62, + "learning_rate": 3.127288888888889e-05, + "loss": 1.7614, + "step": 31615 + }, + { + "epoch": 5.62, + "learning_rate": 3.1269925925925927e-05, + "loss": 1.7639, + "step": 31620 + }, + { + "epoch": 5.62, + "learning_rate": 3.1266962962962965e-05, + "loss": 1.7072, + "step": 31625 + }, + { + "epoch": 5.62, + "learning_rate": 3.1264e-05, + "loss": 1.693, + "step": 31630 + }, + { + "epoch": 5.62, + "learning_rate": 3.1261037037037036e-05, + "loss": 1.7512, + "step": 31635 + }, + { + "epoch": 5.62, + "learning_rate": 3.1258074074074075e-05, + "loss": 1.7095, + "step": 31640 + }, + { + "epoch": 5.63, + "learning_rate": 3.1255111111111114e-05, + "loss": 1.7813, + "step": 31645 + }, + { + "epoch": 5.63, + "learning_rate": 3.1252148148148146e-05, + "loss": 1.7179, + "step": 31650 + }, + { + "epoch": 5.63, + "learning_rate": 3.1249185185185185e-05, + "loss": 1.6958, + "step": 31655 + }, + { + "epoch": 5.63, + "learning_rate": 3.1246222222222224e-05, + "loss": 1.8054, + "step": 31660 + }, + { + "epoch": 5.63, + "learning_rate": 3.124325925925926e-05, + "loss": 1.6277, + "step": 31665 + }, + { + "epoch": 5.63, + "learning_rate": 3.1240296296296294e-05, + "loss": 1.8798, + "step": 31670 + }, + { + "epoch": 5.63, + "learning_rate": 3.123733333333333e-05, + "loss": 1.7651, + "step": 31675 + }, + { + "epoch": 5.63, + "learning_rate": 3.123437037037037e-05, + "loss": 1.8662, + "step": 31680 + }, + { + "epoch": 5.63, + "learning_rate": 3.123140740740741e-05, + "loss": 1.8777, + "step": 31685 + }, + { + "epoch": 5.63, + "learning_rate": 3.122844444444444e-05, + "loss": 1.8076, + "step": 31690 + }, + { + "epoch": 5.63, + "learning_rate": 3.122548148148148e-05, + "loss": 1.7202, + "step": 31695 + }, + { + "epoch": 5.64, + "learning_rate": 3.122251851851852e-05, + "loss": 1.6982, + "step": 31700 + }, + { + "epoch": 5.64, + "learning_rate": 3.121955555555556e-05, + "loss": 1.7004, + "step": 31705 + }, + { + "epoch": 5.64, + "learning_rate": 3.121659259259259e-05, + "loss": 1.9044, + "step": 31710 + }, + { + "epoch": 5.64, + "learning_rate": 3.121362962962963e-05, + "loss": 1.7976, + "step": 31715 + }, + { + "epoch": 5.64, + "learning_rate": 3.121066666666667e-05, + "loss": 1.844, + "step": 31720 + }, + { + "epoch": 5.64, + "learning_rate": 3.120770370370371e-05, + "loss": 1.73, + "step": 31725 + }, + { + "epoch": 5.64, + "learning_rate": 3.120474074074074e-05, + "loss": 1.7927, + "step": 31730 + }, + { + "epoch": 5.64, + "learning_rate": 3.120177777777778e-05, + "loss": 1.7507, + "step": 31735 + }, + { + "epoch": 5.64, + "learning_rate": 3.119881481481481e-05, + "loss": 1.7915, + "step": 31740 + }, + { + "epoch": 5.64, + "learning_rate": 3.1195851851851856e-05, + "loss": 1.7597, + "step": 31745 + }, + { + "epoch": 5.64, + "learning_rate": 3.119288888888889e-05, + "loss": 1.7875, + "step": 31750 + }, + { + "epoch": 5.65, + "learning_rate": 3.118992592592593e-05, + "loss": 1.7953, + "step": 31755 + }, + { + "epoch": 5.65, + "learning_rate": 3.118696296296296e-05, + "loss": 1.8576, + "step": 31760 + }, + { + "epoch": 5.65, + "learning_rate": 3.1184000000000005e-05, + "loss": 1.8272, + "step": 31765 + }, + { + "epoch": 5.65, + "learning_rate": 3.118103703703704e-05, + "loss": 1.6859, + "step": 31770 + }, + { + "epoch": 5.65, + "learning_rate": 3.1178074074074076e-05, + "loss": 1.8597, + "step": 31775 + }, + { + "epoch": 5.65, + "learning_rate": 3.117511111111111e-05, + "loss": 1.7329, + "step": 31780 + }, + { + "epoch": 5.65, + "learning_rate": 3.1172148148148154e-05, + "loss": 1.7861, + "step": 31785 + }, + { + "epoch": 5.65, + "learning_rate": 3.1169185185185186e-05, + "loss": 1.7981, + "step": 31790 + }, + { + "epoch": 5.65, + "learning_rate": 3.1166222222222224e-05, + "loss": 1.7985, + "step": 31795 + }, + { + "epoch": 5.65, + "learning_rate": 3.1163259259259256e-05, + "loss": 1.7294, + "step": 31800 + }, + { + "epoch": 5.65, + "learning_rate": 3.1160296296296295e-05, + "loss": 1.7675, + "step": 31805 + }, + { + "epoch": 5.66, + "learning_rate": 3.1157333333333334e-05, + "loss": 1.813, + "step": 31810 + }, + { + "epoch": 5.66, + "learning_rate": 3.115437037037037e-05, + "loss": 1.6723, + "step": 31815 + }, + { + "epoch": 5.66, + "learning_rate": 3.1151407407407405e-05, + "loss": 1.7141, + "step": 31820 + }, + { + "epoch": 5.66, + "learning_rate": 3.1148444444444444e-05, + "loss": 1.7696, + "step": 31825 + }, + { + "epoch": 5.66, + "learning_rate": 3.114548148148148e-05, + "loss": 1.8026, + "step": 31830 + }, + { + "epoch": 5.66, + "learning_rate": 3.114251851851852e-05, + "loss": 1.831, + "step": 31835 + }, + { + "epoch": 5.66, + "learning_rate": 3.1139555555555553e-05, + "loss": 1.835, + "step": 31840 + }, + { + "epoch": 5.66, + "learning_rate": 3.113659259259259e-05, + "loss": 1.8629, + "step": 31845 + }, + { + "epoch": 5.66, + "learning_rate": 3.113362962962963e-05, + "loss": 1.7756, + "step": 31850 + }, + { + "epoch": 5.66, + "learning_rate": 3.113066666666667e-05, + "loss": 1.6591, + "step": 31855 + }, + { + "epoch": 5.66, + "learning_rate": 3.11277037037037e-05, + "loss": 1.827, + "step": 31860 + }, + { + "epoch": 5.66, + "learning_rate": 3.112474074074074e-05, + "loss": 1.8058, + "step": 31865 + }, + { + "epoch": 5.67, + "learning_rate": 3.112177777777778e-05, + "loss": 1.8309, + "step": 31870 + }, + { + "epoch": 5.67, + "learning_rate": 3.111881481481482e-05, + "loss": 1.7748, + "step": 31875 + }, + { + "epoch": 5.67, + "learning_rate": 3.111585185185185e-05, + "loss": 1.7768, + "step": 31880 + }, + { + "epoch": 5.67, + "learning_rate": 3.111288888888889e-05, + "loss": 1.6996, + "step": 31885 + }, + { + "epoch": 5.67, + "learning_rate": 3.110992592592593e-05, + "loss": 1.8126, + "step": 31890 + }, + { + "epoch": 5.67, + "learning_rate": 3.110696296296297e-05, + "loss": 1.7201, + "step": 31895 + }, + { + "epoch": 5.67, + "learning_rate": 3.1104e-05, + "loss": 1.8051, + "step": 31900 + }, + { + "epoch": 5.67, + "learning_rate": 3.110103703703704e-05, + "loss": 1.7476, + "step": 31905 + }, + { + "epoch": 5.67, + "learning_rate": 3.109807407407408e-05, + "loss": 1.6694, + "step": 31910 + }, + { + "epoch": 5.67, + "learning_rate": 3.1095111111111115e-05, + "loss": 1.803, + "step": 31915 + }, + { + "epoch": 5.67, + "learning_rate": 3.109214814814815e-05, + "loss": 1.9136, + "step": 31920 + }, + { + "epoch": 5.68, + "learning_rate": 3.1089185185185186e-05, + "loss": 1.8455, + "step": 31925 + }, + { + "epoch": 5.68, + "learning_rate": 3.1086222222222225e-05, + "loss": 1.7325, + "step": 31930 + }, + { + "epoch": 5.68, + "learning_rate": 3.1083259259259264e-05, + "loss": 1.7701, + "step": 31935 + }, + { + "epoch": 5.68, + "learning_rate": 3.1080296296296296e-05, + "loss": 1.8716, + "step": 31940 + }, + { + "epoch": 5.68, + "learning_rate": 3.1077333333333335e-05, + "loss": 1.8155, + "step": 31945 + }, + { + "epoch": 5.68, + "learning_rate": 3.1074370370370374e-05, + "loss": 1.8082, + "step": 31950 + }, + { + "epoch": 5.68, + "learning_rate": 3.107140740740741e-05, + "loss": 1.6301, + "step": 31955 + }, + { + "epoch": 5.68, + "learning_rate": 3.1068444444444445e-05, + "loss": 1.7759, + "step": 31960 + }, + { + "epoch": 5.68, + "learning_rate": 3.106548148148148e-05, + "loss": 1.768, + "step": 31965 + }, + { + "epoch": 5.68, + "learning_rate": 3.1062518518518515e-05, + "loss": 1.7293, + "step": 31970 + }, + { + "epoch": 5.68, + "learning_rate": 3.105955555555556e-05, + "loss": 1.8429, + "step": 31975 + }, + { + "epoch": 5.69, + "learning_rate": 3.105659259259259e-05, + "loss": 1.8491, + "step": 31980 + }, + { + "epoch": 5.69, + "learning_rate": 3.105362962962963e-05, + "loss": 1.7755, + "step": 31985 + }, + { + "epoch": 5.69, + "learning_rate": 3.1050666666666664e-05, + "loss": 1.8813, + "step": 31990 + }, + { + "epoch": 5.69, + "learning_rate": 3.104770370370371e-05, + "loss": 1.756, + "step": 31995 + }, + { + "epoch": 5.69, + "learning_rate": 3.104474074074074e-05, + "loss": 1.9161, + "step": 32000 + }, + { + "epoch": 5.69, + "learning_rate": 3.104177777777778e-05, + "loss": 1.8715, + "step": 32005 + }, + { + "epoch": 5.69, + "learning_rate": 3.103881481481481e-05, + "loss": 1.7547, + "step": 32010 + }, + { + "epoch": 5.69, + "learning_rate": 3.103585185185186e-05, + "loss": 1.9679, + "step": 32015 + }, + { + "epoch": 5.69, + "learning_rate": 3.103288888888889e-05, + "loss": 1.7302, + "step": 32020 + }, + { + "epoch": 5.69, + "learning_rate": 3.102992592592593e-05, + "loss": 1.8798, + "step": 32025 + }, + { + "epoch": 5.69, + "learning_rate": 3.102696296296296e-05, + "loss": 1.8049, + "step": 32030 + }, + { + "epoch": 5.7, + "learning_rate": 3.1024e-05, + "loss": 1.8419, + "step": 32035 + }, + { + "epoch": 5.7, + "learning_rate": 3.102103703703704e-05, + "loss": 1.6936, + "step": 32040 + }, + { + "epoch": 5.7, + "learning_rate": 3.101807407407408e-05, + "loss": 1.6731, + "step": 32045 + }, + { + "epoch": 5.7, + "learning_rate": 3.101511111111111e-05, + "loss": 1.7687, + "step": 32050 + }, + { + "epoch": 5.7, + "learning_rate": 3.101214814814815e-05, + "loss": 1.7636, + "step": 32055 + }, + { + "epoch": 5.7, + "learning_rate": 3.100918518518519e-05, + "loss": 1.8046, + "step": 32060 + }, + { + "epoch": 5.7, + "learning_rate": 3.1006222222222226e-05, + "loss": 1.8165, + "step": 32065 + }, + { + "epoch": 5.7, + "learning_rate": 3.100325925925926e-05, + "loss": 1.6993, + "step": 32070 + }, + { + "epoch": 5.7, + "learning_rate": 3.10002962962963e-05, + "loss": 1.7324, + "step": 32075 + }, + { + "epoch": 5.7, + "learning_rate": 3.0997333333333336e-05, + "loss": 1.8225, + "step": 32080 + }, + { + "epoch": 5.7, + "learning_rate": 3.0994370370370374e-05, + "loss": 1.8352, + "step": 32085 + }, + { + "epoch": 5.7, + "learning_rate": 3.0991407407407406e-05, + "loss": 1.7509, + "step": 32090 + }, + { + "epoch": 5.71, + "learning_rate": 3.0988444444444445e-05, + "loss": 1.8205, + "step": 32095 + }, + { + "epoch": 5.71, + "learning_rate": 3.0985481481481484e-05, + "loss": 1.7646, + "step": 32100 + }, + { + "epoch": 5.71, + "learning_rate": 3.098251851851852e-05, + "loss": 1.738, + "step": 32105 + }, + { + "epoch": 5.71, + "learning_rate": 3.0979555555555555e-05, + "loss": 1.8839, + "step": 32110 + }, + { + "epoch": 5.71, + "learning_rate": 3.0976592592592594e-05, + "loss": 1.8064, + "step": 32115 + }, + { + "epoch": 5.71, + "learning_rate": 3.097362962962963e-05, + "loss": 1.7439, + "step": 32120 + }, + { + "epoch": 5.71, + "learning_rate": 3.097066666666667e-05, + "loss": 1.6806, + "step": 32125 + }, + { + "epoch": 5.71, + "learning_rate": 3.0967703703703704e-05, + "loss": 1.8937, + "step": 32130 + }, + { + "epoch": 5.71, + "learning_rate": 3.096474074074074e-05, + "loss": 1.6911, + "step": 32135 + }, + { + "epoch": 5.71, + "learning_rate": 3.096177777777778e-05, + "loss": 1.835, + "step": 32140 + }, + { + "epoch": 5.71, + "learning_rate": 3.095881481481482e-05, + "loss": 1.7977, + "step": 32145 + }, + { + "epoch": 5.72, + "learning_rate": 3.095585185185185e-05, + "loss": 1.7236, + "step": 32150 + }, + { + "epoch": 5.72, + "learning_rate": 3.095288888888889e-05, + "loss": 1.8154, + "step": 32155 + }, + { + "epoch": 5.72, + "learning_rate": 3.094992592592593e-05, + "loss": 1.7937, + "step": 32160 + }, + { + "epoch": 5.72, + "learning_rate": 3.094696296296297e-05, + "loss": 1.7209, + "step": 32165 + }, + { + "epoch": 5.72, + "learning_rate": 3.0944e-05, + "loss": 1.8279, + "step": 32170 + }, + { + "epoch": 5.72, + "learning_rate": 3.094103703703704e-05, + "loss": 1.6862, + "step": 32175 + }, + { + "epoch": 5.72, + "learning_rate": 3.093807407407408e-05, + "loss": 1.9646, + "step": 32180 + }, + { + "epoch": 5.72, + "learning_rate": 3.093511111111112e-05, + "loss": 1.8541, + "step": 32185 + }, + { + "epoch": 5.72, + "learning_rate": 3.093214814814815e-05, + "loss": 1.6799, + "step": 32190 + }, + { + "epoch": 5.72, + "learning_rate": 3.092918518518519e-05, + "loss": 1.6851, + "step": 32195 + }, + { + "epoch": 5.72, + "learning_rate": 3.092622222222222e-05, + "loss": 1.7665, + "step": 32200 + }, + { + "epoch": 5.73, + "learning_rate": 3.0923259259259266e-05, + "loss": 1.8934, + "step": 32205 + }, + { + "epoch": 5.73, + "learning_rate": 3.09202962962963e-05, + "loss": 1.7695, + "step": 32210 + }, + { + "epoch": 5.73, + "learning_rate": 3.0917333333333336e-05, + "loss": 1.7963, + "step": 32215 + }, + { + "epoch": 5.73, + "learning_rate": 3.091437037037037e-05, + "loss": 1.7366, + "step": 32220 + }, + { + "epoch": 5.73, + "learning_rate": 3.0911407407407414e-05, + "loss": 1.7312, + "step": 32225 + }, + { + "epoch": 5.73, + "learning_rate": 3.0908444444444446e-05, + "loss": 1.9089, + "step": 32230 + }, + { + "epoch": 5.73, + "learning_rate": 3.0905481481481485e-05, + "loss": 1.7415, + "step": 32235 + }, + { + "epoch": 5.73, + "learning_rate": 3.090251851851852e-05, + "loss": 1.9092, + "step": 32240 + }, + { + "epoch": 5.73, + "learning_rate": 3.089955555555556e-05, + "loss": 1.7883, + "step": 32245 + }, + { + "epoch": 5.73, + "learning_rate": 3.0896592592592595e-05, + "loss": 1.8109, + "step": 32250 + }, + { + "epoch": 5.73, + "learning_rate": 3.0893629629629633e-05, + "loss": 1.9123, + "step": 32255 + }, + { + "epoch": 5.74, + "learning_rate": 3.0890666666666665e-05, + "loss": 1.8401, + "step": 32260 + }, + { + "epoch": 5.74, + "learning_rate": 3.0887703703703704e-05, + "loss": 1.724, + "step": 32265 + }, + { + "epoch": 5.74, + "learning_rate": 3.088474074074074e-05, + "loss": 1.8262, + "step": 32270 + }, + { + "epoch": 5.74, + "learning_rate": 3.088177777777778e-05, + "loss": 1.7865, + "step": 32275 + }, + { + "epoch": 5.74, + "learning_rate": 3.0878814814814814e-05, + "loss": 1.8945, + "step": 32280 + }, + { + "epoch": 5.74, + "learning_rate": 3.087585185185185e-05, + "loss": 1.8649, + "step": 32285 + }, + { + "epoch": 5.74, + "learning_rate": 3.087288888888889e-05, + "loss": 1.7616, + "step": 32290 + }, + { + "epoch": 5.74, + "learning_rate": 3.086992592592593e-05, + "loss": 1.677, + "step": 32295 + }, + { + "epoch": 5.74, + "learning_rate": 3.086696296296296e-05, + "loss": 1.758, + "step": 32300 + }, + { + "epoch": 5.74, + "learning_rate": 3.0864e-05, + "loss": 1.8861, + "step": 32305 + }, + { + "epoch": 5.74, + "learning_rate": 3.086103703703704e-05, + "loss": 1.7799, + "step": 32310 + }, + { + "epoch": 5.74, + "learning_rate": 3.085807407407408e-05, + "loss": 1.7554, + "step": 32315 + }, + { + "epoch": 5.75, + "learning_rate": 3.085511111111111e-05, + "loss": 1.6893, + "step": 32320 + }, + { + "epoch": 5.75, + "learning_rate": 3.085214814814815e-05, + "loss": 1.8322, + "step": 32325 + }, + { + "epoch": 5.75, + "learning_rate": 3.084918518518519e-05, + "loss": 1.7639, + "step": 32330 + }, + { + "epoch": 5.75, + "learning_rate": 3.084622222222223e-05, + "loss": 1.7998, + "step": 32335 + }, + { + "epoch": 5.75, + "learning_rate": 3.084325925925926e-05, + "loss": 1.9589, + "step": 32340 + }, + { + "epoch": 5.75, + "learning_rate": 3.08402962962963e-05, + "loss": 1.6992, + "step": 32345 + }, + { + "epoch": 5.75, + "learning_rate": 3.083733333333334e-05, + "loss": 1.8168, + "step": 32350 + }, + { + "epoch": 5.75, + "learning_rate": 3.0834370370370376e-05, + "loss": 1.8621, + "step": 32355 + }, + { + "epoch": 5.75, + "learning_rate": 3.083140740740741e-05, + "loss": 1.757, + "step": 32360 + }, + { + "epoch": 5.75, + "learning_rate": 3.082844444444445e-05, + "loss": 1.8237, + "step": 32365 + }, + { + "epoch": 5.75, + "learning_rate": 3.0825481481481486e-05, + "loss": 1.7844, + "step": 32370 + }, + { + "epoch": 5.76, + "learning_rate": 3.082251851851852e-05, + "loss": 1.811, + "step": 32375 + }, + { + "epoch": 5.76, + "learning_rate": 3.0819555555555557e-05, + "loss": 1.7126, + "step": 32380 + }, + { + "epoch": 5.76, + "learning_rate": 3.081659259259259e-05, + "loss": 1.8338, + "step": 32385 + }, + { + "epoch": 5.76, + "learning_rate": 3.0813629629629634e-05, + "loss": 1.6839, + "step": 32390 + }, + { + "epoch": 5.76, + "learning_rate": 3.0810666666666666e-05, + "loss": 1.8455, + "step": 32395 + }, + { + "epoch": 5.76, + "learning_rate": 3.0807703703703705e-05, + "loss": 1.7468, + "step": 32400 + }, + { + "epoch": 5.76, + "learning_rate": 3.080474074074074e-05, + "loss": 1.6845, + "step": 32405 + }, + { + "epoch": 5.76, + "learning_rate": 3.080177777777778e-05, + "loss": 1.7447, + "step": 32410 + }, + { + "epoch": 5.76, + "learning_rate": 3.0798814814814815e-05, + "loss": 1.7269, + "step": 32415 + }, + { + "epoch": 5.76, + "learning_rate": 3.0795851851851854e-05, + "loss": 1.8179, + "step": 32420 + }, + { + "epoch": 5.76, + "learning_rate": 3.0792888888888886e-05, + "loss": 1.7824, + "step": 32425 + }, + { + "epoch": 5.77, + "learning_rate": 3.0789925925925924e-05, + "loss": 1.8312, + "step": 32430 + }, + { + "epoch": 5.77, + "learning_rate": 3.078696296296296e-05, + "loss": 1.7442, + "step": 32435 + }, + { + "epoch": 5.77, + "learning_rate": 3.0784e-05, + "loss": 1.8164, + "step": 32440 + }, + { + "epoch": 5.77, + "learning_rate": 3.0781037037037034e-05, + "loss": 1.603, + "step": 32445 + }, + { + "epoch": 5.77, + "learning_rate": 3.077807407407407e-05, + "loss": 1.792, + "step": 32450 + }, + { + "epoch": 5.77, + "learning_rate": 3.077511111111111e-05, + "loss": 1.6909, + "step": 32455 + }, + { + "epoch": 5.77, + "learning_rate": 3.077214814814815e-05, + "loss": 1.8125, + "step": 32460 + }, + { + "epoch": 5.77, + "learning_rate": 3.076918518518518e-05, + "loss": 1.8062, + "step": 32465 + }, + { + "epoch": 5.77, + "learning_rate": 3.076622222222222e-05, + "loss": 1.8516, + "step": 32470 + }, + { + "epoch": 5.77, + "learning_rate": 3.076325925925926e-05, + "loss": 1.8091, + "step": 32475 + }, + { + "epoch": 5.77, + "learning_rate": 3.07602962962963e-05, + "loss": 1.8028, + "step": 32480 + }, + { + "epoch": 5.78, + "learning_rate": 3.075733333333333e-05, + "loss": 1.7271, + "step": 32485 + }, + { + "epoch": 5.78, + "learning_rate": 3.075437037037037e-05, + "loss": 1.7458, + "step": 32490 + }, + { + "epoch": 5.78, + "learning_rate": 3.075140740740741e-05, + "loss": 1.7706, + "step": 32495 + }, + { + "epoch": 5.78, + "learning_rate": 3.074844444444445e-05, + "loss": 1.6899, + "step": 32500 + }, + { + "epoch": 5.78, + "learning_rate": 3.074548148148148e-05, + "loss": 1.7182, + "step": 32505 + }, + { + "epoch": 5.78, + "learning_rate": 3.074251851851852e-05, + "loss": 1.6718, + "step": 32510 + }, + { + "epoch": 5.78, + "learning_rate": 3.073955555555556e-05, + "loss": 1.77, + "step": 32515 + }, + { + "epoch": 5.78, + "learning_rate": 3.0736592592592596e-05, + "loss": 1.7513, + "step": 32520 + }, + { + "epoch": 5.78, + "learning_rate": 3.073362962962963e-05, + "loss": 1.7807, + "step": 32525 + }, + { + "epoch": 5.78, + "learning_rate": 3.073066666666667e-05, + "loss": 1.9059, + "step": 32530 + }, + { + "epoch": 5.78, + "learning_rate": 3.0727703703703706e-05, + "loss": 1.8233, + "step": 32535 + }, + { + "epoch": 5.78, + "learning_rate": 3.0724740740740745e-05, + "loss": 1.8722, + "step": 32540 + }, + { + "epoch": 5.79, + "learning_rate": 3.072177777777778e-05, + "loss": 1.6277, + "step": 32545 + }, + { + "epoch": 5.79, + "learning_rate": 3.0718814814814815e-05, + "loss": 1.7178, + "step": 32550 + }, + { + "epoch": 5.79, + "learning_rate": 3.0715851851851854e-05, + "loss": 1.8676, + "step": 32555 + }, + { + "epoch": 5.79, + "learning_rate": 3.071288888888889e-05, + "loss": 1.811, + "step": 32560 + }, + { + "epoch": 5.79, + "learning_rate": 3.0709925925925925e-05, + "loss": 1.6865, + "step": 32565 + }, + { + "epoch": 5.79, + "learning_rate": 3.0706962962962964e-05, + "loss": 1.7294, + "step": 32570 + }, + { + "epoch": 5.79, + "learning_rate": 3.0703999999999996e-05, + "loss": 1.8459, + "step": 32575 + }, + { + "epoch": 5.79, + "learning_rate": 3.070103703703704e-05, + "loss": 1.7731, + "step": 32580 + }, + { + "epoch": 5.79, + "learning_rate": 3.0698074074074074e-05, + "loss": 1.8431, + "step": 32585 + }, + { + "epoch": 5.79, + "learning_rate": 3.069511111111111e-05, + "loss": 1.7727, + "step": 32590 + }, + { + "epoch": 5.79, + "learning_rate": 3.0692148148148145e-05, + "loss": 1.8884, + "step": 32595 + }, + { + "epoch": 5.8, + "learning_rate": 3.068918518518519e-05, + "loss": 1.8343, + "step": 32600 + }, + { + "epoch": 5.8, + "learning_rate": 3.068622222222222e-05, + "loss": 1.7352, + "step": 32605 + }, + { + "epoch": 5.8, + "learning_rate": 3.068325925925926e-05, + "loss": 1.6606, + "step": 32610 + }, + { + "epoch": 5.8, + "learning_rate": 3.068029629629629e-05, + "loss": 1.7679, + "step": 32615 + }, + { + "epoch": 5.8, + "learning_rate": 3.067733333333334e-05, + "loss": 1.8008, + "step": 32620 + }, + { + "epoch": 5.8, + "learning_rate": 3.067437037037037e-05, + "loss": 1.8578, + "step": 32625 + }, + { + "epoch": 5.8, + "learning_rate": 3.067140740740741e-05, + "loss": 1.6442, + "step": 32630 + }, + { + "epoch": 5.8, + "learning_rate": 3.066844444444444e-05, + "loss": 1.8161, + "step": 32635 + }, + { + "epoch": 5.8, + "learning_rate": 3.066548148148149e-05, + "loss": 1.859, + "step": 32640 + }, + { + "epoch": 5.8, + "learning_rate": 3.066251851851852e-05, + "loss": 1.7786, + "step": 32645 + }, + { + "epoch": 5.8, + "learning_rate": 3.065955555555556e-05, + "loss": 1.7326, + "step": 32650 + }, + { + "epoch": 5.81, + "learning_rate": 3.065659259259259e-05, + "loss": 1.8917, + "step": 32655 + }, + { + "epoch": 5.81, + "learning_rate": 3.065362962962963e-05, + "loss": 1.6301, + "step": 32660 + }, + { + "epoch": 5.81, + "learning_rate": 3.065066666666667e-05, + "loss": 1.7904, + "step": 32665 + }, + { + "epoch": 5.81, + "learning_rate": 3.0647703703703707e-05, + "loss": 1.7485, + "step": 32670 + }, + { + "epoch": 5.81, + "learning_rate": 3.064474074074074e-05, + "loss": 1.6792, + "step": 32675 + }, + { + "epoch": 5.81, + "learning_rate": 3.064177777777778e-05, + "loss": 1.8091, + "step": 32680 + }, + { + "epoch": 5.81, + "learning_rate": 3.0638814814814816e-05, + "loss": 1.843, + "step": 32685 + }, + { + "epoch": 5.81, + "learning_rate": 3.0635851851851855e-05, + "loss": 1.7934, + "step": 32690 + }, + { + "epoch": 5.81, + "learning_rate": 3.063288888888889e-05, + "loss": 1.894, + "step": 32695 + }, + { + "epoch": 5.81, + "learning_rate": 3.0629925925925926e-05, + "loss": 1.6811, + "step": 32700 + }, + { + "epoch": 5.81, + "learning_rate": 3.0626962962962965e-05, + "loss": 1.7364, + "step": 32705 + }, + { + "epoch": 5.82, + "learning_rate": 3.0624000000000004e-05, + "loss": 1.7358, + "step": 32710 + }, + { + "epoch": 5.82, + "learning_rate": 3.0621037037037036e-05, + "loss": 1.8058, + "step": 32715 + }, + { + "epoch": 5.82, + "learning_rate": 3.0618074074074074e-05, + "loss": 1.782, + "step": 32720 + }, + { + "epoch": 5.82, + "learning_rate": 3.061511111111111e-05, + "loss": 1.8822, + "step": 32725 + }, + { + "epoch": 5.82, + "learning_rate": 3.061214814814815e-05, + "loss": 1.8611, + "step": 32730 + }, + { + "epoch": 5.82, + "learning_rate": 3.0609185185185184e-05, + "loss": 1.7027, + "step": 32735 + }, + { + "epoch": 5.82, + "learning_rate": 3.060622222222222e-05, + "loss": 1.784, + "step": 32740 + }, + { + "epoch": 5.82, + "learning_rate": 3.060325925925926e-05, + "loss": 1.8381, + "step": 32745 + }, + { + "epoch": 5.82, + "learning_rate": 3.06002962962963e-05, + "loss": 1.9241, + "step": 32750 + }, + { + "epoch": 5.82, + "learning_rate": 3.059733333333333e-05, + "loss": 1.8515, + "step": 32755 + }, + { + "epoch": 5.82, + "learning_rate": 3.059437037037037e-05, + "loss": 1.9129, + "step": 32760 + }, + { + "epoch": 5.82, + "learning_rate": 3.059140740740741e-05, + "loss": 1.764, + "step": 32765 + }, + { + "epoch": 5.83, + "learning_rate": 3.058844444444445e-05, + "loss": 1.7665, + "step": 32770 + }, + { + "epoch": 5.83, + "learning_rate": 3.058548148148148e-05, + "loss": 1.8665, + "step": 32775 + }, + { + "epoch": 5.83, + "learning_rate": 3.058251851851852e-05, + "loss": 1.7605, + "step": 32780 + }, + { + "epoch": 5.83, + "learning_rate": 3.057955555555556e-05, + "loss": 1.8172, + "step": 32785 + }, + { + "epoch": 5.83, + "learning_rate": 3.05765925925926e-05, + "loss": 1.881, + "step": 32790 + }, + { + "epoch": 5.83, + "learning_rate": 3.057362962962963e-05, + "loss": 1.6562, + "step": 32795 + }, + { + "epoch": 5.83, + "learning_rate": 3.057066666666667e-05, + "loss": 1.835, + "step": 32800 + }, + { + "epoch": 5.83, + "learning_rate": 3.056770370370371e-05, + "loss": 1.8155, + "step": 32805 + }, + { + "epoch": 5.83, + "learning_rate": 3.0564740740740746e-05, + "loss": 1.7459, + "step": 32810 + }, + { + "epoch": 5.83, + "learning_rate": 3.056177777777778e-05, + "loss": 1.765, + "step": 32815 + }, + { + "epoch": 5.83, + "learning_rate": 3.055881481481482e-05, + "loss": 1.8043, + "step": 32820 + }, + { + "epoch": 5.84, + "learning_rate": 3.055585185185185e-05, + "loss": 1.6945, + "step": 32825 + }, + { + "epoch": 5.84, + "learning_rate": 3.0552888888888895e-05, + "loss": 1.7981, + "step": 32830 + }, + { + "epoch": 5.84, + "learning_rate": 3.054992592592593e-05, + "loss": 1.7391, + "step": 32835 + }, + { + "epoch": 5.84, + "learning_rate": 3.0546962962962966e-05, + "loss": 1.7702, + "step": 32840 + }, + { + "epoch": 5.84, + "learning_rate": 3.0544e-05, + "loss": 1.6233, + "step": 32845 + }, + { + "epoch": 5.84, + "learning_rate": 3.054103703703704e-05, + "loss": 1.7269, + "step": 32850 + }, + { + "epoch": 5.84, + "learning_rate": 3.0538074074074075e-05, + "loss": 1.7322, + "step": 32855 + }, + { + "epoch": 5.84, + "learning_rate": 3.0535111111111114e-05, + "loss": 1.7417, + "step": 32860 + }, + { + "epoch": 5.84, + "learning_rate": 3.0532148148148146e-05, + "loss": 1.856, + "step": 32865 + }, + { + "epoch": 5.84, + "learning_rate": 3.052918518518519e-05, + "loss": 1.715, + "step": 32870 + }, + { + "epoch": 5.84, + "learning_rate": 3.0526222222222224e-05, + "loss": 1.6516, + "step": 32875 + }, + { + "epoch": 5.85, + "learning_rate": 3.052325925925926e-05, + "loss": 1.7325, + "step": 32880 + }, + { + "epoch": 5.85, + "learning_rate": 3.0520296296296295e-05, + "loss": 1.8397, + "step": 32885 + }, + { + "epoch": 5.85, + "learning_rate": 3.0517333333333337e-05, + "loss": 1.8394, + "step": 32890 + }, + { + "epoch": 5.85, + "learning_rate": 3.0514370370370372e-05, + "loss": 1.8224, + "step": 32895 + }, + { + "epoch": 5.85, + "learning_rate": 3.051140740740741e-05, + "loss": 1.8271, + "step": 32900 + }, + { + "epoch": 5.85, + "learning_rate": 3.0508444444444447e-05, + "loss": 1.8381, + "step": 32905 + }, + { + "epoch": 5.85, + "learning_rate": 3.0505481481481485e-05, + "loss": 1.6783, + "step": 32910 + }, + { + "epoch": 5.85, + "learning_rate": 3.050251851851852e-05, + "loss": 1.8412, + "step": 32915 + }, + { + "epoch": 5.85, + "learning_rate": 3.049955555555556e-05, + "loss": 1.805, + "step": 32920 + }, + { + "epoch": 5.85, + "learning_rate": 3.049659259259259e-05, + "loss": 1.7668, + "step": 32925 + }, + { + "epoch": 5.85, + "learning_rate": 3.0493629629629634e-05, + "loss": 1.8387, + "step": 32930 + }, + { + "epoch": 5.86, + "learning_rate": 3.0490666666666666e-05, + "loss": 1.8191, + "step": 32935 + }, + { + "epoch": 5.86, + "learning_rate": 3.0487703703703708e-05, + "loss": 1.7234, + "step": 32940 + }, + { + "epoch": 5.86, + "learning_rate": 3.048474074074074e-05, + "loss": 1.8168, + "step": 32945 + }, + { + "epoch": 5.86, + "learning_rate": 3.0481777777777782e-05, + "loss": 1.7194, + "step": 32950 + }, + { + "epoch": 5.86, + "learning_rate": 3.0478814814814814e-05, + "loss": 1.7382, + "step": 32955 + }, + { + "epoch": 5.86, + "learning_rate": 3.0475851851851857e-05, + "loss": 1.9391, + "step": 32960 + }, + { + "epoch": 5.86, + "learning_rate": 3.047288888888889e-05, + "loss": 1.907, + "step": 32965 + }, + { + "epoch": 5.86, + "learning_rate": 3.046992592592593e-05, + "loss": 1.7447, + "step": 32970 + }, + { + "epoch": 5.86, + "learning_rate": 3.0466962962962963e-05, + "loss": 1.7888, + "step": 32975 + }, + { + "epoch": 5.86, + "learning_rate": 3.0464000000000005e-05, + "loss": 1.7229, + "step": 32980 + }, + { + "epoch": 5.86, + "learning_rate": 3.0461037037037037e-05, + "loss": 1.7552, + "step": 32985 + }, + { + "epoch": 5.86, + "learning_rate": 3.045807407407408e-05, + "loss": 1.706, + "step": 32990 + }, + { + "epoch": 5.87, + "learning_rate": 3.045511111111111e-05, + "loss": 1.9783, + "step": 32995 + }, + { + "epoch": 5.87, + "learning_rate": 3.045214814814815e-05, + "loss": 1.7567, + "step": 33000 + }, + { + "epoch": 5.87, + "learning_rate": 3.0449185185185186e-05, + "loss": 1.8879, + "step": 33005 + }, + { + "epoch": 5.87, + "learning_rate": 3.0446222222222225e-05, + "loss": 1.8954, + "step": 33010 + }, + { + "epoch": 5.87, + "learning_rate": 3.044325925925926e-05, + "loss": 1.7172, + "step": 33015 + }, + { + "epoch": 5.87, + "learning_rate": 3.04402962962963e-05, + "loss": 1.807, + "step": 33020 + }, + { + "epoch": 5.87, + "learning_rate": 3.0437333333333334e-05, + "loss": 1.8217, + "step": 33025 + }, + { + "epoch": 5.87, + "learning_rate": 3.0434370370370373e-05, + "loss": 1.8028, + "step": 33030 + }, + { + "epoch": 5.87, + "learning_rate": 3.043140740740741e-05, + "loss": 1.823, + "step": 33035 + }, + { + "epoch": 5.87, + "learning_rate": 3.0428444444444447e-05, + "loss": 1.8359, + "step": 33040 + }, + { + "epoch": 5.87, + "learning_rate": 3.0425481481481483e-05, + "loss": 1.8417, + "step": 33045 + }, + { + "epoch": 5.88, + "learning_rate": 3.042251851851852e-05, + "loss": 1.6888, + "step": 33050 + }, + { + "epoch": 5.88, + "learning_rate": 3.0419555555555557e-05, + "loss": 1.8624, + "step": 33055 + }, + { + "epoch": 5.88, + "learning_rate": 3.0416592592592596e-05, + "loss": 1.724, + "step": 33060 + }, + { + "epoch": 5.88, + "learning_rate": 3.041362962962963e-05, + "loss": 1.7003, + "step": 33065 + }, + { + "epoch": 5.88, + "learning_rate": 3.041066666666667e-05, + "loss": 1.8295, + "step": 33070 + }, + { + "epoch": 5.88, + "learning_rate": 3.0407703703703705e-05, + "loss": 1.8725, + "step": 33075 + }, + { + "epoch": 5.88, + "learning_rate": 3.0404740740740744e-05, + "loss": 1.7355, + "step": 33080 + }, + { + "epoch": 5.88, + "learning_rate": 3.040177777777778e-05, + "loss": 1.8111, + "step": 33085 + }, + { + "epoch": 5.88, + "learning_rate": 3.039881481481482e-05, + "loss": 1.7846, + "step": 33090 + }, + { + "epoch": 5.88, + "learning_rate": 3.0395851851851854e-05, + "loss": 1.7004, + "step": 33095 + }, + { + "epoch": 5.88, + "learning_rate": 3.0392888888888893e-05, + "loss": 1.8205, + "step": 33100 + }, + { + "epoch": 5.89, + "learning_rate": 3.0389925925925928e-05, + "loss": 1.8023, + "step": 33105 + }, + { + "epoch": 5.89, + "learning_rate": 3.0386962962962967e-05, + "loss": 1.7341, + "step": 33110 + }, + { + "epoch": 5.89, + "learning_rate": 3.0384000000000003e-05, + "loss": 1.8227, + "step": 33115 + }, + { + "epoch": 5.89, + "learning_rate": 3.038103703703704e-05, + "loss": 1.6902, + "step": 33120 + }, + { + "epoch": 5.89, + "learning_rate": 3.0378074074074077e-05, + "loss": 1.7117, + "step": 33125 + }, + { + "epoch": 5.89, + "learning_rate": 3.0375111111111116e-05, + "loss": 1.869, + "step": 33130 + }, + { + "epoch": 5.89, + "learning_rate": 3.037214814814815e-05, + "loss": 1.7632, + "step": 33135 + }, + { + "epoch": 5.89, + "learning_rate": 3.036918518518519e-05, + "loss": 1.802, + "step": 33140 + }, + { + "epoch": 5.89, + "learning_rate": 3.0366222222222225e-05, + "loss": 1.8446, + "step": 33145 + }, + { + "epoch": 5.89, + "learning_rate": 3.0363259259259257e-05, + "loss": 1.7217, + "step": 33150 + }, + { + "epoch": 5.89, + "learning_rate": 3.0360296296296296e-05, + "loss": 1.8901, + "step": 33155 + }, + { + "epoch": 5.9, + "learning_rate": 3.035733333333333e-05, + "loss": 1.7266, + "step": 33160 + }, + { + "epoch": 5.9, + "learning_rate": 3.035437037037037e-05, + "loss": 1.7804, + "step": 33165 + }, + { + "epoch": 5.9, + "learning_rate": 3.0351407407407406e-05, + "loss": 1.9004, + "step": 33170 + }, + { + "epoch": 5.9, + "learning_rate": 3.0348444444444445e-05, + "loss": 1.7654, + "step": 33175 + }, + { + "epoch": 5.9, + "learning_rate": 3.034548148148148e-05, + "loss": 1.754, + "step": 33180 + }, + { + "epoch": 5.9, + "learning_rate": 3.034251851851852e-05, + "loss": 1.7461, + "step": 33185 + }, + { + "epoch": 5.9, + "learning_rate": 3.0339555555555554e-05, + "loss": 1.7956, + "step": 33190 + }, + { + "epoch": 5.9, + "learning_rate": 3.0336592592592593e-05, + "loss": 1.8341, + "step": 33195 + }, + { + "epoch": 5.9, + "learning_rate": 3.033362962962963e-05, + "loss": 1.8076, + "step": 33200 + }, + { + "epoch": 5.9, + "learning_rate": 3.0330666666666667e-05, + "loss": 1.7301, + "step": 33205 + }, + { + "epoch": 5.9, + "learning_rate": 3.0327703703703703e-05, + "loss": 1.8836, + "step": 33210 + }, + { + "epoch": 5.9, + "learning_rate": 3.032474074074074e-05, + "loss": 1.8296, + "step": 33215 + }, + { + "epoch": 5.91, + "learning_rate": 3.0321777777777777e-05, + "loss": 1.7662, + "step": 33220 + }, + { + "epoch": 5.91, + "learning_rate": 3.0318814814814816e-05, + "loss": 1.8175, + "step": 33225 + }, + { + "epoch": 5.91, + "learning_rate": 3.031585185185185e-05, + "loss": 1.8687, + "step": 33230 + }, + { + "epoch": 5.91, + "learning_rate": 3.031288888888889e-05, + "loss": 1.8071, + "step": 33235 + }, + { + "epoch": 5.91, + "learning_rate": 3.0309925925925926e-05, + "loss": 1.8342, + "step": 33240 + }, + { + "epoch": 5.91, + "learning_rate": 3.0306962962962964e-05, + "loss": 1.7908, + "step": 33245 + }, + { + "epoch": 5.91, + "learning_rate": 3.0304e-05, + "loss": 1.7336, + "step": 33250 + }, + { + "epoch": 5.91, + "learning_rate": 3.030103703703704e-05, + "loss": 1.8212, + "step": 33255 + }, + { + "epoch": 5.91, + "learning_rate": 3.0298074074074074e-05, + "loss": 1.7287, + "step": 33260 + }, + { + "epoch": 5.91, + "learning_rate": 3.0295111111111113e-05, + "loss": 1.8038, + "step": 33265 + }, + { + "epoch": 5.91, + "learning_rate": 3.029214814814815e-05, + "loss": 1.8705, + "step": 33270 + }, + { + "epoch": 5.92, + "learning_rate": 3.0289185185185187e-05, + "loss": 1.7662, + "step": 33275 + }, + { + "epoch": 5.92, + "learning_rate": 3.0286222222222223e-05, + "loss": 1.8536, + "step": 33280 + }, + { + "epoch": 5.92, + "learning_rate": 3.028325925925926e-05, + "loss": 1.7853, + "step": 33285 + }, + { + "epoch": 5.92, + "learning_rate": 3.0280296296296297e-05, + "loss": 1.8236, + "step": 33290 + }, + { + "epoch": 5.92, + "learning_rate": 3.0277333333333336e-05, + "loss": 1.8192, + "step": 33295 + }, + { + "epoch": 5.92, + "learning_rate": 3.027437037037037e-05, + "loss": 1.6988, + "step": 33300 + }, + { + "epoch": 5.92, + "learning_rate": 3.027140740740741e-05, + "loss": 1.7753, + "step": 33305 + }, + { + "epoch": 5.92, + "learning_rate": 3.0268444444444442e-05, + "loss": 1.7168, + "step": 33310 + }, + { + "epoch": 5.92, + "learning_rate": 3.0265481481481484e-05, + "loss": 1.9222, + "step": 33315 + }, + { + "epoch": 5.92, + "learning_rate": 3.0262518518518516e-05, + "loss": 1.7062, + "step": 33320 + }, + { + "epoch": 5.92, + "learning_rate": 3.025955555555556e-05, + "loss": 1.8108, + "step": 33325 + }, + { + "epoch": 5.93, + "learning_rate": 3.025659259259259e-05, + "loss": 1.8518, + "step": 33330 + }, + { + "epoch": 5.93, + "learning_rate": 3.0253629629629633e-05, + "loss": 1.6796, + "step": 33335 + }, + { + "epoch": 5.93, + "learning_rate": 3.0250666666666665e-05, + "loss": 1.8585, + "step": 33340 + }, + { + "epoch": 5.93, + "learning_rate": 3.0247703703703707e-05, + "loss": 1.8071, + "step": 33345 + }, + { + "epoch": 5.93, + "learning_rate": 3.024474074074074e-05, + "loss": 1.8277, + "step": 33350 + }, + { + "epoch": 5.93, + "learning_rate": 3.024177777777778e-05, + "loss": 1.7331, + "step": 33355 + }, + { + "epoch": 5.93, + "learning_rate": 3.0238814814814813e-05, + "loss": 1.836, + "step": 33360 + }, + { + "epoch": 5.93, + "learning_rate": 3.0235851851851856e-05, + "loss": 1.7649, + "step": 33365 + }, + { + "epoch": 5.93, + "learning_rate": 3.0232888888888888e-05, + "loss": 1.859, + "step": 33370 + }, + { + "epoch": 5.93, + "learning_rate": 3.022992592592593e-05, + "loss": 1.6977, + "step": 33375 + }, + { + "epoch": 5.93, + "learning_rate": 3.0226962962962962e-05, + "loss": 1.7289, + "step": 33380 + }, + { + "epoch": 5.94, + "learning_rate": 3.0224e-05, + "loss": 1.7872, + "step": 33385 + }, + { + "epoch": 5.94, + "learning_rate": 3.0221037037037036e-05, + "loss": 1.807, + "step": 33390 + }, + { + "epoch": 5.94, + "learning_rate": 3.0218074074074075e-05, + "loss": 1.7508, + "step": 33395 + }, + { + "epoch": 5.94, + "learning_rate": 3.021511111111111e-05, + "loss": 1.7862, + "step": 33400 + }, + { + "epoch": 5.94, + "learning_rate": 3.021214814814815e-05, + "loss": 1.7932, + "step": 33405 + }, + { + "epoch": 5.94, + "learning_rate": 3.0209185185185185e-05, + "loss": 1.8647, + "step": 33410 + }, + { + "epoch": 5.94, + "learning_rate": 3.0206222222222223e-05, + "loss": 1.9436, + "step": 33415 + }, + { + "epoch": 5.94, + "learning_rate": 3.020325925925926e-05, + "loss": 1.8402, + "step": 33420 + }, + { + "epoch": 5.94, + "learning_rate": 3.0200296296296298e-05, + "loss": 1.723, + "step": 33425 + }, + { + "epoch": 5.94, + "learning_rate": 3.0197333333333333e-05, + "loss": 1.7367, + "step": 33430 + }, + { + "epoch": 5.94, + "learning_rate": 3.0194370370370372e-05, + "loss": 1.8149, + "step": 33435 + }, + { + "epoch": 5.94, + "learning_rate": 3.0191407407407407e-05, + "loss": 1.9072, + "step": 33440 + }, + { + "epoch": 5.95, + "learning_rate": 3.0188444444444446e-05, + "loss": 1.795, + "step": 33445 + }, + { + "epoch": 5.95, + "learning_rate": 3.018548148148148e-05, + "loss": 1.8279, + "step": 33450 + }, + { + "epoch": 5.95, + "learning_rate": 3.018251851851852e-05, + "loss": 1.7217, + "step": 33455 + }, + { + "epoch": 5.95, + "learning_rate": 3.0179555555555556e-05, + "loss": 1.788, + "step": 33460 + }, + { + "epoch": 5.95, + "learning_rate": 3.0176592592592595e-05, + "loss": 1.7992, + "step": 33465 + }, + { + "epoch": 5.95, + "learning_rate": 3.017362962962963e-05, + "loss": 1.8002, + "step": 33470 + }, + { + "epoch": 5.95, + "learning_rate": 3.017066666666667e-05, + "loss": 1.7818, + "step": 33475 + }, + { + "epoch": 5.95, + "learning_rate": 3.0167703703703704e-05, + "loss": 1.7239, + "step": 33480 + }, + { + "epoch": 5.95, + "learning_rate": 3.0164740740740743e-05, + "loss": 1.8778, + "step": 33485 + }, + { + "epoch": 5.95, + "learning_rate": 3.016177777777778e-05, + "loss": 1.8305, + "step": 33490 + }, + { + "epoch": 5.95, + "learning_rate": 3.0158814814814817e-05, + "loss": 1.7127, + "step": 33495 + }, + { + "epoch": 5.96, + "learning_rate": 3.0155851851851853e-05, + "loss": 1.8816, + "step": 33500 + }, + { + "epoch": 5.96, + "learning_rate": 3.0152888888888892e-05, + "loss": 1.7817, + "step": 33505 + }, + { + "epoch": 5.96, + "learning_rate": 3.0149925925925927e-05, + "loss": 1.8087, + "step": 33510 + }, + { + "epoch": 5.96, + "learning_rate": 3.0146962962962966e-05, + "loss": 1.8684, + "step": 33515 + }, + { + "epoch": 5.96, + "learning_rate": 3.0144e-05, + "loss": 1.711, + "step": 33520 + }, + { + "epoch": 5.96, + "learning_rate": 3.014103703703704e-05, + "loss": 1.9192, + "step": 33525 + }, + { + "epoch": 5.96, + "learning_rate": 3.0138074074074076e-05, + "loss": 1.7684, + "step": 33530 + }, + { + "epoch": 5.96, + "learning_rate": 3.0135111111111114e-05, + "loss": 1.806, + "step": 33535 + }, + { + "epoch": 5.96, + "learning_rate": 3.013214814814815e-05, + "loss": 1.7198, + "step": 33540 + }, + { + "epoch": 5.96, + "learning_rate": 3.012918518518519e-05, + "loss": 1.8798, + "step": 33545 + }, + { + "epoch": 5.96, + "learning_rate": 3.012622222222222e-05, + "loss": 1.7622, + "step": 33550 + }, + { + "epoch": 5.97, + "learning_rate": 3.0123259259259263e-05, + "loss": 1.9225, + "step": 33555 + }, + { + "epoch": 5.97, + "learning_rate": 3.0120296296296295e-05, + "loss": 1.7447, + "step": 33560 + }, + { + "epoch": 5.97, + "learning_rate": 3.0117333333333337e-05, + "loss": 1.7922, + "step": 33565 + }, + { + "epoch": 5.97, + "learning_rate": 3.011437037037037e-05, + "loss": 1.7375, + "step": 33570 + }, + { + "epoch": 5.97, + "learning_rate": 3.011140740740741e-05, + "loss": 1.84, + "step": 33575 + }, + { + "epoch": 5.97, + "learning_rate": 3.0108444444444444e-05, + "loss": 1.9391, + "step": 33580 + }, + { + "epoch": 5.97, + "learning_rate": 3.0105481481481486e-05, + "loss": 1.7395, + "step": 33585 + }, + { + "epoch": 5.97, + "learning_rate": 3.0102518518518518e-05, + "loss": 1.8104, + "step": 33590 + }, + { + "epoch": 5.97, + "learning_rate": 3.009955555555556e-05, + "loss": 1.805, + "step": 33595 + }, + { + "epoch": 5.97, + "learning_rate": 3.0096592592592592e-05, + "loss": 1.8873, + "step": 33600 + }, + { + "epoch": 5.97, + "learning_rate": 3.0093629629629634e-05, + "loss": 1.6849, + "step": 33605 + }, + { + "epoch": 5.98, + "learning_rate": 3.0090666666666666e-05, + "loss": 1.7263, + "step": 33610 + }, + { + "epoch": 5.98, + "learning_rate": 3.0087703703703705e-05, + "loss": 1.676, + "step": 33615 + }, + { + "epoch": 5.98, + "learning_rate": 3.008474074074074e-05, + "loss": 1.8337, + "step": 33620 + }, + { + "epoch": 5.98, + "learning_rate": 3.008177777777778e-05, + "loss": 1.7559, + "step": 33625 + }, + { + "epoch": 5.98, + "learning_rate": 3.0078814814814815e-05, + "loss": 1.8618, + "step": 33630 + }, + { + "epoch": 5.98, + "learning_rate": 3.0075851851851854e-05, + "loss": 1.8777, + "step": 33635 + }, + { + "epoch": 5.98, + "learning_rate": 3.007288888888889e-05, + "loss": 1.7222, + "step": 33640 + }, + { + "epoch": 5.98, + "learning_rate": 3.0069925925925928e-05, + "loss": 1.7913, + "step": 33645 + }, + { + "epoch": 5.98, + "learning_rate": 3.0066962962962963e-05, + "loss": 1.8384, + "step": 33650 + }, + { + "epoch": 5.98, + "learning_rate": 3.0064000000000002e-05, + "loss": 1.8482, + "step": 33655 + }, + { + "epoch": 5.98, + "learning_rate": 3.0061037037037038e-05, + "loss": 1.8629, + "step": 33660 + }, + { + "epoch": 5.98, + "learning_rate": 3.0058074074074076e-05, + "loss": 1.8857, + "step": 33665 + }, + { + "epoch": 5.99, + "learning_rate": 3.0055111111111112e-05, + "loss": 1.8183, + "step": 33670 + }, + { + "epoch": 5.99, + "learning_rate": 3.005214814814815e-05, + "loss": 1.8407, + "step": 33675 + }, + { + "epoch": 5.99, + "learning_rate": 3.0049185185185186e-05, + "loss": 1.7283, + "step": 33680 + }, + { + "epoch": 5.99, + "learning_rate": 3.0046222222222225e-05, + "loss": 1.6895, + "step": 33685 + }, + { + "epoch": 5.99, + "learning_rate": 3.004325925925926e-05, + "loss": 1.7861, + "step": 33690 + }, + { + "epoch": 5.99, + "learning_rate": 3.00402962962963e-05, + "loss": 1.8633, + "step": 33695 + }, + { + "epoch": 5.99, + "learning_rate": 3.0037333333333335e-05, + "loss": 1.8043, + "step": 33700 + }, + { + "epoch": 5.99, + "learning_rate": 3.0034370370370373e-05, + "loss": 1.7407, + "step": 33705 + }, + { + "epoch": 5.99, + "learning_rate": 3.003140740740741e-05, + "loss": 1.87, + "step": 33710 + }, + { + "epoch": 5.99, + "learning_rate": 3.0028444444444448e-05, + "loss": 1.8539, + "step": 33715 + }, + { + "epoch": 5.99, + "learning_rate": 3.0025481481481483e-05, + "loss": 1.6793, + "step": 33720 + }, + { + "epoch": 6.0, + "learning_rate": 3.0022518518518522e-05, + "loss": 1.7757, + "step": 33725 + }, + { + "epoch": 6.0, + "learning_rate": 3.0019555555555557e-05, + "loss": 1.7561, + "step": 33730 + }, + { + "epoch": 6.0, + "learning_rate": 3.0016592592592596e-05, + "loss": 1.7549, + "step": 33735 + }, + { + "epoch": 6.0, + "learning_rate": 3.001362962962963e-05, + "loss": 1.843, + "step": 33740 + }, + { + "epoch": 6.0, + "learning_rate": 3.001066666666667e-05, + "loss": 1.8345, + "step": 33745 + }, + { + "epoch": 6.0, + "learning_rate": 3.0007703703703706e-05, + "loss": 1.7254, + "step": 33750 + }, + { + "epoch": 6.0, + "learning_rate": 3.0004740740740745e-05, + "loss": 1.618, + "step": 33755 + }, + { + "epoch": 6.0, + "learning_rate": 3.000177777777778e-05, + "loss": 1.6368, + "step": 33760 + }, + { + "epoch": 6.0, + "learning_rate": 2.999881481481482e-05, + "loss": 1.6529, + "step": 33765 + }, + { + "epoch": 6.0, + "learning_rate": 2.9995851851851854e-05, + "loss": 1.707, + "step": 33770 + }, + { + "epoch": 6.0, + "learning_rate": 2.9992888888888893e-05, + "loss": 1.6199, + "step": 33775 + }, + { + "epoch": 6.01, + "learning_rate": 2.9989925925925925e-05, + "loss": 1.5544, + "step": 33780 + }, + { + "epoch": 6.01, + "learning_rate": 2.9986962962962968e-05, + "loss": 1.7129, + "step": 33785 + }, + { + "epoch": 6.01, + "learning_rate": 2.9984e-05, + "loss": 1.6938, + "step": 33790 + }, + { + "epoch": 6.01, + "learning_rate": 2.9981037037037042e-05, + "loss": 1.6195, + "step": 33795 + }, + { + "epoch": 6.01, + "learning_rate": 2.9978074074074074e-05, + "loss": 1.6191, + "step": 33800 + }, + { + "epoch": 6.01, + "learning_rate": 2.9975111111111116e-05, + "loss": 1.6264, + "step": 33805 + }, + { + "epoch": 6.01, + "learning_rate": 2.9972148148148148e-05, + "loss": 1.6907, + "step": 33810 + }, + { + "epoch": 6.01, + "learning_rate": 2.996918518518519e-05, + "loss": 1.6615, + "step": 33815 + }, + { + "epoch": 6.01, + "learning_rate": 2.9966222222222222e-05, + "loss": 1.6945, + "step": 33820 + }, + { + "epoch": 6.01, + "learning_rate": 2.9963259259259265e-05, + "loss": 1.7901, + "step": 33825 + }, + { + "epoch": 6.01, + "learning_rate": 2.9960296296296297e-05, + "loss": 1.7297, + "step": 33830 + }, + { + "epoch": 6.02, + "learning_rate": 2.995733333333334e-05, + "loss": 1.7551, + "step": 33835 + }, + { + "epoch": 6.02, + "learning_rate": 2.995437037037037e-05, + "loss": 1.6866, + "step": 33840 + }, + { + "epoch": 6.02, + "learning_rate": 2.9951407407407413e-05, + "loss": 1.7442, + "step": 33845 + }, + { + "epoch": 6.02, + "learning_rate": 2.9948444444444445e-05, + "loss": 1.7564, + "step": 33850 + }, + { + "epoch": 6.02, + "learning_rate": 2.9945481481481484e-05, + "loss": 1.7313, + "step": 33855 + }, + { + "epoch": 6.02, + "learning_rate": 2.994251851851852e-05, + "loss": 1.7315, + "step": 33860 + }, + { + "epoch": 6.02, + "learning_rate": 2.9939555555555558e-05, + "loss": 1.6853, + "step": 33865 + }, + { + "epoch": 6.02, + "learning_rate": 2.9936592592592594e-05, + "loss": 1.7534, + "step": 33870 + }, + { + "epoch": 6.02, + "learning_rate": 2.9933629629629632e-05, + "loss": 1.7043, + "step": 33875 + }, + { + "epoch": 6.02, + "learning_rate": 2.9930666666666668e-05, + "loss": 1.6982, + "step": 33880 + }, + { + "epoch": 6.02, + "learning_rate": 2.9927703703703707e-05, + "loss": 1.708, + "step": 33885 + }, + { + "epoch": 6.02, + "learning_rate": 2.9924740740740742e-05, + "loss": 1.7616, + "step": 33890 + }, + { + "epoch": 6.03, + "learning_rate": 2.992177777777778e-05, + "loss": 1.6442, + "step": 33895 + }, + { + "epoch": 6.03, + "learning_rate": 2.9918814814814816e-05, + "loss": 1.6282, + "step": 33900 + }, + { + "epoch": 6.03, + "learning_rate": 2.9915851851851855e-05, + "loss": 1.6471, + "step": 33905 + }, + { + "epoch": 6.03, + "learning_rate": 2.991288888888889e-05, + "loss": 1.5863, + "step": 33910 + }, + { + "epoch": 6.03, + "learning_rate": 2.990992592592593e-05, + "loss": 1.7191, + "step": 33915 + }, + { + "epoch": 6.03, + "learning_rate": 2.9906962962962965e-05, + "loss": 1.6954, + "step": 33920 + }, + { + "epoch": 6.03, + "learning_rate": 2.9904e-05, + "loss": 1.7576, + "step": 33925 + }, + { + "epoch": 6.03, + "learning_rate": 2.990103703703704e-05, + "loss": 1.6584, + "step": 33930 + }, + { + "epoch": 6.03, + "learning_rate": 2.989807407407407e-05, + "loss": 1.6205, + "step": 33935 + }, + { + "epoch": 6.03, + "learning_rate": 2.9895111111111113e-05, + "loss": 1.6828, + "step": 33940 + }, + { + "epoch": 6.03, + "learning_rate": 2.9892148148148145e-05, + "loss": 1.6621, + "step": 33945 + }, + { + "epoch": 6.04, + "learning_rate": 2.9889185185185188e-05, + "loss": 1.5537, + "step": 33950 + }, + { + "epoch": 6.04, + "learning_rate": 2.988622222222222e-05, + "loss": 1.6692, + "step": 33955 + }, + { + "epoch": 6.04, + "learning_rate": 2.9883259259259262e-05, + "loss": 1.6727, + "step": 33960 + }, + { + "epoch": 6.04, + "learning_rate": 2.9880296296296294e-05, + "loss": 1.6318, + "step": 33965 + }, + { + "epoch": 6.04, + "learning_rate": 2.9877333333333336e-05, + "loss": 1.6832, + "step": 33970 + }, + { + "epoch": 6.04, + "learning_rate": 2.9874370370370368e-05, + "loss": 1.7019, + "step": 33975 + }, + { + "epoch": 6.04, + "learning_rate": 2.987140740740741e-05, + "loss": 1.8028, + "step": 33980 + }, + { + "epoch": 6.04, + "learning_rate": 2.9868444444444442e-05, + "loss": 1.6411, + "step": 33985 + }, + { + "epoch": 6.04, + "learning_rate": 2.9865481481481485e-05, + "loss": 1.6213, + "step": 33990 + }, + { + "epoch": 6.04, + "learning_rate": 2.9862518518518517e-05, + "loss": 1.578, + "step": 33995 + }, + { + "epoch": 6.04, + "learning_rate": 2.985955555555556e-05, + "loss": 1.754, + "step": 34000 + }, + { + "epoch": 6.05, + "learning_rate": 2.985659259259259e-05, + "loss": 1.7044, + "step": 34005 + }, + { + "epoch": 6.05, + "learning_rate": 2.985362962962963e-05, + "loss": 1.6541, + "step": 34010 + }, + { + "epoch": 6.05, + "learning_rate": 2.9850666666666665e-05, + "loss": 1.7141, + "step": 34015 + }, + { + "epoch": 6.05, + "learning_rate": 2.9847703703703704e-05, + "loss": 1.7351, + "step": 34020 + }, + { + "epoch": 6.05, + "learning_rate": 2.984474074074074e-05, + "loss": 1.7489, + "step": 34025 + }, + { + "epoch": 6.05, + "learning_rate": 2.984177777777778e-05, + "loss": 1.4647, + "step": 34030 + }, + { + "epoch": 6.05, + "learning_rate": 2.9838814814814814e-05, + "loss": 1.6804, + "step": 34035 + }, + { + "epoch": 6.05, + "learning_rate": 2.9835851851851853e-05, + "loss": 1.7143, + "step": 34040 + }, + { + "epoch": 6.05, + "learning_rate": 2.9832888888888888e-05, + "loss": 1.6343, + "step": 34045 + }, + { + "epoch": 6.05, + "learning_rate": 2.9829925925925927e-05, + "loss": 1.7678, + "step": 34050 + }, + { + "epoch": 6.05, + "learning_rate": 2.9826962962962962e-05, + "loss": 1.762, + "step": 34055 + }, + { + "epoch": 6.06, + "learning_rate": 2.9824e-05, + "loss": 1.8423, + "step": 34060 + }, + { + "epoch": 6.06, + "learning_rate": 2.9821037037037037e-05, + "loss": 1.5978, + "step": 34065 + }, + { + "epoch": 6.06, + "learning_rate": 2.9818074074074075e-05, + "loss": 1.7009, + "step": 34070 + }, + { + "epoch": 6.06, + "learning_rate": 2.981511111111111e-05, + "loss": 1.6667, + "step": 34075 + }, + { + "epoch": 6.06, + "learning_rate": 2.981214814814815e-05, + "loss": 1.7031, + "step": 34080 + }, + { + "epoch": 6.06, + "learning_rate": 2.9809185185185185e-05, + "loss": 1.828, + "step": 34085 + }, + { + "epoch": 6.06, + "learning_rate": 2.9806222222222224e-05, + "loss": 1.6861, + "step": 34090 + }, + { + "epoch": 6.06, + "learning_rate": 2.980325925925926e-05, + "loss": 1.6868, + "step": 34095 + }, + { + "epoch": 6.06, + "learning_rate": 2.9800296296296298e-05, + "loss": 1.7758, + "step": 34100 + }, + { + "epoch": 6.06, + "learning_rate": 2.9797333333333334e-05, + "loss": 1.6938, + "step": 34105 + }, + { + "epoch": 6.06, + "learning_rate": 2.9794370370370372e-05, + "loss": 1.5606, + "step": 34110 + }, + { + "epoch": 6.06, + "learning_rate": 2.9791407407407408e-05, + "loss": 1.6517, + "step": 34115 + }, + { + "epoch": 6.07, + "learning_rate": 2.9788444444444447e-05, + "loss": 1.698, + "step": 34120 + }, + { + "epoch": 6.07, + "learning_rate": 2.9785481481481482e-05, + "loss": 1.6033, + "step": 34125 + }, + { + "epoch": 6.07, + "learning_rate": 2.978251851851852e-05, + "loss": 1.6159, + "step": 34130 + }, + { + "epoch": 6.07, + "learning_rate": 2.9779555555555556e-05, + "loss": 1.8065, + "step": 34135 + }, + { + "epoch": 6.07, + "learning_rate": 2.9776592592592595e-05, + "loss": 1.7817, + "step": 34140 + }, + { + "epoch": 6.07, + "learning_rate": 2.977362962962963e-05, + "loss": 1.6644, + "step": 34145 + }, + { + "epoch": 6.07, + "learning_rate": 2.977066666666667e-05, + "loss": 1.652, + "step": 34150 + }, + { + "epoch": 6.07, + "learning_rate": 2.9767703703703705e-05, + "loss": 1.7358, + "step": 34155 + }, + { + "epoch": 6.07, + "learning_rate": 2.9764740740740744e-05, + "loss": 1.6544, + "step": 34160 + }, + { + "epoch": 6.07, + "learning_rate": 2.9761777777777776e-05, + "loss": 1.6763, + "step": 34165 + }, + { + "epoch": 6.07, + "learning_rate": 2.9758814814814818e-05, + "loss": 1.6336, + "step": 34170 + }, + { + "epoch": 6.08, + "learning_rate": 2.975585185185185e-05, + "loss": 1.665, + "step": 34175 + }, + { + "epoch": 6.08, + "learning_rate": 2.9752888888888892e-05, + "loss": 1.6985, + "step": 34180 + }, + { + "epoch": 6.08, + "learning_rate": 2.9749925925925924e-05, + "loss": 1.7731, + "step": 34185 + }, + { + "epoch": 6.08, + "learning_rate": 2.9746962962962966e-05, + "loss": 1.7405, + "step": 34190 + }, + { + "epoch": 6.08, + "learning_rate": 2.9744e-05, + "loss": 1.6614, + "step": 34195 + }, + { + "epoch": 6.08, + "learning_rate": 2.974103703703704e-05, + "loss": 1.7459, + "step": 34200 + }, + { + "epoch": 6.08, + "learning_rate": 2.9738074074074073e-05, + "loss": 1.6534, + "step": 34205 + }, + { + "epoch": 6.08, + "learning_rate": 2.9735111111111115e-05, + "loss": 1.8145, + "step": 34210 + }, + { + "epoch": 6.08, + "learning_rate": 2.9732148148148147e-05, + "loss": 1.6832, + "step": 34215 + }, + { + "epoch": 6.08, + "learning_rate": 2.972918518518519e-05, + "loss": 1.6496, + "step": 34220 + }, + { + "epoch": 6.08, + "learning_rate": 2.972622222222222e-05, + "loss": 1.7708, + "step": 34225 + }, + { + "epoch": 6.09, + "learning_rate": 2.9723259259259263e-05, + "loss": 1.7077, + "step": 34230 + }, + { + "epoch": 6.09, + "learning_rate": 2.9720296296296295e-05, + "loss": 1.6498, + "step": 34235 + }, + { + "epoch": 6.09, + "learning_rate": 2.9717333333333334e-05, + "loss": 1.7104, + "step": 34240 + }, + { + "epoch": 6.09, + "learning_rate": 2.971437037037037e-05, + "loss": 1.7321, + "step": 34245 + }, + { + "epoch": 6.09, + "learning_rate": 2.971140740740741e-05, + "loss": 1.6193, + "step": 34250 + }, + { + "epoch": 6.09, + "learning_rate": 2.9708444444444444e-05, + "loss": 1.6764, + "step": 34255 + }, + { + "epoch": 6.09, + "learning_rate": 2.9705481481481483e-05, + "loss": 1.7075, + "step": 34260 + }, + { + "epoch": 6.09, + "learning_rate": 2.9702518518518518e-05, + "loss": 1.6696, + "step": 34265 + }, + { + "epoch": 6.09, + "learning_rate": 2.9699555555555557e-05, + "loss": 1.6629, + "step": 34270 + }, + { + "epoch": 6.09, + "learning_rate": 2.9696592592592593e-05, + "loss": 1.6489, + "step": 34275 + }, + { + "epoch": 6.09, + "learning_rate": 2.969362962962963e-05, + "loss": 1.6474, + "step": 34280 + }, + { + "epoch": 6.1, + "learning_rate": 2.9690666666666667e-05, + "loss": 1.7237, + "step": 34285 + }, + { + "epoch": 6.1, + "learning_rate": 2.9687703703703706e-05, + "loss": 1.6771, + "step": 34290 + }, + { + "epoch": 6.1, + "learning_rate": 2.968474074074074e-05, + "loss": 1.761, + "step": 34295 + }, + { + "epoch": 6.1, + "learning_rate": 2.968177777777778e-05, + "loss": 1.6355, + "step": 34300 + }, + { + "epoch": 6.1, + "learning_rate": 2.9678814814814815e-05, + "loss": 1.6497, + "step": 34305 + }, + { + "epoch": 6.1, + "learning_rate": 2.9675851851851854e-05, + "loss": 1.599, + "step": 34310 + }, + { + "epoch": 6.1, + "learning_rate": 2.967288888888889e-05, + "loss": 1.7796, + "step": 34315 + }, + { + "epoch": 6.1, + "learning_rate": 2.966992592592593e-05, + "loss": 1.6912, + "step": 34320 + }, + { + "epoch": 6.1, + "learning_rate": 2.9666962962962964e-05, + "loss": 1.8121, + "step": 34325 + }, + { + "epoch": 6.1, + "learning_rate": 2.9664000000000003e-05, + "loss": 1.827, + "step": 34330 + }, + { + "epoch": 6.1, + "learning_rate": 2.9661037037037038e-05, + "loss": 1.7152, + "step": 34335 + }, + { + "epoch": 6.1, + "learning_rate": 2.9658074074074077e-05, + "loss": 1.7404, + "step": 34340 + }, + { + "epoch": 6.11, + "learning_rate": 2.9655111111111112e-05, + "loss": 1.7682, + "step": 34345 + }, + { + "epoch": 6.11, + "learning_rate": 2.965214814814815e-05, + "loss": 1.724, + "step": 34350 + }, + { + "epoch": 6.11, + "learning_rate": 2.9649185185185187e-05, + "loss": 1.5152, + "step": 34355 + }, + { + "epoch": 6.11, + "learning_rate": 2.9646222222222225e-05, + "loss": 1.6644, + "step": 34360 + }, + { + "epoch": 6.11, + "learning_rate": 2.964325925925926e-05, + "loss": 1.7016, + "step": 34365 + }, + { + "epoch": 6.11, + "learning_rate": 2.96402962962963e-05, + "loss": 1.7095, + "step": 34370 + }, + { + "epoch": 6.11, + "learning_rate": 2.9637333333333335e-05, + "loss": 1.7522, + "step": 34375 + }, + { + "epoch": 6.11, + "learning_rate": 2.9634370370370374e-05, + "loss": 1.7325, + "step": 34380 + }, + { + "epoch": 6.11, + "learning_rate": 2.963140740740741e-05, + "loss": 1.8635, + "step": 34385 + }, + { + "epoch": 6.11, + "learning_rate": 2.9628444444444448e-05, + "loss": 1.6929, + "step": 34390 + }, + { + "epoch": 6.11, + "learning_rate": 2.962548148148148e-05, + "loss": 1.6304, + "step": 34395 + }, + { + "epoch": 6.12, + "learning_rate": 2.9622518518518522e-05, + "loss": 1.7542, + "step": 34400 + }, + { + "epoch": 6.12, + "learning_rate": 2.9619555555555554e-05, + "loss": 1.4836, + "step": 34405 + }, + { + "epoch": 6.12, + "learning_rate": 2.9616592592592597e-05, + "loss": 1.5855, + "step": 34410 + }, + { + "epoch": 6.12, + "learning_rate": 2.961362962962963e-05, + "loss": 1.7014, + "step": 34415 + }, + { + "epoch": 6.12, + "learning_rate": 2.961066666666667e-05, + "loss": 1.5937, + "step": 34420 + }, + { + "epoch": 6.12, + "learning_rate": 2.9607703703703703e-05, + "loss": 1.7605, + "step": 34425 + }, + { + "epoch": 6.12, + "learning_rate": 2.9604740740740745e-05, + "loss": 1.6697, + "step": 34430 + }, + { + "epoch": 6.12, + "learning_rate": 2.9601777777777777e-05, + "loss": 1.7434, + "step": 34435 + }, + { + "epoch": 6.12, + "learning_rate": 2.959881481481482e-05, + "loss": 1.575, + "step": 34440 + }, + { + "epoch": 6.12, + "learning_rate": 2.959585185185185e-05, + "loss": 1.7406, + "step": 34445 + }, + { + "epoch": 6.12, + "learning_rate": 2.9592888888888894e-05, + "loss": 1.6422, + "step": 34450 + }, + { + "epoch": 6.13, + "learning_rate": 2.9589925925925926e-05, + "loss": 1.6747, + "step": 34455 + }, + { + "epoch": 6.13, + "learning_rate": 2.9586962962962968e-05, + "loss": 1.7242, + "step": 34460 + }, + { + "epoch": 6.13, + "learning_rate": 2.9584e-05, + "loss": 1.7692, + "step": 34465 + }, + { + "epoch": 6.13, + "learning_rate": 2.958103703703704e-05, + "loss": 1.7235, + "step": 34470 + }, + { + "epoch": 6.13, + "learning_rate": 2.9578074074074074e-05, + "loss": 1.7879, + "step": 34475 + }, + { + "epoch": 6.13, + "learning_rate": 2.9575111111111113e-05, + "loss": 1.6452, + "step": 34480 + }, + { + "epoch": 6.13, + "learning_rate": 2.957214814814815e-05, + "loss": 1.5948, + "step": 34485 + }, + { + "epoch": 6.13, + "learning_rate": 2.9569185185185187e-05, + "loss": 1.7005, + "step": 34490 + }, + { + "epoch": 6.13, + "learning_rate": 2.9566222222222223e-05, + "loss": 1.6388, + "step": 34495 + }, + { + "epoch": 6.13, + "learning_rate": 2.956325925925926e-05, + "loss": 1.7898, + "step": 34500 + }, + { + "epoch": 6.13, + "learning_rate": 2.9560296296296297e-05, + "loss": 1.6613, + "step": 34505 + }, + { + "epoch": 6.14, + "learning_rate": 2.9557333333333336e-05, + "loss": 1.5934, + "step": 34510 + }, + { + "epoch": 6.14, + "learning_rate": 2.955437037037037e-05, + "loss": 1.6482, + "step": 34515 + }, + { + "epoch": 6.14, + "learning_rate": 2.955140740740741e-05, + "loss": 1.6685, + "step": 34520 + }, + { + "epoch": 6.14, + "learning_rate": 2.9548444444444446e-05, + "loss": 1.7316, + "step": 34525 + }, + { + "epoch": 6.14, + "learning_rate": 2.9545481481481484e-05, + "loss": 1.7118, + "step": 34530 + }, + { + "epoch": 6.14, + "learning_rate": 2.954251851851852e-05, + "loss": 1.7731, + "step": 34535 + }, + { + "epoch": 6.14, + "learning_rate": 2.953955555555556e-05, + "loss": 1.6958, + "step": 34540 + }, + { + "epoch": 6.14, + "learning_rate": 2.9536592592592594e-05, + "loss": 1.514, + "step": 34545 + }, + { + "epoch": 6.14, + "learning_rate": 2.9533629629629633e-05, + "loss": 1.6441, + "step": 34550 + }, + { + "epoch": 6.14, + "learning_rate": 2.953066666666667e-05, + "loss": 1.7139, + "step": 34555 + }, + { + "epoch": 6.14, + "learning_rate": 2.9527703703703707e-05, + "loss": 1.6581, + "step": 34560 + }, + { + "epoch": 6.14, + "learning_rate": 2.9524740740740743e-05, + "loss": 1.8076, + "step": 34565 + }, + { + "epoch": 6.15, + "learning_rate": 2.952177777777778e-05, + "loss": 1.7009, + "step": 34570 + }, + { + "epoch": 6.15, + "learning_rate": 2.9518814814814817e-05, + "loss": 1.7005, + "step": 34575 + }, + { + "epoch": 6.15, + "learning_rate": 2.9515851851851856e-05, + "loss": 1.7571, + "step": 34580 + }, + { + "epoch": 6.15, + "learning_rate": 2.951288888888889e-05, + "loss": 1.7, + "step": 34585 + }, + { + "epoch": 6.15, + "learning_rate": 2.950992592592593e-05, + "loss": 1.7021, + "step": 34590 + }, + { + "epoch": 6.15, + "learning_rate": 2.9506962962962965e-05, + "loss": 1.8133, + "step": 34595 + }, + { + "epoch": 6.15, + "learning_rate": 2.9504000000000004e-05, + "loss": 1.7433, + "step": 34600 + }, + { + "epoch": 6.15, + "learning_rate": 2.950103703703704e-05, + "loss": 1.6422, + "step": 34605 + }, + { + "epoch": 6.15, + "learning_rate": 2.949807407407408e-05, + "loss": 1.6432, + "step": 34610 + }, + { + "epoch": 6.15, + "learning_rate": 2.9495111111111114e-05, + "loss": 1.7034, + "step": 34615 + }, + { + "epoch": 6.15, + "learning_rate": 2.9492148148148153e-05, + "loss": 1.805, + "step": 34620 + }, + { + "epoch": 6.16, + "learning_rate": 2.9489185185185185e-05, + "loss": 1.7426, + "step": 34625 + }, + { + "epoch": 6.16, + "learning_rate": 2.9486222222222227e-05, + "loss": 1.6444, + "step": 34630 + }, + { + "epoch": 6.16, + "learning_rate": 2.948325925925926e-05, + "loss": 1.6825, + "step": 34635 + }, + { + "epoch": 6.16, + "learning_rate": 2.94802962962963e-05, + "loss": 1.6265, + "step": 34640 + }, + { + "epoch": 6.16, + "learning_rate": 2.9477333333333333e-05, + "loss": 1.743, + "step": 34645 + }, + { + "epoch": 6.16, + "learning_rate": 2.9474370370370375e-05, + "loss": 1.7748, + "step": 34650 + }, + { + "epoch": 6.16, + "learning_rate": 2.9471407407407407e-05, + "loss": 1.7518, + "step": 34655 + }, + { + "epoch": 6.16, + "learning_rate": 2.946844444444445e-05, + "loss": 1.7464, + "step": 34660 + }, + { + "epoch": 6.16, + "learning_rate": 2.9465481481481482e-05, + "loss": 1.8027, + "step": 34665 + }, + { + "epoch": 6.16, + "learning_rate": 2.9462518518518524e-05, + "loss": 1.6877, + "step": 34670 + }, + { + "epoch": 6.16, + "learning_rate": 2.9459555555555556e-05, + "loss": 1.6869, + "step": 34675 + }, + { + "epoch": 6.17, + "learning_rate": 2.9456592592592598e-05, + "loss": 1.6942, + "step": 34680 + }, + { + "epoch": 6.17, + "learning_rate": 2.945362962962963e-05, + "loss": 1.6864, + "step": 34685 + }, + { + "epoch": 6.17, + "learning_rate": 2.9450666666666672e-05, + "loss": 1.7328, + "step": 34690 + }, + { + "epoch": 6.17, + "learning_rate": 2.9447703703703705e-05, + "loss": 1.5921, + "step": 34695 + }, + { + "epoch": 6.17, + "learning_rate": 2.944474074074074e-05, + "loss": 1.6978, + "step": 34700 + }, + { + "epoch": 6.17, + "learning_rate": 2.944177777777778e-05, + "loss": 1.7213, + "step": 34705 + }, + { + "epoch": 6.17, + "learning_rate": 2.9438814814814814e-05, + "loss": 1.7641, + "step": 34710 + }, + { + "epoch": 6.17, + "learning_rate": 2.9435851851851853e-05, + "loss": 1.6318, + "step": 34715 + }, + { + "epoch": 6.17, + "learning_rate": 2.943288888888889e-05, + "loss": 1.6806, + "step": 34720 + }, + { + "epoch": 6.17, + "learning_rate": 2.9429925925925927e-05, + "loss": 1.6974, + "step": 34725 + }, + { + "epoch": 6.17, + "learning_rate": 2.9426962962962963e-05, + "loss": 1.5681, + "step": 34730 + }, + { + "epoch": 6.18, + "learning_rate": 2.9424e-05, + "loss": 1.7455, + "step": 34735 + }, + { + "epoch": 6.18, + "learning_rate": 2.9421037037037037e-05, + "loss": 1.8332, + "step": 34740 + }, + { + "epoch": 6.18, + "learning_rate": 2.9418074074074076e-05, + "loss": 1.6696, + "step": 34745 + }, + { + "epoch": 6.18, + "learning_rate": 2.941511111111111e-05, + "loss": 1.8294, + "step": 34750 + }, + { + "epoch": 6.18, + "learning_rate": 2.941214814814815e-05, + "loss": 1.7163, + "step": 34755 + }, + { + "epoch": 6.18, + "learning_rate": 2.9409185185185185e-05, + "loss": 1.7248, + "step": 34760 + }, + { + "epoch": 6.18, + "learning_rate": 2.9406222222222224e-05, + "loss": 1.5798, + "step": 34765 + }, + { + "epoch": 6.18, + "learning_rate": 2.940325925925926e-05, + "loss": 1.6716, + "step": 34770 + }, + { + "epoch": 6.18, + "learning_rate": 2.94002962962963e-05, + "loss": 1.7085, + "step": 34775 + }, + { + "epoch": 6.18, + "learning_rate": 2.939733333333333e-05, + "loss": 1.5901, + "step": 34780 + }, + { + "epoch": 6.18, + "learning_rate": 2.9394370370370373e-05, + "loss": 1.6369, + "step": 34785 + }, + { + "epoch": 6.18, + "learning_rate": 2.9391407407407405e-05, + "loss": 1.6093, + "step": 34790 + }, + { + "epoch": 6.19, + "learning_rate": 2.9388444444444447e-05, + "loss": 1.7818, + "step": 34795 + }, + { + "epoch": 6.19, + "learning_rate": 2.938548148148148e-05, + "loss": 1.5222, + "step": 34800 + }, + { + "epoch": 6.19, + "learning_rate": 2.938251851851852e-05, + "loss": 1.7544, + "step": 34805 + }, + { + "epoch": 6.19, + "learning_rate": 2.9379555555555553e-05, + "loss": 1.6635, + "step": 34810 + }, + { + "epoch": 6.19, + "learning_rate": 2.9376592592592596e-05, + "loss": 1.7475, + "step": 34815 + }, + { + "epoch": 6.19, + "learning_rate": 2.9373629629629628e-05, + "loss": 1.7178, + "step": 34820 + }, + { + "epoch": 6.19, + "learning_rate": 2.937066666666667e-05, + "loss": 1.6827, + "step": 34825 + }, + { + "epoch": 6.19, + "learning_rate": 2.9367703703703702e-05, + "loss": 1.6979, + "step": 34830 + }, + { + "epoch": 6.19, + "learning_rate": 2.9364740740740744e-05, + "loss": 1.5967, + "step": 34835 + }, + { + "epoch": 6.19, + "learning_rate": 2.9361777777777776e-05, + "loss": 1.7171, + "step": 34840 + }, + { + "epoch": 6.19, + "learning_rate": 2.935881481481482e-05, + "loss": 1.7448, + "step": 34845 + }, + { + "epoch": 6.2, + "learning_rate": 2.935585185185185e-05, + "loss": 1.7429, + "step": 34850 + }, + { + "epoch": 6.2, + "learning_rate": 2.935288888888889e-05, + "loss": 1.6259, + "step": 34855 + }, + { + "epoch": 6.2, + "learning_rate": 2.9349925925925925e-05, + "loss": 1.5977, + "step": 34860 + }, + { + "epoch": 6.2, + "learning_rate": 2.9346962962962963e-05, + "loss": 1.5715, + "step": 34865 + }, + { + "epoch": 6.2, + "learning_rate": 2.9344e-05, + "loss": 1.7037, + "step": 34870 + }, + { + "epoch": 6.2, + "learning_rate": 2.9341037037037038e-05, + "loss": 1.697, + "step": 34875 + }, + { + "epoch": 6.2, + "learning_rate": 2.9338074074074073e-05, + "loss": 1.7249, + "step": 34880 + }, + { + "epoch": 6.2, + "learning_rate": 2.9335111111111112e-05, + "loss": 1.6468, + "step": 34885 + }, + { + "epoch": 6.2, + "learning_rate": 2.9332148148148147e-05, + "loss": 1.6014, + "step": 34890 + }, + { + "epoch": 6.2, + "learning_rate": 2.9329185185185186e-05, + "loss": 1.6939, + "step": 34895 + }, + { + "epoch": 6.2, + "learning_rate": 2.932622222222222e-05, + "loss": 1.6192, + "step": 34900 + }, + { + "epoch": 6.21, + "learning_rate": 2.932325925925926e-05, + "loss": 1.5189, + "step": 34905 + }, + { + "epoch": 6.21, + "learning_rate": 2.9320296296296296e-05, + "loss": 1.5842, + "step": 34910 + }, + { + "epoch": 6.21, + "learning_rate": 2.9317333333333335e-05, + "loss": 1.7684, + "step": 34915 + }, + { + "epoch": 6.21, + "learning_rate": 2.931437037037037e-05, + "loss": 1.8412, + "step": 34920 + }, + { + "epoch": 6.21, + "learning_rate": 2.931140740740741e-05, + "loss": 1.5999, + "step": 34925 + }, + { + "epoch": 6.21, + "learning_rate": 2.9308444444444444e-05, + "loss": 1.7073, + "step": 34930 + }, + { + "epoch": 6.21, + "learning_rate": 2.9305481481481483e-05, + "loss": 1.5543, + "step": 34935 + }, + { + "epoch": 6.21, + "learning_rate": 2.930251851851852e-05, + "loss": 1.6312, + "step": 34940 + }, + { + "epoch": 6.21, + "learning_rate": 2.9299555555555558e-05, + "loss": 1.7119, + "step": 34945 + }, + { + "epoch": 6.21, + "learning_rate": 2.9296592592592593e-05, + "loss": 1.6556, + "step": 34950 + }, + { + "epoch": 6.21, + "learning_rate": 2.9293629629629632e-05, + "loss": 1.7503, + "step": 34955 + }, + { + "epoch": 6.22, + "learning_rate": 2.9290666666666667e-05, + "loss": 1.6169, + "step": 34960 + }, + { + "epoch": 6.22, + "learning_rate": 2.9287703703703706e-05, + "loss": 1.6711, + "step": 34965 + }, + { + "epoch": 6.22, + "learning_rate": 2.928474074074074e-05, + "loss": 1.6329, + "step": 34970 + }, + { + "epoch": 6.22, + "learning_rate": 2.928177777777778e-05, + "loss": 1.6745, + "step": 34975 + }, + { + "epoch": 6.22, + "learning_rate": 2.9278814814814816e-05, + "loss": 1.7142, + "step": 34980 + }, + { + "epoch": 6.22, + "learning_rate": 2.9275851851851855e-05, + "loss": 1.7648, + "step": 34985 + }, + { + "epoch": 6.22, + "learning_rate": 2.927288888888889e-05, + "loss": 1.7745, + "step": 34990 + }, + { + "epoch": 6.22, + "learning_rate": 2.926992592592593e-05, + "loss": 1.6848, + "step": 34995 + }, + { + "epoch": 6.22, + "learning_rate": 2.9266962962962964e-05, + "loss": 1.6403, + "step": 35000 + }, + { + "epoch": 6.22, + "learning_rate": 2.9264000000000003e-05, + "loss": 1.6901, + "step": 35005 + }, + { + "epoch": 6.22, + "learning_rate": 2.926103703703704e-05, + "loss": 1.6245, + "step": 35010 + }, + { + "epoch": 6.22, + "learning_rate": 2.9258074074074077e-05, + "loss": 1.7866, + "step": 35015 + }, + { + "epoch": 6.23, + "learning_rate": 2.925511111111111e-05, + "loss": 1.6307, + "step": 35020 + }, + { + "epoch": 6.23, + "learning_rate": 2.925214814814815e-05, + "loss": 1.675, + "step": 35025 + }, + { + "epoch": 6.23, + "learning_rate": 2.9249185185185184e-05, + "loss": 1.6596, + "step": 35030 + }, + { + "epoch": 6.23, + "learning_rate": 2.9246222222222226e-05, + "loss": 1.745, + "step": 35035 + }, + { + "epoch": 6.23, + "learning_rate": 2.9243259259259258e-05, + "loss": 1.8042, + "step": 35040 + }, + { + "epoch": 6.23, + "learning_rate": 2.92402962962963e-05, + "loss": 1.6436, + "step": 35045 + }, + { + "epoch": 6.23, + "learning_rate": 2.9237333333333332e-05, + "loss": 1.6487, + "step": 35050 + }, + { + "epoch": 6.23, + "learning_rate": 2.9234370370370374e-05, + "loss": 1.7189, + "step": 35055 + }, + { + "epoch": 6.23, + "learning_rate": 2.9231407407407406e-05, + "loss": 1.8007, + "step": 35060 + }, + { + "epoch": 6.23, + "learning_rate": 2.922844444444445e-05, + "loss": 1.6904, + "step": 35065 + }, + { + "epoch": 6.23, + "learning_rate": 2.922548148148148e-05, + "loss": 1.6839, + "step": 35070 + }, + { + "epoch": 6.24, + "learning_rate": 2.9222518518518523e-05, + "loss": 1.7326, + "step": 35075 + }, + { + "epoch": 6.24, + "learning_rate": 2.9219555555555555e-05, + "loss": 1.7593, + "step": 35080 + }, + { + "epoch": 6.24, + "learning_rate": 2.9216592592592594e-05, + "loss": 1.8014, + "step": 35085 + }, + { + "epoch": 6.24, + "learning_rate": 2.921362962962963e-05, + "loss": 1.8002, + "step": 35090 + }, + { + "epoch": 6.24, + "learning_rate": 2.9210666666666668e-05, + "loss": 1.6133, + "step": 35095 + }, + { + "epoch": 6.24, + "learning_rate": 2.9207703703703703e-05, + "loss": 1.6689, + "step": 35100 + }, + { + "epoch": 6.24, + "learning_rate": 2.9204740740740742e-05, + "loss": 1.7544, + "step": 35105 + }, + { + "epoch": 6.24, + "learning_rate": 2.9201777777777778e-05, + "loss": 1.7256, + "step": 35110 + }, + { + "epoch": 6.24, + "learning_rate": 2.9198814814814816e-05, + "loss": 1.6477, + "step": 35115 + }, + { + "epoch": 6.24, + "learning_rate": 2.9195851851851852e-05, + "loss": 1.7478, + "step": 35120 + }, + { + "epoch": 6.24, + "learning_rate": 2.919288888888889e-05, + "loss": 1.731, + "step": 35125 + }, + { + "epoch": 6.25, + "learning_rate": 2.9189925925925926e-05, + "loss": 1.7869, + "step": 35130 + }, + { + "epoch": 6.25, + "learning_rate": 2.9186962962962965e-05, + "loss": 1.6297, + "step": 35135 + }, + { + "epoch": 6.25, + "learning_rate": 2.9184e-05, + "loss": 1.6673, + "step": 35140 + }, + { + "epoch": 6.25, + "learning_rate": 2.918103703703704e-05, + "loss": 1.5686, + "step": 35145 + }, + { + "epoch": 6.25, + "learning_rate": 2.9178074074074075e-05, + "loss": 1.6817, + "step": 35150 + }, + { + "epoch": 6.25, + "learning_rate": 2.9175111111111114e-05, + "loss": 1.7309, + "step": 35155 + }, + { + "epoch": 6.25, + "learning_rate": 2.917214814814815e-05, + "loss": 1.6829, + "step": 35160 + }, + { + "epoch": 6.25, + "learning_rate": 2.9169185185185188e-05, + "loss": 1.7266, + "step": 35165 + }, + { + "epoch": 6.25, + "learning_rate": 2.9166222222222223e-05, + "loss": 1.7686, + "step": 35170 + }, + { + "epoch": 6.25, + "learning_rate": 2.9163259259259262e-05, + "loss": 1.6987, + "step": 35175 + }, + { + "epoch": 6.25, + "learning_rate": 2.9160296296296297e-05, + "loss": 1.5816, + "step": 35180 + }, + { + "epoch": 6.26, + "learning_rate": 2.9157333333333336e-05, + "loss": 1.782, + "step": 35185 + }, + { + "epoch": 6.26, + "learning_rate": 2.9154370370370372e-05, + "loss": 1.9132, + "step": 35190 + }, + { + "epoch": 6.26, + "learning_rate": 2.915140740740741e-05, + "loss": 1.846, + "step": 35195 + }, + { + "epoch": 6.26, + "learning_rate": 2.9148444444444446e-05, + "loss": 1.5972, + "step": 35200 + }, + { + "epoch": 6.26, + "learning_rate": 2.9145481481481485e-05, + "loss": 1.6839, + "step": 35205 + }, + { + "epoch": 6.26, + "learning_rate": 2.914251851851852e-05, + "loss": 1.6688, + "step": 35210 + }, + { + "epoch": 6.26, + "learning_rate": 2.913955555555556e-05, + "loss": 1.6094, + "step": 35215 + }, + { + "epoch": 6.26, + "learning_rate": 2.9136592592592594e-05, + "loss": 1.6683, + "step": 35220 + }, + { + "epoch": 6.26, + "learning_rate": 2.9133629629629633e-05, + "loss": 1.6496, + "step": 35225 + }, + { + "epoch": 6.26, + "learning_rate": 2.913066666666667e-05, + "loss": 1.6207, + "step": 35230 + }, + { + "epoch": 6.26, + "learning_rate": 2.9127703703703708e-05, + "loss": 1.6322, + "step": 35235 + }, + { + "epoch": 6.26, + "learning_rate": 2.9124740740740743e-05, + "loss": 1.7996, + "step": 35240 + }, + { + "epoch": 6.27, + "learning_rate": 2.9121777777777782e-05, + "loss": 1.6854, + "step": 35245 + }, + { + "epoch": 6.27, + "learning_rate": 2.9118814814814814e-05, + "loss": 1.6989, + "step": 35250 + }, + { + "epoch": 6.27, + "learning_rate": 2.9115851851851856e-05, + "loss": 1.6679, + "step": 35255 + }, + { + "epoch": 6.27, + "learning_rate": 2.9112888888888888e-05, + "loss": 1.5994, + "step": 35260 + }, + { + "epoch": 6.27, + "learning_rate": 2.910992592592593e-05, + "loss": 1.7172, + "step": 35265 + }, + { + "epoch": 6.27, + "learning_rate": 2.9106962962962962e-05, + "loss": 1.7505, + "step": 35270 + }, + { + "epoch": 6.27, + "learning_rate": 2.9104000000000005e-05, + "loss": 1.6467, + "step": 35275 + }, + { + "epoch": 6.27, + "learning_rate": 2.9101037037037037e-05, + "loss": 1.8069, + "step": 35280 + }, + { + "epoch": 6.27, + "learning_rate": 2.909807407407408e-05, + "loss": 1.7493, + "step": 35285 + }, + { + "epoch": 6.27, + "learning_rate": 2.909511111111111e-05, + "loss": 1.628, + "step": 35290 + }, + { + "epoch": 6.27, + "learning_rate": 2.9092148148148153e-05, + "loss": 1.6491, + "step": 35295 + }, + { + "epoch": 6.28, + "learning_rate": 2.9089185185185185e-05, + "loss": 1.5969, + "step": 35300 + }, + { + "epoch": 6.28, + "learning_rate": 2.9086222222222227e-05, + "loss": 1.7183, + "step": 35305 + }, + { + "epoch": 6.28, + "learning_rate": 2.908325925925926e-05, + "loss": 1.6179, + "step": 35310 + }, + { + "epoch": 6.28, + "learning_rate": 2.9080296296296298e-05, + "loss": 1.7476, + "step": 35315 + }, + { + "epoch": 6.28, + "learning_rate": 2.9077333333333334e-05, + "loss": 1.6681, + "step": 35320 + }, + { + "epoch": 6.28, + "learning_rate": 2.9074370370370372e-05, + "loss": 1.602, + "step": 35325 + }, + { + "epoch": 6.28, + "learning_rate": 2.9071407407407408e-05, + "loss": 1.6958, + "step": 35330 + }, + { + "epoch": 6.28, + "learning_rate": 2.9068444444444447e-05, + "loss": 1.6426, + "step": 35335 + }, + { + "epoch": 6.28, + "learning_rate": 2.9065481481481482e-05, + "loss": 1.8645, + "step": 35340 + }, + { + "epoch": 6.28, + "learning_rate": 2.906251851851852e-05, + "loss": 1.6872, + "step": 35345 + }, + { + "epoch": 6.28, + "learning_rate": 2.9059555555555556e-05, + "loss": 1.7233, + "step": 35350 + }, + { + "epoch": 6.29, + "learning_rate": 2.9056592592592595e-05, + "loss": 1.704, + "step": 35355 + }, + { + "epoch": 6.29, + "learning_rate": 2.905362962962963e-05, + "loss": 1.5488, + "step": 35360 + }, + { + "epoch": 6.29, + "learning_rate": 2.905066666666667e-05, + "loss": 1.7239, + "step": 35365 + }, + { + "epoch": 6.29, + "learning_rate": 2.9047703703703705e-05, + "loss": 1.7043, + "step": 35370 + }, + { + "epoch": 6.29, + "learning_rate": 2.9044740740740744e-05, + "loss": 1.8472, + "step": 35375 + }, + { + "epoch": 6.29, + "learning_rate": 2.904177777777778e-05, + "loss": 1.7252, + "step": 35380 + }, + { + "epoch": 6.29, + "learning_rate": 2.9038814814814818e-05, + "loss": 1.6132, + "step": 35385 + }, + { + "epoch": 6.29, + "learning_rate": 2.9035851851851853e-05, + "loss": 1.6126, + "step": 35390 + }, + { + "epoch": 6.29, + "learning_rate": 2.9032888888888892e-05, + "loss": 1.7249, + "step": 35395 + }, + { + "epoch": 6.29, + "learning_rate": 2.9029925925925928e-05, + "loss": 1.7575, + "step": 35400 + }, + { + "epoch": 6.29, + "learning_rate": 2.9026962962962967e-05, + "loss": 1.699, + "step": 35405 + }, + { + "epoch": 6.3, + "learning_rate": 2.9024000000000002e-05, + "loss": 1.5789, + "step": 35410 + }, + { + "epoch": 6.3, + "learning_rate": 2.902103703703704e-05, + "loss": 1.7811, + "step": 35415 + }, + { + "epoch": 6.3, + "learning_rate": 2.9018074074074076e-05, + "loss": 1.7265, + "step": 35420 + }, + { + "epoch": 6.3, + "learning_rate": 2.9015111111111115e-05, + "loss": 1.6848, + "step": 35425 + }, + { + "epoch": 6.3, + "learning_rate": 2.901214814814815e-05, + "loss": 1.7577, + "step": 35430 + }, + { + "epoch": 6.3, + "learning_rate": 2.900918518518519e-05, + "loss": 1.7382, + "step": 35435 + }, + { + "epoch": 6.3, + "learning_rate": 2.9006222222222225e-05, + "loss": 1.6024, + "step": 35440 + }, + { + "epoch": 6.3, + "learning_rate": 2.9003259259259264e-05, + "loss": 1.7424, + "step": 35445 + }, + { + "epoch": 6.3, + "learning_rate": 2.90002962962963e-05, + "loss": 1.6956, + "step": 35450 + }, + { + "epoch": 6.3, + "learning_rate": 2.8997333333333338e-05, + "loss": 1.6871, + "step": 35455 + }, + { + "epoch": 6.3, + "learning_rate": 2.8994370370370373e-05, + "loss": 1.6488, + "step": 35460 + }, + { + "epoch": 6.3, + "learning_rate": 2.8991407407407412e-05, + "loss": 1.6925, + "step": 35465 + }, + { + "epoch": 6.31, + "learning_rate": 2.8988444444444448e-05, + "loss": 1.6803, + "step": 35470 + }, + { + "epoch": 6.31, + "learning_rate": 2.898548148148148e-05, + "loss": 1.6696, + "step": 35475 + }, + { + "epoch": 6.31, + "learning_rate": 2.898251851851852e-05, + "loss": 1.6735, + "step": 35480 + }, + { + "epoch": 6.31, + "learning_rate": 2.8979555555555554e-05, + "loss": 1.6817, + "step": 35485 + }, + { + "epoch": 6.31, + "learning_rate": 2.8976592592592593e-05, + "loss": 1.6523, + "step": 35490 + }, + { + "epoch": 6.31, + "learning_rate": 2.8973629629629628e-05, + "loss": 1.5696, + "step": 35495 + }, + { + "epoch": 6.31, + "learning_rate": 2.8970666666666667e-05, + "loss": 1.7812, + "step": 35500 + }, + { + "epoch": 6.31, + "learning_rate": 2.8967703703703702e-05, + "loss": 1.623, + "step": 35505 + }, + { + "epoch": 6.31, + "learning_rate": 2.896474074074074e-05, + "loss": 1.5621, + "step": 35510 + }, + { + "epoch": 6.31, + "learning_rate": 2.8961777777777777e-05, + "loss": 1.8169, + "step": 35515 + }, + { + "epoch": 6.31, + "learning_rate": 2.8958814814814815e-05, + "loss": 1.7048, + "step": 35520 + }, + { + "epoch": 6.32, + "learning_rate": 2.895585185185185e-05, + "loss": 1.6495, + "step": 35525 + }, + { + "epoch": 6.32, + "learning_rate": 2.895288888888889e-05, + "loss": 1.6285, + "step": 35530 + }, + { + "epoch": 6.32, + "learning_rate": 2.8949925925925925e-05, + "loss": 1.7909, + "step": 35535 + }, + { + "epoch": 6.32, + "learning_rate": 2.8946962962962964e-05, + "loss": 1.8344, + "step": 35540 + }, + { + "epoch": 6.32, + "learning_rate": 2.8944e-05, + "loss": 1.6961, + "step": 35545 + }, + { + "epoch": 6.32, + "learning_rate": 2.8941037037037038e-05, + "loss": 1.6589, + "step": 35550 + }, + { + "epoch": 6.32, + "learning_rate": 2.8938074074074074e-05, + "loss": 1.7257, + "step": 35555 + }, + { + "epoch": 6.32, + "learning_rate": 2.8935111111111112e-05, + "loss": 1.6998, + "step": 35560 + }, + { + "epoch": 6.32, + "learning_rate": 2.8932148148148148e-05, + "loss": 1.6839, + "step": 35565 + }, + { + "epoch": 6.32, + "learning_rate": 2.8929185185185187e-05, + "loss": 1.6863, + "step": 35570 + }, + { + "epoch": 6.32, + "learning_rate": 2.8926222222222222e-05, + "loss": 1.7956, + "step": 35575 + }, + { + "epoch": 6.33, + "learning_rate": 2.892325925925926e-05, + "loss": 1.7074, + "step": 35580 + }, + { + "epoch": 6.33, + "learning_rate": 2.8920296296296296e-05, + "loss": 1.61, + "step": 35585 + }, + { + "epoch": 6.33, + "learning_rate": 2.8917333333333335e-05, + "loss": 1.6168, + "step": 35590 + }, + { + "epoch": 6.33, + "learning_rate": 2.891437037037037e-05, + "loss": 1.6629, + "step": 35595 + }, + { + "epoch": 6.33, + "learning_rate": 2.891140740740741e-05, + "loss": 1.615, + "step": 35600 + }, + { + "epoch": 6.33, + "learning_rate": 2.8908444444444445e-05, + "loss": 1.6262, + "step": 35605 + }, + { + "epoch": 6.33, + "learning_rate": 2.8905481481481484e-05, + "loss": 1.6818, + "step": 35610 + }, + { + "epoch": 6.33, + "learning_rate": 2.890251851851852e-05, + "loss": 1.6881, + "step": 35615 + }, + { + "epoch": 6.33, + "learning_rate": 2.8899555555555558e-05, + "loss": 1.7645, + "step": 35620 + }, + { + "epoch": 6.33, + "learning_rate": 2.8896592592592593e-05, + "loss": 1.6151, + "step": 35625 + }, + { + "epoch": 6.33, + "learning_rate": 2.8893629629629632e-05, + "loss": 1.7182, + "step": 35630 + }, + { + "epoch": 6.34, + "learning_rate": 2.8890666666666664e-05, + "loss": 1.7784, + "step": 35635 + }, + { + "epoch": 6.34, + "learning_rate": 2.8887703703703706e-05, + "loss": 1.7886, + "step": 35640 + }, + { + "epoch": 6.34, + "learning_rate": 2.888474074074074e-05, + "loss": 1.7255, + "step": 35645 + }, + { + "epoch": 6.34, + "learning_rate": 2.888177777777778e-05, + "loss": 1.7832, + "step": 35650 + }, + { + "epoch": 6.34, + "learning_rate": 2.8878814814814813e-05, + "loss": 1.7318, + "step": 35655 + }, + { + "epoch": 6.34, + "learning_rate": 2.8875851851851855e-05, + "loss": 1.7444, + "step": 35660 + }, + { + "epoch": 6.34, + "learning_rate": 2.8872888888888887e-05, + "loss": 1.681, + "step": 35665 + }, + { + "epoch": 6.34, + "learning_rate": 2.886992592592593e-05, + "loss": 1.715, + "step": 35670 + }, + { + "epoch": 6.34, + "learning_rate": 2.886696296296296e-05, + "loss": 1.6661, + "step": 35675 + }, + { + "epoch": 6.34, + "learning_rate": 2.8864000000000004e-05, + "loss": 1.6536, + "step": 35680 + }, + { + "epoch": 6.34, + "learning_rate": 2.8861037037037036e-05, + "loss": 1.6566, + "step": 35685 + }, + { + "epoch": 6.34, + "learning_rate": 2.8858074074074078e-05, + "loss": 1.6786, + "step": 35690 + }, + { + "epoch": 6.35, + "learning_rate": 2.885511111111111e-05, + "loss": 1.8193, + "step": 35695 + }, + { + "epoch": 6.35, + "learning_rate": 2.8852148148148152e-05, + "loss": 1.7458, + "step": 35700 + }, + { + "epoch": 6.35, + "learning_rate": 2.8849185185185184e-05, + "loss": 1.7614, + "step": 35705 + }, + { + "epoch": 6.35, + "learning_rate": 2.8846222222222223e-05, + "loss": 1.7476, + "step": 35710 + }, + { + "epoch": 6.35, + "learning_rate": 2.884325925925926e-05, + "loss": 1.7565, + "step": 35715 + }, + { + "epoch": 6.35, + "learning_rate": 2.8840296296296297e-05, + "loss": 1.5851, + "step": 35720 + }, + { + "epoch": 6.35, + "learning_rate": 2.8837333333333333e-05, + "loss": 1.619, + "step": 35725 + }, + { + "epoch": 6.35, + "learning_rate": 2.883437037037037e-05, + "loss": 1.7964, + "step": 35730 + }, + { + "epoch": 6.35, + "learning_rate": 2.8831407407407407e-05, + "loss": 1.7471, + "step": 35735 + }, + { + "epoch": 6.35, + "learning_rate": 2.8828444444444446e-05, + "loss": 1.6224, + "step": 35740 + }, + { + "epoch": 6.35, + "learning_rate": 2.882548148148148e-05, + "loss": 1.6818, + "step": 35745 + }, + { + "epoch": 6.36, + "learning_rate": 2.882251851851852e-05, + "loss": 1.6923, + "step": 35750 + }, + { + "epoch": 6.36, + "learning_rate": 2.8819555555555555e-05, + "loss": 1.6103, + "step": 35755 + }, + { + "epoch": 6.36, + "learning_rate": 2.8816592592592594e-05, + "loss": 1.6712, + "step": 35760 + }, + { + "epoch": 6.36, + "learning_rate": 2.881362962962963e-05, + "loss": 1.7183, + "step": 35765 + }, + { + "epoch": 6.36, + "learning_rate": 2.881066666666667e-05, + "loss": 1.7547, + "step": 35770 + }, + { + "epoch": 6.36, + "learning_rate": 2.8807703703703704e-05, + "loss": 1.6302, + "step": 35775 + }, + { + "epoch": 6.36, + "learning_rate": 2.8804740740740743e-05, + "loss": 1.6706, + "step": 35780 + }, + { + "epoch": 6.36, + "learning_rate": 2.8801777777777778e-05, + "loss": 1.5911, + "step": 35785 + }, + { + "epoch": 6.36, + "learning_rate": 2.8798814814814817e-05, + "loss": 1.6637, + "step": 35790 + }, + { + "epoch": 6.36, + "learning_rate": 2.8795851851851852e-05, + "loss": 1.7942, + "step": 35795 + }, + { + "epoch": 6.36, + "learning_rate": 2.879288888888889e-05, + "loss": 1.5871, + "step": 35800 + }, + { + "epoch": 6.37, + "learning_rate": 2.8789925925925927e-05, + "loss": 1.7172, + "step": 35805 + }, + { + "epoch": 6.37, + "learning_rate": 2.8786962962962965e-05, + "loss": 1.7684, + "step": 35810 + }, + { + "epoch": 6.37, + "learning_rate": 2.8784e-05, + "loss": 1.7756, + "step": 35815 + }, + { + "epoch": 6.37, + "learning_rate": 2.878103703703704e-05, + "loss": 1.6873, + "step": 35820 + }, + { + "epoch": 6.37, + "learning_rate": 2.8778074074074075e-05, + "loss": 1.7449, + "step": 35825 + }, + { + "epoch": 6.37, + "learning_rate": 2.8775111111111114e-05, + "loss": 1.7066, + "step": 35830 + }, + { + "epoch": 6.37, + "learning_rate": 2.877214814814815e-05, + "loss": 1.5802, + "step": 35835 + }, + { + "epoch": 6.37, + "learning_rate": 2.8769185185185188e-05, + "loss": 1.6759, + "step": 35840 + }, + { + "epoch": 6.37, + "learning_rate": 2.8766222222222224e-05, + "loss": 1.7996, + "step": 35845 + }, + { + "epoch": 6.37, + "learning_rate": 2.8763259259259262e-05, + "loss": 1.633, + "step": 35850 + }, + { + "epoch": 6.37, + "learning_rate": 2.8760296296296298e-05, + "loss": 1.6693, + "step": 35855 + }, + { + "epoch": 6.38, + "learning_rate": 2.8757333333333337e-05, + "loss": 1.7648, + "step": 35860 + }, + { + "epoch": 6.38, + "learning_rate": 2.875437037037037e-05, + "loss": 1.7205, + "step": 35865 + }, + { + "epoch": 6.38, + "learning_rate": 2.875140740740741e-05, + "loss": 1.6858, + "step": 35870 + }, + { + "epoch": 6.38, + "learning_rate": 2.8748444444444443e-05, + "loss": 1.5705, + "step": 35875 + }, + { + "epoch": 6.38, + "learning_rate": 2.8745481481481485e-05, + "loss": 1.6418, + "step": 35880 + }, + { + "epoch": 6.38, + "learning_rate": 2.8742518518518517e-05, + "loss": 1.8294, + "step": 35885 + }, + { + "epoch": 6.38, + "learning_rate": 2.873955555555556e-05, + "loss": 1.6743, + "step": 35890 + }, + { + "epoch": 6.38, + "learning_rate": 2.873659259259259e-05, + "loss": 1.6596, + "step": 35895 + }, + { + "epoch": 6.38, + "learning_rate": 2.8733629629629634e-05, + "loss": 1.7387, + "step": 35900 + }, + { + "epoch": 6.38, + "learning_rate": 2.8730666666666666e-05, + "loss": 1.6302, + "step": 35905 + }, + { + "epoch": 6.38, + "learning_rate": 2.8727703703703708e-05, + "loss": 1.6678, + "step": 35910 + }, + { + "epoch": 6.38, + "learning_rate": 2.872474074074074e-05, + "loss": 1.7268, + "step": 35915 + }, + { + "epoch": 6.39, + "learning_rate": 2.8721777777777782e-05, + "loss": 1.6373, + "step": 35920 + }, + { + "epoch": 6.39, + "learning_rate": 2.8718814814814814e-05, + "loss": 1.6894, + "step": 35925 + }, + { + "epoch": 6.39, + "learning_rate": 2.8715851851851857e-05, + "loss": 1.6514, + "step": 35930 + }, + { + "epoch": 6.39, + "learning_rate": 2.871288888888889e-05, + "loss": 1.745, + "step": 35935 + }, + { + "epoch": 6.39, + "learning_rate": 2.8709925925925927e-05, + "loss": 1.8065, + "step": 35940 + }, + { + "epoch": 6.39, + "learning_rate": 2.8706962962962963e-05, + "loss": 1.8386, + "step": 35945 + }, + { + "epoch": 6.39, + "learning_rate": 2.8704e-05, + "loss": 1.798, + "step": 35950 + }, + { + "epoch": 6.39, + "learning_rate": 2.8701037037037037e-05, + "loss": 1.6165, + "step": 35955 + }, + { + "epoch": 6.39, + "learning_rate": 2.8698074074074076e-05, + "loss": 1.6324, + "step": 35960 + }, + { + "epoch": 6.39, + "learning_rate": 2.869511111111111e-05, + "loss": 1.5644, + "step": 35965 + }, + { + "epoch": 6.39, + "learning_rate": 2.869214814814815e-05, + "loss": 1.6807, + "step": 35970 + }, + { + "epoch": 6.4, + "learning_rate": 2.8689185185185186e-05, + "loss": 1.6495, + "step": 35975 + }, + { + "epoch": 6.4, + "learning_rate": 2.8686222222222224e-05, + "loss": 1.8092, + "step": 35980 + }, + { + "epoch": 6.4, + "learning_rate": 2.868325925925926e-05, + "loss": 1.6521, + "step": 35985 + }, + { + "epoch": 6.4, + "learning_rate": 2.86802962962963e-05, + "loss": 1.6443, + "step": 35990 + }, + { + "epoch": 6.4, + "learning_rate": 2.8677333333333334e-05, + "loss": 1.7089, + "step": 35995 + }, + { + "epoch": 6.4, + "learning_rate": 2.8674370370370373e-05, + "loss": 1.7755, + "step": 36000 + }, + { + "epoch": 6.4, + "learning_rate": 2.867140740740741e-05, + "loss": 1.7712, + "step": 36005 + }, + { + "epoch": 6.4, + "learning_rate": 2.8668444444444447e-05, + "loss": 1.6435, + "step": 36010 + }, + { + "epoch": 6.4, + "learning_rate": 2.8665481481481483e-05, + "loss": 1.7075, + "step": 36015 + }, + { + "epoch": 6.4, + "learning_rate": 2.866251851851852e-05, + "loss": 1.7878, + "step": 36020 + }, + { + "epoch": 6.4, + "learning_rate": 2.8659555555555557e-05, + "loss": 1.6686, + "step": 36025 + }, + { + "epoch": 6.41, + "learning_rate": 2.8656592592592596e-05, + "loss": 1.716, + "step": 36030 + }, + { + "epoch": 6.41, + "learning_rate": 2.865362962962963e-05, + "loss": 1.6834, + "step": 36035 + }, + { + "epoch": 6.41, + "learning_rate": 2.865066666666667e-05, + "loss": 1.7007, + "step": 36040 + }, + { + "epoch": 6.41, + "learning_rate": 2.8647703703703705e-05, + "loss": 1.6985, + "step": 36045 + }, + { + "epoch": 6.41, + "learning_rate": 2.8644740740740744e-05, + "loss": 1.62, + "step": 36050 + }, + { + "epoch": 6.41, + "learning_rate": 2.864177777777778e-05, + "loss": 1.6819, + "step": 36055 + }, + { + "epoch": 6.41, + "learning_rate": 2.863881481481482e-05, + "loss": 1.6782, + "step": 36060 + }, + { + "epoch": 6.41, + "learning_rate": 2.8635851851851854e-05, + "loss": 1.6066, + "step": 36065 + }, + { + "epoch": 6.41, + "learning_rate": 2.8632888888888893e-05, + "loss": 1.5761, + "step": 36070 + }, + { + "epoch": 6.41, + "learning_rate": 2.8629925925925928e-05, + "loss": 1.6755, + "step": 36075 + }, + { + "epoch": 6.41, + "learning_rate": 2.8626962962962967e-05, + "loss": 1.6864, + "step": 36080 + }, + { + "epoch": 6.42, + "learning_rate": 2.8624000000000002e-05, + "loss": 1.6985, + "step": 36085 + }, + { + "epoch": 6.42, + "learning_rate": 2.862103703703704e-05, + "loss": 1.7328, + "step": 36090 + }, + { + "epoch": 6.42, + "learning_rate": 2.8618074074074073e-05, + "loss": 1.7107, + "step": 36095 + }, + { + "epoch": 6.42, + "learning_rate": 2.8615111111111115e-05, + "loss": 1.7662, + "step": 36100 + }, + { + "epoch": 6.42, + "learning_rate": 2.8612148148148148e-05, + "loss": 1.7049, + "step": 36105 + }, + { + "epoch": 6.42, + "learning_rate": 2.860918518518519e-05, + "loss": 1.6431, + "step": 36110 + }, + { + "epoch": 6.42, + "learning_rate": 2.8606222222222222e-05, + "loss": 1.6793, + "step": 36115 + }, + { + "epoch": 6.42, + "learning_rate": 2.8603259259259264e-05, + "loss": 1.7822, + "step": 36120 + }, + { + "epoch": 6.42, + "learning_rate": 2.8600296296296296e-05, + "loss": 1.7163, + "step": 36125 + }, + { + "epoch": 6.42, + "learning_rate": 2.8597333333333338e-05, + "loss": 1.764, + "step": 36130 + }, + { + "epoch": 6.42, + "learning_rate": 2.859437037037037e-05, + "loss": 1.767, + "step": 36135 + }, + { + "epoch": 6.42, + "learning_rate": 2.8591407407407413e-05, + "loss": 1.7765, + "step": 36140 + }, + { + "epoch": 6.43, + "learning_rate": 2.8588444444444445e-05, + "loss": 1.6454, + "step": 36145 + }, + { + "epoch": 6.43, + "learning_rate": 2.8585481481481487e-05, + "loss": 1.7626, + "step": 36150 + }, + { + "epoch": 6.43, + "learning_rate": 2.858251851851852e-05, + "loss": 1.7795, + "step": 36155 + }, + { + "epoch": 6.43, + "learning_rate": 2.857955555555556e-05, + "loss": 1.6693, + "step": 36160 + }, + { + "epoch": 6.43, + "learning_rate": 2.8576592592592593e-05, + "loss": 1.74, + "step": 36165 + }, + { + "epoch": 6.43, + "learning_rate": 2.8573629629629632e-05, + "loss": 1.7159, + "step": 36170 + }, + { + "epoch": 6.43, + "learning_rate": 2.8570666666666667e-05, + "loss": 1.651, + "step": 36175 + }, + { + "epoch": 6.43, + "learning_rate": 2.8567703703703706e-05, + "loss": 1.7222, + "step": 36180 + }, + { + "epoch": 6.43, + "learning_rate": 2.856474074074074e-05, + "loss": 1.7134, + "step": 36185 + }, + { + "epoch": 6.43, + "learning_rate": 2.856177777777778e-05, + "loss": 1.7167, + "step": 36190 + }, + { + "epoch": 6.43, + "learning_rate": 2.8558814814814816e-05, + "loss": 1.6719, + "step": 36195 + }, + { + "epoch": 6.44, + "learning_rate": 2.8555851851851855e-05, + "loss": 1.6683, + "step": 36200 + }, + { + "epoch": 6.44, + "learning_rate": 2.855288888888889e-05, + "loss": 1.8396, + "step": 36205 + }, + { + "epoch": 6.44, + "learning_rate": 2.854992592592593e-05, + "loss": 1.6848, + "step": 36210 + }, + { + "epoch": 6.44, + "learning_rate": 2.8546962962962964e-05, + "loss": 1.6915, + "step": 36215 + }, + { + "epoch": 6.44, + "learning_rate": 2.8544000000000003e-05, + "loss": 1.6469, + "step": 36220 + }, + { + "epoch": 6.44, + "learning_rate": 2.854103703703704e-05, + "loss": 1.7248, + "step": 36225 + }, + { + "epoch": 6.44, + "learning_rate": 2.8538074074074077e-05, + "loss": 1.6567, + "step": 36230 + }, + { + "epoch": 6.44, + "learning_rate": 2.8535111111111113e-05, + "loss": 1.5925, + "step": 36235 + }, + { + "epoch": 6.44, + "learning_rate": 2.853214814814815e-05, + "loss": 1.6462, + "step": 36240 + }, + { + "epoch": 6.44, + "learning_rate": 2.8529185185185187e-05, + "loss": 1.6808, + "step": 36245 + }, + { + "epoch": 6.44, + "learning_rate": 2.852622222222222e-05, + "loss": 1.62, + "step": 36250 + }, + { + "epoch": 6.45, + "learning_rate": 2.852325925925926e-05, + "loss": 1.8099, + "step": 36255 + }, + { + "epoch": 6.45, + "learning_rate": 2.8520296296296293e-05, + "loss": 1.7827, + "step": 36260 + }, + { + "epoch": 6.45, + "learning_rate": 2.8517333333333336e-05, + "loss": 1.7174, + "step": 36265 + }, + { + "epoch": 6.45, + "learning_rate": 2.8514370370370368e-05, + "loss": 1.768, + "step": 36270 + }, + { + "epoch": 6.45, + "learning_rate": 2.851140740740741e-05, + "loss": 1.6862, + "step": 36275 + }, + { + "epoch": 6.45, + "learning_rate": 2.8508444444444442e-05, + "loss": 1.6689, + "step": 36280 + }, + { + "epoch": 6.45, + "learning_rate": 2.8505481481481484e-05, + "loss": 1.743, + "step": 36285 + }, + { + "epoch": 6.45, + "learning_rate": 2.8502518518518516e-05, + "loss": 1.7272, + "step": 36290 + }, + { + "epoch": 6.45, + "learning_rate": 2.849955555555556e-05, + "loss": 1.8075, + "step": 36295 + }, + { + "epoch": 6.45, + "learning_rate": 2.849659259259259e-05, + "loss": 1.6846, + "step": 36300 + }, + { + "epoch": 6.45, + "learning_rate": 2.8493629629629633e-05, + "loss": 1.7026, + "step": 36305 + }, + { + "epoch": 6.46, + "learning_rate": 2.8490666666666665e-05, + "loss": 1.7219, + "step": 36310 + }, + { + "epoch": 6.46, + "learning_rate": 2.8487703703703707e-05, + "loss": 1.7353, + "step": 36315 + }, + { + "epoch": 6.46, + "learning_rate": 2.848474074074074e-05, + "loss": 1.6455, + "step": 36320 + }, + { + "epoch": 6.46, + "learning_rate": 2.8481777777777778e-05, + "loss": 1.6074, + "step": 36325 + }, + { + "epoch": 6.46, + "learning_rate": 2.8478814814814813e-05, + "loss": 1.744, + "step": 36330 + }, + { + "epoch": 6.46, + "learning_rate": 2.8475851851851852e-05, + "loss": 1.6917, + "step": 36335 + }, + { + "epoch": 6.46, + "learning_rate": 2.8472888888888887e-05, + "loss": 1.6971, + "step": 36340 + }, + { + "epoch": 6.46, + "learning_rate": 2.8469925925925926e-05, + "loss": 1.5893, + "step": 36345 + }, + { + "epoch": 6.46, + "learning_rate": 2.8466962962962962e-05, + "loss": 1.5295, + "step": 36350 + }, + { + "epoch": 6.46, + "learning_rate": 2.8464e-05, + "loss": 1.6715, + "step": 36355 + }, + { + "epoch": 6.46, + "learning_rate": 2.8461037037037036e-05, + "loss": 1.6306, + "step": 36360 + }, + { + "epoch": 6.46, + "learning_rate": 2.8458074074074075e-05, + "loss": 1.7374, + "step": 36365 + }, + { + "epoch": 6.47, + "learning_rate": 2.845511111111111e-05, + "loss": 1.6939, + "step": 36370 + }, + { + "epoch": 6.47, + "learning_rate": 2.845214814814815e-05, + "loss": 1.6596, + "step": 36375 + }, + { + "epoch": 6.47, + "learning_rate": 2.8449185185185185e-05, + "loss": 1.7467, + "step": 36380 + }, + { + "epoch": 6.47, + "learning_rate": 2.8446222222222223e-05, + "loss": 1.6553, + "step": 36385 + }, + { + "epoch": 6.47, + "learning_rate": 2.844325925925926e-05, + "loss": 1.7137, + "step": 36390 + }, + { + "epoch": 6.47, + "learning_rate": 2.8440296296296298e-05, + "loss": 1.6375, + "step": 36395 + }, + { + "epoch": 6.47, + "learning_rate": 2.8437333333333333e-05, + "loss": 1.5801, + "step": 36400 + }, + { + "epoch": 6.47, + "learning_rate": 2.8434370370370372e-05, + "loss": 1.7203, + "step": 36405 + }, + { + "epoch": 6.47, + "learning_rate": 2.8431407407407407e-05, + "loss": 1.7435, + "step": 36410 + }, + { + "epoch": 6.47, + "learning_rate": 2.8428444444444446e-05, + "loss": 1.8046, + "step": 36415 + }, + { + "epoch": 6.47, + "learning_rate": 2.842548148148148e-05, + "loss": 1.7054, + "step": 36420 + }, + { + "epoch": 6.48, + "learning_rate": 2.842251851851852e-05, + "loss": 1.6894, + "step": 36425 + }, + { + "epoch": 6.48, + "learning_rate": 2.8419555555555556e-05, + "loss": 1.7202, + "step": 36430 + }, + { + "epoch": 6.48, + "learning_rate": 2.8416592592592595e-05, + "loss": 1.7252, + "step": 36435 + }, + { + "epoch": 6.48, + "learning_rate": 2.841362962962963e-05, + "loss": 1.7016, + "step": 36440 + }, + { + "epoch": 6.48, + "learning_rate": 2.841066666666667e-05, + "loss": 1.5628, + "step": 36445 + }, + { + "epoch": 6.48, + "learning_rate": 2.8407703703703704e-05, + "loss": 1.6319, + "step": 36450 + }, + { + "epoch": 6.48, + "learning_rate": 2.8404740740740743e-05, + "loss": 1.7028, + "step": 36455 + }, + { + "epoch": 6.48, + "learning_rate": 2.840177777777778e-05, + "loss": 1.6473, + "step": 36460 + }, + { + "epoch": 6.48, + "learning_rate": 2.8398814814814817e-05, + "loss": 1.6792, + "step": 36465 + }, + { + "epoch": 6.48, + "learning_rate": 2.8395851851851853e-05, + "loss": 1.7081, + "step": 36470 + }, + { + "epoch": 6.48, + "learning_rate": 2.839288888888889e-05, + "loss": 1.6674, + "step": 36475 + }, + { + "epoch": 6.49, + "learning_rate": 2.8389925925925924e-05, + "loss": 1.777, + "step": 36480 + }, + { + "epoch": 6.49, + "learning_rate": 2.8386962962962966e-05, + "loss": 1.6902, + "step": 36485 + }, + { + "epoch": 6.49, + "learning_rate": 2.8383999999999998e-05, + "loss": 1.6702, + "step": 36490 + }, + { + "epoch": 6.49, + "learning_rate": 2.838103703703704e-05, + "loss": 1.7862, + "step": 36495 + }, + { + "epoch": 6.49, + "learning_rate": 2.8378074074074072e-05, + "loss": 1.6143, + "step": 36500 + }, + { + "epoch": 6.49, + "learning_rate": 2.8375111111111114e-05, + "loss": 1.7763, + "step": 36505 + }, + { + "epoch": 6.49, + "learning_rate": 2.8372148148148146e-05, + "loss": 1.5738, + "step": 36510 + }, + { + "epoch": 6.49, + "learning_rate": 2.836918518518519e-05, + "loss": 1.763, + "step": 36515 + }, + { + "epoch": 6.49, + "learning_rate": 2.836622222222222e-05, + "loss": 1.8592, + "step": 36520 + }, + { + "epoch": 6.49, + "learning_rate": 2.8363259259259263e-05, + "loss": 1.6954, + "step": 36525 + }, + { + "epoch": 6.49, + "learning_rate": 2.8360296296296295e-05, + "loss": 1.811, + "step": 36530 + }, + { + "epoch": 6.5, + "learning_rate": 2.8357333333333337e-05, + "loss": 1.7427, + "step": 36535 + }, + { + "epoch": 6.5, + "learning_rate": 2.835437037037037e-05, + "loss": 1.7426, + "step": 36540 + }, + { + "epoch": 6.5, + "learning_rate": 2.835140740740741e-05, + "loss": 1.7703, + "step": 36545 + }, + { + "epoch": 6.5, + "learning_rate": 2.8348444444444443e-05, + "loss": 1.7319, + "step": 36550 + }, + { + "epoch": 6.5, + "learning_rate": 2.8345481481481482e-05, + "loss": 1.7613, + "step": 36555 + }, + { + "epoch": 6.5, + "learning_rate": 2.8342518518518518e-05, + "loss": 1.7009, + "step": 36560 + }, + { + "epoch": 6.5, + "learning_rate": 2.8339555555555557e-05, + "loss": 1.7612, + "step": 36565 + }, + { + "epoch": 6.5, + "learning_rate": 2.8336592592592592e-05, + "loss": 1.6782, + "step": 36570 + }, + { + "epoch": 6.5, + "learning_rate": 2.833362962962963e-05, + "loss": 1.7726, + "step": 36575 + }, + { + "epoch": 6.5, + "learning_rate": 2.8330666666666666e-05, + "loss": 1.6242, + "step": 36580 + }, + { + "epoch": 6.5, + "learning_rate": 2.8327703703703705e-05, + "loss": 1.7328, + "step": 36585 + }, + { + "epoch": 6.5, + "learning_rate": 2.832474074074074e-05, + "loss": 1.6987, + "step": 36590 + }, + { + "epoch": 6.51, + "learning_rate": 2.832177777777778e-05, + "loss": 1.7388, + "step": 36595 + }, + { + "epoch": 6.51, + "learning_rate": 2.8318814814814815e-05, + "loss": 1.6625, + "step": 36600 + }, + { + "epoch": 6.51, + "learning_rate": 2.8315851851851854e-05, + "loss": 1.6675, + "step": 36605 + }, + { + "epoch": 6.51, + "learning_rate": 2.831288888888889e-05, + "loss": 1.6722, + "step": 36610 + }, + { + "epoch": 6.51, + "learning_rate": 2.8309925925925928e-05, + "loss": 1.576, + "step": 36615 + }, + { + "epoch": 6.51, + "learning_rate": 2.8306962962962963e-05, + "loss": 1.6292, + "step": 36620 + }, + { + "epoch": 6.51, + "learning_rate": 2.8304000000000002e-05, + "loss": 1.6363, + "step": 36625 + }, + { + "epoch": 6.51, + "learning_rate": 2.8301037037037038e-05, + "loss": 1.6855, + "step": 36630 + }, + { + "epoch": 6.51, + "learning_rate": 2.8298074074074076e-05, + "loss": 1.6527, + "step": 36635 + }, + { + "epoch": 6.51, + "learning_rate": 2.8295111111111112e-05, + "loss": 1.6172, + "step": 36640 + }, + { + "epoch": 6.51, + "learning_rate": 2.829214814814815e-05, + "loss": 1.7243, + "step": 36645 + }, + { + "epoch": 6.52, + "learning_rate": 2.8289185185185186e-05, + "loss": 1.6595, + "step": 36650 + }, + { + "epoch": 6.52, + "learning_rate": 2.8286222222222225e-05, + "loss": 1.617, + "step": 36655 + }, + { + "epoch": 6.52, + "learning_rate": 2.828325925925926e-05, + "loss": 1.6275, + "step": 36660 + }, + { + "epoch": 6.52, + "learning_rate": 2.82802962962963e-05, + "loss": 1.6912, + "step": 36665 + }, + { + "epoch": 6.52, + "learning_rate": 2.8277333333333335e-05, + "loss": 1.5998, + "step": 36670 + }, + { + "epoch": 6.52, + "learning_rate": 2.8274370370370373e-05, + "loss": 1.7649, + "step": 36675 + }, + { + "epoch": 6.52, + "learning_rate": 2.827140740740741e-05, + "loss": 1.6509, + "step": 36680 + }, + { + "epoch": 6.52, + "learning_rate": 2.8268444444444448e-05, + "loss": 1.7309, + "step": 36685 + }, + { + "epoch": 6.52, + "learning_rate": 2.8265481481481483e-05, + "loss": 1.7929, + "step": 36690 + }, + { + "epoch": 6.52, + "learning_rate": 2.8262518518518522e-05, + "loss": 1.6679, + "step": 36695 + }, + { + "epoch": 6.52, + "learning_rate": 2.8259555555555557e-05, + "loss": 1.6754, + "step": 36700 + }, + { + "epoch": 6.53, + "learning_rate": 2.8256592592592596e-05, + "loss": 1.6957, + "step": 36705 + }, + { + "epoch": 6.53, + "learning_rate": 2.825362962962963e-05, + "loss": 1.794, + "step": 36710 + }, + { + "epoch": 6.53, + "learning_rate": 2.825066666666667e-05, + "loss": 1.744, + "step": 36715 + }, + { + "epoch": 6.53, + "learning_rate": 2.8247703703703702e-05, + "loss": 1.7253, + "step": 36720 + }, + { + "epoch": 6.53, + "learning_rate": 2.8244740740740745e-05, + "loss": 1.6742, + "step": 36725 + }, + { + "epoch": 6.53, + "learning_rate": 2.8241777777777777e-05, + "loss": 1.662, + "step": 36730 + }, + { + "epoch": 6.53, + "learning_rate": 2.823881481481482e-05, + "loss": 1.7324, + "step": 36735 + }, + { + "epoch": 6.53, + "learning_rate": 2.823585185185185e-05, + "loss": 1.687, + "step": 36740 + }, + { + "epoch": 6.53, + "learning_rate": 2.8232888888888893e-05, + "loss": 1.7163, + "step": 36745 + }, + { + "epoch": 6.53, + "learning_rate": 2.8229925925925925e-05, + "loss": 1.7221, + "step": 36750 + }, + { + "epoch": 6.53, + "learning_rate": 2.8226962962962967e-05, + "loss": 1.7284, + "step": 36755 + }, + { + "epoch": 6.54, + "learning_rate": 2.8224e-05, + "loss": 1.7503, + "step": 36760 + }, + { + "epoch": 6.54, + "learning_rate": 2.822103703703704e-05, + "loss": 1.6126, + "step": 36765 + }, + { + "epoch": 6.54, + "learning_rate": 2.8218074074074074e-05, + "loss": 1.7407, + "step": 36770 + }, + { + "epoch": 6.54, + "learning_rate": 2.8215111111111116e-05, + "loss": 1.5999, + "step": 36775 + }, + { + "epoch": 6.54, + "learning_rate": 2.8212148148148148e-05, + "loss": 1.7796, + "step": 36780 + }, + { + "epoch": 6.54, + "learning_rate": 2.8209185185185187e-05, + "loss": 1.7596, + "step": 36785 + }, + { + "epoch": 6.54, + "learning_rate": 2.8206222222222222e-05, + "loss": 1.608, + "step": 36790 + }, + { + "epoch": 6.54, + "learning_rate": 2.820325925925926e-05, + "loss": 1.7843, + "step": 36795 + }, + { + "epoch": 6.54, + "learning_rate": 2.8200296296296296e-05, + "loss": 1.7297, + "step": 36800 + }, + { + "epoch": 6.54, + "learning_rate": 2.8197333333333335e-05, + "loss": 1.6764, + "step": 36805 + }, + { + "epoch": 6.54, + "learning_rate": 2.819437037037037e-05, + "loss": 1.6951, + "step": 36810 + }, + { + "epoch": 6.54, + "learning_rate": 2.819140740740741e-05, + "loss": 1.5253, + "step": 36815 + }, + { + "epoch": 6.55, + "learning_rate": 2.8188444444444445e-05, + "loss": 1.6556, + "step": 36820 + }, + { + "epoch": 6.55, + "learning_rate": 2.8185481481481484e-05, + "loss": 1.7757, + "step": 36825 + }, + { + "epoch": 6.55, + "learning_rate": 2.818251851851852e-05, + "loss": 1.638, + "step": 36830 + }, + { + "epoch": 6.55, + "learning_rate": 2.8179555555555558e-05, + "loss": 1.6229, + "step": 36835 + }, + { + "epoch": 6.55, + "learning_rate": 2.8176592592592594e-05, + "loss": 1.7166, + "step": 36840 + }, + { + "epoch": 6.55, + "learning_rate": 2.8173629629629632e-05, + "loss": 1.7254, + "step": 36845 + }, + { + "epoch": 6.55, + "learning_rate": 2.8170666666666668e-05, + "loss": 1.7394, + "step": 36850 + }, + { + "epoch": 6.55, + "learning_rate": 2.8167703703703707e-05, + "loss": 1.7183, + "step": 36855 + }, + { + "epoch": 6.55, + "learning_rate": 2.8164740740740742e-05, + "loss": 1.6788, + "step": 36860 + }, + { + "epoch": 6.55, + "learning_rate": 2.816177777777778e-05, + "loss": 1.5688, + "step": 36865 + }, + { + "epoch": 6.55, + "learning_rate": 2.8158814814814816e-05, + "loss": 1.7802, + "step": 36870 + }, + { + "epoch": 6.56, + "learning_rate": 2.8155851851851855e-05, + "loss": 1.7025, + "step": 36875 + }, + { + "epoch": 6.56, + "learning_rate": 2.815288888888889e-05, + "loss": 1.6699, + "step": 36880 + }, + { + "epoch": 6.56, + "learning_rate": 2.814992592592593e-05, + "loss": 1.6333, + "step": 36885 + }, + { + "epoch": 6.56, + "learning_rate": 2.8146962962962965e-05, + "loss": 1.6357, + "step": 36890 + }, + { + "epoch": 6.56, + "learning_rate": 2.8144000000000004e-05, + "loss": 1.7468, + "step": 36895 + }, + { + "epoch": 6.56, + "learning_rate": 2.814103703703704e-05, + "loss": 1.746, + "step": 36900 + }, + { + "epoch": 6.56, + "learning_rate": 2.8138074074074078e-05, + "loss": 1.726, + "step": 36905 + }, + { + "epoch": 6.56, + "learning_rate": 2.8135111111111113e-05, + "loss": 1.7173, + "step": 36910 + }, + { + "epoch": 6.56, + "learning_rate": 2.8132148148148152e-05, + "loss": 1.6483, + "step": 36915 + }, + { + "epoch": 6.56, + "learning_rate": 2.8129185185185188e-05, + "loss": 1.6667, + "step": 36920 + }, + { + "epoch": 6.56, + "learning_rate": 2.8126222222222226e-05, + "loss": 1.5941, + "step": 36925 + }, + { + "epoch": 6.57, + "learning_rate": 2.8123259259259262e-05, + "loss": 1.6653, + "step": 36930 + }, + { + "epoch": 6.57, + "learning_rate": 2.81202962962963e-05, + "loss": 1.7748, + "step": 36935 + }, + { + "epoch": 6.57, + "learning_rate": 2.8117333333333336e-05, + "loss": 1.7226, + "step": 36940 + }, + { + "epoch": 6.57, + "learning_rate": 2.8114370370370375e-05, + "loss": 1.7881, + "step": 36945 + }, + { + "epoch": 6.57, + "learning_rate": 2.8111407407407407e-05, + "loss": 1.6415, + "step": 36950 + }, + { + "epoch": 6.57, + "learning_rate": 2.810844444444445e-05, + "loss": 1.5883, + "step": 36955 + }, + { + "epoch": 6.57, + "learning_rate": 2.810548148148148e-05, + "loss": 1.6338, + "step": 36960 + }, + { + "epoch": 6.57, + "learning_rate": 2.8102518518518523e-05, + "loss": 1.6433, + "step": 36965 + }, + { + "epoch": 6.57, + "learning_rate": 2.8099555555555555e-05, + "loss": 1.6955, + "step": 36970 + }, + { + "epoch": 6.57, + "learning_rate": 2.8096592592592598e-05, + "loss": 1.701, + "step": 36975 + }, + { + "epoch": 6.57, + "learning_rate": 2.809362962962963e-05, + "loss": 1.6956, + "step": 36980 + }, + { + "epoch": 6.58, + "learning_rate": 2.8090666666666672e-05, + "loss": 1.6669, + "step": 36985 + }, + { + "epoch": 6.58, + "learning_rate": 2.8087703703703704e-05, + "loss": 1.6975, + "step": 36990 + }, + { + "epoch": 6.58, + "learning_rate": 2.8084740740740746e-05, + "loss": 1.5879, + "step": 36995 + }, + { + "epoch": 6.58, + "learning_rate": 2.8081777777777778e-05, + "loss": 1.7765, + "step": 37000 + }, + { + "epoch": 6.58, + "learning_rate": 2.807881481481482e-05, + "loss": 1.7033, + "step": 37005 + }, + { + "epoch": 6.58, + "learning_rate": 2.8075851851851852e-05, + "loss": 1.667, + "step": 37010 + }, + { + "epoch": 6.58, + "learning_rate": 2.8072888888888888e-05, + "loss": 1.7115, + "step": 37015 + }, + { + "epoch": 6.58, + "learning_rate": 2.8069925925925927e-05, + "loss": 1.6972, + "step": 37020 + }, + { + "epoch": 6.58, + "learning_rate": 2.8066962962962962e-05, + "loss": 1.6282, + "step": 37025 + }, + { + "epoch": 6.58, + "learning_rate": 2.8064e-05, + "loss": 1.7011, + "step": 37030 + }, + { + "epoch": 6.58, + "learning_rate": 2.8061037037037036e-05, + "loss": 1.6973, + "step": 37035 + }, + { + "epoch": 6.58, + "learning_rate": 2.8058074074074075e-05, + "loss": 1.6613, + "step": 37040 + }, + { + "epoch": 6.59, + "learning_rate": 2.805511111111111e-05, + "loss": 1.5986, + "step": 37045 + }, + { + "epoch": 6.59, + "learning_rate": 2.805214814814815e-05, + "loss": 1.68, + "step": 37050 + }, + { + "epoch": 6.59, + "learning_rate": 2.8049185185185185e-05, + "loss": 1.8451, + "step": 37055 + }, + { + "epoch": 6.59, + "learning_rate": 2.8046222222222224e-05, + "loss": 1.7597, + "step": 37060 + }, + { + "epoch": 6.59, + "learning_rate": 2.804325925925926e-05, + "loss": 1.7371, + "step": 37065 + }, + { + "epoch": 6.59, + "learning_rate": 2.8040296296296298e-05, + "loss": 1.7445, + "step": 37070 + }, + { + "epoch": 6.59, + "learning_rate": 2.8037333333333333e-05, + "loss": 1.7925, + "step": 37075 + }, + { + "epoch": 6.59, + "learning_rate": 2.8034370370370372e-05, + "loss": 1.7462, + "step": 37080 + }, + { + "epoch": 6.59, + "learning_rate": 2.8031407407407408e-05, + "loss": 1.7, + "step": 37085 + }, + { + "epoch": 6.59, + "learning_rate": 2.8028444444444447e-05, + "loss": 1.7781, + "step": 37090 + }, + { + "epoch": 6.59, + "learning_rate": 2.8025481481481482e-05, + "loss": 1.7328, + "step": 37095 + }, + { + "epoch": 6.6, + "learning_rate": 2.802251851851852e-05, + "loss": 1.6862, + "step": 37100 + }, + { + "epoch": 6.6, + "learning_rate": 2.8019555555555553e-05, + "loss": 1.6607, + "step": 37105 + }, + { + "epoch": 6.6, + "learning_rate": 2.8016592592592595e-05, + "loss": 1.6926, + "step": 37110 + }, + { + "epoch": 6.6, + "learning_rate": 2.8013629629629627e-05, + "loss": 1.6385, + "step": 37115 + }, + { + "epoch": 6.6, + "learning_rate": 2.801066666666667e-05, + "loss": 1.8231, + "step": 37120 + }, + { + "epoch": 6.6, + "learning_rate": 2.80077037037037e-05, + "loss": 1.6058, + "step": 37125 + }, + { + "epoch": 6.6, + "learning_rate": 2.8004740740740744e-05, + "loss": 1.6244, + "step": 37130 + }, + { + "epoch": 6.6, + "learning_rate": 2.8001777777777776e-05, + "loss": 1.8306, + "step": 37135 + }, + { + "epoch": 6.6, + "learning_rate": 2.7998814814814818e-05, + "loss": 1.722, + "step": 37140 + }, + { + "epoch": 6.6, + "learning_rate": 2.799585185185185e-05, + "loss": 1.7487, + "step": 37145 + }, + { + "epoch": 6.6, + "learning_rate": 2.7992888888888892e-05, + "loss": 1.7595, + "step": 37150 + }, + { + "epoch": 6.61, + "learning_rate": 2.7989925925925924e-05, + "loss": 1.7614, + "step": 37155 + }, + { + "epoch": 6.61, + "learning_rate": 2.7986962962962966e-05, + "loss": 1.6809, + "step": 37160 + }, + { + "epoch": 6.61, + "learning_rate": 2.7984e-05, + "loss": 1.695, + "step": 37165 + }, + { + "epoch": 6.61, + "learning_rate": 2.798103703703704e-05, + "loss": 1.6628, + "step": 37170 + }, + { + "epoch": 6.61, + "learning_rate": 2.7978074074074073e-05, + "loss": 1.6343, + "step": 37175 + }, + { + "epoch": 6.61, + "learning_rate": 2.797511111111111e-05, + "loss": 1.7839, + "step": 37180 + }, + { + "epoch": 6.61, + "learning_rate": 2.7972148148148147e-05, + "loss": 1.7136, + "step": 37185 + }, + { + "epoch": 6.61, + "learning_rate": 2.7969185185185186e-05, + "loss": 1.7572, + "step": 37190 + }, + { + "epoch": 6.61, + "learning_rate": 2.796622222222222e-05, + "loss": 1.7662, + "step": 37195 + }, + { + "epoch": 6.61, + "learning_rate": 2.796325925925926e-05, + "loss": 1.5771, + "step": 37200 + }, + { + "epoch": 6.61, + "learning_rate": 2.7960296296296295e-05, + "loss": 1.6898, + "step": 37205 + }, + { + "epoch": 6.62, + "learning_rate": 2.7957333333333334e-05, + "loss": 1.7676, + "step": 37210 + }, + { + "epoch": 6.62, + "learning_rate": 2.795437037037037e-05, + "loss": 1.6991, + "step": 37215 + }, + { + "epoch": 6.62, + "learning_rate": 2.795140740740741e-05, + "loss": 1.5731, + "step": 37220 + }, + { + "epoch": 6.62, + "learning_rate": 2.7948444444444444e-05, + "loss": 1.6064, + "step": 37225 + }, + { + "epoch": 6.62, + "learning_rate": 2.7945481481481483e-05, + "loss": 1.6628, + "step": 37230 + }, + { + "epoch": 6.62, + "learning_rate": 2.7942518518518518e-05, + "loss": 1.6861, + "step": 37235 + }, + { + "epoch": 6.62, + "learning_rate": 2.7939555555555557e-05, + "loss": 1.7704, + "step": 37240 + }, + { + "epoch": 6.62, + "learning_rate": 2.7936592592592592e-05, + "loss": 1.6042, + "step": 37245 + }, + { + "epoch": 6.62, + "learning_rate": 2.793362962962963e-05, + "loss": 1.7919, + "step": 37250 + }, + { + "epoch": 6.62, + "learning_rate": 2.7930666666666667e-05, + "loss": 1.7071, + "step": 37255 + }, + { + "epoch": 6.62, + "learning_rate": 2.7927703703703706e-05, + "loss": 1.8354, + "step": 37260 + }, + { + "epoch": 6.62, + "learning_rate": 2.792474074074074e-05, + "loss": 1.678, + "step": 37265 + }, + { + "epoch": 6.63, + "learning_rate": 2.792177777777778e-05, + "loss": 1.8012, + "step": 37270 + }, + { + "epoch": 6.63, + "learning_rate": 2.7918814814814815e-05, + "loss": 1.67, + "step": 37275 + }, + { + "epoch": 6.63, + "learning_rate": 2.7915851851851854e-05, + "loss": 1.7499, + "step": 37280 + }, + { + "epoch": 6.63, + "learning_rate": 2.791288888888889e-05, + "loss": 1.6338, + "step": 37285 + }, + { + "epoch": 6.63, + "learning_rate": 2.7909925925925928e-05, + "loss": 1.6439, + "step": 37290 + }, + { + "epoch": 6.63, + "learning_rate": 2.7906962962962964e-05, + "loss": 1.8399, + "step": 37295 + }, + { + "epoch": 6.63, + "learning_rate": 2.7904000000000003e-05, + "loss": 1.7093, + "step": 37300 + }, + { + "epoch": 6.63, + "learning_rate": 2.7901037037037038e-05, + "loss": 1.759, + "step": 37305 + }, + { + "epoch": 6.63, + "learning_rate": 2.7898074074074077e-05, + "loss": 1.7399, + "step": 37310 + }, + { + "epoch": 6.63, + "learning_rate": 2.7895111111111112e-05, + "loss": 1.6921, + "step": 37315 + }, + { + "epoch": 6.63, + "learning_rate": 2.789214814814815e-05, + "loss": 1.6583, + "step": 37320 + }, + { + "epoch": 6.64, + "learning_rate": 2.7889185185185186e-05, + "loss": 1.6882, + "step": 37325 + }, + { + "epoch": 6.64, + "learning_rate": 2.7886222222222225e-05, + "loss": 1.7021, + "step": 37330 + }, + { + "epoch": 6.64, + "learning_rate": 2.7883259259259257e-05, + "loss": 1.6507, + "step": 37335 + }, + { + "epoch": 6.64, + "learning_rate": 2.78802962962963e-05, + "loss": 1.7626, + "step": 37340 + }, + { + "epoch": 6.64, + "learning_rate": 2.787792592592593e-05, + "loss": 1.6454, + "step": 37345 + }, + { + "epoch": 6.64, + "learning_rate": 2.7874962962962965e-05, + "loss": 1.8968, + "step": 37350 + }, + { + "epoch": 6.64, + "learning_rate": 2.7872000000000004e-05, + "loss": 1.7958, + "step": 37355 + }, + { + "epoch": 6.64, + "learning_rate": 2.786903703703704e-05, + "loss": 1.636, + "step": 37360 + }, + { + "epoch": 6.64, + "learning_rate": 2.7866074074074078e-05, + "loss": 1.7187, + "step": 37365 + }, + { + "epoch": 6.64, + "learning_rate": 2.7863111111111113e-05, + "loss": 1.68, + "step": 37370 + }, + { + "epoch": 6.64, + "learning_rate": 2.7860148148148152e-05, + "loss": 1.6245, + "step": 37375 + }, + { + "epoch": 6.65, + "learning_rate": 2.7857185185185187e-05, + "loss": 1.7621, + "step": 37380 + }, + { + "epoch": 6.65, + "learning_rate": 2.7854222222222226e-05, + "loss": 1.6942, + "step": 37385 + }, + { + "epoch": 6.65, + "learning_rate": 2.7851259259259262e-05, + "loss": 1.6967, + "step": 37390 + }, + { + "epoch": 6.65, + "learning_rate": 2.78482962962963e-05, + "loss": 1.6773, + "step": 37395 + }, + { + "epoch": 6.65, + "learning_rate": 2.7845333333333336e-05, + "loss": 1.6733, + "step": 37400 + }, + { + "epoch": 6.65, + "learning_rate": 2.7842370370370375e-05, + "loss": 1.6554, + "step": 37405 + }, + { + "epoch": 6.65, + "learning_rate": 2.7839407407407407e-05, + "loss": 1.6194, + "step": 37410 + }, + { + "epoch": 6.65, + "learning_rate": 2.783644444444445e-05, + "loss": 1.8113, + "step": 37415 + }, + { + "epoch": 6.65, + "learning_rate": 2.783348148148148e-05, + "loss": 1.7262, + "step": 37420 + }, + { + "epoch": 6.65, + "learning_rate": 2.7830518518518523e-05, + "loss": 1.9016, + "step": 37425 + }, + { + "epoch": 6.65, + "learning_rate": 2.7827555555555555e-05, + "loss": 1.7025, + "step": 37430 + }, + { + "epoch": 6.66, + "learning_rate": 2.7824592592592598e-05, + "loss": 1.6478, + "step": 37435 + }, + { + "epoch": 6.66, + "learning_rate": 2.782162962962963e-05, + "loss": 1.752, + "step": 37440 + }, + { + "epoch": 6.66, + "learning_rate": 2.7818666666666672e-05, + "loss": 1.8082, + "step": 37445 + }, + { + "epoch": 6.66, + "learning_rate": 2.7815703703703704e-05, + "loss": 1.6672, + "step": 37450 + }, + { + "epoch": 6.66, + "learning_rate": 2.7812740740740746e-05, + "loss": 1.6883, + "step": 37455 + }, + { + "epoch": 6.66, + "learning_rate": 2.7809777777777778e-05, + "loss": 1.7377, + "step": 37460 + }, + { + "epoch": 6.66, + "learning_rate": 2.780681481481482e-05, + "loss": 1.6921, + "step": 37465 + }, + { + "epoch": 6.66, + "learning_rate": 2.7803851851851852e-05, + "loss": 1.7795, + "step": 37470 + }, + { + "epoch": 6.66, + "learning_rate": 2.7800888888888895e-05, + "loss": 1.5579, + "step": 37475 + }, + { + "epoch": 6.66, + "learning_rate": 2.7797925925925927e-05, + "loss": 1.6036, + "step": 37480 + }, + { + "epoch": 6.66, + "learning_rate": 2.7794962962962962e-05, + "loss": 1.6752, + "step": 37485 + }, + { + "epoch": 6.66, + "learning_rate": 2.7792e-05, + "loss": 1.6855, + "step": 37490 + }, + { + "epoch": 6.67, + "learning_rate": 2.7789037037037036e-05, + "loss": 1.6936, + "step": 37495 + }, + { + "epoch": 6.67, + "learning_rate": 2.7786074074074075e-05, + "loss": 1.7692, + "step": 37500 + }, + { + "epoch": 6.67, + "learning_rate": 2.778311111111111e-05, + "loss": 1.7005, + "step": 37505 + }, + { + "epoch": 6.67, + "learning_rate": 2.778014814814815e-05, + "loss": 1.5449, + "step": 37510 + }, + { + "epoch": 6.67, + "learning_rate": 2.7777185185185185e-05, + "loss": 1.5893, + "step": 37515 + }, + { + "epoch": 6.67, + "learning_rate": 2.7774222222222224e-05, + "loss": 1.5849, + "step": 37520 + }, + { + "epoch": 6.67, + "learning_rate": 2.777125925925926e-05, + "loss": 1.787, + "step": 37525 + }, + { + "epoch": 6.67, + "learning_rate": 2.7768296296296298e-05, + "loss": 1.6223, + "step": 37530 + }, + { + "epoch": 6.67, + "learning_rate": 2.7765333333333333e-05, + "loss": 1.6336, + "step": 37535 + }, + { + "epoch": 6.67, + "learning_rate": 2.7762370370370372e-05, + "loss": 1.7443, + "step": 37540 + }, + { + "epoch": 6.67, + "learning_rate": 2.7759407407407408e-05, + "loss": 1.6701, + "step": 37545 + }, + { + "epoch": 6.68, + "learning_rate": 2.7756444444444446e-05, + "loss": 1.8236, + "step": 37550 + }, + { + "epoch": 6.68, + "learning_rate": 2.7753481481481482e-05, + "loss": 1.687, + "step": 37555 + }, + { + "epoch": 6.68, + "learning_rate": 2.775051851851852e-05, + "loss": 1.7662, + "step": 37560 + }, + { + "epoch": 6.68, + "learning_rate": 2.7747555555555553e-05, + "loss": 1.7042, + "step": 37565 + }, + { + "epoch": 6.68, + "learning_rate": 2.7744592592592595e-05, + "loss": 1.6591, + "step": 37570 + }, + { + "epoch": 6.68, + "learning_rate": 2.7741629629629627e-05, + "loss": 1.738, + "step": 37575 + }, + { + "epoch": 6.68, + "learning_rate": 2.773866666666667e-05, + "loss": 1.6957, + "step": 37580 + }, + { + "epoch": 6.68, + "learning_rate": 2.77357037037037e-05, + "loss": 1.7032, + "step": 37585 + }, + { + "epoch": 6.68, + "learning_rate": 2.7732740740740743e-05, + "loss": 1.795, + "step": 37590 + }, + { + "epoch": 6.68, + "learning_rate": 2.7729777777777776e-05, + "loss": 1.86, + "step": 37595 + }, + { + "epoch": 6.68, + "learning_rate": 2.7726814814814818e-05, + "loss": 1.644, + "step": 37600 + }, + { + "epoch": 6.69, + "learning_rate": 2.772385185185185e-05, + "loss": 1.7699, + "step": 37605 + }, + { + "epoch": 6.69, + "learning_rate": 2.7720888888888892e-05, + "loss": 1.6672, + "step": 37610 + }, + { + "epoch": 6.69, + "learning_rate": 2.7717925925925924e-05, + "loss": 1.6136, + "step": 37615 + }, + { + "epoch": 6.69, + "learning_rate": 2.7714962962962966e-05, + "loss": 1.6395, + "step": 37620 + }, + { + "epoch": 6.69, + "learning_rate": 2.7711999999999998e-05, + "loss": 1.6436, + "step": 37625 + }, + { + "epoch": 6.69, + "learning_rate": 2.770903703703704e-05, + "loss": 1.6875, + "step": 37630 + }, + { + "epoch": 6.69, + "learning_rate": 2.7706074074074073e-05, + "loss": 1.8121, + "step": 37635 + }, + { + "epoch": 6.69, + "learning_rate": 2.770311111111111e-05, + "loss": 1.7108, + "step": 37640 + }, + { + "epoch": 6.69, + "learning_rate": 2.7700148148148147e-05, + "loss": 1.7058, + "step": 37645 + }, + { + "epoch": 6.69, + "learning_rate": 2.7697185185185186e-05, + "loss": 1.68, + "step": 37650 + }, + { + "epoch": 6.69, + "learning_rate": 2.769422222222222e-05, + "loss": 1.7114, + "step": 37655 + }, + { + "epoch": 6.7, + "learning_rate": 2.769125925925926e-05, + "loss": 1.6908, + "step": 37660 + }, + { + "epoch": 6.7, + "learning_rate": 2.7688296296296295e-05, + "loss": 1.663, + "step": 37665 + }, + { + "epoch": 6.7, + "learning_rate": 2.7685333333333334e-05, + "loss": 1.7002, + "step": 37670 + }, + { + "epoch": 6.7, + "learning_rate": 2.768237037037037e-05, + "loss": 1.8325, + "step": 37675 + }, + { + "epoch": 6.7, + "learning_rate": 2.767940740740741e-05, + "loss": 1.6941, + "step": 37680 + }, + { + "epoch": 6.7, + "learning_rate": 2.7676444444444444e-05, + "loss": 1.768, + "step": 37685 + }, + { + "epoch": 6.7, + "learning_rate": 2.7673481481481483e-05, + "loss": 1.6701, + "step": 37690 + }, + { + "epoch": 6.7, + "learning_rate": 2.7670518518518518e-05, + "loss": 1.6397, + "step": 37695 + }, + { + "epoch": 6.7, + "learning_rate": 2.7667555555555557e-05, + "loss": 1.5641, + "step": 37700 + }, + { + "epoch": 6.7, + "learning_rate": 2.7664592592592592e-05, + "loss": 1.7797, + "step": 37705 + }, + { + "epoch": 6.7, + "learning_rate": 2.766162962962963e-05, + "loss": 1.6815, + "step": 37710 + }, + { + "epoch": 6.7, + "learning_rate": 2.7658666666666667e-05, + "loss": 1.7425, + "step": 37715 + }, + { + "epoch": 6.71, + "learning_rate": 2.7655703703703705e-05, + "loss": 1.6911, + "step": 37720 + }, + { + "epoch": 6.71, + "learning_rate": 2.765274074074074e-05, + "loss": 1.8086, + "step": 37725 + }, + { + "epoch": 6.71, + "learning_rate": 2.764977777777778e-05, + "loss": 1.7508, + "step": 37730 + }, + { + "epoch": 6.71, + "learning_rate": 2.7646814814814815e-05, + "loss": 1.6767, + "step": 37735 + }, + { + "epoch": 6.71, + "learning_rate": 2.7643851851851854e-05, + "loss": 1.5837, + "step": 37740 + }, + { + "epoch": 6.71, + "learning_rate": 2.764088888888889e-05, + "loss": 1.6061, + "step": 37745 + }, + { + "epoch": 6.71, + "learning_rate": 2.7637925925925928e-05, + "loss": 1.6955, + "step": 37750 + }, + { + "epoch": 6.71, + "learning_rate": 2.7634962962962964e-05, + "loss": 1.7186, + "step": 37755 + }, + { + "epoch": 6.71, + "learning_rate": 2.7632000000000002e-05, + "loss": 1.6586, + "step": 37760 + }, + { + "epoch": 6.71, + "learning_rate": 2.7629037037037038e-05, + "loss": 1.61, + "step": 37765 + }, + { + "epoch": 6.71, + "learning_rate": 2.7626074074074077e-05, + "loss": 1.6162, + "step": 37770 + }, + { + "epoch": 6.72, + "learning_rate": 2.7623111111111112e-05, + "loss": 1.7439, + "step": 37775 + }, + { + "epoch": 6.72, + "learning_rate": 2.762014814814815e-05, + "loss": 1.8286, + "step": 37780 + }, + { + "epoch": 6.72, + "learning_rate": 2.7617185185185186e-05, + "loss": 1.777, + "step": 37785 + }, + { + "epoch": 6.72, + "learning_rate": 2.7614222222222225e-05, + "loss": 1.8202, + "step": 37790 + }, + { + "epoch": 6.72, + "learning_rate": 2.761125925925926e-05, + "loss": 1.7619, + "step": 37795 + }, + { + "epoch": 6.72, + "learning_rate": 2.76082962962963e-05, + "loss": 1.6695, + "step": 37800 + }, + { + "epoch": 6.72, + "learning_rate": 2.760533333333333e-05, + "loss": 1.6611, + "step": 37805 + }, + { + "epoch": 6.72, + "learning_rate": 2.7602370370370374e-05, + "loss": 1.695, + "step": 37810 + }, + { + "epoch": 6.72, + "learning_rate": 2.7599407407407406e-05, + "loss": 1.6386, + "step": 37815 + }, + { + "epoch": 6.72, + "learning_rate": 2.7596444444444448e-05, + "loss": 1.7514, + "step": 37820 + }, + { + "epoch": 6.72, + "learning_rate": 2.759348148148148e-05, + "loss": 1.688, + "step": 37825 + }, + { + "epoch": 6.73, + "learning_rate": 2.7590518518518522e-05, + "loss": 1.6023, + "step": 37830 + }, + { + "epoch": 6.73, + "learning_rate": 2.7587555555555554e-05, + "loss": 1.6761, + "step": 37835 + }, + { + "epoch": 6.73, + "learning_rate": 2.7584592592592596e-05, + "loss": 1.6187, + "step": 37840 + }, + { + "epoch": 6.73, + "learning_rate": 2.758162962962963e-05, + "loss": 1.7075, + "step": 37845 + }, + { + "epoch": 6.73, + "learning_rate": 2.757866666666667e-05, + "loss": 1.7353, + "step": 37850 + }, + { + "epoch": 6.73, + "learning_rate": 2.7575703703703703e-05, + "loss": 1.7485, + "step": 37855 + }, + { + "epoch": 6.73, + "learning_rate": 2.7572740740740745e-05, + "loss": 1.6554, + "step": 37860 + }, + { + "epoch": 6.73, + "learning_rate": 2.7569777777777777e-05, + "loss": 1.7078, + "step": 37865 + }, + { + "epoch": 6.73, + "learning_rate": 2.7566814814814816e-05, + "loss": 1.6511, + "step": 37870 + }, + { + "epoch": 6.73, + "learning_rate": 2.756385185185185e-05, + "loss": 1.7261, + "step": 37875 + }, + { + "epoch": 6.73, + "learning_rate": 2.756088888888889e-05, + "loss": 1.6764, + "step": 37880 + }, + { + "epoch": 6.74, + "learning_rate": 2.7557925925925926e-05, + "loss": 1.6939, + "step": 37885 + }, + { + "epoch": 6.74, + "learning_rate": 2.7554962962962964e-05, + "loss": 1.709, + "step": 37890 + }, + { + "epoch": 6.74, + "learning_rate": 2.7552e-05, + "loss": 1.7082, + "step": 37895 + }, + { + "epoch": 6.74, + "learning_rate": 2.754903703703704e-05, + "loss": 1.7022, + "step": 37900 + }, + { + "epoch": 6.74, + "learning_rate": 2.7546074074074074e-05, + "loss": 1.7175, + "step": 37905 + }, + { + "epoch": 6.74, + "learning_rate": 2.7543111111111113e-05, + "loss": 1.7595, + "step": 37910 + }, + { + "epoch": 6.74, + "learning_rate": 2.754014814814815e-05, + "loss": 1.7246, + "step": 37915 + }, + { + "epoch": 6.74, + "learning_rate": 2.7537185185185187e-05, + "loss": 1.7699, + "step": 37920 + }, + { + "epoch": 6.74, + "learning_rate": 2.7534222222222223e-05, + "loss": 1.7495, + "step": 37925 + }, + { + "epoch": 6.74, + "learning_rate": 2.753125925925926e-05, + "loss": 1.6983, + "step": 37930 + }, + { + "epoch": 6.74, + "learning_rate": 2.7528296296296297e-05, + "loss": 1.6816, + "step": 37935 + }, + { + "epoch": 6.74, + "learning_rate": 2.7525333333333336e-05, + "loss": 1.6985, + "step": 37940 + }, + { + "epoch": 6.75, + "learning_rate": 2.752237037037037e-05, + "loss": 1.7094, + "step": 37945 + }, + { + "epoch": 6.75, + "learning_rate": 2.751940740740741e-05, + "loss": 1.5645, + "step": 37950 + }, + { + "epoch": 6.75, + "learning_rate": 2.7516444444444445e-05, + "loss": 1.6342, + "step": 37955 + }, + { + "epoch": 6.75, + "learning_rate": 2.7513481481481484e-05, + "loss": 1.6337, + "step": 37960 + }, + { + "epoch": 6.75, + "learning_rate": 2.751051851851852e-05, + "loss": 1.7071, + "step": 37965 + }, + { + "epoch": 6.75, + "learning_rate": 2.750755555555556e-05, + "loss": 1.6709, + "step": 37970 + }, + { + "epoch": 6.75, + "learning_rate": 2.7504592592592594e-05, + "loss": 1.7292, + "step": 37975 + }, + { + "epoch": 6.75, + "learning_rate": 2.7501629629629633e-05, + "loss": 1.7846, + "step": 37980 + }, + { + "epoch": 6.75, + "learning_rate": 2.7498666666666668e-05, + "loss": 1.6359, + "step": 37985 + }, + { + "epoch": 6.75, + "learning_rate": 2.7495703703703707e-05, + "loss": 1.69, + "step": 37990 + }, + { + "epoch": 6.75, + "learning_rate": 2.7492740740740742e-05, + "loss": 1.7307, + "step": 37995 + }, + { + "epoch": 6.76, + "learning_rate": 2.748977777777778e-05, + "loss": 1.7098, + "step": 38000 + }, + { + "epoch": 6.76, + "learning_rate": 2.7486814814814817e-05, + "loss": 1.7211, + "step": 38005 + }, + { + "epoch": 6.76, + "learning_rate": 2.7483851851851855e-05, + "loss": 1.625, + "step": 38010 + }, + { + "epoch": 6.76, + "learning_rate": 2.748088888888889e-05, + "loss": 1.7492, + "step": 38015 + }, + { + "epoch": 6.76, + "learning_rate": 2.747792592592593e-05, + "loss": 1.7915, + "step": 38020 + }, + { + "epoch": 6.76, + "learning_rate": 2.7474962962962965e-05, + "loss": 1.6924, + "step": 38025 + }, + { + "epoch": 6.76, + "learning_rate": 2.7472000000000004e-05, + "loss": 1.7519, + "step": 38030 + }, + { + "epoch": 6.76, + "learning_rate": 2.7469037037037036e-05, + "loss": 1.7871, + "step": 38035 + }, + { + "epoch": 6.76, + "learning_rate": 2.7466074074074078e-05, + "loss": 1.7404, + "step": 38040 + }, + { + "epoch": 6.76, + "learning_rate": 2.746311111111111e-05, + "loss": 1.6605, + "step": 38045 + }, + { + "epoch": 6.76, + "learning_rate": 2.7460148148148152e-05, + "loss": 1.7049, + "step": 38050 + }, + { + "epoch": 6.77, + "learning_rate": 2.7457185185185185e-05, + "loss": 1.5744, + "step": 38055 + }, + { + "epoch": 6.77, + "learning_rate": 2.7454222222222227e-05, + "loss": 1.6262, + "step": 38060 + }, + { + "epoch": 6.77, + "learning_rate": 2.745125925925926e-05, + "loss": 1.7362, + "step": 38065 + }, + { + "epoch": 6.77, + "learning_rate": 2.74482962962963e-05, + "loss": 1.7156, + "step": 38070 + }, + { + "epoch": 6.77, + "learning_rate": 2.7445333333333333e-05, + "loss": 1.7149, + "step": 38075 + }, + { + "epoch": 6.77, + "learning_rate": 2.7442370370370375e-05, + "loss": 1.6334, + "step": 38080 + }, + { + "epoch": 6.77, + "learning_rate": 2.7439407407407407e-05, + "loss": 1.6634, + "step": 38085 + }, + { + "epoch": 6.77, + "learning_rate": 2.743644444444445e-05, + "loss": 1.7249, + "step": 38090 + }, + { + "epoch": 6.77, + "learning_rate": 2.743348148148148e-05, + "loss": 1.7338, + "step": 38095 + }, + { + "epoch": 6.77, + "learning_rate": 2.743051851851852e-05, + "loss": 1.6821, + "step": 38100 + }, + { + "epoch": 6.77, + "learning_rate": 2.7427555555555556e-05, + "loss": 1.7063, + "step": 38105 + }, + { + "epoch": 6.78, + "learning_rate": 2.7424592592592595e-05, + "loss": 1.6882, + "step": 38110 + }, + { + "epoch": 6.78, + "learning_rate": 2.742162962962963e-05, + "loss": 1.6067, + "step": 38115 + }, + { + "epoch": 6.78, + "learning_rate": 2.741866666666667e-05, + "loss": 1.7357, + "step": 38120 + }, + { + "epoch": 6.78, + "learning_rate": 2.7415703703703704e-05, + "loss": 1.8009, + "step": 38125 + }, + { + "epoch": 6.78, + "learning_rate": 2.7412740740740743e-05, + "loss": 1.7765, + "step": 38130 + }, + { + "epoch": 6.78, + "learning_rate": 2.740977777777778e-05, + "loss": 1.6655, + "step": 38135 + }, + { + "epoch": 6.78, + "learning_rate": 2.7406814814814817e-05, + "loss": 1.6118, + "step": 38140 + }, + { + "epoch": 6.78, + "learning_rate": 2.7403851851851853e-05, + "loss": 1.7657, + "step": 38145 + }, + { + "epoch": 6.78, + "learning_rate": 2.740088888888889e-05, + "loss": 1.7294, + "step": 38150 + }, + { + "epoch": 6.78, + "learning_rate": 2.7397925925925927e-05, + "loss": 1.6436, + "step": 38155 + }, + { + "epoch": 6.78, + "learning_rate": 2.7394962962962966e-05, + "loss": 1.7629, + "step": 38160 + }, + { + "epoch": 6.78, + "learning_rate": 2.7392e-05, + "loss": 1.6935, + "step": 38165 + }, + { + "epoch": 6.79, + "learning_rate": 2.738903703703704e-05, + "loss": 1.6477, + "step": 38170 + }, + { + "epoch": 6.79, + "learning_rate": 2.7386074074074076e-05, + "loss": 1.7179, + "step": 38175 + }, + { + "epoch": 6.79, + "learning_rate": 2.7383111111111114e-05, + "loss": 1.7263, + "step": 38180 + }, + { + "epoch": 6.79, + "learning_rate": 2.738014814814815e-05, + "loss": 1.7099, + "step": 38185 + }, + { + "epoch": 6.79, + "learning_rate": 2.737718518518519e-05, + "loss": 1.7855, + "step": 38190 + }, + { + "epoch": 6.79, + "learning_rate": 2.7374222222222224e-05, + "loss": 1.7591, + "step": 38195 + }, + { + "epoch": 6.79, + "learning_rate": 2.7371259259259263e-05, + "loss": 1.707, + "step": 38200 + }, + { + "epoch": 6.79, + "learning_rate": 2.73682962962963e-05, + "loss": 1.7097, + "step": 38205 + }, + { + "epoch": 6.79, + "learning_rate": 2.7365333333333337e-05, + "loss": 1.7096, + "step": 38210 + }, + { + "epoch": 6.79, + "learning_rate": 2.7362370370370373e-05, + "loss": 1.7861, + "step": 38215 + }, + { + "epoch": 6.79, + "learning_rate": 2.735940740740741e-05, + "loss": 1.7729, + "step": 38220 + }, + { + "epoch": 6.8, + "learning_rate": 2.7356444444444447e-05, + "loss": 1.709, + "step": 38225 + }, + { + "epoch": 6.8, + "learning_rate": 2.7353481481481486e-05, + "loss": 1.6498, + "step": 38230 + }, + { + "epoch": 6.8, + "learning_rate": 2.735051851851852e-05, + "loss": 1.7757, + "step": 38235 + }, + { + "epoch": 6.8, + "learning_rate": 2.734755555555556e-05, + "loss": 1.7951, + "step": 38240 + }, + { + "epoch": 6.8, + "learning_rate": 2.7344592592592595e-05, + "loss": 1.6588, + "step": 38245 + }, + { + "epoch": 6.8, + "learning_rate": 2.7341629629629634e-05, + "loss": 1.6522, + "step": 38250 + }, + { + "epoch": 6.8, + "learning_rate": 2.733866666666667e-05, + "loss": 1.621, + "step": 38255 + }, + { + "epoch": 6.8, + "learning_rate": 2.73357037037037e-05, + "loss": 1.6179, + "step": 38260 + }, + { + "epoch": 6.8, + "learning_rate": 2.733274074074074e-05, + "loss": 1.7425, + "step": 38265 + }, + { + "epoch": 6.8, + "learning_rate": 2.7329777777777776e-05, + "loss": 1.8092, + "step": 38270 + }, + { + "epoch": 6.8, + "learning_rate": 2.7326814814814815e-05, + "loss": 1.6904, + "step": 38275 + }, + { + "epoch": 6.81, + "learning_rate": 2.732385185185185e-05, + "loss": 1.7383, + "step": 38280 + }, + { + "epoch": 6.81, + "learning_rate": 2.732088888888889e-05, + "loss": 1.7665, + "step": 38285 + }, + { + "epoch": 6.81, + "learning_rate": 2.7317925925925924e-05, + "loss": 1.7098, + "step": 38290 + }, + { + "epoch": 6.81, + "learning_rate": 2.7314962962962963e-05, + "loss": 1.7437, + "step": 38295 + }, + { + "epoch": 6.81, + "learning_rate": 2.7312e-05, + "loss": 1.6721, + "step": 38300 + }, + { + "epoch": 6.81, + "learning_rate": 2.7309037037037038e-05, + "loss": 1.644, + "step": 38305 + }, + { + "epoch": 6.81, + "learning_rate": 2.7306074074074073e-05, + "loss": 1.7014, + "step": 38310 + }, + { + "epoch": 6.81, + "learning_rate": 2.7303111111111112e-05, + "loss": 1.7397, + "step": 38315 + }, + { + "epoch": 6.81, + "learning_rate": 2.7300148148148147e-05, + "loss": 1.6223, + "step": 38320 + }, + { + "epoch": 6.81, + "learning_rate": 2.7297185185185186e-05, + "loss": 1.6928, + "step": 38325 + }, + { + "epoch": 6.81, + "learning_rate": 2.729422222222222e-05, + "loss": 1.6738, + "step": 38330 + }, + { + "epoch": 6.82, + "learning_rate": 2.729125925925926e-05, + "loss": 1.7409, + "step": 38335 + }, + { + "epoch": 6.82, + "learning_rate": 2.7288296296296296e-05, + "loss": 1.656, + "step": 38340 + }, + { + "epoch": 6.82, + "learning_rate": 2.7285333333333335e-05, + "loss": 1.7484, + "step": 38345 + }, + { + "epoch": 6.82, + "learning_rate": 2.728237037037037e-05, + "loss": 1.8178, + "step": 38350 + }, + { + "epoch": 6.82, + "learning_rate": 2.727940740740741e-05, + "loss": 1.6635, + "step": 38355 + }, + { + "epoch": 6.82, + "learning_rate": 2.7276444444444444e-05, + "loss": 1.7901, + "step": 38360 + }, + { + "epoch": 6.82, + "learning_rate": 2.7273481481481483e-05, + "loss": 1.6988, + "step": 38365 + }, + { + "epoch": 6.82, + "learning_rate": 2.727051851851852e-05, + "loss": 1.8211, + "step": 38370 + }, + { + "epoch": 6.82, + "learning_rate": 2.7267555555555557e-05, + "loss": 1.7936, + "step": 38375 + }, + { + "epoch": 6.82, + "learning_rate": 2.7265185185185187e-05, + "loss": 1.7099, + "step": 38380 + }, + { + "epoch": 6.82, + "learning_rate": 2.7262222222222222e-05, + "loss": 1.795, + "step": 38385 + }, + { + "epoch": 6.82, + "learning_rate": 2.725925925925926e-05, + "loss": 1.7067, + "step": 38390 + }, + { + "epoch": 6.83, + "learning_rate": 2.7256296296296297e-05, + "loss": 1.7444, + "step": 38395 + }, + { + "epoch": 6.83, + "learning_rate": 2.7253333333333336e-05, + "loss": 1.7236, + "step": 38400 + }, + { + "epoch": 6.83, + "learning_rate": 2.725037037037037e-05, + "loss": 1.7856, + "step": 38405 + }, + { + "epoch": 6.83, + "learning_rate": 2.724740740740741e-05, + "loss": 1.863, + "step": 38410 + }, + { + "epoch": 6.83, + "learning_rate": 2.7244444444444445e-05, + "loss": 1.6582, + "step": 38415 + }, + { + "epoch": 6.83, + "learning_rate": 2.7241481481481484e-05, + "loss": 1.7438, + "step": 38420 + }, + { + "epoch": 6.83, + "learning_rate": 2.723851851851852e-05, + "loss": 1.8004, + "step": 38425 + }, + { + "epoch": 6.83, + "learning_rate": 2.723555555555556e-05, + "loss": 1.8069, + "step": 38430 + }, + { + "epoch": 6.83, + "learning_rate": 2.7232592592592594e-05, + "loss": 1.6842, + "step": 38435 + }, + { + "epoch": 6.83, + "learning_rate": 2.7229629629629633e-05, + "loss": 1.8119, + "step": 38440 + }, + { + "epoch": 6.83, + "learning_rate": 2.7226666666666668e-05, + "loss": 1.813, + "step": 38445 + }, + { + "epoch": 6.84, + "learning_rate": 2.7223703703703707e-05, + "loss": 1.7099, + "step": 38450 + }, + { + "epoch": 6.84, + "learning_rate": 2.7220740740740742e-05, + "loss": 1.6226, + "step": 38455 + }, + { + "epoch": 6.84, + "learning_rate": 2.721777777777778e-05, + "loss": 1.6923, + "step": 38460 + }, + { + "epoch": 6.84, + "learning_rate": 2.7214814814814817e-05, + "loss": 1.6306, + "step": 38465 + }, + { + "epoch": 6.84, + "learning_rate": 2.7211851851851855e-05, + "loss": 1.6701, + "step": 38470 + }, + { + "epoch": 6.84, + "learning_rate": 2.720888888888889e-05, + "loss": 1.7723, + "step": 38475 + }, + { + "epoch": 6.84, + "learning_rate": 2.720592592592593e-05, + "loss": 1.755, + "step": 38480 + }, + { + "epoch": 6.84, + "learning_rate": 2.7202962962962965e-05, + "loss": 1.6728, + "step": 38485 + }, + { + "epoch": 6.84, + "learning_rate": 2.7200000000000004e-05, + "loss": 1.8165, + "step": 38490 + }, + { + "epoch": 6.84, + "learning_rate": 2.7197037037037036e-05, + "loss": 1.6725, + "step": 38495 + }, + { + "epoch": 6.84, + "learning_rate": 2.7194074074074078e-05, + "loss": 1.7182, + "step": 38500 + }, + { + "epoch": 6.85, + "learning_rate": 2.719111111111111e-05, + "loss": 1.6642, + "step": 38505 + }, + { + "epoch": 6.85, + "learning_rate": 2.7188148148148152e-05, + "loss": 1.7075, + "step": 38510 + }, + { + "epoch": 6.85, + "learning_rate": 2.7185185185185184e-05, + "loss": 1.6003, + "step": 38515 + }, + { + "epoch": 6.85, + "learning_rate": 2.7182222222222227e-05, + "loss": 1.8028, + "step": 38520 + }, + { + "epoch": 6.85, + "learning_rate": 2.717925925925926e-05, + "loss": 1.8123, + "step": 38525 + }, + { + "epoch": 6.85, + "learning_rate": 2.71762962962963e-05, + "loss": 1.719, + "step": 38530 + }, + { + "epoch": 6.85, + "learning_rate": 2.7173333333333333e-05, + "loss": 1.6868, + "step": 38535 + }, + { + "epoch": 6.85, + "learning_rate": 2.7170370370370375e-05, + "loss": 1.6494, + "step": 38540 + }, + { + "epoch": 6.85, + "learning_rate": 2.7167407407407407e-05, + "loss": 1.7459, + "step": 38545 + }, + { + "epoch": 6.85, + "learning_rate": 2.716444444444445e-05, + "loss": 1.677, + "step": 38550 + }, + { + "epoch": 6.85, + "learning_rate": 2.716148148148148e-05, + "loss": 1.7114, + "step": 38555 + }, + { + "epoch": 6.86, + "learning_rate": 2.7158518518518524e-05, + "loss": 1.749, + "step": 38560 + }, + { + "epoch": 6.86, + "learning_rate": 2.7155555555555556e-05, + "loss": 1.7212, + "step": 38565 + }, + { + "epoch": 6.86, + "learning_rate": 2.7152592592592595e-05, + "loss": 1.614, + "step": 38570 + }, + { + "epoch": 6.86, + "learning_rate": 2.714962962962963e-05, + "loss": 1.7823, + "step": 38575 + }, + { + "epoch": 6.86, + "learning_rate": 2.714666666666667e-05, + "loss": 1.735, + "step": 38580 + }, + { + "epoch": 6.86, + "learning_rate": 2.7143703703703704e-05, + "loss": 1.6902, + "step": 38585 + }, + { + "epoch": 6.86, + "learning_rate": 2.7140740740740743e-05, + "loss": 1.5945, + "step": 38590 + }, + { + "epoch": 6.86, + "learning_rate": 2.713777777777778e-05, + "loss": 1.7352, + "step": 38595 + }, + { + "epoch": 6.86, + "learning_rate": 2.7134814814814817e-05, + "loss": 1.6341, + "step": 38600 + }, + { + "epoch": 6.86, + "learning_rate": 2.7131851851851853e-05, + "loss": 1.7438, + "step": 38605 + }, + { + "epoch": 6.86, + "learning_rate": 2.712888888888889e-05, + "loss": 1.7386, + "step": 38610 + }, + { + "epoch": 6.86, + "learning_rate": 2.7125925925925927e-05, + "loss": 1.6602, + "step": 38615 + }, + { + "epoch": 6.87, + "learning_rate": 2.7122962962962966e-05, + "loss": 1.7025, + "step": 38620 + }, + { + "epoch": 6.87, + "learning_rate": 2.712e-05, + "loss": 1.7348, + "step": 38625 + }, + { + "epoch": 6.87, + "learning_rate": 2.711703703703704e-05, + "loss": 1.6417, + "step": 38630 + }, + { + "epoch": 6.87, + "learning_rate": 2.7114074074074075e-05, + "loss": 1.7161, + "step": 38635 + }, + { + "epoch": 6.87, + "learning_rate": 2.7111111111111114e-05, + "loss": 1.637, + "step": 38640 + }, + { + "epoch": 6.87, + "learning_rate": 2.710814814814815e-05, + "loss": 1.7299, + "step": 38645 + }, + { + "epoch": 6.87, + "learning_rate": 2.710518518518519e-05, + "loss": 1.7832, + "step": 38650 + }, + { + "epoch": 6.87, + "learning_rate": 2.7102222222222224e-05, + "loss": 1.6933, + "step": 38655 + }, + { + "epoch": 6.87, + "learning_rate": 2.7099259259259263e-05, + "loss": 1.677, + "step": 38660 + }, + { + "epoch": 6.87, + "learning_rate": 2.7096296296296298e-05, + "loss": 1.7022, + "step": 38665 + }, + { + "epoch": 6.87, + "learning_rate": 2.7093333333333337e-05, + "loss": 1.8013, + "step": 38670 + }, + { + "epoch": 6.88, + "learning_rate": 2.7090370370370373e-05, + "loss": 1.5793, + "step": 38675 + }, + { + "epoch": 6.88, + "learning_rate": 2.708740740740741e-05, + "loss": 1.7315, + "step": 38680 + }, + { + "epoch": 6.88, + "learning_rate": 2.7084444444444447e-05, + "loss": 1.8213, + "step": 38685 + }, + { + "epoch": 6.88, + "learning_rate": 2.7081481481481486e-05, + "loss": 1.7617, + "step": 38690 + }, + { + "epoch": 6.88, + "learning_rate": 2.707851851851852e-05, + "loss": 1.6321, + "step": 38695 + }, + { + "epoch": 6.88, + "learning_rate": 2.707555555555556e-05, + "loss": 1.7548, + "step": 38700 + }, + { + "epoch": 6.88, + "learning_rate": 2.7072592592592595e-05, + "loss": 1.7607, + "step": 38705 + }, + { + "epoch": 6.88, + "learning_rate": 2.7069629629629634e-05, + "loss": 1.7558, + "step": 38710 + }, + { + "epoch": 6.88, + "learning_rate": 2.706666666666667e-05, + "loss": 1.7335, + "step": 38715 + }, + { + "epoch": 6.88, + "learning_rate": 2.70637037037037e-05, + "loss": 1.7815, + "step": 38720 + }, + { + "epoch": 6.88, + "learning_rate": 2.706074074074074e-05, + "loss": 1.7328, + "step": 38725 + }, + { + "epoch": 6.89, + "learning_rate": 2.7057777777777776e-05, + "loss": 1.8017, + "step": 38730 + }, + { + "epoch": 6.89, + "learning_rate": 2.7054814814814815e-05, + "loss": 1.7455, + "step": 38735 + }, + { + "epoch": 6.89, + "learning_rate": 2.705185185185185e-05, + "loss": 1.7456, + "step": 38740 + }, + { + "epoch": 6.89, + "learning_rate": 2.704888888888889e-05, + "loss": 1.5899, + "step": 38745 + }, + { + "epoch": 6.89, + "learning_rate": 2.7045925925925924e-05, + "loss": 1.737, + "step": 38750 + }, + { + "epoch": 6.89, + "learning_rate": 2.7042962962962963e-05, + "loss": 1.5926, + "step": 38755 + }, + { + "epoch": 6.89, + "learning_rate": 2.704e-05, + "loss": 1.7656, + "step": 38760 + }, + { + "epoch": 6.89, + "learning_rate": 2.7037037037037037e-05, + "loss": 1.6769, + "step": 38765 + }, + { + "epoch": 6.89, + "learning_rate": 2.7034074074074073e-05, + "loss": 1.7178, + "step": 38770 + }, + { + "epoch": 6.89, + "learning_rate": 2.703111111111111e-05, + "loss": 1.7439, + "step": 38775 + }, + { + "epoch": 6.89, + "learning_rate": 2.7028148148148147e-05, + "loss": 1.743, + "step": 38780 + }, + { + "epoch": 6.9, + "learning_rate": 2.7025185185185186e-05, + "loss": 1.8063, + "step": 38785 + }, + { + "epoch": 6.9, + "learning_rate": 2.702222222222222e-05, + "loss": 1.6693, + "step": 38790 + }, + { + "epoch": 6.9, + "learning_rate": 2.701925925925926e-05, + "loss": 1.7541, + "step": 38795 + }, + { + "epoch": 6.9, + "learning_rate": 2.7016296296296296e-05, + "loss": 1.5864, + "step": 38800 + }, + { + "epoch": 6.9, + "learning_rate": 2.7013333333333334e-05, + "loss": 1.749, + "step": 38805 + }, + { + "epoch": 6.9, + "learning_rate": 2.701037037037037e-05, + "loss": 1.7416, + "step": 38810 + }, + { + "epoch": 6.9, + "learning_rate": 2.700740740740741e-05, + "loss": 1.6326, + "step": 38815 + }, + { + "epoch": 6.9, + "learning_rate": 2.7004444444444444e-05, + "loss": 1.6893, + "step": 38820 + }, + { + "epoch": 6.9, + "learning_rate": 2.7001481481481483e-05, + "loss": 1.5128, + "step": 38825 + }, + { + "epoch": 6.9, + "learning_rate": 2.699851851851852e-05, + "loss": 1.6727, + "step": 38830 + }, + { + "epoch": 6.9, + "learning_rate": 2.6995555555555557e-05, + "loss": 1.5707, + "step": 38835 + }, + { + "epoch": 6.9, + "learning_rate": 2.6992592592592593e-05, + "loss": 1.737, + "step": 38840 + }, + { + "epoch": 6.91, + "learning_rate": 2.698962962962963e-05, + "loss": 1.5429, + "step": 38845 + }, + { + "epoch": 6.91, + "learning_rate": 2.6986666666666667e-05, + "loss": 1.6957, + "step": 38850 + }, + { + "epoch": 6.91, + "learning_rate": 2.6983703703703706e-05, + "loss": 1.8308, + "step": 38855 + }, + { + "epoch": 6.91, + "learning_rate": 2.698074074074074e-05, + "loss": 1.7329, + "step": 38860 + }, + { + "epoch": 6.91, + "learning_rate": 2.697777777777778e-05, + "loss": 1.6496, + "step": 38865 + }, + { + "epoch": 6.91, + "learning_rate": 2.6974814814814815e-05, + "loss": 1.831, + "step": 38870 + }, + { + "epoch": 6.91, + "learning_rate": 2.6971851851851854e-05, + "loss": 1.6553, + "step": 38875 + }, + { + "epoch": 6.91, + "learning_rate": 2.6968888888888886e-05, + "loss": 1.4619, + "step": 38880 + }, + { + "epoch": 6.91, + "learning_rate": 2.696592592592593e-05, + "loss": 1.7402, + "step": 38885 + }, + { + "epoch": 6.91, + "learning_rate": 2.696296296296296e-05, + "loss": 1.6119, + "step": 38890 + }, + { + "epoch": 6.91, + "learning_rate": 2.6960000000000003e-05, + "loss": 1.7387, + "step": 38895 + }, + { + "epoch": 6.92, + "learning_rate": 2.6957037037037035e-05, + "loss": 1.9125, + "step": 38900 + }, + { + "epoch": 6.92, + "learning_rate": 2.6954074074074077e-05, + "loss": 1.6853, + "step": 38905 + }, + { + "epoch": 6.92, + "learning_rate": 2.695111111111111e-05, + "loss": 1.7305, + "step": 38910 + }, + { + "epoch": 6.92, + "learning_rate": 2.694814814814815e-05, + "loss": 1.7969, + "step": 38915 + }, + { + "epoch": 6.92, + "learning_rate": 2.6945185185185183e-05, + "loss": 1.5919, + "step": 38920 + }, + { + "epoch": 6.92, + "learning_rate": 2.6942222222222226e-05, + "loss": 1.8155, + "step": 38925 + }, + { + "epoch": 6.92, + "learning_rate": 2.6939259259259258e-05, + "loss": 1.7932, + "step": 38930 + }, + { + "epoch": 6.92, + "learning_rate": 2.69362962962963e-05, + "loss": 1.7487, + "step": 38935 + }, + { + "epoch": 6.92, + "learning_rate": 2.6933333333333332e-05, + "loss": 1.6724, + "step": 38940 + }, + { + "epoch": 6.92, + "learning_rate": 2.6930370370370374e-05, + "loss": 1.7682, + "step": 38945 + }, + { + "epoch": 6.92, + "learning_rate": 2.6927407407407406e-05, + "loss": 1.7245, + "step": 38950 + }, + { + "epoch": 6.93, + "learning_rate": 2.6924444444444445e-05, + "loss": 1.6018, + "step": 38955 + }, + { + "epoch": 6.93, + "learning_rate": 2.692148148148148e-05, + "loss": 1.6359, + "step": 38960 + }, + { + "epoch": 6.93, + "learning_rate": 2.691851851851852e-05, + "loss": 1.8212, + "step": 38965 + }, + { + "epoch": 6.93, + "learning_rate": 2.6915555555555555e-05, + "loss": 1.7602, + "step": 38970 + }, + { + "epoch": 6.93, + "learning_rate": 2.6912592592592593e-05, + "loss": 1.6906, + "step": 38975 + }, + { + "epoch": 6.93, + "learning_rate": 2.690962962962963e-05, + "loss": 1.651, + "step": 38980 + }, + { + "epoch": 6.93, + "learning_rate": 2.6906666666666668e-05, + "loss": 1.8139, + "step": 38985 + }, + { + "epoch": 6.93, + "learning_rate": 2.6903703703703703e-05, + "loss": 1.7974, + "step": 38990 + }, + { + "epoch": 6.93, + "learning_rate": 2.6900740740740742e-05, + "loss": 1.7303, + "step": 38995 + }, + { + "epoch": 6.93, + "learning_rate": 2.6897777777777777e-05, + "loss": 1.58, + "step": 39000 + }, + { + "epoch": 6.93, + "learning_rate": 2.6894814814814816e-05, + "loss": 1.7261, + "step": 39005 + }, + { + "epoch": 6.94, + "learning_rate": 2.689185185185185e-05, + "loss": 1.6666, + "step": 39010 + }, + { + "epoch": 6.94, + "learning_rate": 2.688888888888889e-05, + "loss": 1.6752, + "step": 39015 + }, + { + "epoch": 6.94, + "learning_rate": 2.6885925925925926e-05, + "loss": 1.7705, + "step": 39020 + }, + { + "epoch": 6.94, + "learning_rate": 2.6882962962962965e-05, + "loss": 1.5543, + "step": 39025 + }, + { + "epoch": 6.94, + "learning_rate": 2.688e-05, + "loss": 1.8508, + "step": 39030 + }, + { + "epoch": 6.94, + "learning_rate": 2.687703703703704e-05, + "loss": 1.6733, + "step": 39035 + }, + { + "epoch": 6.94, + "learning_rate": 2.6874074074074074e-05, + "loss": 1.7489, + "step": 39040 + }, + { + "epoch": 6.94, + "learning_rate": 2.6871111111111113e-05, + "loss": 1.7324, + "step": 39045 + }, + { + "epoch": 6.94, + "learning_rate": 2.686814814814815e-05, + "loss": 1.704, + "step": 39050 + }, + { + "epoch": 6.94, + "learning_rate": 2.6865185185185187e-05, + "loss": 1.7032, + "step": 39055 + }, + { + "epoch": 6.94, + "learning_rate": 2.6862222222222223e-05, + "loss": 1.732, + "step": 39060 + }, + { + "epoch": 6.94, + "learning_rate": 2.6859259259259262e-05, + "loss": 1.6605, + "step": 39065 + }, + { + "epoch": 6.95, + "learning_rate": 2.6856296296296297e-05, + "loss": 1.674, + "step": 39070 + }, + { + "epoch": 6.95, + "learning_rate": 2.6853333333333336e-05, + "loss": 1.7638, + "step": 39075 + }, + { + "epoch": 6.95, + "learning_rate": 2.685037037037037e-05, + "loss": 1.5656, + "step": 39080 + }, + { + "epoch": 6.95, + "learning_rate": 2.684740740740741e-05, + "loss": 1.6959, + "step": 39085 + }, + { + "epoch": 6.95, + "learning_rate": 2.6844444444444446e-05, + "loss": 1.6939, + "step": 39090 + }, + { + "epoch": 6.95, + "learning_rate": 2.6841481481481484e-05, + "loss": 1.7364, + "step": 39095 + }, + { + "epoch": 6.95, + "learning_rate": 2.683851851851852e-05, + "loss": 1.6231, + "step": 39100 + }, + { + "epoch": 6.95, + "learning_rate": 2.683555555555556e-05, + "loss": 1.8087, + "step": 39105 + }, + { + "epoch": 6.95, + "learning_rate": 2.6832592592592594e-05, + "loss": 1.6183, + "step": 39110 + }, + { + "epoch": 6.95, + "learning_rate": 2.6829629629629633e-05, + "loss": 1.6509, + "step": 39115 + }, + { + "epoch": 6.95, + "learning_rate": 2.6826666666666665e-05, + "loss": 1.6792, + "step": 39120 + }, + { + "epoch": 6.96, + "learning_rate": 2.6823703703703707e-05, + "loss": 1.8754, + "step": 39125 + }, + { + "epoch": 6.96, + "learning_rate": 2.682074074074074e-05, + "loss": 1.6914, + "step": 39130 + }, + { + "epoch": 6.96, + "learning_rate": 2.681777777777778e-05, + "loss": 1.7282, + "step": 39135 + }, + { + "epoch": 6.96, + "learning_rate": 2.6814814814814814e-05, + "loss": 1.7117, + "step": 39140 + }, + { + "epoch": 6.96, + "learning_rate": 2.6811851851851856e-05, + "loss": 1.7073, + "step": 39145 + }, + { + "epoch": 6.96, + "learning_rate": 2.6808888888888888e-05, + "loss": 1.8195, + "step": 39150 + }, + { + "epoch": 6.96, + "learning_rate": 2.680592592592593e-05, + "loss": 1.6012, + "step": 39155 + }, + { + "epoch": 6.96, + "learning_rate": 2.6802962962962962e-05, + "loss": 1.6131, + "step": 39160 + }, + { + "epoch": 6.96, + "learning_rate": 2.6800000000000004e-05, + "loss": 1.7422, + "step": 39165 + }, + { + "epoch": 6.96, + "learning_rate": 2.6797037037037036e-05, + "loss": 1.8097, + "step": 39170 + }, + { + "epoch": 6.96, + "learning_rate": 2.679407407407408e-05, + "loss": 1.7416, + "step": 39175 + }, + { + "epoch": 6.97, + "learning_rate": 2.679111111111111e-05, + "loss": 1.6002, + "step": 39180 + }, + { + "epoch": 6.97, + "learning_rate": 2.678814814814815e-05, + "loss": 1.8867, + "step": 39185 + }, + { + "epoch": 6.97, + "learning_rate": 2.6785185185185185e-05, + "loss": 1.7756, + "step": 39190 + }, + { + "epoch": 6.97, + "learning_rate": 2.6782222222222224e-05, + "loss": 1.8117, + "step": 39195 + }, + { + "epoch": 6.97, + "learning_rate": 2.677925925925926e-05, + "loss": 1.8052, + "step": 39200 + }, + { + "epoch": 6.97, + "learning_rate": 2.6776296296296298e-05, + "loss": 1.7525, + "step": 39205 + }, + { + "epoch": 6.97, + "learning_rate": 2.6773333333333333e-05, + "loss": 1.6986, + "step": 39210 + }, + { + "epoch": 6.97, + "learning_rate": 2.6770370370370372e-05, + "loss": 1.6348, + "step": 39215 + }, + { + "epoch": 6.97, + "learning_rate": 2.6767407407407408e-05, + "loss": 1.7393, + "step": 39220 + }, + { + "epoch": 6.97, + "learning_rate": 2.6764444444444446e-05, + "loss": 1.5454, + "step": 39225 + }, + { + "epoch": 6.97, + "learning_rate": 2.6761481481481482e-05, + "loss": 1.7708, + "step": 39230 + }, + { + "epoch": 6.98, + "learning_rate": 2.675851851851852e-05, + "loss": 1.8216, + "step": 39235 + }, + { + "epoch": 6.98, + "learning_rate": 2.6755555555555556e-05, + "loss": 1.7061, + "step": 39240 + }, + { + "epoch": 6.98, + "learning_rate": 2.6752592592592595e-05, + "loss": 1.6456, + "step": 39245 + }, + { + "epoch": 6.98, + "learning_rate": 2.674962962962963e-05, + "loss": 1.7191, + "step": 39250 + }, + { + "epoch": 6.98, + "learning_rate": 2.674666666666667e-05, + "loss": 1.8653, + "step": 39255 + }, + { + "epoch": 6.98, + "learning_rate": 2.6743703703703705e-05, + "loss": 1.7782, + "step": 39260 + }, + { + "epoch": 6.98, + "learning_rate": 2.6740740740740743e-05, + "loss": 1.6248, + "step": 39265 + }, + { + "epoch": 6.98, + "learning_rate": 2.673777777777778e-05, + "loss": 1.8535, + "step": 39270 + }, + { + "epoch": 6.98, + "learning_rate": 2.6734814814814818e-05, + "loss": 1.6592, + "step": 39275 + }, + { + "epoch": 6.98, + "learning_rate": 2.6731851851851853e-05, + "loss": 1.7687, + "step": 39280 + }, + { + "epoch": 6.98, + "learning_rate": 2.6728888888888892e-05, + "loss": 1.76, + "step": 39285 + }, + { + "epoch": 6.98, + "learning_rate": 2.6725925925925927e-05, + "loss": 1.5495, + "step": 39290 + }, + { + "epoch": 6.99, + "learning_rate": 2.6722962962962966e-05, + "loss": 1.7736, + "step": 39295 + }, + { + "epoch": 6.99, + "learning_rate": 2.672e-05, + "loss": 1.7244, + "step": 39300 + }, + { + "epoch": 6.99, + "learning_rate": 2.671703703703704e-05, + "loss": 1.8789, + "step": 39305 + }, + { + "epoch": 6.99, + "learning_rate": 2.6714074074074076e-05, + "loss": 1.7802, + "step": 39310 + }, + { + "epoch": 6.99, + "learning_rate": 2.6711111111111115e-05, + "loss": 1.6449, + "step": 39315 + }, + { + "epoch": 6.99, + "learning_rate": 2.670814814814815e-05, + "loss": 1.76, + "step": 39320 + }, + { + "epoch": 6.99, + "learning_rate": 2.670518518518519e-05, + "loss": 1.8295, + "step": 39325 + }, + { + "epoch": 6.99, + "learning_rate": 2.6702222222222224e-05, + "loss": 1.6972, + "step": 39330 + }, + { + "epoch": 6.99, + "learning_rate": 2.6699259259259263e-05, + "loss": 1.7639, + "step": 39335 + }, + { + "epoch": 6.99, + "learning_rate": 2.66962962962963e-05, + "loss": 1.704, + "step": 39340 + }, + { + "epoch": 6.99, + "learning_rate": 2.6693333333333338e-05, + "loss": 1.6268, + "step": 39345 + }, + { + "epoch": 7.0, + "learning_rate": 2.669037037037037e-05, + "loss": 1.6957, + "step": 39350 + }, + { + "epoch": 7.0, + "learning_rate": 2.6687407407407412e-05, + "loss": 1.6491, + "step": 39355 + }, + { + "epoch": 7.0, + "learning_rate": 2.6684444444444444e-05, + "loss": 1.7614, + "step": 39360 + }, + { + "epoch": 7.0, + "learning_rate": 2.6681481481481486e-05, + "loss": 1.7203, + "step": 39365 + }, + { + "epoch": 7.0, + "learning_rate": 2.6678518518518518e-05, + "loss": 1.7488, + "step": 39370 + }, + { + "epoch": 7.0, + "learning_rate": 2.667555555555556e-05, + "loss": 1.7527, + "step": 39375 + }, + { + "epoch": 7.0, + "learning_rate": 2.6672592592592592e-05, + "loss": 1.5614, + "step": 39380 + }, + { + "epoch": 7.0, + "learning_rate": 2.6669629629629635e-05, + "loss": 1.6325, + "step": 39385 + }, + { + "epoch": 7.0, + "learning_rate": 2.6666666666666667e-05, + "loss": 1.5437, + "step": 39390 + }, + { + "epoch": 7.0, + "learning_rate": 2.666370370370371e-05, + "loss": 1.6232, + "step": 39395 + }, + { + "epoch": 7.0, + "learning_rate": 2.666074074074074e-05, + "loss": 1.662, + "step": 39400 + }, + { + "epoch": 7.01, + "learning_rate": 2.6657777777777783e-05, + "loss": 1.6137, + "step": 39405 + }, + { + "epoch": 7.01, + "learning_rate": 2.6654814814814815e-05, + "loss": 1.544, + "step": 39410 + }, + { + "epoch": 7.01, + "learning_rate": 2.6651851851851857e-05, + "loss": 1.5205, + "step": 39415 + }, + { + "epoch": 7.01, + "learning_rate": 2.664888888888889e-05, + "loss": 1.5082, + "step": 39420 + }, + { + "epoch": 7.01, + "learning_rate": 2.6645925925925928e-05, + "loss": 1.5779, + "step": 39425 + }, + { + "epoch": 7.01, + "learning_rate": 2.6642962962962964e-05, + "loss": 1.6413, + "step": 39430 + }, + { + "epoch": 7.01, + "learning_rate": 2.6640000000000002e-05, + "loss": 1.7245, + "step": 39435 + }, + { + "epoch": 7.01, + "learning_rate": 2.6637037037037038e-05, + "loss": 1.6349, + "step": 39440 + }, + { + "epoch": 7.01, + "learning_rate": 2.6634074074074077e-05, + "loss": 1.6914, + "step": 39445 + }, + { + "epoch": 7.01, + "learning_rate": 2.6631111111111112e-05, + "loss": 1.6252, + "step": 39450 + }, + { + "epoch": 7.01, + "learning_rate": 2.662814814814815e-05, + "loss": 1.6854, + "step": 39455 + }, + { + "epoch": 7.02, + "learning_rate": 2.6625185185185186e-05, + "loss": 1.6044, + "step": 39460 + }, + { + "epoch": 7.02, + "learning_rate": 2.6622222222222225e-05, + "loss": 1.6407, + "step": 39465 + }, + { + "epoch": 7.02, + "learning_rate": 2.661925925925926e-05, + "loss": 1.4787, + "step": 39470 + }, + { + "epoch": 7.02, + "learning_rate": 2.66162962962963e-05, + "loss": 1.6658, + "step": 39475 + }, + { + "epoch": 7.02, + "learning_rate": 2.6613333333333335e-05, + "loss": 1.6632, + "step": 39480 + }, + { + "epoch": 7.02, + "learning_rate": 2.6610370370370374e-05, + "loss": 1.5911, + "step": 39485 + }, + { + "epoch": 7.02, + "learning_rate": 2.660740740740741e-05, + "loss": 1.5671, + "step": 39490 + }, + { + "epoch": 7.02, + "learning_rate": 2.6604444444444445e-05, + "loss": 1.7011, + "step": 39495 + }, + { + "epoch": 7.02, + "learning_rate": 2.6601481481481483e-05, + "loss": 1.6292, + "step": 39500 + }, + { + "epoch": 7.02, + "learning_rate": 2.6598518518518515e-05, + "loss": 1.6105, + "step": 39505 + }, + { + "epoch": 7.02, + "learning_rate": 2.6595555555555558e-05, + "loss": 1.6266, + "step": 39510 + }, + { + "epoch": 7.02, + "learning_rate": 2.659259259259259e-05, + "loss": 1.5613, + "step": 39515 + }, + { + "epoch": 7.03, + "learning_rate": 2.6589629629629632e-05, + "loss": 1.636, + "step": 39520 + }, + { + "epoch": 7.03, + "learning_rate": 2.6586666666666664e-05, + "loss": 1.5723, + "step": 39525 + }, + { + "epoch": 7.03, + "learning_rate": 2.6583703703703706e-05, + "loss": 1.7308, + "step": 39530 + }, + { + "epoch": 7.03, + "learning_rate": 2.6580740740740738e-05, + "loss": 1.6096, + "step": 39535 + }, + { + "epoch": 7.03, + "learning_rate": 2.657777777777778e-05, + "loss": 1.6317, + "step": 39540 + }, + { + "epoch": 7.03, + "learning_rate": 2.6574814814814812e-05, + "loss": 1.4433, + "step": 39545 + }, + { + "epoch": 7.03, + "learning_rate": 2.6571851851851855e-05, + "loss": 1.6213, + "step": 39550 + }, + { + "epoch": 7.03, + "learning_rate": 2.6568888888888887e-05, + "loss": 1.6322, + "step": 39555 + }, + { + "epoch": 7.03, + "learning_rate": 2.656592592592593e-05, + "loss": 1.5699, + "step": 39560 + }, + { + "epoch": 7.03, + "learning_rate": 2.656296296296296e-05, + "loss": 1.5164, + "step": 39565 + }, + { + "epoch": 7.03, + "learning_rate": 2.6560000000000003e-05, + "loss": 1.58, + "step": 39570 + }, + { + "epoch": 7.04, + "learning_rate": 2.6557037037037035e-05, + "loss": 1.6357, + "step": 39575 + }, + { + "epoch": 7.04, + "learning_rate": 2.6554074074074074e-05, + "loss": 1.6281, + "step": 39580 + }, + { + "epoch": 7.04, + "learning_rate": 2.655111111111111e-05, + "loss": 1.6633, + "step": 39585 + }, + { + "epoch": 7.04, + "learning_rate": 2.654814814814815e-05, + "loss": 1.7541, + "step": 39590 + }, + { + "epoch": 7.04, + "learning_rate": 2.6545185185185184e-05, + "loss": 1.7321, + "step": 39595 + }, + { + "epoch": 7.04, + "learning_rate": 2.6542222222222223e-05, + "loss": 1.6777, + "step": 39600 + }, + { + "epoch": 7.04, + "learning_rate": 2.6539259259259258e-05, + "loss": 1.5609, + "step": 39605 + }, + { + "epoch": 7.04, + "learning_rate": 2.6536296296296297e-05, + "loss": 1.5672, + "step": 39610 + }, + { + "epoch": 7.04, + "learning_rate": 2.6533333333333332e-05, + "loss": 1.574, + "step": 39615 + }, + { + "epoch": 7.04, + "learning_rate": 2.653037037037037e-05, + "loss": 1.5763, + "step": 39620 + }, + { + "epoch": 7.04, + "learning_rate": 2.6527407407407407e-05, + "loss": 1.63, + "step": 39625 + }, + { + "epoch": 7.05, + "learning_rate": 2.6524444444444445e-05, + "loss": 1.6669, + "step": 39630 + }, + { + "epoch": 7.05, + "learning_rate": 2.652148148148148e-05, + "loss": 1.6175, + "step": 39635 + }, + { + "epoch": 7.05, + "learning_rate": 2.651851851851852e-05, + "loss": 1.6702, + "step": 39640 + }, + { + "epoch": 7.05, + "learning_rate": 2.6515555555555555e-05, + "loss": 1.5274, + "step": 39645 + }, + { + "epoch": 7.05, + "learning_rate": 2.6512592592592594e-05, + "loss": 1.6793, + "step": 39650 + }, + { + "epoch": 7.05, + "learning_rate": 2.650962962962963e-05, + "loss": 1.6243, + "step": 39655 + }, + { + "epoch": 7.05, + "learning_rate": 2.6506666666666668e-05, + "loss": 1.5327, + "step": 39660 + }, + { + "epoch": 7.05, + "learning_rate": 2.6503703703703704e-05, + "loss": 1.6488, + "step": 39665 + }, + { + "epoch": 7.05, + "learning_rate": 2.6500740740740742e-05, + "loss": 1.549, + "step": 39670 + }, + { + "epoch": 7.05, + "learning_rate": 2.6497777777777778e-05, + "loss": 1.6504, + "step": 39675 + }, + { + "epoch": 7.05, + "learning_rate": 2.6494814814814817e-05, + "loss": 1.4957, + "step": 39680 + }, + { + "epoch": 7.06, + "learning_rate": 2.6491851851851852e-05, + "loss": 1.5378, + "step": 39685 + }, + { + "epoch": 7.06, + "learning_rate": 2.648888888888889e-05, + "loss": 1.625, + "step": 39690 + }, + { + "epoch": 7.06, + "learning_rate": 2.6485925925925926e-05, + "loss": 1.7486, + "step": 39695 + }, + { + "epoch": 7.06, + "learning_rate": 2.6482962962962965e-05, + "loss": 1.5924, + "step": 39700 + }, + { + "epoch": 7.06, + "learning_rate": 2.648e-05, + "loss": 1.5741, + "step": 39705 + }, + { + "epoch": 7.06, + "learning_rate": 2.647703703703704e-05, + "loss": 1.5325, + "step": 39710 + }, + { + "epoch": 7.06, + "learning_rate": 2.6474074074074075e-05, + "loss": 1.6584, + "step": 39715 + }, + { + "epoch": 7.06, + "learning_rate": 2.6471111111111114e-05, + "loss": 1.5532, + "step": 39720 + }, + { + "epoch": 7.06, + "learning_rate": 2.646814814814815e-05, + "loss": 1.6679, + "step": 39725 + }, + { + "epoch": 7.06, + "learning_rate": 2.6465185185185188e-05, + "loss": 1.5596, + "step": 39730 + }, + { + "epoch": 7.06, + "learning_rate": 2.646222222222222e-05, + "loss": 1.6577, + "step": 39735 + }, + { + "epoch": 7.06, + "learning_rate": 2.6459259259259262e-05, + "loss": 1.5754, + "step": 39740 + }, + { + "epoch": 7.07, + "learning_rate": 2.6456296296296294e-05, + "loss": 1.4986, + "step": 39745 + }, + { + "epoch": 7.07, + "learning_rate": 2.6453333333333336e-05, + "loss": 1.6387, + "step": 39750 + }, + { + "epoch": 7.07, + "learning_rate": 2.645037037037037e-05, + "loss": 1.6718, + "step": 39755 + }, + { + "epoch": 7.07, + "learning_rate": 2.644740740740741e-05, + "loss": 1.5717, + "step": 39760 + }, + { + "epoch": 7.07, + "learning_rate": 2.6444444444444443e-05, + "loss": 1.5841, + "step": 39765 + }, + { + "epoch": 7.07, + "learning_rate": 2.6441481481481485e-05, + "loss": 1.7476, + "step": 39770 + }, + { + "epoch": 7.07, + "learning_rate": 2.6438518518518517e-05, + "loss": 1.545, + "step": 39775 + }, + { + "epoch": 7.07, + "learning_rate": 2.643555555555556e-05, + "loss": 1.6577, + "step": 39780 + }, + { + "epoch": 7.07, + "learning_rate": 2.643259259259259e-05, + "loss": 1.6188, + "step": 39785 + }, + { + "epoch": 7.07, + "learning_rate": 2.6429629629629633e-05, + "loss": 1.6151, + "step": 39790 + }, + { + "epoch": 7.07, + "learning_rate": 2.6426666666666665e-05, + "loss": 1.5332, + "step": 39795 + }, + { + "epoch": 7.08, + "learning_rate": 2.6423703703703708e-05, + "loss": 1.5699, + "step": 39800 + }, + { + "epoch": 7.08, + "learning_rate": 2.642074074074074e-05, + "loss": 1.6497, + "step": 39805 + }, + { + "epoch": 7.08, + "learning_rate": 2.641777777777778e-05, + "loss": 1.582, + "step": 39810 + }, + { + "epoch": 7.08, + "learning_rate": 2.6414814814814814e-05, + "loss": 1.6528, + "step": 39815 + }, + { + "epoch": 7.08, + "learning_rate": 2.6411851851851853e-05, + "loss": 1.6658, + "step": 39820 + }, + { + "epoch": 7.08, + "learning_rate": 2.6408888888888888e-05, + "loss": 1.6631, + "step": 39825 + }, + { + "epoch": 7.08, + "learning_rate": 2.6405925925925927e-05, + "loss": 1.6084, + "step": 39830 + }, + { + "epoch": 7.08, + "learning_rate": 2.6402962962962963e-05, + "loss": 1.6147, + "step": 39835 + }, + { + "epoch": 7.08, + "learning_rate": 2.64e-05, + "loss": 1.555, + "step": 39840 + }, + { + "epoch": 7.08, + "learning_rate": 2.6397037037037037e-05, + "loss": 1.5685, + "step": 39845 + }, + { + "epoch": 7.08, + "learning_rate": 2.6394074074074076e-05, + "loss": 1.6103, + "step": 39850 + }, + { + "epoch": 7.09, + "learning_rate": 2.639111111111111e-05, + "loss": 1.5714, + "step": 39855 + }, + { + "epoch": 7.09, + "learning_rate": 2.638814814814815e-05, + "loss": 1.725, + "step": 39860 + }, + { + "epoch": 7.09, + "learning_rate": 2.6385185185185185e-05, + "loss": 1.5902, + "step": 39865 + }, + { + "epoch": 7.09, + "learning_rate": 2.6382222222222224e-05, + "loss": 1.6609, + "step": 39870 + }, + { + "epoch": 7.09, + "learning_rate": 2.637925925925926e-05, + "loss": 1.6478, + "step": 39875 + }, + { + "epoch": 7.09, + "learning_rate": 2.63762962962963e-05, + "loss": 1.6393, + "step": 39880 + }, + { + "epoch": 7.09, + "learning_rate": 2.6373333333333334e-05, + "loss": 1.5522, + "step": 39885 + }, + { + "epoch": 7.09, + "learning_rate": 2.6370370370370373e-05, + "loss": 1.471, + "step": 39890 + }, + { + "epoch": 7.09, + "learning_rate": 2.6367407407407408e-05, + "loss": 1.6275, + "step": 39895 + }, + { + "epoch": 7.09, + "learning_rate": 2.6364444444444447e-05, + "loss": 1.6529, + "step": 39900 + }, + { + "epoch": 7.09, + "learning_rate": 2.6361481481481482e-05, + "loss": 1.6992, + "step": 39905 + }, + { + "epoch": 7.1, + "learning_rate": 2.635851851851852e-05, + "loss": 1.6153, + "step": 39910 + }, + { + "epoch": 7.1, + "learning_rate": 2.6355555555555557e-05, + "loss": 1.5969, + "step": 39915 + }, + { + "epoch": 7.1, + "learning_rate": 2.6352592592592595e-05, + "loss": 1.6598, + "step": 39920 + }, + { + "epoch": 7.1, + "learning_rate": 2.634962962962963e-05, + "loss": 1.5874, + "step": 39925 + }, + { + "epoch": 7.1, + "learning_rate": 2.634666666666667e-05, + "loss": 1.5792, + "step": 39930 + }, + { + "epoch": 7.1, + "learning_rate": 2.6343703703703705e-05, + "loss": 1.5977, + "step": 39935 + }, + { + "epoch": 7.1, + "learning_rate": 2.6340740740740744e-05, + "loss": 1.6166, + "step": 39940 + }, + { + "epoch": 7.1, + "learning_rate": 2.633777777777778e-05, + "loss": 1.6723, + "step": 39945 + }, + { + "epoch": 7.1, + "learning_rate": 2.6334814814814818e-05, + "loss": 1.72, + "step": 39950 + }, + { + "epoch": 7.1, + "learning_rate": 2.6331851851851854e-05, + "loss": 1.6055, + "step": 39955 + }, + { + "epoch": 7.1, + "learning_rate": 2.6328888888888892e-05, + "loss": 1.4739, + "step": 39960 + }, + { + "epoch": 7.1, + "learning_rate": 2.6325925925925924e-05, + "loss": 1.6605, + "step": 39965 + }, + { + "epoch": 7.11, + "learning_rate": 2.6322962962962967e-05, + "loss": 1.5691, + "step": 39970 + }, + { + "epoch": 7.11, + "learning_rate": 2.632e-05, + "loss": 1.6001, + "step": 39975 + }, + { + "epoch": 7.11, + "learning_rate": 2.631703703703704e-05, + "loss": 1.5721, + "step": 39980 + }, + { + "epoch": 7.11, + "learning_rate": 2.6314074074074073e-05, + "loss": 1.6088, + "step": 39985 + }, + { + "epoch": 7.11, + "learning_rate": 2.6311111111111115e-05, + "loss": 1.7683, + "step": 39990 + }, + { + "epoch": 7.11, + "learning_rate": 2.6308148148148147e-05, + "loss": 1.7085, + "step": 39995 + }, + { + "epoch": 7.11, + "learning_rate": 2.630518518518519e-05, + "loss": 1.7289, + "step": 40000 + }, + { + "epoch": 7.11, + "eval_loss": 1.5409154891967773, + "eval_rouge2_fmeasure": 0.195, + "eval_rouge2_precision": 0.2256, + "eval_rouge2_recall": 0.1809, + "eval_runtime": 38351.4186, + "eval_samples_per_second": 0.13, + "eval_steps_per_second": 0.065, + "step": 40000 + }, + { + "epoch": 7.11, + "learning_rate": 2.630222222222222e-05, + "loss": 1.6161, + "step": 40005 + }, + { + "epoch": 7.11, + "learning_rate": 2.6299259259259264e-05, + "loss": 1.5507, + "step": 40010 + }, + { + "epoch": 7.11, + "learning_rate": 2.6296296296296296e-05, + "loss": 1.7311, + "step": 40015 + }, + { + "epoch": 7.11, + "learning_rate": 2.6293333333333338e-05, + "loss": 1.5518, + "step": 40020 + }, + { + "epoch": 7.12, + "learning_rate": 2.629037037037037e-05, + "loss": 1.507, + "step": 40025 + }, + { + "epoch": 7.12, + "learning_rate": 2.6287407407407412e-05, + "loss": 1.5933, + "step": 40030 + }, + { + "epoch": 7.12, + "learning_rate": 2.6284444444444444e-05, + "loss": 1.5868, + "step": 40035 + }, + { + "epoch": 7.12, + "learning_rate": 2.6281481481481483e-05, + "loss": 1.5985, + "step": 40040 + }, + { + "epoch": 7.12, + "learning_rate": 2.627851851851852e-05, + "loss": 1.6734, + "step": 40045 + }, + { + "epoch": 7.12, + "learning_rate": 2.6275555555555557e-05, + "loss": 1.494, + "step": 40050 + }, + { + "epoch": 7.12, + "learning_rate": 2.6272592592592593e-05, + "loss": 1.573, + "step": 40055 + }, + { + "epoch": 7.12, + "learning_rate": 2.626962962962963e-05, + "loss": 1.5441, + "step": 40060 + }, + { + "epoch": 7.12, + "learning_rate": 2.6266666666666667e-05, + "loss": 1.6247, + "step": 40065 + }, + { + "epoch": 7.12, + "learning_rate": 2.6263703703703706e-05, + "loss": 1.6404, + "step": 40070 + }, + { + "epoch": 7.12, + "learning_rate": 2.626074074074074e-05, + "loss": 1.7445, + "step": 40075 + }, + { + "epoch": 7.13, + "learning_rate": 2.625777777777778e-05, + "loss": 1.5704, + "step": 40080 + }, + { + "epoch": 7.13, + "learning_rate": 2.6254814814814816e-05, + "loss": 1.6002, + "step": 40085 + }, + { + "epoch": 7.13, + "learning_rate": 2.6251851851851854e-05, + "loss": 1.6565, + "step": 40090 + }, + { + "epoch": 7.13, + "learning_rate": 2.624888888888889e-05, + "loss": 1.6243, + "step": 40095 + }, + { + "epoch": 7.13, + "learning_rate": 2.624592592592593e-05, + "loss": 1.6308, + "step": 40100 + }, + { + "epoch": 7.13, + "learning_rate": 2.6242962962962964e-05, + "loss": 1.7161, + "step": 40105 + }, + { + "epoch": 7.13, + "learning_rate": 2.6240000000000003e-05, + "loss": 1.5762, + "step": 40110 + }, + { + "epoch": 7.13, + "learning_rate": 2.623703703703704e-05, + "loss": 1.6354, + "step": 40115 + }, + { + "epoch": 7.13, + "learning_rate": 2.6234074074074077e-05, + "loss": 1.7506, + "step": 40120 + }, + { + "epoch": 7.13, + "learning_rate": 2.6231111111111113e-05, + "loss": 1.5823, + "step": 40125 + }, + { + "epoch": 7.13, + "learning_rate": 2.622814814814815e-05, + "loss": 1.5956, + "step": 40130 + }, + { + "epoch": 7.14, + "learning_rate": 2.6225185185185187e-05, + "loss": 1.5283, + "step": 40135 + }, + { + "epoch": 7.14, + "learning_rate": 2.6222222222222226e-05, + "loss": 1.7062, + "step": 40140 + }, + { + "epoch": 7.14, + "learning_rate": 2.621925925925926e-05, + "loss": 1.5892, + "step": 40145 + }, + { + "epoch": 7.14, + "learning_rate": 2.62162962962963e-05, + "loss": 1.5382, + "step": 40150 + }, + { + "epoch": 7.14, + "learning_rate": 2.6213333333333335e-05, + "loss": 1.5796, + "step": 40155 + }, + { + "epoch": 7.14, + "learning_rate": 2.6210370370370374e-05, + "loss": 1.6191, + "step": 40160 + }, + { + "epoch": 7.14, + "learning_rate": 2.620740740740741e-05, + "loss": 1.5465, + "step": 40165 + }, + { + "epoch": 7.14, + "learning_rate": 2.620444444444445e-05, + "loss": 1.5481, + "step": 40170 + }, + { + "epoch": 7.14, + "learning_rate": 2.6201481481481484e-05, + "loss": 1.5265, + "step": 40175 + }, + { + "epoch": 7.14, + "learning_rate": 2.6198518518518523e-05, + "loss": 1.6412, + "step": 40180 + }, + { + "epoch": 7.14, + "learning_rate": 2.6195555555555558e-05, + "loss": 1.5585, + "step": 40185 + }, + { + "epoch": 7.14, + "learning_rate": 2.6192592592592597e-05, + "loss": 1.5731, + "step": 40190 + }, + { + "epoch": 7.15, + "learning_rate": 2.618962962962963e-05, + "loss": 1.6809, + "step": 40195 + }, + { + "epoch": 7.15, + "learning_rate": 2.618666666666667e-05, + "loss": 1.5661, + "step": 40200 + }, + { + "epoch": 7.15, + "learning_rate": 2.6183703703703703e-05, + "loss": 1.6546, + "step": 40205 + }, + { + "epoch": 7.15, + "learning_rate": 2.6180740740740745e-05, + "loss": 1.579, + "step": 40210 + }, + { + "epoch": 7.15, + "learning_rate": 2.6177777777777777e-05, + "loss": 1.6666, + "step": 40215 + }, + { + "epoch": 7.15, + "learning_rate": 2.617481481481482e-05, + "loss": 1.5772, + "step": 40220 + }, + { + "epoch": 7.15, + "learning_rate": 2.6171851851851852e-05, + "loss": 1.7114, + "step": 40225 + }, + { + "epoch": 7.15, + "learning_rate": 2.6168888888888894e-05, + "loss": 1.5842, + "step": 40230 + }, + { + "epoch": 7.15, + "learning_rate": 2.6165925925925926e-05, + "loss": 1.5407, + "step": 40235 + }, + { + "epoch": 7.15, + "learning_rate": 2.6162962962962968e-05, + "loss": 1.778, + "step": 40240 + }, + { + "epoch": 7.15, + "learning_rate": 2.616e-05, + "loss": 1.5054, + "step": 40245 + }, + { + "epoch": 7.16, + "learning_rate": 2.6157037037037042e-05, + "loss": 1.6951, + "step": 40250 + }, + { + "epoch": 7.16, + "learning_rate": 2.6154074074074075e-05, + "loss": 1.6027, + "step": 40255 + }, + { + "epoch": 7.16, + "learning_rate": 2.6151111111111117e-05, + "loss": 1.6308, + "step": 40260 + }, + { + "epoch": 7.16, + "learning_rate": 2.614814814814815e-05, + "loss": 1.7071, + "step": 40265 + }, + { + "epoch": 7.16, + "learning_rate": 2.6145185185185184e-05, + "loss": 1.5611, + "step": 40270 + }, + { + "epoch": 7.16, + "learning_rate": 2.6142222222222223e-05, + "loss": 1.5572, + "step": 40275 + }, + { + "epoch": 7.16, + "learning_rate": 2.613925925925926e-05, + "loss": 1.6683, + "step": 40280 + }, + { + "epoch": 7.16, + "learning_rate": 2.6136296296296297e-05, + "loss": 1.5243, + "step": 40285 + }, + { + "epoch": 7.16, + "learning_rate": 2.6133333333333333e-05, + "loss": 1.6808, + "step": 40290 + }, + { + "epoch": 7.16, + "learning_rate": 2.613037037037037e-05, + "loss": 1.7563, + "step": 40295 + }, + { + "epoch": 7.16, + "learning_rate": 2.6127407407407407e-05, + "loss": 1.5166, + "step": 40300 + }, + { + "epoch": 7.17, + "learning_rate": 2.6124444444444446e-05, + "loss": 1.5997, + "step": 40305 + }, + { + "epoch": 7.17, + "learning_rate": 2.612148148148148e-05, + "loss": 1.541, + "step": 40310 + }, + { + "epoch": 7.17, + "learning_rate": 2.611851851851852e-05, + "loss": 1.5923, + "step": 40315 + }, + { + "epoch": 7.17, + "learning_rate": 2.6115555555555555e-05, + "loss": 1.6752, + "step": 40320 + }, + { + "epoch": 7.17, + "learning_rate": 2.6112592592592594e-05, + "loss": 1.5918, + "step": 40325 + }, + { + "epoch": 7.17, + "learning_rate": 2.610962962962963e-05, + "loss": 1.5369, + "step": 40330 + }, + { + "epoch": 7.17, + "learning_rate": 2.610666666666667e-05, + "loss": 1.7484, + "step": 40335 + }, + { + "epoch": 7.17, + "learning_rate": 2.6103703703703704e-05, + "loss": 1.6152, + "step": 40340 + }, + { + "epoch": 7.17, + "learning_rate": 2.6100740740740743e-05, + "loss": 1.5989, + "step": 40345 + }, + { + "epoch": 7.17, + "learning_rate": 2.6097777777777775e-05, + "loss": 1.5734, + "step": 40350 + }, + { + "epoch": 7.17, + "learning_rate": 2.6094814814814817e-05, + "loss": 1.6829, + "step": 40355 + }, + { + "epoch": 7.18, + "learning_rate": 2.609185185185185e-05, + "loss": 1.6296, + "step": 40360 + }, + { + "epoch": 7.18, + "learning_rate": 2.608888888888889e-05, + "loss": 1.6255, + "step": 40365 + }, + { + "epoch": 7.18, + "learning_rate": 2.6085925925925923e-05, + "loss": 1.8139, + "step": 40370 + }, + { + "epoch": 7.18, + "learning_rate": 2.6082962962962966e-05, + "loss": 1.5585, + "step": 40375 + }, + { + "epoch": 7.18, + "learning_rate": 2.6079999999999998e-05, + "loss": 1.5572, + "step": 40380 + }, + { + "epoch": 7.18, + "learning_rate": 2.607703703703704e-05, + "loss": 1.553, + "step": 40385 + }, + { + "epoch": 7.18, + "learning_rate": 2.6074074074074072e-05, + "loss": 1.5755, + "step": 40390 + }, + { + "epoch": 7.18, + "learning_rate": 2.6071111111111114e-05, + "loss": 1.6307, + "step": 40395 + }, + { + "epoch": 7.18, + "learning_rate": 2.6068148148148146e-05, + "loss": 1.5193, + "step": 40400 + }, + { + "epoch": 7.18, + "learning_rate": 2.606518518518519e-05, + "loss": 1.6372, + "step": 40405 + }, + { + "epoch": 7.18, + "learning_rate": 2.606222222222222e-05, + "loss": 1.5971, + "step": 40410 + }, + { + "epoch": 7.18, + "learning_rate": 2.6059259259259263e-05, + "loss": 1.7176, + "step": 40415 + }, + { + "epoch": 7.19, + "learning_rate": 2.6056296296296295e-05, + "loss": 1.564, + "step": 40420 + }, + { + "epoch": 7.19, + "learning_rate": 2.6053333333333333e-05, + "loss": 1.6633, + "step": 40425 + }, + { + "epoch": 7.19, + "learning_rate": 2.6050962962962967e-05, + "loss": 1.6445, + "step": 40430 + }, + { + "epoch": 7.19, + "learning_rate": 2.6048e-05, + "loss": 1.6395, + "step": 40435 + }, + { + "epoch": 7.19, + "learning_rate": 2.604503703703704e-05, + "loss": 1.4316, + "step": 40440 + }, + { + "epoch": 7.19, + "learning_rate": 2.6042074074074073e-05, + "loss": 1.6773, + "step": 40445 + }, + { + "epoch": 7.19, + "learning_rate": 2.6039111111111115e-05, + "loss": 1.7636, + "step": 40450 + }, + { + "epoch": 7.19, + "learning_rate": 2.6036148148148147e-05, + "loss": 1.6464, + "step": 40455 + }, + { + "epoch": 7.19, + "learning_rate": 2.603318518518519e-05, + "loss": 1.6541, + "step": 40460 + }, + { + "epoch": 7.19, + "learning_rate": 2.603022222222222e-05, + "loss": 1.6546, + "step": 40465 + }, + { + "epoch": 7.19, + "learning_rate": 2.6027259259259264e-05, + "loss": 1.6637, + "step": 40470 + }, + { + "epoch": 7.2, + "learning_rate": 2.6024296296296296e-05, + "loss": 1.6088, + "step": 40475 + }, + { + "epoch": 7.2, + "learning_rate": 2.6021333333333338e-05, + "loss": 1.5192, + "step": 40480 + }, + { + "epoch": 7.2, + "learning_rate": 2.601837037037037e-05, + "loss": 1.6664, + "step": 40485 + }, + { + "epoch": 7.2, + "learning_rate": 2.6016000000000003e-05, + "loss": 1.5942, + "step": 40490 + }, + { + "epoch": 7.2, + "learning_rate": 2.6013037037037035e-05, + "loss": 1.5652, + "step": 40495 + }, + { + "epoch": 7.2, + "learning_rate": 2.6010074074074077e-05, + "loss": 1.5669, + "step": 40500 + }, + { + "epoch": 7.2, + "learning_rate": 2.600711111111111e-05, + "loss": 1.6862, + "step": 40505 + }, + { + "epoch": 7.2, + "learning_rate": 2.6004148148148148e-05, + "loss": 1.6841, + "step": 40510 + }, + { + "epoch": 7.2, + "learning_rate": 2.6001185185185184e-05, + "loss": 1.6149, + "step": 40515 + }, + { + "epoch": 7.2, + "learning_rate": 2.5998222222222222e-05, + "loss": 1.7018, + "step": 40520 + }, + { + "epoch": 7.2, + "learning_rate": 2.5995259259259258e-05, + "loss": 1.6876, + "step": 40525 + }, + { + "epoch": 7.21, + "learning_rate": 2.5992296296296297e-05, + "loss": 1.717, + "step": 40530 + }, + { + "epoch": 7.21, + "learning_rate": 2.5989333333333332e-05, + "loss": 1.6344, + "step": 40535 + }, + { + "epoch": 7.21, + "learning_rate": 2.598637037037037e-05, + "loss": 1.7217, + "step": 40540 + }, + { + "epoch": 7.21, + "learning_rate": 2.5983407407407406e-05, + "loss": 1.6277, + "step": 40545 + }, + { + "epoch": 7.21, + "learning_rate": 2.5980444444444445e-05, + "loss": 1.6149, + "step": 40550 + }, + { + "epoch": 7.21, + "learning_rate": 2.597748148148148e-05, + "loss": 1.6715, + "step": 40555 + }, + { + "epoch": 7.21, + "learning_rate": 2.597451851851852e-05, + "loss": 1.6932, + "step": 40560 + }, + { + "epoch": 7.21, + "learning_rate": 2.5971555555555555e-05, + "loss": 1.6322, + "step": 40565 + }, + { + "epoch": 7.21, + "learning_rate": 2.5968592592592594e-05, + "loss": 1.5955, + "step": 40570 + }, + { + "epoch": 7.21, + "learning_rate": 2.596562962962963e-05, + "loss": 1.6561, + "step": 40575 + }, + { + "epoch": 7.21, + "learning_rate": 2.5962666666666668e-05, + "loss": 1.5022, + "step": 40580 + }, + { + "epoch": 7.22, + "learning_rate": 2.5959703703703703e-05, + "loss": 1.5576, + "step": 40585 + }, + { + "epoch": 7.22, + "learning_rate": 2.5956740740740742e-05, + "loss": 1.6379, + "step": 40590 + }, + { + "epoch": 7.22, + "learning_rate": 2.5953777777777778e-05, + "loss": 1.7231, + "step": 40595 + }, + { + "epoch": 7.22, + "learning_rate": 2.5950814814814816e-05, + "loss": 1.4801, + "step": 40600 + }, + { + "epoch": 7.22, + "learning_rate": 2.5947851851851852e-05, + "loss": 1.601, + "step": 40605 + }, + { + "epoch": 7.22, + "learning_rate": 2.594488888888889e-05, + "loss": 1.6112, + "step": 40610 + }, + { + "epoch": 7.22, + "learning_rate": 2.5941925925925926e-05, + "loss": 1.6433, + "step": 40615 + }, + { + "epoch": 7.22, + "learning_rate": 2.5938962962962965e-05, + "loss": 1.6634, + "step": 40620 + }, + { + "epoch": 7.22, + "learning_rate": 2.5936e-05, + "loss": 1.639, + "step": 40625 + }, + { + "epoch": 7.22, + "learning_rate": 2.593303703703704e-05, + "loss": 1.5013, + "step": 40630 + }, + { + "epoch": 7.22, + "learning_rate": 2.5930074074074075e-05, + "loss": 1.6138, + "step": 40635 + }, + { + "epoch": 7.22, + "learning_rate": 2.5927111111111113e-05, + "loss": 1.7167, + "step": 40640 + }, + { + "epoch": 7.23, + "learning_rate": 2.592414814814815e-05, + "loss": 1.7774, + "step": 40645 + }, + { + "epoch": 7.23, + "learning_rate": 2.5921185185185188e-05, + "loss": 1.5659, + "step": 40650 + }, + { + "epoch": 7.23, + "learning_rate": 2.5918222222222223e-05, + "loss": 1.7521, + "step": 40655 + }, + { + "epoch": 7.23, + "learning_rate": 2.5915259259259262e-05, + "loss": 1.6002, + "step": 40660 + }, + { + "epoch": 7.23, + "learning_rate": 2.5912296296296294e-05, + "loss": 1.7577, + "step": 40665 + }, + { + "epoch": 7.23, + "learning_rate": 2.5909333333333336e-05, + "loss": 1.5114, + "step": 40670 + }, + { + "epoch": 7.23, + "learning_rate": 2.5906370370370368e-05, + "loss": 1.6489, + "step": 40675 + }, + { + "epoch": 7.23, + "learning_rate": 2.590340740740741e-05, + "loss": 1.6646, + "step": 40680 + }, + { + "epoch": 7.23, + "learning_rate": 2.5900444444444443e-05, + "loss": 1.6403, + "step": 40685 + }, + { + "epoch": 7.23, + "learning_rate": 2.5897481481481485e-05, + "loss": 1.5136, + "step": 40690 + }, + { + "epoch": 7.23, + "learning_rate": 2.5894518518518517e-05, + "loss": 1.6413, + "step": 40695 + }, + { + "epoch": 7.24, + "learning_rate": 2.589155555555556e-05, + "loss": 1.5207, + "step": 40700 + }, + { + "epoch": 7.24, + "learning_rate": 2.588859259259259e-05, + "loss": 1.6235, + "step": 40705 + }, + { + "epoch": 7.24, + "learning_rate": 2.5885629629629633e-05, + "loss": 1.6365, + "step": 40710 + }, + { + "epoch": 7.24, + "learning_rate": 2.5882666666666665e-05, + "loss": 1.536, + "step": 40715 + }, + { + "epoch": 7.24, + "learning_rate": 2.5879703703703707e-05, + "loss": 1.6377, + "step": 40720 + }, + { + "epoch": 7.24, + "learning_rate": 2.587674074074074e-05, + "loss": 1.5948, + "step": 40725 + }, + { + "epoch": 7.24, + "learning_rate": 2.5873777777777782e-05, + "loss": 1.6323, + "step": 40730 + }, + { + "epoch": 7.24, + "learning_rate": 2.5870814814814814e-05, + "loss": 1.6256, + "step": 40735 + }, + { + "epoch": 7.24, + "learning_rate": 2.5867851851851853e-05, + "loss": 1.5577, + "step": 40740 + }, + { + "epoch": 7.24, + "learning_rate": 2.5864888888888888e-05, + "loss": 1.7172, + "step": 40745 + }, + { + "epoch": 7.24, + "learning_rate": 2.5861925925925927e-05, + "loss": 1.5852, + "step": 40750 + }, + { + "epoch": 7.25, + "learning_rate": 2.5858962962962962e-05, + "loss": 1.5515, + "step": 40755 + }, + { + "epoch": 7.25, + "learning_rate": 2.5856e-05, + "loss": 1.6314, + "step": 40760 + }, + { + "epoch": 7.25, + "learning_rate": 2.5853037037037037e-05, + "loss": 1.5578, + "step": 40765 + }, + { + "epoch": 7.25, + "learning_rate": 2.5850074074074075e-05, + "loss": 1.5651, + "step": 40770 + }, + { + "epoch": 7.25, + "learning_rate": 2.584711111111111e-05, + "loss": 1.6884, + "step": 40775 + }, + { + "epoch": 7.25, + "learning_rate": 2.584414814814815e-05, + "loss": 1.6532, + "step": 40780 + }, + { + "epoch": 7.25, + "learning_rate": 2.5841185185185185e-05, + "loss": 1.6346, + "step": 40785 + }, + { + "epoch": 7.25, + "learning_rate": 2.5838222222222224e-05, + "loss": 1.6402, + "step": 40790 + }, + { + "epoch": 7.25, + "learning_rate": 2.583525925925926e-05, + "loss": 1.7316, + "step": 40795 + }, + { + "epoch": 7.25, + "learning_rate": 2.5832296296296298e-05, + "loss": 1.6149, + "step": 40800 + }, + { + "epoch": 7.25, + "learning_rate": 2.5829333333333334e-05, + "loss": 1.6287, + "step": 40805 + }, + { + "epoch": 7.26, + "learning_rate": 2.5826370370370372e-05, + "loss": 1.651, + "step": 40810 + }, + { + "epoch": 7.26, + "learning_rate": 2.5823407407407408e-05, + "loss": 1.5266, + "step": 40815 + }, + { + "epoch": 7.26, + "learning_rate": 2.5820444444444447e-05, + "loss": 1.5881, + "step": 40820 + }, + { + "epoch": 7.26, + "learning_rate": 2.5817481481481482e-05, + "loss": 1.6744, + "step": 40825 + }, + { + "epoch": 7.26, + "learning_rate": 2.581451851851852e-05, + "loss": 1.6872, + "step": 40830 + }, + { + "epoch": 7.26, + "learning_rate": 2.5811555555555556e-05, + "loss": 1.5689, + "step": 40835 + }, + { + "epoch": 7.26, + "learning_rate": 2.5808592592592595e-05, + "loss": 1.5866, + "step": 40840 + }, + { + "epoch": 7.26, + "learning_rate": 2.580562962962963e-05, + "loss": 1.6319, + "step": 40845 + }, + { + "epoch": 7.26, + "learning_rate": 2.580266666666667e-05, + "loss": 1.6249, + "step": 40850 + }, + { + "epoch": 7.26, + "learning_rate": 2.5799703703703705e-05, + "loss": 1.5755, + "step": 40855 + }, + { + "epoch": 7.26, + "learning_rate": 2.5796740740740744e-05, + "loss": 1.5097, + "step": 40860 + }, + { + "epoch": 7.26, + "learning_rate": 2.579377777777778e-05, + "loss": 1.6023, + "step": 40865 + }, + { + "epoch": 7.27, + "learning_rate": 2.5790814814814818e-05, + "loss": 1.6305, + "step": 40870 + }, + { + "epoch": 7.27, + "learning_rate": 2.5787851851851853e-05, + "loss": 1.6617, + "step": 40875 + }, + { + "epoch": 7.27, + "learning_rate": 2.5784888888888892e-05, + "loss": 1.5318, + "step": 40880 + }, + { + "epoch": 7.27, + "learning_rate": 2.5781925925925928e-05, + "loss": 1.703, + "step": 40885 + }, + { + "epoch": 7.27, + "learning_rate": 2.5778962962962966e-05, + "loss": 1.5126, + "step": 40890 + }, + { + "epoch": 7.27, + "learning_rate": 2.5776e-05, + "loss": 1.7034, + "step": 40895 + }, + { + "epoch": 7.27, + "learning_rate": 2.577303703703704e-05, + "loss": 1.6794, + "step": 40900 + }, + { + "epoch": 7.27, + "learning_rate": 2.5770074074074073e-05, + "loss": 1.709, + "step": 40905 + }, + { + "epoch": 7.27, + "learning_rate": 2.5767111111111115e-05, + "loss": 1.5878, + "step": 40910 + }, + { + "epoch": 7.27, + "learning_rate": 2.5764148148148147e-05, + "loss": 1.6028, + "step": 40915 + }, + { + "epoch": 7.27, + "learning_rate": 2.576118518518519e-05, + "loss": 1.5957, + "step": 40920 + }, + { + "epoch": 7.28, + "learning_rate": 2.575822222222222e-05, + "loss": 1.6464, + "step": 40925 + }, + { + "epoch": 7.28, + "learning_rate": 2.5755259259259263e-05, + "loss": 1.6435, + "step": 40930 + }, + { + "epoch": 7.28, + "learning_rate": 2.5752296296296296e-05, + "loss": 1.5976, + "step": 40935 + }, + { + "epoch": 7.28, + "learning_rate": 2.5749333333333338e-05, + "loss": 1.633, + "step": 40940 + }, + { + "epoch": 7.28, + "learning_rate": 2.574637037037037e-05, + "loss": 1.5551, + "step": 40945 + }, + { + "epoch": 7.28, + "learning_rate": 2.5743407407407412e-05, + "loss": 1.5906, + "step": 40950 + }, + { + "epoch": 7.28, + "learning_rate": 2.5740444444444444e-05, + "loss": 1.6156, + "step": 40955 + }, + { + "epoch": 7.28, + "learning_rate": 2.5737481481481486e-05, + "loss": 1.7116, + "step": 40960 + }, + { + "epoch": 7.28, + "learning_rate": 2.5734518518518518e-05, + "loss": 1.686, + "step": 40965 + }, + { + "epoch": 7.28, + "learning_rate": 2.5731555555555557e-05, + "loss": 1.7061, + "step": 40970 + }, + { + "epoch": 7.28, + "learning_rate": 2.5728592592592593e-05, + "loss": 1.6666, + "step": 40975 + }, + { + "epoch": 7.29, + "learning_rate": 2.572562962962963e-05, + "loss": 1.7211, + "step": 40980 + }, + { + "epoch": 7.29, + "learning_rate": 2.5722666666666667e-05, + "loss": 1.6433, + "step": 40985 + }, + { + "epoch": 7.29, + "learning_rate": 2.5719703703703706e-05, + "loss": 1.6192, + "step": 40990 + }, + { + "epoch": 7.29, + "learning_rate": 2.571674074074074e-05, + "loss": 1.667, + "step": 40995 + }, + { + "epoch": 7.29, + "learning_rate": 2.571377777777778e-05, + "loss": 1.6456, + "step": 41000 + }, + { + "epoch": 7.29, + "learning_rate": 2.5710814814814815e-05, + "loss": 1.5619, + "step": 41005 + }, + { + "epoch": 7.29, + "learning_rate": 2.5707851851851854e-05, + "loss": 1.5684, + "step": 41010 + }, + { + "epoch": 7.29, + "learning_rate": 2.570488888888889e-05, + "loss": 1.5446, + "step": 41015 + }, + { + "epoch": 7.29, + "learning_rate": 2.570192592592593e-05, + "loss": 1.5835, + "step": 41020 + }, + { + "epoch": 7.29, + "learning_rate": 2.5698962962962964e-05, + "loss": 1.5784, + "step": 41025 + }, + { + "epoch": 7.29, + "learning_rate": 2.5696000000000003e-05, + "loss": 1.6169, + "step": 41030 + }, + { + "epoch": 7.3, + "learning_rate": 2.5693037037037038e-05, + "loss": 1.6434, + "step": 41035 + }, + { + "epoch": 7.3, + "learning_rate": 2.5690074074074077e-05, + "loss": 1.6005, + "step": 41040 + }, + { + "epoch": 7.3, + "learning_rate": 2.5687111111111112e-05, + "loss": 1.5837, + "step": 41045 + }, + { + "epoch": 7.3, + "learning_rate": 2.568414814814815e-05, + "loss": 1.4372, + "step": 41050 + }, + { + "epoch": 7.3, + "learning_rate": 2.5681185185185187e-05, + "loss": 1.6525, + "step": 41055 + }, + { + "epoch": 7.3, + "learning_rate": 2.5678222222222225e-05, + "loss": 1.5805, + "step": 41060 + }, + { + "epoch": 7.3, + "learning_rate": 2.567525925925926e-05, + "loss": 1.5815, + "step": 41065 + }, + { + "epoch": 7.3, + "learning_rate": 2.56722962962963e-05, + "loss": 1.6489, + "step": 41070 + }, + { + "epoch": 7.3, + "learning_rate": 2.5669333333333335e-05, + "loss": 1.6114, + "step": 41075 + }, + { + "epoch": 7.3, + "learning_rate": 2.5666370370370374e-05, + "loss": 1.5593, + "step": 41080 + }, + { + "epoch": 7.3, + "learning_rate": 2.566340740740741e-05, + "loss": 1.5721, + "step": 41085 + }, + { + "epoch": 7.3, + "learning_rate": 2.5660444444444448e-05, + "loss": 1.6531, + "step": 41090 + }, + { + "epoch": 7.31, + "learning_rate": 2.5657481481481484e-05, + "loss": 1.5929, + "step": 41095 + }, + { + "epoch": 7.31, + "learning_rate": 2.5654518518518522e-05, + "loss": 1.5989, + "step": 41100 + }, + { + "epoch": 7.31, + "learning_rate": 2.5651555555555558e-05, + "loss": 1.5592, + "step": 41105 + }, + { + "epoch": 7.31, + "learning_rate": 2.5648592592592597e-05, + "loss": 1.5599, + "step": 41110 + }, + { + "epoch": 7.31, + "learning_rate": 2.5645629629629632e-05, + "loss": 1.6955, + "step": 41115 + }, + { + "epoch": 7.31, + "learning_rate": 2.564266666666667e-05, + "loss": 1.641, + "step": 41120 + }, + { + "epoch": 7.31, + "learning_rate": 2.5639703703703703e-05, + "loss": 1.6073, + "step": 41125 + }, + { + "epoch": 7.31, + "learning_rate": 2.5636740740740745e-05, + "loss": 1.6717, + "step": 41130 + }, + { + "epoch": 7.31, + "learning_rate": 2.5633777777777777e-05, + "loss": 1.727, + "step": 41135 + }, + { + "epoch": 7.31, + "learning_rate": 2.563081481481482e-05, + "loss": 1.7254, + "step": 41140 + }, + { + "epoch": 7.31, + "learning_rate": 2.562785185185185e-05, + "loss": 1.5932, + "step": 41145 + }, + { + "epoch": 7.32, + "learning_rate": 2.5624888888888894e-05, + "loss": 1.5431, + "step": 41150 + }, + { + "epoch": 7.32, + "learning_rate": 2.5621925925925926e-05, + "loss": 1.6437, + "step": 41155 + }, + { + "epoch": 7.32, + "learning_rate": 2.5618962962962968e-05, + "loss": 1.6051, + "step": 41160 + }, + { + "epoch": 7.32, + "learning_rate": 2.5616e-05, + "loss": 1.5959, + "step": 41165 + }, + { + "epoch": 7.32, + "learning_rate": 2.5613037037037042e-05, + "loss": 1.6433, + "step": 41170 + }, + { + "epoch": 7.32, + "learning_rate": 2.5610074074074074e-05, + "loss": 1.4625, + "step": 41175 + }, + { + "epoch": 7.32, + "learning_rate": 2.5607111111111117e-05, + "loss": 1.5762, + "step": 41180 + }, + { + "epoch": 7.32, + "learning_rate": 2.560414814814815e-05, + "loss": 1.6527, + "step": 41185 + }, + { + "epoch": 7.32, + "learning_rate": 2.560118518518519e-05, + "loss": 1.5029, + "step": 41190 + }, + { + "epoch": 7.32, + "learning_rate": 2.5598222222222223e-05, + "loss": 1.6771, + "step": 41195 + }, + { + "epoch": 7.32, + "learning_rate": 2.5595259259259258e-05, + "loss": 1.6128, + "step": 41200 + }, + { + "epoch": 7.33, + "learning_rate": 2.5592296296296297e-05, + "loss": 1.6624, + "step": 41205 + }, + { + "epoch": 7.33, + "learning_rate": 2.5589333333333333e-05, + "loss": 1.5417, + "step": 41210 + }, + { + "epoch": 7.33, + "learning_rate": 2.558637037037037e-05, + "loss": 1.5949, + "step": 41215 + }, + { + "epoch": 7.33, + "learning_rate": 2.5583407407407407e-05, + "loss": 1.7662, + "step": 41220 + }, + { + "epoch": 7.33, + "learning_rate": 2.5580444444444446e-05, + "loss": 1.6175, + "step": 41225 + }, + { + "epoch": 7.33, + "learning_rate": 2.557748148148148e-05, + "loss": 1.7357, + "step": 41230 + }, + { + "epoch": 7.33, + "learning_rate": 2.557451851851852e-05, + "loss": 1.654, + "step": 41235 + }, + { + "epoch": 7.33, + "learning_rate": 2.5571555555555555e-05, + "loss": 1.6801, + "step": 41240 + }, + { + "epoch": 7.33, + "learning_rate": 2.5568592592592594e-05, + "loss": 1.5849, + "step": 41245 + }, + { + "epoch": 7.33, + "learning_rate": 2.556562962962963e-05, + "loss": 1.6278, + "step": 41250 + }, + { + "epoch": 7.33, + "learning_rate": 2.556266666666667e-05, + "loss": 1.649, + "step": 41255 + }, + { + "epoch": 7.34, + "learning_rate": 2.5559703703703704e-05, + "loss": 1.6587, + "step": 41260 + }, + { + "epoch": 7.34, + "learning_rate": 2.5556740740740743e-05, + "loss": 1.7225, + "step": 41265 + }, + { + "epoch": 7.34, + "learning_rate": 2.5553777777777778e-05, + "loss": 1.5738, + "step": 41270 + }, + { + "epoch": 7.34, + "learning_rate": 2.5550814814814817e-05, + "loss": 1.552, + "step": 41275 + }, + { + "epoch": 7.34, + "learning_rate": 2.554785185185185e-05, + "loss": 1.6288, + "step": 41280 + }, + { + "epoch": 7.34, + "learning_rate": 2.554488888888889e-05, + "loss": 1.6773, + "step": 41285 + }, + { + "epoch": 7.34, + "learning_rate": 2.5541925925925923e-05, + "loss": 1.7602, + "step": 41290 + }, + { + "epoch": 7.34, + "learning_rate": 2.5538962962962965e-05, + "loss": 1.6795, + "step": 41295 + }, + { + "epoch": 7.34, + "learning_rate": 2.5535999999999997e-05, + "loss": 1.6347, + "step": 41300 + }, + { + "epoch": 7.34, + "learning_rate": 2.553303703703704e-05, + "loss": 1.6056, + "step": 41305 + }, + { + "epoch": 7.34, + "learning_rate": 2.553007407407407e-05, + "loss": 1.7138, + "step": 41310 + }, + { + "epoch": 7.34, + "learning_rate": 2.5527111111111114e-05, + "loss": 1.7164, + "step": 41315 + }, + { + "epoch": 7.35, + "learning_rate": 2.5524148148148146e-05, + "loss": 1.6878, + "step": 41320 + }, + { + "epoch": 7.35, + "learning_rate": 2.5521185185185188e-05, + "loss": 1.6425, + "step": 41325 + }, + { + "epoch": 7.35, + "learning_rate": 2.551822222222222e-05, + "loss": 1.5943, + "step": 41330 + }, + { + "epoch": 7.35, + "learning_rate": 2.5515259259259262e-05, + "loss": 1.5752, + "step": 41335 + }, + { + "epoch": 7.35, + "learning_rate": 2.5512296296296294e-05, + "loss": 1.6251, + "step": 41340 + }, + { + "epoch": 7.35, + "learning_rate": 2.5509333333333337e-05, + "loss": 1.6019, + "step": 41345 + }, + { + "epoch": 7.35, + "learning_rate": 2.550637037037037e-05, + "loss": 1.7045, + "step": 41350 + }, + { + "epoch": 7.35, + "learning_rate": 2.5503407407407408e-05, + "loss": 1.6335, + "step": 41355 + }, + { + "epoch": 7.35, + "learning_rate": 2.5500444444444443e-05, + "loss": 1.6608, + "step": 41360 + }, + { + "epoch": 7.35, + "learning_rate": 2.5497481481481482e-05, + "loss": 1.5172, + "step": 41365 + }, + { + "epoch": 7.35, + "learning_rate": 2.5494518518518517e-05, + "loss": 1.6686, + "step": 41370 + }, + { + "epoch": 7.36, + "learning_rate": 2.5491555555555556e-05, + "loss": 1.6731, + "step": 41375 + }, + { + "epoch": 7.36, + "learning_rate": 2.548859259259259e-05, + "loss": 1.7093, + "step": 41380 + }, + { + "epoch": 7.36, + "learning_rate": 2.548562962962963e-05, + "loss": 1.7359, + "step": 41385 + }, + { + "epoch": 7.36, + "learning_rate": 2.5482666666666666e-05, + "loss": 1.7284, + "step": 41390 + }, + { + "epoch": 7.36, + "learning_rate": 2.5479703703703705e-05, + "loss": 1.6845, + "step": 41395 + }, + { + "epoch": 7.36, + "learning_rate": 2.547674074074074e-05, + "loss": 1.5892, + "step": 41400 + }, + { + "epoch": 7.36, + "learning_rate": 2.547377777777778e-05, + "loss": 1.6972, + "step": 41405 + }, + { + "epoch": 7.36, + "learning_rate": 2.5470814814814814e-05, + "loss": 1.5473, + "step": 41410 + }, + { + "epoch": 7.36, + "learning_rate": 2.5467851851851853e-05, + "loss": 1.7014, + "step": 41415 + }, + { + "epoch": 7.36, + "learning_rate": 2.546488888888889e-05, + "loss": 1.6495, + "step": 41420 + }, + { + "epoch": 7.36, + "learning_rate": 2.5461925925925927e-05, + "loss": 1.6484, + "step": 41425 + }, + { + "epoch": 7.37, + "learning_rate": 2.5458962962962963e-05, + "loss": 1.5989, + "step": 41430 + }, + { + "epoch": 7.37, + "learning_rate": 2.5456e-05, + "loss": 1.6658, + "step": 41435 + }, + { + "epoch": 7.37, + "learning_rate": 2.5453037037037037e-05, + "loss": 1.6028, + "step": 41440 + }, + { + "epoch": 7.37, + "learning_rate": 2.5450074074074076e-05, + "loss": 1.7127, + "step": 41445 + }, + { + "epoch": 7.37, + "learning_rate": 2.544711111111111e-05, + "loss": 1.5209, + "step": 41450 + }, + { + "epoch": 7.37, + "learning_rate": 2.544414814814815e-05, + "loss": 1.6499, + "step": 41455 + }, + { + "epoch": 7.37, + "learning_rate": 2.5441185185185186e-05, + "loss": 1.6512, + "step": 41460 + }, + { + "epoch": 7.37, + "learning_rate": 2.5438222222222224e-05, + "loss": 1.5478, + "step": 41465 + }, + { + "epoch": 7.37, + "learning_rate": 2.543525925925926e-05, + "loss": 1.6361, + "step": 41470 + }, + { + "epoch": 7.37, + "learning_rate": 2.54322962962963e-05, + "loss": 1.5953, + "step": 41475 + }, + { + "epoch": 7.37, + "learning_rate": 2.5429333333333334e-05, + "loss": 1.5439, + "step": 41480 + }, + { + "epoch": 7.38, + "learning_rate": 2.5426370370370373e-05, + "loss": 1.4655, + "step": 41485 + }, + { + "epoch": 7.38, + "learning_rate": 2.5423407407407408e-05, + "loss": 1.6708, + "step": 41490 + }, + { + "epoch": 7.38, + "learning_rate": 2.5420444444444447e-05, + "loss": 1.6819, + "step": 41495 + }, + { + "epoch": 7.38, + "learning_rate": 2.5417481481481483e-05, + "loss": 1.5493, + "step": 41500 + }, + { + "epoch": 7.38, + "learning_rate": 2.541451851851852e-05, + "loss": 1.6189, + "step": 41505 + }, + { + "epoch": 7.38, + "learning_rate": 2.5411555555555557e-05, + "loss": 1.7419, + "step": 41510 + }, + { + "epoch": 7.38, + "learning_rate": 2.5408592592592596e-05, + "loss": 1.6409, + "step": 41515 + }, + { + "epoch": 7.38, + "learning_rate": 2.5405629629629628e-05, + "loss": 1.5769, + "step": 41520 + }, + { + "epoch": 7.38, + "learning_rate": 2.540266666666667e-05, + "loss": 1.6083, + "step": 41525 + }, + { + "epoch": 7.38, + "learning_rate": 2.5399703703703702e-05, + "loss": 1.6913, + "step": 41530 + }, + { + "epoch": 7.38, + "learning_rate": 2.5396740740740744e-05, + "loss": 1.6569, + "step": 41535 + }, + { + "epoch": 7.38, + "learning_rate": 2.5393777777777776e-05, + "loss": 1.5657, + "step": 41540 + }, + { + "epoch": 7.39, + "learning_rate": 2.539081481481482e-05, + "loss": 1.6118, + "step": 41545 + }, + { + "epoch": 7.39, + "learning_rate": 2.538785185185185e-05, + "loss": 1.5427, + "step": 41550 + }, + { + "epoch": 7.39, + "learning_rate": 2.5384888888888893e-05, + "loss": 1.6034, + "step": 41555 + }, + { + "epoch": 7.39, + "learning_rate": 2.5381925925925925e-05, + "loss": 1.6307, + "step": 41560 + }, + { + "epoch": 7.39, + "learning_rate": 2.5378962962962967e-05, + "loss": 1.6683, + "step": 41565 + }, + { + "epoch": 7.39, + "learning_rate": 2.5376e-05, + "loss": 1.6206, + "step": 41570 + }, + { + "epoch": 7.39, + "learning_rate": 2.537303703703704e-05, + "loss": 1.7441, + "step": 41575 + }, + { + "epoch": 7.39, + "learning_rate": 2.5370074074074073e-05, + "loss": 1.6082, + "step": 41580 + }, + { + "epoch": 7.39, + "learning_rate": 2.5367111111111112e-05, + "loss": 1.6098, + "step": 41585 + }, + { + "epoch": 7.39, + "learning_rate": 2.5364148148148147e-05, + "loss": 1.6529, + "step": 41590 + }, + { + "epoch": 7.39, + "learning_rate": 2.5361185185185186e-05, + "loss": 1.545, + "step": 41595 + }, + { + "epoch": 7.4, + "learning_rate": 2.5358222222222222e-05, + "loss": 1.5832, + "step": 41600 + }, + { + "epoch": 7.4, + "learning_rate": 2.535525925925926e-05, + "loss": 1.6464, + "step": 41605 + }, + { + "epoch": 7.4, + "learning_rate": 2.5352296296296296e-05, + "loss": 1.5574, + "step": 41610 + }, + { + "epoch": 7.4, + "learning_rate": 2.5349333333333335e-05, + "loss": 1.6297, + "step": 41615 + }, + { + "epoch": 7.4, + "learning_rate": 2.534637037037037e-05, + "loss": 1.5489, + "step": 41620 + }, + { + "epoch": 7.4, + "learning_rate": 2.534340740740741e-05, + "loss": 1.5527, + "step": 41625 + }, + { + "epoch": 7.4, + "learning_rate": 2.5340444444444444e-05, + "loss": 1.5542, + "step": 41630 + }, + { + "epoch": 7.4, + "learning_rate": 2.5337481481481483e-05, + "loss": 1.8144, + "step": 41635 + }, + { + "epoch": 7.4, + "learning_rate": 2.533451851851852e-05, + "loss": 1.6329, + "step": 41640 + }, + { + "epoch": 7.4, + "learning_rate": 2.5331555555555558e-05, + "loss": 1.6166, + "step": 41645 + }, + { + "epoch": 7.4, + "learning_rate": 2.5328592592592593e-05, + "loss": 1.6536, + "step": 41650 + }, + { + "epoch": 7.41, + "learning_rate": 2.5325629629629632e-05, + "loss": 1.7672, + "step": 41655 + }, + { + "epoch": 7.41, + "learning_rate": 2.5322666666666667e-05, + "loss": 1.4989, + "step": 41660 + }, + { + "epoch": 7.41, + "learning_rate": 2.5319703703703706e-05, + "loss": 1.6485, + "step": 41665 + }, + { + "epoch": 7.41, + "learning_rate": 2.531674074074074e-05, + "loss": 1.6296, + "step": 41670 + }, + { + "epoch": 7.41, + "learning_rate": 2.531377777777778e-05, + "loss": 1.6301, + "step": 41675 + }, + { + "epoch": 7.41, + "learning_rate": 2.5311407407407407e-05, + "loss": 1.5545, + "step": 41680 + }, + { + "epoch": 7.41, + "learning_rate": 2.5308444444444445e-05, + "loss": 1.6356, + "step": 41685 + }, + { + "epoch": 7.41, + "learning_rate": 2.530548148148148e-05, + "loss": 1.6627, + "step": 41690 + }, + { + "epoch": 7.41, + "learning_rate": 2.530251851851852e-05, + "loss": 1.6412, + "step": 41695 + }, + { + "epoch": 7.41, + "learning_rate": 2.5299555555555555e-05, + "loss": 1.6722, + "step": 41700 + }, + { + "epoch": 7.41, + "learning_rate": 2.5296592592592594e-05, + "loss": 1.6125, + "step": 41705 + }, + { + "epoch": 7.42, + "learning_rate": 2.529362962962963e-05, + "loss": 1.6116, + "step": 41710 + }, + { + "epoch": 7.42, + "learning_rate": 2.5290666666666668e-05, + "loss": 1.7176, + "step": 41715 + }, + { + "epoch": 7.42, + "learning_rate": 2.5287703703703704e-05, + "loss": 1.6946, + "step": 41720 + }, + { + "epoch": 7.42, + "learning_rate": 2.5284740740740743e-05, + "loss": 1.5997, + "step": 41725 + }, + { + "epoch": 7.42, + "learning_rate": 2.5281777777777778e-05, + "loss": 1.6476, + "step": 41730 + }, + { + "epoch": 7.42, + "learning_rate": 2.5278814814814817e-05, + "loss": 1.6579, + "step": 41735 + }, + { + "epoch": 7.42, + "learning_rate": 2.5275851851851852e-05, + "loss": 1.8409, + "step": 41740 + }, + { + "epoch": 7.42, + "learning_rate": 2.527288888888889e-05, + "loss": 1.6001, + "step": 41745 + }, + { + "epoch": 7.42, + "learning_rate": 2.5269925925925923e-05, + "loss": 1.7044, + "step": 41750 + }, + { + "epoch": 7.42, + "learning_rate": 2.5266962962962965e-05, + "loss": 1.5899, + "step": 41755 + }, + { + "epoch": 7.42, + "learning_rate": 2.5263999999999997e-05, + "loss": 1.677, + "step": 41760 + }, + { + "epoch": 7.42, + "learning_rate": 2.526103703703704e-05, + "loss": 1.6315, + "step": 41765 + }, + { + "epoch": 7.43, + "learning_rate": 2.525807407407407e-05, + "loss": 1.6409, + "step": 41770 + }, + { + "epoch": 7.43, + "learning_rate": 2.5255111111111114e-05, + "loss": 1.6421, + "step": 41775 + }, + { + "epoch": 7.43, + "learning_rate": 2.5252148148148146e-05, + "loss": 1.5427, + "step": 41780 + }, + { + "epoch": 7.43, + "learning_rate": 2.5249185185185188e-05, + "loss": 1.6634, + "step": 41785 + }, + { + "epoch": 7.43, + "learning_rate": 2.524622222222222e-05, + "loss": 1.7061, + "step": 41790 + }, + { + "epoch": 7.43, + "learning_rate": 2.5243259259259262e-05, + "loss": 1.5799, + "step": 41795 + }, + { + "epoch": 7.43, + "learning_rate": 2.5240296296296294e-05, + "loss": 1.6489, + "step": 41800 + }, + { + "epoch": 7.43, + "learning_rate": 2.5237333333333337e-05, + "loss": 1.5817, + "step": 41805 + }, + { + "epoch": 7.43, + "learning_rate": 2.523437037037037e-05, + "loss": 1.6374, + "step": 41810 + }, + { + "epoch": 7.43, + "learning_rate": 2.523140740740741e-05, + "loss": 1.7403, + "step": 41815 + }, + { + "epoch": 7.43, + "learning_rate": 2.5228444444444443e-05, + "loss": 1.6932, + "step": 41820 + }, + { + "epoch": 7.44, + "learning_rate": 2.522548148148148e-05, + "loss": 1.6759, + "step": 41825 + }, + { + "epoch": 7.44, + "learning_rate": 2.5222518518518517e-05, + "loss": 1.7883, + "step": 41830 + }, + { + "epoch": 7.44, + "learning_rate": 2.5219555555555556e-05, + "loss": 1.5962, + "step": 41835 + }, + { + "epoch": 7.44, + "learning_rate": 2.521659259259259e-05, + "loss": 1.6014, + "step": 41840 + }, + { + "epoch": 7.44, + "learning_rate": 2.521362962962963e-05, + "loss": 1.5833, + "step": 41845 + }, + { + "epoch": 7.44, + "learning_rate": 2.5210666666666666e-05, + "loss": 1.5206, + "step": 41850 + }, + { + "epoch": 7.44, + "learning_rate": 2.5207703703703704e-05, + "loss": 1.6621, + "step": 41855 + }, + { + "epoch": 7.44, + "learning_rate": 2.520474074074074e-05, + "loss": 1.5107, + "step": 41860 + }, + { + "epoch": 7.44, + "learning_rate": 2.520177777777778e-05, + "loss": 1.5731, + "step": 41865 + }, + { + "epoch": 7.44, + "learning_rate": 2.5198814814814814e-05, + "loss": 1.6366, + "step": 41870 + }, + { + "epoch": 7.44, + "learning_rate": 2.5195851851851853e-05, + "loss": 1.5646, + "step": 41875 + }, + { + "epoch": 7.45, + "learning_rate": 2.519288888888889e-05, + "loss": 1.7499, + "step": 41880 + }, + { + "epoch": 7.45, + "learning_rate": 2.5189925925925927e-05, + "loss": 1.5806, + "step": 41885 + }, + { + "epoch": 7.45, + "learning_rate": 2.5186962962962963e-05, + "loss": 1.6774, + "step": 41890 + }, + { + "epoch": 7.45, + "learning_rate": 2.5184e-05, + "loss": 1.6753, + "step": 41895 + }, + { + "epoch": 7.45, + "learning_rate": 2.5181037037037037e-05, + "loss": 1.7546, + "step": 41900 + }, + { + "epoch": 7.45, + "learning_rate": 2.5178074074074076e-05, + "loss": 1.5589, + "step": 41905 + }, + { + "epoch": 7.45, + "learning_rate": 2.517511111111111e-05, + "loss": 1.5687, + "step": 41910 + }, + { + "epoch": 7.45, + "learning_rate": 2.517214814814815e-05, + "loss": 1.6259, + "step": 41915 + }, + { + "epoch": 7.45, + "learning_rate": 2.5169185185185185e-05, + "loss": 1.6281, + "step": 41920 + }, + { + "epoch": 7.45, + "learning_rate": 2.5166222222222224e-05, + "loss": 1.6865, + "step": 41925 + }, + { + "epoch": 7.45, + "learning_rate": 2.516325925925926e-05, + "loss": 1.6292, + "step": 41930 + }, + { + "epoch": 7.46, + "learning_rate": 2.51602962962963e-05, + "loss": 1.5822, + "step": 41935 + }, + { + "epoch": 7.46, + "learning_rate": 2.5157333333333334e-05, + "loss": 1.6015, + "step": 41940 + }, + { + "epoch": 7.46, + "learning_rate": 2.5154370370370373e-05, + "loss": 1.5819, + "step": 41945 + }, + { + "epoch": 7.46, + "learning_rate": 2.5151407407407408e-05, + "loss": 1.614, + "step": 41950 + }, + { + "epoch": 7.46, + "learning_rate": 2.5148444444444447e-05, + "loss": 1.4966, + "step": 41955 + }, + { + "epoch": 7.46, + "learning_rate": 2.5145481481481482e-05, + "loss": 1.6192, + "step": 41960 + }, + { + "epoch": 7.46, + "learning_rate": 2.514251851851852e-05, + "loss": 1.757, + "step": 41965 + }, + { + "epoch": 7.46, + "learning_rate": 2.5139555555555557e-05, + "loss": 1.5685, + "step": 41970 + }, + { + "epoch": 7.46, + "learning_rate": 2.5136592592592596e-05, + "loss": 1.7316, + "step": 41975 + }, + { + "epoch": 7.46, + "learning_rate": 2.5133629629629628e-05, + "loss": 1.5773, + "step": 41980 + }, + { + "epoch": 7.46, + "learning_rate": 2.513066666666667e-05, + "loss": 1.6452, + "step": 41985 + }, + { + "epoch": 7.46, + "learning_rate": 2.5127703703703702e-05, + "loss": 1.6205, + "step": 41990 + }, + { + "epoch": 7.47, + "learning_rate": 2.5124740740740744e-05, + "loss": 1.5643, + "step": 41995 + }, + { + "epoch": 7.47, + "learning_rate": 2.5121777777777776e-05, + "loss": 1.7198, + "step": 42000 + }, + { + "epoch": 7.47, + "learning_rate": 2.5118814814814818e-05, + "loss": 1.6528, + "step": 42005 + }, + { + "epoch": 7.47, + "learning_rate": 2.511585185185185e-05, + "loss": 1.6434, + "step": 42010 + }, + { + "epoch": 7.47, + "learning_rate": 2.5112888888888893e-05, + "loss": 1.7483, + "step": 42015 + }, + { + "epoch": 7.47, + "learning_rate": 2.5109925925925925e-05, + "loss": 1.694, + "step": 42020 + }, + { + "epoch": 7.47, + "learning_rate": 2.5106962962962967e-05, + "loss": 1.6142, + "step": 42025 + }, + { + "epoch": 7.47, + "learning_rate": 2.5104e-05, + "loss": 1.6599, + "step": 42030 + }, + { + "epoch": 7.47, + "learning_rate": 2.510103703703704e-05, + "loss": 1.7811, + "step": 42035 + }, + { + "epoch": 7.47, + "learning_rate": 2.5098074074074073e-05, + "loss": 1.6326, + "step": 42040 + }, + { + "epoch": 7.47, + "learning_rate": 2.5095111111111115e-05, + "loss": 1.531, + "step": 42045 + }, + { + "epoch": 7.48, + "learning_rate": 2.5092148148148147e-05, + "loss": 1.5703, + "step": 42050 + }, + { + "epoch": 7.48, + "learning_rate": 2.5089185185185186e-05, + "loss": 1.5535, + "step": 42055 + }, + { + "epoch": 7.48, + "learning_rate": 2.508622222222222e-05, + "loss": 1.6417, + "step": 42060 + }, + { + "epoch": 7.48, + "learning_rate": 2.508325925925926e-05, + "loss": 1.5833, + "step": 42065 + }, + { + "epoch": 7.48, + "learning_rate": 2.5080296296296296e-05, + "loss": 1.6644, + "step": 42070 + }, + { + "epoch": 7.48, + "learning_rate": 2.5077333333333335e-05, + "loss": 1.6165, + "step": 42075 + }, + { + "epoch": 7.48, + "learning_rate": 2.507437037037037e-05, + "loss": 1.6205, + "step": 42080 + }, + { + "epoch": 7.48, + "learning_rate": 2.507140740740741e-05, + "loss": 1.5739, + "step": 42085 + }, + { + "epoch": 7.48, + "learning_rate": 2.5068444444444444e-05, + "loss": 1.526, + "step": 42090 + }, + { + "epoch": 7.48, + "learning_rate": 2.5065481481481483e-05, + "loss": 1.7597, + "step": 42095 + }, + { + "epoch": 7.48, + "learning_rate": 2.506251851851852e-05, + "loss": 1.4853, + "step": 42100 + }, + { + "epoch": 7.49, + "learning_rate": 2.5059555555555557e-05, + "loss": 1.6709, + "step": 42105 + }, + { + "epoch": 7.49, + "learning_rate": 2.5056592592592593e-05, + "loss": 1.6051, + "step": 42110 + }, + { + "epoch": 7.49, + "learning_rate": 2.5053629629629632e-05, + "loss": 1.6251, + "step": 42115 + }, + { + "epoch": 7.49, + "learning_rate": 2.5050666666666667e-05, + "loss": 1.65, + "step": 42120 + }, + { + "epoch": 7.49, + "learning_rate": 2.5047703703703706e-05, + "loss": 1.542, + "step": 42125 + }, + { + "epoch": 7.49, + "learning_rate": 2.504474074074074e-05, + "loss": 1.6226, + "step": 42130 + }, + { + "epoch": 7.49, + "learning_rate": 2.504177777777778e-05, + "loss": 1.5605, + "step": 42135 + }, + { + "epoch": 7.49, + "learning_rate": 2.5038814814814816e-05, + "loss": 1.6663, + "step": 42140 + }, + { + "epoch": 7.49, + "learning_rate": 2.5035851851851854e-05, + "loss": 1.6892, + "step": 42145 + }, + { + "epoch": 7.49, + "learning_rate": 2.503288888888889e-05, + "loss": 1.6128, + "step": 42150 + }, + { + "epoch": 7.49, + "learning_rate": 2.502992592592593e-05, + "loss": 1.7222, + "step": 42155 + }, + { + "epoch": 7.5, + "learning_rate": 2.5026962962962964e-05, + "loss": 1.5738, + "step": 42160 + }, + { + "epoch": 7.5, + "learning_rate": 2.5024000000000003e-05, + "loss": 1.6176, + "step": 42165 + }, + { + "epoch": 7.5, + "learning_rate": 2.502103703703704e-05, + "loss": 1.7673, + "step": 42170 + }, + { + "epoch": 7.5, + "learning_rate": 2.5018074074074077e-05, + "loss": 1.7285, + "step": 42175 + }, + { + "epoch": 7.5, + "learning_rate": 2.5015111111111113e-05, + "loss": 1.6898, + "step": 42180 + }, + { + "epoch": 7.5, + "learning_rate": 2.501214814814815e-05, + "loss": 1.6084, + "step": 42185 + }, + { + "epoch": 7.5, + "learning_rate": 2.5009185185185187e-05, + "loss": 1.5221, + "step": 42190 + }, + { + "epoch": 7.5, + "learning_rate": 2.5006222222222226e-05, + "loss": 1.6874, + "step": 42195 + }, + { + "epoch": 7.5, + "learning_rate": 2.500325925925926e-05, + "loss": 1.5295, + "step": 42200 + }, + { + "epoch": 7.5, + "learning_rate": 2.50002962962963e-05, + "loss": 1.6089, + "step": 42205 + }, + { + "epoch": 7.5, + "learning_rate": 2.4997333333333332e-05, + "loss": 1.6896, + "step": 42210 + }, + { + "epoch": 7.5, + "learning_rate": 2.499437037037037e-05, + "loss": 1.6879, + "step": 42215 + }, + { + "epoch": 7.51, + "learning_rate": 2.4991407407407406e-05, + "loss": 1.6323, + "step": 42220 + }, + { + "epoch": 7.51, + "learning_rate": 2.4988444444444445e-05, + "loss": 1.7671, + "step": 42225 + }, + { + "epoch": 7.51, + "learning_rate": 2.498548148148148e-05, + "loss": 1.6656, + "step": 42230 + }, + { + "epoch": 7.51, + "learning_rate": 2.498251851851852e-05, + "loss": 1.6066, + "step": 42235 + }, + { + "epoch": 7.51, + "learning_rate": 2.4979555555555555e-05, + "loss": 1.6886, + "step": 42240 + }, + { + "epoch": 7.51, + "learning_rate": 2.4976592592592594e-05, + "loss": 1.5731, + "step": 42245 + }, + { + "epoch": 7.51, + "learning_rate": 2.497362962962963e-05, + "loss": 1.6841, + "step": 42250 + }, + { + "epoch": 7.51, + "learning_rate": 2.4970666666666668e-05, + "loss": 1.6698, + "step": 42255 + }, + { + "epoch": 7.51, + "learning_rate": 2.4967703703703703e-05, + "loss": 1.7037, + "step": 42260 + }, + { + "epoch": 7.51, + "learning_rate": 2.4964740740740742e-05, + "loss": 1.5582, + "step": 42265 + }, + { + "epoch": 7.51, + "learning_rate": 2.4961777777777778e-05, + "loss": 1.7294, + "step": 42270 + }, + { + "epoch": 7.52, + "learning_rate": 2.4958814814814816e-05, + "loss": 1.6091, + "step": 42275 + }, + { + "epoch": 7.52, + "learning_rate": 2.4955851851851852e-05, + "loss": 1.6461, + "step": 42280 + }, + { + "epoch": 7.52, + "learning_rate": 2.495288888888889e-05, + "loss": 1.634, + "step": 42285 + }, + { + "epoch": 7.52, + "learning_rate": 2.4949925925925926e-05, + "loss": 1.6421, + "step": 42290 + }, + { + "epoch": 7.52, + "learning_rate": 2.4946962962962965e-05, + "loss": 1.8087, + "step": 42295 + }, + { + "epoch": 7.52, + "learning_rate": 2.4944e-05, + "loss": 1.5589, + "step": 42300 + }, + { + "epoch": 7.52, + "learning_rate": 2.494103703703704e-05, + "loss": 1.4519, + "step": 42305 + }, + { + "epoch": 7.52, + "learning_rate": 2.4938074074074075e-05, + "loss": 1.543, + "step": 42310 + }, + { + "epoch": 7.52, + "learning_rate": 2.4935111111111113e-05, + "loss": 1.6649, + "step": 42315 + }, + { + "epoch": 7.52, + "learning_rate": 2.493214814814815e-05, + "loss": 1.7435, + "step": 42320 + }, + { + "epoch": 7.52, + "learning_rate": 2.4929185185185184e-05, + "loss": 1.6823, + "step": 42325 + }, + { + "epoch": 7.53, + "learning_rate": 2.4926222222222223e-05, + "loss": 1.5719, + "step": 42330 + }, + { + "epoch": 7.53, + "learning_rate": 2.492325925925926e-05, + "loss": 1.592, + "step": 42335 + }, + { + "epoch": 7.53, + "learning_rate": 2.4920296296296297e-05, + "loss": 1.6043, + "step": 42340 + }, + { + "epoch": 7.53, + "learning_rate": 2.4917333333333333e-05, + "loss": 1.6624, + "step": 42345 + }, + { + "epoch": 7.53, + "learning_rate": 2.491437037037037e-05, + "loss": 1.5336, + "step": 42350 + }, + { + "epoch": 7.53, + "learning_rate": 2.4911407407407407e-05, + "loss": 1.6267, + "step": 42355 + }, + { + "epoch": 7.53, + "learning_rate": 2.4908444444444446e-05, + "loss": 1.609, + "step": 42360 + }, + { + "epoch": 7.53, + "learning_rate": 2.490548148148148e-05, + "loss": 1.6932, + "step": 42365 + }, + { + "epoch": 7.53, + "learning_rate": 2.490251851851852e-05, + "loss": 1.6819, + "step": 42370 + }, + { + "epoch": 7.53, + "learning_rate": 2.4899555555555556e-05, + "loss": 1.7832, + "step": 42375 + }, + { + "epoch": 7.53, + "learning_rate": 2.4896592592592594e-05, + "loss": 1.5035, + "step": 42380 + }, + { + "epoch": 7.54, + "learning_rate": 2.489362962962963e-05, + "loss": 1.5465, + "step": 42385 + }, + { + "epoch": 7.54, + "learning_rate": 2.489066666666667e-05, + "loss": 1.6317, + "step": 42390 + }, + { + "epoch": 7.54, + "learning_rate": 2.4887703703703704e-05, + "loss": 1.5068, + "step": 42395 + }, + { + "epoch": 7.54, + "learning_rate": 2.4884740740740743e-05, + "loss": 1.7252, + "step": 42400 + }, + { + "epoch": 7.54, + "learning_rate": 2.488177777777778e-05, + "loss": 1.7102, + "step": 42405 + }, + { + "epoch": 7.54, + "learning_rate": 2.4878814814814817e-05, + "loss": 1.7168, + "step": 42410 + }, + { + "epoch": 7.54, + "learning_rate": 2.4875851851851853e-05, + "loss": 1.5764, + "step": 42415 + }, + { + "epoch": 7.54, + "learning_rate": 2.487288888888889e-05, + "loss": 1.7415, + "step": 42420 + }, + { + "epoch": 7.54, + "learning_rate": 2.4869925925925927e-05, + "loss": 1.6487, + "step": 42425 + }, + { + "epoch": 7.54, + "learning_rate": 2.4866962962962966e-05, + "loss": 1.6504, + "step": 42430 + }, + { + "epoch": 7.54, + "learning_rate": 2.4864e-05, + "loss": 1.7856, + "step": 42435 + }, + { + "epoch": 7.54, + "learning_rate": 2.4861037037037037e-05, + "loss": 1.5304, + "step": 42440 + }, + { + "epoch": 7.55, + "learning_rate": 2.4858074074074075e-05, + "loss": 1.6206, + "step": 42445 + }, + { + "epoch": 7.55, + "learning_rate": 2.485511111111111e-05, + "loss": 1.7216, + "step": 42450 + }, + { + "epoch": 7.55, + "learning_rate": 2.485214814814815e-05, + "loss": 1.6094, + "step": 42455 + }, + { + "epoch": 7.55, + "learning_rate": 2.4849185185185185e-05, + "loss": 1.569, + "step": 42460 + }, + { + "epoch": 7.55, + "learning_rate": 2.4846222222222224e-05, + "loss": 1.6995, + "step": 42465 + }, + { + "epoch": 7.55, + "learning_rate": 2.484325925925926e-05, + "loss": 1.6641, + "step": 42470 + }, + { + "epoch": 7.55, + "learning_rate": 2.4840296296296298e-05, + "loss": 1.744, + "step": 42475 + }, + { + "epoch": 7.55, + "learning_rate": 2.4837333333333334e-05, + "loss": 1.4976, + "step": 42480 + }, + { + "epoch": 7.55, + "learning_rate": 2.4834370370370372e-05, + "loss": 1.5236, + "step": 42485 + }, + { + "epoch": 7.55, + "learning_rate": 2.4831407407407408e-05, + "loss": 1.6698, + "step": 42490 + }, + { + "epoch": 7.55, + "learning_rate": 2.4828444444444447e-05, + "loss": 1.5981, + "step": 42495 + }, + { + "epoch": 7.56, + "learning_rate": 2.4825481481481482e-05, + "loss": 1.5869, + "step": 42500 + }, + { + "epoch": 7.56, + "learning_rate": 2.482251851851852e-05, + "loss": 1.5718, + "step": 42505 + }, + { + "epoch": 7.56, + "learning_rate": 2.4819555555555556e-05, + "loss": 1.6357, + "step": 42510 + }, + { + "epoch": 7.56, + "learning_rate": 2.4816592592592595e-05, + "loss": 1.702, + "step": 42515 + }, + { + "epoch": 7.56, + "learning_rate": 2.481362962962963e-05, + "loss": 1.6627, + "step": 42520 + }, + { + "epoch": 7.56, + "learning_rate": 2.481066666666667e-05, + "loss": 1.6371, + "step": 42525 + }, + { + "epoch": 7.56, + "learning_rate": 2.4807703703703705e-05, + "loss": 1.5821, + "step": 42530 + }, + { + "epoch": 7.56, + "learning_rate": 2.4804740740740744e-05, + "loss": 1.6404, + "step": 42535 + }, + { + "epoch": 7.56, + "learning_rate": 2.480177777777778e-05, + "loss": 1.6249, + "step": 42540 + }, + { + "epoch": 7.56, + "learning_rate": 2.4798814814814818e-05, + "loss": 1.5984, + "step": 42545 + }, + { + "epoch": 7.56, + "learning_rate": 2.4795851851851853e-05, + "loss": 1.5236, + "step": 42550 + }, + { + "epoch": 7.57, + "learning_rate": 2.479288888888889e-05, + "loss": 1.6313, + "step": 42555 + }, + { + "epoch": 7.57, + "learning_rate": 2.4789925925925928e-05, + "loss": 1.5679, + "step": 42560 + }, + { + "epoch": 7.57, + "learning_rate": 2.4786962962962963e-05, + "loss": 1.6113, + "step": 42565 + }, + { + "epoch": 7.57, + "learning_rate": 2.4784000000000002e-05, + "loss": 1.6254, + "step": 42570 + }, + { + "epoch": 7.57, + "learning_rate": 2.4781037037037037e-05, + "loss": 1.8004, + "step": 42575 + }, + { + "epoch": 7.57, + "learning_rate": 2.4778074074074076e-05, + "loss": 1.6595, + "step": 42580 + }, + { + "epoch": 7.57, + "learning_rate": 2.477511111111111e-05, + "loss": 1.6381, + "step": 42585 + }, + { + "epoch": 7.57, + "learning_rate": 2.477214814814815e-05, + "loss": 1.6644, + "step": 42590 + }, + { + "epoch": 7.57, + "learning_rate": 2.4769185185185186e-05, + "loss": 1.6372, + "step": 42595 + }, + { + "epoch": 7.57, + "learning_rate": 2.4766222222222225e-05, + "loss": 1.6561, + "step": 42600 + }, + { + "epoch": 7.57, + "learning_rate": 2.476325925925926e-05, + "loss": 1.6087, + "step": 42605 + }, + { + "epoch": 7.58, + "learning_rate": 2.47602962962963e-05, + "loss": 1.7179, + "step": 42610 + }, + { + "epoch": 7.58, + "learning_rate": 2.4757333333333334e-05, + "loss": 1.6885, + "step": 42615 + }, + { + "epoch": 7.58, + "learning_rate": 2.4754370370370373e-05, + "loss": 1.5692, + "step": 42620 + }, + { + "epoch": 7.58, + "learning_rate": 2.475140740740741e-05, + "loss": 1.486, + "step": 42625 + }, + { + "epoch": 7.58, + "learning_rate": 2.4748444444444447e-05, + "loss": 1.5632, + "step": 42630 + }, + { + "epoch": 7.58, + "learning_rate": 2.4745481481481483e-05, + "loss": 1.6604, + "step": 42635 + }, + { + "epoch": 7.58, + "learning_rate": 2.4742518518518522e-05, + "loss": 1.5488, + "step": 42640 + }, + { + "epoch": 7.58, + "learning_rate": 2.4739555555555557e-05, + "loss": 1.7109, + "step": 42645 + }, + { + "epoch": 7.58, + "learning_rate": 2.4736592592592596e-05, + "loss": 1.6458, + "step": 42650 + }, + { + "epoch": 7.58, + "learning_rate": 2.473362962962963e-05, + "loss": 1.6154, + "step": 42655 + }, + { + "epoch": 7.58, + "learning_rate": 2.473066666666667e-05, + "loss": 1.5993, + "step": 42660 + }, + { + "epoch": 7.58, + "learning_rate": 2.4727703703703706e-05, + "loss": 1.6742, + "step": 42665 + }, + { + "epoch": 7.59, + "learning_rate": 2.472474074074074e-05, + "loss": 1.6274, + "step": 42670 + }, + { + "epoch": 7.59, + "learning_rate": 2.472177777777778e-05, + "loss": 1.5852, + "step": 42675 + }, + { + "epoch": 7.59, + "learning_rate": 2.4718814814814815e-05, + "loss": 1.6499, + "step": 42680 + }, + { + "epoch": 7.59, + "learning_rate": 2.4715851851851854e-05, + "loss": 1.6443, + "step": 42685 + }, + { + "epoch": 7.59, + "learning_rate": 2.471288888888889e-05, + "loss": 1.735, + "step": 42690 + }, + { + "epoch": 7.59, + "learning_rate": 2.470992592592593e-05, + "loss": 1.7045, + "step": 42695 + }, + { + "epoch": 7.59, + "learning_rate": 2.4706962962962964e-05, + "loss": 1.5261, + "step": 42700 + }, + { + "epoch": 7.59, + "learning_rate": 2.4704000000000003e-05, + "loss": 1.5027, + "step": 42705 + }, + { + "epoch": 7.59, + "learning_rate": 2.4701037037037038e-05, + "loss": 1.615, + "step": 42710 + }, + { + "epoch": 7.59, + "learning_rate": 2.4698074074074077e-05, + "loss": 1.7258, + "step": 42715 + }, + { + "epoch": 7.59, + "learning_rate": 2.4695111111111112e-05, + "loss": 1.7026, + "step": 42720 + }, + { + "epoch": 7.6, + "learning_rate": 2.469214814814815e-05, + "loss": 1.5972, + "step": 42725 + }, + { + "epoch": 7.6, + "learning_rate": 2.4689185185185187e-05, + "loss": 1.5827, + "step": 42730 + }, + { + "epoch": 7.6, + "learning_rate": 2.4686222222222225e-05, + "loss": 1.7622, + "step": 42735 + }, + { + "epoch": 7.6, + "learning_rate": 2.468325925925926e-05, + "loss": 1.6694, + "step": 42740 + }, + { + "epoch": 7.6, + "learning_rate": 2.46802962962963e-05, + "loss": 1.7835, + "step": 42745 + }, + { + "epoch": 7.6, + "learning_rate": 2.4677333333333335e-05, + "loss": 1.6116, + "step": 42750 + }, + { + "epoch": 7.6, + "learning_rate": 2.4674370370370374e-05, + "loss": 1.6849, + "step": 42755 + }, + { + "epoch": 7.6, + "learning_rate": 2.467140740740741e-05, + "loss": 1.6807, + "step": 42760 + }, + { + "epoch": 7.6, + "learning_rate": 2.4668444444444448e-05, + "loss": 1.7528, + "step": 42765 + }, + { + "epoch": 7.6, + "learning_rate": 2.4665481481481484e-05, + "loss": 1.579, + "step": 42770 + }, + { + "epoch": 7.6, + "learning_rate": 2.4662518518518522e-05, + "loss": 1.6419, + "step": 42775 + }, + { + "epoch": 7.61, + "learning_rate": 2.4659555555555558e-05, + "loss": 1.6443, + "step": 42780 + }, + { + "epoch": 7.61, + "learning_rate": 2.4656592592592593e-05, + "loss": 1.6024, + "step": 42785 + }, + { + "epoch": 7.61, + "learning_rate": 2.4653629629629632e-05, + "loss": 1.5986, + "step": 42790 + }, + { + "epoch": 7.61, + "learning_rate": 2.4650666666666668e-05, + "loss": 1.6702, + "step": 42795 + }, + { + "epoch": 7.61, + "learning_rate": 2.4647703703703706e-05, + "loss": 1.7322, + "step": 42800 + }, + { + "epoch": 7.61, + "learning_rate": 2.4644740740740742e-05, + "loss": 1.7453, + "step": 42805 + }, + { + "epoch": 7.61, + "learning_rate": 2.464177777777778e-05, + "loss": 1.6644, + "step": 42810 + }, + { + "epoch": 7.61, + "learning_rate": 2.4638814814814816e-05, + "loss": 1.5886, + "step": 42815 + }, + { + "epoch": 7.61, + "learning_rate": 2.4635851851851855e-05, + "loss": 1.6832, + "step": 42820 + }, + { + "epoch": 7.61, + "learning_rate": 2.463288888888889e-05, + "loss": 1.6364, + "step": 42825 + }, + { + "epoch": 7.61, + "learning_rate": 2.4629925925925926e-05, + "loss": 1.6866, + "step": 42830 + }, + { + "epoch": 7.62, + "learning_rate": 2.462696296296296e-05, + "loss": 1.6079, + "step": 42835 + }, + { + "epoch": 7.62, + "learning_rate": 2.4624e-05, + "loss": 1.7441, + "step": 42840 + }, + { + "epoch": 7.62, + "learning_rate": 2.4621037037037035e-05, + "loss": 1.6391, + "step": 42845 + }, + { + "epoch": 7.62, + "learning_rate": 2.4618074074074074e-05, + "loss": 1.69, + "step": 42850 + }, + { + "epoch": 7.62, + "learning_rate": 2.461511111111111e-05, + "loss": 1.6441, + "step": 42855 + }, + { + "epoch": 7.62, + "learning_rate": 2.461214814814815e-05, + "loss": 1.6265, + "step": 42860 + }, + { + "epoch": 7.62, + "learning_rate": 2.4609185185185184e-05, + "loss": 1.6849, + "step": 42865 + }, + { + "epoch": 7.62, + "learning_rate": 2.4606222222222223e-05, + "loss": 1.7985, + "step": 42870 + }, + { + "epoch": 7.62, + "learning_rate": 2.4603259259259258e-05, + "loss": 1.6619, + "step": 42875 + }, + { + "epoch": 7.62, + "learning_rate": 2.4600296296296297e-05, + "loss": 1.5778, + "step": 42880 + }, + { + "epoch": 7.62, + "learning_rate": 2.4597333333333333e-05, + "loss": 1.5081, + "step": 42885 + }, + { + "epoch": 7.62, + "learning_rate": 2.459437037037037e-05, + "loss": 1.5907, + "step": 42890 + }, + { + "epoch": 7.63, + "learning_rate": 2.4591407407407407e-05, + "loss": 1.6738, + "step": 42895 + }, + { + "epoch": 7.63, + "learning_rate": 2.4588444444444446e-05, + "loss": 1.6922, + "step": 42900 + }, + { + "epoch": 7.63, + "learning_rate": 2.458548148148148e-05, + "loss": 1.5943, + "step": 42905 + }, + { + "epoch": 7.63, + "learning_rate": 2.458251851851852e-05, + "loss": 1.6382, + "step": 42910 + }, + { + "epoch": 7.63, + "learning_rate": 2.4579555555555555e-05, + "loss": 1.634, + "step": 42915 + }, + { + "epoch": 7.63, + "learning_rate": 2.4576592592592594e-05, + "loss": 1.7045, + "step": 42920 + }, + { + "epoch": 7.63, + "learning_rate": 2.457362962962963e-05, + "loss": 1.6522, + "step": 42925 + }, + { + "epoch": 7.63, + "learning_rate": 2.457066666666667e-05, + "loss": 1.6777, + "step": 42930 + }, + { + "epoch": 7.63, + "learning_rate": 2.4567703703703704e-05, + "loss": 1.5629, + "step": 42935 + }, + { + "epoch": 7.63, + "learning_rate": 2.4564740740740743e-05, + "loss": 1.7201, + "step": 42940 + }, + { + "epoch": 7.63, + "learning_rate": 2.4561777777777778e-05, + "loss": 1.7041, + "step": 42945 + }, + { + "epoch": 7.64, + "learning_rate": 2.4558814814814813e-05, + "loss": 1.6801, + "step": 42950 + }, + { + "epoch": 7.64, + "learning_rate": 2.4555851851851852e-05, + "loss": 1.604, + "step": 42955 + }, + { + "epoch": 7.64, + "learning_rate": 2.4552888888888888e-05, + "loss": 1.5547, + "step": 42960 + }, + { + "epoch": 7.64, + "learning_rate": 2.4549925925925927e-05, + "loss": 1.7886, + "step": 42965 + }, + { + "epoch": 7.64, + "learning_rate": 2.4546962962962962e-05, + "loss": 1.5996, + "step": 42970 + }, + { + "epoch": 7.64, + "learning_rate": 2.4544e-05, + "loss": 1.6517, + "step": 42975 + }, + { + "epoch": 7.64, + "learning_rate": 2.4541037037037036e-05, + "loss": 1.6069, + "step": 42980 + }, + { + "epoch": 7.64, + "learning_rate": 2.4538074074074075e-05, + "loss": 1.5577, + "step": 42985 + }, + { + "epoch": 7.64, + "learning_rate": 2.453511111111111e-05, + "loss": 1.6836, + "step": 42990 + }, + { + "epoch": 7.64, + "learning_rate": 2.453214814814815e-05, + "loss": 1.6156, + "step": 42995 + }, + { + "epoch": 7.64, + "learning_rate": 2.4529185185185185e-05, + "loss": 1.5328, + "step": 43000 + }, + { + "epoch": 7.65, + "learning_rate": 2.4526222222222224e-05, + "loss": 1.6149, + "step": 43005 + }, + { + "epoch": 7.65, + "learning_rate": 2.452325925925926e-05, + "loss": 1.6449, + "step": 43010 + }, + { + "epoch": 7.65, + "learning_rate": 2.4520296296296298e-05, + "loss": 1.5699, + "step": 43015 + }, + { + "epoch": 7.65, + "learning_rate": 2.4517333333333333e-05, + "loss": 1.5564, + "step": 43020 + }, + { + "epoch": 7.65, + "learning_rate": 2.4514370370370372e-05, + "loss": 1.7178, + "step": 43025 + }, + { + "epoch": 7.65, + "learning_rate": 2.4511407407407408e-05, + "loss": 1.6843, + "step": 43030 + }, + { + "epoch": 7.65, + "learning_rate": 2.4508444444444446e-05, + "loss": 1.6051, + "step": 43035 + }, + { + "epoch": 7.65, + "learning_rate": 2.4505481481481482e-05, + "loss": 1.624, + "step": 43040 + }, + { + "epoch": 7.65, + "learning_rate": 2.450251851851852e-05, + "loss": 1.6851, + "step": 43045 + }, + { + "epoch": 7.65, + "learning_rate": 2.4499555555555556e-05, + "loss": 1.5136, + "step": 43050 + }, + { + "epoch": 7.65, + "learning_rate": 2.4496592592592595e-05, + "loss": 1.5978, + "step": 43055 + }, + { + "epoch": 7.66, + "learning_rate": 2.449362962962963e-05, + "loss": 1.715, + "step": 43060 + }, + { + "epoch": 7.66, + "learning_rate": 2.4490666666666666e-05, + "loss": 1.6392, + "step": 43065 + }, + { + "epoch": 7.66, + "learning_rate": 2.4487703703703705e-05, + "loss": 1.718, + "step": 43070 + }, + { + "epoch": 7.66, + "learning_rate": 2.448474074074074e-05, + "loss": 1.682, + "step": 43075 + }, + { + "epoch": 7.66, + "learning_rate": 2.448177777777778e-05, + "loss": 1.6026, + "step": 43080 + }, + { + "epoch": 7.66, + "learning_rate": 2.4478814814814814e-05, + "loss": 1.6445, + "step": 43085 + }, + { + "epoch": 7.66, + "learning_rate": 2.4475851851851853e-05, + "loss": 1.488, + "step": 43090 + }, + { + "epoch": 7.66, + "learning_rate": 2.447288888888889e-05, + "loss": 1.6129, + "step": 43095 + }, + { + "epoch": 7.66, + "learning_rate": 2.4469925925925927e-05, + "loss": 1.7074, + "step": 43100 + }, + { + "epoch": 7.66, + "learning_rate": 2.4466962962962963e-05, + "loss": 1.6178, + "step": 43105 + }, + { + "epoch": 7.66, + "learning_rate": 2.4464e-05, + "loss": 1.7074, + "step": 43110 + }, + { + "epoch": 7.66, + "learning_rate": 2.4461037037037037e-05, + "loss": 1.6178, + "step": 43115 + }, + { + "epoch": 7.67, + "learning_rate": 2.4458074074074076e-05, + "loss": 1.5888, + "step": 43120 + }, + { + "epoch": 7.67, + "learning_rate": 2.445511111111111e-05, + "loss": 1.7443, + "step": 43125 + }, + { + "epoch": 7.67, + "learning_rate": 2.445214814814815e-05, + "loss": 1.6867, + "step": 43130 + }, + { + "epoch": 7.67, + "learning_rate": 2.4449185185185186e-05, + "loss": 1.5555, + "step": 43135 + }, + { + "epoch": 7.67, + "learning_rate": 2.4446222222222224e-05, + "loss": 1.6084, + "step": 43140 + }, + { + "epoch": 7.67, + "learning_rate": 2.444325925925926e-05, + "loss": 1.7005, + "step": 43145 + }, + { + "epoch": 7.67, + "learning_rate": 2.44402962962963e-05, + "loss": 1.6509, + "step": 43150 + }, + { + "epoch": 7.67, + "learning_rate": 2.4437333333333334e-05, + "loss": 1.675, + "step": 43155 + }, + { + "epoch": 7.67, + "learning_rate": 2.4434370370370373e-05, + "loss": 1.7306, + "step": 43160 + }, + { + "epoch": 7.67, + "learning_rate": 2.4431407407407408e-05, + "loss": 1.6103, + "step": 43165 + }, + { + "epoch": 7.67, + "learning_rate": 2.4428444444444447e-05, + "loss": 1.518, + "step": 43170 + }, + { + "epoch": 7.68, + "learning_rate": 2.4425481481481483e-05, + "loss": 1.6322, + "step": 43175 + }, + { + "epoch": 7.68, + "learning_rate": 2.4422518518518518e-05, + "loss": 1.7166, + "step": 43180 + }, + { + "epoch": 7.68, + "learning_rate": 2.4419555555555557e-05, + "loss": 1.6174, + "step": 43185 + }, + { + "epoch": 7.68, + "learning_rate": 2.4416592592592592e-05, + "loss": 1.7199, + "step": 43190 + }, + { + "epoch": 7.68, + "learning_rate": 2.441362962962963e-05, + "loss": 1.5637, + "step": 43195 + }, + { + "epoch": 7.68, + "learning_rate": 2.4410666666666667e-05, + "loss": 1.7475, + "step": 43200 + }, + { + "epoch": 7.68, + "learning_rate": 2.4407703703703705e-05, + "loss": 1.6044, + "step": 43205 + }, + { + "epoch": 7.68, + "learning_rate": 2.440474074074074e-05, + "loss": 1.5536, + "step": 43210 + }, + { + "epoch": 7.68, + "learning_rate": 2.440177777777778e-05, + "loss": 1.6782, + "step": 43215 + }, + { + "epoch": 7.68, + "learning_rate": 2.4398814814814815e-05, + "loss": 1.6251, + "step": 43220 + }, + { + "epoch": 7.68, + "learning_rate": 2.4395851851851854e-05, + "loss": 1.712, + "step": 43225 + }, + { + "epoch": 7.69, + "learning_rate": 2.439288888888889e-05, + "loss": 1.6403, + "step": 43230 + }, + { + "epoch": 7.69, + "learning_rate": 2.4389925925925928e-05, + "loss": 1.6092, + "step": 43235 + }, + { + "epoch": 7.69, + "learning_rate": 2.4386962962962964e-05, + "loss": 1.6144, + "step": 43240 + }, + { + "epoch": 7.69, + "learning_rate": 2.4384000000000002e-05, + "loss": 1.694, + "step": 43245 + }, + { + "epoch": 7.69, + "learning_rate": 2.4381037037037038e-05, + "loss": 1.6376, + "step": 43250 + }, + { + "epoch": 7.69, + "learning_rate": 2.4378074074074077e-05, + "loss": 1.7075, + "step": 43255 + }, + { + "epoch": 7.69, + "learning_rate": 2.4375111111111112e-05, + "loss": 1.5395, + "step": 43260 + }, + { + "epoch": 7.69, + "learning_rate": 2.437214814814815e-05, + "loss": 1.6706, + "step": 43265 + }, + { + "epoch": 7.69, + "learning_rate": 2.4369185185185186e-05, + "loss": 1.7421, + "step": 43270 + }, + { + "epoch": 7.69, + "learning_rate": 2.4366222222222225e-05, + "loss": 1.5826, + "step": 43275 + }, + { + "epoch": 7.69, + "learning_rate": 2.436325925925926e-05, + "loss": 1.6134, + "step": 43280 + }, + { + "epoch": 7.7, + "learning_rate": 2.43602962962963e-05, + "loss": 1.6581, + "step": 43285 + }, + { + "epoch": 7.7, + "learning_rate": 2.4357333333333335e-05, + "loss": 1.6876, + "step": 43290 + }, + { + "epoch": 7.7, + "learning_rate": 2.435437037037037e-05, + "loss": 1.6236, + "step": 43295 + }, + { + "epoch": 7.7, + "learning_rate": 2.435140740740741e-05, + "loss": 1.6006, + "step": 43300 + }, + { + "epoch": 7.7, + "learning_rate": 2.4348444444444445e-05, + "loss": 1.5795, + "step": 43305 + }, + { + "epoch": 7.7, + "learning_rate": 2.4345481481481483e-05, + "loss": 1.4963, + "step": 43310 + }, + { + "epoch": 7.7, + "learning_rate": 2.434251851851852e-05, + "loss": 1.7048, + "step": 43315 + }, + { + "epoch": 7.7, + "learning_rate": 2.4339555555555558e-05, + "loss": 1.606, + "step": 43320 + }, + { + "epoch": 7.7, + "learning_rate": 2.4336592592592593e-05, + "loss": 1.6692, + "step": 43325 + }, + { + "epoch": 7.7, + "learning_rate": 2.4333629629629632e-05, + "loss": 1.6537, + "step": 43330 + }, + { + "epoch": 7.7, + "learning_rate": 2.4330666666666667e-05, + "loss": 1.5897, + "step": 43335 + }, + { + "epoch": 7.7, + "learning_rate": 2.4327703703703706e-05, + "loss": 1.5532, + "step": 43340 + }, + { + "epoch": 7.71, + "learning_rate": 2.432474074074074e-05, + "loss": 1.7561, + "step": 43345 + }, + { + "epoch": 7.71, + "learning_rate": 2.432177777777778e-05, + "loss": 1.7181, + "step": 43350 + }, + { + "epoch": 7.71, + "learning_rate": 2.4318814814814816e-05, + "loss": 1.6277, + "step": 43355 + }, + { + "epoch": 7.71, + "learning_rate": 2.4315851851851855e-05, + "loss": 1.6867, + "step": 43360 + }, + { + "epoch": 7.71, + "learning_rate": 2.431288888888889e-05, + "loss": 1.7926, + "step": 43365 + }, + { + "epoch": 7.71, + "learning_rate": 2.430992592592593e-05, + "loss": 1.6746, + "step": 43370 + }, + { + "epoch": 7.71, + "learning_rate": 2.4306962962962964e-05, + "loss": 1.5804, + "step": 43375 + }, + { + "epoch": 7.71, + "learning_rate": 2.4304000000000003e-05, + "loss": 1.6194, + "step": 43380 + }, + { + "epoch": 7.71, + "learning_rate": 2.430103703703704e-05, + "loss": 1.6775, + "step": 43385 + }, + { + "epoch": 7.71, + "learning_rate": 2.4298074074074077e-05, + "loss": 1.6694, + "step": 43390 + }, + { + "epoch": 7.71, + "learning_rate": 2.4295111111111113e-05, + "loss": 1.628, + "step": 43395 + }, + { + "epoch": 7.72, + "learning_rate": 2.429214814814815e-05, + "loss": 1.5887, + "step": 43400 + }, + { + "epoch": 7.72, + "learning_rate": 2.4289185185185187e-05, + "loss": 1.643, + "step": 43405 + }, + { + "epoch": 7.72, + "learning_rate": 2.4286222222222222e-05, + "loss": 1.5909, + "step": 43410 + }, + { + "epoch": 7.72, + "learning_rate": 2.428325925925926e-05, + "loss": 1.5589, + "step": 43415 + }, + { + "epoch": 7.72, + "learning_rate": 2.4280296296296297e-05, + "loss": 1.6534, + "step": 43420 + }, + { + "epoch": 7.72, + "learning_rate": 2.4277333333333336e-05, + "loss": 1.6822, + "step": 43425 + }, + { + "epoch": 7.72, + "learning_rate": 2.427437037037037e-05, + "loss": 1.7091, + "step": 43430 + }, + { + "epoch": 7.72, + "learning_rate": 2.427140740740741e-05, + "loss": 1.6612, + "step": 43435 + }, + { + "epoch": 7.72, + "learning_rate": 2.4268444444444445e-05, + "loss": 1.7228, + "step": 43440 + }, + { + "epoch": 7.72, + "learning_rate": 2.4265481481481484e-05, + "loss": 1.6556, + "step": 43445 + }, + { + "epoch": 7.72, + "learning_rate": 2.426251851851852e-05, + "loss": 1.6636, + "step": 43450 + }, + { + "epoch": 7.73, + "learning_rate": 2.425955555555556e-05, + "loss": 1.5737, + "step": 43455 + }, + { + "epoch": 7.73, + "learning_rate": 2.4256592592592594e-05, + "loss": 1.648, + "step": 43460 + }, + { + "epoch": 7.73, + "learning_rate": 2.4253629629629633e-05, + "loss": 1.6036, + "step": 43465 + }, + { + "epoch": 7.73, + "learning_rate": 2.4250666666666668e-05, + "loss": 1.7201, + "step": 43470 + }, + { + "epoch": 7.73, + "learning_rate": 2.4247703703703707e-05, + "loss": 1.5684, + "step": 43475 + }, + { + "epoch": 7.73, + "learning_rate": 2.4244740740740742e-05, + "loss": 1.6336, + "step": 43480 + }, + { + "epoch": 7.73, + "learning_rate": 2.424177777777778e-05, + "loss": 1.8631, + "step": 43485 + }, + { + "epoch": 7.73, + "learning_rate": 2.4238814814814817e-05, + "loss": 1.7817, + "step": 43490 + }, + { + "epoch": 7.73, + "learning_rate": 2.4235851851851855e-05, + "loss": 1.546, + "step": 43495 + }, + { + "epoch": 7.73, + "learning_rate": 2.423288888888889e-05, + "loss": 1.513, + "step": 43500 + }, + { + "epoch": 7.73, + "learning_rate": 2.422992592592593e-05, + "loss": 1.4688, + "step": 43505 + }, + { + "epoch": 7.74, + "learning_rate": 2.4226962962962965e-05, + "loss": 1.6324, + "step": 43510 + }, + { + "epoch": 7.74, + "learning_rate": 2.4224000000000004e-05, + "loss": 1.6673, + "step": 43515 + }, + { + "epoch": 7.74, + "learning_rate": 2.422103703703704e-05, + "loss": 1.7393, + "step": 43520 + }, + { + "epoch": 7.74, + "learning_rate": 2.4218074074074075e-05, + "loss": 1.6036, + "step": 43525 + }, + { + "epoch": 7.74, + "learning_rate": 2.4215111111111114e-05, + "loss": 1.7096, + "step": 43530 + }, + { + "epoch": 7.74, + "learning_rate": 2.421214814814815e-05, + "loss": 1.5361, + "step": 43535 + }, + { + "epoch": 7.74, + "learning_rate": 2.4209185185185188e-05, + "loss": 1.605, + "step": 43540 + }, + { + "epoch": 7.74, + "learning_rate": 2.4206222222222223e-05, + "loss": 1.7471, + "step": 43545 + }, + { + "epoch": 7.74, + "learning_rate": 2.4203259259259262e-05, + "loss": 1.6791, + "step": 43550 + }, + { + "epoch": 7.74, + "learning_rate": 2.4200296296296298e-05, + "loss": 1.6465, + "step": 43555 + }, + { + "epoch": 7.74, + "learning_rate": 2.4197333333333336e-05, + "loss": 1.6986, + "step": 43560 + }, + { + "epoch": 7.74, + "learning_rate": 2.4194370370370372e-05, + "loss": 1.56, + "step": 43565 + }, + { + "epoch": 7.75, + "learning_rate": 2.419140740740741e-05, + "loss": 1.668, + "step": 43570 + }, + { + "epoch": 7.75, + "learning_rate": 2.4188444444444446e-05, + "loss": 1.5966, + "step": 43575 + }, + { + "epoch": 7.75, + "learning_rate": 2.4185481481481485e-05, + "loss": 1.665, + "step": 43580 + }, + { + "epoch": 7.75, + "learning_rate": 2.418251851851852e-05, + "loss": 1.6944, + "step": 43585 + }, + { + "epoch": 7.75, + "learning_rate": 2.417955555555556e-05, + "loss": 1.6117, + "step": 43590 + }, + { + "epoch": 7.75, + "learning_rate": 2.4176592592592595e-05, + "loss": 1.6165, + "step": 43595 + }, + { + "epoch": 7.75, + "learning_rate": 2.417362962962963e-05, + "loss": 1.717, + "step": 43600 + }, + { + "epoch": 7.75, + "learning_rate": 2.4170666666666665e-05, + "loss": 1.7191, + "step": 43605 + }, + { + "epoch": 7.75, + "learning_rate": 2.4167703703703704e-05, + "loss": 1.6871, + "step": 43610 + }, + { + "epoch": 7.75, + "learning_rate": 2.416474074074074e-05, + "loss": 1.5964, + "step": 43615 + }, + { + "epoch": 7.75, + "learning_rate": 2.416177777777778e-05, + "loss": 1.526, + "step": 43620 + }, + { + "epoch": 7.76, + "learning_rate": 2.4158814814814814e-05, + "loss": 1.6351, + "step": 43625 + }, + { + "epoch": 7.76, + "learning_rate": 2.4155851851851853e-05, + "loss": 1.68, + "step": 43630 + }, + { + "epoch": 7.76, + "learning_rate": 2.4152888888888888e-05, + "loss": 1.5641, + "step": 43635 + }, + { + "epoch": 7.76, + "learning_rate": 2.4149925925925927e-05, + "loss": 1.6968, + "step": 43640 + }, + { + "epoch": 7.76, + "learning_rate": 2.4146962962962962e-05, + "loss": 1.7593, + "step": 43645 + }, + { + "epoch": 7.76, + "learning_rate": 2.4144e-05, + "loss": 1.7301, + "step": 43650 + }, + { + "epoch": 7.76, + "learning_rate": 2.4141037037037037e-05, + "loss": 1.7554, + "step": 43655 + }, + { + "epoch": 7.76, + "learning_rate": 2.4138074074074076e-05, + "loss": 1.6871, + "step": 43660 + }, + { + "epoch": 7.76, + "learning_rate": 2.413511111111111e-05, + "loss": 1.5504, + "step": 43665 + }, + { + "epoch": 7.76, + "learning_rate": 2.413214814814815e-05, + "loss": 1.7713, + "step": 43670 + }, + { + "epoch": 7.76, + "learning_rate": 2.4129185185185185e-05, + "loss": 1.6444, + "step": 43675 + }, + { + "epoch": 7.77, + "learning_rate": 2.412622222222222e-05, + "loss": 1.6298, + "step": 43680 + }, + { + "epoch": 7.77, + "learning_rate": 2.412325925925926e-05, + "loss": 1.6716, + "step": 43685 + }, + { + "epoch": 7.77, + "learning_rate": 2.4120296296296295e-05, + "loss": 1.698, + "step": 43690 + }, + { + "epoch": 7.77, + "learning_rate": 2.4117333333333334e-05, + "loss": 1.6255, + "step": 43695 + }, + { + "epoch": 7.77, + "learning_rate": 2.411437037037037e-05, + "loss": 1.6152, + "step": 43700 + }, + { + "epoch": 7.77, + "learning_rate": 2.4111407407407408e-05, + "loss": 1.6757, + "step": 43705 + }, + { + "epoch": 7.77, + "learning_rate": 2.4108444444444443e-05, + "loss": 1.6195, + "step": 43710 + }, + { + "epoch": 7.77, + "learning_rate": 2.4105481481481482e-05, + "loss": 1.5766, + "step": 43715 + }, + { + "epoch": 7.77, + "learning_rate": 2.4102518518518518e-05, + "loss": 1.7298, + "step": 43720 + }, + { + "epoch": 7.77, + "learning_rate": 2.4099555555555556e-05, + "loss": 1.6454, + "step": 43725 + }, + { + "epoch": 7.77, + "learning_rate": 2.4096592592592592e-05, + "loss": 1.6131, + "step": 43730 + }, + { + "epoch": 7.78, + "learning_rate": 2.409362962962963e-05, + "loss": 1.7367, + "step": 43735 + }, + { + "epoch": 7.78, + "learning_rate": 2.4090666666666666e-05, + "loss": 1.6687, + "step": 43740 + }, + { + "epoch": 7.78, + "learning_rate": 2.4087703703703705e-05, + "loss": 1.6111, + "step": 43745 + }, + { + "epoch": 7.78, + "learning_rate": 2.408474074074074e-05, + "loss": 1.5982, + "step": 43750 + }, + { + "epoch": 7.78, + "learning_rate": 2.408177777777778e-05, + "loss": 1.5327, + "step": 43755 + }, + { + "epoch": 7.78, + "learning_rate": 2.4078814814814815e-05, + "loss": 1.5602, + "step": 43760 + }, + { + "epoch": 7.78, + "learning_rate": 2.4075851851851854e-05, + "loss": 1.5002, + "step": 43765 + }, + { + "epoch": 7.78, + "learning_rate": 2.407288888888889e-05, + "loss": 1.5914, + "step": 43770 + }, + { + "epoch": 7.78, + "learning_rate": 2.4069925925925928e-05, + "loss": 1.6102, + "step": 43775 + }, + { + "epoch": 7.78, + "learning_rate": 2.4066962962962963e-05, + "loss": 1.6541, + "step": 43780 + }, + { + "epoch": 7.78, + "learning_rate": 2.4064000000000002e-05, + "loss": 1.4847, + "step": 43785 + }, + { + "epoch": 7.78, + "learning_rate": 2.4061037037037037e-05, + "loss": 1.7369, + "step": 43790 + }, + { + "epoch": 7.79, + "learning_rate": 2.4058074074074073e-05, + "loss": 1.5941, + "step": 43795 + }, + { + "epoch": 7.79, + "learning_rate": 2.4055111111111112e-05, + "loss": 1.5949, + "step": 43800 + }, + { + "epoch": 7.79, + "learning_rate": 2.4052148148148147e-05, + "loss": 1.7215, + "step": 43805 + }, + { + "epoch": 7.79, + "learning_rate": 2.4049185185185186e-05, + "loss": 1.5552, + "step": 43810 + }, + { + "epoch": 7.79, + "learning_rate": 2.404622222222222e-05, + "loss": 1.7436, + "step": 43815 + }, + { + "epoch": 7.79, + "learning_rate": 2.404325925925926e-05, + "loss": 1.6751, + "step": 43820 + }, + { + "epoch": 7.79, + "learning_rate": 2.4040296296296296e-05, + "loss": 1.7211, + "step": 43825 + }, + { + "epoch": 7.79, + "learning_rate": 2.4037333333333334e-05, + "loss": 1.6741, + "step": 43830 + }, + { + "epoch": 7.79, + "learning_rate": 2.403437037037037e-05, + "loss": 1.7234, + "step": 43835 + }, + { + "epoch": 7.79, + "learning_rate": 2.403140740740741e-05, + "loss": 1.7538, + "step": 43840 + }, + { + "epoch": 7.79, + "learning_rate": 2.4028444444444444e-05, + "loss": 1.5806, + "step": 43845 + }, + { + "epoch": 7.8, + "learning_rate": 2.4025481481481483e-05, + "loss": 1.6546, + "step": 43850 + }, + { + "epoch": 7.8, + "learning_rate": 2.402251851851852e-05, + "loss": 1.7191, + "step": 43855 + }, + { + "epoch": 7.8, + "learning_rate": 2.4019555555555557e-05, + "loss": 1.5458, + "step": 43860 + }, + { + "epoch": 7.8, + "learning_rate": 2.4016592592592593e-05, + "loss": 1.7858, + "step": 43865 + }, + { + "epoch": 7.8, + "learning_rate": 2.401362962962963e-05, + "loss": 1.6788, + "step": 43870 + }, + { + "epoch": 7.8, + "learning_rate": 2.4010666666666667e-05, + "loss": 1.593, + "step": 43875 + }, + { + "epoch": 7.8, + "learning_rate": 2.4007703703703706e-05, + "loss": 1.7663, + "step": 43880 + }, + { + "epoch": 7.8, + "learning_rate": 2.400474074074074e-05, + "loss": 1.5753, + "step": 43885 + }, + { + "epoch": 7.8, + "learning_rate": 2.400177777777778e-05, + "loss": 1.6321, + "step": 43890 + }, + { + "epoch": 7.8, + "learning_rate": 2.3998814814814815e-05, + "loss": 1.6525, + "step": 43895 + }, + { + "epoch": 7.8, + "learning_rate": 2.3995851851851854e-05, + "loss": 1.6741, + "step": 43900 + }, + { + "epoch": 7.81, + "learning_rate": 2.399288888888889e-05, + "loss": 1.6693, + "step": 43905 + }, + { + "epoch": 7.81, + "learning_rate": 2.3989925925925925e-05, + "loss": 1.587, + "step": 43910 + }, + { + "epoch": 7.81, + "learning_rate": 2.3986962962962964e-05, + "loss": 1.6631, + "step": 43915 + }, + { + "epoch": 7.81, + "learning_rate": 2.3984e-05, + "loss": 1.7355, + "step": 43920 + }, + { + "epoch": 7.81, + "learning_rate": 2.3981037037037038e-05, + "loss": 1.6301, + "step": 43925 + }, + { + "epoch": 7.81, + "learning_rate": 2.3978074074074074e-05, + "loss": 1.6278, + "step": 43930 + }, + { + "epoch": 7.81, + "learning_rate": 2.3975111111111112e-05, + "loss": 1.5621, + "step": 43935 + }, + { + "epoch": 7.81, + "learning_rate": 2.3972148148148148e-05, + "loss": 1.7544, + "step": 43940 + }, + { + "epoch": 7.81, + "learning_rate": 2.3969185185185187e-05, + "loss": 1.5575, + "step": 43945 + }, + { + "epoch": 7.81, + "learning_rate": 2.3966222222222222e-05, + "loss": 1.5192, + "step": 43950 + }, + { + "epoch": 7.81, + "learning_rate": 2.396325925925926e-05, + "loss": 1.7646, + "step": 43955 + }, + { + "epoch": 7.82, + "learning_rate": 2.3960296296296296e-05, + "loss": 1.7212, + "step": 43960 + }, + { + "epoch": 7.82, + "learning_rate": 2.3957333333333335e-05, + "loss": 1.6293, + "step": 43965 + }, + { + "epoch": 7.82, + "learning_rate": 2.395437037037037e-05, + "loss": 1.7256, + "step": 43970 + }, + { + "epoch": 7.82, + "learning_rate": 2.395140740740741e-05, + "loss": 1.6934, + "step": 43975 + }, + { + "epoch": 7.82, + "learning_rate": 2.3948444444444445e-05, + "loss": 1.5999, + "step": 43980 + }, + { + "epoch": 7.82, + "learning_rate": 2.3945481481481484e-05, + "loss": 1.7327, + "step": 43985 + }, + { + "epoch": 7.82, + "learning_rate": 2.394251851851852e-05, + "loss": 1.6156, + "step": 43990 + }, + { + "epoch": 7.82, + "learning_rate": 2.3939555555555558e-05, + "loss": 1.6161, + "step": 43995 + }, + { + "epoch": 7.82, + "learning_rate": 2.3936592592592593e-05, + "loss": 1.6907, + "step": 44000 + }, + { + "epoch": 7.82, + "learning_rate": 2.3933629629629632e-05, + "loss": 1.6608, + "step": 44005 + }, + { + "epoch": 7.82, + "learning_rate": 2.3930666666666668e-05, + "loss": 1.6311, + "step": 44010 + }, + { + "epoch": 7.82, + "learning_rate": 2.3927703703703707e-05, + "loss": 1.5601, + "step": 44015 + }, + { + "epoch": 7.83, + "learning_rate": 2.3924740740740742e-05, + "loss": 1.6394, + "step": 44020 + }, + { + "epoch": 7.83, + "learning_rate": 2.3921777777777777e-05, + "loss": 1.7379, + "step": 44025 + }, + { + "epoch": 7.83, + "learning_rate": 2.3918814814814816e-05, + "loss": 1.7504, + "step": 44030 + }, + { + "epoch": 7.83, + "learning_rate": 2.391585185185185e-05, + "loss": 1.6393, + "step": 44035 + }, + { + "epoch": 7.83, + "learning_rate": 2.391288888888889e-05, + "loss": 1.6309, + "step": 44040 + }, + { + "epoch": 7.83, + "learning_rate": 2.3909925925925926e-05, + "loss": 1.78, + "step": 44045 + }, + { + "epoch": 7.83, + "learning_rate": 2.3906962962962965e-05, + "loss": 1.6029, + "step": 44050 + }, + { + "epoch": 7.83, + "learning_rate": 2.3904e-05, + "loss": 1.6456, + "step": 44055 + }, + { + "epoch": 7.83, + "learning_rate": 2.390103703703704e-05, + "loss": 1.6269, + "step": 44060 + }, + { + "epoch": 7.83, + "learning_rate": 2.3898074074074074e-05, + "loss": 1.6041, + "step": 44065 + }, + { + "epoch": 7.83, + "learning_rate": 2.3895111111111113e-05, + "loss": 1.554, + "step": 44070 + }, + { + "epoch": 7.84, + "learning_rate": 2.389214814814815e-05, + "loss": 1.6295, + "step": 44075 + }, + { + "epoch": 7.84, + "learning_rate": 2.3889185185185188e-05, + "loss": 1.5618, + "step": 44080 + }, + { + "epoch": 7.84, + "learning_rate": 2.3886222222222223e-05, + "loss": 1.5222, + "step": 44085 + }, + { + "epoch": 7.84, + "learning_rate": 2.3883259259259262e-05, + "loss": 1.6033, + "step": 44090 + }, + { + "epoch": 7.84, + "learning_rate": 2.3880296296296297e-05, + "loss": 1.6292, + "step": 44095 + }, + { + "epoch": 7.84, + "learning_rate": 2.3877333333333336e-05, + "loss": 1.713, + "step": 44100 + }, + { + "epoch": 7.84, + "learning_rate": 2.387437037037037e-05, + "loss": 1.643, + "step": 44105 + }, + { + "epoch": 7.84, + "learning_rate": 2.387140740740741e-05, + "loss": 1.5765, + "step": 44110 + }, + { + "epoch": 7.84, + "learning_rate": 2.3868444444444446e-05, + "loss": 1.6238, + "step": 44115 + }, + { + "epoch": 7.84, + "learning_rate": 2.3865481481481485e-05, + "loss": 1.52, + "step": 44120 + }, + { + "epoch": 7.84, + "learning_rate": 2.386251851851852e-05, + "loss": 1.5639, + "step": 44125 + }, + { + "epoch": 7.85, + "learning_rate": 2.385955555555556e-05, + "loss": 1.5887, + "step": 44130 + }, + { + "epoch": 7.85, + "learning_rate": 2.3856592592592594e-05, + "loss": 1.618, + "step": 44135 + }, + { + "epoch": 7.85, + "learning_rate": 2.385362962962963e-05, + "loss": 1.7144, + "step": 44140 + }, + { + "epoch": 7.85, + "learning_rate": 2.385066666666667e-05, + "loss": 1.6376, + "step": 44145 + }, + { + "epoch": 7.85, + "learning_rate": 2.3847703703703704e-05, + "loss": 1.7256, + "step": 44150 + }, + { + "epoch": 7.85, + "learning_rate": 2.3844740740740743e-05, + "loss": 1.6934, + "step": 44155 + }, + { + "epoch": 7.85, + "learning_rate": 2.3841777777777778e-05, + "loss": 1.5969, + "step": 44160 + }, + { + "epoch": 7.85, + "learning_rate": 2.3838814814814817e-05, + "loss": 1.7101, + "step": 44165 + }, + { + "epoch": 7.85, + "learning_rate": 2.3835851851851852e-05, + "loss": 1.6164, + "step": 44170 + }, + { + "epoch": 7.85, + "learning_rate": 2.383288888888889e-05, + "loss": 1.6782, + "step": 44175 + }, + { + "epoch": 7.85, + "learning_rate": 2.3829925925925927e-05, + "loss": 1.7189, + "step": 44180 + }, + { + "epoch": 7.86, + "learning_rate": 2.3826962962962966e-05, + "loss": 1.6555, + "step": 44185 + }, + { + "epoch": 7.86, + "learning_rate": 2.3824e-05, + "loss": 1.678, + "step": 44190 + }, + { + "epoch": 7.86, + "learning_rate": 2.382103703703704e-05, + "loss": 1.7067, + "step": 44195 + }, + { + "epoch": 7.86, + "learning_rate": 2.3818074074074075e-05, + "loss": 1.7777, + "step": 44200 + }, + { + "epoch": 7.86, + "learning_rate": 2.3815111111111114e-05, + "loss": 1.6683, + "step": 44205 + }, + { + "epoch": 7.86, + "learning_rate": 2.381214814814815e-05, + "loss": 1.6967, + "step": 44210 + }, + { + "epoch": 7.86, + "learning_rate": 2.3809185185185188e-05, + "loss": 1.6287, + "step": 44215 + }, + { + "epoch": 7.86, + "learning_rate": 2.3806222222222224e-05, + "loss": 1.5928, + "step": 44220 + }, + { + "epoch": 7.86, + "learning_rate": 2.3803259259259263e-05, + "loss": 1.7359, + "step": 44225 + }, + { + "epoch": 7.86, + "learning_rate": 2.3800296296296298e-05, + "loss": 1.678, + "step": 44230 + }, + { + "epoch": 7.86, + "learning_rate": 2.3797333333333337e-05, + "loss": 1.477, + "step": 44235 + }, + { + "epoch": 7.86, + "learning_rate": 2.3794370370370372e-05, + "loss": 1.7596, + "step": 44240 + }, + { + "epoch": 7.87, + "learning_rate": 2.379140740740741e-05, + "loss": 1.6922, + "step": 44245 + }, + { + "epoch": 7.87, + "learning_rate": 2.3788444444444446e-05, + "loss": 1.7037, + "step": 44250 + }, + { + "epoch": 7.87, + "learning_rate": 2.3785481481481482e-05, + "loss": 1.4522, + "step": 44255 + }, + { + "epoch": 7.87, + "learning_rate": 2.378251851851852e-05, + "loss": 1.7415, + "step": 44260 + }, + { + "epoch": 7.87, + "learning_rate": 2.3779555555555556e-05, + "loss": 1.6058, + "step": 44265 + }, + { + "epoch": 7.87, + "learning_rate": 2.3776592592592595e-05, + "loss": 1.6538, + "step": 44270 + }, + { + "epoch": 7.87, + "learning_rate": 2.377362962962963e-05, + "loss": 1.6966, + "step": 44275 + }, + { + "epoch": 7.87, + "learning_rate": 2.377066666666667e-05, + "loss": 1.7313, + "step": 44280 + }, + { + "epoch": 7.87, + "learning_rate": 2.3767703703703705e-05, + "loss": 1.8092, + "step": 44285 + }, + { + "epoch": 7.87, + "learning_rate": 2.3764740740740744e-05, + "loss": 1.7834, + "step": 44290 + }, + { + "epoch": 7.87, + "learning_rate": 2.376177777777778e-05, + "loss": 1.5814, + "step": 44295 + }, + { + "epoch": 7.88, + "learning_rate": 2.3758814814814818e-05, + "loss": 1.6047, + "step": 44300 + }, + { + "epoch": 7.88, + "learning_rate": 2.3755851851851853e-05, + "loss": 1.5985, + "step": 44305 + }, + { + "epoch": 7.88, + "learning_rate": 2.3752888888888892e-05, + "loss": 1.4982, + "step": 44310 + }, + { + "epoch": 7.88, + "learning_rate": 2.3749925925925927e-05, + "loss": 1.6979, + "step": 44315 + }, + { + "epoch": 7.88, + "learning_rate": 2.3746962962962966e-05, + "loss": 1.6143, + "step": 44320 + }, + { + "epoch": 7.88, + "learning_rate": 2.3744000000000002e-05, + "loss": 1.6739, + "step": 44325 + }, + { + "epoch": 7.88, + "learning_rate": 2.374103703703704e-05, + "loss": 1.684, + "step": 44330 + }, + { + "epoch": 7.88, + "learning_rate": 2.3738074074074076e-05, + "loss": 1.6024, + "step": 44335 + }, + { + "epoch": 7.88, + "learning_rate": 2.3735111111111115e-05, + "loss": 1.6681, + "step": 44340 + }, + { + "epoch": 7.88, + "learning_rate": 2.373214814814815e-05, + "loss": 1.7536, + "step": 44345 + }, + { + "epoch": 7.88, + "learning_rate": 2.372918518518519e-05, + "loss": 1.6802, + "step": 44350 + }, + { + "epoch": 7.89, + "learning_rate": 2.3726222222222224e-05, + "loss": 1.6843, + "step": 44355 + }, + { + "epoch": 7.89, + "learning_rate": 2.3723259259259263e-05, + "loss": 1.6141, + "step": 44360 + }, + { + "epoch": 7.89, + "learning_rate": 2.37202962962963e-05, + "loss": 1.6366, + "step": 44365 + }, + { + "epoch": 7.89, + "learning_rate": 2.3717333333333334e-05, + "loss": 1.6647, + "step": 44370 + }, + { + "epoch": 7.89, + "learning_rate": 2.371437037037037e-05, + "loss": 1.5831, + "step": 44375 + }, + { + "epoch": 7.89, + "learning_rate": 2.371140740740741e-05, + "loss": 1.7328, + "step": 44380 + }, + { + "epoch": 7.89, + "learning_rate": 2.3708444444444444e-05, + "loss": 1.694, + "step": 44385 + }, + { + "epoch": 7.89, + "learning_rate": 2.3705481481481483e-05, + "loss": 1.7438, + "step": 44390 + }, + { + "epoch": 7.89, + "learning_rate": 2.3702518518518518e-05, + "loss": 1.5709, + "step": 44395 + }, + { + "epoch": 7.89, + "learning_rate": 2.3699555555555557e-05, + "loss": 1.7062, + "step": 44400 + }, + { + "epoch": 7.89, + "learning_rate": 2.3696592592592592e-05, + "loss": 1.6691, + "step": 44405 + }, + { + "epoch": 7.9, + "learning_rate": 2.369362962962963e-05, + "loss": 1.6627, + "step": 44410 + }, + { + "epoch": 7.9, + "learning_rate": 2.3690666666666667e-05, + "loss": 1.5864, + "step": 44415 + }, + { + "epoch": 7.9, + "learning_rate": 2.3687703703703702e-05, + "loss": 1.7458, + "step": 44420 + }, + { + "epoch": 7.9, + "learning_rate": 2.368474074074074e-05, + "loss": 1.6351, + "step": 44425 + }, + { + "epoch": 7.9, + "learning_rate": 2.3681777777777776e-05, + "loss": 1.7145, + "step": 44430 + }, + { + "epoch": 7.9, + "learning_rate": 2.3678814814814815e-05, + "loss": 1.7232, + "step": 44435 + }, + { + "epoch": 7.9, + "learning_rate": 2.367585185185185e-05, + "loss": 1.6018, + "step": 44440 + }, + { + "epoch": 7.9, + "learning_rate": 2.367288888888889e-05, + "loss": 1.7669, + "step": 44445 + }, + { + "epoch": 7.9, + "learning_rate": 2.3669925925925925e-05, + "loss": 1.8388, + "step": 44450 + }, + { + "epoch": 7.9, + "learning_rate": 2.3666962962962964e-05, + "loss": 1.6238, + "step": 44455 + }, + { + "epoch": 7.9, + "learning_rate": 2.3664e-05, + "loss": 1.5801, + "step": 44460 + }, + { + "epoch": 7.9, + "learning_rate": 2.3661037037037038e-05, + "loss": 1.7535, + "step": 44465 + }, + { + "epoch": 7.91, + "learning_rate": 2.3658074074074073e-05, + "loss": 1.7648, + "step": 44470 + }, + { + "epoch": 7.91, + "learning_rate": 2.3655111111111112e-05, + "loss": 1.6927, + "step": 44475 + }, + { + "epoch": 7.91, + "learning_rate": 2.3652148148148148e-05, + "loss": 1.6763, + "step": 44480 + }, + { + "epoch": 7.91, + "learning_rate": 2.3649185185185186e-05, + "loss": 1.6979, + "step": 44485 + }, + { + "epoch": 7.91, + "learning_rate": 2.3646222222222222e-05, + "loss": 1.5611, + "step": 44490 + }, + { + "epoch": 7.91, + "learning_rate": 2.364325925925926e-05, + "loss": 1.6492, + "step": 44495 + }, + { + "epoch": 7.91, + "learning_rate": 2.3640296296296296e-05, + "loss": 1.6176, + "step": 44500 + }, + { + "epoch": 7.91, + "learning_rate": 2.3637333333333335e-05, + "loss": 1.7075, + "step": 44505 + }, + { + "epoch": 7.91, + "learning_rate": 2.363437037037037e-05, + "loss": 1.6913, + "step": 44510 + }, + { + "epoch": 7.91, + "learning_rate": 2.363140740740741e-05, + "loss": 1.687, + "step": 44515 + }, + { + "epoch": 7.91, + "learning_rate": 2.3628444444444445e-05, + "loss": 1.6182, + "step": 44520 + }, + { + "epoch": 7.92, + "learning_rate": 2.3625481481481483e-05, + "loss": 1.5335, + "step": 44525 + }, + { + "epoch": 7.92, + "learning_rate": 2.362251851851852e-05, + "loss": 1.4918, + "step": 44530 + }, + { + "epoch": 7.92, + "learning_rate": 2.3619555555555554e-05, + "loss": 1.682, + "step": 44535 + }, + { + "epoch": 7.92, + "learning_rate": 2.3616592592592593e-05, + "loss": 1.7019, + "step": 44540 + }, + { + "epoch": 7.92, + "learning_rate": 2.361362962962963e-05, + "loss": 1.7195, + "step": 44545 + }, + { + "epoch": 7.92, + "learning_rate": 2.3610666666666667e-05, + "loss": 1.5996, + "step": 44550 + }, + { + "epoch": 7.92, + "learning_rate": 2.3607703703703703e-05, + "loss": 1.5595, + "step": 44555 + }, + { + "epoch": 7.92, + "learning_rate": 2.360474074074074e-05, + "loss": 1.6246, + "step": 44560 + }, + { + "epoch": 7.92, + "learning_rate": 2.3601777777777777e-05, + "loss": 1.5444, + "step": 44565 + }, + { + "epoch": 7.92, + "learning_rate": 2.3598814814814816e-05, + "loss": 1.6714, + "step": 44570 + }, + { + "epoch": 7.92, + "learning_rate": 2.359585185185185e-05, + "loss": 1.6278, + "step": 44575 + }, + { + "epoch": 7.93, + "learning_rate": 2.359288888888889e-05, + "loss": 1.6185, + "step": 44580 + }, + { + "epoch": 7.93, + "learning_rate": 2.3589925925925926e-05, + "loss": 1.6117, + "step": 44585 + }, + { + "epoch": 7.93, + "learning_rate": 2.3586962962962964e-05, + "loss": 1.6647, + "step": 44590 + }, + { + "epoch": 7.93, + "learning_rate": 2.3584e-05, + "loss": 1.6413, + "step": 44595 + }, + { + "epoch": 7.93, + "learning_rate": 2.358103703703704e-05, + "loss": 1.7219, + "step": 44600 + }, + { + "epoch": 7.93, + "learning_rate": 2.3578074074074074e-05, + "loss": 1.5731, + "step": 44605 + }, + { + "epoch": 7.93, + "learning_rate": 2.3575111111111113e-05, + "loss": 1.6878, + "step": 44610 + }, + { + "epoch": 7.93, + "learning_rate": 2.357214814814815e-05, + "loss": 1.579, + "step": 44615 + }, + { + "epoch": 7.93, + "learning_rate": 2.3569185185185187e-05, + "loss": 1.5995, + "step": 44620 + }, + { + "epoch": 7.93, + "learning_rate": 2.3566222222222223e-05, + "loss": 1.6012, + "step": 44625 + }, + { + "epoch": 7.93, + "learning_rate": 2.356325925925926e-05, + "loss": 1.6608, + "step": 44630 + }, + { + "epoch": 7.94, + "learning_rate": 2.3560296296296297e-05, + "loss": 1.482, + "step": 44635 + }, + { + "epoch": 7.94, + "learning_rate": 2.3557333333333336e-05, + "loss": 1.5583, + "step": 44640 + }, + { + "epoch": 7.94, + "learning_rate": 2.355437037037037e-05, + "loss": 1.7036, + "step": 44645 + }, + { + "epoch": 7.94, + "learning_rate": 2.3551407407407407e-05, + "loss": 1.6458, + "step": 44650 + }, + { + "epoch": 7.94, + "learning_rate": 2.3548444444444445e-05, + "loss": 1.6258, + "step": 44655 + }, + { + "epoch": 7.94, + "learning_rate": 2.354548148148148e-05, + "loss": 1.6745, + "step": 44660 + }, + { + "epoch": 7.94, + "learning_rate": 2.354251851851852e-05, + "loss": 1.6146, + "step": 44665 + }, + { + "epoch": 7.94, + "learning_rate": 2.3539555555555555e-05, + "loss": 1.7411, + "step": 44670 + }, + { + "epoch": 7.94, + "learning_rate": 2.3536592592592594e-05, + "loss": 1.7513, + "step": 44675 + }, + { + "epoch": 7.94, + "learning_rate": 2.353362962962963e-05, + "loss": 1.6785, + "step": 44680 + }, + { + "epoch": 7.94, + "learning_rate": 2.3530666666666668e-05, + "loss": 1.653, + "step": 44685 + }, + { + "epoch": 7.94, + "learning_rate": 2.3527703703703704e-05, + "loss": 1.6472, + "step": 44690 + }, + { + "epoch": 7.95, + "learning_rate": 2.3524740740740742e-05, + "loss": 1.6399, + "step": 44695 + }, + { + "epoch": 7.95, + "learning_rate": 2.3521777777777778e-05, + "loss": 1.4768, + "step": 44700 + }, + { + "epoch": 7.95, + "learning_rate": 2.3518814814814817e-05, + "loss": 1.7246, + "step": 44705 + }, + { + "epoch": 7.95, + "learning_rate": 2.3515851851851852e-05, + "loss": 1.6618, + "step": 44710 + }, + { + "epoch": 7.95, + "learning_rate": 2.351288888888889e-05, + "loss": 1.5396, + "step": 44715 + }, + { + "epoch": 7.95, + "learning_rate": 2.3509925925925926e-05, + "loss": 1.6135, + "step": 44720 + }, + { + "epoch": 7.95, + "learning_rate": 2.3506962962962965e-05, + "loss": 1.6407, + "step": 44725 + }, + { + "epoch": 7.95, + "learning_rate": 2.3504e-05, + "loss": 1.7406, + "step": 44730 + }, + { + "epoch": 7.95, + "learning_rate": 2.350103703703704e-05, + "loss": 1.7026, + "step": 44735 + }, + { + "epoch": 7.95, + "learning_rate": 2.3498074074074075e-05, + "loss": 1.643, + "step": 44740 + }, + { + "epoch": 7.95, + "learning_rate": 2.3495111111111114e-05, + "loss": 1.5974, + "step": 44745 + }, + { + "epoch": 7.96, + "learning_rate": 2.349214814814815e-05, + "loss": 1.6868, + "step": 44750 + }, + { + "epoch": 7.96, + "learning_rate": 2.3489185185185188e-05, + "loss": 1.8256, + "step": 44755 + }, + { + "epoch": 7.96, + "learning_rate": 2.3486222222222223e-05, + "loss": 1.6032, + "step": 44760 + }, + { + "epoch": 7.96, + "learning_rate": 2.348325925925926e-05, + "loss": 1.639, + "step": 44765 + }, + { + "epoch": 7.96, + "learning_rate": 2.3480296296296298e-05, + "loss": 1.745, + "step": 44770 + }, + { + "epoch": 7.96, + "learning_rate": 2.3477333333333333e-05, + "loss": 1.6011, + "step": 44775 + }, + { + "epoch": 7.96, + "learning_rate": 2.3474370370370372e-05, + "loss": 1.6498, + "step": 44780 + }, + { + "epoch": 7.96, + "learning_rate": 2.3471407407407407e-05, + "loss": 1.6198, + "step": 44785 + }, + { + "epoch": 7.96, + "learning_rate": 2.3468444444444446e-05, + "loss": 1.6809, + "step": 44790 + }, + { + "epoch": 7.96, + "learning_rate": 2.346548148148148e-05, + "loss": 1.6093, + "step": 44795 + }, + { + "epoch": 7.96, + "learning_rate": 2.346251851851852e-05, + "loss": 1.6004, + "step": 44800 + }, + { + "epoch": 7.97, + "learning_rate": 2.3459555555555556e-05, + "loss": 1.7816, + "step": 44805 + }, + { + "epoch": 7.97, + "learning_rate": 2.3456592592592595e-05, + "loss": 1.7337, + "step": 44810 + }, + { + "epoch": 7.97, + "learning_rate": 2.345362962962963e-05, + "loss": 1.531, + "step": 44815 + }, + { + "epoch": 7.97, + "learning_rate": 2.345066666666667e-05, + "loss": 1.5797, + "step": 44820 + }, + { + "epoch": 7.97, + "learning_rate": 2.3447703703703704e-05, + "loss": 1.6766, + "step": 44825 + }, + { + "epoch": 7.97, + "learning_rate": 2.3444740740740743e-05, + "loss": 1.642, + "step": 44830 + }, + { + "epoch": 7.97, + "learning_rate": 2.344177777777778e-05, + "loss": 1.5744, + "step": 44835 + }, + { + "epoch": 7.97, + "learning_rate": 2.3438814814814817e-05, + "loss": 1.662, + "step": 44840 + }, + { + "epoch": 7.97, + "learning_rate": 2.3435851851851853e-05, + "loss": 1.6339, + "step": 44845 + }, + { + "epoch": 7.97, + "learning_rate": 2.343288888888889e-05, + "loss": 1.6259, + "step": 44850 + }, + { + "epoch": 7.97, + "learning_rate": 2.3429925925925927e-05, + "loss": 1.7587, + "step": 44855 + }, + { + "epoch": 7.98, + "learning_rate": 2.3426962962962966e-05, + "loss": 1.6832, + "step": 44860 + }, + { + "epoch": 7.98, + "learning_rate": 2.3424e-05, + "loss": 1.592, + "step": 44865 + }, + { + "epoch": 7.98, + "learning_rate": 2.342103703703704e-05, + "loss": 1.5972, + "step": 44870 + }, + { + "epoch": 7.98, + "learning_rate": 2.3418074074074076e-05, + "loss": 1.6299, + "step": 44875 + }, + { + "epoch": 7.98, + "learning_rate": 2.341511111111111e-05, + "loss": 1.5961, + "step": 44880 + }, + { + "epoch": 7.98, + "learning_rate": 2.341214814814815e-05, + "loss": 1.6918, + "step": 44885 + }, + { + "epoch": 7.98, + "learning_rate": 2.3409185185185185e-05, + "loss": 1.7329, + "step": 44890 + }, + { + "epoch": 7.98, + "learning_rate": 2.3406222222222224e-05, + "loss": 1.6431, + "step": 44895 + }, + { + "epoch": 7.98, + "learning_rate": 2.340325925925926e-05, + "loss": 1.5081, + "step": 44900 + }, + { + "epoch": 7.98, + "learning_rate": 2.34002962962963e-05, + "loss": 1.6562, + "step": 44905 + }, + { + "epoch": 7.98, + "learning_rate": 2.3397333333333334e-05, + "loss": 1.6081, + "step": 44910 + }, + { + "epoch": 7.98, + "learning_rate": 2.3394370370370373e-05, + "loss": 1.5982, + "step": 44915 + }, + { + "epoch": 7.99, + "learning_rate": 2.3391407407407408e-05, + "loss": 1.5854, + "step": 44920 + }, + { + "epoch": 7.99, + "learning_rate": 2.3388444444444447e-05, + "loss": 1.7278, + "step": 44925 + }, + { + "epoch": 7.99, + "learning_rate": 2.3385481481481482e-05, + "loss": 1.7261, + "step": 44930 + }, + { + "epoch": 7.99, + "learning_rate": 2.338251851851852e-05, + "loss": 1.6154, + "step": 44935 + }, + { + "epoch": 7.99, + "learning_rate": 2.3379555555555557e-05, + "loss": 1.6804, + "step": 44940 + }, + { + "epoch": 7.99, + "learning_rate": 2.3376592592592595e-05, + "loss": 1.7905, + "step": 44945 + }, + { + "epoch": 7.99, + "learning_rate": 2.337362962962963e-05, + "loss": 1.6661, + "step": 44950 + }, + { + "epoch": 7.99, + "learning_rate": 2.337066666666667e-05, + "loss": 1.594, + "step": 44955 + }, + { + "epoch": 7.99, + "learning_rate": 2.3367703703703705e-05, + "loss": 1.5908, + "step": 44960 + }, + { + "epoch": 7.99, + "learning_rate": 2.3364740740740744e-05, + "loss": 1.6451, + "step": 44965 + }, + { + "epoch": 7.99, + "learning_rate": 2.336177777777778e-05, + "loss": 1.6804, + "step": 44970 + }, + { + "epoch": 8.0, + "learning_rate": 2.3358814814814818e-05, + "loss": 1.6909, + "step": 44975 + }, + { + "epoch": 8.0, + "learning_rate": 2.3355851851851854e-05, + "loss": 1.7838, + "step": 44980 + }, + { + "epoch": 8.0, + "learning_rate": 2.3352888888888892e-05, + "loss": 1.7537, + "step": 44985 + }, + { + "epoch": 8.0, + "learning_rate": 2.3349925925925928e-05, + "loss": 1.7058, + "step": 44990 + }, + { + "epoch": 8.0, + "learning_rate": 2.3346962962962963e-05, + "loss": 1.6626, + "step": 44995 + }, + { + "epoch": 8.0, + "learning_rate": 2.3344000000000002e-05, + "loss": 1.6149, + "step": 45000 + }, + { + "epoch": 8.0, + "learning_rate": 2.3341037037037038e-05, + "loss": 1.5319, + "step": 45005 + }, + { + "epoch": 8.0, + "learning_rate": 2.3338074074074076e-05, + "loss": 1.5372, + "step": 45010 + }, + { + "epoch": 8.0, + "learning_rate": 2.3335111111111112e-05, + "loss": 1.5558, + "step": 45015 + }, + { + "epoch": 8.0, + "learning_rate": 2.333214814814815e-05, + "loss": 1.4722, + "step": 45020 + }, + { + "epoch": 8.0, + "learning_rate": 2.3329185185185186e-05, + "loss": 1.6423, + "step": 45025 + }, + { + "epoch": 8.01, + "learning_rate": 2.3326222222222225e-05, + "loss": 1.5207, + "step": 45030 + }, + { + "epoch": 8.01, + "learning_rate": 2.332325925925926e-05, + "loss": 1.4872, + "step": 45035 + }, + { + "epoch": 8.01, + "learning_rate": 2.33202962962963e-05, + "loss": 1.5813, + "step": 45040 + }, + { + "epoch": 8.01, + "learning_rate": 2.3317333333333335e-05, + "loss": 1.6228, + "step": 45045 + }, + { + "epoch": 8.01, + "learning_rate": 2.3314370370370373e-05, + "loss": 1.4591, + "step": 45050 + }, + { + "epoch": 8.01, + "learning_rate": 2.331140740740741e-05, + "loss": 1.5458, + "step": 45055 + }, + { + "epoch": 8.01, + "learning_rate": 2.3308444444444448e-05, + "loss": 1.6699, + "step": 45060 + }, + { + "epoch": 8.01, + "learning_rate": 2.3305481481481483e-05, + "loss": 1.5123, + "step": 45065 + }, + { + "epoch": 8.01, + "learning_rate": 2.3302518518518522e-05, + "loss": 1.5467, + "step": 45070 + }, + { + "epoch": 8.01, + "learning_rate": 2.3299555555555557e-05, + "loss": 1.5179, + "step": 45075 + }, + { + "epoch": 8.01, + "learning_rate": 2.3296592592592596e-05, + "loss": 1.6134, + "step": 45080 + }, + { + "epoch": 8.02, + "learning_rate": 2.329362962962963e-05, + "loss": 1.5591, + "step": 45085 + }, + { + "epoch": 8.02, + "learning_rate": 2.329066666666667e-05, + "loss": 1.5257, + "step": 45090 + }, + { + "epoch": 8.02, + "learning_rate": 2.3287703703703706e-05, + "loss": 1.4926, + "step": 45095 + }, + { + "epoch": 8.02, + "learning_rate": 2.3284740740740745e-05, + "loss": 1.6152, + "step": 45100 + }, + { + "epoch": 8.02, + "learning_rate": 2.328177777777778e-05, + "loss": 1.5089, + "step": 45105 + }, + { + "epoch": 8.02, + "learning_rate": 2.3278814814814816e-05, + "loss": 1.6092, + "step": 45110 + }, + { + "epoch": 8.02, + "learning_rate": 2.3275851851851854e-05, + "loss": 1.4785, + "step": 45115 + }, + { + "epoch": 8.02, + "learning_rate": 2.327288888888889e-05, + "loss": 1.532, + "step": 45120 + }, + { + "epoch": 8.02, + "learning_rate": 2.326992592592593e-05, + "loss": 1.4919, + "step": 45125 + }, + { + "epoch": 8.02, + "learning_rate": 2.3266962962962964e-05, + "loss": 1.5407, + "step": 45130 + }, + { + "epoch": 8.02, + "learning_rate": 2.3264000000000003e-05, + "loss": 1.6088, + "step": 45135 + }, + { + "epoch": 8.02, + "learning_rate": 2.326103703703704e-05, + "loss": 1.5379, + "step": 45140 + }, + { + "epoch": 8.03, + "learning_rate": 2.3258074074074077e-05, + "loss": 1.5356, + "step": 45145 + }, + { + "epoch": 8.03, + "learning_rate": 2.325511111111111e-05, + "loss": 1.5655, + "step": 45150 + }, + { + "epoch": 8.03, + "learning_rate": 2.3252148148148148e-05, + "loss": 1.5902, + "step": 45155 + }, + { + "epoch": 8.03, + "learning_rate": 2.3249185185185183e-05, + "loss": 1.5304, + "step": 45160 + }, + { + "epoch": 8.03, + "learning_rate": 2.3246222222222222e-05, + "loss": 1.4247, + "step": 45165 + }, + { + "epoch": 8.03, + "learning_rate": 2.3243259259259258e-05, + "loss": 1.4893, + "step": 45170 + }, + { + "epoch": 8.03, + "learning_rate": 2.3240296296296297e-05, + "loss": 1.5992, + "step": 45175 + }, + { + "epoch": 8.03, + "learning_rate": 2.3237333333333332e-05, + "loss": 1.5439, + "step": 45180 + }, + { + "epoch": 8.03, + "learning_rate": 2.323437037037037e-05, + "loss": 1.5346, + "step": 45185 + }, + { + "epoch": 8.03, + "learning_rate": 2.3231407407407406e-05, + "loss": 1.5502, + "step": 45190 + }, + { + "epoch": 8.03, + "learning_rate": 2.3228444444444445e-05, + "loss": 1.6376, + "step": 45195 + }, + { + "epoch": 8.04, + "learning_rate": 2.322548148148148e-05, + "loss": 1.6687, + "step": 45200 + }, + { + "epoch": 8.04, + "learning_rate": 2.322251851851852e-05, + "loss": 1.5223, + "step": 45205 + }, + { + "epoch": 8.04, + "learning_rate": 2.3219555555555555e-05, + "loss": 1.5763, + "step": 45210 + }, + { + "epoch": 8.04, + "learning_rate": 2.3216592592592594e-05, + "loss": 1.5428, + "step": 45215 + }, + { + "epoch": 8.04, + "learning_rate": 2.321362962962963e-05, + "loss": 1.455, + "step": 45220 + }, + { + "epoch": 8.04, + "learning_rate": 2.3210666666666668e-05, + "loss": 1.5248, + "step": 45225 + }, + { + "epoch": 8.04, + "learning_rate": 2.3207703703703703e-05, + "loss": 1.5887, + "step": 45230 + }, + { + "epoch": 8.04, + "learning_rate": 2.3204740740740742e-05, + "loss": 1.5292, + "step": 45235 + }, + { + "epoch": 8.04, + "learning_rate": 2.3201777777777778e-05, + "loss": 1.5325, + "step": 45240 + }, + { + "epoch": 8.04, + "learning_rate": 2.3198814814814816e-05, + "loss": 1.5418, + "step": 45245 + }, + { + "epoch": 8.04, + "learning_rate": 2.3195851851851852e-05, + "loss": 1.563, + "step": 45250 + }, + { + "epoch": 8.05, + "learning_rate": 2.319288888888889e-05, + "loss": 1.6303, + "step": 45255 + }, + { + "epoch": 8.05, + "learning_rate": 2.3189925925925926e-05, + "loss": 1.6107, + "step": 45260 + }, + { + "epoch": 8.05, + "learning_rate": 2.318696296296296e-05, + "loss": 1.5798, + "step": 45265 + }, + { + "epoch": 8.05, + "learning_rate": 2.3184e-05, + "loss": 1.6505, + "step": 45270 + }, + { + "epoch": 8.05, + "learning_rate": 2.3181037037037036e-05, + "loss": 1.5395, + "step": 45275 + }, + { + "epoch": 8.05, + "learning_rate": 2.3178074074074075e-05, + "loss": 1.624, + "step": 45280 + }, + { + "epoch": 8.05, + "learning_rate": 2.317511111111111e-05, + "loss": 1.5726, + "step": 45285 + }, + { + "epoch": 8.05, + "learning_rate": 2.317214814814815e-05, + "loss": 1.7139, + "step": 45290 + }, + { + "epoch": 8.05, + "learning_rate": 2.3169185185185184e-05, + "loss": 1.4561, + "step": 45295 + }, + { + "epoch": 8.05, + "learning_rate": 2.3166222222222223e-05, + "loss": 1.503, + "step": 45300 + }, + { + "epoch": 8.05, + "learning_rate": 2.316325925925926e-05, + "loss": 1.5384, + "step": 45305 + }, + { + "epoch": 8.06, + "learning_rate": 2.3160296296296297e-05, + "loss": 1.5321, + "step": 45310 + }, + { + "epoch": 8.06, + "learning_rate": 2.3157333333333333e-05, + "loss": 1.6034, + "step": 45315 + }, + { + "epoch": 8.06, + "learning_rate": 2.315437037037037e-05, + "loss": 1.4981, + "step": 45320 + }, + { + "epoch": 8.06, + "learning_rate": 2.3151407407407407e-05, + "loss": 1.6399, + "step": 45325 + }, + { + "epoch": 8.06, + "learning_rate": 2.3148444444444446e-05, + "loss": 1.581, + "step": 45330 + }, + { + "epoch": 8.06, + "learning_rate": 2.314548148148148e-05, + "loss": 1.6299, + "step": 45335 + }, + { + "epoch": 8.06, + "learning_rate": 2.314251851851852e-05, + "loss": 1.5266, + "step": 45340 + }, + { + "epoch": 8.06, + "learning_rate": 2.3139555555555556e-05, + "loss": 1.4729, + "step": 45345 + }, + { + "epoch": 8.06, + "learning_rate": 2.3136592592592594e-05, + "loss": 1.4905, + "step": 45350 + }, + { + "epoch": 8.06, + "learning_rate": 2.313362962962963e-05, + "loss": 1.6046, + "step": 45355 + }, + { + "epoch": 8.06, + "learning_rate": 2.313066666666667e-05, + "loss": 1.5316, + "step": 45360 + }, + { + "epoch": 8.06, + "learning_rate": 2.3127703703703704e-05, + "loss": 1.3822, + "step": 45365 + }, + { + "epoch": 8.07, + "learning_rate": 2.3124740740740743e-05, + "loss": 1.5819, + "step": 45370 + }, + { + "epoch": 8.07, + "learning_rate": 2.3121777777777778e-05, + "loss": 1.4971, + "step": 45375 + }, + { + "epoch": 8.07, + "learning_rate": 2.3118814814814814e-05, + "loss": 1.7153, + "step": 45380 + }, + { + "epoch": 8.07, + "learning_rate": 2.3115851851851853e-05, + "loss": 1.5087, + "step": 45385 + }, + { + "epoch": 8.07, + "learning_rate": 2.3112888888888888e-05, + "loss": 1.5463, + "step": 45390 + }, + { + "epoch": 8.07, + "learning_rate": 2.3109925925925927e-05, + "loss": 1.586, + "step": 45395 + }, + { + "epoch": 8.07, + "learning_rate": 2.3106962962962962e-05, + "loss": 1.5333, + "step": 45400 + }, + { + "epoch": 8.07, + "learning_rate": 2.3104e-05, + "loss": 1.6281, + "step": 45405 + }, + { + "epoch": 8.07, + "learning_rate": 2.3101037037037036e-05, + "loss": 1.6642, + "step": 45410 + }, + { + "epoch": 8.07, + "learning_rate": 2.3098074074074075e-05, + "loss": 1.5242, + "step": 45415 + }, + { + "epoch": 8.07, + "learning_rate": 2.309511111111111e-05, + "loss": 1.6828, + "step": 45420 + }, + { + "epoch": 8.08, + "learning_rate": 2.309214814814815e-05, + "loss": 1.6406, + "step": 45425 + }, + { + "epoch": 8.08, + "learning_rate": 2.3089185185185185e-05, + "loss": 1.6344, + "step": 45430 + }, + { + "epoch": 8.08, + "learning_rate": 2.3086222222222224e-05, + "loss": 1.6432, + "step": 45435 + }, + { + "epoch": 8.08, + "learning_rate": 2.308325925925926e-05, + "loss": 1.5434, + "step": 45440 + }, + { + "epoch": 8.08, + "learning_rate": 2.3080296296296298e-05, + "loss": 1.5959, + "step": 45445 + }, + { + "epoch": 8.08, + "learning_rate": 2.3077333333333334e-05, + "loss": 1.662, + "step": 45450 + }, + { + "epoch": 8.08, + "learning_rate": 2.3074370370370372e-05, + "loss": 1.5361, + "step": 45455 + }, + { + "epoch": 8.08, + "learning_rate": 2.3071407407407408e-05, + "loss": 1.4627, + "step": 45460 + }, + { + "epoch": 8.08, + "learning_rate": 2.3068444444444447e-05, + "loss": 1.4907, + "step": 45465 + }, + { + "epoch": 8.08, + "learning_rate": 2.3065481481481482e-05, + "loss": 1.4775, + "step": 45470 + }, + { + "epoch": 8.08, + "learning_rate": 2.306251851851852e-05, + "loss": 1.5532, + "step": 45475 + }, + { + "epoch": 8.09, + "learning_rate": 2.3059555555555556e-05, + "loss": 1.5261, + "step": 45480 + }, + { + "epoch": 8.09, + "learning_rate": 2.3056592592592595e-05, + "loss": 1.5028, + "step": 45485 + }, + { + "epoch": 8.09, + "learning_rate": 2.305362962962963e-05, + "loss": 1.5896, + "step": 45490 + }, + { + "epoch": 8.09, + "learning_rate": 2.3050666666666666e-05, + "loss": 1.5869, + "step": 45495 + }, + { + "epoch": 8.09, + "learning_rate": 2.3047703703703705e-05, + "loss": 1.5967, + "step": 45500 + }, + { + "epoch": 8.09, + "learning_rate": 2.304474074074074e-05, + "loss": 1.5541, + "step": 45505 + }, + { + "epoch": 8.09, + "learning_rate": 2.304177777777778e-05, + "loss": 1.4754, + "step": 45510 + }, + { + "epoch": 8.09, + "learning_rate": 2.3038814814814814e-05, + "loss": 1.5697, + "step": 45515 + }, + { + "epoch": 8.09, + "learning_rate": 2.3035851851851853e-05, + "loss": 1.5145, + "step": 45520 + }, + { + "epoch": 8.09, + "learning_rate": 2.303288888888889e-05, + "loss": 1.5985, + "step": 45525 + }, + { + "epoch": 8.09, + "learning_rate": 2.3029925925925928e-05, + "loss": 1.4577, + "step": 45530 + }, + { + "epoch": 8.1, + "learning_rate": 2.3026962962962963e-05, + "loss": 1.4819, + "step": 45535 + }, + { + "epoch": 8.1, + "learning_rate": 2.3024000000000002e-05, + "loss": 1.5791, + "step": 45540 + }, + { + "epoch": 8.1, + "learning_rate": 2.3021037037037037e-05, + "loss": 1.6249, + "step": 45545 + }, + { + "epoch": 8.1, + "learning_rate": 2.3018074074074076e-05, + "loss": 1.4333, + "step": 45550 + }, + { + "epoch": 8.1, + "learning_rate": 2.301511111111111e-05, + "loss": 1.6504, + "step": 45555 + }, + { + "epoch": 8.1, + "learning_rate": 2.301214814814815e-05, + "loss": 1.5436, + "step": 45560 + }, + { + "epoch": 8.1, + "learning_rate": 2.3009185185185186e-05, + "loss": 1.6394, + "step": 45565 + }, + { + "epoch": 8.1, + "learning_rate": 2.3006222222222225e-05, + "loss": 1.4945, + "step": 45570 + }, + { + "epoch": 8.1, + "learning_rate": 2.300325925925926e-05, + "loss": 1.658, + "step": 45575 + }, + { + "epoch": 8.1, + "learning_rate": 2.30002962962963e-05, + "loss": 1.4466, + "step": 45580 + }, + { + "epoch": 8.1, + "learning_rate": 2.2997333333333334e-05, + "loss": 1.4792, + "step": 45585 + }, + { + "epoch": 8.1, + "learning_rate": 2.2994370370370373e-05, + "loss": 1.5259, + "step": 45590 + }, + { + "epoch": 8.11, + "learning_rate": 2.299140740740741e-05, + "loss": 1.5962, + "step": 45595 + }, + { + "epoch": 8.11, + "learning_rate": 2.2988444444444447e-05, + "loss": 1.5246, + "step": 45600 + }, + { + "epoch": 8.11, + "learning_rate": 2.2985481481481483e-05, + "loss": 1.516, + "step": 45605 + }, + { + "epoch": 8.11, + "learning_rate": 2.2982518518518518e-05, + "loss": 1.575, + "step": 45610 + }, + { + "epoch": 8.11, + "learning_rate": 2.2979555555555557e-05, + "loss": 1.6239, + "step": 45615 + }, + { + "epoch": 8.11, + "learning_rate": 2.2976592592592592e-05, + "loss": 1.4821, + "step": 45620 + }, + { + "epoch": 8.11, + "learning_rate": 2.297362962962963e-05, + "loss": 1.5263, + "step": 45625 + }, + { + "epoch": 8.11, + "learning_rate": 2.2970666666666667e-05, + "loss": 1.518, + "step": 45630 + }, + { + "epoch": 8.11, + "learning_rate": 2.2967703703703706e-05, + "loss": 1.4405, + "step": 45635 + }, + { + "epoch": 8.11, + "learning_rate": 2.296474074074074e-05, + "loss": 1.5605, + "step": 45640 + }, + { + "epoch": 8.11, + "learning_rate": 2.296177777777778e-05, + "loss": 1.5732, + "step": 45645 + }, + { + "epoch": 8.12, + "learning_rate": 2.2958814814814815e-05, + "loss": 1.4171, + "step": 45650 + }, + { + "epoch": 8.12, + "learning_rate": 2.2955851851851854e-05, + "loss": 1.593, + "step": 45655 + }, + { + "epoch": 8.12, + "learning_rate": 2.295288888888889e-05, + "loss": 1.5186, + "step": 45660 + }, + { + "epoch": 8.12, + "learning_rate": 2.294992592592593e-05, + "loss": 1.5421, + "step": 45665 + }, + { + "epoch": 8.12, + "learning_rate": 2.2946962962962964e-05, + "loss": 1.5188, + "step": 45670 + }, + { + "epoch": 8.12, + "learning_rate": 2.2944000000000003e-05, + "loss": 1.5741, + "step": 45675 + }, + { + "epoch": 8.12, + "learning_rate": 2.2941037037037038e-05, + "loss": 1.6651, + "step": 45680 + }, + { + "epoch": 8.12, + "learning_rate": 2.2938074074074077e-05, + "loss": 1.5427, + "step": 45685 + }, + { + "epoch": 8.12, + "learning_rate": 2.2935111111111112e-05, + "loss": 1.4956, + "step": 45690 + }, + { + "epoch": 8.12, + "learning_rate": 2.293214814814815e-05, + "loss": 1.554, + "step": 45695 + }, + { + "epoch": 8.12, + "learning_rate": 2.2929185185185187e-05, + "loss": 1.4999, + "step": 45700 + }, + { + "epoch": 8.13, + "learning_rate": 2.2926222222222225e-05, + "loss": 1.497, + "step": 45705 + }, + { + "epoch": 8.13, + "learning_rate": 2.292325925925926e-05, + "loss": 1.6863, + "step": 45710 + }, + { + "epoch": 8.13, + "learning_rate": 2.29202962962963e-05, + "loss": 1.5291, + "step": 45715 + }, + { + "epoch": 8.13, + "learning_rate": 2.2917333333333335e-05, + "loss": 1.4117, + "step": 45720 + }, + { + "epoch": 8.13, + "learning_rate": 2.291437037037037e-05, + "loss": 1.6449, + "step": 45725 + }, + { + "epoch": 8.13, + "learning_rate": 2.291140740740741e-05, + "loss": 1.5422, + "step": 45730 + }, + { + "epoch": 8.13, + "learning_rate": 2.2908444444444445e-05, + "loss": 1.582, + "step": 45735 + }, + { + "epoch": 8.13, + "learning_rate": 2.2905481481481484e-05, + "loss": 1.5755, + "step": 45740 + }, + { + "epoch": 8.13, + "learning_rate": 2.290251851851852e-05, + "loss": 1.5205, + "step": 45745 + }, + { + "epoch": 8.13, + "learning_rate": 2.2899555555555558e-05, + "loss": 1.5642, + "step": 45750 + }, + { + "epoch": 8.13, + "learning_rate": 2.2896592592592593e-05, + "loss": 1.4762, + "step": 45755 + }, + { + "epoch": 8.14, + "learning_rate": 2.2893629629629632e-05, + "loss": 1.6037, + "step": 45760 + }, + { + "epoch": 8.14, + "learning_rate": 2.2890666666666668e-05, + "loss": 1.4866, + "step": 45765 + }, + { + "epoch": 8.14, + "learning_rate": 2.2887703703703706e-05, + "loss": 1.5811, + "step": 45770 + }, + { + "epoch": 8.14, + "learning_rate": 2.2884740740740742e-05, + "loss": 1.5323, + "step": 45775 + }, + { + "epoch": 8.14, + "learning_rate": 2.288177777777778e-05, + "loss": 1.672, + "step": 45780 + }, + { + "epoch": 8.14, + "learning_rate": 2.2878814814814816e-05, + "loss": 1.598, + "step": 45785 + }, + { + "epoch": 8.14, + "learning_rate": 2.2875851851851855e-05, + "loss": 1.5632, + "step": 45790 + }, + { + "epoch": 8.14, + "learning_rate": 2.287288888888889e-05, + "loss": 1.475, + "step": 45795 + }, + { + "epoch": 8.14, + "learning_rate": 2.286992592592593e-05, + "loss": 1.5667, + "step": 45800 + }, + { + "epoch": 8.14, + "learning_rate": 2.2866962962962965e-05, + "loss": 1.5672, + "step": 45805 + }, + { + "epoch": 8.14, + "learning_rate": 2.2864000000000003e-05, + "loss": 1.4992, + "step": 45810 + }, + { + "epoch": 8.14, + "learning_rate": 2.286103703703704e-05, + "loss": 1.7117, + "step": 45815 + }, + { + "epoch": 8.15, + "learning_rate": 2.2858074074074078e-05, + "loss": 1.6201, + "step": 45820 + }, + { + "epoch": 8.15, + "learning_rate": 2.2855111111111113e-05, + "loss": 1.5594, + "step": 45825 + }, + { + "epoch": 8.15, + "learning_rate": 2.2852148148148152e-05, + "loss": 1.5995, + "step": 45830 + }, + { + "epoch": 8.15, + "learning_rate": 2.2849185185185187e-05, + "loss": 1.5737, + "step": 45835 + }, + { + "epoch": 8.15, + "learning_rate": 2.2846222222222223e-05, + "loss": 1.5068, + "step": 45840 + }, + { + "epoch": 8.15, + "learning_rate": 2.284325925925926e-05, + "loss": 1.5644, + "step": 45845 + }, + { + "epoch": 8.15, + "learning_rate": 2.2840296296296297e-05, + "loss": 1.4801, + "step": 45850 + }, + { + "epoch": 8.15, + "learning_rate": 2.2837333333333336e-05, + "loss": 1.5289, + "step": 45855 + }, + { + "epoch": 8.15, + "learning_rate": 2.283437037037037e-05, + "loss": 1.6561, + "step": 45860 + }, + { + "epoch": 8.15, + "learning_rate": 2.283140740740741e-05, + "loss": 1.562, + "step": 45865 + }, + { + "epoch": 8.15, + "learning_rate": 2.2828444444444446e-05, + "loss": 1.4998, + "step": 45870 + }, + { + "epoch": 8.16, + "learning_rate": 2.2825481481481484e-05, + "loss": 1.5373, + "step": 45875 + }, + { + "epoch": 8.16, + "learning_rate": 2.282251851851852e-05, + "loss": 1.4284, + "step": 45880 + }, + { + "epoch": 8.16, + "learning_rate": 2.281955555555556e-05, + "loss": 1.474, + "step": 45885 + }, + { + "epoch": 8.16, + "learning_rate": 2.2816592592592594e-05, + "loss": 1.5559, + "step": 45890 + }, + { + "epoch": 8.16, + "learning_rate": 2.2813629629629633e-05, + "loss": 1.5984, + "step": 45895 + }, + { + "epoch": 8.16, + "learning_rate": 2.2810666666666668e-05, + "loss": 1.5282, + "step": 45900 + }, + { + "epoch": 8.16, + "learning_rate": 2.2807703703703707e-05, + "loss": 1.5808, + "step": 45905 + }, + { + "epoch": 8.16, + "learning_rate": 2.2804740740740743e-05, + "loss": 1.5109, + "step": 45910 + }, + { + "epoch": 8.16, + "learning_rate": 2.280177777777778e-05, + "loss": 1.5597, + "step": 45915 + }, + { + "epoch": 8.16, + "learning_rate": 2.2798814814814817e-05, + "loss": 1.4594, + "step": 45920 + }, + { + "epoch": 8.16, + "learning_rate": 2.2795851851851852e-05, + "loss": 1.5634, + "step": 45925 + }, + { + "epoch": 8.17, + "learning_rate": 2.2792888888888888e-05, + "loss": 1.6347, + "step": 45930 + }, + { + "epoch": 8.17, + "learning_rate": 2.2789925925925926e-05, + "loss": 1.6286, + "step": 45935 + }, + { + "epoch": 8.17, + "learning_rate": 2.2786962962962962e-05, + "loss": 1.4358, + "step": 45940 + }, + { + "epoch": 8.17, + "learning_rate": 2.2784e-05, + "loss": 1.5639, + "step": 45945 + }, + { + "epoch": 8.17, + "learning_rate": 2.2781037037037036e-05, + "loss": 1.5817, + "step": 45950 + }, + { + "epoch": 8.17, + "learning_rate": 2.2778074074074075e-05, + "loss": 1.5448, + "step": 45955 + }, + { + "epoch": 8.17, + "learning_rate": 2.277511111111111e-05, + "loss": 1.4816, + "step": 45960 + }, + { + "epoch": 8.17, + "learning_rate": 2.277214814814815e-05, + "loss": 1.5953, + "step": 45965 + }, + { + "epoch": 8.17, + "learning_rate": 2.2769185185185185e-05, + "loss": 1.5311, + "step": 45970 + }, + { + "epoch": 8.17, + "learning_rate": 2.2766222222222223e-05, + "loss": 1.6437, + "step": 45975 + }, + { + "epoch": 8.17, + "learning_rate": 2.276325925925926e-05, + "loss": 1.4537, + "step": 45980 + }, + { + "epoch": 8.18, + "learning_rate": 2.2760296296296298e-05, + "loss": 1.5871, + "step": 45985 + }, + { + "epoch": 8.18, + "learning_rate": 2.2757333333333333e-05, + "loss": 1.631, + "step": 45990 + }, + { + "epoch": 8.18, + "learning_rate": 2.2754370370370372e-05, + "loss": 1.5192, + "step": 45995 + }, + { + "epoch": 8.18, + "learning_rate": 2.2751407407407407e-05, + "loss": 1.6275, + "step": 46000 + }, + { + "epoch": 8.18, + "learning_rate": 2.2748444444444443e-05, + "loss": 1.5853, + "step": 46005 + }, + { + "epoch": 8.18, + "learning_rate": 2.2745481481481482e-05, + "loss": 1.6075, + "step": 46010 + }, + { + "epoch": 8.18, + "learning_rate": 2.2742518518518517e-05, + "loss": 1.5914, + "step": 46015 + }, + { + "epoch": 8.18, + "learning_rate": 2.2739555555555556e-05, + "loss": 1.5168, + "step": 46020 + }, + { + "epoch": 8.18, + "learning_rate": 2.273659259259259e-05, + "loss": 1.3981, + "step": 46025 + }, + { + "epoch": 8.18, + "learning_rate": 2.273362962962963e-05, + "loss": 1.4735, + "step": 46030 + }, + { + "epoch": 8.18, + "learning_rate": 2.2730666666666666e-05, + "loss": 1.6041, + "step": 46035 + }, + { + "epoch": 8.18, + "learning_rate": 2.2727703703703704e-05, + "loss": 1.5408, + "step": 46040 + }, + { + "epoch": 8.19, + "learning_rate": 2.272474074074074e-05, + "loss": 1.6497, + "step": 46045 + }, + { + "epoch": 8.19, + "learning_rate": 2.272177777777778e-05, + "loss": 1.4999, + "step": 46050 + }, + { + "epoch": 8.19, + "learning_rate": 2.2718814814814814e-05, + "loss": 1.478, + "step": 46055 + }, + { + "epoch": 8.19, + "learning_rate": 2.2715851851851853e-05, + "loss": 1.516, + "step": 46060 + }, + { + "epoch": 8.19, + "learning_rate": 2.271288888888889e-05, + "loss": 1.4681, + "step": 46065 + }, + { + "epoch": 8.19, + "learning_rate": 2.2709925925925927e-05, + "loss": 1.5662, + "step": 46070 + }, + { + "epoch": 8.19, + "learning_rate": 2.2706962962962963e-05, + "loss": 1.5806, + "step": 46075 + }, + { + "epoch": 8.19, + "learning_rate": 2.2704e-05, + "loss": 1.4732, + "step": 46080 + }, + { + "epoch": 8.19, + "learning_rate": 2.2701037037037037e-05, + "loss": 1.4605, + "step": 46085 + }, + { + "epoch": 8.19, + "learning_rate": 2.2698074074074076e-05, + "loss": 1.6052, + "step": 46090 + }, + { + "epoch": 8.19, + "learning_rate": 2.269511111111111e-05, + "loss": 1.5897, + "step": 46095 + }, + { + "epoch": 8.2, + "learning_rate": 2.269214814814815e-05, + "loss": 1.659, + "step": 46100 + }, + { + "epoch": 8.2, + "learning_rate": 2.2689185185185185e-05, + "loss": 1.4688, + "step": 46105 + }, + { + "epoch": 8.2, + "learning_rate": 2.2686222222222224e-05, + "loss": 1.4643, + "step": 46110 + }, + { + "epoch": 8.2, + "learning_rate": 2.268325925925926e-05, + "loss": 1.5818, + "step": 46115 + }, + { + "epoch": 8.2, + "learning_rate": 2.2680296296296295e-05, + "loss": 1.6606, + "step": 46120 + }, + { + "epoch": 8.2, + "learning_rate": 2.2677333333333334e-05, + "loss": 1.5478, + "step": 46125 + }, + { + "epoch": 8.2, + "learning_rate": 2.267437037037037e-05, + "loss": 1.6057, + "step": 46130 + }, + { + "epoch": 8.2, + "learning_rate": 2.2671407407407408e-05, + "loss": 1.5544, + "step": 46135 + }, + { + "epoch": 8.2, + "learning_rate": 2.2668444444444444e-05, + "loss": 1.5777, + "step": 46140 + }, + { + "epoch": 8.2, + "learning_rate": 2.2665481481481482e-05, + "loss": 1.4771, + "step": 46145 + }, + { + "epoch": 8.2, + "learning_rate": 2.2662518518518518e-05, + "loss": 1.5987, + "step": 46150 + }, + { + "epoch": 8.21, + "learning_rate": 2.2659555555555557e-05, + "loss": 1.4826, + "step": 46155 + }, + { + "epoch": 8.21, + "learning_rate": 2.2656592592592592e-05, + "loss": 1.5478, + "step": 46160 + }, + { + "epoch": 8.21, + "learning_rate": 2.265362962962963e-05, + "loss": 1.6778, + "step": 46165 + }, + { + "epoch": 8.21, + "learning_rate": 2.2650666666666666e-05, + "loss": 1.6293, + "step": 46170 + }, + { + "epoch": 8.21, + "learning_rate": 2.2647703703703705e-05, + "loss": 1.6275, + "step": 46175 + }, + { + "epoch": 8.21, + "learning_rate": 2.264474074074074e-05, + "loss": 1.5051, + "step": 46180 + }, + { + "epoch": 8.21, + "learning_rate": 2.264177777777778e-05, + "loss": 1.5817, + "step": 46185 + }, + { + "epoch": 8.21, + "learning_rate": 2.2638814814814815e-05, + "loss": 1.7326, + "step": 46190 + }, + { + "epoch": 8.21, + "learning_rate": 2.2635851851851854e-05, + "loss": 1.6832, + "step": 46195 + }, + { + "epoch": 8.21, + "learning_rate": 2.263288888888889e-05, + "loss": 1.4561, + "step": 46200 + }, + { + "epoch": 8.21, + "learning_rate": 2.2629925925925928e-05, + "loss": 1.5981, + "step": 46205 + }, + { + "epoch": 8.22, + "learning_rate": 2.2626962962962963e-05, + "loss": 1.5215, + "step": 46210 + }, + { + "epoch": 8.22, + "learning_rate": 2.2624000000000002e-05, + "loss": 1.5888, + "step": 46215 + }, + { + "epoch": 8.22, + "learning_rate": 2.2621037037037038e-05, + "loss": 1.6355, + "step": 46220 + }, + { + "epoch": 8.22, + "learning_rate": 2.2618074074074077e-05, + "loss": 1.5591, + "step": 46225 + }, + { + "epoch": 8.22, + "learning_rate": 2.2615111111111112e-05, + "loss": 1.6067, + "step": 46230 + }, + { + "epoch": 8.22, + "learning_rate": 2.2612148148148147e-05, + "loss": 1.6062, + "step": 46235 + }, + { + "epoch": 8.22, + "learning_rate": 2.2609185185185186e-05, + "loss": 1.6973, + "step": 46240 + }, + { + "epoch": 8.22, + "learning_rate": 2.260622222222222e-05, + "loss": 1.4996, + "step": 46245 + }, + { + "epoch": 8.22, + "learning_rate": 2.260325925925926e-05, + "loss": 1.5403, + "step": 46250 + }, + { + "epoch": 8.22, + "learning_rate": 2.2600296296296296e-05, + "loss": 1.5228, + "step": 46255 + }, + { + "epoch": 8.22, + "learning_rate": 2.2597333333333335e-05, + "loss": 1.472, + "step": 46260 + }, + { + "epoch": 8.22, + "learning_rate": 2.259437037037037e-05, + "loss": 1.6462, + "step": 46265 + }, + { + "epoch": 8.23, + "learning_rate": 2.259140740740741e-05, + "loss": 1.5733, + "step": 46270 + }, + { + "epoch": 8.23, + "learning_rate": 2.2588444444444444e-05, + "loss": 1.6584, + "step": 46275 + }, + { + "epoch": 8.23, + "learning_rate": 2.2585481481481483e-05, + "loss": 1.5607, + "step": 46280 + }, + { + "epoch": 8.23, + "learning_rate": 2.258251851851852e-05, + "loss": 1.5951, + "step": 46285 + }, + { + "epoch": 8.23, + "learning_rate": 2.2579555555555557e-05, + "loss": 1.5419, + "step": 46290 + }, + { + "epoch": 8.23, + "learning_rate": 2.2576592592592593e-05, + "loss": 1.518, + "step": 46295 + }, + { + "epoch": 8.23, + "learning_rate": 2.2573629629629632e-05, + "loss": 1.4671, + "step": 46300 + }, + { + "epoch": 8.23, + "learning_rate": 2.2570666666666667e-05, + "loss": 1.6407, + "step": 46305 + }, + { + "epoch": 8.23, + "learning_rate": 2.2567703703703706e-05, + "loss": 1.5801, + "step": 46310 + }, + { + "epoch": 8.23, + "learning_rate": 2.256474074074074e-05, + "loss": 1.5516, + "step": 46315 + }, + { + "epoch": 8.23, + "learning_rate": 2.256177777777778e-05, + "loss": 1.5671, + "step": 46320 + }, + { + "epoch": 8.24, + "learning_rate": 2.2558814814814816e-05, + "loss": 1.5578, + "step": 46325 + }, + { + "epoch": 8.24, + "learning_rate": 2.2555851851851855e-05, + "loss": 1.715, + "step": 46330 + }, + { + "epoch": 8.24, + "learning_rate": 2.255288888888889e-05, + "loss": 1.477, + "step": 46335 + }, + { + "epoch": 8.24, + "learning_rate": 2.254992592592593e-05, + "loss": 1.6092, + "step": 46340 + }, + { + "epoch": 8.24, + "learning_rate": 2.2546962962962964e-05, + "loss": 1.5662, + "step": 46345 + }, + { + "epoch": 8.24, + "learning_rate": 2.2544e-05, + "loss": 1.4304, + "step": 46350 + }, + { + "epoch": 8.24, + "learning_rate": 2.254103703703704e-05, + "loss": 1.4583, + "step": 46355 + }, + { + "epoch": 8.24, + "learning_rate": 2.2538074074074074e-05, + "loss": 1.5449, + "step": 46360 + }, + { + "epoch": 8.24, + "learning_rate": 2.2535111111111113e-05, + "loss": 1.5556, + "step": 46365 + }, + { + "epoch": 8.24, + "learning_rate": 2.2532148148148148e-05, + "loss": 1.5434, + "step": 46370 + }, + { + "epoch": 8.24, + "learning_rate": 2.2529185185185187e-05, + "loss": 1.5861, + "step": 46375 + }, + { + "epoch": 8.25, + "learning_rate": 2.2526222222222222e-05, + "loss": 1.6547, + "step": 46380 + }, + { + "epoch": 8.25, + "learning_rate": 2.252325925925926e-05, + "loss": 1.5264, + "step": 46385 + }, + { + "epoch": 8.25, + "learning_rate": 2.2520296296296297e-05, + "loss": 1.5013, + "step": 46390 + }, + { + "epoch": 8.25, + "learning_rate": 2.2517333333333335e-05, + "loss": 1.5383, + "step": 46395 + }, + { + "epoch": 8.25, + "learning_rate": 2.251437037037037e-05, + "loss": 1.5927, + "step": 46400 + }, + { + "epoch": 8.25, + "learning_rate": 2.251140740740741e-05, + "loss": 1.6724, + "step": 46405 + }, + { + "epoch": 8.25, + "learning_rate": 2.2508444444444445e-05, + "loss": 1.6415, + "step": 46410 + }, + { + "epoch": 8.25, + "learning_rate": 2.2505481481481484e-05, + "loss": 1.6483, + "step": 46415 + }, + { + "epoch": 8.25, + "learning_rate": 2.250251851851852e-05, + "loss": 1.5451, + "step": 46420 + }, + { + "epoch": 8.25, + "learning_rate": 2.2499555555555558e-05, + "loss": 1.6542, + "step": 46425 + }, + { + "epoch": 8.25, + "learning_rate": 2.2496592592592594e-05, + "loss": 1.532, + "step": 46430 + }, + { + "epoch": 8.26, + "learning_rate": 2.2493629629629633e-05, + "loss": 1.5155, + "step": 46435 + }, + { + "epoch": 8.26, + "learning_rate": 2.2490666666666668e-05, + "loss": 1.5673, + "step": 46440 + }, + { + "epoch": 8.26, + "learning_rate": 2.2487703703703707e-05, + "loss": 1.5198, + "step": 46445 + }, + { + "epoch": 8.26, + "learning_rate": 2.2484740740740742e-05, + "loss": 1.5584, + "step": 46450 + }, + { + "epoch": 8.26, + "learning_rate": 2.248177777777778e-05, + "loss": 1.5733, + "step": 46455 + }, + { + "epoch": 8.26, + "learning_rate": 2.2478814814814816e-05, + "loss": 1.4792, + "step": 46460 + }, + { + "epoch": 8.26, + "learning_rate": 2.2475851851851852e-05, + "loss": 1.4191, + "step": 46465 + }, + { + "epoch": 8.26, + "learning_rate": 2.247288888888889e-05, + "loss": 1.6312, + "step": 46470 + }, + { + "epoch": 8.26, + "learning_rate": 2.2469925925925926e-05, + "loss": 1.5587, + "step": 46475 + }, + { + "epoch": 8.26, + "learning_rate": 2.2466962962962965e-05, + "loss": 1.6761, + "step": 46480 + }, + { + "epoch": 8.26, + "learning_rate": 2.2464e-05, + "loss": 1.5225, + "step": 46485 + }, + { + "epoch": 8.26, + "learning_rate": 2.246103703703704e-05, + "loss": 1.5518, + "step": 46490 + }, + { + "epoch": 8.27, + "learning_rate": 2.2458074074074075e-05, + "loss": 1.625, + "step": 46495 + }, + { + "epoch": 8.27, + "learning_rate": 2.2455111111111113e-05, + "loss": 1.5233, + "step": 46500 + }, + { + "epoch": 8.27, + "learning_rate": 2.245214814814815e-05, + "loss": 1.5217, + "step": 46505 + }, + { + "epoch": 8.27, + "learning_rate": 2.2449185185185188e-05, + "loss": 1.7396, + "step": 46510 + }, + { + "epoch": 8.27, + "learning_rate": 2.2446222222222223e-05, + "loss": 1.6406, + "step": 46515 + }, + { + "epoch": 8.27, + "learning_rate": 2.2443259259259262e-05, + "loss": 1.6327, + "step": 46520 + }, + { + "epoch": 8.27, + "learning_rate": 2.2440296296296297e-05, + "loss": 1.5149, + "step": 46525 + }, + { + "epoch": 8.27, + "learning_rate": 2.2437333333333336e-05, + "loss": 1.5056, + "step": 46530 + }, + { + "epoch": 8.27, + "learning_rate": 2.243437037037037e-05, + "loss": 1.5806, + "step": 46535 + }, + { + "epoch": 8.27, + "learning_rate": 2.243140740740741e-05, + "loss": 1.6433, + "step": 46540 + }, + { + "epoch": 8.27, + "learning_rate": 2.2428444444444446e-05, + "loss": 1.5627, + "step": 46545 + }, + { + "epoch": 8.28, + "learning_rate": 2.2425481481481485e-05, + "loss": 1.5305, + "step": 46550 + }, + { + "epoch": 8.28, + "learning_rate": 2.242251851851852e-05, + "loss": 1.5145, + "step": 46555 + }, + { + "epoch": 8.28, + "learning_rate": 2.241955555555556e-05, + "loss": 1.6992, + "step": 46560 + }, + { + "epoch": 8.28, + "learning_rate": 2.2416592592592594e-05, + "loss": 1.5639, + "step": 46565 + }, + { + "epoch": 8.28, + "learning_rate": 2.2413629629629633e-05, + "loss": 1.6389, + "step": 46570 + }, + { + "epoch": 8.28, + "learning_rate": 2.241066666666667e-05, + "loss": 1.7074, + "step": 46575 + }, + { + "epoch": 8.28, + "learning_rate": 2.2407703703703704e-05, + "loss": 1.5138, + "step": 46580 + }, + { + "epoch": 8.28, + "learning_rate": 2.2404740740740743e-05, + "loss": 1.6698, + "step": 46585 + }, + { + "epoch": 8.28, + "learning_rate": 2.240177777777778e-05, + "loss": 1.5069, + "step": 46590 + }, + { + "epoch": 8.28, + "learning_rate": 2.2398814814814817e-05, + "loss": 1.5698, + "step": 46595 + }, + { + "epoch": 8.28, + "learning_rate": 2.2395851851851853e-05, + "loss": 1.6, + "step": 46600 + }, + { + "epoch": 8.29, + "learning_rate": 2.239288888888889e-05, + "loss": 1.5743, + "step": 46605 + }, + { + "epoch": 8.29, + "learning_rate": 2.2389925925925927e-05, + "loss": 1.6339, + "step": 46610 + }, + { + "epoch": 8.29, + "learning_rate": 2.2386962962962966e-05, + "loss": 1.5836, + "step": 46615 + }, + { + "epoch": 8.29, + "learning_rate": 2.2384e-05, + "loss": 1.6186, + "step": 46620 + }, + { + "epoch": 8.29, + "learning_rate": 2.238103703703704e-05, + "loss": 1.5696, + "step": 46625 + }, + { + "epoch": 8.29, + "learning_rate": 2.2378074074074075e-05, + "loss": 1.7069, + "step": 46630 + }, + { + "epoch": 8.29, + "learning_rate": 2.2375111111111114e-05, + "loss": 1.5354, + "step": 46635 + }, + { + "epoch": 8.29, + "learning_rate": 2.237214814814815e-05, + "loss": 1.549, + "step": 46640 + }, + { + "epoch": 8.29, + "learning_rate": 2.236918518518519e-05, + "loss": 1.5874, + "step": 46645 + }, + { + "epoch": 8.29, + "learning_rate": 2.2366222222222224e-05, + "loss": 1.5482, + "step": 46650 + }, + { + "epoch": 8.29, + "learning_rate": 2.2363259259259263e-05, + "loss": 1.5749, + "step": 46655 + }, + { + "epoch": 8.3, + "learning_rate": 2.2360296296296298e-05, + "loss": 1.649, + "step": 46660 + }, + { + "epoch": 8.3, + "learning_rate": 2.2357333333333337e-05, + "loss": 1.461, + "step": 46665 + }, + { + "epoch": 8.3, + "learning_rate": 2.2354370370370372e-05, + "loss": 1.6105, + "step": 46670 + }, + { + "epoch": 8.3, + "learning_rate": 2.235140740740741e-05, + "loss": 1.616, + "step": 46675 + }, + { + "epoch": 8.3, + "learning_rate": 2.2348444444444447e-05, + "loss": 1.3968, + "step": 46680 + }, + { + "epoch": 8.3, + "learning_rate": 2.2345481481481486e-05, + "loss": 1.585, + "step": 46685 + }, + { + "epoch": 8.3, + "learning_rate": 2.234251851851852e-05, + "loss": 1.4973, + "step": 46690 + }, + { + "epoch": 8.3, + "learning_rate": 2.2339555555555556e-05, + "loss": 1.5986, + "step": 46695 + }, + { + "epoch": 8.3, + "learning_rate": 2.2336592592592592e-05, + "loss": 1.4107, + "step": 46700 + }, + { + "epoch": 8.3, + "learning_rate": 2.233362962962963e-05, + "loss": 1.621, + "step": 46705 + }, + { + "epoch": 8.3, + "learning_rate": 2.2330666666666666e-05, + "loss": 1.484, + "step": 46710 + }, + { + "epoch": 8.3, + "learning_rate": 2.2327703703703705e-05, + "loss": 1.6283, + "step": 46715 + }, + { + "epoch": 8.31, + "learning_rate": 2.232474074074074e-05, + "loss": 1.6551, + "step": 46720 + }, + { + "epoch": 8.31, + "learning_rate": 2.232177777777778e-05, + "loss": 1.524, + "step": 46725 + }, + { + "epoch": 8.31, + "learning_rate": 2.2318814814814815e-05, + "loss": 1.488, + "step": 46730 + }, + { + "epoch": 8.31, + "learning_rate": 2.231585185185185e-05, + "loss": 1.6253, + "step": 46735 + }, + { + "epoch": 8.31, + "learning_rate": 2.231288888888889e-05, + "loss": 1.6196, + "step": 46740 + }, + { + "epoch": 8.31, + "learning_rate": 2.2309925925925924e-05, + "loss": 1.5229, + "step": 46745 + }, + { + "epoch": 8.31, + "learning_rate": 2.2306962962962963e-05, + "loss": 1.6012, + "step": 46750 + }, + { + "epoch": 8.31, + "learning_rate": 2.2304e-05, + "loss": 1.4397, + "step": 46755 + }, + { + "epoch": 8.31, + "learning_rate": 2.2301037037037037e-05, + "loss": 1.5493, + "step": 46760 + }, + { + "epoch": 8.31, + "learning_rate": 2.2298074074074073e-05, + "loss": 1.6227, + "step": 46765 + }, + { + "epoch": 8.31, + "learning_rate": 2.229511111111111e-05, + "loss": 1.5586, + "step": 46770 + }, + { + "epoch": 8.32, + "learning_rate": 2.2292148148148147e-05, + "loss": 1.4732, + "step": 46775 + }, + { + "epoch": 8.32, + "learning_rate": 2.2289185185185186e-05, + "loss": 1.5947, + "step": 46780 + }, + { + "epoch": 8.32, + "learning_rate": 2.228622222222222e-05, + "loss": 1.7021, + "step": 46785 + }, + { + "epoch": 8.32, + "learning_rate": 2.228325925925926e-05, + "loss": 1.5885, + "step": 46790 + }, + { + "epoch": 8.32, + "learning_rate": 2.2280296296296296e-05, + "loss": 1.4872, + "step": 46795 + }, + { + "epoch": 8.32, + "learning_rate": 2.2277333333333334e-05, + "loss": 1.6864, + "step": 46800 + }, + { + "epoch": 8.32, + "learning_rate": 2.227437037037037e-05, + "loss": 1.5305, + "step": 46805 + }, + { + "epoch": 8.32, + "learning_rate": 2.227140740740741e-05, + "loss": 1.524, + "step": 46810 + }, + { + "epoch": 8.32, + "learning_rate": 2.2268444444444444e-05, + "loss": 1.5166, + "step": 46815 + }, + { + "epoch": 8.32, + "learning_rate": 2.2265481481481483e-05, + "loss": 1.6664, + "step": 46820 + }, + { + "epoch": 8.32, + "learning_rate": 2.226251851851852e-05, + "loss": 1.4594, + "step": 46825 + }, + { + "epoch": 8.33, + "learning_rate": 2.2259555555555557e-05, + "loss": 1.5694, + "step": 46830 + }, + { + "epoch": 8.33, + "learning_rate": 2.2256592592592593e-05, + "loss": 1.5973, + "step": 46835 + }, + { + "epoch": 8.33, + "learning_rate": 2.225362962962963e-05, + "loss": 1.6549, + "step": 46840 + }, + { + "epoch": 8.33, + "learning_rate": 2.2250666666666667e-05, + "loss": 1.5489, + "step": 46845 + }, + { + "epoch": 8.33, + "learning_rate": 2.2247703703703702e-05, + "loss": 1.5598, + "step": 46850 + }, + { + "epoch": 8.33, + "learning_rate": 2.224474074074074e-05, + "loss": 1.6193, + "step": 46855 + }, + { + "epoch": 8.33, + "learning_rate": 2.2241777777777777e-05, + "loss": 1.3867, + "step": 46860 + }, + { + "epoch": 8.33, + "learning_rate": 2.2238814814814815e-05, + "loss": 1.6404, + "step": 46865 + }, + { + "epoch": 8.33, + "learning_rate": 2.223585185185185e-05, + "loss": 1.4696, + "step": 46870 + }, + { + "epoch": 8.33, + "learning_rate": 2.223288888888889e-05, + "loss": 1.7392, + "step": 46875 + }, + { + "epoch": 8.33, + "learning_rate": 2.2229925925925925e-05, + "loss": 1.6278, + "step": 46880 + }, + { + "epoch": 8.34, + "learning_rate": 2.2226962962962964e-05, + "loss": 1.6228, + "step": 46885 + }, + { + "epoch": 8.34, + "learning_rate": 2.2224e-05, + "loss": 1.6901, + "step": 46890 + }, + { + "epoch": 8.34, + "learning_rate": 2.2221037037037038e-05, + "loss": 1.6052, + "step": 46895 + }, + { + "epoch": 8.34, + "learning_rate": 2.2218074074074074e-05, + "loss": 1.6018, + "step": 46900 + }, + { + "epoch": 8.34, + "learning_rate": 2.2215111111111112e-05, + "loss": 1.5572, + "step": 46905 + }, + { + "epoch": 8.34, + "learning_rate": 2.2212148148148148e-05, + "loss": 1.6287, + "step": 46910 + }, + { + "epoch": 8.34, + "learning_rate": 2.2209185185185187e-05, + "loss": 1.5911, + "step": 46915 + }, + { + "epoch": 8.34, + "learning_rate": 2.2206222222222222e-05, + "loss": 1.4826, + "step": 46920 + }, + { + "epoch": 8.34, + "learning_rate": 2.220325925925926e-05, + "loss": 1.581, + "step": 46925 + }, + { + "epoch": 8.34, + "learning_rate": 2.2200296296296296e-05, + "loss": 1.5588, + "step": 46930 + }, + { + "epoch": 8.34, + "learning_rate": 2.2197333333333335e-05, + "loss": 1.6054, + "step": 46935 + }, + { + "epoch": 8.34, + "learning_rate": 2.219437037037037e-05, + "loss": 1.5383, + "step": 46940 + }, + { + "epoch": 8.35, + "learning_rate": 2.219140740740741e-05, + "loss": 1.5289, + "step": 46945 + }, + { + "epoch": 8.35, + "learning_rate": 2.2188444444444445e-05, + "loss": 1.5351, + "step": 46950 + }, + { + "epoch": 8.35, + "learning_rate": 2.2185481481481484e-05, + "loss": 1.6189, + "step": 46955 + }, + { + "epoch": 8.35, + "learning_rate": 2.218251851851852e-05, + "loss": 1.607, + "step": 46960 + }, + { + "epoch": 8.35, + "learning_rate": 2.2179555555555555e-05, + "loss": 1.615, + "step": 46965 + }, + { + "epoch": 8.35, + "learning_rate": 2.2176592592592593e-05, + "loss": 1.4924, + "step": 46970 + }, + { + "epoch": 8.35, + "learning_rate": 2.217362962962963e-05, + "loss": 1.5961, + "step": 46975 + }, + { + "epoch": 8.35, + "learning_rate": 2.2170666666666668e-05, + "loss": 1.5244, + "step": 46980 + }, + { + "epoch": 8.35, + "learning_rate": 2.2167703703703703e-05, + "loss": 1.5813, + "step": 46985 + }, + { + "epoch": 8.35, + "learning_rate": 2.2164740740740742e-05, + "loss": 1.5784, + "step": 46990 + }, + { + "epoch": 8.35, + "learning_rate": 2.2161777777777777e-05, + "loss": 1.5727, + "step": 46995 + }, + { + "epoch": 8.36, + "learning_rate": 2.2158814814814816e-05, + "loss": 1.5636, + "step": 47000 + }, + { + "epoch": 8.36, + "learning_rate": 2.215585185185185e-05, + "loss": 1.583, + "step": 47005 + }, + { + "epoch": 8.36, + "learning_rate": 2.215288888888889e-05, + "loss": 1.4832, + "step": 47010 + }, + { + "epoch": 8.36, + "learning_rate": 2.2149925925925926e-05, + "loss": 1.5225, + "step": 47015 + }, + { + "epoch": 8.36, + "learning_rate": 2.2146962962962965e-05, + "loss": 1.4656, + "step": 47020 + }, + { + "epoch": 8.36, + "learning_rate": 2.2144e-05, + "loss": 1.571, + "step": 47025 + }, + { + "epoch": 8.36, + "learning_rate": 2.214103703703704e-05, + "loss": 1.5867, + "step": 47030 + }, + { + "epoch": 8.36, + "learning_rate": 2.2138074074074074e-05, + "loss": 1.4768, + "step": 47035 + }, + { + "epoch": 8.36, + "learning_rate": 2.2135111111111113e-05, + "loss": 1.5745, + "step": 47040 + }, + { + "epoch": 8.36, + "learning_rate": 2.213214814814815e-05, + "loss": 1.5118, + "step": 47045 + }, + { + "epoch": 8.36, + "learning_rate": 2.2129185185185187e-05, + "loss": 1.548, + "step": 47050 + }, + { + "epoch": 8.37, + "learning_rate": 2.2126222222222223e-05, + "loss": 1.6263, + "step": 47055 + }, + { + "epoch": 8.37, + "learning_rate": 2.212325925925926e-05, + "loss": 1.6887, + "step": 47060 + }, + { + "epoch": 8.37, + "learning_rate": 2.2120296296296297e-05, + "loss": 1.6418, + "step": 47065 + }, + { + "epoch": 8.37, + "learning_rate": 2.2117333333333336e-05, + "loss": 1.6527, + "step": 47070 + }, + { + "epoch": 8.37, + "learning_rate": 2.211437037037037e-05, + "loss": 1.5866, + "step": 47075 + }, + { + "epoch": 8.37, + "learning_rate": 2.2111407407407407e-05, + "loss": 1.4501, + "step": 47080 + }, + { + "epoch": 8.37, + "learning_rate": 2.2108444444444446e-05, + "loss": 1.6031, + "step": 47085 + }, + { + "epoch": 8.37, + "learning_rate": 2.210548148148148e-05, + "loss": 1.6453, + "step": 47090 + }, + { + "epoch": 8.37, + "learning_rate": 2.210251851851852e-05, + "loss": 1.6063, + "step": 47095 + }, + { + "epoch": 8.37, + "learning_rate": 2.2099555555555555e-05, + "loss": 1.5585, + "step": 47100 + }, + { + "epoch": 8.37, + "learning_rate": 2.2096592592592594e-05, + "loss": 1.5528, + "step": 47105 + }, + { + "epoch": 8.38, + "learning_rate": 2.209362962962963e-05, + "loss": 1.5844, + "step": 47110 + }, + { + "epoch": 8.38, + "learning_rate": 2.209066666666667e-05, + "loss": 1.6351, + "step": 47115 + }, + { + "epoch": 8.38, + "learning_rate": 2.2087703703703704e-05, + "loss": 1.5701, + "step": 47120 + }, + { + "epoch": 8.38, + "learning_rate": 2.2084740740740743e-05, + "loss": 1.6592, + "step": 47125 + }, + { + "epoch": 8.38, + "learning_rate": 2.2081777777777778e-05, + "loss": 1.5281, + "step": 47130 + }, + { + "epoch": 8.38, + "learning_rate": 2.2078814814814817e-05, + "loss": 1.5523, + "step": 47135 + }, + { + "epoch": 8.38, + "learning_rate": 2.2075851851851852e-05, + "loss": 1.5014, + "step": 47140 + }, + { + "epoch": 8.38, + "learning_rate": 2.207288888888889e-05, + "loss": 1.5398, + "step": 47145 + }, + { + "epoch": 8.38, + "learning_rate": 2.2069925925925927e-05, + "loss": 1.6268, + "step": 47150 + }, + { + "epoch": 8.38, + "learning_rate": 2.2066962962962965e-05, + "loss": 1.4891, + "step": 47155 + }, + { + "epoch": 8.38, + "learning_rate": 2.2064e-05, + "loss": 1.5367, + "step": 47160 + }, + { + "epoch": 8.38, + "learning_rate": 2.206103703703704e-05, + "loss": 1.6897, + "step": 47165 + }, + { + "epoch": 8.39, + "learning_rate": 2.2058074074074075e-05, + "loss": 1.6075, + "step": 47170 + }, + { + "epoch": 8.39, + "learning_rate": 2.2055111111111114e-05, + "loss": 1.6416, + "step": 47175 + }, + { + "epoch": 8.39, + "learning_rate": 2.205214814814815e-05, + "loss": 1.6006, + "step": 47180 + }, + { + "epoch": 8.39, + "learning_rate": 2.2049185185185188e-05, + "loss": 1.5777, + "step": 47185 + }, + { + "epoch": 8.39, + "learning_rate": 2.2046222222222224e-05, + "loss": 1.6031, + "step": 47190 + }, + { + "epoch": 8.39, + "learning_rate": 2.204325925925926e-05, + "loss": 1.6258, + "step": 47195 + }, + { + "epoch": 8.39, + "learning_rate": 2.2040296296296298e-05, + "loss": 1.6051, + "step": 47200 + }, + { + "epoch": 8.39, + "learning_rate": 2.2037333333333333e-05, + "loss": 1.6267, + "step": 47205 + }, + { + "epoch": 8.39, + "learning_rate": 2.2034370370370372e-05, + "loss": 1.5893, + "step": 47210 + }, + { + "epoch": 8.39, + "learning_rate": 2.2031407407407408e-05, + "loss": 1.6302, + "step": 47215 + }, + { + "epoch": 8.39, + "learning_rate": 2.2028444444444446e-05, + "loss": 1.6059, + "step": 47220 + }, + { + "epoch": 8.4, + "learning_rate": 2.2025481481481482e-05, + "loss": 1.5594, + "step": 47225 + }, + { + "epoch": 8.4, + "learning_rate": 2.202251851851852e-05, + "loss": 1.6561, + "step": 47230 + }, + { + "epoch": 8.4, + "learning_rate": 2.2019555555555556e-05, + "loss": 1.5227, + "step": 47235 + }, + { + "epoch": 8.4, + "learning_rate": 2.2016592592592595e-05, + "loss": 1.5793, + "step": 47240 + }, + { + "epoch": 8.4, + "learning_rate": 2.201362962962963e-05, + "loss": 1.5484, + "step": 47245 + }, + { + "epoch": 8.4, + "learning_rate": 2.201066666666667e-05, + "loss": 1.5301, + "step": 47250 + }, + { + "epoch": 8.4, + "learning_rate": 2.2007703703703705e-05, + "loss": 1.5685, + "step": 47255 + }, + { + "epoch": 8.4, + "learning_rate": 2.2004740740740743e-05, + "loss": 1.6943, + "step": 47260 + }, + { + "epoch": 8.4, + "learning_rate": 2.200177777777778e-05, + "loss": 1.6373, + "step": 47265 + }, + { + "epoch": 8.4, + "learning_rate": 2.1998814814814818e-05, + "loss": 1.5516, + "step": 47270 + }, + { + "epoch": 8.4, + "learning_rate": 2.1995851851851853e-05, + "loss": 1.6012, + "step": 47275 + }, + { + "epoch": 8.41, + "learning_rate": 2.1992888888888892e-05, + "loss": 1.4865, + "step": 47280 + }, + { + "epoch": 8.41, + "learning_rate": 2.1989925925925927e-05, + "loss": 1.5459, + "step": 47285 + }, + { + "epoch": 8.41, + "learning_rate": 2.1986962962962966e-05, + "loss": 1.5592, + "step": 47290 + }, + { + "epoch": 8.41, + "learning_rate": 2.1984e-05, + "loss": 1.5472, + "step": 47295 + }, + { + "epoch": 8.41, + "learning_rate": 2.198103703703704e-05, + "loss": 1.4812, + "step": 47300 + }, + { + "epoch": 8.41, + "learning_rate": 2.1978074074074076e-05, + "loss": 1.5071, + "step": 47305 + }, + { + "epoch": 8.41, + "learning_rate": 2.197511111111111e-05, + "loss": 1.5342, + "step": 47310 + }, + { + "epoch": 8.41, + "learning_rate": 2.197214814814815e-05, + "loss": 1.3157, + "step": 47315 + }, + { + "epoch": 8.41, + "learning_rate": 2.1969185185185186e-05, + "loss": 1.6173, + "step": 47320 + }, + { + "epoch": 8.41, + "learning_rate": 2.1966222222222224e-05, + "loss": 1.6164, + "step": 47325 + }, + { + "epoch": 8.41, + "learning_rate": 2.196325925925926e-05, + "loss": 1.6466, + "step": 47330 + }, + { + "epoch": 8.42, + "learning_rate": 2.19602962962963e-05, + "loss": 1.4472, + "step": 47335 + }, + { + "epoch": 8.42, + "learning_rate": 2.1957333333333334e-05, + "loss": 1.6237, + "step": 47340 + }, + { + "epoch": 8.42, + "learning_rate": 2.1954370370370373e-05, + "loss": 1.5209, + "step": 47345 + }, + { + "epoch": 8.42, + "learning_rate": 2.195140740740741e-05, + "loss": 1.6338, + "step": 47350 + }, + { + "epoch": 8.42, + "learning_rate": 2.1948444444444447e-05, + "loss": 1.5769, + "step": 47355 + }, + { + "epoch": 8.42, + "learning_rate": 2.1945481481481483e-05, + "loss": 1.6905, + "step": 47360 + }, + { + "epoch": 8.42, + "learning_rate": 2.194251851851852e-05, + "loss": 1.5773, + "step": 47365 + }, + { + "epoch": 8.42, + "learning_rate": 2.1939555555555557e-05, + "loss": 1.6069, + "step": 47370 + }, + { + "epoch": 8.42, + "learning_rate": 2.1936592592592596e-05, + "loss": 1.6848, + "step": 47375 + }, + { + "epoch": 8.42, + "learning_rate": 2.193362962962963e-05, + "loss": 1.6073, + "step": 47380 + }, + { + "epoch": 8.42, + "learning_rate": 2.193066666666667e-05, + "loss": 1.6285, + "step": 47385 + }, + { + "epoch": 8.42, + "learning_rate": 2.1927703703703705e-05, + "loss": 1.575, + "step": 47390 + }, + { + "epoch": 8.43, + "learning_rate": 2.1924740740740744e-05, + "loss": 1.5736, + "step": 47395 + }, + { + "epoch": 8.43, + "learning_rate": 2.192177777777778e-05, + "loss": 1.5025, + "step": 47400 + }, + { + "epoch": 8.43, + "learning_rate": 2.191881481481482e-05, + "loss": 1.5521, + "step": 47405 + }, + { + "epoch": 8.43, + "learning_rate": 2.1915851851851854e-05, + "loss": 1.6113, + "step": 47410 + }, + { + "epoch": 8.43, + "learning_rate": 2.1912888888888893e-05, + "loss": 1.7129, + "step": 47415 + }, + { + "epoch": 8.43, + "learning_rate": 2.1909925925925928e-05, + "loss": 1.6043, + "step": 47420 + }, + { + "epoch": 8.43, + "learning_rate": 2.1906962962962964e-05, + "loss": 1.5439, + "step": 47425 + }, + { + "epoch": 8.43, + "learning_rate": 2.1904000000000002e-05, + "loss": 1.6108, + "step": 47430 + }, + { + "epoch": 8.43, + "learning_rate": 2.1901037037037038e-05, + "loss": 1.5353, + "step": 47435 + }, + { + "epoch": 8.43, + "learning_rate": 2.1898074074074077e-05, + "loss": 1.5844, + "step": 47440 + }, + { + "epoch": 8.43, + "learning_rate": 2.1895111111111112e-05, + "loss": 1.5994, + "step": 47445 + }, + { + "epoch": 8.44, + "learning_rate": 2.189214814814815e-05, + "loss": 1.6359, + "step": 47450 + }, + { + "epoch": 8.44, + "learning_rate": 2.1889185185185186e-05, + "loss": 1.5686, + "step": 47455 + }, + { + "epoch": 8.44, + "learning_rate": 2.1886222222222225e-05, + "loss": 1.6189, + "step": 47460 + }, + { + "epoch": 8.44, + "learning_rate": 2.188325925925926e-05, + "loss": 1.5815, + "step": 47465 + }, + { + "epoch": 8.44, + "learning_rate": 2.1880296296296296e-05, + "loss": 1.687, + "step": 47470 + }, + { + "epoch": 8.44, + "learning_rate": 2.187733333333333e-05, + "loss": 1.5858, + "step": 47475 + }, + { + "epoch": 8.44, + "learning_rate": 2.187437037037037e-05, + "loss": 1.6059, + "step": 47480 + }, + { + "epoch": 8.44, + "learning_rate": 2.1871407407407406e-05, + "loss": 1.5492, + "step": 47485 + }, + { + "epoch": 8.44, + "learning_rate": 2.1868444444444445e-05, + "loss": 1.5101, + "step": 47490 + }, + { + "epoch": 8.44, + "learning_rate": 2.186548148148148e-05, + "loss": 1.6404, + "step": 47495 + }, + { + "epoch": 8.44, + "learning_rate": 2.186251851851852e-05, + "loss": 1.6206, + "step": 47500 + }, + { + "epoch": 8.45, + "learning_rate": 2.1859555555555554e-05, + "loss": 1.6501, + "step": 47505 + }, + { + "epoch": 8.45, + "learning_rate": 2.1856592592592593e-05, + "loss": 1.6834, + "step": 47510 + }, + { + "epoch": 8.45, + "learning_rate": 2.185362962962963e-05, + "loss": 1.5677, + "step": 47515 + }, + { + "epoch": 8.45, + "learning_rate": 2.1850666666666667e-05, + "loss": 1.6211, + "step": 47520 + }, + { + "epoch": 8.45, + "learning_rate": 2.1847703703703703e-05, + "loss": 1.5655, + "step": 47525 + }, + { + "epoch": 8.45, + "learning_rate": 2.184474074074074e-05, + "loss": 1.5535, + "step": 47530 + }, + { + "epoch": 8.45, + "learning_rate": 2.1841777777777777e-05, + "loss": 1.5823, + "step": 47535 + }, + { + "epoch": 8.45, + "learning_rate": 2.1838814814814816e-05, + "loss": 1.502, + "step": 47540 + }, + { + "epoch": 8.45, + "learning_rate": 2.183585185185185e-05, + "loss": 1.5056, + "step": 47545 + }, + { + "epoch": 8.45, + "learning_rate": 2.183288888888889e-05, + "loss": 1.5667, + "step": 47550 + }, + { + "epoch": 8.45, + "learning_rate": 2.1829925925925926e-05, + "loss": 1.5553, + "step": 47555 + }, + { + "epoch": 8.46, + "learning_rate": 2.1826962962962964e-05, + "loss": 1.5297, + "step": 47560 + }, + { + "epoch": 8.46, + "learning_rate": 2.1824e-05, + "loss": 1.6318, + "step": 47565 + }, + { + "epoch": 8.46, + "learning_rate": 2.182103703703704e-05, + "loss": 1.5296, + "step": 47570 + }, + { + "epoch": 8.46, + "learning_rate": 2.1818074074074074e-05, + "loss": 1.6722, + "step": 47575 + }, + { + "epoch": 8.46, + "learning_rate": 2.1815111111111113e-05, + "loss": 1.4001, + "step": 47580 + }, + { + "epoch": 8.46, + "learning_rate": 2.1812148148148148e-05, + "loss": 1.568, + "step": 47585 + }, + { + "epoch": 8.46, + "learning_rate": 2.1809185185185184e-05, + "loss": 1.4917, + "step": 47590 + }, + { + "epoch": 8.46, + "learning_rate": 2.1806222222222223e-05, + "loss": 1.5756, + "step": 47595 + }, + { + "epoch": 8.46, + "learning_rate": 2.1803259259259258e-05, + "loss": 1.5211, + "step": 47600 + }, + { + "epoch": 8.46, + "learning_rate": 2.1800296296296297e-05, + "loss": 1.6027, + "step": 47605 + }, + { + "epoch": 8.46, + "learning_rate": 2.1797333333333332e-05, + "loss": 1.5355, + "step": 47610 + }, + { + "epoch": 8.46, + "learning_rate": 2.179437037037037e-05, + "loss": 1.6005, + "step": 47615 + }, + { + "epoch": 8.47, + "learning_rate": 2.1791407407407406e-05, + "loss": 1.5999, + "step": 47620 + }, + { + "epoch": 8.47, + "learning_rate": 2.1788444444444445e-05, + "loss": 1.6756, + "step": 47625 + }, + { + "epoch": 8.47, + "learning_rate": 2.178548148148148e-05, + "loss": 1.5878, + "step": 47630 + }, + { + "epoch": 8.47, + "learning_rate": 2.178251851851852e-05, + "loss": 1.5147, + "step": 47635 + }, + { + "epoch": 8.47, + "learning_rate": 2.1779555555555555e-05, + "loss": 1.5881, + "step": 47640 + }, + { + "epoch": 8.47, + "learning_rate": 2.1776592592592594e-05, + "loss": 1.7187, + "step": 47645 + }, + { + "epoch": 8.47, + "learning_rate": 2.177362962962963e-05, + "loss": 1.6254, + "step": 47650 + }, + { + "epoch": 8.47, + "learning_rate": 2.1770666666666668e-05, + "loss": 1.5287, + "step": 47655 + }, + { + "epoch": 8.47, + "learning_rate": 2.1767703703703703e-05, + "loss": 1.6405, + "step": 47660 + }, + { + "epoch": 8.47, + "learning_rate": 2.1764740740740742e-05, + "loss": 1.5883, + "step": 47665 + }, + { + "epoch": 8.47, + "learning_rate": 2.1761777777777778e-05, + "loss": 1.5234, + "step": 47670 + }, + { + "epoch": 8.48, + "learning_rate": 2.1758814814814817e-05, + "loss": 1.5134, + "step": 47675 + }, + { + "epoch": 8.48, + "learning_rate": 2.1755851851851852e-05, + "loss": 1.6219, + "step": 47680 + }, + { + "epoch": 8.48, + "learning_rate": 2.175288888888889e-05, + "loss": 1.6502, + "step": 47685 + }, + { + "epoch": 8.48, + "learning_rate": 2.1749925925925926e-05, + "loss": 1.5421, + "step": 47690 + }, + { + "epoch": 8.48, + "learning_rate": 2.1746962962962965e-05, + "loss": 1.6259, + "step": 47695 + }, + { + "epoch": 8.48, + "learning_rate": 2.1744e-05, + "loss": 1.6538, + "step": 47700 + }, + { + "epoch": 8.48, + "learning_rate": 2.1741037037037036e-05, + "loss": 1.5831, + "step": 47705 + }, + { + "epoch": 8.48, + "learning_rate": 2.1738074074074075e-05, + "loss": 1.5366, + "step": 47710 + }, + { + "epoch": 8.48, + "learning_rate": 2.173511111111111e-05, + "loss": 1.6141, + "step": 47715 + }, + { + "epoch": 8.48, + "learning_rate": 2.173214814814815e-05, + "loss": 1.6115, + "step": 47720 + }, + { + "epoch": 8.48, + "learning_rate": 2.1729185185185184e-05, + "loss": 1.4801, + "step": 47725 + }, + { + "epoch": 8.49, + "learning_rate": 2.1726222222222223e-05, + "loss": 1.5836, + "step": 47730 + }, + { + "epoch": 8.49, + "learning_rate": 2.172325925925926e-05, + "loss": 1.5643, + "step": 47735 + }, + { + "epoch": 8.49, + "learning_rate": 2.1720296296296298e-05, + "loss": 1.5182, + "step": 47740 + }, + { + "epoch": 8.49, + "learning_rate": 2.1717333333333333e-05, + "loss": 1.5676, + "step": 47745 + }, + { + "epoch": 8.49, + "learning_rate": 2.1714370370370372e-05, + "loss": 1.6412, + "step": 47750 + }, + { + "epoch": 8.49, + "learning_rate": 2.1711407407407407e-05, + "loss": 1.5194, + "step": 47755 + }, + { + "epoch": 8.49, + "learning_rate": 2.1708444444444446e-05, + "loss": 1.5391, + "step": 47760 + }, + { + "epoch": 8.49, + "learning_rate": 2.170548148148148e-05, + "loss": 1.6565, + "step": 47765 + }, + { + "epoch": 8.49, + "learning_rate": 2.170251851851852e-05, + "loss": 1.5634, + "step": 47770 + }, + { + "epoch": 8.49, + "learning_rate": 2.1699555555555556e-05, + "loss": 1.6943, + "step": 47775 + }, + { + "epoch": 8.49, + "learning_rate": 2.1696592592592595e-05, + "loss": 1.5128, + "step": 47780 + }, + { + "epoch": 8.5, + "learning_rate": 2.169362962962963e-05, + "loss": 1.5908, + "step": 47785 + }, + { + "epoch": 8.5, + "learning_rate": 2.169066666666667e-05, + "loss": 1.5933, + "step": 47790 + }, + { + "epoch": 8.5, + "learning_rate": 2.1687703703703704e-05, + "loss": 1.5426, + "step": 47795 + }, + { + "epoch": 8.5, + "learning_rate": 2.1684740740740743e-05, + "loss": 1.6647, + "step": 47800 + }, + { + "epoch": 8.5, + "learning_rate": 2.168177777777778e-05, + "loss": 1.644, + "step": 47805 + }, + { + "epoch": 8.5, + "learning_rate": 2.1678814814814817e-05, + "loss": 1.6176, + "step": 47810 + }, + { + "epoch": 8.5, + "learning_rate": 2.1675851851851853e-05, + "loss": 1.5917, + "step": 47815 + }, + { + "epoch": 8.5, + "learning_rate": 2.1672888888888888e-05, + "loss": 1.5366, + "step": 47820 + }, + { + "epoch": 8.5, + "learning_rate": 2.1669925925925927e-05, + "loss": 1.5441, + "step": 47825 + }, + { + "epoch": 8.5, + "learning_rate": 2.1666962962962962e-05, + "loss": 1.5943, + "step": 47830 + }, + { + "epoch": 8.5, + "learning_rate": 2.1664e-05, + "loss": 1.6253, + "step": 47835 + }, + { + "epoch": 8.5, + "learning_rate": 2.1661037037037037e-05, + "loss": 1.5159, + "step": 47840 + }, + { + "epoch": 8.51, + "learning_rate": 2.1658074074074076e-05, + "loss": 1.7313, + "step": 47845 + }, + { + "epoch": 8.51, + "learning_rate": 2.165511111111111e-05, + "loss": 1.5913, + "step": 47850 + }, + { + "epoch": 8.51, + "learning_rate": 2.165214814814815e-05, + "loss": 1.5954, + "step": 47855 + }, + { + "epoch": 8.51, + "learning_rate": 2.1649185185185185e-05, + "loss": 1.715, + "step": 47860 + }, + { + "epoch": 8.51, + "learning_rate": 2.1646222222222224e-05, + "loss": 1.6193, + "step": 47865 + }, + { + "epoch": 8.51, + "learning_rate": 2.164325925925926e-05, + "loss": 1.6036, + "step": 47870 + }, + { + "epoch": 8.51, + "learning_rate": 2.16402962962963e-05, + "loss": 1.5861, + "step": 47875 + }, + { + "epoch": 8.51, + "learning_rate": 2.1637333333333334e-05, + "loss": 1.5645, + "step": 47880 + }, + { + "epoch": 8.51, + "learning_rate": 2.1634370370370373e-05, + "loss": 1.5977, + "step": 47885 + }, + { + "epoch": 8.51, + "learning_rate": 2.1631407407407408e-05, + "loss": 1.5133, + "step": 47890 + }, + { + "epoch": 8.51, + "learning_rate": 2.1628444444444447e-05, + "loss": 1.6485, + "step": 47895 + }, + { + "epoch": 8.52, + "learning_rate": 2.1625481481481482e-05, + "loss": 1.5182, + "step": 47900 + }, + { + "epoch": 8.52, + "learning_rate": 2.162251851851852e-05, + "loss": 1.5743, + "step": 47905 + }, + { + "epoch": 8.52, + "learning_rate": 2.1619555555555557e-05, + "loss": 1.6121, + "step": 47910 + }, + { + "epoch": 8.52, + "learning_rate": 2.1616592592592595e-05, + "loss": 1.4969, + "step": 47915 + }, + { + "epoch": 8.52, + "learning_rate": 2.161362962962963e-05, + "loss": 1.4923, + "step": 47920 + }, + { + "epoch": 8.52, + "learning_rate": 2.161066666666667e-05, + "loss": 1.6338, + "step": 47925 + }, + { + "epoch": 8.52, + "learning_rate": 2.1607703703703705e-05, + "loss": 1.6001, + "step": 47930 + }, + { + "epoch": 8.52, + "learning_rate": 2.160474074074074e-05, + "loss": 1.598, + "step": 47935 + }, + { + "epoch": 8.52, + "learning_rate": 2.160177777777778e-05, + "loss": 1.5255, + "step": 47940 + }, + { + "epoch": 8.52, + "learning_rate": 2.1598814814814815e-05, + "loss": 1.6589, + "step": 47945 + }, + { + "epoch": 8.52, + "learning_rate": 2.1595851851851854e-05, + "loss": 1.4474, + "step": 47950 + }, + { + "epoch": 8.53, + "learning_rate": 2.159288888888889e-05, + "loss": 1.5267, + "step": 47955 + }, + { + "epoch": 8.53, + "learning_rate": 2.1589925925925928e-05, + "loss": 1.6272, + "step": 47960 + }, + { + "epoch": 8.53, + "learning_rate": 2.1586962962962963e-05, + "loss": 1.5216, + "step": 47965 + }, + { + "epoch": 8.53, + "learning_rate": 2.1584000000000002e-05, + "loss": 1.6175, + "step": 47970 + }, + { + "epoch": 8.53, + "learning_rate": 2.1581037037037037e-05, + "loss": 1.6078, + "step": 47975 + }, + { + "epoch": 8.53, + "learning_rate": 2.1578074074074076e-05, + "loss": 1.5719, + "step": 47980 + }, + { + "epoch": 8.53, + "learning_rate": 2.1575111111111112e-05, + "loss": 1.6192, + "step": 47985 + }, + { + "epoch": 8.53, + "learning_rate": 2.157214814814815e-05, + "loss": 1.5284, + "step": 47990 + }, + { + "epoch": 8.53, + "learning_rate": 2.1569185185185186e-05, + "loss": 1.5808, + "step": 47995 + }, + { + "epoch": 8.53, + "learning_rate": 2.1566222222222225e-05, + "loss": 1.5719, + "step": 48000 + }, + { + "epoch": 8.53, + "learning_rate": 2.156325925925926e-05, + "loss": 1.4008, + "step": 48005 + }, + { + "epoch": 8.54, + "learning_rate": 2.15602962962963e-05, + "loss": 1.5677, + "step": 48010 + }, + { + "epoch": 8.54, + "learning_rate": 2.1557333333333335e-05, + "loss": 1.5654, + "step": 48015 + }, + { + "epoch": 8.54, + "learning_rate": 2.1554370370370373e-05, + "loss": 1.5501, + "step": 48020 + }, + { + "epoch": 8.54, + "learning_rate": 2.155140740740741e-05, + "loss": 1.641, + "step": 48025 + }, + { + "epoch": 8.54, + "learning_rate": 2.1548444444444448e-05, + "loss": 1.6144, + "step": 48030 + }, + { + "epoch": 8.54, + "learning_rate": 2.1545481481481483e-05, + "loss": 1.6962, + "step": 48035 + }, + { + "epoch": 8.54, + "learning_rate": 2.1542518518518522e-05, + "loss": 1.5706, + "step": 48040 + }, + { + "epoch": 8.54, + "learning_rate": 2.1539555555555557e-05, + "loss": 1.6857, + "step": 48045 + }, + { + "epoch": 8.54, + "learning_rate": 2.1536592592592593e-05, + "loss": 1.6497, + "step": 48050 + }, + { + "epoch": 8.54, + "learning_rate": 2.153362962962963e-05, + "loss": 1.5675, + "step": 48055 + }, + { + "epoch": 8.54, + "learning_rate": 2.1530666666666667e-05, + "loss": 1.632, + "step": 48060 + }, + { + "epoch": 8.54, + "learning_rate": 2.1527703703703706e-05, + "loss": 1.6078, + "step": 48065 + }, + { + "epoch": 8.55, + "learning_rate": 2.152474074074074e-05, + "loss": 1.5795, + "step": 48070 + }, + { + "epoch": 8.55, + "learning_rate": 2.152177777777778e-05, + "loss": 1.6866, + "step": 48075 + }, + { + "epoch": 8.55, + "learning_rate": 2.1518814814814815e-05, + "loss": 1.5731, + "step": 48080 + }, + { + "epoch": 8.55, + "learning_rate": 2.1515851851851854e-05, + "loss": 1.4527, + "step": 48085 + }, + { + "epoch": 8.55, + "learning_rate": 2.151288888888889e-05, + "loss": 1.5202, + "step": 48090 + }, + { + "epoch": 8.55, + "learning_rate": 2.150992592592593e-05, + "loss": 1.4852, + "step": 48095 + }, + { + "epoch": 8.55, + "learning_rate": 2.1506962962962964e-05, + "loss": 1.5316, + "step": 48100 + }, + { + "epoch": 8.55, + "learning_rate": 2.1504000000000003e-05, + "loss": 1.5161, + "step": 48105 + }, + { + "epoch": 8.55, + "learning_rate": 2.1501037037037038e-05, + "loss": 1.5253, + "step": 48110 + }, + { + "epoch": 8.55, + "learning_rate": 2.1498074074074077e-05, + "loss": 1.5583, + "step": 48115 + }, + { + "epoch": 8.55, + "learning_rate": 2.1495111111111113e-05, + "loss": 1.5555, + "step": 48120 + }, + { + "epoch": 8.56, + "learning_rate": 2.149214814814815e-05, + "loss": 1.6236, + "step": 48125 + }, + { + "epoch": 8.56, + "learning_rate": 2.1489185185185187e-05, + "loss": 1.5313, + "step": 48130 + }, + { + "epoch": 8.56, + "learning_rate": 2.1486222222222226e-05, + "loss": 1.5525, + "step": 48135 + }, + { + "epoch": 8.56, + "learning_rate": 2.148325925925926e-05, + "loss": 1.6012, + "step": 48140 + }, + { + "epoch": 8.56, + "learning_rate": 2.14802962962963e-05, + "loss": 1.6264, + "step": 48145 + }, + { + "epoch": 8.56, + "learning_rate": 2.1477333333333335e-05, + "loss": 1.5451, + "step": 48150 + }, + { + "epoch": 8.56, + "learning_rate": 2.1474370370370374e-05, + "loss": 1.5717, + "step": 48155 + }, + { + "epoch": 8.56, + "learning_rate": 2.147140740740741e-05, + "loss": 1.518, + "step": 48160 + }, + { + "epoch": 8.56, + "learning_rate": 2.1468444444444445e-05, + "loss": 1.644, + "step": 48165 + }, + { + "epoch": 8.56, + "learning_rate": 2.1465481481481484e-05, + "loss": 1.6199, + "step": 48170 + }, + { + "epoch": 8.56, + "learning_rate": 2.146251851851852e-05, + "loss": 1.6452, + "step": 48175 + }, + { + "epoch": 8.57, + "learning_rate": 2.1459555555555558e-05, + "loss": 1.5452, + "step": 48180 + }, + { + "epoch": 8.57, + "learning_rate": 2.1456592592592593e-05, + "loss": 1.604, + "step": 48185 + }, + { + "epoch": 8.57, + "learning_rate": 2.1453629629629632e-05, + "loss": 1.6413, + "step": 48190 + }, + { + "epoch": 8.57, + "learning_rate": 2.1450666666666668e-05, + "loss": 1.6097, + "step": 48195 + }, + { + "epoch": 8.57, + "learning_rate": 2.1447703703703707e-05, + "loss": 1.4579, + "step": 48200 + }, + { + "epoch": 8.57, + "learning_rate": 2.1444740740740742e-05, + "loss": 1.5101, + "step": 48205 + }, + { + "epoch": 8.57, + "learning_rate": 2.144177777777778e-05, + "loss": 1.5261, + "step": 48210 + }, + { + "epoch": 8.57, + "learning_rate": 2.1438814814814816e-05, + "loss": 1.5075, + "step": 48215 + }, + { + "epoch": 8.57, + "learning_rate": 2.1435851851851855e-05, + "loss": 1.5177, + "step": 48220 + }, + { + "epoch": 8.57, + "learning_rate": 2.143288888888889e-05, + "loss": 1.6129, + "step": 48225 + }, + { + "epoch": 8.57, + "learning_rate": 2.142992592592593e-05, + "loss": 1.4983, + "step": 48230 + }, + { + "epoch": 8.58, + "learning_rate": 2.1426962962962965e-05, + "loss": 1.5483, + "step": 48235 + }, + { + "epoch": 8.58, + "learning_rate": 2.1424000000000004e-05, + "loss": 1.6729, + "step": 48240 + }, + { + "epoch": 8.58, + "learning_rate": 2.1421037037037036e-05, + "loss": 1.6574, + "step": 48245 + }, + { + "epoch": 8.58, + "learning_rate": 2.1418074074074074e-05, + "loss": 1.5554, + "step": 48250 + }, + { + "epoch": 8.58, + "learning_rate": 2.141511111111111e-05, + "loss": 1.5557, + "step": 48255 + }, + { + "epoch": 8.58, + "learning_rate": 2.141214814814815e-05, + "loss": 1.527, + "step": 48260 + }, + { + "epoch": 8.58, + "learning_rate": 2.1409185185185184e-05, + "loss": 1.6802, + "step": 48265 + }, + { + "epoch": 8.58, + "learning_rate": 2.1406814814814817e-05, + "loss": 1.6135, + "step": 48270 + }, + { + "epoch": 8.58, + "learning_rate": 2.1403851851851853e-05, + "loss": 1.6063, + "step": 48275 + }, + { + "epoch": 8.58, + "learning_rate": 2.1400888888888888e-05, + "loss": 1.6415, + "step": 48280 + }, + { + "epoch": 8.58, + "learning_rate": 2.1397925925925927e-05, + "loss": 1.4916, + "step": 48285 + }, + { + "epoch": 8.58, + "learning_rate": 2.1394962962962962e-05, + "loss": 1.6307, + "step": 48290 + }, + { + "epoch": 8.59, + "learning_rate": 2.1392e-05, + "loss": 1.4657, + "step": 48295 + }, + { + "epoch": 8.59, + "learning_rate": 2.1389037037037037e-05, + "loss": 1.5909, + "step": 48300 + }, + { + "epoch": 8.59, + "learning_rate": 2.1386074074074075e-05, + "loss": 1.4674, + "step": 48305 + }, + { + "epoch": 8.59, + "learning_rate": 2.138311111111111e-05, + "loss": 1.6717, + "step": 48310 + }, + { + "epoch": 8.59, + "learning_rate": 2.138014814814815e-05, + "loss": 1.6869, + "step": 48315 + }, + { + "epoch": 8.59, + "learning_rate": 2.1377185185185185e-05, + "loss": 1.4619, + "step": 48320 + }, + { + "epoch": 8.59, + "learning_rate": 2.1374222222222224e-05, + "loss": 1.7352, + "step": 48325 + }, + { + "epoch": 8.59, + "learning_rate": 2.137125925925926e-05, + "loss": 1.5882, + "step": 48330 + }, + { + "epoch": 8.59, + "learning_rate": 2.1368296296296298e-05, + "loss": 1.6455, + "step": 48335 + }, + { + "epoch": 8.59, + "learning_rate": 2.1365333333333334e-05, + "loss": 1.6886, + "step": 48340 + }, + { + "epoch": 8.59, + "learning_rate": 2.1362370370370372e-05, + "loss": 1.5835, + "step": 48345 + }, + { + "epoch": 8.6, + "learning_rate": 2.1359407407407408e-05, + "loss": 1.6421, + "step": 48350 + }, + { + "epoch": 8.6, + "learning_rate": 2.1356444444444447e-05, + "loss": 1.706, + "step": 48355 + }, + { + "epoch": 8.6, + "learning_rate": 2.1353481481481482e-05, + "loss": 1.4444, + "step": 48360 + }, + { + "epoch": 8.6, + "learning_rate": 2.135051851851852e-05, + "loss": 1.5846, + "step": 48365 + }, + { + "epoch": 8.6, + "learning_rate": 2.1347555555555556e-05, + "loss": 1.6485, + "step": 48370 + }, + { + "epoch": 8.6, + "learning_rate": 2.1344592592592595e-05, + "loss": 1.6293, + "step": 48375 + }, + { + "epoch": 8.6, + "learning_rate": 2.134162962962963e-05, + "loss": 1.6106, + "step": 48380 + }, + { + "epoch": 8.6, + "learning_rate": 2.133866666666667e-05, + "loss": 1.6035, + "step": 48385 + }, + { + "epoch": 8.6, + "learning_rate": 2.1335703703703705e-05, + "loss": 1.6398, + "step": 48390 + }, + { + "epoch": 8.6, + "learning_rate": 2.133274074074074e-05, + "loss": 1.504, + "step": 48395 + }, + { + "epoch": 8.6, + "learning_rate": 2.132977777777778e-05, + "loss": 1.7129, + "step": 48400 + }, + { + "epoch": 8.61, + "learning_rate": 2.1326814814814815e-05, + "loss": 1.6101, + "step": 48405 + }, + { + "epoch": 8.61, + "learning_rate": 2.1323851851851853e-05, + "loss": 1.5538, + "step": 48410 + }, + { + "epoch": 8.61, + "learning_rate": 2.132088888888889e-05, + "loss": 1.5939, + "step": 48415 + }, + { + "epoch": 8.61, + "learning_rate": 2.1317925925925928e-05, + "loss": 1.5957, + "step": 48420 + }, + { + "epoch": 8.61, + "learning_rate": 2.1314962962962963e-05, + "loss": 1.6895, + "step": 48425 + }, + { + "epoch": 8.61, + "learning_rate": 2.1312000000000002e-05, + "loss": 1.5773, + "step": 48430 + }, + { + "epoch": 8.61, + "learning_rate": 2.1309037037037037e-05, + "loss": 1.571, + "step": 48435 + }, + { + "epoch": 8.61, + "learning_rate": 2.1306074074074076e-05, + "loss": 1.5118, + "step": 48440 + }, + { + "epoch": 8.61, + "learning_rate": 2.130311111111111e-05, + "loss": 1.5895, + "step": 48445 + }, + { + "epoch": 8.61, + "learning_rate": 2.130014814814815e-05, + "loss": 1.4697, + "step": 48450 + }, + { + "epoch": 8.61, + "learning_rate": 2.1297185185185186e-05, + "loss": 1.6358, + "step": 48455 + }, + { + "epoch": 8.62, + "learning_rate": 2.1294222222222225e-05, + "loss": 1.4016, + "step": 48460 + }, + { + "epoch": 8.62, + "learning_rate": 2.129125925925926e-05, + "loss": 1.6203, + "step": 48465 + }, + { + "epoch": 8.62, + "learning_rate": 2.12882962962963e-05, + "loss": 1.6866, + "step": 48470 + }, + { + "epoch": 8.62, + "learning_rate": 2.1285333333333334e-05, + "loss": 1.4638, + "step": 48475 + }, + { + "epoch": 8.62, + "learning_rate": 2.1282370370370373e-05, + "loss": 1.644, + "step": 48480 + }, + { + "epoch": 8.62, + "learning_rate": 2.127940740740741e-05, + "loss": 1.6533, + "step": 48485 + }, + { + "epoch": 8.62, + "learning_rate": 2.1276444444444447e-05, + "loss": 1.5509, + "step": 48490 + }, + { + "epoch": 8.62, + "learning_rate": 2.1273481481481483e-05, + "loss": 1.5816, + "step": 48495 + }, + { + "epoch": 8.62, + "learning_rate": 2.1270518518518522e-05, + "loss": 1.5807, + "step": 48500 + }, + { + "epoch": 8.62, + "learning_rate": 2.1267555555555557e-05, + "loss": 1.6322, + "step": 48505 + }, + { + "epoch": 8.62, + "learning_rate": 2.1264592592592593e-05, + "loss": 1.5263, + "step": 48510 + }, + { + "epoch": 8.62, + "learning_rate": 2.126162962962963e-05, + "loss": 1.5807, + "step": 48515 + }, + { + "epoch": 8.63, + "learning_rate": 2.1258666666666667e-05, + "loss": 1.458, + "step": 48520 + }, + { + "epoch": 8.63, + "learning_rate": 2.1255703703703706e-05, + "loss": 1.5772, + "step": 48525 + }, + { + "epoch": 8.63, + "learning_rate": 2.125274074074074e-05, + "loss": 1.6866, + "step": 48530 + }, + { + "epoch": 8.63, + "learning_rate": 2.124977777777778e-05, + "loss": 1.6285, + "step": 48535 + }, + { + "epoch": 8.63, + "learning_rate": 2.1246814814814815e-05, + "loss": 1.5968, + "step": 48540 + }, + { + "epoch": 8.63, + "learning_rate": 2.1243851851851854e-05, + "loss": 1.5838, + "step": 48545 + }, + { + "epoch": 8.63, + "learning_rate": 2.124088888888889e-05, + "loss": 1.57, + "step": 48550 + }, + { + "epoch": 8.63, + "learning_rate": 2.123792592592593e-05, + "loss": 1.5923, + "step": 48555 + }, + { + "epoch": 8.63, + "learning_rate": 2.1234962962962964e-05, + "loss": 1.5629, + "step": 48560 + }, + { + "epoch": 8.63, + "learning_rate": 2.1232000000000003e-05, + "loss": 1.5648, + "step": 48565 + }, + { + "epoch": 8.63, + "learning_rate": 2.1229037037037038e-05, + "loss": 1.6528, + "step": 48570 + }, + { + "epoch": 8.64, + "learning_rate": 2.1226074074074077e-05, + "loss": 1.6432, + "step": 48575 + }, + { + "epoch": 8.64, + "learning_rate": 2.1223111111111112e-05, + "loss": 1.4514, + "step": 48580 + }, + { + "epoch": 8.64, + "learning_rate": 2.122014814814815e-05, + "loss": 1.5709, + "step": 48585 + }, + { + "epoch": 8.64, + "learning_rate": 2.1217185185185187e-05, + "loss": 1.5943, + "step": 48590 + }, + { + "epoch": 8.64, + "learning_rate": 2.1214222222222225e-05, + "loss": 1.6851, + "step": 48595 + }, + { + "epoch": 8.64, + "learning_rate": 2.121125925925926e-05, + "loss": 1.6304, + "step": 48600 + }, + { + "epoch": 8.64, + "learning_rate": 2.12082962962963e-05, + "loss": 1.5762, + "step": 48605 + }, + { + "epoch": 8.64, + "learning_rate": 2.1205333333333335e-05, + "loss": 1.5765, + "step": 48610 + }, + { + "epoch": 8.64, + "learning_rate": 2.1202370370370374e-05, + "loss": 1.4383, + "step": 48615 + }, + { + "epoch": 8.64, + "learning_rate": 2.119940740740741e-05, + "loss": 1.6192, + "step": 48620 + }, + { + "epoch": 8.64, + "learning_rate": 2.1196444444444445e-05, + "loss": 1.5757, + "step": 48625 + }, + { + "epoch": 8.65, + "learning_rate": 2.1193481481481484e-05, + "loss": 1.7062, + "step": 48630 + }, + { + "epoch": 8.65, + "learning_rate": 2.119051851851852e-05, + "loss": 1.5611, + "step": 48635 + }, + { + "epoch": 8.65, + "learning_rate": 2.1187555555555558e-05, + "loss": 1.5267, + "step": 48640 + }, + { + "epoch": 8.65, + "learning_rate": 2.1184592592592593e-05, + "loss": 1.7443, + "step": 48645 + }, + { + "epoch": 8.65, + "learning_rate": 2.1181629629629632e-05, + "loss": 1.5347, + "step": 48650 + }, + { + "epoch": 8.65, + "learning_rate": 2.1178666666666668e-05, + "loss": 1.5754, + "step": 48655 + }, + { + "epoch": 8.65, + "learning_rate": 2.1175703703703706e-05, + "loss": 1.6667, + "step": 48660 + }, + { + "epoch": 8.65, + "learning_rate": 2.1172740740740742e-05, + "loss": 1.5728, + "step": 48665 + }, + { + "epoch": 8.65, + "learning_rate": 2.116977777777778e-05, + "loss": 1.6377, + "step": 48670 + }, + { + "epoch": 8.65, + "learning_rate": 2.1166814814814816e-05, + "loss": 1.5926, + "step": 48675 + }, + { + "epoch": 8.65, + "learning_rate": 2.1163851851851855e-05, + "loss": 1.4464, + "step": 48680 + }, + { + "epoch": 8.66, + "learning_rate": 2.116088888888889e-05, + "loss": 1.6894, + "step": 48685 + }, + { + "epoch": 8.66, + "learning_rate": 2.115792592592593e-05, + "loss": 1.5962, + "step": 48690 + }, + { + "epoch": 8.66, + "learning_rate": 2.1154962962962965e-05, + "loss": 1.6734, + "step": 48695 + }, + { + "epoch": 8.66, + "learning_rate": 2.1152000000000003e-05, + "loss": 1.5913, + "step": 48700 + }, + { + "epoch": 8.66, + "learning_rate": 2.114903703703704e-05, + "loss": 1.5971, + "step": 48705 + }, + { + "epoch": 8.66, + "learning_rate": 2.1146074074074074e-05, + "loss": 1.5631, + "step": 48710 + }, + { + "epoch": 8.66, + "learning_rate": 2.114311111111111e-05, + "loss": 1.6449, + "step": 48715 + }, + { + "epoch": 8.66, + "learning_rate": 2.114014814814815e-05, + "loss": 1.5261, + "step": 48720 + }, + { + "epoch": 8.66, + "learning_rate": 2.1137185185185184e-05, + "loss": 1.6033, + "step": 48725 + }, + { + "epoch": 8.66, + "learning_rate": 2.1134222222222223e-05, + "loss": 1.7437, + "step": 48730 + }, + { + "epoch": 8.66, + "learning_rate": 2.1131259259259258e-05, + "loss": 1.5837, + "step": 48735 + }, + { + "epoch": 8.66, + "learning_rate": 2.1128296296296297e-05, + "loss": 1.5545, + "step": 48740 + }, + { + "epoch": 8.67, + "learning_rate": 2.1125333333333333e-05, + "loss": 1.553, + "step": 48745 + }, + { + "epoch": 8.67, + "learning_rate": 2.112237037037037e-05, + "loss": 1.64, + "step": 48750 + }, + { + "epoch": 8.67, + "learning_rate": 2.1119407407407407e-05, + "loss": 1.588, + "step": 48755 + }, + { + "epoch": 8.67, + "learning_rate": 2.1116444444444446e-05, + "loss": 1.6126, + "step": 48760 + }, + { + "epoch": 8.67, + "learning_rate": 2.111348148148148e-05, + "loss": 1.5341, + "step": 48765 + }, + { + "epoch": 8.67, + "learning_rate": 2.111051851851852e-05, + "loss": 1.6896, + "step": 48770 + }, + { + "epoch": 8.67, + "learning_rate": 2.1107555555555555e-05, + "loss": 1.5585, + "step": 48775 + }, + { + "epoch": 8.67, + "learning_rate": 2.1104592592592594e-05, + "loss": 1.5181, + "step": 48780 + }, + { + "epoch": 8.67, + "learning_rate": 2.110162962962963e-05, + "loss": 1.5537, + "step": 48785 + }, + { + "epoch": 8.67, + "learning_rate": 2.1098666666666665e-05, + "loss": 1.6398, + "step": 48790 + }, + { + "epoch": 8.67, + "learning_rate": 2.1095703703703704e-05, + "loss": 1.6032, + "step": 48795 + }, + { + "epoch": 8.68, + "learning_rate": 2.109274074074074e-05, + "loss": 1.5996, + "step": 48800 + }, + { + "epoch": 8.68, + "learning_rate": 2.1089777777777778e-05, + "loss": 1.586, + "step": 48805 + }, + { + "epoch": 8.68, + "learning_rate": 2.1086814814814814e-05, + "loss": 1.6109, + "step": 48810 + }, + { + "epoch": 8.68, + "learning_rate": 2.1083851851851852e-05, + "loss": 1.7435, + "step": 48815 + }, + { + "epoch": 8.68, + "learning_rate": 2.1080888888888888e-05, + "loss": 1.5501, + "step": 48820 + }, + { + "epoch": 8.68, + "learning_rate": 2.1077925925925927e-05, + "loss": 1.4657, + "step": 48825 + }, + { + "epoch": 8.68, + "learning_rate": 2.1074962962962962e-05, + "loss": 1.6426, + "step": 48830 + }, + { + "epoch": 8.68, + "learning_rate": 2.1072e-05, + "loss": 1.5512, + "step": 48835 + }, + { + "epoch": 8.68, + "learning_rate": 2.1069037037037036e-05, + "loss": 1.7229, + "step": 48840 + }, + { + "epoch": 8.68, + "learning_rate": 2.1066074074074075e-05, + "loss": 1.5883, + "step": 48845 + }, + { + "epoch": 8.68, + "learning_rate": 2.106311111111111e-05, + "loss": 1.6108, + "step": 48850 + }, + { + "epoch": 8.69, + "learning_rate": 2.106014814814815e-05, + "loss": 1.4672, + "step": 48855 + }, + { + "epoch": 8.69, + "learning_rate": 2.1057185185185185e-05, + "loss": 1.634, + "step": 48860 + }, + { + "epoch": 8.69, + "learning_rate": 2.1054222222222224e-05, + "loss": 1.6279, + "step": 48865 + }, + { + "epoch": 8.69, + "learning_rate": 2.105125925925926e-05, + "loss": 1.5786, + "step": 48870 + }, + { + "epoch": 8.69, + "learning_rate": 2.1048296296296298e-05, + "loss": 1.5631, + "step": 48875 + }, + { + "epoch": 8.69, + "learning_rate": 2.1045333333333333e-05, + "loss": 1.5632, + "step": 48880 + }, + { + "epoch": 8.69, + "learning_rate": 2.1042370370370372e-05, + "loss": 1.5749, + "step": 48885 + }, + { + "epoch": 8.69, + "learning_rate": 2.1039407407407408e-05, + "loss": 1.43, + "step": 48890 + }, + { + "epoch": 8.69, + "learning_rate": 2.1036444444444446e-05, + "loss": 1.7026, + "step": 48895 + }, + { + "epoch": 8.69, + "learning_rate": 2.1033481481481482e-05, + "loss": 1.4874, + "step": 48900 + }, + { + "epoch": 8.69, + "learning_rate": 2.1030518518518517e-05, + "loss": 1.5278, + "step": 48905 + }, + { + "epoch": 8.7, + "learning_rate": 2.1027555555555556e-05, + "loss": 1.5622, + "step": 48910 + }, + { + "epoch": 8.7, + "learning_rate": 2.102459259259259e-05, + "loss": 1.4414, + "step": 48915 + }, + { + "epoch": 8.7, + "learning_rate": 2.102162962962963e-05, + "loss": 1.5746, + "step": 48920 + }, + { + "epoch": 8.7, + "learning_rate": 2.1018666666666666e-05, + "loss": 1.5149, + "step": 48925 + }, + { + "epoch": 8.7, + "learning_rate": 2.1015703703703705e-05, + "loss": 1.5961, + "step": 48930 + }, + { + "epoch": 8.7, + "learning_rate": 2.101274074074074e-05, + "loss": 1.5621, + "step": 48935 + }, + { + "epoch": 8.7, + "learning_rate": 2.100977777777778e-05, + "loss": 1.5935, + "step": 48940 + }, + { + "epoch": 8.7, + "learning_rate": 2.1006814814814814e-05, + "loss": 1.5438, + "step": 48945 + }, + { + "epoch": 8.7, + "learning_rate": 2.1003851851851853e-05, + "loss": 1.5941, + "step": 48950 + }, + { + "epoch": 8.7, + "learning_rate": 2.100088888888889e-05, + "loss": 1.6053, + "step": 48955 + }, + { + "epoch": 8.7, + "learning_rate": 2.0997925925925927e-05, + "loss": 1.6134, + "step": 48960 + }, + { + "epoch": 8.7, + "learning_rate": 2.0994962962962963e-05, + "loss": 1.4818, + "step": 48965 + }, + { + "epoch": 8.71, + "learning_rate": 2.0992e-05, + "loss": 1.5851, + "step": 48970 + }, + { + "epoch": 8.71, + "learning_rate": 2.0989037037037037e-05, + "loss": 1.4719, + "step": 48975 + }, + { + "epoch": 8.71, + "learning_rate": 2.0986074074074076e-05, + "loss": 1.5611, + "step": 48980 + }, + { + "epoch": 8.71, + "learning_rate": 2.098311111111111e-05, + "loss": 1.5807, + "step": 48985 + }, + { + "epoch": 8.71, + "learning_rate": 2.098014814814815e-05, + "loss": 1.577, + "step": 48990 + }, + { + "epoch": 8.71, + "learning_rate": 2.0977185185185186e-05, + "loss": 1.6071, + "step": 48995 + }, + { + "epoch": 8.71, + "learning_rate": 2.0974222222222224e-05, + "loss": 1.5249, + "step": 49000 + }, + { + "epoch": 8.71, + "learning_rate": 2.097125925925926e-05, + "loss": 1.6473, + "step": 49005 + }, + { + "epoch": 8.71, + "learning_rate": 2.09682962962963e-05, + "loss": 1.6365, + "step": 49010 + }, + { + "epoch": 8.71, + "learning_rate": 2.0965333333333334e-05, + "loss": 1.6197, + "step": 49015 + }, + { + "epoch": 8.71, + "learning_rate": 2.096237037037037e-05, + "loss": 1.647, + "step": 49020 + }, + { + "epoch": 8.72, + "learning_rate": 2.095940740740741e-05, + "loss": 1.4746, + "step": 49025 + }, + { + "epoch": 8.72, + "learning_rate": 2.0956444444444444e-05, + "loss": 1.5431, + "step": 49030 + }, + { + "epoch": 8.72, + "learning_rate": 2.0953481481481483e-05, + "loss": 1.4384, + "step": 49035 + }, + { + "epoch": 8.72, + "learning_rate": 2.0950518518518518e-05, + "loss": 1.6573, + "step": 49040 + }, + { + "epoch": 8.72, + "learning_rate": 2.0947555555555557e-05, + "loss": 1.6509, + "step": 49045 + }, + { + "epoch": 8.72, + "learning_rate": 2.0944592592592592e-05, + "loss": 1.5921, + "step": 49050 + }, + { + "epoch": 8.72, + "learning_rate": 2.094162962962963e-05, + "loss": 1.6706, + "step": 49055 + }, + { + "epoch": 8.72, + "learning_rate": 2.0938666666666667e-05, + "loss": 1.5353, + "step": 49060 + }, + { + "epoch": 8.72, + "learning_rate": 2.0935703703703705e-05, + "loss": 1.6225, + "step": 49065 + }, + { + "epoch": 8.72, + "learning_rate": 2.093274074074074e-05, + "loss": 1.588, + "step": 49070 + }, + { + "epoch": 8.72, + "learning_rate": 2.092977777777778e-05, + "loss": 1.6211, + "step": 49075 + }, + { + "epoch": 8.73, + "learning_rate": 2.0926814814814815e-05, + "loss": 1.6526, + "step": 49080 + }, + { + "epoch": 8.73, + "learning_rate": 2.0923851851851854e-05, + "loss": 1.6996, + "step": 49085 + }, + { + "epoch": 8.73, + "learning_rate": 2.092088888888889e-05, + "loss": 1.6127, + "step": 49090 + }, + { + "epoch": 8.73, + "learning_rate": 2.0917925925925928e-05, + "loss": 1.555, + "step": 49095 + }, + { + "epoch": 8.73, + "learning_rate": 2.0914962962962964e-05, + "loss": 1.5162, + "step": 49100 + }, + { + "epoch": 8.73, + "learning_rate": 2.0912000000000002e-05, + "loss": 1.5465, + "step": 49105 + }, + { + "epoch": 8.73, + "learning_rate": 2.0909037037037038e-05, + "loss": 1.6422, + "step": 49110 + }, + { + "epoch": 8.73, + "learning_rate": 2.0906074074074077e-05, + "loss": 1.5217, + "step": 49115 + }, + { + "epoch": 8.73, + "learning_rate": 2.0903111111111112e-05, + "loss": 1.619, + "step": 49120 + }, + { + "epoch": 8.73, + "learning_rate": 2.090014814814815e-05, + "loss": 1.5617, + "step": 49125 + }, + { + "epoch": 8.73, + "learning_rate": 2.0897185185185186e-05, + "loss": 1.5746, + "step": 49130 + }, + { + "epoch": 8.74, + "learning_rate": 2.0894222222222222e-05, + "loss": 1.5058, + "step": 49135 + }, + { + "epoch": 8.74, + "learning_rate": 2.089125925925926e-05, + "loss": 1.7295, + "step": 49140 + }, + { + "epoch": 8.74, + "learning_rate": 2.0888296296296296e-05, + "loss": 1.5349, + "step": 49145 + }, + { + "epoch": 8.74, + "learning_rate": 2.0885333333333335e-05, + "loss": 1.6692, + "step": 49150 + }, + { + "epoch": 8.74, + "learning_rate": 2.088237037037037e-05, + "loss": 1.5457, + "step": 49155 + }, + { + "epoch": 8.74, + "learning_rate": 2.087940740740741e-05, + "loss": 1.5076, + "step": 49160 + }, + { + "epoch": 8.74, + "learning_rate": 2.0876444444444445e-05, + "loss": 1.616, + "step": 49165 + }, + { + "epoch": 8.74, + "learning_rate": 2.0873481481481483e-05, + "loss": 1.6334, + "step": 49170 + }, + { + "epoch": 8.74, + "learning_rate": 2.087051851851852e-05, + "loss": 1.6823, + "step": 49175 + }, + { + "epoch": 8.74, + "learning_rate": 2.0867555555555558e-05, + "loss": 1.5324, + "step": 49180 + }, + { + "epoch": 8.74, + "learning_rate": 2.0864592592592593e-05, + "loss": 1.5831, + "step": 49185 + }, + { + "epoch": 8.74, + "learning_rate": 2.0861629629629632e-05, + "loss": 1.5679, + "step": 49190 + }, + { + "epoch": 8.75, + "learning_rate": 2.0858666666666667e-05, + "loss": 1.4764, + "step": 49195 + }, + { + "epoch": 8.75, + "learning_rate": 2.0855703703703706e-05, + "loss": 1.5852, + "step": 49200 + }, + { + "epoch": 8.75, + "learning_rate": 2.085274074074074e-05, + "loss": 1.5673, + "step": 49205 + }, + { + "epoch": 8.75, + "learning_rate": 2.084977777777778e-05, + "loss": 1.5226, + "step": 49210 + }, + { + "epoch": 8.75, + "learning_rate": 2.0846814814814816e-05, + "loss": 1.6363, + "step": 49215 + }, + { + "epoch": 8.75, + "learning_rate": 2.0843851851851855e-05, + "loss": 1.6604, + "step": 49220 + }, + { + "epoch": 8.75, + "learning_rate": 2.084088888888889e-05, + "loss": 1.6934, + "step": 49225 + }, + { + "epoch": 8.75, + "learning_rate": 2.083792592592593e-05, + "loss": 1.5707, + "step": 49230 + }, + { + "epoch": 8.75, + "learning_rate": 2.0834962962962964e-05, + "loss": 1.5703, + "step": 49235 + }, + { + "epoch": 8.75, + "learning_rate": 2.0832000000000003e-05, + "loss": 1.6097, + "step": 49240 + }, + { + "epoch": 8.75, + "learning_rate": 2.082903703703704e-05, + "loss": 1.6224, + "step": 49245 + }, + { + "epoch": 8.76, + "learning_rate": 2.0826074074074074e-05, + "loss": 1.6108, + "step": 49250 + }, + { + "epoch": 8.76, + "learning_rate": 2.0823111111111113e-05, + "loss": 1.6177, + "step": 49255 + }, + { + "epoch": 8.76, + "learning_rate": 2.0820148148148148e-05, + "loss": 1.532, + "step": 49260 + }, + { + "epoch": 8.76, + "learning_rate": 2.0817185185185187e-05, + "loss": 1.5187, + "step": 49265 + }, + { + "epoch": 8.76, + "learning_rate": 2.0814222222222223e-05, + "loss": 1.5628, + "step": 49270 + }, + { + "epoch": 8.76, + "learning_rate": 2.081125925925926e-05, + "loss": 1.5729, + "step": 49275 + }, + { + "epoch": 8.76, + "learning_rate": 2.0808296296296297e-05, + "loss": 1.639, + "step": 49280 + }, + { + "epoch": 8.76, + "learning_rate": 2.0805333333333336e-05, + "loss": 1.5739, + "step": 49285 + }, + { + "epoch": 8.76, + "learning_rate": 2.080237037037037e-05, + "loss": 1.658, + "step": 49290 + }, + { + "epoch": 8.76, + "learning_rate": 2.079940740740741e-05, + "loss": 1.5964, + "step": 49295 + }, + { + "epoch": 8.76, + "learning_rate": 2.0796444444444445e-05, + "loss": 1.6005, + "step": 49300 + }, + { + "epoch": 8.77, + "learning_rate": 2.0793481481481484e-05, + "loss": 1.7932, + "step": 49305 + }, + { + "epoch": 8.77, + "learning_rate": 2.079051851851852e-05, + "loss": 1.4853, + "step": 49310 + }, + { + "epoch": 8.77, + "learning_rate": 2.078755555555556e-05, + "loss": 1.4995, + "step": 49315 + }, + { + "epoch": 8.77, + "learning_rate": 2.0784592592592594e-05, + "loss": 1.5547, + "step": 49320 + }, + { + "epoch": 8.77, + "learning_rate": 2.0781629629629633e-05, + "loss": 1.6237, + "step": 49325 + }, + { + "epoch": 8.77, + "learning_rate": 2.0778666666666668e-05, + "loss": 1.6151, + "step": 49330 + }, + { + "epoch": 8.77, + "learning_rate": 2.0775703703703707e-05, + "loss": 1.6293, + "step": 49335 + }, + { + "epoch": 8.77, + "learning_rate": 2.0772740740740742e-05, + "loss": 1.6123, + "step": 49340 + }, + { + "epoch": 8.77, + "learning_rate": 2.076977777777778e-05, + "loss": 1.6295, + "step": 49345 + }, + { + "epoch": 8.77, + "learning_rate": 2.0766814814814817e-05, + "loss": 1.6001, + "step": 49350 + }, + { + "epoch": 8.77, + "learning_rate": 2.0763851851851855e-05, + "loss": 1.6561, + "step": 49355 + }, + { + "epoch": 8.78, + "learning_rate": 2.076088888888889e-05, + "loss": 1.5899, + "step": 49360 + }, + { + "epoch": 8.78, + "learning_rate": 2.0757925925925926e-05, + "loss": 1.6044, + "step": 49365 + }, + { + "epoch": 8.78, + "learning_rate": 2.0754962962962965e-05, + "loss": 1.6045, + "step": 49370 + }, + { + "epoch": 8.78, + "learning_rate": 2.0752e-05, + "loss": 1.5086, + "step": 49375 + }, + { + "epoch": 8.78, + "learning_rate": 2.074903703703704e-05, + "loss": 1.49, + "step": 49380 + }, + { + "epoch": 8.78, + "learning_rate": 2.0746074074074075e-05, + "loss": 1.646, + "step": 49385 + }, + { + "epoch": 8.78, + "learning_rate": 2.0743111111111114e-05, + "loss": 1.5894, + "step": 49390 + }, + { + "epoch": 8.78, + "learning_rate": 2.074014814814815e-05, + "loss": 1.5594, + "step": 49395 + }, + { + "epoch": 8.78, + "learning_rate": 2.0737185185185188e-05, + "loss": 1.5403, + "step": 49400 + }, + { + "epoch": 8.78, + "learning_rate": 2.0734222222222223e-05, + "loss": 1.5321, + "step": 49405 + }, + { + "epoch": 8.78, + "learning_rate": 2.0731259259259262e-05, + "loss": 1.6369, + "step": 49410 + }, + { + "epoch": 8.78, + "learning_rate": 2.0728296296296298e-05, + "loss": 1.6057, + "step": 49415 + }, + { + "epoch": 8.79, + "learning_rate": 2.0725333333333336e-05, + "loss": 1.45, + "step": 49420 + }, + { + "epoch": 8.79, + "learning_rate": 2.0722370370370372e-05, + "loss": 1.5284, + "step": 49425 + }, + { + "epoch": 8.79, + "learning_rate": 2.071940740740741e-05, + "loss": 1.7225, + "step": 49430 + }, + { + "epoch": 8.79, + "learning_rate": 2.0716444444444446e-05, + "loss": 1.4506, + "step": 49435 + }, + { + "epoch": 8.79, + "learning_rate": 2.0713481481481485e-05, + "loss": 1.5763, + "step": 49440 + }, + { + "epoch": 8.79, + "learning_rate": 2.071051851851852e-05, + "loss": 1.6135, + "step": 49445 + }, + { + "epoch": 8.79, + "learning_rate": 2.070755555555556e-05, + "loss": 1.682, + "step": 49450 + }, + { + "epoch": 8.79, + "learning_rate": 2.0704592592592595e-05, + "loss": 1.5632, + "step": 49455 + }, + { + "epoch": 8.79, + "learning_rate": 2.0701629629629633e-05, + "loss": 1.5185, + "step": 49460 + }, + { + "epoch": 8.79, + "learning_rate": 2.069866666666667e-05, + "loss": 1.5057, + "step": 49465 + }, + { + "epoch": 8.79, + "learning_rate": 2.0695703703703708e-05, + "loss": 1.5888, + "step": 49470 + }, + { + "epoch": 8.8, + "learning_rate": 2.0692740740740743e-05, + "loss": 1.6223, + "step": 49475 + }, + { + "epoch": 8.8, + "learning_rate": 2.068977777777778e-05, + "loss": 1.7058, + "step": 49480 + }, + { + "epoch": 8.8, + "learning_rate": 2.0686814814814814e-05, + "loss": 1.4756, + "step": 49485 + }, + { + "epoch": 8.8, + "learning_rate": 2.0683851851851853e-05, + "loss": 1.5278, + "step": 49490 + }, + { + "epoch": 8.8, + "learning_rate": 2.0680888888888888e-05, + "loss": 1.6419, + "step": 49495 + }, + { + "epoch": 8.8, + "learning_rate": 2.0677925925925927e-05, + "loss": 1.5949, + "step": 49500 + }, + { + "epoch": 8.8, + "learning_rate": 2.0674962962962962e-05, + "loss": 1.664, + "step": 49505 + }, + { + "epoch": 8.8, + "learning_rate": 2.0672e-05, + "loss": 1.5529, + "step": 49510 + }, + { + "epoch": 8.8, + "learning_rate": 2.0669037037037037e-05, + "loss": 1.5925, + "step": 49515 + }, + { + "epoch": 8.8, + "learning_rate": 2.0666074074074072e-05, + "loss": 1.5646, + "step": 49520 + }, + { + "epoch": 8.8, + "learning_rate": 2.066311111111111e-05, + "loss": 1.5784, + "step": 49525 + }, + { + "epoch": 8.81, + "learning_rate": 2.0660148148148146e-05, + "loss": 1.4748, + "step": 49530 + }, + { + "epoch": 8.81, + "learning_rate": 2.0657185185185185e-05, + "loss": 1.5894, + "step": 49535 + }, + { + "epoch": 8.81, + "learning_rate": 2.065422222222222e-05, + "loss": 1.5755, + "step": 49540 + }, + { + "epoch": 8.81, + "learning_rate": 2.065125925925926e-05, + "loss": 1.6588, + "step": 49545 + }, + { + "epoch": 8.81, + "learning_rate": 2.0648296296296295e-05, + "loss": 1.4702, + "step": 49550 + }, + { + "epoch": 8.81, + "learning_rate": 2.0645333333333334e-05, + "loss": 1.5999, + "step": 49555 + }, + { + "epoch": 8.81, + "learning_rate": 2.064237037037037e-05, + "loss": 1.6394, + "step": 49560 + }, + { + "epoch": 8.81, + "learning_rate": 2.0639407407407408e-05, + "loss": 1.5283, + "step": 49565 + }, + { + "epoch": 8.81, + "learning_rate": 2.0636444444444443e-05, + "loss": 1.7313, + "step": 49570 + }, + { + "epoch": 8.81, + "learning_rate": 2.0633481481481482e-05, + "loss": 1.5737, + "step": 49575 + }, + { + "epoch": 8.81, + "learning_rate": 2.0630518518518518e-05, + "loss": 1.5782, + "step": 49580 + }, + { + "epoch": 8.82, + "learning_rate": 2.0627555555555557e-05, + "loss": 1.6574, + "step": 49585 + }, + { + "epoch": 8.82, + "learning_rate": 2.0624592592592592e-05, + "loss": 1.5658, + "step": 49590 + }, + { + "epoch": 8.82, + "learning_rate": 2.062162962962963e-05, + "loss": 1.6031, + "step": 49595 + }, + { + "epoch": 8.82, + "learning_rate": 2.0618666666666666e-05, + "loss": 1.5707, + "step": 49600 + }, + { + "epoch": 8.82, + "learning_rate": 2.0615703703703705e-05, + "loss": 1.6226, + "step": 49605 + }, + { + "epoch": 8.82, + "learning_rate": 2.061274074074074e-05, + "loss": 1.5638, + "step": 49610 + }, + { + "epoch": 8.82, + "learning_rate": 2.060977777777778e-05, + "loss": 1.5982, + "step": 49615 + }, + { + "epoch": 8.82, + "learning_rate": 2.0606814814814815e-05, + "loss": 1.4467, + "step": 49620 + }, + { + "epoch": 8.82, + "learning_rate": 2.0603851851851854e-05, + "loss": 1.6394, + "step": 49625 + }, + { + "epoch": 8.82, + "learning_rate": 2.060088888888889e-05, + "loss": 1.5951, + "step": 49630 + }, + { + "epoch": 8.82, + "learning_rate": 2.0597925925925924e-05, + "loss": 1.5576, + "step": 49635 + }, + { + "epoch": 8.82, + "learning_rate": 2.0594962962962963e-05, + "loss": 1.5621, + "step": 49640 + }, + { + "epoch": 8.83, + "learning_rate": 2.0592e-05, + "loss": 1.5921, + "step": 49645 + }, + { + "epoch": 8.83, + "learning_rate": 2.0589037037037038e-05, + "loss": 1.4863, + "step": 49650 + }, + { + "epoch": 8.83, + "learning_rate": 2.0586074074074073e-05, + "loss": 1.6157, + "step": 49655 + }, + { + "epoch": 8.83, + "learning_rate": 2.0583111111111112e-05, + "loss": 1.5637, + "step": 49660 + }, + { + "epoch": 8.83, + "learning_rate": 2.0580148148148147e-05, + "loss": 1.611, + "step": 49665 + }, + { + "epoch": 8.83, + "learning_rate": 2.0577185185185186e-05, + "loss": 1.6554, + "step": 49670 + }, + { + "epoch": 8.83, + "learning_rate": 2.057422222222222e-05, + "loss": 1.6317, + "step": 49675 + }, + { + "epoch": 8.83, + "learning_rate": 2.057125925925926e-05, + "loss": 1.5502, + "step": 49680 + }, + { + "epoch": 8.83, + "learning_rate": 2.0568296296296296e-05, + "loss": 1.517, + "step": 49685 + }, + { + "epoch": 8.83, + "learning_rate": 2.0565333333333335e-05, + "loss": 1.6514, + "step": 49690 + }, + { + "epoch": 8.83, + "learning_rate": 2.056237037037037e-05, + "loss": 1.6266, + "step": 49695 + }, + { + "epoch": 8.84, + "learning_rate": 2.055940740740741e-05, + "loss": 1.6582, + "step": 49700 + }, + { + "epoch": 8.84, + "learning_rate": 2.0556444444444444e-05, + "loss": 1.5833, + "step": 49705 + }, + { + "epoch": 8.84, + "learning_rate": 2.0553481481481483e-05, + "loss": 1.53, + "step": 49710 + }, + { + "epoch": 8.84, + "learning_rate": 2.055051851851852e-05, + "loss": 1.5525, + "step": 49715 + }, + { + "epoch": 8.84, + "learning_rate": 2.0547555555555557e-05, + "loss": 1.5935, + "step": 49720 + }, + { + "epoch": 8.84, + "learning_rate": 2.0544592592592593e-05, + "loss": 1.517, + "step": 49725 + }, + { + "epoch": 8.84, + "learning_rate": 2.054162962962963e-05, + "loss": 1.5663, + "step": 49730 + }, + { + "epoch": 8.84, + "learning_rate": 2.0538666666666667e-05, + "loss": 1.5452, + "step": 49735 + }, + { + "epoch": 8.84, + "learning_rate": 2.0535703703703706e-05, + "loss": 1.5779, + "step": 49740 + }, + { + "epoch": 8.84, + "learning_rate": 2.053274074074074e-05, + "loss": 1.6238, + "step": 49745 + }, + { + "epoch": 8.84, + "learning_rate": 2.0529777777777777e-05, + "loss": 1.5781, + "step": 49750 + }, + { + "epoch": 8.85, + "learning_rate": 2.0526814814814815e-05, + "loss": 1.5738, + "step": 49755 + }, + { + "epoch": 8.85, + "learning_rate": 2.052385185185185e-05, + "loss": 1.6232, + "step": 49760 + }, + { + "epoch": 8.85, + "learning_rate": 2.052088888888889e-05, + "loss": 1.619, + "step": 49765 + }, + { + "epoch": 8.85, + "learning_rate": 2.0517925925925925e-05, + "loss": 1.5196, + "step": 49770 + }, + { + "epoch": 8.85, + "learning_rate": 2.0514962962962964e-05, + "loss": 1.5067, + "step": 49775 + }, + { + "epoch": 8.85, + "learning_rate": 2.0512e-05, + "loss": 1.6335, + "step": 49780 + }, + { + "epoch": 8.85, + "learning_rate": 2.0509037037037038e-05, + "loss": 1.5742, + "step": 49785 + }, + { + "epoch": 8.85, + "learning_rate": 2.0506074074074074e-05, + "loss": 1.5137, + "step": 49790 + }, + { + "epoch": 8.85, + "learning_rate": 2.0503111111111113e-05, + "loss": 1.5472, + "step": 49795 + }, + { + "epoch": 8.85, + "learning_rate": 2.0500148148148148e-05, + "loss": 1.6103, + "step": 49800 + }, + { + "epoch": 8.85, + "learning_rate": 2.0497185185185187e-05, + "loss": 1.5557, + "step": 49805 + }, + { + "epoch": 8.86, + "learning_rate": 2.0494222222222222e-05, + "loss": 1.4856, + "step": 49810 + }, + { + "epoch": 8.86, + "learning_rate": 2.049125925925926e-05, + "loss": 1.5725, + "step": 49815 + }, + { + "epoch": 8.86, + "learning_rate": 2.0488296296296296e-05, + "loss": 1.6263, + "step": 49820 + }, + { + "epoch": 8.86, + "learning_rate": 2.0485333333333335e-05, + "loss": 1.5305, + "step": 49825 + }, + { + "epoch": 8.86, + "learning_rate": 2.048237037037037e-05, + "loss": 1.5367, + "step": 49830 + }, + { + "epoch": 8.86, + "learning_rate": 2.047940740740741e-05, + "loss": 1.7601, + "step": 49835 + }, + { + "epoch": 8.86, + "learning_rate": 2.0476444444444445e-05, + "loss": 1.5781, + "step": 49840 + }, + { + "epoch": 8.86, + "learning_rate": 2.0473481481481484e-05, + "loss": 1.5829, + "step": 49845 + }, + { + "epoch": 8.86, + "learning_rate": 2.047051851851852e-05, + "loss": 1.5604, + "step": 49850 + }, + { + "epoch": 8.86, + "learning_rate": 2.0467555555555558e-05, + "loss": 1.5173, + "step": 49855 + }, + { + "epoch": 8.86, + "learning_rate": 2.0464592592592593e-05, + "loss": 1.5328, + "step": 49860 + }, + { + "epoch": 8.86, + "learning_rate": 2.046162962962963e-05, + "loss": 1.7212, + "step": 49865 + }, + { + "epoch": 8.87, + "learning_rate": 2.0458666666666668e-05, + "loss": 1.4366, + "step": 49870 + }, + { + "epoch": 8.87, + "learning_rate": 2.0455703703703703e-05, + "loss": 1.6472, + "step": 49875 + }, + { + "epoch": 8.87, + "learning_rate": 2.0452740740740742e-05, + "loss": 1.5149, + "step": 49880 + }, + { + "epoch": 8.87, + "learning_rate": 2.0449777777777777e-05, + "loss": 1.5404, + "step": 49885 + }, + { + "epoch": 8.87, + "learning_rate": 2.0446814814814816e-05, + "loss": 1.5332, + "step": 49890 + }, + { + "epoch": 8.87, + "learning_rate": 2.0443851851851852e-05, + "loss": 1.745, + "step": 49895 + }, + { + "epoch": 8.87, + "learning_rate": 2.044088888888889e-05, + "loss": 1.5552, + "step": 49900 + }, + { + "epoch": 8.87, + "learning_rate": 2.0437925925925926e-05, + "loss": 1.6491, + "step": 49905 + }, + { + "epoch": 8.87, + "learning_rate": 2.0434962962962965e-05, + "loss": 1.4444, + "step": 49910 + }, + { + "epoch": 8.87, + "learning_rate": 2.0432e-05, + "loss": 1.5545, + "step": 49915 + }, + { + "epoch": 8.87, + "learning_rate": 2.042903703703704e-05, + "loss": 1.4867, + "step": 49920 + }, + { + "epoch": 8.88, + "learning_rate": 2.0426074074074074e-05, + "loss": 1.5529, + "step": 49925 + }, + { + "epoch": 8.88, + "learning_rate": 2.0423111111111113e-05, + "loss": 1.5834, + "step": 49930 + }, + { + "epoch": 8.88, + "learning_rate": 2.042014814814815e-05, + "loss": 1.6302, + "step": 49935 + }, + { + "epoch": 8.88, + "learning_rate": 2.0417185185185188e-05, + "loss": 1.5661, + "step": 49940 + }, + { + "epoch": 8.88, + "learning_rate": 2.0414222222222223e-05, + "loss": 1.5621, + "step": 49945 + }, + { + "epoch": 8.88, + "learning_rate": 2.0411259259259262e-05, + "loss": 1.5706, + "step": 49950 + }, + { + "epoch": 8.88, + "learning_rate": 2.0408296296296297e-05, + "loss": 1.5657, + "step": 49955 + }, + { + "epoch": 8.88, + "learning_rate": 2.0405333333333336e-05, + "loss": 1.7071, + "step": 49960 + }, + { + "epoch": 8.88, + "learning_rate": 2.040237037037037e-05, + "loss": 1.612, + "step": 49965 + }, + { + "epoch": 8.88, + "learning_rate": 2.039940740740741e-05, + "loss": 1.5886, + "step": 49970 + }, + { + "epoch": 8.88, + "learning_rate": 2.0396444444444446e-05, + "loss": 1.5698, + "step": 49975 + }, + { + "epoch": 8.89, + "learning_rate": 2.039348148148148e-05, + "loss": 1.6793, + "step": 49980 + }, + { + "epoch": 8.89, + "learning_rate": 2.039051851851852e-05, + "loss": 1.6018, + "step": 49985 + }, + { + "epoch": 8.89, + "learning_rate": 2.0387555555555555e-05, + "loss": 1.4914, + "step": 49990 + }, + { + "epoch": 8.89, + "learning_rate": 2.0384592592592594e-05, + "loss": 1.5095, + "step": 49995 + }, + { + "epoch": 8.89, + "learning_rate": 2.038162962962963e-05, + "loss": 1.7056, + "step": 50000 + }, + { + "epoch": 8.89, + "eval_loss": 1.4441460371017456, + "eval_rouge2_fmeasure": 0.1994, + "eval_rouge2_precision": 0.2358, + "eval_rouge2_recall": 0.1819, + "eval_runtime": 36743.4165, + "eval_samples_per_second": 0.136, + "eval_steps_per_second": 0.068, + "step": 50000 + }, + { + "epoch": 8.89, + "learning_rate": 2.037866666666667e-05, + "loss": 1.5776, + "step": 50005 + }, + { + "epoch": 8.89, + "learning_rate": 2.0375703703703704e-05, + "loss": 1.5854, + "step": 50010 + }, + { + "epoch": 8.89, + "learning_rate": 2.0372740740740743e-05, + "loss": 1.583, + "step": 50015 + }, + { + "epoch": 8.89, + "learning_rate": 2.0369777777777778e-05, + "loss": 1.5779, + "step": 50020 + }, + { + "epoch": 8.89, + "learning_rate": 2.0366814814814817e-05, + "loss": 1.6658, + "step": 50025 + }, + { + "epoch": 8.89, + "learning_rate": 2.0363851851851852e-05, + "loss": 1.5747, + "step": 50030 + }, + { + "epoch": 8.9, + "learning_rate": 2.036088888888889e-05, + "loss": 1.6231, + "step": 50035 + }, + { + "epoch": 8.9, + "learning_rate": 2.0357925925925927e-05, + "loss": 1.6523, + "step": 50040 + }, + { + "epoch": 8.9, + "learning_rate": 2.0354962962962966e-05, + "loss": 1.6102, + "step": 50045 + }, + { + "epoch": 8.9, + "learning_rate": 2.0352e-05, + "loss": 1.5825, + "step": 50050 + }, + { + "epoch": 8.9, + "learning_rate": 2.034903703703704e-05, + "loss": 1.5891, + "step": 50055 + }, + { + "epoch": 8.9, + "learning_rate": 2.0346074074074075e-05, + "loss": 1.4946, + "step": 50060 + }, + { + "epoch": 8.9, + "learning_rate": 2.0343111111111114e-05, + "loss": 1.5902, + "step": 50065 + }, + { + "epoch": 8.9, + "learning_rate": 2.034014814814815e-05, + "loss": 1.5364, + "step": 50070 + }, + { + "epoch": 8.9, + "learning_rate": 2.033718518518519e-05, + "loss": 1.6224, + "step": 50075 + }, + { + "epoch": 8.9, + "learning_rate": 2.0334222222222224e-05, + "loss": 1.6571, + "step": 50080 + }, + { + "epoch": 8.9, + "learning_rate": 2.0331259259259263e-05, + "loss": 1.7396, + "step": 50085 + }, + { + "epoch": 8.9, + "learning_rate": 2.0328296296296298e-05, + "loss": 1.612, + "step": 50090 + }, + { + "epoch": 8.91, + "learning_rate": 2.0325333333333333e-05, + "loss": 1.4919, + "step": 50095 + }, + { + "epoch": 8.91, + "learning_rate": 2.0322370370370372e-05, + "loss": 1.5947, + "step": 50100 + }, + { + "epoch": 8.91, + "learning_rate": 2.0319407407407408e-05, + "loss": 1.6084, + "step": 50105 + }, + { + "epoch": 8.91, + "learning_rate": 2.0316444444444447e-05, + "loss": 1.6492, + "step": 50110 + }, + { + "epoch": 8.91, + "learning_rate": 2.0313481481481482e-05, + "loss": 1.5986, + "step": 50115 + }, + { + "epoch": 8.91, + "learning_rate": 2.031051851851852e-05, + "loss": 1.7051, + "step": 50120 + }, + { + "epoch": 8.91, + "learning_rate": 2.0307555555555556e-05, + "loss": 1.6672, + "step": 50125 + }, + { + "epoch": 8.91, + "learning_rate": 2.0304592592592595e-05, + "loss": 1.5468, + "step": 50130 + }, + { + "epoch": 8.91, + "learning_rate": 2.030162962962963e-05, + "loss": 1.5186, + "step": 50135 + }, + { + "epoch": 8.91, + "learning_rate": 2.029866666666667e-05, + "loss": 1.5598, + "step": 50140 + }, + { + "epoch": 8.91, + "learning_rate": 2.0295703703703705e-05, + "loss": 1.7102, + "step": 50145 + }, + { + "epoch": 8.92, + "learning_rate": 2.0292740740740744e-05, + "loss": 1.589, + "step": 50150 + }, + { + "epoch": 8.92, + "learning_rate": 2.028977777777778e-05, + "loss": 1.4843, + "step": 50155 + }, + { + "epoch": 8.92, + "learning_rate": 2.0286814814814818e-05, + "loss": 1.5548, + "step": 50160 + }, + { + "epoch": 8.92, + "learning_rate": 2.0283851851851853e-05, + "loss": 1.6082, + "step": 50165 + }, + { + "epoch": 8.92, + "learning_rate": 2.0280888888888892e-05, + "loss": 1.6247, + "step": 50170 + }, + { + "epoch": 8.92, + "learning_rate": 2.0277925925925927e-05, + "loss": 1.5119, + "step": 50175 + }, + { + "epoch": 8.92, + "learning_rate": 2.0274962962962966e-05, + "loss": 1.5193, + "step": 50180 + }, + { + "epoch": 8.92, + "learning_rate": 2.0272000000000002e-05, + "loss": 1.6745, + "step": 50185 + }, + { + "epoch": 8.92, + "learning_rate": 2.026903703703704e-05, + "loss": 1.5101, + "step": 50190 + }, + { + "epoch": 8.92, + "learning_rate": 2.0266074074074076e-05, + "loss": 1.6024, + "step": 50195 + }, + { + "epoch": 8.92, + "learning_rate": 2.0263111111111115e-05, + "loss": 1.6457, + "step": 50200 + }, + { + "epoch": 8.93, + "learning_rate": 2.026014814814815e-05, + "loss": 1.6042, + "step": 50205 + }, + { + "epoch": 8.93, + "learning_rate": 2.0257185185185186e-05, + "loss": 1.608, + "step": 50210 + }, + { + "epoch": 8.93, + "learning_rate": 2.0254222222222225e-05, + "loss": 1.5369, + "step": 50215 + }, + { + "epoch": 8.93, + "learning_rate": 2.025125925925926e-05, + "loss": 1.65, + "step": 50220 + }, + { + "epoch": 8.93, + "learning_rate": 2.02482962962963e-05, + "loss": 1.6281, + "step": 50225 + }, + { + "epoch": 8.93, + "learning_rate": 2.0245333333333334e-05, + "loss": 1.6117, + "step": 50230 + }, + { + "epoch": 8.93, + "learning_rate": 2.0242370370370373e-05, + "loss": 1.645, + "step": 50235 + }, + { + "epoch": 8.93, + "learning_rate": 2.023940740740741e-05, + "loss": 1.5832, + "step": 50240 + }, + { + "epoch": 8.93, + "learning_rate": 2.0236444444444447e-05, + "loss": 1.6381, + "step": 50245 + }, + { + "epoch": 8.93, + "learning_rate": 2.0233481481481483e-05, + "loss": 1.6442, + "step": 50250 + }, + { + "epoch": 8.93, + "learning_rate": 2.023051851851852e-05, + "loss": 1.5972, + "step": 50255 + }, + { + "epoch": 8.94, + "learning_rate": 2.0227555555555554e-05, + "loss": 1.5896, + "step": 50260 + }, + { + "epoch": 8.94, + "learning_rate": 2.0224592592592592e-05, + "loss": 1.635, + "step": 50265 + }, + { + "epoch": 8.94, + "learning_rate": 2.0221629629629628e-05, + "loss": 1.4677, + "step": 50270 + }, + { + "epoch": 8.94, + "learning_rate": 2.0218666666666667e-05, + "loss": 1.5737, + "step": 50275 + }, + { + "epoch": 8.94, + "learning_rate": 2.0215703703703702e-05, + "loss": 1.4144, + "step": 50280 + }, + { + "epoch": 8.94, + "learning_rate": 2.021274074074074e-05, + "loss": 1.5732, + "step": 50285 + }, + { + "epoch": 8.94, + "learning_rate": 2.0209777777777776e-05, + "loss": 1.6544, + "step": 50290 + }, + { + "epoch": 8.94, + "learning_rate": 2.0206814814814815e-05, + "loss": 1.5883, + "step": 50295 + }, + { + "epoch": 8.94, + "learning_rate": 2.020385185185185e-05, + "loss": 1.5981, + "step": 50300 + }, + { + "epoch": 8.94, + "learning_rate": 2.020088888888889e-05, + "loss": 1.5717, + "step": 50305 + }, + { + "epoch": 8.94, + "learning_rate": 2.0197925925925925e-05, + "loss": 1.6268, + "step": 50310 + }, + { + "epoch": 8.94, + "learning_rate": 2.0194962962962964e-05, + "loss": 1.6219, + "step": 50315 + }, + { + "epoch": 8.95, + "learning_rate": 2.0192e-05, + "loss": 1.6537, + "step": 50320 + }, + { + "epoch": 8.95, + "learning_rate": 2.0189037037037038e-05, + "loss": 1.4755, + "step": 50325 + }, + { + "epoch": 8.95, + "learning_rate": 2.0186074074074073e-05, + "loss": 1.6602, + "step": 50330 + }, + { + "epoch": 8.95, + "learning_rate": 2.0183111111111112e-05, + "loss": 1.6864, + "step": 50335 + }, + { + "epoch": 8.95, + "learning_rate": 2.0180148148148148e-05, + "loss": 1.5156, + "step": 50340 + }, + { + "epoch": 8.95, + "learning_rate": 2.0177185185185186e-05, + "loss": 1.5972, + "step": 50345 + }, + { + "epoch": 8.95, + "learning_rate": 2.0174222222222222e-05, + "loss": 1.6299, + "step": 50350 + }, + { + "epoch": 8.95, + "learning_rate": 2.017125925925926e-05, + "loss": 1.5206, + "step": 50355 + }, + { + "epoch": 8.95, + "learning_rate": 2.0168296296296296e-05, + "loss": 1.6595, + "step": 50360 + }, + { + "epoch": 8.95, + "learning_rate": 2.0165333333333335e-05, + "loss": 1.6798, + "step": 50365 + }, + { + "epoch": 8.95, + "learning_rate": 2.016237037037037e-05, + "loss": 1.6914, + "step": 50370 + }, + { + "epoch": 8.96, + "learning_rate": 2.0159407407407406e-05, + "loss": 1.7021, + "step": 50375 + }, + { + "epoch": 8.96, + "learning_rate": 2.0156444444444445e-05, + "loss": 1.5465, + "step": 50380 + }, + { + "epoch": 8.96, + "learning_rate": 2.015348148148148e-05, + "loss": 1.5977, + "step": 50385 + }, + { + "epoch": 8.96, + "learning_rate": 2.015051851851852e-05, + "loss": 1.625, + "step": 50390 + }, + { + "epoch": 8.96, + "learning_rate": 2.0147555555555554e-05, + "loss": 1.5877, + "step": 50395 + }, + { + "epoch": 8.96, + "learning_rate": 2.0144592592592593e-05, + "loss": 1.6119, + "step": 50400 + }, + { + "epoch": 8.96, + "learning_rate": 2.014162962962963e-05, + "loss": 1.5196, + "step": 50405 + }, + { + "epoch": 8.96, + "learning_rate": 2.0138666666666667e-05, + "loss": 1.6991, + "step": 50410 + }, + { + "epoch": 8.96, + "learning_rate": 2.0135703703703703e-05, + "loss": 1.544, + "step": 50415 + }, + { + "epoch": 8.96, + "learning_rate": 2.013274074074074e-05, + "loss": 1.6542, + "step": 50420 + }, + { + "epoch": 8.96, + "learning_rate": 2.0129777777777777e-05, + "loss": 1.5415, + "step": 50425 + }, + { + "epoch": 8.97, + "learning_rate": 2.0126814814814816e-05, + "loss": 1.6447, + "step": 50430 + }, + { + "epoch": 8.97, + "learning_rate": 2.012385185185185e-05, + "loss": 1.544, + "step": 50435 + }, + { + "epoch": 8.97, + "learning_rate": 2.012088888888889e-05, + "loss": 1.7818, + "step": 50440 + }, + { + "epoch": 8.97, + "learning_rate": 2.0117925925925926e-05, + "loss": 1.5589, + "step": 50445 + }, + { + "epoch": 8.97, + "learning_rate": 2.0114962962962964e-05, + "loss": 1.6556, + "step": 50450 + }, + { + "epoch": 8.97, + "learning_rate": 2.0112e-05, + "loss": 1.4949, + "step": 50455 + }, + { + "epoch": 8.97, + "learning_rate": 2.010903703703704e-05, + "loss": 1.5588, + "step": 50460 + }, + { + "epoch": 8.97, + "learning_rate": 2.0106074074074074e-05, + "loss": 1.5678, + "step": 50465 + }, + { + "epoch": 8.97, + "learning_rate": 2.0103111111111113e-05, + "loss": 1.5118, + "step": 50470 + }, + { + "epoch": 8.97, + "learning_rate": 2.010014814814815e-05, + "loss": 1.6167, + "step": 50475 + }, + { + "epoch": 8.97, + "learning_rate": 2.0097185185185187e-05, + "loss": 1.5925, + "step": 50480 + }, + { + "epoch": 8.98, + "learning_rate": 2.0094222222222223e-05, + "loss": 1.6209, + "step": 50485 + }, + { + "epoch": 8.98, + "learning_rate": 2.0091259259259258e-05, + "loss": 1.4695, + "step": 50490 + }, + { + "epoch": 8.98, + "learning_rate": 2.0088296296296297e-05, + "loss": 1.399, + "step": 50495 + }, + { + "epoch": 8.98, + "learning_rate": 2.0085333333333332e-05, + "loss": 1.6023, + "step": 50500 + }, + { + "epoch": 8.98, + "learning_rate": 2.008237037037037e-05, + "loss": 1.5268, + "step": 50505 + }, + { + "epoch": 8.98, + "learning_rate": 2.0079407407407407e-05, + "loss": 1.6967, + "step": 50510 + }, + { + "epoch": 8.98, + "learning_rate": 2.0076444444444445e-05, + "loss": 1.4888, + "step": 50515 + }, + { + "epoch": 8.98, + "learning_rate": 2.007348148148148e-05, + "loss": 1.65, + "step": 50520 + }, + { + "epoch": 8.98, + "learning_rate": 2.007051851851852e-05, + "loss": 1.606, + "step": 50525 + }, + { + "epoch": 8.98, + "learning_rate": 2.0067555555555555e-05, + "loss": 1.651, + "step": 50530 + }, + { + "epoch": 8.98, + "learning_rate": 2.0064592592592594e-05, + "loss": 1.6467, + "step": 50535 + }, + { + "epoch": 8.98, + "learning_rate": 2.006162962962963e-05, + "loss": 1.6445, + "step": 50540 + }, + { + "epoch": 8.99, + "learning_rate": 2.0058666666666668e-05, + "loss": 1.4955, + "step": 50545 + }, + { + "epoch": 8.99, + "learning_rate": 2.0055703703703704e-05, + "loss": 1.6046, + "step": 50550 + }, + { + "epoch": 8.99, + "learning_rate": 2.0052740740740742e-05, + "loss": 1.5837, + "step": 50555 + }, + { + "epoch": 8.99, + "learning_rate": 2.0049777777777778e-05, + "loss": 1.4848, + "step": 50560 + }, + { + "epoch": 8.99, + "learning_rate": 2.0046814814814817e-05, + "loss": 1.6505, + "step": 50565 + }, + { + "epoch": 8.99, + "learning_rate": 2.0043851851851852e-05, + "loss": 1.4656, + "step": 50570 + }, + { + "epoch": 8.99, + "learning_rate": 2.004088888888889e-05, + "loss": 1.5581, + "step": 50575 + }, + { + "epoch": 8.99, + "learning_rate": 2.0037925925925926e-05, + "loss": 1.6902, + "step": 50580 + }, + { + "epoch": 8.99, + "learning_rate": 2.0034962962962965e-05, + "loss": 1.5954, + "step": 50585 + }, + { + "epoch": 8.99, + "learning_rate": 2.0032e-05, + "loss": 1.5099, + "step": 50590 + }, + { + "epoch": 8.99, + "learning_rate": 2.002903703703704e-05, + "loss": 1.6501, + "step": 50595 + }, + { + "epoch": 9.0, + "learning_rate": 2.0026074074074075e-05, + "loss": 1.662, + "step": 50600 + }, + { + "epoch": 9.0, + "learning_rate": 2.002311111111111e-05, + "loss": 1.5935, + "step": 50605 + }, + { + "epoch": 9.0, + "learning_rate": 2.002014814814815e-05, + "loss": 1.6109, + "step": 50610 + }, + { + "epoch": 9.0, + "learning_rate": 2.0017185185185185e-05, + "loss": 1.5868, + "step": 50615 + }, + { + "epoch": 9.0, + "learning_rate": 2.0014222222222223e-05, + "loss": 1.6229, + "step": 50620 + }, + { + "epoch": 9.0, + "learning_rate": 2.001125925925926e-05, + "loss": 1.5075, + "step": 50625 + }, + { + "epoch": 9.0, + "learning_rate": 2.0008296296296298e-05, + "loss": 1.5395, + "step": 50630 + }, + { + "epoch": 9.0, + "learning_rate": 2.0005333333333333e-05, + "loss": 1.5758, + "step": 50635 + }, + { + "epoch": 9.0, + "learning_rate": 2.0002370370370372e-05, + "loss": 1.4438, + "step": 50640 + }, + { + "epoch": 9.0, + "learning_rate": 1.9999407407407407e-05, + "loss": 1.4391, + "step": 50645 + }, + { + "epoch": 9.0, + "learning_rate": 1.9996444444444446e-05, + "loss": 1.542, + "step": 50650 + }, + { + "epoch": 9.01, + "learning_rate": 1.999348148148148e-05, + "loss": 1.5029, + "step": 50655 + }, + { + "epoch": 9.01, + "learning_rate": 1.999051851851852e-05, + "loss": 1.4636, + "step": 50660 + }, + { + "epoch": 9.01, + "learning_rate": 1.9987555555555556e-05, + "loss": 1.5685, + "step": 50665 + }, + { + "epoch": 9.01, + "learning_rate": 1.9984592592592595e-05, + "loss": 1.4629, + "step": 50670 + }, + { + "epoch": 9.01, + "learning_rate": 1.998162962962963e-05, + "loss": 1.423, + "step": 50675 + }, + { + "epoch": 9.01, + "learning_rate": 1.997866666666667e-05, + "loss": 1.499, + "step": 50680 + }, + { + "epoch": 9.01, + "learning_rate": 1.9975703703703704e-05, + "loss": 1.4359, + "step": 50685 + }, + { + "epoch": 9.01, + "learning_rate": 1.9972740740740743e-05, + "loss": 1.5721, + "step": 50690 + }, + { + "epoch": 9.01, + "learning_rate": 1.996977777777778e-05, + "loss": 1.568, + "step": 50695 + }, + { + "epoch": 9.01, + "learning_rate": 1.9966814814814817e-05, + "loss": 1.4496, + "step": 50700 + }, + { + "epoch": 9.01, + "learning_rate": 1.9963851851851853e-05, + "loss": 1.5478, + "step": 50705 + }, + { + "epoch": 9.02, + "learning_rate": 1.9960888888888892e-05, + "loss": 1.4705, + "step": 50710 + }, + { + "epoch": 9.02, + "learning_rate": 1.9957925925925927e-05, + "loss": 1.4499, + "step": 50715 + }, + { + "epoch": 9.02, + "learning_rate": 1.9954962962962963e-05, + "loss": 1.5533, + "step": 50720 + }, + { + "epoch": 9.02, + "learning_rate": 1.9952e-05, + "loss": 1.3513, + "step": 50725 + }, + { + "epoch": 9.02, + "learning_rate": 1.9949037037037037e-05, + "loss": 1.4903, + "step": 50730 + }, + { + "epoch": 9.02, + "learning_rate": 1.9946074074074076e-05, + "loss": 1.4191, + "step": 50735 + }, + { + "epoch": 9.02, + "learning_rate": 1.994311111111111e-05, + "loss": 1.4737, + "step": 50740 + }, + { + "epoch": 9.02, + "learning_rate": 1.994014814814815e-05, + "loss": 1.4625, + "step": 50745 + }, + { + "epoch": 9.02, + "learning_rate": 1.9937185185185185e-05, + "loss": 1.5971, + "step": 50750 + }, + { + "epoch": 9.02, + "learning_rate": 1.9934222222222224e-05, + "loss": 1.352, + "step": 50755 + }, + { + "epoch": 9.02, + "learning_rate": 1.993125925925926e-05, + "loss": 1.5194, + "step": 50760 + }, + { + "epoch": 9.02, + "learning_rate": 1.99282962962963e-05, + "loss": 1.5412, + "step": 50765 + }, + { + "epoch": 9.03, + "learning_rate": 1.9925333333333334e-05, + "loss": 1.5252, + "step": 50770 + }, + { + "epoch": 9.03, + "learning_rate": 1.9922370370370373e-05, + "loss": 1.6271, + "step": 50775 + }, + { + "epoch": 9.03, + "learning_rate": 1.9919407407407408e-05, + "loss": 1.41, + "step": 50780 + }, + { + "epoch": 9.03, + "learning_rate": 1.9916444444444447e-05, + "loss": 1.4527, + "step": 50785 + }, + { + "epoch": 9.03, + "learning_rate": 1.9913481481481482e-05, + "loss": 1.5011, + "step": 50790 + }, + { + "epoch": 9.03, + "learning_rate": 1.991051851851852e-05, + "loss": 1.5712, + "step": 50795 + }, + { + "epoch": 9.03, + "learning_rate": 1.9907555555555557e-05, + "loss": 1.5773, + "step": 50800 + }, + { + "epoch": 9.03, + "learning_rate": 1.9904592592592595e-05, + "loss": 1.4935, + "step": 50805 + }, + { + "epoch": 9.03, + "learning_rate": 1.990162962962963e-05, + "loss": 1.4199, + "step": 50810 + }, + { + "epoch": 9.03, + "learning_rate": 1.989866666666667e-05, + "loss": 1.4975, + "step": 50815 + }, + { + "epoch": 9.03, + "learning_rate": 1.9895703703703705e-05, + "loss": 1.4514, + "step": 50820 + }, + { + "epoch": 9.04, + "learning_rate": 1.9892740740740744e-05, + "loss": 1.5839, + "step": 50825 + }, + { + "epoch": 9.04, + "learning_rate": 1.988977777777778e-05, + "loss": 1.4883, + "step": 50830 + }, + { + "epoch": 9.04, + "learning_rate": 1.9886814814814815e-05, + "loss": 1.5182, + "step": 50835 + }, + { + "epoch": 9.04, + "learning_rate": 1.9883851851851854e-05, + "loss": 1.6113, + "step": 50840 + }, + { + "epoch": 9.04, + "learning_rate": 1.988088888888889e-05, + "loss": 1.4834, + "step": 50845 + }, + { + "epoch": 9.04, + "learning_rate": 1.9877925925925928e-05, + "loss": 1.5612, + "step": 50850 + }, + { + "epoch": 9.04, + "learning_rate": 1.9874962962962963e-05, + "loss": 1.5499, + "step": 50855 + }, + { + "epoch": 9.04, + "learning_rate": 1.9872000000000002e-05, + "loss": 1.4613, + "step": 50860 + }, + { + "epoch": 9.04, + "learning_rate": 1.9869037037037038e-05, + "loss": 1.5226, + "step": 50865 + }, + { + "epoch": 9.04, + "learning_rate": 1.9866074074074076e-05, + "loss": 1.5097, + "step": 50870 + }, + { + "epoch": 9.04, + "learning_rate": 1.9863111111111112e-05, + "loss": 1.4149, + "step": 50875 + }, + { + "epoch": 9.05, + "learning_rate": 1.986014814814815e-05, + "loss": 1.5027, + "step": 50880 + }, + { + "epoch": 9.05, + "learning_rate": 1.9857185185185186e-05, + "loss": 1.4902, + "step": 50885 + }, + { + "epoch": 9.05, + "learning_rate": 1.9854222222222225e-05, + "loss": 1.4416, + "step": 50890 + }, + { + "epoch": 9.05, + "learning_rate": 1.985125925925926e-05, + "loss": 1.6142, + "step": 50895 + }, + { + "epoch": 9.05, + "learning_rate": 1.98482962962963e-05, + "loss": 1.5455, + "step": 50900 + }, + { + "epoch": 9.05, + "learning_rate": 1.9845333333333335e-05, + "loss": 1.4468, + "step": 50905 + }, + { + "epoch": 9.05, + "learning_rate": 1.9842370370370373e-05, + "loss": 1.4418, + "step": 50910 + }, + { + "epoch": 9.05, + "learning_rate": 1.983940740740741e-05, + "loss": 1.4279, + "step": 50915 + }, + { + "epoch": 9.05, + "learning_rate": 1.9836444444444448e-05, + "loss": 1.4335, + "step": 50920 + }, + { + "epoch": 9.05, + "learning_rate": 1.9833481481481483e-05, + "loss": 1.4673, + "step": 50925 + }, + { + "epoch": 9.05, + "learning_rate": 1.9830518518518522e-05, + "loss": 1.5131, + "step": 50930 + }, + { + "epoch": 9.06, + "learning_rate": 1.9827555555555557e-05, + "loss": 1.5448, + "step": 50935 + }, + { + "epoch": 9.06, + "learning_rate": 1.9824592592592596e-05, + "loss": 1.4507, + "step": 50940 + }, + { + "epoch": 9.06, + "learning_rate": 1.982162962962963e-05, + "loss": 1.5836, + "step": 50945 + }, + { + "epoch": 9.06, + "learning_rate": 1.9818666666666667e-05, + "loss": 1.4678, + "step": 50950 + }, + { + "epoch": 9.06, + "learning_rate": 1.9815703703703706e-05, + "loss": 1.4997, + "step": 50955 + }, + { + "epoch": 9.06, + "learning_rate": 1.981274074074074e-05, + "loss": 1.4031, + "step": 50960 + }, + { + "epoch": 9.06, + "learning_rate": 1.980977777777778e-05, + "loss": 1.5196, + "step": 50965 + }, + { + "epoch": 9.06, + "learning_rate": 1.9806814814814816e-05, + "loss": 1.5607, + "step": 50970 + }, + { + "epoch": 9.06, + "learning_rate": 1.9803851851851854e-05, + "loss": 1.5165, + "step": 50975 + }, + { + "epoch": 9.06, + "learning_rate": 1.980088888888889e-05, + "loss": 1.6401, + "step": 50980 + }, + { + "epoch": 9.06, + "learning_rate": 1.979792592592593e-05, + "loss": 1.5563, + "step": 50985 + }, + { + "epoch": 9.06, + "learning_rate": 1.9794962962962964e-05, + "loss": 1.5452, + "step": 50990 + }, + { + "epoch": 9.07, + "learning_rate": 1.9792000000000003e-05, + "loss": 1.4259, + "step": 50995 + }, + { + "epoch": 9.07, + "learning_rate": 1.978903703703704e-05, + "loss": 1.5068, + "step": 51000 + }, + { + "epoch": 9.07, + "learning_rate": 1.9786074074074077e-05, + "loss": 1.474, + "step": 51005 + }, + { + "epoch": 9.07, + "learning_rate": 1.9783111111111113e-05, + "loss": 1.4974, + "step": 51010 + }, + { + "epoch": 9.07, + "learning_rate": 1.978014814814815e-05, + "loss": 1.591, + "step": 51015 + }, + { + "epoch": 9.07, + "learning_rate": 1.9777185185185187e-05, + "loss": 1.5843, + "step": 51020 + }, + { + "epoch": 9.07, + "learning_rate": 1.9774222222222226e-05, + "loss": 1.4632, + "step": 51025 + }, + { + "epoch": 9.07, + "learning_rate": 1.977125925925926e-05, + "loss": 1.4455, + "step": 51030 + }, + { + "epoch": 9.07, + "learning_rate": 1.9768296296296297e-05, + "loss": 1.5999, + "step": 51035 + }, + { + "epoch": 9.07, + "learning_rate": 1.9765333333333332e-05, + "loss": 1.4252, + "step": 51040 + }, + { + "epoch": 9.07, + "learning_rate": 1.976237037037037e-05, + "loss": 1.5969, + "step": 51045 + }, + { + "epoch": 9.08, + "learning_rate": 1.9759407407407406e-05, + "loss": 1.4597, + "step": 51050 + }, + { + "epoch": 9.08, + "learning_rate": 1.9756444444444445e-05, + "loss": 1.5726, + "step": 51055 + }, + { + "epoch": 9.08, + "learning_rate": 1.975348148148148e-05, + "loss": 1.5343, + "step": 51060 + }, + { + "epoch": 9.08, + "learning_rate": 1.975051851851852e-05, + "loss": 1.4491, + "step": 51065 + }, + { + "epoch": 9.08, + "learning_rate": 1.9747555555555555e-05, + "loss": 1.3939, + "step": 51070 + }, + { + "epoch": 9.08, + "learning_rate": 1.9744592592592594e-05, + "loss": 1.5018, + "step": 51075 + }, + { + "epoch": 9.08, + "learning_rate": 1.974162962962963e-05, + "loss": 1.5638, + "step": 51080 + }, + { + "epoch": 9.08, + "learning_rate": 1.9738666666666668e-05, + "loss": 1.3192, + "step": 51085 + }, + { + "epoch": 9.08, + "learning_rate": 1.9735703703703703e-05, + "loss": 1.3618, + "step": 51090 + }, + { + "epoch": 9.08, + "learning_rate": 1.9732740740740742e-05, + "loss": 1.4601, + "step": 51095 + }, + { + "epoch": 9.08, + "learning_rate": 1.9729777777777778e-05, + "loss": 1.4781, + "step": 51100 + }, + { + "epoch": 9.09, + "learning_rate": 1.9726814814814813e-05, + "loss": 1.5504, + "step": 51105 + }, + { + "epoch": 9.09, + "learning_rate": 1.9723851851851852e-05, + "loss": 1.3761, + "step": 51110 + }, + { + "epoch": 9.09, + "learning_rate": 1.9720888888888887e-05, + "loss": 1.5334, + "step": 51115 + }, + { + "epoch": 9.09, + "learning_rate": 1.9717925925925926e-05, + "loss": 1.4944, + "step": 51120 + }, + { + "epoch": 9.09, + "learning_rate": 1.971496296296296e-05, + "loss": 1.4185, + "step": 51125 + }, + { + "epoch": 9.09, + "learning_rate": 1.9712e-05, + "loss": 1.4828, + "step": 51130 + }, + { + "epoch": 9.09, + "learning_rate": 1.9709037037037036e-05, + "loss": 1.4456, + "step": 51135 + }, + { + "epoch": 9.09, + "learning_rate": 1.9706074074074075e-05, + "loss": 1.4885, + "step": 51140 + }, + { + "epoch": 9.09, + "learning_rate": 1.970311111111111e-05, + "loss": 1.5398, + "step": 51145 + }, + { + "epoch": 9.09, + "learning_rate": 1.970014814814815e-05, + "loss": 1.4476, + "step": 51150 + }, + { + "epoch": 9.09, + "learning_rate": 1.9697185185185184e-05, + "loss": 1.4976, + "step": 51155 + }, + { + "epoch": 9.1, + "learning_rate": 1.9694222222222223e-05, + "loss": 1.4727, + "step": 51160 + }, + { + "epoch": 9.1, + "learning_rate": 1.969125925925926e-05, + "loss": 1.5212, + "step": 51165 + }, + { + "epoch": 9.1, + "learning_rate": 1.9688296296296297e-05, + "loss": 1.5441, + "step": 51170 + }, + { + "epoch": 9.1, + "learning_rate": 1.9685333333333333e-05, + "loss": 1.4892, + "step": 51175 + }, + { + "epoch": 9.1, + "learning_rate": 1.968237037037037e-05, + "loss": 1.4377, + "step": 51180 + }, + { + "epoch": 9.1, + "learning_rate": 1.9679407407407407e-05, + "loss": 1.4782, + "step": 51185 + }, + { + "epoch": 9.1, + "learning_rate": 1.9676444444444446e-05, + "loss": 1.6596, + "step": 51190 + }, + { + "epoch": 9.1, + "learning_rate": 1.967348148148148e-05, + "loss": 1.5024, + "step": 51195 + }, + { + "epoch": 9.1, + "learning_rate": 1.967051851851852e-05, + "loss": 1.3915, + "step": 51200 + }, + { + "epoch": 9.1, + "learning_rate": 1.9667555555555556e-05, + "loss": 1.5399, + "step": 51205 + }, + { + "epoch": 9.1, + "learning_rate": 1.9664592592592594e-05, + "loss": 1.3916, + "step": 51210 + }, + { + "epoch": 9.1, + "learning_rate": 1.966162962962963e-05, + "loss": 1.6543, + "step": 51215 + }, + { + "epoch": 9.11, + "learning_rate": 1.9658666666666665e-05, + "loss": 1.4786, + "step": 51220 + }, + { + "epoch": 9.11, + "learning_rate": 1.9655703703703704e-05, + "loss": 1.5224, + "step": 51225 + }, + { + "epoch": 9.11, + "learning_rate": 1.965274074074074e-05, + "loss": 1.5361, + "step": 51230 + }, + { + "epoch": 9.11, + "learning_rate": 1.964977777777778e-05, + "loss": 1.4913, + "step": 51235 + }, + { + "epoch": 9.11, + "learning_rate": 1.9646814814814814e-05, + "loss": 1.4675, + "step": 51240 + }, + { + "epoch": 9.11, + "learning_rate": 1.9643851851851853e-05, + "loss": 1.5758, + "step": 51245 + }, + { + "epoch": 9.11, + "learning_rate": 1.9640888888888888e-05, + "loss": 1.5879, + "step": 51250 + }, + { + "epoch": 9.11, + "learning_rate": 1.9637925925925927e-05, + "loss": 1.5508, + "step": 51255 + }, + { + "epoch": 9.11, + "learning_rate": 1.9634962962962962e-05, + "loss": 1.4384, + "step": 51260 + }, + { + "epoch": 9.11, + "learning_rate": 1.9632e-05, + "loss": 1.6381, + "step": 51265 + }, + { + "epoch": 9.11, + "learning_rate": 1.9629037037037037e-05, + "loss": 1.5827, + "step": 51270 + }, + { + "epoch": 9.12, + "learning_rate": 1.9626074074074075e-05, + "loss": 1.5454, + "step": 51275 + }, + { + "epoch": 9.12, + "learning_rate": 1.962311111111111e-05, + "loss": 1.5285, + "step": 51280 + }, + { + "epoch": 9.12, + "learning_rate": 1.962014814814815e-05, + "loss": 1.5665, + "step": 51285 + }, + { + "epoch": 9.12, + "learning_rate": 1.9617185185185185e-05, + "loss": 1.6334, + "step": 51290 + }, + { + "epoch": 9.12, + "learning_rate": 1.9614222222222224e-05, + "loss": 1.4272, + "step": 51295 + }, + { + "epoch": 9.12, + "learning_rate": 1.961125925925926e-05, + "loss": 1.5913, + "step": 51300 + }, + { + "epoch": 9.12, + "learning_rate": 1.9608296296296298e-05, + "loss": 1.5112, + "step": 51305 + }, + { + "epoch": 9.12, + "learning_rate": 1.9605333333333334e-05, + "loss": 1.4201, + "step": 51310 + }, + { + "epoch": 9.12, + "learning_rate": 1.9602370370370372e-05, + "loss": 1.5154, + "step": 51315 + }, + { + "epoch": 9.12, + "learning_rate": 1.9599407407407408e-05, + "loss": 1.558, + "step": 51320 + }, + { + "epoch": 9.12, + "learning_rate": 1.9596444444444447e-05, + "loss": 1.44, + "step": 51325 + }, + { + "epoch": 9.13, + "learning_rate": 1.9593481481481482e-05, + "loss": 1.5309, + "step": 51330 + }, + { + "epoch": 9.13, + "learning_rate": 1.9590518518518517e-05, + "loss": 1.5724, + "step": 51335 + }, + { + "epoch": 9.13, + "learning_rate": 1.9587555555555556e-05, + "loss": 1.4948, + "step": 51340 + }, + { + "epoch": 9.13, + "learning_rate": 1.9584592592592592e-05, + "loss": 1.4579, + "step": 51345 + }, + { + "epoch": 9.13, + "learning_rate": 1.958162962962963e-05, + "loss": 1.5031, + "step": 51350 + }, + { + "epoch": 9.13, + "learning_rate": 1.9578666666666666e-05, + "loss": 1.5334, + "step": 51355 + }, + { + "epoch": 9.13, + "learning_rate": 1.9575703703703705e-05, + "loss": 1.6087, + "step": 51360 + }, + { + "epoch": 9.13, + "learning_rate": 1.957274074074074e-05, + "loss": 1.5051, + "step": 51365 + }, + { + "epoch": 9.13, + "learning_rate": 1.956977777777778e-05, + "loss": 1.5643, + "step": 51370 + }, + { + "epoch": 9.13, + "learning_rate": 1.9566814814814815e-05, + "loss": 1.5748, + "step": 51375 + }, + { + "epoch": 9.13, + "learning_rate": 1.9563851851851853e-05, + "loss": 1.5208, + "step": 51380 + }, + { + "epoch": 9.14, + "learning_rate": 1.956088888888889e-05, + "loss": 1.3993, + "step": 51385 + }, + { + "epoch": 9.14, + "learning_rate": 1.9557925925925928e-05, + "loss": 1.4679, + "step": 51390 + }, + { + "epoch": 9.14, + "learning_rate": 1.9554962962962963e-05, + "loss": 1.4178, + "step": 51395 + }, + { + "epoch": 9.14, + "learning_rate": 1.9552000000000002e-05, + "loss": 1.5598, + "step": 51400 + }, + { + "epoch": 9.14, + "learning_rate": 1.9549037037037037e-05, + "loss": 1.6522, + "step": 51405 + }, + { + "epoch": 9.14, + "learning_rate": 1.9546074074074076e-05, + "loss": 1.4572, + "step": 51410 + }, + { + "epoch": 9.14, + "learning_rate": 1.954311111111111e-05, + "loss": 1.4571, + "step": 51415 + }, + { + "epoch": 9.14, + "learning_rate": 1.954014814814815e-05, + "loss": 1.5616, + "step": 51420 + }, + { + "epoch": 9.14, + "learning_rate": 1.9537185185185186e-05, + "loss": 1.4767, + "step": 51425 + }, + { + "epoch": 9.14, + "learning_rate": 1.9534222222222225e-05, + "loss": 1.6006, + "step": 51430 + }, + { + "epoch": 9.14, + "learning_rate": 1.953125925925926e-05, + "loss": 1.4874, + "step": 51435 + }, + { + "epoch": 9.14, + "learning_rate": 1.95282962962963e-05, + "loss": 1.4748, + "step": 51440 + }, + { + "epoch": 9.15, + "learning_rate": 1.9525333333333334e-05, + "loss": 1.5569, + "step": 51445 + }, + { + "epoch": 9.15, + "learning_rate": 1.952237037037037e-05, + "loss": 1.6453, + "step": 51450 + }, + { + "epoch": 9.15, + "learning_rate": 1.951940740740741e-05, + "loss": 1.4825, + "step": 51455 + }, + { + "epoch": 9.15, + "learning_rate": 1.9516444444444444e-05, + "loss": 1.5934, + "step": 51460 + }, + { + "epoch": 9.15, + "learning_rate": 1.9513481481481483e-05, + "loss": 1.5303, + "step": 51465 + }, + { + "epoch": 9.15, + "learning_rate": 1.9510518518518518e-05, + "loss": 1.4689, + "step": 51470 + }, + { + "epoch": 9.15, + "learning_rate": 1.9507555555555557e-05, + "loss": 1.491, + "step": 51475 + }, + { + "epoch": 9.15, + "learning_rate": 1.9504592592592593e-05, + "loss": 1.4769, + "step": 51480 + }, + { + "epoch": 9.15, + "learning_rate": 1.950162962962963e-05, + "loss": 1.5491, + "step": 51485 + }, + { + "epoch": 9.15, + "learning_rate": 1.9498666666666667e-05, + "loss": 1.5009, + "step": 51490 + }, + { + "epoch": 9.15, + "learning_rate": 1.9495703703703706e-05, + "loss": 1.4224, + "step": 51495 + }, + { + "epoch": 9.16, + "learning_rate": 1.949274074074074e-05, + "loss": 1.6225, + "step": 51500 + }, + { + "epoch": 9.16, + "learning_rate": 1.948977777777778e-05, + "loss": 1.5768, + "step": 51505 + }, + { + "epoch": 9.16, + "learning_rate": 1.9486814814814815e-05, + "loss": 1.6288, + "step": 51510 + }, + { + "epoch": 9.16, + "learning_rate": 1.9483851851851854e-05, + "loss": 1.6132, + "step": 51515 + }, + { + "epoch": 9.16, + "learning_rate": 1.948088888888889e-05, + "loss": 1.5342, + "step": 51520 + }, + { + "epoch": 9.16, + "learning_rate": 1.947792592592593e-05, + "loss": 1.4958, + "step": 51525 + }, + { + "epoch": 9.16, + "learning_rate": 1.9474962962962964e-05, + "loss": 1.4537, + "step": 51530 + }, + { + "epoch": 9.16, + "learning_rate": 1.9472000000000003e-05, + "loss": 1.4955, + "step": 51535 + }, + { + "epoch": 9.16, + "learning_rate": 1.9469037037037038e-05, + "loss": 1.5148, + "step": 51540 + }, + { + "epoch": 9.16, + "learning_rate": 1.9466074074074077e-05, + "loss": 1.5618, + "step": 51545 + }, + { + "epoch": 9.16, + "learning_rate": 1.9463111111111112e-05, + "loss": 1.4807, + "step": 51550 + }, + { + "epoch": 9.17, + "learning_rate": 1.946014814814815e-05, + "loss": 1.5138, + "step": 51555 + }, + { + "epoch": 9.17, + "learning_rate": 1.9457185185185187e-05, + "loss": 1.5513, + "step": 51560 + }, + { + "epoch": 9.17, + "learning_rate": 1.9454222222222222e-05, + "loss": 1.4952, + "step": 51565 + }, + { + "epoch": 9.17, + "learning_rate": 1.945125925925926e-05, + "loss": 1.4795, + "step": 51570 + }, + { + "epoch": 9.17, + "learning_rate": 1.9448296296296296e-05, + "loss": 1.5254, + "step": 51575 + }, + { + "epoch": 9.17, + "learning_rate": 1.9445333333333335e-05, + "loss": 1.5526, + "step": 51580 + }, + { + "epoch": 9.17, + "learning_rate": 1.944237037037037e-05, + "loss": 1.5332, + "step": 51585 + }, + { + "epoch": 9.17, + "learning_rate": 1.943940740740741e-05, + "loss": 1.5666, + "step": 51590 + }, + { + "epoch": 9.17, + "learning_rate": 1.9436444444444445e-05, + "loss": 1.6266, + "step": 51595 + }, + { + "epoch": 9.17, + "learning_rate": 1.9433481481481484e-05, + "loss": 1.4862, + "step": 51600 + }, + { + "epoch": 9.17, + "learning_rate": 1.943051851851852e-05, + "loss": 1.4467, + "step": 51605 + }, + { + "epoch": 9.18, + "learning_rate": 1.9427555555555558e-05, + "loss": 1.4954, + "step": 51610 + }, + { + "epoch": 9.18, + "learning_rate": 1.9424592592592593e-05, + "loss": 1.401, + "step": 51615 + }, + { + "epoch": 9.18, + "learning_rate": 1.9421629629629632e-05, + "loss": 1.5222, + "step": 51620 + }, + { + "epoch": 9.18, + "learning_rate": 1.9418666666666668e-05, + "loss": 1.5357, + "step": 51625 + }, + { + "epoch": 9.18, + "learning_rate": 1.9415703703703706e-05, + "loss": 1.5662, + "step": 51630 + }, + { + "epoch": 9.18, + "learning_rate": 1.9412740740740742e-05, + "loss": 1.4832, + "step": 51635 + }, + { + "epoch": 9.18, + "learning_rate": 1.940977777777778e-05, + "loss": 1.5853, + "step": 51640 + }, + { + "epoch": 9.18, + "learning_rate": 1.9406814814814816e-05, + "loss": 1.6265, + "step": 51645 + }, + { + "epoch": 9.18, + "learning_rate": 1.9403851851851855e-05, + "loss": 1.4231, + "step": 51650 + }, + { + "epoch": 9.18, + "learning_rate": 1.940088888888889e-05, + "loss": 1.5203, + "step": 51655 + }, + { + "epoch": 9.18, + "learning_rate": 1.939792592592593e-05, + "loss": 1.5043, + "step": 51660 + }, + { + "epoch": 9.18, + "learning_rate": 1.9394962962962965e-05, + "loss": 1.4131, + "step": 51665 + }, + { + "epoch": 9.19, + "learning_rate": 1.9392000000000003e-05, + "loss": 1.476, + "step": 51670 + }, + { + "epoch": 9.19, + "learning_rate": 1.938903703703704e-05, + "loss": 1.5579, + "step": 51675 + }, + { + "epoch": 9.19, + "learning_rate": 1.9386074074074074e-05, + "loss": 1.5691, + "step": 51680 + }, + { + "epoch": 9.19, + "learning_rate": 1.9383111111111113e-05, + "loss": 1.4477, + "step": 51685 + }, + { + "epoch": 9.19, + "learning_rate": 1.938014814814815e-05, + "loss": 1.5441, + "step": 51690 + }, + { + "epoch": 9.19, + "learning_rate": 1.9377185185185187e-05, + "loss": 1.4531, + "step": 51695 + }, + { + "epoch": 9.19, + "learning_rate": 1.9374222222222223e-05, + "loss": 1.532, + "step": 51700 + }, + { + "epoch": 9.19, + "learning_rate": 1.937125925925926e-05, + "loss": 1.4588, + "step": 51705 + }, + { + "epoch": 9.19, + "learning_rate": 1.9368296296296297e-05, + "loss": 1.4216, + "step": 51710 + }, + { + "epoch": 9.19, + "learning_rate": 1.9365333333333336e-05, + "loss": 1.5469, + "step": 51715 + }, + { + "epoch": 9.19, + "learning_rate": 1.936237037037037e-05, + "loss": 1.6113, + "step": 51720 + }, + { + "epoch": 9.2, + "learning_rate": 1.935940740740741e-05, + "loss": 1.5982, + "step": 51725 + }, + { + "epoch": 9.2, + "learning_rate": 1.9356444444444446e-05, + "loss": 1.5446, + "step": 51730 + }, + { + "epoch": 9.2, + "learning_rate": 1.9353481481481484e-05, + "loss": 1.5316, + "step": 51735 + }, + { + "epoch": 9.2, + "learning_rate": 1.935051851851852e-05, + "loss": 1.5195, + "step": 51740 + }, + { + "epoch": 9.2, + "learning_rate": 1.934755555555556e-05, + "loss": 1.5165, + "step": 51745 + }, + { + "epoch": 9.2, + "learning_rate": 1.9344592592592594e-05, + "loss": 1.4771, + "step": 51750 + }, + { + "epoch": 9.2, + "learning_rate": 1.9341629629629633e-05, + "loss": 1.5637, + "step": 51755 + }, + { + "epoch": 9.2, + "learning_rate": 1.933866666666667e-05, + "loss": 1.6402, + "step": 51760 + }, + { + "epoch": 9.2, + "learning_rate": 1.9335703703703707e-05, + "loss": 1.5338, + "step": 51765 + }, + { + "epoch": 9.2, + "learning_rate": 1.9332740740740743e-05, + "loss": 1.4786, + "step": 51770 + }, + { + "epoch": 9.2, + "learning_rate": 1.932977777777778e-05, + "loss": 1.4935, + "step": 51775 + }, + { + "epoch": 9.21, + "learning_rate": 1.9326814814814817e-05, + "loss": 1.5779, + "step": 51780 + }, + { + "epoch": 9.21, + "learning_rate": 1.9323851851851856e-05, + "loss": 1.4192, + "step": 51785 + }, + { + "epoch": 9.21, + "learning_rate": 1.932088888888889e-05, + "loss": 1.4549, + "step": 51790 + }, + { + "epoch": 9.21, + "learning_rate": 1.9317925925925927e-05, + "loss": 1.4662, + "step": 51795 + }, + { + "epoch": 9.21, + "learning_rate": 1.9314962962962965e-05, + "loss": 1.441, + "step": 51800 + }, + { + "epoch": 9.21, + "learning_rate": 1.9312e-05, + "loss": 1.5527, + "step": 51805 + }, + { + "epoch": 9.21, + "learning_rate": 1.9309037037037036e-05, + "loss": 1.5808, + "step": 51810 + }, + { + "epoch": 9.21, + "learning_rate": 1.9306074074074075e-05, + "loss": 1.4767, + "step": 51815 + }, + { + "epoch": 9.21, + "learning_rate": 1.930311111111111e-05, + "loss": 1.4949, + "step": 51820 + }, + { + "epoch": 9.21, + "learning_rate": 1.930014814814815e-05, + "loss": 1.5042, + "step": 51825 + }, + { + "epoch": 9.21, + "learning_rate": 1.9297185185185185e-05, + "loss": 1.4731, + "step": 51830 + }, + { + "epoch": 9.22, + "learning_rate": 1.9294222222222224e-05, + "loss": 1.5389, + "step": 51835 + }, + { + "epoch": 9.22, + "learning_rate": 1.929125925925926e-05, + "loss": 1.6636, + "step": 51840 + }, + { + "epoch": 9.22, + "learning_rate": 1.9288296296296294e-05, + "loss": 1.614, + "step": 51845 + }, + { + "epoch": 9.22, + "learning_rate": 1.9285333333333333e-05, + "loss": 1.5786, + "step": 51850 + }, + { + "epoch": 9.22, + "learning_rate": 1.928237037037037e-05, + "loss": 1.4842, + "step": 51855 + }, + { + "epoch": 9.22, + "learning_rate": 1.9279407407407407e-05, + "loss": 1.6326, + "step": 51860 + }, + { + "epoch": 9.22, + "learning_rate": 1.9276444444444443e-05, + "loss": 1.5457, + "step": 51865 + }, + { + "epoch": 9.22, + "learning_rate": 1.9273481481481482e-05, + "loss": 1.5046, + "step": 51870 + }, + { + "epoch": 9.22, + "learning_rate": 1.9270518518518517e-05, + "loss": 1.5181, + "step": 51875 + }, + { + "epoch": 9.22, + "learning_rate": 1.9267555555555556e-05, + "loss": 1.5673, + "step": 51880 + }, + { + "epoch": 9.22, + "learning_rate": 1.926459259259259e-05, + "loss": 1.4592, + "step": 51885 + }, + { + "epoch": 9.22, + "learning_rate": 1.926162962962963e-05, + "loss": 1.6332, + "step": 51890 + }, + { + "epoch": 9.23, + "learning_rate": 1.9258666666666666e-05, + "loss": 1.4966, + "step": 51895 + }, + { + "epoch": 9.23, + "learning_rate": 1.9255703703703705e-05, + "loss": 1.5761, + "step": 51900 + }, + { + "epoch": 9.23, + "learning_rate": 1.925274074074074e-05, + "loss": 1.5466, + "step": 51905 + }, + { + "epoch": 9.23, + "learning_rate": 1.924977777777778e-05, + "loss": 1.4861, + "step": 51910 + }, + { + "epoch": 9.23, + "learning_rate": 1.9246814814814814e-05, + "loss": 1.4751, + "step": 51915 + }, + { + "epoch": 9.23, + "learning_rate": 1.9243851851851853e-05, + "loss": 1.4734, + "step": 51920 + }, + { + "epoch": 9.23, + "learning_rate": 1.924088888888889e-05, + "loss": 1.4942, + "step": 51925 + }, + { + "epoch": 9.23, + "learning_rate": 1.9237925925925927e-05, + "loss": 1.3831, + "step": 51930 + }, + { + "epoch": 9.23, + "learning_rate": 1.9234962962962963e-05, + "loss": 1.566, + "step": 51935 + }, + { + "epoch": 9.23, + "learning_rate": 1.9232e-05, + "loss": 1.5691, + "step": 51940 + }, + { + "epoch": 9.23, + "learning_rate": 1.9229037037037037e-05, + "loss": 1.4648, + "step": 51945 + }, + { + "epoch": 9.24, + "learning_rate": 1.9226074074074076e-05, + "loss": 1.6115, + "step": 51950 + }, + { + "epoch": 9.24, + "learning_rate": 1.922311111111111e-05, + "loss": 1.6449, + "step": 51955 + }, + { + "epoch": 9.24, + "learning_rate": 1.9220148148148147e-05, + "loss": 1.3774, + "step": 51960 + }, + { + "epoch": 9.24, + "learning_rate": 1.9217185185185185e-05, + "loss": 1.4848, + "step": 51965 + }, + { + "epoch": 9.24, + "learning_rate": 1.921422222222222e-05, + "loss": 1.5612, + "step": 51970 + }, + { + "epoch": 9.24, + "learning_rate": 1.921125925925926e-05, + "loss": 1.6708, + "step": 51975 + }, + { + "epoch": 9.24, + "learning_rate": 1.9208296296296295e-05, + "loss": 1.6228, + "step": 51980 + }, + { + "epoch": 9.24, + "learning_rate": 1.9205333333333334e-05, + "loss": 1.5239, + "step": 51985 + }, + { + "epoch": 9.24, + "learning_rate": 1.920237037037037e-05, + "loss": 1.4633, + "step": 51990 + }, + { + "epoch": 9.24, + "learning_rate": 1.9199407407407408e-05, + "loss": 1.4982, + "step": 51995 + }, + { + "epoch": 9.24, + "learning_rate": 1.9196444444444444e-05, + "loss": 1.5373, + "step": 52000 + }, + { + "epoch": 9.25, + "learning_rate": 1.9193481481481483e-05, + "loss": 1.5612, + "step": 52005 + }, + { + "epoch": 9.25, + "learning_rate": 1.9190518518518518e-05, + "loss": 1.5117, + "step": 52010 + }, + { + "epoch": 9.25, + "learning_rate": 1.9187555555555557e-05, + "loss": 1.5714, + "step": 52015 + }, + { + "epoch": 9.25, + "learning_rate": 1.9184592592592592e-05, + "loss": 1.4357, + "step": 52020 + }, + { + "epoch": 9.25, + "learning_rate": 1.918162962962963e-05, + "loss": 1.4625, + "step": 52025 + }, + { + "epoch": 9.25, + "learning_rate": 1.9178666666666666e-05, + "loss": 1.4898, + "step": 52030 + }, + { + "epoch": 9.25, + "learning_rate": 1.9175703703703705e-05, + "loss": 1.4659, + "step": 52035 + }, + { + "epoch": 9.25, + "learning_rate": 1.917274074074074e-05, + "loss": 1.5357, + "step": 52040 + }, + { + "epoch": 9.25, + "learning_rate": 1.916977777777778e-05, + "loss": 1.5178, + "step": 52045 + }, + { + "epoch": 9.25, + "learning_rate": 1.9166814814814815e-05, + "loss": 1.4517, + "step": 52050 + }, + { + "epoch": 9.25, + "learning_rate": 1.9163851851851854e-05, + "loss": 1.5612, + "step": 52055 + }, + { + "epoch": 9.26, + "learning_rate": 1.916088888888889e-05, + "loss": 1.5205, + "step": 52060 + }, + { + "epoch": 9.26, + "learning_rate": 1.9157925925925928e-05, + "loss": 1.4157, + "step": 52065 + }, + { + "epoch": 9.26, + "learning_rate": 1.9154962962962963e-05, + "loss": 1.5638, + "step": 52070 + }, + { + "epoch": 9.26, + "learning_rate": 1.9152e-05, + "loss": 1.426, + "step": 52075 + }, + { + "epoch": 9.26, + "learning_rate": 1.9149037037037038e-05, + "loss": 1.5309, + "step": 52080 + }, + { + "epoch": 9.26, + "learning_rate": 1.9146074074074073e-05, + "loss": 1.5612, + "step": 52085 + }, + { + "epoch": 9.26, + "learning_rate": 1.9143111111111112e-05, + "loss": 1.6217, + "step": 52090 + }, + { + "epoch": 9.26, + "learning_rate": 1.9140148148148147e-05, + "loss": 1.6371, + "step": 52095 + }, + { + "epoch": 9.26, + "learning_rate": 1.9137185185185186e-05, + "loss": 1.4951, + "step": 52100 + }, + { + "epoch": 9.26, + "learning_rate": 1.913422222222222e-05, + "loss": 1.5469, + "step": 52105 + }, + { + "epoch": 9.26, + "learning_rate": 1.913125925925926e-05, + "loss": 1.5426, + "step": 52110 + }, + { + "epoch": 9.26, + "learning_rate": 1.9128296296296296e-05, + "loss": 1.5917, + "step": 52115 + }, + { + "epoch": 9.27, + "learning_rate": 1.9125333333333335e-05, + "loss": 1.5659, + "step": 52120 + }, + { + "epoch": 9.27, + "learning_rate": 1.912237037037037e-05, + "loss": 1.5738, + "step": 52125 + }, + { + "epoch": 9.27, + "learning_rate": 1.911940740740741e-05, + "loss": 1.5553, + "step": 52130 + }, + { + "epoch": 9.27, + "learning_rate": 1.9116444444444444e-05, + "loss": 1.5395, + "step": 52135 + }, + { + "epoch": 9.27, + "learning_rate": 1.9113481481481483e-05, + "loss": 1.4256, + "step": 52140 + }, + { + "epoch": 9.27, + "learning_rate": 1.911051851851852e-05, + "loss": 1.3629, + "step": 52145 + }, + { + "epoch": 9.27, + "learning_rate": 1.9107555555555558e-05, + "loss": 1.5947, + "step": 52150 + }, + { + "epoch": 9.27, + "learning_rate": 1.9104592592592593e-05, + "loss": 1.4103, + "step": 52155 + }, + { + "epoch": 9.27, + "learning_rate": 1.9101629629629632e-05, + "loss": 1.651, + "step": 52160 + }, + { + "epoch": 9.27, + "learning_rate": 1.9098666666666667e-05, + "loss": 1.5684, + "step": 52165 + }, + { + "epoch": 9.27, + "learning_rate": 1.9095703703703706e-05, + "loss": 1.5609, + "step": 52170 + }, + { + "epoch": 9.28, + "learning_rate": 1.909274074074074e-05, + "loss": 1.645, + "step": 52175 + }, + { + "epoch": 9.28, + "learning_rate": 1.908977777777778e-05, + "loss": 1.617, + "step": 52180 + }, + { + "epoch": 9.28, + "learning_rate": 1.9086814814814816e-05, + "loss": 1.5013, + "step": 52185 + }, + { + "epoch": 9.28, + "learning_rate": 1.908385185185185e-05, + "loss": 1.5077, + "step": 52190 + }, + { + "epoch": 9.28, + "learning_rate": 1.908088888888889e-05, + "loss": 1.4899, + "step": 52195 + }, + { + "epoch": 9.28, + "learning_rate": 1.9077925925925925e-05, + "loss": 1.6514, + "step": 52200 + }, + { + "epoch": 9.28, + "learning_rate": 1.9074962962962964e-05, + "loss": 1.5563, + "step": 52205 + }, + { + "epoch": 9.28, + "learning_rate": 1.9072e-05, + "loss": 1.5662, + "step": 52210 + }, + { + "epoch": 9.28, + "learning_rate": 1.906903703703704e-05, + "loss": 1.5416, + "step": 52215 + }, + { + "epoch": 9.28, + "learning_rate": 1.9066074074074074e-05, + "loss": 1.4595, + "step": 52220 + }, + { + "epoch": 9.28, + "learning_rate": 1.9063111111111113e-05, + "loss": 1.4963, + "step": 52225 + }, + { + "epoch": 9.29, + "learning_rate": 1.9060148148148148e-05, + "loss": 1.5882, + "step": 52230 + }, + { + "epoch": 9.29, + "learning_rate": 1.9057185185185187e-05, + "loss": 1.5429, + "step": 52235 + }, + { + "epoch": 9.29, + "learning_rate": 1.9054222222222222e-05, + "loss": 1.5742, + "step": 52240 + }, + { + "epoch": 9.29, + "learning_rate": 1.905125925925926e-05, + "loss": 1.5464, + "step": 52245 + }, + { + "epoch": 9.29, + "learning_rate": 1.9048296296296297e-05, + "loss": 1.4868, + "step": 52250 + }, + { + "epoch": 9.29, + "learning_rate": 1.9045333333333336e-05, + "loss": 1.5181, + "step": 52255 + }, + { + "epoch": 9.29, + "learning_rate": 1.904237037037037e-05, + "loss": 1.5181, + "step": 52260 + }, + { + "epoch": 9.29, + "learning_rate": 1.903940740740741e-05, + "loss": 1.5412, + "step": 52265 + }, + { + "epoch": 9.29, + "learning_rate": 1.9036444444444445e-05, + "loss": 1.5758, + "step": 52270 + }, + { + "epoch": 9.29, + "learning_rate": 1.9033481481481484e-05, + "loss": 1.5296, + "step": 52275 + }, + { + "epoch": 9.29, + "learning_rate": 1.903051851851852e-05, + "loss": 1.564, + "step": 52280 + }, + { + "epoch": 9.3, + "learning_rate": 1.9027555555555558e-05, + "loss": 1.552, + "step": 52285 + }, + { + "epoch": 9.3, + "learning_rate": 1.9024592592592594e-05, + "loss": 1.5129, + "step": 52290 + }, + { + "epoch": 9.3, + "learning_rate": 1.9021629629629633e-05, + "loss": 1.4322, + "step": 52295 + }, + { + "epoch": 9.3, + "learning_rate": 1.9018666666666668e-05, + "loss": 1.5483, + "step": 52300 + }, + { + "epoch": 9.3, + "learning_rate": 1.9015703703703703e-05, + "loss": 1.3346, + "step": 52305 + }, + { + "epoch": 9.3, + "learning_rate": 1.9012740740740742e-05, + "loss": 1.5272, + "step": 52310 + }, + { + "epoch": 9.3, + "learning_rate": 1.9009777777777778e-05, + "loss": 1.5165, + "step": 52315 + }, + { + "epoch": 9.3, + "learning_rate": 1.9006814814814816e-05, + "loss": 1.6013, + "step": 52320 + }, + { + "epoch": 9.3, + "learning_rate": 1.9003851851851852e-05, + "loss": 1.5328, + "step": 52325 + }, + { + "epoch": 9.3, + "learning_rate": 1.900088888888889e-05, + "loss": 1.4681, + "step": 52330 + }, + { + "epoch": 9.3, + "learning_rate": 1.8997925925925926e-05, + "loss": 1.5509, + "step": 52335 + }, + { + "epoch": 9.3, + "learning_rate": 1.8994962962962965e-05, + "loss": 1.5472, + "step": 52340 + }, + { + "epoch": 9.31, + "learning_rate": 1.8992e-05, + "loss": 1.4159, + "step": 52345 + }, + { + "epoch": 9.31, + "learning_rate": 1.898903703703704e-05, + "loss": 1.4428, + "step": 52350 + }, + { + "epoch": 9.31, + "learning_rate": 1.8986074074074075e-05, + "loss": 1.4519, + "step": 52355 + }, + { + "epoch": 9.31, + "learning_rate": 1.8983111111111114e-05, + "loss": 1.5064, + "step": 52360 + }, + { + "epoch": 9.31, + "learning_rate": 1.898014814814815e-05, + "loss": 1.5235, + "step": 52365 + }, + { + "epoch": 9.31, + "learning_rate": 1.8977185185185188e-05, + "loss": 1.5598, + "step": 52370 + }, + { + "epoch": 9.31, + "learning_rate": 1.8974222222222223e-05, + "loss": 1.5391, + "step": 52375 + }, + { + "epoch": 9.31, + "learning_rate": 1.8971259259259262e-05, + "loss": 1.4634, + "step": 52380 + }, + { + "epoch": 9.31, + "learning_rate": 1.8968296296296297e-05, + "loss": 1.5141, + "step": 52385 + }, + { + "epoch": 9.31, + "learning_rate": 1.8965333333333336e-05, + "loss": 1.4912, + "step": 52390 + }, + { + "epoch": 9.31, + "learning_rate": 1.8962370370370372e-05, + "loss": 1.6172, + "step": 52395 + }, + { + "epoch": 9.32, + "learning_rate": 1.895940740740741e-05, + "loss": 1.5, + "step": 52400 + }, + { + "epoch": 9.32, + "learning_rate": 1.8956444444444446e-05, + "loss": 1.4518, + "step": 52405 + }, + { + "epoch": 9.32, + "learning_rate": 1.8953481481481485e-05, + "loss": 1.4996, + "step": 52410 + }, + { + "epoch": 9.32, + "learning_rate": 1.895051851851852e-05, + "loss": 1.6303, + "step": 52415 + }, + { + "epoch": 9.32, + "learning_rate": 1.8947555555555556e-05, + "loss": 1.5302, + "step": 52420 + }, + { + "epoch": 9.32, + "learning_rate": 1.8944592592592594e-05, + "loss": 1.5104, + "step": 52425 + }, + { + "epoch": 9.32, + "learning_rate": 1.894162962962963e-05, + "loss": 1.462, + "step": 52430 + }, + { + "epoch": 9.32, + "learning_rate": 1.893866666666667e-05, + "loss": 1.5999, + "step": 52435 + }, + { + "epoch": 9.32, + "learning_rate": 1.8935703703703704e-05, + "loss": 1.5093, + "step": 52440 + }, + { + "epoch": 9.32, + "learning_rate": 1.8932740740740743e-05, + "loss": 1.4872, + "step": 52445 + }, + { + "epoch": 9.32, + "learning_rate": 1.892977777777778e-05, + "loss": 1.4335, + "step": 52450 + }, + { + "epoch": 9.33, + "learning_rate": 1.8926814814814817e-05, + "loss": 1.3709, + "step": 52455 + }, + { + "epoch": 9.33, + "learning_rate": 1.8923851851851853e-05, + "loss": 1.4729, + "step": 52460 + }, + { + "epoch": 9.33, + "learning_rate": 1.892088888888889e-05, + "loss": 1.5973, + "step": 52465 + }, + { + "epoch": 9.33, + "learning_rate": 1.8917925925925927e-05, + "loss": 1.5374, + "step": 52470 + }, + { + "epoch": 9.33, + "learning_rate": 1.8914962962962966e-05, + "loss": 1.5687, + "step": 52475 + }, + { + "epoch": 9.33, + "learning_rate": 1.8912e-05, + "loss": 1.4101, + "step": 52480 + }, + { + "epoch": 9.33, + "learning_rate": 1.890903703703704e-05, + "loss": 1.5153, + "step": 52485 + }, + { + "epoch": 9.33, + "learning_rate": 1.8906074074074075e-05, + "loss": 1.5439, + "step": 52490 + }, + { + "epoch": 9.33, + "learning_rate": 1.8903111111111114e-05, + "loss": 1.5347, + "step": 52495 + }, + { + "epoch": 9.33, + "learning_rate": 1.890014814814815e-05, + "loss": 1.5496, + "step": 52500 + }, + { + "epoch": 9.33, + "learning_rate": 1.889718518518519e-05, + "loss": 1.5483, + "step": 52505 + }, + { + "epoch": 9.34, + "learning_rate": 1.8894222222222224e-05, + "loss": 1.5495, + "step": 52510 + }, + { + "epoch": 9.34, + "learning_rate": 1.8891259259259263e-05, + "loss": 1.4581, + "step": 52515 + }, + { + "epoch": 9.34, + "learning_rate": 1.8888296296296298e-05, + "loss": 1.3663, + "step": 52520 + }, + { + "epoch": 9.34, + "learning_rate": 1.8885333333333337e-05, + "loss": 1.3726, + "step": 52525 + }, + { + "epoch": 9.34, + "learning_rate": 1.8882370370370372e-05, + "loss": 1.4894, + "step": 52530 + }, + { + "epoch": 9.34, + "learning_rate": 1.8879407407407408e-05, + "loss": 1.67, + "step": 52535 + }, + { + "epoch": 9.34, + "learning_rate": 1.8876444444444447e-05, + "loss": 1.5658, + "step": 52540 + }, + { + "epoch": 9.34, + "learning_rate": 1.8873481481481482e-05, + "loss": 1.5714, + "step": 52545 + }, + { + "epoch": 9.34, + "learning_rate": 1.887051851851852e-05, + "loss": 1.4833, + "step": 52550 + }, + { + "epoch": 9.34, + "learning_rate": 1.8867555555555556e-05, + "loss": 1.4873, + "step": 52555 + }, + { + "epoch": 9.34, + "learning_rate": 1.8864592592592595e-05, + "loss": 1.4935, + "step": 52560 + }, + { + "epoch": 9.34, + "learning_rate": 1.886162962962963e-05, + "loss": 1.4882, + "step": 52565 + }, + { + "epoch": 9.35, + "learning_rate": 1.885866666666667e-05, + "loss": 1.4436, + "step": 52570 + }, + { + "epoch": 9.35, + "learning_rate": 1.8855703703703705e-05, + "loss": 1.5769, + "step": 52575 + }, + { + "epoch": 9.35, + "learning_rate": 1.885274074074074e-05, + "loss": 1.5633, + "step": 52580 + }, + { + "epoch": 9.35, + "learning_rate": 1.8849777777777776e-05, + "loss": 1.5372, + "step": 52585 + }, + { + "epoch": 9.35, + "learning_rate": 1.8846814814814815e-05, + "loss": 1.5765, + "step": 52590 + }, + { + "epoch": 9.35, + "learning_rate": 1.884385185185185e-05, + "loss": 1.5579, + "step": 52595 + }, + { + "epoch": 9.35, + "learning_rate": 1.884088888888889e-05, + "loss": 1.4854, + "step": 52600 + }, + { + "epoch": 9.35, + "learning_rate": 1.8837925925925924e-05, + "loss": 1.5647, + "step": 52605 + }, + { + "epoch": 9.35, + "learning_rate": 1.8834962962962963e-05, + "loss": 1.5512, + "step": 52610 + }, + { + "epoch": 9.35, + "learning_rate": 1.8832e-05, + "loss": 1.4442, + "step": 52615 + }, + { + "epoch": 9.35, + "learning_rate": 1.8829037037037037e-05, + "loss": 1.5132, + "step": 52620 + }, + { + "epoch": 9.36, + "learning_rate": 1.8826074074074073e-05, + "loss": 1.4249, + "step": 52625 + }, + { + "epoch": 9.36, + "learning_rate": 1.882311111111111e-05, + "loss": 1.377, + "step": 52630 + }, + { + "epoch": 9.36, + "learning_rate": 1.8820148148148147e-05, + "loss": 1.5298, + "step": 52635 + }, + { + "epoch": 9.36, + "learning_rate": 1.8817185185185186e-05, + "loss": 1.5412, + "step": 52640 + }, + { + "epoch": 9.36, + "learning_rate": 1.881422222222222e-05, + "loss": 1.4337, + "step": 52645 + }, + { + "epoch": 9.36, + "learning_rate": 1.881125925925926e-05, + "loss": 1.6483, + "step": 52650 + }, + { + "epoch": 9.36, + "learning_rate": 1.8808296296296296e-05, + "loss": 1.4188, + "step": 52655 + }, + { + "epoch": 9.36, + "learning_rate": 1.8805333333333334e-05, + "loss": 1.547, + "step": 52660 + }, + { + "epoch": 9.36, + "learning_rate": 1.880237037037037e-05, + "loss": 1.5614, + "step": 52665 + }, + { + "epoch": 9.36, + "learning_rate": 1.879940740740741e-05, + "loss": 1.5169, + "step": 52670 + }, + { + "epoch": 9.36, + "learning_rate": 1.8796444444444444e-05, + "loss": 1.5795, + "step": 52675 + }, + { + "epoch": 9.37, + "learning_rate": 1.8793481481481483e-05, + "loss": 1.6319, + "step": 52680 + }, + { + "epoch": 9.37, + "learning_rate": 1.879051851851852e-05, + "loss": 1.5922, + "step": 52685 + }, + { + "epoch": 9.37, + "learning_rate": 1.8787555555555554e-05, + "loss": 1.5194, + "step": 52690 + }, + { + "epoch": 9.37, + "learning_rate": 1.8784592592592593e-05, + "loss": 1.4209, + "step": 52695 + }, + { + "epoch": 9.37, + "learning_rate": 1.8781629629629628e-05, + "loss": 1.4611, + "step": 52700 + }, + { + "epoch": 9.37, + "learning_rate": 1.8778666666666667e-05, + "loss": 1.5642, + "step": 52705 + }, + { + "epoch": 9.37, + "learning_rate": 1.8775703703703702e-05, + "loss": 1.5813, + "step": 52710 + }, + { + "epoch": 9.37, + "learning_rate": 1.877274074074074e-05, + "loss": 1.4425, + "step": 52715 + }, + { + "epoch": 9.37, + "learning_rate": 1.8769777777777777e-05, + "loss": 1.4816, + "step": 52720 + }, + { + "epoch": 9.37, + "learning_rate": 1.8766814814814815e-05, + "loss": 1.5387, + "step": 52725 + }, + { + "epoch": 9.37, + "learning_rate": 1.876385185185185e-05, + "loss": 1.6155, + "step": 52730 + }, + { + "epoch": 9.38, + "learning_rate": 1.876088888888889e-05, + "loss": 1.6089, + "step": 52735 + }, + { + "epoch": 9.38, + "learning_rate": 1.8757925925925925e-05, + "loss": 1.4769, + "step": 52740 + }, + { + "epoch": 9.38, + "learning_rate": 1.8754962962962964e-05, + "loss": 1.5734, + "step": 52745 + }, + { + "epoch": 9.38, + "learning_rate": 1.8752e-05, + "loss": 1.458, + "step": 52750 + }, + { + "epoch": 9.38, + "learning_rate": 1.8749037037037038e-05, + "loss": 1.5676, + "step": 52755 + }, + { + "epoch": 9.38, + "learning_rate": 1.8746074074074074e-05, + "loss": 1.4887, + "step": 52760 + }, + { + "epoch": 9.38, + "learning_rate": 1.8743111111111112e-05, + "loss": 1.4885, + "step": 52765 + }, + { + "epoch": 9.38, + "learning_rate": 1.8740148148148148e-05, + "loss": 1.5568, + "step": 52770 + }, + { + "epoch": 9.38, + "learning_rate": 1.8737185185185187e-05, + "loss": 1.466, + "step": 52775 + }, + { + "epoch": 9.38, + "learning_rate": 1.8734222222222222e-05, + "loss": 1.5664, + "step": 52780 + }, + { + "epoch": 9.38, + "learning_rate": 1.873125925925926e-05, + "loss": 1.4928, + "step": 52785 + }, + { + "epoch": 9.38, + "learning_rate": 1.8728296296296296e-05, + "loss": 1.4886, + "step": 52790 + }, + { + "epoch": 9.39, + "learning_rate": 1.8725333333333335e-05, + "loss": 1.4927, + "step": 52795 + }, + { + "epoch": 9.39, + "learning_rate": 1.8722962962962965e-05, + "loss": 1.4207, + "step": 52800 + }, + { + "epoch": 9.39, + "learning_rate": 1.872e-05, + "loss": 1.6124, + "step": 52805 + }, + { + "epoch": 9.39, + "learning_rate": 1.871703703703704e-05, + "loss": 1.4225, + "step": 52810 + }, + { + "epoch": 9.39, + "learning_rate": 1.8714074074074075e-05, + "loss": 1.5204, + "step": 52815 + }, + { + "epoch": 9.39, + "learning_rate": 1.8711111111111113e-05, + "loss": 1.5392, + "step": 52820 + }, + { + "epoch": 9.39, + "learning_rate": 1.870814814814815e-05, + "loss": 1.5742, + "step": 52825 + }, + { + "epoch": 9.39, + "learning_rate": 1.8705185185185188e-05, + "loss": 1.5549, + "step": 52830 + }, + { + "epoch": 9.39, + "learning_rate": 1.8702222222222223e-05, + "loss": 1.5418, + "step": 52835 + }, + { + "epoch": 9.39, + "learning_rate": 1.8699259259259262e-05, + "loss": 1.5664, + "step": 52840 + }, + { + "epoch": 9.39, + "learning_rate": 1.8696296296296297e-05, + "loss": 1.4793, + "step": 52845 + }, + { + "epoch": 9.4, + "learning_rate": 1.8693333333333336e-05, + "loss": 1.6126, + "step": 52850 + }, + { + "epoch": 9.4, + "learning_rate": 1.869037037037037e-05, + "loss": 1.4944, + "step": 52855 + }, + { + "epoch": 9.4, + "learning_rate": 1.868740740740741e-05, + "loss": 1.3641, + "step": 52860 + }, + { + "epoch": 9.4, + "learning_rate": 1.8684444444444446e-05, + "loss": 1.4965, + "step": 52865 + }, + { + "epoch": 9.4, + "learning_rate": 1.8681481481481485e-05, + "loss": 1.488, + "step": 52870 + }, + { + "epoch": 9.4, + "learning_rate": 1.867851851851852e-05, + "loss": 1.5778, + "step": 52875 + }, + { + "epoch": 9.4, + "learning_rate": 1.8675555555555556e-05, + "loss": 1.6234, + "step": 52880 + }, + { + "epoch": 9.4, + "learning_rate": 1.8672592592592594e-05, + "loss": 1.5703, + "step": 52885 + }, + { + "epoch": 9.4, + "learning_rate": 1.866962962962963e-05, + "loss": 1.4391, + "step": 52890 + }, + { + "epoch": 9.4, + "learning_rate": 1.866666666666667e-05, + "loss": 1.5961, + "step": 52895 + }, + { + "epoch": 9.4, + "learning_rate": 1.8663703703703704e-05, + "loss": 1.4264, + "step": 52900 + }, + { + "epoch": 9.41, + "learning_rate": 1.8660740740740743e-05, + "loss": 1.6054, + "step": 52905 + }, + { + "epoch": 9.41, + "learning_rate": 1.865777777777778e-05, + "loss": 1.626, + "step": 52910 + }, + { + "epoch": 9.41, + "learning_rate": 1.8654814814814817e-05, + "loss": 1.5561, + "step": 52915 + }, + { + "epoch": 9.41, + "learning_rate": 1.8651851851851853e-05, + "loss": 1.4536, + "step": 52920 + }, + { + "epoch": 9.41, + "learning_rate": 1.864888888888889e-05, + "loss": 1.5781, + "step": 52925 + }, + { + "epoch": 9.41, + "learning_rate": 1.8645925925925927e-05, + "loss": 1.5282, + "step": 52930 + }, + { + "epoch": 9.41, + "learning_rate": 1.8642962962962966e-05, + "loss": 1.6391, + "step": 52935 + }, + { + "epoch": 9.41, + "learning_rate": 1.864e-05, + "loss": 1.6071, + "step": 52940 + }, + { + "epoch": 9.41, + "learning_rate": 1.863703703703704e-05, + "loss": 1.6976, + "step": 52945 + }, + { + "epoch": 9.41, + "learning_rate": 1.8634074074074075e-05, + "loss": 1.5425, + "step": 52950 + }, + { + "epoch": 9.41, + "learning_rate": 1.8631111111111114e-05, + "loss": 1.5432, + "step": 52955 + }, + { + "epoch": 9.42, + "learning_rate": 1.862814814814815e-05, + "loss": 1.5149, + "step": 52960 + }, + { + "epoch": 9.42, + "learning_rate": 1.862518518518519e-05, + "loss": 1.5467, + "step": 52965 + }, + { + "epoch": 9.42, + "learning_rate": 1.8622222222222224e-05, + "loss": 1.5827, + "step": 52970 + }, + { + "epoch": 9.42, + "learning_rate": 1.8619259259259263e-05, + "loss": 1.4332, + "step": 52975 + }, + { + "epoch": 9.42, + "learning_rate": 1.8616296296296298e-05, + "loss": 1.5034, + "step": 52980 + }, + { + "epoch": 9.42, + "learning_rate": 1.8613333333333337e-05, + "loss": 1.4552, + "step": 52985 + }, + { + "epoch": 9.42, + "learning_rate": 1.8610370370370372e-05, + "loss": 1.4742, + "step": 52990 + }, + { + "epoch": 9.42, + "learning_rate": 1.8607407407407408e-05, + "loss": 1.5992, + "step": 52995 + }, + { + "epoch": 9.42, + "learning_rate": 1.8604444444444447e-05, + "loss": 1.4624, + "step": 53000 + }, + { + "epoch": 9.42, + "learning_rate": 1.8601481481481482e-05, + "loss": 1.5355, + "step": 53005 + }, + { + "epoch": 9.42, + "learning_rate": 1.859851851851852e-05, + "loss": 1.5451, + "step": 53010 + }, + { + "epoch": 9.42, + "learning_rate": 1.8595555555555556e-05, + "loss": 1.3983, + "step": 53015 + }, + { + "epoch": 9.43, + "learning_rate": 1.8592592592592595e-05, + "loss": 1.5402, + "step": 53020 + }, + { + "epoch": 9.43, + "learning_rate": 1.858962962962963e-05, + "loss": 1.5319, + "step": 53025 + }, + { + "epoch": 9.43, + "learning_rate": 1.858666666666667e-05, + "loss": 1.4729, + "step": 53030 + }, + { + "epoch": 9.43, + "learning_rate": 1.8583703703703705e-05, + "loss": 1.6524, + "step": 53035 + }, + { + "epoch": 9.43, + "learning_rate": 1.8580740740740744e-05, + "loss": 1.5144, + "step": 53040 + }, + { + "epoch": 9.43, + "learning_rate": 1.8577777777777776e-05, + "loss": 1.5958, + "step": 53045 + }, + { + "epoch": 9.43, + "learning_rate": 1.8574814814814815e-05, + "loss": 1.418, + "step": 53050 + }, + { + "epoch": 9.43, + "learning_rate": 1.857185185185185e-05, + "loss": 1.4525, + "step": 53055 + }, + { + "epoch": 9.43, + "learning_rate": 1.856888888888889e-05, + "loss": 1.5126, + "step": 53060 + }, + { + "epoch": 9.43, + "learning_rate": 1.8565925925925924e-05, + "loss": 1.5664, + "step": 53065 + }, + { + "epoch": 9.43, + "learning_rate": 1.8562962962962963e-05, + "loss": 1.4276, + "step": 53070 + }, + { + "epoch": 9.44, + "learning_rate": 1.856e-05, + "loss": 1.4984, + "step": 53075 + }, + { + "epoch": 9.44, + "learning_rate": 1.8557037037037037e-05, + "loss": 1.6379, + "step": 53080 + }, + { + "epoch": 9.44, + "learning_rate": 1.8554074074074073e-05, + "loss": 1.6601, + "step": 53085 + }, + { + "epoch": 9.44, + "learning_rate": 1.855111111111111e-05, + "loss": 1.6103, + "step": 53090 + }, + { + "epoch": 9.44, + "learning_rate": 1.8548148148148147e-05, + "loss": 1.5087, + "step": 53095 + }, + { + "epoch": 9.44, + "learning_rate": 1.8545185185185186e-05, + "loss": 1.529, + "step": 53100 + }, + { + "epoch": 9.44, + "learning_rate": 1.854222222222222e-05, + "loss": 1.5469, + "step": 53105 + }, + { + "epoch": 9.44, + "learning_rate": 1.853925925925926e-05, + "loss": 1.6424, + "step": 53110 + }, + { + "epoch": 9.44, + "learning_rate": 1.8536296296296296e-05, + "loss": 1.3707, + "step": 53115 + }, + { + "epoch": 9.44, + "learning_rate": 1.8533333333333334e-05, + "loss": 1.4565, + "step": 53120 + }, + { + "epoch": 9.44, + "learning_rate": 1.853037037037037e-05, + "loss": 1.5717, + "step": 53125 + }, + { + "epoch": 9.45, + "learning_rate": 1.852740740740741e-05, + "loss": 1.5266, + "step": 53130 + }, + { + "epoch": 9.45, + "learning_rate": 1.8524444444444444e-05, + "loss": 1.6055, + "step": 53135 + }, + { + "epoch": 9.45, + "learning_rate": 1.8521481481481483e-05, + "loss": 1.5165, + "step": 53140 + }, + { + "epoch": 9.45, + "learning_rate": 1.8518518518518518e-05, + "loss": 1.5523, + "step": 53145 + }, + { + "epoch": 9.45, + "learning_rate": 1.8515555555555557e-05, + "loss": 1.5659, + "step": 53150 + }, + { + "epoch": 9.45, + "learning_rate": 1.8512592592592593e-05, + "loss": 1.5603, + "step": 53155 + }, + { + "epoch": 9.45, + "learning_rate": 1.8509629629629628e-05, + "loss": 1.5647, + "step": 53160 + }, + { + "epoch": 9.45, + "learning_rate": 1.8506666666666667e-05, + "loss": 1.4642, + "step": 53165 + }, + { + "epoch": 9.45, + "learning_rate": 1.8503703703703702e-05, + "loss": 1.6391, + "step": 53170 + }, + { + "epoch": 9.45, + "learning_rate": 1.850074074074074e-05, + "loss": 1.5329, + "step": 53175 + }, + { + "epoch": 9.45, + "learning_rate": 1.8497777777777776e-05, + "loss": 1.4352, + "step": 53180 + }, + { + "epoch": 9.46, + "learning_rate": 1.8494814814814815e-05, + "loss": 1.6342, + "step": 53185 + }, + { + "epoch": 9.46, + "learning_rate": 1.849185185185185e-05, + "loss": 1.4883, + "step": 53190 + }, + { + "epoch": 9.46, + "learning_rate": 1.848888888888889e-05, + "loss": 1.5331, + "step": 53195 + }, + { + "epoch": 9.46, + "learning_rate": 1.8485925925925925e-05, + "loss": 1.4351, + "step": 53200 + }, + { + "epoch": 9.46, + "learning_rate": 1.8482962962962964e-05, + "loss": 1.5567, + "step": 53205 + }, + { + "epoch": 9.46, + "learning_rate": 1.848e-05, + "loss": 1.5401, + "step": 53210 + }, + { + "epoch": 9.46, + "learning_rate": 1.8477037037037038e-05, + "loss": 1.5085, + "step": 53215 + }, + { + "epoch": 9.46, + "learning_rate": 1.8474074074074074e-05, + "loss": 1.4168, + "step": 53220 + }, + { + "epoch": 9.46, + "learning_rate": 1.8471111111111112e-05, + "loss": 1.568, + "step": 53225 + }, + { + "epoch": 9.46, + "learning_rate": 1.8468148148148148e-05, + "loss": 1.4346, + "step": 53230 + }, + { + "epoch": 9.46, + "learning_rate": 1.8465185185185187e-05, + "loss": 1.5355, + "step": 53235 + }, + { + "epoch": 9.46, + "learning_rate": 1.8462222222222222e-05, + "loss": 1.5787, + "step": 53240 + }, + { + "epoch": 9.47, + "learning_rate": 1.845925925925926e-05, + "loss": 1.554, + "step": 53245 + }, + { + "epoch": 9.47, + "learning_rate": 1.8456296296296296e-05, + "loss": 1.4237, + "step": 53250 + }, + { + "epoch": 9.47, + "learning_rate": 1.8453333333333335e-05, + "loss": 1.5452, + "step": 53255 + }, + { + "epoch": 9.47, + "learning_rate": 1.845037037037037e-05, + "loss": 1.4849, + "step": 53260 + }, + { + "epoch": 9.47, + "learning_rate": 1.844740740740741e-05, + "loss": 1.5303, + "step": 53265 + }, + { + "epoch": 9.47, + "learning_rate": 1.8444444444444445e-05, + "loss": 1.6177, + "step": 53270 + }, + { + "epoch": 9.47, + "learning_rate": 1.844148148148148e-05, + "loss": 1.5961, + "step": 53275 + }, + { + "epoch": 9.47, + "learning_rate": 1.843851851851852e-05, + "loss": 1.4775, + "step": 53280 + }, + { + "epoch": 9.47, + "learning_rate": 1.8435555555555554e-05, + "loss": 1.4317, + "step": 53285 + }, + { + "epoch": 9.47, + "learning_rate": 1.8432592592592593e-05, + "loss": 1.6531, + "step": 53290 + }, + { + "epoch": 9.47, + "learning_rate": 1.842962962962963e-05, + "loss": 1.5542, + "step": 53295 + }, + { + "epoch": 9.48, + "learning_rate": 1.8426666666666668e-05, + "loss": 1.7048, + "step": 53300 + }, + { + "epoch": 9.48, + "learning_rate": 1.8423703703703703e-05, + "loss": 1.6273, + "step": 53305 + }, + { + "epoch": 9.48, + "learning_rate": 1.8420740740740742e-05, + "loss": 1.4796, + "step": 53310 + }, + { + "epoch": 9.48, + "learning_rate": 1.8417777777777777e-05, + "loss": 1.5795, + "step": 53315 + }, + { + "epoch": 9.48, + "learning_rate": 1.8414814814814816e-05, + "loss": 1.5544, + "step": 53320 + }, + { + "epoch": 9.48, + "learning_rate": 1.841185185185185e-05, + "loss": 1.561, + "step": 53325 + }, + { + "epoch": 9.48, + "learning_rate": 1.840888888888889e-05, + "loss": 1.5616, + "step": 53330 + }, + { + "epoch": 9.48, + "learning_rate": 1.8405925925925926e-05, + "loss": 1.5686, + "step": 53335 + }, + { + "epoch": 9.48, + "learning_rate": 1.8402962962962965e-05, + "loss": 1.5136, + "step": 53340 + }, + { + "epoch": 9.48, + "learning_rate": 1.84e-05, + "loss": 1.4563, + "step": 53345 + }, + { + "epoch": 9.48, + "learning_rate": 1.839703703703704e-05, + "loss": 1.5079, + "step": 53350 + }, + { + "epoch": 9.49, + "learning_rate": 1.8394074074074074e-05, + "loss": 1.5759, + "step": 53355 + }, + { + "epoch": 9.49, + "learning_rate": 1.8391111111111113e-05, + "loss": 1.3618, + "step": 53360 + }, + { + "epoch": 9.49, + "learning_rate": 1.838814814814815e-05, + "loss": 1.5627, + "step": 53365 + }, + { + "epoch": 9.49, + "learning_rate": 1.8385185185185187e-05, + "loss": 1.4832, + "step": 53370 + }, + { + "epoch": 9.49, + "learning_rate": 1.8382222222222223e-05, + "loss": 1.5932, + "step": 53375 + }, + { + "epoch": 9.49, + "learning_rate": 1.837925925925926e-05, + "loss": 1.4519, + "step": 53380 + }, + { + "epoch": 9.49, + "learning_rate": 1.8376296296296297e-05, + "loss": 1.52, + "step": 53385 + }, + { + "epoch": 9.49, + "learning_rate": 1.8373333333333332e-05, + "loss": 1.4891, + "step": 53390 + }, + { + "epoch": 9.49, + "learning_rate": 1.837037037037037e-05, + "loss": 1.5784, + "step": 53395 + }, + { + "epoch": 9.49, + "learning_rate": 1.8367407407407407e-05, + "loss": 1.4897, + "step": 53400 + }, + { + "epoch": 9.49, + "learning_rate": 1.8364444444444446e-05, + "loss": 1.4513, + "step": 53405 + }, + { + "epoch": 9.5, + "learning_rate": 1.836148148148148e-05, + "loss": 1.5744, + "step": 53410 + }, + { + "epoch": 9.5, + "learning_rate": 1.835851851851852e-05, + "loss": 1.5402, + "step": 53415 + }, + { + "epoch": 9.5, + "learning_rate": 1.8355555555555555e-05, + "loss": 1.4784, + "step": 53420 + }, + { + "epoch": 9.5, + "learning_rate": 1.8352592592592594e-05, + "loss": 1.5556, + "step": 53425 + }, + { + "epoch": 9.5, + "learning_rate": 1.834962962962963e-05, + "loss": 1.5255, + "step": 53430 + }, + { + "epoch": 9.5, + "learning_rate": 1.834666666666667e-05, + "loss": 1.7009, + "step": 53435 + }, + { + "epoch": 9.5, + "learning_rate": 1.8343703703703704e-05, + "loss": 1.6039, + "step": 53440 + }, + { + "epoch": 9.5, + "learning_rate": 1.8340740740740743e-05, + "loss": 1.6701, + "step": 53445 + }, + { + "epoch": 9.5, + "learning_rate": 1.8337777777777778e-05, + "loss": 1.4897, + "step": 53450 + }, + { + "epoch": 9.5, + "learning_rate": 1.8334814814814817e-05, + "loss": 1.4417, + "step": 53455 + }, + { + "epoch": 9.5, + "learning_rate": 1.8331851851851852e-05, + "loss": 1.5649, + "step": 53460 + }, + { + "epoch": 9.5, + "learning_rate": 1.832888888888889e-05, + "loss": 1.5815, + "step": 53465 + }, + { + "epoch": 9.51, + "learning_rate": 1.8325925925925927e-05, + "loss": 1.5448, + "step": 53470 + }, + { + "epoch": 9.51, + "learning_rate": 1.8322962962962965e-05, + "loss": 1.4661, + "step": 53475 + }, + { + "epoch": 9.51, + "learning_rate": 1.832e-05, + "loss": 1.464, + "step": 53480 + }, + { + "epoch": 9.51, + "learning_rate": 1.831703703703704e-05, + "loss": 1.4425, + "step": 53485 + }, + { + "epoch": 9.51, + "learning_rate": 1.8314074074074075e-05, + "loss": 1.6214, + "step": 53490 + }, + { + "epoch": 9.51, + "learning_rate": 1.8311111111111114e-05, + "loss": 1.4337, + "step": 53495 + }, + { + "epoch": 9.51, + "learning_rate": 1.830814814814815e-05, + "loss": 1.4801, + "step": 53500 + }, + { + "epoch": 9.51, + "learning_rate": 1.8305185185185185e-05, + "loss": 1.5248, + "step": 53505 + }, + { + "epoch": 9.51, + "learning_rate": 1.8302222222222224e-05, + "loss": 1.5553, + "step": 53510 + }, + { + "epoch": 9.51, + "learning_rate": 1.829925925925926e-05, + "loss": 1.4498, + "step": 53515 + }, + { + "epoch": 9.51, + "learning_rate": 1.8296296296296298e-05, + "loss": 1.538, + "step": 53520 + }, + { + "epoch": 9.52, + "learning_rate": 1.8293333333333333e-05, + "loss": 1.471, + "step": 53525 + }, + { + "epoch": 9.52, + "learning_rate": 1.8290370370370372e-05, + "loss": 1.5087, + "step": 53530 + }, + { + "epoch": 9.52, + "learning_rate": 1.8287407407407407e-05, + "loss": 1.4605, + "step": 53535 + }, + { + "epoch": 9.52, + "learning_rate": 1.8284444444444446e-05, + "loss": 1.4719, + "step": 53540 + }, + { + "epoch": 9.52, + "learning_rate": 1.8281481481481482e-05, + "loss": 1.5288, + "step": 53545 + }, + { + "epoch": 9.52, + "learning_rate": 1.827851851851852e-05, + "loss": 1.4695, + "step": 53550 + }, + { + "epoch": 9.52, + "learning_rate": 1.8275555555555556e-05, + "loss": 1.4701, + "step": 53555 + }, + { + "epoch": 9.52, + "learning_rate": 1.8272592592592595e-05, + "loss": 1.5227, + "step": 53560 + }, + { + "epoch": 9.52, + "learning_rate": 1.826962962962963e-05, + "loss": 1.5101, + "step": 53565 + }, + { + "epoch": 9.52, + "learning_rate": 1.826666666666667e-05, + "loss": 1.4663, + "step": 53570 + }, + { + "epoch": 9.52, + "learning_rate": 1.8263703703703705e-05, + "loss": 1.6422, + "step": 53575 + }, + { + "epoch": 9.53, + "learning_rate": 1.8260740740740743e-05, + "loss": 1.6376, + "step": 53580 + }, + { + "epoch": 9.53, + "learning_rate": 1.825777777777778e-05, + "loss": 1.5237, + "step": 53585 + }, + { + "epoch": 9.53, + "learning_rate": 1.8254814814814818e-05, + "loss": 1.4501, + "step": 53590 + }, + { + "epoch": 9.53, + "learning_rate": 1.8251851851851853e-05, + "loss": 1.5463, + "step": 53595 + }, + { + "epoch": 9.53, + "learning_rate": 1.8248888888888892e-05, + "loss": 1.5614, + "step": 53600 + }, + { + "epoch": 9.53, + "learning_rate": 1.8245925925925927e-05, + "loss": 1.5523, + "step": 53605 + }, + { + "epoch": 9.53, + "learning_rate": 1.8242962962962966e-05, + "loss": 1.4974, + "step": 53610 + }, + { + "epoch": 9.53, + "learning_rate": 1.824e-05, + "loss": 1.5042, + "step": 53615 + }, + { + "epoch": 9.53, + "learning_rate": 1.8237037037037037e-05, + "loss": 1.5366, + "step": 53620 + }, + { + "epoch": 9.53, + "learning_rate": 1.8234074074074076e-05, + "loss": 1.5085, + "step": 53625 + }, + { + "epoch": 9.53, + "learning_rate": 1.823111111111111e-05, + "loss": 1.4552, + "step": 53630 + }, + { + "epoch": 9.54, + "learning_rate": 1.822814814814815e-05, + "loss": 1.5201, + "step": 53635 + }, + { + "epoch": 9.54, + "learning_rate": 1.8225185185185185e-05, + "loss": 1.5323, + "step": 53640 + }, + { + "epoch": 9.54, + "learning_rate": 1.8222222222222224e-05, + "loss": 1.5352, + "step": 53645 + }, + { + "epoch": 9.54, + "learning_rate": 1.821925925925926e-05, + "loss": 1.4549, + "step": 53650 + }, + { + "epoch": 9.54, + "learning_rate": 1.82162962962963e-05, + "loss": 1.4525, + "step": 53655 + }, + { + "epoch": 9.54, + "learning_rate": 1.8213333333333334e-05, + "loss": 1.4004, + "step": 53660 + }, + { + "epoch": 9.54, + "learning_rate": 1.8210370370370373e-05, + "loss": 1.6647, + "step": 53665 + }, + { + "epoch": 9.54, + "learning_rate": 1.8207407407407408e-05, + "loss": 1.4208, + "step": 53670 + }, + { + "epoch": 9.54, + "learning_rate": 1.8204444444444447e-05, + "loss": 1.4308, + "step": 53675 + }, + { + "epoch": 9.54, + "learning_rate": 1.8201481481481483e-05, + "loss": 1.5211, + "step": 53680 + }, + { + "epoch": 9.54, + "learning_rate": 1.819851851851852e-05, + "loss": 1.621, + "step": 53685 + }, + { + "epoch": 9.54, + "learning_rate": 1.8195555555555557e-05, + "loss": 1.5202, + "step": 53690 + }, + { + "epoch": 9.55, + "learning_rate": 1.8192592592592596e-05, + "loss": 1.4863, + "step": 53695 + }, + { + "epoch": 9.55, + "learning_rate": 1.818962962962963e-05, + "loss": 1.5894, + "step": 53700 + }, + { + "epoch": 9.55, + "learning_rate": 1.818666666666667e-05, + "loss": 1.5244, + "step": 53705 + }, + { + "epoch": 9.55, + "learning_rate": 1.8183703703703705e-05, + "loss": 1.5203, + "step": 53710 + }, + { + "epoch": 9.55, + "learning_rate": 1.8180740740740744e-05, + "loss": 1.5583, + "step": 53715 + }, + { + "epoch": 9.55, + "learning_rate": 1.817777777777778e-05, + "loss": 1.5182, + "step": 53720 + }, + { + "epoch": 9.55, + "learning_rate": 1.817481481481482e-05, + "loss": 1.5578, + "step": 53725 + }, + { + "epoch": 9.55, + "learning_rate": 1.8171851851851854e-05, + "loss": 1.5833, + "step": 53730 + }, + { + "epoch": 9.55, + "learning_rate": 1.816888888888889e-05, + "loss": 1.5828, + "step": 53735 + }, + { + "epoch": 9.55, + "learning_rate": 1.8165925925925928e-05, + "loss": 1.4147, + "step": 53740 + }, + { + "epoch": 9.55, + "learning_rate": 1.8162962962962963e-05, + "loss": 1.4974, + "step": 53745 + }, + { + "epoch": 9.56, + "learning_rate": 1.8160000000000002e-05, + "loss": 1.4519, + "step": 53750 + }, + { + "epoch": 9.56, + "learning_rate": 1.8157037037037038e-05, + "loss": 1.5565, + "step": 53755 + }, + { + "epoch": 9.56, + "learning_rate": 1.8154074074074077e-05, + "loss": 1.4758, + "step": 53760 + }, + { + "epoch": 9.56, + "learning_rate": 1.8151111111111112e-05, + "loss": 1.5015, + "step": 53765 + }, + { + "epoch": 9.56, + "learning_rate": 1.814814814814815e-05, + "loss": 1.5439, + "step": 53770 + }, + { + "epoch": 9.56, + "learning_rate": 1.8145185185185186e-05, + "loss": 1.4211, + "step": 53775 + }, + { + "epoch": 9.56, + "learning_rate": 1.8142222222222225e-05, + "loss": 1.5991, + "step": 53780 + }, + { + "epoch": 9.56, + "learning_rate": 1.813925925925926e-05, + "loss": 1.5673, + "step": 53785 + }, + { + "epoch": 9.56, + "learning_rate": 1.81362962962963e-05, + "loss": 1.5168, + "step": 53790 + }, + { + "epoch": 9.56, + "learning_rate": 1.8133333333333335e-05, + "loss": 1.6075, + "step": 53795 + }, + { + "epoch": 9.56, + "learning_rate": 1.8130370370370374e-05, + "loss": 1.5046, + "step": 53800 + }, + { + "epoch": 9.57, + "learning_rate": 1.8128e-05, + "loss": 1.536, + "step": 53805 + }, + { + "epoch": 9.57, + "learning_rate": 1.812503703703704e-05, + "loss": 1.6326, + "step": 53810 + }, + { + "epoch": 9.57, + "learning_rate": 1.8122074074074074e-05, + "loss": 1.6135, + "step": 53815 + }, + { + "epoch": 9.57, + "learning_rate": 1.8119111111111113e-05, + "loss": 1.5267, + "step": 53820 + }, + { + "epoch": 9.57, + "learning_rate": 1.811614814814815e-05, + "loss": 1.4672, + "step": 53825 + }, + { + "epoch": 9.57, + "learning_rate": 1.8113185185185187e-05, + "loss": 1.423, + "step": 53830 + }, + { + "epoch": 9.57, + "learning_rate": 1.8110222222222223e-05, + "loss": 1.4972, + "step": 53835 + }, + { + "epoch": 9.57, + "learning_rate": 1.810725925925926e-05, + "loss": 1.5916, + "step": 53840 + }, + { + "epoch": 9.57, + "learning_rate": 1.8104296296296297e-05, + "loss": 1.4925, + "step": 53845 + }, + { + "epoch": 9.57, + "learning_rate": 1.8101333333333332e-05, + "loss": 1.4897, + "step": 53850 + }, + { + "epoch": 9.57, + "learning_rate": 1.809837037037037e-05, + "loss": 1.5223, + "step": 53855 + }, + { + "epoch": 9.58, + "learning_rate": 1.8095407407407407e-05, + "loss": 1.5351, + "step": 53860 + }, + { + "epoch": 9.58, + "learning_rate": 1.8092444444444445e-05, + "loss": 1.5373, + "step": 53865 + }, + { + "epoch": 9.58, + "learning_rate": 1.808948148148148e-05, + "loss": 1.5604, + "step": 53870 + }, + { + "epoch": 9.58, + "learning_rate": 1.808651851851852e-05, + "loss": 1.4046, + "step": 53875 + }, + { + "epoch": 9.58, + "learning_rate": 1.8083555555555555e-05, + "loss": 1.5217, + "step": 53880 + }, + { + "epoch": 9.58, + "learning_rate": 1.8080592592592594e-05, + "loss": 1.5286, + "step": 53885 + }, + { + "epoch": 9.58, + "learning_rate": 1.807762962962963e-05, + "loss": 1.5288, + "step": 53890 + }, + { + "epoch": 9.58, + "learning_rate": 1.8074666666666668e-05, + "loss": 1.4257, + "step": 53895 + }, + { + "epoch": 9.58, + "learning_rate": 1.8071703703703704e-05, + "loss": 1.5695, + "step": 53900 + }, + { + "epoch": 9.58, + "learning_rate": 1.8068740740740742e-05, + "loss": 1.5515, + "step": 53905 + }, + { + "epoch": 9.58, + "learning_rate": 1.8065777777777778e-05, + "loss": 1.4966, + "step": 53910 + }, + { + "epoch": 9.58, + "learning_rate": 1.8062814814814817e-05, + "loss": 1.5215, + "step": 53915 + }, + { + "epoch": 9.59, + "learning_rate": 1.8059851851851852e-05, + "loss": 1.5538, + "step": 53920 + }, + { + "epoch": 9.59, + "learning_rate": 1.805688888888889e-05, + "loss": 1.5105, + "step": 53925 + }, + { + "epoch": 9.59, + "learning_rate": 1.8053925925925926e-05, + "loss": 1.4481, + "step": 53930 + }, + { + "epoch": 9.59, + "learning_rate": 1.8050962962962965e-05, + "loss": 1.4785, + "step": 53935 + }, + { + "epoch": 9.59, + "learning_rate": 1.8048e-05, + "loss": 1.5378, + "step": 53940 + }, + { + "epoch": 9.59, + "learning_rate": 1.804503703703704e-05, + "loss": 1.5608, + "step": 53945 + }, + { + "epoch": 9.59, + "learning_rate": 1.8042074074074075e-05, + "loss": 1.4849, + "step": 53950 + }, + { + "epoch": 9.59, + "learning_rate": 1.8039111111111114e-05, + "loss": 1.6106, + "step": 53955 + }, + { + "epoch": 9.59, + "learning_rate": 1.803614814814815e-05, + "loss": 1.4693, + "step": 53960 + }, + { + "epoch": 9.59, + "learning_rate": 1.8033185185185185e-05, + "loss": 1.4601, + "step": 53965 + }, + { + "epoch": 9.59, + "learning_rate": 1.8030222222222223e-05, + "loss": 1.567, + "step": 53970 + }, + { + "epoch": 9.6, + "learning_rate": 1.802725925925926e-05, + "loss": 1.5337, + "step": 53975 + }, + { + "epoch": 9.6, + "learning_rate": 1.8024296296296298e-05, + "loss": 1.5243, + "step": 53980 + }, + { + "epoch": 9.6, + "learning_rate": 1.8021333333333333e-05, + "loss": 1.4769, + "step": 53985 + }, + { + "epoch": 9.6, + "learning_rate": 1.8018370370370372e-05, + "loss": 1.5179, + "step": 53990 + }, + { + "epoch": 9.6, + "learning_rate": 1.8015407407407407e-05, + "loss": 1.4569, + "step": 53995 + }, + { + "epoch": 9.6, + "learning_rate": 1.8012444444444446e-05, + "loss": 1.5211, + "step": 54000 + }, + { + "epoch": 9.6, + "learning_rate": 1.800948148148148e-05, + "loss": 1.5762, + "step": 54005 + }, + { + "epoch": 9.6, + "learning_rate": 1.800651851851852e-05, + "loss": 1.5771, + "step": 54010 + }, + { + "epoch": 9.6, + "learning_rate": 1.8003555555555556e-05, + "loss": 1.4852, + "step": 54015 + }, + { + "epoch": 9.6, + "learning_rate": 1.8000592592592595e-05, + "loss": 1.5645, + "step": 54020 + }, + { + "epoch": 9.6, + "learning_rate": 1.799762962962963e-05, + "loss": 1.5083, + "step": 54025 + }, + { + "epoch": 9.61, + "learning_rate": 1.799466666666667e-05, + "loss": 1.4824, + "step": 54030 + }, + { + "epoch": 9.61, + "learning_rate": 1.7991703703703704e-05, + "loss": 1.6246, + "step": 54035 + }, + { + "epoch": 9.61, + "learning_rate": 1.7988740740740743e-05, + "loss": 1.4044, + "step": 54040 + }, + { + "epoch": 9.61, + "learning_rate": 1.798577777777778e-05, + "loss": 1.5695, + "step": 54045 + }, + { + "epoch": 9.61, + "learning_rate": 1.7982814814814817e-05, + "loss": 1.5076, + "step": 54050 + }, + { + "epoch": 9.61, + "learning_rate": 1.7979851851851853e-05, + "loss": 1.4996, + "step": 54055 + }, + { + "epoch": 9.61, + "learning_rate": 1.7976888888888892e-05, + "loss": 1.553, + "step": 54060 + }, + { + "epoch": 9.61, + "learning_rate": 1.7973925925925927e-05, + "loss": 1.4756, + "step": 54065 + }, + { + "epoch": 9.61, + "learning_rate": 1.7970962962962966e-05, + "loss": 1.5169, + "step": 54070 + }, + { + "epoch": 9.61, + "learning_rate": 1.7968e-05, + "loss": 1.6612, + "step": 54075 + }, + { + "epoch": 9.61, + "learning_rate": 1.7965037037037037e-05, + "loss": 1.4332, + "step": 54080 + }, + { + "epoch": 9.62, + "learning_rate": 1.7962074074074076e-05, + "loss": 1.5928, + "step": 54085 + }, + { + "epoch": 9.62, + "learning_rate": 1.795911111111111e-05, + "loss": 1.5339, + "step": 54090 + }, + { + "epoch": 9.62, + "learning_rate": 1.795614814814815e-05, + "loss": 1.4681, + "step": 54095 + }, + { + "epoch": 9.62, + "learning_rate": 1.7953185185185185e-05, + "loss": 1.4833, + "step": 54100 + }, + { + "epoch": 9.62, + "learning_rate": 1.7950222222222224e-05, + "loss": 1.5484, + "step": 54105 + }, + { + "epoch": 9.62, + "learning_rate": 1.794725925925926e-05, + "loss": 1.4835, + "step": 54110 + }, + { + "epoch": 9.62, + "learning_rate": 1.79442962962963e-05, + "loss": 1.5865, + "step": 54115 + }, + { + "epoch": 9.62, + "learning_rate": 1.7941333333333334e-05, + "loss": 1.4044, + "step": 54120 + }, + { + "epoch": 9.62, + "learning_rate": 1.7938370370370373e-05, + "loss": 1.5587, + "step": 54125 + }, + { + "epoch": 9.62, + "learning_rate": 1.7935407407407408e-05, + "loss": 1.4233, + "step": 54130 + }, + { + "epoch": 9.62, + "learning_rate": 1.7932444444444447e-05, + "loss": 1.6024, + "step": 54135 + }, + { + "epoch": 9.62, + "learning_rate": 1.7929481481481482e-05, + "loss": 1.4617, + "step": 54140 + }, + { + "epoch": 9.63, + "learning_rate": 1.792651851851852e-05, + "loss": 1.4568, + "step": 54145 + }, + { + "epoch": 9.63, + "learning_rate": 1.7923555555555557e-05, + "loss": 1.5017, + "step": 54150 + }, + { + "epoch": 9.63, + "learning_rate": 1.7920592592592595e-05, + "loss": 1.5826, + "step": 54155 + }, + { + "epoch": 9.63, + "learning_rate": 1.791762962962963e-05, + "loss": 1.5808, + "step": 54160 + }, + { + "epoch": 9.63, + "learning_rate": 1.791466666666667e-05, + "loss": 1.4591, + "step": 54165 + }, + { + "epoch": 9.63, + "learning_rate": 1.7911703703703705e-05, + "loss": 1.5615, + "step": 54170 + }, + { + "epoch": 9.63, + "learning_rate": 1.7908740740740744e-05, + "loss": 1.4873, + "step": 54175 + }, + { + "epoch": 9.63, + "learning_rate": 1.790577777777778e-05, + "loss": 1.5246, + "step": 54180 + }, + { + "epoch": 9.63, + "learning_rate": 1.7902814814814818e-05, + "loss": 1.4016, + "step": 54185 + }, + { + "epoch": 9.63, + "learning_rate": 1.7899851851851854e-05, + "loss": 1.6419, + "step": 54190 + }, + { + "epoch": 9.63, + "learning_rate": 1.789688888888889e-05, + "loss": 1.4813, + "step": 54195 + }, + { + "epoch": 9.64, + "learning_rate": 1.7893925925925928e-05, + "loss": 1.5284, + "step": 54200 + }, + { + "epoch": 9.64, + "learning_rate": 1.7890962962962963e-05, + "loss": 1.4954, + "step": 54205 + }, + { + "epoch": 9.64, + "learning_rate": 1.7888000000000002e-05, + "loss": 1.4813, + "step": 54210 + }, + { + "epoch": 9.64, + "learning_rate": 1.7885037037037038e-05, + "loss": 1.5154, + "step": 54215 + }, + { + "epoch": 9.64, + "learning_rate": 1.7882074074074076e-05, + "loss": 1.5826, + "step": 54220 + }, + { + "epoch": 9.64, + "learning_rate": 1.7879111111111112e-05, + "loss": 1.5873, + "step": 54225 + }, + { + "epoch": 9.64, + "learning_rate": 1.787614814814815e-05, + "loss": 1.546, + "step": 54230 + }, + { + "epoch": 9.64, + "learning_rate": 1.7873185185185186e-05, + "loss": 1.5245, + "step": 54235 + }, + { + "epoch": 9.64, + "learning_rate": 1.7870222222222225e-05, + "loss": 1.4937, + "step": 54240 + }, + { + "epoch": 9.64, + "learning_rate": 1.786725925925926e-05, + "loss": 1.608, + "step": 54245 + }, + { + "epoch": 9.64, + "learning_rate": 1.78642962962963e-05, + "loss": 1.5129, + "step": 54250 + }, + { + "epoch": 9.65, + "learning_rate": 1.7861333333333335e-05, + "loss": 1.5883, + "step": 54255 + }, + { + "epoch": 9.65, + "learning_rate": 1.7858370370370373e-05, + "loss": 1.5027, + "step": 54260 + }, + { + "epoch": 9.65, + "learning_rate": 1.785540740740741e-05, + "loss": 1.6329, + "step": 54265 + }, + { + "epoch": 9.65, + "learning_rate": 1.7852444444444448e-05, + "loss": 1.4551, + "step": 54270 + }, + { + "epoch": 9.65, + "learning_rate": 1.7849481481481483e-05, + "loss": 1.5784, + "step": 54275 + }, + { + "epoch": 9.65, + "learning_rate": 1.7846518518518522e-05, + "loss": 1.4644, + "step": 54280 + }, + { + "epoch": 9.65, + "learning_rate": 1.7843555555555554e-05, + "loss": 1.5849, + "step": 54285 + }, + { + "epoch": 9.65, + "learning_rate": 1.7840592592592593e-05, + "loss": 1.5397, + "step": 54290 + }, + { + "epoch": 9.65, + "learning_rate": 1.7837629629629628e-05, + "loss": 1.5748, + "step": 54295 + }, + { + "epoch": 9.65, + "learning_rate": 1.7834666666666667e-05, + "loss": 1.5966, + "step": 54300 + }, + { + "epoch": 9.65, + "learning_rate": 1.7831703703703703e-05, + "loss": 1.4973, + "step": 54305 + }, + { + "epoch": 9.66, + "learning_rate": 1.782874074074074e-05, + "loss": 1.5531, + "step": 54310 + }, + { + "epoch": 9.66, + "learning_rate": 1.7825777777777777e-05, + "loss": 1.5604, + "step": 54315 + }, + { + "epoch": 9.66, + "learning_rate": 1.7822814814814816e-05, + "loss": 1.4931, + "step": 54320 + }, + { + "epoch": 9.66, + "learning_rate": 1.781985185185185e-05, + "loss": 1.6302, + "step": 54325 + }, + { + "epoch": 9.66, + "learning_rate": 1.781688888888889e-05, + "loss": 1.5227, + "step": 54330 + }, + { + "epoch": 9.66, + "learning_rate": 1.7813925925925925e-05, + "loss": 1.5484, + "step": 54335 + }, + { + "epoch": 9.66, + "learning_rate": 1.7810962962962964e-05, + "loss": 1.5015, + "step": 54340 + }, + { + "epoch": 9.66, + "learning_rate": 1.7808e-05, + "loss": 1.7295, + "step": 54345 + }, + { + "epoch": 9.66, + "learning_rate": 1.780503703703704e-05, + "loss": 1.6659, + "step": 54350 + }, + { + "epoch": 9.66, + "learning_rate": 1.7802074074074074e-05, + "loss": 1.435, + "step": 54355 + }, + { + "epoch": 9.66, + "learning_rate": 1.779911111111111e-05, + "loss": 1.4806, + "step": 54360 + }, + { + "epoch": 9.66, + "learning_rate": 1.7796148148148148e-05, + "loss": 1.4258, + "step": 54365 + }, + { + "epoch": 9.67, + "learning_rate": 1.7793185185185184e-05, + "loss": 1.6189, + "step": 54370 + }, + { + "epoch": 9.67, + "learning_rate": 1.7790222222222222e-05, + "loss": 1.4716, + "step": 54375 + }, + { + "epoch": 9.67, + "learning_rate": 1.7787259259259258e-05, + "loss": 1.4184, + "step": 54380 + }, + { + "epoch": 9.67, + "learning_rate": 1.7784296296296297e-05, + "loss": 1.5484, + "step": 54385 + }, + { + "epoch": 9.67, + "learning_rate": 1.7781333333333332e-05, + "loss": 1.4738, + "step": 54390 + }, + { + "epoch": 9.67, + "learning_rate": 1.777837037037037e-05, + "loss": 1.5376, + "step": 54395 + }, + { + "epoch": 9.67, + "learning_rate": 1.7775407407407406e-05, + "loss": 1.5581, + "step": 54400 + }, + { + "epoch": 9.67, + "learning_rate": 1.7772444444444445e-05, + "loss": 1.4833, + "step": 54405 + }, + { + "epoch": 9.67, + "learning_rate": 1.776948148148148e-05, + "loss": 1.6388, + "step": 54410 + }, + { + "epoch": 9.67, + "learning_rate": 1.776651851851852e-05, + "loss": 1.5379, + "step": 54415 + }, + { + "epoch": 9.67, + "learning_rate": 1.7763555555555555e-05, + "loss": 1.4686, + "step": 54420 + }, + { + "epoch": 9.68, + "learning_rate": 1.7760592592592594e-05, + "loss": 1.5835, + "step": 54425 + }, + { + "epoch": 9.68, + "learning_rate": 1.775762962962963e-05, + "loss": 1.5437, + "step": 54430 + }, + { + "epoch": 9.68, + "learning_rate": 1.7754666666666668e-05, + "loss": 1.4519, + "step": 54435 + }, + { + "epoch": 9.68, + "learning_rate": 1.7751703703703703e-05, + "loss": 1.4027, + "step": 54440 + }, + { + "epoch": 9.68, + "learning_rate": 1.7748740740740742e-05, + "loss": 1.4697, + "step": 54445 + }, + { + "epoch": 9.68, + "learning_rate": 1.7745777777777778e-05, + "loss": 1.5065, + "step": 54450 + }, + { + "epoch": 9.68, + "learning_rate": 1.7742814814814816e-05, + "loss": 1.5559, + "step": 54455 + }, + { + "epoch": 9.68, + "learning_rate": 1.7739851851851852e-05, + "loss": 1.6508, + "step": 54460 + }, + { + "epoch": 9.68, + "learning_rate": 1.773688888888889e-05, + "loss": 1.5425, + "step": 54465 + }, + { + "epoch": 9.68, + "learning_rate": 1.7733925925925926e-05, + "loss": 1.3459, + "step": 54470 + }, + { + "epoch": 9.68, + "learning_rate": 1.773096296296296e-05, + "loss": 1.5579, + "step": 54475 + }, + { + "epoch": 9.69, + "learning_rate": 1.7728e-05, + "loss": 1.5081, + "step": 54480 + }, + { + "epoch": 9.69, + "learning_rate": 1.7725037037037036e-05, + "loss": 1.5215, + "step": 54485 + }, + { + "epoch": 9.69, + "learning_rate": 1.7722074074074075e-05, + "loss": 1.5532, + "step": 54490 + }, + { + "epoch": 9.69, + "learning_rate": 1.771911111111111e-05, + "loss": 1.5331, + "step": 54495 + }, + { + "epoch": 9.69, + "learning_rate": 1.771614814814815e-05, + "loss": 1.4741, + "step": 54500 + }, + { + "epoch": 9.69, + "learning_rate": 1.7713185185185184e-05, + "loss": 1.5276, + "step": 54505 + }, + { + "epoch": 9.69, + "learning_rate": 1.7710222222222223e-05, + "loss": 1.6184, + "step": 54510 + }, + { + "epoch": 9.69, + "learning_rate": 1.770725925925926e-05, + "loss": 1.5541, + "step": 54515 + }, + { + "epoch": 9.69, + "learning_rate": 1.7704296296296297e-05, + "loss": 1.5962, + "step": 54520 + }, + { + "epoch": 9.69, + "learning_rate": 1.7701333333333333e-05, + "loss": 1.5272, + "step": 54525 + }, + { + "epoch": 9.69, + "learning_rate": 1.769837037037037e-05, + "loss": 1.5855, + "step": 54530 + }, + { + "epoch": 9.7, + "learning_rate": 1.7695407407407407e-05, + "loss": 1.4996, + "step": 54535 + }, + { + "epoch": 9.7, + "learning_rate": 1.7692444444444446e-05, + "loss": 1.4205, + "step": 54540 + }, + { + "epoch": 9.7, + "learning_rate": 1.768948148148148e-05, + "loss": 1.4455, + "step": 54545 + }, + { + "epoch": 9.7, + "learning_rate": 1.768651851851852e-05, + "loss": 1.5921, + "step": 54550 + }, + { + "epoch": 9.7, + "learning_rate": 1.7683555555555556e-05, + "loss": 1.4926, + "step": 54555 + }, + { + "epoch": 9.7, + "learning_rate": 1.7680592592592594e-05, + "loss": 1.5612, + "step": 54560 + }, + { + "epoch": 9.7, + "learning_rate": 1.767762962962963e-05, + "loss": 1.5093, + "step": 54565 + }, + { + "epoch": 9.7, + "learning_rate": 1.767466666666667e-05, + "loss": 1.5879, + "step": 54570 + }, + { + "epoch": 9.7, + "learning_rate": 1.7671703703703704e-05, + "loss": 1.4768, + "step": 54575 + }, + { + "epoch": 9.7, + "learning_rate": 1.7668740740740743e-05, + "loss": 1.5834, + "step": 54580 + }, + { + "epoch": 9.7, + "learning_rate": 1.766577777777778e-05, + "loss": 1.5731, + "step": 54585 + }, + { + "epoch": 9.7, + "learning_rate": 1.7662814814814814e-05, + "loss": 1.4477, + "step": 54590 + }, + { + "epoch": 9.71, + "learning_rate": 1.7659851851851853e-05, + "loss": 1.5977, + "step": 54595 + }, + { + "epoch": 9.71, + "learning_rate": 1.7656888888888888e-05, + "loss": 1.4965, + "step": 54600 + }, + { + "epoch": 9.71, + "learning_rate": 1.7653925925925927e-05, + "loss": 1.5664, + "step": 54605 + }, + { + "epoch": 9.71, + "learning_rate": 1.7650962962962962e-05, + "loss": 1.5256, + "step": 54610 + }, + { + "epoch": 9.71, + "learning_rate": 1.7648e-05, + "loss": 1.546, + "step": 54615 + }, + { + "epoch": 9.71, + "learning_rate": 1.7645037037037037e-05, + "loss": 1.525, + "step": 54620 + }, + { + "epoch": 9.71, + "learning_rate": 1.7642074074074075e-05, + "loss": 1.6668, + "step": 54625 + }, + { + "epoch": 9.71, + "learning_rate": 1.763911111111111e-05, + "loss": 1.5191, + "step": 54630 + }, + { + "epoch": 9.71, + "learning_rate": 1.763614814814815e-05, + "loss": 1.4967, + "step": 54635 + }, + { + "epoch": 9.71, + "learning_rate": 1.7633185185185185e-05, + "loss": 1.5297, + "step": 54640 + }, + { + "epoch": 9.71, + "learning_rate": 1.7630222222222224e-05, + "loss": 1.4105, + "step": 54645 + }, + { + "epoch": 9.72, + "learning_rate": 1.762725925925926e-05, + "loss": 1.4492, + "step": 54650 + }, + { + "epoch": 9.72, + "learning_rate": 1.7624296296296298e-05, + "loss": 1.5065, + "step": 54655 + }, + { + "epoch": 9.72, + "learning_rate": 1.7621333333333334e-05, + "loss": 1.6214, + "step": 54660 + }, + { + "epoch": 9.72, + "learning_rate": 1.7618370370370372e-05, + "loss": 1.6602, + "step": 54665 + }, + { + "epoch": 9.72, + "learning_rate": 1.7615407407407408e-05, + "loss": 1.47, + "step": 54670 + }, + { + "epoch": 9.72, + "learning_rate": 1.7612444444444447e-05, + "loss": 1.5095, + "step": 54675 + }, + { + "epoch": 9.72, + "learning_rate": 1.7609481481481482e-05, + "loss": 1.5392, + "step": 54680 + }, + { + "epoch": 9.72, + "learning_rate": 1.760651851851852e-05, + "loss": 1.4964, + "step": 54685 + }, + { + "epoch": 9.72, + "learning_rate": 1.7603555555555556e-05, + "loss": 1.5416, + "step": 54690 + }, + { + "epoch": 9.72, + "learning_rate": 1.7600592592592595e-05, + "loss": 1.5021, + "step": 54695 + }, + { + "epoch": 9.72, + "learning_rate": 1.759762962962963e-05, + "loss": 1.4246, + "step": 54700 + }, + { + "epoch": 9.73, + "learning_rate": 1.7594666666666666e-05, + "loss": 1.4804, + "step": 54705 + }, + { + "epoch": 9.73, + "learning_rate": 1.7591703703703705e-05, + "loss": 1.6877, + "step": 54710 + }, + { + "epoch": 9.73, + "learning_rate": 1.758874074074074e-05, + "loss": 1.6503, + "step": 54715 + }, + { + "epoch": 9.73, + "learning_rate": 1.758577777777778e-05, + "loss": 1.5562, + "step": 54720 + }, + { + "epoch": 9.73, + "learning_rate": 1.7582814814814815e-05, + "loss": 1.5744, + "step": 54725 + }, + { + "epoch": 9.73, + "learning_rate": 1.7579851851851853e-05, + "loss": 1.495, + "step": 54730 + }, + { + "epoch": 9.73, + "learning_rate": 1.757688888888889e-05, + "loss": 1.5209, + "step": 54735 + }, + { + "epoch": 9.73, + "learning_rate": 1.7573925925925928e-05, + "loss": 1.5309, + "step": 54740 + }, + { + "epoch": 9.73, + "learning_rate": 1.7570962962962963e-05, + "loss": 1.3946, + "step": 54745 + }, + { + "epoch": 9.73, + "learning_rate": 1.7568000000000002e-05, + "loss": 1.5188, + "step": 54750 + }, + { + "epoch": 9.73, + "learning_rate": 1.7565037037037037e-05, + "loss": 1.4692, + "step": 54755 + }, + { + "epoch": 9.74, + "learning_rate": 1.7562074074074076e-05, + "loss": 1.5176, + "step": 54760 + }, + { + "epoch": 9.74, + "learning_rate": 1.755911111111111e-05, + "loss": 1.4949, + "step": 54765 + }, + { + "epoch": 9.74, + "learning_rate": 1.755614814814815e-05, + "loss": 1.4614, + "step": 54770 + }, + { + "epoch": 9.74, + "learning_rate": 1.7553185185185186e-05, + "loss": 1.6215, + "step": 54775 + }, + { + "epoch": 9.74, + "learning_rate": 1.7550222222222225e-05, + "loss": 1.6982, + "step": 54780 + }, + { + "epoch": 9.74, + "learning_rate": 1.754725925925926e-05, + "loss": 1.5741, + "step": 54785 + }, + { + "epoch": 9.74, + "learning_rate": 1.75442962962963e-05, + "loss": 1.7009, + "step": 54790 + }, + { + "epoch": 9.74, + "learning_rate": 1.7541333333333334e-05, + "loss": 1.5616, + "step": 54795 + }, + { + "epoch": 9.74, + "learning_rate": 1.7538370370370373e-05, + "loss": 1.6574, + "step": 54800 + }, + { + "epoch": 9.74, + "learning_rate": 1.753540740740741e-05, + "loss": 1.5594, + "step": 54805 + }, + { + "epoch": 9.74, + "learning_rate": 1.7532444444444447e-05, + "loss": 1.6708, + "step": 54810 + }, + { + "epoch": 9.74, + "learning_rate": 1.7529481481481483e-05, + "loss": 1.5441, + "step": 54815 + }, + { + "epoch": 9.75, + "learning_rate": 1.7526518518518518e-05, + "loss": 1.4444, + "step": 54820 + }, + { + "epoch": 9.75, + "learning_rate": 1.7523555555555557e-05, + "loss": 1.5511, + "step": 54825 + }, + { + "epoch": 9.75, + "learning_rate": 1.7520592592592593e-05, + "loss": 1.497, + "step": 54830 + }, + { + "epoch": 9.75, + "learning_rate": 1.751762962962963e-05, + "loss": 1.5138, + "step": 54835 + }, + { + "epoch": 9.75, + "learning_rate": 1.7514666666666667e-05, + "loss": 1.5818, + "step": 54840 + }, + { + "epoch": 9.75, + "learning_rate": 1.7511703703703706e-05, + "loss": 1.4784, + "step": 54845 + }, + { + "epoch": 9.75, + "learning_rate": 1.750874074074074e-05, + "loss": 1.6055, + "step": 54850 + }, + { + "epoch": 9.75, + "learning_rate": 1.750577777777778e-05, + "loss": 1.574, + "step": 54855 + }, + { + "epoch": 9.75, + "learning_rate": 1.7502814814814815e-05, + "loss": 1.4169, + "step": 54860 + }, + { + "epoch": 9.75, + "learning_rate": 1.7499851851851854e-05, + "loss": 1.5426, + "step": 54865 + }, + { + "epoch": 9.75, + "learning_rate": 1.749688888888889e-05, + "loss": 1.5598, + "step": 54870 + }, + { + "epoch": 9.76, + "learning_rate": 1.749392592592593e-05, + "loss": 1.5255, + "step": 54875 + }, + { + "epoch": 9.76, + "learning_rate": 1.7490962962962964e-05, + "loss": 1.4475, + "step": 54880 + }, + { + "epoch": 9.76, + "learning_rate": 1.7488000000000003e-05, + "loss": 1.6035, + "step": 54885 + }, + { + "epoch": 9.76, + "learning_rate": 1.7485037037037038e-05, + "loss": 1.4921, + "step": 54890 + }, + { + "epoch": 9.76, + "learning_rate": 1.7482074074074077e-05, + "loss": 1.6154, + "step": 54895 + }, + { + "epoch": 9.76, + "learning_rate": 1.7479111111111112e-05, + "loss": 1.42, + "step": 54900 + }, + { + "epoch": 9.76, + "learning_rate": 1.747614814814815e-05, + "loss": 1.4745, + "step": 54905 + }, + { + "epoch": 9.76, + "learning_rate": 1.7473185185185187e-05, + "loss": 1.5056, + "step": 54910 + }, + { + "epoch": 9.76, + "learning_rate": 1.7470222222222225e-05, + "loss": 1.6073, + "step": 54915 + }, + { + "epoch": 9.76, + "learning_rate": 1.746725925925926e-05, + "loss": 1.5133, + "step": 54920 + }, + { + "epoch": 9.76, + "learning_rate": 1.74642962962963e-05, + "loss": 1.5787, + "step": 54925 + }, + { + "epoch": 9.77, + "learning_rate": 1.7461333333333335e-05, + "loss": 1.4685, + "step": 54930 + }, + { + "epoch": 9.77, + "learning_rate": 1.745837037037037e-05, + "loss": 1.4727, + "step": 54935 + }, + { + "epoch": 9.77, + "learning_rate": 1.745540740740741e-05, + "loss": 1.6172, + "step": 54940 + }, + { + "epoch": 9.77, + "learning_rate": 1.7452444444444445e-05, + "loss": 1.4875, + "step": 54945 + }, + { + "epoch": 9.77, + "learning_rate": 1.7449481481481484e-05, + "loss": 1.5996, + "step": 54950 + }, + { + "epoch": 9.77, + "learning_rate": 1.744651851851852e-05, + "loss": 1.5735, + "step": 54955 + }, + { + "epoch": 9.77, + "learning_rate": 1.7443555555555558e-05, + "loss": 1.5212, + "step": 54960 + }, + { + "epoch": 9.77, + "learning_rate": 1.7440592592592593e-05, + "loss": 1.4293, + "step": 54965 + }, + { + "epoch": 9.77, + "learning_rate": 1.7437629629629632e-05, + "loss": 1.5809, + "step": 54970 + }, + { + "epoch": 9.77, + "learning_rate": 1.7434666666666668e-05, + "loss": 1.4453, + "step": 54975 + }, + { + "epoch": 9.77, + "learning_rate": 1.7431703703703706e-05, + "loss": 1.4591, + "step": 54980 + }, + { + "epoch": 9.78, + "learning_rate": 1.7428740740740742e-05, + "loss": 1.4652, + "step": 54985 + }, + { + "epoch": 9.78, + "learning_rate": 1.742577777777778e-05, + "loss": 1.5661, + "step": 54990 + }, + { + "epoch": 9.78, + "learning_rate": 1.7422814814814816e-05, + "loss": 1.4299, + "step": 54995 + }, + { + "epoch": 9.78, + "learning_rate": 1.7419851851851855e-05, + "loss": 1.6477, + "step": 55000 + }, + { + "epoch": 9.78, + "learning_rate": 1.741688888888889e-05, + "loss": 1.4675, + "step": 55005 + }, + { + "epoch": 9.78, + "learning_rate": 1.741392592592593e-05, + "loss": 1.5621, + "step": 55010 + }, + { + "epoch": 9.78, + "learning_rate": 1.7410962962962965e-05, + "loss": 1.5241, + "step": 55015 + }, + { + "epoch": 9.78, + "learning_rate": 1.7408000000000003e-05, + "loss": 1.636, + "step": 55020 + }, + { + "epoch": 9.78, + "learning_rate": 1.740503703703704e-05, + "loss": 1.5965, + "step": 55025 + }, + { + "epoch": 9.78, + "learning_rate": 1.7402074074074078e-05, + "loss": 1.6819, + "step": 55030 + }, + { + "epoch": 9.78, + "learning_rate": 1.7399111111111113e-05, + "loss": 1.5361, + "step": 55035 + }, + { + "epoch": 9.78, + "learning_rate": 1.7396148148148152e-05, + "loss": 1.564, + "step": 55040 + }, + { + "epoch": 9.79, + "learning_rate": 1.7393185185185187e-05, + "loss": 1.4741, + "step": 55045 + }, + { + "epoch": 9.79, + "learning_rate": 1.7390222222222223e-05, + "loss": 1.5171, + "step": 55050 + }, + { + "epoch": 9.79, + "learning_rate": 1.738725925925926e-05, + "loss": 1.5494, + "step": 55055 + }, + { + "epoch": 9.79, + "learning_rate": 1.7384296296296297e-05, + "loss": 1.5259, + "step": 55060 + }, + { + "epoch": 9.79, + "learning_rate": 1.7381333333333332e-05, + "loss": 1.5216, + "step": 55065 + }, + { + "epoch": 9.79, + "learning_rate": 1.737837037037037e-05, + "loss": 1.5086, + "step": 55070 + }, + { + "epoch": 9.79, + "learning_rate": 1.7375407407407407e-05, + "loss": 1.3518, + "step": 55075 + }, + { + "epoch": 9.79, + "learning_rate": 1.7372444444444446e-05, + "loss": 1.4738, + "step": 55080 + }, + { + "epoch": 9.79, + "learning_rate": 1.736948148148148e-05, + "loss": 1.5763, + "step": 55085 + }, + { + "epoch": 9.79, + "learning_rate": 1.7366518518518516e-05, + "loss": 1.606, + "step": 55090 + }, + { + "epoch": 9.79, + "learning_rate": 1.7363555555555555e-05, + "loss": 1.4934, + "step": 55095 + }, + { + "epoch": 9.8, + "learning_rate": 1.736059259259259e-05, + "loss": 1.5381, + "step": 55100 + }, + { + "epoch": 9.8, + "learning_rate": 1.735762962962963e-05, + "loss": 1.4844, + "step": 55105 + }, + { + "epoch": 9.8, + "learning_rate": 1.7354666666666665e-05, + "loss": 1.5998, + "step": 55110 + }, + { + "epoch": 9.8, + "learning_rate": 1.7351703703703704e-05, + "loss": 1.4209, + "step": 55115 + }, + { + "epoch": 9.8, + "learning_rate": 1.734874074074074e-05, + "loss": 1.4938, + "step": 55120 + }, + { + "epoch": 9.8, + "learning_rate": 1.7345777777777778e-05, + "loss": 1.5149, + "step": 55125 + }, + { + "epoch": 9.8, + "learning_rate": 1.7342814814814813e-05, + "loss": 1.5591, + "step": 55130 + }, + { + "epoch": 9.8, + "learning_rate": 1.7339851851851852e-05, + "loss": 1.4319, + "step": 55135 + }, + { + "epoch": 9.8, + "learning_rate": 1.7336888888888888e-05, + "loss": 1.5793, + "step": 55140 + }, + { + "epoch": 9.8, + "learning_rate": 1.7333925925925927e-05, + "loss": 1.4591, + "step": 55145 + }, + { + "epoch": 9.8, + "learning_rate": 1.7330962962962962e-05, + "loss": 1.6139, + "step": 55150 + }, + { + "epoch": 9.81, + "learning_rate": 1.7328e-05, + "loss": 1.5746, + "step": 55155 + }, + { + "epoch": 9.81, + "learning_rate": 1.7325037037037036e-05, + "loss": 1.3921, + "step": 55160 + }, + { + "epoch": 9.81, + "learning_rate": 1.7322074074074075e-05, + "loss": 1.5366, + "step": 55165 + }, + { + "epoch": 9.81, + "learning_rate": 1.731911111111111e-05, + "loss": 1.5098, + "step": 55170 + }, + { + "epoch": 9.81, + "learning_rate": 1.731614814814815e-05, + "loss": 1.5341, + "step": 55175 + }, + { + "epoch": 9.81, + "learning_rate": 1.7313185185185185e-05, + "loss": 1.5865, + "step": 55180 + }, + { + "epoch": 9.81, + "learning_rate": 1.7310222222222224e-05, + "loss": 1.5945, + "step": 55185 + }, + { + "epoch": 9.81, + "learning_rate": 1.730725925925926e-05, + "loss": 1.4708, + "step": 55190 + }, + { + "epoch": 9.81, + "learning_rate": 1.7304296296296298e-05, + "loss": 1.6234, + "step": 55195 + }, + { + "epoch": 9.81, + "learning_rate": 1.7301333333333333e-05, + "loss": 1.5696, + "step": 55200 + }, + { + "epoch": 9.81, + "learning_rate": 1.729837037037037e-05, + "loss": 1.4576, + "step": 55205 + }, + { + "epoch": 9.82, + "learning_rate": 1.7295407407407408e-05, + "loss": 1.4692, + "step": 55210 + }, + { + "epoch": 9.82, + "learning_rate": 1.7292444444444443e-05, + "loss": 1.3785, + "step": 55215 + }, + { + "epoch": 9.82, + "learning_rate": 1.7289481481481482e-05, + "loss": 1.3889, + "step": 55220 + }, + { + "epoch": 9.82, + "learning_rate": 1.7286518518518517e-05, + "loss": 1.529, + "step": 55225 + }, + { + "epoch": 9.82, + "learning_rate": 1.7283555555555556e-05, + "loss": 1.6058, + "step": 55230 + }, + { + "epoch": 9.82, + "learning_rate": 1.728059259259259e-05, + "loss": 1.54, + "step": 55235 + }, + { + "epoch": 9.82, + "learning_rate": 1.727762962962963e-05, + "loss": 1.5099, + "step": 55240 + }, + { + "epoch": 9.82, + "learning_rate": 1.7274666666666666e-05, + "loss": 1.522, + "step": 55245 + }, + { + "epoch": 9.82, + "learning_rate": 1.7271703703703705e-05, + "loss": 1.5237, + "step": 55250 + }, + { + "epoch": 9.82, + "learning_rate": 1.726874074074074e-05, + "loss": 1.5013, + "step": 55255 + }, + { + "epoch": 9.82, + "learning_rate": 1.726577777777778e-05, + "loss": 1.5111, + "step": 55260 + }, + { + "epoch": 9.82, + "learning_rate": 1.7262814814814814e-05, + "loss": 1.5267, + "step": 55265 + }, + { + "epoch": 9.83, + "learning_rate": 1.7259851851851853e-05, + "loss": 1.4933, + "step": 55270 + }, + { + "epoch": 9.83, + "learning_rate": 1.725688888888889e-05, + "loss": 1.5465, + "step": 55275 + }, + { + "epoch": 9.83, + "learning_rate": 1.7253925925925927e-05, + "loss": 1.595, + "step": 55280 + }, + { + "epoch": 9.83, + "learning_rate": 1.7250962962962963e-05, + "loss": 1.4557, + "step": 55285 + }, + { + "epoch": 9.83, + "learning_rate": 1.7248e-05, + "loss": 1.5768, + "step": 55290 + }, + { + "epoch": 9.83, + "learning_rate": 1.7245037037037037e-05, + "loss": 1.6103, + "step": 55295 + }, + { + "epoch": 9.83, + "learning_rate": 1.7242074074074076e-05, + "loss": 1.5136, + "step": 55300 + }, + { + "epoch": 9.83, + "learning_rate": 1.723911111111111e-05, + "loss": 1.5994, + "step": 55305 + }, + { + "epoch": 9.83, + "learning_rate": 1.723614814814815e-05, + "loss": 1.4701, + "step": 55310 + }, + { + "epoch": 9.83, + "learning_rate": 1.7233185185185186e-05, + "loss": 1.5971, + "step": 55315 + }, + { + "epoch": 9.83, + "learning_rate": 1.723022222222222e-05, + "loss": 1.6812, + "step": 55320 + }, + { + "epoch": 9.84, + "learning_rate": 1.722725925925926e-05, + "loss": 1.5619, + "step": 55325 + }, + { + "epoch": 9.84, + "learning_rate": 1.7224296296296295e-05, + "loss": 1.5565, + "step": 55330 + }, + { + "epoch": 9.84, + "learning_rate": 1.7221333333333334e-05, + "loss": 1.5861, + "step": 55335 + }, + { + "epoch": 9.84, + "learning_rate": 1.721837037037037e-05, + "loss": 1.4722, + "step": 55340 + }, + { + "epoch": 9.84, + "learning_rate": 1.7215407407407408e-05, + "loss": 1.5776, + "step": 55345 + }, + { + "epoch": 9.84, + "learning_rate": 1.7212444444444444e-05, + "loss": 1.5992, + "step": 55350 + }, + { + "epoch": 9.84, + "learning_rate": 1.7209481481481483e-05, + "loss": 1.4286, + "step": 55355 + }, + { + "epoch": 9.84, + "learning_rate": 1.7206518518518518e-05, + "loss": 1.5741, + "step": 55360 + }, + { + "epoch": 9.84, + "learning_rate": 1.7203555555555557e-05, + "loss": 1.6209, + "step": 55365 + }, + { + "epoch": 9.84, + "learning_rate": 1.7200592592592592e-05, + "loss": 1.4816, + "step": 55370 + }, + { + "epoch": 9.84, + "learning_rate": 1.719762962962963e-05, + "loss": 1.4475, + "step": 55375 + }, + { + "epoch": 9.85, + "learning_rate": 1.7194666666666666e-05, + "loss": 1.4962, + "step": 55380 + }, + { + "epoch": 9.85, + "learning_rate": 1.7191703703703705e-05, + "loss": 1.3798, + "step": 55385 + }, + { + "epoch": 9.85, + "learning_rate": 1.718874074074074e-05, + "loss": 1.7109, + "step": 55390 + }, + { + "epoch": 9.85, + "learning_rate": 1.718577777777778e-05, + "loss": 1.3526, + "step": 55395 + }, + { + "epoch": 9.85, + "learning_rate": 1.7182814814814815e-05, + "loss": 1.5241, + "step": 55400 + }, + { + "epoch": 9.85, + "learning_rate": 1.7179851851851854e-05, + "loss": 1.4618, + "step": 55405 + }, + { + "epoch": 9.85, + "learning_rate": 1.717688888888889e-05, + "loss": 1.5312, + "step": 55410 + }, + { + "epoch": 9.85, + "learning_rate": 1.7173925925925928e-05, + "loss": 1.5948, + "step": 55415 + }, + { + "epoch": 9.85, + "learning_rate": 1.7170962962962964e-05, + "loss": 1.4865, + "step": 55420 + }, + { + "epoch": 9.85, + "learning_rate": 1.7168000000000002e-05, + "loss": 1.4777, + "step": 55425 + }, + { + "epoch": 9.85, + "learning_rate": 1.7165037037037038e-05, + "loss": 1.4915, + "step": 55430 + }, + { + "epoch": 9.86, + "learning_rate": 1.7162074074074073e-05, + "loss": 1.5157, + "step": 55435 + }, + { + "epoch": 9.86, + "learning_rate": 1.7159111111111112e-05, + "loss": 1.5839, + "step": 55440 + }, + { + "epoch": 9.86, + "learning_rate": 1.7156148148148147e-05, + "loss": 1.5197, + "step": 55445 + }, + { + "epoch": 9.86, + "learning_rate": 1.7153185185185186e-05, + "loss": 1.459, + "step": 55450 + }, + { + "epoch": 9.86, + "learning_rate": 1.7150222222222222e-05, + "loss": 1.5412, + "step": 55455 + }, + { + "epoch": 9.86, + "learning_rate": 1.714725925925926e-05, + "loss": 1.5439, + "step": 55460 + }, + { + "epoch": 9.86, + "learning_rate": 1.7144296296296296e-05, + "loss": 1.4996, + "step": 55465 + }, + { + "epoch": 9.86, + "learning_rate": 1.7141333333333335e-05, + "loss": 1.556, + "step": 55470 + }, + { + "epoch": 9.86, + "learning_rate": 1.713837037037037e-05, + "loss": 1.4095, + "step": 55475 + }, + { + "epoch": 9.86, + "learning_rate": 1.713540740740741e-05, + "loss": 1.608, + "step": 55480 + }, + { + "epoch": 9.86, + "learning_rate": 1.7132444444444444e-05, + "loss": 1.6704, + "step": 55485 + }, + { + "epoch": 9.86, + "learning_rate": 1.7129481481481483e-05, + "loss": 1.5824, + "step": 55490 + }, + { + "epoch": 9.87, + "learning_rate": 1.712651851851852e-05, + "loss": 1.502, + "step": 55495 + }, + { + "epoch": 9.87, + "learning_rate": 1.7123555555555558e-05, + "loss": 1.5069, + "step": 55500 + }, + { + "epoch": 9.87, + "learning_rate": 1.7120592592592593e-05, + "loss": 1.5466, + "step": 55505 + }, + { + "epoch": 9.87, + "learning_rate": 1.7117629629629632e-05, + "loss": 1.4501, + "step": 55510 + }, + { + "epoch": 9.87, + "learning_rate": 1.7114666666666667e-05, + "loss": 1.5647, + "step": 55515 + }, + { + "epoch": 9.87, + "learning_rate": 1.7111703703703706e-05, + "loss": 1.5654, + "step": 55520 + }, + { + "epoch": 9.87, + "learning_rate": 1.710874074074074e-05, + "loss": 1.6235, + "step": 55525 + }, + { + "epoch": 9.87, + "learning_rate": 1.710577777777778e-05, + "loss": 1.4631, + "step": 55530 + }, + { + "epoch": 9.87, + "learning_rate": 1.7102814814814816e-05, + "loss": 1.6643, + "step": 55535 + }, + { + "epoch": 9.87, + "learning_rate": 1.7099851851851855e-05, + "loss": 1.5965, + "step": 55540 + }, + { + "epoch": 9.87, + "learning_rate": 1.709688888888889e-05, + "loss": 1.597, + "step": 55545 + }, + { + "epoch": 9.88, + "learning_rate": 1.7093925925925925e-05, + "loss": 1.5985, + "step": 55550 + }, + { + "epoch": 9.88, + "learning_rate": 1.7090962962962964e-05, + "loss": 1.6827, + "step": 55555 + }, + { + "epoch": 9.88, + "learning_rate": 1.7088e-05, + "loss": 1.5294, + "step": 55560 + }, + { + "epoch": 9.88, + "learning_rate": 1.708503703703704e-05, + "loss": 1.6238, + "step": 55565 + }, + { + "epoch": 9.88, + "learning_rate": 1.7082074074074074e-05, + "loss": 1.4813, + "step": 55570 + }, + { + "epoch": 9.88, + "learning_rate": 1.7079111111111113e-05, + "loss": 1.4501, + "step": 55575 + }, + { + "epoch": 9.88, + "learning_rate": 1.7076148148148148e-05, + "loss": 1.5369, + "step": 55580 + }, + { + "epoch": 9.88, + "learning_rate": 1.7073185185185187e-05, + "loss": 1.5457, + "step": 55585 + }, + { + "epoch": 9.88, + "learning_rate": 1.7070222222222222e-05, + "loss": 1.5048, + "step": 55590 + }, + { + "epoch": 9.88, + "learning_rate": 1.706725925925926e-05, + "loss": 1.4843, + "step": 55595 + }, + { + "epoch": 9.88, + "learning_rate": 1.7064296296296297e-05, + "loss": 1.5527, + "step": 55600 + }, + { + "epoch": 9.89, + "learning_rate": 1.7061333333333336e-05, + "loss": 1.4307, + "step": 55605 + }, + { + "epoch": 9.89, + "learning_rate": 1.705837037037037e-05, + "loss": 1.6305, + "step": 55610 + }, + { + "epoch": 9.89, + "learning_rate": 1.705540740740741e-05, + "loss": 1.4328, + "step": 55615 + }, + { + "epoch": 9.89, + "learning_rate": 1.7052444444444445e-05, + "loss": 1.4845, + "step": 55620 + }, + { + "epoch": 9.89, + "learning_rate": 1.7049481481481484e-05, + "loss": 1.4451, + "step": 55625 + }, + { + "epoch": 9.89, + "learning_rate": 1.704651851851852e-05, + "loss": 1.5021, + "step": 55630 + }, + { + "epoch": 9.89, + "learning_rate": 1.704355555555556e-05, + "loss": 1.592, + "step": 55635 + }, + { + "epoch": 9.89, + "learning_rate": 1.7040592592592594e-05, + "loss": 1.4752, + "step": 55640 + }, + { + "epoch": 9.89, + "learning_rate": 1.7037629629629633e-05, + "loss": 1.4744, + "step": 55645 + }, + { + "epoch": 9.89, + "learning_rate": 1.7034666666666668e-05, + "loss": 1.6049, + "step": 55650 + }, + { + "epoch": 9.89, + "learning_rate": 1.7031703703703707e-05, + "loss": 1.5754, + "step": 55655 + }, + { + "epoch": 9.9, + "learning_rate": 1.7028740740740742e-05, + "loss": 1.5678, + "step": 55660 + }, + { + "epoch": 9.9, + "learning_rate": 1.7025777777777778e-05, + "loss": 1.5917, + "step": 55665 + }, + { + "epoch": 9.9, + "learning_rate": 1.7022814814814817e-05, + "loss": 1.569, + "step": 55670 + }, + { + "epoch": 9.9, + "learning_rate": 1.7019851851851852e-05, + "loss": 1.5961, + "step": 55675 + }, + { + "epoch": 9.9, + "learning_rate": 1.701688888888889e-05, + "loss": 1.4696, + "step": 55680 + }, + { + "epoch": 9.9, + "learning_rate": 1.7013925925925926e-05, + "loss": 1.5225, + "step": 55685 + }, + { + "epoch": 9.9, + "learning_rate": 1.7010962962962965e-05, + "loss": 1.4792, + "step": 55690 + }, + { + "epoch": 9.9, + "learning_rate": 1.7008e-05, + "loss": 1.5202, + "step": 55695 + }, + { + "epoch": 9.9, + "learning_rate": 1.700503703703704e-05, + "loss": 1.5892, + "step": 55700 + }, + { + "epoch": 9.9, + "learning_rate": 1.7002074074074075e-05, + "loss": 1.5454, + "step": 55705 + }, + { + "epoch": 9.9, + "learning_rate": 1.6999111111111114e-05, + "loss": 1.5368, + "step": 55710 + }, + { + "epoch": 9.9, + "learning_rate": 1.699614814814815e-05, + "loss": 1.543, + "step": 55715 + }, + { + "epoch": 9.91, + "learning_rate": 1.6993185185185188e-05, + "loss": 1.4906, + "step": 55720 + }, + { + "epoch": 9.91, + "learning_rate": 1.6990222222222223e-05, + "loss": 1.5221, + "step": 55725 + }, + { + "epoch": 9.91, + "learning_rate": 1.6987259259259262e-05, + "loss": 1.5514, + "step": 55730 + }, + { + "epoch": 9.91, + "learning_rate": 1.6984296296296297e-05, + "loss": 1.5906, + "step": 55735 + }, + { + "epoch": 9.91, + "learning_rate": 1.6981333333333336e-05, + "loss": 1.5805, + "step": 55740 + }, + { + "epoch": 9.91, + "learning_rate": 1.6978370370370372e-05, + "loss": 1.462, + "step": 55745 + }, + { + "epoch": 9.91, + "learning_rate": 1.697540740740741e-05, + "loss": 1.5429, + "step": 55750 + }, + { + "epoch": 9.91, + "learning_rate": 1.6972444444444446e-05, + "loss": 1.5489, + "step": 55755 + }, + { + "epoch": 9.91, + "learning_rate": 1.6969481481481485e-05, + "loss": 1.6264, + "step": 55760 + }, + { + "epoch": 9.91, + "learning_rate": 1.696651851851852e-05, + "loss": 1.6514, + "step": 55765 + }, + { + "epoch": 9.91, + "learning_rate": 1.696355555555556e-05, + "loss": 1.6208, + "step": 55770 + }, + { + "epoch": 9.92, + "learning_rate": 1.6960592592592595e-05, + "loss": 1.7229, + "step": 55775 + }, + { + "epoch": 9.92, + "learning_rate": 1.695762962962963e-05, + "loss": 1.4408, + "step": 55780 + }, + { + "epoch": 9.92, + "learning_rate": 1.695466666666667e-05, + "loss": 1.6531, + "step": 55785 + }, + { + "epoch": 9.92, + "learning_rate": 1.6951703703703704e-05, + "loss": 1.6879, + "step": 55790 + }, + { + "epoch": 9.92, + "learning_rate": 1.6948740740740743e-05, + "loss": 1.5688, + "step": 55795 + }, + { + "epoch": 9.92, + "learning_rate": 1.694577777777778e-05, + "loss": 1.5016, + "step": 55800 + }, + { + "epoch": 9.92, + "learning_rate": 1.6942814814814817e-05, + "loss": 1.445, + "step": 55805 + }, + { + "epoch": 9.92, + "learning_rate": 1.6939851851851853e-05, + "loss": 1.5127, + "step": 55810 + }, + { + "epoch": 9.92, + "learning_rate": 1.693688888888889e-05, + "loss": 1.4937, + "step": 55815 + }, + { + "epoch": 9.92, + "learning_rate": 1.6933925925925927e-05, + "loss": 1.6443, + "step": 55820 + }, + { + "epoch": 9.92, + "learning_rate": 1.6930962962962966e-05, + "loss": 1.5872, + "step": 55825 + }, + { + "epoch": 9.93, + "learning_rate": 1.6928e-05, + "loss": 1.4814, + "step": 55830 + }, + { + "epoch": 9.93, + "learning_rate": 1.6925037037037037e-05, + "loss": 1.5755, + "step": 55835 + }, + { + "epoch": 9.93, + "learning_rate": 1.6922074074074072e-05, + "loss": 1.5642, + "step": 55840 + }, + { + "epoch": 9.93, + "learning_rate": 1.691911111111111e-05, + "loss": 1.4533, + "step": 55845 + }, + { + "epoch": 9.93, + "learning_rate": 1.6916148148148146e-05, + "loss": 1.5246, + "step": 55850 + }, + { + "epoch": 9.93, + "learning_rate": 1.6913185185185185e-05, + "loss": 1.511, + "step": 55855 + }, + { + "epoch": 9.93, + "learning_rate": 1.691022222222222e-05, + "loss": 1.4451, + "step": 55860 + }, + { + "epoch": 9.93, + "learning_rate": 1.690725925925926e-05, + "loss": 1.4933, + "step": 55865 + }, + { + "epoch": 9.93, + "learning_rate": 1.6904296296296295e-05, + "loss": 1.5376, + "step": 55870 + }, + { + "epoch": 9.93, + "learning_rate": 1.6901333333333334e-05, + "loss": 1.5438, + "step": 55875 + }, + { + "epoch": 9.93, + "learning_rate": 1.689837037037037e-05, + "loss": 1.5084, + "step": 55880 + }, + { + "epoch": 9.94, + "learning_rate": 1.6895407407407408e-05, + "loss": 1.6436, + "step": 55885 + }, + { + "epoch": 9.94, + "learning_rate": 1.6892444444444443e-05, + "loss": 1.5424, + "step": 55890 + }, + { + "epoch": 9.94, + "learning_rate": 1.6889481481481482e-05, + "loss": 1.5215, + "step": 55895 + }, + { + "epoch": 9.94, + "learning_rate": 1.6886518518518518e-05, + "loss": 1.5333, + "step": 55900 + }, + { + "epoch": 9.94, + "learning_rate": 1.6883555555555556e-05, + "loss": 1.5025, + "step": 55905 + }, + { + "epoch": 9.94, + "learning_rate": 1.6880592592592592e-05, + "loss": 1.6064, + "step": 55910 + }, + { + "epoch": 9.94, + "learning_rate": 1.687762962962963e-05, + "loss": 1.456, + "step": 55915 + }, + { + "epoch": 9.94, + "learning_rate": 1.6874666666666666e-05, + "loss": 1.5671, + "step": 55920 + }, + { + "epoch": 9.94, + "learning_rate": 1.6871703703703705e-05, + "loss": 1.4753, + "step": 55925 + }, + { + "epoch": 9.94, + "learning_rate": 1.686874074074074e-05, + "loss": 1.5083, + "step": 55930 + }, + { + "epoch": 9.94, + "learning_rate": 1.686577777777778e-05, + "loss": 1.5953, + "step": 55935 + }, + { + "epoch": 9.94, + "learning_rate": 1.6862814814814815e-05, + "loss": 1.5671, + "step": 55940 + }, + { + "epoch": 9.95, + "learning_rate": 1.685985185185185e-05, + "loss": 1.537, + "step": 55945 + }, + { + "epoch": 9.95, + "learning_rate": 1.685688888888889e-05, + "loss": 1.5699, + "step": 55950 + }, + { + "epoch": 9.95, + "learning_rate": 1.6853925925925924e-05, + "loss": 1.5137, + "step": 55955 + }, + { + "epoch": 9.95, + "learning_rate": 1.6850962962962963e-05, + "loss": 1.5493, + "step": 55960 + }, + { + "epoch": 9.95, + "learning_rate": 1.6848e-05, + "loss": 1.5812, + "step": 55965 + }, + { + "epoch": 9.95, + "learning_rate": 1.6845037037037037e-05, + "loss": 1.5359, + "step": 55970 + }, + { + "epoch": 9.95, + "learning_rate": 1.6842074074074073e-05, + "loss": 1.5635, + "step": 55975 + }, + { + "epoch": 9.95, + "learning_rate": 1.683911111111111e-05, + "loss": 1.5251, + "step": 55980 + }, + { + "epoch": 9.95, + "learning_rate": 1.6836148148148147e-05, + "loss": 1.5709, + "step": 55985 + }, + { + "epoch": 9.95, + "learning_rate": 1.6833185185185186e-05, + "loss": 1.553, + "step": 55990 + }, + { + "epoch": 9.95, + "learning_rate": 1.683022222222222e-05, + "loss": 1.5106, + "step": 55995 + }, + { + "epoch": 9.96, + "learning_rate": 1.682725925925926e-05, + "loss": 1.5495, + "step": 56000 + }, + { + "epoch": 9.96, + "learning_rate": 1.6824296296296296e-05, + "loss": 1.5, + "step": 56005 + }, + { + "epoch": 9.96, + "learning_rate": 1.6821333333333334e-05, + "loss": 1.4419, + "step": 56010 + }, + { + "epoch": 9.96, + "learning_rate": 1.681837037037037e-05, + "loss": 1.711, + "step": 56015 + }, + { + "epoch": 9.96, + "learning_rate": 1.681540740740741e-05, + "loss": 1.5766, + "step": 56020 + }, + { + "epoch": 9.96, + "learning_rate": 1.6812444444444444e-05, + "loss": 1.4804, + "step": 56025 + }, + { + "epoch": 9.96, + "learning_rate": 1.6809481481481483e-05, + "loss": 1.5387, + "step": 56030 + }, + { + "epoch": 9.96, + "learning_rate": 1.680651851851852e-05, + "loss": 1.5965, + "step": 56035 + }, + { + "epoch": 9.96, + "learning_rate": 1.6803555555555557e-05, + "loss": 1.5914, + "step": 56040 + }, + { + "epoch": 9.96, + "learning_rate": 1.6800592592592593e-05, + "loss": 1.5473, + "step": 56045 + }, + { + "epoch": 9.96, + "learning_rate": 1.679762962962963e-05, + "loss": 1.5701, + "step": 56050 + }, + { + "epoch": 9.97, + "learning_rate": 1.6794666666666667e-05, + "loss": 1.5801, + "step": 56055 + }, + { + "epoch": 9.97, + "learning_rate": 1.6791703703703702e-05, + "loss": 1.575, + "step": 56060 + }, + { + "epoch": 9.97, + "learning_rate": 1.678874074074074e-05, + "loss": 1.6211, + "step": 56065 + }, + { + "epoch": 9.97, + "learning_rate": 1.6785777777777777e-05, + "loss": 1.5513, + "step": 56070 + }, + { + "epoch": 9.97, + "learning_rate": 1.6782814814814815e-05, + "loss": 1.5964, + "step": 56075 + }, + { + "epoch": 9.97, + "learning_rate": 1.677985185185185e-05, + "loss": 1.542, + "step": 56080 + }, + { + "epoch": 9.97, + "learning_rate": 1.677688888888889e-05, + "loss": 1.5463, + "step": 56085 + }, + { + "epoch": 9.97, + "learning_rate": 1.6773925925925925e-05, + "loss": 1.4798, + "step": 56090 + }, + { + "epoch": 9.97, + "learning_rate": 1.6770962962962964e-05, + "loss": 1.5966, + "step": 56095 + }, + { + "epoch": 9.97, + "learning_rate": 1.6768e-05, + "loss": 1.6393, + "step": 56100 + }, + { + "epoch": 9.97, + "learning_rate": 1.6765037037037038e-05, + "loss": 1.501, + "step": 56105 + }, + { + "epoch": 9.98, + "learning_rate": 1.6762074074074074e-05, + "loss": 1.5032, + "step": 56110 + }, + { + "epoch": 9.98, + "learning_rate": 1.6759111111111112e-05, + "loss": 1.5574, + "step": 56115 + }, + { + "epoch": 9.98, + "learning_rate": 1.6756148148148148e-05, + "loss": 1.49, + "step": 56120 + }, + { + "epoch": 9.98, + "learning_rate": 1.6753185185185187e-05, + "loss": 1.554, + "step": 56125 + }, + { + "epoch": 9.98, + "learning_rate": 1.6750222222222222e-05, + "loss": 1.5002, + "step": 56130 + }, + { + "epoch": 9.98, + "learning_rate": 1.674725925925926e-05, + "loss": 1.5316, + "step": 56135 + }, + { + "epoch": 9.98, + "learning_rate": 1.6744296296296296e-05, + "loss": 1.4794, + "step": 56140 + }, + { + "epoch": 9.98, + "learning_rate": 1.6741333333333335e-05, + "loss": 1.5091, + "step": 56145 + }, + { + "epoch": 9.98, + "learning_rate": 1.673837037037037e-05, + "loss": 1.4438, + "step": 56150 + }, + { + "epoch": 9.98, + "learning_rate": 1.673540740740741e-05, + "loss": 1.5243, + "step": 56155 + }, + { + "epoch": 9.98, + "learning_rate": 1.6732444444444445e-05, + "loss": 1.5675, + "step": 56160 + }, + { + "epoch": 9.98, + "learning_rate": 1.6729481481481484e-05, + "loss": 1.4815, + "step": 56165 + }, + { + "epoch": 9.99, + "learning_rate": 1.672651851851852e-05, + "loss": 1.616, + "step": 56170 + }, + { + "epoch": 9.99, + "learning_rate": 1.6723555555555555e-05, + "loss": 1.6924, + "step": 56175 + }, + { + "epoch": 9.99, + "learning_rate": 1.6720592592592593e-05, + "loss": 1.5124, + "step": 56180 + }, + { + "epoch": 9.99, + "learning_rate": 1.671762962962963e-05, + "loss": 1.5835, + "step": 56185 + }, + { + "epoch": 9.99, + "learning_rate": 1.6714666666666668e-05, + "loss": 1.6238, + "step": 56190 + }, + { + "epoch": 9.99, + "learning_rate": 1.6711703703703703e-05, + "loss": 1.5555, + "step": 56195 + }, + { + "epoch": 9.99, + "learning_rate": 1.6708740740740742e-05, + "loss": 1.5174, + "step": 56200 + }, + { + "epoch": 9.99, + "learning_rate": 1.6705777777777777e-05, + "loss": 1.5372, + "step": 56205 + }, + { + "epoch": 9.99, + "learning_rate": 1.6702814814814816e-05, + "loss": 1.6737, + "step": 56210 + }, + { + "epoch": 9.99, + "learning_rate": 1.669985185185185e-05, + "loss": 1.5666, + "step": 56215 + }, + { + "epoch": 9.99, + "learning_rate": 1.669688888888889e-05, + "loss": 1.5074, + "step": 56220 + }, + { + "epoch": 10.0, + "learning_rate": 1.6693925925925926e-05, + "loss": 1.5528, + "step": 56225 + }, + { + "epoch": 10.0, + "learning_rate": 1.6690962962962965e-05, + "loss": 1.5752, + "step": 56230 + }, + { + "epoch": 10.0, + "learning_rate": 1.6688e-05, + "loss": 1.4894, + "step": 56235 + }, + { + "epoch": 10.0, + "learning_rate": 1.668503703703704e-05, + "loss": 1.5343, + "step": 56240 + }, + { + "epoch": 10.0, + "learning_rate": 1.6682074074074074e-05, + "loss": 1.6339, + "step": 56245 + }, + { + "epoch": 10.0, + "learning_rate": 1.6679111111111113e-05, + "loss": 1.5617, + "step": 56250 + }, + { + "epoch": 10.0, + "learning_rate": 1.667614814814815e-05, + "loss": 1.4568, + "step": 56255 + }, + { + "epoch": 10.0, + "learning_rate": 1.6673185185185187e-05, + "loss": 1.4813, + "step": 56260 + }, + { + "epoch": 10.0, + "learning_rate": 1.6670222222222223e-05, + "loss": 1.464, + "step": 56265 + }, + { + "epoch": 10.0, + "learning_rate": 1.6667259259259262e-05, + "loss": 1.444, + "step": 56270 + }, + { + "epoch": 10.0, + "learning_rate": 1.6664296296296297e-05, + "loss": 1.5838, + "step": 56275 + }, + { + "epoch": 10.01, + "learning_rate": 1.6661333333333336e-05, + "loss": 1.3857, + "step": 56280 + }, + { + "epoch": 10.01, + "learning_rate": 1.665837037037037e-05, + "loss": 1.4634, + "step": 56285 + }, + { + "epoch": 10.01, + "learning_rate": 1.6655407407407407e-05, + "loss": 1.4352, + "step": 56290 + }, + { + "epoch": 10.01, + "learning_rate": 1.6652444444444446e-05, + "loss": 1.3871, + "step": 56295 + }, + { + "epoch": 10.01, + "learning_rate": 1.664948148148148e-05, + "loss": 1.3942, + "step": 56300 + }, + { + "epoch": 10.01, + "learning_rate": 1.664651851851852e-05, + "loss": 1.4356, + "step": 56305 + }, + { + "epoch": 10.01, + "learning_rate": 1.6643555555555555e-05, + "loss": 1.4767, + "step": 56310 + }, + { + "epoch": 10.01, + "learning_rate": 1.6640592592592594e-05, + "loss": 1.4146, + "step": 56315 + }, + { + "epoch": 10.01, + "learning_rate": 1.663762962962963e-05, + "loss": 1.4985, + "step": 56320 + }, + { + "epoch": 10.01, + "learning_rate": 1.663466666666667e-05, + "loss": 1.4298, + "step": 56325 + }, + { + "epoch": 10.01, + "learning_rate": 1.6631703703703704e-05, + "loss": 1.548, + "step": 56330 + }, + { + "epoch": 10.02, + "learning_rate": 1.6628740740740743e-05, + "loss": 1.354, + "step": 56335 + }, + { + "epoch": 10.02, + "learning_rate": 1.6625777777777778e-05, + "loss": 1.4091, + "step": 56340 + }, + { + "epoch": 10.02, + "learning_rate": 1.6622814814814817e-05, + "loss": 1.4469, + "step": 56345 + }, + { + "epoch": 10.02, + "learning_rate": 1.6619851851851852e-05, + "loss": 1.435, + "step": 56350 + }, + { + "epoch": 10.02, + "learning_rate": 1.661688888888889e-05, + "loss": 1.4372, + "step": 56355 + }, + { + "epoch": 10.02, + "learning_rate": 1.6613925925925927e-05, + "loss": 1.3682, + "step": 56360 + }, + { + "epoch": 10.02, + "learning_rate": 1.6610962962962965e-05, + "loss": 1.6016, + "step": 56365 + }, + { + "epoch": 10.02, + "learning_rate": 1.6608e-05, + "loss": 1.3731, + "step": 56370 + }, + { + "epoch": 10.02, + "learning_rate": 1.660503703703704e-05, + "loss": 1.4114, + "step": 56375 + }, + { + "epoch": 10.02, + "learning_rate": 1.6602074074074075e-05, + "loss": 1.5372, + "step": 56380 + }, + { + "epoch": 10.02, + "learning_rate": 1.6599111111111114e-05, + "loss": 1.5105, + "step": 56385 + }, + { + "epoch": 10.02, + "learning_rate": 1.659614814814815e-05, + "loss": 1.3513, + "step": 56390 + }, + { + "epoch": 10.03, + "learning_rate": 1.6593185185185188e-05, + "loss": 1.411, + "step": 56395 + }, + { + "epoch": 10.03, + "learning_rate": 1.6590222222222224e-05, + "loss": 1.374, + "step": 56400 + }, + { + "epoch": 10.03, + "learning_rate": 1.658725925925926e-05, + "loss": 1.413, + "step": 56405 + }, + { + "epoch": 10.03, + "learning_rate": 1.6584296296296298e-05, + "loss": 1.4381, + "step": 56410 + }, + { + "epoch": 10.03, + "learning_rate": 1.6581333333333333e-05, + "loss": 1.4425, + "step": 56415 + }, + { + "epoch": 10.03, + "learning_rate": 1.6578370370370372e-05, + "loss": 1.3954, + "step": 56420 + }, + { + "epoch": 10.03, + "learning_rate": 1.6575407407407408e-05, + "loss": 1.4891, + "step": 56425 + }, + { + "epoch": 10.03, + "learning_rate": 1.6572444444444446e-05, + "loss": 1.5278, + "step": 56430 + }, + { + "epoch": 10.03, + "learning_rate": 1.6569481481481482e-05, + "loss": 1.515, + "step": 56435 + }, + { + "epoch": 10.03, + "learning_rate": 1.656651851851852e-05, + "loss": 1.4845, + "step": 56440 + }, + { + "epoch": 10.03, + "learning_rate": 1.6563555555555556e-05, + "loss": 1.497, + "step": 56445 + }, + { + "epoch": 10.04, + "learning_rate": 1.6560592592592595e-05, + "loss": 1.4527, + "step": 56450 + }, + { + "epoch": 10.04, + "learning_rate": 1.655762962962963e-05, + "loss": 1.4164, + "step": 56455 + }, + { + "epoch": 10.04, + "learning_rate": 1.655466666666667e-05, + "loss": 1.4828, + "step": 56460 + }, + { + "epoch": 10.04, + "learning_rate": 1.6551703703703705e-05, + "loss": 1.5218, + "step": 56465 + }, + { + "epoch": 10.04, + "learning_rate": 1.6548740740740743e-05, + "loss": 1.4109, + "step": 56470 + }, + { + "epoch": 10.04, + "learning_rate": 1.654577777777778e-05, + "loss": 1.3946, + "step": 56475 + }, + { + "epoch": 10.04, + "learning_rate": 1.6542814814814818e-05, + "loss": 1.4448, + "step": 56480 + }, + { + "epoch": 10.04, + "learning_rate": 1.6539851851851853e-05, + "loss": 1.446, + "step": 56485 + }, + { + "epoch": 10.04, + "learning_rate": 1.6536888888888892e-05, + "loss": 1.5763, + "step": 56490 + }, + { + "epoch": 10.04, + "learning_rate": 1.6533925925925927e-05, + "loss": 1.4697, + "step": 56495 + }, + { + "epoch": 10.04, + "learning_rate": 1.6530962962962966e-05, + "loss": 1.434, + "step": 56500 + }, + { + "epoch": 10.05, + "learning_rate": 1.6528e-05, + "loss": 1.5345, + "step": 56505 + }, + { + "epoch": 10.05, + "learning_rate": 1.652503703703704e-05, + "loss": 1.475, + "step": 56510 + }, + { + "epoch": 10.05, + "learning_rate": 1.6522074074074076e-05, + "loss": 1.5443, + "step": 56515 + }, + { + "epoch": 10.05, + "learning_rate": 1.651911111111111e-05, + "loss": 1.4144, + "step": 56520 + }, + { + "epoch": 10.05, + "learning_rate": 1.651614814814815e-05, + "loss": 1.3183, + "step": 56525 + }, + { + "epoch": 10.05, + "learning_rate": 1.6513185185185186e-05, + "loss": 1.5795, + "step": 56530 + }, + { + "epoch": 10.05, + "learning_rate": 1.6510222222222224e-05, + "loss": 1.4622, + "step": 56535 + }, + { + "epoch": 10.05, + "learning_rate": 1.650725925925926e-05, + "loss": 1.4535, + "step": 56540 + }, + { + "epoch": 10.05, + "learning_rate": 1.65042962962963e-05, + "loss": 1.3773, + "step": 56545 + }, + { + "epoch": 10.05, + "learning_rate": 1.6501333333333334e-05, + "loss": 1.3614, + "step": 56550 + }, + { + "epoch": 10.05, + "learning_rate": 1.6498370370370373e-05, + "loss": 1.5734, + "step": 56555 + }, + { + "epoch": 10.06, + "learning_rate": 1.649540740740741e-05, + "loss": 1.361, + "step": 56560 + }, + { + "epoch": 10.06, + "learning_rate": 1.6492444444444447e-05, + "loss": 1.4434, + "step": 56565 + }, + { + "epoch": 10.06, + "learning_rate": 1.6489481481481483e-05, + "loss": 1.3869, + "step": 56570 + }, + { + "epoch": 10.06, + "learning_rate": 1.648651851851852e-05, + "loss": 1.4083, + "step": 56575 + }, + { + "epoch": 10.06, + "learning_rate": 1.6483555555555557e-05, + "loss": 1.5777, + "step": 56580 + }, + { + "epoch": 10.06, + "learning_rate": 1.6480592592592596e-05, + "loss": 1.4525, + "step": 56585 + }, + { + "epoch": 10.06, + "learning_rate": 1.647762962962963e-05, + "loss": 1.5079, + "step": 56590 + }, + { + "epoch": 10.06, + "learning_rate": 1.647466666666667e-05, + "loss": 1.4864, + "step": 56595 + }, + { + "epoch": 10.06, + "learning_rate": 1.6471703703703705e-05, + "loss": 1.4319, + "step": 56600 + }, + { + "epoch": 10.06, + "learning_rate": 1.6468740740740744e-05, + "loss": 1.3583, + "step": 56605 + }, + { + "epoch": 10.06, + "learning_rate": 1.6465777777777776e-05, + "loss": 1.549, + "step": 56610 + }, + { + "epoch": 10.06, + "learning_rate": 1.6462814814814815e-05, + "loss": 1.376, + "step": 56615 + }, + { + "epoch": 10.07, + "learning_rate": 1.645985185185185e-05, + "loss": 1.4434, + "step": 56620 + }, + { + "epoch": 10.07, + "learning_rate": 1.645688888888889e-05, + "loss": 1.4769, + "step": 56625 + }, + { + "epoch": 10.07, + "learning_rate": 1.6453925925925925e-05, + "loss": 1.3302, + "step": 56630 + }, + { + "epoch": 10.07, + "learning_rate": 1.6450962962962964e-05, + "loss": 1.5486, + "step": 56635 + }, + { + "epoch": 10.07, + "learning_rate": 1.6448e-05, + "loss": 1.4111, + "step": 56640 + }, + { + "epoch": 10.07, + "learning_rate": 1.6445037037037038e-05, + "loss": 1.4691, + "step": 56645 + }, + { + "epoch": 10.07, + "learning_rate": 1.6442074074074073e-05, + "loss": 1.3895, + "step": 56650 + }, + { + "epoch": 10.07, + "learning_rate": 1.6439111111111112e-05, + "loss": 1.4762, + "step": 56655 + }, + { + "epoch": 10.07, + "learning_rate": 1.6436148148148148e-05, + "loss": 1.5681, + "step": 56660 + }, + { + "epoch": 10.07, + "learning_rate": 1.6433185185185186e-05, + "loss": 1.3444, + "step": 56665 + }, + { + "epoch": 10.07, + "learning_rate": 1.6430222222222222e-05, + "loss": 1.5443, + "step": 56670 + }, + { + "epoch": 10.08, + "learning_rate": 1.6427259259259257e-05, + "loss": 1.4395, + "step": 56675 + }, + { + "epoch": 10.08, + "learning_rate": 1.6424296296296296e-05, + "loss": 1.3889, + "step": 56680 + }, + { + "epoch": 10.08, + "learning_rate": 1.642133333333333e-05, + "loss": 1.3609, + "step": 56685 + }, + { + "epoch": 10.08, + "learning_rate": 1.641837037037037e-05, + "loss": 1.4472, + "step": 56690 + }, + { + "epoch": 10.08, + "learning_rate": 1.6415407407407406e-05, + "loss": 1.3483, + "step": 56695 + }, + { + "epoch": 10.08, + "learning_rate": 1.6412444444444445e-05, + "loss": 1.458, + "step": 56700 + }, + { + "epoch": 10.08, + "learning_rate": 1.640948148148148e-05, + "loss": 1.5122, + "step": 56705 + }, + { + "epoch": 10.08, + "learning_rate": 1.640651851851852e-05, + "loss": 1.5652, + "step": 56710 + }, + { + "epoch": 10.08, + "learning_rate": 1.6403555555555554e-05, + "loss": 1.4863, + "step": 56715 + }, + { + "epoch": 10.08, + "learning_rate": 1.6400592592592593e-05, + "loss": 1.422, + "step": 56720 + }, + { + "epoch": 10.08, + "learning_rate": 1.639762962962963e-05, + "loss": 1.3409, + "step": 56725 + }, + { + "epoch": 10.09, + "learning_rate": 1.6394666666666667e-05, + "loss": 1.3657, + "step": 56730 + }, + { + "epoch": 10.09, + "learning_rate": 1.6391703703703703e-05, + "loss": 1.497, + "step": 56735 + }, + { + "epoch": 10.09, + "learning_rate": 1.638874074074074e-05, + "loss": 1.4964, + "step": 56740 + }, + { + "epoch": 10.09, + "learning_rate": 1.6385777777777777e-05, + "loss": 1.4076, + "step": 56745 + }, + { + "epoch": 10.09, + "learning_rate": 1.6382814814814816e-05, + "loss": 1.4491, + "step": 56750 + }, + { + "epoch": 10.09, + "learning_rate": 1.637985185185185e-05, + "loss": 1.4611, + "step": 56755 + }, + { + "epoch": 10.09, + "learning_rate": 1.637688888888889e-05, + "loss": 1.4653, + "step": 56760 + }, + { + "epoch": 10.09, + "learning_rate": 1.6373925925925926e-05, + "loss": 1.5266, + "step": 56765 + }, + { + "epoch": 10.09, + "learning_rate": 1.6370962962962964e-05, + "loss": 1.4873, + "step": 56770 + }, + { + "epoch": 10.09, + "learning_rate": 1.6368e-05, + "loss": 1.4298, + "step": 56775 + }, + { + "epoch": 10.09, + "learning_rate": 1.636503703703704e-05, + "loss": 1.5629, + "step": 56780 + }, + { + "epoch": 10.1, + "learning_rate": 1.636266666666667e-05, + "loss": 1.4198, + "step": 56785 + }, + { + "epoch": 10.1, + "learning_rate": 1.6359703703703704e-05, + "loss": 1.4296, + "step": 56790 + }, + { + "epoch": 10.1, + "learning_rate": 1.6356740740740743e-05, + "loss": 1.3766, + "step": 56795 + }, + { + "epoch": 10.1, + "learning_rate": 1.6353777777777778e-05, + "loss": 1.5183, + "step": 56800 + }, + { + "epoch": 10.1, + "learning_rate": 1.6350814814814817e-05, + "loss": 1.4686, + "step": 56805 + }, + { + "epoch": 10.1, + "learning_rate": 1.6347851851851852e-05, + "loss": 1.5341, + "step": 56810 + }, + { + "epoch": 10.1, + "learning_rate": 1.634488888888889e-05, + "loss": 1.4001, + "step": 56815 + }, + { + "epoch": 10.1, + "learning_rate": 1.6341925925925927e-05, + "loss": 1.4328, + "step": 56820 + }, + { + "epoch": 10.1, + "learning_rate": 1.6338962962962965e-05, + "loss": 1.3983, + "step": 56825 + }, + { + "epoch": 10.1, + "learning_rate": 1.6336e-05, + "loss": 1.4608, + "step": 56830 + }, + { + "epoch": 10.1, + "learning_rate": 1.633303703703704e-05, + "loss": 1.4269, + "step": 56835 + }, + { + "epoch": 10.1, + "learning_rate": 1.6330074074074075e-05, + "loss": 1.523, + "step": 56840 + }, + { + "epoch": 10.11, + "learning_rate": 1.6327111111111114e-05, + "loss": 1.4866, + "step": 56845 + }, + { + "epoch": 10.11, + "learning_rate": 1.632414814814815e-05, + "loss": 1.5254, + "step": 56850 + }, + { + "epoch": 10.11, + "learning_rate": 1.6321185185185188e-05, + "loss": 1.4886, + "step": 56855 + }, + { + "epoch": 10.11, + "learning_rate": 1.6318222222222224e-05, + "loss": 1.4051, + "step": 56860 + }, + { + "epoch": 10.11, + "learning_rate": 1.631525925925926e-05, + "loss": 1.479, + "step": 56865 + }, + { + "epoch": 10.11, + "learning_rate": 1.6312296296296298e-05, + "loss": 1.4943, + "step": 56870 + }, + { + "epoch": 10.11, + "learning_rate": 1.6309333333333333e-05, + "loss": 1.3982, + "step": 56875 + }, + { + "epoch": 10.11, + "learning_rate": 1.6306370370370372e-05, + "loss": 1.5515, + "step": 56880 + }, + { + "epoch": 10.11, + "learning_rate": 1.6303407407407408e-05, + "loss": 1.3565, + "step": 56885 + }, + { + "epoch": 10.11, + "learning_rate": 1.6300444444444446e-05, + "loss": 1.4249, + "step": 56890 + }, + { + "epoch": 10.11, + "learning_rate": 1.6297481481481482e-05, + "loss": 1.4261, + "step": 56895 + }, + { + "epoch": 10.12, + "learning_rate": 1.629451851851852e-05, + "loss": 1.3856, + "step": 56900 + }, + { + "epoch": 10.12, + "learning_rate": 1.6291555555555556e-05, + "loss": 1.5375, + "step": 56905 + }, + { + "epoch": 10.12, + "learning_rate": 1.6288592592592595e-05, + "loss": 1.5177, + "step": 56910 + }, + { + "epoch": 10.12, + "learning_rate": 1.628562962962963e-05, + "loss": 1.5217, + "step": 56915 + }, + { + "epoch": 10.12, + "learning_rate": 1.628266666666667e-05, + "loss": 1.522, + "step": 56920 + }, + { + "epoch": 10.12, + "learning_rate": 1.6279703703703705e-05, + "loss": 1.3629, + "step": 56925 + }, + { + "epoch": 10.12, + "learning_rate": 1.6276740740740743e-05, + "loss": 1.5598, + "step": 56930 + }, + { + "epoch": 10.12, + "learning_rate": 1.627377777777778e-05, + "loss": 1.502, + "step": 56935 + }, + { + "epoch": 10.12, + "learning_rate": 1.6270814814814818e-05, + "loss": 1.4898, + "step": 56940 + }, + { + "epoch": 10.12, + "learning_rate": 1.6267851851851853e-05, + "loss": 1.4654, + "step": 56945 + }, + { + "epoch": 10.12, + "learning_rate": 1.6264888888888892e-05, + "loss": 1.3799, + "step": 56950 + }, + { + "epoch": 10.13, + "learning_rate": 1.6261925925925927e-05, + "loss": 1.3897, + "step": 56955 + }, + { + "epoch": 10.13, + "learning_rate": 1.6258962962962966e-05, + "loss": 1.4804, + "step": 56960 + }, + { + "epoch": 10.13, + "learning_rate": 1.6256e-05, + "loss": 1.441, + "step": 56965 + }, + { + "epoch": 10.13, + "learning_rate": 1.625303703703704e-05, + "loss": 1.6212, + "step": 56970 + }, + { + "epoch": 10.13, + "learning_rate": 1.6250074074074076e-05, + "loss": 1.3991, + "step": 56975 + }, + { + "epoch": 10.13, + "learning_rate": 1.624711111111111e-05, + "loss": 1.4992, + "step": 56980 + }, + { + "epoch": 10.13, + "learning_rate": 1.624414814814815e-05, + "loss": 1.4879, + "step": 56985 + }, + { + "epoch": 10.13, + "learning_rate": 1.6241185185185186e-05, + "loss": 1.5002, + "step": 56990 + }, + { + "epoch": 10.13, + "learning_rate": 1.6238222222222224e-05, + "loss": 1.4677, + "step": 56995 + }, + { + "epoch": 10.13, + "learning_rate": 1.623525925925926e-05, + "loss": 1.4447, + "step": 57000 + }, + { + "epoch": 10.13, + "learning_rate": 1.62322962962963e-05, + "loss": 1.4389, + "step": 57005 + }, + { + "epoch": 10.14, + "learning_rate": 1.6229333333333334e-05, + "loss": 1.5065, + "step": 57010 + }, + { + "epoch": 10.14, + "learning_rate": 1.6226370370370373e-05, + "loss": 1.4818, + "step": 57015 + }, + { + "epoch": 10.14, + "learning_rate": 1.6223407407407408e-05, + "loss": 1.4784, + "step": 57020 + }, + { + "epoch": 10.14, + "learning_rate": 1.6220444444444447e-05, + "loss": 1.4065, + "step": 57025 + }, + { + "epoch": 10.14, + "learning_rate": 1.6217481481481483e-05, + "loss": 1.4242, + "step": 57030 + }, + { + "epoch": 10.14, + "learning_rate": 1.621451851851852e-05, + "loss": 1.4451, + "step": 57035 + }, + { + "epoch": 10.14, + "learning_rate": 1.6211555555555557e-05, + "loss": 1.6388, + "step": 57040 + }, + { + "epoch": 10.14, + "learning_rate": 1.6208592592592596e-05, + "loss": 1.4881, + "step": 57045 + }, + { + "epoch": 10.14, + "learning_rate": 1.620562962962963e-05, + "loss": 1.386, + "step": 57050 + }, + { + "epoch": 10.14, + "learning_rate": 1.620266666666667e-05, + "loss": 1.4108, + "step": 57055 + }, + { + "epoch": 10.14, + "learning_rate": 1.6199703703703705e-05, + "loss": 1.3407, + "step": 57060 + }, + { + "epoch": 10.14, + "learning_rate": 1.6196740740740744e-05, + "loss": 1.5643, + "step": 57065 + }, + { + "epoch": 10.15, + "learning_rate": 1.619377777777778e-05, + "loss": 1.4984, + "step": 57070 + }, + { + "epoch": 10.15, + "learning_rate": 1.6190814814814815e-05, + "loss": 1.5101, + "step": 57075 + }, + { + "epoch": 10.15, + "learning_rate": 1.618785185185185e-05, + "loss": 1.5068, + "step": 57080 + }, + { + "epoch": 10.15, + "learning_rate": 1.618488888888889e-05, + "loss": 1.2716, + "step": 57085 + }, + { + "epoch": 10.15, + "learning_rate": 1.6181925925925925e-05, + "loss": 1.3864, + "step": 57090 + }, + { + "epoch": 10.15, + "learning_rate": 1.6178962962962964e-05, + "loss": 1.5048, + "step": 57095 + }, + { + "epoch": 10.15, + "learning_rate": 1.6176e-05, + "loss": 1.3704, + "step": 57100 + }, + { + "epoch": 10.15, + "learning_rate": 1.6173037037037038e-05, + "loss": 1.5049, + "step": 57105 + }, + { + "epoch": 10.15, + "learning_rate": 1.6170074074074073e-05, + "loss": 1.4995, + "step": 57110 + }, + { + "epoch": 10.15, + "learning_rate": 1.6167111111111112e-05, + "loss": 1.4458, + "step": 57115 + }, + { + "epoch": 10.15, + "learning_rate": 1.6164148148148147e-05, + "loss": 1.4959, + "step": 57120 + }, + { + "epoch": 10.16, + "learning_rate": 1.6161185185185186e-05, + "loss": 1.5256, + "step": 57125 + }, + { + "epoch": 10.16, + "learning_rate": 1.6158222222222222e-05, + "loss": 1.4021, + "step": 57130 + }, + { + "epoch": 10.16, + "learning_rate": 1.615525925925926e-05, + "loss": 1.5498, + "step": 57135 + }, + { + "epoch": 10.16, + "learning_rate": 1.6152296296296296e-05, + "loss": 1.4001, + "step": 57140 + }, + { + "epoch": 10.16, + "learning_rate": 1.614933333333333e-05, + "loss": 1.3844, + "step": 57145 + }, + { + "epoch": 10.16, + "learning_rate": 1.614637037037037e-05, + "loss": 1.4142, + "step": 57150 + }, + { + "epoch": 10.16, + "learning_rate": 1.6143407407407406e-05, + "loss": 1.5698, + "step": 57155 + }, + { + "epoch": 10.16, + "learning_rate": 1.6140444444444444e-05, + "loss": 1.5131, + "step": 57160 + }, + { + "epoch": 10.16, + "learning_rate": 1.613748148148148e-05, + "loss": 1.4638, + "step": 57165 + }, + { + "epoch": 10.16, + "learning_rate": 1.613451851851852e-05, + "loss": 1.47, + "step": 57170 + }, + { + "epoch": 10.16, + "learning_rate": 1.6131555555555554e-05, + "loss": 1.4337, + "step": 57175 + }, + { + "epoch": 10.17, + "learning_rate": 1.6128592592592593e-05, + "loss": 1.4232, + "step": 57180 + }, + { + "epoch": 10.17, + "learning_rate": 1.612562962962963e-05, + "loss": 1.4953, + "step": 57185 + }, + { + "epoch": 10.17, + "learning_rate": 1.6122666666666667e-05, + "loss": 1.4315, + "step": 57190 + }, + { + "epoch": 10.17, + "learning_rate": 1.6119703703703703e-05, + "loss": 1.5302, + "step": 57195 + }, + { + "epoch": 10.17, + "learning_rate": 1.611674074074074e-05, + "loss": 1.516, + "step": 57200 + }, + { + "epoch": 10.17, + "learning_rate": 1.6113777777777777e-05, + "loss": 1.485, + "step": 57205 + }, + { + "epoch": 10.17, + "learning_rate": 1.6110814814814816e-05, + "loss": 1.4658, + "step": 57210 + }, + { + "epoch": 10.17, + "learning_rate": 1.610785185185185e-05, + "loss": 1.4462, + "step": 57215 + }, + { + "epoch": 10.17, + "learning_rate": 1.610488888888889e-05, + "loss": 1.4209, + "step": 57220 + }, + { + "epoch": 10.17, + "learning_rate": 1.6101925925925925e-05, + "loss": 1.4613, + "step": 57225 + }, + { + "epoch": 10.17, + "learning_rate": 1.6098962962962964e-05, + "loss": 1.4842, + "step": 57230 + }, + { + "epoch": 10.18, + "learning_rate": 1.6096e-05, + "loss": 1.4258, + "step": 57235 + }, + { + "epoch": 10.18, + "learning_rate": 1.609303703703704e-05, + "loss": 1.4375, + "step": 57240 + }, + { + "epoch": 10.18, + "learning_rate": 1.6090074074074074e-05, + "loss": 1.5497, + "step": 57245 + }, + { + "epoch": 10.18, + "learning_rate": 1.6087111111111113e-05, + "loss": 1.4033, + "step": 57250 + }, + { + "epoch": 10.18, + "learning_rate": 1.6084148148148148e-05, + "loss": 1.3823, + "step": 57255 + }, + { + "epoch": 10.18, + "learning_rate": 1.6081185185185184e-05, + "loss": 1.4156, + "step": 57260 + }, + { + "epoch": 10.18, + "learning_rate": 1.6078222222222222e-05, + "loss": 1.5244, + "step": 57265 + }, + { + "epoch": 10.18, + "learning_rate": 1.6075259259259258e-05, + "loss": 1.4995, + "step": 57270 + }, + { + "epoch": 10.18, + "learning_rate": 1.6072296296296297e-05, + "loss": 1.55, + "step": 57275 + }, + { + "epoch": 10.18, + "learning_rate": 1.6069333333333332e-05, + "loss": 1.5527, + "step": 57280 + }, + { + "epoch": 10.18, + "learning_rate": 1.606637037037037e-05, + "loss": 1.5155, + "step": 57285 + }, + { + "epoch": 10.18, + "learning_rate": 1.6063407407407406e-05, + "loss": 1.5184, + "step": 57290 + }, + { + "epoch": 10.19, + "learning_rate": 1.6060444444444445e-05, + "loss": 1.3142, + "step": 57295 + }, + { + "epoch": 10.19, + "learning_rate": 1.605748148148148e-05, + "loss": 1.5419, + "step": 57300 + }, + { + "epoch": 10.19, + "learning_rate": 1.605451851851852e-05, + "loss": 1.4331, + "step": 57305 + }, + { + "epoch": 10.19, + "learning_rate": 1.6051555555555555e-05, + "loss": 1.4807, + "step": 57310 + }, + { + "epoch": 10.19, + "learning_rate": 1.6048592592592594e-05, + "loss": 1.5338, + "step": 57315 + }, + { + "epoch": 10.19, + "learning_rate": 1.604562962962963e-05, + "loss": 1.624, + "step": 57320 + }, + { + "epoch": 10.19, + "learning_rate": 1.6042666666666668e-05, + "loss": 1.4465, + "step": 57325 + }, + { + "epoch": 10.19, + "learning_rate": 1.6039703703703703e-05, + "loss": 1.5088, + "step": 57330 + }, + { + "epoch": 10.19, + "learning_rate": 1.6036740740740742e-05, + "loss": 1.5624, + "step": 57335 + }, + { + "epoch": 10.19, + "learning_rate": 1.6033777777777778e-05, + "loss": 1.4521, + "step": 57340 + }, + { + "epoch": 10.19, + "learning_rate": 1.6030814814814817e-05, + "loss": 1.4668, + "step": 57345 + }, + { + "epoch": 10.2, + "learning_rate": 1.6027851851851852e-05, + "loss": 1.4755, + "step": 57350 + }, + { + "epoch": 10.2, + "learning_rate": 1.602488888888889e-05, + "loss": 1.4893, + "step": 57355 + }, + { + "epoch": 10.2, + "learning_rate": 1.6021925925925926e-05, + "loss": 1.4304, + "step": 57360 + }, + { + "epoch": 10.2, + "learning_rate": 1.6018962962962965e-05, + "loss": 1.3858, + "step": 57365 + }, + { + "epoch": 10.2, + "learning_rate": 1.6016e-05, + "loss": 1.4237, + "step": 57370 + }, + { + "epoch": 10.2, + "learning_rate": 1.6013037037037036e-05, + "loss": 1.5444, + "step": 57375 + }, + { + "epoch": 10.2, + "learning_rate": 1.6010074074074075e-05, + "loss": 1.4403, + "step": 57380 + }, + { + "epoch": 10.2, + "learning_rate": 1.600711111111111e-05, + "loss": 1.4722, + "step": 57385 + }, + { + "epoch": 10.2, + "learning_rate": 1.600414814814815e-05, + "loss": 1.4666, + "step": 57390 + }, + { + "epoch": 10.2, + "learning_rate": 1.6001185185185184e-05, + "loss": 1.4422, + "step": 57395 + }, + { + "epoch": 10.2, + "learning_rate": 1.5998222222222223e-05, + "loss": 1.4633, + "step": 57400 + }, + { + "epoch": 10.21, + "learning_rate": 1.599525925925926e-05, + "loss": 1.5391, + "step": 57405 + }, + { + "epoch": 10.21, + "learning_rate": 1.5992296296296298e-05, + "loss": 1.4014, + "step": 57410 + }, + { + "epoch": 10.21, + "learning_rate": 1.5989333333333333e-05, + "loss": 1.5381, + "step": 57415 + }, + { + "epoch": 10.21, + "learning_rate": 1.5986370370370372e-05, + "loss": 1.4941, + "step": 57420 + }, + { + "epoch": 10.21, + "learning_rate": 1.5983407407407407e-05, + "loss": 1.5359, + "step": 57425 + }, + { + "epoch": 10.21, + "learning_rate": 1.5980444444444446e-05, + "loss": 1.5125, + "step": 57430 + }, + { + "epoch": 10.21, + "learning_rate": 1.597748148148148e-05, + "loss": 1.4809, + "step": 57435 + }, + { + "epoch": 10.21, + "learning_rate": 1.597451851851852e-05, + "loss": 1.4194, + "step": 57440 + }, + { + "epoch": 10.21, + "learning_rate": 1.5971555555555556e-05, + "loss": 1.42, + "step": 57445 + }, + { + "epoch": 10.21, + "learning_rate": 1.5968592592592595e-05, + "loss": 1.4757, + "step": 57450 + }, + { + "epoch": 10.21, + "learning_rate": 1.596562962962963e-05, + "loss": 1.476, + "step": 57455 + }, + { + "epoch": 10.22, + "learning_rate": 1.596266666666667e-05, + "loss": 1.5578, + "step": 57460 + }, + { + "epoch": 10.22, + "learning_rate": 1.5959703703703704e-05, + "loss": 1.5182, + "step": 57465 + }, + { + "epoch": 10.22, + "learning_rate": 1.5956740740740743e-05, + "loss": 1.4282, + "step": 57470 + }, + { + "epoch": 10.22, + "learning_rate": 1.595377777777778e-05, + "loss": 1.4785, + "step": 57475 + }, + { + "epoch": 10.22, + "learning_rate": 1.5950814814814817e-05, + "loss": 1.5029, + "step": 57480 + }, + { + "epoch": 10.22, + "learning_rate": 1.5947851851851853e-05, + "loss": 1.498, + "step": 57485 + }, + { + "epoch": 10.22, + "learning_rate": 1.5944888888888888e-05, + "loss": 1.4752, + "step": 57490 + }, + { + "epoch": 10.22, + "learning_rate": 1.5941925925925927e-05, + "loss": 1.3562, + "step": 57495 + }, + { + "epoch": 10.22, + "learning_rate": 1.5938962962962962e-05, + "loss": 1.5276, + "step": 57500 + }, + { + "epoch": 10.22, + "learning_rate": 1.5936e-05, + "loss": 1.4798, + "step": 57505 + }, + { + "epoch": 10.22, + "learning_rate": 1.5933037037037037e-05, + "loss": 1.4945, + "step": 57510 + }, + { + "epoch": 10.22, + "learning_rate": 1.5930074074074076e-05, + "loss": 1.4772, + "step": 57515 + }, + { + "epoch": 10.23, + "learning_rate": 1.592711111111111e-05, + "loss": 1.3629, + "step": 57520 + }, + { + "epoch": 10.23, + "learning_rate": 1.592414814814815e-05, + "loss": 1.4873, + "step": 57525 + }, + { + "epoch": 10.23, + "learning_rate": 1.5921185185185185e-05, + "loss": 1.4423, + "step": 57530 + }, + { + "epoch": 10.23, + "learning_rate": 1.5918222222222224e-05, + "loss": 1.3348, + "step": 57535 + }, + { + "epoch": 10.23, + "learning_rate": 1.591525925925926e-05, + "loss": 1.5268, + "step": 57540 + }, + { + "epoch": 10.23, + "learning_rate": 1.5912296296296298e-05, + "loss": 1.4804, + "step": 57545 + }, + { + "epoch": 10.23, + "learning_rate": 1.5909333333333334e-05, + "loss": 1.4794, + "step": 57550 + }, + { + "epoch": 10.23, + "learning_rate": 1.5906370370370373e-05, + "loss": 1.5295, + "step": 57555 + }, + { + "epoch": 10.23, + "learning_rate": 1.5903407407407408e-05, + "loss": 1.5312, + "step": 57560 + }, + { + "epoch": 10.23, + "learning_rate": 1.5900444444444447e-05, + "loss": 1.5624, + "step": 57565 + }, + { + "epoch": 10.23, + "learning_rate": 1.5897481481481482e-05, + "loss": 1.44, + "step": 57570 + }, + { + "epoch": 10.24, + "learning_rate": 1.589451851851852e-05, + "loss": 1.4336, + "step": 57575 + }, + { + "epoch": 10.24, + "learning_rate": 1.5891555555555556e-05, + "loss": 1.5577, + "step": 57580 + }, + { + "epoch": 10.24, + "learning_rate": 1.5888592592592595e-05, + "loss": 1.4874, + "step": 57585 + }, + { + "epoch": 10.24, + "learning_rate": 1.588562962962963e-05, + "loss": 1.4328, + "step": 57590 + }, + { + "epoch": 10.24, + "learning_rate": 1.588266666666667e-05, + "loss": 1.5161, + "step": 57595 + }, + { + "epoch": 10.24, + "learning_rate": 1.5879703703703705e-05, + "loss": 1.5601, + "step": 57600 + }, + { + "epoch": 10.24, + "learning_rate": 1.587674074074074e-05, + "loss": 1.5182, + "step": 57605 + }, + { + "epoch": 10.24, + "learning_rate": 1.587377777777778e-05, + "loss": 1.4069, + "step": 57610 + }, + { + "epoch": 10.24, + "learning_rate": 1.5870814814814815e-05, + "loss": 1.453, + "step": 57615 + }, + { + "epoch": 10.24, + "learning_rate": 1.5867851851851853e-05, + "loss": 1.4956, + "step": 57620 + }, + { + "epoch": 10.24, + "learning_rate": 1.586488888888889e-05, + "loss": 1.4002, + "step": 57625 + }, + { + "epoch": 10.25, + "learning_rate": 1.5861925925925928e-05, + "loss": 1.4868, + "step": 57630 + }, + { + "epoch": 10.25, + "learning_rate": 1.5858962962962963e-05, + "loss": 1.4464, + "step": 57635 + }, + { + "epoch": 10.25, + "learning_rate": 1.5856000000000002e-05, + "loss": 1.4464, + "step": 57640 + }, + { + "epoch": 10.25, + "learning_rate": 1.5853037037037037e-05, + "loss": 1.4961, + "step": 57645 + }, + { + "epoch": 10.25, + "learning_rate": 1.5850074074074076e-05, + "loss": 1.4678, + "step": 57650 + }, + { + "epoch": 10.25, + "learning_rate": 1.5847111111111112e-05, + "loss": 1.4334, + "step": 57655 + }, + { + "epoch": 10.25, + "learning_rate": 1.584414814814815e-05, + "loss": 1.5021, + "step": 57660 + }, + { + "epoch": 10.25, + "learning_rate": 1.5841185185185186e-05, + "loss": 1.487, + "step": 57665 + }, + { + "epoch": 10.25, + "learning_rate": 1.5838222222222225e-05, + "loss": 1.4619, + "step": 57670 + }, + { + "epoch": 10.25, + "learning_rate": 1.583525925925926e-05, + "loss": 1.394, + "step": 57675 + }, + { + "epoch": 10.25, + "learning_rate": 1.58322962962963e-05, + "loss": 1.4126, + "step": 57680 + }, + { + "epoch": 10.26, + "learning_rate": 1.5829333333333334e-05, + "loss": 1.3816, + "step": 57685 + }, + { + "epoch": 10.26, + "learning_rate": 1.5826370370370373e-05, + "loss": 1.4895, + "step": 57690 + }, + { + "epoch": 10.26, + "learning_rate": 1.582340740740741e-05, + "loss": 1.5423, + "step": 57695 + }, + { + "epoch": 10.26, + "learning_rate": 1.5820444444444448e-05, + "loss": 1.4877, + "step": 57700 + }, + { + "epoch": 10.26, + "learning_rate": 1.5817481481481483e-05, + "loss": 1.3639, + "step": 57705 + }, + { + "epoch": 10.26, + "learning_rate": 1.5814518518518522e-05, + "loss": 1.479, + "step": 57710 + }, + { + "epoch": 10.26, + "learning_rate": 1.5811555555555557e-05, + "loss": 1.3829, + "step": 57715 + }, + { + "epoch": 10.26, + "learning_rate": 1.5808592592592593e-05, + "loss": 1.4367, + "step": 57720 + }, + { + "epoch": 10.26, + "learning_rate": 1.580562962962963e-05, + "loss": 1.4307, + "step": 57725 + }, + { + "epoch": 10.26, + "learning_rate": 1.5802666666666667e-05, + "loss": 1.5144, + "step": 57730 + }, + { + "epoch": 10.26, + "learning_rate": 1.5799703703703706e-05, + "loss": 1.3496, + "step": 57735 + }, + { + "epoch": 10.26, + "learning_rate": 1.579674074074074e-05, + "loss": 1.4094, + "step": 57740 + }, + { + "epoch": 10.27, + "learning_rate": 1.579377777777778e-05, + "loss": 1.4717, + "step": 57745 + }, + { + "epoch": 10.27, + "learning_rate": 1.5790814814814815e-05, + "loss": 1.4042, + "step": 57750 + }, + { + "epoch": 10.27, + "learning_rate": 1.5787851851851854e-05, + "loss": 1.4701, + "step": 57755 + }, + { + "epoch": 10.27, + "learning_rate": 1.578488888888889e-05, + "loss": 1.4628, + "step": 57760 + }, + { + "epoch": 10.27, + "learning_rate": 1.578192592592593e-05, + "loss": 1.4795, + "step": 57765 + }, + { + "epoch": 10.27, + "learning_rate": 1.5778962962962964e-05, + "loss": 1.5003, + "step": 57770 + }, + { + "epoch": 10.27, + "learning_rate": 1.5776000000000003e-05, + "loss": 1.4691, + "step": 57775 + }, + { + "epoch": 10.27, + "learning_rate": 1.5773037037037038e-05, + "loss": 1.4658, + "step": 57780 + }, + { + "epoch": 10.27, + "learning_rate": 1.5770074074074077e-05, + "loss": 1.4067, + "step": 57785 + }, + { + "epoch": 10.27, + "learning_rate": 1.5767111111111112e-05, + "loss": 1.3834, + "step": 57790 + }, + { + "epoch": 10.27, + "learning_rate": 1.576414814814815e-05, + "loss": 1.4586, + "step": 57795 + }, + { + "epoch": 10.28, + "learning_rate": 1.5761185185185187e-05, + "loss": 1.5833, + "step": 57800 + }, + { + "epoch": 10.28, + "learning_rate": 1.5758222222222226e-05, + "loss": 1.5523, + "step": 57805 + }, + { + "epoch": 10.28, + "learning_rate": 1.575525925925926e-05, + "loss": 1.3894, + "step": 57810 + }, + { + "epoch": 10.28, + "learning_rate": 1.57522962962963e-05, + "loss": 1.4667, + "step": 57815 + }, + { + "epoch": 10.28, + "learning_rate": 1.5749333333333335e-05, + "loss": 1.5777, + "step": 57820 + }, + { + "epoch": 10.28, + "learning_rate": 1.5746370370370374e-05, + "loss": 1.4126, + "step": 57825 + }, + { + "epoch": 10.28, + "learning_rate": 1.574340740740741e-05, + "loss": 1.4299, + "step": 57830 + }, + { + "epoch": 10.28, + "learning_rate": 1.5740444444444445e-05, + "loss": 1.5142, + "step": 57835 + }, + { + "epoch": 10.28, + "learning_rate": 1.5737481481481484e-05, + "loss": 1.4349, + "step": 57840 + }, + { + "epoch": 10.28, + "learning_rate": 1.573451851851852e-05, + "loss": 1.5233, + "step": 57845 + }, + { + "epoch": 10.28, + "learning_rate": 1.5731555555555555e-05, + "loss": 1.4105, + "step": 57850 + }, + { + "epoch": 10.29, + "learning_rate": 1.5728592592592593e-05, + "loss": 1.3908, + "step": 57855 + }, + { + "epoch": 10.29, + "learning_rate": 1.572562962962963e-05, + "loss": 1.3775, + "step": 57860 + }, + { + "epoch": 10.29, + "learning_rate": 1.5722666666666668e-05, + "loss": 1.4297, + "step": 57865 + }, + { + "epoch": 10.29, + "learning_rate": 1.5719703703703703e-05, + "loss": 1.4513, + "step": 57870 + }, + { + "epoch": 10.29, + "learning_rate": 1.571674074074074e-05, + "loss": 1.5963, + "step": 57875 + }, + { + "epoch": 10.29, + "learning_rate": 1.5713777777777777e-05, + "loss": 1.5029, + "step": 57880 + }, + { + "epoch": 10.29, + "learning_rate": 1.5710814814814813e-05, + "loss": 1.4527, + "step": 57885 + }, + { + "epoch": 10.29, + "learning_rate": 1.570785185185185e-05, + "loss": 1.4051, + "step": 57890 + }, + { + "epoch": 10.29, + "learning_rate": 1.5704888888888887e-05, + "loss": 1.5059, + "step": 57895 + }, + { + "epoch": 10.29, + "learning_rate": 1.5701925925925926e-05, + "loss": 1.4013, + "step": 57900 + }, + { + "epoch": 10.29, + "learning_rate": 1.569896296296296e-05, + "loss": 1.471, + "step": 57905 + }, + { + "epoch": 10.3, + "learning_rate": 1.5696e-05, + "loss": 1.4712, + "step": 57910 + }, + { + "epoch": 10.3, + "learning_rate": 1.5693037037037036e-05, + "loss": 1.5096, + "step": 57915 + }, + { + "epoch": 10.3, + "learning_rate": 1.5690074074074074e-05, + "loss": 1.4099, + "step": 57920 + }, + { + "epoch": 10.3, + "learning_rate": 1.568711111111111e-05, + "loss": 1.399, + "step": 57925 + }, + { + "epoch": 10.3, + "learning_rate": 1.568414814814815e-05, + "loss": 1.4619, + "step": 57930 + }, + { + "epoch": 10.3, + "learning_rate": 1.5681185185185184e-05, + "loss": 1.4629, + "step": 57935 + }, + { + "epoch": 10.3, + "learning_rate": 1.5678222222222223e-05, + "loss": 1.3985, + "step": 57940 + }, + { + "epoch": 10.3, + "learning_rate": 1.567525925925926e-05, + "loss": 1.3746, + "step": 57945 + }, + { + "epoch": 10.3, + "learning_rate": 1.5672296296296297e-05, + "loss": 1.5952, + "step": 57950 + }, + { + "epoch": 10.3, + "learning_rate": 1.5669333333333333e-05, + "loss": 1.4213, + "step": 57955 + }, + { + "epoch": 10.3, + "learning_rate": 1.566637037037037e-05, + "loss": 1.4808, + "step": 57960 + }, + { + "epoch": 10.3, + "learning_rate": 1.5663407407407407e-05, + "loss": 1.4624, + "step": 57965 + }, + { + "epoch": 10.31, + "learning_rate": 1.5660444444444446e-05, + "loss": 1.4431, + "step": 57970 + }, + { + "epoch": 10.31, + "learning_rate": 1.565748148148148e-05, + "loss": 1.5327, + "step": 57975 + }, + { + "epoch": 10.31, + "learning_rate": 1.565451851851852e-05, + "loss": 1.5175, + "step": 57980 + }, + { + "epoch": 10.31, + "learning_rate": 1.5651555555555555e-05, + "loss": 1.406, + "step": 57985 + }, + { + "epoch": 10.31, + "learning_rate": 1.564859259259259e-05, + "loss": 1.5648, + "step": 57990 + }, + { + "epoch": 10.31, + "learning_rate": 1.564562962962963e-05, + "loss": 1.5232, + "step": 57995 + }, + { + "epoch": 10.31, + "learning_rate": 1.5642666666666665e-05, + "loss": 1.439, + "step": 58000 + }, + { + "epoch": 10.31, + "learning_rate": 1.5639703703703704e-05, + "loss": 1.5279, + "step": 58005 + }, + { + "epoch": 10.31, + "learning_rate": 1.563674074074074e-05, + "loss": 1.466, + "step": 58010 + }, + { + "epoch": 10.31, + "learning_rate": 1.5633777777777778e-05, + "loss": 1.6846, + "step": 58015 + }, + { + "epoch": 10.31, + "learning_rate": 1.5630814814814814e-05, + "loss": 1.5833, + "step": 58020 + }, + { + "epoch": 10.32, + "learning_rate": 1.5627851851851852e-05, + "loss": 1.3396, + "step": 58025 + }, + { + "epoch": 10.32, + "learning_rate": 1.5624888888888888e-05, + "loss": 1.4698, + "step": 58030 + }, + { + "epoch": 10.32, + "learning_rate": 1.5621925925925927e-05, + "loss": 1.4349, + "step": 58035 + }, + { + "epoch": 10.32, + "learning_rate": 1.5618962962962962e-05, + "loss": 1.5199, + "step": 58040 + }, + { + "epoch": 10.32, + "learning_rate": 1.5616e-05, + "loss": 1.4957, + "step": 58045 + }, + { + "epoch": 10.32, + "learning_rate": 1.5613037037037036e-05, + "loss": 1.4374, + "step": 58050 + }, + { + "epoch": 10.32, + "learning_rate": 1.5610074074074075e-05, + "loss": 1.5204, + "step": 58055 + }, + { + "epoch": 10.32, + "learning_rate": 1.560711111111111e-05, + "loss": 1.55, + "step": 58060 + }, + { + "epoch": 10.32, + "learning_rate": 1.560414814814815e-05, + "loss": 1.4052, + "step": 58065 + }, + { + "epoch": 10.32, + "learning_rate": 1.5601185185185185e-05, + "loss": 1.6104, + "step": 58070 + }, + { + "epoch": 10.32, + "learning_rate": 1.5598222222222224e-05, + "loss": 1.4262, + "step": 58075 + }, + { + "epoch": 10.33, + "learning_rate": 1.559525925925926e-05, + "loss": 1.4609, + "step": 58080 + }, + { + "epoch": 10.33, + "learning_rate": 1.5592296296296298e-05, + "loss": 1.384, + "step": 58085 + }, + { + "epoch": 10.33, + "learning_rate": 1.5589333333333333e-05, + "loss": 1.4125, + "step": 58090 + }, + { + "epoch": 10.33, + "learning_rate": 1.5586370370370372e-05, + "loss": 1.2276, + "step": 58095 + }, + { + "epoch": 10.33, + "learning_rate": 1.5583407407407408e-05, + "loss": 1.5046, + "step": 58100 + }, + { + "epoch": 10.33, + "learning_rate": 1.5580444444444443e-05, + "loss": 1.5841, + "step": 58105 + }, + { + "epoch": 10.33, + "learning_rate": 1.5577481481481482e-05, + "loss": 1.3791, + "step": 58110 + }, + { + "epoch": 10.33, + "learning_rate": 1.5574518518518517e-05, + "loss": 1.382, + "step": 58115 + }, + { + "epoch": 10.33, + "learning_rate": 1.5571555555555556e-05, + "loss": 1.4543, + "step": 58120 + }, + { + "epoch": 10.33, + "learning_rate": 1.556859259259259e-05, + "loss": 1.5362, + "step": 58125 + }, + { + "epoch": 10.33, + "learning_rate": 1.556562962962963e-05, + "loss": 1.5292, + "step": 58130 + }, + { + "epoch": 10.34, + "learning_rate": 1.5562666666666666e-05, + "loss": 1.4859, + "step": 58135 + }, + { + "epoch": 10.34, + "learning_rate": 1.5559703703703705e-05, + "loss": 1.5177, + "step": 58140 + }, + { + "epoch": 10.34, + "learning_rate": 1.555674074074074e-05, + "loss": 1.4574, + "step": 58145 + }, + { + "epoch": 10.34, + "learning_rate": 1.555377777777778e-05, + "loss": 1.4358, + "step": 58150 + }, + { + "epoch": 10.34, + "learning_rate": 1.5550814814814814e-05, + "loss": 1.515, + "step": 58155 + }, + { + "epoch": 10.34, + "learning_rate": 1.5547851851851853e-05, + "loss": 1.4314, + "step": 58160 + }, + { + "epoch": 10.34, + "learning_rate": 1.554488888888889e-05, + "loss": 1.4331, + "step": 58165 + }, + { + "epoch": 10.34, + "learning_rate": 1.5541925925925927e-05, + "loss": 1.4103, + "step": 58170 + }, + { + "epoch": 10.34, + "learning_rate": 1.5538962962962963e-05, + "loss": 1.5063, + "step": 58175 + }, + { + "epoch": 10.34, + "learning_rate": 1.5536e-05, + "loss": 1.4246, + "step": 58180 + }, + { + "epoch": 10.34, + "learning_rate": 1.5533037037037037e-05, + "loss": 1.4631, + "step": 58185 + }, + { + "epoch": 10.34, + "learning_rate": 1.5530074074074076e-05, + "loss": 1.4796, + "step": 58190 + }, + { + "epoch": 10.35, + "learning_rate": 1.552711111111111e-05, + "loss": 1.4138, + "step": 58195 + }, + { + "epoch": 10.35, + "learning_rate": 1.552414814814815e-05, + "loss": 1.4656, + "step": 58200 + }, + { + "epoch": 10.35, + "learning_rate": 1.5521185185185186e-05, + "loss": 1.4755, + "step": 58205 + }, + { + "epoch": 10.35, + "learning_rate": 1.5518222222222224e-05, + "loss": 1.5233, + "step": 58210 + }, + { + "epoch": 10.35, + "learning_rate": 1.551525925925926e-05, + "loss": 1.4286, + "step": 58215 + }, + { + "epoch": 10.35, + "learning_rate": 1.5512296296296295e-05, + "loss": 1.5411, + "step": 58220 + }, + { + "epoch": 10.35, + "learning_rate": 1.5509333333333334e-05, + "loss": 1.3792, + "step": 58225 + }, + { + "epoch": 10.35, + "learning_rate": 1.550637037037037e-05, + "loss": 1.4245, + "step": 58230 + }, + { + "epoch": 10.35, + "learning_rate": 1.550340740740741e-05, + "loss": 1.4686, + "step": 58235 + }, + { + "epoch": 10.35, + "learning_rate": 1.5500444444444444e-05, + "loss": 1.4294, + "step": 58240 + }, + { + "epoch": 10.35, + "learning_rate": 1.5497481481481483e-05, + "loss": 1.4766, + "step": 58245 + }, + { + "epoch": 10.36, + "learning_rate": 1.5494518518518518e-05, + "loss": 1.4996, + "step": 58250 + }, + { + "epoch": 10.36, + "learning_rate": 1.5491555555555557e-05, + "loss": 1.3917, + "step": 58255 + }, + { + "epoch": 10.36, + "learning_rate": 1.5488592592592592e-05, + "loss": 1.4286, + "step": 58260 + }, + { + "epoch": 10.36, + "learning_rate": 1.548562962962963e-05, + "loss": 1.4053, + "step": 58265 + }, + { + "epoch": 10.36, + "learning_rate": 1.5482666666666667e-05, + "loss": 1.441, + "step": 58270 + }, + { + "epoch": 10.36, + "learning_rate": 1.5479703703703705e-05, + "loss": 1.4097, + "step": 58275 + }, + { + "epoch": 10.36, + "learning_rate": 1.547674074074074e-05, + "loss": 1.4893, + "step": 58280 + }, + { + "epoch": 10.36, + "learning_rate": 1.547377777777778e-05, + "loss": 1.5554, + "step": 58285 + }, + { + "epoch": 10.36, + "learning_rate": 1.5470814814814815e-05, + "loss": 1.3852, + "step": 58290 + }, + { + "epoch": 10.36, + "learning_rate": 1.5467851851851854e-05, + "loss": 1.5069, + "step": 58295 + }, + { + "epoch": 10.36, + "learning_rate": 1.546488888888889e-05, + "loss": 1.432, + "step": 58300 + }, + { + "epoch": 10.37, + "learning_rate": 1.5461925925925928e-05, + "loss": 1.555, + "step": 58305 + }, + { + "epoch": 10.37, + "learning_rate": 1.5458962962962964e-05, + "loss": 1.472, + "step": 58310 + }, + { + "epoch": 10.37, + "learning_rate": 1.5456000000000002e-05, + "loss": 1.3646, + "step": 58315 + }, + { + "epoch": 10.37, + "learning_rate": 1.5453037037037038e-05, + "loss": 1.4823, + "step": 58320 + }, + { + "epoch": 10.37, + "learning_rate": 1.5450074074074077e-05, + "loss": 1.5113, + "step": 58325 + }, + { + "epoch": 10.37, + "learning_rate": 1.5447111111111112e-05, + "loss": 1.5456, + "step": 58330 + }, + { + "epoch": 10.37, + "learning_rate": 1.5444148148148148e-05, + "loss": 1.5348, + "step": 58335 + }, + { + "epoch": 10.37, + "learning_rate": 1.5441185185185186e-05, + "loss": 1.4874, + "step": 58340 + }, + { + "epoch": 10.37, + "learning_rate": 1.5438222222222222e-05, + "loss": 1.4757, + "step": 58345 + }, + { + "epoch": 10.37, + "learning_rate": 1.543525925925926e-05, + "loss": 1.4871, + "step": 58350 + }, + { + "epoch": 10.37, + "learning_rate": 1.5432296296296296e-05, + "loss": 1.4621, + "step": 58355 + }, + { + "epoch": 10.38, + "learning_rate": 1.5429333333333335e-05, + "loss": 1.4622, + "step": 58360 + }, + { + "epoch": 10.38, + "learning_rate": 1.542637037037037e-05, + "loss": 1.4975, + "step": 58365 + }, + { + "epoch": 10.38, + "learning_rate": 1.542340740740741e-05, + "loss": 1.4183, + "step": 58370 + }, + { + "epoch": 10.38, + "learning_rate": 1.5420444444444445e-05, + "loss": 1.4316, + "step": 58375 + }, + { + "epoch": 10.38, + "learning_rate": 1.5417481481481483e-05, + "loss": 1.5553, + "step": 58380 + }, + { + "epoch": 10.38, + "learning_rate": 1.541451851851852e-05, + "loss": 1.4925, + "step": 58385 + }, + { + "epoch": 10.38, + "learning_rate": 1.5411555555555558e-05, + "loss": 1.4499, + "step": 58390 + }, + { + "epoch": 10.38, + "learning_rate": 1.5408592592592593e-05, + "loss": 1.5471, + "step": 58395 + }, + { + "epoch": 10.38, + "learning_rate": 1.5405629629629632e-05, + "loss": 1.5105, + "step": 58400 + }, + { + "epoch": 10.38, + "learning_rate": 1.5402666666666667e-05, + "loss": 1.4681, + "step": 58405 + }, + { + "epoch": 10.38, + "learning_rate": 1.5399703703703706e-05, + "loss": 1.5636, + "step": 58410 + }, + { + "epoch": 10.38, + "learning_rate": 1.539674074074074e-05, + "loss": 1.4542, + "step": 58415 + }, + { + "epoch": 10.39, + "learning_rate": 1.539377777777778e-05, + "loss": 1.4576, + "step": 58420 + }, + { + "epoch": 10.39, + "learning_rate": 1.5390814814814816e-05, + "loss": 1.51, + "step": 58425 + }, + { + "epoch": 10.39, + "learning_rate": 1.5387851851851855e-05, + "loss": 1.5478, + "step": 58430 + }, + { + "epoch": 10.39, + "learning_rate": 1.538488888888889e-05, + "loss": 1.4927, + "step": 58435 + }, + { + "epoch": 10.39, + "learning_rate": 1.538192592592593e-05, + "loss": 1.5992, + "step": 58440 + }, + { + "epoch": 10.39, + "learning_rate": 1.5378962962962964e-05, + "loss": 1.4584, + "step": 58445 + }, + { + "epoch": 10.39, + "learning_rate": 1.5376e-05, + "loss": 1.548, + "step": 58450 + }, + { + "epoch": 10.39, + "learning_rate": 1.537303703703704e-05, + "loss": 1.4723, + "step": 58455 + }, + { + "epoch": 10.39, + "learning_rate": 1.5370074074074074e-05, + "loss": 1.5418, + "step": 58460 + }, + { + "epoch": 10.39, + "learning_rate": 1.5367111111111113e-05, + "loss": 1.4756, + "step": 58465 + }, + { + "epoch": 10.39, + "learning_rate": 1.536414814814815e-05, + "loss": 1.3878, + "step": 58470 + }, + { + "epoch": 10.4, + "learning_rate": 1.5361185185185187e-05, + "loss": 1.4383, + "step": 58475 + }, + { + "epoch": 10.4, + "learning_rate": 1.5358222222222223e-05, + "loss": 1.4408, + "step": 58480 + }, + { + "epoch": 10.4, + "learning_rate": 1.535525925925926e-05, + "loss": 1.5634, + "step": 58485 + }, + { + "epoch": 10.4, + "learning_rate": 1.5352296296296297e-05, + "loss": 1.4814, + "step": 58490 + }, + { + "epoch": 10.4, + "learning_rate": 1.5349333333333336e-05, + "loss": 1.5613, + "step": 58495 + }, + { + "epoch": 10.4, + "learning_rate": 1.534637037037037e-05, + "loss": 1.5315, + "step": 58500 + }, + { + "epoch": 10.4, + "learning_rate": 1.534340740740741e-05, + "loss": 1.4643, + "step": 58505 + }, + { + "epoch": 10.4, + "learning_rate": 1.5340444444444445e-05, + "loss": 1.4133, + "step": 58510 + }, + { + "epoch": 10.4, + "learning_rate": 1.5337481481481484e-05, + "loss": 1.4707, + "step": 58515 + }, + { + "epoch": 10.4, + "learning_rate": 1.533451851851852e-05, + "loss": 1.5165, + "step": 58520 + }, + { + "epoch": 10.4, + "learning_rate": 1.533155555555556e-05, + "loss": 1.4408, + "step": 58525 + }, + { + "epoch": 10.41, + "learning_rate": 1.5328592592592594e-05, + "loss": 1.4326, + "step": 58530 + }, + { + "epoch": 10.41, + "learning_rate": 1.5325629629629633e-05, + "loss": 1.5168, + "step": 58535 + }, + { + "epoch": 10.41, + "learning_rate": 1.5322666666666668e-05, + "loss": 1.4176, + "step": 58540 + }, + { + "epoch": 10.41, + "learning_rate": 1.5319703703703707e-05, + "loss": 1.4434, + "step": 58545 + }, + { + "epoch": 10.41, + "learning_rate": 1.5316740740740742e-05, + "loss": 1.4942, + "step": 58550 + }, + { + "epoch": 10.41, + "learning_rate": 1.531377777777778e-05, + "loss": 1.476, + "step": 58555 + }, + { + "epoch": 10.41, + "learning_rate": 1.5310814814814817e-05, + "loss": 1.4115, + "step": 58560 + }, + { + "epoch": 10.41, + "learning_rate": 1.5307851851851852e-05, + "loss": 1.492, + "step": 58565 + }, + { + "epoch": 10.41, + "learning_rate": 1.530488888888889e-05, + "loss": 1.5896, + "step": 58570 + }, + { + "epoch": 10.41, + "learning_rate": 1.5301925925925926e-05, + "loss": 1.4734, + "step": 58575 + }, + { + "epoch": 10.41, + "learning_rate": 1.5298962962962965e-05, + "loss": 1.545, + "step": 58580 + }, + { + "epoch": 10.42, + "learning_rate": 1.5296e-05, + "loss": 1.4105, + "step": 58585 + }, + { + "epoch": 10.42, + "learning_rate": 1.529303703703704e-05, + "loss": 1.415, + "step": 58590 + }, + { + "epoch": 10.42, + "learning_rate": 1.5290074074074075e-05, + "loss": 1.3821, + "step": 58595 + }, + { + "epoch": 10.42, + "learning_rate": 1.5287111111111114e-05, + "loss": 1.4481, + "step": 58600 + }, + { + "epoch": 10.42, + "learning_rate": 1.528414814814815e-05, + "loss": 1.5852, + "step": 58605 + }, + { + "epoch": 10.42, + "learning_rate": 1.5281185185185188e-05, + "loss": 1.4181, + "step": 58610 + }, + { + "epoch": 10.42, + "learning_rate": 1.5278222222222223e-05, + "loss": 1.6013, + "step": 58615 + }, + { + "epoch": 10.42, + "learning_rate": 1.5275259259259262e-05, + "loss": 1.6393, + "step": 58620 + }, + { + "epoch": 10.42, + "learning_rate": 1.5272296296296294e-05, + "loss": 1.5386, + "step": 58625 + }, + { + "epoch": 10.42, + "learning_rate": 1.5269333333333333e-05, + "loss": 1.5359, + "step": 58630 + }, + { + "epoch": 10.42, + "learning_rate": 1.526637037037037e-05, + "loss": 1.4954, + "step": 58635 + }, + { + "epoch": 10.42, + "learning_rate": 1.5263407407407407e-05, + "loss": 1.4417, + "step": 58640 + }, + { + "epoch": 10.43, + "learning_rate": 1.5260444444444443e-05, + "loss": 1.5191, + "step": 58645 + }, + { + "epoch": 10.43, + "learning_rate": 1.5257481481481482e-05, + "loss": 1.4004, + "step": 58650 + }, + { + "epoch": 10.43, + "learning_rate": 1.5254518518518519e-05, + "loss": 1.5465, + "step": 58655 + }, + { + "epoch": 10.43, + "learning_rate": 1.5251555555555556e-05, + "loss": 1.486, + "step": 58660 + }, + { + "epoch": 10.43, + "learning_rate": 1.5248592592592591e-05, + "loss": 1.5559, + "step": 58665 + }, + { + "epoch": 10.43, + "learning_rate": 1.5245629629629628e-05, + "loss": 1.5072, + "step": 58670 + }, + { + "epoch": 10.43, + "learning_rate": 1.5242666666666666e-05, + "loss": 1.5274, + "step": 58675 + }, + { + "epoch": 10.43, + "learning_rate": 1.5239703703703703e-05, + "loss": 1.4976, + "step": 58680 + }, + { + "epoch": 10.43, + "learning_rate": 1.523674074074074e-05, + "loss": 1.4645, + "step": 58685 + }, + { + "epoch": 10.43, + "learning_rate": 1.5233777777777777e-05, + "loss": 1.5053, + "step": 58690 + }, + { + "epoch": 10.43, + "learning_rate": 1.5230814814814814e-05, + "loss": 1.4699, + "step": 58695 + }, + { + "epoch": 10.44, + "learning_rate": 1.5227851851851851e-05, + "loss": 1.4412, + "step": 58700 + }, + { + "epoch": 10.44, + "learning_rate": 1.5224888888888888e-05, + "loss": 1.4507, + "step": 58705 + }, + { + "epoch": 10.44, + "learning_rate": 1.5221925925925925e-05, + "loss": 1.3676, + "step": 58710 + }, + { + "epoch": 10.44, + "learning_rate": 1.5218962962962963e-05, + "loss": 1.5188, + "step": 58715 + }, + { + "epoch": 10.44, + "learning_rate": 1.5216e-05, + "loss": 1.401, + "step": 58720 + }, + { + "epoch": 10.44, + "learning_rate": 1.5213037037037037e-05, + "loss": 1.556, + "step": 58725 + }, + { + "epoch": 10.44, + "learning_rate": 1.5210074074074074e-05, + "loss": 1.5186, + "step": 58730 + }, + { + "epoch": 10.44, + "learning_rate": 1.5207111111111111e-05, + "loss": 1.5115, + "step": 58735 + }, + { + "epoch": 10.44, + "learning_rate": 1.5204148148148148e-05, + "loss": 1.4199, + "step": 58740 + }, + { + "epoch": 10.44, + "learning_rate": 1.5201185185185185e-05, + "loss": 1.3656, + "step": 58745 + }, + { + "epoch": 10.44, + "learning_rate": 1.5198222222222222e-05, + "loss": 1.6178, + "step": 58750 + }, + { + "epoch": 10.45, + "learning_rate": 1.519525925925926e-05, + "loss": 1.6593, + "step": 58755 + }, + { + "epoch": 10.45, + "learning_rate": 1.5192296296296297e-05, + "loss": 1.5589, + "step": 58760 + }, + { + "epoch": 10.45, + "learning_rate": 1.5189333333333334e-05, + "loss": 1.4319, + "step": 58765 + }, + { + "epoch": 10.45, + "learning_rate": 1.5186370370370371e-05, + "loss": 1.5043, + "step": 58770 + }, + { + "epoch": 10.45, + "learning_rate": 1.5183407407407408e-05, + "loss": 1.4125, + "step": 58775 + }, + { + "epoch": 10.45, + "learning_rate": 1.5180444444444444e-05, + "loss": 1.6009, + "step": 58780 + }, + { + "epoch": 10.45, + "learning_rate": 1.517748148148148e-05, + "loss": 1.5148, + "step": 58785 + }, + { + "epoch": 10.45, + "learning_rate": 1.5174518518518518e-05, + "loss": 1.3878, + "step": 58790 + }, + { + "epoch": 10.45, + "learning_rate": 1.5171555555555555e-05, + "loss": 1.3321, + "step": 58795 + }, + { + "epoch": 10.45, + "learning_rate": 1.5168592592592592e-05, + "loss": 1.4368, + "step": 58800 + }, + { + "epoch": 10.45, + "learning_rate": 1.5165629629629629e-05, + "loss": 1.4802, + "step": 58805 + }, + { + "epoch": 10.46, + "learning_rate": 1.5162666666666666e-05, + "loss": 1.4354, + "step": 58810 + }, + { + "epoch": 10.46, + "learning_rate": 1.5159703703703703e-05, + "loss": 1.4217, + "step": 58815 + }, + { + "epoch": 10.46, + "learning_rate": 1.515674074074074e-05, + "loss": 1.604, + "step": 58820 + }, + { + "epoch": 10.46, + "learning_rate": 1.5153777777777778e-05, + "loss": 1.5944, + "step": 58825 + }, + { + "epoch": 10.46, + "learning_rate": 1.5150814814814815e-05, + "loss": 1.5013, + "step": 58830 + }, + { + "epoch": 10.46, + "learning_rate": 1.5147851851851852e-05, + "loss": 1.5318, + "step": 58835 + }, + { + "epoch": 10.46, + "learning_rate": 1.5144888888888889e-05, + "loss": 1.4149, + "step": 58840 + }, + { + "epoch": 10.46, + "learning_rate": 1.5141925925925926e-05, + "loss": 1.5226, + "step": 58845 + }, + { + "epoch": 10.46, + "learning_rate": 1.5138962962962963e-05, + "loss": 1.4291, + "step": 58850 + }, + { + "epoch": 10.46, + "learning_rate": 1.5136e-05, + "loss": 1.3898, + "step": 58855 + }, + { + "epoch": 10.46, + "learning_rate": 1.5133037037037038e-05, + "loss": 1.5512, + "step": 58860 + }, + { + "epoch": 10.46, + "learning_rate": 1.5130074074074075e-05, + "loss": 1.5051, + "step": 58865 + }, + { + "epoch": 10.47, + "learning_rate": 1.5127111111111112e-05, + "loss": 1.5904, + "step": 58870 + }, + { + "epoch": 10.47, + "learning_rate": 1.5124148148148149e-05, + "loss": 1.5698, + "step": 58875 + }, + { + "epoch": 10.47, + "learning_rate": 1.5121185185185186e-05, + "loss": 1.4048, + "step": 58880 + }, + { + "epoch": 10.47, + "learning_rate": 1.5118222222222223e-05, + "loss": 1.5495, + "step": 58885 + }, + { + "epoch": 10.47, + "learning_rate": 1.511525925925926e-05, + "loss": 1.4548, + "step": 58890 + }, + { + "epoch": 10.47, + "learning_rate": 1.5112296296296297e-05, + "loss": 1.4213, + "step": 58895 + }, + { + "epoch": 10.47, + "learning_rate": 1.5109333333333333e-05, + "loss": 1.4604, + "step": 58900 + }, + { + "epoch": 10.47, + "learning_rate": 1.510637037037037e-05, + "loss": 1.5712, + "step": 58905 + }, + { + "epoch": 10.47, + "learning_rate": 1.5103407407407407e-05, + "loss": 1.3939, + "step": 58910 + }, + { + "epoch": 10.47, + "learning_rate": 1.5100444444444444e-05, + "loss": 1.4122, + "step": 58915 + }, + { + "epoch": 10.47, + "learning_rate": 1.5097481481481481e-05, + "loss": 1.4492, + "step": 58920 + }, + { + "epoch": 10.48, + "learning_rate": 1.5094518518518519e-05, + "loss": 1.5293, + "step": 58925 + }, + { + "epoch": 10.48, + "learning_rate": 1.5091555555555556e-05, + "loss": 1.5234, + "step": 58930 + }, + { + "epoch": 10.48, + "learning_rate": 1.5088592592592593e-05, + "loss": 1.4366, + "step": 58935 + }, + { + "epoch": 10.48, + "learning_rate": 1.508562962962963e-05, + "loss": 1.4986, + "step": 58940 + }, + { + "epoch": 10.48, + "learning_rate": 1.5082666666666667e-05, + "loss": 1.4934, + "step": 58945 + }, + { + "epoch": 10.48, + "learning_rate": 1.5079703703703704e-05, + "loss": 1.4953, + "step": 58950 + }, + { + "epoch": 10.48, + "learning_rate": 1.5076740740740741e-05, + "loss": 1.4866, + "step": 58955 + }, + { + "epoch": 10.48, + "learning_rate": 1.5073777777777778e-05, + "loss": 1.5742, + "step": 58960 + }, + { + "epoch": 10.48, + "learning_rate": 1.5070814814814816e-05, + "loss": 1.4206, + "step": 58965 + }, + { + "epoch": 10.48, + "learning_rate": 1.5067851851851853e-05, + "loss": 1.401, + "step": 58970 + }, + { + "epoch": 10.48, + "learning_rate": 1.506488888888889e-05, + "loss": 1.344, + "step": 58975 + }, + { + "epoch": 10.49, + "learning_rate": 1.5061925925925927e-05, + "loss": 1.4353, + "step": 58980 + }, + { + "epoch": 10.49, + "learning_rate": 1.5058962962962964e-05, + "loss": 1.5807, + "step": 58985 + }, + { + "epoch": 10.49, + "learning_rate": 1.5056000000000001e-05, + "loss": 1.3981, + "step": 58990 + }, + { + "epoch": 10.49, + "learning_rate": 1.5053037037037038e-05, + "loss": 1.3942, + "step": 58995 + }, + { + "epoch": 10.49, + "learning_rate": 1.5050074074074075e-05, + "loss": 1.5138, + "step": 59000 + }, + { + "epoch": 10.49, + "learning_rate": 1.5047111111111113e-05, + "loss": 1.5463, + "step": 59005 + }, + { + "epoch": 10.49, + "learning_rate": 1.504414814814815e-05, + "loss": 1.5116, + "step": 59010 + }, + { + "epoch": 10.49, + "learning_rate": 1.5041185185185185e-05, + "loss": 1.4823, + "step": 59015 + }, + { + "epoch": 10.49, + "learning_rate": 1.5038222222222222e-05, + "loss": 1.4437, + "step": 59020 + }, + { + "epoch": 10.49, + "learning_rate": 1.503525925925926e-05, + "loss": 1.5179, + "step": 59025 + }, + { + "epoch": 10.49, + "learning_rate": 1.5032296296296297e-05, + "loss": 1.4892, + "step": 59030 + }, + { + "epoch": 10.5, + "learning_rate": 1.5029333333333334e-05, + "loss": 1.4386, + "step": 59035 + }, + { + "epoch": 10.5, + "learning_rate": 1.502637037037037e-05, + "loss": 1.5707, + "step": 59040 + }, + { + "epoch": 10.5, + "learning_rate": 1.5023407407407408e-05, + "loss": 1.3956, + "step": 59045 + }, + { + "epoch": 10.5, + "learning_rate": 1.5020444444444445e-05, + "loss": 1.6347, + "step": 59050 + }, + { + "epoch": 10.5, + "learning_rate": 1.5017481481481482e-05, + "loss": 1.5217, + "step": 59055 + }, + { + "epoch": 10.5, + "learning_rate": 1.501451851851852e-05, + "loss": 1.5199, + "step": 59060 + }, + { + "epoch": 10.5, + "learning_rate": 1.5011555555555556e-05, + "loss": 1.373, + "step": 59065 + }, + { + "epoch": 10.5, + "learning_rate": 1.5008592592592594e-05, + "loss": 1.5163, + "step": 59070 + }, + { + "epoch": 10.5, + "learning_rate": 1.500562962962963e-05, + "loss": 1.5799, + "step": 59075 + }, + { + "epoch": 10.5, + "learning_rate": 1.5002666666666668e-05, + "loss": 1.5092, + "step": 59080 + }, + { + "epoch": 10.5, + "learning_rate": 1.4999703703703705e-05, + "loss": 1.4816, + "step": 59085 + }, + { + "epoch": 10.5, + "learning_rate": 1.4996740740740742e-05, + "loss": 1.4873, + "step": 59090 + }, + { + "epoch": 10.51, + "learning_rate": 1.499377777777778e-05, + "loss": 1.4824, + "step": 59095 + }, + { + "epoch": 10.51, + "learning_rate": 1.4990814814814816e-05, + "loss": 1.4378, + "step": 59100 + }, + { + "epoch": 10.51, + "learning_rate": 1.4987851851851853e-05, + "loss": 1.4483, + "step": 59105 + }, + { + "epoch": 10.51, + "learning_rate": 1.498488888888889e-05, + "loss": 1.5145, + "step": 59110 + }, + { + "epoch": 10.51, + "learning_rate": 1.4981925925925928e-05, + "loss": 1.5637, + "step": 59115 + }, + { + "epoch": 10.51, + "learning_rate": 1.4978962962962965e-05, + "loss": 1.5308, + "step": 59120 + }, + { + "epoch": 10.51, + "learning_rate": 1.4976000000000002e-05, + "loss": 1.521, + "step": 59125 + }, + { + "epoch": 10.51, + "learning_rate": 1.4973037037037037e-05, + "loss": 1.3467, + "step": 59130 + }, + { + "epoch": 10.51, + "learning_rate": 1.4970074074074075e-05, + "loss": 1.4773, + "step": 59135 + }, + { + "epoch": 10.51, + "learning_rate": 1.4967111111111112e-05, + "loss": 1.4491, + "step": 59140 + }, + { + "epoch": 10.51, + "learning_rate": 1.4964148148148149e-05, + "loss": 1.4323, + "step": 59145 + }, + { + "epoch": 10.52, + "learning_rate": 1.4961185185185186e-05, + "loss": 1.3991, + "step": 59150 + }, + { + "epoch": 10.52, + "learning_rate": 1.4958222222222223e-05, + "loss": 1.5583, + "step": 59155 + }, + { + "epoch": 10.52, + "learning_rate": 1.495525925925926e-05, + "loss": 1.444, + "step": 59160 + }, + { + "epoch": 10.52, + "learning_rate": 1.4952296296296297e-05, + "loss": 1.4432, + "step": 59165 + }, + { + "epoch": 10.52, + "learning_rate": 1.4949333333333334e-05, + "loss": 1.3875, + "step": 59170 + }, + { + "epoch": 10.52, + "learning_rate": 1.4946370370370372e-05, + "loss": 1.4553, + "step": 59175 + }, + { + "epoch": 10.52, + "learning_rate": 1.4943407407407409e-05, + "loss": 1.498, + "step": 59180 + }, + { + "epoch": 10.52, + "learning_rate": 1.4940444444444446e-05, + "loss": 1.3265, + "step": 59185 + }, + { + "epoch": 10.52, + "learning_rate": 1.4937481481481483e-05, + "loss": 1.5591, + "step": 59190 + }, + { + "epoch": 10.52, + "learning_rate": 1.493451851851852e-05, + "loss": 1.3483, + "step": 59195 + }, + { + "epoch": 10.52, + "learning_rate": 1.4931555555555557e-05, + "loss": 1.5502, + "step": 59200 + }, + { + "epoch": 10.53, + "learning_rate": 1.4928592592592594e-05, + "loss": 1.4743, + "step": 59205 + }, + { + "epoch": 10.53, + "learning_rate": 1.4925629629629631e-05, + "loss": 1.4783, + "step": 59210 + }, + { + "epoch": 10.53, + "learning_rate": 1.4922666666666669e-05, + "loss": 1.439, + "step": 59215 + }, + { + "epoch": 10.53, + "learning_rate": 1.4919703703703706e-05, + "loss": 1.3919, + "step": 59220 + }, + { + "epoch": 10.53, + "learning_rate": 1.4916740740740743e-05, + "loss": 1.5234, + "step": 59225 + }, + { + "epoch": 10.53, + "learning_rate": 1.491377777777778e-05, + "loss": 1.4989, + "step": 59230 + }, + { + "epoch": 10.53, + "learning_rate": 1.4910814814814817e-05, + "loss": 1.4481, + "step": 59235 + }, + { + "epoch": 10.53, + "learning_rate": 1.4907851851851854e-05, + "loss": 1.5067, + "step": 59240 + }, + { + "epoch": 10.53, + "learning_rate": 1.490488888888889e-05, + "loss": 1.5316, + "step": 59245 + }, + { + "epoch": 10.53, + "learning_rate": 1.4901925925925927e-05, + "loss": 1.5063, + "step": 59250 + }, + { + "epoch": 10.53, + "learning_rate": 1.4898962962962964e-05, + "loss": 1.4686, + "step": 59255 + }, + { + "epoch": 10.54, + "learning_rate": 1.4896000000000001e-05, + "loss": 1.4907, + "step": 59260 + }, + { + "epoch": 10.54, + "learning_rate": 1.4893037037037038e-05, + "loss": 1.449, + "step": 59265 + }, + { + "epoch": 10.54, + "learning_rate": 1.4890074074074075e-05, + "loss": 1.4979, + "step": 59270 + }, + { + "epoch": 10.54, + "learning_rate": 1.4887111111111112e-05, + "loss": 1.518, + "step": 59275 + }, + { + "epoch": 10.54, + "learning_rate": 1.488414814814815e-05, + "loss": 1.527, + "step": 59280 + }, + { + "epoch": 10.54, + "learning_rate": 1.4881185185185187e-05, + "loss": 1.4579, + "step": 59285 + }, + { + "epoch": 10.54, + "learning_rate": 1.4878222222222224e-05, + "loss": 1.3996, + "step": 59290 + }, + { + "epoch": 10.54, + "learning_rate": 1.4875259259259261e-05, + "loss": 1.4874, + "step": 59295 + }, + { + "epoch": 10.54, + "learning_rate": 1.4872296296296298e-05, + "loss": 1.5069, + "step": 59300 + }, + { + "epoch": 10.54, + "learning_rate": 1.4869333333333335e-05, + "loss": 1.5242, + "step": 59305 + }, + { + "epoch": 10.54, + "learning_rate": 1.4866370370370372e-05, + "loss": 1.5465, + "step": 59310 + }, + { + "epoch": 10.54, + "learning_rate": 1.486340740740741e-05, + "loss": 1.5863, + "step": 59315 + }, + { + "epoch": 10.55, + "learning_rate": 1.4860444444444447e-05, + "loss": 1.5668, + "step": 59320 + }, + { + "epoch": 10.55, + "learning_rate": 1.4857481481481484e-05, + "loss": 1.5052, + "step": 59325 + }, + { + "epoch": 10.55, + "learning_rate": 1.485451851851852e-05, + "loss": 1.5289, + "step": 59330 + }, + { + "epoch": 10.55, + "learning_rate": 1.4851555555555558e-05, + "loss": 1.5435, + "step": 59335 + }, + { + "epoch": 10.55, + "learning_rate": 1.4848592592592595e-05, + "loss": 1.5255, + "step": 59340 + }, + { + "epoch": 10.55, + "learning_rate": 1.4845629629629632e-05, + "loss": 1.4009, + "step": 59345 + }, + { + "epoch": 10.55, + "learning_rate": 1.484266666666667e-05, + "loss": 1.4632, + "step": 59350 + }, + { + "epoch": 10.55, + "learning_rate": 1.4839703703703706e-05, + "loss": 1.4995, + "step": 59355 + }, + { + "epoch": 10.55, + "learning_rate": 1.4836740740740742e-05, + "loss": 1.5939, + "step": 59360 + }, + { + "epoch": 10.55, + "learning_rate": 1.4833777777777779e-05, + "loss": 1.5393, + "step": 59365 + }, + { + "epoch": 10.55, + "learning_rate": 1.4830814814814816e-05, + "loss": 1.312, + "step": 59370 + }, + { + "epoch": 10.56, + "learning_rate": 1.4827851851851853e-05, + "loss": 1.4138, + "step": 59375 + }, + { + "epoch": 10.56, + "learning_rate": 1.482488888888889e-05, + "loss": 1.5072, + "step": 59380 + }, + { + "epoch": 10.56, + "learning_rate": 1.4821925925925928e-05, + "loss": 1.5742, + "step": 59385 + }, + { + "epoch": 10.56, + "learning_rate": 1.4818962962962965e-05, + "loss": 1.44, + "step": 59390 + }, + { + "epoch": 10.56, + "learning_rate": 1.4816e-05, + "loss": 1.4811, + "step": 59395 + }, + { + "epoch": 10.56, + "learning_rate": 1.4813037037037036e-05, + "loss": 1.3953, + "step": 59400 + }, + { + "epoch": 10.56, + "learning_rate": 1.4810074074074073e-05, + "loss": 1.465, + "step": 59405 + }, + { + "epoch": 10.56, + "learning_rate": 1.480711111111111e-05, + "loss": 1.4889, + "step": 59410 + }, + { + "epoch": 10.56, + "learning_rate": 1.4804148148148147e-05, + "loss": 1.5823, + "step": 59415 + }, + { + "epoch": 10.56, + "learning_rate": 1.4801185185185184e-05, + "loss": 1.4349, + "step": 59420 + }, + { + "epoch": 10.56, + "learning_rate": 1.4798222222222221e-05, + "loss": 1.4311, + "step": 59425 + }, + { + "epoch": 10.57, + "learning_rate": 1.4795259259259258e-05, + "loss": 1.5111, + "step": 59430 + }, + { + "epoch": 10.57, + "learning_rate": 1.4792296296296295e-05, + "loss": 1.4303, + "step": 59435 + }, + { + "epoch": 10.57, + "learning_rate": 1.4789333333333333e-05, + "loss": 1.434, + "step": 59440 + }, + { + "epoch": 10.57, + "learning_rate": 1.478637037037037e-05, + "loss": 1.4622, + "step": 59445 + }, + { + "epoch": 10.57, + "learning_rate": 1.4783407407407407e-05, + "loss": 1.4281, + "step": 59450 + }, + { + "epoch": 10.57, + "learning_rate": 1.4780444444444444e-05, + "loss": 1.5744, + "step": 59455 + }, + { + "epoch": 10.57, + "learning_rate": 1.4777481481481481e-05, + "loss": 1.5579, + "step": 59460 + }, + { + "epoch": 10.57, + "learning_rate": 1.4774518518518518e-05, + "loss": 1.5868, + "step": 59465 + }, + { + "epoch": 10.57, + "learning_rate": 1.4771555555555555e-05, + "loss": 1.5097, + "step": 59470 + }, + { + "epoch": 10.57, + "learning_rate": 1.4768592592592592e-05, + "loss": 1.3369, + "step": 59475 + }, + { + "epoch": 10.57, + "learning_rate": 1.476562962962963e-05, + "loss": 1.4505, + "step": 59480 + }, + { + "epoch": 10.58, + "learning_rate": 1.4762666666666667e-05, + "loss": 1.4633, + "step": 59485 + }, + { + "epoch": 10.58, + "learning_rate": 1.4759703703703704e-05, + "loss": 1.5249, + "step": 59490 + }, + { + "epoch": 10.58, + "learning_rate": 1.4756740740740741e-05, + "loss": 1.5538, + "step": 59495 + }, + { + "epoch": 10.58, + "learning_rate": 1.4753777777777778e-05, + "loss": 1.4517, + "step": 59500 + }, + { + "epoch": 10.58, + "learning_rate": 1.4750814814814815e-05, + "loss": 1.5316, + "step": 59505 + }, + { + "epoch": 10.58, + "learning_rate": 1.4747851851851852e-05, + "loss": 1.4518, + "step": 59510 + }, + { + "epoch": 10.58, + "learning_rate": 1.4744888888888888e-05, + "loss": 1.479, + "step": 59515 + }, + { + "epoch": 10.58, + "learning_rate": 1.4741925925925925e-05, + "loss": 1.342, + "step": 59520 + }, + { + "epoch": 10.58, + "learning_rate": 1.4738962962962962e-05, + "loss": 1.4356, + "step": 59525 + }, + { + "epoch": 10.58, + "learning_rate": 1.4736e-05, + "loss": 1.513, + "step": 59530 + }, + { + "epoch": 10.58, + "learning_rate": 1.4733037037037036e-05, + "loss": 1.4033, + "step": 59535 + }, + { + "epoch": 10.58, + "learning_rate": 1.4730074074074073e-05, + "loss": 1.4627, + "step": 59540 + }, + { + "epoch": 10.59, + "learning_rate": 1.472711111111111e-05, + "loss": 1.5144, + "step": 59545 + }, + { + "epoch": 10.59, + "learning_rate": 1.4724148148148148e-05, + "loss": 1.4642, + "step": 59550 + }, + { + "epoch": 10.59, + "learning_rate": 1.4721185185185185e-05, + "loss": 1.6015, + "step": 59555 + }, + { + "epoch": 10.59, + "learning_rate": 1.4718222222222222e-05, + "loss": 1.4545, + "step": 59560 + }, + { + "epoch": 10.59, + "learning_rate": 1.4715259259259259e-05, + "loss": 1.4569, + "step": 59565 + }, + { + "epoch": 10.59, + "learning_rate": 1.4712296296296296e-05, + "loss": 1.5536, + "step": 59570 + }, + { + "epoch": 10.59, + "learning_rate": 1.4709333333333333e-05, + "loss": 1.574, + "step": 59575 + }, + { + "epoch": 10.59, + "learning_rate": 1.470637037037037e-05, + "loss": 1.5928, + "step": 59580 + }, + { + "epoch": 10.59, + "learning_rate": 1.4703407407407408e-05, + "loss": 1.557, + "step": 59585 + }, + { + "epoch": 10.59, + "learning_rate": 1.4700444444444445e-05, + "loss": 1.5245, + "step": 59590 + }, + { + "epoch": 10.59, + "learning_rate": 1.4697481481481482e-05, + "loss": 1.5897, + "step": 59595 + }, + { + "epoch": 10.6, + "learning_rate": 1.4694518518518519e-05, + "loss": 1.4137, + "step": 59600 + }, + { + "epoch": 10.6, + "learning_rate": 1.4691555555555556e-05, + "loss": 1.5963, + "step": 59605 + }, + { + "epoch": 10.6, + "learning_rate": 1.4688592592592593e-05, + "loss": 1.4987, + "step": 59610 + }, + { + "epoch": 10.6, + "learning_rate": 1.468562962962963e-05, + "loss": 1.4535, + "step": 59615 + }, + { + "epoch": 10.6, + "learning_rate": 1.4682666666666667e-05, + "loss": 1.4947, + "step": 59620 + }, + { + "epoch": 10.6, + "learning_rate": 1.4679703703703705e-05, + "loss": 1.5308, + "step": 59625 + }, + { + "epoch": 10.6, + "learning_rate": 1.4676740740740742e-05, + "loss": 1.4693, + "step": 59630 + }, + { + "epoch": 10.6, + "learning_rate": 1.4673777777777777e-05, + "loss": 1.5759, + "step": 59635 + }, + { + "epoch": 10.6, + "learning_rate": 1.4670814814814814e-05, + "loss": 1.489, + "step": 59640 + }, + { + "epoch": 10.6, + "learning_rate": 1.4667851851851851e-05, + "loss": 1.4272, + "step": 59645 + }, + { + "epoch": 10.6, + "learning_rate": 1.4664888888888889e-05, + "loss": 1.4291, + "step": 59650 + }, + { + "epoch": 10.61, + "learning_rate": 1.4661925925925926e-05, + "loss": 1.562, + "step": 59655 + }, + { + "epoch": 10.61, + "learning_rate": 1.4658962962962963e-05, + "loss": 1.4417, + "step": 59660 + }, + { + "epoch": 10.61, + "learning_rate": 1.4656e-05, + "loss": 1.5057, + "step": 59665 + }, + { + "epoch": 10.61, + "learning_rate": 1.4653037037037037e-05, + "loss": 1.5495, + "step": 59670 + }, + { + "epoch": 10.61, + "learning_rate": 1.4650074074074074e-05, + "loss": 1.5777, + "step": 59675 + }, + { + "epoch": 10.61, + "learning_rate": 1.4647111111111111e-05, + "loss": 1.4937, + "step": 59680 + }, + { + "epoch": 10.61, + "learning_rate": 1.4644148148148148e-05, + "loss": 1.5622, + "step": 59685 + }, + { + "epoch": 10.61, + "learning_rate": 1.4641185185185186e-05, + "loss": 1.5449, + "step": 59690 + }, + { + "epoch": 10.61, + "learning_rate": 1.4638222222222223e-05, + "loss": 1.4458, + "step": 59695 + }, + { + "epoch": 10.61, + "learning_rate": 1.463525925925926e-05, + "loss": 1.5572, + "step": 59700 + }, + { + "epoch": 10.61, + "learning_rate": 1.4632296296296297e-05, + "loss": 1.3597, + "step": 59705 + }, + { + "epoch": 10.62, + "learning_rate": 1.4629333333333334e-05, + "loss": 1.5151, + "step": 59710 + }, + { + "epoch": 10.62, + "learning_rate": 1.4626370370370371e-05, + "loss": 1.4862, + "step": 59715 + }, + { + "epoch": 10.62, + "learning_rate": 1.4623407407407408e-05, + "loss": 1.478, + "step": 59720 + }, + { + "epoch": 10.62, + "learning_rate": 1.4620444444444445e-05, + "loss": 1.4345, + "step": 59725 + }, + { + "epoch": 10.62, + "learning_rate": 1.4617481481481483e-05, + "loss": 1.5415, + "step": 59730 + }, + { + "epoch": 10.62, + "learning_rate": 1.461451851851852e-05, + "loss": 1.6176, + "step": 59735 + }, + { + "epoch": 10.62, + "learning_rate": 1.4611555555555557e-05, + "loss": 1.563, + "step": 59740 + }, + { + "epoch": 10.62, + "learning_rate": 1.4608592592592594e-05, + "loss": 1.3982, + "step": 59745 + }, + { + "epoch": 10.62, + "learning_rate": 1.460562962962963e-05, + "loss": 1.4572, + "step": 59750 + }, + { + "epoch": 10.62, + "learning_rate": 1.4602666666666667e-05, + "loss": 1.3848, + "step": 59755 + }, + { + "epoch": 10.62, + "learning_rate": 1.4599703703703704e-05, + "loss": 1.4139, + "step": 59760 + }, + { + "epoch": 10.62, + "learning_rate": 1.459674074074074e-05, + "loss": 1.5477, + "step": 59765 + }, + { + "epoch": 10.63, + "learning_rate": 1.4593777777777778e-05, + "loss": 1.4756, + "step": 59770 + }, + { + "epoch": 10.63, + "learning_rate": 1.4590814814814815e-05, + "loss": 1.5747, + "step": 59775 + }, + { + "epoch": 10.63, + "learning_rate": 1.4587851851851852e-05, + "loss": 1.5008, + "step": 59780 + }, + { + "epoch": 10.63, + "learning_rate": 1.458488888888889e-05, + "loss": 1.5302, + "step": 59785 + }, + { + "epoch": 10.63, + "learning_rate": 1.4581925925925926e-05, + "loss": 1.3975, + "step": 59790 + }, + { + "epoch": 10.63, + "learning_rate": 1.4578962962962964e-05, + "loss": 1.4491, + "step": 59795 + }, + { + "epoch": 10.63, + "learning_rate": 1.4576e-05, + "loss": 1.4996, + "step": 59800 + }, + { + "epoch": 10.63, + "learning_rate": 1.4573037037037038e-05, + "loss": 1.5466, + "step": 59805 + }, + { + "epoch": 10.63, + "learning_rate": 1.4570074074074075e-05, + "loss": 1.6001, + "step": 59810 + }, + { + "epoch": 10.63, + "learning_rate": 1.4567111111111112e-05, + "loss": 1.469, + "step": 59815 + }, + { + "epoch": 10.63, + "learning_rate": 1.456414814814815e-05, + "loss": 1.5242, + "step": 59820 + }, + { + "epoch": 10.64, + "learning_rate": 1.4561185185185186e-05, + "loss": 1.3997, + "step": 59825 + }, + { + "epoch": 10.64, + "learning_rate": 1.4558222222222223e-05, + "loss": 1.3717, + "step": 59830 + }, + { + "epoch": 10.64, + "learning_rate": 1.455525925925926e-05, + "loss": 1.4614, + "step": 59835 + }, + { + "epoch": 10.64, + "learning_rate": 1.4552296296296298e-05, + "loss": 1.4632, + "step": 59840 + }, + { + "epoch": 10.64, + "learning_rate": 1.4549333333333335e-05, + "loss": 1.5575, + "step": 59845 + }, + { + "epoch": 10.64, + "learning_rate": 1.4546370370370372e-05, + "loss": 1.5949, + "step": 59850 + }, + { + "epoch": 10.64, + "learning_rate": 1.4543407407407409e-05, + "loss": 1.5025, + "step": 59855 + }, + { + "epoch": 10.64, + "learning_rate": 1.4540444444444446e-05, + "loss": 1.5178, + "step": 59860 + }, + { + "epoch": 10.64, + "learning_rate": 1.4537481481481482e-05, + "loss": 1.4239, + "step": 59865 + }, + { + "epoch": 10.64, + "learning_rate": 1.4534518518518519e-05, + "loss": 1.4941, + "step": 59870 + }, + { + "epoch": 10.64, + "learning_rate": 1.4531555555555556e-05, + "loss": 1.4488, + "step": 59875 + }, + { + "epoch": 10.65, + "learning_rate": 1.4528592592592593e-05, + "loss": 1.4208, + "step": 59880 + }, + { + "epoch": 10.65, + "learning_rate": 1.452562962962963e-05, + "loss": 1.5486, + "step": 59885 + }, + { + "epoch": 10.65, + "learning_rate": 1.4522666666666667e-05, + "loss": 1.5851, + "step": 59890 + }, + { + "epoch": 10.65, + "learning_rate": 1.4519703703703704e-05, + "loss": 1.4702, + "step": 59895 + }, + { + "epoch": 10.65, + "learning_rate": 1.4516740740740742e-05, + "loss": 1.4877, + "step": 59900 + }, + { + "epoch": 10.65, + "learning_rate": 1.4513777777777779e-05, + "loss": 1.5204, + "step": 59905 + }, + { + "epoch": 10.65, + "learning_rate": 1.4510814814814816e-05, + "loss": 1.5545, + "step": 59910 + }, + { + "epoch": 10.65, + "learning_rate": 1.4507851851851853e-05, + "loss": 1.4911, + "step": 59915 + }, + { + "epoch": 10.65, + "learning_rate": 1.450488888888889e-05, + "loss": 1.4137, + "step": 59920 + }, + { + "epoch": 10.65, + "learning_rate": 1.4501925925925927e-05, + "loss": 1.5622, + "step": 59925 + }, + { + "epoch": 10.65, + "learning_rate": 1.4498962962962964e-05, + "loss": 1.4953, + "step": 59930 + }, + { + "epoch": 10.66, + "learning_rate": 1.4496000000000001e-05, + "loss": 1.5193, + "step": 59935 + }, + { + "epoch": 10.66, + "learning_rate": 1.4493037037037039e-05, + "loss": 1.539, + "step": 59940 + }, + { + "epoch": 10.66, + "learning_rate": 1.4490074074074076e-05, + "loss": 1.4207, + "step": 59945 + }, + { + "epoch": 10.66, + "learning_rate": 1.4487111111111113e-05, + "loss": 1.5879, + "step": 59950 + }, + { + "epoch": 10.66, + "learning_rate": 1.448414814814815e-05, + "loss": 1.4598, + "step": 59955 + }, + { + "epoch": 10.66, + "learning_rate": 1.4481185185185187e-05, + "loss": 1.472, + "step": 59960 + }, + { + "epoch": 10.66, + "learning_rate": 1.4478222222222224e-05, + "loss": 1.5657, + "step": 59965 + }, + { + "epoch": 10.66, + "learning_rate": 1.4475259259259261e-05, + "loss": 1.4782, + "step": 59970 + }, + { + "epoch": 10.66, + "learning_rate": 1.4472296296296299e-05, + "loss": 1.5402, + "step": 59975 + }, + { + "epoch": 10.66, + "learning_rate": 1.4469333333333334e-05, + "loss": 1.544, + "step": 59980 + }, + { + "epoch": 10.66, + "learning_rate": 1.4466370370370371e-05, + "loss": 1.3854, + "step": 59985 + }, + { + "epoch": 10.66, + "learning_rate": 1.4463407407407408e-05, + "loss": 1.4544, + "step": 59990 + }, + { + "epoch": 10.67, + "learning_rate": 1.4460444444444445e-05, + "loss": 1.5513, + "step": 59995 + }, + { + "epoch": 10.67, + "learning_rate": 1.4457481481481482e-05, + "loss": 1.5374, + "step": 60000 + }, + { + "epoch": 10.67, + "eval_loss": 1.3671331405639648, + "eval_rouge2_fmeasure": 0.2064, + "eval_rouge2_precision": 0.241, + "eval_rouge2_recall": 0.1899, + "eval_runtime": 37898.643, + "eval_samples_per_second": 0.132, + "eval_steps_per_second": 0.066, + "step": 60000 + }, + { + "epoch": 10.67, + "learning_rate": 1.445451851851852e-05, + "loss": 1.5456, + "step": 60005 + }, + { + "epoch": 10.67, + "learning_rate": 1.4451555555555557e-05, + "loss": 1.5704, + "step": 60010 + }, + { + "epoch": 10.67, + "learning_rate": 1.4448592592592594e-05, + "loss": 1.4521, + "step": 60015 + }, + { + "epoch": 10.67, + "learning_rate": 1.4445629629629631e-05, + "loss": 1.4357, + "step": 60020 + }, + { + "epoch": 10.67, + "learning_rate": 1.4442666666666668e-05, + "loss": 1.5525, + "step": 60025 + }, + { + "epoch": 10.67, + "learning_rate": 1.4439703703703705e-05, + "loss": 1.5162, + "step": 60030 + }, + { + "epoch": 10.67, + "learning_rate": 1.4436740740740742e-05, + "loss": 1.4784, + "step": 60035 + }, + { + "epoch": 10.67, + "learning_rate": 1.443377777777778e-05, + "loss": 1.6106, + "step": 60040 + }, + { + "epoch": 10.67, + "learning_rate": 1.4430814814814817e-05, + "loss": 1.4565, + "step": 60045 + }, + { + "epoch": 10.68, + "learning_rate": 1.4427851851851854e-05, + "loss": 1.5233, + "step": 60050 + }, + { + "epoch": 10.68, + "learning_rate": 1.4424888888888891e-05, + "loss": 1.5845, + "step": 60055 + }, + { + "epoch": 10.68, + "learning_rate": 1.4421925925925928e-05, + "loss": 1.4287, + "step": 60060 + }, + { + "epoch": 10.68, + "learning_rate": 1.4418962962962965e-05, + "loss": 1.5147, + "step": 60065 + }, + { + "epoch": 10.68, + "learning_rate": 1.4416000000000002e-05, + "loss": 1.5221, + "step": 60070 + }, + { + "epoch": 10.68, + "learning_rate": 1.441303703703704e-05, + "loss": 1.4068, + "step": 60075 + }, + { + "epoch": 10.68, + "learning_rate": 1.4410074074074077e-05, + "loss": 1.4833, + "step": 60080 + }, + { + "epoch": 10.68, + "learning_rate": 1.4407111111111114e-05, + "loss": 1.6188, + "step": 60085 + }, + { + "epoch": 10.68, + "learning_rate": 1.440414814814815e-05, + "loss": 1.5162, + "step": 60090 + }, + { + "epoch": 10.68, + "learning_rate": 1.4401185185185186e-05, + "loss": 1.332, + "step": 60095 + }, + { + "epoch": 10.68, + "learning_rate": 1.4398222222222223e-05, + "loss": 1.5525, + "step": 60100 + }, + { + "epoch": 10.69, + "learning_rate": 1.439525925925926e-05, + "loss": 1.3926, + "step": 60105 + }, + { + "epoch": 10.69, + "learning_rate": 1.4392296296296298e-05, + "loss": 1.4938, + "step": 60110 + }, + { + "epoch": 10.69, + "learning_rate": 1.4389333333333335e-05, + "loss": 1.4017, + "step": 60115 + }, + { + "epoch": 10.69, + "learning_rate": 1.4386370370370372e-05, + "loss": 1.4176, + "step": 60120 + }, + { + "epoch": 10.69, + "learning_rate": 1.4383407407407409e-05, + "loss": 1.5972, + "step": 60125 + }, + { + "epoch": 10.69, + "learning_rate": 1.4380444444444446e-05, + "loss": 1.4271, + "step": 60130 + }, + { + "epoch": 10.69, + "learning_rate": 1.4377481481481483e-05, + "loss": 1.4331, + "step": 60135 + }, + { + "epoch": 10.69, + "learning_rate": 1.437451851851852e-05, + "loss": 1.5158, + "step": 60140 + }, + { + "epoch": 10.69, + "learning_rate": 1.4371555555555557e-05, + "loss": 1.4903, + "step": 60145 + }, + { + "epoch": 10.69, + "learning_rate": 1.4368592592592595e-05, + "loss": 1.512, + "step": 60150 + }, + { + "epoch": 10.69, + "learning_rate": 1.4365629629629632e-05, + "loss": 1.407, + "step": 60155 + }, + { + "epoch": 10.7, + "learning_rate": 1.4362666666666669e-05, + "loss": 1.5375, + "step": 60160 + }, + { + "epoch": 10.7, + "learning_rate": 1.4359703703703706e-05, + "loss": 1.4681, + "step": 60165 + }, + { + "epoch": 10.7, + "learning_rate": 1.435674074074074e-05, + "loss": 1.4373, + "step": 60170 + }, + { + "epoch": 10.7, + "learning_rate": 1.4353777777777777e-05, + "loss": 1.5577, + "step": 60175 + }, + { + "epoch": 10.7, + "learning_rate": 1.4350814814814814e-05, + "loss": 1.4308, + "step": 60180 + }, + { + "epoch": 10.7, + "learning_rate": 1.4347851851851851e-05, + "loss": 1.456, + "step": 60185 + }, + { + "epoch": 10.7, + "learning_rate": 1.4344888888888888e-05, + "loss": 1.4482, + "step": 60190 + }, + { + "epoch": 10.7, + "learning_rate": 1.4341925925925925e-05, + "loss": 1.5602, + "step": 60195 + }, + { + "epoch": 10.7, + "learning_rate": 1.4338962962962962e-05, + "loss": 1.5001, + "step": 60200 + }, + { + "epoch": 10.7, + "learning_rate": 1.4336e-05, + "loss": 1.4545, + "step": 60205 + }, + { + "epoch": 10.7, + "learning_rate": 1.4333037037037037e-05, + "loss": 1.4525, + "step": 60210 + }, + { + "epoch": 10.7, + "learning_rate": 1.4330074074074074e-05, + "loss": 1.5842, + "step": 60215 + }, + { + "epoch": 10.71, + "learning_rate": 1.4327111111111111e-05, + "loss": 1.4506, + "step": 60220 + }, + { + "epoch": 10.71, + "learning_rate": 1.4324148148148148e-05, + "loss": 1.5572, + "step": 60225 + }, + { + "epoch": 10.71, + "learning_rate": 1.4321185185185185e-05, + "loss": 1.3788, + "step": 60230 + }, + { + "epoch": 10.71, + "learning_rate": 1.4318222222222222e-05, + "loss": 1.5026, + "step": 60235 + }, + { + "epoch": 10.71, + "learning_rate": 1.431525925925926e-05, + "loss": 1.5083, + "step": 60240 + }, + { + "epoch": 10.71, + "learning_rate": 1.4312296296296297e-05, + "loss": 1.4996, + "step": 60245 + }, + { + "epoch": 10.71, + "learning_rate": 1.4309333333333332e-05, + "loss": 1.4328, + "step": 60250 + }, + { + "epoch": 10.71, + "learning_rate": 1.430637037037037e-05, + "loss": 1.4383, + "step": 60255 + }, + { + "epoch": 10.71, + "learning_rate": 1.4303407407407406e-05, + "loss": 1.4603, + "step": 60260 + }, + { + "epoch": 10.71, + "learning_rate": 1.4300444444444443e-05, + "loss": 1.6414, + "step": 60265 + }, + { + "epoch": 10.71, + "learning_rate": 1.429748148148148e-05, + "loss": 1.5872, + "step": 60270 + }, + { + "epoch": 10.72, + "learning_rate": 1.4294518518518518e-05, + "loss": 1.4806, + "step": 60275 + }, + { + "epoch": 10.72, + "learning_rate": 1.4291555555555555e-05, + "loss": 1.3273, + "step": 60280 + }, + { + "epoch": 10.72, + "learning_rate": 1.4288592592592592e-05, + "loss": 1.4482, + "step": 60285 + }, + { + "epoch": 10.72, + "learning_rate": 1.4285629629629629e-05, + "loss": 1.4408, + "step": 60290 + }, + { + "epoch": 10.72, + "learning_rate": 1.4282666666666666e-05, + "loss": 1.3322, + "step": 60295 + }, + { + "epoch": 10.72, + "learning_rate": 1.4279703703703703e-05, + "loss": 1.5857, + "step": 60300 + }, + { + "epoch": 10.72, + "learning_rate": 1.427674074074074e-05, + "loss": 1.3632, + "step": 60305 + }, + { + "epoch": 10.72, + "learning_rate": 1.4273777777777778e-05, + "loss": 1.4963, + "step": 60310 + }, + { + "epoch": 10.72, + "learning_rate": 1.4270814814814815e-05, + "loss": 1.5187, + "step": 60315 + }, + { + "epoch": 10.72, + "learning_rate": 1.4267851851851852e-05, + "loss": 1.5726, + "step": 60320 + }, + { + "epoch": 10.72, + "learning_rate": 1.4264888888888889e-05, + "loss": 1.4157, + "step": 60325 + }, + { + "epoch": 10.73, + "learning_rate": 1.4261925925925926e-05, + "loss": 1.5694, + "step": 60330 + }, + { + "epoch": 10.73, + "learning_rate": 1.4258962962962963e-05, + "loss": 1.4503, + "step": 60335 + }, + { + "epoch": 10.73, + "learning_rate": 1.4256e-05, + "loss": 1.3732, + "step": 60340 + }, + { + "epoch": 10.73, + "learning_rate": 1.4253037037037038e-05, + "loss": 1.571, + "step": 60345 + }, + { + "epoch": 10.73, + "learning_rate": 1.4250074074074075e-05, + "loss": 1.496, + "step": 60350 + }, + { + "epoch": 10.73, + "learning_rate": 1.4247111111111112e-05, + "loss": 1.4736, + "step": 60355 + }, + { + "epoch": 10.73, + "learning_rate": 1.4244148148148149e-05, + "loss": 1.5226, + "step": 60360 + }, + { + "epoch": 10.73, + "learning_rate": 1.4241185185185186e-05, + "loss": 1.4895, + "step": 60365 + }, + { + "epoch": 10.73, + "learning_rate": 1.4238222222222221e-05, + "loss": 1.5708, + "step": 60370 + }, + { + "epoch": 10.73, + "learning_rate": 1.4235259259259259e-05, + "loss": 1.4889, + "step": 60375 + }, + { + "epoch": 10.73, + "learning_rate": 1.4232296296296296e-05, + "loss": 1.5046, + "step": 60380 + }, + { + "epoch": 10.74, + "learning_rate": 1.4229333333333333e-05, + "loss": 1.5581, + "step": 60385 + }, + { + "epoch": 10.74, + "learning_rate": 1.422637037037037e-05, + "loss": 1.3962, + "step": 60390 + }, + { + "epoch": 10.74, + "learning_rate": 1.4223407407407407e-05, + "loss": 1.4948, + "step": 60395 + }, + { + "epoch": 10.74, + "learning_rate": 1.4220444444444444e-05, + "loss": 1.4324, + "step": 60400 + }, + { + "epoch": 10.74, + "learning_rate": 1.4217481481481481e-05, + "loss": 1.5764, + "step": 60405 + }, + { + "epoch": 10.74, + "learning_rate": 1.4214518518518518e-05, + "loss": 1.5, + "step": 60410 + }, + { + "epoch": 10.74, + "learning_rate": 1.4211555555555556e-05, + "loss": 1.4142, + "step": 60415 + }, + { + "epoch": 10.74, + "learning_rate": 1.4208592592592593e-05, + "loss": 1.4421, + "step": 60420 + }, + { + "epoch": 10.74, + "learning_rate": 1.420562962962963e-05, + "loss": 1.4746, + "step": 60425 + }, + { + "epoch": 10.74, + "learning_rate": 1.4202666666666667e-05, + "loss": 1.4443, + "step": 60430 + }, + { + "epoch": 10.74, + "learning_rate": 1.4199703703703704e-05, + "loss": 1.4639, + "step": 60435 + }, + { + "epoch": 10.74, + "learning_rate": 1.4196740740740741e-05, + "loss": 1.5154, + "step": 60440 + }, + { + "epoch": 10.75, + "learning_rate": 1.4193777777777778e-05, + "loss": 1.4681, + "step": 60445 + }, + { + "epoch": 10.75, + "learning_rate": 1.4190814814814816e-05, + "loss": 1.447, + "step": 60450 + }, + { + "epoch": 10.75, + "learning_rate": 1.4187851851851853e-05, + "loss": 1.5505, + "step": 60455 + }, + { + "epoch": 10.75, + "learning_rate": 1.418488888888889e-05, + "loss": 1.5202, + "step": 60460 + }, + { + "epoch": 10.75, + "learning_rate": 1.4181925925925927e-05, + "loss": 1.5247, + "step": 60465 + }, + { + "epoch": 10.75, + "learning_rate": 1.4178962962962964e-05, + "loss": 1.3956, + "step": 60470 + }, + { + "epoch": 10.75, + "learning_rate": 1.4176000000000001e-05, + "loss": 1.5164, + "step": 60475 + }, + { + "epoch": 10.75, + "learning_rate": 1.4173037037037038e-05, + "loss": 1.5039, + "step": 60480 + }, + { + "epoch": 10.75, + "learning_rate": 1.4170074074074074e-05, + "loss": 1.592, + "step": 60485 + }, + { + "epoch": 10.75, + "learning_rate": 1.416711111111111e-05, + "loss": 1.602, + "step": 60490 + }, + { + "epoch": 10.75, + "learning_rate": 1.4164148148148148e-05, + "loss": 1.5388, + "step": 60495 + }, + { + "epoch": 10.76, + "learning_rate": 1.4161185185185185e-05, + "loss": 1.5217, + "step": 60500 + }, + { + "epoch": 10.76, + "learning_rate": 1.4158222222222222e-05, + "loss": 1.6331, + "step": 60505 + }, + { + "epoch": 10.76, + "learning_rate": 1.415525925925926e-05, + "loss": 1.3241, + "step": 60510 + }, + { + "epoch": 10.76, + "learning_rate": 1.4152296296296296e-05, + "loss": 1.3566, + "step": 60515 + }, + { + "epoch": 10.76, + "learning_rate": 1.4149333333333334e-05, + "loss": 1.4882, + "step": 60520 + }, + { + "epoch": 10.76, + "learning_rate": 1.414637037037037e-05, + "loss": 1.4998, + "step": 60525 + }, + { + "epoch": 10.76, + "learning_rate": 1.4143407407407408e-05, + "loss": 1.4572, + "step": 60530 + }, + { + "epoch": 10.76, + "learning_rate": 1.4140444444444445e-05, + "loss": 1.3891, + "step": 60535 + }, + { + "epoch": 10.76, + "learning_rate": 1.4137481481481482e-05, + "loss": 1.402, + "step": 60540 + }, + { + "epoch": 10.76, + "learning_rate": 1.413451851851852e-05, + "loss": 1.4221, + "step": 60545 + }, + { + "epoch": 10.76, + "learning_rate": 1.4131555555555556e-05, + "loss": 1.3952, + "step": 60550 + }, + { + "epoch": 10.77, + "learning_rate": 1.4128592592592594e-05, + "loss": 1.5275, + "step": 60555 + }, + { + "epoch": 10.77, + "learning_rate": 1.412562962962963e-05, + "loss": 1.5083, + "step": 60560 + }, + { + "epoch": 10.77, + "learning_rate": 1.4122666666666668e-05, + "loss": 1.41, + "step": 60565 + }, + { + "epoch": 10.77, + "learning_rate": 1.4119703703703705e-05, + "loss": 1.45, + "step": 60570 + }, + { + "epoch": 10.77, + "learning_rate": 1.4116740740740742e-05, + "loss": 1.482, + "step": 60575 + }, + { + "epoch": 10.77, + "learning_rate": 1.4113777777777779e-05, + "loss": 1.5258, + "step": 60580 + }, + { + "epoch": 10.77, + "learning_rate": 1.4110814814814816e-05, + "loss": 1.4711, + "step": 60585 + }, + { + "epoch": 10.77, + "learning_rate": 1.4107851851851853e-05, + "loss": 1.5044, + "step": 60590 + }, + { + "epoch": 10.77, + "learning_rate": 1.410488888888889e-05, + "loss": 1.4309, + "step": 60595 + }, + { + "epoch": 10.77, + "learning_rate": 1.4101925925925926e-05, + "loss": 1.5276, + "step": 60600 + }, + { + "epoch": 10.77, + "learning_rate": 1.4098962962962963e-05, + "loss": 1.5159, + "step": 60605 + }, + { + "epoch": 10.78, + "learning_rate": 1.4096e-05, + "loss": 1.4738, + "step": 60610 + }, + { + "epoch": 10.78, + "learning_rate": 1.4093037037037037e-05, + "loss": 1.5394, + "step": 60615 + }, + { + "epoch": 10.78, + "learning_rate": 1.4090074074074074e-05, + "loss": 1.4643, + "step": 60620 + }, + { + "epoch": 10.78, + "learning_rate": 1.4087111111111112e-05, + "loss": 1.4516, + "step": 60625 + }, + { + "epoch": 10.78, + "learning_rate": 1.4084148148148149e-05, + "loss": 1.5585, + "step": 60630 + }, + { + "epoch": 10.78, + "learning_rate": 1.4081185185185186e-05, + "loss": 1.3662, + "step": 60635 + }, + { + "epoch": 10.78, + "learning_rate": 1.4078222222222223e-05, + "loss": 1.5321, + "step": 60640 + }, + { + "epoch": 10.78, + "learning_rate": 1.407525925925926e-05, + "loss": 1.3711, + "step": 60645 + }, + { + "epoch": 10.78, + "learning_rate": 1.4072296296296297e-05, + "loss": 1.642, + "step": 60650 + }, + { + "epoch": 10.78, + "learning_rate": 1.4069333333333334e-05, + "loss": 1.3266, + "step": 60655 + }, + { + "epoch": 10.78, + "learning_rate": 1.4066370370370372e-05, + "loss": 1.6406, + "step": 60660 + }, + { + "epoch": 10.78, + "learning_rate": 1.4063407407407409e-05, + "loss": 1.4431, + "step": 60665 + }, + { + "epoch": 10.79, + "learning_rate": 1.4060444444444446e-05, + "loss": 1.4774, + "step": 60670 + }, + { + "epoch": 10.79, + "learning_rate": 1.4057481481481483e-05, + "loss": 1.5469, + "step": 60675 + }, + { + "epoch": 10.79, + "learning_rate": 1.405451851851852e-05, + "loss": 1.378, + "step": 60680 + }, + { + "epoch": 10.79, + "learning_rate": 1.4051555555555557e-05, + "loss": 1.5359, + "step": 60685 + }, + { + "epoch": 10.79, + "learning_rate": 1.4048592592592594e-05, + "loss": 1.475, + "step": 60690 + }, + { + "epoch": 10.79, + "learning_rate": 1.4045629629629631e-05, + "loss": 1.4788, + "step": 60695 + }, + { + "epoch": 10.79, + "learning_rate": 1.4042666666666669e-05, + "loss": 1.5485, + "step": 60700 + }, + { + "epoch": 10.79, + "learning_rate": 1.4039703703703706e-05, + "loss": 1.4579, + "step": 60705 + }, + { + "epoch": 10.79, + "learning_rate": 1.4036740740740743e-05, + "loss": 1.395, + "step": 60710 + }, + { + "epoch": 10.79, + "learning_rate": 1.4033777777777778e-05, + "loss": 1.4205, + "step": 60715 + }, + { + "epoch": 10.79, + "learning_rate": 1.4030814814814815e-05, + "loss": 1.48, + "step": 60720 + }, + { + "epoch": 10.8, + "learning_rate": 1.4027851851851852e-05, + "loss": 1.4741, + "step": 60725 + }, + { + "epoch": 10.8, + "learning_rate": 1.402488888888889e-05, + "loss": 1.4177, + "step": 60730 + }, + { + "epoch": 10.8, + "learning_rate": 1.4021925925925927e-05, + "loss": 1.4869, + "step": 60735 + }, + { + "epoch": 10.8, + "learning_rate": 1.4018962962962964e-05, + "loss": 1.4335, + "step": 60740 + }, + { + "epoch": 10.8, + "learning_rate": 1.4016000000000001e-05, + "loss": 1.4961, + "step": 60745 + }, + { + "epoch": 10.8, + "learning_rate": 1.4013037037037038e-05, + "loss": 1.5344, + "step": 60750 + }, + { + "epoch": 10.8, + "learning_rate": 1.4010074074074075e-05, + "loss": 1.4651, + "step": 60755 + }, + { + "epoch": 10.8, + "learning_rate": 1.4007111111111112e-05, + "loss": 1.5028, + "step": 60760 + }, + { + "epoch": 10.8, + "learning_rate": 1.400414814814815e-05, + "loss": 1.4334, + "step": 60765 + }, + { + "epoch": 10.8, + "learning_rate": 1.4001185185185187e-05, + "loss": 1.4436, + "step": 60770 + }, + { + "epoch": 10.8, + "learning_rate": 1.3998222222222224e-05, + "loss": 1.4604, + "step": 60775 + }, + { + "epoch": 10.81, + "learning_rate": 1.3995259259259261e-05, + "loss": 1.4823, + "step": 60780 + }, + { + "epoch": 10.81, + "learning_rate": 1.3992296296296298e-05, + "loss": 1.5768, + "step": 60785 + }, + { + "epoch": 10.81, + "learning_rate": 1.3989333333333335e-05, + "loss": 1.4512, + "step": 60790 + }, + { + "epoch": 10.81, + "learning_rate": 1.3986370370370372e-05, + "loss": 1.5093, + "step": 60795 + }, + { + "epoch": 10.81, + "learning_rate": 1.398340740740741e-05, + "loss": 1.5598, + "step": 60800 + }, + { + "epoch": 10.81, + "learning_rate": 1.3980444444444447e-05, + "loss": 1.5159, + "step": 60805 + }, + { + "epoch": 10.81, + "learning_rate": 1.3977481481481484e-05, + "loss": 1.5411, + "step": 60810 + }, + { + "epoch": 10.81, + "learning_rate": 1.397451851851852e-05, + "loss": 1.4923, + "step": 60815 + }, + { + "epoch": 10.81, + "learning_rate": 1.3971555555555558e-05, + "loss": 1.6446, + "step": 60820 + }, + { + "epoch": 10.81, + "learning_rate": 1.3968592592592595e-05, + "loss": 1.5503, + "step": 60825 + }, + { + "epoch": 10.81, + "learning_rate": 1.396562962962963e-05, + "loss": 1.3748, + "step": 60830 + }, + { + "epoch": 10.82, + "learning_rate": 1.3962666666666668e-05, + "loss": 1.4955, + "step": 60835 + }, + { + "epoch": 10.82, + "learning_rate": 1.3959703703703705e-05, + "loss": 1.506, + "step": 60840 + }, + { + "epoch": 10.82, + "learning_rate": 1.3956740740740742e-05, + "loss": 1.4908, + "step": 60845 + }, + { + "epoch": 10.82, + "learning_rate": 1.3953777777777779e-05, + "loss": 1.536, + "step": 60850 + }, + { + "epoch": 10.82, + "learning_rate": 1.3950814814814816e-05, + "loss": 1.5315, + "step": 60855 + }, + { + "epoch": 10.82, + "learning_rate": 1.3947851851851853e-05, + "loss": 1.4839, + "step": 60860 + }, + { + "epoch": 10.82, + "learning_rate": 1.394488888888889e-05, + "loss": 1.5444, + "step": 60865 + }, + { + "epoch": 10.82, + "learning_rate": 1.3941925925925928e-05, + "loss": 1.5274, + "step": 60870 + }, + { + "epoch": 10.82, + "learning_rate": 1.3938962962962965e-05, + "loss": 1.4289, + "step": 60875 + }, + { + "epoch": 10.82, + "learning_rate": 1.3936000000000002e-05, + "loss": 1.5858, + "step": 60880 + }, + { + "epoch": 10.82, + "learning_rate": 1.3933037037037039e-05, + "loss": 1.5229, + "step": 60885 + }, + { + "epoch": 10.82, + "learning_rate": 1.3930074074074076e-05, + "loss": 1.4989, + "step": 60890 + }, + { + "epoch": 10.83, + "learning_rate": 1.3927111111111113e-05, + "loss": 1.5028, + "step": 60895 + }, + { + "epoch": 10.83, + "learning_rate": 1.392414814814815e-05, + "loss": 1.4146, + "step": 60900 + }, + { + "epoch": 10.83, + "learning_rate": 1.3921185185185187e-05, + "loss": 1.5247, + "step": 60905 + }, + { + "epoch": 10.83, + "learning_rate": 1.3918814814814815e-05, + "loss": 1.4705, + "step": 60910 + }, + { + "epoch": 10.83, + "learning_rate": 1.3915851851851853e-05, + "loss": 1.4779, + "step": 60915 + }, + { + "epoch": 10.83, + "learning_rate": 1.391288888888889e-05, + "loss": 1.5891, + "step": 60920 + }, + { + "epoch": 10.83, + "learning_rate": 1.3909925925925927e-05, + "loss": 1.4385, + "step": 60925 + }, + { + "epoch": 10.83, + "learning_rate": 1.3906962962962964e-05, + "loss": 1.3966, + "step": 60930 + }, + { + "epoch": 10.83, + "learning_rate": 1.3904000000000001e-05, + "loss": 1.441, + "step": 60935 + }, + { + "epoch": 10.83, + "learning_rate": 1.3901037037037038e-05, + "loss": 1.5257, + "step": 60940 + }, + { + "epoch": 10.83, + "learning_rate": 1.3898074074074075e-05, + "loss": 1.535, + "step": 60945 + }, + { + "epoch": 10.84, + "learning_rate": 1.389511111111111e-05, + "loss": 1.5025, + "step": 60950 + }, + { + "epoch": 10.84, + "learning_rate": 1.3892148148148148e-05, + "loss": 1.6283, + "step": 60955 + }, + { + "epoch": 10.84, + "learning_rate": 1.3889185185185185e-05, + "loss": 1.4912, + "step": 60960 + }, + { + "epoch": 10.84, + "learning_rate": 1.3886222222222222e-05, + "loss": 1.551, + "step": 60965 + }, + { + "epoch": 10.84, + "learning_rate": 1.388325925925926e-05, + "loss": 1.4639, + "step": 60970 + }, + { + "epoch": 10.84, + "learning_rate": 1.3880296296296296e-05, + "loss": 1.558, + "step": 60975 + }, + { + "epoch": 10.84, + "learning_rate": 1.3877333333333334e-05, + "loss": 1.5029, + "step": 60980 + }, + { + "epoch": 10.84, + "learning_rate": 1.387437037037037e-05, + "loss": 1.5696, + "step": 60985 + }, + { + "epoch": 10.84, + "learning_rate": 1.3871407407407408e-05, + "loss": 1.5208, + "step": 60990 + }, + { + "epoch": 10.84, + "learning_rate": 1.3868444444444445e-05, + "loss": 1.4529, + "step": 60995 + }, + { + "epoch": 10.84, + "learning_rate": 1.3865481481481482e-05, + "loss": 1.477, + "step": 61000 + }, + { + "epoch": 10.85, + "learning_rate": 1.3862518518518519e-05, + "loss": 1.5563, + "step": 61005 + }, + { + "epoch": 10.85, + "learning_rate": 1.3859555555555556e-05, + "loss": 1.4448, + "step": 61010 + }, + { + "epoch": 10.85, + "learning_rate": 1.3856592592592593e-05, + "loss": 1.5204, + "step": 61015 + }, + { + "epoch": 10.85, + "learning_rate": 1.385362962962963e-05, + "loss": 1.4065, + "step": 61020 + }, + { + "epoch": 10.85, + "learning_rate": 1.3850666666666668e-05, + "loss": 1.4736, + "step": 61025 + }, + { + "epoch": 10.85, + "learning_rate": 1.3847703703703705e-05, + "loss": 1.4789, + "step": 61030 + }, + { + "epoch": 10.85, + "learning_rate": 1.3844740740740742e-05, + "loss": 1.5453, + "step": 61035 + }, + { + "epoch": 10.85, + "learning_rate": 1.3841777777777779e-05, + "loss": 1.4828, + "step": 61040 + }, + { + "epoch": 10.85, + "learning_rate": 1.3838814814814816e-05, + "loss": 1.5385, + "step": 61045 + }, + { + "epoch": 10.85, + "learning_rate": 1.3835851851851853e-05, + "loss": 1.4465, + "step": 61050 + }, + { + "epoch": 10.85, + "learning_rate": 1.383288888888889e-05, + "loss": 1.559, + "step": 61055 + }, + { + "epoch": 10.86, + "learning_rate": 1.3829925925925928e-05, + "loss": 1.5849, + "step": 61060 + }, + { + "epoch": 10.86, + "learning_rate": 1.3826962962962963e-05, + "loss": 1.4584, + "step": 61065 + }, + { + "epoch": 10.86, + "learning_rate": 1.3824e-05, + "loss": 1.4701, + "step": 61070 + }, + { + "epoch": 10.86, + "learning_rate": 1.3821037037037037e-05, + "loss": 1.4577, + "step": 61075 + }, + { + "epoch": 10.86, + "learning_rate": 1.3818074074074074e-05, + "loss": 1.507, + "step": 61080 + }, + { + "epoch": 10.86, + "learning_rate": 1.3815111111111112e-05, + "loss": 1.4794, + "step": 61085 + }, + { + "epoch": 10.86, + "learning_rate": 1.3812148148148149e-05, + "loss": 1.4863, + "step": 61090 + }, + { + "epoch": 10.86, + "learning_rate": 1.3809185185185186e-05, + "loss": 1.4413, + "step": 61095 + }, + { + "epoch": 10.86, + "learning_rate": 1.3806222222222223e-05, + "loss": 1.4945, + "step": 61100 + }, + { + "epoch": 10.86, + "learning_rate": 1.380325925925926e-05, + "loss": 1.3642, + "step": 61105 + }, + { + "epoch": 10.86, + "learning_rate": 1.3800296296296297e-05, + "loss": 1.4319, + "step": 61110 + }, + { + "epoch": 10.86, + "learning_rate": 1.3797333333333334e-05, + "loss": 1.4072, + "step": 61115 + }, + { + "epoch": 10.87, + "learning_rate": 1.3794370370370371e-05, + "loss": 1.5702, + "step": 61120 + }, + { + "epoch": 10.87, + "learning_rate": 1.3791407407407409e-05, + "loss": 1.4985, + "step": 61125 + }, + { + "epoch": 10.87, + "learning_rate": 1.3788444444444446e-05, + "loss": 1.5686, + "step": 61130 + }, + { + "epoch": 10.87, + "learning_rate": 1.3785481481481483e-05, + "loss": 1.4985, + "step": 61135 + }, + { + "epoch": 10.87, + "learning_rate": 1.378251851851852e-05, + "loss": 1.5289, + "step": 61140 + }, + { + "epoch": 10.87, + "learning_rate": 1.3779555555555557e-05, + "loss": 1.6471, + "step": 61145 + }, + { + "epoch": 10.87, + "learning_rate": 1.3776592592592594e-05, + "loss": 1.5163, + "step": 61150 + }, + { + "epoch": 10.87, + "learning_rate": 1.3773629629629631e-05, + "loss": 1.5686, + "step": 61155 + }, + { + "epoch": 10.87, + "learning_rate": 1.3770666666666668e-05, + "loss": 1.5007, + "step": 61160 + }, + { + "epoch": 10.87, + "learning_rate": 1.3767703703703706e-05, + "loss": 1.5711, + "step": 61165 + }, + { + "epoch": 10.87, + "learning_rate": 1.3764740740740743e-05, + "loss": 1.4783, + "step": 61170 + }, + { + "epoch": 10.88, + "learning_rate": 1.376177777777778e-05, + "loss": 1.4889, + "step": 61175 + }, + { + "epoch": 10.88, + "learning_rate": 1.3758814814814815e-05, + "loss": 1.4393, + "step": 61180 + }, + { + "epoch": 10.88, + "learning_rate": 1.3755851851851852e-05, + "loss": 1.4724, + "step": 61185 + }, + { + "epoch": 10.88, + "learning_rate": 1.375288888888889e-05, + "loss": 1.4998, + "step": 61190 + }, + { + "epoch": 10.88, + "learning_rate": 1.3749925925925927e-05, + "loss": 1.5213, + "step": 61195 + }, + { + "epoch": 10.88, + "learning_rate": 1.3746962962962964e-05, + "loss": 1.4467, + "step": 61200 + }, + { + "epoch": 10.88, + "learning_rate": 1.3744000000000001e-05, + "loss": 1.4373, + "step": 61205 + }, + { + "epoch": 10.88, + "learning_rate": 1.3741037037037038e-05, + "loss": 1.5972, + "step": 61210 + }, + { + "epoch": 10.88, + "learning_rate": 1.3738074074074075e-05, + "loss": 1.5606, + "step": 61215 + }, + { + "epoch": 10.88, + "learning_rate": 1.3735111111111112e-05, + "loss": 1.4841, + "step": 61220 + }, + { + "epoch": 10.88, + "learning_rate": 1.373214814814815e-05, + "loss": 1.3935, + "step": 61225 + }, + { + "epoch": 10.89, + "learning_rate": 1.3729185185185187e-05, + "loss": 1.4767, + "step": 61230 + }, + { + "epoch": 10.89, + "learning_rate": 1.3726222222222224e-05, + "loss": 1.5002, + "step": 61235 + }, + { + "epoch": 10.89, + "learning_rate": 1.372325925925926e-05, + "loss": 1.4437, + "step": 61240 + }, + { + "epoch": 10.89, + "learning_rate": 1.3720296296296298e-05, + "loss": 1.5449, + "step": 61245 + }, + { + "epoch": 10.89, + "learning_rate": 1.3717333333333335e-05, + "loss": 1.4299, + "step": 61250 + }, + { + "epoch": 10.89, + "learning_rate": 1.3714370370370372e-05, + "loss": 1.4137, + "step": 61255 + }, + { + "epoch": 10.89, + "learning_rate": 1.371140740740741e-05, + "loss": 1.5176, + "step": 61260 + }, + { + "epoch": 10.89, + "learning_rate": 1.3708444444444446e-05, + "loss": 1.5481, + "step": 61265 + }, + { + "epoch": 10.89, + "learning_rate": 1.3705481481481484e-05, + "loss": 1.4137, + "step": 61270 + }, + { + "epoch": 10.89, + "learning_rate": 1.370251851851852e-05, + "loss": 1.4913, + "step": 61275 + }, + { + "epoch": 10.89, + "learning_rate": 1.3699555555555558e-05, + "loss": 1.5252, + "step": 61280 + }, + { + "epoch": 10.9, + "learning_rate": 1.3696592592592595e-05, + "loss": 1.4621, + "step": 61285 + }, + { + "epoch": 10.9, + "learning_rate": 1.3693629629629632e-05, + "loss": 1.496, + "step": 61290 + }, + { + "epoch": 10.9, + "learning_rate": 1.3690666666666667e-05, + "loss": 1.3599, + "step": 61295 + }, + { + "epoch": 10.9, + "learning_rate": 1.3687703703703705e-05, + "loss": 1.4028, + "step": 61300 + }, + { + "epoch": 10.9, + "learning_rate": 1.3684740740740742e-05, + "loss": 1.4168, + "step": 61305 + }, + { + "epoch": 10.9, + "learning_rate": 1.3681777777777779e-05, + "loss": 1.5474, + "step": 61310 + }, + { + "epoch": 10.9, + "learning_rate": 1.3678814814814816e-05, + "loss": 1.4723, + "step": 61315 + }, + { + "epoch": 10.9, + "learning_rate": 1.3675851851851853e-05, + "loss": 1.4583, + "step": 61320 + }, + { + "epoch": 10.9, + "learning_rate": 1.367288888888889e-05, + "loss": 1.6029, + "step": 61325 + }, + { + "epoch": 10.9, + "learning_rate": 1.3669925925925927e-05, + "loss": 1.4726, + "step": 61330 + }, + { + "epoch": 10.9, + "learning_rate": 1.3666962962962965e-05, + "loss": 1.4167, + "step": 61335 + }, + { + "epoch": 10.9, + "learning_rate": 1.3664000000000002e-05, + "loss": 1.5281, + "step": 61340 + }, + { + "epoch": 10.91, + "learning_rate": 1.3661037037037039e-05, + "loss": 1.3605, + "step": 61345 + }, + { + "epoch": 10.91, + "learning_rate": 1.3658074074074076e-05, + "loss": 1.4417, + "step": 61350 + }, + { + "epoch": 10.91, + "learning_rate": 1.3655111111111113e-05, + "loss": 1.4952, + "step": 61355 + }, + { + "epoch": 10.91, + "learning_rate": 1.365214814814815e-05, + "loss": 1.5668, + "step": 61360 + }, + { + "epoch": 10.91, + "learning_rate": 1.3649185185185187e-05, + "loss": 1.5079, + "step": 61365 + }, + { + "epoch": 10.91, + "learning_rate": 1.3646222222222224e-05, + "loss": 1.4569, + "step": 61370 + }, + { + "epoch": 10.91, + "learning_rate": 1.3643259259259262e-05, + "loss": 1.4849, + "step": 61375 + }, + { + "epoch": 10.91, + "learning_rate": 1.3640296296296299e-05, + "loss": 1.5349, + "step": 61380 + }, + { + "epoch": 10.91, + "learning_rate": 1.3637333333333336e-05, + "loss": 1.5154, + "step": 61385 + }, + { + "epoch": 10.91, + "learning_rate": 1.3634370370370373e-05, + "loss": 1.4101, + "step": 61390 + }, + { + "epoch": 10.91, + "learning_rate": 1.363140740740741e-05, + "loss": 1.5637, + "step": 61395 + }, + { + "epoch": 10.92, + "learning_rate": 1.3628444444444447e-05, + "loss": 1.4237, + "step": 61400 + }, + { + "epoch": 10.92, + "learning_rate": 1.3625481481481484e-05, + "loss": 1.4898, + "step": 61405 + }, + { + "epoch": 10.92, + "learning_rate": 1.3622518518518518e-05, + "loss": 1.486, + "step": 61410 + }, + { + "epoch": 10.92, + "learning_rate": 1.3619555555555555e-05, + "loss": 1.4525, + "step": 61415 + }, + { + "epoch": 10.92, + "learning_rate": 1.3616592592592592e-05, + "loss": 1.4803, + "step": 61420 + }, + { + "epoch": 10.92, + "learning_rate": 1.361362962962963e-05, + "loss": 1.5547, + "step": 61425 + }, + { + "epoch": 10.92, + "learning_rate": 1.3610666666666667e-05, + "loss": 1.5006, + "step": 61430 + }, + { + "epoch": 10.92, + "learning_rate": 1.3607703703703704e-05, + "loss": 1.453, + "step": 61435 + }, + { + "epoch": 10.92, + "learning_rate": 1.360474074074074e-05, + "loss": 1.5266, + "step": 61440 + }, + { + "epoch": 10.92, + "learning_rate": 1.3601777777777778e-05, + "loss": 1.4771, + "step": 61445 + }, + { + "epoch": 10.92, + "learning_rate": 1.3598814814814813e-05, + "loss": 1.5533, + "step": 61450 + }, + { + "epoch": 10.93, + "learning_rate": 1.359585185185185e-05, + "loss": 1.5165, + "step": 61455 + }, + { + "epoch": 10.93, + "learning_rate": 1.3592888888888888e-05, + "loss": 1.4918, + "step": 61460 + }, + { + "epoch": 10.93, + "learning_rate": 1.3589925925925925e-05, + "loss": 1.5114, + "step": 61465 + }, + { + "epoch": 10.93, + "learning_rate": 1.3586962962962962e-05, + "loss": 1.5091, + "step": 61470 + }, + { + "epoch": 10.93, + "learning_rate": 1.3583999999999999e-05, + "loss": 1.5227, + "step": 61475 + }, + { + "epoch": 10.93, + "learning_rate": 1.3581037037037036e-05, + "loss": 1.4602, + "step": 61480 + }, + { + "epoch": 10.93, + "learning_rate": 1.3578074074074073e-05, + "loss": 1.4675, + "step": 61485 + }, + { + "epoch": 10.93, + "learning_rate": 1.357511111111111e-05, + "loss": 1.5081, + "step": 61490 + }, + { + "epoch": 10.93, + "learning_rate": 1.3572148148148148e-05, + "loss": 1.5707, + "step": 61495 + }, + { + "epoch": 10.93, + "learning_rate": 1.3569185185185185e-05, + "loss": 1.5891, + "step": 61500 + }, + { + "epoch": 10.93, + "learning_rate": 1.3566222222222222e-05, + "loss": 1.5937, + "step": 61505 + }, + { + "epoch": 10.94, + "learning_rate": 1.3563259259259259e-05, + "loss": 1.4343, + "step": 61510 + }, + { + "epoch": 10.94, + "learning_rate": 1.3560296296296296e-05, + "loss": 1.5453, + "step": 61515 + }, + { + "epoch": 10.94, + "learning_rate": 1.3557333333333333e-05, + "loss": 1.4873, + "step": 61520 + }, + { + "epoch": 10.94, + "learning_rate": 1.355437037037037e-05, + "loss": 1.5869, + "step": 61525 + }, + { + "epoch": 10.94, + "learning_rate": 1.3551407407407407e-05, + "loss": 1.4988, + "step": 61530 + }, + { + "epoch": 10.94, + "learning_rate": 1.3548444444444445e-05, + "loss": 1.5813, + "step": 61535 + }, + { + "epoch": 10.94, + "learning_rate": 1.3545481481481482e-05, + "loss": 1.4467, + "step": 61540 + }, + { + "epoch": 10.94, + "learning_rate": 1.3542518518518519e-05, + "loss": 1.6053, + "step": 61545 + }, + { + "epoch": 10.94, + "learning_rate": 1.3539555555555556e-05, + "loss": 1.509, + "step": 61550 + }, + { + "epoch": 10.94, + "learning_rate": 1.3536592592592593e-05, + "loss": 1.473, + "step": 61555 + }, + { + "epoch": 10.94, + "learning_rate": 1.353362962962963e-05, + "loss": 1.3753, + "step": 61560 + }, + { + "epoch": 10.94, + "learning_rate": 1.3530666666666667e-05, + "loss": 1.4439, + "step": 61565 + }, + { + "epoch": 10.95, + "learning_rate": 1.3527703703703703e-05, + "loss": 1.4831, + "step": 61570 + }, + { + "epoch": 10.95, + "learning_rate": 1.352474074074074e-05, + "loss": 1.5335, + "step": 61575 + }, + { + "epoch": 10.95, + "learning_rate": 1.3521777777777777e-05, + "loss": 1.5248, + "step": 61580 + }, + { + "epoch": 10.95, + "learning_rate": 1.3518814814814814e-05, + "loss": 1.5045, + "step": 61585 + }, + { + "epoch": 10.95, + "learning_rate": 1.3515851851851851e-05, + "loss": 1.5527, + "step": 61590 + }, + { + "epoch": 10.95, + "learning_rate": 1.3512888888888888e-05, + "loss": 1.4771, + "step": 61595 + }, + { + "epoch": 10.95, + "learning_rate": 1.3509925925925926e-05, + "loss": 1.5255, + "step": 61600 + }, + { + "epoch": 10.95, + "learning_rate": 1.3506962962962963e-05, + "loss": 1.4246, + "step": 61605 + }, + { + "epoch": 10.95, + "learning_rate": 1.3504e-05, + "loss": 1.4403, + "step": 61610 + }, + { + "epoch": 10.95, + "learning_rate": 1.3501037037037037e-05, + "loss": 1.5184, + "step": 61615 + }, + { + "epoch": 10.95, + "learning_rate": 1.3498074074074074e-05, + "loss": 1.4688, + "step": 61620 + }, + { + "epoch": 10.96, + "learning_rate": 1.3495111111111111e-05, + "loss": 1.4908, + "step": 61625 + }, + { + "epoch": 10.96, + "learning_rate": 1.3492148148148148e-05, + "loss": 1.4648, + "step": 61630 + }, + { + "epoch": 10.96, + "learning_rate": 1.3489185185185185e-05, + "loss": 1.4734, + "step": 61635 + }, + { + "epoch": 10.96, + "learning_rate": 1.3486222222222223e-05, + "loss": 1.4293, + "step": 61640 + }, + { + "epoch": 10.96, + "learning_rate": 1.348325925925926e-05, + "loss": 1.4557, + "step": 61645 + }, + { + "epoch": 10.96, + "learning_rate": 1.3480296296296297e-05, + "loss": 1.5062, + "step": 61650 + }, + { + "epoch": 10.96, + "learning_rate": 1.3477333333333334e-05, + "loss": 1.5551, + "step": 61655 + }, + { + "epoch": 10.96, + "learning_rate": 1.3474370370370371e-05, + "loss": 1.5217, + "step": 61660 + }, + { + "epoch": 10.96, + "learning_rate": 1.3471407407407408e-05, + "loss": 1.5396, + "step": 61665 + }, + { + "epoch": 10.96, + "learning_rate": 1.3468444444444445e-05, + "loss": 1.5263, + "step": 61670 + }, + { + "epoch": 10.96, + "learning_rate": 1.3465481481481482e-05, + "loss": 1.5292, + "step": 61675 + }, + { + "epoch": 10.97, + "learning_rate": 1.346251851851852e-05, + "loss": 1.4142, + "step": 61680 + }, + { + "epoch": 10.97, + "learning_rate": 1.3459555555555555e-05, + "loss": 1.552, + "step": 61685 + }, + { + "epoch": 10.97, + "learning_rate": 1.3456592592592592e-05, + "loss": 1.3939, + "step": 61690 + }, + { + "epoch": 10.97, + "learning_rate": 1.345362962962963e-05, + "loss": 1.6079, + "step": 61695 + }, + { + "epoch": 10.97, + "learning_rate": 1.3450666666666666e-05, + "loss": 1.4173, + "step": 61700 + }, + { + "epoch": 10.97, + "learning_rate": 1.3447703703703704e-05, + "loss": 1.5368, + "step": 61705 + }, + { + "epoch": 10.97, + "learning_rate": 1.344474074074074e-05, + "loss": 1.4957, + "step": 61710 + }, + { + "epoch": 10.97, + "learning_rate": 1.3441777777777778e-05, + "loss": 1.4505, + "step": 61715 + }, + { + "epoch": 10.97, + "learning_rate": 1.3438814814814815e-05, + "loss": 1.5222, + "step": 61720 + }, + { + "epoch": 10.97, + "learning_rate": 1.3435851851851852e-05, + "loss": 1.5929, + "step": 61725 + }, + { + "epoch": 10.97, + "learning_rate": 1.343288888888889e-05, + "loss": 1.5399, + "step": 61730 + }, + { + "epoch": 10.98, + "learning_rate": 1.3429925925925926e-05, + "loss": 1.3692, + "step": 61735 + }, + { + "epoch": 10.98, + "learning_rate": 1.3426962962962963e-05, + "loss": 1.4934, + "step": 61740 + }, + { + "epoch": 10.98, + "learning_rate": 1.3424e-05, + "loss": 1.6251, + "step": 61745 + }, + { + "epoch": 10.98, + "learning_rate": 1.3421037037037038e-05, + "loss": 1.6087, + "step": 61750 + }, + { + "epoch": 10.98, + "learning_rate": 1.3418074074074075e-05, + "loss": 1.5132, + "step": 61755 + }, + { + "epoch": 10.98, + "learning_rate": 1.3415111111111112e-05, + "loss": 1.4847, + "step": 61760 + }, + { + "epoch": 10.98, + "learning_rate": 1.3412148148148149e-05, + "loss": 1.516, + "step": 61765 + }, + { + "epoch": 10.98, + "learning_rate": 1.3409185185185186e-05, + "loss": 1.3766, + "step": 61770 + }, + { + "epoch": 10.98, + "learning_rate": 1.3406222222222223e-05, + "loss": 1.377, + "step": 61775 + }, + { + "epoch": 10.98, + "learning_rate": 1.340325925925926e-05, + "loss": 1.4542, + "step": 61780 + }, + { + "epoch": 10.98, + "learning_rate": 1.3400296296296298e-05, + "loss": 1.6086, + "step": 61785 + }, + { + "epoch": 10.98, + "learning_rate": 1.3397333333333335e-05, + "loss": 1.5306, + "step": 61790 + }, + { + "epoch": 10.99, + "learning_rate": 1.3394370370370372e-05, + "loss": 1.3323, + "step": 61795 + }, + { + "epoch": 10.99, + "learning_rate": 1.3391407407407407e-05, + "loss": 1.3893, + "step": 61800 + }, + { + "epoch": 10.99, + "learning_rate": 1.3388444444444444e-05, + "loss": 1.5448, + "step": 61805 + }, + { + "epoch": 10.99, + "learning_rate": 1.3385481481481482e-05, + "loss": 1.4792, + "step": 61810 + }, + { + "epoch": 10.99, + "learning_rate": 1.3382518518518519e-05, + "loss": 1.4159, + "step": 61815 + }, + { + "epoch": 10.99, + "learning_rate": 1.3379555555555556e-05, + "loss": 1.5375, + "step": 61820 + }, + { + "epoch": 10.99, + "learning_rate": 1.3376592592592593e-05, + "loss": 1.4492, + "step": 61825 + }, + { + "epoch": 10.99, + "learning_rate": 1.337362962962963e-05, + "loss": 1.4078, + "step": 61830 + }, + { + "epoch": 10.99, + "learning_rate": 1.3370666666666667e-05, + "loss": 1.3954, + "step": 61835 + }, + { + "epoch": 10.99, + "learning_rate": 1.3367703703703704e-05, + "loss": 1.4495, + "step": 61840 + }, + { + "epoch": 10.99, + "learning_rate": 1.3364740740740741e-05, + "loss": 1.4143, + "step": 61845 + }, + { + "epoch": 11.0, + "learning_rate": 1.3361777777777779e-05, + "loss": 1.4637, + "step": 61850 + }, + { + "epoch": 11.0, + "learning_rate": 1.3358814814814816e-05, + "loss": 1.4407, + "step": 61855 + }, + { + "epoch": 11.0, + "learning_rate": 1.3355851851851853e-05, + "loss": 1.4594, + "step": 61860 + }, + { + "epoch": 11.0, + "learning_rate": 1.335288888888889e-05, + "loss": 1.447, + "step": 61865 + }, + { + "epoch": 11.0, + "learning_rate": 1.3349925925925927e-05, + "loss": 1.472, + "step": 61870 + }, + { + "epoch": 11.0, + "learning_rate": 1.3346962962962964e-05, + "loss": 1.4909, + "step": 61875 + }, + { + "epoch": 11.0, + "learning_rate": 1.3344000000000001e-05, + "loss": 1.4133, + "step": 61880 + }, + { + "epoch": 11.0, + "learning_rate": 1.3341037037037038e-05, + "loss": 1.4068, + "step": 61885 + }, + { + "epoch": 11.0, + "learning_rate": 1.3338074074074076e-05, + "loss": 1.5331, + "step": 61890 + }, + { + "epoch": 11.0, + "learning_rate": 1.3335111111111113e-05, + "loss": 1.3475, + "step": 61895 + }, + { + "epoch": 11.0, + "learning_rate": 1.333214814814815e-05, + "loss": 1.3808, + "step": 61900 + }, + { + "epoch": 11.01, + "learning_rate": 1.3329185185185187e-05, + "loss": 1.4218, + "step": 61905 + }, + { + "epoch": 11.01, + "learning_rate": 1.3326222222222224e-05, + "loss": 1.3575, + "step": 61910 + }, + { + "epoch": 11.01, + "learning_rate": 1.332325925925926e-05, + "loss": 1.3837, + "step": 61915 + }, + { + "epoch": 11.01, + "learning_rate": 1.3320296296296297e-05, + "loss": 1.449, + "step": 61920 + }, + { + "epoch": 11.01, + "learning_rate": 1.3317333333333334e-05, + "loss": 1.4727, + "step": 61925 + }, + { + "epoch": 11.01, + "learning_rate": 1.3314370370370371e-05, + "loss": 1.4079, + "step": 61930 + }, + { + "epoch": 11.01, + "learning_rate": 1.3311407407407408e-05, + "loss": 1.4416, + "step": 61935 + }, + { + "epoch": 11.01, + "learning_rate": 1.3308444444444445e-05, + "loss": 1.4915, + "step": 61940 + }, + { + "epoch": 11.01, + "learning_rate": 1.3305481481481482e-05, + "loss": 1.4474, + "step": 61945 + }, + { + "epoch": 11.01, + "learning_rate": 1.330251851851852e-05, + "loss": 1.3913, + "step": 61950 + }, + { + "epoch": 11.01, + "learning_rate": 1.3299555555555557e-05, + "loss": 1.4863, + "step": 61955 + }, + { + "epoch": 11.02, + "learning_rate": 1.3296592592592594e-05, + "loss": 1.2624, + "step": 61960 + }, + { + "epoch": 11.02, + "learning_rate": 1.329362962962963e-05, + "loss": 1.462, + "step": 61965 + }, + { + "epoch": 11.02, + "learning_rate": 1.3290666666666668e-05, + "loss": 1.3583, + "step": 61970 + }, + { + "epoch": 11.02, + "learning_rate": 1.3287703703703705e-05, + "loss": 1.3833, + "step": 61975 + }, + { + "epoch": 11.02, + "learning_rate": 1.3284740740740742e-05, + "loss": 1.4646, + "step": 61980 + }, + { + "epoch": 11.02, + "learning_rate": 1.328177777777778e-05, + "loss": 1.4493, + "step": 61985 + }, + { + "epoch": 11.02, + "learning_rate": 1.3278814814814816e-05, + "loss": 1.3662, + "step": 61990 + }, + { + "epoch": 11.02, + "learning_rate": 1.3275851851851854e-05, + "loss": 1.4863, + "step": 61995 + }, + { + "epoch": 11.02, + "learning_rate": 1.327288888888889e-05, + "loss": 1.3113, + "step": 62000 + }, + { + "epoch": 11.02, + "learning_rate": 1.3269925925925928e-05, + "loss": 1.4307, + "step": 62005 + }, + { + "epoch": 11.02, + "learning_rate": 1.3266962962962965e-05, + "loss": 1.4471, + "step": 62010 + }, + { + "epoch": 11.02, + "learning_rate": 1.3264000000000002e-05, + "loss": 1.48, + "step": 62015 + }, + { + "epoch": 11.03, + "learning_rate": 1.326103703703704e-05, + "loss": 1.4442, + "step": 62020 + }, + { + "epoch": 11.03, + "learning_rate": 1.3258074074074076e-05, + "loss": 1.4229, + "step": 62025 + }, + { + "epoch": 11.03, + "learning_rate": 1.3255111111111112e-05, + "loss": 1.3587, + "step": 62030 + }, + { + "epoch": 11.03, + "learning_rate": 1.3252148148148149e-05, + "loss": 1.3338, + "step": 62035 + }, + { + "epoch": 11.03, + "learning_rate": 1.3249185185185186e-05, + "loss": 1.4004, + "step": 62040 + }, + { + "epoch": 11.03, + "learning_rate": 1.3246222222222223e-05, + "loss": 1.4567, + "step": 62045 + }, + { + "epoch": 11.03, + "learning_rate": 1.324325925925926e-05, + "loss": 1.3829, + "step": 62050 + }, + { + "epoch": 11.03, + "learning_rate": 1.3240296296296297e-05, + "loss": 1.4231, + "step": 62055 + }, + { + "epoch": 11.03, + "learning_rate": 1.3237333333333335e-05, + "loss": 1.3665, + "step": 62060 + }, + { + "epoch": 11.03, + "learning_rate": 1.3234370370370372e-05, + "loss": 1.4033, + "step": 62065 + }, + { + "epoch": 11.03, + "learning_rate": 1.3231407407407409e-05, + "loss": 1.3873, + "step": 62070 + }, + { + "epoch": 11.04, + "learning_rate": 1.3228444444444446e-05, + "loss": 1.4874, + "step": 62075 + }, + { + "epoch": 11.04, + "learning_rate": 1.3225481481481483e-05, + "loss": 1.4459, + "step": 62080 + }, + { + "epoch": 11.04, + "learning_rate": 1.322251851851852e-05, + "loss": 1.4785, + "step": 62085 + }, + { + "epoch": 11.04, + "learning_rate": 1.3219555555555557e-05, + "loss": 1.4257, + "step": 62090 + }, + { + "epoch": 11.04, + "learning_rate": 1.3216592592592594e-05, + "loss": 1.3861, + "step": 62095 + }, + { + "epoch": 11.04, + "learning_rate": 1.3213629629629632e-05, + "loss": 1.4343, + "step": 62100 + }, + { + "epoch": 11.04, + "learning_rate": 1.3210666666666669e-05, + "loss": 1.4426, + "step": 62105 + }, + { + "epoch": 11.04, + "learning_rate": 1.3207703703703706e-05, + "loss": 1.493, + "step": 62110 + }, + { + "epoch": 11.04, + "learning_rate": 1.3204740740740743e-05, + "loss": 1.4734, + "step": 62115 + }, + { + "epoch": 11.04, + "learning_rate": 1.320177777777778e-05, + "loss": 1.3785, + "step": 62120 + }, + { + "epoch": 11.04, + "learning_rate": 1.3198814814814817e-05, + "loss": 1.3405, + "step": 62125 + }, + { + "epoch": 11.05, + "learning_rate": 1.3195851851851854e-05, + "loss": 1.4027, + "step": 62130 + }, + { + "epoch": 11.05, + "learning_rate": 1.3192888888888891e-05, + "loss": 1.3828, + "step": 62135 + }, + { + "epoch": 11.05, + "learning_rate": 1.3189925925925929e-05, + "loss": 1.4075, + "step": 62140 + }, + { + "epoch": 11.05, + "learning_rate": 1.3186962962962964e-05, + "loss": 1.3446, + "step": 62145 + }, + { + "epoch": 11.05, + "learning_rate": 1.3184000000000001e-05, + "loss": 1.3724, + "step": 62150 + }, + { + "epoch": 11.05, + "learning_rate": 1.3181037037037038e-05, + "loss": 1.4064, + "step": 62155 + }, + { + "epoch": 11.05, + "learning_rate": 1.3178074074074075e-05, + "loss": 1.4636, + "step": 62160 + }, + { + "epoch": 11.05, + "learning_rate": 1.3175111111111113e-05, + "loss": 1.3908, + "step": 62165 + }, + { + "epoch": 11.05, + "learning_rate": 1.317214814814815e-05, + "loss": 1.4878, + "step": 62170 + }, + { + "epoch": 11.05, + "learning_rate": 1.3169185185185187e-05, + "loss": 1.4843, + "step": 62175 + }, + { + "epoch": 11.05, + "learning_rate": 1.3166222222222224e-05, + "loss": 1.378, + "step": 62180 + }, + { + "epoch": 11.06, + "learning_rate": 1.3163259259259258e-05, + "loss": 1.4966, + "step": 62185 + }, + { + "epoch": 11.06, + "learning_rate": 1.3160296296296295e-05, + "loss": 1.4666, + "step": 62190 + }, + { + "epoch": 11.06, + "learning_rate": 1.3157333333333332e-05, + "loss": 1.4291, + "step": 62195 + }, + { + "epoch": 11.06, + "learning_rate": 1.3154370370370369e-05, + "loss": 1.5085, + "step": 62200 + }, + { + "epoch": 11.06, + "learning_rate": 1.3151407407407406e-05, + "loss": 1.3034, + "step": 62205 + }, + { + "epoch": 11.06, + "learning_rate": 1.3148444444444443e-05, + "loss": 1.387, + "step": 62210 + }, + { + "epoch": 11.06, + "learning_rate": 1.314548148148148e-05, + "loss": 1.4746, + "step": 62215 + }, + { + "epoch": 11.06, + "learning_rate": 1.3142518518518518e-05, + "loss": 1.3871, + "step": 62220 + }, + { + "epoch": 11.06, + "learning_rate": 1.3139555555555555e-05, + "loss": 1.619, + "step": 62225 + }, + { + "epoch": 11.06, + "learning_rate": 1.3136592592592592e-05, + "loss": 1.4775, + "step": 62230 + }, + { + "epoch": 11.06, + "learning_rate": 1.3133629629629629e-05, + "loss": 1.4864, + "step": 62235 + }, + { + "epoch": 11.06, + "learning_rate": 1.3130666666666666e-05, + "loss": 1.4858, + "step": 62240 + }, + { + "epoch": 11.07, + "learning_rate": 1.3127703703703703e-05, + "loss": 1.4496, + "step": 62245 + }, + { + "epoch": 11.07, + "learning_rate": 1.312474074074074e-05, + "loss": 1.566, + "step": 62250 + }, + { + "epoch": 11.07, + "learning_rate": 1.3121777777777777e-05, + "loss": 1.365, + "step": 62255 + }, + { + "epoch": 11.07, + "learning_rate": 1.3118814814814815e-05, + "loss": 1.411, + "step": 62260 + }, + { + "epoch": 11.07, + "learning_rate": 1.3115851851851852e-05, + "loss": 1.4052, + "step": 62265 + }, + { + "epoch": 11.07, + "learning_rate": 1.3112888888888889e-05, + "loss": 1.5939, + "step": 62270 + }, + { + "epoch": 11.07, + "learning_rate": 1.3109925925925926e-05, + "loss": 1.3822, + "step": 62275 + }, + { + "epoch": 11.07, + "learning_rate": 1.3106962962962963e-05, + "loss": 1.4119, + "step": 62280 + }, + { + "epoch": 11.07, + "learning_rate": 1.3104e-05, + "loss": 1.4517, + "step": 62285 + }, + { + "epoch": 11.07, + "learning_rate": 1.3101037037037037e-05, + "loss": 1.3412, + "step": 62290 + }, + { + "epoch": 11.07, + "learning_rate": 1.3098074074074074e-05, + "loss": 1.5226, + "step": 62295 + }, + { + "epoch": 11.08, + "learning_rate": 1.3095111111111112e-05, + "loss": 1.4079, + "step": 62300 + }, + { + "epoch": 11.08, + "learning_rate": 1.3092148148148147e-05, + "loss": 1.4322, + "step": 62305 + }, + { + "epoch": 11.08, + "learning_rate": 1.3089185185185184e-05, + "loss": 1.3837, + "step": 62310 + }, + { + "epoch": 11.08, + "learning_rate": 1.3086222222222221e-05, + "loss": 1.5039, + "step": 62315 + }, + { + "epoch": 11.08, + "learning_rate": 1.3083259259259258e-05, + "loss": 1.3157, + "step": 62320 + }, + { + "epoch": 11.08, + "learning_rate": 1.3080296296296296e-05, + "loss": 1.4661, + "step": 62325 + }, + { + "epoch": 11.08, + "learning_rate": 1.3077333333333333e-05, + "loss": 1.2867, + "step": 62330 + }, + { + "epoch": 11.08, + "learning_rate": 1.307437037037037e-05, + "loss": 1.3055, + "step": 62335 + }, + { + "epoch": 11.08, + "learning_rate": 1.3071407407407407e-05, + "loss": 1.4608, + "step": 62340 + }, + { + "epoch": 11.08, + "learning_rate": 1.3068444444444444e-05, + "loss": 1.489, + "step": 62345 + }, + { + "epoch": 11.08, + "learning_rate": 1.3065481481481481e-05, + "loss": 1.4414, + "step": 62350 + }, + { + "epoch": 11.09, + "learning_rate": 1.3062518518518518e-05, + "loss": 1.5302, + "step": 62355 + }, + { + "epoch": 11.09, + "learning_rate": 1.3059555555555555e-05, + "loss": 1.4479, + "step": 62360 + }, + { + "epoch": 11.09, + "learning_rate": 1.3056592592592593e-05, + "loss": 1.486, + "step": 62365 + }, + { + "epoch": 11.09, + "learning_rate": 1.305362962962963e-05, + "loss": 1.4216, + "step": 62370 + }, + { + "epoch": 11.09, + "learning_rate": 1.3050666666666667e-05, + "loss": 1.4287, + "step": 62375 + }, + { + "epoch": 11.09, + "learning_rate": 1.3047703703703704e-05, + "loss": 1.3774, + "step": 62380 + }, + { + "epoch": 11.09, + "learning_rate": 1.3044740740740741e-05, + "loss": 1.4408, + "step": 62385 + }, + { + "epoch": 11.09, + "learning_rate": 1.3041777777777778e-05, + "loss": 1.4273, + "step": 62390 + }, + { + "epoch": 11.09, + "learning_rate": 1.3038814814814815e-05, + "loss": 1.4712, + "step": 62395 + }, + { + "epoch": 11.09, + "learning_rate": 1.3035851851851852e-05, + "loss": 1.4498, + "step": 62400 + }, + { + "epoch": 11.09, + "learning_rate": 1.303288888888889e-05, + "loss": 1.4075, + "step": 62405 + }, + { + "epoch": 11.1, + "learning_rate": 1.3029925925925927e-05, + "loss": 1.4966, + "step": 62410 + }, + { + "epoch": 11.1, + "learning_rate": 1.3026962962962964e-05, + "loss": 1.3855, + "step": 62415 + }, + { + "epoch": 11.1, + "learning_rate": 1.3024e-05, + "loss": 1.3437, + "step": 62420 + }, + { + "epoch": 11.1, + "learning_rate": 1.3021037037037036e-05, + "loss": 1.4148, + "step": 62425 + }, + { + "epoch": 11.1, + "learning_rate": 1.3018074074074074e-05, + "loss": 1.538, + "step": 62430 + }, + { + "epoch": 11.1, + "learning_rate": 1.301511111111111e-05, + "loss": 1.369, + "step": 62435 + }, + { + "epoch": 11.1, + "learning_rate": 1.3012148148148148e-05, + "loss": 1.5188, + "step": 62440 + }, + { + "epoch": 11.1, + "learning_rate": 1.3009185185185185e-05, + "loss": 1.36, + "step": 62445 + }, + { + "epoch": 11.1, + "learning_rate": 1.3006222222222222e-05, + "loss": 1.449, + "step": 62450 + }, + { + "epoch": 11.1, + "learning_rate": 1.300325925925926e-05, + "loss": 1.4689, + "step": 62455 + }, + { + "epoch": 11.1, + "learning_rate": 1.3000296296296296e-05, + "loss": 1.3568, + "step": 62460 + }, + { + "epoch": 11.1, + "learning_rate": 1.2997333333333333e-05, + "loss": 1.4439, + "step": 62465 + }, + { + "epoch": 11.11, + "learning_rate": 1.299437037037037e-05, + "loss": 1.4756, + "step": 62470 + }, + { + "epoch": 11.11, + "learning_rate": 1.2991407407407408e-05, + "loss": 1.5335, + "step": 62475 + }, + { + "epoch": 11.11, + "learning_rate": 1.2988444444444445e-05, + "loss": 1.4442, + "step": 62480 + }, + { + "epoch": 11.11, + "learning_rate": 1.2985481481481482e-05, + "loss": 1.3576, + "step": 62485 + }, + { + "epoch": 11.11, + "learning_rate": 1.2982518518518519e-05, + "loss": 1.3434, + "step": 62490 + }, + { + "epoch": 11.11, + "learning_rate": 1.2979555555555556e-05, + "loss": 1.3303, + "step": 62495 + }, + { + "epoch": 11.11, + "learning_rate": 1.2976592592592593e-05, + "loss": 1.3664, + "step": 62500 + }, + { + "epoch": 11.11, + "learning_rate": 1.297362962962963e-05, + "loss": 1.352, + "step": 62505 + }, + { + "epoch": 11.11, + "learning_rate": 1.2970666666666668e-05, + "loss": 1.454, + "step": 62510 + }, + { + "epoch": 11.11, + "learning_rate": 1.2967703703703705e-05, + "loss": 1.3895, + "step": 62515 + }, + { + "epoch": 11.11, + "learning_rate": 1.2964740740740742e-05, + "loss": 1.4973, + "step": 62520 + }, + { + "epoch": 11.12, + "learning_rate": 1.2961777777777779e-05, + "loss": 1.4923, + "step": 62525 + }, + { + "epoch": 11.12, + "learning_rate": 1.2958814814814816e-05, + "loss": 1.4156, + "step": 62530 + }, + { + "epoch": 11.12, + "learning_rate": 1.2955851851851852e-05, + "loss": 1.3343, + "step": 62535 + }, + { + "epoch": 11.12, + "learning_rate": 1.2952888888888889e-05, + "loss": 1.4482, + "step": 62540 + }, + { + "epoch": 11.12, + "learning_rate": 1.2949925925925926e-05, + "loss": 1.4333, + "step": 62545 + }, + { + "epoch": 11.12, + "learning_rate": 1.2946962962962963e-05, + "loss": 1.4253, + "step": 62550 + }, + { + "epoch": 11.12, + "learning_rate": 1.2944e-05, + "loss": 1.4877, + "step": 62555 + }, + { + "epoch": 11.12, + "learning_rate": 1.2941037037037037e-05, + "loss": 1.4449, + "step": 62560 + }, + { + "epoch": 11.12, + "learning_rate": 1.2938074074074074e-05, + "loss": 1.4745, + "step": 62565 + }, + { + "epoch": 11.12, + "learning_rate": 1.2935111111111111e-05, + "loss": 1.4347, + "step": 62570 + }, + { + "epoch": 11.12, + "learning_rate": 1.2932148148148149e-05, + "loss": 1.4227, + "step": 62575 + }, + { + "epoch": 11.13, + "learning_rate": 1.2929185185185186e-05, + "loss": 1.4772, + "step": 62580 + }, + { + "epoch": 11.13, + "learning_rate": 1.2926222222222223e-05, + "loss": 1.3895, + "step": 62585 + }, + { + "epoch": 11.13, + "learning_rate": 1.292325925925926e-05, + "loss": 1.464, + "step": 62590 + }, + { + "epoch": 11.13, + "learning_rate": 1.2920296296296297e-05, + "loss": 1.3754, + "step": 62595 + }, + { + "epoch": 11.13, + "learning_rate": 1.2917333333333334e-05, + "loss": 1.4082, + "step": 62600 + }, + { + "epoch": 11.13, + "learning_rate": 1.2914370370370371e-05, + "loss": 1.5368, + "step": 62605 + }, + { + "epoch": 11.13, + "learning_rate": 1.2911407407407408e-05, + "loss": 1.6053, + "step": 62610 + }, + { + "epoch": 11.13, + "learning_rate": 1.2908444444444446e-05, + "loss": 1.445, + "step": 62615 + }, + { + "epoch": 11.13, + "learning_rate": 1.2905481481481483e-05, + "loss": 1.3734, + "step": 62620 + }, + { + "epoch": 11.13, + "learning_rate": 1.290251851851852e-05, + "loss": 1.4722, + "step": 62625 + }, + { + "epoch": 11.13, + "learning_rate": 1.2899555555555557e-05, + "loss": 1.423, + "step": 62630 + }, + { + "epoch": 11.14, + "learning_rate": 1.2896592592592594e-05, + "loss": 1.3289, + "step": 62635 + }, + { + "epoch": 11.14, + "learning_rate": 1.2893629629629631e-05, + "loss": 1.4246, + "step": 62640 + }, + { + "epoch": 11.14, + "learning_rate": 1.2890666666666668e-05, + "loss": 1.4601, + "step": 62645 + }, + { + "epoch": 11.14, + "learning_rate": 1.2887703703703704e-05, + "loss": 1.4235, + "step": 62650 + }, + { + "epoch": 11.14, + "learning_rate": 1.2884740740740741e-05, + "loss": 1.4778, + "step": 62655 + }, + { + "epoch": 11.14, + "learning_rate": 1.2881777777777778e-05, + "loss": 1.5063, + "step": 62660 + }, + { + "epoch": 11.14, + "learning_rate": 1.2878814814814815e-05, + "loss": 1.4612, + "step": 62665 + }, + { + "epoch": 11.14, + "learning_rate": 1.2875851851851852e-05, + "loss": 1.4322, + "step": 62670 + }, + { + "epoch": 11.14, + "learning_rate": 1.287288888888889e-05, + "loss": 1.3647, + "step": 62675 + }, + { + "epoch": 11.14, + "learning_rate": 1.2869925925925927e-05, + "loss": 1.4895, + "step": 62680 + }, + { + "epoch": 11.14, + "learning_rate": 1.2866962962962964e-05, + "loss": 1.4091, + "step": 62685 + }, + { + "epoch": 11.14, + "learning_rate": 1.2864e-05, + "loss": 1.5034, + "step": 62690 + }, + { + "epoch": 11.15, + "learning_rate": 1.2861037037037038e-05, + "loss": 1.5016, + "step": 62695 + }, + { + "epoch": 11.15, + "learning_rate": 1.2858074074074075e-05, + "loss": 1.406, + "step": 62700 + }, + { + "epoch": 11.15, + "learning_rate": 1.2855111111111112e-05, + "loss": 1.4634, + "step": 62705 + }, + { + "epoch": 11.15, + "learning_rate": 1.285214814814815e-05, + "loss": 1.4158, + "step": 62710 + }, + { + "epoch": 11.15, + "learning_rate": 1.2849185185185186e-05, + "loss": 1.5406, + "step": 62715 + }, + { + "epoch": 11.15, + "learning_rate": 1.2846222222222224e-05, + "loss": 1.5597, + "step": 62720 + }, + { + "epoch": 11.15, + "learning_rate": 1.284325925925926e-05, + "loss": 1.418, + "step": 62725 + }, + { + "epoch": 11.15, + "learning_rate": 1.2840888888888889e-05, + "loss": 1.3906, + "step": 62730 + }, + { + "epoch": 11.15, + "learning_rate": 1.2837925925925926e-05, + "loss": 1.4865, + "step": 62735 + }, + { + "epoch": 11.15, + "learning_rate": 1.2834962962962963e-05, + "loss": 1.5078, + "step": 62740 + }, + { + "epoch": 11.15, + "learning_rate": 1.2832e-05, + "loss": 1.4061, + "step": 62745 + }, + { + "epoch": 11.16, + "learning_rate": 1.2829037037037037e-05, + "loss": 1.441, + "step": 62750 + }, + { + "epoch": 11.16, + "learning_rate": 1.2826074074074074e-05, + "loss": 1.4335, + "step": 62755 + }, + { + "epoch": 11.16, + "learning_rate": 1.2823111111111112e-05, + "loss": 1.4624, + "step": 62760 + }, + { + "epoch": 11.16, + "learning_rate": 1.2820148148148149e-05, + "loss": 1.4073, + "step": 62765 + }, + { + "epoch": 11.16, + "learning_rate": 1.2817185185185184e-05, + "loss": 1.508, + "step": 62770 + }, + { + "epoch": 11.16, + "learning_rate": 1.2814222222222221e-05, + "loss": 1.3509, + "step": 62775 + }, + { + "epoch": 11.16, + "learning_rate": 1.2811259259259258e-05, + "loss": 1.4573, + "step": 62780 + }, + { + "epoch": 11.16, + "learning_rate": 1.2808296296296295e-05, + "loss": 1.3846, + "step": 62785 + }, + { + "epoch": 11.16, + "learning_rate": 1.2805333333333333e-05, + "loss": 1.4614, + "step": 62790 + }, + { + "epoch": 11.16, + "learning_rate": 1.280237037037037e-05, + "loss": 1.4623, + "step": 62795 + }, + { + "epoch": 11.16, + "learning_rate": 1.2799407407407407e-05, + "loss": 1.508, + "step": 62800 + }, + { + "epoch": 11.17, + "learning_rate": 1.2796444444444444e-05, + "loss": 1.4451, + "step": 62805 + }, + { + "epoch": 11.17, + "learning_rate": 1.2793481481481481e-05, + "loss": 1.4005, + "step": 62810 + }, + { + "epoch": 11.17, + "learning_rate": 1.2790518518518518e-05, + "loss": 1.3819, + "step": 62815 + }, + { + "epoch": 11.17, + "learning_rate": 1.2787555555555555e-05, + "loss": 1.4465, + "step": 62820 + }, + { + "epoch": 11.17, + "learning_rate": 1.2784592592592592e-05, + "loss": 1.4123, + "step": 62825 + }, + { + "epoch": 11.17, + "learning_rate": 1.278162962962963e-05, + "loss": 1.3381, + "step": 62830 + }, + { + "epoch": 11.17, + "learning_rate": 1.2778666666666667e-05, + "loss": 1.3782, + "step": 62835 + }, + { + "epoch": 11.17, + "learning_rate": 1.2775703703703704e-05, + "loss": 1.4416, + "step": 62840 + }, + { + "epoch": 11.17, + "learning_rate": 1.2772740740740741e-05, + "loss": 1.4405, + "step": 62845 + }, + { + "epoch": 11.17, + "learning_rate": 1.2769777777777778e-05, + "loss": 1.3594, + "step": 62850 + }, + { + "epoch": 11.17, + "learning_rate": 1.2766814814814815e-05, + "loss": 1.4442, + "step": 62855 + }, + { + "epoch": 11.18, + "learning_rate": 1.2763851851851852e-05, + "loss": 1.4712, + "step": 62860 + }, + { + "epoch": 11.18, + "learning_rate": 1.276088888888889e-05, + "loss": 1.4451, + "step": 62865 + }, + { + "epoch": 11.18, + "learning_rate": 1.2757925925925927e-05, + "loss": 1.4091, + "step": 62870 + }, + { + "epoch": 11.18, + "learning_rate": 1.2754962962962964e-05, + "loss": 1.3955, + "step": 62875 + }, + { + "epoch": 11.18, + "learning_rate": 1.2752000000000001e-05, + "loss": 1.4398, + "step": 62880 + }, + { + "epoch": 11.18, + "learning_rate": 1.2749037037037036e-05, + "loss": 1.4119, + "step": 62885 + }, + { + "epoch": 11.18, + "learning_rate": 1.2746074074074073e-05, + "loss": 1.2501, + "step": 62890 + }, + { + "epoch": 11.18, + "learning_rate": 1.274311111111111e-05, + "loss": 1.3734, + "step": 62895 + }, + { + "epoch": 11.18, + "learning_rate": 1.2740148148148148e-05, + "loss": 1.5163, + "step": 62900 + }, + { + "epoch": 11.18, + "learning_rate": 1.2737185185185185e-05, + "loss": 1.4326, + "step": 62905 + }, + { + "epoch": 11.18, + "learning_rate": 1.2734222222222222e-05, + "loss": 1.4178, + "step": 62910 + }, + { + "epoch": 11.18, + "learning_rate": 1.2731259259259259e-05, + "loss": 1.3932, + "step": 62915 + }, + { + "epoch": 11.19, + "learning_rate": 1.2728296296296296e-05, + "loss": 1.4501, + "step": 62920 + }, + { + "epoch": 11.19, + "learning_rate": 1.2725333333333333e-05, + "loss": 1.4281, + "step": 62925 + }, + { + "epoch": 11.19, + "learning_rate": 1.272237037037037e-05, + "loss": 1.4741, + "step": 62930 + }, + { + "epoch": 11.19, + "learning_rate": 1.2719407407407408e-05, + "loss": 1.3972, + "step": 62935 + }, + { + "epoch": 11.19, + "learning_rate": 1.2716444444444445e-05, + "loss": 1.424, + "step": 62940 + }, + { + "epoch": 11.19, + "learning_rate": 1.2713481481481482e-05, + "loss": 1.3897, + "step": 62945 + }, + { + "epoch": 11.19, + "learning_rate": 1.2710518518518519e-05, + "loss": 1.3829, + "step": 62950 + }, + { + "epoch": 11.19, + "learning_rate": 1.2707555555555556e-05, + "loss": 1.4174, + "step": 62955 + }, + { + "epoch": 11.19, + "learning_rate": 1.2704592592592593e-05, + "loss": 1.4653, + "step": 62960 + }, + { + "epoch": 11.19, + "learning_rate": 1.270162962962963e-05, + "loss": 1.3815, + "step": 62965 + }, + { + "epoch": 11.19, + "learning_rate": 1.2698666666666668e-05, + "loss": 1.3248, + "step": 62970 + }, + { + "epoch": 11.2, + "learning_rate": 1.2695703703703705e-05, + "loss": 1.4613, + "step": 62975 + }, + { + "epoch": 11.2, + "learning_rate": 1.2692740740740742e-05, + "loss": 1.459, + "step": 62980 + }, + { + "epoch": 11.2, + "learning_rate": 1.2689777777777779e-05, + "loss": 1.462, + "step": 62985 + }, + { + "epoch": 11.2, + "learning_rate": 1.2686814814814816e-05, + "loss": 1.378, + "step": 62990 + }, + { + "epoch": 11.2, + "learning_rate": 1.2683851851851853e-05, + "loss": 1.5159, + "step": 62995 + }, + { + "epoch": 11.2, + "learning_rate": 1.2680888888888889e-05, + "loss": 1.3656, + "step": 63000 + }, + { + "epoch": 11.2, + "learning_rate": 1.2677925925925926e-05, + "loss": 1.3683, + "step": 63005 + }, + { + "epoch": 11.2, + "learning_rate": 1.2674962962962963e-05, + "loss": 1.4099, + "step": 63010 + }, + { + "epoch": 11.2, + "learning_rate": 1.2672e-05, + "loss": 1.4831, + "step": 63015 + }, + { + "epoch": 11.2, + "learning_rate": 1.2669037037037037e-05, + "loss": 1.5326, + "step": 63020 + }, + { + "epoch": 11.2, + "learning_rate": 1.2666074074074074e-05, + "loss": 1.45, + "step": 63025 + }, + { + "epoch": 11.21, + "learning_rate": 1.2663111111111111e-05, + "loss": 1.438, + "step": 63030 + }, + { + "epoch": 11.21, + "learning_rate": 1.2660148148148148e-05, + "loss": 1.4529, + "step": 63035 + }, + { + "epoch": 11.21, + "learning_rate": 1.2657185185185186e-05, + "loss": 1.3854, + "step": 63040 + }, + { + "epoch": 11.21, + "learning_rate": 1.2654222222222223e-05, + "loss": 1.4134, + "step": 63045 + }, + { + "epoch": 11.21, + "learning_rate": 1.265125925925926e-05, + "loss": 1.444, + "step": 63050 + }, + { + "epoch": 11.21, + "learning_rate": 1.2648296296296297e-05, + "loss": 1.4758, + "step": 63055 + }, + { + "epoch": 11.21, + "learning_rate": 1.2645333333333334e-05, + "loss": 1.4061, + "step": 63060 + }, + { + "epoch": 11.21, + "learning_rate": 1.2642370370370371e-05, + "loss": 1.5346, + "step": 63065 + }, + { + "epoch": 11.21, + "learning_rate": 1.2639407407407408e-05, + "loss": 1.448, + "step": 63070 + }, + { + "epoch": 11.21, + "learning_rate": 1.2636444444444446e-05, + "loss": 1.3911, + "step": 63075 + }, + { + "epoch": 11.21, + "learning_rate": 1.2633481481481483e-05, + "loss": 1.5641, + "step": 63080 + }, + { + "epoch": 11.22, + "learning_rate": 1.263051851851852e-05, + "loss": 1.4784, + "step": 63085 + }, + { + "epoch": 11.22, + "learning_rate": 1.2627555555555557e-05, + "loss": 1.477, + "step": 63090 + }, + { + "epoch": 11.22, + "learning_rate": 1.2624592592592594e-05, + "loss": 1.302, + "step": 63095 + }, + { + "epoch": 11.22, + "learning_rate": 1.2621629629629631e-05, + "loss": 1.4063, + "step": 63100 + }, + { + "epoch": 11.22, + "learning_rate": 1.2618666666666668e-05, + "loss": 1.4619, + "step": 63105 + }, + { + "epoch": 11.22, + "learning_rate": 1.2615703703703705e-05, + "loss": 1.3234, + "step": 63110 + }, + { + "epoch": 11.22, + "learning_rate": 1.261274074074074e-05, + "loss": 1.3497, + "step": 63115 + }, + { + "epoch": 11.22, + "learning_rate": 1.2609777777777778e-05, + "loss": 1.5233, + "step": 63120 + }, + { + "epoch": 11.22, + "learning_rate": 1.2606814814814815e-05, + "loss": 1.3763, + "step": 63125 + }, + { + "epoch": 11.22, + "learning_rate": 1.2603851851851852e-05, + "loss": 1.5741, + "step": 63130 + }, + { + "epoch": 11.22, + "learning_rate": 1.260088888888889e-05, + "loss": 1.4273, + "step": 63135 + }, + { + "epoch": 11.22, + "learning_rate": 1.2597925925925926e-05, + "loss": 1.4434, + "step": 63140 + }, + { + "epoch": 11.23, + "learning_rate": 1.2594962962962964e-05, + "loss": 1.4281, + "step": 63145 + }, + { + "epoch": 11.23, + "learning_rate": 1.2592e-05, + "loss": 1.6038, + "step": 63150 + }, + { + "epoch": 11.23, + "learning_rate": 1.2589037037037038e-05, + "loss": 1.5052, + "step": 63155 + }, + { + "epoch": 11.23, + "learning_rate": 1.2586074074074075e-05, + "loss": 1.3966, + "step": 63160 + }, + { + "epoch": 11.23, + "learning_rate": 1.2583111111111112e-05, + "loss": 1.2816, + "step": 63165 + }, + { + "epoch": 11.23, + "learning_rate": 1.258014814814815e-05, + "loss": 1.4384, + "step": 63170 + }, + { + "epoch": 11.23, + "learning_rate": 1.2577185185185186e-05, + "loss": 1.4818, + "step": 63175 + }, + { + "epoch": 11.23, + "learning_rate": 1.2574222222222224e-05, + "loss": 1.391, + "step": 63180 + }, + { + "epoch": 11.23, + "learning_rate": 1.257125925925926e-05, + "loss": 1.3863, + "step": 63185 + }, + { + "epoch": 11.23, + "learning_rate": 1.2568296296296298e-05, + "loss": 1.5218, + "step": 63190 + }, + { + "epoch": 11.23, + "learning_rate": 1.2565333333333335e-05, + "loss": 1.5048, + "step": 63195 + }, + { + "epoch": 11.24, + "learning_rate": 1.2562370370370372e-05, + "loss": 1.4771, + "step": 63200 + }, + { + "epoch": 11.24, + "learning_rate": 1.2559407407407409e-05, + "loss": 1.3952, + "step": 63205 + }, + { + "epoch": 11.24, + "learning_rate": 1.2556444444444446e-05, + "loss": 1.2982, + "step": 63210 + }, + { + "epoch": 11.24, + "learning_rate": 1.2553481481481483e-05, + "loss": 1.248, + "step": 63215 + }, + { + "epoch": 11.24, + "learning_rate": 1.255051851851852e-05, + "loss": 1.4309, + "step": 63220 + }, + { + "epoch": 11.24, + "learning_rate": 1.2547555555555558e-05, + "loss": 1.4947, + "step": 63225 + }, + { + "epoch": 11.24, + "learning_rate": 1.2544592592592593e-05, + "loss": 1.4467, + "step": 63230 + }, + { + "epoch": 11.24, + "learning_rate": 1.254162962962963e-05, + "loss": 1.4155, + "step": 63235 + }, + { + "epoch": 11.24, + "learning_rate": 1.2538666666666667e-05, + "loss": 1.5096, + "step": 63240 + }, + { + "epoch": 11.24, + "learning_rate": 1.2535703703703704e-05, + "loss": 1.4144, + "step": 63245 + }, + { + "epoch": 11.24, + "learning_rate": 1.2532740740740742e-05, + "loss": 1.3855, + "step": 63250 + }, + { + "epoch": 11.25, + "learning_rate": 1.2529777777777779e-05, + "loss": 1.5263, + "step": 63255 + }, + { + "epoch": 11.25, + "learning_rate": 1.2526814814814816e-05, + "loss": 1.3757, + "step": 63260 + }, + { + "epoch": 11.25, + "learning_rate": 1.2523851851851853e-05, + "loss": 1.4024, + "step": 63265 + }, + { + "epoch": 11.25, + "learning_rate": 1.252088888888889e-05, + "loss": 1.4251, + "step": 63270 + }, + { + "epoch": 11.25, + "learning_rate": 1.2517925925925927e-05, + "loss": 1.4224, + "step": 63275 + }, + { + "epoch": 11.25, + "learning_rate": 1.2514962962962964e-05, + "loss": 1.4568, + "step": 63280 + }, + { + "epoch": 11.25, + "learning_rate": 1.2512000000000002e-05, + "loss": 1.4631, + "step": 63285 + }, + { + "epoch": 11.25, + "learning_rate": 1.2509037037037039e-05, + "loss": 1.37, + "step": 63290 + }, + { + "epoch": 11.25, + "learning_rate": 1.2506074074074076e-05, + "loss": 1.5101, + "step": 63295 + }, + { + "epoch": 11.25, + "learning_rate": 1.2503111111111113e-05, + "loss": 1.494, + "step": 63300 + }, + { + "epoch": 11.25, + "learning_rate": 1.250014814814815e-05, + "loss": 1.4636, + "step": 63305 + }, + { + "epoch": 11.26, + "learning_rate": 1.2497185185185185e-05, + "loss": 1.4333, + "step": 63310 + }, + { + "epoch": 11.26, + "learning_rate": 1.2494222222222223e-05, + "loss": 1.2915, + "step": 63315 + }, + { + "epoch": 11.26, + "learning_rate": 1.249125925925926e-05, + "loss": 1.5589, + "step": 63320 + }, + { + "epoch": 11.26, + "learning_rate": 1.2488296296296297e-05, + "loss": 1.4958, + "step": 63325 + }, + { + "epoch": 11.26, + "learning_rate": 1.2485333333333334e-05, + "loss": 1.3333, + "step": 63330 + }, + { + "epoch": 11.26, + "learning_rate": 1.2482370370370371e-05, + "loss": 1.3119, + "step": 63335 + }, + { + "epoch": 11.26, + "learning_rate": 1.2479407407407408e-05, + "loss": 1.4723, + "step": 63340 + }, + { + "epoch": 11.26, + "learning_rate": 1.2476444444444445e-05, + "loss": 1.5131, + "step": 63345 + }, + { + "epoch": 11.26, + "learning_rate": 1.2473481481481482e-05, + "loss": 1.5058, + "step": 63350 + }, + { + "epoch": 11.26, + "learning_rate": 1.247051851851852e-05, + "loss": 1.4424, + "step": 63355 + }, + { + "epoch": 11.26, + "learning_rate": 1.2467555555555557e-05, + "loss": 1.4529, + "step": 63360 + }, + { + "epoch": 11.26, + "learning_rate": 1.2464592592592592e-05, + "loss": 1.4008, + "step": 63365 + }, + { + "epoch": 11.27, + "learning_rate": 1.246162962962963e-05, + "loss": 1.4442, + "step": 63370 + }, + { + "epoch": 11.27, + "learning_rate": 1.2458666666666666e-05, + "loss": 1.4131, + "step": 63375 + }, + { + "epoch": 11.27, + "learning_rate": 1.2455703703703704e-05, + "loss": 1.4425, + "step": 63380 + }, + { + "epoch": 11.27, + "learning_rate": 1.245274074074074e-05, + "loss": 1.4593, + "step": 63385 + }, + { + "epoch": 11.27, + "learning_rate": 1.2449777777777778e-05, + "loss": 1.3284, + "step": 63390 + }, + { + "epoch": 11.27, + "learning_rate": 1.2446814814814815e-05, + "loss": 1.3679, + "step": 63395 + }, + { + "epoch": 11.27, + "learning_rate": 1.2443851851851852e-05, + "loss": 1.449, + "step": 63400 + }, + { + "epoch": 11.27, + "learning_rate": 1.244088888888889e-05, + "loss": 1.3136, + "step": 63405 + }, + { + "epoch": 11.27, + "learning_rate": 1.2437925925925926e-05, + "loss": 1.5921, + "step": 63410 + }, + { + "epoch": 11.27, + "learning_rate": 1.2434962962962963e-05, + "loss": 1.3995, + "step": 63415 + }, + { + "epoch": 11.27, + "learning_rate": 1.2432e-05, + "loss": 1.5115, + "step": 63420 + }, + { + "epoch": 11.28, + "learning_rate": 1.2429037037037038e-05, + "loss": 1.3916, + "step": 63425 + }, + { + "epoch": 11.28, + "learning_rate": 1.2426074074074075e-05, + "loss": 1.4975, + "step": 63430 + }, + { + "epoch": 11.28, + "learning_rate": 1.2423111111111112e-05, + "loss": 1.4304, + "step": 63435 + }, + { + "epoch": 11.28, + "learning_rate": 1.2420148148148149e-05, + "loss": 1.3127, + "step": 63440 + }, + { + "epoch": 11.28, + "learning_rate": 1.2417185185185186e-05, + "loss": 1.3067, + "step": 63445 + }, + { + "epoch": 11.28, + "learning_rate": 1.2414222222222223e-05, + "loss": 1.4566, + "step": 63450 + }, + { + "epoch": 11.28, + "learning_rate": 1.241125925925926e-05, + "loss": 1.3178, + "step": 63455 + }, + { + "epoch": 11.28, + "learning_rate": 1.2408296296296298e-05, + "loss": 1.4906, + "step": 63460 + }, + { + "epoch": 11.28, + "learning_rate": 1.2405333333333335e-05, + "loss": 1.3916, + "step": 63465 + }, + { + "epoch": 11.28, + "learning_rate": 1.2402370370370372e-05, + "loss": 1.4068, + "step": 63470 + }, + { + "epoch": 11.28, + "learning_rate": 1.2399407407407409e-05, + "loss": 1.4206, + "step": 63475 + }, + { + "epoch": 11.29, + "learning_rate": 1.2396444444444444e-05, + "loss": 1.4474, + "step": 63480 + }, + { + "epoch": 11.29, + "learning_rate": 1.2393481481481482e-05, + "loss": 1.4051, + "step": 63485 + }, + { + "epoch": 11.29, + "learning_rate": 1.2390518518518519e-05, + "loss": 1.3416, + "step": 63490 + }, + { + "epoch": 11.29, + "learning_rate": 1.2387555555555556e-05, + "loss": 1.3681, + "step": 63495 + }, + { + "epoch": 11.29, + "learning_rate": 1.2384592592592593e-05, + "loss": 1.4256, + "step": 63500 + }, + { + "epoch": 11.29, + "learning_rate": 1.238162962962963e-05, + "loss": 1.4887, + "step": 63505 + }, + { + "epoch": 11.29, + "learning_rate": 1.2378666666666667e-05, + "loss": 1.4836, + "step": 63510 + }, + { + "epoch": 11.29, + "learning_rate": 1.2375703703703704e-05, + "loss": 1.4317, + "step": 63515 + }, + { + "epoch": 11.29, + "learning_rate": 1.2372740740740741e-05, + "loss": 1.5093, + "step": 63520 + }, + { + "epoch": 11.29, + "learning_rate": 1.2369777777777779e-05, + "loss": 1.4812, + "step": 63525 + }, + { + "epoch": 11.29, + "learning_rate": 1.2366814814814816e-05, + "loss": 1.4008, + "step": 63530 + }, + { + "epoch": 11.3, + "learning_rate": 1.2363851851851853e-05, + "loss": 1.4007, + "step": 63535 + }, + { + "epoch": 11.3, + "learning_rate": 1.236088888888889e-05, + "loss": 1.4426, + "step": 63540 + }, + { + "epoch": 11.3, + "learning_rate": 1.2357925925925927e-05, + "loss": 1.4207, + "step": 63545 + }, + { + "epoch": 11.3, + "learning_rate": 1.2354962962962964e-05, + "loss": 1.388, + "step": 63550 + }, + { + "epoch": 11.3, + "learning_rate": 1.2352000000000001e-05, + "loss": 1.4497, + "step": 63555 + }, + { + "epoch": 11.3, + "learning_rate": 1.2349037037037038e-05, + "loss": 1.3574, + "step": 63560 + }, + { + "epoch": 11.3, + "learning_rate": 1.2346074074074076e-05, + "loss": 1.5136, + "step": 63565 + }, + { + "epoch": 11.3, + "learning_rate": 1.2343111111111113e-05, + "loss": 1.396, + "step": 63570 + }, + { + "epoch": 11.3, + "learning_rate": 1.234014814814815e-05, + "loss": 1.4039, + "step": 63575 + }, + { + "epoch": 11.3, + "learning_rate": 1.2337185185185187e-05, + "loss": 1.4086, + "step": 63580 + }, + { + "epoch": 11.3, + "learning_rate": 1.2334222222222224e-05, + "loss": 1.3543, + "step": 63585 + }, + { + "epoch": 11.3, + "learning_rate": 1.2331259259259261e-05, + "loss": 1.3999, + "step": 63590 + }, + { + "epoch": 11.31, + "learning_rate": 1.2328296296296297e-05, + "loss": 1.4088, + "step": 63595 + }, + { + "epoch": 11.31, + "learning_rate": 1.2325333333333334e-05, + "loss": 1.3915, + "step": 63600 + }, + { + "epoch": 11.31, + "learning_rate": 1.2322370370370371e-05, + "loss": 1.4533, + "step": 63605 + }, + { + "epoch": 11.31, + "learning_rate": 1.2319407407407408e-05, + "loss": 1.3593, + "step": 63610 + }, + { + "epoch": 11.31, + "learning_rate": 1.2316444444444445e-05, + "loss": 1.4548, + "step": 63615 + }, + { + "epoch": 11.31, + "learning_rate": 1.231348148148148e-05, + "loss": 1.4336, + "step": 63620 + }, + { + "epoch": 11.31, + "learning_rate": 1.2310518518518518e-05, + "loss": 1.5669, + "step": 63625 + }, + { + "epoch": 11.31, + "learning_rate": 1.2307555555555555e-05, + "loss": 1.4884, + "step": 63630 + }, + { + "epoch": 11.31, + "learning_rate": 1.2304592592592592e-05, + "loss": 1.51, + "step": 63635 + }, + { + "epoch": 11.31, + "learning_rate": 1.2301629629629629e-05, + "loss": 1.4248, + "step": 63640 + }, + { + "epoch": 11.31, + "learning_rate": 1.2298666666666666e-05, + "loss": 1.3863, + "step": 63645 + }, + { + "epoch": 11.32, + "learning_rate": 1.2295703703703703e-05, + "loss": 1.4853, + "step": 63650 + }, + { + "epoch": 11.32, + "learning_rate": 1.229274074074074e-05, + "loss": 1.6204, + "step": 63655 + }, + { + "epoch": 11.32, + "learning_rate": 1.2289777777777778e-05, + "loss": 1.3527, + "step": 63660 + }, + { + "epoch": 11.32, + "learning_rate": 1.2286814814814815e-05, + "loss": 1.4132, + "step": 63665 + }, + { + "epoch": 11.32, + "learning_rate": 1.2283851851851852e-05, + "loss": 1.4646, + "step": 63670 + }, + { + "epoch": 11.32, + "learning_rate": 1.2280888888888889e-05, + "loss": 1.5045, + "step": 63675 + }, + { + "epoch": 11.32, + "learning_rate": 1.2277925925925926e-05, + "loss": 1.5071, + "step": 63680 + }, + { + "epoch": 11.32, + "learning_rate": 1.2274962962962963e-05, + "loss": 1.3902, + "step": 63685 + }, + { + "epoch": 11.32, + "learning_rate": 1.2272e-05, + "loss": 1.4731, + "step": 63690 + }, + { + "epoch": 11.32, + "learning_rate": 1.2269037037037038e-05, + "loss": 1.4404, + "step": 63695 + }, + { + "epoch": 11.32, + "learning_rate": 1.2266074074074075e-05, + "loss": 1.5084, + "step": 63700 + }, + { + "epoch": 11.33, + "learning_rate": 1.2263111111111112e-05, + "loss": 1.4721, + "step": 63705 + }, + { + "epoch": 11.33, + "learning_rate": 1.2260148148148149e-05, + "loss": 1.2777, + "step": 63710 + }, + { + "epoch": 11.33, + "learning_rate": 1.2257185185185186e-05, + "loss": 1.4879, + "step": 63715 + }, + { + "epoch": 11.33, + "learning_rate": 1.2254222222222223e-05, + "loss": 1.4895, + "step": 63720 + }, + { + "epoch": 11.33, + "learning_rate": 1.225125925925926e-05, + "loss": 1.4538, + "step": 63725 + }, + { + "epoch": 11.33, + "learning_rate": 1.2248296296296297e-05, + "loss": 1.4, + "step": 63730 + }, + { + "epoch": 11.33, + "learning_rate": 1.2245333333333333e-05, + "loss": 1.4021, + "step": 63735 + }, + { + "epoch": 11.33, + "learning_rate": 1.224237037037037e-05, + "loss": 1.4455, + "step": 63740 + }, + { + "epoch": 11.33, + "learning_rate": 1.2239407407407407e-05, + "loss": 1.4138, + "step": 63745 + }, + { + "epoch": 11.33, + "learning_rate": 1.2236444444444444e-05, + "loss": 1.4656, + "step": 63750 + }, + { + "epoch": 11.33, + "learning_rate": 1.2233481481481481e-05, + "loss": 1.3558, + "step": 63755 + }, + { + "epoch": 11.34, + "learning_rate": 1.2230518518518519e-05, + "loss": 1.409, + "step": 63760 + }, + { + "epoch": 11.34, + "learning_rate": 1.2227555555555556e-05, + "loss": 1.395, + "step": 63765 + }, + { + "epoch": 11.34, + "learning_rate": 1.2224592592592593e-05, + "loss": 1.3842, + "step": 63770 + }, + { + "epoch": 11.34, + "learning_rate": 1.222162962962963e-05, + "loss": 1.4286, + "step": 63775 + }, + { + "epoch": 11.34, + "learning_rate": 1.2218666666666667e-05, + "loss": 1.4097, + "step": 63780 + }, + { + "epoch": 11.34, + "learning_rate": 1.2215703703703704e-05, + "loss": 1.3628, + "step": 63785 + }, + { + "epoch": 11.34, + "learning_rate": 1.2212740740740741e-05, + "loss": 1.4684, + "step": 63790 + }, + { + "epoch": 11.34, + "learning_rate": 1.2209777777777778e-05, + "loss": 1.4058, + "step": 63795 + }, + { + "epoch": 11.34, + "learning_rate": 1.2206814814814816e-05, + "loss": 1.4203, + "step": 63800 + }, + { + "epoch": 11.34, + "learning_rate": 1.2203851851851853e-05, + "loss": 1.5195, + "step": 63805 + }, + { + "epoch": 11.34, + "learning_rate": 1.220088888888889e-05, + "loss": 1.4547, + "step": 63810 + }, + { + "epoch": 11.34, + "learning_rate": 1.2197925925925927e-05, + "loss": 1.4276, + "step": 63815 + }, + { + "epoch": 11.35, + "learning_rate": 1.2194962962962964e-05, + "loss": 1.3875, + "step": 63820 + }, + { + "epoch": 11.35, + "learning_rate": 1.2192000000000001e-05, + "loss": 1.476, + "step": 63825 + }, + { + "epoch": 11.35, + "learning_rate": 1.2189037037037038e-05, + "loss": 1.4638, + "step": 63830 + }, + { + "epoch": 11.35, + "learning_rate": 1.2186074074074075e-05, + "loss": 1.3933, + "step": 63835 + }, + { + "epoch": 11.35, + "learning_rate": 1.2183111111111113e-05, + "loss": 1.3857, + "step": 63840 + }, + { + "epoch": 11.35, + "learning_rate": 1.218014814814815e-05, + "loss": 1.5262, + "step": 63845 + }, + { + "epoch": 11.35, + "learning_rate": 1.2177185185185185e-05, + "loss": 1.4174, + "step": 63850 + }, + { + "epoch": 11.35, + "learning_rate": 1.2174222222222222e-05, + "loss": 1.4584, + "step": 63855 + }, + { + "epoch": 11.35, + "learning_rate": 1.217125925925926e-05, + "loss": 1.5363, + "step": 63860 + }, + { + "epoch": 11.35, + "learning_rate": 1.2168296296296297e-05, + "loss": 1.537, + "step": 63865 + }, + { + "epoch": 11.35, + "learning_rate": 1.2165333333333334e-05, + "loss": 1.4268, + "step": 63870 + }, + { + "epoch": 11.36, + "learning_rate": 1.216237037037037e-05, + "loss": 1.4448, + "step": 63875 + }, + { + "epoch": 11.36, + "learning_rate": 1.2159407407407408e-05, + "loss": 1.4206, + "step": 63880 + }, + { + "epoch": 11.36, + "learning_rate": 1.2156444444444445e-05, + "loss": 1.3317, + "step": 63885 + }, + { + "epoch": 11.36, + "learning_rate": 1.2153481481481482e-05, + "loss": 1.5224, + "step": 63890 + }, + { + "epoch": 11.36, + "learning_rate": 1.215051851851852e-05, + "loss": 1.4148, + "step": 63895 + }, + { + "epoch": 11.36, + "learning_rate": 1.2147555555555556e-05, + "loss": 1.4214, + "step": 63900 + }, + { + "epoch": 11.36, + "learning_rate": 1.2144592592592594e-05, + "loss": 1.3718, + "step": 63905 + }, + { + "epoch": 11.36, + "learning_rate": 1.214162962962963e-05, + "loss": 1.3296, + "step": 63910 + }, + { + "epoch": 11.36, + "learning_rate": 1.2138666666666668e-05, + "loss": 1.447, + "step": 63915 + }, + { + "epoch": 11.36, + "learning_rate": 1.2135703703703705e-05, + "loss": 1.399, + "step": 63920 + }, + { + "epoch": 11.36, + "learning_rate": 1.2132740740740742e-05, + "loss": 1.3159, + "step": 63925 + }, + { + "epoch": 11.37, + "learning_rate": 1.212977777777778e-05, + "loss": 1.4368, + "step": 63930 + }, + { + "epoch": 11.37, + "learning_rate": 1.2126814814814816e-05, + "loss": 1.5324, + "step": 63935 + }, + { + "epoch": 11.37, + "learning_rate": 1.2123851851851853e-05, + "loss": 1.5732, + "step": 63940 + }, + { + "epoch": 11.37, + "learning_rate": 1.212088888888889e-05, + "loss": 1.4191, + "step": 63945 + }, + { + "epoch": 11.37, + "learning_rate": 1.2117925925925928e-05, + "loss": 1.4841, + "step": 63950 + }, + { + "epoch": 11.37, + "learning_rate": 1.2114962962962965e-05, + "loss": 1.4358, + "step": 63955 + }, + { + "epoch": 11.37, + "learning_rate": 1.2112000000000002e-05, + "loss": 1.3871, + "step": 63960 + }, + { + "epoch": 11.37, + "learning_rate": 1.2109037037037037e-05, + "loss": 1.4708, + "step": 63965 + }, + { + "epoch": 11.37, + "learning_rate": 1.2106074074074075e-05, + "loss": 1.4338, + "step": 63970 + }, + { + "epoch": 11.37, + "learning_rate": 1.2103111111111112e-05, + "loss": 1.6058, + "step": 63975 + }, + { + "epoch": 11.37, + "learning_rate": 1.2100148148148149e-05, + "loss": 1.4204, + "step": 63980 + }, + { + "epoch": 11.38, + "learning_rate": 1.2097185185185186e-05, + "loss": 1.3751, + "step": 63985 + }, + { + "epoch": 11.38, + "learning_rate": 1.2094222222222223e-05, + "loss": 1.4953, + "step": 63990 + }, + { + "epoch": 11.38, + "learning_rate": 1.209125925925926e-05, + "loss": 1.5018, + "step": 63995 + }, + { + "epoch": 11.38, + "learning_rate": 1.2088296296296297e-05, + "loss": 1.4578, + "step": 64000 + }, + { + "epoch": 11.38, + "learning_rate": 1.2085333333333333e-05, + "loss": 1.3368, + "step": 64005 + }, + { + "epoch": 11.38, + "learning_rate": 1.208237037037037e-05, + "loss": 1.4855, + "step": 64010 + }, + { + "epoch": 11.38, + "learning_rate": 1.2079407407407407e-05, + "loss": 1.5567, + "step": 64015 + }, + { + "epoch": 11.38, + "learning_rate": 1.2076444444444444e-05, + "loss": 1.4216, + "step": 64020 + }, + { + "epoch": 11.38, + "learning_rate": 1.2073481481481481e-05, + "loss": 1.4041, + "step": 64025 + }, + { + "epoch": 11.38, + "learning_rate": 1.2070518518518518e-05, + "loss": 1.501, + "step": 64030 + }, + { + "epoch": 11.38, + "learning_rate": 1.2067555555555555e-05, + "loss": 1.3653, + "step": 64035 + }, + { + "epoch": 11.38, + "learning_rate": 1.2064592592592593e-05, + "loss": 1.441, + "step": 64040 + }, + { + "epoch": 11.39, + "learning_rate": 1.206162962962963e-05, + "loss": 1.3726, + "step": 64045 + }, + { + "epoch": 11.39, + "learning_rate": 1.2058666666666667e-05, + "loss": 1.4443, + "step": 64050 + }, + { + "epoch": 11.39, + "learning_rate": 1.2055703703703704e-05, + "loss": 1.2689, + "step": 64055 + }, + { + "epoch": 11.39, + "learning_rate": 1.2052740740740741e-05, + "loss": 1.3833, + "step": 64060 + }, + { + "epoch": 11.39, + "learning_rate": 1.2049777777777778e-05, + "loss": 1.4577, + "step": 64065 + }, + { + "epoch": 11.39, + "learning_rate": 1.2046814814814815e-05, + "loss": 1.4814, + "step": 64070 + }, + { + "epoch": 11.39, + "learning_rate": 1.2043851851851853e-05, + "loss": 1.4675, + "step": 64075 + }, + { + "epoch": 11.39, + "learning_rate": 1.204088888888889e-05, + "loss": 1.5369, + "step": 64080 + }, + { + "epoch": 11.39, + "learning_rate": 1.2037925925925927e-05, + "loss": 1.4042, + "step": 64085 + }, + { + "epoch": 11.39, + "learning_rate": 1.2034962962962964e-05, + "loss": 1.4081, + "step": 64090 + }, + { + "epoch": 11.39, + "learning_rate": 1.2032000000000001e-05, + "loss": 1.4858, + "step": 64095 + }, + { + "epoch": 11.4, + "learning_rate": 1.2029037037037036e-05, + "loss": 1.3878, + "step": 64100 + }, + { + "epoch": 11.4, + "learning_rate": 1.2026074074074074e-05, + "loss": 1.4497, + "step": 64105 + }, + { + "epoch": 11.4, + "learning_rate": 1.202311111111111e-05, + "loss": 1.2881, + "step": 64110 + }, + { + "epoch": 11.4, + "learning_rate": 1.2020148148148148e-05, + "loss": 1.472, + "step": 64115 + }, + { + "epoch": 11.4, + "learning_rate": 1.2017185185185185e-05, + "loss": 1.409, + "step": 64120 + }, + { + "epoch": 11.4, + "learning_rate": 1.2014222222222222e-05, + "loss": 1.4294, + "step": 64125 + }, + { + "epoch": 11.4, + "learning_rate": 1.201125925925926e-05, + "loss": 1.4415, + "step": 64130 + }, + { + "epoch": 11.4, + "learning_rate": 1.2008296296296296e-05, + "loss": 1.3761, + "step": 64135 + }, + { + "epoch": 11.4, + "learning_rate": 1.2005333333333333e-05, + "loss": 1.4763, + "step": 64140 + }, + { + "epoch": 11.4, + "learning_rate": 1.200237037037037e-05, + "loss": 1.3813, + "step": 64145 + }, + { + "epoch": 11.4, + "learning_rate": 1.1999407407407408e-05, + "loss": 1.4211, + "step": 64150 + }, + { + "epoch": 11.41, + "learning_rate": 1.1996444444444445e-05, + "loss": 1.4975, + "step": 64155 + }, + { + "epoch": 11.41, + "learning_rate": 1.1993481481481482e-05, + "loss": 1.3922, + "step": 64160 + }, + { + "epoch": 11.41, + "learning_rate": 1.1990518518518519e-05, + "loss": 1.4213, + "step": 64165 + }, + { + "epoch": 11.41, + "learning_rate": 1.1987555555555556e-05, + "loss": 1.4342, + "step": 64170 + }, + { + "epoch": 11.41, + "learning_rate": 1.1984592592592593e-05, + "loss": 1.5983, + "step": 64175 + }, + { + "epoch": 11.41, + "learning_rate": 1.198162962962963e-05, + "loss": 1.383, + "step": 64180 + }, + { + "epoch": 11.41, + "learning_rate": 1.1978666666666668e-05, + "loss": 1.4656, + "step": 64185 + }, + { + "epoch": 11.41, + "learning_rate": 1.1975703703703705e-05, + "loss": 1.4681, + "step": 64190 + }, + { + "epoch": 11.41, + "learning_rate": 1.1972740740740742e-05, + "loss": 1.5319, + "step": 64195 + }, + { + "epoch": 11.41, + "learning_rate": 1.1969777777777779e-05, + "loss": 1.3237, + "step": 64200 + }, + { + "epoch": 11.41, + "learning_rate": 1.1966814814814816e-05, + "loss": 1.478, + "step": 64205 + }, + { + "epoch": 11.42, + "learning_rate": 1.1963851851851853e-05, + "loss": 1.4051, + "step": 64210 + }, + { + "epoch": 11.42, + "learning_rate": 1.1960888888888889e-05, + "loss": 1.5332, + "step": 64215 + }, + { + "epoch": 11.42, + "learning_rate": 1.1957925925925926e-05, + "loss": 1.4351, + "step": 64220 + }, + { + "epoch": 11.42, + "learning_rate": 1.1954962962962963e-05, + "loss": 1.4731, + "step": 64225 + }, + { + "epoch": 11.42, + "learning_rate": 1.1952e-05, + "loss": 1.5974, + "step": 64230 + }, + { + "epoch": 11.42, + "learning_rate": 1.1949037037037037e-05, + "loss": 1.3687, + "step": 64235 + }, + { + "epoch": 11.42, + "learning_rate": 1.1946074074074074e-05, + "loss": 1.382, + "step": 64240 + }, + { + "epoch": 11.42, + "learning_rate": 1.1943111111111111e-05, + "loss": 1.3731, + "step": 64245 + }, + { + "epoch": 11.42, + "learning_rate": 1.1940148148148149e-05, + "loss": 1.4651, + "step": 64250 + }, + { + "epoch": 11.42, + "learning_rate": 1.1937185185185186e-05, + "loss": 1.3834, + "step": 64255 + }, + { + "epoch": 11.42, + "learning_rate": 1.1934222222222223e-05, + "loss": 1.4097, + "step": 64260 + }, + { + "epoch": 11.42, + "learning_rate": 1.193125925925926e-05, + "loss": 1.5593, + "step": 64265 + }, + { + "epoch": 11.43, + "learning_rate": 1.1928296296296297e-05, + "loss": 1.4307, + "step": 64270 + }, + { + "epoch": 11.43, + "learning_rate": 1.1925333333333334e-05, + "loss": 1.4859, + "step": 64275 + }, + { + "epoch": 11.43, + "learning_rate": 1.1922370370370371e-05, + "loss": 1.4241, + "step": 64280 + }, + { + "epoch": 11.43, + "learning_rate": 1.1919407407407408e-05, + "loss": 1.4335, + "step": 64285 + }, + { + "epoch": 11.43, + "learning_rate": 1.1916444444444446e-05, + "loss": 1.5566, + "step": 64290 + }, + { + "epoch": 11.43, + "learning_rate": 1.1913481481481483e-05, + "loss": 1.5053, + "step": 64295 + }, + { + "epoch": 11.43, + "learning_rate": 1.191051851851852e-05, + "loss": 1.5723, + "step": 64300 + }, + { + "epoch": 11.43, + "learning_rate": 1.1907555555555557e-05, + "loss": 1.4238, + "step": 64305 + }, + { + "epoch": 11.43, + "learning_rate": 1.1904592592592594e-05, + "loss": 1.3484, + "step": 64310 + }, + { + "epoch": 11.43, + "learning_rate": 1.1901629629629631e-05, + "loss": 1.5124, + "step": 64315 + }, + { + "epoch": 11.43, + "learning_rate": 1.1898666666666668e-05, + "loss": 1.4343, + "step": 64320 + }, + { + "epoch": 11.44, + "learning_rate": 1.1895703703703706e-05, + "loss": 1.335, + "step": 64325 + }, + { + "epoch": 11.44, + "learning_rate": 1.1892740740740741e-05, + "loss": 1.4694, + "step": 64330 + }, + { + "epoch": 11.44, + "learning_rate": 1.1889777777777778e-05, + "loss": 1.5566, + "step": 64335 + }, + { + "epoch": 11.44, + "learning_rate": 1.1886814814814815e-05, + "loss": 1.5253, + "step": 64340 + }, + { + "epoch": 11.44, + "learning_rate": 1.1883851851851852e-05, + "loss": 1.4767, + "step": 64345 + }, + { + "epoch": 11.44, + "learning_rate": 1.188088888888889e-05, + "loss": 1.4058, + "step": 64350 + }, + { + "epoch": 11.44, + "learning_rate": 1.1877925925925927e-05, + "loss": 1.5159, + "step": 64355 + }, + { + "epoch": 11.44, + "learning_rate": 1.1874962962962964e-05, + "loss": 1.5364, + "step": 64360 + }, + { + "epoch": 11.44, + "learning_rate": 1.1872000000000001e-05, + "loss": 1.6023, + "step": 64365 + }, + { + "epoch": 11.44, + "learning_rate": 1.1869037037037038e-05, + "loss": 1.3556, + "step": 64370 + }, + { + "epoch": 11.44, + "learning_rate": 1.1866074074074075e-05, + "loss": 1.4972, + "step": 64375 + }, + { + "epoch": 11.45, + "learning_rate": 1.1863111111111112e-05, + "loss": 1.5108, + "step": 64380 + }, + { + "epoch": 11.45, + "learning_rate": 1.186014814814815e-05, + "loss": 1.363, + "step": 64385 + }, + { + "epoch": 11.45, + "learning_rate": 1.1857185185185185e-05, + "loss": 1.4665, + "step": 64390 + }, + { + "epoch": 11.45, + "learning_rate": 1.1854222222222222e-05, + "loss": 1.4564, + "step": 64395 + }, + { + "epoch": 11.45, + "learning_rate": 1.1851259259259259e-05, + "loss": 1.4022, + "step": 64400 + }, + { + "epoch": 11.45, + "learning_rate": 1.1848296296296296e-05, + "loss": 1.5894, + "step": 64405 + }, + { + "epoch": 11.45, + "learning_rate": 1.1845333333333333e-05, + "loss": 1.4424, + "step": 64410 + }, + { + "epoch": 11.45, + "learning_rate": 1.184237037037037e-05, + "loss": 1.4059, + "step": 64415 + }, + { + "epoch": 11.45, + "learning_rate": 1.1839407407407408e-05, + "loss": 1.559, + "step": 64420 + }, + { + "epoch": 11.45, + "learning_rate": 1.1836444444444445e-05, + "loss": 1.5001, + "step": 64425 + }, + { + "epoch": 11.45, + "learning_rate": 1.1833481481481482e-05, + "loss": 1.4889, + "step": 64430 + }, + { + "epoch": 11.46, + "learning_rate": 1.1830518518518519e-05, + "loss": 1.4373, + "step": 64435 + }, + { + "epoch": 11.46, + "learning_rate": 1.1827555555555556e-05, + "loss": 1.4314, + "step": 64440 + }, + { + "epoch": 11.46, + "learning_rate": 1.1824592592592593e-05, + "loss": 1.4094, + "step": 64445 + }, + { + "epoch": 11.46, + "learning_rate": 1.182162962962963e-05, + "loss": 1.4187, + "step": 64450 + }, + { + "epoch": 11.46, + "learning_rate": 1.1818666666666667e-05, + "loss": 1.4395, + "step": 64455 + }, + { + "epoch": 11.46, + "learning_rate": 1.1815703703703705e-05, + "loss": 1.4671, + "step": 64460 + }, + { + "epoch": 11.46, + "learning_rate": 1.1812740740740742e-05, + "loss": 1.4288, + "step": 64465 + }, + { + "epoch": 11.46, + "learning_rate": 1.1809777777777777e-05, + "loss": 1.4545, + "step": 64470 + }, + { + "epoch": 11.46, + "learning_rate": 1.1806814814814814e-05, + "loss": 1.3199, + "step": 64475 + }, + { + "epoch": 11.46, + "learning_rate": 1.1803851851851851e-05, + "loss": 1.4931, + "step": 64480 + }, + { + "epoch": 11.46, + "learning_rate": 1.1800888888888889e-05, + "loss": 1.4519, + "step": 64485 + }, + { + "epoch": 11.46, + "learning_rate": 1.1797925925925926e-05, + "loss": 1.4743, + "step": 64490 + }, + { + "epoch": 11.47, + "learning_rate": 1.1794962962962963e-05, + "loss": 1.4663, + "step": 64495 + }, + { + "epoch": 11.47, + "learning_rate": 1.1792e-05, + "loss": 1.3287, + "step": 64500 + }, + { + "epoch": 11.47, + "learning_rate": 1.1789037037037037e-05, + "loss": 1.3607, + "step": 64505 + }, + { + "epoch": 11.47, + "learning_rate": 1.1786074074074074e-05, + "loss": 1.5085, + "step": 64510 + }, + { + "epoch": 11.47, + "learning_rate": 1.1783111111111111e-05, + "loss": 1.4652, + "step": 64515 + }, + { + "epoch": 11.47, + "learning_rate": 1.1780148148148148e-05, + "loss": 1.4134, + "step": 64520 + }, + { + "epoch": 11.47, + "learning_rate": 1.1777185185185186e-05, + "loss": 1.4206, + "step": 64525 + }, + { + "epoch": 11.47, + "learning_rate": 1.1774222222222223e-05, + "loss": 1.3942, + "step": 64530 + }, + { + "epoch": 11.47, + "learning_rate": 1.177125925925926e-05, + "loss": 1.4854, + "step": 64535 + }, + { + "epoch": 11.47, + "learning_rate": 1.1768296296296297e-05, + "loss": 1.4309, + "step": 64540 + }, + { + "epoch": 11.47, + "learning_rate": 1.1765333333333334e-05, + "loss": 1.5314, + "step": 64545 + }, + { + "epoch": 11.48, + "learning_rate": 1.1762370370370371e-05, + "loss": 1.3848, + "step": 64550 + }, + { + "epoch": 11.48, + "learning_rate": 1.1759407407407408e-05, + "loss": 1.3669, + "step": 64555 + }, + { + "epoch": 11.48, + "learning_rate": 1.1756444444444445e-05, + "loss": 1.517, + "step": 64560 + }, + { + "epoch": 11.48, + "learning_rate": 1.1753481481481483e-05, + "loss": 1.5267, + "step": 64565 + }, + { + "epoch": 11.48, + "learning_rate": 1.175051851851852e-05, + "loss": 1.4814, + "step": 64570 + }, + { + "epoch": 11.48, + "learning_rate": 1.1747555555555557e-05, + "loss": 1.3463, + "step": 64575 + }, + { + "epoch": 11.48, + "learning_rate": 1.1744592592592594e-05, + "loss": 1.4455, + "step": 64580 + }, + { + "epoch": 11.48, + "learning_rate": 1.174162962962963e-05, + "loss": 1.5548, + "step": 64585 + }, + { + "epoch": 11.48, + "learning_rate": 1.1738666666666667e-05, + "loss": 1.5267, + "step": 64590 + }, + { + "epoch": 11.48, + "learning_rate": 1.1735703703703704e-05, + "loss": 1.3188, + "step": 64595 + }, + { + "epoch": 11.48, + "learning_rate": 1.173274074074074e-05, + "loss": 1.5194, + "step": 64600 + }, + { + "epoch": 11.49, + "learning_rate": 1.1729777777777778e-05, + "loss": 1.374, + "step": 64605 + }, + { + "epoch": 11.49, + "learning_rate": 1.1726814814814815e-05, + "loss": 1.3236, + "step": 64610 + }, + { + "epoch": 11.49, + "learning_rate": 1.1723851851851852e-05, + "loss": 1.4847, + "step": 64615 + }, + { + "epoch": 11.49, + "learning_rate": 1.172088888888889e-05, + "loss": 1.4281, + "step": 64620 + }, + { + "epoch": 11.49, + "learning_rate": 1.1717925925925926e-05, + "loss": 1.5842, + "step": 64625 + }, + { + "epoch": 11.49, + "learning_rate": 1.1714962962962964e-05, + "loss": 1.4239, + "step": 64630 + }, + { + "epoch": 11.49, + "learning_rate": 1.1712e-05, + "loss": 1.499, + "step": 64635 + }, + { + "epoch": 11.49, + "learning_rate": 1.1709037037037038e-05, + "loss": 1.3439, + "step": 64640 + }, + { + "epoch": 11.49, + "learning_rate": 1.1706074074074075e-05, + "loss": 1.3417, + "step": 64645 + }, + { + "epoch": 11.49, + "learning_rate": 1.1703111111111112e-05, + "loss": 1.4448, + "step": 64650 + }, + { + "epoch": 11.49, + "learning_rate": 1.170014814814815e-05, + "loss": 1.4739, + "step": 64655 + }, + { + "epoch": 11.5, + "learning_rate": 1.1697185185185186e-05, + "loss": 1.4692, + "step": 64660 + }, + { + "epoch": 11.5, + "learning_rate": 1.1694222222222223e-05, + "loss": 1.4785, + "step": 64665 + }, + { + "epoch": 11.5, + "learning_rate": 1.169125925925926e-05, + "loss": 1.507, + "step": 64670 + }, + { + "epoch": 11.5, + "learning_rate": 1.1688296296296298e-05, + "loss": 1.4331, + "step": 64675 + }, + { + "epoch": 11.5, + "learning_rate": 1.1685333333333335e-05, + "loss": 1.3896, + "step": 64680 + }, + { + "epoch": 11.5, + "learning_rate": 1.1682370370370372e-05, + "loss": 1.4109, + "step": 64685 + }, + { + "epoch": 11.5, + "learning_rate": 1.1679407407407409e-05, + "loss": 1.4181, + "step": 64690 + }, + { + "epoch": 11.5, + "learning_rate": 1.1676444444444446e-05, + "loss": 1.4497, + "step": 64695 + }, + { + "epoch": 11.5, + "learning_rate": 1.1673481481481482e-05, + "loss": 1.434, + "step": 64700 + }, + { + "epoch": 11.5, + "learning_rate": 1.1670518518518519e-05, + "loss": 1.4931, + "step": 64705 + }, + { + "epoch": 11.5, + "learning_rate": 1.1667555555555556e-05, + "loss": 1.5909, + "step": 64710 + }, + { + "epoch": 11.5, + "learning_rate": 1.1664592592592593e-05, + "loss": 1.3753, + "step": 64715 + }, + { + "epoch": 11.51, + "learning_rate": 1.166162962962963e-05, + "loss": 1.343, + "step": 64720 + }, + { + "epoch": 11.51, + "learning_rate": 1.1658666666666667e-05, + "loss": 1.4428, + "step": 64725 + }, + { + "epoch": 11.51, + "learning_rate": 1.1655703703703704e-05, + "loss": 1.447, + "step": 64730 + }, + { + "epoch": 11.51, + "learning_rate": 1.1652740740740742e-05, + "loss": 1.3478, + "step": 64735 + }, + { + "epoch": 11.51, + "learning_rate": 1.1649777777777779e-05, + "loss": 1.4299, + "step": 64740 + }, + { + "epoch": 11.51, + "learning_rate": 1.1646814814814816e-05, + "loss": 1.5469, + "step": 64745 + }, + { + "epoch": 11.51, + "learning_rate": 1.1643851851851853e-05, + "loss": 1.4951, + "step": 64750 + }, + { + "epoch": 11.51, + "learning_rate": 1.164088888888889e-05, + "loss": 1.4089, + "step": 64755 + }, + { + "epoch": 11.51, + "learning_rate": 1.1637925925925927e-05, + "loss": 1.4509, + "step": 64760 + }, + { + "epoch": 11.51, + "learning_rate": 1.1634962962962964e-05, + "loss": 1.4477, + "step": 64765 + }, + { + "epoch": 11.51, + "learning_rate": 1.1632000000000001e-05, + "loss": 1.3557, + "step": 64770 + }, + { + "epoch": 11.52, + "learning_rate": 1.1629037037037039e-05, + "loss": 1.3025, + "step": 64775 + }, + { + "epoch": 11.52, + "learning_rate": 1.1626074074074074e-05, + "loss": 1.4078, + "step": 64780 + }, + { + "epoch": 11.52, + "learning_rate": 1.1623111111111111e-05, + "loss": 1.4943, + "step": 64785 + }, + { + "epoch": 11.52, + "learning_rate": 1.1620148148148148e-05, + "loss": 1.2919, + "step": 64790 + }, + { + "epoch": 11.52, + "learning_rate": 1.1617185185185185e-05, + "loss": 1.5148, + "step": 64795 + }, + { + "epoch": 11.52, + "learning_rate": 1.1614222222222223e-05, + "loss": 1.444, + "step": 64800 + }, + { + "epoch": 11.52, + "learning_rate": 1.161125925925926e-05, + "loss": 1.384, + "step": 64805 + }, + { + "epoch": 11.52, + "learning_rate": 1.1608296296296297e-05, + "loss": 1.5051, + "step": 64810 + }, + { + "epoch": 11.52, + "learning_rate": 1.1605333333333334e-05, + "loss": 1.4891, + "step": 64815 + }, + { + "epoch": 11.52, + "learning_rate": 1.1602370370370371e-05, + "loss": 1.4748, + "step": 64820 + }, + { + "epoch": 11.52, + "learning_rate": 1.1599407407407408e-05, + "loss": 1.4126, + "step": 64825 + }, + { + "epoch": 11.53, + "learning_rate": 1.1596444444444445e-05, + "loss": 1.3885, + "step": 64830 + }, + { + "epoch": 11.53, + "learning_rate": 1.159348148148148e-05, + "loss": 1.4666, + "step": 64835 + }, + { + "epoch": 11.53, + "learning_rate": 1.1590518518518518e-05, + "loss": 1.5625, + "step": 64840 + } + ], + "max_steps": 84375, + "num_train_epochs": 15, + "total_flos": 2.801300503831511e+18, + "trial_name": null, + "trial_params": null +}