diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,40232 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9999850703930966, + "eval_steps": 500, + "global_step": 33490, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.971931919976113e-08, + "loss": 1.4585, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 2.9859659599880566e-07, + "loss": 1.6256, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 5.971931919976113e-07, + "loss": 1.7173, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 8.957897879964169e-07, + "loss": 1.7771, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 1.1943863839952226e-06, + "loss": 1.7536, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.4929829799940282e-06, + "loss": 1.6147, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 1.7915795759928338e-06, + "loss": 1.6947, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 2.0901761719916392e-06, + "loss": 1.5887, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 2.3887727679904453e-06, + "loss": 1.6785, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 2.6873693639892504e-06, + "loss": 1.6417, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 2.9859659599880565e-06, + "loss": 1.7147, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 3.284562555986862e-06, + "loss": 1.7063, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 3.5831591519856677e-06, + "loss": 1.6572, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 3.881755747984473e-06, + "loss": 1.5187, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 4.1803523439832785e-06, + "loss": 1.62, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 4.478948939982085e-06, + "loss": 1.6468, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 4.7775455359808905e-06, + "loss": 1.5068, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 5.076142131979695e-06, + "loss": 1.5015, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 5.374738727978501e-06, + "loss": 1.508, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 5.673335323977307e-06, + "loss": 1.585, + "step": 95 + }, + { + "epoch": 0.0, + "learning_rate": 5.971931919976113e-06, + "loss": 1.6127, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 6.2705285159749185e-06, + "loss": 1.4654, + "step": 105 + }, + { + "epoch": 0.0, + "learning_rate": 6.569125111973724e-06, + "loss": 1.5766, + "step": 110 + }, + { + "epoch": 0.0, + "learning_rate": 6.86772170797253e-06, + "loss": 1.5265, + "step": 115 + }, + { + "epoch": 0.0, + "learning_rate": 7.166318303971335e-06, + "loss": 1.5008, + "step": 120 + }, + { + "epoch": 0.0, + "learning_rate": 7.464914899970141e-06, + "loss": 1.4499, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 7.763511495968946e-06, + "loss": 1.3783, + "step": 130 + }, + { + "epoch": 0.0, + "learning_rate": 8.062108091967751e-06, + "loss": 1.4394, + "step": 135 + }, + { + "epoch": 0.0, + "learning_rate": 8.360704687966557e-06, + "loss": 1.3813, + "step": 140 + }, + { + "epoch": 0.0, + "learning_rate": 8.659301283965363e-06, + "loss": 1.3199, + "step": 145 + }, + { + "epoch": 0.0, + "learning_rate": 8.95789787996417e-06, + "loss": 1.3726, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 9.256494475962975e-06, + "loss": 1.3185, + "step": 155 + }, + { + "epoch": 0.0, + "learning_rate": 9.555091071961781e-06, + "loss": 1.3442, + "step": 160 + }, + { + "epoch": 0.0, + "learning_rate": 9.853687667960585e-06, + "loss": 1.3801, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 1.015228426395939e-05, + "loss": 1.39, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 1.0450880859958196e-05, + "loss": 1.3099, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 1.0749477455957002e-05, + "loss": 1.2959, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 1.1048074051955807e-05, + "loss": 1.3294, + "step": 185 + }, + { + "epoch": 0.01, + "learning_rate": 1.1346670647954615e-05, + "loss": 1.4781, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 1.164526724395342e-05, + "loss": 1.2775, + "step": 195 + }, + { + "epoch": 0.01, + "learning_rate": 1.1943863839952226e-05, + "loss": 1.2934, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 1.2242460435951031e-05, + "loss": 1.3089, + "step": 205 + }, + { + "epoch": 0.01, + "learning_rate": 1.2541057031949837e-05, + "loss": 1.283, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 1.2839653627948641e-05, + "loss": 1.4208, + "step": 215 + }, + { + "epoch": 0.01, + "learning_rate": 1.3138250223947448e-05, + "loss": 1.2776, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 1.3436846819946252e-05, + "loss": 1.2515, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 1.373544341594506e-05, + "loss": 1.2587, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 1.4034040011943863e-05, + "loss": 1.2766, + "step": 235 + }, + { + "epoch": 0.01, + "learning_rate": 1.433263660794267e-05, + "loss": 1.1937, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 1.4631233203941475e-05, + "loss": 1.2291, + "step": 245 + }, + { + "epoch": 0.01, + "learning_rate": 1.4929829799940282e-05, + "loss": 1.2244, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 1.5228426395939088e-05, + "loss": 1.2706, + "step": 255 + }, + { + "epoch": 0.01, + "learning_rate": 1.552702299193789e-05, + "loss": 1.242, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 1.58256195879367e-05, + "loss": 1.2132, + "step": 265 + }, + { + "epoch": 0.01, + "learning_rate": 1.6124216183935503e-05, + "loss": 1.2591, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 1.642281277993431e-05, + "loss": 1.2301, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 1.6721409375933114e-05, + "loss": 1.2511, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 1.702000597193192e-05, + "loss": 1.2778, + "step": 285 + }, + { + "epoch": 0.01, + "learning_rate": 1.7318602567930725e-05, + "loss": 1.2718, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 1.761719916392953e-05, + "loss": 1.2428, + "step": 295 + }, + { + "epoch": 0.01, + "learning_rate": 1.791579575992834e-05, + "loss": 1.2418, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 1.8214392355927142e-05, + "loss": 1.2644, + "step": 305 + }, + { + "epoch": 0.01, + "learning_rate": 1.851298895192595e-05, + "loss": 1.2524, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 1.8811585547924753e-05, + "loss": 1.3222, + "step": 315 + }, + { + "epoch": 0.01, + "learning_rate": 1.9110182143923562e-05, + "loss": 1.1725, + "step": 320 + }, + { + "epoch": 0.01, + "learning_rate": 1.9408778739922364e-05, + "loss": 1.2395, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 1.970737533592117e-05, + "loss": 1.179, + "step": 330 + }, + { + "epoch": 0.01, + "learning_rate": 2.000597193191998e-05, + "loss": 1.1765, + "step": 335 + }, + { + "epoch": 0.01, + "learning_rate": 2.030456852791878e-05, + "loss": 1.2232, + "step": 340 + }, + { + "epoch": 0.01, + "learning_rate": 2.060316512391759e-05, + "loss": 1.1905, + "step": 345 + }, + { + "epoch": 0.01, + "learning_rate": 2.0901761719916392e-05, + "loss": 1.2126, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 2.12003583159152e-05, + "loss": 1.1298, + "step": 355 + }, + { + "epoch": 0.01, + "learning_rate": 2.1498954911914004e-05, + "loss": 1.1882, + "step": 360 + }, + { + "epoch": 0.01, + "learning_rate": 2.1797551507912813e-05, + "loss": 1.2301, + "step": 365 + }, + { + "epoch": 0.01, + "learning_rate": 2.2096148103911615e-05, + "loss": 1.1727, + "step": 370 + }, + { + "epoch": 0.01, + "learning_rate": 2.2394744699910424e-05, + "loss": 1.0941, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 2.269334129590923e-05, + "loss": 1.224, + "step": 380 + }, + { + "epoch": 0.01, + "learning_rate": 2.299193789190803e-05, + "loss": 1.1372, + "step": 385 + }, + { + "epoch": 0.01, + "learning_rate": 2.329053448790684e-05, + "loss": 1.2249, + "step": 390 + }, + { + "epoch": 0.01, + "learning_rate": 2.3589131083905643e-05, + "loss": 1.2718, + "step": 395 + }, + { + "epoch": 0.01, + "learning_rate": 2.3887727679904452e-05, + "loss": 1.0996, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 2.4186324275903254e-05, + "loss": 1.1589, + "step": 405 + }, + { + "epoch": 0.01, + "learning_rate": 2.4484920871902063e-05, + "loss": 1.2235, + "step": 410 + }, + { + "epoch": 0.01, + "learning_rate": 2.4783517467900865e-05, + "loss": 1.1143, + "step": 415 + }, + { + "epoch": 0.01, + "learning_rate": 2.5082114063899674e-05, + "loss": 1.1644, + "step": 420 + }, + { + "epoch": 0.01, + "learning_rate": 2.5380710659898476e-05, + "loss": 1.163, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 2.5679307255897282e-05, + "loss": 1.086, + "step": 430 + }, + { + "epoch": 0.01, + "learning_rate": 2.597790385189609e-05, + "loss": 1.1865, + "step": 435 + }, + { + "epoch": 0.01, + "learning_rate": 2.6276500447894897e-05, + "loss": 1.1587, + "step": 440 + }, + { + "epoch": 0.01, + "learning_rate": 2.65750970438937e-05, + "loss": 1.1743, + "step": 445 + }, + { + "epoch": 0.01, + "learning_rate": 2.6873693639892504e-05, + "loss": 1.2178, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 2.7172290235891313e-05, + "loss": 1.1798, + "step": 455 + }, + { + "epoch": 0.01, + "learning_rate": 2.747088683189012e-05, + "loss": 1.1444, + "step": 460 + }, + { + "epoch": 0.01, + "learning_rate": 2.7769483427888925e-05, + "loss": 1.2425, + "step": 465 + }, + { + "epoch": 0.01, + "learning_rate": 2.8068080023887727e-05, + "loss": 1.1264, + "step": 470 + }, + { + "epoch": 0.01, + "learning_rate": 2.8366676619886532e-05, + "loss": 1.1555, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 2.866527321588534e-05, + "loss": 1.2075, + "step": 480 + }, + { + "epoch": 0.01, + "learning_rate": 2.8963869811884147e-05, + "loss": 1.1457, + "step": 485 + }, + { + "epoch": 0.01, + "learning_rate": 2.926246640788295e-05, + "loss": 1.1104, + "step": 490 + }, + { + "epoch": 0.01, + "learning_rate": 2.9561063003881755e-05, + "loss": 1.2528, + "step": 495 + }, + { + "epoch": 0.01, + "learning_rate": 2.9859659599880564e-05, + "loss": 1.1005, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 3.015825619587937e-05, + "loss": 1.1275, + "step": 505 + }, + { + "epoch": 0.02, + "learning_rate": 3.0456852791878175e-05, + "loss": 1.1357, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 3.075544938787698e-05, + "loss": 1.1721, + "step": 515 + }, + { + "epoch": 0.02, + "learning_rate": 3.105404598387578e-05, + "loss": 1.1692, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 3.135264257987459e-05, + "loss": 1.142, + "step": 525 + }, + { + "epoch": 0.02, + "learning_rate": 3.16512391758734e-05, + "loss": 1.1517, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 3.1949835771872196e-05, + "loss": 1.1611, + "step": 535 + }, + { + "epoch": 0.02, + "learning_rate": 3.2248432367871005e-05, + "loss": 1.2085, + "step": 540 + }, + { + "epoch": 0.02, + "learning_rate": 3.2547028963869814e-05, + "loss": 1.0626, + "step": 545 + }, + { + "epoch": 0.02, + "learning_rate": 3.284562555986862e-05, + "loss": 1.1229, + "step": 550 + }, + { + "epoch": 0.02, + "learning_rate": 3.3144222155867426e-05, + "loss": 1.151, + "step": 555 + }, + { + "epoch": 0.02, + "learning_rate": 3.344281875186623e-05, + "loss": 1.0884, + "step": 560 + }, + { + "epoch": 0.02, + "learning_rate": 3.374141534786504e-05, + "loss": 1.1217, + "step": 565 + }, + { + "epoch": 0.02, + "learning_rate": 3.404001194386384e-05, + "loss": 1.151, + "step": 570 + }, + { + "epoch": 0.02, + "learning_rate": 3.433860853986265e-05, + "loss": 1.1489, + "step": 575 + }, + { + "epoch": 0.02, + "learning_rate": 3.463720513586145e-05, + "loss": 1.1657, + "step": 580 + }, + { + "epoch": 0.02, + "learning_rate": 3.493580173186026e-05, + "loss": 1.1612, + "step": 585 + }, + { + "epoch": 0.02, + "learning_rate": 3.523439832785906e-05, + "loss": 1.1955, + "step": 590 + }, + { + "epoch": 0.02, + "learning_rate": 3.553299492385787e-05, + "loss": 1.047, + "step": 595 + }, + { + "epoch": 0.02, + "learning_rate": 3.583159151985668e-05, + "loss": 1.0708, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 3.613018811585548e-05, + "loss": 1.2311, + "step": 605 + }, + { + "epoch": 0.02, + "learning_rate": 3.6428784711854284e-05, + "loss": 1.2616, + "step": 610 + }, + { + "epoch": 0.02, + "learning_rate": 3.672738130785309e-05, + "loss": 1.1666, + "step": 615 + }, + { + "epoch": 0.02, + "learning_rate": 3.70259779038519e-05, + "loss": 1.1123, + "step": 620 + }, + { + "epoch": 0.02, + "learning_rate": 3.7324574499850704e-05, + "loss": 1.1402, + "step": 625 + }, + { + "epoch": 0.02, + "learning_rate": 3.7623171095849506e-05, + "loss": 1.1215, + "step": 630 + }, + { + "epoch": 0.02, + "learning_rate": 3.7921767691848315e-05, + "loss": 1.0725, + "step": 635 + }, + { + "epoch": 0.02, + "learning_rate": 3.8220364287847124e-05, + "loss": 1.1213, + "step": 640 + }, + { + "epoch": 0.02, + "learning_rate": 3.8518960883845926e-05, + "loss": 1.1224, + "step": 645 + }, + { + "epoch": 0.02, + "learning_rate": 3.881755747984473e-05, + "loss": 1.1197, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 3.911615407584354e-05, + "loss": 1.1283, + "step": 655 + }, + { + "epoch": 0.02, + "learning_rate": 3.941475067184234e-05, + "loss": 1.1655, + "step": 660 + }, + { + "epoch": 0.02, + "learning_rate": 3.971334726784115e-05, + "loss": 1.0951, + "step": 665 + }, + { + "epoch": 0.02, + "learning_rate": 4.001194386383996e-05, + "loss": 1.1543, + "step": 670 + }, + { + "epoch": 0.02, + "learning_rate": 4.031054045983876e-05, + "loss": 1.0536, + "step": 675 + }, + { + "epoch": 0.02, + "learning_rate": 4.060913705583756e-05, + "loss": 1.1131, + "step": 680 + }, + { + "epoch": 0.02, + "learning_rate": 4.090773365183637e-05, + "loss": 1.0949, + "step": 685 + }, + { + "epoch": 0.02, + "learning_rate": 4.120633024783518e-05, + "loss": 1.1285, + "step": 690 + }, + { + "epoch": 0.02, + "learning_rate": 4.150492684383398e-05, + "loss": 1.1501, + "step": 695 + }, + { + "epoch": 0.02, + "learning_rate": 4.1803523439832785e-05, + "loss": 1.1675, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 4.2102120035831594e-05, + "loss": 1.1537, + "step": 705 + }, + { + "epoch": 0.02, + "learning_rate": 4.24007166318304e-05, + "loss": 1.0697, + "step": 710 + }, + { + "epoch": 0.02, + "learning_rate": 4.2699313227829205e-05, + "loss": 1.0955, + "step": 715 + }, + { + "epoch": 0.02, + "learning_rate": 4.299790982382801e-05, + "loss": 1.0604, + "step": 720 + }, + { + "epoch": 0.02, + "learning_rate": 4.3296506419826816e-05, + "loss": 1.0595, + "step": 725 + }, + { + "epoch": 0.02, + "learning_rate": 4.3595103015825625e-05, + "loss": 1.064, + "step": 730 + }, + { + "epoch": 0.02, + "learning_rate": 4.389369961182443e-05, + "loss": 1.1457, + "step": 735 + }, + { + "epoch": 0.02, + "learning_rate": 4.419229620782323e-05, + "loss": 1.2189, + "step": 740 + }, + { + "epoch": 0.02, + "learning_rate": 4.449089280382204e-05, + "loss": 1.1428, + "step": 745 + }, + { + "epoch": 0.02, + "learning_rate": 4.478948939982085e-05, + "loss": 1.0917, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 4.508808599581965e-05, + "loss": 1.0878, + "step": 755 + }, + { + "epoch": 0.02, + "learning_rate": 4.538668259181846e-05, + "loss": 1.1408, + "step": 760 + }, + { + "epoch": 0.02, + "learning_rate": 4.568527918781726e-05, + "loss": 1.1336, + "step": 765 + }, + { + "epoch": 0.02, + "learning_rate": 4.598387578381606e-05, + "loss": 1.1708, + "step": 770 + }, + { + "epoch": 0.02, + "learning_rate": 4.628247237981487e-05, + "loss": 1.1405, + "step": 775 + }, + { + "epoch": 0.02, + "learning_rate": 4.658106897581368e-05, + "loss": 1.1571, + "step": 780 + }, + { + "epoch": 0.02, + "learning_rate": 4.687966557181248e-05, + "loss": 1.1289, + "step": 785 + }, + { + "epoch": 0.02, + "learning_rate": 4.7178262167811286e-05, + "loss": 1.1346, + "step": 790 + }, + { + "epoch": 0.02, + "learning_rate": 4.7476858763810095e-05, + "loss": 1.1744, + "step": 795 + }, + { + "epoch": 0.02, + "learning_rate": 4.7775455359808904e-05, + "loss": 1.1404, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 4.8074051955807706e-05, + "loss": 1.1259, + "step": 805 + }, + { + "epoch": 0.02, + "learning_rate": 4.837264855180651e-05, + "loss": 1.0889, + "step": 810 + }, + { + "epoch": 0.02, + "learning_rate": 4.867124514780532e-05, + "loss": 1.1308, + "step": 815 + }, + { + "epoch": 0.02, + "learning_rate": 4.8969841743804126e-05, + "loss": 1.2417, + "step": 820 + }, + { + "epoch": 0.02, + "learning_rate": 4.926843833980293e-05, + "loss": 1.0697, + "step": 825 + }, + { + "epoch": 0.02, + "learning_rate": 4.956703493580173e-05, + "loss": 1.0881, + "step": 830 + }, + { + "epoch": 0.02, + "learning_rate": 4.986563153180054e-05, + "loss": 1.1786, + "step": 835 + }, + { + "epoch": 0.03, + "learning_rate": 5.016422812779935e-05, + "loss": 1.1313, + "step": 840 + }, + { + "epoch": 0.03, + "learning_rate": 5.046282472379815e-05, + "loss": 1.193, + "step": 845 + }, + { + "epoch": 0.03, + "learning_rate": 5.076142131979695e-05, + "loss": 1.0761, + "step": 850 + }, + { + "epoch": 0.03, + "learning_rate": 5.106001791579577e-05, + "loss": 1.1518, + "step": 855 + }, + { + "epoch": 0.03, + "learning_rate": 5.1358614511794564e-05, + "loss": 1.0664, + "step": 860 + }, + { + "epoch": 0.03, + "learning_rate": 5.1657211107793366e-05, + "loss": 1.1468, + "step": 865 + }, + { + "epoch": 0.03, + "learning_rate": 5.195580770379218e-05, + "loss": 1.1731, + "step": 870 + }, + { + "epoch": 0.03, + "learning_rate": 5.2254404299790984e-05, + "loss": 1.0429, + "step": 875 + }, + { + "epoch": 0.03, + "learning_rate": 5.255300089578979e-05, + "loss": 1.0916, + "step": 880 + }, + { + "epoch": 0.03, + "learning_rate": 5.2851597491788595e-05, + "loss": 1.1151, + "step": 885 + }, + { + "epoch": 0.03, + "learning_rate": 5.31501940877874e-05, + "loss": 1.1738, + "step": 890 + }, + { + "epoch": 0.03, + "learning_rate": 5.344879068378621e-05, + "loss": 1.1225, + "step": 895 + }, + { + "epoch": 0.03, + "learning_rate": 5.374738727978501e-05, + "loss": 1.1228, + "step": 900 + }, + { + "epoch": 0.03, + "learning_rate": 5.4045983875783825e-05, + "loss": 1.1076, + "step": 905 + }, + { + "epoch": 0.03, + "learning_rate": 5.434458047178263e-05, + "loss": 1.0883, + "step": 910 + }, + { + "epoch": 0.03, + "learning_rate": 5.464317706778143e-05, + "loss": 1.0699, + "step": 915 + }, + { + "epoch": 0.03, + "learning_rate": 5.494177366378024e-05, + "loss": 1.0786, + "step": 920 + }, + { + "epoch": 0.03, + "learning_rate": 5.524037025977904e-05, + "loss": 1.0524, + "step": 925 + }, + { + "epoch": 0.03, + "learning_rate": 5.553896685577785e-05, + "loss": 1.1435, + "step": 930 + }, + { + "epoch": 0.03, + "learning_rate": 5.583756345177665e-05, + "loss": 1.0922, + "step": 935 + }, + { + "epoch": 0.03, + "learning_rate": 5.6136160047775454e-05, + "loss": 1.0464, + "step": 940 + }, + { + "epoch": 0.03, + "learning_rate": 5.643475664377427e-05, + "loss": 1.1485, + "step": 945 + }, + { + "epoch": 0.03, + "learning_rate": 5.6733353239773065e-05, + "loss": 1.1303, + "step": 950 + }, + { + "epoch": 0.03, + "learning_rate": 5.703194983577187e-05, + "loss": 1.1807, + "step": 955 + }, + { + "epoch": 0.03, + "learning_rate": 5.733054643177068e-05, + "loss": 1.1524, + "step": 960 + }, + { + "epoch": 0.03, + "learning_rate": 5.7629143027769485e-05, + "loss": 1.1062, + "step": 965 + }, + { + "epoch": 0.03, + "learning_rate": 5.7927739623768294e-05, + "loss": 1.1626, + "step": 970 + }, + { + "epoch": 0.03, + "learning_rate": 5.8226336219767096e-05, + "loss": 1.1152, + "step": 975 + }, + { + "epoch": 0.03, + "learning_rate": 5.85249328157659e-05, + "loss": 1.0504, + "step": 980 + }, + { + "epoch": 0.03, + "learning_rate": 5.882352941176471e-05, + "loss": 1.1192, + "step": 985 + }, + { + "epoch": 0.03, + "learning_rate": 5.912212600776351e-05, + "loss": 1.1953, + "step": 990 + }, + { + "epoch": 0.03, + "learning_rate": 5.9420722603762325e-05, + "loss": 1.1602, + "step": 995 + }, + { + "epoch": 0.03, + "learning_rate": 5.971931919976113e-05, + "loss": 1.1221, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 6.001791579575993e-05, + "loss": 1.1316, + "step": 1005 + }, + { + "epoch": 0.03, + "learning_rate": 6.031651239175874e-05, + "loss": 1.0403, + "step": 1010 + }, + { + "epoch": 0.03, + "learning_rate": 6.061510898775754e-05, + "loss": 1.1145, + "step": 1015 + }, + { + "epoch": 0.03, + "learning_rate": 6.091370558375635e-05, + "loss": 1.1008, + "step": 1020 + }, + { + "epoch": 0.03, + "learning_rate": 6.121230217975516e-05, + "loss": 1.1407, + "step": 1025 + }, + { + "epoch": 0.03, + "learning_rate": 6.151089877575396e-05, + "loss": 1.1395, + "step": 1030 + }, + { + "epoch": 0.03, + "learning_rate": 6.180949537175276e-05, + "loss": 1.0828, + "step": 1035 + }, + { + "epoch": 0.03, + "learning_rate": 6.210809196775157e-05, + "loss": 1.1553, + "step": 1040 + }, + { + "epoch": 0.03, + "learning_rate": 6.240668856375038e-05, + "loss": 1.1672, + "step": 1045 + }, + { + "epoch": 0.03, + "learning_rate": 6.270528515974918e-05, + "loss": 1.1025, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 6.300388175574799e-05, + "loss": 1.1906, + "step": 1055 + }, + { + "epoch": 0.03, + "learning_rate": 6.33024783517468e-05, + "loss": 1.0398, + "step": 1060 + }, + { + "epoch": 0.03, + "learning_rate": 6.36010749477456e-05, + "loss": 1.0955, + "step": 1065 + }, + { + "epoch": 0.03, + "learning_rate": 6.389967154374439e-05, + "loss": 1.1328, + "step": 1070 + }, + { + "epoch": 0.03, + "learning_rate": 6.419826813974321e-05, + "loss": 1.1968, + "step": 1075 + }, + { + "epoch": 0.03, + "learning_rate": 6.449686473574201e-05, + "loss": 1.1023, + "step": 1080 + }, + { + "epoch": 0.03, + "learning_rate": 6.479546133174083e-05, + "loss": 1.1678, + "step": 1085 + }, + { + "epoch": 0.03, + "learning_rate": 6.509405792773963e-05, + "loss": 1.0655, + "step": 1090 + }, + { + "epoch": 0.03, + "learning_rate": 6.539265452373843e-05, + "loss": 1.2225, + "step": 1095 + }, + { + "epoch": 0.03, + "learning_rate": 6.569125111973725e-05, + "loss": 1.0962, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 6.598984771573604e-05, + "loss": 1.128, + "step": 1105 + }, + { + "epoch": 0.03, + "learning_rate": 6.628844431173485e-05, + "loss": 1.0188, + "step": 1110 + }, + { + "epoch": 0.03, + "learning_rate": 6.658704090773365e-05, + "loss": 1.1631, + "step": 1115 + }, + { + "epoch": 0.03, + "learning_rate": 6.688563750373246e-05, + "loss": 1.1258, + "step": 1120 + }, + { + "epoch": 0.03, + "learning_rate": 6.718423409973127e-05, + "loss": 1.0345, + "step": 1125 + }, + { + "epoch": 0.03, + "learning_rate": 6.748283069573007e-05, + "loss": 1.0857, + "step": 1130 + }, + { + "epoch": 0.03, + "learning_rate": 6.778142729172889e-05, + "loss": 1.1195, + "step": 1135 + }, + { + "epoch": 0.03, + "learning_rate": 6.808002388772768e-05, + "loss": 1.1317, + "step": 1140 + }, + { + "epoch": 0.03, + "learning_rate": 6.837862048372648e-05, + "loss": 1.0549, + "step": 1145 + }, + { + "epoch": 0.03, + "learning_rate": 6.86772170797253e-05, + "loss": 1.0701, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 6.89758136757241e-05, + "loss": 1.1227, + "step": 1155 + }, + { + "epoch": 0.03, + "learning_rate": 6.92744102717229e-05, + "loss": 1.0756, + "step": 1160 + }, + { + "epoch": 0.03, + "learning_rate": 6.957300686772172e-05, + "loss": 1.1506, + "step": 1165 + }, + { + "epoch": 0.03, + "learning_rate": 6.987160346372052e-05, + "loss": 1.1475, + "step": 1170 + }, + { + "epoch": 0.04, + "learning_rate": 7.017020005971932e-05, + "loss": 1.1378, + "step": 1175 + }, + { + "epoch": 0.04, + "learning_rate": 7.046879665571812e-05, + "loss": 1.0497, + "step": 1180 + }, + { + "epoch": 0.04, + "learning_rate": 7.076739325171692e-05, + "loss": 1.0586, + "step": 1185 + }, + { + "epoch": 0.04, + "learning_rate": 7.106598984771574e-05, + "loss": 1.085, + "step": 1190 + }, + { + "epoch": 0.04, + "learning_rate": 7.136458644371454e-05, + "loss": 1.1794, + "step": 1195 + }, + { + "epoch": 0.04, + "learning_rate": 7.166318303971336e-05, + "loss": 1.0421, + "step": 1200 + }, + { + "epoch": 0.04, + "learning_rate": 7.196177963571216e-05, + "loss": 1.0732, + "step": 1205 + }, + { + "epoch": 0.04, + "learning_rate": 7.226037623171096e-05, + "loss": 1.0582, + "step": 1210 + }, + { + "epoch": 0.04, + "learning_rate": 7.255897282770977e-05, + "loss": 1.0111, + "step": 1215 + }, + { + "epoch": 0.04, + "learning_rate": 7.285756942370857e-05, + "loss": 1.0703, + "step": 1220 + }, + { + "epoch": 0.04, + "learning_rate": 7.315616601970738e-05, + "loss": 1.1753, + "step": 1225 + }, + { + "epoch": 0.04, + "learning_rate": 7.345476261570619e-05, + "loss": 1.1485, + "step": 1230 + }, + { + "epoch": 0.04, + "learning_rate": 7.375335921170499e-05, + "loss": 1.0741, + "step": 1235 + }, + { + "epoch": 0.04, + "learning_rate": 7.40519558077038e-05, + "loss": 1.1243, + "step": 1240 + }, + { + "epoch": 0.04, + "learning_rate": 7.43505524037026e-05, + "loss": 1.0602, + "step": 1245 + }, + { + "epoch": 0.04, + "learning_rate": 7.464914899970141e-05, + "loss": 1.1495, + "step": 1250 + }, + { + "epoch": 0.04, + "learning_rate": 7.494774559570021e-05, + "loss": 1.1245, + "step": 1255 + }, + { + "epoch": 0.04, + "learning_rate": 7.524634219169901e-05, + "loss": 1.0684, + "step": 1260 + }, + { + "epoch": 0.04, + "learning_rate": 7.554493878769783e-05, + "loss": 1.1399, + "step": 1265 + }, + { + "epoch": 0.04, + "learning_rate": 7.584353538369663e-05, + "loss": 1.0606, + "step": 1270 + }, + { + "epoch": 0.04, + "learning_rate": 7.614213197969543e-05, + "loss": 1.107, + "step": 1275 + }, + { + "epoch": 0.04, + "learning_rate": 7.644072857569425e-05, + "loss": 1.11, + "step": 1280 + }, + { + "epoch": 0.04, + "learning_rate": 7.673932517169305e-05, + "loss": 1.1872, + "step": 1285 + }, + { + "epoch": 0.04, + "learning_rate": 7.703792176769185e-05, + "loss": 1.1942, + "step": 1290 + }, + { + "epoch": 0.04, + "learning_rate": 7.733651836369065e-05, + "loss": 1.1292, + "step": 1295 + }, + { + "epoch": 0.04, + "learning_rate": 7.763511495968946e-05, + "loss": 1.0424, + "step": 1300 + }, + { + "epoch": 0.04, + "learning_rate": 7.793371155568827e-05, + "loss": 1.1061, + "step": 1305 + }, + { + "epoch": 0.04, + "learning_rate": 7.823230815168708e-05, + "loss": 1.1405, + "step": 1310 + }, + { + "epoch": 0.04, + "learning_rate": 7.853090474768589e-05, + "loss": 1.1527, + "step": 1315 + }, + { + "epoch": 0.04, + "learning_rate": 7.882950134368468e-05, + "loss": 1.0768, + "step": 1320 + }, + { + "epoch": 0.04, + "learning_rate": 7.912809793968348e-05, + "loss": 1.1521, + "step": 1325 + }, + { + "epoch": 0.04, + "learning_rate": 7.94266945356823e-05, + "loss": 1.1386, + "step": 1330 + }, + { + "epoch": 0.04, + "learning_rate": 7.97252911316811e-05, + "loss": 1.1183, + "step": 1335 + }, + { + "epoch": 0.04, + "learning_rate": 8.002388772767992e-05, + "loss": 1.1109, + "step": 1340 + }, + { + "epoch": 0.04, + "learning_rate": 8.032248432367872e-05, + "loss": 1.0986, + "step": 1345 + }, + { + "epoch": 0.04, + "learning_rate": 8.062108091967752e-05, + "loss": 1.1451, + "step": 1350 + }, + { + "epoch": 0.04, + "learning_rate": 8.091967751567632e-05, + "loss": 1.1205, + "step": 1355 + }, + { + "epoch": 0.04, + "learning_rate": 8.121827411167512e-05, + "loss": 1.0236, + "step": 1360 + }, + { + "epoch": 0.04, + "learning_rate": 8.151687070767393e-05, + "loss": 1.0733, + "step": 1365 + }, + { + "epoch": 0.04, + "learning_rate": 8.181546730367274e-05, + "loss": 1.0487, + "step": 1370 + }, + { + "epoch": 0.04, + "learning_rate": 8.211406389967154e-05, + "loss": 1.0805, + "step": 1375 + }, + { + "epoch": 0.04, + "learning_rate": 8.241266049567036e-05, + "loss": 1.0982, + "step": 1380 + }, + { + "epoch": 0.04, + "learning_rate": 8.271125709166916e-05, + "loss": 1.0558, + "step": 1385 + }, + { + "epoch": 0.04, + "learning_rate": 8.300985368766796e-05, + "loss": 1.0834, + "step": 1390 + }, + { + "epoch": 0.04, + "learning_rate": 8.330845028366677e-05, + "loss": 1.1727, + "step": 1395 + }, + { + "epoch": 0.04, + "learning_rate": 8.360704687966557e-05, + "loss": 1.1002, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 8.390564347566439e-05, + "loss": 1.142, + "step": 1405 + }, + { + "epoch": 0.04, + "learning_rate": 8.420424007166319e-05, + "loss": 1.0874, + "step": 1410 + }, + { + "epoch": 0.04, + "learning_rate": 8.450283666766199e-05, + "loss": 1.0821, + "step": 1415 + }, + { + "epoch": 0.04, + "learning_rate": 8.48014332636608e-05, + "loss": 1.0858, + "step": 1420 + }, + { + "epoch": 0.04, + "learning_rate": 8.510002985965961e-05, + "loss": 1.0737, + "step": 1425 + }, + { + "epoch": 0.04, + "learning_rate": 8.539862645565841e-05, + "loss": 1.0453, + "step": 1430 + }, + { + "epoch": 0.04, + "learning_rate": 8.569722305165721e-05, + "loss": 1.1273, + "step": 1435 + }, + { + "epoch": 0.04, + "learning_rate": 8.599581964765601e-05, + "loss": 1.0571, + "step": 1440 + }, + { + "epoch": 0.04, + "learning_rate": 8.629441624365483e-05, + "loss": 1.0576, + "step": 1445 + }, + { + "epoch": 0.04, + "learning_rate": 8.659301283965363e-05, + "loss": 1.1567, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 8.689160943565243e-05, + "loss": 1.157, + "step": 1455 + }, + { + "epoch": 0.04, + "learning_rate": 8.719020603165125e-05, + "loss": 1.1469, + "step": 1460 + }, + { + "epoch": 0.04, + "learning_rate": 8.748880262765005e-05, + "loss": 1.1209, + "step": 1465 + }, + { + "epoch": 0.04, + "learning_rate": 8.778739922364885e-05, + "loss": 1.0979, + "step": 1470 + }, + { + "epoch": 0.04, + "learning_rate": 8.808599581964766e-05, + "loss": 1.1423, + "step": 1475 + }, + { + "epoch": 0.04, + "learning_rate": 8.838459241564646e-05, + "loss": 1.1072, + "step": 1480 + }, + { + "epoch": 0.04, + "learning_rate": 8.868318901164527e-05, + "loss": 1.1439, + "step": 1485 + }, + { + "epoch": 0.04, + "learning_rate": 8.898178560764408e-05, + "loss": 1.0568, + "step": 1490 + }, + { + "epoch": 0.04, + "learning_rate": 8.928038220364289e-05, + "loss": 1.183, + "step": 1495 + }, + { + "epoch": 0.04, + "learning_rate": 8.95789787996417e-05, + "loss": 1.0632, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 8.987757539564048e-05, + "loss": 1.0712, + "step": 1505 + }, + { + "epoch": 0.05, + "learning_rate": 9.01761719916393e-05, + "loss": 1.0539, + "step": 1510 + }, + { + "epoch": 0.05, + "learning_rate": 9.04747685876381e-05, + "loss": 1.053, + "step": 1515 + }, + { + "epoch": 0.05, + "learning_rate": 9.077336518363692e-05, + "loss": 1.0939, + "step": 1520 + }, + { + "epoch": 0.05, + "learning_rate": 9.107196177963572e-05, + "loss": 1.0298, + "step": 1525 + }, + { + "epoch": 0.05, + "learning_rate": 9.137055837563452e-05, + "loss": 1.114, + "step": 1530 + }, + { + "epoch": 0.05, + "learning_rate": 9.166915497163334e-05, + "loss": 1.0674, + "step": 1535 + }, + { + "epoch": 0.05, + "learning_rate": 9.196775156763213e-05, + "loss": 1.1114, + "step": 1540 + }, + { + "epoch": 0.05, + "learning_rate": 9.226634816363093e-05, + "loss": 1.0624, + "step": 1545 + }, + { + "epoch": 0.05, + "learning_rate": 9.256494475962974e-05, + "loss": 1.0214, + "step": 1550 + }, + { + "epoch": 0.05, + "learning_rate": 9.286354135562855e-05, + "loss": 1.0792, + "step": 1555 + }, + { + "epoch": 0.05, + "learning_rate": 9.316213795162736e-05, + "loss": 1.0711, + "step": 1560 + }, + { + "epoch": 0.05, + "learning_rate": 9.346073454762616e-05, + "loss": 1.0985, + "step": 1565 + }, + { + "epoch": 0.05, + "learning_rate": 9.375933114362497e-05, + "loss": 1.0999, + "step": 1570 + }, + { + "epoch": 0.05, + "learning_rate": 9.405792773962377e-05, + "loss": 1.1437, + "step": 1575 + }, + { + "epoch": 0.05, + "learning_rate": 9.435652433562257e-05, + "loss": 1.1081, + "step": 1580 + }, + { + "epoch": 0.05, + "learning_rate": 9.465512093162139e-05, + "loss": 1.1424, + "step": 1585 + }, + { + "epoch": 0.05, + "learning_rate": 9.495371752762019e-05, + "loss": 1.0566, + "step": 1590 + }, + { + "epoch": 0.05, + "learning_rate": 9.525231412361899e-05, + "loss": 1.0845, + "step": 1595 + }, + { + "epoch": 0.05, + "learning_rate": 9.555091071961781e-05, + "loss": 1.0832, + "step": 1600 + }, + { + "epoch": 0.05, + "learning_rate": 9.584950731561661e-05, + "loss": 1.1048, + "step": 1605 + }, + { + "epoch": 0.05, + "learning_rate": 9.614810391161541e-05, + "loss": 1.0849, + "step": 1610 + }, + { + "epoch": 0.05, + "learning_rate": 9.644670050761421e-05, + "loss": 1.0663, + "step": 1615 + }, + { + "epoch": 0.05, + "learning_rate": 9.674529710361302e-05, + "loss": 1.0951, + "step": 1620 + }, + { + "epoch": 0.05, + "learning_rate": 9.704389369961183e-05, + "loss": 1.0655, + "step": 1625 + }, + { + "epoch": 0.05, + "learning_rate": 9.734249029561063e-05, + "loss": 1.081, + "step": 1630 + }, + { + "epoch": 0.05, + "learning_rate": 9.764108689160945e-05, + "loss": 1.0622, + "step": 1635 + }, + { + "epoch": 0.05, + "learning_rate": 9.793968348760825e-05, + "loss": 1.0711, + "step": 1640 + }, + { + "epoch": 0.05, + "learning_rate": 9.823828008360705e-05, + "loss": 1.0491, + "step": 1645 + }, + { + "epoch": 0.05, + "learning_rate": 9.853687667960586e-05, + "loss": 1.1492, + "step": 1650 + }, + { + "epoch": 0.05, + "learning_rate": 9.883547327560466e-05, + "loss": 1.1284, + "step": 1655 + }, + { + "epoch": 0.05, + "learning_rate": 9.913406987160346e-05, + "loss": 1.0678, + "step": 1660 + }, + { + "epoch": 0.05, + "learning_rate": 9.943266646760228e-05, + "loss": 1.1252, + "step": 1665 + }, + { + "epoch": 0.05, + "learning_rate": 9.973126306360108e-05, + "loss": 1.1307, + "step": 1670 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010002985965959988, + "loss": 1.0359, + "step": 1675 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001003284562555987, + "loss": 1.1049, + "step": 1680 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001006270528515975, + "loss": 1.1471, + "step": 1685 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001009256494475963, + "loss": 1.1246, + "step": 1690 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001012242460435951, + "loss": 1.076, + "step": 1695 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001015228426395939, + "loss": 1.076, + "step": 1700 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010182143923559271, + "loss": 1.1156, + "step": 1705 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010212003583159154, + "loss": 1.0868, + "step": 1710 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010241863242759034, + "loss": 1.0114, + "step": 1715 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010271722902358913, + "loss": 1.1403, + "step": 1720 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010301582561958793, + "loss": 1.1826, + "step": 1725 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010331442221558673, + "loss": 1.064, + "step": 1730 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010361301881158556, + "loss": 1.0273, + "step": 1735 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010391161540758436, + "loss": 1.1297, + "step": 1740 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010421021200358317, + "loss": 1.16, + "step": 1745 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010450880859958197, + "loss": 1.0746, + "step": 1750 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010480740519558077, + "loss": 1.1209, + "step": 1755 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010510600179157959, + "loss": 1.0853, + "step": 1760 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010540459838757839, + "loss": 1.1119, + "step": 1765 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010570319498357719, + "loss": 1.078, + "step": 1770 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010600179157957599, + "loss": 1.1514, + "step": 1775 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001063003881755748, + "loss": 1.1006, + "step": 1780 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010659898477157362, + "loss": 1.0693, + "step": 1785 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010689758136757241, + "loss": 1.0863, + "step": 1790 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010719617796357122, + "loss": 1.1365, + "step": 1795 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010749477455957002, + "loss": 1.1399, + "step": 1800 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010779337115556882, + "loss": 1.0507, + "step": 1805 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010809196775156765, + "loss": 1.0029, + "step": 1810 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010839056434756645, + "loss": 1.2431, + "step": 1815 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010868916094356525, + "loss": 1.1512, + "step": 1820 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010898775753956406, + "loss": 1.1036, + "step": 1825 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010928635413556286, + "loss": 1.0572, + "step": 1830 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010958495073156167, + "loss": 1.1712, + "step": 1835 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010988354732756048, + "loss": 1.0528, + "step": 1840 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011018214392355928, + "loss": 1.0917, + "step": 1845 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011048074051955808, + "loss": 1.0847, + "step": 1850 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011077933711555688, + "loss": 1.1046, + "step": 1855 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001110779337115557, + "loss": 1.0963, + "step": 1860 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001113765303075545, + "loss": 1.1022, + "step": 1865 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001116751269035533, + "loss": 1.1821, + "step": 1870 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001119737234995521, + "loss": 1.1033, + "step": 1875 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011227232009555091, + "loss": 1.1271, + "step": 1880 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011257091669154974, + "loss": 1.0473, + "step": 1885 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011286951328754854, + "loss": 1.1074, + "step": 1890 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011316810988354734, + "loss": 0.9955, + "step": 1895 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011346670647954613, + "loss": 1.1734, + "step": 1900 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011376530307554493, + "loss": 1.1348, + "step": 1905 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011406389967154373, + "loss": 1.2305, + "step": 1910 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011436249626754256, + "loss": 1.0754, + "step": 1915 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011466109286354137, + "loss": 1.1334, + "step": 1920 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011495968945954017, + "loss": 1.1121, + "step": 1925 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011525828605553897, + "loss": 1.0733, + "step": 1930 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011555688265153777, + "loss": 1.1399, + "step": 1935 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011585547924753659, + "loss": 1.102, + "step": 1940 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011615407584353539, + "loss": 1.0491, + "step": 1945 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011645267243953419, + "loss": 1.0837, + "step": 1950 + }, + { + "epoch": 0.06, + "learning_rate": 0.000116751269035533, + "loss": 1.0573, + "step": 1955 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001170498656315318, + "loss": 1.1107, + "step": 1960 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011734846222753063, + "loss": 1.1348, + "step": 1965 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011764705882352942, + "loss": 1.0779, + "step": 1970 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011794565541952822, + "loss": 1.0714, + "step": 1975 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011824425201552702, + "loss": 1.0405, + "step": 1980 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011854284861152582, + "loss": 1.1107, + "step": 1985 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011884144520752465, + "loss": 1.1086, + "step": 1990 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011914004180352345, + "loss": 1.0811, + "step": 1995 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011943863839952226, + "loss": 1.0469, + "step": 2000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011973723499552106, + "loss": 1.0867, + "step": 2005 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012003583159151986, + "loss": 1.0944, + "step": 2010 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012033442818751868, + "loss": 1.1057, + "step": 2015 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012063302478351748, + "loss": 1.0371, + "step": 2020 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012093162137951628, + "loss": 1.0589, + "step": 2025 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012123021797551508, + "loss": 1.1048, + "step": 2030 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012152881457151388, + "loss": 1.098, + "step": 2035 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001218274111675127, + "loss": 1.0762, + "step": 2040 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001221260077635115, + "loss": 1.1303, + "step": 2045 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012242460435951032, + "loss": 1.1129, + "step": 2050 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001227232009555091, + "loss": 1.0332, + "step": 2055 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012302179755150792, + "loss": 1.0965, + "step": 2060 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012332039414750674, + "loss": 1.183, + "step": 2065 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012361899074350553, + "loss": 1.013, + "step": 2070 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012391758733950434, + "loss": 1.098, + "step": 2075 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012421618393550313, + "loss": 1.1352, + "step": 2080 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012451478053150195, + "loss": 1.135, + "step": 2085 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012481337712750076, + "loss": 1.0648, + "step": 2090 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012511197372349955, + "loss": 1.0729, + "step": 2095 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012541057031949837, + "loss": 1.1195, + "step": 2100 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012570916691549716, + "loss": 1.0619, + "step": 2105 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012600776351149597, + "loss": 1.0518, + "step": 2110 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012630636010749476, + "loss": 1.107, + "step": 2115 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001266049567034936, + "loss": 1.1509, + "step": 2120 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001269035532994924, + "loss": 1.0284, + "step": 2125 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001272021498954912, + "loss": 1.0268, + "step": 2130 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012750074649149, + "loss": 1.0742, + "step": 2135 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012779934308748879, + "loss": 1.1363, + "step": 2140 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012809793968348763, + "loss": 1.0428, + "step": 2145 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012839653627948642, + "loss": 1.0828, + "step": 2150 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012869513287548523, + "loss": 1.057, + "step": 2155 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012899372947148402, + "loss": 1.1283, + "step": 2160 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012929232606748284, + "loss": 1.0634, + "step": 2165 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012959092266348165, + "loss": 1.04, + "step": 2170 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012988951925948044, + "loss": 1.0368, + "step": 2175 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013018811585547926, + "loss": 1.1117, + "step": 2180 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013048671245147805, + "loss": 1.1447, + "step": 2185 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013078530904747686, + "loss": 1.0725, + "step": 2190 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013108390564347568, + "loss": 1.0388, + "step": 2195 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001313825022394745, + "loss": 1.1068, + "step": 2200 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013168109883547328, + "loss": 1.0089, + "step": 2205 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013197969543147207, + "loss": 1.0564, + "step": 2210 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013227829202747089, + "loss": 1.0147, + "step": 2215 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001325768886234697, + "loss": 1.0147, + "step": 2220 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013287548521946852, + "loss": 1.1182, + "step": 2225 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001331740818154673, + "loss": 1.0915, + "step": 2230 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013347267841146612, + "loss": 1.1495, + "step": 2235 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001337712750074649, + "loss": 1.0869, + "step": 2240 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013406987160346373, + "loss": 1.0828, + "step": 2245 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013436846819946254, + "loss": 1.0157, + "step": 2250 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013466706479546133, + "loss": 1.1523, + "step": 2255 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013496566139146015, + "loss": 1.0472, + "step": 2260 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013526425798745894, + "loss": 1.1676, + "step": 2265 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013556285458345778, + "loss": 1.1472, + "step": 2270 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013586145117945657, + "loss": 1.0972, + "step": 2275 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013616004777545536, + "loss": 1.0558, + "step": 2280 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013645864437145417, + "loss": 1.1049, + "step": 2285 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013675724096745296, + "loss": 1.0733, + "step": 2290 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013705583756345178, + "loss": 1.0504, + "step": 2295 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001373544341594506, + "loss": 1.1005, + "step": 2300 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001376530307554494, + "loss": 1.0958, + "step": 2305 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001379516273514482, + "loss": 1.1191, + "step": 2310 + }, + { + "epoch": 0.07, + "learning_rate": 0.000138250223947447, + "loss": 1.1083, + "step": 2315 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001385488205434458, + "loss": 1.0621, + "step": 2320 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013884741713944462, + "loss": 1.0963, + "step": 2325 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013914601373544343, + "loss": 1.0541, + "step": 2330 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013944461033144222, + "loss": 1.1256, + "step": 2335 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013974320692744104, + "loss": 1.1294, + "step": 2340 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014004180352343983, + "loss": 1.0728, + "step": 2345 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014034040011943864, + "loss": 1.0333, + "step": 2350 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014063899671543746, + "loss": 1.1385, + "step": 2355 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014093759331143625, + "loss": 1.1034, + "step": 2360 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014123618990743506, + "loss": 1.1511, + "step": 2365 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014153478650343385, + "loss": 1.0998, + "step": 2370 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001418333830994327, + "loss": 1.0593, + "step": 2375 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014213197969543148, + "loss": 1.1902, + "step": 2380 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001424305762914303, + "loss": 1.109, + "step": 2385 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014272917288742909, + "loss": 1.0562, + "step": 2390 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014302776948342787, + "loss": 1.0737, + "step": 2395 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014332636607942672, + "loss": 1.0729, + "step": 2400 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001436249626754255, + "loss": 1.0294, + "step": 2405 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014392355927142432, + "loss": 1.057, + "step": 2410 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001442221558674231, + "loss": 1.0926, + "step": 2415 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014452075246342193, + "loss": 1.0654, + "step": 2420 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014481934905942074, + "loss": 1.0966, + "step": 2425 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014511794565541953, + "loss": 1.0289, + "step": 2430 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014541654225141835, + "loss": 1.0597, + "step": 2435 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014571513884741714, + "loss": 1.0193, + "step": 2440 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014601373544341595, + "loss": 1.0012, + "step": 2445 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014631233203941477, + "loss": 1.0725, + "step": 2450 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014661092863541358, + "loss": 1.0239, + "step": 2455 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014690952523141237, + "loss": 1.1049, + "step": 2460 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014720812182741116, + "loss": 1.0171, + "step": 2465 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014750671842340998, + "loss": 1.0581, + "step": 2470 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001478053150194088, + "loss": 1.1126, + "step": 2475 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001481039116154076, + "loss": 1.054, + "step": 2480 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001484025082114064, + "loss": 1.0213, + "step": 2485 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001487011048074052, + "loss": 1.1767, + "step": 2490 + }, + { + "epoch": 0.07, + "learning_rate": 0.000148999701403404, + "loss": 1.0246, + "step": 2495 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014929829799940282, + "loss": 1.153, + "step": 2500 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014959689459540163, + "loss": 1.0123, + "step": 2505 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014989549119140042, + "loss": 1.054, + "step": 2510 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015019408778739924, + "loss": 1.145, + "step": 2515 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015049268438339802, + "loss": 1.0201, + "step": 2520 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015079128097939684, + "loss": 1.0261, + "step": 2525 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015108987757539566, + "loss": 1.0733, + "step": 2530 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015138847417139444, + "loss": 1.1479, + "step": 2535 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015168707076739326, + "loss": 1.0782, + "step": 2540 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015198566736339205, + "loss": 1.1269, + "step": 2545 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015228426395939087, + "loss": 1.1491, + "step": 2550 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015258286055538968, + "loss": 1.1119, + "step": 2555 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001528814571513885, + "loss": 1.1064, + "step": 2560 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015318005374738729, + "loss": 1.1304, + "step": 2565 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001534786503433861, + "loss": 1.062, + "step": 2570 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001537772469393849, + "loss": 1.1846, + "step": 2575 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001540758435353837, + "loss": 1.1742, + "step": 2580 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015437444013138252, + "loss": 1.0942, + "step": 2585 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001546730367273813, + "loss": 1.1366, + "step": 2590 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015497163332338013, + "loss": 1.1467, + "step": 2595 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015527022991937891, + "loss": 1.0348, + "step": 2600 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015556882651537773, + "loss": 1.0252, + "step": 2605 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015586742311137655, + "loss": 1.0639, + "step": 2610 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015616601970737533, + "loss": 1.0559, + "step": 2615 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015646461630337415, + "loss": 1.0854, + "step": 2620 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015676321289937294, + "loss": 1.0716, + "step": 2625 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015706180949537178, + "loss": 1.0255, + "step": 2630 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015736040609137057, + "loss": 1.0305, + "step": 2635 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015765900268736936, + "loss": 1.0547, + "step": 2640 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015795759928336818, + "loss": 1.1318, + "step": 2645 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015825619587936696, + "loss": 1.0905, + "step": 2650 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001585547924753658, + "loss": 1.0716, + "step": 2655 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001588533890713646, + "loss": 1.1074, + "step": 2660 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001591519856673634, + "loss": 1.1428, + "step": 2665 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001594505822633622, + "loss": 1.0202, + "step": 2670 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015974917885936102, + "loss": 1.042, + "step": 2675 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016004777545535983, + "loss": 1.1255, + "step": 2680 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016034637205135862, + "loss": 1.0699, + "step": 2685 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016064496864735744, + "loss": 1.1759, + "step": 2690 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016094356524335622, + "loss": 1.0454, + "step": 2695 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016124216183935504, + "loss": 1.0747, + "step": 2700 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016154075843535383, + "loss": 1.071, + "step": 2705 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016183935503135264, + "loss": 1.0476, + "step": 2710 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016213795162735146, + "loss": 1.155, + "step": 2715 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016243654822335025, + "loss": 1.0276, + "step": 2720 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016273514481934906, + "loss": 1.0591, + "step": 2725 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016303374141534785, + "loss": 1.0769, + "step": 2730 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001633323380113467, + "loss": 1.097, + "step": 2735 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016363093460734548, + "loss": 1.0199, + "step": 2740 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001639295312033443, + "loss": 1.0936, + "step": 2745 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001642281277993431, + "loss": 1.086, + "step": 2750 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016452672439534188, + "loss": 1.1206, + "step": 2755 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016482532099134072, + "loss": 1.1686, + "step": 2760 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001651239175873395, + "loss": 1.1004, + "step": 2765 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016542251418333833, + "loss": 1.0702, + "step": 2770 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016572111077933711, + "loss": 1.0575, + "step": 2775 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016601970737533593, + "loss": 1.0978, + "step": 2780 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016631830397133475, + "loss": 1.1407, + "step": 2785 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016661690056733353, + "loss": 1.1511, + "step": 2790 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016691549716333235, + "loss": 1.2012, + "step": 2795 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016721409375933114, + "loss": 1.1247, + "step": 2800 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016751269035532995, + "loss": 1.1194, + "step": 2805 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016781128695132877, + "loss": 1.0245, + "step": 2810 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016810988354732759, + "loss": 1.0469, + "step": 2815 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016840848014332637, + "loss": 1.0938, + "step": 2820 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016870707673932516, + "loss": 1.1133, + "step": 2825 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016900567333532398, + "loss": 1.023, + "step": 2830 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001693042699313228, + "loss": 1.1506, + "step": 2835 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001696028665273216, + "loss": 1.1429, + "step": 2840 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001699014631233204, + "loss": 1.0354, + "step": 2845 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017020005971931921, + "loss": 1.0391, + "step": 2850 + }, + { + "epoch": 0.09, + "learning_rate": 0.000170498656315318, + "loss": 1.0791, + "step": 2855 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017079725291131682, + "loss": 1.1075, + "step": 2860 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017109584950731564, + "loss": 1.1059, + "step": 2865 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017139444610331442, + "loss": 1.0659, + "step": 2870 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017169304269931324, + "loss": 1.1229, + "step": 2875 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017199163929531203, + "loss": 1.0697, + "step": 2880 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017229023589131087, + "loss": 1.0755, + "step": 2885 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017258883248730966, + "loss": 1.1134, + "step": 2890 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017288742908330845, + "loss": 1.0988, + "step": 2895 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017318602567930726, + "loss": 1.0538, + "step": 2900 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017348462227530605, + "loss": 1.0201, + "step": 2905 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017378321887130487, + "loss": 1.141, + "step": 2910 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017408181546730368, + "loss": 1.1097, + "step": 2915 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001743804120633025, + "loss": 1.066, + "step": 2920 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001746790086593013, + "loss": 1.0917, + "step": 2925 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001749776052553001, + "loss": 1.1559, + "step": 2930 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001752762018512989, + "loss": 1.1427, + "step": 2935 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001755747984472977, + "loss": 1.0514, + "step": 2940 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017587339504329652, + "loss": 1.0922, + "step": 2945 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001761719916392953, + "loss": 1.0638, + "step": 2950 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017647058823529413, + "loss": 1.1429, + "step": 2955 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017676918483129292, + "loss": 1.0247, + "step": 2960 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017706778142729173, + "loss": 1.0268, + "step": 2965 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017736637802329055, + "loss": 1.1297, + "step": 2970 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017766497461928934, + "loss": 1.041, + "step": 2975 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017796357121528815, + "loss": 1.0589, + "step": 2980 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017826216781128694, + "loss": 1.0459, + "step": 2985 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017856076440728579, + "loss": 1.1031, + "step": 2990 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017885936100328457, + "loss": 1.1287, + "step": 2995 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001791579575992834, + "loss": 1.0877, + "step": 3000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017945655419528218, + "loss": 1.0522, + "step": 3005 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017975515079128097, + "loss": 1.1099, + "step": 3010 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001800537473872798, + "loss": 1.0418, + "step": 3015 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001803523439832786, + "loss": 1.0555, + "step": 3020 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018065094057927741, + "loss": 1.1016, + "step": 3025 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001809495371752762, + "loss": 1.1006, + "step": 3030 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018124813377127502, + "loss": 1.0478, + "step": 3035 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018154673036727383, + "loss": 1.1453, + "step": 3040 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018184532696327262, + "loss": 1.0999, + "step": 3045 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018214392355927144, + "loss": 1.0918, + "step": 3050 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018244252015527023, + "loss": 1.1002, + "step": 3055 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018274111675126904, + "loss": 1.0768, + "step": 3060 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018303971334726786, + "loss": 1.1154, + "step": 3065 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018333830994326668, + "loss": 1.0765, + "step": 3070 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018363690653926546, + "loss": 1.1575, + "step": 3075 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018393550313526425, + "loss": 1.1789, + "step": 3080 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018423409973126307, + "loss": 1.1259, + "step": 3085 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018453269632726186, + "loss": 1.1294, + "step": 3090 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001848312929232607, + "loss": 1.129, + "step": 3095 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001851298895192595, + "loss": 1.0955, + "step": 3100 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001854284861152583, + "loss": 1.1485, + "step": 3105 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001857270827112571, + "loss": 1.0539, + "step": 3110 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001860256793072559, + "loss": 1.1297, + "step": 3115 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018632427590325472, + "loss": 1.0511, + "step": 3120 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001866228724992535, + "loss": 1.1104, + "step": 3125 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018692146909525233, + "loss": 1.1456, + "step": 3130 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018722006569125112, + "loss": 1.0181, + "step": 3135 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018751866228724993, + "loss": 1.1191, + "step": 3140 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018781725888324875, + "loss": 1.0653, + "step": 3145 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018811585547924754, + "loss": 1.2023, + "step": 3150 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018841445207524635, + "loss": 1.0825, + "step": 3155 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018871304867124514, + "loss": 1.0431, + "step": 3160 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018901164526724396, + "loss": 1.1006, + "step": 3165 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018931024186324277, + "loss": 1.149, + "step": 3170 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001896088384592416, + "loss": 1.0379, + "step": 3175 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018990743505524038, + "loss": 1.0628, + "step": 3180 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001902060316512392, + "loss": 1.0216, + "step": 3185 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019050462824723798, + "loss": 1.0309, + "step": 3190 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001908032248432368, + "loss": 1.0195, + "step": 3195 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019110182143923561, + "loss": 1.0972, + "step": 3200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001914004180352344, + "loss": 1.1071, + "step": 3205 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019169901463123322, + "loss": 1.0252, + "step": 3210 + }, + { + "epoch": 0.1, + "learning_rate": 0.000191997611227232, + "loss": 1.0535, + "step": 3215 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019229620782323082, + "loss": 1.1252, + "step": 3220 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019259480441922964, + "loss": 1.0827, + "step": 3225 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019289340101522843, + "loss": 1.1525, + "step": 3230 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019319199761122724, + "loss": 1.0793, + "step": 3235 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019349059420722603, + "loss": 1.093, + "step": 3240 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019378919080322487, + "loss": 1.1159, + "step": 3245 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019408778739922366, + "loss": 1.076, + "step": 3250 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019438638399522245, + "loss": 1.0385, + "step": 3255 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019468498059122127, + "loss": 1.0041, + "step": 3260 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019498357718722006, + "loss": 1.0801, + "step": 3265 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001952821737832189, + "loss": 1.1181, + "step": 3270 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001955807703792177, + "loss": 1.0675, + "step": 3275 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001958793669752165, + "loss": 1.0912, + "step": 3280 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001961779635712153, + "loss": 1.1491, + "step": 3285 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001964765601672141, + "loss": 1.1827, + "step": 3290 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001967751567632129, + "loss": 1.1387, + "step": 3295 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001970737533592117, + "loss": 1.094, + "step": 3300 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019737234995521053, + "loss": 1.0641, + "step": 3305 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019767094655120932, + "loss": 1.0688, + "step": 3310 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019796954314720813, + "loss": 1.0866, + "step": 3315 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019826813974320692, + "loss": 1.0183, + "step": 3320 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019856673633920574, + "loss": 1.0316, + "step": 3325 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019886533293520455, + "loss": 1.0154, + "step": 3330 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019916392953120334, + "loss": 1.1154, + "step": 3335 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019946252612720216, + "loss": 1.0715, + "step": 3340 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019976112272320095, + "loss": 1.1234, + "step": 3345 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999999945680667, + "loss": 1.1382, + "step": 3350 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999998044504044, + "loss": 1.1766, + "step": 3355 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999999342736132, + "loss": 1.0361, + "step": 3360 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999986094253744, + "loss": 1.0135, + "step": 3365 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999976045183312, + "loss": 1.0417, + "step": 3370 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999996328015275, + "loss": 1.0215, + "step": 3375 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999994779916553, + "loss": 1.0643, + "step": 3380 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999929602225854, + "loss": 1.2135, + "step": 3385 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999908689338663, + "loss": 1.1343, + "step": 3390 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999885060509635, + "loss": 1.0845, + "step": 3395 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999858715745195, + "loss": 1.198, + "step": 3400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999982965505249, + "loss": 1.1071, + "step": 3405 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999797878439415, + "loss": 1.1457, + "step": 3410 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999763385914604, + "loss": 1.0965, + "step": 3415 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999972617748742, + "loss": 1.1234, + "step": 3420 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999686253167976, + "loss": 1.1075, + "step": 3425 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999643612967106, + "loss": 1.1002, + "step": 3430 + }, + { + "epoch": 0.1, + "learning_rate": 0.000199995982568964, + "loss": 1.1558, + "step": 3435 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999955018496817, + "loss": 1.2144, + "step": 3440 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999949939719547, + "loss": 1.1135, + "step": 3445 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999445893592106, + "loss": 1.1371, + "step": 3450 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999389674172594, + "loss": 1.0312, + "step": 3455 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999330738952216, + "loss": 1.1131, + "step": 3460 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999269087946973, + "loss": 1.104, + "step": 3465 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019999204721173609, + "loss": 1.1234, + "step": 3470 + }, + { + "epoch": 0.1, + "learning_rate": 0.000199991376386496, + "loss": 1.1372, + "step": 3475 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999906784039318, + "loss": 1.141, + "step": 3480 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999899532642329, + "loss": 1.1108, + "step": 3485 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019998920096759638, + "loss": 1.0414, + "step": 3490 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019998842151422647, + "loss": 1.07, + "step": 3495 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999876149043349, + "loss": 1.017, + "step": 3500 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019998678113814075, + "loss": 1.1036, + "step": 3505 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019998592021587045, + "loss": 1.0656, + "step": 3510 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019998503213775783, + "loss": 1.0716, + "step": 3515 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019998411690404412, + "loss": 1.0759, + "step": 3520 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019998317451497784, + "loss": 1.0861, + "step": 3525 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019998220497081498, + "loss": 1.0306, + "step": 3530 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019998120827181883, + "loss": 1.1173, + "step": 3535 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019998018441826014, + "loss": 1.0895, + "step": 3540 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019997913341041694, + "loss": 1.1571, + "step": 3545 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999780552485747, + "loss": 1.0958, + "step": 3550 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999769499330262, + "loss": 1.0831, + "step": 3555 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019997581746407174, + "loss": 1.1515, + "step": 3560 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019997465784201882, + "loss": 1.048, + "step": 3565 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999734710671824, + "loss": 1.0868, + "step": 3570 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999722571398848, + "loss": 1.0319, + "step": 3575 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019997101606045577, + "loss": 1.0128, + "step": 3580 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999697478292323, + "loss": 1.0703, + "step": 3585 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019996845244655892, + "loss": 1.1442, + "step": 3590 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019996712991278735, + "loss": 1.1207, + "step": 3595 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019996578022827687, + "loss": 1.047, + "step": 3600 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019996440339339406, + "loss": 1.0814, + "step": 3605 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019996299940851277, + "loss": 1.0791, + "step": 3610 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019996156827401445, + "loss": 1.1105, + "step": 3615 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019996010999028766, + "loss": 1.0932, + "step": 3620 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995862455772856, + "loss": 1.1267, + "step": 3625 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995711197674053, + "loss": 1.1348, + "step": 3630 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999555722477344, + "loss": 1.0513, + "step": 3635 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995400537112836, + "loss": 1.0844, + "step": 3640 + }, + { + "epoch": 0.11, + "learning_rate": 0.000199952411347348, + "loss": 1.1631, + "step": 3645 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999507901768262, + "loss": 1.1233, + "step": 3650 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019994914186000328, + "loss": 1.088, + "step": 3655 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019994746639732692, + "loss": 1.0896, + "step": 3660 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019994576378925215, + "loss": 1.1069, + "step": 3665 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019994403403624146, + "loss": 1.1112, + "step": 3670 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999422771387646, + "loss": 1.0745, + "step": 3675 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999404930972987, + "loss": 1.1232, + "step": 3680 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019993868191232834, + "loss": 1.0648, + "step": 3685 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019993684358434545, + "loss": 1.171, + "step": 3690 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019993497811384927, + "loss": 1.0275, + "step": 3695 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999330855013465, + "loss": 1.1045, + "step": 3700 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019993116574735113, + "loss": 1.1415, + "step": 3705 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999292188523846, + "loss": 1.0976, + "step": 3710 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999272448169756, + "loss": 1.133, + "step": 3715 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019992524364166036, + "loss": 0.9992, + "step": 3720 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019992321532698236, + "loss": 1.0332, + "step": 3725 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999211598734925, + "loss": 1.1238, + "step": 3730 + }, + { + "epoch": 0.11, + "learning_rate": 0.000199919077281749, + "loss": 1.0483, + "step": 3735 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019991696755231747, + "loss": 1.0224, + "step": 3740 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019991483068577097, + "loss": 1.0906, + "step": 3745 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019991266668268983, + "loss": 1.1205, + "step": 3750 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019991047554366177, + "loss": 1.0803, + "step": 3755 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999082572692819, + "loss": 1.1077, + "step": 3760 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019990601186015275, + "loss": 1.1253, + "step": 3765 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999037393168841, + "loss": 1.1671, + "step": 3770 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999014396400932, + "loss": 1.0707, + "step": 3775 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019989911283040462, + "loss": 1.0694, + "step": 3780 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019989675888845034, + "loss": 1.0888, + "step": 3785 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019989437781486963, + "loss": 1.1157, + "step": 3790 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019989196961030925, + "loss": 1.0365, + "step": 3795 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001998895342754232, + "loss": 1.0746, + "step": 3800 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019988707181087291, + "loss": 1.0803, + "step": 3805 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019988458221732725, + "loss": 1.0568, + "step": 3810 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001998820654954623, + "loss": 1.071, + "step": 3815 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019987952164596164, + "loss": 1.0827, + "step": 3820 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019987695066951618, + "loss": 1.0955, + "step": 3825 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019987435256682413, + "loss": 1.0791, + "step": 3830 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001998717273385912, + "loss": 1.0974, + "step": 3835 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019986907498553032, + "loss": 1.0471, + "step": 3840 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019986639550836191, + "loss": 1.0482, + "step": 3845 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019986368890781368, + "loss": 1.0733, + "step": 3850 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019986095518462077, + "loss": 1.0684, + "step": 3855 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019985819433952564, + "loss": 1.1437, + "step": 3860 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019985540637327808, + "loss": 1.1195, + "step": 3865 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019985259128663536, + "loss": 1.1315, + "step": 3870 + }, + { + "epoch": 0.12, + "learning_rate": 0.000199849749080362, + "loss": 1.098, + "step": 3875 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019984687975522993, + "loss": 1.0512, + "step": 3880 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001998439833120185, + "loss": 1.0707, + "step": 3885 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019984105975151434, + "loss": 1.0753, + "step": 3890 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019983810907451145, + "loss": 1.1256, + "step": 3895 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019983513128181128, + "loss": 1.0676, + "step": 3900 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019983212637422258, + "loss": 0.9838, + "step": 3905 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019982909435256144, + "loss": 1.0884, + "step": 3910 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019982603521765134, + "loss": 1.1129, + "step": 3915 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001998229489703232, + "loss": 1.1245, + "step": 3920 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019981983561141516, + "loss": 1.0162, + "step": 3925 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019981669514177282, + "loss": 1.0832, + "step": 3930 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019981352756224917, + "loss": 1.0793, + "step": 3935 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019981033287370443, + "loss": 1.0764, + "step": 3940 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019980711107700632, + "loss": 1.1139, + "step": 3945 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019980386217302985, + "loss": 1.0912, + "step": 3950 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019980058616265743, + "loss": 1.0632, + "step": 3955 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019979728304677882, + "loss": 1.0528, + "step": 3960 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997939528262911, + "loss": 1.0304, + "step": 3965 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019979059550209876, + "loss": 1.1203, + "step": 3970 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019978721107511365, + "loss": 1.1041, + "step": 3975 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019978379954625495, + "loss": 1.1427, + "step": 3980 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019978036091644925, + "loss": 1.007, + "step": 3985 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019977689518663042, + "loss": 1.0742, + "step": 3990 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019977340235773983, + "loss": 1.0635, + "step": 3995 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019976988243072604, + "loss": 1.0774, + "step": 4000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019976633540654505, + "loss": 1.0696, + "step": 4005 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019976276128616028, + "loss": 1.134, + "step": 4010 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997591600705424, + "loss": 1.0233, + "step": 4015 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997555317606695, + "loss": 1.0641, + "step": 4020 + }, + { + "epoch": 0.12, + "learning_rate": 0.000199751876357527, + "loss": 0.9879, + "step": 4025 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019974819386210774, + "loss": 1.0829, + "step": 4030 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019974448427541184, + "loss": 1.0976, + "step": 4035 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019974074759844678, + "loss": 1.089, + "step": 4040 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019973698383222752, + "loss": 1.0393, + "step": 4045 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997331929777762, + "loss": 1.0799, + "step": 4050 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997293750361225, + "loss": 1.084, + "step": 4055 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019972553000830324, + "loss": 1.0347, + "step": 4060 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019972165789536276, + "loss": 1.1614, + "step": 4065 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019971775869835276, + "loss": 1.0459, + "step": 4070 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019971383241833222, + "loss": 1.0581, + "step": 4075 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997098790563675, + "loss": 1.0846, + "step": 4080 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019970589861353227, + "loss": 1.078, + "step": 4085 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019970189109090773, + "loss": 1.1187, + "step": 4090 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001996978564895822, + "loss": 1.0696, + "step": 4095 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019969379481065149, + "loss": 1.1331, + "step": 4100 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019968970605521876, + "loss": 1.1067, + "step": 4105 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019968559022439452, + "loss": 1.1005, + "step": 4110 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019968144731929654, + "loss": 1.0483, + "step": 4115 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019967727734105007, + "loss": 1.068, + "step": 4120 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019967308029078767, + "loss": 1.0471, + "step": 4125 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019966885616964924, + "loss": 1.0161, + "step": 4130 + }, + { + "epoch": 0.12, + "learning_rate": 0.000199664604978782, + "loss": 1.1243, + "step": 4135 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019966032671934062, + "loss": 1.1375, + "step": 4140 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019965602139248703, + "loss": 1.0319, + "step": 4145 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019965168899939052, + "loss": 1.1084, + "step": 4150 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001996473295412278, + "loss": 1.1189, + "step": 4155 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019964294301918285, + "loss": 1.0556, + "step": 4160 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019963852943444702, + "loss": 1.0722, + "step": 4165 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001996340887882191, + "loss": 1.0373, + "step": 4170 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019962962108170508, + "loss": 1.0413, + "step": 4175 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001996251263161184, + "loss": 1.1025, + "step": 4180 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001996206044926798, + "loss": 1.1334, + "step": 4185 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019961605561261744, + "loss": 1.0642, + "step": 4190 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019961147967716673, + "loss": 1.065, + "step": 4195 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019960687668757053, + "loss": 1.0835, + "step": 4200 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019960224664507896, + "loss": 1.1132, + "step": 4205 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019959758955094957, + "loss": 1.1363, + "step": 4210 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019959290540644712, + "loss": 1.0778, + "step": 4215 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995881942128439, + "loss": 1.1452, + "step": 4220 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995834559714194, + "loss": 1.0822, + "step": 4225 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019957869068346052, + "loss": 1.1601, + "step": 4230 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995738983502615, + "loss": 1.0361, + "step": 4235 + }, + { + "epoch": 0.13, + "learning_rate": 0.000199569078973124, + "loss": 1.0654, + "step": 4240 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019956423255335678, + "loss": 1.1747, + "step": 4245 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019955935909227624, + "loss": 1.0899, + "step": 4250 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019955445859120595, + "loss": 1.0899, + "step": 4255 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019954953105147692, + "loss": 1.1152, + "step": 4260 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995445764744274, + "loss": 1.0925, + "step": 4265 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019953959486140302, + "loss": 1.1296, + "step": 4270 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995345862137568, + "loss": 1.0637, + "step": 4275 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995295505328491, + "loss": 1.0797, + "step": 4280 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019952448782004756, + "loss": 1.0704, + "step": 4285 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001995193980767272, + "loss": 1.1394, + "step": 4290 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019951428130427038, + "loss": 1.0521, + "step": 4295 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019950913750406678, + "loss": 1.1602, + "step": 4300 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019950396667751344, + "loss": 1.1037, + "step": 4305 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001994987688260148, + "loss": 1.0254, + "step": 4310 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019949354395098253, + "loss": 1.0934, + "step": 4315 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019948829205383568, + "loss": 1.1284, + "step": 4320 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001994830131360007, + "loss": 1.0931, + "step": 4325 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001994777071989112, + "loss": 1.105, + "step": 4330 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001994723742440084, + "loss": 1.0215, + "step": 4335 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019946701427274067, + "loss": 1.1079, + "step": 4340 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019946162728656374, + "loss": 1.0819, + "step": 4345 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001994562132869407, + "loss": 1.0461, + "step": 4350 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019945077227534195, + "loss": 1.0445, + "step": 4355 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001994453042532453, + "loss": 1.1002, + "step": 4360 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019943980922213582, + "loss": 1.1169, + "step": 4365 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019943428718350591, + "loss": 1.0898, + "step": 4370 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001994287381388554, + "loss": 1.068, + "step": 4375 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001994231620896914, + "loss": 1.1344, + "step": 4380 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019941755903752828, + "loss": 1.1067, + "step": 4385 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019941192898388784, + "loss": 1.0948, + "step": 4390 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001994062719302992, + "loss": 1.1338, + "step": 4395 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019940058787829877, + "loss": 0.9293, + "step": 4400 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019939487682943033, + "loss": 1.0346, + "step": 4405 + }, + { + "epoch": 0.13, + "learning_rate": 0.000199389138785245, + "loss": 1.0545, + "step": 4410 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001993833737473012, + "loss": 1.0076, + "step": 4415 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019937758171716468, + "loss": 1.0951, + "step": 4420 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019937176269640855, + "loss": 1.044, + "step": 4425 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019936591668661324, + "loss": 1.1229, + "step": 4430 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001993600436893665, + "loss": 0.9646, + "step": 4435 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019935414370626342, + "loss": 1.0925, + "step": 4440 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019934821673890643, + "loss": 1.0375, + "step": 4445 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019934226278890525, + "loss": 1.0375, + "step": 4450 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019933628185787695, + "loss": 1.0886, + "step": 4455 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019933027394744595, + "loss": 1.1145, + "step": 4460 + }, + { + "epoch": 0.13, + "learning_rate": 0.000199324239059244, + "loss": 1.1474, + "step": 4465 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019931817719491006, + "loss": 1.1015, + "step": 4470 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019931208835609064, + "loss": 1.068, + "step": 4475 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019930597254443937, + "loss": 1.1134, + "step": 4480 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019929982976161728, + "loss": 1.0303, + "step": 4485 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019929366000929276, + "loss": 1.0893, + "step": 4490 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019928746328914148, + "loss": 1.0784, + "step": 4495 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001992812396028465, + "loss": 1.071, + "step": 4500 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019927498895209807, + "loss": 1.1054, + "step": 4505 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019926871133859388, + "loss": 1.0855, + "step": 4510 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019926240676403892, + "loss": 1.0519, + "step": 4515 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019925607523014547, + "loss": 1.0823, + "step": 4520 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019924971673863318, + "loss": 1.0435, + "step": 4525 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019924333129122895, + "loss": 1.1264, + "step": 4530 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019923691888966714, + "loss": 1.0691, + "step": 4535 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019923047953568924, + "loss": 1.0732, + "step": 4540 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019922401323104415, + "loss": 1.0695, + "step": 4545 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001992175199774882, + "loss": 1.1293, + "step": 4550 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019921099977678484, + "loss": 1.053, + "step": 4555 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019920445263070494, + "loss": 1.0902, + "step": 4560 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019919787854102676, + "loss": 1.1227, + "step": 4565 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019919127750953573, + "loss": 1.0759, + "step": 4570 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019918464953802472, + "loss": 1.0376, + "step": 4575 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001991779946282938, + "loss": 1.0365, + "step": 4580 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001991713127821505, + "loss": 1.0152, + "step": 4585 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001991646040014095, + "loss": 1.0486, + "step": 4590 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019915786828789297, + "loss": 1.0088, + "step": 4595 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001991511056434302, + "loss": 1.0394, + "step": 4600 + }, + { + "epoch": 0.14, + "learning_rate": 0.000199144316069858, + "loss": 1.1839, + "step": 4605 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019913749956902038, + "loss": 1.0251, + "step": 4610 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019913065614276862, + "loss": 1.0267, + "step": 4615 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019912378579296146, + "loss": 1.0685, + "step": 4620 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019911688852146476, + "loss": 1.1112, + "step": 4625 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019910996433015189, + "loss": 1.013, + "step": 4630 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019910301322090338, + "loss": 1.0462, + "step": 4635 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019909603519560712, + "loss": 1.0048, + "step": 4640 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019908903025615842, + "loss": 1.0672, + "step": 4645 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019908199840445968, + "loss": 1.2213, + "step": 4650 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019907493964242074, + "loss": 1.0137, + "step": 4655 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001990678539719588, + "loss": 1.0608, + "step": 4660 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019906074139499827, + "loss": 1.0901, + "step": 4665 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019905360191347092, + "loss": 1.0626, + "step": 4670 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001990464355293158, + "loss": 1.0526, + "step": 4675 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001990392422444792, + "loss": 1.1368, + "step": 4680 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019903202206091495, + "loss": 1.071, + "step": 4685 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019902477498058387, + "loss": 1.0363, + "step": 4690 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019901750100545434, + "loss": 1.0452, + "step": 4695 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001990102001375019, + "loss": 1.204, + "step": 4700 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001990028723787095, + "loss": 1.1102, + "step": 4705 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019899551773106726, + "loss": 1.1238, + "step": 4710 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019898813619657272, + "loss": 1.1649, + "step": 4715 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001989807277772307, + "loss": 1.1148, + "step": 4720 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019897329247505323, + "loss": 1.0787, + "step": 4725 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001989658302920598, + "loss": 1.0655, + "step": 4730 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019895834123027703, + "loss": 1.0368, + "step": 4735 + }, + { + "epoch": 0.14, + "learning_rate": 0.000198950825291739, + "loss": 1.0231, + "step": 4740 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019894328247848698, + "loss": 1.1136, + "step": 4745 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019893571279256956, + "loss": 1.0852, + "step": 4750 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019892811623604267, + "loss": 1.0852, + "step": 4755 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019892049281096944, + "loss": 1.0646, + "step": 4760 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019891284251942048, + "loss": 1.059, + "step": 4765 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019890516536347352, + "loss": 1.0181, + "step": 4770 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019889746134521365, + "loss": 1.0821, + "step": 4775 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019888973046673324, + "loss": 1.0235, + "step": 4780 + }, + { + "epoch": 0.14, + "learning_rate": 0.000198881972730132, + "loss": 1.0848, + "step": 4785 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001988741881375169, + "loss": 1.1709, + "step": 4790 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019886637669100222, + "loss": 1.1003, + "step": 4795 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019885853839270948, + "loss": 1.0877, + "step": 4800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001988506732447676, + "loss": 1.0929, + "step": 4805 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019884278124931266, + "loss": 1.1319, + "step": 4810 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019883486240848816, + "loss": 0.9905, + "step": 4815 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019882691672444476, + "loss": 1.1209, + "step": 4820 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019881894419934056, + "loss": 1.0181, + "step": 4825 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019881094483534083, + "loss": 1.0518, + "step": 4830 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019880291863461817, + "loss": 1.0303, + "step": 4835 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019879486559935247, + "loss": 1.0751, + "step": 4840 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001987867857317309, + "loss": 1.0305, + "step": 4845 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019877867903394795, + "loss": 1.052, + "step": 4850 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001987705455082054, + "loss": 1.1089, + "step": 4855 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001987623851567122, + "loss": 1.1282, + "step": 4860 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019875419798168472, + "loss": 1.025, + "step": 4865 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019874598398534658, + "loss": 1.0621, + "step": 4870 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001987377431699287, + "loss": 1.0967, + "step": 4875 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001987294755376692, + "loss": 1.1446, + "step": 4880 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019872118109081355, + "loss": 1.0542, + "step": 4885 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019871285983161452, + "loss": 1.0261, + "step": 4890 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019870451176233212, + "loss": 1.0884, + "step": 4895 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019869613688523367, + "loss": 1.0708, + "step": 4900 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019868773520259375, + "loss": 1.0196, + "step": 4905 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019867930671669422, + "loss": 1.0376, + "step": 4910 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019867085142982426, + "loss": 1.1106, + "step": 4915 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019866236934428026, + "loss": 0.9639, + "step": 4920 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019865386046236596, + "loss": 1.047, + "step": 4925 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019864532478639234, + "loss": 1.0621, + "step": 4930 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001986367623186776, + "loss": 1.0867, + "step": 4935 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019862817306154736, + "loss": 1.1177, + "step": 4940 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001986195570173344, + "loss": 1.1182, + "step": 4945 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019861091418837878, + "loss": 1.1056, + "step": 4950 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001986022445770279, + "loss": 1.0565, + "step": 4955 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019859354818563643, + "loss": 1.0851, + "step": 4960 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001985848250165662, + "loss": 1.0588, + "step": 4965 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019857607507218648, + "loss": 1.0763, + "step": 4970 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019856729835487363, + "loss": 1.0228, + "step": 4975 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019855849486701143, + "loss": 1.0125, + "step": 4980 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001985496646109909, + "loss": 1.127, + "step": 4985 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019854080758921025, + "loss": 1.0706, + "step": 4990 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001985319238040751, + "loss": 1.0399, + "step": 4995 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019852301325799817, + "loss": 1.0319, + "step": 5000 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019851407595339958, + "loss": 1.0729, + "step": 5005 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019850511189270663, + "loss": 1.1789, + "step": 5010 + }, + { + "epoch": 0.15, + "learning_rate": 0.000198496121078354, + "loss": 1.0771, + "step": 5015 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019848710351278354, + "loss": 1.0313, + "step": 5020 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019847805919844436, + "loss": 1.0176, + "step": 5025 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001984689881377929, + "loss": 1.1203, + "step": 5030 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019845989033329277, + "loss": 1.0339, + "step": 5035 + }, + { + "epoch": 0.15, + "learning_rate": 0.000198450765787415, + "loss": 1.0743, + "step": 5040 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001984416145026377, + "loss": 1.0582, + "step": 5045 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019843243648144635, + "loss": 1.0595, + "step": 5050 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001984232317263337, + "loss": 1.0853, + "step": 5055 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019841400023979975, + "loss": 1.1285, + "step": 5060 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019840474202435164, + "loss": 1.1024, + "step": 5065 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019839545708250398, + "loss": 1.0348, + "step": 5070 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019838614541677844, + "loss": 1.0373, + "step": 5075 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001983768070297041, + "loss": 1.1146, + "step": 5080 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019836744192381725, + "loss": 1.1097, + "step": 5085 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019835805010166136, + "loss": 1.0626, + "step": 5090 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019834863156578724, + "loss": 0.9856, + "step": 5095 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019833918631875292, + "loss": 1.1017, + "step": 5100 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019832971436312374, + "loss": 1.0223, + "step": 5105 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001983202157014722, + "loss": 1.0143, + "step": 5110 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019831069033637817, + "loss": 1.0442, + "step": 5115 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019830113827042865, + "loss": 1.1573, + "step": 5120 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019829155950621796, + "loss": 1.0452, + "step": 5125 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001982819540463477, + "loss": 1.1239, + "step": 5130 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001982723218934266, + "loss": 1.0805, + "step": 5135 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001982626630500708, + "loss": 1.1184, + "step": 5140 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019825297751890358, + "loss": 0.976, + "step": 5145 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019824326530255547, + "loss": 1.0452, + "step": 5150 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019823352640366434, + "loss": 1.0741, + "step": 5155 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019822376082487516, + "loss": 1.0332, + "step": 5160 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019821396856884032, + "loss": 1.102, + "step": 5165 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001982041496382193, + "loss": 1.1308, + "step": 5170 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019819430403567888, + "loss": 1.0755, + "step": 5175 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019818443176389315, + "loss": 1.1165, + "step": 5180 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019817453282554333, + "loss": 1.129, + "step": 5185 + }, + { + "epoch": 0.15, + "learning_rate": 0.000198164607223318, + "loss": 1.1256, + "step": 5190 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019815465495991288, + "loss": 1.0689, + "step": 5195 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019814467603803095, + "loss": 1.0428, + "step": 5200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001981346704603825, + "loss": 1.1491, + "step": 5205 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019812463822968497, + "loss": 1.1501, + "step": 5210 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001981145793486631, + "loss": 1.0981, + "step": 5215 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019810449382004887, + "loss": 1.069, + "step": 5220 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019809438164658145, + "loss": 1.0498, + "step": 5225 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019808424283100726, + "loss": 1.0319, + "step": 5230 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019807407737608, + "loss": 1.1154, + "step": 5235 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019806388528456056, + "loss": 1.0625, + "step": 5240 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019805366655921707, + "loss": 1.1906, + "step": 5245 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001980434212028249, + "loss": 1.0769, + "step": 5250 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019803314921816668, + "loss": 1.0853, + "step": 5255 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001980228506080322, + "loss": 1.0669, + "step": 5260 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001980125253752186, + "loss": 1.0237, + "step": 5265 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019800217352253013, + "loss": 1.0978, + "step": 5270 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001979917950527783, + "loss": 1.0863, + "step": 5275 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019798138996878192, + "loss": 1.0266, + "step": 5280 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001979709582733669, + "loss": 1.0415, + "step": 5285 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019796049996936656, + "loss": 1.021, + "step": 5290 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019795001505962126, + "loss": 1.0419, + "step": 5295 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001979395035469787, + "loss": 1.0269, + "step": 5300 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019792896543429375, + "loss": 1.0916, + "step": 5305 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019791840072442855, + "loss": 1.1082, + "step": 5310 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001979078094202524, + "loss": 1.1307, + "step": 5315 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019789719152464192, + "loss": 1.0728, + "step": 5320 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019788654704048088, + "loss": 1.0874, + "step": 5325 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019787587597066024, + "loss": 1.1152, + "step": 5330 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019786517831807827, + "loss": 1.0201, + "step": 5335 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001978544540856404, + "loss": 1.1357, + "step": 5340 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019784370327625931, + "loss": 1.1342, + "step": 5345 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001978329258928549, + "loss": 0.9875, + "step": 5350 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019782212193835422, + "loss": 1.1138, + "step": 5355 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001978112914156916, + "loss": 1.0139, + "step": 5360 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001978004343278086, + "loss": 1.0608, + "step": 5365 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019778955067765398, + "loss": 1.0497, + "step": 5370 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019777864046818366, + "loss": 1.1149, + "step": 5375 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019776770370236082, + "loss": 1.1508, + "step": 5380 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001977567403831559, + "loss": 1.1393, + "step": 5385 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019774575051354645, + "loss": 0.991, + "step": 5390 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001977347340965173, + "loss": 1.0558, + "step": 5395 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001977236911350605, + "loss": 1.1075, + "step": 5400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001977126216321752, + "loss": 1.0433, + "step": 5405 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019770152559086794, + "loss": 1.1164, + "step": 5410 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019769040301415228, + "loss": 1.0112, + "step": 5415 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019767925390504914, + "loss": 1.0151, + "step": 5420 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019766807826658656, + "loss": 1.058, + "step": 5425 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001976568761017998, + "loss": 1.0654, + "step": 5430 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019764564741373133, + "loss": 1.072, + "step": 5435 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019763439220543084, + "loss": 1.0295, + "step": 5440 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019762311047995517, + "loss": 1.1007, + "step": 5445 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019761180224036844, + "loss": 1.063, + "step": 5450 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001976004674897419, + "loss": 1.0566, + "step": 5455 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019758910623115405, + "loss": 1.073, + "step": 5460 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019757771846769058, + "loss": 1.0545, + "step": 5465 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019756630420244433, + "loss": 1.0589, + "step": 5470 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019755486343851541, + "loss": 1.0361, + "step": 5475 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019754339617901107, + "loss": 1.0427, + "step": 5480 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001975319024270458, + "loss": 1.0694, + "step": 5485 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019752038218574124, + "loss": 1.0612, + "step": 5490 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001975088354582263, + "loss": 1.0657, + "step": 5495 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019749726224763697, + "loss": 1.1525, + "step": 5500 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019748566255711652, + "loss": 1.1052, + "step": 5505 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001974740363898154, + "loss": 1.0906, + "step": 5510 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019746238374889125, + "loss": 1.0021, + "step": 5515 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019745070463750885, + "loss": 1.1803, + "step": 5520 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019743899905884022, + "loss": 1.0912, + "step": 5525 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019742726701606458, + "loss": 1.0049, + "step": 5530 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019741550851236825, + "loss": 1.138, + "step": 5535 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001974037235509449, + "loss": 1.0151, + "step": 5540 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001973919121349952, + "loss": 1.0765, + "step": 5545 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001973800742677271, + "loss": 1.0131, + "step": 5550 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001973682099523558, + "loss": 1.0848, + "step": 5555 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001973563191921035, + "loss": 1.0464, + "step": 5560 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019734440199019982, + "loss": 0.9965, + "step": 5565 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001973324583498813, + "loss": 1.1585, + "step": 5570 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019732048827439188, + "loss": 1.0624, + "step": 5575 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019730849176698254, + "loss": 1.022, + "step": 5580 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019729646883091153, + "loss": 1.0323, + "step": 5585 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019728441946944422, + "loss": 1.1031, + "step": 5590 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019727234368585318, + "loss": 1.0758, + "step": 5595 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019726024148341816, + "loss": 1.099, + "step": 5600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001972481128654261, + "loss": 1.1152, + "step": 5605 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019723595783517102, + "loss": 1.0904, + "step": 5610 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019722377639595423, + "loss": 1.0967, + "step": 5615 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001972115685510842, + "loss": 1.0337, + "step": 5620 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001971993343038765, + "loss": 1.1187, + "step": 5625 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001971870736576539, + "loss": 1.0138, + "step": 5630 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019717478661574638, + "loss": 1.0121, + "step": 5635 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019716247318149103, + "loss": 1.084, + "step": 5640 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001971501333582322, + "loss": 1.1152, + "step": 5645 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019713776714932126, + "loss": 1.0011, + "step": 5650 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019712537455811687, + "loss": 1.0922, + "step": 5655 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019711295558798487, + "loss": 1.0545, + "step": 5660 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001971005102422981, + "loss": 1.0796, + "step": 5665 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019708803852443678, + "loss": 1.1074, + "step": 5670 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019707554043778811, + "loss": 1.1439, + "step": 5675 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001970630159857466, + "loss": 0.9909, + "step": 5680 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019705046517171378, + "loss": 1.0679, + "step": 5685 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001970378879990984, + "loss": 1.0229, + "step": 5690 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019702528447131646, + "loss": 1.0642, + "step": 5695 + }, + { + "epoch": 0.17, + "learning_rate": 0.000197012654591791, + "loss": 1.0415, + "step": 5700 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019699999836395225, + "loss": 1.1193, + "step": 5705 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019698731579123754, + "loss": 1.1045, + "step": 5710 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019697460687709154, + "loss": 1.0317, + "step": 5715 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019696187162496583, + "loss": 1.0721, + "step": 5720 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019694911003831933, + "loss": 1.0118, + "step": 5725 + }, + { + "epoch": 0.17, + "learning_rate": 0.000196936322120618, + "loss": 1.0759, + "step": 5730 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019692350787533505, + "loss": 1.0189, + "step": 5735 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019691066730595077, + "loss": 1.1207, + "step": 5740 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019689780041595256, + "loss": 1.0603, + "step": 5745 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001968849072088351, + "loss": 0.9801, + "step": 5750 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019687198768810008, + "loss": 1.0711, + "step": 5755 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019685904185725645, + "loss": 1.0938, + "step": 5760 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019684606971982023, + "loss": 1.0413, + "step": 5765 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019683307127931458, + "loss": 1.0297, + "step": 5770 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019682004653926992, + "loss": 1.0707, + "step": 5775 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001968069955032236, + "loss": 1.1785, + "step": 5780 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019679391817472035, + "loss": 1.0661, + "step": 5785 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019678081455731187, + "loss": 1.1462, + "step": 5790 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001967676846545571, + "loss": 1.1029, + "step": 5795 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019675452847002205, + "loss": 1.077, + "step": 5800 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019674134600727988, + "loss": 1.1297, + "step": 5805 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019672813726991092, + "loss": 1.0142, + "step": 5810 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019671490226150264, + "loss": 1.0151, + "step": 5815 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001967016409856496, + "loss": 1.0813, + "step": 5820 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019668835344595353, + "loss": 1.0649, + "step": 5825 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019667503964602325, + "loss": 1.0729, + "step": 5830 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001966616995894748, + "loss": 1.084, + "step": 5835 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019664833327993125, + "loss": 1.1161, + "step": 5840 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019663494072102284, + "loss": 1.062, + "step": 5845 + }, + { + "epoch": 0.17, + "learning_rate": 0.000196621521916387, + "loss": 1.1028, + "step": 5850 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019660807686966818, + "loss": 1.1866, + "step": 5855 + }, + { + "epoch": 0.17, + "learning_rate": 0.000196594605584518, + "loss": 1.0915, + "step": 5860 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019658110806459526, + "loss": 1.0741, + "step": 5865 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019656758431356582, + "loss": 0.9842, + "step": 5870 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019655403433510268, + "loss": 1.0389, + "step": 5875 + }, + { + "epoch": 0.18, + "learning_rate": 0.000196540458132886, + "loss": 0.9803, + "step": 5880 + }, + { + "epoch": 0.18, + "learning_rate": 0.000196526855710603, + "loss": 1.0118, + "step": 5885 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019651322707194804, + "loss": 1.0507, + "step": 5890 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019649957222062266, + "loss": 1.0712, + "step": 5895 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019648589116033543, + "loss": 0.9621, + "step": 5900 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001964721838948021, + "loss": 1.0436, + "step": 5905 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019645845042774553, + "loss": 1.1087, + "step": 5910 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019644469076289563, + "loss": 1.074, + "step": 5915 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019643090490398954, + "loss": 1.0794, + "step": 5920 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019641709285477145, + "loss": 1.0439, + "step": 5925 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019640325461899263, + "loss": 1.1111, + "step": 5930 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001963893902004115, + "loss": 1.0678, + "step": 5935 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001963754996027936, + "loss": 1.0038, + "step": 5940 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001963615828299116, + "loss": 1.0867, + "step": 5945 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019634763988554522, + "loss": 1.0338, + "step": 5950 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001963336707734813, + "loss": 1.1219, + "step": 5955 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019631967549751386, + "loss": 1.1158, + "step": 5960 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001963056540614439, + "loss": 1.0276, + "step": 5965 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019629160646907965, + "loss": 1.0499, + "step": 5970 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019627753272423638, + "loss": 0.9816, + "step": 5975 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019626343283073644, + "loss": 1.0808, + "step": 5980 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019624930679240933, + "loss": 1.0684, + "step": 5985 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019623515461309165, + "loss": 1.0807, + "step": 5990 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019622097629662708, + "loss": 1.0857, + "step": 5995 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001962067718468664, + "loss": 1.107, + "step": 6000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001961925412676675, + "loss": 1.0473, + "step": 6005 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019617828456289533, + "loss": 1.1362, + "step": 6010 + }, + { + "epoch": 0.18, + "learning_rate": 0.000196164001736422, + "loss": 0.9959, + "step": 6015 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019614969279212665, + "loss": 1.0562, + "step": 6020 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019613535773389554, + "loss": 1.1135, + "step": 6025 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019612099656562203, + "loss": 1.0536, + "step": 6030 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019610660929120658, + "loss": 1.0403, + "step": 6035 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019609219591455675, + "loss": 1.1076, + "step": 6040 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001960777564395871, + "loss": 1.1449, + "step": 6045 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019606329087021936, + "loss": 1.0475, + "step": 6050 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019604879921038238, + "loss": 1.0646, + "step": 6055 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019603428146401197, + "loss": 1.1391, + "step": 6060 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019601973763505118, + "loss": 1.109, + "step": 6065 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019600516772745, + "loss": 1.035, + "step": 6070 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001959905717451656, + "loss": 1.0514, + "step": 6075 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001959759496921622, + "loss": 1.0511, + "step": 6080 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019596130157241112, + "loss": 1.0169, + "step": 6085 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001959466273898907, + "loss": 1.0509, + "step": 6090 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001959319271485864, + "loss": 1.1169, + "step": 6095 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019591720085249082, + "loss": 1.0444, + "step": 6100 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019590244850560347, + "loss": 1.1796, + "step": 6105 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019588767011193112, + "loss": 1.0999, + "step": 6110 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019587286567548752, + "loss": 1.0524, + "step": 6115 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019585803520029348, + "loss": 1.0346, + "step": 6120 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001958431786903769, + "loss": 1.086, + "step": 6125 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019582829614977283, + "loss": 1.0802, + "step": 6130 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001958133875825232, + "loss": 1.0511, + "step": 6135 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019579845299267726, + "loss": 1.1482, + "step": 6140 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019578349238429107, + "loss": 1.1057, + "step": 6145 + }, + { + "epoch": 0.18, + "learning_rate": 0.000195768505761428, + "loss": 0.996, + "step": 6150 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019575349312815824, + "loss": 1.032, + "step": 6155 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001957384544885593, + "loss": 1.0306, + "step": 6160 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019572338984671553, + "loss": 1.1493, + "step": 6165 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019570829920671846, + "loss": 1.0969, + "step": 6170 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019569318257266666, + "loss": 1.094, + "step": 6175 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001956780399486658, + "loss": 1.1324, + "step": 6180 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019566287133882852, + "loss": 1.0979, + "step": 6185 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019564767674727456, + "loss": 1.01, + "step": 6190 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019563245617813072, + "loss": 1.0648, + "step": 6195 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001956172096355309, + "loss": 1.0528, + "step": 6200 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019560193712361596, + "loss": 1.0651, + "step": 6205 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019558663864653388, + "loss": 1.1136, + "step": 6210 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001955713142084397, + "loss": 1.1032, + "step": 6215 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019555596381349547, + "loss": 1.0741, + "step": 6220 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019554058746587026, + "loss": 1.0934, + "step": 6225 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001955251851697403, + "loss": 1.1092, + "step": 6230 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001955097569292888, + "loss": 1.0315, + "step": 6235 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019549430274870596, + "loss": 1.0634, + "step": 6240 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019547882263218914, + "loss": 1.1065, + "step": 6245 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019546331658394265, + "loss": 1.0974, + "step": 6250 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019544778460817792, + "loss": 1.0553, + "step": 6255 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019543222670911336, + "loss": 1.0858, + "step": 6260 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001954166428909744, + "loss": 1.0592, + "step": 6265 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019540103315799368, + "loss": 1.0981, + "step": 6270 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019538539751441062, + "loss": 1.1368, + "step": 6275 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019536973596447185, + "loss": 1.0476, + "step": 6280 + }, + { + "epoch": 0.19, + "learning_rate": 0.000195354048512431, + "loss": 1.075, + "step": 6285 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019533833516254876, + "loss": 1.0192, + "step": 6290 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001953225959190928, + "loss": 1.0262, + "step": 6295 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001953068307863378, + "loss": 1.0238, + "step": 6300 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001952910397685656, + "loss": 0.9787, + "step": 6305 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019527522287006495, + "loss": 0.9966, + "step": 6310 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019525938009513162, + "loss": 1.1149, + "step": 6315 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019524351144806855, + "loss": 1.059, + "step": 6320 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019522761693318552, + "loss": 1.101, + "step": 6325 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001952116965547995, + "loss": 1.1542, + "step": 6330 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019519575031723437, + "loss": 1.0849, + "step": 6335 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001951797782248211, + "loss": 1.0792, + "step": 6340 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001951637802818976, + "loss": 1.0201, + "step": 6345 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001951477564928089, + "loss": 1.0709, + "step": 6350 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019513170686190706, + "loss": 1.069, + "step": 6355 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019511563139355099, + "loss": 1.0179, + "step": 6360 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019509953009210683, + "loss": 1.0761, + "step": 6365 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001950834029619476, + "loss": 1.1206, + "step": 6370 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019506725000745337, + "loss": 1.0902, + "step": 6375 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019505107123301128, + "loss": 1.0567, + "step": 6380 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019503486664301534, + "loss": 0.9986, + "step": 6385 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019501863624186676, + "loss": 1.1395, + "step": 6390 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019500238003397358, + "loss": 1.0032, + "step": 6395 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019498609802375097, + "loss": 0.9435, + "step": 6400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001949697902156211, + "loss": 1.0698, + "step": 6405 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019495345661401305, + "loss": 1.175, + "step": 6410 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019493709722336302, + "loss": 1.1948, + "step": 6415 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019492071204811414, + "loss": 1.0924, + "step": 6420 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001949043010927166, + "loss": 1.0597, + "step": 6425 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019488786436162753, + "loss": 1.1123, + "step": 6430 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001948714018593111, + "loss": 1.0924, + "step": 6435 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019485491359023848, + "loss": 1.1378, + "step": 6440 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019483839955888784, + "loss": 1.1007, + "step": 6445 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001948218597697443, + "loss": 1.0708, + "step": 6450 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019480529422730004, + "loss": 1.1415, + "step": 6455 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019478870293605416, + "loss": 0.9645, + "step": 6460 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019477208590051287, + "loss": 1.1225, + "step": 6465 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019475544312518923, + "loss": 1.0065, + "step": 6470 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019473877461460344, + "loss": 1.0561, + "step": 6475 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019472208037328256, + "loss": 1.0846, + "step": 6480 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019470536040576067, + "loss": 1.1218, + "step": 6485 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001946886147165789, + "loss": 1.1326, + "step": 6490 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019467184331028534, + "loss": 1.143, + "step": 6495 + }, + { + "epoch": 0.19, + "learning_rate": 0.000194655046191435, + "loss": 0.9918, + "step": 6500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019463822336458994, + "loss": 1.0854, + "step": 6505 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001946213748343192, + "loss": 1.1453, + "step": 6510 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019460450060519876, + "loss": 1.0637, + "step": 6515 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001945876006818116, + "loss": 1.0748, + "step": 6520 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001945706750687477, + "loss": 1.0499, + "step": 6525 + }, + { + "epoch": 0.19, + "learning_rate": 0.000194553723770604, + "loss": 1.0572, + "step": 6530 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001945367467919844, + "loss": 1.0715, + "step": 6535 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019451974413749981, + "loss": 1.0852, + "step": 6540 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001945027158117681, + "loss": 1.1172, + "step": 6545 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001944856618194141, + "loss": 1.0434, + "step": 6550 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001944685821650696, + "loss": 1.1306, + "step": 6555 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019445147685337338, + "loss": 1.081, + "step": 6560 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001944343458889712, + "loss": 0.9441, + "step": 6565 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019441718927651577, + "loss": 1.0113, + "step": 6570 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019440000702066677, + "loss": 1.0873, + "step": 6575 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019438279912609082, + "loss": 1.0596, + "step": 6580 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019436556559746158, + "loss": 1.0867, + "step": 6585 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019434830643945953, + "loss": 1.0603, + "step": 6590 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019433102165677228, + "loss": 1.1341, + "step": 6595 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001943137112540943, + "loss": 1.1013, + "step": 6600 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019429637523612705, + "loss": 1.0778, + "step": 6605 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001942790136075789, + "loss": 0.9962, + "step": 6610 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001942616263731652, + "loss": 1.0115, + "step": 6615 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019424421353760832, + "loss": 1.0727, + "step": 6620 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019422677510563748, + "loss": 1.1123, + "step": 6625 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019420931108198893, + "loss": 1.0533, + "step": 6630 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019419182147140582, + "loss": 1.1516, + "step": 6635 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019417430627863828, + "loss": 1.0532, + "step": 6640 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001941567655084434, + "loss": 1.1083, + "step": 6645 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019413919916558514, + "loss": 1.0768, + "step": 6650 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001941216072548345, + "loss": 1.0665, + "step": 6655 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001941039897809694, + "loss": 0.9859, + "step": 6660 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019408634674877463, + "loss": 1.0962, + "step": 6665 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019406867816304204, + "loss": 1.0535, + "step": 6670 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019405098402857033, + "loss": 1.1598, + "step": 6675 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019403326435016513, + "loss": 1.1472, + "step": 6680 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001940155191326391, + "loss": 1.0788, + "step": 6685 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019399774838081178, + "loss": 1.0634, + "step": 6690 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019397995209950963, + "loss": 1.0429, + "step": 6695 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019396213029356608, + "loss": 1.0056, + "step": 6700 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019394428296782144, + "loss": 1.1964, + "step": 6705 + }, + { + "epoch": 0.2, + "learning_rate": 0.000193926410127123, + "loss": 1.0845, + "step": 6710 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019390851177632497, + "loss": 1.1189, + "step": 6715 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019389058792028846, + "loss": 1.005, + "step": 6720 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019387263856388156, + "loss": 1.1551, + "step": 6725 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019385466371197923, + "loss": 1.0764, + "step": 6730 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001938366633694634, + "loss": 1.052, + "step": 6735 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019381863754122292, + "loss": 1.1638, + "step": 6740 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001938005862321535, + "loss": 1.1142, + "step": 6745 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019378250944715784, + "loss": 1.1115, + "step": 6750 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001937644071911455, + "loss": 1.0206, + "step": 6755 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019374627946903305, + "loss": 1.0454, + "step": 6760 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019372812628574385, + "loss": 1.0975, + "step": 6765 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019370994764620833, + "loss": 1.0171, + "step": 6770 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001936917435553637, + "loss": 1.0474, + "step": 6775 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019367351401815413, + "loss": 1.0654, + "step": 6780 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019365525903953067, + "loss": 1.0801, + "step": 6785 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019363697862445137, + "loss": 1.0106, + "step": 6790 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019361867277788112, + "loss": 1.0548, + "step": 6795 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001936003415047917, + "loss": 0.9941, + "step": 6800 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019358198481016182, + "loss": 1.0454, + "step": 6805 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019356360269897713, + "loss": 1.1103, + "step": 6810 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019354519517623012, + "loss": 1.0954, + "step": 6815 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019352676224692022, + "loss": 1.0873, + "step": 6820 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019350830391605376, + "loss": 1.1164, + "step": 6825 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019348982018864397, + "loss": 1.0316, + "step": 6830 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019347131106971094, + "loss": 1.0965, + "step": 6835 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001934527765642817, + "loss": 1.111, + "step": 6840 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001934342166773902, + "loss": 1.1366, + "step": 6845 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019341563141407714, + "loss": 1.0946, + "step": 6850 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001933970207793903, + "loss": 1.0986, + "step": 6855 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019337838477838428, + "loss": 1.0962, + "step": 6860 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019335972341612048, + "loss": 1.0555, + "step": 6865 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001933410366976673, + "loss": 1.0412, + "step": 6870 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019332232462810004, + "loss": 1.0282, + "step": 6875 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019330358721250074, + "loss": 1.1347, + "step": 6880 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001932848244559585, + "loss": 1.0403, + "step": 6885 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019326603636356918, + "loss": 1.0159, + "step": 6890 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019324722294043558, + "loss": 1.0784, + "step": 6895 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019322838419166737, + "loss": 1.1826, + "step": 6900 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019320952012238108, + "loss": 1.033, + "step": 6905 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001931906307377001, + "loss": 1.1023, + "step": 6910 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019317171604275478, + "loss": 1.1325, + "step": 6915 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019315277604268225, + "loss": 1.1143, + "step": 6920 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019313381074262655, + "loss": 1.0861, + "step": 6925 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019311482014773863, + "loss": 1.0554, + "step": 6930 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019309580426317622, + "loss": 1.0179, + "step": 6935 + }, + { + "epoch": 0.21, + "learning_rate": 0.000193076763094104, + "loss": 1.1075, + "step": 6940 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019305769664569348, + "loss": 1.0568, + "step": 6945 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019303860492312307, + "loss": 1.1335, + "step": 6950 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019301948793157796, + "loss": 1.0968, + "step": 6955 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019300034567625032, + "loss": 1.0492, + "step": 6960 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001929811781623391, + "loss": 1.0504, + "step": 6965 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019296198539505013, + "loss": 1.0451, + "step": 6970 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001929427673795961, + "loss": 1.0916, + "step": 6975 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019292352412119657, + "loss": 1.0767, + "step": 6980 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019290425562507792, + "loss": 1.044, + "step": 6985 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019288496189647346, + "loss": 1.048, + "step": 6990 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019286564294062323, + "loss": 1.1075, + "step": 6995 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019284629876277425, + "loss": 1.0377, + "step": 7000 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019282692936818031, + "loss": 1.0853, + "step": 7005 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001928075347621021, + "loss": 1.0473, + "step": 7010 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001927881149498071, + "loss": 1.079, + "step": 7015 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001927686699365697, + "loss": 1.0701, + "step": 7020 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019274919972767104, + "loss": 1.0047, + "step": 7025 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019272970432839922, + "loss": 1.0778, + "step": 7030 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001927101837440491, + "loss": 1.0638, + "step": 7035 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019269063797992242, + "loss": 1.1327, + "step": 7040 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019267106704132773, + "loss": 1.0794, + "step": 7045 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019265147093358044, + "loss": 0.9926, + "step": 7050 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019263184966200278, + "loss": 1.0287, + "step": 7055 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019261220323192385, + "loss": 1.052, + "step": 7060 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001925925316486795, + "loss": 1.0628, + "step": 7065 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019257283491761254, + "loss": 0.9556, + "step": 7070 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019255311304407243, + "loss": 1.1124, + "step": 7075 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001925333660334157, + "loss": 1.0557, + "step": 7080 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019251359389100547, + "loss": 1.1685, + "step": 7085 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019249379662221181, + "loss": 1.1111, + "step": 7090 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019247397423241162, + "loss": 1.1248, + "step": 7095 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019245412672698857, + "loss": 0.9916, + "step": 7100 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019243425411133317, + "loss": 1.0128, + "step": 7105 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001924143563908428, + "loss": 1.0776, + "step": 7110 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019239443357092157, + "loss": 1.0848, + "step": 7115 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019237448565698046, + "loss": 1.1113, + "step": 7120 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019235451265443726, + "loss": 1.0992, + "step": 7125 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019233451456871658, + "loss": 1.0531, + "step": 7130 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019231449140524984, + "loss": 1.0496, + "step": 7135 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019229444316947524, + "loss": 1.0508, + "step": 7140 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019227436986683782, + "loss": 1.0668, + "step": 7145 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019225427150278943, + "loss": 1.0097, + "step": 7150 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001922341480827887, + "loss": 1.0788, + "step": 7155 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019221399961230113, + "loss": 1.0439, + "step": 7160 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019219382609679894, + "loss": 1.0776, + "step": 7165 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001921736275417612, + "loss": 1.1287, + "step": 7170 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019215340395267375, + "loss": 1.0896, + "step": 7175 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019213315533502928, + "loss": 1.0834, + "step": 7180 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019211288169432723, + "loss": 1.0397, + "step": 7185 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019209258303607384, + "loss": 1.1279, + "step": 7190 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019207225936578216, + "loss": 1.0745, + "step": 7195 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001920519106889721, + "loss": 0.9946, + "step": 7200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001920315370111702, + "loss": 1.0628, + "step": 7205 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001920111383379099, + "loss": 1.0947, + "step": 7210 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001919907146747315, + "loss": 1.009, + "step": 7215 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019197026602718186, + "loss": 1.0339, + "step": 7220 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001919497924008149, + "loss": 1.0508, + "step": 7225 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019192929380119108, + "loss": 1.0446, + "step": 7230 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001919087702338778, + "loss": 1.0769, + "step": 7235 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019188822170444918, + "loss": 1.0916, + "step": 7240 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019186764821848616, + "loss": 0.9858, + "step": 7245 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019184704978157638, + "loss": 1.0113, + "step": 7250 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019182642639931438, + "loss": 1.1053, + "step": 7255 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019180577807730134, + "loss": 1.0728, + "step": 7260 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001917851048211453, + "loss": 1.1403, + "step": 7265 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019176440663646102, + "loss": 1.1083, + "step": 7270 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019174368352887008, + "loss": 1.0488, + "step": 7275 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019172293550400084, + "loss": 1.1161, + "step": 7280 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001917021625674883, + "loss": 1.0215, + "step": 7285 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019168136472497445, + "loss": 1.0804, + "step": 7290 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001916605419821078, + "loss": 1.0917, + "step": 7295 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019163969434454382, + "loss": 1.0453, + "step": 7300 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019161882181794457, + "loss": 1.0144, + "step": 7305 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019159792440797904, + "loss": 1.091, + "step": 7310 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001915770021203229, + "loss": 0.9946, + "step": 7315 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019155605496065848, + "loss": 1.0256, + "step": 7320 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019153508293467503, + "loss": 1.0741, + "step": 7325 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019151408604806846, + "loss": 1.0902, + "step": 7330 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001914930643065415, + "loss": 1.0409, + "step": 7335 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019147201771580355, + "loss": 1.0306, + "step": 7340 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019145094628157078, + "loss": 1.0588, + "step": 7345 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019142985000956614, + "loss": 1.0233, + "step": 7350 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001914087289055193, + "loss": 1.0628, + "step": 7355 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019138758297516668, + "loss": 1.0893, + "step": 7360 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019136641222425148, + "loss": 1.0242, + "step": 7365 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019134521665852352, + "loss": 1.0498, + "step": 7370 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001913239962837395, + "loss": 1.1378, + "step": 7375 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019130275110566285, + "loss": 1.0647, + "step": 7380 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019128148113006362, + "loss": 1.0919, + "step": 7385 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019126018636271867, + "loss": 1.0667, + "step": 7390 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019123886680941162, + "loss": 1.0603, + "step": 7395 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019121752247593275, + "loss": 1.105, + "step": 7400 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019119615336807917, + "loss": 1.068, + "step": 7405 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001911747594916546, + "loss": 1.0312, + "step": 7410 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019115334085246957, + "loss": 1.1432, + "step": 7415 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001911318974563413, + "loss": 1.033, + "step": 7420 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019111042930909376, + "loss": 1.0094, + "step": 7425 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019108893641655761, + "loss": 0.9923, + "step": 7430 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019106741878457026, + "loss": 1.048, + "step": 7435 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019104587641897582, + "loss": 0.9729, + "step": 7440 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019102430932562515, + "loss": 1.1082, + "step": 7445 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019100271751037575, + "loss": 0.9876, + "step": 7450 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019098110097909193, + "loss": 1.1132, + "step": 7455 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019095945973764464, + "loss": 1.0795, + "step": 7460 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001909377937919116, + "loss": 1.094, + "step": 7465 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019091610314777717, + "loss": 1.0292, + "step": 7470 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019089438781113248, + "loss": 1.049, + "step": 7475 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019087264778787534, + "loss": 1.1129, + "step": 7480 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019085088308391028, + "loss": 1.0573, + "step": 7485 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001908290937051485, + "loss": 1.0603, + "step": 7490 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001908072796575079, + "loss": 1.0293, + "step": 7495 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001907854409469132, + "loss": 1.0986, + "step": 7500 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001907635775792956, + "loss": 1.1646, + "step": 7505 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019074168956059316, + "loss": 1.0537, + "step": 7510 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019071977689675065, + "loss": 1.0483, + "step": 7515 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019069783959371943, + "loss": 1.1612, + "step": 7520 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019067587765745757, + "loss": 1.0754, + "step": 7525 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001906538910939299, + "loss": 1.1354, + "step": 7530 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001906318799091079, + "loss": 1.0109, + "step": 7535 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019060984410896968, + "loss": 1.0934, + "step": 7540 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019058778369950016, + "loss": 1.1387, + "step": 7545 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019056569868669082, + "loss": 1.1044, + "step": 7550 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001905435890765399, + "loss": 1.0475, + "step": 7555 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019052145487505234, + "loss": 1.0782, + "step": 7560 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019049929608823963, + "loss": 1.0668, + "step": 7565 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019047711272212006, + "loss": 1.1293, + "step": 7570 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019045490478271856, + "loss": 1.1171, + "step": 7575 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019043267227606675, + "loss": 1.1093, + "step": 7580 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001904104152082029, + "loss": 1.0032, + "step": 7585 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019038813358517192, + "loss": 1.0426, + "step": 7590 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019036582741302548, + "loss": 1.1175, + "step": 7595 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019034349669782178, + "loss": 1.0634, + "step": 7600 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019032114144562582, + "loss": 0.9559, + "step": 7605 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019029876166250923, + "loss": 1.0568, + "step": 7610 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019027635735455027, + "loss": 1.1106, + "step": 7615 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019025392852783387, + "loss": 1.0473, + "step": 7620 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019023147518845162, + "loss": 1.0409, + "step": 7625 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019020899734250178, + "loss": 1.01, + "step": 7630 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019018649499608921, + "loss": 1.0053, + "step": 7635 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019016396815532555, + "loss": 1.1377, + "step": 7640 + }, + { + "epoch": 0.23, + "learning_rate": 0.000190141416826329, + "loss": 0.9869, + "step": 7645 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019011884101522437, + "loss": 1.1015, + "step": 7650 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019009624072814326, + "loss": 1.0546, + "step": 7655 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019007361597122373, + "loss": 1.0258, + "step": 7660 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019005096675061065, + "loss": 1.0672, + "step": 7665 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019002829307245548, + "loss": 1.0932, + "step": 7670 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001900055949429163, + "loss": 1.0997, + "step": 7675 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018998287236815785, + "loss": 1.1079, + "step": 7680 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018996012535435147, + "loss": 1.0259, + "step": 7685 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018993735390767527, + "loss": 1.0451, + "step": 7690 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018991455803431378, + "loss": 1.1488, + "step": 7695 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018989173774045835, + "loss": 1.1009, + "step": 7700 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001898688930323069, + "loss": 1.0727, + "step": 7705 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018984602391606395, + "loss": 1.1288, + "step": 7710 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018982313039794067, + "loss": 0.9979, + "step": 7715 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018980021248415487, + "loss": 1.104, + "step": 7720 + }, + { + "epoch": 0.23, + "learning_rate": 0.000189777270180931, + "loss": 1.0332, + "step": 7725 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001897543034945001, + "loss": 1.1018, + "step": 7730 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018973131243109988, + "loss": 1.0029, + "step": 7735 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018970829699697456, + "loss": 1.0737, + "step": 7740 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001896852571983751, + "loss": 1.1074, + "step": 7745 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018966219304155906, + "loss": 1.0807, + "step": 7750 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001896391045327905, + "loss": 1.1124, + "step": 7755 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001896159916783403, + "loss": 1.0085, + "step": 7760 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018959285448448573, + "loss": 1.0485, + "step": 7765 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018956969295751084, + "loss": 1.0846, + "step": 7770 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018954650710370619, + "loss": 1.0411, + "step": 7775 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018952329692936899, + "loss": 1.0668, + "step": 7780 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018950006244080307, + "loss": 1.0369, + "step": 7785 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018947680364431882, + "loss": 1.1261, + "step": 7790 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001894535205462332, + "loss": 1.0033, + "step": 7795 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001894302131528699, + "loss": 1.059, + "step": 7800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001894068814705591, + "loss": 1.1585, + "step": 7805 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018938352550563762, + "loss": 1.0501, + "step": 7810 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018936014526444884, + "loss": 1.0361, + "step": 7815 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018933674075334274, + "loss": 0.931, + "step": 7820 + }, + { + "epoch": 0.23, + "learning_rate": 0.000189313311978676, + "loss": 1.0988, + "step": 7825 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001892898589468117, + "loss": 1.0829, + "step": 7830 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018926638166411958, + "loss": 1.1268, + "step": 7835 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001892428801369761, + "loss": 1.0373, + "step": 7840 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018921935437176416, + "loss": 1.0947, + "step": 7845 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001891958043748732, + "loss": 1.0747, + "step": 7850 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018917223015269943, + "loss": 1.0562, + "step": 7855 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018914863171164546, + "loss": 1.0468, + "step": 7860 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018912500905812058, + "loss": 1.0434, + "step": 7865 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018910136219854061, + "loss": 1.0742, + "step": 7870 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018907769113932798, + "loss": 1.0173, + "step": 7875 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018905399588691163, + "loss": 1.0872, + "step": 7880 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018903027644772715, + "loss": 1.0333, + "step": 7885 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018900653282821666, + "loss": 1.0405, + "step": 7890 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001889827650348288, + "loss": 1.0363, + "step": 7895 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018895897307401888, + "loss": 0.9642, + "step": 7900 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001889351569522487, + "loss": 1.041, + "step": 7905 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018891131667598662, + "loss": 1.0259, + "step": 7910 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001888874522517076, + "loss": 1.0784, + "step": 7915 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001888635636858931, + "loss": 1.0443, + "step": 7920 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001888396509850312, + "loss": 1.094, + "step": 7925 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018881571415561656, + "loss": 1.0991, + "step": 7930 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018879175320415028, + "loss": 1.0486, + "step": 7935 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001887677681371401, + "loss": 1.0873, + "step": 7940 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018874375896110027, + "loss": 1.0915, + "step": 7945 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001887197256825516, + "loss": 1.0575, + "step": 7950 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018869566830802148, + "loss": 1.1026, + "step": 7955 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018867158684404375, + "loss": 1.0201, + "step": 7960 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018864748129715896, + "loss": 1.0621, + "step": 7965 + }, + { + "epoch": 0.24, + "learning_rate": 0.000188623351673914, + "loss": 1.1016, + "step": 7970 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018859919798086238, + "loss": 1.0232, + "step": 7975 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018857502022456423, + "loss": 1.0368, + "step": 7980 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018855081841158614, + "loss": 1.0235, + "step": 7985 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018852659254850126, + "loss": 1.0425, + "step": 7990 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001885023426418892, + "loss": 1.0677, + "step": 7995 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018847806869833614, + "loss": 1.0818, + "step": 8000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001884537707244349, + "loss": 1.0938, + "step": 8005 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018842944872678462, + "loss": 1.0528, + "step": 8010 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018840510271199112, + "loss": 1.0229, + "step": 8015 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018838073268666672, + "loss": 1.0359, + "step": 8020 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001883563386574302, + "loss": 1.0493, + "step": 8025 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018833192063090693, + "loss": 1.0767, + "step": 8030 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018830747861372873, + "loss": 1.0127, + "step": 8035 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018828301261253398, + "loss": 1.1243, + "step": 8040 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018825852263396758, + "loss": 1.0462, + "step": 8045 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018823400868468092, + "loss": 1.0391, + "step": 8050 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018820947077133186, + "loss": 1.0506, + "step": 8055 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001881849089005849, + "loss": 1.0589, + "step": 8060 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018816032307911092, + "loss": 1.0248, + "step": 8065 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018813571331358734, + "loss": 1.0853, + "step": 8070 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018811107961069808, + "loss": 1.0075, + "step": 8075 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018808642197713362, + "loss": 1.0741, + "step": 8080 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018806174041959085, + "loss": 1.0568, + "step": 8085 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018803703494477323, + "loss": 1.0012, + "step": 8090 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018801230555939063, + "loss": 1.0675, + "step": 8095 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001879875522701595, + "loss": 1.0536, + "step": 8100 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001879627750838028, + "loss": 1.0919, + "step": 8105 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018793797400704985, + "loss": 1.0975, + "step": 8110 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018791314904663655, + "loss": 1.1052, + "step": 8115 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018788830020930535, + "loss": 1.0374, + "step": 8120 + }, + { + "epoch": 0.24, + "learning_rate": 0.000187863427501805, + "loss": 1.0021, + "step": 8125 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018783853093089095, + "loss": 1.0822, + "step": 8130 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018781361050332498, + "loss": 1.0305, + "step": 8135 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001877886662258754, + "loss": 1.1041, + "step": 8140 + }, + { + "epoch": 0.24, + "learning_rate": 0.000187763698105317, + "loss": 1.0321, + "step": 8145 + }, + { + "epoch": 0.24, + "learning_rate": 0.000187738706148431, + "loss": 1.0294, + "step": 8150 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018771369036200513, + "loss": 1.0225, + "step": 8155 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018768865075283366, + "loss": 1.0785, + "step": 8160 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018766358732771722, + "loss": 1.0822, + "step": 8165 + }, + { + "epoch": 0.24, + "learning_rate": 0.000187638500093463, + "loss": 1.0403, + "step": 8170 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018761338905688452, + "loss": 1.0497, + "step": 8175 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018758825422480192, + "loss": 1.0821, + "step": 8180 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018756309560404174, + "loss": 1.0261, + "step": 8185 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018753791320143693, + "loss": 1.0271, + "step": 8190 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018751270702382697, + "loss": 1.0672, + "step": 8195 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001874874770780578, + "loss": 1.0327, + "step": 8200 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018746222337098173, + "loss": 1.0406, + "step": 8205 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018743694590945767, + "loss": 1.0679, + "step": 8210 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001874116447003508, + "loss": 0.9973, + "step": 8215 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001873863197505329, + "loss": 1.0753, + "step": 8220 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018736097106688215, + "loss": 1.0561, + "step": 8225 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001873355986562831, + "loss": 1.0845, + "step": 8230 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018731020252562693, + "loss": 1.0671, + "step": 8235 + }, + { + "epoch": 0.25, + "learning_rate": 0.000187284782681811, + "loss": 1.0487, + "step": 8240 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018725933913173938, + "loss": 1.0248, + "step": 8245 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001872338718823224, + "loss": 0.9849, + "step": 8250 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018720838094047685, + "loss": 1.1194, + "step": 8255 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018718286631312604, + "loss": 0.9687, + "step": 8260 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018715732800719963, + "loss": 0.9876, + "step": 8265 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018713176602963374, + "loss": 1.0317, + "step": 8270 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001871061803873709, + "loss": 1.112, + "step": 8275 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018708057108736013, + "loss": 1.0606, + "step": 8280 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001870549381365568, + "loss": 1.0588, + "step": 8285 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018702928154192274, + "loss": 1.1281, + "step": 8290 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018700360131042621, + "loss": 1.0934, + "step": 8295 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018697789744904185, + "loss": 1.0193, + "step": 8300 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018695216996475074, + "loss": 1.0087, + "step": 8305 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001869264188645404, + "loss": 0.9902, + "step": 8310 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018690064415540473, + "loss": 1.1592, + "step": 8315 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018687484584434407, + "loss": 1.062, + "step": 8320 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018684902393836515, + "loss": 1.1288, + "step": 8325 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001868231784444811, + "loss": 1.0769, + "step": 8330 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018679730936971148, + "loss": 1.101, + "step": 8335 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018677141672108225, + "loss": 1.1072, + "step": 8340 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018674550050562576, + "loss": 1.1311, + "step": 8345 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018671956073038076, + "loss": 1.117, + "step": 8350 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001866935974023924, + "loss": 1.1281, + "step": 8355 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018666761052871228, + "loss": 1.0334, + "step": 8360 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001866416001163983, + "loss": 1.0716, + "step": 8365 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018661556617251482, + "loss": 1.0865, + "step": 8370 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018658950870413255, + "loss": 1.1423, + "step": 8375 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018656342771832866, + "loss": 1.0613, + "step": 8380 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001865373232221866, + "loss": 1.0547, + "step": 8385 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018651119522279633, + "loss": 1.1119, + "step": 8390 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001864850437272541, + "loss": 1.0498, + "step": 8395 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018645886874266252, + "loss": 1.0417, + "step": 8400 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018643267027613068, + "loss": 1.0226, + "step": 8405 + }, + { + "epoch": 0.25, + "learning_rate": 0.000186406448334774, + "loss": 1.1333, + "step": 8410 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018638020292571422, + "loss": 1.0347, + "step": 8415 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018635393405607958, + "loss": 1.1059, + "step": 8420 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018632764173300457, + "loss": 1.0667, + "step": 8425 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018630132596363007, + "loss": 1.0696, + "step": 8430 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001862749867551034, + "loss": 1.069, + "step": 8435 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018624862411457824, + "loss": 1.0165, + "step": 8440 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018622223804921453, + "loss": 1.0232, + "step": 8445 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018619582856617864, + "loss": 1.0774, + "step": 8450 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001861693956726433, + "loss": 1.0932, + "step": 8455 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018614293937578764, + "loss": 1.0742, + "step": 8460 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018611645968279707, + "loss": 1.0233, + "step": 8465 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018608995660086337, + "loss": 1.0958, + "step": 8470 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018606343013718472, + "loss": 1.1775, + "step": 8475 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001860368802989656, + "loss": 1.0299, + "step": 8480 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018601030709341685, + "loss": 1.0571, + "step": 8485 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018598371052775566, + "loss": 1.0272, + "step": 8490 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018595709060920563, + "loss": 1.0953, + "step": 8495 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018593044734499655, + "loss": 1.0284, + "step": 8500 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018590378074236473, + "loss": 1.0557, + "step": 8505 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018587709080855266, + "loss": 1.0895, + "step": 8510 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018585037755080926, + "loss": 1.044, + "step": 8515 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018582364097638978, + "loss": 1.0315, + "step": 8520 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018579688109255575, + "loss": 1.0601, + "step": 8525 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001857700979065751, + "loss": 1.0313, + "step": 8530 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018574329142572206, + "loss": 1.0689, + "step": 8535 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018571646165727713, + "loss": 1.1464, + "step": 8540 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001856896086085272, + "loss": 1.0778, + "step": 8545 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018566273228676553, + "loss": 1.0185, + "step": 8550 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018563583269929157, + "loss": 0.9554, + "step": 8555 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001856089098534112, + "loss": 1.0443, + "step": 8560 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018558196375643654, + "loss": 1.0408, + "step": 8565 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018555499441568607, + "loss": 1.1809, + "step": 8570 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018552800183848457, + "loss": 0.9881, + "step": 8575 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018550098603216318, + "loss": 1.0447, + "step": 8580 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018547394700405925, + "loss": 1.0367, + "step": 8585 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001854468847615165, + "loss": 1.053, + "step": 8590 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018541979931188494, + "loss": 1.1557, + "step": 8595 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018539269066252095, + "loss": 1.0831, + "step": 8600 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018536555882078706, + "loss": 1.0323, + "step": 8605 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001853384037940522, + "loss": 1.0565, + "step": 8610 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018531122558969166, + "loss": 1.0819, + "step": 8615 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018528402421508688, + "loss": 1.096, + "step": 8620 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018525679967762568, + "loss": 1.0257, + "step": 8625 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018522955198470214, + "loss": 1.0263, + "step": 8630 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018520228114371669, + "loss": 0.9635, + "step": 8635 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018517498716207596, + "loss": 1.0757, + "step": 8640 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001851476700471929, + "loss": 1.0224, + "step": 8645 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018512032980648674, + "loss": 1.1312, + "step": 8650 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018509296644738304, + "loss": 1.0558, + "step": 8655 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001850655799773136, + "loss": 1.1064, + "step": 8660 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018503817040371642, + "loss": 1.1367, + "step": 8665 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018501073773403593, + "loss": 1.1039, + "step": 8670 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001849832819757227, + "loss": 1.0733, + "step": 8675 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018495580313623365, + "loss": 1.1023, + "step": 8680 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018492830122303193, + "loss": 1.0357, + "step": 8685 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018490077624358698, + "loss": 0.9803, + "step": 8690 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018487322820537444, + "loss": 1.0748, + "step": 8695 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018484565711587636, + "loss": 0.994, + "step": 8700 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018481806298258088, + "loss": 0.9863, + "step": 8705 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001847904458129825, + "loss": 1.0737, + "step": 8710 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018476280561458195, + "loss": 1.041, + "step": 8715 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018473514239488622, + "loss": 1.0958, + "step": 8720 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018470745616140855, + "loss": 1.1243, + "step": 8725 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001846797469216684, + "loss": 1.0914, + "step": 8730 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018465201468319156, + "loss": 1.069, + "step": 8735 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018462425945350996, + "loss": 1.1206, + "step": 8740 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018459648124016185, + "loss": 1.0171, + "step": 8745 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001845686800506917, + "loss": 1.1079, + "step": 8750 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001845408558926502, + "loss": 1.0418, + "step": 8755 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018451300877359437, + "loss": 1.0546, + "step": 8760 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001844851387010873, + "loss": 1.0015, + "step": 8765 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018445724568269845, + "loss": 1.0903, + "step": 8770 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001844293297260035, + "loss": 1.0207, + "step": 8775 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001844013908385843, + "loss": 1.1067, + "step": 8780 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018437342902802897, + "loss": 1.0979, + "step": 8785 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018434544430193182, + "loss": 1.0979, + "step": 8790 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018431743666789342, + "loss": 1.047, + "step": 8795 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018428940613352058, + "loss": 1.0804, + "step": 8800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001842613527064262, + "loss": 1.043, + "step": 8805 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018423327639422964, + "loss": 1.0372, + "step": 8810 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001842051772045563, + "loss": 1.1403, + "step": 8815 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001841770551450377, + "loss": 1.0577, + "step": 8820 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018414891022331184, + "loss": 1.1079, + "step": 8825 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018412074244702274, + "loss": 1.0391, + "step": 8830 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018409255182382065, + "loss": 0.9934, + "step": 8835 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018406433836136205, + "loss": 1.0312, + "step": 8840 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018403610206730968, + "loss": 1.0983, + "step": 8845 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018400784294933238, + "loss": 0.9886, + "step": 8850 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018397956101510522, + "loss": 1.0446, + "step": 8855 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018395125627230949, + "loss": 1.1094, + "step": 8860 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018392292872863267, + "loss": 1.0722, + "step": 8865 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018389457839176844, + "loss": 1.0138, + "step": 8870 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018386620526941662, + "loss": 1.0333, + "step": 8875 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001838378093692833, + "loss": 1.1228, + "step": 8880 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018380939069908067, + "loss": 1.1053, + "step": 8885 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018378094926652718, + "loss": 1.0006, + "step": 8890 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001837524850793474, + "loss": 1.0861, + "step": 8895 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001837239981452721, + "loss": 1.0508, + "step": 8900 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001836954884720383, + "loss": 1.1438, + "step": 8905 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018366695606738908, + "loss": 1.0971, + "step": 8910 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018363840093907376, + "loss": 1.0189, + "step": 8915 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001836098230948478, + "loss": 1.0605, + "step": 8920 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018358122254247286, + "loss": 1.0267, + "step": 8925 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018355259928971678, + "loss": 1.086, + "step": 8930 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001835239533443535, + "loss": 1.1447, + "step": 8935 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018349528471416315, + "loss": 1.0877, + "step": 8940 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001834665934069321, + "loss": 1.0966, + "step": 8945 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018343787943045275, + "loss": 1.0805, + "step": 8950 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018340914279252376, + "loss": 1.0764, + "step": 8955 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018338038350094985, + "loss": 1.0628, + "step": 8960 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018335160156354206, + "loss": 1.056, + "step": 8965 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018332279698811735, + "loss": 1.1383, + "step": 8970 + }, + { + "epoch": 0.27, + "learning_rate": 0.000183293969782499, + "loss": 1.1231, + "step": 8975 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018326511995451637, + "loss": 1.0614, + "step": 8980 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018323624751200498, + "loss": 1.0065, + "step": 8985 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018320735246280654, + "loss": 1.1525, + "step": 8990 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018317843481476876, + "loss": 1.0112, + "step": 8995 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001831494945757456, + "loss": 1.1021, + "step": 9000 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018312053175359718, + "loss": 1.0999, + "step": 9005 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018309154635618965, + "loss": 1.0045, + "step": 9010 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001830625383913954, + "loss": 1.0683, + "step": 9015 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018303350786709287, + "loss": 1.0421, + "step": 9020 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018300445479116663, + "loss": 0.9429, + "step": 9025 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018297537917150747, + "loss": 1.045, + "step": 9030 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018294628101601213, + "loss": 1.1988, + "step": 9035 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018291716033258366, + "loss": 1.1228, + "step": 9040 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001828880171291311, + "loss": 1.0659, + "step": 9045 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018285885141356963, + "loss": 0.9692, + "step": 9050 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018282966319382057, + "loss": 1.075, + "step": 9055 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001828004524778114, + "loss": 1.0313, + "step": 9060 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018277121927347557, + "loss": 1.0588, + "step": 9065 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001827419635887528, + "loss": 1.1489, + "step": 9070 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018271268543158876, + "loss": 1.1478, + "step": 9075 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018268338480993535, + "loss": 1.0938, + "step": 9080 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018265406173175052, + "loss": 1.0669, + "step": 9085 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018262471620499832, + "loss": 1.0314, + "step": 9090 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018259534823764886, + "loss": 1.0462, + "step": 9095 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018256595783767843, + "loss": 1.1094, + "step": 9100 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001825365450130693, + "loss": 1.0894, + "step": 9105 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018250710977180993, + "loss": 0.9947, + "step": 9110 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001824776521218949, + "loss": 0.9896, + "step": 9115 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001824481720713247, + "loss": 0.9662, + "step": 9120 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018241866962810609, + "loss": 1.0737, + "step": 9125 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001823891448002518, + "loss": 1.1008, + "step": 9130 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018235959759578066, + "loss": 1.1576, + "step": 9135 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018233002802271763, + "loss": 1.0693, + "step": 9140 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001823004360890937, + "loss": 1.0303, + "step": 9145 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018227082180294593, + "loss": 1.1354, + "step": 9150 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018224118517231745, + "loss": 0.9801, + "step": 9155 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001822115262052575, + "loss": 1.1462, + "step": 9160 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018218184490982134, + "loss": 1.089, + "step": 9165 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001821521412940703, + "loss": 1.0919, + "step": 9170 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018212241536607178, + "loss": 1.0244, + "step": 9175 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018209266713389928, + "loss": 1.0901, + "step": 9180 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018206289660563227, + "loss": 1.0607, + "step": 9185 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018203310378935638, + "loss": 1.129, + "step": 9190 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001820032886931632, + "loss": 1.0977, + "step": 9195 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018197345132515044, + "loss": 1.0385, + "step": 9200 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001819435916934218, + "loss": 1.0308, + "step": 9205 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018191370980608705, + "loss": 1.1238, + "step": 9210 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018188380567126205, + "loss": 1.0373, + "step": 9215 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018185387929706866, + "loss": 1.069, + "step": 9220 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018182393069163477, + "loss": 1.107, + "step": 9225 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018179395986309428, + "loss": 1.0122, + "step": 9230 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018176396681958724, + "loss": 1.0674, + "step": 9235 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001817339515692596, + "loss": 1.0335, + "step": 9240 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018170391412026344, + "loss": 0.9976, + "step": 9245 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018167385448075683, + "loss": 1.09, + "step": 9250 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018164377265890384, + "loss": 1.1133, + "step": 9255 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018161366866287462, + "loss": 1.1153, + "step": 9260 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018158354250084527, + "loss": 0.9831, + "step": 9265 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018155339418099802, + "loss": 1.0122, + "step": 9270 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018152322371152102, + "loss": 1.0698, + "step": 9275 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018149303110060846, + "loss": 1.0615, + "step": 9280 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018146281635646059, + "loss": 1.0508, + "step": 9285 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018143257948728358, + "loss": 1.0151, + "step": 9290 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001814023205012897, + "loss": 1.0845, + "step": 9295 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018137203940669715, + "loss": 1.0291, + "step": 9300 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018134173621173023, + "loss": 1.0044, + "step": 9305 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018131141092461917, + "loss": 1.0676, + "step": 9310 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001812810635536002, + "loss": 1.0104, + "step": 9315 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018125069410691554, + "loss": 1.0304, + "step": 9320 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001812203025928135, + "loss": 1.0728, + "step": 9325 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018118988901954825, + "loss": 1.0693, + "step": 9330 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018115945339538007, + "loss": 1.1296, + "step": 9335 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018112899572857513, + "loss": 1.0553, + "step": 9340 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018109851602740563, + "loss": 1.0454, + "step": 9345 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001810680143001498, + "loss": 1.1059, + "step": 9350 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018103749055509177, + "loss": 1.0721, + "step": 9355 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018100694480052167, + "loss": 1.0436, + "step": 9360 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001809763770447357, + "loss": 1.0557, + "step": 9365 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018094578729603583, + "loss": 1.0236, + "step": 9370 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018091517556273029, + "loss": 1.0323, + "step": 9375 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018088454185313303, + "loss": 1.0606, + "step": 9380 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018085388617556408, + "loss": 1.0591, + "step": 9385 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001808232085383494, + "loss": 1.0361, + "step": 9390 + }, + { + "epoch": 0.28, + "learning_rate": 0.000180792508949821, + "loss": 1.0301, + "step": 9395 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018076178741831668, + "loss": 1.0147, + "step": 9400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001807310439521804, + "loss": 1.1033, + "step": 9405 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018070027855976198, + "loss": 1.123, + "step": 9410 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018066949124941712, + "loss": 1.1029, + "step": 9415 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018063868202950763, + "loss": 1.0138, + "step": 9420 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018060785090840114, + "loss": 1.03, + "step": 9425 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018057699789447132, + "loss": 0.9927, + "step": 9430 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001805461229960977, + "loss": 1.0876, + "step": 9435 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018051522622166585, + "loss": 1.0637, + "step": 9440 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001804843075795672, + "loss": 1.035, + "step": 9445 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018045336707819912, + "loss": 1.0558, + "step": 9450 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018042240472596502, + "loss": 1.0774, + "step": 9455 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018039142053127412, + "loss": 1.0296, + "step": 9460 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018036041450254165, + "loss": 1.0875, + "step": 9465 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018032938664818871, + "loss": 1.1091, + "step": 9470 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001802983369766424, + "loss": 1.0048, + "step": 9475 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018026726549633573, + "loss": 1.0819, + "step": 9480 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001802361722157075, + "loss": 1.0892, + "step": 9485 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018020505714320267, + "loss": 0.9882, + "step": 9490 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001801739202872719, + "loss": 1.0099, + "step": 9495 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018014276165637187, + "loss": 1.0049, + "step": 9500 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018011158125896522, + "loss": 0.9832, + "step": 9505 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018008037910352038, + "loss": 0.9622, + "step": 9510 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018004915519851178, + "loss": 1.0992, + "step": 9515 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018001790955241972, + "loss": 1.0749, + "step": 9520 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001799866421737304, + "loss": 1.0929, + "step": 9525 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017995535307093597, + "loss": 1.0834, + "step": 9530 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001799240422525344, + "loss": 1.0512, + "step": 9535 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017989270972702966, + "loss": 1.0923, + "step": 9540 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001798613555029315, + "loss": 1.1023, + "step": 9545 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017982997958875565, + "loss": 1.0273, + "step": 9550 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017979858199302374, + "loss": 1.091, + "step": 9555 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017976716272426318, + "loss": 1.1111, + "step": 9560 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001797357217910074, + "loss": 1.071, + "step": 9565 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017970425920179562, + "loss": 1.0787, + "step": 9570 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017967277496517298, + "loss": 1.0982, + "step": 9575 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017964126908969047, + "loss": 1.1133, + "step": 9580 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017960974158390505, + "loss": 1.0864, + "step": 9585 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017957819245637943, + "loss": 1.1479, + "step": 9590 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017954662171568226, + "loss": 1.0617, + "step": 9595 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017951502937038803, + "loss": 1.0668, + "step": 9600 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017948341542907713, + "loss": 0.9874, + "step": 9605 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017945177990033584, + "loss": 1.0718, + "step": 9610 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001794201227927562, + "loss": 1.0583, + "step": 9615 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017938844411493623, + "loss": 1.0286, + "step": 9620 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001793567438754797, + "loss": 1.0601, + "step": 9625 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001793250220829963, + "loss": 0.981, + "step": 9630 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001792932787461016, + "loss": 1.097, + "step": 9635 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017926151387341697, + "loss": 1.0536, + "step": 9640 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017922972747356963, + "loss": 1.1318, + "step": 9645 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017919791955519268, + "loss": 1.0816, + "step": 9650 + }, + { + "epoch": 0.29, + "learning_rate": 0.000179166090126925, + "loss": 0.9683, + "step": 9655 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001791342391974114, + "loss": 1.0296, + "step": 9660 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017910236677530248, + "loss": 1.0833, + "step": 9665 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017907047286925466, + "loss": 1.0989, + "step": 9670 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017903855748793027, + "loss": 1.0359, + "step": 9675 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017900662063999735, + "loss": 0.9875, + "step": 9680 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001789746623341299, + "loss": 1.1201, + "step": 9685 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017894268257900766, + "loss": 1.0228, + "step": 9690 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017891068138331623, + "loss": 1.0629, + "step": 9695 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017887865875574702, + "loss": 1.0374, + "step": 9700 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017884661470499733, + "loss": 1.0532, + "step": 9705 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001788145492397701, + "loss": 1.007, + "step": 9710 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017878246236877431, + "loss": 1.1285, + "step": 9715 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017875035410072458, + "loss": 1.0121, + "step": 9720 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017871822444434144, + "loss": 1.0373, + "step": 9725 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001786860734083512, + "loss": 1.0169, + "step": 9730 + }, + { + "epoch": 0.29, + "learning_rate": 0.000178653901001486, + "loss": 1.0481, + "step": 9735 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017862170723248371, + "loss": 1.0503, + "step": 9740 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017858949211008802, + "loss": 1.0542, + "step": 9745 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017855725564304854, + "loss": 1.0643, + "step": 9750 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017852499784012048, + "loss": 1.0859, + "step": 9755 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017849271871006508, + "loss": 0.9938, + "step": 9760 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017846041826164916, + "loss": 1.0597, + "step": 9765 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017842809650364541, + "loss": 1.1135, + "step": 9770 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017839575344483238, + "loss": 0.991, + "step": 9775 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017836338909399423, + "loss": 1.0387, + "step": 9780 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017833100345992112, + "loss": 1.0582, + "step": 9785 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001782985965514088, + "loss": 0.9943, + "step": 9790 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017826616837725894, + "loss": 1.0798, + "step": 9795 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017823371894627884, + "loss": 1.0353, + "step": 9800 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017820124826728172, + "loss": 1.14, + "step": 9805 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017816875634908653, + "loss": 1.1337, + "step": 9810 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017813624320051793, + "loss": 1.0926, + "step": 9815 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017810370883040636, + "loss": 1.0967, + "step": 9820 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001780711532475881, + "loss": 1.1147, + "step": 9825 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001780385764609051, + "loss": 1.126, + "step": 9830 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001780059784792051, + "loss": 1.0112, + "step": 9835 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017797335931134164, + "loss": 1.1067, + "step": 9840 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017794071896617397, + "loss": 1.0192, + "step": 9845 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017790805745256704, + "loss": 1.0365, + "step": 9850 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017787537477939168, + "loss": 1.0413, + "step": 9855 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017784267095552437, + "loss": 1.0762, + "step": 9860 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017780994598984737, + "loss": 1.1271, + "step": 9865 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017777719989124866, + "loss": 0.9765, + "step": 9870 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017774443266862196, + "loss": 1.1807, + "step": 9875 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017771164433086676, + "loss": 1.1155, + "step": 9880 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017767883488688824, + "loss": 1.1016, + "step": 9885 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017764600434559734, + "loss": 1.0402, + "step": 9890 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017761315271591074, + "loss": 1.0907, + "step": 9895 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017758028000675085, + "loss": 1.04, + "step": 9900 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017754738622704575, + "loss": 1.0962, + "step": 9905 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001775144713857293, + "loss": 1.0974, + "step": 9910 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017748153549174101, + "loss": 1.1375, + "step": 9915 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017744857855402625, + "loss": 0.9718, + "step": 9920 + }, + { + "epoch": 0.3, + "learning_rate": 0.000177415600581536, + "loss": 0.9461, + "step": 9925 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017738260158322688, + "loss": 1.0948, + "step": 9930 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017734958156806138, + "loss": 0.9827, + "step": 9935 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001773165405450076, + "loss": 1.0447, + "step": 9940 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017728347852303942, + "loss": 1.0658, + "step": 9945 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017725039551113632, + "loss": 1.0579, + "step": 9950 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017721729151828355, + "loss": 1.0644, + "step": 9955 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017718416655347204, + "loss": 1.0203, + "step": 9960 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017715102062569843, + "loss": 1.0373, + "step": 9965 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017711785374396506, + "loss": 1.0775, + "step": 9970 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017708466591727994, + "loss": 1.0262, + "step": 9975 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001770514571546567, + "loss": 1.0232, + "step": 9980 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001770182274651148, + "loss": 1.0877, + "step": 9985 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001769849768576793, + "loss": 0.999, + "step": 9990 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017695170534138099, + "loss": 1.1102, + "step": 9995 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017691841292525625, + "loss": 1.0055, + "step": 10000 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017688509961834718, + "loss": 1.0518, + "step": 10005 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017685176542970164, + "loss": 1.0937, + "step": 10010 + }, + { + "epoch": 0.3, + "learning_rate": 0.000176818410368373, + "loss": 1.1077, + "step": 10015 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017678503444342041, + "loss": 1.0592, + "step": 10020 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017675163766390868, + "loss": 1.0446, + "step": 10025 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017671822003890823, + "loss": 1.1023, + "step": 10030 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017668478157749522, + "loss": 1.0301, + "step": 10035 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017665132228875138, + "loss": 1.0979, + "step": 10040 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001766178421817642, + "loss": 0.9809, + "step": 10045 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017658434126562668, + "loss": 1.1656, + "step": 10050 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017655081954943763, + "loss": 1.0992, + "step": 10055 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001765172770423014, + "loss": 1.0201, + "step": 10060 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017648371375332804, + "loss": 1.0051, + "step": 10065 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017645012969163323, + "loss": 1.0845, + "step": 10070 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017641652486633826, + "loss": 0.9871, + "step": 10075 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001763828992865701, + "loss": 1.105, + "step": 10080 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017634925296146136, + "loss": 0.976, + "step": 10085 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017631558590015028, + "loss": 1.1195, + "step": 10090 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001762818981117807, + "loss": 1.0407, + "step": 10095 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001762481896055021, + "loss": 1.0617, + "step": 10100 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017621446039046962, + "loss": 1.0943, + "step": 10105 + }, + { + "epoch": 0.3, + "learning_rate": 0.000176180710475844, + "loss": 1.0255, + "step": 10110 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017614693987079162, + "loss": 1.0764, + "step": 10115 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001761131485844844, + "loss": 1.0446, + "step": 10120 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001760793366261, + "loss": 1.0476, + "step": 10125 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017604550400482163, + "loss": 1.0885, + "step": 10130 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001760116507298381, + "loss": 1.0298, + "step": 10135 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017597777681034385, + "loss": 1.0527, + "step": 10140 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017594388225553894, + "loss": 1.074, + "step": 10145 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017590996707462898, + "loss": 1.098, + "step": 10150 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017587603127682525, + "loss": 1.027, + "step": 10155 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001758420748713446, + "loss": 1.0795, + "step": 10160 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017580809786740947, + "loss": 1.0218, + "step": 10165 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001757741002742479, + "loss": 1.1484, + "step": 10170 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017574008210109355, + "loss": 1.1184, + "step": 10175 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001757060433571856, + "loss": 1.0346, + "step": 10180 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017567198405176883, + "loss": 0.9837, + "step": 10185 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017563790419409373, + "loss": 1.1164, + "step": 10190 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001756038037934162, + "loss": 1.1031, + "step": 10195 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017556968285899785, + "loss": 1.057, + "step": 10200 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017553554140010573, + "loss": 1.1212, + "step": 10205 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017550137942601263, + "loss": 1.1493, + "step": 10210 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017546719694599682, + "loss": 1.0712, + "step": 10215 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017543299396934208, + "loss": 1.0703, + "step": 10220 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001753987705053379, + "loss": 1.0419, + "step": 10225 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001753645265632792, + "loss": 1.0542, + "step": 10230 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017533026215246655, + "loss": 1.0051, + "step": 10235 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017529597728220607, + "loss": 0.9742, + "step": 10240 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017526167196180937, + "loss": 1.0363, + "step": 10245 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017522734620059369, + "loss": 1.0681, + "step": 10250 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017519300000788177, + "loss": 1.0744, + "step": 10255 + }, + { + "epoch": 0.31, + "learning_rate": 0.000175158633393002, + "loss": 0.9856, + "step": 10260 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017512424636528811, + "loss": 0.9904, + "step": 10265 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017508983893407958, + "loss": 1.043, + "step": 10270 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017505541110872134, + "loss": 1.0616, + "step": 10275 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017502096289856388, + "loss": 1.0931, + "step": 10280 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017498649431296322, + "loss": 1.1059, + "step": 10285 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001749520053612809, + "loss": 1.1393, + "step": 10290 + }, + { + "epoch": 0.31, + "learning_rate": 0.000174917496052884, + "loss": 1.1346, + "step": 10295 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017488296639714515, + "loss": 1.0574, + "step": 10300 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017484841640344248, + "loss": 1.0793, + "step": 10305 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017481384608115965, + "loss": 1.0286, + "step": 10310 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017477925543968586, + "loss": 1.106, + "step": 10315 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001747446444884158, + "loss": 1.0845, + "step": 10320 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017471001323674968, + "loss": 1.0591, + "step": 10325 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017467536169409322, + "loss": 1.0897, + "step": 10330 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017464068986985774, + "loss": 0.9603, + "step": 10335 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001746059977734599, + "loss": 1.1038, + "step": 10340 + }, + { + "epoch": 0.31, + "learning_rate": 0.000174571285414322, + "loss": 1.1185, + "step": 10345 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001745365528018718, + "loss": 1.0566, + "step": 10350 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017450179994554253, + "loss": 1.0225, + "step": 10355 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017446702685477303, + "loss": 1.113, + "step": 10360 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001744322335390075, + "loss": 0.9604, + "step": 10365 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017439742000769566, + "loss": 1.0882, + "step": 10370 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001743625862702928, + "loss": 1.0483, + "step": 10375 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017432773233625961, + "loss": 0.9613, + "step": 10380 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017429285821506232, + "loss": 0.9571, + "step": 10385 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017425796391617264, + "loss": 1.1105, + "step": 10390 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001742230494490677, + "loss": 1.1211, + "step": 10395 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017418811482323022, + "loss": 1.0489, + "step": 10400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001741531600481483, + "loss": 1.0653, + "step": 10405 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017411818513331549, + "loss": 1.149, + "step": 10410 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017408319008823092, + "loss": 0.9796, + "step": 10415 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001740481749223991, + "loss": 1.0434, + "step": 10420 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017401313964533004, + "loss": 1.0935, + "step": 10425 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017397808426653923, + "loss": 1.0577, + "step": 10430 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001739430087955475, + "loss": 1.0501, + "step": 10435 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017390791324188137, + "loss": 1.0698, + "step": 10440 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017387279761507261, + "loss": 1.0553, + "step": 10445 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017383766192465846, + "loss": 1.1076, + "step": 10450 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001738025061801817, + "loss": 1.1294, + "step": 10455 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017376733039119055, + "loss": 1.093, + "step": 10460 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017373213456723855, + "loss": 1.094, + "step": 10465 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017369691871788487, + "loss": 1.047, + "step": 10470 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017366168285269393, + "loss": 0.9834, + "step": 10475 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001736264269812357, + "loss": 1.0605, + "step": 10480 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017359115111308557, + "loss": 1.0071, + "step": 10485 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017355585525782435, + "loss": 1.0752, + "step": 10490 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017352053942503828, + "loss": 0.9945, + "step": 10495 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017348520362431898, + "loss": 1.0059, + "step": 10500 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001734498478652636, + "loss": 1.0507, + "step": 10505 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001734144721574746, + "loss": 1.117, + "step": 10510 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017337907651055989, + "loss": 1.1572, + "step": 10515 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017334366093413287, + "loss": 1.0091, + "step": 10520 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017330822543781225, + "loss": 1.0622, + "step": 10525 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017327277003122217, + "loss": 1.0245, + "step": 10530 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017323729472399228, + "loss": 1.0173, + "step": 10535 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001732017995257575, + "loss": 1.0354, + "step": 10540 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017316628444615818, + "loss": 1.1327, + "step": 10545 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017313074949484015, + "loss": 1.1031, + "step": 10550 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001730951946814546, + "loss": 1.035, + "step": 10555 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017305962001565802, + "loss": 0.9861, + "step": 10560 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017302402550711243, + "loss": 1.1012, + "step": 10565 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017298841116548515, + "loss": 1.0657, + "step": 10570 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017295277700044896, + "loss": 1.0602, + "step": 10575 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017291712302168192, + "loss": 1.0591, + "step": 10580 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017288144923886757, + "loss": 1.0226, + "step": 10585 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017284575566169481, + "loss": 1.0894, + "step": 10590 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017281004229985784, + "loss": 1.1207, + "step": 10595 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017277430916305634, + "loss": 1.0861, + "step": 10600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001727385562609953, + "loss": 1.0469, + "step": 10605 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017270278360338503, + "loss": 1.1543, + "step": 10610 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017266699119994136, + "loss": 1.0736, + "step": 10615 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001726311790603853, + "loss": 1.0649, + "step": 10620 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017259534719444339, + "loss": 1.0869, + "step": 10625 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017255949561184738, + "loss": 1.094, + "step": 10630 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017252362432233447, + "loss": 1.0225, + "step": 10635 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017248773333564715, + "loss": 1.0428, + "step": 10640 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017245182266153336, + "loss": 1.0885, + "step": 10645 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017241589230974626, + "loss": 1.0467, + "step": 10650 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017237994229004445, + "loss": 1.129, + "step": 10655 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001723439726121918, + "loss": 1.031, + "step": 10660 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017230798328595758, + "loss": 1.0598, + "step": 10665 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017227197432111635, + "loss": 1.1726, + "step": 10670 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017223594572744806, + "loss": 1.0586, + "step": 10675 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001721998975147379, + "loss": 1.0251, + "step": 10680 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001721638296927765, + "loss": 1.0656, + "step": 10685 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017212774227135972, + "loss": 1.1257, + "step": 10690 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017209163526028882, + "loss": 1.1391, + "step": 10695 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017205550866937036, + "loss": 1.0993, + "step": 10700 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017201936250841612, + "loss": 1.0638, + "step": 10705 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001719831967872433, + "loss": 1.0655, + "step": 10710 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017194701151567444, + "loss": 1.1308, + "step": 10715 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017191080670353733, + "loss": 1.0275, + "step": 10720 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017187458236066504, + "loss": 1.1002, + "step": 10725 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017183833849689596, + "loss": 1.1032, + "step": 10730 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017180207512207388, + "loss": 1.0557, + "step": 10735 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017176579224604778, + "loss": 1.1544, + "step": 10740 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001717294898786719, + "loss": 0.9852, + "step": 10745 + }, + { + "epoch": 0.32, + "learning_rate": 0.000171693168029806, + "loss": 1.0723, + "step": 10750 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001716568267093148, + "loss": 1.1809, + "step": 10755 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001716204659270686, + "loss": 0.9814, + "step": 10760 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001715840856929428, + "loss": 1.0341, + "step": 10765 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017154768601681817, + "loss": 1.0411, + "step": 10770 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017151126690858072, + "loss": 0.991, + "step": 10775 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017147482837812184, + "loss": 1.0587, + "step": 10780 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017143837043533802, + "loss": 1.0361, + "step": 10785 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017140189309013115, + "loss": 1.0721, + "step": 10790 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017136539635240837, + "loss": 1.1646, + "step": 10795 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017132888023208207, + "loss": 1.0641, + "step": 10800 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017129234473906986, + "loss": 1.0283, + "step": 10805 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017125578988329472, + "loss": 1.1348, + "step": 10810 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001712192156746848, + "loss": 1.0673, + "step": 10815 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017118262212317354, + "loss": 1.0256, + "step": 10820 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001711460092386996, + "loss": 1.027, + "step": 10825 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017110937703120692, + "loss": 1.0684, + "step": 10830 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017107272551064473, + "loss": 1.0521, + "step": 10835 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017103605468696742, + "loss": 1.0737, + "step": 10840 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001709993645701347, + "loss": 1.1187, + "step": 10845 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017096265517011142, + "loss": 1.0724, + "step": 10850 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001709259264968678, + "loss": 1.0554, + "step": 10855 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017088917856037918, + "loss": 1.071, + "step": 10860 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001708524113706262, + "loss": 1.0793, + "step": 10865 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017081562493759473, + "loss": 1.0794, + "step": 10870 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017077881927127578, + "loss": 1.0713, + "step": 10875 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017074199438166566, + "loss": 0.9661, + "step": 10880 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001707051502787659, + "loss": 1.1395, + "step": 10885 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017066828697258327, + "loss": 1.0334, + "step": 10890 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017063140447312969, + "loss": 1.0667, + "step": 10895 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017059450279042232, + "loss": 0.9435, + "step": 10900 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001705575819344835, + "loss": 1.1204, + "step": 10905 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017052064191534088, + "loss": 0.9949, + "step": 10910 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001704836827430272, + "loss": 1.0463, + "step": 10915 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017044670442758052, + "loss": 1.141, + "step": 10920 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017040970697904394, + "loss": 1.1366, + "step": 10925 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017037269040746583, + "loss": 1.0973, + "step": 10930 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017033565472289984, + "loss": 1.0018, + "step": 10935 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017029859993540473, + "loss": 1.0307, + "step": 10940 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017026152605504442, + "loss": 1.114, + "step": 10945 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017022443309188808, + "loss": 1.1354, + "step": 10950 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017018732105600999, + "loss": 0.9942, + "step": 10955 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017015018995748972, + "loss": 1.142, + "step": 10960 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001701130398064119, + "loss": 1.0744, + "step": 10965 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001700758706128664, + "loss": 1.0051, + "step": 10970 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017003868238694829, + "loss": 1.0617, + "step": 10975 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017000147513875774, + "loss": 1.1149, + "step": 10980 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016996424887840008, + "loss": 1.0244, + "step": 10985 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016992700361598585, + "loss": 1.1042, + "step": 10990 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016988973936163078, + "loss": 1.0517, + "step": 10995 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001698524561254557, + "loss": 1.0554, + "step": 11000 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016981515391758666, + "loss": 1.0733, + "step": 11005 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016977783274815466, + "loss": 1.0433, + "step": 11010 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016974049262729617, + "loss": 1.0617, + "step": 11015 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016970313356515256, + "loss": 1.1643, + "step": 11020 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016966575557187042, + "loss": 1.0726, + "step": 11025 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016962835865760157, + "loss": 1.0293, + "step": 11030 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016959094283250278, + "loss": 1.1168, + "step": 11035 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016955350810673612, + "loss": 1.0544, + "step": 11040 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016951605449046876, + "loss": 1.0167, + "step": 11045 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016947858199387294, + "loss": 1.1325, + "step": 11050 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016944109062712608, + "loss": 1.1149, + "step": 11055 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016940358040041067, + "loss": 1.0613, + "step": 11060 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016936605132391442, + "loss": 1.0369, + "step": 11065 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016932850340783007, + "loss": 1.0442, + "step": 11070 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016929093666235548, + "loss": 1.0915, + "step": 11075 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016925335109769374, + "loss": 0.9899, + "step": 11080 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001692157467240529, + "loss": 1.1513, + "step": 11085 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001691781235516462, + "loss": 1.0328, + "step": 11090 + }, + { + "epoch": 0.33, + "learning_rate": 0.000169140481590692, + "loss": 1.1203, + "step": 11095 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001691028208514136, + "loss": 1.1032, + "step": 11100 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001690651413440397, + "loss": 1.0514, + "step": 11105 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016902744307880382, + "loss": 1.0804, + "step": 11110 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016898972606594473, + "loss": 1.0344, + "step": 11115 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016895199031570623, + "loss": 1.0962, + "step": 11120 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016891423583833724, + "loss": 1.075, + "step": 11125 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001688764626440917, + "loss": 1.0056, + "step": 11130 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016883867074322874, + "loss": 1.0226, + "step": 11135 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016880086014601248, + "loss": 1.0332, + "step": 11140 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001687630308627122, + "loss": 1.0638, + "step": 11145 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016872518290360214, + "loss": 1.0057, + "step": 11150 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016868731627896173, + "loss": 1.0786, + "step": 11155 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001686494309990754, + "loss": 1.0649, + "step": 11160 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016861152707423264, + "loss": 1.0194, + "step": 11165 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001685736045147281, + "loss": 1.1267, + "step": 11170 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016853566333086136, + "loss": 0.9675, + "step": 11175 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001684977035329371, + "loss": 1.1057, + "step": 11180 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016845972513126518, + "loss": 1.0367, + "step": 11185 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016842172813616029, + "loss": 1.0302, + "step": 11190 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016838371255794234, + "loss": 1.0392, + "step": 11195 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016834567840693623, + "loss": 1.0329, + "step": 11200 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016830762569347191, + "loss": 0.9874, + "step": 11205 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016826955442788438, + "loss": 0.9987, + "step": 11210 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016823146462051364, + "loss": 1.0931, + "step": 11215 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016819335628170476, + "loss": 1.06, + "step": 11220 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016815522942180782, + "loss": 1.0694, + "step": 11225 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016811708405117806, + "loss": 1.097, + "step": 11230 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001680789201801755, + "loss": 1.0368, + "step": 11235 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016804073781916538, + "loss": 1.0322, + "step": 11240 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016800253697851785, + "loss": 1.0542, + "step": 11245 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016796431766860826, + "loss": 1.0591, + "step": 11250 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001679260798998167, + "loss": 1.032, + "step": 11255 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001678878236825285, + "loss": 0.9596, + "step": 11260 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001678495490271339, + "loss": 1.1499, + "step": 11265 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016781125594402817, + "loss": 1.0385, + "step": 11270 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016777294444361157, + "loss": 1.0725, + "step": 11275 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016773461453628943, + "loss": 1.0684, + "step": 11280 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016769626623247197, + "loss": 1.0652, + "step": 11285 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016765789954257447, + "loss": 0.9965, + "step": 11290 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016761951447701718, + "loss": 1.0424, + "step": 11295 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001675811110462254, + "loss": 1.0728, + "step": 11300 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016754268926062938, + "loss": 1.0953, + "step": 11305 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016750424913066427, + "loss": 1.1191, + "step": 11310 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016746579066677036, + "loss": 1.0527, + "step": 11315 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001674273138793928, + "loss": 1.0268, + "step": 11320 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016738881877898174, + "loss": 1.0805, + "step": 11325 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001673503053759924, + "loss": 1.032, + "step": 11330 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016731177368088483, + "loss": 0.9756, + "step": 11335 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001672732237041241, + "loss": 1.1145, + "step": 11340 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001672346554561803, + "loss": 1.1231, + "step": 11345 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001671960689475284, + "loss": 1.0336, + "step": 11350 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016715746418864837, + "loss": 1.0601, + "step": 11355 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016711884119002517, + "loss": 1.1045, + "step": 11360 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016708019996214863, + "loss": 1.0638, + "step": 11365 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001670415405155136, + "loss": 1.1021, + "step": 11370 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016700286286061987, + "loss": 1.0261, + "step": 11375 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016696416700797214, + "loss": 1.0588, + "step": 11380 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001669254529680801, + "loss": 1.0451, + "step": 11385 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016688672075145831, + "loss": 1.0668, + "step": 11390 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016684797036862635, + "loss": 1.0468, + "step": 11395 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016680920183010872, + "loss": 1.0722, + "step": 11400 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016677041514643476, + "loss": 1.0765, + "step": 11405 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001667316103281388, + "loss": 1.162, + "step": 11410 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016669278738576019, + "loss": 1.0987, + "step": 11415 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016665394632984302, + "loss": 1.1128, + "step": 11420 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001666150871709364, + "loss": 1.0165, + "step": 11425 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001665762099195944, + "loss": 1.0056, + "step": 11430 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016653731458637593, + "loss": 0.9907, + "step": 11435 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001664984011818448, + "loss": 1.0796, + "step": 11440 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016645946971656985, + "loss": 1.0061, + "step": 11445 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016642052020112463, + "loss": 1.0139, + "step": 11450 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016638155264608772, + "loss": 1.0964, + "step": 11455 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016634256706204265, + "loss": 1.0438, + "step": 11460 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016630356345957769, + "loss": 1.1069, + "step": 11465 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016626454184928612, + "loss": 1.0651, + "step": 11470 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016622550224176612, + "loss": 1.1476, + "step": 11475 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016618644464762064, + "loss": 1.0643, + "step": 11480 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016614736907745768, + "loss": 1.0212, + "step": 11485 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001661082755418899, + "loss": 1.1331, + "step": 11490 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016606916405153513, + "loss": 1.0847, + "step": 11495 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016603003461701583, + "loss": 1.0255, + "step": 11500 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016599088724895947, + "loss": 1.0759, + "step": 11505 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016595172195799828, + "loss": 1.0984, + "step": 11510 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001659125387547695, + "loss": 1.073, + "step": 11515 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016587333764991508, + "loss": 1.0789, + "step": 11520 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016583411865408196, + "loss": 1.0268, + "step": 11525 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016579488177792187, + "loss": 1.0709, + "step": 11530 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016575562703209144, + "loss": 1.0498, + "step": 11535 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016571635442725208, + "loss": 1.0779, + "step": 11540 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016567706397407016, + "loss": 1.0348, + "step": 11545 + }, + { + "epoch": 0.34, + "learning_rate": 0.00016563775568321677, + "loss": 1.0354, + "step": 11550 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016559842956536799, + "loss": 1.0427, + "step": 11555 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016555908563120457, + "loss": 1.1559, + "step": 11560 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016551972389141227, + "loss": 1.0096, + "step": 11565 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016548034435668154, + "loss": 0.9997, + "step": 11570 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001654409470377078, + "loss": 1.0042, + "step": 11575 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016540153194519115, + "loss": 1.0752, + "step": 11580 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016536209908983668, + "loss": 1.0325, + "step": 11585 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016532264848235416, + "loss": 1.1081, + "step": 11590 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016528318013345832, + "loss": 1.0106, + "step": 11595 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016524369405386853, + "loss": 1.092, + "step": 11600 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016520419025430915, + "loss": 1.0487, + "step": 11605 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016516466874550925, + "loss": 1.1423, + "step": 11610 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016512512953820272, + "loss": 1.0689, + "step": 11615 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016508557264312835, + "loss": 1.0832, + "step": 11620 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016504599807102958, + "loss": 1.0373, + "step": 11625 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001650064058326548, + "loss": 1.0593, + "step": 11630 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016496679593875705, + "loss": 1.0384, + "step": 11635 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001649271684000943, + "loss": 1.041, + "step": 11640 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001648875232274293, + "loss": 1.0803, + "step": 11645 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016484786043152947, + "loss": 1.0552, + "step": 11650 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016480818002316712, + "loss": 1.0733, + "step": 11655 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016476848201311928, + "loss": 1.0528, + "step": 11660 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016472876641216786, + "loss": 1.1617, + "step": 11665 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016468903323109947, + "loss": 1.0528, + "step": 11670 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016464928248070547, + "loss": 1.0346, + "step": 11675 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016460951417178205, + "loss": 1.0367, + "step": 11680 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016456972831513017, + "loss": 1.0163, + "step": 11685 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016452992492155553, + "loss": 1.1528, + "step": 11690 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001644901040018686, + "loss": 1.0737, + "step": 11695 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001644502655668846, + "loss": 1.2226, + "step": 11700 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001644104096274235, + "loss": 1.0894, + "step": 11705 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016437053619431006, + "loss": 1.1269, + "step": 11710 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016433064527837375, + "loss": 1.014, + "step": 11715 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001642907368904489, + "loss": 1.0894, + "step": 11720 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016425081104137434, + "loss": 1.0442, + "step": 11725 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016421086774199396, + "loss": 1.0189, + "step": 11730 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016417090700315606, + "loss": 1.0383, + "step": 11735 + }, + { + "epoch": 0.35, + "learning_rate": 0.000164130928835714, + "loss": 1.0017, + "step": 11740 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016409093325052562, + "loss": 1.0848, + "step": 11745 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016405092025845362, + "loss": 1.05, + "step": 11750 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016401088987036538, + "loss": 1.0706, + "step": 11755 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016397084209713304, + "loss": 1.0387, + "step": 11760 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016393077694963344, + "loss": 1.0628, + "step": 11765 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016389069443874813, + "loss": 1.0059, + "step": 11770 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016385059457536336, + "loss": 1.0952, + "step": 11775 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001638104773703702, + "loss": 1.1202, + "step": 11780 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016377034283466423, + "loss": 0.9865, + "step": 11785 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016373019097914594, + "loss": 0.9898, + "step": 11790 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016369002181472047, + "loss": 1.046, + "step": 11795 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016364983535229754, + "loss": 1.0893, + "step": 11800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001636096316027917, + "loss": 1.0112, + "step": 11805 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016356941057712218, + "loss": 1.0174, + "step": 11810 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016352917228621284, + "loss": 1.0089, + "step": 11815 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001634889167409923, + "loss": 1.022, + "step": 11820 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016344864395239378, + "loss": 1.0968, + "step": 11825 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016340835393135528, + "loss": 1.1273, + "step": 11830 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016336804668881945, + "loss": 1.1258, + "step": 11835 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016332772223573352, + "loss": 0.9953, + "step": 11840 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016328738058304956, + "loss": 1.0602, + "step": 11845 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016324702174172416, + "loss": 1.0642, + "step": 11850 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016320664572271873, + "loss": 1.0208, + "step": 11855 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001631662525369992, + "loss": 1.0466, + "step": 11860 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016312584219553626, + "loss": 1.1004, + "step": 11865 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001630854147093052, + "loss": 1.1379, + "step": 11870 + }, + { + "epoch": 0.35, + "learning_rate": 0.000163044970089286, + "loss": 1.0261, + "step": 11875 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016300450834646327, + "loss": 1.0207, + "step": 11880 + }, + { + "epoch": 0.35, + "learning_rate": 0.00016296402949182628, + "loss": 1.1076, + "step": 11885 + }, + { + "epoch": 0.36, + "learning_rate": 0.000162923533536369, + "loss": 1.0206, + "step": 11890 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001628830204910899, + "loss": 1.0338, + "step": 11895 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016284249036699228, + "loss": 1.0762, + "step": 11900 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016280194317508398, + "loss": 1.0633, + "step": 11905 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016276137892637743, + "loss": 1.0292, + "step": 11910 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001627207976318898, + "loss": 1.049, + "step": 11915 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001626801993026428, + "loss": 1.0657, + "step": 11920 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001626395839496628, + "loss": 1.0183, + "step": 11925 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001625989515839808, + "loss": 1.0882, + "step": 11930 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016255830221663243, + "loss": 1.1033, + "step": 11935 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016251763585865793, + "loss": 1.0454, + "step": 11940 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001624769525211021, + "loss": 1.1028, + "step": 11945 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001624362522150144, + "loss": 1.0967, + "step": 11950 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016239553495144896, + "loss": 1.1508, + "step": 11955 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001623548007414644, + "loss": 0.979, + "step": 11960 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016231404959612405, + "loss": 1.0233, + "step": 11965 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001622732815264957, + "loss": 1.0314, + "step": 11970 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001622324965436519, + "loss": 1.0959, + "step": 11975 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016219169465866965, + "loss": 1.0261, + "step": 11980 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016215087588263068, + "loss": 1.0652, + "step": 11985 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016211004022662117, + "loss": 1.1337, + "step": 11990 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016206918770173196, + "loss": 1.0871, + "step": 11995 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001620283183190585, + "loss": 1.0926, + "step": 12000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001619874320897007, + "loss": 0.9973, + "step": 12005 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001619465290247632, + "loss": 1.0675, + "step": 12010 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016190560913535514, + "loss": 1.0662, + "step": 12015 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001618646724325902, + "loss": 1.0407, + "step": 12020 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016182371892758658, + "loss": 1.0701, + "step": 12025 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016178274863146722, + "loss": 1.1339, + "step": 12030 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016174176155535946, + "loss": 1.0564, + "step": 12035 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016170075771039527, + "loss": 1.0982, + "step": 12040 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001616597371077112, + "loss": 1.0472, + "step": 12045 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016161869975844822, + "loss": 1.112, + "step": 12050 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016157764567375197, + "loss": 1.0499, + "step": 12055 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016153657486477264, + "loss": 1.1265, + "step": 12060 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001614954873426649, + "loss": 1.0425, + "step": 12065 + }, + { + "epoch": 0.36, + "learning_rate": 0.000161454383118588, + "loss": 1.0358, + "step": 12070 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016141326220370564, + "loss": 1.1033, + "step": 12075 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016137212460918624, + "loss": 1.0941, + "step": 12080 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016133097034620253, + "loss": 1.0244, + "step": 12085 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016128979942593193, + "loss": 1.0387, + "step": 12090 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016124861185955628, + "loss": 1.091, + "step": 12095 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016120740765826206, + "loss": 1.0611, + "step": 12100 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001611661868332401, + "loss": 1.027, + "step": 12105 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016112494939568589, + "loss": 1.0849, + "step": 12110 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001610836953567994, + "loss": 1.0287, + "step": 12115 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016104242472778503, + "loss": 1.0451, + "step": 12120 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001610011375198518, + "loss": 1.0043, + "step": 12125 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016095983374421314, + "loss": 1.0372, + "step": 12130 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016091851341208706, + "loss": 1.0414, + "step": 12135 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016087717653469598, + "loss": 1.1234, + "step": 12140 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016083582312326685, + "loss": 1.07, + "step": 12145 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001607944531890312, + "loss": 1.0498, + "step": 12150 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016075306674322488, + "loss": 1.066, + "step": 12155 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016071166379708833, + "loss": 1.0404, + "step": 12160 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016067024436186648, + "loss": 1.062, + "step": 12165 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001606288084488087, + "loss": 1.1035, + "step": 12170 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016058735606916882, + "loss": 1.0286, + "step": 12175 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016054588723420521, + "loss": 1.0172, + "step": 12180 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016050440195518063, + "loss": 1.0529, + "step": 12185 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016046290024336236, + "loss": 1.0007, + "step": 12190 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016042138211002215, + "loss": 1.0681, + "step": 12195 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001603798475664361, + "loss": 1.0855, + "step": 12200 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016033829662388498, + "loss": 1.0965, + "step": 12205 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001602967292936538, + "loss": 1.0717, + "step": 12210 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001602551455870321, + "loss": 1.0609, + "step": 12215 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016021354551531394, + "loss": 0.9945, + "step": 12220 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016017192908979774, + "loss": 1.0446, + "step": 12225 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016013029632178634, + "loss": 1.0, + "step": 12230 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001600886472225871, + "loss": 1.0896, + "step": 12235 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016004698180351176, + "loss": 1.0985, + "step": 12240 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001600053000758765, + "loss": 1.0819, + "step": 12245 + }, + { + "epoch": 0.37, + "learning_rate": 0.000159963602051002, + "loss": 1.0969, + "step": 12250 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015992188774021325, + "loss": 1.0584, + "step": 12255 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015988015715483967, + "loss": 1.0449, + "step": 12260 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015983841030621526, + "loss": 1.0808, + "step": 12265 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001597966472056782, + "loss": 1.1722, + "step": 12270 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015975486786457137, + "loss": 1.0571, + "step": 12275 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015971307229424174, + "loss": 1.0479, + "step": 12280 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015967126050604093, + "loss": 1.1316, + "step": 12285 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015962943251132488, + "loss": 1.1198, + "step": 12290 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015958758832145388, + "loss": 0.9917, + "step": 12295 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015954572794779273, + "loss": 1.0472, + "step": 12300 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015950385140171057, + "loss": 1.0713, + "step": 12305 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015946195869458093, + "loss": 1.0398, + "step": 12310 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015942004983778168, + "loss": 1.06, + "step": 12315 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015937812484269513, + "loss": 1.044, + "step": 12320 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015933618372070805, + "loss": 0.9675, + "step": 12325 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001592942264832114, + "loss": 1.0668, + "step": 12330 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001592522531416007, + "loss": 0.9494, + "step": 12335 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015921026370727573, + "loss": 1.0085, + "step": 12340 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001591682581916407, + "loss": 1.0809, + "step": 12345 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001591262366061042, + "loss": 1.0572, + "step": 12350 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001590841989620791, + "loss": 1.0494, + "step": 12355 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015904214527098272, + "loss": 1.0348, + "step": 12360 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015900007554423665, + "loss": 1.1366, + "step": 12365 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015895798979326692, + "loss": 1.0334, + "step": 12370 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015891588802950394, + "loss": 1.1262, + "step": 12375 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015887377026438227, + "loss": 1.0053, + "step": 12380 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001588316365093411, + "loss": 0.9844, + "step": 12385 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015878948677582372, + "loss": 1.0509, + "step": 12390 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001587473210752779, + "loss": 1.0638, + "step": 12395 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001587051394191557, + "loss": 1.0847, + "step": 12400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001586629418189135, + "loss": 1.0602, + "step": 12405 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015862072828601203, + "loss": 1.1459, + "step": 12410 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015857849883191635, + "loss": 1.1086, + "step": 12415 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015853625346809585, + "loss": 1.047, + "step": 12420 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015849399220602424, + "loss": 1.0071, + "step": 12425 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015845171505717947, + "loss": 1.0305, + "step": 12430 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015840942203304396, + "loss": 0.9619, + "step": 12435 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001583671131451043, + "loss": 1.0425, + "step": 12440 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015832478840485147, + "loss": 1.1414, + "step": 12445 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001582824478237807, + "loss": 1.0897, + "step": 12450 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015824009141339156, + "loss": 1.1626, + "step": 12455 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015819771918518794, + "loss": 1.1347, + "step": 12460 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015815533115067794, + "loss": 1.1648, + "step": 12465 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001581129273213741, + "loss": 1.0929, + "step": 12470 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015807050770879304, + "loss": 1.0203, + "step": 12475 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015802807232445586, + "loss": 1.0773, + "step": 12480 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015798562117988785, + "loss": 1.1076, + "step": 12485 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001579431542866186, + "loss": 1.0848, + "step": 12490 + }, + { + "epoch": 0.37, + "learning_rate": 0.000157900671656182, + "loss": 1.0672, + "step": 12495 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015785817330011615, + "loss": 1.0444, + "step": 12500 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001578156592299635, + "loss": 1.0385, + "step": 12505 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015777312945727072, + "loss": 1.1397, + "step": 12510 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001577305839935887, + "loss": 1.0314, + "step": 12515 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015768802285047272, + "loss": 1.061, + "step": 12520 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015764544603948224, + "loss": 1.0831, + "step": 12525 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001576028535721809, + "loss": 1.0409, + "step": 12530 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015756024546013676, + "loss": 1.0264, + "step": 12535 + }, + { + "epoch": 0.37, + "learning_rate": 0.000157517621714922, + "loss": 1.0486, + "step": 12540 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001574749823481131, + "loss": 1.1393, + "step": 12545 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001574323273712908, + "loss": 0.9918, + "step": 12550 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015738965679603997, + "loss": 0.9591, + "step": 12555 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015734697063394986, + "loss": 1.0308, + "step": 12560 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015730426889661386, + "loss": 1.1057, + "step": 12565 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015726155159562964, + "loss": 1.0179, + "step": 12570 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015721881874259904, + "loss": 1.033, + "step": 12575 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001571760703491282, + "loss": 1.0896, + "step": 12580 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015713330642682744, + "loss": 1.078, + "step": 12585 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015709052698731127, + "loss": 1.0376, + "step": 12590 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015704773204219848, + "loss": 1.0438, + "step": 12595 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015700492160311197, + "loss": 0.9981, + "step": 12600 + }, + { + "epoch": 0.38, + "learning_rate": 0.000156962095681679, + "loss": 1.1354, + "step": 12605 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015691925428953089, + "loss": 1.1314, + "step": 12610 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015687639743830324, + "loss": 1.1792, + "step": 12615 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001568335251396358, + "loss": 1.0431, + "step": 12620 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015679063740517256, + "loss": 1.0919, + "step": 12625 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001567477342465617, + "loss": 1.0962, + "step": 12630 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001567048156754556, + "loss": 1.0615, + "step": 12635 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015666188170351068, + "loss": 1.0596, + "step": 12640 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001566189323423878, + "loss": 1.0684, + "step": 12645 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015657596760375179, + "loss": 1.0383, + "step": 12650 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015653298749927178, + "loss": 1.0603, + "step": 12655 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015648999204062098, + "loss": 1.0703, + "step": 12660 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015644698123947682, + "loss": 1.062, + "step": 12665 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015640395510752093, + "loss": 1.1088, + "step": 12670 + }, + { + "epoch": 0.38, + "learning_rate": 0.000156360913656439, + "loss": 1.084, + "step": 12675 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015631785689792098, + "loss": 1.0751, + "step": 12680 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001562747848436609, + "loss": 1.1549, + "step": 12685 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015623169750535708, + "loss": 0.9869, + "step": 12690 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001561885948947118, + "loss": 1.124, + "step": 12695 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015614547702343166, + "loss": 0.9965, + "step": 12700 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001561023439032273, + "loss": 1.0271, + "step": 12705 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001560591955458135, + "loss": 1.0701, + "step": 12710 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015601603196290924, + "loss": 1.1278, + "step": 12715 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015597285316623757, + "loss": 1.1064, + "step": 12720 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001559296591675258, + "loss": 1.1004, + "step": 12725 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015588644997850519, + "loss": 1.0756, + "step": 12730 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015584322561091124, + "loss": 0.9904, + "step": 12735 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015579998607648352, + "loss": 1.0268, + "step": 12740 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015575673138696578, + "loss": 1.1451, + "step": 12745 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001557134615541058, + "loss": 1.1465, + "step": 12750 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015567017658965564, + "loss": 1.1158, + "step": 12755 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001556268765053712, + "loss": 1.0284, + "step": 12760 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015558356131301272, + "loss": 1.1098, + "step": 12765 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015554023102434445, + "loss": 1.0548, + "step": 12770 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015549688565113476, + "loss": 1.0587, + "step": 12775 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001554535252051561, + "loss": 1.1288, + "step": 12780 + }, + { + "epoch": 0.38, + "learning_rate": 0.000155410149698185, + "loss": 1.0819, + "step": 12785 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015536675914200213, + "loss": 1.0428, + "step": 12790 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001553233535483922, + "loss": 1.077, + "step": 12795 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015527993292914407, + "loss": 1.0254, + "step": 12800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001552364972960506, + "loss": 1.0026, + "step": 12805 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015519304666090876, + "loss": 1.0738, + "step": 12810 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001551495810355196, + "loss": 1.1154, + "step": 12815 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015510610043168824, + "loss": 1.0234, + "step": 12820 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015506260486122385, + "loss": 1.078, + "step": 12825 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015501909433593972, + "loss": 1.0976, + "step": 12830 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015497556886765316, + "loss": 1.0619, + "step": 12835 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015493202846818553, + "loss": 1.0863, + "step": 12840 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015488847314936224, + "loss": 1.0133, + "step": 12845 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001548449029230128, + "loss": 0.9726, + "step": 12850 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001548013178009707, + "loss": 1.0892, + "step": 12855 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015475771779507356, + "loss": 1.0856, + "step": 12860 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015471410291716293, + "loss": 1.0923, + "step": 12865 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015467047317908455, + "loss": 1.0328, + "step": 12870 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015462682859268803, + "loss": 0.9618, + "step": 12875 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015458316916982715, + "loss": 1.0252, + "step": 12880 + }, + { + "epoch": 0.38, + "learning_rate": 0.00015453949492235964, + "loss": 0.9738, + "step": 12885 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001544958058621473, + "loss": 1.1019, + "step": 12890 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015445210200105592, + "loss": 1.0605, + "step": 12895 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001544083833509553, + "loss": 1.0122, + "step": 12900 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015436464992371932, + "loss": 0.9601, + "step": 12905 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001543209017312258, + "loss": 1.0791, + "step": 12910 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015427713878535663, + "loss": 1.1111, + "step": 12915 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015423336109799765, + "loss": 1.1112, + "step": 12920 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015418956868103878, + "loss": 1.0873, + "step": 12925 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015414576154637385, + "loss": 1.1185, + "step": 12930 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015410193970590073, + "loss": 1.0208, + "step": 12935 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015405810317152134, + "loss": 1.024, + "step": 12940 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015401425195514146, + "loss": 1.0212, + "step": 12945 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015397038606867103, + "loss": 1.0881, + "step": 12950 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015392650552402374, + "loss": 1.0419, + "step": 12955 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015388261033311753, + "loss": 1.04, + "step": 12960 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015383870050787412, + "loss": 1.0422, + "step": 12965 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015379477606021933, + "loss": 1.0576, + "step": 12970 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015375083700208282, + "loss": 1.065, + "step": 12975 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015370688334539833, + "loss": 1.0505, + "step": 12980 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015366291510210354, + "loss": 1.1005, + "step": 12985 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015361893228414003, + "loss": 1.0852, + "step": 12990 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015357493490345342, + "loss": 1.2097, + "step": 12995 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015353092297199327, + "loss": 1.0543, + "step": 13000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015348689650171305, + "loss": 1.0732, + "step": 13005 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015344285550457018, + "loss": 1.093, + "step": 13010 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015339879999252606, + "loss": 1.0503, + "step": 13015 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001533547299775461, + "loss": 1.1156, + "step": 13020 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015331064547159944, + "loss": 1.1332, + "step": 13025 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015326654648665938, + "loss": 0.9897, + "step": 13030 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015322243303470302, + "loss": 0.9512, + "step": 13035 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015317830512771142, + "loss": 1.1189, + "step": 13040 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001531341627776696, + "loss": 0.9897, + "step": 13045 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015309000599656646, + "loss": 1.0516, + "step": 13050 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015304583479639478, + "loss": 0.98, + "step": 13055 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015300164918915138, + "loss": 1.0644, + "step": 13060 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015295744918683696, + "loss": 1.0332, + "step": 13065 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015291323480145597, + "loss": 1.0145, + "step": 13070 + }, + { + "epoch": 0.39, + "learning_rate": 0.000152869006045017, + "loss": 0.9817, + "step": 13075 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015282476292953237, + "loss": 1.0523, + "step": 13080 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001527805054670184, + "loss": 1.0293, + "step": 13085 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015273623366949523, + "loss": 1.1496, + "step": 13090 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015269194754898697, + "loss": 1.0444, + "step": 13095 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015264764711752157, + "loss": 1.0438, + "step": 13100 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015260333238713088, + "loss": 1.0975, + "step": 13105 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015255900336985062, + "loss": 1.0362, + "step": 13110 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015251466007772045, + "loss": 1.0738, + "step": 13115 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015247030252278377, + "loss": 0.9671, + "step": 13120 + }, + { + "epoch": 0.39, + "learning_rate": 0.000152425930717088, + "loss": 1.0807, + "step": 13125 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001523815446726844, + "loss": 1.0393, + "step": 13130 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015233714440162808, + "loss": 1.0638, + "step": 13135 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001522927299159779, + "loss": 1.0394, + "step": 13140 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015224830122779679, + "loss": 1.0758, + "step": 13145 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001522038583491514, + "loss": 1.059, + "step": 13150 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015215940129211224, + "loss": 1.0288, + "step": 13155 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001521149300687537, + "loss": 1.069, + "step": 13160 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001520704446911541, + "loss": 1.0558, + "step": 13165 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015202594517139544, + "loss": 1.0036, + "step": 13170 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001519814315215637, + "loss": 1.076, + "step": 13175 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015193690375374852, + "loss": 1.0702, + "step": 13180 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015189236188004363, + "loss": 1.0632, + "step": 13185 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015184780591254637, + "loss": 1.0536, + "step": 13190 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015180323586335806, + "loss": 1.0451, + "step": 13195 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015175865174458367, + "loss": 1.0509, + "step": 13200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001517140535683322, + "loss": 1.0305, + "step": 13205 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015166944134671634, + "loss": 1.0793, + "step": 13210 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015162481509185257, + "loss": 1.0426, + "step": 13215 + }, + { + "epoch": 0.39, + "learning_rate": 0.00015158017481586134, + "loss": 1.0461, + "step": 13220 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001515355205308667, + "loss": 1.1281, + "step": 13225 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001514908522489966, + "loss": 1.0555, + "step": 13230 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001514461699823829, + "loss": 1.0629, + "step": 13235 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015140147374316106, + "loss": 1.0857, + "step": 13240 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015135676354347045, + "loss": 1.0637, + "step": 13245 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015131203939545423, + "loss": 1.0243, + "step": 13250 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001512673013112593, + "loss": 1.0476, + "step": 13255 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015122254930303642, + "loss": 1.0002, + "step": 13260 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015117778338294, + "loss": 1.0315, + "step": 13265 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015113300356312843, + "loss": 1.0639, + "step": 13270 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001510882098557637, + "loss": 1.0171, + "step": 13275 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015104340227301162, + "loss": 1.0584, + "step": 13280 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015099858082704182, + "loss": 1.0284, + "step": 13285 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015095374553002758, + "loss": 1.0266, + "step": 13290 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001509088963941461, + "loss": 1.0756, + "step": 13295 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001508640334315782, + "loss": 1.09, + "step": 13300 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015081915665450855, + "loss": 1.0361, + "step": 13305 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015077426607512553, + "loss": 1.0428, + "step": 13310 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001507293617056212, + "loss": 1.0335, + "step": 13315 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015068444355819158, + "loss": 1.0661, + "step": 13320 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015063951164503615, + "loss": 1.0256, + "step": 13325 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015059456597835835, + "loss": 1.0328, + "step": 13330 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015054960657036518, + "loss": 1.0321, + "step": 13335 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001505046334332676, + "loss": 1.0711, + "step": 13340 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015045964657928006, + "loss": 0.9869, + "step": 13345 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015041464602062087, + "loss": 1.1421, + "step": 13350 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015036963176951205, + "loss": 1.0609, + "step": 13355 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001503246038381793, + "loss": 1.0057, + "step": 13360 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015027956223885206, + "loss": 0.974, + "step": 13365 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001502345069837635, + "loss": 1.0747, + "step": 13370 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001501894380851504, + "loss": 1.004, + "step": 13375 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015014435555525343, + "loss": 1.2044, + "step": 13380 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001500992594063168, + "loss": 1.046, + "step": 13385 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015005414965058847, + "loss": 1.0737, + "step": 13390 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015000902630032007, + "loss": 1.0965, + "step": 13395 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014996388936776701, + "loss": 1.0508, + "step": 13400 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014991873886518832, + "loss": 0.9723, + "step": 13405 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014987357480484673, + "loss": 1.1091, + "step": 13410 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001498283971990086, + "loss": 1.0224, + "step": 13415 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014978320605994405, + "loss": 1.0962, + "step": 13420 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014973800139992684, + "loss": 1.0474, + "step": 13425 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014969278323123444, + "loss": 1.0073, + "step": 13430 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014964755156614786, + "loss": 1.0044, + "step": 13435 + }, + { + "epoch": 0.4, + "learning_rate": 0.000149602306416952, + "loss": 1.0293, + "step": 13440 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014955704779593516, + "loss": 1.029, + "step": 13445 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001495117757153895, + "loss": 1.0528, + "step": 13450 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014946649018761078, + "loss": 1.0178, + "step": 13455 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014942119122489836, + "loss": 1.0568, + "step": 13460 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014937587883955534, + "loss": 1.0409, + "step": 13465 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001493305530438883, + "loss": 1.0719, + "step": 13470 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001492852138502077, + "loss": 1.0127, + "step": 13475 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014923986127082746, + "loss": 1.1046, + "step": 13480 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001491944953180652, + "loss": 1.0061, + "step": 13485 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014914911600424213, + "loss": 1.0597, + "step": 13490 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014910372334168314, + "loss": 1.048, + "step": 13495 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014905831734271676, + "loss": 0.9974, + "step": 13500 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014901289801967505, + "loss": 1.0682, + "step": 13505 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001489674653848938, + "loss": 1.0801, + "step": 13510 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014892201945071234, + "loss": 1.0247, + "step": 13515 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001488765602294736, + "loss": 1.034, + "step": 13520 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001488310877335242, + "loss": 1.0562, + "step": 13525 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001487856019752143, + "loss": 1.0554, + "step": 13530 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001487401029668977, + "loss": 1.0207, + "step": 13535 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014869459072093172, + "loss": 1.047, + "step": 13540 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001486490652496774, + "loss": 1.0301, + "step": 13545 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014860352656549924, + "loss": 0.9327, + "step": 13550 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014855797468076552, + "loss": 1.0279, + "step": 13555 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014851240960784783, + "loss": 1.0293, + "step": 13560 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014846683135912158, + "loss": 1.1313, + "step": 13565 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014842123994696565, + "loss": 1.0193, + "step": 13570 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014837563538376252, + "loss": 1.0356, + "step": 13575 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001483300176818982, + "loss": 1.0761, + "step": 13580 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014828438685376237, + "loss": 0.9864, + "step": 13585 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014823874291174812, + "loss": 0.9976, + "step": 13590 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001481930858682523, + "loss": 1.0158, + "step": 13595 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014814741573567514, + "loss": 1.0162, + "step": 13600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014810173252642055, + "loss": 1.019, + "step": 13605 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014805603625289585, + "loss": 1.0016, + "step": 13610 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001480103269275121, + "loss": 1.0892, + "step": 13615 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001479646045626837, + "loss": 1.0501, + "step": 13620 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014791886917082876, + "loss": 1.0575, + "step": 13625 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014787312076436883, + "loss": 1.0397, + "step": 13630 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014782735935572904, + "loss": 1.0976, + "step": 13635 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014778158495733806, + "loss": 1.0846, + "step": 13640 + }, + { + "epoch": 0.41, + "learning_rate": 0.000147735797581628, + "loss": 1.059, + "step": 13645 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014768999724103457, + "loss": 1.0423, + "step": 13650 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014764418394799705, + "loss": 1.0667, + "step": 13655 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014759835771495814, + "loss": 1.0948, + "step": 13660 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014755251855436407, + "loss": 1.0605, + "step": 13665 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014750666647866465, + "loss": 1.0129, + "step": 13670 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014746080150031311, + "loss": 1.0129, + "step": 13675 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014741492363176627, + "loss": 1.0827, + "step": 13680 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014736903288548433, + "loss": 1.0618, + "step": 13685 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014732312927393112, + "loss": 1.1006, + "step": 13690 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014727721280957388, + "loss": 0.9995, + "step": 13695 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014723128350488344, + "loss": 1.1303, + "step": 13700 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014718534137233395, + "loss": 1.0614, + "step": 13705 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001471393864244032, + "loss": 1.1748, + "step": 13710 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014709341867357237, + "loss": 1.1271, + "step": 13715 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014704743813232619, + "loss": 1.0585, + "step": 13720 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014700144481315274, + "loss": 1.0674, + "step": 13725 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014695543872854375, + "loss": 1.0323, + "step": 13730 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014690941989099426, + "loss": 1.1482, + "step": 13735 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014686338831300283, + "loss": 1.1189, + "step": 13740 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014681734400707151, + "loss": 1.0277, + "step": 13745 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014677128698570576, + "loss": 1.0566, + "step": 13750 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014672521726141454, + "loss": 1.1427, + "step": 13755 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014667913484671022, + "loss": 1.1434, + "step": 13760 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014663303975410863, + "loss": 1.0007, + "step": 13765 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014658693199612902, + "loss": 1.1359, + "step": 13770 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001465408115852941, + "loss": 1.067, + "step": 13775 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014649467853413008, + "loss": 1.0032, + "step": 13780 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001464485328551665, + "loss": 1.0022, + "step": 13785 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014640237456093634, + "loss": 1.0932, + "step": 13790 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001463562036639761, + "loss": 1.0174, + "step": 13795 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014631002017682565, + "loss": 1.0359, + "step": 13800 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014626382411202818, + "loss": 1.0693, + "step": 13805 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014621761548213048, + "loss": 1.0296, + "step": 13810 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014617139429968262, + "loss": 1.0243, + "step": 13815 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014612516057723813, + "loss": 1.0603, + "step": 13820 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001460789143273539, + "loss": 1.0186, + "step": 13825 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014603265556259034, + "loss": 1.045, + "step": 13830 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014598638429551107, + "loss": 1.0088, + "step": 13835 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014594010053868328, + "loss": 1.0959, + "step": 13840 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014589380430467746, + "loss": 1.095, + "step": 13845 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014584749560606755, + "loss": 1.0814, + "step": 13850 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014580117445543077, + "loss": 0.9897, + "step": 13855 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014575484086534786, + "loss": 1.0571, + "step": 13860 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014570849484840282, + "loss": 1.0434, + "step": 13865 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014566213641718307, + "loss": 1.0236, + "step": 13870 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014561576558427946, + "loss": 1.0775, + "step": 13875 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001455693823622861, + "loss": 1.0416, + "step": 13880 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014552298676380056, + "loss": 1.1144, + "step": 13885 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001454765788014237, + "loss": 1.0046, + "step": 13890 + }, + { + "epoch": 0.41, + "learning_rate": 0.00014543015848775975, + "loss": 1.0213, + "step": 13895 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014538372583541636, + "loss": 1.0347, + "step": 13900 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014533728085700448, + "loss": 1.0445, + "step": 13905 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014529082356513835, + "loss": 1.0752, + "step": 13910 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014524435397243567, + "loss": 1.0793, + "step": 13915 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001451978720915174, + "loss": 1.0458, + "step": 13920 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001451513779350079, + "loss": 1.0832, + "step": 13925 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001451048715155348, + "loss": 1.1649, + "step": 13930 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001450583528457291, + "loss": 1.1211, + "step": 13935 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001450118219382251, + "loss": 1.0345, + "step": 13940 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014496527880566044, + "loss": 1.1002, + "step": 13945 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001449187234606761, + "loss": 1.0623, + "step": 13950 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014487215591591635, + "loss": 1.0251, + "step": 13955 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014482557618402875, + "loss": 0.9953, + "step": 13960 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014477898427766426, + "loss": 1.0456, + "step": 13965 + }, + { + "epoch": 0.42, + "learning_rate": 0.000144732380209477, + "loss": 1.1081, + "step": 13970 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014468576399212458, + "loss": 1.1075, + "step": 13975 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014463913563826772, + "loss": 1.0408, + "step": 13980 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001445924951605706, + "loss": 1.0718, + "step": 13985 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014454584257170055, + "loss": 1.0414, + "step": 13990 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001444991778843283, + "loss": 1.1168, + "step": 13995 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014445250111112782, + "loss": 1.0559, + "step": 14000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014440581226477634, + "loss": 1.068, + "step": 14005 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014435911135795442, + "loss": 1.092, + "step": 14010 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014431239840334586, + "loss": 1.015, + "step": 14015 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001442656734136378, + "loss": 1.0918, + "step": 14020 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014421893640152047, + "loss": 1.003, + "step": 14025 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001441721873796876, + "loss": 0.9942, + "step": 14030 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014412542636083598, + "loss": 1.0676, + "step": 14035 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001440786533576658, + "loss": 1.0617, + "step": 14040 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014403186838288047, + "loss": 1.0472, + "step": 14045 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014398507144918656, + "loss": 1.0381, + "step": 14050 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014393826256929406, + "loss": 0.9979, + "step": 14055 + }, + { + "epoch": 0.42, + "learning_rate": 0.000143891441755916, + "loss": 1.034, + "step": 14060 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014384460902176885, + "loss": 1.0551, + "step": 14065 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014379776437957218, + "loss": 0.9728, + "step": 14070 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014375090784204882, + "loss": 1.0445, + "step": 14075 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014370403942192485, + "loss": 1.08, + "step": 14080 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001436571591319296, + "loss": 0.9922, + "step": 14085 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014361026698479562, + "loss": 1.1206, + "step": 14090 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014356336299325864, + "loss": 0.9838, + "step": 14095 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014351644717005763, + "loss": 1.0318, + "step": 14100 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014346951952793472, + "loss": 1.1325, + "step": 14105 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001434225800796354, + "loss": 1.032, + "step": 14110 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014337562883790818, + "loss": 1.0701, + "step": 14115 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014332866581550494, + "loss": 1.0635, + "step": 14120 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014328169102518056, + "loss": 1.0074, + "step": 14125 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014323470447969336, + "loss": 1.0799, + "step": 14130 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014318770619180468, + "loss": 1.0537, + "step": 14135 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014314069617427907, + "loss": 1.035, + "step": 14140 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014309367443988437, + "loss": 0.9921, + "step": 14145 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014304664100139145, + "loss": 1.0854, + "step": 14150 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014299959587157446, + "loss": 1.048, + "step": 14155 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014295253906321072, + "loss": 1.0854, + "step": 14160 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014290547058908067, + "loss": 1.0692, + "step": 14165 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014285839046196798, + "loss": 1.1194, + "step": 14170 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014281129869465943, + "loss": 1.0178, + "step": 14175 + }, + { + "epoch": 0.42, + "learning_rate": 0.000142764195299945, + "loss": 1.0124, + "step": 14180 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014271708029061785, + "loss": 1.0741, + "step": 14185 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014266995367947418, + "loss": 1.0356, + "step": 14190 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014262281547931343, + "loss": 1.1438, + "step": 14195 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014257566570293829, + "loss": 1.0794, + "step": 14200 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014252850436315435, + "loss": 1.0509, + "step": 14205 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014248133147277056, + "loss": 0.9975, + "step": 14210 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014243414704459884, + "loss": 1.111, + "step": 14215 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014238695109145444, + "loss": 1.0887, + "step": 14220 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014233974362615548, + "loss": 1.0505, + "step": 14225 + }, + { + "epoch": 0.42, + "learning_rate": 0.00014229252466152345, + "loss": 1.0235, + "step": 14230 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014224529421038283, + "loss": 1.1275, + "step": 14235 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014219805228556128, + "loss": 1.1162, + "step": 14240 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001421507988998895, + "loss": 1.0005, + "step": 14245 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001421035340662014, + "loss": 0.9781, + "step": 14250 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014205625779733393, + "loss": 1.0385, + "step": 14255 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014200897010612718, + "loss": 1.1089, + "step": 14260 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014196167100542428, + "loss": 1.0399, + "step": 14265 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014191436050807157, + "loss": 1.1444, + "step": 14270 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001418670386269184, + "loss": 0.9819, + "step": 14275 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014181970537481718, + "loss": 1.063, + "step": 14280 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014177236076462358, + "loss": 1.086, + "step": 14285 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014172500480919616, + "loss": 1.0422, + "step": 14290 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014167763752139664, + "loss": 1.0741, + "step": 14295 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014163025891408983, + "loss": 0.9531, + "step": 14300 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014158286900014361, + "loss": 1.1285, + "step": 14305 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014153546779242893, + "loss": 1.0485, + "step": 14310 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014148805530381976, + "loss": 1.0489, + "step": 14315 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001414406315471932, + "loss": 1.038, + "step": 14320 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001413931965354294, + "loss": 0.9999, + "step": 14325 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001413457502814115, + "loss": 1.0843, + "step": 14330 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014129829279802583, + "loss": 1.0535, + "step": 14335 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014125082409816158, + "loss": 1.0475, + "step": 14340 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014120334419471118, + "loss": 0.9993, + "step": 14345 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014115585310056994, + "loss": 0.9795, + "step": 14350 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014110835082863636, + "loss": 1.0308, + "step": 14355 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014106083739181182, + "loss": 1.0444, + "step": 14360 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001410133128030009, + "loss": 1.0206, + "step": 14365 + }, + { + "epoch": 0.43, + "learning_rate": 0.000140965777075111, + "loss": 1.0684, + "step": 14370 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014091823022105277, + "loss": 1.1241, + "step": 14375 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014087067225373973, + "loss": 1.0873, + "step": 14380 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001408231031860885, + "loss": 1.08, + "step": 14385 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001407755230310186, + "loss": 1.0919, + "step": 14390 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014072793180145275, + "loss": 1.0844, + "step": 14395 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001406803295103165, + "loss": 1.0701, + "step": 14400 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001406327161705385, + "loss": 1.0434, + "step": 14405 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014058509179505036, + "loss": 1.0751, + "step": 14410 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014053745639678667, + "loss": 1.1146, + "step": 14415 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014048980998868514, + "loss": 1.0226, + "step": 14420 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014044215258368628, + "loss": 1.096, + "step": 14425 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014039448419473372, + "loss": 1.1701, + "step": 14430 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014034680483477404, + "loss": 1.1423, + "step": 14435 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014029911451675673, + "loss": 1.0496, + "step": 14440 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014025141325363447, + "loss": 1.0815, + "step": 14445 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001402037010583626, + "loss": 0.996, + "step": 14450 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014015597794389973, + "loss": 1.0518, + "step": 14455 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001401082439232072, + "loss": 1.067, + "step": 14460 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014006049900924943, + "loss": 1.0787, + "step": 14465 + }, + { + "epoch": 0.43, + "learning_rate": 0.00014001274321499382, + "loss": 1.0599, + "step": 14470 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013996497655341066, + "loss": 1.0192, + "step": 14475 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013991719903747323, + "loss": 1.0792, + "step": 14480 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001398694106801577, + "loss": 0.9789, + "step": 14485 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013982161149444329, + "loss": 1.0406, + "step": 14490 + }, + { + "epoch": 0.43, + "learning_rate": 0.000139773801493312, + "loss": 1.0086, + "step": 14495 + }, + { + "epoch": 0.43, + "learning_rate": 0.000139725980689749, + "loss": 0.9748, + "step": 14500 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013967814909674218, + "loss": 0.9832, + "step": 14505 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013963030672728245, + "loss": 1.0357, + "step": 14510 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013958245359436361, + "loss": 1.0804, + "step": 14515 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001395345897109825, + "loss": 1.105, + "step": 14520 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013948671509013871, + "loss": 1.0039, + "step": 14525 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013943882974483485, + "loss": 1.0231, + "step": 14530 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013939093368807643, + "loss": 1.0047, + "step": 14535 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013934302693287184, + "loss": 1.1002, + "step": 14540 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013929510949223245, + "loss": 1.1112, + "step": 14545 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001392471813791724, + "loss": 1.1057, + "step": 14550 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013919924260670882, + "loss": 1.1032, + "step": 14555 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013915129318786175, + "loss": 1.064, + "step": 14560 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001391033331356541, + "loss": 1.054, + "step": 14565 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013905536246311162, + "loss": 1.0475, + "step": 14570 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013900738118326303, + "loss": 1.0812, + "step": 14575 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013895938930913988, + "loss": 1.0614, + "step": 14580 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013891138685377655, + "loss": 1.0801, + "step": 14585 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001388633738302104, + "loss": 1.0978, + "step": 14590 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013881535025148155, + "loss": 0.9829, + "step": 14595 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013876731613063315, + "loss": 1.0799, + "step": 14600 + }, + { + "epoch": 0.44, + "learning_rate": 0.000138719271480711, + "loss": 1.0072, + "step": 14605 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001386712163147639, + "loss": 1.0646, + "step": 14610 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001386231506458435, + "loss": 1.1524, + "step": 14615 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013857507448700423, + "loss": 1.151, + "step": 14620 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013852698785130348, + "loss": 1.0918, + "step": 14625 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013847889075180135, + "loss": 1.0708, + "step": 14630 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013843078320156088, + "loss": 1.0876, + "step": 14635 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013838266521364796, + "loss": 1.0218, + "step": 14640 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001383345368011312, + "loss": 1.0623, + "step": 14645 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013828639797708214, + "loss": 1.0666, + "step": 14650 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013823824875457515, + "loss": 1.0269, + "step": 14655 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013819008914668738, + "loss": 1.0994, + "step": 14660 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013814191916649885, + "loss": 0.9915, + "step": 14665 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001380937388270923, + "loss": 1.0923, + "step": 14670 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001380455481415534, + "loss": 1.1469, + "step": 14675 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013799734712297059, + "loss": 1.0616, + "step": 14680 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013794913578443508, + "loss": 1.0857, + "step": 14685 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013790091413904088, + "loss": 1.0141, + "step": 14690 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013785268219988487, + "loss": 0.9453, + "step": 14695 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001378044399800667, + "loss": 1.0303, + "step": 14700 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013775618749268875, + "loss": 1.0637, + "step": 14705 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013770792475085625, + "loss": 1.0883, + "step": 14710 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001376596517676772, + "loss": 1.1404, + "step": 14715 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013761136855626243, + "loss": 1.074, + "step": 14720 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013756307512972538, + "loss": 1.081, + "step": 14725 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001375147715011825, + "loss": 1.0106, + "step": 14730 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013746645768375286, + "loss": 0.9788, + "step": 14735 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013741813369055832, + "loss": 1.043, + "step": 14740 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013736979953472354, + "loss": 1.0413, + "step": 14745 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013732145522937582, + "loss": 1.067, + "step": 14750 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013727310078764547, + "loss": 1.0588, + "step": 14755 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013722473622266524, + "loss": 1.0372, + "step": 14760 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001371763615475709, + "loss": 1.0794, + "step": 14765 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001371279767755008, + "loss": 0.9561, + "step": 14770 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013707958191959608, + "loss": 1.0172, + "step": 14775 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013703117699300066, + "loss": 1.0306, + "step": 14780 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013698276200886108, + "loss": 1.023, + "step": 14785 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001369343369803268, + "loss": 1.1297, + "step": 14790 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013688590192054978, + "loss": 1.0682, + "step": 14795 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013683745684268486, + "loss": 1.0522, + "step": 14800 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013678900175988963, + "loss": 1.1, + "step": 14805 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013674053668532422, + "loss": 1.0242, + "step": 14810 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013669206163215164, + "loss": 1.0332, + "step": 14815 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013664357661353758, + "loss": 1.0912, + "step": 14820 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013659508164265037, + "loss": 0.9765, + "step": 14825 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013654657673266104, + "loss": 1.035, + "step": 14830 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001364980618967435, + "loss": 1.0322, + "step": 14835 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013644953714807403, + "loss": 1.0447, + "step": 14840 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013640100249983196, + "loss": 1.1066, + "step": 14845 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013635245796519903, + "loss": 1.0897, + "step": 14850 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013630390355735985, + "loss": 1.061, + "step": 14855 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013625533928950153, + "loss": 1.0751, + "step": 14860 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001362067651748141, + "loss": 0.988, + "step": 14865 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013615818122649, + "loss": 1.1014, + "step": 14870 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013610958745772456, + "loss": 1.0257, + "step": 14875 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013606098388171562, + "loss": 1.0674, + "step": 14880 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001360123705116638, + "loss": 1.1612, + "step": 14885 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013596374736077232, + "loss": 1.079, + "step": 14890 + }, + { + "epoch": 0.44, + "learning_rate": 0.00013591511444224704, + "loss": 1.0389, + "step": 14895 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001358664717692965, + "loss": 1.0477, + "step": 14900 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013581781935513188, + "loss": 1.0284, + "step": 14905 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013576915721296709, + "loss": 1.0312, + "step": 14910 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013572048535601851, + "loss": 1.0662, + "step": 14915 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001356718037975053, + "loss": 1.1158, + "step": 14920 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013562311255064922, + "loss": 1.072, + "step": 14925 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001355744116286746, + "loss": 1.0199, + "step": 14930 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001355257010448085, + "loss": 1.0483, + "step": 14935 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013547698081228053, + "loss": 1.0035, + "step": 14940 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013542825094432293, + "loss": 1.0901, + "step": 14945 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001353795114541706, + "loss": 1.0666, + "step": 14950 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013533076235506102, + "loss": 1.1802, + "step": 14955 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013528200366023425, + "loss": 1.0289, + "step": 14960 + }, + { + "epoch": 0.45, + "learning_rate": 0.000135233235382933, + "loss": 1.1238, + "step": 14965 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001351844575364026, + "loss": 1.0667, + "step": 14970 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013513567013389088, + "loss": 1.0561, + "step": 14975 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013508687318864845, + "loss": 0.9961, + "step": 14980 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013503806671392828, + "loss": 0.9969, + "step": 14985 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013498925072298615, + "loss": 1.0206, + "step": 14990 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013494042522908022, + "loss": 0.9305, + "step": 14995 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013489159024547142, + "loss": 1.1151, + "step": 15000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001348427457854231, + "loss": 1.0533, + "step": 15005 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013479389186220127, + "loss": 1.1152, + "step": 15010 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013474502848907454, + "loss": 1.0926, + "step": 15015 + }, + { + "epoch": 0.45, + "learning_rate": 0.000134696155679314, + "loss": 1.0811, + "step": 15020 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013464727344619331, + "loss": 0.994, + "step": 15025 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001345983818029888, + "loss": 1.0541, + "step": 15030 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001345494807629792, + "loss": 1.008, + "step": 15035 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001345005703394459, + "loss": 1.0462, + "step": 15040 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013445165054567286, + "loss": 1.0486, + "step": 15045 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013440272139494643, + "loss": 1.0209, + "step": 15050 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001343537829005557, + "loss": 1.1139, + "step": 15055 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013430483507579212, + "loss": 0.9679, + "step": 15060 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013425587793394983, + "loss": 0.9779, + "step": 15065 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013420691148832535, + "loss": 1.0195, + "step": 15070 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013415793575221786, + "loss": 1.0203, + "step": 15075 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013410895073892901, + "loss": 1.1401, + "step": 15080 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013405995646176294, + "loss": 1.062, + "step": 15085 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001340109529340263, + "loss": 1.0685, + "step": 15090 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001339619401690284, + "loss": 1.032, + "step": 15095 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013391291818008082, + "loss": 1.0704, + "step": 15100 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013386388698049783, + "loss": 1.0899, + "step": 15105 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013381484658359612, + "loss": 1.0541, + "step": 15110 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013376579700269492, + "loss": 1.1297, + "step": 15115 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013371673825111587, + "loss": 0.9925, + "step": 15120 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013366767034218326, + "loss": 1.0554, + "step": 15125 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001336185932892237, + "loss": 1.025, + "step": 15130 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013356950710556638, + "loss": 1.0518, + "step": 15135 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013352041180454292, + "loss": 1.0574, + "step": 15140 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001334713073994875, + "loss": 1.0028, + "step": 15145 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001334221939037366, + "loss": 1.0092, + "step": 15150 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013337307133062942, + "loss": 1.0167, + "step": 15155 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013332393969350738, + "loss": 1.0498, + "step": 15160 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013327479900571452, + "loss": 1.0821, + "step": 15165 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013322564928059728, + "loss": 1.0405, + "step": 15170 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013317649053150456, + "loss": 1.0196, + "step": 15175 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001331273227717877, + "loss": 1.0123, + "step": 15180 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001330781460148005, + "loss": 1.0579, + "step": 15185 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013302896027389923, + "loss": 1.114, + "step": 15190 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013297976556244257, + "loss": 1.0077, + "step": 15195 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013293056189379158, + "loss": 1.0462, + "step": 15200 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013288134928130988, + "loss": 1.0836, + "step": 15205 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013283212773836342, + "loss": 1.0526, + "step": 15210 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013278289727832062, + "loss": 1.008, + "step": 15215 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001327336579145523, + "loss": 1.0593, + "step": 15220 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001326844096604317, + "loss": 1.0639, + "step": 15225 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001326351525293345, + "loss": 1.0483, + "step": 15230 + }, + { + "epoch": 0.45, + "learning_rate": 0.00013258588653463878, + "loss": 1.0358, + "step": 15235 + }, + { + "epoch": 0.46, + "learning_rate": 0.000132536611689725, + "loss": 1.137, + "step": 15240 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013248732800797604, + "loss": 1.0667, + "step": 15245 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013243803550277715, + "loss": 1.1036, + "step": 15250 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013238873418751611, + "loss": 1.0985, + "step": 15255 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013233942407558287, + "loss": 1.0511, + "step": 15260 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013229010518037002, + "loss": 1.0785, + "step": 15265 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013224077751527232, + "loss": 1.0117, + "step": 15270 + }, + { + "epoch": 0.46, + "learning_rate": 0.000132191441093687, + "loss": 1.0678, + "step": 15275 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001321420959290137, + "loss": 1.0776, + "step": 15280 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001320927420346544, + "loss": 1.1453, + "step": 15285 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013204337942401346, + "loss": 0.9941, + "step": 15290 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013199400811049757, + "loss": 1.0364, + "step": 15295 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001319446281075158, + "loss": 1.0369, + "step": 15300 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013189523942847966, + "loss": 1.091, + "step": 15305 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001318458420868029, + "loss": 1.0526, + "step": 15310 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013179643609590168, + "loss": 1.042, + "step": 15315 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013174702146919454, + "loss": 1.1163, + "step": 15320 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013169759822010226, + "loss": 1.0297, + "step": 15325 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013164816636204808, + "loss": 1.0572, + "step": 15330 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001315987259084575, + "loss": 1.0496, + "step": 15335 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001315492768727584, + "loss": 1.0029, + "step": 15340 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013149981926838098, + "loss": 1.0399, + "step": 15345 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013145035310875773, + "loss": 1.0575, + "step": 15350 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013140087840732355, + "loss": 1.1036, + "step": 15355 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013135139517751553, + "loss": 1.0369, + "step": 15360 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013130190343277318, + "loss": 1.0183, + "step": 15365 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013125240318653835, + "loss": 1.0165, + "step": 15370 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013120289445225501, + "loss": 1.0417, + "step": 15375 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013115337724336974, + "loss": 1.0339, + "step": 15380 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001311038515733311, + "loss": 1.0261, + "step": 15385 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013105431745559019, + "loss": 1.0969, + "step": 15390 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013100477490360024, + "loss": 1.1461, + "step": 15395 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001309552239308169, + "loss": 1.0499, + "step": 15400 + }, + { + "epoch": 0.46, + "learning_rate": 0.000130905664550698, + "loss": 1.0664, + "step": 15405 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013085609677670375, + "loss": 1.0011, + "step": 15410 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013080652062229657, + "loss": 1.0376, + "step": 15415 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013075693610094117, + "loss": 1.1185, + "step": 15420 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013070734322610455, + "loss": 1.1076, + "step": 15425 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013065774201125597, + "loss": 1.0464, + "step": 15430 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013060813246986697, + "loss": 1.0318, + "step": 15435 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001305585146154113, + "loss": 1.0371, + "step": 15440 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013050888846136502, + "loss": 1.0216, + "step": 15445 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013045925402120645, + "loss": 1.0305, + "step": 15450 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013040961130841608, + "loss": 1.1001, + "step": 15455 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013035996033647677, + "loss": 1.0506, + "step": 15460 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001303103011188735, + "loss": 1.0245, + "step": 15465 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013026063366909364, + "loss": 1.0199, + "step": 15470 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001302109580006266, + "loss": 1.0855, + "step": 15475 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001301612741269642, + "loss": 1.0364, + "step": 15480 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013011158206160034, + "loss": 1.0515, + "step": 15485 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001300618818180313, + "loss": 1.0078, + "step": 15490 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013001217340975543, + "loss": 1.1065, + "step": 15495 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012996245685027345, + "loss": 1.0, + "step": 15500 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012991273215308811, + "loss": 1.1157, + "step": 15505 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012986299933170453, + "loss": 1.0566, + "step": 15510 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012981325839963, + "loss": 1.087, + "step": 15515 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012976350937037395, + "loss": 1.0439, + "step": 15520 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001297137522574481, + "loss": 1.0016, + "step": 15525 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001296639870743662, + "loss": 1.0378, + "step": 15530 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012961421383464445, + "loss": 1.1103, + "step": 15535 + }, + { + "epoch": 0.46, + "learning_rate": 0.000129564432551801, + "loss": 1.0795, + "step": 15540 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012951464323935634, + "loss": 1.0755, + "step": 15545 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012946484591083303, + "loss": 1.1002, + "step": 15550 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001294150405797559, + "loss": 1.1738, + "step": 15555 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001293652272596519, + "loss": 1.1307, + "step": 15560 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012931540596405013, + "loss": 1.0777, + "step": 15565 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012926557670648193, + "loss": 1.031, + "step": 15570 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012921573950048075, + "loss": 1.0327, + "step": 15575 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012916589435958223, + "loss": 0.9593, + "step": 15580 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012911604129732407, + "loss": 1.1054, + "step": 15585 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012906618032724627, + "loss": 1.0416, + "step": 15590 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012901631146289085, + "loss": 1.0927, + "step": 15595 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012896643471780206, + "loss": 1.0218, + "step": 15600 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001289165501055263, + "loss": 1.0354, + "step": 15605 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012886665763961193, + "loss": 1.1281, + "step": 15610 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012881675733360972, + "loss": 1.0319, + "step": 15615 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012876684920107233, + "loss": 1.057, + "step": 15620 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012871693325555472, + "loss": 1.1276, + "step": 15625 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012866700951061382, + "loss": 1.1126, + "step": 15630 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001286170779798088, + "loss": 1.0219, + "step": 15635 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001285671386767009, + "loss": 1.0463, + "step": 15640 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012851719161485344, + "loss": 1.0797, + "step": 15645 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001284672368078319, + "loss": 1.0441, + "step": 15650 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001284172742692038, + "loss": 1.0381, + "step": 15655 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012836730401253886, + "loss": 1.1222, + "step": 15660 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012831732605140877, + "loss": 0.9904, + "step": 15665 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001282673403993874, + "loss": 1.0483, + "step": 15670 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012821734707005073, + "loss": 1.1289, + "step": 15675 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012816734607697672, + "loss": 1.1209, + "step": 15680 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012811733743374552, + "loss": 1.1041, + "step": 15685 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001280673211539393, + "loss": 0.9789, + "step": 15690 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012801729725114226, + "loss": 1.1433, + "step": 15695 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001279672657389408, + "loss": 1.0536, + "step": 15700 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012791722663092327, + "loss": 1.0813, + "step": 15705 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012786717994068013, + "loss": 1.041, + "step": 15710 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012781712568180393, + "loss": 1.0024, + "step": 15715 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012776706386788918, + "loss": 1.0545, + "step": 15720 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012771699451253254, + "loss": 1.0062, + "step": 15725 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012766691762933267, + "loss": 1.0329, + "step": 15730 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001276168332318903, + "loss": 1.0642, + "step": 15735 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012756674133380816, + "loss": 0.9884, + "step": 15740 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012751664194869105, + "loss": 0.9666, + "step": 15745 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001274665350901458, + "loss": 1.0517, + "step": 15750 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012741642077178123, + "loss": 1.0895, + "step": 15755 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001273662990072083, + "loss": 0.9901, + "step": 15760 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012731616981003985, + "loss": 1.1071, + "step": 15765 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012726603319389084, + "loss": 1.0259, + "step": 15770 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012721588917237816, + "loss": 1.021, + "step": 15775 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012716573775912085, + "loss": 1.068, + "step": 15780 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012711557896773974, + "loss": 1.0371, + "step": 15785 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001270654128118579, + "loss": 1.0592, + "step": 15790 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012701523930510022, + "loss": 1.0905, + "step": 15795 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001269650584610937, + "loss": 0.9654, + "step": 15800 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012691487029346728, + "loss": 1.0484, + "step": 15805 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012686467481585188, + "loss": 1.0274, + "step": 15810 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001268144720418804, + "loss": 1.1199, + "step": 15815 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012676426198518785, + "loss": 1.0497, + "step": 15820 + }, + { + "epoch": 0.47, + "learning_rate": 0.000126714044659411, + "loss": 1.0591, + "step": 15825 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012666382007818876, + "loss": 1.114, + "step": 15830 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012661358825516198, + "loss": 0.9804, + "step": 15835 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001265633492039734, + "loss": 1.0641, + "step": 15840 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012651310293826783, + "loss": 1.1478, + "step": 15845 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012646284947169195, + "loss": 1.1467, + "step": 15850 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012641258881789446, + "loss": 1.0927, + "step": 15855 + }, + { + "epoch": 0.47, + "learning_rate": 0.000126362320990526, + "loss": 1.098, + "step": 15860 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012631204600323907, + "loss": 1.0826, + "step": 15865 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012626176386968828, + "loss": 1.0308, + "step": 15870 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012621147460353, + "loss": 1.0685, + "step": 15875 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012616117821842273, + "loss": 1.1317, + "step": 15880 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001261108747280267, + "loss": 1.0326, + "step": 15885 + }, + { + "epoch": 0.47, + "learning_rate": 0.00012606056414600424, + "loss": 1.0435, + "step": 15890 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001260102464860195, + "loss": 1.0244, + "step": 15895 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001259599217617386, + "loss": 1.0027, + "step": 15900 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001259095899868296, + "loss": 1.0935, + "step": 15905 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012585925117496237, + "loss": 1.0335, + "step": 15910 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012580890533980883, + "loss": 1.0029, + "step": 15915 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012575855249504267, + "loss": 1.026, + "step": 15920 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012570819265433968, + "loss": 1.1028, + "step": 15925 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012565782583137723, + "loss": 0.9975, + "step": 15930 + }, + { + "epoch": 0.48, + "learning_rate": 0.000125607452039835, + "loss": 1.0539, + "step": 15935 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012555707129339417, + "loss": 1.0188, + "step": 15940 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012550668360573807, + "loss": 1.0348, + "step": 15945 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001254562889905518, + "loss": 1.0054, + "step": 15950 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001254058874615224, + "loss": 1.1159, + "step": 15955 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012535547903233873, + "loss": 0.9767, + "step": 15960 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012530506371669154, + "loss": 1.0589, + "step": 15965 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012525464152827347, + "loss": 0.9356, + "step": 15970 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012520421248077908, + "loss": 1.0232, + "step": 15975 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012515377658790463, + "loss": 0.9649, + "step": 15980 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001251033338633484, + "loss": 0.9755, + "step": 15985 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012505288432081044, + "loss": 1.0673, + "step": 15990 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001250024279739927, + "loss": 0.9671, + "step": 15995 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012495196483659892, + "loss": 1.0316, + "step": 16000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001249014949223348, + "loss": 0.9738, + "step": 16005 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012485101824490772, + "loss": 1.0598, + "step": 16010 + }, + { + "epoch": 0.48, + "learning_rate": 0.000124800534818027, + "loss": 1.011, + "step": 16015 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012475004465540375, + "loss": 1.0397, + "step": 16020 + }, + { + "epoch": 0.48, + "learning_rate": 0.000124699547770751, + "loss": 1.0628, + "step": 16025 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012464904417778345, + "loss": 1.0137, + "step": 16030 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012459853389021775, + "loss": 0.9918, + "step": 16035 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012454801692177232, + "loss": 1.0899, + "step": 16040 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012449749328616742, + "loss": 0.996, + "step": 16045 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012444696299712506, + "loss": 1.0728, + "step": 16050 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012439642606836915, + "loss": 1.0236, + "step": 16055 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001243458825136253, + "loss": 1.0046, + "step": 16060 + }, + { + "epoch": 0.48, + "learning_rate": 0.000124295332346621, + "loss": 1.0115, + "step": 16065 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012424477558108546, + "loss": 1.0178, + "step": 16070 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012419421223074982, + "loss": 1.0456, + "step": 16075 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012414364230934686, + "loss": 1.0005, + "step": 16080 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012409306583061118, + "loss": 1.1397, + "step": 16085 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001240424828082792, + "loss": 1.0506, + "step": 16090 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012399189325608916, + "loss": 1.0607, + "step": 16095 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012394129718778092, + "loss": 1.0455, + "step": 16100 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012389069461709627, + "loss": 1.0779, + "step": 16105 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012384008555777867, + "loss": 1.0092, + "step": 16110 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012378947002357338, + "loss": 1.1115, + "step": 16115 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012373884802822737, + "loss": 1.0782, + "step": 16120 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001236882195854895, + "loss": 0.9904, + "step": 16125 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012363758470911019, + "loss": 1.0706, + "step": 16130 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012358694341284176, + "loss": 1.0316, + "step": 16135 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001235362957104382, + "loss": 1.0686, + "step": 16140 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012348564161565522, + "loss": 1.0992, + "step": 16145 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012343498114225038, + "loss": 1.0069, + "step": 16150 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012338431430398283, + "loss": 1.057, + "step": 16155 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001233336411146136, + "loss": 1.1273, + "step": 16160 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001232829615879052, + "loss": 1.0728, + "step": 16165 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001232322757376222, + "loss": 1.0924, + "step": 16170 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012318158357753058, + "loss": 1.0298, + "step": 16175 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012313088512139824, + "loss": 1.0546, + "step": 16180 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012308018038299464, + "loss": 1.0093, + "step": 16185 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001230294693760911, + "loss": 0.9931, + "step": 16190 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001229787521144605, + "loss": 0.9841, + "step": 16195 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001229280286118775, + "loss": 1.1518, + "step": 16200 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001228772988821184, + "loss": 1.0384, + "step": 16205 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001228265629389613, + "loss": 0.979, + "step": 16210 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012277582079618586, + "loss": 0.9979, + "step": 16215 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001227250724675735, + "loss": 1.0261, + "step": 16220 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012267431796690722, + "loss": 1.0265, + "step": 16225 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001226235573079719, + "loss": 1.0429, + "step": 16230 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012257279050455387, + "loss": 1.0686, + "step": 16235 + }, + { + "epoch": 0.48, + "learning_rate": 0.00012252201757044127, + "loss": 1.0279, + "step": 16240 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001224712385194238, + "loss": 1.0125, + "step": 16245 + }, + { + "epoch": 0.49, + "learning_rate": 0.000122420453365293, + "loss": 1.0541, + "step": 16250 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001223696621218418, + "loss": 1.0824, + "step": 16255 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012231886480286507, + "loss": 1.0409, + "step": 16260 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012226806142215908, + "loss": 1.0078, + "step": 16265 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012221725199352195, + "loss": 0.9656, + "step": 16270 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012216643653075327, + "loss": 1.0255, + "step": 16275 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012211561504765444, + "loss": 1.0577, + "step": 16280 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012206478755802828, + "loss": 1.025, + "step": 16285 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012201395407567948, + "loss": 1.0809, + "step": 16290 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001219631146144142, + "loss": 0.9843, + "step": 16295 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001219122691880403, + "loss": 1.0386, + "step": 16300 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012186141781036717, + "loss": 0.9607, + "step": 16305 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012181056049520591, + "loss": 1.0641, + "step": 16310 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012175969725636924, + "loss": 1.0501, + "step": 16315 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012170882810767132, + "loss": 1.0764, + "step": 16320 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001216579530629282, + "loss": 1.0614, + "step": 16325 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012160707213595722, + "loss": 1.0667, + "step": 16330 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012155618534057757, + "loss": 1.0269, + "step": 16335 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001215052926906099, + "loss": 1.0035, + "step": 16340 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012145439419987652, + "loss": 1.0758, + "step": 16345 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012140348988220123, + "loss": 1.1086, + "step": 16350 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001213525797514095, + "loss": 1.0853, + "step": 16355 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012130166382132837, + "loss": 1.0806, + "step": 16360 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012125074210578642, + "loss": 1.0597, + "step": 16365 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012119981461861383, + "loss": 1.048, + "step": 16370 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012114888137364228, + "loss": 0.9718, + "step": 16375 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012109794238470515, + "loss": 0.9895, + "step": 16380 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012104699766563727, + "loss": 1.0457, + "step": 16385 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012099604723027502, + "loss": 1.1585, + "step": 16390 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012094509109245644, + "loss": 0.9743, + "step": 16395 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012089412926602095, + "loss": 1.0225, + "step": 16400 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012084316176480973, + "loss": 1.166, + "step": 16405 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012079218860266526, + "loss": 1.0567, + "step": 16410 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001207412097934318, + "loss": 1.0273, + "step": 16415 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012069022535095491, + "loss": 1.055, + "step": 16420 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001206392352890819, + "loss": 1.116, + "step": 16425 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012058823962166139, + "loss": 0.991, + "step": 16430 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012053723836254373, + "loss": 1.0946, + "step": 16435 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012048623152558065, + "loss": 1.0626, + "step": 16440 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012043521912462545, + "loss": 1.0081, + "step": 16445 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001203842011735329, + "loss": 1.0255, + "step": 16450 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012033317768615935, + "loss": 1.1157, + "step": 16455 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012028214867636259, + "loss": 1.0739, + "step": 16460 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012023111415800191, + "loss": 1.1111, + "step": 16465 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001201800741449381, + "loss": 1.0721, + "step": 16470 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012012902865103353, + "loss": 0.9954, + "step": 16475 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012007797769015192, + "loss": 1.054, + "step": 16480 + }, + { + "epoch": 0.49, + "learning_rate": 0.00012002692127615856, + "loss": 1.0193, + "step": 16485 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001199758594229202, + "loss": 1.0071, + "step": 16490 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011992479214430508, + "loss": 1.0317, + "step": 16495 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011987371945418288, + "loss": 1.0935, + "step": 16500 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011982264136642483, + "loss": 0.9796, + "step": 16505 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011977155789490347, + "loss": 1.0642, + "step": 16510 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011972046905349299, + "loss": 0.9863, + "step": 16515 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011966937485606891, + "loss": 1.0263, + "step": 16520 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011961827531650824, + "loss": 1.1578, + "step": 16525 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011956717044868945, + "loss": 1.0804, + "step": 16530 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011951606026649245, + "loss": 1.0899, + "step": 16535 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011946494478379863, + "loss": 1.0095, + "step": 16540 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001194138240144907, + "loss": 1.1139, + "step": 16545 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011936269797245301, + "loss": 1.0368, + "step": 16550 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001193115666715711, + "loss": 1.0337, + "step": 16555 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011926043012573216, + "loss": 1.0379, + "step": 16560 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011920928834882464, + "loss": 1.023, + "step": 16565 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001191581413547385, + "loss": 1.1335, + "step": 16570 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011910698915736507, + "loss": 1.0723, + "step": 16575 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011905583177059716, + "loss": 1.073, + "step": 16580 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011900466920832892, + "loss": 1.0348, + "step": 16585 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011895350148445594, + "loss": 1.0707, + "step": 16590 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001189023286128752, + "loss": 1.0724, + "step": 16595 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011885115060748507, + "loss": 1.0944, + "step": 16600 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011879996748218532, + "loss": 1.0977, + "step": 16605 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011874877925087715, + "loss": 1.068, + "step": 16610 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011869758592746309, + "loss": 1.1718, + "step": 16615 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011864638752584707, + "loss": 0.9944, + "step": 16620 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011859518405993442, + "loss": 1.0414, + "step": 16625 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011854397554363183, + "loss": 0.9956, + "step": 16630 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011849276199084736, + "loss": 1.0472, + "step": 16635 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011844154341549041, + "loss": 1.0578, + "step": 16640 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001183903198314718, + "loss": 0.9445, + "step": 16645 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011833909125270372, + "loss": 1.049, + "step": 16650 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001182878576930996, + "loss": 1.0062, + "step": 16655 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001182366191665744, + "loss": 1.035, + "step": 16660 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011818537568704424, + "loss": 1.0667, + "step": 16665 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011813412726842676, + "loss": 1.0438, + "step": 16670 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001180828739246408, + "loss": 1.0216, + "step": 16675 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011803161566960661, + "loss": 0.9874, + "step": 16680 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011798035251724577, + "loss": 0.9954, + "step": 16685 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011792908448148119, + "loss": 1.0287, + "step": 16690 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011787781157623709, + "loss": 1.047, + "step": 16695 + }, + { + "epoch": 0.5, + "learning_rate": 0.000117826533815439, + "loss": 1.0276, + "step": 16700 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011777525121301383, + "loss": 1.0766, + "step": 16705 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011772396378288973, + "loss": 1.041, + "step": 16710 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001176726715389962, + "loss": 1.0318, + "step": 16715 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011762137449526404, + "loss": 1.1067, + "step": 16720 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011757007266562536, + "loss": 1.0843, + "step": 16725 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001175187660640136, + "loss": 1.127, + "step": 16730 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001174674547043634, + "loss": 1.0288, + "step": 16735 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011741613860061078, + "loss": 1.0833, + "step": 16740 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011736481776669306, + "loss": 1.0307, + "step": 16745 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011731349221654875, + "loss": 1.0487, + "step": 16750 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011726216196411773, + "loss": 1.0504, + "step": 16755 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001172108270233411, + "loss": 1.0458, + "step": 16760 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001171594874081613, + "loss": 1.1016, + "step": 16765 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011710814313252197, + "loss": 1.1063, + "step": 16770 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011705679421036804, + "loss": 1.0885, + "step": 16775 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011700544065564569, + "loss": 1.0588, + "step": 16780 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011695408248230245, + "loss": 0.9937, + "step": 16785 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011690271970428692, + "loss": 1.0229, + "step": 16790 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011685135233554917, + "loss": 1.0234, + "step": 16795 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001167999803900403, + "loss": 0.9762, + "step": 16800 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011674860388171287, + "loss": 1.018, + "step": 16805 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011669722282452044, + "loss": 1.0489, + "step": 16810 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011664583723241808, + "loss": 0.9754, + "step": 16815 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001165944471193618, + "loss": 1.0288, + "step": 16820 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001165430524993091, + "loss": 0.9729, + "step": 16825 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011649165338621851, + "loss": 1.0008, + "step": 16830 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011644024979404991, + "loss": 1.059, + "step": 16835 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001163888417367643, + "loss": 1.03, + "step": 16840 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011633742922832395, + "loss": 1.0356, + "step": 16845 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011628601228269236, + "loss": 1.1086, + "step": 16850 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011623459091383418, + "loss": 1.0117, + "step": 16855 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011618316513571525, + "loss": 0.9786, + "step": 16860 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011613173496230268, + "loss": 1.0918, + "step": 16865 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001160803004075647, + "loss": 1.0918, + "step": 16870 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001160288614854708, + "loss": 1.0374, + "step": 16875 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001159774182099916, + "loss": 1.0492, + "step": 16880 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011592597059509893, + "loss": 1.031, + "step": 16885 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011587451865476575, + "loss": 1.0997, + "step": 16890 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011582306240296632, + "loss": 1.0536, + "step": 16895 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001157716018536759, + "loss": 0.9987, + "step": 16900 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011572013702087107, + "loss": 1.0397, + "step": 16905 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011566866791852944, + "loss": 1.1162, + "step": 16910 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011561719456062994, + "loss": 1.1106, + "step": 16915 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011556571696115244, + "loss": 1.1285, + "step": 16920 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001155142351340782, + "loss": 1.0485, + "step": 16925 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011546274909338945, + "loss": 1.0473, + "step": 16930 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001154112588530696, + "loss": 1.0767, + "step": 16935 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011535976442710334, + "loss": 1.016, + "step": 16940 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011530826582947624, + "loss": 1.0699, + "step": 16945 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011525676307417528, + "loss": 1.0717, + "step": 16950 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011520525617518831, + "loss": 1.0533, + "step": 16955 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011515374514650457, + "loss": 1.0271, + "step": 16960 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011510223000211415, + "loss": 0.954, + "step": 16965 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011505071075600846, + "loss": 1.0581, + "step": 16970 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011499918742217996, + "loss": 0.9361, + "step": 16975 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001149476600146222, + "loss": 1.0547, + "step": 16980 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011489612854732988, + "loss": 1.0908, + "step": 16985 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011484459303429871, + "loss": 1.0489, + "step": 16990 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011479305348952563, + "loss": 1.0477, + "step": 16995 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011474150992700856, + "loss": 1.1402, + "step": 17000 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011468996236074658, + "loss": 1.0611, + "step": 17005 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011463841080473984, + "loss": 1.0811, + "step": 17010 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001145868552729896, + "loss": 1.0542, + "step": 17015 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001145352957794981, + "loss": 1.0246, + "step": 17020 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011448373233826878, + "loss": 1.0224, + "step": 17025 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011443216496330608, + "loss": 1.0713, + "step": 17030 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011438059366861551, + "loss": 1.0387, + "step": 17035 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011432901846820374, + "loss": 0.9866, + "step": 17040 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011427743937607829, + "loss": 1.0394, + "step": 17045 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011422585640624801, + "loss": 1.0594, + "step": 17050 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011417426957272255, + "loss": 1.0903, + "step": 17055 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001141226788895128, + "loss": 1.1227, + "step": 17060 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011407108437063057, + "loss": 1.0171, + "step": 17065 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011401948603008877, + "loss": 1.0606, + "step": 17070 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011396788388190133, + "loss": 0.9801, + "step": 17075 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011391627794008326, + "loss": 1.0562, + "step": 17080 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011386466821865052, + "loss": 1.0734, + "step": 17085 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011381305473162016, + "loss": 1.2374, + "step": 17090 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011376143749301023, + "loss": 1.1451, + "step": 17095 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011370981651683978, + "loss": 1.0992, + "step": 17100 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011365819181712891, + "loss": 0.998, + "step": 17105 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011360656340789873, + "loss": 1.0326, + "step": 17110 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011355493130317129, + "loss": 1.089, + "step": 17115 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011350329551696978, + "loss": 1.0561, + "step": 17120 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011345165606331825, + "loss": 1.0282, + "step": 17125 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011340001295624181, + "loss": 1.0662, + "step": 17130 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011334836620976654, + "loss": 1.1069, + "step": 17135 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011329671583791959, + "loss": 1.0205, + "step": 17140 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011324506185472894, + "loss": 1.0117, + "step": 17145 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011319340427422372, + "loss": 1.1055, + "step": 17150 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011314174311043391, + "loss": 1.0506, + "step": 17155 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011309007837739049, + "loss": 1.0017, + "step": 17160 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001130384100891255, + "loss": 1.0514, + "step": 17165 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011298673825967183, + "loss": 1.0381, + "step": 17170 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011293506290306335, + "loss": 1.0109, + "step": 17175 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011288338403333494, + "loss": 1.0245, + "step": 17180 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011283170166452246, + "loss": 1.0513, + "step": 17185 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011278001581066258, + "loss": 1.1285, + "step": 17190 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001127283264857931, + "loss": 1.1582, + "step": 17195 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011267663370395256, + "loss": 1.1311, + "step": 17200 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011262493747918064, + "loss": 1.0675, + "step": 17205 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011257323782551779, + "loss": 1.0747, + "step": 17210 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011252153475700551, + "loss": 1.0397, + "step": 17215 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001124698282876862, + "loss": 1.0503, + "step": 17220 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001124181184316031, + "loss": 1.0436, + "step": 17225 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011236640520280049, + "loss": 1.0455, + "step": 17230 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011231468861532346, + "loss": 1.0997, + "step": 17235 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011226296868321813, + "loss": 1.0674, + "step": 17240 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011221124542053139, + "loss": 1.1412, + "step": 17245 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011215951884131115, + "loss": 1.0457, + "step": 17250 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011210778895960616, + "loss": 1.0397, + "step": 17255 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011205605578946609, + "loss": 1.103, + "step": 17260 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011200431934494152, + "loss": 1.0319, + "step": 17265 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011195257964008383, + "loss": 0.9845, + "step": 17270 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001119008366889454, + "loss": 0.98, + "step": 17275 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011184909050557944, + "loss": 0.9878, + "step": 17280 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011179734110404003, + "loss": 1.1495, + "step": 17285 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011174558849838213, + "loss": 1.0501, + "step": 17290 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011169383270266163, + "loss": 1.0553, + "step": 17295 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011164207373093512, + "loss": 0.9852, + "step": 17300 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001115903115972603, + "loss": 1.0407, + "step": 17305 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011153854631569544, + "loss": 1.1059, + "step": 17310 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011148677790029997, + "loss": 1.0298, + "step": 17315 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001114350063651339, + "loss": 0.9959, + "step": 17320 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011138323172425829, + "loss": 0.9879, + "step": 17325 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011133145399173492, + "loss": 1.061, + "step": 17330 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011127967318162644, + "loss": 1.0454, + "step": 17335 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001112278893079964, + "loss": 1.0903, + "step": 17340 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011117610238490903, + "loss": 1.0882, + "step": 17345 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001111243124264296, + "loss": 1.0967, + "step": 17350 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011107251944662398, + "loss": 1.072, + "step": 17355 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011102072345955903, + "loss": 1.0874, + "step": 17360 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011096892447930238, + "loss": 1.0951, + "step": 17365 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011091712251992244, + "loss": 1.0882, + "step": 17370 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011086531759548843, + "loss": 1.0147, + "step": 17375 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011081350972007043, + "loss": 1.0421, + "step": 17380 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011076169890773929, + "loss": 1.0815, + "step": 17385 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011070988517256659, + "loss": 1.0586, + "step": 17390 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011065806852862484, + "loss": 1.0462, + "step": 17395 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011060624898998724, + "loss": 0.9747, + "step": 17400 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011055442657072781, + "loss": 1.1752, + "step": 17405 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011050260128492131, + "loss": 1.0365, + "step": 17410 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011045077314664336, + "loss": 0.9845, + "step": 17415 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001103989421699703, + "loss": 1.0461, + "step": 17420 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011034710836897921, + "loss": 1.1143, + "step": 17425 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011029527175774807, + "loss": 1.0354, + "step": 17430 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011024343235035541, + "loss": 1.0174, + "step": 17435 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011019159016088077, + "loss": 1.03, + "step": 17440 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011013974520340418, + "loss": 1.0119, + "step": 17445 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011008789749200668, + "loss": 1.1158, + "step": 17450 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011003604704076984, + "loss": 1.0306, + "step": 17455 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010998419386377612, + "loss": 1.0364, + "step": 17460 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010993233797510866, + "loss": 0.9333, + "step": 17465 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010988047938885135, + "loss": 1.0868, + "step": 17470 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010982861811908882, + "loss": 0.9888, + "step": 17475 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010977675417990638, + "loss": 1.0597, + "step": 17480 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010972488758539015, + "loss": 1.0489, + "step": 17485 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010967301834962689, + "loss": 0.9978, + "step": 17490 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001096211464867041, + "loss": 0.9844, + "step": 17495 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010956927201071005, + "loss": 0.9725, + "step": 17500 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010951739493573365, + "loss": 1.0761, + "step": 17505 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010946551527586452, + "loss": 1.0293, + "step": 17510 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010941363304519305, + "loss": 1.0689, + "step": 17515 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010936174825781024, + "loss": 0.9875, + "step": 17520 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010930986092780781, + "loss": 1.0431, + "step": 17525 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010925797106927828, + "loss": 1.0609, + "step": 17530 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010920607869631465, + "loss": 1.0282, + "step": 17535 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010915418382301079, + "loss": 1.0159, + "step": 17540 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010910228646346115, + "loss": 1.0636, + "step": 17545 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010905038663176086, + "loss": 1.1105, + "step": 17550 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010899848434200577, + "loss": 1.0585, + "step": 17555 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010894657960829235, + "loss": 1.0458, + "step": 17560 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010889467244471777, + "loss": 1.0937, + "step": 17565 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010884276286537978, + "loss": 0.9878, + "step": 17570 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010879085088437698, + "loss": 1.0137, + "step": 17575 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010873893651580833, + "loss": 1.0564, + "step": 17580 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010868701977377376, + "loss": 1.092, + "step": 17585 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010863510067237355, + "loss": 1.077, + "step": 17590 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010858317922570885, + "loss": 1.0662, + "step": 17595 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010853125544788125, + "loss": 1.0016, + "step": 17600 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010847932935299319, + "loss": 1.0857, + "step": 17605 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010842740095514754, + "loss": 1.084, + "step": 17610 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010837547026844794, + "loss": 1.0184, + "step": 17615 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010832353730699854, + "loss": 1.0341, + "step": 17620 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010827160208490418, + "loss": 1.03, + "step": 17625 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001082196646162703, + "loss": 1.0349, + "step": 17630 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010816772491520293, + "loss": 1.0325, + "step": 17635 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010811578299580874, + "loss": 1.0324, + "step": 17640 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010806383887219496, + "loss": 1.0353, + "step": 17645 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010801189255846944, + "loss": 1.0845, + "step": 17650 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010795994406874064, + "loss": 0.9926, + "step": 17655 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010790799341711756, + "loss": 1.0083, + "step": 17660 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010785604061770991, + "loss": 0.9906, + "step": 17665 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010780408568462778, + "loss": 1.0359, + "step": 17670 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010775212863198208, + "loss": 1.1053, + "step": 17675 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010770016947388407, + "loss": 1.0255, + "step": 17680 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010764820822444574, + "loss": 1.0949, + "step": 17685 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010759624489777955, + "loss": 1.0844, + "step": 17690 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010754427950799861, + "loss": 1.0976, + "step": 17695 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010749231206921653, + "loss": 1.1104, + "step": 17700 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001074403425955475, + "loss": 0.9957, + "step": 17705 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010738837110110622, + "loss": 1.062, + "step": 17710 + }, + { + "epoch": 0.53, + "learning_rate": 0.000107336397600008, + "loss": 0.9972, + "step": 17715 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010728442210636868, + "loss": 1.0418, + "step": 17720 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010723244463430464, + "loss": 0.9245, + "step": 17725 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010718046519793276, + "loss": 0.9977, + "step": 17730 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010712848381137048, + "loss": 1.0144, + "step": 17735 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010707650048873579, + "loss": 1.0433, + "step": 17740 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010702451524414718, + "loss": 1.0439, + "step": 17745 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010697252809172366, + "loss": 1.1379, + "step": 17750 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010692053904558479, + "loss": 1.0929, + "step": 17755 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010686854811985058, + "loss": 1.0676, + "step": 17760 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010681655532864165, + "loss": 1.0808, + "step": 17765 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010676456068607901, + "loss": 0.9935, + "step": 17770 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010671256420628428, + "loss": 1.0908, + "step": 17775 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001066605659033795, + "loss": 1.0484, + "step": 17780 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010660856579148724, + "loss": 1.063, + "step": 17785 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010655656388473056, + "loss": 1.0618, + "step": 17790 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010650456019723301, + "loss": 1.0161, + "step": 17795 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010645255474311862, + "loss": 1.0504, + "step": 17800 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010640054753651187, + "loss": 1.06, + "step": 17805 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010634853859153779, + "loss": 1.0437, + "step": 17810 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010629652792232178, + "loss": 1.0186, + "step": 17815 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010624451554298984, + "loss": 0.9854, + "step": 17820 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010619250146766826, + "loss": 1.0962, + "step": 17825 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010614048571048401, + "loss": 1.0874, + "step": 17830 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010608846828556427, + "loss": 1.0166, + "step": 17835 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010603644920703691, + "loss": 1.0522, + "step": 17840 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010598442848903, + "loss": 1.0537, + "step": 17845 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010593240614567237, + "loss": 1.0014, + "step": 17850 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010588038219109298, + "loss": 1.0654, + "step": 17855 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010582835663942141, + "loss": 1.0212, + "step": 17860 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010577632950478764, + "loss": 1.0352, + "step": 17865 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010572430080132205, + "loss": 1.0201, + "step": 17870 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010567227054315545, + "loss": 1.0028, + "step": 17875 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010562023874441911, + "loss": 1.0203, + "step": 17880 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010556820541924466, + "loss": 0.9729, + "step": 17885 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010551617058176422, + "loss": 1.0253, + "step": 17890 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010546413424611023, + "loss": 1.0138, + "step": 17895 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010541209642641563, + "loss": 1.0885, + "step": 17900 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001053600571368137, + "loss": 0.9752, + "step": 17905 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010530801639143812, + "loss": 1.0413, + "step": 17910 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010525597420442299, + "loss": 1.137, + "step": 17915 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010520393058990285, + "loss": 1.0034, + "step": 17920 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010515188556201247, + "loss": 1.0115, + "step": 17925 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010509983913488722, + "loss": 1.037, + "step": 17930 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001050477913226626, + "loss": 1.0491, + "step": 17935 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010499574213947479, + "loss": 1.0316, + "step": 17940 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010494369159946001, + "loss": 1.0817, + "step": 17945 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010489163971675514, + "loss": 1.0928, + "step": 17950 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010483958650549721, + "loss": 0.9814, + "step": 17955 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010478753197982372, + "loss": 1.0371, + "step": 17960 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010473547615387256, + "loss": 1.0381, + "step": 17965 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010468341904178182, + "loss": 1.1164, + "step": 17970 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010463136065769016, + "loss": 1.0019, + "step": 17975 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010457930101573636, + "loss": 1.0667, + "step": 17980 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010452724013005973, + "loss": 1.0587, + "step": 17985 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010447517801479971, + "loss": 1.1089, + "step": 17990 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010442311468409631, + "loss": 0.984, + "step": 17995 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010437105015208972, + "loss": 1.0928, + "step": 18000 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001043189844329205, + "loss": 1.0368, + "step": 18005 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010426691754072952, + "loss": 1.074, + "step": 18010 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010421484948965797, + "loss": 1.0114, + "step": 18015 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010416278029384737, + "loss": 1.0457, + "step": 18020 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001041107099674395, + "loss": 1.0856, + "step": 18025 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010405863852457655, + "loss": 1.0463, + "step": 18030 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001040065659794009, + "loss": 1.0739, + "step": 18035 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010395449234605532, + "loss": 1.0107, + "step": 18040 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001039024176386828, + "loss": 1.0824, + "step": 18045 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010385034187142667, + "loss": 1.0984, + "step": 18050 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010379826505843054, + "loss": 0.9847, + "step": 18055 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010374618721383827, + "loss": 1.0309, + "step": 18060 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010369410835179408, + "loss": 0.9811, + "step": 18065 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010364202848644235, + "loss": 1.0825, + "step": 18070 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010358994763192788, + "loss": 1.1538, + "step": 18075 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010353786580239559, + "loss": 1.052, + "step": 18080 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010348578301199077, + "loss": 1.0487, + "step": 18085 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010343369927485888, + "loss": 1.0251, + "step": 18090 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010338161460514577, + "loss": 1.0128, + "step": 18095 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010332952901699738, + "loss": 1.0485, + "step": 18100 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010327744252456003, + "loss": 1.0709, + "step": 18105 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010322535514198021, + "loss": 1.0185, + "step": 18110 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010317326688340472, + "loss": 1.0852, + "step": 18115 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010312117776298054, + "loss": 1.0435, + "step": 18120 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010306908779485487, + "loss": 1.0555, + "step": 18125 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001030169969931752, + "loss": 1.0726, + "step": 18130 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010296490537208923, + "loss": 1.0965, + "step": 18135 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010291281294574481, + "loss": 0.975, + "step": 18140 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010286071972829016, + "loss": 1.0603, + "step": 18145 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010280862573387355, + "loss": 1.0459, + "step": 18150 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010275653097664357, + "loss": 1.0516, + "step": 18155 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010270443547074899, + "loss": 1.0926, + "step": 18160 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010265233923033874, + "loss": 1.0855, + "step": 18165 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010260024226956202, + "loss": 1.0325, + "step": 18170 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010254814460256817, + "loss": 1.041, + "step": 18175 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010249604624350672, + "loss": 1.0165, + "step": 18180 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010244394720652748, + "loss": 0.9926, + "step": 18185 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001023918475057803, + "loss": 1.1287, + "step": 18190 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010233974715541535, + "loss": 1.0507, + "step": 18195 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010228764616958287, + "loss": 1.0089, + "step": 18200 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001022355445624333, + "loss": 1.066, + "step": 18205 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010218344234811732, + "loss": 0.9528, + "step": 18210 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010213133954078564, + "loss": 1.079, + "step": 18215 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001020792361545893, + "loss": 1.0801, + "step": 18220 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001020271322036793, + "loss": 0.9493, + "step": 18225 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010197502770220701, + "loss": 1.0335, + "step": 18230 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010192292266432377, + "loss": 1.0173, + "step": 18235 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010187081710418116, + "loss": 1.0136, + "step": 18240 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010181871103593084, + "loss": 0.9877, + "step": 18245 + }, + { + "epoch": 0.54, + "learning_rate": 0.00010176660447372468, + "loss": 1.0047, + "step": 18250 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010171449743171466, + "loss": 1.0244, + "step": 18255 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010166238992405283, + "loss": 1.0421, + "step": 18260 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010161028196489147, + "loss": 1.01, + "step": 18265 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010155817356838288, + "loss": 1.0295, + "step": 18270 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010150606474867956, + "loss": 1.1261, + "step": 18275 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010145395551993408, + "loss": 1.0279, + "step": 18280 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010140184589629913, + "loss": 0.9927, + "step": 18285 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010134973589192751, + "loss": 1.0194, + "step": 18290 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010129762552097213, + "loss": 1.0692, + "step": 18295 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010124551479758597, + "loss": 1.0795, + "step": 18300 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010119340373592214, + "loss": 1.0817, + "step": 18305 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001011412923501339, + "loss": 1.043, + "step": 18310 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001010891806543744, + "loss": 1.093, + "step": 18315 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010103706866279712, + "loss": 1.0912, + "step": 18320 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010098495638955538, + "loss": 1.0872, + "step": 18325 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010093284384880283, + "loss": 1.1468, + "step": 18330 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010088073105469297, + "loss": 0.9912, + "step": 18335 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010082861802137955, + "loss": 1.0617, + "step": 18340 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010077650476301619, + "loss": 1.0886, + "step": 18345 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010072439129375672, + "loss": 1.0772, + "step": 18350 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010067227762775501, + "loss": 1.0387, + "step": 18355 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010062016377916495, + "loss": 0.9528, + "step": 18360 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001005680497621405, + "loss": 1.0717, + "step": 18365 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010051593559083557, + "loss": 1.1101, + "step": 18370 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001004638212794043, + "loss": 1.0997, + "step": 18375 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010041170684200067, + "loss": 1.039, + "step": 18380 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010035959229277889, + "loss": 1.1067, + "step": 18385 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010030747764589301, + "loss": 1.0728, + "step": 18390 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010025536291549724, + "loss": 1.0266, + "step": 18395 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010020324811574575, + "loss": 1.1265, + "step": 18400 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010015113326079275, + "loss": 1.065, + "step": 18405 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010009901836479245, + "loss": 1.0613, + "step": 18410 + }, + { + "epoch": 0.55, + "learning_rate": 0.00010004690344189912, + "loss": 1.0724, + "step": 18415 + }, + { + "epoch": 0.55, + "learning_rate": 9.999478850626694e-05, + "loss": 1.0653, + "step": 18420 + }, + { + "epoch": 0.55, + "learning_rate": 9.99426735720502e-05, + "loss": 1.0401, + "step": 18425 + }, + { + "epoch": 0.55, + "learning_rate": 9.989055865340313e-05, + "loss": 1.0151, + "step": 18430 + }, + { + "epoch": 0.55, + "learning_rate": 9.983844376447994e-05, + "loss": 1.0327, + "step": 18435 + }, + { + "epoch": 0.55, + "learning_rate": 9.978632891943493e-05, + "loss": 1.0252, + "step": 18440 + }, + { + "epoch": 0.55, + "learning_rate": 9.973421413242225e-05, + "loss": 1.1135, + "step": 18445 + }, + { + "epoch": 0.55, + "learning_rate": 9.968209941759612e-05, + "loss": 1.079, + "step": 18450 + }, + { + "epoch": 0.55, + "learning_rate": 9.962998478911071e-05, + "loss": 1.0002, + "step": 18455 + }, + { + "epoch": 0.55, + "learning_rate": 9.95778702611202e-05, + "loss": 1.0053, + "step": 18460 + }, + { + "epoch": 0.55, + "learning_rate": 9.952575584777877e-05, + "loss": 1.0481, + "step": 18465 + }, + { + "epoch": 0.55, + "learning_rate": 9.947364156324044e-05, + "loss": 1.0076, + "step": 18470 + }, + { + "epoch": 0.55, + "learning_rate": 9.942152742165928e-05, + "loss": 1.0447, + "step": 18475 + }, + { + "epoch": 0.55, + "learning_rate": 9.936941343718937e-05, + "loss": 1.044, + "step": 18480 + }, + { + "epoch": 0.55, + "learning_rate": 9.931729962398468e-05, + "loss": 1.0084, + "step": 18485 + }, + { + "epoch": 0.55, + "learning_rate": 9.926518599619911e-05, + "loss": 0.9854, + "step": 18490 + }, + { + "epoch": 0.55, + "learning_rate": 9.921307256798655e-05, + "loss": 1.0553, + "step": 18495 + }, + { + "epoch": 0.55, + "learning_rate": 9.916095935350088e-05, + "loss": 1.0122, + "step": 18500 + }, + { + "epoch": 0.55, + "learning_rate": 9.910884636689585e-05, + "loss": 1.0318, + "step": 18505 + }, + { + "epoch": 0.55, + "learning_rate": 9.90567336223252e-05, + "loss": 1.0592, + "step": 18510 + }, + { + "epoch": 0.55, + "learning_rate": 9.900462113394246e-05, + "loss": 1.0216, + "step": 18515 + }, + { + "epoch": 0.55, + "learning_rate": 9.89525089159014e-05, + "loss": 0.9709, + "step": 18520 + }, + { + "epoch": 0.55, + "learning_rate": 9.890039698235539e-05, + "loss": 1.0353, + "step": 18525 + }, + { + "epoch": 0.55, + "learning_rate": 9.884828534745787e-05, + "loss": 1.0568, + "step": 18530 + }, + { + "epoch": 0.55, + "learning_rate": 9.87961740253622e-05, + "loss": 0.986, + "step": 18535 + }, + { + "epoch": 0.55, + "learning_rate": 9.87440630302217e-05, + "loss": 1.0718, + "step": 18540 + }, + { + "epoch": 0.55, + "learning_rate": 9.869195237618946e-05, + "loss": 0.9821, + "step": 18545 + }, + { + "epoch": 0.55, + "learning_rate": 9.863984207741859e-05, + "loss": 1.0511, + "step": 18550 + }, + { + "epoch": 0.55, + "learning_rate": 9.858773214806206e-05, + "loss": 1.0493, + "step": 18555 + }, + { + "epoch": 0.55, + "learning_rate": 9.853562260227279e-05, + "loss": 1.0913, + "step": 18560 + }, + { + "epoch": 0.55, + "learning_rate": 9.848351345420354e-05, + "loss": 1.1277, + "step": 18565 + }, + { + "epoch": 0.55, + "learning_rate": 9.843140471800696e-05, + "loss": 1.0604, + "step": 18570 + }, + { + "epoch": 0.55, + "learning_rate": 9.837929640783563e-05, + "loss": 1.0629, + "step": 18575 + }, + { + "epoch": 0.55, + "learning_rate": 9.832718853784196e-05, + "loss": 1.0326, + "step": 18580 + }, + { + "epoch": 0.55, + "learning_rate": 9.827508112217835e-05, + "loss": 1.0234, + "step": 18585 + }, + { + "epoch": 0.56, + "learning_rate": 9.822297417499693e-05, + "loss": 1.0812, + "step": 18590 + }, + { + "epoch": 0.56, + "learning_rate": 9.81708677104498e-05, + "loss": 1.0066, + "step": 18595 + }, + { + "epoch": 0.56, + "learning_rate": 9.811876174268888e-05, + "loss": 0.9738, + "step": 18600 + }, + { + "epoch": 0.56, + "learning_rate": 9.806665628586599e-05, + "loss": 0.9336, + "step": 18605 + }, + { + "epoch": 0.56, + "learning_rate": 9.801455135413284e-05, + "loss": 1.0476, + "step": 18610 + }, + { + "epoch": 0.56, + "learning_rate": 9.79624469616409e-05, + "loss": 0.9799, + "step": 18615 + }, + { + "epoch": 0.56, + "learning_rate": 9.791034312254153e-05, + "loss": 1.1205, + "step": 18620 + }, + { + "epoch": 0.56, + "learning_rate": 9.785823985098603e-05, + "loss": 1.0532, + "step": 18625 + }, + { + "epoch": 0.56, + "learning_rate": 9.780613716112545e-05, + "loss": 1.1407, + "step": 18630 + }, + { + "epoch": 0.56, + "learning_rate": 9.77540350671107e-05, + "loss": 1.0658, + "step": 18635 + }, + { + "epoch": 0.56, + "learning_rate": 9.770193358309248e-05, + "loss": 1.1047, + "step": 18640 + }, + { + "epoch": 0.56, + "learning_rate": 9.764983272322147e-05, + "loss": 1.0169, + "step": 18645 + }, + { + "epoch": 0.56, + "learning_rate": 9.759773250164805e-05, + "loss": 1.1041, + "step": 18650 + }, + { + "epoch": 0.56, + "learning_rate": 9.754563293252249e-05, + "loss": 1.1431, + "step": 18655 + }, + { + "epoch": 0.56, + "learning_rate": 9.749353402999476e-05, + "loss": 1.0489, + "step": 18660 + }, + { + "epoch": 0.56, + "learning_rate": 9.744143580821489e-05, + "loss": 1.0622, + "step": 18665 + }, + { + "epoch": 0.56, + "learning_rate": 9.738933828133251e-05, + "loss": 1.0615, + "step": 18670 + }, + { + "epoch": 0.56, + "learning_rate": 9.733724146349714e-05, + "loss": 1.0008, + "step": 18675 + }, + { + "epoch": 0.56, + "learning_rate": 9.728514536885808e-05, + "loss": 1.0968, + "step": 18680 + }, + { + "epoch": 0.56, + "learning_rate": 9.723305001156452e-05, + "loss": 0.9672, + "step": 18685 + }, + { + "epoch": 0.56, + "learning_rate": 9.718095540576531e-05, + "loss": 1.0255, + "step": 18690 + }, + { + "epoch": 0.56, + "learning_rate": 9.712886156560923e-05, + "loss": 1.0624, + "step": 18695 + }, + { + "epoch": 0.56, + "learning_rate": 9.707676850524473e-05, + "loss": 0.975, + "step": 18700 + }, + { + "epoch": 0.56, + "learning_rate": 9.702467623882017e-05, + "loss": 1.0515, + "step": 18705 + }, + { + "epoch": 0.56, + "learning_rate": 9.697258478048366e-05, + "loss": 1.016, + "step": 18710 + }, + { + "epoch": 0.56, + "learning_rate": 9.692049414438299e-05, + "loss": 1.0567, + "step": 18715 + }, + { + "epoch": 0.56, + "learning_rate": 9.68684043446658e-05, + "loss": 1.0485, + "step": 18720 + }, + { + "epoch": 0.56, + "learning_rate": 9.681631539547958e-05, + "loss": 1.1189, + "step": 18725 + }, + { + "epoch": 0.56, + "learning_rate": 9.676422731097149e-05, + "loss": 1.0126, + "step": 18730 + }, + { + "epoch": 0.56, + "learning_rate": 9.671214010528845e-05, + "loss": 1.1028, + "step": 18735 + }, + { + "epoch": 0.56, + "learning_rate": 9.666005379257715e-05, + "loss": 1.0622, + "step": 18740 + }, + { + "epoch": 0.56, + "learning_rate": 9.660796838698413e-05, + "loss": 1.1035, + "step": 18745 + }, + { + "epoch": 0.56, + "learning_rate": 9.655588390265558e-05, + "loss": 1.0244, + "step": 18750 + }, + { + "epoch": 0.56, + "learning_rate": 9.650380035373749e-05, + "loss": 1.0085, + "step": 18755 + }, + { + "epoch": 0.56, + "learning_rate": 9.645171775437548e-05, + "loss": 1.0342, + "step": 18760 + }, + { + "epoch": 0.56, + "learning_rate": 9.639963611871517e-05, + "loss": 1.0005, + "step": 18765 + }, + { + "epoch": 0.56, + "learning_rate": 9.634755546090164e-05, + "loss": 1.0919, + "step": 18770 + }, + { + "epoch": 0.56, + "learning_rate": 9.62954757950799e-05, + "loss": 1.0313, + "step": 18775 + }, + { + "epoch": 0.56, + "learning_rate": 9.624339713539454e-05, + "loss": 0.9648, + "step": 18780 + }, + { + "epoch": 0.56, + "learning_rate": 9.619131949598997e-05, + "loss": 1.0076, + "step": 18785 + }, + { + "epoch": 0.56, + "learning_rate": 9.613924289101033e-05, + "loss": 1.0311, + "step": 18790 + }, + { + "epoch": 0.56, + "learning_rate": 9.608716733459945e-05, + "loss": 1.0399, + "step": 18795 + }, + { + "epoch": 0.56, + "learning_rate": 9.603509284090089e-05, + "loss": 1.0373, + "step": 18800 + }, + { + "epoch": 0.56, + "learning_rate": 9.598301942405781e-05, + "loss": 1.0153, + "step": 18805 + }, + { + "epoch": 0.56, + "learning_rate": 9.59309470982133e-05, + "loss": 0.9998, + "step": 18810 + }, + { + "epoch": 0.56, + "learning_rate": 9.587887587750998e-05, + "loss": 1.0996, + "step": 18815 + }, + { + "epoch": 0.56, + "learning_rate": 9.582680577609022e-05, + "loss": 1.0624, + "step": 18820 + }, + { + "epoch": 0.56, + "learning_rate": 9.577473680809607e-05, + "loss": 1.0477, + "step": 18825 + }, + { + "epoch": 0.56, + "learning_rate": 9.572266898766934e-05, + "loss": 1.0601, + "step": 18830 + }, + { + "epoch": 0.56, + "learning_rate": 9.567060232895143e-05, + "loss": 1.062, + "step": 18835 + }, + { + "epoch": 0.56, + "learning_rate": 9.561853684608348e-05, + "loss": 1.0563, + "step": 18840 + }, + { + "epoch": 0.56, + "learning_rate": 9.556647255320627e-05, + "loss": 1.0382, + "step": 18845 + }, + { + "epoch": 0.56, + "learning_rate": 9.551440946446034e-05, + "loss": 1.0021, + "step": 18850 + }, + { + "epoch": 0.56, + "learning_rate": 9.546234759398586e-05, + "loss": 1.0535, + "step": 18855 + }, + { + "epoch": 0.56, + "learning_rate": 9.541028695592263e-05, + "loss": 1.0387, + "step": 18860 + }, + { + "epoch": 0.56, + "learning_rate": 9.535822756441011e-05, + "loss": 1.1016, + "step": 18865 + }, + { + "epoch": 0.56, + "learning_rate": 9.53061694335875e-05, + "loss": 1.0378, + "step": 18870 + }, + { + "epoch": 0.56, + "learning_rate": 9.525411257759367e-05, + "loss": 1.0364, + "step": 18875 + }, + { + "epoch": 0.56, + "learning_rate": 9.520205701056698e-05, + "loss": 1.05, + "step": 18880 + }, + { + "epoch": 0.56, + "learning_rate": 9.515000274664558e-05, + "loss": 1.1044, + "step": 18885 + }, + { + "epoch": 0.56, + "learning_rate": 9.509794979996727e-05, + "loss": 0.9341, + "step": 18890 + }, + { + "epoch": 0.56, + "learning_rate": 9.504589818466946e-05, + "loss": 1.0315, + "step": 18895 + }, + { + "epoch": 0.56, + "learning_rate": 9.499384791488918e-05, + "loss": 1.0559, + "step": 18900 + }, + { + "epoch": 0.56, + "learning_rate": 9.494179900476303e-05, + "loss": 1.0413, + "step": 18905 + }, + { + "epoch": 0.56, + "learning_rate": 9.488975146842746e-05, + "loss": 1.0278, + "step": 18910 + }, + { + "epoch": 0.56, + "learning_rate": 9.483770532001833e-05, + "loss": 1.062, + "step": 18915 + }, + { + "epoch": 0.56, + "learning_rate": 9.478566057367124e-05, + "loss": 1.0192, + "step": 18920 + }, + { + "epoch": 0.57, + "learning_rate": 9.473361724352126e-05, + "loss": 1.0762, + "step": 18925 + }, + { + "epoch": 0.57, + "learning_rate": 9.468157534370333e-05, + "loss": 1.0652, + "step": 18930 + }, + { + "epoch": 0.57, + "learning_rate": 9.462953488835177e-05, + "loss": 1.0606, + "step": 18935 + }, + { + "epoch": 0.57, + "learning_rate": 9.457749589160061e-05, + "loss": 1.0173, + "step": 18940 + }, + { + "epoch": 0.57, + "learning_rate": 9.452545836758347e-05, + "loss": 0.9727, + "step": 18945 + }, + { + "epoch": 0.57, + "learning_rate": 9.447342233043357e-05, + "loss": 1.0488, + "step": 18950 + }, + { + "epoch": 0.57, + "learning_rate": 9.442138779428376e-05, + "loss": 1.1852, + "step": 18955 + }, + { + "epoch": 0.57, + "learning_rate": 9.436935477326638e-05, + "loss": 1.0184, + "step": 18960 + }, + { + "epoch": 0.57, + "learning_rate": 9.431732328151344e-05, + "loss": 1.0133, + "step": 18965 + }, + { + "epoch": 0.57, + "learning_rate": 9.426529333315657e-05, + "loss": 1.065, + "step": 18970 + }, + { + "epoch": 0.57, + "learning_rate": 9.421326494232691e-05, + "loss": 1.0759, + "step": 18975 + }, + { + "epoch": 0.57, + "learning_rate": 9.416123812315515e-05, + "loss": 1.0336, + "step": 18980 + }, + { + "epoch": 0.57, + "learning_rate": 9.410921288977165e-05, + "loss": 0.9559, + "step": 18985 + }, + { + "epoch": 0.57, + "learning_rate": 9.405718925630625e-05, + "loss": 1.0605, + "step": 18990 + }, + { + "epoch": 0.57, + "learning_rate": 9.400516723688842e-05, + "loss": 1.0237, + "step": 18995 + }, + { + "epoch": 0.57, + "learning_rate": 9.395314684564721e-05, + "loss": 1.0342, + "step": 19000 + }, + { + "epoch": 0.57, + "learning_rate": 9.390112809671112e-05, + "loss": 1.0295, + "step": 19005 + }, + { + "epoch": 0.57, + "learning_rate": 9.384911100420826e-05, + "loss": 1.062, + "step": 19010 + }, + { + "epoch": 0.57, + "learning_rate": 9.379709558226634e-05, + "loss": 1.076, + "step": 19015 + }, + { + "epoch": 0.57, + "learning_rate": 9.374508184501261e-05, + "loss": 1.0548, + "step": 19020 + }, + { + "epoch": 0.57, + "learning_rate": 9.369306980657374e-05, + "loss": 1.0735, + "step": 19025 + }, + { + "epoch": 0.57, + "learning_rate": 9.364105948107604e-05, + "loss": 0.9875, + "step": 19030 + }, + { + "epoch": 0.57, + "learning_rate": 9.35890508826454e-05, + "loss": 1.0767, + "step": 19035 + }, + { + "epoch": 0.57, + "learning_rate": 9.353704402540715e-05, + "loss": 1.0084, + "step": 19040 + }, + { + "epoch": 0.57, + "learning_rate": 9.34850389234862e-05, + "loss": 1.0401, + "step": 19045 + }, + { + "epoch": 0.57, + "learning_rate": 9.343303559100688e-05, + "loss": 1.1108, + "step": 19050 + }, + { + "epoch": 0.57, + "learning_rate": 9.338103404209325e-05, + "loss": 1.0995, + "step": 19055 + }, + { + "epoch": 0.57, + "learning_rate": 9.332903429086865e-05, + "loss": 1.0767, + "step": 19060 + }, + { + "epoch": 0.57, + "learning_rate": 9.327703635145609e-05, + "loss": 1.0869, + "step": 19065 + }, + { + "epoch": 0.57, + "learning_rate": 9.3225040237978e-05, + "loss": 1.0413, + "step": 19070 + }, + { + "epoch": 0.57, + "learning_rate": 9.317304596455642e-05, + "loss": 1.086, + "step": 19075 + }, + { + "epoch": 0.57, + "learning_rate": 9.312105354531275e-05, + "loss": 1.0192, + "step": 19080 + }, + { + "epoch": 0.57, + "learning_rate": 9.3069062994368e-05, + "loss": 0.9762, + "step": 19085 + }, + { + "epoch": 0.57, + "learning_rate": 9.301707432584257e-05, + "loss": 0.9809, + "step": 19090 + }, + { + "epoch": 0.57, + "learning_rate": 9.296508755385649e-05, + "loss": 1.0015, + "step": 19095 + }, + { + "epoch": 0.57, + "learning_rate": 9.291310269252919e-05, + "loss": 1.1162, + "step": 19100 + }, + { + "epoch": 0.57, + "learning_rate": 9.286111975597951e-05, + "loss": 1.0403, + "step": 19105 + }, + { + "epoch": 0.57, + "learning_rate": 9.280913875832587e-05, + "loss": 0.9406, + "step": 19110 + }, + { + "epoch": 0.57, + "learning_rate": 9.275715971368618e-05, + "loss": 0.9841, + "step": 19115 + }, + { + "epoch": 0.57, + "learning_rate": 9.270518263617776e-05, + "loss": 1.0812, + "step": 19120 + }, + { + "epoch": 0.57, + "learning_rate": 9.265320753991738e-05, + "loss": 1.1473, + "step": 19125 + }, + { + "epoch": 0.57, + "learning_rate": 9.260123443902129e-05, + "loss": 1.0274, + "step": 19130 + }, + { + "epoch": 0.57, + "learning_rate": 9.254926334760526e-05, + "loss": 1.1219, + "step": 19135 + }, + { + "epoch": 0.57, + "learning_rate": 9.249729427978445e-05, + "loss": 0.9909, + "step": 19140 + }, + { + "epoch": 0.57, + "learning_rate": 9.24453272496735e-05, + "loss": 1.058, + "step": 19145 + }, + { + "epoch": 0.57, + "learning_rate": 9.23933622713864e-05, + "loss": 1.045, + "step": 19150 + }, + { + "epoch": 0.57, + "learning_rate": 9.234139935903678e-05, + "loss": 1.0022, + "step": 19155 + }, + { + "epoch": 0.57, + "learning_rate": 9.228943852673754e-05, + "loss": 1.1066, + "step": 19160 + }, + { + "epoch": 0.57, + "learning_rate": 9.223747978860108e-05, + "loss": 1.1173, + "step": 19165 + }, + { + "epoch": 0.57, + "learning_rate": 9.218552315873913e-05, + "loss": 1.068, + "step": 19170 + }, + { + "epoch": 0.57, + "learning_rate": 9.21335686512631e-05, + "loss": 0.9987, + "step": 19175 + }, + { + "epoch": 0.57, + "learning_rate": 9.208161628028355e-05, + "loss": 0.9943, + "step": 19180 + }, + { + "epoch": 0.57, + "learning_rate": 9.202966605991059e-05, + "loss": 1.0014, + "step": 19185 + }, + { + "epoch": 0.57, + "learning_rate": 9.197771800425377e-05, + "loss": 1.1158, + "step": 19190 + }, + { + "epoch": 0.57, + "learning_rate": 9.19257721274219e-05, + "loss": 1.0035, + "step": 19195 + }, + { + "epoch": 0.57, + "learning_rate": 9.187382844352345e-05, + "loss": 1.0211, + "step": 19200 + }, + { + "epoch": 0.57, + "learning_rate": 9.182188696666605e-05, + "loss": 1.0379, + "step": 19205 + }, + { + "epoch": 0.57, + "learning_rate": 9.176994771095687e-05, + "loss": 1.0895, + "step": 19210 + }, + { + "epoch": 0.57, + "learning_rate": 9.17180106905024e-05, + "loss": 1.0134, + "step": 19215 + }, + { + "epoch": 0.57, + "learning_rate": 9.166607591940864e-05, + "loss": 1.0929, + "step": 19220 + }, + { + "epoch": 0.57, + "learning_rate": 9.161414341178084e-05, + "loss": 1.0536, + "step": 19225 + }, + { + "epoch": 0.57, + "learning_rate": 9.156221318172371e-05, + "loss": 1.0845, + "step": 19230 + }, + { + "epoch": 0.57, + "learning_rate": 9.15102852433413e-05, + "loss": 1.0895, + "step": 19235 + }, + { + "epoch": 0.57, + "learning_rate": 9.145835961073711e-05, + "loss": 1.0186, + "step": 19240 + }, + { + "epoch": 0.57, + "learning_rate": 9.140643629801398e-05, + "loss": 0.9856, + "step": 19245 + }, + { + "epoch": 0.57, + "learning_rate": 9.135451531927403e-05, + "loss": 1.0144, + "step": 19250 + }, + { + "epoch": 0.57, + "learning_rate": 9.130259668861889e-05, + "loss": 0.9828, + "step": 19255 + }, + { + "epoch": 0.58, + "learning_rate": 9.125068042014946e-05, + "loss": 1.0741, + "step": 19260 + }, + { + "epoch": 0.58, + "learning_rate": 9.119876652796608e-05, + "loss": 1.1327, + "step": 19265 + }, + { + "epoch": 0.58, + "learning_rate": 9.11468550261683e-05, + "loss": 1.0403, + "step": 19270 + }, + { + "epoch": 0.58, + "learning_rate": 9.109494592885515e-05, + "loss": 1.0563, + "step": 19275 + }, + { + "epoch": 0.58, + "learning_rate": 9.104303925012499e-05, + "loss": 1.0044, + "step": 19280 + }, + { + "epoch": 0.58, + "learning_rate": 9.099113500407547e-05, + "loss": 1.0216, + "step": 19285 + }, + { + "epoch": 0.58, + "learning_rate": 9.093923320480366e-05, + "loss": 1.009, + "step": 19290 + }, + { + "epoch": 0.58, + "learning_rate": 9.088733386640582e-05, + "loss": 1.0848, + "step": 19295 + }, + { + "epoch": 0.58, + "learning_rate": 9.083543700297776e-05, + "loss": 1.0012, + "step": 19300 + }, + { + "epoch": 0.58, + "learning_rate": 9.07835426286144e-05, + "loss": 1.1211, + "step": 19305 + }, + { + "epoch": 0.58, + "learning_rate": 9.073165075741014e-05, + "loss": 1.0528, + "step": 19310 + }, + { + "epoch": 0.58, + "learning_rate": 9.067976140345854e-05, + "loss": 1.0084, + "step": 19315 + }, + { + "epoch": 0.58, + "learning_rate": 9.062787458085271e-05, + "loss": 0.985, + "step": 19320 + }, + { + "epoch": 0.58, + "learning_rate": 9.057599030368486e-05, + "loss": 1.0455, + "step": 19325 + }, + { + "epoch": 0.58, + "learning_rate": 9.052410858604658e-05, + "loss": 1.0717, + "step": 19330 + }, + { + "epoch": 0.58, + "learning_rate": 9.047222944202878e-05, + "loss": 1.0461, + "step": 19335 + }, + { + "epoch": 0.58, + "learning_rate": 9.042035288572171e-05, + "loss": 0.9991, + "step": 19340 + }, + { + "epoch": 0.58, + "learning_rate": 9.036847893121482e-05, + "loss": 0.9998, + "step": 19345 + }, + { + "epoch": 0.58, + "learning_rate": 9.031660759259693e-05, + "loss": 1.0555, + "step": 19350 + }, + { + "epoch": 0.58, + "learning_rate": 9.026473888395607e-05, + "loss": 1.0372, + "step": 19355 + }, + { + "epoch": 0.58, + "learning_rate": 9.021287281937967e-05, + "loss": 1.0222, + "step": 19360 + }, + { + "epoch": 0.58, + "learning_rate": 9.016100941295436e-05, + "loss": 1.0414, + "step": 19365 + }, + { + "epoch": 0.58, + "learning_rate": 9.010914867876608e-05, + "loss": 1.0488, + "step": 19370 + }, + { + "epoch": 0.58, + "learning_rate": 9.005729063089996e-05, + "loss": 1.0192, + "step": 19375 + }, + { + "epoch": 0.58, + "learning_rate": 9.000543528344057e-05, + "loss": 1.0481, + "step": 19380 + }, + { + "epoch": 0.58, + "learning_rate": 8.995358265047161e-05, + "loss": 1.0587, + "step": 19385 + }, + { + "epoch": 0.58, + "learning_rate": 8.99017327460761e-05, + "loss": 1.0652, + "step": 19390 + }, + { + "epoch": 0.58, + "learning_rate": 8.984988558433626e-05, + "loss": 1.1306, + "step": 19395 + }, + { + "epoch": 0.58, + "learning_rate": 8.979804117933361e-05, + "loss": 0.9957, + "step": 19400 + }, + { + "epoch": 0.58, + "learning_rate": 8.974619954514895e-05, + "loss": 1.0876, + "step": 19405 + }, + { + "epoch": 0.58, + "learning_rate": 8.96943606958623e-05, + "loss": 1.0128, + "step": 19410 + }, + { + "epoch": 0.58, + "learning_rate": 8.964252464555288e-05, + "loss": 1.1002, + "step": 19415 + }, + { + "epoch": 0.58, + "learning_rate": 8.95906914082992e-05, + "loss": 1.0111, + "step": 19420 + }, + { + "epoch": 0.58, + "learning_rate": 8.953886099817901e-05, + "loss": 0.999, + "step": 19425 + }, + { + "epoch": 0.58, + "learning_rate": 8.948703342926927e-05, + "loss": 1.0582, + "step": 19430 + }, + { + "epoch": 0.58, + "learning_rate": 8.943520871564619e-05, + "loss": 1.0095, + "step": 19435 + }, + { + "epoch": 0.58, + "learning_rate": 8.938338687138511e-05, + "loss": 1.0403, + "step": 19440 + }, + { + "epoch": 0.58, + "learning_rate": 8.933156791056081e-05, + "loss": 1.0413, + "step": 19445 + }, + { + "epoch": 0.58, + "learning_rate": 8.927975184724704e-05, + "loss": 1.1094, + "step": 19450 + }, + { + "epoch": 0.58, + "learning_rate": 8.922793869551691e-05, + "loss": 1.0715, + "step": 19455 + }, + { + "epoch": 0.58, + "learning_rate": 8.917612846944263e-05, + "loss": 1.0836, + "step": 19460 + }, + { + "epoch": 0.58, + "learning_rate": 8.912432118309582e-05, + "loss": 0.9936, + "step": 19465 + }, + { + "epoch": 0.58, + "learning_rate": 8.907251685054705e-05, + "loss": 1.0693, + "step": 19470 + }, + { + "epoch": 0.58, + "learning_rate": 8.902071548586624e-05, + "loss": 1.0952, + "step": 19475 + }, + { + "epoch": 0.58, + "learning_rate": 8.896891710312247e-05, + "loss": 1.0376, + "step": 19480 + }, + { + "epoch": 0.58, + "learning_rate": 8.891712171638402e-05, + "loss": 1.0159, + "step": 19485 + }, + { + "epoch": 0.58, + "learning_rate": 8.886532933971835e-05, + "loss": 1.0278, + "step": 19490 + }, + { + "epoch": 0.58, + "learning_rate": 8.881353998719206e-05, + "loss": 0.9362, + "step": 19495 + }, + { + "epoch": 0.58, + "learning_rate": 8.876175367287096e-05, + "loss": 1.0408, + "step": 19500 + }, + { + "epoch": 0.58, + "learning_rate": 8.870997041082008e-05, + "loss": 1.0393, + "step": 19505 + }, + { + "epoch": 0.58, + "learning_rate": 8.865819021510361e-05, + "loss": 1.021, + "step": 19510 + }, + { + "epoch": 0.58, + "learning_rate": 8.86064130997848e-05, + "loss": 0.9919, + "step": 19515 + }, + { + "epoch": 0.58, + "learning_rate": 8.855463907892615e-05, + "loss": 1.0068, + "step": 19520 + }, + { + "epoch": 0.58, + "learning_rate": 8.850286816658937e-05, + "loss": 1.0133, + "step": 19525 + }, + { + "epoch": 0.58, + "learning_rate": 8.845110037683523e-05, + "loss": 1.0431, + "step": 19530 + }, + { + "epoch": 0.58, + "learning_rate": 8.839933572372372e-05, + "loss": 0.9891, + "step": 19535 + }, + { + "epoch": 0.58, + "learning_rate": 8.834757422131385e-05, + "loss": 0.9849, + "step": 19540 + }, + { + "epoch": 0.58, + "learning_rate": 8.8295815883664e-05, + "loss": 1.0081, + "step": 19545 + }, + { + "epoch": 0.58, + "learning_rate": 8.824406072483149e-05, + "loss": 0.9854, + "step": 19550 + }, + { + "epoch": 0.58, + "learning_rate": 8.819230875887288e-05, + "loss": 1.0988, + "step": 19555 + }, + { + "epoch": 0.58, + "learning_rate": 8.814055999984374e-05, + "loss": 1.0118, + "step": 19560 + }, + { + "epoch": 0.58, + "learning_rate": 8.808881446179902e-05, + "loss": 1.0347, + "step": 19565 + }, + { + "epoch": 0.58, + "learning_rate": 8.803707215879252e-05, + "loss": 1.0522, + "step": 19570 + }, + { + "epoch": 0.58, + "learning_rate": 8.79853331048773e-05, + "loss": 1.0272, + "step": 19575 + }, + { + "epoch": 0.58, + "learning_rate": 8.79335973141055e-05, + "loss": 1.0951, + "step": 19580 + }, + { + "epoch": 0.58, + "learning_rate": 8.788186480052844e-05, + "loss": 1.0286, + "step": 19585 + }, + { + "epoch": 0.58, + "learning_rate": 8.783013557819647e-05, + "loss": 1.0635, + "step": 19590 + }, + { + "epoch": 0.59, + "learning_rate": 8.777840966115907e-05, + "loss": 1.0037, + "step": 19595 + }, + { + "epoch": 0.59, + "learning_rate": 8.77266870634648e-05, + "loss": 1.0106, + "step": 19600 + }, + { + "epoch": 0.59, + "learning_rate": 8.767496779916138e-05, + "loss": 1.0343, + "step": 19605 + }, + { + "epoch": 0.59, + "learning_rate": 8.76232518822956e-05, + "loss": 0.9862, + "step": 19610 + }, + { + "epoch": 0.59, + "learning_rate": 8.757153932691332e-05, + "loss": 1.1068, + "step": 19615 + }, + { + "epoch": 0.59, + "learning_rate": 8.751983014705948e-05, + "loss": 1.0674, + "step": 19620 + }, + { + "epoch": 0.59, + "learning_rate": 8.746812435677809e-05, + "loss": 0.9786, + "step": 19625 + }, + { + "epoch": 0.59, + "learning_rate": 8.741642197011235e-05, + "loss": 1.0812, + "step": 19630 + }, + { + "epoch": 0.59, + "learning_rate": 8.736472300110443e-05, + "loss": 1.0084, + "step": 19635 + }, + { + "epoch": 0.59, + "learning_rate": 8.731302746379557e-05, + "loss": 1.1247, + "step": 19640 + }, + { + "epoch": 0.59, + "learning_rate": 8.726133537222609e-05, + "loss": 1.0363, + "step": 19645 + }, + { + "epoch": 0.59, + "learning_rate": 8.720964674043546e-05, + "loss": 1.0824, + "step": 19650 + }, + { + "epoch": 0.59, + "learning_rate": 8.71579615824621e-05, + "loss": 0.9967, + "step": 19655 + }, + { + "epoch": 0.59, + "learning_rate": 8.710627991234351e-05, + "loss": 1.0766, + "step": 19660 + }, + { + "epoch": 0.59, + "learning_rate": 8.705460174411624e-05, + "loss": 1.0158, + "step": 19665 + }, + { + "epoch": 0.59, + "learning_rate": 8.700292709181599e-05, + "loss": 1.0214, + "step": 19670 + }, + { + "epoch": 0.59, + "learning_rate": 8.695125596947736e-05, + "loss": 1.0265, + "step": 19675 + }, + { + "epoch": 0.59, + "learning_rate": 8.68995883911341e-05, + "loss": 1.0964, + "step": 19680 + }, + { + "epoch": 0.59, + "learning_rate": 8.684792437081884e-05, + "loss": 1.0707, + "step": 19685 + }, + { + "epoch": 0.59, + "learning_rate": 8.679626392256352e-05, + "loss": 1.0274, + "step": 19690 + }, + { + "epoch": 0.59, + "learning_rate": 8.674460706039884e-05, + "loss": 1.023, + "step": 19695 + }, + { + "epoch": 0.59, + "learning_rate": 8.669295379835467e-05, + "loss": 1.0721, + "step": 19700 + }, + { + "epoch": 0.59, + "learning_rate": 8.664130415045978e-05, + "loss": 0.9963, + "step": 19705 + }, + { + "epoch": 0.59, + "learning_rate": 8.658965813074219e-05, + "loss": 1.0424, + "step": 19710 + }, + { + "epoch": 0.59, + "learning_rate": 8.653801575322867e-05, + "loss": 1.0207, + "step": 19715 + }, + { + "epoch": 0.59, + "learning_rate": 8.648637703194516e-05, + "loss": 1.0594, + "step": 19720 + }, + { + "epoch": 0.59, + "learning_rate": 8.643474198091653e-05, + "loss": 0.9973, + "step": 19725 + }, + { + "epoch": 0.59, + "learning_rate": 8.638311061416675e-05, + "loss": 0.9855, + "step": 19730 + }, + { + "epoch": 0.59, + "learning_rate": 8.63314829457187e-05, + "loss": 1.0401, + "step": 19735 + }, + { + "epoch": 0.59, + "learning_rate": 8.627985898959426e-05, + "loss": 1.0647, + "step": 19740 + }, + { + "epoch": 0.59, + "learning_rate": 8.622823875981433e-05, + "loss": 1.072, + "step": 19745 + }, + { + "epoch": 0.59, + "learning_rate": 8.61766222703988e-05, + "loss": 0.9667, + "step": 19750 + }, + { + "epoch": 0.59, + "learning_rate": 8.612500953536659e-05, + "loss": 1.0564, + "step": 19755 + }, + { + "epoch": 0.59, + "learning_rate": 8.607340056873545e-05, + "loss": 1.043, + "step": 19760 + }, + { + "epoch": 0.59, + "learning_rate": 8.602179538452222e-05, + "loss": 1.0367, + "step": 19765 + }, + { + "epoch": 0.59, + "learning_rate": 8.597019399674277e-05, + "loss": 1.0277, + "step": 19770 + }, + { + "epoch": 0.59, + "learning_rate": 8.591859641941181e-05, + "loss": 0.9827, + "step": 19775 + }, + { + "epoch": 0.59, + "learning_rate": 8.586700266654309e-05, + "loss": 1.1811, + "step": 19780 + }, + { + "epoch": 0.59, + "learning_rate": 8.581541275214926e-05, + "loss": 1.0014, + "step": 19785 + }, + { + "epoch": 0.59, + "learning_rate": 8.576382669024202e-05, + "loss": 1.0769, + "step": 19790 + }, + { + "epoch": 0.59, + "learning_rate": 8.571224449483194e-05, + "loss": 0.9879, + "step": 19795 + }, + { + "epoch": 0.59, + "learning_rate": 8.566066617992861e-05, + "loss": 1.0132, + "step": 19800 + }, + { + "epoch": 0.59, + "learning_rate": 8.560909175954048e-05, + "loss": 1.0454, + "step": 19805 + }, + { + "epoch": 0.59, + "learning_rate": 8.555752124767498e-05, + "loss": 1.0743, + "step": 19810 + }, + { + "epoch": 0.59, + "learning_rate": 8.550595465833855e-05, + "loss": 1.0543, + "step": 19815 + }, + { + "epoch": 0.59, + "learning_rate": 8.545439200553647e-05, + "loss": 1.0509, + "step": 19820 + }, + { + "epoch": 0.59, + "learning_rate": 8.540283330327302e-05, + "loss": 1.0275, + "step": 19825 + }, + { + "epoch": 0.59, + "learning_rate": 8.535127856555126e-05, + "loss": 1.0079, + "step": 19830 + }, + { + "epoch": 0.59, + "learning_rate": 8.529972780637345e-05, + "loss": 1.007, + "step": 19835 + }, + { + "epoch": 0.59, + "learning_rate": 8.524818103974047e-05, + "loss": 0.9611, + "step": 19840 + }, + { + "epoch": 0.59, + "learning_rate": 8.519663827965235e-05, + "loss": 1.0132, + "step": 19845 + }, + { + "epoch": 0.59, + "learning_rate": 8.514509954010779e-05, + "loss": 1.0575, + "step": 19850 + }, + { + "epoch": 0.59, + "learning_rate": 8.509356483510471e-05, + "loss": 0.9422, + "step": 19855 + }, + { + "epoch": 0.59, + "learning_rate": 8.504203417863964e-05, + "loss": 0.997, + "step": 19860 + }, + { + "epoch": 0.59, + "learning_rate": 8.499050758470822e-05, + "loss": 1.1067, + "step": 19865 + }, + { + "epoch": 0.59, + "learning_rate": 8.49389850673048e-05, + "loss": 1.0309, + "step": 19870 + }, + { + "epoch": 0.59, + "learning_rate": 8.488746664042282e-05, + "loss": 1.0246, + "step": 19875 + }, + { + "epoch": 0.59, + "learning_rate": 8.48359523180545e-05, + "loss": 0.9817, + "step": 19880 + }, + { + "epoch": 0.59, + "learning_rate": 8.47844421141909e-05, + "loss": 1.0363, + "step": 19885 + }, + { + "epoch": 0.59, + "learning_rate": 8.473293604282204e-05, + "loss": 1.0398, + "step": 19890 + }, + { + "epoch": 0.59, + "learning_rate": 8.468143411793683e-05, + "loss": 0.9841, + "step": 19895 + }, + { + "epoch": 0.59, + "learning_rate": 8.462993635352303e-05, + "loss": 1.0846, + "step": 19900 + }, + { + "epoch": 0.59, + "learning_rate": 8.457844276356721e-05, + "loss": 1.0432, + "step": 19905 + }, + { + "epoch": 0.59, + "learning_rate": 8.452695336205485e-05, + "loss": 1.0889, + "step": 19910 + }, + { + "epoch": 0.59, + "learning_rate": 8.447546816297037e-05, + "loss": 0.9778, + "step": 19915 + }, + { + "epoch": 0.59, + "learning_rate": 8.442398718029691e-05, + "loss": 1.098, + "step": 19920 + }, + { + "epoch": 0.59, + "learning_rate": 8.43725104280166e-05, + "loss": 1.0139, + "step": 19925 + }, + { + "epoch": 0.6, + "learning_rate": 8.432103792011027e-05, + "loss": 1.1115, + "step": 19930 + }, + { + "epoch": 0.6, + "learning_rate": 8.426956967055774e-05, + "loss": 1.0365, + "step": 19935 + }, + { + "epoch": 0.6, + "learning_rate": 8.421810569333761e-05, + "loss": 1.0237, + "step": 19940 + }, + { + "epoch": 0.6, + "learning_rate": 8.416664600242733e-05, + "loss": 1.025, + "step": 19945 + }, + { + "epoch": 0.6, + "learning_rate": 8.41151906118031e-05, + "loss": 1.0474, + "step": 19950 + }, + { + "epoch": 0.6, + "learning_rate": 8.406373953544018e-05, + "loss": 0.9769, + "step": 19955 + }, + { + "epoch": 0.6, + "learning_rate": 8.40122927873124e-05, + "loss": 1.0006, + "step": 19960 + }, + { + "epoch": 0.6, + "learning_rate": 8.396085038139254e-05, + "loss": 1.0814, + "step": 19965 + }, + { + "epoch": 0.6, + "learning_rate": 8.390941233165219e-05, + "loss": 1.0761, + "step": 19970 + }, + { + "epoch": 0.6, + "learning_rate": 8.385797865206178e-05, + "loss": 1.0671, + "step": 19975 + }, + { + "epoch": 0.6, + "learning_rate": 8.380654935659054e-05, + "loss": 1.0598, + "step": 19980 + }, + { + "epoch": 0.6, + "learning_rate": 8.375512445920644e-05, + "loss": 1.0675, + "step": 19985 + }, + { + "epoch": 0.6, + "learning_rate": 8.37037039738763e-05, + "loss": 1.0792, + "step": 19990 + }, + { + "epoch": 0.6, + "learning_rate": 8.365228791456582e-05, + "loss": 0.9376, + "step": 19995 + }, + { + "epoch": 0.6, + "learning_rate": 8.360087629523941e-05, + "loss": 1.0024, + "step": 20000 + }, + { + "epoch": 0.6, + "learning_rate": 8.354946912986027e-05, + "loss": 1.0184, + "step": 20005 + }, + { + "epoch": 0.6, + "learning_rate": 8.349806643239042e-05, + "loss": 1.0091, + "step": 20010 + }, + { + "epoch": 0.6, + "learning_rate": 8.344666821679064e-05, + "loss": 0.9846, + "step": 20015 + }, + { + "epoch": 0.6, + "learning_rate": 8.339527449702057e-05, + "loss": 1.0314, + "step": 20020 + }, + { + "epoch": 0.6, + "learning_rate": 8.334388528703856e-05, + "loss": 1.038, + "step": 20025 + }, + { + "epoch": 0.6, + "learning_rate": 8.329250060080171e-05, + "loss": 0.9777, + "step": 20030 + }, + { + "epoch": 0.6, + "learning_rate": 8.324112045226593e-05, + "loss": 1.0191, + "step": 20035 + }, + { + "epoch": 0.6, + "learning_rate": 8.318974485538594e-05, + "loss": 1.0784, + "step": 20040 + }, + { + "epoch": 0.6, + "learning_rate": 8.313837382411518e-05, + "loss": 1.0337, + "step": 20045 + }, + { + "epoch": 0.6, + "learning_rate": 8.308700737240581e-05, + "loss": 1.0218, + "step": 20050 + }, + { + "epoch": 0.6, + "learning_rate": 8.303564551420878e-05, + "loss": 1.0479, + "step": 20055 + }, + { + "epoch": 0.6, + "learning_rate": 8.298428826347385e-05, + "loss": 1.039, + "step": 20060 + }, + { + "epoch": 0.6, + "learning_rate": 8.293293563414945e-05, + "loss": 1.0485, + "step": 20065 + }, + { + "epoch": 0.6, + "learning_rate": 8.28815876401828e-05, + "loss": 0.9933, + "step": 20070 + }, + { + "epoch": 0.6, + "learning_rate": 8.28302442955198e-05, + "loss": 1.0571, + "step": 20075 + }, + { + "epoch": 0.6, + "learning_rate": 8.277890561410518e-05, + "loss": 1.0464, + "step": 20080 + }, + { + "epoch": 0.6, + "learning_rate": 8.272757160988232e-05, + "loss": 1.0745, + "step": 20085 + }, + { + "epoch": 0.6, + "learning_rate": 8.267624229679342e-05, + "loss": 1.0487, + "step": 20090 + }, + { + "epoch": 0.6, + "learning_rate": 8.262491768877923e-05, + "loss": 1.0406, + "step": 20095 + }, + { + "epoch": 0.6, + "learning_rate": 8.257359779977948e-05, + "loss": 1.0946, + "step": 20100 + }, + { + "epoch": 0.6, + "learning_rate": 8.252228264373243e-05, + "loss": 1.1052, + "step": 20105 + }, + { + "epoch": 0.6, + "learning_rate": 8.247097223457509e-05, + "loss": 1.0344, + "step": 20110 + }, + { + "epoch": 0.6, + "learning_rate": 8.241966658624316e-05, + "loss": 0.989, + "step": 20115 + }, + { + "epoch": 0.6, + "learning_rate": 8.236836571267117e-05, + "loss": 0.9706, + "step": 20120 + }, + { + "epoch": 0.6, + "learning_rate": 8.231706962779222e-05, + "loss": 0.9942, + "step": 20125 + }, + { + "epoch": 0.6, + "learning_rate": 8.226577834553817e-05, + "loss": 1.0648, + "step": 20130 + }, + { + "epoch": 0.6, + "learning_rate": 8.221449187983951e-05, + "loss": 0.9843, + "step": 20135 + }, + { + "epoch": 0.6, + "learning_rate": 8.216321024462553e-05, + "loss": 0.9998, + "step": 20140 + }, + { + "epoch": 0.6, + "learning_rate": 8.211193345382417e-05, + "loss": 1.0423, + "step": 20145 + }, + { + "epoch": 0.6, + "learning_rate": 8.206066152136196e-05, + "loss": 1.1048, + "step": 20150 + }, + { + "epoch": 0.6, + "learning_rate": 8.20093944611642e-05, + "loss": 1.0661, + "step": 20155 + }, + { + "epoch": 0.6, + "learning_rate": 8.195813228715492e-05, + "loss": 1.0182, + "step": 20160 + }, + { + "epoch": 0.6, + "learning_rate": 8.19068750132567e-05, + "loss": 1.0461, + "step": 20165 + }, + { + "epoch": 0.6, + "learning_rate": 8.185562265339088e-05, + "loss": 0.9456, + "step": 20170 + }, + { + "epoch": 0.6, + "learning_rate": 8.180437522147736e-05, + "loss": 1.1003, + "step": 20175 + }, + { + "epoch": 0.6, + "learning_rate": 8.175313273143484e-05, + "loss": 1.0519, + "step": 20180 + }, + { + "epoch": 0.6, + "learning_rate": 8.17018951971806e-05, + "loss": 1.0309, + "step": 20185 + }, + { + "epoch": 0.6, + "learning_rate": 8.165066263263058e-05, + "loss": 1.0265, + "step": 20190 + }, + { + "epoch": 0.6, + "learning_rate": 8.159943505169929e-05, + "loss": 1.1492, + "step": 20195 + }, + { + "epoch": 0.6, + "learning_rate": 8.154821246830013e-05, + "loss": 1.0117, + "step": 20200 + }, + { + "epoch": 0.6, + "learning_rate": 8.149699489634485e-05, + "loss": 1.0529, + "step": 20205 + }, + { + "epoch": 0.6, + "learning_rate": 8.144578234974405e-05, + "loss": 1.0871, + "step": 20210 + }, + { + "epoch": 0.6, + "learning_rate": 8.139457484240687e-05, + "loss": 1.0707, + "step": 20215 + }, + { + "epoch": 0.6, + "learning_rate": 8.1343372388241e-05, + "loss": 1.0291, + "step": 20220 + }, + { + "epoch": 0.6, + "learning_rate": 8.129217500115302e-05, + "loss": 1.0278, + "step": 20225 + }, + { + "epoch": 0.6, + "learning_rate": 8.124098269504787e-05, + "loss": 1.0559, + "step": 20230 + }, + { + "epoch": 0.6, + "learning_rate": 8.118979548382924e-05, + "loss": 1.0535, + "step": 20235 + }, + { + "epoch": 0.6, + "learning_rate": 8.113861338139934e-05, + "loss": 1.119, + "step": 20240 + }, + { + "epoch": 0.6, + "learning_rate": 8.108743640165917e-05, + "loss": 1.1151, + "step": 20245 + }, + { + "epoch": 0.6, + "learning_rate": 8.103626455850815e-05, + "loss": 1.0692, + "step": 20250 + }, + { + "epoch": 0.6, + "learning_rate": 8.09850978658444e-05, + "loss": 1.0262, + "step": 20255 + }, + { + "epoch": 0.6, + "learning_rate": 8.093393633756459e-05, + "loss": 1.0921, + "step": 20260 + }, + { + "epoch": 0.61, + "learning_rate": 8.088277998756411e-05, + "loss": 1.0789, + "step": 20265 + }, + { + "epoch": 0.61, + "learning_rate": 8.083162882973679e-05, + "loss": 1.0858, + "step": 20270 + }, + { + "epoch": 0.61, + "learning_rate": 8.078048287797512e-05, + "loss": 1.0099, + "step": 20275 + }, + { + "epoch": 0.61, + "learning_rate": 8.072934214617014e-05, + "loss": 0.9959, + "step": 20280 + }, + { + "epoch": 0.61, + "learning_rate": 8.067820664821155e-05, + "loss": 1.0503, + "step": 20285 + }, + { + "epoch": 0.61, + "learning_rate": 8.06270763979876e-05, + "loss": 1.0409, + "step": 20290 + }, + { + "epoch": 0.61, + "learning_rate": 8.057595140938502e-05, + "loss": 1.0584, + "step": 20295 + }, + { + "epoch": 0.61, + "learning_rate": 8.052483169628923e-05, + "loss": 1.049, + "step": 20300 + }, + { + "epoch": 0.61, + "learning_rate": 8.04737172725842e-05, + "loss": 1.0352, + "step": 20305 + }, + { + "epoch": 0.61, + "learning_rate": 8.042260815215237e-05, + "loss": 1.0053, + "step": 20310 + }, + { + "epoch": 0.61, + "learning_rate": 8.03715043488749e-05, + "loss": 1.1098, + "step": 20315 + }, + { + "epoch": 0.61, + "learning_rate": 8.032040587663129e-05, + "loss": 0.9724, + "step": 20320 + }, + { + "epoch": 0.61, + "learning_rate": 8.02693127492998e-05, + "loss": 1.075, + "step": 20325 + }, + { + "epoch": 0.61, + "learning_rate": 8.021822498075715e-05, + "loss": 0.9909, + "step": 20330 + }, + { + "epoch": 0.61, + "learning_rate": 8.01671425848786e-05, + "loss": 1.0475, + "step": 20335 + }, + { + "epoch": 0.61, + "learning_rate": 8.01160655755379e-05, + "loss": 1.0481, + "step": 20340 + }, + { + "epoch": 0.61, + "learning_rate": 8.00649939666075e-05, + "loss": 0.9658, + "step": 20345 + }, + { + "epoch": 0.61, + "learning_rate": 8.001392777195821e-05, + "loss": 1.0676, + "step": 20350 + }, + { + "epoch": 0.61, + "learning_rate": 7.996286700545944e-05, + "loss": 1.0189, + "step": 20355 + }, + { + "epoch": 0.61, + "learning_rate": 7.991181168097912e-05, + "loss": 1.0566, + "step": 20360 + }, + { + "epoch": 0.61, + "learning_rate": 7.986076181238374e-05, + "loss": 1.0728, + "step": 20365 + }, + { + "epoch": 0.61, + "learning_rate": 7.980971741353828e-05, + "loss": 1.1276, + "step": 20370 + }, + { + "epoch": 0.61, + "learning_rate": 7.975867849830618e-05, + "loss": 0.9845, + "step": 20375 + }, + { + "epoch": 0.61, + "learning_rate": 7.970764508054944e-05, + "loss": 1.0863, + "step": 20380 + }, + { + "epoch": 0.61, + "learning_rate": 7.965661717412861e-05, + "loss": 1.0802, + "step": 20385 + }, + { + "epoch": 0.61, + "learning_rate": 7.960559479290271e-05, + "loss": 1.0586, + "step": 20390 + }, + { + "epoch": 0.61, + "learning_rate": 7.955457795072918e-05, + "loss": 0.9488, + "step": 20395 + }, + { + "epoch": 0.61, + "learning_rate": 7.950356666146404e-05, + "loss": 1.0166, + "step": 20400 + }, + { + "epoch": 0.61, + "learning_rate": 7.945256093896182e-05, + "loss": 1.0272, + "step": 20405 + }, + { + "epoch": 0.61, + "learning_rate": 7.940156079707552e-05, + "loss": 1.0289, + "step": 20410 + }, + { + "epoch": 0.61, + "learning_rate": 7.935056624965658e-05, + "loss": 1.2075, + "step": 20415 + }, + { + "epoch": 0.61, + "learning_rate": 7.929957731055491e-05, + "loss": 1.1268, + "step": 20420 + }, + { + "epoch": 0.61, + "learning_rate": 7.924859399361898e-05, + "loss": 1.0521, + "step": 20425 + }, + { + "epoch": 0.61, + "learning_rate": 7.919761631269568e-05, + "loss": 1.0836, + "step": 20430 + }, + { + "epoch": 0.61, + "learning_rate": 7.914664428163044e-05, + "loss": 1.0285, + "step": 20435 + }, + { + "epoch": 0.61, + "learning_rate": 7.909567791426697e-05, + "loss": 0.9632, + "step": 20440 + }, + { + "epoch": 0.61, + "learning_rate": 7.904471722444764e-05, + "loss": 1.0638, + "step": 20445 + }, + { + "epoch": 0.61, + "learning_rate": 7.899376222601319e-05, + "loss": 1.115, + "step": 20450 + }, + { + "epoch": 0.61, + "learning_rate": 7.894281293280284e-05, + "loss": 1.0408, + "step": 20455 + }, + { + "epoch": 0.61, + "learning_rate": 7.889186935865425e-05, + "loss": 0.9961, + "step": 20460 + }, + { + "epoch": 0.61, + "learning_rate": 7.884093151740348e-05, + "loss": 1.0027, + "step": 20465 + }, + { + "epoch": 0.61, + "learning_rate": 7.878999942288514e-05, + "loss": 1.063, + "step": 20470 + }, + { + "epoch": 0.61, + "learning_rate": 7.873907308893218e-05, + "loss": 1.0017, + "step": 20475 + }, + { + "epoch": 0.61, + "learning_rate": 7.868815252937604e-05, + "loss": 1.1122, + "step": 20480 + }, + { + "epoch": 0.61, + "learning_rate": 7.863723775804651e-05, + "loss": 1.034, + "step": 20485 + }, + { + "epoch": 0.61, + "learning_rate": 7.858632878877199e-05, + "loss": 1.0552, + "step": 20490 + }, + { + "epoch": 0.61, + "learning_rate": 7.85354256353791e-05, + "loss": 1.0095, + "step": 20495 + }, + { + "epoch": 0.61, + "learning_rate": 7.848452831169298e-05, + "loss": 1.0155, + "step": 20500 + }, + { + "epoch": 0.61, + "learning_rate": 7.843363683153717e-05, + "loss": 1.071, + "step": 20505 + }, + { + "epoch": 0.61, + "learning_rate": 7.838275120873367e-05, + "loss": 0.9675, + "step": 20510 + }, + { + "epoch": 0.61, + "learning_rate": 7.833187145710282e-05, + "loss": 1.0667, + "step": 20515 + }, + { + "epoch": 0.61, + "learning_rate": 7.828099759046336e-05, + "loss": 1.0239, + "step": 20520 + }, + { + "epoch": 0.61, + "learning_rate": 7.823012962263248e-05, + "loss": 0.9948, + "step": 20525 + }, + { + "epoch": 0.61, + "learning_rate": 7.817926756742576e-05, + "loss": 1.0068, + "step": 20530 + }, + { + "epoch": 0.61, + "learning_rate": 7.812841143865719e-05, + "loss": 1.0498, + "step": 20535 + }, + { + "epoch": 0.61, + "learning_rate": 7.807756125013907e-05, + "loss": 1.0642, + "step": 20540 + }, + { + "epoch": 0.61, + "learning_rate": 7.802671701568215e-05, + "loss": 1.0512, + "step": 20545 + }, + { + "epoch": 0.61, + "learning_rate": 7.797587874909561e-05, + "loss": 1.0076, + "step": 20550 + }, + { + "epoch": 0.61, + "learning_rate": 7.79250464641869e-05, + "loss": 1.0092, + "step": 20555 + }, + { + "epoch": 0.61, + "learning_rate": 7.787422017476195e-05, + "loss": 1.0627, + "step": 20560 + }, + { + "epoch": 0.61, + "learning_rate": 7.782339989462493e-05, + "loss": 0.9308, + "step": 20565 + }, + { + "epoch": 0.61, + "learning_rate": 7.777258563757851e-05, + "loss": 0.9622, + "step": 20570 + }, + { + "epoch": 0.61, + "learning_rate": 7.772177741742369e-05, + "loss": 1.0578, + "step": 20575 + }, + { + "epoch": 0.61, + "learning_rate": 7.767097524795979e-05, + "loss": 1.0613, + "step": 20580 + }, + { + "epoch": 0.61, + "learning_rate": 7.762017914298444e-05, + "loss": 0.9722, + "step": 20585 + }, + { + "epoch": 0.61, + "learning_rate": 7.756938911629385e-05, + "loss": 0.9329, + "step": 20590 + }, + { + "epoch": 0.61, + "learning_rate": 7.751860518168229e-05, + "loss": 1.0925, + "step": 20595 + }, + { + "epoch": 0.62, + "learning_rate": 7.746782735294257e-05, + "loss": 1.064, + "step": 20600 + }, + { + "epoch": 0.62, + "learning_rate": 7.741705564386572e-05, + "loss": 1.0534, + "step": 20605 + }, + { + "epoch": 0.62, + "learning_rate": 7.736629006824127e-05, + "loss": 1.0442, + "step": 20610 + }, + { + "epoch": 0.62, + "learning_rate": 7.731553063985688e-05, + "loss": 1.0477, + "step": 20615 + }, + { + "epoch": 0.62, + "learning_rate": 7.726477737249869e-05, + "loss": 1.0563, + "step": 20620 + }, + { + "epoch": 0.62, + "learning_rate": 7.721403027995112e-05, + "loss": 1.014, + "step": 20625 + }, + { + "epoch": 0.62, + "learning_rate": 7.716328937599684e-05, + "loss": 1.0804, + "step": 20630 + }, + { + "epoch": 0.62, + "learning_rate": 7.711255467441702e-05, + "loss": 0.9797, + "step": 20635 + }, + { + "epoch": 0.62, + "learning_rate": 7.706182618899099e-05, + "loss": 1.0717, + "step": 20640 + }, + { + "epoch": 0.62, + "learning_rate": 7.701110393349641e-05, + "loss": 0.9689, + "step": 20645 + }, + { + "epoch": 0.62, + "learning_rate": 7.696038792170928e-05, + "loss": 1.0407, + "step": 20650 + }, + { + "epoch": 0.62, + "learning_rate": 7.690967816740394e-05, + "loss": 1.0465, + "step": 20655 + }, + { + "epoch": 0.62, + "learning_rate": 7.685897468435299e-05, + "loss": 1.0416, + "step": 20660 + }, + { + "epoch": 0.62, + "learning_rate": 7.680827748632728e-05, + "loss": 1.019, + "step": 20665 + }, + { + "epoch": 0.62, + "learning_rate": 7.675758658709599e-05, + "loss": 1.0309, + "step": 20670 + }, + { + "epoch": 0.62, + "learning_rate": 7.670690200042667e-05, + "loss": 1.071, + "step": 20675 + }, + { + "epoch": 0.62, + "learning_rate": 7.665622374008506e-05, + "loss": 1.1, + "step": 20680 + }, + { + "epoch": 0.62, + "learning_rate": 7.660555181983518e-05, + "loss": 1.0473, + "step": 20685 + }, + { + "epoch": 0.62, + "learning_rate": 7.655488625343934e-05, + "loss": 1.0667, + "step": 20690 + }, + { + "epoch": 0.62, + "learning_rate": 7.650422705465818e-05, + "loss": 1.0735, + "step": 20695 + }, + { + "epoch": 0.62, + "learning_rate": 7.645357423725057e-05, + "loss": 1.0172, + "step": 20700 + }, + { + "epoch": 0.62, + "learning_rate": 7.640292781497365e-05, + "loss": 1.059, + "step": 20705 + }, + { + "epoch": 0.62, + "learning_rate": 7.635228780158274e-05, + "loss": 0.9599, + "step": 20710 + }, + { + "epoch": 0.62, + "learning_rate": 7.630165421083161e-05, + "loss": 1.0773, + "step": 20715 + }, + { + "epoch": 0.62, + "learning_rate": 7.625102705647212e-05, + "loss": 0.9966, + "step": 20720 + }, + { + "epoch": 0.62, + "learning_rate": 7.620040635225444e-05, + "loss": 0.9644, + "step": 20725 + }, + { + "epoch": 0.62, + "learning_rate": 7.614979211192693e-05, + "loss": 1.0969, + "step": 20730 + }, + { + "epoch": 0.62, + "learning_rate": 7.609918434923637e-05, + "loss": 1.0133, + "step": 20735 + }, + { + "epoch": 0.62, + "learning_rate": 7.604858307792758e-05, + "loss": 1.1019, + "step": 20740 + }, + { + "epoch": 0.62, + "learning_rate": 7.599798831174371e-05, + "loss": 1.0323, + "step": 20745 + }, + { + "epoch": 0.62, + "learning_rate": 7.59474000644261e-05, + "loss": 1.0687, + "step": 20750 + }, + { + "epoch": 0.62, + "learning_rate": 7.589681834971443e-05, + "loss": 1.0403, + "step": 20755 + }, + { + "epoch": 0.62, + "learning_rate": 7.584624318134643e-05, + "loss": 1.0164, + "step": 20760 + }, + { + "epoch": 0.62, + "learning_rate": 7.579567457305821e-05, + "loss": 1.0435, + "step": 20765 + }, + { + "epoch": 0.62, + "learning_rate": 7.574511253858399e-05, + "loss": 1.0278, + "step": 20770 + }, + { + "epoch": 0.62, + "learning_rate": 7.569455709165629e-05, + "loss": 0.972, + "step": 20775 + }, + { + "epoch": 0.62, + "learning_rate": 7.564400824600583e-05, + "loss": 1.0591, + "step": 20780 + }, + { + "epoch": 0.62, + "learning_rate": 7.559346601536142e-05, + "loss": 1.0275, + "step": 20785 + }, + { + "epoch": 0.62, + "learning_rate": 7.55429304134502e-05, + "loss": 1.1189, + "step": 20790 + }, + { + "epoch": 0.62, + "learning_rate": 7.549240145399749e-05, + "loss": 1.0474, + "step": 20795 + }, + { + "epoch": 0.62, + "learning_rate": 7.544187915072676e-05, + "loss": 1.0348, + "step": 20800 + }, + { + "epoch": 0.62, + "learning_rate": 7.539136351735975e-05, + "loss": 1.1389, + "step": 20805 + }, + { + "epoch": 0.62, + "learning_rate": 7.534085456761625e-05, + "loss": 1.0451, + "step": 20810 + }, + { + "epoch": 0.62, + "learning_rate": 7.52903523152144e-05, + "loss": 1.095, + "step": 20815 + }, + { + "epoch": 0.62, + "learning_rate": 7.52398567738704e-05, + "loss": 1.0086, + "step": 20820 + }, + { + "epoch": 0.62, + "learning_rate": 7.518936795729872e-05, + "loss": 1.0101, + "step": 20825 + }, + { + "epoch": 0.62, + "learning_rate": 7.513888587921189e-05, + "loss": 1.0322, + "step": 20830 + }, + { + "epoch": 0.62, + "learning_rate": 7.508841055332069e-05, + "loss": 0.9902, + "step": 20835 + }, + { + "epoch": 0.62, + "learning_rate": 7.503794199333408e-05, + "loss": 1.0357, + "step": 20840 + }, + { + "epoch": 0.62, + "learning_rate": 7.498748021295914e-05, + "loss": 1.0299, + "step": 20845 + }, + { + "epoch": 0.62, + "learning_rate": 7.493702522590114e-05, + "loss": 0.963, + "step": 20850 + }, + { + "epoch": 0.62, + "learning_rate": 7.488657704586342e-05, + "loss": 1.0577, + "step": 20855 + }, + { + "epoch": 0.62, + "learning_rate": 7.48361356865476e-05, + "loss": 1.0572, + "step": 20860 + }, + { + "epoch": 0.62, + "learning_rate": 7.478570116165335e-05, + "loss": 1.035, + "step": 20865 + }, + { + "epoch": 0.62, + "learning_rate": 7.473527348487859e-05, + "loss": 1.0197, + "step": 20870 + }, + { + "epoch": 0.62, + "learning_rate": 7.468485266991916e-05, + "loss": 0.9865, + "step": 20875 + }, + { + "epoch": 0.62, + "learning_rate": 7.463443873046936e-05, + "loss": 1.0324, + "step": 20880 + }, + { + "epoch": 0.62, + "learning_rate": 7.458403168022133e-05, + "loss": 1.0307, + "step": 20885 + }, + { + "epoch": 0.62, + "learning_rate": 7.45336315328655e-05, + "loss": 1.0425, + "step": 20890 + }, + { + "epoch": 0.62, + "learning_rate": 7.448323830209037e-05, + "loss": 0.9878, + "step": 20895 + }, + { + "epoch": 0.62, + "learning_rate": 7.443285200158256e-05, + "loss": 0.9753, + "step": 20900 + }, + { + "epoch": 0.62, + "learning_rate": 7.43824726450269e-05, + "loss": 1.0525, + "step": 20905 + }, + { + "epoch": 0.62, + "learning_rate": 7.433210024610615e-05, + "loss": 1.0253, + "step": 20910 + }, + { + "epoch": 0.62, + "learning_rate": 7.428173481850131e-05, + "loss": 1.0437, + "step": 20915 + }, + { + "epoch": 0.62, + "learning_rate": 7.42313763758915e-05, + "loss": 1.075, + "step": 20920 + }, + { + "epoch": 0.62, + "learning_rate": 7.418102493195391e-05, + "loss": 1.0196, + "step": 20925 + }, + { + "epoch": 0.62, + "learning_rate": 7.413068050036379e-05, + "loss": 1.0875, + "step": 20930 + }, + { + "epoch": 0.63, + "learning_rate": 7.40803430947945e-05, + "loss": 1.065, + "step": 20935 + }, + { + "epoch": 0.63, + "learning_rate": 7.403001272891758e-05, + "loss": 1.1119, + "step": 20940 + }, + { + "epoch": 0.63, + "learning_rate": 7.397968941640254e-05, + "loss": 1.0132, + "step": 20945 + }, + { + "epoch": 0.63, + "learning_rate": 7.392937317091704e-05, + "loss": 1.0434, + "step": 20950 + }, + { + "epoch": 0.63, + "learning_rate": 7.387906400612678e-05, + "loss": 1.0286, + "step": 20955 + }, + { + "epoch": 0.63, + "learning_rate": 7.382876193569559e-05, + "loss": 1.0776, + "step": 20960 + }, + { + "epoch": 0.63, + "learning_rate": 7.377846697328533e-05, + "loss": 1.0002, + "step": 20965 + }, + { + "epoch": 0.63, + "learning_rate": 7.372817913255598e-05, + "loss": 1.0014, + "step": 20970 + }, + { + "epoch": 0.63, + "learning_rate": 7.367789842716546e-05, + "loss": 1.03, + "step": 20975 + }, + { + "epoch": 0.63, + "learning_rate": 7.362762487076995e-05, + "loss": 1.0707, + "step": 20980 + }, + { + "epoch": 0.63, + "learning_rate": 7.357735847702351e-05, + "loss": 0.9777, + "step": 20985 + }, + { + "epoch": 0.63, + "learning_rate": 7.352709925957837e-05, + "loss": 1.0573, + "step": 20990 + }, + { + "epoch": 0.63, + "learning_rate": 7.34768472320847e-05, + "loss": 1.0957, + "step": 20995 + }, + { + "epoch": 0.63, + "learning_rate": 7.342660240819087e-05, + "loss": 1.0524, + "step": 21000 + }, + { + "epoch": 0.63, + "learning_rate": 7.337636480154314e-05, + "loss": 1.1013, + "step": 21005 + }, + { + "epoch": 0.63, + "learning_rate": 7.33261344257859e-05, + "loss": 0.9895, + "step": 21010 + }, + { + "epoch": 0.63, + "learning_rate": 7.327591129456153e-05, + "loss": 1.099, + "step": 21015 + }, + { + "epoch": 0.63, + "learning_rate": 7.32256954215105e-05, + "loss": 1.0233, + "step": 21020 + }, + { + "epoch": 0.63, + "learning_rate": 7.317548682027127e-05, + "loss": 1.0572, + "step": 21025 + }, + { + "epoch": 0.63, + "learning_rate": 7.312528550448033e-05, + "loss": 1.0569, + "step": 21030 + }, + { + "epoch": 0.63, + "learning_rate": 7.307509148777214e-05, + "loss": 1.0385, + "step": 21035 + }, + { + "epoch": 0.63, + "learning_rate": 7.302490478377925e-05, + "loss": 1.0655, + "step": 21040 + }, + { + "epoch": 0.63, + "learning_rate": 7.297472540613227e-05, + "loss": 1.01, + "step": 21045 + }, + { + "epoch": 0.63, + "learning_rate": 7.292455336845967e-05, + "loss": 1.0154, + "step": 21050 + }, + { + "epoch": 0.63, + "learning_rate": 7.287438868438804e-05, + "loss": 1.0352, + "step": 21055 + }, + { + "epoch": 0.63, + "learning_rate": 7.282423136754191e-05, + "loss": 1.0227, + "step": 21060 + }, + { + "epoch": 0.63, + "learning_rate": 7.27740814315439e-05, + "loss": 1.0199, + "step": 21065 + }, + { + "epoch": 0.63, + "learning_rate": 7.272393889001454e-05, + "loss": 1.0171, + "step": 21070 + }, + { + "epoch": 0.63, + "learning_rate": 7.267380375657234e-05, + "loss": 1.0911, + "step": 21075 + }, + { + "epoch": 0.63, + "learning_rate": 7.262367604483384e-05, + "loss": 1.0005, + "step": 21080 + }, + { + "epoch": 0.63, + "learning_rate": 7.257355576841363e-05, + "loss": 0.9668, + "step": 21085 + }, + { + "epoch": 0.63, + "learning_rate": 7.252344294092413e-05, + "loss": 1.0422, + "step": 21090 + }, + { + "epoch": 0.63, + "learning_rate": 7.24733375759759e-05, + "loss": 1.0111, + "step": 21095 + }, + { + "epoch": 0.63, + "learning_rate": 7.242323968717727e-05, + "loss": 1.041, + "step": 21100 + }, + { + "epoch": 0.63, + "learning_rate": 7.237314928813477e-05, + "loss": 1.115, + "step": 21105 + }, + { + "epoch": 0.63, + "learning_rate": 7.232306639245273e-05, + "loss": 1.0297, + "step": 21110 + }, + { + "epoch": 0.63, + "learning_rate": 7.227299101373352e-05, + "loss": 1.0608, + "step": 21115 + }, + { + "epoch": 0.63, + "learning_rate": 7.222292316557739e-05, + "loss": 1.0178, + "step": 21120 + }, + { + "epoch": 0.63, + "learning_rate": 7.217286286158272e-05, + "loss": 1.1094, + "step": 21125 + }, + { + "epoch": 0.63, + "learning_rate": 7.212281011534561e-05, + "loss": 1.0823, + "step": 21130 + }, + { + "epoch": 0.63, + "learning_rate": 7.207276494046027e-05, + "loss": 1.0058, + "step": 21135 + }, + { + "epoch": 0.63, + "learning_rate": 7.202272735051877e-05, + "loss": 1.0488, + "step": 21140 + }, + { + "epoch": 0.63, + "learning_rate": 7.197269735911121e-05, + "loss": 1.007, + "step": 21145 + }, + { + "epoch": 0.63, + "learning_rate": 7.19226749798255e-05, + "loss": 0.9527, + "step": 21150 + }, + { + "epoch": 0.63, + "learning_rate": 7.18726602262476e-05, + "loss": 1.003, + "step": 21155 + }, + { + "epoch": 0.63, + "learning_rate": 7.18226531119613e-05, + "loss": 0.9969, + "step": 21160 + }, + { + "epoch": 0.63, + "learning_rate": 7.177265365054842e-05, + "loss": 1.0685, + "step": 21165 + }, + { + "epoch": 0.63, + "learning_rate": 7.172266185558865e-05, + "loss": 1.0057, + "step": 21170 + }, + { + "epoch": 0.63, + "learning_rate": 7.167267774065956e-05, + "loss": 1.0718, + "step": 21175 + }, + { + "epoch": 0.63, + "learning_rate": 7.162270131933664e-05, + "loss": 1.0914, + "step": 21180 + }, + { + "epoch": 0.63, + "learning_rate": 7.157273260519337e-05, + "loss": 1.0093, + "step": 21185 + }, + { + "epoch": 0.63, + "learning_rate": 7.15227716118011e-05, + "loss": 1.0721, + "step": 21190 + }, + { + "epoch": 0.63, + "learning_rate": 7.147281835272906e-05, + "loss": 1.0187, + "step": 21195 + }, + { + "epoch": 0.63, + "learning_rate": 7.14228728415443e-05, + "loss": 1.0526, + "step": 21200 + }, + { + "epoch": 0.63, + "learning_rate": 7.137293509181197e-05, + "loss": 0.9904, + "step": 21205 + }, + { + "epoch": 0.63, + "learning_rate": 7.132300511709495e-05, + "loss": 0.9864, + "step": 21210 + }, + { + "epoch": 0.63, + "learning_rate": 7.127308293095408e-05, + "loss": 1.0133, + "step": 21215 + }, + { + "epoch": 0.63, + "learning_rate": 7.122316854694797e-05, + "loss": 1.0415, + "step": 21220 + }, + { + "epoch": 0.63, + "learning_rate": 7.117326197863333e-05, + "loss": 0.9956, + "step": 21225 + }, + { + "epoch": 0.63, + "learning_rate": 7.112336323956453e-05, + "loss": 0.9833, + "step": 21230 + }, + { + "epoch": 0.63, + "learning_rate": 7.107347234329393e-05, + "loss": 1.0292, + "step": 21235 + }, + { + "epoch": 0.63, + "learning_rate": 7.102358930337174e-05, + "loss": 0.9817, + "step": 21240 + }, + { + "epoch": 0.63, + "learning_rate": 7.097371413334596e-05, + "loss": 0.9572, + "step": 21245 + }, + { + "epoch": 0.63, + "learning_rate": 7.092384684676262e-05, + "loss": 1.0192, + "step": 21250 + }, + { + "epoch": 0.63, + "learning_rate": 7.087398745716544e-05, + "loss": 1.0864, + "step": 21255 + }, + { + "epoch": 0.63, + "learning_rate": 7.082413597809612e-05, + "loss": 1.0634, + "step": 21260 + }, + { + "epoch": 0.63, + "learning_rate": 7.077429242309405e-05, + "loss": 1.0421, + "step": 21265 + }, + { + "epoch": 0.64, + "learning_rate": 7.07244568056967e-05, + "loss": 0.997, + "step": 21270 + }, + { + "epoch": 0.64, + "learning_rate": 7.067462913943919e-05, + "loss": 0.977, + "step": 21275 + }, + { + "epoch": 0.64, + "learning_rate": 7.062480943785455e-05, + "loss": 1.0244, + "step": 21280 + }, + { + "epoch": 0.64, + "learning_rate": 7.057499771447364e-05, + "loss": 0.9681, + "step": 21285 + }, + { + "epoch": 0.64, + "learning_rate": 7.05251939828252e-05, + "loss": 1.0397, + "step": 21290 + }, + { + "epoch": 0.64, + "learning_rate": 7.047539825643572e-05, + "loss": 0.9898, + "step": 21295 + }, + { + "epoch": 0.64, + "learning_rate": 7.042561054882956e-05, + "loss": 0.9757, + "step": 21300 + }, + { + "epoch": 0.64, + "learning_rate": 7.037583087352886e-05, + "loss": 1.0079, + "step": 21305 + }, + { + "epoch": 0.64, + "learning_rate": 7.032605924405369e-05, + "loss": 1.0019, + "step": 21310 + }, + { + "epoch": 0.64, + "learning_rate": 7.027629567392185e-05, + "loss": 1.012, + "step": 21315 + }, + { + "epoch": 0.64, + "learning_rate": 7.02265401766489e-05, + "loss": 1.0205, + "step": 21320 + }, + { + "epoch": 0.64, + "learning_rate": 7.017679276574824e-05, + "loss": 1.007, + "step": 21325 + }, + { + "epoch": 0.64, + "learning_rate": 7.012705345473122e-05, + "loss": 0.9901, + "step": 21330 + }, + { + "epoch": 0.64, + "learning_rate": 7.00773222571068e-05, + "loss": 1.0259, + "step": 21335 + }, + { + "epoch": 0.64, + "learning_rate": 7.002759918638183e-05, + "loss": 1.0575, + "step": 21340 + }, + { + "epoch": 0.64, + "learning_rate": 6.997788425606087e-05, + "loss": 0.9954, + "step": 21345 + }, + { + "epoch": 0.64, + "learning_rate": 6.992817747964639e-05, + "loss": 0.9667, + "step": 21350 + }, + { + "epoch": 0.64, + "learning_rate": 6.987847887063857e-05, + "loss": 1.0434, + "step": 21355 + }, + { + "epoch": 0.64, + "learning_rate": 6.982878844253542e-05, + "loss": 1.0775, + "step": 21360 + }, + { + "epoch": 0.64, + "learning_rate": 6.977910620883259e-05, + "loss": 1.0187, + "step": 21365 + }, + { + "epoch": 0.64, + "learning_rate": 6.972943218302374e-05, + "loss": 1.0345, + "step": 21370 + }, + { + "epoch": 0.64, + "learning_rate": 6.967976637860009e-05, + "loss": 0.9984, + "step": 21375 + }, + { + "epoch": 0.64, + "learning_rate": 6.963010880905071e-05, + "loss": 0.9697, + "step": 21380 + }, + { + "epoch": 0.64, + "learning_rate": 6.958045948786245e-05, + "loss": 1.0702, + "step": 21385 + }, + { + "epoch": 0.64, + "learning_rate": 6.953081842851992e-05, + "loss": 0.9962, + "step": 21390 + }, + { + "epoch": 0.64, + "learning_rate": 6.948118564450543e-05, + "loss": 1.1215, + "step": 21395 + }, + { + "epoch": 0.64, + "learning_rate": 6.943156114929907e-05, + "loss": 1.0921, + "step": 21400 + }, + { + "epoch": 0.64, + "learning_rate": 6.938194495637871e-05, + "loss": 1.0253, + "step": 21405 + }, + { + "epoch": 0.64, + "learning_rate": 6.933233707921996e-05, + "loss": 1.0581, + "step": 21410 + }, + { + "epoch": 0.64, + "learning_rate": 6.928273753129613e-05, + "loss": 0.9602, + "step": 21415 + }, + { + "epoch": 0.64, + "learning_rate": 6.92331463260783e-05, + "loss": 1.0642, + "step": 21420 + }, + { + "epoch": 0.64, + "learning_rate": 6.918356347703523e-05, + "loss": 1.0929, + "step": 21425 + }, + { + "epoch": 0.64, + "learning_rate": 6.913398899763352e-05, + "loss": 1.0574, + "step": 21430 + }, + { + "epoch": 0.64, + "learning_rate": 6.908442290133743e-05, + "loss": 1.1696, + "step": 21435 + }, + { + "epoch": 0.64, + "learning_rate": 6.903486520160893e-05, + "loss": 0.9912, + "step": 21440 + }, + { + "epoch": 0.64, + "learning_rate": 6.898531591190771e-05, + "loss": 1.0293, + "step": 21445 + }, + { + "epoch": 0.64, + "learning_rate": 6.893577504569119e-05, + "loss": 0.9497, + "step": 21450 + }, + { + "epoch": 0.64, + "learning_rate": 6.888624261641453e-05, + "loss": 1.0432, + "step": 21455 + }, + { + "epoch": 0.64, + "learning_rate": 6.883671863753059e-05, + "loss": 1.0041, + "step": 21460 + }, + { + "epoch": 0.64, + "learning_rate": 6.878720312248985e-05, + "loss": 1.034, + "step": 21465 + }, + { + "epoch": 0.64, + "learning_rate": 6.873769608474059e-05, + "loss": 1.1052, + "step": 21470 + }, + { + "epoch": 0.64, + "learning_rate": 6.868819753772879e-05, + "loss": 1.0642, + "step": 21475 + }, + { + "epoch": 0.64, + "learning_rate": 6.863870749489805e-05, + "loss": 0.964, + "step": 21480 + }, + { + "epoch": 0.64, + "learning_rate": 6.858922596968974e-05, + "loss": 1.0292, + "step": 21485 + }, + { + "epoch": 0.64, + "learning_rate": 6.85397529755428e-05, + "loss": 1.0618, + "step": 21490 + }, + { + "epoch": 0.64, + "learning_rate": 6.849028852589403e-05, + "loss": 1.0198, + "step": 21495 + }, + { + "epoch": 0.64, + "learning_rate": 6.844083263417773e-05, + "loss": 1.0171, + "step": 21500 + }, + { + "epoch": 0.64, + "learning_rate": 6.839138531382603e-05, + "loss": 1.0647, + "step": 21505 + }, + { + "epoch": 0.64, + "learning_rate": 6.834194657826853e-05, + "loss": 0.9924, + "step": 21510 + }, + { + "epoch": 0.64, + "learning_rate": 6.82925164409328e-05, + "loss": 1.1799, + "step": 21515 + }, + { + "epoch": 0.64, + "learning_rate": 6.824309491524377e-05, + "loss": 1.0201, + "step": 21520 + }, + { + "epoch": 0.64, + "learning_rate": 6.81936820146242e-05, + "loss": 1.0877, + "step": 21525 + }, + { + "epoch": 0.64, + "learning_rate": 6.814427775249444e-05, + "loss": 1.0297, + "step": 21530 + }, + { + "epoch": 0.64, + "learning_rate": 6.809488214227262e-05, + "loss": 1.0273, + "step": 21535 + }, + { + "epoch": 0.64, + "learning_rate": 6.804549519737431e-05, + "loss": 1.0013, + "step": 21540 + }, + { + "epoch": 0.64, + "learning_rate": 6.79961169312129e-05, + "loss": 1.0006, + "step": 21545 + }, + { + "epoch": 0.64, + "learning_rate": 6.794674735719932e-05, + "loss": 0.9971, + "step": 21550 + }, + { + "epoch": 0.64, + "learning_rate": 6.789738648874223e-05, + "loss": 1.036, + "step": 21555 + }, + { + "epoch": 0.64, + "learning_rate": 6.784803433924788e-05, + "loss": 1.0316, + "step": 21560 + }, + { + "epoch": 0.64, + "learning_rate": 6.77986909221201e-05, + "loss": 1.018, + "step": 21565 + }, + { + "epoch": 0.64, + "learning_rate": 6.77493562507604e-05, + "loss": 1.042, + "step": 21570 + }, + { + "epoch": 0.64, + "learning_rate": 6.770003033856795e-05, + "loss": 1.0631, + "step": 21575 + }, + { + "epoch": 0.64, + "learning_rate": 6.765071319893951e-05, + "loss": 1.0074, + "step": 21580 + }, + { + "epoch": 0.64, + "learning_rate": 6.760140484526943e-05, + "loss": 0.9571, + "step": 21585 + }, + { + "epoch": 0.64, + "learning_rate": 6.755210529094966e-05, + "loss": 1.0485, + "step": 21590 + }, + { + "epoch": 0.64, + "learning_rate": 6.750281454936984e-05, + "loss": 1.0482, + "step": 21595 + }, + { + "epoch": 0.64, + "learning_rate": 6.745353263391718e-05, + "loss": 1.0444, + "step": 21600 + }, + { + "epoch": 0.65, + "learning_rate": 6.740425955797645e-05, + "loss": 1.016, + "step": 21605 + }, + { + "epoch": 0.65, + "learning_rate": 6.735499533493003e-05, + "loss": 0.9718, + "step": 21610 + }, + { + "epoch": 0.65, + "learning_rate": 6.7305739978158e-05, + "loss": 1.0851, + "step": 21615 + }, + { + "epoch": 0.65, + "learning_rate": 6.725649350103788e-05, + "loss": 0.9837, + "step": 21620 + }, + { + "epoch": 0.65, + "learning_rate": 6.72072559169449e-05, + "loss": 1.1297, + "step": 21625 + }, + { + "epoch": 0.65, + "learning_rate": 6.71580272392518e-05, + "loss": 1.0344, + "step": 21630 + }, + { + "epoch": 0.65, + "learning_rate": 6.710880748132887e-05, + "loss": 1.0933, + "step": 21635 + }, + { + "epoch": 0.65, + "learning_rate": 6.705959665654411e-05, + "loss": 1.0745, + "step": 21640 + }, + { + "epoch": 0.65, + "learning_rate": 6.7010394778263e-05, + "loss": 1.0246, + "step": 21645 + }, + { + "epoch": 0.65, + "learning_rate": 6.69612018598486e-05, + "loss": 1.0386, + "step": 21650 + }, + { + "epoch": 0.65, + "learning_rate": 6.691201791466148e-05, + "loss": 1.0815, + "step": 21655 + }, + { + "epoch": 0.65, + "learning_rate": 6.686284295605996e-05, + "loss": 0.9723, + "step": 21660 + }, + { + "epoch": 0.65, + "learning_rate": 6.681367699739971e-05, + "loss": 1.1018, + "step": 21665 + }, + { + "epoch": 0.65, + "learning_rate": 6.676452005203406e-05, + "loss": 0.9758, + "step": 21670 + }, + { + "epoch": 0.65, + "learning_rate": 6.671537213331383e-05, + "loss": 1.0906, + "step": 21675 + }, + { + "epoch": 0.65, + "learning_rate": 6.666623325458754e-05, + "loss": 1.0115, + "step": 21680 + }, + { + "epoch": 0.65, + "learning_rate": 6.661710342920102e-05, + "loss": 1.0424, + "step": 21685 + }, + { + "epoch": 0.65, + "learning_rate": 6.656798267049786e-05, + "loss": 1.0698, + "step": 21690 + }, + { + "epoch": 0.65, + "learning_rate": 6.651887099181902e-05, + "loss": 0.9792, + "step": 21695 + }, + { + "epoch": 0.65, + "learning_rate": 6.646976840650312e-05, + "loss": 1.0206, + "step": 21700 + }, + { + "epoch": 0.65, + "learning_rate": 6.642067492788626e-05, + "loss": 0.9743, + "step": 21705 + }, + { + "epoch": 0.65, + "learning_rate": 6.637159056930204e-05, + "loss": 1.0986, + "step": 21710 + }, + { + "epoch": 0.65, + "learning_rate": 6.632251534408161e-05, + "loss": 0.9439, + "step": 21715 + }, + { + "epoch": 0.65, + "learning_rate": 6.627344926555363e-05, + "loss": 1.0671, + "step": 21720 + }, + { + "epoch": 0.65, + "learning_rate": 6.622439234704433e-05, + "loss": 1.0098, + "step": 21725 + }, + { + "epoch": 0.65, + "learning_rate": 6.617534460187737e-05, + "loss": 1.0365, + "step": 21730 + }, + { + "epoch": 0.65, + "learning_rate": 6.612630604337394e-05, + "loss": 1.0042, + "step": 21735 + }, + { + "epoch": 0.65, + "learning_rate": 6.607727668485276e-05, + "loss": 1.0486, + "step": 21740 + }, + { + "epoch": 0.65, + "learning_rate": 6.602825653963006e-05, + "loss": 0.9115, + "step": 21745 + }, + { + "epoch": 0.65, + "learning_rate": 6.597924562101956e-05, + "loss": 0.977, + "step": 21750 + }, + { + "epoch": 0.65, + "learning_rate": 6.593024394233236e-05, + "loss": 1.025, + "step": 21755 + }, + { + "epoch": 0.65, + "learning_rate": 6.58812515168773e-05, + "loss": 1.0721, + "step": 21760 + }, + { + "epoch": 0.65, + "learning_rate": 6.583226835796046e-05, + "loss": 1.0149, + "step": 21765 + }, + { + "epoch": 0.65, + "learning_rate": 6.578329447888552e-05, + "loss": 1.0312, + "step": 21770 + }, + { + "epoch": 0.65, + "learning_rate": 6.573432989295363e-05, + "loss": 1.0394, + "step": 21775 + }, + { + "epoch": 0.65, + "learning_rate": 6.568537461346345e-05, + "loss": 0.974, + "step": 21780 + }, + { + "epoch": 0.65, + "learning_rate": 6.5636428653711e-05, + "loss": 0.9556, + "step": 21785 + }, + { + "epoch": 0.65, + "learning_rate": 6.558749202698988e-05, + "loss": 1.0889, + "step": 21790 + }, + { + "epoch": 0.65, + "learning_rate": 6.553856474659108e-05, + "loss": 1.0775, + "step": 21795 + }, + { + "epoch": 0.65, + "learning_rate": 6.548964682580312e-05, + "loss": 0.9611, + "step": 21800 + }, + { + "epoch": 0.65, + "learning_rate": 6.544073827791197e-05, + "loss": 0.9315, + "step": 21805 + }, + { + "epoch": 0.65, + "learning_rate": 6.539183911620097e-05, + "loss": 1.0437, + "step": 21810 + }, + { + "epoch": 0.65, + "learning_rate": 6.534294935395096e-05, + "loss": 1.0201, + "step": 21815 + }, + { + "epoch": 0.65, + "learning_rate": 6.52940690044403e-05, + "loss": 0.9683, + "step": 21820 + }, + { + "epoch": 0.65, + "learning_rate": 6.524519808094472e-05, + "loss": 1.0442, + "step": 21825 + }, + { + "epoch": 0.65, + "learning_rate": 6.519633659673736e-05, + "loss": 1.0363, + "step": 21830 + }, + { + "epoch": 0.65, + "learning_rate": 6.514748456508885e-05, + "loss": 1.0045, + "step": 21835 + }, + { + "epoch": 0.65, + "learning_rate": 6.509864199926722e-05, + "loss": 1.0309, + "step": 21840 + }, + { + "epoch": 0.65, + "learning_rate": 6.5049808912538e-05, + "loss": 1.0241, + "step": 21845 + }, + { + "epoch": 0.65, + "learning_rate": 6.500098531816408e-05, + "loss": 1.053, + "step": 21850 + }, + { + "epoch": 0.65, + "learning_rate": 6.495217122940573e-05, + "loss": 1.0612, + "step": 21855 + }, + { + "epoch": 0.65, + "learning_rate": 6.490336665952075e-05, + "loss": 1.0393, + "step": 21860 + }, + { + "epoch": 0.65, + "learning_rate": 6.485457162176427e-05, + "loss": 0.9692, + "step": 21865 + }, + { + "epoch": 0.65, + "learning_rate": 6.48057861293889e-05, + "loss": 1.0318, + "step": 21870 + }, + { + "epoch": 0.65, + "learning_rate": 6.475701019564457e-05, + "loss": 1.0194, + "step": 21875 + }, + { + "epoch": 0.65, + "learning_rate": 6.470824383377867e-05, + "loss": 0.9939, + "step": 21880 + }, + { + "epoch": 0.65, + "learning_rate": 6.465948705703601e-05, + "loss": 0.9914, + "step": 21885 + }, + { + "epoch": 0.65, + "learning_rate": 6.461073987865873e-05, + "loss": 0.9881, + "step": 21890 + }, + { + "epoch": 0.65, + "learning_rate": 6.456200231188645e-05, + "loss": 1.0155, + "step": 21895 + }, + { + "epoch": 0.65, + "learning_rate": 6.451327436995605e-05, + "loss": 1.0796, + "step": 21900 + }, + { + "epoch": 0.65, + "learning_rate": 6.446455606610196e-05, + "loss": 1.087, + "step": 21905 + }, + { + "epoch": 0.65, + "learning_rate": 6.441584741355587e-05, + "loss": 1.1039, + "step": 21910 + }, + { + "epoch": 0.65, + "learning_rate": 6.43671484255469e-05, + "loss": 1.0081, + "step": 21915 + }, + { + "epoch": 0.65, + "learning_rate": 6.43184591153015e-05, + "loss": 1.0351, + "step": 21920 + }, + { + "epoch": 0.65, + "learning_rate": 6.426977949604358e-05, + "loss": 0.9946, + "step": 21925 + }, + { + "epoch": 0.65, + "learning_rate": 6.422110958099432e-05, + "loss": 1.0562, + "step": 21930 + }, + { + "epoch": 0.65, + "learning_rate": 6.417244938337232e-05, + "loss": 1.0539, + "step": 21935 + }, + { + "epoch": 0.66, + "learning_rate": 6.412379891639349e-05, + "loss": 1.0099, + "step": 21940 + }, + { + "epoch": 0.66, + "learning_rate": 6.40751581932712e-05, + "loss": 1.0153, + "step": 21945 + }, + { + "epoch": 0.66, + "learning_rate": 6.40265272272161e-05, + "loss": 1.0834, + "step": 21950 + }, + { + "epoch": 0.66, + "learning_rate": 6.397790603143616e-05, + "loss": 0.9907, + "step": 21955 + }, + { + "epoch": 0.66, + "learning_rate": 6.392929461913671e-05, + "loss": 1.0546, + "step": 21960 + }, + { + "epoch": 0.66, + "learning_rate": 6.388069300352053e-05, + "loss": 1.0626, + "step": 21965 + }, + { + "epoch": 0.66, + "learning_rate": 6.38321011977876e-05, + "loss": 1.0607, + "step": 21970 + }, + { + "epoch": 0.66, + "learning_rate": 6.378351921513533e-05, + "loss": 1.0059, + "step": 21975 + }, + { + "epoch": 0.66, + "learning_rate": 6.373494706875836e-05, + "loss": 1.0457, + "step": 21980 + }, + { + "epoch": 0.66, + "learning_rate": 6.36863847718488e-05, + "loss": 1.0239, + "step": 21985 + }, + { + "epoch": 0.66, + "learning_rate": 6.363783233759594e-05, + "loss": 1.0057, + "step": 21990 + }, + { + "epoch": 0.66, + "learning_rate": 6.358928977918654e-05, + "loss": 1.081, + "step": 21995 + }, + { + "epoch": 0.66, + "learning_rate": 6.354075710980449e-05, + "loss": 0.9775, + "step": 22000 + }, + { + "epoch": 0.66, + "learning_rate": 6.34922343426312e-05, + "loss": 1.0266, + "step": 22005 + }, + { + "epoch": 0.66, + "learning_rate": 6.344372149084523e-05, + "loss": 1.1396, + "step": 22010 + }, + { + "epoch": 0.66, + "learning_rate": 6.339521856762254e-05, + "loss": 1.018, + "step": 22015 + }, + { + "epoch": 0.66, + "learning_rate": 6.334672558613634e-05, + "loss": 1.0971, + "step": 22020 + }, + { + "epoch": 0.66, + "learning_rate": 6.32982425595572e-05, + "loss": 0.9912, + "step": 22025 + }, + { + "epoch": 0.66, + "learning_rate": 6.32497695010529e-05, + "loss": 0.9475, + "step": 22030 + }, + { + "epoch": 0.66, + "learning_rate": 6.320130642378859e-05, + "loss": 0.9811, + "step": 22035 + }, + { + "epoch": 0.66, + "learning_rate": 6.315285334092668e-05, + "loss": 1.0516, + "step": 22040 + }, + { + "epoch": 0.66, + "learning_rate": 6.310441026562681e-05, + "loss": 1.0314, + "step": 22045 + }, + { + "epoch": 0.66, + "learning_rate": 6.305597721104606e-05, + "loss": 0.9786, + "step": 22050 + }, + { + "epoch": 0.66, + "learning_rate": 6.300755419033861e-05, + "loss": 1.0361, + "step": 22055 + }, + { + "epoch": 0.66, + "learning_rate": 6.295914121665602e-05, + "loss": 0.9937, + "step": 22060 + }, + { + "epoch": 0.66, + "learning_rate": 6.291073830314706e-05, + "loss": 1.0318, + "step": 22065 + }, + { + "epoch": 0.66, + "learning_rate": 6.286234546295788e-05, + "loss": 1.0173, + "step": 22070 + }, + { + "epoch": 0.66, + "learning_rate": 6.281396270923172e-05, + "loss": 1.0642, + "step": 22075 + }, + { + "epoch": 0.66, + "learning_rate": 6.276559005510922e-05, + "loss": 0.9787, + "step": 22080 + }, + { + "epoch": 0.66, + "learning_rate": 6.271722751372821e-05, + "loss": 1.0253, + "step": 22085 + }, + { + "epoch": 0.66, + "learning_rate": 6.266887509822383e-05, + "loss": 1.0239, + "step": 22090 + }, + { + "epoch": 0.66, + "learning_rate": 6.262053282172842e-05, + "loss": 0.9752, + "step": 22095 + }, + { + "epoch": 0.66, + "learning_rate": 6.257220069737156e-05, + "loss": 1.0337, + "step": 22100 + }, + { + "epoch": 0.66, + "learning_rate": 6.25238787382801e-05, + "loss": 1.0883, + "step": 22105 + }, + { + "epoch": 0.66, + "learning_rate": 6.247556695757814e-05, + "loss": 1.0738, + "step": 22110 + }, + { + "epoch": 0.66, + "learning_rate": 6.242726536838699e-05, + "loss": 1.0183, + "step": 22115 + }, + { + "epoch": 0.66, + "learning_rate": 6.237897398382522e-05, + "loss": 1.01, + "step": 22120 + }, + { + "epoch": 0.66, + "learning_rate": 6.233069281700856e-05, + "loss": 0.998, + "step": 22125 + }, + { + "epoch": 0.66, + "learning_rate": 6.228242188105004e-05, + "loss": 0.9595, + "step": 22130 + }, + { + "epoch": 0.66, + "learning_rate": 6.223416118905992e-05, + "loss": 0.9883, + "step": 22135 + }, + { + "epoch": 0.66, + "learning_rate": 6.218591075414562e-05, + "loss": 1.0987, + "step": 22140 + }, + { + "epoch": 0.66, + "learning_rate": 6.213767058941172e-05, + "loss": 1.0562, + "step": 22145 + }, + { + "epoch": 0.66, + "learning_rate": 6.208944070796022e-05, + "loss": 1.0778, + "step": 22150 + }, + { + "epoch": 0.66, + "learning_rate": 6.204122112289012e-05, + "loss": 1.0006, + "step": 22155 + }, + { + "epoch": 0.66, + "learning_rate": 6.19930118472977e-05, + "loss": 1.0669, + "step": 22160 + }, + { + "epoch": 0.66, + "learning_rate": 6.194481289427643e-05, + "loss": 1.0686, + "step": 22165 + }, + { + "epoch": 0.66, + "learning_rate": 6.189662427691704e-05, + "loss": 0.9937, + "step": 22170 + }, + { + "epoch": 0.66, + "learning_rate": 6.184844600830735e-05, + "loss": 1.0599, + "step": 22175 + }, + { + "epoch": 0.66, + "learning_rate": 6.180027810153241e-05, + "loss": 1.0469, + "step": 22180 + }, + { + "epoch": 0.66, + "learning_rate": 6.175212056967447e-05, + "loss": 0.9516, + "step": 22185 + }, + { + "epoch": 0.66, + "learning_rate": 6.1703973425813e-05, + "loss": 0.925, + "step": 22190 + }, + { + "epoch": 0.66, + "learning_rate": 6.165583668302457e-05, + "loss": 0.9317, + "step": 22195 + }, + { + "epoch": 0.66, + "learning_rate": 6.160771035438295e-05, + "loss": 1.0561, + "step": 22200 + }, + { + "epoch": 0.66, + "learning_rate": 6.155959445295906e-05, + "loss": 1.0619, + "step": 22205 + }, + { + "epoch": 0.66, + "learning_rate": 6.15114889918211e-05, + "loss": 1.0342, + "step": 22210 + }, + { + "epoch": 0.66, + "learning_rate": 6.146339398403431e-05, + "loss": 1.0479, + "step": 22215 + }, + { + "epoch": 0.66, + "learning_rate": 6.141530944266113e-05, + "loss": 1.0351, + "step": 22220 + }, + { + "epoch": 0.66, + "learning_rate": 6.136723538076112e-05, + "loss": 1.0337, + "step": 22225 + }, + { + "epoch": 0.66, + "learning_rate": 6.131917181139112e-05, + "loss": 1.0554, + "step": 22230 + }, + { + "epoch": 0.66, + "learning_rate": 6.127111874760499e-05, + "loss": 0.9963, + "step": 22235 + }, + { + "epoch": 0.66, + "learning_rate": 6.12230762024538e-05, + "loss": 1.1237, + "step": 22240 + }, + { + "epoch": 0.66, + "learning_rate": 6.11750441889857e-05, + "loss": 0.952, + "step": 22245 + }, + { + "epoch": 0.66, + "learning_rate": 6.112702272024605e-05, + "loss": 1.0486, + "step": 22250 + }, + { + "epoch": 0.66, + "learning_rate": 6.107901180927734e-05, + "loss": 0.9922, + "step": 22255 + }, + { + "epoch": 0.66, + "learning_rate": 6.103101146911916e-05, + "loss": 1.0016, + "step": 22260 + }, + { + "epoch": 0.66, + "learning_rate": 6.0983021712808264e-05, + "loss": 1.0003, + "step": 22265 + }, + { + "epoch": 0.66, + "learning_rate": 6.093504255337844e-05, + "loss": 1.0429, + "step": 22270 + }, + { + "epoch": 0.67, + "learning_rate": 6.088707400386073e-05, + "loss": 1.1042, + "step": 22275 + }, + { + "epoch": 0.67, + "learning_rate": 6.0839116077283234e-05, + "loss": 1.0487, + "step": 22280 + }, + { + "epoch": 0.67, + "learning_rate": 6.079116878667115e-05, + "loss": 1.0229, + "step": 22285 + }, + { + "epoch": 0.67, + "learning_rate": 6.074323214504675e-05, + "loss": 0.9849, + "step": 22290 + }, + { + "epoch": 0.67, + "learning_rate": 6.069530616542958e-05, + "loss": 1.0102, + "step": 22295 + }, + { + "epoch": 0.67, + "learning_rate": 6.064739086083607e-05, + "loss": 1.0163, + "step": 22300 + }, + { + "epoch": 0.67, + "learning_rate": 6.0599486244279914e-05, + "loss": 0.968, + "step": 22305 + }, + { + "epoch": 0.67, + "learning_rate": 6.055159232877181e-05, + "loss": 1.0006, + "step": 22310 + }, + { + "epoch": 0.67, + "learning_rate": 6.050370912731964e-05, + "loss": 1.0839, + "step": 22315 + }, + { + "epoch": 0.67, + "learning_rate": 6.045583665292826e-05, + "loss": 1.1559, + "step": 22320 + }, + { + "epoch": 0.67, + "learning_rate": 6.040797491859972e-05, + "loss": 1.1212, + "step": 22325 + }, + { + "epoch": 0.67, + "learning_rate": 6.036012393733306e-05, + "loss": 1.0982, + "step": 22330 + }, + { + "epoch": 0.67, + "learning_rate": 6.03122837221245e-05, + "loss": 1.0124, + "step": 22335 + }, + { + "epoch": 0.67, + "learning_rate": 6.0264454285967274e-05, + "loss": 1.0703, + "step": 22340 + }, + { + "epoch": 0.67, + "learning_rate": 6.021663564185167e-05, + "loss": 1.0294, + "step": 22345 + }, + { + "epoch": 0.67, + "learning_rate": 6.0168827802765047e-05, + "loss": 0.9891, + "step": 22350 + }, + { + "epoch": 0.67, + "learning_rate": 6.0121030781691936e-05, + "loss": 1.0205, + "step": 22355 + }, + { + "epoch": 0.67, + "learning_rate": 6.007324459161381e-05, + "loss": 1.0753, + "step": 22360 + }, + { + "epoch": 0.67, + "learning_rate": 6.0025469245509205e-05, + "loss": 0.9947, + "step": 22365 + }, + { + "epoch": 0.67, + "learning_rate": 5.997770475635377e-05, + "loss": 1.0358, + "step": 22370 + }, + { + "epoch": 0.67, + "learning_rate": 5.992995113712019e-05, + "loss": 0.9991, + "step": 22375 + }, + { + "epoch": 0.67, + "learning_rate": 5.9882208400778195e-05, + "loss": 1.0785, + "step": 22380 + }, + { + "epoch": 0.67, + "learning_rate": 5.983447656029456e-05, + "loss": 1.068, + "step": 22385 + }, + { + "epoch": 0.67, + "learning_rate": 5.978675562863301e-05, + "loss": 1.034, + "step": 22390 + }, + { + "epoch": 0.67, + "learning_rate": 5.973904561875452e-05, + "loss": 1.0238, + "step": 22395 + }, + { + "epoch": 0.67, + "learning_rate": 5.969134654361688e-05, + "loss": 1.0538, + "step": 22400 + }, + { + "epoch": 0.67, + "learning_rate": 5.964365841617503e-05, + "loss": 1.0011, + "step": 22405 + }, + { + "epoch": 0.67, + "learning_rate": 5.9595981249380884e-05, + "loss": 1.0089, + "step": 22410 + }, + { + "epoch": 0.67, + "learning_rate": 5.954831505618346e-05, + "loss": 1.0016, + "step": 22415 + }, + { + "epoch": 0.67, + "learning_rate": 5.9500659849528664e-05, + "loss": 1.0507, + "step": 22420 + }, + { + "epoch": 0.67, + "learning_rate": 5.945301564235952e-05, + "loss": 1.0141, + "step": 22425 + }, + { + "epoch": 0.67, + "learning_rate": 5.9405382447616034e-05, + "loss": 1.0033, + "step": 22430 + }, + { + "epoch": 0.67, + "learning_rate": 5.935776027823523e-05, + "loss": 1.1135, + "step": 22435 + }, + { + "epoch": 0.67, + "learning_rate": 5.931014914715116e-05, + "loss": 1.0443, + "step": 22440 + }, + { + "epoch": 0.67, + "learning_rate": 5.92625490672948e-05, + "loss": 1.0286, + "step": 22445 + }, + { + "epoch": 0.67, + "learning_rate": 5.9214960051594185e-05, + "loss": 1.0384, + "step": 22450 + }, + { + "epoch": 0.67, + "learning_rate": 5.91673821129743e-05, + "loss": 1.0772, + "step": 22455 + }, + { + "epoch": 0.67, + "learning_rate": 5.9119815264357245e-05, + "loss": 1.0308, + "step": 22460 + }, + { + "epoch": 0.67, + "learning_rate": 5.907225951866195e-05, + "loss": 1.0125, + "step": 22465 + }, + { + "epoch": 0.67, + "learning_rate": 5.90247148888044e-05, + "loss": 1.0407, + "step": 22470 + }, + { + "epoch": 0.67, + "learning_rate": 5.897718138769755e-05, + "loss": 1.0572, + "step": 22475 + }, + { + "epoch": 0.67, + "learning_rate": 5.892965902825138e-05, + "loss": 1.0788, + "step": 22480 + }, + { + "epoch": 0.67, + "learning_rate": 5.88821478233728e-05, + "loss": 1.102, + "step": 22485 + }, + { + "epoch": 0.67, + "learning_rate": 5.8834647785965656e-05, + "loss": 1.0168, + "step": 22490 + }, + { + "epoch": 0.67, + "learning_rate": 5.8787158928930805e-05, + "loss": 1.0518, + "step": 22495 + }, + { + "epoch": 0.67, + "learning_rate": 5.8739681265166094e-05, + "loss": 1.0672, + "step": 22500 + }, + { + "epoch": 0.67, + "learning_rate": 5.869221480756628e-05, + "loss": 1.0906, + "step": 22505 + }, + { + "epoch": 0.67, + "learning_rate": 5.864475956902312e-05, + "loss": 1.0837, + "step": 22510 + }, + { + "epoch": 0.67, + "learning_rate": 5.859731556242524e-05, + "loss": 1.0575, + "step": 22515 + }, + { + "epoch": 0.67, + "learning_rate": 5.854988280065832e-05, + "loss": 1.0595, + "step": 22520 + }, + { + "epoch": 0.67, + "learning_rate": 5.8502461296604935e-05, + "loss": 1.016, + "step": 22525 + }, + { + "epoch": 0.67, + "learning_rate": 5.8455051063144614e-05, + "loss": 1.0779, + "step": 22530 + }, + { + "epoch": 0.67, + "learning_rate": 5.840765211315376e-05, + "loss": 1.034, + "step": 22535 + }, + { + "epoch": 0.67, + "learning_rate": 5.8360264459505846e-05, + "loss": 1.0922, + "step": 22540 + }, + { + "epoch": 0.67, + "learning_rate": 5.8312888115071183e-05, + "loss": 1.0531, + "step": 22545 + }, + { + "epoch": 0.67, + "learning_rate": 5.826552309271702e-05, + "loss": 1.0164, + "step": 22550 + }, + { + "epoch": 0.67, + "learning_rate": 5.821816940530749e-05, + "loss": 1.0518, + "step": 22555 + }, + { + "epoch": 0.67, + "learning_rate": 5.8170827065703745e-05, + "loss": 1.1282, + "step": 22560 + }, + { + "epoch": 0.67, + "learning_rate": 5.812349608676384e-05, + "loss": 0.995, + "step": 22565 + }, + { + "epoch": 0.67, + "learning_rate": 5.807617648134268e-05, + "loss": 1.0809, + "step": 22570 + }, + { + "epoch": 0.67, + "learning_rate": 5.8028868262292055e-05, + "loss": 1.0238, + "step": 22575 + }, + { + "epoch": 0.67, + "learning_rate": 5.798157144246077e-05, + "loss": 1.0387, + "step": 22580 + }, + { + "epoch": 0.67, + "learning_rate": 5.793428603469453e-05, + "loss": 1.0427, + "step": 22585 + }, + { + "epoch": 0.67, + "learning_rate": 5.7887012051835844e-05, + "loss": 1.0098, + "step": 22590 + }, + { + "epoch": 0.67, + "learning_rate": 5.7839749506724086e-05, + "loss": 1.0782, + "step": 22595 + }, + { + "epoch": 0.67, + "learning_rate": 5.7792498412195764e-05, + "loss": 1.0559, + "step": 22600 + }, + { + "epoch": 0.67, + "learning_rate": 5.7745258781084056e-05, + "loss": 1.0369, + "step": 22605 + }, + { + "epoch": 0.68, + "learning_rate": 5.769803062621904e-05, + "loss": 0.9849, + "step": 22610 + }, + { + "epoch": 0.68, + "learning_rate": 5.765081396042773e-05, + "loss": 1.0181, + "step": 22615 + }, + { + "epoch": 0.68, + "learning_rate": 5.760360879653412e-05, + "loss": 1.0398, + "step": 22620 + }, + { + "epoch": 0.68, + "learning_rate": 5.7556415147358875e-05, + "loss": 1.0062, + "step": 22625 + }, + { + "epoch": 0.68, + "learning_rate": 5.7509233025719646e-05, + "loss": 1.0936, + "step": 22630 + }, + { + "epoch": 0.68, + "learning_rate": 5.746206244443093e-05, + "loss": 1.06, + "step": 22635 + }, + { + "epoch": 0.68, + "learning_rate": 5.741490341630418e-05, + "loss": 0.972, + "step": 22640 + }, + { + "epoch": 0.68, + "learning_rate": 5.736775595414756e-05, + "loss": 1.0549, + "step": 22645 + }, + { + "epoch": 0.68, + "learning_rate": 5.732062007076613e-05, + "loss": 1.046, + "step": 22650 + }, + { + "epoch": 0.68, + "learning_rate": 5.727349577896194e-05, + "loss": 1.0119, + "step": 22655 + }, + { + "epoch": 0.68, + "learning_rate": 5.7226383091533685e-05, + "loss": 1.0378, + "step": 22660 + }, + { + "epoch": 0.68, + "learning_rate": 5.717928202127707e-05, + "loss": 1.0774, + "step": 22665 + }, + { + "epoch": 0.68, + "learning_rate": 5.7132192580984615e-05, + "loss": 1.0147, + "step": 22670 + }, + { + "epoch": 0.68, + "learning_rate": 5.7085114783445627e-05, + "loss": 1.0307, + "step": 22675 + }, + { + "epoch": 0.68, + "learning_rate": 5.703804864144624e-05, + "loss": 0.982, + "step": 22680 + }, + { + "epoch": 0.68, + "learning_rate": 5.699099416776947e-05, + "loss": 1.0381, + "step": 22685 + }, + { + "epoch": 0.68, + "learning_rate": 5.694395137519525e-05, + "loss": 1.0664, + "step": 22690 + }, + { + "epoch": 0.68, + "learning_rate": 5.6896920276500165e-05, + "loss": 1.0649, + "step": 22695 + }, + { + "epoch": 0.68, + "learning_rate": 5.6849900884457674e-05, + "loss": 1.0693, + "step": 22700 + }, + { + "epoch": 0.68, + "learning_rate": 5.680289321183813e-05, + "loss": 0.9577, + "step": 22705 + }, + { + "epoch": 0.68, + "learning_rate": 5.6755897271408684e-05, + "loss": 1.1129, + "step": 22710 + }, + { + "epoch": 0.68, + "learning_rate": 5.670891307593327e-05, + "loss": 0.9631, + "step": 22715 + }, + { + "epoch": 0.68, + "learning_rate": 5.666194063817257e-05, + "loss": 0.9776, + "step": 22720 + }, + { + "epoch": 0.68, + "learning_rate": 5.6614979970884196e-05, + "loss": 1.067, + "step": 22725 + }, + { + "epoch": 0.68, + "learning_rate": 5.656803108682254e-05, + "loss": 1.0042, + "step": 22730 + }, + { + "epoch": 0.68, + "learning_rate": 5.652109399873873e-05, + "loss": 1.0149, + "step": 22735 + }, + { + "epoch": 0.68, + "learning_rate": 5.6474168719380625e-05, + "loss": 1.0012, + "step": 22740 + }, + { + "epoch": 0.68, + "learning_rate": 5.642725526149316e-05, + "loss": 1.0228, + "step": 22745 + }, + { + "epoch": 0.68, + "learning_rate": 5.638035363781777e-05, + "loss": 1.0947, + "step": 22750 + }, + { + "epoch": 0.68, + "learning_rate": 5.633346386109277e-05, + "loss": 1.0123, + "step": 22755 + }, + { + "epoch": 0.68, + "learning_rate": 5.628658594405327e-05, + "loss": 1.0747, + "step": 22760 + }, + { + "epoch": 0.68, + "learning_rate": 5.623971989943122e-05, + "loss": 1.0502, + "step": 22765 + }, + { + "epoch": 0.68, + "learning_rate": 5.6192865739955236e-05, + "loss": 0.96, + "step": 22770 + }, + { + "epoch": 0.68, + "learning_rate": 5.6146023478350693e-05, + "loss": 1.0684, + "step": 22775 + }, + { + "epoch": 0.68, + "learning_rate": 5.6099193127339864e-05, + "loss": 1.0147, + "step": 22780 + }, + { + "epoch": 0.68, + "learning_rate": 5.6052374699641755e-05, + "loss": 1.1096, + "step": 22785 + }, + { + "epoch": 0.68, + "learning_rate": 5.6005568207972024e-05, + "loss": 1.1119, + "step": 22790 + }, + { + "epoch": 0.68, + "learning_rate": 5.595877366504314e-05, + "loss": 1.0665, + "step": 22795 + }, + { + "epoch": 0.68, + "learning_rate": 5.591199108356437e-05, + "loss": 1.0583, + "step": 22800 + }, + { + "epoch": 0.68, + "learning_rate": 5.5865220476241784e-05, + "loss": 1.0542, + "step": 22805 + }, + { + "epoch": 0.68, + "learning_rate": 5.5818461855777994e-05, + "loss": 1.0864, + "step": 22810 + }, + { + "epoch": 0.68, + "learning_rate": 5.577171523487259e-05, + "loss": 1.1055, + "step": 22815 + }, + { + "epoch": 0.68, + "learning_rate": 5.5724980626221735e-05, + "loss": 1.0969, + "step": 22820 + }, + { + "epoch": 0.68, + "learning_rate": 5.567825804251843e-05, + "loss": 1.0596, + "step": 22825 + }, + { + "epoch": 0.68, + "learning_rate": 5.563154749645233e-05, + "loss": 1.0339, + "step": 22830 + }, + { + "epoch": 0.68, + "learning_rate": 5.5584849000709926e-05, + "loss": 1.0841, + "step": 22835 + }, + { + "epoch": 0.68, + "learning_rate": 5.5538162567974296e-05, + "loss": 1.0568, + "step": 22840 + }, + { + "epoch": 0.68, + "learning_rate": 5.54914882109254e-05, + "loss": 0.962, + "step": 22845 + }, + { + "epoch": 0.68, + "learning_rate": 5.5444825942239765e-05, + "loss": 1.0881, + "step": 22850 + }, + { + "epoch": 0.68, + "learning_rate": 5.539817577459078e-05, + "loss": 1.0252, + "step": 22855 + }, + { + "epoch": 0.68, + "learning_rate": 5.535153772064843e-05, + "loss": 0.997, + "step": 22860 + }, + { + "epoch": 0.68, + "learning_rate": 5.530491179307942e-05, + "loss": 1.0913, + "step": 22865 + }, + { + "epoch": 0.68, + "learning_rate": 5.5258298004547246e-05, + "loss": 1.0363, + "step": 22870 + }, + { + "epoch": 0.68, + "learning_rate": 5.521169636771209e-05, + "loss": 1.0639, + "step": 22875 + }, + { + "epoch": 0.68, + "learning_rate": 5.5165106895230756e-05, + "loss": 0.988, + "step": 22880 + }, + { + "epoch": 0.68, + "learning_rate": 5.51185295997567e-05, + "loss": 1.0001, + "step": 22885 + }, + { + "epoch": 0.68, + "learning_rate": 5.507196449394034e-05, + "loss": 1.066, + "step": 22890 + }, + { + "epoch": 0.68, + "learning_rate": 5.502541159042853e-05, + "loss": 1.1076, + "step": 22895 + }, + { + "epoch": 0.68, + "learning_rate": 5.497887090186482e-05, + "loss": 1.0608, + "step": 22900 + }, + { + "epoch": 0.68, + "learning_rate": 5.4932342440889564e-05, + "loss": 1.0028, + "step": 22905 + }, + { + "epoch": 0.68, + "learning_rate": 5.4885826220139757e-05, + "loss": 1.1048, + "step": 22910 + }, + { + "epoch": 0.68, + "learning_rate": 5.4839322252249016e-05, + "loss": 1.024, + "step": 22915 + }, + { + "epoch": 0.68, + "learning_rate": 5.4792830549847626e-05, + "loss": 1.0747, + "step": 22920 + }, + { + "epoch": 0.68, + "learning_rate": 5.4746351125562633e-05, + "loss": 1.0503, + "step": 22925 + }, + { + "epoch": 0.68, + "learning_rate": 5.469988399201773e-05, + "loss": 1.0338, + "step": 22930 + }, + { + "epoch": 0.68, + "learning_rate": 5.4653429161833166e-05, + "loss": 1.0143, + "step": 22935 + }, + { + "epoch": 0.68, + "learning_rate": 5.460698664762591e-05, + "loss": 1.0853, + "step": 22940 + }, + { + "epoch": 0.69, + "learning_rate": 5.4560556462009615e-05, + "loss": 1.0827, + "step": 22945 + }, + { + "epoch": 0.69, + "learning_rate": 5.451413861759461e-05, + "loss": 1.0079, + "step": 22950 + }, + { + "epoch": 0.69, + "learning_rate": 5.4467733126987766e-05, + "loss": 1.0716, + "step": 22955 + }, + { + "epoch": 0.69, + "learning_rate": 5.44213400027927e-05, + "loss": 1.0328, + "step": 22960 + }, + { + "epoch": 0.69, + "learning_rate": 5.437495925760958e-05, + "loss": 1.0953, + "step": 22965 + }, + { + "epoch": 0.69, + "learning_rate": 5.432859090403532e-05, + "loss": 1.0937, + "step": 22970 + }, + { + "epoch": 0.69, + "learning_rate": 5.428223495466337e-05, + "loss": 1.0848, + "step": 22975 + }, + { + "epoch": 0.69, + "learning_rate": 5.4235891422083886e-05, + "loss": 1.022, + "step": 22980 + }, + { + "epoch": 0.69, + "learning_rate": 5.418956031888356e-05, + "loss": 1.0855, + "step": 22985 + }, + { + "epoch": 0.69, + "learning_rate": 5.414324165764584e-05, + "loss": 1.0069, + "step": 22990 + }, + { + "epoch": 0.69, + "learning_rate": 5.4096935450950634e-05, + "loss": 1.0144, + "step": 22995 + }, + { + "epoch": 0.69, + "learning_rate": 5.405064171137465e-05, + "loss": 1.0191, + "step": 23000 + }, + { + "epoch": 0.69, + "learning_rate": 5.400436045149102e-05, + "loss": 0.9528, + "step": 23005 + }, + { + "epoch": 0.69, + "learning_rate": 5.395809168386962e-05, + "loss": 1.0537, + "step": 23010 + }, + { + "epoch": 0.69, + "learning_rate": 5.391183542107693e-05, + "loss": 1.0501, + "step": 23015 + }, + { + "epoch": 0.69, + "learning_rate": 5.386559167567596e-05, + "loss": 1.0477, + "step": 23020 + }, + { + "epoch": 0.69, + "learning_rate": 5.381936046022633e-05, + "loss": 1.0276, + "step": 23025 + }, + { + "epoch": 0.69, + "learning_rate": 5.37731417872843e-05, + "loss": 1.0735, + "step": 23030 + }, + { + "epoch": 0.69, + "learning_rate": 5.372693566940277e-05, + "loss": 0.9587, + "step": 23035 + }, + { + "epoch": 0.69, + "learning_rate": 5.368074211913112e-05, + "loss": 1.0012, + "step": 23040 + }, + { + "epoch": 0.69, + "learning_rate": 5.3634561149015304e-05, + "loss": 1.0758, + "step": 23045 + }, + { + "epoch": 0.69, + "learning_rate": 5.3588392771598e-05, + "loss": 1.0512, + "step": 23050 + }, + { + "epoch": 0.69, + "learning_rate": 5.3542236999418385e-05, + "loss": 1.014, + "step": 23055 + }, + { + "epoch": 0.69, + "learning_rate": 5.3496093845012196e-05, + "loss": 1.0385, + "step": 23060 + }, + { + "epoch": 0.69, + "learning_rate": 5.344996332091171e-05, + "loss": 1.0693, + "step": 23065 + }, + { + "epoch": 0.69, + "learning_rate": 5.340384543964587e-05, + "loss": 1.0734, + "step": 23070 + }, + { + "epoch": 0.69, + "learning_rate": 5.335774021374018e-05, + "loss": 1.0545, + "step": 23075 + }, + { + "epoch": 0.69, + "learning_rate": 5.331164765571661e-05, + "loss": 1.0067, + "step": 23080 + }, + { + "epoch": 0.69, + "learning_rate": 5.3265567778093725e-05, + "loss": 1.0438, + "step": 23085 + }, + { + "epoch": 0.69, + "learning_rate": 5.3219500593386696e-05, + "loss": 0.9867, + "step": 23090 + }, + { + "epoch": 0.69, + "learning_rate": 5.3173446114107265e-05, + "loss": 0.9986, + "step": 23095 + }, + { + "epoch": 0.69, + "learning_rate": 5.3127404352763574e-05, + "loss": 1.0792, + "step": 23100 + }, + { + "epoch": 0.69, + "learning_rate": 5.3081375321860514e-05, + "loss": 1.0735, + "step": 23105 + }, + { + "epoch": 0.69, + "learning_rate": 5.303535903389932e-05, + "loss": 1.0221, + "step": 23110 + }, + { + "epoch": 0.69, + "learning_rate": 5.298935550137796e-05, + "loss": 1.0265, + "step": 23115 + }, + { + "epoch": 0.69, + "learning_rate": 5.294336473679075e-05, + "loss": 0.9942, + "step": 23120 + }, + { + "epoch": 0.69, + "learning_rate": 5.2897386752628694e-05, + "loss": 0.9674, + "step": 23125 + }, + { + "epoch": 0.69, + "learning_rate": 5.285142156137921e-05, + "loss": 1.0765, + "step": 23130 + }, + { + "epoch": 0.69, + "learning_rate": 5.2805469175526345e-05, + "loss": 1.046, + "step": 23135 + }, + { + "epoch": 0.69, + "learning_rate": 5.275952960755055e-05, + "loss": 0.9572, + "step": 23140 + }, + { + "epoch": 0.69, + "learning_rate": 5.271360286992893e-05, + "loss": 1.0608, + "step": 23145 + }, + { + "epoch": 0.69, + "learning_rate": 5.266768897513494e-05, + "loss": 0.9907, + "step": 23150 + }, + { + "epoch": 0.69, + "learning_rate": 5.26217879356387e-05, + "loss": 0.9444, + "step": 23155 + }, + { + "epoch": 0.69, + "learning_rate": 5.2575899763906824e-05, + "loss": 1.0413, + "step": 23160 + }, + { + "epoch": 0.69, + "learning_rate": 5.253002447240233e-05, + "loss": 1.0723, + "step": 23165 + }, + { + "epoch": 0.69, + "learning_rate": 5.2484162073584765e-05, + "loss": 1.026, + "step": 23170 + }, + { + "epoch": 0.69, + "learning_rate": 5.243831257991023e-05, + "loss": 1.058, + "step": 23175 + }, + { + "epoch": 0.69, + "learning_rate": 5.239247600383136e-05, + "loss": 0.9806, + "step": 23180 + }, + { + "epoch": 0.69, + "learning_rate": 5.2346652357797146e-05, + "loss": 1.1136, + "step": 23185 + }, + { + "epoch": 0.69, + "learning_rate": 5.230084165425312e-05, + "loss": 1.1165, + "step": 23190 + }, + { + "epoch": 0.69, + "learning_rate": 5.2255043905641343e-05, + "loss": 1.1087, + "step": 23195 + }, + { + "epoch": 0.69, + "learning_rate": 5.220925912440038e-05, + "loss": 1.1278, + "step": 23200 + }, + { + "epoch": 0.69, + "learning_rate": 5.2163487322965166e-05, + "loss": 1.0854, + "step": 23205 + }, + { + "epoch": 0.69, + "learning_rate": 5.211772851376715e-05, + "loss": 0.9943, + "step": 23210 + }, + { + "epoch": 0.69, + "learning_rate": 5.2071982709234324e-05, + "loss": 1.0254, + "step": 23215 + }, + { + "epoch": 0.69, + "learning_rate": 5.202624992179109e-05, + "loss": 1.0516, + "step": 23220 + }, + { + "epoch": 0.69, + "learning_rate": 5.198053016385833e-05, + "loss": 1.037, + "step": 23225 + }, + { + "epoch": 0.69, + "learning_rate": 5.193482344785327e-05, + "loss": 1.0886, + "step": 23230 + }, + { + "epoch": 0.69, + "learning_rate": 5.188912978618985e-05, + "loss": 0.9994, + "step": 23235 + }, + { + "epoch": 0.69, + "learning_rate": 5.184344919127827e-05, + "loss": 1.0089, + "step": 23240 + }, + { + "epoch": 0.69, + "learning_rate": 5.179778167552517e-05, + "loss": 1.0749, + "step": 23245 + }, + { + "epoch": 0.69, + "learning_rate": 5.175212725133374e-05, + "loss": 0.9711, + "step": 23250 + }, + { + "epoch": 0.69, + "learning_rate": 5.1706485931103586e-05, + "loss": 1.0018, + "step": 23255 + }, + { + "epoch": 0.69, + "learning_rate": 5.166085772723073e-05, + "loss": 1.0036, + "step": 23260 + }, + { + "epoch": 0.69, + "learning_rate": 5.1615242652107574e-05, + "loss": 1.0598, + "step": 23265 + }, + { + "epoch": 0.69, + "learning_rate": 5.1569640718123136e-05, + "loss": 0.9796, + "step": 23270 + }, + { + "epoch": 0.69, + "learning_rate": 5.152405193766263e-05, + "loss": 0.9443, + "step": 23275 + }, + { + "epoch": 0.7, + "learning_rate": 5.1478476323107914e-05, + "loss": 1.1372, + "step": 23280 + }, + { + "epoch": 0.7, + "learning_rate": 5.143291388683709e-05, + "loss": 1.0268, + "step": 23285 + }, + { + "epoch": 0.7, + "learning_rate": 5.138736464122484e-05, + "loss": 1.0774, + "step": 23290 + }, + { + "epoch": 0.7, + "learning_rate": 5.134182859864214e-05, + "loss": 1.0254, + "step": 23295 + }, + { + "epoch": 0.7, + "learning_rate": 5.129630577145641e-05, + "loss": 1.0682, + "step": 23300 + }, + { + "epoch": 0.7, + "learning_rate": 5.125079617203158e-05, + "loss": 1.1262, + "step": 23305 + }, + { + "epoch": 0.7, + "learning_rate": 5.1205299812727855e-05, + "loss": 1.0814, + "step": 23310 + }, + { + "epoch": 0.7, + "learning_rate": 5.1159816705901844e-05, + "loss": 0.9872, + "step": 23315 + }, + { + "epoch": 0.7, + "learning_rate": 5.111434686390666e-05, + "loss": 1.0258, + "step": 23320 + }, + { + "epoch": 0.7, + "learning_rate": 5.10688902990918e-05, + "loss": 1.0399, + "step": 23325 + }, + { + "epoch": 0.7, + "learning_rate": 5.102344702380306e-05, + "loss": 1.0754, + "step": 23330 + }, + { + "epoch": 0.7, + "learning_rate": 5.0978017050382664e-05, + "loss": 0.9977, + "step": 23335 + }, + { + "epoch": 0.7, + "learning_rate": 5.093260039116927e-05, + "loss": 0.9582, + "step": 23340 + }, + { + "epoch": 0.7, + "learning_rate": 5.088719705849791e-05, + "loss": 0.9749, + "step": 23345 + }, + { + "epoch": 0.7, + "learning_rate": 5.0841807064699986e-05, + "loss": 1.0317, + "step": 23350 + }, + { + "epoch": 0.7, + "learning_rate": 5.079643042210319e-05, + "loss": 0.9753, + "step": 23355 + }, + { + "epoch": 0.7, + "learning_rate": 5.075106714303173e-05, + "loss": 0.9702, + "step": 23360 + }, + { + "epoch": 0.7, + "learning_rate": 5.0705717239806125e-05, + "loss": 1.0298, + "step": 23365 + }, + { + "epoch": 0.7, + "learning_rate": 5.066038072474326e-05, + "loss": 1.0973, + "step": 23370 + }, + { + "epoch": 0.7, + "learning_rate": 5.061505761015629e-05, + "loss": 1.0243, + "step": 23375 + }, + { + "epoch": 0.7, + "learning_rate": 5.056974790835496e-05, + "loss": 1.027, + "step": 23380 + }, + { + "epoch": 0.7, + "learning_rate": 5.0524451631645174e-05, + "loss": 1.1367, + "step": 23385 + }, + { + "epoch": 0.7, + "learning_rate": 5.047916879232921e-05, + "loss": 1.0595, + "step": 23390 + }, + { + "epoch": 0.7, + "learning_rate": 5.043389940270578e-05, + "loss": 1.1675, + "step": 23395 + }, + { + "epoch": 0.7, + "learning_rate": 5.0388643475069906e-05, + "loss": 1.0172, + "step": 23400 + }, + { + "epoch": 0.7, + "learning_rate": 5.034340102171293e-05, + "loss": 1.0002, + "step": 23405 + }, + { + "epoch": 0.7, + "learning_rate": 5.0298172054922534e-05, + "loss": 1.0191, + "step": 23410 + }, + { + "epoch": 0.7, + "learning_rate": 5.025295658698276e-05, + "loss": 1.0589, + "step": 23415 + }, + { + "epoch": 0.7, + "learning_rate": 5.020775463017403e-05, + "loss": 1.1018, + "step": 23420 + }, + { + "epoch": 0.7, + "learning_rate": 5.016256619677301e-05, + "loss": 1.0038, + "step": 23425 + }, + { + "epoch": 0.7, + "learning_rate": 5.0117391299052684e-05, + "loss": 1.0303, + "step": 23430 + }, + { + "epoch": 0.7, + "learning_rate": 5.007222994928243e-05, + "loss": 1.075, + "step": 23435 + }, + { + "epoch": 0.7, + "learning_rate": 5.002708215972799e-05, + "loss": 1.0731, + "step": 23440 + }, + { + "epoch": 0.7, + "learning_rate": 4.998194794265124e-05, + "loss": 1.0187, + "step": 23445 + }, + { + "epoch": 0.7, + "learning_rate": 4.9936827310310595e-05, + "loss": 0.9958, + "step": 23450 + }, + { + "epoch": 0.7, + "learning_rate": 4.9891720274960566e-05, + "loss": 1.0743, + "step": 23455 + }, + { + "epoch": 0.7, + "learning_rate": 4.984662684885215e-05, + "loss": 0.9918, + "step": 23460 + }, + { + "epoch": 0.7, + "learning_rate": 4.980154704423251e-05, + "loss": 1.1038, + "step": 23465 + }, + { + "epoch": 0.7, + "learning_rate": 4.9756480873345235e-05, + "loss": 0.9632, + "step": 23470 + }, + { + "epoch": 0.7, + "learning_rate": 4.971142834843011e-05, + "loss": 1.02, + "step": 23475 + }, + { + "epoch": 0.7, + "learning_rate": 4.966638948172322e-05, + "loss": 1.0382, + "step": 23480 + }, + { + "epoch": 0.7, + "learning_rate": 4.9621364285456995e-05, + "loss": 1.0099, + "step": 23485 + }, + { + "epoch": 0.7, + "learning_rate": 4.957635277186018e-05, + "loss": 0.9854, + "step": 23490 + }, + { + "epoch": 0.7, + "learning_rate": 4.9531354953157704e-05, + "loss": 1.0535, + "step": 23495 + }, + { + "epoch": 0.7, + "learning_rate": 4.9486370841570803e-05, + "loss": 0.9094, + "step": 23500 + }, + { + "epoch": 0.7, + "learning_rate": 4.9441400449317033e-05, + "loss": 1.0067, + "step": 23505 + }, + { + "epoch": 0.7, + "learning_rate": 4.939644378861024e-05, + "loss": 0.9878, + "step": 23510 + }, + { + "epoch": 0.7, + "learning_rate": 4.9351500871660474e-05, + "loss": 1.0436, + "step": 23515 + }, + { + "epoch": 0.7, + "learning_rate": 4.9306571710674e-05, + "loss": 1.0238, + "step": 23520 + }, + { + "epoch": 0.7, + "learning_rate": 4.9261656317853575e-05, + "loss": 1.0529, + "step": 23525 + }, + { + "epoch": 0.7, + "learning_rate": 4.921675470539801e-05, + "loss": 0.9522, + "step": 23530 + }, + { + "epoch": 0.7, + "learning_rate": 4.9171866885502375e-05, + "loss": 1.0172, + "step": 23535 + }, + { + "epoch": 0.7, + "learning_rate": 4.912699287035811e-05, + "loss": 1.0055, + "step": 23540 + }, + { + "epoch": 0.7, + "learning_rate": 4.908213267215287e-05, + "loss": 0.9824, + "step": 23545 + }, + { + "epoch": 0.7, + "learning_rate": 4.90372863030705e-05, + "loss": 1.0748, + "step": 23550 + }, + { + "epoch": 0.7, + "learning_rate": 4.89924537752911e-05, + "loss": 1.1017, + "step": 23555 + }, + { + "epoch": 0.7, + "learning_rate": 4.894763510099106e-05, + "loss": 1.021, + "step": 23560 + }, + { + "epoch": 0.7, + "learning_rate": 4.8902830292343024e-05, + "loss": 0.999, + "step": 23565 + }, + { + "epoch": 0.7, + "learning_rate": 4.8858039361515795e-05, + "loss": 0.9548, + "step": 23570 + }, + { + "epoch": 0.7, + "learning_rate": 4.8813262320674394e-05, + "loss": 1.0854, + "step": 23575 + }, + { + "epoch": 0.7, + "learning_rate": 4.8768499181980146e-05, + "loss": 1.1565, + "step": 23580 + }, + { + "epoch": 0.7, + "learning_rate": 4.8723749957590625e-05, + "loss": 0.934, + "step": 23585 + }, + { + "epoch": 0.7, + "learning_rate": 4.867901465965949e-05, + "loss": 1.0272, + "step": 23590 + }, + { + "epoch": 0.7, + "learning_rate": 4.863429330033677e-05, + "loss": 1.0316, + "step": 23595 + }, + { + "epoch": 0.7, + "learning_rate": 4.858958589176857e-05, + "loss": 0.9774, + "step": 23600 + }, + { + "epoch": 0.7, + "learning_rate": 4.8544892446097343e-05, + "loss": 1.0794, + "step": 23605 + }, + { + "epoch": 0.7, + "learning_rate": 4.85002129754616e-05, + "loss": 0.9816, + "step": 23610 + }, + { + "epoch": 0.71, + "learning_rate": 4.845554749199622e-05, + "loss": 0.9492, + "step": 23615 + }, + { + "epoch": 0.71, + "learning_rate": 4.841089600783213e-05, + "loss": 1.0576, + "step": 23620 + }, + { + "epoch": 0.71, + "learning_rate": 4.8366258535096574e-05, + "loss": 0.9775, + "step": 23625 + }, + { + "epoch": 0.71, + "learning_rate": 4.832163508591289e-05, + "loss": 1.1189, + "step": 23630 + }, + { + "epoch": 0.71, + "learning_rate": 4.827702567240072e-05, + "loss": 1.0222, + "step": 23635 + }, + { + "epoch": 0.71, + "learning_rate": 4.823243030667576e-05, + "loss": 0.9981, + "step": 23640 + }, + { + "epoch": 0.71, + "learning_rate": 4.8187849000850035e-05, + "loss": 1.0213, + "step": 23645 + }, + { + "epoch": 0.71, + "learning_rate": 4.814328176703161e-05, + "loss": 1.0586, + "step": 23650 + }, + { + "epoch": 0.71, + "learning_rate": 4.8098728617324873e-05, + "loss": 1.0136, + "step": 23655 + }, + { + "epoch": 0.71, + "learning_rate": 4.8054189563830234e-05, + "loss": 1.0523, + "step": 23660 + }, + { + "epoch": 0.71, + "learning_rate": 4.800966461864438e-05, + "loss": 1.161, + "step": 23665 + }, + { + "epoch": 0.71, + "learning_rate": 4.796515379386019e-05, + "loss": 1.0076, + "step": 23670 + }, + { + "epoch": 0.71, + "learning_rate": 4.79206571015666e-05, + "loss": 1.0145, + "step": 23675 + }, + { + "epoch": 0.71, + "learning_rate": 4.787617455384874e-05, + "loss": 0.972, + "step": 23680 + }, + { + "epoch": 0.71, + "learning_rate": 4.7831706162787957e-05, + "loss": 1.0061, + "step": 23685 + }, + { + "epoch": 0.71, + "learning_rate": 4.7787251940461754e-05, + "loss": 1.0057, + "step": 23690 + }, + { + "epoch": 0.71, + "learning_rate": 4.7742811898943703e-05, + "loss": 0.9991, + "step": 23695 + }, + { + "epoch": 0.71, + "learning_rate": 4.7698386050303544e-05, + "loss": 1.0247, + "step": 23700 + }, + { + "epoch": 0.71, + "learning_rate": 4.7653974406607226e-05, + "loss": 1.039, + "step": 23705 + }, + { + "epoch": 0.71, + "learning_rate": 4.7609576979916845e-05, + "loss": 1.0237, + "step": 23710 + }, + { + "epoch": 0.71, + "learning_rate": 4.756519378229054e-05, + "loss": 1.0395, + "step": 23715 + }, + { + "epoch": 0.71, + "learning_rate": 4.752082482578262e-05, + "loss": 1.0263, + "step": 23720 + }, + { + "epoch": 0.71, + "learning_rate": 4.7476470122443576e-05, + "loss": 1.0659, + "step": 23725 + }, + { + "epoch": 0.71, + "learning_rate": 4.743212968432003e-05, + "loss": 1.0864, + "step": 23730 + }, + { + "epoch": 0.71, + "learning_rate": 4.738780352345462e-05, + "loss": 1.0443, + "step": 23735 + }, + { + "epoch": 0.71, + "learning_rate": 4.734349165188626e-05, + "loss": 0.9634, + "step": 23740 + }, + { + "epoch": 0.71, + "learning_rate": 4.7299194081649856e-05, + "loss": 1.0846, + "step": 23745 + }, + { + "epoch": 0.71, + "learning_rate": 4.725491082477652e-05, + "loss": 1.0824, + "step": 23750 + }, + { + "epoch": 0.71, + "learning_rate": 4.7210641893293384e-05, + "loss": 1.1069, + "step": 23755 + }, + { + "epoch": 0.71, + "learning_rate": 4.7166387299223815e-05, + "loss": 1.0246, + "step": 23760 + }, + { + "epoch": 0.71, + "learning_rate": 4.7122147054587115e-05, + "loss": 1.0064, + "step": 23765 + }, + { + "epoch": 0.71, + "learning_rate": 4.7077921171398895e-05, + "loss": 1.0085, + "step": 23770 + }, + { + "epoch": 0.71, + "learning_rate": 4.7033709661670664e-05, + "loss": 1.0802, + "step": 23775 + }, + { + "epoch": 0.71, + "learning_rate": 4.69895125374102e-05, + "loss": 1.0185, + "step": 23780 + }, + { + "epoch": 0.71, + "learning_rate": 4.694532981062122e-05, + "loss": 1.0976, + "step": 23785 + }, + { + "epoch": 0.71, + "learning_rate": 4.690116149330367e-05, + "loss": 0.9947, + "step": 23790 + }, + { + "epoch": 0.71, + "learning_rate": 4.6857007597453464e-05, + "loss": 1.0323, + "step": 23795 + }, + { + "epoch": 0.71, + "learning_rate": 4.68128681350627e-05, + "loss": 1.0424, + "step": 23800 + }, + { + "epoch": 0.71, + "learning_rate": 4.6768743118119465e-05, + "loss": 1.0363, + "step": 23805 + }, + { + "epoch": 0.71, + "learning_rate": 4.6724632558607974e-05, + "loss": 1.0163, + "step": 23810 + }, + { + "epoch": 0.71, + "learning_rate": 4.668053646850856e-05, + "loss": 1.0354, + "step": 23815 + }, + { + "epoch": 0.71, + "learning_rate": 4.663645485979754e-05, + "loss": 1.0464, + "step": 23820 + }, + { + "epoch": 0.71, + "learning_rate": 4.65923877444473e-05, + "loss": 0.9962, + "step": 23825 + }, + { + "epoch": 0.71, + "learning_rate": 4.6548335134426336e-05, + "loss": 1.0191, + "step": 23830 + }, + { + "epoch": 0.71, + "learning_rate": 4.650429704169924e-05, + "loss": 0.9682, + "step": 23835 + }, + { + "epoch": 0.71, + "learning_rate": 4.646027347822657e-05, + "loss": 1.103, + "step": 23840 + }, + { + "epoch": 0.71, + "learning_rate": 4.641626445596495e-05, + "loss": 1.0213, + "step": 23845 + }, + { + "epoch": 0.71, + "learning_rate": 4.6372269986867114e-05, + "loss": 1.0144, + "step": 23850 + }, + { + "epoch": 0.71, + "learning_rate": 4.6328290082881856e-05, + "loss": 0.9686, + "step": 23855 + }, + { + "epoch": 0.71, + "learning_rate": 4.628432475595393e-05, + "loss": 0.9751, + "step": 23860 + }, + { + "epoch": 0.71, + "learning_rate": 4.62403740180241e-05, + "loss": 1.0068, + "step": 23865 + }, + { + "epoch": 0.71, + "learning_rate": 4.6196437881029385e-05, + "loss": 1.0581, + "step": 23870 + }, + { + "epoch": 0.71, + "learning_rate": 4.615251635690262e-05, + "loss": 0.985, + "step": 23875 + }, + { + "epoch": 0.71, + "learning_rate": 4.6108609457572706e-05, + "loss": 1.0123, + "step": 23880 + }, + { + "epoch": 0.71, + "learning_rate": 4.6064717194964676e-05, + "loss": 1.0942, + "step": 23885 + }, + { + "epoch": 0.71, + "learning_rate": 4.602083958099947e-05, + "loss": 1.0989, + "step": 23890 + }, + { + "epoch": 0.71, + "learning_rate": 4.597697662759417e-05, + "loss": 1.0915, + "step": 23895 + }, + { + "epoch": 0.71, + "learning_rate": 4.593312834666171e-05, + "loss": 1.0765, + "step": 23900 + }, + { + "epoch": 0.71, + "learning_rate": 4.588929475011125e-05, + "loss": 1.0501, + "step": 23905 + }, + { + "epoch": 0.71, + "learning_rate": 4.584547584984774e-05, + "loss": 1.0934, + "step": 23910 + }, + { + "epoch": 0.71, + "learning_rate": 4.5801671657772325e-05, + "loss": 0.9727, + "step": 23915 + }, + { + "epoch": 0.71, + "learning_rate": 4.575788218578203e-05, + "loss": 0.9537, + "step": 23920 + }, + { + "epoch": 0.71, + "learning_rate": 4.571410744576998e-05, + "loss": 1.0179, + "step": 23925 + }, + { + "epoch": 0.71, + "learning_rate": 4.567034744962517e-05, + "loss": 1.0323, + "step": 23930 + }, + { + "epoch": 0.71, + "learning_rate": 4.562660220923277e-05, + "loss": 1.0962, + "step": 23935 + }, + { + "epoch": 0.71, + "learning_rate": 4.558287173647374e-05, + "loss": 1.0285, + "step": 23940 + }, + { + "epoch": 0.71, + "learning_rate": 4.5539156043225215e-05, + "loss": 0.9922, + "step": 23945 + }, + { + "epoch": 0.72, + "learning_rate": 4.5495455141360165e-05, + "loss": 1.0047, + "step": 23950 + }, + { + "epoch": 0.72, + "learning_rate": 4.545176904274764e-05, + "loss": 0.9985, + "step": 23955 + }, + { + "epoch": 0.72, + "learning_rate": 4.540809775925267e-05, + "loss": 1.0361, + "step": 23960 + }, + { + "epoch": 0.72, + "learning_rate": 4.536444130273618e-05, + "loss": 1.0133, + "step": 23965 + }, + { + "epoch": 0.72, + "learning_rate": 4.532079968505511e-05, + "loss": 1.021, + "step": 23970 + }, + { + "epoch": 0.72, + "learning_rate": 4.527717291806242e-05, + "loss": 1.0596, + "step": 23975 + }, + { + "epoch": 0.72, + "learning_rate": 4.5233561013606995e-05, + "loss": 1.0078, + "step": 23980 + }, + { + "epoch": 0.72, + "learning_rate": 4.518996398353367e-05, + "loss": 1.1767, + "step": 23985 + }, + { + "epoch": 0.72, + "learning_rate": 4.5146381839683215e-05, + "loss": 1.0408, + "step": 23990 + }, + { + "epoch": 0.72, + "learning_rate": 4.510281459389242e-05, + "loss": 1.0506, + "step": 23995 + }, + { + "epoch": 0.72, + "learning_rate": 4.505926225799405e-05, + "loss": 1.1078, + "step": 24000 + }, + { + "epoch": 0.72, + "learning_rate": 4.501572484381673e-05, + "loss": 1.0494, + "step": 24005 + }, + { + "epoch": 0.72, + "learning_rate": 4.497220236318502e-05, + "loss": 0.9725, + "step": 24010 + }, + { + "epoch": 0.72, + "learning_rate": 4.4928694827919614e-05, + "loss": 1.0271, + "step": 24015 + }, + { + "epoch": 0.72, + "learning_rate": 4.4885202249836936e-05, + "loss": 1.0323, + "step": 24020 + }, + { + "epoch": 0.72, + "learning_rate": 4.4841724640749396e-05, + "loss": 1.0988, + "step": 24025 + }, + { + "epoch": 0.72, + "learning_rate": 4.4798262012465395e-05, + "loss": 1.0998, + "step": 24030 + }, + { + "epoch": 0.72, + "learning_rate": 4.4754814376789304e-05, + "loss": 1.011, + "step": 24035 + }, + { + "epoch": 0.72, + "learning_rate": 4.4711381745521275e-05, + "loss": 1.0341, + "step": 24040 + }, + { + "epoch": 0.72, + "learning_rate": 4.4667964130457455e-05, + "loss": 1.1316, + "step": 24045 + }, + { + "epoch": 0.72, + "learning_rate": 4.4624561543389965e-05, + "loss": 1.082, + "step": 24050 + }, + { + "epoch": 0.72, + "learning_rate": 4.4581173996106815e-05, + "loss": 1.0976, + "step": 24055 + }, + { + "epoch": 0.72, + "learning_rate": 4.4537801500391905e-05, + "loss": 1.0122, + "step": 24060 + }, + { + "epoch": 0.72, + "learning_rate": 4.449444406802502e-05, + "loss": 1.0056, + "step": 24065 + }, + { + "epoch": 0.72, + "learning_rate": 4.4451101710781926e-05, + "loss": 1.0614, + "step": 24070 + }, + { + "epoch": 0.72, + "learning_rate": 4.4407774440434304e-05, + "loss": 1.0769, + "step": 24075 + }, + { + "epoch": 0.72, + "learning_rate": 4.436446226874961e-05, + "loss": 1.0284, + "step": 24080 + }, + { + "epoch": 0.72, + "learning_rate": 4.432116520749137e-05, + "loss": 1.0566, + "step": 24085 + }, + { + "epoch": 0.72, + "learning_rate": 4.427788326841887e-05, + "loss": 1.0947, + "step": 24090 + }, + { + "epoch": 0.72, + "learning_rate": 4.423461646328733e-05, + "loss": 1.0225, + "step": 24095 + }, + { + "epoch": 0.72, + "learning_rate": 4.419136480384788e-05, + "loss": 0.9583, + "step": 24100 + }, + { + "epoch": 0.72, + "learning_rate": 4.414812830184758e-05, + "loss": 1.0862, + "step": 24105 + }, + { + "epoch": 0.72, + "learning_rate": 4.410490696902928e-05, + "loss": 1.0767, + "step": 24110 + }, + { + "epoch": 0.72, + "learning_rate": 4.4061700817131714e-05, + "loss": 1.0255, + "step": 24115 + }, + { + "epoch": 0.72, + "learning_rate": 4.4018509857889555e-05, + "loss": 0.9876, + "step": 24120 + }, + { + "epoch": 0.72, + "learning_rate": 4.397533410303337e-05, + "loss": 1.0428, + "step": 24125 + }, + { + "epoch": 0.72, + "learning_rate": 4.3932173564289506e-05, + "loss": 1.0191, + "step": 24130 + }, + { + "epoch": 0.72, + "learning_rate": 4.388902825338019e-05, + "loss": 1.0826, + "step": 24135 + }, + { + "epoch": 0.72, + "learning_rate": 4.3845898182023593e-05, + "loss": 0.9883, + "step": 24140 + }, + { + "epoch": 0.72, + "learning_rate": 4.380278336193371e-05, + "loss": 1.0221, + "step": 24145 + }, + { + "epoch": 0.72, + "learning_rate": 4.375968380482036e-05, + "loss": 1.0634, + "step": 24150 + }, + { + "epoch": 0.72, + "learning_rate": 4.371659952238917e-05, + "loss": 1.0419, + "step": 24155 + }, + { + "epoch": 0.72, + "learning_rate": 4.367353052634181e-05, + "loss": 1.0532, + "step": 24160 + }, + { + "epoch": 0.72, + "learning_rate": 4.363047682837563e-05, + "loss": 0.9826, + "step": 24165 + }, + { + "epoch": 0.72, + "learning_rate": 4.358743844018384e-05, + "loss": 1.0173, + "step": 24170 + }, + { + "epoch": 0.72, + "learning_rate": 4.354441537345553e-05, + "loss": 1.0544, + "step": 24175 + }, + { + "epoch": 0.72, + "learning_rate": 4.350140763987567e-05, + "loss": 0.9892, + "step": 24180 + }, + { + "epoch": 0.72, + "learning_rate": 4.345841525112497e-05, + "loss": 1.1121, + "step": 24185 + }, + { + "epoch": 0.72, + "learning_rate": 4.341543821888001e-05, + "loss": 0.9874, + "step": 24190 + }, + { + "epoch": 0.72, + "learning_rate": 4.3372476554813234e-05, + "loss": 1.0357, + "step": 24195 + }, + { + "epoch": 0.72, + "learning_rate": 4.332953027059291e-05, + "loss": 1.0759, + "step": 24200 + }, + { + "epoch": 0.72, + "learning_rate": 4.328659937788307e-05, + "loss": 1.0526, + "step": 24205 + }, + { + "epoch": 0.72, + "learning_rate": 4.324368388834359e-05, + "loss": 1.0329, + "step": 24210 + }, + { + "epoch": 0.72, + "learning_rate": 4.3200783813630184e-05, + "loss": 1.0259, + "step": 24215 + }, + { + "epoch": 0.72, + "learning_rate": 4.315789916539441e-05, + "loss": 1.0497, + "step": 24220 + }, + { + "epoch": 0.72, + "learning_rate": 4.311502995528354e-05, + "loss": 0.9784, + "step": 24225 + }, + { + "epoch": 0.72, + "learning_rate": 4.307217619494075e-05, + "loss": 1.0273, + "step": 24230 + }, + { + "epoch": 0.72, + "learning_rate": 4.302933789600493e-05, + "loss": 1.0561, + "step": 24235 + }, + { + "epoch": 0.72, + "learning_rate": 4.298651507011089e-05, + "loss": 1.0762, + "step": 24240 + }, + { + "epoch": 0.72, + "learning_rate": 4.2943707728889096e-05, + "loss": 1.0775, + "step": 24245 + }, + { + "epoch": 0.72, + "learning_rate": 4.290091588396593e-05, + "loss": 1.0461, + "step": 24250 + }, + { + "epoch": 0.72, + "learning_rate": 4.285813954696346e-05, + "loss": 1.0256, + "step": 24255 + }, + { + "epoch": 0.72, + "learning_rate": 4.281537872949967e-05, + "loss": 1.0484, + "step": 24260 + }, + { + "epoch": 0.72, + "learning_rate": 4.277263344318817e-05, + "loss": 1.0277, + "step": 24265 + }, + { + "epoch": 0.72, + "learning_rate": 4.272990369963853e-05, + "loss": 1.0292, + "step": 24270 + }, + { + "epoch": 0.72, + "learning_rate": 4.268718951045592e-05, + "loss": 1.0142, + "step": 24275 + }, + { + "epoch": 0.72, + "learning_rate": 4.264449088724144e-05, + "loss": 1.0345, + "step": 24280 + }, + { + "epoch": 0.73, + "learning_rate": 4.2601807841591826e-05, + "loss": 0.981, + "step": 24285 + }, + { + "epoch": 0.73, + "learning_rate": 4.255914038509973e-05, + "loss": 1.0421, + "step": 24290 + }, + { + "epoch": 0.73, + "learning_rate": 4.2516488529353436e-05, + "loss": 0.9925, + "step": 24295 + }, + { + "epoch": 0.73, + "learning_rate": 4.247385228593699e-05, + "loss": 1.1191, + "step": 24300 + }, + { + "epoch": 0.73, + "learning_rate": 4.243123166643039e-05, + "loss": 1.0789, + "step": 24305 + }, + { + "epoch": 0.73, + "learning_rate": 4.2388626682409194e-05, + "loss": 0.9518, + "step": 24310 + }, + { + "epoch": 0.73, + "learning_rate": 4.2346037345444734e-05, + "loss": 1.0821, + "step": 24315 + }, + { + "epoch": 0.73, + "learning_rate": 4.2303463667104146e-05, + "loss": 1.0193, + "step": 24320 + }, + { + "epoch": 0.73, + "learning_rate": 4.2260905658950366e-05, + "loss": 0.9792, + "step": 24325 + }, + { + "epoch": 0.73, + "learning_rate": 4.2218363332541944e-05, + "loss": 1.101, + "step": 24330 + }, + { + "epoch": 0.73, + "learning_rate": 4.217583669943322e-05, + "loss": 0.9779, + "step": 24335 + }, + { + "epoch": 0.73, + "learning_rate": 4.21333257711743e-05, + "loss": 1.0956, + "step": 24340 + }, + { + "epoch": 0.73, + "learning_rate": 4.209083055931108e-05, + "loss": 1.0498, + "step": 24345 + }, + { + "epoch": 0.73, + "learning_rate": 4.204835107538504e-05, + "loss": 0.9724, + "step": 24350 + }, + { + "epoch": 0.73, + "learning_rate": 4.2005887330933457e-05, + "loss": 1.0702, + "step": 24355 + }, + { + "epoch": 0.73, + "learning_rate": 4.1963439337489374e-05, + "loss": 0.9962, + "step": 24360 + }, + { + "epoch": 0.73, + "learning_rate": 4.192100710658155e-05, + "loss": 0.9585, + "step": 24365 + }, + { + "epoch": 0.73, + "learning_rate": 4.1878590649734385e-05, + "loss": 1.0712, + "step": 24370 + }, + { + "epoch": 0.73, + "learning_rate": 4.183618997846811e-05, + "loss": 0.985, + "step": 24375 + }, + { + "epoch": 0.73, + "learning_rate": 4.179380510429852e-05, + "loss": 1.0523, + "step": 24380 + }, + { + "epoch": 0.73, + "learning_rate": 4.175143603873729e-05, + "loss": 0.985, + "step": 24385 + }, + { + "epoch": 0.73, + "learning_rate": 4.170908279329164e-05, + "loss": 1.0437, + "step": 24390 + }, + { + "epoch": 0.73, + "learning_rate": 4.166674537946466e-05, + "loss": 0.9906, + "step": 24395 + }, + { + "epoch": 0.73, + "learning_rate": 4.1624423808754956e-05, + "loss": 1.0445, + "step": 24400 + }, + { + "epoch": 0.73, + "learning_rate": 4.1582118092656994e-05, + "loss": 1.079, + "step": 24405 + }, + { + "epoch": 0.73, + "learning_rate": 4.1539828242660826e-05, + "loss": 1.0078, + "step": 24410 + }, + { + "epoch": 0.73, + "learning_rate": 4.149755427025227e-05, + "loss": 1.1474, + "step": 24415 + }, + { + "epoch": 0.73, + "learning_rate": 4.145529618691274e-05, + "loss": 1.0133, + "step": 24420 + }, + { + "epoch": 0.73, + "learning_rate": 4.1413054004119466e-05, + "loss": 1.0607, + "step": 24425 + }, + { + "epoch": 0.73, + "learning_rate": 4.137082773334521e-05, + "loss": 1.0482, + "step": 24430 + }, + { + "epoch": 0.73, + "learning_rate": 4.1328617386058545e-05, + "loss": 1.0178, + "step": 24435 + }, + { + "epoch": 0.73, + "learning_rate": 4.12864229737236e-05, + "loss": 1.1005, + "step": 24440 + }, + { + "epoch": 0.73, + "learning_rate": 4.124424450780029e-05, + "loss": 1.0106, + "step": 24445 + }, + { + "epoch": 0.73, + "learning_rate": 4.120208199974415e-05, + "loss": 1.0358, + "step": 24450 + }, + { + "epoch": 0.73, + "learning_rate": 4.115993546100635e-05, + "loss": 1.0837, + "step": 24455 + }, + { + "epoch": 0.73, + "learning_rate": 4.1117804903033716e-05, + "loss": 1.0712, + "step": 24460 + }, + { + "epoch": 0.73, + "learning_rate": 4.1075690337268804e-05, + "loss": 1.0587, + "step": 24465 + }, + { + "epoch": 0.73, + "learning_rate": 4.103359177514983e-05, + "loss": 1.0568, + "step": 24470 + }, + { + "epoch": 0.73, + "learning_rate": 4.099150922811058e-05, + "loss": 1.0275, + "step": 24475 + }, + { + "epoch": 0.73, + "learning_rate": 4.09494427075805e-05, + "loss": 1.0289, + "step": 24480 + }, + { + "epoch": 0.73, + "learning_rate": 4.090739222498475e-05, + "loss": 1.0327, + "step": 24485 + }, + { + "epoch": 0.73, + "learning_rate": 4.086535779174415e-05, + "loss": 1.0277, + "step": 24490 + }, + { + "epoch": 0.73, + "learning_rate": 4.0823339419275054e-05, + "loss": 0.9374, + "step": 24495 + }, + { + "epoch": 0.73, + "learning_rate": 4.078133711898949e-05, + "loss": 1.0074, + "step": 24500 + }, + { + "epoch": 0.73, + "learning_rate": 4.073935090229518e-05, + "loss": 1.0913, + "step": 24505 + }, + { + "epoch": 0.73, + "learning_rate": 4.0697380780595464e-05, + "loss": 1.1023, + "step": 24510 + }, + { + "epoch": 0.73, + "learning_rate": 4.065542676528923e-05, + "loss": 0.9609, + "step": 24515 + }, + { + "epoch": 0.73, + "learning_rate": 4.06134888677711e-05, + "loss": 0.9609, + "step": 24520 + }, + { + "epoch": 0.73, + "learning_rate": 4.057156709943122e-05, + "loss": 1.0638, + "step": 24525 + }, + { + "epoch": 0.73, + "learning_rate": 4.0529661471655456e-05, + "loss": 1.0388, + "step": 24530 + }, + { + "epoch": 0.73, + "learning_rate": 4.048777199582518e-05, + "loss": 1.0745, + "step": 24535 + }, + { + "epoch": 0.73, + "learning_rate": 4.04458986833175e-05, + "loss": 0.9541, + "step": 24540 + }, + { + "epoch": 0.73, + "learning_rate": 4.040404154550498e-05, + "loss": 1.0863, + "step": 24545 + }, + { + "epoch": 0.73, + "learning_rate": 4.036220059375596e-05, + "loss": 1.0682, + "step": 24550 + }, + { + "epoch": 0.73, + "learning_rate": 4.0320375839434255e-05, + "loss": 1.035, + "step": 24555 + }, + { + "epoch": 0.73, + "learning_rate": 4.027856729389936e-05, + "loss": 0.9574, + "step": 24560 + }, + { + "epoch": 0.73, + "learning_rate": 4.02367749685063e-05, + "loss": 0.9858, + "step": 24565 + }, + { + "epoch": 0.73, + "learning_rate": 4.0194998874605786e-05, + "loss": 1.0886, + "step": 24570 + }, + { + "epoch": 0.73, + "learning_rate": 4.0153239023544e-05, + "loss": 1.0576, + "step": 24575 + }, + { + "epoch": 0.73, + "learning_rate": 4.011149542666284e-05, + "loss": 1.0024, + "step": 24580 + }, + { + "epoch": 0.73, + "learning_rate": 4.0069768095299656e-05, + "loss": 1.0798, + "step": 24585 + }, + { + "epoch": 0.73, + "learning_rate": 4.00280570407875e-05, + "loss": 1.0521, + "step": 24590 + }, + { + "epoch": 0.73, + "learning_rate": 3.998636227445499e-05, + "loss": 0.9686, + "step": 24595 + }, + { + "epoch": 0.73, + "learning_rate": 3.9944683807626234e-05, + "loss": 1.0113, + "step": 24600 + }, + { + "epoch": 0.73, + "learning_rate": 3.9903021651620954e-05, + "loss": 1.0971, + "step": 24605 + }, + { + "epoch": 0.73, + "learning_rate": 3.986137581775446e-05, + "loss": 1.0351, + "step": 24610 + }, + { + "epoch": 0.73, + "learning_rate": 3.981974631733768e-05, + "loss": 1.0056, + "step": 24615 + }, + { + "epoch": 0.74, + "learning_rate": 3.977813316167699e-05, + "loss": 1.0296, + "step": 24620 + }, + { + "epoch": 0.74, + "learning_rate": 3.973653636207437e-05, + "loss": 1.0432, + "step": 24625 + }, + { + "epoch": 0.74, + "learning_rate": 3.9694955929827405e-05, + "loss": 1.0553, + "step": 24630 + }, + { + "epoch": 0.74, + "learning_rate": 3.9653391876229215e-05, + "loss": 0.9968, + "step": 24635 + }, + { + "epoch": 0.74, + "learning_rate": 3.961184421256845e-05, + "loss": 1.0706, + "step": 24640 + }, + { + "epoch": 0.74, + "learning_rate": 3.957031295012924e-05, + "loss": 0.9676, + "step": 24645 + }, + { + "epoch": 0.74, + "learning_rate": 3.952879810019147e-05, + "loss": 1.0219, + "step": 24650 + }, + { + "epoch": 0.74, + "learning_rate": 3.948729967403035e-05, + "loss": 1.0704, + "step": 24655 + }, + { + "epoch": 0.74, + "learning_rate": 3.9445817682916716e-05, + "loss": 0.959, + "step": 24660 + }, + { + "epoch": 0.74, + "learning_rate": 3.940435213811695e-05, + "loss": 1.0821, + "step": 24665 + }, + { + "epoch": 0.74, + "learning_rate": 3.9362903050892996e-05, + "loss": 1.0856, + "step": 24670 + }, + { + "epoch": 0.74, + "learning_rate": 3.932147043250225e-05, + "loss": 1.0722, + "step": 24675 + }, + { + "epoch": 0.74, + "learning_rate": 3.9280054294197634e-05, + "loss": 1.0352, + "step": 24680 + }, + { + "epoch": 0.74, + "learning_rate": 3.923865464722768e-05, + "loss": 1.0356, + "step": 24685 + }, + { + "epoch": 0.74, + "learning_rate": 3.919727150283642e-05, + "loss": 1.0199, + "step": 24690 + }, + { + "epoch": 0.74, + "learning_rate": 3.915590487226335e-05, + "loss": 1.0598, + "step": 24695 + }, + { + "epoch": 0.74, + "learning_rate": 3.911455476674346e-05, + "loss": 1.0317, + "step": 24700 + }, + { + "epoch": 0.74, + "learning_rate": 3.907322119750738e-05, + "loss": 1.0269, + "step": 24705 + }, + { + "epoch": 0.74, + "learning_rate": 3.903190417578111e-05, + "loss": 1.0629, + "step": 24710 + }, + { + "epoch": 0.74, + "learning_rate": 3.899060371278627e-05, + "loss": 1.0572, + "step": 24715 + }, + { + "epoch": 0.74, + "learning_rate": 3.894931981973985e-05, + "loss": 1.0297, + "step": 24720 + }, + { + "epoch": 0.74, + "learning_rate": 3.890805250785451e-05, + "loss": 0.9756, + "step": 24725 + }, + { + "epoch": 0.74, + "learning_rate": 3.8866801788338234e-05, + "loss": 0.9862, + "step": 24730 + }, + { + "epoch": 0.74, + "learning_rate": 3.882556767239463e-05, + "loss": 1.0501, + "step": 24735 + }, + { + "epoch": 0.74, + "learning_rate": 3.878435017122274e-05, + "loss": 1.0846, + "step": 24740 + }, + { + "epoch": 0.74, + "learning_rate": 3.87431492960171e-05, + "loss": 0.9786, + "step": 24745 + }, + { + "epoch": 0.74, + "learning_rate": 3.870196505796769e-05, + "loss": 1.0771, + "step": 24750 + }, + { + "epoch": 0.74, + "learning_rate": 3.8660797468260047e-05, + "loss": 0.9828, + "step": 24755 + }, + { + "epoch": 0.74, + "learning_rate": 3.8619646538075184e-05, + "loss": 0.9843, + "step": 24760 + }, + { + "epoch": 0.74, + "learning_rate": 3.8578512278589505e-05, + "loss": 1.0299, + "step": 24765 + }, + { + "epoch": 0.74, + "learning_rate": 3.853739470097493e-05, + "loss": 1.0329, + "step": 24770 + }, + { + "epoch": 0.74, + "learning_rate": 3.849629381639886e-05, + "loss": 1.069, + "step": 24775 + }, + { + "epoch": 0.74, + "learning_rate": 3.8455209636024235e-05, + "loss": 0.9475, + "step": 24780 + }, + { + "epoch": 0.74, + "learning_rate": 3.84141421710093e-05, + "loss": 0.9991, + "step": 24785 + }, + { + "epoch": 0.74, + "learning_rate": 3.837309143250781e-05, + "loss": 1.0701, + "step": 24790 + }, + { + "epoch": 0.74, + "learning_rate": 3.833205743166913e-05, + "loss": 0.9952, + "step": 24795 + }, + { + "epoch": 0.74, + "learning_rate": 3.829104017963789e-05, + "loss": 1.0295, + "step": 24800 + }, + { + "epoch": 0.74, + "learning_rate": 3.8250039687554195e-05, + "loss": 0.9371, + "step": 24805 + }, + { + "epoch": 0.74, + "learning_rate": 3.82090559665537e-05, + "loss": 1.0486, + "step": 24810 + }, + { + "epoch": 0.74, + "learning_rate": 3.816808902776746e-05, + "loss": 0.9272, + "step": 24815 + }, + { + "epoch": 0.74, + "learning_rate": 3.812713888232193e-05, + "loss": 1.0242, + "step": 24820 + }, + { + "epoch": 0.74, + "learning_rate": 3.8086205541339e-05, + "loss": 1.08, + "step": 24825 + }, + { + "epoch": 0.74, + "learning_rate": 3.804528901593606e-05, + "loss": 1.0033, + "step": 24830 + }, + { + "epoch": 0.74, + "learning_rate": 3.800438931722594e-05, + "loss": 0.9959, + "step": 24835 + }, + { + "epoch": 0.74, + "learning_rate": 3.7963506456316835e-05, + "loss": 1.0322, + "step": 24840 + }, + { + "epoch": 0.74, + "learning_rate": 3.792264044431235e-05, + "loss": 1.0896, + "step": 24845 + }, + { + "epoch": 0.74, + "learning_rate": 3.788179129231159e-05, + "loss": 1.0203, + "step": 24850 + }, + { + "epoch": 0.74, + "learning_rate": 3.784095901140908e-05, + "loss": 1.0591, + "step": 24855 + }, + { + "epoch": 0.74, + "learning_rate": 3.7800143612694704e-05, + "loss": 0.9945, + "step": 24860 + }, + { + "epoch": 0.74, + "learning_rate": 3.7759345107253764e-05, + "loss": 0.9986, + "step": 24865 + }, + { + "epoch": 0.74, + "learning_rate": 3.7718563506167e-05, + "loss": 1.0429, + "step": 24870 + }, + { + "epoch": 0.74, + "learning_rate": 3.767779882051063e-05, + "loss": 1.0375, + "step": 24875 + }, + { + "epoch": 0.74, + "learning_rate": 3.7637051061356124e-05, + "loss": 0.9869, + "step": 24880 + }, + { + "epoch": 0.74, + "learning_rate": 3.759632023977049e-05, + "loss": 1.05, + "step": 24885 + }, + { + "epoch": 0.74, + "learning_rate": 3.7555606366816034e-05, + "loss": 1.0418, + "step": 24890 + }, + { + "epoch": 0.74, + "learning_rate": 3.7514909453550575e-05, + "loss": 1.0846, + "step": 24895 + }, + { + "epoch": 0.74, + "learning_rate": 3.747422951102718e-05, + "loss": 1.0166, + "step": 24900 + }, + { + "epoch": 0.74, + "learning_rate": 3.7433566550294464e-05, + "loss": 1.0291, + "step": 24905 + }, + { + "epoch": 0.74, + "learning_rate": 3.739292058239632e-05, + "loss": 1.0282, + "step": 24910 + }, + { + "epoch": 0.74, + "learning_rate": 3.7352291618372006e-05, + "loss": 1.0929, + "step": 24915 + }, + { + "epoch": 0.74, + "learning_rate": 3.731167966925626e-05, + "loss": 1.0216, + "step": 24920 + }, + { + "epoch": 0.74, + "learning_rate": 3.727108474607919e-05, + "loss": 1.0365, + "step": 24925 + }, + { + "epoch": 0.74, + "learning_rate": 3.7230506859866186e-05, + "loss": 1.003, + "step": 24930 + }, + { + "epoch": 0.74, + "learning_rate": 3.718994602163803e-05, + "loss": 0.9694, + "step": 24935 + }, + { + "epoch": 0.74, + "learning_rate": 3.7149402242411023e-05, + "loss": 0.9899, + "step": 24940 + }, + { + "epoch": 0.74, + "learning_rate": 3.710887553319666e-05, + "loss": 1.0177, + "step": 24945 + }, + { + "epoch": 0.74, + "learning_rate": 3.706836590500183e-05, + "loss": 1.0651, + "step": 24950 + }, + { + "epoch": 0.75, + "learning_rate": 3.702787336882883e-05, + "loss": 1.0599, + "step": 24955 + }, + { + "epoch": 0.75, + "learning_rate": 3.698739793567536e-05, + "loss": 1.0101, + "step": 24960 + }, + { + "epoch": 0.75, + "learning_rate": 3.694693961653436e-05, + "loss": 1.0426, + "step": 24965 + }, + { + "epoch": 0.75, + "learning_rate": 3.690649842239413e-05, + "loss": 1.1313, + "step": 24970 + }, + { + "epoch": 0.75, + "learning_rate": 3.686607436423842e-05, + "loss": 0.9959, + "step": 24975 + }, + { + "epoch": 0.75, + "learning_rate": 3.68256674530463e-05, + "loss": 1.0177, + "step": 24980 + }, + { + "epoch": 0.75, + "learning_rate": 3.678527769979211e-05, + "loss": 1.112, + "step": 24985 + }, + { + "epoch": 0.75, + "learning_rate": 3.674490511544553e-05, + "loss": 1.002, + "step": 24990 + }, + { + "epoch": 0.75, + "learning_rate": 3.670454971097167e-05, + "loss": 1.0708, + "step": 24995 + }, + { + "epoch": 0.75, + "learning_rate": 3.666421149733095e-05, + "loss": 1.0295, + "step": 25000 + }, + { + "epoch": 0.75, + "learning_rate": 3.662389048547906e-05, + "loss": 1.0685, + "step": 25005 + }, + { + "epoch": 0.75, + "learning_rate": 3.658358668636702e-05, + "loss": 1.0458, + "step": 25010 + }, + { + "epoch": 0.75, + "learning_rate": 3.654330011094124e-05, + "loss": 1.0473, + "step": 25015 + }, + { + "epoch": 0.75, + "learning_rate": 3.650303077014345e-05, + "loss": 0.9706, + "step": 25020 + }, + { + "epoch": 0.75, + "learning_rate": 3.6462778674910613e-05, + "loss": 1.0385, + "step": 25025 + }, + { + "epoch": 0.75, + "learning_rate": 3.642254383617511e-05, + "loss": 1.1033, + "step": 25030 + }, + { + "epoch": 0.75, + "learning_rate": 3.638232626486455e-05, + "loss": 1.0382, + "step": 25035 + }, + { + "epoch": 0.75, + "learning_rate": 3.634212597190194e-05, + "loss": 1.1013, + "step": 25040 + }, + { + "epoch": 0.75, + "learning_rate": 3.630194296820548e-05, + "loss": 1.0228, + "step": 25045 + }, + { + "epoch": 0.75, + "learning_rate": 3.62617772646888e-05, + "loss": 0.9911, + "step": 25050 + }, + { + "epoch": 0.75, + "learning_rate": 3.622162887226073e-05, + "loss": 1.0289, + "step": 25055 + }, + { + "epoch": 0.75, + "learning_rate": 3.618149780182547e-05, + "loss": 1.0379, + "step": 25060 + }, + { + "epoch": 0.75, + "learning_rate": 3.6141384064282444e-05, + "loss": 1.084, + "step": 25065 + }, + { + "epoch": 0.75, + "learning_rate": 3.610128767052647e-05, + "loss": 1.0367, + "step": 25070 + }, + { + "epoch": 0.75, + "learning_rate": 3.606120863144753e-05, + "loss": 1.0422, + "step": 25075 + }, + { + "epoch": 0.75, + "learning_rate": 3.6021146957930985e-05, + "loss": 1.12, + "step": 25080 + }, + { + "epoch": 0.75, + "learning_rate": 3.598110266085748e-05, + "loss": 1.0735, + "step": 25085 + }, + { + "epoch": 0.75, + "learning_rate": 3.5941075751102894e-05, + "loss": 0.9916, + "step": 25090 + }, + { + "epoch": 0.75, + "learning_rate": 3.5901066239538364e-05, + "loss": 0.9859, + "step": 25095 + }, + { + "epoch": 0.75, + "learning_rate": 3.586107413703036e-05, + "loss": 1.0192, + "step": 25100 + }, + { + "epoch": 0.75, + "learning_rate": 3.5821099454440656e-05, + "loss": 1.0239, + "step": 25105 + }, + { + "epoch": 0.75, + "learning_rate": 3.5781142202626195e-05, + "loss": 1.0951, + "step": 25110 + }, + { + "epoch": 0.75, + "learning_rate": 3.574120239243921e-05, + "loss": 0.9859, + "step": 25115 + }, + { + "epoch": 0.75, + "learning_rate": 3.570128003472725e-05, + "loss": 1.0418, + "step": 25120 + }, + { + "epoch": 0.75, + "learning_rate": 3.566137514033312e-05, + "loss": 1.0314, + "step": 25125 + }, + { + "epoch": 0.75, + "learning_rate": 3.562148772009484e-05, + "loss": 0.9898, + "step": 25130 + }, + { + "epoch": 0.75, + "learning_rate": 3.558161778484566e-05, + "loss": 1.0646, + "step": 25135 + }, + { + "epoch": 0.75, + "learning_rate": 3.554176534541414e-05, + "loss": 0.9335, + "step": 25140 + }, + { + "epoch": 0.75, + "learning_rate": 3.5501930412624106e-05, + "loss": 1.0956, + "step": 25145 + }, + { + "epoch": 0.75, + "learning_rate": 3.5462112997294586e-05, + "loss": 1.0649, + "step": 25150 + }, + { + "epoch": 0.75, + "learning_rate": 3.542231311023979e-05, + "loss": 1.0705, + "step": 25155 + }, + { + "epoch": 0.75, + "learning_rate": 3.538253076226928e-05, + "loss": 1.0392, + "step": 25160 + }, + { + "epoch": 0.75, + "learning_rate": 3.5342765964187864e-05, + "loss": 1.0846, + "step": 25165 + }, + { + "epoch": 0.75, + "learning_rate": 3.5303018726795434e-05, + "loss": 0.9589, + "step": 25170 + }, + { + "epoch": 0.75, + "learning_rate": 3.5263289060887284e-05, + "loss": 1.1038, + "step": 25175 + }, + { + "epoch": 0.75, + "learning_rate": 3.5223576977253794e-05, + "loss": 0.978, + "step": 25180 + }, + { + "epoch": 0.75, + "learning_rate": 3.518388248668069e-05, + "loss": 1.0487, + "step": 25185 + }, + { + "epoch": 0.75, + "learning_rate": 3.514420559994881e-05, + "loss": 1.0262, + "step": 25190 + }, + { + "epoch": 0.75, + "learning_rate": 3.510454632783432e-05, + "loss": 0.9691, + "step": 25195 + }, + { + "epoch": 0.75, + "learning_rate": 3.506490468110848e-05, + "loss": 1.0164, + "step": 25200 + }, + { + "epoch": 0.75, + "learning_rate": 3.502528067053791e-05, + "loss": 1.044, + "step": 25205 + }, + { + "epoch": 0.75, + "learning_rate": 3.4985674306884274e-05, + "loss": 0.9652, + "step": 25210 + }, + { + "epoch": 0.75, + "learning_rate": 3.49460856009046e-05, + "loss": 1.0593, + "step": 25215 + }, + { + "epoch": 0.75, + "learning_rate": 3.490651456335099e-05, + "loss": 1.1057, + "step": 25220 + }, + { + "epoch": 0.75, + "learning_rate": 3.486696120497083e-05, + "loss": 1.04, + "step": 25225 + }, + { + "epoch": 0.75, + "learning_rate": 3.482742553650672e-05, + "loss": 1.0138, + "step": 25230 + }, + { + "epoch": 0.75, + "learning_rate": 3.4787907568696374e-05, + "loss": 1.0212, + "step": 25235 + }, + { + "epoch": 0.75, + "learning_rate": 3.474840731227272e-05, + "loss": 0.9836, + "step": 25240 + }, + { + "epoch": 0.75, + "learning_rate": 3.4708924777963924e-05, + "loss": 1.0747, + "step": 25245 + }, + { + "epoch": 0.75, + "learning_rate": 3.466945997649333e-05, + "loss": 1.0274, + "step": 25250 + }, + { + "epoch": 0.75, + "learning_rate": 3.463001291857943e-05, + "loss": 1.0325, + "step": 25255 + }, + { + "epoch": 0.75, + "learning_rate": 3.459058361493588e-05, + "loss": 1.0744, + "step": 25260 + }, + { + "epoch": 0.75, + "learning_rate": 3.455117207627157e-05, + "loss": 0.9569, + "step": 25265 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511778313290576e-05, + "loss": 1.0063, + "step": 25270 + }, + { + "epoch": 0.75, + "learning_rate": 3.447240233669209e-05, + "loss": 1.0097, + "step": 25275 + }, + { + "epoch": 0.75, + "learning_rate": 3.443304415717041e-05, + "loss": 1.0799, + "step": 25280 + }, + { + "epoch": 0.75, + "learning_rate": 3.439370378541524e-05, + "loss": 1.0297, + "step": 25285 + }, + { + "epoch": 0.76, + "learning_rate": 3.435438123211123e-05, + "loss": 1.032, + "step": 25290 + }, + { + "epoch": 0.76, + "learning_rate": 3.43150765079382e-05, + "loss": 1.1305, + "step": 25295 + }, + { + "epoch": 0.76, + "learning_rate": 3.4275789623571234e-05, + "loss": 0.9546, + "step": 25300 + }, + { + "epoch": 0.76, + "learning_rate": 3.4236520589680544e-05, + "loss": 1.0394, + "step": 25305 + }, + { + "epoch": 0.76, + "learning_rate": 3.419726941693142e-05, + "loss": 1.0623, + "step": 25310 + }, + { + "epoch": 0.76, + "learning_rate": 3.415803611598435e-05, + "loss": 0.9996, + "step": 25315 + }, + { + "epoch": 0.76, + "learning_rate": 3.4118820697495004e-05, + "loss": 1.0376, + "step": 25320 + }, + { + "epoch": 0.76, + "learning_rate": 3.407962317211411e-05, + "loss": 1.1037, + "step": 25325 + }, + { + "epoch": 0.76, + "learning_rate": 3.4040443550487645e-05, + "loss": 0.9663, + "step": 25330 + }, + { + "epoch": 0.76, + "learning_rate": 3.4001281843256594e-05, + "loss": 1.0691, + "step": 25335 + }, + { + "epoch": 0.76, + "learning_rate": 3.396213806105721e-05, + "loss": 1.1158, + "step": 25340 + }, + { + "epoch": 0.76, + "learning_rate": 3.3923012214520763e-05, + "loss": 1.0353, + "step": 25345 + }, + { + "epoch": 0.76, + "learning_rate": 3.388390431427376e-05, + "loss": 0.9863, + "step": 25350 + }, + { + "epoch": 0.76, + "learning_rate": 3.3844814370937706e-05, + "loss": 1.0788, + "step": 25355 + }, + { + "epoch": 0.76, + "learning_rate": 3.380574239512937e-05, + "loss": 1.0027, + "step": 25360 + }, + { + "epoch": 0.76, + "learning_rate": 3.37666883974605e-05, + "loss": 1.0338, + "step": 25365 + }, + { + "epoch": 0.76, + "learning_rate": 3.3727652388538066e-05, + "loss": 1.0699, + "step": 25370 + }, + { + "epoch": 0.76, + "learning_rate": 3.3688634378964154e-05, + "loss": 1.028, + "step": 25375 + }, + { + "epoch": 0.76, + "learning_rate": 3.3649634379335896e-05, + "loss": 1.0691, + "step": 25380 + }, + { + "epoch": 0.76, + "learning_rate": 3.361065240024551e-05, + "loss": 1.1715, + "step": 25385 + }, + { + "epoch": 0.76, + "learning_rate": 3.357168845228043e-05, + "loss": 1.0491, + "step": 25390 + }, + { + "epoch": 0.76, + "learning_rate": 3.353274254602315e-05, + "loss": 1.0672, + "step": 25395 + }, + { + "epoch": 0.76, + "learning_rate": 3.3493814692051206e-05, + "loss": 1.0116, + "step": 25400 + }, + { + "epoch": 0.76, + "learning_rate": 3.3454904900937265e-05, + "loss": 1.0837, + "step": 25405 + }, + { + "epoch": 0.76, + "learning_rate": 3.3416013183249116e-05, + "loss": 1.0185, + "step": 25410 + }, + { + "epoch": 0.76, + "learning_rate": 3.3377139549549666e-05, + "loss": 1.0415, + "step": 25415 + }, + { + "epoch": 0.76, + "learning_rate": 3.3338284010396816e-05, + "loss": 0.9857, + "step": 25420 + }, + { + "epoch": 0.76, + "learning_rate": 3.329944657634353e-05, + "loss": 1.0154, + "step": 25425 + }, + { + "epoch": 0.76, + "learning_rate": 3.326062725793807e-05, + "loss": 1.0374, + "step": 25430 + }, + { + "epoch": 0.76, + "learning_rate": 3.322182606572357e-05, + "loss": 1.0429, + "step": 25435 + }, + { + "epoch": 0.76, + "learning_rate": 3.318304301023826e-05, + "loss": 1.0897, + "step": 25440 + }, + { + "epoch": 0.76, + "learning_rate": 3.3144278102015524e-05, + "loss": 1.0368, + "step": 25445 + }, + { + "epoch": 0.76, + "learning_rate": 3.310553135158382e-05, + "loss": 1.017, + "step": 25450 + }, + { + "epoch": 0.76, + "learning_rate": 3.306680276946661e-05, + "loss": 0.9524, + "step": 25455 + }, + { + "epoch": 0.76, + "learning_rate": 3.3028092366182394e-05, + "loss": 1.0269, + "step": 25460 + }, + { + "epoch": 0.76, + "learning_rate": 3.2989400152244834e-05, + "loss": 1.0666, + "step": 25465 + }, + { + "epoch": 0.76, + "learning_rate": 3.2950726138162635e-05, + "loss": 0.9549, + "step": 25470 + }, + { + "epoch": 0.76, + "learning_rate": 3.2912070334439505e-05, + "loss": 0.9711, + "step": 25475 + }, + { + "epoch": 0.76, + "learning_rate": 3.2873432751574194e-05, + "loss": 1.0817, + "step": 25480 + }, + { + "epoch": 0.76, + "learning_rate": 3.2834813400060574e-05, + "loss": 0.9732, + "step": 25485 + }, + { + "epoch": 0.76, + "learning_rate": 3.279621229038756e-05, + "loss": 1.0078, + "step": 25490 + }, + { + "epoch": 0.76, + "learning_rate": 3.275762943303906e-05, + "loss": 0.9929, + "step": 25495 + }, + { + "epoch": 0.76, + "learning_rate": 3.2719064838494006e-05, + "loss": 1.0068, + "step": 25500 + }, + { + "epoch": 0.76, + "learning_rate": 3.2680518517226455e-05, + "loss": 0.9716, + "step": 25505 + }, + { + "epoch": 0.76, + "learning_rate": 3.26419904797055e-05, + "loss": 1.0621, + "step": 25510 + }, + { + "epoch": 0.76, + "learning_rate": 3.260348073639513e-05, + "loss": 1.1248, + "step": 25515 + }, + { + "epoch": 0.76, + "learning_rate": 3.2564989297754565e-05, + "loss": 1.0597, + "step": 25520 + }, + { + "epoch": 0.76, + "learning_rate": 3.25265161742379e-05, + "loss": 1.0261, + "step": 25525 + }, + { + "epoch": 0.76, + "learning_rate": 3.2488061376294277e-05, + "loss": 1.0731, + "step": 25530 + }, + { + "epoch": 0.76, + "learning_rate": 3.2449624914367914e-05, + "loss": 1.0612, + "step": 25535 + }, + { + "epoch": 0.76, + "learning_rate": 3.241120679889807e-05, + "loss": 1.0419, + "step": 25540 + }, + { + "epoch": 0.76, + "learning_rate": 3.237280704031895e-05, + "loss": 1.0817, + "step": 25545 + }, + { + "epoch": 0.76, + "learning_rate": 3.233442564905976e-05, + "loss": 1.0345, + "step": 25550 + }, + { + "epoch": 0.76, + "learning_rate": 3.22960626355448e-05, + "loss": 1.0572, + "step": 25555 + }, + { + "epoch": 0.76, + "learning_rate": 3.225771801019334e-05, + "loss": 1.0847, + "step": 25560 + }, + { + "epoch": 0.76, + "learning_rate": 3.221939178341965e-05, + "loss": 1.0792, + "step": 25565 + }, + { + "epoch": 0.76, + "learning_rate": 3.218108396563293e-05, + "loss": 1.0027, + "step": 25570 + }, + { + "epoch": 0.76, + "learning_rate": 3.214279456723758e-05, + "loss": 1.012, + "step": 25575 + }, + { + "epoch": 0.76, + "learning_rate": 3.210452359863282e-05, + "loss": 1.0112, + "step": 25580 + }, + { + "epoch": 0.76, + "learning_rate": 3.2066271070212874e-05, + "loss": 1.1183, + "step": 25585 + }, + { + "epoch": 0.76, + "learning_rate": 3.202803699236703e-05, + "loss": 1.0012, + "step": 25590 + }, + { + "epoch": 0.76, + "learning_rate": 3.198982137547959e-05, + "loss": 1.0009, + "step": 25595 + }, + { + "epoch": 0.76, + "learning_rate": 3.1951624229929736e-05, + "loss": 1.0293, + "step": 25600 + }, + { + "epoch": 0.76, + "learning_rate": 3.191344556609165e-05, + "loss": 1.0824, + "step": 25605 + }, + { + "epoch": 0.76, + "learning_rate": 3.187528539433458e-05, + "loss": 1.0356, + "step": 25610 + }, + { + "epoch": 0.76, + "learning_rate": 3.183714372502272e-05, + "loss": 1.0295, + "step": 25615 + }, + { + "epoch": 0.76, + "learning_rate": 3.179902056851519e-05, + "loss": 1.0563, + "step": 25620 + }, + { + "epoch": 0.77, + "learning_rate": 3.176091593516608e-05, + "loss": 1.0399, + "step": 25625 + }, + { + "epoch": 0.77, + "learning_rate": 3.172282983532453e-05, + "loss": 1.0585, + "step": 25630 + }, + { + "epoch": 0.77, + "learning_rate": 3.16847622793346e-05, + "loss": 1.0386, + "step": 25635 + }, + { + "epoch": 0.77, + "learning_rate": 3.16467132775353e-05, + "loss": 1.0185, + "step": 25640 + }, + { + "epoch": 0.77, + "learning_rate": 3.160868284026057e-05, + "loss": 0.9609, + "step": 25645 + }, + { + "epoch": 0.77, + "learning_rate": 3.157067097783939e-05, + "loss": 1.089, + "step": 25650 + }, + { + "epoch": 0.77, + "learning_rate": 3.153267770059568e-05, + "loss": 1.0197, + "step": 25655 + }, + { + "epoch": 0.77, + "learning_rate": 3.149470301884823e-05, + "loss": 1.0694, + "step": 25660 + }, + { + "epoch": 0.77, + "learning_rate": 3.1456746942910896e-05, + "loss": 1.0853, + "step": 25665 + }, + { + "epoch": 0.77, + "learning_rate": 3.141880948309236e-05, + "loss": 1.0008, + "step": 25670 + }, + { + "epoch": 0.77, + "learning_rate": 3.138089064969637e-05, + "loss": 1.0344, + "step": 25675 + }, + { + "epoch": 0.77, + "learning_rate": 3.134299045302149e-05, + "loss": 0.9639, + "step": 25680 + }, + { + "epoch": 0.77, + "learning_rate": 3.130510890336136e-05, + "loss": 0.9792, + "step": 25685 + }, + { + "epoch": 0.77, + "learning_rate": 3.1267246011004405e-05, + "loss": 1.1146, + "step": 25690 + }, + { + "epoch": 0.77, + "learning_rate": 3.1229401786234134e-05, + "loss": 1.1178, + "step": 25695 + }, + { + "epoch": 0.77, + "learning_rate": 3.119157623932884e-05, + "loss": 1.1204, + "step": 25700 + }, + { + "epoch": 0.77, + "learning_rate": 3.11537693805619e-05, + "loss": 1.0428, + "step": 25705 + }, + { + "epoch": 0.77, + "learning_rate": 3.111598122020144e-05, + "loss": 0.9447, + "step": 25710 + }, + { + "epoch": 0.77, + "learning_rate": 3.107821176851065e-05, + "loss": 1.0541, + "step": 25715 + }, + { + "epoch": 0.77, + "learning_rate": 3.1040461035747605e-05, + "loss": 1.0856, + "step": 25720 + }, + { + "epoch": 0.77, + "learning_rate": 3.1002729032165256e-05, + "loss": 1.0516, + "step": 25725 + }, + { + "epoch": 0.77, + "learning_rate": 3.0965015768011455e-05, + "loss": 1.0017, + "step": 25730 + }, + { + "epoch": 0.77, + "learning_rate": 3.092732125352902e-05, + "loss": 1.106, + "step": 25735 + }, + { + "epoch": 0.77, + "learning_rate": 3.08896454989557e-05, + "loss": 1.1169, + "step": 25740 + }, + { + "epoch": 0.77, + "learning_rate": 3.085198851452407e-05, + "loss": 1.079, + "step": 25745 + }, + { + "epoch": 0.77, + "learning_rate": 3.0814350310461615e-05, + "loss": 1.1127, + "step": 25750 + }, + { + "epoch": 0.77, + "learning_rate": 3.0776730896990756e-05, + "loss": 1.0324, + "step": 25755 + }, + { + "epoch": 0.77, + "learning_rate": 3.0739130284328856e-05, + "loss": 1.0837, + "step": 25760 + }, + { + "epoch": 0.77, + "learning_rate": 3.070154848268807e-05, + "loss": 1.0753, + "step": 25765 + }, + { + "epoch": 0.77, + "learning_rate": 3.066398550227547e-05, + "loss": 1.0661, + "step": 25770 + }, + { + "epoch": 0.77, + "learning_rate": 3.062644135329306e-05, + "loss": 1.0329, + "step": 25775 + }, + { + "epoch": 0.77, + "learning_rate": 3.058891604593773e-05, + "loss": 1.0285, + "step": 25780 + }, + { + "epoch": 0.77, + "learning_rate": 3.0551409590401214e-05, + "loss": 1.0029, + "step": 25785 + }, + { + "epoch": 0.77, + "learning_rate": 3.0513921996870087e-05, + "loss": 0.97, + "step": 25790 + }, + { + "epoch": 0.77, + "learning_rate": 3.0476453275525906e-05, + "loss": 1.0401, + "step": 25795 + }, + { + "epoch": 0.77, + "learning_rate": 3.0439003436545076e-05, + "loss": 1.0599, + "step": 25800 + }, + { + "epoch": 0.77, + "learning_rate": 3.0401572490098783e-05, + "loss": 1.0251, + "step": 25805 + }, + { + "epoch": 0.77, + "learning_rate": 3.0364160446353217e-05, + "loss": 0.9775, + "step": 25810 + }, + { + "epoch": 0.77, + "learning_rate": 3.0326767315469285e-05, + "loss": 1.0372, + "step": 25815 + }, + { + "epoch": 0.77, + "learning_rate": 3.0289393107602914e-05, + "loss": 1.0068, + "step": 25820 + }, + { + "epoch": 0.77, + "learning_rate": 3.0252037832904745e-05, + "loss": 0.9607, + "step": 25825 + }, + { + "epoch": 0.77, + "learning_rate": 3.0214701501520415e-05, + "loss": 1.0407, + "step": 25830 + }, + { + "epoch": 0.77, + "learning_rate": 3.017738412359027e-05, + "loss": 1.0031, + "step": 25835 + }, + { + "epoch": 0.77, + "learning_rate": 3.0140085709249667e-05, + "loss": 1.0148, + "step": 25840 + }, + { + "epoch": 0.77, + "learning_rate": 3.0102806268628658e-05, + "loss": 0.9733, + "step": 25845 + }, + { + "epoch": 0.77, + "learning_rate": 3.006554581185228e-05, + "loss": 1.0016, + "step": 25850 + }, + { + "epoch": 0.77, + "learning_rate": 3.002830434904028e-05, + "loss": 0.9631, + "step": 25855 + }, + { + "epoch": 0.77, + "learning_rate": 2.999108189030735e-05, + "loss": 1.1022, + "step": 25860 + }, + { + "epoch": 0.77, + "learning_rate": 2.9953878445763016e-05, + "loss": 1.0955, + "step": 25865 + }, + { + "epoch": 0.77, + "learning_rate": 2.9916694025511594e-05, + "loss": 1.0236, + "step": 25870 + }, + { + "epoch": 0.77, + "learning_rate": 2.9879528639652188e-05, + "loss": 0.9832, + "step": 25875 + }, + { + "epoch": 0.77, + "learning_rate": 2.984238229827885e-05, + "loss": 0.9476, + "step": 25880 + }, + { + "epoch": 0.77, + "learning_rate": 2.9805255011480415e-05, + "loss": 1.0375, + "step": 25885 + }, + { + "epoch": 0.77, + "learning_rate": 2.9768146789340513e-05, + "loss": 1.0643, + "step": 25890 + }, + { + "epoch": 0.77, + "learning_rate": 2.9731057641937566e-05, + "loss": 0.9928, + "step": 25895 + }, + { + "epoch": 0.77, + "learning_rate": 2.9693987579344917e-05, + "loss": 0.9597, + "step": 25900 + }, + { + "epoch": 0.77, + "learning_rate": 2.9656936611630683e-05, + "loss": 1.0707, + "step": 25905 + }, + { + "epoch": 0.77, + "learning_rate": 2.9619904748857765e-05, + "loss": 1.0085, + "step": 25910 + }, + { + "epoch": 0.77, + "learning_rate": 2.9582892001083805e-05, + "loss": 1.0242, + "step": 25915 + }, + { + "epoch": 0.77, + "learning_rate": 2.9545898378361502e-05, + "loss": 1.0192, + "step": 25920 + }, + { + "epoch": 0.77, + "learning_rate": 2.9508923890738127e-05, + "loss": 0.9726, + "step": 25925 + }, + { + "epoch": 0.77, + "learning_rate": 2.947196854825581e-05, + "loss": 1.0651, + "step": 25930 + }, + { + "epoch": 0.77, + "learning_rate": 2.9435032360951486e-05, + "loss": 1.0181, + "step": 25935 + }, + { + "epoch": 0.77, + "learning_rate": 2.9398115338856923e-05, + "loss": 1.0653, + "step": 25940 + }, + { + "epoch": 0.77, + "learning_rate": 2.9361217491998694e-05, + "loss": 1.0232, + "step": 25945 + }, + { + "epoch": 0.77, + "learning_rate": 2.9324338830398068e-05, + "loss": 1.0497, + "step": 25950 + }, + { + "epoch": 0.77, + "learning_rate": 2.9287479364071237e-05, + "loss": 1.0093, + "step": 25955 + }, + { + "epoch": 0.78, + "learning_rate": 2.9250639103029033e-05, + "loss": 1.0346, + "step": 25960 + }, + { + "epoch": 0.78, + "learning_rate": 2.921381805727722e-05, + "loss": 0.9252, + "step": 25965 + }, + { + "epoch": 0.78, + "learning_rate": 2.917701623681621e-05, + "loss": 0.9798, + "step": 25970 + }, + { + "epoch": 0.78, + "learning_rate": 2.9140233651641314e-05, + "loss": 1.1014, + "step": 25975 + }, + { + "epoch": 0.78, + "learning_rate": 2.9103470311742485e-05, + "loss": 1.0062, + "step": 25980 + }, + { + "epoch": 0.78, + "learning_rate": 2.9066726227104612e-05, + "loss": 1.087, + "step": 25985 + }, + { + "epoch": 0.78, + "learning_rate": 2.9030001407707174e-05, + "loss": 0.9848, + "step": 25990 + }, + { + "epoch": 0.78, + "learning_rate": 2.8993295863524593e-05, + "loss": 1.0126, + "step": 25995 + }, + { + "epoch": 0.78, + "learning_rate": 2.8956609604525908e-05, + "loss": 1.0782, + "step": 26000 + }, + { + "epoch": 0.78, + "learning_rate": 2.8919942640675002e-05, + "loss": 1.0928, + "step": 26005 + }, + { + "epoch": 0.78, + "learning_rate": 2.8883294981930542e-05, + "loss": 1.0186, + "step": 26010 + }, + { + "epoch": 0.78, + "learning_rate": 2.8846666638245878e-05, + "loss": 0.9454, + "step": 26015 + }, + { + "epoch": 0.78, + "learning_rate": 2.881005761956911e-05, + "loss": 0.9777, + "step": 26020 + }, + { + "epoch": 0.78, + "learning_rate": 2.8773467935843167e-05, + "loss": 1.04, + "step": 26025 + }, + { + "epoch": 0.78, + "learning_rate": 2.873689759700571e-05, + "loss": 1.0774, + "step": 26030 + }, + { + "epoch": 0.78, + "learning_rate": 2.8700346612989083e-05, + "loss": 0.9891, + "step": 26035 + }, + { + "epoch": 0.78, + "learning_rate": 2.8663814993720385e-05, + "loss": 1.0537, + "step": 26040 + }, + { + "epoch": 0.78, + "learning_rate": 2.862730274912152e-05, + "loss": 1.1639, + "step": 26045 + }, + { + "epoch": 0.78, + "learning_rate": 2.8590809889109104e-05, + "loss": 1.0412, + "step": 26050 + }, + { + "epoch": 0.78, + "learning_rate": 2.8554336423594476e-05, + "loss": 1.0299, + "step": 26055 + }, + { + "epoch": 0.78, + "learning_rate": 2.8517882362483617e-05, + "loss": 1.0334, + "step": 26060 + }, + { + "epoch": 0.78, + "learning_rate": 2.8481447715677466e-05, + "loss": 1.0254, + "step": 26065 + }, + { + "epoch": 0.78, + "learning_rate": 2.844503249307149e-05, + "loss": 0.9921, + "step": 26070 + }, + { + "epoch": 0.78, + "learning_rate": 2.8408636704555923e-05, + "loss": 1.0319, + "step": 26075 + }, + { + "epoch": 0.78, + "learning_rate": 2.837226036001571e-05, + "loss": 0.9438, + "step": 26080 + }, + { + "epoch": 0.78, + "learning_rate": 2.833590346933065e-05, + "loss": 1.0092, + "step": 26085 + }, + { + "epoch": 0.78, + "learning_rate": 2.829956604237508e-05, + "loss": 0.9951, + "step": 26090 + }, + { + "epoch": 0.78, + "learning_rate": 2.8263248089018113e-05, + "loss": 1.0773, + "step": 26095 + }, + { + "epoch": 0.78, + "learning_rate": 2.822694961912361e-05, + "loss": 1.0475, + "step": 26100 + }, + { + "epoch": 0.78, + "learning_rate": 2.8190670642550143e-05, + "loss": 0.9991, + "step": 26105 + }, + { + "epoch": 0.78, + "learning_rate": 2.815441116915093e-05, + "loss": 1.1085, + "step": 26110 + }, + { + "epoch": 0.78, + "learning_rate": 2.81181712087739e-05, + "loss": 1.054, + "step": 26115 + }, + { + "epoch": 0.78, + "learning_rate": 2.8081950771261723e-05, + "loss": 1.0013, + "step": 26120 + }, + { + "epoch": 0.78, + "learning_rate": 2.8045749866451776e-05, + "loss": 1.0207, + "step": 26125 + }, + { + "epoch": 0.78, + "learning_rate": 2.8009568504176097e-05, + "loss": 0.9987, + "step": 26130 + }, + { + "epoch": 0.78, + "learning_rate": 2.7973406694261362e-05, + "loss": 1.0233, + "step": 26135 + }, + { + "epoch": 0.78, + "learning_rate": 2.7937264446529087e-05, + "loss": 1.1119, + "step": 26140 + }, + { + "epoch": 0.78, + "learning_rate": 2.7901141770795315e-05, + "loss": 0.9424, + "step": 26145 + }, + { + "epoch": 0.78, + "learning_rate": 2.7865038676870868e-05, + "loss": 1.0441, + "step": 26150 + }, + { + "epoch": 0.78, + "learning_rate": 2.7828955174561257e-05, + "loss": 1.0475, + "step": 26155 + }, + { + "epoch": 0.78, + "learning_rate": 2.7792891273666623e-05, + "loss": 1.121, + "step": 26160 + }, + { + "epoch": 0.78, + "learning_rate": 2.7756846983981753e-05, + "loss": 1.0811, + "step": 26165 + }, + { + "epoch": 0.78, + "learning_rate": 2.77208223152962e-05, + "loss": 1.0469, + "step": 26170 + }, + { + "epoch": 0.78, + "learning_rate": 2.7684817277394172e-05, + "loss": 1.0711, + "step": 26175 + }, + { + "epoch": 0.78, + "learning_rate": 2.7648831880054483e-05, + "loss": 1.0412, + "step": 26180 + }, + { + "epoch": 0.78, + "learning_rate": 2.761286613305062e-05, + "loss": 1.0333, + "step": 26185 + }, + { + "epoch": 0.78, + "learning_rate": 2.7576920046150778e-05, + "loss": 1.0395, + "step": 26190 + }, + { + "epoch": 0.78, + "learning_rate": 2.7540993629117852e-05, + "loss": 0.9771, + "step": 26195 + }, + { + "epoch": 0.78, + "learning_rate": 2.7505086891709277e-05, + "loss": 1.0648, + "step": 26200 + }, + { + "epoch": 0.78, + "learning_rate": 2.746919984367715e-05, + "loss": 1.0203, + "step": 26205 + }, + { + "epoch": 0.78, + "learning_rate": 2.7433332494768415e-05, + "loss": 1.0132, + "step": 26210 + }, + { + "epoch": 0.78, + "learning_rate": 2.7397484854724433e-05, + "loss": 1.0347, + "step": 26215 + }, + { + "epoch": 0.78, + "learning_rate": 2.736165693328133e-05, + "loss": 1.0185, + "step": 26220 + }, + { + "epoch": 0.78, + "learning_rate": 2.732584874016977e-05, + "loss": 1.0573, + "step": 26225 + }, + { + "epoch": 0.78, + "learning_rate": 2.729006028511527e-05, + "loss": 0.9561, + "step": 26230 + }, + { + "epoch": 0.78, + "learning_rate": 2.72542915778378e-05, + "loss": 1.0253, + "step": 26235 + }, + { + "epoch": 0.78, + "learning_rate": 2.721854262805198e-05, + "loss": 0.9673, + "step": 26240 + }, + { + "epoch": 0.78, + "learning_rate": 2.7182813445467138e-05, + "loss": 0.9305, + "step": 26245 + }, + { + "epoch": 0.78, + "learning_rate": 2.714710403978723e-05, + "loss": 0.9437, + "step": 26250 + }, + { + "epoch": 0.78, + "learning_rate": 2.711141442071078e-05, + "loss": 1.0434, + "step": 26255 + }, + { + "epoch": 0.78, + "learning_rate": 2.707574459793095e-05, + "loss": 0.9906, + "step": 26260 + }, + { + "epoch": 0.78, + "learning_rate": 2.7040094581135565e-05, + "loss": 1.084, + "step": 26265 + }, + { + "epoch": 0.78, + "learning_rate": 2.700446438000709e-05, + "loss": 0.986, + "step": 26270 + }, + { + "epoch": 0.78, + "learning_rate": 2.696885400422252e-05, + "loss": 1.0865, + "step": 26275 + }, + { + "epoch": 0.78, + "learning_rate": 2.6933263463453505e-05, + "loss": 0.9887, + "step": 26280 + }, + { + "epoch": 0.78, + "learning_rate": 2.6897692767366334e-05, + "loss": 1.0351, + "step": 26285 + }, + { + "epoch": 0.78, + "learning_rate": 2.686214192562193e-05, + "loss": 1.0374, + "step": 26290 + }, + { + "epoch": 0.79, + "learning_rate": 2.682661094787571e-05, + "loss": 1.113, + "step": 26295 + }, + { + "epoch": 0.79, + "learning_rate": 2.6791099843777846e-05, + "loss": 1.1425, + "step": 26300 + }, + { + "epoch": 0.79, + "learning_rate": 2.675560862297296e-05, + "loss": 0.9771, + "step": 26305 + }, + { + "epoch": 0.79, + "learning_rate": 2.672013729510041e-05, + "loss": 1.0047, + "step": 26310 + }, + { + "epoch": 0.79, + "learning_rate": 2.6684685869794035e-05, + "loss": 0.9878, + "step": 26315 + }, + { + "epoch": 0.79, + "learning_rate": 2.6649254356682386e-05, + "loss": 1.0397, + "step": 26320 + }, + { + "epoch": 0.79, + "learning_rate": 2.6613842765388486e-05, + "loss": 1.0402, + "step": 26325 + }, + { + "epoch": 0.79, + "learning_rate": 2.6578451105530055e-05, + "loss": 1.0617, + "step": 26330 + }, + { + "epoch": 0.79, + "learning_rate": 2.6543079386719295e-05, + "loss": 1.005, + "step": 26335 + }, + { + "epoch": 0.79, + "learning_rate": 2.6507727618563117e-05, + "loss": 1.0364, + "step": 26340 + }, + { + "epoch": 0.79, + "learning_rate": 2.6472395810662888e-05, + "loss": 1.053, + "step": 26345 + }, + { + "epoch": 0.79, + "learning_rate": 2.6437083972614572e-05, + "loss": 1.0284, + "step": 26350 + }, + { + "epoch": 0.79, + "learning_rate": 2.640179211400886e-05, + "loss": 1.091, + "step": 26355 + }, + { + "epoch": 0.79, + "learning_rate": 2.6366520244430836e-05, + "loss": 1.0511, + "step": 26360 + }, + { + "epoch": 0.79, + "learning_rate": 2.6331268373460207e-05, + "loss": 1.0229, + "step": 26365 + }, + { + "epoch": 0.79, + "learning_rate": 2.6296036510671284e-05, + "loss": 1.0336, + "step": 26370 + }, + { + "epoch": 0.79, + "learning_rate": 2.6260824665632944e-05, + "loss": 1.0379, + "step": 26375 + }, + { + "epoch": 0.79, + "learning_rate": 2.62256328479086e-05, + "loss": 1.034, + "step": 26380 + }, + { + "epoch": 0.79, + "learning_rate": 2.6190461067056172e-05, + "loss": 1.0471, + "step": 26385 + }, + { + "epoch": 0.79, + "learning_rate": 2.6155309332628254e-05, + "loss": 0.9789, + "step": 26390 + }, + { + "epoch": 0.79, + "learning_rate": 2.6120177654171963e-05, + "loss": 1.0769, + "step": 26395 + }, + { + "epoch": 0.79, + "learning_rate": 2.608506604122891e-05, + "loss": 1.0738, + "step": 26400 + }, + { + "epoch": 0.79, + "learning_rate": 2.6049974503335273e-05, + "loss": 1.0762, + "step": 26405 + }, + { + "epoch": 0.79, + "learning_rate": 2.601490305002181e-05, + "loss": 1.0625, + "step": 26410 + }, + { + "epoch": 0.79, + "learning_rate": 2.597985169081385e-05, + "loss": 0.9961, + "step": 26415 + }, + { + "epoch": 0.79, + "learning_rate": 2.5944820435231198e-05, + "loss": 0.922, + "step": 26420 + }, + { + "epoch": 0.79, + "learning_rate": 2.590980929278819e-05, + "loss": 0.9967, + "step": 26425 + }, + { + "epoch": 0.79, + "learning_rate": 2.5874818272993773e-05, + "loss": 1.0476, + "step": 26430 + }, + { + "epoch": 0.79, + "learning_rate": 2.5839847385351413e-05, + "loss": 1.0677, + "step": 26435 + }, + { + "epoch": 0.79, + "learning_rate": 2.5804896639359032e-05, + "loss": 1.0986, + "step": 26440 + }, + { + "epoch": 0.79, + "learning_rate": 2.57699660445092e-05, + "loss": 1.0052, + "step": 26445 + }, + { + "epoch": 0.79, + "learning_rate": 2.5735055610288872e-05, + "loss": 1.0569, + "step": 26450 + }, + { + "epoch": 0.79, + "learning_rate": 2.5700165346179685e-05, + "loss": 0.9948, + "step": 26455 + }, + { + "epoch": 0.79, + "learning_rate": 2.5665295261657652e-05, + "loss": 0.9262, + "step": 26460 + }, + { + "epoch": 0.79, + "learning_rate": 2.563044536619342e-05, + "loss": 1.0423, + "step": 26465 + }, + { + "epoch": 0.79, + "learning_rate": 2.559561566925206e-05, + "loss": 1.081, + "step": 26470 + }, + { + "epoch": 0.79, + "learning_rate": 2.556080618029326e-05, + "loss": 1.0541, + "step": 26475 + }, + { + "epoch": 0.79, + "learning_rate": 2.55260169087711e-05, + "loss": 1.0365, + "step": 26480 + }, + { + "epoch": 0.79, + "learning_rate": 2.549124786413428e-05, + "loss": 1.0402, + "step": 26485 + }, + { + "epoch": 0.79, + "learning_rate": 2.5456499055825912e-05, + "loss": 0.9999, + "step": 26490 + }, + { + "epoch": 0.79, + "learning_rate": 2.542177049328367e-05, + "loss": 1.0206, + "step": 26495 + }, + { + "epoch": 0.79, + "learning_rate": 2.5387062185939757e-05, + "loss": 1.0495, + "step": 26500 + }, + { + "epoch": 0.79, + "learning_rate": 2.5352374143220803e-05, + "loss": 1.0061, + "step": 26505 + }, + { + "epoch": 0.79, + "learning_rate": 2.5317706374547933e-05, + "loss": 1.052, + "step": 26510 + }, + { + "epoch": 0.79, + "learning_rate": 2.528305888933683e-05, + "loss": 1.08, + "step": 26515 + }, + { + "epoch": 0.79, + "learning_rate": 2.5248431696997654e-05, + "loss": 1.0569, + "step": 26520 + }, + { + "epoch": 0.79, + "learning_rate": 2.5213824806935015e-05, + "loss": 1.0738, + "step": 26525 + }, + { + "epoch": 0.79, + "learning_rate": 2.5179238228547997e-05, + "loss": 1.0215, + "step": 26530 + }, + { + "epoch": 0.79, + "learning_rate": 2.5144671971230228e-05, + "loss": 0.9556, + "step": 26535 + }, + { + "epoch": 0.79, + "learning_rate": 2.511012604436982e-05, + "loss": 1.0332, + "step": 26540 + }, + { + "epoch": 0.79, + "learning_rate": 2.50756004573493e-05, + "loss": 1.0865, + "step": 26545 + }, + { + "epoch": 0.79, + "learning_rate": 2.5041095219545674e-05, + "loss": 1.0271, + "step": 26550 + }, + { + "epoch": 0.79, + "learning_rate": 2.5006610340330472e-05, + "loss": 1.0613, + "step": 26555 + }, + { + "epoch": 0.79, + "learning_rate": 2.4972145829069714e-05, + "loss": 0.9664, + "step": 26560 + }, + { + "epoch": 0.79, + "learning_rate": 2.4937701695123815e-05, + "loss": 0.9145, + "step": 26565 + }, + { + "epoch": 0.79, + "learning_rate": 2.490327794784765e-05, + "loss": 1.0345, + "step": 26570 + }, + { + "epoch": 0.79, + "learning_rate": 2.4868874596590618e-05, + "loss": 1.0098, + "step": 26575 + }, + { + "epoch": 0.79, + "learning_rate": 2.48344916506966e-05, + "loss": 0.9982, + "step": 26580 + }, + { + "epoch": 0.79, + "learning_rate": 2.4800129119503822e-05, + "loss": 1.0655, + "step": 26585 + }, + { + "epoch": 0.79, + "learning_rate": 2.47657870123451e-05, + "loss": 1.0143, + "step": 26590 + }, + { + "epoch": 0.79, + "learning_rate": 2.4731465338547556e-05, + "loss": 1.046, + "step": 26595 + }, + { + "epoch": 0.79, + "learning_rate": 2.4697164107432912e-05, + "loss": 0.9702, + "step": 26600 + }, + { + "epoch": 0.79, + "learning_rate": 2.4662883328317222e-05, + "loss": 1.0466, + "step": 26605 + }, + { + "epoch": 0.79, + "learning_rate": 2.4628623010511075e-05, + "loss": 1.1417, + "step": 26610 + }, + { + "epoch": 0.79, + "learning_rate": 2.459438316331941e-05, + "loss": 1.0417, + "step": 26615 + }, + { + "epoch": 0.79, + "learning_rate": 2.45601637960417e-05, + "loss": 0.9875, + "step": 26620 + }, + { + "epoch": 0.8, + "learning_rate": 2.4525964917971767e-05, + "loss": 0.9851, + "step": 26625 + }, + { + "epoch": 0.8, + "learning_rate": 2.4491786538397967e-05, + "loss": 1.0625, + "step": 26630 + }, + { + "epoch": 0.8, + "learning_rate": 2.445762866660297e-05, + "loss": 0.9772, + "step": 26635 + }, + { + "epoch": 0.8, + "learning_rate": 2.4423491311863965e-05, + "loss": 1.003, + "step": 26640 + }, + { + "epoch": 0.8, + "learning_rate": 2.438937448345259e-05, + "loss": 1.0837, + "step": 26645 + }, + { + "epoch": 0.8, + "learning_rate": 2.435527819063482e-05, + "loss": 1.0253, + "step": 26650 + }, + { + "epoch": 0.8, + "learning_rate": 2.4321202442671066e-05, + "loss": 0.9579, + "step": 26655 + }, + { + "epoch": 0.8, + "learning_rate": 2.428714724881622e-05, + "loss": 1.1074, + "step": 26660 + }, + { + "epoch": 0.8, + "learning_rate": 2.4253112618319584e-05, + "loss": 1.0136, + "step": 26665 + }, + { + "epoch": 0.8, + "learning_rate": 2.4219098560424826e-05, + "loss": 1.0544, + "step": 26670 + }, + { + "epoch": 0.8, + "learning_rate": 2.4185105084370018e-05, + "loss": 1.0551, + "step": 26675 + }, + { + "epoch": 0.8, + "learning_rate": 2.4151132199387704e-05, + "loss": 1.051, + "step": 26680 + }, + { + "epoch": 0.8, + "learning_rate": 2.4117179914704835e-05, + "loss": 1.0234, + "step": 26685 + }, + { + "epoch": 0.8, + "learning_rate": 2.408324823954272e-05, + "loss": 1.137, + "step": 26690 + }, + { + "epoch": 0.8, + "learning_rate": 2.404933718311704e-05, + "loss": 1.0517, + "step": 26695 + }, + { + "epoch": 0.8, + "learning_rate": 2.4015446754637973e-05, + "loss": 1.0093, + "step": 26700 + }, + { + "epoch": 0.8, + "learning_rate": 2.3981576963310048e-05, + "loss": 0.9862, + "step": 26705 + }, + { + "epoch": 0.8, + "learning_rate": 2.394772781833219e-05, + "loss": 0.9466, + "step": 26710 + }, + { + "epoch": 0.8, + "learning_rate": 2.3913899328897647e-05, + "loss": 0.9921, + "step": 26715 + }, + { + "epoch": 0.8, + "learning_rate": 2.3880091504194225e-05, + "loss": 1.0177, + "step": 26720 + }, + { + "epoch": 0.8, + "learning_rate": 2.384630435340397e-05, + "loss": 1.0909, + "step": 26725 + }, + { + "epoch": 0.8, + "learning_rate": 2.3812537885703336e-05, + "loss": 1.0527, + "step": 26730 + }, + { + "epoch": 0.8, + "learning_rate": 2.3778792110263203e-05, + "loss": 1.0238, + "step": 26735 + }, + { + "epoch": 0.8, + "learning_rate": 2.3745067036248836e-05, + "loss": 1.0966, + "step": 26740 + }, + { + "epoch": 0.8, + "learning_rate": 2.3711362672819826e-05, + "loss": 1.0737, + "step": 26745 + }, + { + "epoch": 0.8, + "learning_rate": 2.3677679029130152e-05, + "loss": 1.0462, + "step": 26750 + }, + { + "epoch": 0.8, + "learning_rate": 2.3644016114328214e-05, + "loss": 0.9681, + "step": 26755 + }, + { + "epoch": 0.8, + "learning_rate": 2.3610373937556717e-05, + "loss": 1.056, + "step": 26760 + }, + { + "epoch": 0.8, + "learning_rate": 2.3576752507952793e-05, + "loss": 1.0341, + "step": 26765 + }, + { + "epoch": 0.8, + "learning_rate": 2.354315183464787e-05, + "loss": 1.0972, + "step": 26770 + }, + { + "epoch": 0.8, + "learning_rate": 2.3509571926767825e-05, + "loss": 0.9849, + "step": 26775 + }, + { + "epoch": 0.8, + "learning_rate": 2.34760127934328e-05, + "loss": 1.0213, + "step": 26780 + }, + { + "epoch": 0.8, + "learning_rate": 2.3442474443757366e-05, + "loss": 1.0322, + "step": 26785 + }, + { + "epoch": 0.8, + "learning_rate": 2.3408956886850474e-05, + "loss": 1.0333, + "step": 26790 + }, + { + "epoch": 0.8, + "learning_rate": 2.337546013181533e-05, + "loss": 1.0361, + "step": 26795 + }, + { + "epoch": 0.8, + "learning_rate": 2.3341984187749512e-05, + "loss": 0.9704, + "step": 26800 + }, + { + "epoch": 0.8, + "learning_rate": 2.330852906374502e-05, + "loss": 1.0536, + "step": 26805 + }, + { + "epoch": 0.8, + "learning_rate": 2.327509476888817e-05, + "loss": 1.05, + "step": 26810 + }, + { + "epoch": 0.8, + "learning_rate": 2.324168131225959e-05, + "loss": 1.0976, + "step": 26815 + }, + { + "epoch": 0.8, + "learning_rate": 2.320828870293421e-05, + "loss": 1.0781, + "step": 26820 + }, + { + "epoch": 0.8, + "learning_rate": 2.3174916949981407e-05, + "loss": 1.0256, + "step": 26825 + }, + { + "epoch": 0.8, + "learning_rate": 2.3141566062464847e-05, + "loss": 0.9687, + "step": 26830 + }, + { + "epoch": 0.8, + "learning_rate": 2.31082360494425e-05, + "loss": 0.9723, + "step": 26835 + }, + { + "epoch": 0.8, + "learning_rate": 2.3074926919966654e-05, + "loss": 1.0468, + "step": 26840 + }, + { + "epoch": 0.8, + "learning_rate": 2.3041638683083988e-05, + "loss": 1.0212, + "step": 26845 + }, + { + "epoch": 0.8, + "learning_rate": 2.3008371347835512e-05, + "loss": 1.066, + "step": 26850 + }, + { + "epoch": 0.8, + "learning_rate": 2.2975124923256485e-05, + "loss": 1.0378, + "step": 26855 + }, + { + "epoch": 0.8, + "learning_rate": 2.2941899418376466e-05, + "loss": 1.0009, + "step": 26860 + }, + { + "epoch": 0.8, + "learning_rate": 2.2908694842219513e-05, + "loss": 1.028, + "step": 26865 + }, + { + "epoch": 0.8, + "learning_rate": 2.2875511203803813e-05, + "loss": 1.0512, + "step": 26870 + }, + { + "epoch": 0.8, + "learning_rate": 2.2842348512141908e-05, + "loss": 1.0158, + "step": 26875 + }, + { + "epoch": 0.8, + "learning_rate": 2.280920677624071e-05, + "loss": 1.0609, + "step": 26880 + }, + { + "epoch": 0.8, + "learning_rate": 2.2776086005101417e-05, + "loss": 1.0954, + "step": 26885 + }, + { + "epoch": 0.8, + "learning_rate": 2.2742986207719506e-05, + "loss": 1.0528, + "step": 26890 + }, + { + "epoch": 0.8, + "learning_rate": 2.2709907393084738e-05, + "loss": 1.0503, + "step": 26895 + }, + { + "epoch": 0.8, + "learning_rate": 2.2676849570181225e-05, + "loss": 1.0591, + "step": 26900 + }, + { + "epoch": 0.8, + "learning_rate": 2.2643812747987414e-05, + "loss": 1.0174, + "step": 26905 + }, + { + "epoch": 0.8, + "learning_rate": 2.2610796935475943e-05, + "loss": 1.0398, + "step": 26910 + }, + { + "epoch": 0.8, + "learning_rate": 2.257780214161378e-05, + "loss": 1.0512, + "step": 26915 + }, + { + "epoch": 0.8, + "learning_rate": 2.254482837536224e-05, + "loss": 0.9905, + "step": 26920 + }, + { + "epoch": 0.8, + "learning_rate": 2.2511875645676894e-05, + "loss": 1.0876, + "step": 26925 + }, + { + "epoch": 0.8, + "learning_rate": 2.247894396150755e-05, + "loss": 1.0574, + "step": 26930 + }, + { + "epoch": 0.8, + "learning_rate": 2.244603333179839e-05, + "loss": 1.0374, + "step": 26935 + }, + { + "epoch": 0.8, + "learning_rate": 2.2413143765487786e-05, + "loss": 0.9323, + "step": 26940 + }, + { + "epoch": 0.8, + "learning_rate": 2.2380275271508488e-05, + "loss": 1.1288, + "step": 26945 + }, + { + "epoch": 0.8, + "learning_rate": 2.2347427858787416e-05, + "loss": 1.0254, + "step": 26950 + }, + { + "epoch": 0.8, + "learning_rate": 2.231460153624586e-05, + "loss": 0.9757, + "step": 26955 + }, + { + "epoch": 0.81, + "learning_rate": 2.228179631279933e-05, + "loss": 0.9398, + "step": 26960 + }, + { + "epoch": 0.81, + "learning_rate": 2.224901219735758e-05, + "loss": 1.0836, + "step": 26965 + }, + { + "epoch": 0.81, + "learning_rate": 2.221624919882469e-05, + "loss": 1.017, + "step": 26970 + }, + { + "epoch": 0.81, + "learning_rate": 2.2183507326099005e-05, + "loss": 0.9517, + "step": 26975 + }, + { + "epoch": 0.81, + "learning_rate": 2.21507865880731e-05, + "loss": 1.1093, + "step": 26980 + }, + { + "epoch": 0.81, + "learning_rate": 2.2118086993633746e-05, + "loss": 0.9549, + "step": 26985 + }, + { + "epoch": 0.81, + "learning_rate": 2.2085408551662156e-05, + "loss": 0.9998, + "step": 26990 + }, + { + "epoch": 0.81, + "learning_rate": 2.2052751271033632e-05, + "loss": 1.1442, + "step": 26995 + }, + { + "epoch": 0.81, + "learning_rate": 2.2020115160617783e-05, + "loss": 0.9667, + "step": 27000 + }, + { + "epoch": 0.81, + "learning_rate": 2.1987500229278425e-05, + "loss": 1.0595, + "step": 27005 + }, + { + "epoch": 0.81, + "learning_rate": 2.195490648587375e-05, + "loss": 1.0077, + "step": 27010 + }, + { + "epoch": 0.81, + "learning_rate": 2.1922333939256067e-05, + "loss": 1.0227, + "step": 27015 + }, + { + "epoch": 0.81, + "learning_rate": 2.1889782598271957e-05, + "loss": 1.0444, + "step": 27020 + }, + { + "epoch": 0.81, + "learning_rate": 2.1857252471762258e-05, + "loss": 1.0428, + "step": 27025 + }, + { + "epoch": 0.81, + "learning_rate": 2.182474356856209e-05, + "loss": 0.9872, + "step": 27030 + }, + { + "epoch": 0.81, + "learning_rate": 2.1792255897500734e-05, + "loss": 1.0965, + "step": 27035 + }, + { + "epoch": 0.81, + "learning_rate": 2.1759789467401692e-05, + "loss": 1.0762, + "step": 27040 + }, + { + "epoch": 0.81, + "learning_rate": 2.1727344287082774e-05, + "loss": 1.001, + "step": 27045 + }, + { + "epoch": 0.81, + "learning_rate": 2.169492036535602e-05, + "loss": 0.9293, + "step": 27050 + }, + { + "epoch": 0.81, + "learning_rate": 2.166251771102761e-05, + "loss": 0.9666, + "step": 27055 + }, + { + "epoch": 0.81, + "learning_rate": 2.163013633289799e-05, + "loss": 1.0457, + "step": 27060 + }, + { + "epoch": 0.81, + "learning_rate": 2.1597776239761847e-05, + "loss": 1.0084, + "step": 27065 + }, + { + "epoch": 0.81, + "learning_rate": 2.156543744040811e-05, + "loss": 0.9755, + "step": 27070 + }, + { + "epoch": 0.81, + "learning_rate": 2.1533119943619827e-05, + "loss": 1.0413, + "step": 27075 + }, + { + "epoch": 0.81, + "learning_rate": 2.1500823758174392e-05, + "loss": 1.0121, + "step": 27080 + }, + { + "epoch": 0.81, + "learning_rate": 2.1468548892843265e-05, + "loss": 1.0324, + "step": 27085 + }, + { + "epoch": 0.81, + "learning_rate": 2.1436295356392267e-05, + "loss": 1.0253, + "step": 27090 + }, + { + "epoch": 0.81, + "learning_rate": 2.1404063157581277e-05, + "loss": 0.971, + "step": 27095 + }, + { + "epoch": 0.81, + "learning_rate": 2.137185230516453e-05, + "loss": 0.9728, + "step": 27100 + }, + { + "epoch": 0.81, + "learning_rate": 2.1339662807890302e-05, + "loss": 1.0464, + "step": 27105 + }, + { + "epoch": 0.81, + "learning_rate": 2.130749467450124e-05, + "loss": 0.9913, + "step": 27110 + }, + { + "epoch": 0.81, + "learning_rate": 2.1275347913734022e-05, + "loss": 1.0261, + "step": 27115 + }, + { + "epoch": 0.81, + "learning_rate": 2.1243222534319673e-05, + "loss": 1.0351, + "step": 27120 + }, + { + "epoch": 0.81, + "learning_rate": 2.121111854498328e-05, + "loss": 1.0623, + "step": 27125 + }, + { + "epoch": 0.81, + "learning_rate": 2.1179035954444193e-05, + "loss": 1.0173, + "step": 27130 + }, + { + "epoch": 0.81, + "learning_rate": 2.1146974771415973e-05, + "loss": 0.9972, + "step": 27135 + }, + { + "epoch": 0.81, + "learning_rate": 2.1114935004606307e-05, + "loss": 0.9912, + "step": 27140 + }, + { + "epoch": 0.81, + "learning_rate": 2.1082916662717055e-05, + "loss": 0.9162, + "step": 27145 + }, + { + "epoch": 0.81, + "learning_rate": 2.1050919754444332e-05, + "loss": 0.9906, + "step": 27150 + }, + { + "epoch": 0.81, + "learning_rate": 2.101894428847839e-05, + "loss": 1.0139, + "step": 27155 + }, + { + "epoch": 0.81, + "learning_rate": 2.098699027350367e-05, + "loss": 1.0463, + "step": 27160 + }, + { + "epoch": 0.81, + "learning_rate": 2.0955057718198722e-05, + "loss": 0.9878, + "step": 27165 + }, + { + "epoch": 0.81, + "learning_rate": 2.0923146631236358e-05, + "loss": 1.0481, + "step": 27170 + }, + { + "epoch": 0.81, + "learning_rate": 2.089125702128355e-05, + "loss": 1.0226, + "step": 27175 + }, + { + "epoch": 0.81, + "learning_rate": 2.0859388897001375e-05, + "loss": 1.1479, + "step": 27180 + }, + { + "epoch": 0.81, + "learning_rate": 2.0827542267045085e-05, + "loss": 1.0502, + "step": 27185 + }, + { + "epoch": 0.81, + "learning_rate": 2.0795717140064163e-05, + "loss": 0.9748, + "step": 27190 + }, + { + "epoch": 0.81, + "learning_rate": 2.0763913524702205e-05, + "loss": 1.0257, + "step": 27195 + }, + { + "epoch": 0.81, + "learning_rate": 2.0732131429596956e-05, + "loss": 1.061, + "step": 27200 + }, + { + "epoch": 0.81, + "learning_rate": 2.0700370863380304e-05, + "loss": 1.0184, + "step": 27205 + }, + { + "epoch": 0.81, + "learning_rate": 2.066863183467833e-05, + "loss": 1.059, + "step": 27210 + }, + { + "epoch": 0.81, + "learning_rate": 2.063691435211128e-05, + "loss": 0.9855, + "step": 27215 + }, + { + "epoch": 0.81, + "learning_rate": 2.060521842429347e-05, + "loss": 0.9978, + "step": 27220 + }, + { + "epoch": 0.81, + "learning_rate": 2.057354405983345e-05, + "loss": 1.0309, + "step": 27225 + }, + { + "epoch": 0.81, + "learning_rate": 2.0541891267333813e-05, + "loss": 1.0717, + "step": 27230 + }, + { + "epoch": 0.81, + "learning_rate": 2.0510260055391418e-05, + "loss": 1.0358, + "step": 27235 + }, + { + "epoch": 0.81, + "learning_rate": 2.0478650432597145e-05, + "loss": 1.0544, + "step": 27240 + }, + { + "epoch": 0.81, + "learning_rate": 2.044706240753611e-05, + "loss": 1.0294, + "step": 27245 + }, + { + "epoch": 0.81, + "learning_rate": 2.041549598878746e-05, + "loss": 0.9081, + "step": 27250 + }, + { + "epoch": 0.81, + "learning_rate": 2.0383951184924578e-05, + "loss": 0.9869, + "step": 27255 + }, + { + "epoch": 0.81, + "learning_rate": 2.035242800451489e-05, + "loss": 1.0286, + "step": 27260 + }, + { + "epoch": 0.81, + "learning_rate": 2.0320926456120027e-05, + "loss": 1.0271, + "step": 27265 + }, + { + "epoch": 0.81, + "learning_rate": 2.0289446548295643e-05, + "loss": 0.9797, + "step": 27270 + }, + { + "epoch": 0.81, + "learning_rate": 2.0257988289591622e-05, + "loss": 1.0186, + "step": 27275 + }, + { + "epoch": 0.81, + "learning_rate": 2.0226551688551954e-05, + "loss": 1.0778, + "step": 27280 + }, + { + "epoch": 0.81, + "learning_rate": 2.019513675371466e-05, + "loss": 1.0309, + "step": 27285 + }, + { + "epoch": 0.81, + "learning_rate": 2.016374349361192e-05, + "loss": 1.124, + "step": 27290 + }, + { + "epoch": 0.82, + "learning_rate": 2.0132371916770076e-05, + "loss": 1.0279, + "step": 27295 + }, + { + "epoch": 0.82, + "learning_rate": 2.010102203170955e-05, + "loss": 1.0553, + "step": 27300 + }, + { + "epoch": 0.82, + "learning_rate": 2.006969384694485e-05, + "loss": 1.0364, + "step": 27305 + }, + { + "epoch": 0.82, + "learning_rate": 2.003838737098458e-05, + "loss": 1.0, + "step": 27310 + }, + { + "epoch": 0.82, + "learning_rate": 2.0007102612331497e-05, + "loss": 1.0449, + "step": 27315 + }, + { + "epoch": 0.82, + "learning_rate": 1.997583957948247e-05, + "loss": 1.0565, + "step": 27320 + }, + { + "epoch": 0.82, + "learning_rate": 1.9944598280928408e-05, + "loss": 0.9891, + "step": 27325 + }, + { + "epoch": 0.82, + "learning_rate": 1.991337872515431e-05, + "loss": 0.9986, + "step": 27330 + }, + { + "epoch": 0.82, + "learning_rate": 1.9882180920639327e-05, + "loss": 1.0133, + "step": 27335 + }, + { + "epoch": 0.82, + "learning_rate": 1.9851004875856717e-05, + "loss": 1.0193, + "step": 27340 + }, + { + "epoch": 0.82, + "learning_rate": 1.981985059927376e-05, + "loss": 1.0163, + "step": 27345 + }, + { + "epoch": 0.82, + "learning_rate": 1.978871809935179e-05, + "loss": 0.9787, + "step": 27350 + }, + { + "epoch": 0.82, + "learning_rate": 1.9757607384546407e-05, + "loss": 1.0372, + "step": 27355 + }, + { + "epoch": 0.82, + "learning_rate": 1.972651846330713e-05, + "loss": 0.9938, + "step": 27360 + }, + { + "epoch": 0.82, + "learning_rate": 1.9695451344077564e-05, + "loss": 1.111, + "step": 27365 + }, + { + "epoch": 0.82, + "learning_rate": 1.966440603529549e-05, + "loss": 0.986, + "step": 27370 + }, + { + "epoch": 0.82, + "learning_rate": 1.9633382545392665e-05, + "loss": 0.9983, + "step": 27375 + }, + { + "epoch": 0.82, + "learning_rate": 1.9602380882795023e-05, + "loss": 0.9977, + "step": 27380 + }, + { + "epoch": 0.82, + "learning_rate": 1.9571401055922446e-05, + "loss": 1.062, + "step": 27385 + }, + { + "epoch": 0.82, + "learning_rate": 1.9540443073189008e-05, + "loss": 1.0474, + "step": 27390 + }, + { + "epoch": 0.82, + "learning_rate": 1.9509506943002752e-05, + "loss": 1.0369, + "step": 27395 + }, + { + "epoch": 0.82, + "learning_rate": 1.947859267376586e-05, + "loss": 1.0907, + "step": 27400 + }, + { + "epoch": 0.82, + "learning_rate": 1.9447700273874514e-05, + "loss": 0.9748, + "step": 27405 + }, + { + "epoch": 0.82, + "learning_rate": 1.9416829751719023e-05, + "loss": 1.1434, + "step": 27410 + }, + { + "epoch": 0.82, + "learning_rate": 1.9385981115683672e-05, + "loss": 1.0793, + "step": 27415 + }, + { + "epoch": 0.82, + "learning_rate": 1.935515437414688e-05, + "loss": 0.9201, + "step": 27420 + }, + { + "epoch": 0.82, + "learning_rate": 1.932434953548109e-05, + "loss": 0.9769, + "step": 27425 + }, + { + "epoch": 0.82, + "learning_rate": 1.9293566608052794e-05, + "loss": 1.0236, + "step": 27430 + }, + { + "epoch": 0.82, + "learning_rate": 1.92628056002225e-05, + "loss": 0.9949, + "step": 27435 + }, + { + "epoch": 0.82, + "learning_rate": 1.9232066520344817e-05, + "loss": 1.0628, + "step": 27440 + }, + { + "epoch": 0.82, + "learning_rate": 1.9201349376768396e-05, + "loss": 0.9804, + "step": 27445 + }, + { + "epoch": 0.82, + "learning_rate": 1.917065417783589e-05, + "loss": 1.0584, + "step": 27450 + }, + { + "epoch": 0.82, + "learning_rate": 1.9139980931883993e-05, + "loss": 1.0286, + "step": 27455 + }, + { + "epoch": 0.82, + "learning_rate": 1.910932964724347e-05, + "loss": 1.0801, + "step": 27460 + }, + { + "epoch": 0.82, + "learning_rate": 1.9078700332239154e-05, + "loss": 1.0237, + "step": 27465 + }, + { + "epoch": 0.82, + "learning_rate": 1.9048092995189814e-05, + "loss": 1.0433, + "step": 27470 + }, + { + "epoch": 0.82, + "learning_rate": 1.9017507644408285e-05, + "loss": 1.0249, + "step": 27475 + }, + { + "epoch": 0.82, + "learning_rate": 1.898694428820148e-05, + "loss": 1.0478, + "step": 27480 + }, + { + "epoch": 0.82, + "learning_rate": 1.895640293487032e-05, + "loss": 1.0579, + "step": 27485 + }, + { + "epoch": 0.82, + "learning_rate": 1.8925883592709714e-05, + "loss": 1.0685, + "step": 27490 + }, + { + "epoch": 0.82, + "learning_rate": 1.8895386270008564e-05, + "loss": 0.9896, + "step": 27495 + }, + { + "epoch": 0.82, + "learning_rate": 1.8864910975049933e-05, + "loss": 1.0321, + "step": 27500 + }, + { + "epoch": 0.82, + "learning_rate": 1.8834457716110778e-05, + "loss": 0.9396, + "step": 27505 + }, + { + "epoch": 0.82, + "learning_rate": 1.8804026501462047e-05, + "loss": 1.0297, + "step": 27510 + }, + { + "epoch": 0.82, + "learning_rate": 1.877361733936881e-05, + "loss": 1.0638, + "step": 27515 + }, + { + "epoch": 0.82, + "learning_rate": 1.8743230238090115e-05, + "loss": 1.1102, + "step": 27520 + }, + { + "epoch": 0.82, + "learning_rate": 1.871286520587895e-05, + "loss": 1.0131, + "step": 27525 + }, + { + "epoch": 0.82, + "learning_rate": 1.868252225098236e-05, + "loss": 0.9769, + "step": 27530 + }, + { + "epoch": 0.82, + "learning_rate": 1.8652201381641398e-05, + "loss": 1.0752, + "step": 27535 + }, + { + "epoch": 0.82, + "learning_rate": 1.8621902606091136e-05, + "loss": 1.0212, + "step": 27540 + }, + { + "epoch": 0.82, + "learning_rate": 1.8591625932560607e-05, + "loss": 0.9927, + "step": 27545 + }, + { + "epoch": 0.82, + "learning_rate": 1.8561371369272818e-05, + "loss": 0.9938, + "step": 27550 + }, + { + "epoch": 0.82, + "learning_rate": 1.8531138924444837e-05, + "loss": 1.0038, + "step": 27555 + }, + { + "epoch": 0.82, + "learning_rate": 1.8500928606287726e-05, + "loss": 1.0646, + "step": 27560 + }, + { + "epoch": 0.82, + "learning_rate": 1.847074042300644e-05, + "loss": 1.0039, + "step": 27565 + }, + { + "epoch": 0.82, + "learning_rate": 1.8440574382800057e-05, + "loss": 1.0066, + "step": 27570 + }, + { + "epoch": 0.82, + "learning_rate": 1.841043049386154e-05, + "loss": 1.0108, + "step": 27575 + }, + { + "epoch": 0.82, + "learning_rate": 1.8380308764377842e-05, + "loss": 1.0333, + "step": 27580 + }, + { + "epoch": 0.82, + "learning_rate": 1.8350209202529946e-05, + "loss": 1.0425, + "step": 27585 + }, + { + "epoch": 0.82, + "learning_rate": 1.8320131816492824e-05, + "loss": 1.0079, + "step": 27590 + }, + { + "epoch": 0.82, + "learning_rate": 1.829007661443538e-05, + "loss": 1.0192, + "step": 27595 + }, + { + "epoch": 0.82, + "learning_rate": 1.826004360452046e-05, + "loss": 1.0579, + "step": 27600 + }, + { + "epoch": 0.82, + "learning_rate": 1.823003279490496e-05, + "loss": 1.0532, + "step": 27605 + }, + { + "epoch": 0.82, + "learning_rate": 1.820004419373975e-05, + "loss": 0.9751, + "step": 27610 + }, + { + "epoch": 0.82, + "learning_rate": 1.8170077809169593e-05, + "loss": 0.9586, + "step": 27615 + }, + { + "epoch": 0.82, + "learning_rate": 1.8140133649333258e-05, + "loss": 1.0643, + "step": 27620 + }, + { + "epoch": 0.82, + "learning_rate": 1.811021172236348e-05, + "loss": 1.0345, + "step": 27625 + }, + { + "epoch": 0.83, + "learning_rate": 1.8080312036386994e-05, + "loss": 1.0339, + "step": 27630 + }, + { + "epoch": 0.83, + "learning_rate": 1.8050434599524423e-05, + "loss": 0.9934, + "step": 27635 + }, + { + "epoch": 0.83, + "learning_rate": 1.8020579419890326e-05, + "loss": 1.0862, + "step": 27640 + }, + { + "epoch": 0.83, + "learning_rate": 1.7990746505593393e-05, + "loss": 1.0523, + "step": 27645 + }, + { + "epoch": 0.83, + "learning_rate": 1.7960935864736063e-05, + "loss": 1.0975, + "step": 27650 + }, + { + "epoch": 0.83, + "learning_rate": 1.7931147505414792e-05, + "loss": 1.0015, + "step": 27655 + }, + { + "epoch": 0.83, + "learning_rate": 1.7901381435720032e-05, + "loss": 1.0228, + "step": 27660 + }, + { + "epoch": 0.83, + "learning_rate": 1.7871637663736174e-05, + "loss": 0.9851, + "step": 27665 + }, + { + "epoch": 0.83, + "learning_rate": 1.7841916197541497e-05, + "loss": 1.0008, + "step": 27670 + }, + { + "epoch": 0.83, + "learning_rate": 1.7812217045208224e-05, + "loss": 1.0621, + "step": 27675 + }, + { + "epoch": 0.83, + "learning_rate": 1.778254021480257e-05, + "loss": 1.0256, + "step": 27680 + }, + { + "epoch": 0.83, + "learning_rate": 1.7752885714384694e-05, + "loss": 1.0626, + "step": 27685 + }, + { + "epoch": 0.83, + "learning_rate": 1.7723253552008632e-05, + "loss": 1.0999, + "step": 27690 + }, + { + "epoch": 0.83, + "learning_rate": 1.7693643735722352e-05, + "loss": 0.9905, + "step": 27695 + }, + { + "epoch": 0.83, + "learning_rate": 1.766405627356781e-05, + "loss": 1.0061, + "step": 27700 + }, + { + "epoch": 0.83, + "learning_rate": 1.763449117358089e-05, + "loss": 1.0145, + "step": 27705 + }, + { + "epoch": 0.83, + "learning_rate": 1.7604948443791325e-05, + "loss": 0.9774, + "step": 27710 + }, + { + "epoch": 0.83, + "learning_rate": 1.7575428092222868e-05, + "loss": 1.0192, + "step": 27715 + }, + { + "epoch": 0.83, + "learning_rate": 1.7545930126893084e-05, + "loss": 1.0671, + "step": 27720 + }, + { + "epoch": 0.83, + "learning_rate": 1.75164545558136e-05, + "loss": 0.9947, + "step": 27725 + }, + { + "epoch": 0.83, + "learning_rate": 1.748700138698982e-05, + "loss": 1.023, + "step": 27730 + }, + { + "epoch": 0.83, + "learning_rate": 1.7457570628421172e-05, + "loss": 1.0785, + "step": 27735 + }, + { + "epoch": 0.83, + "learning_rate": 1.7428162288100903e-05, + "loss": 1.0384, + "step": 27740 + }, + { + "epoch": 0.83, + "learning_rate": 1.739877637401627e-05, + "loss": 0.9922, + "step": 27745 + }, + { + "epoch": 0.83, + "learning_rate": 1.736941289414834e-05, + "loss": 0.9896, + "step": 27750 + }, + { + "epoch": 0.83, + "learning_rate": 1.7340071856472195e-05, + "loss": 1.1099, + "step": 27755 + }, + { + "epoch": 0.83, + "learning_rate": 1.7310753268956693e-05, + "loss": 1.022, + "step": 27760 + }, + { + "epoch": 0.83, + "learning_rate": 1.7281457139564737e-05, + "loss": 1.0709, + "step": 27765 + }, + { + "epoch": 0.83, + "learning_rate": 1.7252183476253003e-05, + "loss": 1.1128, + "step": 27770 + }, + { + "epoch": 0.83, + "learning_rate": 1.722293228697216e-05, + "loss": 1.0421, + "step": 27775 + }, + { + "epoch": 0.83, + "learning_rate": 1.719370357966671e-05, + "loss": 1.0009, + "step": 27780 + }, + { + "epoch": 0.83, + "learning_rate": 1.7164497362275022e-05, + "loss": 1.0556, + "step": 27785 + }, + { + "epoch": 0.83, + "learning_rate": 1.7135313642729523e-05, + "loss": 1.0064, + "step": 27790 + }, + { + "epoch": 0.83, + "learning_rate": 1.7106152428956347e-05, + "loss": 1.0219, + "step": 27795 + }, + { + "epoch": 0.83, + "learning_rate": 1.7077013728875556e-05, + "loss": 1.07, + "step": 27800 + }, + { + "epoch": 0.83, + "learning_rate": 1.704789755040116e-05, + "loss": 0.9593, + "step": 27805 + }, + { + "epoch": 0.83, + "learning_rate": 1.701880390144104e-05, + "loss": 1.0281, + "step": 27810 + }, + { + "epoch": 0.83, + "learning_rate": 1.6989732789896907e-05, + "loss": 1.0145, + "step": 27815 + }, + { + "epoch": 0.83, + "learning_rate": 1.6960684223664337e-05, + "loss": 1.0423, + "step": 27820 + }, + { + "epoch": 0.83, + "learning_rate": 1.6931658210632873e-05, + "loss": 1.0445, + "step": 27825 + }, + { + "epoch": 0.83, + "learning_rate": 1.6902654758685897e-05, + "loss": 1.0456, + "step": 27830 + }, + { + "epoch": 0.83, + "learning_rate": 1.6873673875700625e-05, + "loss": 1.0002, + "step": 27835 + }, + { + "epoch": 0.83, + "learning_rate": 1.684471556954814e-05, + "loss": 0.9514, + "step": 27840 + }, + { + "epoch": 0.83, + "learning_rate": 1.681577984809346e-05, + "loss": 1.0272, + "step": 27845 + }, + { + "epoch": 0.83, + "learning_rate": 1.678686671919544e-05, + "loss": 0.9692, + "step": 27850 + }, + { + "epoch": 0.83, + "learning_rate": 1.6757976190706746e-05, + "loss": 1.0692, + "step": 27855 + }, + { + "epoch": 0.83, + "learning_rate": 1.6729108270473992e-05, + "loss": 1.0373, + "step": 27860 + }, + { + "epoch": 0.83, + "learning_rate": 1.6700262966337578e-05, + "loss": 1.0282, + "step": 27865 + }, + { + "epoch": 0.83, + "learning_rate": 1.6671440286131813e-05, + "loss": 1.0402, + "step": 27870 + }, + { + "epoch": 0.83, + "learning_rate": 1.6642640237684816e-05, + "loss": 1.071, + "step": 27875 + }, + { + "epoch": 0.83, + "learning_rate": 1.6613862828818628e-05, + "loss": 1.0472, + "step": 27880 + }, + { + "epoch": 0.83, + "learning_rate": 1.658510806734903e-05, + "loss": 1.0519, + "step": 27885 + }, + { + "epoch": 0.83, + "learning_rate": 1.6556375961085803e-05, + "loss": 1.007, + "step": 27890 + }, + { + "epoch": 0.83, + "learning_rate": 1.6527666517832408e-05, + "loss": 1.0224, + "step": 27895 + }, + { + "epoch": 0.83, + "learning_rate": 1.64989797453863e-05, + "loss": 1.0284, + "step": 27900 + }, + { + "epoch": 0.83, + "learning_rate": 1.647031565153866e-05, + "loss": 0.9984, + "step": 27905 + }, + { + "epoch": 0.83, + "learning_rate": 1.644167424407461e-05, + "loss": 0.9897, + "step": 27910 + }, + { + "epoch": 0.83, + "learning_rate": 1.6413055530773004e-05, + "loss": 1.0294, + "step": 27915 + }, + { + "epoch": 0.83, + "learning_rate": 1.6384459519406648e-05, + "loss": 1.0998, + "step": 27920 + }, + { + "epoch": 0.83, + "learning_rate": 1.635588621774208e-05, + "loss": 0.9808, + "step": 27925 + }, + { + "epoch": 0.83, + "learning_rate": 1.632733563353971e-05, + "loss": 1.0193, + "step": 27930 + }, + { + "epoch": 0.83, + "learning_rate": 1.629880777455384e-05, + "loss": 1.0457, + "step": 27935 + }, + { + "epoch": 0.83, + "learning_rate": 1.6270302648532488e-05, + "loss": 1.0206, + "step": 27940 + }, + { + "epoch": 0.83, + "learning_rate": 1.6241820263217544e-05, + "loss": 1.0674, + "step": 27945 + }, + { + "epoch": 0.83, + "learning_rate": 1.6213360626344754e-05, + "loss": 1.0253, + "step": 27950 + }, + { + "epoch": 0.83, + "learning_rate": 1.6184923745643667e-05, + "loss": 1.0026, + "step": 27955 + }, + { + "epoch": 0.83, + "learning_rate": 1.6156509628837644e-05, + "loss": 1.0119, + "step": 27960 + }, + { + "epoch": 0.84, + "learning_rate": 1.6128118283643824e-05, + "loss": 1.0511, + "step": 27965 + }, + { + "epoch": 0.84, + "learning_rate": 1.6099749717773238e-05, + "loss": 1.0831, + "step": 27970 + }, + { + "epoch": 0.84, + "learning_rate": 1.6071403938930708e-05, + "loss": 1.0253, + "step": 27975 + }, + { + "epoch": 0.84, + "learning_rate": 1.6043080954814828e-05, + "loss": 1.0624, + "step": 27980 + }, + { + "epoch": 0.84, + "learning_rate": 1.6014780773118e-05, + "loss": 0.9995, + "step": 27985 + }, + { + "epoch": 0.84, + "learning_rate": 1.5986503401526497e-05, + "loss": 1.0632, + "step": 27990 + }, + { + "epoch": 0.84, + "learning_rate": 1.595824884772037e-05, + "loss": 1.0359, + "step": 27995 + }, + { + "epoch": 0.84, + "learning_rate": 1.593001711937343e-05, + "loss": 0.9918, + "step": 28000 + }, + { + "epoch": 0.84, + "learning_rate": 1.5901808224153347e-05, + "loss": 1.0573, + "step": 28005 + }, + { + "epoch": 0.84, + "learning_rate": 1.5873622169721523e-05, + "loss": 0.9937, + "step": 28010 + }, + { + "epoch": 0.84, + "learning_rate": 1.5845458963733238e-05, + "loss": 1.0302, + "step": 28015 + }, + { + "epoch": 0.84, + "learning_rate": 1.5817318613837493e-05, + "loss": 1.0473, + "step": 28020 + }, + { + "epoch": 0.84, + "learning_rate": 1.5789201127677156e-05, + "loss": 0.9898, + "step": 28025 + }, + { + "epoch": 0.84, + "learning_rate": 1.576110651288879e-05, + "loss": 0.9981, + "step": 28030 + }, + { + "epoch": 0.84, + "learning_rate": 1.573303477710284e-05, + "loss": 1.0857, + "step": 28035 + }, + { + "epoch": 0.84, + "learning_rate": 1.570498592794346e-05, + "loss": 1.027, + "step": 28040 + }, + { + "epoch": 0.84, + "learning_rate": 1.567695997302867e-05, + "loss": 1.0621, + "step": 28045 + }, + { + "epoch": 0.84, + "learning_rate": 1.564895691997017e-05, + "loss": 0.9847, + "step": 28050 + }, + { + "epoch": 0.84, + "learning_rate": 1.562097677637353e-05, + "loss": 1.0856, + "step": 28055 + }, + { + "epoch": 0.84, + "learning_rate": 1.559301954983807e-05, + "loss": 0.967, + "step": 28060 + }, + { + "epoch": 0.84, + "learning_rate": 1.5565085247956878e-05, + "loss": 1.0348, + "step": 28065 + }, + { + "epoch": 0.84, + "learning_rate": 1.553717387831678e-05, + "loss": 1.031, + "step": 28070 + }, + { + "epoch": 0.84, + "learning_rate": 1.5509285448498446e-05, + "loss": 1.0164, + "step": 28075 + }, + { + "epoch": 0.84, + "learning_rate": 1.548141996607628e-05, + "loss": 0.9814, + "step": 28080 + }, + { + "epoch": 0.84, + "learning_rate": 1.5453577438618462e-05, + "loss": 1.0759, + "step": 28085 + }, + { + "epoch": 0.84, + "learning_rate": 1.5425757873686875e-05, + "loss": 1.0123, + "step": 28090 + }, + { + "epoch": 0.84, + "learning_rate": 1.539796127883727e-05, + "loss": 1.0785, + "step": 28095 + }, + { + "epoch": 0.84, + "learning_rate": 1.5370187661619117e-05, + "loss": 1.011, + "step": 28100 + }, + { + "epoch": 0.84, + "learning_rate": 1.5342437029575618e-05, + "loss": 1.0627, + "step": 28105 + }, + { + "epoch": 0.84, + "learning_rate": 1.531470939024373e-05, + "loss": 0.9686, + "step": 28110 + }, + { + "epoch": 0.84, + "learning_rate": 1.528700475115422e-05, + "loss": 1.0189, + "step": 28115 + }, + { + "epoch": 0.84, + "learning_rate": 1.525932311983158e-05, + "loss": 1.109, + "step": 28120 + }, + { + "epoch": 0.84, + "learning_rate": 1.5231664503794042e-05, + "loss": 1.0025, + "step": 28125 + }, + { + "epoch": 0.84, + "learning_rate": 1.5204028910553526e-05, + "loss": 1.0115, + "step": 28130 + }, + { + "epoch": 0.84, + "learning_rate": 1.5176416347615885e-05, + "loss": 1.0111, + "step": 28135 + }, + { + "epoch": 0.84, + "learning_rate": 1.5148826822480533e-05, + "loss": 1.1188, + "step": 28140 + }, + { + "epoch": 0.84, + "learning_rate": 1.5121260342640686e-05, + "loss": 1.008, + "step": 28145 + }, + { + "epoch": 0.84, + "learning_rate": 1.5093716915583323e-05, + "loss": 1.0404, + "step": 28150 + }, + { + "epoch": 0.84, + "learning_rate": 1.506619654878917e-05, + "loss": 1.0092, + "step": 28155 + }, + { + "epoch": 0.84, + "learning_rate": 1.5038699249732636e-05, + "loss": 1.0153, + "step": 28160 + }, + { + "epoch": 0.84, + "learning_rate": 1.5011225025881893e-05, + "loss": 1.0057, + "step": 28165 + }, + { + "epoch": 0.84, + "learning_rate": 1.4983773884698849e-05, + "loss": 0.9793, + "step": 28170 + }, + { + "epoch": 0.84, + "learning_rate": 1.4956345833639185e-05, + "loss": 1.0471, + "step": 28175 + }, + { + "epoch": 0.84, + "learning_rate": 1.4928940880152232e-05, + "loss": 1.0523, + "step": 28180 + }, + { + "epoch": 0.84, + "learning_rate": 1.4901559031681056e-05, + "loss": 1.0299, + "step": 28185 + }, + { + "epoch": 0.84, + "learning_rate": 1.4874200295662533e-05, + "loss": 1.1044, + "step": 28190 + }, + { + "epoch": 0.84, + "learning_rate": 1.4846864679527162e-05, + "loss": 1.004, + "step": 28195 + }, + { + "epoch": 0.84, + "learning_rate": 1.4819552190699215e-05, + "loss": 0.9262, + "step": 28200 + }, + { + "epoch": 0.84, + "learning_rate": 1.4792262836596715e-05, + "loss": 1.0379, + "step": 28205 + }, + { + "epoch": 0.84, + "learning_rate": 1.476499662463131e-05, + "loss": 1.0461, + "step": 28210 + }, + { + "epoch": 0.84, + "learning_rate": 1.4737753562208411e-05, + "loss": 1.0779, + "step": 28215 + }, + { + "epoch": 0.84, + "learning_rate": 1.4710533656727154e-05, + "loss": 1.0497, + "step": 28220 + }, + { + "epoch": 0.84, + "learning_rate": 1.4683336915580414e-05, + "loss": 1.0077, + "step": 28225 + }, + { + "epoch": 0.84, + "learning_rate": 1.4656163346154684e-05, + "loss": 0.9981, + "step": 28230 + }, + { + "epoch": 0.84, + "learning_rate": 1.4629012955830212e-05, + "loss": 1.0002, + "step": 28235 + }, + { + "epoch": 0.84, + "learning_rate": 1.4601885751980982e-05, + "loss": 1.0671, + "step": 28240 + }, + { + "epoch": 0.84, + "learning_rate": 1.4574781741974652e-05, + "loss": 0.9378, + "step": 28245 + }, + { + "epoch": 0.84, + "learning_rate": 1.454770093317258e-05, + "loss": 1.0896, + "step": 28250 + }, + { + "epoch": 0.84, + "learning_rate": 1.4520643332929783e-05, + "loss": 1.0875, + "step": 28255 + }, + { + "epoch": 0.84, + "learning_rate": 1.449360894859505e-05, + "loss": 0.9998, + "step": 28260 + }, + { + "epoch": 0.84, + "learning_rate": 1.446659778751085e-05, + "loss": 1.032, + "step": 28265 + }, + { + "epoch": 0.84, + "learning_rate": 1.4439609857013304e-05, + "loss": 1.0697, + "step": 28270 + }, + { + "epoch": 0.84, + "learning_rate": 1.4412645164432181e-05, + "loss": 1.0267, + "step": 28275 + }, + { + "epoch": 0.84, + "learning_rate": 1.4385703717091115e-05, + "loss": 1.088, + "step": 28280 + }, + { + "epoch": 0.84, + "learning_rate": 1.435878552230726e-05, + "loss": 0.9866, + "step": 28285 + }, + { + "epoch": 0.84, + "learning_rate": 1.4331890587391484e-05, + "loss": 1.0554, + "step": 28290 + }, + { + "epoch": 0.84, + "learning_rate": 1.4305018919648372e-05, + "loss": 0.9817, + "step": 28295 + }, + { + "epoch": 0.85, + "learning_rate": 1.4278170526376233e-05, + "loss": 1.0928, + "step": 28300 + }, + { + "epoch": 0.85, + "learning_rate": 1.4251345414866946e-05, + "loss": 1.0782, + "step": 28305 + }, + { + "epoch": 0.85, + "learning_rate": 1.4224543592406114e-05, + "loss": 1.0246, + "step": 28310 + }, + { + "epoch": 0.85, + "learning_rate": 1.419776506627305e-05, + "loss": 1.0067, + "step": 28315 + }, + { + "epoch": 0.85, + "learning_rate": 1.4171009843740723e-05, + "loss": 1.0644, + "step": 28320 + }, + { + "epoch": 0.85, + "learning_rate": 1.4144277932075744e-05, + "loss": 1.0037, + "step": 28325 + }, + { + "epoch": 0.85, + "learning_rate": 1.4117569338538395e-05, + "loss": 0.9673, + "step": 28330 + }, + { + "epoch": 0.85, + "learning_rate": 1.4090884070382648e-05, + "loss": 1.0349, + "step": 28335 + }, + { + "epoch": 0.85, + "learning_rate": 1.4064222134856175e-05, + "loss": 1.1054, + "step": 28340 + }, + { + "epoch": 0.85, + "learning_rate": 1.4037583539200204e-05, + "loss": 1.0639, + "step": 28345 + }, + { + "epoch": 0.85, + "learning_rate": 1.4010968290649762e-05, + "loss": 1.036, + "step": 28350 + }, + { + "epoch": 0.85, + "learning_rate": 1.3984376396433385e-05, + "loss": 1.0074, + "step": 28355 + }, + { + "epoch": 0.85, + "learning_rate": 1.3957807863773398e-05, + "loss": 1.0241, + "step": 28360 + }, + { + "epoch": 0.85, + "learning_rate": 1.3931262699885695e-05, + "loss": 1.1143, + "step": 28365 + }, + { + "epoch": 0.85, + "learning_rate": 1.3904740911979885e-05, + "loss": 1.0849, + "step": 28370 + }, + { + "epoch": 0.85, + "learning_rate": 1.3878242507259143e-05, + "loss": 1.0656, + "step": 28375 + }, + { + "epoch": 0.85, + "learning_rate": 1.385176749292042e-05, + "loss": 0.9913, + "step": 28380 + }, + { + "epoch": 0.85, + "learning_rate": 1.3825315876154176e-05, + "loss": 0.9479, + "step": 28385 + }, + { + "epoch": 0.85, + "learning_rate": 1.3798887664144633e-05, + "loss": 1.0109, + "step": 28390 + }, + { + "epoch": 0.85, + "learning_rate": 1.377248286406958e-05, + "loss": 1.0096, + "step": 28395 + }, + { + "epoch": 0.85, + "learning_rate": 1.3746101483100449e-05, + "loss": 0.9676, + "step": 28400 + }, + { + "epoch": 0.85, + "learning_rate": 1.3719743528402362e-05, + "loss": 1.0478, + "step": 28405 + }, + { + "epoch": 0.85, + "learning_rate": 1.3693409007134072e-05, + "loss": 1.009, + "step": 28410 + }, + { + "epoch": 0.85, + "learning_rate": 1.3667097926447924e-05, + "loss": 1.0147, + "step": 28415 + }, + { + "epoch": 0.85, + "learning_rate": 1.3640810293489881e-05, + "loss": 1.0228, + "step": 28420 + }, + { + "epoch": 0.85, + "learning_rate": 1.361454611539965e-05, + "loss": 1.1787, + "step": 28425 + }, + { + "epoch": 0.85, + "learning_rate": 1.3588305399310475e-05, + "loss": 1.0154, + "step": 28430 + }, + { + "epoch": 0.85, + "learning_rate": 1.3562088152349195e-05, + "loss": 1.0126, + "step": 28435 + }, + { + "epoch": 0.85, + "learning_rate": 1.3535894381636372e-05, + "loss": 1.0735, + "step": 28440 + }, + { + "epoch": 0.85, + "learning_rate": 1.3509724094286157e-05, + "loss": 1.078, + "step": 28445 + }, + { + "epoch": 0.85, + "learning_rate": 1.3483577297406303e-05, + "loss": 1.0255, + "step": 28450 + }, + { + "epoch": 0.85, + "learning_rate": 1.3457453998098145e-05, + "loss": 1.0796, + "step": 28455 + }, + { + "epoch": 0.85, + "learning_rate": 1.3431354203456737e-05, + "loss": 0.9961, + "step": 28460 + }, + { + "epoch": 0.85, + "learning_rate": 1.3405277920570702e-05, + "loss": 0.9999, + "step": 28465 + }, + { + "epoch": 0.85, + "learning_rate": 1.3379225156522247e-05, + "loss": 1.0835, + "step": 28470 + }, + { + "epoch": 0.85, + "learning_rate": 1.3353195918387207e-05, + "loss": 1.0221, + "step": 28475 + }, + { + "epoch": 0.85, + "learning_rate": 1.3327190213235042e-05, + "loss": 1.0482, + "step": 28480 + }, + { + "epoch": 0.85, + "learning_rate": 1.3301208048128843e-05, + "loss": 0.9903, + "step": 28485 + }, + { + "epoch": 0.85, + "learning_rate": 1.3275249430125237e-05, + "loss": 1.0239, + "step": 28490 + }, + { + "epoch": 0.85, + "learning_rate": 1.3249314366274546e-05, + "loss": 1.0476, + "step": 28495 + }, + { + "epoch": 0.85, + "learning_rate": 1.3223402863620603e-05, + "loss": 1.0797, + "step": 28500 + }, + { + "epoch": 0.85, + "learning_rate": 1.3197514929200917e-05, + "loss": 1.0183, + "step": 28505 + }, + { + "epoch": 0.85, + "learning_rate": 1.3171650570046534e-05, + "loss": 1.0112, + "step": 28510 + }, + { + "epoch": 0.85, + "learning_rate": 1.3145809793182162e-05, + "loss": 1.0847, + "step": 28515 + }, + { + "epoch": 0.85, + "learning_rate": 1.3119992605626031e-05, + "loss": 1.0585, + "step": 28520 + }, + { + "epoch": 0.85, + "learning_rate": 1.309419901439004e-05, + "loss": 0.9641, + "step": 28525 + }, + { + "epoch": 0.85, + "learning_rate": 1.3068429026479611e-05, + "loss": 1.0217, + "step": 28530 + }, + { + "epoch": 0.85, + "learning_rate": 1.3042682648893833e-05, + "loss": 1.0853, + "step": 28535 + }, + { + "epoch": 0.85, + "learning_rate": 1.3016959888625269e-05, + "loss": 1.0434, + "step": 28540 + }, + { + "epoch": 0.85, + "learning_rate": 1.2991260752660207e-05, + "loss": 0.9911, + "step": 28545 + }, + { + "epoch": 0.85, + "learning_rate": 1.2965585247978374e-05, + "loss": 1.0899, + "step": 28550 + }, + { + "epoch": 0.85, + "learning_rate": 1.2939933381553216e-05, + "loss": 1.0117, + "step": 28555 + }, + { + "epoch": 0.85, + "learning_rate": 1.2914305160351648e-05, + "loss": 1.0095, + "step": 28560 + }, + { + "epoch": 0.85, + "learning_rate": 1.2888700591334223e-05, + "loss": 0.9692, + "step": 28565 + }, + { + "epoch": 0.85, + "learning_rate": 1.2863119681455093e-05, + "loss": 0.986, + "step": 28570 + }, + { + "epoch": 0.85, + "learning_rate": 1.283756243766191e-05, + "loss": 1.0234, + "step": 28575 + }, + { + "epoch": 0.85, + "learning_rate": 1.281202886689592e-05, + "loss": 1.0126, + "step": 28580 + }, + { + "epoch": 0.85, + "learning_rate": 1.2786518976091977e-05, + "loss": 0.996, + "step": 28585 + }, + { + "epoch": 0.85, + "learning_rate": 1.2761032772178506e-05, + "loss": 0.9891, + "step": 28590 + }, + { + "epoch": 0.85, + "learning_rate": 1.2735570262077456e-05, + "loss": 0.9782, + "step": 28595 + }, + { + "epoch": 0.85, + "learning_rate": 1.2710131452704333e-05, + "loss": 1.0157, + "step": 28600 + }, + { + "epoch": 0.85, + "learning_rate": 1.268471635096825e-05, + "loss": 1.0324, + "step": 28605 + }, + { + "epoch": 0.85, + "learning_rate": 1.2659324963771902e-05, + "loss": 1.0042, + "step": 28610 + }, + { + "epoch": 0.85, + "learning_rate": 1.2633957298011467e-05, + "loss": 1.0503, + "step": 28615 + }, + { + "epoch": 0.85, + "learning_rate": 1.2608613360576705e-05, + "loss": 1.0469, + "step": 28620 + }, + { + "epoch": 0.85, + "learning_rate": 1.258329315835095e-05, + "loss": 1.0778, + "step": 28625 + }, + { + "epoch": 0.85, + "learning_rate": 1.2557996698211139e-05, + "loss": 1.0316, + "step": 28630 + }, + { + "epoch": 0.86, + "learning_rate": 1.2532723987027628e-05, + "loss": 1.0513, + "step": 28635 + }, + { + "epoch": 0.86, + "learning_rate": 1.2507475031664473e-05, + "loss": 1.0377, + "step": 28640 + }, + { + "epoch": 0.86, + "learning_rate": 1.2482249838979142e-05, + "loss": 0.9151, + "step": 28645 + }, + { + "epoch": 0.86, + "learning_rate": 1.2457048415822769e-05, + "loss": 0.9903, + "step": 28650 + }, + { + "epoch": 0.86, + "learning_rate": 1.2431870769039922e-05, + "loss": 1.1097, + "step": 28655 + }, + { + "epoch": 0.86, + "learning_rate": 1.2406716905468828e-05, + "loss": 0.9961, + "step": 28660 + }, + { + "epoch": 0.86, + "learning_rate": 1.2381586831941128e-05, + "loss": 0.97, + "step": 28665 + }, + { + "epoch": 0.86, + "learning_rate": 1.2356480555282113e-05, + "loss": 0.9823, + "step": 28670 + }, + { + "epoch": 0.86, + "learning_rate": 1.233139808231053e-05, + "loss": 0.9611, + "step": 28675 + }, + { + "epoch": 0.86, + "learning_rate": 1.2306339419838742e-05, + "loss": 0.9893, + "step": 28680 + }, + { + "epoch": 0.86, + "learning_rate": 1.2281304574672537e-05, + "loss": 1.0319, + "step": 28685 + }, + { + "epoch": 0.86, + "learning_rate": 1.2256293553611353e-05, + "loss": 0.9922, + "step": 28690 + }, + { + "epoch": 0.86, + "learning_rate": 1.2231306363448059e-05, + "loss": 1.0483, + "step": 28695 + }, + { + "epoch": 0.86, + "learning_rate": 1.2206343010969124e-05, + "loss": 0.9704, + "step": 28700 + }, + { + "epoch": 0.86, + "learning_rate": 1.2181403502954469e-05, + "loss": 1.0138, + "step": 28705 + }, + { + "epoch": 0.86, + "learning_rate": 1.215648784617761e-05, + "loss": 1.0773, + "step": 28710 + }, + { + "epoch": 0.86, + "learning_rate": 1.213159604740557e-05, + "loss": 1.0845, + "step": 28715 + }, + { + "epoch": 0.86, + "learning_rate": 1.2106728113398869e-05, + "loss": 1.0434, + "step": 28720 + }, + { + "epoch": 0.86, + "learning_rate": 1.2081884050911518e-05, + "loss": 1.0453, + "step": 28725 + }, + { + "epoch": 0.86, + "learning_rate": 1.205706386669112e-05, + "loss": 1.0684, + "step": 28730 + }, + { + "epoch": 0.86, + "learning_rate": 1.203226756747876e-05, + "loss": 0.9965, + "step": 28735 + }, + { + "epoch": 0.86, + "learning_rate": 1.200749516000902e-05, + "loss": 0.9462, + "step": 28740 + }, + { + "epoch": 0.86, + "learning_rate": 1.1982746651009969e-05, + "loss": 0.9895, + "step": 28745 + }, + { + "epoch": 0.86, + "learning_rate": 1.1958022047203254e-05, + "loss": 1.0961, + "step": 28750 + }, + { + "epoch": 0.86, + "learning_rate": 1.1933321355304006e-05, + "loss": 1.0407, + "step": 28755 + }, + { + "epoch": 0.86, + "learning_rate": 1.1908644582020845e-05, + "loss": 0.9402, + "step": 28760 + }, + { + "epoch": 0.86, + "learning_rate": 1.1883991734055844e-05, + "loss": 1.1078, + "step": 28765 + }, + { + "epoch": 0.86, + "learning_rate": 1.1859362818104714e-05, + "loss": 1.0555, + "step": 28770 + }, + { + "epoch": 0.86, + "learning_rate": 1.1834757840856559e-05, + "loss": 1.0341, + "step": 28775 + }, + { + "epoch": 0.86, + "learning_rate": 1.1810176808993979e-05, + "loss": 1.0119, + "step": 28780 + }, + { + "epoch": 0.86, + "learning_rate": 1.178561972919312e-05, + "loss": 1.0778, + "step": 28785 + }, + { + "epoch": 0.86, + "learning_rate": 1.1761086608123638e-05, + "loss": 0.9924, + "step": 28790 + }, + { + "epoch": 0.86, + "learning_rate": 1.1736577452448604e-05, + "loss": 1.0209, + "step": 28795 + }, + { + "epoch": 0.86, + "learning_rate": 1.1712092268824626e-05, + "loss": 1.0213, + "step": 28800 + }, + { + "epoch": 0.86, + "learning_rate": 1.1687631063901817e-05, + "loss": 1.0446, + "step": 28805 + }, + { + "epoch": 0.86, + "learning_rate": 1.1663193844323727e-05, + "loss": 1.0702, + "step": 28810 + }, + { + "epoch": 0.86, + "learning_rate": 1.1638780616727463e-05, + "loss": 1.0332, + "step": 28815 + }, + { + "epoch": 0.86, + "learning_rate": 1.161439138774354e-05, + "loss": 1.0109, + "step": 28820 + }, + { + "epoch": 0.86, + "learning_rate": 1.1590026163996026e-05, + "loss": 0.979, + "step": 28825 + }, + { + "epoch": 0.86, + "learning_rate": 1.1565684952102395e-05, + "loss": 0.9863, + "step": 28830 + }, + { + "epoch": 0.86, + "learning_rate": 1.1541367758673693e-05, + "loss": 1.0271, + "step": 28835 + }, + { + "epoch": 0.86, + "learning_rate": 1.1517074590314324e-05, + "loss": 0.9968, + "step": 28840 + }, + { + "epoch": 0.86, + "learning_rate": 1.1492805453622291e-05, + "loss": 1.0801, + "step": 28845 + }, + { + "epoch": 0.86, + "learning_rate": 1.1468560355188961e-05, + "loss": 0.9222, + "step": 28850 + }, + { + "epoch": 0.86, + "learning_rate": 1.1444339301599239e-05, + "loss": 1.0151, + "step": 28855 + }, + { + "epoch": 0.86, + "learning_rate": 1.1420142299431513e-05, + "loss": 0.994, + "step": 28860 + }, + { + "epoch": 0.86, + "learning_rate": 1.1395969355257586e-05, + "loss": 1.0436, + "step": 28865 + }, + { + "epoch": 0.86, + "learning_rate": 1.1371820475642725e-05, + "loss": 0.9966, + "step": 28870 + }, + { + "epoch": 0.86, + "learning_rate": 1.1347695667145697e-05, + "loss": 1.0715, + "step": 28875 + }, + { + "epoch": 0.86, + "learning_rate": 1.132359493631875e-05, + "loss": 0.974, + "step": 28880 + }, + { + "epoch": 0.86, + "learning_rate": 1.1299518289707545e-05, + "loss": 1.0343, + "step": 28885 + }, + { + "epoch": 0.86, + "learning_rate": 1.1275465733851199e-05, + "loss": 0.9999, + "step": 28890 + }, + { + "epoch": 0.86, + "learning_rate": 1.1251437275282306e-05, + "loss": 1.0461, + "step": 28895 + }, + { + "epoch": 0.86, + "learning_rate": 1.122743292052697e-05, + "loss": 1.0052, + "step": 28900 + }, + { + "epoch": 0.86, + "learning_rate": 1.1203452676104642e-05, + "loss": 1.002, + "step": 28905 + }, + { + "epoch": 0.86, + "learning_rate": 1.1179496548528246e-05, + "loss": 1.0442, + "step": 28910 + }, + { + "epoch": 0.86, + "learning_rate": 1.1155564544304275e-05, + "loss": 1.0645, + "step": 28915 + }, + { + "epoch": 0.86, + "learning_rate": 1.1131656669932532e-05, + "loss": 1.0118, + "step": 28920 + }, + { + "epoch": 0.86, + "learning_rate": 1.1107772931906301e-05, + "loss": 1.0461, + "step": 28925 + }, + { + "epoch": 0.86, + "learning_rate": 1.1083913336712337e-05, + "loss": 1.0071, + "step": 28930 + }, + { + "epoch": 0.86, + "learning_rate": 1.1060077890830856e-05, + "loss": 0.9882, + "step": 28935 + }, + { + "epoch": 0.86, + "learning_rate": 1.1036266600735468e-05, + "loss": 1.0393, + "step": 28940 + }, + { + "epoch": 0.86, + "learning_rate": 1.1012479472893212e-05, + "loss": 1.0415, + "step": 28945 + }, + { + "epoch": 0.86, + "learning_rate": 1.0988716513764618e-05, + "loss": 0.9953, + "step": 28950 + }, + { + "epoch": 0.86, + "learning_rate": 1.0964977729803638e-05, + "loss": 1.0169, + "step": 28955 + }, + { + "epoch": 0.86, + "learning_rate": 1.0941263127457635e-05, + "loss": 1.0323, + "step": 28960 + }, + { + "epoch": 0.86, + "learning_rate": 1.091757271316739e-05, + "loss": 1.0477, + "step": 28965 + }, + { + "epoch": 0.87, + "learning_rate": 1.0893906493367168e-05, + "loss": 0.9649, + "step": 28970 + }, + { + "epoch": 0.87, + "learning_rate": 1.0870264474484648e-05, + "loss": 1.0664, + "step": 28975 + }, + { + "epoch": 0.87, + "learning_rate": 1.0846646662940907e-05, + "loss": 0.9779, + "step": 28980 + }, + { + "epoch": 0.87, + "learning_rate": 1.0823053065150435e-05, + "loss": 1.0893, + "step": 28985 + }, + { + "epoch": 0.87, + "learning_rate": 1.0799483687521216e-05, + "loss": 1.1045, + "step": 28990 + }, + { + "epoch": 0.87, + "learning_rate": 1.077593853645461e-05, + "loss": 1.0906, + "step": 28995 + }, + { + "epoch": 0.87, + "learning_rate": 1.0752417618345378e-05, + "loss": 1.0487, + "step": 29000 + }, + { + "epoch": 0.87, + "learning_rate": 1.0728920939581755e-05, + "loss": 1.1194, + "step": 29005 + }, + { + "epoch": 0.87, + "learning_rate": 1.0705448506545346e-05, + "loss": 1.0141, + "step": 29010 + }, + { + "epoch": 0.87, + "learning_rate": 1.0682000325611163e-05, + "loss": 1.0628, + "step": 29015 + }, + { + "epoch": 0.87, + "learning_rate": 1.0658576403147668e-05, + "loss": 1.0212, + "step": 29020 + }, + { + "epoch": 0.87, + "learning_rate": 1.063517674551674e-05, + "loss": 1.0414, + "step": 29025 + }, + { + "epoch": 0.87, + "learning_rate": 1.0611801359073648e-05, + "loss": 1.0779, + "step": 29030 + }, + { + "epoch": 0.87, + "learning_rate": 1.0588450250167026e-05, + "loss": 1.067, + "step": 29035 + }, + { + "epoch": 0.87, + "learning_rate": 1.0565123425138978e-05, + "loss": 0.9864, + "step": 29040 + }, + { + "epoch": 0.87, + "learning_rate": 1.0541820890325027e-05, + "loss": 0.9991, + "step": 29045 + }, + { + "epoch": 0.87, + "learning_rate": 1.0518542652054031e-05, + "loss": 0.9543, + "step": 29050 + }, + { + "epoch": 0.87, + "learning_rate": 1.0495288716648256e-05, + "loss": 1.0237, + "step": 29055 + }, + { + "epoch": 0.87, + "learning_rate": 1.0472059090423458e-05, + "loss": 0.9585, + "step": 29060 + }, + { + "epoch": 0.87, + "learning_rate": 1.0448853779688694e-05, + "loss": 0.9428, + "step": 29065 + }, + { + "epoch": 0.87, + "learning_rate": 1.0425672790746421e-05, + "loss": 0.9818, + "step": 29070 + }, + { + "epoch": 0.87, + "learning_rate": 1.0402516129892548e-05, + "loss": 1.0123, + "step": 29075 + }, + { + "epoch": 0.87, + "learning_rate": 1.0379383803416364e-05, + "loss": 0.9868, + "step": 29080 + }, + { + "epoch": 0.87, + "learning_rate": 1.0356275817600503e-05, + "loss": 0.928, + "step": 29085 + }, + { + "epoch": 0.87, + "learning_rate": 1.0333192178721007e-05, + "loss": 1.0086, + "step": 29090 + }, + { + "epoch": 0.87, + "learning_rate": 1.0310132893047342e-05, + "loss": 1.0187, + "step": 29095 + }, + { + "epoch": 0.87, + "learning_rate": 1.0287097966842341e-05, + "loss": 0.9889, + "step": 29100 + }, + { + "epoch": 0.87, + "learning_rate": 1.0264087406362188e-05, + "loss": 0.9902, + "step": 29105 + }, + { + "epoch": 0.87, + "learning_rate": 1.0241101217856464e-05, + "loss": 1.0069, + "step": 29110 + }, + { + "epoch": 0.87, + "learning_rate": 1.0218139407568161e-05, + "loss": 0.9962, + "step": 29115 + }, + { + "epoch": 0.87, + "learning_rate": 1.0195201981733659e-05, + "loss": 1.0604, + "step": 29120 + }, + { + "epoch": 0.87, + "learning_rate": 1.017228894658262e-05, + "loss": 1.03, + "step": 29125 + }, + { + "epoch": 0.87, + "learning_rate": 1.0149400308338209e-05, + "loss": 0.9777, + "step": 29130 + }, + { + "epoch": 0.87, + "learning_rate": 1.0126536073216863e-05, + "loss": 1.0084, + "step": 29135 + }, + { + "epoch": 0.87, + "learning_rate": 1.0103696247428463e-05, + "loss": 1.1209, + "step": 29140 + }, + { + "epoch": 0.87, + "learning_rate": 1.0080880837176198e-05, + "loss": 1.1022, + "step": 29145 + }, + { + "epoch": 0.87, + "learning_rate": 1.0058089848656694e-05, + "loss": 1.0183, + "step": 29150 + }, + { + "epoch": 0.87, + "learning_rate": 1.003532328805986e-05, + "loss": 1.0363, + "step": 29155 + }, + { + "epoch": 0.87, + "learning_rate": 1.0012581161569067e-05, + "loss": 1.0147, + "step": 29160 + }, + { + "epoch": 0.87, + "learning_rate": 9.989863475360961e-06, + "loss": 0.9523, + "step": 29165 + }, + { + "epoch": 0.87, + "learning_rate": 9.967170235605616e-06, + "loss": 1.0281, + "step": 29170 + }, + { + "epoch": 0.87, + "learning_rate": 9.944501448466426e-06, + "loss": 1.0638, + "step": 29175 + }, + { + "epoch": 0.87, + "learning_rate": 9.921857120100174e-06, + "loss": 1.0431, + "step": 29180 + }, + { + "epoch": 0.87, + "learning_rate": 9.899237256656958e-06, + "loss": 1.0272, + "step": 29185 + }, + { + "epoch": 0.87, + "learning_rate": 9.87664186428029e-06, + "loss": 1.023, + "step": 29190 + }, + { + "epoch": 0.87, + "learning_rate": 9.854070949106964e-06, + "loss": 1.0809, + "step": 29195 + }, + { + "epoch": 0.87, + "learning_rate": 9.831524517267188e-06, + "loss": 1.0156, + "step": 29200 + }, + { + "epoch": 0.87, + "learning_rate": 9.809002574884518e-06, + "loss": 1.0842, + "step": 29205 + }, + { + "epoch": 0.87, + "learning_rate": 9.786505128075817e-06, + "loss": 0.9836, + "step": 29210 + }, + { + "epoch": 0.87, + "learning_rate": 9.7640321829513e-06, + "loss": 0.9605, + "step": 29215 + }, + { + "epoch": 0.87, + "learning_rate": 9.741583745614557e-06, + "loss": 1.0973, + "step": 29220 + }, + { + "epoch": 0.87, + "learning_rate": 9.719159822162538e-06, + "loss": 1.072, + "step": 29225 + }, + { + "epoch": 0.87, + "learning_rate": 9.69676041868547e-06, + "loss": 1.0483, + "step": 29230 + }, + { + "epoch": 0.87, + "learning_rate": 9.674385541266951e-06, + "loss": 1.0286, + "step": 29235 + }, + { + "epoch": 0.87, + "learning_rate": 9.652035195983943e-06, + "loss": 0.9802, + "step": 29240 + }, + { + "epoch": 0.87, + "learning_rate": 9.629709388906728e-06, + "loss": 1.0628, + "step": 29245 + }, + { + "epoch": 0.87, + "learning_rate": 9.607408126098927e-06, + "loss": 0.9956, + "step": 29250 + }, + { + "epoch": 0.87, + "learning_rate": 9.585131413617442e-06, + "loss": 1.0854, + "step": 29255 + }, + { + "epoch": 0.87, + "learning_rate": 9.562879257512602e-06, + "loss": 1.0574, + "step": 29260 + }, + { + "epoch": 0.87, + "learning_rate": 9.540651663828004e-06, + "loss": 1.0655, + "step": 29265 + }, + { + "epoch": 0.87, + "learning_rate": 9.518448638600585e-06, + "loss": 1.0279, + "step": 29270 + }, + { + "epoch": 0.87, + "learning_rate": 9.496270187860622e-06, + "loss": 1.0302, + "step": 29275 + }, + { + "epoch": 0.87, + "learning_rate": 9.474116317631687e-06, + "loss": 1.0097, + "step": 29280 + }, + { + "epoch": 0.87, + "learning_rate": 9.451987033930732e-06, + "loss": 1.039, + "step": 29285 + }, + { + "epoch": 0.87, + "learning_rate": 9.429882342767949e-06, + "loss": 1.0546, + "step": 29290 + }, + { + "epoch": 0.87, + "learning_rate": 9.407802250146946e-06, + "loss": 0.9941, + "step": 29295 + }, + { + "epoch": 0.87, + "learning_rate": 9.385746762064573e-06, + "loss": 0.979, + "step": 29300 + }, + { + "epoch": 0.88, + "learning_rate": 9.363715884511049e-06, + "loss": 1.0566, + "step": 29305 + }, + { + "epoch": 0.88, + "learning_rate": 9.341709623469851e-06, + "loss": 0.973, + "step": 29310 + }, + { + "epoch": 0.88, + "learning_rate": 9.319727984917848e-06, + "loss": 1.0016, + "step": 29315 + }, + { + "epoch": 0.88, + "learning_rate": 9.297770974825149e-06, + "loss": 0.9822, + "step": 29320 + }, + { + "epoch": 0.88, + "learning_rate": 9.275838599155217e-06, + "loss": 1.0752, + "step": 29325 + }, + { + "epoch": 0.88, + "learning_rate": 9.253930863864813e-06, + "loss": 1.0631, + "step": 29330 + }, + { + "epoch": 0.88, + "learning_rate": 9.232047774904007e-06, + "loss": 1.0858, + "step": 29335 + }, + { + "epoch": 0.88, + "learning_rate": 9.210189338216158e-06, + "loss": 1.0597, + "step": 29340 + }, + { + "epoch": 0.88, + "learning_rate": 9.188355559737949e-06, + "loss": 1.0116, + "step": 29345 + }, + { + "epoch": 0.88, + "learning_rate": 9.166546445399394e-06, + "loss": 1.0643, + "step": 29350 + }, + { + "epoch": 0.88, + "learning_rate": 9.144762001123742e-06, + "loss": 1.0158, + "step": 29355 + }, + { + "epoch": 0.88, + "learning_rate": 9.123002232827571e-06, + "loss": 1.1442, + "step": 29360 + }, + { + "epoch": 0.88, + "learning_rate": 9.101267146420767e-06, + "loss": 0.9952, + "step": 29365 + }, + { + "epoch": 0.88, + "learning_rate": 9.079556747806527e-06, + "loss": 1.009, + "step": 29370 + }, + { + "epoch": 0.88, + "learning_rate": 9.057871042881317e-06, + "loss": 1.0316, + "step": 29375 + }, + { + "epoch": 0.88, + "learning_rate": 9.036210037534875e-06, + "loss": 1.0294, + "step": 29380 + }, + { + "epoch": 0.88, + "learning_rate": 9.014573737650278e-06, + "loss": 0.9652, + "step": 29385 + }, + { + "epoch": 0.88, + "learning_rate": 8.99296214910389e-06, + "loss": 0.9561, + "step": 29390 + }, + { + "epoch": 0.88, + "learning_rate": 8.971375277765326e-06, + "loss": 1.0152, + "step": 29395 + }, + { + "epoch": 0.88, + "learning_rate": 8.949813129497464e-06, + "loss": 1.025, + "step": 29400 + }, + { + "epoch": 0.88, + "learning_rate": 8.928275710156608e-06, + "loss": 1.1015, + "step": 29405 + }, + { + "epoch": 0.88, + "learning_rate": 8.90676302559219e-06, + "loss": 1.0681, + "step": 29410 + }, + { + "epoch": 0.88, + "learning_rate": 8.885275081646982e-06, + "loss": 0.972, + "step": 29415 + }, + { + "epoch": 0.88, + "learning_rate": 8.863811884157058e-06, + "loss": 1.0367, + "step": 29420 + }, + { + "epoch": 0.88, + "learning_rate": 8.842373438951735e-06, + "loss": 1.0551, + "step": 29425 + }, + { + "epoch": 0.88, + "learning_rate": 8.82095975185364e-06, + "loss": 0.9054, + "step": 29430 + }, + { + "epoch": 0.88, + "learning_rate": 8.799570828678627e-06, + "loss": 1.0166, + "step": 29435 + }, + { + "epoch": 0.88, + "learning_rate": 8.7782066752359e-06, + "loss": 1.0402, + "step": 29440 + }, + { + "epoch": 0.88, + "learning_rate": 8.756867297327842e-06, + "loss": 1.0522, + "step": 29445 + }, + { + "epoch": 0.88, + "learning_rate": 8.735552700750215e-06, + "loss": 1.0613, + "step": 29450 + }, + { + "epoch": 0.88, + "learning_rate": 8.714262891291935e-06, + "loss": 1.015, + "step": 29455 + }, + { + "epoch": 0.88, + "learning_rate": 8.692997874735287e-06, + "loss": 1.0503, + "step": 29460 + }, + { + "epoch": 0.88, + "learning_rate": 8.671757656855751e-06, + "loss": 1.0676, + "step": 29465 + }, + { + "epoch": 0.88, + "learning_rate": 8.650542243422122e-06, + "loss": 0.9613, + "step": 29470 + }, + { + "epoch": 0.88, + "learning_rate": 8.629351640196404e-06, + "loss": 1.0183, + "step": 29475 + }, + { + "epoch": 0.88, + "learning_rate": 8.608185852933925e-06, + "loss": 1.0021, + "step": 29480 + }, + { + "epoch": 0.88, + "learning_rate": 8.587044887383221e-06, + "loss": 0.9626, + "step": 29485 + }, + { + "epoch": 0.88, + "learning_rate": 8.565928749286111e-06, + "loss": 0.9981, + "step": 29490 + }, + { + "epoch": 0.88, + "learning_rate": 8.544837444377695e-06, + "loss": 1.037, + "step": 29495 + }, + { + "epoch": 0.88, + "learning_rate": 8.523770978386281e-06, + "loss": 1.0432, + "step": 29500 + }, + { + "epoch": 0.88, + "learning_rate": 8.502729357033434e-06, + "loss": 0.9933, + "step": 29505 + }, + { + "epoch": 0.88, + "learning_rate": 8.481712586033997e-06, + "loss": 0.9996, + "step": 29510 + }, + { + "epoch": 0.88, + "learning_rate": 8.46072067109609e-06, + "loss": 1.0697, + "step": 29515 + }, + { + "epoch": 0.88, + "learning_rate": 8.43975361792101e-06, + "loss": 1.0159, + "step": 29520 + }, + { + "epoch": 0.88, + "learning_rate": 8.418811432203322e-06, + "loss": 1.0305, + "step": 29525 + }, + { + "epoch": 0.88, + "learning_rate": 8.39789411963089e-06, + "loss": 1.0169, + "step": 29530 + }, + { + "epoch": 0.88, + "learning_rate": 8.377001685884789e-06, + "loss": 1.0061, + "step": 29535 + }, + { + "epoch": 0.88, + "learning_rate": 8.356134136639326e-06, + "loss": 1.0258, + "step": 29540 + }, + { + "epoch": 0.88, + "learning_rate": 8.335291477562012e-06, + "loss": 1.053, + "step": 29545 + }, + { + "epoch": 0.88, + "learning_rate": 8.314473714313719e-06, + "loss": 1.0076, + "step": 29550 + }, + { + "epoch": 0.88, + "learning_rate": 8.293680852548447e-06, + "loss": 1.0291, + "step": 29555 + }, + { + "epoch": 0.88, + "learning_rate": 8.272912897913443e-06, + "loss": 0.9826, + "step": 29560 + }, + { + "epoch": 0.88, + "learning_rate": 8.252169856049253e-06, + "loss": 1.0395, + "step": 29565 + }, + { + "epoch": 0.88, + "learning_rate": 8.231451732589613e-06, + "loss": 1.0104, + "step": 29570 + }, + { + "epoch": 0.88, + "learning_rate": 8.210758533161488e-06, + "loss": 0.9514, + "step": 29575 + }, + { + "epoch": 0.88, + "learning_rate": 8.190090263385063e-06, + "loss": 1.016, + "step": 29580 + }, + { + "epoch": 0.88, + "learning_rate": 8.169446928873792e-06, + "loss": 1.006, + "step": 29585 + }, + { + "epoch": 0.88, + "learning_rate": 8.148828535234353e-06, + "loss": 0.9663, + "step": 29590 + }, + { + "epoch": 0.88, + "learning_rate": 8.128235088066605e-06, + "loss": 0.9892, + "step": 29595 + }, + { + "epoch": 0.88, + "learning_rate": 8.107666592963659e-06, + "loss": 0.9732, + "step": 29600 + }, + { + "epoch": 0.88, + "learning_rate": 8.08712305551187e-06, + "loss": 1.0466, + "step": 29605 + }, + { + "epoch": 0.88, + "learning_rate": 8.066604481290762e-06, + "loss": 0.9989, + "step": 29610 + }, + { + "epoch": 0.88, + "learning_rate": 8.046110875873159e-06, + "loss": 1.0269, + "step": 29615 + }, + { + "epoch": 0.88, + "learning_rate": 8.025642244825005e-06, + "loss": 0.9953, + "step": 29620 + }, + { + "epoch": 0.88, + "learning_rate": 8.005198593705554e-06, + "loss": 1.0127, + "step": 29625 + }, + { + "epoch": 0.88, + "learning_rate": 7.984779928067198e-06, + "loss": 1.0516, + "step": 29630 + }, + { + "epoch": 0.88, + "learning_rate": 7.964386253455591e-06, + "loss": 0.9699, + "step": 29635 + }, + { + "epoch": 0.89, + "learning_rate": 7.944017575409612e-06, + "loss": 1.0953, + "step": 29640 + }, + { + "epoch": 0.89, + "learning_rate": 7.923673899461303e-06, + "loss": 1.0265, + "step": 29645 + }, + { + "epoch": 0.89, + "learning_rate": 7.903355231135923e-06, + "loss": 1.071, + "step": 29650 + }, + { + "epoch": 0.89, + "learning_rate": 7.883061575951978e-06, + "loss": 1.0221, + "step": 29655 + }, + { + "epoch": 0.89, + "learning_rate": 7.86279293942116e-06, + "loss": 1.0335, + "step": 29660 + }, + { + "epoch": 0.89, + "learning_rate": 7.842549327048365e-06, + "loss": 1.008, + "step": 29665 + }, + { + "epoch": 0.89, + "learning_rate": 7.822330744331653e-06, + "loss": 1.0719, + "step": 29670 + }, + { + "epoch": 0.89, + "learning_rate": 7.80213719676235e-06, + "loss": 0.998, + "step": 29675 + }, + { + "epoch": 0.89, + "learning_rate": 7.781968689824981e-06, + "loss": 1.0141, + "step": 29680 + }, + { + "epoch": 0.89, + "learning_rate": 7.761825228997222e-06, + "loss": 1.0259, + "step": 29685 + }, + { + "epoch": 0.89, + "learning_rate": 7.741706819749928e-06, + "loss": 1.0285, + "step": 29690 + }, + { + "epoch": 0.89, + "learning_rate": 7.721613467547274e-06, + "loss": 0.9429, + "step": 29695 + }, + { + "epoch": 0.89, + "learning_rate": 7.701545177846492e-06, + "loss": 1.0391, + "step": 29700 + }, + { + "epoch": 0.89, + "learning_rate": 7.68150195609807e-06, + "loss": 0.9707, + "step": 29705 + }, + { + "epoch": 0.89, + "learning_rate": 7.661483807745683e-06, + "loss": 1.014, + "step": 29710 + }, + { + "epoch": 0.89, + "learning_rate": 7.641490738226221e-06, + "loss": 1.0049, + "step": 29715 + }, + { + "epoch": 0.89, + "learning_rate": 7.621522752969701e-06, + "loss": 1.1093, + "step": 29720 + }, + { + "epoch": 0.89, + "learning_rate": 7.601579857399354e-06, + "loss": 1.125, + "step": 29725 + }, + { + "epoch": 0.89, + "learning_rate": 7.581662056931626e-06, + "loss": 1.0378, + "step": 29730 + }, + { + "epoch": 0.89, + "learning_rate": 7.561769356976145e-06, + "loss": 1.0284, + "step": 29735 + }, + { + "epoch": 0.89, + "learning_rate": 7.541901762935666e-06, + "loss": 1.0965, + "step": 29740 + }, + { + "epoch": 0.89, + "learning_rate": 7.522059280206151e-06, + "loss": 1.1078, + "step": 29745 + }, + { + "epoch": 0.89, + "learning_rate": 7.502241914176789e-06, + "loss": 1.1361, + "step": 29750 + }, + { + "epoch": 0.89, + "learning_rate": 7.482449670229896e-06, + "loss": 1.0439, + "step": 29755 + }, + { + "epoch": 0.89, + "learning_rate": 7.462682553740985e-06, + "loss": 0.986, + "step": 29760 + }, + { + "epoch": 0.89, + "learning_rate": 7.4429405700787155e-06, + "loss": 1.0304, + "step": 29765 + }, + { + "epoch": 0.89, + "learning_rate": 7.423223724604967e-06, + "loss": 1.0031, + "step": 29770 + }, + { + "epoch": 0.89, + "learning_rate": 7.403532022674764e-06, + "loss": 0.9565, + "step": 29775 + }, + { + "epoch": 0.89, + "learning_rate": 7.383865469636298e-06, + "loss": 1.036, + "step": 29780 + }, + { + "epoch": 0.89, + "learning_rate": 7.3642240708309715e-06, + "loss": 1.0881, + "step": 29785 + }, + { + "epoch": 0.89, + "learning_rate": 7.3446078315932755e-06, + "loss": 1.0737, + "step": 29790 + }, + { + "epoch": 0.89, + "learning_rate": 7.325016757250946e-06, + "loss": 0.9994, + "step": 29795 + }, + { + "epoch": 0.89, + "learning_rate": 7.30545085312484e-06, + "loss": 1.1036, + "step": 29800 + }, + { + "epoch": 0.89, + "learning_rate": 7.285910124529016e-06, + "loss": 1.0143, + "step": 29805 + }, + { + "epoch": 0.89, + "learning_rate": 7.266394576770641e-06, + "loss": 0.9984, + "step": 29810 + }, + { + "epoch": 0.89, + "learning_rate": 7.2469042151500745e-06, + "loss": 1.0046, + "step": 29815 + }, + { + "epoch": 0.89, + "learning_rate": 7.227439044960838e-06, + "loss": 0.9868, + "step": 29820 + }, + { + "epoch": 0.89, + "learning_rate": 7.207999071489635e-06, + "loss": 0.9912, + "step": 29825 + }, + { + "epoch": 0.89, + "learning_rate": 7.188584300016266e-06, + "loss": 0.9825, + "step": 29830 + }, + { + "epoch": 0.89, + "learning_rate": 7.1691947358137005e-06, + "loss": 1.0695, + "step": 29835 + }, + { + "epoch": 0.89, + "learning_rate": 7.149830384148137e-06, + "loss": 0.9898, + "step": 29840 + }, + { + "epoch": 0.89, + "learning_rate": 7.1304912502788365e-06, + "loss": 0.9602, + "step": 29845 + }, + { + "epoch": 0.89, + "learning_rate": 7.11117733945823e-06, + "loss": 1.0741, + "step": 29850 + }, + { + "epoch": 0.89, + "learning_rate": 7.091888656931922e-06, + "loss": 0.9869, + "step": 29855 + }, + { + "epoch": 0.89, + "learning_rate": 7.072625207938665e-06, + "loss": 1.0476, + "step": 29860 + }, + { + "epoch": 0.89, + "learning_rate": 7.053386997710354e-06, + "loss": 0.9677, + "step": 29865 + }, + { + "epoch": 0.89, + "learning_rate": 7.034174031471985e-06, + "loss": 0.9455, + "step": 29870 + }, + { + "epoch": 0.89, + "learning_rate": 7.014986314441762e-06, + "loss": 0.9775, + "step": 29875 + }, + { + "epoch": 0.89, + "learning_rate": 6.995823851831007e-06, + "loss": 1.0326, + "step": 29880 + }, + { + "epoch": 0.89, + "learning_rate": 6.976686648844188e-06, + "loss": 1.0552, + "step": 29885 + }, + { + "epoch": 0.89, + "learning_rate": 6.9575747106788715e-06, + "loss": 1.0421, + "step": 29890 + }, + { + "epoch": 0.89, + "learning_rate": 6.9384880425258255e-06, + "loss": 1.0043, + "step": 29895 + }, + { + "epoch": 0.89, + "learning_rate": 6.919426649568927e-06, + "loss": 0.9058, + "step": 29900 + }, + { + "epoch": 0.89, + "learning_rate": 6.900390536985179e-06, + "loss": 0.9703, + "step": 29905 + }, + { + "epoch": 0.89, + "learning_rate": 6.881379709944713e-06, + "loss": 1.0117, + "step": 29910 + }, + { + "epoch": 0.89, + "learning_rate": 6.862394173610809e-06, + "loss": 1.0065, + "step": 29915 + }, + { + "epoch": 0.89, + "learning_rate": 6.843433933139909e-06, + "loss": 1.0246, + "step": 29920 + }, + { + "epoch": 0.89, + "learning_rate": 6.8244989936815054e-06, + "loss": 1.0314, + "step": 29925 + }, + { + "epoch": 0.89, + "learning_rate": 6.805589360378306e-06, + "loss": 1.0423, + "step": 29930 + }, + { + "epoch": 0.89, + "learning_rate": 6.786705038366059e-06, + "loss": 1.0474, + "step": 29935 + }, + { + "epoch": 0.89, + "learning_rate": 6.767846032773728e-06, + "loss": 1.0135, + "step": 29940 + }, + { + "epoch": 0.89, + "learning_rate": 6.749012348723327e-06, + "loss": 1.0369, + "step": 29945 + }, + { + "epoch": 0.89, + "learning_rate": 6.7302039913300304e-06, + "loss": 1.0016, + "step": 29950 + }, + { + "epoch": 0.89, + "learning_rate": 6.7114209657021195e-06, + "loss": 1.0689, + "step": 29955 + }, + { + "epoch": 0.89, + "learning_rate": 6.6926632769410245e-06, + "loss": 1.0176, + "step": 29960 + }, + { + "epoch": 0.89, + "learning_rate": 6.673930930141237e-06, + "loss": 0.9772, + "step": 29965 + }, + { + "epoch": 0.89, + "learning_rate": 6.655223930390431e-06, + "loss": 1.0156, + "step": 29970 + }, + { + "epoch": 0.9, + "learning_rate": 6.636542282769343e-06, + "loss": 0.9712, + "step": 29975 + }, + { + "epoch": 0.9, + "learning_rate": 6.617885992351846e-06, + "loss": 0.9406, + "step": 29980 + }, + { + "epoch": 0.9, + "learning_rate": 6.599255064204946e-06, + "loss": 1.0461, + "step": 29985 + }, + { + "epoch": 0.9, + "learning_rate": 6.580649503388736e-06, + "loss": 1.0436, + "step": 29990 + }, + { + "epoch": 0.9, + "learning_rate": 6.562069314956409e-06, + "loss": 1.1097, + "step": 29995 + }, + { + "epoch": 0.9, + "learning_rate": 6.543514503954273e-06, + "loss": 0.9697, + "step": 30000 + }, + { + "epoch": 0.9, + "learning_rate": 6.5249850754217965e-06, + "loss": 1.0786, + "step": 30005 + }, + { + "epoch": 0.9, + "learning_rate": 6.5064810343914875e-06, + "loss": 1.0166, + "step": 30010 + }, + { + "epoch": 0.9, + "learning_rate": 6.488002385888959e-06, + "loss": 1.0406, + "step": 30015 + }, + { + "epoch": 0.9, + "learning_rate": 6.469549134932973e-06, + "loss": 0.9545, + "step": 30020 + }, + { + "epoch": 0.9, + "learning_rate": 6.451121286535378e-06, + "loss": 1.0461, + "step": 30025 + }, + { + "epoch": 0.9, + "learning_rate": 6.432718845701114e-06, + "loss": 1.0034, + "step": 30030 + }, + { + "epoch": 0.9, + "learning_rate": 6.414341817428193e-06, + "loss": 1.0189, + "step": 30035 + }, + { + "epoch": 0.9, + "learning_rate": 6.39599020670778e-06, + "loss": 1.0154, + "step": 30040 + }, + { + "epoch": 0.9, + "learning_rate": 6.377664018524121e-06, + "loss": 0.9909, + "step": 30045 + }, + { + "epoch": 0.9, + "learning_rate": 6.359363257854534e-06, + "loss": 0.9915, + "step": 30050 + }, + { + "epoch": 0.9, + "learning_rate": 6.34108792966942e-06, + "loss": 0.9411, + "step": 30055 + }, + { + "epoch": 0.9, + "learning_rate": 6.3228380389323325e-06, + "loss": 1.0642, + "step": 30060 + }, + { + "epoch": 0.9, + "learning_rate": 6.304613590599884e-06, + "loss": 0.9811, + "step": 30065 + }, + { + "epoch": 0.9, + "learning_rate": 6.2864145896217475e-06, + "loss": 1.1242, + "step": 30070 + }, + { + "epoch": 0.9, + "learning_rate": 6.268241040940737e-06, + "loss": 1.0141, + "step": 30075 + }, + { + "epoch": 0.9, + "learning_rate": 6.250092949492692e-06, + "loss": 1.0918, + "step": 30080 + }, + { + "epoch": 0.9, + "learning_rate": 6.231970320206615e-06, + "loss": 0.9903, + "step": 30085 + }, + { + "epoch": 0.9, + "learning_rate": 6.213873158004524e-06, + "loss": 1.0144, + "step": 30090 + }, + { + "epoch": 0.9, + "learning_rate": 6.1958014678015655e-06, + "loss": 1.0565, + "step": 30095 + }, + { + "epoch": 0.9, + "learning_rate": 6.1777552545059235e-06, + "loss": 1.0393, + "step": 30100 + }, + { + "epoch": 0.9, + "learning_rate": 6.159734523018923e-06, + "loss": 1.0371, + "step": 30105 + }, + { + "epoch": 0.9, + "learning_rate": 6.141739278234904e-06, + "loss": 0.9804, + "step": 30110 + }, + { + "epoch": 0.9, + "learning_rate": 6.123769525041345e-06, + "loss": 1.0075, + "step": 30115 + }, + { + "epoch": 0.9, + "learning_rate": 6.105825268318743e-06, + "loss": 1.0249, + "step": 30120 + }, + { + "epoch": 0.9, + "learning_rate": 6.0879065129407e-06, + "loss": 0.9814, + "step": 30125 + }, + { + "epoch": 0.9, + "learning_rate": 6.070013263773921e-06, + "loss": 0.9834, + "step": 30130 + }, + { + "epoch": 0.9, + "learning_rate": 6.052145525678144e-06, + "loss": 1.0159, + "step": 30135 + }, + { + "epoch": 0.9, + "learning_rate": 6.03430330350615e-06, + "loss": 1.0173, + "step": 30140 + }, + { + "epoch": 0.9, + "learning_rate": 6.016486602103855e-06, + "loss": 0.9885, + "step": 30145 + }, + { + "epoch": 0.9, + "learning_rate": 5.998695426310219e-06, + "loss": 1.0852, + "step": 30150 + }, + { + "epoch": 0.9, + "learning_rate": 5.980929780957267e-06, + "loss": 0.9527, + "step": 30155 + }, + { + "epoch": 0.9, + "learning_rate": 5.963189670870073e-06, + "loss": 1.0285, + "step": 30160 + }, + { + "epoch": 0.9, + "learning_rate": 5.945475100866793e-06, + "loss": 1.0206, + "step": 30165 + }, + { + "epoch": 0.9, + "learning_rate": 5.927786075758668e-06, + "loss": 0.97, + "step": 30170 + }, + { + "epoch": 0.9, + "learning_rate": 5.910122600349965e-06, + "loss": 0.9686, + "step": 30175 + }, + { + "epoch": 0.9, + "learning_rate": 5.892484679438004e-06, + "loss": 1.0289, + "step": 30180 + }, + { + "epoch": 0.9, + "learning_rate": 5.874872317813218e-06, + "loss": 0.9962, + "step": 30185 + }, + { + "epoch": 0.9, + "learning_rate": 5.857285520259059e-06, + "loss": 1.0703, + "step": 30190 + }, + { + "epoch": 0.9, + "learning_rate": 5.839724291552018e-06, + "loss": 0.9323, + "step": 30195 + }, + { + "epoch": 0.9, + "learning_rate": 5.8221886364616895e-06, + "loss": 0.963, + "step": 30200 + }, + { + "epoch": 0.9, + "learning_rate": 5.8046785597507096e-06, + "loss": 1.0458, + "step": 30205 + }, + { + "epoch": 0.9, + "learning_rate": 5.787194066174739e-06, + "loss": 1.0356, + "step": 30210 + }, + { + "epoch": 0.9, + "learning_rate": 5.7697351604824925e-06, + "loss": 1.0753, + "step": 30215 + }, + { + "epoch": 0.9, + "learning_rate": 5.752301847415775e-06, + "loss": 0.9927, + "step": 30220 + }, + { + "epoch": 0.9, + "learning_rate": 5.734894131709401e-06, + "loss": 0.9767, + "step": 30225 + }, + { + "epoch": 0.9, + "learning_rate": 5.7175120180912755e-06, + "loss": 1.0294, + "step": 30230 + }, + { + "epoch": 0.9, + "learning_rate": 5.700155511282279e-06, + "loss": 1.0514, + "step": 30235 + }, + { + "epoch": 0.9, + "learning_rate": 5.682824615996418e-06, + "loss": 1.0034, + "step": 30240 + }, + { + "epoch": 0.9, + "learning_rate": 5.6655193369406835e-06, + "loss": 1.0552, + "step": 30245 + }, + { + "epoch": 0.9, + "learning_rate": 5.64823967881516e-06, + "loss": 0.9603, + "step": 30250 + }, + { + "epoch": 0.9, + "learning_rate": 5.630985646312903e-06, + "loss": 1.0037, + "step": 30255 + }, + { + "epoch": 0.9, + "learning_rate": 5.613757244120088e-06, + "loss": 1.111, + "step": 30260 + }, + { + "epoch": 0.9, + "learning_rate": 5.596554476915861e-06, + "loss": 1.068, + "step": 30265 + }, + { + "epoch": 0.9, + "learning_rate": 5.57937734937245e-06, + "loss": 0.9845, + "step": 30270 + }, + { + "epoch": 0.9, + "learning_rate": 5.562225866155124e-06, + "loss": 1.0597, + "step": 30275 + }, + { + "epoch": 0.9, + "learning_rate": 5.5451000319221456e-06, + "loss": 0.9797, + "step": 30280 + }, + { + "epoch": 0.9, + "learning_rate": 5.527999851324839e-06, + "loss": 1.0561, + "step": 30285 + }, + { + "epoch": 0.9, + "learning_rate": 5.510925329007544e-06, + "loss": 0.9576, + "step": 30290 + }, + { + "epoch": 0.9, + "learning_rate": 5.4938764696076726e-06, + "loss": 1.0983, + "step": 30295 + }, + { + "epoch": 0.9, + "learning_rate": 5.476853277755634e-06, + "loss": 1.0115, + "step": 30300 + }, + { + "epoch": 0.9, + "learning_rate": 5.459855758074839e-06, + "loss": 0.9431, + "step": 30305 + }, + { + "epoch": 0.91, + "learning_rate": 5.4428839151817735e-06, + "loss": 1.0721, + "step": 30310 + }, + { + "epoch": 0.91, + "learning_rate": 5.42593775368595e-06, + "loss": 1.0002, + "step": 30315 + }, + { + "epoch": 0.91, + "learning_rate": 5.409017278189876e-06, + "loss": 0.9735, + "step": 30320 + }, + { + "epoch": 0.91, + "learning_rate": 5.392122493289076e-06, + "loss": 0.9894, + "step": 30325 + }, + { + "epoch": 0.91, + "learning_rate": 5.375253403572167e-06, + "loss": 1.0444, + "step": 30330 + }, + { + "epoch": 0.91, + "learning_rate": 5.3584100136206965e-06, + "loss": 0.9873, + "step": 30335 + }, + { + "epoch": 0.91, + "learning_rate": 5.341592328009282e-06, + "loss": 1.0287, + "step": 30340 + }, + { + "epoch": 0.91, + "learning_rate": 5.324800351305548e-06, + "loss": 1.0381, + "step": 30345 + }, + { + "epoch": 0.91, + "learning_rate": 5.308034088070157e-06, + "loss": 0.9822, + "step": 30350 + }, + { + "epoch": 0.91, + "learning_rate": 5.291293542856768e-06, + "loss": 1.0641, + "step": 30355 + }, + { + "epoch": 0.91, + "learning_rate": 5.27457872021202e-06, + "loss": 1.0431, + "step": 30360 + }, + { + "epoch": 0.91, + "learning_rate": 5.257889624675638e-06, + "loss": 1.0115, + "step": 30365 + }, + { + "epoch": 0.91, + "learning_rate": 5.24122626078033e-06, + "loss": 1.0226, + "step": 30370 + }, + { + "epoch": 0.91, + "learning_rate": 5.2245886330517966e-06, + "loss": 0.9593, + "step": 30375 + }, + { + "epoch": 0.91, + "learning_rate": 5.207976746008758e-06, + "loss": 0.9483, + "step": 30380 + }, + { + "epoch": 0.91, + "learning_rate": 5.191390604162938e-06, + "loss": 1.0203, + "step": 30385 + }, + { + "epoch": 0.91, + "learning_rate": 5.174830212019111e-06, + "loss": 1.0085, + "step": 30390 + }, + { + "epoch": 0.91, + "learning_rate": 5.158295574075e-06, + "loss": 1.0406, + "step": 30395 + }, + { + "epoch": 0.91, + "learning_rate": 5.14178669482136e-06, + "loss": 0.9713, + "step": 30400 + }, + { + "epoch": 0.91, + "learning_rate": 5.1253035787419356e-06, + "loss": 1.0611, + "step": 30405 + }, + { + "epoch": 0.91, + "learning_rate": 5.1088462303135245e-06, + "loss": 0.9981, + "step": 30410 + }, + { + "epoch": 0.91, + "learning_rate": 5.092414654005839e-06, + "loss": 0.9874, + "step": 30415 + }, + { + "epoch": 0.91, + "learning_rate": 5.076008854281689e-06, + "loss": 1.0721, + "step": 30420 + }, + { + "epoch": 0.91, + "learning_rate": 5.059628835596808e-06, + "loss": 1.0144, + "step": 30425 + }, + { + "epoch": 0.91, + "learning_rate": 5.043274602399939e-06, + "loss": 1.0652, + "step": 30430 + }, + { + "epoch": 0.91, + "learning_rate": 5.026946159132851e-06, + "loss": 1.026, + "step": 30435 + }, + { + "epoch": 0.91, + "learning_rate": 5.010643510230317e-06, + "loss": 1.0192, + "step": 30440 + }, + { + "epoch": 0.91, + "learning_rate": 4.994366660120064e-06, + "loss": 1.0525, + "step": 30445 + }, + { + "epoch": 0.91, + "learning_rate": 4.978115613222823e-06, + "loss": 1.063, + "step": 30450 + }, + { + "epoch": 0.91, + "learning_rate": 4.961890373952327e-06, + "loss": 1.0349, + "step": 30455 + }, + { + "epoch": 0.91, + "learning_rate": 4.945690946715309e-06, + "loss": 1.0278, + "step": 30460 + }, + { + "epoch": 0.91, + "learning_rate": 4.929517335911482e-06, + "loss": 1.0273, + "step": 30465 + }, + { + "epoch": 0.91, + "learning_rate": 4.913369545933499e-06, + "loss": 0.9807, + "step": 30470 + }, + { + "epoch": 0.91, + "learning_rate": 4.897247581167108e-06, + "loss": 1.0034, + "step": 30475 + }, + { + "epoch": 0.91, + "learning_rate": 4.8811514459909505e-06, + "loss": 0.9906, + "step": 30480 + }, + { + "epoch": 0.91, + "learning_rate": 4.865081144776684e-06, + "loss": 1.0638, + "step": 30485 + }, + { + "epoch": 0.91, + "learning_rate": 4.849036681888941e-06, + "loss": 1.0657, + "step": 30490 + }, + { + "epoch": 0.91, + "learning_rate": 4.8330180616853686e-06, + "loss": 1.0897, + "step": 30495 + }, + { + "epoch": 0.91, + "learning_rate": 4.817025288516563e-06, + "loss": 1.0893, + "step": 30500 + }, + { + "epoch": 0.91, + "learning_rate": 4.8010583667260856e-06, + "loss": 1.0758, + "step": 30505 + }, + { + "epoch": 0.91, + "learning_rate": 4.78511730065051e-06, + "loss": 1.0609, + "step": 30510 + }, + { + "epoch": 0.91, + "learning_rate": 4.7692020946193964e-06, + "loss": 1.0287, + "step": 30515 + }, + { + "epoch": 0.91, + "learning_rate": 4.753312752955252e-06, + "loss": 1.0499, + "step": 30520 + }, + { + "epoch": 0.91, + "learning_rate": 4.7374492799735385e-06, + "loss": 1.009, + "step": 30525 + }, + { + "epoch": 0.91, + "learning_rate": 4.721611679982763e-06, + "loss": 1.0475, + "step": 30530 + }, + { + "epoch": 0.91, + "learning_rate": 4.705799957284351e-06, + "loss": 1.0604, + "step": 30535 + }, + { + "epoch": 0.91, + "learning_rate": 4.690014116172714e-06, + "loss": 0.9854, + "step": 30540 + }, + { + "epoch": 0.91, + "learning_rate": 4.674254160935221e-06, + "loss": 1.012, + "step": 30545 + }, + { + "epoch": 0.91, + "learning_rate": 4.658520095852225e-06, + "loss": 1.0591, + "step": 30550 + }, + { + "epoch": 0.91, + "learning_rate": 4.642811925197077e-06, + "loss": 1.0596, + "step": 30555 + }, + { + "epoch": 0.91, + "learning_rate": 4.627129653236028e-06, + "loss": 0.9995, + "step": 30560 + }, + { + "epoch": 0.91, + "learning_rate": 4.6114732842283515e-06, + "loss": 0.9355, + "step": 30565 + }, + { + "epoch": 0.91, + "learning_rate": 4.595842822426244e-06, + "loss": 1.0731, + "step": 30570 + }, + { + "epoch": 0.91, + "learning_rate": 4.5802382720749215e-06, + "loss": 1.005, + "step": 30575 + }, + { + "epoch": 0.91, + "learning_rate": 4.564659637412483e-06, + "loss": 1.048, + "step": 30580 + }, + { + "epoch": 0.91, + "learning_rate": 4.5491069226700764e-06, + "loss": 0.9765, + "step": 30585 + }, + { + "epoch": 0.91, + "learning_rate": 4.533580132071736e-06, + "loss": 1.0934, + "step": 30590 + }, + { + "epoch": 0.91, + "learning_rate": 4.518079269834508e-06, + "loss": 1.0305, + "step": 30595 + }, + { + "epoch": 0.91, + "learning_rate": 4.502604340168349e-06, + "loss": 0.9488, + "step": 30600 + }, + { + "epoch": 0.91, + "learning_rate": 4.487155347276228e-06, + "loss": 1.0143, + "step": 30605 + }, + { + "epoch": 0.91, + "learning_rate": 4.471732295354014e-06, + "loss": 0.966, + "step": 30610 + }, + { + "epoch": 0.91, + "learning_rate": 4.456335188590577e-06, + "loss": 1.0043, + "step": 30615 + }, + { + "epoch": 0.91, + "learning_rate": 4.440964031167716e-06, + "loss": 0.9505, + "step": 30620 + }, + { + "epoch": 0.91, + "learning_rate": 4.425618827260181e-06, + "loss": 1.0688, + "step": 30625 + }, + { + "epoch": 0.91, + "learning_rate": 4.41029958103567e-06, + "loss": 1.0354, + "step": 30630 + }, + { + "epoch": 0.91, + "learning_rate": 4.3950062966548465e-06, + "loss": 0.9514, + "step": 30635 + }, + { + "epoch": 0.91, + "learning_rate": 4.37973897827133e-06, + "loss": 1.021, + "step": 30640 + }, + { + "epoch": 0.92, + "learning_rate": 4.364497630031661e-06, + "loss": 1.0498, + "step": 30645 + }, + { + "epoch": 0.92, + "learning_rate": 4.349282256075316e-06, + "loss": 1.028, + "step": 30650 + }, + { + "epoch": 0.92, + "learning_rate": 4.334092860534777e-06, + "loss": 0.9736, + "step": 30655 + }, + { + "epoch": 0.92, + "learning_rate": 4.318929447535425e-06, + "loss": 1.0622, + "step": 30660 + }, + { + "epoch": 0.92, + "learning_rate": 4.303792021195574e-06, + "loss": 1.0702, + "step": 30665 + }, + { + "epoch": 0.92, + "learning_rate": 4.288680585626514e-06, + "loss": 1.0289, + "step": 30670 + }, + { + "epoch": 0.92, + "learning_rate": 4.27359514493244e-06, + "loss": 1.0017, + "step": 30675 + }, + { + "epoch": 0.92, + "learning_rate": 4.258535703210553e-06, + "loss": 1.0773, + "step": 30680 + }, + { + "epoch": 0.92, + "learning_rate": 4.2435022645509025e-06, + "loss": 1.0107, + "step": 30685 + }, + { + "epoch": 0.92, + "learning_rate": 4.228494833036534e-06, + "loss": 1.0461, + "step": 30690 + }, + { + "epoch": 0.92, + "learning_rate": 4.213513412743419e-06, + "loss": 1.0325, + "step": 30695 + }, + { + "epoch": 0.92, + "learning_rate": 4.198558007740461e-06, + "loss": 1.0663, + "step": 30700 + }, + { + "epoch": 0.92, + "learning_rate": 4.183628622089486e-06, + "loss": 0.9985, + "step": 30705 + }, + { + "epoch": 0.92, + "learning_rate": 4.168725259845296e-06, + "loss": 0.9302, + "step": 30710 + }, + { + "epoch": 0.92, + "learning_rate": 4.153847925055554e-06, + "loss": 1.0977, + "step": 30715 + }, + { + "epoch": 0.92, + "learning_rate": 4.138996621760938e-06, + "loss": 1.0143, + "step": 30720 + }, + { + "epoch": 0.92, + "learning_rate": 4.1241713539949656e-06, + "loss": 0.9493, + "step": 30725 + }, + { + "epoch": 0.92, + "learning_rate": 4.1093721257841725e-06, + "loss": 0.9799, + "step": 30730 + }, + { + "epoch": 0.92, + "learning_rate": 4.094598941147954e-06, + "loss": 1.0643, + "step": 30735 + }, + { + "epoch": 0.92, + "learning_rate": 4.079851804098666e-06, + "loss": 1.0517, + "step": 30740 + }, + { + "epoch": 0.92, + "learning_rate": 4.065130718641574e-06, + "loss": 1.0464, + "step": 30745 + }, + { + "epoch": 0.92, + "learning_rate": 4.0504356887748895e-06, + "loss": 1.0213, + "step": 30750 + }, + { + "epoch": 0.92, + "learning_rate": 4.03576671848972e-06, + "loss": 1.0531, + "step": 30755 + }, + { + "epoch": 0.92, + "learning_rate": 4.021123811770111e-06, + "loss": 0.9338, + "step": 30760 + }, + { + "epoch": 0.92, + "learning_rate": 4.006506972593049e-06, + "loss": 1.0024, + "step": 30765 + }, + { + "epoch": 0.92, + "learning_rate": 3.991916204928403e-06, + "loss": 1.0947, + "step": 30770 + }, + { + "epoch": 0.92, + "learning_rate": 3.977351512738958e-06, + "loss": 1.0693, + "step": 30775 + }, + { + "epoch": 0.92, + "learning_rate": 3.962812899980451e-06, + "loss": 0.9643, + "step": 30780 + }, + { + "epoch": 0.92, + "learning_rate": 3.948300370601532e-06, + "loss": 1.0943, + "step": 30785 + }, + { + "epoch": 0.92, + "learning_rate": 3.93381392854375e-06, + "loss": 1.009, + "step": 30790 + }, + { + "epoch": 0.92, + "learning_rate": 3.919353577741558e-06, + "loss": 1.0669, + "step": 30795 + }, + { + "epoch": 0.92, + "learning_rate": 3.904919322122346e-06, + "loss": 1.0138, + "step": 30800 + }, + { + "epoch": 0.92, + "learning_rate": 3.890511165606437e-06, + "loss": 1.1009, + "step": 30805 + }, + { + "epoch": 0.92, + "learning_rate": 3.876129112107008e-06, + "loss": 1.0571, + "step": 30810 + }, + { + "epoch": 0.92, + "learning_rate": 3.861773165530169e-06, + "loss": 1.092, + "step": 30815 + }, + { + "epoch": 0.92, + "learning_rate": 3.84744332977498e-06, + "loss": 0.9928, + "step": 30820 + }, + { + "epoch": 0.92, + "learning_rate": 3.833139608733361e-06, + "loss": 1.078, + "step": 30825 + }, + { + "epoch": 0.92, + "learning_rate": 3.818862006290158e-06, + "loss": 1.0385, + "step": 30830 + }, + { + "epoch": 0.92, + "learning_rate": 3.8046105263231048e-06, + "loss": 1.026, + "step": 30835 + }, + { + "epoch": 0.92, + "learning_rate": 3.7903851727028596e-06, + "loss": 1.0774, + "step": 30840 + }, + { + "epoch": 0.92, + "learning_rate": 3.7761859492929986e-06, + "loss": 1.0394, + "step": 30845 + }, + { + "epoch": 0.92, + "learning_rate": 3.7620128599499706e-06, + "loss": 1.0073, + "step": 30850 + }, + { + "epoch": 0.92, + "learning_rate": 3.7478659085231514e-06, + "loss": 0.9867, + "step": 30855 + }, + { + "epoch": 0.92, + "learning_rate": 3.7337450988547793e-06, + "loss": 1.0024, + "step": 30860 + }, + { + "epoch": 0.92, + "learning_rate": 3.719650434780053e-06, + "loss": 1.0284, + "step": 30865 + }, + { + "epoch": 0.92, + "learning_rate": 3.7055819201269992e-06, + "loss": 1.06, + "step": 30870 + }, + { + "epoch": 0.92, + "learning_rate": 3.691539558716617e-06, + "loss": 1.1062, + "step": 30875 + }, + { + "epoch": 0.92, + "learning_rate": 3.677523354362733e-06, + "loss": 1.0175, + "step": 30880 + }, + { + "epoch": 0.92, + "learning_rate": 3.663533310872136e-06, + "loss": 0.9485, + "step": 30885 + }, + { + "epoch": 0.92, + "learning_rate": 3.649569432044442e-06, + "loss": 1.0013, + "step": 30890 + }, + { + "epoch": 0.92, + "learning_rate": 3.635631721672206e-06, + "loss": 0.9587, + "step": 30895 + }, + { + "epoch": 0.92, + "learning_rate": 3.6217201835408666e-06, + "loss": 0.995, + "step": 30900 + }, + { + "epoch": 0.92, + "learning_rate": 3.607834821428746e-06, + "loss": 0.9713, + "step": 30905 + }, + { + "epoch": 0.92, + "learning_rate": 3.5939756391070722e-06, + "loss": 0.993, + "step": 30910 + }, + { + "epoch": 0.92, + "learning_rate": 3.580142640339956e-06, + "loss": 1.0662, + "step": 30915 + }, + { + "epoch": 0.92, + "learning_rate": 3.5663358288843596e-06, + "loss": 1.0542, + "step": 30920 + }, + { + "epoch": 0.92, + "learning_rate": 3.552555208490205e-06, + "loss": 1.0271, + "step": 30925 + }, + { + "epoch": 0.92, + "learning_rate": 3.5388007829002533e-06, + "loss": 1.1152, + "step": 30930 + }, + { + "epoch": 0.92, + "learning_rate": 3.525072555850151e-06, + "loss": 0.9565, + "step": 30935 + }, + { + "epoch": 0.92, + "learning_rate": 3.5113705310684363e-06, + "loss": 0.9986, + "step": 30940 + }, + { + "epoch": 0.92, + "learning_rate": 3.4976947122765447e-06, + "loss": 1.0819, + "step": 30945 + }, + { + "epoch": 0.92, + "learning_rate": 3.4840451031887933e-06, + "loss": 1.0837, + "step": 30950 + }, + { + "epoch": 0.92, + "learning_rate": 3.4704217075123502e-06, + "loss": 0.9727, + "step": 30955 + }, + { + "epoch": 0.92, + "learning_rate": 3.4568245289472667e-06, + "loss": 1.012, + "step": 30960 + }, + { + "epoch": 0.92, + "learning_rate": 3.443253571186533e-06, + "loss": 1.1164, + "step": 30965 + }, + { + "epoch": 0.92, + "learning_rate": 3.429708837915968e-06, + "loss": 0.9704, + "step": 30970 + }, + { + "epoch": 0.92, + "learning_rate": 3.4161903328142618e-06, + "loss": 1.0039, + "step": 30975 + }, + { + "epoch": 0.93, + "learning_rate": 3.4026980595529777e-06, + "loss": 0.987, + "step": 30980 + }, + { + "epoch": 0.93, + "learning_rate": 3.3892320217966176e-06, + "loss": 0.9569, + "step": 30985 + }, + { + "epoch": 0.93, + "learning_rate": 3.3757922232024898e-06, + "loss": 1.033, + "step": 30990 + }, + { + "epoch": 0.93, + "learning_rate": 3.362378667420785e-06, + "loss": 1.0237, + "step": 30995 + }, + { + "epoch": 0.93, + "learning_rate": 3.348991358094589e-06, + "loss": 0.9458, + "step": 31000 + }, + { + "epoch": 0.93, + "learning_rate": 3.335630298859871e-06, + "loss": 1.0487, + "step": 31005 + }, + { + "epoch": 0.93, + "learning_rate": 3.3222954933454285e-06, + "loss": 1.0234, + "step": 31010 + }, + { + "epoch": 0.93, + "learning_rate": 3.308986945172943e-06, + "loss": 0.9889, + "step": 31015 + }, + { + "epoch": 0.93, + "learning_rate": 3.2957046579569883e-06, + "loss": 0.9772, + "step": 31020 + }, + { + "epoch": 0.93, + "learning_rate": 3.282448635304991e-06, + "loss": 0.9908, + "step": 31025 + }, + { + "epoch": 0.93, + "learning_rate": 3.2692188808172376e-06, + "loss": 1.0165, + "step": 31030 + }, + { + "epoch": 0.93, + "learning_rate": 3.256015398086876e-06, + "loss": 1.0194, + "step": 31035 + }, + { + "epoch": 0.93, + "learning_rate": 3.242838190699948e-06, + "loss": 0.9882, + "step": 31040 + }, + { + "epoch": 0.93, + "learning_rate": 3.2296872622353146e-06, + "loss": 0.952, + "step": 31045 + }, + { + "epoch": 0.93, + "learning_rate": 3.216562616264729e-06, + "loss": 1.0454, + "step": 31050 + }, + { + "epoch": 0.93, + "learning_rate": 3.2034642563528285e-06, + "loss": 1.0446, + "step": 31055 + }, + { + "epoch": 0.93, + "learning_rate": 3.1903921860570564e-06, + "loss": 1.058, + "step": 31060 + }, + { + "epoch": 0.93, + "learning_rate": 3.1773464089277504e-06, + "loss": 1.019, + "step": 31065 + }, + { + "epoch": 0.93, + "learning_rate": 3.164326928508099e-06, + "loss": 0.9848, + "step": 31070 + }, + { + "epoch": 0.93, + "learning_rate": 3.151333748334162e-06, + "loss": 1.0357, + "step": 31075 + }, + { + "epoch": 0.93, + "learning_rate": 3.138366871934828e-06, + "loss": 1.0103, + "step": 31080 + }, + { + "epoch": 0.93, + "learning_rate": 3.1254263028318576e-06, + "loss": 1.0003, + "step": 31085 + }, + { + "epoch": 0.93, + "learning_rate": 3.1125120445398614e-06, + "loss": 1.0292, + "step": 31090 + }, + { + "epoch": 0.93, + "learning_rate": 3.099624100566345e-06, + "loss": 1.0118, + "step": 31095 + }, + { + "epoch": 0.93, + "learning_rate": 3.0867624744115863e-06, + "loss": 1.0103, + "step": 31100 + }, + { + "epoch": 0.93, + "learning_rate": 3.0739271695687688e-06, + "loss": 1.0523, + "step": 31105 + }, + { + "epoch": 0.93, + "learning_rate": 3.0611181895239483e-06, + "loss": 0.9929, + "step": 31110 + }, + { + "epoch": 0.93, + "learning_rate": 3.0483355377559864e-06, + "loss": 1.0506, + "step": 31115 + }, + { + "epoch": 0.93, + "learning_rate": 3.0355792177365948e-06, + "loss": 0.9536, + "step": 31120 + }, + { + "epoch": 0.93, + "learning_rate": 3.022849232930336e-06, + "loss": 1.0199, + "step": 31125 + }, + { + "epoch": 0.93, + "learning_rate": 3.010145586794677e-06, + "loss": 1.088, + "step": 31130 + }, + { + "epoch": 0.93, + "learning_rate": 2.997468282779858e-06, + "loss": 1.0169, + "step": 31135 + }, + { + "epoch": 0.93, + "learning_rate": 2.984817324328981e-06, + "loss": 0.9757, + "step": 31140 + }, + { + "epoch": 0.93, + "learning_rate": 2.9721927148780193e-06, + "loss": 0.9293, + "step": 31145 + }, + { + "epoch": 0.93, + "learning_rate": 2.9595944578557855e-06, + "loss": 1.0413, + "step": 31150 + }, + { + "epoch": 0.93, + "learning_rate": 2.9470225566839092e-06, + "loss": 1.0102, + "step": 31155 + }, + { + "epoch": 0.93, + "learning_rate": 2.934477014776871e-06, + "loss": 1.0021, + "step": 31160 + }, + { + "epoch": 0.93, + "learning_rate": 2.9219578355419996e-06, + "loss": 0.9867, + "step": 31165 + }, + { + "epoch": 0.93, + "learning_rate": 2.909465022379487e-06, + "loss": 1.0863, + "step": 31170 + }, + { + "epoch": 0.93, + "learning_rate": 2.8969985786823194e-06, + "loss": 1.0464, + "step": 31175 + }, + { + "epoch": 0.93, + "learning_rate": 2.8845585078363326e-06, + "loss": 1.0009, + "step": 31180 + }, + { + "epoch": 0.93, + "learning_rate": 2.8721448132202233e-06, + "loss": 1.0306, + "step": 31185 + }, + { + "epoch": 0.93, + "learning_rate": 2.8597574982055176e-06, + "loss": 1.0737, + "step": 31190 + }, + { + "epoch": 0.93, + "learning_rate": 2.8473965661565347e-06, + "loss": 1.0978, + "step": 31195 + }, + { + "epoch": 0.93, + "learning_rate": 2.835062020430501e-06, + "loss": 1.0448, + "step": 31200 + }, + { + "epoch": 0.93, + "learning_rate": 2.8227538643774144e-06, + "loss": 1.0804, + "step": 31205 + }, + { + "epoch": 0.93, + "learning_rate": 2.8104721013401447e-06, + "loss": 1.106, + "step": 31210 + }, + { + "epoch": 0.93, + "learning_rate": 2.7982167346543576e-06, + "loss": 1.0801, + "step": 31215 + }, + { + "epoch": 0.93, + "learning_rate": 2.7859877676486014e-06, + "loss": 1.0355, + "step": 31220 + }, + { + "epoch": 0.93, + "learning_rate": 2.7737852036441857e-06, + "loss": 0.9861, + "step": 31225 + }, + { + "epoch": 0.93, + "learning_rate": 2.7616090459553046e-06, + "loss": 1.0149, + "step": 31230 + }, + { + "epoch": 0.93, + "learning_rate": 2.7494592978889676e-06, + "loss": 1.0767, + "step": 31235 + }, + { + "epoch": 0.93, + "learning_rate": 2.737335962744991e-06, + "loss": 1.0796, + "step": 31240 + }, + { + "epoch": 0.93, + "learning_rate": 2.7252390438160523e-06, + "loss": 1.0218, + "step": 31245 + }, + { + "epoch": 0.93, + "learning_rate": 2.713168544387601e-06, + "loss": 0.9711, + "step": 31250 + }, + { + "epoch": 0.93, + "learning_rate": 2.701124467737981e-06, + "loss": 0.988, + "step": 31255 + }, + { + "epoch": 0.93, + "learning_rate": 2.6891068171383093e-06, + "loss": 1.0307, + "step": 31260 + }, + { + "epoch": 0.93, + "learning_rate": 2.6771155958525307e-06, + "loss": 0.9813, + "step": 31265 + }, + { + "epoch": 0.93, + "learning_rate": 2.665150807137429e-06, + "loss": 0.9782, + "step": 31270 + }, + { + "epoch": 0.93, + "learning_rate": 2.653212454242604e-06, + "loss": 0.9926, + "step": 31275 + }, + { + "epoch": 0.93, + "learning_rate": 2.641300540410474e-06, + "loss": 1.0102, + "step": 31280 + }, + { + "epoch": 0.93, + "learning_rate": 2.629415068876262e-06, + "loss": 0.9902, + "step": 31285 + }, + { + "epoch": 0.93, + "learning_rate": 2.617556042868019e-06, + "loss": 0.9674, + "step": 31290 + }, + { + "epoch": 0.93, + "learning_rate": 2.6057234656066465e-06, + "loss": 0.9718, + "step": 31295 + }, + { + "epoch": 0.93, + "learning_rate": 2.593917340305818e-06, + "loss": 1.0333, + "step": 31300 + }, + { + "epoch": 0.93, + "learning_rate": 2.5821376701720246e-06, + "loss": 0.9976, + "step": 31305 + }, + { + "epoch": 0.93, + "learning_rate": 2.5703844584045846e-06, + "loss": 0.9923, + "step": 31310 + }, + { + "epoch": 0.94, + "learning_rate": 2.5586577081956664e-06, + "loss": 1.0728, + "step": 31315 + }, + { + "epoch": 0.94, + "learning_rate": 2.5469574227301782e-06, + "loss": 1.0178, + "step": 31320 + }, + { + "epoch": 0.94, + "learning_rate": 2.5352836051858897e-06, + "loss": 1.0391, + "step": 31325 + }, + { + "epoch": 0.94, + "learning_rate": 2.523636258733375e-06, + "loss": 1.0578, + "step": 31330 + }, + { + "epoch": 0.94, + "learning_rate": 2.512015386536015e-06, + "loss": 1.0036, + "step": 31335 + }, + { + "epoch": 0.94, + "learning_rate": 2.5004209917499852e-06, + "loss": 1.0508, + "step": 31340 + }, + { + "epoch": 0.94, + "learning_rate": 2.4888530775243112e-06, + "loss": 1.0074, + "step": 31345 + }, + { + "epoch": 0.94, + "learning_rate": 2.4773116470007797e-06, + "loss": 1.0386, + "step": 31350 + }, + { + "epoch": 0.94, + "learning_rate": 2.465796703314016e-06, + "loss": 1.0279, + "step": 31355 + }, + { + "epoch": 0.94, + "learning_rate": 2.4543082495914303e-06, + "loss": 1.0601, + "step": 31360 + }, + { + "epoch": 0.94, + "learning_rate": 2.44284628895326e-06, + "loss": 1.0745, + "step": 31365 + }, + { + "epoch": 0.94, + "learning_rate": 2.4314108245125147e-06, + "loss": 0.9753, + "step": 31370 + }, + { + "epoch": 0.94, + "learning_rate": 2.420001859375054e-06, + "loss": 1.0295, + "step": 31375 + }, + { + "epoch": 0.94, + "learning_rate": 2.408619396639489e-06, + "loss": 1.0434, + "step": 31380 + }, + { + "epoch": 0.94, + "learning_rate": 2.397263439397279e-06, + "loss": 0.9594, + "step": 31385 + }, + { + "epoch": 0.94, + "learning_rate": 2.3859339907326582e-06, + "loss": 1.0566, + "step": 31390 + }, + { + "epoch": 0.94, + "learning_rate": 2.3746310537226648e-06, + "loss": 0.9802, + "step": 31395 + }, + { + "epoch": 0.94, + "learning_rate": 2.3633546314371422e-06, + "loss": 1.0097, + "step": 31400 + }, + { + "epoch": 0.94, + "learning_rate": 2.352104726938731e-06, + "loss": 1.0577, + "step": 31405 + }, + { + "epoch": 0.94, + "learning_rate": 2.3408813432828523e-06, + "loss": 1.0559, + "step": 31410 + }, + { + "epoch": 0.94, + "learning_rate": 2.329684483517758e-06, + "loss": 1.0703, + "step": 31415 + }, + { + "epoch": 0.94, + "learning_rate": 2.3185141506844697e-06, + "loss": 1.0315, + "step": 31420 + }, + { + "epoch": 0.94, + "learning_rate": 2.3073703478168174e-06, + "loss": 1.0333, + "step": 31425 + }, + { + "epoch": 0.94, + "learning_rate": 2.2962530779414126e-06, + "loss": 1.0154, + "step": 31430 + }, + { + "epoch": 0.94, + "learning_rate": 2.2851623440776737e-06, + "loss": 1.0626, + "step": 31435 + }, + { + "epoch": 0.94, + "learning_rate": 2.2740981492378023e-06, + "loss": 1.1136, + "step": 31440 + }, + { + "epoch": 0.94, + "learning_rate": 2.2630604964268163e-06, + "loss": 1.0165, + "step": 31445 + }, + { + "epoch": 0.94, + "learning_rate": 2.2520493886424743e-06, + "loss": 0.9959, + "step": 31450 + }, + { + "epoch": 0.94, + "learning_rate": 2.241064828875372e-06, + "loss": 0.9569, + "step": 31455 + }, + { + "epoch": 0.94, + "learning_rate": 2.2301068201088905e-06, + "loss": 0.994, + "step": 31460 + }, + { + "epoch": 0.94, + "learning_rate": 2.2191753653191817e-06, + "loss": 1.0229, + "step": 31465 + }, + { + "epoch": 0.94, + "learning_rate": 2.208270467475171e-06, + "loss": 1.0079, + "step": 31470 + }, + { + "epoch": 0.94, + "learning_rate": 2.1973921295386114e-06, + "loss": 1.0266, + "step": 31475 + }, + { + "epoch": 0.94, + "learning_rate": 2.1865403544640286e-06, + "loss": 1.0276, + "step": 31480 + }, + { + "epoch": 0.94, + "learning_rate": 2.175715145198709e-06, + "loss": 1.0862, + "step": 31485 + }, + { + "epoch": 0.94, + "learning_rate": 2.1649165046827792e-06, + "loss": 1.0098, + "step": 31490 + }, + { + "epoch": 0.94, + "learning_rate": 2.1541444358490703e-06, + "loss": 1.0495, + "step": 31495 + }, + { + "epoch": 0.94, + "learning_rate": 2.1433989416232646e-06, + "loss": 1.0589, + "step": 31500 + }, + { + "epoch": 0.94, + "learning_rate": 2.1326800249237943e-06, + "loss": 1.0129, + "step": 31505 + }, + { + "epoch": 0.94, + "learning_rate": 2.121987688661886e-06, + "loss": 1.0575, + "step": 31510 + }, + { + "epoch": 0.94, + "learning_rate": 2.1113219357415393e-06, + "loss": 1.0522, + "step": 31515 + }, + { + "epoch": 0.94, + "learning_rate": 2.100682769059548e-06, + "loss": 1.0435, + "step": 31520 + }, + { + "epoch": 0.94, + "learning_rate": 2.0900701915054666e-06, + "loss": 1.0267, + "step": 31525 + }, + { + "epoch": 0.94, + "learning_rate": 2.0794842059616237e-06, + "loss": 1.0417, + "step": 31530 + }, + { + "epoch": 0.94, + "learning_rate": 2.068924815303153e-06, + "loss": 1.0006, + "step": 31535 + }, + { + "epoch": 0.94, + "learning_rate": 2.058392022397937e-06, + "loss": 1.0368, + "step": 31540 + }, + { + "epoch": 0.94, + "learning_rate": 2.0478858301066774e-06, + "loss": 0.9859, + "step": 31545 + }, + { + "epoch": 0.94, + "learning_rate": 2.037406241282791e-06, + "loss": 0.9958, + "step": 31550 + }, + { + "epoch": 0.94, + "learning_rate": 2.0269532587724905e-06, + "loss": 0.9624, + "step": 31555 + }, + { + "epoch": 0.94, + "learning_rate": 2.0165268854147934e-06, + "loss": 1.0006, + "step": 31560 + }, + { + "epoch": 0.94, + "learning_rate": 2.006127124041479e-06, + "loss": 1.0349, + "step": 31565 + }, + { + "epoch": 0.94, + "learning_rate": 1.995753977477066e-06, + "loss": 1.076, + "step": 31570 + }, + { + "epoch": 0.94, + "learning_rate": 1.985407448538856e-06, + "loss": 1.0391, + "step": 31575 + }, + { + "epoch": 0.94, + "learning_rate": 1.9750875400369574e-06, + "loss": 1.0307, + "step": 31580 + }, + { + "epoch": 0.94, + "learning_rate": 1.964794254774216e-06, + "loss": 1.0838, + "step": 31585 + }, + { + "epoch": 0.94, + "learning_rate": 1.9545275955462515e-06, + "loss": 1.0399, + "step": 31590 + }, + { + "epoch": 0.94, + "learning_rate": 1.944287565141445e-06, + "loss": 0.9835, + "step": 31595 + }, + { + "epoch": 0.94, + "learning_rate": 1.93407416634096e-06, + "loss": 1.0287, + "step": 31600 + }, + { + "epoch": 0.94, + "learning_rate": 1.9238874019187337e-06, + "loss": 1.0004, + "step": 31605 + }, + { + "epoch": 0.94, + "learning_rate": 1.9137272746414526e-06, + "loss": 1.0125, + "step": 31610 + }, + { + "epoch": 0.94, + "learning_rate": 1.9035937872685428e-06, + "loss": 1.052, + "step": 31615 + }, + { + "epoch": 0.94, + "learning_rate": 1.8934869425522805e-06, + "loss": 1.0528, + "step": 31620 + }, + { + "epoch": 0.94, + "learning_rate": 1.8834067432376146e-06, + "loss": 0.9839, + "step": 31625 + }, + { + "epoch": 0.94, + "learning_rate": 1.8733531920622882e-06, + "loss": 1.0657, + "step": 31630 + }, + { + "epoch": 0.94, + "learning_rate": 1.8633262917568284e-06, + "loss": 1.0155, + "step": 31635 + }, + { + "epoch": 0.94, + "learning_rate": 1.8533260450445233e-06, + "loss": 1.0006, + "step": 31640 + }, + { + "epoch": 0.94, + "learning_rate": 1.8433524546413783e-06, + "loss": 0.9867, + "step": 31645 + }, + { + "epoch": 0.95, + "learning_rate": 1.833405523256193e-06, + "loss": 1.0397, + "step": 31650 + }, + { + "epoch": 0.95, + "learning_rate": 1.8234852535905289e-06, + "loss": 1.0558, + "step": 31655 + }, + { + "epoch": 0.95, + "learning_rate": 1.8135916483386862e-06, + "loss": 1.1248, + "step": 31660 + }, + { + "epoch": 0.95, + "learning_rate": 1.8037247101877485e-06, + "loss": 1.0147, + "step": 31665 + }, + { + "epoch": 0.95, + "learning_rate": 1.7938844418175283e-06, + "loss": 1.0625, + "step": 31670 + }, + { + "epoch": 0.95, + "learning_rate": 1.7840708459006317e-06, + "loss": 1.0189, + "step": 31675 + }, + { + "epoch": 0.95, + "learning_rate": 1.7742839251023825e-06, + "loss": 1.0023, + "step": 31680 + }, + { + "epoch": 0.95, + "learning_rate": 1.7645236820808763e-06, + "loss": 1.0383, + "step": 31685 + }, + { + "epoch": 0.95, + "learning_rate": 1.7547901194869708e-06, + "loss": 1.075, + "step": 31690 + }, + { + "epoch": 0.95, + "learning_rate": 1.7450832399642624e-06, + "loss": 1.0485, + "step": 31695 + }, + { + "epoch": 0.95, + "learning_rate": 1.7354030461490977e-06, + "loss": 1.0722, + "step": 31700 + }, + { + "epoch": 0.95, + "learning_rate": 1.725749540670596e-06, + "loss": 0.9609, + "step": 31705 + }, + { + "epoch": 0.95, + "learning_rate": 1.7161227261506262e-06, + "loss": 1.0714, + "step": 31710 + }, + { + "epoch": 0.95, + "learning_rate": 1.7065226052037974e-06, + "loss": 1.0413, + "step": 31715 + }, + { + "epoch": 0.95, + "learning_rate": 1.696949180437446e-06, + "loss": 1.0232, + "step": 31720 + }, + { + "epoch": 0.95, + "learning_rate": 1.6874024544517031e-06, + "loss": 1.0165, + "step": 31725 + }, + { + "epoch": 0.95, + "learning_rate": 1.677882429839428e-06, + "loss": 1.0173, + "step": 31730 + }, + { + "epoch": 0.95, + "learning_rate": 1.6683891091862303e-06, + "loss": 0.9346, + "step": 31735 + }, + { + "epoch": 0.95, + "learning_rate": 1.6589224950704364e-06, + "loss": 1.0324, + "step": 31740 + }, + { + "epoch": 0.95, + "learning_rate": 1.6494825900631783e-06, + "loss": 0.9628, + "step": 31745 + }, + { + "epoch": 0.95, + "learning_rate": 1.6400693967282943e-06, + "loss": 1.0238, + "step": 31750 + }, + { + "epoch": 0.95, + "learning_rate": 1.6306829176223836e-06, + "loss": 1.057, + "step": 31755 + }, + { + "epoch": 0.95, + "learning_rate": 1.6213231552947407e-06, + "loss": 1.0534, + "step": 31760 + }, + { + "epoch": 0.95, + "learning_rate": 1.6119901122875091e-06, + "loss": 1.0536, + "step": 31765 + }, + { + "epoch": 0.95, + "learning_rate": 1.602683791135473e-06, + "loss": 1.0999, + "step": 31770 + }, + { + "epoch": 0.95, + "learning_rate": 1.5934041943661993e-06, + "loss": 1.0266, + "step": 31775 + }, + { + "epoch": 0.95, + "learning_rate": 1.584151324500005e-06, + "loss": 1.0348, + "step": 31780 + }, + { + "epoch": 0.95, + "learning_rate": 1.5749251840499468e-06, + "loss": 1.0146, + "step": 31785 + }, + { + "epoch": 0.95, + "learning_rate": 1.565725775521798e-06, + "loss": 0.9694, + "step": 31790 + }, + { + "epoch": 0.95, + "learning_rate": 1.5565531014140933e-06, + "loss": 0.9885, + "step": 31795 + }, + { + "epoch": 0.95, + "learning_rate": 1.5474071642180955e-06, + "loss": 1.036, + "step": 31800 + }, + { + "epoch": 0.95, + "learning_rate": 1.5382879664178173e-06, + "loss": 0.9918, + "step": 31805 + }, + { + "epoch": 0.95, + "learning_rate": 1.5291955104900002e-06, + "loss": 1.0029, + "step": 31810 + }, + { + "epoch": 0.95, + "learning_rate": 1.5201297989041242e-06, + "loss": 1.0074, + "step": 31815 + }, + { + "epoch": 0.95, + "learning_rate": 1.5110908341223973e-06, + "loss": 0.9416, + "step": 31820 + }, + { + "epoch": 0.95, + "learning_rate": 1.5020786185997892e-06, + "loss": 0.9935, + "step": 31825 + }, + { + "epoch": 0.95, + "learning_rate": 1.4930931547839755e-06, + "loss": 1.0025, + "step": 31830 + }, + { + "epoch": 0.95, + "learning_rate": 1.4841344451153926e-06, + "loss": 1.0473, + "step": 31835 + }, + { + "epoch": 0.95, + "learning_rate": 1.4752024920271834e-06, + "loss": 0.9244, + "step": 31840 + }, + { + "epoch": 0.95, + "learning_rate": 1.4662972979452406e-06, + "loss": 1.0574, + "step": 31845 + }, + { + "epoch": 0.95, + "learning_rate": 1.4574188652881737e-06, + "loss": 0.9878, + "step": 31850 + }, + { + "epoch": 0.95, + "learning_rate": 1.4485671964673542e-06, + "loss": 1.031, + "step": 31855 + }, + { + "epoch": 0.95, + "learning_rate": 1.4397422938868587e-06, + "loss": 1.0016, + "step": 31860 + }, + { + "epoch": 0.95, + "learning_rate": 1.4309441599434926e-06, + "loss": 1.031, + "step": 31865 + }, + { + "epoch": 0.95, + "learning_rate": 1.4221727970267995e-06, + "loss": 1.1121, + "step": 31870 + }, + { + "epoch": 0.95, + "learning_rate": 1.413428207519074e-06, + "loss": 1.0544, + "step": 31875 + }, + { + "epoch": 0.95, + "learning_rate": 1.4047103937953055e-06, + "loss": 1.0319, + "step": 31880 + }, + { + "epoch": 0.95, + "learning_rate": 1.3960193582231995e-06, + "loss": 1.0342, + "step": 31885 + }, + { + "epoch": 0.95, + "learning_rate": 1.387355103163257e-06, + "loss": 1.0571, + "step": 31890 + }, + { + "epoch": 0.95, + "learning_rate": 1.378717630968629e-06, + "loss": 1.0248, + "step": 31895 + }, + { + "epoch": 0.95, + "learning_rate": 1.3701069439852388e-06, + "loss": 1.058, + "step": 31900 + }, + { + "epoch": 0.95, + "learning_rate": 1.3615230445516935e-06, + "loss": 1.0246, + "step": 31905 + }, + { + "epoch": 0.95, + "learning_rate": 1.352965934999384e-06, + "loss": 1.0643, + "step": 31910 + }, + { + "epoch": 0.95, + "learning_rate": 1.3444356176523843e-06, + "loss": 1.0222, + "step": 31915 + }, + { + "epoch": 0.95, + "learning_rate": 1.3359320948274967e-06, + "loss": 1.0451, + "step": 31920 + }, + { + "epoch": 0.95, + "learning_rate": 1.3274553688342405e-06, + "loss": 0.9478, + "step": 31925 + }, + { + "epoch": 0.95, + "learning_rate": 1.3190054419748853e-06, + "loss": 1.0766, + "step": 31930 + }, + { + "epoch": 0.95, + "learning_rate": 1.3105823165443843e-06, + "loss": 1.0978, + "step": 31935 + }, + { + "epoch": 0.95, + "learning_rate": 1.3021859948304404e-06, + "loss": 1.0888, + "step": 31940 + }, + { + "epoch": 0.95, + "learning_rate": 1.2938164791134632e-06, + "loss": 1.0472, + "step": 31945 + }, + { + "epoch": 0.95, + "learning_rate": 1.2854737716665898e-06, + "loss": 1.0702, + "step": 31950 + }, + { + "epoch": 0.95, + "learning_rate": 1.2771578747556744e-06, + "loss": 1.071, + "step": 31955 + }, + { + "epoch": 0.95, + "learning_rate": 1.268868790639277e-06, + "loss": 1.0957, + "step": 31960 + }, + { + "epoch": 0.95, + "learning_rate": 1.2606065215686857e-06, + "loss": 1.0649, + "step": 31965 + }, + { + "epoch": 0.95, + "learning_rate": 1.2523710697879165e-06, + "loss": 1.0995, + "step": 31970 + }, + { + "epoch": 0.95, + "learning_rate": 1.2441624375336692e-06, + "loss": 1.016, + "step": 31975 + }, + { + "epoch": 0.95, + "learning_rate": 1.2359806270354157e-06, + "loss": 0.9451, + "step": 31980 + }, + { + "epoch": 0.96, + "learning_rate": 1.2278256405152677e-06, + "loss": 0.9604, + "step": 31985 + }, + { + "epoch": 0.96, + "learning_rate": 1.2196974801881312e-06, + "loss": 1.1377, + "step": 31990 + }, + { + "epoch": 0.96, + "learning_rate": 1.2115961482615513e-06, + "loss": 0.9887, + "step": 31995 + }, + { + "epoch": 0.96, + "learning_rate": 1.2035216469358456e-06, + "loss": 1.047, + "step": 32000 + }, + { + "epoch": 0.96, + "learning_rate": 1.195473978404016e-06, + "loss": 0.9851, + "step": 32005 + }, + { + "epoch": 0.96, + "learning_rate": 1.187453144851791e-06, + "loss": 1.0507, + "step": 32010 + }, + { + "epoch": 0.96, + "learning_rate": 1.1794591484575735e-06, + "loss": 1.0181, + "step": 32015 + }, + { + "epoch": 0.96, + "learning_rate": 1.171491991392548e-06, + "loss": 1.0494, + "step": 32020 + }, + { + "epoch": 0.96, + "learning_rate": 1.163551675820529e-06, + "loss": 1.08, + "step": 32025 + }, + { + "epoch": 0.96, + "learning_rate": 1.1556382038981018e-06, + "loss": 1.0333, + "step": 32030 + }, + { + "epoch": 0.96, + "learning_rate": 1.1477515777745363e-06, + "loss": 1.0454, + "step": 32035 + }, + { + "epoch": 0.96, + "learning_rate": 1.139891799591819e-06, + "loss": 1.0206, + "step": 32040 + }, + { + "epoch": 0.96, + "learning_rate": 1.1320588714846203e-06, + "loss": 1.0503, + "step": 32045 + }, + { + "epoch": 0.96, + "learning_rate": 1.1242527955803494e-06, + "loss": 1.0414, + "step": 32050 + }, + { + "epoch": 0.96, + "learning_rate": 1.1164735739991217e-06, + "loss": 1.039, + "step": 32055 + }, + { + "epoch": 0.96, + "learning_rate": 1.1087212088537357e-06, + "loss": 1.0918, + "step": 32060 + }, + { + "epoch": 0.96, + "learning_rate": 1.1009957022496964e-06, + "loss": 0.9735, + "step": 32065 + }, + { + "epoch": 0.96, + "learning_rate": 1.0932970562852363e-06, + "loss": 1.0426, + "step": 32070 + }, + { + "epoch": 0.96, + "learning_rate": 1.085625273051294e-06, + "loss": 1.0185, + "step": 32075 + }, + { + "epoch": 0.96, + "learning_rate": 1.0779803546314914e-06, + "loss": 1.0293, + "step": 32080 + }, + { + "epoch": 0.96, + "learning_rate": 1.070362303102146e-06, + "loss": 1.0111, + "step": 32085 + }, + { + "epoch": 0.96, + "learning_rate": 1.0627711205323243e-06, + "loss": 0.9563, + "step": 32090 + }, + { + "epoch": 0.96, + "learning_rate": 1.055206808983744e-06, + "loss": 0.9391, + "step": 32095 + }, + { + "epoch": 0.96, + "learning_rate": 1.047669370510862e-06, + "loss": 1.0268, + "step": 32100 + }, + { + "epoch": 0.96, + "learning_rate": 1.040158807160807e-06, + "loss": 0.9797, + "step": 32105 + }, + { + "epoch": 0.96, + "learning_rate": 1.0326751209734253e-06, + "loss": 1.1068, + "step": 32110 + }, + { + "epoch": 0.96, + "learning_rate": 1.0252183139812798e-06, + "loss": 1.0893, + "step": 32115 + }, + { + "epoch": 0.96, + "learning_rate": 1.0177883882095952e-06, + "loss": 1.1198, + "step": 32120 + }, + { + "epoch": 0.96, + "learning_rate": 1.0103853456763234e-06, + "loss": 1.1513, + "step": 32125 + }, + { + "epoch": 0.96, + "learning_rate": 1.0030091883920901e-06, + "loss": 1.0886, + "step": 32130 + }, + { + "epoch": 0.96, + "learning_rate": 9.95659918360259e-07, + "loss": 1.0344, + "step": 32135 + }, + { + "epoch": 0.96, + "learning_rate": 9.883375375768555e-07, + "loss": 1.0995, + "step": 32140 + }, + { + "epoch": 0.96, + "learning_rate": 9.810420480306114e-07, + "loss": 1.0451, + "step": 32145 + }, + { + "epoch": 0.96, + "learning_rate": 9.73773451702964e-07, + "loss": 0.993, + "step": 32150 + }, + { + "epoch": 0.96, + "learning_rate": 9.665317505680338e-07, + "loss": 1.0514, + "step": 32155 + }, + { + "epoch": 0.96, + "learning_rate": 9.593169465926367e-07, + "loss": 1.0695, + "step": 32160 + }, + { + "epoch": 0.96, + "learning_rate": 9.521290417363049e-07, + "loss": 1.0261, + "step": 32165 + }, + { + "epoch": 0.96, + "learning_rate": 9.449680379512438e-07, + "loss": 1.0046, + "step": 32170 + }, + { + "epoch": 0.96, + "learning_rate": 9.37833937182342e-07, + "loss": 1.0536, + "step": 32175 + }, + { + "epoch": 0.96, + "learning_rate": 9.307267413672272e-07, + "loss": 0.9976, + "step": 32180 + }, + { + "epoch": 0.96, + "learning_rate": 9.236464524361777e-07, + "loss": 0.983, + "step": 32185 + }, + { + "epoch": 0.96, + "learning_rate": 9.165930723121663e-07, + "loss": 1.0096, + "step": 32190 + }, + { + "epoch": 0.96, + "learning_rate": 9.095666029108718e-07, + "loss": 1.0738, + "step": 32195 + }, + { + "epoch": 0.96, + "learning_rate": 9.025670461406677e-07, + "loss": 1.0978, + "step": 32200 + }, + { + "epoch": 0.96, + "learning_rate": 8.955944039025999e-07, + "loss": 1.0489, + "step": 32205 + }, + { + "epoch": 0.96, + "learning_rate": 8.886486780904202e-07, + "loss": 1.013, + "step": 32210 + }, + { + "epoch": 0.96, + "learning_rate": 8.817298705905641e-07, + "loss": 1.085, + "step": 32215 + }, + { + "epoch": 0.96, + "learning_rate": 8.748379832821618e-07, + "loss": 1.0305, + "step": 32220 + }, + { + "epoch": 0.96, + "learning_rate": 8.679730180370271e-07, + "loss": 1.0548, + "step": 32225 + }, + { + "epoch": 0.96, + "learning_rate": 8.611349767196464e-07, + "loss": 0.9744, + "step": 32230 + }, + { + "epoch": 0.96, + "learning_rate": 8.543238611872117e-07, + "loss": 1.017, + "step": 32235 + }, + { + "epoch": 0.96, + "learning_rate": 8.475396732896213e-07, + "loss": 1.0419, + "step": 32240 + }, + { + "epoch": 0.96, + "learning_rate": 8.407824148694232e-07, + "loss": 1.0073, + "step": 32245 + }, + { + "epoch": 0.96, + "learning_rate": 8.340520877618496e-07, + "loss": 1.0156, + "step": 32250 + }, + { + "epoch": 0.96, + "learning_rate": 8.273486937948715e-07, + "loss": 1.0191, + "step": 32255 + }, + { + "epoch": 0.96, + "learning_rate": 8.206722347890772e-07, + "loss": 1.0857, + "step": 32260 + }, + { + "epoch": 0.96, + "learning_rate": 8.140227125577826e-07, + "loss": 0.9939, + "step": 32265 + }, + { + "epoch": 0.96, + "learning_rate": 8.074001289069765e-07, + "loss": 1.0154, + "step": 32270 + }, + { + "epoch": 0.96, + "learning_rate": 8.008044856353313e-07, + "loss": 1.035, + "step": 32275 + }, + { + "epoch": 0.96, + "learning_rate": 7.94235784534203e-07, + "loss": 1.0048, + "step": 32280 + }, + { + "epoch": 0.96, + "learning_rate": 7.876940273876199e-07, + "loss": 1.1056, + "step": 32285 + }, + { + "epoch": 0.96, + "learning_rate": 7.811792159723052e-07, + "loss": 1.1132, + "step": 32290 + }, + { + "epoch": 0.96, + "learning_rate": 7.746913520576659e-07, + "loss": 1.0387, + "step": 32295 + }, + { + "epoch": 0.96, + "learning_rate": 7.682304374057703e-07, + "loss": 1.0302, + "step": 32300 + }, + { + "epoch": 0.96, + "learning_rate": 7.617964737714034e-07, + "loss": 0.9582, + "step": 32305 + }, + { + "epoch": 0.96, + "learning_rate": 7.553894629019897e-07, + "loss": 1.0565, + "step": 32310 + }, + { + "epoch": 0.96, + "learning_rate": 7.490094065376596e-07, + "loss": 0.9845, + "step": 32315 + }, + { + "epoch": 0.97, + "learning_rate": 7.426563064112046e-07, + "loss": 0.994, + "step": 32320 + }, + { + "epoch": 0.97, + "learning_rate": 7.363301642481224e-07, + "loss": 1.074, + "step": 32325 + }, + { + "epoch": 0.97, + "learning_rate": 7.300309817665718e-07, + "loss": 1.0267, + "step": 32330 + }, + { + "epoch": 0.97, + "learning_rate": 7.237587606773733e-07, + "loss": 1.0613, + "step": 32335 + }, + { + "epoch": 0.97, + "learning_rate": 7.175135026840419e-07, + "loss": 1.0581, + "step": 32340 + }, + { + "epoch": 0.97, + "learning_rate": 7.112952094827985e-07, + "loss": 1.02, + "step": 32345 + }, + { + "epoch": 0.97, + "learning_rate": 7.051038827624701e-07, + "loss": 1.0211, + "step": 32350 + }, + { + "epoch": 0.97, + "learning_rate": 6.989395242046226e-07, + "loss": 1.0552, + "step": 32355 + }, + { + "epoch": 0.97, + "learning_rate": 6.928021354834724e-07, + "loss": 1.0421, + "step": 32360 + }, + { + "epoch": 0.97, + "learning_rate": 6.866917182659194e-07, + "loss": 1.052, + "step": 32365 + }, + { + "epoch": 0.97, + "learning_rate": 6.80608274211536e-07, + "loss": 1.0149, + "step": 32370 + }, + { + "epoch": 0.97, + "learning_rate": 6.745518049725342e-07, + "loss": 0.9806, + "step": 32375 + }, + { + "epoch": 0.97, + "learning_rate": 6.685223121938644e-07, + "loss": 1.025, + "step": 32380 + }, + { + "epoch": 0.97, + "learning_rate": 6.625197975131059e-07, + "loss": 0.9931, + "step": 32385 + }, + { + "epoch": 0.97, + "learning_rate": 6.565442625605211e-07, + "loss": 1.0155, + "step": 32390 + }, + { + "epoch": 0.97, + "learning_rate": 6.505957089590342e-07, + "loss": 0.9602, + "step": 32395 + }, + { + "epoch": 0.97, + "learning_rate": 6.446741383242638e-07, + "loss": 0.9849, + "step": 32400 + }, + { + "epoch": 0.97, + "learning_rate": 6.387795522645013e-07, + "loss": 1.0305, + "step": 32405 + }, + { + "epoch": 0.97, + "learning_rate": 6.32911952380677e-07, + "loss": 0.9896, + "step": 32410 + }, + { + "epoch": 0.97, + "learning_rate": 6.270713402664163e-07, + "loss": 1.0623, + "step": 32415 + }, + { + "epoch": 0.97, + "learning_rate": 6.212577175080169e-07, + "loss": 1.0462, + "step": 32420 + }, + { + "epoch": 0.97, + "learning_rate": 6.15471085684427e-07, + "loss": 0.9894, + "step": 32425 + }, + { + "epoch": 0.97, + "learning_rate": 6.097114463672892e-07, + "loss": 1.0602, + "step": 32430 + }, + { + "epoch": 0.97, + "learning_rate": 6.039788011208858e-07, + "loss": 0.9752, + "step": 32435 + }, + { + "epoch": 0.97, + "learning_rate": 5.982731515022044e-07, + "loss": 1.0304, + "step": 32440 + }, + { + "epoch": 0.97, + "learning_rate": 5.925944990608722e-07, + "loss": 0.9845, + "step": 32445 + }, + { + "epoch": 0.97, + "learning_rate": 5.86942845339189e-07, + "loss": 0.9608, + "step": 32450 + }, + { + "epoch": 0.97, + "learning_rate": 5.813181918721267e-07, + "loss": 0.9767, + "step": 32455 + }, + { + "epoch": 0.97, + "learning_rate": 5.757205401873189e-07, + "loss": 1.0355, + "step": 32460 + }, + { + "epoch": 0.97, + "learning_rate": 5.701498918050719e-07, + "loss": 1.0736, + "step": 32465 + }, + { + "epoch": 0.97, + "learning_rate": 5.64606248238364e-07, + "loss": 0.9918, + "step": 32470 + }, + { + "epoch": 0.97, + "learning_rate": 5.590896109928245e-07, + "loss": 0.9919, + "step": 32475 + }, + { + "epoch": 0.97, + "learning_rate": 5.535999815667436e-07, + "loss": 1.0201, + "step": 32480 + }, + { + "epoch": 0.97, + "learning_rate": 5.481373614510954e-07, + "loss": 0.9953, + "step": 32485 + }, + { + "epoch": 0.97, + "learning_rate": 5.427017521295153e-07, + "loss": 0.9967, + "step": 32490 + }, + { + "epoch": 0.97, + "learning_rate": 5.372931550782889e-07, + "loss": 1.063, + "step": 32495 + }, + { + "epoch": 0.97, + "learning_rate": 5.319115717663636e-07, + "loss": 1.0328, + "step": 32500 + }, + { + "epoch": 0.97, + "learning_rate": 5.265570036553813e-07, + "loss": 1.1129, + "step": 32505 + }, + { + "epoch": 0.97, + "learning_rate": 5.21229452199612e-07, + "loss": 1.0175, + "step": 32510 + }, + { + "epoch": 0.97, + "learning_rate": 5.159289188459982e-07, + "loss": 1.0648, + "step": 32515 + }, + { + "epoch": 0.97, + "learning_rate": 5.10655405034155e-07, + "loss": 0.9695, + "step": 32520 + }, + { + "epoch": 0.97, + "learning_rate": 5.054089121963368e-07, + "loss": 1.0289, + "step": 32525 + }, + { + "epoch": 0.97, + "learning_rate": 5.001894417574926e-07, + "loss": 0.9954, + "step": 32530 + }, + { + "epoch": 0.97, + "learning_rate": 4.949969951352107e-07, + "loss": 1.0295, + "step": 32535 + }, + { + "epoch": 0.97, + "learning_rate": 4.898315737397186e-07, + "loss": 1.0741, + "step": 32540 + }, + { + "epoch": 0.97, + "learning_rate": 4.846931789739606e-07, + "loss": 1.0239, + "step": 32545 + }, + { + "epoch": 0.97, + "learning_rate": 4.795818122334872e-07, + "loss": 1.0379, + "step": 32550 + }, + { + "epoch": 0.97, + "learning_rate": 4.744974749065323e-07, + "loss": 1.0666, + "step": 32555 + }, + { + "epoch": 0.97, + "learning_rate": 4.694401683739913e-07, + "loss": 1.0056, + "step": 32560 + }, + { + "epoch": 0.97, + "learning_rate": 4.6440989400939884e-07, + "loss": 1.1186, + "step": 32565 + }, + { + "epoch": 0.97, + "learning_rate": 4.5940665317897313e-07, + "loss": 1.0841, + "step": 32570 + }, + { + "epoch": 0.97, + "learning_rate": 4.544304472415717e-07, + "loss": 0.9992, + "step": 32575 + }, + { + "epoch": 0.97, + "learning_rate": 4.494812775487134e-07, + "loss": 1.0727, + "step": 32580 + }, + { + "epoch": 0.97, + "learning_rate": 4.445591454445897e-07, + "loss": 0.9975, + "step": 32585 + }, + { + "epoch": 0.97, + "learning_rate": 4.3966405226602006e-07, + "loss": 1.0705, + "step": 32590 + }, + { + "epoch": 0.97, + "learning_rate": 4.347959993424966e-07, + "loss": 1.0233, + "step": 32595 + }, + { + "epoch": 0.97, + "learning_rate": 4.2995498799617284e-07, + "loss": 1.0065, + "step": 32600 + }, + { + "epoch": 0.97, + "learning_rate": 4.251410195418526e-07, + "loss": 1.1057, + "step": 32605 + }, + { + "epoch": 0.97, + "learning_rate": 4.203540952869789e-07, + "loss": 0.9315, + "step": 32610 + }, + { + "epoch": 0.97, + "learning_rate": 4.155942165316784e-07, + "loss": 0.9815, + "step": 32615 + }, + { + "epoch": 0.97, + "learning_rate": 4.1086138456871704e-07, + "loss": 1.0068, + "step": 32620 + }, + { + "epoch": 0.97, + "learning_rate": 4.061556006835221e-07, + "loss": 0.9568, + "step": 32625 + }, + { + "epoch": 0.97, + "learning_rate": 4.01476866154149e-07, + "loss": 0.9929, + "step": 32630 + }, + { + "epoch": 0.97, + "learning_rate": 3.96825182251348e-07, + "loss": 1.0437, + "step": 32635 + }, + { + "epoch": 0.97, + "learning_rate": 3.922005502384862e-07, + "loss": 0.9601, + "step": 32640 + }, + { + "epoch": 0.97, + "learning_rate": 3.876029713716034e-07, + "loss": 1.0354, + "step": 32645 + }, + { + "epoch": 0.97, + "learning_rate": 3.830324468993896e-07, + "loss": 1.0116, + "step": 32650 + }, + { + "epoch": 0.98, + "learning_rate": 3.7848897806317394e-07, + "loss": 1.0049, + "step": 32655 + }, + { + "epoch": 0.98, + "learning_rate": 3.7397256609695843e-07, + "loss": 1.0258, + "step": 32660 + }, + { + "epoch": 0.98, + "learning_rate": 3.6948321222738393e-07, + "loss": 1.0545, + "step": 32665 + }, + { + "epoch": 0.98, + "learning_rate": 3.650209176737418e-07, + "loss": 0.9421, + "step": 32670 + }, + { + "epoch": 0.98, + "learning_rate": 3.6058568364797373e-07, + "loss": 0.9724, + "step": 32675 + }, + { + "epoch": 0.98, + "learning_rate": 3.561775113546828e-07, + "loss": 0.9906, + "step": 32680 + }, + { + "epoch": 0.98, + "learning_rate": 3.517964019910891e-07, + "loss": 1.0109, + "step": 32685 + }, + { + "epoch": 0.98, + "learning_rate": 3.4744235674711855e-07, + "loss": 1.0737, + "step": 32690 + }, + { + "epoch": 0.98, + "learning_rate": 3.431153768053141e-07, + "loss": 1.1039, + "step": 32695 + }, + { + "epoch": 0.98, + "learning_rate": 3.388154633408358e-07, + "loss": 1.0549, + "step": 32700 + }, + { + "epoch": 0.98, + "learning_rate": 3.345426175215605e-07, + "loss": 0.9412, + "step": 32705 + }, + { + "epoch": 0.98, + "learning_rate": 3.3029684050797096e-07, + "loss": 1.1049, + "step": 32710 + }, + { + "epoch": 0.98, + "learning_rate": 3.260781334531893e-07, + "loss": 1.008, + "step": 32715 + }, + { + "epoch": 0.98, + "learning_rate": 3.2188649750302113e-07, + "loss": 1.0106, + "step": 32720 + }, + { + "epoch": 0.98, + "learning_rate": 3.177219337958892e-07, + "loss": 1.0408, + "step": 32725 + }, + { + "epoch": 0.98, + "learning_rate": 3.1358444346288874e-07, + "loss": 1.0594, + "step": 32730 + }, + { + "epoch": 0.98, + "learning_rate": 3.094740276277319e-07, + "loss": 1.024, + "step": 32735 + }, + { + "epoch": 0.98, + "learning_rate": 3.053906874068035e-07, + "loss": 1.0445, + "step": 32740 + }, + { + "epoch": 0.98, + "learning_rate": 3.0133442390912757e-07, + "loss": 1.0856, + "step": 32745 + }, + { + "epoch": 0.98, + "learning_rate": 2.973052382363561e-07, + "loss": 1.0289, + "step": 32750 + }, + { + "epoch": 0.98, + "learning_rate": 2.9330313148282497e-07, + "loss": 1.0559, + "step": 32755 + }, + { + "epoch": 0.98, + "learning_rate": 2.8932810473547565e-07, + "loss": 1.0297, + "step": 32760 + }, + { + "epoch": 0.98, + "learning_rate": 2.8538015907392244e-07, + "loss": 1.0459, + "step": 32765 + }, + { + "epoch": 0.98, + "learning_rate": 2.814592955704076e-07, + "loss": 0.9938, + "step": 32770 + }, + { + "epoch": 0.98, + "learning_rate": 2.7756551528982376e-07, + "loss": 1.031, + "step": 32775 + }, + { + "epoch": 0.98, + "learning_rate": 2.736988192897139e-07, + "loss": 0.9878, + "step": 32780 + }, + { + "epoch": 0.98, + "learning_rate": 2.6985920862026004e-07, + "loss": 1.0246, + "step": 32785 + }, + { + "epoch": 0.98, + "learning_rate": 2.660466843242837e-07, + "loss": 1.0699, + "step": 32790 + }, + { + "epoch": 0.98, + "learning_rate": 2.6226124743724544e-07, + "loss": 1.0789, + "step": 32795 + }, + { + "epoch": 0.98, + "learning_rate": 2.5850289898727844e-07, + "loss": 0.9432, + "step": 32800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5477163999513275e-07, + "loss": 1.006, + "step": 32805 + }, + { + "epoch": 0.98, + "learning_rate": 2.5106747147418675e-07, + "loss": 1.119, + "step": 32810 + }, + { + "epoch": 0.98, + "learning_rate": 2.4739039443049115e-07, + "loss": 1.0462, + "step": 32815 + }, + { + "epoch": 0.98, + "learning_rate": 2.4374040986274714e-07, + "loss": 1.0925, + "step": 32820 + }, + { + "epoch": 0.98, + "learning_rate": 2.4011751876223955e-07, + "loss": 1.0687, + "step": 32825 + }, + { + "epoch": 0.98, + "learning_rate": 2.3652172211297008e-07, + "loss": 1.0658, + "step": 32830 + }, + { + "epoch": 0.98, + "learning_rate": 2.3295302089152425e-07, + "loss": 0.9806, + "step": 32835 + }, + { + "epoch": 0.98, + "learning_rate": 2.29411416067149e-07, + "loss": 0.9645, + "step": 32840 + }, + { + "epoch": 0.98, + "learning_rate": 2.258969086017415e-07, + "loss": 1.0585, + "step": 32845 + }, + { + "epoch": 0.98, + "learning_rate": 2.2240949944982714e-07, + "loss": 1.0871, + "step": 32850 + }, + { + "epoch": 0.98, + "learning_rate": 2.1894918955858158e-07, + "loss": 1.0287, + "step": 32855 + }, + { + "epoch": 0.98, + "learning_rate": 2.1551597986780859e-07, + "loss": 0.9769, + "step": 32860 + }, + { + "epoch": 0.98, + "learning_rate": 2.121098713099401e-07, + "loss": 1.0395, + "step": 32865 + }, + { + "epoch": 0.98, + "learning_rate": 2.0873086481008053e-07, + "loss": 1.059, + "step": 32870 + }, + { + "epoch": 0.98, + "learning_rate": 2.0537896128596245e-07, + "loss": 1.102, + "step": 32875 + }, + { + "epoch": 0.98, + "learning_rate": 2.0205416164794655e-07, + "loss": 0.9808, + "step": 32880 + }, + { + "epoch": 0.98, + "learning_rate": 1.987564667990216e-07, + "loss": 1.0021, + "step": 32885 + }, + { + "epoch": 0.98, + "learning_rate": 1.9548587763484893e-07, + "loss": 0.9878, + "step": 32890 + }, + { + "epoch": 0.98, + "learning_rate": 1.9224239504370689e-07, + "loss": 1.0223, + "step": 32895 + }, + { + "epoch": 0.98, + "learning_rate": 1.8902601990650192e-07, + "loss": 1.0479, + "step": 32900 + }, + { + "epoch": 0.98, + "learning_rate": 1.8583675309681304e-07, + "loss": 1.0421, + "step": 32905 + }, + { + "epoch": 0.98, + "learning_rate": 1.8267459548081402e-07, + "loss": 1.0738, + "step": 32910 + }, + { + "epoch": 0.98, + "learning_rate": 1.7953954791734007e-07, + "loss": 1.0262, + "step": 32915 + }, + { + "epoch": 0.98, + "learning_rate": 1.7643161125787677e-07, + "loss": 1.1363, + "step": 32920 + }, + { + "epoch": 0.98, + "learning_rate": 1.7335078634650447e-07, + "loss": 0.9707, + "step": 32925 + }, + { + "epoch": 0.98, + "learning_rate": 1.7029707401997606e-07, + "loss": 1.0555, + "step": 32930 + }, + { + "epoch": 0.98, + "learning_rate": 1.6727047510768369e-07, + "loss": 1.0597, + "step": 32935 + }, + { + "epoch": 0.98, + "learning_rate": 1.642709904316142e-07, + "loss": 1.0569, + "step": 32940 + }, + { + "epoch": 0.98, + "learning_rate": 1.612986208064382e-07, + "loss": 1.0853, + "step": 32945 + }, + { + "epoch": 0.98, + "learning_rate": 1.5835336703943216e-07, + "loss": 1.1017, + "step": 32950 + }, + { + "epoch": 0.98, + "learning_rate": 1.5543522993052284e-07, + "loss": 1.0974, + "step": 32955 + }, + { + "epoch": 0.98, + "learning_rate": 1.5254421027225408e-07, + "loss": 0.9823, + "step": 32960 + }, + { + "epoch": 0.98, + "learning_rate": 1.496803088498422e-07, + "loss": 1.0301, + "step": 32965 + }, + { + "epoch": 0.98, + "learning_rate": 1.4684352644108723e-07, + "loss": 1.0211, + "step": 32970 + }, + { + "epoch": 0.98, + "learning_rate": 1.4403386381646177e-07, + "loss": 1.0065, + "step": 32975 + }, + { + "epoch": 0.98, + "learning_rate": 1.412513217390554e-07, + "loss": 1.0621, + "step": 32980 + }, + { + "epoch": 0.98, + "learning_rate": 1.3849590096460806e-07, + "loss": 1.1379, + "step": 32985 + }, + { + "epoch": 0.99, + "learning_rate": 1.3576760224147667e-07, + "loss": 1.0065, + "step": 32990 + }, + { + "epoch": 0.99, + "learning_rate": 1.3306642631064625e-07, + "loss": 1.0494, + "step": 32995 + }, + { + "epoch": 0.99, + "learning_rate": 1.303923739057633e-07, + "loss": 1.0373, + "step": 33000 + }, + { + "epoch": 0.99, + "learning_rate": 1.2774544575309134e-07, + "loss": 1.0533, + "step": 33005 + }, + { + "epoch": 0.99, + "learning_rate": 1.2512564257152193e-07, + "loss": 0.953, + "step": 33010 + }, + { + "epoch": 0.99, + "learning_rate": 1.2253296507257482e-07, + "loss": 0.9525, + "step": 33015 + }, + { + "epoch": 0.99, + "learning_rate": 1.199674139604312e-07, + "loss": 1.0577, + "step": 33020 + }, + { + "epoch": 0.99, + "learning_rate": 1.1742898993187812e-07, + "loss": 1.1482, + "step": 33025 + }, + { + "epoch": 0.99, + "learning_rate": 1.1491769367634187e-07, + "loss": 1.1678, + "step": 33030 + }, + { + "epoch": 0.99, + "learning_rate": 1.1243352587588796e-07, + "loss": 1.0416, + "step": 33035 + }, + { + "epoch": 0.99, + "learning_rate": 1.0997648720519893e-07, + "loss": 1.0466, + "step": 33040 + }, + { + "epoch": 0.99, + "learning_rate": 1.0754657833159653e-07, + "loss": 1.0364, + "step": 33045 + }, + { + "epoch": 0.99, + "learning_rate": 1.0514379991504176e-07, + "loss": 0.9948, + "step": 33050 + }, + { + "epoch": 0.99, + "learning_rate": 1.0276815260813478e-07, + "loss": 1.0748, + "step": 33055 + }, + { + "epoch": 0.99, + "learning_rate": 1.004196370560706e-07, + "loss": 1.084, + "step": 33060 + }, + { + "epoch": 0.99, + "learning_rate": 9.809825389670569e-08, + "loss": 1.0359, + "step": 33065 + }, + { + "epoch": 0.99, + "learning_rate": 9.580400376052456e-08, + "loss": 1.0372, + "step": 33070 + }, + { + "epoch": 0.99, + "learning_rate": 9.353688727063992e-08, + "loss": 1.0734, + "step": 33075 + }, + { + "epoch": 0.99, + "learning_rate": 9.129690504279254e-08, + "loss": 1.0507, + "step": 33080 + }, + { + "epoch": 0.99, + "learning_rate": 8.908405768534022e-08, + "loss": 1.0421, + "step": 33085 + }, + { + "epoch": 0.99, + "learning_rate": 8.689834579929112e-08, + "loss": 1.1419, + "step": 33090 + }, + { + "epoch": 0.99, + "learning_rate": 8.473976997829259e-08, + "loss": 1.0433, + "step": 33095 + }, + { + "epoch": 0.99, + "learning_rate": 8.260833080858677e-08, + "loss": 1.0026, + "step": 33100 + }, + { + "epoch": 0.99, + "learning_rate": 8.050402886907726e-08, + "loss": 1.0695, + "step": 33105 + }, + { + "epoch": 0.99, + "learning_rate": 7.842686473128469e-08, + "loss": 1.0675, + "step": 33110 + }, + { + "epoch": 0.99, + "learning_rate": 7.637683895934666e-08, + "loss": 1.0791, + "step": 33115 + }, + { + "epoch": 0.99, + "learning_rate": 7.435395211006225e-08, + "loss": 1.024, + "step": 33120 + }, + { + "epoch": 0.99, + "learning_rate": 7.235820473282529e-08, + "loss": 1.0324, + "step": 33125 + }, + { + "epoch": 0.99, + "learning_rate": 7.038959736969108e-08, + "loss": 1.0327, + "step": 33130 + }, + { + "epoch": 0.99, + "learning_rate": 6.844813055530974e-08, + "loss": 0.9654, + "step": 33135 + }, + { + "epoch": 0.99, + "learning_rate": 6.653380481698168e-08, + "loss": 1.0353, + "step": 33140 + }, + { + "epoch": 0.99, + "learning_rate": 6.464662067464656e-08, + "loss": 0.9622, + "step": 33145 + }, + { + "epoch": 0.99, + "learning_rate": 6.278657864083881e-08, + "loss": 1.0401, + "step": 33150 + }, + { + "epoch": 0.99, + "learning_rate": 6.095367922074325e-08, + "loss": 1.0605, + "step": 33155 + }, + { + "epoch": 0.99, + "learning_rate": 5.9147922912161644e-08, + "loss": 0.999, + "step": 33160 + }, + { + "epoch": 0.99, + "learning_rate": 5.736931020555725e-08, + "loss": 0.9496, + "step": 33165 + }, + { + "epoch": 0.99, + "learning_rate": 5.561784158397698e-08, + "loss": 1.0708, + "step": 33170 + }, + { + "epoch": 0.99, + "learning_rate": 5.389351752310701e-08, + "loss": 1.0351, + "step": 33175 + }, + { + "epoch": 0.99, + "learning_rate": 5.21963384912838e-08, + "loss": 1.0477, + "step": 33180 + }, + { + "epoch": 0.99, + "learning_rate": 5.052630494944977e-08, + "loss": 0.9883, + "step": 33185 + }, + { + "epoch": 0.99, + "learning_rate": 4.888341735117541e-08, + "loss": 1.0154, + "step": 33190 + }, + { + "epoch": 0.99, + "learning_rate": 4.726767614267047e-08, + "loss": 0.9743, + "step": 33195 + }, + { + "epoch": 0.99, + "learning_rate": 4.5679081762761697e-08, + "loss": 1.0818, + "step": 33200 + }, + { + "epoch": 0.99, + "learning_rate": 4.411763464290397e-08, + "loss": 0.9783, + "step": 33205 + }, + { + "epoch": 0.99, + "learning_rate": 4.258333520718028e-08, + "loss": 1.0376, + "step": 33210 + }, + { + "epoch": 0.99, + "learning_rate": 4.107618387231282e-08, + "loss": 1.0127, + "step": 33215 + }, + { + "epoch": 0.99, + "learning_rate": 3.959618104762974e-08, + "loss": 1.0251, + "step": 33220 + }, + { + "epoch": 0.99, + "learning_rate": 3.8143327135087284e-08, + "loss": 1.0233, + "step": 33225 + }, + { + "epoch": 0.99, + "learning_rate": 3.6717622529292006e-08, + "loss": 0.9605, + "step": 33230 + }, + { + "epoch": 0.99, + "learning_rate": 3.53190676174453e-08, + "loss": 1.0516, + "step": 33235 + }, + { + "epoch": 0.99, + "learning_rate": 3.394766277940997e-08, + "loss": 0.9519, + "step": 33240 + }, + { + "epoch": 0.99, + "learning_rate": 3.260340838763254e-08, + "loss": 0.9851, + "step": 33245 + }, + { + "epoch": 0.99, + "learning_rate": 3.128630480722095e-08, + "loss": 1.0326, + "step": 33250 + }, + { + "epoch": 0.99, + "learning_rate": 2.999635239588905e-08, + "loss": 0.9407, + "step": 33255 + }, + { + "epoch": 0.99, + "learning_rate": 2.873355150400103e-08, + "loss": 1.0578, + "step": 33260 + }, + { + "epoch": 0.99, + "learning_rate": 2.7497902474504788e-08, + "loss": 0.9887, + "step": 33265 + }, + { + "epoch": 0.99, + "learning_rate": 2.6289405643009635e-08, + "loss": 0.9677, + "step": 33270 + }, + { + "epoch": 0.99, + "learning_rate": 2.5108061337741906e-08, + "loss": 1.0865, + "step": 33275 + }, + { + "epoch": 0.99, + "learning_rate": 2.3953869879544956e-08, + "loss": 1.0035, + "step": 33280 + }, + { + "epoch": 0.99, + "learning_rate": 2.2826831581901353e-08, + "loss": 1.0245, + "step": 33285 + }, + { + "epoch": 0.99, + "learning_rate": 2.172694675089959e-08, + "loss": 1.0148, + "step": 33290 + }, + { + "epoch": 0.99, + "learning_rate": 2.0654215685278477e-08, + "loss": 1.0956, + "step": 33295 + }, + { + "epoch": 0.99, + "learning_rate": 1.9608638676371638e-08, + "loss": 1.0079, + "step": 33300 + }, + { + "epoch": 0.99, + "learning_rate": 1.8590216008174122e-08, + "loss": 1.0518, + "step": 33305 + }, + { + "epoch": 0.99, + "learning_rate": 1.7598947957264687e-08, + "loss": 1.0643, + "step": 33310 + }, + { + "epoch": 0.99, + "learning_rate": 1.6634834792894628e-08, + "loss": 1.0189, + "step": 33315 + }, + { + "epoch": 0.99, + "learning_rate": 1.5697876776887832e-08, + "loss": 1.1239, + "step": 33320 + }, + { + "epoch": 1.0, + "learning_rate": 1.4788074163729626e-08, + "loss": 1.0433, + "step": 33325 + }, + { + "epoch": 1.0, + "learning_rate": 1.3905427200522348e-08, + "loss": 1.0062, + "step": 33330 + }, + { + "epoch": 1.0, + "learning_rate": 1.3049936126985352e-08, + "loss": 1.0362, + "step": 33335 + }, + { + "epoch": 1.0, + "learning_rate": 1.2221601175466113e-08, + "loss": 0.9938, + "step": 33340 + }, + { + "epoch": 1.0, + "learning_rate": 1.1420422570951327e-08, + "loss": 1.0636, + "step": 33345 + }, + { + "epoch": 1.0, + "learning_rate": 1.0646400531022504e-08, + "loss": 1.1497, + "step": 33350 + }, + { + "epoch": 1.0, + "learning_rate": 9.899535265900373e-09, + "loss": 1.0251, + "step": 33355 + }, + { + "epoch": 1.0, + "learning_rate": 9.179826978444884e-09, + "loss": 1.0716, + "step": 33360 + }, + { + "epoch": 1.0, + "learning_rate": 8.4872758641108e-09, + "loss": 0.9776, + "step": 33365 + }, + { + "epoch": 1.0, + "learning_rate": 7.821882111003209e-09, + "loss": 0.9675, + "step": 33370 + }, + { + "epoch": 1.0, + "learning_rate": 7.183645899844216e-09, + "loss": 1.0777, + "step": 33375 + }, + { + "epoch": 1.0, + "learning_rate": 6.5725674039618376e-09, + "loss": 1.0456, + "step": 33380 + }, + { + "epoch": 1.0, + "learning_rate": 5.988646789334418e-09, + "loss": 0.9625, + "step": 33385 + }, + { + "epoch": 1.0, + "learning_rate": 5.431884214557315e-09, + "loss": 1.0344, + "step": 33390 + }, + { + "epoch": 1.0, + "learning_rate": 4.902279830831802e-09, + "loss": 1.0507, + "step": 33395 + }, + { + "epoch": 1.0, + "learning_rate": 4.399833781998375e-09, + "loss": 1.0283, + "step": 33400 + }, + { + "epoch": 1.0, + "learning_rate": 3.9245462045367495e-09, + "loss": 0.9996, + "step": 33405 + }, + { + "epoch": 1.0, + "learning_rate": 3.4764172275103536e-09, + "loss": 1.0546, + "step": 33410 + }, + { + "epoch": 1.0, + "learning_rate": 3.0554469726440383e-09, + "loss": 1.0001, + "step": 33415 + }, + { + "epoch": 1.0, + "learning_rate": 2.661635554268571e-09, + "loss": 1.0731, + "step": 33420 + }, + { + "epoch": 1.0, + "learning_rate": 2.2949830793428384e-09, + "loss": 1.0345, + "step": 33425 + }, + { + "epoch": 1.0, + "learning_rate": 1.955489647453845e-09, + "loss": 1.0085, + "step": 33430 + }, + { + "epoch": 1.0, + "learning_rate": 1.6431553507834096e-09, + "loss": 0.9439, + "step": 33435 + }, + { + "epoch": 1.0, + "learning_rate": 1.3579802741858772e-09, + "loss": 1.0171, + "step": 33440 + }, + { + "epoch": 1.0, + "learning_rate": 1.099964495110406e-09, + "loss": 1.0582, + "step": 33445 + }, + { + "epoch": 1.0, + "learning_rate": 8.691080836231713e-10, + "loss": 1.0641, + "step": 33450 + }, + { + "epoch": 1.0, + "learning_rate": 6.654111024295695e-10, + "loss": 0.9505, + "step": 33455 + }, + { + "epoch": 1.0, + "learning_rate": 4.888736068520139e-10, + "loss": 1.0263, + "step": 33460 + }, + { + "epoch": 1.0, + "learning_rate": 3.3949564482993467e-10, + "loss": 0.9571, + "step": 33465 + }, + { + "epoch": 1.0, + "learning_rate": 2.1727725695308566e-10, + "loss": 0.9431, + "step": 33470 + }, + { + "epoch": 1.0, + "learning_rate": 1.2221847639493078e-10, + "loss": 1.0255, + "step": 33475 + }, + { + "epoch": 1.0, + "learning_rate": 5.431932899035985e-11, + "loss": 1.0394, + "step": 33480 + }, + { + "epoch": 1.0, + "learning_rate": 1.3579833169075073e-11, + "loss": 1.0162, + "step": 33485 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.9852, + "step": 33490 + }, + { + "epoch": 1.0, + "eval_loss": 1.0256932973861694, + "eval_runtime": 2613.5895, + "eval_samples_per_second": 11.317, + "eval_steps_per_second": 1.415, + "step": 33490 + }, + { + "epoch": 1.0, + "step": 33490, + "total_flos": 6.646586827132109e+16, + "train_loss": 1.0557984271025294, + "train_runtime": 103963.1806, + "train_samples_per_second": 2.577, + "train_steps_per_second": 0.322 + } + ], + "logging_steps": 5, + "max_steps": 33490, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 100, + "total_flos": 6.646586827132109e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}