{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.169274992015332, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 6.389776357827476e-07, "loss": 11.1023, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.2779552715654952e-06, "loss": 11.0812, "step": 20 }, { "epoch": 0.02, "learning_rate": 1.916932907348243e-06, "loss": 11.1009, "step": 30 }, { "epoch": 0.03, "learning_rate": 2.5559105431309904e-06, "loss": 11.1185, "step": 40 }, { "epoch": 0.03, "learning_rate": 3.194888178913738e-06, "loss": 11.0872, "step": 50 }, { "epoch": 0.04, "learning_rate": 3.833865814696486e-06, "loss": 11.0512, "step": 60 }, { "epoch": 0.04, "learning_rate": 4.472843450479234e-06, "loss": 11.0023, "step": 70 }, { "epoch": 0.05, "learning_rate": 5.111821086261981e-06, "loss": 10.9751, "step": 80 }, { "epoch": 0.06, "learning_rate": 5.750798722044729e-06, "loss": 10.9535, "step": 90 }, { "epoch": 0.06, "learning_rate": 6.389776357827476e-06, "loss": 10.9472, "step": 100 }, { "epoch": 0.07, "learning_rate": 7.028753993610224e-06, "loss": 10.922, "step": 110 }, { "epoch": 0.08, "learning_rate": 7.667731629392972e-06, "loss": 10.8773, "step": 120 }, { "epoch": 0.08, "learning_rate": 8.306709265175718e-06, "loss": 10.8485, "step": 130 }, { "epoch": 0.09, "learning_rate": 8.945686900958468e-06, "loss": 10.7794, "step": 140 }, { "epoch": 0.1, "learning_rate": 9.584664536741214e-06, "loss": 10.7876, "step": 150 }, { "epoch": 0.1, "learning_rate": 1.0223642172523962e-05, "loss": 10.7846, "step": 160 }, { "epoch": 0.11, "learning_rate": 1.0862619808306708e-05, "loss": 10.7546, "step": 170 }, { "epoch": 0.11, "learning_rate": 1.1501597444089457e-05, "loss": 10.6649, "step": 180 }, { "epoch": 0.12, "learning_rate": 1.2140575079872205e-05, "loss": 10.6765, "step": 190 }, { "epoch": 0.13, "learning_rate": 1.2779552715654951e-05, "loss": 10.6211, "step": 200 }, { "epoch": 0.13, "learning_rate": 1.3418530351437701e-05, "loss": 10.6008, "step": 210 }, { "epoch": 0.14, "learning_rate": 1.4057507987220449e-05, "loss": 10.5362, "step": 220 }, { "epoch": 0.15, "learning_rate": 1.4696485623003195e-05, "loss": 10.4825, "step": 230 }, { "epoch": 0.15, "learning_rate": 1.5335463258785944e-05, "loss": 10.449, "step": 240 }, { "epoch": 0.16, "learning_rate": 1.597444089456869e-05, "loss": 10.3695, "step": 250 }, { "epoch": 0.17, "learning_rate": 1.6613418530351437e-05, "loss": 10.3128, "step": 260 }, { "epoch": 0.17, "learning_rate": 1.7252396166134186e-05, "loss": 10.221, "step": 270 }, { "epoch": 0.18, "learning_rate": 1.7891373801916936e-05, "loss": 10.1125, "step": 280 }, { "epoch": 0.19, "learning_rate": 1.853035143769968e-05, "loss": 9.9933, "step": 290 }, { "epoch": 0.19, "learning_rate": 1.9169329073482428e-05, "loss": 9.7903, "step": 300 }, { "epoch": 0.2, "learning_rate": 1.9808306709265177e-05, "loss": 9.5898, "step": 310 }, { "epoch": 0.2, "learning_rate": 2.0447284345047924e-05, "loss": 9.4338, "step": 320 }, { "epoch": 0.21, "learning_rate": 2.1086261980830673e-05, "loss": 9.2167, "step": 330 }, { "epoch": 0.22, "learning_rate": 2.1725239616613416e-05, "loss": 8.9299, "step": 340 }, { "epoch": 0.22, "learning_rate": 2.2364217252396165e-05, "loss": 8.5451, "step": 350 }, { "epoch": 0.23, "learning_rate": 2.3003194888178915e-05, "loss": 8.377, "step": 360 }, { "epoch": 0.24, "learning_rate": 2.364217252396166e-05, "loss": 8.1605, "step": 370 }, { "epoch": 0.24, "learning_rate": 2.428115015974441e-05, "loss": 7.8136, "step": 380 }, { "epoch": 0.25, "learning_rate": 2.4920127795527157e-05, "loss": 7.4985, "step": 390 }, { "epoch": 0.26, "learning_rate": 2.5559105431309903e-05, "loss": 7.1695, "step": 400 }, { "epoch": 0.26, "learning_rate": 2.6198083067092652e-05, "loss": 6.9816, "step": 410 }, { "epoch": 0.27, "learning_rate": 2.6837060702875402e-05, "loss": 6.4929, "step": 420 }, { "epoch": 0.27, "learning_rate": 2.7476038338658148e-05, "loss": 6.3582, "step": 430 }, { "epoch": 0.28, "learning_rate": 2.8115015974440897e-05, "loss": 6.1947, "step": 440 }, { "epoch": 0.29, "learning_rate": 2.875399361022364e-05, "loss": 5.8765, "step": 450 }, { "epoch": 0.29, "learning_rate": 2.939297124600639e-05, "loss": 5.6917, "step": 460 }, { "epoch": 0.3, "learning_rate": 3.003194888178914e-05, "loss": 5.5349, "step": 470 }, { "epoch": 0.31, "learning_rate": 3.067092651757189e-05, "loss": 5.4405, "step": 480 }, { "epoch": 0.31, "learning_rate": 3.130990415335463e-05, "loss": 5.2719, "step": 490 }, { "epoch": 0.32, "learning_rate": 3.194888178913738e-05, "loss": 4.8337, "step": 500 }, { "epoch": 0.33, "learning_rate": 3.258785942492013e-05, "loss": 4.8143, "step": 510 }, { "epoch": 0.33, "learning_rate": 3.322683706070287e-05, "loss": 4.5746, "step": 520 }, { "epoch": 0.34, "learning_rate": 3.386581469648562e-05, "loss": 4.348, "step": 530 }, { "epoch": 0.34, "learning_rate": 3.450479233226837e-05, "loss": 4.1607, "step": 540 }, { "epoch": 0.35, "learning_rate": 3.5143769968051115e-05, "loss": 4.0622, "step": 550 }, { "epoch": 0.36, "learning_rate": 3.578274760383387e-05, "loss": 3.9419, "step": 560 }, { "epoch": 0.36, "learning_rate": 3.6421725239616614e-05, "loss": 3.8112, "step": 570 }, { "epoch": 0.37, "learning_rate": 3.706070287539936e-05, "loss": 3.6399, "step": 580 }, { "epoch": 0.38, "learning_rate": 3.769968051118211e-05, "loss": 3.5703, "step": 590 }, { "epoch": 0.38, "learning_rate": 3.8338658146964856e-05, "loss": 3.4017, "step": 600 }, { "epoch": 0.39, "learning_rate": 3.8977635782747605e-05, "loss": 3.2633, "step": 610 }, { "epoch": 0.4, "learning_rate": 3.9616613418530355e-05, "loss": 3.0287, "step": 620 }, { "epoch": 0.4, "learning_rate": 4.02555910543131e-05, "loss": 3.0033, "step": 630 }, { "epoch": 0.41, "learning_rate": 4.089456869009585e-05, "loss": 3.0306, "step": 640 }, { "epoch": 0.42, "learning_rate": 4.15335463258786e-05, "loss": 2.8719, "step": 650 }, { "epoch": 0.42, "learning_rate": 4.2172523961661346e-05, "loss": 2.6886, "step": 660 }, { "epoch": 0.43, "learning_rate": 4.281150159744409e-05, "loss": 2.5607, "step": 670 }, { "epoch": 0.43, "learning_rate": 4.345047923322683e-05, "loss": 2.4779, "step": 680 }, { "epoch": 0.44, "learning_rate": 4.408945686900959e-05, "loss": 2.4884, "step": 690 }, { "epoch": 0.45, "learning_rate": 4.472843450479233e-05, "loss": 2.3248, "step": 700 }, { "epoch": 0.45, "learning_rate": 4.536741214057508e-05, "loss": 2.3108, "step": 710 }, { "epoch": 0.46, "learning_rate": 4.600638977635783e-05, "loss": 2.2866, "step": 720 }, { "epoch": 0.47, "learning_rate": 4.664536741214057e-05, "loss": 2.1556, "step": 730 }, { "epoch": 0.47, "learning_rate": 4.728434504792332e-05, "loss": 2.0662, "step": 740 }, { "epoch": 0.48, "learning_rate": 4.792332268370607e-05, "loss": 2.1048, "step": 750 }, { "epoch": 0.49, "learning_rate": 4.856230031948882e-05, "loss": 1.9392, "step": 760 }, { "epoch": 0.49, "learning_rate": 4.9201277955271564e-05, "loss": 1.9819, "step": 770 }, { "epoch": 0.5, "learning_rate": 4.984025559105431e-05, "loss": 1.9099, "step": 780 }, { "epoch": 0.5, "learning_rate": 5.047923322683706e-05, "loss": 1.8879, "step": 790 }, { "epoch": 0.51, "learning_rate": 5.1118210862619806e-05, "loss": 1.8877, "step": 800 }, { "epoch": 0.52, "learning_rate": 5.175718849840256e-05, "loss": 1.8067, "step": 810 }, { "epoch": 0.52, "learning_rate": 5.2396166134185305e-05, "loss": 1.74, "step": 820 }, { "epoch": 0.53, "learning_rate": 5.3035143769968054e-05, "loss": 1.705, "step": 830 }, { "epoch": 0.54, "learning_rate": 5.3674121405750804e-05, "loss": 1.5959, "step": 840 }, { "epoch": 0.54, "learning_rate": 5.4313099041533546e-05, "loss": 1.6158, "step": 850 }, { "epoch": 0.55, "learning_rate": 5.4952076677316296e-05, "loss": 1.6574, "step": 860 }, { "epoch": 0.56, "learning_rate": 5.559105431309904e-05, "loss": 1.5606, "step": 870 }, { "epoch": 0.56, "learning_rate": 5.6230031948881795e-05, "loss": 1.5595, "step": 880 }, { "epoch": 0.57, "learning_rate": 5.686900958466454e-05, "loss": 1.6025, "step": 890 }, { "epoch": 0.57, "learning_rate": 5.750798722044728e-05, "loss": 1.6317, "step": 900 }, { "epoch": 0.58, "learning_rate": 5.814696485623004e-05, "loss": 1.4737, "step": 910 }, { "epoch": 0.59, "learning_rate": 5.878594249201278e-05, "loss": 1.5243, "step": 920 }, { "epoch": 0.59, "learning_rate": 5.942492012779553e-05, "loss": 1.4923, "step": 930 }, { "epoch": 0.6, "learning_rate": 6.006389776357828e-05, "loss": 1.3972, "step": 940 }, { "epoch": 0.61, "learning_rate": 6.070287539936102e-05, "loss": 1.4433, "step": 950 }, { "epoch": 0.61, "learning_rate": 6.134185303514378e-05, "loss": 1.4677, "step": 960 }, { "epoch": 0.62, "learning_rate": 6.198083067092652e-05, "loss": 1.3918, "step": 970 }, { "epoch": 0.63, "learning_rate": 6.261980830670926e-05, "loss": 1.2902, "step": 980 }, { "epoch": 0.63, "learning_rate": 6.325878594249202e-05, "loss": 1.3005, "step": 990 }, { "epoch": 0.64, "learning_rate": 6.389776357827476e-05, "loss": 1.2544, "step": 1000 }, { "epoch": 0.65, "learning_rate": 6.45367412140575e-05, "loss": 1.2713, "step": 1010 }, { "epoch": 0.65, "learning_rate": 6.517571884984026e-05, "loss": 1.3027, "step": 1020 }, { "epoch": 0.66, "learning_rate": 6.5814696485623e-05, "loss": 1.2434, "step": 1030 }, { "epoch": 0.66, "learning_rate": 6.645367412140575e-05, "loss": 1.2283, "step": 1040 }, { "epoch": 0.67, "learning_rate": 6.70926517571885e-05, "loss": 1.2587, "step": 1050 }, { "epoch": 0.68, "learning_rate": 6.773162939297125e-05, "loss": 1.1828, "step": 1060 }, { "epoch": 0.68, "learning_rate": 6.8370607028754e-05, "loss": 1.1369, "step": 1070 }, { "epoch": 0.69, "learning_rate": 6.900958466453674e-05, "loss": 1.1307, "step": 1080 }, { "epoch": 0.7, "learning_rate": 6.964856230031949e-05, "loss": 1.1675, "step": 1090 }, { "epoch": 0.7, "learning_rate": 7.028753993610223e-05, "loss": 1.1872, "step": 1100 }, { "epoch": 0.71, "learning_rate": 7.092651757188499e-05, "loss": 1.1707, "step": 1110 }, { "epoch": 0.72, "learning_rate": 7.156549520766774e-05, "loss": 1.19, "step": 1120 }, { "epoch": 0.72, "learning_rate": 7.220447284345049e-05, "loss": 1.1268, "step": 1130 }, { "epoch": 0.73, "learning_rate": 7.284345047923323e-05, "loss": 1.0189, "step": 1140 }, { "epoch": 0.73, "learning_rate": 7.348242811501597e-05, "loss": 1.0764, "step": 1150 }, { "epoch": 0.74, "learning_rate": 7.412140575079871e-05, "loss": 1.073, "step": 1160 }, { "epoch": 0.75, "learning_rate": 7.476038338658147e-05, "loss": 0.9837, "step": 1170 }, { "epoch": 0.75, "learning_rate": 7.539936102236423e-05, "loss": 1.014, "step": 1180 }, { "epoch": 0.76, "learning_rate": 7.603833865814697e-05, "loss": 1.0643, "step": 1190 }, { "epoch": 0.77, "learning_rate": 7.667731629392971e-05, "loss": 1.0439, "step": 1200 }, { "epoch": 0.77, "learning_rate": 7.731629392971245e-05, "loss": 1.0419, "step": 1210 }, { "epoch": 0.78, "learning_rate": 7.795527156549521e-05, "loss": 1.01, "step": 1220 }, { "epoch": 0.79, "learning_rate": 7.859424920127795e-05, "loss": 0.9829, "step": 1230 }, { "epoch": 0.79, "learning_rate": 7.923322683706071e-05, "loss": 1.0113, "step": 1240 }, { "epoch": 0.8, "learning_rate": 7.987220447284345e-05, "loss": 0.9818, "step": 1250 }, { "epoch": 0.8, "learning_rate": 8.05111821086262e-05, "loss": 0.9299, "step": 1260 }, { "epoch": 0.81, "learning_rate": 8.115015974440895e-05, "loss": 0.9891, "step": 1270 }, { "epoch": 0.82, "learning_rate": 8.17891373801917e-05, "loss": 0.9429, "step": 1280 }, { "epoch": 0.82, "learning_rate": 8.242811501597444e-05, "loss": 0.8778, "step": 1290 }, { "epoch": 0.83, "learning_rate": 8.30670926517572e-05, "loss": 0.8918, "step": 1300 }, { "epoch": 0.84, "learning_rate": 8.370607028753994e-05, "loss": 0.8873, "step": 1310 }, { "epoch": 0.84, "learning_rate": 8.434504792332269e-05, "loss": 0.9026, "step": 1320 }, { "epoch": 0.85, "learning_rate": 8.498402555910544e-05, "loss": 0.9084, "step": 1330 }, { "epoch": 0.86, "learning_rate": 8.562300319488818e-05, "loss": 0.9513, "step": 1340 }, { "epoch": 0.86, "learning_rate": 8.626198083067092e-05, "loss": 0.8507, "step": 1350 }, { "epoch": 0.87, "learning_rate": 8.690095846645366e-05, "loss": 0.9136, "step": 1360 }, { "epoch": 0.88, "learning_rate": 8.753993610223643e-05, "loss": 0.8661, "step": 1370 }, { "epoch": 0.88, "learning_rate": 8.817891373801918e-05, "loss": 0.8559, "step": 1380 }, { "epoch": 0.89, "learning_rate": 8.881789137380192e-05, "loss": 0.8322, "step": 1390 }, { "epoch": 0.89, "learning_rate": 8.945686900958466e-05, "loss": 0.829, "step": 1400 }, { "epoch": 0.9, "learning_rate": 9.00958466453674e-05, "loss": 0.9206, "step": 1410 }, { "epoch": 0.91, "learning_rate": 9.073482428115016e-05, "loss": 0.788, "step": 1420 }, { "epoch": 0.91, "learning_rate": 9.137380191693292e-05, "loss": 0.8313, "step": 1430 }, { "epoch": 0.92, "learning_rate": 9.201277955271566e-05, "loss": 0.8072, "step": 1440 }, { "epoch": 0.93, "learning_rate": 9.26517571884984e-05, "loss": 0.8115, "step": 1450 }, { "epoch": 0.93, "learning_rate": 9.329073482428115e-05, "loss": 0.7849, "step": 1460 }, { "epoch": 0.94, "learning_rate": 9.39297124600639e-05, "loss": 0.8003, "step": 1470 }, { "epoch": 0.95, "learning_rate": 9.456869009584664e-05, "loss": 0.7835, "step": 1480 }, { "epoch": 0.95, "learning_rate": 9.52076677316294e-05, "loss": 0.8598, "step": 1490 }, { "epoch": 0.96, "learning_rate": 9.584664536741214e-05, "loss": 0.746, "step": 1500 }, { "epoch": 0.96, "learning_rate": 9.648562300319489e-05, "loss": 0.8485, "step": 1510 }, { "epoch": 0.97, "learning_rate": 9.712460063897764e-05, "loss": 0.7976, "step": 1520 }, { "epoch": 0.98, "learning_rate": 9.776357827476038e-05, "loss": 0.7023, "step": 1530 }, { "epoch": 0.98, "learning_rate": 9.840255591054313e-05, "loss": 0.7586, "step": 1540 }, { "epoch": 0.99, "learning_rate": 9.904153354632587e-05, "loss": 0.771, "step": 1550 }, { "epoch": 1.0, "learning_rate": 9.968051118210863e-05, "loss": 0.7551, "step": 1560 }, { "epoch": 1.0, "learning_rate": 0.00010031948881789138, "loss": 0.7311, "step": 1570 }, { "epoch": 1.01, "learning_rate": 0.00010095846645367413, "loss": 0.7774, "step": 1580 }, { "epoch": 1.02, "learning_rate": 0.00010159744408945687, "loss": 0.7289, "step": 1590 }, { "epoch": 1.02, "learning_rate": 0.00010223642172523961, "loss": 0.7326, "step": 1600 }, { "epoch": 1.03, "learning_rate": 0.00010287539936102237, "loss": 0.7089, "step": 1610 }, { "epoch": 1.04, "learning_rate": 0.00010351437699680512, "loss": 0.6825, "step": 1620 }, { "epoch": 1.04, "learning_rate": 0.00010415335463258787, "loss": 0.7002, "step": 1630 }, { "epoch": 1.05, "learning_rate": 0.00010479233226837061, "loss": 0.6959, "step": 1640 }, { "epoch": 1.05, "learning_rate": 0.00010543130990415335, "loss": 0.713, "step": 1650 }, { "epoch": 1.06, "learning_rate": 0.00010607028753993611, "loss": 0.6884, "step": 1660 }, { "epoch": 1.07, "learning_rate": 0.00010670926517571885, "loss": 0.6914, "step": 1670 }, { "epoch": 1.07, "learning_rate": 0.00010734824281150161, "loss": 0.6628, "step": 1680 }, { "epoch": 1.08, "learning_rate": 0.00010798722044728435, "loss": 0.7524, "step": 1690 }, { "epoch": 1.09, "learning_rate": 0.00010862619808306709, "loss": 0.6824, "step": 1700 }, { "epoch": 1.09, "learning_rate": 0.00010926517571884985, "loss": 0.6922, "step": 1710 }, { "epoch": 1.1, "learning_rate": 0.00010990415335463259, "loss": 0.6298, "step": 1720 }, { "epoch": 1.11, "learning_rate": 0.00011054313099041533, "loss": 0.6545, "step": 1730 }, { "epoch": 1.11, "learning_rate": 0.00011118210862619808, "loss": 0.6842, "step": 1740 }, { "epoch": 1.12, "learning_rate": 0.00011182108626198083, "loss": 0.6909, "step": 1750 }, { "epoch": 1.12, "learning_rate": 0.00011246006389776359, "loss": 0.687, "step": 1760 }, { "epoch": 1.13, "learning_rate": 0.00011309904153354633, "loss": 0.6096, "step": 1770 }, { "epoch": 1.14, "learning_rate": 0.00011373801916932908, "loss": 0.6599, "step": 1780 }, { "epoch": 1.14, "learning_rate": 0.00011437699680511182, "loss": 0.7088, "step": 1790 }, { "epoch": 1.15, "learning_rate": 0.00011501597444089456, "loss": 0.7042, "step": 1800 }, { "epoch": 1.16, "learning_rate": 0.00011565495207667733, "loss": 0.6482, "step": 1810 }, { "epoch": 1.16, "learning_rate": 0.00011629392971246007, "loss": 0.6221, "step": 1820 }, { "epoch": 1.17, "learning_rate": 0.00011693290734824282, "loss": 0.6134, "step": 1830 }, { "epoch": 1.18, "learning_rate": 0.00011757188498402556, "loss": 0.5959, "step": 1840 }, { "epoch": 1.18, "learning_rate": 0.0001182108626198083, "loss": 0.6201, "step": 1850 }, { "epoch": 1.19, "learning_rate": 0.00011884984025559106, "loss": 0.6298, "step": 1860 }, { "epoch": 1.19, "learning_rate": 0.00011948881789137381, "loss": 0.5908, "step": 1870 }, { "epoch": 1.2, "learning_rate": 0.00012012779552715656, "loss": 0.5862, "step": 1880 }, { "epoch": 1.21, "learning_rate": 0.0001207667731629393, "loss": 0.6449, "step": 1890 }, { "epoch": 1.21, "learning_rate": 0.00012140575079872204, "loss": 0.5922, "step": 1900 }, { "epoch": 1.22, "learning_rate": 0.0001220447284345048, "loss": 0.604, "step": 1910 }, { "epoch": 1.23, "learning_rate": 0.00012268370607028756, "loss": 0.6396, "step": 1920 }, { "epoch": 1.23, "learning_rate": 0.00012332268370607028, "loss": 0.6068, "step": 1930 }, { "epoch": 1.24, "learning_rate": 0.00012396166134185304, "loss": 0.6454, "step": 1940 }, { "epoch": 1.25, "learning_rate": 0.00012460063897763577, "loss": 0.5194, "step": 1950 }, { "epoch": 1.25, "learning_rate": 0.00012523961661341853, "loss": 0.5865, "step": 1960 }, { "epoch": 1.26, "learning_rate": 0.00012587859424920128, "loss": 0.5784, "step": 1970 }, { "epoch": 1.27, "learning_rate": 0.00012651757188498404, "loss": 0.559, "step": 1980 }, { "epoch": 1.27, "learning_rate": 0.00012715654952076677, "loss": 0.5501, "step": 1990 }, { "epoch": 1.28, "learning_rate": 0.00012779552715654952, "loss": 0.6316, "step": 2000 }, { "epoch": 1.28, "learning_rate": 0.00012843450479233225, "loss": 0.593, "step": 2010 }, { "epoch": 1.29, "learning_rate": 0.000129073482428115, "loss": 0.5946, "step": 2020 }, { "epoch": 1.3, "learning_rate": 0.00012971246006389777, "loss": 0.5813, "step": 2030 }, { "epoch": 1.3, "learning_rate": 0.00013035143769968052, "loss": 0.5541, "step": 2040 }, { "epoch": 1.31, "learning_rate": 0.00013099041533546328, "loss": 0.5761, "step": 2050 }, { "epoch": 1.32, "learning_rate": 0.000131629392971246, "loss": 0.557, "step": 2060 }, { "epoch": 1.32, "learning_rate": 0.00013226837060702876, "loss": 0.5663, "step": 2070 }, { "epoch": 1.33, "learning_rate": 0.0001329073482428115, "loss": 0.5496, "step": 2080 }, { "epoch": 1.34, "learning_rate": 0.00013354632587859425, "loss": 0.5291, "step": 2090 }, { "epoch": 1.34, "learning_rate": 0.000134185303514377, "loss": 0.565, "step": 2100 }, { "epoch": 1.35, "learning_rate": 0.00013482428115015973, "loss": 0.5763, "step": 2110 }, { "epoch": 1.35, "learning_rate": 0.0001354632587859425, "loss": 0.6175, "step": 2120 }, { "epoch": 1.36, "learning_rate": 0.00013610223642172525, "loss": 0.5378, "step": 2130 }, { "epoch": 1.37, "learning_rate": 0.000136741214057508, "loss": 0.574, "step": 2140 }, { "epoch": 1.37, "learning_rate": 0.00013738019169329073, "loss": 0.561, "step": 2150 }, { "epoch": 1.38, "learning_rate": 0.0001380191693290735, "loss": 0.5277, "step": 2160 }, { "epoch": 1.39, "learning_rate": 0.00013865814696485625, "loss": 0.5662, "step": 2170 }, { "epoch": 1.39, "learning_rate": 0.00013929712460063897, "loss": 0.526, "step": 2180 }, { "epoch": 1.4, "learning_rate": 0.00013993610223642173, "loss": 0.4969, "step": 2190 }, { "epoch": 1.41, "learning_rate": 0.00014057507987220446, "loss": 0.5275, "step": 2200 }, { "epoch": 1.41, "learning_rate": 0.00014121405750798722, "loss": 0.553, "step": 2210 }, { "epoch": 1.42, "learning_rate": 0.00014185303514376997, "loss": 0.5598, "step": 2220 }, { "epoch": 1.42, "learning_rate": 0.00014249201277955273, "loss": 0.5127, "step": 2230 }, { "epoch": 1.43, "learning_rate": 0.00014313099041533549, "loss": 0.5606, "step": 2240 }, { "epoch": 1.44, "learning_rate": 0.00014376996805111821, "loss": 0.5499, "step": 2250 }, { "epoch": 1.44, "learning_rate": 0.00014440894568690097, "loss": 0.5118, "step": 2260 }, { "epoch": 1.45, "learning_rate": 0.0001450479233226837, "loss": 0.5638, "step": 2270 }, { "epoch": 1.46, "learning_rate": 0.00014568690095846646, "loss": 0.5235, "step": 2280 }, { "epoch": 1.46, "learning_rate": 0.0001463258785942492, "loss": 0.5114, "step": 2290 }, { "epoch": 1.47, "learning_rate": 0.00014696485623003194, "loss": 0.5375, "step": 2300 }, { "epoch": 1.48, "learning_rate": 0.0001476038338658147, "loss": 0.5737, "step": 2310 }, { "epoch": 1.48, "learning_rate": 0.00014824281150159743, "loss": 0.541, "step": 2320 }, { "epoch": 1.49, "learning_rate": 0.0001488817891373802, "loss": 0.5112, "step": 2330 }, { "epoch": 1.5, "learning_rate": 0.00014952076677316294, "loss": 0.4936, "step": 2340 }, { "epoch": 1.5, "learning_rate": 0.0001501597444089457, "loss": 0.515, "step": 2350 }, { "epoch": 1.51, "learning_rate": 0.00015079872204472845, "loss": 0.507, "step": 2360 }, { "epoch": 1.51, "learning_rate": 0.00015143769968051118, "loss": 0.4934, "step": 2370 }, { "epoch": 1.52, "learning_rate": 0.00015207667731629394, "loss": 0.5081, "step": 2380 }, { "epoch": 1.53, "learning_rate": 0.00015271565495207667, "loss": 0.4726, "step": 2390 }, { "epoch": 1.53, "learning_rate": 0.00015335463258785942, "loss": 0.5149, "step": 2400 }, { "epoch": 1.54, "learning_rate": 0.00015399361022364218, "loss": 0.4544, "step": 2410 }, { "epoch": 1.55, "learning_rate": 0.0001546325878594249, "loss": 0.5026, "step": 2420 }, { "epoch": 1.55, "learning_rate": 0.0001552715654952077, "loss": 0.4987, "step": 2430 }, { "epoch": 1.56, "learning_rate": 0.00015591054313099042, "loss": 0.4715, "step": 2440 }, { "epoch": 1.57, "learning_rate": 0.00015654952076677318, "loss": 0.5137, "step": 2450 }, { "epoch": 1.57, "learning_rate": 0.0001571884984025559, "loss": 0.4659, "step": 2460 }, { "epoch": 1.58, "learning_rate": 0.00015782747603833866, "loss": 0.5293, "step": 2470 }, { "epoch": 1.58, "learning_rate": 0.00015846645367412142, "loss": 0.4848, "step": 2480 }, { "epoch": 1.59, "learning_rate": 0.00015910543130990415, "loss": 0.5464, "step": 2490 }, { "epoch": 1.6, "learning_rate": 0.0001597444089456869, "loss": 0.4918, "step": 2500 }, { "epoch": 1.6, "learning_rate": 0.00016038338658146963, "loss": 0.4821, "step": 2510 }, { "epoch": 1.61, "learning_rate": 0.0001610223642172524, "loss": 0.4464, "step": 2520 }, { "epoch": 1.62, "learning_rate": 0.00016166134185303515, "loss": 0.5041, "step": 2530 }, { "epoch": 1.62, "learning_rate": 0.0001623003194888179, "loss": 0.5254, "step": 2540 }, { "epoch": 1.63, "learning_rate": 0.00016293929712460066, "loss": 0.4722, "step": 2550 }, { "epoch": 1.64, "learning_rate": 0.0001635782747603834, "loss": 0.5127, "step": 2560 }, { "epoch": 1.64, "learning_rate": 0.00016421725239616614, "loss": 0.4978, "step": 2570 }, { "epoch": 1.65, "learning_rate": 0.00016485623003194887, "loss": 0.5174, "step": 2580 }, { "epoch": 1.65, "learning_rate": 0.00016549520766773163, "loss": 0.528, "step": 2590 }, { "epoch": 1.66, "learning_rate": 0.0001661341853035144, "loss": 0.5612, "step": 2600 }, { "epoch": 1.67, "learning_rate": 0.00016677316293929712, "loss": 0.4953, "step": 2610 }, { "epoch": 1.67, "learning_rate": 0.00016741214057507987, "loss": 0.475, "step": 2620 }, { "epoch": 1.68, "learning_rate": 0.00016805111821086263, "loss": 0.476, "step": 2630 }, { "epoch": 1.69, "learning_rate": 0.00016869009584664538, "loss": 0.4799, "step": 2640 }, { "epoch": 1.69, "learning_rate": 0.00016932907348242811, "loss": 0.4838, "step": 2650 }, { "epoch": 1.7, "learning_rate": 0.00016996805111821087, "loss": 0.5215, "step": 2660 }, { "epoch": 1.71, "learning_rate": 0.00017060702875399363, "loss": 0.4927, "step": 2670 }, { "epoch": 1.71, "learning_rate": 0.00017124600638977636, "loss": 0.4973, "step": 2680 }, { "epoch": 1.72, "learning_rate": 0.0001718849840255591, "loss": 0.4703, "step": 2690 }, { "epoch": 1.73, "learning_rate": 0.00017252396166134184, "loss": 0.5073, "step": 2700 }, { "epoch": 1.73, "learning_rate": 0.0001731629392971246, "loss": 0.4445, "step": 2710 }, { "epoch": 1.74, "learning_rate": 0.00017380191693290733, "loss": 0.463, "step": 2720 }, { "epoch": 1.74, "learning_rate": 0.0001744408945686901, "loss": 0.4289, "step": 2730 }, { "epoch": 1.75, "learning_rate": 0.00017507987220447287, "loss": 0.4378, "step": 2740 }, { "epoch": 1.76, "learning_rate": 0.0001757188498402556, "loss": 0.4408, "step": 2750 }, { "epoch": 1.76, "learning_rate": 0.00017635782747603835, "loss": 0.518, "step": 2760 }, { "epoch": 1.77, "learning_rate": 0.00017699680511182108, "loss": 0.4583, "step": 2770 }, { "epoch": 1.78, "learning_rate": 0.00017763578274760384, "loss": 0.4415, "step": 2780 }, { "epoch": 1.78, "learning_rate": 0.0001782747603833866, "loss": 0.4543, "step": 2790 }, { "epoch": 1.79, "learning_rate": 0.00017891373801916932, "loss": 0.4353, "step": 2800 }, { "epoch": 1.8, "learning_rate": 0.00017955271565495208, "loss": 0.5315, "step": 2810 }, { "epoch": 1.8, "learning_rate": 0.0001801916932907348, "loss": 0.468, "step": 2820 }, { "epoch": 1.81, "learning_rate": 0.0001808306709265176, "loss": 0.4915, "step": 2830 }, { "epoch": 1.81, "learning_rate": 0.00018146964856230032, "loss": 0.4684, "step": 2840 }, { "epoch": 1.82, "learning_rate": 0.00018210862619808308, "loss": 0.4367, "step": 2850 }, { "epoch": 1.83, "learning_rate": 0.00018274760383386583, "loss": 0.3751, "step": 2860 }, { "epoch": 1.83, "learning_rate": 0.00018338658146964856, "loss": 0.4347, "step": 2870 }, { "epoch": 1.84, "learning_rate": 0.00018402555910543132, "loss": 0.4611, "step": 2880 }, { "epoch": 1.85, "learning_rate": 0.00018466453674121405, "loss": 0.4425, "step": 2890 }, { "epoch": 1.85, "learning_rate": 0.0001853035143769968, "loss": 0.4483, "step": 2900 }, { "epoch": 1.86, "learning_rate": 0.00018594249201277953, "loss": 0.4603, "step": 2910 }, { "epoch": 1.87, "learning_rate": 0.0001865814696485623, "loss": 0.4369, "step": 2920 }, { "epoch": 1.87, "learning_rate": 0.00018722044728434507, "loss": 0.4521, "step": 2930 }, { "epoch": 1.88, "learning_rate": 0.0001878594249201278, "loss": 0.4239, "step": 2940 }, { "epoch": 1.88, "learning_rate": 0.00018849840255591056, "loss": 0.3795, "step": 2950 }, { "epoch": 1.89, "learning_rate": 0.0001891373801916933, "loss": 0.477, "step": 2960 }, { "epoch": 1.9, "learning_rate": 0.00018977635782747604, "loss": 0.4273, "step": 2970 }, { "epoch": 1.9, "learning_rate": 0.0001904153354632588, "loss": 0.4786, "step": 2980 }, { "epoch": 1.91, "learning_rate": 0.00019105431309904153, "loss": 0.4803, "step": 2990 }, { "epoch": 1.92, "learning_rate": 0.00019169329073482429, "loss": 0.4497, "step": 3000 }, { "epoch": 1.92, "learning_rate": 0.00019233226837060702, "loss": 0.4749, "step": 3010 }, { "epoch": 1.93, "learning_rate": 0.00019297124600638977, "loss": 0.4231, "step": 3020 }, { "epoch": 1.94, "learning_rate": 0.00019361022364217253, "loss": 0.4618, "step": 3030 }, { "epoch": 1.94, "learning_rate": 0.00019424920127795528, "loss": 0.4265, "step": 3040 }, { "epoch": 1.95, "learning_rate": 0.00019488817891373804, "loss": 0.39, "step": 3050 }, { "epoch": 1.95, "learning_rate": 0.00019552715654952077, "loss": 0.4346, "step": 3060 }, { "epoch": 1.96, "learning_rate": 0.00019616613418530353, "loss": 0.4079, "step": 3070 }, { "epoch": 1.97, "learning_rate": 0.00019680511182108626, "loss": 0.4351, "step": 3080 }, { "epoch": 1.97, "learning_rate": 0.000197444089456869, "loss": 0.4402, "step": 3090 }, { "epoch": 1.98, "learning_rate": 0.00019808306709265174, "loss": 0.4294, "step": 3100 }, { "epoch": 1.99, "learning_rate": 0.0001987220447284345, "loss": 0.4079, "step": 3110 }, { "epoch": 1.99, "learning_rate": 0.00019936102236421725, "loss": 0.398, "step": 3120 }, { "epoch": 2.0, "learning_rate": 0.0002, "loss": 0.3759, "step": 3130 }, { "epoch": 2.01, "learning_rate": 0.00020063897763578277, "loss": 0.408, "step": 3140 }, { "epoch": 2.01, "learning_rate": 0.0002012779552715655, "loss": 0.4261, "step": 3150 }, { "epoch": 2.02, "learning_rate": 0.00020191693290734825, "loss": 0.3951, "step": 3160 }, { "epoch": 2.03, "learning_rate": 0.000202555910543131, "loss": 0.3488, "step": 3170 }, { "epoch": 2.03, "learning_rate": 0.00020319488817891374, "loss": 0.3529, "step": 3180 }, { "epoch": 2.04, "learning_rate": 0.0002038338658146965, "loss": 0.3806, "step": 3190 }, { "epoch": 2.04, "learning_rate": 0.00020447284345047922, "loss": 0.3641, "step": 3200 }, { "epoch": 2.05, "learning_rate": 0.00020511182108626198, "loss": 0.4043, "step": 3210 }, { "epoch": 2.06, "learning_rate": 0.00020575079872204473, "loss": 0.3598, "step": 3220 }, { "epoch": 2.06, "learning_rate": 0.0002063897763578275, "loss": 0.3858, "step": 3230 }, { "epoch": 2.07, "learning_rate": 0.00020702875399361025, "loss": 0.3666, "step": 3240 }, { "epoch": 2.08, "learning_rate": 0.00020766773162939298, "loss": 0.3702, "step": 3250 }, { "epoch": 2.08, "learning_rate": 0.00020830670926517573, "loss": 0.3782, "step": 3260 }, { "epoch": 2.09, "learning_rate": 0.00020894568690095846, "loss": 0.3782, "step": 3270 }, { "epoch": 2.1, "learning_rate": 0.00020958466453674122, "loss": 0.3474, "step": 3280 }, { "epoch": 2.1, "learning_rate": 0.00021022364217252395, "loss": 0.3645, "step": 3290 }, { "epoch": 2.11, "learning_rate": 0.0002108626198083067, "loss": 0.4034, "step": 3300 }, { "epoch": 2.11, "learning_rate": 0.00021150159744408946, "loss": 0.3892, "step": 3310 }, { "epoch": 2.12, "learning_rate": 0.00021214057507987222, "loss": 0.3843, "step": 3320 }, { "epoch": 2.13, "learning_rate": 0.00021277955271565497, "loss": 0.3667, "step": 3330 }, { "epoch": 2.13, "learning_rate": 0.0002134185303514377, "loss": 0.3954, "step": 3340 }, { "epoch": 2.14, "learning_rate": 0.00021405750798722046, "loss": 0.3733, "step": 3350 }, { "epoch": 2.15, "learning_rate": 0.00021469648562300321, "loss": 0.3801, "step": 3360 }, { "epoch": 2.15, "learning_rate": 0.00021533546325878594, "loss": 0.3813, "step": 3370 }, { "epoch": 2.16, "learning_rate": 0.0002159744408945687, "loss": 0.382, "step": 3380 }, { "epoch": 2.17, "learning_rate": 0.00021661341853035143, "loss": 0.3959, "step": 3390 }, { "epoch": 2.17, "learning_rate": 0.00021725239616613419, "loss": 0.338, "step": 3400 }, { "epoch": 2.18, "learning_rate": 0.00021789137380191691, "loss": 0.3884, "step": 3410 }, { "epoch": 2.19, "learning_rate": 0.0002185303514376997, "loss": 0.3583, "step": 3420 }, { "epoch": 2.19, "learning_rate": 0.00021916932907348245, "loss": 0.3224, "step": 3430 }, { "epoch": 2.2, "learning_rate": 0.00021980830670926518, "loss": 0.3587, "step": 3440 }, { "epoch": 2.2, "learning_rate": 0.00022044728434504794, "loss": 0.3493, "step": 3450 }, { "epoch": 2.21, "learning_rate": 0.00022108626198083067, "loss": 0.392, "step": 3460 }, { "epoch": 2.22, "learning_rate": 0.00022172523961661343, "loss": 0.3555, "step": 3470 }, { "epoch": 2.22, "learning_rate": 0.00022236421725239615, "loss": 0.4006, "step": 3480 }, { "epoch": 2.23, "learning_rate": 0.0002230031948881789, "loss": 0.3817, "step": 3490 }, { "epoch": 2.24, "learning_rate": 0.00022364217252396167, "loss": 0.3957, "step": 3500 }, { "epoch": 2.24, "learning_rate": 0.0002242811501597444, "loss": 0.3538, "step": 3510 }, { "epoch": 2.25, "learning_rate": 0.00022492012779552718, "loss": 0.3811, "step": 3520 }, { "epoch": 2.26, "learning_rate": 0.0002255591054313099, "loss": 0.3615, "step": 3530 }, { "epoch": 2.26, "learning_rate": 0.00022619808306709267, "loss": 0.3491, "step": 3540 }, { "epoch": 2.27, "learning_rate": 0.00022683706070287542, "loss": 0.3904, "step": 3550 }, { "epoch": 2.27, "learning_rate": 0.00022747603833865815, "loss": 0.3692, "step": 3560 }, { "epoch": 2.28, "learning_rate": 0.0002281150159744409, "loss": 0.3764, "step": 3570 }, { "epoch": 2.29, "learning_rate": 0.00022875399361022364, "loss": 0.3189, "step": 3580 }, { "epoch": 2.29, "learning_rate": 0.0002293929712460064, "loss": 0.3437, "step": 3590 }, { "epoch": 2.3, "learning_rate": 0.00023003194888178912, "loss": 0.38, "step": 3600 }, { "epoch": 2.31, "learning_rate": 0.00023067092651757188, "loss": 0.3694, "step": 3610 }, { "epoch": 2.31, "learning_rate": 0.00023130990415335466, "loss": 0.3297, "step": 3620 }, { "epoch": 2.32, "learning_rate": 0.0002319488817891374, "loss": 0.391, "step": 3630 }, { "epoch": 2.33, "learning_rate": 0.00023258785942492015, "loss": 0.3465, "step": 3640 }, { "epoch": 2.33, "learning_rate": 0.00023322683706070288, "loss": 0.3879, "step": 3650 }, { "epoch": 2.34, "learning_rate": 0.00023386581469648563, "loss": 0.3551, "step": 3660 }, { "epoch": 2.34, "learning_rate": 0.00023450479233226836, "loss": 0.3383, "step": 3670 }, { "epoch": 2.35, "learning_rate": 0.00023514376996805112, "loss": 0.3284, "step": 3680 }, { "epoch": 2.36, "learning_rate": 0.00023578274760383387, "loss": 0.3362, "step": 3690 }, { "epoch": 2.36, "learning_rate": 0.0002364217252396166, "loss": 0.349, "step": 3700 }, { "epoch": 2.37, "learning_rate": 0.00023706070287539936, "loss": 0.357, "step": 3710 }, { "epoch": 2.38, "learning_rate": 0.00023769968051118212, "loss": 0.3993, "step": 3720 }, { "epoch": 2.38, "learning_rate": 0.00023833865814696487, "loss": 0.3611, "step": 3730 }, { "epoch": 2.39, "learning_rate": 0.00023897763578274763, "loss": 0.3989, "step": 3740 }, { "epoch": 2.4, "learning_rate": 0.00023961661341853036, "loss": 0.3792, "step": 3750 }, { "epoch": 2.4, "learning_rate": 0.00024025559105431311, "loss": 0.3434, "step": 3760 }, { "epoch": 2.41, "learning_rate": 0.00024089456869009584, "loss": 0.3669, "step": 3770 }, { "epoch": 2.42, "learning_rate": 0.0002415335463258786, "loss": 0.3858, "step": 3780 }, { "epoch": 2.42, "learning_rate": 0.00024217252396166133, "loss": 0.3016, "step": 3790 }, { "epoch": 2.43, "learning_rate": 0.00024281150159744408, "loss": 0.3253, "step": 3800 }, { "epoch": 2.43, "learning_rate": 0.00024345047923322684, "loss": 0.3811, "step": 3810 }, { "epoch": 2.44, "learning_rate": 0.0002440894568690096, "loss": 0.338, "step": 3820 }, { "epoch": 2.45, "learning_rate": 0.0002447284345047923, "loss": 0.3214, "step": 3830 }, { "epoch": 2.45, "learning_rate": 0.0002453674121405751, "loss": 0.3696, "step": 3840 }, { "epoch": 2.46, "learning_rate": 0.00024600638977635784, "loss": 0.3624, "step": 3850 }, { "epoch": 2.47, "learning_rate": 0.00024664536741214057, "loss": 0.3538, "step": 3860 }, { "epoch": 2.47, "learning_rate": 0.00024728434504792335, "loss": 0.372, "step": 3870 }, { "epoch": 2.48, "learning_rate": 0.0002479233226837061, "loss": 0.3884, "step": 3880 }, { "epoch": 2.49, "learning_rate": 0.0002485623003194888, "loss": 0.325, "step": 3890 }, { "epoch": 2.49, "learning_rate": 0.00024920127795527154, "loss": 0.3567, "step": 3900 }, { "epoch": 2.5, "learning_rate": 0.0002498402555910543, "loss": 0.379, "step": 3910 }, { "epoch": 2.5, "learning_rate": 0.00025047923322683705, "loss": 0.3542, "step": 3920 }, { "epoch": 2.51, "learning_rate": 0.00025111821086261984, "loss": 0.3739, "step": 3930 }, { "epoch": 2.52, "learning_rate": 0.00025175718849840256, "loss": 0.3657, "step": 3940 }, { "epoch": 2.52, "learning_rate": 0.0002523961661341853, "loss": 0.3499, "step": 3950 }, { "epoch": 2.53, "learning_rate": 0.0002530351437699681, "loss": 0.3428, "step": 3960 }, { "epoch": 2.54, "learning_rate": 0.0002536741214057508, "loss": 0.3369, "step": 3970 }, { "epoch": 2.54, "learning_rate": 0.00025431309904153354, "loss": 0.4055, "step": 3980 }, { "epoch": 2.55, "learning_rate": 0.00025495207667731626, "loss": 0.365, "step": 3990 }, { "epoch": 2.56, "learning_rate": 0.00025559105431309905, "loss": 0.3533, "step": 4000 }, { "epoch": 2.56, "learning_rate": 0.0002562300319488818, "loss": 0.3223, "step": 4010 }, { "epoch": 2.57, "learning_rate": 0.0002568690095846645, "loss": 0.3176, "step": 4020 }, { "epoch": 2.57, "learning_rate": 0.0002575079872204473, "loss": 0.3098, "step": 4030 }, { "epoch": 2.58, "learning_rate": 0.00025814696485623, "loss": 0.3792, "step": 4040 }, { "epoch": 2.59, "learning_rate": 0.00025878594249201275, "loss": 0.357, "step": 4050 }, { "epoch": 2.59, "learning_rate": 0.00025942492012779553, "loss": 0.3706, "step": 4060 }, { "epoch": 2.6, "learning_rate": 0.0002600638977635783, "loss": 0.346, "step": 4070 }, { "epoch": 2.61, "learning_rate": 0.00026070287539936104, "loss": 0.341, "step": 4080 }, { "epoch": 2.61, "learning_rate": 0.0002613418530351438, "loss": 0.3352, "step": 4090 }, { "epoch": 2.62, "learning_rate": 0.00026198083067092656, "loss": 0.3219, "step": 4100 }, { "epoch": 2.63, "learning_rate": 0.0002626198083067093, "loss": 0.3638, "step": 4110 }, { "epoch": 2.63, "learning_rate": 0.000263258785942492, "loss": 0.3356, "step": 4120 }, { "epoch": 2.64, "learning_rate": 0.0002638977635782748, "loss": 0.3309, "step": 4130 }, { "epoch": 2.65, "learning_rate": 0.00026453674121405753, "loss": 0.3649, "step": 4140 }, { "epoch": 2.65, "learning_rate": 0.00026517571884984026, "loss": 0.3334, "step": 4150 }, { "epoch": 2.66, "learning_rate": 0.000265814696485623, "loss": 0.3279, "step": 4160 }, { "epoch": 2.66, "learning_rate": 0.00026645367412140577, "loss": 0.3603, "step": 4170 }, { "epoch": 2.67, "learning_rate": 0.0002670926517571885, "loss": 0.3207, "step": 4180 }, { "epoch": 2.68, "learning_rate": 0.00026773162939297123, "loss": 0.2887, "step": 4190 }, { "epoch": 2.68, "learning_rate": 0.000268370607028754, "loss": 0.343, "step": 4200 }, { "epoch": 2.69, "learning_rate": 0.00026900958466453674, "loss": 0.3347, "step": 4210 }, { "epoch": 2.7, "learning_rate": 0.00026964856230031947, "loss": 0.3013, "step": 4220 }, { "epoch": 2.7, "learning_rate": 0.0002702875399361022, "loss": 0.3681, "step": 4230 }, { "epoch": 2.71, "learning_rate": 0.000270926517571885, "loss": 0.313, "step": 4240 }, { "epoch": 2.72, "learning_rate": 0.0002715654952076677, "loss": 0.3769, "step": 4250 }, { "epoch": 2.72, "learning_rate": 0.0002722044728434505, "loss": 0.346, "step": 4260 }, { "epoch": 2.73, "learning_rate": 0.0002728434504792333, "loss": 0.321, "step": 4270 }, { "epoch": 2.73, "learning_rate": 0.000273482428115016, "loss": 0.3783, "step": 4280 }, { "epoch": 2.74, "learning_rate": 0.00027412140575079874, "loss": 0.3621, "step": 4290 }, { "epoch": 2.75, "learning_rate": 0.00027476038338658147, "loss": 0.3509, "step": 4300 }, { "epoch": 2.75, "learning_rate": 0.00027539936102236425, "loss": 0.3836, "step": 4310 }, { "epoch": 2.76, "learning_rate": 0.000276038338658147, "loss": 0.3595, "step": 4320 }, { "epoch": 2.77, "learning_rate": 0.0002766773162939297, "loss": 0.3375, "step": 4330 }, { "epoch": 2.77, "learning_rate": 0.0002773162939297125, "loss": 0.3343, "step": 4340 }, { "epoch": 2.78, "learning_rate": 0.0002779552715654952, "loss": 0.3306, "step": 4350 }, { "epoch": 2.79, "learning_rate": 0.00027859424920127795, "loss": 0.3474, "step": 4360 }, { "epoch": 2.79, "learning_rate": 0.0002792332268370607, "loss": 0.3696, "step": 4370 }, { "epoch": 2.8, "learning_rate": 0.00027987220447284346, "loss": 0.35, "step": 4380 }, { "epoch": 2.8, "learning_rate": 0.0002805111821086262, "loss": 0.3887, "step": 4390 }, { "epoch": 2.81, "learning_rate": 0.0002811501597444089, "loss": 0.3449, "step": 4400 }, { "epoch": 2.82, "learning_rate": 0.0002817891373801917, "loss": 0.369, "step": 4410 }, { "epoch": 2.82, "learning_rate": 0.00028242811501597443, "loss": 0.373, "step": 4420 }, { "epoch": 2.83, "learning_rate": 0.00028306709265175716, "loss": 0.3299, "step": 4430 }, { "epoch": 2.84, "learning_rate": 0.00028370607028753995, "loss": 0.3233, "step": 4440 }, { "epoch": 2.84, "learning_rate": 0.0002843450479233227, "loss": 0.3629, "step": 4450 }, { "epoch": 2.85, "learning_rate": 0.00028498402555910546, "loss": 0.3576, "step": 4460 }, { "epoch": 2.86, "learning_rate": 0.0002856230031948882, "loss": 0.3053, "step": 4470 }, { "epoch": 2.86, "learning_rate": 0.00028626198083067097, "loss": 0.3185, "step": 4480 }, { "epoch": 2.87, "learning_rate": 0.0002869009584664537, "loss": 0.3423, "step": 4490 }, { "epoch": 2.88, "learning_rate": 0.00028753993610223643, "loss": 0.3727, "step": 4500 }, { "epoch": 2.88, "learning_rate": 0.0002881789137380192, "loss": 0.3121, "step": 4510 }, { "epoch": 2.89, "learning_rate": 0.00028881789137380194, "loss": 0.322, "step": 4520 }, { "epoch": 2.89, "learning_rate": 0.00028945686900958467, "loss": 0.3581, "step": 4530 }, { "epoch": 2.9, "learning_rate": 0.0002900958466453674, "loss": 0.339, "step": 4540 }, { "epoch": 2.91, "learning_rate": 0.0002907348242811502, "loss": 0.3226, "step": 4550 }, { "epoch": 2.91, "learning_rate": 0.0002913738019169329, "loss": 0.2947, "step": 4560 }, { "epoch": 2.92, "learning_rate": 0.00029201277955271564, "loss": 0.3572, "step": 4570 }, { "epoch": 2.93, "learning_rate": 0.0002926517571884984, "loss": 0.3029, "step": 4580 }, { "epoch": 2.93, "learning_rate": 0.00029329073482428115, "loss": 0.3149, "step": 4590 }, { "epoch": 2.94, "learning_rate": 0.0002939297124600639, "loss": 0.3521, "step": 4600 }, { "epoch": 2.95, "learning_rate": 0.0002945686900958466, "loss": 0.3207, "step": 4610 }, { "epoch": 2.95, "learning_rate": 0.0002952076677316294, "loss": 0.2926, "step": 4620 }, { "epoch": 2.96, "learning_rate": 0.0002958466453674121, "loss": 0.3318, "step": 4630 }, { "epoch": 2.96, "learning_rate": 0.00029648562300319485, "loss": 0.2907, "step": 4640 }, { "epoch": 2.97, "learning_rate": 0.00029712460063897764, "loss": 0.3876, "step": 4650 }, { "epoch": 2.98, "learning_rate": 0.0002977635782747604, "loss": 0.3375, "step": 4660 }, { "epoch": 2.98, "learning_rate": 0.00029840255591054315, "loss": 0.3327, "step": 4670 }, { "epoch": 2.99, "learning_rate": 0.0002990415335463259, "loss": 0.2991, "step": 4680 }, { "epoch": 3.0, "learning_rate": 0.00029968051118210866, "loss": 0.3027, "step": 4690 }, { "epoch": 3.0, "learning_rate": 0.0003003194888178914, "loss": 0.3189, "step": 4700 }, { "epoch": 3.01, "learning_rate": 0.0003009584664536741, "loss": 0.3073, "step": 4710 }, { "epoch": 3.02, "learning_rate": 0.0003015974440894569, "loss": 0.2792, "step": 4720 }, { "epoch": 3.02, "learning_rate": 0.00030223642172523963, "loss": 0.2776, "step": 4730 }, { "epoch": 3.03, "learning_rate": 0.00030287539936102236, "loss": 0.3064, "step": 4740 }, { "epoch": 3.04, "learning_rate": 0.0003035143769968051, "loss": 0.2736, "step": 4750 }, { "epoch": 3.04, "learning_rate": 0.0003041533546325879, "loss": 0.2906, "step": 4760 }, { "epoch": 3.05, "learning_rate": 0.0003047923322683706, "loss": 0.2766, "step": 4770 }, { "epoch": 3.05, "learning_rate": 0.00030543130990415333, "loss": 0.3055, "step": 4780 }, { "epoch": 3.06, "learning_rate": 0.0003060702875399361, "loss": 0.2889, "step": 4790 }, { "epoch": 3.07, "learning_rate": 0.00030670926517571885, "loss": 0.2952, "step": 4800 }, { "epoch": 3.07, "learning_rate": 0.0003073482428115016, "loss": 0.3135, "step": 4810 }, { "epoch": 3.08, "learning_rate": 0.00030798722044728436, "loss": 0.3029, "step": 4820 }, { "epoch": 3.09, "learning_rate": 0.0003086261980830671, "loss": 0.2786, "step": 4830 }, { "epoch": 3.09, "learning_rate": 0.0003092651757188498, "loss": 0.2854, "step": 4840 }, { "epoch": 3.1, "learning_rate": 0.00030990415335463255, "loss": 0.2851, "step": 4850 }, { "epoch": 3.11, "learning_rate": 0.0003105431309904154, "loss": 0.2715, "step": 4860 }, { "epoch": 3.11, "learning_rate": 0.0003111821086261981, "loss": 0.2966, "step": 4870 }, { "epoch": 3.12, "learning_rate": 0.00031182108626198084, "loss": 0.2524, "step": 4880 }, { "epoch": 3.12, "learning_rate": 0.0003124600638977636, "loss": 0.3158, "step": 4890 }, { "epoch": 3.13, "learning_rate": 0.00031309904153354636, "loss": 0.2955, "step": 4900 }, { "epoch": 3.14, "learning_rate": 0.0003137380191693291, "loss": 0.3291, "step": 4910 }, { "epoch": 3.14, "learning_rate": 0.0003143769968051118, "loss": 0.3231, "step": 4920 }, { "epoch": 3.15, "learning_rate": 0.0003150159744408946, "loss": 0.3137, "step": 4930 }, { "epoch": 3.16, "learning_rate": 0.0003156549520766773, "loss": 0.2828, "step": 4940 }, { "epoch": 3.16, "learning_rate": 0.00031629392971246006, "loss": 0.2963, "step": 4950 }, { "epoch": 3.17, "learning_rate": 0.00031693290734824284, "loss": 0.281, "step": 4960 }, { "epoch": 3.18, "learning_rate": 0.00031757188498402557, "loss": 0.2507, "step": 4970 }, { "epoch": 3.18, "learning_rate": 0.0003182108626198083, "loss": 0.2772, "step": 4980 }, { "epoch": 3.19, "learning_rate": 0.000318849840255591, "loss": 0.2989, "step": 4990 }, { "epoch": 3.19, "learning_rate": 0.0003194888178913738, "loss": 0.2848, "step": 5000 }, { "epoch": 3.2, "learning_rate": 0.00032012779552715654, "loss": 0.2925, "step": 5010 }, { "epoch": 3.21, "learning_rate": 0.00032076677316293927, "loss": 0.2756, "step": 5020 }, { "epoch": 3.21, "learning_rate": 0.00032140575079872205, "loss": 0.2793, "step": 5030 }, { "epoch": 3.22, "learning_rate": 0.0003220447284345048, "loss": 0.3096, "step": 5040 }, { "epoch": 3.23, "learning_rate": 0.0003226837060702875, "loss": 0.3056, "step": 5050 }, { "epoch": 3.23, "learning_rate": 0.0003233226837060703, "loss": 0.2636, "step": 5060 }, { "epoch": 3.24, "learning_rate": 0.0003239616613418531, "loss": 0.362, "step": 5070 }, { "epoch": 3.25, "learning_rate": 0.0003246006389776358, "loss": 0.294, "step": 5080 }, { "epoch": 3.25, "learning_rate": 0.00032523961661341854, "loss": 0.2913, "step": 5090 }, { "epoch": 3.26, "learning_rate": 0.0003258785942492013, "loss": 0.3018, "step": 5100 }, { "epoch": 3.27, "learning_rate": 0.00032651757188498405, "loss": 0.3096, "step": 5110 }, { "epoch": 3.27, "learning_rate": 0.0003271565495207668, "loss": 0.2857, "step": 5120 }, { "epoch": 3.28, "learning_rate": 0.0003277955271565495, "loss": 0.2586, "step": 5130 }, { "epoch": 3.28, "learning_rate": 0.0003284345047923323, "loss": 0.2943, "step": 5140 }, { "epoch": 3.29, "learning_rate": 0.000329073482428115, "loss": 0.2719, "step": 5150 }, { "epoch": 3.3, "learning_rate": 0.00032971246006389775, "loss": 0.2682, "step": 5160 }, { "epoch": 3.3, "learning_rate": 0.00033035143769968053, "loss": 0.2883, "step": 5170 }, { "epoch": 3.31, "learning_rate": 0.00033099041533546326, "loss": 0.286, "step": 5180 }, { "epoch": 3.32, "learning_rate": 0.000331629392971246, "loss": 0.3188, "step": 5190 }, { "epoch": 3.32, "learning_rate": 0.0003322683706070288, "loss": 0.2683, "step": 5200 }, { "epoch": 3.33, "learning_rate": 0.0003329073482428115, "loss": 0.2782, "step": 5210 }, { "epoch": 3.34, "learning_rate": 0.00033354632587859423, "loss": 0.2968, "step": 5220 }, { "epoch": 3.34, "learning_rate": 0.00033418530351437696, "loss": 0.2765, "step": 5230 }, { "epoch": 3.35, "learning_rate": 0.00033482428115015974, "loss": 0.2481, "step": 5240 }, { "epoch": 3.35, "learning_rate": 0.00033546325878594253, "loss": 0.301, "step": 5250 }, { "epoch": 3.36, "learning_rate": 0.00033610223642172526, "loss": 0.2576, "step": 5260 }, { "epoch": 3.37, "learning_rate": 0.00033674121405750804, "loss": 0.3013, "step": 5270 }, { "epoch": 3.37, "learning_rate": 0.00033738019169329077, "loss": 0.3062, "step": 5280 }, { "epoch": 3.38, "learning_rate": 0.0003380191693290735, "loss": 0.2969, "step": 5290 }, { "epoch": 3.39, "learning_rate": 0.00033865814696485623, "loss": 0.3039, "step": 5300 }, { "epoch": 3.39, "learning_rate": 0.000339297124600639, "loss": 0.2954, "step": 5310 }, { "epoch": 3.4, "learning_rate": 0.00033993610223642174, "loss": 0.2835, "step": 5320 }, { "epoch": 3.41, "learning_rate": 0.00034057507987220447, "loss": 0.3215, "step": 5330 }, { "epoch": 3.41, "learning_rate": 0.00034121405750798725, "loss": 0.2902, "step": 5340 }, { "epoch": 3.42, "learning_rate": 0.00034185303514377, "loss": 0.2957, "step": 5350 }, { "epoch": 3.42, "learning_rate": 0.0003424920127795527, "loss": 0.2844, "step": 5360 }, { "epoch": 3.43, "learning_rate": 0.00034313099041533544, "loss": 0.2502, "step": 5370 }, { "epoch": 3.44, "learning_rate": 0.0003437699680511182, "loss": 0.2757, "step": 5380 }, { "epoch": 3.44, "learning_rate": 0.00034440894568690095, "loss": 0.3069, "step": 5390 }, { "epoch": 3.45, "learning_rate": 0.0003450479233226837, "loss": 0.2948, "step": 5400 }, { "epoch": 3.46, "learning_rate": 0.00034568690095846647, "loss": 0.2886, "step": 5410 }, { "epoch": 3.46, "learning_rate": 0.0003463258785942492, "loss": 0.2625, "step": 5420 }, { "epoch": 3.47, "learning_rate": 0.0003469648562300319, "loss": 0.3224, "step": 5430 }, { "epoch": 3.48, "learning_rate": 0.00034760383386581465, "loss": 0.3151, "step": 5440 }, { "epoch": 3.48, "learning_rate": 0.0003482428115015975, "loss": 0.3082, "step": 5450 }, { "epoch": 3.49, "learning_rate": 0.0003488817891373802, "loss": 0.2851, "step": 5460 }, { "epoch": 3.5, "learning_rate": 0.00034952076677316295, "loss": 0.2495, "step": 5470 }, { "epoch": 3.5, "learning_rate": 0.00035015974440894573, "loss": 0.3161, "step": 5480 }, { "epoch": 3.51, "learning_rate": 0.00035079872204472846, "loss": 0.3085, "step": 5490 }, { "epoch": 3.51, "learning_rate": 0.0003514376996805112, "loss": 0.2806, "step": 5500 }, { "epoch": 3.52, "learning_rate": 0.0003520766773162939, "loss": 0.2427, "step": 5510 }, { "epoch": 3.53, "learning_rate": 0.0003527156549520767, "loss": 0.2567, "step": 5520 }, { "epoch": 3.53, "learning_rate": 0.00035335463258785943, "loss": 0.2581, "step": 5530 }, { "epoch": 3.54, "learning_rate": 0.00035399361022364216, "loss": 0.3021, "step": 5540 }, { "epoch": 3.55, "learning_rate": 0.00035463258785942495, "loss": 0.2633, "step": 5550 }, { "epoch": 3.55, "learning_rate": 0.0003552715654952077, "loss": 0.2753, "step": 5560 }, { "epoch": 3.56, "learning_rate": 0.0003559105431309904, "loss": 0.2755, "step": 5570 }, { "epoch": 3.57, "learning_rate": 0.0003565495207667732, "loss": 0.297, "step": 5580 }, { "epoch": 3.57, "learning_rate": 0.0003571884984025559, "loss": 0.2739, "step": 5590 }, { "epoch": 3.58, "learning_rate": 0.00035782747603833865, "loss": 0.2994, "step": 5600 }, { "epoch": 3.58, "learning_rate": 0.0003584664536741214, "loss": 0.3026, "step": 5610 }, { "epoch": 3.59, "learning_rate": 0.00035910543130990416, "loss": 0.2724, "step": 5620 }, { "epoch": 3.6, "learning_rate": 0.0003597444089456869, "loss": 0.2863, "step": 5630 }, { "epoch": 3.6, "learning_rate": 0.0003603833865814696, "loss": 0.3245, "step": 5640 }, { "epoch": 3.61, "learning_rate": 0.00036102236421725245, "loss": 0.2751, "step": 5650 }, { "epoch": 3.62, "learning_rate": 0.0003616613418530352, "loss": 0.2783, "step": 5660 }, { "epoch": 3.62, "learning_rate": 0.0003623003194888179, "loss": 0.3159, "step": 5670 }, { "epoch": 3.63, "learning_rate": 0.00036293929712460064, "loss": 0.3649, "step": 5680 }, { "epoch": 3.64, "learning_rate": 0.0003635782747603834, "loss": 0.2486, "step": 5690 }, { "epoch": 3.64, "learning_rate": 0.00036421725239616615, "loss": 0.319, "step": 5700 }, { "epoch": 3.65, "learning_rate": 0.0003648562300319489, "loss": 0.2897, "step": 5710 }, { "epoch": 3.65, "learning_rate": 0.00036549520766773167, "loss": 0.2648, "step": 5720 }, { "epoch": 3.66, "learning_rate": 0.0003661341853035144, "loss": 0.3063, "step": 5730 }, { "epoch": 3.67, "learning_rate": 0.0003667731629392971, "loss": 0.2459, "step": 5740 }, { "epoch": 3.67, "learning_rate": 0.00036741214057507985, "loss": 0.3034, "step": 5750 }, { "epoch": 3.68, "learning_rate": 0.00036805111821086264, "loss": 0.2805, "step": 5760 }, { "epoch": 3.69, "learning_rate": 0.00036869009584664537, "loss": 0.2748, "step": 5770 }, { "epoch": 3.69, "learning_rate": 0.0003693290734824281, "loss": 0.2738, "step": 5780 }, { "epoch": 3.7, "learning_rate": 0.0003699680511182109, "loss": 0.2739, "step": 5790 }, { "epoch": 3.71, "learning_rate": 0.0003706070287539936, "loss": 0.3178, "step": 5800 }, { "epoch": 3.71, "learning_rate": 0.00037124600638977634, "loss": 0.2705, "step": 5810 }, { "epoch": 3.72, "learning_rate": 0.00037188498402555907, "loss": 0.3111, "step": 5820 }, { "epoch": 3.73, "learning_rate": 0.00037252396166134185, "loss": 0.2781, "step": 5830 }, { "epoch": 3.73, "learning_rate": 0.0003731629392971246, "loss": 0.2899, "step": 5840 }, { "epoch": 3.74, "learning_rate": 0.00037380191693290736, "loss": 0.316, "step": 5850 }, { "epoch": 3.74, "learning_rate": 0.00037444089456869015, "loss": 0.3097, "step": 5860 }, { "epoch": 3.75, "learning_rate": 0.0003750798722044729, "loss": 0.2693, "step": 5870 }, { "epoch": 3.76, "learning_rate": 0.0003757188498402556, "loss": 0.2846, "step": 5880 }, { "epoch": 3.76, "learning_rate": 0.00037635782747603833, "loss": 0.2892, "step": 5890 }, { "epoch": 3.77, "learning_rate": 0.0003769968051118211, "loss": 0.313, "step": 5900 }, { "epoch": 3.78, "learning_rate": 0.00037763578274760385, "loss": 0.2979, "step": 5910 }, { "epoch": 3.78, "learning_rate": 0.0003782747603833866, "loss": 0.2734, "step": 5920 }, { "epoch": 3.79, "learning_rate": 0.00037891373801916936, "loss": 0.2706, "step": 5930 }, { "epoch": 3.8, "learning_rate": 0.0003795527156549521, "loss": 0.2963, "step": 5940 }, { "epoch": 3.8, "learning_rate": 0.0003801916932907348, "loss": 0.2532, "step": 5950 }, { "epoch": 3.81, "learning_rate": 0.0003808306709265176, "loss": 0.3183, "step": 5960 }, { "epoch": 3.81, "learning_rate": 0.00038146964856230033, "loss": 0.3032, "step": 5970 }, { "epoch": 3.82, "learning_rate": 0.00038210862619808306, "loss": 0.3129, "step": 5980 }, { "epoch": 3.83, "learning_rate": 0.0003827476038338658, "loss": 0.267, "step": 5990 }, { "epoch": 3.83, "learning_rate": 0.00038338658146964857, "loss": 0.2766, "step": 6000 }, { "epoch": 3.84, "learning_rate": 0.0003840255591054313, "loss": 0.2861, "step": 6010 }, { "epoch": 3.85, "learning_rate": 0.00038466453674121403, "loss": 0.2807, "step": 6020 }, { "epoch": 3.85, "learning_rate": 0.0003853035143769968, "loss": 0.3118, "step": 6030 }, { "epoch": 3.86, "learning_rate": 0.00038594249201277954, "loss": 0.286, "step": 6040 }, { "epoch": 3.87, "learning_rate": 0.0003865814696485623, "loss": 0.2676, "step": 6050 }, { "epoch": 3.87, "learning_rate": 0.00038722044728434506, "loss": 0.3028, "step": 6060 }, { "epoch": 3.88, "learning_rate": 0.00038785942492012784, "loss": 0.2564, "step": 6070 }, { "epoch": 3.88, "learning_rate": 0.00038849840255591057, "loss": 0.2974, "step": 6080 }, { "epoch": 3.89, "learning_rate": 0.0003891373801916933, "loss": 0.2727, "step": 6090 }, { "epoch": 3.9, "learning_rate": 0.0003897763578274761, "loss": 0.2673, "step": 6100 }, { "epoch": 3.9, "learning_rate": 0.0003904153354632588, "loss": 0.27, "step": 6110 }, { "epoch": 3.91, "learning_rate": 0.00039105431309904154, "loss": 0.2758, "step": 6120 }, { "epoch": 3.92, "learning_rate": 0.00039169329073482427, "loss": 0.2891, "step": 6130 }, { "epoch": 3.92, "learning_rate": 0.00039233226837060705, "loss": 0.2904, "step": 6140 }, { "epoch": 3.93, "learning_rate": 0.0003929712460063898, "loss": 0.3009, "step": 6150 }, { "epoch": 3.94, "learning_rate": 0.0003936102236421725, "loss": 0.314, "step": 6160 }, { "epoch": 3.94, "learning_rate": 0.0003942492012779553, "loss": 0.289, "step": 6170 }, { "epoch": 3.95, "learning_rate": 0.000394888178913738, "loss": 0.3163, "step": 6180 }, { "epoch": 3.95, "learning_rate": 0.00039552715654952075, "loss": 0.2605, "step": 6190 }, { "epoch": 3.96, "learning_rate": 0.0003961661341853035, "loss": 0.2758, "step": 6200 }, { "epoch": 3.97, "learning_rate": 0.00039680511182108626, "loss": 0.3064, "step": 6210 }, { "epoch": 3.97, "learning_rate": 0.000397444089456869, "loss": 0.2991, "step": 6220 }, { "epoch": 3.98, "learning_rate": 0.0003980830670926517, "loss": 0.307, "step": 6230 }, { "epoch": 3.99, "learning_rate": 0.0003987220447284345, "loss": 0.285, "step": 6240 }, { "epoch": 3.99, "learning_rate": 0.0003993610223642173, "loss": 0.2797, "step": 6250 }, { "epoch": 4.0, "learning_rate": 0.0004, "loss": 0.2737, "step": 6260 }, { "epoch": 4.01, "learning_rate": 0.00040063897763578275, "loss": 0.2392, "step": 6270 }, { "epoch": 4.01, "learning_rate": 0.00040127795527156553, "loss": 0.2275, "step": 6280 }, { "epoch": 4.02, "learning_rate": 0.00040191693290734826, "loss": 0.2617, "step": 6290 }, { "epoch": 4.03, "learning_rate": 0.000402555910543131, "loss": 0.2528, "step": 6300 }, { "epoch": 4.03, "learning_rate": 0.0004031948881789138, "loss": 0.2262, "step": 6310 }, { "epoch": 4.04, "learning_rate": 0.0004038338658146965, "loss": 0.259, "step": 6320 }, { "epoch": 4.04, "learning_rate": 0.00040447284345047923, "loss": 0.2519, "step": 6330 }, { "epoch": 4.05, "learning_rate": 0.000405111821086262, "loss": 0.2259, "step": 6340 }, { "epoch": 4.06, "learning_rate": 0.00040575079872204474, "loss": 0.2386, "step": 6350 }, { "epoch": 4.06, "learning_rate": 0.0004063897763578275, "loss": 0.2217, "step": 6360 }, { "epoch": 4.07, "learning_rate": 0.0004070287539936102, "loss": 0.2309, "step": 6370 }, { "epoch": 4.08, "learning_rate": 0.000407667731629393, "loss": 0.211, "step": 6380 }, { "epoch": 4.08, "learning_rate": 0.0004083067092651757, "loss": 0.2459, "step": 6390 }, { "epoch": 4.09, "learning_rate": 0.00040894568690095844, "loss": 0.2603, "step": 6400 }, { "epoch": 4.1, "learning_rate": 0.00040958466453674123, "loss": 0.2343, "step": 6410 }, { "epoch": 4.1, "learning_rate": 0.00041022364217252396, "loss": 0.2562, "step": 6420 }, { "epoch": 4.11, "learning_rate": 0.0004108626198083067, "loss": 0.2148, "step": 6430 }, { "epoch": 4.11, "learning_rate": 0.00041150159744408947, "loss": 0.2349, "step": 6440 }, { "epoch": 4.12, "learning_rate": 0.00041214057507987225, "loss": 0.2975, "step": 6450 }, { "epoch": 4.13, "learning_rate": 0.000412779552715655, "loss": 0.2541, "step": 6460 }, { "epoch": 4.13, "learning_rate": 0.0004134185303514377, "loss": 0.2555, "step": 6470 }, { "epoch": 4.14, "learning_rate": 0.0004140575079872205, "loss": 0.2818, "step": 6480 }, { "epoch": 4.15, "learning_rate": 0.0004146964856230032, "loss": 0.2577, "step": 6490 }, { "epoch": 4.15, "learning_rate": 0.00041533546325878595, "loss": 0.2433, "step": 6500 }, { "epoch": 4.16, "learning_rate": 0.0004159744408945687, "loss": 0.2231, "step": 6510 }, { "epoch": 4.17, "learning_rate": 0.00041661341853035147, "loss": 0.2841, "step": 6520 }, { "epoch": 4.17, "learning_rate": 0.0004172523961661342, "loss": 0.2048, "step": 6530 }, { "epoch": 4.18, "learning_rate": 0.0004178913738019169, "loss": 0.2484, "step": 6540 }, { "epoch": 4.19, "learning_rate": 0.0004185303514376997, "loss": 0.2542, "step": 6550 }, { "epoch": 4.19, "learning_rate": 0.00041916932907348244, "loss": 0.2293, "step": 6560 }, { "epoch": 4.2, "learning_rate": 0.00041980830670926517, "loss": 0.2545, "step": 6570 }, { "epoch": 4.2, "learning_rate": 0.0004204472843450479, "loss": 0.2565, "step": 6580 }, { "epoch": 4.21, "learning_rate": 0.0004210862619808307, "loss": 0.2441, "step": 6590 }, { "epoch": 4.22, "learning_rate": 0.0004217252396166134, "loss": 0.2623, "step": 6600 }, { "epoch": 4.22, "learning_rate": 0.00042236421725239614, "loss": 0.2542, "step": 6610 }, { "epoch": 4.23, "learning_rate": 0.0004230031948881789, "loss": 0.2368, "step": 6620 }, { "epoch": 4.24, "learning_rate": 0.00042364217252396165, "loss": 0.2683, "step": 6630 }, { "epoch": 4.24, "learning_rate": 0.00042428115015974443, "loss": 0.2468, "step": 6640 }, { "epoch": 4.25, "learning_rate": 0.00042492012779552716, "loss": 0.226, "step": 6650 }, { "epoch": 4.26, "learning_rate": 0.00042555910543130995, "loss": 0.2362, "step": 6660 }, { "epoch": 4.26, "learning_rate": 0.0004261980830670927, "loss": 0.2606, "step": 6670 }, { "epoch": 4.27, "learning_rate": 0.0004268370607028754, "loss": 0.2436, "step": 6680 }, { "epoch": 4.27, "learning_rate": 0.0004274760383386582, "loss": 0.2501, "step": 6690 }, { "epoch": 4.28, "learning_rate": 0.0004281150159744409, "loss": 0.2367, "step": 6700 }, { "epoch": 4.29, "learning_rate": 0.00042875399361022365, "loss": 0.248, "step": 6710 }, { "epoch": 4.29, "learning_rate": 0.00042939297124600643, "loss": 0.2475, "step": 6720 }, { "epoch": 4.3, "learning_rate": 0.00043003194888178916, "loss": 0.2544, "step": 6730 }, { "epoch": 4.31, "learning_rate": 0.0004306709265175719, "loss": 0.2127, "step": 6740 }, { "epoch": 4.31, "learning_rate": 0.0004313099041533546, "loss": 0.2495, "step": 6750 }, { "epoch": 4.32, "learning_rate": 0.0004319488817891374, "loss": 0.2434, "step": 6760 }, { "epoch": 4.33, "learning_rate": 0.00043258785942492013, "loss": 0.2641, "step": 6770 }, { "epoch": 4.33, "learning_rate": 0.00043322683706070286, "loss": 0.2549, "step": 6780 }, { "epoch": 4.34, "learning_rate": 0.00043386581469648564, "loss": 0.2748, "step": 6790 }, { "epoch": 4.34, "learning_rate": 0.00043450479233226837, "loss": 0.2991, "step": 6800 }, { "epoch": 4.35, "learning_rate": 0.0004351437699680511, "loss": 0.2217, "step": 6810 }, { "epoch": 4.36, "learning_rate": 0.00043578274760383383, "loss": 0.2307, "step": 6820 }, { "epoch": 4.36, "learning_rate": 0.0004364217252396166, "loss": 0.234, "step": 6830 }, { "epoch": 4.37, "learning_rate": 0.0004370607028753994, "loss": 0.2637, "step": 6840 }, { "epoch": 4.38, "learning_rate": 0.0004376996805111821, "loss": 0.2285, "step": 6850 }, { "epoch": 4.38, "learning_rate": 0.0004383386581469649, "loss": 0.2585, "step": 6860 }, { "epoch": 4.39, "learning_rate": 0.00043897763578274764, "loss": 0.2358, "step": 6870 }, { "epoch": 4.4, "learning_rate": 0.00043961661341853037, "loss": 0.246, "step": 6880 }, { "epoch": 4.4, "learning_rate": 0.0004402555910543131, "loss": 0.2672, "step": 6890 }, { "epoch": 4.41, "learning_rate": 0.0004408945686900959, "loss": 0.2606, "step": 6900 }, { "epoch": 4.42, "learning_rate": 0.0004415335463258786, "loss": 0.2397, "step": 6910 }, { "epoch": 4.42, "learning_rate": 0.00044217252396166134, "loss": 0.2539, "step": 6920 }, { "epoch": 4.43, "learning_rate": 0.0004428115015974441, "loss": 0.2348, "step": 6930 }, { "epoch": 4.43, "learning_rate": 0.00044345047923322685, "loss": 0.2845, "step": 6940 }, { "epoch": 4.44, "learning_rate": 0.0004440894568690096, "loss": 0.2402, "step": 6950 }, { "epoch": 4.45, "learning_rate": 0.0004447284345047923, "loss": 0.2678, "step": 6960 }, { "epoch": 4.45, "learning_rate": 0.0004453674121405751, "loss": 0.2653, "step": 6970 }, { "epoch": 4.46, "learning_rate": 0.0004460063897763578, "loss": 0.2813, "step": 6980 }, { "epoch": 4.47, "learning_rate": 0.00044664536741214055, "loss": 0.262, "step": 6990 }, { "epoch": 4.47, "learning_rate": 0.00044728434504792333, "loss": 0.2247, "step": 7000 }, { "epoch": 4.48, "learning_rate": 0.00044792332268370606, "loss": 0.2547, "step": 7010 }, { "epoch": 4.49, "learning_rate": 0.0004485623003194888, "loss": 0.2627, "step": 7020 }, { "epoch": 4.49, "learning_rate": 0.0004492012779552715, "loss": 0.277, "step": 7030 }, { "epoch": 4.5, "learning_rate": 0.00044984025559105436, "loss": 0.2467, "step": 7040 }, { "epoch": 4.5, "learning_rate": 0.0004504792332268371, "loss": 0.2437, "step": 7050 }, { "epoch": 4.51, "learning_rate": 0.0004511182108626198, "loss": 0.2605, "step": 7060 }, { "epoch": 4.52, "learning_rate": 0.0004517571884984026, "loss": 0.2649, "step": 7070 }, { "epoch": 4.52, "learning_rate": 0.00045239616613418533, "loss": 0.283, "step": 7080 }, { "epoch": 4.53, "learning_rate": 0.00045303514376996806, "loss": 0.2337, "step": 7090 }, { "epoch": 4.54, "learning_rate": 0.00045367412140575084, "loss": 0.2756, "step": 7100 }, { "epoch": 4.54, "learning_rate": 0.00045431309904153357, "loss": 0.2699, "step": 7110 }, { "epoch": 4.55, "learning_rate": 0.0004549520766773163, "loss": 0.2649, "step": 7120 }, { "epoch": 4.56, "learning_rate": 0.00045559105431309903, "loss": 0.2685, "step": 7130 }, { "epoch": 4.56, "learning_rate": 0.0004562300319488818, "loss": 0.2451, "step": 7140 }, { "epoch": 4.57, "learning_rate": 0.00045686900958466454, "loss": 0.2614, "step": 7150 }, { "epoch": 4.57, "learning_rate": 0.00045750798722044727, "loss": 0.2484, "step": 7160 }, { "epoch": 4.58, "learning_rate": 0.00045814696485623006, "loss": 0.2642, "step": 7170 }, { "epoch": 4.59, "learning_rate": 0.0004587859424920128, "loss": 0.2413, "step": 7180 }, { "epoch": 4.59, "learning_rate": 0.0004594249201277955, "loss": 0.2324, "step": 7190 }, { "epoch": 4.6, "learning_rate": 0.00046006389776357824, "loss": 0.2632, "step": 7200 }, { "epoch": 4.61, "learning_rate": 0.000460702875399361, "loss": 0.2472, "step": 7210 }, { "epoch": 4.61, "learning_rate": 0.00046134185303514376, "loss": 0.276, "step": 7220 }, { "epoch": 4.62, "learning_rate": 0.0004619808306709265, "loss": 0.2647, "step": 7230 }, { "epoch": 4.63, "learning_rate": 0.0004626198083067093, "loss": 0.2409, "step": 7240 }, { "epoch": 4.63, "learning_rate": 0.00046325878594249205, "loss": 0.2568, "step": 7250 }, { "epoch": 4.64, "learning_rate": 0.0004638977635782748, "loss": 0.2856, "step": 7260 }, { "epoch": 4.65, "learning_rate": 0.0004645367412140575, "loss": 0.249, "step": 7270 }, { "epoch": 4.65, "learning_rate": 0.0004651757188498403, "loss": 0.2779, "step": 7280 }, { "epoch": 4.66, "learning_rate": 0.000465814696485623, "loss": 0.2317, "step": 7290 }, { "epoch": 4.66, "learning_rate": 0.00046645367412140575, "loss": 0.2889, "step": 7300 }, { "epoch": 4.67, "learning_rate": 0.00046709265175718854, "loss": 0.2711, "step": 7310 }, { "epoch": 4.68, "learning_rate": 0.00046773162939297126, "loss": 0.2467, "step": 7320 }, { "epoch": 4.68, "learning_rate": 0.000468370607028754, "loss": 0.2422, "step": 7330 }, { "epoch": 4.69, "learning_rate": 0.0004690095846645367, "loss": 0.2018, "step": 7340 }, { "epoch": 4.7, "learning_rate": 0.0004696485623003195, "loss": 0.2285, "step": 7350 }, { "epoch": 4.7, "learning_rate": 0.00047028753993610224, "loss": 0.2277, "step": 7360 }, { "epoch": 4.71, "learning_rate": 0.00047092651757188496, "loss": 0.2734, "step": 7370 }, { "epoch": 4.72, "learning_rate": 0.00047156549520766775, "loss": 0.2438, "step": 7380 }, { "epoch": 4.72, "learning_rate": 0.0004722044728434505, "loss": 0.2769, "step": 7390 }, { "epoch": 4.73, "learning_rate": 0.0004728434504792332, "loss": 0.2448, "step": 7400 }, { "epoch": 4.73, "learning_rate": 0.00047348242811501594, "loss": 0.2272, "step": 7410 }, { "epoch": 4.74, "learning_rate": 0.0004741214057507987, "loss": 0.2935, "step": 7420 }, { "epoch": 4.75, "learning_rate": 0.0004747603833865815, "loss": 0.2244, "step": 7430 }, { "epoch": 4.75, "learning_rate": 0.00047539936102236423, "loss": 0.2567, "step": 7440 }, { "epoch": 4.76, "learning_rate": 0.000476038338658147, "loss": 0.2725, "step": 7450 }, { "epoch": 4.77, "learning_rate": 0.00047667731629392974, "loss": 0.2912, "step": 7460 }, { "epoch": 4.77, "learning_rate": 0.0004773162939297125, "loss": 0.2618, "step": 7470 }, { "epoch": 4.78, "learning_rate": 0.00047795527156549526, "loss": 0.2646, "step": 7480 }, { "epoch": 4.79, "learning_rate": 0.000478594249201278, "loss": 0.2548, "step": 7490 }, { "epoch": 4.79, "learning_rate": 0.0004792332268370607, "loss": 0.2626, "step": 7500 }, { "epoch": 4.8, "learning_rate": 0.00047987220447284344, "loss": 0.2671, "step": 7510 }, { "epoch": 4.8, "learning_rate": 0.00048051118210862623, "loss": 0.2704, "step": 7520 }, { "epoch": 4.81, "learning_rate": 0.00048115015974440896, "loss": 0.2659, "step": 7530 }, { "epoch": 4.82, "learning_rate": 0.0004817891373801917, "loss": 0.2804, "step": 7540 }, { "epoch": 4.82, "learning_rate": 0.00048242811501597447, "loss": 0.2503, "step": 7550 }, { "epoch": 4.83, "learning_rate": 0.0004830670926517572, "loss": 0.2489, "step": 7560 }, { "epoch": 4.84, "learning_rate": 0.00048370607028753993, "loss": 0.2837, "step": 7570 }, { "epoch": 4.84, "learning_rate": 0.00048434504792332266, "loss": 0.2364, "step": 7580 }, { "epoch": 4.85, "learning_rate": 0.00048498402555910544, "loss": 0.2508, "step": 7590 }, { "epoch": 4.86, "learning_rate": 0.00048562300319488817, "loss": 0.2279, "step": 7600 }, { "epoch": 4.86, "learning_rate": 0.0004862619808306709, "loss": 0.2174, "step": 7610 }, { "epoch": 4.87, "learning_rate": 0.0004869009584664537, "loss": 0.2837, "step": 7620 }, { "epoch": 4.88, "learning_rate": 0.00048753993610223647, "loss": 0.2414, "step": 7630 }, { "epoch": 4.88, "learning_rate": 0.0004881789137380192, "loss": 0.2452, "step": 7640 }, { "epoch": 4.89, "learning_rate": 0.0004888178913738019, "loss": 0.2854, "step": 7650 }, { "epoch": 4.89, "learning_rate": 0.0004894568690095847, "loss": 0.2593, "step": 7660 }, { "epoch": 4.9, "learning_rate": 0.0004900958466453675, "loss": 0.2507, "step": 7670 }, { "epoch": 4.91, "learning_rate": 0.0004907348242811502, "loss": 0.2407, "step": 7680 }, { "epoch": 4.91, "learning_rate": 0.000491373801916933, "loss": 0.2345, "step": 7690 }, { "epoch": 4.92, "learning_rate": 0.0004920127795527157, "loss": 0.2786, "step": 7700 }, { "epoch": 4.93, "learning_rate": 0.0004926517571884984, "loss": 0.2639, "step": 7710 }, { "epoch": 4.93, "learning_rate": 0.0004932907348242811, "loss": 0.2819, "step": 7720 }, { "epoch": 4.94, "learning_rate": 0.0004939297124600639, "loss": 0.2602, "step": 7730 }, { "epoch": 4.95, "learning_rate": 0.0004945686900958467, "loss": 0.2647, "step": 7740 }, { "epoch": 4.95, "learning_rate": 0.0004952076677316294, "loss": 0.2126, "step": 7750 }, { "epoch": 4.96, "learning_rate": 0.0004958466453674122, "loss": 0.2694, "step": 7760 }, { "epoch": 4.96, "learning_rate": 0.0004964856230031949, "loss": 0.2193, "step": 7770 }, { "epoch": 4.97, "learning_rate": 0.0004971246006389776, "loss": 0.2219, "step": 7780 }, { "epoch": 4.98, "learning_rate": 0.0004977635782747603, "loss": 0.2674, "step": 7790 }, { "epoch": 4.98, "learning_rate": 0.0004984025559105431, "loss": 0.2368, "step": 7800 }, { "epoch": 4.99, "learning_rate": 0.0004990415335463259, "loss": 0.2356, "step": 7810 }, { "epoch": 5.0, "learning_rate": 0.0004996805111821086, "loss": 0.2548, "step": 7820 }, { "epoch": 5.0, "learning_rate": 0.0005003194888178914, "loss": 0.277, "step": 7830 }, { "epoch": 5.01, "learning_rate": 0.0005009584664536741, "loss": 0.2186, "step": 7840 }, { "epoch": 5.02, "learning_rate": 0.0005015974440894568, "loss": 0.2285, "step": 7850 }, { "epoch": 5.02, "learning_rate": 0.0005022364217252397, "loss": 0.2198, "step": 7860 }, { "epoch": 5.03, "learning_rate": 0.0005028753993610223, "loss": 0.1782, "step": 7870 }, { "epoch": 5.04, "learning_rate": 0.0005035143769968051, "loss": 0.2177, "step": 7880 }, { "epoch": 5.04, "learning_rate": 0.0005041533546325879, "loss": 0.2345, "step": 7890 }, { "epoch": 5.05, "learning_rate": 0.0005047923322683706, "loss": 0.2268, "step": 7900 }, { "epoch": 5.05, "learning_rate": 0.0005054313099041533, "loss": 0.2024, "step": 7910 }, { "epoch": 5.06, "learning_rate": 0.0005060702875399362, "loss": 0.2183, "step": 7920 }, { "epoch": 5.07, "learning_rate": 0.0005067092651757189, "loss": 0.2126, "step": 7930 }, { "epoch": 5.07, "learning_rate": 0.0005073482428115016, "loss": 0.241, "step": 7940 }, { "epoch": 5.08, "learning_rate": 0.0005079872204472845, "loss": 0.2137, "step": 7950 }, { "epoch": 5.09, "learning_rate": 0.0005086261980830671, "loss": 0.2156, "step": 7960 }, { "epoch": 5.09, "learning_rate": 0.0005092651757188499, "loss": 0.2081, "step": 7970 }, { "epoch": 5.1, "learning_rate": 0.0005099041533546325, "loss": 0.2235, "step": 7980 }, { "epoch": 5.11, "learning_rate": 0.0005105431309904154, "loss": 0.2023, "step": 7990 }, { "epoch": 5.11, "learning_rate": 0.0005111821086261981, "loss": 0.2001, "step": 8000 }, { "epoch": 5.12, "learning_rate": 0.0005118210862619808, "loss": 0.1899, "step": 8010 }, { "epoch": 5.12, "learning_rate": 0.0005124600638977636, "loss": 0.2328, "step": 8020 }, { "epoch": 5.13, "learning_rate": 0.0005130990415335464, "loss": 0.222, "step": 8030 }, { "epoch": 5.14, "learning_rate": 0.000513738019169329, "loss": 0.2191, "step": 8040 }, { "epoch": 5.14, "learning_rate": 0.0005143769968051119, "loss": 0.2012, "step": 8050 }, { "epoch": 5.15, "learning_rate": 0.0005150159744408946, "loss": 0.2093, "step": 8060 }, { "epoch": 5.16, "learning_rate": 0.0005156549520766773, "loss": 0.192, "step": 8070 }, { "epoch": 5.16, "learning_rate": 0.00051629392971246, "loss": 0.2442, "step": 8080 }, { "epoch": 5.17, "learning_rate": 0.0005169329073482429, "loss": 0.2149, "step": 8090 }, { "epoch": 5.18, "learning_rate": 0.0005175718849840255, "loss": 0.2278, "step": 8100 }, { "epoch": 5.18, "learning_rate": 0.0005182108626198083, "loss": 0.2626, "step": 8110 }, { "epoch": 5.19, "learning_rate": 0.0005188498402555911, "loss": 0.2234, "step": 8120 }, { "epoch": 5.19, "learning_rate": 0.0005194888178913738, "loss": 0.2177, "step": 8130 }, { "epoch": 5.2, "learning_rate": 0.0005201277955271566, "loss": 0.264, "step": 8140 }, { "epoch": 5.21, "learning_rate": 0.0005207667731629393, "loss": 0.2129, "step": 8150 }, { "epoch": 5.21, "learning_rate": 0.0005214057507987221, "loss": 0.2431, "step": 8160 }, { "epoch": 5.22, "learning_rate": 0.0005220447284345048, "loss": 0.2204, "step": 8170 }, { "epoch": 5.23, "learning_rate": 0.0005226837060702875, "loss": 0.2081, "step": 8180 }, { "epoch": 5.23, "learning_rate": 0.0005233226837060703, "loss": 0.207, "step": 8190 }, { "epoch": 5.24, "learning_rate": 0.0005239616613418531, "loss": 0.2233, "step": 8200 }, { "epoch": 5.25, "learning_rate": 0.0005246006389776357, "loss": 0.2488, "step": 8210 }, { "epoch": 5.25, "learning_rate": 0.0005252396166134186, "loss": 0.207, "step": 8220 }, { "epoch": 5.26, "learning_rate": 0.0005258785942492013, "loss": 0.2447, "step": 8230 }, { "epoch": 5.27, "learning_rate": 0.000526517571884984, "loss": 0.2295, "step": 8240 }, { "epoch": 5.27, "learning_rate": 0.0005271565495207668, "loss": 0.2539, "step": 8250 }, { "epoch": 5.28, "learning_rate": 0.0005277955271565496, "loss": 0.243, "step": 8260 }, { "epoch": 5.28, "learning_rate": 0.0005284345047923322, "loss": 0.2116, "step": 8270 }, { "epoch": 5.29, "learning_rate": 0.0005290734824281151, "loss": 0.2204, "step": 8280 }, { "epoch": 5.3, "learning_rate": 0.0005297124600638977, "loss": 0.2041, "step": 8290 }, { "epoch": 5.3, "learning_rate": 0.0005303514376996805, "loss": 0.2402, "step": 8300 }, { "epoch": 5.31, "learning_rate": 0.0005309904153354632, "loss": 0.2276, "step": 8310 }, { "epoch": 5.32, "learning_rate": 0.000531629392971246, "loss": 0.2305, "step": 8320 }, { "epoch": 5.32, "learning_rate": 0.0005322683706070288, "loss": 0.2257, "step": 8330 }, { "epoch": 5.33, "learning_rate": 0.0005329073482428115, "loss": 0.2062, "step": 8340 }, { "epoch": 5.34, "learning_rate": 0.0005335463258785943, "loss": 0.2013, "step": 8350 }, { "epoch": 5.34, "learning_rate": 0.000534185303514377, "loss": 0.2429, "step": 8360 }, { "epoch": 5.35, "learning_rate": 0.0005348242811501598, "loss": 0.1853, "step": 8370 }, { "epoch": 5.35, "learning_rate": 0.0005354632587859425, "loss": 0.2314, "step": 8380 }, { "epoch": 5.36, "learning_rate": 0.0005361022364217253, "loss": 0.2033, "step": 8390 }, { "epoch": 5.37, "learning_rate": 0.000536741214057508, "loss": 0.2227, "step": 8400 }, { "epoch": 5.37, "learning_rate": 0.0005373801916932908, "loss": 0.2243, "step": 8410 }, { "epoch": 5.38, "learning_rate": 0.0005380191693290735, "loss": 0.2322, "step": 8420 }, { "epoch": 5.39, "learning_rate": 0.0005386581469648563, "loss": 0.2103, "step": 8430 }, { "epoch": 5.39, "learning_rate": 0.0005392971246006389, "loss": 0.2361, "step": 8440 }, { "epoch": 5.4, "learning_rate": 0.0005399361022364218, "loss": 0.2518, "step": 8450 }, { "epoch": 5.41, "learning_rate": 0.0005405750798722044, "loss": 0.2079, "step": 8460 }, { "epoch": 5.41, "learning_rate": 0.0005412140575079872, "loss": 0.2353, "step": 8470 }, { "epoch": 5.42, "learning_rate": 0.00054185303514377, "loss": 0.2471, "step": 8480 }, { "epoch": 5.42, "learning_rate": 0.0005424920127795527, "loss": 0.2506, "step": 8490 }, { "epoch": 5.43, "learning_rate": 0.0005431309904153354, "loss": 0.2277, "step": 8500 }, { "epoch": 5.44, "learning_rate": 0.0005437699680511183, "loss": 0.2754, "step": 8510 }, { "epoch": 5.44, "learning_rate": 0.000544408945686901, "loss": 0.2325, "step": 8520 }, { "epoch": 5.45, "learning_rate": 0.0005450479233226837, "loss": 0.2329, "step": 8530 }, { "epoch": 5.46, "learning_rate": 0.0005456869009584666, "loss": 0.2542, "step": 8540 }, { "epoch": 5.46, "learning_rate": 0.0005463258785942492, "loss": 0.2054, "step": 8550 }, { "epoch": 5.47, "learning_rate": 0.000546964856230032, "loss": 0.2109, "step": 8560 }, { "epoch": 5.48, "learning_rate": 0.0005476038338658147, "loss": 0.2119, "step": 8570 }, { "epoch": 5.48, "learning_rate": 0.0005482428115015975, "loss": 0.2293, "step": 8580 }, { "epoch": 5.49, "learning_rate": 0.0005488817891373802, "loss": 0.2151, "step": 8590 }, { "epoch": 5.5, "learning_rate": 0.0005495207667731629, "loss": 0.2247, "step": 8600 }, { "epoch": 5.5, "learning_rate": 0.0005501597444089457, "loss": 0.2787, "step": 8610 }, { "epoch": 5.51, "learning_rate": 0.0005507987220447285, "loss": 0.2135, "step": 8620 }, { "epoch": 5.51, "learning_rate": 0.0005514376996805111, "loss": 0.2429, "step": 8630 }, { "epoch": 5.52, "learning_rate": 0.000552076677316294, "loss": 0.224, "step": 8640 }, { "epoch": 5.53, "learning_rate": 0.0005527156549520767, "loss": 0.2559, "step": 8650 }, { "epoch": 5.53, "learning_rate": 0.0005533546325878594, "loss": 0.2006, "step": 8660 }, { "epoch": 5.54, "learning_rate": 0.0005539936102236421, "loss": 0.2098, "step": 8670 }, { "epoch": 5.55, "learning_rate": 0.000554632587859425, "loss": 0.2211, "step": 8680 }, { "epoch": 5.55, "learning_rate": 0.0005552715654952076, "loss": 0.2151, "step": 8690 }, { "epoch": 5.56, "learning_rate": 0.0005559105431309904, "loss": 0.2487, "step": 8700 }, { "epoch": 5.57, "learning_rate": 0.0005565495207667732, "loss": 0.1934, "step": 8710 }, { "epoch": 5.57, "learning_rate": 0.0005571884984025559, "loss": 0.237, "step": 8720 }, { "epoch": 5.58, "learning_rate": 0.0005578274760383387, "loss": 0.2356, "step": 8730 }, { "epoch": 5.58, "learning_rate": 0.0005584664536741214, "loss": 0.242, "step": 8740 }, { "epoch": 5.59, "learning_rate": 0.0005591054313099042, "loss": 0.2187, "step": 8750 }, { "epoch": 5.6, "learning_rate": 0.0005597444089456869, "loss": 0.2434, "step": 8760 }, { "epoch": 5.6, "learning_rate": 0.0005603833865814697, "loss": 0.2641, "step": 8770 }, { "epoch": 5.61, "learning_rate": 0.0005610223642172524, "loss": 0.2223, "step": 8780 }, { "epoch": 5.62, "learning_rate": 0.0005616613418530352, "loss": 0.2159, "step": 8790 }, { "epoch": 5.62, "learning_rate": 0.0005623003194888178, "loss": 0.233, "step": 8800 }, { "epoch": 5.63, "learning_rate": 0.0005629392971246007, "loss": 0.2623, "step": 8810 }, { "epoch": 5.64, "learning_rate": 0.0005635782747603834, "loss": 0.2424, "step": 8820 }, { "epoch": 5.64, "learning_rate": 0.0005642172523961661, "loss": 0.236, "step": 8830 }, { "epoch": 5.65, "learning_rate": 0.0005648562300319489, "loss": 0.2372, "step": 8840 }, { "epoch": 5.65, "learning_rate": 0.0005654952076677317, "loss": 0.2165, "step": 8850 }, { "epoch": 5.66, "learning_rate": 0.0005661341853035143, "loss": 0.2702, "step": 8860 }, { "epoch": 5.67, "learning_rate": 0.0005667731629392972, "loss": 0.2253, "step": 8870 }, { "epoch": 5.67, "learning_rate": 0.0005674121405750799, "loss": 0.2361, "step": 8880 }, { "epoch": 5.68, "learning_rate": 0.0005680511182108626, "loss": 0.2314, "step": 8890 }, { "epoch": 5.69, "learning_rate": 0.0005686900958466453, "loss": 0.2188, "step": 8900 }, { "epoch": 5.69, "learning_rate": 0.0005693290734824281, "loss": 0.2121, "step": 8910 }, { "epoch": 5.7, "learning_rate": 0.0005699680511182109, "loss": 0.208, "step": 8920 }, { "epoch": 5.71, "learning_rate": 0.0005706070287539936, "loss": 0.2587, "step": 8930 }, { "epoch": 5.71, "learning_rate": 0.0005712460063897764, "loss": 0.248, "step": 8940 }, { "epoch": 5.72, "learning_rate": 0.0005718849840255591, "loss": 0.2321, "step": 8950 }, { "epoch": 5.73, "learning_rate": 0.0005725239616613419, "loss": 0.2464, "step": 8960 }, { "epoch": 5.73, "learning_rate": 0.0005731629392971246, "loss": 0.2344, "step": 8970 }, { "epoch": 5.74, "learning_rate": 0.0005738019169329074, "loss": 0.2346, "step": 8980 }, { "epoch": 5.74, "learning_rate": 0.0005744408945686901, "loss": 0.2296, "step": 8990 }, { "epoch": 5.75, "learning_rate": 0.0005750798722044729, "loss": 0.2556, "step": 9000 }, { "epoch": 5.76, "learning_rate": 0.0005757188498402556, "loss": 0.2424, "step": 9010 }, { "epoch": 5.76, "learning_rate": 0.0005763578274760384, "loss": 0.2561, "step": 9020 }, { "epoch": 5.77, "learning_rate": 0.000576996805111821, "loss": 0.2339, "step": 9030 }, { "epoch": 5.78, "learning_rate": 0.0005776357827476039, "loss": 0.2646, "step": 9040 }, { "epoch": 5.78, "learning_rate": 0.0005782747603833865, "loss": 0.2297, "step": 9050 }, { "epoch": 5.79, "learning_rate": 0.0005789137380191693, "loss": 0.1751, "step": 9060 }, { "epoch": 5.8, "learning_rate": 0.0005795527156549521, "loss": 0.2185, "step": 9070 }, { "epoch": 5.8, "learning_rate": 0.0005801916932907348, "loss": 0.2366, "step": 9080 }, { "epoch": 5.81, "learning_rate": 0.0005808306709265175, "loss": 0.2377, "step": 9090 }, { "epoch": 5.81, "learning_rate": 0.0005814696485623004, "loss": 0.2242, "step": 9100 }, { "epoch": 5.82, "learning_rate": 0.000582108626198083, "loss": 0.2487, "step": 9110 }, { "epoch": 5.83, "learning_rate": 0.0005827476038338658, "loss": 0.2498, "step": 9120 }, { "epoch": 5.83, "learning_rate": 0.0005833865814696487, "loss": 0.21, "step": 9130 }, { "epoch": 5.84, "learning_rate": 0.0005840255591054313, "loss": 0.2414, "step": 9140 }, { "epoch": 5.85, "learning_rate": 0.0005846645367412141, "loss": 0.2372, "step": 9150 }, { "epoch": 5.85, "learning_rate": 0.0005853035143769969, "loss": 0.2066, "step": 9160 }, { "epoch": 5.86, "learning_rate": 0.0005859424920127796, "loss": 0.2338, "step": 9170 }, { "epoch": 5.87, "learning_rate": 0.0005865814696485623, "loss": 0.2372, "step": 9180 }, { "epoch": 5.87, "learning_rate": 0.0005872204472843451, "loss": 0.2528, "step": 9190 }, { "epoch": 5.88, "learning_rate": 0.0005878594249201278, "loss": 0.2418, "step": 9200 }, { "epoch": 5.88, "learning_rate": 0.0005884984025559106, "loss": 0.2089, "step": 9210 }, { "epoch": 5.89, "learning_rate": 0.0005891373801916932, "loss": 0.2637, "step": 9220 }, { "epoch": 5.9, "learning_rate": 0.0005897763578274761, "loss": 0.262, "step": 9230 }, { "epoch": 5.9, "learning_rate": 0.0005904153354632588, "loss": 0.2451, "step": 9240 }, { "epoch": 5.91, "learning_rate": 0.0005910543130990415, "loss": 0.2283, "step": 9250 }, { "epoch": 5.92, "learning_rate": 0.0005916932907348243, "loss": 0.227, "step": 9260 }, { "epoch": 5.92, "learning_rate": 0.0005923322683706071, "loss": 0.2483, "step": 9270 }, { "epoch": 5.93, "learning_rate": 0.0005929712460063897, "loss": 0.2338, "step": 9280 }, { "epoch": 5.94, "learning_rate": 0.0005936102236421725, "loss": 0.2519, "step": 9290 }, { "epoch": 5.94, "learning_rate": 0.0005942492012779553, "loss": 0.2135, "step": 9300 }, { "epoch": 5.95, "learning_rate": 0.000594888178913738, "loss": 0.2144, "step": 9310 }, { "epoch": 5.95, "learning_rate": 0.0005955271565495208, "loss": 0.2324, "step": 9320 }, { "epoch": 5.96, "learning_rate": 0.0005961661341853036, "loss": 0.2121, "step": 9330 }, { "epoch": 5.97, "learning_rate": 0.0005968051118210863, "loss": 0.2149, "step": 9340 }, { "epoch": 5.97, "learning_rate": 0.000597444089456869, "loss": 0.2414, "step": 9350 }, { "epoch": 5.98, "learning_rate": 0.0005980830670926518, "loss": 0.2348, "step": 9360 }, { "epoch": 5.99, "learning_rate": 0.0005987220447284345, "loss": 0.2711, "step": 9370 }, { "epoch": 5.99, "learning_rate": 0.0005993610223642173, "loss": 0.2151, "step": 9380 }, { "epoch": 6.0, "learning_rate": 0.0006, "loss": 0.2554, "step": 9390 }, { "epoch": 6.01, "learning_rate": 0.0006006389776357828, "loss": 0.1843, "step": 9400 }, { "epoch": 6.01, "learning_rate": 0.0006012779552715655, "loss": 0.2518, "step": 9410 }, { "epoch": 6.02, "learning_rate": 0.0006019169329073482, "loss": 0.2017, "step": 9420 }, { "epoch": 6.03, "learning_rate": 0.000602555910543131, "loss": 0.2016, "step": 9430 }, { "epoch": 6.03, "learning_rate": 0.0006031948881789138, "loss": 0.1955, "step": 9440 }, { "epoch": 6.04, "learning_rate": 0.0006038338658146964, "loss": 0.2106, "step": 9450 }, { "epoch": 6.04, "learning_rate": 0.0006044728434504793, "loss": 0.2085, "step": 9460 }, { "epoch": 6.05, "learning_rate": 0.000605111821086262, "loss": 0.2208, "step": 9470 }, { "epoch": 6.06, "learning_rate": 0.0006057507987220447, "loss": 0.2107, "step": 9480 }, { "epoch": 6.06, "learning_rate": 0.0006063897763578275, "loss": 0.2016, "step": 9490 }, { "epoch": 6.07, "learning_rate": 0.0006070287539936102, "loss": 0.2203, "step": 9500 }, { "epoch": 6.08, "learning_rate": 0.000607667731629393, "loss": 0.1773, "step": 9510 }, { "epoch": 6.08, "learning_rate": 0.0006083067092651758, "loss": 0.2162, "step": 9520 }, { "epoch": 6.09, "learning_rate": 0.0006089456869009585, "loss": 0.1805, "step": 9530 }, { "epoch": 6.1, "learning_rate": 0.0006095846645367412, "loss": 0.275, "step": 9540 }, { "epoch": 6.1, "learning_rate": 0.000610223642172524, "loss": 0.2398, "step": 9550 }, { "epoch": 6.11, "learning_rate": 0.0006108626198083067, "loss": 0.1857, "step": 9560 }, { "epoch": 6.11, "learning_rate": 0.0006115015974440895, "loss": 0.2261, "step": 9570 }, { "epoch": 6.12, "learning_rate": 0.0006121405750798722, "loss": 0.1816, "step": 9580 }, { "epoch": 6.13, "learning_rate": 0.000612779552715655, "loss": 0.212, "step": 9590 }, { "epoch": 6.13, "learning_rate": 0.0006134185303514377, "loss": 0.2027, "step": 9600 }, { "epoch": 6.14, "learning_rate": 0.0006140575079872205, "loss": 0.2054, "step": 9610 }, { "epoch": 6.15, "learning_rate": 0.0006146964856230032, "loss": 0.2073, "step": 9620 }, { "epoch": 6.15, "learning_rate": 0.000615335463258786, "loss": 0.199, "step": 9630 }, { "epoch": 6.16, "learning_rate": 0.0006159744408945687, "loss": 0.2037, "step": 9640 }, { "epoch": 6.17, "learning_rate": 0.0006166134185303514, "loss": 0.1966, "step": 9650 }, { "epoch": 6.17, "learning_rate": 0.0006172523961661342, "loss": 0.2098, "step": 9660 }, { "epoch": 6.18, "learning_rate": 0.0006178913738019169, "loss": 0.2208, "step": 9670 }, { "epoch": 6.19, "learning_rate": 0.0006185303514376996, "loss": 0.1932, "step": 9680 }, { "epoch": 6.19, "learning_rate": 0.0006191693290734825, "loss": 0.2283, "step": 9690 }, { "epoch": 6.2, "learning_rate": 0.0006198083067092651, "loss": 0.2247, "step": 9700 }, { "epoch": 6.2, "learning_rate": 0.0006204472843450479, "loss": 0.2131, "step": 9710 }, { "epoch": 6.21, "learning_rate": 0.0006210862619808308, "loss": 0.1977, "step": 9720 }, { "epoch": 6.22, "learning_rate": 0.0006217252396166134, "loss": 0.2284, "step": 9730 }, { "epoch": 6.22, "learning_rate": 0.0006223642172523962, "loss": 0.2088, "step": 9740 }, { "epoch": 6.23, "learning_rate": 0.000623003194888179, "loss": 0.238, "step": 9750 }, { "epoch": 6.24, "learning_rate": 0.0006236421725239617, "loss": 0.198, "step": 9760 }, { "epoch": 6.24, "learning_rate": 0.0006242811501597444, "loss": 0.2132, "step": 9770 }, { "epoch": 6.25, "learning_rate": 0.0006249201277955273, "loss": 0.2107, "step": 9780 }, { "epoch": 6.26, "learning_rate": 0.0006255591054313099, "loss": 0.1984, "step": 9790 }, { "epoch": 6.26, "learning_rate": 0.0006261980830670927, "loss": 0.2316, "step": 9800 }, { "epoch": 6.27, "learning_rate": 0.0006268370607028753, "loss": 0.2404, "step": 9810 }, { "epoch": 6.27, "learning_rate": 0.0006274760383386582, "loss": 0.227, "step": 9820 }, { "epoch": 6.28, "learning_rate": 0.0006281150159744409, "loss": 0.1707, "step": 9830 }, { "epoch": 6.29, "learning_rate": 0.0006287539936102236, "loss": 0.2225, "step": 9840 }, { "epoch": 6.29, "learning_rate": 0.0006293929712460064, "loss": 0.2663, "step": 9850 }, { "epoch": 6.3, "learning_rate": 0.0006300319488817892, "loss": 0.2101, "step": 9860 }, { "epoch": 6.31, "learning_rate": 0.0006306709265175718, "loss": 0.2007, "step": 9870 }, { "epoch": 6.31, "learning_rate": 0.0006313099041533547, "loss": 0.2094, "step": 9880 }, { "epoch": 6.32, "learning_rate": 0.0006319488817891374, "loss": 0.2393, "step": 9890 }, { "epoch": 6.33, "learning_rate": 0.0006325878594249201, "loss": 0.2459, "step": 9900 }, { "epoch": 6.33, "learning_rate": 0.000633226837060703, "loss": 0.195, "step": 9910 }, { "epoch": 6.34, "learning_rate": 0.0006338658146964857, "loss": 0.1721, "step": 9920 }, { "epoch": 6.34, "learning_rate": 0.0006345047923322684, "loss": 0.2254, "step": 9930 }, { "epoch": 6.35, "learning_rate": 0.0006351437699680511, "loss": 0.2252, "step": 9940 }, { "epoch": 6.36, "learning_rate": 0.000635782747603834, "loss": 0.1812, "step": 9950 }, { "epoch": 6.36, "learning_rate": 0.0006364217252396166, "loss": 0.2133, "step": 9960 }, { "epoch": 6.37, "learning_rate": 0.0006370607028753994, "loss": 0.2453, "step": 9970 }, { "epoch": 6.38, "learning_rate": 0.000637699680511182, "loss": 0.2257, "step": 9980 }, { "epoch": 6.38, "learning_rate": 0.0006383386581469649, "loss": 0.2017, "step": 9990 }, { "epoch": 6.39, "learning_rate": 0.0006389776357827476, "loss": 0.2141, "step": 10000 }, { "epoch": 6.4, "learning_rate": 0.0006396166134185303, "loss": 0.221, "step": 10010 }, { "epoch": 6.4, "learning_rate": 0.0006402555910543131, "loss": 0.2082, "step": 10020 }, { "epoch": 6.41, "learning_rate": 0.0006408945686900959, "loss": 0.2305, "step": 10030 }, { "epoch": 6.42, "learning_rate": 0.0006415335463258785, "loss": 0.1989, "step": 10040 }, { "epoch": 6.42, "learning_rate": 0.0006421725239616614, "loss": 0.2114, "step": 10050 }, { "epoch": 6.43, "learning_rate": 0.0006428115015974441, "loss": 0.244, "step": 10060 }, { "epoch": 6.43, "learning_rate": 0.0006434504792332268, "loss": 0.2128, "step": 10070 }, { "epoch": 6.44, "learning_rate": 0.0006440894568690096, "loss": 0.2411, "step": 10080 }, { "epoch": 6.45, "learning_rate": 0.0006447284345047924, "loss": 0.2089, "step": 10090 }, { "epoch": 6.45, "learning_rate": 0.000645367412140575, "loss": 0.2048, "step": 10100 }, { "epoch": 6.46, "learning_rate": 0.0006460063897763579, "loss": 0.2081, "step": 10110 }, { "epoch": 6.47, "learning_rate": 0.0006466453674121406, "loss": 0.2285, "step": 10120 }, { "epoch": 6.47, "learning_rate": 0.0006472843450479233, "loss": 0.217, "step": 10130 }, { "epoch": 6.48, "learning_rate": 0.0006479233226837062, "loss": 0.2066, "step": 10140 }, { "epoch": 6.49, "learning_rate": 0.0006485623003194888, "loss": 0.2274, "step": 10150 }, { "epoch": 6.49, "learning_rate": 0.0006492012779552716, "loss": 0.2116, "step": 10160 }, { "epoch": 6.5, "learning_rate": 0.0006498402555910543, "loss": 0.2406, "step": 10170 }, { "epoch": 6.5, "learning_rate": 0.0006504792332268371, "loss": 0.2094, "step": 10180 }, { "epoch": 6.51, "learning_rate": 0.0006511182108626198, "loss": 0.2406, "step": 10190 }, { "epoch": 6.52, "learning_rate": 0.0006517571884984026, "loss": 0.2134, "step": 10200 }, { "epoch": 6.52, "learning_rate": 0.0006523961661341853, "loss": 0.2337, "step": 10210 }, { "epoch": 6.53, "learning_rate": 0.0006530351437699681, "loss": 0.2169, "step": 10220 }, { "epoch": 6.54, "learning_rate": 0.0006536741214057508, "loss": 0.1976, "step": 10230 }, { "epoch": 6.54, "learning_rate": 0.0006543130990415336, "loss": 0.2305, "step": 10240 }, { "epoch": 6.55, "learning_rate": 0.0006549520766773163, "loss": 0.2315, "step": 10250 }, { "epoch": 6.56, "learning_rate": 0.000655591054313099, "loss": 0.2116, "step": 10260 }, { "epoch": 6.56, "learning_rate": 0.0006562300319488817, "loss": 0.2238, "step": 10270 }, { "epoch": 6.57, "learning_rate": 0.0006568690095846646, "loss": 0.1958, "step": 10280 }, { "epoch": 6.57, "learning_rate": 0.0006575079872204472, "loss": 0.2142, "step": 10290 }, { "epoch": 6.58, "learning_rate": 0.00065814696485623, "loss": 0.2262, "step": 10300 }, { "epoch": 6.59, "learning_rate": 0.0006587859424920129, "loss": 0.2009, "step": 10310 }, { "epoch": 6.59, "learning_rate": 0.0006594249201277955, "loss": 0.2405, "step": 10320 }, { "epoch": 6.6, "learning_rate": 0.0006600638977635783, "loss": 0.239, "step": 10330 }, { "epoch": 6.61, "learning_rate": 0.0006607028753993611, "loss": 0.2234, "step": 10340 }, { "epoch": 6.61, "learning_rate": 0.0006613418530351438, "loss": 0.2276, "step": 10350 }, { "epoch": 6.62, "learning_rate": 0.0006619808306709265, "loss": 0.2201, "step": 10360 }, { "epoch": 6.63, "learning_rate": 0.0006626198083067094, "loss": 0.2277, "step": 10370 }, { "epoch": 6.63, "learning_rate": 0.000663258785942492, "loss": 0.2399, "step": 10380 }, { "epoch": 6.64, "learning_rate": 0.0006638977635782748, "loss": 0.2097, "step": 10390 }, { "epoch": 6.65, "learning_rate": 0.0006645367412140575, "loss": 0.2211, "step": 10400 }, { "epoch": 6.65, "learning_rate": 0.0006651757188498403, "loss": 0.2249, "step": 10410 }, { "epoch": 6.66, "learning_rate": 0.000665814696485623, "loss": 0.223, "step": 10420 }, { "epoch": 6.66, "learning_rate": 0.0006664536741214057, "loss": 0.2492, "step": 10430 }, { "epoch": 6.67, "learning_rate": 0.0006670926517571885, "loss": 0.1998, "step": 10440 }, { "epoch": 6.68, "learning_rate": 0.0006677316293929713, "loss": 0.2508, "step": 10450 }, { "epoch": 6.68, "learning_rate": 0.0006683706070287539, "loss": 0.2545, "step": 10460 }, { "epoch": 6.69, "learning_rate": 0.0006690095846645368, "loss": 0.2027, "step": 10470 }, { "epoch": 6.7, "learning_rate": 0.0006696485623003195, "loss": 0.2344, "step": 10480 }, { "epoch": 6.7, "learning_rate": 0.0006702875399361022, "loss": 0.2268, "step": 10490 }, { "epoch": 6.71, "learning_rate": 0.0006709265175718851, "loss": 0.2414, "step": 10500 }, { "epoch": 6.72, "learning_rate": 0.0006715654952076678, "loss": 0.2263, "step": 10510 }, { "epoch": 6.72, "learning_rate": 0.0006722044728434505, "loss": 0.1974, "step": 10520 }, { "epoch": 6.73, "learning_rate": 0.0006728434504792332, "loss": 0.2196, "step": 10530 }, { "epoch": 6.73, "learning_rate": 0.0006734824281150161, "loss": 0.1958, "step": 10540 }, { "epoch": 6.74, "learning_rate": 0.0006741214057507987, "loss": 0.2002, "step": 10550 }, { "epoch": 6.75, "learning_rate": 0.0006747603833865815, "loss": 0.2186, "step": 10560 }, { "epoch": 6.75, "learning_rate": 0.0006753993610223642, "loss": 0.2254, "step": 10570 }, { "epoch": 6.76, "learning_rate": 0.000676038338658147, "loss": 0.1954, "step": 10580 }, { "epoch": 6.77, "learning_rate": 0.0006766773162939297, "loss": 0.2413, "step": 10590 }, { "epoch": 6.77, "learning_rate": 0.0006773162939297125, "loss": 0.2373, "step": 10600 }, { "epoch": 6.78, "learning_rate": 0.0006779552715654952, "loss": 0.2389, "step": 10610 }, { "epoch": 6.79, "learning_rate": 0.000678594249201278, "loss": 0.203, "step": 10620 }, { "epoch": 6.79, "learning_rate": 0.0006792332268370606, "loss": 0.2357, "step": 10630 }, { "epoch": 6.8, "learning_rate": 0.0006798722044728435, "loss": 0.2449, "step": 10640 }, { "epoch": 6.8, "learning_rate": 0.0006805111821086262, "loss": 0.2056, "step": 10650 }, { "epoch": 6.81, "learning_rate": 0.0006811501597444089, "loss": 0.2524, "step": 10660 }, { "epoch": 6.82, "learning_rate": 0.0006817891373801917, "loss": 0.2157, "step": 10670 }, { "epoch": 6.82, "learning_rate": 0.0006824281150159745, "loss": 0.1798, "step": 10680 }, { "epoch": 6.83, "learning_rate": 0.0006830670926517571, "loss": 0.1969, "step": 10690 }, { "epoch": 6.84, "learning_rate": 0.00068370607028754, "loss": 0.2223, "step": 10700 }, { "epoch": 6.84, "learning_rate": 0.0006843450479233228, "loss": 0.2193, "step": 10710 }, { "epoch": 6.85, "learning_rate": 0.0006849840255591054, "loss": 0.2223, "step": 10720 }, { "epoch": 6.86, "learning_rate": 0.0006856230031948883, "loss": 0.2113, "step": 10730 }, { "epoch": 6.86, "learning_rate": 0.0006862619808306709, "loss": 0.2488, "step": 10740 }, { "epoch": 6.87, "learning_rate": 0.0006869009584664537, "loss": 0.2308, "step": 10750 }, { "epoch": 6.88, "learning_rate": 0.0006875399361022364, "loss": 0.2283, "step": 10760 }, { "epoch": 6.88, "learning_rate": 0.0006881789137380192, "loss": 0.2672, "step": 10770 }, { "epoch": 6.89, "learning_rate": 0.0006888178913738019, "loss": 0.2538, "step": 10780 }, { "epoch": 6.89, "learning_rate": 0.0006894568690095847, "loss": 0.2907, "step": 10790 }, { "epoch": 6.9, "learning_rate": 0.0006900958466453674, "loss": 0.1976, "step": 10800 }, { "epoch": 6.91, "learning_rate": 0.0006907348242811502, "loss": 0.2619, "step": 10810 }, { "epoch": 6.91, "learning_rate": 0.0006913738019169329, "loss": 0.1975, "step": 10820 }, { "epoch": 6.92, "learning_rate": 0.0006920127795527157, "loss": 0.1838, "step": 10830 }, { "epoch": 6.93, "learning_rate": 0.0006926517571884984, "loss": 0.2242, "step": 10840 }, { "epoch": 6.93, "learning_rate": 0.0006932907348242812, "loss": 0.2057, "step": 10850 }, { "epoch": 6.94, "learning_rate": 0.0006939297124600638, "loss": 0.2116, "step": 10860 }, { "epoch": 6.95, "learning_rate": 0.0006945686900958467, "loss": 0.2289, "step": 10870 }, { "epoch": 6.95, "learning_rate": 0.0006952076677316293, "loss": 0.202, "step": 10880 }, { "epoch": 6.96, "learning_rate": 0.0006958466453674121, "loss": 0.2221, "step": 10890 }, { "epoch": 6.96, "learning_rate": 0.000696485623003195, "loss": 0.237, "step": 10900 }, { "epoch": 6.97, "learning_rate": 0.0006971246006389776, "loss": 0.3199, "step": 10910 }, { "epoch": 6.98, "learning_rate": 0.0006977635782747604, "loss": 0.2029, "step": 10920 }, { "epoch": 6.98, "learning_rate": 0.0006984025559105432, "loss": 0.223, "step": 10930 }, { "epoch": 6.99, "learning_rate": 0.0006990415335463259, "loss": 0.216, "step": 10940 }, { "epoch": 7.0, "learning_rate": 0.0006996805111821086, "loss": 0.1952, "step": 10950 }, { "epoch": 7.0, "learning_rate": 0.0007003194888178915, "loss": 0.2173, "step": 10960 }, { "epoch": 7.01, "learning_rate": 0.0007009584664536741, "loss": 0.1895, "step": 10970 }, { "epoch": 7.02, "learning_rate": 0.0007015974440894569, "loss": 0.1721, "step": 10980 }, { "epoch": 7.02, "learning_rate": 0.0007022364217252397, "loss": 0.1872, "step": 10990 }, { "epoch": 7.03, "learning_rate": 0.0007028753993610224, "loss": 0.1846, "step": 11000 }, { "epoch": 7.04, "learning_rate": 0.0007035143769968051, "loss": 0.207, "step": 11010 }, { "epoch": 7.04, "learning_rate": 0.0007041533546325878, "loss": 0.2032, "step": 11020 }, { "epoch": 7.05, "learning_rate": 0.0007047923322683706, "loss": 0.1662, "step": 11030 }, { "epoch": 7.05, "learning_rate": 0.0007054313099041534, "loss": 0.167, "step": 11040 }, { "epoch": 7.06, "learning_rate": 0.000706070287539936, "loss": 0.2351, "step": 11050 }, { "epoch": 7.07, "learning_rate": 0.0007067092651757189, "loss": 0.1653, "step": 11060 }, { "epoch": 7.07, "learning_rate": 0.0007073482428115016, "loss": 0.1826, "step": 11070 }, { "epoch": 7.08, "learning_rate": 0.0007079872204472843, "loss": 0.2162, "step": 11080 }, { "epoch": 7.09, "learning_rate": 0.000708626198083067, "loss": 0.1853, "step": 11090 }, { "epoch": 7.09, "learning_rate": 0.0007092651757188499, "loss": 0.1865, "step": 11100 }, { "epoch": 7.1, "learning_rate": 0.0007099041533546326, "loss": 0.1868, "step": 11110 }, { "epoch": 7.11, "learning_rate": 0.0007105431309904153, "loss": 0.1628, "step": 11120 }, { "epoch": 7.11, "learning_rate": 0.0007111821086261982, "loss": 0.1975, "step": 11130 }, { "epoch": 7.12, "learning_rate": 0.0007118210862619808, "loss": 0.1853, "step": 11140 }, { "epoch": 7.12, "learning_rate": 0.0007124600638977636, "loss": 0.2046, "step": 11150 }, { "epoch": 7.13, "learning_rate": 0.0007130990415335464, "loss": 0.1966, "step": 11160 }, { "epoch": 7.14, "learning_rate": 0.0007137380191693291, "loss": 0.2022, "step": 11170 }, { "epoch": 7.14, "learning_rate": 0.0007143769968051118, "loss": 0.1635, "step": 11180 }, { "epoch": 7.15, "learning_rate": 0.0007150159744408946, "loss": 0.2223, "step": 11190 }, { "epoch": 7.16, "learning_rate": 0.0007156549520766773, "loss": 0.2094, "step": 11200 }, { "epoch": 7.16, "learning_rate": 0.0007162939297124601, "loss": 0.1938, "step": 11210 }, { "epoch": 7.17, "learning_rate": 0.0007169329073482428, "loss": 0.2053, "step": 11220 }, { "epoch": 7.18, "learning_rate": 0.0007175718849840256, "loss": 0.1981, "step": 11230 }, { "epoch": 7.18, "learning_rate": 0.0007182108626198083, "loss": 0.1969, "step": 11240 }, { "epoch": 7.19, "learning_rate": 0.000718849840255591, "loss": 0.1912, "step": 11250 }, { "epoch": 7.19, "learning_rate": 0.0007194888178913738, "loss": 0.1875, "step": 11260 }, { "epoch": 7.2, "learning_rate": 0.0007201277955271566, "loss": 0.1944, "step": 11270 }, { "epoch": 7.21, "learning_rate": 0.0007207667731629392, "loss": 0.2254, "step": 11280 }, { "epoch": 7.21, "learning_rate": 0.0007214057507987221, "loss": 0.2204, "step": 11290 }, { "epoch": 7.22, "learning_rate": 0.0007220447284345049, "loss": 0.2089, "step": 11300 }, { "epoch": 7.23, "learning_rate": 0.0007226837060702875, "loss": 0.2098, "step": 11310 }, { "epoch": 7.23, "learning_rate": 0.0007233226837060704, "loss": 0.2107, "step": 11320 }, { "epoch": 7.24, "learning_rate": 0.000723961661341853, "loss": 0.2035, "step": 11330 }, { "epoch": 7.25, "learning_rate": 0.0007246006389776358, "loss": 0.2098, "step": 11340 }, { "epoch": 7.25, "learning_rate": 0.0007252396166134186, "loss": 0.2332, "step": 11350 }, { "epoch": 7.26, "learning_rate": 0.0007258785942492013, "loss": 0.1978, "step": 11360 }, { "epoch": 7.27, "learning_rate": 0.000726517571884984, "loss": 0.1873, "step": 11370 }, { "epoch": 7.27, "learning_rate": 0.0007271565495207669, "loss": 0.1933, "step": 11380 }, { "epoch": 7.28, "learning_rate": 0.0007277955271565495, "loss": 0.2329, "step": 11390 }, { "epoch": 7.28, "learning_rate": 0.0007284345047923323, "loss": 0.1914, "step": 11400 }, { "epoch": 7.29, "learning_rate": 0.000729073482428115, "loss": 0.1788, "step": 11410 }, { "epoch": 7.3, "learning_rate": 0.0007297124600638978, "loss": 0.2233, "step": 11420 }, { "epoch": 7.3, "learning_rate": 0.0007303514376996805, "loss": 0.2045, "step": 11430 }, { "epoch": 7.31, "learning_rate": 0.0007309904153354633, "loss": 0.1945, "step": 11440 }, { "epoch": 7.32, "learning_rate": 0.000731629392971246, "loss": 0.2035, "step": 11450 }, { "epoch": 7.32, "learning_rate": 0.0007322683706070288, "loss": 0.1888, "step": 11460 }, { "epoch": 7.33, "learning_rate": 0.0007329073482428114, "loss": 0.2102, "step": 11470 }, { "epoch": 7.34, "learning_rate": 0.0007335463258785943, "loss": 0.2422, "step": 11480 }, { "epoch": 7.34, "learning_rate": 0.0007341853035143771, "loss": 0.2067, "step": 11490 }, { "epoch": 7.35, "learning_rate": 0.0007348242811501597, "loss": 0.2258, "step": 11500 }, { "epoch": 7.35, "learning_rate": 0.0007354632587859425, "loss": 0.1953, "step": 11510 }, { "epoch": 7.36, "learning_rate": 0.0007361022364217253, "loss": 0.2008, "step": 11520 }, { "epoch": 7.37, "learning_rate": 0.000736741214057508, "loss": 0.1799, "step": 11530 }, { "epoch": 7.37, "learning_rate": 0.0007373801916932907, "loss": 0.1961, "step": 11540 }, { "epoch": 7.38, "learning_rate": 0.0007380191693290736, "loss": 0.2147, "step": 11550 }, { "epoch": 7.39, "learning_rate": 0.0007386581469648562, "loss": 0.2173, "step": 11560 }, { "epoch": 7.39, "learning_rate": 0.000739297124600639, "loss": 0.2015, "step": 11570 }, { "epoch": 7.4, "learning_rate": 0.0007399361022364218, "loss": 0.2245, "step": 11580 }, { "epoch": 7.41, "learning_rate": 0.0007405750798722045, "loss": 0.2001, "step": 11590 }, { "epoch": 7.41, "learning_rate": 0.0007412140575079872, "loss": 0.2022, "step": 11600 }, { "epoch": 7.42, "learning_rate": 0.0007418530351437701, "loss": 0.2094, "step": 11610 }, { "epoch": 7.42, "learning_rate": 0.0007424920127795527, "loss": 0.2429, "step": 11620 }, { "epoch": 7.43, "learning_rate": 0.0007431309904153355, "loss": 0.174, "step": 11630 }, { "epoch": 7.44, "learning_rate": 0.0007437699680511181, "loss": 0.2271, "step": 11640 }, { "epoch": 7.44, "learning_rate": 0.000744408945686901, "loss": 0.1932, "step": 11650 }, { "epoch": 7.45, "learning_rate": 0.0007450479233226837, "loss": 0.1983, "step": 11660 }, { "epoch": 7.46, "learning_rate": 0.0007456869009584664, "loss": 0.1996, "step": 11670 }, { "epoch": 7.46, "learning_rate": 0.0007463258785942492, "loss": 0.2084, "step": 11680 }, { "epoch": 7.47, "learning_rate": 0.000746964856230032, "loss": 0.2157, "step": 11690 }, { "epoch": 7.48, "learning_rate": 0.0007476038338658147, "loss": 0.2366, "step": 11700 }, { "epoch": 7.48, "learning_rate": 0.0007482428115015975, "loss": 0.2104, "step": 11710 }, { "epoch": 7.49, "learning_rate": 0.0007488817891373803, "loss": 0.2164, "step": 11720 }, { "epoch": 7.5, "learning_rate": 0.0007495207667731629, "loss": 0.1951, "step": 11730 }, { "epoch": 7.5, "learning_rate": 0.0007501597444089458, "loss": 0.2207, "step": 11740 }, { "epoch": 7.51, "learning_rate": 0.0007507987220447285, "loss": 0.1786, "step": 11750 }, { "epoch": 7.51, "learning_rate": 0.0007514376996805112, "loss": 0.2087, "step": 11760 }, { "epoch": 7.52, "learning_rate": 0.0007520766773162939, "loss": 0.2059, "step": 11770 }, { "epoch": 7.53, "learning_rate": 0.0007527156549520767, "loss": 0.2304, "step": 11780 }, { "epoch": 7.53, "learning_rate": 0.0007533546325878594, "loss": 0.2173, "step": 11790 }, { "epoch": 7.54, "learning_rate": 0.0007539936102236422, "loss": 0.2221, "step": 11800 }, { "epoch": 7.55, "learning_rate": 0.0007546325878594249, "loss": 0.203, "step": 11810 }, { "epoch": 7.55, "learning_rate": 0.0007552715654952077, "loss": 0.2087, "step": 11820 }, { "epoch": 7.56, "learning_rate": 0.0007559105431309904, "loss": 0.2292, "step": 11830 }, { "epoch": 7.57, "learning_rate": 0.0007565495207667732, "loss": 0.2116, "step": 11840 }, { "epoch": 7.57, "learning_rate": 0.0007571884984025559, "loss": 0.2227, "step": 11850 }, { "epoch": 7.58, "learning_rate": 0.0007578274760383387, "loss": 0.207, "step": 11860 }, { "epoch": 7.58, "learning_rate": 0.0007584664536741213, "loss": 0.2051, "step": 11870 }, { "epoch": 7.59, "learning_rate": 0.0007591054313099042, "loss": 0.2409, "step": 11880 }, { "epoch": 7.6, "learning_rate": 0.000759744408945687, "loss": 0.1849, "step": 11890 }, { "epoch": 7.6, "learning_rate": 0.0007603833865814696, "loss": 0.2098, "step": 11900 }, { "epoch": 7.61, "learning_rate": 0.0007610223642172525, "loss": 0.2225, "step": 11910 }, { "epoch": 7.62, "learning_rate": 0.0007616613418530352, "loss": 0.1973, "step": 11920 }, { "epoch": 7.62, "learning_rate": 0.0007623003194888179, "loss": 0.239, "step": 11930 }, { "epoch": 7.63, "learning_rate": 0.0007629392971246007, "loss": 0.2186, "step": 11940 }, { "epoch": 7.64, "learning_rate": 0.0007635782747603834, "loss": 0.1849, "step": 11950 }, { "epoch": 7.64, "learning_rate": 0.0007642172523961661, "loss": 0.2316, "step": 11960 }, { "epoch": 7.65, "learning_rate": 0.000764856230031949, "loss": 0.2223, "step": 11970 }, { "epoch": 7.65, "learning_rate": 0.0007654952076677316, "loss": 0.2457, "step": 11980 }, { "epoch": 7.66, "learning_rate": 0.0007661341853035144, "loss": 0.2006, "step": 11990 }, { "epoch": 7.67, "learning_rate": 0.0007667731629392971, "loss": 0.2142, "step": 12000 }, { "epoch": 7.67, "learning_rate": 0.0007674121405750799, "loss": 0.1964, "step": 12010 }, { "epoch": 7.68, "learning_rate": 0.0007680511182108626, "loss": 0.2215, "step": 12020 }, { "epoch": 7.69, "learning_rate": 0.0007686900958466454, "loss": 0.2158, "step": 12030 }, { "epoch": 7.69, "learning_rate": 0.0007693290734824281, "loss": 0.2071, "step": 12040 }, { "epoch": 7.7, "learning_rate": 0.0007699680511182109, "loss": 0.221, "step": 12050 }, { "epoch": 7.71, "learning_rate": 0.0007706070287539936, "loss": 0.2322, "step": 12060 }, { "epoch": 7.71, "learning_rate": 0.0007712460063897764, "loss": 0.2176, "step": 12070 }, { "epoch": 7.72, "learning_rate": 0.0007718849840255591, "loss": 0.1703, "step": 12080 }, { "epoch": 7.73, "learning_rate": 0.0007725239616613418, "loss": 0.2264, "step": 12090 }, { "epoch": 7.73, "learning_rate": 0.0007731629392971247, "loss": 0.2123, "step": 12100 }, { "epoch": 7.74, "learning_rate": 0.0007738019169329074, "loss": 0.2096, "step": 12110 }, { "epoch": 7.74, "learning_rate": 0.0007744408945686901, "loss": 0.228, "step": 12120 }, { "epoch": 7.75, "learning_rate": 0.0007750798722044728, "loss": 0.2193, "step": 12130 }, { "epoch": 7.76, "learning_rate": 0.0007757188498402557, "loss": 0.2062, "step": 12140 }, { "epoch": 7.76, "learning_rate": 0.0007763578274760383, "loss": 0.1992, "step": 12150 }, { "epoch": 7.77, "learning_rate": 0.0007769968051118211, "loss": 0.1922, "step": 12160 }, { "epoch": 7.78, "learning_rate": 0.0007776357827476039, "loss": 0.1991, "step": 12170 }, { "epoch": 7.78, "learning_rate": 0.0007782747603833866, "loss": 0.2427, "step": 12180 }, { "epoch": 7.79, "learning_rate": 0.0007789137380191693, "loss": 0.2242, "step": 12190 }, { "epoch": 7.8, "learning_rate": 0.0007795527156549522, "loss": 0.2559, "step": 12200 }, { "epoch": 7.8, "learning_rate": 0.0007801916932907348, "loss": 0.2238, "step": 12210 }, { "epoch": 7.81, "learning_rate": 0.0007808306709265176, "loss": 0.2055, "step": 12220 }, { "epoch": 7.81, "learning_rate": 0.0007814696485623002, "loss": 0.2139, "step": 12230 }, { "epoch": 7.82, "learning_rate": 0.0007821086261980831, "loss": 0.2198, "step": 12240 }, { "epoch": 7.83, "learning_rate": 0.0007827476038338658, "loss": 0.2401, "step": 12250 }, { "epoch": 7.83, "learning_rate": 0.0007833865814696485, "loss": 0.2253, "step": 12260 }, { "epoch": 7.84, "learning_rate": 0.0007840255591054313, "loss": 0.2157, "step": 12270 }, { "epoch": 7.85, "learning_rate": 0.0007846645367412141, "loss": 0.2274, "step": 12280 }, { "epoch": 7.85, "learning_rate": 0.0007853035143769968, "loss": 0.2281, "step": 12290 }, { "epoch": 7.86, "learning_rate": 0.0007859424920127796, "loss": 0.1965, "step": 12300 }, { "epoch": 7.87, "learning_rate": 0.0007865814696485624, "loss": 0.2179, "step": 12310 }, { "epoch": 7.87, "learning_rate": 0.000787220447284345, "loss": 0.1826, "step": 12320 }, { "epoch": 7.88, "learning_rate": 0.0007878594249201279, "loss": 0.172, "step": 12330 }, { "epoch": 7.88, "learning_rate": 0.0007884984025559106, "loss": 0.2562, "step": 12340 }, { "epoch": 7.89, "learning_rate": 0.0007891373801916933, "loss": 0.2094, "step": 12350 }, { "epoch": 7.9, "learning_rate": 0.000789776357827476, "loss": 0.1897, "step": 12360 }, { "epoch": 7.9, "learning_rate": 0.0007904153354632589, "loss": 0.2108, "step": 12370 }, { "epoch": 7.91, "learning_rate": 0.0007910543130990415, "loss": 0.21, "step": 12380 }, { "epoch": 7.92, "learning_rate": 0.0007916932907348243, "loss": 0.2316, "step": 12390 }, { "epoch": 7.92, "learning_rate": 0.000792332268370607, "loss": 0.1809, "step": 12400 }, { "epoch": 7.93, "learning_rate": 0.0007929712460063898, "loss": 0.2491, "step": 12410 }, { "epoch": 7.94, "learning_rate": 0.0007936102236421725, "loss": 0.1887, "step": 12420 }, { "epoch": 7.94, "learning_rate": 0.0007942492012779553, "loss": 0.2278, "step": 12430 }, { "epoch": 7.95, "learning_rate": 0.000794888178913738, "loss": 0.2504, "step": 12440 }, { "epoch": 7.95, "learning_rate": 0.0007955271565495208, "loss": 0.2111, "step": 12450 }, { "epoch": 7.96, "learning_rate": 0.0007961661341853034, "loss": 0.2649, "step": 12460 }, { "epoch": 7.97, "learning_rate": 0.0007968051118210863, "loss": 0.2035, "step": 12470 }, { "epoch": 7.97, "learning_rate": 0.000797444089456869, "loss": 0.2132, "step": 12480 }, { "epoch": 7.98, "learning_rate": 0.0007980830670926517, "loss": 0.2869, "step": 12490 }, { "epoch": 7.99, "learning_rate": 0.0007987220447284346, "loss": 0.261, "step": 12500 }, { "epoch": 7.99, "learning_rate": 0.0007993610223642173, "loss": 0.1863, "step": 12510 }, { "epoch": 8.0, "learning_rate": 0.0008, "loss": 0.2178, "step": 12520 }, { "epoch": 8.01, "learning_rate": 0.0008006389776357828, "loss": 0.1807, "step": 12530 }, { "epoch": 8.01, "learning_rate": 0.0008012779552715655, "loss": 0.1813, "step": 12540 }, { "epoch": 8.02, "learning_rate": 0.0008019169329073482, "loss": 0.1791, "step": 12550 }, { "epoch": 8.03, "learning_rate": 0.0008025559105431311, "loss": 0.1472, "step": 12560 }, { "epoch": 8.03, "learning_rate": 0.0008031948881789137, "loss": 0.18, "step": 12570 }, { "epoch": 8.04, "learning_rate": 0.0008038338658146965, "loss": 0.1829, "step": 12580 }, { "epoch": 8.04, "learning_rate": 0.0008044728434504793, "loss": 0.1877, "step": 12590 }, { "epoch": 8.05, "learning_rate": 0.000805111821086262, "loss": 0.2126, "step": 12600 }, { "epoch": 8.06, "learning_rate": 0.0008057507987220447, "loss": 0.1921, "step": 12610 }, { "epoch": 8.06, "learning_rate": 0.0008063897763578275, "loss": 0.1952, "step": 12620 }, { "epoch": 8.07, "learning_rate": 0.0008070287539936102, "loss": 0.1775, "step": 12630 }, { "epoch": 8.08, "learning_rate": 0.000807667731629393, "loss": 0.2038, "step": 12640 }, { "epoch": 8.08, "learning_rate": 0.0008083067092651757, "loss": 0.2054, "step": 12650 }, { "epoch": 8.09, "learning_rate": 0.0008089456869009585, "loss": 0.2258, "step": 12660 }, { "epoch": 8.1, "learning_rate": 0.0008095846645367412, "loss": 0.2226, "step": 12670 }, { "epoch": 8.1, "learning_rate": 0.000810223642172524, "loss": 0.1902, "step": 12680 }, { "epoch": 8.11, "learning_rate": 0.0008108626198083068, "loss": 0.1996, "step": 12690 }, { "epoch": 8.11, "learning_rate": 0.0008115015974440895, "loss": 0.1904, "step": 12700 }, { "epoch": 8.12, "learning_rate": 0.0008121405750798722, "loss": 0.1486, "step": 12710 }, { "epoch": 8.13, "learning_rate": 0.000812779552715655, "loss": 0.1905, "step": 12720 }, { "epoch": 8.13, "learning_rate": 0.0008134185303514378, "loss": 0.1883, "step": 12730 }, { "epoch": 8.14, "learning_rate": 0.0008140575079872204, "loss": 0.1863, "step": 12740 }, { "epoch": 8.15, "learning_rate": 0.0008146964856230032, "loss": 0.1934, "step": 12750 }, { "epoch": 8.15, "learning_rate": 0.000815335463258786, "loss": 0.1778, "step": 12760 }, { "epoch": 8.16, "learning_rate": 0.0008159744408945687, "loss": 0.1796, "step": 12770 }, { "epoch": 8.17, "learning_rate": 0.0008166134185303514, "loss": 0.1902, "step": 12780 }, { "epoch": 8.17, "learning_rate": 0.0008172523961661343, "loss": 0.1913, "step": 12790 }, { "epoch": 8.18, "learning_rate": 0.0008178913738019169, "loss": 0.1887, "step": 12800 }, { "epoch": 8.19, "learning_rate": 0.0008185303514376997, "loss": 0.1658, "step": 12810 }, { "epoch": 8.19, "learning_rate": 0.0008191693290734825, "loss": 0.169, "step": 12820 }, { "epoch": 8.2, "learning_rate": 0.0008198083067092652, "loss": 0.1946, "step": 12830 }, { "epoch": 8.2, "learning_rate": 0.0008204472843450479, "loss": 0.1913, "step": 12840 }, { "epoch": 8.21, "learning_rate": 0.0008210862619808306, "loss": 0.2135, "step": 12850 }, { "epoch": 8.22, "learning_rate": 0.0008217252396166134, "loss": 0.1913, "step": 12860 }, { "epoch": 8.22, "learning_rate": 0.0008223642172523962, "loss": 0.1958, "step": 12870 }, { "epoch": 8.23, "learning_rate": 0.0008230031948881789, "loss": 0.2013, "step": 12880 }, { "epoch": 8.24, "learning_rate": 0.0008236421725239617, "loss": 0.2246, "step": 12890 }, { "epoch": 8.24, "learning_rate": 0.0008242811501597445, "loss": 0.2016, "step": 12900 }, { "epoch": 8.25, "learning_rate": 0.0008249201277955271, "loss": 0.1932, "step": 12910 }, { "epoch": 8.26, "learning_rate": 0.00082555910543131, "loss": 0.1766, "step": 12920 }, { "epoch": 8.26, "learning_rate": 0.0008261980830670927, "loss": 0.1959, "step": 12930 }, { "epoch": 8.27, "learning_rate": 0.0008268370607028754, "loss": 0.1978, "step": 12940 }, { "epoch": 8.27, "learning_rate": 0.0008274760383386582, "loss": 0.2187, "step": 12950 }, { "epoch": 8.28, "learning_rate": 0.000828115015974441, "loss": 0.1971, "step": 12960 }, { "epoch": 8.29, "learning_rate": 0.0008287539936102236, "loss": 0.1839, "step": 12970 }, { "epoch": 8.29, "learning_rate": 0.0008293929712460064, "loss": 0.2237, "step": 12980 }, { "epoch": 8.3, "learning_rate": 0.0008300319488817891, "loss": 0.1553, "step": 12990 }, { "epoch": 8.31, "learning_rate": 0.0008306709265175719, "loss": 0.193, "step": 13000 }, { "epoch": 8.31, "learning_rate": 0.0008313099041533546, "loss": 0.2027, "step": 13010 }, { "epoch": 8.32, "learning_rate": 0.0008319488817891374, "loss": 0.1927, "step": 13020 }, { "epoch": 8.33, "learning_rate": 0.0008325878594249201, "loss": 0.2067, "step": 13030 }, { "epoch": 8.33, "learning_rate": 0.0008332268370607029, "loss": 0.2119, "step": 13040 }, { "epoch": 8.34, "learning_rate": 0.0008338658146964856, "loss": 0.2125, "step": 13050 }, { "epoch": 8.34, "learning_rate": 0.0008345047923322684, "loss": 0.2148, "step": 13060 }, { "epoch": 8.35, "learning_rate": 0.0008351437699680511, "loss": 0.1722, "step": 13070 }, { "epoch": 8.36, "learning_rate": 0.0008357827476038338, "loss": 0.212, "step": 13080 }, { "epoch": 8.36, "learning_rate": 0.0008364217252396167, "loss": 0.2077, "step": 13090 }, { "epoch": 8.37, "learning_rate": 0.0008370607028753994, "loss": 0.2223, "step": 13100 }, { "epoch": 8.38, "learning_rate": 0.0008376996805111821, "loss": 0.2055, "step": 13110 }, { "epoch": 8.38, "learning_rate": 0.0008383386581469649, "loss": 0.1995, "step": 13120 }, { "epoch": 8.39, "learning_rate": 0.0008389776357827477, "loss": 0.176, "step": 13130 }, { "epoch": 8.4, "learning_rate": 0.0008396166134185303, "loss": 0.2049, "step": 13140 }, { "epoch": 8.4, "learning_rate": 0.0008402555910543132, "loss": 0.2177, "step": 13150 }, { "epoch": 8.41, "learning_rate": 0.0008408945686900958, "loss": 0.1961, "step": 13160 }, { "epoch": 8.42, "learning_rate": 0.0008415335463258786, "loss": 0.2094, "step": 13170 }, { "epoch": 8.42, "learning_rate": 0.0008421725239616614, "loss": 0.2121, "step": 13180 }, { "epoch": 8.43, "learning_rate": 0.0008428115015974441, "loss": 0.1996, "step": 13190 }, { "epoch": 8.43, "learning_rate": 0.0008434504792332268, "loss": 0.1836, "step": 13200 }, { "epoch": 8.44, "learning_rate": 0.0008440894568690097, "loss": 0.1804, "step": 13210 }, { "epoch": 8.45, "learning_rate": 0.0008447284345047923, "loss": 0.2604, "step": 13220 }, { "epoch": 8.45, "learning_rate": 0.0008453674121405751, "loss": 0.2071, "step": 13230 }, { "epoch": 8.46, "learning_rate": 0.0008460063897763578, "loss": 0.2113, "step": 13240 }, { "epoch": 8.47, "learning_rate": 0.0008466453674121406, "loss": 0.1961, "step": 13250 }, { "epoch": 8.47, "learning_rate": 0.0008472843450479233, "loss": 0.2149, "step": 13260 }, { "epoch": 8.48, "learning_rate": 0.0008479233226837061, "loss": 0.2112, "step": 13270 }, { "epoch": 8.49, "learning_rate": 0.0008485623003194889, "loss": 0.2003, "step": 13280 }, { "epoch": 8.49, "learning_rate": 0.0008492012779552716, "loss": 0.2098, "step": 13290 }, { "epoch": 8.5, "learning_rate": 0.0008498402555910543, "loss": 0.1986, "step": 13300 }, { "epoch": 8.5, "learning_rate": 0.000850479233226837, "loss": 0.2055, "step": 13310 }, { "epoch": 8.51, "learning_rate": 0.0008511182108626199, "loss": 0.169, "step": 13320 }, { "epoch": 8.52, "learning_rate": 0.0008517571884984025, "loss": 0.2417, "step": 13330 }, { "epoch": 8.52, "learning_rate": 0.0008523961661341853, "loss": 0.2163, "step": 13340 }, { "epoch": 8.53, "learning_rate": 0.0008530351437699681, "loss": 0.1996, "step": 13350 }, { "epoch": 8.54, "learning_rate": 0.0008536741214057508, "loss": 0.1736, "step": 13360 }, { "epoch": 8.54, "learning_rate": 0.0008543130990415335, "loss": 0.204, "step": 13370 }, { "epoch": 8.55, "learning_rate": 0.0008549520766773164, "loss": 0.2189, "step": 13380 }, { "epoch": 8.56, "learning_rate": 0.000855591054313099, "loss": 0.1797, "step": 13390 }, { "epoch": 8.56, "learning_rate": 0.0008562300319488818, "loss": 0.1723, "step": 13400 }, { "epoch": 8.57, "learning_rate": 0.0008568690095846646, "loss": 0.2189, "step": 13410 }, { "epoch": 8.57, "learning_rate": 0.0008575079872204473, "loss": 0.2004, "step": 13420 }, { "epoch": 8.58, "learning_rate": 0.00085814696485623, "loss": 0.2102, "step": 13430 }, { "epoch": 8.59, "learning_rate": 0.0008587859424920129, "loss": 0.173, "step": 13440 }, { "epoch": 8.59, "learning_rate": 0.0008594249201277955, "loss": 0.2067, "step": 13450 }, { "epoch": 8.6, "learning_rate": 0.0008600638977635783, "loss": 0.2229, "step": 13460 }, { "epoch": 8.61, "learning_rate": 0.0008607028753993609, "loss": 0.2101, "step": 13470 }, { "epoch": 8.61, "learning_rate": 0.0008613418530351438, "loss": 0.1975, "step": 13480 }, { "epoch": 8.62, "learning_rate": 0.0008619808306709266, "loss": 0.1821, "step": 13490 }, { "epoch": 8.63, "learning_rate": 0.0008626198083067092, "loss": 0.244, "step": 13500 }, { "epoch": 8.63, "learning_rate": 0.0008632587859424921, "loss": 0.1859, "step": 13510 }, { "epoch": 8.64, "learning_rate": 0.0008638977635782748, "loss": 0.178, "step": 13520 }, { "epoch": 8.65, "learning_rate": 0.0008645367412140575, "loss": 0.2166, "step": 13530 }, { "epoch": 8.65, "learning_rate": 0.0008651757188498403, "loss": 0.166, "step": 13540 }, { "epoch": 8.66, "learning_rate": 0.0008658146964856231, "loss": 0.1821, "step": 13550 }, { "epoch": 8.66, "learning_rate": 0.0008664536741214057, "loss": 0.1787, "step": 13560 }, { "epoch": 8.67, "learning_rate": 0.0008670926517571886, "loss": 0.1851, "step": 13570 }, { "epoch": 8.68, "learning_rate": 0.0008677316293929713, "loss": 0.181, "step": 13580 }, { "epoch": 8.68, "learning_rate": 0.000868370607028754, "loss": 0.1952, "step": 13590 }, { "epoch": 8.69, "learning_rate": 0.0008690095846645367, "loss": 0.1873, "step": 13600 }, { "epoch": 8.7, "learning_rate": 0.0008696485623003195, "loss": 0.212, "step": 13610 }, { "epoch": 8.7, "learning_rate": 0.0008702875399361022, "loss": 0.2072, "step": 13620 }, { "epoch": 8.71, "learning_rate": 0.000870926517571885, "loss": 0.2092, "step": 13630 }, { "epoch": 8.72, "learning_rate": 0.0008715654952076677, "loss": 0.2119, "step": 13640 }, { "epoch": 8.72, "learning_rate": 0.0008722044728434505, "loss": 0.2139, "step": 13650 }, { "epoch": 8.73, "learning_rate": 0.0008728434504792332, "loss": 0.1895, "step": 13660 }, { "epoch": 8.73, "learning_rate": 0.000873482428115016, "loss": 0.2178, "step": 13670 }, { "epoch": 8.74, "learning_rate": 0.0008741214057507988, "loss": 0.229, "step": 13680 }, { "epoch": 8.75, "learning_rate": 0.0008747603833865815, "loss": 0.2104, "step": 13690 }, { "epoch": 8.75, "learning_rate": 0.0008753993610223643, "loss": 0.2215, "step": 13700 }, { "epoch": 8.76, "learning_rate": 0.000876038338658147, "loss": 0.1943, "step": 13710 }, { "epoch": 8.77, "learning_rate": 0.0008766773162939298, "loss": 0.2193, "step": 13720 }, { "epoch": 8.77, "learning_rate": 0.0008773162939297124, "loss": 0.2294, "step": 13730 }, { "epoch": 8.78, "learning_rate": 0.0008779552715654953, "loss": 0.1998, "step": 13740 }, { "epoch": 8.79, "learning_rate": 0.0008785942492012779, "loss": 0.2152, "step": 13750 }, { "epoch": 8.79, "learning_rate": 0.0008792332268370607, "loss": 0.1832, "step": 13760 }, { "epoch": 8.8, "learning_rate": 0.0008798722044728435, "loss": 0.2159, "step": 13770 }, { "epoch": 8.8, "learning_rate": 0.0008805111821086262, "loss": 0.2204, "step": 13780 }, { "epoch": 8.81, "learning_rate": 0.0008811501597444089, "loss": 0.224, "step": 13790 }, { "epoch": 8.82, "learning_rate": 0.0008817891373801918, "loss": 0.2238, "step": 13800 }, { "epoch": 8.82, "learning_rate": 0.0008824281150159744, "loss": 0.2188, "step": 13810 }, { "epoch": 8.83, "learning_rate": 0.0008830670926517572, "loss": 0.2244, "step": 13820 }, { "epoch": 8.84, "learning_rate": 0.00088370607028754, "loss": 0.193, "step": 13830 }, { "epoch": 8.84, "learning_rate": 0.0008843450479233227, "loss": 0.2003, "step": 13840 }, { "epoch": 8.85, "learning_rate": 0.0008849840255591054, "loss": 0.2043, "step": 13850 }, { "epoch": 8.86, "learning_rate": 0.0008856230031948882, "loss": 0.2051, "step": 13860 }, { "epoch": 8.86, "learning_rate": 0.000886261980830671, "loss": 0.183, "step": 13870 }, { "epoch": 8.87, "learning_rate": 0.0008869009584664537, "loss": 0.2322, "step": 13880 }, { "epoch": 8.88, "learning_rate": 0.0008875399361022365, "loss": 0.2282, "step": 13890 }, { "epoch": 8.88, "learning_rate": 0.0008881789137380192, "loss": 0.196, "step": 13900 }, { "epoch": 8.89, "learning_rate": 0.000888817891373802, "loss": 0.1948, "step": 13910 }, { "epoch": 8.89, "learning_rate": 0.0008894568690095846, "loss": 0.2252, "step": 13920 }, { "epoch": 8.9, "learning_rate": 0.0008900958466453675, "loss": 0.2003, "step": 13930 }, { "epoch": 8.91, "learning_rate": 0.0008907348242811502, "loss": 0.2181, "step": 13940 }, { "epoch": 8.91, "learning_rate": 0.0008913738019169329, "loss": 0.2073, "step": 13950 }, { "epoch": 8.92, "learning_rate": 0.0008920127795527156, "loss": 0.2436, "step": 13960 }, { "epoch": 8.93, "learning_rate": 0.0008926517571884985, "loss": 0.1896, "step": 13970 }, { "epoch": 8.93, "learning_rate": 0.0008932907348242811, "loss": 0.201, "step": 13980 }, { "epoch": 8.94, "learning_rate": 0.0008939297124600639, "loss": 0.2023, "step": 13990 }, { "epoch": 8.95, "learning_rate": 0.0008945686900958467, "loss": 0.2138, "step": 14000 }, { "epoch": 8.95, "learning_rate": 0.0008952076677316294, "loss": 0.2247, "step": 14010 }, { "epoch": 8.96, "learning_rate": 0.0008958466453674121, "loss": 0.2329, "step": 14020 }, { "epoch": 8.96, "learning_rate": 0.000896485623003195, "loss": 0.1843, "step": 14030 }, { "epoch": 8.97, "learning_rate": 0.0008971246006389776, "loss": 0.1895, "step": 14040 }, { "epoch": 8.98, "learning_rate": 0.0008977635782747604, "loss": 0.1814, "step": 14050 }, { "epoch": 8.98, "learning_rate": 0.000898402555910543, "loss": 0.2078, "step": 14060 }, { "epoch": 8.99, "learning_rate": 0.0008990415335463259, "loss": 0.2324, "step": 14070 }, { "epoch": 9.0, "learning_rate": 0.0008996805111821087, "loss": 0.2346, "step": 14080 }, { "epoch": 9.0, "learning_rate": 0.0009003194888178913, "loss": 0.2024, "step": 14090 }, { "epoch": 9.01, "learning_rate": 0.0009009584664536742, "loss": 0.174, "step": 14100 }, { "epoch": 9.02, "learning_rate": 0.0009015974440894569, "loss": 0.1736, "step": 14110 }, { "epoch": 9.02, "learning_rate": 0.0009022364217252396, "loss": 0.2153, "step": 14120 }, { "epoch": 9.03, "learning_rate": 0.0009028753993610224, "loss": 0.1848, "step": 14130 }, { "epoch": 9.04, "learning_rate": 0.0009035143769968052, "loss": 0.1719, "step": 14140 }, { "epoch": 9.04, "learning_rate": 0.0009041533546325878, "loss": 0.1904, "step": 14150 }, { "epoch": 9.05, "learning_rate": 0.0009047923322683707, "loss": 0.1692, "step": 14160 }, { "epoch": 9.05, "learning_rate": 0.0009054313099041534, "loss": 0.1367, "step": 14170 }, { "epoch": 9.06, "learning_rate": 0.0009060702875399361, "loss": 0.1853, "step": 14180 }, { "epoch": 9.07, "learning_rate": 0.0009067092651757188, "loss": 0.1945, "step": 14190 }, { "epoch": 9.07, "learning_rate": 0.0009073482428115017, "loss": 0.1827, "step": 14200 }, { "epoch": 9.08, "learning_rate": 0.0009079872204472843, "loss": 0.1927, "step": 14210 }, { "epoch": 9.09, "learning_rate": 0.0009086261980830671, "loss": 0.1513, "step": 14220 }, { "epoch": 9.09, "learning_rate": 0.0009092651757188498, "loss": 0.2149, "step": 14230 }, { "epoch": 9.1, "learning_rate": 0.0009099041533546326, "loss": 0.1609, "step": 14240 }, { "epoch": 9.11, "learning_rate": 0.0009105431309904153, "loss": 0.1814, "step": 14250 }, { "epoch": 9.11, "learning_rate": 0.0009111821086261981, "loss": 0.192, "step": 14260 }, { "epoch": 9.12, "learning_rate": 0.0009118210862619809, "loss": 0.2028, "step": 14270 }, { "epoch": 9.12, "learning_rate": 0.0009124600638977636, "loss": 0.1814, "step": 14280 }, { "epoch": 9.13, "learning_rate": 0.0009130990415335464, "loss": 0.1721, "step": 14290 }, { "epoch": 9.14, "learning_rate": 0.0009137380191693291, "loss": 0.1894, "step": 14300 }, { "epoch": 9.14, "learning_rate": 0.0009143769968051119, "loss": 0.1716, "step": 14310 }, { "epoch": 9.15, "learning_rate": 0.0009150159744408945, "loss": 0.1957, "step": 14320 }, { "epoch": 9.16, "learning_rate": 0.0009156549520766774, "loss": 0.1671, "step": 14330 }, { "epoch": 9.16, "learning_rate": 0.0009162939297124601, "loss": 0.1852, "step": 14340 }, { "epoch": 9.17, "learning_rate": 0.0009169329073482428, "loss": 0.2002, "step": 14350 }, { "epoch": 9.18, "learning_rate": 0.0009175718849840256, "loss": 0.1912, "step": 14360 }, { "epoch": 9.18, "learning_rate": 0.0009182108626198083, "loss": 0.1929, "step": 14370 }, { "epoch": 9.19, "learning_rate": 0.000918849840255591, "loss": 0.1772, "step": 14380 }, { "epoch": 9.19, "learning_rate": 0.0009194888178913739, "loss": 0.1767, "step": 14390 }, { "epoch": 9.2, "learning_rate": 0.0009201277955271565, "loss": 0.1999, "step": 14400 }, { "epoch": 9.21, "learning_rate": 0.0009207667731629393, "loss": 0.2022, "step": 14410 }, { "epoch": 9.21, "learning_rate": 0.000921405750798722, "loss": 0.2222, "step": 14420 }, { "epoch": 9.22, "learning_rate": 0.0009220447284345048, "loss": 0.1796, "step": 14430 }, { "epoch": 9.23, "learning_rate": 0.0009226837060702875, "loss": 0.197, "step": 14440 }, { "epoch": 9.23, "learning_rate": 0.0009233226837060703, "loss": 0.1699, "step": 14450 }, { "epoch": 9.24, "learning_rate": 0.000923961661341853, "loss": 0.1662, "step": 14460 }, { "epoch": 9.25, "learning_rate": 0.0009246006389776358, "loss": 0.1596, "step": 14470 }, { "epoch": 9.25, "learning_rate": 0.0009252396166134186, "loss": 0.1826, "step": 14480 }, { "epoch": 9.26, "learning_rate": 0.0009258785942492013, "loss": 0.1801, "step": 14490 }, { "epoch": 9.27, "learning_rate": 0.0009265175718849841, "loss": 0.217, "step": 14500 }, { "epoch": 9.27, "learning_rate": 0.0009271565495207667, "loss": 0.1575, "step": 14510 }, { "epoch": 9.28, "learning_rate": 0.0009277955271565496, "loss": 0.2204, "step": 14520 }, { "epoch": 9.28, "learning_rate": 0.0009284345047923323, "loss": 0.1904, "step": 14530 }, { "epoch": 9.29, "learning_rate": 0.000929073482428115, "loss": 0.1612, "step": 14540 }, { "epoch": 9.3, "learning_rate": 0.0009297124600638978, "loss": 0.2108, "step": 14550 }, { "epoch": 9.3, "learning_rate": 0.0009303514376996806, "loss": 0.2171, "step": 14560 }, { "epoch": 9.31, "learning_rate": 0.0009309904153354632, "loss": 0.1911, "step": 14570 }, { "epoch": 9.32, "learning_rate": 0.000931629392971246, "loss": 0.2062, "step": 14580 }, { "epoch": 9.32, "learning_rate": 0.0009322683706070288, "loss": 0.2073, "step": 14590 }, { "epoch": 9.33, "learning_rate": 0.0009329073482428115, "loss": 0.2118, "step": 14600 }, { "epoch": 9.34, "learning_rate": 0.0009335463258785942, "loss": 0.1814, "step": 14610 }, { "epoch": 9.34, "learning_rate": 0.0009341853035143771, "loss": 0.1654, "step": 14620 }, { "epoch": 9.35, "learning_rate": 0.0009348242811501597, "loss": 0.1794, "step": 14630 }, { "epoch": 9.35, "learning_rate": 0.0009354632587859425, "loss": 0.199, "step": 14640 }, { "epoch": 9.36, "learning_rate": 0.0009361022364217253, "loss": 0.1785, "step": 14650 }, { "epoch": 9.37, "learning_rate": 0.000936741214057508, "loss": 0.222, "step": 14660 }, { "epoch": 9.37, "learning_rate": 0.0009373801916932908, "loss": 0.2213, "step": 14670 }, { "epoch": 9.38, "learning_rate": 0.0009380191693290734, "loss": 0.1864, "step": 14680 }, { "epoch": 9.39, "learning_rate": 0.0009386581469648563, "loss": 0.1973, "step": 14690 }, { "epoch": 9.39, "learning_rate": 0.000939297124600639, "loss": 0.2027, "step": 14700 }, { "epoch": 9.4, "learning_rate": 0.0009399361022364217, "loss": 0.2115, "step": 14710 }, { "epoch": 9.41, "learning_rate": 0.0009405750798722045, "loss": 0.207, "step": 14720 }, { "epoch": 9.41, "learning_rate": 0.0009412140575079873, "loss": 0.1878, "step": 14730 }, { "epoch": 9.42, "learning_rate": 0.0009418530351437699, "loss": 0.1794, "step": 14740 }, { "epoch": 9.42, "learning_rate": 0.0009424920127795528, "loss": 0.2063, "step": 14750 }, { "epoch": 9.43, "learning_rate": 0.0009431309904153355, "loss": 0.2194, "step": 14760 }, { "epoch": 9.44, "learning_rate": 0.0009437699680511182, "loss": 0.1841, "step": 14770 }, { "epoch": 9.44, "learning_rate": 0.000944408945686901, "loss": 0.2375, "step": 14780 }, { "epoch": 9.45, "learning_rate": 0.0009450479233226838, "loss": 0.1869, "step": 14790 }, { "epoch": 9.46, "learning_rate": 0.0009456869009584664, "loss": 0.1953, "step": 14800 }, { "epoch": 9.46, "learning_rate": 0.0009463258785942493, "loss": 0.202, "step": 14810 }, { "epoch": 9.47, "learning_rate": 0.0009469648562300319, "loss": 0.2095, "step": 14820 }, { "epoch": 9.48, "learning_rate": 0.0009476038338658147, "loss": 0.1894, "step": 14830 }, { "epoch": 9.48, "learning_rate": 0.0009482428115015974, "loss": 0.2249, "step": 14840 }, { "epoch": 9.49, "learning_rate": 0.0009488817891373802, "loss": 0.1649, "step": 14850 }, { "epoch": 9.5, "learning_rate": 0.000949520766773163, "loss": 0.1917, "step": 14860 }, { "epoch": 9.5, "learning_rate": 0.0009501597444089457, "loss": 0.1856, "step": 14870 }, { "epoch": 9.51, "learning_rate": 0.0009507987220447285, "loss": 0.2134, "step": 14880 }, { "epoch": 9.51, "learning_rate": 0.0009514376996805112, "loss": 0.2183, "step": 14890 }, { "epoch": 9.52, "learning_rate": 0.000952076677316294, "loss": 0.2113, "step": 14900 }, { "epoch": 9.53, "learning_rate": 0.0009527156549520767, "loss": 0.1836, "step": 14910 }, { "epoch": 9.53, "learning_rate": 0.0009533546325878595, "loss": 0.1806, "step": 14920 }, { "epoch": 9.54, "learning_rate": 0.0009539936102236422, "loss": 0.2051, "step": 14930 }, { "epoch": 9.55, "learning_rate": 0.000954632587859425, "loss": 0.1987, "step": 14940 }, { "epoch": 9.55, "learning_rate": 0.0009552715654952077, "loss": 0.2183, "step": 14950 }, { "epoch": 9.56, "learning_rate": 0.0009559105431309905, "loss": 0.188, "step": 14960 }, { "epoch": 9.57, "learning_rate": 0.0009565495207667731, "loss": 0.2088, "step": 14970 }, { "epoch": 9.57, "learning_rate": 0.000957188498402556, "loss": 0.2153, "step": 14980 }, { "epoch": 9.58, "learning_rate": 0.0009578274760383386, "loss": 0.2069, "step": 14990 }, { "epoch": 9.58, "learning_rate": 0.0009584664536741214, "loss": 0.1698, "step": 15000 }, { "epoch": 9.59, "learning_rate": 0.0009591054313099042, "loss": 0.205, "step": 15010 }, { "epoch": 9.6, "learning_rate": 0.0009597444089456869, "loss": 0.1753, "step": 15020 }, { "epoch": 9.6, "learning_rate": 0.0009603833865814696, "loss": 0.1908, "step": 15030 }, { "epoch": 9.61, "learning_rate": 0.0009610223642172525, "loss": 0.1935, "step": 15040 }, { "epoch": 9.62, "learning_rate": 0.0009616613418530351, "loss": 0.2243, "step": 15050 }, { "epoch": 9.62, "learning_rate": 0.0009623003194888179, "loss": 0.2018, "step": 15060 }, { "epoch": 9.63, "learning_rate": 0.0009629392971246008, "loss": 0.2062, "step": 15070 }, { "epoch": 9.64, "learning_rate": 0.0009635782747603834, "loss": 0.2033, "step": 15080 }, { "epoch": 9.64, "learning_rate": 0.0009642172523961662, "loss": 0.1993, "step": 15090 }, { "epoch": 9.65, "learning_rate": 0.0009648562300319489, "loss": 0.1946, "step": 15100 }, { "epoch": 9.65, "learning_rate": 0.0009654952076677317, "loss": 0.2113, "step": 15110 }, { "epoch": 9.66, "learning_rate": 0.0009661341853035144, "loss": 0.2004, "step": 15120 }, { "epoch": 9.67, "learning_rate": 0.0009667731629392971, "loss": 0.21, "step": 15130 }, { "epoch": 9.67, "learning_rate": 0.0009674121405750799, "loss": 0.1794, "step": 15140 }, { "epoch": 9.68, "learning_rate": 0.0009680511182108627, "loss": 0.2395, "step": 15150 }, { "epoch": 9.69, "learning_rate": 0.0009686900958466453, "loss": 0.2506, "step": 15160 }, { "epoch": 9.69, "learning_rate": 0.0009693290734824282, "loss": 0.1916, "step": 15170 }, { "epoch": 9.7, "learning_rate": 0.0009699680511182109, "loss": 0.1933, "step": 15180 }, { "epoch": 9.71, "learning_rate": 0.0009706070287539936, "loss": 0.2162, "step": 15190 }, { "epoch": 9.71, "learning_rate": 0.0009712460063897763, "loss": 0.221, "step": 15200 }, { "epoch": 9.72, "learning_rate": 0.0009718849840255592, "loss": 0.1934, "step": 15210 }, { "epoch": 9.73, "learning_rate": 0.0009725239616613418, "loss": 0.1856, "step": 15220 }, { "epoch": 9.73, "learning_rate": 0.0009731629392971246, "loss": 0.1951, "step": 15230 }, { "epoch": 9.74, "learning_rate": 0.0009738019169329074, "loss": 0.1962, "step": 15240 }, { "epoch": 9.74, "learning_rate": 0.0009744408945686901, "loss": 0.1947, "step": 15250 }, { "epoch": 9.75, "learning_rate": 0.0009750798722044729, "loss": 0.217, "step": 15260 }, { "epoch": 9.76, "learning_rate": 0.0009757188498402556, "loss": 0.2092, "step": 15270 }, { "epoch": 9.76, "learning_rate": 0.0009763578274760384, "loss": 0.1695, "step": 15280 }, { "epoch": 9.77, "learning_rate": 0.0009769968051118211, "loss": 0.1863, "step": 15290 }, { "epoch": 9.78, "learning_rate": 0.0009776357827476038, "loss": 0.1947, "step": 15300 }, { "epoch": 9.78, "learning_rate": 0.0009782747603833866, "loss": 0.1878, "step": 15310 }, { "epoch": 9.79, "learning_rate": 0.0009789137380191693, "loss": 0.2087, "step": 15320 }, { "epoch": 9.8, "learning_rate": 0.000979552715654952, "loss": 0.183, "step": 15330 }, { "epoch": 9.8, "learning_rate": 0.000980191693290735, "loss": 0.1836, "step": 15340 }, { "epoch": 9.81, "learning_rate": 0.0009808306709265175, "loss": 0.2152, "step": 15350 }, { "epoch": 9.81, "learning_rate": 0.0009814696485623004, "loss": 0.2007, "step": 15360 }, { "epoch": 9.82, "learning_rate": 0.000982108626198083, "loss": 0.2202, "step": 15370 }, { "epoch": 9.83, "learning_rate": 0.000982747603833866, "loss": 0.2071, "step": 15380 }, { "epoch": 9.83, "learning_rate": 0.0009833865814696486, "loss": 0.212, "step": 15390 }, { "epoch": 9.84, "learning_rate": 0.0009840255591054314, "loss": 0.217, "step": 15400 }, { "epoch": 9.85, "learning_rate": 0.000984664536741214, "loss": 0.1713, "step": 15410 }, { "epoch": 9.85, "learning_rate": 0.0009853035143769968, "loss": 0.1993, "step": 15420 }, { "epoch": 9.86, "learning_rate": 0.0009859424920127795, "loss": 0.1823, "step": 15430 }, { "epoch": 9.87, "learning_rate": 0.0009865814696485623, "loss": 0.179, "step": 15440 }, { "epoch": 9.87, "learning_rate": 0.000987220447284345, "loss": 0.1938, "step": 15450 }, { "epoch": 9.88, "learning_rate": 0.0009878594249201277, "loss": 0.2068, "step": 15460 }, { "epoch": 9.88, "learning_rate": 0.0009884984025559107, "loss": 0.1709, "step": 15470 }, { "epoch": 9.89, "learning_rate": 0.0009891373801916934, "loss": 0.2332, "step": 15480 }, { "epoch": 9.9, "learning_rate": 0.0009897763578274761, "loss": 0.193, "step": 15490 }, { "epoch": 9.9, "learning_rate": 0.0009904153354632589, "loss": 0.212, "step": 15500 }, { "epoch": 9.91, "learning_rate": 0.0009910543130990416, "loss": 0.2095, "step": 15510 }, { "epoch": 9.92, "learning_rate": 0.0009916932907348243, "loss": 0.1849, "step": 15520 }, { "epoch": 9.92, "learning_rate": 0.000992332268370607, "loss": 0.212, "step": 15530 }, { "epoch": 9.93, "learning_rate": 0.0009929712460063898, "loss": 0.1945, "step": 15540 }, { "epoch": 9.94, "learning_rate": 0.0009936102236421725, "loss": 0.2293, "step": 15550 }, { "epoch": 9.94, "learning_rate": 0.0009942492012779552, "loss": 0.2218, "step": 15560 }, { "epoch": 9.95, "learning_rate": 0.000994888178913738, "loss": 0.2198, "step": 15570 }, { "epoch": 9.95, "learning_rate": 0.0009955271565495207, "loss": 0.2099, "step": 15580 }, { "epoch": 9.96, "learning_rate": 0.0009961661341853036, "loss": 0.2159, "step": 15590 }, { "epoch": 9.97, "learning_rate": 0.0009968051118210862, "loss": 0.2077, "step": 15600 }, { "epoch": 9.97, "learning_rate": 0.000997444089456869, "loss": 0.2324, "step": 15610 }, { "epoch": 9.98, "learning_rate": 0.0009980830670926518, "loss": 0.2459, "step": 15620 }, { "epoch": 9.99, "learning_rate": 0.0009987220447284346, "loss": 0.2167, "step": 15630 }, { "epoch": 9.99, "learning_rate": 0.0009993610223642173, "loss": 0.1978, "step": 15640 }, { "epoch": 10.0, "learning_rate": 0.001, "loss": 0.1983, "step": 15650 }, { "epoch": 10.01, "learning_rate": 0.000999929002484913, "loss": 0.1927, "step": 15660 }, { "epoch": 10.01, "learning_rate": 0.000999858004969826, "loss": 0.1822, "step": 15670 }, { "epoch": 10.02, "learning_rate": 0.0009997870074547391, "loss": 0.1596, "step": 15680 }, { "epoch": 10.03, "learning_rate": 0.000999716009939652, "loss": 0.163, "step": 15690 }, { "epoch": 10.03, "learning_rate": 0.0009996450124245652, "loss": 0.1961, "step": 15700 }, { "epoch": 10.04, "learning_rate": 0.0009995740149094782, "loss": 0.1907, "step": 15710 }, { "epoch": 10.04, "learning_rate": 0.0009995030173943912, "loss": 0.1704, "step": 15720 }, { "epoch": 10.05, "learning_rate": 0.0009994320198793041, "loss": 0.1994, "step": 15730 }, { "epoch": 10.06, "learning_rate": 0.0009993610223642173, "loss": 0.1503, "step": 15740 }, { "epoch": 10.06, "learning_rate": 0.0009992900248491302, "loss": 0.151, "step": 15750 }, { "epoch": 10.07, "learning_rate": 0.0009992190273340434, "loss": 0.1875, "step": 15760 }, { "epoch": 10.08, "learning_rate": 0.0009991480298189564, "loss": 0.1754, "step": 15770 }, { "epoch": 10.08, "learning_rate": 0.0009990770323038693, "loss": 0.1899, "step": 15780 }, { "epoch": 10.09, "learning_rate": 0.0009990060347887823, "loss": 0.1809, "step": 15790 }, { "epoch": 10.1, "learning_rate": 0.0009989350372736955, "loss": 0.1691, "step": 15800 }, { "epoch": 10.1, "learning_rate": 0.0009988640397586084, "loss": 0.1784, "step": 15810 }, { "epoch": 10.11, "learning_rate": 0.0009987930422435216, "loss": 0.1872, "step": 15820 }, { "epoch": 10.11, "learning_rate": 0.0009987220447284346, "loss": 0.1759, "step": 15830 }, { "epoch": 10.12, "learning_rate": 0.0009986510472133475, "loss": 0.1688, "step": 15840 }, { "epoch": 10.13, "learning_rate": 0.0009985800496982605, "loss": 0.2019, "step": 15850 }, { "epoch": 10.13, "learning_rate": 0.0009985090521831737, "loss": 0.1546, "step": 15860 }, { "epoch": 10.14, "learning_rate": 0.0009984380546680866, "loss": 0.207, "step": 15870 }, { "epoch": 10.15, "learning_rate": 0.0009983670571529998, "loss": 0.1832, "step": 15880 }, { "epoch": 10.15, "learning_rate": 0.0009982960596379127, "loss": 0.1724, "step": 15890 }, { "epoch": 10.16, "learning_rate": 0.0009982250621228257, "loss": 0.1905, "step": 15900 }, { "epoch": 10.17, "learning_rate": 0.0009981540646077387, "loss": 0.1705, "step": 15910 }, { "epoch": 10.17, "learning_rate": 0.0009980830670926518, "loss": 0.1848, "step": 15920 }, { "epoch": 10.18, "learning_rate": 0.0009980120695775648, "loss": 0.1958, "step": 15930 }, { "epoch": 10.19, "learning_rate": 0.000997941072062478, "loss": 0.1769, "step": 15940 }, { "epoch": 10.19, "learning_rate": 0.000997870074547391, "loss": 0.1765, "step": 15950 }, { "epoch": 10.2, "learning_rate": 0.0009977990770323039, "loss": 0.2068, "step": 15960 }, { "epoch": 10.2, "learning_rate": 0.0009977280795172168, "loss": 0.146, "step": 15970 }, { "epoch": 10.21, "learning_rate": 0.0009976570820021298, "loss": 0.1541, "step": 15980 }, { "epoch": 10.22, "learning_rate": 0.000997586084487043, "loss": 0.188, "step": 15990 }, { "epoch": 10.22, "learning_rate": 0.000997515086971956, "loss": 0.1822, "step": 16000 }, { "epoch": 10.23, "learning_rate": 0.000997444089456869, "loss": 0.1641, "step": 16010 }, { "epoch": 10.24, "learning_rate": 0.000997373091941782, "loss": 0.1893, "step": 16020 }, { "epoch": 10.24, "learning_rate": 0.000997302094426695, "loss": 0.191, "step": 16030 }, { "epoch": 10.25, "learning_rate": 0.000997231096911608, "loss": 0.2111, "step": 16040 }, { "epoch": 10.26, "learning_rate": 0.0009971600993965212, "loss": 0.186, "step": 16050 }, { "epoch": 10.26, "learning_rate": 0.0009970891018814341, "loss": 0.1952, "step": 16060 }, { "epoch": 10.27, "learning_rate": 0.0009970181043663473, "loss": 0.1963, "step": 16070 }, { "epoch": 10.27, "learning_rate": 0.0009969471068512602, "loss": 0.183, "step": 16080 }, { "epoch": 10.28, "learning_rate": 0.0009968761093361732, "loss": 0.1845, "step": 16090 }, { "epoch": 10.29, "learning_rate": 0.0009968051118210862, "loss": 0.2002, "step": 16100 }, { "epoch": 10.29, "learning_rate": 0.0009967341143059993, "loss": 0.1953, "step": 16110 }, { "epoch": 10.3, "learning_rate": 0.0009966631167909123, "loss": 0.1778, "step": 16120 }, { "epoch": 10.31, "learning_rate": 0.0009965921192758255, "loss": 0.2073, "step": 16130 }, { "epoch": 10.31, "learning_rate": 0.0009965211217607384, "loss": 0.1755, "step": 16140 }, { "epoch": 10.32, "learning_rate": 0.0009964501242456514, "loss": 0.2085, "step": 16150 }, { "epoch": 10.33, "learning_rate": 0.0009963791267305643, "loss": 0.1978, "step": 16160 }, { "epoch": 10.33, "learning_rate": 0.0009963081292154775, "loss": 0.2101, "step": 16170 }, { "epoch": 10.34, "learning_rate": 0.0009962371317003905, "loss": 0.2027, "step": 16180 }, { "epoch": 10.34, "learning_rate": 0.0009961661341853036, "loss": 0.1905, "step": 16190 }, { "epoch": 10.35, "learning_rate": 0.0009960951366702166, "loss": 0.175, "step": 16200 }, { "epoch": 10.36, "learning_rate": 0.0009960241391551296, "loss": 0.1816, "step": 16210 }, { "epoch": 10.36, "learning_rate": 0.0009959531416400425, "loss": 0.1917, "step": 16220 }, { "epoch": 10.37, "learning_rate": 0.0009958821441249557, "loss": 0.1956, "step": 16230 }, { "epoch": 10.38, "learning_rate": 0.0009958111466098687, "loss": 0.2037, "step": 16240 }, { "epoch": 10.38, "learning_rate": 0.0009957401490947818, "loss": 0.2194, "step": 16250 }, { "epoch": 10.39, "learning_rate": 0.0009956691515796948, "loss": 0.1633, "step": 16260 }, { "epoch": 10.4, "learning_rate": 0.0009955981540646077, "loss": 0.1803, "step": 16270 }, { "epoch": 10.4, "learning_rate": 0.0009955271565495207, "loss": 0.1885, "step": 16280 }, { "epoch": 10.41, "learning_rate": 0.0009954561590344337, "loss": 0.2004, "step": 16290 }, { "epoch": 10.42, "learning_rate": 0.0009953851615193468, "loss": 0.1961, "step": 16300 }, { "epoch": 10.42, "learning_rate": 0.0009953141640042598, "loss": 0.1827, "step": 16310 }, { "epoch": 10.43, "learning_rate": 0.000995243166489173, "loss": 0.1775, "step": 16320 }, { "epoch": 10.43, "learning_rate": 0.000995172168974086, "loss": 0.1905, "step": 16330 }, { "epoch": 10.44, "learning_rate": 0.0009951011714589989, "loss": 0.2072, "step": 16340 }, { "epoch": 10.45, "learning_rate": 0.0009950301739439118, "loss": 0.1725, "step": 16350 }, { "epoch": 10.45, "learning_rate": 0.000994959176428825, "loss": 0.2023, "step": 16360 }, { "epoch": 10.46, "learning_rate": 0.000994888178913738, "loss": 0.1921, "step": 16370 }, { "epoch": 10.47, "learning_rate": 0.0009948171813986511, "loss": 0.1988, "step": 16380 }, { "epoch": 10.47, "learning_rate": 0.000994746183883564, "loss": 0.1888, "step": 16390 }, { "epoch": 10.48, "learning_rate": 0.000994675186368477, "loss": 0.2047, "step": 16400 }, { "epoch": 10.49, "learning_rate": 0.00099460418885339, "loss": 0.179, "step": 16410 }, { "epoch": 10.49, "learning_rate": 0.0009945331913383032, "loss": 0.1867, "step": 16420 }, { "epoch": 10.5, "learning_rate": 0.0009944621938232162, "loss": 0.1868, "step": 16430 }, { "epoch": 10.5, "learning_rate": 0.0009943911963081293, "loss": 0.212, "step": 16440 }, { "epoch": 10.51, "learning_rate": 0.0009943201987930423, "loss": 0.1937, "step": 16450 }, { "epoch": 10.52, "learning_rate": 0.0009942492012779552, "loss": 0.1915, "step": 16460 }, { "epoch": 10.52, "learning_rate": 0.0009941782037628682, "loss": 0.1712, "step": 16470 }, { "epoch": 10.53, "learning_rate": 0.0009941072062477814, "loss": 0.2135, "step": 16480 }, { "epoch": 10.54, "learning_rate": 0.0009940362087326943, "loss": 0.1716, "step": 16490 }, { "epoch": 10.54, "learning_rate": 0.0009939652112176075, "loss": 0.193, "step": 16500 }, { "epoch": 10.55, "learning_rate": 0.0009938942137025205, "loss": 0.2042, "step": 16510 }, { "epoch": 10.56, "learning_rate": 0.0009938232161874334, "loss": 0.2068, "step": 16520 }, { "epoch": 10.56, "learning_rate": 0.0009937522186723464, "loss": 0.1977, "step": 16530 }, { "epoch": 10.57, "learning_rate": 0.0009936812211572596, "loss": 0.1968, "step": 16540 }, { "epoch": 10.57, "learning_rate": 0.0009936102236421725, "loss": 0.2021, "step": 16550 }, { "epoch": 10.58, "learning_rate": 0.0009935392261270857, "loss": 0.2083, "step": 16560 }, { "epoch": 10.59, "learning_rate": 0.0009934682286119986, "loss": 0.1932, "step": 16570 }, { "epoch": 10.59, "learning_rate": 0.0009933972310969116, "loss": 0.2191, "step": 16580 }, { "epoch": 10.6, "learning_rate": 0.0009933262335818246, "loss": 0.2102, "step": 16590 }, { "epoch": 10.61, "learning_rate": 0.0009932552360667377, "loss": 0.1698, "step": 16600 }, { "epoch": 10.61, "learning_rate": 0.0009931842385516507, "loss": 0.2065, "step": 16610 }, { "epoch": 10.62, "learning_rate": 0.0009931132410365637, "loss": 0.1976, "step": 16620 }, { "epoch": 10.63, "learning_rate": 0.0009930422435214768, "loss": 0.2074, "step": 16630 }, { "epoch": 10.63, "learning_rate": 0.0009929712460063898, "loss": 0.22, "step": 16640 }, { "epoch": 10.64, "learning_rate": 0.0009929002484913027, "loss": 0.1612, "step": 16650 }, { "epoch": 10.65, "learning_rate": 0.000992829250976216, "loss": 0.2161, "step": 16660 }, { "epoch": 10.65, "learning_rate": 0.0009927582534611289, "loss": 0.1898, "step": 16670 }, { "epoch": 10.66, "learning_rate": 0.0009926872559460418, "loss": 0.155, "step": 16680 }, { "epoch": 10.66, "learning_rate": 0.000992616258430955, "loss": 0.1943, "step": 16690 }, { "epoch": 10.67, "learning_rate": 0.000992545260915868, "loss": 0.1812, "step": 16700 }, { "epoch": 10.68, "learning_rate": 0.000992474263400781, "loss": 0.19, "step": 16710 }, { "epoch": 10.68, "learning_rate": 0.000992403265885694, "loss": 0.2032, "step": 16720 }, { "epoch": 10.69, "learning_rate": 0.000992332268370607, "loss": 0.1948, "step": 16730 }, { "epoch": 10.7, "learning_rate": 0.00099226127085552, "loss": 0.2, "step": 16740 }, { "epoch": 10.7, "learning_rate": 0.0009921902733404332, "loss": 0.1899, "step": 16750 }, { "epoch": 10.71, "learning_rate": 0.0009921192758253461, "loss": 0.1937, "step": 16760 }, { "epoch": 10.72, "learning_rate": 0.000992048278310259, "loss": 0.1854, "step": 16770 }, { "epoch": 10.72, "learning_rate": 0.0009919772807951723, "loss": 0.2145, "step": 16780 }, { "epoch": 10.73, "learning_rate": 0.0009919062832800852, "loss": 0.2008, "step": 16790 }, { "epoch": 10.73, "learning_rate": 0.0009918352857649982, "loss": 0.1859, "step": 16800 }, { "epoch": 10.74, "learning_rate": 0.0009917642882499114, "loss": 0.1952, "step": 16810 }, { "epoch": 10.75, "learning_rate": 0.0009916932907348243, "loss": 0.1662, "step": 16820 }, { "epoch": 10.75, "learning_rate": 0.0009916222932197373, "loss": 0.1971, "step": 16830 }, { "epoch": 10.76, "learning_rate": 0.0009915512957046502, "loss": 0.176, "step": 16840 }, { "epoch": 10.77, "learning_rate": 0.0009914802981895634, "loss": 0.1621, "step": 16850 }, { "epoch": 10.77, "learning_rate": 0.0009914093006744764, "loss": 0.2204, "step": 16860 }, { "epoch": 10.78, "learning_rate": 0.0009913383031593895, "loss": 0.1798, "step": 16870 }, { "epoch": 10.79, "learning_rate": 0.0009912673056443025, "loss": 0.1984, "step": 16880 }, { "epoch": 10.79, "learning_rate": 0.0009911963081292155, "loss": 0.1789, "step": 16890 }, { "epoch": 10.8, "learning_rate": 0.0009911253106141284, "loss": 0.1897, "step": 16900 }, { "epoch": 10.8, "learning_rate": 0.0009910543130990416, "loss": 0.1888, "step": 16910 }, { "epoch": 10.81, "learning_rate": 0.0009909833155839546, "loss": 0.1828, "step": 16920 }, { "epoch": 10.82, "learning_rate": 0.0009909123180688675, "loss": 0.1798, "step": 16930 }, { "epoch": 10.82, "learning_rate": 0.0009908413205537807, "loss": 0.1729, "step": 16940 }, { "epoch": 10.83, "learning_rate": 0.0009907703230386936, "loss": 0.2113, "step": 16950 }, { "epoch": 10.84, "learning_rate": 0.0009906993255236066, "loss": 0.2041, "step": 16960 }, { "epoch": 10.84, "learning_rate": 0.0009906283280085198, "loss": 0.1625, "step": 16970 }, { "epoch": 10.85, "learning_rate": 0.0009905573304934327, "loss": 0.1726, "step": 16980 }, { "epoch": 10.86, "learning_rate": 0.0009904863329783457, "loss": 0.1875, "step": 16990 }, { "epoch": 10.86, "learning_rate": 0.0009904153354632589, "loss": 0.2126, "step": 17000 }, { "epoch": 10.87, "learning_rate": 0.0009903443379481718, "loss": 0.1882, "step": 17010 }, { "epoch": 10.88, "learning_rate": 0.0009902733404330848, "loss": 0.2062, "step": 17020 }, { "epoch": 10.88, "learning_rate": 0.000990202342917998, "loss": 0.1705, "step": 17030 }, { "epoch": 10.89, "learning_rate": 0.000990131345402911, "loss": 0.2103, "step": 17040 }, { "epoch": 10.89, "learning_rate": 0.0009900603478878239, "loss": 0.1766, "step": 17050 }, { "epoch": 10.9, "learning_rate": 0.000989989350372737, "loss": 0.189, "step": 17060 }, { "epoch": 10.91, "learning_rate": 0.00098991835285765, "loss": 0.1797, "step": 17070 }, { "epoch": 10.91, "learning_rate": 0.000989847355342563, "loss": 0.2013, "step": 17080 }, { "epoch": 10.92, "learning_rate": 0.0009897763578274761, "loss": 0.2034, "step": 17090 }, { "epoch": 10.93, "learning_rate": 0.000989705360312389, "loss": 0.1947, "step": 17100 }, { "epoch": 10.93, "learning_rate": 0.000989634362797302, "loss": 0.2248, "step": 17110 }, { "epoch": 10.94, "learning_rate": 0.0009895633652822152, "loss": 0.2021, "step": 17120 }, { "epoch": 10.95, "learning_rate": 0.0009894923677671282, "loss": 0.1932, "step": 17130 }, { "epoch": 10.95, "learning_rate": 0.0009894213702520411, "loss": 0.1998, "step": 17140 }, { "epoch": 10.96, "learning_rate": 0.0009893503727369543, "loss": 0.2081, "step": 17150 }, { "epoch": 10.96, "learning_rate": 0.0009892793752218673, "loss": 0.2047, "step": 17160 }, { "epoch": 10.97, "learning_rate": 0.0009892083777067802, "loss": 0.1864, "step": 17170 }, { "epoch": 10.98, "learning_rate": 0.0009891373801916934, "loss": 0.2137, "step": 17180 }, { "epoch": 10.98, "learning_rate": 0.0009890663826766064, "loss": 0.1858, "step": 17190 }, { "epoch": 10.99, "learning_rate": 0.0009889953851615193, "loss": 0.2036, "step": 17200 }, { "epoch": 11.0, "learning_rate": 0.0009889243876464325, "loss": 0.1979, "step": 17210 }, { "epoch": 11.0, "learning_rate": 0.0009888533901313455, "loss": 0.1788, "step": 17220 }, { "epoch": 11.01, "learning_rate": 0.0009887823926162584, "loss": 0.1777, "step": 17230 }, { "epoch": 11.02, "learning_rate": 0.0009887113951011714, "loss": 0.1835, "step": 17240 }, { "epoch": 11.02, "learning_rate": 0.0009886403975860845, "loss": 0.1546, "step": 17250 }, { "epoch": 11.03, "learning_rate": 0.0009885694000709975, "loss": 0.1461, "step": 17260 }, { "epoch": 11.04, "learning_rate": 0.0009884984025559107, "loss": 0.1716, "step": 17270 }, { "epoch": 11.04, "learning_rate": 0.0009884274050408236, "loss": 0.1556, "step": 17280 }, { "epoch": 11.05, "learning_rate": 0.0009883564075257366, "loss": 0.1664, "step": 17290 }, { "epoch": 11.05, "learning_rate": 0.0009882854100106496, "loss": 0.1738, "step": 17300 }, { "epoch": 11.06, "learning_rate": 0.0009882144124955627, "loss": 0.1797, "step": 17310 }, { "epoch": 11.07, "learning_rate": 0.0009881434149804757, "loss": 0.2018, "step": 17320 }, { "epoch": 11.07, "learning_rate": 0.0009880724174653889, "loss": 0.1683, "step": 17330 }, { "epoch": 11.08, "learning_rate": 0.0009880014199503018, "loss": 0.1928, "step": 17340 }, { "epoch": 11.09, "learning_rate": 0.0009879304224352148, "loss": 0.1737, "step": 17350 }, { "epoch": 11.09, "learning_rate": 0.0009878594249201277, "loss": 0.1715, "step": 17360 }, { "epoch": 11.1, "learning_rate": 0.000987788427405041, "loss": 0.1691, "step": 17370 }, { "epoch": 11.11, "learning_rate": 0.0009877174298899539, "loss": 0.1462, "step": 17380 }, { "epoch": 11.11, "learning_rate": 0.000987646432374867, "loss": 0.1835, "step": 17390 }, { "epoch": 11.12, "learning_rate": 0.00098757543485978, "loss": 0.1515, "step": 17400 }, { "epoch": 11.12, "learning_rate": 0.000987504437344693, "loss": 0.1819, "step": 17410 }, { "epoch": 11.13, "learning_rate": 0.000987433439829606, "loss": 0.137, "step": 17420 }, { "epoch": 11.14, "learning_rate": 0.000987362442314519, "loss": 0.1742, "step": 17430 }, { "epoch": 11.14, "learning_rate": 0.000987291444799432, "loss": 0.147, "step": 17440 }, { "epoch": 11.15, "learning_rate": 0.000987220447284345, "loss": 0.1645, "step": 17450 }, { "epoch": 11.16, "learning_rate": 0.0009871494497692582, "loss": 0.1775, "step": 17460 }, { "epoch": 11.16, "learning_rate": 0.0009870784522541711, "loss": 0.1766, "step": 17470 }, { "epoch": 11.17, "learning_rate": 0.000987007454739084, "loss": 0.19, "step": 17480 }, { "epoch": 11.18, "learning_rate": 0.0009869364572239973, "loss": 0.1521, "step": 17490 }, { "epoch": 11.18, "learning_rate": 0.0009868654597089102, "loss": 0.1355, "step": 17500 }, { "epoch": 11.19, "learning_rate": 0.0009867944621938232, "loss": 0.169, "step": 17510 }, { "epoch": 11.19, "learning_rate": 0.0009867234646787364, "loss": 0.1842, "step": 17520 }, { "epoch": 11.2, "learning_rate": 0.0009866524671636493, "loss": 0.1588, "step": 17530 }, { "epoch": 11.21, "learning_rate": 0.0009865814696485623, "loss": 0.1587, "step": 17540 }, { "epoch": 11.21, "learning_rate": 0.0009865104721334752, "loss": 0.1792, "step": 17550 }, { "epoch": 11.22, "learning_rate": 0.0009864394746183884, "loss": 0.1832, "step": 17560 }, { "epoch": 11.23, "learning_rate": 0.0009863684771033014, "loss": 0.145, "step": 17570 }, { "epoch": 11.23, "learning_rate": 0.0009862974795882145, "loss": 0.1764, "step": 17580 }, { "epoch": 11.24, "learning_rate": 0.0009862264820731275, "loss": 0.1706, "step": 17590 }, { "epoch": 11.25, "learning_rate": 0.0009861554845580405, "loss": 0.1737, "step": 17600 }, { "epoch": 11.25, "learning_rate": 0.0009860844870429534, "loss": 0.1445, "step": 17610 }, { "epoch": 11.26, "learning_rate": 0.0009860134895278666, "loss": 0.1704, "step": 17620 }, { "epoch": 11.27, "learning_rate": 0.0009859424920127795, "loss": 0.1714, "step": 17630 }, { "epoch": 11.27, "learning_rate": 0.0009858714944976927, "loss": 0.1495, "step": 17640 }, { "epoch": 11.28, "learning_rate": 0.0009858004969826057, "loss": 0.1698, "step": 17650 }, { "epoch": 11.28, "learning_rate": 0.0009857294994675186, "loss": 0.1824, "step": 17660 }, { "epoch": 11.29, "learning_rate": 0.0009856585019524316, "loss": 0.1515, "step": 17670 }, { "epoch": 11.3, "learning_rate": 0.0009855875044373448, "loss": 0.169, "step": 17680 }, { "epoch": 11.3, "learning_rate": 0.0009855165069222577, "loss": 0.1814, "step": 17690 }, { "epoch": 11.31, "learning_rate": 0.000985445509407171, "loss": 0.1548, "step": 17700 }, { "epoch": 11.32, "learning_rate": 0.0009853745118920839, "loss": 0.1615, "step": 17710 }, { "epoch": 11.32, "learning_rate": 0.0009853035143769968, "loss": 0.1975, "step": 17720 }, { "epoch": 11.33, "learning_rate": 0.0009852325168619098, "loss": 0.1402, "step": 17730 }, { "epoch": 11.34, "learning_rate": 0.000985161519346823, "loss": 0.1733, "step": 17740 }, { "epoch": 11.34, "learning_rate": 0.000985090521831736, "loss": 0.1782, "step": 17750 }, { "epoch": 11.35, "learning_rate": 0.000985019524316649, "loss": 0.1664, "step": 17760 }, { "epoch": 11.35, "learning_rate": 0.000984948526801562, "loss": 0.2117, "step": 17770 }, { "epoch": 11.36, "learning_rate": 0.000984877529286475, "loss": 0.1698, "step": 17780 }, { "epoch": 11.37, "learning_rate": 0.000984806531771388, "loss": 0.1607, "step": 17790 }, { "epoch": 11.37, "learning_rate": 0.0009847355342563011, "loss": 0.171, "step": 17800 }, { "epoch": 11.38, "learning_rate": 0.000984664536741214, "loss": 0.1969, "step": 17810 }, { "epoch": 11.39, "learning_rate": 0.0009845935392261273, "loss": 0.1615, "step": 17820 }, { "epoch": 11.39, "learning_rate": 0.0009845225417110402, "loss": 0.1649, "step": 17830 }, { "epoch": 11.4, "learning_rate": 0.0009844515441959532, "loss": 0.1773, "step": 17840 }, { "epoch": 11.41, "learning_rate": 0.0009843805466808661, "loss": 0.19, "step": 17850 }, { "epoch": 11.41, "learning_rate": 0.000984309549165779, "loss": 0.1501, "step": 17860 }, { "epoch": 11.42, "learning_rate": 0.0009842385516506923, "loss": 0.1397, "step": 17870 }, { "epoch": 11.42, "learning_rate": 0.0009841675541356052, "loss": 0.1641, "step": 17880 }, { "epoch": 11.43, "learning_rate": 0.0009840965566205184, "loss": 0.161, "step": 17890 }, { "epoch": 11.44, "learning_rate": 0.0009840255591054314, "loss": 0.1751, "step": 17900 }, { "epoch": 11.44, "learning_rate": 0.0009839545615903443, "loss": 0.1774, "step": 17910 }, { "epoch": 11.45, "learning_rate": 0.0009838835640752573, "loss": 0.2226, "step": 17920 }, { "epoch": 11.46, "learning_rate": 0.0009838125665601704, "loss": 0.1531, "step": 17930 }, { "epoch": 11.46, "learning_rate": 0.0009837415690450834, "loss": 0.1807, "step": 17940 }, { "epoch": 11.47, "learning_rate": 0.0009836705715299966, "loss": 0.1769, "step": 17950 }, { "epoch": 11.48, "learning_rate": 0.0009835995740149095, "loss": 0.1727, "step": 17960 }, { "epoch": 11.48, "learning_rate": 0.0009835285764998225, "loss": 0.1738, "step": 17970 }, { "epoch": 11.49, "learning_rate": 0.0009834575789847355, "loss": 0.1354, "step": 17980 }, { "epoch": 11.5, "learning_rate": 0.0009833865814696486, "loss": 0.1979, "step": 17990 }, { "epoch": 11.5, "learning_rate": 0.0009833155839545616, "loss": 0.1774, "step": 18000 }, { "epoch": 11.51, "learning_rate": 0.0009832445864394748, "loss": 0.1869, "step": 18010 }, { "epoch": 11.51, "learning_rate": 0.0009831735889243877, "loss": 0.1486, "step": 18020 }, { "epoch": 11.52, "learning_rate": 0.0009831025914093007, "loss": 0.2041, "step": 18030 }, { "epoch": 11.53, "learning_rate": 0.0009830315938942136, "loss": 0.188, "step": 18040 }, { "epoch": 11.53, "learning_rate": 0.0009829605963791268, "loss": 0.1349, "step": 18050 }, { "epoch": 11.54, "learning_rate": 0.0009828895988640398, "loss": 0.1786, "step": 18060 }, { "epoch": 11.55, "learning_rate": 0.000982818601348953, "loss": 0.1886, "step": 18070 }, { "epoch": 11.55, "learning_rate": 0.000982747603833866, "loss": 0.1876, "step": 18080 }, { "epoch": 11.56, "learning_rate": 0.0009826766063187789, "loss": 0.189, "step": 18090 }, { "epoch": 11.57, "learning_rate": 0.0009826056088036918, "loss": 0.1677, "step": 18100 }, { "epoch": 11.57, "learning_rate": 0.000982534611288605, "loss": 0.2143, "step": 18110 }, { "epoch": 11.58, "learning_rate": 0.000982463613773518, "loss": 0.1722, "step": 18120 }, { "epoch": 11.58, "learning_rate": 0.0009823926162584311, "loss": 0.1762, "step": 18130 }, { "epoch": 11.59, "learning_rate": 0.000982321618743344, "loss": 0.1833, "step": 18140 }, { "epoch": 11.6, "learning_rate": 0.000982250621228257, "loss": 0.1845, "step": 18150 }, { "epoch": 11.6, "learning_rate": 0.00098217962371317, "loss": 0.1567, "step": 18160 }, { "epoch": 11.61, "learning_rate": 0.000982108626198083, "loss": 0.1642, "step": 18170 }, { "epoch": 11.62, "learning_rate": 0.0009820376286829961, "loss": 0.1576, "step": 18180 }, { "epoch": 11.62, "learning_rate": 0.000981966631167909, "loss": 0.1377, "step": 18190 }, { "epoch": 11.63, "learning_rate": 0.0009818956336528223, "loss": 0.1921, "step": 18200 }, { "epoch": 11.64, "learning_rate": 0.0009818246361377352, "loss": 0.2037, "step": 18210 }, { "epoch": 11.64, "learning_rate": 0.0009817536386226482, "loss": 0.1988, "step": 18220 }, { "epoch": 11.65, "learning_rate": 0.0009816826411075611, "loss": 0.1576, "step": 18230 }, { "epoch": 11.65, "learning_rate": 0.0009816116435924743, "loss": 0.1727, "step": 18240 }, { "epoch": 11.66, "learning_rate": 0.0009815406460773873, "loss": 0.1925, "step": 18250 }, { "epoch": 11.67, "learning_rate": 0.0009814696485623004, "loss": 0.1942, "step": 18260 }, { "epoch": 11.67, "learning_rate": 0.0009813986510472134, "loss": 0.1506, "step": 18270 }, { "epoch": 11.68, "learning_rate": 0.0009813276535321264, "loss": 0.1973, "step": 18280 }, { "epoch": 11.69, "learning_rate": 0.0009812566560170393, "loss": 0.1641, "step": 18290 }, { "epoch": 11.69, "learning_rate": 0.0009811856585019525, "loss": 0.1719, "step": 18300 }, { "epoch": 11.7, "learning_rate": 0.0009811146609868654, "loss": 0.1681, "step": 18310 }, { "epoch": 11.71, "learning_rate": 0.0009810436634717786, "loss": 0.1644, "step": 18320 }, { "epoch": 11.71, "learning_rate": 0.0009809726659566916, "loss": 0.153, "step": 18330 }, { "epoch": 11.72, "learning_rate": 0.0009809016684416045, "loss": 0.1978, "step": 18340 }, { "epoch": 11.73, "learning_rate": 0.0009808306709265175, "loss": 0.1808, "step": 18350 }, { "epoch": 11.73, "learning_rate": 0.0009807596734114307, "loss": 0.1967, "step": 18360 }, { "epoch": 11.74, "learning_rate": 0.0009806886758963436, "loss": 0.1699, "step": 18370 }, { "epoch": 11.74, "learning_rate": 0.0009806176783812568, "loss": 0.1866, "step": 18380 }, { "epoch": 11.75, "learning_rate": 0.0009805466808661698, "loss": 0.1813, "step": 18390 }, { "epoch": 11.76, "learning_rate": 0.0009804756833510827, "loss": 0.1742, "step": 18400 }, { "epoch": 11.76, "learning_rate": 0.0009804046858359957, "loss": 0.2112, "step": 18410 }, { "epoch": 11.77, "learning_rate": 0.0009803336883209088, "loss": 0.1966, "step": 18420 }, { "epoch": 11.78, "learning_rate": 0.0009802626908058218, "loss": 0.1639, "step": 18430 }, { "epoch": 11.78, "learning_rate": 0.000980191693290735, "loss": 0.1614, "step": 18440 }, { "epoch": 11.79, "learning_rate": 0.000980120695775648, "loss": 0.1992, "step": 18450 }, { "epoch": 11.8, "learning_rate": 0.000980049698260561, "loss": 0.1965, "step": 18460 }, { "epoch": 11.8, "learning_rate": 0.0009799787007454739, "loss": 0.1885, "step": 18470 }, { "epoch": 11.81, "learning_rate": 0.0009799077032303868, "loss": 0.1509, "step": 18480 }, { "epoch": 11.81, "learning_rate": 0.0009798367057153, "loss": 0.1952, "step": 18490 }, { "epoch": 11.82, "learning_rate": 0.000979765708200213, "loss": 0.1801, "step": 18500 }, { "epoch": 11.83, "learning_rate": 0.0009796947106851261, "loss": 0.1521, "step": 18510 }, { "epoch": 11.83, "learning_rate": 0.000979623713170039, "loss": 0.1632, "step": 18520 }, { "epoch": 11.84, "learning_rate": 0.000979552715654952, "loss": 0.1945, "step": 18530 }, { "epoch": 11.85, "learning_rate": 0.000979481718139865, "loss": 0.1883, "step": 18540 }, { "epoch": 11.85, "learning_rate": 0.0009794107206247782, "loss": 0.1832, "step": 18550 }, { "epoch": 11.86, "learning_rate": 0.0009793397231096911, "loss": 0.1933, "step": 18560 }, { "epoch": 11.87, "learning_rate": 0.0009792687255946043, "loss": 0.1785, "step": 18570 }, { "epoch": 11.87, "learning_rate": 0.0009791977280795173, "loss": 0.1788, "step": 18580 }, { "epoch": 11.88, "learning_rate": 0.0009791267305644302, "loss": 0.18, "step": 18590 }, { "epoch": 11.88, "learning_rate": 0.0009790557330493432, "loss": 0.1882, "step": 18600 }, { "epoch": 11.89, "learning_rate": 0.0009789847355342563, "loss": 0.1823, "step": 18610 }, { "epoch": 11.9, "learning_rate": 0.0009789137380191693, "loss": 0.1843, "step": 18620 }, { "epoch": 11.9, "learning_rate": 0.0009788427405040825, "loss": 0.1725, "step": 18630 }, { "epoch": 11.91, "learning_rate": 0.0009787717429889954, "loss": 0.1905, "step": 18640 }, { "epoch": 11.92, "learning_rate": 0.0009787007454739084, "loss": 0.1904, "step": 18650 }, { "epoch": 11.92, "learning_rate": 0.0009786297479588214, "loss": 0.1537, "step": 18660 }, { "epoch": 11.93, "learning_rate": 0.0009785587504437345, "loss": 0.1519, "step": 18670 }, { "epoch": 11.94, "learning_rate": 0.0009784877529286475, "loss": 0.1831, "step": 18680 }, { "epoch": 11.94, "learning_rate": 0.0009784167554135607, "loss": 0.1562, "step": 18690 }, { "epoch": 11.95, "learning_rate": 0.0009783457578984736, "loss": 0.2007, "step": 18700 }, { "epoch": 11.95, "learning_rate": 0.0009782747603833866, "loss": 0.1683, "step": 18710 }, { "epoch": 11.96, "learning_rate": 0.0009782037628682995, "loss": 0.1927, "step": 18720 }, { "epoch": 11.97, "learning_rate": 0.0009781327653532127, "loss": 0.178, "step": 18730 }, { "epoch": 11.97, "learning_rate": 0.0009780617678381257, "loss": 0.1629, "step": 18740 }, { "epoch": 11.98, "learning_rate": 0.0009779907703230388, "loss": 0.177, "step": 18750 }, { "epoch": 11.99, "learning_rate": 0.0009779197728079518, "loss": 0.1758, "step": 18760 }, { "epoch": 11.99, "learning_rate": 0.0009778487752928648, "loss": 0.2114, "step": 18770 }, { "epoch": 12.0, "learning_rate": 0.0009777777777777777, "loss": 0.1582, "step": 18780 }, { "epoch": 12.01, "learning_rate": 0.0009777067802626907, "loss": 0.1586, "step": 18790 }, { "epoch": 12.01, "learning_rate": 0.0009776357827476038, "loss": 0.1523, "step": 18800 }, { "epoch": 12.02, "learning_rate": 0.0009775647852325168, "loss": 0.1614, "step": 18810 }, { "epoch": 12.03, "learning_rate": 0.00097749378771743, "loss": 0.169, "step": 18820 }, { "epoch": 12.03, "learning_rate": 0.000977422790202343, "loss": 0.1339, "step": 18830 }, { "epoch": 12.04, "learning_rate": 0.000977351792687256, "loss": 0.1313, "step": 18840 }, { "epoch": 12.04, "learning_rate": 0.0009772807951721689, "loss": 0.1481, "step": 18850 }, { "epoch": 12.05, "learning_rate": 0.000977209797657082, "loss": 0.1294, "step": 18860 }, { "epoch": 12.06, "learning_rate": 0.000977138800141995, "loss": 0.1555, "step": 18870 }, { "epoch": 12.06, "learning_rate": 0.0009770678026269082, "loss": 0.1555, "step": 18880 }, { "epoch": 12.07, "learning_rate": 0.0009769968051118211, "loss": 0.1688, "step": 18890 }, { "epoch": 12.08, "learning_rate": 0.000976925807596734, "loss": 0.1738, "step": 18900 }, { "epoch": 12.08, "learning_rate": 0.000976854810081647, "loss": 0.1463, "step": 18910 }, { "epoch": 12.09, "learning_rate": 0.0009767838125665602, "loss": 0.1241, "step": 18920 }, { "epoch": 12.1, "learning_rate": 0.0009767128150514732, "loss": 0.1401, "step": 18930 }, { "epoch": 12.1, "learning_rate": 0.0009766418175363863, "loss": 0.1685, "step": 18940 }, { "epoch": 12.11, "learning_rate": 0.0009765708200212993, "loss": 0.1761, "step": 18950 }, { "epoch": 12.11, "learning_rate": 0.0009764998225062123, "loss": 0.1738, "step": 18960 }, { "epoch": 12.12, "learning_rate": 0.0009764288249911253, "loss": 0.1195, "step": 18970 }, { "epoch": 12.13, "learning_rate": 0.0009763578274760384, "loss": 0.1416, "step": 18980 }, { "epoch": 12.13, "learning_rate": 0.0009762868299609515, "loss": 0.1504, "step": 18990 }, { "epoch": 12.14, "learning_rate": 0.0009762158324458644, "loss": 0.1515, "step": 19000 }, { "epoch": 12.15, "learning_rate": 0.0009761448349307775, "loss": 0.1773, "step": 19010 }, { "epoch": 12.15, "learning_rate": 0.0009760738374156904, "loss": 0.1548, "step": 19020 }, { "epoch": 12.16, "learning_rate": 0.0009760028399006035, "loss": 0.1606, "step": 19030 }, { "epoch": 12.17, "learning_rate": 0.0009759318423855166, "loss": 0.1607, "step": 19040 }, { "epoch": 12.17, "learning_rate": 0.0009758608448704296, "loss": 0.1473, "step": 19050 }, { "epoch": 12.18, "learning_rate": 0.0009757898473553426, "loss": 0.1539, "step": 19060 }, { "epoch": 12.19, "learning_rate": 0.0009757188498402556, "loss": 0.1577, "step": 19070 }, { "epoch": 12.19, "learning_rate": 0.0009756478523251686, "loss": 0.1825, "step": 19080 }, { "epoch": 12.2, "learning_rate": 0.0009755768548100816, "loss": 0.1687, "step": 19090 }, { "epoch": 12.2, "learning_rate": 0.0009755058572949948, "loss": 0.188, "step": 19100 }, { "epoch": 12.21, "learning_rate": 0.0009754348597799077, "loss": 0.1424, "step": 19110 }, { "epoch": 12.22, "learning_rate": 0.0009753638622648208, "loss": 0.1478, "step": 19120 }, { "epoch": 12.22, "learning_rate": 0.0009752928647497337, "loss": 0.1601, "step": 19130 }, { "epoch": 12.23, "learning_rate": 0.0009752218672346468, "loss": 0.1706, "step": 19140 }, { "epoch": 12.24, "learning_rate": 0.0009751508697195598, "loss": 0.1565, "step": 19150 }, { "epoch": 12.24, "learning_rate": 0.0009750798722044729, "loss": 0.1258, "step": 19160 }, { "epoch": 12.25, "learning_rate": 0.0009750088746893859, "loss": 0.1468, "step": 19170 }, { "epoch": 12.26, "learning_rate": 0.000974937877174299, "loss": 0.1865, "step": 19180 }, { "epoch": 12.26, "learning_rate": 0.0009748668796592119, "loss": 0.1551, "step": 19190 }, { "epoch": 12.27, "learning_rate": 0.000974795882144125, "loss": 0.1688, "step": 19200 }, { "epoch": 12.27, "learning_rate": 0.0009747248846290379, "loss": 0.16, "step": 19210 }, { "epoch": 12.28, "learning_rate": 0.000974653887113951, "loss": 0.1686, "step": 19220 }, { "epoch": 12.29, "learning_rate": 0.0009745828895988641, "loss": 0.1644, "step": 19230 }, { "epoch": 12.29, "learning_rate": 0.0009745118920837771, "loss": 0.1605, "step": 19240 }, { "epoch": 12.3, "learning_rate": 0.0009744408945686901, "loss": 0.1506, "step": 19250 }, { "epoch": 12.31, "learning_rate": 0.0009743698970536032, "loss": 0.1436, "step": 19260 }, { "epoch": 12.31, "learning_rate": 0.0009742988995385161, "loss": 0.1685, "step": 19270 }, { "epoch": 12.32, "learning_rate": 0.0009742279020234292, "loss": 0.1505, "step": 19280 }, { "epoch": 12.33, "learning_rate": 0.0009741569045083423, "loss": 0.1609, "step": 19290 }, { "epoch": 12.33, "learning_rate": 0.0009740859069932553, "loss": 0.1803, "step": 19300 }, { "epoch": 12.34, "learning_rate": 0.0009740149094781683, "loss": 0.1806, "step": 19310 }, { "epoch": 12.34, "learning_rate": 0.0009739439119630813, "loss": 0.1733, "step": 19320 }, { "epoch": 12.35, "learning_rate": 0.0009738729144479943, "loss": 0.1618, "step": 19330 }, { "epoch": 12.36, "learning_rate": 0.0009738019169329074, "loss": 0.157, "step": 19340 }, { "epoch": 12.36, "learning_rate": 0.0009737309194178204, "loss": 0.1374, "step": 19350 }, { "epoch": 12.37, "learning_rate": 0.0009736599219027335, "loss": 0.1689, "step": 19360 }, { "epoch": 12.38, "learning_rate": 0.0009735889243876465, "loss": 0.1813, "step": 19370 }, { "epoch": 12.38, "learning_rate": 0.0009735179268725594, "loss": 0.1313, "step": 19380 }, { "epoch": 12.39, "learning_rate": 0.0009734469293574725, "loss": 0.16, "step": 19390 }, { "epoch": 12.4, "learning_rate": 0.0009733759318423854, "loss": 0.1317, "step": 19400 }, { "epoch": 12.4, "learning_rate": 0.0009733049343272986, "loss": 0.13, "step": 19410 }, { "epoch": 12.41, "learning_rate": 0.0009732339368122116, "loss": 0.15, "step": 19420 }, { "epoch": 12.42, "learning_rate": 0.0009731629392971246, "loss": 0.1567, "step": 19430 }, { "epoch": 12.42, "learning_rate": 0.0009730919417820376, "loss": 0.1477, "step": 19440 }, { "epoch": 12.43, "learning_rate": 0.0009730209442669507, "loss": 0.1409, "step": 19450 }, { "epoch": 12.43, "learning_rate": 0.0009729499467518636, "loss": 0.1793, "step": 19460 }, { "epoch": 12.44, "learning_rate": 0.0009728789492367768, "loss": 0.1322, "step": 19470 }, { "epoch": 12.45, "learning_rate": 0.0009728079517216898, "loss": 0.1645, "step": 19480 }, { "epoch": 12.45, "learning_rate": 0.0009727369542066028, "loss": 0.195, "step": 19490 }, { "epoch": 12.46, "learning_rate": 0.0009726659566915158, "loss": 0.1419, "step": 19500 }, { "epoch": 12.47, "learning_rate": 0.0009725949591764288, "loss": 0.1422, "step": 19510 }, { "epoch": 12.47, "learning_rate": 0.0009725239616613418, "loss": 0.1796, "step": 19520 }, { "epoch": 12.48, "learning_rate": 0.000972452964146255, "loss": 0.1615, "step": 19530 }, { "epoch": 12.49, "learning_rate": 0.0009723819666311679, "loss": 0.173, "step": 19540 }, { "epoch": 12.49, "learning_rate": 0.000972310969116081, "loss": 0.1442, "step": 19550 }, { "epoch": 12.5, "learning_rate": 0.000972239971600994, "loss": 0.1648, "step": 19560 }, { "epoch": 12.5, "learning_rate": 0.000972168974085907, "loss": 0.1543, "step": 19570 }, { "epoch": 12.51, "learning_rate": 0.00097209797657082, "loss": 0.1507, "step": 19580 }, { "epoch": 12.52, "learning_rate": 0.0009720269790557332, "loss": 0.1571, "step": 19590 }, { "epoch": 12.52, "learning_rate": 0.0009719559815406461, "loss": 0.1697, "step": 19600 }, { "epoch": 12.53, "learning_rate": 0.0009718849840255592, "loss": 0.1622, "step": 19610 }, { "epoch": 12.54, "learning_rate": 0.0009718139865104721, "loss": 0.1548, "step": 19620 }, { "epoch": 12.54, "learning_rate": 0.0009717429889953852, "loss": 0.169, "step": 19630 }, { "epoch": 12.55, "learning_rate": 0.0009716719914802982, "loss": 0.1644, "step": 19640 }, { "epoch": 12.56, "learning_rate": 0.0009716009939652113, "loss": 0.1387, "step": 19650 }, { "epoch": 12.56, "learning_rate": 0.0009715299964501243, "loss": 0.177, "step": 19660 }, { "epoch": 12.57, "learning_rate": 0.0009714589989350374, "loss": 0.1627, "step": 19670 }, { "epoch": 12.57, "learning_rate": 0.0009713880014199503, "loss": 0.1451, "step": 19680 }, { "epoch": 12.58, "learning_rate": 0.0009713170039048633, "loss": 0.1827, "step": 19690 }, { "epoch": 12.59, "learning_rate": 0.0009712460063897763, "loss": 0.1593, "step": 19700 }, { "epoch": 12.59, "learning_rate": 0.0009711750088746894, "loss": 0.1521, "step": 19710 }, { "epoch": 12.6, "learning_rate": 0.0009711040113596025, "loss": 0.1759, "step": 19720 }, { "epoch": 12.61, "learning_rate": 0.0009710330138445154, "loss": 0.1868, "step": 19730 }, { "epoch": 12.61, "learning_rate": 0.0009709620163294285, "loss": 0.1327, "step": 19740 }, { "epoch": 12.62, "learning_rate": 0.0009708910188143415, "loss": 0.1349, "step": 19750 }, { "epoch": 12.63, "learning_rate": 0.0009708200212992545, "loss": 0.1645, "step": 19760 }, { "epoch": 12.63, "learning_rate": 0.0009707490237841676, "loss": 0.1315, "step": 19770 }, { "epoch": 12.64, "learning_rate": 0.0009706780262690807, "loss": 0.184, "step": 19780 }, { "epoch": 12.65, "learning_rate": 0.0009706070287539936, "loss": 0.1557, "step": 19790 }, { "epoch": 12.65, "learning_rate": 0.0009705360312389067, "loss": 0.1622, "step": 19800 }, { "epoch": 12.66, "learning_rate": 0.0009704650337238196, "loss": 0.1598, "step": 19810 }, { "epoch": 12.66, "learning_rate": 0.0009703940362087327, "loss": 0.1914, "step": 19820 }, { "epoch": 12.67, "learning_rate": 0.0009703230386936457, "loss": 0.1757, "step": 19830 }, { "epoch": 12.68, "learning_rate": 0.0009702520411785588, "loss": 0.1613, "step": 19840 }, { "epoch": 12.68, "learning_rate": 0.0009701810436634718, "loss": 0.175, "step": 19850 }, { "epoch": 12.69, "learning_rate": 0.0009701100461483849, "loss": 0.1778, "step": 19860 }, { "epoch": 12.7, "learning_rate": 0.0009700390486332978, "loss": 0.14, "step": 19870 }, { "epoch": 12.7, "learning_rate": 0.0009699680511182109, "loss": 0.148, "step": 19880 }, { "epoch": 12.71, "learning_rate": 0.0009698970536031238, "loss": 0.1899, "step": 19890 }, { "epoch": 12.72, "learning_rate": 0.000969826056088037, "loss": 0.143, "step": 19900 }, { "epoch": 12.72, "learning_rate": 0.00096975505857295, "loss": 0.1668, "step": 19910 }, { "epoch": 12.73, "learning_rate": 0.000969684061057863, "loss": 0.1607, "step": 19920 }, { "epoch": 12.73, "learning_rate": 0.000969613063542776, "loss": 0.1357, "step": 19930 }, { "epoch": 12.74, "learning_rate": 0.0009695420660276891, "loss": 0.18, "step": 19940 }, { "epoch": 12.75, "learning_rate": 0.000969471068512602, "loss": 0.1766, "step": 19950 }, { "epoch": 12.75, "learning_rate": 0.0009694000709975152, "loss": 0.1852, "step": 19960 }, { "epoch": 12.76, "learning_rate": 0.0009693290734824282, "loss": 0.1969, "step": 19970 }, { "epoch": 12.77, "learning_rate": 0.0009692580759673412, "loss": 0.1483, "step": 19980 }, { "epoch": 12.77, "learning_rate": 0.0009691870784522542, "loss": 0.1823, "step": 19990 }, { "epoch": 12.78, "learning_rate": 0.0009691160809371671, "loss": 0.1495, "step": 20000 }, { "epoch": 12.79, "learning_rate": 0.0009690450834220802, "loss": 0.144, "step": 20010 }, { "epoch": 12.79, "learning_rate": 0.0009689740859069933, "loss": 0.1633, "step": 20020 }, { "epoch": 12.8, "learning_rate": 0.0009689030883919063, "loss": 0.1736, "step": 20030 }, { "epoch": 12.8, "learning_rate": 0.0009688320908768193, "loss": 0.1848, "step": 20040 }, { "epoch": 12.81, "learning_rate": 0.0009687610933617324, "loss": 0.1701, "step": 20050 }, { "epoch": 12.82, "learning_rate": 0.0009686900958466453, "loss": 0.1647, "step": 20060 }, { "epoch": 12.82, "learning_rate": 0.0009686190983315584, "loss": 0.1801, "step": 20070 }, { "epoch": 12.83, "learning_rate": 0.0009685481008164714, "loss": 0.1595, "step": 20080 }, { "epoch": 12.84, "learning_rate": 0.0009684771033013845, "loss": 0.1618, "step": 20090 }, { "epoch": 12.84, "learning_rate": 0.0009684061057862975, "loss": 0.1559, "step": 20100 }, { "epoch": 12.85, "learning_rate": 0.0009683351082712105, "loss": 0.173, "step": 20110 }, { "epoch": 12.86, "learning_rate": 0.0009682641107561235, "loss": 0.15, "step": 20120 }, { "epoch": 12.86, "learning_rate": 0.0009681931132410366, "loss": 0.1707, "step": 20130 }, { "epoch": 12.87, "learning_rate": 0.0009681221157259496, "loss": 0.1403, "step": 20140 }, { "epoch": 12.88, "learning_rate": 0.0009680511182108627, "loss": 0.1787, "step": 20150 }, { "epoch": 12.88, "learning_rate": 0.0009679801206957757, "loss": 0.1708, "step": 20160 }, { "epoch": 12.89, "learning_rate": 0.0009679091231806887, "loss": 0.1595, "step": 20170 }, { "epoch": 12.89, "learning_rate": 0.0009678381256656017, "loss": 0.1536, "step": 20180 }, { "epoch": 12.9, "learning_rate": 0.0009677671281505147, "loss": 0.1783, "step": 20190 }, { "epoch": 12.91, "learning_rate": 0.0009676961306354278, "loss": 0.1694, "step": 20200 }, { "epoch": 12.91, "learning_rate": 0.0009676251331203409, "loss": 0.1593, "step": 20210 }, { "epoch": 12.92, "learning_rate": 0.0009675541356052538, "loss": 0.1378, "step": 20220 }, { "epoch": 12.93, "learning_rate": 0.0009674831380901669, "loss": 0.174, "step": 20230 }, { "epoch": 12.93, "learning_rate": 0.0009674121405750799, "loss": 0.1588, "step": 20240 }, { "epoch": 12.94, "learning_rate": 0.0009673411430599929, "loss": 0.1726, "step": 20250 }, { "epoch": 12.95, "learning_rate": 0.000967270145544906, "loss": 0.1696, "step": 20260 }, { "epoch": 12.95, "learning_rate": 0.0009671991480298191, "loss": 0.1529, "step": 20270 }, { "epoch": 12.96, "learning_rate": 0.000967128150514732, "loss": 0.1869, "step": 20280 }, { "epoch": 12.96, "learning_rate": 0.0009670571529996451, "loss": 0.1673, "step": 20290 }, { "epoch": 12.97, "learning_rate": 0.000966986155484558, "loss": 0.1682, "step": 20300 }, { "epoch": 12.98, "learning_rate": 0.000966915157969471, "loss": 0.1937, "step": 20310 }, { "epoch": 12.98, "learning_rate": 0.0009668441604543842, "loss": 0.1636, "step": 20320 }, { "epoch": 12.99, "learning_rate": 0.0009667731629392971, "loss": 0.1535, "step": 20330 }, { "epoch": 13.0, "learning_rate": 0.0009667021654242102, "loss": 0.1794, "step": 20340 }, { "epoch": 13.0, "learning_rate": 0.0009666311679091232, "loss": 0.1457, "step": 20350 }, { "epoch": 13.01, "learning_rate": 0.0009665601703940362, "loss": 0.168, "step": 20360 }, { "epoch": 13.02, "learning_rate": 0.0009664891728789492, "loss": 0.1501, "step": 20370 }, { "epoch": 13.02, "learning_rate": 0.0009664181753638623, "loss": 0.1401, "step": 20380 }, { "epoch": 13.03, "learning_rate": 0.0009663471778487753, "loss": 0.1438, "step": 20390 }, { "epoch": 13.04, "learning_rate": 0.0009662761803336884, "loss": 0.1199, "step": 20400 }, { "epoch": 13.04, "learning_rate": 0.0009662051828186013, "loss": 0.1534, "step": 20410 }, { "epoch": 13.05, "learning_rate": 0.0009661341853035144, "loss": 0.1386, "step": 20420 }, { "epoch": 13.05, "learning_rate": 0.0009660631877884274, "loss": 0.1598, "step": 20430 }, { "epoch": 13.06, "learning_rate": 0.0009659921902733404, "loss": 0.1612, "step": 20440 }, { "epoch": 13.07, "learning_rate": 0.0009659211927582535, "loss": 0.1319, "step": 20450 }, { "epoch": 13.07, "learning_rate": 0.0009658501952431666, "loss": 0.157, "step": 20460 }, { "epoch": 13.08, "learning_rate": 0.0009657791977280795, "loss": 0.1411, "step": 20470 }, { "epoch": 13.09, "learning_rate": 0.0009657082002129926, "loss": 0.1287, "step": 20480 }, { "epoch": 13.09, "learning_rate": 0.0009656372026979055, "loss": 0.1241, "step": 20490 }, { "epoch": 13.1, "learning_rate": 0.0009655662051828186, "loss": 0.168, "step": 20500 }, { "epoch": 13.11, "learning_rate": 0.0009654952076677317, "loss": 0.1583, "step": 20510 }, { "epoch": 13.11, "learning_rate": 0.0009654242101526447, "loss": 0.1267, "step": 20520 }, { "epoch": 13.12, "learning_rate": 0.0009653532126375577, "loss": 0.1357, "step": 20530 }, { "epoch": 13.12, "learning_rate": 0.0009652822151224708, "loss": 0.126, "step": 20540 }, { "epoch": 13.13, "learning_rate": 0.0009652112176073837, "loss": 0.1379, "step": 20550 }, { "epoch": 13.14, "learning_rate": 0.0009651402200922968, "loss": 0.1572, "step": 20560 }, { "epoch": 13.14, "learning_rate": 0.0009650692225772098, "loss": 0.1551, "step": 20570 }, { "epoch": 13.15, "learning_rate": 0.0009649982250621229, "loss": 0.1277, "step": 20580 }, { "epoch": 13.16, "learning_rate": 0.0009649272275470359, "loss": 0.1609, "step": 20590 }, { "epoch": 13.16, "learning_rate": 0.0009648562300319489, "loss": 0.1459, "step": 20600 }, { "epoch": 13.17, "learning_rate": 0.0009647852325168619, "loss": 0.1602, "step": 20610 }, { "epoch": 13.18, "learning_rate": 0.0009647142350017749, "loss": 0.1472, "step": 20620 }, { "epoch": 13.18, "learning_rate": 0.000964643237486688, "loss": 0.128, "step": 20630 }, { "epoch": 13.19, "learning_rate": 0.000964572239971601, "loss": 0.1444, "step": 20640 }, { "epoch": 13.19, "learning_rate": 0.000964501242456514, "loss": 0.1627, "step": 20650 }, { "epoch": 13.2, "learning_rate": 0.000964430244941427, "loss": 0.1418, "step": 20660 }, { "epoch": 13.21, "learning_rate": 0.0009643592474263401, "loss": 0.135, "step": 20670 }, { "epoch": 13.21, "learning_rate": 0.000964288249911253, "loss": 0.1337, "step": 20680 }, { "epoch": 13.22, "learning_rate": 0.0009642172523961662, "loss": 0.1353, "step": 20690 }, { "epoch": 13.23, "learning_rate": 0.0009641462548810792, "loss": 0.1586, "step": 20700 }, { "epoch": 13.23, "learning_rate": 0.0009640752573659922, "loss": 0.1386, "step": 20710 }, { "epoch": 13.24, "learning_rate": 0.0009640042598509052, "loss": 0.1369, "step": 20720 }, { "epoch": 13.25, "learning_rate": 0.0009639332623358183, "loss": 0.1357, "step": 20730 }, { "epoch": 13.25, "learning_rate": 0.0009638622648207312, "loss": 0.1472, "step": 20740 }, { "epoch": 13.26, "learning_rate": 0.0009637912673056444, "loss": 0.1533, "step": 20750 }, { "epoch": 13.27, "learning_rate": 0.0009637202697905573, "loss": 0.1584, "step": 20760 }, { "epoch": 13.27, "learning_rate": 0.0009636492722754704, "loss": 0.126, "step": 20770 }, { "epoch": 13.28, "learning_rate": 0.0009635782747603834, "loss": 0.1642, "step": 20780 }, { "epoch": 13.28, "learning_rate": 0.0009635072772452964, "loss": 0.1284, "step": 20790 }, { "epoch": 13.29, "learning_rate": 0.0009634362797302094, "loss": 0.1457, "step": 20800 }, { "epoch": 13.3, "learning_rate": 0.0009633652822151226, "loss": 0.1481, "step": 20810 }, { "epoch": 13.3, "learning_rate": 0.0009632942847000355, "loss": 0.1639, "step": 20820 }, { "epoch": 13.31, "learning_rate": 0.0009632232871849486, "loss": 0.1459, "step": 20830 }, { "epoch": 13.32, "learning_rate": 0.0009631522896698616, "loss": 0.1742, "step": 20840 }, { "epoch": 13.32, "learning_rate": 0.0009630812921547746, "loss": 0.1388, "step": 20850 }, { "epoch": 13.33, "learning_rate": 0.0009630102946396876, "loss": 0.1769, "step": 20860 }, { "epoch": 13.34, "learning_rate": 0.0009629392971246008, "loss": 0.1517, "step": 20870 }, { "epoch": 13.34, "learning_rate": 0.0009628682996095137, "loss": 0.1463, "step": 20880 }, { "epoch": 13.35, "learning_rate": 0.0009627973020944268, "loss": 0.1135, "step": 20890 }, { "epoch": 13.35, "learning_rate": 0.0009627263045793397, "loss": 0.1543, "step": 20900 }, { "epoch": 13.36, "learning_rate": 0.0009626553070642528, "loss": 0.1598, "step": 20910 }, { "epoch": 13.37, "learning_rate": 0.0009625843095491658, "loss": 0.1745, "step": 20920 }, { "epoch": 13.37, "learning_rate": 0.0009625133120340789, "loss": 0.1595, "step": 20930 }, { "epoch": 13.38, "learning_rate": 0.0009624423145189919, "loss": 0.1329, "step": 20940 }, { "epoch": 13.39, "learning_rate": 0.0009623713170039048, "loss": 0.1612, "step": 20950 }, { "epoch": 13.39, "learning_rate": 0.0009623003194888179, "loss": 0.1528, "step": 20960 }, { "epoch": 13.4, "learning_rate": 0.0009622293219737309, "loss": 0.1518, "step": 20970 }, { "epoch": 13.41, "learning_rate": 0.0009621583244586439, "loss": 0.1635, "step": 20980 }, { "epoch": 13.41, "learning_rate": 0.000962087326943557, "loss": 0.1446, "step": 20990 }, { "epoch": 13.42, "learning_rate": 0.0009620163294284701, "loss": 0.1471, "step": 21000 }, { "epoch": 13.42, "learning_rate": 0.000961945331913383, "loss": 0.1476, "step": 21010 }, { "epoch": 13.43, "learning_rate": 0.0009618743343982961, "loss": 0.1907, "step": 21020 }, { "epoch": 13.44, "learning_rate": 0.000961803336883209, "loss": 0.1495, "step": 21030 }, { "epoch": 13.44, "learning_rate": 0.0009617323393681221, "loss": 0.1817, "step": 21040 }, { "epoch": 13.45, "learning_rate": 0.0009616613418530351, "loss": 0.1571, "step": 21050 }, { "epoch": 13.46, "learning_rate": 0.0009615903443379483, "loss": 0.1746, "step": 21060 }, { "epoch": 13.46, "learning_rate": 0.0009615193468228612, "loss": 0.1451, "step": 21070 }, { "epoch": 13.47, "learning_rate": 0.0009614483493077743, "loss": 0.1545, "step": 21080 }, { "epoch": 13.48, "learning_rate": 0.0009613773517926872, "loss": 0.148, "step": 21090 }, { "epoch": 13.48, "learning_rate": 0.0009613063542776003, "loss": 0.1368, "step": 21100 }, { "epoch": 13.49, "learning_rate": 0.0009612353567625133, "loss": 0.1401, "step": 21110 }, { "epoch": 13.5, "learning_rate": 0.0009611643592474264, "loss": 0.1375, "step": 21120 }, { "epoch": 13.5, "learning_rate": 0.0009610933617323394, "loss": 0.1525, "step": 21130 }, { "epoch": 13.51, "learning_rate": 0.0009610223642172525, "loss": 0.1509, "step": 21140 }, { "epoch": 13.51, "learning_rate": 0.0009609513667021654, "loss": 0.1531, "step": 21150 }, { "epoch": 13.52, "learning_rate": 0.0009608803691870785, "loss": 0.1096, "step": 21160 }, { "epoch": 13.53, "learning_rate": 0.0009608093716719914, "loss": 0.1678, "step": 21170 }, { "epoch": 13.53, "learning_rate": 0.0009607383741569046, "loss": 0.1366, "step": 21180 }, { "epoch": 13.54, "learning_rate": 0.0009606673766418176, "loss": 0.1431, "step": 21190 }, { "epoch": 13.55, "learning_rate": 0.0009605963791267306, "loss": 0.1709, "step": 21200 }, { "epoch": 13.55, "learning_rate": 0.0009605253816116436, "loss": 0.1812, "step": 21210 }, { "epoch": 13.56, "learning_rate": 0.0009604543840965567, "loss": 0.1601, "step": 21220 }, { "epoch": 13.57, "learning_rate": 0.0009603833865814696, "loss": 0.1491, "step": 21230 }, { "epoch": 13.57, "learning_rate": 0.0009603123890663828, "loss": 0.128, "step": 21240 }, { "epoch": 13.58, "learning_rate": 0.0009602413915512958, "loss": 0.1746, "step": 21250 }, { "epoch": 13.58, "learning_rate": 0.0009601703940362087, "loss": 0.1417, "step": 21260 }, { "epoch": 13.59, "learning_rate": 0.0009600993965211218, "loss": 0.1661, "step": 21270 }, { "epoch": 13.6, "learning_rate": 0.0009600283990060347, "loss": 0.1624, "step": 21280 }, { "epoch": 13.6, "learning_rate": 0.0009599574014909478, "loss": 0.1768, "step": 21290 }, { "epoch": 13.61, "learning_rate": 0.0009598864039758609, "loss": 0.1553, "step": 21300 }, { "epoch": 13.62, "learning_rate": 0.0009598154064607739, "loss": 0.1604, "step": 21310 }, { "epoch": 13.62, "learning_rate": 0.0009597444089456869, "loss": 0.1621, "step": 21320 }, { "epoch": 13.63, "learning_rate": 0.0009596734114306, "loss": 0.1551, "step": 21330 }, { "epoch": 13.64, "learning_rate": 0.0009596024139155129, "loss": 0.1578, "step": 21340 }, { "epoch": 13.64, "learning_rate": 0.000959531416400426, "loss": 0.1668, "step": 21350 }, { "epoch": 13.65, "learning_rate": 0.000959460418885339, "loss": 0.1563, "step": 21360 }, { "epoch": 13.65, "learning_rate": 0.0009593894213702521, "loss": 0.1717, "step": 21370 }, { "epoch": 13.66, "learning_rate": 0.0009593184238551651, "loss": 0.1706, "step": 21380 }, { "epoch": 13.67, "learning_rate": 0.0009592474263400781, "loss": 0.1705, "step": 21390 }, { "epoch": 13.67, "learning_rate": 0.0009591764288249911, "loss": 0.1619, "step": 21400 }, { "epoch": 13.68, "learning_rate": 0.0009591054313099042, "loss": 0.1597, "step": 21410 }, { "epoch": 13.69, "learning_rate": 0.0009590344337948172, "loss": 0.1756, "step": 21420 }, { "epoch": 13.69, "learning_rate": 0.0009589634362797303, "loss": 0.1706, "step": 21430 }, { "epoch": 13.7, "learning_rate": 0.0009588924387646433, "loss": 0.1657, "step": 21440 }, { "epoch": 13.71, "learning_rate": 0.0009588214412495563, "loss": 0.1659, "step": 21450 }, { "epoch": 13.71, "learning_rate": 0.0009587504437344693, "loss": 0.1977, "step": 21460 }, { "epoch": 13.72, "learning_rate": 0.0009586794462193823, "loss": 0.1391, "step": 21470 }, { "epoch": 13.73, "learning_rate": 0.0009586084487042954, "loss": 0.1594, "step": 21480 }, { "epoch": 13.73, "learning_rate": 0.0009585374511892085, "loss": 0.1668, "step": 21490 }, { "epoch": 13.74, "learning_rate": 0.0009584664536741214, "loss": 0.1544, "step": 21500 }, { "epoch": 13.74, "learning_rate": 0.0009583954561590345, "loss": 0.1808, "step": 21510 }, { "epoch": 13.75, "learning_rate": 0.0009583244586439475, "loss": 0.1779, "step": 21520 }, { "epoch": 13.76, "learning_rate": 0.0009582534611288605, "loss": 0.1392, "step": 21530 }, { "epoch": 13.76, "learning_rate": 0.0009581824636137736, "loss": 0.165, "step": 21540 }, { "epoch": 13.77, "learning_rate": 0.0009581114660986867, "loss": 0.1705, "step": 21550 }, { "epoch": 13.78, "learning_rate": 0.0009580404685835996, "loss": 0.1484, "step": 21560 }, { "epoch": 13.78, "learning_rate": 0.0009579694710685126, "loss": 0.1389, "step": 21570 }, { "epoch": 13.79, "learning_rate": 0.0009578984735534256, "loss": 0.1566, "step": 21580 }, { "epoch": 13.8, "learning_rate": 0.0009578274760383386, "loss": 0.133, "step": 21590 }, { "epoch": 13.8, "learning_rate": 0.0009577564785232517, "loss": 0.1441, "step": 21600 }, { "epoch": 13.81, "learning_rate": 0.0009576854810081647, "loss": 0.1644, "step": 21610 }, { "epoch": 13.81, "learning_rate": 0.0009576144834930778, "loss": 0.1805, "step": 21620 }, { "epoch": 13.82, "learning_rate": 0.0009575434859779907, "loss": 0.1491, "step": 21630 }, { "epoch": 13.83, "learning_rate": 0.0009574724884629038, "loss": 0.1472, "step": 21640 }, { "epoch": 13.83, "learning_rate": 0.0009574014909478168, "loss": 0.173, "step": 21650 }, { "epoch": 13.84, "learning_rate": 0.0009573304934327298, "loss": 0.1336, "step": 21660 }, { "epoch": 13.85, "learning_rate": 0.0009572594959176429, "loss": 0.1522, "step": 21670 }, { "epoch": 13.85, "learning_rate": 0.000957188498402556, "loss": 0.1586, "step": 21680 }, { "epoch": 13.86, "learning_rate": 0.0009571175008874689, "loss": 0.1364, "step": 21690 }, { "epoch": 13.87, "learning_rate": 0.000957046503372382, "loss": 0.1443, "step": 21700 }, { "epoch": 13.87, "learning_rate": 0.000956975505857295, "loss": 0.182, "step": 21710 }, { "epoch": 13.88, "learning_rate": 0.000956904508342208, "loss": 0.1481, "step": 21720 }, { "epoch": 13.88, "learning_rate": 0.0009568335108271211, "loss": 0.1608, "step": 21730 }, { "epoch": 13.89, "learning_rate": 0.0009567625133120342, "loss": 0.1596, "step": 21740 }, { "epoch": 13.9, "learning_rate": 0.0009566915157969471, "loss": 0.1546, "step": 21750 }, { "epoch": 13.9, "learning_rate": 0.0009566205182818602, "loss": 0.1417, "step": 21760 }, { "epoch": 13.91, "learning_rate": 0.0009565495207667731, "loss": 0.1342, "step": 21770 }, { "epoch": 13.92, "learning_rate": 0.0009564785232516862, "loss": 0.1615, "step": 21780 }, { "epoch": 13.92, "learning_rate": 0.0009564075257365993, "loss": 0.1452, "step": 21790 }, { "epoch": 13.93, "learning_rate": 0.0009563365282215123, "loss": 0.1705, "step": 21800 }, { "epoch": 13.94, "learning_rate": 0.0009562655307064253, "loss": 0.1404, "step": 21810 }, { "epoch": 13.94, "learning_rate": 0.0009561945331913384, "loss": 0.141, "step": 21820 }, { "epoch": 13.95, "learning_rate": 0.0009561235356762513, "loss": 0.1642, "step": 21830 }, { "epoch": 13.95, "learning_rate": 0.0009560525381611644, "loss": 0.1654, "step": 21840 }, { "epoch": 13.96, "learning_rate": 0.0009559815406460774, "loss": 0.1722, "step": 21850 }, { "epoch": 13.97, "learning_rate": 0.0009559105431309905, "loss": 0.1509, "step": 21860 }, { "epoch": 13.97, "learning_rate": 0.0009558395456159035, "loss": 0.1557, "step": 21870 }, { "epoch": 13.98, "learning_rate": 0.0009557685481008164, "loss": 0.1645, "step": 21880 }, { "epoch": 13.99, "learning_rate": 0.0009556975505857295, "loss": 0.1729, "step": 21890 }, { "epoch": 13.99, "learning_rate": 0.0009556265530706425, "loss": 0.1471, "step": 21900 }, { "epoch": 14.0, "learning_rate": 0.0009555555555555556, "loss": 0.1493, "step": 21910 }, { "epoch": 14.01, "learning_rate": 0.0009554845580404686, "loss": 0.1662, "step": 21920 }, { "epoch": 14.01, "learning_rate": 0.0009554135605253817, "loss": 0.1034, "step": 21930 }, { "epoch": 14.02, "learning_rate": 0.0009553425630102946, "loss": 0.1537, "step": 21940 }, { "epoch": 14.03, "learning_rate": 0.0009552715654952077, "loss": 0.1495, "step": 21950 }, { "epoch": 14.03, "learning_rate": 0.0009552005679801206, "loss": 0.1488, "step": 21960 }, { "epoch": 14.04, "learning_rate": 0.0009551295704650338, "loss": 0.1242, "step": 21970 }, { "epoch": 14.04, "learning_rate": 0.0009550585729499468, "loss": 0.1232, "step": 21980 }, { "epoch": 14.05, "learning_rate": 0.0009549875754348598, "loss": 0.126, "step": 21990 }, { "epoch": 14.06, "learning_rate": 0.0009549165779197728, "loss": 0.1493, "step": 22000 }, { "epoch": 14.06, "learning_rate": 0.0009548455804046859, "loss": 0.145, "step": 22010 }, { "epoch": 14.07, "learning_rate": 0.0009547745828895988, "loss": 0.1363, "step": 22020 }, { "epoch": 14.08, "learning_rate": 0.000954703585374512, "loss": 0.1487, "step": 22030 }, { "epoch": 14.08, "learning_rate": 0.000954632587859425, "loss": 0.15, "step": 22040 }, { "epoch": 14.09, "learning_rate": 0.000954561590344338, "loss": 0.1205, "step": 22050 }, { "epoch": 14.1, "learning_rate": 0.000954490592829251, "loss": 0.1431, "step": 22060 }, { "epoch": 14.1, "learning_rate": 0.000954419595314164, "loss": 0.1134, "step": 22070 }, { "epoch": 14.11, "learning_rate": 0.000954348597799077, "loss": 0.1431, "step": 22080 }, { "epoch": 14.11, "learning_rate": 0.0009542776002839902, "loss": 0.1484, "step": 22090 }, { "epoch": 14.12, "learning_rate": 0.0009542066027689031, "loss": 0.1435, "step": 22100 }, { "epoch": 14.13, "learning_rate": 0.0009541356052538162, "loss": 0.1595, "step": 22110 }, { "epoch": 14.13, "learning_rate": 0.0009540646077387292, "loss": 0.1407, "step": 22120 }, { "epoch": 14.14, "learning_rate": 0.0009539936102236422, "loss": 0.1514, "step": 22130 }, { "epoch": 14.15, "learning_rate": 0.0009539226127085552, "loss": 0.1391, "step": 22140 }, { "epoch": 14.15, "learning_rate": 0.0009538516151934684, "loss": 0.1266, "step": 22150 }, { "epoch": 14.16, "learning_rate": 0.0009537806176783813, "loss": 0.1598, "step": 22160 }, { "epoch": 14.17, "learning_rate": 0.0009537096201632944, "loss": 0.1071, "step": 22170 }, { "epoch": 14.17, "learning_rate": 0.0009536386226482073, "loss": 0.1529, "step": 22180 }, { "epoch": 14.18, "learning_rate": 0.0009535676251331203, "loss": 0.1626, "step": 22190 }, { "epoch": 14.19, "learning_rate": 0.0009534966276180334, "loss": 0.0999, "step": 22200 }, { "epoch": 14.19, "learning_rate": 0.0009534256301029463, "loss": 0.1244, "step": 22210 }, { "epoch": 14.2, "learning_rate": 0.0009533546325878595, "loss": 0.1518, "step": 22220 }, { "epoch": 14.2, "learning_rate": 0.0009532836350727724, "loss": 0.1589, "step": 22230 }, { "epoch": 14.21, "learning_rate": 0.0009532126375576855, "loss": 0.128, "step": 22240 }, { "epoch": 14.22, "learning_rate": 0.0009531416400425985, "loss": 0.155, "step": 22250 }, { "epoch": 14.22, "learning_rate": 0.0009530706425275115, "loss": 0.1533, "step": 22260 }, { "epoch": 14.23, "learning_rate": 0.0009529996450124245, "loss": 0.1312, "step": 22270 }, { "epoch": 14.24, "learning_rate": 0.0009529286474973377, "loss": 0.1448, "step": 22280 }, { "epoch": 14.24, "learning_rate": 0.0009528576499822506, "loss": 0.1474, "step": 22290 }, { "epoch": 14.25, "learning_rate": 0.0009527866524671637, "loss": 0.1018, "step": 22300 }, { "epoch": 14.26, "learning_rate": 0.0009527156549520767, "loss": 0.1606, "step": 22310 }, { "epoch": 14.26, "learning_rate": 0.0009526446574369897, "loss": 0.1233, "step": 22320 }, { "epoch": 14.27, "learning_rate": 0.0009525736599219027, "loss": 0.1554, "step": 22330 }, { "epoch": 14.27, "learning_rate": 0.0009525026624068159, "loss": 0.1245, "step": 22340 }, { "epoch": 14.28, "learning_rate": 0.0009524316648917288, "loss": 0.1178, "step": 22350 }, { "epoch": 14.29, "learning_rate": 0.0009523606673766419, "loss": 0.1328, "step": 22360 }, { "epoch": 14.29, "learning_rate": 0.0009522896698615548, "loss": 0.1562, "step": 22370 }, { "epoch": 14.3, "learning_rate": 0.0009522186723464679, "loss": 0.1414, "step": 22380 }, { "epoch": 14.31, "learning_rate": 0.0009521476748313809, "loss": 0.1537, "step": 22390 }, { "epoch": 14.31, "learning_rate": 0.000952076677316294, "loss": 0.1343, "step": 22400 }, { "epoch": 14.32, "learning_rate": 0.000952005679801207, "loss": 0.1536, "step": 22410 }, { "epoch": 14.33, "learning_rate": 0.0009519346822861201, "loss": 0.1516, "step": 22420 }, { "epoch": 14.33, "learning_rate": 0.000951863684771033, "loss": 0.1512, "step": 22430 }, { "epoch": 14.34, "learning_rate": 0.0009517926872559461, "loss": 0.1475, "step": 22440 }, { "epoch": 14.34, "learning_rate": 0.000951721689740859, "loss": 0.1431, "step": 22450 }, { "epoch": 14.35, "learning_rate": 0.0009516506922257722, "loss": 0.1415, "step": 22460 }, { "epoch": 14.36, "learning_rate": 0.0009515796947106852, "loss": 0.1334, "step": 22470 }, { "epoch": 14.36, "learning_rate": 0.0009515086971955982, "loss": 0.1613, "step": 22480 }, { "epoch": 14.37, "learning_rate": 0.0009514376996805112, "loss": 0.1442, "step": 22490 }, { "epoch": 14.38, "learning_rate": 0.0009513667021654242, "loss": 0.1368, "step": 22500 }, { "epoch": 14.38, "learning_rate": 0.0009512957046503372, "loss": 0.1551, "step": 22510 }, { "epoch": 14.39, "learning_rate": 0.0009512247071352503, "loss": 0.1346, "step": 22520 }, { "epoch": 14.4, "learning_rate": 0.0009511537096201633, "loss": 0.1497, "step": 22530 }, { "epoch": 14.4, "learning_rate": 0.0009510827121050763, "loss": 0.1537, "step": 22540 }, { "epoch": 14.41, "learning_rate": 0.0009510117145899894, "loss": 0.1407, "step": 22550 }, { "epoch": 14.42, "learning_rate": 0.0009509407170749023, "loss": 0.1513, "step": 22560 }, { "epoch": 14.42, "learning_rate": 0.0009508697195598154, "loss": 0.1479, "step": 22570 }, { "epoch": 14.43, "learning_rate": 0.0009507987220447285, "loss": 0.1532, "step": 22580 }, { "epoch": 14.43, "learning_rate": 0.0009507277245296415, "loss": 0.1189, "step": 22590 }, { "epoch": 14.44, "learning_rate": 0.0009506567270145545, "loss": 0.1349, "step": 22600 }, { "epoch": 14.45, "learning_rate": 0.0009505857294994676, "loss": 0.1508, "step": 22610 }, { "epoch": 14.45, "learning_rate": 0.0009505147319843805, "loss": 0.1711, "step": 22620 }, { "epoch": 14.46, "learning_rate": 0.0009504437344692936, "loss": 0.1591, "step": 22630 }, { "epoch": 14.47, "learning_rate": 0.0009503727369542066, "loss": 0.1572, "step": 22640 }, { "epoch": 14.47, "learning_rate": 0.0009503017394391197, "loss": 0.1153, "step": 22650 }, { "epoch": 14.48, "learning_rate": 0.0009502307419240327, "loss": 0.1261, "step": 22660 }, { "epoch": 14.49, "learning_rate": 0.0009501597444089457, "loss": 0.1392, "step": 22670 }, { "epoch": 14.49, "learning_rate": 0.0009500887468938587, "loss": 0.1275, "step": 22680 }, { "epoch": 14.5, "learning_rate": 0.0009500177493787718, "loss": 0.1472, "step": 22690 }, { "epoch": 14.5, "learning_rate": 0.0009499467518636848, "loss": 0.1568, "step": 22700 }, { "epoch": 14.51, "learning_rate": 0.0009498757543485979, "loss": 0.1386, "step": 22710 }, { "epoch": 14.52, "learning_rate": 0.0009498047568335108, "loss": 0.1265, "step": 22720 }, { "epoch": 14.52, "learning_rate": 0.0009497337593184239, "loss": 0.1428, "step": 22730 }, { "epoch": 14.53, "learning_rate": 0.0009496627618033369, "loss": 0.175, "step": 22740 }, { "epoch": 14.54, "learning_rate": 0.0009495917642882499, "loss": 0.1614, "step": 22750 }, { "epoch": 14.54, "learning_rate": 0.000949520766773163, "loss": 0.1728, "step": 22760 }, { "epoch": 14.55, "learning_rate": 0.0009494497692580761, "loss": 0.1589, "step": 22770 }, { "epoch": 14.56, "learning_rate": 0.000949378771742989, "loss": 0.1652, "step": 22780 }, { "epoch": 14.56, "learning_rate": 0.0009493077742279021, "loss": 0.1545, "step": 22790 }, { "epoch": 14.57, "learning_rate": 0.000949236776712815, "loss": 0.1602, "step": 22800 }, { "epoch": 14.57, "learning_rate": 0.000949165779197728, "loss": 0.1615, "step": 22810 }, { "epoch": 14.58, "learning_rate": 0.0009490947816826411, "loss": 0.167, "step": 22820 }, { "epoch": 14.59, "learning_rate": 0.0009490237841675541, "loss": 0.145, "step": 22830 }, { "epoch": 14.59, "learning_rate": 0.0009489527866524672, "loss": 0.15, "step": 22840 }, { "epoch": 14.6, "learning_rate": 0.0009488817891373802, "loss": 0.141, "step": 22850 }, { "epoch": 14.61, "learning_rate": 0.0009488107916222932, "loss": 0.1568, "step": 22860 }, { "epoch": 14.61, "learning_rate": 0.0009487397941072062, "loss": 0.1405, "step": 22870 }, { "epoch": 14.62, "learning_rate": 0.0009486687965921193, "loss": 0.1496, "step": 22880 }, { "epoch": 14.63, "learning_rate": 0.0009485977990770323, "loss": 0.1532, "step": 22890 }, { "epoch": 14.63, "learning_rate": 0.0009485268015619454, "loss": 0.1574, "step": 22900 }, { "epoch": 14.64, "learning_rate": 0.0009484558040468583, "loss": 0.144, "step": 22910 }, { "epoch": 14.65, "learning_rate": 0.0009483848065317714, "loss": 0.1783, "step": 22920 }, { "epoch": 14.65, "learning_rate": 0.0009483138090166844, "loss": 0.1661, "step": 22930 }, { "epoch": 14.66, "learning_rate": 0.0009482428115015974, "loss": 0.142, "step": 22940 }, { "epoch": 14.66, "learning_rate": 0.0009481718139865105, "loss": 0.1407, "step": 22950 }, { "epoch": 14.67, "learning_rate": 0.0009481008164714236, "loss": 0.146, "step": 22960 }, { "epoch": 14.68, "learning_rate": 0.0009480298189563365, "loss": 0.1423, "step": 22970 }, { "epoch": 14.68, "learning_rate": 0.0009479588214412496, "loss": 0.1735, "step": 22980 }, { "epoch": 14.69, "learning_rate": 0.0009478878239261626, "loss": 0.1355, "step": 22990 }, { "epoch": 14.7, "learning_rate": 0.0009478168264110756, "loss": 0.1344, "step": 23000 }, { "epoch": 14.7, "learning_rate": 0.0009477458288959887, "loss": 0.1427, "step": 23010 }, { "epoch": 14.71, "learning_rate": 0.0009476748313809018, "loss": 0.1657, "step": 23020 }, { "epoch": 14.72, "learning_rate": 0.0009476038338658147, "loss": 0.1527, "step": 23030 }, { "epoch": 14.72, "learning_rate": 0.0009475328363507278, "loss": 0.1503, "step": 23040 }, { "epoch": 14.73, "learning_rate": 0.0009474618388356407, "loss": 0.1619, "step": 23050 }, { "epoch": 14.73, "learning_rate": 0.0009473908413205538, "loss": 0.1645, "step": 23060 }, { "epoch": 14.74, "learning_rate": 0.0009473198438054669, "loss": 0.1143, "step": 23070 }, { "epoch": 14.75, "learning_rate": 0.0009472488462903799, "loss": 0.1702, "step": 23080 }, { "epoch": 14.75, "learning_rate": 0.0009471778487752929, "loss": 0.1313, "step": 23090 }, { "epoch": 14.76, "learning_rate": 0.000947106851260206, "loss": 0.1716, "step": 23100 }, { "epoch": 14.77, "learning_rate": 0.0009470358537451189, "loss": 0.1421, "step": 23110 }, { "epoch": 14.77, "learning_rate": 0.0009469648562300319, "loss": 0.1407, "step": 23120 }, { "epoch": 14.78, "learning_rate": 0.000946893858714945, "loss": 0.1345, "step": 23130 }, { "epoch": 14.79, "learning_rate": 0.000946822861199858, "loss": 0.162, "step": 23140 }, { "epoch": 14.79, "learning_rate": 0.0009467518636847711, "loss": 0.1601, "step": 23150 }, { "epoch": 14.8, "learning_rate": 0.000946680866169684, "loss": 0.1415, "step": 23160 }, { "epoch": 14.8, "learning_rate": 0.0009466098686545971, "loss": 0.182, "step": 23170 }, { "epoch": 14.81, "learning_rate": 0.00094653887113951, "loss": 0.1519, "step": 23180 }, { "epoch": 14.82, "learning_rate": 0.0009464678736244232, "loss": 0.1856, "step": 23190 }, { "epoch": 14.82, "learning_rate": 0.0009463968761093362, "loss": 0.1641, "step": 23200 }, { "epoch": 14.83, "learning_rate": 0.0009463258785942493, "loss": 0.1401, "step": 23210 }, { "epoch": 14.84, "learning_rate": 0.0009462548810791622, "loss": 0.1536, "step": 23220 }, { "epoch": 14.84, "learning_rate": 0.0009461838835640753, "loss": 0.1346, "step": 23230 }, { "epoch": 14.85, "learning_rate": 0.0009461128860489882, "loss": 0.1536, "step": 23240 }, { "epoch": 14.86, "learning_rate": 0.0009460418885339014, "loss": 0.1416, "step": 23250 }, { "epoch": 14.86, "learning_rate": 0.0009459708910188144, "loss": 0.1659, "step": 23260 }, { "epoch": 14.87, "learning_rate": 0.0009458998935037274, "loss": 0.1343, "step": 23270 }, { "epoch": 14.88, "learning_rate": 0.0009458288959886404, "loss": 0.1592, "step": 23280 }, { "epoch": 14.88, "learning_rate": 0.0009457578984735535, "loss": 0.1342, "step": 23290 }, { "epoch": 14.89, "learning_rate": 0.0009456869009584664, "loss": 0.1514, "step": 23300 }, { "epoch": 14.89, "learning_rate": 0.0009456159034433796, "loss": 0.1519, "step": 23310 }, { "epoch": 14.9, "learning_rate": 0.0009455449059282925, "loss": 0.1524, "step": 23320 }, { "epoch": 14.91, "learning_rate": 0.0009454739084132056, "loss": 0.1608, "step": 23330 }, { "epoch": 14.91, "learning_rate": 0.0009454029108981186, "loss": 0.1324, "step": 23340 }, { "epoch": 14.92, "learning_rate": 0.0009453319133830316, "loss": 0.1542, "step": 23350 }, { "epoch": 14.93, "learning_rate": 0.0009452609158679446, "loss": 0.1339, "step": 23360 }, { "epoch": 14.93, "learning_rate": 0.0009451899183528578, "loss": 0.1496, "step": 23370 }, { "epoch": 14.94, "learning_rate": 0.0009451189208377707, "loss": 0.1244, "step": 23380 }, { "epoch": 14.95, "learning_rate": 0.0009450479233226838, "loss": 0.1439, "step": 23390 }, { "epoch": 14.95, "learning_rate": 0.0009449769258075968, "loss": 0.1165, "step": 23400 }, { "epoch": 14.96, "learning_rate": 0.0009449059282925098, "loss": 0.1267, "step": 23410 }, { "epoch": 14.96, "learning_rate": 0.0009448349307774228, "loss": 0.1383, "step": 23420 }, { "epoch": 14.97, "learning_rate": 0.0009447639332623357, "loss": 0.1468, "step": 23430 }, { "epoch": 14.98, "learning_rate": 0.0009446929357472489, "loss": 0.1508, "step": 23440 }, { "epoch": 14.98, "learning_rate": 0.0009446219382321619, "loss": 0.1566, "step": 23450 }, { "epoch": 14.99, "learning_rate": 0.0009445509407170749, "loss": 0.1399, "step": 23460 }, { "epoch": 15.0, "learning_rate": 0.0009444799432019879, "loss": 0.1656, "step": 23470 }, { "epoch": 15.0, "learning_rate": 0.000944408945686901, "loss": 0.1503, "step": 23480 }, { "epoch": 15.01, "learning_rate": 0.0009443379481718139, "loss": 0.1385, "step": 23490 }, { "epoch": 15.02, "learning_rate": 0.0009442669506567271, "loss": 0.1249, "step": 23500 }, { "epoch": 15.02, "learning_rate": 0.00094419595314164, "loss": 0.1565, "step": 23510 }, { "epoch": 15.03, "learning_rate": 0.0009441249556265531, "loss": 0.1236, "step": 23520 }, { "epoch": 15.04, "learning_rate": 0.0009440539581114661, "loss": 0.1381, "step": 23530 }, { "epoch": 15.04, "learning_rate": 0.0009439829605963791, "loss": 0.1496, "step": 23540 }, { "epoch": 15.05, "learning_rate": 0.0009439119630812921, "loss": 0.1309, "step": 23550 }, { "epoch": 15.05, "learning_rate": 0.0009438409655662053, "loss": 0.1029, "step": 23560 }, { "epoch": 15.06, "learning_rate": 0.0009437699680511182, "loss": 0.1542, "step": 23570 }, { "epoch": 15.07, "learning_rate": 0.0009436989705360313, "loss": 0.1186, "step": 23580 }, { "epoch": 15.07, "learning_rate": 0.0009436279730209442, "loss": 0.1395, "step": 23590 }, { "epoch": 15.08, "learning_rate": 0.0009435569755058573, "loss": 0.1368, "step": 23600 }, { "epoch": 15.09, "learning_rate": 0.0009434859779907703, "loss": 0.1187, "step": 23610 }, { "epoch": 15.09, "learning_rate": 0.0009434149804756834, "loss": 0.1232, "step": 23620 }, { "epoch": 15.1, "learning_rate": 0.0009433439829605964, "loss": 0.1449, "step": 23630 }, { "epoch": 15.11, "learning_rate": 0.0009432729854455095, "loss": 0.1134, "step": 23640 }, { "epoch": 15.11, "learning_rate": 0.0009432019879304224, "loss": 0.1189, "step": 23650 }, { "epoch": 15.12, "learning_rate": 0.0009431309904153355, "loss": 0.1534, "step": 23660 }, { "epoch": 15.12, "learning_rate": 0.0009430599929002485, "loss": 0.1376, "step": 23670 }, { "epoch": 15.13, "learning_rate": 0.0009429889953851616, "loss": 0.1442, "step": 23680 }, { "epoch": 15.14, "learning_rate": 0.0009429179978700746, "loss": 0.1275, "step": 23690 }, { "epoch": 15.14, "learning_rate": 0.0009428470003549877, "loss": 0.1406, "step": 23700 }, { "epoch": 15.15, "learning_rate": 0.0009427760028399006, "loss": 0.1367, "step": 23710 }, { "epoch": 15.16, "learning_rate": 0.0009427050053248137, "loss": 0.149, "step": 23720 }, { "epoch": 15.16, "learning_rate": 0.0009426340078097266, "loss": 0.1376, "step": 23730 }, { "epoch": 15.17, "learning_rate": 0.0009425630102946398, "loss": 0.1431, "step": 23740 }, { "epoch": 15.18, "learning_rate": 0.0009424920127795528, "loss": 0.1326, "step": 23750 }, { "epoch": 15.18, "learning_rate": 0.0009424210152644657, "loss": 0.1056, "step": 23760 }, { "epoch": 15.19, "learning_rate": 0.0009423500177493788, "loss": 0.1506, "step": 23770 }, { "epoch": 15.19, "learning_rate": 0.0009422790202342917, "loss": 0.153, "step": 23780 }, { "epoch": 15.2, "learning_rate": 0.0009422080227192048, "loss": 0.1368, "step": 23790 }, { "epoch": 15.21, "learning_rate": 0.0009421370252041179, "loss": 0.1508, "step": 23800 }, { "epoch": 15.21, "learning_rate": 0.000942066027689031, "loss": 0.1398, "step": 23810 }, { "epoch": 15.22, "learning_rate": 0.0009419950301739439, "loss": 0.1396, "step": 23820 }, { "epoch": 15.23, "learning_rate": 0.000941924032658857, "loss": 0.1394, "step": 23830 }, { "epoch": 15.23, "learning_rate": 0.0009418530351437699, "loss": 0.1578, "step": 23840 }, { "epoch": 15.24, "learning_rate": 0.000941782037628683, "loss": 0.1626, "step": 23850 }, { "epoch": 15.25, "learning_rate": 0.0009417110401135961, "loss": 0.1428, "step": 23860 }, { "epoch": 15.25, "learning_rate": 0.0009416400425985091, "loss": 0.1478, "step": 23870 }, { "epoch": 15.26, "learning_rate": 0.0009415690450834221, "loss": 0.1482, "step": 23880 }, { "epoch": 15.27, "learning_rate": 0.0009414980475683352, "loss": 0.1329, "step": 23890 }, { "epoch": 15.27, "learning_rate": 0.0009414270500532481, "loss": 0.1287, "step": 23900 }, { "epoch": 15.28, "learning_rate": 0.0009413560525381612, "loss": 0.1443, "step": 23910 }, { "epoch": 15.28, "learning_rate": 0.0009412850550230742, "loss": 0.1398, "step": 23920 }, { "epoch": 15.29, "learning_rate": 0.0009412140575079873, "loss": 0.1211, "step": 23930 }, { "epoch": 15.3, "learning_rate": 0.0009411430599929003, "loss": 0.137, "step": 23940 }, { "epoch": 15.3, "learning_rate": 0.0009410720624778133, "loss": 0.1539, "step": 23950 }, { "epoch": 15.31, "learning_rate": 0.0009410010649627263, "loss": 0.1305, "step": 23960 }, { "epoch": 15.32, "learning_rate": 0.0009409300674476394, "loss": 0.1501, "step": 23970 }, { "epoch": 15.32, "learning_rate": 0.0009408590699325523, "loss": 0.1517, "step": 23980 }, { "epoch": 15.33, "learning_rate": 0.0009407880724174655, "loss": 0.1436, "step": 23990 }, { "epoch": 15.34, "learning_rate": 0.0009407170749023784, "loss": 0.1175, "step": 24000 }, { "epoch": 15.34, "learning_rate": 0.0009406460773872915, "loss": 0.1333, "step": 24010 }, { "epoch": 15.35, "learning_rate": 0.0009405750798722045, "loss": 0.1091, "step": 24020 }, { "epoch": 15.35, "learning_rate": 0.0009405040823571175, "loss": 0.143, "step": 24030 }, { "epoch": 15.36, "learning_rate": 0.0009404330848420305, "loss": 0.1213, "step": 24040 }, { "epoch": 15.37, "learning_rate": 0.0009403620873269437, "loss": 0.1295, "step": 24050 }, { "epoch": 15.37, "learning_rate": 0.0009402910898118566, "loss": 0.1312, "step": 24060 }, { "epoch": 15.38, "learning_rate": 0.0009402200922967696, "loss": 0.1402, "step": 24070 }, { "epoch": 15.39, "learning_rate": 0.0009401490947816827, "loss": 0.1299, "step": 24080 }, { "epoch": 15.39, "learning_rate": 0.0009400780972665956, "loss": 0.1399, "step": 24090 }, { "epoch": 15.4, "learning_rate": 0.0009400070997515087, "loss": 0.1339, "step": 24100 }, { "epoch": 15.41, "learning_rate": 0.0009399361022364217, "loss": 0.1145, "step": 24110 }, { "epoch": 15.41, "learning_rate": 0.0009398651047213348, "loss": 0.1567, "step": 24120 }, { "epoch": 15.42, "learning_rate": 0.0009397941072062478, "loss": 0.1406, "step": 24130 }, { "epoch": 15.42, "learning_rate": 0.0009397231096911608, "loss": 0.1522, "step": 24140 }, { "epoch": 15.43, "learning_rate": 0.0009396521121760738, "loss": 0.1274, "step": 24150 }, { "epoch": 15.44, "learning_rate": 0.0009395811146609869, "loss": 0.1309, "step": 24160 }, { "epoch": 15.44, "learning_rate": 0.0009395101171458999, "loss": 0.1333, "step": 24170 }, { "epoch": 15.45, "learning_rate": 0.000939439119630813, "loss": 0.1439, "step": 24180 }, { "epoch": 15.46, "learning_rate": 0.000939368122115726, "loss": 0.1288, "step": 24190 }, { "epoch": 15.46, "learning_rate": 0.000939297124600639, "loss": 0.1154, "step": 24200 }, { "epoch": 15.47, "learning_rate": 0.000939226127085552, "loss": 0.1208, "step": 24210 }, { "epoch": 15.48, "learning_rate": 0.000939155129570465, "loss": 0.1275, "step": 24220 }, { "epoch": 15.48, "learning_rate": 0.0009390841320553781, "loss": 0.136, "step": 24230 }, { "epoch": 15.49, "learning_rate": 0.0009390131345402912, "loss": 0.1182, "step": 24240 }, { "epoch": 15.5, "learning_rate": 0.0009389421370252041, "loss": 0.136, "step": 24250 }, { "epoch": 15.5, "learning_rate": 0.0009388711395101172, "loss": 0.1294, "step": 24260 }, { "epoch": 15.51, "learning_rate": 0.0009388001419950302, "loss": 0.1553, "step": 24270 }, { "epoch": 15.51, "learning_rate": 0.0009387291444799432, "loss": 0.1494, "step": 24280 }, { "epoch": 15.52, "learning_rate": 0.0009386581469648563, "loss": 0.1119, "step": 24290 }, { "epoch": 15.53, "learning_rate": 0.0009385871494497694, "loss": 0.1552, "step": 24300 }, { "epoch": 15.53, "learning_rate": 0.0009385161519346823, "loss": 0.1289, "step": 24310 }, { "epoch": 15.54, "learning_rate": 0.0009384451544195954, "loss": 0.1435, "step": 24320 }, { "epoch": 15.55, "learning_rate": 0.0009383741569045083, "loss": 0.1491, "step": 24330 }, { "epoch": 15.55, "learning_rate": 0.0009383031593894214, "loss": 0.1536, "step": 24340 }, { "epoch": 15.56, "learning_rate": 0.0009382321618743345, "loss": 0.1418, "step": 24350 }, { "epoch": 15.57, "learning_rate": 0.0009381611643592475, "loss": 0.1373, "step": 24360 }, { "epoch": 15.57, "learning_rate": 0.0009380901668441605, "loss": 0.1431, "step": 24370 }, { "epoch": 15.58, "learning_rate": 0.0009380191693290734, "loss": 0.1286, "step": 24380 }, { "epoch": 15.58, "learning_rate": 0.0009379481718139865, "loss": 0.1437, "step": 24390 }, { "epoch": 15.59, "learning_rate": 0.0009378771742988995, "loss": 0.184, "step": 24400 }, { "epoch": 15.6, "learning_rate": 0.0009378061767838126, "loss": 0.1034, "step": 24410 }, { "epoch": 15.6, "learning_rate": 0.0009377351792687256, "loss": 0.1458, "step": 24420 }, { "epoch": 15.61, "learning_rate": 0.0009376641817536387, "loss": 0.1409, "step": 24430 }, { "epoch": 15.62, "learning_rate": 0.0009375931842385516, "loss": 0.1224, "step": 24440 }, { "epoch": 15.62, "learning_rate": 0.0009375221867234647, "loss": 0.1531, "step": 24450 }, { "epoch": 15.63, "learning_rate": 0.0009374511892083777, "loss": 0.1554, "step": 24460 }, { "epoch": 15.64, "learning_rate": 0.0009373801916932908, "loss": 0.145, "step": 24470 }, { "epoch": 15.64, "learning_rate": 0.0009373091941782038, "loss": 0.1375, "step": 24480 }, { "epoch": 15.65, "learning_rate": 0.0009372381966631168, "loss": 0.1538, "step": 24490 }, { "epoch": 15.65, "learning_rate": 0.0009371671991480298, "loss": 0.1313, "step": 24500 }, { "epoch": 15.66, "learning_rate": 0.0009370962016329429, "loss": 0.1241, "step": 24510 }, { "epoch": 15.67, "learning_rate": 0.0009370252041178558, "loss": 0.1408, "step": 24520 }, { "epoch": 15.67, "learning_rate": 0.000936954206602769, "loss": 0.1285, "step": 24530 }, { "epoch": 15.68, "learning_rate": 0.000936883209087682, "loss": 0.1444, "step": 24540 }, { "epoch": 15.69, "learning_rate": 0.000936812211572595, "loss": 0.1534, "step": 24550 }, { "epoch": 15.69, "learning_rate": 0.000936741214057508, "loss": 0.1407, "step": 24560 }, { "epoch": 15.7, "learning_rate": 0.000936670216542421, "loss": 0.1189, "step": 24570 }, { "epoch": 15.71, "learning_rate": 0.000936599219027334, "loss": 0.1567, "step": 24580 }, { "epoch": 15.71, "learning_rate": 0.0009365282215122471, "loss": 0.149, "step": 24590 }, { "epoch": 15.72, "learning_rate": 0.0009364572239971601, "loss": 0.1743, "step": 24600 }, { "epoch": 15.73, "learning_rate": 0.0009363862264820732, "loss": 0.158, "step": 24610 }, { "epoch": 15.73, "learning_rate": 0.0009363152289669862, "loss": 0.1391, "step": 24620 }, { "epoch": 15.74, "learning_rate": 0.0009362442314518992, "loss": 0.147, "step": 24630 }, { "epoch": 15.74, "learning_rate": 0.0009361732339368122, "loss": 0.1432, "step": 24640 }, { "epoch": 15.75, "learning_rate": 0.0009361022364217253, "loss": 0.1382, "step": 24650 }, { "epoch": 15.76, "learning_rate": 0.0009360312389066383, "loss": 0.14, "step": 24660 }, { "epoch": 15.76, "learning_rate": 0.0009359602413915513, "loss": 0.1489, "step": 24670 }, { "epoch": 15.77, "learning_rate": 0.0009358892438764643, "loss": 0.1443, "step": 24680 }, { "epoch": 15.78, "learning_rate": 0.0009358182463613773, "loss": 0.1329, "step": 24690 }, { "epoch": 15.78, "learning_rate": 0.0009357472488462904, "loss": 0.1445, "step": 24700 }, { "epoch": 15.79, "learning_rate": 0.0009356762513312033, "loss": 0.159, "step": 24710 }, { "epoch": 15.8, "learning_rate": 0.0009356052538161165, "loss": 0.1628, "step": 24720 }, { "epoch": 15.8, "learning_rate": 0.0009355342563010295, "loss": 0.1636, "step": 24730 }, { "epoch": 15.81, "learning_rate": 0.0009354632587859425, "loss": 0.1332, "step": 24740 }, { "epoch": 15.81, "learning_rate": 0.0009353922612708555, "loss": 0.1685, "step": 24750 }, { "epoch": 15.82, "learning_rate": 0.0009353212637557686, "loss": 0.1439, "step": 24760 }, { "epoch": 15.83, "learning_rate": 0.0009352502662406815, "loss": 0.1545, "step": 24770 }, { "epoch": 15.83, "learning_rate": 0.0009351792687255947, "loss": 0.173, "step": 24780 }, { "epoch": 15.84, "learning_rate": 0.0009351082712105076, "loss": 0.1309, "step": 24790 }, { "epoch": 15.85, "learning_rate": 0.0009350372736954207, "loss": 0.1452, "step": 24800 }, { "epoch": 15.85, "learning_rate": 0.0009349662761803337, "loss": 0.1527, "step": 24810 }, { "epoch": 15.86, "learning_rate": 0.0009348952786652467, "loss": 0.1403, "step": 24820 }, { "epoch": 15.87, "learning_rate": 0.0009348242811501597, "loss": 0.1439, "step": 24830 }, { "epoch": 15.87, "learning_rate": 0.0009347532836350729, "loss": 0.1238, "step": 24840 }, { "epoch": 15.88, "learning_rate": 0.0009346822861199858, "loss": 0.1268, "step": 24850 }, { "epoch": 15.88, "learning_rate": 0.0009346112886048989, "loss": 0.1617, "step": 24860 }, { "epoch": 15.89, "learning_rate": 0.0009345402910898118, "loss": 0.1181, "step": 24870 }, { "epoch": 15.9, "learning_rate": 0.0009344692935747249, "loss": 0.1561, "step": 24880 }, { "epoch": 15.9, "learning_rate": 0.0009343982960596379, "loss": 0.1353, "step": 24890 }, { "epoch": 15.91, "learning_rate": 0.000934327298544551, "loss": 0.1284, "step": 24900 }, { "epoch": 15.92, "learning_rate": 0.000934256301029464, "loss": 0.1271, "step": 24910 }, { "epoch": 15.92, "learning_rate": 0.0009341853035143771, "loss": 0.1071, "step": 24920 }, { "epoch": 15.93, "learning_rate": 0.00093411430599929, "loss": 0.1475, "step": 24930 }, { "epoch": 15.94, "learning_rate": 0.0009340433084842031, "loss": 0.1313, "step": 24940 }, { "epoch": 15.94, "learning_rate": 0.000933972310969116, "loss": 0.1228, "step": 24950 }, { "epoch": 15.95, "learning_rate": 0.0009339013134540292, "loss": 0.137, "step": 24960 }, { "epoch": 15.95, "learning_rate": 0.0009338303159389422, "loss": 0.1521, "step": 24970 }, { "epoch": 15.96, "learning_rate": 0.0009337593184238551, "loss": 0.1228, "step": 24980 }, { "epoch": 15.97, "learning_rate": 0.0009336883209087682, "loss": 0.1769, "step": 24990 }, { "epoch": 15.97, "learning_rate": 0.0009336173233936812, "loss": 0.1282, "step": 25000 }, { "epoch": 15.98, "learning_rate": 0.0009335463258785942, "loss": 0.1443, "step": 25010 }, { "epoch": 15.99, "learning_rate": 0.0009334753283635073, "loss": 0.1371, "step": 25020 }, { "epoch": 15.99, "learning_rate": 0.0009334043308484204, "loss": 0.1388, "step": 25030 }, { "epoch": 16.0, "learning_rate": 0.0009333333333333333, "loss": 0.1664, "step": 25040 }, { "epoch": 16.01, "learning_rate": 0.0009332623358182464, "loss": 0.1286, "step": 25050 }, { "epoch": 16.01, "learning_rate": 0.0009331913383031593, "loss": 0.1375, "step": 25060 }, { "epoch": 16.02, "learning_rate": 0.0009331203407880724, "loss": 0.1182, "step": 25070 }, { "epoch": 16.03, "learning_rate": 0.0009330493432729855, "loss": 0.1052, "step": 25080 }, { "epoch": 16.03, "learning_rate": 0.0009329783457578985, "loss": 0.1171, "step": 25090 }, { "epoch": 16.04, "learning_rate": 0.0009329073482428115, "loss": 0.1204, "step": 25100 }, { "epoch": 16.04, "learning_rate": 0.0009328363507277246, "loss": 0.1288, "step": 25110 }, { "epoch": 16.05, "learning_rate": 0.0009327653532126375, "loss": 0.1363, "step": 25120 }, { "epoch": 16.06, "learning_rate": 0.0009326943556975506, "loss": 0.1242, "step": 25130 }, { "epoch": 16.06, "learning_rate": 0.0009326233581824637, "loss": 0.0983, "step": 25140 }, { "epoch": 16.07, "learning_rate": 0.0009325523606673767, "loss": 0.1232, "step": 25150 }, { "epoch": 16.08, "learning_rate": 0.0009324813631522897, "loss": 0.133, "step": 25160 }, { "epoch": 16.08, "learning_rate": 0.0009324103656372028, "loss": 0.1143, "step": 25170 }, { "epoch": 16.09, "learning_rate": 0.0009323393681221157, "loss": 0.118, "step": 25180 }, { "epoch": 16.1, "learning_rate": 0.0009322683706070288, "loss": 0.1199, "step": 25190 }, { "epoch": 16.1, "learning_rate": 0.0009321973730919417, "loss": 0.1186, "step": 25200 }, { "epoch": 16.11, "learning_rate": 0.0009321263755768549, "loss": 0.1374, "step": 25210 }, { "epoch": 16.11, "learning_rate": 0.0009320553780617679, "loss": 0.1354, "step": 25220 }, { "epoch": 16.12, "learning_rate": 0.0009319843805466809, "loss": 0.1587, "step": 25230 }, { "epoch": 16.13, "learning_rate": 0.0009319133830315939, "loss": 0.1165, "step": 25240 }, { "epoch": 16.13, "learning_rate": 0.000931842385516507, "loss": 0.1248, "step": 25250 }, { "epoch": 16.14, "learning_rate": 0.0009317713880014199, "loss": 0.1411, "step": 25260 }, { "epoch": 16.15, "learning_rate": 0.0009317003904863331, "loss": 0.1265, "step": 25270 }, { "epoch": 16.15, "learning_rate": 0.000931629392971246, "loss": 0.1404, "step": 25280 }, { "epoch": 16.16, "learning_rate": 0.000931558395456159, "loss": 0.1166, "step": 25290 }, { "epoch": 16.17, "learning_rate": 0.0009314873979410721, "loss": 0.1257, "step": 25300 }, { "epoch": 16.17, "learning_rate": 0.000931416400425985, "loss": 0.1293, "step": 25310 }, { "epoch": 16.18, "learning_rate": 0.0009313454029108981, "loss": 0.1311, "step": 25320 }, { "epoch": 16.19, "learning_rate": 0.0009312744053958112, "loss": 0.1424, "step": 25330 }, { "epoch": 16.19, "learning_rate": 0.0009312034078807242, "loss": 0.123, "step": 25340 }, { "epoch": 16.2, "learning_rate": 0.0009311324103656372, "loss": 0.1401, "step": 25350 }, { "epoch": 16.2, "learning_rate": 0.0009310614128505503, "loss": 0.1472, "step": 25360 }, { "epoch": 16.21, "learning_rate": 0.0009309904153354632, "loss": 0.1571, "step": 25370 }, { "epoch": 16.22, "learning_rate": 0.0009309194178203763, "loss": 0.1117, "step": 25380 }, { "epoch": 16.22, "learning_rate": 0.0009308484203052893, "loss": 0.1385, "step": 25390 }, { "epoch": 16.23, "learning_rate": 0.0009307774227902024, "loss": 0.1311, "step": 25400 }, { "epoch": 16.24, "learning_rate": 0.0009307064252751154, "loss": 0.122, "step": 25410 }, { "epoch": 16.24, "learning_rate": 0.0009306354277600284, "loss": 0.141, "step": 25420 }, { "epoch": 16.25, "learning_rate": 0.0009305644302449414, "loss": 0.1267, "step": 25430 }, { "epoch": 16.26, "learning_rate": 0.0009304934327298545, "loss": 0.1027, "step": 25440 }, { "epoch": 16.26, "learning_rate": 0.0009304224352147675, "loss": 0.1542, "step": 25450 }, { "epoch": 16.27, "learning_rate": 0.0009303514376996806, "loss": 0.1143, "step": 25460 }, { "epoch": 16.27, "learning_rate": 0.0009302804401845935, "loss": 0.1454, "step": 25470 }, { "epoch": 16.28, "learning_rate": 0.0009302094426695066, "loss": 0.1523, "step": 25480 }, { "epoch": 16.29, "learning_rate": 0.0009301384451544196, "loss": 0.1088, "step": 25490 }, { "epoch": 16.29, "learning_rate": 0.0009300674476393326, "loss": 0.1545, "step": 25500 }, { "epoch": 16.3, "learning_rate": 0.0009299964501242457, "loss": 0.1418, "step": 25510 }, { "epoch": 16.31, "learning_rate": 0.0009299254526091588, "loss": 0.1418, "step": 25520 }, { "epoch": 16.31, "learning_rate": 0.0009298544550940717, "loss": 0.1424, "step": 25530 }, { "epoch": 16.32, "learning_rate": 0.0009297834575789848, "loss": 0.1374, "step": 25540 }, { "epoch": 16.33, "learning_rate": 0.0009297124600638978, "loss": 0.148, "step": 25550 }, { "epoch": 16.33, "learning_rate": 0.0009296414625488108, "loss": 0.1299, "step": 25560 }, { "epoch": 16.34, "learning_rate": 0.0009295704650337239, "loss": 0.1407, "step": 25570 }, { "epoch": 16.34, "learning_rate": 0.000929499467518637, "loss": 0.1308, "step": 25580 }, { "epoch": 16.35, "learning_rate": 0.0009294284700035499, "loss": 0.1344, "step": 25590 }, { "epoch": 16.36, "learning_rate": 0.0009293574724884629, "loss": 0.1084, "step": 25600 }, { "epoch": 16.36, "learning_rate": 0.0009292864749733759, "loss": 0.1325, "step": 25610 }, { "epoch": 16.37, "learning_rate": 0.0009292154774582889, "loss": 0.133, "step": 25620 }, { "epoch": 16.38, "learning_rate": 0.0009291444799432021, "loss": 0.107, "step": 25630 }, { "epoch": 16.38, "learning_rate": 0.000929073482428115, "loss": 0.1486, "step": 25640 }, { "epoch": 16.39, "learning_rate": 0.0009290024849130281, "loss": 0.1332, "step": 25650 }, { "epoch": 16.4, "learning_rate": 0.000928931487397941, "loss": 0.1404, "step": 25660 }, { "epoch": 16.4, "learning_rate": 0.0009288604898828541, "loss": 0.1448, "step": 25670 }, { "epoch": 16.41, "learning_rate": 0.0009287894923677671, "loss": 0.1351, "step": 25680 }, { "epoch": 16.42, "learning_rate": 0.0009287184948526802, "loss": 0.1293, "step": 25690 }, { "epoch": 16.42, "learning_rate": 0.0009286474973375932, "loss": 0.1368, "step": 25700 }, { "epoch": 16.43, "learning_rate": 0.0009285764998225063, "loss": 0.1199, "step": 25710 }, { "epoch": 16.43, "learning_rate": 0.0009285055023074192, "loss": 0.1127, "step": 25720 }, { "epoch": 16.44, "learning_rate": 0.0009284345047923323, "loss": 0.146, "step": 25730 }, { "epoch": 16.45, "learning_rate": 0.0009283635072772452, "loss": 0.1567, "step": 25740 }, { "epoch": 16.45, "learning_rate": 0.0009282925097621584, "loss": 0.1227, "step": 25750 }, { "epoch": 16.46, "learning_rate": 0.0009282215122470714, "loss": 0.1116, "step": 25760 }, { "epoch": 16.47, "learning_rate": 0.0009281505147319844, "loss": 0.1299, "step": 25770 }, { "epoch": 16.47, "learning_rate": 0.0009280795172168974, "loss": 0.1208, "step": 25780 }, { "epoch": 16.48, "learning_rate": 0.0009280085197018105, "loss": 0.1362, "step": 25790 }, { "epoch": 16.49, "learning_rate": 0.0009279375221867234, "loss": 0.1251, "step": 25800 }, { "epoch": 16.49, "learning_rate": 0.0009278665246716365, "loss": 0.1411, "step": 25810 }, { "epoch": 16.5, "learning_rate": 0.0009277955271565496, "loss": 0.1121, "step": 25820 }, { "epoch": 16.5, "learning_rate": 0.0009277245296414626, "loss": 0.1616, "step": 25830 }, { "epoch": 16.51, "learning_rate": 0.0009276535321263756, "loss": 0.1454, "step": 25840 }, { "epoch": 16.52, "learning_rate": 0.0009275825346112887, "loss": 0.122, "step": 25850 }, { "epoch": 16.52, "learning_rate": 0.0009275115370962016, "loss": 0.1452, "step": 25860 }, { "epoch": 16.53, "learning_rate": 0.0009274405395811147, "loss": 0.1088, "step": 25870 }, { "epoch": 16.54, "learning_rate": 0.0009273695420660277, "loss": 0.1377, "step": 25880 }, { "epoch": 16.54, "learning_rate": 0.0009272985445509408, "loss": 0.1186, "step": 25890 }, { "epoch": 16.55, "learning_rate": 0.0009272275470358538, "loss": 0.1247, "step": 25900 }, { "epoch": 16.56, "learning_rate": 0.0009271565495207667, "loss": 0.0938, "step": 25910 }, { "epoch": 16.56, "learning_rate": 0.0009270855520056798, "loss": 0.1685, "step": 25920 }, { "epoch": 16.57, "learning_rate": 0.0009270145544905927, "loss": 0.1359, "step": 25930 }, { "epoch": 16.57, "learning_rate": 0.0009269435569755059, "loss": 0.1519, "step": 25940 }, { "epoch": 16.58, "learning_rate": 0.0009268725594604189, "loss": 0.1279, "step": 25950 }, { "epoch": 16.59, "learning_rate": 0.000926801561945332, "loss": 0.1069, "step": 25960 }, { "epoch": 16.59, "learning_rate": 0.0009267305644302449, "loss": 0.1241, "step": 25970 }, { "epoch": 16.6, "learning_rate": 0.000926659566915158, "loss": 0.1594, "step": 25980 }, { "epoch": 16.61, "learning_rate": 0.0009265885694000709, "loss": 0.1272, "step": 25990 }, { "epoch": 16.61, "learning_rate": 0.0009265175718849841, "loss": 0.1267, "step": 26000 }, { "epoch": 16.62, "learning_rate": 0.0009264465743698971, "loss": 0.1224, "step": 26010 }, { "epoch": 16.63, "learning_rate": 0.0009263755768548101, "loss": 0.1352, "step": 26020 }, { "epoch": 16.63, "learning_rate": 0.0009263045793397231, "loss": 0.1276, "step": 26030 }, { "epoch": 16.64, "learning_rate": 0.0009262335818246362, "loss": 0.123, "step": 26040 }, { "epoch": 16.65, "learning_rate": 0.0009261625843095491, "loss": 0.1321, "step": 26050 }, { "epoch": 16.65, "learning_rate": 0.0009260915867944623, "loss": 0.1449, "step": 26060 }, { "epoch": 16.66, "learning_rate": 0.0009260205892793752, "loss": 0.1492, "step": 26070 }, { "epoch": 16.66, "learning_rate": 0.0009259495917642883, "loss": 0.1182, "step": 26080 }, { "epoch": 16.67, "learning_rate": 0.0009258785942492013, "loss": 0.1407, "step": 26090 }, { "epoch": 16.68, "learning_rate": 0.0009258075967341143, "loss": 0.1472, "step": 26100 }, { "epoch": 16.68, "learning_rate": 0.0009257365992190273, "loss": 0.1404, "step": 26110 }, { "epoch": 16.69, "learning_rate": 0.0009256656017039405, "loss": 0.1511, "step": 26120 }, { "epoch": 16.7, "learning_rate": 0.0009255946041888534, "loss": 0.1255, "step": 26130 }, { "epoch": 16.7, "learning_rate": 0.0009255236066737665, "loss": 0.1221, "step": 26140 }, { "epoch": 16.71, "learning_rate": 0.0009254526091586794, "loss": 0.1251, "step": 26150 }, { "epoch": 16.72, "learning_rate": 0.0009253816116435925, "loss": 0.1232, "step": 26160 }, { "epoch": 16.72, "learning_rate": 0.0009253106141285055, "loss": 0.1382, "step": 26170 }, { "epoch": 16.73, "learning_rate": 0.0009252396166134186, "loss": 0.135, "step": 26180 }, { "epoch": 16.73, "learning_rate": 0.0009251686190983316, "loss": 0.1372, "step": 26190 }, { "epoch": 16.74, "learning_rate": 0.0009250976215832447, "loss": 0.1124, "step": 26200 }, { "epoch": 16.75, "learning_rate": 0.0009250266240681576, "loss": 0.1464, "step": 26210 }, { "epoch": 16.75, "learning_rate": 0.0009249556265530706, "loss": 0.1388, "step": 26220 }, { "epoch": 16.76, "learning_rate": 0.0009248846290379837, "loss": 0.1212, "step": 26230 }, { "epoch": 16.77, "learning_rate": 0.0009248136315228967, "loss": 0.1517, "step": 26240 }, { "epoch": 16.77, "learning_rate": 0.0009247426340078098, "loss": 0.1009, "step": 26250 }, { "epoch": 16.78, "learning_rate": 0.0009246716364927227, "loss": 0.1296, "step": 26260 }, { "epoch": 16.79, "learning_rate": 0.0009246006389776358, "loss": 0.1443, "step": 26270 }, { "epoch": 16.79, "learning_rate": 0.0009245296414625488, "loss": 0.1436, "step": 26280 }, { "epoch": 16.8, "learning_rate": 0.0009244586439474618, "loss": 0.1104, "step": 26290 }, { "epoch": 16.8, "learning_rate": 0.0009243876464323749, "loss": 0.1109, "step": 26300 }, { "epoch": 16.81, "learning_rate": 0.000924316648917288, "loss": 0.1459, "step": 26310 }, { "epoch": 16.82, "learning_rate": 0.0009242456514022009, "loss": 0.131, "step": 26320 }, { "epoch": 16.82, "learning_rate": 0.000924174653887114, "loss": 0.1423, "step": 26330 }, { "epoch": 16.83, "learning_rate": 0.000924103656372027, "loss": 0.1342, "step": 26340 }, { "epoch": 16.84, "learning_rate": 0.00092403265885694, "loss": 0.1241, "step": 26350 }, { "epoch": 16.84, "learning_rate": 0.000923961661341853, "loss": 0.1781, "step": 26360 }, { "epoch": 16.85, "learning_rate": 0.0009238906638267661, "loss": 0.116, "step": 26370 }, { "epoch": 16.86, "learning_rate": 0.0009238196663116791, "loss": 0.1799, "step": 26380 }, { "epoch": 16.86, "learning_rate": 0.0009237486687965922, "loss": 0.1317, "step": 26390 }, { "epoch": 16.87, "learning_rate": 0.0009236776712815051, "loss": 0.0983, "step": 26400 }, { "epoch": 16.88, "learning_rate": 0.0009236066737664182, "loss": 0.1497, "step": 26410 }, { "epoch": 16.88, "learning_rate": 0.0009235356762513312, "loss": 0.1571, "step": 26420 }, { "epoch": 16.89, "learning_rate": 0.0009234646787362443, "loss": 0.1443, "step": 26430 }, { "epoch": 16.89, "learning_rate": 0.0009233936812211573, "loss": 0.1457, "step": 26440 }, { "epoch": 16.9, "learning_rate": 0.0009233226837060703, "loss": 0.1255, "step": 26450 }, { "epoch": 16.91, "learning_rate": 0.0009232516861909833, "loss": 0.1181, "step": 26460 }, { "epoch": 16.91, "learning_rate": 0.0009231806886758964, "loss": 0.1382, "step": 26470 }, { "epoch": 16.92, "learning_rate": 0.0009231096911608093, "loss": 0.1318, "step": 26480 }, { "epoch": 16.93, "learning_rate": 0.0009230386936457225, "loss": 0.1427, "step": 26490 }, { "epoch": 16.93, "learning_rate": 0.0009229676961306355, "loss": 0.1359, "step": 26500 }, { "epoch": 16.94, "learning_rate": 0.0009228966986155485, "loss": 0.1099, "step": 26510 }, { "epoch": 16.95, "learning_rate": 0.0009228257011004615, "loss": 0.1713, "step": 26520 }, { "epoch": 16.95, "learning_rate": 0.0009227547035853744, "loss": 0.1392, "step": 26530 }, { "epoch": 16.96, "learning_rate": 0.0009226837060702875, "loss": 0.123, "step": 26540 }, { "epoch": 16.96, "learning_rate": 0.0009226127085552006, "loss": 0.1638, "step": 26550 }, { "epoch": 16.97, "learning_rate": 0.0009225417110401136, "loss": 0.152, "step": 26560 }, { "epoch": 16.98, "learning_rate": 0.0009224707135250266, "loss": 0.1197, "step": 26570 }, { "epoch": 16.98, "learning_rate": 0.0009223997160099397, "loss": 0.1322, "step": 26580 }, { "epoch": 16.99, "learning_rate": 0.0009223287184948526, "loss": 0.1272, "step": 26590 }, { "epoch": 17.0, "learning_rate": 0.0009222577209797657, "loss": 0.1141, "step": 26600 }, { "epoch": 17.0, "learning_rate": 0.0009221867234646788, "loss": 0.122, "step": 26610 }, { "epoch": 17.01, "learning_rate": 0.0009221157259495918, "loss": 0.1188, "step": 26620 }, { "epoch": 17.02, "learning_rate": 0.0009220447284345048, "loss": 0.1244, "step": 26630 }, { "epoch": 17.02, "learning_rate": 0.0009219737309194178, "loss": 0.1568, "step": 26640 }, { "epoch": 17.03, "learning_rate": 0.0009219027334043308, "loss": 0.1023, "step": 26650 }, { "epoch": 17.04, "learning_rate": 0.0009218317358892439, "loss": 0.1231, "step": 26660 }, { "epoch": 17.04, "learning_rate": 0.0009217607383741569, "loss": 0.1095, "step": 26670 }, { "epoch": 17.05, "learning_rate": 0.00092168974085907, "loss": 0.123, "step": 26680 }, { "epoch": 17.05, "learning_rate": 0.000921618743343983, "loss": 0.1071, "step": 26690 }, { "epoch": 17.06, "learning_rate": 0.000921547745828896, "loss": 0.1295, "step": 26700 }, { "epoch": 17.07, "learning_rate": 0.000921476748313809, "loss": 0.1227, "step": 26710 }, { "epoch": 17.07, "learning_rate": 0.000921405750798722, "loss": 0.1109, "step": 26720 }, { "epoch": 17.08, "learning_rate": 0.0009213347532836351, "loss": 0.1039, "step": 26730 }, { "epoch": 17.09, "learning_rate": 0.0009212637557685482, "loss": 0.1291, "step": 26740 }, { "epoch": 17.09, "learning_rate": 0.0009211927582534611, "loss": 0.11, "step": 26750 }, { "epoch": 17.1, "learning_rate": 0.0009211217607383742, "loss": 0.1387, "step": 26760 }, { "epoch": 17.11, "learning_rate": 0.0009210507632232872, "loss": 0.1014, "step": 26770 }, { "epoch": 17.11, "learning_rate": 0.0009209797657082002, "loss": 0.1332, "step": 26780 }, { "epoch": 17.12, "learning_rate": 0.0009209087681931133, "loss": 0.1355, "step": 26790 }, { "epoch": 17.12, "learning_rate": 0.0009208377706780264, "loss": 0.1203, "step": 26800 }, { "epoch": 17.13, "learning_rate": 0.0009207667731629393, "loss": 0.1282, "step": 26810 }, { "epoch": 17.14, "learning_rate": 0.0009206957756478524, "loss": 0.1236, "step": 26820 }, { "epoch": 17.14, "learning_rate": 0.0009206247781327653, "loss": 0.1301, "step": 26830 }, { "epoch": 17.15, "learning_rate": 0.0009205537806176783, "loss": 0.1248, "step": 26840 }, { "epoch": 17.16, "learning_rate": 0.0009204827831025915, "loss": 0.1393, "step": 26850 }, { "epoch": 17.16, "learning_rate": 0.0009204117855875044, "loss": 0.1158, "step": 26860 }, { "epoch": 17.17, "learning_rate": 0.0009203407880724175, "loss": 0.1415, "step": 26870 }, { "epoch": 17.18, "learning_rate": 0.0009202697905573305, "loss": 0.1166, "step": 26880 }, { "epoch": 17.18, "learning_rate": 0.0009201987930422435, "loss": 0.1273, "step": 26890 }, { "epoch": 17.19, "learning_rate": 0.0009201277955271565, "loss": 0.1322, "step": 26900 }, { "epoch": 17.19, "learning_rate": 0.0009200567980120697, "loss": 0.1042, "step": 26910 }, { "epoch": 17.2, "learning_rate": 0.0009199858004969826, "loss": 0.1692, "step": 26920 }, { "epoch": 17.21, "learning_rate": 0.0009199148029818957, "loss": 0.0913, "step": 26930 }, { "epoch": 17.21, "learning_rate": 0.0009198438054668086, "loss": 0.1293, "step": 26940 }, { "epoch": 17.22, "learning_rate": 0.0009197728079517217, "loss": 0.1275, "step": 26950 }, { "epoch": 17.23, "learning_rate": 0.0009197018104366347, "loss": 0.1086, "step": 26960 }, { "epoch": 17.23, "learning_rate": 0.0009196308129215477, "loss": 0.1151, "step": 26970 }, { "epoch": 17.24, "learning_rate": 0.0009195598154064608, "loss": 0.1449, "step": 26980 }, { "epoch": 17.25, "learning_rate": 0.0009194888178913739, "loss": 0.1332, "step": 26990 }, { "epoch": 17.25, "learning_rate": 0.0009194178203762868, "loss": 0.1148, "step": 27000 }, { "epoch": 17.26, "learning_rate": 0.0009193468228611999, "loss": 0.1344, "step": 27010 }, { "epoch": 17.27, "learning_rate": 0.0009192758253461128, "loss": 0.1061, "step": 27020 }, { "epoch": 17.27, "learning_rate": 0.0009192048278310259, "loss": 0.1154, "step": 27030 }, { "epoch": 17.28, "learning_rate": 0.000919133830315939, "loss": 0.0995, "step": 27040 }, { "epoch": 17.28, "learning_rate": 0.000919062832800852, "loss": 0.1339, "step": 27050 }, { "epoch": 17.29, "learning_rate": 0.000918991835285765, "loss": 0.1048, "step": 27060 }, { "epoch": 17.3, "learning_rate": 0.0009189208377706781, "loss": 0.1519, "step": 27070 }, { "epoch": 17.3, "learning_rate": 0.000918849840255591, "loss": 0.078, "step": 27080 }, { "epoch": 17.31, "learning_rate": 0.0009187788427405041, "loss": 0.1348, "step": 27090 }, { "epoch": 17.32, "learning_rate": 0.0009187078452254172, "loss": 0.1258, "step": 27100 }, { "epoch": 17.32, "learning_rate": 0.0009186368477103302, "loss": 0.1221, "step": 27110 }, { "epoch": 17.33, "learning_rate": 0.0009185658501952432, "loss": 0.1198, "step": 27120 }, { "epoch": 17.34, "learning_rate": 0.0009184948526801563, "loss": 0.0938, "step": 27130 }, { "epoch": 17.34, "learning_rate": 0.0009184238551650692, "loss": 0.1246, "step": 27140 }, { "epoch": 17.35, "learning_rate": 0.0009183528576499822, "loss": 0.132, "step": 27150 }, { "epoch": 17.35, "learning_rate": 0.0009182818601348953, "loss": 0.1265, "step": 27160 }, { "epoch": 17.36, "learning_rate": 0.0009182108626198083, "loss": 0.1348, "step": 27170 }, { "epoch": 17.37, "learning_rate": 0.0009181398651047214, "loss": 0.1407, "step": 27180 }, { "epoch": 17.37, "learning_rate": 0.0009180688675896343, "loss": 0.1409, "step": 27190 }, { "epoch": 17.38, "learning_rate": 0.0009179978700745474, "loss": 0.0996, "step": 27200 }, { "epoch": 17.39, "learning_rate": 0.0009179268725594603, "loss": 0.1391, "step": 27210 }, { "epoch": 17.39, "learning_rate": 0.0009178558750443735, "loss": 0.1252, "step": 27220 }, { "epoch": 17.4, "learning_rate": 0.0009177848775292865, "loss": 0.1404, "step": 27230 }, { "epoch": 17.41, "learning_rate": 0.0009177138800141995, "loss": 0.1238, "step": 27240 }, { "epoch": 17.41, "learning_rate": 0.0009176428824991125, "loss": 0.126, "step": 27250 }, { "epoch": 17.42, "learning_rate": 0.0009175718849840256, "loss": 0.1264, "step": 27260 }, { "epoch": 17.42, "learning_rate": 0.0009175008874689385, "loss": 0.1355, "step": 27270 }, { "epoch": 17.43, "learning_rate": 0.0009174298899538517, "loss": 0.1192, "step": 27280 }, { "epoch": 17.44, "learning_rate": 0.0009173588924387647, "loss": 0.1355, "step": 27290 }, { "epoch": 17.44, "learning_rate": 0.0009172878949236777, "loss": 0.1259, "step": 27300 }, { "epoch": 17.45, "learning_rate": 0.0009172168974085907, "loss": 0.1207, "step": 27310 }, { "epoch": 17.46, "learning_rate": 0.0009171458998935038, "loss": 0.111, "step": 27320 }, { "epoch": 17.46, "learning_rate": 0.0009170749023784167, "loss": 0.1248, "step": 27330 }, { "epoch": 17.47, "learning_rate": 0.0009170039048633299, "loss": 0.1155, "step": 27340 }, { "epoch": 17.48, "learning_rate": 0.0009169329073482428, "loss": 0.1202, "step": 27350 }, { "epoch": 17.48, "learning_rate": 0.0009168619098331559, "loss": 0.1468, "step": 27360 }, { "epoch": 17.49, "learning_rate": 0.0009167909123180689, "loss": 0.1278, "step": 27370 }, { "epoch": 17.5, "learning_rate": 0.0009167199148029819, "loss": 0.1361, "step": 27380 }, { "epoch": 17.5, "learning_rate": 0.0009166489172878949, "loss": 0.1249, "step": 27390 }, { "epoch": 17.51, "learning_rate": 0.0009165779197728081, "loss": 0.1392, "step": 27400 }, { "epoch": 17.51, "learning_rate": 0.000916506922257721, "loss": 0.1001, "step": 27410 }, { "epoch": 17.52, "learning_rate": 0.0009164359247426341, "loss": 0.1354, "step": 27420 }, { "epoch": 17.53, "learning_rate": 0.000916364927227547, "loss": 0.1063, "step": 27430 }, { "epoch": 17.53, "learning_rate": 0.0009162939297124601, "loss": 0.1433, "step": 27440 }, { "epoch": 17.54, "learning_rate": 0.0009162229321973731, "loss": 0.1294, "step": 27450 }, { "epoch": 17.55, "learning_rate": 0.0009161519346822862, "loss": 0.1183, "step": 27460 }, { "epoch": 17.55, "learning_rate": 0.0009160809371671992, "loss": 0.1353, "step": 27470 }, { "epoch": 17.56, "learning_rate": 0.0009160099396521122, "loss": 0.1012, "step": 27480 }, { "epoch": 17.57, "learning_rate": 0.0009159389421370252, "loss": 0.1388, "step": 27490 }, { "epoch": 17.57, "learning_rate": 0.0009158679446219382, "loss": 0.1329, "step": 27500 }, { "epoch": 17.58, "learning_rate": 0.0009157969471068513, "loss": 0.1465, "step": 27510 }, { "epoch": 17.58, "learning_rate": 0.0009157259495917643, "loss": 0.1405, "step": 27520 }, { "epoch": 17.59, "learning_rate": 0.0009156549520766774, "loss": 0.1375, "step": 27530 }, { "epoch": 17.6, "learning_rate": 0.0009155839545615903, "loss": 0.1431, "step": 27540 }, { "epoch": 17.6, "learning_rate": 0.0009155129570465034, "loss": 0.1102, "step": 27550 }, { "epoch": 17.61, "learning_rate": 0.0009154419595314164, "loss": 0.1477, "step": 27560 }, { "epoch": 17.62, "learning_rate": 0.0009153709620163294, "loss": 0.1203, "step": 27570 }, { "epoch": 17.62, "learning_rate": 0.0009152999645012424, "loss": 0.142, "step": 27580 }, { "epoch": 17.63, "learning_rate": 0.0009152289669861556, "loss": 0.1107, "step": 27590 }, { "epoch": 17.64, "learning_rate": 0.0009151579694710685, "loss": 0.0862, "step": 27600 }, { "epoch": 17.64, "learning_rate": 0.0009150869719559816, "loss": 0.1198, "step": 27610 }, { "epoch": 17.65, "learning_rate": 0.0009150159744408945, "loss": 0.1224, "step": 27620 }, { "epoch": 17.65, "learning_rate": 0.0009149449769258076, "loss": 0.1223, "step": 27630 }, { "epoch": 17.66, "learning_rate": 0.0009148739794107206, "loss": 0.1656, "step": 27640 }, { "epoch": 17.67, "learning_rate": 0.0009148029818956337, "loss": 0.1391, "step": 27650 }, { "epoch": 17.67, "learning_rate": 0.0009147319843805467, "loss": 0.1391, "step": 27660 }, { "epoch": 17.68, "learning_rate": 0.0009146609868654598, "loss": 0.1292, "step": 27670 }, { "epoch": 17.69, "learning_rate": 0.0009145899893503727, "loss": 0.1195, "step": 27680 }, { "epoch": 17.69, "learning_rate": 0.0009145189918352858, "loss": 0.1563, "step": 27690 }, { "epoch": 17.7, "learning_rate": 0.0009144479943201987, "loss": 0.1435, "step": 27700 }, { "epoch": 17.71, "learning_rate": 0.0009143769968051119, "loss": 0.1365, "step": 27710 }, { "epoch": 17.71, "learning_rate": 0.0009143059992900249, "loss": 0.1296, "step": 27720 }, { "epoch": 17.72, "learning_rate": 0.000914235001774938, "loss": 0.1118, "step": 27730 }, { "epoch": 17.73, "learning_rate": 0.0009141640042598509, "loss": 0.115, "step": 27740 }, { "epoch": 17.73, "learning_rate": 0.000914093006744764, "loss": 0.1173, "step": 27750 }, { "epoch": 17.74, "learning_rate": 0.0009140220092296769, "loss": 0.1318, "step": 27760 }, { "epoch": 17.74, "learning_rate": 0.0009139510117145901, "loss": 0.1547, "step": 27770 }, { "epoch": 17.75, "learning_rate": 0.0009138800141995031, "loss": 0.1173, "step": 27780 }, { "epoch": 17.76, "learning_rate": 0.000913809016684416, "loss": 0.1173, "step": 27790 }, { "epoch": 17.76, "learning_rate": 0.0009137380191693291, "loss": 0.1154, "step": 27800 }, { "epoch": 17.77, "learning_rate": 0.000913667021654242, "loss": 0.1305, "step": 27810 }, { "epoch": 17.78, "learning_rate": 0.0009135960241391551, "loss": 0.1241, "step": 27820 }, { "epoch": 17.78, "learning_rate": 0.0009135250266240682, "loss": 0.1159, "step": 27830 }, { "epoch": 17.79, "learning_rate": 0.0009134540291089812, "loss": 0.1501, "step": 27840 }, { "epoch": 17.8, "learning_rate": 0.0009133830315938942, "loss": 0.1342, "step": 27850 }, { "epoch": 17.8, "learning_rate": 0.0009133120340788073, "loss": 0.1278, "step": 27860 }, { "epoch": 17.81, "learning_rate": 0.0009132410365637202, "loss": 0.134, "step": 27870 }, { "epoch": 17.81, "learning_rate": 0.0009131700390486333, "loss": 0.1363, "step": 27880 }, { "epoch": 17.82, "learning_rate": 0.0009130990415335464, "loss": 0.1314, "step": 27890 }, { "epoch": 17.83, "learning_rate": 0.0009130280440184594, "loss": 0.1324, "step": 27900 }, { "epoch": 17.83, "learning_rate": 0.0009129570465033724, "loss": 0.1197, "step": 27910 }, { "epoch": 17.84, "learning_rate": 0.0009128860489882854, "loss": 0.1307, "step": 27920 }, { "epoch": 17.85, "learning_rate": 0.0009128150514731984, "loss": 0.1197, "step": 27930 }, { "epoch": 17.85, "learning_rate": 0.0009127440539581115, "loss": 0.1454, "step": 27940 }, { "epoch": 17.86, "learning_rate": 0.0009126730564430245, "loss": 0.1155, "step": 27950 }, { "epoch": 17.87, "learning_rate": 0.0009126020589279376, "loss": 0.1238, "step": 27960 }, { "epoch": 17.87, "learning_rate": 0.0009125310614128506, "loss": 0.102, "step": 27970 }, { "epoch": 17.88, "learning_rate": 0.0009124600638977636, "loss": 0.1501, "step": 27980 }, { "epoch": 17.88, "learning_rate": 0.0009123890663826766, "loss": 0.1435, "step": 27990 }, { "epoch": 17.89, "learning_rate": 0.0009123180688675897, "loss": 0.1381, "step": 28000 }, { "epoch": 17.9, "learning_rate": 0.0009122470713525027, "loss": 0.1478, "step": 28010 }, { "epoch": 17.9, "learning_rate": 0.0009121760738374158, "loss": 0.1608, "step": 28020 }, { "epoch": 17.91, "learning_rate": 0.0009121050763223287, "loss": 0.16, "step": 28030 }, { "epoch": 17.92, "learning_rate": 0.0009120340788072418, "loss": 0.1383, "step": 28040 }, { "epoch": 17.92, "learning_rate": 0.0009119630812921548, "loss": 0.1365, "step": 28050 }, { "epoch": 17.93, "learning_rate": 0.0009118920837770678, "loss": 0.1274, "step": 28060 }, { "epoch": 17.94, "learning_rate": 0.0009118210862619809, "loss": 0.1176, "step": 28070 }, { "epoch": 17.94, "learning_rate": 0.000911750088746894, "loss": 0.1474, "step": 28080 }, { "epoch": 17.95, "learning_rate": 0.0009116790912318069, "loss": 0.1065, "step": 28090 }, { "epoch": 17.95, "learning_rate": 0.0009116080937167199, "loss": 0.1241, "step": 28100 }, { "epoch": 17.96, "learning_rate": 0.000911537096201633, "loss": 0.123, "step": 28110 }, { "epoch": 17.97, "learning_rate": 0.0009114660986865459, "loss": 0.1231, "step": 28120 }, { "epoch": 17.97, "learning_rate": 0.0009113951011714591, "loss": 0.1273, "step": 28130 }, { "epoch": 17.98, "learning_rate": 0.000911324103656372, "loss": 0.136, "step": 28140 }, { "epoch": 17.99, "learning_rate": 0.0009112531061412851, "loss": 0.1333, "step": 28150 }, { "epoch": 17.99, "learning_rate": 0.0009111821086261981, "loss": 0.1464, "step": 28160 }, { "epoch": 18.0, "learning_rate": 0.0009111111111111111, "loss": 0.1176, "step": 28170 }, { "epoch": 18.01, "learning_rate": 0.0009110401135960241, "loss": 0.1254, "step": 28180 }, { "epoch": 18.01, "learning_rate": 0.0009109691160809372, "loss": 0.111, "step": 28190 }, { "epoch": 18.02, "learning_rate": 0.0009108981185658502, "loss": 0.1208, "step": 28200 }, { "epoch": 18.03, "learning_rate": 0.0009108271210507633, "loss": 0.1233, "step": 28210 }, { "epoch": 18.03, "learning_rate": 0.0009107561235356762, "loss": 0.097, "step": 28220 }, { "epoch": 18.04, "learning_rate": 0.0009106851260205893, "loss": 0.1101, "step": 28230 }, { "epoch": 18.04, "learning_rate": 0.0009106141285055023, "loss": 0.0998, "step": 28240 }, { "epoch": 18.05, "learning_rate": 0.0009105431309904153, "loss": 0.116, "step": 28250 }, { "epoch": 18.06, "learning_rate": 0.0009104721334753284, "loss": 0.1068, "step": 28260 }, { "epoch": 18.06, "learning_rate": 0.0009104011359602415, "loss": 0.1056, "step": 28270 }, { "epoch": 18.07, "learning_rate": 0.0009103301384451544, "loss": 0.1343, "step": 28280 }, { "epoch": 18.08, "learning_rate": 0.0009102591409300675, "loss": 0.1656, "step": 28290 }, { "epoch": 18.08, "learning_rate": 0.0009101881434149804, "loss": 0.1124, "step": 28300 }, { "epoch": 18.09, "learning_rate": 0.0009101171458998935, "loss": 0.1164, "step": 28310 }, { "epoch": 18.1, "learning_rate": 0.0009100461483848066, "loss": 0.0916, "step": 28320 }, { "epoch": 18.1, "learning_rate": 0.0009099751508697196, "loss": 0.1054, "step": 28330 }, { "epoch": 18.11, "learning_rate": 0.0009099041533546326, "loss": 0.1102, "step": 28340 }, { "epoch": 18.11, "learning_rate": 0.0009098331558395457, "loss": 0.1293, "step": 28350 }, { "epoch": 18.12, "learning_rate": 0.0009097621583244586, "loss": 0.1283, "step": 28360 }, { "epoch": 18.13, "learning_rate": 0.0009096911608093717, "loss": 0.1168, "step": 28370 }, { "epoch": 18.13, "learning_rate": 0.0009096201632942848, "loss": 0.0809, "step": 28380 }, { "epoch": 18.14, "learning_rate": 0.0009095491657791978, "loss": 0.1197, "step": 28390 }, { "epoch": 18.15, "learning_rate": 0.0009094781682641108, "loss": 0.1149, "step": 28400 }, { "epoch": 18.15, "learning_rate": 0.0009094071707490237, "loss": 0.1162, "step": 28410 }, { "epoch": 18.16, "learning_rate": 0.0009093361732339368, "loss": 0.1258, "step": 28420 }, { "epoch": 18.17, "learning_rate": 0.0009092651757188498, "loss": 0.1266, "step": 28430 }, { "epoch": 18.17, "learning_rate": 0.0009091941782037629, "loss": 0.1502, "step": 28440 }, { "epoch": 18.18, "learning_rate": 0.0009091231806886759, "loss": 0.1195, "step": 28450 }, { "epoch": 18.19, "learning_rate": 0.000909052183173589, "loss": 0.1035, "step": 28460 }, { "epoch": 18.19, "learning_rate": 0.0009089811856585019, "loss": 0.1482, "step": 28470 }, { "epoch": 18.2, "learning_rate": 0.000908910188143415, "loss": 0.1187, "step": 28480 }, { "epoch": 18.2, "learning_rate": 0.000908839190628328, "loss": 0.1204, "step": 28490 }, { "epoch": 18.21, "learning_rate": 0.0009087681931132411, "loss": 0.1059, "step": 28500 }, { "epoch": 18.22, "learning_rate": 0.0009086971955981541, "loss": 0.1164, "step": 28510 }, { "epoch": 18.22, "learning_rate": 0.0009086261980830671, "loss": 0.1168, "step": 28520 }, { "epoch": 18.23, "learning_rate": 0.0009085552005679801, "loss": 0.1324, "step": 28530 }, { "epoch": 18.24, "learning_rate": 0.0009084842030528932, "loss": 0.124, "step": 28540 }, { "epoch": 18.24, "learning_rate": 0.0009084132055378061, "loss": 0.1028, "step": 28550 }, { "epoch": 18.25, "learning_rate": 0.0009083422080227193, "loss": 0.1057, "step": 28560 }, { "epoch": 18.26, "learning_rate": 0.0009082712105076323, "loss": 0.1132, "step": 28570 }, { "epoch": 18.26, "learning_rate": 0.0009082002129925453, "loss": 0.1164, "step": 28580 }, { "epoch": 18.27, "learning_rate": 0.0009081292154774583, "loss": 0.1129, "step": 28590 }, { "epoch": 18.27, "learning_rate": 0.0009080582179623713, "loss": 0.1185, "step": 28600 }, { "epoch": 18.28, "learning_rate": 0.0009079872204472843, "loss": 0.1268, "step": 28610 }, { "epoch": 18.29, "learning_rate": 0.0009079162229321975, "loss": 0.1184, "step": 28620 }, { "epoch": 18.29, "learning_rate": 0.0009078452254171104, "loss": 0.0942, "step": 28630 }, { "epoch": 18.3, "learning_rate": 0.0009077742279020235, "loss": 0.109, "step": 28640 }, { "epoch": 18.31, "learning_rate": 0.0009077032303869365, "loss": 0.0995, "step": 28650 }, { "epoch": 18.31, "learning_rate": 0.0009076322328718495, "loss": 0.1432, "step": 28660 }, { "epoch": 18.32, "learning_rate": 0.0009075612353567625, "loss": 0.1517, "step": 28670 }, { "epoch": 18.33, "learning_rate": 0.0009074902378416757, "loss": 0.1044, "step": 28680 }, { "epoch": 18.33, "learning_rate": 0.0009074192403265886, "loss": 0.0846, "step": 28690 }, { "epoch": 18.34, "learning_rate": 0.0009073482428115017, "loss": 0.1275, "step": 28700 }, { "epoch": 18.34, "learning_rate": 0.0009072772452964146, "loss": 0.0935, "step": 28710 }, { "epoch": 18.35, "learning_rate": 0.0009072062477813276, "loss": 0.0898, "step": 28720 }, { "epoch": 18.36, "learning_rate": 0.0009071352502662407, "loss": 0.127, "step": 28730 }, { "epoch": 18.36, "learning_rate": 0.0009070642527511536, "loss": 0.1172, "step": 28740 }, { "epoch": 18.37, "learning_rate": 0.0009069932552360668, "loss": 0.1069, "step": 28750 }, { "epoch": 18.38, "learning_rate": 0.0009069222577209798, "loss": 0.1378, "step": 28760 }, { "epoch": 18.38, "learning_rate": 0.0009068512602058928, "loss": 0.1268, "step": 28770 }, { "epoch": 18.39, "learning_rate": 0.0009067802626908058, "loss": 0.0926, "step": 28780 }, { "epoch": 18.4, "learning_rate": 0.0009067092651757188, "loss": 0.1208, "step": 28790 }, { "epoch": 18.4, "learning_rate": 0.0009066382676606318, "loss": 0.1234, "step": 28800 }, { "epoch": 18.41, "learning_rate": 0.000906567270145545, "loss": 0.1548, "step": 28810 }, { "epoch": 18.42, "learning_rate": 0.0009064962726304579, "loss": 0.1121, "step": 28820 }, { "epoch": 18.42, "learning_rate": 0.000906425275115371, "loss": 0.1457, "step": 28830 }, { "epoch": 18.43, "learning_rate": 0.000906354277600284, "loss": 0.1252, "step": 28840 }, { "epoch": 18.43, "learning_rate": 0.000906283280085197, "loss": 0.1345, "step": 28850 }, { "epoch": 18.44, "learning_rate": 0.00090621228257011, "loss": 0.1101, "step": 28860 }, { "epoch": 18.45, "learning_rate": 0.0009061412850550232, "loss": 0.1146, "step": 28870 }, { "epoch": 18.45, "learning_rate": 0.0009060702875399361, "loss": 0.1232, "step": 28880 }, { "epoch": 18.46, "learning_rate": 0.0009059992900248492, "loss": 0.1239, "step": 28890 }, { "epoch": 18.47, "learning_rate": 0.0009059282925097621, "loss": 0.1202, "step": 28900 }, { "epoch": 18.47, "learning_rate": 0.0009058572949946752, "loss": 0.1124, "step": 28910 }, { "epoch": 18.48, "learning_rate": 0.0009057862974795882, "loss": 0.1148, "step": 28920 }, { "epoch": 18.49, "learning_rate": 0.0009057152999645013, "loss": 0.1111, "step": 28930 }, { "epoch": 18.49, "learning_rate": 0.0009056443024494143, "loss": 0.1243, "step": 28940 }, { "epoch": 18.5, "learning_rate": 0.0009055733049343274, "loss": 0.1227, "step": 28950 }, { "epoch": 18.5, "learning_rate": 0.0009055023074192403, "loss": 0.0993, "step": 28960 }, { "epoch": 18.51, "learning_rate": 0.0009054313099041534, "loss": 0.1286, "step": 28970 }, { "epoch": 18.52, "learning_rate": 0.0009053603123890663, "loss": 0.1054, "step": 28980 }, { "epoch": 18.52, "learning_rate": 0.0009052893148739795, "loss": 0.1196, "step": 28990 }, { "epoch": 18.53, "learning_rate": 0.0009052183173588925, "loss": 0.1255, "step": 29000 }, { "epoch": 18.54, "learning_rate": 0.0009051473198438055, "loss": 0.1325, "step": 29010 }, { "epoch": 18.54, "learning_rate": 0.0009050763223287185, "loss": 0.1161, "step": 29020 }, { "epoch": 18.55, "learning_rate": 0.0009050053248136315, "loss": 0.1144, "step": 29030 }, { "epoch": 18.56, "learning_rate": 0.0009049343272985445, "loss": 0.1293, "step": 29040 }, { "epoch": 18.56, "learning_rate": 0.0009048633297834576, "loss": 0.1284, "step": 29050 }, { "epoch": 18.57, "learning_rate": 0.0009047923322683707, "loss": 0.1086, "step": 29060 }, { "epoch": 18.57, "learning_rate": 0.0009047213347532836, "loss": 0.1142, "step": 29070 }, { "epoch": 18.58, "learning_rate": 0.0009046503372381967, "loss": 0.1223, "step": 29080 }, { "epoch": 18.59, "learning_rate": 0.0009045793397231096, "loss": 0.137, "step": 29090 }, { "epoch": 18.59, "learning_rate": 0.0009045083422080227, "loss": 0.1287, "step": 29100 }, { "epoch": 18.6, "learning_rate": 0.0009044373446929358, "loss": 0.1208, "step": 29110 }, { "epoch": 18.61, "learning_rate": 0.0009043663471778488, "loss": 0.1086, "step": 29120 }, { "epoch": 18.61, "learning_rate": 0.0009042953496627618, "loss": 0.125, "step": 29130 }, { "epoch": 18.62, "learning_rate": 0.0009042243521476749, "loss": 0.1193, "step": 29140 }, { "epoch": 18.63, "learning_rate": 0.0009041533546325878, "loss": 0.1054, "step": 29150 }, { "epoch": 18.63, "learning_rate": 0.0009040823571175009, "loss": 0.1231, "step": 29160 }, { "epoch": 18.64, "learning_rate": 0.000904011359602414, "loss": 0.1086, "step": 29170 }, { "epoch": 18.65, "learning_rate": 0.000903940362087327, "loss": 0.1391, "step": 29180 }, { "epoch": 18.65, "learning_rate": 0.00090386936457224, "loss": 0.1039, "step": 29190 }, { "epoch": 18.66, "learning_rate": 0.000903798367057153, "loss": 0.14, "step": 29200 }, { "epoch": 18.66, "learning_rate": 0.000903727369542066, "loss": 0.1216, "step": 29210 }, { "epoch": 18.67, "learning_rate": 0.0009036563720269791, "loss": 0.1337, "step": 29220 }, { "epoch": 18.68, "learning_rate": 0.0009035853745118921, "loss": 0.1436, "step": 29230 }, { "epoch": 18.68, "learning_rate": 0.0009035143769968052, "loss": 0.1275, "step": 29240 }, { "epoch": 18.69, "learning_rate": 0.0009034433794817182, "loss": 0.1191, "step": 29250 }, { "epoch": 18.7, "learning_rate": 0.0009033723819666312, "loss": 0.1228, "step": 29260 }, { "epoch": 18.7, "learning_rate": 0.0009033013844515442, "loss": 0.1387, "step": 29270 }, { "epoch": 18.71, "learning_rate": 0.0009032303869364573, "loss": 0.146, "step": 29280 }, { "epoch": 18.72, "learning_rate": 0.0009031593894213703, "loss": 0.1387, "step": 29290 }, { "epoch": 18.72, "learning_rate": 0.0009030883919062834, "loss": 0.0893, "step": 29300 }, { "epoch": 18.73, "learning_rate": 0.0009030173943911963, "loss": 0.1221, "step": 29310 }, { "epoch": 18.73, "learning_rate": 0.0009029463968761094, "loss": 0.1158, "step": 29320 }, { "epoch": 18.74, "learning_rate": 0.0009028753993610224, "loss": 0.1393, "step": 29330 }, { "epoch": 18.75, "learning_rate": 0.0009028044018459353, "loss": 0.1195, "step": 29340 }, { "epoch": 18.75, "learning_rate": 0.0009027334043308484, "loss": 0.1365, "step": 29350 }, { "epoch": 18.76, "learning_rate": 0.0009026624068157615, "loss": 0.1153, "step": 29360 }, { "epoch": 18.77, "learning_rate": 0.0009025914093006745, "loss": 0.1396, "step": 29370 }, { "epoch": 18.77, "learning_rate": 0.0009025204117855875, "loss": 0.1326, "step": 29380 }, { "epoch": 18.78, "learning_rate": 0.0009024494142705005, "loss": 0.1302, "step": 29390 }, { "epoch": 18.79, "learning_rate": 0.0009023784167554135, "loss": 0.142, "step": 29400 }, { "epoch": 18.79, "learning_rate": 0.0009023074192403266, "loss": 0.1146, "step": 29410 }, { "epoch": 18.8, "learning_rate": 0.0009022364217252396, "loss": 0.1048, "step": 29420 }, { "epoch": 18.8, "learning_rate": 0.0009021654242101527, "loss": 0.1186, "step": 29430 }, { "epoch": 18.81, "learning_rate": 0.0009020944266950657, "loss": 0.1196, "step": 29440 }, { "epoch": 18.82, "learning_rate": 0.0009020234291799787, "loss": 0.1468, "step": 29450 }, { "epoch": 18.82, "learning_rate": 0.0009019524316648917, "loss": 0.1145, "step": 29460 }, { "epoch": 18.83, "learning_rate": 0.0009018814341498048, "loss": 0.112, "step": 29470 }, { "epoch": 18.84, "learning_rate": 0.0009018104366347178, "loss": 0.1304, "step": 29480 }, { "epoch": 18.84, "learning_rate": 0.0009017394391196309, "loss": 0.1395, "step": 29490 }, { "epoch": 18.85, "learning_rate": 0.0009016684416045438, "loss": 0.1276, "step": 29500 }, { "epoch": 18.86, "learning_rate": 0.0009015974440894569, "loss": 0.097, "step": 29510 }, { "epoch": 18.86, "learning_rate": 0.0009015264465743699, "loss": 0.1287, "step": 29520 }, { "epoch": 18.87, "learning_rate": 0.0009014554490592829, "loss": 0.1234, "step": 29530 }, { "epoch": 18.88, "learning_rate": 0.000901384451544196, "loss": 0.1509, "step": 29540 }, { "epoch": 18.88, "learning_rate": 0.0009013134540291091, "loss": 0.1317, "step": 29550 }, { "epoch": 18.89, "learning_rate": 0.000901242456514022, "loss": 0.1198, "step": 29560 }, { "epoch": 18.89, "learning_rate": 0.0009011714589989351, "loss": 0.1212, "step": 29570 }, { "epoch": 18.9, "learning_rate": 0.000901100461483848, "loss": 0.1124, "step": 29580 }, { "epoch": 18.91, "learning_rate": 0.0009010294639687611, "loss": 0.1289, "step": 29590 }, { "epoch": 18.91, "learning_rate": 0.0009009584664536742, "loss": 0.0943, "step": 29600 }, { "epoch": 18.92, "learning_rate": 0.0009008874689385872, "loss": 0.1308, "step": 29610 }, { "epoch": 18.93, "learning_rate": 0.0009008164714235002, "loss": 0.1405, "step": 29620 }, { "epoch": 18.93, "learning_rate": 0.0009007454739084133, "loss": 0.1067, "step": 29630 }, { "epoch": 18.94, "learning_rate": 0.0009006744763933262, "loss": 0.1219, "step": 29640 }, { "epoch": 18.95, "learning_rate": 0.0009006034788782392, "loss": 0.1205, "step": 29650 }, { "epoch": 18.95, "learning_rate": 0.0009005324813631524, "loss": 0.1326, "step": 29660 }, { "epoch": 18.96, "learning_rate": 0.0009004614838480653, "loss": 0.1112, "step": 29670 }, { "epoch": 18.96, "learning_rate": 0.0009003904863329784, "loss": 0.1184, "step": 29680 }, { "epoch": 18.97, "learning_rate": 0.0009003194888178913, "loss": 0.1426, "step": 29690 }, { "epoch": 18.98, "learning_rate": 0.0009002484913028044, "loss": 0.1358, "step": 29700 }, { "epoch": 18.98, "learning_rate": 0.0009001774937877174, "loss": 0.1562, "step": 29710 }, { "epoch": 18.99, "learning_rate": 0.0009001064962726305, "loss": 0.1396, "step": 29720 }, { "epoch": 19.0, "learning_rate": 0.0009000354987575435, "loss": 0.1173, "step": 29730 }, { "epoch": 19.0, "learning_rate": 0.0008999645012424566, "loss": 0.1017, "step": 29740 }, { "epoch": 19.01, "learning_rate": 0.0008998935037273695, "loss": 0.1158, "step": 29750 }, { "epoch": 19.02, "learning_rate": 0.0008998225062122826, "loss": 0.109, "step": 29760 }, { "epoch": 19.02, "learning_rate": 0.0008997515086971955, "loss": 0.1313, "step": 29770 }, { "epoch": 19.03, "learning_rate": 0.0008996805111821087, "loss": 0.1366, "step": 29780 }, { "epoch": 19.04, "learning_rate": 0.0008996095136670217, "loss": 0.1002, "step": 29790 }, { "epoch": 19.04, "learning_rate": 0.0008995385161519347, "loss": 0.0708, "step": 29800 }, { "epoch": 19.05, "learning_rate": 0.0008994675186368477, "loss": 0.1137, "step": 29810 }, { "epoch": 19.05, "learning_rate": 0.0008993965211217608, "loss": 0.0956, "step": 29820 }, { "epoch": 19.06, "learning_rate": 0.0008993255236066737, "loss": 0.1274, "step": 29830 }, { "epoch": 19.07, "learning_rate": 0.0008992545260915869, "loss": 0.1048, "step": 29840 }, { "epoch": 19.07, "learning_rate": 0.0008991835285764999, "loss": 0.0961, "step": 29850 }, { "epoch": 19.08, "learning_rate": 0.0008991125310614129, "loss": 0.125, "step": 29860 }, { "epoch": 19.09, "learning_rate": 0.0008990415335463259, "loss": 0.1302, "step": 29870 }, { "epoch": 19.09, "learning_rate": 0.000898970536031239, "loss": 0.1347, "step": 29880 }, { "epoch": 19.1, "learning_rate": 0.0008988995385161519, "loss": 0.1157, "step": 29890 }, { "epoch": 19.11, "learning_rate": 0.0008988285410010651, "loss": 0.1186, "step": 29900 }, { "epoch": 19.11, "learning_rate": 0.000898757543485978, "loss": 0.1363, "step": 29910 }, { "epoch": 19.12, "learning_rate": 0.0008986865459708911, "loss": 0.1319, "step": 29920 }, { "epoch": 19.12, "learning_rate": 0.0008986155484558041, "loss": 0.1121, "step": 29930 }, { "epoch": 19.13, "learning_rate": 0.0008985445509407171, "loss": 0.0922, "step": 29940 }, { "epoch": 19.14, "learning_rate": 0.0008984735534256301, "loss": 0.1178, "step": 29950 }, { "epoch": 19.14, "learning_rate": 0.000898402555910543, "loss": 0.1193, "step": 29960 }, { "epoch": 19.15, "learning_rate": 0.0008983315583954562, "loss": 0.111, "step": 29970 }, { "epoch": 19.16, "learning_rate": 0.0008982605608803692, "loss": 0.0945, "step": 29980 }, { "epoch": 19.16, "learning_rate": 0.0008981895633652822, "loss": 0.119, "step": 29990 }, { "epoch": 19.17, "learning_rate": 0.0008981185658501952, "loss": 0.1203, "step": 30000 } ], "max_steps": 156500, "num_train_epochs": 100, "total_flos": 0.0, "trial_name": null, "trial_params": null }