{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.6972165648336728, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.999999431403446e-05, "loss": 1.8514, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.9999977256144297e-05, "loss": 1.2222, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.9999948826348907e-05, "loss": 1.1193, "step": 60 }, { "epoch": 0.0, "learning_rate": 1.9999909024680632e-05, "loss": 1.0893, "step": 80 }, { "epoch": 0.0, "learning_rate": 1.9999857851184724e-05, "loss": 1.0828, "step": 100 }, { "epoch": 0.0, "learning_rate": 1.9999795305919377e-05, "loss": 1.0137, "step": 120 }, { "epoch": 0.0, "learning_rate": 1.9999721388955722e-05, "loss": 1.0248, "step": 140 }, { "epoch": 0.01, "learning_rate": 1.9999636100377818e-05, "loss": 1.0394, "step": 160 }, { "epoch": 0.01, "learning_rate": 1.9999539440282648e-05, "loss": 1.0873, "step": 180 }, { "epoch": 0.01, "learning_rate": 1.999943140878014e-05, "loss": 1.1132, "step": 200 }, { "epoch": 0.01, "learning_rate": 1.999931200599314e-05, "loss": 1.0493, "step": 220 }, { "epoch": 0.01, "learning_rate": 1.9999181232057437e-05, "loss": 1.0894, "step": 240 }, { "epoch": 0.01, "learning_rate": 1.9999039087121746e-05, "loss": 1.1671, "step": 260 }, { "epoch": 0.01, "learning_rate": 1.9998885571347712e-05, "loss": 1.0525, "step": 280 }, { "epoch": 0.01, "learning_rate": 1.9998720684909913e-05, "loss": 1.0033, "step": 300 }, { "epoch": 0.01, "learning_rate": 1.999854442799585e-05, "loss": 1.0375, "step": 320 }, { "epoch": 0.01, "learning_rate": 1.9998356800805973e-05, "loss": 1.0725, "step": 340 }, { "epoch": 0.01, "learning_rate": 1.9998157803553638e-05, "loss": 1.0104, "step": 360 }, { "epoch": 0.01, "learning_rate": 1.9997947436465157e-05, "loss": 1.0205, "step": 380 }, { "epoch": 0.01, "learning_rate": 1.9997725699779747e-05, "loss": 1.079, "step": 400 }, { "epoch": 0.01, "learning_rate": 1.999749259374957e-05, "loss": 1.0023, "step": 420 }, { "epoch": 0.01, "learning_rate": 1.999724811863971e-05, "loss": 1.0257, "step": 440 }, { "epoch": 0.02, "learning_rate": 1.999699227472818e-05, "loss": 0.9901, "step": 460 }, { "epoch": 0.02, "learning_rate": 1.9996725062305934e-05, "loss": 0.9934, "step": 480 }, { "epoch": 0.02, "learning_rate": 1.999644648167684e-05, "loss": 1.056, "step": 500 }, { "epoch": 0.02, "learning_rate": 1.9996156533157692e-05, "loss": 1.0688, "step": 520 }, { "epoch": 0.02, "learning_rate": 1.999585521707822e-05, "loss": 1.1045, "step": 540 }, { "epoch": 0.02, "learning_rate": 1.999554253378108e-05, "loss": 1.0399, "step": 560 }, { "epoch": 0.02, "learning_rate": 1.9995218483621858e-05, "loss": 1.0219, "step": 580 }, { "epoch": 0.02, "learning_rate": 1.9994883066969055e-05, "loss": 1.0287, "step": 600 }, { "epoch": 0.02, "learning_rate": 1.9994536284204106e-05, "loss": 1.0638, "step": 620 }, { "epoch": 0.02, "learning_rate": 1.9994178135721372e-05, "loss": 1.0247, "step": 640 }, { "epoch": 0.02, "learning_rate": 1.999380862192813e-05, "loss": 0.99, "step": 660 }, { "epoch": 0.02, "learning_rate": 1.99934277432446e-05, "loss": 1.0635, "step": 680 }, { "epoch": 0.02, "learning_rate": 1.9993035500103904e-05, "loss": 0.9725, "step": 700 }, { "epoch": 0.02, "learning_rate": 1.9992631892952108e-05, "loss": 1.0221, "step": 720 }, { "epoch": 0.03, "learning_rate": 1.999221692224818e-05, "loss": 1.0333, "step": 740 }, { "epoch": 0.03, "learning_rate": 1.9991790588464034e-05, "loss": 1.0619, "step": 760 }, { "epoch": 0.03, "learning_rate": 1.9991352892084483e-05, "loss": 0.9604, "step": 780 }, { "epoch": 0.03, "learning_rate": 1.999090383360728e-05, "loss": 0.9817, "step": 800 }, { "epoch": 0.03, "learning_rate": 1.9990443413543086e-05, "loss": 1.0362, "step": 820 }, { "epoch": 0.03, "learning_rate": 1.998997163241549e-05, "loss": 1.0436, "step": 840 }, { "epoch": 0.03, "learning_rate": 1.9989488490761e-05, "loss": 1.0088, "step": 860 }, { "epoch": 0.03, "learning_rate": 1.9988993989129035e-05, "loss": 1.06, "step": 880 }, { "epoch": 0.03, "learning_rate": 1.9988488128081943e-05, "loss": 1.0384, "step": 900 }, { "epoch": 0.03, "learning_rate": 1.998797090819499e-05, "loss": 0.9912, "step": 920 }, { "epoch": 0.03, "learning_rate": 1.9987442330056352e-05, "loss": 1.0263, "step": 940 }, { "epoch": 0.03, "learning_rate": 1.9986902394267117e-05, "loss": 0.9909, "step": 960 }, { "epoch": 0.03, "learning_rate": 1.9986351101441308e-05, "loss": 0.993, "step": 980 }, { "epoch": 0.03, "learning_rate": 1.9985788452205847e-05, "loss": 1.0823, "step": 1000 }, { "epoch": 0.03, "learning_rate": 1.9985214447200566e-05, "loss": 0.9748, "step": 1020 }, { "epoch": 0.04, "learning_rate": 1.9984629087078234e-05, "loss": 0.9944, "step": 1040 }, { "epoch": 0.04, "learning_rate": 1.998403237250451e-05, "loss": 1.0813, "step": 1060 }, { "epoch": 0.04, "learning_rate": 1.9983424304157976e-05, "loss": 0.9385, "step": 1080 }, { "epoch": 0.04, "learning_rate": 1.998280488273012e-05, "loss": 1.0154, "step": 1100 }, { "epoch": 0.04, "learning_rate": 1.9982174108925346e-05, "loss": 0.9694, "step": 1120 }, { "epoch": 0.04, "learning_rate": 1.998153198346097e-05, "loss": 0.9958, "step": 1140 }, { "epoch": 0.04, "learning_rate": 1.9980878507067207e-05, "loss": 1.1112, "step": 1160 }, { "epoch": 0.04, "learning_rate": 1.9980213680487186e-05, "loss": 1.025, "step": 1180 }, { "epoch": 0.04, "learning_rate": 1.9979537504476945e-05, "loss": 1.025, "step": 1200 }, { "epoch": 0.04, "learning_rate": 1.997884997980543e-05, "loss": 0.9974, "step": 1220 }, { "epoch": 0.04, "learning_rate": 1.9978151107254477e-05, "loss": 1.0615, "step": 1240 }, { "epoch": 0.04, "learning_rate": 1.9977440887618855e-05, "loss": 0.9963, "step": 1260 }, { "epoch": 0.04, "learning_rate": 1.9976719321706213e-05, "loss": 0.9929, "step": 1280 }, { "epoch": 0.04, "learning_rate": 1.997598641033711e-05, "loss": 1.0235, "step": 1300 }, { "epoch": 0.04, "learning_rate": 1.997524215434501e-05, "loss": 0.9834, "step": 1320 }, { "epoch": 0.05, "learning_rate": 1.9974486554576274e-05, "loss": 0.9981, "step": 1340 }, { "epoch": 0.05, "learning_rate": 1.9973719611890164e-05, "loss": 0.914, "step": 1360 }, { "epoch": 0.05, "learning_rate": 1.9972941327158848e-05, "loss": 0.9992, "step": 1380 }, { "epoch": 0.05, "learning_rate": 1.997215170126738e-05, "loss": 0.9329, "step": 1400 }, { "epoch": 0.05, "learning_rate": 1.9971350735113718e-05, "loss": 1.0179, "step": 1420 }, { "epoch": 0.05, "learning_rate": 1.9970538429608716e-05, "loss": 1.0618, "step": 1440 }, { "epoch": 0.05, "learning_rate": 1.996971478567612e-05, "loss": 1.02, "step": 1460 }, { "epoch": 0.05, "learning_rate": 1.996887980425258e-05, "loss": 1.0123, "step": 1480 }, { "epoch": 0.05, "learning_rate": 1.996803348628762e-05, "loss": 1.0023, "step": 1500 }, { "epoch": 0.05, "learning_rate": 1.9967175832743677e-05, "loss": 0.9477, "step": 1520 }, { "epoch": 0.05, "learning_rate": 1.9966306844596063e-05, "loss": 0.9968, "step": 1540 }, { "epoch": 0.05, "learning_rate": 1.9965426522832985e-05, "loss": 0.9729, "step": 1560 }, { "epoch": 0.05, "learning_rate": 1.9964534868455544e-05, "loss": 1.0453, "step": 1580 }, { "epoch": 0.05, "learning_rate": 1.9963631882477716e-05, "loss": 1.0657, "step": 1600 }, { "epoch": 0.05, "learning_rate": 1.9962717565926376e-05, "loss": 0.9159, "step": 1620 }, { "epoch": 0.06, "learning_rate": 1.9961791919841275e-05, "loss": 0.9438, "step": 1640 }, { "epoch": 0.06, "learning_rate": 1.9960854945275055e-05, "loss": 0.9773, "step": 1660 }, { "epoch": 0.06, "learning_rate": 1.995990664329323e-05, "loss": 1.0714, "step": 1680 }, { "epoch": 0.06, "learning_rate": 1.9958947014974216e-05, "loss": 0.9914, "step": 1700 }, { "epoch": 0.06, "learning_rate": 1.9957976061409277e-05, "loss": 0.9779, "step": 1720 }, { "epoch": 0.06, "learning_rate": 1.9956993783702594e-05, "loss": 0.9906, "step": 1740 }, { "epoch": 0.06, "learning_rate": 1.9956000182971192e-05, "loss": 0.9829, "step": 1760 }, { "epoch": 0.06, "learning_rate": 1.9954995260344993e-05, "loss": 1.0036, "step": 1780 }, { "epoch": 0.06, "learning_rate": 1.995397901696679e-05, "loss": 0.9597, "step": 1800 }, { "epoch": 0.06, "learning_rate": 1.995295145399224e-05, "loss": 0.967, "step": 1820 }, { "epoch": 0.06, "learning_rate": 1.9951912572589892e-05, "loss": 1.0017, "step": 1840 }, { "epoch": 0.06, "learning_rate": 1.9950862373941145e-05, "loss": 0.9761, "step": 1860 }, { "epoch": 0.06, "learning_rate": 1.994980085924028e-05, "loss": 0.9761, "step": 1880 }, { "epoch": 0.06, "learning_rate": 1.994872802969445e-05, "loss": 1.0116, "step": 1900 }, { "epoch": 0.07, "learning_rate": 1.994764388652366e-05, "loss": 1.0121, "step": 1920 }, { "epoch": 0.07, "learning_rate": 1.9946548430960796e-05, "loss": 0.9758, "step": 1940 }, { "epoch": 0.07, "learning_rate": 1.9945441664251605e-05, "loss": 0.9948, "step": 1960 }, { "epoch": 0.07, "learning_rate": 1.9944323587654685e-05, "loss": 1.0505, "step": 1980 }, { "epoch": 0.07, "learning_rate": 1.9943194202441514e-05, "loss": 0.9806, "step": 2000 }, { "epoch": 0.07, "learning_rate": 1.9942053509896417e-05, "loss": 0.9377, "step": 2020 }, { "epoch": 0.07, "learning_rate": 1.9940901511316585e-05, "loss": 1.0366, "step": 2040 }, { "epoch": 0.07, "learning_rate": 1.993973820801206e-05, "loss": 0.9959, "step": 2060 }, { "epoch": 0.07, "learning_rate": 1.993856360130574e-05, "loss": 1.037, "step": 2080 }, { "epoch": 0.07, "learning_rate": 1.9937377692533382e-05, "loss": 1.0662, "step": 2100 }, { "epoch": 0.07, "learning_rate": 1.99361804830436e-05, "loss": 1.0312, "step": 2120 }, { "epoch": 0.07, "learning_rate": 1.9934971974197843e-05, "loss": 0.9109, "step": 2140 }, { "epoch": 0.07, "learning_rate": 1.993375216737042e-05, "loss": 0.9742, "step": 2160 }, { "epoch": 0.07, "learning_rate": 1.9932521063948494e-05, "loss": 1.0021, "step": 2180 }, { "epoch": 0.07, "learning_rate": 1.9931278665332057e-05, "loss": 0.9514, "step": 2200 }, { "epoch": 0.08, "learning_rate": 1.9930024972933966e-05, "loss": 0.9497, "step": 2220 }, { "epoch": 0.08, "learning_rate": 1.9928759988179908e-05, "loss": 0.9337, "step": 2240 }, { "epoch": 0.08, "learning_rate": 1.992748371250841e-05, "loss": 0.9945, "step": 2260 }, { "epoch": 0.08, "learning_rate": 1.992619614737085e-05, "loss": 0.9873, "step": 2280 }, { "epoch": 0.08, "learning_rate": 1.9924897294231434e-05, "loss": 0.9715, "step": 2300 }, { "epoch": 0.08, "learning_rate": 1.9923587154567214e-05, "loss": 0.946, "step": 2320 }, { "epoch": 0.08, "learning_rate": 1.9922265729868065e-05, "loss": 0.9688, "step": 2340 }, { "epoch": 0.08, "learning_rate": 1.992093302163671e-05, "loss": 0.9837, "step": 2360 }, { "epoch": 0.08, "learning_rate": 1.9919589031388688e-05, "loss": 1.0015, "step": 2380 }, { "epoch": 0.08, "learning_rate": 1.991823376065238e-05, "loss": 0.9847, "step": 2400 }, { "epoch": 0.08, "learning_rate": 1.991686721096899e-05, "loss": 0.9702, "step": 2420 }, { "epoch": 0.08, "learning_rate": 1.991548938389255e-05, "loss": 0.9749, "step": 2440 }, { "epoch": 0.08, "learning_rate": 1.991410028098991e-05, "loss": 0.968, "step": 2460 }, { "epoch": 0.08, "learning_rate": 1.9912699903840754e-05, "loss": 0.9446, "step": 2480 }, { "epoch": 0.08, "learning_rate": 1.991128825403758e-05, "loss": 0.9667, "step": 2500 }, { "epoch": 0.09, "learning_rate": 1.9909865333185704e-05, "loss": 0.9359, "step": 2520 }, { "epoch": 0.09, "learning_rate": 1.9908431142903263e-05, "loss": 1.0054, "step": 2540 }, { "epoch": 0.09, "learning_rate": 1.9906985684821208e-05, "loss": 0.9583, "step": 2560 }, { "epoch": 0.09, "learning_rate": 1.9905528960583305e-05, "loss": 0.9364, "step": 2580 }, { "epoch": 0.09, "learning_rate": 1.990406097184613e-05, "loss": 0.9839, "step": 2600 }, { "epoch": 0.09, "learning_rate": 1.9902581720279074e-05, "loss": 1.0165, "step": 2620 }, { "epoch": 0.09, "learning_rate": 1.9901091207564326e-05, "loss": 0.9603, "step": 2640 }, { "epoch": 0.09, "learning_rate": 1.9899589435396888e-05, "loss": 0.9977, "step": 2660 }, { "epoch": 0.09, "learning_rate": 1.9898076405484565e-05, "loss": 0.9618, "step": 2680 }, { "epoch": 0.09, "learning_rate": 1.9896552119547964e-05, "loss": 0.89, "step": 2700 }, { "epoch": 0.09, "learning_rate": 1.9895016579320495e-05, "loss": 1.0115, "step": 2720 }, { "epoch": 0.09, "learning_rate": 1.989346978654836e-05, "loss": 0.8936, "step": 2740 }, { "epoch": 0.09, "learning_rate": 1.9891911742990567e-05, "loss": 0.9733, "step": 2760 }, { "epoch": 0.09, "learning_rate": 1.9890342450418907e-05, "loss": 0.9443, "step": 2780 }, { "epoch": 0.1, "learning_rate": 1.9888761910617967e-05, "loss": 1.016, "step": 2800 }, { "epoch": 0.1, "learning_rate": 1.9887170125385135e-05, "loss": 0.8977, "step": 2820 }, { "epoch": 0.1, "learning_rate": 1.9885567096530566e-05, "loss": 0.9582, "step": 2840 }, { "epoch": 0.1, "learning_rate": 1.9883952825877225e-05, "loss": 1.0007, "step": 2860 }, { "epoch": 0.1, "learning_rate": 1.988232731526084e-05, "loss": 1.0113, "step": 2880 }, { "epoch": 0.1, "learning_rate": 1.9880690566529933e-05, "loss": 0.9572, "step": 2900 }, { "epoch": 0.1, "learning_rate": 1.987904258154581e-05, "loss": 1.0198, "step": 2920 }, { "epoch": 0.1, "learning_rate": 1.987738336218254e-05, "loss": 0.8695, "step": 2940 }, { "epoch": 0.1, "learning_rate": 1.9875712910326982e-05, "loss": 0.9847, "step": 2960 }, { "epoch": 0.1, "learning_rate": 1.987403122787876e-05, "loss": 0.9547, "step": 2980 }, { "epoch": 0.1, "learning_rate": 1.9872338316750264e-05, "loss": 0.9653, "step": 3000 }, { "epoch": 0.1, "learning_rate": 1.9870634178866676e-05, "loss": 0.957, "step": 3020 }, { "epoch": 0.1, "learning_rate": 1.986891881616592e-05, "loss": 0.976, "step": 3040 }, { "epoch": 0.1, "learning_rate": 1.9867192230598692e-05, "loss": 0.9866, "step": 3060 }, { "epoch": 0.1, "learning_rate": 1.986545442412846e-05, "loss": 1.0012, "step": 3080 }, { "epoch": 0.11, "learning_rate": 1.9863705398731444e-05, "loss": 0.9578, "step": 3100 }, { "epoch": 0.11, "learning_rate": 1.986194515639662e-05, "loss": 0.9816, "step": 3120 }, { "epoch": 0.11, "learning_rate": 1.986017369912573e-05, "loss": 0.9501, "step": 3140 }, { "epoch": 0.11, "learning_rate": 1.9858391028933258e-05, "loss": 0.9618, "step": 3160 }, { "epoch": 0.11, "learning_rate": 1.9856597147846447e-05, "loss": 0.9178, "step": 3180 }, { "epoch": 0.11, "learning_rate": 1.985479205790528e-05, "loss": 0.9475, "step": 3200 }, { "epoch": 0.11, "learning_rate": 1.98529757611625e-05, "loss": 0.9608, "step": 3220 }, { "epoch": 0.11, "learning_rate": 1.9851148259683586e-05, "loss": 0.8593, "step": 3240 }, { "epoch": 0.11, "learning_rate": 1.984930955554676e-05, "loss": 0.9459, "step": 3260 }, { "epoch": 0.11, "learning_rate": 1.984745965084298e-05, "loss": 0.8713, "step": 3280 }, { "epoch": 0.11, "learning_rate": 1.9845598547675945e-05, "loss": 0.9075, "step": 3300 }, { "epoch": 0.11, "learning_rate": 1.9843726248162096e-05, "loss": 0.9818, "step": 3320 }, { "epoch": 0.11, "learning_rate": 1.984184275443059e-05, "loss": 0.9602, "step": 3340 }, { "epoch": 0.11, "learning_rate": 1.983994806862333e-05, "loss": 0.9412, "step": 3360 }, { "epoch": 0.11, "learning_rate": 1.9838042192894935e-05, "loss": 0.9837, "step": 3380 }, { "epoch": 0.12, "learning_rate": 1.9836125129412756e-05, "loss": 1.0087, "step": 3400 }, { "epoch": 0.12, "learning_rate": 1.9834196880356863e-05, "loss": 1.0303, "step": 3420 }, { "epoch": 0.12, "learning_rate": 1.983225744792005e-05, "loss": 0.9416, "step": 3440 }, { "epoch": 0.12, "learning_rate": 1.9830306834307828e-05, "loss": 0.9829, "step": 3460 }, { "epoch": 0.12, "learning_rate": 1.9828345041738413e-05, "loss": 0.9906, "step": 3480 }, { "epoch": 0.12, "learning_rate": 1.9826372072442747e-05, "loss": 0.9868, "step": 3500 }, { "epoch": 0.12, "learning_rate": 1.9824387928664478e-05, "loss": 0.9515, "step": 3520 }, { "epoch": 0.12, "learning_rate": 1.9822392612659964e-05, "loss": 0.9713, "step": 3540 }, { "epoch": 0.12, "learning_rate": 1.9820386126698256e-05, "loss": 0.8813, "step": 3560 }, { "epoch": 0.12, "learning_rate": 1.981836847306112e-05, "loss": 0.9842, "step": 3580 }, { "epoch": 0.12, "learning_rate": 1.981633965404302e-05, "loss": 0.9981, "step": 3600 }, { "epoch": 0.12, "learning_rate": 1.9814299671951113e-05, "loss": 0.9399, "step": 3620 }, { "epoch": 0.12, "learning_rate": 1.9812248529105254e-05, "loss": 0.8522, "step": 3640 }, { "epoch": 0.12, "learning_rate": 1.9810186227837986e-05, "loss": 0.9817, "step": 3660 }, { "epoch": 0.12, "learning_rate": 1.9808112770494546e-05, "loss": 0.9788, "step": 3680 }, { "epoch": 0.13, "learning_rate": 1.9806028159432854e-05, "loss": 0.9442, "step": 3700 }, { "epoch": 0.13, "learning_rate": 1.9803932397023512e-05, "loss": 1.0024, "step": 3720 }, { "epoch": 0.13, "learning_rate": 1.9801825485649816e-05, "loss": 1.0184, "step": 3740 }, { "epoch": 0.13, "learning_rate": 1.9799707427707723e-05, "loss": 0.99, "step": 3760 }, { "epoch": 0.13, "learning_rate": 1.9797578225605875e-05, "loss": 1.0291, "step": 3780 }, { "epoch": 0.13, "learning_rate": 1.979543788176559e-05, "loss": 0.9496, "step": 3800 }, { "epoch": 0.13, "learning_rate": 1.979328639862085e-05, "loss": 0.9149, "step": 3820 }, { "epoch": 0.13, "learning_rate": 1.9791123778618303e-05, "loss": 0.9756, "step": 3840 }, { "epoch": 0.13, "learning_rate": 1.978895002421727e-05, "loss": 0.9599, "step": 3860 }, { "epoch": 0.13, "learning_rate": 1.978676513788973e-05, "loss": 0.9328, "step": 3880 }, { "epoch": 0.13, "learning_rate": 1.978456912212032e-05, "loss": 0.9399, "step": 3900 }, { "epoch": 0.13, "learning_rate": 1.9782361979406334e-05, "loss": 0.9641, "step": 3920 }, { "epoch": 0.13, "learning_rate": 1.9780143712257715e-05, "loss": 0.9505, "step": 3940 }, { "epoch": 0.13, "learning_rate": 1.9777914323197063e-05, "loss": 1.0097, "step": 3960 }, { "epoch": 0.14, "learning_rate": 1.977567381475963e-05, "loss": 0.9627, "step": 3980 }, { "epoch": 0.14, "learning_rate": 1.97734221894933e-05, "loss": 0.9573, "step": 4000 }, { "epoch": 0.14, "learning_rate": 1.9771159449958612e-05, "loss": 0.974, "step": 4020 }, { "epoch": 0.14, "learning_rate": 1.9768885598728734e-05, "loss": 0.9207, "step": 4040 }, { "epoch": 0.14, "learning_rate": 1.976660063838947e-05, "loss": 0.9765, "step": 4060 }, { "epoch": 0.14, "learning_rate": 1.9764304571539266e-05, "loss": 0.9475, "step": 4080 }, { "epoch": 0.14, "learning_rate": 1.9761997400789194e-05, "loss": 0.9764, "step": 4100 }, { "epoch": 0.14, "learning_rate": 1.9759679128762946e-05, "loss": 0.9277, "step": 4120 }, { "epoch": 0.14, "learning_rate": 1.9757349758096855e-05, "loss": 0.9738, "step": 4140 }, { "epoch": 0.14, "learning_rate": 1.975500929143986e-05, "loss": 0.9991, "step": 4160 }, { "epoch": 0.14, "learning_rate": 1.9752657731453523e-05, "loss": 0.9421, "step": 4180 }, { "epoch": 0.14, "learning_rate": 1.9750295080812025e-05, "loss": 0.972, "step": 4200 }, { "epoch": 0.14, "learning_rate": 1.974792134220215e-05, "loss": 0.967, "step": 4220 }, { "epoch": 0.14, "learning_rate": 1.9745536518323304e-05, "loss": 0.9693, "step": 4240 }, { "epoch": 0.14, "learning_rate": 1.9743140611887487e-05, "loss": 0.9242, "step": 4260 }, { "epoch": 0.15, "learning_rate": 1.974073362561931e-05, "loss": 1.0251, "step": 4280 }, { "epoch": 0.15, "learning_rate": 1.9738315562255983e-05, "loss": 1.0025, "step": 4300 }, { "epoch": 0.15, "learning_rate": 1.9735886424547306e-05, "loss": 1.0041, "step": 4320 }, { "epoch": 0.15, "learning_rate": 1.973344621525568e-05, "loss": 0.8858, "step": 4340 }, { "epoch": 0.15, "learning_rate": 1.97309949371561e-05, "loss": 1.0171, "step": 4360 }, { "epoch": 0.15, "learning_rate": 1.9728532593036132e-05, "loss": 0.9639, "step": 4380 }, { "epoch": 0.15, "learning_rate": 1.972605918569594e-05, "loss": 0.9343, "step": 4400 }, { "epoch": 0.15, "learning_rate": 1.972357471794827e-05, "loss": 0.8996, "step": 4420 }, { "epoch": 0.15, "learning_rate": 1.972107919261844e-05, "loss": 0.9477, "step": 4440 }, { "epoch": 0.15, "learning_rate": 1.9718572612544345e-05, "loss": 1.0198, "step": 4460 }, { "epoch": 0.15, "learning_rate": 1.9716054980576446e-05, "loss": 0.8851, "step": 4480 }, { "epoch": 0.15, "learning_rate": 1.971352629957778e-05, "loss": 0.9439, "step": 4500 }, { "epoch": 0.15, "learning_rate": 1.9710986572423947e-05, "loss": 0.9632, "step": 4520 }, { "epoch": 0.15, "learning_rate": 1.9708435802003105e-05, "loss": 1.0028, "step": 4540 }, { "epoch": 0.15, "learning_rate": 1.9705873991215973e-05, "loss": 0.9911, "step": 4560 }, { "epoch": 0.16, "learning_rate": 1.970330114297583e-05, "loss": 0.9854, "step": 4580 }, { "epoch": 0.16, "learning_rate": 1.970071726020849e-05, "loss": 0.964, "step": 4600 }, { "epoch": 0.16, "learning_rate": 1.969812234585233e-05, "loss": 0.9546, "step": 4620 }, { "epoch": 0.16, "learning_rate": 1.9695516402858277e-05, "loss": 0.8992, "step": 4640 }, { "epoch": 0.16, "learning_rate": 1.9692899434189786e-05, "loss": 0.9148, "step": 4660 }, { "epoch": 0.16, "learning_rate": 1.969027144282285e-05, "loss": 0.9543, "step": 4680 }, { "epoch": 0.16, "learning_rate": 1.9687632431746014e-05, "loss": 1.0007, "step": 4700 }, { "epoch": 0.16, "learning_rate": 1.9684982403960332e-05, "loss": 0.9029, "step": 4720 }, { "epoch": 0.16, "learning_rate": 1.968232136247941e-05, "loss": 0.9436, "step": 4740 }, { "epoch": 0.16, "learning_rate": 1.9679649310329353e-05, "loss": 0.9336, "step": 4760 }, { "epoch": 0.16, "learning_rate": 1.9676966250548807e-05, "loss": 0.9395, "step": 4780 }, { "epoch": 0.16, "learning_rate": 1.967427218618893e-05, "loss": 0.9229, "step": 4800 }, { "epoch": 0.16, "learning_rate": 1.967156712031339e-05, "loss": 0.9191, "step": 4820 }, { "epoch": 0.16, "learning_rate": 1.966885105599837e-05, "loss": 0.913, "step": 4840 }, { "epoch": 0.16, "learning_rate": 1.9666123996332564e-05, "loss": 0.9655, "step": 4860 }, { "epoch": 0.17, "learning_rate": 1.966338594441716e-05, "loss": 1.0067, "step": 4880 }, { "epoch": 0.17, "learning_rate": 1.9660636903365854e-05, "loss": 0.9048, "step": 4900 }, { "epoch": 0.17, "learning_rate": 1.9657876876304835e-05, "loss": 0.9761, "step": 4920 }, { "epoch": 0.17, "learning_rate": 1.965510586637279e-05, "loss": 0.8365, "step": 4940 }, { "epoch": 0.17, "learning_rate": 1.965232387672089e-05, "loss": 0.9765, "step": 4960 }, { "epoch": 0.17, "learning_rate": 1.9649530910512796e-05, "loss": 0.864, "step": 4980 }, { "epoch": 0.17, "learning_rate": 1.9646726970924645e-05, "loss": 0.9732, "step": 5000 }, { "epoch": 0.17, "learning_rate": 1.9643912061145065e-05, "loss": 0.8954, "step": 5020 }, { "epoch": 0.17, "learning_rate": 1.9641086184375148e-05, "loss": 0.956, "step": 5040 }, { "epoch": 0.17, "learning_rate": 1.9638249343828463e-05, "loss": 0.9609, "step": 5060 }, { "epoch": 0.17, "learning_rate": 1.9635401542731043e-05, "loss": 1.0167, "step": 5080 }, { "epoch": 0.17, "learning_rate": 1.963254278432139e-05, "loss": 0.9232, "step": 5100 }, { "epoch": 0.17, "learning_rate": 1.9629673071850463e-05, "loss": 0.9191, "step": 5120 }, { "epoch": 0.17, "learning_rate": 1.962679240858168e-05, "loss": 0.8266, "step": 5140 }, { "epoch": 0.18, "learning_rate": 1.9623900797790914e-05, "loss": 0.9257, "step": 5160 }, { "epoch": 0.18, "learning_rate": 1.9620998242766482e-05, "loss": 0.8912, "step": 5180 }, { "epoch": 0.18, "learning_rate": 1.9618084746809147e-05, "loss": 0.8733, "step": 5200 }, { "epoch": 0.18, "learning_rate": 1.9615160313232122e-05, "loss": 0.9486, "step": 5220 }, { "epoch": 0.18, "learning_rate": 1.961222494536105e-05, "loss": 0.9723, "step": 5240 }, { "epoch": 0.18, "learning_rate": 1.960927864653401e-05, "loss": 1.0091, "step": 5260 }, { "epoch": 0.18, "learning_rate": 1.9606321420101514e-05, "loss": 0.9226, "step": 5280 }, { "epoch": 0.18, "learning_rate": 1.96033532694265e-05, "loss": 0.9532, "step": 5300 }, { "epoch": 0.18, "learning_rate": 1.960037419788433e-05, "loss": 0.9176, "step": 5320 }, { "epoch": 0.18, "learning_rate": 1.9597384208862776e-05, "loss": 0.9545, "step": 5340 }, { "epoch": 0.18, "learning_rate": 1.9594383305762045e-05, "loss": 0.9162, "step": 5360 }, { "epoch": 0.18, "learning_rate": 1.9591371491994735e-05, "loss": 0.9597, "step": 5380 }, { "epoch": 0.18, "learning_rate": 1.958834877098586e-05, "loss": 0.9209, "step": 5400 }, { "epoch": 0.18, "learning_rate": 1.9585315146172843e-05, "loss": 0.9597, "step": 5420 }, { "epoch": 0.18, "learning_rate": 1.9582270621005494e-05, "loss": 0.9402, "step": 5440 }, { "epoch": 0.19, "learning_rate": 1.9579215198946028e-05, "loss": 0.8907, "step": 5460 }, { "epoch": 0.19, "learning_rate": 1.9576148883469054e-05, "loss": 0.9002, "step": 5480 }, { "epoch": 0.19, "learning_rate": 1.9573071678061565e-05, "loss": 0.9352, "step": 5500 }, { "epoch": 0.19, "learning_rate": 1.956998358622293e-05, "loss": 0.9039, "step": 5520 }, { "epoch": 0.19, "learning_rate": 1.9566884611464916e-05, "loss": 0.926, "step": 5540 }, { "epoch": 0.19, "learning_rate": 1.9563774757311646e-05, "loss": 0.9952, "step": 5560 }, { "epoch": 0.19, "learning_rate": 1.956065402729963e-05, "loss": 0.8673, "step": 5580 }, { "epoch": 0.19, "learning_rate": 1.9557522424977745e-05, "loss": 0.9081, "step": 5600 }, { "epoch": 0.19, "learning_rate": 1.9554379953907217e-05, "loss": 0.9287, "step": 5620 }, { "epoch": 0.19, "learning_rate": 1.955122661766165e-05, "loss": 0.9262, "step": 5640 }, { "epoch": 0.19, "learning_rate": 1.954806241982699e-05, "loss": 0.937, "step": 5660 }, { "epoch": 0.19, "learning_rate": 1.954488736400155e-05, "loss": 0.8884, "step": 5680 }, { "epoch": 0.19, "learning_rate": 1.9541701453795976e-05, "loss": 0.9416, "step": 5700 }, { "epoch": 0.19, "learning_rate": 1.9538504692833263e-05, "loss": 0.9081, "step": 5720 }, { "epoch": 0.19, "learning_rate": 1.9535297084748747e-05, "loss": 0.8875, "step": 5740 }, { "epoch": 0.2, "learning_rate": 1.9532078633190097e-05, "loss": 0.9085, "step": 5760 }, { "epoch": 0.2, "learning_rate": 1.9528849341817316e-05, "loss": 0.9679, "step": 5780 }, { "epoch": 0.2, "learning_rate": 1.9525609214302727e-05, "loss": 0.9845, "step": 5800 }, { "epoch": 0.2, "learning_rate": 1.9522358254330987e-05, "loss": 0.9942, "step": 5820 }, { "epoch": 0.2, "learning_rate": 1.951909646559906e-05, "loss": 0.9219, "step": 5840 }, { "epoch": 0.2, "learning_rate": 1.951582385181623e-05, "loss": 0.9227, "step": 5860 }, { "epoch": 0.2, "learning_rate": 1.9512540416704095e-05, "loss": 0.9447, "step": 5880 }, { "epoch": 0.2, "learning_rate": 1.950924616399655e-05, "loss": 0.9101, "step": 5900 }, { "epoch": 0.2, "learning_rate": 1.9505941097439805e-05, "loss": 0.9938, "step": 5920 }, { "epoch": 0.2, "learning_rate": 1.9502625220792345e-05, "loss": 0.9749, "step": 5940 }, { "epoch": 0.2, "learning_rate": 1.9499298537824972e-05, "loss": 0.9585, "step": 5960 }, { "epoch": 0.2, "learning_rate": 1.949596105232077e-05, "loss": 0.9813, "step": 5980 }, { "epoch": 0.2, "learning_rate": 1.9492612768075094e-05, "loss": 0.9358, "step": 6000 }, { "epoch": 0.2, "learning_rate": 1.9489253688895594e-05, "loss": 0.8976, "step": 6020 }, { "epoch": 0.21, "learning_rate": 1.9485883818602194e-05, "loss": 0.9685, "step": 6040 }, { "epoch": 0.21, "learning_rate": 1.948250316102709e-05, "loss": 0.9148, "step": 6060 }, { "epoch": 0.21, "learning_rate": 1.9479111720014737e-05, "loss": 0.8979, "step": 6080 }, { "epoch": 0.21, "learning_rate": 1.947570949942186e-05, "loss": 0.914, "step": 6100 }, { "epoch": 0.21, "learning_rate": 1.9472296503117437e-05, "loss": 0.9587, "step": 6120 }, { "epoch": 0.21, "learning_rate": 1.9468872734982705e-05, "loss": 0.9355, "step": 6140 }, { "epoch": 0.21, "learning_rate": 1.9465438198911158e-05, "loss": 0.925, "step": 6160 }, { "epoch": 0.21, "learning_rate": 1.946199289880852e-05, "loss": 0.8997, "step": 6180 }, { "epoch": 0.21, "learning_rate": 1.9458536838592764e-05, "loss": 0.9549, "step": 6200 }, { "epoch": 0.21, "learning_rate": 1.9455070022194094e-05, "loss": 0.8995, "step": 6220 }, { "epoch": 0.21, "learning_rate": 1.9451592453554956e-05, "loss": 0.967, "step": 6240 }, { "epoch": 0.21, "learning_rate": 1.944810413663001e-05, "loss": 0.945, "step": 6260 }, { "epoch": 0.21, "learning_rate": 1.9444605075386156e-05, "loss": 0.9658, "step": 6280 }, { "epoch": 0.21, "learning_rate": 1.944109527380249e-05, "loss": 0.9514, "step": 6300 }, { "epoch": 0.21, "learning_rate": 1.9437574735870346e-05, "loss": 0.9264, "step": 6320 }, { "epoch": 0.22, "learning_rate": 1.9434043465593247e-05, "loss": 0.9749, "step": 6340 }, { "epoch": 0.22, "learning_rate": 1.9430501466986932e-05, "loss": 0.9507, "step": 6360 }, { "epoch": 0.22, "learning_rate": 1.9426948744079338e-05, "loss": 0.9251, "step": 6380 }, { "epoch": 0.22, "learning_rate": 1.94233853009106e-05, "loss": 0.9631, "step": 6400 }, { "epoch": 0.22, "learning_rate": 1.9419811141533032e-05, "loss": 0.9639, "step": 6420 }, { "epoch": 0.22, "learning_rate": 1.9416226270011152e-05, "loss": 0.8744, "step": 6440 }, { "epoch": 0.22, "learning_rate": 1.9412630690421648e-05, "loss": 0.8919, "step": 6460 }, { "epoch": 0.22, "learning_rate": 1.940902440685339e-05, "loss": 0.9569, "step": 6480 }, { "epoch": 0.22, "learning_rate": 1.9405407423407414e-05, "loss": 0.9714, "step": 6500 }, { "epoch": 0.22, "learning_rate": 1.9401779744196934e-05, "loss": 0.904, "step": 6520 }, { "epoch": 0.22, "learning_rate": 1.9398141373347317e-05, "loss": 0.885, "step": 6540 }, { "epoch": 0.22, "learning_rate": 1.93944923149961e-05, "loss": 0.9219, "step": 6560 }, { "epoch": 0.22, "learning_rate": 1.9390832573292964e-05, "loss": 0.9908, "step": 6580 }, { "epoch": 0.22, "learning_rate": 1.938716215239974e-05, "loss": 0.8588, "step": 6600 }, { "epoch": 0.22, "learning_rate": 1.9383481056490402e-05, "loss": 0.8831, "step": 6620 }, { "epoch": 0.23, "learning_rate": 1.9379789289751073e-05, "loss": 0.9486, "step": 6640 }, { "epoch": 0.23, "learning_rate": 1.9376086856380007e-05, "loss": 0.9116, "step": 6660 }, { "epoch": 0.23, "learning_rate": 1.937237376058758e-05, "loss": 0.941, "step": 6680 }, { "epoch": 0.23, "learning_rate": 1.93686500065963e-05, "loss": 0.9913, "step": 6700 }, { "epoch": 0.23, "learning_rate": 1.9364915598640796e-05, "loss": 0.9316, "step": 6720 }, { "epoch": 0.23, "learning_rate": 1.936117054096781e-05, "loss": 0.8926, "step": 6740 }, { "epoch": 0.23, "learning_rate": 1.9357414837836192e-05, "loss": 0.8829, "step": 6760 }, { "epoch": 0.23, "learning_rate": 1.935364849351691e-05, "loss": 0.9302, "step": 6780 }, { "epoch": 0.23, "learning_rate": 1.934987151229302e-05, "loss": 0.8974, "step": 6800 }, { "epoch": 0.23, "learning_rate": 1.934608389845968e-05, "loss": 0.9036, "step": 6820 }, { "epoch": 0.23, "learning_rate": 1.9342285656324135e-05, "loss": 0.9103, "step": 6840 }, { "epoch": 0.23, "learning_rate": 1.9338476790205726e-05, "loss": 0.9448, "step": 6860 }, { "epoch": 0.23, "learning_rate": 1.933465730443586e-05, "loss": 0.9723, "step": 6880 }, { "epoch": 0.23, "learning_rate": 1.9330827203358044e-05, "loss": 0.9088, "step": 6900 }, { "epoch": 0.23, "learning_rate": 1.932698649132783e-05, "loss": 0.9263, "step": 6920 }, { "epoch": 0.24, "learning_rate": 1.932313517271285e-05, "loss": 0.9501, "step": 6940 }, { "epoch": 0.24, "learning_rate": 1.9319273251892805e-05, "loss": 0.9005, "step": 6960 }, { "epoch": 0.24, "learning_rate": 1.931540073325944e-05, "loss": 0.9459, "step": 6980 }, { "epoch": 0.24, "learning_rate": 1.9311517621216555e-05, "loss": 0.9368, "step": 7000 }, { "epoch": 0.24, "learning_rate": 1.930762392018e-05, "loss": 0.9028, "step": 7020 }, { "epoch": 0.24, "learning_rate": 1.9303719634577668e-05, "loss": 0.9928, "step": 7040 }, { "epoch": 0.24, "learning_rate": 1.929980476884948e-05, "loss": 0.9081, "step": 7060 }, { "epoch": 0.24, "learning_rate": 1.92958793274474e-05, "loss": 0.9539, "step": 7080 }, { "epoch": 0.24, "learning_rate": 1.9291943314835406e-05, "loss": 0.9098, "step": 7100 }, { "epoch": 0.24, "learning_rate": 1.928799673548951e-05, "loss": 0.9151, "step": 7120 }, { "epoch": 0.24, "learning_rate": 1.9284039593897734e-05, "loss": 0.9074, "step": 7140 }, { "epoch": 0.24, "learning_rate": 1.928007189456011e-05, "loss": 0.8805, "step": 7160 }, { "epoch": 0.24, "learning_rate": 1.9276093641988684e-05, "loss": 0.9018, "step": 7180 }, { "epoch": 0.24, "learning_rate": 1.9272104840707487e-05, "loss": 0.9576, "step": 7200 }, { "epoch": 0.25, "learning_rate": 1.9268105495252568e-05, "loss": 0.9561, "step": 7220 }, { "epoch": 0.25, "learning_rate": 1.9264095610171944e-05, "loss": 0.8706, "step": 7240 }, { "epoch": 0.25, "learning_rate": 1.926007519002564e-05, "loss": 0.9495, "step": 7260 }, { "epoch": 0.25, "learning_rate": 1.925604423938564e-05, "loss": 0.9262, "step": 7280 }, { "epoch": 0.25, "learning_rate": 1.9252002762835917e-05, "loss": 0.9658, "step": 7300 }, { "epoch": 0.25, "learning_rate": 1.924795076497241e-05, "loss": 0.9591, "step": 7320 }, { "epoch": 0.25, "learning_rate": 1.924388825040303e-05, "loss": 0.9324, "step": 7340 }, { "epoch": 0.25, "learning_rate": 1.9239815223747632e-05, "loss": 0.8588, "step": 7360 }, { "epoch": 0.25, "learning_rate": 1.9235731689638035e-05, "loss": 0.8995, "step": 7380 }, { "epoch": 0.25, "learning_rate": 1.9231637652718004e-05, "loss": 0.8604, "step": 7400 }, { "epoch": 0.25, "learning_rate": 1.9227533117643256e-05, "loss": 0.918, "step": 7420 }, { "epoch": 0.25, "learning_rate": 1.922341808908144e-05, "loss": 0.9008, "step": 7440 }, { "epoch": 0.25, "learning_rate": 1.921929257171213e-05, "loss": 0.9243, "step": 7460 }, { "epoch": 0.25, "learning_rate": 1.9215156570226844e-05, "loss": 0.9138, "step": 7480 }, { "epoch": 0.25, "learning_rate": 1.921101008932901e-05, "loss": 0.9794, "step": 7500 }, { "epoch": 0.26, "learning_rate": 1.920685313373398e-05, "loss": 0.8578, "step": 7520 }, { "epoch": 0.26, "learning_rate": 1.920268570816901e-05, "loss": 0.9052, "step": 7540 }, { "epoch": 0.26, "learning_rate": 1.9198507817373274e-05, "loss": 0.9486, "step": 7560 }, { "epoch": 0.26, "learning_rate": 1.919431946609784e-05, "loss": 0.9456, "step": 7580 }, { "epoch": 0.26, "learning_rate": 1.9190120659105663e-05, "loss": 0.9079, "step": 7600 }, { "epoch": 0.26, "learning_rate": 1.9185911401171615e-05, "loss": 0.9492, "step": 7620 }, { "epoch": 0.26, "learning_rate": 1.918169169708242e-05, "loss": 0.9227, "step": 7640 }, { "epoch": 0.26, "learning_rate": 1.91774615516367e-05, "loss": 0.9094, "step": 7660 }, { "epoch": 0.26, "learning_rate": 1.9173220969644947e-05, "loss": 0.8853, "step": 7680 }, { "epoch": 0.26, "learning_rate": 1.916896995592953e-05, "loss": 0.9563, "step": 7700 }, { "epoch": 0.26, "learning_rate": 1.916470851532466e-05, "loss": 0.9817, "step": 7720 }, { "epoch": 0.26, "learning_rate": 1.9160436652676425e-05, "loss": 1.0074, "step": 7740 }, { "epoch": 0.26, "learning_rate": 1.9156154372842757e-05, "loss": 0.9542, "step": 7760 }, { "epoch": 0.26, "learning_rate": 1.9151861680693432e-05, "loss": 0.8859, "step": 7780 }, { "epoch": 0.26, "learning_rate": 1.914755858111008e-05, "loss": 0.9705, "step": 7800 }, { "epoch": 0.27, "learning_rate": 1.914324507898614e-05, "loss": 0.8557, "step": 7820 }, { "epoch": 0.27, "learning_rate": 1.9138921179226908e-05, "loss": 0.8834, "step": 7840 }, { "epoch": 0.27, "learning_rate": 1.9134586886749492e-05, "loss": 0.9064, "step": 7860 }, { "epoch": 0.27, "learning_rate": 1.9130242206482817e-05, "loss": 0.8905, "step": 7880 }, { "epoch": 0.27, "learning_rate": 1.912588714336762e-05, "loss": 0.88, "step": 7900 }, { "epoch": 0.27, "learning_rate": 1.912152170235646e-05, "loss": 0.8668, "step": 7920 }, { "epoch": 0.27, "learning_rate": 1.9117145888413678e-05, "loss": 0.9504, "step": 7940 }, { "epoch": 0.27, "learning_rate": 1.911275970651542e-05, "loss": 0.9257, "step": 7960 }, { "epoch": 0.27, "learning_rate": 1.910836316164962e-05, "loss": 0.9762, "step": 7980 }, { "epoch": 0.27, "learning_rate": 1.9103956258816008e-05, "loss": 0.9331, "step": 8000 }, { "epoch": 0.27, "learning_rate": 1.909953900302607e-05, "loss": 0.8791, "step": 8020 }, { "epoch": 0.27, "learning_rate": 1.909511139930309e-05, "loss": 0.9448, "step": 8040 }, { "epoch": 0.27, "learning_rate": 1.9090673452682108e-05, "loss": 0.8983, "step": 8060 }, { "epoch": 0.27, "learning_rate": 1.9086225168209917e-05, "loss": 0.926, "step": 8080 }, { "epoch": 0.27, "learning_rate": 1.9081766550945084e-05, "loss": 0.9388, "step": 8100 }, { "epoch": 0.28, "learning_rate": 1.907729760595791e-05, "loss": 0.9377, "step": 8120 }, { "epoch": 0.28, "learning_rate": 1.9072818338330455e-05, "loss": 0.9594, "step": 8140 }, { "epoch": 0.28, "learning_rate": 1.9068328753156514e-05, "loss": 0.8816, "step": 8160 }, { "epoch": 0.28, "learning_rate": 1.9063828855541605e-05, "loss": 0.9118, "step": 8180 }, { "epoch": 0.28, "learning_rate": 1.9059318650602986e-05, "loss": 0.9076, "step": 8200 }, { "epoch": 0.28, "learning_rate": 1.905479814346962e-05, "loss": 0.8466, "step": 8220 }, { "epoch": 0.28, "learning_rate": 1.9050267339282214e-05, "loss": 0.9428, "step": 8240 }, { "epoch": 0.28, "learning_rate": 1.9045726243193155e-05, "loss": 0.9305, "step": 8260 }, { "epoch": 0.28, "learning_rate": 1.904117486036655e-05, "loss": 0.8242, "step": 8280 }, { "epoch": 0.28, "learning_rate": 1.9036613195978198e-05, "loss": 0.902, "step": 8300 }, { "epoch": 0.28, "learning_rate": 1.9032041255215596e-05, "loss": 0.8798, "step": 8320 }, { "epoch": 0.28, "learning_rate": 1.9027459043277923e-05, "loss": 0.9021, "step": 8340 }, { "epoch": 0.28, "learning_rate": 1.902286656537603e-05, "loss": 0.8776, "step": 8360 }, { "epoch": 0.28, "learning_rate": 1.9018263826732467e-05, "loss": 0.9082, "step": 8380 }, { "epoch": 0.29, "learning_rate": 1.9013650832581424e-05, "loss": 0.8431, "step": 8400 }, { "epoch": 0.29, "learning_rate": 1.9009027588168773e-05, "loss": 0.8934, "step": 8420 }, { "epoch": 0.29, "learning_rate": 1.900439409875203e-05, "loss": 0.9072, "step": 8440 }, { "epoch": 0.29, "learning_rate": 1.899975036960037e-05, "loss": 0.9326, "step": 8460 }, { "epoch": 0.29, "learning_rate": 1.8995096405994614e-05, "loss": 0.9009, "step": 8480 }, { "epoch": 0.29, "learning_rate": 1.899043221322721e-05, "loss": 0.972, "step": 8500 }, { "epoch": 0.29, "learning_rate": 1.898575779660225e-05, "loss": 0.9269, "step": 8520 }, { "epoch": 0.29, "learning_rate": 1.8981073161435447e-05, "loss": 0.8519, "step": 8540 }, { "epoch": 0.29, "learning_rate": 1.897637831305414e-05, "loss": 0.8876, "step": 8560 }, { "epoch": 0.29, "learning_rate": 1.8971673256797275e-05, "loss": 0.8401, "step": 8580 }, { "epoch": 0.29, "learning_rate": 1.8966957998015403e-05, "loss": 0.9571, "step": 8600 }, { "epoch": 0.29, "learning_rate": 1.896223254207069e-05, "loss": 0.9073, "step": 8620 }, { "epoch": 0.29, "learning_rate": 1.89574968943369e-05, "loss": 0.8836, "step": 8640 }, { "epoch": 0.29, "learning_rate": 1.895275106019937e-05, "loss": 0.9202, "step": 8660 }, { "epoch": 0.29, "learning_rate": 1.8947995045055023e-05, "loss": 0.8841, "step": 8680 }, { "epoch": 0.3, "learning_rate": 1.894322885431238e-05, "loss": 0.8863, "step": 8700 }, { "epoch": 0.3, "learning_rate": 1.8938452493391515e-05, "loss": 1.0004, "step": 8720 }, { "epoch": 0.3, "learning_rate": 1.8933665967724072e-05, "loss": 0.8558, "step": 8740 }, { "epoch": 0.3, "learning_rate": 1.892886928275325e-05, "loss": 0.9001, "step": 8760 }, { "epoch": 0.3, "learning_rate": 1.892406244393382e-05, "loss": 0.9562, "step": 8780 }, { "epoch": 0.3, "learning_rate": 1.891924545673207e-05, "loss": 0.9127, "step": 8800 }, { "epoch": 0.3, "learning_rate": 1.891441832662586e-05, "loss": 0.929, "step": 8820 }, { "epoch": 0.3, "learning_rate": 1.8909581059104557e-05, "loss": 0.8909, "step": 8840 }, { "epoch": 0.3, "learning_rate": 1.8904733659669072e-05, "loss": 0.9258, "step": 8860 }, { "epoch": 0.3, "learning_rate": 1.8899876133831835e-05, "loss": 0.9803, "step": 8880 }, { "epoch": 0.3, "learning_rate": 1.8895008487116793e-05, "loss": 0.8716, "step": 8900 }, { "epoch": 0.3, "learning_rate": 1.8890130725059396e-05, "loss": 0.924, "step": 8920 }, { "epoch": 0.3, "learning_rate": 1.8885242853206606e-05, "loss": 0.8649, "step": 8940 }, { "epoch": 0.3, "learning_rate": 1.888034487711687e-05, "loss": 0.9427, "step": 8960 }, { "epoch": 0.3, "learning_rate": 1.8875436802360142e-05, "loss": 0.9313, "step": 8980 }, { "epoch": 0.31, "learning_rate": 1.8870518634517843e-05, "loss": 0.9407, "step": 9000 }, { "epoch": 0.31, "learning_rate": 1.8865590379182885e-05, "loss": 0.9581, "step": 9020 }, { "epoch": 0.31, "learning_rate": 1.8860652041959643e-05, "loss": 0.8846, "step": 9040 }, { "epoch": 0.31, "learning_rate": 1.8855703628463957e-05, "loss": 0.8592, "step": 9060 }, { "epoch": 0.31, "learning_rate": 1.8850745144323137e-05, "loss": 0.9586, "step": 9080 }, { "epoch": 0.31, "learning_rate": 1.884577659517593e-05, "loss": 0.9452, "step": 9100 }, { "epoch": 0.31, "learning_rate": 1.884079798667254e-05, "loss": 0.8987, "step": 9120 }, { "epoch": 0.31, "learning_rate": 1.88358093244746e-05, "loss": 0.925, "step": 9140 }, { "epoch": 0.31, "learning_rate": 1.883081061425519e-05, "loss": 0.8806, "step": 9160 }, { "epoch": 0.31, "learning_rate": 1.882580186169881e-05, "loss": 0.8575, "step": 9180 }, { "epoch": 0.31, "learning_rate": 1.882078307250137e-05, "loss": 0.8711, "step": 9200 }, { "epoch": 0.31, "learning_rate": 1.8815754252370205e-05, "loss": 0.9003, "step": 9220 }, { "epoch": 0.31, "learning_rate": 1.881071540702406e-05, "loss": 0.9115, "step": 9240 }, { "epoch": 0.31, "learning_rate": 1.8805666542193073e-05, "loss": 0.9562, "step": 9260 }, { "epoch": 0.32, "learning_rate": 1.8800607663618774e-05, "loss": 0.8856, "step": 9280 }, { "epoch": 0.32, "learning_rate": 1.879553877705409e-05, "loss": 0.8638, "step": 9300 }, { "epoch": 0.32, "learning_rate": 1.8790459888263323e-05, "loss": 0.882, "step": 9320 }, { "epoch": 0.32, "learning_rate": 1.8785371003022145e-05, "loss": 0.8989, "step": 9340 }, { "epoch": 0.32, "learning_rate": 1.8780272127117606e-05, "loss": 0.9034, "step": 9360 }, { "epoch": 0.32, "learning_rate": 1.8775163266348116e-05, "loss": 0.8835, "step": 9380 }, { "epoch": 0.32, "learning_rate": 1.877004442652343e-05, "loss": 0.816, "step": 9400 }, { "epoch": 0.32, "learning_rate": 1.8764915613464657e-05, "loss": 0.8363, "step": 9420 }, { "epoch": 0.32, "learning_rate": 1.8759776833004256e-05, "loss": 0.869, "step": 9440 }, { "epoch": 0.32, "learning_rate": 1.8754628090986e-05, "loss": 0.8829, "step": 9460 }, { "epoch": 0.32, "learning_rate": 1.8749469393265016e-05, "loss": 0.903, "step": 9480 }, { "epoch": 0.32, "learning_rate": 1.874430074570773e-05, "loss": 0.9455, "step": 9500 }, { "epoch": 0.32, "learning_rate": 1.87391221541919e-05, "loss": 0.9328, "step": 9520 }, { "epoch": 0.32, "learning_rate": 1.873393362460658e-05, "loss": 0.9131, "step": 9540 }, { "epoch": 0.32, "learning_rate": 1.8728735162852125e-05, "loss": 0.9119, "step": 9560 }, { "epoch": 0.33, "learning_rate": 1.87235267748402e-05, "loss": 0.8505, "step": 9580 }, { "epoch": 0.33, "learning_rate": 1.8718308466493745e-05, "loss": 0.8926, "step": 9600 }, { "epoch": 0.33, "learning_rate": 1.871308024374698e-05, "loss": 0.8904, "step": 9620 }, { "epoch": 0.33, "learning_rate": 1.8707842112545406e-05, "loss": 0.8506, "step": 9640 }, { "epoch": 0.33, "learning_rate": 1.870259407884579e-05, "loss": 0.903, "step": 9660 }, { "epoch": 0.33, "learning_rate": 1.8697336148616162e-05, "loss": 0.9225, "step": 9680 }, { "epoch": 0.33, "learning_rate": 1.86920683278358e-05, "loss": 0.8845, "step": 9700 }, { "epoch": 0.33, "learning_rate": 1.868679062249524e-05, "loss": 0.8764, "step": 9720 }, { "epoch": 0.33, "learning_rate": 1.8681503038596248e-05, "loss": 0.9878, "step": 9740 }, { "epoch": 0.33, "learning_rate": 1.8676205582151822e-05, "loss": 0.9531, "step": 9760 }, { "epoch": 0.33, "learning_rate": 1.8670898259186202e-05, "loss": 0.8972, "step": 9780 }, { "epoch": 0.33, "learning_rate": 1.8665581075734836e-05, "loss": 0.8453, "step": 9800 }, { "epoch": 0.33, "learning_rate": 1.866025403784439e-05, "loss": 0.9649, "step": 9820 }, { "epoch": 0.33, "learning_rate": 1.865491715157273e-05, "loss": 0.886, "step": 9840 }, { "epoch": 0.33, "learning_rate": 1.864957042298893e-05, "loss": 0.8603, "step": 9860 }, { "epoch": 0.34, "learning_rate": 1.864421385817325e-05, "loss": 0.9179, "step": 9880 }, { "epoch": 0.34, "learning_rate": 1.8638847463217145e-05, "loss": 0.8456, "step": 9900 }, { "epoch": 0.34, "learning_rate": 1.8633471244223232e-05, "loss": 0.8553, "step": 9920 }, { "epoch": 0.34, "learning_rate": 1.8628085207305325e-05, "loss": 0.8574, "step": 9940 }, { "epoch": 0.34, "learning_rate": 1.8622689358588373e-05, "loss": 0.8474, "step": 9960 }, { "epoch": 0.34, "learning_rate": 1.8617283704208507e-05, "loss": 0.905, "step": 9980 }, { "epoch": 0.34, "learning_rate": 1.8611868250312994e-05, "loss": 0.9268, "step": 10000 }, { "epoch": 0.34, "learning_rate": 1.860644300306026e-05, "loss": 0.8962, "step": 10020 }, { "epoch": 0.34, "learning_rate": 1.8601007968619847e-05, "loss": 0.9645, "step": 10040 }, { "epoch": 0.34, "learning_rate": 1.8595563153172448e-05, "loss": 0.9035, "step": 10060 }, { "epoch": 0.34, "learning_rate": 1.8590108562909862e-05, "loss": 0.9684, "step": 10080 }, { "epoch": 0.34, "learning_rate": 1.858464420403502e-05, "loss": 0.863, "step": 10100 }, { "epoch": 0.34, "learning_rate": 1.857917008276195e-05, "loss": 0.9421, "step": 10120 }, { "epoch": 0.34, "learning_rate": 1.857368620531578e-05, "loss": 0.8535, "step": 10140 }, { "epoch": 0.34, "learning_rate": 1.8568192577932746e-05, "loss": 0.8553, "step": 10160 }, { "epoch": 0.35, "learning_rate": 1.8562689206860155e-05, "loss": 0.8977, "step": 10180 }, { "epoch": 0.35, "learning_rate": 1.8557176098356406e-05, "loss": 0.9678, "step": 10200 }, { "epoch": 0.35, "learning_rate": 1.8551653258690972e-05, "loss": 0.8443, "step": 10220 }, { "epoch": 0.35, "learning_rate": 1.854612069414438e-05, "loss": 0.9013, "step": 10240 }, { "epoch": 0.35, "learning_rate": 1.8540578411008233e-05, "loss": 0.8929, "step": 10260 }, { "epoch": 0.35, "learning_rate": 1.8535026415585168e-05, "loss": 0.9134, "step": 10280 }, { "epoch": 0.35, "learning_rate": 1.8529464714188888e-05, "loss": 0.898, "step": 10300 }, { "epoch": 0.35, "learning_rate": 1.852389331314411e-05, "loss": 0.8557, "step": 10320 }, { "epoch": 0.35, "learning_rate": 1.85183122187866e-05, "loss": 0.9308, "step": 10340 }, { "epoch": 0.35, "learning_rate": 1.8512721437463136e-05, "loss": 0.8048, "step": 10360 }, { "epoch": 0.35, "learning_rate": 1.8507120975531517e-05, "loss": 0.8329, "step": 10380 }, { "epoch": 0.35, "learning_rate": 1.850151083936055e-05, "loss": 0.9244, "step": 10400 }, { "epoch": 0.35, "learning_rate": 1.849589103533004e-05, "loss": 0.8769, "step": 10420 }, { "epoch": 0.35, "learning_rate": 1.84902615698308e-05, "loss": 0.8796, "step": 10440 }, { "epoch": 0.36, "learning_rate": 1.8484622449264608e-05, "loss": 0.8567, "step": 10460 }, { "epoch": 0.36, "learning_rate": 1.8478973680044237e-05, "loss": 0.8471, "step": 10480 }, { "epoch": 0.36, "learning_rate": 1.8473315268593432e-05, "loss": 0.8955, "step": 10500 }, { "epoch": 0.36, "learning_rate": 1.846764722134689e-05, "loss": 0.8996, "step": 10520 }, { "epoch": 0.36, "learning_rate": 1.8461969544750286e-05, "loss": 0.8435, "step": 10540 }, { "epoch": 0.36, "learning_rate": 1.845628224526023e-05, "loss": 0.862, "step": 10560 }, { "epoch": 0.36, "learning_rate": 1.8450585329344277e-05, "loss": 0.8905, "step": 10580 }, { "epoch": 0.36, "learning_rate": 1.8444878803480925e-05, "loss": 0.9191, "step": 10600 }, { "epoch": 0.36, "learning_rate": 1.8439162674159595e-05, "loss": 0.9315, "step": 10620 }, { "epoch": 0.36, "learning_rate": 1.8433436947880626e-05, "loss": 0.8691, "step": 10640 }, { "epoch": 0.36, "learning_rate": 1.842770163115528e-05, "loss": 0.9158, "step": 10660 }, { "epoch": 0.36, "learning_rate": 1.842195673050572e-05, "loss": 0.9328, "step": 10680 }, { "epoch": 0.36, "learning_rate": 1.8416202252465e-05, "loss": 0.8776, "step": 10700 }, { "epoch": 0.36, "learning_rate": 1.841043820357708e-05, "loss": 0.8524, "step": 10720 }, { "epoch": 0.36, "learning_rate": 1.8404664590396796e-05, "loss": 0.8113, "step": 10740 }, { "epoch": 0.37, "learning_rate": 1.8398881419489854e-05, "loss": 0.8449, "step": 10760 }, { "epoch": 0.37, "learning_rate": 1.8393088697432847e-05, "loss": 0.8801, "step": 10780 }, { "epoch": 0.37, "learning_rate": 1.838728643081321e-05, "loss": 0.8911, "step": 10800 }, { "epoch": 0.37, "learning_rate": 1.8381474626229245e-05, "loss": 0.8365, "step": 10820 }, { "epoch": 0.37, "learning_rate": 1.8375653290290093e-05, "loss": 0.8991, "step": 10840 }, { "epoch": 0.37, "learning_rate": 1.836982242961574e-05, "loss": 0.8696, "step": 10860 }, { "epoch": 0.37, "learning_rate": 1.8363982050836996e-05, "loss": 0.8692, "step": 10880 }, { "epoch": 0.37, "learning_rate": 1.835813216059551e-05, "loss": 0.892, "step": 10900 }, { "epoch": 0.37, "learning_rate": 1.8352272765543723e-05, "loss": 0.8675, "step": 10920 }, { "epoch": 0.37, "learning_rate": 1.8346403872344907e-05, "loss": 0.8757, "step": 10940 }, { "epoch": 0.37, "learning_rate": 1.8340525487673128e-05, "loss": 0.942, "step": 10960 }, { "epoch": 0.37, "learning_rate": 1.8334637618213238e-05, "loss": 0.9497, "step": 10980 }, { "epoch": 0.37, "learning_rate": 1.8328740270660887e-05, "loss": 0.8416, "step": 11000 }, { "epoch": 0.37, "learning_rate": 1.83228334517225e-05, "loss": 0.8326, "step": 11020 }, { "epoch": 0.37, "learning_rate": 1.831691716811526e-05, "loss": 0.8522, "step": 11040 }, { "epoch": 0.38, "learning_rate": 1.8310991426567137e-05, "loss": 0.9059, "step": 11060 }, { "epoch": 0.38, "learning_rate": 1.8305056233816833e-05, "loss": 0.9037, "step": 11080 }, { "epoch": 0.38, "learning_rate": 1.8299111596613815e-05, "loss": 0.8691, "step": 11100 }, { "epoch": 0.38, "learning_rate": 1.8293157521718284e-05, "loss": 0.8107, "step": 11120 }, { "epoch": 0.38, "learning_rate": 1.8287194015901167e-05, "loss": 0.9504, "step": 11140 }, { "epoch": 0.38, "learning_rate": 1.8281221085944128e-05, "loss": 0.8615, "step": 11160 }, { "epoch": 0.38, "learning_rate": 1.827523873863954e-05, "loss": 0.8942, "step": 11180 }, { "epoch": 0.38, "learning_rate": 1.8269246980790482e-05, "loss": 0.927, "step": 11200 }, { "epoch": 0.38, "learning_rate": 1.8263245819210745e-05, "loss": 0.9224, "step": 11220 }, { "epoch": 0.38, "learning_rate": 1.825723526072481e-05, "loss": 0.9215, "step": 11240 }, { "epoch": 0.38, "learning_rate": 1.8251215312167833e-05, "loss": 0.8342, "step": 11260 }, { "epoch": 0.38, "learning_rate": 1.8245185980385673e-05, "loss": 0.8617, "step": 11280 }, { "epoch": 0.38, "learning_rate": 1.823914727223483e-05, "loss": 0.8649, "step": 11300 }, { "epoch": 0.38, "learning_rate": 1.823309919458249e-05, "loss": 0.9609, "step": 11320 }, { "epoch": 0.38, "learning_rate": 1.8227041754306484e-05, "loss": 0.9503, "step": 11340 }, { "epoch": 0.39, "learning_rate": 1.822097495829529e-05, "loss": 0.8748, "step": 11360 }, { "epoch": 0.39, "learning_rate": 1.821489881344803e-05, "loss": 0.9055, "step": 11380 }, { "epoch": 0.39, "learning_rate": 1.8208813326674444e-05, "loss": 0.8596, "step": 11400 }, { "epoch": 0.39, "learning_rate": 1.8202718504894917e-05, "loss": 0.9291, "step": 11420 }, { "epoch": 0.39, "learning_rate": 1.8196614355040436e-05, "loss": 0.9413, "step": 11440 }, { "epoch": 0.39, "learning_rate": 1.819050088405259e-05, "loss": 0.9221, "step": 11460 }, { "epoch": 0.39, "learning_rate": 1.818437809888359e-05, "loss": 0.9094, "step": 11480 }, { "epoch": 0.39, "learning_rate": 1.817824600649621e-05, "loss": 0.821, "step": 11500 }, { "epoch": 0.39, "learning_rate": 1.8172104613863838e-05, "loss": 0.9144, "step": 11520 }, { "epoch": 0.39, "learning_rate": 1.816595392797041e-05, "loss": 0.8497, "step": 11540 }, { "epoch": 0.39, "learning_rate": 1.8159793955810453e-05, "loss": 0.9188, "step": 11560 }, { "epoch": 0.39, "learning_rate": 1.815362470438904e-05, "loss": 0.8214, "step": 11580 }, { "epoch": 0.39, "learning_rate": 1.8147446180721802e-05, "loss": 0.9231, "step": 11600 }, { "epoch": 0.39, "learning_rate": 1.8141258391834914e-05, "loss": 0.8962, "step": 11620 }, { "epoch": 0.4, "learning_rate": 1.8135061344765088e-05, "loss": 0.8641, "step": 11640 }, { "epoch": 0.4, "learning_rate": 1.812885504655956e-05, "loss": 0.8497, "step": 11660 }, { "epoch": 0.4, "learning_rate": 1.8122639504276096e-05, "loss": 0.8791, "step": 11680 }, { "epoch": 0.4, "learning_rate": 1.811641472498296e-05, "loss": 0.8411, "step": 11700 }, { "epoch": 0.4, "learning_rate": 1.8110180715758934e-05, "loss": 0.9839, "step": 11720 }, { "epoch": 0.4, "learning_rate": 1.8103937483693288e-05, "loss": 0.9035, "step": 11740 }, { "epoch": 0.4, "learning_rate": 1.809768503588578e-05, "loss": 0.8555, "step": 11760 }, { "epoch": 0.4, "learning_rate": 1.8091423379446658e-05, "loss": 0.8599, "step": 11780 }, { "epoch": 0.4, "learning_rate": 1.8085152521496627e-05, "loss": 0.8166, "step": 11800 }, { "epoch": 0.4, "learning_rate": 1.8078872469166864e-05, "loss": 0.8676, "step": 11820 }, { "epoch": 0.4, "learning_rate": 1.8072583229599004e-05, "loss": 0.8732, "step": 11840 }, { "epoch": 0.4, "learning_rate": 1.8066284809945128e-05, "loss": 0.9253, "step": 11860 }, { "epoch": 0.4, "learning_rate": 1.8059977217367753e-05, "loss": 0.918, "step": 11880 }, { "epoch": 0.4, "learning_rate": 1.8053660459039833e-05, "loss": 0.8492, "step": 11900 }, { "epoch": 0.4, "learning_rate": 1.8047334542144738e-05, "loss": 0.8387, "step": 11920 }, { "epoch": 0.41, "learning_rate": 1.8040999473876258e-05, "loss": 0.8212, "step": 11940 }, { "epoch": 0.41, "learning_rate": 1.8034655261438588e-05, "loss": 0.8189, "step": 11960 }, { "epoch": 0.41, "learning_rate": 1.8028301912046325e-05, "loss": 0.8555, "step": 11980 }, { "epoch": 0.41, "learning_rate": 1.8021939432924456e-05, "loss": 0.8558, "step": 12000 }, { "epoch": 0.41, "learning_rate": 1.8015567831308344e-05, "loss": 0.935, "step": 12020 }, { "epoch": 0.41, "learning_rate": 1.800918711444373e-05, "loss": 0.8454, "step": 12040 }, { "epoch": 0.41, "learning_rate": 1.8002797289586726e-05, "loss": 0.8995, "step": 12060 }, { "epoch": 0.41, "learning_rate": 1.7996398364003797e-05, "loss": 0.8524, "step": 12080 }, { "epoch": 0.41, "learning_rate": 1.7989990344971747e-05, "loss": 0.8203, "step": 12100 }, { "epoch": 0.41, "learning_rate": 1.7983573239777746e-05, "loss": 0.8227, "step": 12120 }, { "epoch": 0.41, "learning_rate": 1.797714705571927e-05, "loss": 0.8572, "step": 12140 }, { "epoch": 0.41, "learning_rate": 1.7970711800104138e-05, "loss": 0.8715, "step": 12160 }, { "epoch": 0.41, "learning_rate": 1.7964267480250476e-05, "loss": 0.8814, "step": 12180 }, { "epoch": 0.41, "learning_rate": 1.7957814103486716e-05, "loss": 0.8674, "step": 12200 }, { "epoch": 0.41, "learning_rate": 1.7951351677151604e-05, "loss": 0.8933, "step": 12220 }, { "epoch": 0.42, "learning_rate": 1.7944880208594156e-05, "loss": 0.8717, "step": 12240 }, { "epoch": 0.42, "learning_rate": 1.7938399705173687e-05, "loss": 0.8722, "step": 12260 }, { "epoch": 0.42, "learning_rate": 1.7931910174259777e-05, "loss": 0.8855, "step": 12280 }, { "epoch": 0.42, "learning_rate": 1.792541162323228e-05, "loss": 0.8634, "step": 12300 }, { "epoch": 0.42, "learning_rate": 1.7918904059481303e-05, "loss": 0.9169, "step": 12320 }, { "epoch": 0.42, "learning_rate": 1.79123874904072e-05, "loss": 0.9152, "step": 12340 }, { "epoch": 0.42, "learning_rate": 1.7905861923420568e-05, "loss": 0.8336, "step": 12360 }, { "epoch": 0.42, "learning_rate": 1.7899327365942242e-05, "loss": 0.8865, "step": 12380 }, { "epoch": 0.42, "learning_rate": 1.7892783825403272e-05, "loss": 0.9224, "step": 12400 }, { "epoch": 0.42, "learning_rate": 1.788623130924493e-05, "loss": 0.899, "step": 12420 }, { "epoch": 0.42, "learning_rate": 1.7879669824918685e-05, "loss": 0.8966, "step": 12440 }, { "epoch": 0.42, "learning_rate": 1.7873099379886217e-05, "loss": 0.896, "step": 12460 }, { "epoch": 0.42, "learning_rate": 1.7866519981619396e-05, "loss": 0.9448, "step": 12480 }, { "epoch": 0.42, "learning_rate": 1.7859931637600256e-05, "loss": 0.8598, "step": 12500 }, { "epoch": 0.42, "learning_rate": 1.7853334355321025e-05, "loss": 0.8833, "step": 12520 }, { "epoch": 0.43, "learning_rate": 1.784672814228409e-05, "loss": 0.8542, "step": 12540 }, { "epoch": 0.43, "learning_rate": 1.784011300600198e-05, "loss": 0.8485, "step": 12560 }, { "epoch": 0.43, "learning_rate": 1.783348895399739e-05, "loss": 0.9221, "step": 12580 }, { "epoch": 0.43, "learning_rate": 1.7826855993803148e-05, "loss": 0.8561, "step": 12600 }, { "epoch": 0.43, "learning_rate": 1.7820214132962205e-05, "loss": 0.8166, "step": 12620 }, { "epoch": 0.43, "learning_rate": 1.7813563379027644e-05, "loss": 0.8632, "step": 12640 }, { "epoch": 0.43, "learning_rate": 1.7806903739562652e-05, "loss": 0.8326, "step": 12660 }, { "epoch": 0.43, "learning_rate": 1.7800235222140525e-05, "loss": 0.9188, "step": 12680 }, { "epoch": 0.43, "learning_rate": 1.7793557834344662e-05, "loss": 0.8959, "step": 12700 }, { "epoch": 0.43, "learning_rate": 1.7786871583768536e-05, "loss": 0.796, "step": 12720 }, { "epoch": 0.43, "learning_rate": 1.7780176478015706e-05, "loss": 0.7971, "step": 12740 }, { "epoch": 0.43, "learning_rate": 1.7773472524699803e-05, "loss": 0.9084, "step": 12760 }, { "epoch": 0.43, "learning_rate": 1.7766759731444512e-05, "loss": 0.8869, "step": 12780 }, { "epoch": 0.43, "learning_rate": 1.776003810588358e-05, "loss": 0.8769, "step": 12800 }, { "epoch": 0.44, "learning_rate": 1.7753307655660792e-05, "loss": 0.8849, "step": 12820 }, { "epoch": 0.44, "learning_rate": 1.7746568388429966e-05, "loss": 0.8451, "step": 12840 }, { "epoch": 0.44, "learning_rate": 1.7739820311854957e-05, "loss": 0.9419, "step": 12860 }, { "epoch": 0.44, "learning_rate": 1.7733063433609623e-05, "loss": 0.9293, "step": 12880 }, { "epoch": 0.44, "learning_rate": 1.7726297761377847e-05, "loss": 0.8162, "step": 12900 }, { "epoch": 0.44, "learning_rate": 1.7719523302853498e-05, "loss": 0.8825, "step": 12920 }, { "epoch": 0.44, "learning_rate": 1.771274006574045e-05, "loss": 0.8961, "step": 12940 }, { "epoch": 0.44, "learning_rate": 1.7705948057752547e-05, "loss": 0.9109, "step": 12960 }, { "epoch": 0.44, "learning_rate": 1.769914728661362e-05, "loss": 0.8448, "step": 12980 }, { "epoch": 0.44, "learning_rate": 1.7692337760057454e-05, "loss": 0.9286, "step": 13000 }, { "epoch": 0.44, "learning_rate": 1.76855194858278e-05, "loss": 0.9075, "step": 13020 }, { "epoch": 0.44, "learning_rate": 1.767869247167835e-05, "loss": 0.8712, "step": 13040 }, { "epoch": 0.44, "learning_rate": 1.7671856725372737e-05, "loss": 0.8144, "step": 13060 }, { "epoch": 0.44, "learning_rate": 1.7665012254684522e-05, "loss": 0.8475, "step": 13080 }, { "epoch": 0.44, "learning_rate": 1.76581590673972e-05, "loss": 0.9759, "step": 13100 }, { "epoch": 0.45, "learning_rate": 1.7651297171304157e-05, "loss": 0.821, "step": 13120 }, { "epoch": 0.45, "learning_rate": 1.7644426574208698e-05, "loss": 0.8912, "step": 13140 }, { "epoch": 0.45, "learning_rate": 1.763754728392402e-05, "loss": 0.9006, "step": 13160 }, { "epoch": 0.45, "learning_rate": 1.7630659308273204e-05, "loss": 0.87, "step": 13180 }, { "epoch": 0.45, "learning_rate": 1.7623762655089208e-05, "loss": 0.9281, "step": 13200 }, { "epoch": 0.45, "learning_rate": 1.761685733221486e-05, "loss": 0.8586, "step": 13220 }, { "epoch": 0.45, "learning_rate": 1.7609943347502846e-05, "loss": 0.8539, "step": 13240 }, { "epoch": 0.45, "learning_rate": 1.7603020708815695e-05, "loss": 0.8956, "step": 13260 }, { "epoch": 0.45, "learning_rate": 1.7596089424025794e-05, "loss": 0.9335, "step": 13280 }, { "epoch": 0.45, "learning_rate": 1.7589149501015345e-05, "loss": 0.8925, "step": 13300 }, { "epoch": 0.45, "learning_rate": 1.758220094767638e-05, "loss": 0.8832, "step": 13320 }, { "epoch": 0.45, "learning_rate": 1.757524377191075e-05, "loss": 0.8692, "step": 13340 }, { "epoch": 0.45, "learning_rate": 1.7568277981630107e-05, "loss": 0.9298, "step": 13360 }, { "epoch": 0.45, "learning_rate": 1.7561303584755903e-05, "loss": 0.9228, "step": 13380 }, { "epoch": 0.45, "learning_rate": 1.7554320589219368e-05, "loss": 0.8039, "step": 13400 }, { "epoch": 0.46, "learning_rate": 1.754732900296151e-05, "loss": 0.9091, "step": 13420 }, { "epoch": 0.46, "learning_rate": 1.754032883393313e-05, "loss": 0.9266, "step": 13440 }, { "epoch": 0.46, "learning_rate": 1.7533320090094763e-05, "loss": 0.8973, "step": 13460 }, { "epoch": 0.46, "learning_rate": 1.7526302779416703e-05, "loss": 0.8943, "step": 13480 }, { "epoch": 0.46, "learning_rate": 1.7519276909878988e-05, "loss": 0.9595, "step": 13500 }, { "epoch": 0.46, "learning_rate": 1.751224248947139e-05, "loss": 0.9303, "step": 13520 }, { "epoch": 0.46, "learning_rate": 1.7505199526193398e-05, "loss": 0.7938, "step": 13540 }, { "epoch": 0.46, "learning_rate": 1.749814802805423e-05, "loss": 0.8912, "step": 13560 }, { "epoch": 0.46, "learning_rate": 1.7491088003072798e-05, "loss": 0.908, "step": 13580 }, { "epoch": 0.46, "learning_rate": 1.748401945927771e-05, "loss": 0.9292, "step": 13600 }, { "epoch": 0.46, "learning_rate": 1.7476942404707267e-05, "loss": 0.904, "step": 13620 }, { "epoch": 0.46, "learning_rate": 1.746985684740945e-05, "loss": 0.8896, "step": 13640 }, { "epoch": 0.46, "learning_rate": 1.74627627954419e-05, "loss": 0.8471, "step": 13660 }, { "epoch": 0.46, "learning_rate": 1.745566025687193e-05, "loss": 0.8564, "step": 13680 }, { "epoch": 0.47, "learning_rate": 1.74485492397765e-05, "loss": 0.8961, "step": 13700 }, { "epoch": 0.47, "learning_rate": 1.74414297522422e-05, "loss": 0.8472, "step": 13720 }, { "epoch": 0.47, "learning_rate": 1.7434301802365264e-05, "loss": 0.8185, "step": 13740 }, { "epoch": 0.47, "learning_rate": 1.7427165398251557e-05, "loss": 0.8764, "step": 13760 }, { "epoch": 0.47, "learning_rate": 1.7420020548016538e-05, "loss": 0.912, "step": 13780 }, { "epoch": 0.47, "learning_rate": 1.7412867259785286e-05, "loss": 0.8544, "step": 13800 }, { "epoch": 0.47, "learning_rate": 1.7405705541692476e-05, "loss": 0.9203, "step": 13820 }, { "epoch": 0.47, "learning_rate": 1.7398535401882353e-05, "loss": 0.83, "step": 13840 }, { "epoch": 0.47, "learning_rate": 1.739135684850876e-05, "loss": 0.9542, "step": 13860 }, { "epoch": 0.47, "learning_rate": 1.7384169889735094e-05, "loss": 0.8845, "step": 13880 }, { "epoch": 0.47, "learning_rate": 1.7376974533734317e-05, "loss": 0.9061, "step": 13900 }, { "epoch": 0.47, "learning_rate": 1.7369770788688937e-05, "loss": 0.9571, "step": 13920 }, { "epoch": 0.47, "learning_rate": 1.7362558662791003e-05, "loss": 0.8479, "step": 13940 }, { "epoch": 0.47, "learning_rate": 1.7355338164242098e-05, "loss": 0.8484, "step": 13960 }, { "epoch": 0.47, "learning_rate": 1.734810930125332e-05, "loss": 0.862, "step": 13980 }, { "epoch": 0.48, "learning_rate": 1.734087208204528e-05, "loss": 0.8531, "step": 14000 }, { "epoch": 0.48, "learning_rate": 1.73336265148481e-05, "loss": 0.8025, "step": 14020 }, { "epoch": 0.48, "learning_rate": 1.7326372607901388e-05, "loss": 0.8492, "step": 14040 }, { "epoch": 0.48, "learning_rate": 1.7319110369454232e-05, "loss": 0.8061, "step": 14060 }, { "epoch": 0.48, "learning_rate": 1.7311839807765204e-05, "loss": 0.8537, "step": 14080 }, { "epoch": 0.48, "learning_rate": 1.7304560931102334e-05, "loss": 0.8739, "step": 14100 }, { "epoch": 0.48, "learning_rate": 1.729727374774311e-05, "loss": 0.8787, "step": 14120 }, { "epoch": 0.48, "learning_rate": 1.7289978265974466e-05, "loss": 0.8357, "step": 14140 }, { "epoch": 0.48, "learning_rate": 1.728267449409278e-05, "loss": 0.8385, "step": 14160 }, { "epoch": 0.48, "learning_rate": 1.7275362440403844e-05, "loss": 0.9139, "step": 14180 }, { "epoch": 0.48, "learning_rate": 1.7268042113222877e-05, "loss": 0.9294, "step": 14200 }, { "epoch": 0.48, "learning_rate": 1.726071352087451e-05, "loss": 0.9138, "step": 14220 }, { "epoch": 0.48, "learning_rate": 1.725337667169276e-05, "loss": 0.8542, "step": 14240 }, { "epoch": 0.48, "learning_rate": 1.7246031574021047e-05, "loss": 0.8184, "step": 14260 }, { "epoch": 0.48, "learning_rate": 1.723867823621216e-05, "loss": 0.8665, "step": 14280 }, { "epoch": 0.49, "learning_rate": 1.7231316666628268e-05, "loss": 0.8361, "step": 14300 }, { "epoch": 0.49, "learning_rate": 1.7223946873640897e-05, "loss": 0.8156, "step": 14320 }, { "epoch": 0.49, "learning_rate": 1.7216568865630924e-05, "loss": 0.8673, "step": 14340 }, { "epoch": 0.49, "learning_rate": 1.720918265098857e-05, "loss": 0.8501, "step": 14360 }, { "epoch": 0.49, "learning_rate": 1.720178823811338e-05, "loss": 0.8468, "step": 14380 }, { "epoch": 0.49, "learning_rate": 1.7194385635414242e-05, "loss": 0.8028, "step": 14400 }, { "epoch": 0.49, "learning_rate": 1.718697485130934e-05, "loss": 0.837, "step": 14420 }, { "epoch": 0.49, "learning_rate": 1.717955589422616e-05, "loss": 0.8196, "step": 14440 }, { "epoch": 0.49, "learning_rate": 1.7172128772601497e-05, "loss": 0.932, "step": 14460 }, { "epoch": 0.49, "learning_rate": 1.7164693494881418e-05, "loss": 0.85, "step": 14480 }, { "epoch": 0.49, "learning_rate": 1.715725006952127e-05, "loss": 0.8667, "step": 14500 }, { "epoch": 0.49, "learning_rate": 1.7149798504985665e-05, "loss": 0.8423, "step": 14520 }, { "epoch": 0.49, "learning_rate": 1.7142338809748475e-05, "loss": 0.9186, "step": 14540 }, { "epoch": 0.49, "learning_rate": 1.7134870992292808e-05, "loss": 0.8717, "step": 14560 }, { "epoch": 0.49, "learning_rate": 1.712739506111102e-05, "loss": 0.8056, "step": 14580 }, { "epoch": 0.5, "learning_rate": 1.7119911024704684e-05, "loss": 0.9074, "step": 14600 }, { "epoch": 0.5, "learning_rate": 1.7112418891584598e-05, "loss": 0.867, "step": 14620 }, { "epoch": 0.5, "learning_rate": 1.7104918670270763e-05, "loss": 0.8923, "step": 14640 }, { "epoch": 0.5, "learning_rate": 1.7097410369292376e-05, "loss": 0.915, "step": 14660 }, { "epoch": 0.5, "learning_rate": 1.708989399718783e-05, "loss": 0.9823, "step": 14680 }, { "epoch": 0.5, "learning_rate": 1.708236956250469e-05, "loss": 0.8399, "step": 14700 }, { "epoch": 0.5, "learning_rate": 1.707483707379969e-05, "loss": 0.8465, "step": 14720 }, { "epoch": 0.5, "learning_rate": 1.706729653963872e-05, "loss": 0.922, "step": 14740 }, { "epoch": 0.5, "learning_rate": 1.7059747968596836e-05, "loss": 0.8394, "step": 14760 }, { "epoch": 0.5, "learning_rate": 1.705219136925821e-05, "loss": 0.7721, "step": 14780 }, { "epoch": 0.5, "learning_rate": 1.704462675021616e-05, "loss": 0.7611, "step": 14800 }, { "epoch": 0.5, "learning_rate": 1.7037054120073115e-05, "loss": 0.8898, "step": 14820 }, { "epoch": 0.5, "learning_rate": 1.7029473487440617e-05, "loss": 0.8221, "step": 14840 }, { "epoch": 0.5, "learning_rate": 1.7021884860939314e-05, "loss": 0.9067, "step": 14860 }, { "epoch": 0.51, "learning_rate": 1.7014288249198937e-05, "loss": 0.8242, "step": 14880 }, { "epoch": 0.51, "learning_rate": 1.70066836608583e-05, "loss": 0.8687, "step": 14900 }, { "epoch": 0.51, "learning_rate": 1.699907110456529e-05, "loss": 0.917, "step": 14920 }, { "epoch": 0.51, "learning_rate": 1.6991450588976853e-05, "loss": 0.8522, "step": 14940 }, { "epoch": 0.51, "learning_rate": 1.698382212275899e-05, "loss": 0.8023, "step": 14960 }, { "epoch": 0.51, "learning_rate": 1.697618571458673e-05, "loss": 0.8668, "step": 14980 }, { "epoch": 0.51, "learning_rate": 1.6968541373144157e-05, "loss": 0.8549, "step": 15000 }, { "epoch": 0.51, "learning_rate": 1.696088910712435e-05, "loss": 0.851, "step": 15020 }, { "epoch": 0.51, "learning_rate": 1.695322892522943e-05, "loss": 0.9246, "step": 15040 }, { "epoch": 0.51, "learning_rate": 1.6945560836170485e-05, "loss": 0.9117, "step": 15060 }, { "epoch": 0.51, "learning_rate": 1.6937884848667625e-05, "loss": 0.8651, "step": 15080 }, { "epoch": 0.51, "learning_rate": 1.6930200971449923e-05, "loss": 0.8053, "step": 15100 }, { "epoch": 0.51, "learning_rate": 1.692250921325544e-05, "loss": 0.8642, "step": 15120 }, { "epoch": 0.51, "learning_rate": 1.691480958283118e-05, "loss": 0.7826, "step": 15140 }, { "epoch": 0.51, "learning_rate": 1.6907102088933118e-05, "loss": 0.8057, "step": 15160 }, { "epoch": 0.52, "learning_rate": 1.6899386740326164e-05, "loss": 0.8133, "step": 15180 }, { "epoch": 0.52, "learning_rate": 1.6891663545784146e-05, "loss": 0.8651, "step": 15200 }, { "epoch": 0.52, "learning_rate": 1.6883932514089842e-05, "loss": 0.8253, "step": 15220 }, { "epoch": 0.52, "learning_rate": 1.6876193654034922e-05, "loss": 0.9001, "step": 15240 }, { "epoch": 0.52, "learning_rate": 1.6868446974419962e-05, "loss": 0.8583, "step": 15260 }, { "epoch": 0.52, "learning_rate": 1.686069248405444e-05, "loss": 0.8354, "step": 15280 }, { "epoch": 0.52, "learning_rate": 1.68529301917567e-05, "loss": 0.8794, "step": 15300 }, { "epoch": 0.52, "learning_rate": 1.6845160106353975e-05, "loss": 0.8645, "step": 15320 }, { "epoch": 0.52, "learning_rate": 1.6837382236682347e-05, "loss": 0.8513, "step": 15340 }, { "epoch": 0.52, "learning_rate": 1.682959659158676e-05, "loss": 0.8559, "step": 15360 }, { "epoch": 0.52, "learning_rate": 1.6821803179920996e-05, "loss": 0.7707, "step": 15380 }, { "epoch": 0.52, "learning_rate": 1.681400201054766e-05, "loss": 0.7838, "step": 15400 }, { "epoch": 0.52, "learning_rate": 1.68061930923382e-05, "loss": 0.8378, "step": 15420 }, { "epoch": 0.52, "learning_rate": 1.6798376434172862e-05, "loss": 0.8646, "step": 15440 }, { "epoch": 0.52, "learning_rate": 1.6790552044940692e-05, "loss": 0.8471, "step": 15460 }, { "epoch": 0.53, "learning_rate": 1.678271993353953e-05, "loss": 0.8338, "step": 15480 }, { "epoch": 0.53, "learning_rate": 1.6774880108876005e-05, "loss": 0.8486, "step": 15500 }, { "epoch": 0.53, "learning_rate": 1.6767032579865503e-05, "loss": 0.8262, "step": 15520 }, { "epoch": 0.53, "learning_rate": 1.6759177355432186e-05, "loss": 0.8425, "step": 15540 }, { "epoch": 0.53, "learning_rate": 1.6751314444508967e-05, "loss": 0.8316, "step": 15560 }, { "epoch": 0.53, "learning_rate": 1.674344385603748e-05, "loss": 0.8687, "step": 15580 }, { "epoch": 0.53, "learning_rate": 1.6735565598968114e-05, "loss": 0.8748, "step": 15600 }, { "epoch": 0.53, "learning_rate": 1.672767968225997e-05, "loss": 0.8936, "step": 15620 }, { "epoch": 0.53, "learning_rate": 1.6719786114880846e-05, "loss": 0.8964, "step": 15640 }, { "epoch": 0.53, "learning_rate": 1.6711884905807264e-05, "loss": 0.8166, "step": 15660 }, { "epoch": 0.53, "learning_rate": 1.670397606402442e-05, "loss": 0.8327, "step": 15680 }, { "epoch": 0.53, "learning_rate": 1.66960595985262e-05, "loss": 0.8051, "step": 15700 }, { "epoch": 0.53, "learning_rate": 1.6688135518315144e-05, "loss": 0.8647, "step": 15720 }, { "epoch": 0.53, "learning_rate": 1.668020383240247e-05, "loss": 0.8963, "step": 15740 }, { "epoch": 0.53, "learning_rate": 1.6672264549808033e-05, "loss": 0.8542, "step": 15760 }, { "epoch": 0.54, "learning_rate": 1.6664317679560333e-05, "loss": 0.8557, "step": 15780 }, { "epoch": 0.54, "learning_rate": 1.665636323069649e-05, "loss": 0.8623, "step": 15800 }, { "epoch": 0.54, "learning_rate": 1.6648401212262253e-05, "loss": 0.8469, "step": 15820 }, { "epoch": 0.54, "learning_rate": 1.6640431633311975e-05, "loss": 0.8281, "step": 15840 }, { "epoch": 0.54, "learning_rate": 1.6632454502908605e-05, "loss": 0.9213, "step": 15860 }, { "epoch": 0.54, "learning_rate": 1.662446983012368e-05, "loss": 0.8172, "step": 15880 }, { "epoch": 0.54, "learning_rate": 1.6616477624037317e-05, "loss": 0.9342, "step": 15900 }, { "epoch": 0.54, "learning_rate": 1.6608477893738197e-05, "loss": 0.8882, "step": 15920 }, { "epoch": 0.54, "learning_rate": 1.6600470648323554e-05, "loss": 0.8612, "step": 15940 }, { "epoch": 0.54, "learning_rate": 1.6592455896899177e-05, "loss": 0.7878, "step": 15960 }, { "epoch": 0.54, "learning_rate": 1.658443364857939e-05, "loss": 0.8418, "step": 15980 }, { "epoch": 0.54, "learning_rate": 1.6576403912487026e-05, "loss": 0.86, "step": 16000 }, { "epoch": 0.54, "learning_rate": 1.656836669775346e-05, "loss": 0.9031, "step": 16020 }, { "epoch": 0.54, "learning_rate": 1.6560322013518542e-05, "loss": 0.7993, "step": 16040 }, { "epoch": 0.55, "learning_rate": 1.6552269868930646e-05, "loss": 0.9313, "step": 16060 }, { "epoch": 0.55, "learning_rate": 1.6544210273146608e-05, "loss": 0.8022, "step": 16080 }, { "epoch": 0.55, "learning_rate": 1.6536143235331743e-05, "loss": 0.8633, "step": 16100 }, { "epoch": 0.55, "learning_rate": 1.6528068764659838e-05, "loss": 0.8447, "step": 16120 }, { "epoch": 0.55, "learning_rate": 1.651998687031312e-05, "loss": 0.8093, "step": 16140 }, { "epoch": 0.55, "learning_rate": 1.6511897561482263e-05, "loss": 0.8275, "step": 16160 }, { "epoch": 0.55, "learning_rate": 1.6503800847366373e-05, "loss": 1.0032, "step": 16180 }, { "epoch": 0.55, "learning_rate": 1.6495696737172982e-05, "loss": 0.8871, "step": 16200 }, { "epoch": 0.55, "learning_rate": 1.6487585240118025e-05, "loss": 0.8384, "step": 16220 }, { "epoch": 0.55, "learning_rate": 1.647946636542584e-05, "loss": 0.8278, "step": 16240 }, { "epoch": 0.55, "learning_rate": 1.6471340122329158e-05, "loss": 0.8858, "step": 16260 }, { "epoch": 0.55, "learning_rate": 1.6463206520069083e-05, "loss": 0.8531, "step": 16280 }, { "epoch": 0.55, "learning_rate": 1.6455065567895098e-05, "loss": 0.8399, "step": 16300 }, { "epoch": 0.55, "learning_rate": 1.6446917275065032e-05, "loss": 0.8538, "step": 16320 }, { "epoch": 0.55, "learning_rate": 1.643876165084507e-05, "loss": 0.7951, "step": 16340 }, { "epoch": 0.56, "learning_rate": 1.6430598704509723e-05, "loss": 0.8208, "step": 16360 }, { "epoch": 0.56, "learning_rate": 1.642242844534185e-05, "loss": 0.8307, "step": 16380 }, { "epoch": 0.56, "learning_rate": 1.6414250882632607e-05, "loss": 0.8354, "step": 16400 }, { "epoch": 0.56, "learning_rate": 1.6406066025681462e-05, "loss": 0.9212, "step": 16420 }, { "epoch": 0.56, "learning_rate": 1.6397873883796183e-05, "loss": 0.8434, "step": 16440 }, { "epoch": 0.56, "learning_rate": 1.6389674466292812e-05, "loss": 0.8562, "step": 16460 }, { "epoch": 0.56, "learning_rate": 1.638146778249567e-05, "loss": 0.8936, "step": 16480 }, { "epoch": 0.56, "learning_rate": 1.637325384173734e-05, "loss": 0.833, "step": 16500 }, { "epoch": 0.56, "learning_rate": 1.6365032653358665e-05, "loss": 0.7673, "step": 16520 }, { "epoch": 0.56, "learning_rate": 1.635680422670872e-05, "loss": 0.8474, "step": 16540 }, { "epoch": 0.56, "learning_rate": 1.6348568571144816e-05, "loss": 0.9436, "step": 16560 }, { "epoch": 0.56, "learning_rate": 1.634032569603248e-05, "loss": 0.8744, "step": 16580 }, { "epoch": 0.56, "learning_rate": 1.633207561074546e-05, "loss": 0.8673, "step": 16600 }, { "epoch": 0.56, "learning_rate": 1.6323818324665693e-05, "loss": 0.827, "step": 16620 }, { "epoch": 0.56, "learning_rate": 1.63155538471833e-05, "loss": 0.8584, "step": 16640 }, { "epoch": 0.57, "learning_rate": 1.6307282187696602e-05, "loss": 0.8387, "step": 16660 }, { "epoch": 0.57, "learning_rate": 1.629900335561206e-05, "loss": 0.8471, "step": 16680 }, { "epoch": 0.57, "learning_rate": 1.6290717360344312e-05, "loss": 0.7533, "step": 16700 }, { "epoch": 0.57, "learning_rate": 1.6282424211316134e-05, "loss": 0.8486, "step": 16720 }, { "epoch": 0.57, "learning_rate": 1.6274123917958438e-05, "loss": 0.8284, "step": 16740 }, { "epoch": 0.57, "learning_rate": 1.626581648971026e-05, "loss": 0.8575, "step": 16760 }, { "epoch": 0.57, "learning_rate": 1.625750193601875e-05, "loss": 0.8387, "step": 16780 }, { "epoch": 0.57, "learning_rate": 1.624918026633916e-05, "loss": 0.8599, "step": 16800 }, { "epoch": 0.57, "learning_rate": 1.6240851490134834e-05, "loss": 0.8463, "step": 16820 }, { "epoch": 0.57, "learning_rate": 1.6232515616877204e-05, "loss": 0.8325, "step": 16840 }, { "epoch": 0.57, "learning_rate": 1.6224172656045767e-05, "loss": 0.8539, "step": 16860 }, { "epoch": 0.57, "learning_rate": 1.6215822617128075e-05, "loss": 0.8314, "step": 16880 }, { "epoch": 0.57, "learning_rate": 1.620746550961974e-05, "loss": 0.8184, "step": 16900 }, { "epoch": 0.57, "learning_rate": 1.6199101343024405e-05, "loss": 0.838, "step": 16920 }, { "epoch": 0.58, "learning_rate": 1.619073012685374e-05, "loss": 0.8426, "step": 16940 }, { "epoch": 0.58, "learning_rate": 1.618235187062744e-05, "loss": 0.7958, "step": 16960 }, { "epoch": 0.58, "learning_rate": 1.6173966583873195e-05, "loss": 0.8954, "step": 16980 }, { "epoch": 0.58, "learning_rate": 1.6165574276126702e-05, "loss": 0.8166, "step": 17000 }, { "epoch": 0.58, "learning_rate": 1.6157174956931627e-05, "loss": 0.8335, "step": 17020 }, { "epoch": 0.58, "learning_rate": 1.6148768635839623e-05, "loss": 0.8346, "step": 17040 }, { "epoch": 0.58, "learning_rate": 1.6140355322410304e-05, "loss": 0.871, "step": 17060 }, { "epoch": 0.58, "learning_rate": 1.6131935026211224e-05, "loss": 0.8357, "step": 17080 }, { "epoch": 0.58, "learning_rate": 1.612350775681789e-05, "loss": 0.802, "step": 17100 }, { "epoch": 0.58, "learning_rate": 1.611507352381373e-05, "loss": 0.8244, "step": 17120 }, { "epoch": 0.58, "learning_rate": 1.6106632336790105e-05, "loss": 0.7501, "step": 17140 }, { "epoch": 0.58, "learning_rate": 1.609818420534627e-05, "loss": 0.792, "step": 17160 }, { "epoch": 0.58, "learning_rate": 1.608972913908938e-05, "loss": 0.7854, "step": 17180 }, { "epoch": 0.58, "learning_rate": 1.608126714763448e-05, "loss": 0.8522, "step": 17200 }, { "epoch": 0.58, "learning_rate": 1.6072798240604487e-05, "loss": 0.8495, "step": 17220 }, { "epoch": 0.59, "learning_rate": 1.6064322427630186e-05, "loss": 0.9158, "step": 17240 }, { "epoch": 0.59, "learning_rate": 1.6055839718350207e-05, "loss": 0.8612, "step": 17260 }, { "epoch": 0.59, "learning_rate": 1.6047350122411038e-05, "loss": 0.8484, "step": 17280 }, { "epoch": 0.59, "learning_rate": 1.6038853649466983e-05, "loss": 0.8433, "step": 17300 }, { "epoch": 0.59, "learning_rate": 1.6030350309180173e-05, "loss": 0.8917, "step": 17320 }, { "epoch": 0.59, "learning_rate": 1.6021840111220548e-05, "loss": 0.7789, "step": 17340 }, { "epoch": 0.59, "learning_rate": 1.6013323065265848e-05, "loss": 0.8575, "step": 17360 }, { "epoch": 0.59, "learning_rate": 1.60047991810016e-05, "loss": 0.812, "step": 17380 }, { "epoch": 0.59, "learning_rate": 1.59962684681211e-05, "loss": 0.828, "step": 17400 }, { "epoch": 0.59, "learning_rate": 1.5987730936325424e-05, "loss": 0.8474, "step": 17420 }, { "epoch": 0.59, "learning_rate": 1.597918659532339e-05, "loss": 0.8686, "step": 17440 }, { "epoch": 0.59, "learning_rate": 1.5970635454831565e-05, "loss": 0.7824, "step": 17460 }, { "epoch": 0.59, "learning_rate": 1.5962077524574248e-05, "loss": 0.9101, "step": 17480 }, { "epoch": 0.59, "learning_rate": 1.595351281428345e-05, "loss": 0.8496, "step": 17500 }, { "epoch": 0.59, "learning_rate": 1.5944941333698912e-05, "loss": 0.8163, "step": 17520 }, { "epoch": 0.6, "learning_rate": 1.5936363092568058e-05, "loss": 0.854, "step": 17540 }, { "epoch": 0.6, "learning_rate": 1.5927778100646007e-05, "loss": 0.8085, "step": 17560 }, { "epoch": 0.6, "learning_rate": 1.5919186367695546e-05, "loss": 0.8276, "step": 17580 }, { "epoch": 0.6, "learning_rate": 1.5910587903487142e-05, "loss": 0.8778, "step": 17600 }, { "epoch": 0.6, "learning_rate": 1.5901982717798903e-05, "loss": 0.8123, "step": 17620 }, { "epoch": 0.6, "learning_rate": 1.5893370820416594e-05, "loss": 0.8197, "step": 17640 }, { "epoch": 0.6, "learning_rate": 1.5884752221133597e-05, "loss": 0.8753, "step": 17660 }, { "epoch": 0.6, "learning_rate": 1.587612692975093e-05, "loss": 0.837, "step": 17680 }, { "epoch": 0.6, "learning_rate": 1.5867494956077215e-05, "loss": 0.7968, "step": 17700 }, { "epoch": 0.6, "learning_rate": 1.585885630992867e-05, "loss": 0.8335, "step": 17720 }, { "epoch": 0.6, "learning_rate": 1.58502110011291e-05, "loss": 0.8233, "step": 17740 }, { "epoch": 0.6, "learning_rate": 1.5841559039509898e-05, "loss": 0.8636, "step": 17760 }, { "epoch": 0.6, "learning_rate": 1.5832900434910014e-05, "loss": 0.7803, "step": 17780 }, { "epoch": 0.6, "learning_rate": 1.5824235197175954e-05, "loss": 0.8154, "step": 17800 }, { "epoch": 0.6, "learning_rate": 1.5815563336161756e-05, "loss": 0.8779, "step": 17820 }, { "epoch": 0.61, "learning_rate": 1.580688486172902e-05, "loss": 0.8222, "step": 17840 }, { "epoch": 0.61, "learning_rate": 1.5798199783746828e-05, "loss": 0.8423, "step": 17860 }, { "epoch": 0.61, "learning_rate": 1.5789508112091804e-05, "loss": 0.7625, "step": 17880 }, { "epoch": 0.61, "learning_rate": 1.578080985664805e-05, "loss": 0.8077, "step": 17900 }, { "epoch": 0.61, "learning_rate": 1.5772105027307168e-05, "loss": 0.8791, "step": 17920 }, { "epoch": 0.61, "learning_rate": 1.5763393633968223e-05, "loss": 0.8595, "step": 17940 }, { "epoch": 0.61, "learning_rate": 1.5754675686537756e-05, "loss": 0.8115, "step": 17960 }, { "epoch": 0.61, "learning_rate": 1.5745951194929756e-05, "loss": 0.8787, "step": 17980 }, { "epoch": 0.61, "learning_rate": 1.5737220169065656e-05, "loss": 0.7391, "step": 18000 }, { "epoch": 0.61, "learning_rate": 1.5728482618874314e-05, "loss": 0.7967, "step": 18020 }, { "epoch": 0.61, "learning_rate": 1.5719738554292017e-05, "loss": 0.7753, "step": 18040 }, { "epoch": 0.61, "learning_rate": 1.571098798526245e-05, "loss": 0.8517, "step": 18060 }, { "epoch": 0.61, "learning_rate": 1.5702230921736705e-05, "loss": 0.7999, "step": 18080 }, { "epoch": 0.61, "learning_rate": 1.569346737367325e-05, "loss": 0.8186, "step": 18100 }, { "epoch": 0.62, "learning_rate": 1.5684697351037935e-05, "loss": 0.8217, "step": 18120 }, { "epoch": 0.62, "learning_rate": 1.5675920863803967e-05, "loss": 0.8555, "step": 18140 }, { "epoch": 0.62, "learning_rate": 1.566713792195191e-05, "loss": 0.8466, "step": 18160 }, { "epoch": 0.62, "learning_rate": 1.565834853546966e-05, "loss": 0.8224, "step": 18180 }, { "epoch": 0.62, "learning_rate": 1.5649552714352448e-05, "loss": 0.8254, "step": 18200 }, { "epoch": 0.62, "learning_rate": 1.5640750468602822e-05, "loss": 0.8414, "step": 18220 }, { "epoch": 0.62, "learning_rate": 1.563194180823064e-05, "loss": 0.7856, "step": 18240 }, { "epoch": 0.62, "learning_rate": 1.562312674325304e-05, "loss": 0.9037, "step": 18260 }, { "epoch": 0.62, "learning_rate": 1.5614305283694462e-05, "loss": 0.8079, "step": 18280 }, { "epoch": 0.62, "learning_rate": 1.56054774395866e-05, "loss": 0.8576, "step": 18300 }, { "epoch": 0.62, "learning_rate": 1.559664322096843e-05, "loss": 0.8414, "step": 18320 }, { "epoch": 0.62, "learning_rate": 1.5587802637886156e-05, "loss": 0.829, "step": 18340 }, { "epoch": 0.62, "learning_rate": 1.557895570039323e-05, "loss": 0.8201, "step": 18360 }, { "epoch": 0.62, "learning_rate": 1.5570102418550327e-05, "loss": 0.7774, "step": 18380 }, { "epoch": 0.62, "learning_rate": 1.5561242802425337e-05, "loss": 0.8544, "step": 18400 }, { "epoch": 0.63, "learning_rate": 1.5552376862093357e-05, "loss": 0.8639, "step": 18420 }, { "epoch": 0.63, "learning_rate": 1.554350460763667e-05, "loss": 0.8273, "step": 18440 }, { "epoch": 0.63, "learning_rate": 1.5534626049144753e-05, "loss": 0.7759, "step": 18460 }, { "epoch": 0.63, "learning_rate": 1.552574119671423e-05, "loss": 0.8248, "step": 18480 }, { "epoch": 0.63, "learning_rate": 1.55168500604489e-05, "loss": 0.8449, "step": 18500 }, { "epoch": 0.63, "learning_rate": 1.5507952650459697e-05, "loss": 0.7627, "step": 18520 }, { "epoch": 0.63, "learning_rate": 1.54990489768647e-05, "loss": 0.7437, "step": 18540 }, { "epoch": 0.63, "learning_rate": 1.5490139049789097e-05, "loss": 0.8273, "step": 18560 }, { "epoch": 0.63, "learning_rate": 1.5481222879365205e-05, "loss": 0.8037, "step": 18580 }, { "epoch": 0.63, "learning_rate": 1.5472300475732427e-05, "loss": 0.7735, "step": 18600 }, { "epoch": 0.63, "learning_rate": 1.546337184903726e-05, "loss": 0.8053, "step": 18620 }, { "epoch": 0.63, "learning_rate": 1.5454437009433277e-05, "loss": 0.8705, "step": 18640 }, { "epoch": 0.63, "learning_rate": 1.5445495967081114e-05, "loss": 0.8378, "step": 18660 }, { "epoch": 0.63, "learning_rate": 1.543654873214846e-05, "loss": 0.8337, "step": 18680 }, { "epoch": 0.63, "learning_rate": 1.542759531481006e-05, "loss": 0.766, "step": 18700 }, { "epoch": 0.64, "learning_rate": 1.5418635725247666e-05, "loss": 0.8558, "step": 18720 }, { "epoch": 0.64, "learning_rate": 1.540966997365007e-05, "loss": 0.8342, "step": 18740 }, { "epoch": 0.64, "learning_rate": 1.540069807021306e-05, "loss": 0.8172, "step": 18760 }, { "epoch": 0.64, "learning_rate": 1.539172002513942e-05, "loss": 0.8388, "step": 18780 }, { "epoch": 0.64, "learning_rate": 1.538273584863893e-05, "loss": 0.8054, "step": 18800 }, { "epoch": 0.64, "learning_rate": 1.537374555092832e-05, "loss": 0.846, "step": 18820 }, { "epoch": 0.64, "learning_rate": 1.5364749142231302e-05, "loss": 0.8163, "step": 18840 }, { "epoch": 0.64, "learning_rate": 1.5355746632778533e-05, "loss": 0.8236, "step": 18860 }, { "epoch": 0.64, "learning_rate": 1.5346738032807603e-05, "loss": 0.7648, "step": 18880 }, { "epoch": 0.64, "learning_rate": 1.5337723352563028e-05, "loss": 0.8125, "step": 18900 }, { "epoch": 0.64, "learning_rate": 1.5328702602296238e-05, "loss": 0.8316, "step": 18920 }, { "epoch": 0.64, "learning_rate": 1.531967579226557e-05, "loss": 0.8042, "step": 18940 }, { "epoch": 0.64, "learning_rate": 1.5310642932736253e-05, "loss": 0.8454, "step": 18960 }, { "epoch": 0.64, "learning_rate": 1.5301604033980387e-05, "loss": 0.7541, "step": 18980 }, { "epoch": 0.64, "learning_rate": 1.529255910627695e-05, "loss": 0.8904, "step": 19000 }, { "epoch": 0.65, "learning_rate": 1.5283508159911776e-05, "loss": 0.7874, "step": 19020 }, { "epoch": 0.65, "learning_rate": 1.5274451205177526e-05, "loss": 0.7914, "step": 19040 }, { "epoch": 0.65, "learning_rate": 1.5265388252373712e-05, "loss": 0.8695, "step": 19060 }, { "epoch": 0.65, "learning_rate": 1.525631931180667e-05, "loss": 0.8793, "step": 19080 }, { "epoch": 0.65, "learning_rate": 1.524724439378953e-05, "loss": 0.8213, "step": 19100 }, { "epoch": 0.65, "learning_rate": 1.5238163508642219e-05, "loss": 0.8038, "step": 19120 }, { "epoch": 0.65, "learning_rate": 1.5229076666691471e-05, "loss": 0.799, "step": 19140 }, { "epoch": 0.65, "learning_rate": 1.5219983878270767e-05, "loss": 0.8665, "step": 19160 }, { "epoch": 0.65, "learning_rate": 1.5210885153720374e-05, "loss": 0.8425, "step": 19180 }, { "epoch": 0.65, "learning_rate": 1.520178050338729e-05, "loss": 0.8568, "step": 19200 }, { "epoch": 0.65, "learning_rate": 1.519266993762527e-05, "loss": 0.8743, "step": 19220 }, { "epoch": 0.65, "learning_rate": 1.5183553466794778e-05, "loss": 0.8136, "step": 19240 }, { "epoch": 0.65, "learning_rate": 1.5174431101263004e-05, "loss": 0.848, "step": 19260 }, { "epoch": 0.65, "learning_rate": 1.5165302851403843e-05, "loss": 0.8114, "step": 19280 }, { "epoch": 0.66, "learning_rate": 1.5156168727597873e-05, "loss": 0.8329, "step": 19300 }, { "epoch": 0.66, "learning_rate": 1.5147028740232361e-05, "loss": 0.7901, "step": 19320 }, { "epoch": 0.66, "learning_rate": 1.5137882899701234e-05, "loss": 0.8436, "step": 19340 }, { "epoch": 0.66, "learning_rate": 1.5128731216405084e-05, "loss": 0.864, "step": 19360 }, { "epoch": 0.66, "learning_rate": 1.5119573700751133e-05, "loss": 0.8219, "step": 19380 }, { "epoch": 0.66, "learning_rate": 1.5110410363153256e-05, "loss": 0.8108, "step": 19400 }, { "epoch": 0.66, "learning_rate": 1.5101241214031928e-05, "loss": 0.8013, "step": 19420 }, { "epoch": 0.66, "learning_rate": 1.5092066263814245e-05, "loss": 0.8176, "step": 19440 }, { "epoch": 0.66, "learning_rate": 1.5082885522933901e-05, "loss": 0.8276, "step": 19460 }, { "epoch": 0.66, "learning_rate": 1.5073699001831168e-05, "loss": 0.7847, "step": 19480 }, { "epoch": 0.66, "learning_rate": 1.506450671095289e-05, "loss": 0.798, "step": 19500 }, { "epoch": 0.66, "learning_rate": 1.5055308660752486e-05, "loss": 0.8289, "step": 19520 }, { "epoch": 0.66, "learning_rate": 1.5046104861689911e-05, "loss": 0.8235, "step": 19540 }, { "epoch": 0.66, "learning_rate": 1.5036895324231662e-05, "loss": 0.8415, "step": 19560 }, { "epoch": 0.66, "learning_rate": 1.5027680058850757e-05, "loss": 0.8438, "step": 19580 }, { "epoch": 0.67, "learning_rate": 1.501845907602674e-05, "loss": 0.7979, "step": 19600 }, { "epoch": 0.67, "learning_rate": 1.5009232386245644e-05, "loss": 0.8477, "step": 19620 }, { "epoch": 0.67, "learning_rate": 1.5000000000000002e-05, "loss": 0.7946, "step": 19640 }, { "epoch": 0.67, "learning_rate": 1.4990761927788815e-05, "loss": 0.827, "step": 19660 }, { "epoch": 0.67, "learning_rate": 1.4981518180117558e-05, "loss": 0.8489, "step": 19680 }, { "epoch": 0.67, "learning_rate": 1.497226876749815e-05, "loss": 0.8442, "step": 19700 }, { "epoch": 0.67, "learning_rate": 1.4963013700448969e-05, "loss": 0.7982, "step": 19720 }, { "epoch": 0.67, "learning_rate": 1.4953752989494815e-05, "loss": 0.7978, "step": 19740 }, { "epoch": 0.67, "learning_rate": 1.4944486645166892e-05, "loss": 0.8114, "step": 19760 }, { "epoch": 0.67, "learning_rate": 1.4935214678002834e-05, "loss": 0.8342, "step": 19780 }, { "epoch": 0.67, "learning_rate": 1.4925937098546652e-05, "loss": 0.7771, "step": 19800 }, { "epoch": 0.67, "learning_rate": 1.4916653917348751e-05, "loss": 0.9235, "step": 19820 }, { "epoch": 0.67, "learning_rate": 1.4907365144965896e-05, "loss": 0.7855, "step": 19840 }, { "epoch": 0.67, "learning_rate": 1.4898070791961215e-05, "loss": 0.8699, "step": 19860 }, { "epoch": 0.67, "learning_rate": 1.4888770868904185e-05, "loss": 0.8658, "step": 19880 }, { "epoch": 0.68, "learning_rate": 1.4879465386370613e-05, "loss": 0.8394, "step": 19900 }, { "epoch": 0.68, "learning_rate": 1.487015435494263e-05, "loss": 0.8058, "step": 19920 }, { "epoch": 0.68, "learning_rate": 1.4860837785208678e-05, "loss": 0.8074, "step": 19940 }, { "epoch": 0.68, "learning_rate": 1.4851515687763492e-05, "loss": 0.8149, "step": 19960 }, { "epoch": 0.68, "learning_rate": 1.48421880732081e-05, "loss": 0.7852, "step": 19980 }, { "epoch": 0.68, "learning_rate": 1.48328549521498e-05, "loss": 0.8215, "step": 20000 }, { "epoch": 0.68, "learning_rate": 1.4823516335202152e-05, "loss": 0.8319, "step": 20020 }, { "epoch": 0.68, "learning_rate": 1.4814172232984968e-05, "loss": 0.799, "step": 20040 }, { "epoch": 0.68, "learning_rate": 1.4804822656124302e-05, "loss": 0.8033, "step": 20060 }, { "epoch": 0.68, "learning_rate": 1.479546761525242e-05, "loss": 0.8247, "step": 20080 }, { "epoch": 0.68, "learning_rate": 1.4786107121007813e-05, "loss": 0.7516, "step": 20100 }, { "epoch": 0.68, "learning_rate": 1.4776741184035171e-05, "loss": 0.8348, "step": 20120 }, { "epoch": 0.68, "learning_rate": 1.476736981498537e-05, "loss": 0.8229, "step": 20140 }, { "epoch": 0.68, "learning_rate": 1.475799302451547e-05, "loss": 0.8529, "step": 20160 }, { "epoch": 0.68, "learning_rate": 1.4748610823288693e-05, "loss": 0.8749, "step": 20180 }, { "epoch": 0.69, "learning_rate": 1.4739223221974415e-05, "loss": 0.8914, "step": 20200 }, { "epoch": 0.69, "learning_rate": 1.4729830231248143e-05, "loss": 0.8839, "step": 20220 }, { "epoch": 0.69, "learning_rate": 1.4720431861791531e-05, "loss": 0.8345, "step": 20240 }, { "epoch": 0.69, "learning_rate": 1.4711028124292332e-05, "loss": 0.8047, "step": 20260 }, { "epoch": 0.69, "learning_rate": 1.4701619029444419e-05, "loss": 0.8083, "step": 20280 }, { "epoch": 0.69, "learning_rate": 1.4692204587947745e-05, "loss": 0.8185, "step": 20300 }, { "epoch": 0.69, "learning_rate": 1.468278481050835e-05, "loss": 0.8176, "step": 20320 }, { "epoch": 0.69, "learning_rate": 1.4673359707838338e-05, "loss": 0.8108, "step": 20340 }, { "epoch": 0.69, "learning_rate": 1.4663929290655872e-05, "loss": 0.8012, "step": 20360 }, { "epoch": 0.69, "learning_rate": 1.4654493569685157e-05, "loss": 0.8082, "step": 20380 }, { "epoch": 0.69, "learning_rate": 1.464505255565643e-05, "loss": 0.833, "step": 20400 }, { "epoch": 0.69, "learning_rate": 1.4635606259305948e-05, "loss": 0.819, "step": 20420 }, { "epoch": 0.69, "learning_rate": 1.4626154691375974e-05, "loss": 0.8406, "step": 20440 }, { "epoch": 0.69, "learning_rate": 1.4616697862614766e-05, "loss": 0.8467, "step": 20460 }, { "epoch": 0.7, "learning_rate": 1.4607235783776563e-05, "loss": 0.8298, "step": 20480 }, { "epoch": 0.7, "learning_rate": 1.4597768465621575e-05, "loss": 0.7843, "step": 20500 }, { "epoch": 0.7, "learning_rate": 1.4588295918915978e-05, "loss": 0.8426, "step": 20520 }, { "epoch": 0.7, "learning_rate": 1.4578818154431878e-05, "loss": 0.7621, "step": 20540 }, { "epoch": 0.7, "learning_rate": 1.4569335182947325e-05, "loss": 0.8628, "step": 20560 }, { "epoch": 0.7, "learning_rate": 1.4559847015246294e-05, "loss": 0.7603, "step": 20580 }, { "epoch": 0.7, "learning_rate": 1.4550353662118659e-05, "loss": 0.7862, "step": 20600 }, { "epoch": 0.7, "learning_rate": 1.4540855134360198e-05, "loss": 0.7932, "step": 20620 }, { "epoch": 0.7, "learning_rate": 1.453135144277257e-05, "loss": 0.7972, "step": 20640 }, { "epoch": 0.7, "learning_rate": 1.4521842598163309e-05, "loss": 0.8247, "step": 20660 }, { "epoch": 0.7, "learning_rate": 1.4512328611345808e-05, "loss": 0.8215, "step": 20680 }, { "epoch": 0.7, "learning_rate": 1.4502809493139305e-05, "loss": 0.7678, "step": 20700 }, { "epoch": 0.7, "learning_rate": 1.4493285254368878e-05, "loss": 0.8288, "step": 20720 }, { "epoch": 0.7, "learning_rate": 1.4483755905865423e-05, "loss": 0.7892, "step": 20740 }, { "epoch": 0.7, "learning_rate": 1.4474221458465651e-05, "loss": 0.875, "step": 20760 }, { "epoch": 0.71, "learning_rate": 1.4464681923012072e-05, "loss": 0.7519, "step": 20780 }, { "epoch": 0.71, "learning_rate": 1.4455137310352978e-05, "loss": 0.8257, "step": 20800 }, { "epoch": 0.71, "learning_rate": 1.4445587631342433e-05, "loss": 0.8261, "step": 20820 }, { "epoch": 0.71, "learning_rate": 1.4436032896840271e-05, "loss": 0.8359, "step": 20840 }, { "epoch": 0.71, "learning_rate": 1.4426473117712071e-05, "loss": 0.8074, "step": 20860 }, { "epoch": 0.71, "learning_rate": 1.4416908304829142e-05, "loss": 0.8617, "step": 20880 }, { "epoch": 0.71, "learning_rate": 1.4407338469068532e-05, "loss": 0.7763, "step": 20900 }, { "epoch": 0.71, "learning_rate": 1.4397763621312985e-05, "loss": 0.8333, "step": 20920 }, { "epoch": 0.71, "learning_rate": 1.4388183772450957e-05, "loss": 0.8397, "step": 20940 }, { "epoch": 0.71, "learning_rate": 1.437859893337658e-05, "loss": 0.8307, "step": 20960 }, { "epoch": 0.71, "learning_rate": 1.4369009114989671e-05, "loss": 0.8456, "step": 20980 }, { "epoch": 0.71, "learning_rate": 1.4359414328195704e-05, "loss": 0.8202, "step": 21000 }, { "epoch": 0.71, "learning_rate": 1.4349814583905807e-05, "loss": 0.7784, "step": 21020 }, { "epoch": 0.71, "learning_rate": 1.434020989303674e-05, "loss": 0.8052, "step": 21040 }, { "epoch": 0.71, "learning_rate": 1.4330600266510895e-05, "loss": 0.7646, "step": 21060 }, { "epoch": 0.72, "learning_rate": 1.4320985715256269e-05, "loss": 0.8493, "step": 21080 }, { "epoch": 0.72, "learning_rate": 1.4311366250206464e-05, "loss": 0.7595, "step": 21100 }, { "epoch": 0.72, "learning_rate": 1.4301741882300673e-05, "loss": 0.7798, "step": 21120 }, { "epoch": 0.72, "learning_rate": 1.4292112622483653e-05, "loss": 0.827, "step": 21140 }, { "epoch": 0.72, "learning_rate": 1.4282478481705742e-05, "loss": 0.9012, "step": 21160 }, { "epoch": 0.72, "learning_rate": 1.4272839470922812e-05, "loss": 0.7908, "step": 21180 }, { "epoch": 0.72, "learning_rate": 1.426319560109628e-05, "loss": 0.8161, "step": 21200 }, { "epoch": 0.72, "learning_rate": 1.4253546883193093e-05, "loss": 0.8432, "step": 21220 }, { "epoch": 0.72, "learning_rate": 1.42438933281857e-05, "loss": 0.797, "step": 21240 }, { "epoch": 0.72, "learning_rate": 1.4234234947052063e-05, "loss": 0.8394, "step": 21260 }, { "epoch": 0.72, "learning_rate": 1.4224571750775625e-05, "loss": 0.8191, "step": 21280 }, { "epoch": 0.72, "learning_rate": 1.4214903750345298e-05, "loss": 0.7181, "step": 21300 }, { "epoch": 0.72, "learning_rate": 1.4205230956755479e-05, "loss": 0.8326, "step": 21320 }, { "epoch": 0.72, "learning_rate": 1.419555338100599e-05, "loss": 0.8444, "step": 21340 }, { "epoch": 0.73, "learning_rate": 1.4185871034102117e-05, "loss": 0.7624, "step": 21360 }, { "epoch": 0.73, "learning_rate": 1.4176183927054545e-05, "loss": 0.8136, "step": 21380 }, { "epoch": 0.73, "learning_rate": 1.4166492070879389e-05, "loss": 0.9168, "step": 21400 }, { "epoch": 0.73, "learning_rate": 1.4156795476598166e-05, "loss": 0.7983, "step": 21420 }, { "epoch": 0.73, "learning_rate": 1.4147094155237772e-05, "loss": 0.7842, "step": 21440 }, { "epoch": 0.73, "learning_rate": 1.4137388117830486e-05, "loss": 0.7559, "step": 21460 }, { "epoch": 0.73, "learning_rate": 1.4127677375413943e-05, "loss": 0.7416, "step": 21480 }, { "epoch": 0.73, "learning_rate": 1.4117961939031135e-05, "loss": 0.8395, "step": 21500 }, { "epoch": 0.73, "learning_rate": 1.4108241819730384e-05, "loss": 0.8557, "step": 21520 }, { "epoch": 0.73, "learning_rate": 1.409851702856535e-05, "loss": 0.7085, "step": 21540 }, { "epoch": 0.73, "learning_rate": 1.4088787576594996e-05, "loss": 0.8655, "step": 21560 }, { "epoch": 0.73, "learning_rate": 1.4079053474883584e-05, "loss": 0.754, "step": 21580 }, { "epoch": 0.73, "learning_rate": 1.4069314734500674e-05, "loss": 0.8455, "step": 21600 }, { "epoch": 0.73, "learning_rate": 1.405957136652109e-05, "loss": 0.8776, "step": 21620 }, { "epoch": 0.73, "learning_rate": 1.4049823382024922e-05, "loss": 0.7825, "step": 21640 }, { "epoch": 0.74, "learning_rate": 1.4040070792097515e-05, "loss": 0.7666, "step": 21660 }, { "epoch": 0.74, "learning_rate": 1.4030313607829444e-05, "loss": 0.8492, "step": 21680 }, { "epoch": 0.74, "learning_rate": 1.4020551840316515e-05, "loss": 0.8367, "step": 21700 }, { "epoch": 0.74, "learning_rate": 1.4010785500659735e-05, "loss": 0.8087, "step": 21720 }, { "epoch": 0.74, "learning_rate": 1.4001014599965327e-05, "loss": 0.7934, "step": 21740 }, { "epoch": 0.74, "learning_rate": 1.3991239149344686e-05, "loss": 0.7869, "step": 21760 }, { "epoch": 0.74, "learning_rate": 1.3981459159914392e-05, "loss": 0.8105, "step": 21780 }, { "epoch": 0.74, "learning_rate": 1.3971674642796178e-05, "loss": 0.878, "step": 21800 }, { "epoch": 0.74, "learning_rate": 1.396188560911693e-05, "loss": 0.8064, "step": 21820 }, { "epoch": 0.74, "learning_rate": 1.3952092070008669e-05, "loss": 0.8331, "step": 21840 }, { "epoch": 0.74, "learning_rate": 1.3942294036608544e-05, "loss": 0.8226, "step": 21860 }, { "epoch": 0.74, "learning_rate": 1.3932491520058807e-05, "loss": 0.797, "step": 21880 }, { "epoch": 0.74, "learning_rate": 1.392268453150681e-05, "loss": 0.8333, "step": 21900 }, { "epoch": 0.74, "learning_rate": 1.3912873082105002e-05, "loss": 0.8006, "step": 21920 }, { "epoch": 0.74, "learning_rate": 1.3903057183010887e-05, "loss": 0.7301, "step": 21940 }, { "epoch": 0.75, "learning_rate": 1.3893236845387041e-05, "loss": 0.7378, "step": 21960 }, { "epoch": 0.75, "learning_rate": 1.3883412080401083e-05, "loss": 0.7564, "step": 21980 }, { "epoch": 0.75, "learning_rate": 1.3873582899225672e-05, "loss": 0.8053, "step": 22000 }, { "epoch": 0.75, "learning_rate": 1.3863749313038484e-05, "loss": 0.8445, "step": 22020 }, { "epoch": 0.75, "learning_rate": 1.3853911333022199e-05, "loss": 0.767, "step": 22040 }, { "epoch": 0.75, "learning_rate": 1.3844068970364506e-05, "loss": 0.7772, "step": 22060 }, { "epoch": 0.75, "learning_rate": 1.383422223625807e-05, "loss": 0.851, "step": 22080 }, { "epoch": 0.75, "learning_rate": 1.3824371141900533e-05, "loss": 0.76, "step": 22100 }, { "epoch": 0.75, "learning_rate": 1.3814515698494484e-05, "loss": 0.7725, "step": 22120 }, { "epoch": 0.75, "learning_rate": 1.3804655917247471e-05, "loss": 0.7981, "step": 22140 }, { "epoch": 0.75, "learning_rate": 1.3794791809371971e-05, "loss": 0.7698, "step": 22160 }, { "epoch": 0.75, "learning_rate": 1.3784923386085373e-05, "loss": 0.8396, "step": 22180 }, { "epoch": 0.75, "learning_rate": 1.3775050658609988e-05, "loss": 0.8182, "step": 22200 }, { "epoch": 0.75, "learning_rate": 1.3765173638173004e-05, "loss": 0.8189, "step": 22220 }, { "epoch": 0.75, "learning_rate": 1.3755292336006512e-05, "loss": 0.7969, "step": 22240 }, { "epoch": 0.76, "learning_rate": 1.3745406763347446e-05, "loss": 0.7591, "step": 22260 }, { "epoch": 0.76, "learning_rate": 1.3735516931437625e-05, "loss": 0.8119, "step": 22280 }, { "epoch": 0.76, "learning_rate": 1.3725622851523692e-05, "loss": 0.8237, "step": 22300 }, { "epoch": 0.76, "learning_rate": 1.3715724534857127e-05, "loss": 0.7985, "step": 22320 }, { "epoch": 0.76, "learning_rate": 1.3705821992694228e-05, "loss": 0.7909, "step": 22340 }, { "epoch": 0.76, "learning_rate": 1.3695915236296095e-05, "loss": 0.7241, "step": 22360 }, { "epoch": 0.76, "learning_rate": 1.3686004276928627e-05, "loss": 0.7716, "step": 22380 }, { "epoch": 0.76, "learning_rate": 1.3676089125862495e-05, "loss": 0.8163, "step": 22400 }, { "epoch": 0.76, "learning_rate": 1.3666169794373141e-05, "loss": 0.7784, "step": 22420 }, { "epoch": 0.76, "learning_rate": 1.3656246293740767e-05, "loss": 0.7793, "step": 22440 }, { "epoch": 0.76, "learning_rate": 1.36463186352503e-05, "loss": 0.8619, "step": 22460 }, { "epoch": 0.76, "learning_rate": 1.3636386830191409e-05, "loss": 0.7603, "step": 22480 }, { "epoch": 0.76, "learning_rate": 1.3626450889858476e-05, "loss": 0.786, "step": 22500 }, { "epoch": 0.76, "learning_rate": 1.3616510825550582e-05, "loss": 0.8596, "step": 22520 }, { "epoch": 0.77, "learning_rate": 1.36065666485715e-05, "loss": 0.8392, "step": 22540 }, { "epoch": 0.77, "learning_rate": 1.3596618370229679e-05, "loss": 0.699, "step": 22560 }, { "epoch": 0.77, "learning_rate": 1.3586666001838231e-05, "loss": 0.7612, "step": 22580 }, { "epoch": 0.77, "learning_rate": 1.3576709554714923e-05, "loss": 0.8525, "step": 22600 }, { "epoch": 0.77, "learning_rate": 1.356674904018216e-05, "loss": 0.8214, "step": 22620 }, { "epoch": 0.77, "learning_rate": 1.3556784469566968e-05, "loss": 0.7399, "step": 22640 }, { "epoch": 0.77, "learning_rate": 1.3546815854200987e-05, "loss": 0.8157, "step": 22660 }, { "epoch": 0.77, "learning_rate": 1.353684320542046e-05, "loss": 0.8849, "step": 22680 }, { "epoch": 0.77, "learning_rate": 1.3526866534566213e-05, "loss": 0.8243, "step": 22700 }, { "epoch": 0.77, "learning_rate": 1.3516885852983645e-05, "loss": 0.7794, "step": 22720 }, { "epoch": 0.77, "learning_rate": 1.3506901172022724e-05, "loss": 0.7773, "step": 22740 }, { "epoch": 0.77, "learning_rate": 1.3496912503037959e-05, "loss": 0.7399, "step": 22760 }, { "epoch": 0.77, "learning_rate": 1.3486919857388392e-05, "loss": 0.8805, "step": 22780 }, { "epoch": 0.77, "learning_rate": 1.347692324643759e-05, "loss": 0.7985, "step": 22800 }, { "epoch": 0.77, "learning_rate": 1.3466922681553636e-05, "loss": 0.7705, "step": 22820 }, { "epoch": 0.78, "learning_rate": 1.3456918174109102e-05, "loss": 0.8026, "step": 22840 }, { "epoch": 0.78, "learning_rate": 1.344690973548104e-05, "loss": 0.7575, "step": 22860 }, { "epoch": 0.78, "learning_rate": 1.3436897377050982e-05, "loss": 0.7957, "step": 22880 }, { "epoch": 0.78, "learning_rate": 1.342688111020491e-05, "loss": 0.7325, "step": 22900 }, { "epoch": 0.78, "learning_rate": 1.3416860946333256e-05, "loss": 0.8267, "step": 22920 }, { "epoch": 0.78, "learning_rate": 1.3406836896830884e-05, "loss": 0.8662, "step": 22940 }, { "epoch": 0.78, "learning_rate": 1.3396808973097066e-05, "loss": 0.8306, "step": 22960 }, { "epoch": 0.78, "learning_rate": 1.3386777186535495e-05, "loss": 0.8397, "step": 22980 }, { "epoch": 0.78, "learning_rate": 1.3376741548554248e-05, "loss": 0.7559, "step": 23000 }, { "epoch": 0.78, "learning_rate": 1.3366702070565775e-05, "loss": 0.801, "step": 23020 }, { "epoch": 0.78, "learning_rate": 1.3356658763986919e-05, "loss": 0.7433, "step": 23040 }, { "epoch": 0.78, "learning_rate": 1.334661164023884e-05, "loss": 0.8088, "step": 23060 }, { "epoch": 0.78, "learning_rate": 1.3336560710747073e-05, "loss": 0.8296, "step": 23080 }, { "epoch": 0.78, "learning_rate": 1.3326505986941456e-05, "loss": 0.7567, "step": 23100 }, { "epoch": 0.78, "learning_rate": 1.3316447480256153e-05, "loss": 0.7866, "step": 23120 }, { "epoch": 0.79, "learning_rate": 1.3306385202129633e-05, "loss": 0.7891, "step": 23140 }, { "epoch": 0.79, "learning_rate": 1.3296319164004646e-05, "loss": 0.7994, "step": 23160 }, { "epoch": 0.79, "learning_rate": 1.3286249377328222e-05, "loss": 0.7251, "step": 23180 }, { "epoch": 0.79, "learning_rate": 1.327617585355165e-05, "loss": 0.7805, "step": 23200 }, { "epoch": 0.79, "learning_rate": 1.3266098604130477e-05, "loss": 0.7611, "step": 23220 }, { "epoch": 0.79, "learning_rate": 1.325601764052448e-05, "loss": 0.8033, "step": 23240 }, { "epoch": 0.79, "learning_rate": 1.324593297419766e-05, "loss": 0.8043, "step": 23260 }, { "epoch": 0.79, "learning_rate": 1.323584461661823e-05, "loss": 0.7901, "step": 23280 }, { "epoch": 0.79, "learning_rate": 1.3225752579258599e-05, "loss": 0.797, "step": 23300 }, { "epoch": 0.79, "learning_rate": 1.3215656873595366e-05, "loss": 0.8191, "step": 23320 }, { "epoch": 0.79, "learning_rate": 1.3205557511109298e-05, "loss": 0.7936, "step": 23340 }, { "epoch": 0.79, "learning_rate": 1.319545450328532e-05, "loss": 0.8261, "step": 23360 }, { "epoch": 0.79, "learning_rate": 1.3185347861612501e-05, "loss": 0.7741, "step": 23380 }, { "epoch": 0.79, "learning_rate": 1.3175237597584045e-05, "loss": 0.7602, "step": 23400 }, { "epoch": 0.79, "learning_rate": 1.3165123722697276e-05, "loss": 0.7664, "step": 23420 }, { "epoch": 0.8, "learning_rate": 1.3155006248453615e-05, "loss": 0.7819, "step": 23440 }, { "epoch": 0.8, "learning_rate": 1.3144885186358596e-05, "loss": 0.7912, "step": 23460 }, { "epoch": 0.8, "learning_rate": 1.3134760547921814e-05, "loss": 0.8161, "step": 23480 }, { "epoch": 0.8, "learning_rate": 1.3124632344656943e-05, "loss": 0.7957, "step": 23500 }, { "epoch": 0.8, "learning_rate": 1.3114500588081698e-05, "loss": 0.8164, "step": 23520 }, { "epoch": 0.8, "learning_rate": 1.3104365289717849e-05, "loss": 0.7976, "step": 23540 }, { "epoch": 0.8, "learning_rate": 1.3094226461091186e-05, "loss": 0.7437, "step": 23560 }, { "epoch": 0.8, "learning_rate": 1.3084084113731514e-05, "loss": 0.84, "step": 23580 }, { "epoch": 0.8, "learning_rate": 1.3073938259172645e-05, "loss": 0.7651, "step": 23600 }, { "epoch": 0.8, "learning_rate": 1.3063788908952368e-05, "loss": 0.7769, "step": 23620 }, { "epoch": 0.8, "learning_rate": 1.3053636074612457e-05, "loss": 0.7899, "step": 23640 }, { "epoch": 0.8, "learning_rate": 1.3043479767698646e-05, "loss": 0.7696, "step": 23660 }, { "epoch": 0.8, "learning_rate": 1.3033319999760619e-05, "loss": 0.7587, "step": 23680 }, { "epoch": 0.8, "learning_rate": 1.3023156782351989e-05, "loss": 0.7693, "step": 23700 }, { "epoch": 0.81, "learning_rate": 1.30129901270303e-05, "loss": 0.7695, "step": 23720 }, { "epoch": 0.81, "learning_rate": 1.3002820045357005e-05, "loss": 0.79, "step": 23740 }, { "epoch": 0.81, "learning_rate": 1.2992646548897442e-05, "loss": 0.8037, "step": 23760 }, { "epoch": 0.81, "learning_rate": 1.2982469649220854e-05, "loss": 0.8023, "step": 23780 }, { "epoch": 0.81, "learning_rate": 1.2972289357900332e-05, "loss": 0.8247, "step": 23800 }, { "epoch": 0.81, "learning_rate": 1.2962105686512834e-05, "loss": 0.8243, "step": 23820 }, { "epoch": 0.81, "learning_rate": 1.2951918646639163e-05, "loss": 0.8695, "step": 23840 }, { "epoch": 0.81, "learning_rate": 1.294172824986395e-05, "loss": 0.8408, "step": 23860 }, { "epoch": 0.81, "learning_rate": 1.293153450777564e-05, "loss": 0.8392, "step": 23880 }, { "epoch": 0.81, "learning_rate": 1.2921337431966494e-05, "loss": 0.7882, "step": 23900 }, { "epoch": 0.81, "learning_rate": 1.2911137034032553e-05, "loss": 0.8327, "step": 23920 }, { "epoch": 0.81, "learning_rate": 1.2900933325573636e-05, "loss": 0.8817, "step": 23940 }, { "epoch": 0.81, "learning_rate": 1.2890726318193332e-05, "loss": 0.8262, "step": 23960 }, { "epoch": 0.81, "learning_rate": 1.2880516023498981e-05, "loss": 0.7659, "step": 23980 }, { "epoch": 0.81, "learning_rate": 1.2870302453101657e-05, "loss": 0.7805, "step": 24000 }, { "epoch": 0.82, "learning_rate": 1.2860085618616169e-05, "loss": 0.8152, "step": 24020 }, { "epoch": 0.82, "learning_rate": 1.284986553166102e-05, "loss": 0.7744, "step": 24040 }, { "epoch": 0.82, "learning_rate": 1.2839642203858433e-05, "loss": 0.7241, "step": 24060 }, { "epoch": 0.82, "learning_rate": 1.2829415646834295e-05, "loss": 0.8229, "step": 24080 }, { "epoch": 0.82, "learning_rate": 1.2819185872218184e-05, "loss": 0.8712, "step": 24100 }, { "epoch": 0.82, "learning_rate": 1.2808952891643327e-05, "loss": 0.7865, "step": 24120 }, { "epoch": 0.82, "learning_rate": 1.2798716716746602e-05, "loss": 0.8225, "step": 24140 }, { "epoch": 0.82, "learning_rate": 1.2788477359168512e-05, "loss": 0.7767, "step": 24160 }, { "epoch": 0.82, "learning_rate": 1.2778234830553183e-05, "loss": 0.8087, "step": 24180 }, { "epoch": 0.82, "learning_rate": 1.276798914254835e-05, "loss": 0.8498, "step": 24200 }, { "epoch": 0.82, "learning_rate": 1.2757740306805345e-05, "loss": 0.785, "step": 24220 }, { "epoch": 0.82, "learning_rate": 1.2747488334979064e-05, "loss": 0.7809, "step": 24240 }, { "epoch": 0.82, "learning_rate": 1.2737233238727982e-05, "loss": 0.77, "step": 24260 }, { "epoch": 0.82, "learning_rate": 1.2726975029714124e-05, "loss": 0.8828, "step": 24280 }, { "epoch": 0.82, "learning_rate": 1.2716713719603053e-05, "loss": 0.8708, "step": 24300 }, { "epoch": 0.83, "learning_rate": 1.2706449320063866e-05, "loss": 0.8074, "step": 24320 }, { "epoch": 0.83, "learning_rate": 1.2696181842769161e-05, "loss": 0.7782, "step": 24340 }, { "epoch": 0.83, "learning_rate": 1.2685911299395046e-05, "loss": 0.8194, "step": 24360 }, { "epoch": 0.83, "learning_rate": 1.2675637701621111e-05, "loss": 0.7642, "step": 24380 }, { "epoch": 0.83, "learning_rate": 1.2665361061130416e-05, "loss": 0.7517, "step": 24400 }, { "epoch": 0.83, "learning_rate": 1.2655081389609496e-05, "loss": 0.7556, "step": 24420 }, { "epoch": 0.83, "learning_rate": 1.2644798698748314e-05, "loss": 0.7667, "step": 24440 }, { "epoch": 0.83, "learning_rate": 1.2634513000240277e-05, "loss": 0.7696, "step": 24460 }, { "epoch": 0.83, "learning_rate": 1.2624224305782215e-05, "loss": 0.8711, "step": 24480 }, { "epoch": 0.83, "learning_rate": 1.2613932627074356e-05, "loss": 0.7339, "step": 24500 }, { "epoch": 0.83, "learning_rate": 1.2603637975820324e-05, "loss": 0.7713, "step": 24520 }, { "epoch": 0.83, "learning_rate": 1.2593340363727131e-05, "loss": 0.7791, "step": 24540 }, { "epoch": 0.83, "learning_rate": 1.2583039802505148e-05, "loss": 0.8241, "step": 24560 }, { "epoch": 0.83, "learning_rate": 1.2572736303868102e-05, "loss": 0.7267, "step": 24580 }, { "epoch": 0.84, "learning_rate": 1.256242987953306e-05, "loss": 0.8083, "step": 24600 }, { "epoch": 0.84, "learning_rate": 1.2552120541220419e-05, "loss": 0.8438, "step": 24620 }, { "epoch": 0.84, "learning_rate": 1.2541808300653883e-05, "loss": 0.7614, "step": 24640 }, { "epoch": 0.84, "learning_rate": 1.253149316956047e-05, "loss": 0.7978, "step": 24660 }, { "epoch": 0.84, "learning_rate": 1.2521175159670464e-05, "loss": 0.7852, "step": 24680 }, { "epoch": 0.84, "learning_rate": 1.2510854282717444e-05, "loss": 0.7439, "step": 24700 }, { "epoch": 0.84, "learning_rate": 1.2500530550438232e-05, "loss": 0.8229, "step": 24720 }, { "epoch": 0.84, "learning_rate": 1.2490203974572915e-05, "loss": 0.7607, "step": 24740 }, { "epoch": 0.84, "learning_rate": 1.2479874566864799e-05, "loss": 0.7557, "step": 24760 }, { "epoch": 0.84, "learning_rate": 1.2469542339060414e-05, "loss": 0.7902, "step": 24780 }, { "epoch": 0.84, "learning_rate": 1.2459207302909499e-05, "loss": 0.7407, "step": 24800 }, { "epoch": 0.84, "learning_rate": 1.2448869470164988e-05, "loss": 0.8128, "step": 24820 }, { "epoch": 0.84, "learning_rate": 1.243852885258299e-05, "loss": 0.7656, "step": 24840 }, { "epoch": 0.84, "learning_rate": 1.2428185461922783e-05, "loss": 0.7291, "step": 24860 }, { "epoch": 0.84, "learning_rate": 1.2417839309946806e-05, "loss": 0.844, "step": 24880 }, { "epoch": 0.85, "learning_rate": 1.2407490408420629e-05, "loss": 0.7925, "step": 24900 }, { "epoch": 0.85, "learning_rate": 1.2397138769112949e-05, "loss": 0.7518, "step": 24920 }, { "epoch": 0.85, "learning_rate": 1.2386784403795577e-05, "loss": 0.7826, "step": 24940 }, { "epoch": 0.85, "learning_rate": 1.2376427324243432e-05, "loss": 0.8126, "step": 24960 }, { "epoch": 0.85, "learning_rate": 1.2366067542234512e-05, "loss": 0.7521, "step": 24980 }, { "epoch": 0.85, "learning_rate": 1.2355705069549885e-05, "loss": 0.8282, "step": 25000 }, { "epoch": 0.85, "learning_rate": 1.2345339917973688e-05, "loss": 0.7748, "step": 25020 }, { "epoch": 0.85, "learning_rate": 1.2334972099293102e-05, "loss": 0.8185, "step": 25040 }, { "epoch": 0.85, "learning_rate": 1.2324601625298331e-05, "loss": 0.8449, "step": 25060 }, { "epoch": 0.85, "learning_rate": 1.2314228507782614e-05, "loss": 0.8139, "step": 25080 }, { "epoch": 0.85, "learning_rate": 1.2303852758542187e-05, "loss": 0.7789, "step": 25100 }, { "epoch": 0.85, "learning_rate": 1.2293474389376281e-05, "loss": 0.8105, "step": 25120 }, { "epoch": 0.85, "learning_rate": 1.2283093412087099e-05, "loss": 0.7276, "step": 25140 }, { "epoch": 0.85, "learning_rate": 1.2272709838479824e-05, "loss": 0.8363, "step": 25160 }, { "epoch": 0.85, "learning_rate": 1.2262323680362587e-05, "loss": 0.7583, "step": 25180 }, { "epoch": 0.86, "learning_rate": 1.2251934949546446e-05, "loss": 0.7626, "step": 25200 }, { "epoch": 0.86, "learning_rate": 1.2241543657845403e-05, "loss": 0.7718, "step": 25220 }, { "epoch": 0.86, "learning_rate": 1.2231149817076358e-05, "loss": 0.7381, "step": 25240 }, { "epoch": 0.86, "learning_rate": 1.2220753439059116e-05, "loss": 0.8, "step": 25260 }, { "epoch": 0.86, "learning_rate": 1.2210354535616366e-05, "loss": 0.7758, "step": 25280 }, { "epoch": 0.86, "learning_rate": 1.2199953118573674e-05, "loss": 0.7634, "step": 25300 }, { "epoch": 0.86, "learning_rate": 1.2189549199759454e-05, "loss": 0.7878, "step": 25320 }, { "epoch": 0.86, "learning_rate": 1.2179142791004974e-05, "loss": 0.7906, "step": 25340 }, { "epoch": 0.86, "learning_rate": 1.2168733904144328e-05, "loss": 0.7498, "step": 25360 }, { "epoch": 0.86, "learning_rate": 1.2158322551014431e-05, "loss": 0.7985, "step": 25380 }, { "epoch": 0.86, "learning_rate": 1.2147908743455005e-05, "loss": 0.7641, "step": 25400 }, { "epoch": 0.86, "learning_rate": 1.2137492493308558e-05, "loss": 0.7765, "step": 25420 }, { "epoch": 0.86, "learning_rate": 1.2127073812420376e-05, "loss": 0.81, "step": 25440 }, { "epoch": 0.86, "learning_rate": 1.2116652712638513e-05, "loss": 0.7686, "step": 25460 }, { "epoch": 0.86, "learning_rate": 1.210622920581377e-05, "loss": 0.7756, "step": 25480 }, { "epoch": 0.87, "learning_rate": 1.2095803303799693e-05, "loss": 0.8119, "step": 25500 }, { "epoch": 0.87, "learning_rate": 1.2085375018452541e-05, "loss": 0.6548, "step": 25520 }, { "epoch": 0.87, "learning_rate": 1.2074944361631288e-05, "loss": 0.7717, "step": 25540 }, { "epoch": 0.87, "learning_rate": 1.2064511345197606e-05, "loss": 0.8245, "step": 25560 }, { "epoch": 0.87, "learning_rate": 1.205407598101585e-05, "loss": 0.7411, "step": 25580 }, { "epoch": 0.87, "learning_rate": 1.2043638280953043e-05, "loss": 0.7405, "step": 25600 }, { "epoch": 0.87, "learning_rate": 1.2033198256878867e-05, "loss": 0.7835, "step": 25620 }, { "epoch": 0.87, "learning_rate": 1.2022755920665648e-05, "loss": 0.7589, "step": 25640 }, { "epoch": 0.87, "learning_rate": 1.201231128418833e-05, "loss": 0.7958, "step": 25660 }, { "epoch": 0.87, "learning_rate": 1.200186435932449e-05, "loss": 0.7981, "step": 25680 }, { "epoch": 0.87, "learning_rate": 1.1991415157954293e-05, "loss": 0.776, "step": 25700 }, { "epoch": 0.87, "learning_rate": 1.1980963691960508e-05, "loss": 0.7557, "step": 25720 }, { "epoch": 0.87, "learning_rate": 1.1970509973228459e-05, "loss": 0.803, "step": 25740 }, { "epoch": 0.87, "learning_rate": 1.1960054013646048e-05, "loss": 0.7916, "step": 25760 }, { "epoch": 0.88, "learning_rate": 1.1949595825103721e-05, "loss": 0.766, "step": 25780 }, { "epoch": 0.88, "learning_rate": 1.1939135419494456e-05, "loss": 0.7568, "step": 25800 }, { "epoch": 0.88, "learning_rate": 1.192867280871376e-05, "loss": 0.7633, "step": 25820 }, { "epoch": 0.88, "learning_rate": 1.1918208004659632e-05, "loss": 0.7924, "step": 25840 }, { "epoch": 0.88, "learning_rate": 1.1907741019232584e-05, "loss": 0.7998, "step": 25860 }, { "epoch": 0.88, "learning_rate": 1.1897271864335594e-05, "loss": 0.7736, "step": 25880 }, { "epoch": 0.88, "learning_rate": 1.1886800551874111e-05, "loss": 0.7891, "step": 25900 }, { "epoch": 0.88, "learning_rate": 1.1876327093756048e-05, "loss": 0.72, "step": 25920 }, { "epoch": 0.88, "learning_rate": 1.1865851501891741e-05, "loss": 0.753, "step": 25940 }, { "epoch": 0.88, "learning_rate": 1.1855373788193969e-05, "loss": 0.7819, "step": 25960 }, { "epoch": 0.88, "learning_rate": 1.1844893964577906e-05, "loss": 0.7506, "step": 25980 }, { "epoch": 0.88, "learning_rate": 1.1834412042961143e-05, "loss": 0.7076, "step": 26000 }, { "epoch": 0.88, "learning_rate": 1.1823928035263646e-05, "loss": 0.7446, "step": 26020 }, { "epoch": 0.88, "learning_rate": 1.1813441953407754e-05, "loss": 0.7617, "step": 26040 }, { "epoch": 0.88, "learning_rate": 1.1802953809318173e-05, "loss": 0.783, "step": 26060 }, { "epoch": 0.89, "learning_rate": 1.1792463614921945e-05, "loss": 0.7408, "step": 26080 }, { "epoch": 0.89, "learning_rate": 1.1781971382148446e-05, "loss": 0.7503, "step": 26100 }, { "epoch": 0.89, "learning_rate": 1.177147712292937e-05, "loss": 0.8042, "step": 26120 }, { "epoch": 0.89, "learning_rate": 1.1760980849198717e-05, "loss": 0.7961, "step": 26140 }, { "epoch": 0.89, "learning_rate": 1.1750482572892781e-05, "loss": 0.75, "step": 26160 }, { "epoch": 0.89, "learning_rate": 1.1739982305950124e-05, "loss": 0.7585, "step": 26180 }, { "epoch": 0.89, "learning_rate": 1.172948006031158e-05, "loss": 0.7957, "step": 26200 }, { "epoch": 0.89, "learning_rate": 1.1718975847920229e-05, "loss": 0.7645, "step": 26220 }, { "epoch": 0.89, "learning_rate": 1.170846968072139e-05, "loss": 0.7516, "step": 26240 }, { "epoch": 0.89, "learning_rate": 1.1697961570662606e-05, "loss": 0.7382, "step": 26260 }, { "epoch": 0.89, "learning_rate": 1.1687451529693626e-05, "loss": 0.834, "step": 26280 }, { "epoch": 0.89, "learning_rate": 1.167693956976639e-05, "loss": 0.8297, "step": 26300 }, { "epoch": 0.89, "learning_rate": 1.1666425702835033e-05, "loss": 0.7982, "step": 26320 }, { "epoch": 0.89, "learning_rate": 1.1655909940855852e-05, "loss": 0.7996, "step": 26340 }, { "epoch": 0.89, "learning_rate": 1.1645392295787299e-05, "loss": 0.7665, "step": 26360 }, { "epoch": 0.9, "learning_rate": 1.1634872779589966e-05, "loss": 0.824, "step": 26380 }, { "epoch": 0.9, "learning_rate": 1.1624351404226573e-05, "loss": 0.7679, "step": 26400 }, { "epoch": 0.9, "learning_rate": 1.1613828181661957e-05, "loss": 0.8051, "step": 26420 }, { "epoch": 0.9, "learning_rate": 1.1603303123863054e-05, "loss": 0.7372, "step": 26440 }, { "epoch": 0.9, "learning_rate": 1.1592776242798887e-05, "loss": 0.7109, "step": 26460 }, { "epoch": 0.9, "learning_rate": 1.1582247550440555e-05, "loss": 0.7646, "step": 26480 }, { "epoch": 0.9, "learning_rate": 1.1571717058761211e-05, "loss": 0.7508, "step": 26500 }, { "epoch": 0.9, "learning_rate": 1.1561184779736061e-05, "loss": 0.7319, "step": 26520 }, { "epoch": 0.9, "learning_rate": 1.1550650725342337e-05, "loss": 0.6879, "step": 26540 }, { "epoch": 0.9, "learning_rate": 1.1540114907559294e-05, "loss": 0.7612, "step": 26560 }, { "epoch": 0.9, "learning_rate": 1.1529577338368191e-05, "loss": 0.7385, "step": 26580 }, { "epoch": 0.9, "learning_rate": 1.1519038029752281e-05, "loss": 0.7635, "step": 26600 }, { "epoch": 0.9, "learning_rate": 1.1508496993696791e-05, "loss": 0.6925, "step": 26620 }, { "epoch": 0.9, "learning_rate": 1.1497954242188913e-05, "loss": 0.7441, "step": 26640 }, { "epoch": 0.9, "learning_rate": 1.14874097872178e-05, "loss": 0.7624, "step": 26660 }, { "epoch": 0.91, "learning_rate": 1.1476863640774523e-05, "loss": 0.7689, "step": 26680 }, { "epoch": 0.91, "learning_rate": 1.1466315814852094e-05, "loss": 0.7357, "step": 26700 }, { "epoch": 0.91, "learning_rate": 1.145576632144542e-05, "loss": 0.7586, "step": 26720 }, { "epoch": 0.91, "learning_rate": 1.144521517255132e-05, "loss": 0.7609, "step": 26740 }, { "epoch": 0.91, "learning_rate": 1.1434662380168487e-05, "loss": 0.7013, "step": 26760 }, { "epoch": 0.91, "learning_rate": 1.1424107956297479e-05, "loss": 0.7781, "step": 26780 }, { "epoch": 0.91, "learning_rate": 1.1413551912940721e-05, "loss": 0.7621, "step": 26800 }, { "epoch": 0.91, "learning_rate": 1.1402994262102468e-05, "loss": 0.7476, "step": 26820 }, { "epoch": 0.91, "learning_rate": 1.1392435015788805e-05, "loss": 0.6879, "step": 26840 }, { "epoch": 0.91, "learning_rate": 1.138187418600764e-05, "loss": 0.7534, "step": 26860 }, { "epoch": 0.91, "learning_rate": 1.1371311784768672e-05, "loss": 0.8091, "step": 26880 }, { "epoch": 0.91, "learning_rate": 1.1360747824083397e-05, "loss": 0.743, "step": 26900 }, { "epoch": 0.91, "learning_rate": 1.1350182315965071e-05, "loss": 0.8519, "step": 26920 }, { "epoch": 0.91, "learning_rate": 1.1339615272428722e-05, "loss": 0.7123, "step": 26940 }, { "epoch": 0.92, "learning_rate": 1.1329046705491111e-05, "loss": 0.7269, "step": 26960 }, { "epoch": 0.92, "learning_rate": 1.1318476627170751e-05, "loss": 0.8065, "step": 26980 }, { "epoch": 0.92, "learning_rate": 1.1307905049487855e-05, "loss": 0.7649, "step": 27000 }, { "epoch": 0.92, "learning_rate": 1.129733198446435e-05, "loss": 0.7449, "step": 27020 }, { "epoch": 0.92, "learning_rate": 1.1286757444123854e-05, "loss": 0.761, "step": 27040 }, { "epoch": 0.92, "learning_rate": 1.1276181440491658e-05, "loss": 0.7445, "step": 27060 }, { "epoch": 0.92, "learning_rate": 1.1265603985594723e-05, "loss": 0.758, "step": 27080 }, { "epoch": 0.92, "learning_rate": 1.1255025091461657e-05, "loss": 0.7528, "step": 27100 }, { "epoch": 0.92, "learning_rate": 1.1244444770122707e-05, "loss": 0.7695, "step": 27120 }, { "epoch": 0.92, "learning_rate": 1.123386303360974e-05, "loss": 0.7742, "step": 27140 }, { "epoch": 0.92, "learning_rate": 1.1223279893956234e-05, "loss": 0.8097, "step": 27160 }, { "epoch": 0.92, "learning_rate": 1.1212695363197262e-05, "loss": 0.8056, "step": 27180 }, { "epoch": 0.92, "learning_rate": 1.120210945336948e-05, "loss": 0.7472, "step": 27200 }, { "epoch": 0.92, "learning_rate": 1.1191522176511112e-05, "loss": 0.8154, "step": 27220 }, { "epoch": 0.92, "learning_rate": 1.1180933544661936e-05, "loss": 0.736, "step": 27240 }, { "epoch": 0.93, "learning_rate": 1.1170343569863274e-05, "loss": 0.7416, "step": 27260 }, { "epoch": 0.93, "learning_rate": 1.1159752264157963e-05, "loss": 0.768, "step": 27280 }, { "epoch": 0.93, "learning_rate": 1.1149159639590377e-05, "loss": 0.7803, "step": 27300 }, { "epoch": 0.93, "learning_rate": 1.1138565708206364e-05, "loss": 0.7374, "step": 27320 }, { "epoch": 0.93, "learning_rate": 1.1127970482053274e-05, "loss": 0.7396, "step": 27340 }, { "epoch": 0.93, "learning_rate": 1.1117373973179925e-05, "loss": 0.7836, "step": 27360 }, { "epoch": 0.93, "learning_rate": 1.1106776193636595e-05, "loss": 0.7813, "step": 27380 }, { "epoch": 0.93, "learning_rate": 1.1096177155475006e-05, "loss": 0.805, "step": 27400 }, { "epoch": 0.93, "learning_rate": 1.1085576870748307e-05, "loss": 0.7897, "step": 27420 }, { "epoch": 0.93, "learning_rate": 1.1074975351511075e-05, "loss": 0.7369, "step": 27440 }, { "epoch": 0.93, "learning_rate": 1.1064372609819279e-05, "loss": 0.7486, "step": 27460 }, { "epoch": 0.93, "learning_rate": 1.1053768657730284e-05, "loss": 0.7335, "step": 27480 }, { "epoch": 0.93, "learning_rate": 1.1043163507302835e-05, "loss": 0.8417, "step": 27500 }, { "epoch": 0.93, "learning_rate": 1.1032557170597034e-05, "loss": 0.6774, "step": 27520 }, { "epoch": 0.93, "learning_rate": 1.1021949659674337e-05, "loss": 0.7958, "step": 27540 }, { "epoch": 0.94, "learning_rate": 1.1011340986597528e-05, "loss": 0.8394, "step": 27560 }, { "epoch": 0.94, "learning_rate": 1.100073116343072e-05, "loss": 0.799, "step": 27580 }, { "epoch": 0.94, "learning_rate": 1.0990120202239324e-05, "loss": 0.8001, "step": 27600 }, { "epoch": 0.94, "learning_rate": 1.097950811509006e-05, "loss": 0.7543, "step": 27620 }, { "epoch": 0.94, "learning_rate": 1.096889491405092e-05, "loss": 0.7774, "step": 27640 }, { "epoch": 0.94, "learning_rate": 1.0958280611191162e-05, "loss": 0.7711, "step": 27660 }, { "epoch": 0.94, "learning_rate": 1.0947665218581293e-05, "loss": 0.8092, "step": 27680 }, { "epoch": 0.94, "learning_rate": 1.0937048748293068e-05, "loss": 0.7781, "step": 27700 }, { "epoch": 0.94, "learning_rate": 1.0926431212399467e-05, "loss": 0.7465, "step": 27720 }, { "epoch": 0.94, "learning_rate": 1.0915812622974673e-05, "loss": 0.7258, "step": 27740 }, { "epoch": 0.94, "learning_rate": 1.0905192992094075e-05, "loss": 0.7642, "step": 27760 }, { "epoch": 0.94, "learning_rate": 1.0894572331834248e-05, "loss": 0.7676, "step": 27780 }, { "epoch": 0.94, "learning_rate": 1.0883950654272925e-05, "loss": 0.838, "step": 27800 }, { "epoch": 0.94, "learning_rate": 1.0873327971489015e-05, "loss": 0.7291, "step": 27820 }, { "epoch": 0.95, "learning_rate": 1.086270429556255e-05, "loss": 0.7436, "step": 27840 }, { "epoch": 0.95, "learning_rate": 1.0852079638574708e-05, "loss": 0.7519, "step": 27860 }, { "epoch": 0.95, "learning_rate": 1.084145401260777e-05, "loss": 0.8155, "step": 27880 }, { "epoch": 0.95, "learning_rate": 1.083082742974513e-05, "loss": 0.7486, "step": 27900 }, { "epoch": 0.95, "learning_rate": 1.082019990207126e-05, "loss": 0.6917, "step": 27920 }, { "epoch": 0.95, "learning_rate": 1.0809571441671716e-05, "loss": 0.7539, "step": 27940 }, { "epoch": 0.95, "learning_rate": 1.0798942060633108e-05, "loss": 0.7732, "step": 27960 }, { "epoch": 0.95, "learning_rate": 1.0788311771043092e-05, "loss": 0.7179, "step": 27980 }, { "epoch": 0.95, "learning_rate": 1.0777680584990366e-05, "loss": 0.8022, "step": 28000 }, { "epoch": 0.95, "learning_rate": 1.0767048514564633e-05, "loss": 0.8168, "step": 28020 }, { "epoch": 0.95, "learning_rate": 1.0756415571856617e-05, "loss": 0.766, "step": 28040 }, { "epoch": 0.95, "learning_rate": 1.074578176895803e-05, "loss": 0.744, "step": 28060 }, { "epoch": 0.95, "learning_rate": 1.073514711796155e-05, "loss": 0.7179, "step": 28080 }, { "epoch": 0.95, "learning_rate": 1.0724511630960837e-05, "loss": 0.7154, "step": 28100 }, { "epoch": 0.95, "learning_rate": 1.0713875320050487e-05, "loss": 0.7586, "step": 28120 }, { "epoch": 0.96, "learning_rate": 1.0703238197326042e-05, "loss": 0.828, "step": 28140 }, { "epoch": 0.96, "learning_rate": 1.0692600274883969e-05, "loss": 0.7452, "step": 28160 }, { "epoch": 0.96, "learning_rate": 1.0681961564821632e-05, "loss": 0.7193, "step": 28180 }, { "epoch": 0.96, "learning_rate": 1.0671322079237307e-05, "loss": 0.7795, "step": 28200 }, { "epoch": 0.96, "learning_rate": 1.0660681830230137e-05, "loss": 0.7692, "step": 28220 }, { "epoch": 0.96, "learning_rate": 1.0650040829900143e-05, "loss": 0.7935, "step": 28240 }, { "epoch": 0.96, "learning_rate": 1.06393990903482e-05, "loss": 0.7645, "step": 28260 }, { "epoch": 0.96, "learning_rate": 1.0628756623676016e-05, "loss": 0.8378, "step": 28280 }, { "epoch": 0.96, "learning_rate": 1.061811344198613e-05, "loss": 0.7507, "step": 28300 }, { "epoch": 0.96, "learning_rate": 1.06074695573819e-05, "loss": 0.8633, "step": 28320 }, { "epoch": 0.96, "learning_rate": 1.0596824981967475e-05, "loss": 0.7645, "step": 28340 }, { "epoch": 0.96, "learning_rate": 1.0586179727847791e-05, "loss": 0.7723, "step": 28360 }, { "epoch": 0.96, "learning_rate": 1.0575533807128567e-05, "loss": 0.8174, "step": 28380 }, { "epoch": 0.96, "learning_rate": 1.0564887231916256e-05, "loss": 0.7738, "step": 28400 }, { "epoch": 0.96, "learning_rate": 1.0554240014318085e-05, "loss": 0.7467, "step": 28420 }, { "epoch": 0.97, "learning_rate": 1.0543592166441983e-05, "loss": 0.7665, "step": 28440 }, { "epoch": 0.97, "learning_rate": 1.0532943700396616e-05, "loss": 0.7758, "step": 28460 }, { "epoch": 0.97, "learning_rate": 1.0522294628291347e-05, "loss": 0.7501, "step": 28480 }, { "epoch": 0.97, "learning_rate": 1.0511644962236228e-05, "loss": 0.7612, "step": 28500 }, { "epoch": 0.97, "learning_rate": 1.0500994714341984e-05, "loss": 0.7664, "step": 28520 }, { "epoch": 0.97, "learning_rate": 1.049034389672e-05, "loss": 0.7437, "step": 28540 }, { "epoch": 0.97, "learning_rate": 1.0479692521482316e-05, "loss": 0.718, "step": 28560 }, { "epoch": 0.97, "learning_rate": 1.0469040600741603e-05, "loss": 0.7596, "step": 28580 }, { "epoch": 0.97, "learning_rate": 1.0458388146611151e-05, "loss": 0.8108, "step": 28600 }, { "epoch": 0.97, "learning_rate": 1.044773517120486e-05, "loss": 0.7887, "step": 28620 }, { "epoch": 0.97, "learning_rate": 1.0437081686637216e-05, "loss": 0.7195, "step": 28640 }, { "epoch": 0.97, "learning_rate": 1.0426427705023289e-05, "loss": 0.7793, "step": 28660 }, { "epoch": 0.97, "learning_rate": 1.0415773238478715e-05, "loss": 0.6975, "step": 28680 }, { "epoch": 0.97, "learning_rate": 1.0405118299119682e-05, "loss": 0.7211, "step": 28700 }, { "epoch": 0.97, "learning_rate": 1.039446289906291e-05, "loss": 0.7757, "step": 28720 }, { "epoch": 0.98, "learning_rate": 1.0383807050425648e-05, "loss": 0.7514, "step": 28740 }, { "epoch": 0.98, "learning_rate": 1.037315076532565e-05, "loss": 0.6998, "step": 28760 }, { "epoch": 0.98, "learning_rate": 1.0362494055881172e-05, "loss": 0.7226, "step": 28780 }, { "epoch": 0.98, "learning_rate": 1.0351836934210957e-05, "loss": 0.7645, "step": 28800 }, { "epoch": 0.98, "learning_rate": 1.03411794124342e-05, "loss": 0.8352, "step": 28820 }, { "epoch": 0.98, "learning_rate": 1.0330521502670574e-05, "loss": 0.7759, "step": 28840 }, { "epoch": 0.98, "learning_rate": 1.0319863217040166e-05, "loss": 0.7611, "step": 28860 }, { "epoch": 0.98, "learning_rate": 1.0309204567663513e-05, "loss": 0.7064, "step": 28880 }, { "epoch": 0.98, "learning_rate": 1.0298545566661558e-05, "loss": 0.6901, "step": 28900 }, { "epoch": 0.98, "learning_rate": 1.028788622615564e-05, "loss": 0.7307, "step": 28920 }, { "epoch": 0.98, "learning_rate": 1.0277226558267494e-05, "loss": 0.7325, "step": 28940 }, { "epoch": 0.98, "learning_rate": 1.0266566575119213e-05, "loss": 0.8501, "step": 28960 }, { "epoch": 0.98, "learning_rate": 1.0255906288833259e-05, "loss": 0.7255, "step": 28980 }, { "epoch": 0.98, "learning_rate": 1.0245245711532437e-05, "loss": 0.6879, "step": 29000 }, { "epoch": 0.99, "learning_rate": 1.0234584855339884e-05, "loss": 0.7236, "step": 29020 }, { "epoch": 0.99, "learning_rate": 1.0223923732379049e-05, "loss": 0.761, "step": 29040 }, { "epoch": 0.99, "learning_rate": 1.0213262354773689e-05, "loss": 0.7608, "step": 29060 }, { "epoch": 0.99, "learning_rate": 1.020260073464785e-05, "loss": 0.7164, "step": 29080 }, { "epoch": 0.99, "learning_rate": 1.0191938884125847e-05, "loss": 0.7795, "step": 29100 }, { "epoch": 0.99, "learning_rate": 1.0181276815332273e-05, "loss": 0.6521, "step": 29120 }, { "epoch": 0.99, "learning_rate": 1.0170614540391952e-05, "loss": 0.7608, "step": 29140 }, { "epoch": 0.99, "learning_rate": 1.0159952071429953e-05, "loss": 0.7849, "step": 29160 }, { "epoch": 0.99, "learning_rate": 1.0149289420571558e-05, "loss": 0.6989, "step": 29180 }, { "epoch": 0.99, "learning_rate": 1.0138626599942263e-05, "loss": 0.7986, "step": 29200 }, { "epoch": 0.99, "learning_rate": 1.0127963621667754e-05, "loss": 0.7628, "step": 29220 }, { "epoch": 0.99, "learning_rate": 1.0117300497873897e-05, "loss": 0.7438, "step": 29240 }, { "epoch": 0.99, "learning_rate": 1.0106637240686723e-05, "loss": 0.7998, "step": 29260 }, { "epoch": 0.99, "learning_rate": 1.009597386223241e-05, "loss": 0.7247, "step": 29280 }, { "epoch": 0.99, "learning_rate": 1.0085310374637288e-05, "loss": 0.7975, "step": 29300 }, { "epoch": 1.0, "learning_rate": 1.0074646790027792e-05, "loss": 0.7294, "step": 29320 }, { "epoch": 1.0, "learning_rate": 1.006398312053048e-05, "loss": 0.7467, "step": 29340 }, { "epoch": 1.0, "learning_rate": 1.0053319378272008e-05, "loss": 0.7376, "step": 29360 }, { "epoch": 1.0, "learning_rate": 1.0042655575379103e-05, "loss": 0.8358, "step": 29380 }, { "epoch": 1.0, "learning_rate": 1.0031991723978574e-05, "loss": 0.8395, "step": 29400 }, { "epoch": 1.0, "learning_rate": 1.0021327836197274e-05, "loss": 0.7004, "step": 29420 }, { "epoch": 1.0, "learning_rate": 1.001066392416211e-05, "loss": 0.6753, "step": 29440 }, { "epoch": 1.0, "learning_rate": 1e-05, "loss": 0.7482, "step": 29460 }, { "epoch": 1.0, "learning_rate": 9.989336075837893e-06, "loss": 0.5971, "step": 29480 }, { "epoch": 1.0, "learning_rate": 9.978672163802726e-06, "loss": 0.5315, "step": 29500 }, { "epoch": 1.0, "learning_rate": 9.96800827602143e-06, "loss": 0.5765, "step": 29520 }, { "epoch": 1.0, "learning_rate": 9.9573444246209e-06, "loss": 0.5696, "step": 29540 }, { "epoch": 1.0, "learning_rate": 9.946680621727995e-06, "loss": 0.5824, "step": 29560 }, { "epoch": 1.0, "learning_rate": 9.936016879469522e-06, "loss": 0.5709, "step": 29580 }, { "epoch": 1.0, "learning_rate": 9.925353209972213e-06, "loss": 0.5152, "step": 29600 }, { "epoch": 1.01, "learning_rate": 9.914689625362717e-06, "loss": 0.5453, "step": 29620 }, { "epoch": 1.01, "learning_rate": 9.904026137767591e-06, "loss": 0.5355, "step": 29640 }, { "epoch": 1.01, "learning_rate": 9.89336275931328e-06, "loss": 0.5089, "step": 29660 }, { "epoch": 1.01, "learning_rate": 9.882699502126103e-06, "loss": 0.5548, "step": 29680 }, { "epoch": 1.01, "learning_rate": 9.872036378332251e-06, "loss": 0.5285, "step": 29700 }, { "epoch": 1.01, "learning_rate": 9.86137340005774e-06, "loss": 0.5213, "step": 29720 }, { "epoch": 1.01, "learning_rate": 9.850710579428445e-06, "loss": 0.4975, "step": 29740 }, { "epoch": 1.01, "learning_rate": 9.84004792857005e-06, "loss": 0.5456, "step": 29760 }, { "epoch": 1.01, "learning_rate": 9.82938545960805e-06, "loss": 0.5541, "step": 29780 }, { "epoch": 1.01, "learning_rate": 9.818723184667732e-06, "loss": 0.5548, "step": 29800 }, { "epoch": 1.01, "learning_rate": 9.808061115874156e-06, "loss": 0.5818, "step": 29820 }, { "epoch": 1.01, "learning_rate": 9.797399265352154e-06, "loss": 0.5393, "step": 29840 }, { "epoch": 1.01, "learning_rate": 9.786737645226313e-06, "loss": 0.5183, "step": 29860 }, { "epoch": 1.01, "learning_rate": 9.776076267620955e-06, "loss": 0.5039, "step": 29880 }, { "epoch": 1.01, "learning_rate": 9.76541514466012e-06, "loss": 0.5041, "step": 29900 }, { "epoch": 1.02, "learning_rate": 9.754754288467564e-06, "loss": 0.4768, "step": 29920 }, { "epoch": 1.02, "learning_rate": 9.744093711166745e-06, "loss": 0.5311, "step": 29940 }, { "epoch": 1.02, "learning_rate": 9.733433424880789e-06, "loss": 0.5408, "step": 29960 }, { "epoch": 1.02, "learning_rate": 9.72277344173251e-06, "loss": 0.4999, "step": 29980 }, { "epoch": 1.02, "learning_rate": 9.712113773844361e-06, "loss": 0.5325, "step": 30000 }, { "epoch": 1.02, "learning_rate": 9.701454433338445e-06, "loss": 0.5887, "step": 30020 }, { "epoch": 1.02, "learning_rate": 9.690795432336489e-06, "loss": 0.5308, "step": 30040 }, { "epoch": 1.02, "learning_rate": 9.680136782959835e-06, "loss": 0.5197, "step": 30060 }, { "epoch": 1.02, "learning_rate": 9.669478497329431e-06, "loss": 0.58, "step": 30080 }, { "epoch": 1.02, "learning_rate": 9.658820587565801e-06, "loss": 0.5552, "step": 30100 }, { "epoch": 1.02, "learning_rate": 9.648163065789046e-06, "loss": 0.5623, "step": 30120 }, { "epoch": 1.02, "learning_rate": 9.637505944118828e-06, "loss": 0.5321, "step": 30140 }, { "epoch": 1.02, "learning_rate": 9.626849234674356e-06, "loss": 0.5571, "step": 30160 }, { "epoch": 1.02, "learning_rate": 9.616192949574357e-06, "loss": 0.5319, "step": 30180 }, { "epoch": 1.03, "learning_rate": 9.605537100937094e-06, "loss": 0.5443, "step": 30200 }, { "epoch": 1.03, "learning_rate": 9.59488170088032e-06, "loss": 0.5153, "step": 30220 }, { "epoch": 1.03, "learning_rate": 9.584226761521285e-06, "loss": 0.5195, "step": 30240 }, { "epoch": 1.03, "learning_rate": 9.573572294976714e-06, "loss": 0.5135, "step": 30260 }, { "epoch": 1.03, "learning_rate": 9.562918313362787e-06, "loss": 0.5177, "step": 30280 }, { "epoch": 1.03, "learning_rate": 9.552264828795142e-06, "loss": 0.5544, "step": 30300 }, { "epoch": 1.03, "learning_rate": 9.541611853388849e-06, "loss": 0.5243, "step": 30320 }, { "epoch": 1.03, "learning_rate": 9.530959399258397e-06, "loss": 0.5352, "step": 30340 }, { "epoch": 1.03, "learning_rate": 9.520307478517686e-06, "loss": 0.5354, "step": 30360 }, { "epoch": 1.03, "learning_rate": 9.509656103280004e-06, "loss": 0.5097, "step": 30380 }, { "epoch": 1.03, "learning_rate": 9.499005285658021e-06, "loss": 0.5286, "step": 30400 }, { "epoch": 1.03, "learning_rate": 9.488355037763774e-06, "loss": 0.5257, "step": 30420 }, { "epoch": 1.03, "learning_rate": 9.477705371708657e-06, "loss": 0.5127, "step": 30440 }, { "epoch": 1.03, "learning_rate": 9.467056299603388e-06, "loss": 0.542, "step": 30460 }, { "epoch": 1.03, "learning_rate": 9.456407833558019e-06, "loss": 0.5338, "step": 30480 }, { "epoch": 1.04, "learning_rate": 9.445759985681919e-06, "loss": 0.5501, "step": 30500 }, { "epoch": 1.04, "learning_rate": 9.435112768083742e-06, "loss": 0.5282, "step": 30520 }, { "epoch": 1.04, "learning_rate": 9.42446619287144e-06, "loss": 0.5156, "step": 30540 }, { "epoch": 1.04, "learning_rate": 9.41382027215221e-06, "loss": 0.534, "step": 30560 }, { "epoch": 1.04, "learning_rate": 9.403175018032528e-06, "loss": 0.5237, "step": 30580 }, { "epoch": 1.04, "learning_rate": 9.3925304426181e-06, "loss": 0.5067, "step": 30600 }, { "epoch": 1.04, "learning_rate": 9.38188655801387e-06, "loss": 0.5491, "step": 30620 }, { "epoch": 1.04, "learning_rate": 9.37124337632399e-06, "loss": 0.5151, "step": 30640 }, { "epoch": 1.04, "learning_rate": 9.360600909651804e-06, "loss": 0.5706, "step": 30660 }, { "epoch": 1.04, "learning_rate": 9.349959170099859e-06, "loss": 0.4919, "step": 30680 }, { "epoch": 1.04, "learning_rate": 9.339318169769865e-06, "loss": 0.5317, "step": 30700 }, { "epoch": 1.04, "learning_rate": 9.328677920762698e-06, "loss": 0.5356, "step": 30720 }, { "epoch": 1.04, "learning_rate": 9.318038435178371e-06, "loss": 0.574, "step": 30740 }, { "epoch": 1.04, "learning_rate": 9.307399725116035e-06, "loss": 0.5253, "step": 30760 }, { "epoch": 1.04, "learning_rate": 9.29676180267396e-06, "loss": 0.5226, "step": 30780 }, { "epoch": 1.05, "learning_rate": 9.286124679949515e-06, "loss": 0.5003, "step": 30800 }, { "epoch": 1.05, "learning_rate": 9.275488369039169e-06, "loss": 0.569, "step": 30820 }, { "epoch": 1.05, "learning_rate": 9.264852882038453e-06, "loss": 0.5178, "step": 30840 }, { "epoch": 1.05, "learning_rate": 9.254218231041973e-06, "loss": 0.519, "step": 30860 }, { "epoch": 1.05, "learning_rate": 9.243584428143383e-06, "loss": 0.5543, "step": 30880 }, { "epoch": 1.05, "learning_rate": 9.232951485435367e-06, "loss": 0.5391, "step": 30900 }, { "epoch": 1.05, "learning_rate": 9.222319415009639e-06, "loss": 0.51, "step": 30920 }, { "epoch": 1.05, "learning_rate": 9.21168822895691e-06, "loss": 0.5161, "step": 30940 }, { "epoch": 1.05, "learning_rate": 9.201057939366896e-06, "loss": 0.5187, "step": 30960 }, { "epoch": 1.05, "learning_rate": 9.190428558328285e-06, "loss": 0.4999, "step": 30980 }, { "epoch": 1.05, "learning_rate": 9.179800097928743e-06, "loss": 0.4937, "step": 31000 }, { "epoch": 1.05, "learning_rate": 9.169172570254874e-06, "loss": 0.5363, "step": 31020 }, { "epoch": 1.05, "learning_rate": 9.158545987392233e-06, "loss": 0.4939, "step": 31040 }, { "epoch": 1.05, "learning_rate": 9.147920361425295e-06, "loss": 0.5205, "step": 31060 }, { "epoch": 1.05, "learning_rate": 9.13729570443745e-06, "loss": 0.5278, "step": 31080 }, { "epoch": 1.06, "learning_rate": 9.12667202851099e-06, "loss": 0.5109, "step": 31100 }, { "epoch": 1.06, "learning_rate": 9.116049345727076e-06, "loss": 0.5284, "step": 31120 }, { "epoch": 1.06, "learning_rate": 9.105427668165755e-06, "loss": 0.549, "step": 31140 }, { "epoch": 1.06, "learning_rate": 9.094807007905927e-06, "loss": 0.5686, "step": 31160 }, { "epoch": 1.06, "learning_rate": 9.084187377025334e-06, "loss": 0.5356, "step": 31180 }, { "epoch": 1.06, "learning_rate": 9.073568787600538e-06, "loss": 0.4808, "step": 31200 }, { "epoch": 1.06, "learning_rate": 9.062951251706933e-06, "loss": 0.5262, "step": 31220 }, { "epoch": 1.06, "learning_rate": 9.052334781418708e-06, "loss": 0.5427, "step": 31240 }, { "epoch": 1.06, "learning_rate": 9.04171938880884e-06, "loss": 0.5331, "step": 31260 }, { "epoch": 1.06, "learning_rate": 9.031105085949083e-06, "loss": 0.501, "step": 31280 }, { "epoch": 1.06, "learning_rate": 9.020491884909941e-06, "loss": 0.5483, "step": 31300 }, { "epoch": 1.06, "learning_rate": 9.009879797760679e-06, "loss": 0.5403, "step": 31320 }, { "epoch": 1.06, "learning_rate": 8.999268836569284e-06, "loss": 0.5151, "step": 31340 }, { "epoch": 1.06, "learning_rate": 8.988659013402474e-06, "loss": 0.5153, "step": 31360 }, { "epoch": 1.07, "learning_rate": 8.978050340325668e-06, "loss": 0.5419, "step": 31380 }, { "epoch": 1.07, "learning_rate": 8.967442829402968e-06, "loss": 0.4932, "step": 31400 }, { "epoch": 1.07, "learning_rate": 8.956836492697167e-06, "loss": 0.5351, "step": 31420 }, { "epoch": 1.07, "learning_rate": 8.94623134226972e-06, "loss": 0.5007, "step": 31440 }, { "epoch": 1.07, "learning_rate": 8.935627390180728e-06, "loss": 0.5453, "step": 31460 }, { "epoch": 1.07, "learning_rate": 8.925024648488932e-06, "loss": 0.528, "step": 31480 }, { "epoch": 1.07, "learning_rate": 8.914423129251697e-06, "loss": 0.5672, "step": 31500 }, { "epoch": 1.07, "learning_rate": 8.903822844524999e-06, "loss": 0.5049, "step": 31520 }, { "epoch": 1.07, "learning_rate": 8.893223806363407e-06, "loss": 0.5704, "step": 31540 }, { "epoch": 1.07, "learning_rate": 8.882626026820078e-06, "loss": 0.4887, "step": 31560 }, { "epoch": 1.07, "learning_rate": 8.872029517946729e-06, "loss": 0.5191, "step": 31580 }, { "epoch": 1.07, "learning_rate": 8.861434291793639e-06, "loss": 0.5468, "step": 31600 }, { "epoch": 1.07, "learning_rate": 8.850840360409627e-06, "loss": 0.5635, "step": 31620 }, { "epoch": 1.07, "learning_rate": 8.840247735842037e-06, "loss": 0.5354, "step": 31640 }, { "epoch": 1.07, "learning_rate": 8.829656430136732e-06, "loss": 0.4913, "step": 31660 }, { "epoch": 1.08, "learning_rate": 8.819066455338067e-06, "loss": 0.5314, "step": 31680 }, { "epoch": 1.08, "learning_rate": 8.80847782348889e-06, "loss": 0.5026, "step": 31700 }, { "epoch": 1.08, "learning_rate": 8.797890546630522e-06, "loss": 0.5835, "step": 31720 }, { "epoch": 1.08, "learning_rate": 8.787304636802743e-06, "loss": 0.5104, "step": 31740 }, { "epoch": 1.08, "learning_rate": 8.77672010604377e-06, "loss": 0.4618, "step": 31760 }, { "epoch": 1.08, "learning_rate": 8.766136966390263e-06, "loss": 0.5285, "step": 31780 }, { "epoch": 1.08, "learning_rate": 8.755555229877295e-06, "loss": 0.5043, "step": 31800 }, { "epoch": 1.08, "learning_rate": 8.744974908538345e-06, "loss": 0.5348, "step": 31820 }, { "epoch": 1.08, "learning_rate": 8.734396014405282e-06, "loss": 0.5492, "step": 31840 }, { "epoch": 1.08, "learning_rate": 8.723818559508345e-06, "loss": 0.5543, "step": 31860 }, { "epoch": 1.08, "learning_rate": 8.713242555876149e-06, "loss": 0.5509, "step": 31880 }, { "epoch": 1.08, "learning_rate": 8.702668015535651e-06, "loss": 0.5401, "step": 31900 }, { "epoch": 1.08, "learning_rate": 8.692094950512145e-06, "loss": 0.5644, "step": 31920 }, { "epoch": 1.08, "learning_rate": 8.681523372829254e-06, "loss": 0.5083, "step": 31940 }, { "epoch": 1.08, "learning_rate": 8.670953294508892e-06, "loss": 0.5296, "step": 31960 }, { "epoch": 1.09, "learning_rate": 8.660384727571282e-06, "loss": 0.6039, "step": 31980 }, { "epoch": 1.09, "learning_rate": 8.64981768403493e-06, "loss": 0.4802, "step": 32000 }, { "epoch": 1.09, "learning_rate": 8.639252175916608e-06, "loss": 0.5125, "step": 32020 }, { "epoch": 1.09, "learning_rate": 8.62868821523133e-06, "loss": 0.4873, "step": 32040 }, { "epoch": 1.09, "learning_rate": 8.618125813992364e-06, "loss": 0.53, "step": 32060 }, { "epoch": 1.09, "learning_rate": 8.607564984211199e-06, "loss": 0.5251, "step": 32080 }, { "epoch": 1.09, "learning_rate": 8.597005737897534e-06, "loss": 0.57, "step": 32100 }, { "epoch": 1.09, "learning_rate": 8.586448087059284e-06, "loss": 0.5491, "step": 32120 }, { "epoch": 1.09, "learning_rate": 8.575892043702523e-06, "loss": 0.5388, "step": 32140 }, { "epoch": 1.09, "learning_rate": 8.565337619831516e-06, "loss": 0.5428, "step": 32160 }, { "epoch": 1.09, "learning_rate": 8.554784827448681e-06, "loss": 0.5715, "step": 32180 }, { "epoch": 1.09, "learning_rate": 8.544233678554581e-06, "loss": 0.5013, "step": 32200 }, { "epoch": 1.09, "learning_rate": 8.533684185147913e-06, "loss": 0.5551, "step": 32220 }, { "epoch": 1.09, "learning_rate": 8.52313635922548e-06, "loss": 0.5181, "step": 32240 }, { "epoch": 1.1, "learning_rate": 8.512590212782203e-06, "loss": 0.5321, "step": 32260 }, { "epoch": 1.1, "learning_rate": 8.502045757811087e-06, "loss": 0.5279, "step": 32280 }, { "epoch": 1.1, "learning_rate": 8.491503006303212e-06, "loss": 0.5403, "step": 32300 }, { "epoch": 1.1, "learning_rate": 8.480961970247722e-06, "loss": 0.5359, "step": 32320 }, { "epoch": 1.1, "learning_rate": 8.47042266163181e-06, "loss": 0.533, "step": 32340 }, { "epoch": 1.1, "learning_rate": 8.459885092440708e-06, "loss": 0.5727, "step": 32360 }, { "epoch": 1.1, "learning_rate": 8.449349274657665e-06, "loss": 0.5204, "step": 32380 }, { "epoch": 1.1, "learning_rate": 8.438815220263942e-06, "loss": 0.5187, "step": 32400 }, { "epoch": 1.1, "learning_rate": 8.42828294123879e-06, "loss": 0.5268, "step": 32420 }, { "epoch": 1.1, "learning_rate": 8.417752449559447e-06, "loss": 0.5333, "step": 32440 }, { "epoch": 1.1, "learning_rate": 8.407223757201113e-06, "loss": 0.5326, "step": 32460 }, { "epoch": 1.1, "learning_rate": 8.396696876136948e-06, "loss": 0.5232, "step": 32480 }, { "epoch": 1.1, "learning_rate": 8.386171818338048e-06, "loss": 0.4898, "step": 32500 }, { "epoch": 1.1, "learning_rate": 8.37564859577343e-06, "loss": 0.4699, "step": 32520 }, { "epoch": 1.1, "learning_rate": 8.365127220410037e-06, "loss": 0.5222, "step": 32540 }, { "epoch": 1.11, "learning_rate": 8.354607704212703e-06, "loss": 0.5207, "step": 32560 }, { "epoch": 1.11, "learning_rate": 8.344090059144151e-06, "loss": 0.5525, "step": 32580 }, { "epoch": 1.11, "learning_rate": 8.333574297164969e-06, "loss": 0.5413, "step": 32600 }, { "epoch": 1.11, "learning_rate": 8.323060430233613e-06, "loss": 0.5338, "step": 32620 }, { "epoch": 1.11, "learning_rate": 8.312548470306379e-06, "loss": 0.535, "step": 32640 }, { "epoch": 1.11, "learning_rate": 8.302038429337396e-06, "loss": 0.5453, "step": 32660 }, { "epoch": 1.11, "learning_rate": 8.291530319278614e-06, "loss": 0.5435, "step": 32680 }, { "epoch": 1.11, "learning_rate": 8.281024152079775e-06, "loss": 0.5129, "step": 32700 }, { "epoch": 1.11, "learning_rate": 8.270519939688421e-06, "loss": 0.5429, "step": 32720 }, { "epoch": 1.11, "learning_rate": 8.260017694049878e-06, "loss": 0.5661, "step": 32740 }, { "epoch": 1.11, "learning_rate": 8.249517427107225e-06, "loss": 0.5217, "step": 32760 }, { "epoch": 1.11, "learning_rate": 8.239019150801286e-06, "loss": 0.4945, "step": 32780 }, { "epoch": 1.11, "learning_rate": 8.228522877070636e-06, "loss": 0.5153, "step": 32800 }, { "epoch": 1.11, "learning_rate": 8.21802861785156e-06, "loss": 0.5137, "step": 32820 }, { "epoch": 1.11, "learning_rate": 8.207536385078057e-06, "loss": 0.5012, "step": 32840 }, { "epoch": 1.12, "learning_rate": 8.197046190681832e-06, "loss": 0.5243, "step": 32860 }, { "epoch": 1.12, "learning_rate": 8.186558046592247e-06, "loss": 0.5136, "step": 32880 }, { "epoch": 1.12, "learning_rate": 8.176071964736358e-06, "loss": 0.4767, "step": 32900 }, { "epoch": 1.12, "learning_rate": 8.16558795703886e-06, "loss": 0.5258, "step": 32920 }, { "epoch": 1.12, "learning_rate": 8.155106035422095e-06, "loss": 0.4979, "step": 32940 }, { "epoch": 1.12, "learning_rate": 8.144626211806038e-06, "loss": 0.4596, "step": 32960 }, { "epoch": 1.12, "learning_rate": 8.13414849810826e-06, "loss": 0.5071, "step": 32980 }, { "epoch": 1.12, "learning_rate": 8.123672906243955e-06, "loss": 0.5356, "step": 33000 }, { "epoch": 1.12, "learning_rate": 8.11319944812589e-06, "loss": 0.5452, "step": 33020 }, { "epoch": 1.12, "learning_rate": 8.102728135664411e-06, "loss": 0.5393, "step": 33040 }, { "epoch": 1.12, "learning_rate": 8.09225898076742e-06, "loss": 0.5167, "step": 33060 }, { "epoch": 1.12, "learning_rate": 8.08179199534037e-06, "loss": 0.4935, "step": 33080 }, { "epoch": 1.12, "learning_rate": 8.071327191286244e-06, "loss": 0.547, "step": 33100 }, { "epoch": 1.12, "learning_rate": 8.060864580505542e-06, "loss": 0.5217, "step": 33120 }, { "epoch": 1.12, "learning_rate": 8.050404174896282e-06, "loss": 0.5212, "step": 33140 }, { "epoch": 1.13, "learning_rate": 8.039945986353954e-06, "loss": 0.4672, "step": 33160 }, { "epoch": 1.13, "learning_rate": 8.029490026771545e-06, "loss": 0.4622, "step": 33180 }, { "epoch": 1.13, "learning_rate": 8.019036308039497e-06, "loss": 0.507, "step": 33200 }, { "epoch": 1.13, "learning_rate": 8.008584842045706e-06, "loss": 0.5324, "step": 33220 }, { "epoch": 1.13, "learning_rate": 7.998135640675514e-06, "loss": 0.5084, "step": 33240 }, { "epoch": 1.13, "learning_rate": 7.987688715811672e-06, "loss": 0.5583, "step": 33260 }, { "epoch": 1.13, "learning_rate": 7.977244079334357e-06, "loss": 0.5154, "step": 33280 }, { "epoch": 1.13, "learning_rate": 7.966801743121133e-06, "loss": 0.5123, "step": 33300 }, { "epoch": 1.13, "learning_rate": 7.95636171904696e-06, "loss": 0.517, "step": 33320 }, { "epoch": 1.13, "learning_rate": 7.945924018984155e-06, "loss": 0.4924, "step": 33340 }, { "epoch": 1.13, "learning_rate": 7.935488654802395e-06, "loss": 0.5687, "step": 33360 }, { "epoch": 1.13, "learning_rate": 7.925055638368713e-06, "loss": 0.5503, "step": 33380 }, { "epoch": 1.13, "learning_rate": 7.914624981547459e-06, "loss": 0.5222, "step": 33400 }, { "epoch": 1.13, "learning_rate": 7.90419669620031e-06, "loss": 0.5184, "step": 33420 }, { "epoch": 1.14, "learning_rate": 7.893770794186231e-06, "loss": 0.5176, "step": 33440 }, { "epoch": 1.14, "learning_rate": 7.88334728736149e-06, "loss": 0.5423, "step": 33460 }, { "epoch": 1.14, "learning_rate": 7.872926187579625e-06, "loss": 0.5321, "step": 33480 }, { "epoch": 1.14, "learning_rate": 7.862507506691444e-06, "loss": 0.5127, "step": 33500 }, { "epoch": 1.14, "learning_rate": 7.852091256545e-06, "loss": 0.4715, "step": 33520 }, { "epoch": 1.14, "learning_rate": 7.841677448985572e-06, "loss": 0.5334, "step": 33540 }, { "epoch": 1.14, "learning_rate": 7.831266095855675e-06, "loss": 0.4881, "step": 33560 }, { "epoch": 1.14, "learning_rate": 7.82085720899503e-06, "loss": 0.5016, "step": 33580 }, { "epoch": 1.14, "learning_rate": 7.810450800240549e-06, "loss": 0.5255, "step": 33600 }, { "epoch": 1.14, "learning_rate": 7.800046881426331e-06, "loss": 0.4765, "step": 33620 }, { "epoch": 1.14, "learning_rate": 7.789645464383635e-06, "loss": 0.5301, "step": 33640 }, { "epoch": 1.14, "learning_rate": 7.779246560940887e-06, "loss": 0.4933, "step": 33660 }, { "epoch": 1.14, "learning_rate": 7.768850182923646e-06, "loss": 0.4941, "step": 33680 }, { "epoch": 1.14, "learning_rate": 7.758456342154602e-06, "loss": 0.4816, "step": 33700 }, { "epoch": 1.14, "learning_rate": 7.748065050453557e-06, "loss": 0.5509, "step": 33720 }, { "epoch": 1.15, "learning_rate": 7.737676319637417e-06, "loss": 0.5269, "step": 33740 }, { "epoch": 1.15, "learning_rate": 7.727290161520176e-06, "loss": 0.4682, "step": 33760 }, { "epoch": 1.15, "learning_rate": 7.716906587912903e-06, "loss": 0.5659, "step": 33780 }, { "epoch": 1.15, "learning_rate": 7.706525610623726e-06, "loss": 0.4829, "step": 33800 }, { "epoch": 1.15, "learning_rate": 7.696147241457817e-06, "loss": 0.5116, "step": 33820 }, { "epoch": 1.15, "learning_rate": 7.685771492217387e-06, "loss": 0.4765, "step": 33840 }, { "epoch": 1.15, "learning_rate": 7.675398374701669e-06, "loss": 0.559, "step": 33860 }, { "epoch": 1.15, "learning_rate": 7.665027900706902e-06, "loss": 0.5885, "step": 33880 }, { "epoch": 1.15, "learning_rate": 7.654660082026313e-06, "loss": 0.5742, "step": 33900 }, { "epoch": 1.15, "learning_rate": 7.644294930450117e-06, "loss": 0.5308, "step": 33920 }, { "epoch": 1.15, "learning_rate": 7.633932457765491e-06, "loss": 0.5628, "step": 33940 }, { "epoch": 1.15, "learning_rate": 7.623572675756568e-06, "loss": 0.527, "step": 33960 }, { "epoch": 1.15, "learning_rate": 7.613215596204425e-06, "loss": 0.5158, "step": 33980 }, { "epoch": 1.15, "learning_rate": 7.602861230887055e-06, "loss": 0.5089, "step": 34000 }, { "epoch": 1.15, "learning_rate": 7.592509591579373e-06, "loss": 0.5192, "step": 34020 }, { "epoch": 1.16, "learning_rate": 7.582160690053194e-06, "loss": 0.536, "step": 34040 }, { "epoch": 1.16, "learning_rate": 7.571814538077216e-06, "loss": 0.5417, "step": 34060 }, { "epoch": 1.16, "learning_rate": 7.5614711474170155e-06, "loss": 0.5583, "step": 34080 }, { "epoch": 1.16, "learning_rate": 7.551130529835016e-06, "loss": 0.5398, "step": 34100 }, { "epoch": 1.16, "learning_rate": 7.540792697090502e-06, "loss": 0.5044, "step": 34120 }, { "epoch": 1.16, "learning_rate": 7.530457660939588e-06, "loss": 0.504, "step": 34140 }, { "epoch": 1.16, "learning_rate": 7.5201254331352056e-06, "loss": 0.4996, "step": 34160 }, { "epoch": 1.16, "learning_rate": 7.509796025427088e-06, "loss": 0.534, "step": 34180 }, { "epoch": 1.16, "learning_rate": 7.49946944956177e-06, "loss": 0.5249, "step": 34200 }, { "epoch": 1.16, "learning_rate": 7.489145717282559e-06, "loss": 0.4901, "step": 34220 }, { "epoch": 1.16, "learning_rate": 7.478824840329537e-06, "loss": 0.4841, "step": 34240 }, { "epoch": 1.16, "learning_rate": 7.4685068304395366e-06, "loss": 0.4928, "step": 34260 }, { "epoch": 1.16, "learning_rate": 7.45819169934612e-06, "loss": 0.551, "step": 34280 }, { "epoch": 1.16, "learning_rate": 7.4478794587795855e-06, "loss": 0.507, "step": 34300 }, { "epoch": 1.16, "learning_rate": 7.437570120466943e-06, "loss": 0.4897, "step": 34320 }, { "epoch": 1.17, "learning_rate": 7.427263696131903e-06, "loss": 0.5578, "step": 34340 }, { "epoch": 1.17, "learning_rate": 7.416960197494856e-06, "loss": 0.5348, "step": 34360 }, { "epoch": 1.17, "learning_rate": 7.406659636272872e-06, "loss": 0.537, "step": 34380 }, { "epoch": 1.17, "learning_rate": 7.396362024179677e-06, "loss": 0.4799, "step": 34400 }, { "epoch": 1.17, "learning_rate": 7.386067372925647e-06, "loss": 0.487, "step": 34420 }, { "epoch": 1.17, "learning_rate": 7.375775694217788e-06, "loss": 0.5118, "step": 34440 }, { "epoch": 1.17, "learning_rate": 7.365486999759723e-06, "loss": 0.4961, "step": 34460 }, { "epoch": 1.17, "learning_rate": 7.355201301251689e-06, "loss": 0.5203, "step": 34480 }, { "epoch": 1.17, "learning_rate": 7.344918610390506e-06, "loss": 0.5447, "step": 34500 }, { "epoch": 1.17, "learning_rate": 7.334638938869582e-06, "loss": 0.5421, "step": 34520 }, { "epoch": 1.17, "learning_rate": 7.324362298378893e-06, "loss": 0.5051, "step": 34540 }, { "epoch": 1.17, "learning_rate": 7.314088700604958e-06, "loss": 0.4877, "step": 34560 }, { "epoch": 1.17, "learning_rate": 7.303818157230841e-06, "loss": 0.4842, "step": 34580 }, { "epoch": 1.17, "learning_rate": 7.293550679936135e-06, "loss": 0.532, "step": 34600 }, { "epoch": 1.18, "learning_rate": 7.283286280396948e-06, "loss": 0.5177, "step": 34620 }, { "epoch": 1.18, "learning_rate": 7.27302497028588e-06, "loss": 0.537, "step": 34640 }, { "epoch": 1.18, "learning_rate": 7.262766761272022e-06, "loss": 0.5476, "step": 34660 }, { "epoch": 1.18, "learning_rate": 7.252511665020939e-06, "loss": 0.5326, "step": 34680 }, { "epoch": 1.18, "learning_rate": 7.242259693194658e-06, "loss": 0.5295, "step": 34700 }, { "epoch": 1.18, "learning_rate": 7.232010857451653e-06, "loss": 0.6052, "step": 34720 }, { "epoch": 1.18, "learning_rate": 7.221765169446819e-06, "loss": 0.5554, "step": 34740 }, { "epoch": 1.18, "learning_rate": 7.211522640831492e-06, "loss": 0.4923, "step": 34760 }, { "epoch": 1.18, "learning_rate": 7.2012832832534e-06, "loss": 0.5459, "step": 34780 }, { "epoch": 1.18, "learning_rate": 7.1910471083566725e-06, "loss": 0.5057, "step": 34800 }, { "epoch": 1.18, "learning_rate": 7.180814127781819e-06, "loss": 0.4881, "step": 34820 }, { "epoch": 1.18, "learning_rate": 7.170584353165708e-06, "loss": 0.5223, "step": 34840 }, { "epoch": 1.18, "learning_rate": 7.1603577961415704e-06, "loss": 0.5601, "step": 34860 }, { "epoch": 1.18, "learning_rate": 7.15013446833898e-06, "loss": 0.5344, "step": 34880 }, { "epoch": 1.18, "learning_rate": 7.139914381383837e-06, "loss": 0.5106, "step": 34900 }, { "epoch": 1.19, "learning_rate": 7.129697546898344e-06, "loss": 0.5414, "step": 34920 }, { "epoch": 1.19, "learning_rate": 7.119483976501021e-06, "loss": 0.5041, "step": 34940 }, { "epoch": 1.19, "learning_rate": 7.10927368180667e-06, "loss": 0.5035, "step": 34960 }, { "epoch": 1.19, "learning_rate": 7.0990666744263646e-06, "loss": 0.5097, "step": 34980 }, { "epoch": 1.19, "learning_rate": 7.0888629659674525e-06, "loss": 0.5195, "step": 35000 }, { "epoch": 1.19, "learning_rate": 7.07866256803351e-06, "loss": 0.5062, "step": 35020 }, { "epoch": 1.19, "learning_rate": 7.068465492224361e-06, "loss": 0.514, "step": 35040 }, { "epoch": 1.19, "learning_rate": 7.058271750136054e-06, "loss": 0.5295, "step": 35060 }, { "epoch": 1.19, "learning_rate": 7.04808135336084e-06, "loss": 0.5194, "step": 35080 }, { "epoch": 1.19, "learning_rate": 7.03789431348717e-06, "loss": 0.5081, "step": 35100 }, { "epoch": 1.19, "learning_rate": 7.027710642099672e-06, "loss": 0.5042, "step": 35120 }, { "epoch": 1.19, "learning_rate": 7.017530350779148e-06, "loss": 0.5264, "step": 35140 }, { "epoch": 1.19, "learning_rate": 7.007353451102557e-06, "loss": 0.4971, "step": 35160 }, { "epoch": 1.19, "learning_rate": 6.997179954642998e-06, "loss": 0.5111, "step": 35180 }, { "epoch": 1.19, "learning_rate": 6.9870098729697015e-06, "loss": 0.5388, "step": 35200 }, { "epoch": 1.2, "learning_rate": 6.976843217648013e-06, "loss": 0.5412, "step": 35220 }, { "epoch": 1.2, "learning_rate": 6.966680000239384e-06, "loss": 0.547, "step": 35240 }, { "epoch": 1.2, "learning_rate": 6.956520232301355e-06, "loss": 0.5167, "step": 35260 }, { "epoch": 1.2, "learning_rate": 6.946363925387546e-06, "loss": 0.506, "step": 35280 }, { "epoch": 1.2, "learning_rate": 6.936211091047636e-06, "loss": 0.5407, "step": 35300 }, { "epoch": 1.2, "learning_rate": 6.926061740827359e-06, "loss": 0.5601, "step": 35320 }, { "epoch": 1.2, "learning_rate": 6.915915886268486e-06, "loss": 0.4812, "step": 35340 }, { "epoch": 1.2, "learning_rate": 6.905773538908814e-06, "loss": 0.5248, "step": 35360 }, { "epoch": 1.2, "learning_rate": 6.895634710282153e-06, "loss": 0.51, "step": 35380 }, { "epoch": 1.2, "learning_rate": 6.885499411918304e-06, "loss": 0.5256, "step": 35400 }, { "epoch": 1.2, "learning_rate": 6.8753676553430606e-06, "loss": 0.5107, "step": 35420 }, { "epoch": 1.2, "learning_rate": 6.8652394520781865e-06, "loss": 0.5276, "step": 35440 }, { "epoch": 1.2, "learning_rate": 6.855114813641408e-06, "loss": 0.5133, "step": 35460 }, { "epoch": 1.2, "learning_rate": 6.844993751546388e-06, "loss": 0.5299, "step": 35480 }, { "epoch": 1.21, "learning_rate": 6.8348762773027285e-06, "loss": 0.5327, "step": 35500 }, { "epoch": 1.21, "learning_rate": 6.824762402415958e-06, "loss": 0.5263, "step": 35520 }, { "epoch": 1.21, "learning_rate": 6.814652138387501e-06, "loss": 0.5323, "step": 35540 }, { "epoch": 1.21, "learning_rate": 6.8045454967146854e-06, "loss": 0.5296, "step": 35560 }, { "epoch": 1.21, "learning_rate": 6.794442488890705e-06, "loss": 0.5498, "step": 35580 }, { "epoch": 1.21, "learning_rate": 6.784343126404636e-06, "loss": 0.4546, "step": 35600 }, { "epoch": 1.21, "learning_rate": 6.774247420741401e-06, "loss": 0.5471, "step": 35620 }, { "epoch": 1.21, "learning_rate": 6.764155383381772e-06, "loss": 0.5375, "step": 35640 }, { "epoch": 1.21, "learning_rate": 6.754067025802346e-06, "loss": 0.5628, "step": 35660 }, { "epoch": 1.21, "learning_rate": 6.743982359475525e-06, "loss": 0.5029, "step": 35680 }, { "epoch": 1.21, "learning_rate": 6.733901395869527e-06, "loss": 0.4811, "step": 35700 }, { "epoch": 1.21, "learning_rate": 6.723824146448352e-06, "loss": 0.5296, "step": 35720 }, { "epoch": 1.21, "learning_rate": 6.713750622671784e-06, "loss": 0.5108, "step": 35740 }, { "epoch": 1.21, "learning_rate": 6.7036808359953585e-06, "loss": 0.5266, "step": 35760 }, { "epoch": 1.21, "learning_rate": 6.693614797870371e-06, "loss": 0.5075, "step": 35780 }, { "epoch": 1.22, "learning_rate": 6.68355251974385e-06, "loss": 0.5385, "step": 35800 }, { "epoch": 1.22, "learning_rate": 6.673494013058548e-06, "loss": 0.5294, "step": 35820 }, { "epoch": 1.22, "learning_rate": 6.663439289252932e-06, "loss": 0.5048, "step": 35840 }, { "epoch": 1.22, "learning_rate": 6.653388359761161e-06, "loss": 0.482, "step": 35860 }, { "epoch": 1.22, "learning_rate": 6.6433412360130855e-06, "loss": 0.4721, "step": 35880 }, { "epoch": 1.22, "learning_rate": 6.633297929434224e-06, "loss": 0.5274, "step": 35900 }, { "epoch": 1.22, "learning_rate": 6.623258451445758e-06, "loss": 0.5133, "step": 35920 }, { "epoch": 1.22, "learning_rate": 6.613222813464508e-06, "loss": 0.534, "step": 35940 }, { "epoch": 1.22, "learning_rate": 6.603191026902936e-06, "loss": 0.496, "step": 35960 }, { "epoch": 1.22, "learning_rate": 6.59316310316912e-06, "loss": 0.5472, "step": 35980 }, { "epoch": 1.22, "learning_rate": 6.583139053666745e-06, "loss": 0.4993, "step": 36000 }, { "epoch": 1.22, "learning_rate": 6.573118889795093e-06, "loss": 0.5548, "step": 36020 }, { "epoch": 1.22, "learning_rate": 6.563102622949021e-06, "loss": 0.509, "step": 36040 }, { "epoch": 1.22, "learning_rate": 6.553090264518962e-06, "loss": 0.5209, "step": 36060 }, { "epoch": 1.22, "learning_rate": 6.543081825890901e-06, "loss": 0.5229, "step": 36080 }, { "epoch": 1.23, "learning_rate": 6.533077318446365e-06, "loss": 0.5561, "step": 36100 }, { "epoch": 1.23, "learning_rate": 6.523076753562411e-06, "loss": 0.4693, "step": 36120 }, { "epoch": 1.23, "learning_rate": 6.513080142611612e-06, "loss": 0.4867, "step": 36140 }, { "epoch": 1.23, "learning_rate": 6.503087496962044e-06, "loss": 0.5337, "step": 36160 }, { "epoch": 1.23, "learning_rate": 6.493098827977278e-06, "loss": 0.5121, "step": 36180 }, { "epoch": 1.23, "learning_rate": 6.483114147016358e-06, "loss": 0.5246, "step": 36200 }, { "epoch": 1.23, "learning_rate": 6.473133465433793e-06, "loss": 0.5049, "step": 36220 }, { "epoch": 1.23, "learning_rate": 6.463156794579543e-06, "loss": 0.5014, "step": 36240 }, { "epoch": 1.23, "learning_rate": 6.453184145799015e-06, "loss": 0.4795, "step": 36260 }, { "epoch": 1.23, "learning_rate": 6.4432155304330335e-06, "loss": 0.5077, "step": 36280 }, { "epoch": 1.23, "learning_rate": 6.433250959817845e-06, "loss": 0.5471, "step": 36300 }, { "epoch": 1.23, "learning_rate": 6.42329044528508e-06, "loss": 0.5006, "step": 36320 }, { "epoch": 1.23, "learning_rate": 6.413333998161772e-06, "loss": 0.518, "step": 36340 }, { "epoch": 1.23, "learning_rate": 6.4033816297703245e-06, "loss": 0.5612, "step": 36360 }, { "epoch": 1.23, "learning_rate": 6.393433351428502e-06, "loss": 0.4518, "step": 36380 }, { "epoch": 1.24, "learning_rate": 6.383489174449422e-06, "loss": 0.5344, "step": 36400 }, { "epoch": 1.24, "learning_rate": 6.373549110141528e-06, "loss": 0.5152, "step": 36420 }, { "epoch": 1.24, "learning_rate": 6.363613169808593e-06, "loss": 0.4774, "step": 36440 }, { "epoch": 1.24, "learning_rate": 6.353681364749703e-06, "loss": 0.481, "step": 36460 }, { "epoch": 1.24, "learning_rate": 6.343753706259239e-06, "loss": 0.5457, "step": 36480 }, { "epoch": 1.24, "learning_rate": 6.333830205626861e-06, "loss": 0.5329, "step": 36500 }, { "epoch": 1.24, "learning_rate": 6.323910874137509e-06, "loss": 0.4489, "step": 36520 }, { "epoch": 1.24, "learning_rate": 6.313995723071377e-06, "loss": 0.4993, "step": 36540 }, { "epoch": 1.24, "learning_rate": 6.3040847637039075e-06, "loss": 0.5238, "step": 36560 }, { "epoch": 1.24, "learning_rate": 6.294178007305776e-06, "loss": 0.507, "step": 36580 }, { "epoch": 1.24, "learning_rate": 6.284275465142875e-06, "loss": 0.5401, "step": 36600 }, { "epoch": 1.24, "learning_rate": 6.27437714847631e-06, "loss": 0.4962, "step": 36620 }, { "epoch": 1.24, "learning_rate": 6.2644830685623756e-06, "loss": 0.4884, "step": 36640 }, { "epoch": 1.24, "learning_rate": 6.254593236652554e-06, "loss": 0.5003, "step": 36660 }, { "epoch": 1.25, "learning_rate": 6.244707663993494e-06, "loss": 0.4925, "step": 36680 }, { "epoch": 1.25, "learning_rate": 6.2348263618269975e-06, "loss": 0.4712, "step": 36700 }, { "epoch": 1.25, "learning_rate": 6.224949341390017e-06, "loss": 0.5206, "step": 36720 }, { "epoch": 1.25, "learning_rate": 6.215076613914628e-06, "loss": 0.4793, "step": 36740 }, { "epoch": 1.25, "learning_rate": 6.205208190628034e-06, "loss": 0.5046, "step": 36760 }, { "epoch": 1.25, "learning_rate": 6.1953440827525306e-06, "loss": 0.5188, "step": 36780 }, { "epoch": 1.25, "learning_rate": 6.185484301505518e-06, "loss": 0.5281, "step": 36800 }, { "epoch": 1.25, "learning_rate": 6.175628858099471e-06, "loss": 0.4982, "step": 36820 }, { "epoch": 1.25, "learning_rate": 6.165777763741932e-06, "loss": 0.5057, "step": 36840 }, { "epoch": 1.25, "learning_rate": 6.1559310296355005e-06, "loss": 0.5274, "step": 36860 }, { "epoch": 1.25, "learning_rate": 6.146088666977804e-06, "loss": 0.487, "step": 36880 }, { "epoch": 1.25, "learning_rate": 6.1362506869615205e-06, "loss": 0.5438, "step": 36900 }, { "epoch": 1.25, "learning_rate": 6.126417100774329e-06, "loss": 0.5017, "step": 36920 }, { "epoch": 1.25, "learning_rate": 6.116587919598915e-06, "loss": 0.5062, "step": 36940 }, { "epoch": 1.25, "learning_rate": 6.106763154612963e-06, "loss": 0.5229, "step": 36960 }, { "epoch": 1.26, "learning_rate": 6.096942816989116e-06, "loss": 0.4945, "step": 36980 }, { "epoch": 1.26, "learning_rate": 6.0871269178949986e-06, "loss": 0.569, "step": 37000 }, { "epoch": 1.26, "learning_rate": 6.0773154684931875e-06, "loss": 0.5008, "step": 37020 }, { "epoch": 1.26, "learning_rate": 6.0675084799411975e-06, "loss": 0.5091, "step": 37040 }, { "epoch": 1.26, "learning_rate": 6.057705963391461e-06, "loss": 0.5372, "step": 37060 }, { "epoch": 1.26, "learning_rate": 6.047907929991333e-06, "loss": 0.5228, "step": 37080 }, { "epoch": 1.26, "learning_rate": 6.038114390883073e-06, "loss": 0.5057, "step": 37100 }, { "epoch": 1.26, "learning_rate": 6.028325357203822e-06, "loss": 0.4866, "step": 37120 }, { "epoch": 1.26, "learning_rate": 6.018540840085611e-06, "loss": 0.5037, "step": 37140 }, { "epoch": 1.26, "learning_rate": 6.008760850655316e-06, "loss": 0.5303, "step": 37160 }, { "epoch": 1.26, "learning_rate": 5.998985400034676e-06, "loss": 0.507, "step": 37180 }, { "epoch": 1.26, "learning_rate": 5.989214499340267e-06, "loss": 0.4896, "step": 37200 }, { "epoch": 1.26, "learning_rate": 5.979448159683491e-06, "loss": 0.5309, "step": 37220 }, { "epoch": 1.26, "learning_rate": 5.96968639217056e-06, "loss": 0.4934, "step": 37240 }, { "epoch": 1.26, "learning_rate": 5.959929207902488e-06, "loss": 0.4939, "step": 37260 }, { "epoch": 1.27, "learning_rate": 5.950176617975079e-06, "loss": 0.5182, "step": 37280 }, { "epoch": 1.27, "learning_rate": 5.9404286334789115e-06, "loss": 0.5739, "step": 37300 }, { "epoch": 1.27, "learning_rate": 5.930685265499329e-06, "loss": 0.4918, "step": 37320 }, { "epoch": 1.27, "learning_rate": 5.9209465251164175e-06, "loss": 0.5235, "step": 37340 }, { "epoch": 1.27, "learning_rate": 5.911212423405008e-06, "loss": 0.5176, "step": 37360 }, { "epoch": 1.27, "learning_rate": 5.901482971434652e-06, "loss": 0.5081, "step": 37380 }, { "epoch": 1.27, "learning_rate": 5.891758180269618e-06, "loss": 0.4788, "step": 37400 }, { "epoch": 1.27, "learning_rate": 5.882038060968871e-06, "loss": 0.528, "step": 37420 }, { "epoch": 1.27, "learning_rate": 5.872322624586061e-06, "loss": 0.5005, "step": 37440 }, { "epoch": 1.27, "learning_rate": 5.862611882169518e-06, "loss": 0.4987, "step": 37460 }, { "epoch": 1.27, "learning_rate": 5.852905844762229e-06, "loss": 0.4889, "step": 37480 }, { "epoch": 1.27, "learning_rate": 5.8432045234018365e-06, "loss": 0.4652, "step": 37500 }, { "epoch": 1.27, "learning_rate": 5.833507929120612e-06, "loss": 0.5429, "step": 37520 }, { "epoch": 1.27, "learning_rate": 5.823816072945458e-06, "loss": 0.5374, "step": 37540 }, { "epoch": 1.27, "learning_rate": 5.814128965897887e-06, "loss": 0.4909, "step": 37560 }, { "epoch": 1.28, "learning_rate": 5.804446618994009e-06, "loss": 0.498, "step": 37580 }, { "epoch": 1.28, "learning_rate": 5.794769043244527e-06, "loss": 0.4974, "step": 37600 }, { "epoch": 1.28, "learning_rate": 5.785096249654705e-06, "loss": 0.4769, "step": 37620 }, { "epoch": 1.28, "learning_rate": 5.775428249224382e-06, "loss": 0.5101, "step": 37640 }, { "epoch": 1.28, "learning_rate": 5.765765052947941e-06, "loss": 0.4938, "step": 37660 }, { "epoch": 1.28, "learning_rate": 5.756106671814301e-06, "loss": 0.4921, "step": 37680 }, { "epoch": 1.28, "learning_rate": 5.7464531168069096e-06, "loss": 0.5029, "step": 37700 }, { "epoch": 1.28, "learning_rate": 5.7368043989037206e-06, "loss": 0.5164, "step": 37720 }, { "epoch": 1.28, "learning_rate": 5.7271605290771895e-06, "loss": 0.5038, "step": 37740 }, { "epoch": 1.28, "learning_rate": 5.717521518294259e-06, "loss": 0.5467, "step": 37760 }, { "epoch": 1.28, "learning_rate": 5.70788737751635e-06, "loss": 0.4912, "step": 37780 }, { "epoch": 1.28, "learning_rate": 5.698258117699334e-06, "loss": 0.5349, "step": 37800 }, { "epoch": 1.28, "learning_rate": 5.68863374979354e-06, "loss": 0.5092, "step": 37820 }, { "epoch": 1.28, "learning_rate": 5.6790142847437355e-06, "loss": 0.5157, "step": 37840 }, { "epoch": 1.29, "learning_rate": 5.669399733489108e-06, "loss": 0.5729, "step": 37860 }, { "epoch": 1.29, "learning_rate": 5.659790106963261e-06, "loss": 0.5483, "step": 37880 }, { "epoch": 1.29, "learning_rate": 5.650185416094194e-06, "loss": 0.5188, "step": 37900 }, { "epoch": 1.29, "learning_rate": 5.640585671804296e-06, "loss": 0.5054, "step": 37920 }, { "epoch": 1.29, "learning_rate": 5.63099088501033e-06, "loss": 0.5646, "step": 37940 }, { "epoch": 1.29, "learning_rate": 5.621401066623421e-06, "loss": 0.5179, "step": 37960 }, { "epoch": 1.29, "learning_rate": 5.611816227549048e-06, "loss": 0.4797, "step": 37980 }, { "epoch": 1.29, "learning_rate": 5.602236378687018e-06, "loss": 0.5401, "step": 38000 }, { "epoch": 1.29, "learning_rate": 5.59266153093147e-06, "loss": 0.4801, "step": 38020 }, { "epoch": 1.29, "learning_rate": 5.583091695170857e-06, "loss": 0.4999, "step": 38040 }, { "epoch": 1.29, "learning_rate": 5.5735268822879344e-06, "loss": 0.4958, "step": 38060 }, { "epoch": 1.29, "learning_rate": 5.563967103159733e-06, "loss": 0.4913, "step": 38080 }, { "epoch": 1.29, "learning_rate": 5.554412368657572e-06, "loss": 0.5331, "step": 38100 }, { "epoch": 1.29, "learning_rate": 5.544862689647024e-06, "loss": 0.5276, "step": 38120 }, { "epoch": 1.29, "learning_rate": 5.535318076987928e-06, "loss": 0.5382, "step": 38140 }, { "epoch": 1.3, "learning_rate": 5.52577854153435e-06, "loss": 0.508, "step": 38160 }, { "epoch": 1.3, "learning_rate": 5.516244094134578e-06, "loss": 0.5848, "step": 38180 }, { "epoch": 1.3, "learning_rate": 5.506714745631124e-06, "loss": 0.5551, "step": 38200 }, { "epoch": 1.3, "learning_rate": 5.497190506860695e-06, "loss": 0.5111, "step": 38220 }, { "epoch": 1.3, "learning_rate": 5.487671388654192e-06, "loss": 0.5263, "step": 38240 }, { "epoch": 1.3, "learning_rate": 5.478157401836694e-06, "loss": 0.5426, "step": 38260 }, { "epoch": 1.3, "learning_rate": 5.468648557227434e-06, "loss": 0.5187, "step": 38280 }, { "epoch": 1.3, "learning_rate": 5.459144865639806e-06, "loss": 0.5232, "step": 38300 }, { "epoch": 1.3, "learning_rate": 5.449646337881345e-06, "loss": 0.5209, "step": 38320 }, { "epoch": 1.3, "learning_rate": 5.440152984753709e-06, "loss": 0.4915, "step": 38340 }, { "epoch": 1.3, "learning_rate": 5.430664817052678e-06, "loss": 0.5327, "step": 38360 }, { "epoch": 1.3, "learning_rate": 5.421181845568125e-06, "loss": 0.4883, "step": 38380 }, { "epoch": 1.3, "learning_rate": 5.411704081084025e-06, "loss": 0.5274, "step": 38400 }, { "epoch": 1.3, "learning_rate": 5.402231534378424e-06, "loss": 0.5467, "step": 38420 }, { "epoch": 1.3, "learning_rate": 5.392764216223441e-06, "loss": 0.5268, "step": 38440 }, { "epoch": 1.31, "learning_rate": 5.383302137385238e-06, "loss": 0.5262, "step": 38460 }, { "epoch": 1.31, "learning_rate": 5.373845308624028e-06, "loss": 0.541, "step": 38480 }, { "epoch": 1.31, "learning_rate": 5.364393740694054e-06, "loss": 0.5145, "step": 38500 }, { "epoch": 1.31, "learning_rate": 5.3549474443435726e-06, "loss": 0.4997, "step": 38520 }, { "epoch": 1.31, "learning_rate": 5.345506430314845e-06, "loss": 0.4927, "step": 38540 }, { "epoch": 1.31, "learning_rate": 5.33607070934413e-06, "loss": 0.516, "step": 38560 }, { "epoch": 1.31, "learning_rate": 5.326640292161665e-06, "loss": 0.549, "step": 38580 }, { "epoch": 1.31, "learning_rate": 5.317215189491651e-06, "loss": 0.5273, "step": 38600 }, { "epoch": 1.31, "learning_rate": 5.307795412052259e-06, "loss": 0.5299, "step": 38620 }, { "epoch": 1.31, "learning_rate": 5.298380970555584e-06, "loss": 0.5154, "step": 38640 }, { "epoch": 1.31, "learning_rate": 5.288971875707669e-06, "loss": 0.5577, "step": 38660 }, { "epoch": 1.31, "learning_rate": 5.279568138208473e-06, "loss": 0.497, "step": 38680 }, { "epoch": 1.31, "learning_rate": 5.270169768751858e-06, "loss": 0.4864, "step": 38700 }, { "epoch": 1.31, "learning_rate": 5.260776778025591e-06, "loss": 0.494, "step": 38720 }, { "epoch": 1.32, "learning_rate": 5.251389176711307e-06, "loss": 0.4946, "step": 38740 }, { "epoch": 1.32, "learning_rate": 5.242006975484528e-06, "loss": 0.4608, "step": 38760 }, { "epoch": 1.32, "learning_rate": 5.2326301850146285e-06, "loss": 0.5286, "step": 38780 }, { "epoch": 1.32, "learning_rate": 5.223258815964833e-06, "loss": 0.511, "step": 38800 }, { "epoch": 1.32, "learning_rate": 5.213892878992191e-06, "loss": 0.5245, "step": 38820 }, { "epoch": 1.32, "learning_rate": 5.204532384747584e-06, "loss": 0.5, "step": 38840 }, { "epoch": 1.32, "learning_rate": 5.195177343875701e-06, "loss": 0.5208, "step": 38860 }, { "epoch": 1.32, "learning_rate": 5.185827767015031e-06, "loss": 0.5128, "step": 38880 }, { "epoch": 1.32, "learning_rate": 5.176483664797852e-06, "loss": 0.5312, "step": 38900 }, { "epoch": 1.32, "learning_rate": 5.1671450478502064e-06, "loss": 0.4755, "step": 38920 }, { "epoch": 1.32, "learning_rate": 5.157811926791905e-06, "loss": 0.4955, "step": 38940 }, { "epoch": 1.32, "learning_rate": 5.1484843122365125e-06, "loss": 0.4997, "step": 38960 }, { "epoch": 1.32, "learning_rate": 5.139162214791322e-06, "loss": 0.5367, "step": 38980 }, { "epoch": 1.32, "learning_rate": 5.129845645057372e-06, "loss": 0.5051, "step": 39000 }, { "epoch": 1.32, "learning_rate": 5.120534613629389e-06, "loss": 0.5145, "step": 39020 }, { "epoch": 1.33, "learning_rate": 5.1112291310958166e-06, "loss": 0.4681, "step": 39040 }, { "epoch": 1.33, "learning_rate": 5.101929208038786e-06, "loss": 0.5096, "step": 39060 }, { "epoch": 1.33, "learning_rate": 5.09263485503411e-06, "loss": 0.4984, "step": 39080 }, { "epoch": 1.33, "learning_rate": 5.083346082651255e-06, "loss": 0.4949, "step": 39100 }, { "epoch": 1.33, "learning_rate": 5.074062901453352e-06, "loss": 0.4977, "step": 39120 }, { "epoch": 1.33, "learning_rate": 5.06478532199717e-06, "loss": 0.5075, "step": 39140 }, { "epoch": 1.33, "learning_rate": 5.05551335483311e-06, "loss": 0.4985, "step": 39160 }, { "epoch": 1.33, "learning_rate": 5.04624701050519e-06, "loss": 0.4894, "step": 39180 }, { "epoch": 1.33, "learning_rate": 5.03698629955103e-06, "loss": 0.4938, "step": 39200 }, { "epoch": 1.33, "learning_rate": 5.027731232501849e-06, "loss": 0.5228, "step": 39220 }, { "epoch": 1.33, "learning_rate": 5.018481819882446e-06, "loss": 0.4612, "step": 39240 }, { "epoch": 1.33, "learning_rate": 5.009238072211187e-06, "loss": 0.5362, "step": 39260 }, { "epoch": 1.33, "learning_rate": 5.000000000000003e-06, "loss": 0.4859, "step": 39280 }, { "epoch": 1.33, "learning_rate": 4.990767613754358e-06, "loss": 0.49, "step": 39300 }, { "epoch": 1.33, "learning_rate": 4.981540923973263e-06, "loss": 0.5071, "step": 39320 }, { "epoch": 1.34, "learning_rate": 4.972319941149245e-06, "loss": 0.5129, "step": 39340 }, { "epoch": 1.34, "learning_rate": 4.963104675768345e-06, "loss": 0.5516, "step": 39360 }, { "epoch": 1.34, "learning_rate": 4.953895138310091e-06, "loss": 0.4968, "step": 39380 }, { "epoch": 1.34, "learning_rate": 4.944691339247515e-06, "loss": 0.5478, "step": 39400 }, { "epoch": 1.34, "learning_rate": 4.9354932890471096e-06, "loss": 0.5459, "step": 39420 }, { "epoch": 1.34, "learning_rate": 4.926300998168835e-06, "loss": 0.518, "step": 39440 }, { "epoch": 1.34, "learning_rate": 4.917114477066104e-06, "loss": 0.4936, "step": 39460 }, { "epoch": 1.34, "learning_rate": 4.907933736185757e-06, "loss": 0.4986, "step": 39480 }, { "epoch": 1.34, "learning_rate": 4.898758785968075e-06, "loss": 0.4951, "step": 39500 }, { "epoch": 1.34, "learning_rate": 4.889589636846747e-06, "loss": 0.5219, "step": 39520 }, { "epoch": 1.34, "learning_rate": 4.880426299248867e-06, "loss": 0.4831, "step": 39540 }, { "epoch": 1.34, "learning_rate": 4.871268783594922e-06, "loss": 0.503, "step": 39560 }, { "epoch": 1.34, "learning_rate": 4.86211710029877e-06, "loss": 0.5001, "step": 39580 }, { "epoch": 1.34, "learning_rate": 4.852971259767642e-06, "loss": 0.4996, "step": 39600 }, { "epoch": 1.34, "learning_rate": 4.843831272402126e-06, "loss": 0.469, "step": 39620 }, { "epoch": 1.35, "learning_rate": 4.83469714859616e-06, "loss": 0.5212, "step": 39640 }, { "epoch": 1.35, "learning_rate": 4.825568898736999e-06, "loss": 0.4983, "step": 39660 }, { "epoch": 1.35, "learning_rate": 4.816446533205225e-06, "loss": 0.5324, "step": 39680 }, { "epoch": 1.35, "learning_rate": 4.807330062374733e-06, "loss": 0.5249, "step": 39700 }, { "epoch": 1.35, "learning_rate": 4.79821949661271e-06, "loss": 0.4734, "step": 39720 }, { "epoch": 1.35, "learning_rate": 4.789114846279631e-06, "loss": 0.5388, "step": 39740 }, { "epoch": 1.35, "learning_rate": 4.780016121729236e-06, "loss": 0.5085, "step": 39760 }, { "epoch": 1.35, "learning_rate": 4.7709233333085345e-06, "loss": 0.5055, "step": 39780 }, { "epoch": 1.35, "learning_rate": 4.761836491357782e-06, "loss": 0.4652, "step": 39800 }, { "epoch": 1.35, "learning_rate": 4.7527556062104756e-06, "loss": 0.524, "step": 39820 }, { "epoch": 1.35, "learning_rate": 4.7436806881933304e-06, "loss": 0.4764, "step": 39840 }, { "epoch": 1.35, "learning_rate": 4.734611747626285e-06, "loss": 0.4892, "step": 39860 }, { "epoch": 1.35, "learning_rate": 4.7255487948224754e-06, "loss": 0.4889, "step": 39880 }, { "epoch": 1.35, "learning_rate": 4.716491840088227e-06, "loss": 0.533, "step": 39900 }, { "epoch": 1.36, "learning_rate": 4.707440893723052e-06, "loss": 0.5227, "step": 39920 }, { "epoch": 1.36, "learning_rate": 4.698395966019615e-06, "loss": 0.5253, "step": 39940 }, { "epoch": 1.36, "learning_rate": 4.689357067263751e-06, "loss": 0.5263, "step": 39960 }, { "epoch": 1.36, "learning_rate": 4.680324207734434e-06, "loss": 0.509, "step": 39980 }, { "epoch": 1.36, "learning_rate": 4.671297397703766e-06, "loss": 0.5364, "step": 40000 }, { "epoch": 1.36, "learning_rate": 4.662276647436977e-06, "loss": 0.4903, "step": 40020 }, { "epoch": 1.36, "learning_rate": 4.653261967192398e-06, "loss": 0.5374, "step": 40040 }, { "epoch": 1.36, "learning_rate": 4.644253367221467e-06, "loss": 0.478, "step": 40060 }, { "epoch": 1.36, "learning_rate": 4.635250857768696e-06, "loss": 0.4984, "step": 40080 }, { "epoch": 1.36, "learning_rate": 4.62625444907168e-06, "loss": 0.4795, "step": 40100 }, { "epoch": 1.36, "learning_rate": 4.6172641513610765e-06, "loss": 0.4602, "step": 40120 }, { "epoch": 1.36, "learning_rate": 4.608279974860581e-06, "loss": 0.4751, "step": 40140 }, { "epoch": 1.36, "learning_rate": 4.599301929786943e-06, "loss": 0.5241, "step": 40160 }, { "epoch": 1.36, "learning_rate": 4.5903300263499315e-06, "loss": 0.504, "step": 40180 }, { "epoch": 1.36, "learning_rate": 4.581364274752338e-06, "loss": 0.5306, "step": 40200 }, { "epoch": 1.37, "learning_rate": 4.572404685189946e-06, "loss": 0.5155, "step": 40220 }, { "epoch": 1.37, "learning_rate": 4.563451267851542e-06, "loss": 0.4783, "step": 40240 }, { "epoch": 1.37, "learning_rate": 4.554504032918888e-06, "loss": 0.464, "step": 40260 }, { "epoch": 1.37, "learning_rate": 4.545562990566725e-06, "loss": 0.4934, "step": 40280 }, { "epoch": 1.37, "learning_rate": 4.536628150962744e-06, "loss": 0.4472, "step": 40300 }, { "epoch": 1.37, "learning_rate": 4.527699524267576e-06, "loss": 0.4941, "step": 40320 }, { "epoch": 1.37, "learning_rate": 4.518777120634798e-06, "loss": 0.4424, "step": 40340 }, { "epoch": 1.37, "learning_rate": 4.509860950210905e-06, "loss": 0.5254, "step": 40360 }, { "epoch": 1.37, "learning_rate": 4.500951023135308e-06, "loss": 0.4916, "step": 40380 }, { "epoch": 1.37, "learning_rate": 4.492047349540308e-06, "loss": 0.5132, "step": 40400 }, { "epoch": 1.37, "learning_rate": 4.483149939551105e-06, "loss": 0.5309, "step": 40420 }, { "epoch": 1.37, "learning_rate": 4.474258803285774e-06, "loss": 0.5116, "step": 40440 }, { "epoch": 1.37, "learning_rate": 4.465373950855249e-06, "loss": 0.4641, "step": 40460 }, { "epoch": 1.37, "learning_rate": 4.4564953923633295e-06, "loss": 0.478, "step": 40480 }, { "epoch": 1.37, "learning_rate": 4.4476231379066445e-06, "loss": 0.4497, "step": 40500 }, { "epoch": 1.38, "learning_rate": 4.4387571975746655e-06, "loss": 0.5384, "step": 40520 }, { "epoch": 1.38, "learning_rate": 4.429897581449677e-06, "loss": 0.4797, "step": 40540 }, { "epoch": 1.38, "learning_rate": 4.4210442996067724e-06, "loss": 0.5034, "step": 40560 }, { "epoch": 1.38, "learning_rate": 4.412197362113848e-06, "loss": 0.4553, "step": 40580 }, { "epoch": 1.38, "learning_rate": 4.403356779031572e-06, "loss": 0.518, "step": 40600 }, { "epoch": 1.38, "learning_rate": 4.394522560413399e-06, "loss": 0.4733, "step": 40620 }, { "epoch": 1.38, "learning_rate": 4.385694716305541e-06, "loss": 0.4931, "step": 40640 }, { "epoch": 1.38, "learning_rate": 4.376873256746962e-06, "loss": 0.5156, "step": 40660 }, { "epoch": 1.38, "learning_rate": 4.368058191769363e-06, "loss": 0.5206, "step": 40680 }, { "epoch": 1.38, "learning_rate": 4.359249531397177e-06, "loss": 0.5079, "step": 40700 }, { "epoch": 1.38, "learning_rate": 4.350447285647552e-06, "loss": 0.4872, "step": 40720 }, { "epoch": 1.38, "learning_rate": 4.341651464530341e-06, "loss": 0.4953, "step": 40740 }, { "epoch": 1.38, "learning_rate": 4.332862078048095e-06, "loss": 0.5342, "step": 40760 }, { "epoch": 1.38, "learning_rate": 4.324079136196034e-06, "loss": 0.5427, "step": 40780 }, { "epoch": 1.38, "learning_rate": 4.315302648962066e-06, "loss": 0.5343, "step": 40800 }, { "epoch": 1.39, "learning_rate": 4.30653262632675e-06, "loss": 0.5144, "step": 40820 }, { "epoch": 1.39, "learning_rate": 4.297769078263296e-06, "loss": 0.4854, "step": 40840 }, { "epoch": 1.39, "learning_rate": 4.2890120147375545e-06, "loss": 0.5271, "step": 40860 }, { "epoch": 1.39, "learning_rate": 4.280261445707985e-06, "loss": 0.5053, "step": 40880 }, { "epoch": 1.39, "learning_rate": 4.271517381125687e-06, "loss": 0.5188, "step": 40900 }, { "epoch": 1.39, "learning_rate": 4.262779830934346e-06, "loss": 0.5301, "step": 40920 }, { "epoch": 1.39, "learning_rate": 4.254048805070248e-06, "loss": 0.4891, "step": 40940 }, { "epoch": 1.39, "learning_rate": 4.2453243134622465e-06, "loss": 0.4493, "step": 40960 }, { "epoch": 1.39, "learning_rate": 4.2366063660317805e-06, "loss": 0.4975, "step": 40980 }, { "epoch": 1.39, "learning_rate": 4.2278949726928354e-06, "loss": 0.5603, "step": 41000 }, { "epoch": 1.39, "learning_rate": 4.219190143351951e-06, "loss": 0.5002, "step": 41020 }, { "epoch": 1.39, "learning_rate": 4.210491887908201e-06, "loss": 0.5157, "step": 41040 }, { "epoch": 1.39, "learning_rate": 4.201800216253176e-06, "loss": 0.5158, "step": 41060 }, { "epoch": 1.39, "learning_rate": 4.193115138270986e-06, "loss": 0.5076, "step": 41080 }, { "epoch": 1.4, "learning_rate": 4.184436663838245e-06, "loss": 0.4968, "step": 41100 }, { "epoch": 1.4, "learning_rate": 4.175764802824049e-06, "loss": 0.5185, "step": 41120 }, { "epoch": 1.4, "learning_rate": 4.167099565089989e-06, "loss": 0.5321, "step": 41140 }, { "epoch": 1.4, "learning_rate": 4.158440960490104e-06, "loss": 0.4719, "step": 41160 }, { "epoch": 1.4, "learning_rate": 4.149788998870902e-06, "loss": 0.525, "step": 41180 }, { "epoch": 1.4, "learning_rate": 4.141143690071335e-06, "loss": 0.497, "step": 41200 }, { "epoch": 1.4, "learning_rate": 4.132505043922792e-06, "loss": 0.4672, "step": 41220 }, { "epoch": 1.4, "learning_rate": 4.123873070249074e-06, "loss": 0.4633, "step": 41240 }, { "epoch": 1.4, "learning_rate": 4.115247778866407e-06, "loss": 0.4851, "step": 41260 }, { "epoch": 1.4, "learning_rate": 4.106629179583412e-06, "loss": 0.4671, "step": 41280 }, { "epoch": 1.4, "learning_rate": 4.0980172822010985e-06, "loss": 0.4747, "step": 41300 }, { "epoch": 1.4, "learning_rate": 4.0894120965128615e-06, "loss": 0.5273, "step": 41320 }, { "epoch": 1.4, "learning_rate": 4.080813632304455e-06, "loss": 0.4695, "step": 41340 }, { "epoch": 1.4, "learning_rate": 4.072221899353995e-06, "loss": 0.5268, "step": 41360 }, { "epoch": 1.4, "learning_rate": 4.063636907431942e-06, "loss": 0.5052, "step": 41380 }, { "epoch": 1.41, "learning_rate": 4.055058666301087e-06, "loss": 0.5012, "step": 41400 }, { "epoch": 1.41, "learning_rate": 4.046487185716552e-06, "loss": 0.5278, "step": 41420 }, { "epoch": 1.41, "learning_rate": 4.037922475425759e-06, "loss": 0.4867, "step": 41440 }, { "epoch": 1.41, "learning_rate": 4.029364545168438e-06, "loss": 0.4981, "step": 41460 }, { "epoch": 1.41, "learning_rate": 4.020813404676612e-06, "loss": 0.4956, "step": 41480 }, { "epoch": 1.41, "learning_rate": 4.012269063674581e-06, "loss": 0.4961, "step": 41500 }, { "epoch": 1.41, "learning_rate": 4.0037315318789e-06, "loss": 0.4949, "step": 41520 }, { "epoch": 1.41, "learning_rate": 3.9952008189984025e-06, "loss": 0.4913, "step": 41540 }, { "epoch": 1.41, "learning_rate": 3.986676934734151e-06, "loss": 0.554, "step": 41560 }, { "epoch": 1.41, "learning_rate": 3.978159888779451e-06, "loss": 0.4984, "step": 41580 }, { "epoch": 1.41, "learning_rate": 3.96964969081983e-06, "loss": 0.5367, "step": 41600 }, { "epoch": 1.41, "learning_rate": 3.961146350533021e-06, "loss": 0.4877, "step": 41620 }, { "epoch": 1.41, "learning_rate": 3.9526498775889645e-06, "loss": 0.5043, "step": 41640 }, { "epoch": 1.41, "learning_rate": 3.944160281649793e-06, "loss": 0.4978, "step": 41660 }, { "epoch": 1.41, "learning_rate": 3.935677572369817e-06, "loss": 0.5095, "step": 41680 }, { "epoch": 1.42, "learning_rate": 3.927201759395518e-06, "loss": 0.4937, "step": 41700 }, { "epoch": 1.42, "learning_rate": 3.918732852365524e-06, "loss": 0.5151, "step": 41720 }, { "epoch": 1.42, "learning_rate": 3.910270860910623e-06, "loss": 0.4712, "step": 41740 }, { "epoch": 1.42, "learning_rate": 3.901815794653729e-06, "loss": 0.4514, "step": 41760 }, { "epoch": 1.42, "learning_rate": 3.893367663209896e-06, "loss": 0.5102, "step": 41780 }, { "epoch": 1.42, "learning_rate": 3.884926476186269e-06, "loss": 0.4833, "step": 41800 }, { "epoch": 1.42, "learning_rate": 3.876492243182113e-06, "loss": 0.4814, "step": 41820 }, { "epoch": 1.42, "learning_rate": 3.8680649737887784e-06, "loss": 0.5291, "step": 41840 }, { "epoch": 1.42, "learning_rate": 3.859644677589699e-06, "loss": 0.5029, "step": 41860 }, { "epoch": 1.42, "learning_rate": 3.851231364160379e-06, "loss": 0.4675, "step": 41880 }, { "epoch": 1.42, "learning_rate": 3.842825043068377e-06, "loss": 0.5084, "step": 41900 }, { "epoch": 1.42, "learning_rate": 3.834425723873303e-06, "loss": 0.4999, "step": 41920 }, { "epoch": 1.42, "learning_rate": 3.826033416126807e-06, "loss": 0.4788, "step": 41940 }, { "epoch": 1.42, "learning_rate": 3.817648129372563e-06, "loss": 0.5149, "step": 41960 }, { "epoch": 1.42, "learning_rate": 3.809269873146263e-06, "loss": 0.5007, "step": 41980 }, { "epoch": 1.43, "learning_rate": 3.800898656975599e-06, "loss": 0.477, "step": 42000 }, { "epoch": 1.43, "learning_rate": 3.792534490380263e-06, "loss": 0.4822, "step": 42020 }, { "epoch": 1.43, "learning_rate": 3.7841773828719277e-06, "loss": 0.4666, "step": 42040 }, { "epoch": 1.43, "learning_rate": 3.775827343954239e-06, "loss": 0.4678, "step": 42060 }, { "epoch": 1.43, "learning_rate": 3.767484383122799e-06, "loss": 0.4648, "step": 42080 }, { "epoch": 1.43, "learning_rate": 3.759148509865168e-06, "loss": 0.4941, "step": 42100 }, { "epoch": 1.43, "learning_rate": 3.750819733660844e-06, "loss": 0.5476, "step": 42120 }, { "epoch": 1.43, "learning_rate": 3.7424980639812534e-06, "loss": 0.4731, "step": 42140 }, { "epoch": 1.43, "learning_rate": 3.734183510289742e-06, "loss": 0.5135, "step": 42160 }, { "epoch": 1.43, "learning_rate": 3.725876082041563e-06, "loss": 0.498, "step": 42180 }, { "epoch": 1.43, "learning_rate": 3.717575788683866e-06, "loss": 0.4935, "step": 42200 }, { "epoch": 1.43, "learning_rate": 3.7092826396556882e-06, "loss": 0.5092, "step": 42220 }, { "epoch": 1.43, "learning_rate": 3.700996644387944e-06, "loss": 0.4793, "step": 42240 }, { "epoch": 1.43, "learning_rate": 3.6927178123034034e-06, "loss": 0.4633, "step": 42260 }, { "epoch": 1.44, "learning_rate": 3.684446152816702e-06, "loss": 0.5189, "step": 42280 }, { "epoch": 1.44, "learning_rate": 3.6761816753343114e-06, "loss": 0.4972, "step": 42300 }, { "epoch": 1.44, "learning_rate": 3.6679243892545414e-06, "loss": 0.5158, "step": 42320 }, { "epoch": 1.44, "learning_rate": 3.659674303967522e-06, "loss": 0.5912, "step": 42340 }, { "epoch": 1.44, "learning_rate": 3.6514314288551877e-06, "loss": 0.4993, "step": 42360 }, { "epoch": 1.44, "learning_rate": 3.6431957732912793e-06, "loss": 0.4731, "step": 42380 }, { "epoch": 1.44, "learning_rate": 3.634967346641334e-06, "loss": 0.5195, "step": 42400 }, { "epoch": 1.44, "learning_rate": 3.6267461582626584e-06, "loss": 0.5218, "step": 42420 }, { "epoch": 1.44, "learning_rate": 3.6185322175043336e-06, "loss": 0.4498, "step": 42440 }, { "epoch": 1.44, "learning_rate": 3.6103255337071918e-06, "loss": 0.5465, "step": 42460 }, { "epoch": 1.44, "learning_rate": 3.602126116203819e-06, "loss": 0.5014, "step": 42480 }, { "epoch": 1.44, "learning_rate": 3.5939339743185376e-06, "loss": 0.5211, "step": 42500 }, { "epoch": 1.44, "learning_rate": 3.5857491173673976e-06, "loss": 0.4967, "step": 42520 }, { "epoch": 1.44, "learning_rate": 3.5775715546581546e-06, "loss": 0.5686, "step": 42540 }, { "epoch": 1.44, "learning_rate": 3.5694012954902803e-06, "loss": 0.4815, "step": 42560 }, { "epoch": 1.45, "learning_rate": 3.5612383491549384e-06, "loss": 0.4797, "step": 42580 }, { "epoch": 1.45, "learning_rate": 3.553082724934973e-06, "loss": 0.4874, "step": 42600 }, { "epoch": 1.45, "learning_rate": 3.544934432104905e-06, "loss": 0.4657, "step": 42620 }, { "epoch": 1.45, "learning_rate": 3.5367934799309167e-06, "loss": 0.5194, "step": 42640 }, { "epoch": 1.45, "learning_rate": 3.5286598776708436e-06, "loss": 0.4837, "step": 42660 }, { "epoch": 1.45, "learning_rate": 3.5205336345741603e-06, "loss": 0.4811, "step": 42680 }, { "epoch": 1.45, "learning_rate": 3.512414759881976e-06, "loss": 0.4934, "step": 42700 }, { "epoch": 1.45, "learning_rate": 3.5043032628270225e-06, "loss": 0.4978, "step": 42720 }, { "epoch": 1.45, "learning_rate": 3.49619915263363e-06, "loss": 0.4769, "step": 42740 }, { "epoch": 1.45, "learning_rate": 3.488102438517742e-06, "loss": 0.5403, "step": 42760 }, { "epoch": 1.45, "learning_rate": 3.4800131296868842e-06, "loss": 0.5017, "step": 42780 }, { "epoch": 1.45, "learning_rate": 3.4719312353401647e-06, "loss": 0.4866, "step": 42800 }, { "epoch": 1.45, "learning_rate": 3.4638567646682576e-06, "loss": 0.4903, "step": 42820 }, { "epoch": 1.45, "learning_rate": 3.4557897268533935e-06, "loss": 0.4781, "step": 42840 }, { "epoch": 1.45, "learning_rate": 3.4477301310693544e-06, "loss": 0.4909, "step": 42860 }, { "epoch": 1.46, "learning_rate": 3.4396779864814568e-06, "loss": 0.5015, "step": 42880 }, { "epoch": 1.46, "learning_rate": 3.4316333022465465e-06, "loss": 0.4896, "step": 42900 }, { "epoch": 1.46, "learning_rate": 3.423596087512976e-06, "loss": 0.5027, "step": 42920 }, { "epoch": 1.46, "learning_rate": 3.415566351420615e-06, "loss": 0.497, "step": 42940 }, { "epoch": 1.46, "learning_rate": 3.407544103100824e-06, "loss": 0.4664, "step": 42960 }, { "epoch": 1.46, "learning_rate": 3.399529351676447e-06, "loss": 0.4745, "step": 42980 }, { "epoch": 1.46, "learning_rate": 3.39152210626181e-06, "loss": 0.5351, "step": 43000 }, { "epoch": 1.46, "learning_rate": 3.3835223759626845e-06, "loss": 0.4961, "step": 43020 }, { "epoch": 1.46, "learning_rate": 3.3755301698763197e-06, "loss": 0.5245, "step": 43040 }, { "epoch": 1.46, "learning_rate": 3.367545497091396e-06, "loss": 0.5402, "step": 43060 }, { "epoch": 1.46, "learning_rate": 3.3595683666880286e-06, "loss": 0.5098, "step": 43080 }, { "epoch": 1.46, "learning_rate": 3.3515987877377498e-06, "loss": 0.4786, "step": 43100 }, { "epoch": 1.46, "learning_rate": 3.343636769303512e-06, "loss": 0.4555, "step": 43120 }, { "epoch": 1.46, "learning_rate": 3.3356823204396706e-06, "loss": 0.4998, "step": 43140 }, { "epoch": 1.47, "learning_rate": 3.3277354501919677e-06, "loss": 0.4714, "step": 43160 }, { "epoch": 1.47, "learning_rate": 3.319796167597533e-06, "loss": 0.4957, "step": 43180 }, { "epoch": 1.47, "learning_rate": 3.3118644816848577e-06, "loss": 0.4871, "step": 43200 }, { "epoch": 1.47, "learning_rate": 3.303940401473804e-06, "loss": 0.4978, "step": 43220 }, { "epoch": 1.47, "learning_rate": 3.2960239359755808e-06, "loss": 0.5106, "step": 43240 }, { "epoch": 1.47, "learning_rate": 3.288115094192734e-06, "loss": 0.4918, "step": 43260 }, { "epoch": 1.47, "learning_rate": 3.2802138851191558e-06, "loss": 0.5428, "step": 43280 }, { "epoch": 1.47, "learning_rate": 3.2723203177400352e-06, "loss": 0.5042, "step": 43300 }, { "epoch": 1.47, "learning_rate": 3.264434401031887e-06, "loss": 0.4565, "step": 43320 }, { "epoch": 1.47, "learning_rate": 3.2565561439625205e-06, "loss": 0.5296, "step": 43340 }, { "epoch": 1.47, "learning_rate": 3.248685555491039e-06, "loss": 0.4808, "step": 43360 }, { "epoch": 1.47, "learning_rate": 3.240822644567815e-06, "loss": 0.5009, "step": 43380 }, { "epoch": 1.47, "learning_rate": 3.2329674201344997e-06, "loss": 0.522, "step": 43400 }, { "epoch": 1.47, "learning_rate": 3.2251198911240002e-06, "loss": 0.5334, "step": 43420 }, { "epoch": 1.47, "learning_rate": 3.217280066460472e-06, "loss": 0.4565, "step": 43440 }, { "epoch": 1.48, "learning_rate": 3.2094479550593107e-06, "loss": 0.5016, "step": 43460 }, { "epoch": 1.48, "learning_rate": 3.201623565827139e-06, "loss": 0.4863, "step": 43480 }, { "epoch": 1.48, "learning_rate": 3.1938069076617983e-06, "loss": 0.4797, "step": 43500 }, { "epoch": 1.48, "learning_rate": 3.1859979894523375e-06, "loss": 0.5096, "step": 43520 }, { "epoch": 1.48, "learning_rate": 3.1781968200790094e-06, "loss": 0.5187, "step": 43540 }, { "epoch": 1.48, "learning_rate": 3.170403408413243e-06, "loss": 0.5091, "step": 43560 }, { "epoch": 1.48, "learning_rate": 3.1626177633176557e-06, "loss": 0.5026, "step": 43580 }, { "epoch": 1.48, "learning_rate": 3.154839893646028e-06, "loss": 0.5275, "step": 43600 }, { "epoch": 1.48, "learning_rate": 3.1470698082433014e-06, "loss": 0.4637, "step": 43620 }, { "epoch": 1.48, "learning_rate": 3.139307515945563e-06, "loss": 0.4792, "step": 43640 }, { "epoch": 1.48, "learning_rate": 3.1315530255800384e-06, "loss": 0.5096, "step": 43660 }, { "epoch": 1.48, "learning_rate": 3.1238063459650802e-06, "loss": 0.5076, "step": 43680 }, { "epoch": 1.48, "learning_rate": 3.116067485910159e-06, "loss": 0.4875, "step": 43700 }, { "epoch": 1.48, "learning_rate": 3.1083364542158535e-06, "loss": 0.5021, "step": 43720 }, { "epoch": 1.48, "learning_rate": 3.1006132596738426e-06, "loss": 0.5041, "step": 43740 }, { "epoch": 1.49, "learning_rate": 3.092897911066883e-06, "loss": 0.4867, "step": 43760 }, { "epoch": 1.49, "learning_rate": 3.0851904171688196e-06, "loss": 0.5268, "step": 43780 }, { "epoch": 1.49, "learning_rate": 3.0774907867445623e-06, "loss": 0.5006, "step": 43800 }, { "epoch": 1.49, "learning_rate": 3.0697990285500802e-06, "loss": 0.4879, "step": 43820 }, { "epoch": 1.49, "learning_rate": 3.06211515133238e-06, "loss": 0.5086, "step": 43840 }, { "epoch": 1.49, "learning_rate": 3.054439163829519e-06, "loss": 0.4727, "step": 43860 }, { "epoch": 1.49, "learning_rate": 3.0467710747705735e-06, "loss": 0.4926, "step": 43880 }, { "epoch": 1.49, "learning_rate": 3.0391108928756486e-06, "loss": 0.508, "step": 43900 }, { "epoch": 1.49, "learning_rate": 3.0314586268558486e-06, "loss": 0.476, "step": 43920 }, { "epoch": 1.49, "learning_rate": 3.0238142854132722e-06, "loss": 0.5425, "step": 43940 }, { "epoch": 1.49, "learning_rate": 3.0161778772410153e-06, "loss": 0.4988, "step": 43960 }, { "epoch": 1.49, "learning_rate": 3.0085494110231493e-06, "loss": 0.4909, "step": 43980 }, { "epoch": 1.49, "learning_rate": 3.0009288954347115e-06, "loss": 0.5108, "step": 44000 }, { "epoch": 1.49, "learning_rate": 2.9933163391417042e-06, "loss": 0.4601, "step": 44020 }, { "epoch": 1.49, "learning_rate": 2.985711750801068e-06, "loss": 0.5138, "step": 44040 }, { "epoch": 1.5, "learning_rate": 2.97811513906069e-06, "loss": 0.4738, "step": 44060 }, { "epoch": 1.5, "learning_rate": 2.9705265125593863e-06, "loss": 0.5138, "step": 44080 }, { "epoch": 1.5, "learning_rate": 2.9629458799268897e-06, "loss": 0.4846, "step": 44100 }, { "epoch": 1.5, "learning_rate": 2.955373249783844e-06, "loss": 0.484, "step": 44120 }, { "epoch": 1.5, "learning_rate": 2.947808630741791e-06, "loss": 0.5328, "step": 44140 }, { "epoch": 1.5, "learning_rate": 2.9402520314031646e-06, "loss": 0.4963, "step": 44160 }, { "epoch": 1.5, "learning_rate": 2.932703460361277e-06, "loss": 0.434, "step": 44180 }, { "epoch": 1.5, "learning_rate": 2.9251629262003135e-06, "loss": 0.4835, "step": 44200 }, { "epoch": 1.5, "learning_rate": 2.9176304374953123e-06, "loss": 0.5047, "step": 44220 }, { "epoch": 1.5, "learning_rate": 2.910106002812172e-06, "loss": 0.4763, "step": 44240 }, { "epoch": 1.5, "learning_rate": 2.902589630707624e-06, "loss": 0.4903, "step": 44260 }, { "epoch": 1.5, "learning_rate": 2.895081329729239e-06, "loss": 0.4682, "step": 44280 }, { "epoch": 1.5, "learning_rate": 2.8875811084154025e-06, "loss": 0.4581, "step": 44300 }, { "epoch": 1.5, "learning_rate": 2.8800889752953156e-06, "loss": 0.4711, "step": 44320 }, { "epoch": 1.51, "learning_rate": 2.872604938888981e-06, "loss": 0.5166, "step": 44340 }, { "epoch": 1.51, "learning_rate": 2.8651290077071914e-06, "loss": 0.5098, "step": 44360 }, { "epoch": 1.51, "learning_rate": 2.8576611902515283e-06, "loss": 0.4655, "step": 44380 }, { "epoch": 1.51, "learning_rate": 2.8502014950143376e-06, "loss": 0.4898, "step": 44400 }, { "epoch": 1.51, "learning_rate": 2.842749930478734e-06, "loss": 0.5059, "step": 44420 }, { "epoch": 1.51, "learning_rate": 2.8353065051185867e-06, "loss": 0.4923, "step": 44440 }, { "epoch": 1.51, "learning_rate": 2.827871227398508e-06, "loss": 0.5248, "step": 44460 }, { "epoch": 1.51, "learning_rate": 2.820444105773845e-06, "loss": 0.4879, "step": 44480 }, { "epoch": 1.51, "learning_rate": 2.8130251486906666e-06, "loss": 0.4507, "step": 44500 }, { "epoch": 1.51, "learning_rate": 2.805614364585758e-06, "loss": 0.5031, "step": 44520 }, { "epoch": 1.51, "learning_rate": 2.7982117618866177e-06, "loss": 0.4808, "step": 44540 }, { "epoch": 1.51, "learning_rate": 2.790817349011432e-06, "loss": 0.4506, "step": 44560 }, { "epoch": 1.51, "learning_rate": 2.783431134369079e-06, "loss": 0.5022, "step": 44580 }, { "epoch": 1.51, "learning_rate": 2.776053126359105e-06, "loss": 0.5251, "step": 44600 }, { "epoch": 1.51, "learning_rate": 2.7686833333717334e-06, "loss": 0.4724, "step": 44620 }, { "epoch": 1.52, "learning_rate": 2.761321763787841e-06, "loss": 0.4463, "step": 44640 }, { "epoch": 1.52, "learning_rate": 2.753968425978958e-06, "loss": 0.457, "step": 44660 }, { "epoch": 1.52, "learning_rate": 2.746623328307243e-06, "loss": 0.4865, "step": 44680 }, { "epoch": 1.52, "learning_rate": 2.739286479125494e-06, "loss": 0.5148, "step": 44700 }, { "epoch": 1.52, "learning_rate": 2.7319578867771237e-06, "loss": 0.493, "step": 44720 }, { "epoch": 1.52, "learning_rate": 2.7246375595961584e-06, "loss": 0.4611, "step": 44740 }, { "epoch": 1.52, "learning_rate": 2.7173255059072234e-06, "loss": 0.4909, "step": 44760 }, { "epoch": 1.52, "learning_rate": 2.7100217340255352e-06, "loss": 0.5355, "step": 44780 }, { "epoch": 1.52, "learning_rate": 2.7027262522568944e-06, "loss": 0.4923, "step": 44800 }, { "epoch": 1.52, "learning_rate": 2.6954390688976706e-06, "loss": 0.5313, "step": 44820 }, { "epoch": 1.52, "learning_rate": 2.6881601922348e-06, "loss": 0.4939, "step": 44840 }, { "epoch": 1.52, "learning_rate": 2.6808896305457733e-06, "loss": 0.529, "step": 44860 }, { "epoch": 1.52, "learning_rate": 2.6736273920986166e-06, "loss": 0.4512, "step": 44880 }, { "epoch": 1.52, "learning_rate": 2.6663734851519017e-06, "loss": 0.4795, "step": 44900 }, { "epoch": 1.52, "learning_rate": 2.65912791795472e-06, "loss": 0.5323, "step": 44920 }, { "epoch": 1.53, "learning_rate": 2.651890698746682e-06, "loss": 0.4636, "step": 44940 }, { "epoch": 1.53, "learning_rate": 2.6446618357579035e-06, "loss": 0.4466, "step": 44960 }, { "epoch": 1.53, "learning_rate": 2.6374413372089968e-06, "loss": 0.5066, "step": 44980 }, { "epoch": 1.53, "learning_rate": 2.630229211311064e-06, "loss": 0.4642, "step": 45000 }, { "epoch": 1.53, "learning_rate": 2.623025466265684e-06, "loss": 0.4945, "step": 45020 }, { "epoch": 1.53, "learning_rate": 2.6158301102649085e-06, "loss": 0.4825, "step": 45040 }, { "epoch": 1.53, "learning_rate": 2.6086431514912438e-06, "loss": 0.4915, "step": 45060 }, { "epoch": 1.53, "learning_rate": 2.6014645981176488e-06, "loss": 0.4504, "step": 45080 }, { "epoch": 1.53, "learning_rate": 2.594294458307528e-06, "loss": 0.4665, "step": 45100 }, { "epoch": 1.53, "learning_rate": 2.5871327402147173e-06, "loss": 0.5008, "step": 45120 }, { "epoch": 1.53, "learning_rate": 2.5799794519834632e-06, "loss": 0.4345, "step": 45140 }, { "epoch": 1.53, "learning_rate": 2.5728346017484453e-06, "loss": 0.4824, "step": 45160 }, { "epoch": 1.53, "learning_rate": 2.565698197634735e-06, "loss": 0.5238, "step": 45180 }, { "epoch": 1.53, "learning_rate": 2.5585702477578024e-06, "loss": 0.4494, "step": 45200 }, { "epoch": 1.53, "learning_rate": 2.5514507602235063e-06, "loss": 0.4856, "step": 45220 }, { "epoch": 1.54, "learning_rate": 2.5443397431280703e-06, "loss": 0.4827, "step": 45240 }, { "epoch": 1.54, "learning_rate": 2.5372372045581006e-06, "loss": 0.4832, "step": 45260 }, { "epoch": 1.54, "learning_rate": 2.530143152590553e-06, "loss": 0.4766, "step": 45280 }, { "epoch": 1.54, "learning_rate": 2.523057595292734e-06, "loss": 0.544, "step": 45300 }, { "epoch": 1.54, "learning_rate": 2.515980540722295e-06, "loss": 0.4732, "step": 45320 }, { "epoch": 1.54, "learning_rate": 2.508911996927207e-06, "loss": 0.478, "step": 45340 }, { "epoch": 1.54, "learning_rate": 2.5018519719457725e-06, "loss": 0.5033, "step": 45360 }, { "epoch": 1.54, "learning_rate": 2.4948004738066e-06, "loss": 0.5514, "step": 45380 }, { "epoch": 1.54, "learning_rate": 2.4877575105286146e-06, "loss": 0.515, "step": 45400 }, { "epoch": 1.54, "learning_rate": 2.4807230901210155e-06, "loss": 0.4947, "step": 45420 }, { "epoch": 1.54, "learning_rate": 2.4736972205832997e-06, "loss": 0.4861, "step": 45440 }, { "epoch": 1.54, "learning_rate": 2.4666799099052384e-06, "loss": 0.4854, "step": 45460 }, { "epoch": 1.54, "learning_rate": 2.459671166066869e-06, "loss": 0.4906, "step": 45480 }, { "epoch": 1.54, "learning_rate": 2.45267099703849e-06, "loss": 0.5068, "step": 45500 }, { "epoch": 1.55, "learning_rate": 2.445679410780638e-06, "loss": 0.5006, "step": 45520 }, { "epoch": 1.55, "learning_rate": 2.4386964152441016e-06, "loss": 0.4426, "step": 45540 }, { "epoch": 1.55, "learning_rate": 2.431722018369894e-06, "loss": 0.5048, "step": 45560 }, { "epoch": 1.55, "learning_rate": 2.424756228089251e-06, "loss": 0.4887, "step": 45580 }, { "epoch": 1.55, "learning_rate": 2.4177990523236216e-06, "loss": 0.4839, "step": 45600 }, { "epoch": 1.55, "learning_rate": 2.410850498984659e-06, "loss": 0.4907, "step": 45620 }, { "epoch": 1.55, "learning_rate": 2.4039105759742098e-06, "loss": 0.5128, "step": 45640 }, { "epoch": 1.55, "learning_rate": 2.396979291184306e-06, "loss": 0.5183, "step": 45660 }, { "epoch": 1.55, "learning_rate": 2.3900566524971593e-06, "loss": 0.4374, "step": 45680 }, { "epoch": 1.55, "learning_rate": 2.3831426677851423e-06, "loss": 0.5259, "step": 45700 }, { "epoch": 1.55, "learning_rate": 2.3762373449107935e-06, "loss": 0.4733, "step": 45720 }, { "epoch": 1.55, "learning_rate": 2.369340691726798e-06, "loss": 0.4627, "step": 45740 }, { "epoch": 1.55, "learning_rate": 2.3624527160759812e-06, "loss": 0.4745, "step": 45760 }, { "epoch": 1.55, "learning_rate": 2.3555734257913045e-06, "loss": 0.4938, "step": 45780 }, { "epoch": 1.55, "learning_rate": 2.3487028286958458e-06, "loss": 0.4779, "step": 45800 }, { "epoch": 1.56, "learning_rate": 2.3418409326028037e-06, "loss": 0.5329, "step": 45820 }, { "epoch": 1.56, "learning_rate": 2.334987745315478e-06, "loss": 0.5157, "step": 45840 }, { "epoch": 1.56, "learning_rate": 2.328143274627266e-06, "loss": 0.4395, "step": 45860 }, { "epoch": 1.56, "learning_rate": 2.3213075283216547e-06, "loss": 0.4928, "step": 45880 }, { "epoch": 1.56, "learning_rate": 2.3144805141722038e-06, "loss": 0.4894, "step": 45900 }, { "epoch": 1.56, "learning_rate": 2.3076622399425485e-06, "loss": 0.486, "step": 45920 }, { "epoch": 1.56, "learning_rate": 2.3008527133863836e-06, "loss": 0.4598, "step": 45940 }, { "epoch": 1.56, "learning_rate": 2.294051942247457e-06, "loss": 0.4787, "step": 45960 }, { "epoch": 1.56, "learning_rate": 2.2872599342595557e-06, "loss": 0.5415, "step": 45980 }, { "epoch": 1.56, "learning_rate": 2.2804766971465054e-06, "loss": 0.5304, "step": 46000 }, { "epoch": 1.56, "learning_rate": 2.2737022386221552e-06, "loss": 0.5048, "step": 46020 }, { "epoch": 1.56, "learning_rate": 2.2669365663903764e-06, "loss": 0.5102, "step": 46040 }, { "epoch": 1.56, "learning_rate": 2.2601796881450468e-06, "loss": 0.5122, "step": 46060 }, { "epoch": 1.56, "learning_rate": 2.253431611570035e-06, "loss": 0.4871, "step": 46080 }, { "epoch": 1.56, "learning_rate": 2.24669234433921e-06, "loss": 0.465, "step": 46100 }, { "epoch": 1.57, "learning_rate": 2.2399618941164202e-06, "loss": 0.4898, "step": 46120 }, { "epoch": 1.57, "learning_rate": 2.2332402685554876e-06, "loss": 0.4535, "step": 46140 }, { "epoch": 1.57, "learning_rate": 2.2265274753002008e-06, "loss": 0.4957, "step": 46160 }, { "epoch": 1.57, "learning_rate": 2.2198235219842967e-06, "loss": 0.5043, "step": 46180 }, { "epoch": 1.57, "learning_rate": 2.213128416231468e-06, "loss": 0.4956, "step": 46200 }, { "epoch": 1.57, "learning_rate": 2.2064421656553415e-06, "loss": 0.474, "step": 46220 }, { "epoch": 1.57, "learning_rate": 2.199764777859477e-06, "loss": 0.5202, "step": 46240 }, { "epoch": 1.57, "learning_rate": 2.1930962604373517e-06, "loss": 0.4743, "step": 46260 }, { "epoch": 1.57, "learning_rate": 2.1864366209723597e-06, "loss": 0.5217, "step": 46280 }, { "epoch": 1.57, "learning_rate": 2.179785867037797e-06, "loss": 0.544, "step": 46300 }, { "epoch": 1.57, "learning_rate": 2.1731440061968536e-06, "loss": 0.5203, "step": 46320 }, { "epoch": 1.57, "learning_rate": 2.166511046002612e-06, "loss": 0.4537, "step": 46340 }, { "epoch": 1.57, "learning_rate": 2.1598869939980225e-06, "loss": 0.5019, "step": 46360 }, { "epoch": 1.57, "learning_rate": 2.153271857715915e-06, "loss": 0.4725, "step": 46380 }, { "epoch": 1.58, "learning_rate": 2.1466656446789757e-06, "loss": 0.5213, "step": 46400 }, { "epoch": 1.58, "learning_rate": 2.140068362399745e-06, "loss": 0.4668, "step": 46420 }, { "epoch": 1.58, "learning_rate": 2.133480018380608e-06, "loss": 0.4751, "step": 46440 }, { "epoch": 1.58, "learning_rate": 2.1269006201137833e-06, "loss": 0.4841, "step": 46460 }, { "epoch": 1.58, "learning_rate": 2.1203301750813166e-06, "loss": 0.4737, "step": 46480 }, { "epoch": 1.58, "learning_rate": 2.1137686907550737e-06, "loss": 0.4682, "step": 46500 }, { "epoch": 1.58, "learning_rate": 2.1072161745967313e-06, "loss": 0.516, "step": 46520 }, { "epoch": 1.58, "learning_rate": 2.1006726340577597e-06, "loss": 0.4704, "step": 46540 }, { "epoch": 1.58, "learning_rate": 2.094138076579433e-06, "loss": 0.4632, "step": 46560 }, { "epoch": 1.58, "learning_rate": 2.0876125095928033e-06, "loss": 0.4756, "step": 46580 }, { "epoch": 1.58, "learning_rate": 2.0810959405187005e-06, "loss": 0.4814, "step": 46600 }, { "epoch": 1.58, "learning_rate": 2.0745883767677246e-06, "loss": 0.5147, "step": 46620 }, { "epoch": 1.58, "learning_rate": 2.068089825740225e-06, "loss": 0.489, "step": 46640 }, { "epoch": 1.58, "learning_rate": 2.0616002948263157e-06, "loss": 0.4748, "step": 46660 }, { "epoch": 1.58, "learning_rate": 2.0551197914058464e-06, "loss": 0.4802, "step": 46680 }, { "epoch": 1.59, "learning_rate": 2.0486483228484012e-06, "loss": 0.494, "step": 46700 }, { "epoch": 1.59, "learning_rate": 2.0421858965132856e-06, "loss": 0.4977, "step": 46720 }, { "epoch": 1.59, "learning_rate": 2.035732519749528e-06, "loss": 0.4961, "step": 46740 }, { "epoch": 1.59, "learning_rate": 2.0292881998958634e-06, "loss": 0.4755, "step": 46760 }, { "epoch": 1.59, "learning_rate": 2.022852944280731e-06, "loss": 0.4676, "step": 46780 }, { "epoch": 1.59, "learning_rate": 2.016426760222259e-06, "loss": 0.4617, "step": 46800 }, { "epoch": 1.59, "learning_rate": 2.0100096550282543e-06, "loss": 0.4792, "step": 46820 }, { "epoch": 1.59, "learning_rate": 2.0036016359962084e-06, "loss": 0.4592, "step": 46840 }, { "epoch": 1.59, "learning_rate": 1.9972027104132763e-06, "loss": 0.4831, "step": 46860 }, { "epoch": 1.59, "learning_rate": 1.990812885556269e-06, "loss": 0.495, "step": 46880 }, { "epoch": 1.59, "learning_rate": 1.9844321686916602e-06, "loss": 0.5107, "step": 46900 }, { "epoch": 1.59, "learning_rate": 1.9780605670755473e-06, "loss": 0.4834, "step": 46920 }, { "epoch": 1.59, "learning_rate": 1.971698087953676e-06, "loss": 0.5159, "step": 46940 }, { "epoch": 1.59, "learning_rate": 1.965344738561414e-06, "loss": 0.4884, "step": 46960 }, { "epoch": 1.59, "learning_rate": 1.9590005261237475e-06, "loss": 0.4492, "step": 46980 }, { "epoch": 1.6, "learning_rate": 1.9526654578552673e-06, "loss": 0.4833, "step": 47000 }, { "epoch": 1.6, "learning_rate": 1.9463395409601713e-06, "loss": 0.4534, "step": 47020 }, { "epoch": 1.6, "learning_rate": 1.940022782632248e-06, "loss": 0.4993, "step": 47040 }, { "epoch": 1.6, "learning_rate": 1.9337151900548734e-06, "loss": 0.4624, "step": 47060 }, { "epoch": 1.6, "learning_rate": 1.9274167704009973e-06, "loss": 0.481, "step": 47080 }, { "epoch": 1.6, "learning_rate": 1.921127530833138e-06, "loss": 0.5272, "step": 47100 }, { "epoch": 1.6, "learning_rate": 1.9148474785033768e-06, "loss": 0.4598, "step": 47120 }, { "epoch": 1.6, "learning_rate": 1.908576620553345e-06, "loss": 0.4906, "step": 47140 }, { "epoch": 1.6, "learning_rate": 1.902314964114219e-06, "loss": 0.4916, "step": 47160 }, { "epoch": 1.6, "learning_rate": 1.8960625163067158e-06, "loss": 0.5078, "step": 47180 }, { "epoch": 1.6, "learning_rate": 1.889819284241069e-06, "loss": 0.4725, "step": 47200 }, { "epoch": 1.6, "learning_rate": 1.8835852750170414e-06, "loss": 0.4765, "step": 47220 }, { "epoch": 1.6, "learning_rate": 1.8773604957239067e-06, "loss": 0.4832, "step": 47240 }, { "epoch": 1.6, "learning_rate": 1.8711449534404424e-06, "loss": 0.5203, "step": 47260 }, { "epoch": 1.6, "learning_rate": 1.8649386552349136e-06, "loss": 0.472, "step": 47280 }, { "epoch": 1.61, "learning_rate": 1.8587416081650866e-06, "loss": 0.4643, "step": 47300 }, { "epoch": 1.61, "learning_rate": 1.8525538192782e-06, "loss": 0.4834, "step": 47320 }, { "epoch": 1.61, "learning_rate": 1.8463752956109616e-06, "loss": 0.5216, "step": 47340 }, { "epoch": 1.61, "learning_rate": 1.8402060441895508e-06, "loss": 0.5254, "step": 47360 }, { "epoch": 1.61, "learning_rate": 1.8340460720295917e-06, "loss": 0.4958, "step": 47380 }, { "epoch": 1.61, "learning_rate": 1.827895386136166e-06, "loss": 0.4614, "step": 47400 }, { "epoch": 1.61, "learning_rate": 1.8217539935037897e-06, "loss": 0.5094, "step": 47420 }, { "epoch": 1.61, "learning_rate": 1.8156219011164121e-06, "loss": 0.4292, "step": 47440 }, { "epoch": 1.61, "learning_rate": 1.8094991159474107e-06, "loss": 0.4841, "step": 47460 }, { "epoch": 1.61, "learning_rate": 1.8033856449595689e-06, "loss": 0.4885, "step": 47480 }, { "epoch": 1.61, "learning_rate": 1.7972814951050855e-06, "loss": 0.4551, "step": 47500 }, { "epoch": 1.61, "learning_rate": 1.7911866733255556e-06, "loss": 0.4634, "step": 47520 }, { "epoch": 1.61, "learning_rate": 1.785101186551975e-06, "loss": 0.4911, "step": 47540 }, { "epoch": 1.61, "learning_rate": 1.7790250417047116e-06, "loss": 0.498, "step": 47560 }, { "epoch": 1.62, "learning_rate": 1.772958245693518e-06, "loss": 0.468, "step": 47580 }, { "epoch": 1.62, "learning_rate": 1.7669008054175108e-06, "loss": 0.4805, "step": 47600 }, { "epoch": 1.62, "learning_rate": 1.7608527277651711e-06, "loss": 0.5318, "step": 47620 }, { "epoch": 1.62, "learning_rate": 1.7548140196143338e-06, "loss": 0.4472, "step": 47640 }, { "epoch": 1.62, "learning_rate": 1.7487846878321691e-06, "loss": 0.4444, "step": 47660 }, { "epoch": 1.62, "learning_rate": 1.7427647392751967e-06, "loss": 0.4735, "step": 47680 }, { "epoch": 1.62, "learning_rate": 1.7367541807892586e-06, "loss": 0.4689, "step": 47700 }, { "epoch": 1.62, "learning_rate": 1.7307530192095212e-06, "loss": 0.4587, "step": 47720 }, { "epoch": 1.62, "learning_rate": 1.7247612613604637e-06, "loss": 0.5297, "step": 47740 }, { "epoch": 1.62, "learning_rate": 1.718778914055873e-06, "loss": 0.5013, "step": 47760 }, { "epoch": 1.62, "learning_rate": 1.712805984098832e-06, "loss": 0.5003, "step": 47780 }, { "epoch": 1.62, "learning_rate": 1.7068424782817162e-06, "loss": 0.5118, "step": 47800 }, { "epoch": 1.62, "learning_rate": 1.700888403386186e-06, "loss": 0.4921, "step": 47820 }, { "epoch": 1.62, "learning_rate": 1.694943766183168e-06, "loss": 0.4948, "step": 47840 }, { "epoch": 1.62, "learning_rate": 1.6890085734328666e-06, "loss": 0.4913, "step": 47860 }, { "epoch": 1.63, "learning_rate": 1.6830828318847415e-06, "loss": 0.4905, "step": 47880 }, { "epoch": 1.63, "learning_rate": 1.6771665482775058e-06, "loss": 0.456, "step": 47900 }, { "epoch": 1.63, "learning_rate": 1.6712597293391153e-06, "loss": 0.4673, "step": 47920 }, { "epoch": 1.63, "learning_rate": 1.6653623817867638e-06, "loss": 0.4698, "step": 47940 }, { "epoch": 1.63, "learning_rate": 1.6594745123268751e-06, "loss": 0.4524, "step": 47960 }, { "epoch": 1.63, "learning_rate": 1.6535961276550948e-06, "loss": 0.4678, "step": 47980 }, { "epoch": 1.63, "learning_rate": 1.6477272344562789e-06, "loss": 0.4924, "step": 48000 }, { "epoch": 1.63, "learning_rate": 1.6418678394044962e-06, "loss": 0.4252, "step": 48020 }, { "epoch": 1.63, "learning_rate": 1.6360179491630056e-06, "loss": 0.4911, "step": 48040 }, { "epoch": 1.63, "learning_rate": 1.6301775703842637e-06, "loss": 0.4731, "step": 48060 }, { "epoch": 1.63, "learning_rate": 1.6243467097099097e-06, "loss": 0.4915, "step": 48080 }, { "epoch": 1.63, "learning_rate": 1.6185253737707607e-06, "loss": 0.4807, "step": 48100 }, { "epoch": 1.63, "learning_rate": 1.6127135691867945e-06, "loss": 0.4835, "step": 48120 }, { "epoch": 1.63, "learning_rate": 1.6069113025671546e-06, "loss": 0.5011, "step": 48140 }, { "epoch": 1.63, "learning_rate": 1.6011185805101447e-06, "loss": 0.5102, "step": 48160 }, { "epoch": 1.64, "learning_rate": 1.5953354096032048e-06, "loss": 0.4949, "step": 48180 }, { "epoch": 1.64, "learning_rate": 1.5895617964229215e-06, "loss": 0.4906, "step": 48200 }, { "epoch": 1.64, "learning_rate": 1.5837977475350008e-06, "loss": 0.5041, "step": 48220 }, { "epoch": 1.64, "learning_rate": 1.5780432694942815e-06, "loss": 0.4643, "step": 48240 }, { "epoch": 1.64, "learning_rate": 1.572298368844719e-06, "loss": 0.4583, "step": 48260 }, { "epoch": 1.64, "learning_rate": 1.5665630521193753e-06, "loss": 0.4727, "step": 48280 }, { "epoch": 1.64, "learning_rate": 1.5608373258404087e-06, "loss": 0.4544, "step": 48300 }, { "epoch": 1.64, "learning_rate": 1.5551211965190783e-06, "loss": 0.5058, "step": 48320 }, { "epoch": 1.64, "learning_rate": 1.5494146706557257e-06, "loss": 0.4836, "step": 48340 }, { "epoch": 1.64, "learning_rate": 1.5437177547397742e-06, "loss": 0.4704, "step": 48360 }, { "epoch": 1.64, "learning_rate": 1.538030455249716e-06, "loss": 0.4754, "step": 48380 }, { "epoch": 1.64, "learning_rate": 1.5323527786531113e-06, "loss": 0.5275, "step": 48400 }, { "epoch": 1.64, "learning_rate": 1.5266847314065726e-06, "loss": 0.4518, "step": 48420 }, { "epoch": 1.64, "learning_rate": 1.521026319955764e-06, "loss": 0.4905, "step": 48440 }, { "epoch": 1.64, "learning_rate": 1.515377550735394e-06, "loss": 0.4558, "step": 48460 }, { "epoch": 1.65, "learning_rate": 1.5097384301692041e-06, "loss": 0.4613, "step": 48480 }, { "epoch": 1.65, "learning_rate": 1.5041089646699603e-06, "loss": 0.4816, "step": 48500 }, { "epoch": 1.65, "learning_rate": 1.4984891606394535e-06, "loss": 0.4806, "step": 48520 }, { "epoch": 1.65, "learning_rate": 1.4928790244684866e-06, "loss": 0.4652, "step": 48540 }, { "epoch": 1.65, "learning_rate": 1.487278562536868e-06, "loss": 0.492, "step": 48560 }, { "epoch": 1.65, "learning_rate": 1.4816877812134034e-06, "loss": 0.4711, "step": 48580 }, { "epoch": 1.65, "learning_rate": 1.4761066868558915e-06, "loss": 0.5062, "step": 48600 }, { "epoch": 1.65, "learning_rate": 1.4705352858111144e-06, "loss": 0.4772, "step": 48620 }, { "epoch": 1.65, "learning_rate": 1.4649735844148304e-06, "loss": 0.4634, "step": 48640 }, { "epoch": 1.65, "learning_rate": 1.4594215889917707e-06, "loss": 0.4814, "step": 48660 }, { "epoch": 1.65, "learning_rate": 1.4538793058556222e-06, "loss": 0.4659, "step": 48680 }, { "epoch": 1.65, "learning_rate": 1.4483467413090324e-06, "loss": 0.5063, "step": 48700 }, { "epoch": 1.65, "learning_rate": 1.4428239016435953e-06, "loss": 0.4821, "step": 48720 }, { "epoch": 1.65, "learning_rate": 1.4373107931398477e-06, "loss": 0.5402, "step": 48740 }, { "epoch": 1.66, "learning_rate": 1.4318074220672595e-06, "loss": 0.4746, "step": 48760 }, { "epoch": 1.66, "learning_rate": 1.426313794684221e-06, "loss": 0.4944, "step": 48780 }, { "epoch": 1.66, "learning_rate": 1.4208299172380512e-06, "loss": 0.4909, "step": 48800 }, { "epoch": 1.66, "learning_rate": 1.4153557959649788e-06, "loss": 0.4646, "step": 48820 }, { "epoch": 1.66, "learning_rate": 1.4098914370901385e-06, "loss": 0.4884, "step": 48840 }, { "epoch": 1.66, "learning_rate": 1.4044368468275549e-06, "loss": 0.4764, "step": 48860 }, { "epoch": 1.66, "learning_rate": 1.3989920313801552e-06, "loss": 0.5018, "step": 48880 }, { "epoch": 1.66, "learning_rate": 1.3935569969397434e-06, "loss": 0.4802, "step": 48900 }, { "epoch": 1.66, "learning_rate": 1.3881317496870062e-06, "loss": 0.4864, "step": 48920 }, { "epoch": 1.66, "learning_rate": 1.3827162957914975e-06, "loss": 0.4689, "step": 48940 }, { "epoch": 1.66, "learning_rate": 1.37731064141163e-06, "loss": 0.4675, "step": 48960 }, { "epoch": 1.66, "learning_rate": 1.3719147926946797e-06, "loss": 0.4988, "step": 48980 }, { "epoch": 1.66, "learning_rate": 1.3665287557767682e-06, "loss": 0.4974, "step": 49000 }, { "epoch": 1.66, "learning_rate": 1.361152536782856e-06, "loss": 0.4712, "step": 49020 }, { "epoch": 1.66, "learning_rate": 1.3557861418267514e-06, "loss": 0.4812, "step": 49040 }, { "epoch": 1.67, "learning_rate": 1.3504295770110731e-06, "loss": 0.5029, "step": 49060 }, { "epoch": 1.67, "learning_rate": 1.3450828484272727e-06, "loss": 0.4581, "step": 49080 }, { "epoch": 1.67, "learning_rate": 1.339745962155613e-06, "loss": 0.4911, "step": 49100 }, { "epoch": 1.67, "learning_rate": 1.3344189242651672e-06, "loss": 0.4557, "step": 49120 }, { "epoch": 1.67, "learning_rate": 1.3291017408138007e-06, "loss": 0.5026, "step": 49140 }, { "epoch": 1.67, "learning_rate": 1.32379441784818e-06, "loss": 0.4814, "step": 49160 }, { "epoch": 1.67, "learning_rate": 1.3184969614037567e-06, "loss": 0.5167, "step": 49180 }, { "epoch": 1.67, "learning_rate": 1.3132093775047616e-06, "loss": 0.4855, "step": 49200 }, { "epoch": 1.67, "learning_rate": 1.3079316721641987e-06, "loss": 0.4367, "step": 49220 }, { "epoch": 1.67, "learning_rate": 1.3026638513838386e-06, "loss": 0.4777, "step": 49240 }, { "epoch": 1.67, "learning_rate": 1.29740592115421e-06, "loss": 0.4559, "step": 49260 }, { "epoch": 1.67, "learning_rate": 1.292157887454596e-06, "loss": 0.4557, "step": 49280 }, { "epoch": 1.67, "learning_rate": 1.2869197562530223e-06, "loss": 0.5089, "step": 49300 }, { "epoch": 1.67, "learning_rate": 1.2816915335062596e-06, "loss": 0.4675, "step": 49320 }, { "epoch": 1.67, "learning_rate": 1.2764732251598022e-06, "loss": 0.4365, "step": 49340 }, { "epoch": 1.68, "learning_rate": 1.2712648371478763e-06, "loss": 0.4803, "step": 49360 }, { "epoch": 1.68, "learning_rate": 1.2660663753934256e-06, "loss": 0.507, "step": 49380 }, { "epoch": 1.68, "learning_rate": 1.260877845808105e-06, "loss": 0.516, "step": 49400 }, { "epoch": 1.68, "learning_rate": 1.2556992542922708e-06, "loss": 0.4704, "step": 49420 }, { "epoch": 1.68, "learning_rate": 1.2505306067349853e-06, "loss": 0.4862, "step": 49440 }, { "epoch": 1.68, "learning_rate": 1.2453719090139994e-06, "loss": 0.4913, "step": 49460 }, { "epoch": 1.68, "learning_rate": 1.2402231669957465e-06, "loss": 0.4875, "step": 49480 }, { "epoch": 1.68, "learning_rate": 1.2350843865353447e-06, "loss": 0.4714, "step": 49500 }, { "epoch": 1.68, "learning_rate": 1.2299555734765733e-06, "loss": 0.4736, "step": 49520 }, { "epoch": 1.68, "learning_rate": 1.2248367336518874e-06, "loss": 0.5141, "step": 49540 }, { "epoch": 1.68, "learning_rate": 1.2197278728823947e-06, "loss": 0.4822, "step": 49560 }, { "epoch": 1.68, "learning_rate": 1.214628996977857e-06, "loss": 0.494, "step": 49580 }, { "epoch": 1.68, "learning_rate": 1.2095401117366823e-06, "loss": 0.5034, "step": 49600 }, { "epoch": 1.68, "learning_rate": 1.204461222945913e-06, "loss": 0.4749, "step": 49620 }, { "epoch": 1.68, "learning_rate": 1.1993923363812288e-06, "loss": 0.4841, "step": 49640 }, { "epoch": 1.69, "learning_rate": 1.1943334578069277e-06, "loss": 0.4757, "step": 49660 }, { "epoch": 1.69, "learning_rate": 1.1892845929759412e-06, "loss": 0.4889, "step": 49680 }, { "epoch": 1.69, "learning_rate": 1.1842457476297953e-06, "loss": 0.4914, "step": 49700 }, { "epoch": 1.69, "learning_rate": 1.179216927498632e-06, "loss": 0.4723, "step": 49720 }, { "epoch": 1.69, "learning_rate": 1.1741981383011924e-06, "loss": 0.4435, "step": 49740 }, { "epoch": 1.69, "learning_rate": 1.1691893857448089e-06, "loss": 0.5092, "step": 49760 }, { "epoch": 1.69, "learning_rate": 1.1641906755254007e-06, "loss": 0.4701, "step": 49780 }, { "epoch": 1.69, "learning_rate": 1.159202013327464e-06, "loss": 0.4724, "step": 49800 }, { "epoch": 1.69, "learning_rate": 1.154223404824073e-06, "loss": 0.4701, "step": 49820 }, { "epoch": 1.69, "learning_rate": 1.1492548556768668e-06, "loss": 0.4989, "step": 49840 }, { "epoch": 1.69, "learning_rate": 1.1442963715360444e-06, "loss": 0.4859, "step": 49860 }, { "epoch": 1.69, "learning_rate": 1.139347958040361e-06, "loss": 0.4877, "step": 49880 }, { "epoch": 1.69, "learning_rate": 1.1344096208171186e-06, "loss": 0.5052, "step": 49900 }, { "epoch": 1.69, "learning_rate": 1.1294813654821591e-06, "loss": 0.507, "step": 49920 }, { "epoch": 1.7, "learning_rate": 1.1245631976398608e-06, "loss": 0.5182, "step": 49940 }, { "epoch": 1.7, "learning_rate": 1.1196551228831321e-06, "loss": 0.4267, "step": 49960 }, { "epoch": 1.7, "learning_rate": 1.1147571467933983e-06, "loss": 0.4479, "step": 49980 }, { "epoch": 1.7, "learning_rate": 1.1098692749406058e-06, "loss": 0.4837, "step": 50000 } ], "max_steps": 58920, "num_train_epochs": 2, "total_flos": 136141580967936.0, "trial_name": null, "trial_params": null }