diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,15016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6972165648336728, + "global_step": 50000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.999999431403446e-05, + "loss": 1.8514, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.9999977256144297e-05, + "loss": 1.2222, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 1.9999948826348907e-05, + "loss": 1.1193, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 1.9999909024680632e-05, + "loss": 1.0893, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 1.9999857851184724e-05, + "loss": 1.0828, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 1.9999795305919377e-05, + "loss": 1.0137, + "step": 120 + }, + { + "epoch": 0.0, + "learning_rate": 1.9999721388955722e-05, + "loss": 1.0248, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 1.9999636100377818e-05, + "loss": 1.0394, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 1.9999539440282648e-05, + "loss": 1.0873, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 1.999943140878014e-05, + "loss": 1.1132, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 1.999931200599314e-05, + "loss": 1.0493, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 1.9999181232057437e-05, + "loss": 1.0894, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 1.9999039087121746e-05, + "loss": 1.1671, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 1.9998885571347712e-05, + "loss": 1.0525, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 1.9998720684909913e-05, + "loss": 1.0033, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 1.999854442799585e-05, + "loss": 1.0375, + "step": 320 + }, + { + "epoch": 0.01, + "learning_rate": 1.9998356800805973e-05, + "loss": 1.0725, + "step": 340 + }, + { + "epoch": 0.01, + "learning_rate": 1.9998157803553638e-05, + "loss": 1.0104, + "step": 360 + }, + { + "epoch": 0.01, + "learning_rate": 1.9997947436465157e-05, + "loss": 1.0205, + "step": 380 + }, + { + "epoch": 0.01, + "learning_rate": 1.9997725699779747e-05, + "loss": 1.079, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 1.999749259374957e-05, + "loss": 1.0023, + "step": 420 + }, + { + "epoch": 0.01, + "learning_rate": 1.999724811863971e-05, + "loss": 1.0257, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 1.999699227472818e-05, + "loss": 0.9901, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 1.9996725062305934e-05, + "loss": 0.9934, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 1.999644648167684e-05, + "loss": 1.056, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 1.9996156533157692e-05, + "loss": 1.0688, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 1.999585521707822e-05, + "loss": 1.1045, + "step": 540 + }, + { + "epoch": 0.02, + "learning_rate": 1.999554253378108e-05, + "loss": 1.0399, + "step": 560 + }, + { + "epoch": 0.02, + "learning_rate": 1.9995218483621858e-05, + "loss": 1.0219, + "step": 580 + }, + { + "epoch": 0.02, + "learning_rate": 1.9994883066969055e-05, + "loss": 1.0287, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 1.9994536284204106e-05, + "loss": 1.0638, + "step": 620 + }, + { + "epoch": 0.02, + "learning_rate": 1.9994178135721372e-05, + "loss": 1.0247, + "step": 640 + }, + { + "epoch": 0.02, + "learning_rate": 1.999380862192813e-05, + "loss": 0.99, + "step": 660 + }, + { + "epoch": 0.02, + "learning_rate": 1.99934277432446e-05, + "loss": 1.0635, + "step": 680 + }, + { + "epoch": 0.02, + "learning_rate": 1.9993035500103904e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 1.9992631892952108e-05, + "loss": 1.0221, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 1.999221692224818e-05, + "loss": 1.0333, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 1.9991790588464034e-05, + "loss": 1.0619, + "step": 760 + }, + { + "epoch": 0.03, + "learning_rate": 1.9991352892084483e-05, + "loss": 0.9604, + "step": 780 + }, + { + "epoch": 0.03, + "learning_rate": 1.999090383360728e-05, + "loss": 0.9817, + "step": 800 + }, + { + "epoch": 0.03, + "learning_rate": 1.9990443413543086e-05, + "loss": 1.0362, + "step": 820 + }, + { + "epoch": 0.03, + "learning_rate": 1.998997163241549e-05, + "loss": 1.0436, + "step": 840 + }, + { + "epoch": 0.03, + "learning_rate": 1.9989488490761e-05, + "loss": 1.0088, + "step": 860 + }, + { + "epoch": 0.03, + "learning_rate": 1.9988993989129035e-05, + "loss": 1.06, + "step": 880 + }, + { + "epoch": 0.03, + "learning_rate": 1.9988488128081943e-05, + "loss": 1.0384, + "step": 900 + }, + { + "epoch": 0.03, + "learning_rate": 1.998797090819499e-05, + "loss": 0.9912, + "step": 920 + }, + { + "epoch": 0.03, + "learning_rate": 1.9987442330056352e-05, + "loss": 1.0263, + "step": 940 + }, + { + "epoch": 0.03, + "learning_rate": 1.9986902394267117e-05, + "loss": 0.9909, + "step": 960 + }, + { + "epoch": 0.03, + "learning_rate": 1.9986351101441308e-05, + "loss": 0.993, + "step": 980 + }, + { + "epoch": 0.03, + "learning_rate": 1.9985788452205847e-05, + "loss": 1.0823, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 1.9985214447200566e-05, + "loss": 0.9748, + "step": 1020 + }, + { + "epoch": 0.04, + "learning_rate": 1.9984629087078234e-05, + "loss": 0.9944, + "step": 1040 + }, + { + "epoch": 0.04, + "learning_rate": 1.998403237250451e-05, + "loss": 1.0813, + "step": 1060 + }, + { + "epoch": 0.04, + "learning_rate": 1.9983424304157976e-05, + "loss": 0.9385, + "step": 1080 + }, + { + "epoch": 0.04, + "learning_rate": 1.998280488273012e-05, + "loss": 1.0154, + "step": 1100 + }, + { + "epoch": 0.04, + "learning_rate": 1.9982174108925346e-05, + "loss": 0.9694, + "step": 1120 + }, + { + "epoch": 0.04, + "learning_rate": 1.998153198346097e-05, + "loss": 0.9958, + "step": 1140 + }, + { + "epoch": 0.04, + "learning_rate": 1.9980878507067207e-05, + "loss": 1.1112, + "step": 1160 + }, + { + "epoch": 0.04, + "learning_rate": 1.9980213680487186e-05, + "loss": 1.025, + "step": 1180 + }, + { + "epoch": 0.04, + "learning_rate": 1.9979537504476945e-05, + "loss": 1.025, + "step": 1200 + }, + { + "epoch": 0.04, + "learning_rate": 1.997884997980543e-05, + "loss": 0.9974, + "step": 1220 + }, + { + "epoch": 0.04, + "learning_rate": 1.9978151107254477e-05, + "loss": 1.0615, + "step": 1240 + }, + { + "epoch": 0.04, + "learning_rate": 1.9977440887618855e-05, + "loss": 0.9963, + "step": 1260 + }, + { + "epoch": 0.04, + "learning_rate": 1.9976719321706213e-05, + "loss": 0.9929, + "step": 1280 + }, + { + "epoch": 0.04, + "learning_rate": 1.997598641033711e-05, + "loss": 1.0235, + "step": 1300 + }, + { + "epoch": 0.04, + "learning_rate": 1.997524215434501e-05, + "loss": 0.9834, + "step": 1320 + }, + { + "epoch": 0.05, + "learning_rate": 1.9974486554576274e-05, + "loss": 0.9981, + "step": 1340 + }, + { + "epoch": 0.05, + "learning_rate": 1.9973719611890164e-05, + "loss": 0.914, + "step": 1360 + }, + { + "epoch": 0.05, + "learning_rate": 1.9972941327158848e-05, + "loss": 0.9992, + "step": 1380 + }, + { + "epoch": 0.05, + "learning_rate": 1.997215170126738e-05, + "loss": 0.9329, + "step": 1400 + }, + { + "epoch": 0.05, + "learning_rate": 1.9971350735113718e-05, + "loss": 1.0179, + "step": 1420 + }, + { + "epoch": 0.05, + "learning_rate": 1.9970538429608716e-05, + "loss": 1.0618, + "step": 1440 + }, + { + "epoch": 0.05, + "learning_rate": 1.996971478567612e-05, + "loss": 1.02, + "step": 1460 + }, + { + "epoch": 0.05, + "learning_rate": 1.996887980425258e-05, + "loss": 1.0123, + "step": 1480 + }, + { + "epoch": 0.05, + "learning_rate": 1.996803348628762e-05, + "loss": 1.0023, + "step": 1500 + }, + { + "epoch": 0.05, + "learning_rate": 1.9967175832743677e-05, + "loss": 0.9477, + "step": 1520 + }, + { + "epoch": 0.05, + "learning_rate": 1.9966306844596063e-05, + "loss": 0.9968, + "step": 1540 + }, + { + "epoch": 0.05, + "learning_rate": 1.9965426522832985e-05, + "loss": 0.9729, + "step": 1560 + }, + { + "epoch": 0.05, + "learning_rate": 1.9964534868455544e-05, + "loss": 1.0453, + "step": 1580 + }, + { + "epoch": 0.05, + "learning_rate": 1.9963631882477716e-05, + "loss": 1.0657, + "step": 1600 + }, + { + "epoch": 0.05, + "learning_rate": 1.9962717565926376e-05, + "loss": 0.9159, + "step": 1620 + }, + { + "epoch": 0.06, + "learning_rate": 1.9961791919841275e-05, + "loss": 0.9438, + "step": 1640 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960854945275055e-05, + "loss": 0.9773, + "step": 1660 + }, + { + "epoch": 0.06, + "learning_rate": 1.995990664329323e-05, + "loss": 1.0714, + "step": 1680 + }, + { + "epoch": 0.06, + "learning_rate": 1.9958947014974216e-05, + "loss": 0.9914, + "step": 1700 + }, + { + "epoch": 0.06, + "learning_rate": 1.9957976061409277e-05, + "loss": 0.9779, + "step": 1720 + }, + { + "epoch": 0.06, + "learning_rate": 1.9956993783702594e-05, + "loss": 0.9906, + "step": 1740 + }, + { + "epoch": 0.06, + "learning_rate": 1.9956000182971192e-05, + "loss": 0.9829, + "step": 1760 + }, + { + "epoch": 0.06, + "learning_rate": 1.9954995260344993e-05, + "loss": 1.0036, + "step": 1780 + }, + { + "epoch": 0.06, + "learning_rate": 1.995397901696679e-05, + "loss": 0.9597, + "step": 1800 + }, + { + "epoch": 0.06, + "learning_rate": 1.995295145399224e-05, + "loss": 0.967, + "step": 1820 + }, + { + "epoch": 0.06, + "learning_rate": 1.9951912572589892e-05, + "loss": 1.0017, + "step": 1840 + }, + { + "epoch": 0.06, + "learning_rate": 1.9950862373941145e-05, + "loss": 0.9761, + "step": 1860 + }, + { + "epoch": 0.06, + "learning_rate": 1.994980085924028e-05, + "loss": 0.9761, + "step": 1880 + }, + { + "epoch": 0.06, + "learning_rate": 1.994872802969445e-05, + "loss": 1.0116, + "step": 1900 + }, + { + "epoch": 0.07, + "learning_rate": 1.994764388652366e-05, + "loss": 1.0121, + "step": 1920 + }, + { + "epoch": 0.07, + "learning_rate": 1.9946548430960796e-05, + "loss": 0.9758, + "step": 1940 + }, + { + "epoch": 0.07, + "learning_rate": 1.9945441664251605e-05, + "loss": 0.9948, + "step": 1960 + }, + { + "epoch": 0.07, + "learning_rate": 1.9944323587654685e-05, + "loss": 1.0505, + "step": 1980 + }, + { + "epoch": 0.07, + "learning_rate": 1.9943194202441514e-05, + "loss": 0.9806, + "step": 2000 + }, + { + "epoch": 0.07, + "learning_rate": 1.9942053509896417e-05, + "loss": 0.9377, + "step": 2020 + }, + { + "epoch": 0.07, + "learning_rate": 1.9940901511316585e-05, + "loss": 1.0366, + "step": 2040 + }, + { + "epoch": 0.07, + "learning_rate": 1.993973820801206e-05, + "loss": 0.9959, + "step": 2060 + }, + { + "epoch": 0.07, + "learning_rate": 1.993856360130574e-05, + "loss": 1.037, + "step": 2080 + }, + { + "epoch": 0.07, + "learning_rate": 1.9937377692533382e-05, + "loss": 1.0662, + "step": 2100 + }, + { + "epoch": 0.07, + "learning_rate": 1.99361804830436e-05, + "loss": 1.0312, + "step": 2120 + }, + { + "epoch": 0.07, + "learning_rate": 1.9934971974197843e-05, + "loss": 0.9109, + "step": 2140 + }, + { + "epoch": 0.07, + "learning_rate": 1.993375216737042e-05, + "loss": 0.9742, + "step": 2160 + }, + { + "epoch": 0.07, + "learning_rate": 1.9932521063948494e-05, + "loss": 1.0021, + "step": 2180 + }, + { + "epoch": 0.07, + "learning_rate": 1.9931278665332057e-05, + "loss": 0.9514, + "step": 2200 + }, + { + "epoch": 0.08, + "learning_rate": 1.9930024972933966e-05, + "loss": 0.9497, + "step": 2220 + }, + { + "epoch": 0.08, + "learning_rate": 1.9928759988179908e-05, + "loss": 0.9337, + "step": 2240 + }, + { + "epoch": 0.08, + "learning_rate": 1.992748371250841e-05, + "loss": 0.9945, + "step": 2260 + }, + { + "epoch": 0.08, + "learning_rate": 1.992619614737085e-05, + "loss": 0.9873, + "step": 2280 + }, + { + "epoch": 0.08, + "learning_rate": 1.9924897294231434e-05, + "loss": 0.9715, + "step": 2300 + }, + { + "epoch": 0.08, + "learning_rate": 1.9923587154567214e-05, + "loss": 0.946, + "step": 2320 + }, + { + "epoch": 0.08, + "learning_rate": 1.9922265729868065e-05, + "loss": 0.9688, + "step": 2340 + }, + { + "epoch": 0.08, + "learning_rate": 1.992093302163671e-05, + "loss": 0.9837, + "step": 2360 + }, + { + "epoch": 0.08, + "learning_rate": 1.9919589031388688e-05, + "loss": 1.0015, + "step": 2380 + }, + { + "epoch": 0.08, + "learning_rate": 1.991823376065238e-05, + "loss": 0.9847, + "step": 2400 + }, + { + "epoch": 0.08, + "learning_rate": 1.991686721096899e-05, + "loss": 0.9702, + "step": 2420 + }, + { + "epoch": 0.08, + "learning_rate": 1.991548938389255e-05, + "loss": 0.9749, + "step": 2440 + }, + { + "epoch": 0.08, + "learning_rate": 1.991410028098991e-05, + "loss": 0.968, + "step": 2460 + }, + { + "epoch": 0.08, + "learning_rate": 1.9912699903840754e-05, + "loss": 0.9446, + "step": 2480 + }, + { + "epoch": 0.08, + "learning_rate": 1.991128825403758e-05, + "loss": 0.9667, + "step": 2500 + }, + { + "epoch": 0.09, + "learning_rate": 1.9909865333185704e-05, + "loss": 0.9359, + "step": 2520 + }, + { + "epoch": 0.09, + "learning_rate": 1.9908431142903263e-05, + "loss": 1.0054, + "step": 2540 + }, + { + "epoch": 0.09, + "learning_rate": 1.9906985684821208e-05, + "loss": 0.9583, + "step": 2560 + }, + { + "epoch": 0.09, + "learning_rate": 1.9905528960583305e-05, + "loss": 0.9364, + "step": 2580 + }, + { + "epoch": 0.09, + "learning_rate": 1.990406097184613e-05, + "loss": 0.9839, + "step": 2600 + }, + { + "epoch": 0.09, + "learning_rate": 1.9902581720279074e-05, + "loss": 1.0165, + "step": 2620 + }, + { + "epoch": 0.09, + "learning_rate": 1.9901091207564326e-05, + "loss": 0.9603, + "step": 2640 + }, + { + "epoch": 0.09, + "learning_rate": 1.9899589435396888e-05, + "loss": 0.9977, + "step": 2660 + }, + { + "epoch": 0.09, + "learning_rate": 1.9898076405484565e-05, + "loss": 0.9618, + "step": 2680 + }, + { + "epoch": 0.09, + "learning_rate": 1.9896552119547964e-05, + "loss": 0.89, + "step": 2700 + }, + { + "epoch": 0.09, + "learning_rate": 1.9895016579320495e-05, + "loss": 1.0115, + "step": 2720 + }, + { + "epoch": 0.09, + "learning_rate": 1.989346978654836e-05, + "loss": 0.8936, + "step": 2740 + }, + { + "epoch": 0.09, + "learning_rate": 1.9891911742990567e-05, + "loss": 0.9733, + "step": 2760 + }, + { + "epoch": 0.09, + "learning_rate": 1.9890342450418907e-05, + "loss": 0.9443, + "step": 2780 + }, + { + "epoch": 0.1, + "learning_rate": 1.9888761910617967e-05, + "loss": 1.016, + "step": 2800 + }, + { + "epoch": 0.1, + "learning_rate": 1.9887170125385135e-05, + "loss": 0.8977, + "step": 2820 + }, + { + "epoch": 0.1, + "learning_rate": 1.9885567096530566e-05, + "loss": 0.9582, + "step": 2840 + }, + { + "epoch": 0.1, + "learning_rate": 1.9883952825877225e-05, + "loss": 1.0007, + "step": 2860 + }, + { + "epoch": 0.1, + "learning_rate": 1.988232731526084e-05, + "loss": 1.0113, + "step": 2880 + }, + { + "epoch": 0.1, + "learning_rate": 1.9880690566529933e-05, + "loss": 0.9572, + "step": 2900 + }, + { + "epoch": 0.1, + "learning_rate": 1.987904258154581e-05, + "loss": 1.0198, + "step": 2920 + }, + { + "epoch": 0.1, + "learning_rate": 1.987738336218254e-05, + "loss": 0.8695, + "step": 2940 + }, + { + "epoch": 0.1, + "learning_rate": 1.9875712910326982e-05, + "loss": 0.9847, + "step": 2960 + }, + { + "epoch": 0.1, + "learning_rate": 1.987403122787876e-05, + "loss": 0.9547, + "step": 2980 + }, + { + "epoch": 0.1, + "learning_rate": 1.9872338316750264e-05, + "loss": 0.9653, + "step": 3000 + }, + { + "epoch": 0.1, + "learning_rate": 1.9870634178866676e-05, + "loss": 0.957, + "step": 3020 + }, + { + "epoch": 0.1, + "learning_rate": 1.986891881616592e-05, + "loss": 0.976, + "step": 3040 + }, + { + "epoch": 0.1, + "learning_rate": 1.9867192230598692e-05, + "loss": 0.9866, + "step": 3060 + }, + { + "epoch": 0.1, + "learning_rate": 1.986545442412846e-05, + "loss": 1.0012, + "step": 3080 + }, + { + "epoch": 0.11, + "learning_rate": 1.9863705398731444e-05, + "loss": 0.9578, + "step": 3100 + }, + { + "epoch": 0.11, + "learning_rate": 1.986194515639662e-05, + "loss": 0.9816, + "step": 3120 + }, + { + "epoch": 0.11, + "learning_rate": 1.986017369912573e-05, + "loss": 0.9501, + "step": 3140 + }, + { + "epoch": 0.11, + "learning_rate": 1.9858391028933258e-05, + "loss": 0.9618, + "step": 3160 + }, + { + "epoch": 0.11, + "learning_rate": 1.9856597147846447e-05, + "loss": 0.9178, + "step": 3180 + }, + { + "epoch": 0.11, + "learning_rate": 1.985479205790528e-05, + "loss": 0.9475, + "step": 3200 + }, + { + "epoch": 0.11, + "learning_rate": 1.98529757611625e-05, + "loss": 0.9608, + "step": 3220 + }, + { + "epoch": 0.11, + "learning_rate": 1.9851148259683586e-05, + "loss": 0.8593, + "step": 3240 + }, + { + "epoch": 0.11, + "learning_rate": 1.984930955554676e-05, + "loss": 0.9459, + "step": 3260 + }, + { + "epoch": 0.11, + "learning_rate": 1.984745965084298e-05, + "loss": 0.8713, + "step": 3280 + }, + { + "epoch": 0.11, + "learning_rate": 1.9845598547675945e-05, + "loss": 0.9075, + "step": 3300 + }, + { + "epoch": 0.11, + "learning_rate": 1.9843726248162096e-05, + "loss": 0.9818, + "step": 3320 + }, + { + "epoch": 0.11, + "learning_rate": 1.984184275443059e-05, + "loss": 0.9602, + "step": 3340 + }, + { + "epoch": 0.11, + "learning_rate": 1.983994806862333e-05, + "loss": 0.9412, + "step": 3360 + }, + { + "epoch": 0.11, + "learning_rate": 1.9838042192894935e-05, + "loss": 0.9837, + "step": 3380 + }, + { + "epoch": 0.12, + "learning_rate": 1.9836125129412756e-05, + "loss": 1.0087, + "step": 3400 + }, + { + "epoch": 0.12, + "learning_rate": 1.9834196880356863e-05, + "loss": 1.0303, + "step": 3420 + }, + { + "epoch": 0.12, + "learning_rate": 1.983225744792005e-05, + "loss": 0.9416, + "step": 3440 + }, + { + "epoch": 0.12, + "learning_rate": 1.9830306834307828e-05, + "loss": 0.9829, + "step": 3460 + }, + { + "epoch": 0.12, + "learning_rate": 1.9828345041738413e-05, + "loss": 0.9906, + "step": 3480 + }, + { + "epoch": 0.12, + "learning_rate": 1.9826372072442747e-05, + "loss": 0.9868, + "step": 3500 + }, + { + "epoch": 0.12, + "learning_rate": 1.9824387928664478e-05, + "loss": 0.9515, + "step": 3520 + }, + { + "epoch": 0.12, + "learning_rate": 1.9822392612659964e-05, + "loss": 0.9713, + "step": 3540 + }, + { + "epoch": 0.12, + "learning_rate": 1.9820386126698256e-05, + "loss": 0.8813, + "step": 3560 + }, + { + "epoch": 0.12, + "learning_rate": 1.981836847306112e-05, + "loss": 0.9842, + "step": 3580 + }, + { + "epoch": 0.12, + "learning_rate": 1.981633965404302e-05, + "loss": 0.9981, + "step": 3600 + }, + { + "epoch": 0.12, + "learning_rate": 1.9814299671951113e-05, + "loss": 0.9399, + "step": 3620 + }, + { + "epoch": 0.12, + "learning_rate": 1.9812248529105254e-05, + "loss": 0.8522, + "step": 3640 + }, + { + "epoch": 0.12, + "learning_rate": 1.9810186227837986e-05, + "loss": 0.9817, + "step": 3660 + }, + { + "epoch": 0.12, + "learning_rate": 1.9808112770494546e-05, + "loss": 0.9788, + "step": 3680 + }, + { + "epoch": 0.13, + "learning_rate": 1.9806028159432854e-05, + "loss": 0.9442, + "step": 3700 + }, + { + "epoch": 0.13, + "learning_rate": 1.9803932397023512e-05, + "loss": 1.0024, + "step": 3720 + }, + { + "epoch": 0.13, + "learning_rate": 1.9801825485649816e-05, + "loss": 1.0184, + "step": 3740 + }, + { + "epoch": 0.13, + "learning_rate": 1.9799707427707723e-05, + "loss": 0.99, + "step": 3760 + }, + { + "epoch": 0.13, + "learning_rate": 1.9797578225605875e-05, + "loss": 1.0291, + "step": 3780 + }, + { + "epoch": 0.13, + "learning_rate": 1.979543788176559e-05, + "loss": 0.9496, + "step": 3800 + }, + { + "epoch": 0.13, + "learning_rate": 1.979328639862085e-05, + "loss": 0.9149, + "step": 3820 + }, + { + "epoch": 0.13, + "learning_rate": 1.9791123778618303e-05, + "loss": 0.9756, + "step": 3840 + }, + { + "epoch": 0.13, + "learning_rate": 1.978895002421727e-05, + "loss": 0.9599, + "step": 3860 + }, + { + "epoch": 0.13, + "learning_rate": 1.978676513788973e-05, + "loss": 0.9328, + "step": 3880 + }, + { + "epoch": 0.13, + "learning_rate": 1.978456912212032e-05, + "loss": 0.9399, + "step": 3900 + }, + { + "epoch": 0.13, + "learning_rate": 1.9782361979406334e-05, + "loss": 0.9641, + "step": 3920 + }, + { + "epoch": 0.13, + "learning_rate": 1.9780143712257715e-05, + "loss": 0.9505, + "step": 3940 + }, + { + "epoch": 0.13, + "learning_rate": 1.9777914323197063e-05, + "loss": 1.0097, + "step": 3960 + }, + { + "epoch": 0.14, + "learning_rate": 1.977567381475963e-05, + "loss": 0.9627, + "step": 3980 + }, + { + "epoch": 0.14, + "learning_rate": 1.97734221894933e-05, + "loss": 0.9573, + "step": 4000 + }, + { + "epoch": 0.14, + "learning_rate": 1.9771159449958612e-05, + "loss": 0.974, + "step": 4020 + }, + { + "epoch": 0.14, + "learning_rate": 1.9768885598728734e-05, + "loss": 0.9207, + "step": 4040 + }, + { + "epoch": 0.14, + "learning_rate": 1.976660063838947e-05, + "loss": 0.9765, + "step": 4060 + }, + { + "epoch": 0.14, + "learning_rate": 1.9764304571539266e-05, + "loss": 0.9475, + "step": 4080 + }, + { + "epoch": 0.14, + "learning_rate": 1.9761997400789194e-05, + "loss": 0.9764, + "step": 4100 + }, + { + "epoch": 0.14, + "learning_rate": 1.9759679128762946e-05, + "loss": 0.9277, + "step": 4120 + }, + { + "epoch": 0.14, + "learning_rate": 1.9757349758096855e-05, + "loss": 0.9738, + "step": 4140 + }, + { + "epoch": 0.14, + "learning_rate": 1.975500929143986e-05, + "loss": 0.9991, + "step": 4160 + }, + { + "epoch": 0.14, + "learning_rate": 1.9752657731453523e-05, + "loss": 0.9421, + "step": 4180 + }, + { + "epoch": 0.14, + "learning_rate": 1.9750295080812025e-05, + "loss": 0.972, + "step": 4200 + }, + { + "epoch": 0.14, + "learning_rate": 1.974792134220215e-05, + "loss": 0.967, + "step": 4220 + }, + { + "epoch": 0.14, + "learning_rate": 1.9745536518323304e-05, + "loss": 0.9693, + "step": 4240 + }, + { + "epoch": 0.14, + "learning_rate": 1.9743140611887487e-05, + "loss": 0.9242, + "step": 4260 + }, + { + "epoch": 0.15, + "learning_rate": 1.974073362561931e-05, + "loss": 1.0251, + "step": 4280 + }, + { + "epoch": 0.15, + "learning_rate": 1.9738315562255983e-05, + "loss": 1.0025, + "step": 4300 + }, + { + "epoch": 0.15, + "learning_rate": 1.9735886424547306e-05, + "loss": 1.0041, + "step": 4320 + }, + { + "epoch": 0.15, + "learning_rate": 1.973344621525568e-05, + "loss": 0.8858, + "step": 4340 + }, + { + "epoch": 0.15, + "learning_rate": 1.97309949371561e-05, + "loss": 1.0171, + "step": 4360 + }, + { + "epoch": 0.15, + "learning_rate": 1.9728532593036132e-05, + "loss": 0.9639, + "step": 4380 + }, + { + "epoch": 0.15, + "learning_rate": 1.972605918569594e-05, + "loss": 0.9343, + "step": 4400 + }, + { + "epoch": 0.15, + "learning_rate": 1.972357471794827e-05, + "loss": 0.8996, + "step": 4420 + }, + { + "epoch": 0.15, + "learning_rate": 1.972107919261844e-05, + "loss": 0.9477, + "step": 4440 + }, + { + "epoch": 0.15, + "learning_rate": 1.9718572612544345e-05, + "loss": 1.0198, + "step": 4460 + }, + { + "epoch": 0.15, + "learning_rate": 1.9716054980576446e-05, + "loss": 0.8851, + "step": 4480 + }, + { + "epoch": 0.15, + "learning_rate": 1.971352629957778e-05, + "loss": 0.9439, + "step": 4500 + }, + { + "epoch": 0.15, + "learning_rate": 1.9710986572423947e-05, + "loss": 0.9632, + "step": 4520 + }, + { + "epoch": 0.15, + "learning_rate": 1.9708435802003105e-05, + "loss": 1.0028, + "step": 4540 + }, + { + "epoch": 0.15, + "learning_rate": 1.9705873991215973e-05, + "loss": 0.9911, + "step": 4560 + }, + { + "epoch": 0.16, + "learning_rate": 1.970330114297583e-05, + "loss": 0.9854, + "step": 4580 + }, + { + "epoch": 0.16, + "learning_rate": 1.970071726020849e-05, + "loss": 0.964, + "step": 4600 + }, + { + "epoch": 0.16, + "learning_rate": 1.969812234585233e-05, + "loss": 0.9546, + "step": 4620 + }, + { + "epoch": 0.16, + "learning_rate": 1.9695516402858277e-05, + "loss": 0.8992, + "step": 4640 + }, + { + "epoch": 0.16, + "learning_rate": 1.9692899434189786e-05, + "loss": 0.9148, + "step": 4660 + }, + { + "epoch": 0.16, + "learning_rate": 1.969027144282285e-05, + "loss": 0.9543, + "step": 4680 + }, + { + "epoch": 0.16, + "learning_rate": 1.9687632431746014e-05, + "loss": 1.0007, + "step": 4700 + }, + { + "epoch": 0.16, + "learning_rate": 1.9684982403960332e-05, + "loss": 0.9029, + "step": 4720 + }, + { + "epoch": 0.16, + "learning_rate": 1.968232136247941e-05, + "loss": 0.9436, + "step": 4740 + }, + { + "epoch": 0.16, + "learning_rate": 1.9679649310329353e-05, + "loss": 0.9336, + "step": 4760 + }, + { + "epoch": 0.16, + "learning_rate": 1.9676966250548807e-05, + "loss": 0.9395, + "step": 4780 + }, + { + "epoch": 0.16, + "learning_rate": 1.967427218618893e-05, + "loss": 0.9229, + "step": 4800 + }, + { + "epoch": 0.16, + "learning_rate": 1.967156712031339e-05, + "loss": 0.9191, + "step": 4820 + }, + { + "epoch": 0.16, + "learning_rate": 1.966885105599837e-05, + "loss": 0.913, + "step": 4840 + }, + { + "epoch": 0.16, + "learning_rate": 1.9666123996332564e-05, + "loss": 0.9655, + "step": 4860 + }, + { + "epoch": 0.17, + "learning_rate": 1.966338594441716e-05, + "loss": 1.0067, + "step": 4880 + }, + { + "epoch": 0.17, + "learning_rate": 1.9660636903365854e-05, + "loss": 0.9048, + "step": 4900 + }, + { + "epoch": 0.17, + "learning_rate": 1.9657876876304835e-05, + "loss": 0.9761, + "step": 4920 + }, + { + "epoch": 0.17, + "learning_rate": 1.965510586637279e-05, + "loss": 0.8365, + "step": 4940 + }, + { + "epoch": 0.17, + "learning_rate": 1.965232387672089e-05, + "loss": 0.9765, + "step": 4960 + }, + { + "epoch": 0.17, + "learning_rate": 1.9649530910512796e-05, + "loss": 0.864, + "step": 4980 + }, + { + "epoch": 0.17, + "learning_rate": 1.9646726970924645e-05, + "loss": 0.9732, + "step": 5000 + }, + { + "epoch": 0.17, + "learning_rate": 1.9643912061145065e-05, + "loss": 0.8954, + "step": 5020 + }, + { + "epoch": 0.17, + "learning_rate": 1.9641086184375148e-05, + "loss": 0.956, + "step": 5040 + }, + { + "epoch": 0.17, + "learning_rate": 1.9638249343828463e-05, + "loss": 0.9609, + "step": 5060 + }, + { + "epoch": 0.17, + "learning_rate": 1.9635401542731043e-05, + "loss": 1.0167, + "step": 5080 + }, + { + "epoch": 0.17, + "learning_rate": 1.963254278432139e-05, + "loss": 0.9232, + "step": 5100 + }, + { + "epoch": 0.17, + "learning_rate": 1.9629673071850463e-05, + "loss": 0.9191, + "step": 5120 + }, + { + "epoch": 0.17, + "learning_rate": 1.962679240858168e-05, + "loss": 0.8266, + "step": 5140 + }, + { + "epoch": 0.18, + "learning_rate": 1.9623900797790914e-05, + "loss": 0.9257, + "step": 5160 + }, + { + "epoch": 0.18, + "learning_rate": 1.9620998242766482e-05, + "loss": 0.8912, + "step": 5180 + }, + { + "epoch": 0.18, + "learning_rate": 1.9618084746809147e-05, + "loss": 0.8733, + "step": 5200 + }, + { + "epoch": 0.18, + "learning_rate": 1.9615160313232122e-05, + "loss": 0.9486, + "step": 5220 + }, + { + "epoch": 0.18, + "learning_rate": 1.961222494536105e-05, + "loss": 0.9723, + "step": 5240 + }, + { + "epoch": 0.18, + "learning_rate": 1.960927864653401e-05, + "loss": 1.0091, + "step": 5260 + }, + { + "epoch": 0.18, + "learning_rate": 1.9606321420101514e-05, + "loss": 0.9226, + "step": 5280 + }, + { + "epoch": 0.18, + "learning_rate": 1.96033532694265e-05, + "loss": 0.9532, + "step": 5300 + }, + { + "epoch": 0.18, + "learning_rate": 1.960037419788433e-05, + "loss": 0.9176, + "step": 5320 + }, + { + "epoch": 0.18, + "learning_rate": 1.9597384208862776e-05, + "loss": 0.9545, + "step": 5340 + }, + { + "epoch": 0.18, + "learning_rate": 1.9594383305762045e-05, + "loss": 0.9162, + "step": 5360 + }, + { + "epoch": 0.18, + "learning_rate": 1.9591371491994735e-05, + "loss": 0.9597, + "step": 5380 + }, + { + "epoch": 0.18, + "learning_rate": 1.958834877098586e-05, + "loss": 0.9209, + "step": 5400 + }, + { + "epoch": 0.18, + "learning_rate": 1.9585315146172843e-05, + "loss": 0.9597, + "step": 5420 + }, + { + "epoch": 0.18, + "learning_rate": 1.9582270621005494e-05, + "loss": 0.9402, + "step": 5440 + }, + { + "epoch": 0.19, + "learning_rate": 1.9579215198946028e-05, + "loss": 0.8907, + "step": 5460 + }, + { + "epoch": 0.19, + "learning_rate": 1.9576148883469054e-05, + "loss": 0.9002, + "step": 5480 + }, + { + "epoch": 0.19, + "learning_rate": 1.9573071678061565e-05, + "loss": 0.9352, + "step": 5500 + }, + { + "epoch": 0.19, + "learning_rate": 1.956998358622293e-05, + "loss": 0.9039, + "step": 5520 + }, + { + "epoch": 0.19, + "learning_rate": 1.9566884611464916e-05, + "loss": 0.926, + "step": 5540 + }, + { + "epoch": 0.19, + "learning_rate": 1.9563774757311646e-05, + "loss": 0.9952, + "step": 5560 + }, + { + "epoch": 0.19, + "learning_rate": 1.956065402729963e-05, + "loss": 0.8673, + "step": 5580 + }, + { + "epoch": 0.19, + "learning_rate": 1.9557522424977745e-05, + "loss": 0.9081, + "step": 5600 + }, + { + "epoch": 0.19, + "learning_rate": 1.9554379953907217e-05, + "loss": 0.9287, + "step": 5620 + }, + { + "epoch": 0.19, + "learning_rate": 1.955122661766165e-05, + "loss": 0.9262, + "step": 5640 + }, + { + "epoch": 0.19, + "learning_rate": 1.954806241982699e-05, + "loss": 0.937, + "step": 5660 + }, + { + "epoch": 0.19, + "learning_rate": 1.954488736400155e-05, + "loss": 0.8884, + "step": 5680 + }, + { + "epoch": 0.19, + "learning_rate": 1.9541701453795976e-05, + "loss": 0.9416, + "step": 5700 + }, + { + "epoch": 0.19, + "learning_rate": 1.9538504692833263e-05, + "loss": 0.9081, + "step": 5720 + }, + { + "epoch": 0.19, + "learning_rate": 1.9535297084748747e-05, + "loss": 0.8875, + "step": 5740 + }, + { + "epoch": 0.2, + "learning_rate": 1.9532078633190097e-05, + "loss": 0.9085, + "step": 5760 + }, + { + "epoch": 0.2, + "learning_rate": 1.9528849341817316e-05, + "loss": 0.9679, + "step": 5780 + }, + { + "epoch": 0.2, + "learning_rate": 1.9525609214302727e-05, + "loss": 0.9845, + "step": 5800 + }, + { + "epoch": 0.2, + "learning_rate": 1.9522358254330987e-05, + "loss": 0.9942, + "step": 5820 + }, + { + "epoch": 0.2, + "learning_rate": 1.951909646559906e-05, + "loss": 0.9219, + "step": 5840 + }, + { + "epoch": 0.2, + "learning_rate": 1.951582385181623e-05, + "loss": 0.9227, + "step": 5860 + }, + { + "epoch": 0.2, + "learning_rate": 1.9512540416704095e-05, + "loss": 0.9447, + "step": 5880 + }, + { + "epoch": 0.2, + "learning_rate": 1.950924616399655e-05, + "loss": 0.9101, + "step": 5900 + }, + { + "epoch": 0.2, + "learning_rate": 1.9505941097439805e-05, + "loss": 0.9938, + "step": 5920 + }, + { + "epoch": 0.2, + "learning_rate": 1.9502625220792345e-05, + "loss": 0.9749, + "step": 5940 + }, + { + "epoch": 0.2, + "learning_rate": 1.9499298537824972e-05, + "loss": 0.9585, + "step": 5960 + }, + { + "epoch": 0.2, + "learning_rate": 1.949596105232077e-05, + "loss": 0.9813, + "step": 5980 + }, + { + "epoch": 0.2, + "learning_rate": 1.9492612768075094e-05, + "loss": 0.9358, + "step": 6000 + }, + { + "epoch": 0.2, + "learning_rate": 1.9489253688895594e-05, + "loss": 0.8976, + "step": 6020 + }, + { + "epoch": 0.21, + "learning_rate": 1.9485883818602194e-05, + "loss": 0.9685, + "step": 6040 + }, + { + "epoch": 0.21, + "learning_rate": 1.948250316102709e-05, + "loss": 0.9148, + "step": 6060 + }, + { + "epoch": 0.21, + "learning_rate": 1.9479111720014737e-05, + "loss": 0.8979, + "step": 6080 + }, + { + "epoch": 0.21, + "learning_rate": 1.947570949942186e-05, + "loss": 0.914, + "step": 6100 + }, + { + "epoch": 0.21, + "learning_rate": 1.9472296503117437e-05, + "loss": 0.9587, + "step": 6120 + }, + { + "epoch": 0.21, + "learning_rate": 1.9468872734982705e-05, + "loss": 0.9355, + "step": 6140 + }, + { + "epoch": 0.21, + "learning_rate": 1.9465438198911158e-05, + "loss": 0.925, + "step": 6160 + }, + { + "epoch": 0.21, + "learning_rate": 1.946199289880852e-05, + "loss": 0.8997, + "step": 6180 + }, + { + "epoch": 0.21, + "learning_rate": 1.9458536838592764e-05, + "loss": 0.9549, + "step": 6200 + }, + { + "epoch": 0.21, + "learning_rate": 1.9455070022194094e-05, + "loss": 0.8995, + "step": 6220 + }, + { + "epoch": 0.21, + "learning_rate": 1.9451592453554956e-05, + "loss": 0.967, + "step": 6240 + }, + { + "epoch": 0.21, + "learning_rate": 1.944810413663001e-05, + "loss": 0.945, + "step": 6260 + }, + { + "epoch": 0.21, + "learning_rate": 1.9444605075386156e-05, + "loss": 0.9658, + "step": 6280 + }, + { + "epoch": 0.21, + "learning_rate": 1.944109527380249e-05, + "loss": 0.9514, + "step": 6300 + }, + { + "epoch": 0.21, + "learning_rate": 1.9437574735870346e-05, + "loss": 0.9264, + "step": 6320 + }, + { + "epoch": 0.22, + "learning_rate": 1.9434043465593247e-05, + "loss": 0.9749, + "step": 6340 + }, + { + "epoch": 0.22, + "learning_rate": 1.9430501466986932e-05, + "loss": 0.9507, + "step": 6360 + }, + { + "epoch": 0.22, + "learning_rate": 1.9426948744079338e-05, + "loss": 0.9251, + "step": 6380 + }, + { + "epoch": 0.22, + "learning_rate": 1.94233853009106e-05, + "loss": 0.9631, + "step": 6400 + }, + { + "epoch": 0.22, + "learning_rate": 1.9419811141533032e-05, + "loss": 0.9639, + "step": 6420 + }, + { + "epoch": 0.22, + "learning_rate": 1.9416226270011152e-05, + "loss": 0.8744, + "step": 6440 + }, + { + "epoch": 0.22, + "learning_rate": 1.9412630690421648e-05, + "loss": 0.8919, + "step": 6460 + }, + { + "epoch": 0.22, + "learning_rate": 1.940902440685339e-05, + "loss": 0.9569, + "step": 6480 + }, + { + "epoch": 0.22, + "learning_rate": 1.9405407423407414e-05, + "loss": 0.9714, + "step": 6500 + }, + { + "epoch": 0.22, + "learning_rate": 1.9401779744196934e-05, + "loss": 0.904, + "step": 6520 + }, + { + "epoch": 0.22, + "learning_rate": 1.9398141373347317e-05, + "loss": 0.885, + "step": 6540 + }, + { + "epoch": 0.22, + "learning_rate": 1.93944923149961e-05, + "loss": 0.9219, + "step": 6560 + }, + { + "epoch": 0.22, + "learning_rate": 1.9390832573292964e-05, + "loss": 0.9908, + "step": 6580 + }, + { + "epoch": 0.22, + "learning_rate": 1.938716215239974e-05, + "loss": 0.8588, + "step": 6600 + }, + { + "epoch": 0.22, + "learning_rate": 1.9383481056490402e-05, + "loss": 0.8831, + "step": 6620 + }, + { + "epoch": 0.23, + "learning_rate": 1.9379789289751073e-05, + "loss": 0.9486, + "step": 6640 + }, + { + "epoch": 0.23, + "learning_rate": 1.9376086856380007e-05, + "loss": 0.9116, + "step": 6660 + }, + { + "epoch": 0.23, + "learning_rate": 1.937237376058758e-05, + "loss": 0.941, + "step": 6680 + }, + { + "epoch": 0.23, + "learning_rate": 1.93686500065963e-05, + "loss": 0.9913, + "step": 6700 + }, + { + "epoch": 0.23, + "learning_rate": 1.9364915598640796e-05, + "loss": 0.9316, + "step": 6720 + }, + { + "epoch": 0.23, + "learning_rate": 1.936117054096781e-05, + "loss": 0.8926, + "step": 6740 + }, + { + "epoch": 0.23, + "learning_rate": 1.9357414837836192e-05, + "loss": 0.8829, + "step": 6760 + }, + { + "epoch": 0.23, + "learning_rate": 1.935364849351691e-05, + "loss": 0.9302, + "step": 6780 + }, + { + "epoch": 0.23, + "learning_rate": 1.934987151229302e-05, + "loss": 0.8974, + "step": 6800 + }, + { + "epoch": 0.23, + "learning_rate": 1.934608389845968e-05, + "loss": 0.9036, + "step": 6820 + }, + { + "epoch": 0.23, + "learning_rate": 1.9342285656324135e-05, + "loss": 0.9103, + "step": 6840 + }, + { + "epoch": 0.23, + "learning_rate": 1.9338476790205726e-05, + "loss": 0.9448, + "step": 6860 + }, + { + "epoch": 0.23, + "learning_rate": 1.933465730443586e-05, + "loss": 0.9723, + "step": 6880 + }, + { + "epoch": 0.23, + "learning_rate": 1.9330827203358044e-05, + "loss": 0.9088, + "step": 6900 + }, + { + "epoch": 0.23, + "learning_rate": 1.932698649132783e-05, + "loss": 0.9263, + "step": 6920 + }, + { + "epoch": 0.24, + "learning_rate": 1.932313517271285e-05, + "loss": 0.9501, + "step": 6940 + }, + { + "epoch": 0.24, + "learning_rate": 1.9319273251892805e-05, + "loss": 0.9005, + "step": 6960 + }, + { + "epoch": 0.24, + "learning_rate": 1.931540073325944e-05, + "loss": 0.9459, + "step": 6980 + }, + { + "epoch": 0.24, + "learning_rate": 1.9311517621216555e-05, + "loss": 0.9368, + "step": 7000 + }, + { + "epoch": 0.24, + "learning_rate": 1.930762392018e-05, + "loss": 0.9028, + "step": 7020 + }, + { + "epoch": 0.24, + "learning_rate": 1.9303719634577668e-05, + "loss": 0.9928, + "step": 7040 + }, + { + "epoch": 0.24, + "learning_rate": 1.929980476884948e-05, + "loss": 0.9081, + "step": 7060 + }, + { + "epoch": 0.24, + "learning_rate": 1.92958793274474e-05, + "loss": 0.9539, + "step": 7080 + }, + { + "epoch": 0.24, + "learning_rate": 1.9291943314835406e-05, + "loss": 0.9098, + "step": 7100 + }, + { + "epoch": 0.24, + "learning_rate": 1.928799673548951e-05, + "loss": 0.9151, + "step": 7120 + }, + { + "epoch": 0.24, + "learning_rate": 1.9284039593897734e-05, + "loss": 0.9074, + "step": 7140 + }, + { + "epoch": 0.24, + "learning_rate": 1.928007189456011e-05, + "loss": 0.8805, + "step": 7160 + }, + { + "epoch": 0.24, + "learning_rate": 1.9276093641988684e-05, + "loss": 0.9018, + "step": 7180 + }, + { + "epoch": 0.24, + "learning_rate": 1.9272104840707487e-05, + "loss": 0.9576, + "step": 7200 + }, + { + "epoch": 0.25, + "learning_rate": 1.9268105495252568e-05, + "loss": 0.9561, + "step": 7220 + }, + { + "epoch": 0.25, + "learning_rate": 1.9264095610171944e-05, + "loss": 0.8706, + "step": 7240 + }, + { + "epoch": 0.25, + "learning_rate": 1.926007519002564e-05, + "loss": 0.9495, + "step": 7260 + }, + { + "epoch": 0.25, + "learning_rate": 1.925604423938564e-05, + "loss": 0.9262, + "step": 7280 + }, + { + "epoch": 0.25, + "learning_rate": 1.9252002762835917e-05, + "loss": 0.9658, + "step": 7300 + }, + { + "epoch": 0.25, + "learning_rate": 1.924795076497241e-05, + "loss": 0.9591, + "step": 7320 + }, + { + "epoch": 0.25, + "learning_rate": 1.924388825040303e-05, + "loss": 0.9324, + "step": 7340 + }, + { + "epoch": 0.25, + "learning_rate": 1.9239815223747632e-05, + "loss": 0.8588, + "step": 7360 + }, + { + "epoch": 0.25, + "learning_rate": 1.9235731689638035e-05, + "loss": 0.8995, + "step": 7380 + }, + { + "epoch": 0.25, + "learning_rate": 1.9231637652718004e-05, + "loss": 0.8604, + "step": 7400 + }, + { + "epoch": 0.25, + "learning_rate": 1.9227533117643256e-05, + "loss": 0.918, + "step": 7420 + }, + { + "epoch": 0.25, + "learning_rate": 1.922341808908144e-05, + "loss": 0.9008, + "step": 7440 + }, + { + "epoch": 0.25, + "learning_rate": 1.921929257171213e-05, + "loss": 0.9243, + "step": 7460 + }, + { + "epoch": 0.25, + "learning_rate": 1.9215156570226844e-05, + "loss": 0.9138, + "step": 7480 + }, + { + "epoch": 0.25, + "learning_rate": 1.921101008932901e-05, + "loss": 0.9794, + "step": 7500 + }, + { + "epoch": 0.26, + "learning_rate": 1.920685313373398e-05, + "loss": 0.8578, + "step": 7520 + }, + { + "epoch": 0.26, + "learning_rate": 1.920268570816901e-05, + "loss": 0.9052, + "step": 7540 + }, + { + "epoch": 0.26, + "learning_rate": 1.9198507817373274e-05, + "loss": 0.9486, + "step": 7560 + }, + { + "epoch": 0.26, + "learning_rate": 1.919431946609784e-05, + "loss": 0.9456, + "step": 7580 + }, + { + "epoch": 0.26, + "learning_rate": 1.9190120659105663e-05, + "loss": 0.9079, + "step": 7600 + }, + { + "epoch": 0.26, + "learning_rate": 1.9185911401171615e-05, + "loss": 0.9492, + "step": 7620 + }, + { + "epoch": 0.26, + "learning_rate": 1.918169169708242e-05, + "loss": 0.9227, + "step": 7640 + }, + { + "epoch": 0.26, + "learning_rate": 1.91774615516367e-05, + "loss": 0.9094, + "step": 7660 + }, + { + "epoch": 0.26, + "learning_rate": 1.9173220969644947e-05, + "loss": 0.8853, + "step": 7680 + }, + { + "epoch": 0.26, + "learning_rate": 1.916896995592953e-05, + "loss": 0.9563, + "step": 7700 + }, + { + "epoch": 0.26, + "learning_rate": 1.916470851532466e-05, + "loss": 0.9817, + "step": 7720 + }, + { + "epoch": 0.26, + "learning_rate": 1.9160436652676425e-05, + "loss": 1.0074, + "step": 7740 + }, + { + "epoch": 0.26, + "learning_rate": 1.9156154372842757e-05, + "loss": 0.9542, + "step": 7760 + }, + { + "epoch": 0.26, + "learning_rate": 1.9151861680693432e-05, + "loss": 0.8859, + "step": 7780 + }, + { + "epoch": 0.26, + "learning_rate": 1.914755858111008e-05, + "loss": 0.9705, + "step": 7800 + }, + { + "epoch": 0.27, + "learning_rate": 1.914324507898614e-05, + "loss": 0.8557, + "step": 7820 + }, + { + "epoch": 0.27, + "learning_rate": 1.9138921179226908e-05, + "loss": 0.8834, + "step": 7840 + }, + { + "epoch": 0.27, + "learning_rate": 1.9134586886749492e-05, + "loss": 0.9064, + "step": 7860 + }, + { + "epoch": 0.27, + "learning_rate": 1.9130242206482817e-05, + "loss": 0.8905, + "step": 7880 + }, + { + "epoch": 0.27, + "learning_rate": 1.912588714336762e-05, + "loss": 0.88, + "step": 7900 + }, + { + "epoch": 0.27, + "learning_rate": 1.912152170235646e-05, + "loss": 0.8668, + "step": 7920 + }, + { + "epoch": 0.27, + "learning_rate": 1.9117145888413678e-05, + "loss": 0.9504, + "step": 7940 + }, + { + "epoch": 0.27, + "learning_rate": 1.911275970651542e-05, + "loss": 0.9257, + "step": 7960 + }, + { + "epoch": 0.27, + "learning_rate": 1.910836316164962e-05, + "loss": 0.9762, + "step": 7980 + }, + { + "epoch": 0.27, + "learning_rate": 1.9103956258816008e-05, + "loss": 0.9331, + "step": 8000 + }, + { + "epoch": 0.27, + "learning_rate": 1.909953900302607e-05, + "loss": 0.8791, + "step": 8020 + }, + { + "epoch": 0.27, + "learning_rate": 1.909511139930309e-05, + "loss": 0.9448, + "step": 8040 + }, + { + "epoch": 0.27, + "learning_rate": 1.9090673452682108e-05, + "loss": 0.8983, + "step": 8060 + }, + { + "epoch": 0.27, + "learning_rate": 1.9086225168209917e-05, + "loss": 0.926, + "step": 8080 + }, + { + "epoch": 0.27, + "learning_rate": 1.9081766550945084e-05, + "loss": 0.9388, + "step": 8100 + }, + { + "epoch": 0.28, + "learning_rate": 1.907729760595791e-05, + "loss": 0.9377, + "step": 8120 + }, + { + "epoch": 0.28, + "learning_rate": 1.9072818338330455e-05, + "loss": 0.9594, + "step": 8140 + }, + { + "epoch": 0.28, + "learning_rate": 1.9068328753156514e-05, + "loss": 0.8816, + "step": 8160 + }, + { + "epoch": 0.28, + "learning_rate": 1.9063828855541605e-05, + "loss": 0.9118, + "step": 8180 + }, + { + "epoch": 0.28, + "learning_rate": 1.9059318650602986e-05, + "loss": 0.9076, + "step": 8200 + }, + { + "epoch": 0.28, + "learning_rate": 1.905479814346962e-05, + "loss": 0.8466, + "step": 8220 + }, + { + "epoch": 0.28, + "learning_rate": 1.9050267339282214e-05, + "loss": 0.9428, + "step": 8240 + }, + { + "epoch": 0.28, + "learning_rate": 1.9045726243193155e-05, + "loss": 0.9305, + "step": 8260 + }, + { + "epoch": 0.28, + "learning_rate": 1.904117486036655e-05, + "loss": 0.8242, + "step": 8280 + }, + { + "epoch": 0.28, + "learning_rate": 1.9036613195978198e-05, + "loss": 0.902, + "step": 8300 + }, + { + "epoch": 0.28, + "learning_rate": 1.9032041255215596e-05, + "loss": 0.8798, + "step": 8320 + }, + { + "epoch": 0.28, + "learning_rate": 1.9027459043277923e-05, + "loss": 0.9021, + "step": 8340 + }, + { + "epoch": 0.28, + "learning_rate": 1.902286656537603e-05, + "loss": 0.8776, + "step": 8360 + }, + { + "epoch": 0.28, + "learning_rate": 1.9018263826732467e-05, + "loss": 0.9082, + "step": 8380 + }, + { + "epoch": 0.29, + "learning_rate": 1.9013650832581424e-05, + "loss": 0.8431, + "step": 8400 + }, + { + "epoch": 0.29, + "learning_rate": 1.9009027588168773e-05, + "loss": 0.8934, + "step": 8420 + }, + { + "epoch": 0.29, + "learning_rate": 1.900439409875203e-05, + "loss": 0.9072, + "step": 8440 + }, + { + "epoch": 0.29, + "learning_rate": 1.899975036960037e-05, + "loss": 0.9326, + "step": 8460 + }, + { + "epoch": 0.29, + "learning_rate": 1.8995096405994614e-05, + "loss": 0.9009, + "step": 8480 + }, + { + "epoch": 0.29, + "learning_rate": 1.899043221322721e-05, + "loss": 0.972, + "step": 8500 + }, + { + "epoch": 0.29, + "learning_rate": 1.898575779660225e-05, + "loss": 0.9269, + "step": 8520 + }, + { + "epoch": 0.29, + "learning_rate": 1.8981073161435447e-05, + "loss": 0.8519, + "step": 8540 + }, + { + "epoch": 0.29, + "learning_rate": 1.897637831305414e-05, + "loss": 0.8876, + "step": 8560 + }, + { + "epoch": 0.29, + "learning_rate": 1.8971673256797275e-05, + "loss": 0.8401, + "step": 8580 + }, + { + "epoch": 0.29, + "learning_rate": 1.8966957998015403e-05, + "loss": 0.9571, + "step": 8600 + }, + { + "epoch": 0.29, + "learning_rate": 1.896223254207069e-05, + "loss": 0.9073, + "step": 8620 + }, + { + "epoch": 0.29, + "learning_rate": 1.89574968943369e-05, + "loss": 0.8836, + "step": 8640 + }, + { + "epoch": 0.29, + "learning_rate": 1.895275106019937e-05, + "loss": 0.9202, + "step": 8660 + }, + { + "epoch": 0.29, + "learning_rate": 1.8947995045055023e-05, + "loss": 0.8841, + "step": 8680 + }, + { + "epoch": 0.3, + "learning_rate": 1.894322885431238e-05, + "loss": 0.8863, + "step": 8700 + }, + { + "epoch": 0.3, + "learning_rate": 1.8938452493391515e-05, + "loss": 1.0004, + "step": 8720 + }, + { + "epoch": 0.3, + "learning_rate": 1.8933665967724072e-05, + "loss": 0.8558, + "step": 8740 + }, + { + "epoch": 0.3, + "learning_rate": 1.892886928275325e-05, + "loss": 0.9001, + "step": 8760 + }, + { + "epoch": 0.3, + "learning_rate": 1.892406244393382e-05, + "loss": 0.9562, + "step": 8780 + }, + { + "epoch": 0.3, + "learning_rate": 1.891924545673207e-05, + "loss": 0.9127, + "step": 8800 + }, + { + "epoch": 0.3, + "learning_rate": 1.891441832662586e-05, + "loss": 0.929, + "step": 8820 + }, + { + "epoch": 0.3, + "learning_rate": 1.8909581059104557e-05, + "loss": 0.8909, + "step": 8840 + }, + { + "epoch": 0.3, + "learning_rate": 1.8904733659669072e-05, + "loss": 0.9258, + "step": 8860 + }, + { + "epoch": 0.3, + "learning_rate": 1.8899876133831835e-05, + "loss": 0.9803, + "step": 8880 + }, + { + "epoch": 0.3, + "learning_rate": 1.8895008487116793e-05, + "loss": 0.8716, + "step": 8900 + }, + { + "epoch": 0.3, + "learning_rate": 1.8890130725059396e-05, + "loss": 0.924, + "step": 8920 + }, + { + "epoch": 0.3, + "learning_rate": 1.8885242853206606e-05, + "loss": 0.8649, + "step": 8940 + }, + { + "epoch": 0.3, + "learning_rate": 1.888034487711687e-05, + "loss": 0.9427, + "step": 8960 + }, + { + "epoch": 0.3, + "learning_rate": 1.8875436802360142e-05, + "loss": 0.9313, + "step": 8980 + }, + { + "epoch": 0.31, + "learning_rate": 1.8870518634517843e-05, + "loss": 0.9407, + "step": 9000 + }, + { + "epoch": 0.31, + "learning_rate": 1.8865590379182885e-05, + "loss": 0.9581, + "step": 9020 + }, + { + "epoch": 0.31, + "learning_rate": 1.8860652041959643e-05, + "loss": 0.8846, + "step": 9040 + }, + { + "epoch": 0.31, + "learning_rate": 1.8855703628463957e-05, + "loss": 0.8592, + "step": 9060 + }, + { + "epoch": 0.31, + "learning_rate": 1.8850745144323137e-05, + "loss": 0.9586, + "step": 9080 + }, + { + "epoch": 0.31, + "learning_rate": 1.884577659517593e-05, + "loss": 0.9452, + "step": 9100 + }, + { + "epoch": 0.31, + "learning_rate": 1.884079798667254e-05, + "loss": 0.8987, + "step": 9120 + }, + { + "epoch": 0.31, + "learning_rate": 1.88358093244746e-05, + "loss": 0.925, + "step": 9140 + }, + { + "epoch": 0.31, + "learning_rate": 1.883081061425519e-05, + "loss": 0.8806, + "step": 9160 + }, + { + "epoch": 0.31, + "learning_rate": 1.882580186169881e-05, + "loss": 0.8575, + "step": 9180 + }, + { + "epoch": 0.31, + "learning_rate": 1.882078307250137e-05, + "loss": 0.8711, + "step": 9200 + }, + { + "epoch": 0.31, + "learning_rate": 1.8815754252370205e-05, + "loss": 0.9003, + "step": 9220 + }, + { + "epoch": 0.31, + "learning_rate": 1.881071540702406e-05, + "loss": 0.9115, + "step": 9240 + }, + { + "epoch": 0.31, + "learning_rate": 1.8805666542193073e-05, + "loss": 0.9562, + "step": 9260 + }, + { + "epoch": 0.32, + "learning_rate": 1.8800607663618774e-05, + "loss": 0.8856, + "step": 9280 + }, + { + "epoch": 0.32, + "learning_rate": 1.879553877705409e-05, + "loss": 0.8638, + "step": 9300 + }, + { + "epoch": 0.32, + "learning_rate": 1.8790459888263323e-05, + "loss": 0.882, + "step": 9320 + }, + { + "epoch": 0.32, + "learning_rate": 1.8785371003022145e-05, + "loss": 0.8989, + "step": 9340 + }, + { + "epoch": 0.32, + "learning_rate": 1.8780272127117606e-05, + "loss": 0.9034, + "step": 9360 + }, + { + "epoch": 0.32, + "learning_rate": 1.8775163266348116e-05, + "loss": 0.8835, + "step": 9380 + }, + { + "epoch": 0.32, + "learning_rate": 1.877004442652343e-05, + "loss": 0.816, + "step": 9400 + }, + { + "epoch": 0.32, + "learning_rate": 1.8764915613464657e-05, + "loss": 0.8363, + "step": 9420 + }, + { + "epoch": 0.32, + "learning_rate": 1.8759776833004256e-05, + "loss": 0.869, + "step": 9440 + }, + { + "epoch": 0.32, + "learning_rate": 1.8754628090986e-05, + "loss": 0.8829, + "step": 9460 + }, + { + "epoch": 0.32, + "learning_rate": 1.8749469393265016e-05, + "loss": 0.903, + "step": 9480 + }, + { + "epoch": 0.32, + "learning_rate": 1.874430074570773e-05, + "loss": 0.9455, + "step": 9500 + }, + { + "epoch": 0.32, + "learning_rate": 1.87391221541919e-05, + "loss": 0.9328, + "step": 9520 + }, + { + "epoch": 0.32, + "learning_rate": 1.873393362460658e-05, + "loss": 0.9131, + "step": 9540 + }, + { + "epoch": 0.32, + "learning_rate": 1.8728735162852125e-05, + "loss": 0.9119, + "step": 9560 + }, + { + "epoch": 0.33, + "learning_rate": 1.87235267748402e-05, + "loss": 0.8505, + "step": 9580 + }, + { + "epoch": 0.33, + "learning_rate": 1.8718308466493745e-05, + "loss": 0.8926, + "step": 9600 + }, + { + "epoch": 0.33, + "learning_rate": 1.871308024374698e-05, + "loss": 0.8904, + "step": 9620 + }, + { + "epoch": 0.33, + "learning_rate": 1.8707842112545406e-05, + "loss": 0.8506, + "step": 9640 + }, + { + "epoch": 0.33, + "learning_rate": 1.870259407884579e-05, + "loss": 0.903, + "step": 9660 + }, + { + "epoch": 0.33, + "learning_rate": 1.8697336148616162e-05, + "loss": 0.9225, + "step": 9680 + }, + { + "epoch": 0.33, + "learning_rate": 1.86920683278358e-05, + "loss": 0.8845, + "step": 9700 + }, + { + "epoch": 0.33, + "learning_rate": 1.868679062249524e-05, + "loss": 0.8764, + "step": 9720 + }, + { + "epoch": 0.33, + "learning_rate": 1.8681503038596248e-05, + "loss": 0.9878, + "step": 9740 + }, + { + "epoch": 0.33, + "learning_rate": 1.8676205582151822e-05, + "loss": 0.9531, + "step": 9760 + }, + { + "epoch": 0.33, + "learning_rate": 1.8670898259186202e-05, + "loss": 0.8972, + "step": 9780 + }, + { + "epoch": 0.33, + "learning_rate": 1.8665581075734836e-05, + "loss": 0.8453, + "step": 9800 + }, + { + "epoch": 0.33, + "learning_rate": 1.866025403784439e-05, + "loss": 0.9649, + "step": 9820 + }, + { + "epoch": 0.33, + "learning_rate": 1.865491715157273e-05, + "loss": 0.886, + "step": 9840 + }, + { + "epoch": 0.33, + "learning_rate": 1.864957042298893e-05, + "loss": 0.8603, + "step": 9860 + }, + { + "epoch": 0.34, + "learning_rate": 1.864421385817325e-05, + "loss": 0.9179, + "step": 9880 + }, + { + "epoch": 0.34, + "learning_rate": 1.8638847463217145e-05, + "loss": 0.8456, + "step": 9900 + }, + { + "epoch": 0.34, + "learning_rate": 1.8633471244223232e-05, + "loss": 0.8553, + "step": 9920 + }, + { + "epoch": 0.34, + "learning_rate": 1.8628085207305325e-05, + "loss": 0.8574, + "step": 9940 + }, + { + "epoch": 0.34, + "learning_rate": 1.8622689358588373e-05, + "loss": 0.8474, + "step": 9960 + }, + { + "epoch": 0.34, + "learning_rate": 1.8617283704208507e-05, + "loss": 0.905, + "step": 9980 + }, + { + "epoch": 0.34, + "learning_rate": 1.8611868250312994e-05, + "loss": 0.9268, + "step": 10000 + }, + { + "epoch": 0.34, + "learning_rate": 1.860644300306026e-05, + "loss": 0.8962, + "step": 10020 + }, + { + "epoch": 0.34, + "learning_rate": 1.8601007968619847e-05, + "loss": 0.9645, + "step": 10040 + }, + { + "epoch": 0.34, + "learning_rate": 1.8595563153172448e-05, + "loss": 0.9035, + "step": 10060 + }, + { + "epoch": 0.34, + "learning_rate": 1.8590108562909862e-05, + "loss": 0.9684, + "step": 10080 + }, + { + "epoch": 0.34, + "learning_rate": 1.858464420403502e-05, + "loss": 0.863, + "step": 10100 + }, + { + "epoch": 0.34, + "learning_rate": 1.857917008276195e-05, + "loss": 0.9421, + "step": 10120 + }, + { + "epoch": 0.34, + "learning_rate": 1.857368620531578e-05, + "loss": 0.8535, + "step": 10140 + }, + { + "epoch": 0.34, + "learning_rate": 1.8568192577932746e-05, + "loss": 0.8553, + "step": 10160 + }, + { + "epoch": 0.35, + "learning_rate": 1.8562689206860155e-05, + "loss": 0.8977, + "step": 10180 + }, + { + "epoch": 0.35, + "learning_rate": 1.8557176098356406e-05, + "loss": 0.9678, + "step": 10200 + }, + { + "epoch": 0.35, + "learning_rate": 1.8551653258690972e-05, + "loss": 0.8443, + "step": 10220 + }, + { + "epoch": 0.35, + "learning_rate": 1.854612069414438e-05, + "loss": 0.9013, + "step": 10240 + }, + { + "epoch": 0.35, + "learning_rate": 1.8540578411008233e-05, + "loss": 0.8929, + "step": 10260 + }, + { + "epoch": 0.35, + "learning_rate": 1.8535026415585168e-05, + "loss": 0.9134, + "step": 10280 + }, + { + "epoch": 0.35, + "learning_rate": 1.8529464714188888e-05, + "loss": 0.898, + "step": 10300 + }, + { + "epoch": 0.35, + "learning_rate": 1.852389331314411e-05, + "loss": 0.8557, + "step": 10320 + }, + { + "epoch": 0.35, + "learning_rate": 1.85183122187866e-05, + "loss": 0.9308, + "step": 10340 + }, + { + "epoch": 0.35, + "learning_rate": 1.8512721437463136e-05, + "loss": 0.8048, + "step": 10360 + }, + { + "epoch": 0.35, + "learning_rate": 1.8507120975531517e-05, + "loss": 0.8329, + "step": 10380 + }, + { + "epoch": 0.35, + "learning_rate": 1.850151083936055e-05, + "loss": 0.9244, + "step": 10400 + }, + { + "epoch": 0.35, + "learning_rate": 1.849589103533004e-05, + "loss": 0.8769, + "step": 10420 + }, + { + "epoch": 0.35, + "learning_rate": 1.84902615698308e-05, + "loss": 0.8796, + "step": 10440 + }, + { + "epoch": 0.36, + "learning_rate": 1.8484622449264608e-05, + "loss": 0.8567, + "step": 10460 + }, + { + "epoch": 0.36, + "learning_rate": 1.8478973680044237e-05, + "loss": 0.8471, + "step": 10480 + }, + { + "epoch": 0.36, + "learning_rate": 1.8473315268593432e-05, + "loss": 0.8955, + "step": 10500 + }, + { + "epoch": 0.36, + "learning_rate": 1.846764722134689e-05, + "loss": 0.8996, + "step": 10520 + }, + { + "epoch": 0.36, + "learning_rate": 1.8461969544750286e-05, + "loss": 0.8435, + "step": 10540 + }, + { + "epoch": 0.36, + "learning_rate": 1.845628224526023e-05, + "loss": 0.862, + "step": 10560 + }, + { + "epoch": 0.36, + "learning_rate": 1.8450585329344277e-05, + "loss": 0.8905, + "step": 10580 + }, + { + "epoch": 0.36, + "learning_rate": 1.8444878803480925e-05, + "loss": 0.9191, + "step": 10600 + }, + { + "epoch": 0.36, + "learning_rate": 1.8439162674159595e-05, + "loss": 0.9315, + "step": 10620 + }, + { + "epoch": 0.36, + "learning_rate": 1.8433436947880626e-05, + "loss": 0.8691, + "step": 10640 + }, + { + "epoch": 0.36, + "learning_rate": 1.842770163115528e-05, + "loss": 0.9158, + "step": 10660 + }, + { + "epoch": 0.36, + "learning_rate": 1.842195673050572e-05, + "loss": 0.9328, + "step": 10680 + }, + { + "epoch": 0.36, + "learning_rate": 1.8416202252465e-05, + "loss": 0.8776, + "step": 10700 + }, + { + "epoch": 0.36, + "learning_rate": 1.841043820357708e-05, + "loss": 0.8524, + "step": 10720 + }, + { + "epoch": 0.36, + "learning_rate": 1.8404664590396796e-05, + "loss": 0.8113, + "step": 10740 + }, + { + "epoch": 0.37, + "learning_rate": 1.8398881419489854e-05, + "loss": 0.8449, + "step": 10760 + }, + { + "epoch": 0.37, + "learning_rate": 1.8393088697432847e-05, + "loss": 0.8801, + "step": 10780 + }, + { + "epoch": 0.37, + "learning_rate": 1.838728643081321e-05, + "loss": 0.8911, + "step": 10800 + }, + { + "epoch": 0.37, + "learning_rate": 1.8381474626229245e-05, + "loss": 0.8365, + "step": 10820 + }, + { + "epoch": 0.37, + "learning_rate": 1.8375653290290093e-05, + "loss": 0.8991, + "step": 10840 + }, + { + "epoch": 0.37, + "learning_rate": 1.836982242961574e-05, + "loss": 0.8696, + "step": 10860 + }, + { + "epoch": 0.37, + "learning_rate": 1.8363982050836996e-05, + "loss": 0.8692, + "step": 10880 + }, + { + "epoch": 0.37, + "learning_rate": 1.835813216059551e-05, + "loss": 0.892, + "step": 10900 + }, + { + "epoch": 0.37, + "learning_rate": 1.8352272765543723e-05, + "loss": 0.8675, + "step": 10920 + }, + { + "epoch": 0.37, + "learning_rate": 1.8346403872344907e-05, + "loss": 0.8757, + "step": 10940 + }, + { + "epoch": 0.37, + "learning_rate": 1.8340525487673128e-05, + "loss": 0.942, + "step": 10960 + }, + { + "epoch": 0.37, + "learning_rate": 1.8334637618213238e-05, + "loss": 0.9497, + "step": 10980 + }, + { + "epoch": 0.37, + "learning_rate": 1.8328740270660887e-05, + "loss": 0.8416, + "step": 11000 + }, + { + "epoch": 0.37, + "learning_rate": 1.83228334517225e-05, + "loss": 0.8326, + "step": 11020 + }, + { + "epoch": 0.37, + "learning_rate": 1.831691716811526e-05, + "loss": 0.8522, + "step": 11040 + }, + { + "epoch": 0.38, + "learning_rate": 1.8310991426567137e-05, + "loss": 0.9059, + "step": 11060 + }, + { + "epoch": 0.38, + "learning_rate": 1.8305056233816833e-05, + "loss": 0.9037, + "step": 11080 + }, + { + "epoch": 0.38, + "learning_rate": 1.8299111596613815e-05, + "loss": 0.8691, + "step": 11100 + }, + { + "epoch": 0.38, + "learning_rate": 1.8293157521718284e-05, + "loss": 0.8107, + "step": 11120 + }, + { + "epoch": 0.38, + "learning_rate": 1.8287194015901167e-05, + "loss": 0.9504, + "step": 11140 + }, + { + "epoch": 0.38, + "learning_rate": 1.8281221085944128e-05, + "loss": 0.8615, + "step": 11160 + }, + { + "epoch": 0.38, + "learning_rate": 1.827523873863954e-05, + "loss": 0.8942, + "step": 11180 + }, + { + "epoch": 0.38, + "learning_rate": 1.8269246980790482e-05, + "loss": 0.927, + "step": 11200 + }, + { + "epoch": 0.38, + "learning_rate": 1.8263245819210745e-05, + "loss": 0.9224, + "step": 11220 + }, + { + "epoch": 0.38, + "learning_rate": 1.825723526072481e-05, + "loss": 0.9215, + "step": 11240 + }, + { + "epoch": 0.38, + "learning_rate": 1.8251215312167833e-05, + "loss": 0.8342, + "step": 11260 + }, + { + "epoch": 0.38, + "learning_rate": 1.8245185980385673e-05, + "loss": 0.8617, + "step": 11280 + }, + { + "epoch": 0.38, + "learning_rate": 1.823914727223483e-05, + "loss": 0.8649, + "step": 11300 + }, + { + "epoch": 0.38, + "learning_rate": 1.823309919458249e-05, + "loss": 0.9609, + "step": 11320 + }, + { + "epoch": 0.38, + "learning_rate": 1.8227041754306484e-05, + "loss": 0.9503, + "step": 11340 + }, + { + "epoch": 0.39, + "learning_rate": 1.822097495829529e-05, + "loss": 0.8748, + "step": 11360 + }, + { + "epoch": 0.39, + "learning_rate": 1.821489881344803e-05, + "loss": 0.9055, + "step": 11380 + }, + { + "epoch": 0.39, + "learning_rate": 1.8208813326674444e-05, + "loss": 0.8596, + "step": 11400 + }, + { + "epoch": 0.39, + "learning_rate": 1.8202718504894917e-05, + "loss": 0.9291, + "step": 11420 + }, + { + "epoch": 0.39, + "learning_rate": 1.8196614355040436e-05, + "loss": 0.9413, + "step": 11440 + }, + { + "epoch": 0.39, + "learning_rate": 1.819050088405259e-05, + "loss": 0.9221, + "step": 11460 + }, + { + "epoch": 0.39, + "learning_rate": 1.818437809888359e-05, + "loss": 0.9094, + "step": 11480 + }, + { + "epoch": 0.39, + "learning_rate": 1.817824600649621e-05, + "loss": 0.821, + "step": 11500 + }, + { + "epoch": 0.39, + "learning_rate": 1.8172104613863838e-05, + "loss": 0.9144, + "step": 11520 + }, + { + "epoch": 0.39, + "learning_rate": 1.816595392797041e-05, + "loss": 0.8497, + "step": 11540 + }, + { + "epoch": 0.39, + "learning_rate": 1.8159793955810453e-05, + "loss": 0.9188, + "step": 11560 + }, + { + "epoch": 0.39, + "learning_rate": 1.815362470438904e-05, + "loss": 0.8214, + "step": 11580 + }, + { + "epoch": 0.39, + "learning_rate": 1.8147446180721802e-05, + "loss": 0.9231, + "step": 11600 + }, + { + "epoch": 0.39, + "learning_rate": 1.8141258391834914e-05, + "loss": 0.8962, + "step": 11620 + }, + { + "epoch": 0.4, + "learning_rate": 1.8135061344765088e-05, + "loss": 0.8641, + "step": 11640 + }, + { + "epoch": 0.4, + "learning_rate": 1.812885504655956e-05, + "loss": 0.8497, + "step": 11660 + }, + { + "epoch": 0.4, + "learning_rate": 1.8122639504276096e-05, + "loss": 0.8791, + "step": 11680 + }, + { + "epoch": 0.4, + "learning_rate": 1.811641472498296e-05, + "loss": 0.8411, + "step": 11700 + }, + { + "epoch": 0.4, + "learning_rate": 1.8110180715758934e-05, + "loss": 0.9839, + "step": 11720 + }, + { + "epoch": 0.4, + "learning_rate": 1.8103937483693288e-05, + "loss": 0.9035, + "step": 11740 + }, + { + "epoch": 0.4, + "learning_rate": 1.809768503588578e-05, + "loss": 0.8555, + "step": 11760 + }, + { + "epoch": 0.4, + "learning_rate": 1.8091423379446658e-05, + "loss": 0.8599, + "step": 11780 + }, + { + "epoch": 0.4, + "learning_rate": 1.8085152521496627e-05, + "loss": 0.8166, + "step": 11800 + }, + { + "epoch": 0.4, + "learning_rate": 1.8078872469166864e-05, + "loss": 0.8676, + "step": 11820 + }, + { + "epoch": 0.4, + "learning_rate": 1.8072583229599004e-05, + "loss": 0.8732, + "step": 11840 + }, + { + "epoch": 0.4, + "learning_rate": 1.8066284809945128e-05, + "loss": 0.9253, + "step": 11860 + }, + { + "epoch": 0.4, + "learning_rate": 1.8059977217367753e-05, + "loss": 0.918, + "step": 11880 + }, + { + "epoch": 0.4, + "learning_rate": 1.8053660459039833e-05, + "loss": 0.8492, + "step": 11900 + }, + { + "epoch": 0.4, + "learning_rate": 1.8047334542144738e-05, + "loss": 0.8387, + "step": 11920 + }, + { + "epoch": 0.41, + "learning_rate": 1.8040999473876258e-05, + "loss": 0.8212, + "step": 11940 + }, + { + "epoch": 0.41, + "learning_rate": 1.8034655261438588e-05, + "loss": 0.8189, + "step": 11960 + }, + { + "epoch": 0.41, + "learning_rate": 1.8028301912046325e-05, + "loss": 0.8555, + "step": 11980 + }, + { + "epoch": 0.41, + "learning_rate": 1.8021939432924456e-05, + "loss": 0.8558, + "step": 12000 + }, + { + "epoch": 0.41, + "learning_rate": 1.8015567831308344e-05, + "loss": 0.935, + "step": 12020 + }, + { + "epoch": 0.41, + "learning_rate": 1.800918711444373e-05, + "loss": 0.8454, + "step": 12040 + }, + { + "epoch": 0.41, + "learning_rate": 1.8002797289586726e-05, + "loss": 0.8995, + "step": 12060 + }, + { + "epoch": 0.41, + "learning_rate": 1.7996398364003797e-05, + "loss": 0.8524, + "step": 12080 + }, + { + "epoch": 0.41, + "learning_rate": 1.7989990344971747e-05, + "loss": 0.8203, + "step": 12100 + }, + { + "epoch": 0.41, + "learning_rate": 1.7983573239777746e-05, + "loss": 0.8227, + "step": 12120 + }, + { + "epoch": 0.41, + "learning_rate": 1.797714705571927e-05, + "loss": 0.8572, + "step": 12140 + }, + { + "epoch": 0.41, + "learning_rate": 1.7970711800104138e-05, + "loss": 0.8715, + "step": 12160 + }, + { + "epoch": 0.41, + "learning_rate": 1.7964267480250476e-05, + "loss": 0.8814, + "step": 12180 + }, + { + "epoch": 0.41, + "learning_rate": 1.7957814103486716e-05, + "loss": 0.8674, + "step": 12200 + }, + { + "epoch": 0.41, + "learning_rate": 1.7951351677151604e-05, + "loss": 0.8933, + "step": 12220 + }, + { + "epoch": 0.42, + "learning_rate": 1.7944880208594156e-05, + "loss": 0.8717, + "step": 12240 + }, + { + "epoch": 0.42, + "learning_rate": 1.7938399705173687e-05, + "loss": 0.8722, + "step": 12260 + }, + { + "epoch": 0.42, + "learning_rate": 1.7931910174259777e-05, + "loss": 0.8855, + "step": 12280 + }, + { + "epoch": 0.42, + "learning_rate": 1.792541162323228e-05, + "loss": 0.8634, + "step": 12300 + }, + { + "epoch": 0.42, + "learning_rate": 1.7918904059481303e-05, + "loss": 0.9169, + "step": 12320 + }, + { + "epoch": 0.42, + "learning_rate": 1.79123874904072e-05, + "loss": 0.9152, + "step": 12340 + }, + { + "epoch": 0.42, + "learning_rate": 1.7905861923420568e-05, + "loss": 0.8336, + "step": 12360 + }, + { + "epoch": 0.42, + "learning_rate": 1.7899327365942242e-05, + "loss": 0.8865, + "step": 12380 + }, + { + "epoch": 0.42, + "learning_rate": 1.7892783825403272e-05, + "loss": 0.9224, + "step": 12400 + }, + { + "epoch": 0.42, + "learning_rate": 1.788623130924493e-05, + "loss": 0.899, + "step": 12420 + }, + { + "epoch": 0.42, + "learning_rate": 1.7879669824918685e-05, + "loss": 0.8966, + "step": 12440 + }, + { + "epoch": 0.42, + "learning_rate": 1.7873099379886217e-05, + "loss": 0.896, + "step": 12460 + }, + { + "epoch": 0.42, + "learning_rate": 1.7866519981619396e-05, + "loss": 0.9448, + "step": 12480 + }, + { + "epoch": 0.42, + "learning_rate": 1.7859931637600256e-05, + "loss": 0.8598, + "step": 12500 + }, + { + "epoch": 0.42, + "learning_rate": 1.7853334355321025e-05, + "loss": 0.8833, + "step": 12520 + }, + { + "epoch": 0.43, + "learning_rate": 1.784672814228409e-05, + "loss": 0.8542, + "step": 12540 + }, + { + "epoch": 0.43, + "learning_rate": 1.784011300600198e-05, + "loss": 0.8485, + "step": 12560 + }, + { + "epoch": 0.43, + "learning_rate": 1.783348895399739e-05, + "loss": 0.9221, + "step": 12580 + }, + { + "epoch": 0.43, + "learning_rate": 1.7826855993803148e-05, + "loss": 0.8561, + "step": 12600 + }, + { + "epoch": 0.43, + "learning_rate": 1.7820214132962205e-05, + "loss": 0.8166, + "step": 12620 + }, + { + "epoch": 0.43, + "learning_rate": 1.7813563379027644e-05, + "loss": 0.8632, + "step": 12640 + }, + { + "epoch": 0.43, + "learning_rate": 1.7806903739562652e-05, + "loss": 0.8326, + "step": 12660 + }, + { + "epoch": 0.43, + "learning_rate": 1.7800235222140525e-05, + "loss": 0.9188, + "step": 12680 + }, + { + "epoch": 0.43, + "learning_rate": 1.7793557834344662e-05, + "loss": 0.8959, + "step": 12700 + }, + { + "epoch": 0.43, + "learning_rate": 1.7786871583768536e-05, + "loss": 0.796, + "step": 12720 + }, + { + "epoch": 0.43, + "learning_rate": 1.7780176478015706e-05, + "loss": 0.7971, + "step": 12740 + }, + { + "epoch": 0.43, + "learning_rate": 1.7773472524699803e-05, + "loss": 0.9084, + "step": 12760 + }, + { + "epoch": 0.43, + "learning_rate": 1.7766759731444512e-05, + "loss": 0.8869, + "step": 12780 + }, + { + "epoch": 0.43, + "learning_rate": 1.776003810588358e-05, + "loss": 0.8769, + "step": 12800 + }, + { + "epoch": 0.44, + "learning_rate": 1.7753307655660792e-05, + "loss": 0.8849, + "step": 12820 + }, + { + "epoch": 0.44, + "learning_rate": 1.7746568388429966e-05, + "loss": 0.8451, + "step": 12840 + }, + { + "epoch": 0.44, + "learning_rate": 1.7739820311854957e-05, + "loss": 0.9419, + "step": 12860 + }, + { + "epoch": 0.44, + "learning_rate": 1.7733063433609623e-05, + "loss": 0.9293, + "step": 12880 + }, + { + "epoch": 0.44, + "learning_rate": 1.7726297761377847e-05, + "loss": 0.8162, + "step": 12900 + }, + { + "epoch": 0.44, + "learning_rate": 1.7719523302853498e-05, + "loss": 0.8825, + "step": 12920 + }, + { + "epoch": 0.44, + "learning_rate": 1.771274006574045e-05, + "loss": 0.8961, + "step": 12940 + }, + { + "epoch": 0.44, + "learning_rate": 1.7705948057752547e-05, + "loss": 0.9109, + "step": 12960 + }, + { + "epoch": 0.44, + "learning_rate": 1.769914728661362e-05, + "loss": 0.8448, + "step": 12980 + }, + { + "epoch": 0.44, + "learning_rate": 1.7692337760057454e-05, + "loss": 0.9286, + "step": 13000 + }, + { + "epoch": 0.44, + "learning_rate": 1.76855194858278e-05, + "loss": 0.9075, + "step": 13020 + }, + { + "epoch": 0.44, + "learning_rate": 1.767869247167835e-05, + "loss": 0.8712, + "step": 13040 + }, + { + "epoch": 0.44, + "learning_rate": 1.7671856725372737e-05, + "loss": 0.8144, + "step": 13060 + }, + { + "epoch": 0.44, + "learning_rate": 1.7665012254684522e-05, + "loss": 0.8475, + "step": 13080 + }, + { + "epoch": 0.44, + "learning_rate": 1.76581590673972e-05, + "loss": 0.9759, + "step": 13100 + }, + { + "epoch": 0.45, + "learning_rate": 1.7651297171304157e-05, + "loss": 0.821, + "step": 13120 + }, + { + "epoch": 0.45, + "learning_rate": 1.7644426574208698e-05, + "loss": 0.8912, + "step": 13140 + }, + { + "epoch": 0.45, + "learning_rate": 1.763754728392402e-05, + "loss": 0.9006, + "step": 13160 + }, + { + "epoch": 0.45, + "learning_rate": 1.7630659308273204e-05, + "loss": 0.87, + "step": 13180 + }, + { + "epoch": 0.45, + "learning_rate": 1.7623762655089208e-05, + "loss": 0.9281, + "step": 13200 + }, + { + "epoch": 0.45, + "learning_rate": 1.761685733221486e-05, + "loss": 0.8586, + "step": 13220 + }, + { + "epoch": 0.45, + "learning_rate": 1.7609943347502846e-05, + "loss": 0.8539, + "step": 13240 + }, + { + "epoch": 0.45, + "learning_rate": 1.7603020708815695e-05, + "loss": 0.8956, + "step": 13260 + }, + { + "epoch": 0.45, + "learning_rate": 1.7596089424025794e-05, + "loss": 0.9335, + "step": 13280 + }, + { + "epoch": 0.45, + "learning_rate": 1.7589149501015345e-05, + "loss": 0.8925, + "step": 13300 + }, + { + "epoch": 0.45, + "learning_rate": 1.758220094767638e-05, + "loss": 0.8832, + "step": 13320 + }, + { + "epoch": 0.45, + "learning_rate": 1.757524377191075e-05, + "loss": 0.8692, + "step": 13340 + }, + { + "epoch": 0.45, + "learning_rate": 1.7568277981630107e-05, + "loss": 0.9298, + "step": 13360 + }, + { + "epoch": 0.45, + "learning_rate": 1.7561303584755903e-05, + "loss": 0.9228, + "step": 13380 + }, + { + "epoch": 0.45, + "learning_rate": 1.7554320589219368e-05, + "loss": 0.8039, + "step": 13400 + }, + { + "epoch": 0.46, + "learning_rate": 1.754732900296151e-05, + "loss": 0.9091, + "step": 13420 + }, + { + "epoch": 0.46, + "learning_rate": 1.754032883393313e-05, + "loss": 0.9266, + "step": 13440 + }, + { + "epoch": 0.46, + "learning_rate": 1.7533320090094763e-05, + "loss": 0.8973, + "step": 13460 + }, + { + "epoch": 0.46, + "learning_rate": 1.7526302779416703e-05, + "loss": 0.8943, + "step": 13480 + }, + { + "epoch": 0.46, + "learning_rate": 1.7519276909878988e-05, + "loss": 0.9595, + "step": 13500 + }, + { + "epoch": 0.46, + "learning_rate": 1.751224248947139e-05, + "loss": 0.9303, + "step": 13520 + }, + { + "epoch": 0.46, + "learning_rate": 1.7505199526193398e-05, + "loss": 0.7938, + "step": 13540 + }, + { + "epoch": 0.46, + "learning_rate": 1.749814802805423e-05, + "loss": 0.8912, + "step": 13560 + }, + { + "epoch": 0.46, + "learning_rate": 1.7491088003072798e-05, + "loss": 0.908, + "step": 13580 + }, + { + "epoch": 0.46, + "learning_rate": 1.748401945927771e-05, + "loss": 0.9292, + "step": 13600 + }, + { + "epoch": 0.46, + "learning_rate": 1.7476942404707267e-05, + "loss": 0.904, + "step": 13620 + }, + { + "epoch": 0.46, + "learning_rate": 1.746985684740945e-05, + "loss": 0.8896, + "step": 13640 + }, + { + "epoch": 0.46, + "learning_rate": 1.74627627954419e-05, + "loss": 0.8471, + "step": 13660 + }, + { + "epoch": 0.46, + "learning_rate": 1.745566025687193e-05, + "loss": 0.8564, + "step": 13680 + }, + { + "epoch": 0.47, + "learning_rate": 1.74485492397765e-05, + "loss": 0.8961, + "step": 13700 + }, + { + "epoch": 0.47, + "learning_rate": 1.74414297522422e-05, + "loss": 0.8472, + "step": 13720 + }, + { + "epoch": 0.47, + "learning_rate": 1.7434301802365264e-05, + "loss": 0.8185, + "step": 13740 + }, + { + "epoch": 0.47, + "learning_rate": 1.7427165398251557e-05, + "loss": 0.8764, + "step": 13760 + }, + { + "epoch": 0.47, + "learning_rate": 1.7420020548016538e-05, + "loss": 0.912, + "step": 13780 + }, + { + "epoch": 0.47, + "learning_rate": 1.7412867259785286e-05, + "loss": 0.8544, + "step": 13800 + }, + { + "epoch": 0.47, + "learning_rate": 1.7405705541692476e-05, + "loss": 0.9203, + "step": 13820 + }, + { + "epoch": 0.47, + "learning_rate": 1.7398535401882353e-05, + "loss": 0.83, + "step": 13840 + }, + { + "epoch": 0.47, + "learning_rate": 1.739135684850876e-05, + "loss": 0.9542, + "step": 13860 + }, + { + "epoch": 0.47, + "learning_rate": 1.7384169889735094e-05, + "loss": 0.8845, + "step": 13880 + }, + { + "epoch": 0.47, + "learning_rate": 1.7376974533734317e-05, + "loss": 0.9061, + "step": 13900 + }, + { + "epoch": 0.47, + "learning_rate": 1.7369770788688937e-05, + "loss": 0.9571, + "step": 13920 + }, + { + "epoch": 0.47, + "learning_rate": 1.7362558662791003e-05, + "loss": 0.8479, + "step": 13940 + }, + { + "epoch": 0.47, + "learning_rate": 1.7355338164242098e-05, + "loss": 0.8484, + "step": 13960 + }, + { + "epoch": 0.47, + "learning_rate": 1.734810930125332e-05, + "loss": 0.862, + "step": 13980 + }, + { + "epoch": 0.48, + "learning_rate": 1.734087208204528e-05, + "loss": 0.8531, + "step": 14000 + }, + { + "epoch": 0.48, + "learning_rate": 1.73336265148481e-05, + "loss": 0.8025, + "step": 14020 + }, + { + "epoch": 0.48, + "learning_rate": 1.7326372607901388e-05, + "loss": 0.8492, + "step": 14040 + }, + { + "epoch": 0.48, + "learning_rate": 1.7319110369454232e-05, + "loss": 0.8061, + "step": 14060 + }, + { + "epoch": 0.48, + "learning_rate": 1.7311839807765204e-05, + "loss": 0.8537, + "step": 14080 + }, + { + "epoch": 0.48, + "learning_rate": 1.7304560931102334e-05, + "loss": 0.8739, + "step": 14100 + }, + { + "epoch": 0.48, + "learning_rate": 1.729727374774311e-05, + "loss": 0.8787, + "step": 14120 + }, + { + "epoch": 0.48, + "learning_rate": 1.7289978265974466e-05, + "loss": 0.8357, + "step": 14140 + }, + { + "epoch": 0.48, + "learning_rate": 1.728267449409278e-05, + "loss": 0.8385, + "step": 14160 + }, + { + "epoch": 0.48, + "learning_rate": 1.7275362440403844e-05, + "loss": 0.9139, + "step": 14180 + }, + { + "epoch": 0.48, + "learning_rate": 1.7268042113222877e-05, + "loss": 0.9294, + "step": 14200 + }, + { + "epoch": 0.48, + "learning_rate": 1.726071352087451e-05, + "loss": 0.9138, + "step": 14220 + }, + { + "epoch": 0.48, + "learning_rate": 1.725337667169276e-05, + "loss": 0.8542, + "step": 14240 + }, + { + "epoch": 0.48, + "learning_rate": 1.7246031574021047e-05, + "loss": 0.8184, + "step": 14260 + }, + { + "epoch": 0.48, + "learning_rate": 1.723867823621216e-05, + "loss": 0.8665, + "step": 14280 + }, + { + "epoch": 0.49, + "learning_rate": 1.7231316666628268e-05, + "loss": 0.8361, + "step": 14300 + }, + { + "epoch": 0.49, + "learning_rate": 1.7223946873640897e-05, + "loss": 0.8156, + "step": 14320 + }, + { + "epoch": 0.49, + "learning_rate": 1.7216568865630924e-05, + "loss": 0.8673, + "step": 14340 + }, + { + "epoch": 0.49, + "learning_rate": 1.720918265098857e-05, + "loss": 0.8501, + "step": 14360 + }, + { + "epoch": 0.49, + "learning_rate": 1.720178823811338e-05, + "loss": 0.8468, + "step": 14380 + }, + { + "epoch": 0.49, + "learning_rate": 1.7194385635414242e-05, + "loss": 0.8028, + "step": 14400 + }, + { + "epoch": 0.49, + "learning_rate": 1.718697485130934e-05, + "loss": 0.837, + "step": 14420 + }, + { + "epoch": 0.49, + "learning_rate": 1.717955589422616e-05, + "loss": 0.8196, + "step": 14440 + }, + { + "epoch": 0.49, + "learning_rate": 1.7172128772601497e-05, + "loss": 0.932, + "step": 14460 + }, + { + "epoch": 0.49, + "learning_rate": 1.7164693494881418e-05, + "loss": 0.85, + "step": 14480 + }, + { + "epoch": 0.49, + "learning_rate": 1.715725006952127e-05, + "loss": 0.8667, + "step": 14500 + }, + { + "epoch": 0.49, + "learning_rate": 1.7149798504985665e-05, + "loss": 0.8423, + "step": 14520 + }, + { + "epoch": 0.49, + "learning_rate": 1.7142338809748475e-05, + "loss": 0.9186, + "step": 14540 + }, + { + "epoch": 0.49, + "learning_rate": 1.7134870992292808e-05, + "loss": 0.8717, + "step": 14560 + }, + { + "epoch": 0.49, + "learning_rate": 1.712739506111102e-05, + "loss": 0.8056, + "step": 14580 + }, + { + "epoch": 0.5, + "learning_rate": 1.7119911024704684e-05, + "loss": 0.9074, + "step": 14600 + }, + { + "epoch": 0.5, + "learning_rate": 1.7112418891584598e-05, + "loss": 0.867, + "step": 14620 + }, + { + "epoch": 0.5, + "learning_rate": 1.7104918670270763e-05, + "loss": 0.8923, + "step": 14640 + }, + { + "epoch": 0.5, + "learning_rate": 1.7097410369292376e-05, + "loss": 0.915, + "step": 14660 + }, + { + "epoch": 0.5, + "learning_rate": 1.708989399718783e-05, + "loss": 0.9823, + "step": 14680 + }, + { + "epoch": 0.5, + "learning_rate": 1.708236956250469e-05, + "loss": 0.8399, + "step": 14700 + }, + { + "epoch": 0.5, + "learning_rate": 1.707483707379969e-05, + "loss": 0.8465, + "step": 14720 + }, + { + "epoch": 0.5, + "learning_rate": 1.706729653963872e-05, + "loss": 0.922, + "step": 14740 + }, + { + "epoch": 0.5, + "learning_rate": 1.7059747968596836e-05, + "loss": 0.8394, + "step": 14760 + }, + { + "epoch": 0.5, + "learning_rate": 1.705219136925821e-05, + "loss": 0.7721, + "step": 14780 + }, + { + "epoch": 0.5, + "learning_rate": 1.704462675021616e-05, + "loss": 0.7611, + "step": 14800 + }, + { + "epoch": 0.5, + "learning_rate": 1.7037054120073115e-05, + "loss": 0.8898, + "step": 14820 + }, + { + "epoch": 0.5, + "learning_rate": 1.7029473487440617e-05, + "loss": 0.8221, + "step": 14840 + }, + { + "epoch": 0.5, + "learning_rate": 1.7021884860939314e-05, + "loss": 0.9067, + "step": 14860 + }, + { + "epoch": 0.51, + "learning_rate": 1.7014288249198937e-05, + "loss": 0.8242, + "step": 14880 + }, + { + "epoch": 0.51, + "learning_rate": 1.70066836608583e-05, + "loss": 0.8687, + "step": 14900 + }, + { + "epoch": 0.51, + "learning_rate": 1.699907110456529e-05, + "loss": 0.917, + "step": 14920 + }, + { + "epoch": 0.51, + "learning_rate": 1.6991450588976853e-05, + "loss": 0.8522, + "step": 14940 + }, + { + "epoch": 0.51, + "learning_rate": 1.698382212275899e-05, + "loss": 0.8023, + "step": 14960 + }, + { + "epoch": 0.51, + "learning_rate": 1.697618571458673e-05, + "loss": 0.8668, + "step": 14980 + }, + { + "epoch": 0.51, + "learning_rate": 1.6968541373144157e-05, + "loss": 0.8549, + "step": 15000 + }, + { + "epoch": 0.51, + "learning_rate": 1.696088910712435e-05, + "loss": 0.851, + "step": 15020 + }, + { + "epoch": 0.51, + "learning_rate": 1.695322892522943e-05, + "loss": 0.9246, + "step": 15040 + }, + { + "epoch": 0.51, + "learning_rate": 1.6945560836170485e-05, + "loss": 0.9117, + "step": 15060 + }, + { + "epoch": 0.51, + "learning_rate": 1.6937884848667625e-05, + "loss": 0.8651, + "step": 15080 + }, + { + "epoch": 0.51, + "learning_rate": 1.6930200971449923e-05, + "loss": 0.8053, + "step": 15100 + }, + { + "epoch": 0.51, + "learning_rate": 1.692250921325544e-05, + "loss": 0.8642, + "step": 15120 + }, + { + "epoch": 0.51, + "learning_rate": 1.691480958283118e-05, + "loss": 0.7826, + "step": 15140 + }, + { + "epoch": 0.51, + "learning_rate": 1.6907102088933118e-05, + "loss": 0.8057, + "step": 15160 + }, + { + "epoch": 0.52, + "learning_rate": 1.6899386740326164e-05, + "loss": 0.8133, + "step": 15180 + }, + { + "epoch": 0.52, + "learning_rate": 1.6891663545784146e-05, + "loss": 0.8651, + "step": 15200 + }, + { + "epoch": 0.52, + "learning_rate": 1.6883932514089842e-05, + "loss": 0.8253, + "step": 15220 + }, + { + "epoch": 0.52, + "learning_rate": 1.6876193654034922e-05, + "loss": 0.9001, + "step": 15240 + }, + { + "epoch": 0.52, + "learning_rate": 1.6868446974419962e-05, + "loss": 0.8583, + "step": 15260 + }, + { + "epoch": 0.52, + "learning_rate": 1.686069248405444e-05, + "loss": 0.8354, + "step": 15280 + }, + { + "epoch": 0.52, + "learning_rate": 1.68529301917567e-05, + "loss": 0.8794, + "step": 15300 + }, + { + "epoch": 0.52, + "learning_rate": 1.6845160106353975e-05, + "loss": 0.8645, + "step": 15320 + }, + { + "epoch": 0.52, + "learning_rate": 1.6837382236682347e-05, + "loss": 0.8513, + "step": 15340 + }, + { + "epoch": 0.52, + "learning_rate": 1.682959659158676e-05, + "loss": 0.8559, + "step": 15360 + }, + { + "epoch": 0.52, + "learning_rate": 1.6821803179920996e-05, + "loss": 0.7707, + "step": 15380 + }, + { + "epoch": 0.52, + "learning_rate": 1.681400201054766e-05, + "loss": 0.7838, + "step": 15400 + }, + { + "epoch": 0.52, + "learning_rate": 1.68061930923382e-05, + "loss": 0.8378, + "step": 15420 + }, + { + "epoch": 0.52, + "learning_rate": 1.6798376434172862e-05, + "loss": 0.8646, + "step": 15440 + }, + { + "epoch": 0.52, + "learning_rate": 1.6790552044940692e-05, + "loss": 0.8471, + "step": 15460 + }, + { + "epoch": 0.53, + "learning_rate": 1.678271993353953e-05, + "loss": 0.8338, + "step": 15480 + }, + { + "epoch": 0.53, + "learning_rate": 1.6774880108876005e-05, + "loss": 0.8486, + "step": 15500 + }, + { + "epoch": 0.53, + "learning_rate": 1.6767032579865503e-05, + "loss": 0.8262, + "step": 15520 + }, + { + "epoch": 0.53, + "learning_rate": 1.6759177355432186e-05, + "loss": 0.8425, + "step": 15540 + }, + { + "epoch": 0.53, + "learning_rate": 1.6751314444508967e-05, + "loss": 0.8316, + "step": 15560 + }, + { + "epoch": 0.53, + "learning_rate": 1.674344385603748e-05, + "loss": 0.8687, + "step": 15580 + }, + { + "epoch": 0.53, + "learning_rate": 1.6735565598968114e-05, + "loss": 0.8748, + "step": 15600 + }, + { + "epoch": 0.53, + "learning_rate": 1.672767968225997e-05, + "loss": 0.8936, + "step": 15620 + }, + { + "epoch": 0.53, + "learning_rate": 1.6719786114880846e-05, + "loss": 0.8964, + "step": 15640 + }, + { + "epoch": 0.53, + "learning_rate": 1.6711884905807264e-05, + "loss": 0.8166, + "step": 15660 + }, + { + "epoch": 0.53, + "learning_rate": 1.670397606402442e-05, + "loss": 0.8327, + "step": 15680 + }, + { + "epoch": 0.53, + "learning_rate": 1.66960595985262e-05, + "loss": 0.8051, + "step": 15700 + }, + { + "epoch": 0.53, + "learning_rate": 1.6688135518315144e-05, + "loss": 0.8647, + "step": 15720 + }, + { + "epoch": 0.53, + "learning_rate": 1.668020383240247e-05, + "loss": 0.8963, + "step": 15740 + }, + { + "epoch": 0.53, + "learning_rate": 1.6672264549808033e-05, + "loss": 0.8542, + "step": 15760 + }, + { + "epoch": 0.54, + "learning_rate": 1.6664317679560333e-05, + "loss": 0.8557, + "step": 15780 + }, + { + "epoch": 0.54, + "learning_rate": 1.665636323069649e-05, + "loss": 0.8623, + "step": 15800 + }, + { + "epoch": 0.54, + "learning_rate": 1.6648401212262253e-05, + "loss": 0.8469, + "step": 15820 + }, + { + "epoch": 0.54, + "learning_rate": 1.6640431633311975e-05, + "loss": 0.8281, + "step": 15840 + }, + { + "epoch": 0.54, + "learning_rate": 1.6632454502908605e-05, + "loss": 0.9213, + "step": 15860 + }, + { + "epoch": 0.54, + "learning_rate": 1.662446983012368e-05, + "loss": 0.8172, + "step": 15880 + }, + { + "epoch": 0.54, + "learning_rate": 1.6616477624037317e-05, + "loss": 0.9342, + "step": 15900 + }, + { + "epoch": 0.54, + "learning_rate": 1.6608477893738197e-05, + "loss": 0.8882, + "step": 15920 + }, + { + "epoch": 0.54, + "learning_rate": 1.6600470648323554e-05, + "loss": 0.8612, + "step": 15940 + }, + { + "epoch": 0.54, + "learning_rate": 1.6592455896899177e-05, + "loss": 0.7878, + "step": 15960 + }, + { + "epoch": 0.54, + "learning_rate": 1.658443364857939e-05, + "loss": 0.8418, + "step": 15980 + }, + { + "epoch": 0.54, + "learning_rate": 1.6576403912487026e-05, + "loss": 0.86, + "step": 16000 + }, + { + "epoch": 0.54, + "learning_rate": 1.656836669775346e-05, + "loss": 0.9031, + "step": 16020 + }, + { + "epoch": 0.54, + "learning_rate": 1.6560322013518542e-05, + "loss": 0.7993, + "step": 16040 + }, + { + "epoch": 0.55, + "learning_rate": 1.6552269868930646e-05, + "loss": 0.9313, + "step": 16060 + }, + { + "epoch": 0.55, + "learning_rate": 1.6544210273146608e-05, + "loss": 0.8022, + "step": 16080 + }, + { + "epoch": 0.55, + "learning_rate": 1.6536143235331743e-05, + "loss": 0.8633, + "step": 16100 + }, + { + "epoch": 0.55, + "learning_rate": 1.6528068764659838e-05, + "loss": 0.8447, + "step": 16120 + }, + { + "epoch": 0.55, + "learning_rate": 1.651998687031312e-05, + "loss": 0.8093, + "step": 16140 + }, + { + "epoch": 0.55, + "learning_rate": 1.6511897561482263e-05, + "loss": 0.8275, + "step": 16160 + }, + { + "epoch": 0.55, + "learning_rate": 1.6503800847366373e-05, + "loss": 1.0032, + "step": 16180 + }, + { + "epoch": 0.55, + "learning_rate": 1.6495696737172982e-05, + "loss": 0.8871, + "step": 16200 + }, + { + "epoch": 0.55, + "learning_rate": 1.6487585240118025e-05, + "loss": 0.8384, + "step": 16220 + }, + { + "epoch": 0.55, + "learning_rate": 1.647946636542584e-05, + "loss": 0.8278, + "step": 16240 + }, + { + "epoch": 0.55, + "learning_rate": 1.6471340122329158e-05, + "loss": 0.8858, + "step": 16260 + }, + { + "epoch": 0.55, + "learning_rate": 1.6463206520069083e-05, + "loss": 0.8531, + "step": 16280 + }, + { + "epoch": 0.55, + "learning_rate": 1.6455065567895098e-05, + "loss": 0.8399, + "step": 16300 + }, + { + "epoch": 0.55, + "learning_rate": 1.6446917275065032e-05, + "loss": 0.8538, + "step": 16320 + }, + { + "epoch": 0.55, + "learning_rate": 1.643876165084507e-05, + "loss": 0.7951, + "step": 16340 + }, + { + "epoch": 0.56, + "learning_rate": 1.6430598704509723e-05, + "loss": 0.8208, + "step": 16360 + }, + { + "epoch": 0.56, + "learning_rate": 1.642242844534185e-05, + "loss": 0.8307, + "step": 16380 + }, + { + "epoch": 0.56, + "learning_rate": 1.6414250882632607e-05, + "loss": 0.8354, + "step": 16400 + }, + { + "epoch": 0.56, + "learning_rate": 1.6406066025681462e-05, + "loss": 0.9212, + "step": 16420 + }, + { + "epoch": 0.56, + "learning_rate": 1.6397873883796183e-05, + "loss": 0.8434, + "step": 16440 + }, + { + "epoch": 0.56, + "learning_rate": 1.6389674466292812e-05, + "loss": 0.8562, + "step": 16460 + }, + { + "epoch": 0.56, + "learning_rate": 1.638146778249567e-05, + "loss": 0.8936, + "step": 16480 + }, + { + "epoch": 0.56, + "learning_rate": 1.637325384173734e-05, + "loss": 0.833, + "step": 16500 + }, + { + "epoch": 0.56, + "learning_rate": 1.6365032653358665e-05, + "loss": 0.7673, + "step": 16520 + }, + { + "epoch": 0.56, + "learning_rate": 1.635680422670872e-05, + "loss": 0.8474, + "step": 16540 + }, + { + "epoch": 0.56, + "learning_rate": 1.6348568571144816e-05, + "loss": 0.9436, + "step": 16560 + }, + { + "epoch": 0.56, + "learning_rate": 1.634032569603248e-05, + "loss": 0.8744, + "step": 16580 + }, + { + "epoch": 0.56, + "learning_rate": 1.633207561074546e-05, + "loss": 0.8673, + "step": 16600 + }, + { + "epoch": 0.56, + "learning_rate": 1.6323818324665693e-05, + "loss": 0.827, + "step": 16620 + }, + { + "epoch": 0.56, + "learning_rate": 1.63155538471833e-05, + "loss": 0.8584, + "step": 16640 + }, + { + "epoch": 0.57, + "learning_rate": 1.6307282187696602e-05, + "loss": 0.8387, + "step": 16660 + }, + { + "epoch": 0.57, + "learning_rate": 1.629900335561206e-05, + "loss": 0.8471, + "step": 16680 + }, + { + "epoch": 0.57, + "learning_rate": 1.6290717360344312e-05, + "loss": 0.7533, + "step": 16700 + }, + { + "epoch": 0.57, + "learning_rate": 1.6282424211316134e-05, + "loss": 0.8486, + "step": 16720 + }, + { + "epoch": 0.57, + "learning_rate": 1.6274123917958438e-05, + "loss": 0.8284, + "step": 16740 + }, + { + "epoch": 0.57, + "learning_rate": 1.626581648971026e-05, + "loss": 0.8575, + "step": 16760 + }, + { + "epoch": 0.57, + "learning_rate": 1.625750193601875e-05, + "loss": 0.8387, + "step": 16780 + }, + { + "epoch": 0.57, + "learning_rate": 1.624918026633916e-05, + "loss": 0.8599, + "step": 16800 + }, + { + "epoch": 0.57, + "learning_rate": 1.6240851490134834e-05, + "loss": 0.8463, + "step": 16820 + }, + { + "epoch": 0.57, + "learning_rate": 1.6232515616877204e-05, + "loss": 0.8325, + "step": 16840 + }, + { + "epoch": 0.57, + "learning_rate": 1.6224172656045767e-05, + "loss": 0.8539, + "step": 16860 + }, + { + "epoch": 0.57, + "learning_rate": 1.6215822617128075e-05, + "loss": 0.8314, + "step": 16880 + }, + { + "epoch": 0.57, + "learning_rate": 1.620746550961974e-05, + "loss": 0.8184, + "step": 16900 + }, + { + "epoch": 0.57, + "learning_rate": 1.6199101343024405e-05, + "loss": 0.838, + "step": 16920 + }, + { + "epoch": 0.58, + "learning_rate": 1.619073012685374e-05, + "loss": 0.8426, + "step": 16940 + }, + { + "epoch": 0.58, + "learning_rate": 1.618235187062744e-05, + "loss": 0.7958, + "step": 16960 + }, + { + "epoch": 0.58, + "learning_rate": 1.6173966583873195e-05, + "loss": 0.8954, + "step": 16980 + }, + { + "epoch": 0.58, + "learning_rate": 1.6165574276126702e-05, + "loss": 0.8166, + "step": 17000 + }, + { + "epoch": 0.58, + "learning_rate": 1.6157174956931627e-05, + "loss": 0.8335, + "step": 17020 + }, + { + "epoch": 0.58, + "learning_rate": 1.6148768635839623e-05, + "loss": 0.8346, + "step": 17040 + }, + { + "epoch": 0.58, + "learning_rate": 1.6140355322410304e-05, + "loss": 0.871, + "step": 17060 + }, + { + "epoch": 0.58, + "learning_rate": 1.6131935026211224e-05, + "loss": 0.8357, + "step": 17080 + }, + { + "epoch": 0.58, + "learning_rate": 1.612350775681789e-05, + "loss": 0.802, + "step": 17100 + }, + { + "epoch": 0.58, + "learning_rate": 1.611507352381373e-05, + "loss": 0.8244, + "step": 17120 + }, + { + "epoch": 0.58, + "learning_rate": 1.6106632336790105e-05, + "loss": 0.7501, + "step": 17140 + }, + { + "epoch": 0.58, + "learning_rate": 1.609818420534627e-05, + "loss": 0.792, + "step": 17160 + }, + { + "epoch": 0.58, + "learning_rate": 1.608972913908938e-05, + "loss": 0.7854, + "step": 17180 + }, + { + "epoch": 0.58, + "learning_rate": 1.608126714763448e-05, + "loss": 0.8522, + "step": 17200 + }, + { + "epoch": 0.58, + "learning_rate": 1.6072798240604487e-05, + "loss": 0.8495, + "step": 17220 + }, + { + "epoch": 0.59, + "learning_rate": 1.6064322427630186e-05, + "loss": 0.9158, + "step": 17240 + }, + { + "epoch": 0.59, + "learning_rate": 1.6055839718350207e-05, + "loss": 0.8612, + "step": 17260 + }, + { + "epoch": 0.59, + "learning_rate": 1.6047350122411038e-05, + "loss": 0.8484, + "step": 17280 + }, + { + "epoch": 0.59, + "learning_rate": 1.6038853649466983e-05, + "loss": 0.8433, + "step": 17300 + }, + { + "epoch": 0.59, + "learning_rate": 1.6030350309180173e-05, + "loss": 0.8917, + "step": 17320 + }, + { + "epoch": 0.59, + "learning_rate": 1.6021840111220548e-05, + "loss": 0.7789, + "step": 17340 + }, + { + "epoch": 0.59, + "learning_rate": 1.6013323065265848e-05, + "loss": 0.8575, + "step": 17360 + }, + { + "epoch": 0.59, + "learning_rate": 1.60047991810016e-05, + "loss": 0.812, + "step": 17380 + }, + { + "epoch": 0.59, + "learning_rate": 1.59962684681211e-05, + "loss": 0.828, + "step": 17400 + }, + { + "epoch": 0.59, + "learning_rate": 1.5987730936325424e-05, + "loss": 0.8474, + "step": 17420 + }, + { + "epoch": 0.59, + "learning_rate": 1.597918659532339e-05, + "loss": 0.8686, + "step": 17440 + }, + { + "epoch": 0.59, + "learning_rate": 1.5970635454831565e-05, + "loss": 0.7824, + "step": 17460 + }, + { + "epoch": 0.59, + "learning_rate": 1.5962077524574248e-05, + "loss": 0.9101, + "step": 17480 + }, + { + "epoch": 0.59, + "learning_rate": 1.595351281428345e-05, + "loss": 0.8496, + "step": 17500 + }, + { + "epoch": 0.59, + "learning_rate": 1.5944941333698912e-05, + "loss": 0.8163, + "step": 17520 + }, + { + "epoch": 0.6, + "learning_rate": 1.5936363092568058e-05, + "loss": 0.854, + "step": 17540 + }, + { + "epoch": 0.6, + "learning_rate": 1.5927778100646007e-05, + "loss": 0.8085, + "step": 17560 + }, + { + "epoch": 0.6, + "learning_rate": 1.5919186367695546e-05, + "loss": 0.8276, + "step": 17580 + }, + { + "epoch": 0.6, + "learning_rate": 1.5910587903487142e-05, + "loss": 0.8778, + "step": 17600 + }, + { + "epoch": 0.6, + "learning_rate": 1.5901982717798903e-05, + "loss": 0.8123, + "step": 17620 + }, + { + "epoch": 0.6, + "learning_rate": 1.5893370820416594e-05, + "loss": 0.8197, + "step": 17640 + }, + { + "epoch": 0.6, + "learning_rate": 1.5884752221133597e-05, + "loss": 0.8753, + "step": 17660 + }, + { + "epoch": 0.6, + "learning_rate": 1.587612692975093e-05, + "loss": 0.837, + "step": 17680 + }, + { + "epoch": 0.6, + "learning_rate": 1.5867494956077215e-05, + "loss": 0.7968, + "step": 17700 + }, + { + "epoch": 0.6, + "learning_rate": 1.585885630992867e-05, + "loss": 0.8335, + "step": 17720 + }, + { + "epoch": 0.6, + "learning_rate": 1.58502110011291e-05, + "loss": 0.8233, + "step": 17740 + }, + { + "epoch": 0.6, + "learning_rate": 1.5841559039509898e-05, + "loss": 0.8636, + "step": 17760 + }, + { + "epoch": 0.6, + "learning_rate": 1.5832900434910014e-05, + "loss": 0.7803, + "step": 17780 + }, + { + "epoch": 0.6, + "learning_rate": 1.5824235197175954e-05, + "loss": 0.8154, + "step": 17800 + }, + { + "epoch": 0.6, + "learning_rate": 1.5815563336161756e-05, + "loss": 0.8779, + "step": 17820 + }, + { + "epoch": 0.61, + "learning_rate": 1.580688486172902e-05, + "loss": 0.8222, + "step": 17840 + }, + { + "epoch": 0.61, + "learning_rate": 1.5798199783746828e-05, + "loss": 0.8423, + "step": 17860 + }, + { + "epoch": 0.61, + "learning_rate": 1.5789508112091804e-05, + "loss": 0.7625, + "step": 17880 + }, + { + "epoch": 0.61, + "learning_rate": 1.578080985664805e-05, + "loss": 0.8077, + "step": 17900 + }, + { + "epoch": 0.61, + "learning_rate": 1.5772105027307168e-05, + "loss": 0.8791, + "step": 17920 + }, + { + "epoch": 0.61, + "learning_rate": 1.5763393633968223e-05, + "loss": 0.8595, + "step": 17940 + }, + { + "epoch": 0.61, + "learning_rate": 1.5754675686537756e-05, + "loss": 0.8115, + "step": 17960 + }, + { + "epoch": 0.61, + "learning_rate": 1.5745951194929756e-05, + "loss": 0.8787, + "step": 17980 + }, + { + "epoch": 0.61, + "learning_rate": 1.5737220169065656e-05, + "loss": 0.7391, + "step": 18000 + }, + { + "epoch": 0.61, + "learning_rate": 1.5728482618874314e-05, + "loss": 0.7967, + "step": 18020 + }, + { + "epoch": 0.61, + "learning_rate": 1.5719738554292017e-05, + "loss": 0.7753, + "step": 18040 + }, + { + "epoch": 0.61, + "learning_rate": 1.571098798526245e-05, + "loss": 0.8517, + "step": 18060 + }, + { + "epoch": 0.61, + "learning_rate": 1.5702230921736705e-05, + "loss": 0.7999, + "step": 18080 + }, + { + "epoch": 0.61, + "learning_rate": 1.569346737367325e-05, + "loss": 0.8186, + "step": 18100 + }, + { + "epoch": 0.62, + "learning_rate": 1.5684697351037935e-05, + "loss": 0.8217, + "step": 18120 + }, + { + "epoch": 0.62, + "learning_rate": 1.5675920863803967e-05, + "loss": 0.8555, + "step": 18140 + }, + { + "epoch": 0.62, + "learning_rate": 1.566713792195191e-05, + "loss": 0.8466, + "step": 18160 + }, + { + "epoch": 0.62, + "learning_rate": 1.565834853546966e-05, + "loss": 0.8224, + "step": 18180 + }, + { + "epoch": 0.62, + "learning_rate": 1.5649552714352448e-05, + "loss": 0.8254, + "step": 18200 + }, + { + "epoch": 0.62, + "learning_rate": 1.5640750468602822e-05, + "loss": 0.8414, + "step": 18220 + }, + { + "epoch": 0.62, + "learning_rate": 1.563194180823064e-05, + "loss": 0.7856, + "step": 18240 + }, + { + "epoch": 0.62, + "learning_rate": 1.562312674325304e-05, + "loss": 0.9037, + "step": 18260 + }, + { + "epoch": 0.62, + "learning_rate": 1.5614305283694462e-05, + "loss": 0.8079, + "step": 18280 + }, + { + "epoch": 0.62, + "learning_rate": 1.56054774395866e-05, + "loss": 0.8576, + "step": 18300 + }, + { + "epoch": 0.62, + "learning_rate": 1.559664322096843e-05, + "loss": 0.8414, + "step": 18320 + }, + { + "epoch": 0.62, + "learning_rate": 1.5587802637886156e-05, + "loss": 0.829, + "step": 18340 + }, + { + "epoch": 0.62, + "learning_rate": 1.557895570039323e-05, + "loss": 0.8201, + "step": 18360 + }, + { + "epoch": 0.62, + "learning_rate": 1.5570102418550327e-05, + "loss": 0.7774, + "step": 18380 + }, + { + "epoch": 0.62, + "learning_rate": 1.5561242802425337e-05, + "loss": 0.8544, + "step": 18400 + }, + { + "epoch": 0.63, + "learning_rate": 1.5552376862093357e-05, + "loss": 0.8639, + "step": 18420 + }, + { + "epoch": 0.63, + "learning_rate": 1.554350460763667e-05, + "loss": 0.8273, + "step": 18440 + }, + { + "epoch": 0.63, + "learning_rate": 1.5534626049144753e-05, + "loss": 0.7759, + "step": 18460 + }, + { + "epoch": 0.63, + "learning_rate": 1.552574119671423e-05, + "loss": 0.8248, + "step": 18480 + }, + { + "epoch": 0.63, + "learning_rate": 1.55168500604489e-05, + "loss": 0.8449, + "step": 18500 + }, + { + "epoch": 0.63, + "learning_rate": 1.5507952650459697e-05, + "loss": 0.7627, + "step": 18520 + }, + { + "epoch": 0.63, + "learning_rate": 1.54990489768647e-05, + "loss": 0.7437, + "step": 18540 + }, + { + "epoch": 0.63, + "learning_rate": 1.5490139049789097e-05, + "loss": 0.8273, + "step": 18560 + }, + { + "epoch": 0.63, + "learning_rate": 1.5481222879365205e-05, + "loss": 0.8037, + "step": 18580 + }, + { + "epoch": 0.63, + "learning_rate": 1.5472300475732427e-05, + "loss": 0.7735, + "step": 18600 + }, + { + "epoch": 0.63, + "learning_rate": 1.546337184903726e-05, + "loss": 0.8053, + "step": 18620 + }, + { + "epoch": 0.63, + "learning_rate": 1.5454437009433277e-05, + "loss": 0.8705, + "step": 18640 + }, + { + "epoch": 0.63, + "learning_rate": 1.5445495967081114e-05, + "loss": 0.8378, + "step": 18660 + }, + { + "epoch": 0.63, + "learning_rate": 1.543654873214846e-05, + "loss": 0.8337, + "step": 18680 + }, + { + "epoch": 0.63, + "learning_rate": 1.542759531481006e-05, + "loss": 0.766, + "step": 18700 + }, + { + "epoch": 0.64, + "learning_rate": 1.5418635725247666e-05, + "loss": 0.8558, + "step": 18720 + }, + { + "epoch": 0.64, + "learning_rate": 1.540966997365007e-05, + "loss": 0.8342, + "step": 18740 + }, + { + "epoch": 0.64, + "learning_rate": 1.540069807021306e-05, + "loss": 0.8172, + "step": 18760 + }, + { + "epoch": 0.64, + "learning_rate": 1.539172002513942e-05, + "loss": 0.8388, + "step": 18780 + }, + { + "epoch": 0.64, + "learning_rate": 1.538273584863893e-05, + "loss": 0.8054, + "step": 18800 + }, + { + "epoch": 0.64, + "learning_rate": 1.537374555092832e-05, + "loss": 0.846, + "step": 18820 + }, + { + "epoch": 0.64, + "learning_rate": 1.5364749142231302e-05, + "loss": 0.8163, + "step": 18840 + }, + { + "epoch": 0.64, + "learning_rate": 1.5355746632778533e-05, + "loss": 0.8236, + "step": 18860 + }, + { + "epoch": 0.64, + "learning_rate": 1.5346738032807603e-05, + "loss": 0.7648, + "step": 18880 + }, + { + "epoch": 0.64, + "learning_rate": 1.5337723352563028e-05, + "loss": 0.8125, + "step": 18900 + }, + { + "epoch": 0.64, + "learning_rate": 1.5328702602296238e-05, + "loss": 0.8316, + "step": 18920 + }, + { + "epoch": 0.64, + "learning_rate": 1.531967579226557e-05, + "loss": 0.8042, + "step": 18940 + }, + { + "epoch": 0.64, + "learning_rate": 1.5310642932736253e-05, + "loss": 0.8454, + "step": 18960 + }, + { + "epoch": 0.64, + "learning_rate": 1.5301604033980387e-05, + "loss": 0.7541, + "step": 18980 + }, + { + "epoch": 0.64, + "learning_rate": 1.529255910627695e-05, + "loss": 0.8904, + "step": 19000 + }, + { + "epoch": 0.65, + "learning_rate": 1.5283508159911776e-05, + "loss": 0.7874, + "step": 19020 + }, + { + "epoch": 0.65, + "learning_rate": 1.5274451205177526e-05, + "loss": 0.7914, + "step": 19040 + }, + { + "epoch": 0.65, + "learning_rate": 1.5265388252373712e-05, + "loss": 0.8695, + "step": 19060 + }, + { + "epoch": 0.65, + "learning_rate": 1.525631931180667e-05, + "loss": 0.8793, + "step": 19080 + }, + { + "epoch": 0.65, + "learning_rate": 1.524724439378953e-05, + "loss": 0.8213, + "step": 19100 + }, + { + "epoch": 0.65, + "learning_rate": 1.5238163508642219e-05, + "loss": 0.8038, + "step": 19120 + }, + { + "epoch": 0.65, + "learning_rate": 1.5229076666691471e-05, + "loss": 0.799, + "step": 19140 + }, + { + "epoch": 0.65, + "learning_rate": 1.5219983878270767e-05, + "loss": 0.8665, + "step": 19160 + }, + { + "epoch": 0.65, + "learning_rate": 1.5210885153720374e-05, + "loss": 0.8425, + "step": 19180 + }, + { + "epoch": 0.65, + "learning_rate": 1.520178050338729e-05, + "loss": 0.8568, + "step": 19200 + }, + { + "epoch": 0.65, + "learning_rate": 1.519266993762527e-05, + "loss": 0.8743, + "step": 19220 + }, + { + "epoch": 0.65, + "learning_rate": 1.5183553466794778e-05, + "loss": 0.8136, + "step": 19240 + }, + { + "epoch": 0.65, + "learning_rate": 1.5174431101263004e-05, + "loss": 0.848, + "step": 19260 + }, + { + "epoch": 0.65, + "learning_rate": 1.5165302851403843e-05, + "loss": 0.8114, + "step": 19280 + }, + { + "epoch": 0.66, + "learning_rate": 1.5156168727597873e-05, + "loss": 0.8329, + "step": 19300 + }, + { + "epoch": 0.66, + "learning_rate": 1.5147028740232361e-05, + "loss": 0.7901, + "step": 19320 + }, + { + "epoch": 0.66, + "learning_rate": 1.5137882899701234e-05, + "loss": 0.8436, + "step": 19340 + }, + { + "epoch": 0.66, + "learning_rate": 1.5128731216405084e-05, + "loss": 0.864, + "step": 19360 + }, + { + "epoch": 0.66, + "learning_rate": 1.5119573700751133e-05, + "loss": 0.8219, + "step": 19380 + }, + { + "epoch": 0.66, + "learning_rate": 1.5110410363153256e-05, + "loss": 0.8108, + "step": 19400 + }, + { + "epoch": 0.66, + "learning_rate": 1.5101241214031928e-05, + "loss": 0.8013, + "step": 19420 + }, + { + "epoch": 0.66, + "learning_rate": 1.5092066263814245e-05, + "loss": 0.8176, + "step": 19440 + }, + { + "epoch": 0.66, + "learning_rate": 1.5082885522933901e-05, + "loss": 0.8276, + "step": 19460 + }, + { + "epoch": 0.66, + "learning_rate": 1.5073699001831168e-05, + "loss": 0.7847, + "step": 19480 + }, + { + "epoch": 0.66, + "learning_rate": 1.506450671095289e-05, + "loss": 0.798, + "step": 19500 + }, + { + "epoch": 0.66, + "learning_rate": 1.5055308660752486e-05, + "loss": 0.8289, + "step": 19520 + }, + { + "epoch": 0.66, + "learning_rate": 1.5046104861689911e-05, + "loss": 0.8235, + "step": 19540 + }, + { + "epoch": 0.66, + "learning_rate": 1.5036895324231662e-05, + "loss": 0.8415, + "step": 19560 + }, + { + "epoch": 0.66, + "learning_rate": 1.5027680058850757e-05, + "loss": 0.8438, + "step": 19580 + }, + { + "epoch": 0.67, + "learning_rate": 1.501845907602674e-05, + "loss": 0.7979, + "step": 19600 + }, + { + "epoch": 0.67, + "learning_rate": 1.5009232386245644e-05, + "loss": 0.8477, + "step": 19620 + }, + { + "epoch": 0.67, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.7946, + "step": 19640 + }, + { + "epoch": 0.67, + "learning_rate": 1.4990761927788815e-05, + "loss": 0.827, + "step": 19660 + }, + { + "epoch": 0.67, + "learning_rate": 1.4981518180117558e-05, + "loss": 0.8489, + "step": 19680 + }, + { + "epoch": 0.67, + "learning_rate": 1.497226876749815e-05, + "loss": 0.8442, + "step": 19700 + }, + { + "epoch": 0.67, + "learning_rate": 1.4963013700448969e-05, + "loss": 0.7982, + "step": 19720 + }, + { + "epoch": 0.67, + "learning_rate": 1.4953752989494815e-05, + "loss": 0.7978, + "step": 19740 + }, + { + "epoch": 0.67, + "learning_rate": 1.4944486645166892e-05, + "loss": 0.8114, + "step": 19760 + }, + { + "epoch": 0.67, + "learning_rate": 1.4935214678002834e-05, + "loss": 0.8342, + "step": 19780 + }, + { + "epoch": 0.67, + "learning_rate": 1.4925937098546652e-05, + "loss": 0.7771, + "step": 19800 + }, + { + "epoch": 0.67, + "learning_rate": 1.4916653917348751e-05, + "loss": 0.9235, + "step": 19820 + }, + { + "epoch": 0.67, + "learning_rate": 1.4907365144965896e-05, + "loss": 0.7855, + "step": 19840 + }, + { + "epoch": 0.67, + "learning_rate": 1.4898070791961215e-05, + "loss": 0.8699, + "step": 19860 + }, + { + "epoch": 0.67, + "learning_rate": 1.4888770868904185e-05, + "loss": 0.8658, + "step": 19880 + }, + { + "epoch": 0.68, + "learning_rate": 1.4879465386370613e-05, + "loss": 0.8394, + "step": 19900 + }, + { + "epoch": 0.68, + "learning_rate": 1.487015435494263e-05, + "loss": 0.8058, + "step": 19920 + }, + { + "epoch": 0.68, + "learning_rate": 1.4860837785208678e-05, + "loss": 0.8074, + "step": 19940 + }, + { + "epoch": 0.68, + "learning_rate": 1.4851515687763492e-05, + "loss": 0.8149, + "step": 19960 + }, + { + "epoch": 0.68, + "learning_rate": 1.48421880732081e-05, + "loss": 0.7852, + "step": 19980 + }, + { + "epoch": 0.68, + "learning_rate": 1.48328549521498e-05, + "loss": 0.8215, + "step": 20000 + }, + { + "epoch": 0.68, + "learning_rate": 1.4823516335202152e-05, + "loss": 0.8319, + "step": 20020 + }, + { + "epoch": 0.68, + "learning_rate": 1.4814172232984968e-05, + "loss": 0.799, + "step": 20040 + }, + { + "epoch": 0.68, + "learning_rate": 1.4804822656124302e-05, + "loss": 0.8033, + "step": 20060 + }, + { + "epoch": 0.68, + "learning_rate": 1.479546761525242e-05, + "loss": 0.8247, + "step": 20080 + }, + { + "epoch": 0.68, + "learning_rate": 1.4786107121007813e-05, + "loss": 0.7516, + "step": 20100 + }, + { + "epoch": 0.68, + "learning_rate": 1.4776741184035171e-05, + "loss": 0.8348, + "step": 20120 + }, + { + "epoch": 0.68, + "learning_rate": 1.476736981498537e-05, + "loss": 0.8229, + "step": 20140 + }, + { + "epoch": 0.68, + "learning_rate": 1.475799302451547e-05, + "loss": 0.8529, + "step": 20160 + }, + { + "epoch": 0.68, + "learning_rate": 1.4748610823288693e-05, + "loss": 0.8749, + "step": 20180 + }, + { + "epoch": 0.69, + "learning_rate": 1.4739223221974415e-05, + "loss": 0.8914, + "step": 20200 + }, + { + "epoch": 0.69, + "learning_rate": 1.4729830231248143e-05, + "loss": 0.8839, + "step": 20220 + }, + { + "epoch": 0.69, + "learning_rate": 1.4720431861791531e-05, + "loss": 0.8345, + "step": 20240 + }, + { + "epoch": 0.69, + "learning_rate": 1.4711028124292332e-05, + "loss": 0.8047, + "step": 20260 + }, + { + "epoch": 0.69, + "learning_rate": 1.4701619029444419e-05, + "loss": 0.8083, + "step": 20280 + }, + { + "epoch": 0.69, + "learning_rate": 1.4692204587947745e-05, + "loss": 0.8185, + "step": 20300 + }, + { + "epoch": 0.69, + "learning_rate": 1.468278481050835e-05, + "loss": 0.8176, + "step": 20320 + }, + { + "epoch": 0.69, + "learning_rate": 1.4673359707838338e-05, + "loss": 0.8108, + "step": 20340 + }, + { + "epoch": 0.69, + "learning_rate": 1.4663929290655872e-05, + "loss": 0.8012, + "step": 20360 + }, + { + "epoch": 0.69, + "learning_rate": 1.4654493569685157e-05, + "loss": 0.8082, + "step": 20380 + }, + { + "epoch": 0.69, + "learning_rate": 1.464505255565643e-05, + "loss": 0.833, + "step": 20400 + }, + { + "epoch": 0.69, + "learning_rate": 1.4635606259305948e-05, + "loss": 0.819, + "step": 20420 + }, + { + "epoch": 0.69, + "learning_rate": 1.4626154691375974e-05, + "loss": 0.8406, + "step": 20440 + }, + { + "epoch": 0.69, + "learning_rate": 1.4616697862614766e-05, + "loss": 0.8467, + "step": 20460 + }, + { + "epoch": 0.7, + "learning_rate": 1.4607235783776563e-05, + "loss": 0.8298, + "step": 20480 + }, + { + "epoch": 0.7, + "learning_rate": 1.4597768465621575e-05, + "loss": 0.7843, + "step": 20500 + }, + { + "epoch": 0.7, + "learning_rate": 1.4588295918915978e-05, + "loss": 0.8426, + "step": 20520 + }, + { + "epoch": 0.7, + "learning_rate": 1.4578818154431878e-05, + "loss": 0.7621, + "step": 20540 + }, + { + "epoch": 0.7, + "learning_rate": 1.4569335182947325e-05, + "loss": 0.8628, + "step": 20560 + }, + { + "epoch": 0.7, + "learning_rate": 1.4559847015246294e-05, + "loss": 0.7603, + "step": 20580 + }, + { + "epoch": 0.7, + "learning_rate": 1.4550353662118659e-05, + "loss": 0.7862, + "step": 20600 + }, + { + "epoch": 0.7, + "learning_rate": 1.4540855134360198e-05, + "loss": 0.7932, + "step": 20620 + }, + { + "epoch": 0.7, + "learning_rate": 1.453135144277257e-05, + "loss": 0.7972, + "step": 20640 + }, + { + "epoch": 0.7, + "learning_rate": 1.4521842598163309e-05, + "loss": 0.8247, + "step": 20660 + }, + { + "epoch": 0.7, + "learning_rate": 1.4512328611345808e-05, + "loss": 0.8215, + "step": 20680 + }, + { + "epoch": 0.7, + "learning_rate": 1.4502809493139305e-05, + "loss": 0.7678, + "step": 20700 + }, + { + "epoch": 0.7, + "learning_rate": 1.4493285254368878e-05, + "loss": 0.8288, + "step": 20720 + }, + { + "epoch": 0.7, + "learning_rate": 1.4483755905865423e-05, + "loss": 0.7892, + "step": 20740 + }, + { + "epoch": 0.7, + "learning_rate": 1.4474221458465651e-05, + "loss": 0.875, + "step": 20760 + }, + { + "epoch": 0.71, + "learning_rate": 1.4464681923012072e-05, + "loss": 0.7519, + "step": 20780 + }, + { + "epoch": 0.71, + "learning_rate": 1.4455137310352978e-05, + "loss": 0.8257, + "step": 20800 + }, + { + "epoch": 0.71, + "learning_rate": 1.4445587631342433e-05, + "loss": 0.8261, + "step": 20820 + }, + { + "epoch": 0.71, + "learning_rate": 1.4436032896840271e-05, + "loss": 0.8359, + "step": 20840 + }, + { + "epoch": 0.71, + "learning_rate": 1.4426473117712071e-05, + "loss": 0.8074, + "step": 20860 + }, + { + "epoch": 0.71, + "learning_rate": 1.4416908304829142e-05, + "loss": 0.8617, + "step": 20880 + }, + { + "epoch": 0.71, + "learning_rate": 1.4407338469068532e-05, + "loss": 0.7763, + "step": 20900 + }, + { + "epoch": 0.71, + "learning_rate": 1.4397763621312985e-05, + "loss": 0.8333, + "step": 20920 + }, + { + "epoch": 0.71, + "learning_rate": 1.4388183772450957e-05, + "loss": 0.8397, + "step": 20940 + }, + { + "epoch": 0.71, + "learning_rate": 1.437859893337658e-05, + "loss": 0.8307, + "step": 20960 + }, + { + "epoch": 0.71, + "learning_rate": 1.4369009114989671e-05, + "loss": 0.8456, + "step": 20980 + }, + { + "epoch": 0.71, + "learning_rate": 1.4359414328195704e-05, + "loss": 0.8202, + "step": 21000 + }, + { + "epoch": 0.71, + "learning_rate": 1.4349814583905807e-05, + "loss": 0.7784, + "step": 21020 + }, + { + "epoch": 0.71, + "learning_rate": 1.434020989303674e-05, + "loss": 0.8052, + "step": 21040 + }, + { + "epoch": 0.71, + "learning_rate": 1.4330600266510895e-05, + "loss": 0.7646, + "step": 21060 + }, + { + "epoch": 0.72, + "learning_rate": 1.4320985715256269e-05, + "loss": 0.8493, + "step": 21080 + }, + { + "epoch": 0.72, + "learning_rate": 1.4311366250206464e-05, + "loss": 0.7595, + "step": 21100 + }, + { + "epoch": 0.72, + "learning_rate": 1.4301741882300673e-05, + "loss": 0.7798, + "step": 21120 + }, + { + "epoch": 0.72, + "learning_rate": 1.4292112622483653e-05, + "loss": 0.827, + "step": 21140 + }, + { + "epoch": 0.72, + "learning_rate": 1.4282478481705742e-05, + "loss": 0.9012, + "step": 21160 + }, + { + "epoch": 0.72, + "learning_rate": 1.4272839470922812e-05, + "loss": 0.7908, + "step": 21180 + }, + { + "epoch": 0.72, + "learning_rate": 1.426319560109628e-05, + "loss": 0.8161, + "step": 21200 + }, + { + "epoch": 0.72, + "learning_rate": 1.4253546883193093e-05, + "loss": 0.8432, + "step": 21220 + }, + { + "epoch": 0.72, + "learning_rate": 1.42438933281857e-05, + "loss": 0.797, + "step": 21240 + }, + { + "epoch": 0.72, + "learning_rate": 1.4234234947052063e-05, + "loss": 0.8394, + "step": 21260 + }, + { + "epoch": 0.72, + "learning_rate": 1.4224571750775625e-05, + "loss": 0.8191, + "step": 21280 + }, + { + "epoch": 0.72, + "learning_rate": 1.4214903750345298e-05, + "loss": 0.7181, + "step": 21300 + }, + { + "epoch": 0.72, + "learning_rate": 1.4205230956755479e-05, + "loss": 0.8326, + "step": 21320 + }, + { + "epoch": 0.72, + "learning_rate": 1.419555338100599e-05, + "loss": 0.8444, + "step": 21340 + }, + { + "epoch": 0.73, + "learning_rate": 1.4185871034102117e-05, + "loss": 0.7624, + "step": 21360 + }, + { + "epoch": 0.73, + "learning_rate": 1.4176183927054545e-05, + "loss": 0.8136, + "step": 21380 + }, + { + "epoch": 0.73, + "learning_rate": 1.4166492070879389e-05, + "loss": 0.9168, + "step": 21400 + }, + { + "epoch": 0.73, + "learning_rate": 1.4156795476598166e-05, + "loss": 0.7983, + "step": 21420 + }, + { + "epoch": 0.73, + "learning_rate": 1.4147094155237772e-05, + "loss": 0.7842, + "step": 21440 + }, + { + "epoch": 0.73, + "learning_rate": 1.4137388117830486e-05, + "loss": 0.7559, + "step": 21460 + }, + { + "epoch": 0.73, + "learning_rate": 1.4127677375413943e-05, + "loss": 0.7416, + "step": 21480 + }, + { + "epoch": 0.73, + "learning_rate": 1.4117961939031135e-05, + "loss": 0.8395, + "step": 21500 + }, + { + "epoch": 0.73, + "learning_rate": 1.4108241819730384e-05, + "loss": 0.8557, + "step": 21520 + }, + { + "epoch": 0.73, + "learning_rate": 1.409851702856535e-05, + "loss": 0.7085, + "step": 21540 + }, + { + "epoch": 0.73, + "learning_rate": 1.4088787576594996e-05, + "loss": 0.8655, + "step": 21560 + }, + { + "epoch": 0.73, + "learning_rate": 1.4079053474883584e-05, + "loss": 0.754, + "step": 21580 + }, + { + "epoch": 0.73, + "learning_rate": 1.4069314734500674e-05, + "loss": 0.8455, + "step": 21600 + }, + { + "epoch": 0.73, + "learning_rate": 1.405957136652109e-05, + "loss": 0.8776, + "step": 21620 + }, + { + "epoch": 0.73, + "learning_rate": 1.4049823382024922e-05, + "loss": 0.7825, + "step": 21640 + }, + { + "epoch": 0.74, + "learning_rate": 1.4040070792097515e-05, + "loss": 0.7666, + "step": 21660 + }, + { + "epoch": 0.74, + "learning_rate": 1.4030313607829444e-05, + "loss": 0.8492, + "step": 21680 + }, + { + "epoch": 0.74, + "learning_rate": 1.4020551840316515e-05, + "loss": 0.8367, + "step": 21700 + }, + { + "epoch": 0.74, + "learning_rate": 1.4010785500659735e-05, + "loss": 0.8087, + "step": 21720 + }, + { + "epoch": 0.74, + "learning_rate": 1.4001014599965327e-05, + "loss": 0.7934, + "step": 21740 + }, + { + "epoch": 0.74, + "learning_rate": 1.3991239149344686e-05, + "loss": 0.7869, + "step": 21760 + }, + { + "epoch": 0.74, + "learning_rate": 1.3981459159914392e-05, + "loss": 0.8105, + "step": 21780 + }, + { + "epoch": 0.74, + "learning_rate": 1.3971674642796178e-05, + "loss": 0.878, + "step": 21800 + }, + { + "epoch": 0.74, + "learning_rate": 1.396188560911693e-05, + "loss": 0.8064, + "step": 21820 + }, + { + "epoch": 0.74, + "learning_rate": 1.3952092070008669e-05, + "loss": 0.8331, + "step": 21840 + }, + { + "epoch": 0.74, + "learning_rate": 1.3942294036608544e-05, + "loss": 0.8226, + "step": 21860 + }, + { + "epoch": 0.74, + "learning_rate": 1.3932491520058807e-05, + "loss": 0.797, + "step": 21880 + }, + { + "epoch": 0.74, + "learning_rate": 1.392268453150681e-05, + "loss": 0.8333, + "step": 21900 + }, + { + "epoch": 0.74, + "learning_rate": 1.3912873082105002e-05, + "loss": 0.8006, + "step": 21920 + }, + { + "epoch": 0.74, + "learning_rate": 1.3903057183010887e-05, + "loss": 0.7301, + "step": 21940 + }, + { + "epoch": 0.75, + "learning_rate": 1.3893236845387041e-05, + "loss": 0.7378, + "step": 21960 + }, + { + "epoch": 0.75, + "learning_rate": 1.3883412080401083e-05, + "loss": 0.7564, + "step": 21980 + }, + { + "epoch": 0.75, + "learning_rate": 1.3873582899225672e-05, + "loss": 0.8053, + "step": 22000 + }, + { + "epoch": 0.75, + "learning_rate": 1.3863749313038484e-05, + "loss": 0.8445, + "step": 22020 + }, + { + "epoch": 0.75, + "learning_rate": 1.3853911333022199e-05, + "loss": 0.767, + "step": 22040 + }, + { + "epoch": 0.75, + "learning_rate": 1.3844068970364506e-05, + "loss": 0.7772, + "step": 22060 + }, + { + "epoch": 0.75, + "learning_rate": 1.383422223625807e-05, + "loss": 0.851, + "step": 22080 + }, + { + "epoch": 0.75, + "learning_rate": 1.3824371141900533e-05, + "loss": 0.76, + "step": 22100 + }, + { + "epoch": 0.75, + "learning_rate": 1.3814515698494484e-05, + "loss": 0.7725, + "step": 22120 + }, + { + "epoch": 0.75, + "learning_rate": 1.3804655917247471e-05, + "loss": 0.7981, + "step": 22140 + }, + { + "epoch": 0.75, + "learning_rate": 1.3794791809371971e-05, + "loss": 0.7698, + "step": 22160 + }, + { + "epoch": 0.75, + "learning_rate": 1.3784923386085373e-05, + "loss": 0.8396, + "step": 22180 + }, + { + "epoch": 0.75, + "learning_rate": 1.3775050658609988e-05, + "loss": 0.8182, + "step": 22200 + }, + { + "epoch": 0.75, + "learning_rate": 1.3765173638173004e-05, + "loss": 0.8189, + "step": 22220 + }, + { + "epoch": 0.75, + "learning_rate": 1.3755292336006512e-05, + "loss": 0.7969, + "step": 22240 + }, + { + "epoch": 0.76, + "learning_rate": 1.3745406763347446e-05, + "loss": 0.7591, + "step": 22260 + }, + { + "epoch": 0.76, + "learning_rate": 1.3735516931437625e-05, + "loss": 0.8119, + "step": 22280 + }, + { + "epoch": 0.76, + "learning_rate": 1.3725622851523692e-05, + "loss": 0.8237, + "step": 22300 + }, + { + "epoch": 0.76, + "learning_rate": 1.3715724534857127e-05, + "loss": 0.7985, + "step": 22320 + }, + { + "epoch": 0.76, + "learning_rate": 1.3705821992694228e-05, + "loss": 0.7909, + "step": 22340 + }, + { + "epoch": 0.76, + "learning_rate": 1.3695915236296095e-05, + "loss": 0.7241, + "step": 22360 + }, + { + "epoch": 0.76, + "learning_rate": 1.3686004276928627e-05, + "loss": 0.7716, + "step": 22380 + }, + { + "epoch": 0.76, + "learning_rate": 1.3676089125862495e-05, + "loss": 0.8163, + "step": 22400 + }, + { + "epoch": 0.76, + "learning_rate": 1.3666169794373141e-05, + "loss": 0.7784, + "step": 22420 + }, + { + "epoch": 0.76, + "learning_rate": 1.3656246293740767e-05, + "loss": 0.7793, + "step": 22440 + }, + { + "epoch": 0.76, + "learning_rate": 1.36463186352503e-05, + "loss": 0.8619, + "step": 22460 + }, + { + "epoch": 0.76, + "learning_rate": 1.3636386830191409e-05, + "loss": 0.7603, + "step": 22480 + }, + { + "epoch": 0.76, + "learning_rate": 1.3626450889858476e-05, + "loss": 0.786, + "step": 22500 + }, + { + "epoch": 0.76, + "learning_rate": 1.3616510825550582e-05, + "loss": 0.8596, + "step": 22520 + }, + { + "epoch": 0.77, + "learning_rate": 1.36065666485715e-05, + "loss": 0.8392, + "step": 22540 + }, + { + "epoch": 0.77, + "learning_rate": 1.3596618370229679e-05, + "loss": 0.699, + "step": 22560 + }, + { + "epoch": 0.77, + "learning_rate": 1.3586666001838231e-05, + "loss": 0.7612, + "step": 22580 + }, + { + "epoch": 0.77, + "learning_rate": 1.3576709554714923e-05, + "loss": 0.8525, + "step": 22600 + }, + { + "epoch": 0.77, + "learning_rate": 1.356674904018216e-05, + "loss": 0.8214, + "step": 22620 + }, + { + "epoch": 0.77, + "learning_rate": 1.3556784469566968e-05, + "loss": 0.7399, + "step": 22640 + }, + { + "epoch": 0.77, + "learning_rate": 1.3546815854200987e-05, + "loss": 0.8157, + "step": 22660 + }, + { + "epoch": 0.77, + "learning_rate": 1.353684320542046e-05, + "loss": 0.8849, + "step": 22680 + }, + { + "epoch": 0.77, + "learning_rate": 1.3526866534566213e-05, + "loss": 0.8243, + "step": 22700 + }, + { + "epoch": 0.77, + "learning_rate": 1.3516885852983645e-05, + "loss": 0.7794, + "step": 22720 + }, + { + "epoch": 0.77, + "learning_rate": 1.3506901172022724e-05, + "loss": 0.7773, + "step": 22740 + }, + { + "epoch": 0.77, + "learning_rate": 1.3496912503037959e-05, + "loss": 0.7399, + "step": 22760 + }, + { + "epoch": 0.77, + "learning_rate": 1.3486919857388392e-05, + "loss": 0.8805, + "step": 22780 + }, + { + "epoch": 0.77, + "learning_rate": 1.347692324643759e-05, + "loss": 0.7985, + "step": 22800 + }, + { + "epoch": 0.77, + "learning_rate": 1.3466922681553636e-05, + "loss": 0.7705, + "step": 22820 + }, + { + "epoch": 0.78, + "learning_rate": 1.3456918174109102e-05, + "loss": 0.8026, + "step": 22840 + }, + { + "epoch": 0.78, + "learning_rate": 1.344690973548104e-05, + "loss": 0.7575, + "step": 22860 + }, + { + "epoch": 0.78, + "learning_rate": 1.3436897377050982e-05, + "loss": 0.7957, + "step": 22880 + }, + { + "epoch": 0.78, + "learning_rate": 1.342688111020491e-05, + "loss": 0.7325, + "step": 22900 + }, + { + "epoch": 0.78, + "learning_rate": 1.3416860946333256e-05, + "loss": 0.8267, + "step": 22920 + }, + { + "epoch": 0.78, + "learning_rate": 1.3406836896830884e-05, + "loss": 0.8662, + "step": 22940 + }, + { + "epoch": 0.78, + "learning_rate": 1.3396808973097066e-05, + "loss": 0.8306, + "step": 22960 + }, + { + "epoch": 0.78, + "learning_rate": 1.3386777186535495e-05, + "loss": 0.8397, + "step": 22980 + }, + { + "epoch": 0.78, + "learning_rate": 1.3376741548554248e-05, + "loss": 0.7559, + "step": 23000 + }, + { + "epoch": 0.78, + "learning_rate": 1.3366702070565775e-05, + "loss": 0.801, + "step": 23020 + }, + { + "epoch": 0.78, + "learning_rate": 1.3356658763986919e-05, + "loss": 0.7433, + "step": 23040 + }, + { + "epoch": 0.78, + "learning_rate": 1.334661164023884e-05, + "loss": 0.8088, + "step": 23060 + }, + { + "epoch": 0.78, + "learning_rate": 1.3336560710747073e-05, + "loss": 0.8296, + "step": 23080 + }, + { + "epoch": 0.78, + "learning_rate": 1.3326505986941456e-05, + "loss": 0.7567, + "step": 23100 + }, + { + "epoch": 0.78, + "learning_rate": 1.3316447480256153e-05, + "loss": 0.7866, + "step": 23120 + }, + { + "epoch": 0.79, + "learning_rate": 1.3306385202129633e-05, + "loss": 0.7891, + "step": 23140 + }, + { + "epoch": 0.79, + "learning_rate": 1.3296319164004646e-05, + "loss": 0.7994, + "step": 23160 + }, + { + "epoch": 0.79, + "learning_rate": 1.3286249377328222e-05, + "loss": 0.7251, + "step": 23180 + }, + { + "epoch": 0.79, + "learning_rate": 1.327617585355165e-05, + "loss": 0.7805, + "step": 23200 + }, + { + "epoch": 0.79, + "learning_rate": 1.3266098604130477e-05, + "loss": 0.7611, + "step": 23220 + }, + { + "epoch": 0.79, + "learning_rate": 1.325601764052448e-05, + "loss": 0.8033, + "step": 23240 + }, + { + "epoch": 0.79, + "learning_rate": 1.324593297419766e-05, + "loss": 0.8043, + "step": 23260 + }, + { + "epoch": 0.79, + "learning_rate": 1.323584461661823e-05, + "loss": 0.7901, + "step": 23280 + }, + { + "epoch": 0.79, + "learning_rate": 1.3225752579258599e-05, + "loss": 0.797, + "step": 23300 + }, + { + "epoch": 0.79, + "learning_rate": 1.3215656873595366e-05, + "loss": 0.8191, + "step": 23320 + }, + { + "epoch": 0.79, + "learning_rate": 1.3205557511109298e-05, + "loss": 0.7936, + "step": 23340 + }, + { + "epoch": 0.79, + "learning_rate": 1.319545450328532e-05, + "loss": 0.8261, + "step": 23360 + }, + { + "epoch": 0.79, + "learning_rate": 1.3185347861612501e-05, + "loss": 0.7741, + "step": 23380 + }, + { + "epoch": 0.79, + "learning_rate": 1.3175237597584045e-05, + "loss": 0.7602, + "step": 23400 + }, + { + "epoch": 0.79, + "learning_rate": 1.3165123722697276e-05, + "loss": 0.7664, + "step": 23420 + }, + { + "epoch": 0.8, + "learning_rate": 1.3155006248453615e-05, + "loss": 0.7819, + "step": 23440 + }, + { + "epoch": 0.8, + "learning_rate": 1.3144885186358596e-05, + "loss": 0.7912, + "step": 23460 + }, + { + "epoch": 0.8, + "learning_rate": 1.3134760547921814e-05, + "loss": 0.8161, + "step": 23480 + }, + { + "epoch": 0.8, + "learning_rate": 1.3124632344656943e-05, + "loss": 0.7957, + "step": 23500 + }, + { + "epoch": 0.8, + "learning_rate": 1.3114500588081698e-05, + "loss": 0.8164, + "step": 23520 + }, + { + "epoch": 0.8, + "learning_rate": 1.3104365289717849e-05, + "loss": 0.7976, + "step": 23540 + }, + { + "epoch": 0.8, + "learning_rate": 1.3094226461091186e-05, + "loss": 0.7437, + "step": 23560 + }, + { + "epoch": 0.8, + "learning_rate": 1.3084084113731514e-05, + "loss": 0.84, + "step": 23580 + }, + { + "epoch": 0.8, + "learning_rate": 1.3073938259172645e-05, + "loss": 0.7651, + "step": 23600 + }, + { + "epoch": 0.8, + "learning_rate": 1.3063788908952368e-05, + "loss": 0.7769, + "step": 23620 + }, + { + "epoch": 0.8, + "learning_rate": 1.3053636074612457e-05, + "loss": 0.7899, + "step": 23640 + }, + { + "epoch": 0.8, + "learning_rate": 1.3043479767698646e-05, + "loss": 0.7696, + "step": 23660 + }, + { + "epoch": 0.8, + "learning_rate": 1.3033319999760619e-05, + "loss": 0.7587, + "step": 23680 + }, + { + "epoch": 0.8, + "learning_rate": 1.3023156782351989e-05, + "loss": 0.7693, + "step": 23700 + }, + { + "epoch": 0.81, + "learning_rate": 1.30129901270303e-05, + "loss": 0.7695, + "step": 23720 + }, + { + "epoch": 0.81, + "learning_rate": 1.3002820045357005e-05, + "loss": 0.79, + "step": 23740 + }, + { + "epoch": 0.81, + "learning_rate": 1.2992646548897442e-05, + "loss": 0.8037, + "step": 23760 + }, + { + "epoch": 0.81, + "learning_rate": 1.2982469649220854e-05, + "loss": 0.8023, + "step": 23780 + }, + { + "epoch": 0.81, + "learning_rate": 1.2972289357900332e-05, + "loss": 0.8247, + "step": 23800 + }, + { + "epoch": 0.81, + "learning_rate": 1.2962105686512834e-05, + "loss": 0.8243, + "step": 23820 + }, + { + "epoch": 0.81, + "learning_rate": 1.2951918646639163e-05, + "loss": 0.8695, + "step": 23840 + }, + { + "epoch": 0.81, + "learning_rate": 1.294172824986395e-05, + "loss": 0.8408, + "step": 23860 + }, + { + "epoch": 0.81, + "learning_rate": 1.293153450777564e-05, + "loss": 0.8392, + "step": 23880 + }, + { + "epoch": 0.81, + "learning_rate": 1.2921337431966494e-05, + "loss": 0.7882, + "step": 23900 + }, + { + "epoch": 0.81, + "learning_rate": 1.2911137034032553e-05, + "loss": 0.8327, + "step": 23920 + }, + { + "epoch": 0.81, + "learning_rate": 1.2900933325573636e-05, + "loss": 0.8817, + "step": 23940 + }, + { + "epoch": 0.81, + "learning_rate": 1.2890726318193332e-05, + "loss": 0.8262, + "step": 23960 + }, + { + "epoch": 0.81, + "learning_rate": 1.2880516023498981e-05, + "loss": 0.7659, + "step": 23980 + }, + { + "epoch": 0.81, + "learning_rate": 1.2870302453101657e-05, + "loss": 0.7805, + "step": 24000 + }, + { + "epoch": 0.82, + "learning_rate": 1.2860085618616169e-05, + "loss": 0.8152, + "step": 24020 + }, + { + "epoch": 0.82, + "learning_rate": 1.284986553166102e-05, + "loss": 0.7744, + "step": 24040 + }, + { + "epoch": 0.82, + "learning_rate": 1.2839642203858433e-05, + "loss": 0.7241, + "step": 24060 + }, + { + "epoch": 0.82, + "learning_rate": 1.2829415646834295e-05, + "loss": 0.8229, + "step": 24080 + }, + { + "epoch": 0.82, + "learning_rate": 1.2819185872218184e-05, + "loss": 0.8712, + "step": 24100 + }, + { + "epoch": 0.82, + "learning_rate": 1.2808952891643327e-05, + "loss": 0.7865, + "step": 24120 + }, + { + "epoch": 0.82, + "learning_rate": 1.2798716716746602e-05, + "loss": 0.8225, + "step": 24140 + }, + { + "epoch": 0.82, + "learning_rate": 1.2788477359168512e-05, + "loss": 0.7767, + "step": 24160 + }, + { + "epoch": 0.82, + "learning_rate": 1.2778234830553183e-05, + "loss": 0.8087, + "step": 24180 + }, + { + "epoch": 0.82, + "learning_rate": 1.276798914254835e-05, + "loss": 0.8498, + "step": 24200 + }, + { + "epoch": 0.82, + "learning_rate": 1.2757740306805345e-05, + "loss": 0.785, + "step": 24220 + }, + { + "epoch": 0.82, + "learning_rate": 1.2747488334979064e-05, + "loss": 0.7809, + "step": 24240 + }, + { + "epoch": 0.82, + "learning_rate": 1.2737233238727982e-05, + "loss": 0.77, + "step": 24260 + }, + { + "epoch": 0.82, + "learning_rate": 1.2726975029714124e-05, + "loss": 0.8828, + "step": 24280 + }, + { + "epoch": 0.82, + "learning_rate": 1.2716713719603053e-05, + "loss": 0.8708, + "step": 24300 + }, + { + "epoch": 0.83, + "learning_rate": 1.2706449320063866e-05, + "loss": 0.8074, + "step": 24320 + }, + { + "epoch": 0.83, + "learning_rate": 1.2696181842769161e-05, + "loss": 0.7782, + "step": 24340 + }, + { + "epoch": 0.83, + "learning_rate": 1.2685911299395046e-05, + "loss": 0.8194, + "step": 24360 + }, + { + "epoch": 0.83, + "learning_rate": 1.2675637701621111e-05, + "loss": 0.7642, + "step": 24380 + }, + { + "epoch": 0.83, + "learning_rate": 1.2665361061130416e-05, + "loss": 0.7517, + "step": 24400 + }, + { + "epoch": 0.83, + "learning_rate": 1.2655081389609496e-05, + "loss": 0.7556, + "step": 24420 + }, + { + "epoch": 0.83, + "learning_rate": 1.2644798698748314e-05, + "loss": 0.7667, + "step": 24440 + }, + { + "epoch": 0.83, + "learning_rate": 1.2634513000240277e-05, + "loss": 0.7696, + "step": 24460 + }, + { + "epoch": 0.83, + "learning_rate": 1.2624224305782215e-05, + "loss": 0.8711, + "step": 24480 + }, + { + "epoch": 0.83, + "learning_rate": 1.2613932627074356e-05, + "loss": 0.7339, + "step": 24500 + }, + { + "epoch": 0.83, + "learning_rate": 1.2603637975820324e-05, + "loss": 0.7713, + "step": 24520 + }, + { + "epoch": 0.83, + "learning_rate": 1.2593340363727131e-05, + "loss": 0.7791, + "step": 24540 + }, + { + "epoch": 0.83, + "learning_rate": 1.2583039802505148e-05, + "loss": 0.8241, + "step": 24560 + }, + { + "epoch": 0.83, + "learning_rate": 1.2572736303868102e-05, + "loss": 0.7267, + "step": 24580 + }, + { + "epoch": 0.84, + "learning_rate": 1.256242987953306e-05, + "loss": 0.8083, + "step": 24600 + }, + { + "epoch": 0.84, + "learning_rate": 1.2552120541220419e-05, + "loss": 0.8438, + "step": 24620 + }, + { + "epoch": 0.84, + "learning_rate": 1.2541808300653883e-05, + "loss": 0.7614, + "step": 24640 + }, + { + "epoch": 0.84, + "learning_rate": 1.253149316956047e-05, + "loss": 0.7978, + "step": 24660 + }, + { + "epoch": 0.84, + "learning_rate": 1.2521175159670464e-05, + "loss": 0.7852, + "step": 24680 + }, + { + "epoch": 0.84, + "learning_rate": 1.2510854282717444e-05, + "loss": 0.7439, + "step": 24700 + }, + { + "epoch": 0.84, + "learning_rate": 1.2500530550438232e-05, + "loss": 0.8229, + "step": 24720 + }, + { + "epoch": 0.84, + "learning_rate": 1.2490203974572915e-05, + "loss": 0.7607, + "step": 24740 + }, + { + "epoch": 0.84, + "learning_rate": 1.2479874566864799e-05, + "loss": 0.7557, + "step": 24760 + }, + { + "epoch": 0.84, + "learning_rate": 1.2469542339060414e-05, + "loss": 0.7902, + "step": 24780 + }, + { + "epoch": 0.84, + "learning_rate": 1.2459207302909499e-05, + "loss": 0.7407, + "step": 24800 + }, + { + "epoch": 0.84, + "learning_rate": 1.2448869470164988e-05, + "loss": 0.8128, + "step": 24820 + }, + { + "epoch": 0.84, + "learning_rate": 1.243852885258299e-05, + "loss": 0.7656, + "step": 24840 + }, + { + "epoch": 0.84, + "learning_rate": 1.2428185461922783e-05, + "loss": 0.7291, + "step": 24860 + }, + { + "epoch": 0.84, + "learning_rate": 1.2417839309946806e-05, + "loss": 0.844, + "step": 24880 + }, + { + "epoch": 0.85, + "learning_rate": 1.2407490408420629e-05, + "loss": 0.7925, + "step": 24900 + }, + { + "epoch": 0.85, + "learning_rate": 1.2397138769112949e-05, + "loss": 0.7518, + "step": 24920 + }, + { + "epoch": 0.85, + "learning_rate": 1.2386784403795577e-05, + "loss": 0.7826, + "step": 24940 + }, + { + "epoch": 0.85, + "learning_rate": 1.2376427324243432e-05, + "loss": 0.8126, + "step": 24960 + }, + { + "epoch": 0.85, + "learning_rate": 1.2366067542234512e-05, + "loss": 0.7521, + "step": 24980 + }, + { + "epoch": 0.85, + "learning_rate": 1.2355705069549885e-05, + "loss": 0.8282, + "step": 25000 + }, + { + "epoch": 0.85, + "learning_rate": 1.2345339917973688e-05, + "loss": 0.7748, + "step": 25020 + }, + { + "epoch": 0.85, + "learning_rate": 1.2334972099293102e-05, + "loss": 0.8185, + "step": 25040 + }, + { + "epoch": 0.85, + "learning_rate": 1.2324601625298331e-05, + "loss": 0.8449, + "step": 25060 + }, + { + "epoch": 0.85, + "learning_rate": 1.2314228507782614e-05, + "loss": 0.8139, + "step": 25080 + }, + { + "epoch": 0.85, + "learning_rate": 1.2303852758542187e-05, + "loss": 0.7789, + "step": 25100 + }, + { + "epoch": 0.85, + "learning_rate": 1.2293474389376281e-05, + "loss": 0.8105, + "step": 25120 + }, + { + "epoch": 0.85, + "learning_rate": 1.2283093412087099e-05, + "loss": 0.7276, + "step": 25140 + }, + { + "epoch": 0.85, + "learning_rate": 1.2272709838479824e-05, + "loss": 0.8363, + "step": 25160 + }, + { + "epoch": 0.85, + "learning_rate": 1.2262323680362587e-05, + "loss": 0.7583, + "step": 25180 + }, + { + "epoch": 0.86, + "learning_rate": 1.2251934949546446e-05, + "loss": 0.7626, + "step": 25200 + }, + { + "epoch": 0.86, + "learning_rate": 1.2241543657845403e-05, + "loss": 0.7718, + "step": 25220 + }, + { + "epoch": 0.86, + "learning_rate": 1.2231149817076358e-05, + "loss": 0.7381, + "step": 25240 + }, + { + "epoch": 0.86, + "learning_rate": 1.2220753439059116e-05, + "loss": 0.8, + "step": 25260 + }, + { + "epoch": 0.86, + "learning_rate": 1.2210354535616366e-05, + "loss": 0.7758, + "step": 25280 + }, + { + "epoch": 0.86, + "learning_rate": 1.2199953118573674e-05, + "loss": 0.7634, + "step": 25300 + }, + { + "epoch": 0.86, + "learning_rate": 1.2189549199759454e-05, + "loss": 0.7878, + "step": 25320 + }, + { + "epoch": 0.86, + "learning_rate": 1.2179142791004974e-05, + "loss": 0.7906, + "step": 25340 + }, + { + "epoch": 0.86, + "learning_rate": 1.2168733904144328e-05, + "loss": 0.7498, + "step": 25360 + }, + { + "epoch": 0.86, + "learning_rate": 1.2158322551014431e-05, + "loss": 0.7985, + "step": 25380 + }, + { + "epoch": 0.86, + "learning_rate": 1.2147908743455005e-05, + "loss": 0.7641, + "step": 25400 + }, + { + "epoch": 0.86, + "learning_rate": 1.2137492493308558e-05, + "loss": 0.7765, + "step": 25420 + }, + { + "epoch": 0.86, + "learning_rate": 1.2127073812420376e-05, + "loss": 0.81, + "step": 25440 + }, + { + "epoch": 0.86, + "learning_rate": 1.2116652712638513e-05, + "loss": 0.7686, + "step": 25460 + }, + { + "epoch": 0.86, + "learning_rate": 1.210622920581377e-05, + "loss": 0.7756, + "step": 25480 + }, + { + "epoch": 0.87, + "learning_rate": 1.2095803303799693e-05, + "loss": 0.8119, + "step": 25500 + }, + { + "epoch": 0.87, + "learning_rate": 1.2085375018452541e-05, + "loss": 0.6548, + "step": 25520 + }, + { + "epoch": 0.87, + "learning_rate": 1.2074944361631288e-05, + "loss": 0.7717, + "step": 25540 + }, + { + "epoch": 0.87, + "learning_rate": 1.2064511345197606e-05, + "loss": 0.8245, + "step": 25560 + }, + { + "epoch": 0.87, + "learning_rate": 1.205407598101585e-05, + "loss": 0.7411, + "step": 25580 + }, + { + "epoch": 0.87, + "learning_rate": 1.2043638280953043e-05, + "loss": 0.7405, + "step": 25600 + }, + { + "epoch": 0.87, + "learning_rate": 1.2033198256878867e-05, + "loss": 0.7835, + "step": 25620 + }, + { + "epoch": 0.87, + "learning_rate": 1.2022755920665648e-05, + "loss": 0.7589, + "step": 25640 + }, + { + "epoch": 0.87, + "learning_rate": 1.201231128418833e-05, + "loss": 0.7958, + "step": 25660 + }, + { + "epoch": 0.87, + "learning_rate": 1.200186435932449e-05, + "loss": 0.7981, + "step": 25680 + }, + { + "epoch": 0.87, + "learning_rate": 1.1991415157954293e-05, + "loss": 0.776, + "step": 25700 + }, + { + "epoch": 0.87, + "learning_rate": 1.1980963691960508e-05, + "loss": 0.7557, + "step": 25720 + }, + { + "epoch": 0.87, + "learning_rate": 1.1970509973228459e-05, + "loss": 0.803, + "step": 25740 + }, + { + "epoch": 0.87, + "learning_rate": 1.1960054013646048e-05, + "loss": 0.7916, + "step": 25760 + }, + { + "epoch": 0.88, + "learning_rate": 1.1949595825103721e-05, + "loss": 0.766, + "step": 25780 + }, + { + "epoch": 0.88, + "learning_rate": 1.1939135419494456e-05, + "loss": 0.7568, + "step": 25800 + }, + { + "epoch": 0.88, + "learning_rate": 1.192867280871376e-05, + "loss": 0.7633, + "step": 25820 + }, + { + "epoch": 0.88, + "learning_rate": 1.1918208004659632e-05, + "loss": 0.7924, + "step": 25840 + }, + { + "epoch": 0.88, + "learning_rate": 1.1907741019232584e-05, + "loss": 0.7998, + "step": 25860 + }, + { + "epoch": 0.88, + "learning_rate": 1.1897271864335594e-05, + "loss": 0.7736, + "step": 25880 + }, + { + "epoch": 0.88, + "learning_rate": 1.1886800551874111e-05, + "loss": 0.7891, + "step": 25900 + }, + { + "epoch": 0.88, + "learning_rate": 1.1876327093756048e-05, + "loss": 0.72, + "step": 25920 + }, + { + "epoch": 0.88, + "learning_rate": 1.1865851501891741e-05, + "loss": 0.753, + "step": 25940 + }, + { + "epoch": 0.88, + "learning_rate": 1.1855373788193969e-05, + "loss": 0.7819, + "step": 25960 + }, + { + "epoch": 0.88, + "learning_rate": 1.1844893964577906e-05, + "loss": 0.7506, + "step": 25980 + }, + { + "epoch": 0.88, + "learning_rate": 1.1834412042961143e-05, + "loss": 0.7076, + "step": 26000 + }, + { + "epoch": 0.88, + "learning_rate": 1.1823928035263646e-05, + "loss": 0.7446, + "step": 26020 + }, + { + "epoch": 0.88, + "learning_rate": 1.1813441953407754e-05, + "loss": 0.7617, + "step": 26040 + }, + { + "epoch": 0.88, + "learning_rate": 1.1802953809318173e-05, + "loss": 0.783, + "step": 26060 + }, + { + "epoch": 0.89, + "learning_rate": 1.1792463614921945e-05, + "loss": 0.7408, + "step": 26080 + }, + { + "epoch": 0.89, + "learning_rate": 1.1781971382148446e-05, + "loss": 0.7503, + "step": 26100 + }, + { + "epoch": 0.89, + "learning_rate": 1.177147712292937e-05, + "loss": 0.8042, + "step": 26120 + }, + { + "epoch": 0.89, + "learning_rate": 1.1760980849198717e-05, + "loss": 0.7961, + "step": 26140 + }, + { + "epoch": 0.89, + "learning_rate": 1.1750482572892781e-05, + "loss": 0.75, + "step": 26160 + }, + { + "epoch": 0.89, + "learning_rate": 1.1739982305950124e-05, + "loss": 0.7585, + "step": 26180 + }, + { + "epoch": 0.89, + "learning_rate": 1.172948006031158e-05, + "loss": 0.7957, + "step": 26200 + }, + { + "epoch": 0.89, + "learning_rate": 1.1718975847920229e-05, + "loss": 0.7645, + "step": 26220 + }, + { + "epoch": 0.89, + "learning_rate": 1.170846968072139e-05, + "loss": 0.7516, + "step": 26240 + }, + { + "epoch": 0.89, + "learning_rate": 1.1697961570662606e-05, + "loss": 0.7382, + "step": 26260 + }, + { + "epoch": 0.89, + "learning_rate": 1.1687451529693626e-05, + "loss": 0.834, + "step": 26280 + }, + { + "epoch": 0.89, + "learning_rate": 1.167693956976639e-05, + "loss": 0.8297, + "step": 26300 + }, + { + "epoch": 0.89, + "learning_rate": 1.1666425702835033e-05, + "loss": 0.7982, + "step": 26320 + }, + { + "epoch": 0.89, + "learning_rate": 1.1655909940855852e-05, + "loss": 0.7996, + "step": 26340 + }, + { + "epoch": 0.89, + "learning_rate": 1.1645392295787299e-05, + "loss": 0.7665, + "step": 26360 + }, + { + "epoch": 0.9, + "learning_rate": 1.1634872779589966e-05, + "loss": 0.824, + "step": 26380 + }, + { + "epoch": 0.9, + "learning_rate": 1.1624351404226573e-05, + "loss": 0.7679, + "step": 26400 + }, + { + "epoch": 0.9, + "learning_rate": 1.1613828181661957e-05, + "loss": 0.8051, + "step": 26420 + }, + { + "epoch": 0.9, + "learning_rate": 1.1603303123863054e-05, + "loss": 0.7372, + "step": 26440 + }, + { + "epoch": 0.9, + "learning_rate": 1.1592776242798887e-05, + "loss": 0.7109, + "step": 26460 + }, + { + "epoch": 0.9, + "learning_rate": 1.1582247550440555e-05, + "loss": 0.7646, + "step": 26480 + }, + { + "epoch": 0.9, + "learning_rate": 1.1571717058761211e-05, + "loss": 0.7508, + "step": 26500 + }, + { + "epoch": 0.9, + "learning_rate": 1.1561184779736061e-05, + "loss": 0.7319, + "step": 26520 + }, + { + "epoch": 0.9, + "learning_rate": 1.1550650725342337e-05, + "loss": 0.6879, + "step": 26540 + }, + { + "epoch": 0.9, + "learning_rate": 1.1540114907559294e-05, + "loss": 0.7612, + "step": 26560 + }, + { + "epoch": 0.9, + "learning_rate": 1.1529577338368191e-05, + "loss": 0.7385, + "step": 26580 + }, + { + "epoch": 0.9, + "learning_rate": 1.1519038029752281e-05, + "loss": 0.7635, + "step": 26600 + }, + { + "epoch": 0.9, + "learning_rate": 1.1508496993696791e-05, + "loss": 0.6925, + "step": 26620 + }, + { + "epoch": 0.9, + "learning_rate": 1.1497954242188913e-05, + "loss": 0.7441, + "step": 26640 + }, + { + "epoch": 0.9, + "learning_rate": 1.14874097872178e-05, + "loss": 0.7624, + "step": 26660 + }, + { + "epoch": 0.91, + "learning_rate": 1.1476863640774523e-05, + "loss": 0.7689, + "step": 26680 + }, + { + "epoch": 0.91, + "learning_rate": 1.1466315814852094e-05, + "loss": 0.7357, + "step": 26700 + }, + { + "epoch": 0.91, + "learning_rate": 1.145576632144542e-05, + "loss": 0.7586, + "step": 26720 + }, + { + "epoch": 0.91, + "learning_rate": 1.144521517255132e-05, + "loss": 0.7609, + "step": 26740 + }, + { + "epoch": 0.91, + "learning_rate": 1.1434662380168487e-05, + "loss": 0.7013, + "step": 26760 + }, + { + "epoch": 0.91, + "learning_rate": 1.1424107956297479e-05, + "loss": 0.7781, + "step": 26780 + }, + { + "epoch": 0.91, + "learning_rate": 1.1413551912940721e-05, + "loss": 0.7621, + "step": 26800 + }, + { + "epoch": 0.91, + "learning_rate": 1.1402994262102468e-05, + "loss": 0.7476, + "step": 26820 + }, + { + "epoch": 0.91, + "learning_rate": 1.1392435015788805e-05, + "loss": 0.6879, + "step": 26840 + }, + { + "epoch": 0.91, + "learning_rate": 1.138187418600764e-05, + "loss": 0.7534, + "step": 26860 + }, + { + "epoch": 0.91, + "learning_rate": 1.1371311784768672e-05, + "loss": 0.8091, + "step": 26880 + }, + { + "epoch": 0.91, + "learning_rate": 1.1360747824083397e-05, + "loss": 0.743, + "step": 26900 + }, + { + "epoch": 0.91, + "learning_rate": 1.1350182315965071e-05, + "loss": 0.8519, + "step": 26920 + }, + { + "epoch": 0.91, + "learning_rate": 1.1339615272428722e-05, + "loss": 0.7123, + "step": 26940 + }, + { + "epoch": 0.92, + "learning_rate": 1.1329046705491111e-05, + "loss": 0.7269, + "step": 26960 + }, + { + "epoch": 0.92, + "learning_rate": 1.1318476627170751e-05, + "loss": 0.8065, + "step": 26980 + }, + { + "epoch": 0.92, + "learning_rate": 1.1307905049487855e-05, + "loss": 0.7649, + "step": 27000 + }, + { + "epoch": 0.92, + "learning_rate": 1.129733198446435e-05, + "loss": 0.7449, + "step": 27020 + }, + { + "epoch": 0.92, + "learning_rate": 1.1286757444123854e-05, + "loss": 0.761, + "step": 27040 + }, + { + "epoch": 0.92, + "learning_rate": 1.1276181440491658e-05, + "loss": 0.7445, + "step": 27060 + }, + { + "epoch": 0.92, + "learning_rate": 1.1265603985594723e-05, + "loss": 0.758, + "step": 27080 + }, + { + "epoch": 0.92, + "learning_rate": 1.1255025091461657e-05, + "loss": 0.7528, + "step": 27100 + }, + { + "epoch": 0.92, + "learning_rate": 1.1244444770122707e-05, + "loss": 0.7695, + "step": 27120 + }, + { + "epoch": 0.92, + "learning_rate": 1.123386303360974e-05, + "loss": 0.7742, + "step": 27140 + }, + { + "epoch": 0.92, + "learning_rate": 1.1223279893956234e-05, + "loss": 0.8097, + "step": 27160 + }, + { + "epoch": 0.92, + "learning_rate": 1.1212695363197262e-05, + "loss": 0.8056, + "step": 27180 + }, + { + "epoch": 0.92, + "learning_rate": 1.120210945336948e-05, + "loss": 0.7472, + "step": 27200 + }, + { + "epoch": 0.92, + "learning_rate": 1.1191522176511112e-05, + "loss": 0.8154, + "step": 27220 + }, + { + "epoch": 0.92, + "learning_rate": 1.1180933544661936e-05, + "loss": 0.736, + "step": 27240 + }, + { + "epoch": 0.93, + "learning_rate": 1.1170343569863274e-05, + "loss": 0.7416, + "step": 27260 + }, + { + "epoch": 0.93, + "learning_rate": 1.1159752264157963e-05, + "loss": 0.768, + "step": 27280 + }, + { + "epoch": 0.93, + "learning_rate": 1.1149159639590377e-05, + "loss": 0.7803, + "step": 27300 + }, + { + "epoch": 0.93, + "learning_rate": 1.1138565708206364e-05, + "loss": 0.7374, + "step": 27320 + }, + { + "epoch": 0.93, + "learning_rate": 1.1127970482053274e-05, + "loss": 0.7396, + "step": 27340 + }, + { + "epoch": 0.93, + "learning_rate": 1.1117373973179925e-05, + "loss": 0.7836, + "step": 27360 + }, + { + "epoch": 0.93, + "learning_rate": 1.1106776193636595e-05, + "loss": 0.7813, + "step": 27380 + }, + { + "epoch": 0.93, + "learning_rate": 1.1096177155475006e-05, + "loss": 0.805, + "step": 27400 + }, + { + "epoch": 0.93, + "learning_rate": 1.1085576870748307e-05, + "loss": 0.7897, + "step": 27420 + }, + { + "epoch": 0.93, + "learning_rate": 1.1074975351511075e-05, + "loss": 0.7369, + "step": 27440 + }, + { + "epoch": 0.93, + "learning_rate": 1.1064372609819279e-05, + "loss": 0.7486, + "step": 27460 + }, + { + "epoch": 0.93, + "learning_rate": 1.1053768657730284e-05, + "loss": 0.7335, + "step": 27480 + }, + { + "epoch": 0.93, + "learning_rate": 1.1043163507302835e-05, + "loss": 0.8417, + "step": 27500 + }, + { + "epoch": 0.93, + "learning_rate": 1.1032557170597034e-05, + "loss": 0.6774, + "step": 27520 + }, + { + "epoch": 0.93, + "learning_rate": 1.1021949659674337e-05, + "loss": 0.7958, + "step": 27540 + }, + { + "epoch": 0.94, + "learning_rate": 1.1011340986597528e-05, + "loss": 0.8394, + "step": 27560 + }, + { + "epoch": 0.94, + "learning_rate": 1.100073116343072e-05, + "loss": 0.799, + "step": 27580 + }, + { + "epoch": 0.94, + "learning_rate": 1.0990120202239324e-05, + "loss": 0.8001, + "step": 27600 + }, + { + "epoch": 0.94, + "learning_rate": 1.097950811509006e-05, + "loss": 0.7543, + "step": 27620 + }, + { + "epoch": 0.94, + "learning_rate": 1.096889491405092e-05, + "loss": 0.7774, + "step": 27640 + }, + { + "epoch": 0.94, + "learning_rate": 1.0958280611191162e-05, + "loss": 0.7711, + "step": 27660 + }, + { + "epoch": 0.94, + "learning_rate": 1.0947665218581293e-05, + "loss": 0.8092, + "step": 27680 + }, + { + "epoch": 0.94, + "learning_rate": 1.0937048748293068e-05, + "loss": 0.7781, + "step": 27700 + }, + { + "epoch": 0.94, + "learning_rate": 1.0926431212399467e-05, + "loss": 0.7465, + "step": 27720 + }, + { + "epoch": 0.94, + "learning_rate": 1.0915812622974673e-05, + "loss": 0.7258, + "step": 27740 + }, + { + "epoch": 0.94, + "learning_rate": 1.0905192992094075e-05, + "loss": 0.7642, + "step": 27760 + }, + { + "epoch": 0.94, + "learning_rate": 1.0894572331834248e-05, + "loss": 0.7676, + "step": 27780 + }, + { + "epoch": 0.94, + "learning_rate": 1.0883950654272925e-05, + "loss": 0.838, + "step": 27800 + }, + { + "epoch": 0.94, + "learning_rate": 1.0873327971489015e-05, + "loss": 0.7291, + "step": 27820 + }, + { + "epoch": 0.95, + "learning_rate": 1.086270429556255e-05, + "loss": 0.7436, + "step": 27840 + }, + { + "epoch": 0.95, + "learning_rate": 1.0852079638574708e-05, + "loss": 0.7519, + "step": 27860 + }, + { + "epoch": 0.95, + "learning_rate": 1.084145401260777e-05, + "loss": 0.8155, + "step": 27880 + }, + { + "epoch": 0.95, + "learning_rate": 1.083082742974513e-05, + "loss": 0.7486, + "step": 27900 + }, + { + "epoch": 0.95, + "learning_rate": 1.082019990207126e-05, + "loss": 0.6917, + "step": 27920 + }, + { + "epoch": 0.95, + "learning_rate": 1.0809571441671716e-05, + "loss": 0.7539, + "step": 27940 + }, + { + "epoch": 0.95, + "learning_rate": 1.0798942060633108e-05, + "loss": 0.7732, + "step": 27960 + }, + { + "epoch": 0.95, + "learning_rate": 1.0788311771043092e-05, + "loss": 0.7179, + "step": 27980 + }, + { + "epoch": 0.95, + "learning_rate": 1.0777680584990366e-05, + "loss": 0.8022, + "step": 28000 + }, + { + "epoch": 0.95, + "learning_rate": 1.0767048514564633e-05, + "loss": 0.8168, + "step": 28020 + }, + { + "epoch": 0.95, + "learning_rate": 1.0756415571856617e-05, + "loss": 0.766, + "step": 28040 + }, + { + "epoch": 0.95, + "learning_rate": 1.074578176895803e-05, + "loss": 0.744, + "step": 28060 + }, + { + "epoch": 0.95, + "learning_rate": 1.073514711796155e-05, + "loss": 0.7179, + "step": 28080 + }, + { + "epoch": 0.95, + "learning_rate": 1.0724511630960837e-05, + "loss": 0.7154, + "step": 28100 + }, + { + "epoch": 0.95, + "learning_rate": 1.0713875320050487e-05, + "loss": 0.7586, + "step": 28120 + }, + { + "epoch": 0.96, + "learning_rate": 1.0703238197326042e-05, + "loss": 0.828, + "step": 28140 + }, + { + "epoch": 0.96, + "learning_rate": 1.0692600274883969e-05, + "loss": 0.7452, + "step": 28160 + }, + { + "epoch": 0.96, + "learning_rate": 1.0681961564821632e-05, + "loss": 0.7193, + "step": 28180 + }, + { + "epoch": 0.96, + "learning_rate": 1.0671322079237307e-05, + "loss": 0.7795, + "step": 28200 + }, + { + "epoch": 0.96, + "learning_rate": 1.0660681830230137e-05, + "loss": 0.7692, + "step": 28220 + }, + { + "epoch": 0.96, + "learning_rate": 1.0650040829900143e-05, + "loss": 0.7935, + "step": 28240 + }, + { + "epoch": 0.96, + "learning_rate": 1.06393990903482e-05, + "loss": 0.7645, + "step": 28260 + }, + { + "epoch": 0.96, + "learning_rate": 1.0628756623676016e-05, + "loss": 0.8378, + "step": 28280 + }, + { + "epoch": 0.96, + "learning_rate": 1.061811344198613e-05, + "loss": 0.7507, + "step": 28300 + }, + { + "epoch": 0.96, + "learning_rate": 1.06074695573819e-05, + "loss": 0.8633, + "step": 28320 + }, + { + "epoch": 0.96, + "learning_rate": 1.0596824981967475e-05, + "loss": 0.7645, + "step": 28340 + }, + { + "epoch": 0.96, + "learning_rate": 1.0586179727847791e-05, + "loss": 0.7723, + "step": 28360 + }, + { + "epoch": 0.96, + "learning_rate": 1.0575533807128567e-05, + "loss": 0.8174, + "step": 28380 + }, + { + "epoch": 0.96, + "learning_rate": 1.0564887231916256e-05, + "loss": 0.7738, + "step": 28400 + }, + { + "epoch": 0.96, + "learning_rate": 1.0554240014318085e-05, + "loss": 0.7467, + "step": 28420 + }, + { + "epoch": 0.97, + "learning_rate": 1.0543592166441983e-05, + "loss": 0.7665, + "step": 28440 + }, + { + "epoch": 0.97, + "learning_rate": 1.0532943700396616e-05, + "loss": 0.7758, + "step": 28460 + }, + { + "epoch": 0.97, + "learning_rate": 1.0522294628291347e-05, + "loss": 0.7501, + "step": 28480 + }, + { + "epoch": 0.97, + "learning_rate": 1.0511644962236228e-05, + "loss": 0.7612, + "step": 28500 + }, + { + "epoch": 0.97, + "learning_rate": 1.0500994714341984e-05, + "loss": 0.7664, + "step": 28520 + }, + { + "epoch": 0.97, + "learning_rate": 1.049034389672e-05, + "loss": 0.7437, + "step": 28540 + }, + { + "epoch": 0.97, + "learning_rate": 1.0479692521482316e-05, + "loss": 0.718, + "step": 28560 + }, + { + "epoch": 0.97, + "learning_rate": 1.0469040600741603e-05, + "loss": 0.7596, + "step": 28580 + }, + { + "epoch": 0.97, + "learning_rate": 1.0458388146611151e-05, + "loss": 0.8108, + "step": 28600 + }, + { + "epoch": 0.97, + "learning_rate": 1.044773517120486e-05, + "loss": 0.7887, + "step": 28620 + }, + { + "epoch": 0.97, + "learning_rate": 1.0437081686637216e-05, + "loss": 0.7195, + "step": 28640 + }, + { + "epoch": 0.97, + "learning_rate": 1.0426427705023289e-05, + "loss": 0.7793, + "step": 28660 + }, + { + "epoch": 0.97, + "learning_rate": 1.0415773238478715e-05, + "loss": 0.6975, + "step": 28680 + }, + { + "epoch": 0.97, + "learning_rate": 1.0405118299119682e-05, + "loss": 0.7211, + "step": 28700 + }, + { + "epoch": 0.97, + "learning_rate": 1.039446289906291e-05, + "loss": 0.7757, + "step": 28720 + }, + { + "epoch": 0.98, + "learning_rate": 1.0383807050425648e-05, + "loss": 0.7514, + "step": 28740 + }, + { + "epoch": 0.98, + "learning_rate": 1.037315076532565e-05, + "loss": 0.6998, + "step": 28760 + }, + { + "epoch": 0.98, + "learning_rate": 1.0362494055881172e-05, + "loss": 0.7226, + "step": 28780 + }, + { + "epoch": 0.98, + "learning_rate": 1.0351836934210957e-05, + "loss": 0.7645, + "step": 28800 + }, + { + "epoch": 0.98, + "learning_rate": 1.03411794124342e-05, + "loss": 0.8352, + "step": 28820 + }, + { + "epoch": 0.98, + "learning_rate": 1.0330521502670574e-05, + "loss": 0.7759, + "step": 28840 + }, + { + "epoch": 0.98, + "learning_rate": 1.0319863217040166e-05, + "loss": 0.7611, + "step": 28860 + }, + { + "epoch": 0.98, + "learning_rate": 1.0309204567663513e-05, + "loss": 0.7064, + "step": 28880 + }, + { + "epoch": 0.98, + "learning_rate": 1.0298545566661558e-05, + "loss": 0.6901, + "step": 28900 + }, + { + "epoch": 0.98, + "learning_rate": 1.028788622615564e-05, + "loss": 0.7307, + "step": 28920 + }, + { + "epoch": 0.98, + "learning_rate": 1.0277226558267494e-05, + "loss": 0.7325, + "step": 28940 + }, + { + "epoch": 0.98, + "learning_rate": 1.0266566575119213e-05, + "loss": 0.8501, + "step": 28960 + }, + { + "epoch": 0.98, + "learning_rate": 1.0255906288833259e-05, + "loss": 0.7255, + "step": 28980 + }, + { + "epoch": 0.98, + "learning_rate": 1.0245245711532437e-05, + "loss": 0.6879, + "step": 29000 + }, + { + "epoch": 0.99, + "learning_rate": 1.0234584855339884e-05, + "loss": 0.7236, + "step": 29020 + }, + { + "epoch": 0.99, + "learning_rate": 1.0223923732379049e-05, + "loss": 0.761, + "step": 29040 + }, + { + "epoch": 0.99, + "learning_rate": 1.0213262354773689e-05, + "loss": 0.7608, + "step": 29060 + }, + { + "epoch": 0.99, + "learning_rate": 1.020260073464785e-05, + "loss": 0.7164, + "step": 29080 + }, + { + "epoch": 0.99, + "learning_rate": 1.0191938884125847e-05, + "loss": 0.7795, + "step": 29100 + }, + { + "epoch": 0.99, + "learning_rate": 1.0181276815332273e-05, + "loss": 0.6521, + "step": 29120 + }, + { + "epoch": 0.99, + "learning_rate": 1.0170614540391952e-05, + "loss": 0.7608, + "step": 29140 + }, + { + "epoch": 0.99, + "learning_rate": 1.0159952071429953e-05, + "loss": 0.7849, + "step": 29160 + }, + { + "epoch": 0.99, + "learning_rate": 1.0149289420571558e-05, + "loss": 0.6989, + "step": 29180 + }, + { + "epoch": 0.99, + "learning_rate": 1.0138626599942263e-05, + "loss": 0.7986, + "step": 29200 + }, + { + "epoch": 0.99, + "learning_rate": 1.0127963621667754e-05, + "loss": 0.7628, + "step": 29220 + }, + { + "epoch": 0.99, + "learning_rate": 1.0117300497873897e-05, + "loss": 0.7438, + "step": 29240 + }, + { + "epoch": 0.99, + "learning_rate": 1.0106637240686723e-05, + "loss": 0.7998, + "step": 29260 + }, + { + "epoch": 0.99, + "learning_rate": 1.009597386223241e-05, + "loss": 0.7247, + "step": 29280 + }, + { + "epoch": 0.99, + "learning_rate": 1.0085310374637288e-05, + "loss": 0.7975, + "step": 29300 + }, + { + "epoch": 1.0, + "learning_rate": 1.0074646790027792e-05, + "loss": 0.7294, + "step": 29320 + }, + { + "epoch": 1.0, + "learning_rate": 1.006398312053048e-05, + "loss": 0.7467, + "step": 29340 + }, + { + "epoch": 1.0, + "learning_rate": 1.0053319378272008e-05, + "loss": 0.7376, + "step": 29360 + }, + { + "epoch": 1.0, + "learning_rate": 1.0042655575379103e-05, + "loss": 0.8358, + "step": 29380 + }, + { + "epoch": 1.0, + "learning_rate": 1.0031991723978574e-05, + "loss": 0.8395, + "step": 29400 + }, + { + "epoch": 1.0, + "learning_rate": 1.0021327836197274e-05, + "loss": 0.7004, + "step": 29420 + }, + { + "epoch": 1.0, + "learning_rate": 1.001066392416211e-05, + "loss": 0.6753, + "step": 29440 + }, + { + "epoch": 1.0, + "learning_rate": 1e-05, + "loss": 0.7482, + "step": 29460 + }, + { + "epoch": 1.0, + "learning_rate": 9.989336075837893e-06, + "loss": 0.5971, + "step": 29480 + }, + { + "epoch": 1.0, + "learning_rate": 9.978672163802726e-06, + "loss": 0.5315, + "step": 29500 + }, + { + "epoch": 1.0, + "learning_rate": 9.96800827602143e-06, + "loss": 0.5765, + "step": 29520 + }, + { + "epoch": 1.0, + "learning_rate": 9.9573444246209e-06, + "loss": 0.5696, + "step": 29540 + }, + { + "epoch": 1.0, + "learning_rate": 9.946680621727995e-06, + "loss": 0.5824, + "step": 29560 + }, + { + "epoch": 1.0, + "learning_rate": 9.936016879469522e-06, + "loss": 0.5709, + "step": 29580 + }, + { + "epoch": 1.0, + "learning_rate": 9.925353209972213e-06, + "loss": 0.5152, + "step": 29600 + }, + { + "epoch": 1.01, + "learning_rate": 9.914689625362717e-06, + "loss": 0.5453, + "step": 29620 + }, + { + "epoch": 1.01, + "learning_rate": 9.904026137767591e-06, + "loss": 0.5355, + "step": 29640 + }, + { + "epoch": 1.01, + "learning_rate": 9.89336275931328e-06, + "loss": 0.5089, + "step": 29660 + }, + { + "epoch": 1.01, + "learning_rate": 9.882699502126103e-06, + "loss": 0.5548, + "step": 29680 + }, + { + "epoch": 1.01, + "learning_rate": 9.872036378332251e-06, + "loss": 0.5285, + "step": 29700 + }, + { + "epoch": 1.01, + "learning_rate": 9.86137340005774e-06, + "loss": 0.5213, + "step": 29720 + }, + { + "epoch": 1.01, + "learning_rate": 9.850710579428445e-06, + "loss": 0.4975, + "step": 29740 + }, + { + "epoch": 1.01, + "learning_rate": 9.84004792857005e-06, + "loss": 0.5456, + "step": 29760 + }, + { + "epoch": 1.01, + "learning_rate": 9.82938545960805e-06, + "loss": 0.5541, + "step": 29780 + }, + { + "epoch": 1.01, + "learning_rate": 9.818723184667732e-06, + "loss": 0.5548, + "step": 29800 + }, + { + "epoch": 1.01, + "learning_rate": 9.808061115874156e-06, + "loss": 0.5818, + "step": 29820 + }, + { + "epoch": 1.01, + "learning_rate": 9.797399265352154e-06, + "loss": 0.5393, + "step": 29840 + }, + { + "epoch": 1.01, + "learning_rate": 9.786737645226313e-06, + "loss": 0.5183, + "step": 29860 + }, + { + "epoch": 1.01, + "learning_rate": 9.776076267620955e-06, + "loss": 0.5039, + "step": 29880 + }, + { + "epoch": 1.01, + "learning_rate": 9.76541514466012e-06, + "loss": 0.5041, + "step": 29900 + }, + { + "epoch": 1.02, + "learning_rate": 9.754754288467564e-06, + "loss": 0.4768, + "step": 29920 + }, + { + "epoch": 1.02, + "learning_rate": 9.744093711166745e-06, + "loss": 0.5311, + "step": 29940 + }, + { + "epoch": 1.02, + "learning_rate": 9.733433424880789e-06, + "loss": 0.5408, + "step": 29960 + }, + { + "epoch": 1.02, + "learning_rate": 9.72277344173251e-06, + "loss": 0.4999, + "step": 29980 + }, + { + "epoch": 1.02, + "learning_rate": 9.712113773844361e-06, + "loss": 0.5325, + "step": 30000 + }, + { + "epoch": 1.02, + "learning_rate": 9.701454433338445e-06, + "loss": 0.5887, + "step": 30020 + }, + { + "epoch": 1.02, + "learning_rate": 9.690795432336489e-06, + "loss": 0.5308, + "step": 30040 + }, + { + "epoch": 1.02, + "learning_rate": 9.680136782959835e-06, + "loss": 0.5197, + "step": 30060 + }, + { + "epoch": 1.02, + "learning_rate": 9.669478497329431e-06, + "loss": 0.58, + "step": 30080 + }, + { + "epoch": 1.02, + "learning_rate": 9.658820587565801e-06, + "loss": 0.5552, + "step": 30100 + }, + { + "epoch": 1.02, + "learning_rate": 9.648163065789046e-06, + "loss": 0.5623, + "step": 30120 + }, + { + "epoch": 1.02, + "learning_rate": 9.637505944118828e-06, + "loss": 0.5321, + "step": 30140 + }, + { + "epoch": 1.02, + "learning_rate": 9.626849234674356e-06, + "loss": 0.5571, + "step": 30160 + }, + { + "epoch": 1.02, + "learning_rate": 9.616192949574357e-06, + "loss": 0.5319, + "step": 30180 + }, + { + "epoch": 1.03, + "learning_rate": 9.605537100937094e-06, + "loss": 0.5443, + "step": 30200 + }, + { + "epoch": 1.03, + "learning_rate": 9.59488170088032e-06, + "loss": 0.5153, + "step": 30220 + }, + { + "epoch": 1.03, + "learning_rate": 9.584226761521285e-06, + "loss": 0.5195, + "step": 30240 + }, + { + "epoch": 1.03, + "learning_rate": 9.573572294976714e-06, + "loss": 0.5135, + "step": 30260 + }, + { + "epoch": 1.03, + "learning_rate": 9.562918313362787e-06, + "loss": 0.5177, + "step": 30280 + }, + { + "epoch": 1.03, + "learning_rate": 9.552264828795142e-06, + "loss": 0.5544, + "step": 30300 + }, + { + "epoch": 1.03, + "learning_rate": 9.541611853388849e-06, + "loss": 0.5243, + "step": 30320 + }, + { + "epoch": 1.03, + "learning_rate": 9.530959399258397e-06, + "loss": 0.5352, + "step": 30340 + }, + { + "epoch": 1.03, + "learning_rate": 9.520307478517686e-06, + "loss": 0.5354, + "step": 30360 + }, + { + "epoch": 1.03, + "learning_rate": 9.509656103280004e-06, + "loss": 0.5097, + "step": 30380 + }, + { + "epoch": 1.03, + "learning_rate": 9.499005285658021e-06, + "loss": 0.5286, + "step": 30400 + }, + { + "epoch": 1.03, + "learning_rate": 9.488355037763774e-06, + "loss": 0.5257, + "step": 30420 + }, + { + "epoch": 1.03, + "learning_rate": 9.477705371708657e-06, + "loss": 0.5127, + "step": 30440 + }, + { + "epoch": 1.03, + "learning_rate": 9.467056299603388e-06, + "loss": 0.542, + "step": 30460 + }, + { + "epoch": 1.03, + "learning_rate": 9.456407833558019e-06, + "loss": 0.5338, + "step": 30480 + }, + { + "epoch": 1.04, + "learning_rate": 9.445759985681919e-06, + "loss": 0.5501, + "step": 30500 + }, + { + "epoch": 1.04, + "learning_rate": 9.435112768083742e-06, + "loss": 0.5282, + "step": 30520 + }, + { + "epoch": 1.04, + "learning_rate": 9.42446619287144e-06, + "loss": 0.5156, + "step": 30540 + }, + { + "epoch": 1.04, + "learning_rate": 9.41382027215221e-06, + "loss": 0.534, + "step": 30560 + }, + { + "epoch": 1.04, + "learning_rate": 9.403175018032528e-06, + "loss": 0.5237, + "step": 30580 + }, + { + "epoch": 1.04, + "learning_rate": 9.3925304426181e-06, + "loss": 0.5067, + "step": 30600 + }, + { + "epoch": 1.04, + "learning_rate": 9.38188655801387e-06, + "loss": 0.5491, + "step": 30620 + }, + { + "epoch": 1.04, + "learning_rate": 9.37124337632399e-06, + "loss": 0.5151, + "step": 30640 + }, + { + "epoch": 1.04, + "learning_rate": 9.360600909651804e-06, + "loss": 0.5706, + "step": 30660 + }, + { + "epoch": 1.04, + "learning_rate": 9.349959170099859e-06, + "loss": 0.4919, + "step": 30680 + }, + { + "epoch": 1.04, + "learning_rate": 9.339318169769865e-06, + "loss": 0.5317, + "step": 30700 + }, + { + "epoch": 1.04, + "learning_rate": 9.328677920762698e-06, + "loss": 0.5356, + "step": 30720 + }, + { + "epoch": 1.04, + "learning_rate": 9.318038435178371e-06, + "loss": 0.574, + "step": 30740 + }, + { + "epoch": 1.04, + "learning_rate": 9.307399725116035e-06, + "loss": 0.5253, + "step": 30760 + }, + { + "epoch": 1.04, + "learning_rate": 9.29676180267396e-06, + "loss": 0.5226, + "step": 30780 + }, + { + "epoch": 1.05, + "learning_rate": 9.286124679949515e-06, + "loss": 0.5003, + "step": 30800 + }, + { + "epoch": 1.05, + "learning_rate": 9.275488369039169e-06, + "loss": 0.569, + "step": 30820 + }, + { + "epoch": 1.05, + "learning_rate": 9.264852882038453e-06, + "loss": 0.5178, + "step": 30840 + }, + { + "epoch": 1.05, + "learning_rate": 9.254218231041973e-06, + "loss": 0.519, + "step": 30860 + }, + { + "epoch": 1.05, + "learning_rate": 9.243584428143383e-06, + "loss": 0.5543, + "step": 30880 + }, + { + "epoch": 1.05, + "learning_rate": 9.232951485435367e-06, + "loss": 0.5391, + "step": 30900 + }, + { + "epoch": 1.05, + "learning_rate": 9.222319415009639e-06, + "loss": 0.51, + "step": 30920 + }, + { + "epoch": 1.05, + "learning_rate": 9.21168822895691e-06, + "loss": 0.5161, + "step": 30940 + }, + { + "epoch": 1.05, + "learning_rate": 9.201057939366896e-06, + "loss": 0.5187, + "step": 30960 + }, + { + "epoch": 1.05, + "learning_rate": 9.190428558328285e-06, + "loss": 0.4999, + "step": 30980 + }, + { + "epoch": 1.05, + "learning_rate": 9.179800097928743e-06, + "loss": 0.4937, + "step": 31000 + }, + { + "epoch": 1.05, + "learning_rate": 9.169172570254874e-06, + "loss": 0.5363, + "step": 31020 + }, + { + "epoch": 1.05, + "learning_rate": 9.158545987392233e-06, + "loss": 0.4939, + "step": 31040 + }, + { + "epoch": 1.05, + "learning_rate": 9.147920361425295e-06, + "loss": 0.5205, + "step": 31060 + }, + { + "epoch": 1.05, + "learning_rate": 9.13729570443745e-06, + "loss": 0.5278, + "step": 31080 + }, + { + "epoch": 1.06, + "learning_rate": 9.12667202851099e-06, + "loss": 0.5109, + "step": 31100 + }, + { + "epoch": 1.06, + "learning_rate": 9.116049345727076e-06, + "loss": 0.5284, + "step": 31120 + }, + { + "epoch": 1.06, + "learning_rate": 9.105427668165755e-06, + "loss": 0.549, + "step": 31140 + }, + { + "epoch": 1.06, + "learning_rate": 9.094807007905927e-06, + "loss": 0.5686, + "step": 31160 + }, + { + "epoch": 1.06, + "learning_rate": 9.084187377025334e-06, + "loss": 0.5356, + "step": 31180 + }, + { + "epoch": 1.06, + "learning_rate": 9.073568787600538e-06, + "loss": 0.4808, + "step": 31200 + }, + { + "epoch": 1.06, + "learning_rate": 9.062951251706933e-06, + "loss": 0.5262, + "step": 31220 + }, + { + "epoch": 1.06, + "learning_rate": 9.052334781418708e-06, + "loss": 0.5427, + "step": 31240 + }, + { + "epoch": 1.06, + "learning_rate": 9.04171938880884e-06, + "loss": 0.5331, + "step": 31260 + }, + { + "epoch": 1.06, + "learning_rate": 9.031105085949083e-06, + "loss": 0.501, + "step": 31280 + }, + { + "epoch": 1.06, + "learning_rate": 9.020491884909941e-06, + "loss": 0.5483, + "step": 31300 + }, + { + "epoch": 1.06, + "learning_rate": 9.009879797760679e-06, + "loss": 0.5403, + "step": 31320 + }, + { + "epoch": 1.06, + "learning_rate": 8.999268836569284e-06, + "loss": 0.5151, + "step": 31340 + }, + { + "epoch": 1.06, + "learning_rate": 8.988659013402474e-06, + "loss": 0.5153, + "step": 31360 + }, + { + "epoch": 1.07, + "learning_rate": 8.978050340325668e-06, + "loss": 0.5419, + "step": 31380 + }, + { + "epoch": 1.07, + "learning_rate": 8.967442829402968e-06, + "loss": 0.4932, + "step": 31400 + }, + { + "epoch": 1.07, + "learning_rate": 8.956836492697167e-06, + "loss": 0.5351, + "step": 31420 + }, + { + "epoch": 1.07, + "learning_rate": 8.94623134226972e-06, + "loss": 0.5007, + "step": 31440 + }, + { + "epoch": 1.07, + "learning_rate": 8.935627390180728e-06, + "loss": 0.5453, + "step": 31460 + }, + { + "epoch": 1.07, + "learning_rate": 8.925024648488932e-06, + "loss": 0.528, + "step": 31480 + }, + { + "epoch": 1.07, + "learning_rate": 8.914423129251697e-06, + "loss": 0.5672, + "step": 31500 + }, + { + "epoch": 1.07, + "learning_rate": 8.903822844524999e-06, + "loss": 0.5049, + "step": 31520 + }, + { + "epoch": 1.07, + "learning_rate": 8.893223806363407e-06, + "loss": 0.5704, + "step": 31540 + }, + { + "epoch": 1.07, + "learning_rate": 8.882626026820078e-06, + "loss": 0.4887, + "step": 31560 + }, + { + "epoch": 1.07, + "learning_rate": 8.872029517946729e-06, + "loss": 0.5191, + "step": 31580 + }, + { + "epoch": 1.07, + "learning_rate": 8.861434291793639e-06, + "loss": 0.5468, + "step": 31600 + }, + { + "epoch": 1.07, + "learning_rate": 8.850840360409627e-06, + "loss": 0.5635, + "step": 31620 + }, + { + "epoch": 1.07, + "learning_rate": 8.840247735842037e-06, + "loss": 0.5354, + "step": 31640 + }, + { + "epoch": 1.07, + "learning_rate": 8.829656430136732e-06, + "loss": 0.4913, + "step": 31660 + }, + { + "epoch": 1.08, + "learning_rate": 8.819066455338067e-06, + "loss": 0.5314, + "step": 31680 + }, + { + "epoch": 1.08, + "learning_rate": 8.80847782348889e-06, + "loss": 0.5026, + "step": 31700 + }, + { + "epoch": 1.08, + "learning_rate": 8.797890546630522e-06, + "loss": 0.5835, + "step": 31720 + }, + { + "epoch": 1.08, + "learning_rate": 8.787304636802743e-06, + "loss": 0.5104, + "step": 31740 + }, + { + "epoch": 1.08, + "learning_rate": 8.77672010604377e-06, + "loss": 0.4618, + "step": 31760 + }, + { + "epoch": 1.08, + "learning_rate": 8.766136966390263e-06, + "loss": 0.5285, + "step": 31780 + }, + { + "epoch": 1.08, + "learning_rate": 8.755555229877295e-06, + "loss": 0.5043, + "step": 31800 + }, + { + "epoch": 1.08, + "learning_rate": 8.744974908538345e-06, + "loss": 0.5348, + "step": 31820 + }, + { + "epoch": 1.08, + "learning_rate": 8.734396014405282e-06, + "loss": 0.5492, + "step": 31840 + }, + { + "epoch": 1.08, + "learning_rate": 8.723818559508345e-06, + "loss": 0.5543, + "step": 31860 + }, + { + "epoch": 1.08, + "learning_rate": 8.713242555876149e-06, + "loss": 0.5509, + "step": 31880 + }, + { + "epoch": 1.08, + "learning_rate": 8.702668015535651e-06, + "loss": 0.5401, + "step": 31900 + }, + { + "epoch": 1.08, + "learning_rate": 8.692094950512145e-06, + "loss": 0.5644, + "step": 31920 + }, + { + "epoch": 1.08, + "learning_rate": 8.681523372829254e-06, + "loss": 0.5083, + "step": 31940 + }, + { + "epoch": 1.08, + "learning_rate": 8.670953294508892e-06, + "loss": 0.5296, + "step": 31960 + }, + { + "epoch": 1.09, + "learning_rate": 8.660384727571282e-06, + "loss": 0.6039, + "step": 31980 + }, + { + "epoch": 1.09, + "learning_rate": 8.64981768403493e-06, + "loss": 0.4802, + "step": 32000 + }, + { + "epoch": 1.09, + "learning_rate": 8.639252175916608e-06, + "loss": 0.5125, + "step": 32020 + }, + { + "epoch": 1.09, + "learning_rate": 8.62868821523133e-06, + "loss": 0.4873, + "step": 32040 + }, + { + "epoch": 1.09, + "learning_rate": 8.618125813992364e-06, + "loss": 0.53, + "step": 32060 + }, + { + "epoch": 1.09, + "learning_rate": 8.607564984211199e-06, + "loss": 0.5251, + "step": 32080 + }, + { + "epoch": 1.09, + "learning_rate": 8.597005737897534e-06, + "loss": 0.57, + "step": 32100 + }, + { + "epoch": 1.09, + "learning_rate": 8.586448087059284e-06, + "loss": 0.5491, + "step": 32120 + }, + { + "epoch": 1.09, + "learning_rate": 8.575892043702523e-06, + "loss": 0.5388, + "step": 32140 + }, + { + "epoch": 1.09, + "learning_rate": 8.565337619831516e-06, + "loss": 0.5428, + "step": 32160 + }, + { + "epoch": 1.09, + "learning_rate": 8.554784827448681e-06, + "loss": 0.5715, + "step": 32180 + }, + { + "epoch": 1.09, + "learning_rate": 8.544233678554581e-06, + "loss": 0.5013, + "step": 32200 + }, + { + "epoch": 1.09, + "learning_rate": 8.533684185147913e-06, + "loss": 0.5551, + "step": 32220 + }, + { + "epoch": 1.09, + "learning_rate": 8.52313635922548e-06, + "loss": 0.5181, + "step": 32240 + }, + { + "epoch": 1.1, + "learning_rate": 8.512590212782203e-06, + "loss": 0.5321, + "step": 32260 + }, + { + "epoch": 1.1, + "learning_rate": 8.502045757811087e-06, + "loss": 0.5279, + "step": 32280 + }, + { + "epoch": 1.1, + "learning_rate": 8.491503006303212e-06, + "loss": 0.5403, + "step": 32300 + }, + { + "epoch": 1.1, + "learning_rate": 8.480961970247722e-06, + "loss": 0.5359, + "step": 32320 + }, + { + "epoch": 1.1, + "learning_rate": 8.47042266163181e-06, + "loss": 0.533, + "step": 32340 + }, + { + "epoch": 1.1, + "learning_rate": 8.459885092440708e-06, + "loss": 0.5727, + "step": 32360 + }, + { + "epoch": 1.1, + "learning_rate": 8.449349274657665e-06, + "loss": 0.5204, + "step": 32380 + }, + { + "epoch": 1.1, + "learning_rate": 8.438815220263942e-06, + "loss": 0.5187, + "step": 32400 + }, + { + "epoch": 1.1, + "learning_rate": 8.42828294123879e-06, + "loss": 0.5268, + "step": 32420 + }, + { + "epoch": 1.1, + "learning_rate": 8.417752449559447e-06, + "loss": 0.5333, + "step": 32440 + }, + { + "epoch": 1.1, + "learning_rate": 8.407223757201113e-06, + "loss": 0.5326, + "step": 32460 + }, + { + "epoch": 1.1, + "learning_rate": 8.396696876136948e-06, + "loss": 0.5232, + "step": 32480 + }, + { + "epoch": 1.1, + "learning_rate": 8.386171818338048e-06, + "loss": 0.4898, + "step": 32500 + }, + { + "epoch": 1.1, + "learning_rate": 8.37564859577343e-06, + "loss": 0.4699, + "step": 32520 + }, + { + "epoch": 1.1, + "learning_rate": 8.365127220410037e-06, + "loss": 0.5222, + "step": 32540 + }, + { + "epoch": 1.11, + "learning_rate": 8.354607704212703e-06, + "loss": 0.5207, + "step": 32560 + }, + { + "epoch": 1.11, + "learning_rate": 8.344090059144151e-06, + "loss": 0.5525, + "step": 32580 + }, + { + "epoch": 1.11, + "learning_rate": 8.333574297164969e-06, + "loss": 0.5413, + "step": 32600 + }, + { + "epoch": 1.11, + "learning_rate": 8.323060430233613e-06, + "loss": 0.5338, + "step": 32620 + }, + { + "epoch": 1.11, + "learning_rate": 8.312548470306379e-06, + "loss": 0.535, + "step": 32640 + }, + { + "epoch": 1.11, + "learning_rate": 8.302038429337396e-06, + "loss": 0.5453, + "step": 32660 + }, + { + "epoch": 1.11, + "learning_rate": 8.291530319278614e-06, + "loss": 0.5435, + "step": 32680 + }, + { + "epoch": 1.11, + "learning_rate": 8.281024152079775e-06, + "loss": 0.5129, + "step": 32700 + }, + { + "epoch": 1.11, + "learning_rate": 8.270519939688421e-06, + "loss": 0.5429, + "step": 32720 + }, + { + "epoch": 1.11, + "learning_rate": 8.260017694049878e-06, + "loss": 0.5661, + "step": 32740 + }, + { + "epoch": 1.11, + "learning_rate": 8.249517427107225e-06, + "loss": 0.5217, + "step": 32760 + }, + { + "epoch": 1.11, + "learning_rate": 8.239019150801286e-06, + "loss": 0.4945, + "step": 32780 + }, + { + "epoch": 1.11, + "learning_rate": 8.228522877070636e-06, + "loss": 0.5153, + "step": 32800 + }, + { + "epoch": 1.11, + "learning_rate": 8.21802861785156e-06, + "loss": 0.5137, + "step": 32820 + }, + { + "epoch": 1.11, + "learning_rate": 8.207536385078057e-06, + "loss": 0.5012, + "step": 32840 + }, + { + "epoch": 1.12, + "learning_rate": 8.197046190681832e-06, + "loss": 0.5243, + "step": 32860 + }, + { + "epoch": 1.12, + "learning_rate": 8.186558046592247e-06, + "loss": 0.5136, + "step": 32880 + }, + { + "epoch": 1.12, + "learning_rate": 8.176071964736358e-06, + "loss": 0.4767, + "step": 32900 + }, + { + "epoch": 1.12, + "learning_rate": 8.16558795703886e-06, + "loss": 0.5258, + "step": 32920 + }, + { + "epoch": 1.12, + "learning_rate": 8.155106035422095e-06, + "loss": 0.4979, + "step": 32940 + }, + { + "epoch": 1.12, + "learning_rate": 8.144626211806038e-06, + "loss": 0.4596, + "step": 32960 + }, + { + "epoch": 1.12, + "learning_rate": 8.13414849810826e-06, + "loss": 0.5071, + "step": 32980 + }, + { + "epoch": 1.12, + "learning_rate": 8.123672906243955e-06, + "loss": 0.5356, + "step": 33000 + }, + { + "epoch": 1.12, + "learning_rate": 8.11319944812589e-06, + "loss": 0.5452, + "step": 33020 + }, + { + "epoch": 1.12, + "learning_rate": 8.102728135664411e-06, + "loss": 0.5393, + "step": 33040 + }, + { + "epoch": 1.12, + "learning_rate": 8.09225898076742e-06, + "loss": 0.5167, + "step": 33060 + }, + { + "epoch": 1.12, + "learning_rate": 8.08179199534037e-06, + "loss": 0.4935, + "step": 33080 + }, + { + "epoch": 1.12, + "learning_rate": 8.071327191286244e-06, + "loss": 0.547, + "step": 33100 + }, + { + "epoch": 1.12, + "learning_rate": 8.060864580505542e-06, + "loss": 0.5217, + "step": 33120 + }, + { + "epoch": 1.12, + "learning_rate": 8.050404174896282e-06, + "loss": 0.5212, + "step": 33140 + }, + { + "epoch": 1.13, + "learning_rate": 8.039945986353954e-06, + "loss": 0.4672, + "step": 33160 + }, + { + "epoch": 1.13, + "learning_rate": 8.029490026771545e-06, + "loss": 0.4622, + "step": 33180 + }, + { + "epoch": 1.13, + "learning_rate": 8.019036308039497e-06, + "loss": 0.507, + "step": 33200 + }, + { + "epoch": 1.13, + "learning_rate": 8.008584842045706e-06, + "loss": 0.5324, + "step": 33220 + }, + { + "epoch": 1.13, + "learning_rate": 7.998135640675514e-06, + "loss": 0.5084, + "step": 33240 + }, + { + "epoch": 1.13, + "learning_rate": 7.987688715811672e-06, + "loss": 0.5583, + "step": 33260 + }, + { + "epoch": 1.13, + "learning_rate": 7.977244079334357e-06, + "loss": 0.5154, + "step": 33280 + }, + { + "epoch": 1.13, + "learning_rate": 7.966801743121133e-06, + "loss": 0.5123, + "step": 33300 + }, + { + "epoch": 1.13, + "learning_rate": 7.95636171904696e-06, + "loss": 0.517, + "step": 33320 + }, + { + "epoch": 1.13, + "learning_rate": 7.945924018984155e-06, + "loss": 0.4924, + "step": 33340 + }, + { + "epoch": 1.13, + "learning_rate": 7.935488654802395e-06, + "loss": 0.5687, + "step": 33360 + }, + { + "epoch": 1.13, + "learning_rate": 7.925055638368713e-06, + "loss": 0.5503, + "step": 33380 + }, + { + "epoch": 1.13, + "learning_rate": 7.914624981547459e-06, + "loss": 0.5222, + "step": 33400 + }, + { + "epoch": 1.13, + "learning_rate": 7.90419669620031e-06, + "loss": 0.5184, + "step": 33420 + }, + { + "epoch": 1.14, + "learning_rate": 7.893770794186231e-06, + "loss": 0.5176, + "step": 33440 + }, + { + "epoch": 1.14, + "learning_rate": 7.88334728736149e-06, + "loss": 0.5423, + "step": 33460 + }, + { + "epoch": 1.14, + "learning_rate": 7.872926187579625e-06, + "loss": 0.5321, + "step": 33480 + }, + { + "epoch": 1.14, + "learning_rate": 7.862507506691444e-06, + "loss": 0.5127, + "step": 33500 + }, + { + "epoch": 1.14, + "learning_rate": 7.852091256545e-06, + "loss": 0.4715, + "step": 33520 + }, + { + "epoch": 1.14, + "learning_rate": 7.841677448985572e-06, + "loss": 0.5334, + "step": 33540 + }, + { + "epoch": 1.14, + "learning_rate": 7.831266095855675e-06, + "loss": 0.4881, + "step": 33560 + }, + { + "epoch": 1.14, + "learning_rate": 7.82085720899503e-06, + "loss": 0.5016, + "step": 33580 + }, + { + "epoch": 1.14, + "learning_rate": 7.810450800240549e-06, + "loss": 0.5255, + "step": 33600 + }, + { + "epoch": 1.14, + "learning_rate": 7.800046881426331e-06, + "loss": 0.4765, + "step": 33620 + }, + { + "epoch": 1.14, + "learning_rate": 7.789645464383635e-06, + "loss": 0.5301, + "step": 33640 + }, + { + "epoch": 1.14, + "learning_rate": 7.779246560940887e-06, + "loss": 0.4933, + "step": 33660 + }, + { + "epoch": 1.14, + "learning_rate": 7.768850182923646e-06, + "loss": 0.4941, + "step": 33680 + }, + { + "epoch": 1.14, + "learning_rate": 7.758456342154602e-06, + "loss": 0.4816, + "step": 33700 + }, + { + "epoch": 1.14, + "learning_rate": 7.748065050453557e-06, + "loss": 0.5509, + "step": 33720 + }, + { + "epoch": 1.15, + "learning_rate": 7.737676319637417e-06, + "loss": 0.5269, + "step": 33740 + }, + { + "epoch": 1.15, + "learning_rate": 7.727290161520176e-06, + "loss": 0.4682, + "step": 33760 + }, + { + "epoch": 1.15, + "learning_rate": 7.716906587912903e-06, + "loss": 0.5659, + "step": 33780 + }, + { + "epoch": 1.15, + "learning_rate": 7.706525610623726e-06, + "loss": 0.4829, + "step": 33800 + }, + { + "epoch": 1.15, + "learning_rate": 7.696147241457817e-06, + "loss": 0.5116, + "step": 33820 + }, + { + "epoch": 1.15, + "learning_rate": 7.685771492217387e-06, + "loss": 0.4765, + "step": 33840 + }, + { + "epoch": 1.15, + "learning_rate": 7.675398374701669e-06, + "loss": 0.559, + "step": 33860 + }, + { + "epoch": 1.15, + "learning_rate": 7.665027900706902e-06, + "loss": 0.5885, + "step": 33880 + }, + { + "epoch": 1.15, + "learning_rate": 7.654660082026313e-06, + "loss": 0.5742, + "step": 33900 + }, + { + "epoch": 1.15, + "learning_rate": 7.644294930450117e-06, + "loss": 0.5308, + "step": 33920 + }, + { + "epoch": 1.15, + "learning_rate": 7.633932457765491e-06, + "loss": 0.5628, + "step": 33940 + }, + { + "epoch": 1.15, + "learning_rate": 7.623572675756568e-06, + "loss": 0.527, + "step": 33960 + }, + { + "epoch": 1.15, + "learning_rate": 7.613215596204425e-06, + "loss": 0.5158, + "step": 33980 + }, + { + "epoch": 1.15, + "learning_rate": 7.602861230887055e-06, + "loss": 0.5089, + "step": 34000 + }, + { + "epoch": 1.15, + "learning_rate": 7.592509591579373e-06, + "loss": 0.5192, + "step": 34020 + }, + { + "epoch": 1.16, + "learning_rate": 7.582160690053194e-06, + "loss": 0.536, + "step": 34040 + }, + { + "epoch": 1.16, + "learning_rate": 7.571814538077216e-06, + "loss": 0.5417, + "step": 34060 + }, + { + "epoch": 1.16, + "learning_rate": 7.5614711474170155e-06, + "loss": 0.5583, + "step": 34080 + }, + { + "epoch": 1.16, + "learning_rate": 7.551130529835016e-06, + "loss": 0.5398, + "step": 34100 + }, + { + "epoch": 1.16, + "learning_rate": 7.540792697090502e-06, + "loss": 0.5044, + "step": 34120 + }, + { + "epoch": 1.16, + "learning_rate": 7.530457660939588e-06, + "loss": 0.504, + "step": 34140 + }, + { + "epoch": 1.16, + "learning_rate": 7.5201254331352056e-06, + "loss": 0.4996, + "step": 34160 + }, + { + "epoch": 1.16, + "learning_rate": 7.509796025427088e-06, + "loss": 0.534, + "step": 34180 + }, + { + "epoch": 1.16, + "learning_rate": 7.49946944956177e-06, + "loss": 0.5249, + "step": 34200 + }, + { + "epoch": 1.16, + "learning_rate": 7.489145717282559e-06, + "loss": 0.4901, + "step": 34220 + }, + { + "epoch": 1.16, + "learning_rate": 7.478824840329537e-06, + "loss": 0.4841, + "step": 34240 + }, + { + "epoch": 1.16, + "learning_rate": 7.4685068304395366e-06, + "loss": 0.4928, + "step": 34260 + }, + { + "epoch": 1.16, + "learning_rate": 7.45819169934612e-06, + "loss": 0.551, + "step": 34280 + }, + { + "epoch": 1.16, + "learning_rate": 7.4478794587795855e-06, + "loss": 0.507, + "step": 34300 + }, + { + "epoch": 1.16, + "learning_rate": 7.437570120466943e-06, + "loss": 0.4897, + "step": 34320 + }, + { + "epoch": 1.17, + "learning_rate": 7.427263696131903e-06, + "loss": 0.5578, + "step": 34340 + }, + { + "epoch": 1.17, + "learning_rate": 7.416960197494856e-06, + "loss": 0.5348, + "step": 34360 + }, + { + "epoch": 1.17, + "learning_rate": 7.406659636272872e-06, + "loss": 0.537, + "step": 34380 + }, + { + "epoch": 1.17, + "learning_rate": 7.396362024179677e-06, + "loss": 0.4799, + "step": 34400 + }, + { + "epoch": 1.17, + "learning_rate": 7.386067372925647e-06, + "loss": 0.487, + "step": 34420 + }, + { + "epoch": 1.17, + "learning_rate": 7.375775694217788e-06, + "loss": 0.5118, + "step": 34440 + }, + { + "epoch": 1.17, + "learning_rate": 7.365486999759723e-06, + "loss": 0.4961, + "step": 34460 + }, + { + "epoch": 1.17, + "learning_rate": 7.355201301251689e-06, + "loss": 0.5203, + "step": 34480 + }, + { + "epoch": 1.17, + "learning_rate": 7.344918610390506e-06, + "loss": 0.5447, + "step": 34500 + }, + { + "epoch": 1.17, + "learning_rate": 7.334638938869582e-06, + "loss": 0.5421, + "step": 34520 + }, + { + "epoch": 1.17, + "learning_rate": 7.324362298378893e-06, + "loss": 0.5051, + "step": 34540 + }, + { + "epoch": 1.17, + "learning_rate": 7.314088700604958e-06, + "loss": 0.4877, + "step": 34560 + }, + { + "epoch": 1.17, + "learning_rate": 7.303818157230841e-06, + "loss": 0.4842, + "step": 34580 + }, + { + "epoch": 1.17, + "learning_rate": 7.293550679936135e-06, + "loss": 0.532, + "step": 34600 + }, + { + "epoch": 1.18, + "learning_rate": 7.283286280396948e-06, + "loss": 0.5177, + "step": 34620 + }, + { + "epoch": 1.18, + "learning_rate": 7.27302497028588e-06, + "loss": 0.537, + "step": 34640 + }, + { + "epoch": 1.18, + "learning_rate": 7.262766761272022e-06, + "loss": 0.5476, + "step": 34660 + }, + { + "epoch": 1.18, + "learning_rate": 7.252511665020939e-06, + "loss": 0.5326, + "step": 34680 + }, + { + "epoch": 1.18, + "learning_rate": 7.242259693194658e-06, + "loss": 0.5295, + "step": 34700 + }, + { + "epoch": 1.18, + "learning_rate": 7.232010857451653e-06, + "loss": 0.6052, + "step": 34720 + }, + { + "epoch": 1.18, + "learning_rate": 7.221765169446819e-06, + "loss": 0.5554, + "step": 34740 + }, + { + "epoch": 1.18, + "learning_rate": 7.211522640831492e-06, + "loss": 0.4923, + "step": 34760 + }, + { + "epoch": 1.18, + "learning_rate": 7.2012832832534e-06, + "loss": 0.5459, + "step": 34780 + }, + { + "epoch": 1.18, + "learning_rate": 7.1910471083566725e-06, + "loss": 0.5057, + "step": 34800 + }, + { + "epoch": 1.18, + "learning_rate": 7.180814127781819e-06, + "loss": 0.4881, + "step": 34820 + }, + { + "epoch": 1.18, + "learning_rate": 7.170584353165708e-06, + "loss": 0.5223, + "step": 34840 + }, + { + "epoch": 1.18, + "learning_rate": 7.1603577961415704e-06, + "loss": 0.5601, + "step": 34860 + }, + { + "epoch": 1.18, + "learning_rate": 7.15013446833898e-06, + "loss": 0.5344, + "step": 34880 + }, + { + "epoch": 1.18, + "learning_rate": 7.139914381383837e-06, + "loss": 0.5106, + "step": 34900 + }, + { + "epoch": 1.19, + "learning_rate": 7.129697546898344e-06, + "loss": 0.5414, + "step": 34920 + }, + { + "epoch": 1.19, + "learning_rate": 7.119483976501021e-06, + "loss": 0.5041, + "step": 34940 + }, + { + "epoch": 1.19, + "learning_rate": 7.10927368180667e-06, + "loss": 0.5035, + "step": 34960 + }, + { + "epoch": 1.19, + "learning_rate": 7.0990666744263646e-06, + "loss": 0.5097, + "step": 34980 + }, + { + "epoch": 1.19, + "learning_rate": 7.0888629659674525e-06, + "loss": 0.5195, + "step": 35000 + }, + { + "epoch": 1.19, + "learning_rate": 7.07866256803351e-06, + "loss": 0.5062, + "step": 35020 + }, + { + "epoch": 1.19, + "learning_rate": 7.068465492224361e-06, + "loss": 0.514, + "step": 35040 + }, + { + "epoch": 1.19, + "learning_rate": 7.058271750136054e-06, + "loss": 0.5295, + "step": 35060 + }, + { + "epoch": 1.19, + "learning_rate": 7.04808135336084e-06, + "loss": 0.5194, + "step": 35080 + }, + { + "epoch": 1.19, + "learning_rate": 7.03789431348717e-06, + "loss": 0.5081, + "step": 35100 + }, + { + "epoch": 1.19, + "learning_rate": 7.027710642099672e-06, + "loss": 0.5042, + "step": 35120 + }, + { + "epoch": 1.19, + "learning_rate": 7.017530350779148e-06, + "loss": 0.5264, + "step": 35140 + }, + { + "epoch": 1.19, + "learning_rate": 7.007353451102557e-06, + "loss": 0.4971, + "step": 35160 + }, + { + "epoch": 1.19, + "learning_rate": 6.997179954642998e-06, + "loss": 0.5111, + "step": 35180 + }, + { + "epoch": 1.19, + "learning_rate": 6.9870098729697015e-06, + "loss": 0.5388, + "step": 35200 + }, + { + "epoch": 1.2, + "learning_rate": 6.976843217648013e-06, + "loss": 0.5412, + "step": 35220 + }, + { + "epoch": 1.2, + "learning_rate": 6.966680000239384e-06, + "loss": 0.547, + "step": 35240 + }, + { + "epoch": 1.2, + "learning_rate": 6.956520232301355e-06, + "loss": 0.5167, + "step": 35260 + }, + { + "epoch": 1.2, + "learning_rate": 6.946363925387546e-06, + "loss": 0.506, + "step": 35280 + }, + { + "epoch": 1.2, + "learning_rate": 6.936211091047636e-06, + "loss": 0.5407, + "step": 35300 + }, + { + "epoch": 1.2, + "learning_rate": 6.926061740827359e-06, + "loss": 0.5601, + "step": 35320 + }, + { + "epoch": 1.2, + "learning_rate": 6.915915886268486e-06, + "loss": 0.4812, + "step": 35340 + }, + { + "epoch": 1.2, + "learning_rate": 6.905773538908814e-06, + "loss": 0.5248, + "step": 35360 + }, + { + "epoch": 1.2, + "learning_rate": 6.895634710282153e-06, + "loss": 0.51, + "step": 35380 + }, + { + "epoch": 1.2, + "learning_rate": 6.885499411918304e-06, + "loss": 0.5256, + "step": 35400 + }, + { + "epoch": 1.2, + "learning_rate": 6.8753676553430606e-06, + "loss": 0.5107, + "step": 35420 + }, + { + "epoch": 1.2, + "learning_rate": 6.8652394520781865e-06, + "loss": 0.5276, + "step": 35440 + }, + { + "epoch": 1.2, + "learning_rate": 6.855114813641408e-06, + "loss": 0.5133, + "step": 35460 + }, + { + "epoch": 1.2, + "learning_rate": 6.844993751546388e-06, + "loss": 0.5299, + "step": 35480 + }, + { + "epoch": 1.21, + "learning_rate": 6.8348762773027285e-06, + "loss": 0.5327, + "step": 35500 + }, + { + "epoch": 1.21, + "learning_rate": 6.824762402415958e-06, + "loss": 0.5263, + "step": 35520 + }, + { + "epoch": 1.21, + "learning_rate": 6.814652138387501e-06, + "loss": 0.5323, + "step": 35540 + }, + { + "epoch": 1.21, + "learning_rate": 6.8045454967146854e-06, + "loss": 0.5296, + "step": 35560 + }, + { + "epoch": 1.21, + "learning_rate": 6.794442488890705e-06, + "loss": 0.5498, + "step": 35580 + }, + { + "epoch": 1.21, + "learning_rate": 6.784343126404636e-06, + "loss": 0.4546, + "step": 35600 + }, + { + "epoch": 1.21, + "learning_rate": 6.774247420741401e-06, + "loss": 0.5471, + "step": 35620 + }, + { + "epoch": 1.21, + "learning_rate": 6.764155383381772e-06, + "loss": 0.5375, + "step": 35640 + }, + { + "epoch": 1.21, + "learning_rate": 6.754067025802346e-06, + "loss": 0.5628, + "step": 35660 + }, + { + "epoch": 1.21, + "learning_rate": 6.743982359475525e-06, + "loss": 0.5029, + "step": 35680 + }, + { + "epoch": 1.21, + "learning_rate": 6.733901395869527e-06, + "loss": 0.4811, + "step": 35700 + }, + { + "epoch": 1.21, + "learning_rate": 6.723824146448352e-06, + "loss": 0.5296, + "step": 35720 + }, + { + "epoch": 1.21, + "learning_rate": 6.713750622671784e-06, + "loss": 0.5108, + "step": 35740 + }, + { + "epoch": 1.21, + "learning_rate": 6.7036808359953585e-06, + "loss": 0.5266, + "step": 35760 + }, + { + "epoch": 1.21, + "learning_rate": 6.693614797870371e-06, + "loss": 0.5075, + "step": 35780 + }, + { + "epoch": 1.22, + "learning_rate": 6.68355251974385e-06, + "loss": 0.5385, + "step": 35800 + }, + { + "epoch": 1.22, + "learning_rate": 6.673494013058548e-06, + "loss": 0.5294, + "step": 35820 + }, + { + "epoch": 1.22, + "learning_rate": 6.663439289252932e-06, + "loss": 0.5048, + "step": 35840 + }, + { + "epoch": 1.22, + "learning_rate": 6.653388359761161e-06, + "loss": 0.482, + "step": 35860 + }, + { + "epoch": 1.22, + "learning_rate": 6.6433412360130855e-06, + "loss": 0.4721, + "step": 35880 + }, + { + "epoch": 1.22, + "learning_rate": 6.633297929434224e-06, + "loss": 0.5274, + "step": 35900 + }, + { + "epoch": 1.22, + "learning_rate": 6.623258451445758e-06, + "loss": 0.5133, + "step": 35920 + }, + { + "epoch": 1.22, + "learning_rate": 6.613222813464508e-06, + "loss": 0.534, + "step": 35940 + }, + { + "epoch": 1.22, + "learning_rate": 6.603191026902936e-06, + "loss": 0.496, + "step": 35960 + }, + { + "epoch": 1.22, + "learning_rate": 6.59316310316912e-06, + "loss": 0.5472, + "step": 35980 + }, + { + "epoch": 1.22, + "learning_rate": 6.583139053666745e-06, + "loss": 0.4993, + "step": 36000 + }, + { + "epoch": 1.22, + "learning_rate": 6.573118889795093e-06, + "loss": 0.5548, + "step": 36020 + }, + { + "epoch": 1.22, + "learning_rate": 6.563102622949021e-06, + "loss": 0.509, + "step": 36040 + }, + { + "epoch": 1.22, + "learning_rate": 6.553090264518962e-06, + "loss": 0.5209, + "step": 36060 + }, + { + "epoch": 1.22, + "learning_rate": 6.543081825890901e-06, + "loss": 0.5229, + "step": 36080 + }, + { + "epoch": 1.23, + "learning_rate": 6.533077318446365e-06, + "loss": 0.5561, + "step": 36100 + }, + { + "epoch": 1.23, + "learning_rate": 6.523076753562411e-06, + "loss": 0.4693, + "step": 36120 + }, + { + "epoch": 1.23, + "learning_rate": 6.513080142611612e-06, + "loss": 0.4867, + "step": 36140 + }, + { + "epoch": 1.23, + "learning_rate": 6.503087496962044e-06, + "loss": 0.5337, + "step": 36160 + }, + { + "epoch": 1.23, + "learning_rate": 6.493098827977278e-06, + "loss": 0.5121, + "step": 36180 + }, + { + "epoch": 1.23, + "learning_rate": 6.483114147016358e-06, + "loss": 0.5246, + "step": 36200 + }, + { + "epoch": 1.23, + "learning_rate": 6.473133465433793e-06, + "loss": 0.5049, + "step": 36220 + }, + { + "epoch": 1.23, + "learning_rate": 6.463156794579543e-06, + "loss": 0.5014, + "step": 36240 + }, + { + "epoch": 1.23, + "learning_rate": 6.453184145799015e-06, + "loss": 0.4795, + "step": 36260 + }, + { + "epoch": 1.23, + "learning_rate": 6.4432155304330335e-06, + "loss": 0.5077, + "step": 36280 + }, + { + "epoch": 1.23, + "learning_rate": 6.433250959817845e-06, + "loss": 0.5471, + "step": 36300 + }, + { + "epoch": 1.23, + "learning_rate": 6.42329044528508e-06, + "loss": 0.5006, + "step": 36320 + }, + { + "epoch": 1.23, + "learning_rate": 6.413333998161772e-06, + "loss": 0.518, + "step": 36340 + }, + { + "epoch": 1.23, + "learning_rate": 6.4033816297703245e-06, + "loss": 0.5612, + "step": 36360 + }, + { + "epoch": 1.23, + "learning_rate": 6.393433351428502e-06, + "loss": 0.4518, + "step": 36380 + }, + { + "epoch": 1.24, + "learning_rate": 6.383489174449422e-06, + "loss": 0.5344, + "step": 36400 + }, + { + "epoch": 1.24, + "learning_rate": 6.373549110141528e-06, + "loss": 0.5152, + "step": 36420 + }, + { + "epoch": 1.24, + "learning_rate": 6.363613169808593e-06, + "loss": 0.4774, + "step": 36440 + }, + { + "epoch": 1.24, + "learning_rate": 6.353681364749703e-06, + "loss": 0.481, + "step": 36460 + }, + { + "epoch": 1.24, + "learning_rate": 6.343753706259239e-06, + "loss": 0.5457, + "step": 36480 + }, + { + "epoch": 1.24, + "learning_rate": 6.333830205626861e-06, + "loss": 0.5329, + "step": 36500 + }, + { + "epoch": 1.24, + "learning_rate": 6.323910874137509e-06, + "loss": 0.4489, + "step": 36520 + }, + { + "epoch": 1.24, + "learning_rate": 6.313995723071377e-06, + "loss": 0.4993, + "step": 36540 + }, + { + "epoch": 1.24, + "learning_rate": 6.3040847637039075e-06, + "loss": 0.5238, + "step": 36560 + }, + { + "epoch": 1.24, + "learning_rate": 6.294178007305776e-06, + "loss": 0.507, + "step": 36580 + }, + { + "epoch": 1.24, + "learning_rate": 6.284275465142875e-06, + "loss": 0.5401, + "step": 36600 + }, + { + "epoch": 1.24, + "learning_rate": 6.27437714847631e-06, + "loss": 0.4962, + "step": 36620 + }, + { + "epoch": 1.24, + "learning_rate": 6.2644830685623756e-06, + "loss": 0.4884, + "step": 36640 + }, + { + "epoch": 1.24, + "learning_rate": 6.254593236652554e-06, + "loss": 0.5003, + "step": 36660 + }, + { + "epoch": 1.25, + "learning_rate": 6.244707663993494e-06, + "loss": 0.4925, + "step": 36680 + }, + { + "epoch": 1.25, + "learning_rate": 6.2348263618269975e-06, + "loss": 0.4712, + "step": 36700 + }, + { + "epoch": 1.25, + "learning_rate": 6.224949341390017e-06, + "loss": 0.5206, + "step": 36720 + }, + { + "epoch": 1.25, + "learning_rate": 6.215076613914628e-06, + "loss": 0.4793, + "step": 36740 + }, + { + "epoch": 1.25, + "learning_rate": 6.205208190628034e-06, + "loss": 0.5046, + "step": 36760 + }, + { + "epoch": 1.25, + "learning_rate": 6.1953440827525306e-06, + "loss": 0.5188, + "step": 36780 + }, + { + "epoch": 1.25, + "learning_rate": 6.185484301505518e-06, + "loss": 0.5281, + "step": 36800 + }, + { + "epoch": 1.25, + "learning_rate": 6.175628858099471e-06, + "loss": 0.4982, + "step": 36820 + }, + { + "epoch": 1.25, + "learning_rate": 6.165777763741932e-06, + "loss": 0.5057, + "step": 36840 + }, + { + "epoch": 1.25, + "learning_rate": 6.1559310296355005e-06, + "loss": 0.5274, + "step": 36860 + }, + { + "epoch": 1.25, + "learning_rate": 6.146088666977804e-06, + "loss": 0.487, + "step": 36880 + }, + { + "epoch": 1.25, + "learning_rate": 6.1362506869615205e-06, + "loss": 0.5438, + "step": 36900 + }, + { + "epoch": 1.25, + "learning_rate": 6.126417100774329e-06, + "loss": 0.5017, + "step": 36920 + }, + { + "epoch": 1.25, + "learning_rate": 6.116587919598915e-06, + "loss": 0.5062, + "step": 36940 + }, + { + "epoch": 1.25, + "learning_rate": 6.106763154612963e-06, + "loss": 0.5229, + "step": 36960 + }, + { + "epoch": 1.26, + "learning_rate": 6.096942816989116e-06, + "loss": 0.4945, + "step": 36980 + }, + { + "epoch": 1.26, + "learning_rate": 6.0871269178949986e-06, + "loss": 0.569, + "step": 37000 + }, + { + "epoch": 1.26, + "learning_rate": 6.0773154684931875e-06, + "loss": 0.5008, + "step": 37020 + }, + { + "epoch": 1.26, + "learning_rate": 6.0675084799411975e-06, + "loss": 0.5091, + "step": 37040 + }, + { + "epoch": 1.26, + "learning_rate": 6.057705963391461e-06, + "loss": 0.5372, + "step": 37060 + }, + { + "epoch": 1.26, + "learning_rate": 6.047907929991333e-06, + "loss": 0.5228, + "step": 37080 + }, + { + "epoch": 1.26, + "learning_rate": 6.038114390883073e-06, + "loss": 0.5057, + "step": 37100 + }, + { + "epoch": 1.26, + "learning_rate": 6.028325357203822e-06, + "loss": 0.4866, + "step": 37120 + }, + { + "epoch": 1.26, + "learning_rate": 6.018540840085611e-06, + "loss": 0.5037, + "step": 37140 + }, + { + "epoch": 1.26, + "learning_rate": 6.008760850655316e-06, + "loss": 0.5303, + "step": 37160 + }, + { + "epoch": 1.26, + "learning_rate": 5.998985400034676e-06, + "loss": 0.507, + "step": 37180 + }, + { + "epoch": 1.26, + "learning_rate": 5.989214499340267e-06, + "loss": 0.4896, + "step": 37200 + }, + { + "epoch": 1.26, + "learning_rate": 5.979448159683491e-06, + "loss": 0.5309, + "step": 37220 + }, + { + "epoch": 1.26, + "learning_rate": 5.96968639217056e-06, + "loss": 0.4934, + "step": 37240 + }, + { + "epoch": 1.26, + "learning_rate": 5.959929207902488e-06, + "loss": 0.4939, + "step": 37260 + }, + { + "epoch": 1.27, + "learning_rate": 5.950176617975079e-06, + "loss": 0.5182, + "step": 37280 + }, + { + "epoch": 1.27, + "learning_rate": 5.9404286334789115e-06, + "loss": 0.5739, + "step": 37300 + }, + { + "epoch": 1.27, + "learning_rate": 5.930685265499329e-06, + "loss": 0.4918, + "step": 37320 + }, + { + "epoch": 1.27, + "learning_rate": 5.9209465251164175e-06, + "loss": 0.5235, + "step": 37340 + }, + { + "epoch": 1.27, + "learning_rate": 5.911212423405008e-06, + "loss": 0.5176, + "step": 37360 + }, + { + "epoch": 1.27, + "learning_rate": 5.901482971434652e-06, + "loss": 0.5081, + "step": 37380 + }, + { + "epoch": 1.27, + "learning_rate": 5.891758180269618e-06, + "loss": 0.4788, + "step": 37400 + }, + { + "epoch": 1.27, + "learning_rate": 5.882038060968871e-06, + "loss": 0.528, + "step": 37420 + }, + { + "epoch": 1.27, + "learning_rate": 5.872322624586061e-06, + "loss": 0.5005, + "step": 37440 + }, + { + "epoch": 1.27, + "learning_rate": 5.862611882169518e-06, + "loss": 0.4987, + "step": 37460 + }, + { + "epoch": 1.27, + "learning_rate": 5.852905844762229e-06, + "loss": 0.4889, + "step": 37480 + }, + { + "epoch": 1.27, + "learning_rate": 5.8432045234018365e-06, + "loss": 0.4652, + "step": 37500 + }, + { + "epoch": 1.27, + "learning_rate": 5.833507929120612e-06, + "loss": 0.5429, + "step": 37520 + }, + { + "epoch": 1.27, + "learning_rate": 5.823816072945458e-06, + "loss": 0.5374, + "step": 37540 + }, + { + "epoch": 1.27, + "learning_rate": 5.814128965897887e-06, + "loss": 0.4909, + "step": 37560 + }, + { + "epoch": 1.28, + "learning_rate": 5.804446618994009e-06, + "loss": 0.498, + "step": 37580 + }, + { + "epoch": 1.28, + "learning_rate": 5.794769043244527e-06, + "loss": 0.4974, + "step": 37600 + }, + { + "epoch": 1.28, + "learning_rate": 5.785096249654705e-06, + "loss": 0.4769, + "step": 37620 + }, + { + "epoch": 1.28, + "learning_rate": 5.775428249224382e-06, + "loss": 0.5101, + "step": 37640 + }, + { + "epoch": 1.28, + "learning_rate": 5.765765052947941e-06, + "loss": 0.4938, + "step": 37660 + }, + { + "epoch": 1.28, + "learning_rate": 5.756106671814301e-06, + "loss": 0.4921, + "step": 37680 + }, + { + "epoch": 1.28, + "learning_rate": 5.7464531168069096e-06, + "loss": 0.5029, + "step": 37700 + }, + { + "epoch": 1.28, + "learning_rate": 5.7368043989037206e-06, + "loss": 0.5164, + "step": 37720 + }, + { + "epoch": 1.28, + "learning_rate": 5.7271605290771895e-06, + "loss": 0.5038, + "step": 37740 + }, + { + "epoch": 1.28, + "learning_rate": 5.717521518294259e-06, + "loss": 0.5467, + "step": 37760 + }, + { + "epoch": 1.28, + "learning_rate": 5.70788737751635e-06, + "loss": 0.4912, + "step": 37780 + }, + { + "epoch": 1.28, + "learning_rate": 5.698258117699334e-06, + "loss": 0.5349, + "step": 37800 + }, + { + "epoch": 1.28, + "learning_rate": 5.68863374979354e-06, + "loss": 0.5092, + "step": 37820 + }, + { + "epoch": 1.28, + "learning_rate": 5.6790142847437355e-06, + "loss": 0.5157, + "step": 37840 + }, + { + "epoch": 1.29, + "learning_rate": 5.669399733489108e-06, + "loss": 0.5729, + "step": 37860 + }, + { + "epoch": 1.29, + "learning_rate": 5.659790106963261e-06, + "loss": 0.5483, + "step": 37880 + }, + { + "epoch": 1.29, + "learning_rate": 5.650185416094194e-06, + "loss": 0.5188, + "step": 37900 + }, + { + "epoch": 1.29, + "learning_rate": 5.640585671804296e-06, + "loss": 0.5054, + "step": 37920 + }, + { + "epoch": 1.29, + "learning_rate": 5.63099088501033e-06, + "loss": 0.5646, + "step": 37940 + }, + { + "epoch": 1.29, + "learning_rate": 5.621401066623421e-06, + "loss": 0.5179, + "step": 37960 + }, + { + "epoch": 1.29, + "learning_rate": 5.611816227549048e-06, + "loss": 0.4797, + "step": 37980 + }, + { + "epoch": 1.29, + "learning_rate": 5.602236378687018e-06, + "loss": 0.5401, + "step": 38000 + }, + { + "epoch": 1.29, + "learning_rate": 5.59266153093147e-06, + "loss": 0.4801, + "step": 38020 + }, + { + "epoch": 1.29, + "learning_rate": 5.583091695170857e-06, + "loss": 0.4999, + "step": 38040 + }, + { + "epoch": 1.29, + "learning_rate": 5.5735268822879344e-06, + "loss": 0.4958, + "step": 38060 + }, + { + "epoch": 1.29, + "learning_rate": 5.563967103159733e-06, + "loss": 0.4913, + "step": 38080 + }, + { + "epoch": 1.29, + "learning_rate": 5.554412368657572e-06, + "loss": 0.5331, + "step": 38100 + }, + { + "epoch": 1.29, + "learning_rate": 5.544862689647024e-06, + "loss": 0.5276, + "step": 38120 + }, + { + "epoch": 1.29, + "learning_rate": 5.535318076987928e-06, + "loss": 0.5382, + "step": 38140 + }, + { + "epoch": 1.3, + "learning_rate": 5.52577854153435e-06, + "loss": 0.508, + "step": 38160 + }, + { + "epoch": 1.3, + "learning_rate": 5.516244094134578e-06, + "loss": 0.5848, + "step": 38180 + }, + { + "epoch": 1.3, + "learning_rate": 5.506714745631124e-06, + "loss": 0.5551, + "step": 38200 + }, + { + "epoch": 1.3, + "learning_rate": 5.497190506860695e-06, + "loss": 0.5111, + "step": 38220 + }, + { + "epoch": 1.3, + "learning_rate": 5.487671388654192e-06, + "loss": 0.5263, + "step": 38240 + }, + { + "epoch": 1.3, + "learning_rate": 5.478157401836694e-06, + "loss": 0.5426, + "step": 38260 + }, + { + "epoch": 1.3, + "learning_rate": 5.468648557227434e-06, + "loss": 0.5187, + "step": 38280 + }, + { + "epoch": 1.3, + "learning_rate": 5.459144865639806e-06, + "loss": 0.5232, + "step": 38300 + }, + { + "epoch": 1.3, + "learning_rate": 5.449646337881345e-06, + "loss": 0.5209, + "step": 38320 + }, + { + "epoch": 1.3, + "learning_rate": 5.440152984753709e-06, + "loss": 0.4915, + "step": 38340 + }, + { + "epoch": 1.3, + "learning_rate": 5.430664817052678e-06, + "loss": 0.5327, + "step": 38360 + }, + { + "epoch": 1.3, + "learning_rate": 5.421181845568125e-06, + "loss": 0.4883, + "step": 38380 + }, + { + "epoch": 1.3, + "learning_rate": 5.411704081084025e-06, + "loss": 0.5274, + "step": 38400 + }, + { + "epoch": 1.3, + "learning_rate": 5.402231534378424e-06, + "loss": 0.5467, + "step": 38420 + }, + { + "epoch": 1.3, + "learning_rate": 5.392764216223441e-06, + "loss": 0.5268, + "step": 38440 + }, + { + "epoch": 1.31, + "learning_rate": 5.383302137385238e-06, + "loss": 0.5262, + "step": 38460 + }, + { + "epoch": 1.31, + "learning_rate": 5.373845308624028e-06, + "loss": 0.541, + "step": 38480 + }, + { + "epoch": 1.31, + "learning_rate": 5.364393740694054e-06, + "loss": 0.5145, + "step": 38500 + }, + { + "epoch": 1.31, + "learning_rate": 5.3549474443435726e-06, + "loss": 0.4997, + "step": 38520 + }, + { + "epoch": 1.31, + "learning_rate": 5.345506430314845e-06, + "loss": 0.4927, + "step": 38540 + }, + { + "epoch": 1.31, + "learning_rate": 5.33607070934413e-06, + "loss": 0.516, + "step": 38560 + }, + { + "epoch": 1.31, + "learning_rate": 5.326640292161665e-06, + "loss": 0.549, + "step": 38580 + }, + { + "epoch": 1.31, + "learning_rate": 5.317215189491651e-06, + "loss": 0.5273, + "step": 38600 + }, + { + "epoch": 1.31, + "learning_rate": 5.307795412052259e-06, + "loss": 0.5299, + "step": 38620 + }, + { + "epoch": 1.31, + "learning_rate": 5.298380970555584e-06, + "loss": 0.5154, + "step": 38640 + }, + { + "epoch": 1.31, + "learning_rate": 5.288971875707669e-06, + "loss": 0.5577, + "step": 38660 + }, + { + "epoch": 1.31, + "learning_rate": 5.279568138208473e-06, + "loss": 0.497, + "step": 38680 + }, + { + "epoch": 1.31, + "learning_rate": 5.270169768751858e-06, + "loss": 0.4864, + "step": 38700 + }, + { + "epoch": 1.31, + "learning_rate": 5.260776778025591e-06, + "loss": 0.494, + "step": 38720 + }, + { + "epoch": 1.32, + "learning_rate": 5.251389176711307e-06, + "loss": 0.4946, + "step": 38740 + }, + { + "epoch": 1.32, + "learning_rate": 5.242006975484528e-06, + "loss": 0.4608, + "step": 38760 + }, + { + "epoch": 1.32, + "learning_rate": 5.2326301850146285e-06, + "loss": 0.5286, + "step": 38780 + }, + { + "epoch": 1.32, + "learning_rate": 5.223258815964833e-06, + "loss": 0.511, + "step": 38800 + }, + { + "epoch": 1.32, + "learning_rate": 5.213892878992191e-06, + "loss": 0.5245, + "step": 38820 + }, + { + "epoch": 1.32, + "learning_rate": 5.204532384747584e-06, + "loss": 0.5, + "step": 38840 + }, + { + "epoch": 1.32, + "learning_rate": 5.195177343875701e-06, + "loss": 0.5208, + "step": 38860 + }, + { + "epoch": 1.32, + "learning_rate": 5.185827767015031e-06, + "loss": 0.5128, + "step": 38880 + }, + { + "epoch": 1.32, + "learning_rate": 5.176483664797852e-06, + "loss": 0.5312, + "step": 38900 + }, + { + "epoch": 1.32, + "learning_rate": 5.1671450478502064e-06, + "loss": 0.4755, + "step": 38920 + }, + { + "epoch": 1.32, + "learning_rate": 5.157811926791905e-06, + "loss": 0.4955, + "step": 38940 + }, + { + "epoch": 1.32, + "learning_rate": 5.1484843122365125e-06, + "loss": 0.4997, + "step": 38960 + }, + { + "epoch": 1.32, + "learning_rate": 5.139162214791322e-06, + "loss": 0.5367, + "step": 38980 + }, + { + "epoch": 1.32, + "learning_rate": 5.129845645057372e-06, + "loss": 0.5051, + "step": 39000 + }, + { + "epoch": 1.32, + "learning_rate": 5.120534613629389e-06, + "loss": 0.5145, + "step": 39020 + }, + { + "epoch": 1.33, + "learning_rate": 5.1112291310958166e-06, + "loss": 0.4681, + "step": 39040 + }, + { + "epoch": 1.33, + "learning_rate": 5.101929208038786e-06, + "loss": 0.5096, + "step": 39060 + }, + { + "epoch": 1.33, + "learning_rate": 5.09263485503411e-06, + "loss": 0.4984, + "step": 39080 + }, + { + "epoch": 1.33, + "learning_rate": 5.083346082651255e-06, + "loss": 0.4949, + "step": 39100 + }, + { + "epoch": 1.33, + "learning_rate": 5.074062901453352e-06, + "loss": 0.4977, + "step": 39120 + }, + { + "epoch": 1.33, + "learning_rate": 5.06478532199717e-06, + "loss": 0.5075, + "step": 39140 + }, + { + "epoch": 1.33, + "learning_rate": 5.05551335483311e-06, + "loss": 0.4985, + "step": 39160 + }, + { + "epoch": 1.33, + "learning_rate": 5.04624701050519e-06, + "loss": 0.4894, + "step": 39180 + }, + { + "epoch": 1.33, + "learning_rate": 5.03698629955103e-06, + "loss": 0.4938, + "step": 39200 + }, + { + "epoch": 1.33, + "learning_rate": 5.027731232501849e-06, + "loss": 0.5228, + "step": 39220 + }, + { + "epoch": 1.33, + "learning_rate": 5.018481819882446e-06, + "loss": 0.4612, + "step": 39240 + }, + { + "epoch": 1.33, + "learning_rate": 5.009238072211187e-06, + "loss": 0.5362, + "step": 39260 + }, + { + "epoch": 1.33, + "learning_rate": 5.000000000000003e-06, + "loss": 0.4859, + "step": 39280 + }, + { + "epoch": 1.33, + "learning_rate": 4.990767613754358e-06, + "loss": 0.49, + "step": 39300 + }, + { + "epoch": 1.33, + "learning_rate": 4.981540923973263e-06, + "loss": 0.5071, + "step": 39320 + }, + { + "epoch": 1.34, + "learning_rate": 4.972319941149245e-06, + "loss": 0.5129, + "step": 39340 + }, + { + "epoch": 1.34, + "learning_rate": 4.963104675768345e-06, + "loss": 0.5516, + "step": 39360 + }, + { + "epoch": 1.34, + "learning_rate": 4.953895138310091e-06, + "loss": 0.4968, + "step": 39380 + }, + { + "epoch": 1.34, + "learning_rate": 4.944691339247515e-06, + "loss": 0.5478, + "step": 39400 + }, + { + "epoch": 1.34, + "learning_rate": 4.9354932890471096e-06, + "loss": 0.5459, + "step": 39420 + }, + { + "epoch": 1.34, + "learning_rate": 4.926300998168835e-06, + "loss": 0.518, + "step": 39440 + }, + { + "epoch": 1.34, + "learning_rate": 4.917114477066104e-06, + "loss": 0.4936, + "step": 39460 + }, + { + "epoch": 1.34, + "learning_rate": 4.907933736185757e-06, + "loss": 0.4986, + "step": 39480 + }, + { + "epoch": 1.34, + "learning_rate": 4.898758785968075e-06, + "loss": 0.4951, + "step": 39500 + }, + { + "epoch": 1.34, + "learning_rate": 4.889589636846747e-06, + "loss": 0.5219, + "step": 39520 + }, + { + "epoch": 1.34, + "learning_rate": 4.880426299248867e-06, + "loss": 0.4831, + "step": 39540 + }, + { + "epoch": 1.34, + "learning_rate": 4.871268783594922e-06, + "loss": 0.503, + "step": 39560 + }, + { + "epoch": 1.34, + "learning_rate": 4.86211710029877e-06, + "loss": 0.5001, + "step": 39580 + }, + { + "epoch": 1.34, + "learning_rate": 4.852971259767642e-06, + "loss": 0.4996, + "step": 39600 + }, + { + "epoch": 1.34, + "learning_rate": 4.843831272402126e-06, + "loss": 0.469, + "step": 39620 + }, + { + "epoch": 1.35, + "learning_rate": 4.83469714859616e-06, + "loss": 0.5212, + "step": 39640 + }, + { + "epoch": 1.35, + "learning_rate": 4.825568898736999e-06, + "loss": 0.4983, + "step": 39660 + }, + { + "epoch": 1.35, + "learning_rate": 4.816446533205225e-06, + "loss": 0.5324, + "step": 39680 + }, + { + "epoch": 1.35, + "learning_rate": 4.807330062374733e-06, + "loss": 0.5249, + "step": 39700 + }, + { + "epoch": 1.35, + "learning_rate": 4.79821949661271e-06, + "loss": 0.4734, + "step": 39720 + }, + { + "epoch": 1.35, + "learning_rate": 4.789114846279631e-06, + "loss": 0.5388, + "step": 39740 + }, + { + "epoch": 1.35, + "learning_rate": 4.780016121729236e-06, + "loss": 0.5085, + "step": 39760 + }, + { + "epoch": 1.35, + "learning_rate": 4.7709233333085345e-06, + "loss": 0.5055, + "step": 39780 + }, + { + "epoch": 1.35, + "learning_rate": 4.761836491357782e-06, + "loss": 0.4652, + "step": 39800 + }, + { + "epoch": 1.35, + "learning_rate": 4.7527556062104756e-06, + "loss": 0.524, + "step": 39820 + }, + { + "epoch": 1.35, + "learning_rate": 4.7436806881933304e-06, + "loss": 0.4764, + "step": 39840 + }, + { + "epoch": 1.35, + "learning_rate": 4.734611747626285e-06, + "loss": 0.4892, + "step": 39860 + }, + { + "epoch": 1.35, + "learning_rate": 4.7255487948224754e-06, + "loss": 0.4889, + "step": 39880 + }, + { + "epoch": 1.35, + "learning_rate": 4.716491840088227e-06, + "loss": 0.533, + "step": 39900 + }, + { + "epoch": 1.36, + "learning_rate": 4.707440893723052e-06, + "loss": 0.5227, + "step": 39920 + }, + { + "epoch": 1.36, + "learning_rate": 4.698395966019615e-06, + "loss": 0.5253, + "step": 39940 + }, + { + "epoch": 1.36, + "learning_rate": 4.689357067263751e-06, + "loss": 0.5263, + "step": 39960 + }, + { + "epoch": 1.36, + "learning_rate": 4.680324207734434e-06, + "loss": 0.509, + "step": 39980 + }, + { + "epoch": 1.36, + "learning_rate": 4.671297397703766e-06, + "loss": 0.5364, + "step": 40000 + }, + { + "epoch": 1.36, + "learning_rate": 4.662276647436977e-06, + "loss": 0.4903, + "step": 40020 + }, + { + "epoch": 1.36, + "learning_rate": 4.653261967192398e-06, + "loss": 0.5374, + "step": 40040 + }, + { + "epoch": 1.36, + "learning_rate": 4.644253367221467e-06, + "loss": 0.478, + "step": 40060 + }, + { + "epoch": 1.36, + "learning_rate": 4.635250857768696e-06, + "loss": 0.4984, + "step": 40080 + }, + { + "epoch": 1.36, + "learning_rate": 4.62625444907168e-06, + "loss": 0.4795, + "step": 40100 + }, + { + "epoch": 1.36, + "learning_rate": 4.6172641513610765e-06, + "loss": 0.4602, + "step": 40120 + }, + { + "epoch": 1.36, + "learning_rate": 4.608279974860581e-06, + "loss": 0.4751, + "step": 40140 + }, + { + "epoch": 1.36, + "learning_rate": 4.599301929786943e-06, + "loss": 0.5241, + "step": 40160 + }, + { + "epoch": 1.36, + "learning_rate": 4.5903300263499315e-06, + "loss": 0.504, + "step": 40180 + }, + { + "epoch": 1.36, + "learning_rate": 4.581364274752338e-06, + "loss": 0.5306, + "step": 40200 + }, + { + "epoch": 1.37, + "learning_rate": 4.572404685189946e-06, + "loss": 0.5155, + "step": 40220 + }, + { + "epoch": 1.37, + "learning_rate": 4.563451267851542e-06, + "loss": 0.4783, + "step": 40240 + }, + { + "epoch": 1.37, + "learning_rate": 4.554504032918888e-06, + "loss": 0.464, + "step": 40260 + }, + { + "epoch": 1.37, + "learning_rate": 4.545562990566725e-06, + "loss": 0.4934, + "step": 40280 + }, + { + "epoch": 1.37, + "learning_rate": 4.536628150962744e-06, + "loss": 0.4472, + "step": 40300 + }, + { + "epoch": 1.37, + "learning_rate": 4.527699524267576e-06, + "loss": 0.4941, + "step": 40320 + }, + { + "epoch": 1.37, + "learning_rate": 4.518777120634798e-06, + "loss": 0.4424, + "step": 40340 + }, + { + "epoch": 1.37, + "learning_rate": 4.509860950210905e-06, + "loss": 0.5254, + "step": 40360 + }, + { + "epoch": 1.37, + "learning_rate": 4.500951023135308e-06, + "loss": 0.4916, + "step": 40380 + }, + { + "epoch": 1.37, + "learning_rate": 4.492047349540308e-06, + "loss": 0.5132, + "step": 40400 + }, + { + "epoch": 1.37, + "learning_rate": 4.483149939551105e-06, + "loss": 0.5309, + "step": 40420 + }, + { + "epoch": 1.37, + "learning_rate": 4.474258803285774e-06, + "loss": 0.5116, + "step": 40440 + }, + { + "epoch": 1.37, + "learning_rate": 4.465373950855249e-06, + "loss": 0.4641, + "step": 40460 + }, + { + "epoch": 1.37, + "learning_rate": 4.4564953923633295e-06, + "loss": 0.478, + "step": 40480 + }, + { + "epoch": 1.37, + "learning_rate": 4.4476231379066445e-06, + "loss": 0.4497, + "step": 40500 + }, + { + "epoch": 1.38, + "learning_rate": 4.4387571975746655e-06, + "loss": 0.5384, + "step": 40520 + }, + { + "epoch": 1.38, + "learning_rate": 4.429897581449677e-06, + "loss": 0.4797, + "step": 40540 + }, + { + "epoch": 1.38, + "learning_rate": 4.4210442996067724e-06, + "loss": 0.5034, + "step": 40560 + }, + { + "epoch": 1.38, + "learning_rate": 4.412197362113848e-06, + "loss": 0.4553, + "step": 40580 + }, + { + "epoch": 1.38, + "learning_rate": 4.403356779031572e-06, + "loss": 0.518, + "step": 40600 + }, + { + "epoch": 1.38, + "learning_rate": 4.394522560413399e-06, + "loss": 0.4733, + "step": 40620 + }, + { + "epoch": 1.38, + "learning_rate": 4.385694716305541e-06, + "loss": 0.4931, + "step": 40640 + }, + { + "epoch": 1.38, + "learning_rate": 4.376873256746962e-06, + "loss": 0.5156, + "step": 40660 + }, + { + "epoch": 1.38, + "learning_rate": 4.368058191769363e-06, + "loss": 0.5206, + "step": 40680 + }, + { + "epoch": 1.38, + "learning_rate": 4.359249531397177e-06, + "loss": 0.5079, + "step": 40700 + }, + { + "epoch": 1.38, + "learning_rate": 4.350447285647552e-06, + "loss": 0.4872, + "step": 40720 + }, + { + "epoch": 1.38, + "learning_rate": 4.341651464530341e-06, + "loss": 0.4953, + "step": 40740 + }, + { + "epoch": 1.38, + "learning_rate": 4.332862078048095e-06, + "loss": 0.5342, + "step": 40760 + }, + { + "epoch": 1.38, + "learning_rate": 4.324079136196034e-06, + "loss": 0.5427, + "step": 40780 + }, + { + "epoch": 1.38, + "learning_rate": 4.315302648962066e-06, + "loss": 0.5343, + "step": 40800 + }, + { + "epoch": 1.39, + "learning_rate": 4.30653262632675e-06, + "loss": 0.5144, + "step": 40820 + }, + { + "epoch": 1.39, + "learning_rate": 4.297769078263296e-06, + "loss": 0.4854, + "step": 40840 + }, + { + "epoch": 1.39, + "learning_rate": 4.2890120147375545e-06, + "loss": 0.5271, + "step": 40860 + }, + { + "epoch": 1.39, + "learning_rate": 4.280261445707985e-06, + "loss": 0.5053, + "step": 40880 + }, + { + "epoch": 1.39, + "learning_rate": 4.271517381125687e-06, + "loss": 0.5188, + "step": 40900 + }, + { + "epoch": 1.39, + "learning_rate": 4.262779830934346e-06, + "loss": 0.5301, + "step": 40920 + }, + { + "epoch": 1.39, + "learning_rate": 4.254048805070248e-06, + "loss": 0.4891, + "step": 40940 + }, + { + "epoch": 1.39, + "learning_rate": 4.2453243134622465e-06, + "loss": 0.4493, + "step": 40960 + }, + { + "epoch": 1.39, + "learning_rate": 4.2366063660317805e-06, + "loss": 0.4975, + "step": 40980 + }, + { + "epoch": 1.39, + "learning_rate": 4.2278949726928354e-06, + "loss": 0.5603, + "step": 41000 + }, + { + "epoch": 1.39, + "learning_rate": 4.219190143351951e-06, + "loss": 0.5002, + "step": 41020 + }, + { + "epoch": 1.39, + "learning_rate": 4.210491887908201e-06, + "loss": 0.5157, + "step": 41040 + }, + { + "epoch": 1.39, + "learning_rate": 4.201800216253176e-06, + "loss": 0.5158, + "step": 41060 + }, + { + "epoch": 1.39, + "learning_rate": 4.193115138270986e-06, + "loss": 0.5076, + "step": 41080 + }, + { + "epoch": 1.4, + "learning_rate": 4.184436663838245e-06, + "loss": 0.4968, + "step": 41100 + }, + { + "epoch": 1.4, + "learning_rate": 4.175764802824049e-06, + "loss": 0.5185, + "step": 41120 + }, + { + "epoch": 1.4, + "learning_rate": 4.167099565089989e-06, + "loss": 0.5321, + "step": 41140 + }, + { + "epoch": 1.4, + "learning_rate": 4.158440960490104e-06, + "loss": 0.4719, + "step": 41160 + }, + { + "epoch": 1.4, + "learning_rate": 4.149788998870902e-06, + "loss": 0.525, + "step": 41180 + }, + { + "epoch": 1.4, + "learning_rate": 4.141143690071335e-06, + "loss": 0.497, + "step": 41200 + }, + { + "epoch": 1.4, + "learning_rate": 4.132505043922792e-06, + "loss": 0.4672, + "step": 41220 + }, + { + "epoch": 1.4, + "learning_rate": 4.123873070249074e-06, + "loss": 0.4633, + "step": 41240 + }, + { + "epoch": 1.4, + "learning_rate": 4.115247778866407e-06, + "loss": 0.4851, + "step": 41260 + }, + { + "epoch": 1.4, + "learning_rate": 4.106629179583412e-06, + "loss": 0.4671, + "step": 41280 + }, + { + "epoch": 1.4, + "learning_rate": 4.0980172822010985e-06, + "loss": 0.4747, + "step": 41300 + }, + { + "epoch": 1.4, + "learning_rate": 4.0894120965128615e-06, + "loss": 0.5273, + "step": 41320 + }, + { + "epoch": 1.4, + "learning_rate": 4.080813632304455e-06, + "loss": 0.4695, + "step": 41340 + }, + { + "epoch": 1.4, + "learning_rate": 4.072221899353995e-06, + "loss": 0.5268, + "step": 41360 + }, + { + "epoch": 1.4, + "learning_rate": 4.063636907431942e-06, + "loss": 0.5052, + "step": 41380 + }, + { + "epoch": 1.41, + "learning_rate": 4.055058666301087e-06, + "loss": 0.5012, + "step": 41400 + }, + { + "epoch": 1.41, + "learning_rate": 4.046487185716552e-06, + "loss": 0.5278, + "step": 41420 + }, + { + "epoch": 1.41, + "learning_rate": 4.037922475425759e-06, + "loss": 0.4867, + "step": 41440 + }, + { + "epoch": 1.41, + "learning_rate": 4.029364545168438e-06, + "loss": 0.4981, + "step": 41460 + }, + { + "epoch": 1.41, + "learning_rate": 4.020813404676612e-06, + "loss": 0.4956, + "step": 41480 + }, + { + "epoch": 1.41, + "learning_rate": 4.012269063674581e-06, + "loss": 0.4961, + "step": 41500 + }, + { + "epoch": 1.41, + "learning_rate": 4.0037315318789e-06, + "loss": 0.4949, + "step": 41520 + }, + { + "epoch": 1.41, + "learning_rate": 3.9952008189984025e-06, + "loss": 0.4913, + "step": 41540 + }, + { + "epoch": 1.41, + "learning_rate": 3.986676934734151e-06, + "loss": 0.554, + "step": 41560 + }, + { + "epoch": 1.41, + "learning_rate": 3.978159888779451e-06, + "loss": 0.4984, + "step": 41580 + }, + { + "epoch": 1.41, + "learning_rate": 3.96964969081983e-06, + "loss": 0.5367, + "step": 41600 + }, + { + "epoch": 1.41, + "learning_rate": 3.961146350533021e-06, + "loss": 0.4877, + "step": 41620 + }, + { + "epoch": 1.41, + "learning_rate": 3.9526498775889645e-06, + "loss": 0.5043, + "step": 41640 + }, + { + "epoch": 1.41, + "learning_rate": 3.944160281649793e-06, + "loss": 0.4978, + "step": 41660 + }, + { + "epoch": 1.41, + "learning_rate": 3.935677572369817e-06, + "loss": 0.5095, + "step": 41680 + }, + { + "epoch": 1.42, + "learning_rate": 3.927201759395518e-06, + "loss": 0.4937, + "step": 41700 + }, + { + "epoch": 1.42, + "learning_rate": 3.918732852365524e-06, + "loss": 0.5151, + "step": 41720 + }, + { + "epoch": 1.42, + "learning_rate": 3.910270860910623e-06, + "loss": 0.4712, + "step": 41740 + }, + { + "epoch": 1.42, + "learning_rate": 3.901815794653729e-06, + "loss": 0.4514, + "step": 41760 + }, + { + "epoch": 1.42, + "learning_rate": 3.893367663209896e-06, + "loss": 0.5102, + "step": 41780 + }, + { + "epoch": 1.42, + "learning_rate": 3.884926476186269e-06, + "loss": 0.4833, + "step": 41800 + }, + { + "epoch": 1.42, + "learning_rate": 3.876492243182113e-06, + "loss": 0.4814, + "step": 41820 + }, + { + "epoch": 1.42, + "learning_rate": 3.8680649737887784e-06, + "loss": 0.5291, + "step": 41840 + }, + { + "epoch": 1.42, + "learning_rate": 3.859644677589699e-06, + "loss": 0.5029, + "step": 41860 + }, + { + "epoch": 1.42, + "learning_rate": 3.851231364160379e-06, + "loss": 0.4675, + "step": 41880 + }, + { + "epoch": 1.42, + "learning_rate": 3.842825043068377e-06, + "loss": 0.5084, + "step": 41900 + }, + { + "epoch": 1.42, + "learning_rate": 3.834425723873303e-06, + "loss": 0.4999, + "step": 41920 + }, + { + "epoch": 1.42, + "learning_rate": 3.826033416126807e-06, + "loss": 0.4788, + "step": 41940 + }, + { + "epoch": 1.42, + "learning_rate": 3.817648129372563e-06, + "loss": 0.5149, + "step": 41960 + }, + { + "epoch": 1.42, + "learning_rate": 3.809269873146263e-06, + "loss": 0.5007, + "step": 41980 + }, + { + "epoch": 1.43, + "learning_rate": 3.800898656975599e-06, + "loss": 0.477, + "step": 42000 + }, + { + "epoch": 1.43, + "learning_rate": 3.792534490380263e-06, + "loss": 0.4822, + "step": 42020 + }, + { + "epoch": 1.43, + "learning_rate": 3.7841773828719277e-06, + "loss": 0.4666, + "step": 42040 + }, + { + "epoch": 1.43, + "learning_rate": 3.775827343954239e-06, + "loss": 0.4678, + "step": 42060 + }, + { + "epoch": 1.43, + "learning_rate": 3.767484383122799e-06, + "loss": 0.4648, + "step": 42080 + }, + { + "epoch": 1.43, + "learning_rate": 3.759148509865168e-06, + "loss": 0.4941, + "step": 42100 + }, + { + "epoch": 1.43, + "learning_rate": 3.750819733660844e-06, + "loss": 0.5476, + "step": 42120 + }, + { + "epoch": 1.43, + "learning_rate": 3.7424980639812534e-06, + "loss": 0.4731, + "step": 42140 + }, + { + "epoch": 1.43, + "learning_rate": 3.734183510289742e-06, + "loss": 0.5135, + "step": 42160 + }, + { + "epoch": 1.43, + "learning_rate": 3.725876082041563e-06, + "loss": 0.498, + "step": 42180 + }, + { + "epoch": 1.43, + "learning_rate": 3.717575788683866e-06, + "loss": 0.4935, + "step": 42200 + }, + { + "epoch": 1.43, + "learning_rate": 3.7092826396556882e-06, + "loss": 0.5092, + "step": 42220 + }, + { + "epoch": 1.43, + "learning_rate": 3.700996644387944e-06, + "loss": 0.4793, + "step": 42240 + }, + { + "epoch": 1.43, + "learning_rate": 3.6927178123034034e-06, + "loss": 0.4633, + "step": 42260 + }, + { + "epoch": 1.44, + "learning_rate": 3.684446152816702e-06, + "loss": 0.5189, + "step": 42280 + }, + { + "epoch": 1.44, + "learning_rate": 3.6761816753343114e-06, + "loss": 0.4972, + "step": 42300 + }, + { + "epoch": 1.44, + "learning_rate": 3.6679243892545414e-06, + "loss": 0.5158, + "step": 42320 + }, + { + "epoch": 1.44, + "learning_rate": 3.659674303967522e-06, + "loss": 0.5912, + "step": 42340 + }, + { + "epoch": 1.44, + "learning_rate": 3.6514314288551877e-06, + "loss": 0.4993, + "step": 42360 + }, + { + "epoch": 1.44, + "learning_rate": 3.6431957732912793e-06, + "loss": 0.4731, + "step": 42380 + }, + { + "epoch": 1.44, + "learning_rate": 3.634967346641334e-06, + "loss": 0.5195, + "step": 42400 + }, + { + "epoch": 1.44, + "learning_rate": 3.6267461582626584e-06, + "loss": 0.5218, + "step": 42420 + }, + { + "epoch": 1.44, + "learning_rate": 3.6185322175043336e-06, + "loss": 0.4498, + "step": 42440 + }, + { + "epoch": 1.44, + "learning_rate": 3.6103255337071918e-06, + "loss": 0.5465, + "step": 42460 + }, + { + "epoch": 1.44, + "learning_rate": 3.602126116203819e-06, + "loss": 0.5014, + "step": 42480 + }, + { + "epoch": 1.44, + "learning_rate": 3.5939339743185376e-06, + "loss": 0.5211, + "step": 42500 + }, + { + "epoch": 1.44, + "learning_rate": 3.5857491173673976e-06, + "loss": 0.4967, + "step": 42520 + }, + { + "epoch": 1.44, + "learning_rate": 3.5775715546581546e-06, + "loss": 0.5686, + "step": 42540 + }, + { + "epoch": 1.44, + "learning_rate": 3.5694012954902803e-06, + "loss": 0.4815, + "step": 42560 + }, + { + "epoch": 1.45, + "learning_rate": 3.5612383491549384e-06, + "loss": 0.4797, + "step": 42580 + }, + { + "epoch": 1.45, + "learning_rate": 3.553082724934973e-06, + "loss": 0.4874, + "step": 42600 + }, + { + "epoch": 1.45, + "learning_rate": 3.544934432104905e-06, + "loss": 0.4657, + "step": 42620 + }, + { + "epoch": 1.45, + "learning_rate": 3.5367934799309167e-06, + "loss": 0.5194, + "step": 42640 + }, + { + "epoch": 1.45, + "learning_rate": 3.5286598776708436e-06, + "loss": 0.4837, + "step": 42660 + }, + { + "epoch": 1.45, + "learning_rate": 3.5205336345741603e-06, + "loss": 0.4811, + "step": 42680 + }, + { + "epoch": 1.45, + "learning_rate": 3.512414759881976e-06, + "loss": 0.4934, + "step": 42700 + }, + { + "epoch": 1.45, + "learning_rate": 3.5043032628270225e-06, + "loss": 0.4978, + "step": 42720 + }, + { + "epoch": 1.45, + "learning_rate": 3.49619915263363e-06, + "loss": 0.4769, + "step": 42740 + }, + { + "epoch": 1.45, + "learning_rate": 3.488102438517742e-06, + "loss": 0.5403, + "step": 42760 + }, + { + "epoch": 1.45, + "learning_rate": 3.4800131296868842e-06, + "loss": 0.5017, + "step": 42780 + }, + { + "epoch": 1.45, + "learning_rate": 3.4719312353401647e-06, + "loss": 0.4866, + "step": 42800 + }, + { + "epoch": 1.45, + "learning_rate": 3.4638567646682576e-06, + "loss": 0.4903, + "step": 42820 + }, + { + "epoch": 1.45, + "learning_rate": 3.4557897268533935e-06, + "loss": 0.4781, + "step": 42840 + }, + { + "epoch": 1.45, + "learning_rate": 3.4477301310693544e-06, + "loss": 0.4909, + "step": 42860 + }, + { + "epoch": 1.46, + "learning_rate": 3.4396779864814568e-06, + "loss": 0.5015, + "step": 42880 + }, + { + "epoch": 1.46, + "learning_rate": 3.4316333022465465e-06, + "loss": 0.4896, + "step": 42900 + }, + { + "epoch": 1.46, + "learning_rate": 3.423596087512976e-06, + "loss": 0.5027, + "step": 42920 + }, + { + "epoch": 1.46, + "learning_rate": 3.415566351420615e-06, + "loss": 0.497, + "step": 42940 + }, + { + "epoch": 1.46, + "learning_rate": 3.407544103100824e-06, + "loss": 0.4664, + "step": 42960 + }, + { + "epoch": 1.46, + "learning_rate": 3.399529351676447e-06, + "loss": 0.4745, + "step": 42980 + }, + { + "epoch": 1.46, + "learning_rate": 3.39152210626181e-06, + "loss": 0.5351, + "step": 43000 + }, + { + "epoch": 1.46, + "learning_rate": 3.3835223759626845e-06, + "loss": 0.4961, + "step": 43020 + }, + { + "epoch": 1.46, + "learning_rate": 3.3755301698763197e-06, + "loss": 0.5245, + "step": 43040 + }, + { + "epoch": 1.46, + "learning_rate": 3.367545497091396e-06, + "loss": 0.5402, + "step": 43060 + }, + { + "epoch": 1.46, + "learning_rate": 3.3595683666880286e-06, + "loss": 0.5098, + "step": 43080 + }, + { + "epoch": 1.46, + "learning_rate": 3.3515987877377498e-06, + "loss": 0.4786, + "step": 43100 + }, + { + "epoch": 1.46, + "learning_rate": 3.343636769303512e-06, + "loss": 0.4555, + "step": 43120 + }, + { + "epoch": 1.46, + "learning_rate": 3.3356823204396706e-06, + "loss": 0.4998, + "step": 43140 + }, + { + "epoch": 1.47, + "learning_rate": 3.3277354501919677e-06, + "loss": 0.4714, + "step": 43160 + }, + { + "epoch": 1.47, + "learning_rate": 3.319796167597533e-06, + "loss": 0.4957, + "step": 43180 + }, + { + "epoch": 1.47, + "learning_rate": 3.3118644816848577e-06, + "loss": 0.4871, + "step": 43200 + }, + { + "epoch": 1.47, + "learning_rate": 3.303940401473804e-06, + "loss": 0.4978, + "step": 43220 + }, + { + "epoch": 1.47, + "learning_rate": 3.2960239359755808e-06, + "loss": 0.5106, + "step": 43240 + }, + { + "epoch": 1.47, + "learning_rate": 3.288115094192734e-06, + "loss": 0.4918, + "step": 43260 + }, + { + "epoch": 1.47, + "learning_rate": 3.2802138851191558e-06, + "loss": 0.5428, + "step": 43280 + }, + { + "epoch": 1.47, + "learning_rate": 3.2723203177400352e-06, + "loss": 0.5042, + "step": 43300 + }, + { + "epoch": 1.47, + "learning_rate": 3.264434401031887e-06, + "loss": 0.4565, + "step": 43320 + }, + { + "epoch": 1.47, + "learning_rate": 3.2565561439625205e-06, + "loss": 0.5296, + "step": 43340 + }, + { + "epoch": 1.47, + "learning_rate": 3.248685555491039e-06, + "loss": 0.4808, + "step": 43360 + }, + { + "epoch": 1.47, + "learning_rate": 3.240822644567815e-06, + "loss": 0.5009, + "step": 43380 + }, + { + "epoch": 1.47, + "learning_rate": 3.2329674201344997e-06, + "loss": 0.522, + "step": 43400 + }, + { + "epoch": 1.47, + "learning_rate": 3.2251198911240002e-06, + "loss": 0.5334, + "step": 43420 + }, + { + "epoch": 1.47, + "learning_rate": 3.217280066460472e-06, + "loss": 0.4565, + "step": 43440 + }, + { + "epoch": 1.48, + "learning_rate": 3.2094479550593107e-06, + "loss": 0.5016, + "step": 43460 + }, + { + "epoch": 1.48, + "learning_rate": 3.201623565827139e-06, + "loss": 0.4863, + "step": 43480 + }, + { + "epoch": 1.48, + "learning_rate": 3.1938069076617983e-06, + "loss": 0.4797, + "step": 43500 + }, + { + "epoch": 1.48, + "learning_rate": 3.1859979894523375e-06, + "loss": 0.5096, + "step": 43520 + }, + { + "epoch": 1.48, + "learning_rate": 3.1781968200790094e-06, + "loss": 0.5187, + "step": 43540 + }, + { + "epoch": 1.48, + "learning_rate": 3.170403408413243e-06, + "loss": 0.5091, + "step": 43560 + }, + { + "epoch": 1.48, + "learning_rate": 3.1626177633176557e-06, + "loss": 0.5026, + "step": 43580 + }, + { + "epoch": 1.48, + "learning_rate": 3.154839893646028e-06, + "loss": 0.5275, + "step": 43600 + }, + { + "epoch": 1.48, + "learning_rate": 3.1470698082433014e-06, + "loss": 0.4637, + "step": 43620 + }, + { + "epoch": 1.48, + "learning_rate": 3.139307515945563e-06, + "loss": 0.4792, + "step": 43640 + }, + { + "epoch": 1.48, + "learning_rate": 3.1315530255800384e-06, + "loss": 0.5096, + "step": 43660 + }, + { + "epoch": 1.48, + "learning_rate": 3.1238063459650802e-06, + "loss": 0.5076, + "step": 43680 + }, + { + "epoch": 1.48, + "learning_rate": 3.116067485910159e-06, + "loss": 0.4875, + "step": 43700 + }, + { + "epoch": 1.48, + "learning_rate": 3.1083364542158535e-06, + "loss": 0.5021, + "step": 43720 + }, + { + "epoch": 1.48, + "learning_rate": 3.1006132596738426e-06, + "loss": 0.5041, + "step": 43740 + }, + { + "epoch": 1.49, + "learning_rate": 3.092897911066883e-06, + "loss": 0.4867, + "step": 43760 + }, + { + "epoch": 1.49, + "learning_rate": 3.0851904171688196e-06, + "loss": 0.5268, + "step": 43780 + }, + { + "epoch": 1.49, + "learning_rate": 3.0774907867445623e-06, + "loss": 0.5006, + "step": 43800 + }, + { + "epoch": 1.49, + "learning_rate": 3.0697990285500802e-06, + "loss": 0.4879, + "step": 43820 + }, + { + "epoch": 1.49, + "learning_rate": 3.06211515133238e-06, + "loss": 0.5086, + "step": 43840 + }, + { + "epoch": 1.49, + "learning_rate": 3.054439163829519e-06, + "loss": 0.4727, + "step": 43860 + }, + { + "epoch": 1.49, + "learning_rate": 3.0467710747705735e-06, + "loss": 0.4926, + "step": 43880 + }, + { + "epoch": 1.49, + "learning_rate": 3.0391108928756486e-06, + "loss": 0.508, + "step": 43900 + }, + { + "epoch": 1.49, + "learning_rate": 3.0314586268558486e-06, + "loss": 0.476, + "step": 43920 + }, + { + "epoch": 1.49, + "learning_rate": 3.0238142854132722e-06, + "loss": 0.5425, + "step": 43940 + }, + { + "epoch": 1.49, + "learning_rate": 3.0161778772410153e-06, + "loss": 0.4988, + "step": 43960 + }, + { + "epoch": 1.49, + "learning_rate": 3.0085494110231493e-06, + "loss": 0.4909, + "step": 43980 + }, + { + "epoch": 1.49, + "learning_rate": 3.0009288954347115e-06, + "loss": 0.5108, + "step": 44000 + }, + { + "epoch": 1.49, + "learning_rate": 2.9933163391417042e-06, + "loss": 0.4601, + "step": 44020 + }, + { + "epoch": 1.49, + "learning_rate": 2.985711750801068e-06, + "loss": 0.5138, + "step": 44040 + }, + { + "epoch": 1.5, + "learning_rate": 2.97811513906069e-06, + "loss": 0.4738, + "step": 44060 + }, + { + "epoch": 1.5, + "learning_rate": 2.9705265125593863e-06, + "loss": 0.5138, + "step": 44080 + }, + { + "epoch": 1.5, + "learning_rate": 2.9629458799268897e-06, + "loss": 0.4846, + "step": 44100 + }, + { + "epoch": 1.5, + "learning_rate": 2.955373249783844e-06, + "loss": 0.484, + "step": 44120 + }, + { + "epoch": 1.5, + "learning_rate": 2.947808630741791e-06, + "loss": 0.5328, + "step": 44140 + }, + { + "epoch": 1.5, + "learning_rate": 2.9402520314031646e-06, + "loss": 0.4963, + "step": 44160 + }, + { + "epoch": 1.5, + "learning_rate": 2.932703460361277e-06, + "loss": 0.434, + "step": 44180 + }, + { + "epoch": 1.5, + "learning_rate": 2.9251629262003135e-06, + "loss": 0.4835, + "step": 44200 + }, + { + "epoch": 1.5, + "learning_rate": 2.9176304374953123e-06, + "loss": 0.5047, + "step": 44220 + }, + { + "epoch": 1.5, + "learning_rate": 2.910106002812172e-06, + "loss": 0.4763, + "step": 44240 + }, + { + "epoch": 1.5, + "learning_rate": 2.902589630707624e-06, + "loss": 0.4903, + "step": 44260 + }, + { + "epoch": 1.5, + "learning_rate": 2.895081329729239e-06, + "loss": 0.4682, + "step": 44280 + }, + { + "epoch": 1.5, + "learning_rate": 2.8875811084154025e-06, + "loss": 0.4581, + "step": 44300 + }, + { + "epoch": 1.5, + "learning_rate": 2.8800889752953156e-06, + "loss": 0.4711, + "step": 44320 + }, + { + "epoch": 1.51, + "learning_rate": 2.872604938888981e-06, + "loss": 0.5166, + "step": 44340 + }, + { + "epoch": 1.51, + "learning_rate": 2.8651290077071914e-06, + "loss": 0.5098, + "step": 44360 + }, + { + "epoch": 1.51, + "learning_rate": 2.8576611902515283e-06, + "loss": 0.4655, + "step": 44380 + }, + { + "epoch": 1.51, + "learning_rate": 2.8502014950143376e-06, + "loss": 0.4898, + "step": 44400 + }, + { + "epoch": 1.51, + "learning_rate": 2.842749930478734e-06, + "loss": 0.5059, + "step": 44420 + }, + { + "epoch": 1.51, + "learning_rate": 2.8353065051185867e-06, + "loss": 0.4923, + "step": 44440 + }, + { + "epoch": 1.51, + "learning_rate": 2.827871227398508e-06, + "loss": 0.5248, + "step": 44460 + }, + { + "epoch": 1.51, + "learning_rate": 2.820444105773845e-06, + "loss": 0.4879, + "step": 44480 + }, + { + "epoch": 1.51, + "learning_rate": 2.8130251486906666e-06, + "loss": 0.4507, + "step": 44500 + }, + { + "epoch": 1.51, + "learning_rate": 2.805614364585758e-06, + "loss": 0.5031, + "step": 44520 + }, + { + "epoch": 1.51, + "learning_rate": 2.7982117618866177e-06, + "loss": 0.4808, + "step": 44540 + }, + { + "epoch": 1.51, + "learning_rate": 2.790817349011432e-06, + "loss": 0.4506, + "step": 44560 + }, + { + "epoch": 1.51, + "learning_rate": 2.783431134369079e-06, + "loss": 0.5022, + "step": 44580 + }, + { + "epoch": 1.51, + "learning_rate": 2.776053126359105e-06, + "loss": 0.5251, + "step": 44600 + }, + { + "epoch": 1.51, + "learning_rate": 2.7686833333717334e-06, + "loss": 0.4724, + "step": 44620 + }, + { + "epoch": 1.52, + "learning_rate": 2.761321763787841e-06, + "loss": 0.4463, + "step": 44640 + }, + { + "epoch": 1.52, + "learning_rate": 2.753968425978958e-06, + "loss": 0.457, + "step": 44660 + }, + { + "epoch": 1.52, + "learning_rate": 2.746623328307243e-06, + "loss": 0.4865, + "step": 44680 + }, + { + "epoch": 1.52, + "learning_rate": 2.739286479125494e-06, + "loss": 0.5148, + "step": 44700 + }, + { + "epoch": 1.52, + "learning_rate": 2.7319578867771237e-06, + "loss": 0.493, + "step": 44720 + }, + { + "epoch": 1.52, + "learning_rate": 2.7246375595961584e-06, + "loss": 0.4611, + "step": 44740 + }, + { + "epoch": 1.52, + "learning_rate": 2.7173255059072234e-06, + "loss": 0.4909, + "step": 44760 + }, + { + "epoch": 1.52, + "learning_rate": 2.7100217340255352e-06, + "loss": 0.5355, + "step": 44780 + }, + { + "epoch": 1.52, + "learning_rate": 2.7027262522568944e-06, + "loss": 0.4923, + "step": 44800 + }, + { + "epoch": 1.52, + "learning_rate": 2.6954390688976706e-06, + "loss": 0.5313, + "step": 44820 + }, + { + "epoch": 1.52, + "learning_rate": 2.6881601922348e-06, + "loss": 0.4939, + "step": 44840 + }, + { + "epoch": 1.52, + "learning_rate": 2.6808896305457733e-06, + "loss": 0.529, + "step": 44860 + }, + { + "epoch": 1.52, + "learning_rate": 2.6736273920986166e-06, + "loss": 0.4512, + "step": 44880 + }, + { + "epoch": 1.52, + "learning_rate": 2.6663734851519017e-06, + "loss": 0.4795, + "step": 44900 + }, + { + "epoch": 1.52, + "learning_rate": 2.65912791795472e-06, + "loss": 0.5323, + "step": 44920 + }, + { + "epoch": 1.53, + "learning_rate": 2.651890698746682e-06, + "loss": 0.4636, + "step": 44940 + }, + { + "epoch": 1.53, + "learning_rate": 2.6446618357579035e-06, + "loss": 0.4466, + "step": 44960 + }, + { + "epoch": 1.53, + "learning_rate": 2.6374413372089968e-06, + "loss": 0.5066, + "step": 44980 + }, + { + "epoch": 1.53, + "learning_rate": 2.630229211311064e-06, + "loss": 0.4642, + "step": 45000 + }, + { + "epoch": 1.53, + "learning_rate": 2.623025466265684e-06, + "loss": 0.4945, + "step": 45020 + }, + { + "epoch": 1.53, + "learning_rate": 2.6158301102649085e-06, + "loss": 0.4825, + "step": 45040 + }, + { + "epoch": 1.53, + "learning_rate": 2.6086431514912438e-06, + "loss": 0.4915, + "step": 45060 + }, + { + "epoch": 1.53, + "learning_rate": 2.6014645981176488e-06, + "loss": 0.4504, + "step": 45080 + }, + { + "epoch": 1.53, + "learning_rate": 2.594294458307528e-06, + "loss": 0.4665, + "step": 45100 + }, + { + "epoch": 1.53, + "learning_rate": 2.5871327402147173e-06, + "loss": 0.5008, + "step": 45120 + }, + { + "epoch": 1.53, + "learning_rate": 2.5799794519834632e-06, + "loss": 0.4345, + "step": 45140 + }, + { + "epoch": 1.53, + "learning_rate": 2.5728346017484453e-06, + "loss": 0.4824, + "step": 45160 + }, + { + "epoch": 1.53, + "learning_rate": 2.565698197634735e-06, + "loss": 0.5238, + "step": 45180 + }, + { + "epoch": 1.53, + "learning_rate": 2.5585702477578024e-06, + "loss": 0.4494, + "step": 45200 + }, + { + "epoch": 1.53, + "learning_rate": 2.5514507602235063e-06, + "loss": 0.4856, + "step": 45220 + }, + { + "epoch": 1.54, + "learning_rate": 2.5443397431280703e-06, + "loss": 0.4827, + "step": 45240 + }, + { + "epoch": 1.54, + "learning_rate": 2.5372372045581006e-06, + "loss": 0.4832, + "step": 45260 + }, + { + "epoch": 1.54, + "learning_rate": 2.530143152590553e-06, + "loss": 0.4766, + "step": 45280 + }, + { + "epoch": 1.54, + "learning_rate": 2.523057595292734e-06, + "loss": 0.544, + "step": 45300 + }, + { + "epoch": 1.54, + "learning_rate": 2.515980540722295e-06, + "loss": 0.4732, + "step": 45320 + }, + { + "epoch": 1.54, + "learning_rate": 2.508911996927207e-06, + "loss": 0.478, + "step": 45340 + }, + { + "epoch": 1.54, + "learning_rate": 2.5018519719457725e-06, + "loss": 0.5033, + "step": 45360 + }, + { + "epoch": 1.54, + "learning_rate": 2.4948004738066e-06, + "loss": 0.5514, + "step": 45380 + }, + { + "epoch": 1.54, + "learning_rate": 2.4877575105286146e-06, + "loss": 0.515, + "step": 45400 + }, + { + "epoch": 1.54, + "learning_rate": 2.4807230901210155e-06, + "loss": 0.4947, + "step": 45420 + }, + { + "epoch": 1.54, + "learning_rate": 2.4736972205832997e-06, + "loss": 0.4861, + "step": 45440 + }, + { + "epoch": 1.54, + "learning_rate": 2.4666799099052384e-06, + "loss": 0.4854, + "step": 45460 + }, + { + "epoch": 1.54, + "learning_rate": 2.459671166066869e-06, + "loss": 0.4906, + "step": 45480 + }, + { + "epoch": 1.54, + "learning_rate": 2.45267099703849e-06, + "loss": 0.5068, + "step": 45500 + }, + { + "epoch": 1.55, + "learning_rate": 2.445679410780638e-06, + "loss": 0.5006, + "step": 45520 + }, + { + "epoch": 1.55, + "learning_rate": 2.4386964152441016e-06, + "loss": 0.4426, + "step": 45540 + }, + { + "epoch": 1.55, + "learning_rate": 2.431722018369894e-06, + "loss": 0.5048, + "step": 45560 + }, + { + "epoch": 1.55, + "learning_rate": 2.424756228089251e-06, + "loss": 0.4887, + "step": 45580 + }, + { + "epoch": 1.55, + "learning_rate": 2.4177990523236216e-06, + "loss": 0.4839, + "step": 45600 + }, + { + "epoch": 1.55, + "learning_rate": 2.410850498984659e-06, + "loss": 0.4907, + "step": 45620 + }, + { + "epoch": 1.55, + "learning_rate": 2.4039105759742098e-06, + "loss": 0.5128, + "step": 45640 + }, + { + "epoch": 1.55, + "learning_rate": 2.396979291184306e-06, + "loss": 0.5183, + "step": 45660 + }, + { + "epoch": 1.55, + "learning_rate": 2.3900566524971593e-06, + "loss": 0.4374, + "step": 45680 + }, + { + "epoch": 1.55, + "learning_rate": 2.3831426677851423e-06, + "loss": 0.5259, + "step": 45700 + }, + { + "epoch": 1.55, + "learning_rate": 2.3762373449107935e-06, + "loss": 0.4733, + "step": 45720 + }, + { + "epoch": 1.55, + "learning_rate": 2.369340691726798e-06, + "loss": 0.4627, + "step": 45740 + }, + { + "epoch": 1.55, + "learning_rate": 2.3624527160759812e-06, + "loss": 0.4745, + "step": 45760 + }, + { + "epoch": 1.55, + "learning_rate": 2.3555734257913045e-06, + "loss": 0.4938, + "step": 45780 + }, + { + "epoch": 1.55, + "learning_rate": 2.3487028286958458e-06, + "loss": 0.4779, + "step": 45800 + }, + { + "epoch": 1.56, + "learning_rate": 2.3418409326028037e-06, + "loss": 0.5329, + "step": 45820 + }, + { + "epoch": 1.56, + "learning_rate": 2.334987745315478e-06, + "loss": 0.5157, + "step": 45840 + }, + { + "epoch": 1.56, + "learning_rate": 2.328143274627266e-06, + "loss": 0.4395, + "step": 45860 + }, + { + "epoch": 1.56, + "learning_rate": 2.3213075283216547e-06, + "loss": 0.4928, + "step": 45880 + }, + { + "epoch": 1.56, + "learning_rate": 2.3144805141722038e-06, + "loss": 0.4894, + "step": 45900 + }, + { + "epoch": 1.56, + "learning_rate": 2.3076622399425485e-06, + "loss": 0.486, + "step": 45920 + }, + { + "epoch": 1.56, + "learning_rate": 2.3008527133863836e-06, + "loss": 0.4598, + "step": 45940 + }, + { + "epoch": 1.56, + "learning_rate": 2.294051942247457e-06, + "loss": 0.4787, + "step": 45960 + }, + { + "epoch": 1.56, + "learning_rate": 2.2872599342595557e-06, + "loss": 0.5415, + "step": 45980 + }, + { + "epoch": 1.56, + "learning_rate": 2.2804766971465054e-06, + "loss": 0.5304, + "step": 46000 + }, + { + "epoch": 1.56, + "learning_rate": 2.2737022386221552e-06, + "loss": 0.5048, + "step": 46020 + }, + { + "epoch": 1.56, + "learning_rate": 2.2669365663903764e-06, + "loss": 0.5102, + "step": 46040 + }, + { + "epoch": 1.56, + "learning_rate": 2.2601796881450468e-06, + "loss": 0.5122, + "step": 46060 + }, + { + "epoch": 1.56, + "learning_rate": 2.253431611570035e-06, + "loss": 0.4871, + "step": 46080 + }, + { + "epoch": 1.56, + "learning_rate": 2.24669234433921e-06, + "loss": 0.465, + "step": 46100 + }, + { + "epoch": 1.57, + "learning_rate": 2.2399618941164202e-06, + "loss": 0.4898, + "step": 46120 + }, + { + "epoch": 1.57, + "learning_rate": 2.2332402685554876e-06, + "loss": 0.4535, + "step": 46140 + }, + { + "epoch": 1.57, + "learning_rate": 2.2265274753002008e-06, + "loss": 0.4957, + "step": 46160 + }, + { + "epoch": 1.57, + "learning_rate": 2.2198235219842967e-06, + "loss": 0.5043, + "step": 46180 + }, + { + "epoch": 1.57, + "learning_rate": 2.213128416231468e-06, + "loss": 0.4956, + "step": 46200 + }, + { + "epoch": 1.57, + "learning_rate": 2.2064421656553415e-06, + "loss": 0.474, + "step": 46220 + }, + { + "epoch": 1.57, + "learning_rate": 2.199764777859477e-06, + "loss": 0.5202, + "step": 46240 + }, + { + "epoch": 1.57, + "learning_rate": 2.1930962604373517e-06, + "loss": 0.4743, + "step": 46260 + }, + { + "epoch": 1.57, + "learning_rate": 2.1864366209723597e-06, + "loss": 0.5217, + "step": 46280 + }, + { + "epoch": 1.57, + "learning_rate": 2.179785867037797e-06, + "loss": 0.544, + "step": 46300 + }, + { + "epoch": 1.57, + "learning_rate": 2.1731440061968536e-06, + "loss": 0.5203, + "step": 46320 + }, + { + "epoch": 1.57, + "learning_rate": 2.166511046002612e-06, + "loss": 0.4537, + "step": 46340 + }, + { + "epoch": 1.57, + "learning_rate": 2.1598869939980225e-06, + "loss": 0.5019, + "step": 46360 + }, + { + "epoch": 1.57, + "learning_rate": 2.153271857715915e-06, + "loss": 0.4725, + "step": 46380 + }, + { + "epoch": 1.58, + "learning_rate": 2.1466656446789757e-06, + "loss": 0.5213, + "step": 46400 + }, + { + "epoch": 1.58, + "learning_rate": 2.140068362399745e-06, + "loss": 0.4668, + "step": 46420 + }, + { + "epoch": 1.58, + "learning_rate": 2.133480018380608e-06, + "loss": 0.4751, + "step": 46440 + }, + { + "epoch": 1.58, + "learning_rate": 2.1269006201137833e-06, + "loss": 0.4841, + "step": 46460 + }, + { + "epoch": 1.58, + "learning_rate": 2.1203301750813166e-06, + "loss": 0.4737, + "step": 46480 + }, + { + "epoch": 1.58, + "learning_rate": 2.1137686907550737e-06, + "loss": 0.4682, + "step": 46500 + }, + { + "epoch": 1.58, + "learning_rate": 2.1072161745967313e-06, + "loss": 0.516, + "step": 46520 + }, + { + "epoch": 1.58, + "learning_rate": 2.1006726340577597e-06, + "loss": 0.4704, + "step": 46540 + }, + { + "epoch": 1.58, + "learning_rate": 2.094138076579433e-06, + "loss": 0.4632, + "step": 46560 + }, + { + "epoch": 1.58, + "learning_rate": 2.0876125095928033e-06, + "loss": 0.4756, + "step": 46580 + }, + { + "epoch": 1.58, + "learning_rate": 2.0810959405187005e-06, + "loss": 0.4814, + "step": 46600 + }, + { + "epoch": 1.58, + "learning_rate": 2.0745883767677246e-06, + "loss": 0.5147, + "step": 46620 + }, + { + "epoch": 1.58, + "learning_rate": 2.068089825740225e-06, + "loss": 0.489, + "step": 46640 + }, + { + "epoch": 1.58, + "learning_rate": 2.0616002948263157e-06, + "loss": 0.4748, + "step": 46660 + }, + { + "epoch": 1.58, + "learning_rate": 2.0551197914058464e-06, + "loss": 0.4802, + "step": 46680 + }, + { + "epoch": 1.59, + "learning_rate": 2.0486483228484012e-06, + "loss": 0.494, + "step": 46700 + }, + { + "epoch": 1.59, + "learning_rate": 2.0421858965132856e-06, + "loss": 0.4977, + "step": 46720 + }, + { + "epoch": 1.59, + "learning_rate": 2.035732519749528e-06, + "loss": 0.4961, + "step": 46740 + }, + { + "epoch": 1.59, + "learning_rate": 2.0292881998958634e-06, + "loss": 0.4755, + "step": 46760 + }, + { + "epoch": 1.59, + "learning_rate": 2.022852944280731e-06, + "loss": 0.4676, + "step": 46780 + }, + { + "epoch": 1.59, + "learning_rate": 2.016426760222259e-06, + "loss": 0.4617, + "step": 46800 + }, + { + "epoch": 1.59, + "learning_rate": 2.0100096550282543e-06, + "loss": 0.4792, + "step": 46820 + }, + { + "epoch": 1.59, + "learning_rate": 2.0036016359962084e-06, + "loss": 0.4592, + "step": 46840 + }, + { + "epoch": 1.59, + "learning_rate": 1.9972027104132763e-06, + "loss": 0.4831, + "step": 46860 + }, + { + "epoch": 1.59, + "learning_rate": 1.990812885556269e-06, + "loss": 0.495, + "step": 46880 + }, + { + "epoch": 1.59, + "learning_rate": 1.9844321686916602e-06, + "loss": 0.5107, + "step": 46900 + }, + { + "epoch": 1.59, + "learning_rate": 1.9780605670755473e-06, + "loss": 0.4834, + "step": 46920 + }, + { + "epoch": 1.59, + "learning_rate": 1.971698087953676e-06, + "loss": 0.5159, + "step": 46940 + }, + { + "epoch": 1.59, + "learning_rate": 1.965344738561414e-06, + "loss": 0.4884, + "step": 46960 + }, + { + "epoch": 1.59, + "learning_rate": 1.9590005261237475e-06, + "loss": 0.4492, + "step": 46980 + }, + { + "epoch": 1.6, + "learning_rate": 1.9526654578552673e-06, + "loss": 0.4833, + "step": 47000 + }, + { + "epoch": 1.6, + "learning_rate": 1.9463395409601713e-06, + "loss": 0.4534, + "step": 47020 + }, + { + "epoch": 1.6, + "learning_rate": 1.940022782632248e-06, + "loss": 0.4993, + "step": 47040 + }, + { + "epoch": 1.6, + "learning_rate": 1.9337151900548734e-06, + "loss": 0.4624, + "step": 47060 + }, + { + "epoch": 1.6, + "learning_rate": 1.9274167704009973e-06, + "loss": 0.481, + "step": 47080 + }, + { + "epoch": 1.6, + "learning_rate": 1.921127530833138e-06, + "loss": 0.5272, + "step": 47100 + }, + { + "epoch": 1.6, + "learning_rate": 1.9148474785033768e-06, + "loss": 0.4598, + "step": 47120 + }, + { + "epoch": 1.6, + "learning_rate": 1.908576620553345e-06, + "loss": 0.4906, + "step": 47140 + }, + { + "epoch": 1.6, + "learning_rate": 1.902314964114219e-06, + "loss": 0.4916, + "step": 47160 + }, + { + "epoch": 1.6, + "learning_rate": 1.8960625163067158e-06, + "loss": 0.5078, + "step": 47180 + }, + { + "epoch": 1.6, + "learning_rate": 1.889819284241069e-06, + "loss": 0.4725, + "step": 47200 + }, + { + "epoch": 1.6, + "learning_rate": 1.8835852750170414e-06, + "loss": 0.4765, + "step": 47220 + }, + { + "epoch": 1.6, + "learning_rate": 1.8773604957239067e-06, + "loss": 0.4832, + "step": 47240 + }, + { + "epoch": 1.6, + "learning_rate": 1.8711449534404424e-06, + "loss": 0.5203, + "step": 47260 + }, + { + "epoch": 1.6, + "learning_rate": 1.8649386552349136e-06, + "loss": 0.472, + "step": 47280 + }, + { + "epoch": 1.61, + "learning_rate": 1.8587416081650866e-06, + "loss": 0.4643, + "step": 47300 + }, + { + "epoch": 1.61, + "learning_rate": 1.8525538192782e-06, + "loss": 0.4834, + "step": 47320 + }, + { + "epoch": 1.61, + "learning_rate": 1.8463752956109616e-06, + "loss": 0.5216, + "step": 47340 + }, + { + "epoch": 1.61, + "learning_rate": 1.8402060441895508e-06, + "loss": 0.5254, + "step": 47360 + }, + { + "epoch": 1.61, + "learning_rate": 1.8340460720295917e-06, + "loss": 0.4958, + "step": 47380 + }, + { + "epoch": 1.61, + "learning_rate": 1.827895386136166e-06, + "loss": 0.4614, + "step": 47400 + }, + { + "epoch": 1.61, + "learning_rate": 1.8217539935037897e-06, + "loss": 0.5094, + "step": 47420 + }, + { + "epoch": 1.61, + "learning_rate": 1.8156219011164121e-06, + "loss": 0.4292, + "step": 47440 + }, + { + "epoch": 1.61, + "learning_rate": 1.8094991159474107e-06, + "loss": 0.4841, + "step": 47460 + }, + { + "epoch": 1.61, + "learning_rate": 1.8033856449595689e-06, + "loss": 0.4885, + "step": 47480 + }, + { + "epoch": 1.61, + "learning_rate": 1.7972814951050855e-06, + "loss": 0.4551, + "step": 47500 + }, + { + "epoch": 1.61, + "learning_rate": 1.7911866733255556e-06, + "loss": 0.4634, + "step": 47520 + }, + { + "epoch": 1.61, + "learning_rate": 1.785101186551975e-06, + "loss": 0.4911, + "step": 47540 + }, + { + "epoch": 1.61, + "learning_rate": 1.7790250417047116e-06, + "loss": 0.498, + "step": 47560 + }, + { + "epoch": 1.62, + "learning_rate": 1.772958245693518e-06, + "loss": 0.468, + "step": 47580 + }, + { + "epoch": 1.62, + "learning_rate": 1.7669008054175108e-06, + "loss": 0.4805, + "step": 47600 + }, + { + "epoch": 1.62, + "learning_rate": 1.7608527277651711e-06, + "loss": 0.5318, + "step": 47620 + }, + { + "epoch": 1.62, + "learning_rate": 1.7548140196143338e-06, + "loss": 0.4472, + "step": 47640 + }, + { + "epoch": 1.62, + "learning_rate": 1.7487846878321691e-06, + "loss": 0.4444, + "step": 47660 + }, + { + "epoch": 1.62, + "learning_rate": 1.7427647392751967e-06, + "loss": 0.4735, + "step": 47680 + }, + { + "epoch": 1.62, + "learning_rate": 1.7367541807892586e-06, + "loss": 0.4689, + "step": 47700 + }, + { + "epoch": 1.62, + "learning_rate": 1.7307530192095212e-06, + "loss": 0.4587, + "step": 47720 + }, + { + "epoch": 1.62, + "learning_rate": 1.7247612613604637e-06, + "loss": 0.5297, + "step": 47740 + }, + { + "epoch": 1.62, + "learning_rate": 1.718778914055873e-06, + "loss": 0.5013, + "step": 47760 + }, + { + "epoch": 1.62, + "learning_rate": 1.712805984098832e-06, + "loss": 0.5003, + "step": 47780 + }, + { + "epoch": 1.62, + "learning_rate": 1.7068424782817162e-06, + "loss": 0.5118, + "step": 47800 + }, + { + "epoch": 1.62, + "learning_rate": 1.700888403386186e-06, + "loss": 0.4921, + "step": 47820 + }, + { + "epoch": 1.62, + "learning_rate": 1.694943766183168e-06, + "loss": 0.4948, + "step": 47840 + }, + { + "epoch": 1.62, + "learning_rate": 1.6890085734328666e-06, + "loss": 0.4913, + "step": 47860 + }, + { + "epoch": 1.63, + "learning_rate": 1.6830828318847415e-06, + "loss": 0.4905, + "step": 47880 + }, + { + "epoch": 1.63, + "learning_rate": 1.6771665482775058e-06, + "loss": 0.456, + "step": 47900 + }, + { + "epoch": 1.63, + "learning_rate": 1.6712597293391153e-06, + "loss": 0.4673, + "step": 47920 + }, + { + "epoch": 1.63, + "learning_rate": 1.6653623817867638e-06, + "loss": 0.4698, + "step": 47940 + }, + { + "epoch": 1.63, + "learning_rate": 1.6594745123268751e-06, + "loss": 0.4524, + "step": 47960 + }, + { + "epoch": 1.63, + "learning_rate": 1.6535961276550948e-06, + "loss": 0.4678, + "step": 47980 + }, + { + "epoch": 1.63, + "learning_rate": 1.6477272344562789e-06, + "loss": 0.4924, + "step": 48000 + }, + { + "epoch": 1.63, + "learning_rate": 1.6418678394044962e-06, + "loss": 0.4252, + "step": 48020 + }, + { + "epoch": 1.63, + "learning_rate": 1.6360179491630056e-06, + "loss": 0.4911, + "step": 48040 + }, + { + "epoch": 1.63, + "learning_rate": 1.6301775703842637e-06, + "loss": 0.4731, + "step": 48060 + }, + { + "epoch": 1.63, + "learning_rate": 1.6243467097099097e-06, + "loss": 0.4915, + "step": 48080 + }, + { + "epoch": 1.63, + "learning_rate": 1.6185253737707607e-06, + "loss": 0.4807, + "step": 48100 + }, + { + "epoch": 1.63, + "learning_rate": 1.6127135691867945e-06, + "loss": 0.4835, + "step": 48120 + }, + { + "epoch": 1.63, + "learning_rate": 1.6069113025671546e-06, + "loss": 0.5011, + "step": 48140 + }, + { + "epoch": 1.63, + "learning_rate": 1.6011185805101447e-06, + "loss": 0.5102, + "step": 48160 + }, + { + "epoch": 1.64, + "learning_rate": 1.5953354096032048e-06, + "loss": 0.4949, + "step": 48180 + }, + { + "epoch": 1.64, + "learning_rate": 1.5895617964229215e-06, + "loss": 0.4906, + "step": 48200 + }, + { + "epoch": 1.64, + "learning_rate": 1.5837977475350008e-06, + "loss": 0.5041, + "step": 48220 + }, + { + "epoch": 1.64, + "learning_rate": 1.5780432694942815e-06, + "loss": 0.4643, + "step": 48240 + }, + { + "epoch": 1.64, + "learning_rate": 1.572298368844719e-06, + "loss": 0.4583, + "step": 48260 + }, + { + "epoch": 1.64, + "learning_rate": 1.5665630521193753e-06, + "loss": 0.4727, + "step": 48280 + }, + { + "epoch": 1.64, + "learning_rate": 1.5608373258404087e-06, + "loss": 0.4544, + "step": 48300 + }, + { + "epoch": 1.64, + "learning_rate": 1.5551211965190783e-06, + "loss": 0.5058, + "step": 48320 + }, + { + "epoch": 1.64, + "learning_rate": 1.5494146706557257e-06, + "loss": 0.4836, + "step": 48340 + }, + { + "epoch": 1.64, + "learning_rate": 1.5437177547397742e-06, + "loss": 0.4704, + "step": 48360 + }, + { + "epoch": 1.64, + "learning_rate": 1.538030455249716e-06, + "loss": 0.4754, + "step": 48380 + }, + { + "epoch": 1.64, + "learning_rate": 1.5323527786531113e-06, + "loss": 0.5275, + "step": 48400 + }, + { + "epoch": 1.64, + "learning_rate": 1.5266847314065726e-06, + "loss": 0.4518, + "step": 48420 + }, + { + "epoch": 1.64, + "learning_rate": 1.521026319955764e-06, + "loss": 0.4905, + "step": 48440 + }, + { + "epoch": 1.64, + "learning_rate": 1.515377550735394e-06, + "loss": 0.4558, + "step": 48460 + }, + { + "epoch": 1.65, + "learning_rate": 1.5097384301692041e-06, + "loss": 0.4613, + "step": 48480 + }, + { + "epoch": 1.65, + "learning_rate": 1.5041089646699603e-06, + "loss": 0.4816, + "step": 48500 + }, + { + "epoch": 1.65, + "learning_rate": 1.4984891606394535e-06, + "loss": 0.4806, + "step": 48520 + }, + { + "epoch": 1.65, + "learning_rate": 1.4928790244684866e-06, + "loss": 0.4652, + "step": 48540 + }, + { + "epoch": 1.65, + "learning_rate": 1.487278562536868e-06, + "loss": 0.492, + "step": 48560 + }, + { + "epoch": 1.65, + "learning_rate": 1.4816877812134034e-06, + "loss": 0.4711, + "step": 48580 + }, + { + "epoch": 1.65, + "learning_rate": 1.4761066868558915e-06, + "loss": 0.5062, + "step": 48600 + }, + { + "epoch": 1.65, + "learning_rate": 1.4705352858111144e-06, + "loss": 0.4772, + "step": 48620 + }, + { + "epoch": 1.65, + "learning_rate": 1.4649735844148304e-06, + "loss": 0.4634, + "step": 48640 + }, + { + "epoch": 1.65, + "learning_rate": 1.4594215889917707e-06, + "loss": 0.4814, + "step": 48660 + }, + { + "epoch": 1.65, + "learning_rate": 1.4538793058556222e-06, + "loss": 0.4659, + "step": 48680 + }, + { + "epoch": 1.65, + "learning_rate": 1.4483467413090324e-06, + "loss": 0.5063, + "step": 48700 + }, + { + "epoch": 1.65, + "learning_rate": 1.4428239016435953e-06, + "loss": 0.4821, + "step": 48720 + }, + { + "epoch": 1.65, + "learning_rate": 1.4373107931398477e-06, + "loss": 0.5402, + "step": 48740 + }, + { + "epoch": 1.66, + "learning_rate": 1.4318074220672595e-06, + "loss": 0.4746, + "step": 48760 + }, + { + "epoch": 1.66, + "learning_rate": 1.426313794684221e-06, + "loss": 0.4944, + "step": 48780 + }, + { + "epoch": 1.66, + "learning_rate": 1.4208299172380512e-06, + "loss": 0.4909, + "step": 48800 + }, + { + "epoch": 1.66, + "learning_rate": 1.4153557959649788e-06, + "loss": 0.4646, + "step": 48820 + }, + { + "epoch": 1.66, + "learning_rate": 1.4098914370901385e-06, + "loss": 0.4884, + "step": 48840 + }, + { + "epoch": 1.66, + "learning_rate": 1.4044368468275549e-06, + "loss": 0.4764, + "step": 48860 + }, + { + "epoch": 1.66, + "learning_rate": 1.3989920313801552e-06, + "loss": 0.5018, + "step": 48880 + }, + { + "epoch": 1.66, + "learning_rate": 1.3935569969397434e-06, + "loss": 0.4802, + "step": 48900 + }, + { + "epoch": 1.66, + "learning_rate": 1.3881317496870062e-06, + "loss": 0.4864, + "step": 48920 + }, + { + "epoch": 1.66, + "learning_rate": 1.3827162957914975e-06, + "loss": 0.4689, + "step": 48940 + }, + { + "epoch": 1.66, + "learning_rate": 1.37731064141163e-06, + "loss": 0.4675, + "step": 48960 + }, + { + "epoch": 1.66, + "learning_rate": 1.3719147926946797e-06, + "loss": 0.4988, + "step": 48980 + }, + { + "epoch": 1.66, + "learning_rate": 1.3665287557767682e-06, + "loss": 0.4974, + "step": 49000 + }, + { + "epoch": 1.66, + "learning_rate": 1.361152536782856e-06, + "loss": 0.4712, + "step": 49020 + }, + { + "epoch": 1.66, + "learning_rate": 1.3557861418267514e-06, + "loss": 0.4812, + "step": 49040 + }, + { + "epoch": 1.67, + "learning_rate": 1.3504295770110731e-06, + "loss": 0.5029, + "step": 49060 + }, + { + "epoch": 1.67, + "learning_rate": 1.3450828484272727e-06, + "loss": 0.4581, + "step": 49080 + }, + { + "epoch": 1.67, + "learning_rate": 1.339745962155613e-06, + "loss": 0.4911, + "step": 49100 + }, + { + "epoch": 1.67, + "learning_rate": 1.3344189242651672e-06, + "loss": 0.4557, + "step": 49120 + }, + { + "epoch": 1.67, + "learning_rate": 1.3291017408138007e-06, + "loss": 0.5026, + "step": 49140 + }, + { + "epoch": 1.67, + "learning_rate": 1.32379441784818e-06, + "loss": 0.4814, + "step": 49160 + }, + { + "epoch": 1.67, + "learning_rate": 1.3184969614037567e-06, + "loss": 0.5167, + "step": 49180 + }, + { + "epoch": 1.67, + "learning_rate": 1.3132093775047616e-06, + "loss": 0.4855, + "step": 49200 + }, + { + "epoch": 1.67, + "learning_rate": 1.3079316721641987e-06, + "loss": 0.4367, + "step": 49220 + }, + { + "epoch": 1.67, + "learning_rate": 1.3026638513838386e-06, + "loss": 0.4777, + "step": 49240 + }, + { + "epoch": 1.67, + "learning_rate": 1.29740592115421e-06, + "loss": 0.4559, + "step": 49260 + }, + { + "epoch": 1.67, + "learning_rate": 1.292157887454596e-06, + "loss": 0.4557, + "step": 49280 + }, + { + "epoch": 1.67, + "learning_rate": 1.2869197562530223e-06, + "loss": 0.5089, + "step": 49300 + }, + { + "epoch": 1.67, + "learning_rate": 1.2816915335062596e-06, + "loss": 0.4675, + "step": 49320 + }, + { + "epoch": 1.67, + "learning_rate": 1.2764732251598022e-06, + "loss": 0.4365, + "step": 49340 + }, + { + "epoch": 1.68, + "learning_rate": 1.2712648371478763e-06, + "loss": 0.4803, + "step": 49360 + }, + { + "epoch": 1.68, + "learning_rate": 1.2660663753934256e-06, + "loss": 0.507, + "step": 49380 + }, + { + "epoch": 1.68, + "learning_rate": 1.260877845808105e-06, + "loss": 0.516, + "step": 49400 + }, + { + "epoch": 1.68, + "learning_rate": 1.2556992542922708e-06, + "loss": 0.4704, + "step": 49420 + }, + { + "epoch": 1.68, + "learning_rate": 1.2505306067349853e-06, + "loss": 0.4862, + "step": 49440 + }, + { + "epoch": 1.68, + "learning_rate": 1.2453719090139994e-06, + "loss": 0.4913, + "step": 49460 + }, + { + "epoch": 1.68, + "learning_rate": 1.2402231669957465e-06, + "loss": 0.4875, + "step": 49480 + }, + { + "epoch": 1.68, + "learning_rate": 1.2350843865353447e-06, + "loss": 0.4714, + "step": 49500 + }, + { + "epoch": 1.68, + "learning_rate": 1.2299555734765733e-06, + "loss": 0.4736, + "step": 49520 + }, + { + "epoch": 1.68, + "learning_rate": 1.2248367336518874e-06, + "loss": 0.5141, + "step": 49540 + }, + { + "epoch": 1.68, + "learning_rate": 1.2197278728823947e-06, + "loss": 0.4822, + "step": 49560 + }, + { + "epoch": 1.68, + "learning_rate": 1.214628996977857e-06, + "loss": 0.494, + "step": 49580 + }, + { + "epoch": 1.68, + "learning_rate": 1.2095401117366823e-06, + "loss": 0.5034, + "step": 49600 + }, + { + "epoch": 1.68, + "learning_rate": 1.204461222945913e-06, + "loss": 0.4749, + "step": 49620 + }, + { + "epoch": 1.68, + "learning_rate": 1.1993923363812288e-06, + "loss": 0.4841, + "step": 49640 + }, + { + "epoch": 1.69, + "learning_rate": 1.1943334578069277e-06, + "loss": 0.4757, + "step": 49660 + }, + { + "epoch": 1.69, + "learning_rate": 1.1892845929759412e-06, + "loss": 0.4889, + "step": 49680 + }, + { + "epoch": 1.69, + "learning_rate": 1.1842457476297953e-06, + "loss": 0.4914, + "step": 49700 + }, + { + "epoch": 1.69, + "learning_rate": 1.179216927498632e-06, + "loss": 0.4723, + "step": 49720 + }, + { + "epoch": 1.69, + "learning_rate": 1.1741981383011924e-06, + "loss": 0.4435, + "step": 49740 + }, + { + "epoch": 1.69, + "learning_rate": 1.1691893857448089e-06, + "loss": 0.5092, + "step": 49760 + }, + { + "epoch": 1.69, + "learning_rate": 1.1641906755254007e-06, + "loss": 0.4701, + "step": 49780 + }, + { + "epoch": 1.69, + "learning_rate": 1.159202013327464e-06, + "loss": 0.4724, + "step": 49800 + }, + { + "epoch": 1.69, + "learning_rate": 1.154223404824073e-06, + "loss": 0.4701, + "step": 49820 + }, + { + "epoch": 1.69, + "learning_rate": 1.1492548556768668e-06, + "loss": 0.4989, + "step": 49840 + }, + { + "epoch": 1.69, + "learning_rate": 1.1442963715360444e-06, + "loss": 0.4859, + "step": 49860 + }, + { + "epoch": 1.69, + "learning_rate": 1.139347958040361e-06, + "loss": 0.4877, + "step": 49880 + }, + { + "epoch": 1.69, + "learning_rate": 1.1344096208171186e-06, + "loss": 0.5052, + "step": 49900 + }, + { + "epoch": 1.69, + "learning_rate": 1.1294813654821591e-06, + "loss": 0.507, + "step": 49920 + }, + { + "epoch": 1.7, + "learning_rate": 1.1245631976398608e-06, + "loss": 0.5182, + "step": 49940 + }, + { + "epoch": 1.7, + "learning_rate": 1.1196551228831321e-06, + "loss": 0.4267, + "step": 49960 + }, + { + "epoch": 1.7, + "learning_rate": 1.1147571467933983e-06, + "loss": 0.4479, + "step": 49980 + }, + { + "epoch": 1.7, + "learning_rate": 1.1098692749406058e-06, + "loss": 0.4837, + "step": 50000 + } + ], + "max_steps": 58920, + "num_train_epochs": 2, + "total_flos": 136141580967936.0, + "trial_name": null, + "trial_params": null +}